diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,9 +2,9 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.03, + "epoch": 0.028, "eval_steps": 1000, - "global_step": 303000, + "global_step": 400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -78189,6 +78189,25032 @@ "eval_samples_per_second": 101.876, "eval_steps_per_second": 0.798, "step": 303000 + }, + { + "epoch": 0.000198, + "loss_gen": 6.050841808319092, + "loss_rtd": 0.2241448163986206, + "loss_sent": 0.12831345200538635, + "loss_sod": 0.07911839336156845, + "loss_total": 0.431576669216156, + "step": 303099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.845432758331299, + "loss_rtd": 0.22832554578781128, + "loss_sent": 0.2352820485830307, + "loss_sod": 0.0187496617436409, + "loss_total": 0.4823572635650635, + "step": 303099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.9500147700309753, + "learning_rate": 9.1612384728397e-06, + "loss": 0.4208, + "step": 303100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.8878021240234375, + "loss_rtd": 0.20205999910831451, + "loss_sent": 0.2727389633655548, + "loss_sod": 0.012358536943793297, + "loss_total": 0.4871574938297272, + "step": 303199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.56168794631958, + "loss_rtd": 0.19289152324199677, + "loss_sent": 0.13245266675949097, + "loss_sod": 0.01741240732371807, + "loss_total": 0.34275659918785095, + "step": 303199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.6196224689483643, + "learning_rate": 9.142938004827023e-06, + "loss": 0.4093, + "step": 303200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.765680313110352, + "loss_rtd": 0.2056213766336441, + "loss_sent": 0.2856610119342804, + "loss_sod": 0.002769284648820758, + "loss_total": 0.4940516948699951, + "step": 303299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.84200382232666, + "loss_rtd": 0.21612456440925598, + "loss_sent": 0.1716960370540619, + "loss_sod": 0.04607880860567093, + "loss_total": 0.4338994026184082, + "step": 303299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.8032750487327576, + "learning_rate": 9.124653994034022e-06, + "loss": 0.4204, + "step": 303300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.454245567321777, + "loss_rtd": 0.17996229231357574, + "loss_sent": 0.023502692580223083, + "loss_sod": 0.10512920469045639, + "loss_total": 0.3085941672325134, + "step": 303399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.909790992736816, + "loss_rtd": 0.2325899749994278, + "loss_sent": 0.40775030851364136, + "loss_sod": 0.013750200159847736, + "loss_total": 0.6540904641151428, + "step": 303399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.775709629058838, + "learning_rate": 9.106386447825499e-06, + "loss": 0.4226, + "step": 303400 + }, + { + "epoch": 0.000998, + "loss_gen": 6.396899223327637, + "loss_rtd": 0.22315895557403564, + "loss_sent": 0.3802189826965332, + "loss_sod": 0.08616264164447784, + "loss_total": 0.6895405650138855, + "step": 303499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.647336959838867, + "loss_rtd": 0.1995159238576889, + "loss_sent": 0.07161133736371994, + "loss_sod": 0.047437883913517, + "loss_total": 0.31856516003608704, + "step": 303499 + }, + { + "epoch": 0.001, + "grad_norm": 0.9866959452629089, + "learning_rate": 9.088135373559642e-06, + "loss": 0.4158, + "step": 303500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.747816562652588, + "loss_rtd": 0.2257116734981537, + "loss_sent": 0.09745519608259201, + "loss_sod": 0.050352804362773895, + "loss_total": 0.3735196888446808, + "step": 303599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.646799087524414, + "loss_rtd": 0.21798551082611084, + "loss_sent": 0.35812464356422424, + "loss_sod": 0.008219663053750992, + "loss_total": 0.5843298435211182, + "step": 303599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.3834220170974731, + "learning_rate": 9.069900778587948e-06, + "loss": 0.432, + "step": 303600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.22725248336792, + "loss_rtd": 0.18125180900096893, + "loss_sent": 0.0008502436685375869, + "loss_sod": 0.12379438430070877, + "loss_total": 0.30589643120765686, + "step": 303699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.671199321746826, + "loss_rtd": 0.1780216246843338, + "loss_sent": 0.11826789379119873, + "loss_sod": 0.10215851664543152, + "loss_total": 0.39844805002212524, + "step": 303699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.1688143014907837, + "learning_rate": 9.05168267025534e-06, + "loss": 0.4289, + "step": 303700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.7385573387146, + "loss_rtd": 0.194263756275177, + "loss_sent": 0.13795211911201477, + "loss_sod": 0.008502896875143051, + "loss_total": 0.3407187759876251, + "step": 303799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.806217670440674, + "loss_rtd": 0.21256524324417114, + "loss_sent": 0.2162608504295349, + "loss_sod": 0.007465574890375137, + "loss_total": 0.4362916648387909, + "step": 303799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.5026214122772217, + "learning_rate": 9.03348105590004e-06, + "loss": 0.4326, + "step": 303800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.081581115722656, + "loss_rtd": 0.17530463635921478, + "loss_sent": 2.538939588703215e-05, + "loss_sod": 0.09611823409795761, + "loss_total": 0.2714482545852661, + "step": 303899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.63226318359375, + "loss_rtd": 0.19327083230018616, + "loss_sent": 0.1276666522026062, + "loss_sod": 0.018620138987898827, + "loss_total": 0.33955761790275574, + "step": 303899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.214566707611084, + "learning_rate": 9.015295942853674e-06, + "loss": 0.4243, + "step": 303900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.632124423980713, + "loss_rtd": 0.17851640284061432, + "loss_sent": 0.05572657287120819, + "loss_sod": 0.06375811249017715, + "loss_total": 0.29800111055374146, + "step": 303999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.911339282989502, + "loss_rtd": 0.20451124012470245, + "loss_sent": 0.14055180549621582, + "loss_sod": 0.03734595701098442, + "loss_total": 0.382409006357193, + "step": 303999 + }, + { + "epoch": 0.002, + "grad_norm": 0.7258864045143127, + "learning_rate": 8.997127338441214e-06, + "loss": 0.4089, + "step": 304000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4066995084285736, + "eval_runtime": 154.1938, + "eval_samples_per_second": 100.153, + "eval_steps_per_second": 0.785, + "step": 304000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.176668643951416, + "loss_rtd": 0.17963097989559174, + "loss_sent": 0.1344403773546219, + "loss_sod": 0.015325573273003101, + "loss_total": 0.32939693331718445, + "step": 304099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.764191627502441, + "loss_rtd": 0.21740901470184326, + "loss_sent": 0.2713092565536499, + "loss_sod": 0.02650246024131775, + "loss_total": 0.5152207612991333, + "step": 304099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.444466233253479, + "learning_rate": 8.978975249980947e-06, + "loss": 0.4087, + "step": 304100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.423428058624268, + "loss_rtd": 0.16987082362174988, + "loss_sent": 0.03858550265431404, + "loss_sod": 0.011892799288034439, + "loss_total": 0.22034911811351776, + "step": 304199 + }, + { + "epoch": 0.002398, + "loss_gen": 6.461963653564453, + "loss_rtd": 0.2220069319009781, + "loss_sent": 0.12931038439273834, + "loss_sod": 0.06686516851186752, + "loss_total": 0.41818249225616455, + "step": 304199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.2428261041641235, + "learning_rate": 8.960839684784539e-06, + "loss": 0.4055, + "step": 304200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.889702796936035, + "loss_rtd": 0.21018104255199432, + "loss_sent": 0.27102744579315186, + "loss_sod": 0.05837943032383919, + "loss_total": 0.5395879149436951, + "step": 304299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.725460529327393, + "loss_rtd": 0.21940413117408752, + "loss_sent": 0.21741972863674164, + "loss_sod": 0.014049429446458817, + "loss_total": 0.4508732855319977, + "step": 304299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.1784645318984985, + "learning_rate": 8.942720650157004e-06, + "loss": 0.4199, + "step": 304300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.978394508361816, + "loss_rtd": 0.2223317176103592, + "loss_sent": 0.10638202726840973, + "loss_sod": 0.0260927714407444, + "loss_total": 0.354806512594223, + "step": 304399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.75954008102417, + "loss_rtd": 0.206836998462677, + "loss_sent": 0.1952584683895111, + "loss_sod": 0.01332041248679161, + "loss_total": 0.41541588306427, + "step": 304399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.024192214012146, + "learning_rate": 8.924618153396691e-06, + "loss": 0.4291, + "step": 304400 + }, + { + "epoch": 0.002998, + "loss_gen": 6.017472267150879, + "loss_rtd": 0.20987379550933838, + "loss_sent": 0.33753156661987305, + "loss_sod": 0.08601886034011841, + "loss_total": 0.6334242224693298, + "step": 304499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.853302001953125, + "loss_rtd": 0.19905000925064087, + "loss_sent": 0.12959928810596466, + "loss_sod": 0.04448812082409859, + "loss_total": 0.3731374144554138, + "step": 304499 + }, + { + "epoch": 0.003, + "grad_norm": 1.1366268396377563, + "learning_rate": 8.906532201795258e-06, + "loss": 0.427, + "step": 304500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.765444755554199, + "loss_rtd": 0.21887369453907013, + "loss_sent": 0.10184040665626526, + "loss_sod": 0.0014352030120790005, + "loss_total": 0.3221493065357208, + "step": 304599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.891476631164551, + "loss_rtd": 0.22070364654064178, + "loss_sent": 0.1429951786994934, + "loss_sod": 0.012814337387681007, + "loss_total": 0.37651318311691284, + "step": 304599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.5514282584190369, + "learning_rate": 8.888462802637747e-06, + "loss": 0.4148, + "step": 304600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.945662975311279, + "loss_rtd": 0.1951703280210495, + "loss_sent": 0.18956737220287323, + "loss_sod": 0.006553894840180874, + "loss_total": 0.3912915885448456, + "step": 304699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.197839736938477, + "loss_rtd": 0.15526467561721802, + "loss_sent": 2.5514187655062415e-05, + "loss_sod": 0.14672408998012543, + "loss_total": 0.3020142912864685, + "step": 304699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.3620028495788574, + "learning_rate": 8.870409963202498e-06, + "loss": 0.4273, + "step": 304700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.761125564575195, + "loss_rtd": 0.21085788309574127, + "loss_sent": 0.16229256987571716, + "loss_sod": 0.012492502108216286, + "loss_total": 0.385642945766449, + "step": 304799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.987504959106445, + "loss_rtd": 0.21323375403881073, + "loss_sent": 0.22197028994560242, + "loss_sod": 0.056242480874061584, + "loss_total": 0.49144652485847473, + "step": 304799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.070345401763916, + "learning_rate": 8.852373690761213e-06, + "loss": 0.4143, + "step": 304800 + }, + { + "epoch": 0.003798, + "loss_gen": 6.215188980102539, + "loss_rtd": 0.22704657912254333, + "loss_sent": 0.7372082471847534, + "loss_sod": 0.037779513746500015, + "loss_total": 1.002034306526184, + "step": 304899 + }, + { + "epoch": 0.003798, + "loss_gen": 6.315957069396973, + "loss_rtd": 0.2315492331981659, + "loss_sent": 0.12474516779184341, + "loss_sod": 0.039301835000514984, + "loss_total": 0.3955962359905243, + "step": 304899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.9579159021377563, + "learning_rate": 8.834353992578864e-06, + "loss": 0.4151, + "step": 304900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.721354961395264, + "loss_rtd": 0.20183220505714417, + "loss_sent": 0.026712244376540184, + "loss_sod": 0.019418183714151382, + "loss_total": 0.24796262383460999, + "step": 304999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.389822006225586, + "loss_rtd": 0.1734730303287506, + "loss_sent": 0.03372808173298836, + "loss_sod": 0.02425766922533512, + "loss_total": 0.23145878314971924, + "step": 304999 + }, + { + "epoch": 0.004, + "grad_norm": 0.6036795973777771, + "learning_rate": 8.816350875913809e-06, + "loss": 0.4372, + "step": 305000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4023120403289795, + "eval_runtime": 150.9107, + "eval_samples_per_second": 102.332, + "eval_steps_per_second": 0.802, + "step": 305000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.973085403442383, + "loss_rtd": 0.21758605539798737, + "loss_sent": 0.09473367780447006, + "loss_sod": 0.017782317474484444, + "loss_total": 0.3301020562648773, + "step": 305099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.739782333374023, + "loss_rtd": 0.1868346631526947, + "loss_sent": 0.06093979254364967, + "loss_sod": 0.11810218542814255, + "loss_total": 0.3658766448497772, + "step": 305099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.9413608908653259, + "learning_rate": 8.798364348017712e-06, + "loss": 0.4327, + "step": 305100 + }, + { + "epoch": 0.004398, + "loss_gen": 6.132180690765381, + "loss_rtd": 0.2076473832130432, + "loss_sent": 0.10466967523097992, + "loss_sod": 0.04797498136758804, + "loss_total": 0.3602920174598694, + "step": 305199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.889447212219238, + "loss_rtd": 0.21890375018119812, + "loss_sent": 0.40674641728401184, + "loss_sod": 0.020622648298740387, + "loss_total": 0.6462727785110474, + "step": 305199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.126071572303772, + "learning_rate": 8.780394416135512e-06, + "loss": 0.4266, + "step": 305200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.813969612121582, + "loss_rtd": 0.2175520807504654, + "loss_sent": 0.3686645030975342, + "loss_sod": 0.06446811556816101, + "loss_total": 0.6506847143173218, + "step": 305299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.83366584777832, + "loss_rtd": 0.20576824247837067, + "loss_sent": 0.15161006152629852, + "loss_sod": 0.07579024136066437, + "loss_total": 0.43316853046417236, + "step": 305299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.6219701766967773, + "learning_rate": 8.762441087505513e-06, + "loss": 0.4226, + "step": 305300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.857290744781494, + "loss_rtd": 0.22079241275787354, + "loss_sent": 0.2261514961719513, + "loss_sod": 0.05775216221809387, + "loss_total": 0.5046960711479187, + "step": 305399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.9695587158203125, + "loss_rtd": 0.22768771648406982, + "loss_sent": 0.1626943200826645, + "loss_sod": 0.03034215047955513, + "loss_total": 0.42072421312332153, + "step": 305399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.2125617265701294, + "learning_rate": 8.744504369359313e-06, + "loss": 0.4207, + "step": 305400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.4371018409729, + "loss_rtd": 0.1744883507490158, + "loss_sent": 0.03116326406598091, + "loss_sod": 0.0531020350754261, + "loss_total": 0.2587536573410034, + "step": 305499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.606621265411377, + "loss_rtd": 0.1650807112455368, + "loss_sent": 0.04597359150648117, + "loss_sod": 0.17810678482055664, + "loss_total": 0.3891611099243164, + "step": 305499 + }, + { + "epoch": 0.005, + "grad_norm": 1.2446157932281494, + "learning_rate": 8.726584268921827e-06, + "loss": 0.4172, + "step": 305500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.588064670562744, + "loss_rtd": 0.19660793244838715, + "loss_sent": 0.007900391705334187, + "loss_sod": 0.07882022112607956, + "loss_total": 0.2833285331726074, + "step": 305599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.797532558441162, + "loss_rtd": 0.19245637953281403, + "loss_sent": 0.3430267572402954, + "loss_sod": 0.0847749263048172, + "loss_total": 0.620258092880249, + "step": 305599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.202588438987732, + "learning_rate": 8.708680793411256e-06, + "loss": 0.428, + "step": 305600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.728855133056641, + "loss_rtd": 0.21642325818538666, + "loss_sent": 0.18111597001552582, + "loss_sod": 0.03321612998843193, + "loss_total": 0.4307553768157959, + "step": 305699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.846833229064941, + "loss_rtd": 0.2047511637210846, + "loss_sent": 0.27941465377807617, + "loss_sod": 0.06933329254388809, + "loss_total": 0.5534991025924683, + "step": 305699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.4724195003509521, + "learning_rate": 8.690793950039122e-06, + "loss": 0.4311, + "step": 305700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.72415828704834, + "loss_rtd": 0.2253650426864624, + "loss_sent": 0.2744627892971039, + "loss_sod": 0.016727153211832047, + "loss_total": 0.5165549516677856, + "step": 305799 + }, + { + "epoch": 0.005598, + "loss_gen": 6.1052350997924805, + "loss_rtd": 0.21438582241535187, + "loss_sent": 0.3632456064224243, + "loss_sod": 0.02788006141781807, + "loss_total": 0.605511486530304, + "step": 305799 + }, + { + "epoch": 0.0056, + "grad_norm": 2.0527873039245605, + "learning_rate": 8.672923746010242e-06, + "loss": 0.4223, + "step": 305800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.081293106079102, + "loss_rtd": 0.2098085582256317, + "loss_sent": 0.45756104588508606, + "loss_sod": 0.06941455602645874, + "loss_total": 0.7367841601371765, + "step": 305899 + }, + { + "epoch": 0.005798, + "loss_gen": 6.042714595794678, + "loss_rtd": 0.20942792296409607, + "loss_sent": 0.322293758392334, + "loss_sod": 0.14053134620189667, + "loss_total": 0.6722530126571655, + "step": 305899 + }, + { + "epoch": 0.0058, + "grad_norm": 2.0109331607818604, + "learning_rate": 8.655070188522752e-06, + "loss": 0.4232, + "step": 305900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.819046974182129, + "loss_rtd": 0.1984071135520935, + "loss_sent": 0.19649410247802734, + "loss_sod": 0.10485399514436722, + "loss_total": 0.49975520372390747, + "step": 305999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.798454761505127, + "loss_rtd": 0.21941687166690826, + "loss_sent": 0.30555278062820435, + "loss_sod": 0.06277532875537872, + "loss_total": 0.587744951248169, + "step": 305999 + }, + { + "epoch": 0.006, + "grad_norm": 1.1517330408096313, + "learning_rate": 8.637233284768026e-06, + "loss": 0.42, + "step": 306000 + }, + { + "epoch": 0.006, + "eval_loss": 0.4086720943450928, + "eval_runtime": 150.8522, + "eval_samples_per_second": 102.372, + "eval_steps_per_second": 0.802, + "step": 306000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.253348350524902, + "loss_rtd": 0.1574322134256363, + "loss_sent": 0.010820952244102955, + "loss_sod": 0.09293248504400253, + "loss_total": 0.2611856460571289, + "step": 306099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.309996604919434, + "loss_rtd": 0.1717710942029953, + "loss_sent": 0.0022359779104590416, + "loss_sod": 0.049111489206552505, + "loss_total": 0.22311855852603912, + "step": 306099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.753007709980011, + "learning_rate": 8.61941304193079e-06, + "loss": 0.4214, + "step": 306100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.302216529846191, + "loss_rtd": 0.1714804768562317, + "loss_sent": 0.00319567765109241, + "loss_sod": 0.10349278897047043, + "loss_total": 0.2781689465045929, + "step": 306199 + }, + { + "epoch": 0.006398, + "loss_gen": 6.137814521789551, + "loss_rtd": 0.20129691064357758, + "loss_sent": 0.11549151688814163, + "loss_sod": 0.021362487226724625, + "loss_total": 0.33815091848373413, + "step": 306199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.9512887597084045, + "learning_rate": 8.601609467189037e-06, + "loss": 0.4173, + "step": 306200 + }, + { + "epoch": 0.006598, + "loss_gen": 6.000729560852051, + "loss_rtd": 0.2171856015920639, + "loss_sent": 0.09850183129310608, + "loss_sod": 0.05850699171423912, + "loss_total": 0.3741944134235382, + "step": 306299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.706486701965332, + "loss_rtd": 0.22972096502780914, + "loss_sent": 0.1362563967704773, + "loss_sod": 0.0803494080901146, + "loss_total": 0.44632676243782043, + "step": 306299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.6421685218811035, + "learning_rate": 8.583822567714045e-06, + "loss": 0.4413, + "step": 306300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.928966522216797, + "loss_rtd": 0.20692603290081024, + "loss_sent": 0.3748835623264313, + "loss_sod": 0.013958632946014404, + "loss_total": 0.5957682132720947, + "step": 306399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.421674728393555, + "loss_rtd": 0.1743885576725006, + "loss_sent": 0.006461540702730417, + "loss_sod": 0.03082374669611454, + "loss_total": 0.21167385578155518, + "step": 306399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.5194551944732666, + "learning_rate": 8.566052350670362e-06, + "loss": 0.427, + "step": 306400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.7855939865112305, + "loss_rtd": 0.20919832587242126, + "loss_sent": 0.26255521178245544, + "loss_sod": 0.04820305109024048, + "loss_total": 0.5199565887451172, + "step": 306499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.920647144317627, + "loss_rtd": 0.19213224947452545, + "loss_sent": 0.16192471981048584, + "loss_sod": 0.0509570874273777, + "loss_total": 0.4050140380859375, + "step": 306499 + }, + { + "epoch": 0.007, + "grad_norm": 1.3673182725906372, + "learning_rate": 8.548298823215833e-06, + "loss": 0.4062, + "step": 306500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.848636150360107, + "loss_rtd": 0.22508534789085388, + "loss_sent": 0.1098591759800911, + "loss_sod": 0.07311011105775833, + "loss_total": 0.4080546200275421, + "step": 306599 + }, + { + "epoch": 0.007198, + "loss_gen": 6.084027290344238, + "loss_rtd": 0.22001692652702332, + "loss_sent": 0.05970962718129158, + "loss_sod": 0.00628052419051528, + "loss_total": 0.2860070765018463, + "step": 306599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.802567720413208, + "learning_rate": 8.530561992501595e-06, + "loss": 0.4224, + "step": 306600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.832769870758057, + "loss_rtd": 0.23815402388572693, + "loss_sent": 0.2707604169845581, + "loss_sod": 0.06610839813947678, + "loss_total": 0.57502281665802, + "step": 306699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.9994401931762695, + "loss_rtd": 0.2005050927400589, + "loss_sent": 0.06158899515867233, + "loss_sod": 0.0013490061974152923, + "loss_total": 0.26344308257102966, + "step": 306699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.000777244567871, + "learning_rate": 8.512841865672017e-06, + "loss": 0.425, + "step": 306700 + }, + { + "epoch": 0.007598, + "loss_gen": 6.238945484161377, + "loss_rtd": 0.21320757269859314, + "loss_sent": 0.16016341745853424, + "loss_sod": 0.1064838171005249, + "loss_total": 0.4798548221588135, + "step": 306799 + }, + { + "epoch": 0.007598, + "loss_gen": 6.0477423667907715, + "loss_rtd": 0.22165288031101227, + "loss_sent": 0.07312484830617905, + "loss_sod": 0.01312224194407463, + "loss_total": 0.30789998173713684, + "step": 306799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.1002517938613892, + "learning_rate": 8.495138449864775e-06, + "loss": 0.426, + "step": 306800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.6312031745910645, + "loss_rtd": 0.20619864761829376, + "loss_sent": 0.2728365361690521, + "loss_sod": 0.02045396715402603, + "loss_total": 0.4994891583919525, + "step": 306899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.870377063751221, + "loss_rtd": 0.23009894788265228, + "loss_sent": 0.32479292154312134, + "loss_sod": 0.20592084527015686, + "loss_total": 0.7608126997947693, + "step": 306899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.719900131225586, + "learning_rate": 8.477451752210803e-06, + "loss": 0.4312, + "step": 306900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.845320224761963, + "loss_rtd": 0.24552302062511444, + "loss_sent": 0.057985760271549225, + "loss_sod": 0.021562224254012108, + "loss_total": 0.3250710070133209, + "step": 306999 + }, + { + "epoch": 0.007998, + "loss_gen": 6.025938987731934, + "loss_rtd": 0.23549242317676544, + "loss_sent": 0.20588351786136627, + "loss_sod": 0.03881218284368515, + "loss_total": 0.48018813133239746, + "step": 306999 + }, + { + "epoch": 0.008, + "grad_norm": 0.9462911486625671, + "learning_rate": 8.459781779834303e-06, + "loss": 0.4277, + "step": 307000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4029849171638489, + "eval_runtime": 151.0495, + "eval_samples_per_second": 102.238, + "eval_steps_per_second": 0.801, + "step": 307000 + }, + { + "epoch": 0.008198, + "loss_gen": 6.112674713134766, + "loss_rtd": 0.2207801789045334, + "loss_sent": 0.28970956802368164, + "loss_sod": 0.05031801387667656, + "loss_total": 0.5608077645301819, + "step": 307099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.869450569152832, + "loss_rtd": 0.19692069292068481, + "loss_sent": 0.26349619030952454, + "loss_sod": 0.037413813173770905, + "loss_total": 0.49783068895339966, + "step": 307099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.637731671333313, + "learning_rate": 8.442128539852729e-06, + "loss": 0.4284, + "step": 307100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.389323711395264, + "loss_rtd": 0.20814189314842224, + "loss_sent": 0.06257633864879608, + "loss_sod": 0.02617780677974224, + "loss_total": 0.2968960404396057, + "step": 307199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.581353187561035, + "loss_rtd": 0.17588059604167938, + "loss_sent": 0.02419213205575943, + "loss_sod": 0.061465442180633545, + "loss_total": 0.26153817772865295, + "step": 307199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.6717695593833923, + "learning_rate": 8.424492039376809e-06, + "loss": 0.4137, + "step": 307200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.201351642608643, + "loss_rtd": 0.15850655734539032, + "loss_sent": 0.023376837372779846, + "loss_sod": 0.03525843098759651, + "loss_total": 0.21714182198047638, + "step": 307299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.674869060516357, + "loss_rtd": 0.21613983809947968, + "loss_sent": 0.11836820095777512, + "loss_sod": 0.012142570689320564, + "loss_total": 0.3466506004333496, + "step": 307299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.6594082713127136, + "learning_rate": 8.406872285510525e-06, + "loss": 0.4122, + "step": 307300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.550102233886719, + "loss_rtd": 0.16742391884326935, + "loss_sent": 0.043269120156764984, + "loss_sod": 0.037360548973083496, + "loss_total": 0.24805358052253723, + "step": 307399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.916943073272705, + "loss_rtd": 0.2425854653120041, + "loss_sent": 0.1675383597612381, + "loss_sod": 0.03806499019265175, + "loss_total": 0.448188841342926, + "step": 307399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.17293381690979, + "learning_rate": 8.38926928535112e-06, + "loss": 0.4251, + "step": 307400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.976449489593506, + "loss_rtd": 0.21067480742931366, + "loss_sent": 0.19202172756195068, + "loss_sod": 0.04000909626483917, + "loss_total": 0.4427056312561035, + "step": 307499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.856799125671387, + "loss_rtd": 0.19372926652431488, + "loss_sent": 0.03597626835107803, + "loss_sod": 0.016917118802666664, + "loss_total": 0.24662265181541443, + "step": 307499 + }, + { + "epoch": 0.009, + "grad_norm": 0.7623675465583801, + "learning_rate": 8.37168304598906e-06, + "loss": 0.421, + "step": 307500 + }, + { + "epoch": 0.009198, + "loss_gen": 6.130488872528076, + "loss_rtd": 0.2060985267162323, + "loss_sent": 0.07861499488353729, + "loss_sod": 0.017229467630386353, + "loss_total": 0.30194300413131714, + "step": 307599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.748977184295654, + "loss_rtd": 0.21738268435001373, + "loss_sent": 0.1314508467912674, + "loss_sod": 0.09008508175611496, + "loss_total": 0.4389185905456543, + "step": 307599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.7999606728553772, + "learning_rate": 8.354113574508088e-06, + "loss": 0.3978, + "step": 307600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.7744221687316895, + "loss_rtd": 0.20567235350608826, + "loss_sent": 0.19313567876815796, + "loss_sod": 0.006816547363996506, + "loss_total": 0.4056245684623718, + "step": 307699 + }, + { + "epoch": 0.009398, + "loss_gen": 6.0000715255737305, + "loss_rtd": 0.22208687663078308, + "loss_sent": 0.18611925840377808, + "loss_sod": 0.033019907772541046, + "loss_total": 0.4412260353565216, + "step": 307699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.6177722811698914, + "learning_rate": 8.33656087798519e-06, + "loss": 0.4489, + "step": 307700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.748345375061035, + "loss_rtd": 0.21914519369602203, + "loss_sent": 0.30765247344970703, + "loss_sod": 0.0384446457028389, + "loss_total": 0.5652422904968262, + "step": 307799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.634149074554443, + "loss_rtd": 0.19680051505565643, + "loss_sent": 0.12341060489416122, + "loss_sod": 0.014248199760913849, + "loss_total": 0.3344593346118927, + "step": 307799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.9522451162338257, + "learning_rate": 8.319024963490596e-06, + "loss": 0.4384, + "step": 307800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.977197170257568, + "loss_rtd": 0.20900194346904755, + "loss_sent": 0.17911319434642792, + "loss_sod": 0.00570686673745513, + "loss_total": 0.3938220143318176, + "step": 307899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.983529090881348, + "loss_rtd": 0.21527643501758575, + "loss_sent": 0.20467884838581085, + "loss_sod": 0.029048863798379898, + "loss_total": 0.4490041434764862, + "step": 307899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.6754030585289001, + "learning_rate": 8.301505838087753e-06, + "loss": 0.4323, + "step": 307900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.864013671875, + "loss_rtd": 0.21233950555324554, + "loss_sent": 0.45691630244255066, + "loss_sod": 0.010679269209504128, + "loss_total": 0.679935097694397, + "step": 307999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.977777004241943, + "loss_rtd": 0.2054664045572281, + "loss_sent": 0.17416636645793915, + "loss_sod": 0.06325425207614899, + "loss_total": 0.44288700819015503, + "step": 307999 + }, + { + "epoch": 0.01, + "grad_norm": 1.1362477540969849, + "learning_rate": 8.284003508833376e-06, + "loss": 0.437, + "step": 308000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4098188877105713, + "eval_runtime": 150.7216, + "eval_samples_per_second": 102.46, + "eval_steps_per_second": 0.803, + "step": 308000 + }, + { + "epoch": 0.010198, + "loss_gen": 6.214615345001221, + "loss_rtd": 0.2278728038072586, + "loss_sent": 0.1740763783454895, + "loss_sod": 0.06421086192131042, + "loss_total": 0.4661600589752197, + "step": 308099 + }, + { + "epoch": 0.010198, + "loss_gen": 6.3376569747924805, + "loss_rtd": 0.20672421157360077, + "loss_sent": 0.17966632544994354, + "loss_sod": 0.060254521667957306, + "loss_total": 0.4466450810432434, + "step": 308099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.337296962738037, + "learning_rate": 8.266517982777405e-06, + "loss": 0.4145, + "step": 308100 + }, + { + "epoch": 0.010398, + "loss_gen": 6.027499198913574, + "loss_rtd": 0.2186448723077774, + "loss_sent": 0.1646350473165512, + "loss_sod": 0.03394540026783943, + "loss_total": 0.41722530126571655, + "step": 308199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.978206634521484, + "loss_rtd": 0.21628262102603912, + "loss_sent": 0.06598977744579315, + "loss_sod": 0.00904631894081831, + "loss_total": 0.29131871461868286, + "step": 308199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.6215561628341675, + "learning_rate": 8.249049266962988e-06, + "loss": 0.4356, + "step": 308200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.798785209655762, + "loss_rtd": 0.201616570353508, + "loss_sent": 0.2186223417520523, + "loss_sod": 0.020315643399953842, + "loss_total": 0.44055455923080444, + "step": 308299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.9894700050354, + "loss_rtd": 0.20303881168365479, + "loss_sent": 0.12828856706619263, + "loss_sod": 0.06300602853298187, + "loss_total": 0.3943334221839905, + "step": 308299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.0811374187469482, + "learning_rate": 8.231597368426531e-06, + "loss": 0.4271, + "step": 308300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.905354022979736, + "loss_rtd": 0.21323953568935394, + "loss_sent": 0.25014716386795044, + "loss_sod": 0.012424895539879799, + "loss_total": 0.47581160068511963, + "step": 308399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.935938835144043, + "loss_rtd": 0.21692337095737457, + "loss_sent": 0.17644526064395905, + "loss_sod": 0.01989194005727768, + "loss_total": 0.4132605791091919, + "step": 308399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.8411424160003662, + "learning_rate": 8.214162294197664e-06, + "loss": 0.4363, + "step": 308400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.775577068328857, + "loss_rtd": 0.19676491618156433, + "loss_sent": 0.12638449668884277, + "loss_sod": 0.03428375720977783, + "loss_total": 0.35743317008018494, + "step": 308499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.870701789855957, + "loss_rtd": 0.18911170959472656, + "loss_sent": 0.3005780279636383, + "loss_sod": 0.04763563722372055, + "loss_total": 0.537325382232666, + "step": 308499 + }, + { + "epoch": 0.011, + "grad_norm": 0.9006572961807251, + "learning_rate": 8.196744051299239e-06, + "loss": 0.4176, + "step": 308500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.286965370178223, + "loss_rtd": 0.16980917751789093, + "loss_sent": 0.00037132465513423085, + "loss_sod": 0.02953839674592018, + "loss_total": 0.1997188925743103, + "step": 308599 + }, + { + "epoch": 0.011198, + "loss_gen": 6.053245544433594, + "loss_rtd": 0.21546247601509094, + "loss_sent": 0.24839432537555695, + "loss_sod": 0.01285785622894764, + "loss_total": 0.4767146706581116, + "step": 308599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.1296887397766113, + "learning_rate": 8.179342646747295e-06, + "loss": 0.4094, + "step": 308600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.91952657699585, + "loss_rtd": 0.18278734385967255, + "loss_sent": 0.0061375899240374565, + "loss_sod": 0.1125425398349762, + "loss_total": 0.3014674484729767, + "step": 308699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.580056190490723, + "loss_rtd": 0.19616815447807312, + "loss_sent": 0.025577362626791, + "loss_sod": 0.08445725589990616, + "loss_total": 0.30620276927948, + "step": 308699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.9004150629043579, + "learning_rate": 8.16195808755113e-06, + "loss": 0.422, + "step": 308700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.969113826751709, + "loss_rtd": 0.21206234395503998, + "loss_sent": 0.024632275104522705, + "loss_sod": 0.12887535989284515, + "loss_total": 0.36556997895240784, + "step": 308799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.520026683807373, + "loss_rtd": 0.23807401955127716, + "loss_sent": 0.10061891376972198, + "loss_sod": 0.0021364905405789614, + "loss_total": 0.34082943201065063, + "step": 308799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.0176562070846558, + "learning_rate": 8.144590380713252e-06, + "loss": 0.4253, + "step": 308800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.825716018676758, + "loss_rtd": 0.2084958255290985, + "loss_sent": 0.08576523512601852, + "loss_sod": 0.09504387527704239, + "loss_total": 0.3893049359321594, + "step": 308899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.504941463470459, + "loss_rtd": 0.22477424144744873, + "loss_sent": 0.41075342893600464, + "loss_sod": 0.013927502557635307, + "loss_total": 0.649455189704895, + "step": 308899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.9160724878311157, + "learning_rate": 8.127239533229369e-06, + "loss": 0.4335, + "step": 308900 + }, + { + "epoch": 0.011998, + "loss_gen": 6.3390679359436035, + "loss_rtd": 0.19545334577560425, + "loss_sent": 0.12336979061365128, + "loss_sod": 0.07010123133659363, + "loss_total": 0.38892436027526855, + "step": 308999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.7447404861450195, + "loss_rtd": 0.21294380724430084, + "loss_sent": 0.08362966030836105, + "loss_sod": 0.05657492205500603, + "loss_total": 0.3531484007835388, + "step": 308999 + }, + { + "epoch": 0.012, + "grad_norm": 1.0022826194763184, + "learning_rate": 8.109905552088388e-06, + "loss": 0.4265, + "step": 309000 + }, + { + "epoch": 0.012, + "eval_loss": 0.4004424810409546, + "eval_runtime": 151.3261, + "eval_samples_per_second": 102.051, + "eval_steps_per_second": 0.8, + "step": 309000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.181689739227295, + "loss_rtd": 0.1635502576828003, + "loss_sent": 0.004259913228452206, + "loss_sod": 0.0969497412443161, + "loss_total": 0.26475992798805237, + "step": 309099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.584157943725586, + "loss_rtd": 0.21053799986839294, + "loss_sent": 0.20525944232940674, + "loss_sod": 0.008120628073811531, + "loss_total": 0.42391806840896606, + "step": 309099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.9442334771156311, + "learning_rate": 8.092588444272437e-06, + "loss": 0.4211, + "step": 309100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.62851619720459, + "loss_rtd": 0.22425146400928497, + "loss_sent": 0.23456962406635284, + "loss_sod": 0.0038013344164937735, + "loss_total": 0.46262240409851074, + "step": 309199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.6538286209106445, + "loss_rtd": 0.19378627836704254, + "loss_sent": 0.03899012878537178, + "loss_sod": 0.04087996482849121, + "loss_total": 0.27365636825561523, + "step": 309199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.3899539709091187, + "learning_rate": 8.075288216756849e-06, + "loss": 0.4391, + "step": 309200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.944112777709961, + "loss_rtd": 0.21902185678482056, + "loss_sent": 0.16321153938770294, + "loss_sod": 0.050549089908599854, + "loss_total": 0.43278247117996216, + "step": 309299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.640238285064697, + "loss_rtd": 0.18983475863933563, + "loss_sent": 0.011616911739110947, + "loss_sod": 0.08104175329208374, + "loss_total": 0.2824934422969818, + "step": 309299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.8254089951515198, + "learning_rate": 8.058004876510167e-06, + "loss": 0.4284, + "step": 309300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.712543964385986, + "loss_rtd": 0.21910497546195984, + "loss_sent": 0.2007427215576172, + "loss_sod": 0.03808535262942314, + "loss_total": 0.4579330384731293, + "step": 309399 + }, + { + "epoch": 0.012798, + "loss_gen": 6.1219329833984375, + "loss_rtd": 0.20662803947925568, + "loss_sent": 0.009931064210832119, + "loss_sod": 0.09581126272678375, + "loss_total": 0.3123703598976135, + "step": 309399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.3430635929107666, + "learning_rate": 8.040738430494094e-06, + "loss": 0.4296, + "step": 309400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.938852787017822, + "loss_rtd": 0.205555260181427, + "loss_sent": 0.39438170194625854, + "loss_sod": 0.04952486604452133, + "loss_total": 0.6494618654251099, + "step": 309499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.96164608001709, + "loss_rtd": 0.2238394021987915, + "loss_sent": 0.4905053377151489, + "loss_sod": 0.03014589473605156, + "loss_total": 0.7444906234741211, + "step": 309499 + }, + { + "epoch": 0.013, + "grad_norm": 2.7371878623962402, + "learning_rate": 8.023488885663561e-06, + "loss": 0.4333, + "step": 309500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.913938522338867, + "loss_rtd": 0.2102501541376114, + "loss_sent": 0.12971247732639313, + "loss_sod": 0.020894642919301987, + "loss_total": 0.3608572781085968, + "step": 309599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.409268856048584, + "loss_rtd": 0.17835590243339539, + "loss_sent": 0.018471039831638336, + "loss_sod": 0.02411385253071785, + "loss_total": 0.22094079852104187, + "step": 309599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.6213912963867188, + "learning_rate": 8.006256248966698e-06, + "loss": 0.423, + "step": 309600 + }, + { + "epoch": 0.013398, + "loss_gen": 6.235551357269287, + "loss_rtd": 0.22233493626117706, + "loss_sent": 0.15506519377231598, + "loss_sod": 0.07004674524068832, + "loss_total": 0.44744688272476196, + "step": 309699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.766732692718506, + "loss_rtd": 0.18688441812992096, + "loss_sent": 0.2524843215942383, + "loss_sod": 0.02328348346054554, + "loss_total": 0.46265220642089844, + "step": 309699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.9869791269302368, + "learning_rate": 7.989040527344782e-06, + "loss": 0.4294, + "step": 309700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.2954487800598145, + "loss_rtd": 0.17030614614486694, + "loss_sent": 2.866191243811045e-05, + "loss_sod": 0.04264570400118828, + "loss_total": 0.2129805088043213, + "step": 309799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.9240593910217285, + "loss_rtd": 0.18576836585998535, + "loss_sent": 0.08892825245857239, + "loss_sod": 0.019113805145025253, + "loss_total": 0.2938104271888733, + "step": 309799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.9123243093490601, + "learning_rate": 7.971841727732322e-06, + "loss": 0.4303, + "step": 309800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.842307090759277, + "loss_rtd": 0.21397273242473602, + "loss_sent": 0.13180017471313477, + "loss_sod": 0.01135589275509119, + "loss_total": 0.3571287989616394, + "step": 309899 + }, + { + "epoch": 0.013798, + "loss_gen": 6.162484645843506, + "loss_rtd": 0.20132121443748474, + "loss_sent": 0.1628807932138443, + "loss_sod": 0.09502141177654266, + "loss_total": 0.4592233896255493, + "step": 309899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.8144239783287048, + "learning_rate": 7.954659857056984e-06, + "loss": 0.4129, + "step": 309900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.821217060089111, + "loss_rtd": 0.20290368795394897, + "loss_sent": 0.1426912546157837, + "loss_sod": 0.039541371166706085, + "loss_total": 0.38513630628585815, + "step": 309999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.852529525756836, + "loss_rtd": 0.19443050026893616, + "loss_sent": 0.14577209949493408, + "loss_sod": 0.03677899017930031, + "loss_total": 0.37698158621788025, + "step": 309999 + }, + { + "epoch": 0.014, + "grad_norm": 0.8347766399383545, + "learning_rate": 7.93749492223964e-06, + "loss": 0.4071, + "step": 310000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4037657082080841, + "eval_runtime": 150.9514, + "eval_samples_per_second": 102.304, + "eval_steps_per_second": 0.802, + "step": 310000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.922464370727539, + "loss_rtd": 0.20891152322292328, + "loss_sent": 0.2245611548423767, + "loss_sod": 0.07452702522277832, + "loss_total": 0.5079997181892395, + "step": 310099 + }, + { + "epoch": 0.014198, + "loss_gen": 6.004929542541504, + "loss_rtd": 0.1983024924993515, + "loss_sent": 0.17077364027500153, + "loss_sod": 0.055036187171936035, + "loss_total": 0.42411231994628906, + "step": 310099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.4000916481018066, + "learning_rate": 7.9203469301943e-06, + "loss": 0.4217, + "step": 310100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.872138500213623, + "loss_rtd": 0.21818286180496216, + "loss_sent": 0.2573728561401367, + "loss_sod": 0.09202329814434052, + "loss_total": 0.5675790309906006, + "step": 310199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.679378032684326, + "loss_rtd": 0.2042427510023117, + "loss_sent": 0.2703692317008972, + "loss_sod": 0.08388488739728928, + "loss_total": 0.5584968328475952, + "step": 310199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.8075625896453857, + "learning_rate": 7.90321588782818e-06, + "loss": 0.4301, + "step": 310200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.760520935058594, + "loss_rtd": 0.20171736180782318, + "loss_sent": 0.19153904914855957, + "loss_sod": 0.0881851464509964, + "loss_total": 0.48144155740737915, + "step": 310299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.656811237335205, + "loss_rtd": 0.2206784337759018, + "loss_sent": 0.14364071190357208, + "loss_sod": 0.00568934204056859, + "loss_total": 0.3700084686279297, + "step": 310299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.1731743812561035, + "learning_rate": 7.886101802041672e-06, + "loss": 0.43, + "step": 310300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.315280914306641, + "loss_rtd": 0.1629236340522766, + "loss_sent": 0.020134510472416878, + "loss_sod": 0.09313705563545227, + "loss_total": 0.2761951982975006, + "step": 310399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.15852689743042, + "loss_rtd": 0.17851246893405914, + "loss_sent": 0.009823448956012726, + "loss_sod": 0.04415597766637802, + "loss_total": 0.2324918806552887, + "step": 310399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.8541032075881958, + "learning_rate": 7.86900467972833e-06, + "loss": 0.4118, + "step": 310400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.873555660247803, + "loss_rtd": 0.20283553004264832, + "loss_sent": 0.04049250856041908, + "loss_sod": 0.0539071299135685, + "loss_total": 0.2972351610660553, + "step": 310499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.465541839599609, + "loss_rtd": 0.17580774426460266, + "loss_sent": 0.06673498451709747, + "loss_sod": 0.03656630963087082, + "loss_total": 0.27910906076431274, + "step": 310499 + }, + { + "epoch": 0.015, + "grad_norm": 0.7130053043365479, + "learning_rate": 7.851924527774856e-06, + "loss": 0.4077, + "step": 310500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.883062362670898, + "loss_rtd": 0.20074811577796936, + "loss_sent": 0.2130950391292572, + "loss_sod": 0.015295304358005524, + "loss_total": 0.4291384518146515, + "step": 310599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.838763236999512, + "loss_rtd": 0.2280886322259903, + "loss_sent": 0.060331884771585464, + "loss_sod": 0.06348002701997757, + "loss_total": 0.35190054774284363, + "step": 310599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.7528257966041565, + "learning_rate": 7.834861353061146e-06, + "loss": 0.4148, + "step": 310600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.73049783706665, + "loss_rtd": 0.21847784519195557, + "loss_sent": 0.38030803203582764, + "loss_sod": 0.01773514412343502, + "loss_total": 0.6165210008621216, + "step": 310699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.263930797576904, + "loss_rtd": 0.17946848273277283, + "loss_sent": 0.05554213747382164, + "loss_sod": 0.10321007668972015, + "loss_total": 0.3382206857204437, + "step": 310699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.1871514320373535, + "learning_rate": 7.817815162460234e-06, + "loss": 0.4204, + "step": 310700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.89235258102417, + "loss_rtd": 0.22214749455451965, + "loss_sent": 0.32768210768699646, + "loss_sod": 0.03052462451159954, + "loss_total": 0.5803542137145996, + "step": 310799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.6772260665893555, + "loss_rtd": 0.22638951241970062, + "loss_sent": 0.17034272849559784, + "loss_sod": 0.0030031949281692505, + "loss_total": 0.3997354507446289, + "step": 310799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.5054242610931396, + "learning_rate": 7.800785962838342e-06, + "loss": 0.4151, + "step": 310800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.18792200088501, + "loss_rtd": 0.1806230992078781, + "loss_sent": 0.10035458207130432, + "loss_sod": 0.0033172129187732935, + "loss_total": 0.28429490327835083, + "step": 310899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.800225257873535, + "loss_rtd": 0.23230841755867004, + "loss_sent": 0.23793214559555054, + "loss_sod": 0.012393715791404247, + "loss_total": 0.4826342761516571, + "step": 310899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.760189414024353, + "learning_rate": 7.783773761054808e-06, + "loss": 0.4163, + "step": 310900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.997812747955322, + "loss_rtd": 0.20307239890098572, + "loss_sent": 0.15201528370380402, + "loss_sod": 0.019326893612742424, + "loss_total": 0.3744145631790161, + "step": 310999 + }, + { + "epoch": 0.015998, + "loss_gen": 6.301429748535156, + "loss_rtd": 0.20385730266571045, + "loss_sent": 0.3304522931575775, + "loss_sod": 0.01187446340918541, + "loss_total": 0.5461840629577637, + "step": 310999 + }, + { + "epoch": 0.016, + "grad_norm": 1.095665693283081, + "learning_rate": 7.76677856396215e-06, + "loss": 0.4285, + "step": 311000 + }, + { + "epoch": 0.016, + "eval_loss": 0.40931645035743713, + "eval_runtime": 151.065, + "eval_samples_per_second": 102.228, + "eval_steps_per_second": 0.801, + "step": 311000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.989282608032227, + "loss_rtd": 0.21287782490253448, + "loss_sent": 0.24741996824741364, + "loss_sod": 0.05294226109981537, + "loss_total": 0.5132400393486023, + "step": 311099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.814924716949463, + "loss_rtd": 0.2096560150384903, + "loss_sent": 0.08405181020498276, + "loss_sod": 0.008440888486802578, + "loss_total": 0.302148699760437, + "step": 311099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.880911111831665, + "learning_rate": 7.749800378406042e-06, + "loss": 0.4382, + "step": 311100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.812239646911621, + "loss_rtd": 0.21669985353946686, + "loss_sent": 0.3005824089050293, + "loss_sod": 0.040739044547080994, + "loss_total": 0.5580213069915771, + "step": 311199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.896325588226318, + "loss_rtd": 0.20856140553951263, + "loss_sent": 0.22192752361297607, + "loss_sod": 0.004638968035578728, + "loss_total": 0.4351279139518738, + "step": 311199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.3912092447280884, + "learning_rate": 7.732839211225295e-06, + "loss": 0.4333, + "step": 311200 + }, + { + "epoch": 0.016598, + "loss_gen": 6.134925842285156, + "loss_rtd": 0.22416922450065613, + "loss_sent": 0.21561919152736664, + "loss_sod": 0.015460368245840073, + "loss_total": 0.45524877309799194, + "step": 311299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.324389934539795, + "loss_rtd": 0.1754237413406372, + "loss_sent": 0.048315126448869705, + "loss_sod": 0.04650310054421425, + "loss_total": 0.27024197578430176, + "step": 311299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.8866177797317505, + "learning_rate": 7.715895069251855e-06, + "loss": 0.4177, + "step": 311300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.278530597686768, + "loss_rtd": 0.15986032783985138, + "loss_sent": 0.024686088785529137, + "loss_sod": 0.12176243960857391, + "loss_total": 0.3063088655471802, + "step": 311399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.9626312255859375, + "loss_rtd": 0.21207858622074127, + "loss_sent": 0.15108805894851685, + "loss_sod": 0.07089990377426147, + "loss_total": 0.4340665340423584, + "step": 311399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.3012689352035522, + "learning_rate": 7.698967959310815e-06, + "loss": 0.3987, + "step": 311400 + }, + { + "epoch": 0.016998, + "loss_gen": 6.174948215484619, + "loss_rtd": 0.22797290980815887, + "loss_sent": 0.15986447036266327, + "loss_sod": 0.030468493700027466, + "loss_total": 0.4183058738708496, + "step": 311499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.9885149002075195, + "loss_rtd": 0.20035885274410248, + "loss_sent": 0.2758614718914032, + "loss_sod": 0.05242743343114853, + "loss_total": 0.528647780418396, + "step": 311499 + }, + { + "epoch": 0.017, + "grad_norm": 0.9514748454093933, + "learning_rate": 7.682057888220439e-06, + "loss": 0.4372, + "step": 311500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.339448928833008, + "loss_rtd": 0.14678730070590973, + "loss_sent": 2.946843596873805e-05, + "loss_sod": 0.2171320915222168, + "loss_total": 0.36394885182380676, + "step": 311599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.157167434692383, + "loss_rtd": 0.15583649277687073, + "loss_sent": 0.130470409989357, + "loss_sod": 0.05063977092504501, + "loss_total": 0.33694666624069214, + "step": 311599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.4903481006622314, + "learning_rate": 7.665164862792074e-06, + "loss": 0.4313, + "step": 311600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.914399147033691, + "loss_rtd": 0.20276793837547302, + "loss_sent": 0.1353200525045395, + "loss_sod": 0.017816588282585144, + "loss_total": 0.35590457916259766, + "step": 311699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.839435577392578, + "loss_rtd": 0.20555274188518524, + "loss_sent": 0.08742252737283707, + "loss_sod": 0.07438893616199493, + "loss_total": 0.36736419796943665, + "step": 311699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.7307558655738831, + "learning_rate": 7.64828888983024e-06, + "loss": 0.4353, + "step": 311700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.628203392028809, + "loss_rtd": 0.21813525259494781, + "loss_sent": 0.08447877317667007, + "loss_sod": 0.03288706764578819, + "loss_total": 0.3355010747909546, + "step": 311799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.8870849609375, + "loss_rtd": 0.20519371330738068, + "loss_sent": 0.0029134685173630714, + "loss_sod": 0.18275833129882812, + "loss_total": 0.3908655047416687, + "step": 311799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.9509856104850769, + "learning_rate": 7.631429976132577e-06, + "loss": 0.4224, + "step": 311800 + }, + { + "epoch": 0.017798, + "loss_gen": 6.52073335647583, + "loss_rtd": 0.23186054825782776, + "loss_sent": 0.12316082417964935, + "loss_sod": 0.08576104044914246, + "loss_total": 0.44078242778778076, + "step": 311899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.790531635284424, + "loss_rtd": 0.21376632153987885, + "loss_sent": 0.10404068231582642, + "loss_sod": 0.020191598683595657, + "loss_total": 0.3379985988140106, + "step": 311899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.0056074857711792, + "learning_rate": 7.614588128489864e-06, + "loss": 0.4193, + "step": 311900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.688158988952637, + "loss_rtd": 0.22824282944202423, + "loss_sent": 0.2352687120437622, + "loss_sod": 0.009616414085030556, + "loss_total": 0.47312796115875244, + "step": 311999 + }, + { + "epoch": 0.017998, + "loss_gen": 6.112806797027588, + "loss_rtd": 0.20789451897144318, + "loss_sent": 0.24069654941558838, + "loss_sod": 0.04331938922405243, + "loss_total": 0.491910457611084, + "step": 311999 + }, + { + "epoch": 0.018, + "grad_norm": 1.3421379327774048, + "learning_rate": 7.597763353685966e-06, + "loss": 0.4189, + "step": 312000 + }, + { + "epoch": 0.018, + "eval_loss": 0.40631282329559326, + "eval_runtime": 151.4112, + "eval_samples_per_second": 101.994, + "eval_steps_per_second": 0.799, + "step": 312000 + }, + { + "epoch": 0.018198, + "loss_gen": 6.034616947174072, + "loss_rtd": 0.21921761333942413, + "loss_sent": 0.13182824850082397, + "loss_sod": 0.06258413195610046, + "loss_total": 0.41363000869750977, + "step": 312099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.633738040924072, + "loss_rtd": 0.19921241700649261, + "loss_sent": 0.10855448246002197, + "loss_sod": 0.04418952018022537, + "loss_total": 0.35195642709732056, + "step": 312099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.8034456372261047, + "learning_rate": 7.580955658497924e-06, + "loss": 0.427, + "step": 312100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.59451150894165, + "loss_rtd": 0.19150112569332123, + "loss_sent": 0.035920217633247375, + "loss_sod": 0.08746415376663208, + "loss_total": 0.3148854970932007, + "step": 312199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.153200626373291, + "loss_rtd": 0.1642102599143982, + "loss_sent": 0.02211596444249153, + "loss_sod": 0.04951123893260956, + "loss_total": 0.23583745956420898, + "step": 312199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.1884304285049438, + "learning_rate": 7.564165049695882e-06, + "loss": 0.4254, + "step": 312200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.964243412017822, + "loss_rtd": 0.22288161516189575, + "loss_sent": 0.3737553060054779, + "loss_sod": 0.061662301421165466, + "loss_total": 0.6582992076873779, + "step": 312299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.274056434631348, + "loss_rtd": 0.1981636881828308, + "loss_sent": 0.006828839424997568, + "loss_sod": 0.16011330485343933, + "loss_total": 0.36510583758354187, + "step": 312299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.2488231658935547, + "learning_rate": 7.547391534043069e-06, + "loss": 0.4405, + "step": 312300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.872720718383789, + "loss_rtd": 0.2019290030002594, + "loss_sent": 0.11224554479122162, + "loss_sod": 0.07444722950458527, + "loss_total": 0.38862180709838867, + "step": 312399 + }, + { + "epoch": 0.018798, + "loss_gen": 6.146448135375977, + "loss_rtd": 0.20309272408485413, + "loss_sent": 0.02162293717265129, + "loss_sod": 0.03802800923585892, + "loss_total": 0.26274365186691284, + "step": 312399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.240323781967163, + "learning_rate": 7.5306351182958865e-06, + "loss": 0.426, + "step": 312400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.935499668121338, + "loss_rtd": 0.2323479950428009, + "loss_sent": 0.08306394517421722, + "loss_sod": 0.02807151898741722, + "loss_total": 0.34348344802856445, + "step": 312499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.975403785705566, + "loss_rtd": 0.21715359389781952, + "loss_sent": 0.11344219744205475, + "loss_sod": 0.14398878812789917, + "loss_total": 0.47458457946777344, + "step": 312499 + }, + { + "epoch": 0.019, + "grad_norm": 1.4612895250320435, + "learning_rate": 7.5138958092037806e-06, + "loss": 0.428, + "step": 312500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.920339107513428, + "loss_rtd": 0.218119814991951, + "loss_sent": 0.2784358561038971, + "loss_sod": 0.008258887566626072, + "loss_total": 0.5048145651817322, + "step": 312599 + }, + { + "epoch": 0.019198, + "loss_gen": 6.144434452056885, + "loss_rtd": 0.2286521941423416, + "loss_sent": 0.08838971704244614, + "loss_sod": 0.050909243524074554, + "loss_total": 0.3679511547088623, + "step": 312599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.8755446672439575, + "learning_rate": 7.497173613509367e-06, + "loss": 0.4299, + "step": 312600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.931336879730225, + "loss_rtd": 0.19997598230838776, + "loss_sent": 0.27158093452453613, + "loss_sod": 0.014547404833137989, + "loss_total": 0.4861043095588684, + "step": 312699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.624778747558594, + "loss_rtd": 0.2028387188911438, + "loss_sent": 0.16659888625144958, + "loss_sod": 0.098826102912426, + "loss_total": 0.4682637155056, + "step": 312699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.1667513847351074, + "learning_rate": 7.4804685379483486e-06, + "loss": 0.4254, + "step": 312700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.845862865447998, + "loss_rtd": 0.21606336534023285, + "loss_sent": 0.18905384838581085, + "loss_sod": 0.018588026985526085, + "loss_total": 0.42370522022247314, + "step": 312799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.843987464904785, + "loss_rtd": 0.22016587853431702, + "loss_sent": 0.24973087012767792, + "loss_sod": 0.030912479385733604, + "loss_total": 0.5008092522621155, + "step": 312799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.0143873691558838, + "learning_rate": 7.463780589249508e-06, + "loss": 0.4393, + "step": 312800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.206735134124756, + "loss_rtd": 0.16853559017181396, + "loss_sent": 0.0024505567271262407, + "loss_sod": 0.03042198345065117, + "loss_total": 0.20140813291072845, + "step": 312899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.293466091156006, + "loss_rtd": 0.1788954883813858, + "loss_sent": 0.013938656076788902, + "loss_sod": 0.043361686170101166, + "loss_total": 0.23619581758975983, + "step": 312899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.5000883340835571, + "learning_rate": 7.447109774134758e-06, + "loss": 0.4363, + "step": 312900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.893698215484619, + "loss_rtd": 0.20332132279872894, + "loss_sent": 0.0011745416559278965, + "loss_sod": 0.18095089495182037, + "loss_total": 0.3854467570781708, + "step": 312999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.233047008514404, + "loss_rtd": 0.2026233971118927, + "loss_sent": 2.6892364985542372e-05, + "loss_sod": 0.1481531858444214, + "loss_total": 0.3508034646511078, + "step": 312999 + }, + { + "epoch": 0.02, + "grad_norm": 1.1689213514328003, + "learning_rate": 7.4304560993191e-06, + "loss": 0.4296, + "step": 313000 + }, + { + "epoch": 0.02, + "eval_loss": 0.40171995759010315, + "eval_runtime": 151.2712, + "eval_samples_per_second": 102.088, + "eval_steps_per_second": 0.8, + "step": 313000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.944424152374268, + "loss_rtd": 0.21740028262138367, + "loss_sent": 0.06304635107517242, + "loss_sod": 0.02363731525838375, + "loss_total": 0.3040839433670044, + "step": 313099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.934466361999512, + "loss_rtd": 0.19472838938236237, + "loss_sent": 0.2642924189567566, + "loss_sod": 0.009696826338768005, + "loss_total": 0.46871763467788696, + "step": 313099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.8409777879714966, + "learning_rate": 7.41381957151065e-06, + "loss": 0.4214, + "step": 313100 + }, + { + "epoch": 0.020398, + "loss_gen": 6.062003135681152, + "loss_rtd": 0.1981760859489441, + "loss_sent": 0.281217485666275, + "loss_sod": 0.0636417493224144, + "loss_total": 0.5430353283882141, + "step": 313199 + }, + { + "epoch": 0.020398, + "loss_gen": 6.133944988250732, + "loss_rtd": 0.21455906331539154, + "loss_sent": 0.11391421407461166, + "loss_sod": 0.025152625516057014, + "loss_total": 0.3536258935928345, + "step": 313199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.8949501514434814, + "learning_rate": 7.3972001974105694e-06, + "loss": 0.4187, + "step": 313200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.584046840667725, + "loss_rtd": 0.21862752735614777, + "loss_sent": 0.26217788457870483, + "loss_sod": 0.035076115280389786, + "loss_total": 0.5158815383911133, + "step": 313299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.972877502441406, + "loss_rtd": 0.2138463407754898, + "loss_sent": 0.18063798546791077, + "loss_sod": 0.0426754429936409, + "loss_total": 0.43715977668762207, + "step": 313299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.3265968561172485, + "learning_rate": 7.380597983713155e-06, + "loss": 0.4306, + "step": 313300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.408375263214111, + "loss_rtd": 0.17621396481990814, + "loss_sent": 0.027988268062472343, + "loss_sod": 0.05240663141012192, + "loss_total": 0.25660884380340576, + "step": 313399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.836403846740723, + "loss_rtd": 0.22482101619243622, + "loss_sent": 0.4502793848514557, + "loss_sod": 0.013839355669915676, + "loss_total": 0.6889397501945496, + "step": 313399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.960966944694519, + "learning_rate": 7.36401293710578e-06, + "loss": 0.4351, + "step": 313400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.090680122375488, + "loss_rtd": 0.15573930740356445, + "loss_sent": 0.1062464490532875, + "loss_sod": 0.008934445679187775, + "loss_total": 0.27092018723487854, + "step": 313499 + }, + { + "epoch": 0.020998, + "loss_gen": 6.124439716339111, + "loss_rtd": 0.20087675750255585, + "loss_sent": 0.10363459587097168, + "loss_sod": 0.09466443955898285, + "loss_total": 0.3991757929325104, + "step": 313499 + }, + { + "epoch": 0.021, + "grad_norm": 0.8966503739356995, + "learning_rate": 7.347445064268898e-06, + "loss": 0.4205, + "step": 313500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.830630779266357, + "loss_rtd": 0.21341219544410706, + "loss_sent": 0.0895337164402008, + "loss_sod": 0.034656867384910583, + "loss_total": 0.33760279417037964, + "step": 313599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.908732891082764, + "loss_rtd": 0.19999216496944427, + "loss_sent": 0.3119899332523346, + "loss_sod": 0.023746121674776077, + "loss_total": 0.5357282161712646, + "step": 313599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.1671432256698608, + "learning_rate": 7.33089437187604e-06, + "loss": 0.4306, + "step": 313600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.955630779266357, + "loss_rtd": 0.21357406675815582, + "loss_sent": 0.3937452733516693, + "loss_sod": 0.03356746584177017, + "loss_total": 0.6408867835998535, + "step": 313699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.9988532066345215, + "loss_rtd": 0.2110321819782257, + "loss_sent": 0.10222094506025314, + "loss_sod": 0.1282128244638443, + "loss_total": 0.44146597385406494, + "step": 313699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.0259250402450562, + "learning_rate": 7.3143608665938225e-06, + "loss": 0.4255, + "step": 313700 + }, + { + "epoch": 0.021598, + "loss_gen": 6.168946266174316, + "loss_rtd": 0.20611238479614258, + "loss_sent": 0.07899408787488937, + "loss_sod": 0.05453554168343544, + "loss_total": 0.3396420180797577, + "step": 313799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.612627983093262, + "loss_rtd": 0.1827203631401062, + "loss_sent": 0.0346013680100441, + "loss_sod": 0.03594528138637543, + "loss_total": 0.2532670199871063, + "step": 313799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.996342122554779, + "learning_rate": 7.297844555081945e-06, + "loss": 0.4272, + "step": 313800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.412449836730957, + "loss_rtd": 0.19608855247497559, + "loss_sent": 0.1503404676914215, + "loss_sod": 0.0775788277387619, + "loss_total": 0.4240078628063202, + "step": 313899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.896376609802246, + "loss_rtd": 0.21017661690711975, + "loss_sent": 0.2769564688205719, + "loss_sod": 0.13673001527786255, + "loss_total": 0.6238631010055542, + "step": 313899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.518585205078125, + "learning_rate": 7.2813454439931875e-06, + "loss": 0.4045, + "step": 313900 + }, + { + "epoch": 0.021998, + "loss_gen": 6.018864154815674, + "loss_rtd": 0.2055513709783554, + "loss_sent": 0.2541314363479614, + "loss_sod": 0.09997926652431488, + "loss_total": 0.5596621036529541, + "step": 313999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.715604305267334, + "loss_rtd": 0.20882795751094818, + "loss_sent": 0.023550404235720634, + "loss_sod": 0.056766293942928314, + "loss_total": 0.2891446352005005, + "step": 313999 + }, + { + "epoch": 0.022, + "grad_norm": 1.240139126777649, + "learning_rate": 7.26486353997336e-06, + "loss": 0.4328, + "step": 314000 + }, + { + "epoch": 0.022, + "eval_loss": 0.40542072057724, + "eval_runtime": 152.7304, + "eval_samples_per_second": 101.113, + "eval_steps_per_second": 0.792, + "step": 314000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.8403120040893555, + "loss_rtd": 0.21521833539009094, + "loss_sent": 0.2516106367111206, + "loss_sod": 0.13746392726898193, + "loss_total": 0.6042928695678711, + "step": 314099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.720678329467773, + "loss_rtd": 0.21709758043289185, + "loss_sent": 0.4640616476535797, + "loss_sod": 0.006305079907178879, + "loss_total": 0.6874642968177795, + "step": 314099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.88277006149292, + "learning_rate": 7.248398849661392e-06, + "loss": 0.4231, + "step": 314100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.740084648132324, + "loss_rtd": 0.1869879513978958, + "loss_sent": 0.24680136144161224, + "loss_sod": 0.047100603580474854, + "loss_total": 0.4808899164199829, + "step": 314199 + }, + { + "epoch": 0.022398, + "loss_gen": 6.137388229370117, + "loss_rtd": 0.2184019237756729, + "loss_sent": 0.5415347814559937, + "loss_sod": 0.05859958007931709, + "loss_total": 0.8185362815856934, + "step": 314199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.4765161275863647, + "learning_rate": 7.2319513796892615e-06, + "loss": 0.4046, + "step": 314200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.72477388381958, + "loss_rtd": 0.22151228785514832, + "loss_sent": 0.10201455652713776, + "loss_sod": 0.08975923806428909, + "loss_total": 0.41328608989715576, + "step": 314299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.935103416442871, + "loss_rtd": 0.2225561887025833, + "loss_sent": 0.24215862154960632, + "loss_sod": 0.0822596549987793, + "loss_total": 0.5469744205474854, + "step": 314299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.2356349229812622, + "learning_rate": 7.215521136681996e-06, + "loss": 0.402, + "step": 314300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.923129081726074, + "loss_rtd": 0.2045292854309082, + "loss_sent": 0.35342252254486084, + "loss_sod": 0.03676004707813263, + "loss_total": 0.5947118401527405, + "step": 314399 + }, + { + "epoch": 0.022798, + "loss_gen": 6.300541877746582, + "loss_rtd": 0.22245635092258453, + "loss_sent": 0.11081698536872864, + "loss_sod": 0.10975705832242966, + "loss_total": 0.44303038716316223, + "step": 314399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.436285376548767, + "learning_rate": 7.199108127257692e-06, + "loss": 0.4262, + "step": 314400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.1642913818359375, + "loss_rtd": 0.18441419303417206, + "loss_sent": 2.651439353940077e-05, + "loss_sod": 0.1234402135014534, + "loss_total": 0.3078809380531311, + "step": 314499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.329806804656982, + "loss_rtd": 0.15288867056369781, + "loss_sent": 0.17453642189502716, + "loss_sod": 0.002712737303227186, + "loss_total": 0.33013784885406494, + "step": 314499 + }, + { + "epoch": 0.023, + "grad_norm": 1.0421608686447144, + "learning_rate": 7.182712358027521e-06, + "loss": 0.4233, + "step": 314500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.7112908363342285, + "loss_rtd": 0.20314361155033112, + "loss_sent": 0.11444418877363205, + "loss_sod": 0.07358378916978836, + "loss_total": 0.3911716043949127, + "step": 314599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.245554447174072, + "loss_rtd": 0.1752987504005432, + "loss_sent": 4.759164949064143e-05, + "loss_sod": 0.06419199705123901, + "loss_total": 0.23953834176063538, + "step": 314599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.0081534385681152, + "learning_rate": 7.166333835595707e-06, + "loss": 0.4377, + "step": 314600 + }, + { + "epoch": 0.023398, + "loss_gen": 6.1736836433410645, + "loss_rtd": 0.2087811827659607, + "loss_sent": 0.13610079884529114, + "loss_sod": 0.03175272047519684, + "loss_total": 0.37663471698760986, + "step": 314699 + }, + { + "epoch": 0.023398, + "loss_gen": 6.245881080627441, + "loss_rtd": 0.20825320482254028, + "loss_sent": 0.11125075817108154, + "loss_sod": 0.05730395019054413, + "loss_total": 0.37680792808532715, + "step": 314699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.6630667448043823, + "learning_rate": 7.149972566559482e-06, + "loss": 0.4169, + "step": 314700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.723850727081299, + "loss_rtd": 0.21545104682445526, + "loss_sent": 0.05776102840900421, + "loss_sod": 0.01691570319235325, + "loss_total": 0.29012778401374817, + "step": 314799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.832726001739502, + "loss_rtd": 0.20727503299713135, + "loss_sent": 0.10226765275001526, + "loss_sod": 0.023137206211686134, + "loss_total": 0.3326798975467682, + "step": 314799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.6317310333251953, + "learning_rate": 7.133628557509187e-06, + "loss": 0.4218, + "step": 314800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.593243598937988, + "loss_rtd": 0.17638765275478363, + "loss_sent": 0.010891798883676529, + "loss_sod": 0.05664808303117752, + "loss_total": 0.24392753839492798, + "step": 314899 + }, + { + "epoch": 0.023798, + "loss_gen": 6.372849464416504, + "loss_rtd": 0.2191629558801651, + "loss_sent": 0.10814128071069717, + "loss_sod": 0.0372735895216465, + "loss_total": 0.3645778298377991, + "step": 314899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.8626344799995422, + "learning_rate": 7.117301815028182e-06, + "loss": 0.4228, + "step": 314900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.699850082397461, + "loss_rtd": 0.21511022746562958, + "loss_sent": 0.09173356741666794, + "loss_sod": 0.0150165855884552, + "loss_total": 0.3218603730201721, + "step": 314999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.611486911773682, + "loss_rtd": 0.21274064481258392, + "loss_sent": 0.286617249250412, + "loss_sod": 0.01891474798321724, + "loss_total": 0.5182726383209229, + "step": 314999 + }, + { + "epoch": 0.024, + "grad_norm": 0.7114839553833008, + "learning_rate": 7.1009923456928915e-06, + "loss": 0.4359, + "step": 315000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4024796187877655, + "eval_runtime": 151.1801, + "eval_samples_per_second": 102.15, + "eval_steps_per_second": 0.8, + "step": 315000 + }, + { + "epoch": 0.024198, + "loss_gen": 6.079152584075928, + "loss_rtd": 0.22925634682178497, + "loss_sent": 0.23604364693164825, + "loss_sod": 0.01550333108752966, + "loss_total": 0.48080331087112427, + "step": 315099 + }, + { + "epoch": 0.024198, + "loss_gen": 6.009952545166016, + "loss_rtd": 0.21401935815811157, + "loss_sent": 0.11091934144496918, + "loss_sod": 0.15941095352172852, + "loss_total": 0.48434966802597046, + "step": 315099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.0999048948287964, + "learning_rate": 7.0847001560727375e-06, + "loss": 0.4386, + "step": 315100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.308565616607666, + "loss_rtd": 0.17562098801136017, + "loss_sent": 0.058068182319402695, + "loss_sod": 0.04143832251429558, + "loss_total": 0.27512750029563904, + "step": 315199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.552271366119385, + "loss_rtd": 0.21226602792739868, + "loss_sent": 0.08181682974100113, + "loss_sod": 0.0055959089659154415, + "loss_total": 0.299678772687912, + "step": 315199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.7959131002426147, + "learning_rate": 7.068425252730232e-06, + "loss": 0.4408, + "step": 315200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.913577556610107, + "loss_rtd": 0.20919254422187805, + "loss_sent": 0.2013363540172577, + "loss_sod": 0.10238775610923767, + "loss_total": 0.5129166841506958, + "step": 315299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.806041240692139, + "loss_rtd": 0.21975742280483246, + "loss_sent": 0.16951124370098114, + "loss_sod": 0.010074999183416367, + "loss_total": 0.39934366941452026, + "step": 315299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.209050178527832, + "learning_rate": 7.052167642220903e-06, + "loss": 0.4294, + "step": 315300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.845470428466797, + "loss_rtd": 0.20528078079223633, + "loss_sent": 0.4749945104122162, + "loss_sod": 0.012741737067699432, + "loss_total": 0.6930170059204102, + "step": 315399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.882688045501709, + "loss_rtd": 0.24752117693424225, + "loss_sent": 0.3105274438858032, + "loss_sod": 0.018023159354925156, + "loss_total": 0.5760717391967773, + "step": 315399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.6325089931488037, + "learning_rate": 7.035927331093317e-06, + "loss": 0.4296, + "step": 315400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.894001483917236, + "loss_rtd": 0.23202523589134216, + "loss_sent": 0.22358691692352295, + "loss_sod": 0.04244053363800049, + "loss_total": 0.4980526864528656, + "step": 315499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.756228923797607, + "loss_rtd": 0.21856294572353363, + "loss_sent": 0.25014421343803406, + "loss_sod": 0.058342427015304565, + "loss_total": 0.5270495414733887, + "step": 315499 + }, + { + "epoch": 0.025, + "grad_norm": 1.3457165956497192, + "learning_rate": 7.0197043258890596e-06, + "loss": 0.421, + "step": 315500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.60030460357666, + "loss_rtd": 0.18207070231437683, + "loss_sent": 0.0326513908803463, + "loss_sod": 0.05149613320827484, + "loss_total": 0.2662182152271271, + "step": 315599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.878728866577148, + "loss_rtd": 0.2024921476840973, + "loss_sent": 0.13212017714977264, + "loss_sod": 0.014641729183495045, + "loss_total": 0.3492540717124939, + "step": 315599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.8276498317718506, + "learning_rate": 7.003498633142752e-06, + "loss": 0.4289, + "step": 315600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.8293938636779785, + "loss_rtd": 0.22322751581668854, + "loss_sent": 0.25092437863349915, + "loss_sod": 0.009997747838497162, + "loss_total": 0.48414963483810425, + "step": 315699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.895769119262695, + "loss_rtd": 0.21014940738677979, + "loss_sent": 0.07953742891550064, + "loss_sod": 0.048508595675230026, + "loss_total": 0.33819541335105896, + "step": 315699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.4700539112091064, + "learning_rate": 6.987310259382057e-06, + "loss": 0.4307, + "step": 315700 + }, + { + "epoch": 0.025598, + "loss_gen": 6.073229789733887, + "loss_rtd": 0.2322186529636383, + "loss_sent": 0.22029481828212738, + "loss_sod": 0.025288663804531097, + "loss_total": 0.4778021574020386, + "step": 315799 + }, + { + "epoch": 0.025598, + "loss_gen": 6.111647129058838, + "loss_rtd": 0.21665161848068237, + "loss_sent": 0.2351970076560974, + "loss_sod": 0.08440050482749939, + "loss_total": 0.5362491607666016, + "step": 315799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.3426125049591064, + "learning_rate": 6.971139211127659e-06, + "loss": 0.4119, + "step": 315800 + }, + { + "epoch": 0.025798, + "loss_gen": 6.150747776031494, + "loss_rtd": 0.2225160300731659, + "loss_sent": 0.22520409524440765, + "loss_sod": 0.02255828306078911, + "loss_total": 0.47027841210365295, + "step": 315899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.702725887298584, + "loss_rtd": 0.19524094462394714, + "loss_sent": 0.3580003082752228, + "loss_sod": 0.054254043847322464, + "loss_total": 0.6074953079223633, + "step": 315899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.545710563659668, + "learning_rate": 6.954985494893229e-06, + "loss": 0.4348, + "step": 315900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.883828163146973, + "loss_rtd": 0.2226816862821579, + "loss_sent": 0.10587580502033234, + "loss_sod": 0.01885738968849182, + "loss_total": 0.34741488099098206, + "step": 315999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.713308334350586, + "loss_rtd": 0.19941580295562744, + "loss_sent": 0.03474288806319237, + "loss_sod": 0.08108684420585632, + "loss_total": 0.31524553894996643, + "step": 315999 + }, + { + "epoch": 0.026, + "grad_norm": 0.74559485912323, + "learning_rate": 6.938849117185492e-06, + "loss": 0.4246, + "step": 316000 + }, + { + "epoch": 0.026, + "eval_loss": 0.40634486079216003, + "eval_runtime": 151.2059, + "eval_samples_per_second": 102.132, + "eval_steps_per_second": 0.8, + "step": 316000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.952905178070068, + "loss_rtd": 0.232254758477211, + "loss_sent": 0.07545031607151031, + "loss_sod": 0.003857793053612113, + "loss_total": 0.3115628659725189, + "step": 316099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.720460891723633, + "loss_rtd": 0.20521163940429688, + "loss_sent": 0.2356717437505722, + "loss_sod": 0.0021521239541471004, + "loss_total": 0.4430355131626129, + "step": 316099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.8450875282287598, + "learning_rate": 6.922730084504192e-06, + "loss": 0.4247, + "step": 316100 + }, + { + "epoch": 0.026398, + "loss_gen": 6.488436222076416, + "loss_rtd": 0.20710326731204987, + "loss_sent": 0.020241430029273033, + "loss_sod": 0.11870314925909042, + "loss_total": 0.34604784846305847, + "step": 316199 + }, + { + "epoch": 0.026398, + "loss_gen": 6.002236843109131, + "loss_rtd": 0.23638859391212463, + "loss_sent": 0.09361567348241806, + "loss_sod": 0.05375950038433075, + "loss_total": 0.38376376032829285, + "step": 316199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.783872127532959, + "learning_rate": 6.906628403342052e-06, + "loss": 0.4241, + "step": 316200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.753622055053711, + "loss_rtd": 0.17708782851696014, + "loss_sent": 0.1306205838918686, + "loss_sod": 0.025192318484187126, + "loss_total": 0.332900732755661, + "step": 316299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.759315490722656, + "loss_rtd": 0.20716163516044617, + "loss_sent": 0.34905558824539185, + "loss_sod": 0.008463521488010883, + "loss_total": 0.5646807551383972, + "step": 316299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.2977489233016968, + "learning_rate": 6.8905440801848405e-06, + "loss": 0.4236, + "step": 316300 + }, + { + "epoch": 0.026798, + "loss_gen": 6.12885856628418, + "loss_rtd": 0.21159934997558594, + "loss_sent": 0.322174996137619, + "loss_sod": 0.023119572550058365, + "loss_total": 0.5568939447402954, + "step": 316399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.945991039276123, + "loss_rtd": 0.20298978686332703, + "loss_sent": 0.1152675673365593, + "loss_sod": 0.0343075655400753, + "loss_total": 0.3525649309158325, + "step": 316399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.7906752824783325, + "learning_rate": 6.8744771215113135e-06, + "loss": 0.4062, + "step": 316400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.150700092315674, + "loss_rtd": 0.16034561395645142, + "loss_sent": 0.038422808051109314, + "loss_sod": 0.05374298244714737, + "loss_total": 0.2525113821029663, + "step": 316499 + }, + { + "epoch": 0.026998, + "loss_gen": 6.009461402893066, + "loss_rtd": 0.21647918224334717, + "loss_sent": 0.1017431765794754, + "loss_sod": 0.01856192946434021, + "loss_total": 0.336784303188324, + "step": 316499 + }, + { + "epoch": 0.027, + "grad_norm": 0.8859041929244995, + "learning_rate": 6.85842753379326e-06, + "loss": 0.4262, + "step": 316500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.820038318634033, + "loss_rtd": 0.2261316031217575, + "loss_sent": 0.29477351903915405, + "loss_sod": 0.03190707787871361, + "loss_total": 0.5528122186660767, + "step": 316599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.3127641677856445, + "loss_rtd": 0.17756026983261108, + "loss_sent": 0.002590323332697153, + "loss_sod": 0.05262359604239464, + "loss_total": 0.23277419805526733, + "step": 316599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.209650993347168, + "learning_rate": 6.842395323495426e-06, + "loss": 0.4207, + "step": 316600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.927210807800293, + "loss_rtd": 0.21501344442367554, + "loss_sent": 0.24600785970687866, + "loss_sod": 0.011339105665683746, + "loss_total": 0.47236040234565735, + "step": 316699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.2444844245910645, + "loss_rtd": 0.16733349859714508, + "loss_sent": 0.005530119873583317, + "loss_sod": 0.12684503197669983, + "loss_total": 0.29970866441726685, + "step": 316699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.082572340965271, + "learning_rate": 6.82638049707559e-06, + "loss": 0.4273, + "step": 316700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.0740885734558105, + "loss_rtd": 0.2189008593559265, + "loss_sent": 0.14137573540210724, + "loss_sod": 0.06856880336999893, + "loss_total": 0.4288454055786133, + "step": 316799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.498562812805176, + "loss_rtd": 0.18266771733760834, + "loss_sent": 0.053660523146390915, + "loss_sod": 0.1443597674369812, + "loss_total": 0.38068801164627075, + "step": 316799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.530004620552063, + "learning_rate": 6.810383060984527e-06, + "loss": 0.4201, + "step": 316800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.307231903076172, + "loss_rtd": 0.17917531728744507, + "loss_sent": 2.47952248173533e-05, + "loss_sod": 0.04512486606836319, + "loss_total": 0.22432497143745422, + "step": 316899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.204262733459473, + "loss_rtd": 0.16439220309257507, + "loss_sent": 0.0013377791037783027, + "loss_sod": 0.09129650890827179, + "loss_total": 0.2570264935493469, + "step": 316899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.849273681640625, + "learning_rate": 6.794403021666018e-06, + "loss": 0.4177, + "step": 316900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.209410667419434, + "loss_rtd": 0.17891815304756165, + "loss_sent": 2.555143873905763e-05, + "loss_sod": 0.026099855080246925, + "loss_total": 0.20504356920719147, + "step": 316999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.612294673919678, + "loss_rtd": 0.19472810626029968, + "loss_sent": 0.295123428106308, + "loss_sod": 0.029760317876935005, + "loss_total": 0.5196118354797363, + "step": 316999 + }, + { + "epoch": 0.028, + "grad_norm": 1.2646737098693848, + "learning_rate": 6.778440385556789e-06, + "loss": 0.4153, + "step": 317000 + }, + { + "epoch": 0.028, + "eval_loss": 0.40046969056129456, + "eval_runtime": 151.353, + "eval_samples_per_second": 102.033, + "eval_steps_per_second": 0.799, + "step": 317000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.807673931121826, + "loss_rtd": 0.1759045273065567, + "loss_sent": 0.08461452275514603, + "loss_sod": 0.018785716965794563, + "loss_total": 0.27930477261543274, + "step": 317099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.419627666473389, + "loss_rtd": 0.1807471215724945, + "loss_sent": 0.03169324994087219, + "loss_sod": 0.060750093311071396, + "loss_total": 0.2731904685497284, + "step": 317099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.9224417209625244, + "learning_rate": 6.762495159086607e-06, + "loss": 0.4238, + "step": 317100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.9013824462890625, + "loss_rtd": 0.21823909878730774, + "loss_sent": 0.3828633725643158, + "loss_sod": 0.01689605787396431, + "loss_total": 0.6179985404014587, + "step": 317199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.776583194732666, + "loss_rtd": 0.22855596244335175, + "loss_sent": 0.18834030628204346, + "loss_sod": 0.00982157327234745, + "loss_total": 0.4267178475856781, + "step": 317199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.315647840499878, + "learning_rate": 6.746567348678201e-06, + "loss": 0.4299, + "step": 317200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.961749076843262, + "loss_rtd": 0.19732806086540222, + "loss_sent": 0.3418771028518677, + "loss_sod": 0.10058655589818954, + "loss_total": 0.63979172706604, + "step": 317299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.753267288208008, + "loss_rtd": 0.18467769026756287, + "loss_sent": 0.042127400636672974, + "loss_sod": 0.04599412903189659, + "loss_total": 0.27279922366142273, + "step": 317299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.9673850536346436, + "learning_rate": 6.730656960747311e-06, + "loss": 0.4252, + "step": 317300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.565941333770752, + "loss_rtd": 0.1751120239496231, + "loss_sent": 0.015778986737132072, + "loss_sod": 0.1042807325720787, + "loss_total": 0.29517173767089844, + "step": 317399 + }, + { + "epoch": 0.028798, + "loss_gen": 6.044151306152344, + "loss_rtd": 0.20001552999019623, + "loss_sent": 0.08831530064344406, + "loss_sod": 0.09159000962972641, + "loss_total": 0.3799208402633667, + "step": 317399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.2267037630081177, + "learning_rate": 6.714764001702606e-06, + "loss": 0.411, + "step": 317400 + }, + { + "epoch": 0.028998, + "loss_gen": 6.121268272399902, + "loss_rtd": 0.22611607611179352, + "loss_sent": 0.4467504322528839, + "loss_sod": 0.05430105701088905, + "loss_total": 0.7271676063537598, + "step": 317499 + }, + { + "epoch": 0.028998, + "loss_gen": 6.016317367553711, + "loss_rtd": 0.21046489477157593, + "loss_sent": 0.27657005190849304, + "loss_sod": 0.04403619468212128, + "loss_total": 0.5310711860656738, + "step": 317499 + }, + { + "epoch": 0.029, + "grad_norm": 0.9841665029525757, + "learning_rate": 6.698888477945786e-06, + "loss": 0.4266, + "step": 317500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.926337242126465, + "loss_rtd": 0.19186662137508392, + "loss_sent": 0.07662045955657959, + "loss_sod": 0.12023736536502838, + "loss_total": 0.3887244462966919, + "step": 317599 + }, + { + "epoch": 0.029198, + "loss_gen": 6.068818092346191, + "loss_rtd": 0.19683921337127686, + "loss_sent": 0.2143409103155136, + "loss_sod": 0.05517958477139473, + "loss_total": 0.4663597345352173, + "step": 317599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.3692232370376587, + "learning_rate": 6.683030395871525e-06, + "loss": 0.4206, + "step": 317600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.710707664489746, + "loss_rtd": 0.21629835665225983, + "loss_sent": 0.09587308019399643, + "loss_sod": 0.007617895491421223, + "loss_total": 0.319789320230484, + "step": 317699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.954129219055176, + "loss_rtd": 0.22051052749156952, + "loss_sent": 0.08118244260549545, + "loss_sod": 0.03730803355574608, + "loss_total": 0.33900099992752075, + "step": 317699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.6702138781547546, + "learning_rate": 6.667189761867426e-06, + "loss": 0.4137, + "step": 317700 + }, + { + "epoch": 0.029598, + "loss_gen": 6.153414726257324, + "loss_rtd": 0.21158844232559204, + "loss_sent": 0.2731311023235321, + "loss_sod": 0.054120589047670364, + "loss_total": 0.5388401746749878, + "step": 317799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.767001152038574, + "loss_rtd": 0.21963752806186676, + "loss_sent": 0.056638315320014954, + "loss_sod": 0.060463808476924896, + "loss_total": 0.3367396593093872, + "step": 317799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.2742230892181396, + "learning_rate": 6.651366582314106e-06, + "loss": 0.4201, + "step": 317800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.668087959289551, + "loss_rtd": 0.19701330363750458, + "loss_sent": 0.3478797674179077, + "loss_sod": 0.044805239886045456, + "loss_total": 0.589698314666748, + "step": 317899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.5290913581848145, + "loss_rtd": 0.20819711685180664, + "loss_sent": 0.05382475256919861, + "loss_sod": 0.0117452098056674, + "loss_total": 0.2737670838832855, + "step": 317899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.291848063468933, + "learning_rate": 6.635560863585144e-06, + "loss": 0.4284, + "step": 317900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.629080772399902, + "loss_rtd": 0.22029152512550354, + "loss_sent": 0.38260847330093384, + "loss_sod": 0.006366434041410685, + "loss_total": 0.6092664003372192, + "step": 317999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.788712501525879, + "loss_rtd": 0.19587451219558716, + "loss_sent": 0.021938376128673553, + "loss_sod": 0.1297975331544876, + "loss_total": 0.3476104140281677, + "step": 317999 + }, + { + "epoch": 0.03, + "grad_norm": 2.7727575302124023, + "learning_rate": 6.619772612047092e-06, + "loss": 0.4329, + "step": 318000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4041973650455475, + "eval_runtime": 151.401, + "eval_samples_per_second": 102.001, + "eval_steps_per_second": 0.799, + "step": 318000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.844808101654053, + "loss_rtd": 0.23298917710781097, + "loss_sent": 0.13160644471645355, + "loss_sod": 0.06651133298873901, + "loss_total": 0.4311069846153259, + "step": 318099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.991744041442871, + "loss_rtd": 0.20404934883117676, + "loss_sent": 0.16844616830348969, + "loss_sod": 0.07261691242456436, + "loss_total": 0.445112407207489, + "step": 318099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.9993682503700256, + "learning_rate": 6.6040018340594315e-06, + "loss": 0.4178, + "step": 318100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.731882572174072, + "loss_rtd": 0.19733524322509766, + "loss_sent": 0.14549008011817932, + "loss_sod": 0.08695275336503983, + "loss_total": 0.4297780692577362, + "step": 318199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.993220329284668, + "loss_rtd": 0.20111925899982452, + "loss_sent": 0.29472586512565613, + "loss_sod": 0.016072260215878487, + "loss_total": 0.5119173526763916, + "step": 318199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.168389081954956, + "learning_rate": 6.588248535974645e-06, + "loss": 0.4434, + "step": 318200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.904257774353027, + "loss_rtd": 0.22033774852752686, + "loss_sent": 0.5427066683769226, + "loss_sod": 0.06353654712438583, + "loss_total": 0.8265810012817383, + "step": 318299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.867843151092529, + "loss_rtd": 0.18177077174186707, + "loss_sent": 0.05589713528752327, + "loss_sod": 0.11904123425483704, + "loss_total": 0.35670915246009827, + "step": 318299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.657719612121582, + "learning_rate": 6.572512724138158e-06, + "loss": 0.4178, + "step": 318300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.974632740020752, + "loss_rtd": 0.23202939331531525, + "loss_sent": 0.09834026545286179, + "loss_sod": 0.07482287287712097, + "loss_total": 0.4051925539970398, + "step": 318399 + }, + { + "epoch": 0.030798, + "loss_gen": 6.390172481536865, + "loss_rtd": 0.2133445143699646, + "loss_sent": 0.3928605914115906, + "loss_sod": 0.06184985488653183, + "loss_total": 0.6680549383163452, + "step": 318399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.7911866903305054, + "learning_rate": 6.5567944048883615e-06, + "loss": 0.4304, + "step": 318400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.871227264404297, + "loss_rtd": 0.2086278647184372, + "loss_sent": 0.3483510911464691, + "loss_sod": 0.011658594943583012, + "loss_total": 0.5686375498771667, + "step": 318499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.808342933654785, + "loss_rtd": 0.20525531470775604, + "loss_sent": 0.16486814618110657, + "loss_sod": 0.04720059782266617, + "loss_total": 0.4173240661621094, + "step": 318499 + }, + { + "epoch": 0.031, + "grad_norm": 1.1544314622879028, + "learning_rate": 6.541093584556574e-06, + "loss": 0.4256, + "step": 318500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.843674659729004, + "loss_rtd": 0.227028489112854, + "loss_sent": 0.3395182490348816, + "loss_sod": 0.03323065862059593, + "loss_total": 0.5997774004936218, + "step": 318599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.810780048370361, + "loss_rtd": 0.2051435261964798, + "loss_sent": 0.17964157462120056, + "loss_sod": 0.039741553366184235, + "loss_total": 0.4245266616344452, + "step": 318599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.5714060068130493, + "learning_rate": 6.525410269467091e-06, + "loss": 0.4142, + "step": 318600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.445639610290527, + "loss_rtd": 0.186831995844841, + "loss_sent": 0.0022034335415810347, + "loss_sod": 0.047516778111457825, + "loss_total": 0.23655220866203308, + "step": 318699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.501916885375977, + "loss_rtd": 0.18114088475704193, + "loss_sent": 0.12337516248226166, + "loss_sod": 0.0120387626811862, + "loss_total": 0.31655481457710266, + "step": 318699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.711101233959198, + "learning_rate": 6.509744465937151e-06, + "loss": 0.4182, + "step": 318700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.846083641052246, + "loss_rtd": 0.21401764452457428, + "loss_sent": 0.2561408281326294, + "loss_sod": 0.03093666210770607, + "loss_total": 0.5010951161384583, + "step": 318799 + }, + { + "epoch": 0.031598, + "loss_gen": 6.083571434020996, + "loss_rtd": 0.22676680982112885, + "loss_sent": 0.37605637311935425, + "loss_sod": 0.10717137902975082, + "loss_total": 0.7099945545196533, + "step": 318799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.3865742683410645, + "learning_rate": 6.494096180276954e-06, + "loss": 0.42, + "step": 318800 + }, + { + "epoch": 0.031798, + "loss_gen": 6.157656669616699, + "loss_rtd": 0.21140675246715546, + "loss_sent": 0.152816504240036, + "loss_sod": 0.07600642740726471, + "loss_total": 0.4402296841144562, + "step": 318899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.833266258239746, + "loss_rtd": 0.20781660079956055, + "loss_sent": 0.17311154305934906, + "loss_sod": 0.03738585114479065, + "loss_total": 0.41831398010253906, + "step": 318899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.2343511581420898, + "learning_rate": 6.478465418789598e-06, + "loss": 0.4118, + "step": 318900 + }, + { + "epoch": 0.031998, + "loss_gen": 6.131964206695557, + "loss_rtd": 0.2100180983543396, + "loss_sent": 0.2400071918964386, + "loss_sod": 0.04273661971092224, + "loss_total": 0.49276190996170044, + "step": 318999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.553122520446777, + "loss_rtd": 0.19109469652175903, + "loss_sent": 0.017574027180671692, + "loss_sod": 0.10851524770259857, + "loss_total": 0.3171839714050293, + "step": 318999 + }, + { + "epoch": 0.032, + "grad_norm": 0.9409855008125305, + "learning_rate": 6.4628521877711675e-06, + "loss": 0.4235, + "step": 319000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4110987186431885, + "eval_runtime": 151.2179, + "eval_samples_per_second": 102.124, + "eval_steps_per_second": 0.8, + "step": 319000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.976017475128174, + "loss_rtd": 0.19671468436717987, + "loss_sent": 0.5756129622459412, + "loss_sod": 0.05595851689577103, + "loss_total": 0.8282861709594727, + "step": 319099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.68550443649292, + "loss_rtd": 0.1934850513935089, + "loss_sent": 0.25003373622894287, + "loss_sod": 0.005886332131922245, + "loss_total": 0.44940513372421265, + "step": 319099 + }, + { + "epoch": 0.0002, + "grad_norm": 2.3204917907714844, + "learning_rate": 6.4472564935106835e-06, + "loss": 0.4239, + "step": 319100 + }, + { + "epoch": 0.000398, + "loss_gen": 6.178202152252197, + "loss_rtd": 0.23375770449638367, + "loss_sent": 0.39146536588668823, + "loss_sod": 0.03721356764435768, + "loss_total": 0.6624366044998169, + "step": 319199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.941952705383301, + "loss_rtd": 0.21831358969211578, + "loss_sent": 0.3978358209133148, + "loss_sod": 0.050270840525627136, + "loss_total": 0.6664202213287354, + "step": 319199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.753470778465271, + "learning_rate": 6.431678342290065e-06, + "loss": 0.4417, + "step": 319200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.796645641326904, + "loss_rtd": 0.21562138199806213, + "loss_sent": 0.1595989614725113, + "loss_sod": 0.04095842316746712, + "loss_total": 0.41617876291275024, + "step": 319299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.814482688903809, + "loss_rtd": 0.17968492209911346, + "loss_sent": 0.20665135979652405, + "loss_sod": 0.024276655167341232, + "loss_total": 0.41061294078826904, + "step": 319299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.8900070190429688, + "learning_rate": 6.416117740384198e-06, + "loss": 0.4323, + "step": 319300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.882384777069092, + "loss_rtd": 0.20576916635036469, + "loss_sent": 0.4200195074081421, + "loss_sod": 0.025177771225571632, + "loss_total": 0.650966465473175, + "step": 319399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.954499244689941, + "loss_rtd": 0.195393368601799, + "loss_sent": 0.045674994587898254, + "loss_sod": 0.035737112164497375, + "loss_total": 0.27680546045303345, + "step": 319399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.8523480892181396, + "learning_rate": 6.40057469406089e-06, + "loss": 0.4033, + "step": 319400 + }, + { + "epoch": 0.000998, + "loss_gen": 6.113741874694824, + "loss_rtd": 0.2027757465839386, + "loss_sent": 0.7217386364936829, + "loss_sod": 0.02413523755967617, + "loss_total": 0.9486496448516846, + "step": 319499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.816281795501709, + "loss_rtd": 0.21378113329410553, + "loss_sent": 0.09893307834863663, + "loss_sod": 0.006139421835541725, + "loss_total": 0.3188536465167999, + "step": 319499 + }, + { + "epoch": 0.001, + "grad_norm": 3.013335704803467, + "learning_rate": 6.385049209580896e-06, + "loss": 0.4202, + "step": 319500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.436474800109863, + "loss_rtd": 0.148299902677536, + "loss_sent": 0.00681539298966527, + "loss_sod": 0.07231418043375015, + "loss_total": 0.22742946445941925, + "step": 319599 + }, + { + "epoch": 0.001198, + "loss_gen": 6.113706111907959, + "loss_rtd": 0.22431142628192902, + "loss_sent": 0.1506250947713852, + "loss_sod": 0.011137381196022034, + "loss_total": 0.38607388734817505, + "step": 319599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.9200167059898376, + "learning_rate": 6.36954129319785e-06, + "loss": 0.4085, + "step": 319600 + }, + { + "epoch": 0.001398, + "loss_gen": 6.074091911315918, + "loss_rtd": 0.21150025725364685, + "loss_sent": 0.22251741588115692, + "loss_sod": 0.03574107587337494, + "loss_total": 0.4697587490081787, + "step": 319699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.260921001434326, + "loss_rtd": 0.20089313387870789, + "loss_sent": 0.00206866767257452, + "loss_sod": 0.16894492506980896, + "loss_total": 0.37190672755241394, + "step": 319699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.1115680932998657, + "learning_rate": 6.354050951158358e-06, + "loss": 0.4054, + "step": 319700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.841836452484131, + "loss_rtd": 0.21303756535053253, + "loss_sent": 0.1961507946252823, + "loss_sod": 0.012042567133903503, + "loss_total": 0.42123091220855713, + "step": 319799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.830868721008301, + "loss_rtd": 0.21649155020713806, + "loss_sent": 0.34402939677238464, + "loss_sod": 0.04911269620060921, + "loss_total": 0.6096336841583252, + "step": 319799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.6048423051834106, + "learning_rate": 6.338578189701921e-06, + "loss": 0.4472, + "step": 319800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.899473667144775, + "loss_rtd": 0.22005142271518707, + "loss_sent": 0.27338936924934387, + "loss_sod": 0.006652672775089741, + "loss_total": 0.5000934600830078, + "step": 319899 + }, + { + "epoch": 0.001798, + "loss_gen": 6.022148132324219, + "loss_rtd": 0.2090901881456375, + "loss_sent": 0.13876378536224365, + "loss_sod": 0.08535449951887131, + "loss_total": 0.4332084655761719, + "step": 319899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.0410821437835693, + "learning_rate": 6.323123015060978e-06, + "loss": 0.4223, + "step": 319900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.855429649353027, + "loss_rtd": 0.21403956413269043, + "loss_sent": 0.27373263239860535, + "loss_sod": 0.007027428597211838, + "loss_total": 0.4947996139526367, + "step": 319999 + }, + { + "epoch": 0.001998, + "loss_gen": 6.12824010848999, + "loss_rtd": 0.21014724671840668, + "loss_sent": 0.16254407167434692, + "loss_sod": 0.05450683832168579, + "loss_total": 0.4271981716156006, + "step": 319999 + }, + { + "epoch": 0.002, + "grad_norm": 0.716411828994751, + "learning_rate": 6.307685433460853e-06, + "loss": 0.444, + "step": 320000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4017051160335541, + "eval_runtime": 154.2258, + "eval_samples_per_second": 100.132, + "eval_steps_per_second": 0.785, + "step": 320000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.743134021759033, + "loss_rtd": 0.2047204226255417, + "loss_sent": 0.19227875769138336, + "loss_sod": 0.0972495898604393, + "loss_total": 0.49424874782562256, + "step": 320099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.994846820831299, + "loss_rtd": 0.21796832978725433, + "loss_sent": 0.6728981137275696, + "loss_sod": 0.03634445369243622, + "loss_total": 0.9272109270095825, + "step": 320099 + }, + { + "epoch": 0.0022, + "grad_norm": 2.0933148860931396, + "learning_rate": 6.292265451119805e-06, + "loss": 0.4335, + "step": 320100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.971792697906494, + "loss_rtd": 0.21235860884189606, + "loss_sent": 0.22334183752536774, + "loss_sod": 0.04061507433652878, + "loss_total": 0.4763154983520508, + "step": 320199 + }, + { + "epoch": 0.002398, + "loss_gen": 6.040041923522949, + "loss_rtd": 0.2328938990831375, + "loss_sent": 0.44883209466934204, + "loss_sod": 0.09541422128677368, + "loss_total": 0.777140200138092, + "step": 320199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.631795883178711, + "learning_rate": 6.276863074249012e-06, + "loss": 0.4122, + "step": 320200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.901726722717285, + "loss_rtd": 0.22118978202342987, + "loss_sent": 0.10289447009563446, + "loss_sod": 0.03695704787969589, + "loss_total": 0.3610413074493408, + "step": 320299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.704709529876709, + "loss_rtd": 0.2171352654695511, + "loss_sent": 0.07313850522041321, + "loss_sod": 0.00678838649764657, + "loss_total": 0.2970621585845947, + "step": 320299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.0979467630386353, + "learning_rate": 6.261478309052554e-06, + "loss": 0.4334, + "step": 320300 + }, + { + "epoch": 0.002798, + "loss_gen": 6.131543159484863, + "loss_rtd": 0.21549084782600403, + "loss_sent": 0.2658088207244873, + "loss_sod": 0.013012303039431572, + "loss_total": 0.49431198835372925, + "step": 320399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.947504043579102, + "loss_rtd": 0.2135101556777954, + "loss_sent": 0.24038216471672058, + "loss_sod": 0.03134508430957794, + "loss_total": 0.4852374196052551, + "step": 320399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.5543198585510254, + "learning_rate": 6.246111161727392e-06, + "loss": 0.4344, + "step": 320400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.960519313812256, + "loss_rtd": 0.2095993161201477, + "loss_sent": 0.17482279241085052, + "loss_sod": 0.10335630178451538, + "loss_total": 0.4877784252166748, + "step": 320499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.681966781616211, + "loss_rtd": 0.18359705805778503, + "loss_sent": 0.19377291202545166, + "loss_sod": 0.05301167815923691, + "loss_total": 0.4303816556930542, + "step": 320499 + }, + { + "epoch": 0.003, + "grad_norm": 1.1342074871063232, + "learning_rate": 6.230761638463417e-06, + "loss": 0.4281, + "step": 320500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.191170692443848, + "loss_rtd": 0.18369318544864655, + "loss_sent": 0.012727886438369751, + "loss_sod": 0.052066899836063385, + "loss_total": 0.24848797917366028, + "step": 320599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.531480312347412, + "loss_rtd": 0.1919640451669693, + "loss_sent": 0.10939568281173706, + "loss_sod": 0.07849612832069397, + "loss_total": 0.3798558712005615, + "step": 320599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.7807751297950745, + "learning_rate": 6.215429745443435e-06, + "loss": 0.4268, + "step": 320600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.721339702606201, + "loss_rtd": 0.21336989104747772, + "loss_sent": 0.21239691972732544, + "loss_sod": 0.023153558373451233, + "loss_total": 0.4489203691482544, + "step": 320699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.7912211418151855, + "loss_rtd": 0.2425805926322937, + "loss_sent": 0.06593421846628189, + "loss_sod": 0.024264683946967125, + "loss_total": 0.33277949690818787, + "step": 320699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.6599763035774231, + "learning_rate": 6.200115488843106e-06, + "loss": 0.4153, + "step": 320700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.787447929382324, + "loss_rtd": 0.20720696449279785, + "loss_sent": 0.08620306849479675, + "loss_sod": 0.039434611797332764, + "loss_total": 0.33284464478492737, + "step": 320799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.430987358093262, + "loss_rtd": 0.16499969363212585, + "loss_sent": 0.07435581833124161, + "loss_sod": 0.060972969979047775, + "loss_total": 0.30032849311828613, + "step": 320799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.8227694630622864, + "learning_rate": 6.1848188748310275e-06, + "loss": 0.4091, + "step": 320800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.704413414001465, + "loss_rtd": 0.22211644053459167, + "loss_sent": 0.2629146873950958, + "loss_sod": 0.0040412116795778275, + "loss_total": 0.489072322845459, + "step": 320899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.9420084953308105, + "loss_rtd": 0.20459522306919098, + "loss_sent": 0.1701173484325409, + "loss_sod": 0.019897334277629852, + "loss_total": 0.39460989832878113, + "step": 320899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.146616816520691, + "learning_rate": 6.169539909568655e-06, + "loss": 0.4191, + "step": 320900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.657702922821045, + "loss_rtd": 0.20114222168922424, + "loss_sent": 0.1660340279340744, + "loss_sod": 0.028680017217993736, + "loss_total": 0.39585626125335693, + "step": 320999 + }, + { + "epoch": 0.003998, + "loss_gen": 6.145058631896973, + "loss_rtd": 0.21122728288173676, + "loss_sent": 0.275448203086853, + "loss_sod": 0.007465800270438194, + "loss_total": 0.49414128065109253, + "step": 320999 + }, + { + "epoch": 0.004, + "grad_norm": 1.9879456758499146, + "learning_rate": 6.154278599210367e-06, + "loss": 0.4305, + "step": 321000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4108101725578308, + "eval_runtime": 150.8562, + "eval_samples_per_second": 102.369, + "eval_steps_per_second": 0.802, + "step": 321000 + }, + { + "epoch": 0.004198, + "loss_gen": 6.024953365325928, + "loss_rtd": 0.21365736424922943, + "loss_sent": 0.21364599466323853, + "loss_sod": 0.059438835829496384, + "loss_total": 0.48674219846725464, + "step": 321099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.437925338745117, + "loss_rtd": 0.2158796191215515, + "loss_sent": 0.2654319405555725, + "loss_sod": 0.040151964873075485, + "loss_total": 0.5214635133743286, + "step": 321099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.0901381969451904, + "learning_rate": 6.139034949903427e-06, + "loss": 0.4273, + "step": 321100 + }, + { + "epoch": 0.004398, + "loss_gen": 6.391094207763672, + "loss_rtd": 0.21198046207427979, + "loss_sent": 0.0628882646560669, + "loss_sod": 0.18223553895950317, + "loss_total": 0.45710426568984985, + "step": 321199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.735109806060791, + "loss_rtd": 0.2082480639219284, + "loss_sent": 0.11475709080696106, + "loss_sod": 0.02624213509261608, + "loss_total": 0.3492472767829895, + "step": 321199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.1446348428726196, + "learning_rate": 6.123808967787953e-06, + "loss": 0.415, + "step": 321200 + }, + { + "epoch": 0.004598, + "loss_gen": 6.019451141357422, + "loss_rtd": 0.21636879444122314, + "loss_sent": 0.5439419746398926, + "loss_sod": 0.027772653847932816, + "loss_total": 0.7880834341049194, + "step": 321299 + }, + { + "epoch": 0.004598, + "loss_gen": 6.030162334442139, + "loss_rtd": 0.22694973647594452, + "loss_sent": 0.13330230116844177, + "loss_sod": 0.05584421008825302, + "loss_total": 0.4160962402820587, + "step": 321299 + }, + { + "epoch": 0.0046, + "grad_norm": 2.6562745571136475, + "learning_rate": 6.108600658996977e-06, + "loss": 0.4349, + "step": 321300 + }, + { + "epoch": 0.004798, + "loss_gen": 6.245081424713135, + "loss_rtd": 0.21182465553283691, + "loss_sent": 0.13359057903289795, + "loss_sod": 0.08868192136287689, + "loss_total": 0.43409717082977295, + "step": 321399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.9391865730285645, + "loss_rtd": 0.22120217978954315, + "loss_sent": 0.06328216940164566, + "loss_sod": 0.05518423765897751, + "loss_total": 0.3396685719490051, + "step": 321399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.2882983684539795, + "learning_rate": 6.09341002965641e-06, + "loss": 0.431, + "step": 321400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.970085620880127, + "loss_rtd": 0.20211665332317352, + "loss_sent": 0.24362456798553467, + "loss_sod": 0.09598428010940552, + "loss_total": 0.5417255163192749, + "step": 321499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.794473171234131, + "loss_rtd": 0.2029036283493042, + "loss_sent": 0.12314584851264954, + "loss_sod": 0.08155947178602219, + "loss_total": 0.4076089560985565, + "step": 321499 + }, + { + "epoch": 0.005, + "grad_norm": 0.9801462292671204, + "learning_rate": 6.078237085885041e-06, + "loss": 0.4243, + "step": 321500 + }, + { + "epoch": 0.005198, + "loss_gen": 6.007789134979248, + "loss_rtd": 0.21848537027835846, + "loss_sent": 0.06751511991024017, + "loss_sod": 0.012613615021109581, + "loss_total": 0.2986140847206116, + "step": 321599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.825482368469238, + "loss_rtd": 0.2034791260957718, + "loss_sent": 0.2505021393299103, + "loss_sod": 0.0028420707676559687, + "loss_total": 0.45682334899902344, + "step": 321599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.6087115406990051, + "learning_rate": 6.063081833794515e-06, + "loss": 0.4432, + "step": 321600 + }, + { + "epoch": 0.005398, + "loss_gen": 6.068107604980469, + "loss_rtd": 0.2043725550174713, + "loss_sent": 0.17323701083660126, + "loss_sod": 0.0464274063706398, + "loss_total": 0.42403697967529297, + "step": 321699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.962821006774902, + "loss_rtd": 0.21008557081222534, + "loss_sent": 0.30374470353126526, + "loss_sod": 0.007715006824582815, + "loss_total": 0.5215452909469604, + "step": 321699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.6156496405601501, + "learning_rate": 6.04794427948937e-06, + "loss": 0.4023, + "step": 321700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.743560314178467, + "loss_rtd": 0.22464556992053986, + "loss_sent": 0.16896145045757294, + "loss_sod": 0.15009906888008118, + "loss_total": 0.5437061190605164, + "step": 321799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.551284313201904, + "loss_rtd": 0.21979455649852753, + "loss_sent": 0.11361131072044373, + "loss_sod": 0.19593751430511475, + "loss_total": 0.5293433666229248, + "step": 321799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.5520875453948975, + "learning_rate": 6.0328244290670076e-06, + "loss": 0.4123, + "step": 321800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.0887131690979, + "loss_rtd": 0.21372784674167633, + "loss_sent": 0.14582060277462006, + "loss_sod": 0.0354970283806324, + "loss_total": 0.3950454592704773, + "step": 321899 + }, + { + "epoch": 0.005798, + "loss_gen": 6.088348865509033, + "loss_rtd": 0.1735665500164032, + "loss_sent": 0.12466592341661453, + "loss_sod": 0.08897095173597336, + "loss_total": 0.3872034251689911, + "step": 321899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.028296947479248, + "learning_rate": 6.017722288617722e-06, + "loss": 0.4112, + "step": 321900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.786381721496582, + "loss_rtd": 0.22154591977596283, + "loss_sent": 0.2573598325252533, + "loss_sod": 0.1154954582452774, + "loss_total": 0.5944012403488159, + "step": 321999 + }, + { + "epoch": 0.005998, + "loss_gen": 6.044796943664551, + "loss_rtd": 0.1966133564710617, + "loss_sent": 0.09351838380098343, + "loss_sod": 0.01564120128750801, + "loss_total": 0.30577293038368225, + "step": 321999 + }, + { + "epoch": 0.006, + "grad_norm": 1.3540593385696411, + "learning_rate": 6.002637864224631e-06, + "loss": 0.4135, + "step": 322000 + }, + { + "epoch": 0.006, + "eval_loss": 0.4049164652824402, + "eval_runtime": 150.672, + "eval_samples_per_second": 102.494, + "eval_steps_per_second": 0.803, + "step": 322000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.896158218383789, + "loss_rtd": 0.1843385100364685, + "loss_sent": 0.23589207231998444, + "loss_sod": 0.007741671986877918, + "loss_total": 0.4279722571372986, + "step": 322099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.73489236831665, + "loss_rtd": 0.20232951641082764, + "loss_sent": 0.1733345240354538, + "loss_sod": 0.0740676149725914, + "loss_total": 0.44973164796829224, + "step": 322099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.8871188759803772, + "learning_rate": 5.9875711619637456e-06, + "loss": 0.4317, + "step": 322100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.838665008544922, + "loss_rtd": 0.20343336462974548, + "loss_sent": 0.11810265481472015, + "loss_sod": 0.03417399898171425, + "loss_total": 0.3557100296020508, + "step": 322199 + }, + { + "epoch": 0.006398, + "loss_gen": 6.101709365844727, + "loss_rtd": 0.1968148648738861, + "loss_sent": 0.48954933881759644, + "loss_sod": 0.017828695476055145, + "loss_total": 0.7041928768157959, + "step": 322199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.3429930210113525, + "learning_rate": 5.972522187903939e-06, + "loss": 0.4166, + "step": 322200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.993008136749268, + "loss_rtd": 0.19729797542095184, + "loss_sent": 0.11938892304897308, + "loss_sod": 0.026073159649968147, + "loss_total": 0.3427600562572479, + "step": 322299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.700710296630859, + "loss_rtd": 0.1850474774837494, + "loss_sent": 0.04739578440785408, + "loss_sod": 0.09909026324748993, + "loss_total": 0.3315335214138031, + "step": 322299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.0525033473968506, + "learning_rate": 5.957490948106925e-06, + "loss": 0.4198, + "step": 322300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.894399642944336, + "loss_rtd": 0.19894111156463623, + "loss_sent": 0.6496871113777161, + "loss_sod": 0.029115945100784302, + "loss_total": 0.877744197845459, + "step": 322399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.973742961883545, + "loss_rtd": 0.22670283913612366, + "loss_sent": 0.3474273383617401, + "loss_sod": 0.0482768714427948, + "loss_total": 0.622407078742981, + "step": 322399 + }, + { + "epoch": 0.0068, + "grad_norm": 2.0627973079681396, + "learning_rate": 5.942477448627287e-06, + "loss": 0.4355, + "step": 322400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.220848560333252, + "loss_rtd": 0.16829366981983185, + "loss_sent": 0.008023840375244617, + "loss_sod": 0.06601382046937943, + "loss_total": 0.2423313409090042, + "step": 322499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.9026336669921875, + "loss_rtd": 0.19783097505569458, + "loss_sent": 0.24712328612804413, + "loss_sod": 0.10601376742124557, + "loss_total": 0.5509680509567261, + "step": 322499 + }, + { + "epoch": 0.007, + "grad_norm": 1.1102864742279053, + "learning_rate": 5.92748169551246e-06, + "loss": 0.4332, + "step": 322500 + }, + { + "epoch": 0.007198, + "loss_gen": 6.023877143859863, + "loss_rtd": 0.19681943953037262, + "loss_sent": 0.20057518780231476, + "loss_sod": 0.038912322372198105, + "loss_total": 0.4363069534301758, + "step": 322599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.604940414428711, + "loss_rtd": 0.2346101701259613, + "loss_sent": 0.11208489537239075, + "loss_sod": 0.003951771650463343, + "loss_total": 0.35064685344696045, + "step": 322599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.5625324845314026, + "learning_rate": 5.9125036948027515e-06, + "loss": 0.4085, + "step": 322600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.948314189910889, + "loss_rtd": 0.21103662252426147, + "loss_sent": 0.05874745547771454, + "loss_sod": 0.13852140307426453, + "loss_total": 0.40830546617507935, + "step": 322699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.678983211517334, + "loss_rtd": 0.21298469603061676, + "loss_sent": 0.13734786212444305, + "loss_sod": 0.07159619778394699, + "loss_total": 0.4219287633895874, + "step": 322699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.4385826587677002, + "learning_rate": 5.897543452531273e-06, + "loss": 0.4225, + "step": 322700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.906988143920898, + "loss_rtd": 0.22667856514453888, + "loss_sent": 0.1278458535671234, + "loss_sod": 0.024468297138810158, + "loss_total": 0.3789927065372467, + "step": 322799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.956003665924072, + "loss_rtd": 0.22413918375968933, + "loss_sent": 0.27263471484184265, + "loss_sod": 0.08346781134605408, + "loss_total": 0.5802416801452637, + "step": 322799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.3001375198364258, + "learning_rate": 5.882600974724017e-06, + "loss": 0.4335, + "step": 322800 + }, + { + "epoch": 0.007798, + "loss_gen": 6.4441752433776855, + "loss_rtd": 0.21956200897693634, + "loss_sent": 0.056823354214429855, + "loss_sod": 0.19637469947338104, + "loss_total": 0.47276005148887634, + "step": 322899 + }, + { + "epoch": 0.007798, + "loss_gen": 6.187901496887207, + "loss_rtd": 0.18620547652244568, + "loss_sent": 0.08474566042423248, + "loss_sod": 0.14326469600200653, + "loss_total": 0.4142158031463623, + "step": 322899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.7970831394195557, + "learning_rate": 5.867676267399807e-06, + "loss": 0.4145, + "step": 322900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.986310005187988, + "loss_rtd": 0.20238037407398224, + "loss_sent": 0.16570836305618286, + "loss_sod": 0.012983284890651703, + "loss_total": 0.3810720145702362, + "step": 322999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.3566389083862305, + "loss_rtd": 0.18088479340076447, + "loss_sent": 2.4724446120671928e-05, + "loss_sod": 0.0914454311132431, + "loss_total": 0.27235496044158936, + "step": 322999 + }, + { + "epoch": 0.008, + "grad_norm": 0.8922746777534485, + "learning_rate": 5.852769336570335e-06, + "loss": 0.4157, + "step": 323000 + }, + { + "epoch": 0.008, + "eval_loss": 0.3941841125488281, + "eval_runtime": 150.7699, + "eval_samples_per_second": 102.428, + "eval_steps_per_second": 0.803, + "step": 323000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.788620471954346, + "loss_rtd": 0.21855397522449493, + "loss_sent": 0.5130893588066101, + "loss_sod": 0.01934962347149849, + "loss_total": 0.7509929537773132, + "step": 323099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.9072065353393555, + "loss_rtd": 0.21429945528507233, + "loss_sent": 0.4870833158493042, + "loss_sod": 0.052126333117485046, + "loss_total": 0.7535091042518616, + "step": 323099 + }, + { + "epoch": 0.0082, + "grad_norm": 2.9539873600006104, + "learning_rate": 5.83788018824008e-06, + "loss": 0.4355, + "step": 323100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.896347522735596, + "loss_rtd": 0.20507663488388062, + "loss_sent": 0.3175153136253357, + "loss_sod": 0.020972145721316338, + "loss_total": 0.5435640811920166, + "step": 323199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.557343006134033, + "loss_rtd": 0.1855645477771759, + "loss_sent": 0.0837344229221344, + "loss_sod": 0.05035916715860367, + "loss_total": 0.3196581304073334, + "step": 323199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.6112529039382935, + "learning_rate": 5.823008828406407e-06, + "loss": 0.4266, + "step": 323200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.066956043243408, + "loss_rtd": 0.1619899570941925, + "loss_sent": 0.000483002164401114, + "loss_sod": 0.02260011062026024, + "loss_total": 0.18507307767868042, + "step": 323299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.710513591766357, + "loss_rtd": 0.20172366499900818, + "loss_sent": 0.29260778427124023, + "loss_sod": 0.01542019471526146, + "loss_total": 0.5097516775131226, + "step": 323299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.088402509689331, + "learning_rate": 5.8081552630594945e-06, + "loss": 0.4283, + "step": 323300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.820318698883057, + "loss_rtd": 0.2332913875579834, + "loss_sent": 0.15792295336723328, + "loss_sod": 0.05823970586061478, + "loss_total": 0.44945403933525085, + "step": 323399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.7537150382995605, + "loss_rtd": 0.21362803876399994, + "loss_sent": 0.27700141072273254, + "loss_sod": 0.009000813588500023, + "loss_total": 0.49963027238845825, + "step": 323399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.7966939210891724, + "learning_rate": 5.79331949818237e-06, + "loss": 0.4246, + "step": 323400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.81212043762207, + "loss_rtd": 0.17437124252319336, + "loss_sent": 0.0001814306597225368, + "loss_sod": 0.2614227533340454, + "loss_total": 0.43597543239593506, + "step": 323499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.512197017669678, + "loss_rtd": 0.17744967341423035, + "loss_sent": 3.1737588869873434e-05, + "loss_sod": 0.10350587964057922, + "loss_total": 0.28098729252815247, + "step": 323499 + }, + { + "epoch": 0.009, + "grad_norm": 1.2898768186569214, + "learning_rate": 5.778501539750853e-06, + "loss": 0.424, + "step": 323500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.914320945739746, + "loss_rtd": 0.2515323758125305, + "loss_sent": 0.27580705285072327, + "loss_sod": 0.031762272119522095, + "loss_total": 0.5591017007827759, + "step": 323599 + }, + { + "epoch": 0.009198, + "loss_gen": 6.314299583435059, + "loss_rtd": 0.20393356680870056, + "loss_sent": 0.14942575991153717, + "loss_sod": 0.03397171571850777, + "loss_total": 0.3873310387134552, + "step": 323599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.2606782913208008, + "learning_rate": 5.763701393733628e-06, + "loss": 0.4336, + "step": 323600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.89138126373291, + "loss_rtd": 0.18766659498214722, + "loss_sent": 0.10472889244556427, + "loss_sod": 0.04665805399417877, + "loss_total": 0.33905354142189026, + "step": 323699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.872872352600098, + "loss_rtd": 0.21082136034965515, + "loss_sent": 0.14542880654335022, + "loss_sod": 0.03512772172689438, + "loss_total": 0.39137789607048035, + "step": 323699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.6540446877479553, + "learning_rate": 5.748919066092196e-06, + "loss": 0.4298, + "step": 323700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.380775451660156, + "loss_rtd": 0.1875913143157959, + "loss_sent": 0.0753634124994278, + "loss_sod": 0.03475553169846535, + "loss_total": 0.29771023988723755, + "step": 323799 + }, + { + "epoch": 0.009598, + "loss_gen": 6.2815985679626465, + "loss_rtd": 0.2141750007867813, + "loss_sent": 0.3287227153778076, + "loss_sod": 0.10725986957550049, + "loss_total": 0.6501575708389282, + "step": 323799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.8032573461532593, + "learning_rate": 5.734154562780869e-06, + "loss": 0.4346, + "step": 323800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.349405765533447, + "loss_rtd": 0.15455269813537598, + "loss_sent": 0.02965625189244747, + "loss_sod": 0.12667007744312286, + "loss_total": 0.31087902188301086, + "step": 323899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.886132717132568, + "loss_rtd": 0.20519687235355377, + "loss_sent": 0.18642914295196533, + "loss_sod": 0.025915272533893585, + "loss_total": 0.4175412952899933, + "step": 323899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.018308401107788, + "learning_rate": 5.7194078897467866e-06, + "loss": 0.399, + "step": 323900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.419346809387207, + "loss_rtd": 0.17705883085727692, + "loss_sent": 0.034329622983932495, + "loss_sod": 0.0573057122528553, + "loss_total": 0.2686941623687744, + "step": 323999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.800714492797852, + "loss_rtd": 0.22096692025661469, + "loss_sent": 0.18013055622577667, + "loss_sod": 0.0074227736331522465, + "loss_total": 0.40852025151252747, + "step": 323999 + }, + { + "epoch": 0.01, + "grad_norm": 0.788143515586853, + "learning_rate": 5.704679052929912e-06, + "loss": 0.4304, + "step": 324000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4050164520740509, + "eval_runtime": 150.6406, + "eval_samples_per_second": 102.516, + "eval_steps_per_second": 0.803, + "step": 324000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.502592086791992, + "loss_rtd": 0.16796788573265076, + "loss_sent": 0.009812552481889725, + "loss_sod": 0.012626749463379383, + "loss_total": 0.1904071867465973, + "step": 324099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.706894397735596, + "loss_rtd": 0.21625037491321564, + "loss_sent": 0.2922687232494354, + "loss_sod": 0.07662703096866608, + "loss_total": 0.5851461291313171, + "step": 324099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.191178798675537, + "learning_rate": 5.689968058263029e-06, + "loss": 0.4358, + "step": 324100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.98969030380249, + "loss_rtd": 0.22385506331920624, + "loss_sent": 0.09724408388137817, + "loss_sod": 0.001695240498520434, + "loss_total": 0.3227943778038025, + "step": 324199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.861635684967041, + "loss_rtd": 0.2096160501241684, + "loss_sent": 0.14023081958293915, + "loss_sod": 0.020481225103139877, + "loss_total": 0.3703280985355377, + "step": 324199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.924454927444458, + "learning_rate": 5.675274911671702e-06, + "loss": 0.4126, + "step": 324200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.927198886871338, + "loss_rtd": 0.19980433583259583, + "loss_sent": 0.07807888090610504, + "loss_sod": 0.051596079021692276, + "loss_total": 0.32947927713394165, + "step": 324299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.897017002105713, + "loss_rtd": 0.1724708378314972, + "loss_sent": 0.15576156973838806, + "loss_sod": 0.04659992828965187, + "loss_total": 0.3748323321342468, + "step": 324299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.426881194114685, + "learning_rate": 5.660599619074342e-06, + "loss": 0.4221, + "step": 324300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.188392162322998, + "loss_rtd": 0.16919992864131927, + "loss_sent": 2.4333296096301638e-05, + "loss_sod": 0.04423704743385315, + "loss_total": 0.21346130967140198, + "step": 324399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.4636993408203125, + "loss_rtd": 0.20324519276618958, + "loss_sent": 0.06986478716135025, + "loss_sod": 0.009771407581865788, + "loss_total": 0.28288137912750244, + "step": 324399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.5278782844543457, + "learning_rate": 5.645942186382147e-06, + "loss": 0.4087, + "step": 324400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.868215084075928, + "loss_rtd": 0.1995425522327423, + "loss_sent": 0.11778850108385086, + "loss_sod": 0.021918118000030518, + "loss_total": 0.3392491638660431, + "step": 324499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.459663391113281, + "loss_rtd": 0.17289361357688904, + "loss_sent": 0.000690351938828826, + "loss_sod": 0.10180986672639847, + "loss_total": 0.2753938138484955, + "step": 324499 + }, + { + "epoch": 0.011, + "grad_norm": 0.7451019883155823, + "learning_rate": 5.6313026194991515e-06, + "loss": 0.4, + "step": 324500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.870443344116211, + "loss_rtd": 0.22134725749492645, + "loss_sent": 0.07503363490104675, + "loss_sod": 0.022276245057582855, + "loss_total": 0.31865713000297546, + "step": 324599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.7765913009643555, + "loss_rtd": 0.21152400970458984, + "loss_sent": 0.16066452860832214, + "loss_sod": 0.027201417833566666, + "loss_total": 0.39938995242118835, + "step": 324599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.8734956383705139, + "learning_rate": 5.616680924322143e-06, + "loss": 0.4356, + "step": 324600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.798498153686523, + "loss_rtd": 0.210484117269516, + "loss_sent": 0.2456606924533844, + "loss_sod": 0.017464028671383858, + "loss_total": 0.4736088514328003, + "step": 324699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.863271236419678, + "loss_rtd": 0.20461349189281464, + "loss_sent": 0.08429497480392456, + "loss_sod": 0.013856226578354836, + "loss_total": 0.3027647137641907, + "step": 324699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.3390682935714722, + "learning_rate": 5.60207710674075e-06, + "loss": 0.4167, + "step": 324700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.786705493927002, + "loss_rtd": 0.2024630606174469, + "loss_sent": 0.045959584414958954, + "loss_sod": 0.0023766355589032173, + "loss_total": 0.2507992684841156, + "step": 324799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.913586616516113, + "loss_rtd": 0.20431970059871674, + "loss_sent": 0.16872656345367432, + "loss_sod": 0.006521163508296013, + "loss_total": 0.3795674443244934, + "step": 324799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.6634280681610107, + "learning_rate": 5.587491172637388e-06, + "loss": 0.4399, + "step": 324800 + }, + { + "epoch": 0.011798, + "loss_gen": 6.0628886222839355, + "loss_rtd": 0.20214658975601196, + "loss_sent": 0.27476999163627625, + "loss_sod": 0.025656037032604218, + "loss_total": 0.5025726556777954, + "step": 324899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.674528121948242, + "loss_rtd": 0.22252005338668823, + "loss_sent": 0.06508571654558182, + "loss_sod": 0.04835352301597595, + "loss_total": 0.3359592854976654, + "step": 324899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.7498753070831299, + "learning_rate": 5.572923127887281e-06, + "loss": 0.4248, + "step": 324900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.742950916290283, + "loss_rtd": 0.22713983058929443, + "loss_sent": 0.3100818395614624, + "loss_sod": 0.019239237532019615, + "loss_total": 0.5564609169960022, + "step": 324999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.700028419494629, + "loss_rtd": 0.23019778728485107, + "loss_sent": 0.11089194566011429, + "loss_sod": 0.02070794254541397, + "loss_total": 0.36179766058921814, + "step": 324999 + }, + { + "epoch": 0.012, + "grad_norm": 0.7887712717056274, + "learning_rate": 5.558372978358417e-06, + "loss": 0.4335, + "step": 325000 + }, + { + "epoch": 0.012, + "eval_loss": 0.4047861099243164, + "eval_runtime": 150.8833, + "eval_samples_per_second": 102.351, + "eval_steps_per_second": 0.802, + "step": 325000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.0647664070129395, + "loss_rtd": 0.21331389248371124, + "loss_sent": 0.17131194472312927, + "loss_sod": 0.044183120131492615, + "loss_total": 0.42880895733833313, + "step": 325099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.845245838165283, + "loss_rtd": 0.21022851765155792, + "loss_sent": 0.25248074531555176, + "loss_sod": 0.029903419315814972, + "loss_total": 0.49261268973350525, + "step": 325099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.7494087219238281, + "learning_rate": 5.543840729911598e-06, + "loss": 0.4031, + "step": 325100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.503693103790283, + "loss_rtd": 0.19288434088230133, + "loss_sent": 0.010657139122486115, + "loss_sod": 0.11942486464977264, + "loss_total": 0.3229663372039795, + "step": 325199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.176707744598389, + "loss_rtd": 0.16236528754234314, + "loss_sent": 2.5268305762438104e-05, + "loss_sod": 0.22292031347751617, + "loss_total": 0.3853108882904053, + "step": 325199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.164449691772461, + "learning_rate": 5.52932638840043e-06, + "loss": 0.433, + "step": 325200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.929834842681885, + "loss_rtd": 0.1952754110097885, + "loss_sent": 0.18080949783325195, + "loss_sod": 0.1016479954123497, + "loss_total": 0.47773289680480957, + "step": 325299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.479615211486816, + "loss_rtd": 0.16272349655628204, + "loss_sent": 0.04267306625843048, + "loss_sod": 0.06971586495637894, + "loss_total": 0.27511245012283325, + "step": 325299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.7706376314163208, + "learning_rate": 5.5148299596712635e-06, + "loss": 0.4289, + "step": 325300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.211487293243408, + "loss_rtd": 0.1634761542081833, + "loss_sent": 0.0009794557699933648, + "loss_sod": 0.1122213751077652, + "loss_total": 0.2766769826412201, + "step": 325399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.991980075836182, + "loss_rtd": 0.2033514678478241, + "loss_sent": 0.09735716879367828, + "loss_sod": 0.11001276969909668, + "loss_total": 0.41072142124176025, + "step": 325399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.1931016445159912, + "learning_rate": 5.500351449563274e-06, + "loss": 0.4179, + "step": 325400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.370450973510742, + "loss_rtd": 0.22828343510627747, + "loss_sent": 0.5143671631813049, + "loss_sod": 0.005769146606326103, + "loss_total": 0.7484197616577148, + "step": 325499 + }, + { + "epoch": 0.012998, + "loss_gen": 6.189851760864258, + "loss_rtd": 0.20508435368537903, + "loss_sent": 0.12709660828113556, + "loss_sod": 0.054082177579402924, + "loss_total": 0.3862631320953369, + "step": 325499 + }, + { + "epoch": 0.013, + "grad_norm": 1.8359318971633911, + "learning_rate": 5.4858908639083936e-06, + "loss": 0.4183, + "step": 325500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.988429069519043, + "loss_rtd": 0.18256030976772308, + "loss_sent": 2.42159567278577e-05, + "loss_sod": 0.053691912442445755, + "loss_total": 0.23627643287181854, + "step": 325599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.198243141174316, + "loss_rtd": 0.17098495364189148, + "loss_sent": 0.01786923222243786, + "loss_sod": 0.013378378935158253, + "loss_total": 0.20223256945610046, + "step": 325599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.44063425064086914, + "learning_rate": 5.47144820853136e-06, + "loss": 0.4013, + "step": 325600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.267978191375732, + "loss_rtd": 0.17308084666728973, + "loss_sent": 0.03930805251002312, + "loss_sod": 0.017490502446889877, + "loss_total": 0.22987940907478333, + "step": 325699 + }, + { + "epoch": 0.013398, + "loss_gen": 6.175917148590088, + "loss_rtd": 0.20863088965415955, + "loss_sent": 0.04831375926733017, + "loss_sod": 0.10151347517967224, + "loss_total": 0.35845813155174255, + "step": 325699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.7104655504226685, + "learning_rate": 5.4570234892496574e-06, + "loss": 0.4271, + "step": 325700 + }, + { + "epoch": 0.013598, + "loss_gen": 6.254371166229248, + "loss_rtd": 0.20921887457370758, + "loss_sent": 0.06468168646097183, + "loss_sod": 0.12483938038349152, + "loss_total": 0.39873993396759033, + "step": 325799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.966006755828857, + "loss_rtd": 0.2077091634273529, + "loss_sent": 0.1878802627325058, + "loss_sod": 0.042571596801280975, + "loss_total": 0.4381610155105591, + "step": 325799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.8915233016014099, + "learning_rate": 5.442616711873561e-06, + "loss": 0.4033, + "step": 325800 + }, + { + "epoch": 0.013798, + "loss_gen": 6.033672332763672, + "loss_rtd": 0.21461041271686554, + "loss_sent": 0.29450756311416626, + "loss_sod": 0.04083441197872162, + "loss_total": 0.5499523878097534, + "step": 325899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.867950439453125, + "loss_rtd": 0.19465017318725586, + "loss_sent": 0.17105290293693542, + "loss_sod": 0.0341770239174366, + "loss_total": 0.3998801112174988, + "step": 325899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.8925150632858276, + "learning_rate": 5.428227882206127e-06, + "loss": 0.4315, + "step": 325900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.686586380004883, + "loss_rtd": 0.24040189385414124, + "loss_sent": 0.5801467299461365, + "loss_sod": 0.04293229803442955, + "loss_total": 0.8634809255599976, + "step": 325999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.7457194328308105, + "loss_rtd": 0.19677533209323883, + "loss_sent": 0.36162111163139343, + "loss_sod": 0.018474020063877106, + "loss_total": 0.5768704414367676, + "step": 325999 + }, + { + "epoch": 0.014, + "grad_norm": 3.1317358016967773, + "learning_rate": 5.413857006043183e-06, + "loss": 0.4196, + "step": 326000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4038664698600769, + "eval_runtime": 150.9074, + "eval_samples_per_second": 102.334, + "eval_steps_per_second": 0.802, + "step": 326000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.6637749671936035, + "loss_rtd": 0.2100795954465866, + "loss_sent": 0.2706587016582489, + "loss_sod": 0.006405374966561794, + "loss_total": 0.4871436655521393, + "step": 326099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.706732749938965, + "loss_rtd": 0.21142153441905975, + "loss_sent": 0.04162096977233887, + "loss_sod": 0.06310342252254486, + "loss_total": 0.3161459267139435, + "step": 326099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.3565428256988525, + "learning_rate": 5.399504089173291e-06, + "loss": 0.413, + "step": 326100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.53071928024292, + "loss_rtd": 0.21235330402851105, + "loss_sent": 0.21853940188884735, + "loss_sod": 0.010951917618513107, + "loss_total": 0.441844642162323, + "step": 326199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.990543842315674, + "loss_rtd": 0.20678623020648956, + "loss_sent": 0.17031468451023102, + "loss_sod": 0.006323775742202997, + "loss_total": 0.38342469930648804, + "step": 326199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.7048099040985107, + "learning_rate": 5.3851691373778215e-06, + "loss": 0.411, + "step": 326200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.824671268463135, + "loss_rtd": 0.19651657342910767, + "loss_sent": 0.24372057616710663, + "loss_sod": 0.022527601569890976, + "loss_total": 0.4627647399902344, + "step": 326299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.953470230102539, + "loss_rtd": 0.2178725302219391, + "loss_sent": 0.2566494047641754, + "loss_sod": 0.00923298392444849, + "loss_total": 0.4837549328804016, + "step": 326299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.2701095342636108, + "learning_rate": 5.370852156430889e-06, + "loss": 0.4048, + "step": 326300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.846068382263184, + "loss_rtd": 0.21089419722557068, + "loss_sent": 0.3753054738044739, + "loss_sod": 0.045550860464572906, + "loss_total": 0.6317505240440369, + "step": 326399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.885951995849609, + "loss_rtd": 0.2226095050573349, + "loss_sent": 0.14516200125217438, + "loss_sod": 0.0367400124669075, + "loss_total": 0.4045115113258362, + "step": 326399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.4749726057052612, + "learning_rate": 5.356553152099381e-06, + "loss": 0.4165, + "step": 326400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.990963935852051, + "loss_rtd": 0.22117309272289276, + "loss_sent": 0.11019255220890045, + "loss_sod": 0.07310804724693298, + "loss_total": 0.4044736623764038, + "step": 326499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.933068752288818, + "loss_rtd": 0.20000134408473969, + "loss_sent": 0.19873477518558502, + "loss_sod": 0.05105184391140938, + "loss_total": 0.449787974357605, + "step": 326499 + }, + { + "epoch": 0.015, + "grad_norm": 1.7105019092559814, + "learning_rate": 5.342272130142911e-06, + "loss": 0.4207, + "step": 326500 + }, + { + "epoch": 0.015198, + "loss_gen": 6.19381046295166, + "loss_rtd": 0.22458291053771973, + "loss_sent": 0.17473535239696503, + "loss_sod": 0.15026640892028809, + "loss_total": 0.549584686756134, + "step": 326599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.7951836585998535, + "loss_rtd": 0.1988408863544464, + "loss_sent": 0.12660862505435944, + "loss_sod": 0.061969250440597534, + "loss_total": 0.3874187469482422, + "step": 326599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.3819634914398193, + "learning_rate": 5.3280090963138905e-06, + "loss": 0.4097, + "step": 326600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.61575984954834, + "loss_rtd": 0.1741504967212677, + "loss_sent": 2.4094882974168286e-05, + "loss_sod": 0.10541263222694397, + "loss_total": 0.2795872092247009, + "step": 326699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.273968696594238, + "loss_rtd": 0.1708516776561737, + "loss_sent": 0.0007279975106939673, + "loss_sod": 0.061606720089912415, + "loss_total": 0.23318639397621155, + "step": 326699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.1828755140304565, + "learning_rate": 5.313764056357462e-06, + "loss": 0.4176, + "step": 326700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.504697322845459, + "loss_rtd": 0.18083103001117706, + "loss_sent": 0.10256487876176834, + "loss_sod": 0.06174900382757187, + "loss_total": 0.3451448976993561, + "step": 326799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.871052265167236, + "loss_rtd": 0.20904289186000824, + "loss_sent": 0.42971178889274597, + "loss_sod": 0.027380742132663727, + "loss_total": 0.6661354303359985, + "step": 326799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.1960879564285278, + "learning_rate": 5.299537016011535e-06, + "loss": 0.4174, + "step": 326800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.506750583648682, + "loss_rtd": 0.17384295165538788, + "loss_sent": 0.06002357602119446, + "loss_sod": 0.12112460285425186, + "loss_total": 0.3549911379814148, + "step": 326899 + }, + { + "epoch": 0.015798, + "loss_gen": 6.256747722625732, + "loss_rtd": 0.2172124683856964, + "loss_sent": 0.09165572375059128, + "loss_sod": 0.06175126135349274, + "loss_total": 0.3706194758415222, + "step": 326899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.1491069793701172, + "learning_rate": 5.285327981006744e-06, + "loss": 0.4205, + "step": 326900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.526986598968506, + "loss_rtd": 0.22238966822624207, + "loss_sent": 0.061034757643938065, + "loss_sod": 0.01487022452056408, + "loss_total": 0.29829466342926025, + "step": 326999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.909547805786133, + "loss_rtd": 0.23055598139762878, + "loss_sent": 0.0981447771191597, + "loss_sod": 0.06247810646891594, + "loss_total": 0.39117884635925293, + "step": 326999 + }, + { + "epoch": 0.016, + "grad_norm": 1.1581236124038696, + "learning_rate": 5.271136957066497e-06, + "loss": 0.4098, + "step": 327000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4041591286659241, + "eval_runtime": 151.0159, + "eval_samples_per_second": 102.261, + "eval_steps_per_second": 0.801, + "step": 327000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.215590476989746, + "loss_rtd": 0.17874009907245636, + "loss_sent": 0.03347104787826538, + "loss_sod": 0.020950332283973694, + "loss_total": 0.23316147923469543, + "step": 327099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.9166669845581055, + "loss_rtd": 0.20646648108959198, + "loss_sent": 0.08283329755067825, + "loss_sod": 0.005750981159508228, + "loss_total": 0.2950507700443268, + "step": 327099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.5116104483604431, + "learning_rate": 5.25696394990694e-06, + "loss": 0.4102, + "step": 327100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.8377275466918945, + "loss_rtd": 0.2196062058210373, + "loss_sent": 0.18252575397491455, + "loss_sod": 0.019245656207203865, + "loss_total": 0.42137759923934937, + "step": 327199 + }, + { + "epoch": 0.016398, + "loss_gen": 6.1156744956970215, + "loss_rtd": 0.2223893404006958, + "loss_sent": 0.2272312045097351, + "loss_sod": 0.06531637907028198, + "loss_total": 0.5149369239807129, + "step": 327199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.089553952217102, + "learning_rate": 5.242808965236951e-06, + "loss": 0.4309, + "step": 327200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.7514424324035645, + "loss_rtd": 0.22236521542072296, + "loss_sent": 0.2495274692773819, + "loss_sod": 0.020663181319832802, + "loss_total": 0.4925558567047119, + "step": 327299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.885983467102051, + "loss_rtd": 0.21689851582050323, + "loss_sent": 0.14415277540683746, + "loss_sod": 0.06567038595676422, + "loss_total": 0.4267216920852661, + "step": 327299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.581281304359436, + "learning_rate": 5.228672008758151e-06, + "loss": 0.4229, + "step": 327300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.7649922370910645, + "loss_rtd": 0.19582818448543549, + "loss_sent": 0.09548603743314743, + "loss_sod": 0.06491593271493912, + "loss_total": 0.3562301695346832, + "step": 327399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.346714973449707, + "loss_rtd": 0.11843686550855637, + "loss_sent": 0.00842170137912035, + "loss_sod": 0.04954763874411583, + "loss_total": 0.17640620470046997, + "step": 327399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.840590238571167, + "learning_rate": 5.214553086164914e-06, + "loss": 0.4262, + "step": 327400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.7721052169799805, + "loss_rtd": 0.16569367051124573, + "loss_sent": 4.4171603803988546e-05, + "loss_sod": 0.1965005099773407, + "loss_total": 0.362238347530365, + "step": 327499 + }, + { + "epoch": 0.016998, + "loss_gen": 6.104852199554443, + "loss_rtd": 0.2262401133775711, + "loss_sent": 0.24634747207164764, + "loss_sod": 0.04126156121492386, + "loss_total": 0.513849139213562, + "step": 327499 + }, + { + "epoch": 0.017, + "grad_norm": 1.8922315835952759, + "learning_rate": 5.200452203144351e-06, + "loss": 0.4037, + "step": 327500 + }, + { + "epoch": 0.017198, + "loss_gen": 6.110281944274902, + "loss_rtd": 0.22877375781536102, + "loss_sent": 0.373697966337204, + "loss_sod": 0.0164099782705307, + "loss_total": 0.6188817024230957, + "step": 327599 + }, + { + "epoch": 0.017198, + "loss_gen": 6.031182289123535, + "loss_rtd": 0.2108943611383438, + "loss_sent": 0.25661003589630127, + "loss_sod": 0.11277800053358078, + "loss_total": 0.5802823901176453, + "step": 327599 + }, + { + "epoch": 0.0172, + "grad_norm": 2.1249122619628906, + "learning_rate": 5.18636936537627e-06, + "loss": 0.3971, + "step": 327600 + }, + { + "epoch": 0.017398, + "loss_gen": 6.0957417488098145, + "loss_rtd": 0.2222178876399994, + "loss_sent": 0.041979361325502396, + "loss_sod": 0.15878616273403168, + "loss_total": 0.42298340797424316, + "step": 327699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.945942401885986, + "loss_rtd": 0.21855933964252472, + "loss_sent": 0.32858410477638245, + "loss_sod": 0.015324393287301064, + "loss_total": 0.5624678134918213, + "step": 327699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.450487732887268, + "learning_rate": 5.1723045785332495e-06, + "loss": 0.4524, + "step": 327700 + }, + { + "epoch": 0.017598, + "loss_gen": 6.043429374694824, + "loss_rtd": 0.20989467203617096, + "loss_sent": 0.2914145588874817, + "loss_sod": 0.01982644572854042, + "loss_total": 0.521135687828064, + "step": 327799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.924368858337402, + "loss_rtd": 0.21258048713207245, + "loss_sent": 0.3502015769481659, + "loss_sod": 0.02351352758705616, + "loss_total": 0.5862956047058105, + "step": 327799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.5364757776260376, + "learning_rate": 5.1582578482805845e-06, + "loss": 0.424, + "step": 327800 + }, + { + "epoch": 0.017798, + "loss_gen": 6.11417293548584, + "loss_rtd": 0.20679043233394623, + "loss_sent": 0.13846267759799957, + "loss_sod": 0.028997337445616722, + "loss_total": 0.3742504417896271, + "step": 327899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.896090507507324, + "loss_rtd": 0.2007712870836258, + "loss_sent": 0.3696061074733734, + "loss_sod": 0.014266236685216427, + "loss_total": 0.5846436023712158, + "step": 327899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.9549663066864014, + "learning_rate": 5.144229180276306e-06, + "loss": 0.4127, + "step": 327900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.984939098358154, + "loss_rtd": 0.2159128189086914, + "loss_sent": 0.11442665755748749, + "loss_sod": 0.09687717258930206, + "loss_total": 0.42721664905548096, + "step": 327999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.556375503540039, + "loss_rtd": 0.1961243599653244, + "loss_sent": 0.0903892070055008, + "loss_sod": 0.04251960292458534, + "loss_total": 0.32903316617012024, + "step": 327999 + }, + { + "epoch": 0.018, + "grad_norm": 1.0247933864593506, + "learning_rate": 5.130218580171142e-06, + "loss": 0.4108, + "step": 328000 + }, + { + "epoch": 0.018, + "eval_loss": 0.40445011854171753, + "eval_runtime": 150.832, + "eval_samples_per_second": 102.385, + "eval_steps_per_second": 0.802, + "step": 328000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.796125411987305, + "loss_rtd": 0.2105560004711151, + "loss_sent": 0.11167427152395248, + "loss_sod": 0.09179627895355225, + "loss_total": 0.41402655839920044, + "step": 328099 + }, + { + "epoch": 0.018198, + "loss_gen": 6.014242649078369, + "loss_rtd": 0.2252071499824524, + "loss_sent": 0.6624555587768555, + "loss_sod": 0.03477327153086662, + "loss_total": 0.922435998916626, + "step": 328099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.8746533393859863, + "learning_rate": 5.116226053608564e-06, + "loss": 0.4303, + "step": 328100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.971130847930908, + "loss_rtd": 0.2006784975528717, + "loss_sent": 0.11580485105514526, + "loss_sod": 0.0019422958139330149, + "loss_total": 0.31842565536499023, + "step": 328199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.904659271240234, + "loss_rtd": 0.23206931352615356, + "loss_sent": 0.13666632771492004, + "loss_sod": 0.02995448186993599, + "loss_total": 0.3986901342868805, + "step": 328199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.0602086782455444, + "learning_rate": 5.102251606224767e-06, + "loss": 0.4404, + "step": 328200 + }, + { + "epoch": 0.018598, + "loss_gen": 6.061380863189697, + "loss_rtd": 0.21679025888442993, + "loss_sent": 0.10320959240198135, + "loss_sod": 0.049177706241607666, + "loss_total": 0.36917755007743835, + "step": 328299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.787722587585449, + "loss_rtd": 0.22701473534107208, + "loss_sent": 0.20476825535297394, + "loss_sod": 0.07264123857021332, + "loss_total": 0.5044242143630981, + "step": 328299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.087689995765686, + "learning_rate": 5.088295243648661e-06, + "loss": 0.4157, + "step": 328300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.234886169433594, + "loss_rtd": 0.16600844264030457, + "loss_sent": 0.025756871327757835, + "loss_sod": 0.03823602572083473, + "loss_total": 0.2300013303756714, + "step": 328399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.358644962310791, + "loss_rtd": 0.1765257567167282, + "loss_sent": 5.3043782827444375e-05, + "loss_sod": 0.06265078485012054, + "loss_total": 0.23922958970069885, + "step": 328399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.744238555431366, + "learning_rate": 5.074356971501853e-06, + "loss": 0.4128, + "step": 328400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.778722763061523, + "loss_rtd": 0.21752364933490753, + "loss_sent": 0.05778423324227333, + "loss_sod": 0.07231084257364273, + "loss_total": 0.3476187288761139, + "step": 328499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.061800956726074, + "loss_rtd": 0.17444182932376862, + "loss_sent": 0.015127585269510746, + "loss_sod": 0.03423468768596649, + "loss_total": 0.22380410134792328, + "step": 328499 + }, + { + "epoch": 0.019, + "grad_norm": 0.6600273251533508, + "learning_rate": 5.060436795398682e-06, + "loss": 0.4335, + "step": 328500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.3270487785339355, + "loss_rtd": 0.1867864727973938, + "loss_sent": 4.857310705119744e-05, + "loss_sod": 0.04032709077000618, + "loss_total": 0.22716213762760162, + "step": 328599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.446119785308838, + "loss_rtd": 0.18360395729541779, + "loss_sent": 0.053086526691913605, + "loss_sod": 0.06497945636510849, + "loss_total": 0.3016699254512787, + "step": 328599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.7255327105522156, + "learning_rate": 5.046534720946206e-06, + "loss": 0.4151, + "step": 328600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.637638092041016, + "loss_rtd": 0.22256742417812347, + "loss_sent": 0.10479529947042465, + "loss_sod": 0.05819655582308769, + "loss_total": 0.3855592906475067, + "step": 328699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.865545749664307, + "loss_rtd": 0.2192705124616623, + "loss_sent": 0.4976094961166382, + "loss_sod": 0.06025764346122742, + "loss_total": 0.7771376371383667, + "step": 328699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.254181146621704, + "learning_rate": 5.032650753744156e-06, + "loss": 0.4062, + "step": 328700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.697680950164795, + "loss_rtd": 0.1803479790687561, + "loss_sent": 0.006791319232434034, + "loss_sod": 0.06313110142946243, + "loss_total": 0.25027039647102356, + "step": 328799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.858571529388428, + "loss_rtd": 0.21300005912780762, + "loss_sent": 0.1427404135465622, + "loss_sod": 0.03782755136489868, + "loss_total": 0.3935680389404297, + "step": 328799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.8665528893470764, + "learning_rate": 5.018784899385009e-06, + "loss": 0.4164, + "step": 328800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.665689945220947, + "loss_rtd": 0.19979779422283173, + "loss_sent": 0.1384660005569458, + "loss_sod": 0.07756797969341278, + "loss_total": 0.4158317744731903, + "step": 328899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.6860857009887695, + "loss_rtd": 0.22062909603118896, + "loss_sent": 0.24843384325504303, + "loss_sod": 0.009958944283425808, + "loss_total": 0.4790218770503998, + "step": 328899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.0162591934204102, + "learning_rate": 5.004937163453926e-06, + "loss": 0.4225, + "step": 328900 + }, + { + "epoch": 0.019998, + "loss_gen": 6.146473407745361, + "loss_rtd": 0.20645081996917725, + "loss_sent": 0.26955464482307434, + "loss_sod": 0.06980663537979126, + "loss_total": 0.5458121299743652, + "step": 328999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.845710754394531, + "loss_rtd": 0.23007895052433014, + "loss_sent": 0.3061400055885315, + "loss_sod": 0.018914230167865753, + "loss_total": 0.5551332235336304, + "step": 328999 + }, + { + "epoch": 0.02, + "grad_norm": 2.0122175216674805, + "learning_rate": 4.9911075515287784e-06, + "loss": 0.4163, + "step": 329000 + }, + { + "epoch": 0.02, + "eval_loss": 0.40156105160713196, + "eval_runtime": 152.2938, + "eval_samples_per_second": 101.403, + "eval_steps_per_second": 0.795, + "step": 329000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.700454235076904, + "loss_rtd": 0.199051633477211, + "loss_sent": 0.14078226685523987, + "loss_sod": 0.01881776563823223, + "loss_total": 0.35865166783332825, + "step": 329099 + }, + { + "epoch": 0.020198, + "loss_gen": 6.165766716003418, + "loss_rtd": 0.21244393289089203, + "loss_sent": 0.2035900056362152, + "loss_sod": 0.02626965194940567, + "loss_total": 0.4423035979270935, + "step": 329099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.7645477652549744, + "learning_rate": 4.9772960691801205e-06, + "loss": 0.4157, + "step": 329100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.6507768630981445, + "loss_rtd": 0.18301333487033844, + "loss_sent": 0.06695782393217087, + "loss_sod": 0.01913287490606308, + "loss_total": 0.2691040337085724, + "step": 329199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.8309736251831055, + "loss_rtd": 0.2215300053358078, + "loss_sent": 0.11230618506669998, + "loss_sod": 0.053049247711896896, + "loss_total": 0.3868854343891144, + "step": 329199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.005005955696106, + "learning_rate": 4.963502721971219e-06, + "loss": 0.4304, + "step": 329200 + }, + { + "epoch": 0.020598, + "loss_gen": 6.053281784057617, + "loss_rtd": 0.2102501392364502, + "loss_sent": 0.28845369815826416, + "loss_sod": 0.04737616702914238, + "loss_total": 0.5460799932479858, + "step": 329299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.9263997077941895, + "loss_rtd": 0.2220664769411087, + "loss_sent": 0.570816159248352, + "loss_sod": 0.007466979790478945, + "loss_total": 0.8003495931625366, + "step": 329299 + }, + { + "epoch": 0.0206, + "grad_norm": 2.763258695602417, + "learning_rate": 4.949727515458047e-06, + "loss": 0.4051, + "step": 329300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.951574325561523, + "loss_rtd": 0.21616458892822266, + "loss_sent": 0.13302062451839447, + "loss_sod": 0.08037355542182922, + "loss_total": 0.42955875396728516, + "step": 329399 + }, + { + "epoch": 0.020798, + "loss_gen": 6.0092058181762695, + "loss_rtd": 0.22435683012008667, + "loss_sent": 0.19314207136631012, + "loss_sod": 0.012176268734037876, + "loss_total": 0.4296751618385315, + "step": 329399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8418480753898621, + "learning_rate": 4.935970455189231e-06, + "loss": 0.4428, + "step": 329400 + }, + { + "epoch": 0.020998, + "loss_gen": 6.190423488616943, + "loss_rtd": 0.22325991094112396, + "loss_sent": 0.2840415835380554, + "loss_sod": 0.133723646402359, + "loss_total": 0.6410251259803772, + "step": 329499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.801259517669678, + "loss_rtd": 0.18949387967586517, + "loss_sent": 0.1130494773387909, + "loss_sod": 0.004593970719724894, + "loss_total": 0.30713731050491333, + "step": 329499 + }, + { + "epoch": 0.021, + "grad_norm": 1.0208121538162231, + "learning_rate": 4.922231546706141e-06, + "loss": 0.3941, + "step": 329500 + }, + { + "epoch": 0.021198, + "loss_gen": 6.144215106964111, + "loss_rtd": 0.21672308444976807, + "loss_sent": 0.17375807464122772, + "loss_sod": 0.1187053769826889, + "loss_total": 0.5091865658760071, + "step": 329599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.733941078186035, + "loss_rtd": 0.21451887488365173, + "loss_sent": 0.061883535236120224, + "loss_sod": 0.05031023547053337, + "loss_total": 0.32671263813972473, + "step": 329599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.432464361190796, + "learning_rate": 4.908510795542781e-06, + "loss": 0.4102, + "step": 329600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.977442264556885, + "loss_rtd": 0.21298815310001373, + "loss_sent": 0.4723530411720276, + "loss_sod": 0.0020596610847860575, + "loss_total": 0.6874008774757385, + "step": 329699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.839695930480957, + "loss_rtd": 0.20961076021194458, + "loss_sent": 0.10564636439085007, + "loss_sod": 0.005157722160220146, + "loss_total": 0.32041484117507935, + "step": 329699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.1619139909744263, + "learning_rate": 4.894808207225882e-06, + "loss": 0.4134, + "step": 329700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.89856481552124, + "loss_rtd": 0.22122664749622345, + "loss_sent": 0.15422587096691132, + "loss_sod": 0.08974475413560867, + "loss_total": 0.46519726514816284, + "step": 329799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.273439884185791, + "loss_rtd": 0.17803272604942322, + "loss_sent": 0.04515692964196205, + "loss_sod": 0.06484217941761017, + "loss_total": 0.28803184628486633, + "step": 329799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.8945031762123108, + "learning_rate": 4.881123787274849e-06, + "loss": 0.4226, + "step": 329800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.8102569580078125, + "loss_rtd": 0.20951087772846222, + "loss_sent": 0.3647569715976715, + "loss_sod": 0.03198857977986336, + "loss_total": 0.6062564253807068, + "step": 329899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.953311443328857, + "loss_rtd": 0.21475659310817719, + "loss_sent": 0.1410730928182602, + "loss_sod": 0.08741606771945953, + "loss_total": 0.4432457685470581, + "step": 329899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.364906668663025, + "learning_rate": 4.8674575412017535e-06, + "loss": 0.4077, + "step": 329900 + }, + { + "epoch": 0.021998, + "loss_gen": 6.12127161026001, + "loss_rtd": 0.21860116720199585, + "loss_sent": 0.3105919361114502, + "loss_sod": 0.032918207347393036, + "loss_total": 0.5621113181114197, + "step": 329999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.730120658874512, + "loss_rtd": 0.21275348961353302, + "loss_sent": 0.09844788908958435, + "loss_sod": 0.004904065281152725, + "loss_total": 0.3161054253578186, + "step": 329999 + }, + { + "epoch": 0.022, + "grad_norm": 1.579740047454834, + "learning_rate": 4.853809474511362e-06, + "loss": 0.431, + "step": 330000 + }, + { + "epoch": 0.022, + "eval_loss": 0.3937540054321289, + "eval_runtime": 150.8178, + "eval_samples_per_second": 102.395, + "eval_steps_per_second": 0.802, + "step": 330000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.902409553527832, + "loss_rtd": 0.19768086075782776, + "loss_sent": 0.06069553270936012, + "loss_sod": 0.10106280446052551, + "loss_total": 0.3594391942024231, + "step": 330099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.86720085144043, + "loss_rtd": 0.2345312386751175, + "loss_sent": 0.07318239659070969, + "loss_sod": 0.002706903498619795, + "loss_total": 0.31042054295539856, + "step": 330099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.8370559811592102, + "learning_rate": 4.840179592701111e-06, + "loss": 0.3974, + "step": 330100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.917291641235352, + "loss_rtd": 0.19501982629299164, + "loss_sent": 0.24797093868255615, + "loss_sod": 0.03391135483980179, + "loss_total": 0.4769021272659302, + "step": 330199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.8460259437561035, + "loss_rtd": 0.20476824045181274, + "loss_sent": 0.421603262424469, + "loss_sod": 0.022958368062973022, + "loss_total": 0.6493299007415771, + "step": 330199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.2038854360580444, + "learning_rate": 4.826567901261131e-06, + "loss": 0.4202, + "step": 330200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.807995796203613, + "loss_rtd": 0.21853376924991608, + "loss_sent": 0.4487013816833496, + "loss_sod": 0.02856394648551941, + "loss_total": 0.6957991123199463, + "step": 330299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.814579010009766, + "loss_rtd": 0.2088613361120224, + "loss_sent": 0.11348633468151093, + "loss_sod": 0.032599225640296936, + "loss_total": 0.35494691133499146, + "step": 330299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.9923590421676636, + "learning_rate": 4.812974405674192e-06, + "loss": 0.4108, + "step": 330300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.453118324279785, + "loss_rtd": 0.18154007196426392, + "loss_sent": 0.0454830676317215, + "loss_sod": 0.029125642031431198, + "loss_total": 0.2561487853527069, + "step": 330399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.678781986236572, + "loss_rtd": 0.18436993658542633, + "loss_sent": 0.09236066788434982, + "loss_sod": 0.03614688292145729, + "loss_total": 0.31287747621536255, + "step": 330399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.7529614567756653, + "learning_rate": 4.7993991114157585e-06, + "loss": 0.4214, + "step": 330400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.745978355407715, + "loss_rtd": 0.21094991266727448, + "loss_sent": 0.08378388732671738, + "loss_sod": 0.014974737539887428, + "loss_total": 0.30970853567123413, + "step": 330499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.638060569763184, + "loss_rtd": 0.21033842861652374, + "loss_sent": 0.1420600712299347, + "loss_sod": 0.018450338393449783, + "loss_total": 0.3708488345146179, + "step": 330499 + }, + { + "epoch": 0.023, + "grad_norm": 0.7480770945549011, + "learning_rate": 4.785842023953951e-06, + "loss": 0.4241, + "step": 330500 + }, + { + "epoch": 0.023198, + "loss_gen": 6.05764102935791, + "loss_rtd": 0.22154130041599274, + "loss_sent": 0.09092435240745544, + "loss_sod": 0.047517575323581696, + "loss_total": 0.3599832355976105, + "step": 330599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.909764289855957, + "loss_rtd": 0.2100762575864792, + "loss_sent": 0.12694038450717926, + "loss_sod": 0.03214793652296066, + "loss_total": 0.3691645860671997, + "step": 330599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.1987636089324951, + "learning_rate": 4.772303148749585e-06, + "loss": 0.413, + "step": 330600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.682451248168945, + "loss_rtd": 0.22611628472805023, + "loss_sent": 0.3129121661186218, + "loss_sod": 0.007085670251399279, + "loss_total": 0.5461140871047974, + "step": 330699 + }, + { + "epoch": 0.023398, + "loss_gen": 6.099686622619629, + "loss_rtd": 0.20334909856319427, + "loss_sent": 0.1557920277118683, + "loss_sod": 0.021201398223638535, + "loss_total": 0.3803425431251526, + "step": 330699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.9493911266326904, + "learning_rate": 4.758782491256092e-06, + "loss": 0.4123, + "step": 330700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.896134853363037, + "loss_rtd": 0.2380102276802063, + "loss_sent": 0.2869156301021576, + "loss_sod": 0.017074715346097946, + "loss_total": 0.5420005321502686, + "step": 330799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.955774784088135, + "loss_rtd": 0.21714618802070618, + "loss_sent": 0.30493423342704773, + "loss_sod": 0.03374708816409111, + "loss_total": 0.5558274984359741, + "step": 330799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.0822604894638062, + "learning_rate": 4.745280056919599e-06, + "loss": 0.3997, + "step": 330800 + }, + { + "epoch": 0.023798, + "loss_gen": 6.148456573486328, + "loss_rtd": 0.2017562836408615, + "loss_sent": 0.35946691036224365, + "loss_sod": 0.013228020630776882, + "loss_total": 0.574451208114624, + "step": 330899 + }, + { + "epoch": 0.023798, + "loss_gen": 6.044058799743652, + "loss_rtd": 0.21628901362419128, + "loss_sent": 0.20557735860347748, + "loss_sod": 0.038398340344429016, + "loss_total": 0.46026474237442017, + "step": 330899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.6446350812911987, + "learning_rate": 4.731795851178889e-06, + "loss": 0.4111, + "step": 330900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.843855857849121, + "loss_rtd": 0.20253688097000122, + "loss_sent": 0.13047783076763153, + "loss_sod": 0.008157811127603054, + "loss_total": 0.3411725163459778, + "step": 330999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.928069591522217, + "loss_rtd": 0.19949424266815186, + "loss_sent": 0.28561946749687195, + "loss_sod": 0.13547632098197937, + "loss_total": 0.6205900311470032, + "step": 330999 + }, + { + "epoch": 0.024, + "grad_norm": 1.5329244136810303, + "learning_rate": 4.7183298794654055e-06, + "loss": 0.4174, + "step": 331000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4025688171386719, + "eval_runtime": 151.162, + "eval_samples_per_second": 102.162, + "eval_steps_per_second": 0.8, + "step": 331000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.378228664398193, + "loss_rtd": 0.17921258509159088, + "loss_sent": 2.3826671167626046e-05, + "loss_sod": 0.10741123557090759, + "loss_total": 0.28664764761924744, + "step": 331099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.4460296630859375, + "loss_rtd": 0.15814580023288727, + "loss_sent": 0.019092829897999763, + "loss_sod": 0.19544920325279236, + "loss_total": 0.37268784642219543, + "step": 331099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.2692033052444458, + "learning_rate": 4.704882147203221e-06, + "loss": 0.4189, + "step": 331100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.772451400756836, + "loss_rtd": 0.20426462590694427, + "loss_sent": 0.09192955493927002, + "loss_sod": 0.04017992690205574, + "loss_total": 0.33637410402297974, + "step": 331199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.893918991088867, + "loss_rtd": 0.20278365910053253, + "loss_sent": 0.18656004965305328, + "loss_sod": 0.10743507742881775, + "loss_total": 0.49677878618240356, + "step": 331199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.0564281940460205, + "learning_rate": 4.691452659809092e-06, + "loss": 0.4259, + "step": 331200 + }, + { + "epoch": 0.024598, + "loss_gen": 6.00196647644043, + "loss_rtd": 0.2053288370370865, + "loss_sent": 0.4615537226200104, + "loss_sod": 0.023048587143421173, + "loss_total": 0.6899311542510986, + "step": 331299 + }, + { + "epoch": 0.024598, + "loss_gen": 6.271486759185791, + "loss_rtd": 0.2220696359872818, + "loss_sent": 0.23215635120868683, + "loss_sod": 0.04173806309700012, + "loss_total": 0.49596405029296875, + "step": 331299 + }, + { + "epoch": 0.0246, + "grad_norm": 2.853314161300659, + "learning_rate": 4.678041422692414e-06, + "loss": 0.4395, + "step": 331300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.253432273864746, + "loss_rtd": 0.1898089200258255, + "loss_sent": 0.005767661612480879, + "loss_sod": 0.08395013958215714, + "loss_total": 0.2795267403125763, + "step": 331399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.980484962463379, + "loss_rtd": 0.1327882707118988, + "loss_sent": 2.9455086405505426e-05, + "loss_sod": 0.030677590519189835, + "loss_total": 0.16349531710147858, + "step": 331399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.982572078704834, + "learning_rate": 4.664648441255237e-06, + "loss": 0.4176, + "step": 331400 + }, + { + "epoch": 0.024998, + "loss_gen": 6.060155868530273, + "loss_rtd": 0.20426997542381287, + "loss_sent": 0.23714661598205566, + "loss_sod": 0.05675915628671646, + "loss_total": 0.4981757402420044, + "step": 331499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.616395473480225, + "loss_rtd": 0.20990914106369019, + "loss_sent": 0.08643140643835068, + "loss_sod": 0.025104522705078125, + "loss_total": 0.3214450776576996, + "step": 331499 + }, + { + "epoch": 0.025, + "grad_norm": 0.8399190306663513, + "learning_rate": 4.651273720892241e-06, + "loss": 0.433, + "step": 331500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.994178295135498, + "loss_rtd": 0.2139955461025238, + "loss_sent": 0.19764533638954163, + "loss_sod": 0.019860614091157913, + "loss_total": 0.43150150775909424, + "step": 331599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.882445335388184, + "loss_rtd": 0.2033146768808365, + "loss_sent": 0.07411482185125351, + "loss_sod": 0.01846025511622429, + "loss_total": 0.2958897650241852, + "step": 331599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.157501459121704, + "learning_rate": 4.637917266990766e-06, + "loss": 0.4205, + "step": 331600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.274556636810303, + "loss_rtd": 0.18524613976478577, + "loss_sent": 2.4407792807323858e-05, + "loss_sod": 0.19543135166168213, + "loss_total": 0.3807018995285034, + "step": 331699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.718486309051514, + "loss_rtd": 0.19666177034378052, + "loss_sent": 0.036764759570360184, + "loss_sod": 0.042713478207588196, + "loss_total": 0.2761400043964386, + "step": 331699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.2056779861450195, + "learning_rate": 4.6245790849307966e-06, + "loss": 0.4246, + "step": 331700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.873589515686035, + "loss_rtd": 0.2102714329957962, + "loss_sent": 0.1378559023141861, + "loss_sod": 0.07041903585195541, + "loss_total": 0.4185463786125183, + "step": 331799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.940451145172119, + "loss_rtd": 0.19915197789669037, + "loss_sent": 0.4912972152233124, + "loss_sod": 0.039577387273311615, + "loss_total": 0.7300266027450562, + "step": 331799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.8488335609436035, + "learning_rate": 4.611259180084942e-06, + "loss": 0.4208, + "step": 331800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.767001152038574, + "loss_rtd": 0.20476117730140686, + "loss_sent": 0.19007740914821625, + "loss_sod": 0.00638324860483408, + "loss_total": 0.4012218415737152, + "step": 331899 + }, + { + "epoch": 0.025798, + "loss_gen": 6.0202765464782715, + "loss_rtd": 0.2258429080247879, + "loss_sent": 0.17241758108139038, + "loss_sod": 0.02805159240961075, + "loss_total": 0.42631208896636963, + "step": 331899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.3682467937469482, + "learning_rate": 4.597957557818456e-06, + "loss": 0.4192, + "step": 331900 + }, + { + "epoch": 0.025998, + "loss_gen": 6.004212856292725, + "loss_rtd": 0.2101108431816101, + "loss_sent": 0.310147225856781, + "loss_sod": 0.02578401193022728, + "loss_total": 0.5460420846939087, + "step": 331999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.76234769821167, + "loss_rtd": 0.21826069056987762, + "loss_sent": 0.06891893595457077, + "loss_sod": 0.008715218864381313, + "loss_total": 0.2958948314189911, + "step": 331999 + }, + { + "epoch": 0.026, + "grad_norm": 0.9495974183082581, + "learning_rate": 4.584674223489238e-06, + "loss": 0.426, + "step": 332000 + }, + { + "epoch": 0.026, + "eval_loss": 0.40221095085144043, + "eval_runtime": 150.896, + "eval_samples_per_second": 102.342, + "eval_steps_per_second": 0.802, + "step": 332000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.7623610496521, + "loss_rtd": 0.19780445098876953, + "loss_sent": 0.1770644634962082, + "loss_sod": 0.0342760868370533, + "loss_total": 0.4091449975967407, + "step": 332099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.960206508636475, + "loss_rtd": 0.211539164185524, + "loss_sent": 0.10768450796604156, + "loss_sod": 0.03814418241381645, + "loss_total": 0.3573678731918335, + "step": 332099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9639382362365723, + "learning_rate": 4.5714091824478225e-06, + "loss": 0.4232, + "step": 332100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.600955486297607, + "loss_rtd": 0.2025652825832367, + "loss_sent": 0.08323058485984802, + "loss_sod": 0.016329387202858925, + "loss_total": 0.3021252751350403, + "step": 332199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.2745041847229, + "loss_rtd": 0.16661636531352997, + "loss_sent": 0.05867577716708183, + "loss_sod": 0.039194971323013306, + "loss_total": 0.2644871175289154, + "step": 332199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.7608261108398438, + "learning_rate": 4.55816244003735e-06, + "loss": 0.4154, + "step": 332200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.867252349853516, + "loss_rtd": 0.19580833613872528, + "loss_sent": 0.14206142723560333, + "loss_sod": 0.02759961597621441, + "loss_total": 0.36546939611434937, + "step": 332299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.935952186584473, + "loss_rtd": 0.20971213281154633, + "loss_sent": 0.22192716598510742, + "loss_sod": 0.01085263304412365, + "loss_total": 0.44249194860458374, + "step": 332299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.8669111132621765, + "learning_rate": 4.5449340015936035e-06, + "loss": 0.4324, + "step": 332300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.52266788482666, + "loss_rtd": 0.22774533927440643, + "loss_sent": 0.2556370198726654, + "loss_sod": 0.045659035444259644, + "loss_total": 0.5290414094924927, + "step": 332399 + }, + { + "epoch": 0.026798, + "loss_gen": 6.224093437194824, + "loss_rtd": 0.1985151618719101, + "loss_sent": 0.12970012426376343, + "loss_sod": 0.04957842454314232, + "loss_total": 0.37779372930526733, + "step": 332399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.9836230874061584, + "learning_rate": 4.531723872445015e-06, + "loss": 0.406, + "step": 332400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.808529376983643, + "loss_rtd": 0.20841383934020996, + "loss_sent": 0.18419690430164337, + "loss_sod": 0.04101523011922836, + "loss_total": 0.4336259961128235, + "step": 332499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.823904514312744, + "loss_rtd": 0.21034353971481323, + "loss_sent": 0.0916057601571083, + "loss_sod": 0.05213785171508789, + "loss_total": 0.35408714413642883, + "step": 332499 + }, + { + "epoch": 0.027, + "grad_norm": 0.9661129117012024, + "learning_rate": 4.518532057912617e-06, + "loss": 0.422, + "step": 332500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.203954219818115, + "loss_rtd": 0.15858696401119232, + "loss_sent": 2.3970091206138022e-05, + "loss_sod": 0.07966428995132446, + "loss_total": 0.23827522993087769, + "step": 332599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.551509857177734, + "loss_rtd": 0.1735328584909439, + "loss_sent": 0.059783339500427246, + "loss_sod": 0.16423340141773224, + "loss_total": 0.3975495994091034, + "step": 332599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.4101791381835938, + "learning_rate": 4.505358563310058e-06, + "loss": 0.4181, + "step": 332600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.864551067352295, + "loss_rtd": 0.2109133005142212, + "loss_sent": 0.35520753264427185, + "loss_sod": 0.09790265560150146, + "loss_total": 0.6640235185623169, + "step": 332699 + }, + { + "epoch": 0.027398, + "loss_gen": 6.5219855308532715, + "loss_rtd": 0.19909067451953888, + "loss_sent": 0.1204799935221672, + "loss_sod": 0.1021123081445694, + "loss_total": 0.4216829836368561, + "step": 332699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.3958179950714111, + "learning_rate": 4.4922033939436285e-06, + "loss": 0.4164, + "step": 332700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.039506912231445, + "loss_rtd": 0.22093138098716736, + "loss_sent": 0.4375079572200775, + "loss_sod": 0.019474415108561516, + "loss_total": 0.6779137849807739, + "step": 332799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.763535499572754, + "loss_rtd": 0.21234478056430817, + "loss_sent": 0.3248450458049774, + "loss_sod": 0.04969676584005356, + "loss_total": 0.5868865847587585, + "step": 332799 + }, + { + "epoch": 0.0276, + "grad_norm": 2.4942786693573, + "learning_rate": 4.479066555112233e-06, + "loss": 0.406, + "step": 332800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.916197776794434, + "loss_rtd": 0.2195403277873993, + "loss_sent": 0.2267300933599472, + "loss_sod": 0.0210191048681736, + "loss_total": 0.4672895073890686, + "step": 332899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.780374526977539, + "loss_rtd": 0.187737375497818, + "loss_sent": 0.11766417324542999, + "loss_sod": 0.004940190352499485, + "loss_total": 0.3103417456150055, + "step": 332899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.367370367050171, + "learning_rate": 4.465948052107388e-06, + "loss": 0.4057, + "step": 332900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.854354381561279, + "loss_rtd": 0.20612572133541107, + "loss_sent": 0.2872171103954315, + "loss_sod": 0.007354743778705597, + "loss_total": 0.5006976127624512, + "step": 332999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.293296813964844, + "loss_rtd": 0.17345906794071198, + "loss_sent": 0.002196255372837186, + "loss_sod": 0.08091104030609131, + "loss_total": 0.2565663456916809, + "step": 332999 + }, + { + "epoch": 0.028, + "grad_norm": 1.0123155117034912, + "learning_rate": 4.452847890213218e-06, + "loss": 0.4264, + "step": 333000 + }, + { + "epoch": 0.028, + "eval_loss": 0.39921894669532776, + "eval_runtime": 151.2059, + "eval_samples_per_second": 102.132, + "eval_steps_per_second": 0.8, + "step": 333000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.67648983001709, + "loss_rtd": 0.20993544161319733, + "loss_sent": 0.056589748710393906, + "loss_sod": 0.03604697436094284, + "loss_total": 0.3025721609592438, + "step": 333099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.277653694152832, + "loss_rtd": 0.19301404058933258, + "loss_sent": 2.6689376682043076e-05, + "loss_sod": 0.14895227551460266, + "loss_total": 0.3419930338859558, + "step": 333099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.9726792573928833, + "learning_rate": 4.439766074706469e-06, + "loss": 0.4038, + "step": 333100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.845165729522705, + "loss_rtd": 0.216050386428833, + "loss_sent": 0.23020198941230774, + "loss_sod": 0.024312572553753853, + "loss_total": 0.47056496143341064, + "step": 333199 + }, + { + "epoch": 0.028398, + "loss_gen": 6.083836555480957, + "loss_rtd": 0.20436830818653107, + "loss_sent": 0.1874687373638153, + "loss_sod": 0.031638503074645996, + "loss_total": 0.42347556352615356, + "step": 333199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.3433164358139038, + "learning_rate": 4.426702610856509e-06, + "loss": 0.41, + "step": 333200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.622547626495361, + "loss_rtd": 0.19420625269412994, + "loss_sent": 0.15851068496704102, + "loss_sod": 0.05337762087583542, + "loss_total": 0.4060945510864258, + "step": 333299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.85243558883667, + "loss_rtd": 0.2162763476371765, + "loss_sent": 0.17673559486865997, + "loss_sod": 0.0460338331758976, + "loss_total": 0.439045786857605, + "step": 333299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.818702220916748, + "learning_rate": 4.413657503925278e-06, + "loss": 0.4265, + "step": 333300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.828593730926514, + "loss_rtd": 0.20923027396202087, + "loss_sent": 0.08376859873533249, + "loss_sod": 0.011367118917405605, + "loss_total": 0.30436599254608154, + "step": 333399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.851904392242432, + "loss_rtd": 0.23087845742702484, + "loss_sent": 0.07225316017866135, + "loss_sod": 0.012547656893730164, + "loss_total": 0.31567928194999695, + "step": 333399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.5214357972145081, + "learning_rate": 4.400630759167351e-06, + "loss": 0.4255, + "step": 333400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.92397928237915, + "loss_rtd": 0.2033231556415558, + "loss_sent": 0.2656804621219635, + "loss_sod": 0.014136162586510181, + "loss_total": 0.4831397831439972, + "step": 333499 + }, + { + "epoch": 0.028998, + "loss_gen": 6.0329203605651855, + "loss_rtd": 0.19545994699001312, + "loss_sent": 0.3682764768600464, + "loss_sod": 0.059346456080675125, + "loss_total": 0.6230828762054443, + "step": 333499 + }, + { + "epoch": 0.029, + "grad_norm": 2.159041166305542, + "learning_rate": 4.387622381829898e-06, + "loss": 0.4516, + "step": 333500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.921048164367676, + "loss_rtd": 0.21536974608898163, + "loss_sent": 0.33759239315986633, + "loss_sod": 0.009517904371023178, + "loss_total": 0.5624800324440002, + "step": 333599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.990695953369141, + "loss_rtd": 0.19041845202445984, + "loss_sent": 0.3113158345222473, + "loss_sod": 0.08361324667930603, + "loss_total": 0.5853475332260132, + "step": 333599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.6791911125183105, + "learning_rate": 4.3746323771527095e-06, + "loss": 0.4228, + "step": 333600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.6814866065979, + "loss_rtd": 0.19786830246448517, + "loss_sent": 0.0681382566690445, + "loss_sod": 0.04563521221280098, + "loss_total": 0.31164175271987915, + "step": 333699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.993233680725098, + "loss_rtd": 0.14971184730529785, + "loss_sent": 0.001132254721596837, + "loss_sod": 0.06022341549396515, + "loss_total": 0.21106751263141632, + "step": 333699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.7783605456352234, + "learning_rate": 4.361660750368129e-06, + "loss": 0.4281, + "step": 333700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.527941703796387, + "loss_rtd": 0.16626811027526855, + "loss_sent": 3.38069221470505e-05, + "loss_sod": 0.06308425217866898, + "loss_total": 0.22938616573810577, + "step": 333799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.3150129318237305, + "loss_rtd": 0.14878471195697784, + "loss_sent": 0.010522390715777874, + "loss_sod": 0.09989697486162186, + "loss_total": 0.25920408964157104, + "step": 333799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.063338041305542, + "learning_rate": 4.348707506701144e-06, + "loss": 0.4405, + "step": 333800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.809854984283447, + "loss_rtd": 0.2180991917848587, + "loss_sent": 0.38718050718307495, + "loss_sod": 0.08889317512512207, + "loss_total": 0.6941728591918945, + "step": 333899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.348130702972412, + "loss_rtd": 0.17794682085514069, + "loss_sent": 0.0166650228202343, + "loss_sod": 0.07842186093330383, + "loss_total": 0.2730337083339691, + "step": 333899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.609898567199707, + "learning_rate": 4.335772651369318e-06, + "loss": 0.413, + "step": 333900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.576001167297363, + "loss_rtd": 0.1630786806344986, + "loss_sent": 2.432770634186454e-05, + "loss_sod": 0.19459104537963867, + "loss_total": 0.35769402980804443, + "step": 333999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.616293430328369, + "loss_rtd": 0.1615023910999298, + "loss_sent": 0.001869112253189087, + "loss_sod": 0.13924895226955414, + "loss_total": 0.30262044072151184, + "step": 333999 + }, + { + "epoch": 0.03, + "grad_norm": 1.396501064300537, + "learning_rate": 4.322856189582814e-06, + "loss": 0.4012, + "step": 334000 + }, + { + "epoch": 0.03, + "eval_loss": 0.40007197856903076, + "eval_runtime": 150.8858, + "eval_samples_per_second": 102.349, + "eval_steps_per_second": 0.802, + "step": 334000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.429172515869141, + "loss_rtd": 0.17763426899909973, + "loss_sent": 0.04315278306603432, + "loss_sod": 0.0121862031519413, + "loss_total": 0.23297324776649475, + "step": 334099 + }, + { + "epoch": 0.030198, + "loss_gen": 6.005903720855713, + "loss_rtd": 0.21334975957870483, + "loss_sent": 0.1491318941116333, + "loss_sod": 0.005976186133921146, + "loss_total": 0.3684578537940979, + "step": 334099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.41744357347488403, + "learning_rate": 4.309958126544361e-06, + "loss": 0.4623, + "step": 334100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.986791610717773, + "loss_rtd": 0.21256619691848755, + "loss_sent": 0.0793716236948967, + "loss_sod": 0.07309561222791672, + "loss_total": 0.36503344774246216, + "step": 334199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.930571556091309, + "loss_rtd": 0.20606614649295807, + "loss_sent": 0.2768394947052002, + "loss_sod": 0.054543472826480865, + "loss_total": 0.5374491214752197, + "step": 334199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.406572937965393, + "learning_rate": 4.297078467449317e-06, + "loss": 0.4166, + "step": 334200 + }, + { + "epoch": 0.030598, + "loss_gen": 6.10678768157959, + "loss_rtd": 0.2104867547750473, + "loss_sent": 0.17154887318611145, + "loss_sod": 0.06073742359876633, + "loss_total": 0.4427730441093445, + "step": 334299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.565155506134033, + "loss_rtd": 0.19027173519134521, + "loss_sent": 0.2150413691997528, + "loss_sod": 0.004843328148126602, + "loss_total": 0.4101564288139343, + "step": 334299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.7450965642929077, + "learning_rate": 4.284217217485598e-06, + "loss": 0.4079, + "step": 334300 + }, + { + "epoch": 0.030798, + "loss_gen": 6.118762016296387, + "loss_rtd": 0.19611217081546783, + "loss_sent": 0.520443320274353, + "loss_sod": 0.020224379375576973, + "loss_total": 0.7367798686027527, + "step": 334399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.879006862640381, + "loss_rtd": 0.21311043202877045, + "loss_sent": 0.07570214569568634, + "loss_sod": 0.029709361493587494, + "loss_total": 0.3185219168663025, + "step": 334399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.4805805683135986, + "learning_rate": 4.271374381833726e-06, + "loss": 0.4249, + "step": 334400 + }, + { + "epoch": 0.030998, + "loss_gen": 6.06980562210083, + "loss_rtd": 0.20259004831314087, + "loss_sent": 0.10015519708395004, + "loss_sod": 0.028424939140677452, + "loss_total": 0.3311701714992523, + "step": 334499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.831358432769775, + "loss_rtd": 0.23395182192325592, + "loss_sent": 0.17726194858551025, + "loss_sod": 0.033757809549570084, + "loss_total": 0.44497159123420715, + "step": 334499 + }, + { + "epoch": 0.031, + "grad_norm": 0.9060441255569458, + "learning_rate": 4.258549965666775e-06, + "loss": 0.4053, + "step": 334500 + }, + { + "epoch": 0.031198, + "loss_gen": 6.059021472930908, + "loss_rtd": 0.21139657497406006, + "loss_sent": 0.3153374493122101, + "loss_sod": 0.036520786583423615, + "loss_total": 0.5632548332214355, + "step": 334599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.8541364669799805, + "loss_rtd": 0.2198180854320526, + "loss_sent": 0.07089675217866898, + "loss_sod": 0.0195726677775383, + "loss_total": 0.3102875053882599, + "step": 334599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.8097517490386963, + "learning_rate": 4.245743974150429e-06, + "loss": 0.4226, + "step": 334600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.453299522399902, + "loss_rtd": 0.18011866509914398, + "loss_sent": 2.362178565817885e-05, + "loss_sod": 0.10460500419139862, + "loss_total": 0.28474730253219604, + "step": 334699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.481233596801758, + "loss_rtd": 0.17237406969070435, + "loss_sent": 2.381735248491168e-05, + "loss_sod": 0.08869794011116028, + "loss_total": 0.2610958218574524, + "step": 334699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.827171802520752, + "learning_rate": 4.2329564124429455e-06, + "loss": 0.4162, + "step": 334700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.341057777404785, + "loss_rtd": 0.18300172686576843, + "loss_sent": 0.013174586929380894, + "loss_sod": 0.038474343717098236, + "loss_total": 0.23465067148208618, + "step": 334799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.3318939208984375, + "loss_rtd": 0.19755342602729797, + "loss_sent": 0.13849912583827972, + "loss_sod": 0.021874723955988884, + "loss_total": 0.35792726278305054, + "step": 334799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.9487466812133789, + "learning_rate": 4.220187285695137e-06, + "loss": 0.418, + "step": 334800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.989569664001465, + "loss_rtd": 0.20516981184482574, + "loss_sent": 0.2394498884677887, + "loss_sod": 0.02916102670133114, + "loss_total": 0.47378072142601013, + "step": 334899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.98516845703125, + "loss_rtd": 0.21761848032474518, + "loss_sent": 0.14362525939941406, + "loss_sod": 0.04514048993587494, + "loss_total": 0.4063842296600342, + "step": 334899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.5992496013641357, + "learning_rate": 4.207436599050418e-06, + "loss": 0.4284, + "step": 334900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.7612624168396, + "loss_rtd": 0.20631392300128937, + "loss_sent": 0.07714895159006119, + "loss_sod": 0.015602566301822662, + "loss_total": 0.2990654408931732, + "step": 334999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.76633882522583, + "loss_rtd": 0.1994708627462387, + "loss_sent": 0.17649705708026886, + "loss_sod": 0.018484674394130707, + "loss_total": 0.3944525718688965, + "step": 334999 + }, + { + "epoch": 0.032, + "grad_norm": 0.6809886693954468, + "learning_rate": 4.1947043576447575e-06, + "loss": 0.4309, + "step": 335000 + }, + { + "epoch": 0.032, + "eval_loss": 0.39839765429496765, + "eval_runtime": 152.5008, + "eval_samples_per_second": 101.265, + "eval_steps_per_second": 0.793, + "step": 335000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.944319725036621, + "loss_rtd": 0.21528884768486023, + "loss_sent": 0.22339734435081482, + "loss_sod": 0.02848324179649353, + "loss_total": 0.4671694338321686, + "step": 335099 + }, + { + "epoch": 0.000198, + "loss_gen": 6.1428656578063965, + "loss_rtd": 0.207811176776886, + "loss_sent": 0.0520741268992424, + "loss_sod": 0.10995840281248093, + "loss_total": 0.3698437213897705, + "step": 335099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.1972646713256836, + "learning_rate": 4.181990566606714e-06, + "loss": 0.4186, + "step": 335100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.5024871826171875, + "loss_rtd": 0.2226995825767517, + "loss_sent": 0.1211906224489212, + "loss_sod": 0.00852234847843647, + "loss_total": 0.35241255164146423, + "step": 335199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.963098526000977, + "loss_rtd": 0.20816507935523987, + "loss_sent": 0.17893053591251373, + "loss_sod": 0.01881728321313858, + "loss_total": 0.4059128761291504, + "step": 335199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.5845366716384888, + "learning_rate": 4.169295231057385e-06, + "loss": 0.4128, + "step": 335200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.1882853507995605, + "loss_rtd": 0.18059375882148743, + "loss_sent": 2.342807420063764e-05, + "loss_sod": 0.14032980799674988, + "loss_total": 0.32094699144363403, + "step": 335299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.190073013305664, + "loss_rtd": 0.16980881989002228, + "loss_sent": 0.09756369143724442, + "loss_sod": 0.11427263915538788, + "loss_total": 0.381645143032074, + "step": 335299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.116196632385254, + "learning_rate": 4.156618356110453e-06, + "loss": 0.4281, + "step": 335300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.813076972961426, + "loss_rtd": 0.21575145423412323, + "loss_sent": 0.4184168577194214, + "loss_sod": 0.04405057057738304, + "loss_total": 0.6782188415527344, + "step": 335399 + }, + { + "epoch": 0.000798, + "loss_gen": 6.0512213706970215, + "loss_rtd": 0.19395382702350616, + "loss_sent": 0.21937574446201324, + "loss_sod": 0.07406766712665558, + "loss_total": 0.4873972535133362, + "step": 335399 + }, + { + "epoch": 0.0008, + "grad_norm": 2.974411964416504, + "learning_rate": 4.143959946872167e-06, + "loss": 0.4205, + "step": 335400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.950162887573242, + "loss_rtd": 0.18977870047092438, + "loss_sent": 0.1720137596130371, + "loss_sod": 0.0743497759103775, + "loss_total": 0.436142235994339, + "step": 335499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.927491188049316, + "loss_rtd": 0.20648464560508728, + "loss_sent": 0.5373300909996033, + "loss_sod": 0.02205422893166542, + "loss_total": 0.7658689618110657, + "step": 335499 + }, + { + "epoch": 0.001, + "grad_norm": 1.7152159214019775, + "learning_rate": 4.131320008441336e-06, + "loss": 0.4373, + "step": 335500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.957103252410889, + "loss_rtd": 0.20949038863182068, + "loss_sent": 0.12904676795005798, + "loss_sod": 0.029970047995448112, + "loss_total": 0.3685072064399719, + "step": 335599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.924739360809326, + "loss_rtd": 0.206039160490036, + "loss_sent": 0.1924024075269699, + "loss_sod": 0.005136069841682911, + "loss_total": 0.40357762575149536, + "step": 335599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.5711073279380798, + "learning_rate": 4.118698545909311e-06, + "loss": 0.4161, + "step": 335600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.936959743499756, + "loss_rtd": 0.20973554253578186, + "loss_sent": 0.09327895194292068, + "loss_sod": 0.014217485673725605, + "loss_total": 0.3172319829463959, + "step": 335699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.935591697692871, + "loss_rtd": 0.20587770640850067, + "loss_sent": 0.04953960329294205, + "loss_sod": 0.014704114757478237, + "loss_total": 0.27012142539024353, + "step": 335699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.6653057336807251, + "learning_rate": 4.10609556436003e-06, + "loss": 0.3989, + "step": 335700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.859136581420898, + "loss_rtd": 0.22443939745426178, + "loss_sent": 0.17341157793998718, + "loss_sod": 0.005505038425326347, + "loss_total": 0.40335601568222046, + "step": 335799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.770941257476807, + "loss_rtd": 0.19478027522563934, + "loss_sent": 0.20250475406646729, + "loss_sod": 0.04144607484340668, + "loss_total": 0.4387311041355133, + "step": 335799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.1641507148742676, + "learning_rate": 4.093511068869965e-06, + "loss": 0.4142, + "step": 335800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.79935359954834, + "loss_rtd": 0.22523431479930878, + "loss_sent": 0.06826870888471603, + "loss_sod": 0.03287557139992714, + "loss_total": 0.32637861371040344, + "step": 335899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.779669284820557, + "loss_rtd": 0.22280511260032654, + "loss_sent": 0.2086220234632492, + "loss_sod": 0.009479574859142303, + "loss_total": 0.44090670347213745, + "step": 335899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.1660898923873901, + "learning_rate": 4.080945064508157e-06, + "loss": 0.4219, + "step": 335900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.86229133605957, + "loss_rtd": 0.20115306973457336, + "loss_sent": 0.14512121677398682, + "loss_sod": 0.03269626945257187, + "loss_total": 0.37897056341171265, + "step": 335999 + }, + { + "epoch": 0.001998, + "loss_gen": 6.054866313934326, + "loss_rtd": 0.21286867558956146, + "loss_sent": 0.15473420917987823, + "loss_sod": 0.01898730918765068, + "loss_total": 0.3865901827812195, + "step": 335999 + }, + { + "epoch": 0.002, + "grad_norm": 0.7506734728813171, + "learning_rate": 4.068397556336179e-06, + "loss": 0.4249, + "step": 336000 + }, + { + "epoch": 0.002, + "eval_loss": 0.3963145911693573, + "eval_runtime": 153.2053, + "eval_samples_per_second": 100.799, + "eval_steps_per_second": 0.79, + "step": 336000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.863484859466553, + "loss_rtd": 0.2079601287841797, + "loss_sent": 0.14807070791721344, + "loss_sod": 0.10638213902711868, + "loss_total": 0.46241295337677, + "step": 336099 + }, + { + "epoch": 0.002198, + "loss_gen": 6.015509128570557, + "loss_rtd": 0.2064938247203827, + "loss_sent": 0.1779230684041977, + "loss_sod": 0.1254219114780426, + "loss_total": 0.5098388195037842, + "step": 336099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.2778013944625854, + "learning_rate": 4.0558685494081764e-06, + "loss": 0.4224, + "step": 336100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.445869445800781, + "loss_rtd": 0.20915165543556213, + "loss_sent": 0.0642315000295639, + "loss_sod": 0.07996993511915207, + "loss_total": 0.3533530831336975, + "step": 336199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.256261348724365, + "loss_rtd": 0.1652882844209671, + "loss_sent": 0.022786187008023262, + "loss_sod": 0.1049724817276001, + "loss_total": 0.2930469512939453, + "step": 336199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.9616134762763977, + "learning_rate": 4.043358048770834e-06, + "loss": 0.4217, + "step": 336200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.7176833152771, + "loss_rtd": 0.2001343071460724, + "loss_sent": 0.10172601789236069, + "loss_sod": 0.033488817512989044, + "loss_total": 0.3353491425514221, + "step": 336299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.589804172515869, + "loss_rtd": 0.1875990331172943, + "loss_sent": 0.0018354837084189057, + "loss_sod": 0.11609485745429993, + "loss_total": 0.30552938580513, + "step": 336299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.421616792678833, + "learning_rate": 4.030866059463362e-06, + "loss": 0.4166, + "step": 336300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.7938971519470215, + "loss_rtd": 0.22643759846687317, + "loss_sent": 0.21268542110919952, + "loss_sod": 0.02447209507226944, + "loss_total": 0.46359509229660034, + "step": 336399 + }, + { + "epoch": 0.002798, + "loss_gen": 6.084261894226074, + "loss_rtd": 0.1936543732881546, + "loss_sent": 0.0703633725643158, + "loss_sod": 0.009327598847448826, + "loss_total": 0.27334535121917725, + "step": 336399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.0431867837905884, + "learning_rate": 4.018392586517544e-06, + "loss": 0.4098, + "step": 336400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.781310558319092, + "loss_rtd": 0.21961914002895355, + "loss_sent": 0.1871901899576187, + "loss_sod": 0.0016068393597379327, + "loss_total": 0.40841615200042725, + "step": 336499 + }, + { + "epoch": 0.002998, + "loss_gen": 6.274241924285889, + "loss_rtd": 0.21812716126441956, + "loss_sent": 0.3167370557785034, + "loss_sod": 0.0775049701333046, + "loss_total": 0.612369179725647, + "step": 336499 + }, + { + "epoch": 0.003, + "grad_norm": 0.9896555542945862, + "learning_rate": 4.005937634957696e-06, + "loss": 0.422, + "step": 336500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.9590559005737305, + "loss_rtd": 0.23067691922187805, + "loss_sent": 0.9917678236961365, + "loss_sod": 0.029147058725357056, + "loss_total": 1.2515918016433716, + "step": 336599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.142953872680664, + "loss_rtd": 0.16721126437187195, + "loss_sent": 0.0006171088316477835, + "loss_sod": 0.07498796284198761, + "loss_total": 0.24281632900238037, + "step": 336599 + }, + { + "epoch": 0.0032, + "grad_norm": 3.682013511657715, + "learning_rate": 3.993501209800676e-06, + "loss": 0.4179, + "step": 336600 + }, + { + "epoch": 0.003398, + "loss_gen": 6.105652332305908, + "loss_rtd": 0.2125975489616394, + "loss_sent": 0.11406980454921722, + "loss_sod": 0.0600692518055439, + "loss_total": 0.3867366313934326, + "step": 336699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.841837406158447, + "loss_rtd": 0.22694866359233856, + "loss_sent": 0.21269488334655762, + "loss_sod": 0.031707677990198135, + "loss_total": 0.4713512361049652, + "step": 336699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.438538908958435, + "learning_rate": 3.981083316055862e-06, + "loss": 0.427, + "step": 336700 + }, + { + "epoch": 0.003598, + "loss_gen": 6.0138020515441895, + "loss_rtd": 0.20830239355564117, + "loss_sent": 0.1359240710735321, + "loss_sod": 0.08318870514631271, + "loss_total": 0.4274151623249054, + "step": 336799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.254580497741699, + "loss_rtd": 0.17418579757213593, + "loss_sent": 0.00016088095435407013, + "loss_sod": 0.059335820376873016, + "loss_total": 0.23368249833583832, + "step": 336799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.0473743677139282, + "learning_rate": 3.968683958725183e-06, + "loss": 0.4281, + "step": 336800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.933165550231934, + "loss_rtd": 0.2162189483642578, + "loss_sent": 0.28007441759109497, + "loss_sod": 0.0607638955116272, + "loss_total": 0.55705726146698, + "step": 336899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.874255180358887, + "loss_rtd": 0.2069503664970398, + "loss_sent": 0.2948680520057678, + "loss_sod": 0.07497943192720413, + "loss_total": 0.5767978429794312, + "step": 336899 + }, + { + "epoch": 0.0038, + "grad_norm": 2.0698721408843994, + "learning_rate": 3.956303142803097e-06, + "loss": 0.4111, + "step": 336900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.75591516494751, + "loss_rtd": 0.2001626044511795, + "loss_sent": 0.28182515501976013, + "loss_sod": 0.03909014165401459, + "loss_total": 0.5210778713226318, + "step": 336999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.950390338897705, + "loss_rtd": 0.2040172666311264, + "loss_sent": 0.05994034186005592, + "loss_sod": 0.027685757726430893, + "loss_total": 0.2916433811187744, + "step": 336999 + }, + { + "epoch": 0.004, + "grad_norm": 0.766802191734314, + "learning_rate": 3.943940873276608e-06, + "loss": 0.4463, + "step": 337000 + }, + { + "epoch": 0.004, + "eval_loss": 0.3947581350803375, + "eval_runtime": 150.6148, + "eval_samples_per_second": 102.533, + "eval_steps_per_second": 0.803, + "step": 337000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.382259845733643, + "loss_rtd": 0.17015394568443298, + "loss_sent": 0.013352852314710617, + "loss_sod": 0.11146476864814758, + "loss_total": 0.2949715554714203, + "step": 337099 + }, + { + "epoch": 0.004198, + "loss_gen": 6.169939994812012, + "loss_rtd": 0.21506501734256744, + "loss_sent": 0.0743965283036232, + "loss_sod": 0.050989117473363876, + "loss_total": 0.340450644493103, + "step": 337099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.0970839262008667, + "learning_rate": 3.931597155125222e-06, + "loss": 0.4321, + "step": 337100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.968051433563232, + "loss_rtd": 0.192471444606781, + "loss_sent": 0.36148494482040405, + "loss_sod": 0.07960955798625946, + "loss_total": 0.6335659623146057, + "step": 337199 + }, + { + "epoch": 0.004398, + "loss_gen": 6.046945571899414, + "loss_rtd": 0.20671626925468445, + "loss_sent": 0.19385822117328644, + "loss_sod": 0.08800552040338516, + "loss_total": 0.48857998847961426, + "step": 337199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.465620994567871, + "learning_rate": 3.919271993320994e-06, + "loss": 0.4207, + "step": 337200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.441961288452148, + "loss_rtd": 0.16775529086589813, + "loss_sent": 0.07787902653217316, + "loss_sod": 0.027034278959035873, + "loss_total": 0.27266860008239746, + "step": 337299 + }, + { + "epoch": 0.004598, + "loss_gen": 6.021490097045898, + "loss_rtd": 0.21516260504722595, + "loss_sent": 0.25842270255088806, + "loss_sod": 0.16038836538791656, + "loss_total": 0.6339737176895142, + "step": 337299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.9959056973457336, + "learning_rate": 3.906965392828493e-06, + "loss": 0.4231, + "step": 337300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.752862930297852, + "loss_rtd": 0.1995813250541687, + "loss_sent": 0.06377172470092773, + "loss_sod": 0.01823241449892521, + "loss_total": 0.2815854549407959, + "step": 337399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.549044609069824, + "loss_rtd": 0.19926828145980835, + "loss_sent": 0.007355755195021629, + "loss_sod": 0.1424844115972519, + "loss_total": 0.3491084575653076, + "step": 337399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.97892826795578, + "learning_rate": 3.894677358604826e-06, + "loss": 0.4143, + "step": 337400 + }, + { + "epoch": 0.004998, + "loss_gen": 6.17965030670166, + "loss_rtd": 0.20519477128982544, + "loss_sent": 0.23570743203163147, + "loss_sod": 0.05581946298480034, + "loss_total": 0.49672165513038635, + "step": 337499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.923890113830566, + "loss_rtd": 0.2153753787279129, + "loss_sent": 0.13199764490127563, + "loss_sod": 0.029959501698613167, + "loss_total": 0.37733250856399536, + "step": 337499 + }, + { + "epoch": 0.005, + "grad_norm": 0.9899687767028809, + "learning_rate": 3.882407895599599e-06, + "loss": 0.4197, + "step": 337500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.413747787475586, + "loss_rtd": 0.158955916762352, + "loss_sent": 0.040096450597047806, + "loss_sod": 0.023038877174258232, + "loss_total": 0.22209124267101288, + "step": 337599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.880749225616455, + "loss_rtd": 0.21064843237400055, + "loss_sent": 0.09280610829591751, + "loss_sod": 0.07383677363395691, + "loss_total": 0.37729132175445557, + "step": 337599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.6896345615386963, + "learning_rate": 3.87015700875496e-06, + "loss": 0.399, + "step": 337600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.829585075378418, + "loss_rtd": 0.22672949731349945, + "loss_sent": 0.1859520971775055, + "loss_sod": 0.07305468618869781, + "loss_total": 0.48573628067970276, + "step": 337699 + }, + { + "epoch": 0.005398, + "loss_gen": 6.094359874725342, + "loss_rtd": 0.19445569813251495, + "loss_sent": 0.241752028465271, + "loss_sod": 0.05559616535902023, + "loss_total": 0.4918038845062256, + "step": 337699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.1671324968338013, + "learning_rate": 3.857924703005555e-06, + "loss": 0.4338, + "step": 337700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.765002250671387, + "loss_rtd": 0.20760849118232727, + "loss_sent": 0.14129865169525146, + "loss_sod": 0.016897909343242645, + "loss_total": 0.365805059671402, + "step": 337799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.4883832931518555, + "loss_rtd": 0.16705027222633362, + "loss_sent": 0.0009274539770558476, + "loss_sod": 0.03859792277216911, + "loss_total": 0.2065756469964981, + "step": 337799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.7741756439208984, + "learning_rate": 3.845710983278583e-06, + "loss": 0.42, + "step": 337800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.211860656738281, + "loss_rtd": 0.2270527184009552, + "loss_sent": 0.07621984928846359, + "loss_sod": 0.028961829841136932, + "loss_total": 0.3322344124317169, + "step": 337899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.8315300941467285, + "loss_rtd": 0.22638453543186188, + "loss_sent": 0.47674882411956787, + "loss_sod": 0.0761602371931076, + "loss_total": 0.7792935967445374, + "step": 337899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.1903001070022583, + "learning_rate": 3.833515854493691e-06, + "loss": 0.4034, + "step": 337900 + }, + { + "epoch": 0.005998, + "loss_gen": 6.163069725036621, + "loss_rtd": 0.22339676320552826, + "loss_sent": 0.22539108991622925, + "loss_sod": 0.015134986490011215, + "loss_total": 0.4639228582382202, + "step": 337999 + }, + { + "epoch": 0.005998, + "loss_gen": 6.285745143890381, + "loss_rtd": 0.223933145403862, + "loss_sent": 0.14128655195236206, + "loss_sod": 0.015176388435065746, + "loss_total": 0.3803960680961609, + "step": 337999 + }, + { + "epoch": 0.006, + "grad_norm": 0.7822142839431763, + "learning_rate": 3.821339321563089e-06, + "loss": 0.4294, + "step": 338000 + }, + { + "epoch": 0.006, + "eval_loss": 0.4008514881134033, + "eval_runtime": 149.5304, + "eval_samples_per_second": 103.277, + "eval_steps_per_second": 0.809, + "step": 338000 + }, + { + "epoch": 0.006198, + "loss_gen": 6.204260349273682, + "loss_rtd": 0.20586571097373962, + "loss_sent": 0.40420326590538025, + "loss_sod": 0.2035951018333435, + "loss_total": 0.8136640787124634, + "step": 338099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.772861480712891, + "loss_rtd": 0.21573849022388458, + "loss_sent": 0.46023422479629517, + "loss_sod": 0.07706906646490097, + "loss_total": 0.7530417442321777, + "step": 338099 + }, + { + "epoch": 0.0062, + "grad_norm": 2.3411641120910645, + "learning_rate": 3.8091813893914864e-06, + "loss": 0.4146, + "step": 338100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.746396541595459, + "loss_rtd": 0.2155022770166397, + "loss_sent": 0.17408345639705658, + "loss_sod": 0.003342859912663698, + "loss_total": 0.3929286003112793, + "step": 338199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.945261478424072, + "loss_rtd": 0.21275067329406738, + "loss_sent": 0.10976219177246094, + "loss_sod": 0.028655165806412697, + "loss_total": 0.35116803646087646, + "step": 338199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.7478144764900208, + "learning_rate": 3.7970420628761105e-06, + "loss": 0.4304, + "step": 338200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.782810688018799, + "loss_rtd": 0.22391600906848907, + "loss_sent": 0.20661456882953644, + "loss_sod": 0.010610237717628479, + "loss_total": 0.4411408305168152, + "step": 338299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.733725070953369, + "loss_rtd": 0.206951305270195, + "loss_sent": 0.13049571216106415, + "loss_sod": 0.07839620858430862, + "loss_total": 0.41584324836730957, + "step": 338299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.2371070384979248, + "learning_rate": 3.78492134690665e-06, + "loss": 0.4254, + "step": 338300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.954866409301758, + "loss_rtd": 0.23353427648544312, + "loss_sent": 0.16861510276794434, + "loss_sod": 0.05040101706981659, + "loss_total": 0.45255041122436523, + "step": 338399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.877741813659668, + "loss_rtd": 0.19605544209480286, + "loss_sent": 0.030897455289959908, + "loss_sod": 0.056787945330142975, + "loss_total": 0.2837408483028412, + "step": 338399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.0223498344421387, + "learning_rate": 3.77281924636535e-06, + "loss": 0.4103, + "step": 338400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.412992000579834, + "loss_rtd": 0.17041103541851044, + "loss_sent": 0.24925978481769562, + "loss_sod": 0.018770035356283188, + "loss_total": 0.43844085931777954, + "step": 338499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.862917900085449, + "loss_rtd": 0.19434136152267456, + "loss_sent": 0.2050703763961792, + "loss_sod": 0.03860627859830856, + "loss_total": 0.4380180239677429, + "step": 338499 + }, + { + "epoch": 0.007, + "grad_norm": 1.2954009771347046, + "learning_rate": 3.7607357661269272e-06, + "loss": 0.4013, + "step": 338500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.875136852264404, + "loss_rtd": 0.22192594408988953, + "loss_sent": 0.13295848667621613, + "loss_sod": 0.029642509296536446, + "loss_total": 0.38452696800231934, + "step": 338599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.88073205947876, + "loss_rtd": 0.2221127152442932, + "loss_sent": 0.15584711730480194, + "loss_sod": 0.06707973778247833, + "loss_total": 0.4450395703315735, + "step": 338599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.4723381996154785, + "learning_rate": 3.748670911058616e-06, + "loss": 0.4199, + "step": 338600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.959644794464111, + "loss_rtd": 0.20913678407669067, + "loss_sent": 0.08110801875591278, + "loss_sod": 0.21267099678516388, + "loss_total": 0.5029157996177673, + "step": 338699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.260806083679199, + "loss_rtd": 0.17016012966632843, + "loss_sent": 0.0023471980821341276, + "loss_sod": 0.110336072742939, + "loss_total": 0.2828434109687805, + "step": 338699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.2005341053009033, + "learning_rate": 3.7366246860201182e-06, + "loss": 0.404, + "step": 338700 + }, + { + "epoch": 0.007598, + "loss_gen": 6.073202610015869, + "loss_rtd": 0.22639216482639313, + "loss_sent": 0.08206695318222046, + "loss_sod": 0.023818880319595337, + "loss_total": 0.3322780132293701, + "step": 338799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.572092533111572, + "loss_rtd": 0.17619915306568146, + "loss_sent": 0.05038800090551376, + "loss_sod": 0.07302147895097733, + "loss_total": 0.29960864782333374, + "step": 338799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.1604137420654297, + "learning_rate": 3.7245970958636687e-06, + "loss": 0.4133, + "step": 338800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.995290756225586, + "loss_rtd": 0.21080608665943146, + "loss_sent": 0.34130969643592834, + "loss_sod": 0.01080277469009161, + "loss_total": 0.5629185438156128, + "step": 338899 + }, + { + "epoch": 0.007798, + "loss_gen": 6.070387840270996, + "loss_rtd": 0.21030038595199585, + "loss_sent": 0.27426761388778687, + "loss_sod": 0.07019522786140442, + "loss_total": 0.5547631978988647, + "step": 338899 + }, + { + "epoch": 0.0078, + "grad_norm": 2.2233266830444336, + "learning_rate": 3.712588145433976e-06, + "loss": 0.4151, + "step": 338900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.772355079650879, + "loss_rtd": 0.2053069919347763, + "loss_sent": 0.064731165766716, + "loss_sod": 0.037257201969623566, + "loss_total": 0.30729538202285767, + "step": 338999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.197329044342041, + "loss_rtd": 0.15753231942653656, + "loss_sent": 2.3899308871477842e-05, + "loss_sod": 0.056196585297584534, + "loss_total": 0.21375280618667603, + "step": 338999 + }, + { + "epoch": 0.008, + "grad_norm": 0.8829795122146606, + "learning_rate": 3.7005978395682482e-06, + "loss": 0.4161, + "step": 339000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4010652005672455, + "eval_runtime": 149.9038, + "eval_samples_per_second": 103.019, + "eval_steps_per_second": 0.807, + "step": 339000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.983335971832275, + "loss_rtd": 0.19274388253688812, + "loss_sent": 0.09417889267206192, + "loss_sod": 0.05159136280417442, + "loss_total": 0.33851414918899536, + "step": 339099 + }, + { + "epoch": 0.008198, + "loss_gen": 6.370143413543701, + "loss_rtd": 0.22788284718990326, + "loss_sent": 0.1849139928817749, + "loss_sod": 0.05349213257431984, + "loss_total": 0.4662889838218689, + "step": 339099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.2802579402923584, + "learning_rate": 3.6886261830961665e-06, + "loss": 0.4087, + "step": 339100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.889089107513428, + "loss_rtd": 0.20249640941619873, + "loss_sent": 0.136704221367836, + "loss_sod": 0.0257358830422163, + "loss_total": 0.3649365305900574, + "step": 339199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.897090435028076, + "loss_rtd": 0.22126959264278412, + "loss_sent": 0.19033564627170563, + "loss_sod": 0.19496454298496246, + "loss_total": 0.606569766998291, + "step": 339199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.105751395225525, + "learning_rate": 3.6766731808399234e-06, + "loss": 0.4247, + "step": 339200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.897000789642334, + "loss_rtd": 0.2111540138721466, + "loss_sent": 0.19834111630916595, + "loss_sod": 0.047086890786886215, + "loss_total": 0.4565820097923279, + "step": 339299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.844721794128418, + "loss_rtd": 0.20830696821212769, + "loss_sent": 0.07999664545059204, + "loss_sod": 0.038247983902692795, + "loss_total": 0.326551616191864, + "step": 339299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.65416020154953, + "learning_rate": 3.66473883761419e-06, + "loss": 0.4281, + "step": 339300 + }, + { + "epoch": 0.008798, + "loss_gen": 6.223285675048828, + "loss_rtd": 0.2105582356452942, + "loss_sent": 0.10665461421012878, + "loss_sod": 0.013735967688262463, + "loss_total": 0.3309488296508789, + "step": 339399 + }, + { + "epoch": 0.008798, + "loss_gen": 6.001707077026367, + "loss_rtd": 0.2266661375761032, + "loss_sent": 0.16380570828914642, + "loss_sod": 0.07627823948860168, + "loss_total": 0.4667500853538513, + "step": 339399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.0086357593536377, + "learning_rate": 3.6528231582260984e-06, + "loss": 0.4255, + "step": 339400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.79616641998291, + "loss_rtd": 0.2053869664669037, + "loss_sent": 0.2299124300479889, + "loss_sod": 0.040310800075531006, + "loss_total": 0.4756101965904236, + "step": 339499 + }, + { + "epoch": 0.008998, + "loss_gen": 6.108221054077148, + "loss_rtd": 0.20687364041805267, + "loss_sent": 0.15326544642448425, + "loss_sod": 0.05609961599111557, + "loss_total": 0.4162386953830719, + "step": 339499 + }, + { + "epoch": 0.009, + "grad_norm": 1.5119096040725708, + "learning_rate": 3.6409261474753043e-06, + "loss": 0.4161, + "step": 339500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.806908130645752, + "loss_rtd": 0.20581737160682678, + "loss_sent": 0.07188493758440018, + "loss_sod": 0.026977792382240295, + "loss_total": 0.30468010902404785, + "step": 339599 + }, + { + "epoch": 0.009198, + "loss_gen": 6.116364002227783, + "loss_rtd": 0.22399155795574188, + "loss_sent": 0.14591443538665771, + "loss_sod": 0.028032680973410606, + "loss_total": 0.39793866872787476, + "step": 339599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.8714219331741333, + "learning_rate": 3.629047810153907e-06, + "loss": 0.437, + "step": 339600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.894096851348877, + "loss_rtd": 0.21251995861530304, + "loss_sent": 0.23561184108257294, + "loss_sod": 0.06450232863426208, + "loss_total": 0.5126341581344604, + "step": 339699 + }, + { + "epoch": 0.009398, + "loss_gen": 6.02771520614624, + "loss_rtd": 0.18507251143455505, + "loss_sent": 0.13063956797122955, + "loss_sod": 0.00920623168349266, + "loss_total": 0.32491832971572876, + "step": 339699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.1654026508331299, + "learning_rate": 3.617188151046519e-06, + "loss": 0.4049, + "step": 339700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.9308695793151855, + "loss_rtd": 0.21963249146938324, + "loss_sent": 0.3654840886592865, + "loss_sod": 0.050618160516023636, + "loss_total": 0.6357347369194031, + "step": 339799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.643835067749023, + "loss_rtd": 0.1976223737001419, + "loss_sent": 0.12658996880054474, + "loss_sod": 0.020686248317360878, + "loss_total": 0.3448985815048218, + "step": 339799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.81893390417099, + "learning_rate": 3.6053471749301847e-06, + "loss": 0.4131, + "step": 339800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.648099899291992, + "loss_rtd": 0.2215256541967392, + "loss_sent": 0.06031196191906929, + "loss_sod": 0.03526972606778145, + "loss_total": 0.31710734963417053, + "step": 339899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.75663948059082, + "loss_rtd": 0.2025076448917389, + "loss_sent": 0.269387811422348, + "loss_sod": 0.016846321523189545, + "loss_total": 0.48874178528785706, + "step": 339899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.0655364990234375, + "learning_rate": 3.5935248865744673e-06, + "loss": 0.4056, + "step": 339900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.908276557922363, + "loss_rtd": 0.19214953482151031, + "loss_sent": 0.23334498703479767, + "loss_sod": 0.04038998484611511, + "loss_total": 0.4658845067024231, + "step": 339999 + }, + { + "epoch": 0.009998, + "loss_gen": 6.1002516746521, + "loss_rtd": 0.20336997509002686, + "loss_sent": 0.22725780308246613, + "loss_sod": 0.0748271718621254, + "loss_total": 0.505454957485199, + "step": 339999 + }, + { + "epoch": 0.01, + "grad_norm": 1.4487290382385254, + "learning_rate": 3.581721290741369e-06, + "loss": 0.4154, + "step": 340000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4006238281726837, + "eval_runtime": 150.0884, + "eval_samples_per_second": 102.893, + "eval_steps_per_second": 0.806, + "step": 340000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.326809406280518, + "loss_rtd": 0.17155982553958893, + "loss_sent": 0.055715400725603104, + "loss_sod": 0.03777296096086502, + "loss_total": 0.26504820585250854, + "step": 340099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.804373741149902, + "loss_rtd": 0.19311803579330444, + "loss_sent": 0.23289738595485687, + "loss_sod": 0.019639087840914726, + "loss_total": 0.4456545114517212, + "step": 340099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8826153874397278, + "learning_rate": 3.569936392185391e-06, + "loss": 0.4208, + "step": 340100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.741005897521973, + "loss_rtd": 0.2289055734872818, + "loss_sent": 0.10577091574668884, + "loss_sod": 0.004277893807739019, + "loss_total": 0.3389543890953064, + "step": 340199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.89778470993042, + "loss_rtd": 0.21025080978870392, + "loss_sent": 0.15272365510463715, + "loss_sod": 0.028359398245811462, + "loss_total": 0.3913338780403137, + "step": 340199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.8030951619148254, + "learning_rate": 3.5581701956534818e-06, + "loss": 0.4252, + "step": 340200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.805662155151367, + "loss_rtd": 0.21620453894138336, + "loss_sent": 0.2286563366651535, + "loss_sod": 0.0036057299003005028, + "loss_total": 0.44846659898757935, + "step": 340299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.9886956214904785, + "loss_rtd": 0.21013662219047546, + "loss_sent": 0.1385829746723175, + "loss_sod": 0.09061326086521149, + "loss_total": 0.43933287262916565, + "step": 340299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.0946193933486938, + "learning_rate": 3.546422705885055e-06, + "loss": 0.4024, + "step": 340300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.908155918121338, + "loss_rtd": 0.22734303772449493, + "loss_sent": 0.4903893768787384, + "loss_sod": 0.11314050853252411, + "loss_total": 0.8308728933334351, + "step": 340399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.893324375152588, + "loss_rtd": 0.21723562479019165, + "loss_sent": 0.0734143853187561, + "loss_sod": 0.05252843722701073, + "loss_total": 0.3431784510612488, + "step": 340399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.9139138460159302, + "learning_rate": 3.5346939276120096e-06, + "loss": 0.4203, + "step": 340400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.145823955535889, + "loss_rtd": 0.17083795368671417, + "loss_sent": 2.4692773877177387e-05, + "loss_sod": 0.033463336527347565, + "loss_total": 0.20432598888874054, + "step": 340499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.2899370193481445, + "loss_rtd": 0.15717390179634094, + "loss_sent": 0.038508735597133636, + "loss_sod": 0.0543653629720211, + "loss_total": 0.2500480115413666, + "step": 340499 + }, + { + "epoch": 0.011, + "grad_norm": 0.7320871949195862, + "learning_rate": 3.5229838655587044e-06, + "loss": 0.4009, + "step": 340500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.847060203552246, + "loss_rtd": 0.21480822563171387, + "loss_sent": 0.2133861929178238, + "loss_sod": 0.004829126875847578, + "loss_total": 0.4330235421657562, + "step": 340599 + }, + { + "epoch": 0.011198, + "loss_gen": 6.185930252075195, + "loss_rtd": 0.2196062058210373, + "loss_sent": 0.1698533594608307, + "loss_sod": 0.03258263319730759, + "loss_total": 0.42204219102859497, + "step": 340599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.8248491883277893, + "learning_rate": 3.5112925244419337e-06, + "loss": 0.4295, + "step": 340600 + }, + { + "epoch": 0.011398, + "loss_gen": 6.373525619506836, + "loss_rtd": 0.19577983021736145, + "loss_sent": 0.06667690724134445, + "loss_sod": 0.16156956553459167, + "loss_total": 0.4240263104438782, + "step": 340699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.640323162078857, + "loss_rtd": 0.17199872434139252, + "loss_sent": 9.931313979905099e-05, + "loss_sod": 0.06525272130966187, + "loss_total": 0.23735076189041138, + "step": 340699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.078114628791809, + "learning_rate": 3.4996199089709692e-06, + "loss": 0.4074, + "step": 340700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.662497043609619, + "loss_rtd": 0.20580726861953735, + "loss_sent": 0.2913249433040619, + "loss_sod": 0.011537499725818634, + "loss_total": 0.5086697340011597, + "step": 340799 + }, + { + "epoch": 0.011598, + "loss_gen": 6.062875747680664, + "loss_rtd": 0.23240281641483307, + "loss_sent": 0.36906126141548157, + "loss_sod": 0.014366144314408302, + "loss_total": 0.6158302426338196, + "step": 340799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.3367756605148315, + "learning_rate": 3.487966023847555e-06, + "loss": 0.4049, + "step": 340800 + }, + { + "epoch": 0.011798, + "loss_gen": 6.10473108291626, + "loss_rtd": 0.20512263476848602, + "loss_sent": 0.4059183895587921, + "loss_sod": 0.15950946509838104, + "loss_total": 0.7705504894256592, + "step": 340899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.811824321746826, + "loss_rtd": 0.21852651238441467, + "loss_sent": 0.1622115671634674, + "loss_sod": 0.02196568250656128, + "loss_total": 0.40270376205444336, + "step": 340899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.750742793083191, + "learning_rate": 3.476330873765854e-06, + "loss": 0.4038, + "step": 340900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.998531818389893, + "loss_rtd": 0.21462537348270416, + "loss_sent": 0.14007796347141266, + "loss_sod": 0.05634921044111252, + "loss_total": 0.41105252504348755, + "step": 340999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.929116725921631, + "loss_rtd": 0.2102845311164856, + "loss_sent": 0.16890662908554077, + "loss_sod": 0.001856447197496891, + "loss_total": 0.3810476064682007, + "step": 340999 + }, + { + "epoch": 0.012, + "grad_norm": 0.7612988352775574, + "learning_rate": 3.464714463412516e-06, + "loss": 0.4116, + "step": 341000 + }, + { + "epoch": 0.012, + "eval_loss": 0.3963385820388794, + "eval_runtime": 150.8414, + "eval_samples_per_second": 102.379, + "eval_steps_per_second": 0.802, + "step": 341000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.03617000579834, + "loss_rtd": 0.1962297409772873, + "loss_sent": 0.22792571783065796, + "loss_sod": 0.11228495091199875, + "loss_total": 0.536440372467041, + "step": 341099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.751300811767578, + "loss_rtd": 0.18305706977844238, + "loss_sent": 0.17126314342021942, + "loss_sod": 0.01719723455607891, + "loss_total": 0.37151744961738586, + "step": 341099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.9330779314041138, + "learning_rate": 3.453116797466627e-06, + "loss": 0.416, + "step": 341100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.9853196144104, + "loss_rtd": 0.21844954788684845, + "loss_sent": 0.10476718842983246, + "loss_sod": 0.017409641295671463, + "loss_total": 0.3406263589859009, + "step": 341199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.535221576690674, + "loss_rtd": 0.17223674058914185, + "loss_sent": 0.004667786881327629, + "loss_sod": 0.0542733408510685, + "loss_total": 0.23117786645889282, + "step": 341199 + }, + { + "epoch": 0.0124, + "grad_norm": 3.3137733936309814, + "learning_rate": 3.441537880599732e-06, + "loss": 0.3993, + "step": 341200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.888561725616455, + "loss_rtd": 0.20931757986545563, + "loss_sent": 0.2222614884376526, + "loss_sod": 0.08970314264297485, + "loss_total": 0.5212821960449219, + "step": 341299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.249072074890137, + "loss_rtd": 0.16802287101745605, + "loss_sent": 0.001813581446185708, + "loss_sod": 0.03134084492921829, + "loss_total": 0.20117728412151337, + "step": 341299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.8807783722877502, + "learning_rate": 3.429977717475802e-06, + "loss": 0.4124, + "step": 341300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.882512092590332, + "loss_rtd": 0.21080563962459564, + "loss_sent": 0.30358272790908813, + "loss_sod": 0.06010904908180237, + "loss_total": 0.574497401714325, + "step": 341399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.923336982727051, + "loss_rtd": 0.20899485051631927, + "loss_sent": 0.11464784294366837, + "loss_sod": 0.14935755729675293, + "loss_total": 0.47300025820732117, + "step": 341399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.6107983589172363, + "learning_rate": 3.4184363127512833e-06, + "loss": 0.4165, + "step": 341400 + }, + { + "epoch": 0.012998, + "loss_gen": 6.030972957611084, + "loss_rtd": 0.2066817283630371, + "loss_sent": 0.34057924151420593, + "loss_sod": 0.021736331284046173, + "loss_total": 0.5689972639083862, + "step": 341499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.77199649810791, + "loss_rtd": 0.20100796222686768, + "loss_sent": 0.20542021095752716, + "loss_sod": 0.08238121122121811, + "loss_total": 0.48880940675735474, + "step": 341499 + }, + { + "epoch": 0.013, + "grad_norm": 1.3586835861206055, + "learning_rate": 3.4069136710750404e-06, + "loss": 0.422, + "step": 341500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.997402667999268, + "loss_rtd": 0.20390596985816956, + "loss_sent": 0.18963722884655, + "loss_sod": 0.02654527686536312, + "loss_total": 0.4200884699821472, + "step": 341599 + }, + { + "epoch": 0.013198, + "loss_gen": 6.158804416656494, + "loss_rtd": 0.20410539209842682, + "loss_sent": 0.1750335544347763, + "loss_sod": 0.03373246267437935, + "loss_total": 0.41287142038345337, + "step": 341599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.1250416040420532, + "learning_rate": 3.395409797088411e-06, + "loss": 0.419, + "step": 341600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.943315029144287, + "loss_rtd": 0.20542287826538086, + "loss_sent": 0.0901661068201065, + "loss_sod": 0.08766307681798935, + "loss_total": 0.3832520842552185, + "step": 341699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.370689868927002, + "loss_rtd": 0.15447942912578583, + "loss_sent": 0.0003166712122038007, + "loss_sod": 0.09208841621875763, + "loss_total": 0.2468845099210739, + "step": 341699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.168798804283142, + "learning_rate": 3.3839246954251337e-06, + "loss": 0.4119, + "step": 341700 + }, + { + "epoch": 0.013598, + "loss_gen": 6.064821243286133, + "loss_rtd": 0.2190457284450531, + "loss_sent": 0.09213320165872574, + "loss_sod": 0.048973917961120605, + "loss_total": 0.36015284061431885, + "step": 341799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.890889644622803, + "loss_rtd": 0.19885052740573883, + "loss_sent": 0.21808795630931854, + "loss_sod": 0.036045633256435394, + "loss_total": 0.452984094619751, + "step": 341799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.1047885417938232, + "learning_rate": 3.3724583707114123e-06, + "loss": 0.4099, + "step": 341800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.762095928192139, + "loss_rtd": 0.1914149671792984, + "loss_sent": 0.09680893272161484, + "loss_sod": 0.030672771856188774, + "loss_total": 0.31889668107032776, + "step": 341899 + }, + { + "epoch": 0.013798, + "loss_gen": 6.0736589431762695, + "loss_rtd": 0.2036304771900177, + "loss_sent": 0.19782862067222595, + "loss_sod": 0.03012201562523842, + "loss_total": 0.4315811097621918, + "step": 341899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.9368470907211304, + "learning_rate": 3.361010827565886e-06, + "loss": 0.4135, + "step": 341900 + }, + { + "epoch": 0.013998, + "loss_gen": 6.138782501220703, + "loss_rtd": 0.22057823836803436, + "loss_sent": 0.07051631808280945, + "loss_sod": 0.036086756736040115, + "loss_total": 0.32718130946159363, + "step": 341999 + }, + { + "epoch": 0.013998, + "loss_gen": 6.156554222106934, + "loss_rtd": 0.22142986953258514, + "loss_sent": 0.187363862991333, + "loss_sod": 0.06705398857593536, + "loss_total": 0.4758477210998535, + "step": 341999 + }, + { + "epoch": 0.014, + "grad_norm": 1.0070147514343262, + "learning_rate": 3.3495820705996274e-06, + "loss": 0.4201, + "step": 342000 + }, + { + "epoch": 0.014, + "eval_loss": 0.39424291253089905, + "eval_runtime": 150.2061, + "eval_samples_per_second": 102.812, + "eval_steps_per_second": 0.806, + "step": 342000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.940567970275879, + "loss_rtd": 0.23230868577957153, + "loss_sent": 0.35433995723724365, + "loss_sod": 0.049607060849666595, + "loss_total": 0.6362557411193848, + "step": 342099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.838633060455322, + "loss_rtd": 0.19210414588451385, + "loss_sent": 0.04289254918694496, + "loss_sod": 0.09127867966890335, + "loss_total": 0.32627537846565247, + "step": 342099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.1639574766159058, + "learning_rate": 3.3381721044161262e-06, + "loss": 0.4324, + "step": 342100 + }, + { + "epoch": 0.014398, + "loss_gen": 6.07776403427124, + "loss_rtd": 0.1801523119211197, + "loss_sent": 0.12497272342443466, + "loss_sod": 0.054323021322488785, + "loss_total": 0.35944804549217224, + "step": 342199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.801862716674805, + "loss_rtd": 0.21373224258422852, + "loss_sent": 0.1702360063791275, + "loss_sod": 0.026983605697751045, + "loss_total": 0.4109518527984619, + "step": 342199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.3709266185760498, + "learning_rate": 3.3267809336113175e-06, + "loss": 0.407, + "step": 342200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.709898471832275, + "loss_rtd": 0.2265806794166565, + "loss_sent": 0.3270280063152313, + "loss_sod": 0.03656139224767685, + "loss_total": 0.5901700854301453, + "step": 342299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.581545829772949, + "loss_rtd": 0.2105221003293991, + "loss_sent": 0.09505219757556915, + "loss_sod": 0.03128906339406967, + "loss_total": 0.33686333894729614, + "step": 342299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.9331885576248169, + "learning_rate": 3.3154085627735698e-06, + "loss": 0.4363, + "step": 342300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.754115104675293, + "loss_rtd": 0.19943825900554657, + "loss_sent": 0.3611813485622406, + "loss_sod": 0.013223467394709587, + "loss_total": 0.5738430619239807, + "step": 342399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.950212001800537, + "loss_rtd": 0.2199244648218155, + "loss_sent": 0.3878166675567627, + "loss_sod": 0.169979065656662, + "loss_total": 0.7777202129364014, + "step": 342399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.8014847040176392, + "learning_rate": 3.304054996483674e-06, + "loss": 0.4227, + "step": 342400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.824573516845703, + "loss_rtd": 0.2237839698791504, + "loss_sent": 0.09823964536190033, + "loss_sod": 0.017051920294761658, + "loss_total": 0.3390755355358124, + "step": 342499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.877294063568115, + "loss_rtd": 0.21096977591514587, + "loss_sent": 0.1755797415971756, + "loss_sod": 0.002020066836848855, + "loss_total": 0.38856959342956543, + "step": 342499 + }, + { + "epoch": 0.015, + "grad_norm": 0.5809124708175659, + "learning_rate": 3.2927202393148393e-06, + "loss": 0.4327, + "step": 342500 + }, + { + "epoch": 0.015198, + "loss_gen": 6.27562141418457, + "loss_rtd": 0.21944203972816467, + "loss_sent": 0.15664042532444, + "loss_sod": 0.05238547921180725, + "loss_total": 0.42846792936325073, + "step": 342599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.808011531829834, + "loss_rtd": 0.19500215351581573, + "loss_sent": 0.03753923252224922, + "loss_sod": 0.06146138906478882, + "loss_total": 0.2940027713775635, + "step": 342599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.0454044342041016, + "learning_rate": 3.2814042958327016e-06, + "loss": 0.4289, + "step": 342600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.459552764892578, + "loss_rtd": 0.16028933227062225, + "loss_sent": 0.017681865021586418, + "loss_sod": 0.0945415049791336, + "loss_total": 0.2725127041339874, + "step": 342699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.953822612762451, + "loss_rtd": 0.21610717475414276, + "loss_sent": 0.2768329083919525, + "loss_sod": 0.006910734809935093, + "loss_total": 0.4998508095741272, + "step": 342699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.4511522054672241, + "learning_rate": 3.2701071705953377e-06, + "loss": 0.4183, + "step": 342700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.7881598472595215, + "loss_rtd": 0.21523109078407288, + "loss_sent": 0.1633302867412567, + "loss_sod": 0.0501314215362072, + "loss_total": 0.4286927878856659, + "step": 342799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.951810836791992, + "loss_rtd": 0.2228037714958191, + "loss_sent": 0.15994679927825928, + "loss_sod": 0.041540808975696564, + "loss_total": 0.42429137229919434, + "step": 342799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.3096485137939453, + "learning_rate": 3.2588288681532077e-06, + "loss": 0.4134, + "step": 342800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.827404499053955, + "loss_rtd": 0.21465258300304413, + "loss_sent": 0.32007789611816406, + "loss_sod": 0.04397441819310188, + "loss_total": 0.5787048935890198, + "step": 342899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.772224426269531, + "loss_rtd": 0.21917395293712616, + "loss_sent": 0.33243751525878906, + "loss_sod": 0.07959354668855667, + "loss_total": 0.6312050223350525, + "step": 342899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.5774588584899902, + "learning_rate": 3.2475693930492214e-06, + "loss": 0.4261, + "step": 342900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.823678016662598, + "loss_rtd": 0.20381247997283936, + "loss_sent": 0.19986392557621002, + "loss_sod": 0.004302198067307472, + "loss_total": 0.4079785943031311, + "step": 342999 + }, + { + "epoch": 0.015998, + "loss_gen": 6.02730655670166, + "loss_rtd": 0.2274365872144699, + "loss_sent": 0.08434657007455826, + "loss_sod": 0.07830630987882614, + "loss_total": 0.3900894522666931, + "step": 342999 + }, + { + "epoch": 0.016, + "grad_norm": 0.8446077704429626, + "learning_rate": 3.23632874981869e-06, + "loss": 0.4326, + "step": 343000 + }, + { + "epoch": 0.016, + "eval_loss": 0.38928958773612976, + "eval_runtime": 150.3513, + "eval_samples_per_second": 102.713, + "eval_steps_per_second": 0.805, + "step": 343000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.625696182250977, + "loss_rtd": 0.20675015449523926, + "loss_sent": 0.4063093066215515, + "loss_sod": 0.041951365768909454, + "loss_total": 0.6550108194351196, + "step": 343099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.676318168640137, + "loss_rtd": 0.20205141603946686, + "loss_sent": 0.18781642615795135, + "loss_sod": 0.01967936009168625, + "loss_total": 0.40954720973968506, + "step": 343099 + }, + { + "epoch": 0.0162, + "grad_norm": 4.332789897918701, + "learning_rate": 3.225106942989359e-06, + "loss": 0.4034, + "step": 343100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.8456220626831055, + "loss_rtd": 0.2000020444393158, + "loss_sent": 0.24244247376918793, + "loss_sod": 0.03900426998734474, + "loss_total": 0.481448769569397, + "step": 343199 + }, + { + "epoch": 0.016398, + "loss_gen": 6.019463539123535, + "loss_rtd": 0.20181627571582794, + "loss_sent": 0.14744023978710175, + "loss_sod": 0.041491057723760605, + "loss_total": 0.3907475769519806, + "step": 343199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.7115260362625122, + "learning_rate": 3.213903977081345e-06, + "loss": 0.4255, + "step": 343200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.8853349685668945, + "loss_rtd": 0.18609805405139923, + "loss_sent": 0.12787283957004547, + "loss_sod": 0.03629517927765846, + "loss_total": 0.35026606917381287, + "step": 343299 + }, + { + "epoch": 0.016598, + "loss_gen": 6.13047981262207, + "loss_rtd": 0.1903739869594574, + "loss_sent": 0.21343553066253662, + "loss_sod": 0.0346207469701767, + "loss_total": 0.4384302496910095, + "step": 343299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.5341449975967407, + "learning_rate": 3.2027198566072115e-06, + "loss": 0.4192, + "step": 343300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.255621433258057, + "loss_rtd": 0.1636127382516861, + "loss_sent": 0.0005237420555204153, + "loss_sod": 0.02681725099682808, + "loss_total": 0.19095373153686523, + "step": 343399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.464029788970947, + "loss_rtd": 0.1909935027360916, + "loss_sent": 2.4588471205788665e-05, + "loss_sod": 0.13042181730270386, + "loss_total": 0.3214398920536041, + "step": 343399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.9985593557357788, + "learning_rate": 3.1915545860719265e-06, + "loss": 0.4172, + "step": 343400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.821273326873779, + "loss_rtd": 0.2242823988199234, + "loss_sent": 0.08130759000778198, + "loss_sod": 0.007630887441337109, + "loss_total": 0.3132208585739136, + "step": 343499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.664610862731934, + "loss_rtd": 0.197879821062088, + "loss_sent": 0.032269980758428574, + "loss_sod": 0.040124744176864624, + "loss_total": 0.2702745497226715, + "step": 343499 + }, + { + "epoch": 0.017, + "grad_norm": 0.8526556491851807, + "learning_rate": 3.180408169972865e-06, + "loss": 0.4159, + "step": 343500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.924649715423584, + "loss_rtd": 0.21138909459114075, + "loss_sent": 0.10157202929258347, + "loss_sod": 0.04640458524227142, + "loss_total": 0.35936570167541504, + "step": 343599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.885820388793945, + "loss_rtd": 0.21065209805965424, + "loss_sent": 0.4478943347930908, + "loss_sod": 0.0231836698949337, + "loss_total": 0.6817300915718079, + "step": 343599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.8805345296859741, + "learning_rate": 3.1692806127997853e-06, + "loss": 0.4183, + "step": 343600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.767274379730225, + "loss_rtd": 0.19500966370105743, + "loss_sent": 0.18620166182518005, + "loss_sod": 0.033180274069309235, + "loss_total": 0.4143916070461273, + "step": 343699 + }, + { + "epoch": 0.017398, + "loss_gen": 6.134350299835205, + "loss_rtd": 0.17665618658065796, + "loss_sent": 0.1611063927412033, + "loss_sod": 0.07501335442066193, + "loss_total": 0.4127759337425232, + "step": 343699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.3646003007888794, + "learning_rate": 3.1581719190348745e-06, + "loss": 0.4211, + "step": 343700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.61315393447876, + "loss_rtd": 0.20061041414737701, + "loss_sent": 0.34774741530418396, + "loss_sod": 0.0024513767566531897, + "loss_total": 0.5508092045783997, + "step": 343799 + }, + { + "epoch": 0.017598, + "loss_gen": 6.274115562438965, + "loss_rtd": 0.2139493077993393, + "loss_sent": 0.423194944858551, + "loss_sod": 0.06745512783527374, + "loss_total": 0.7045993804931641, + "step": 343799 + }, + { + "epoch": 0.0176, + "grad_norm": 3.2399985790252686, + "learning_rate": 3.1470820931527146e-06, + "loss": 0.4171, + "step": 343800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.903570652008057, + "loss_rtd": 0.19082964956760406, + "loss_sent": 0.27847811579704285, + "loss_sod": 0.0468842014670372, + "loss_total": 0.5161919593811035, + "step": 343899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.536373138427734, + "loss_rtd": 0.18130025267601013, + "loss_sent": 0.025892140343785286, + "loss_sod": 0.14663097262382507, + "loss_total": 0.35382336378097534, + "step": 343899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.5265181064605713, + "learning_rate": 3.136011139620293e-06, + "loss": 0.4298, + "step": 343900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.869435787200928, + "loss_rtd": 0.23094557225704193, + "loss_sent": 0.19800937175750732, + "loss_sod": 0.03712037205696106, + "loss_total": 0.4660753011703491, + "step": 343999 + }, + { + "epoch": 0.017998, + "loss_gen": 6.170574188232422, + "loss_rtd": 0.21233905851840973, + "loss_sent": 0.16728414595127106, + "loss_sod": 0.05359148979187012, + "loss_total": 0.4332146644592285, + "step": 343999 + }, + { + "epoch": 0.018, + "grad_norm": 1.372086524963379, + "learning_rate": 3.1249590628969703e-06, + "loss": 0.4355, + "step": 344000 + }, + { + "epoch": 0.018, + "eval_loss": 0.3993147313594818, + "eval_runtime": 150.0789, + "eval_samples_per_second": 102.899, + "eval_steps_per_second": 0.806, + "step": 344000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.801217079162598, + "loss_rtd": 0.2368241250514984, + "loss_sent": 0.20728328824043274, + "loss_sod": 0.08091040700674057, + "loss_total": 0.5250178575515747, + "step": 344099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.689062595367432, + "loss_rtd": 0.23044942319393158, + "loss_sent": 0.11517254263162613, + "loss_sod": 0.029528971761465073, + "loss_total": 0.3751509189605713, + "step": 344099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.1511733531951904, + "learning_rate": 3.1139258674345307e-06, + "loss": 0.423, + "step": 344100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.80804443359375, + "loss_rtd": 0.23635703325271606, + "loss_sent": 0.3243516981601715, + "loss_sod": 0.04271318018436432, + "loss_total": 0.6034219264984131, + "step": 344199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.5195794105529785, + "loss_rtd": 0.21610403060913086, + "loss_sent": 0.19665342569351196, + "loss_sod": 0.008008423261344433, + "loss_total": 0.42076587677001953, + "step": 344199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.282293677330017, + "learning_rate": 3.102911557677152e-06, + "loss": 0.42, + "step": 344200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.815941333770752, + "loss_rtd": 0.19316354393959045, + "loss_sent": 0.20119412243366241, + "loss_sod": 0.10021115094423294, + "loss_total": 0.4945688247680664, + "step": 344299 + }, + { + "epoch": 0.018598, + "loss_gen": 6.013330459594727, + "loss_rtd": 0.21986351907253265, + "loss_sent": 0.32029974460601807, + "loss_sod": 0.01828307844698429, + "loss_total": 0.5584463477134705, + "step": 344299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.2562536001205444, + "learning_rate": 3.0919161380613793e-06, + "loss": 0.4158, + "step": 344300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.973540782928467, + "loss_rtd": 0.21433651447296143, + "loss_sent": 0.32896122336387634, + "loss_sod": 0.04544921591877937, + "loss_total": 0.588746964931488, + "step": 344399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.3708815574646, + "loss_rtd": 0.17824068665504456, + "loss_sent": 0.03908013179898262, + "loss_sod": 0.0200839601457119, + "loss_total": 0.23740477859973907, + "step": 344399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.985539197921753, + "learning_rate": 3.0809396130161817e-06, + "loss": 0.427, + "step": 344400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.236983299255371, + "loss_rtd": 0.16330642998218536, + "loss_sent": 0.017720935866236687, + "loss_sod": 0.04442419111728668, + "loss_total": 0.22545155882835388, + "step": 344499 + }, + { + "epoch": 0.018998, + "loss_gen": 6.019647121429443, + "loss_rtd": 0.2066533863544464, + "loss_sent": 0.3200598657131195, + "loss_sod": 0.07714848965406418, + "loss_total": 0.6038617491722107, + "step": 344499 + }, + { + "epoch": 0.019, + "grad_norm": 1.3927303552627563, + "learning_rate": 3.0699819869628943e-06, + "loss": 0.4076, + "step": 344500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.909665584564209, + "loss_rtd": 0.2125091552734375, + "loss_sent": 0.10321272164583206, + "loss_sod": 0.07435595244169235, + "loss_total": 0.3900778293609619, + "step": 344599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.874019622802734, + "loss_rtd": 0.21441978216171265, + "loss_sent": 0.2573813199996948, + "loss_sod": 0.016887767240405083, + "loss_total": 0.4886888861656189, + "step": 344599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.1322945356369019, + "learning_rate": 3.059043264315259e-06, + "loss": 0.4258, + "step": 344600 + }, + { + "epoch": 0.019398, + "loss_gen": 6.0817437171936035, + "loss_rtd": 0.2001039981842041, + "loss_sent": 0.14977571368217468, + "loss_sod": 0.03723203018307686, + "loss_total": 0.38711172342300415, + "step": 344699 + }, + { + "epoch": 0.019398, + "loss_gen": 6.110212802886963, + "loss_rtd": 0.21464529633522034, + "loss_sent": 0.11174482852220535, + "loss_sod": 0.05072154104709625, + "loss_total": 0.37711167335510254, + "step": 344699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.9789242744445801, + "learning_rate": 3.0481234494793786e-06, + "loss": 0.3965, + "step": 344700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.1890058517456055, + "loss_rtd": 0.17944465577602386, + "loss_sent": 2.522169961594045e-05, + "loss_sod": 0.069935142993927, + "loss_total": 0.24940502643585205, + "step": 344799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.619328498840332, + "loss_rtd": 0.18111827969551086, + "loss_sent": 0.0649733766913414, + "loss_sod": 0.006838769651949406, + "loss_total": 0.2529304325580597, + "step": 344799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.6171516180038452, + "learning_rate": 3.0372225468537518e-06, + "loss": 0.389, + "step": 344800 + }, + { + "epoch": 0.019798, + "loss_gen": 6.050124168395996, + "loss_rtd": 0.22084671258926392, + "loss_sent": 0.23997093737125397, + "loss_sod": 0.016547055914998055, + "loss_total": 0.477364718914032, + "step": 344899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.950810432434082, + "loss_rtd": 0.20542003214359283, + "loss_sent": 0.1829788237810135, + "loss_sod": 0.017055897042155266, + "loss_total": 0.40545475482940674, + "step": 344899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.7295876741409302, + "learning_rate": 3.026340560829272e-06, + "loss": 0.4319, + "step": 344900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.952529430389404, + "loss_rtd": 0.22140717506408691, + "loss_sent": 0.1345573514699936, + "loss_sod": 0.05161639675498009, + "loss_total": 0.4075809121131897, + "step": 344999 + }, + { + "epoch": 0.019998, + "loss_gen": 6.1175336837768555, + "loss_rtd": 0.20115554332733154, + "loss_sent": 0.14179235696792603, + "loss_sod": 0.06289678812026978, + "loss_total": 0.40584468841552734, + "step": 344999 + }, + { + "epoch": 0.02, + "grad_norm": 1.2358646392822266, + "learning_rate": 3.015477495789204e-06, + "loss": 0.4123, + "step": 345000 + }, + { + "epoch": 0.02, + "eval_loss": 0.3962290287017822, + "eval_runtime": 151.7704, + "eval_samples_per_second": 101.752, + "eval_steps_per_second": 0.797, + "step": 345000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.8342156410217285, + "loss_rtd": 0.2033233940601349, + "loss_sent": 0.17222349345684052, + "loss_sod": 0.08103340864181519, + "loss_total": 0.4565802812576294, + "step": 345099 + }, + { + "epoch": 0.020198, + "loss_gen": 6.0856852531433105, + "loss_rtd": 0.2134939283132553, + "loss_sent": 0.10274343192577362, + "loss_sod": 0.14162708818912506, + "loss_total": 0.4578644633293152, + "step": 345099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.2765001058578491, + "learning_rate": 3.004633356109171e-06, + "loss": 0.4355, + "step": 345100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.485940456390381, + "loss_rtd": 0.17827960848808289, + "loss_sent": 0.012701097875833511, + "loss_sod": 0.04595581442117691, + "loss_total": 0.2369365096092224, + "step": 345199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.916727066040039, + "loss_rtd": 0.21812865138053894, + "loss_sent": 0.366161584854126, + "loss_sod": 0.05725693702697754, + "loss_total": 0.6415472030639648, + "step": 345199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.2278943061828613, + "learning_rate": 2.993808146157201e-06, + "loss": 0.4141, + "step": 345200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.699069976806641, + "loss_rtd": 0.1663743555545807, + "loss_sent": 0.06079315394163132, + "loss_sod": 0.039484769105911255, + "loss_total": 0.26665228605270386, + "step": 345299 + }, + { + "epoch": 0.020598, + "loss_gen": 6.088677883148193, + "loss_rtd": 0.19248007237911224, + "loss_sent": 0.3041754961013794, + "loss_sod": 0.08131252974271774, + "loss_total": 0.5779681205749512, + "step": 345299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.2986756563186646, + "learning_rate": 2.9830018702936946e-06, + "loss": 0.4206, + "step": 345300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.754996299743652, + "loss_rtd": 0.22188951075077057, + "loss_sent": 0.13857239484786987, + "loss_sod": 0.02905525080859661, + "loss_total": 0.3895171582698822, + "step": 345399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.5441813468933105, + "loss_rtd": 0.22319650650024414, + "loss_sent": 0.10199989378452301, + "loss_sod": 0.017508793622255325, + "loss_total": 0.3427051901817322, + "step": 345399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.043805480003357, + "learning_rate": 2.9722145328714147e-06, + "loss": 0.3906, + "step": 345400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.86095666885376, + "loss_rtd": 0.20533639192581177, + "loss_sent": 0.3244718611240387, + "loss_sod": 0.0008062995038926601, + "loss_total": 0.5306145548820496, + "step": 345499 + }, + { + "epoch": 0.020998, + "loss_gen": 6.024995803833008, + "loss_rtd": 0.19126074016094208, + "loss_sent": 0.1506563425064087, + "loss_sod": 0.04864851012825966, + "loss_total": 0.39056557416915894, + "step": 345499 + }, + { + "epoch": 0.021, + "grad_norm": 1.12796151638031, + "learning_rate": 2.961446138235491e-06, + "loss": 0.4078, + "step": 345500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.854345321655273, + "loss_rtd": 0.1963692456483841, + "loss_sent": 0.35575902462005615, + "loss_sod": 0.013423793949186802, + "loss_total": 0.5655520558357239, + "step": 345599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.773720741271973, + "loss_rtd": 0.2145538032054901, + "loss_sent": 0.12099415808916092, + "loss_sod": 0.03502482548356056, + "loss_total": 0.3705727756023407, + "step": 345599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.850409209728241, + "learning_rate": 2.950696690723437e-06, + "loss": 0.4429, + "step": 345600 + }, + { + "epoch": 0.021398, + "loss_gen": 6.223632335662842, + "loss_rtd": 0.21802760660648346, + "loss_sent": 0.28415265679359436, + "loss_sod": 0.02522401139140129, + "loss_total": 0.5274043083190918, + "step": 345699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.781085014343262, + "loss_rtd": 0.2077065408229828, + "loss_sent": 0.15649068355560303, + "loss_sod": 0.05073114484548569, + "loss_total": 0.4149283766746521, + "step": 345699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.5991970300674438, + "learning_rate": 2.939966194665139e-06, + "loss": 0.4119, + "step": 345700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.400541305541992, + "loss_rtd": 0.15435712039470673, + "loss_sent": 0.0008989697089418769, + "loss_sod": 0.17497660219669342, + "loss_total": 0.3302326798439026, + "step": 345799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.4629316329956055, + "loss_rtd": 0.17461073398590088, + "loss_sent": 0.00023651798255741596, + "loss_sod": 0.053559303283691406, + "loss_total": 0.22840654850006104, + "step": 345799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.1368045806884766, + "learning_rate": 2.929254654382818e-06, + "loss": 0.3953, + "step": 345800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.579807281494141, + "loss_rtd": 0.1817014068365097, + "loss_sent": 0.03756831958889961, + "loss_sod": 0.028328150510787964, + "loss_total": 0.24759787321090698, + "step": 345899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.854094505310059, + "loss_rtd": 0.20325829088687897, + "loss_sent": 0.22642068564891815, + "loss_sod": 0.11384735256433487, + "loss_total": 0.5435263514518738, + "step": 345899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.1834964752197266, + "learning_rate": 2.9185620741911e-06, + "loss": 0.4, + "step": 345900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.6838178634643555, + "loss_rtd": 0.2041275054216385, + "loss_sent": 0.45005539059638977, + "loss_sod": 0.013449644669890404, + "loss_total": 0.667632520198822, + "step": 345999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.546184539794922, + "loss_rtd": 0.20533013343811035, + "loss_sent": 0.13110969960689545, + "loss_sod": 0.013098989613354206, + "loss_total": 0.34953880310058594, + "step": 345999 + }, + { + "epoch": 0.022, + "grad_norm": 2.9552416801452637, + "learning_rate": 2.907888458396946e-06, + "loss": 0.4146, + "step": 346000 + }, + { + "epoch": 0.022, + "eval_loss": 0.394890695810318, + "eval_runtime": 150.3967, + "eval_samples_per_second": 102.682, + "eval_steps_per_second": 0.805, + "step": 346000 + }, + { + "epoch": 0.022198, + "loss_gen": 6.177165508270264, + "loss_rtd": 0.20960292220115662, + "loss_sent": 0.22950461506843567, + "loss_sod": 0.015612190589308739, + "loss_total": 0.4547197222709656, + "step": 346099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.956109046936035, + "loss_rtd": 0.18064485490322113, + "loss_sent": 0.06233496963977814, + "loss_sod": 0.015554279088973999, + "loss_total": 0.25853410363197327, + "step": 346099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.1283988952636719, + "learning_rate": 2.8972338112996933e-06, + "loss": 0.4164, + "step": 346100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.784541130065918, + "loss_rtd": 0.19813288748264313, + "loss_sent": 0.2804802358150482, + "loss_sod": 0.10672664642333984, + "loss_total": 0.5853397846221924, + "step": 346199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.828537940979004, + "loss_rtd": 0.1991063356399536, + "loss_sent": 0.13255983591079712, + "loss_sod": 0.02511822059750557, + "loss_total": 0.3567844033241272, + "step": 346199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.2302149534225464, + "learning_rate": 2.886598137191021e-06, + "loss": 0.4232, + "step": 346200 + }, + { + "epoch": 0.022598, + "loss_gen": 6.1805195808410645, + "loss_rtd": 0.1977512389421463, + "loss_sent": 0.053855400532484055, + "loss_sod": 0.10048055648803711, + "loss_total": 0.3520871698856354, + "step": 346299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.240399360656738, + "loss_rtd": 0.1770283430814743, + "loss_sent": 0.00022501042985823005, + "loss_sod": 0.07942786812782288, + "loss_total": 0.2566812038421631, + "step": 346299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.9154486656188965, + "learning_rate": 2.8759814403549857e-06, + "loss": 0.418, + "step": 346300 + }, + { + "epoch": 0.022798, + "loss_gen": 6.037413120269775, + "loss_rtd": 0.2178463488817215, + "loss_sent": 0.08771871775388718, + "loss_sod": 0.10883640497922897, + "loss_total": 0.41440147161483765, + "step": 346399 + }, + { + "epoch": 0.022798, + "loss_gen": 6.080913066864014, + "loss_rtd": 0.20475168526172638, + "loss_sent": 0.10265511274337769, + "loss_sod": 0.05653534084558487, + "loss_total": 0.36394214630126953, + "step": 346399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.0961288213729858, + "learning_rate": 2.8653837250679992e-06, + "loss": 0.4216, + "step": 346400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.868289470672607, + "loss_rtd": 0.18100692331790924, + "loss_sent": 0.22061115503311157, + "loss_sod": 0.008930054493248463, + "loss_total": 0.4105481505393982, + "step": 346499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.729867458343506, + "loss_rtd": 0.22023260593414307, + "loss_sent": 0.17562322318553925, + "loss_sod": 0.0014352818252518773, + "loss_total": 0.3972911238670349, + "step": 346499 + }, + { + "epoch": 0.023, + "grad_norm": 0.720500648021698, + "learning_rate": 2.8548049955988034e-06, + "loss": 0.4191, + "step": 346500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.30037260055542, + "loss_rtd": 0.1821168065071106, + "loss_sent": 0.018517278134822845, + "loss_sod": 0.06972560286521912, + "loss_total": 0.27035969495773315, + "step": 346599 + }, + { + "epoch": 0.023198, + "loss_gen": 6.110710144042969, + "loss_rtd": 0.21628817915916443, + "loss_sent": 0.0536821223795414, + "loss_sod": 0.012368336319923401, + "loss_total": 0.2823386490345001, + "step": 346599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.7004936933517456, + "learning_rate": 2.8442452562085277e-06, + "loss": 0.4147, + "step": 346600 + }, + { + "epoch": 0.023398, + "loss_gen": 6.185731887817383, + "loss_rtd": 0.2094772458076477, + "loss_sent": 0.33429858088493347, + "loss_sod": 0.0627126544713974, + "loss_total": 0.6064884662628174, + "step": 346699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.375607967376709, + "loss_rtd": 0.17778867483139038, + "loss_sent": 2.4312806999660097e-05, + "loss_sod": 0.14853551983833313, + "loss_total": 0.32634851336479187, + "step": 346699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.469561219215393, + "learning_rate": 2.8337045111506143e-06, + "loss": 0.4143, + "step": 346700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.915907859802246, + "loss_rtd": 0.213593989610672, + "loss_sent": 0.1473378986120224, + "loss_sod": 0.004312505479902029, + "loss_total": 0.36524438858032227, + "step": 346799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.875881671905518, + "loss_rtd": 0.19377167522907257, + "loss_sent": 0.03378697484731674, + "loss_sod": 0.0711243748664856, + "loss_total": 0.2986830174922943, + "step": 346799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.2263453006744385, + "learning_rate": 2.823182764670884e-06, + "loss": 0.415, + "step": 346800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.8458099365234375, + "loss_rtd": 0.21698597073554993, + "loss_sent": 0.12918789684772491, + "loss_sod": 0.046139199286699295, + "loss_total": 0.39231306314468384, + "step": 346899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.901025772094727, + "loss_rtd": 0.22223611176013947, + "loss_sent": 0.43904101848602295, + "loss_sod": 0.07018986344337463, + "loss_total": 0.7314670085906982, + "step": 346899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.2484575510025024, + "learning_rate": 2.812680021007491e-06, + "loss": 0.4213, + "step": 346900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.7027058601379395, + "loss_rtd": 0.17953623831272125, + "loss_sent": 0.10065238922834396, + "loss_sod": 0.06401006132364273, + "loss_total": 0.34419870376586914, + "step": 346999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.867369174957275, + "loss_rtd": 0.19031651318073273, + "loss_sent": 0.13639628887176514, + "loss_sod": 0.061066266149282455, + "loss_total": 0.3877790570259094, + "step": 346999 + }, + { + "epoch": 0.024, + "grad_norm": 1.2584656476974487, + "learning_rate": 2.802196284390951e-06, + "loss": 0.3971, + "step": 347000 + }, + { + "epoch": 0.024, + "eval_loss": 0.40038397908210754, + "eval_runtime": 150.6489, + "eval_samples_per_second": 102.51, + "eval_steps_per_second": 0.803, + "step": 347000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.800816059112549, + "loss_rtd": 0.19879066944122314, + "loss_sent": 0.19041036069393158, + "loss_sod": 0.038729384541511536, + "loss_total": 0.42793041467666626, + "step": 347099 + }, + { + "epoch": 0.024198, + "loss_gen": 6.2220778465271, + "loss_rtd": 0.21360954642295837, + "loss_sent": 0.12894247472286224, + "loss_sod": 0.03946185111999512, + "loss_total": 0.38201385736465454, + "step": 347099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.9547300934791565, + "learning_rate": 2.7917315590440975e-06, + "loss": 0.4114, + "step": 347100 + }, + { + "epoch": 0.024398, + "loss_gen": 6.054231643676758, + "loss_rtd": 0.22664403915405273, + "loss_sent": 0.33643460273742676, + "loss_sod": 0.013595287688076496, + "loss_total": 0.5766739249229431, + "step": 347199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.922176837921143, + "loss_rtd": 0.19353726506233215, + "loss_sent": 0.14798879623413086, + "loss_sod": 0.011312786489725113, + "loss_total": 0.3528388440608978, + "step": 347199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.0335471630096436, + "learning_rate": 2.7812858491821305e-06, + "loss": 0.4203, + "step": 347200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.435997009277344, + "loss_rtd": 0.16638603806495667, + "loss_sent": 0.041973795741796494, + "loss_sod": 0.014071225188672543, + "loss_total": 0.22243106365203857, + "step": 347299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.869622230529785, + "loss_rtd": 0.2162979245185852, + "loss_sent": 0.17184807360172272, + "loss_sod": 0.0443805567920208, + "loss_total": 0.43252652883529663, + "step": 347299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.9300585985183716, + "learning_rate": 2.7708591590125786e-06, + "loss": 0.4075, + "step": 347300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.8696818351745605, + "loss_rtd": 0.19919665157794952, + "loss_sent": 0.11572212725877762, + "loss_sod": 0.037589769810438156, + "loss_total": 0.352508544921875, + "step": 347399 + }, + { + "epoch": 0.024798, + "loss_gen": 6.15183162689209, + "loss_rtd": 0.2178945243358612, + "loss_sent": 0.16867373883724213, + "loss_sod": 0.0836891382932663, + "loss_total": 0.47025740146636963, + "step": 347399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.7121327519416809, + "learning_rate": 2.7604514927353e-06, + "loss": 0.391, + "step": 347400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.5167412757873535, + "loss_rtd": 0.21066704392433167, + "loss_sent": 0.05076691880822182, + "loss_sod": 0.02963225170969963, + "loss_total": 0.2910662293434143, + "step": 347499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.308770656585693, + "loss_rtd": 0.1771867871284485, + "loss_sent": 2.2852533220429905e-05, + "loss_sod": 0.1340395212173462, + "loss_total": 0.31124916672706604, + "step": 347499 + }, + { + "epoch": 0.025, + "grad_norm": 1.2258532047271729, + "learning_rate": 2.7500628545425177e-06, + "loss": 0.4178, + "step": 347500 + }, + { + "epoch": 0.025198, + "loss_gen": 6.061978816986084, + "loss_rtd": 0.21013672649860382, + "loss_sent": 0.22456708550453186, + "loss_sod": 0.07185360789299011, + "loss_total": 0.5065574645996094, + "step": 347599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.73763370513916, + "loss_rtd": 0.19905613362789154, + "loss_sent": 2.395332558080554e-05, + "loss_sod": 0.0841599628329277, + "loss_total": 0.28324005007743835, + "step": 347599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.3880962133407593, + "learning_rate": 2.7396932486187634e-06, + "loss": 0.4199, + "step": 347600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.509828567504883, + "loss_rtd": 0.1791846752166748, + "loss_sent": 2.3359158149105497e-05, + "loss_sod": 0.05962657183408737, + "loss_total": 0.23883461952209473, + "step": 347699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.709102630615234, + "loss_rtd": 0.18945716321468353, + "loss_sent": 0.09810947626829147, + "loss_sod": 0.01001054234802723, + "loss_total": 0.2975771725177765, + "step": 347699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.7576766014099121, + "learning_rate": 2.7293426791409228e-06, + "loss": 0.4236, + "step": 347700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.762884616851807, + "loss_rtd": 0.20979386568069458, + "loss_sent": 0.08806712180376053, + "loss_sod": 0.05111350864171982, + "loss_total": 0.3489744961261749, + "step": 347799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.711957931518555, + "loss_rtd": 0.19799841940402985, + "loss_sent": 0.07584696263074875, + "loss_sod": 0.07980622351169586, + "loss_total": 0.35365161299705505, + "step": 347799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.0089797973632812, + "learning_rate": 2.719011150278189e-06, + "loss": 0.4078, + "step": 347800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.955827713012695, + "loss_rtd": 0.20267730951309204, + "loss_sent": 0.20192524790763855, + "loss_sod": 0.06518060714006424, + "loss_total": 0.46978315711021423, + "step": 347899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.790525913238525, + "loss_rtd": 0.18522995710372925, + "loss_sent": 0.4387192726135254, + "loss_sod": 0.07361598312854767, + "loss_total": 0.6975651979446411, + "step": 347899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.8381214141845703, + "learning_rate": 2.7086986661921164e-06, + "loss": 0.4022, + "step": 347900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.7063164710998535, + "loss_rtd": 0.20931647717952728, + "loss_sent": 0.5352754592895508, + "loss_sod": 0.0051032924093306065, + "loss_total": 0.7496952414512634, + "step": 347999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.9625701904296875, + "loss_rtd": 0.2113124579191208, + "loss_sent": 0.09579739719629288, + "loss_sod": 0.0391378290951252, + "loss_total": 0.34624767303466797, + "step": 347999 + }, + { + "epoch": 0.026, + "grad_norm": 1.46800696849823, + "learning_rate": 2.6984052310365604e-06, + "loss": 0.4039, + "step": 348000 + }, + { + "epoch": 0.026, + "eval_loss": 0.3929156959056854, + "eval_runtime": 150.1285, + "eval_samples_per_second": 102.865, + "eval_steps_per_second": 0.806, + "step": 348000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.764568328857422, + "loss_rtd": 0.21054331958293915, + "loss_sent": 0.15424807369709015, + "loss_sod": 0.05876941233873367, + "loss_total": 0.4235607981681824, + "step": 348099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.832766532897949, + "loss_rtd": 0.22070473432540894, + "loss_sent": 0.26257625222206116, + "loss_sod": 0.001788466819562018, + "loss_total": 0.4850694537162781, + "step": 348099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9740235805511475, + "learning_rate": 2.688130848957726e-06, + "loss": 0.4014, + "step": 348100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.914451599121094, + "loss_rtd": 0.22258107364177704, + "loss_sent": 0.547576367855072, + "loss_sod": 0.03981813043355942, + "loss_total": 0.8099755644798279, + "step": 348199 + }, + { + "epoch": 0.026398, + "loss_gen": 6.155699253082275, + "loss_rtd": 0.23639720678329468, + "loss_sent": 0.14518557488918304, + "loss_sod": 0.11732011288404465, + "loss_total": 0.49890291690826416, + "step": 348199 + }, + { + "epoch": 0.0264, + "grad_norm": 2.6284737586975098, + "learning_rate": 2.6778755240941256e-06, + "loss": 0.4104, + "step": 348200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.842240333557129, + "loss_rtd": 0.20628581941127777, + "loss_sent": 0.024945596233010292, + "loss_sod": 0.07663668692111969, + "loss_total": 0.307868093252182, + "step": 348299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.494091033935547, + "loss_rtd": 0.16447770595550537, + "loss_sent": 2.5635152269387618e-05, + "loss_sod": 0.15456324815750122, + "loss_total": 0.31906658411026, + "step": 348299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.1853891611099243, + "learning_rate": 2.6676392605766043e-06, + "loss": 0.4373, + "step": 348300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.845241069793701, + "loss_rtd": 0.20244090259075165, + "loss_sent": 0.3507749140262604, + "loss_sod": 0.01659446582198143, + "loss_total": 0.5698102712631226, + "step": 348399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.945862293243408, + "loss_rtd": 0.20262795686721802, + "loss_sent": 0.23626303672790527, + "loss_sod": 0.03078293427824974, + "loss_total": 0.46967393159866333, + "step": 348399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.5612776279449463, + "learning_rate": 2.6574220625283253e-06, + "loss": 0.3915, + "step": 348400 + }, + { + "epoch": 0.026998, + "loss_gen": 6.285218715667725, + "loss_rtd": 0.22945177555084229, + "loss_sent": 0.058136165142059326, + "loss_sod": 0.09110814332962036, + "loss_total": 0.378696084022522, + "step": 348499 + }, + { + "epoch": 0.026998, + "loss_gen": 6.327558994293213, + "loss_rtd": 0.20909154415130615, + "loss_sent": 0.12776273488998413, + "loss_sod": 0.0658029168844223, + "loss_total": 0.4026572108268738, + "step": 348499 + }, + { + "epoch": 0.027, + "grad_norm": 1.0374902486801147, + "learning_rate": 2.647223934064791e-06, + "loss": 0.4079, + "step": 348500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.985374450683594, + "loss_rtd": 0.224440336227417, + "loss_sent": 0.20375190675258636, + "loss_sod": 0.13233348727226257, + "loss_total": 0.5605257153511047, + "step": 348599 + }, + { + "epoch": 0.027198, + "loss_gen": 6.076707363128662, + "loss_rtd": 0.2122603803873062, + "loss_sent": 0.17057929933071136, + "loss_sod": 0.1456124633550644, + "loss_total": 0.5284521579742432, + "step": 348599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.492030143737793, + "learning_rate": 2.6370448792937817e-06, + "loss": 0.429, + "step": 348600 + }, + { + "epoch": 0.027398, + "loss_gen": 6.192418098449707, + "loss_rtd": 0.2122466266155243, + "loss_sent": 0.03144949674606323, + "loss_sod": 0.05586479976773262, + "loss_total": 0.29956093430519104, + "step": 348699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.737111568450928, + "loss_rtd": 0.22504131495952606, + "loss_sent": 0.16083656251430511, + "loss_sod": 0.0524526983499527, + "loss_total": 0.43833059072494507, + "step": 348699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.9526489973068237, + "learning_rate": 2.6268849023154294e-06, + "loss": 0.4264, + "step": 348700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.714515686035156, + "loss_rtd": 0.215691477060318, + "loss_sent": 0.21480238437652588, + "loss_sod": 0.017601560801267624, + "loss_total": 0.448095440864563, + "step": 348799 + }, + { + "epoch": 0.027598, + "loss_gen": 6.153249263763428, + "loss_rtd": 0.20012231171131134, + "loss_sent": 0.21374326944351196, + "loss_sod": 0.12404444068670273, + "loss_total": 0.537909984588623, + "step": 348799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.3342827558517456, + "learning_rate": 2.6167440072221826e-06, + "loss": 0.4333, + "step": 348800 + }, + { + "epoch": 0.027798, + "loss_gen": 6.138761043548584, + "loss_rtd": 0.20648187398910522, + "loss_sent": 0.20477847754955292, + "loss_sod": 0.019451338797807693, + "loss_total": 0.43071168661117554, + "step": 348899 + }, + { + "epoch": 0.027798, + "loss_gen": 6.130977630615234, + "loss_rtd": 0.2113247811794281, + "loss_sent": 0.20160768926143646, + "loss_sod": 0.04805793613195419, + "loss_total": 0.46099042892456055, + "step": 348899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.7857123613357544, + "learning_rate": 2.606622198098774e-06, + "loss": 0.3957, + "step": 348900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.957788467407227, + "loss_rtd": 0.21118159592151642, + "loss_sent": 0.10581184178590775, + "loss_sod": 0.11430491507053375, + "loss_total": 0.4312983453273773, + "step": 348999 + }, + { + "epoch": 0.027998, + "loss_gen": 6.248266696929932, + "loss_rtd": 0.21391551196575165, + "loss_sent": 0.1562371701002121, + "loss_sod": 0.04114203900098801, + "loss_total": 0.41129469871520996, + "step": 348999 + }, + { + "epoch": 0.028, + "grad_norm": 0.9734150171279907, + "learning_rate": 2.59651947902228e-06, + "loss": 0.4003, + "step": 349000 + }, + { + "epoch": 0.028, + "eval_loss": 0.3954238295555115, + "eval_runtime": 150.5644, + "eval_samples_per_second": 102.567, + "eval_steps_per_second": 0.804, + "step": 349000 + }, + { + "epoch": 0.028198, + "loss_gen": 6.046821594238281, + "loss_rtd": 0.21712510287761688, + "loss_sent": 0.1850036084651947, + "loss_sod": 0.05440608412027359, + "loss_total": 0.45653480291366577, + "step": 349099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.506927013397217, + "loss_rtd": 0.1919369101524353, + "loss_sent": 0.006589873693883419, + "loss_sod": 0.13269713521003723, + "loss_total": 0.33122390508651733, + "step": 349099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.9546361565589905, + "learning_rate": 2.586435854062069e-06, + "loss": 0.4144, + "step": 349100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.589245796203613, + "loss_rtd": 0.17651744186878204, + "loss_sent": 2.693514215934556e-05, + "loss_sod": 0.2369680106639862, + "loss_total": 0.4135124087333679, + "step": 349199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.500578880310059, + "loss_rtd": 0.15838541090488434, + "loss_sent": 0.015942055732011795, + "loss_sod": 0.0537986196577549, + "loss_total": 0.22812607884407043, + "step": 349199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.1543065309524536, + "learning_rate": 2.5763713272798363e-06, + "loss": 0.4299, + "step": 349200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.8040385246276855, + "loss_rtd": 0.20990204811096191, + "loss_sent": 0.22593101859092712, + "loss_sod": 0.02127135917544365, + "loss_total": 0.4571044445037842, + "step": 349299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.805685520172119, + "loss_rtd": 0.2091977447271347, + "loss_sent": 0.13367465138435364, + "loss_sod": 0.19740842282772064, + "loss_total": 0.540280818939209, + "step": 349299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.1336307525634766, + "learning_rate": 2.5663259027295495e-06, + "loss": 0.426, + "step": 349300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.842103004455566, + "loss_rtd": 0.21626141667366028, + "loss_sent": 0.2969825565814972, + "loss_sod": 0.09903450310230255, + "loss_total": 0.6122784614562988, + "step": 349399 + }, + { + "epoch": 0.028798, + "loss_gen": 6.108442783355713, + "loss_rtd": 0.21364839375019073, + "loss_sent": 0.380411833524704, + "loss_sod": 0.10154962539672852, + "loss_total": 0.6956098079681396, + "step": 349399 + }, + { + "epoch": 0.0288, + "grad_norm": 2.2528610229492188, + "learning_rate": 2.5562995844575165e-06, + "loss": 0.4325, + "step": 349400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.953249931335449, + "loss_rtd": 0.22787602245807648, + "loss_sent": 0.47932255268096924, + "loss_sod": 0.010822047479450703, + "loss_total": 0.7180206179618835, + "step": 349499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.906355381011963, + "loss_rtd": 0.24051526188850403, + "loss_sent": 0.18612606823444366, + "loss_sod": 0.02720388025045395, + "loss_total": 0.45384520292282104, + "step": 349499 + }, + { + "epoch": 0.029, + "grad_norm": 1.124613881111145, + "learning_rate": 2.5462923765023405e-06, + "loss": 0.4311, + "step": 349500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.871358871459961, + "loss_rtd": 0.21468381583690643, + "loss_sent": 0.23703357577323914, + "loss_sod": 0.01827329769730568, + "loss_total": 0.46999067068099976, + "step": 349599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.956061840057373, + "loss_rtd": 0.2228805273771286, + "loss_sent": 0.22619858384132385, + "loss_sod": 0.03803536295890808, + "loss_total": 0.48711445927619934, + "step": 349599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.0738983154296875, + "learning_rate": 2.5363042828949244e-06, + "loss": 0.4307, + "step": 349600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.824796676635742, + "loss_rtd": 0.19950264692306519, + "loss_sent": 0.1863393485546112, + "loss_sod": 0.030530648306012154, + "loss_total": 0.4163726568222046, + "step": 349699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.915545463562012, + "loss_rtd": 0.19600270688533783, + "loss_sent": 0.21444423496723175, + "loss_sod": 0.049449022859334946, + "loss_total": 0.4598959684371948, + "step": 349699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.9910133481025696, + "learning_rate": 2.5263353076584675e-06, + "loss": 0.4212, + "step": 349700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.945225715637207, + "loss_rtd": 0.20018666982650757, + "loss_sent": 0.30984848737716675, + "loss_sod": 0.01725754328072071, + "loss_total": 0.5272927284240723, + "step": 349799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.607385635375977, + "loss_rtd": 0.19031044840812683, + "loss_sent": 0.00022725979215465486, + "loss_sod": 0.09896920621395111, + "loss_total": 0.2895069122314453, + "step": 349799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.5268876552581787, + "learning_rate": 2.516385454808462e-06, + "loss": 0.4151, + "step": 349800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.659405708312988, + "loss_rtd": 0.22153975069522858, + "loss_sent": 0.23582425713539124, + "loss_sod": 0.020174628123641014, + "loss_total": 0.4775386452674866, + "step": 349899 + }, + { + "epoch": 0.029798, + "loss_gen": 6.096982002258301, + "loss_rtd": 0.22046737372875214, + "loss_sent": 0.10626024752855301, + "loss_sod": 0.023365899920463562, + "loss_total": 0.3500935137271881, + "step": 349899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.1230626106262207, + "learning_rate": 2.5064547283527195e-06, + "loss": 0.4278, + "step": 349900 + }, + { + "epoch": 0.029998, + "loss_gen": 6.255684852600098, + "loss_rtd": 0.21020130813121796, + "loss_sent": 0.101527139544487, + "loss_sod": 0.10524416714906693, + "loss_total": 0.41697263717651367, + "step": 349999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.902994632720947, + "loss_rtd": 0.21985577046871185, + "loss_sent": 0.16969870030879974, + "loss_sod": 0.01671951822936535, + "loss_total": 0.4062739908695221, + "step": 349999 + }, + { + "epoch": 0.03, + "grad_norm": 0.980015218257904, + "learning_rate": 2.4965431322913403e-06, + "loss": 0.4131, + "step": 350000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3928295075893402, + "eval_runtime": 150.3192, + "eval_samples_per_second": 102.735, + "eval_steps_per_second": 0.805, + "step": 350000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.98183536529541, + "loss_rtd": 0.2173718810081482, + "loss_sent": 0.25578585267066956, + "loss_sod": 0.032859232276678085, + "loss_total": 0.5060169696807861, + "step": 350099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.7258782386779785, + "loss_rtd": 0.2088528573513031, + "loss_sent": 0.21010349690914154, + "loss_sod": 0.026808001101017, + "loss_total": 0.44576436281204224, + "step": 350099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.282133936882019, + "learning_rate": 2.4866506706167025e-06, + "loss": 0.4112, + "step": 350100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.903136730194092, + "loss_rtd": 0.20793114602565765, + "loss_sent": 0.3298494219779968, + "loss_sod": 0.019741952419281006, + "loss_total": 0.5575225353240967, + "step": 350199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.937349796295166, + "loss_rtd": 0.21580328047275543, + "loss_sent": 0.04803667217493057, + "loss_sod": 0.032336391508579254, + "loss_total": 0.29617634415626526, + "step": 350199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.0936003923416138, + "learning_rate": 2.4767773473134914e-06, + "loss": 0.4243, + "step": 350200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.933935642242432, + "loss_rtd": 0.23456254601478577, + "loss_sent": 0.2062845677137375, + "loss_sod": 0.09549696743488312, + "loss_total": 0.5363441109657288, + "step": 350299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.8825812339782715, + "loss_rtd": 0.21979853510856628, + "loss_sent": 0.15967939794063568, + "loss_sod": 0.03874390572309494, + "loss_total": 0.4182218313217163, + "step": 350299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.3687313795089722, + "learning_rate": 2.4669231663586867e-06, + "loss": 0.4157, + "step": 350300 + }, + { + "epoch": 0.030798, + "loss_gen": 6.029076099395752, + "loss_rtd": 0.22488151490688324, + "loss_sent": 0.26323971152305603, + "loss_sod": 0.010428966954350471, + "loss_total": 0.4985501766204834, + "step": 350399 + }, + { + "epoch": 0.030798, + "loss_gen": 6.057694911956787, + "loss_rtd": 0.20339101552963257, + "loss_sent": 0.28822627663612366, + "loss_sod": 0.10470068454742432, + "loss_total": 0.5963180065155029, + "step": 350399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.9787907600402832, + "learning_rate": 2.457088131721541e-06, + "loss": 0.435, + "step": 350400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.268889427185059, + "loss_rtd": 0.1550845503807068, + "loss_sent": 0.07245110720396042, + "loss_sod": 0.0402236171066761, + "loss_total": 0.2677592635154724, + "step": 350499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.932065486907959, + "loss_rtd": 0.20996759831905365, + "loss_sent": 0.5353861451148987, + "loss_sod": 0.04811495542526245, + "loss_total": 0.793468713760376, + "step": 350499 + }, + { + "epoch": 0.031, + "grad_norm": 1.6151803731918335, + "learning_rate": 2.447272247363608e-06, + "loss": 0.4297, + "step": 350500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.960935115814209, + "loss_rtd": 0.19832560420036316, + "loss_sent": 0.32032203674316406, + "loss_sod": 0.003909667953848839, + "loss_total": 0.5225573182106018, + "step": 350599 + }, + { + "epoch": 0.031198, + "loss_gen": 6.08972692489624, + "loss_rtd": 0.20870810747146606, + "loss_sent": 0.3987029194831848, + "loss_sod": 0.06763564050197601, + "loss_total": 0.6750466823577881, + "step": 350599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.2335033416748047, + "learning_rate": 2.43747551723873e-06, + "loss": 0.4207, + "step": 350600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.954829692840576, + "loss_rtd": 0.21568158268928528, + "loss_sent": 0.1402682065963745, + "loss_sod": 0.005869862157851458, + "loss_total": 0.36181965470314026, + "step": 350699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.8494367599487305, + "loss_rtd": 0.2147720456123352, + "loss_sent": 0.1354246884584427, + "loss_sod": 0.018866462633013725, + "loss_total": 0.36906319856643677, + "step": 350699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.8288174867630005, + "learning_rate": 2.4276979452930282e-06, + "loss": 0.4016, + "step": 350700 + }, + { + "epoch": 0.031598, + "loss_gen": 6.417673587799072, + "loss_rtd": 0.2109319418668747, + "loss_sent": 0.12982873618602753, + "loss_sod": 0.03536287695169449, + "loss_total": 0.3761235475540161, + "step": 350799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.627594947814941, + "loss_rtd": 0.1990581750869751, + "loss_sent": 0.17239323258399963, + "loss_sod": 0.016169602051377296, + "loss_total": 0.3876210153102875, + "step": 350799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.8868272304534912, + "learning_rate": 2.4179395354649026e-06, + "loss": 0.4166, + "step": 350800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.561457633972168, + "loss_rtd": 0.1908586621284485, + "loss_sent": 0.030903350561857224, + "loss_sod": 0.08093588054180145, + "loss_total": 0.30269789695739746, + "step": 350899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.4618988037109375, + "loss_rtd": 0.1912572979927063, + "loss_sent": 0.01910628378391266, + "loss_sod": 0.11057379841804504, + "loss_total": 0.3209373950958252, + "step": 350899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.0488332509994507, + "learning_rate": 2.4082002916850366e-06, + "loss": 0.4223, + "step": 350900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.624969005584717, + "loss_rtd": 0.19831901788711548, + "loss_sent": 0.04244771599769592, + "loss_sod": 0.028687791898846626, + "loss_total": 0.2694545388221741, + "step": 350999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.983346462249756, + "loss_rtd": 0.21950097382068634, + "loss_sent": 0.23065710067749023, + "loss_sod": 0.03074844554066658, + "loss_total": 0.48090651631355286, + "step": 350999 + }, + { + "epoch": 0.032, + "grad_norm": 1.4386192560195923, + "learning_rate": 2.3984802178764086e-06, + "loss": 0.4238, + "step": 351000 + }, + { + "epoch": 0.032, + "eval_loss": 0.3988931179046631, + "eval_runtime": 150.6248, + "eval_samples_per_second": 102.526, + "eval_steps_per_second": 0.803, + "step": 351000 + }, + { + "epoch": 0.032198, + "loss_gen": 6.070059776306152, + "loss_rtd": 0.2153177559375763, + "loss_sent": 0.143410786986351, + "loss_sod": 0.019158367067575455, + "loss_total": 0.37788689136505127, + "step": 351099 + }, + { + "epoch": 0.032198, + "loss_gen": 5.941234111785889, + "loss_rtd": 0.19865795969963074, + "loss_sent": 0.07141967117786407, + "loss_sod": 0.02514990046620369, + "loss_total": 0.2952275276184082, + "step": 351099 + }, + { + "epoch": 0.0322, + "grad_norm": 0.7352594137191772, + "learning_rate": 2.3887793179542594e-06, + "loss": 0.4142, + "step": 351100 + }, + { + "epoch": 0.032398, + "loss_gen": 5.969297409057617, + "loss_rtd": 0.1960950493812561, + "loss_sent": 0.21834954619407654, + "loss_sod": 0.05671348795294762, + "loss_total": 0.47115808725357056, + "step": 351199 + }, + { + "epoch": 0.032398, + "loss_gen": 5.7469282150268555, + "loss_rtd": 0.20001927018165588, + "loss_sent": 0.27628093957901, + "loss_sod": 0.14189350605010986, + "loss_total": 0.6181937456130981, + "step": 351199 + }, + { + "epoch": 0.0324, + "grad_norm": 1.7035173177719116, + "learning_rate": 2.379097595826102e-06, + "loss": 0.404, + "step": 351200 + }, + { + "epoch": 0.032598, + "loss_gen": 5.7358880043029785, + "loss_rtd": 0.19336995482444763, + "loss_sent": 0.2013741284608841, + "loss_sod": 0.00435943529009819, + "loss_total": 0.3991035223007202, + "step": 351299 + }, + { + "epoch": 0.032598, + "loss_gen": 6.023784637451172, + "loss_rtd": 0.19719797372817993, + "loss_sent": 0.1325165033340454, + "loss_sod": 0.024149995297193527, + "loss_total": 0.35386449098587036, + "step": 351299 + }, + { + "epoch": 0.0326, + "grad_norm": 0.83186274766922, + "learning_rate": 2.369435055391733e-06, + "loss": 0.413, + "step": 351300 + }, + { + "epoch": 0.032798, + "loss_gen": 6.057023525238037, + "loss_rtd": 0.20856040716171265, + "loss_sent": 0.13905474543571472, + "loss_sod": 0.04135048761963844, + "loss_total": 0.3889656364917755, + "step": 351399 + }, + { + "epoch": 0.032798, + "loss_gen": 5.9625325202941895, + "loss_rtd": 0.22160308063030243, + "loss_sent": 0.286081999540329, + "loss_sod": 0.05469052866101265, + "loss_total": 0.5623756051063538, + "step": 351399 + }, + { + "epoch": 0.0328, + "grad_norm": 0.7798752188682556, + "learning_rate": 2.3597917005432346e-06, + "loss": 0.416, + "step": 351400 + }, + { + "epoch": 0.032998, + "loss_gen": 5.706137657165527, + "loss_rtd": 0.19809125363826752, + "loss_sent": 0.12273921072483063, + "loss_sod": 0.012218467891216278, + "loss_total": 0.333048939704895, + "step": 351499 + }, + { + "epoch": 0.032998, + "loss_gen": 5.9923224449157715, + "loss_rtd": 0.1937067210674286, + "loss_sent": 0.11819963902235031, + "loss_sod": 0.14596877992153168, + "loss_total": 0.45787513256073, + "step": 351499 + }, + { + "epoch": 0.033, + "grad_norm": 1.1767587661743164, + "learning_rate": 2.350167535164943e-06, + "loss": 0.4342, + "step": 351500 + }, + { + "epoch": 0.033198, + "loss_gen": 6.103427886962891, + "loss_rtd": 0.19066065549850464, + "loss_sent": 0.24041242897510529, + "loss_sod": 0.025873901322484016, + "loss_total": 0.4569469690322876, + "step": 351599 + }, + { + "epoch": 0.033198, + "loss_gen": 5.711111545562744, + "loss_rtd": 0.2296997308731079, + "loss_sent": 0.1315288245677948, + "loss_sod": 0.02059810236096382, + "loss_total": 0.3818266689777374, + "step": 351599 + }, + { + "epoch": 0.0332, + "grad_norm": 1.1959350109100342, + "learning_rate": 2.3405625631334694e-06, + "loss": 0.4119, + "step": 351600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.726678371429443, + "loss_rtd": 0.20519202947616577, + "loss_sent": 0.22255714237689972, + "loss_sod": 0.010626724921166897, + "loss_total": 0.43837589025497437, + "step": 351699 + }, + { + "epoch": 0.033398, + "loss_gen": 6.037045478820801, + "loss_rtd": 0.20537537336349487, + "loss_sent": 0.030526742339134216, + "loss_sod": 0.22692060470581055, + "loss_total": 0.46282273530960083, + "step": 351699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.103611946105957, + "learning_rate": 2.33097678831769e-06, + "loss": 0.409, + "step": 351700 + }, + { + "epoch": 0.033598, + "loss_gen": 5.16274356842041, + "loss_rtd": 0.16753192245960236, + "loss_sent": 2.2867430743644945e-05, + "loss_sod": 0.07413460314273834, + "loss_total": 0.24168939888477325, + "step": 351799 + }, + { + "epoch": 0.033598, + "loss_gen": 5.506666660308838, + "loss_rtd": 0.16758745908737183, + "loss_sent": 0.10821570456027985, + "loss_sod": 0.10490790009498596, + "loss_total": 0.38071107864379883, + "step": 351799 + }, + { + "epoch": 0.0336, + "grad_norm": 1.0368785858154297, + "learning_rate": 2.321410214578762e-06, + "loss": 0.3977, + "step": 351800 + }, + { + "epoch": 0.033798, + "loss_gen": 5.81887674331665, + "loss_rtd": 0.2052488923072815, + "loss_sent": 0.14953112602233887, + "loss_sod": 0.0201532281935215, + "loss_total": 0.37493324279785156, + "step": 351899 + }, + { + "epoch": 0.033798, + "loss_gen": 5.939299583435059, + "loss_rtd": 0.20820355415344238, + "loss_sent": 0.13707712292671204, + "loss_sod": 0.040300384163856506, + "loss_total": 0.3855810761451721, + "step": 351899 + }, + { + "epoch": 0.0338, + "grad_norm": 0.7562331557273865, + "learning_rate": 2.3118628457701076e-06, + "loss": 0.4186, + "step": 351900 + }, + { + "epoch": 0.033998, + "loss_gen": 6.217770576477051, + "loss_rtd": 0.20336152613162994, + "loss_sent": 0.22984600067138672, + "loss_sod": 0.12778106331825256, + "loss_total": 0.5609886050224304, + "step": 351999 + }, + { + "epoch": 0.033998, + "loss_gen": 5.971132755279541, + "loss_rtd": 0.21956539154052734, + "loss_sent": 0.16360871493816376, + "loss_sod": 0.009821237064898014, + "loss_total": 0.39299535751342773, + "step": 351999 + }, + { + "epoch": 0.034, + "grad_norm": 1.0129319429397583, + "learning_rate": 2.302334685737384e-06, + "loss": 0.4287, + "step": 352000 + }, + { + "epoch": 0.034, + "eval_loss": 0.3950361907482147, + "eval_runtime": 150.3199, + "eval_samples_per_second": 102.734, + "eval_steps_per_second": 0.805, + "step": 352000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.844827175140381, + "loss_rtd": 0.21237419545650482, + "loss_sent": 0.1966703236103058, + "loss_sod": 0.11328340321779251, + "loss_total": 0.5223278999328613, + "step": 352099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.695902347564697, + "loss_rtd": 0.21383731067180634, + "loss_sent": 0.34264758229255676, + "loss_sod": 0.005707689095288515, + "loss_total": 0.5621925592422485, + "step": 352099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.374636173248291, + "learning_rate": 2.292825738318549e-06, + "loss": 0.4182, + "step": 352100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.194711208343506, + "loss_rtd": 0.1682964414358139, + "loss_sent": 2.9674323741346598e-05, + "loss_sod": 0.13813243806362152, + "loss_total": 0.30645856261253357, + "step": 352199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.3243889808654785, + "loss_rtd": 0.18843649327754974, + "loss_sent": 0.056837670505046844, + "loss_sod": 0.015786737203598022, + "loss_total": 0.261060893535614, + "step": 352199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.9507030248641968, + "learning_rate": 2.283336007343806e-06, + "loss": 0.4197, + "step": 352200 + }, + { + "epoch": 0.000598, + "loss_gen": 6.032334804534912, + "loss_rtd": 0.208065003156662, + "loss_sent": 0.1603982001543045, + "loss_sod": 0.021933242678642273, + "loss_total": 0.39039644598960876, + "step": 352299 + }, + { + "epoch": 0.000598, + "loss_gen": 6.044798851013184, + "loss_rtd": 0.21557746827602386, + "loss_sent": 0.21242542564868927, + "loss_sod": 0.018241213634610176, + "loss_total": 0.44624412059783936, + "step": 352299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.9371817708015442, + "learning_rate": 2.2738654966356022e-06, + "loss": 0.4366, + "step": 352300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.862653732299805, + "loss_rtd": 0.19908036291599274, + "loss_sent": 0.1968402862548828, + "loss_sod": 0.013897551223635674, + "loss_total": 0.40981820225715637, + "step": 352399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.86594820022583, + "loss_rtd": 0.2057826966047287, + "loss_sent": 0.1858564168214798, + "loss_sod": 0.03528302535414696, + "loss_total": 0.42692214250564575, + "step": 352399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.9980801939964294, + "learning_rate": 2.2644142100086753e-06, + "loss": 0.3959, + "step": 352400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.303768634796143, + "loss_rtd": 0.13790901005268097, + "loss_sent": 0.00020305546058807522, + "loss_sod": 0.07721947878599167, + "loss_total": 0.2153315395116806, + "step": 352499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.938932418823242, + "loss_rtd": 0.2166273444890976, + "loss_sent": 0.17042265832424164, + "loss_sod": 0.03659671172499657, + "loss_total": 0.4236466884613037, + "step": 352499 + }, + { + "epoch": 0.001, + "grad_norm": 1.0985387563705444, + "learning_rate": 2.2549821512699966e-06, + "loss": 0.413, + "step": 352500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.79124641418457, + "loss_rtd": 0.2195327877998352, + "loss_sent": 0.28755638003349304, + "loss_sod": 0.011121327057480812, + "loss_total": 0.5182105302810669, + "step": 352599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.831072807312012, + "loss_rtd": 0.21045763790607452, + "loss_sent": 0.03898901119828224, + "loss_sod": 0.01899530552327633, + "loss_total": 0.26844194531440735, + "step": 352599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.124880075454712, + "learning_rate": 2.245569324218799e-06, + "loss": 0.4126, + "step": 352600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.486479759216309, + "loss_rtd": 0.17362302541732788, + "loss_sent": 0.0638706311583519, + "loss_sod": 0.05291808769106865, + "loss_total": 0.29041174054145813, + "step": 352699 + }, + { + "epoch": 0.001398, + "loss_gen": 6.121653079986572, + "loss_rtd": 0.19986870884895325, + "loss_sent": 0.10900358855724335, + "loss_sod": 0.07488373667001724, + "loss_total": 0.38375604152679443, + "step": 352699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.9247352480888367, + "learning_rate": 2.2361757326465614e-06, + "loss": 0.4079, + "step": 352700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.895353317260742, + "loss_rtd": 0.19956815242767334, + "loss_sent": 0.12468837201595306, + "loss_sod": 0.08447334170341492, + "loss_total": 0.4087298512458801, + "step": 352799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.469888687133789, + "loss_rtd": 0.17113961279392242, + "loss_sent": 0.043581195175647736, + "loss_sod": 0.04969732090830803, + "loss_total": 0.2644181251525879, + "step": 352799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.890491247177124, + "learning_rate": 2.2268013803370293e-06, + "loss": 0.4425, + "step": 352800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.819416522979736, + "loss_rtd": 0.2038039118051529, + "loss_sent": 0.3093571364879608, + "loss_sod": 0.0046736132353544235, + "loss_total": 0.5178346633911133, + "step": 352899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.836423873901367, + "loss_rtd": 0.19253765046596527, + "loss_sent": 0.132777601480484, + "loss_sod": 0.046197764575481415, + "loss_total": 0.3715130090713501, + "step": 352899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.1710330247879028, + "learning_rate": 2.217446271066187e-06, + "loss": 0.4359, + "step": 352900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.7525739669799805, + "loss_rtd": 0.18090245127677917, + "loss_sent": 0.101189523935318, + "loss_sod": 0.05926916375756264, + "loss_total": 0.3413611352443695, + "step": 352999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.707718849182129, + "loss_rtd": 0.21078839898109436, + "loss_sent": 0.12415298819541931, + "loss_sod": 0.047776564955711365, + "loss_total": 0.38271793723106384, + "step": 352999 + }, + { + "epoch": 0.002, + "grad_norm": 0.9399814009666443, + "learning_rate": 2.208110408602276e-06, + "loss": 0.417, + "step": 353000 + }, + { + "epoch": 0.002, + "eval_loss": 0.3918011486530304, + "eval_runtime": 152.6188, + "eval_samples_per_second": 101.187, + "eval_steps_per_second": 0.793, + "step": 353000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.581538200378418, + "loss_rtd": 0.1928052455186844, + "loss_sent": 0.0830656960606575, + "loss_sod": 0.030780553817749023, + "loss_total": 0.3066515028476715, + "step": 353099 + }, + { + "epoch": 0.002198, + "loss_gen": 6.365875720977783, + "loss_rtd": 0.21668121218681335, + "loss_sent": 0.12727312743663788, + "loss_sod": 0.06451734900474548, + "loss_total": 0.4084717035293579, + "step": 353099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.1897996664047241, + "learning_rate": 2.1987937967057783e-06, + "loss": 0.4132, + "step": 353100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.393984317779541, + "loss_rtd": 0.18165439367294312, + "loss_sent": 0.03973681107163429, + "loss_sod": 0.03042837604880333, + "loss_total": 0.25181958079338074, + "step": 353199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.648141384124756, + "loss_rtd": 0.18211771547794342, + "loss_sent": 0.004699456971138716, + "loss_sod": 0.1369915008544922, + "loss_total": 0.3238086700439453, + "step": 353199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.7428009510040283, + "learning_rate": 2.1894964391294125e-06, + "loss": 0.4422, + "step": 353200 + }, + { + "epoch": 0.002598, + "loss_gen": 6.0232696533203125, + "loss_rtd": 0.19451023638248444, + "loss_sent": 0.27461326122283936, + "loss_sod": 0.012914705090224743, + "loss_total": 0.4820381999015808, + "step": 353299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.886040687561035, + "loss_rtd": 0.2097945511341095, + "loss_sent": 0.21921497583389282, + "loss_sod": 0.015865452587604523, + "loss_total": 0.44487497210502625, + "step": 353299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.9458757638931274, + "learning_rate": 2.180218339618173e-06, + "loss": 0.4166, + "step": 353300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.676329135894775, + "loss_rtd": 0.21489547193050385, + "loss_sent": 0.17359676957130432, + "loss_sod": 0.06085289642214775, + "loss_total": 0.4493451416492462, + "step": 353399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.956292629241943, + "loss_rtd": 0.2165389060974121, + "loss_sent": 0.17427848279476166, + "loss_sod": 0.04603196308016777, + "loss_total": 0.43684935569763184, + "step": 353399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.1209734678268433, + "learning_rate": 2.17095950190927e-06, + "loss": 0.4167, + "step": 353400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.919392108917236, + "loss_rtd": 0.21150611340999603, + "loss_sent": 0.1576291024684906, + "loss_sod": 0.007452788762748241, + "loss_total": 0.37658798694610596, + "step": 353499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.6520209312438965, + "loss_rtd": 0.21992997825145721, + "loss_sent": 0.23487496376037598, + "loss_sod": 0.03435094654560089, + "loss_total": 0.4891558885574341, + "step": 353499 + }, + { + "epoch": 0.003, + "grad_norm": 1.1233352422714233, + "learning_rate": 2.1617199297321534e-06, + "loss": 0.4284, + "step": 353500 + }, + { + "epoch": 0.003198, + "loss_gen": 6.06951904296875, + "loss_rtd": 0.1953224241733551, + "loss_sent": 0.20304681360721588, + "loss_sod": 0.07646000385284424, + "loss_total": 0.4748292565345764, + "step": 353599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.541001796722412, + "loss_rtd": 0.19102561473846436, + "loss_sent": 0.005712227895855904, + "loss_sod": 0.10757511854171753, + "loss_total": 0.30431297421455383, + "step": 353599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.9571014642715454, + "learning_rate": 2.1524996268085296e-06, + "loss": 0.4052, + "step": 353600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.754105567932129, + "loss_rtd": 0.2006927877664566, + "loss_sent": 0.06379576772451401, + "loss_sod": 0.030287204310297966, + "loss_total": 0.29477575421333313, + "step": 353699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.204795837402344, + "loss_rtd": 0.15813377499580383, + "loss_sent": 2.4847346139722504e-05, + "loss_sod": 0.08588747680187225, + "loss_total": 0.24404609203338623, + "step": 353699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.899935781955719, + "learning_rate": 2.143298596852339e-06, + "loss": 0.4236, + "step": 353700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.738497734069824, + "loss_rtd": 0.21620681881904602, + "loss_sent": 0.279610812664032, + "loss_sod": 0.022876763716340065, + "loss_total": 0.5186944007873535, + "step": 353799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.833151817321777, + "loss_rtd": 0.20951002836227417, + "loss_sent": 0.1569215953350067, + "loss_sod": 0.0022610409650951624, + "loss_total": 0.36869266629219055, + "step": 353799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.8144100904464722, + "learning_rate": 2.1341168435697447e-06, + "loss": 0.422, + "step": 353800 + }, + { + "epoch": 0.003798, + "loss_gen": 6.018966197967529, + "loss_rtd": 0.2131778597831726, + "loss_sent": 0.16325236856937408, + "loss_sod": 0.0442400798201561, + "loss_total": 0.4206703305244446, + "step": 353899 + }, + { + "epoch": 0.003798, + "loss_gen": 6.1745476722717285, + "loss_rtd": 0.23178789019584656, + "loss_sent": 0.0814250186085701, + "loss_sod": 0.07034356147050858, + "loss_total": 0.38355645537376404, + "step": 353899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.9145764112472534, + "learning_rate": 2.12495437065916e-06, + "loss": 0.4134, + "step": 353900 + }, + { + "epoch": 0.003998, + "loss_gen": 6.161501407623291, + "loss_rtd": 0.21247737109661102, + "loss_sent": 0.1705043762922287, + "loss_sod": 0.10521357506513596, + "loss_total": 0.4881953001022339, + "step": 353999 + }, + { + "epoch": 0.003998, + "loss_gen": 6.057038307189941, + "loss_rtd": 0.20198795199394226, + "loss_sent": 0.045900702476501465, + "loss_sod": 0.1148776113986969, + "loss_total": 0.3627662658691406, + "step": 353999 + }, + { + "epoch": 0.004, + "grad_norm": 1.0753530263900757, + "learning_rate": 2.115811181811228e-06, + "loss": 0.4343, + "step": 354000 + }, + { + "epoch": 0.004, + "eval_loss": 0.40010935068130493, + "eval_runtime": 150.0678, + "eval_samples_per_second": 102.907, + "eval_steps_per_second": 0.806, + "step": 354000 + }, + { + "epoch": 0.004198, + "loss_gen": 6.502257823944092, + "loss_rtd": 0.2139170914888382, + "loss_sent": 0.0895005464553833, + "loss_sod": 0.1457839459180832, + "loss_total": 0.4492015838623047, + "step": 354099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.863682746887207, + "loss_rtd": 0.19926874339580536, + "loss_sent": 0.1459185779094696, + "loss_sod": 0.10132648795843124, + "loss_total": 0.4465138018131256, + "step": 354099 + }, + { + "epoch": 0.0042, + "grad_norm": 2.152190685272217, + "learning_rate": 2.1066872807088354e-06, + "loss": 0.4372, + "step": 354100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.831955909729004, + "loss_rtd": 0.21212802827358246, + "loss_sent": 0.11381430178880692, + "loss_sod": 0.08838170766830444, + "loss_total": 0.4143240451812744, + "step": 354199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.714260101318359, + "loss_rtd": 0.22318173944950104, + "loss_sent": 0.19266586005687714, + "loss_sod": 0.03717808425426483, + "loss_total": 0.4530256986618042, + "step": 354199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.3364078998565674, + "learning_rate": 2.0975826710270707e-06, + "loss": 0.444, + "step": 354200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.322637557983398, + "loss_rtd": 0.19468343257904053, + "loss_sent": 2.3605018213856965e-05, + "loss_sod": 0.11811772733926773, + "loss_total": 0.3128247559070587, + "step": 354299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.0764617919921875, + "loss_rtd": 0.15593793988227844, + "loss_sent": 2.572458834038116e-05, + "loss_sod": 0.18683885037899017, + "loss_total": 0.342802494764328, + "step": 354299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.3551969528198242, + "learning_rate": 2.088497356433278e-06, + "loss": 0.4177, + "step": 354300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.867374897003174, + "loss_rtd": 0.18638940155506134, + "loss_sent": 0.10143978148698807, + "loss_sod": 0.04324105754494667, + "loss_total": 0.3310702443122864, + "step": 354399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.99941873550415, + "loss_rtd": 0.2125442624092102, + "loss_sent": 0.1141626313328743, + "loss_sod": 0.06408238410949707, + "loss_total": 0.390789270401001, + "step": 354399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.1286380290985107, + "learning_rate": 2.0794313405870236e-06, + "loss": 0.4195, + "step": 354400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.459729194641113, + "loss_rtd": 0.15806931257247925, + "loss_sent": 0.00018017186084762216, + "loss_sod": 0.023364700376987457, + "loss_total": 0.18161417543888092, + "step": 354499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.466588973999023, + "loss_rtd": 0.17173349857330322, + "loss_sent": 0.12588773667812347, + "loss_sod": 0.04384646192193031, + "loss_total": 0.3414676785469055, + "step": 354499 + }, + { + "epoch": 0.005, + "grad_norm": 0.7194051742553711, + "learning_rate": 2.0703846271400983e-06, + "loss": 0.4024, + "step": 354500 + }, + { + "epoch": 0.005198, + "loss_gen": 6.066932201385498, + "loss_rtd": 0.22332562506198883, + "loss_sent": 0.262665331363678, + "loss_sod": 0.04054757580161095, + "loss_total": 0.5265384912490845, + "step": 354599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.763453960418701, + "loss_rtd": 0.2057800590991974, + "loss_sent": 0.1919267326593399, + "loss_sod": 0.043964192271232605, + "loss_total": 0.4416709840297699, + "step": 354599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.1999951601028442, + "learning_rate": 2.061357219736504e-06, + "loss": 0.408, + "step": 354600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.837345123291016, + "loss_rtd": 0.19844655692577362, + "loss_sent": 0.1703146994113922, + "loss_sod": 0.17566174268722534, + "loss_total": 0.54442298412323, + "step": 354699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.693556308746338, + "loss_rtd": 0.18148230016231537, + "loss_sent": 2.3122607672121376e-05, + "loss_sod": 0.1757783591747284, + "loss_total": 0.3572837710380554, + "step": 354699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.7587082386016846, + "learning_rate": 2.0523491220124924e-06, + "loss": 0.4198, + "step": 354700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.87233304977417, + "loss_rtd": 0.1956353634595871, + "loss_sent": 0.16608545184135437, + "loss_sod": 0.02105873078107834, + "loss_total": 0.3827795386314392, + "step": 354799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.842890739440918, + "loss_rtd": 0.22205030918121338, + "loss_sent": 0.1724475771188736, + "loss_sod": 0.004117500968277454, + "loss_total": 0.39861539006233215, + "step": 354799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.5454592704772949, + "learning_rate": 2.0433603375965227e-06, + "loss": 0.4191, + "step": 354800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.969061374664307, + "loss_rtd": 0.22664503753185272, + "loss_sent": 0.39275848865509033, + "loss_sod": 0.24920883774757385, + "loss_total": 0.8686123490333557, + "step": 354899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.787162780761719, + "loss_rtd": 0.19412535429000854, + "loss_sent": 0.30275753140449524, + "loss_sod": 0.026805846020579338, + "loss_total": 0.5236887335777283, + "step": 354899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.8073854446411133, + "learning_rate": 2.0343908701092817e-06, + "loss": 0.4289, + "step": 354900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.882112979888916, + "loss_rtd": 0.20660750567913055, + "loss_sent": 0.17035874724388123, + "loss_sod": 0.023847075179219246, + "loss_total": 0.40081334114074707, + "step": 354999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.957112789154053, + "loss_rtd": 0.1990405172109604, + "loss_sent": 0.24796076118946075, + "loss_sod": 0.05260968953371048, + "loss_total": 0.499610960483551, + "step": 354999 + }, + { + "epoch": 0.006, + "grad_norm": 1.0234116315841675, + "learning_rate": 2.025440723163652e-06, + "loss": 0.4226, + "step": 355000 + }, + { + "epoch": 0.006, + "eval_loss": 0.3955202102661133, + "eval_runtime": 149.8632, + "eval_samples_per_second": 103.047, + "eval_steps_per_second": 0.807, + "step": 355000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.702818870544434, + "loss_rtd": 0.2071809321641922, + "loss_sent": 0.15628786385059357, + "loss_sod": 0.0020802603103220463, + "loss_total": 0.3655490577220917, + "step": 355099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.622329235076904, + "loss_rtd": 0.19860802590847015, + "loss_sent": 0.0619230642914772, + "loss_sod": 0.007985853590071201, + "loss_total": 0.2685169577598572, + "step": 355099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.6136819124221802, + "learning_rate": 2.0165099003647603e-06, + "loss": 0.4151, + "step": 355100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.951848983764648, + "loss_rtd": 0.21448597311973572, + "loss_sent": 0.1498323678970337, + "loss_sod": 0.02414526417851448, + "loss_total": 0.3884636163711548, + "step": 355199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.998314380645752, + "loss_rtd": 0.2019582837820053, + "loss_sent": 0.352652370929718, + "loss_sod": 0.1898314654827118, + "loss_total": 0.7444421052932739, + "step": 355199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.685928225517273, + "learning_rate": 2.007598405309946e-06, + "loss": 0.4235, + "step": 355200 + }, + { + "epoch": 0.006598, + "loss_gen": 6.01767110824585, + "loss_rtd": 0.22767655551433563, + "loss_sent": 0.1614169031381607, + "loss_sod": 0.12196917831897736, + "loss_total": 0.5110626220703125, + "step": 355299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.759350776672363, + "loss_rtd": 0.22138266265392303, + "loss_sent": 0.10136804729700089, + "loss_sod": 0.01644078828394413, + "loss_total": 0.3391914963722229, + "step": 355299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.9406350255012512, + "learning_rate": 1.9987062415887604e-06, + "loss": 0.4222, + "step": 355300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.697607517242432, + "loss_rtd": 0.20214276015758514, + "loss_sent": 0.0004224515869282186, + "loss_sod": 0.23241263628005981, + "loss_total": 0.43497785925865173, + "step": 355399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.344110488891602, + "loss_rtd": 0.1906622052192688, + "loss_sent": 2.2936350433155894e-05, + "loss_sod": 0.07854408025741577, + "loss_total": 0.2692292332649231, + "step": 355399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.0965168476104736, + "learning_rate": 1.9898334127829486e-06, + "loss": 0.4194, + "step": 355400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.239232540130615, + "loss_rtd": 0.16593962907791138, + "loss_sent": 0.022577477619051933, + "loss_sod": 0.1156080812215805, + "loss_total": 0.30412518978118896, + "step": 355499 + }, + { + "epoch": 0.006998, + "loss_gen": 6.062136173248291, + "loss_rtd": 0.20743438601493835, + "loss_sent": 0.210471048951149, + "loss_sod": 0.03318113461136818, + "loss_total": 0.4510865807533264, + "step": 355499 + }, + { + "epoch": 0.007, + "grad_norm": 0.9438206553459167, + "learning_rate": 1.9809799224665025e-06, + "loss": 0.418, + "step": 355500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.930011749267578, + "loss_rtd": 0.1955890953540802, + "loss_sent": 0.43107500672340393, + "loss_sod": 0.03834614157676697, + "loss_total": 0.6650102138519287, + "step": 355599 + }, + { + "epoch": 0.007198, + "loss_gen": 6.390570163726807, + "loss_rtd": 0.1871335357427597, + "loss_sent": 0.2828793525695801, + "loss_sod": 0.018466539680957794, + "loss_total": 0.4884794354438782, + "step": 355599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.360158920288086, + "learning_rate": 1.9721457742055973e-06, + "loss": 0.4128, + "step": 355600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.43441104888916, + "loss_rtd": 0.2008632868528366, + "loss_sent": 0.1046755313873291, + "loss_sod": 0.11621154844760895, + "loss_total": 0.42175036668777466, + "step": 355699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.410533905029297, + "loss_rtd": 0.17058065533638, + "loss_sent": 0.03604581207036972, + "loss_sod": 0.10525237023830414, + "loss_total": 0.31187883019447327, + "step": 355699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.3569351434707642, + "learning_rate": 1.9633309715586412e-06, + "loss": 0.4279, + "step": 355700 + }, + { + "epoch": 0.007598, + "loss_gen": 6.2586236000061035, + "loss_rtd": 0.2144034504890442, + "loss_sent": 0.1542971134185791, + "loss_sod": 0.03798966482281685, + "loss_total": 0.40669023990631104, + "step": 355799 + }, + { + "epoch": 0.007598, + "loss_gen": 6.216439723968506, + "loss_rtd": 0.18448476493358612, + "loss_sent": 0.07719166576862335, + "loss_sod": 0.09614969789981842, + "loss_total": 0.3578261137008667, + "step": 355799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.2310986518859863, + "learning_rate": 1.954535518076217e-06, + "loss": 0.433, + "step": 355800 + }, + { + "epoch": 0.007798, + "loss_gen": 6.04836368560791, + "loss_rtd": 0.2088451087474823, + "loss_sent": 0.10443481057882309, + "loss_sod": 0.11040560901165009, + "loss_total": 0.4236855208873749, + "step": 355899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.211187362670898, + "loss_rtd": 0.17163363099098206, + "loss_sent": 2.266068258904852e-05, + "loss_sod": 0.03339499980211258, + "loss_total": 0.20505128800868988, + "step": 355899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.9474918842315674, + "learning_rate": 1.945759417301135e-06, + "loss": 0.405, + "step": 355900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.996926307678223, + "loss_rtd": 0.22321800887584686, + "loss_sent": 0.38234007358551025, + "loss_sod": 0.09975333511829376, + "loss_total": 0.7053114175796509, + "step": 355999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.949470520019531, + "loss_rtd": 0.20287185907363892, + "loss_sent": 0.2822295129299164, + "loss_sod": 0.03472301736474037, + "loss_total": 0.5198243856430054, + "step": 355999 + }, + { + "epoch": 0.008, + "grad_norm": 1.4557656049728394, + "learning_rate": 1.9370026727684175e-06, + "loss": 0.4011, + "step": 356000 + }, + { + "epoch": 0.008, + "eval_loss": 0.40251606702804565, + "eval_runtime": 150.0712, + "eval_samples_per_second": 102.904, + "eval_steps_per_second": 0.806, + "step": 356000 + }, + { + "epoch": 0.008198, + "loss_gen": 6.220467567443848, + "loss_rtd": 0.20813670754432678, + "loss_sent": 0.41210559010505676, + "loss_sod": 0.014181757345795631, + "loss_total": 0.634424090385437, + "step": 356099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.828224182128906, + "loss_rtd": 0.21587900817394257, + "loss_sent": 0.25055649876594543, + "loss_sod": 0.037630707025527954, + "loss_total": 0.5040662288665771, + "step": 356099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.6622540950775146, + "learning_rate": 1.928265288005282e-06, + "loss": 0.4319, + "step": 356100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.877004623413086, + "loss_rtd": 0.21264329552650452, + "loss_sent": 0.16616591811180115, + "loss_sod": 0.01253450009971857, + "loss_total": 0.39134371280670166, + "step": 356199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.99909782409668, + "loss_rtd": 0.2093016356229782, + "loss_sent": 0.20995502173900604, + "loss_sod": 0.1282491534948349, + "loss_total": 0.547505795955658, + "step": 356199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.3165335655212402, + "learning_rate": 1.9195472665311355e-06, + "loss": 0.4186, + "step": 356200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.983824253082275, + "loss_rtd": 0.18738792836666107, + "loss_sent": 0.22271102666854858, + "loss_sod": 0.06770418584346771, + "loss_total": 0.47780314087867737, + "step": 356299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.583632469177246, + "loss_rtd": 0.18682065606117249, + "loss_sent": 0.005454404279589653, + "loss_sod": 0.15563565492630005, + "loss_total": 0.34791070222854614, + "step": 356299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.2503660917282104, + "learning_rate": 1.910848611857602e-06, + "loss": 0.4109, + "step": 356300 + }, + { + "epoch": 0.008798, + "loss_gen": 6.008808612823486, + "loss_rtd": 0.199904665350914, + "loss_sent": 0.0830218642950058, + "loss_sod": 0.07319788634777069, + "loss_total": 0.3561244010925293, + "step": 356399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.961312294006348, + "loss_rtd": 0.22446058690547943, + "loss_sent": 0.16952523589134216, + "loss_sod": 0.025866538286209106, + "loss_total": 0.4198523461818695, + "step": 356399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.1060283184051514, + "learning_rate": 1.90216932748849e-06, + "loss": 0.4202, + "step": 356400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.801023006439209, + "loss_rtd": 0.20940759778022766, + "loss_sent": 0.28753530979156494, + "loss_sod": 0.0116899898275733, + "loss_total": 0.5086328983306885, + "step": 356499 + }, + { + "epoch": 0.008998, + "loss_gen": 6.066738605499268, + "loss_rtd": 0.2130727916955948, + "loss_sent": 0.16303817927837372, + "loss_sod": 0.19328413903713226, + "loss_total": 0.569395124912262, + "step": 356499 + }, + { + "epoch": 0.009, + "grad_norm": 1.4080960750579834, + "learning_rate": 1.8935094169198308e-06, + "loss": 0.4117, + "step": 356500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.32443904876709, + "loss_rtd": 0.18271073698997498, + "loss_sent": 0.016854075714945793, + "loss_sod": 0.0061331442557275295, + "loss_total": 0.2056979537010193, + "step": 356599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.517611980438232, + "loss_rtd": 0.18196389079093933, + "loss_sent": 0.016464676707983017, + "loss_sod": 0.03613041341304779, + "loss_total": 0.23455898463726044, + "step": 356599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.4810616374015808, + "learning_rate": 1.8848688836398176e-06, + "loss": 0.3864, + "step": 356600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.816564559936523, + "loss_rtd": 0.1941842883825302, + "loss_sent": 0.3590242266654968, + "loss_sod": 0.0036043724976480007, + "loss_total": 0.5568128824234009, + "step": 356699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.704836845397949, + "loss_rtd": 0.17948199808597565, + "loss_sent": 0.014717033132910728, + "loss_sod": 0.11702261865139008, + "loss_total": 0.3112216591835022, + "step": 356699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.7406604290008545, + "learning_rate": 1.8762477311288663e-06, + "loss": 0.4343, + "step": 356700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.646180152893066, + "loss_rtd": 0.21884070336818695, + "loss_sent": 0.2832874357700348, + "loss_sod": 0.04909444972872734, + "loss_total": 0.551222562789917, + "step": 356799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.81979513168335, + "loss_rtd": 0.1980317085981369, + "loss_sent": 0.11066953837871552, + "loss_sod": 0.10543151944875717, + "loss_total": 0.4141327738761902, + "step": 356799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.0447427034378052, + "learning_rate": 1.8676459628595766e-06, + "loss": 0.4012, + "step": 356800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.995449542999268, + "loss_rtd": 0.20971901714801788, + "loss_sent": 0.24040107429027557, + "loss_sod": 0.03846020624041557, + "loss_total": 0.48858028650283813, + "step": 356899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.749032497406006, + "loss_rtd": 0.20903363823890686, + "loss_sent": 0.14285731315612793, + "loss_sod": 0.004941024351865053, + "loss_total": 0.35683196783065796, + "step": 356899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.6844404339790344, + "learning_rate": 1.8590635822967385e-06, + "loss": 0.4199, + "step": 356900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.497154712677002, + "loss_rtd": 0.187617689371109, + "loss_sent": 0.021793534979224205, + "loss_sod": 0.040550097823143005, + "loss_total": 0.24996131658554077, + "step": 356999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.992887020111084, + "loss_rtd": 0.20962125062942505, + "loss_sent": 0.37087512016296387, + "loss_sod": 0.029054788872599602, + "loss_total": 0.609551191329956, + "step": 356999 + }, + { + "epoch": 0.01, + "grad_norm": 0.9969701766967773, + "learning_rate": 1.850500592897325e-06, + "loss": 0.4095, + "step": 357000 + }, + { + "epoch": 0.01, + "eval_loss": 0.3956516981124878, + "eval_runtime": 149.9188, + "eval_samples_per_second": 103.009, + "eval_steps_per_second": 0.807, + "step": 357000 + }, + { + "epoch": 0.010198, + "loss_gen": 6.80160665512085, + "loss_rtd": 0.2306348979473114, + "loss_sent": 0.08139466494321823, + "loss_sod": 0.17715565860271454, + "loss_total": 0.4891852140426636, + "step": 357099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.674367904663086, + "loss_rtd": 0.23430436849594116, + "loss_sent": 0.25197505950927734, + "loss_sod": 0.01835767924785614, + "loss_total": 0.5046371221542358, + "step": 357099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.1275088787078857, + "learning_rate": 1.8419569981105166e-06, + "loss": 0.4216, + "step": 357100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.982246398925781, + "loss_rtd": 0.20664797723293304, + "loss_sent": 0.21538527309894562, + "loss_sod": 0.006636639591306448, + "loss_total": 0.42866986989974976, + "step": 357199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.894622802734375, + "loss_rtd": 0.22928206622600555, + "loss_sent": 0.16588445007801056, + "loss_sod": 0.03347032517194748, + "loss_total": 0.4286368489265442, + "step": 357199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.6683774590492249, + "learning_rate": 1.833432801377677e-06, + "loss": 0.4083, + "step": 357200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.725360870361328, + "loss_rtd": 0.21421876549720764, + "loss_sent": 0.24396134912967682, + "loss_sod": 0.009419852867722511, + "loss_total": 0.4675999879837036, + "step": 357299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.806037425994873, + "loss_rtd": 0.2170538306236267, + "loss_sent": 0.18101391196250916, + "loss_sod": 0.03645111620426178, + "loss_total": 0.43451884388923645, + "step": 357299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9233309626579285, + "learning_rate": 1.824928006132337e-06, + "loss": 0.4233, + "step": 357300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.391997814178467, + "loss_rtd": 0.19370044767856598, + "loss_sent": 0.004411658737808466, + "loss_sod": 0.13107523322105408, + "loss_total": 0.3291873633861542, + "step": 357399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.142665863037109, + "loss_rtd": 0.15653546154499054, + "loss_sent": 0.05312584340572357, + "loss_sod": 0.05472799763083458, + "loss_total": 0.264389306306839, + "step": 357399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.9104020595550537, + "learning_rate": 1.81644261580024e-06, + "loss": 0.4303, + "step": 357400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.425527095794678, + "loss_rtd": 0.17077837884426117, + "loss_sent": 0.0359957255423069, + "loss_sod": 0.04859377443790436, + "loss_total": 0.25536787509918213, + "step": 357499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.666432857513428, + "loss_rtd": 0.20479275286197662, + "loss_sent": 0.38120099902153015, + "loss_sod": 0.04730219766497612, + "loss_total": 0.6332959532737732, + "step": 357499 + }, + { + "epoch": 0.011, + "grad_norm": 1.7625724077224731, + "learning_rate": 1.807976633799291e-06, + "loss": 0.4121, + "step": 357500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.845230579376221, + "loss_rtd": 0.19764827191829681, + "loss_sent": 0.2432090938091278, + "loss_sod": 0.024489443749189377, + "loss_total": 0.4653468132019043, + "step": 357599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.8189544677734375, + "loss_rtd": 0.2043876349925995, + "loss_sent": 0.09671732783317566, + "loss_sod": 0.02181229554116726, + "loss_total": 0.32291725277900696, + "step": 357599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.6875277757644653, + "learning_rate": 1.7995300635395951e-06, + "loss": 0.4275, + "step": 357600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.883091926574707, + "loss_rtd": 0.19942772388458252, + "loss_sent": 0.05011919140815735, + "loss_sod": 0.012135586701333523, + "loss_total": 0.26168251037597656, + "step": 357699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.89393424987793, + "loss_rtd": 0.21682314574718475, + "loss_sent": 0.06651324033737183, + "loss_sod": 0.01784605160355568, + "loss_total": 0.30118244886398315, + "step": 357699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.4985584318637848, + "learning_rate": 1.79110290842342e-06, + "loss": 0.3998, + "step": 357700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.273667335510254, + "loss_rtd": 0.18570521473884583, + "loss_sent": 0.013371221721172333, + "loss_sod": 0.08951470255851746, + "loss_total": 0.2885911464691162, + "step": 357799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.142648220062256, + "loss_rtd": 0.16314288973808289, + "loss_sent": 2.5110792194027454e-05, + "loss_sod": 0.069739930331707, + "loss_total": 0.23290793597698212, + "step": 357799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.7510794401168823, + "learning_rate": 1.7826951718452335e-06, + "loss": 0.4145, + "step": 357800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.882158279418945, + "loss_rtd": 0.2175660878419876, + "loss_sent": 0.23877964913845062, + "loss_sod": 0.10670842230319977, + "loss_total": 0.5630541443824768, + "step": 357899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.799062728881836, + "loss_rtd": 0.19137810170650482, + "loss_sent": 0.13315854966640472, + "loss_sod": 0.04499879479408264, + "loss_total": 0.3695354461669922, + "step": 357899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.2190783023834229, + "learning_rate": 1.77430685719166e-06, + "loss": 0.4076, + "step": 357900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.983433246612549, + "loss_rtd": 0.20193849503993988, + "loss_sent": 0.2824704945087433, + "loss_sod": 0.020014457404613495, + "loss_total": 0.5044234395027161, + "step": 357999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.964925765991211, + "loss_rtd": 0.1909264177083969, + "loss_sent": 0.12705761194229126, + "loss_sod": 0.023693429306149483, + "loss_total": 0.3416774570941925, + "step": 357999 + }, + { + "epoch": 0.012, + "grad_norm": 0.8381167054176331, + "learning_rate": 1.7659379678415244e-06, + "loss": 0.4253, + "step": 358000 + }, + { + "epoch": 0.012, + "eval_loss": 0.40045633912086487, + "eval_runtime": 150.2548, + "eval_samples_per_second": 102.779, + "eval_steps_per_second": 0.805, + "step": 358000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.01365327835083, + "loss_rtd": 0.19572857022285461, + "loss_sent": 0.19667977094650269, + "loss_sod": 0.09414590150117874, + "loss_total": 0.48655423521995544, + "step": 358099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.961184501647949, + "loss_rtd": 0.237196147441864, + "loss_sent": 0.11936698853969574, + "loss_sod": 0.011582116596400738, + "loss_total": 0.36814525723457336, + "step": 358099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.356001377105713, + "learning_rate": 1.7575885071658027e-06, + "loss": 0.3948, + "step": 358100 + }, + { + "epoch": 0.012398, + "loss_gen": 6.13053035736084, + "loss_rtd": 0.21780376136302948, + "loss_sent": 0.14030757546424866, + "loss_sod": 0.11369664967060089, + "loss_total": 0.47180798649787903, + "step": 358199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.940971374511719, + "loss_rtd": 0.21676567196846008, + "loss_sent": 0.20150306820869446, + "loss_sod": 0.008833327330648899, + "loss_total": 0.42710208892822266, + "step": 358199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.0728989839553833, + "learning_rate": 1.74925847852766e-06, + "loss": 0.4119, + "step": 358200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.732760906219482, + "loss_rtd": 0.2261783331632614, + "loss_sent": 0.49197709560394287, + "loss_sod": 0.0071923090144991875, + "loss_total": 0.7253477573394775, + "step": 358299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.803589820861816, + "loss_rtd": 0.19533567130565643, + "loss_sent": 0.14458568394184113, + "loss_sod": 0.011589504778385162, + "loss_total": 0.3515108823776245, + "step": 358299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.2767122983932495, + "learning_rate": 1.74094788528244e-06, + "loss": 0.4075, + "step": 358300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.919802188873291, + "loss_rtd": 0.1859709918498993, + "loss_sent": 0.7436004281044006, + "loss_sod": 0.027039239183068275, + "loss_total": 0.9566106796264648, + "step": 358399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.749619007110596, + "loss_rtd": 0.21578435599803925, + "loss_sent": 0.12787169218063354, + "loss_sod": 0.00744420662522316, + "loss_total": 0.35110026597976685, + "step": 358399 + }, + { + "epoch": 0.0128, + "grad_norm": 3.856870174407959, + "learning_rate": 1.732656730777632e-06, + "loss": 0.4288, + "step": 358400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.978856086730957, + "loss_rtd": 0.21464388072490692, + "loss_sent": 0.32323721051216125, + "loss_sod": 0.04082895815372467, + "loss_total": 0.5787100791931152, + "step": 358499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.8420491218566895, + "loss_rtd": 0.18880939483642578, + "loss_sent": 0.14689841866493225, + "loss_sod": 0.0094448896124959, + "loss_total": 0.34515270590782166, + "step": 358499 + }, + { + "epoch": 0.013, + "grad_norm": 1.0212700366973877, + "learning_rate": 1.7243850183529197e-06, + "loss": 0.4125, + "step": 358500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.8610615730285645, + "loss_rtd": 0.20242686569690704, + "loss_sent": 0.3094406723976135, + "loss_sod": 0.0030163044575601816, + "loss_total": 0.5148838758468628, + "step": 358599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.5904717445373535, + "loss_rtd": 0.2228073626756668, + "loss_sent": 0.11718317866325378, + "loss_sod": 0.08247916400432587, + "loss_total": 0.42246970534324646, + "step": 358599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.3363889455795288, + "learning_rate": 1.7161327513401492e-06, + "loss": 0.4297, + "step": 358600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.875903129577637, + "loss_rtd": 0.21359845995903015, + "loss_sent": 0.08436977863311768, + "loss_sod": 0.002072614151984453, + "loss_total": 0.3000408411026001, + "step": 358699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.7648820877075195, + "loss_rtd": 0.2127772718667984, + "loss_sent": 0.3052695095539093, + "loss_sod": 0.030976422131061554, + "loss_total": 0.5490232110023499, + "step": 358699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.5202645063400269, + "learning_rate": 1.7078999330633395e-06, + "loss": 0.413, + "step": 358700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.926563739776611, + "loss_rtd": 0.21490149199962616, + "loss_sent": 0.09910505264997482, + "loss_sod": 0.0029799845069646835, + "loss_total": 0.3169865310192108, + "step": 358799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.8224568367004395, + "loss_rtd": 0.2163776457309723, + "loss_sent": 0.052175674587488174, + "loss_sod": 0.046546582132577896, + "loss_total": 0.31509989500045776, + "step": 358799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.0362154245376587, + "learning_rate": 1.6996865668386596e-06, + "loss": 0.4233, + "step": 358800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.657197952270508, + "loss_rtd": 0.20873859524726868, + "loss_sent": 0.15544243156909943, + "loss_sod": 0.010381044819951057, + "loss_total": 0.3745620846748352, + "step": 358899 + }, + { + "epoch": 0.013798, + "loss_gen": 6.037142753601074, + "loss_rtd": 0.22123707830905914, + "loss_sent": 0.21781538426876068, + "loss_sod": 0.031087543815374374, + "loss_total": 0.4701399803161621, + "step": 358899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.4298346042633057, + "learning_rate": 1.691492655974447e-06, + "loss": 0.4162, + "step": 358900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.96540641784668, + "loss_rtd": 0.19525554776191711, + "loss_sent": 0.15494407713413239, + "loss_sod": 0.03438544645905495, + "loss_total": 0.38458508253097534, + "step": 358999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.9260029792785645, + "loss_rtd": 0.19141554832458496, + "loss_sent": 0.05381951481103897, + "loss_sod": 0.023254919797182083, + "loss_total": 0.2684899866580963, + "step": 358999 + }, + { + "epoch": 0.014, + "grad_norm": 0.7269117832183838, + "learning_rate": 1.6833182037712226e-06, + "loss": 0.3892, + "step": 359000 + }, + { + "epoch": 0.014, + "eval_loss": 0.39771997928619385, + "eval_runtime": 149.8968, + "eval_samples_per_second": 103.024, + "eval_steps_per_second": 0.807, + "step": 359000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.809222221374512, + "loss_rtd": 0.20183490216732025, + "loss_sent": 0.13142982125282288, + "loss_sod": 0.06742454320192337, + "loss_total": 0.4006892740726471, + "step": 359099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.224217414855957, + "loss_rtd": 0.16696400940418243, + "loss_sent": 0.010838507674634457, + "loss_sod": 0.04039539396762848, + "loss_total": 0.21819791197776794, + "step": 359099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.8199211955070496, + "learning_rate": 1.6751632135216467e-06, + "loss": 0.4309, + "step": 359100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.704248428344727, + "loss_rtd": 0.19448193907737732, + "loss_sent": 0.10888109356164932, + "loss_sod": 0.01757044903934002, + "loss_total": 0.3209334909915924, + "step": 359199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.809918403625488, + "loss_rtd": 0.20986898243427277, + "loss_sent": 0.08062642812728882, + "loss_sod": 0.041022978723049164, + "loss_total": 0.33151838183403015, + "step": 359199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.7899925112724304, + "learning_rate": 1.6670276885105474e-06, + "loss": 0.4032, + "step": 359200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.747605323791504, + "loss_rtd": 0.2124408334493637, + "loss_sent": 0.13505761325359344, + "loss_sod": 0.009342052973806858, + "loss_total": 0.35684049129486084, + "step": 359299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.84201717376709, + "loss_rtd": 0.21779082715511322, + "loss_sent": 0.16671185195446014, + "loss_sod": 0.009043235331773758, + "loss_total": 0.393545925617218, + "step": 359299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.7241244912147522, + "learning_rate": 1.6589116320149145e-06, + "loss": 0.4184, + "step": 359300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.692610263824463, + "loss_rtd": 0.17546941339969635, + "loss_sent": 4.074591561220586e-05, + "loss_sod": 0.0981811136007309, + "loss_total": 0.2736912667751312, + "step": 359399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.110828399658203, + "loss_rtd": 0.15602681040763855, + "loss_sent": 0.0010624686256051064, + "loss_sod": 0.028543993830680847, + "loss_total": 0.18563327193260193, + "step": 359399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.8788437843322754, + "learning_rate": 1.6508150473038942e-06, + "loss": 0.4298, + "step": 359400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.625291347503662, + "loss_rtd": 0.19761300086975098, + "loss_sent": 0.08058664947748184, + "loss_sod": 0.0576014406979084, + "loss_total": 0.3358010947704315, + "step": 359499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.9618916511535645, + "loss_rtd": 0.21147429943084717, + "loss_sent": 0.14091724157333374, + "loss_sod": 0.059383898973464966, + "loss_total": 0.4117754399776459, + "step": 359499 + }, + { + "epoch": 0.015, + "grad_norm": 1.4707019329071045, + "learning_rate": 1.6427379376387997e-06, + "loss": 0.4107, + "step": 359500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.667448043823242, + "loss_rtd": 0.19172178208827972, + "loss_sent": 0.06701655685901642, + "loss_sod": 0.026283372193574905, + "loss_total": 0.28502172231674194, + "step": 359599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.998225688934326, + "loss_rtd": 0.22887977957725525, + "loss_sent": 0.2037605345249176, + "loss_sod": 0.02702626958489418, + "loss_total": 0.45966657996177673, + "step": 359599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.8633623123168945, + "learning_rate": 1.6346803062730732e-06, + "loss": 0.4138, + "step": 359600 + }, + { + "epoch": 0.015398, + "loss_gen": 6.027843952178955, + "loss_rtd": 0.2001073658466339, + "loss_sent": 0.255400687456131, + "loss_sod": 0.009331222623586655, + "loss_total": 0.46483927965164185, + "step": 359699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.828859806060791, + "loss_rtd": 0.21302835643291473, + "loss_sent": 0.12264768779277802, + "loss_sod": 0.042284101247787476, + "loss_total": 0.3779601454734802, + "step": 359699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.9584111571311951, + "learning_rate": 1.626642156452335e-06, + "loss": 0.4123, + "step": 359700 + }, + { + "epoch": 0.015598, + "loss_gen": 6.0631103515625, + "loss_rtd": 0.20375189185142517, + "loss_sent": 0.4830540716648102, + "loss_sod": 0.03947862237691879, + "loss_total": 0.7262846231460571, + "step": 359799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.555032253265381, + "loss_rtd": 0.17465907335281372, + "loss_sent": 0.010923276655375957, + "loss_sod": 0.014672109857201576, + "loss_total": 0.20025447010993958, + "step": 359799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.4968209266662598, + "learning_rate": 1.618623491414356e-06, + "loss": 0.4073, + "step": 359800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.842617988586426, + "loss_rtd": 0.22465088963508606, + "loss_sent": 0.2357582002878189, + "loss_sod": 0.0856863260269165, + "loss_total": 0.5460954308509827, + "step": 359899 + }, + { + "epoch": 0.015798, + "loss_gen": 6.2033867835998535, + "loss_rtd": 0.21900822222232819, + "loss_sent": 0.0709872618317604, + "loss_sod": 0.08439719676971436, + "loss_total": 0.37439265847206116, + "step": 359899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.3617589473724365, + "learning_rate": 1.6106243143890475e-06, + "loss": 0.4551, + "step": 359900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.951446533203125, + "loss_rtd": 0.19969972968101501, + "loss_sent": 0.2698728144168854, + "loss_sod": 0.02050439827144146, + "loss_total": 0.4900769591331482, + "step": 359999 + }, + { + "epoch": 0.015998, + "loss_gen": 6.22728157043457, + "loss_rtd": 0.21384070813655853, + "loss_sent": 0.1385621875524521, + "loss_sod": 0.11862621456384659, + "loss_total": 0.4710291028022766, + "step": 359999 + }, + { + "epoch": 0.016, + "grad_norm": 0.7700254917144775, + "learning_rate": 1.6026446285984764e-06, + "loss": 0.4238, + "step": 360000 + }, + { + "epoch": 0.016, + "eval_loss": 0.3966046869754791, + "eval_runtime": 150.2741, + "eval_samples_per_second": 102.766, + "eval_steps_per_second": 0.805, + "step": 360000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.609473705291748, + "loss_rtd": 0.2155919075012207, + "loss_sent": 0.3692477345466614, + "loss_sod": 0.021549124270677567, + "loss_total": 0.6063887476921082, + "step": 360099 + }, + { + "epoch": 0.016198, + "loss_gen": 6.194726943969727, + "loss_rtd": 0.20113371312618256, + "loss_sent": 0.15237769484519958, + "loss_sod": 0.09046486765146255, + "loss_total": 0.4439762830734253, + "step": 360099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.8825088739395142, + "learning_rate": 1.5946844372568603e-06, + "loss": 0.4204, + "step": 360100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.893786907196045, + "loss_rtd": 0.19623367488384247, + "loss_sent": 0.17366598546504974, + "loss_sod": 0.025182297453284264, + "loss_total": 0.39508193731307983, + "step": 360199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.7727766036987305, + "loss_rtd": 0.20183885097503662, + "loss_sent": 0.4144172966480255, + "loss_sod": 0.04856124892830849, + "loss_total": 0.6648173928260803, + "step": 360199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.9529916048049927, + "learning_rate": 1.586743743570568e-06, + "loss": 0.4187, + "step": 360200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.666574954986572, + "loss_rtd": 0.1906665414571762, + "loss_sent": 0.051899347454309464, + "loss_sod": 0.06233369559049606, + "loss_total": 0.30489957332611084, + "step": 360299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.374049663543701, + "loss_rtd": 0.16987192630767822, + "loss_sent": 2.2513539079227485e-05, + "loss_sod": 0.09455791860818863, + "loss_total": 0.26445233821868896, + "step": 360299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.1828080415725708, + "learning_rate": 1.5788225507381016e-06, + "loss": 0.4202, + "step": 360300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.958693027496338, + "loss_rtd": 0.21530373394489288, + "loss_sent": 0.49312102794647217, + "loss_sod": 0.018909523263573647, + "loss_total": 0.7273342609405518, + "step": 360399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.716071605682373, + "loss_rtd": 0.1904827058315277, + "loss_sent": 2.9666818591067567e-05, + "loss_sod": 0.09116768836975098, + "loss_total": 0.28168004751205444, + "step": 360399 + }, + { + "epoch": 0.0168, + "grad_norm": 2.5394370555877686, + "learning_rate": 1.5709208619501258e-06, + "loss": 0.4325, + "step": 360400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.553415775299072, + "loss_rtd": 0.17159251868724823, + "loss_sent": 0.014099686406552792, + "loss_sod": 0.08110490441322327, + "loss_total": 0.26679709553718567, + "step": 360499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.853082180023193, + "loss_rtd": 0.20450982451438904, + "loss_sent": 0.15783369541168213, + "loss_sod": 0.02496618591248989, + "loss_total": 0.3873097002506256, + "step": 360499 + }, + { + "epoch": 0.017, + "grad_norm": 0.9959359765052795, + "learning_rate": 1.563038680389428e-06, + "loss": 0.4166, + "step": 360500 + }, + { + "epoch": 0.017198, + "loss_gen": 6.147935390472412, + "loss_rtd": 0.22501952946186066, + "loss_sent": 0.21500326693058014, + "loss_sod": 0.08628611266613007, + "loss_total": 0.5263088941574097, + "step": 360599 + }, + { + "epoch": 0.017198, + "loss_gen": 6.3497843742370605, + "loss_rtd": 0.1911085546016693, + "loss_sent": 0.3059438467025757, + "loss_sod": 0.021273598074913025, + "loss_total": 0.5183259844779968, + "step": 360599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.5327043533325195, + "learning_rate": 1.5551760092309686e-06, + "loss": 0.417, + "step": 360600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.660865783691406, + "loss_rtd": 0.2337588220834732, + "loss_sent": 0.3001292049884796, + "loss_sod": 0.0288230050355196, + "loss_total": 0.5627110004425049, + "step": 360699 + }, + { + "epoch": 0.017398, + "loss_gen": 6.022895812988281, + "loss_rtd": 0.20605115592479706, + "loss_sent": 0.10438123345375061, + "loss_sod": 0.05512363463640213, + "loss_total": 0.3655560314655304, + "step": 360699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.8538192510604858, + "learning_rate": 1.5473328516418083e-06, + "loss": 0.4171, + "step": 360700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.789525985717773, + "loss_rtd": 0.20868565142154694, + "loss_sent": 0.251506507396698, + "loss_sod": 0.0150322075933218, + "loss_total": 0.4752243757247925, + "step": 360799 + }, + { + "epoch": 0.017598, + "loss_gen": 6.079143524169922, + "loss_rtd": 0.20754873752593994, + "loss_sent": 0.13171996176242828, + "loss_sod": 0.016614915803074837, + "loss_total": 0.3558835983276367, + "step": 360799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.6854639053344727, + "learning_rate": 1.5395092107811871e-06, + "loss": 0.4262, + "step": 360800 + }, + { + "epoch": 0.017798, + "loss_gen": 6.033287525177002, + "loss_rtd": 0.21023119986057281, + "loss_sent": 0.12515436112880707, + "loss_sod": 0.022964123636484146, + "loss_total": 0.35834968090057373, + "step": 360899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.790826320648193, + "loss_rtd": 0.20912012457847595, + "loss_sent": 0.3771190047264099, + "loss_sod": 0.05757752060890198, + "loss_total": 0.6438166499137878, + "step": 360899 + }, + { + "epoch": 0.0178, + "grad_norm": 2.1769378185272217, + "learning_rate": 1.531705089800456e-06, + "loss": 0.4316, + "step": 360900 + }, + { + "epoch": 0.017998, + "loss_gen": 6.075564861297607, + "loss_rtd": 0.21308259665966034, + "loss_sent": 0.18532679975032806, + "loss_sod": 0.06486063450574875, + "loss_total": 0.46327000856399536, + "step": 360999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.79880428314209, + "loss_rtd": 0.23234602808952332, + "loss_sent": 0.16001008450984955, + "loss_sod": 0.007651767693459988, + "loss_total": 0.40000787377357483, + "step": 360999 + }, + { + "epoch": 0.018, + "grad_norm": 0.8836267590522766, + "learning_rate": 1.5239204918431282e-06, + "loss": 0.4161, + "step": 361000 + }, + { + "epoch": 0.018, + "eval_loss": 0.3971779942512512, + "eval_runtime": 149.9999, + "eval_samples_per_second": 102.953, + "eval_steps_per_second": 0.807, + "step": 361000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.867193222045898, + "loss_rtd": 0.20037880539894104, + "loss_sent": 0.07236301898956299, + "loss_sod": 0.008503071032464504, + "loss_total": 0.2812448740005493, + "step": 361099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.917838096618652, + "loss_rtd": 0.21738804876804352, + "loss_sent": 0.20074941217899323, + "loss_sod": 0.010854817926883698, + "loss_total": 0.42899227142333984, + "step": 361099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.7468519806861877, + "learning_rate": 1.516155420044818e-06, + "loss": 0.4156, + "step": 361100 + }, + { + "epoch": 0.018398, + "loss_gen": 6.057809352874756, + "loss_rtd": 0.20180083811283112, + "loss_sent": 0.16114170849323273, + "loss_sod": 0.18150779604911804, + "loss_total": 0.5444503426551819, + "step": 361199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.785623550415039, + "loss_rtd": 0.2219465672969818, + "loss_sent": 0.25474539399147034, + "loss_sod": 0.010815219953656197, + "loss_total": 0.487507164478302, + "step": 361199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.78762948513031, + "learning_rate": 1.5084098775333122e-06, + "loss": 0.4116, + "step": 361200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.623623371124268, + "loss_rtd": 0.20676663517951965, + "loss_sent": 0.48878878355026245, + "loss_sod": 0.0915762186050415, + "loss_total": 0.787131667137146, + "step": 361299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.426612854003906, + "loss_rtd": 0.1787305772304535, + "loss_sent": 0.0002620848536025733, + "loss_sod": 0.07906591892242432, + "loss_total": 0.2580585777759552, + "step": 361299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.9698206186294556, + "learning_rate": 1.5006838674285094e-06, + "loss": 0.418, + "step": 361300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.861983776092529, + "loss_rtd": 0.2160624861717224, + "loss_sent": 0.43743348121643066, + "loss_sod": 0.018248524516820908, + "loss_total": 0.6717444658279419, + "step": 361399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.972048282623291, + "loss_rtd": 0.20444737374782562, + "loss_sent": 0.1747852861881256, + "loss_sod": 0.06728965044021606, + "loss_total": 0.4465223252773285, + "step": 361399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.4579147100448608, + "learning_rate": 1.492977392842443e-06, + "loss": 0.4198, + "step": 361400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.92585563659668, + "loss_rtd": 0.22486479580402374, + "loss_sent": 0.23057816922664642, + "loss_sod": 0.004533334169536829, + "loss_total": 0.45997631549835205, + "step": 361499 + }, + { + "epoch": 0.018998, + "loss_gen": 6.12306022644043, + "loss_rtd": 0.21385575830936432, + "loss_sent": 0.05615532025694847, + "loss_sod": 0.03984547033905983, + "loss_total": 0.3098565638065338, + "step": 361499 + }, + { + "epoch": 0.019, + "grad_norm": 0.5748463273048401, + "learning_rate": 1.4852904568792792e-06, + "loss": 0.4259, + "step": 361500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.845739364624023, + "loss_rtd": 0.2064388394355774, + "loss_sent": 0.11149514466524124, + "loss_sod": 0.046577051281929016, + "loss_total": 0.36451101303100586, + "step": 361599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.389118671417236, + "loss_rtd": 0.1845724731683731, + "loss_sent": 0.026308724656701088, + "loss_sod": 0.07386022061109543, + "loss_total": 0.2847414016723633, + "step": 361599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.7511069178581238, + "learning_rate": 1.4776230626353195e-06, + "loss": 0.4182, + "step": 361600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.899303913116455, + "loss_rtd": 0.2296900749206543, + "loss_sent": 0.07064184546470642, + "loss_sod": 0.03610319271683693, + "loss_total": 0.33643510937690735, + "step": 361699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.9688873291015625, + "loss_rtd": 0.2145996242761612, + "loss_sent": 0.17921024560928345, + "loss_sod": 0.07614393532276154, + "loss_total": 0.4699538052082062, + "step": 361699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.9803617596626282, + "learning_rate": 1.469975213198993e-06, + "loss": 0.4239, + "step": 361700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.839181423187256, + "loss_rtd": 0.21481937170028687, + "loss_sent": 0.09946290403604507, + "loss_sod": 0.03148527070879936, + "loss_total": 0.3457675576210022, + "step": 361799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.744566440582275, + "loss_rtd": 0.2183230072259903, + "loss_sent": 0.21551957726478577, + "loss_sod": 0.10369285941123962, + "loss_total": 0.5375354290008545, + "step": 361799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.0184943675994873, + "learning_rate": 1.4623469116508415e-06, + "loss": 0.421, + "step": 361800 + }, + { + "epoch": 0.019798, + "loss_gen": 6.41466760635376, + "loss_rtd": 0.205556258559227, + "loss_sent": 0.22103486955165863, + "loss_sod": 0.07430548965930939, + "loss_total": 0.5008966326713562, + "step": 361899 + }, + { + "epoch": 0.019798, + "loss_gen": 6.195828914642334, + "loss_rtd": 0.19055189192295074, + "loss_sent": 0.16132284700870514, + "loss_sod": 0.02069966122508049, + "loss_total": 0.3725743889808655, + "step": 361899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.7664250731468201, + "learning_rate": 1.4547381610635457e-06, + "loss": 0.4236, + "step": 361900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.322070598602295, + "loss_rtd": 0.1838359236717224, + "loss_sent": 2.66446058958536e-05, + "loss_sod": 0.025309057906270027, + "loss_total": 0.2091716229915619, + "step": 361999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.830729007720947, + "loss_rtd": 0.2038728892803192, + "loss_sent": 0.09125927835702896, + "loss_sod": 0.013036874122917652, + "loss_total": 0.30816903710365295, + "step": 361999 + }, + { + "epoch": 0.02, + "grad_norm": 0.7681975960731506, + "learning_rate": 1.4471489645019153e-06, + "loss": 0.4175, + "step": 362000 + }, + { + "epoch": 0.02, + "eval_loss": 0.39502063393592834, + "eval_runtime": 151.6702, + "eval_samples_per_second": 101.82, + "eval_steps_per_second": 0.798, + "step": 362000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.536657333374023, + "loss_rtd": 0.2285952866077423, + "loss_sent": 0.26336389780044556, + "loss_sod": 0.017407717183232307, + "loss_total": 0.5093668699264526, + "step": 362099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.961015701293945, + "loss_rtd": 0.22314313054084778, + "loss_sent": 0.3466446101665497, + "loss_sod": 0.07911237329244614, + "loss_total": 0.6489001512527466, + "step": 362099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.4666422605514526, + "learning_rate": 1.4395793250228828e-06, + "loss": 0.414, + "step": 362100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.490997791290283, + "loss_rtd": 0.16417410969734192, + "loss_sent": 0.009751019068062305, + "loss_sod": 0.06044970825314522, + "loss_total": 0.23437483608722687, + "step": 362199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.978743076324463, + "loss_rtd": 0.1945478767156601, + "loss_sent": 0.943418562412262, + "loss_sod": 0.05019240826368332, + "loss_total": 1.1881588697433472, + "step": 362199 + }, + { + "epoch": 0.0204, + "grad_norm": 3.0135691165924072, + "learning_rate": 1.4320292456754869e-06, + "loss": 0.4167, + "step": 362200 + }, + { + "epoch": 0.020598, + "loss_gen": 6.460975170135498, + "loss_rtd": 0.23090098798274994, + "loss_sent": 0.29713425040245056, + "loss_sod": 0.09923360496759415, + "loss_total": 0.6272688508033752, + "step": 362299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.66818380355835, + "loss_rtd": 0.18967388570308685, + "loss_sent": 0.13752974569797516, + "loss_sod": 0.041123323142528534, + "loss_total": 0.36832696199417114, + "step": 362299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.4002727270126343, + "learning_rate": 1.4244987295009004e-06, + "loss": 0.4088, + "step": 362300 + }, + { + "epoch": 0.020798, + "loss_gen": 6.059211730957031, + "loss_rtd": 0.2148807942867279, + "loss_sent": 0.2364805042743683, + "loss_sod": 0.09082937240600586, + "loss_total": 0.542190670967102, + "step": 362399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.923233985900879, + "loss_rtd": 0.2028600573539734, + "loss_sent": 0.2622675895690918, + "loss_sod": 0.02612539380788803, + "loss_total": 0.4912530481815338, + "step": 362399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.725727915763855, + "learning_rate": 1.4169877795324193e-06, + "loss": 0.4177, + "step": 362400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.69184684753418, + "loss_rtd": 0.2002236545085907, + "loss_sent": 0.1865220069885254, + "loss_sod": 0.028119299560785294, + "loss_total": 0.4148649573326111, + "step": 362499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.90725040435791, + "loss_rtd": 0.19531695544719696, + "loss_sent": 0.29432928562164307, + "loss_sod": 0.013063258491456509, + "loss_total": 0.5027095079421997, + "step": 362499 + }, + { + "epoch": 0.021, + "grad_norm": 1.4516067504882812, + "learning_rate": 1.4094963987954513e-06, + "loss": 0.4099, + "step": 362500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.707841873168945, + "loss_rtd": 0.19868823885917664, + "loss_sent": 0.1642095446586609, + "loss_sod": 0.027306100353598595, + "loss_total": 0.39020389318466187, + "step": 362599 + }, + { + "epoch": 0.021198, + "loss_gen": 6.0773844718933105, + "loss_rtd": 0.21753957867622375, + "loss_sent": 0.06987687200307846, + "loss_sod": 0.04470841586589813, + "loss_total": 0.33212485909461975, + "step": 362599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.6767948269844055, + "learning_rate": 1.4020245903075214e-06, + "loss": 0.4023, + "step": 362600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.555398464202881, + "loss_rtd": 0.20148345828056335, + "loss_sent": 0.22582140564918518, + "loss_sod": 0.030306756496429443, + "loss_total": 0.457611620426178, + "step": 362699 + }, + { + "epoch": 0.021398, + "loss_gen": 6.039510726928711, + "loss_rtd": 0.2075674682855606, + "loss_sent": 0.29000580310821533, + "loss_sod": 0.04601665958762169, + "loss_total": 0.5435899496078491, + "step": 362699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.5605874061584473, + "learning_rate": 1.3945723570782721e-06, + "loss": 0.4047, + "step": 362700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.936351299285889, + "loss_rtd": 0.20335261523723602, + "loss_sent": 0.1253519058227539, + "loss_sod": 0.08839675784111023, + "loss_total": 0.41710126399993896, + "step": 362799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.9179253578186035, + "loss_rtd": 0.1987505406141281, + "loss_sent": 0.3874909579753876, + "loss_sod": 0.026826800778508186, + "loss_total": 0.6130682826042175, + "step": 362799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.2404730319976807, + "learning_rate": 1.3871397021094634e-06, + "loss": 0.4224, + "step": 362800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.974052906036377, + "loss_rtd": 0.21229790151119232, + "loss_sent": 0.18950486183166504, + "loss_sod": 0.04930327832698822, + "loss_total": 0.4511060416698456, + "step": 362899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.780531883239746, + "loss_rtd": 0.20626120269298553, + "loss_sent": 0.3620285987854004, + "loss_sod": 0.0024015717208385468, + "loss_total": 0.5706913471221924, + "step": 362899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.1324671506881714, + "learning_rate": 1.3797266283949784e-06, + "loss": 0.4301, + "step": 362900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.887303352355957, + "loss_rtd": 0.21104301512241364, + "loss_sent": 0.11129486560821533, + "loss_sod": 0.08403178304433823, + "loss_total": 0.4063696563243866, + "step": 362999 + }, + { + "epoch": 0.021998, + "loss_gen": 6.0441107749938965, + "loss_rtd": 0.20486198365688324, + "loss_sent": 0.2891045808792114, + "loss_sod": 0.04153968393802643, + "loss_total": 0.5355062484741211, + "step": 362999 + }, + { + "epoch": 0.022, + "grad_norm": 1.1031920909881592, + "learning_rate": 1.3723331389207893e-06, + "loss": 0.4165, + "step": 363000 + }, + { + "epoch": 0.022, + "eval_loss": 0.3953239321708679, + "eval_runtime": 150.2575, + "eval_samples_per_second": 102.777, + "eval_steps_per_second": 0.805, + "step": 363000 + }, + { + "epoch": 0.022198, + "loss_gen": 6.003880977630615, + "loss_rtd": 0.2102266550064087, + "loss_sent": 0.4249902367591858, + "loss_sod": 0.037185702472925186, + "loss_total": 0.6724026203155518, + "step": 363099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.912100791931152, + "loss_rtd": 0.2274281084537506, + "loss_sent": 0.14505121111869812, + "loss_sod": 0.024947090074419975, + "loss_total": 0.39742639660835266, + "step": 363099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.2895781993865967, + "learning_rate": 1.3649592366649922e-06, + "loss": 0.4153, + "step": 363100 + }, + { + "epoch": 0.022398, + "loss_gen": 6.098489761352539, + "loss_rtd": 0.21353188157081604, + "loss_sent": 0.11935798078775406, + "loss_sod": 0.044251710176467896, + "loss_total": 0.3771415650844574, + "step": 363199 + }, + { + "epoch": 0.022398, + "loss_gen": 6.05353307723999, + "loss_rtd": 0.19804789125919342, + "loss_sent": 0.16045266389846802, + "loss_sod": 0.026974501088261604, + "loss_total": 0.3854750394821167, + "step": 363199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.8909897804260254, + "learning_rate": 1.3576049245978052e-06, + "loss": 0.4166, + "step": 363200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.051226615905762, + "loss_rtd": 0.15107493102550507, + "loss_sent": 0.005753755569458008, + "loss_sod": 0.012761048041284084, + "loss_total": 0.16958972811698914, + "step": 363299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.795234680175781, + "loss_rtd": 0.2226068526506424, + "loss_sent": 0.14512288570404053, + "loss_sod": 0.012116256169974804, + "loss_total": 0.3798459768295288, + "step": 363299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.6773077249526978, + "learning_rate": 1.3502702056815308e-06, + "loss": 0.4272, + "step": 363300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.938736438751221, + "loss_rtd": 0.20495794713497162, + "loss_sent": 0.18485291302204132, + "loss_sod": 0.04449421167373657, + "loss_total": 0.4343050718307495, + "step": 363399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.523612976074219, + "loss_rtd": 0.1764361560344696, + "loss_sent": 0.051566604524850845, + "loss_sod": 0.1222393810749054, + "loss_total": 0.35024213790893555, + "step": 363399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.1190216541290283, + "learning_rate": 1.342955082870606e-06, + "loss": 0.4081, + "step": 363400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.842376232147217, + "loss_rtd": 0.20203332602977753, + "loss_sent": 0.2586800754070282, + "loss_sod": 0.025043871253728867, + "loss_total": 0.4857572913169861, + "step": 363499 + }, + { + "epoch": 0.022998, + "loss_gen": 6.056408882141113, + "loss_rtd": 0.19791297614574432, + "loss_sent": 0.15465180575847626, + "loss_sod": 0.053231462836265564, + "loss_total": 0.40579622983932495, + "step": 363499 + }, + { + "epoch": 0.023, + "grad_norm": 1.9620479345321655, + "learning_rate": 1.3356595591115516e-06, + "loss": 0.4011, + "step": 363500 + }, + { + "epoch": 0.023198, + "loss_gen": 6.134500980377197, + "loss_rtd": 0.2163434773683548, + "loss_sent": 0.22586670517921448, + "loss_sod": 0.06513367593288422, + "loss_total": 0.5073438882827759, + "step": 363599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.782366752624512, + "loss_rtd": 0.2267577201128006, + "loss_sent": 0.0642230436205864, + "loss_sod": 0.003363637952134013, + "loss_total": 0.29434439539909363, + "step": 363599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.8331063389778137, + "learning_rate": 1.3283836373430059e-06, + "loss": 0.4111, + "step": 363600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.831057071685791, + "loss_rtd": 0.18285562098026276, + "loss_sent": 6.96662173140794e-05, + "loss_sod": 0.0824233740568161, + "loss_total": 0.2653486728668213, + "step": 363699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.61504602432251, + "loss_rtd": 0.1799226701259613, + "loss_sent": 3.860507786157541e-05, + "loss_sod": 0.09754588454961777, + "loss_total": 0.27750715613365173, + "step": 363699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8533154726028442, + "learning_rate": 1.3211273204957186e-06, + "loss": 0.4163, + "step": 363700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.171628475189209, + "loss_rtd": 0.17534704506397247, + "loss_sent": 0.002893816912546754, + "loss_sod": 0.13911409676074982, + "loss_total": 0.3173549473285675, + "step": 363799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.8331499099731445, + "loss_rtd": 0.21556037664413452, + "loss_sent": 0.10961310565471649, + "loss_sod": 0.06676691025495529, + "loss_total": 0.3919404149055481, + "step": 363799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.1043574810028076, + "learning_rate": 1.3138906114925132e-06, + "loss": 0.4018, + "step": 363800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.4935126304626465, + "loss_rtd": 0.1978653371334076, + "loss_sent": 2.524955925764516e-05, + "loss_sod": 0.13067789375782013, + "loss_total": 0.3285684883594513, + "step": 363899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.425945281982422, + "loss_rtd": 0.16025815904140472, + "loss_sent": 5.956891618552618e-05, + "loss_sod": 0.08635027706623077, + "loss_total": 0.2466680109500885, + "step": 363899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.9258561134338379, + "learning_rate": 1.306673513248352e-06, + "loss": 0.3935, + "step": 363900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.766495227813721, + "loss_rtd": 0.16648903489112854, + "loss_sent": 2.2863701815367676e-05, + "loss_sod": 0.06929951906204224, + "loss_total": 0.23581141233444214, + "step": 363999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.72847318649292, + "loss_rtd": 0.17708592116832733, + "loss_sent": 0.01743916980922222, + "loss_sod": 0.09215869009494781, + "loss_total": 0.2866837680339813, + "step": 363999 + }, + { + "epoch": 0.024, + "grad_norm": 0.9077220559120178, + "learning_rate": 1.2994760286702767e-06, + "loss": 0.4285, + "step": 364000 + }, + { + "epoch": 0.024, + "eval_loss": 0.39305803179740906, + "eval_runtime": 150.4121, + "eval_samples_per_second": 102.671, + "eval_steps_per_second": 0.804, + "step": 364000 + }, + { + "epoch": 0.024198, + "loss_gen": 6.001964092254639, + "loss_rtd": 0.21313053369522095, + "loss_sent": 0.3803410232067108, + "loss_sod": 0.010521373711526394, + "loss_total": 0.6039929389953613, + "step": 364099 + }, + { + "epoch": 0.024198, + "loss_gen": 6.142898082733154, + "loss_rtd": 0.22206230461597443, + "loss_sent": 0.13670524954795837, + "loss_sod": 0.012702586129307747, + "loss_total": 0.3714701533317566, + "step": 364099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.8884294629096985, + "learning_rate": 1.2922981606574348e-06, + "loss": 0.4052, + "step": 364100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.53360652923584, + "loss_rtd": 0.17999228835105896, + "loss_sent": 2.7344931368133985e-05, + "loss_sod": 0.03845468908548355, + "loss_total": 0.21847431361675262, + "step": 364199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.515370845794678, + "loss_rtd": 0.1846403181552887, + "loss_sent": 0.11708176881074905, + "loss_sod": 0.04682455211877823, + "loss_total": 0.3485466241836548, + "step": 364199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.9045760035514832, + "learning_rate": 1.2851399121010687e-06, + "loss": 0.4004, + "step": 364200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.977632999420166, + "loss_rtd": 0.2112598568201065, + "loss_sent": 0.5074205994606018, + "loss_sod": 0.03248698264360428, + "loss_total": 0.7511674165725708, + "step": 364299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.592776775360107, + "loss_rtd": 0.1873634159564972, + "loss_sent": 0.02264423482120037, + "loss_sod": 0.05196958780288696, + "loss_total": 0.2619772255420685, + "step": 364299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.5159434080123901, + "learning_rate": 1.2780012858845169e-06, + "loss": 0.4099, + "step": 364300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.8185529708862305, + "loss_rtd": 0.19173812866210938, + "loss_sent": 0.1363159418106079, + "loss_sod": 0.09146563708782196, + "loss_total": 0.41951972246170044, + "step": 364399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.35556697845459, + "loss_rtd": 0.19410699605941772, + "loss_sent": 0.09060783684253693, + "loss_sod": 0.029730046167969704, + "loss_total": 0.314444899559021, + "step": 364399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.2566590309143066, + "learning_rate": 1.270882284883229e-06, + "loss": 0.427, + "step": 364400 + }, + { + "epoch": 0.024998, + "loss_gen": 6.060112476348877, + "loss_rtd": 0.2194378525018692, + "loss_sent": 0.25543156266212463, + "loss_sod": 0.08118052780628204, + "loss_total": 0.5560499429702759, + "step": 364499 + }, + { + "epoch": 0.024998, + "loss_gen": 6.098330020904541, + "loss_rtd": 0.20278386771678925, + "loss_sent": 0.1597369760274887, + "loss_sod": 0.07793529331684113, + "loss_total": 0.4404561519622803, + "step": 364499 + }, + { + "epoch": 0.025, + "grad_norm": 1.1031123399734497, + "learning_rate": 1.2637829119647172e-06, + "loss": 0.4143, + "step": 364500 + }, + { + "epoch": 0.025198, + "loss_gen": 6.049230098724365, + "loss_rtd": 0.23129546642303467, + "loss_sent": 0.1357702761888504, + "loss_sod": 0.025197722017765045, + "loss_total": 0.3922634720802307, + "step": 364599 + }, + { + "epoch": 0.025198, + "loss_gen": 6.046126842498779, + "loss_rtd": 0.2011864334344864, + "loss_sent": 0.14850084483623505, + "loss_sod": 0.016730941832065582, + "loss_total": 0.3664182424545288, + "step": 364599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.8324997425079346, + "learning_rate": 1.2567031699886267e-06, + "loss": 0.4184, + "step": 364600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.379150867462158, + "loss_rtd": 0.17376330494880676, + "loss_sent": 0.038890544325113297, + "loss_sod": 0.07320712506771088, + "loss_total": 0.28586098551750183, + "step": 364699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.987701892852783, + "loss_rtd": 0.21855410933494568, + "loss_sent": 0.11376553028821945, + "loss_sod": 0.011206366121768951, + "loss_total": 0.3435260057449341, + "step": 364699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.7531861662864685, + "learning_rate": 1.2496430618066656e-06, + "loss": 0.4313, + "step": 364700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.782328128814697, + "loss_rtd": 0.18509866297245026, + "loss_sent": 0.289043664932251, + "loss_sod": 0.012807246297597885, + "loss_total": 0.4869495630264282, + "step": 364799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.947630405426025, + "loss_rtd": 0.21062570810317993, + "loss_sent": 0.24626106023788452, + "loss_sod": 0.03454527258872986, + "loss_total": 0.4914320409297943, + "step": 364799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.2206841707229614, + "learning_rate": 1.2426025902626592e-06, + "loss": 0.4143, + "step": 364800 + }, + { + "epoch": 0.025798, + "loss_gen": 6.341766834259033, + "loss_rtd": 0.23094379901885986, + "loss_sent": 0.15503183007240295, + "loss_sod": 0.020966900512576103, + "loss_total": 0.4069425165653229, + "step": 364899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.520390510559082, + "loss_rtd": 0.1978568583726883, + "loss_sent": 0.02651343122124672, + "loss_sod": 0.062034301459789276, + "loss_total": 0.2864045798778534, + "step": 364899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.8073655366897583, + "learning_rate": 1.2355817581924944e-06, + "loss": 0.3938, + "step": 364900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.278810501098633, + "loss_rtd": 0.1681014448404312, + "loss_sent": 0.009314697235822678, + "loss_sod": 0.07258976250886917, + "loss_total": 0.25000590085983276, + "step": 364999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.8487677574157715, + "loss_rtd": 0.21409372985363007, + "loss_sent": 0.11997484415769577, + "loss_sod": 0.00814978126436472, + "loss_total": 0.3422183692455292, + "step": 364999 + }, + { + "epoch": 0.026, + "grad_norm": 0.7892587780952454, + "learning_rate": 1.2285805684241592e-06, + "loss": 0.3964, + "step": 365000 + }, + { + "epoch": 0.026, + "eval_loss": 0.39621347188949585, + "eval_runtime": 150.439, + "eval_samples_per_second": 102.653, + "eval_steps_per_second": 0.804, + "step": 365000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.922516345977783, + "loss_rtd": 0.19338731467723846, + "loss_sent": 0.09866566210985184, + "loss_sod": 0.04476850479841232, + "loss_total": 0.33682146668434143, + "step": 365099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.713855266571045, + "loss_rtd": 0.2088758498430252, + "loss_sent": 0.4114687442779541, + "loss_sod": 0.08549314737319946, + "loss_total": 0.7058377265930176, + "step": 365099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.5730397701263428, + "learning_rate": 1.2215990237777419e-06, + "loss": 0.4098, + "step": 365100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.531763553619385, + "loss_rtd": 0.1814747154712677, + "loss_sent": 0.075872503221035, + "loss_sod": 0.06202581524848938, + "loss_total": 0.3193730413913727, + "step": 365199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.825388431549072, + "loss_rtd": 0.2028234899044037, + "loss_sent": 0.21024520695209503, + "loss_sod": 0.02161809802055359, + "loss_total": 0.4346867799758911, + "step": 365199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.2231762409210205, + "learning_rate": 1.21463712706541e-06, + "loss": 0.4154, + "step": 365200 + }, + { + "epoch": 0.026598, + "loss_gen": 6.497543811798096, + "loss_rtd": 0.19838689267635345, + "loss_sent": 0.1384795606136322, + "loss_sod": 0.04936708137392998, + "loss_total": 0.38623353838920593, + "step": 365299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.94274377822876, + "loss_rtd": 0.22551003098487854, + "loss_sent": 0.21004793047904968, + "loss_sod": 0.021115781739354134, + "loss_total": 0.4566737413406372, + "step": 365299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.6815785765647888, + "learning_rate": 1.2076948810914036e-06, + "loss": 0.421, + "step": 365300 + }, + { + "epoch": 0.026798, + "loss_gen": 6.2182111740112305, + "loss_rtd": 0.22094619274139404, + "loss_sent": 0.13484445214271545, + "loss_sod": 0.028378788381814957, + "loss_total": 0.38416942954063416, + "step": 365399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.6115193367004395, + "loss_rtd": 0.1851220428943634, + "loss_sent": 0.12120498716831207, + "loss_sod": 0.009966659359633923, + "loss_total": 0.3162936866283417, + "step": 365399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.6521903276443481, + "learning_rate": 1.2007722886520634e-06, + "loss": 0.4183, + "step": 365400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.724940299987793, + "loss_rtd": 0.19978727400302887, + "loss_sent": 0.2348742038011551, + "loss_sod": 0.024614332243800163, + "loss_total": 0.4592758119106293, + "step": 365499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.600672721862793, + "loss_rtd": 0.21980349719524384, + "loss_sent": 0.3051648736000061, + "loss_sod": 0.022819124162197113, + "loss_total": 0.5477874875068665, + "step": 365499 + }, + { + "epoch": 0.027, + "grad_norm": 2.144178867340088, + "learning_rate": 1.1938693525358147e-06, + "loss": 0.4189, + "step": 365500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.77287483215332, + "loss_rtd": 0.20586730539798737, + "loss_sent": 0.12783952057361603, + "loss_sod": 0.002401602454483509, + "loss_total": 0.3361084461212158, + "step": 365599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.98655891418457, + "loss_rtd": 0.23912228643894196, + "loss_sent": 0.18128874897956848, + "loss_sod": 0.031496576964855194, + "loss_total": 0.45190760493278503, + "step": 365599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.8339471220970154, + "learning_rate": 1.1869860755231555e-06, + "loss": 0.4093, + "step": 365600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.848016738891602, + "loss_rtd": 0.21581026911735535, + "loss_sent": 0.5503897070884705, + "loss_sod": 0.05699167400598526, + "loss_total": 0.8231916427612305, + "step": 365699 + }, + { + "epoch": 0.027398, + "loss_gen": 6.081898212432861, + "loss_rtd": 0.2190917730331421, + "loss_sent": 0.11327815055847168, + "loss_sod": 0.030282845720648766, + "loss_total": 0.3626527786254883, + "step": 365699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.380051612854004, + "learning_rate": 1.1801224603866624e-06, + "loss": 0.4239, + "step": 365700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.3949713706970215, + "loss_rtd": 0.17945027351379395, + "loss_sent": 0.07280128449201584, + "loss_sod": 0.039197977632284164, + "loss_total": 0.29144954681396484, + "step": 365799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.641369819641113, + "loss_rtd": 0.1944916695356369, + "loss_sent": 0.28476682305336, + "loss_sod": 0.052630335092544556, + "loss_total": 0.5318888425827026, + "step": 365799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.8354421854019165, + "learning_rate": 1.1732785098910015e-06, + "loss": 0.4149, + "step": 365800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.409779071807861, + "loss_rtd": 0.15218254923820496, + "loss_sent": 0.0001465770765207708, + "loss_sod": 0.05663083493709564, + "loss_total": 0.20895996689796448, + "step": 365899 + }, + { + "epoch": 0.027798, + "loss_gen": 6.117166996002197, + "loss_rtd": 0.2116507589817047, + "loss_sent": 0.08239760249853134, + "loss_sod": 0.08382310718297958, + "loss_total": 0.37787148356437683, + "step": 365899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.1110810041427612, + "learning_rate": 1.1664542267929236e-06, + "loss": 0.4229, + "step": 365900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.705819606781006, + "loss_rtd": 0.18144482374191284, + "loss_sent": 0.07757905125617981, + "loss_sod": 0.03736221790313721, + "loss_total": 0.29638609290122986, + "step": 365999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.793848037719727, + "loss_rtd": 0.1927725076675415, + "loss_sent": 0.10372845828533173, + "loss_sod": 0.020766176283359528, + "loss_total": 0.31726711988449097, + "step": 365999 + }, + { + "epoch": 0.028, + "grad_norm": 1.0994117259979248, + "learning_rate": 1.1596496138412405e-06, + "loss": 0.4248, + "step": 366000 + }, + { + "epoch": 0.028, + "eval_loss": 0.3949771225452423, + "eval_runtime": 150.2526, + "eval_samples_per_second": 102.78, + "eval_steps_per_second": 0.805, + "step": 366000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.71099328994751, + "loss_rtd": 0.2157634049654007, + "loss_sent": 0.10976104438304901, + "loss_sod": 0.0013143944088369608, + "loss_total": 0.3268388509750366, + "step": 366099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.858689308166504, + "loss_rtd": 0.2047213315963745, + "loss_sent": 0.035277750343084335, + "loss_sod": 0.042113568633794785, + "loss_total": 0.28211265802383423, + "step": 366099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.6429213285446167, + "learning_rate": 1.1528646737768544e-06, + "loss": 0.4217, + "step": 366100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.90069055557251, + "loss_rtd": 0.20099638402462006, + "loss_sent": 0.12928296625614166, + "loss_sod": 0.05825787037611008, + "loss_total": 0.3885372281074524, + "step": 366199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.547062873840332, + "loss_rtd": 0.17707639932632446, + "loss_sent": 0.010011816397309303, + "loss_sod": 0.07997766882181168, + "loss_total": 0.2670658826828003, + "step": 366199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.237799882888794, + "learning_rate": 1.1460994093327294e-06, + "loss": 0.4174, + "step": 366200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.897192001342773, + "loss_rtd": 0.21719658374786377, + "loss_sent": 0.11083323508501053, + "loss_sod": 0.02024351805448532, + "loss_total": 0.3482733368873596, + "step": 366299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.976822376251221, + "loss_rtd": 0.20563849806785583, + "loss_sent": 0.2585853934288025, + "loss_sod": 0.0752931609749794, + "loss_total": 0.5395170450210571, + "step": 366299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.2281782627105713, + "learning_rate": 1.1393538232339297e-06, + "loss": 0.428, + "step": 366300 + }, + { + "epoch": 0.028798, + "loss_gen": 6.089418888092041, + "loss_rtd": 0.19139453768730164, + "loss_sent": 0.23716497421264648, + "loss_sod": 0.030864574015140533, + "loss_total": 0.45942407846450806, + "step": 366399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.777108669281006, + "loss_rtd": 0.23330779373645782, + "loss_sent": 0.21502019464969635, + "loss_sod": 0.035990871489048004, + "loss_total": 0.4843188524246216, + "step": 366399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.0611236095428467, + "learning_rate": 1.1326279181975597e-06, + "loss": 0.4153, + "step": 366400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.207485198974609, + "loss_rtd": 0.1862115114927292, + "loss_sent": 2.4232707801274955e-05, + "loss_sod": 0.14000684022903442, + "loss_total": 0.326242595911026, + "step": 366499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.205263614654541, + "loss_rtd": 0.16241781413555145, + "loss_sent": 2.5706000087666325e-05, + "loss_sod": 0.39901670813560486, + "loss_total": 0.5614601969718933, + "step": 366499 + }, + { + "epoch": 0.029, + "grad_norm": 1.6571792364120483, + "learning_rate": 1.1259216969328245e-06, + "loss": 0.4161, + "step": 366500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.826868534088135, + "loss_rtd": 0.20336231589317322, + "loss_sent": 0.44961991906166077, + "loss_sod": 0.005061282776296139, + "loss_total": 0.6580435037612915, + "step": 366599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.7266411781311035, + "loss_rtd": 0.21946556866168976, + "loss_sent": 0.2193601280450821, + "loss_sod": 0.04416005313396454, + "loss_total": 0.4829857349395752, + "step": 366599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.7573833465576172, + "learning_rate": 1.1192351621409803e-06, + "loss": 0.4112, + "step": 366600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.756178855895996, + "loss_rtd": 0.22352372109889984, + "loss_sent": 0.04831600934267044, + "loss_sod": 0.08884535729885101, + "loss_total": 0.3606850802898407, + "step": 366699 + }, + { + "epoch": 0.029398, + "loss_gen": 6.121270656585693, + "loss_rtd": 0.20888565480709076, + "loss_sent": 0.11543800681829453, + "loss_sod": 0.08961872011423111, + "loss_total": 0.4139423966407776, + "step": 366699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.986507534980774, + "learning_rate": 1.1125683165153778e-06, + "loss": 0.4145, + "step": 366700 + }, + { + "epoch": 0.029598, + "loss_gen": 6.292279243469238, + "loss_rtd": 0.21221573650836945, + "loss_sent": 0.37807655334472656, + "loss_sod": 0.03985176235437393, + "loss_total": 0.6301440596580505, + "step": 366799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.803103923797607, + "loss_rtd": 0.22886456549167633, + "loss_sent": 0.4535507559776306, + "loss_sod": 0.009185624308884144, + "loss_total": 0.6916009187698364, + "step": 366799 + }, + { + "epoch": 0.0296, + "grad_norm": 2.8848581314086914, + "learning_rate": 1.1059211627414024e-06, + "loss": 0.4084, + "step": 366800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.5437517166137695, + "loss_rtd": 0.20916791260242462, + "loss_sent": 0.0702207088470459, + "loss_sod": 0.006545985583215952, + "loss_total": 0.2859346270561218, + "step": 366899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.918610572814941, + "loss_rtd": 0.20172882080078125, + "loss_sent": 0.2698323726654053, + "loss_sod": 0.023541470989584923, + "loss_total": 0.4951026439666748, + "step": 366899 + }, + { + "epoch": 0.0298, + "grad_norm": 0.6408466100692749, + "learning_rate": 1.0992937034965345e-06, + "loss": 0.4197, + "step": 366900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.614994049072266, + "loss_rtd": 0.19955208897590637, + "loss_sent": 0.23481421172618866, + "loss_sod": 0.018059633672237396, + "loss_total": 0.4524259567260742, + "step": 366999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.810892105102539, + "loss_rtd": 0.22652031481266022, + "loss_sent": 0.41318392753601074, + "loss_sod": 0.013888641260564327, + "loss_total": 0.6535928845405579, + "step": 366999 + }, + { + "epoch": 0.03, + "grad_norm": 0.8815780282020569, + "learning_rate": 1.0926859414503165e-06, + "loss": 0.4225, + "step": 367000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3982074558734894, + "eval_runtime": 150.1153, + "eval_samples_per_second": 102.874, + "eval_steps_per_second": 0.806, + "step": 367000 + }, + { + "epoch": 0.030198, + "loss_gen": 6.105481147766113, + "loss_rtd": 0.2054414600133896, + "loss_sent": 0.2658814489841461, + "loss_sod": 0.012263797223567963, + "loss_total": 0.48358669877052307, + "step": 367099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.74627161026001, + "loss_rtd": 0.1987999677658081, + "loss_sent": 0.3739352226257324, + "loss_sod": 0.03399643301963806, + "loss_total": 0.606731653213501, + "step": 367099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.4633259773254395, + "learning_rate": 1.0860978792643527e-06, + "loss": 0.4243, + "step": 367100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.5245490074157715, + "loss_rtd": 0.19503413140773773, + "loss_sent": 0.06523527204990387, + "loss_sod": 0.034987159073352814, + "loss_total": 0.2952565550804138, + "step": 367199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.764045715332031, + "loss_rtd": 0.20709584653377533, + "loss_sent": 0.519471287727356, + "loss_sod": 0.04452098533511162, + "loss_total": 0.7710881233215332, + "step": 367199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.2977943420410156, + "learning_rate": 1.079529519592315e-06, + "loss": 0.428, + "step": 367200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.746772289276123, + "loss_rtd": 0.17845812439918518, + "loss_sent": 0.022705502808094025, + "loss_sod": 0.14325875043869019, + "loss_total": 0.3444223701953888, + "step": 367299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.987921714782715, + "loss_rtd": 0.22216100990772247, + "loss_sent": 0.17972496151924133, + "loss_sod": 0.02743348479270935, + "loss_total": 0.42931944131851196, + "step": 367299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.1215088367462158, + "learning_rate": 1.0729808650799367e-06, + "loss": 0.4266, + "step": 367300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.921072483062744, + "loss_rtd": 0.22161927819252014, + "loss_sent": 0.17021888494491577, + "loss_sod": 0.026013188064098358, + "loss_total": 0.41785135865211487, + "step": 367399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.684786319732666, + "loss_rtd": 0.20871436595916748, + "loss_sent": 0.4201463460922241, + "loss_sod": 0.020044436678290367, + "loss_total": 0.6489051580429077, + "step": 367399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.6168192625045776, + "learning_rate": 1.0664519183650078e-06, + "loss": 0.403, + "step": 367400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.3276543617248535, + "loss_rtd": 0.17410051822662354, + "loss_sent": 0.0006031687371432781, + "loss_sod": 0.07032284140586853, + "loss_total": 0.24502652883529663, + "step": 367499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.103432655334473, + "loss_rtd": 0.15331293642520905, + "loss_sent": 0.0462593249976635, + "loss_sod": 0.021818481385707855, + "loss_total": 0.2213907390832901, + "step": 367499 + }, + { + "epoch": 0.031, + "grad_norm": 0.648463785648346, + "learning_rate": 1.0599426820774083e-06, + "loss": 0.4031, + "step": 367500 + }, + { + "epoch": 0.031198, + "loss_gen": 6.198997974395752, + "loss_rtd": 0.20361800491809845, + "loss_sent": 0.07401644438505173, + "loss_sod": 0.04422963783144951, + "loss_total": 0.3218640983104706, + "step": 367599 + }, + { + "epoch": 0.031198, + "loss_gen": 6.041176795959473, + "loss_rtd": 0.19243599474430084, + "loss_sent": 0.060878489166498184, + "loss_sod": 0.04446679353713989, + "loss_total": 0.2977812886238098, + "step": 367599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.5834943056106567, + "learning_rate": 1.0534531588390351e-06, + "loss": 0.4232, + "step": 367600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.941068172454834, + "loss_rtd": 0.20858453214168549, + "loss_sent": 0.2920042872428894, + "loss_sod": 0.003702358575537801, + "loss_total": 0.5042911767959595, + "step": 367699 + }, + { + "epoch": 0.031398, + "loss_gen": 6.093193054199219, + "loss_rtd": 0.21786224842071533, + "loss_sent": 0.06440935283899307, + "loss_sod": 0.036137934774160385, + "loss_total": 0.3184095323085785, + "step": 367699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.2168036699295044, + "learning_rate": 1.0469833512638749e-06, + "loss": 0.4244, + "step": 367700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.4780402183532715, + "loss_rtd": 0.16045156121253967, + "loss_sent": 0.05479176342487335, + "loss_sod": 0.08664466440677643, + "loss_total": 0.30188798904418945, + "step": 367799 + }, + { + "epoch": 0.031598, + "loss_gen": 6.051278114318848, + "loss_rtd": 0.21020716428756714, + "loss_sent": 0.29729753732681274, + "loss_sod": 0.01826038956642151, + "loss_total": 0.525765061378479, + "step": 367799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.131919264793396, + "learning_rate": 1.040533261957971e-06, + "loss": 0.4232, + "step": 367800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.865645408630371, + "loss_rtd": 0.2175774872303009, + "loss_sent": 0.3327351212501526, + "loss_sod": 0.054611686617136, + "loss_total": 0.6049243211746216, + "step": 367899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.993206977844238, + "loss_rtd": 0.2082110494375229, + "loss_sent": 0.503560483455658, + "loss_sod": 0.03816059231758118, + "loss_total": 0.7499321103096008, + "step": 367899 + }, + { + "epoch": 0.0318, + "grad_norm": 2.405414581298828, + "learning_rate": 1.0341028935194118e-06, + "loss": 0.4136, + "step": 367900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.362062931060791, + "loss_rtd": 0.16042421758174896, + "loss_sent": 0.036109164357185364, + "loss_sod": 0.03005451150238514, + "loss_total": 0.22658789157867432, + "step": 367999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.485085487365723, + "loss_rtd": 0.21209366619586945, + "loss_sent": 0.14857341349124908, + "loss_sod": 0.0034150639548897743, + "loss_total": 0.3640821576118469, + "step": 367999 + }, + { + "epoch": 0.032, + "grad_norm": 0.7494519352912903, + "learning_rate": 1.0276922485383478e-06, + "loss": 0.4176, + "step": 368000 + }, + { + "epoch": 0.032, + "eval_loss": 0.3909725546836853, + "eval_runtime": 151.9422, + "eval_samples_per_second": 101.637, + "eval_steps_per_second": 0.796, + "step": 368000 + }, + { + "epoch": 0.032198, + "loss_gen": 5.760014533996582, + "loss_rtd": 0.22472217679023743, + "loss_sent": 0.3562431335449219, + "loss_sod": 0.008840315043926239, + "loss_total": 0.5898056030273438, + "step": 368099 + }, + { + "epoch": 0.032198, + "loss_gen": 5.884953022003174, + "loss_rtd": 0.1917876899242401, + "loss_sent": 0.18902307748794556, + "loss_sod": 0.10972850024700165, + "loss_total": 0.4905392527580261, + "step": 368099 + }, + { + "epoch": 0.0322, + "grad_norm": 1.0253633260726929, + "learning_rate": 1.0213013295969909e-06, + "loss": 0.4224, + "step": 368100 + }, + { + "epoch": 0.032398, + "loss_gen": 6.02172327041626, + "loss_rtd": 0.20438213646411896, + "loss_sent": 0.33307620882987976, + "loss_sod": 0.05011449754238129, + "loss_total": 0.5875728130340576, + "step": 368199 + }, + { + "epoch": 0.032398, + "loss_gen": 5.7057085037231445, + "loss_rtd": 0.19295965135097504, + "loss_sent": 0.3768034279346466, + "loss_sod": 0.06536341458559036, + "loss_total": 0.6351264715194702, + "step": 368199 + }, + { + "epoch": 0.0324, + "grad_norm": 2.3299827575683594, + "learning_rate": 1.0149301392696097e-06, + "loss": 0.4343, + "step": 368200 + }, + { + "epoch": 0.032598, + "loss_gen": 5.719908237457275, + "loss_rtd": 0.21271894872188568, + "loss_sent": 0.7014883160591125, + "loss_sod": 0.07076235115528107, + "loss_total": 0.9849696159362793, + "step": 368299 + }, + { + "epoch": 0.032598, + "loss_gen": 5.860711097717285, + "loss_rtd": 0.20450043678283691, + "loss_sent": 0.10516748577356339, + "loss_sod": 0.016955647617578506, + "loss_total": 0.3266235589981079, + "step": 368299 + }, + { + "epoch": 0.0326, + "grad_norm": 2.1229238510131836, + "learning_rate": 1.0085786801225016e-06, + "loss": 0.4465, + "step": 368300 + }, + { + "epoch": 0.032798, + "loss_gen": 5.92908239364624, + "loss_rtd": 0.21102026104927063, + "loss_sent": 0.14362558722496033, + "loss_sod": 0.059954918920993805, + "loss_total": 0.41460075974464417, + "step": 368399 + }, + { + "epoch": 0.032798, + "loss_gen": 5.716544151306152, + "loss_rtd": 0.22120824456214905, + "loss_sent": 0.16445207595825195, + "loss_sod": 0.028329530730843544, + "loss_total": 0.4139898419380188, + "step": 368399 + }, + { + "epoch": 0.0328, + "grad_norm": 1.179724931716919, + "learning_rate": 1.0022469547140422e-06, + "loss": 0.4182, + "step": 368400 + }, + { + "epoch": 0.032998, + "loss_gen": 6.229667663574219, + "loss_rtd": 0.23950572311878204, + "loss_sent": 0.09101808816194534, + "loss_sod": 0.017648961395025253, + "loss_total": 0.34817275404930115, + "step": 368499 + }, + { + "epoch": 0.032998, + "loss_gen": 5.549963474273682, + "loss_rtd": 0.18107330799102783, + "loss_sent": 0.014808050356805325, + "loss_sod": 0.0342152938246727, + "loss_total": 0.23009665310382843, + "step": 368499 + }, + { + "epoch": 0.033, + "grad_norm": 0.6222190260887146, + "learning_rate": 9.959349655946527e-07, + "loss": 0.4016, + "step": 368500 + }, + { + "epoch": 0.033198, + "loss_gen": 6.249142169952393, + "loss_rtd": 0.22375130653381348, + "loss_sent": 0.11936704069375992, + "loss_sod": 0.059540338814258575, + "loss_total": 0.4026586711406708, + "step": 368599 + }, + { + "epoch": 0.033198, + "loss_gen": 5.763450622558594, + "loss_rtd": 0.20569217205047607, + "loss_sent": 0.18210792541503906, + "loss_sod": 0.035910092294216156, + "loss_total": 0.4237101972103119, + "step": 368599 + }, + { + "epoch": 0.0332, + "grad_norm": 0.7430477738380432, + "learning_rate": 9.896427153068045e-07, + "loss": 0.4256, + "step": 368600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.651984691619873, + "loss_rtd": 0.23930306732654572, + "loss_sent": 0.06848891824483871, + "loss_sod": 0.030633607879281044, + "loss_total": 0.3384256064891815, + "step": 368699 + }, + { + "epoch": 0.033398, + "loss_gen": 6.198563098907471, + "loss_rtd": 0.1870102882385254, + "loss_sent": 0.3745875656604767, + "loss_sod": 0.02009415253996849, + "loss_total": 0.5816919803619385, + "step": 368699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.0569384098052979, + "learning_rate": 9.833702063850037e-07, + "loss": 0.4194, + "step": 368700 + }, + { + "epoch": 0.033598, + "loss_gen": 5.391874313354492, + "loss_rtd": 0.16648748517036438, + "loss_sent": 0.0024607256054878235, + "loss_sod": 0.042681336402893066, + "loss_total": 0.21162953972816467, + "step": 368799 + }, + { + "epoch": 0.033598, + "loss_gen": 5.786269187927246, + "loss_rtd": 0.18479004502296448, + "loss_sent": 0.1572154462337494, + "loss_sod": 0.004926848225295544, + "loss_total": 0.3469323515892029, + "step": 368799 + }, + { + "epoch": 0.0336, + "grad_norm": 0.5583371520042419, + "learning_rate": 9.771174413558182e-07, + "loss": 0.4187, + "step": 368800 + }, + { + "epoch": 0.033798, + "loss_gen": 5.921062469482422, + "loss_rtd": 0.2207319587469101, + "loss_sent": 0.04883941635489464, + "loss_sod": 0.06598001718521118, + "loss_total": 0.3355514109134674, + "step": 368899 + }, + { + "epoch": 0.033798, + "loss_gen": 5.296463966369629, + "loss_rtd": 0.18052725493907928, + "loss_sent": 2.27016607823316e-05, + "loss_sod": 0.057295117527246475, + "loss_total": 0.2378450632095337, + "step": 368899 + }, + { + "epoch": 0.0338, + "grad_norm": 0.76303631067276, + "learning_rate": 9.708844227378666e-07, + "loss": 0.4373, + "step": 368900 + }, + { + "epoch": 0.033998, + "loss_gen": 5.713212966918945, + "loss_rtd": 0.20468173921108246, + "loss_sent": 0.21971869468688965, + "loss_sod": 0.050458066165447235, + "loss_total": 0.47485849261283875, + "step": 368999 + }, + { + "epoch": 0.033998, + "loss_gen": 5.200697422027588, + "loss_rtd": 0.15441444516181946, + "loss_sent": 0.010356126353144646, + "loss_sod": 0.09508315473794937, + "loss_total": 0.259853720664978, + "step": 368999 + }, + { + "epoch": 0.034, + "grad_norm": 1.424156665802002, + "learning_rate": 9.646711530418129e-07, + "loss": 0.4213, + "step": 369000 + }, + { + "epoch": 0.034, + "eval_loss": 0.39443323016166687, + "eval_runtime": 150.452, + "eval_samples_per_second": 102.644, + "eval_steps_per_second": 0.804, + "step": 369000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.88716983795166, + "loss_rtd": 0.2215983122587204, + "loss_sent": 0.4346535801887512, + "loss_sod": 0.0489102266728878, + "loss_total": 0.7051621079444885, + "step": 369099 + }, + { + "epoch": 0.000198, + "loss_gen": 6.08715295791626, + "loss_rtd": 0.19842500984668732, + "loss_sent": 0.047167420387268066, + "loss_sod": 0.2069673240184784, + "loss_total": 0.45255976915359497, + "step": 369099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.6441986560821533, + "learning_rate": 9.584776347703496e-07, + "loss": 0.4048, + "step": 369100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.984760284423828, + "loss_rtd": 0.19478578865528107, + "loss_sent": 0.0894625335931778, + "loss_sod": 0.04968101903796196, + "loss_total": 0.3339293599128723, + "step": 369199 + }, + { + "epoch": 0.000398, + "loss_gen": 6.103747844696045, + "loss_rtd": 0.20013120770454407, + "loss_sent": 0.1266428381204605, + "loss_sod": 0.07164319604635239, + "loss_total": 0.39841723442077637, + "step": 369199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.8013847470283508, + "learning_rate": 9.523038704182308e-07, + "loss": 0.4229, + "step": 369200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.663417816162109, + "loss_rtd": 0.18251462280750275, + "loss_sent": 0.17311504483222961, + "loss_sod": 0.016081828624010086, + "loss_total": 0.37171149253845215, + "step": 369299 + }, + { + "epoch": 0.000598, + "loss_gen": 6.117463111877441, + "loss_rtd": 0.20723600685596466, + "loss_sent": 0.2576310634613037, + "loss_sod": 0.044517409056425095, + "loss_total": 0.5093845129013062, + "step": 369299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.0062775611877441, + "learning_rate": 9.461498624722509e-07, + "loss": 0.4167, + "step": 369300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.934455871582031, + "loss_rtd": 0.21126939356327057, + "loss_sent": 0.16447478532791138, + "loss_sod": 0.009031460620462894, + "loss_total": 0.38477563858032227, + "step": 369399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.673975467681885, + "loss_rtd": 0.18478532135486603, + "loss_sent": 0.13288885354995728, + "loss_sod": 0.021034542471170425, + "loss_total": 0.33870869874954224, + "step": 369399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.9572023153305054, + "learning_rate": 9.400156134112326e-07, + "loss": 0.4103, + "step": 369400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.870675563812256, + "loss_rtd": 0.20630477368831635, + "loss_sent": 0.1055145338177681, + "loss_sod": 0.024161716923117638, + "loss_total": 0.33598101139068604, + "step": 369499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.971395492553711, + "loss_rtd": 0.22829517722129822, + "loss_sent": 0.15899527072906494, + "loss_sod": 0.013825427740812302, + "loss_total": 0.40111589431762695, + "step": 369499 + }, + { + "epoch": 0.001, + "grad_norm": 0.7026100754737854, + "learning_rate": 9.339011257060603e-07, + "loss": 0.425, + "step": 369500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.483034610748291, + "loss_rtd": 0.2005041539669037, + "loss_sent": 0.024152904748916626, + "loss_sod": 0.008470947854220867, + "loss_total": 0.23312801122665405, + "step": 369599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.93954610824585, + "loss_rtd": 0.1927112489938736, + "loss_sent": 0.08015337586402893, + "loss_sod": 0.037822175770998, + "loss_total": 0.31068679690361023, + "step": 369599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.759127676486969, + "learning_rate": 9.278064018196475e-07, + "loss": 0.4097, + "step": 369600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.919787406921387, + "loss_rtd": 0.2028111219406128, + "loss_sent": 0.14415960013866425, + "loss_sod": 0.004172762390226126, + "loss_total": 0.351143479347229, + "step": 369699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.837738037109375, + "loss_rtd": 0.2000478059053421, + "loss_sent": 0.2426430881023407, + "loss_sod": 0.046445779502391815, + "loss_total": 0.489136666059494, + "step": 369699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.3490941524505615, + "learning_rate": 9.217314442069524e-07, + "loss": 0.4247, + "step": 369700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.895453453063965, + "loss_rtd": 0.17332077026367188, + "loss_sent": 0.03327735885977745, + "loss_sod": 0.11678000539541245, + "loss_total": 0.32337814569473267, + "step": 369799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.440080165863037, + "loss_rtd": 0.2166971117258072, + "loss_sent": 0.033422715961933136, + "loss_sod": 0.023754417896270752, + "loss_total": 0.2738742530345917, + "step": 369799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.8282711505889893, + "learning_rate": 9.156762553149567e-07, + "loss": 0.4147, + "step": 369800 + }, + { + "epoch": 0.001798, + "loss_gen": 6.211438179016113, + "loss_rtd": 0.19635449349880219, + "loss_sent": 0.2437390685081482, + "loss_sod": 0.06331045925617218, + "loss_total": 0.5034040212631226, + "step": 369899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.734776973724365, + "loss_rtd": 0.23625712096691132, + "loss_sent": 0.2243865430355072, + "loss_sod": 0.09715672582387924, + "loss_total": 0.5578004121780396, + "step": 369899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.4983323812484741, + "learning_rate": 9.096408375826982e-07, + "loss": 0.4345, + "step": 369900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.560866832733154, + "loss_rtd": 0.1789192110300064, + "loss_sent": 0.02297811023890972, + "loss_sod": 0.07768049836158752, + "loss_total": 0.2795778214931488, + "step": 369999 + }, + { + "epoch": 0.001998, + "loss_gen": 6.039785861968994, + "loss_rtd": 0.22173763811588287, + "loss_sent": 0.22057120501995087, + "loss_sod": 0.010319402441382408, + "loss_total": 0.4526282548904419, + "step": 369999 + }, + { + "epoch": 0.002, + "grad_norm": 0.9138365983963013, + "learning_rate": 9.036251934412376e-07, + "loss": 0.4373, + "step": 370000 + }, + { + "epoch": 0.002, + "eval_loss": 0.3974721431732178, + "eval_runtime": 152.2824, + "eval_samples_per_second": 101.41, + "eval_steps_per_second": 0.795, + "step": 370000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.77457332611084, + "loss_rtd": 0.1922367513179779, + "loss_sent": 0.04758775234222412, + "loss_sod": 0.07165735960006714, + "loss_total": 0.31148186326026917, + "step": 370099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.326714515686035, + "loss_rtd": 0.1894858479499817, + "loss_sent": 2.274636244692374e-05, + "loss_sod": 0.08740726113319397, + "loss_total": 0.27691584825515747, + "step": 370099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.0516712665557861, + "learning_rate": 8.97629325313687e-07, + "loss": 0.4203, + "step": 370100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.750786781311035, + "loss_rtd": 0.1920003443956375, + "loss_sent": 0.16336694359779358, + "loss_sod": 0.049515023827552795, + "loss_total": 0.4048823118209839, + "step": 370199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.883330345153809, + "loss_rtd": 0.19946648180484772, + "loss_sent": 0.4097523093223572, + "loss_sod": 0.006896537728607655, + "loss_total": 0.6161153316497803, + "step": 370199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.3667125701904297, + "learning_rate": 8.916532356151586e-07, + "loss": 0.4088, + "step": 370200 + }, + { + "epoch": 0.002598, + "loss_gen": 6.184383869171143, + "loss_rtd": 0.19715061783790588, + "loss_sent": 0.2926498055458069, + "loss_sod": 0.026086285710334778, + "loss_total": 0.5158867239952087, + "step": 370299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.670344829559326, + "loss_rtd": 0.16458524763584137, + "loss_sent": 0.04784877970814705, + "loss_sod": 0.0956805944442749, + "loss_total": 0.308114618062973, + "step": 370299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.749548614025116, + "learning_rate": 8.856969267528436e-07, + "loss": 0.4216, + "step": 370300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.911998271942139, + "loss_rtd": 0.23035354912281036, + "loss_sent": 0.1706063449382782, + "loss_sod": 0.11654205620288849, + "loss_total": 0.517501950263977, + "step": 370399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.9849724769592285, + "loss_rtd": 0.22994734346866608, + "loss_sent": 0.14057643711566925, + "loss_sod": 0.014609819278120995, + "loss_total": 0.3851335942745209, + "step": 370399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.3703480958938599, + "learning_rate": 8.797604011259287e-07, + "loss": 0.423, + "step": 370400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.987913131713867, + "loss_rtd": 0.21612316370010376, + "loss_sent": 0.3199959397315979, + "loss_sod": 0.03054291009902954, + "loss_total": 0.5666620135307312, + "step": 370499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.7482171058654785, + "loss_rtd": 0.21483327448368073, + "loss_sent": 0.31899651885032654, + "loss_sod": 0.01689985767006874, + "loss_total": 0.5507296323776245, + "step": 370499 + }, + { + "epoch": 0.003, + "grad_norm": 1.6967475414276123, + "learning_rate": 8.738436611256507e-07, + "loss": 0.4164, + "step": 370500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.9801201820373535, + "loss_rtd": 0.21836118400096893, + "loss_sent": 0.19397112727165222, + "loss_sod": 0.05779365077614784, + "loss_total": 0.4701259732246399, + "step": 370599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.831068992614746, + "loss_rtd": 0.20626583695411682, + "loss_sent": 0.0638139620423317, + "loss_sod": 0.0216029305011034, + "loss_total": 0.29168272018432617, + "step": 370599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9187771081924438, + "learning_rate": 8.6794670913527e-07, + "loss": 0.4056, + "step": 370600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.984137535095215, + "loss_rtd": 0.20997627079486847, + "loss_sent": 0.15081514418125153, + "loss_sod": 0.04709908366203308, + "loss_total": 0.4078904986381531, + "step": 370699 + }, + { + "epoch": 0.003398, + "loss_gen": 6.200585842132568, + "loss_rtd": 0.22599759697914124, + "loss_sent": 0.1308494210243225, + "loss_sod": 0.17154893279075623, + "loss_total": 0.52839595079422, + "step": 370699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.126378059387207, + "learning_rate": 8.620695475300811e-07, + "loss": 0.4141, + "step": 370700 + }, + { + "epoch": 0.003598, + "loss_gen": 6.237166404724121, + "loss_rtd": 0.20681335031986237, + "loss_sent": 0.1380896270275116, + "loss_sod": 0.033758148550987244, + "loss_total": 0.3786611258983612, + "step": 370799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.950350761413574, + "loss_rtd": 0.1995602548122406, + "loss_sent": 0.12774446606636047, + "loss_sod": 0.02221403457224369, + "loss_total": 0.3495187759399414, + "step": 370799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.5069965720176697, + "learning_rate": 8.562121786774013e-07, + "loss": 0.4106, + "step": 370800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.380465984344482, + "loss_rtd": 0.18398678302764893, + "loss_sent": 0.022619973868131638, + "loss_sod": 0.06271077692508698, + "loss_total": 0.26931753754615784, + "step": 370899 + }, + { + "epoch": 0.003798, + "loss_gen": 6.374415397644043, + "loss_rtd": 0.22577594220638275, + "loss_sent": 0.08491288870573044, + "loss_sod": 0.16143465042114258, + "loss_total": 0.47212350368499756, + "step": 370899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.1972861289978027, + "learning_rate": 8.503746049365824e-07, + "loss": 0.4142, + "step": 370900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.563924312591553, + "loss_rtd": 0.19831405580043793, + "loss_sent": 0.08573966473340988, + "loss_sod": 0.01143189799040556, + "loss_total": 0.29548561573028564, + "step": 370999 + }, + { + "epoch": 0.003998, + "loss_gen": 6.10603141784668, + "loss_rtd": 0.2115442156791687, + "loss_sent": 0.25086724758148193, + "loss_sod": 0.04794318601489067, + "loss_total": 0.5103546380996704, + "step": 370999 + }, + { + "epoch": 0.004, + "grad_norm": 0.7185167074203491, + "learning_rate": 8.445568286589877e-07, + "loss": 0.4364, + "step": 371000 + }, + { + "epoch": 0.004, + "eval_loss": 0.398637056350708, + "eval_runtime": 150.0122, + "eval_samples_per_second": 102.945, + "eval_steps_per_second": 0.807, + "step": 371000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.839559555053711, + "loss_rtd": 0.2218700647354126, + "loss_sent": 0.359031081199646, + "loss_sod": 0.010356377810239792, + "loss_total": 0.5912575125694275, + "step": 371099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.853979110717773, + "loss_rtd": 0.19204586744308472, + "loss_sent": 0.2216770350933075, + "loss_sod": 0.0032190692145377398, + "loss_total": 0.41694197058677673, + "step": 371099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2039587497711182, + "learning_rate": 8.387588521880263e-07, + "loss": 0.423, + "step": 371100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.858482837677002, + "loss_rtd": 0.20762097835540771, + "loss_sent": 0.07991106808185577, + "loss_sod": 0.031615227460861206, + "loss_total": 0.3191472887992859, + "step": 371199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.80542516708374, + "loss_rtd": 0.21335211396217346, + "loss_sent": 0.20138762891292572, + "loss_sod": 0.00400786055251956, + "loss_total": 0.41874760389328003, + "step": 371199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.05449640750885, + "learning_rate": 8.329806778591299e-07, + "loss": 0.4141, + "step": 371200 + }, + { + "epoch": 0.004598, + "loss_gen": 6.135004043579102, + "loss_rtd": 0.22044090926647186, + "loss_sent": 0.8127871751785278, + "loss_sod": 0.02085859142243862, + "loss_total": 1.054086685180664, + "step": 371299 + }, + { + "epoch": 0.004598, + "loss_gen": 6.0553388595581055, + "loss_rtd": 0.20307990908622742, + "loss_sent": 0.2669093906879425, + "loss_sod": 0.016738833859562874, + "loss_total": 0.48672813177108765, + "step": 371299 + }, + { + "epoch": 0.0046, + "grad_norm": 2.1555721759796143, + "learning_rate": 8.272223079997255e-07, + "loss": 0.4306, + "step": 371300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.8579535484313965, + "loss_rtd": 0.21771731972694397, + "loss_sent": 0.04809056967496872, + "loss_sod": 0.09572092443704605, + "loss_total": 0.36152881383895874, + "step": 371399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.682850360870361, + "loss_rtd": 0.17702673375606537, + "loss_sent": 0.06961818784475327, + "loss_sod": 0.02396053448319435, + "loss_total": 0.2706054449081421, + "step": 371399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.9025712609291077, + "learning_rate": 8.214837449292967e-07, + "loss": 0.41, + "step": 371400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.280576705932617, + "loss_rtd": 0.14410962164402008, + "loss_sent": 0.003438427811488509, + "loss_sod": 0.034187521785497665, + "loss_total": 0.1817355751991272, + "step": 371499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.555894374847412, + "loss_rtd": 0.17416097223758698, + "loss_sent": 0.02043677493929863, + "loss_sod": 0.01033407635986805, + "loss_total": 0.2049318253993988, + "step": 371499 + }, + { + "epoch": 0.005, + "grad_norm": 0.6194052696228027, + "learning_rate": 8.157649909593335e-07, + "loss": 0.3958, + "step": 371500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.490361213684082, + "loss_rtd": 0.18433135747909546, + "loss_sent": 0.017026018351316452, + "loss_sod": 0.021255729719996452, + "loss_total": 0.2226131111383438, + "step": 371599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.757503032684326, + "loss_rtd": 0.19877558946609497, + "loss_sent": 0.2536308467388153, + "loss_sod": 0.006711670663207769, + "loss_total": 0.459118127822876, + "step": 371599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.7384194731712341, + "learning_rate": 8.100660483933542e-07, + "loss": 0.4202, + "step": 371600 + }, + { + "epoch": 0.005398, + "loss_gen": 6.322305202484131, + "loss_rtd": 0.2076469212770462, + "loss_sent": 0.07007652521133423, + "loss_sod": 0.04098549485206604, + "loss_total": 0.31870895624160767, + "step": 371699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.4590959548950195, + "loss_rtd": 0.1987762302160263, + "loss_sent": 2.4607086743344553e-05, + "loss_sod": 0.19191618263721466, + "loss_total": 0.3907170295715332, + "step": 371699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.9139410257339478, + "learning_rate": 8.043869195268894e-07, + "loss": 0.4048, + "step": 371700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.7678728103637695, + "loss_rtd": 0.2182355672121048, + "loss_sent": 0.16884374618530273, + "loss_sod": 0.005624018609523773, + "loss_total": 0.3927033245563507, + "step": 371799 + }, + { + "epoch": 0.005598, + "loss_gen": 6.226258277893066, + "loss_rtd": 0.2252960205078125, + "loss_sent": 0.19111286103725433, + "loss_sod": 0.038502730429172516, + "loss_total": 0.45491158962249756, + "step": 371799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.6736849546432495, + "learning_rate": 7.987276066474869e-07, + "loss": 0.4287, + "step": 371800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.072695255279541, + "loss_rtd": 0.15994885563850403, + "loss_sent": 2.2642059775535017e-05, + "loss_sod": 0.13835397362709045, + "loss_total": 0.29832547903060913, + "step": 371899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.742897033691406, + "loss_rtd": 0.19277235865592957, + "loss_sent": 0.07018637657165527, + "loss_sod": 0.018538443371653557, + "loss_total": 0.28149718046188354, + "step": 371899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.0694468021392822, + "learning_rate": 7.930881120347178e-07, + "loss": 0.4143, + "step": 371900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.143819808959961, + "loss_rtd": 0.14488475024700165, + "loss_sent": 0.06838599592447281, + "loss_sod": 0.03661201149225235, + "loss_total": 0.2498827576637268, + "step": 371999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.919182777404785, + "loss_rtd": 0.23770809173583984, + "loss_sent": 0.13715243339538574, + "loss_sod": 0.03412795066833496, + "loss_total": 0.40898847579956055, + "step": 371999 + }, + { + "epoch": 0.006, + "grad_norm": 0.8738687038421631, + "learning_rate": 7.874684379601759e-07, + "loss": 0.4084, + "step": 372000 + }, + { + "epoch": 0.006, + "eval_loss": 0.39766815304756165, + "eval_runtime": 150.0059, + "eval_samples_per_second": 102.949, + "eval_steps_per_second": 0.807, + "step": 372000 + }, + { + "epoch": 0.006198, + "loss_gen": 6.289210796356201, + "loss_rtd": 0.23419596254825592, + "loss_sent": 0.1906905472278595, + "loss_sod": 0.11558223515748978, + "loss_total": 0.5404687523841858, + "step": 372099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.580062389373779, + "loss_rtd": 0.2032138556241989, + "loss_sent": 0.09807055443525314, + "loss_sod": 0.04224370792508125, + "loss_total": 0.3435281217098236, + "step": 372099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.2346962690353394, + "learning_rate": 7.818685866874676e-07, + "loss": 0.4101, + "step": 372100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.849940776824951, + "loss_rtd": 0.20512159168720245, + "loss_sent": 0.20169490575790405, + "loss_sod": 0.04118049889802933, + "loss_total": 0.44799700379371643, + "step": 372199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.919855117797852, + "loss_rtd": 0.2348971962928772, + "loss_sent": 0.0944678857922554, + "loss_sod": 0.06624012440443039, + "loss_total": 0.395605206489563, + "step": 372199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.6020902395248413, + "learning_rate": 7.762885604721993e-07, + "loss": 0.4346, + "step": 372200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.477977275848389, + "loss_rtd": 0.17749889194965363, + "loss_sent": 0.09746012836694717, + "loss_sod": 0.017157068476080894, + "loss_total": 0.29211607575416565, + "step": 372299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.660252094268799, + "loss_rtd": 0.20052680373191833, + "loss_sent": 0.0274093858897686, + "loss_sod": 0.051001738756895065, + "loss_total": 0.2789379060268402, + "step": 372299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.9118999242782593, + "learning_rate": 7.70728361562012e-07, + "loss": 0.4052, + "step": 372300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.358914375305176, + "loss_rtd": 0.16730189323425293, + "loss_sent": 0.020632697269320488, + "loss_sod": 0.11295046657323837, + "loss_total": 0.30088505148887634, + "step": 372399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.22660493850708, + "loss_rtd": 0.17281118035316467, + "loss_sent": 2.5519548216834664e-05, + "loss_sod": 0.16093236207962036, + "loss_total": 0.3337690532207489, + "step": 372399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.1899315118789673, + "learning_rate": 7.651879921965588e-07, + "loss": 0.4135, + "step": 372400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.4811506271362305, + "loss_rtd": 0.17571194469928741, + "loss_sent": 0.09175330400466919, + "loss_sod": 0.021479532122612, + "loss_total": 0.2889447808265686, + "step": 372499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.855925559997559, + "loss_rtd": 0.20085661113262177, + "loss_sent": 0.22879676520824432, + "loss_sod": 0.027214793488383293, + "loss_total": 0.45686817169189453, + "step": 372499 + }, + { + "epoch": 0.007, + "grad_norm": 1.073201060295105, + "learning_rate": 7.596674546074878e-07, + "loss": 0.4049, + "step": 372500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.647804260253906, + "loss_rtd": 0.17650999128818512, + "loss_sent": 0.03875567764043808, + "loss_sod": 0.05519890785217285, + "loss_total": 0.27046456933021545, + "step": 372599 + }, + { + "epoch": 0.007198, + "loss_gen": 6.038423538208008, + "loss_rtd": 0.19966991245746613, + "loss_sent": 0.13861516118049622, + "loss_sod": 0.014357440173625946, + "loss_total": 0.3526425063610077, + "step": 372599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.722663402557373, + "learning_rate": 7.541667510184813e-07, + "loss": 0.4187, + "step": 372600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.560591697692871, + "loss_rtd": 0.19094672799110413, + "loss_sent": 0.0033574746921658516, + "loss_sod": 0.11088939011096954, + "loss_total": 0.30519360303878784, + "step": 372699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.180716514587402, + "loss_rtd": 0.1749231368303299, + "loss_sent": 2.4763548935879953e-05, + "loss_sod": 0.2965400516986847, + "loss_total": 0.47148796916007996, + "step": 372699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.639691948890686, + "learning_rate": 7.48685883645217e-07, + "loss": 0.4133, + "step": 372700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.789942264556885, + "loss_rtd": 0.199631467461586, + "loss_sent": 0.2081708163022995, + "loss_sod": 0.04268084093928337, + "loss_total": 0.45048314332962036, + "step": 372799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.569168567657471, + "loss_rtd": 0.1726132482290268, + "loss_sent": 0.018431365489959717, + "loss_sod": 0.08531937748193741, + "loss_total": 0.2763639986515045, + "step": 372799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.9298040270805359, + "learning_rate": 7.432248546953902e-07, + "loss": 0.41, + "step": 372800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.711914539337158, + "loss_rtd": 0.2136341780424118, + "loss_sent": 0.10143914073705673, + "loss_sod": 0.061825983226299286, + "loss_total": 0.3768993020057678, + "step": 372899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.920680046081543, + "loss_rtd": 0.21260219812393188, + "loss_sent": 0.07651954144239426, + "loss_sod": 0.027535876259207726, + "loss_total": 0.3166576027870178, + "step": 372899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.8316558599472046, + "learning_rate": 7.377836663687021e-07, + "loss": 0.4021, + "step": 372900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.0966668128967285, + "loss_rtd": 0.16073346138000488, + "loss_sent": 2.7389023671275936e-05, + "loss_sod": 0.02465459704399109, + "loss_total": 0.18541544675827026, + "step": 372999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.805216312408447, + "loss_rtd": 0.2233431339263916, + "loss_sent": 0.22811299562454224, + "loss_sod": 0.05292152613401413, + "loss_total": 0.5043776631355286, + "step": 372999 + }, + { + "epoch": 0.008, + "grad_norm": 0.739676296710968, + "learning_rate": 7.323623208568608e-07, + "loss": 0.403, + "step": 373000 + }, + { + "epoch": 0.008, + "eval_loss": 0.3985455334186554, + "eval_runtime": 150.3411, + "eval_samples_per_second": 102.72, + "eval_steps_per_second": 0.805, + "step": 373000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.874575138092041, + "loss_rtd": 0.21611829102039337, + "loss_sent": 0.298623263835907, + "loss_sod": 0.04416034370660782, + "loss_total": 0.5589019060134888, + "step": 373099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.559823989868164, + "loss_rtd": 0.2269548773765564, + "loss_sent": 0.4483194649219513, + "loss_sod": 0.03525351360440254, + "loss_total": 0.7105278968811035, + "step": 373099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.5289615392684937, + "learning_rate": 7.269608203435807e-07, + "loss": 0.4188, + "step": 373100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.941773414611816, + "loss_rtd": 0.19337964057922363, + "loss_sent": 0.031218715012073517, + "loss_sod": 0.1129196435213089, + "loss_total": 0.33751797676086426, + "step": 373199 + }, + { + "epoch": 0.008398, + "loss_gen": 6.265268802642822, + "loss_rtd": 0.2057289332151413, + "loss_sent": 0.10668730735778809, + "loss_sod": 0.028574064373970032, + "loss_total": 0.3409903049468994, + "step": 373199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.8747782707214355, + "learning_rate": 7.21579167004599e-07, + "loss": 0.4255, + "step": 373200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.486286640167236, + "loss_rtd": 0.17256313562393188, + "loss_sent": 0.029097484424710274, + "loss_sod": 0.08437571674585342, + "loss_total": 0.28603634238243103, + "step": 373299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.839051723480225, + "loss_rtd": 0.19659079611301422, + "loss_sent": 0.060338344424963, + "loss_sod": 0.053871020674705505, + "loss_total": 0.310800164937973, + "step": 373299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.8155134916305542, + "learning_rate": 7.162173630076263e-07, + "loss": 0.405, + "step": 373300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.77953577041626, + "loss_rtd": 0.21442870795726776, + "loss_sent": 0.28438112139701843, + "loss_sod": 0.023305360227823257, + "loss_total": 0.522115170955658, + "step": 373399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.832570552825928, + "loss_rtd": 0.19995197653770447, + "loss_sent": 0.3022240698337555, + "loss_sod": 0.014634872786700726, + "loss_total": 0.5168108940124512, + "step": 373399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.0309109687805176, + "learning_rate": 7.108754105124127e-07, + "loss": 0.3979, + "step": 373400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.288453102111816, + "loss_rtd": 0.16407644748687744, + "loss_sent": 5.9396268625278026e-05, + "loss_sod": 0.02640312723815441, + "loss_total": 0.19053897261619568, + "step": 373499 + }, + { + "epoch": 0.008998, + "loss_gen": 6.096435546875, + "loss_rtd": 0.1909773051738739, + "loss_sent": 0.23970675468444824, + "loss_sod": 0.008967695757746696, + "loss_total": 0.439651757478714, + "step": 373499 + }, + { + "epoch": 0.009, + "grad_norm": 0.7173610329627991, + "learning_rate": 7.055533116706869e-07, + "loss": 0.414, + "step": 373500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.57069206237793, + "loss_rtd": 0.17872034013271332, + "loss_sent": 0.11318963766098022, + "loss_sod": 0.0892697349190712, + "loss_total": 0.38117972016334534, + "step": 373599 + }, + { + "epoch": 0.009198, + "loss_gen": 6.252004146575928, + "loss_rtd": 0.19231130182743073, + "loss_sent": 0.12317709624767303, + "loss_sod": 0.09741857647895813, + "loss_total": 0.4129070043563843, + "step": 373599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.621515154838562, + "learning_rate": 7.002510686261898e-07, + "loss": 0.4194, + "step": 373600 + }, + { + "epoch": 0.009398, + "loss_gen": 6.404895305633545, + "loss_rtd": 0.2336234748363495, + "loss_sent": 0.0452134944498539, + "loss_sod": 0.04235793277621269, + "loss_total": 0.3211948871612549, + "step": 373699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.9691290855407715, + "loss_rtd": 0.2142149955034256, + "loss_sent": 0.3562409579753876, + "loss_sod": 0.016476528719067574, + "loss_total": 0.5869324803352356, + "step": 373699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.2180225849151611, + "learning_rate": 6.949686835146685e-07, + "loss": 0.4128, + "step": 373700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.945064067840576, + "loss_rtd": 0.20639783143997192, + "loss_sent": 0.33897995948791504, + "loss_sod": 0.1643020063638687, + "loss_total": 0.7096797823905945, + "step": 373799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.601099491119385, + "loss_rtd": 0.20600178837776184, + "loss_sent": 0.1476443111896515, + "loss_sod": 0.05563286691904068, + "loss_total": 0.4092789590358734, + "step": 373799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.3141870498657227, + "learning_rate": 6.897061584638654e-07, + "loss": 0.4031, + "step": 373800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.614050388336182, + "loss_rtd": 0.17569977045059204, + "loss_sent": 0.05042315647006035, + "loss_sod": 0.03975456953048706, + "loss_total": 0.26587748527526855, + "step": 373899 + }, + { + "epoch": 0.009798, + "loss_gen": 6.103392601013184, + "loss_rtd": 0.2152901589870453, + "loss_sent": 0.15932482481002808, + "loss_sod": 0.014758851379156113, + "loss_total": 0.3893738389015198, + "step": 373899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.6234253644943237, + "learning_rate": 6.844634955935292e-07, + "loss": 0.4201, + "step": 373900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.901730060577393, + "loss_rtd": 0.19243325293064117, + "loss_sent": 0.26281651854515076, + "loss_sod": 0.07989338785409927, + "loss_total": 0.5351431369781494, + "step": 373999 + }, + { + "epoch": 0.009998, + "loss_gen": 6.170499324798584, + "loss_rtd": 0.20321491360664368, + "loss_sent": 0.08104973286390305, + "loss_sod": 0.07737652212381363, + "loss_total": 0.36164116859436035, + "step": 373999 + }, + { + "epoch": 0.01, + "grad_norm": 1.4586005210876465, + "learning_rate": 6.79240697015393e-07, + "loss": 0.4122, + "step": 374000 + }, + { + "epoch": 0.01, + "eval_loss": 0.39439496397972107, + "eval_runtime": 149.9884, + "eval_samples_per_second": 102.961, + "eval_steps_per_second": 0.807, + "step": 374000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.722848892211914, + "loss_rtd": 0.21253347396850586, + "loss_sent": 0.41837382316589355, + "loss_sod": 0.0029890353325754404, + "loss_total": 0.6338963508605957, + "step": 374099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.7291340827941895, + "loss_rtd": 0.20271934568881989, + "loss_sent": 0.08784540742635727, + "loss_sod": 0.011821900494396687, + "loss_total": 0.30238664150238037, + "step": 374099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.226920247077942, + "learning_rate": 6.740377648332075e-07, + "loss": 0.395, + "step": 374100 + }, + { + "epoch": 0.010398, + "loss_gen": 6.0751566886901855, + "loss_rtd": 0.2085796594619751, + "loss_sent": 0.1787007749080658, + "loss_sod": 0.021359167993068695, + "loss_total": 0.4086396098136902, + "step": 374199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.632849216461182, + "loss_rtd": 0.22320057451725006, + "loss_sent": 0.3246040940284729, + "loss_sod": 0.0007886893581598997, + "loss_total": 0.5485933423042297, + "step": 374199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.323475956916809, + "learning_rate": 6.688547011427126e-07, + "loss": 0.4088, + "step": 374200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.7162184715271, + "loss_rtd": 0.21202796697616577, + "loss_sent": 0.22131867706775665, + "loss_sod": 0.03446045145392418, + "loss_total": 0.4678071141242981, + "step": 374299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.85204553604126, + "loss_rtd": 0.2098681926727295, + "loss_sent": 0.3486562669277191, + "loss_sod": 0.011006612330675125, + "loss_total": 0.5695310831069946, + "step": 374299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.8341209888458252, + "learning_rate": 6.636915080316442e-07, + "loss": 0.4208, + "step": 374300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.68907356262207, + "loss_rtd": 0.20671889185905457, + "loss_sent": 0.2256435751914978, + "loss_sod": 0.0048705353401601315, + "loss_total": 0.43723300099372864, + "step": 374399 + }, + { + "epoch": 0.010798, + "loss_gen": 6.107625961303711, + "loss_rtd": 0.20792579650878906, + "loss_sent": 0.6907059550285339, + "loss_sod": 0.028999656438827515, + "loss_total": 0.9276313781738281, + "step": 374399 + }, + { + "epoch": 0.0108, + "grad_norm": 4.132088661193848, + "learning_rate": 6.585481875797384e-07, + "loss": 0.419, + "step": 374400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.786088943481445, + "loss_rtd": 0.21682004630565643, + "loss_sent": 0.2618052065372467, + "loss_sod": 0.007072009611874819, + "loss_total": 0.48569726943969727, + "step": 374499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.653449535369873, + "loss_rtd": 0.20159128308296204, + "loss_sent": 0.18401367962360382, + "loss_sod": 0.09294983744621277, + "loss_total": 0.47855478525161743, + "step": 374499 + }, + { + "epoch": 0.011, + "grad_norm": 1.2994540929794312, + "learning_rate": 6.534247418587158e-07, + "loss": 0.3971, + "step": 374500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.844245433807373, + "loss_rtd": 0.21887655556201935, + "loss_sent": 0.1843392550945282, + "loss_sod": 0.056441012769937515, + "loss_total": 0.45965683460235596, + "step": 374599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.890125274658203, + "loss_rtd": 0.18784134089946747, + "loss_sent": 0.2701111137866974, + "loss_sod": 0.04189835861325264, + "loss_total": 0.4998508095741272, + "step": 374599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.931838870048523, + "learning_rate": 6.483211729323091e-07, + "loss": 0.4132, + "step": 374600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.651385307312012, + "loss_rtd": 0.17697350680828094, + "loss_sent": 0.004192035179585218, + "loss_sod": 0.06596556305885315, + "loss_total": 0.2471311092376709, + "step": 374699 + }, + { + "epoch": 0.011398, + "loss_gen": 6.000596046447754, + "loss_rtd": 0.2265426516532898, + "loss_sent": 0.17024017870426178, + "loss_sod": 0.06462591886520386, + "loss_total": 0.46140873432159424, + "step": 374699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.8797689080238342, + "learning_rate": 6.432374828562405e-07, + "loss": 0.3966, + "step": 374700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.8981475830078125, + "loss_rtd": 0.21112960577011108, + "loss_sent": 0.09434548765420914, + "loss_sod": 0.056651681661605835, + "loss_total": 0.36212676763534546, + "step": 374799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.834134578704834, + "loss_rtd": 0.20715786516666412, + "loss_sent": 0.2500268816947937, + "loss_sod": 0.012542951852083206, + "loss_total": 0.46972769498825073, + "step": 374799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.8593697547912598, + "learning_rate": 6.381736736781996e-07, + "loss": 0.4295, + "step": 374800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.788888454437256, + "loss_rtd": 0.19422529637813568, + "loss_sent": 0.10145033150911331, + "loss_sod": 0.031490758061409, + "loss_total": 0.3271663784980774, + "step": 374899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.987154960632324, + "loss_rtd": 0.210292786359787, + "loss_sent": 0.19489161670207977, + "loss_sod": 0.0206521637737751, + "loss_total": 0.42583656311035156, + "step": 374899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.7297804951667786, + "learning_rate": 6.33129747437905e-07, + "loss": 0.4093, + "step": 374900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.484149932861328, + "loss_rtd": 0.18677547574043274, + "loss_sent": 0.0006474746041931212, + "loss_sod": 0.1446777582168579, + "loss_total": 0.3321007192134857, + "step": 374999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.319724082946777, + "loss_rtd": 0.16887767612934113, + "loss_sent": 0.06040981039404869, + "loss_sod": 0.04936861991882324, + "loss_total": 0.27865609526634216, + "step": 374999 + }, + { + "epoch": 0.012, + "grad_norm": 1.015921950340271, + "learning_rate": 6.281057061670425e-07, + "loss": 0.4053, + "step": 375000 + }, + { + "epoch": 0.012, + "eval_loss": 0.39766398072242737, + "eval_runtime": 150.2669, + "eval_samples_per_second": 102.77, + "eval_steps_per_second": 0.805, + "step": 375000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.15659236907959, + "loss_rtd": 0.21070319414138794, + "loss_sent": 0.0696774274110794, + "loss_sod": 0.05396242439746857, + "loss_total": 0.3343430161476135, + "step": 375099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.826860427856445, + "loss_rtd": 0.20759032666683197, + "loss_sent": 0.06363599002361298, + "loss_sod": 0.07944272458553314, + "loss_total": 0.3506690263748169, + "step": 375099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.0957943201065063, + "learning_rate": 6.231015518892991e-07, + "loss": 0.4127, + "step": 375100 + }, + { + "epoch": 0.012398, + "loss_gen": 6.238338470458984, + "loss_rtd": 0.22129666805267334, + "loss_sent": 0.17585736513137817, + "loss_sod": 0.02833162620663643, + "loss_total": 0.42548567056655884, + "step": 375199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.958853721618652, + "loss_rtd": 0.2143506556749344, + "loss_sent": 0.3217431306838989, + "loss_sod": 0.018301580101251602, + "loss_total": 0.5543953776359558, + "step": 375199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.0528647899627686, + "learning_rate": 6.181172866203455e-07, + "loss": 0.4109, + "step": 375200 + }, + { + "epoch": 0.012598, + "loss_gen": 6.195026397705078, + "loss_rtd": 0.2206852287054062, + "loss_sent": 0.06826501339673996, + "loss_sod": 0.019596481695771217, + "loss_total": 0.3085467219352722, + "step": 375299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.812566757202148, + "loss_rtd": 0.2109343707561493, + "loss_sent": 0.12104281038045883, + "loss_sod": 0.012759190052747726, + "loss_total": 0.34473636746406555, + "step": 375299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.4340285062789917, + "learning_rate": 6.131529123678426e-07, + "loss": 0.4193, + "step": 375300 + }, + { + "epoch": 0.012798, + "loss_gen": 6.311695575714111, + "loss_rtd": 0.18870778381824493, + "loss_sent": 0.10390974581241608, + "loss_sod": 0.006855587009340525, + "loss_total": 0.2994731068611145, + "step": 375399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.540896892547607, + "loss_rtd": 0.18843285739421844, + "loss_sent": 0.01570253260433674, + "loss_sod": 0.03287762776017189, + "loss_total": 0.23701301217079163, + "step": 375399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.8500491976737976, + "learning_rate": 6.082084311314407e-07, + "loss": 0.432, + "step": 375400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.719391822814941, + "loss_rtd": 0.19244983792304993, + "loss_sent": 0.06878488510847092, + "loss_sod": 0.05827927961945534, + "loss_total": 0.3195140063762665, + "step": 375499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.634114742279053, + "loss_rtd": 0.23059429228305817, + "loss_sent": 0.0931749939918518, + "loss_sod": 0.020263686776161194, + "loss_total": 0.34403297305107117, + "step": 375499 + }, + { + "epoch": 0.013, + "grad_norm": 0.8408635258674622, + "learning_rate": 6.0328384490278e-07, + "loss": 0.4256, + "step": 375500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.912682056427002, + "loss_rtd": 0.1841646283864975, + "loss_sent": 0.35598501563072205, + "loss_sod": 0.08835349977016449, + "loss_total": 0.628503143787384, + "step": 375599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.978342533111572, + "loss_rtd": 0.20006583631038666, + "loss_sent": 0.34124019742012024, + "loss_sod": 0.011476047337055206, + "loss_total": 0.5527820587158203, + "step": 375599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.6812154054641724, + "learning_rate": 5.983791556654739e-07, + "loss": 0.4248, + "step": 375600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.824143886566162, + "loss_rtd": 0.21374043822288513, + "loss_sent": 0.2805253565311432, + "loss_sod": 0.013898611068725586, + "loss_total": 0.5081644058227539, + "step": 375699 + }, + { + "epoch": 0.013398, + "loss_gen": 6.045247554779053, + "loss_rtd": 0.21502593159675598, + "loss_sent": 0.3159756660461426, + "loss_sod": 0.014830069616436958, + "loss_total": 0.5458316802978516, + "step": 375699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.2717933654785156, + "learning_rate": 5.934943653951363e-07, + "loss": 0.4235, + "step": 375700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.889509677886963, + "loss_rtd": 0.2240026444196701, + "loss_sent": 0.05575236305594444, + "loss_sod": 0.05731187015771866, + "loss_total": 0.3370668888092041, + "step": 375799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.970902919769287, + "loss_rtd": 0.20444510877132416, + "loss_sent": 0.059766512364149094, + "loss_sod": 0.05850087106227875, + "loss_total": 0.3227124810218811, + "step": 375799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.0538915395736694, + "learning_rate": 5.886294760593602e-07, + "loss": 0.4027, + "step": 375800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.686825275421143, + "loss_rtd": 0.21245726943016052, + "loss_sent": 0.415727436542511, + "loss_sod": 0.0053630247712135315, + "loss_total": 0.6335477232933044, + "step": 375899 + }, + { + "epoch": 0.013798, + "loss_gen": 6.145212173461914, + "loss_rtd": 0.20726776123046875, + "loss_sent": 0.14269645512104034, + "loss_sod": 0.0076035140082240105, + "loss_total": 0.3575677275657654, + "step": 375899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.8053621053695679, + "learning_rate": 5.837844896177225e-07, + "loss": 0.4129, + "step": 375900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.996018409729004, + "loss_rtd": 0.19898325204849243, + "loss_sent": 0.10224363952875137, + "loss_sod": 0.1120859682559967, + "loss_total": 0.4133128523826599, + "step": 375999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.66213321685791, + "loss_rtd": 0.21605584025382996, + "loss_sent": 0.1493690013885498, + "loss_sod": 0.03288176283240318, + "loss_total": 0.39830660820007324, + "step": 375999 + }, + { + "epoch": 0.014, + "grad_norm": 1.5464924573898315, + "learning_rate": 5.789594080217842e-07, + "loss": 0.4238, + "step": 376000 + }, + { + "epoch": 0.014, + "eval_loss": 0.39667361974716187, + "eval_runtime": 150.1327, + "eval_samples_per_second": 102.862, + "eval_steps_per_second": 0.806, + "step": 376000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.963353633880615, + "loss_rtd": 0.2136533409357071, + "loss_sent": 0.11979737132787704, + "loss_sod": 0.022669518366456032, + "loss_total": 0.356120228767395, + "step": 376099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.6367411613464355, + "loss_rtd": 0.20582905411720276, + "loss_sent": 0.11848904937505722, + "loss_sod": 0.057092249393463135, + "loss_total": 0.3814103603363037, + "step": 376099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.599355161190033, + "learning_rate": 5.741542332150851e-07, + "loss": 0.4058, + "step": 376100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.652035236358643, + "loss_rtd": 0.21074751019477844, + "loss_sent": 0.2718053460121155, + "loss_sod": 0.014466974884271622, + "loss_total": 0.49701982736587524, + "step": 376199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.715115070343018, + "loss_rtd": 0.21487551927566528, + "loss_sent": 0.03479180857539177, + "loss_sod": 0.003185899928212166, + "loss_total": 0.2528532147407532, + "step": 376199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.0825930833816528, + "learning_rate": 5.693689671331548e-07, + "loss": 0.4034, + "step": 376200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.825916290283203, + "loss_rtd": 0.19891230762004852, + "loss_sent": 0.12521834671497345, + "loss_sod": 0.0035258494317531586, + "loss_total": 0.32765650749206543, + "step": 376299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.987072944641113, + "loss_rtd": 0.20104490220546722, + "loss_sent": 0.05299517884850502, + "loss_sod": 0.08359536528587341, + "loss_total": 0.33763542771339417, + "step": 376299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.9456308484077454, + "learning_rate": 5.646036117034847e-07, + "loss": 0.4399, + "step": 376300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.766575813293457, + "loss_rtd": 0.1912553310394287, + "loss_sent": 0.2122780978679657, + "loss_sod": 0.03363974392414093, + "loss_total": 0.43717318773269653, + "step": 376399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.739385604858398, + "loss_rtd": 0.2020392119884491, + "loss_sent": 0.057260312139987946, + "loss_sod": 0.04135715588927269, + "loss_total": 0.30065667629241943, + "step": 376399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.8377069234848022, + "learning_rate": 5.598581688455729e-07, + "loss": 0.4127, + "step": 376400 + }, + { + "epoch": 0.014998, + "loss_gen": 6.154478073120117, + "loss_rtd": 0.2175951898097992, + "loss_sent": 0.12526096403598785, + "loss_sod": 0.12082574516534805, + "loss_total": 0.4636818766593933, + "step": 376499 + }, + { + "epoch": 0.014998, + "loss_gen": 6.048160552978516, + "loss_rtd": 0.2201756089925766, + "loss_sent": 0.16244281828403473, + "loss_sod": 0.017377931624650955, + "loss_total": 0.3999963402748108, + "step": 376499 + }, + { + "epoch": 0.015, + "grad_norm": 1.3159281015396118, + "learning_rate": 5.55132640470879e-07, + "loss": 0.4078, + "step": 376500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.822164058685303, + "loss_rtd": 0.21577589213848114, + "loss_sent": 0.23769521713256836, + "loss_sod": 0.011649082414805889, + "loss_total": 0.46512019634246826, + "step": 376599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.9925079345703125, + "loss_rtd": 0.18235640227794647, + "loss_sent": 0.1412941962480545, + "loss_sod": 0.0333288311958313, + "loss_total": 0.3569794297218323, + "step": 376599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.7923346161842346, + "learning_rate": 5.504270284828417e-07, + "loss": 0.4225, + "step": 376600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.866918563842773, + "loss_rtd": 0.2069401741027832, + "loss_sent": 0.19980648159980774, + "loss_sod": 0.013730566948652267, + "loss_total": 0.4204772114753723, + "step": 376699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.525289535522461, + "loss_rtd": 0.1726590096950531, + "loss_sent": 0.07154635339975357, + "loss_sod": 0.053593751043081284, + "loss_total": 0.29779911041259766, + "step": 376699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.9670600891113281, + "learning_rate": 5.457413347768891e-07, + "loss": 0.4209, + "step": 376700 + }, + { + "epoch": 0.015598, + "loss_gen": 6.064262866973877, + "loss_rtd": 0.21177524328231812, + "loss_sent": 0.10027604550123215, + "loss_sod": 0.12483374774456024, + "loss_total": 0.4368850588798523, + "step": 376799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.948376655578613, + "loss_rtd": 0.2219124734401703, + "loss_sent": 0.08038464933633804, + "loss_sod": 0.19572797417640686, + "loss_total": 0.4980250895023346, + "step": 376799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.2898521423339844, + "learning_rate": 5.410755612404061e-07, + "loss": 0.4191, + "step": 376800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.998179912567139, + "loss_rtd": 0.19214703142642975, + "loss_sent": 0.09091558307409286, + "loss_sod": 0.0075148604810237885, + "loss_total": 0.2905774712562561, + "step": 376899 + }, + { + "epoch": 0.015798, + "loss_gen": 6.530083656311035, + "loss_rtd": 0.22096644341945648, + "loss_sent": 0.040187615901231766, + "loss_sod": 0.08871316909790039, + "loss_total": 0.3498672544956207, + "step": 376899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.7704149484634399, + "learning_rate": 5.364297097527781e-07, + "loss": 0.3964, + "step": 376900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.968879222869873, + "loss_rtd": 0.21126744151115417, + "loss_sent": 0.23530399799346924, + "loss_sod": 0.03537895902991295, + "loss_total": 0.48195040225982666, + "step": 376999 + }, + { + "epoch": 0.015998, + "loss_gen": 6.198812484741211, + "loss_rtd": 0.22787810862064362, + "loss_sent": 0.30264151096343994, + "loss_sod": 0.14800915122032166, + "loss_total": 0.6785287857055664, + "step": 376999 + }, + { + "epoch": 0.016, + "grad_norm": 1.2362818717956543, + "learning_rate": 5.318037821853417e-07, + "loss": 0.4005, + "step": 377000 + }, + { + "epoch": 0.016, + "eval_loss": 0.3953685462474823, + "eval_runtime": 150.2873, + "eval_samples_per_second": 102.756, + "eval_steps_per_second": 0.805, + "step": 377000 + }, + { + "epoch": 0.016198, + "loss_gen": 6.084448337554932, + "loss_rtd": 0.1892758458852768, + "loss_sent": 0.09442143887281418, + "loss_sod": 0.022943057119846344, + "loss_total": 0.3066403567790985, + "step": 377099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.924466609954834, + "loss_rtd": 0.2213367074728012, + "loss_sent": 0.26644495129585266, + "loss_sod": 0.09163976460695267, + "loss_total": 0.5794214010238647, + "step": 377099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.101434588432312, + "learning_rate": 5.271977804014283e-07, + "loss": 0.419, + "step": 377100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.712164878845215, + "loss_rtd": 0.20631778240203857, + "loss_sent": 0.07045809179544449, + "loss_sod": 0.0227108895778656, + "loss_total": 0.29948675632476807, + "step": 377199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.66616153717041, + "loss_rtd": 0.22503411769866943, + "loss_sent": 0.13139791786670685, + "loss_sod": 0.03343576192855835, + "loss_total": 0.38986778259277344, + "step": 377199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.961425244808197, + "learning_rate": 5.226117062563319e-07, + "loss": 0.4223, + "step": 377200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.948493480682373, + "loss_rtd": 0.19645100831985474, + "loss_sent": 0.21695654094219208, + "loss_sod": 0.06329571455717087, + "loss_total": 0.4767032861709595, + "step": 377299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.683925628662109, + "loss_rtd": 0.21708688139915466, + "loss_sent": 0.14205588400363922, + "loss_sod": 0.01813621260225773, + "loss_total": 0.37727898359298706, + "step": 377299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.0263874530792236, + "learning_rate": 5.180455615973301e-07, + "loss": 0.4139, + "step": 377300 + }, + { + "epoch": 0.016798, + "loss_gen": 6.031938076019287, + "loss_rtd": 0.1937197744846344, + "loss_sent": 0.13819313049316406, + "loss_sod": 0.027516640722751617, + "loss_total": 0.3594295382499695, + "step": 377399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.546912670135498, + "loss_rtd": 0.179046168923378, + "loss_sent": 0.0023346352390944958, + "loss_sod": 0.024476638063788414, + "loss_total": 0.20585744082927704, + "step": 377399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.8463380336761475, + "learning_rate": 5.134993482636518e-07, + "loss": 0.3982, + "step": 377400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.622273921966553, + "loss_rtd": 0.20638105273246765, + "loss_sent": 0.10678320378065109, + "loss_sod": 0.026414811611175537, + "loss_total": 0.3395790755748749, + "step": 377499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.296456336975098, + "loss_rtd": 0.18110458552837372, + "loss_sent": 2.4245722670457326e-05, + "loss_sod": 0.11737100034952164, + "loss_total": 0.29849985241889954, + "step": 377499 + }, + { + "epoch": 0.017, + "grad_norm": 0.8881797194480896, + "learning_rate": 5.089730680865212e-07, + "loss": 0.4057, + "step": 377500 + }, + { + "epoch": 0.017198, + "loss_gen": 6.0927934646606445, + "loss_rtd": 0.21190163493156433, + "loss_sent": 0.3824966847896576, + "loss_sod": 0.09746366739273071, + "loss_total": 0.6918619871139526, + "step": 377599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.840753078460693, + "loss_rtd": 0.20463943481445312, + "loss_sent": 0.27296513319015503, + "loss_sod": 0.07115224003791809, + "loss_total": 0.5487568378448486, + "step": 377599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.2717090845108032, + "learning_rate": 5.044667228891131e-07, + "loss": 0.4043, + "step": 377600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.747354984283447, + "loss_rtd": 0.2005842626094818, + "loss_sent": 0.2876623570919037, + "loss_sod": 0.011541787534952164, + "loss_total": 0.49978840351104736, + "step": 377699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.748420715332031, + "loss_rtd": 0.2131267488002777, + "loss_sent": 0.0777624249458313, + "loss_sod": 0.005437182728201151, + "loss_total": 0.29632633924484253, + "step": 377699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.786728024482727, + "learning_rate": 4.999803144865978e-07, + "loss": 0.4283, + "step": 377700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.955152988433838, + "loss_rtd": 0.2116282433271408, + "loss_sent": 0.14277306199073792, + "loss_sod": 0.01791638508439064, + "loss_total": 0.37231767177581787, + "step": 377799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.786294937133789, + "loss_rtd": 0.20854634046554565, + "loss_sent": 0.27219003438949585, + "loss_sod": 0.03270246088504791, + "loss_total": 0.5134388208389282, + "step": 377799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.8396150469779968, + "learning_rate": 4.955138446860907e-07, + "loss": 0.3925, + "step": 377800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.196096897125244, + "loss_rtd": 0.15838152170181274, + "loss_sent": 2.2329139028443024e-05, + "loss_sod": 0.10838563740253448, + "loss_total": 0.2667894661426544, + "step": 377899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.3700127601623535, + "loss_rtd": 0.1617504209280014, + "loss_sent": 0.020374365150928497, + "loss_sod": 0.066576287150383, + "loss_total": 0.2487010657787323, + "step": 377899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.0081185102462769, + "learning_rate": 4.910673152866862e-07, + "loss": 0.4188, + "step": 377900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.937511444091797, + "loss_rtd": 0.2209366112947464, + "loss_sent": 0.24928177893161774, + "loss_sod": 0.004064135253429413, + "loss_total": 0.47428250312805176, + "step": 377999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.362171173095703, + "loss_rtd": 0.1926054209470749, + "loss_sent": 0.13002458214759827, + "loss_sod": 0.0025852625258266926, + "loss_total": 0.32521528005599976, + "step": 377999 + }, + { + "epoch": 0.018, + "grad_norm": 0.7147202491760254, + "learning_rate": 4.86640728079446e-07, + "loss": 0.4346, + "step": 378000 + }, + { + "epoch": 0.018, + "eval_loss": 0.3972267806529999, + "eval_runtime": 150.3853, + "eval_samples_per_second": 102.69, + "eval_steps_per_second": 0.805, + "step": 378000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.6956658363342285, + "loss_rtd": 0.17405839264392853, + "loss_sent": 0.2557704746723175, + "loss_sod": 0.08298088610172272, + "loss_total": 0.5128097534179688, + "step": 378099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.304107666015625, + "loss_rtd": 0.1735163927078247, + "loss_sent": 2.3417098418576643e-05, + "loss_sod": 0.17949749529361725, + "loss_total": 0.3530372977256775, + "step": 378099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.085662841796875, + "learning_rate": 4.822340848473994e-07, + "loss": 0.4277, + "step": 378100 + }, + { + "epoch": 0.018398, + "loss_gen": 6.019848346710205, + "loss_rtd": 0.21076439321041107, + "loss_sent": 0.18268175423145294, + "loss_sod": 0.056887537240982056, + "loss_total": 0.4503336548805237, + "step": 378199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.67705774307251, + "loss_rtd": 0.1733749955892563, + "loss_sent": 0.06824298948049545, + "loss_sod": 0.0922779068350792, + "loss_total": 0.33389589190483093, + "step": 378199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.7420406341552734, + "learning_rate": 4.778473873655432e-07, + "loss": 0.418, + "step": 378200 + }, + { + "epoch": 0.018598, + "loss_gen": 6.0037007331848145, + "loss_rtd": 0.21802647411823273, + "loss_sent": 0.298694908618927, + "loss_sod": 0.007177173625677824, + "loss_total": 0.5238985419273376, + "step": 378299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.92147970199585, + "loss_rtd": 0.20518158376216888, + "loss_sent": 0.44562461972236633, + "loss_sod": 0.023967813700437546, + "loss_total": 0.6747740507125854, + "step": 378299 + }, + { + "epoch": 0.0186, + "grad_norm": 2.0879807472229004, + "learning_rate": 4.73480637400836e-07, + "loss": 0.406, + "step": 378300 + }, + { + "epoch": 0.018798, + "loss_gen": 6.265625953674316, + "loss_rtd": 0.20076626539230347, + "loss_sent": 0.18996816873550415, + "loss_sod": 0.019429976120591164, + "loss_total": 0.41016441583633423, + "step": 378399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.924288272857666, + "loss_rtd": 0.21152663230895996, + "loss_sent": 0.19503511488437653, + "loss_sod": 0.012357569299638271, + "loss_total": 0.41891932487487793, + "step": 378399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.8700541853904724, + "learning_rate": 4.691338367122045e-07, + "loss": 0.416, + "step": 378400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.599389553070068, + "loss_rtd": 0.20247696340084076, + "loss_sent": 0.010313127189874649, + "loss_sod": 0.040898703038692474, + "loss_total": 0.253688782453537, + "step": 378499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.468404769897461, + "loss_rtd": 0.17046506702899933, + "loss_sent": 0.05398714914917946, + "loss_sod": 0.18214984238147736, + "loss_total": 0.40660205483436584, + "step": 378499 + }, + { + "epoch": 0.019, + "grad_norm": 1.0383541584014893, + "learning_rate": 4.6480698705054226e-07, + "loss": 0.4245, + "step": 378500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.799564361572266, + "loss_rtd": 0.21832984685897827, + "loss_sent": 0.07897751778364182, + "loss_sod": 0.01685876026749611, + "loss_total": 0.3141661286354065, + "step": 378599 + }, + { + "epoch": 0.019198, + "loss_gen": 6.005757808685303, + "loss_rtd": 0.21833860874176025, + "loss_sent": 0.19936542212963104, + "loss_sod": 0.04156842827796936, + "loss_total": 0.45927244424819946, + "step": 378599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.8350887894630432, + "learning_rate": 4.6050009015869976e-07, + "loss": 0.4325, + "step": 378600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.9876389503479, + "loss_rtd": 0.20312464237213135, + "loss_sent": 0.2526932954788208, + "loss_sod": 0.1680360585451126, + "loss_total": 0.6238539814949036, + "step": 378699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.834028720855713, + "loss_rtd": 0.218141108751297, + "loss_sent": 0.13190466165542603, + "loss_sod": 0.05220230668783188, + "loss_total": 0.4022480845451355, + "step": 378699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.1730320453643799, + "learning_rate": 4.5621314777149483e-07, + "loss": 0.4131, + "step": 378700 + }, + { + "epoch": 0.019598, + "loss_gen": 6.032174587249756, + "loss_rtd": 0.1992754489183426, + "loss_sent": 0.4519427418708801, + "loss_sod": 0.030463453382253647, + "loss_total": 0.6816816329956055, + "step": 378799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.951891899108887, + "loss_rtd": 0.1831546276807785, + "loss_sent": 0.1545332670211792, + "loss_sod": 0.0045846295543015, + "loss_total": 0.3422725200653076, + "step": 378799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.8087645769119263, + "learning_rate": 4.519461616157183e-07, + "loss": 0.4072, + "step": 378800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.514514923095703, + "loss_rtd": 0.17730656266212463, + "loss_sent": 0.020894506946206093, + "loss_sod": 0.04431368410587311, + "loss_total": 0.24251475930213928, + "step": 378899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.795938491821289, + "loss_rtd": 0.19509421288967133, + "loss_sent": 0.139750137925148, + "loss_sod": 0.22240638732910156, + "loss_total": 0.5572507381439209, + "step": 378899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.5947258472442627, + "learning_rate": 4.476991334100955e-07, + "loss": 0.4291, + "step": 378900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.886747360229492, + "loss_rtd": 0.21091145277023315, + "loss_sent": 0.16238117218017578, + "loss_sod": 0.025292634963989258, + "loss_total": 0.3985852599143982, + "step": 378999 + }, + { + "epoch": 0.019998, + "loss_gen": 6.108876705169678, + "loss_rtd": 0.21461129188537598, + "loss_sent": 0.19172941148281097, + "loss_sod": 0.0646105706691742, + "loss_total": 0.47095125913619995, + "step": 378999 + }, + { + "epoch": 0.02, + "grad_norm": 1.4274873733520508, + "learning_rate": 4.4347206486533564e-07, + "loss": 0.4227, + "step": 379000 + }, + { + "epoch": 0.02, + "eval_loss": 0.3934597074985504, + "eval_runtime": 151.6554, + "eval_samples_per_second": 101.83, + "eval_steps_per_second": 0.798, + "step": 379000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.500655174255371, + "loss_rtd": 0.1995168924331665, + "loss_sent": 0.18026581406593323, + "loss_sod": 0.006543800700455904, + "loss_total": 0.38632649183273315, + "step": 379099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.8818039894104, + "loss_rtd": 0.203139528632164, + "loss_sent": 0.037558067589998245, + "loss_sod": 0.04296644777059555, + "loss_total": 0.2836640477180481, + "step": 379099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.7500585317611694, + "learning_rate": 4.39264957684099e-07, + "loss": 0.4148, + "step": 379100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.910189151763916, + "loss_rtd": 0.21938611567020416, + "loss_sent": 0.09466508030891418, + "loss_sod": 0.018406489863991737, + "loss_total": 0.33245769143104553, + "step": 379199 + }, + { + "epoch": 0.020398, + "loss_gen": 6.212303638458252, + "loss_rtd": 0.21138016879558563, + "loss_sent": 0.12816506624221802, + "loss_sod": 0.08652271330356598, + "loss_total": 0.42606794834136963, + "step": 379199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.9723511338233948, + "learning_rate": 4.350778135610134e-07, + "loss": 0.4195, + "step": 379200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.28507661819458, + "loss_rtd": 0.16205933690071106, + "loss_sent": 2.245020732516423e-05, + "loss_sod": 0.048897143453359604, + "loss_total": 0.2109789401292801, + "step": 379299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.454601287841797, + "loss_rtd": 0.1901465803384781, + "loss_sent": 0.03092203289270401, + "loss_sod": 0.10005979984998703, + "loss_total": 0.32112839818000793, + "step": 379299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.2063854932785034, + "learning_rate": 4.309106341826574e-07, + "loss": 0.4099, + "step": 379300 + }, + { + "epoch": 0.020798, + "loss_gen": 6.110975742340088, + "loss_rtd": 0.20995761454105377, + "loss_sent": 0.15996411442756653, + "loss_sod": 0.061814285814762115, + "loss_total": 0.431736022233963, + "step": 379399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.985228061676025, + "loss_rtd": 0.20598271489143372, + "loss_sent": 0.2394324690103531, + "loss_sod": 0.018116464838385582, + "loss_total": 0.46353164315223694, + "step": 379399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.0743968486785889, + "learning_rate": 4.267634212275717e-07, + "loss": 0.4161, + "step": 379400 + }, + { + "epoch": 0.020998, + "loss_gen": 6.155957221984863, + "loss_rtd": 0.2147091031074524, + "loss_sent": 0.09639734774827957, + "loss_sod": 0.03152427077293396, + "loss_total": 0.3426307141780853, + "step": 379499 + }, + { + "epoch": 0.020998, + "loss_gen": 6.12700891494751, + "loss_rtd": 0.19344329833984375, + "loss_sent": 0.3789941370487213, + "loss_sod": 0.03244452923536301, + "loss_total": 0.604882001876831, + "step": 379499 + }, + { + "epoch": 0.021, + "grad_norm": 1.2646256685256958, + "learning_rate": 4.2263617636624786e-07, + "loss": 0.4155, + "step": 379500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.411799430847168, + "loss_rtd": 0.173185795545578, + "loss_sent": 0.042459771037101746, + "loss_sod": 0.07946296036243439, + "loss_total": 0.2951085567474365, + "step": 379599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.8188629150390625, + "loss_rtd": 0.20450222492218018, + "loss_sent": 0.13934794068336487, + "loss_sod": 0.07375186681747437, + "loss_total": 0.4176020324230194, + "step": 379599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.7739236950874329, + "learning_rate": 4.185289012611504e-07, + "loss": 0.4182, + "step": 379600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.5912885665893555, + "loss_rtd": 0.18380384147167206, + "loss_sent": 0.14199651777744293, + "loss_sod": 0.09766144305467606, + "loss_total": 0.42346179485321045, + "step": 379699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.986878395080566, + "loss_rtd": 0.20203180611133575, + "loss_sent": 0.06627637147903442, + "loss_sod": 0.09104157984256744, + "loss_total": 0.3593497574329376, + "step": 379699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.4827905893325806, + "learning_rate": 4.1444159756667824e-07, + "loss": 0.4281, + "step": 379700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.994523048400879, + "loss_rtd": 0.19935733079910278, + "loss_sent": 0.24832437932491302, + "loss_sod": 0.04916829243302345, + "loss_total": 0.49685001373291016, + "step": 379799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.899327754974365, + "loss_rtd": 0.21462617814540863, + "loss_sent": 0.12212895601987839, + "loss_sod": 0.032889872789382935, + "loss_total": 0.36964499950408936, + "step": 379799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.5767993927001953, + "learning_rate": 4.103742669291977e-07, + "loss": 0.4254, + "step": 379800 + }, + { + "epoch": 0.021798, + "loss_gen": 6.336661338806152, + "loss_rtd": 0.20013760030269623, + "loss_sent": 0.10599343478679657, + "loss_sod": 0.10513609647750854, + "loss_total": 0.41126716136932373, + "step": 379899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.859251976013184, + "loss_rtd": 0.21320293843746185, + "loss_sent": 0.07086401432752609, + "loss_sod": 0.05484158918261528, + "loss_total": 0.3389085531234741, + "step": 379899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.0110816955566406, + "learning_rate": 4.063269109870316e-07, + "loss": 0.3923, + "step": 379900 + }, + { + "epoch": 0.021998, + "loss_gen": 6.067895412445068, + "loss_rtd": 0.21180304884910583, + "loss_sent": 0.08946903049945831, + "loss_sod": 0.03902129828929901, + "loss_total": 0.34029334783554077, + "step": 379999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.960249900817871, + "loss_rtd": 0.22714562714099884, + "loss_sent": 0.16184094548225403, + "loss_sod": 0.07150998711585999, + "loss_total": 0.46049657464027405, + "step": 379999 + }, + { + "epoch": 0.022, + "grad_norm": 1.3660411834716797, + "learning_rate": 4.0229953137045917e-07, + "loss": 0.4086, + "step": 380000 + }, + { + "epoch": 0.022, + "eval_loss": 0.3902009129524231, + "eval_runtime": 150.1671, + "eval_samples_per_second": 102.839, + "eval_steps_per_second": 0.806, + "step": 380000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.954092025756836, + "loss_rtd": 0.18810109794139862, + "loss_sent": 0.24659323692321777, + "loss_sod": 0.047537416219711304, + "loss_total": 0.4822317361831665, + "step": 380099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.394299507141113, + "loss_rtd": 0.1872742474079132, + "loss_sent": 2.289350777573418e-05, + "loss_sod": 0.08977370709180832, + "loss_total": 0.2770708501338959, + "step": 380099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.5520741939544678, + "learning_rate": 3.9829212970170506e-07, + "loss": 0.4212, + "step": 380100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.50739049911499, + "loss_rtd": 0.2191905975341797, + "loss_sent": 0.21501494944095612, + "loss_sod": 0.0014510282780975103, + "loss_total": 0.4356565773487091, + "step": 380199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.622569561004639, + "loss_rtd": 0.21803200244903564, + "loss_sent": 0.16014115512371063, + "loss_sod": 0.04715009406208992, + "loss_total": 0.4253232479095459, + "step": 380199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.610840916633606, + "learning_rate": 3.943047075949446e-07, + "loss": 0.4071, + "step": 380200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.882879257202148, + "loss_rtd": 0.18428292870521545, + "loss_sent": 0.17902645468711853, + "loss_sod": 0.09229131788015366, + "loss_total": 0.45560070872306824, + "step": 380299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.4265313148498535, + "loss_rtd": 0.18168246746063232, + "loss_sent": 4.284538226784207e-05, + "loss_sod": 0.11683396995067596, + "loss_total": 0.298559308052063, + "step": 380299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.1512634754180908, + "learning_rate": 3.9033726665632096e-07, + "loss": 0.4145, + "step": 380300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.272458553314209, + "loss_rtd": 0.17100802063941956, + "loss_sent": 0.015773722901940346, + "loss_sod": 0.04977009817957878, + "loss_total": 0.23655185103416443, + "step": 380399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.599095344543457, + "loss_rtd": 0.18492868542671204, + "loss_sent": 0.16344574093818665, + "loss_sod": 0.10426199436187744, + "loss_total": 0.4526364207267761, + "step": 380399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.2339563369750977, + "learning_rate": 3.8638980848391125e-07, + "loss": 0.4231, + "step": 380400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.604328155517578, + "loss_rtd": 0.2052101194858551, + "loss_sent": 0.2013697326183319, + "loss_sod": 0.07968216389417648, + "loss_total": 0.4862620234489441, + "step": 380499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.289646148681641, + "loss_rtd": 0.17533379793167114, + "loss_sent": 2.7983234758721665e-05, + "loss_sod": 0.11863875389099121, + "loss_total": 0.2940005362033844, + "step": 380499 + }, + { + "epoch": 0.023, + "grad_norm": 1.6092185974121094, + "learning_rate": 3.824623346677547e-07, + "loss": 0.4211, + "step": 380500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.850788116455078, + "loss_rtd": 0.20604585111141205, + "loss_sent": 0.275841623544693, + "loss_sod": 0.033807314932346344, + "loss_total": 0.515694797039032, + "step": 380599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.522738456726074, + "loss_rtd": 0.1687469333410263, + "loss_sent": 0.005756030790507793, + "loss_sod": 0.032747939229011536, + "loss_total": 0.2072509080171585, + "step": 380599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.8034012913703918, + "learning_rate": 3.785548467898359e-07, + "loss": 0.4376, + "step": 380600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.938969135284424, + "loss_rtd": 0.20497600734233856, + "loss_sent": 0.1025799810886383, + "loss_sod": 0.008984355255961418, + "loss_total": 0.31654036045074463, + "step": 380699 + }, + { + "epoch": 0.023398, + "loss_gen": 6.065103054046631, + "loss_rtd": 0.2054506540298462, + "loss_sent": 0.05428088828921318, + "loss_sod": 0.05435691773891449, + "loss_total": 0.31408846378326416, + "step": 380699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8988030552864075, + "learning_rate": 3.7466734642408463e-07, + "loss": 0.4211, + "step": 380700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.444797992706299, + "loss_rtd": 0.1727626621723175, + "loss_sent": 0.056168101727962494, + "loss_sod": 0.14595083892345428, + "loss_total": 0.3748815953731537, + "step": 380799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.7508955001831055, + "loss_rtd": 0.19710072875022888, + "loss_sent": 0.3005600571632385, + "loss_sod": 0.01652163825929165, + "loss_total": 0.514182448387146, + "step": 380799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.959169626235962, + "learning_rate": 3.707998351363984e-07, + "loss": 0.4136, + "step": 380800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.873311996459961, + "loss_rtd": 0.20481862127780914, + "loss_sent": 0.3269082307815552, + "loss_sod": 0.03254479914903641, + "loss_total": 0.5642716884613037, + "step": 380899 + }, + { + "epoch": 0.023798, + "loss_gen": 6.063734531402588, + "loss_rtd": 0.20686492323875427, + "loss_sent": 0.38673967123031616, + "loss_sod": 0.021104365587234497, + "loss_total": 0.6147089600563049, + "step": 380899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.3335402011871338, + "learning_rate": 3.6695231448460324e-07, + "loss": 0.4066, + "step": 380900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.842053413391113, + "loss_rtd": 0.20197023451328278, + "loss_sent": 0.11227338016033173, + "loss_sod": 0.027149679139256477, + "loss_total": 0.34139329195022583, + "step": 380999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.8046183586120605, + "loss_rtd": 0.23079979419708252, + "loss_sent": 0.17582298815250397, + "loss_sod": 0.009079264476895332, + "loss_total": 0.41570204496383667, + "step": 380999 + }, + { + "epoch": 0.024, + "grad_norm": 1.4506800174713135, + "learning_rate": 3.631247860184761e-07, + "loss": 0.4051, + "step": 381000 + }, + { + "epoch": 0.024, + "eval_loss": 0.3930273652076721, + "eval_runtime": 150.3698, + "eval_samples_per_second": 102.7, + "eval_steps_per_second": 0.805, + "step": 381000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.845580101013184, + "loss_rtd": 0.2230759710073471, + "loss_sent": 0.3735257089138031, + "loss_sod": 0.024172678589820862, + "loss_total": 0.6207743883132935, + "step": 381099 + }, + { + "epoch": 0.024198, + "loss_gen": 6.022526264190674, + "loss_rtd": 0.21440497040748596, + "loss_sent": 0.16394156217575073, + "loss_sod": 0.05334143340587616, + "loss_total": 0.43168795108795166, + "step": 381099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.4220038652420044, + "learning_rate": 3.5931725127975047e-07, + "loss": 0.4175, + "step": 381100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.436118125915527, + "loss_rtd": 0.16183926165103912, + "loss_sent": 0.08216781169176102, + "loss_sod": 0.01098170317709446, + "loss_total": 0.25498878955841064, + "step": 381199 + }, + { + "epoch": 0.024398, + "loss_gen": 6.093311309814453, + "loss_rtd": 0.19413651525974274, + "loss_sent": 0.12365799397230148, + "loss_sod": 0.02665119245648384, + "loss_total": 0.34444570541381836, + "step": 381199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.6130800843238831, + "learning_rate": 3.55529711802105e-07, + "loss": 0.4144, + "step": 381200 + }, + { + "epoch": 0.024598, + "loss_gen": 6.526711940765381, + "loss_rtd": 0.2141195684671402, + "loss_sent": 0.0823674201965332, + "loss_sod": 0.12698936462402344, + "loss_total": 0.42347633838653564, + "step": 381299 + }, + { + "epoch": 0.024598, + "loss_gen": 6.169975280761719, + "loss_rtd": 0.20990502834320068, + "loss_sent": 0.12368293106555939, + "loss_sod": 0.045053135603666306, + "loss_total": 0.3786410689353943, + "step": 381299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.2590303421020508, + "learning_rate": 3.5176216911114724e-07, + "loss": 0.4058, + "step": 381300 + }, + { + "epoch": 0.024798, + "loss_gen": 6.049837112426758, + "loss_rtd": 0.2047654390335083, + "loss_sent": 0.21812596917152405, + "loss_sod": 0.0736774355173111, + "loss_total": 0.49656882882118225, + "step": 381399 + }, + { + "epoch": 0.024798, + "loss_gen": 6.16081428527832, + "loss_rtd": 0.22861410677433014, + "loss_sent": 0.11025246977806091, + "loss_sod": 0.09058435261249542, + "loss_total": 0.4294509291648865, + "step": 381399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.8518393635749817, + "learning_rate": 3.4801462472445203e-07, + "loss": 0.4219, + "step": 381400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.403074264526367, + "loss_rtd": 0.19154304265975952, + "loss_sent": 0.03534320741891861, + "loss_sod": 0.0910448357462883, + "loss_total": 0.31793108582496643, + "step": 381499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.185169219970703, + "loss_rtd": 0.1610122174024582, + "loss_sent": 0.06993773579597473, + "loss_sod": 0.0635794997215271, + "loss_total": 0.29452943801879883, + "step": 381499 + }, + { + "epoch": 0.025, + "grad_norm": 0.8139126300811768, + "learning_rate": 3.442870801515341e-07, + "loss": 0.4193, + "step": 381500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.932486057281494, + "loss_rtd": 0.19373764097690582, + "loss_sent": 0.29014596343040466, + "loss_sod": 0.02605004794895649, + "loss_total": 0.5099336504936218, + "step": 381599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.304928302764893, + "loss_rtd": 0.1672595739364624, + "loss_sent": 0.026334472000598907, + "loss_sod": 0.15049013495445251, + "loss_total": 0.3440841734409332, + "step": 381599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.2460983991622925, + "learning_rate": 3.405795368938369e-07, + "loss": 0.4269, + "step": 381600 + }, + { + "epoch": 0.025398, + "loss_gen": 6.1368560791015625, + "loss_rtd": 0.209011048078537, + "loss_sent": 0.1695796251296997, + "loss_sod": 0.009269974194467068, + "loss_total": 0.38786065578460693, + "step": 381699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.6605682373046875, + "loss_rtd": 0.21456101536750793, + "loss_sent": 0.10210420936346054, + "loss_sod": 0.03545018285512924, + "loss_total": 0.3521154224872589, + "step": 381699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8272795677185059, + "learning_rate": 3.3689199644476587e-07, + "loss": 0.4083, + "step": 381700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.899969100952148, + "loss_rtd": 0.20056556165218353, + "loss_sent": 0.16467887163162231, + "loss_sod": 0.0619795024394989, + "loss_total": 0.42722392082214355, + "step": 381799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.529124736785889, + "loss_rtd": 0.20809099078178406, + "loss_sent": 0.06432030349969864, + "loss_sod": 0.037849728018045425, + "loss_total": 0.3102610111236572, + "step": 381799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.134244441986084, + "learning_rate": 3.3322446028965503e-07, + "loss": 0.4233, + "step": 381800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.27346134185791, + "loss_rtd": 0.19142065942287445, + "loss_sent": 0.007555554620921612, + "loss_sod": 0.03351636976003647, + "loss_total": 0.2324925810098648, + "step": 381899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.270022869110107, + "loss_rtd": 0.17058920860290527, + "loss_sent": 2.4998216758831404e-05, + "loss_sod": 0.10906675457954407, + "loss_total": 0.2796809673309326, + "step": 381899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.9053205847740173, + "learning_rate": 3.2957692990580046e-07, + "loss": 0.429, + "step": 381900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.275055885314941, + "loss_rtd": 0.16804182529449463, + "loss_sent": 2.5137811462627724e-05, + "loss_sod": 0.07545459270477295, + "loss_total": 0.2435215562582016, + "step": 381999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.577271461486816, + "loss_rtd": 0.209976926445961, + "loss_sent": 0.15293532609939575, + "loss_sod": 0.01812799833714962, + "loss_total": 0.3810402452945709, + "step": 381999 + }, + { + "epoch": 0.026, + "grad_norm": 0.7928724884986877, + "learning_rate": 3.2594940676241027e-07, + "loss": 0.4248, + "step": 382000 + }, + { + "epoch": 0.026, + "eval_loss": 0.39272138476371765, + "eval_runtime": 150.1275, + "eval_samples_per_second": 102.866, + "eval_steps_per_second": 0.806, + "step": 382000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.781370639801025, + "loss_rtd": 0.19625608623027802, + "loss_sent": 0.05940936878323555, + "loss_sod": 0.07032468914985657, + "loss_total": 0.32599014043807983, + "step": 382099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.562563419342041, + "loss_rtd": 0.170019268989563, + "loss_sent": 0.04797758534550667, + "loss_sod": 0.05059007182717323, + "loss_total": 0.2685869336128235, + "step": 382099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.752170741558075, + "learning_rate": 3.2234189232066004e-07, + "loss": 0.4143, + "step": 382100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.564212322235107, + "loss_rtd": 0.18514154851436615, + "loss_sent": 0.055654071271419525, + "loss_sod": 0.15288811922073364, + "loss_total": 0.3936837613582611, + "step": 382199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.553371906280518, + "loss_rtd": 0.1830948144197464, + "loss_sent": 0.16740046441555023, + "loss_sod": 0.10825850069522858, + "loss_total": 0.458753764629364, + "step": 382199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.5331205129623413, + "learning_rate": 3.187543880336541e-07, + "loss": 0.3994, + "step": 382200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.898194313049316, + "loss_rtd": 0.1988452970981598, + "loss_sent": 0.22878481447696686, + "loss_sod": 0.04501491039991379, + "loss_total": 0.4726450443267822, + "step": 382299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.903650283813477, + "loss_rtd": 0.19803264737129211, + "loss_sent": 0.5170067548751831, + "loss_sod": 0.03003372997045517, + "loss_total": 0.745073139667511, + "step": 382299 + }, + { + "epoch": 0.0266, + "grad_norm": 2.603910446166992, + "learning_rate": 3.151868953464476e-07, + "loss": 0.4201, + "step": 382300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.6080217361450195, + "loss_rtd": 0.2083578109741211, + "loss_sent": 0.1562943160533905, + "loss_sod": 0.011052027344703674, + "loss_total": 0.37570416927337646, + "step": 382399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.847711086273193, + "loss_rtd": 0.20529872179031372, + "loss_sent": 0.04937053471803665, + "loss_sod": 0.017431385815143585, + "loss_total": 0.27210062742233276, + "step": 382399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.7713347673416138, + "learning_rate": 3.1163941569600787e-07, + "loss": 0.4201, + "step": 382400 + }, + { + "epoch": 0.026998, + "loss_gen": 6.18224573135376, + "loss_rtd": 0.23028218746185303, + "loss_sent": 0.10545142740011215, + "loss_sod": 0.022825714200735092, + "loss_total": 0.35855934023857117, + "step": 382499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.670419692993164, + "loss_rtd": 0.21611681580543518, + "loss_sent": 0.14540377259254456, + "loss_sod": 0.012308724224567413, + "loss_total": 0.37382930517196655, + "step": 382499 + }, + { + "epoch": 0.027, + "grad_norm": 1.094574213027954, + "learning_rate": 3.0811195051127507e-07, + "loss": 0.4108, + "step": 382500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.5457892417907715, + "loss_rtd": 0.1806672364473343, + "loss_sent": 0.00011805249232565984, + "loss_sod": 0.08111029863357544, + "loss_total": 0.2618955671787262, + "step": 382599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.893887996673584, + "loss_rtd": 0.19793257117271423, + "loss_sent": 0.04744026064872742, + "loss_sod": 0.119545117020607, + "loss_total": 0.36491796374320984, + "step": 382599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.2319138050079346, + "learning_rate": 3.0460450121310156e-07, + "loss": 0.434, + "step": 382600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.232121467590332, + "loss_rtd": 0.15601125359535217, + "loss_sent": 0.02890804223716259, + "loss_sod": 0.14162380993366241, + "loss_total": 0.3265431225299835, + "step": 382699 + }, + { + "epoch": 0.027398, + "loss_gen": 6.055953502655029, + "loss_rtd": 0.20038843154907227, + "loss_sent": 0.22261404991149902, + "loss_sod": 0.006330553907901049, + "loss_total": 0.42933303117752075, + "step": 382699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.8192993402481079, + "learning_rate": 3.0111706921429615e-07, + "loss": 0.4135, + "step": 382700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.151395797729492, + "loss_rtd": 0.23215961456298828, + "loss_sent": 0.045967016369104385, + "loss_sod": 0.025773657485842705, + "loss_total": 0.30390027165412903, + "step": 382799 + }, + { + "epoch": 0.027598, + "loss_gen": 6.031484127044678, + "loss_rtd": 0.21376293897628784, + "loss_sent": 0.30357852578163147, + "loss_sod": 0.046195387840270996, + "loss_total": 0.5635368824005127, + "step": 382799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8617048859596252, + "learning_rate": 2.9764965591959073e-07, + "loss": 0.4175, + "step": 382800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.6411614418029785, + "loss_rtd": 0.21144753694534302, + "loss_sent": 0.16217614710330963, + "loss_sod": 0.013701547868549824, + "loss_total": 0.3873252272605896, + "step": 382899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.872683048248291, + "loss_rtd": 0.2178838551044464, + "loss_sent": 0.16557738184928894, + "loss_sod": 0.03549986705183983, + "loss_total": 0.4189611077308655, + "step": 382899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.8648281097412109, + "learning_rate": 2.942022627256624e-07, + "loss": 0.4013, + "step": 382900 + }, + { + "epoch": 0.027998, + "loss_gen": 6.025871753692627, + "loss_rtd": 0.20410758256912231, + "loss_sent": 0.1275903433561325, + "loss_sod": 0.0383104532957077, + "loss_total": 0.3700083792209625, + "step": 382999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.962867736816406, + "loss_rtd": 0.19217722117900848, + "loss_sent": 0.18039610981941223, + "loss_sod": 0.017892785370349884, + "loss_total": 0.3904661238193512, + "step": 382999 + }, + { + "epoch": 0.028, + "grad_norm": 1.1089924573898315, + "learning_rate": 2.9077489102111167e-07, + "loss": 0.4406, + "step": 383000 + }, + { + "epoch": 0.028, + "eval_loss": 0.3974106013774872, + "eval_runtime": 150.3955, + "eval_samples_per_second": 102.683, + "eval_steps_per_second": 0.805, + "step": 383000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.829176902770996, + "loss_rtd": 0.19727171957492828, + "loss_sent": 0.3101958632469177, + "loss_sod": 0.013695419766008854, + "loss_total": 0.5211629867553711, + "step": 383099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.7120747566223145, + "loss_rtd": 0.20222920179367065, + "loss_sent": 0.08801617473363876, + "loss_sod": 0.0060181827284395695, + "loss_total": 0.29626354575157166, + "step": 383099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.1163129806518555, + "learning_rate": 2.8736754218650076e-07, + "loss": 0.4124, + "step": 383100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.193973541259766, + "loss_rtd": 0.16248661279678345, + "loss_sent": 2.4179706088034436e-05, + "loss_sod": 0.08134716749191284, + "loss_total": 0.2438579648733139, + "step": 383199 + }, + { + "epoch": 0.028398, + "loss_gen": 6.03629207611084, + "loss_rtd": 0.198272705078125, + "loss_sent": 0.16059327125549316, + "loss_sod": 0.004946468397974968, + "loss_total": 0.3638124465942383, + "step": 383199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.1488338708877563, + "learning_rate": 2.8398021759429295e-07, + "loss": 0.4275, + "step": 383200 + }, + { + "epoch": 0.028598, + "loss_gen": 6.227768898010254, + "loss_rtd": 0.22333618998527527, + "loss_sent": 0.035455722361803055, + "loss_sod": 0.04420716315507889, + "loss_total": 0.3029990792274475, + "step": 383299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.957921981811523, + "loss_rtd": 0.21033763885498047, + "loss_sent": 0.12784285843372345, + "loss_sod": 0.024178283289074898, + "loss_total": 0.36235877871513367, + "step": 383299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.8141974806785583, + "learning_rate": 2.8061291860890236e-07, + "loss": 0.4191, + "step": 383300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.763555526733398, + "loss_rtd": 0.19557757675647736, + "loss_sent": 0.25977006554603577, + "loss_sod": 0.08676838129758835, + "loss_total": 0.5421160459518433, + "step": 383399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.887138843536377, + "loss_rtd": 0.20468755066394806, + "loss_sent": 0.275336354970932, + "loss_sod": 0.049571797251701355, + "loss_total": 0.5295957326889038, + "step": 383399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.3290231227874756, + "learning_rate": 2.7726564658669406e-07, + "loss": 0.3955, + "step": 383400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.662152290344238, + "loss_rtd": 0.2109338939189911, + "loss_sent": 0.13339297473430634, + "loss_sod": 0.04413021728396416, + "loss_total": 0.3884570598602295, + "step": 383499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.84342098236084, + "loss_rtd": 0.197341650724411, + "loss_sent": 0.1957792043685913, + "loss_sod": 0.05706150457262993, + "loss_total": 0.45018234848976135, + "step": 383499 + }, + { + "epoch": 0.029, + "grad_norm": 1.300395131111145, + "learning_rate": 2.739384028759284e-07, + "loss": 0.426, + "step": 383500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.621011257171631, + "loss_rtd": 0.1888379603624344, + "loss_sent": 0.07751519232988358, + "loss_sod": 0.08898812532424927, + "loss_total": 0.35534125566482544, + "step": 383599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.290693283081055, + "loss_rtd": 0.16856513917446136, + "loss_sent": 0.014348041266202927, + "loss_sod": 0.0435963049530983, + "loss_total": 0.22650949656963348, + "step": 383599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.8381374478340149, + "learning_rate": 2.706311888168278e-07, + "loss": 0.4217, + "step": 383600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.772955894470215, + "loss_rtd": 0.20471149682998657, + "loss_sent": 0.10987544804811478, + "loss_sod": 0.0125756049528718, + "loss_total": 0.32716256380081177, + "step": 383699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.772219181060791, + "loss_rtd": 0.18905729055404663, + "loss_sent": 0.05473000183701515, + "loss_sod": 0.0527057982981205, + "loss_total": 0.2964930832386017, + "step": 383699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.7054979205131531, + "learning_rate": 2.6734400574153773e-07, + "loss": 0.4373, + "step": 383700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.712389945983887, + "loss_rtd": 0.21115665137767792, + "loss_sent": 0.27840518951416016, + "loss_sod": 0.01093914732336998, + "loss_total": 0.5005009770393372, + "step": 383799 + }, + { + "epoch": 0.029598, + "loss_gen": 6.105762004852295, + "loss_rtd": 0.22365495562553406, + "loss_sent": 0.1008310467004776, + "loss_sod": 0.11533018946647644, + "loss_total": 0.4398161768913269, + "step": 383799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.273370623588562, + "learning_rate": 2.640768549741379e-07, + "loss": 0.4268, + "step": 383800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.544104099273682, + "loss_rtd": 0.1853761076927185, + "loss_sent": 0.28038954734802246, + "loss_sod": 0.017326954752206802, + "loss_total": 0.48309260606765747, + "step": 383899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.926936149597168, + "loss_rtd": 0.2125987857580185, + "loss_sent": 0.3669087588787079, + "loss_sod": 0.05336569994688034, + "loss_total": 0.6328732371330261, + "step": 383899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.3195164203643799, + "learning_rate": 2.6082973783063125e-07, + "loss": 0.4064, + "step": 383900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.669084072113037, + "loss_rtd": 0.17921671271324158, + "loss_sent": 0.05930311232805252, + "loss_sod": 0.017765387892723083, + "loss_total": 0.2562852203845978, + "step": 383999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.860673904418945, + "loss_rtd": 0.18688058853149414, + "loss_sent": 0.11732591688632965, + "loss_sod": 0.027635326609015465, + "loss_total": 0.3318418264389038, + "step": 383999 + }, + { + "epoch": 0.03, + "grad_norm": 1.0545306205749512, + "learning_rate": 2.5760265561896035e-07, + "loss": 0.4014, + "step": 384000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3930528163909912, + "eval_runtime": 150.1463, + "eval_samples_per_second": 102.853, + "eval_steps_per_second": 0.806, + "step": 384000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.7394208908081055, + "loss_rtd": 0.18927958607673645, + "loss_sent": 0.05443378537893295, + "loss_sod": 0.05979667976498604, + "loss_total": 0.30351004004478455, + "step": 384099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.6442341804504395, + "loss_rtd": 0.18970787525177002, + "loss_sent": 0.08145482838153839, + "loss_sod": 0.026608798652887344, + "loss_total": 0.29777151346206665, + "step": 384099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.8515053987503052, + "learning_rate": 2.5439560963898546e-07, + "loss": 0.4056, + "step": 384100 + }, + { + "epoch": 0.030398, + "loss_gen": 6.171351909637451, + "loss_rtd": 0.20819953083992004, + "loss_sent": 0.21172025799751282, + "loss_sod": 0.026197826489806175, + "loss_total": 0.4461176097393036, + "step": 384199 + }, + { + "epoch": 0.030398, + "loss_gen": 6.090738773345947, + "loss_rtd": 0.20947818458080292, + "loss_sent": 0.36782175302505493, + "loss_sod": 0.09364922344684601, + "loss_total": 0.6709491610527039, + "step": 384199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.9973644018173218, + "learning_rate": 2.5120860118251765e-07, + "loss": 0.4199, + "step": 384200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.825852394104004, + "loss_rtd": 0.17805254459381104, + "loss_sent": 0.006554849445819855, + "loss_sod": 0.14529208838939667, + "loss_total": 0.32989948987960815, + "step": 384299 + }, + { + "epoch": 0.030598, + "loss_gen": 6.071846008300781, + "loss_rtd": 0.2020629197359085, + "loss_sent": 0.37729617953300476, + "loss_sod": 0.14929164946079254, + "loss_total": 0.7286507487297058, + "step": 384299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.2775895595550537, + "learning_rate": 2.48041631533269e-07, + "loss": 0.4257, + "step": 384300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.82039737701416, + "loss_rtd": 0.20472243428230286, + "loss_sent": 0.3711397647857666, + "loss_sod": 0.07700426876544952, + "loss_total": 0.6528664827346802, + "step": 384399 + }, + { + "epoch": 0.030798, + "loss_gen": 6.16239070892334, + "loss_rtd": 0.23037785291671753, + "loss_sent": 0.13526126742362976, + "loss_sod": 0.07043778151273727, + "loss_total": 0.43607690930366516, + "step": 384399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.9397040605545044, + "learning_rate": 2.448947019669079e-07, + "loss": 0.4166, + "step": 384400 + }, + { + "epoch": 0.030998, + "loss_gen": 6.035325050354004, + "loss_rtd": 0.23700235784053802, + "loss_sent": 0.07824476063251495, + "loss_sod": 0.018092438578605652, + "loss_total": 0.3333395719528198, + "step": 384499 + }, + { + "epoch": 0.030998, + "loss_gen": 6.173375129699707, + "loss_rtd": 0.20125679671764374, + "loss_sent": 0.253811776638031, + "loss_sod": 0.050551868975162506, + "loss_total": 0.5056204795837402, + "step": 384499 + }, + { + "epoch": 0.031, + "grad_norm": 0.9964558482170105, + "learning_rate": 2.4176781375100374e-07, + "loss": 0.3826, + "step": 384500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.509469509124756, + "loss_rtd": 0.22962869703769684, + "loss_sent": 0.20768941938877106, + "loss_sod": 0.05147034674882889, + "loss_total": 0.4887884855270386, + "step": 384599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.993651390075684, + "loss_rtd": 0.19513122737407684, + "loss_sent": 0.17074358463287354, + "loss_sod": 0.04677882045507431, + "loss_total": 0.4126536250114441, + "step": 384599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.2593488693237305, + "learning_rate": 2.386609681450824e-07, + "loss": 0.4198, + "step": 384600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.636050701141357, + "loss_rtd": 0.19941259920597076, + "loss_sent": 0.08777549862861633, + "loss_sod": 0.0025134964380413294, + "loss_total": 0.28970158100128174, + "step": 384699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.861685752868652, + "loss_rtd": 0.22229737043380737, + "loss_sent": 0.22134365141391754, + "loss_sod": 0.010315588675439358, + "loss_total": 0.45395660400390625, + "step": 384699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.7069348692893982, + "learning_rate": 2.3557416640056507e-07, + "loss": 0.4189, + "step": 384700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.751885414123535, + "loss_rtd": 0.2034197449684143, + "loss_sent": 0.139282688498497, + "loss_sod": 0.12326791137456894, + "loss_total": 0.46597033739089966, + "step": 384799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.881619930267334, + "loss_rtd": 0.21095749735832214, + "loss_sent": 0.15282794833183289, + "loss_sod": 0.01254718005657196, + "loss_total": 0.3763326406478882, + "step": 384799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.5512088537216187, + "learning_rate": 2.3250740976082374e-07, + "loss": 0.4155, + "step": 384800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.850015163421631, + "loss_rtd": 0.17747046053409576, + "loss_sent": 0.1268550455570221, + "loss_sod": 0.1188102662563324, + "loss_total": 0.42313578724861145, + "step": 384899 + }, + { + "epoch": 0.031798, + "loss_gen": 6.000199794769287, + "loss_rtd": 0.20449306070804596, + "loss_sent": 0.10725359618663788, + "loss_sod": 0.049531131982803345, + "loss_total": 0.36127781867980957, + "step": 384899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.0175875425338745, + "learning_rate": 2.2946069946114812e-07, + "loss": 0.4079, + "step": 384900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.862027168273926, + "loss_rtd": 0.1923486888408661, + "loss_sent": 0.027933618053793907, + "loss_sod": 0.016524532809853554, + "loss_total": 0.23680683970451355, + "step": 384999 + }, + { + "epoch": 0.031998, + "loss_gen": 6.198087692260742, + "loss_rtd": 0.19978289306163788, + "loss_sent": 0.11878103762865067, + "loss_sod": 0.03205890953540802, + "loss_total": 0.35062283277511597, + "step": 384999 + }, + { + "epoch": 0.032, + "grad_norm": 0.5346891283988953, + "learning_rate": 2.2643403672875096e-07, + "loss": 0.4111, + "step": 385000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4042034149169922, + "eval_runtime": 151.5404, + "eval_samples_per_second": 101.907, + "eval_steps_per_second": 0.798, + "step": 385000 + }, + { + "epoch": 0.032198, + "loss_gen": 6.003377914428711, + "loss_rtd": 0.21727389097213745, + "loss_sent": 0.16895000636577606, + "loss_sod": 0.05419757217168808, + "loss_total": 0.440421462059021, + "step": 385099 + }, + { + "epoch": 0.032198, + "loss_gen": 5.99362850189209, + "loss_rtd": 0.21535775065422058, + "loss_sent": 0.1301010698080063, + "loss_sod": 0.09239351004362106, + "loss_total": 0.43785232305526733, + "step": 385099 + }, + { + "epoch": 0.0322, + "grad_norm": 1.2825134992599487, + "learning_rate": 2.234274227827682e-07, + "loss": 0.4274, + "step": 385100 + }, + { + "epoch": 0.032398, + "loss_gen": 5.866090774536133, + "loss_rtd": 0.2175987809896469, + "loss_sent": 0.18335312604904175, + "loss_sod": 0.10989538580179214, + "loss_total": 0.5108473300933838, + "step": 385199 + }, + { + "epoch": 0.032398, + "loss_gen": 6.124553203582764, + "loss_rtd": 0.19101719558238983, + "loss_sent": 0.06919978559017181, + "loss_sod": 0.0287528894841671, + "loss_total": 0.28896987438201904, + "step": 385199 + }, + { + "epoch": 0.0324, + "grad_norm": 0.9552285671234131, + "learning_rate": 2.2044085883426435e-07, + "loss": 0.4007, + "step": 385200 + }, + { + "epoch": 0.032598, + "loss_gen": 5.905416488647461, + "loss_rtd": 0.21314513683319092, + "loss_sent": 0.4079139530658722, + "loss_sod": 0.03686285391449928, + "loss_total": 0.6579219102859497, + "step": 385299 + }, + { + "epoch": 0.032598, + "loss_gen": 5.727965831756592, + "loss_rtd": 0.1941075623035431, + "loss_sent": 0.1891579031944275, + "loss_sod": 0.01784624718129635, + "loss_total": 0.4011117219924927, + "step": 385299 + }, + { + "epoch": 0.0326, + "grad_norm": 1.9801541566848755, + "learning_rate": 2.174743460862383e-07, + "loss": 0.4309, + "step": 385300 + }, + { + "epoch": 0.032798, + "loss_gen": 5.6684980392456055, + "loss_rtd": 0.2137640118598938, + "loss_sent": 0.22157108783721924, + "loss_sod": 0.03500162810087204, + "loss_total": 0.4703367352485657, + "step": 385399 + }, + { + "epoch": 0.032798, + "loss_gen": 6.408374309539795, + "loss_rtd": 0.22371450066566467, + "loss_sent": 0.09138733893632889, + "loss_sod": 0.020972158759832382, + "loss_total": 0.33607399463653564, + "step": 385399 + }, + { + "epoch": 0.0328, + "grad_norm": 0.9286766052246094, + "learning_rate": 2.1452788573358417e-07, + "loss": 0.4045, + "step": 385400 + }, + { + "epoch": 0.032998, + "loss_gen": 5.861077785491943, + "loss_rtd": 0.2003045678138733, + "loss_sent": 0.18459151685237885, + "loss_sod": 0.026224972680211067, + "loss_total": 0.41112107038497925, + "step": 385499 + }, + { + "epoch": 0.032998, + "loss_gen": 6.228074073791504, + "loss_rtd": 0.21457162499427795, + "loss_sent": 0.19774803519248962, + "loss_sod": 0.07375230640172958, + "loss_total": 0.48607197403907776, + "step": 385499 + }, + { + "epoch": 0.033, + "grad_norm": 0.6838713884353638, + "learning_rate": 2.1160147896314709e-07, + "loss": 0.4385, + "step": 385500 + }, + { + "epoch": 0.033198, + "loss_gen": 5.819832801818848, + "loss_rtd": 0.23806336522102356, + "loss_sent": 0.15349172055721283, + "loss_sod": 0.08721265941858292, + "loss_total": 0.4787677526473999, + "step": 385599 + }, + { + "epoch": 0.033198, + "loss_gen": 6.045207977294922, + "loss_rtd": 0.2031872421503067, + "loss_sent": 0.16494791209697723, + "loss_sod": 0.007363510318100452, + "loss_total": 0.3754986524581909, + "step": 385599 + }, + { + "epoch": 0.0332, + "grad_norm": 0.6510876417160034, + "learning_rate": 2.086951269536841e-07, + "loss": 0.4093, + "step": 385600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.804920673370361, + "loss_rtd": 0.2171359807252884, + "loss_sent": 0.40684863924980164, + "loss_sod": 0.036228228360414505, + "loss_total": 0.6602128744125366, + "step": 385699 + }, + { + "epoch": 0.033398, + "loss_gen": 5.90587854385376, + "loss_rtd": 0.20843030512332916, + "loss_sent": 0.33964505791664124, + "loss_sod": 0.02735271118581295, + "loss_total": 0.5754280686378479, + "step": 385699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.999657154083252, + "learning_rate": 2.0580883087586434e-07, + "loss": 0.4108, + "step": 385700 + }, + { + "epoch": 0.033598, + "loss_gen": 5.4843058586120605, + "loss_rtd": 0.18094661831855774, + "loss_sent": 0.04104515165090561, + "loss_sod": 0.06437574326992035, + "loss_total": 0.2863675057888031, + "step": 385799 + }, + { + "epoch": 0.033598, + "loss_gen": 5.223611831665039, + "loss_rtd": 0.14001010358333588, + "loss_sent": 3.558278694981709e-05, + "loss_sod": 0.06727577745914459, + "loss_total": 0.20732146501541138, + "step": 385799 + }, + { + "epoch": 0.0336, + "grad_norm": 0.6759509444236755, + "learning_rate": 2.0294259189229669e-07, + "loss": 0.4256, + "step": 385800 + }, + { + "epoch": 0.033798, + "loss_gen": 5.40152645111084, + "loss_rtd": 0.15343868732452393, + "loss_sent": 0.027872242033481598, + "loss_sod": 0.17344465851783752, + "loss_total": 0.35475558042526245, + "step": 385899 + }, + { + "epoch": 0.033798, + "loss_gen": 5.650938510894775, + "loss_rtd": 0.17356042563915253, + "loss_sent": 0.04674892500042915, + "loss_sod": 0.2110554277896881, + "loss_total": 0.4313647747039795, + "step": 385899 + }, + { + "epoch": 0.0338, + "grad_norm": 1.336422324180603, + "learning_rate": 2.0009641115749657e-07, + "loss": 0.4086, + "step": 385900 + }, + { + "epoch": 0.033998, + "loss_gen": 5.942296981811523, + "loss_rtd": 0.20877361297607422, + "loss_sent": 0.2556697726249695, + "loss_sod": 0.040888089686632156, + "loss_total": 0.5053314566612244, + "step": 385999 + }, + { + "epoch": 0.033998, + "loss_gen": 5.3268656730651855, + "loss_rtd": 0.2024465650320053, + "loss_sent": 0.01012949924916029, + "loss_sod": 0.11086844652891159, + "loss_total": 0.32344451546669006, + "step": 385999 + }, + { + "epoch": 0.034, + "grad_norm": 0.8952791690826416, + "learning_rate": 1.972702898179024e-07, + "loss": 0.4115, + "step": 386000 + }, + { + "epoch": 0.034, + "eval_loss": 0.4023250937461853, + "eval_runtime": 150.0033, + "eval_samples_per_second": 102.951, + "eval_steps_per_second": 0.807, + "step": 386000 + }, + { + "epoch": 0.000198, + "loss_gen": 6.070064067840576, + "loss_rtd": 0.22699445486068726, + "loss_sent": 0.23134654760360718, + "loss_sod": 0.002963309409096837, + "loss_total": 0.46130430698394775, + "step": 386099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.610538482666016, + "loss_rtd": 0.2264588475227356, + "loss_sent": 0.16599753499031067, + "loss_sod": 0.05153141915798187, + "loss_total": 0.44398781657218933, + "step": 386099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.428983449935913, + "learning_rate": 1.94464229011887e-07, + "loss": 0.4189, + "step": 386100 + }, + { + "epoch": 0.000398, + "loss_gen": 6.106116771697998, + "loss_rtd": 0.21767903864383698, + "loss_sent": 0.11991839110851288, + "loss_sod": 0.05915965884923935, + "loss_total": 0.3967570662498474, + "step": 386199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.915314674377441, + "loss_rtd": 0.20738781988620758, + "loss_sent": 0.1467045545578003, + "loss_sod": 0.01918071135878563, + "loss_total": 0.3732730746269226, + "step": 386199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.726487398147583, + "learning_rate": 1.9167822986971838e-07, + "loss": 0.418, + "step": 386200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.798270225524902, + "loss_rtd": 0.2067991942167282, + "loss_sent": 0.05629365146160126, + "loss_sod": 0.005625860765576363, + "loss_total": 0.2687187194824219, + "step": 386299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.905458927154541, + "loss_rtd": 0.22061729431152344, + "loss_sent": 0.4025327265262604, + "loss_sod": 0.11184671521186829, + "loss_total": 0.7349967360496521, + "step": 386299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.889122935136045e-07, + "loss": 0.4205, + "step": 386300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.854974746704102, + "loss_rtd": 0.2143116444349289, + "loss_sent": 0.21600039303302765, + "loss_sod": 0.030118010938167572, + "loss_total": 0.4604300260543823, + "step": 386399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.925917148590088, + "loss_rtd": 0.2109098732471466, + "loss_sent": 0.24010999500751495, + "loss_sod": 0.03709014505147934, + "loss_total": 0.4881100058555603, + "step": 386399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.3110747337341309, + "learning_rate": 1.861664210576597e-07, + "loss": 0.4123, + "step": 386400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.94522762298584, + "loss_rtd": 0.2248603105545044, + "loss_sent": 0.15762391686439514, + "loss_sod": 0.026143062859773636, + "loss_total": 0.40862730145454407, + "step": 386499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.8175578117370605, + "loss_rtd": 0.20158784091472626, + "loss_sent": 0.11587843298912048, + "loss_sod": 0.08866304159164429, + "loss_total": 0.4061293303966522, + "step": 386499 + }, + { + "epoch": 0.001, + "grad_norm": 1.1315805912017822, + "learning_rate": 1.8344061360792696e-07, + "loss": 0.4193, + "step": 386500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.619412422180176, + "loss_rtd": 0.20225612819194794, + "loss_sent": 0.432159423828125, + "loss_sod": 0.10655496269464493, + "loss_total": 0.7409704923629761, + "step": 386599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.699313640594482, + "loss_rtd": 0.1926557570695877, + "loss_sent": 0.09600414335727692, + "loss_sod": 0.01603185385465622, + "loss_total": 0.30469173192977905, + "step": 386599 + }, + { + "epoch": 0.0012, + "grad_norm": 2.7344155311584473, + "learning_rate": 1.8073487226235585e-07, + "loss": 0.4037, + "step": 386600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.58875036239624, + "loss_rtd": 0.21671243011951447, + "loss_sent": 0.05101096257567406, + "loss_sod": 0.03717564418911934, + "loss_total": 0.30489903688430786, + "step": 386699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.969257354736328, + "loss_rtd": 0.19849984347820282, + "loss_sent": 0.24645492434501648, + "loss_sod": 0.03874385356903076, + "loss_total": 0.48369860649108887, + "step": 386699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.7487657070159912, + "learning_rate": 1.78049198110819e-07, + "loss": 0.4166, + "step": 386700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.676285266876221, + "loss_rtd": 0.18517841398715973, + "loss_sent": 0.10563670098781586, + "loss_sod": 0.0461493618786335, + "loss_total": 0.33696448802948, + "step": 386799 + }, + { + "epoch": 0.001598, + "loss_gen": 6.1383209228515625, + "loss_rtd": 0.21085192263126373, + "loss_sent": 0.15274006128311157, + "loss_sod": 0.036854542791843414, + "loss_total": 0.4004465341567993, + "step": 386799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.7479475736618042, + "learning_rate": 1.7538359223510657e-07, + "loss": 0.4032, + "step": 386800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.616851806640625, + "loss_rtd": 0.18981054425239563, + "loss_sent": 0.0009357736562378705, + "loss_sod": 0.11195822805166245, + "loss_total": 0.3027045428752899, + "step": 386899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.281965255737305, + "loss_rtd": 0.15668818354606628, + "loss_sent": 0.14297324419021606, + "loss_sod": 0.019447026774287224, + "loss_total": 0.3191084563732147, + "step": 386899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.8839295506477356, + "learning_rate": 1.7273805570892643e-07, + "loss": 0.4094, + "step": 386900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.872824668884277, + "loss_rtd": 0.20653888583183289, + "loss_sent": 0.06045421585440636, + "loss_sod": 0.060713499784469604, + "loss_total": 0.32770660519599915, + "step": 386999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.871523380279541, + "loss_rtd": 0.21357491612434387, + "loss_sent": 0.16598744690418243, + "loss_sod": 0.0067842514254152775, + "loss_total": 0.38634660840034485, + "step": 386999 + }, + { + "epoch": 0.002, + "grad_norm": 0.70506352186203, + "learning_rate": 1.7011258959789832e-07, + "loss": 0.4138, + "step": 387000 + }, + { + "epoch": 0.002, + "eval_loss": 0.38906314969062805, + "eval_runtime": 152.8094, + "eval_samples_per_second": 101.061, + "eval_steps_per_second": 0.792, + "step": 387000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.941445827484131, + "loss_rtd": 0.2205839902162552, + "loss_sent": 0.24621763825416565, + "loss_sod": 0.03623174503445625, + "loss_total": 0.5030333995819092, + "step": 387099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.913673400878906, + "loss_rtd": 0.1947707235813141, + "loss_sent": 0.28452837467193604, + "loss_sod": 0.04408812150359154, + "loss_total": 0.5233871936798096, + "step": 387099 + }, + { + "epoch": 0.0022, + "grad_norm": 0.8668487071990967, + "learning_rate": 1.675071949595597e-07, + "loss": 0.4297, + "step": 387100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.786038398742676, + "loss_rtd": 0.23068787157535553, + "loss_sent": 0.16383348405361176, + "loss_sod": 0.0026258546859025955, + "loss_total": 0.39714720845222473, + "step": 387199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.983335971832275, + "loss_rtd": 0.21985170245170593, + "loss_sent": 0.2635365426540375, + "loss_sod": 0.07929453253746033, + "loss_total": 0.5626827478408813, + "step": 387199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.0871856212615967, + "learning_rate": 1.6492187284336546e-07, + "loss": 0.4306, + "step": 387200 + }, + { + "epoch": 0.002598, + "loss_gen": 6.006869316101074, + "loss_rtd": 0.19651633501052856, + "loss_sent": 0.11407383531332016, + "loss_sod": 0.06965118646621704, + "loss_total": 0.38024136424064636, + "step": 387299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.749596118927002, + "loss_rtd": 0.2103724628686905, + "loss_sent": 0.0785311684012413, + "loss_sod": 0.02853006310760975, + "loss_total": 0.3174336850643158, + "step": 387299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.018624186515808, + "learning_rate": 1.6235662429068266e-07, + "loss": 0.435, + "step": 387300 + }, + { + "epoch": 0.002798, + "loss_gen": 6.01922082901001, + "loss_rtd": 0.1941412389278412, + "loss_sent": 0.15151628851890564, + "loss_sod": 0.04446466267108917, + "loss_total": 0.3901221752166748, + "step": 387399 + }, + { + "epoch": 0.002798, + "loss_gen": 6.114126682281494, + "loss_rtd": 0.2113575041294098, + "loss_sent": 0.1491912603378296, + "loss_sod": 0.02346138283610344, + "loss_total": 0.3840101361274719, + "step": 387399 + }, + { + "epoch": 0.0028, + "grad_norm": 0.7707430124282837, + "learning_rate": 1.5981145033479027e-07, + "loss": 0.4003, + "step": 387400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.946076393127441, + "loss_rtd": 0.184996098279953, + "loss_sent": 0.534684419631958, + "loss_sod": 0.013645555824041367, + "loss_total": 0.7333260774612427, + "step": 387499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.926239490509033, + "loss_rtd": 0.2150571346282959, + "loss_sent": 0.20656444132328033, + "loss_sod": 0.12248778343200684, + "loss_total": 0.5441093444824219, + "step": 387499 + }, + { + "epoch": 0.003, + "grad_norm": 1.3904284238815308, + "learning_rate": 1.5728635200087938e-07, + "loss": 0.4064, + "step": 387500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.78752326965332, + "loss_rtd": 0.1943551003932953, + "loss_sent": 0.07164817303419113, + "loss_sod": 0.03743426129221916, + "loss_total": 0.3034375309944153, + "step": 387599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.449794292449951, + "loss_rtd": 0.1654985547065735, + "loss_sent": 0.016390370205044746, + "loss_sod": 0.1290266364812851, + "loss_total": 0.31091558933258057, + "step": 387599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9882694482803345, + "learning_rate": 1.5478133030607522e-07, + "loss": 0.4017, + "step": 387600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.94898796081543, + "loss_rtd": 0.21364334225654602, + "loss_sent": 0.15191717445850372, + "loss_sod": 0.05954572185873985, + "loss_total": 0.4251062273979187, + "step": 387699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.825323104858398, + "loss_rtd": 0.20545744895935059, + "loss_sent": 0.642839789390564, + "loss_sod": 0.12043068557977676, + "loss_total": 0.9687279462814331, + "step": 387699 + }, + { + "epoch": 0.0034, + "grad_norm": 2.926527500152588, + "learning_rate": 1.5229638625938182e-07, + "loss": 0.4243, + "step": 387700 + }, + { + "epoch": 0.003598, + "loss_gen": 6.00454044342041, + "loss_rtd": 0.21406090259552002, + "loss_sent": 0.28503379225730896, + "loss_sod": 0.08366947621107101, + "loss_total": 0.5827641487121582, + "step": 387799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.878026962280273, + "loss_rtd": 0.20210398733615875, + "loss_sent": 0.3536505401134491, + "loss_sod": 0.005776241421699524, + "loss_total": 0.5615307688713074, + "step": 387799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.3980095386505127, + "learning_rate": 1.4983152086174844e-07, + "loss": 0.4275, + "step": 387800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.576259613037109, + "loss_rtd": 0.1753866970539093, + "loss_sent": 0.0556168295443058, + "loss_sod": 0.012317564338445663, + "loss_total": 0.24332109093666077, + "step": 387899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.643679618835449, + "loss_rtd": 0.17319001257419586, + "loss_sent": 0.030518053099513054, + "loss_sod": 0.022975772619247437, + "loss_total": 0.2266838401556015, + "step": 387899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.5258404016494751, + "learning_rate": 1.473867351060143e-07, + "loss": 0.4209, + "step": 387900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.597390651702881, + "loss_rtd": 0.20969554781913757, + "loss_sent": 0.24825482070446014, + "loss_sod": 0.05265036225318909, + "loss_total": 0.510600745677948, + "step": 387999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.971643447875977, + "loss_rtd": 0.20426103472709656, + "loss_sent": 0.09622426331043243, + "loss_sod": 0.05200056731700897, + "loss_total": 0.3524858355522156, + "step": 387999 + }, + { + "epoch": 0.004, + "grad_norm": 1.4008424282073975, + "learning_rate": 1.4496202997694165e-07, + "loss": 0.4163, + "step": 388000 + }, + { + "epoch": 0.004, + "eval_loss": 0.40085962414741516, + "eval_runtime": 149.9182, + "eval_samples_per_second": 103.009, + "eval_steps_per_second": 0.807, + "step": 388000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.842921733856201, + "loss_rtd": 0.1869976669549942, + "loss_sent": 0.1804313212633133, + "loss_sod": 0.013124816119670868, + "loss_total": 0.3805537819862366, + "step": 388099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.944189548492432, + "loss_rtd": 0.21024784445762634, + "loss_sent": 0.3657797873020172, + "loss_sod": 0.13275648653507233, + "loss_total": 0.7087841033935547, + "step": 388099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.804450511932373, + "learning_rate": 1.4255740645120475e-07, + "loss": 0.4081, + "step": 388100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.947266101837158, + "loss_rtd": 0.200907364487648, + "loss_sent": 0.24104642868041992, + "loss_sod": 0.0234132781624794, + "loss_total": 0.46536707878112793, + "step": 388199 + }, + { + "epoch": 0.004398, + "loss_gen": 6.066715717315674, + "loss_rtd": 0.22250112891197205, + "loss_sent": 0.476871520280838, + "loss_sod": 0.01108524575829506, + "loss_total": 0.7104579210281372, + "step": 388199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.4031575918197632, + "learning_rate": 1.4017286549737884e-07, + "loss": 0.4126, + "step": 388200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.766085147857666, + "loss_rtd": 0.22764943540096283, + "loss_sent": 0.2466544657945633, + "loss_sod": 0.04013913869857788, + "loss_total": 0.514443039894104, + "step": 388299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.9625654220581055, + "loss_rtd": 0.1829644739627838, + "loss_sent": 0.2364800125360489, + "loss_sod": 0.0007151039317250252, + "loss_total": 0.42015957832336426, + "step": 388299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.9459004402160645, + "learning_rate": 1.378084080759623e-07, + "loss": 0.4243, + "step": 388300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.781798362731934, + "loss_rtd": 0.20330537855625153, + "loss_sent": 0.18447181582450867, + "loss_sod": 0.06919562816619873, + "loss_total": 0.45697280764579773, + "step": 388399 + }, + { + "epoch": 0.004798, + "loss_gen": 6.023786544799805, + "loss_rtd": 0.21240007877349854, + "loss_sent": 0.14837747812271118, + "loss_sod": 0.09386526793241501, + "loss_total": 0.4546428322792053, + "step": 388399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.1525017023086548, + "learning_rate": 1.354640351393488e-07, + "loss": 0.4147, + "step": 388400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.607063293457031, + "loss_rtd": 0.1774669736623764, + "loss_sent": 0.19217942655086517, + "loss_sod": 0.03440697491168976, + "loss_total": 0.40405339002609253, + "step": 388499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.909639835357666, + "loss_rtd": 0.20952454209327698, + "loss_sent": 0.10948611050844193, + "loss_sod": 0.04632039740681648, + "loss_total": 0.3653310537338257, + "step": 388499 + }, + { + "epoch": 0.005, + "grad_norm": 0.8776269555091858, + "learning_rate": 1.3313974763186632e-07, + "loss": 0.4087, + "step": 388500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.467298984527588, + "loss_rtd": 0.17283503711223602, + "loss_sent": 0.006112792529165745, + "loss_sod": 0.07768760621547699, + "loss_total": 0.2566354274749756, + "step": 388599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.354569911956787, + "loss_rtd": 0.16272178292274475, + "loss_sent": 0.07872873544692993, + "loss_sod": 0.08613304793834686, + "loss_total": 0.32758355140686035, + "step": 388599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.0944674015045166, + "learning_rate": 1.3083554648972707e-07, + "loss": 0.432, + "step": 388600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.879866600036621, + "loss_rtd": 0.22599825263023376, + "loss_sent": 0.4684654772281647, + "loss_sod": 0.02286195382475853, + "loss_total": 0.7173256874084473, + "step": 388699 + }, + { + "epoch": 0.005398, + "loss_gen": 6.004671573638916, + "loss_rtd": 0.20385044813156128, + "loss_sent": 0.20219197869300842, + "loss_sod": 0.02373768761754036, + "loss_total": 0.42978012561798096, + "step": 388699 + }, + { + "epoch": 0.0054, + "grad_norm": 2.1442813873291016, + "learning_rate": 1.285514326410664e-07, + "loss": 0.4185, + "step": 388700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.789896488189697, + "loss_rtd": 0.2033519148826599, + "loss_sent": 0.24579353630542755, + "loss_sod": 0.014285017736256123, + "loss_total": 0.4634304642677307, + "step": 388799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.791206359863281, + "loss_rtd": 0.2188941091299057, + "loss_sent": 0.37389636039733887, + "loss_sod": 0.018224112689495087, + "loss_total": 0.6110146045684814, + "step": 388799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.1618871688842773, + "learning_rate": 1.2628740700592613e-07, + "loss": 0.429, + "step": 388800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.514381408691406, + "loss_rtd": 0.22766588628292084, + "loss_sent": 0.10298759490251541, + "loss_sod": 0.004655986092984676, + "loss_total": 0.3353094756603241, + "step": 388899 + }, + { + "epoch": 0.005798, + "loss_gen": 6.139984130859375, + "loss_rtd": 0.19265694916248322, + "loss_sent": 0.3504784405231476, + "loss_sod": 0.017564527690410614, + "loss_total": 0.5606999397277832, + "step": 388899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.9985604882240295, + "learning_rate": 1.2404347049625453e-07, + "loss": 0.4122, + "step": 388900 + }, + { + "epoch": 0.005998, + "loss_gen": 6.484412670135498, + "loss_rtd": 0.2049032300710678, + "loss_sent": 0.12536132335662842, + "loss_sod": 0.11522291600704193, + "loss_total": 0.44548746943473816, + "step": 388999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.710115909576416, + "loss_rtd": 0.21021217107772827, + "loss_sent": 0.11833181977272034, + "loss_sod": 0.06307626515626907, + "loss_total": 0.3916202485561371, + "step": 388999 + }, + { + "epoch": 0.006, + "grad_norm": 0.8251280784606934, + "learning_rate": 1.218196240159064e-07, + "loss": 0.4118, + "step": 389000 + }, + { + "epoch": 0.006, + "eval_loss": 0.3989259898662567, + "eval_runtime": 151.4889, + "eval_samples_per_second": 101.941, + "eval_steps_per_second": 0.799, + "step": 389000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.922330856323242, + "loss_rtd": 0.20675304532051086, + "loss_sent": 0.2864395081996918, + "loss_sod": 0.04010416567325592, + "loss_total": 0.5332967042922974, + "step": 389099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.8388824462890625, + "loss_rtd": 0.22652123868465424, + "loss_sent": 0.16067945957183838, + "loss_sod": 0.005014869384467602, + "loss_total": 0.3922155499458313, + "step": 389099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.7238194942474365, + "learning_rate": 1.1961586846064855e-07, + "loss": 0.4005, + "step": 389100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.659124851226807, + "loss_rtd": 0.18338139355182648, + "loss_sent": 0.030780743807554245, + "loss_sod": 0.12981653213500977, + "loss_total": 0.3439786434173584, + "step": 389199 + }, + { + "epoch": 0.006398, + "loss_gen": 6.135606288909912, + "loss_rtd": 0.2033698856830597, + "loss_sent": 0.1476442962884903, + "loss_sod": 0.08842173963785172, + "loss_total": 0.4394358992576599, + "step": 389199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.0673482418060303, + "learning_rate": 1.174322047181542e-07, + "loss": 0.3953, + "step": 389200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.762836933135986, + "loss_rtd": 0.18234577775001526, + "loss_sent": 0.1359056979417801, + "loss_sod": 0.07956341654062271, + "loss_total": 0.39781486988067627, + "step": 389299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.6690497398376465, + "loss_rtd": 0.21876993775367737, + "loss_sent": 0.15573304891586304, + "loss_sod": 0.03257669508457184, + "loss_total": 0.40707969665527344, + "step": 389299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.1443850994110107, + "learning_rate": 1.1526863366800311e-07, + "loss": 0.4184, + "step": 389300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.593442916870117, + "loss_rtd": 0.16900567710399628, + "loss_sent": 2.23645238293102e-05, + "loss_sod": 0.13190466165542603, + "loss_total": 0.30093270540237427, + "step": 389399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.429333209991455, + "loss_rtd": 0.16373611986637115, + "loss_sent": 2.240178218926303e-05, + "loss_sod": 0.07734975218772888, + "loss_total": 0.2411082684993744, + "step": 389399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.9363405108451843, + "learning_rate": 1.1312515618168151e-07, + "loss": 0.4081, + "step": 389400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.979067802429199, + "loss_rtd": 0.21712931990623474, + "loss_sent": 0.09526192396879196, + "loss_sod": 0.10427667945623398, + "loss_total": 0.4166679382324219, + "step": 389499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.794955730438232, + "loss_rtd": 0.19414635002613068, + "loss_sent": 0.4644427001476288, + "loss_sod": 0.1854114681482315, + "loss_total": 0.844000518321991, + "step": 389499 + }, + { + "epoch": 0.007, + "grad_norm": 1.9044196605682373, + "learning_rate": 1.1100177312258209e-07, + "loss": 0.4137, + "step": 389500 + }, + { + "epoch": 0.007198, + "loss_gen": 6.104918479919434, + "loss_rtd": 0.20259903371334076, + "loss_sent": 0.13526050746440887, + "loss_sod": 0.012019608169794083, + "loss_total": 0.3498791456222534, + "step": 389599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.950196743011475, + "loss_rtd": 0.22066274285316467, + "loss_sent": 0.17763370275497437, + "loss_sod": 0.008551598526537418, + "loss_total": 0.4068480432033539, + "step": 389599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.5089385509490967, + "learning_rate": 1.0889848534599845e-07, + "loss": 0.4315, + "step": 389600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.721614360809326, + "loss_rtd": 0.21245476603507996, + "loss_sent": 0.2959735095500946, + "loss_sod": 0.014893703162670135, + "loss_total": 0.5233219861984253, + "step": 389699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.780533790588379, + "loss_rtd": 0.20026029646396637, + "loss_sent": 0.20466534793376923, + "loss_sod": 0.010142795741558075, + "loss_total": 0.41506844758987427, + "step": 389699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.5230454206466675, + "learning_rate": 1.0681529369914179e-07, + "loss": 0.4087, + "step": 389700 + }, + { + "epoch": 0.007598, + "loss_gen": 6.072732448577881, + "loss_rtd": 0.21685031056404114, + "loss_sent": 0.16334985196590424, + "loss_sod": 0.018847428262233734, + "loss_total": 0.3990476131439209, + "step": 389799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.786645889282227, + "loss_rtd": 0.18284183740615845, + "loss_sent": 0.20970797538757324, + "loss_sod": 0.011413590982556343, + "loss_total": 0.4039633870124817, + "step": 389799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.8005570769309998, + "learning_rate": 1.0475219902111311e-07, + "loss": 0.4078, + "step": 389800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.708725929260254, + "loss_rtd": 0.19107985496520996, + "loss_sent": 0.46121594309806824, + "loss_sod": 0.007444228045642376, + "loss_total": 0.6597400307655334, + "step": 389899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.856007099151611, + "loss_rtd": 0.20543049275875092, + "loss_sent": 0.16584698855876923, + "loss_sod": 0.008434868417680264, + "loss_total": 0.3797123432159424, + "step": 389899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.8368874788284302, + "learning_rate": 1.0270920214293656e-07, + "loss": 0.3981, + "step": 389900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.760108947753906, + "loss_rtd": 0.2410728484392166, + "loss_sent": 0.060165468603372574, + "loss_sod": 0.015030169859528542, + "loss_total": 0.3162684738636017, + "step": 389999 + }, + { + "epoch": 0.007998, + "loss_gen": 6.092087268829346, + "loss_rtd": 0.18792934715747833, + "loss_sent": 0.1581990271806717, + "loss_sod": 0.020702190697193146, + "loss_total": 0.36683058738708496, + "step": 389999 + }, + { + "epoch": 0.008, + "grad_norm": 0.7414451837539673, + "learning_rate": 1.0068630388752609e-07, + "loss": 0.4129, + "step": 390000 + }, + { + "epoch": 0.008, + "eval_loss": 0.39498767256736755, + "eval_runtime": 149.8397, + "eval_samples_per_second": 103.063, + "eval_steps_per_second": 0.808, + "step": 390000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.884185791015625, + "loss_rtd": 0.20744697749614716, + "loss_sent": 0.19164077937602997, + "loss_sod": 0.01973988302052021, + "loss_total": 0.4188276529312134, + "step": 390099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.681702613830566, + "loss_rtd": 0.22216004133224487, + "loss_sent": 0.12108003348112106, + "loss_sod": 0.06146732717752457, + "loss_total": 0.4047074019908905, + "step": 390099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.1543318033218384, + "learning_rate": 9.868350506970215e-08, + "loss": 0.4175, + "step": 390100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.980822563171387, + "loss_rtd": 0.2021224945783615, + "loss_sent": 0.3701927363872528, + "loss_sod": 0.01370286662131548, + "loss_total": 0.5860180854797363, + "step": 390199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.838639736175537, + "loss_rtd": 0.21277514100074768, + "loss_sent": 0.3194020688533783, + "loss_sod": 0.022717181593179703, + "loss_total": 0.5548943877220154, + "step": 390199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.0459808111190796, + "learning_rate": 9.670080649619717e-08, + "loss": 0.4047, + "step": 390200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.3288044929504395, + "loss_rtd": 0.17446598410606384, + "loss_sent": 2.305740963493008e-05, + "loss_sod": 0.059867922216653824, + "loss_total": 0.23435695469379425, + "step": 390299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.603953838348389, + "loss_rtd": 0.1641068011522293, + "loss_sent": 0.09302457422018051, + "loss_sod": 0.07938657701015472, + "loss_total": 0.33651795983314514, + "step": 390299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.8998137712478638, + "learning_rate": 9.473820896564456e-08, + "loss": 0.427, + "step": 390300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.830483913421631, + "loss_rtd": 0.2241693139076233, + "loss_sent": 0.3471631407737732, + "loss_sod": 0.005685629788786173, + "loss_total": 0.5770180821418762, + "step": 390399 + }, + { + "epoch": 0.008798, + "loss_gen": 6.0032854080200195, + "loss_rtd": 0.22199895977973938, + "loss_sent": 0.20922960340976715, + "loss_sod": 0.0637601763010025, + "loss_total": 0.49498873949050903, + "step": 390399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.4937852621078491, + "learning_rate": 9.27957132685675e-08, + "loss": 0.3951, + "step": 390400 + }, + { + "epoch": 0.008998, + "loss_gen": 6.0157470703125, + "loss_rtd": 0.2011171281337738, + "loss_sent": 0.2135574072599411, + "loss_sod": 0.003952609375119209, + "loss_total": 0.41862714290618896, + "step": 390499 + }, + { + "epoch": 0.008998, + "loss_gen": 6.025473117828369, + "loss_rtd": 0.2084123194217682, + "loss_sent": 0.30119219422340393, + "loss_sod": 0.012695659883320332, + "loss_total": 0.5223001837730408, + "step": 390499 + }, + { + "epoch": 0.009, + "grad_norm": 0.8088017702102661, + "learning_rate": 9.08733201874068e-08, + "loss": 0.4175, + "step": 390500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.977158546447754, + "loss_rtd": 0.20262813568115234, + "loss_sent": 0.2567712962627411, + "loss_sod": 0.06708398461341858, + "loss_total": 0.526483416557312, + "step": 390599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.823272705078125, + "loss_rtd": 0.21129167079925537, + "loss_sent": 0.08319632709026337, + "loss_sod": 0.1491374969482422, + "loss_total": 0.4436255097389221, + "step": 390599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.7465977668762207, + "learning_rate": 8.897103049650412e-08, + "loss": 0.3949, + "step": 390600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.868819236755371, + "loss_rtd": 0.214304119348526, + "loss_sent": 0.14899778366088867, + "loss_sod": 0.02877265214920044, + "loss_total": 0.3920745551586151, + "step": 390699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.775680065155029, + "loss_rtd": 0.23000310361385345, + "loss_sent": 0.06425362825393677, + "loss_sod": 0.10222698003053665, + "loss_total": 0.39648371934890747, + "step": 390699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.050985336303711, + "learning_rate": 8.708884496210768e-08, + "loss": 0.4117, + "step": 390700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.170206069946289, + "loss_rtd": 0.17056401073932648, + "loss_sent": 2.2412956241169013e-05, + "loss_sod": 0.08258362114429474, + "loss_total": 0.25317004323005676, + "step": 390799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.413459300994873, + "loss_rtd": 0.16320635378360748, + "loss_sent": 0.06688027083873749, + "loss_sod": 0.09646856784820557, + "loss_total": 0.32655519247055054, + "step": 390799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.9087501168251038, + "learning_rate": 8.522676434234989e-08, + "loss": 0.4019, + "step": 390800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.793584823608398, + "loss_rtd": 0.22636756300926208, + "loss_sent": 0.12923869490623474, + "loss_sod": 0.022730231285095215, + "loss_total": 0.37833648920059204, + "step": 390899 + }, + { + "epoch": 0.009798, + "loss_gen": 6.046942234039307, + "loss_rtd": 0.21500037610530853, + "loss_sent": 0.42974740266799927, + "loss_sod": 0.08335862308740616, + "loss_total": 0.7281063795089722, + "step": 390899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.06930673122406, + "learning_rate": 8.338478938728634e-08, + "loss": 0.4044, + "step": 390900 + }, + { + "epoch": 0.009998, + "loss_gen": 6.144214153289795, + "loss_rtd": 0.20340508222579956, + "loss_sent": 0.08943892270326614, + "loss_sod": 0.03819578140974045, + "loss_total": 0.33103978633880615, + "step": 390999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.959928512573242, + "loss_rtd": 0.2147907316684723, + "loss_sent": 0.08194345980882645, + "loss_sod": 0.08400901407003403, + "loss_total": 0.38074320554733276, + "step": 390999 + }, + { + "epoch": 0.01, + "grad_norm": 0.8810135722160339, + "learning_rate": 8.156292083885686e-08, + "loss": 0.4213, + "step": 391000 + }, + { + "epoch": 0.01, + "eval_loss": 0.389736145734787, + "eval_runtime": 150.3148, + "eval_samples_per_second": 102.738, + "eval_steps_per_second": 0.805, + "step": 391000 + }, + { + "epoch": 0.010198, + "loss_gen": 6.116897106170654, + "loss_rtd": 0.19888576865196228, + "loss_sent": 0.4463501274585724, + "loss_sod": 0.0318634957075119, + "loss_total": 0.6770994067192078, + "step": 391099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.774707794189453, + "loss_rtd": 0.2125256359577179, + "loss_sent": 0.07201242446899414, + "loss_sod": 0.03535296395421028, + "loss_total": 0.3198910355567932, + "step": 391099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.494367003440857, + "learning_rate": 7.976115943091334e-08, + "loss": 0.4085, + "step": 391100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.748145580291748, + "loss_rtd": 0.2154005765914917, + "loss_sent": 0.6336947679519653, + "loss_sod": 0.028976723551750183, + "loss_total": 0.8780720829963684, + "step": 391199 + }, + { + "epoch": 0.010398, + "loss_gen": 6.1587748527526855, + "loss_rtd": 0.21844086050987244, + "loss_sent": 0.5092006921768188, + "loss_sod": 0.09236706048250198, + "loss_total": 0.8200086355209351, + "step": 391199 + }, + { + "epoch": 0.0104, + "grad_norm": 2.631178617477417, + "learning_rate": 7.797950588920855e-08, + "loss": 0.4198, + "step": 391200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.9071364402771, + "loss_rtd": 0.21475382149219513, + "loss_sent": 0.10161326825618744, + "loss_sod": 0.10331739485263824, + "loss_total": 0.4196844696998596, + "step": 391299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.329215049743652, + "loss_rtd": 0.17051318287849426, + "loss_sent": 0.00010216770169790834, + "loss_sod": 0.024984223768115044, + "loss_total": 0.19559957087039948, + "step": 391299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9734504222869873, + "learning_rate": 7.621796093138511e-08, + "loss": 0.4274, + "step": 391300 + }, + { + "epoch": 0.010798, + "loss_gen": 6.007766246795654, + "loss_rtd": 0.20674045383930206, + "loss_sent": 0.043503355234861374, + "loss_sod": 0.0852990448474884, + "loss_total": 0.33554285764694214, + "step": 391399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.994940757751465, + "loss_rtd": 0.20298358798027039, + "loss_sent": 0.19987483322620392, + "loss_sod": 0.015618769451975822, + "loss_total": 0.4184771776199341, + "step": 391399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.005513310432434, + "learning_rate": 7.447652526699766e-08, + "loss": 0.4158, + "step": 391400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.739851951599121, + "loss_rtd": 0.20979925990104675, + "loss_sent": 0.16687257587909698, + "loss_sod": 0.02306627482175827, + "loss_total": 0.3997381329536438, + "step": 391499 + }, + { + "epoch": 0.010998, + "loss_gen": 6.118026256561279, + "loss_rtd": 0.22426855564117432, + "loss_sent": 0.2071017622947693, + "loss_sod": 0.040315523743629456, + "loss_total": 0.47168582677841187, + "step": 391499 + }, + { + "epoch": 0.011, + "grad_norm": 0.8095478415489197, + "learning_rate": 7.275519959749622e-08, + "loss": 0.4192, + "step": 391500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.163768291473389, + "loss_rtd": 0.15256644785404205, + "loss_sent": 2.355658943997696e-05, + "loss_sod": 0.03270483762025833, + "loss_total": 0.18529485166072845, + "step": 391599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.399328708648682, + "loss_rtd": 0.17275498807430267, + "loss_sent": 0.003234855132177472, + "loss_sod": 0.04913503676652908, + "loss_total": 0.22512488067150116, + "step": 391599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.6801968812942505, + "learning_rate": 7.105398461623169e-08, + "loss": 0.4044, + "step": 391600 + }, + { + "epoch": 0.011398, + "loss_gen": 6.149866580963135, + "loss_rtd": 0.2096904069185257, + "loss_sent": 0.21248237788677216, + "loss_sod": 0.052042677998542786, + "loss_total": 0.47421544790267944, + "step": 391699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.65095853805542, + "loss_rtd": 0.18483102321624756, + "loss_sent": 0.049149829894304276, + "loss_sod": 0.01666828989982605, + "loss_total": 0.2506491541862488, + "step": 391699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.081852674484253, + "learning_rate": 6.937288100844485e-08, + "loss": 0.4074, + "step": 391700 + }, + { + "epoch": 0.011598, + "loss_gen": 6.197673797607422, + "loss_rtd": 0.2072892040014267, + "loss_sent": 0.010362375527620316, + "loss_sod": 0.15921708941459656, + "loss_total": 0.37686866521835327, + "step": 391799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.257237434387207, + "loss_rtd": 0.16337601840496063, + "loss_sent": 2.4078102796920575e-05, + "loss_sod": 0.06838693469762802, + "loss_total": 0.23178702592849731, + "step": 391799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.020723819732666, + "learning_rate": 6.771188945129958e-08, + "loss": 0.4117, + "step": 391800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.401880741119385, + "loss_rtd": 0.15832042694091797, + "loss_sent": 0.002661922248080373, + "loss_sod": 0.049096040427684784, + "loss_total": 0.2100784033536911, + "step": 391899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.75095796585083, + "loss_rtd": 0.2094363421201706, + "loss_sent": 0.16649065911769867, + "loss_sod": 0.07074245810508728, + "loss_total": 0.44666945934295654, + "step": 391899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.9183186888694763, + "learning_rate": 6.607101061382737e-08, + "loss": 0.4106, + "step": 391900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.547502040863037, + "loss_rtd": 0.18443872034549713, + "loss_sent": 0.08254744112491608, + "loss_sod": 0.013162685558199883, + "loss_total": 0.28014886379241943, + "step": 391999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.738937854766846, + "loss_rtd": 0.20439235866069794, + "loss_sent": 0.30122336745262146, + "loss_sod": 0.04113311320543289, + "loss_total": 0.5467488765716553, + "step": 391999 + }, + { + "epoch": 0.012, + "grad_norm": 0.9484466910362244, + "learning_rate": 6.445024515698284e-08, + "loss": 0.4061, + "step": 392000 + }, + { + "epoch": 0.012, + "eval_loss": 0.40118321776390076, + "eval_runtime": 149.9632, + "eval_samples_per_second": 102.979, + "eval_steps_per_second": 0.807, + "step": 392000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.28818941116333, + "loss_rtd": 0.20023567974567413, + "loss_sent": 0.2177213877439499, + "loss_sod": 0.07984787225723267, + "loss_total": 0.4978049397468567, + "step": 392099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.625298023223877, + "loss_rtd": 0.18772730231285095, + "loss_sent": 2.5435860152356327e-05, + "loss_sod": 0.07460042834281921, + "loss_total": 0.26235315203666687, + "step": 392099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.8974661231040955, + "learning_rate": 6.284959373360489e-08, + "loss": 0.4146, + "step": 392100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.831862449645996, + "loss_rtd": 0.24395421147346497, + "loss_sent": 0.17839743196964264, + "loss_sod": 0.07743652909994125, + "loss_total": 0.49978816509246826, + "step": 392199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.410398006439209, + "loss_rtd": 0.18935169279575348, + "loss_sent": 0.0018181510968133807, + "loss_sod": 0.024343883618712425, + "loss_total": 0.21551372110843658, + "step": 392199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.2036210298538208, + "learning_rate": 6.126905698843887e-08, + "loss": 0.4086, + "step": 392200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.893172264099121, + "loss_rtd": 0.2119544893503189, + "loss_sent": 0.14370910823345184, + "loss_sod": 0.043216902762651443, + "loss_total": 0.3988804817199707, + "step": 392299 + }, + { + "epoch": 0.012598, + "loss_gen": 6.108611106872559, + "loss_rtd": 0.21778523921966553, + "loss_sent": 0.09171459078788757, + "loss_sod": 0.12346639484167099, + "loss_total": 0.4329662322998047, + "step": 392299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.3098056316375732, + "learning_rate": 5.970863555812555e-08, + "loss": 0.4117, + "step": 392300 + }, + { + "epoch": 0.012798, + "loss_gen": 6.1655192375183105, + "loss_rtd": 0.21565361320972443, + "loss_sent": 0.19704580307006836, + "loss_sod": 0.05005672574043274, + "loss_total": 0.4627561569213867, + "step": 392399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.855915069580078, + "loss_rtd": 0.18138495087623596, + "loss_sent": 0.2532788813114166, + "loss_sod": 0.031894732266664505, + "loss_total": 0.466558575630188, + "step": 392399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.983752965927124, + "learning_rate": 5.816833007120659e-08, + "loss": 0.4136, + "step": 392400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.982431411743164, + "loss_rtd": 0.19362428784370422, + "loss_sent": 0.1276284009218216, + "loss_sod": 0.018778566271066666, + "loss_total": 0.3400312662124634, + "step": 392499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.719311237335205, + "loss_rtd": 0.20908941328525543, + "loss_sent": 0.2693195939064026, + "loss_sod": 0.017360523343086243, + "loss_total": 0.49576953053474426, + "step": 392499 + }, + { + "epoch": 0.013, + "grad_norm": 1.3150999546051025, + "learning_rate": 5.66481411481079e-08, + "loss": 0.4242, + "step": 392500 + }, + { + "epoch": 0.013198, + "loss_gen": 6.014215469360352, + "loss_rtd": 0.20960800349712372, + "loss_sent": 0.2963671088218689, + "loss_sod": 0.06170869618654251, + "loss_total": 0.5676838159561157, + "step": 392599 + }, + { + "epoch": 0.013198, + "loss_gen": 6.473970890045166, + "loss_rtd": 0.19976429641246796, + "loss_sent": 0.1299380362033844, + "loss_sod": 0.07771517336368561, + "loss_total": 0.40741750597953796, + "step": 392599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0788829326629639, + "learning_rate": 5.514806940116191e-08, + "loss": 0.4149, + "step": 392600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.213524341583252, + "loss_rtd": 0.15696687996387482, + "loss_sent": 2.4247579858638346e-05, + "loss_sod": 0.0948181077837944, + "loss_total": 0.2518092393875122, + "step": 392699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.6687397956848145, + "loss_rtd": 0.19349080324172974, + "loss_sent": 0.07878529280424118, + "loss_sod": 0.01941695623099804, + "loss_total": 0.2916930615901947, + "step": 392699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.054790735244751, + "learning_rate": 5.366811543460748e-08, + "loss": 0.4295, + "step": 392700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.832163333892822, + "loss_rtd": 0.20374974608421326, + "loss_sent": 0.10759253799915314, + "loss_sod": 0.04071095958352089, + "loss_total": 0.3520532250404358, + "step": 392799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.9442877769470215, + "loss_rtd": 0.18590156733989716, + "loss_sent": 0.08245570957660675, + "loss_sod": 0.021068645641207695, + "loss_total": 0.28942590951919556, + "step": 392799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.20208740234375, + "learning_rate": 5.220827984455667e-08, + "loss": 0.3783, + "step": 392800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.723763942718506, + "loss_rtd": 0.23243838548660278, + "loss_sent": 0.045534372329711914, + "loss_sod": 0.03477635234594345, + "loss_total": 0.31274911761283875, + "step": 392899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.849414348602295, + "loss_rtd": 0.211965411901474, + "loss_sent": 0.16320006549358368, + "loss_sod": 0.058151278644800186, + "loss_total": 0.43331676721572876, + "step": 392899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.233581781387329, + "learning_rate": 5.0768563219044665e-08, + "loss": 0.4183, + "step": 392900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.444845676422119, + "loss_rtd": 0.18386173248291016, + "loss_sent": 0.00011939453543163836, + "loss_sod": 0.11187899112701416, + "loss_total": 0.2958601117134094, + "step": 392999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.19231653213501, + "loss_rtd": 0.1738746613264084, + "loss_sent": 2.268303433083929e-05, + "loss_sod": 0.14518167078495026, + "loss_total": 0.3190790116786957, + "step": 392999 + }, + { + "epoch": 0.014, + "grad_norm": 1.4038021564483643, + "learning_rate": 4.934896613797424e-08, + "loss": 0.4145, + "step": 393000 + }, + { + "epoch": 0.014, + "eval_loss": 0.39626842737197876, + "eval_runtime": 150.164, + "eval_samples_per_second": 102.841, + "eval_steps_per_second": 0.806, + "step": 393000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.95033597946167, + "loss_rtd": 0.1983281970024109, + "loss_sent": 0.647224485874176, + "loss_sod": 0.04138145223259926, + "loss_total": 0.8869341611862183, + "step": 393099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.889521598815918, + "loss_rtd": 0.1870291531085968, + "loss_sent": 0.07497472316026688, + "loss_sod": 0.06532853096723557, + "loss_total": 0.32733240723609924, + "step": 393099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.5091347694396973, + "learning_rate": 4.794948917317132e-08, + "loss": 0.4209, + "step": 393100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.345975399017334, + "loss_rtd": 0.17086759209632874, + "loss_sent": 0.05822031572461128, + "loss_sod": 0.04950503259897232, + "loss_total": 0.27859294414520264, + "step": 393199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.704697608947754, + "loss_rtd": 0.2205292135477066, + "loss_sent": 0.036288071423769, + "loss_sod": 0.06426063925027847, + "loss_total": 0.32107794284820557, + "step": 393199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.9023429155349731, + "learning_rate": 4.6570132888340556e-08, + "loss": 0.4216, + "step": 393200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.873831748962402, + "loss_rtd": 0.20705078542232513, + "loss_sent": 0.20871666073799133, + "loss_sod": 0.04147268086671829, + "loss_total": 0.45724013447761536, + "step": 393299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.4875288009643555, + "loss_rtd": 0.1822550743818283, + "loss_sent": 0.007055058144032955, + "loss_sod": 0.01906927488744259, + "loss_total": 0.20837940275669098, + "step": 393299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.661662757396698, + "learning_rate": 4.52108978390875e-08, + "loss": 0.402, + "step": 393300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.859102725982666, + "loss_rtd": 0.22546504437923431, + "loss_sent": 0.0710543766617775, + "loss_sod": 0.029986320063471794, + "loss_total": 0.32650572061538696, + "step": 393399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.5915303230285645, + "loss_rtd": 0.16614660620689392, + "loss_sent": 0.028622141107916832, + "loss_sod": 0.11509465426206589, + "loss_total": 0.309863418340683, + "step": 393399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.8462416529655457, + "learning_rate": 4.3871784572907524e-08, + "loss": 0.4071, + "step": 393400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.232621669769287, + "loss_rtd": 0.17480048537254333, + "loss_sent": 0.032950110733509064, + "loss_sod": 0.013565173372626305, + "loss_total": 0.22131577134132385, + "step": 393499 + }, + { + "epoch": 0.014998, + "loss_gen": 6.006125450134277, + "loss_rtd": 0.20174795389175415, + "loss_sent": 0.4658416509628296, + "loss_sod": 0.004407984670251608, + "loss_total": 0.671997606754303, + "step": 393499 + }, + { + "epoch": 0.015, + "grad_norm": 1.8555643558502197, + "learning_rate": 4.2552793629202504e-08, + "loss": 0.4105, + "step": 393500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.7444353103637695, + "loss_rtd": 0.21366789937019348, + "loss_sent": 0.3370654881000519, + "loss_sod": 0.021276114508509636, + "loss_total": 0.5720095038414001, + "step": 393599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.987033367156982, + "loss_rtd": 0.22995273768901825, + "loss_sent": 0.11565449088811874, + "loss_sod": 0.02097945846617222, + "loss_total": 0.36658668518066406, + "step": 393599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.9959455728530884, + "learning_rate": 4.1253925539253003e-08, + "loss": 0.4147, + "step": 393600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.640103340148926, + "loss_rtd": 0.20078890025615692, + "loss_sent": 0.006867082789540291, + "loss_sod": 0.09368519484996796, + "loss_total": 0.30134117603302, + "step": 393699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.250030994415283, + "loss_rtd": 0.16638965904712677, + "loss_sent": 0.00990522000938654, + "loss_sod": 0.05199884995818138, + "loss_total": 0.22829373180866241, + "step": 393699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.8872623443603516, + "learning_rate": 3.9975180826251626e-08, + "loss": 0.4305, + "step": 393700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.638567924499512, + "loss_rtd": 0.19004599750041962, + "loss_sent": 0.28857457637786865, + "loss_sod": 0.04916493967175484, + "loss_total": 0.5277855396270752, + "step": 393799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.700003147125244, + "loss_rtd": 0.22980214655399323, + "loss_sent": 0.15928246080875397, + "loss_sod": 0.01416728924959898, + "loss_total": 0.40325188636779785, + "step": 393799 + }, + { + "epoch": 0.0156, + "grad_norm": 2.3052096366882324, + "learning_rate": 3.8716560005269684e-08, + "loss": 0.4061, + "step": 393800 + }, + { + "epoch": 0.015798, + "loss_gen": 6.030062198638916, + "loss_rtd": 0.2230379283428192, + "loss_sent": 0.20075556635856628, + "loss_sod": 0.05416977405548096, + "loss_total": 0.47796326875686646, + "step": 393899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.665853977203369, + "loss_rtd": 0.22915604710578918, + "loss_sent": 0.2098565250635147, + "loss_sod": 0.008518276736140251, + "loss_total": 0.4475308656692505, + "step": 393899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.8969384431838989, + "learning_rate": 3.747806358328498e-08, + "loss": 0.4395, + "step": 393900 + }, + { + "epoch": 0.015998, + "loss_gen": 6.154298305511475, + "loss_rtd": 0.2080710530281067, + "loss_sent": 0.13580961525440216, + "loss_sod": 0.12290502339601517, + "loss_total": 0.4667856693267822, + "step": 393999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.941771507263184, + "loss_rtd": 0.22374530136585236, + "loss_sent": 0.1344057023525238, + "loss_sod": 0.06482969224452972, + "loss_total": 0.4229806959629059, + "step": 393999 + }, + { + "epoch": 0.016, + "grad_norm": 0.9252235293388367, + "learning_rate": 3.6259692059159576e-08, + "loss": 0.401, + "step": 394000 + }, + { + "epoch": 0.016, + "eval_loss": 0.39670079946517944, + "eval_runtime": 150.1918, + "eval_samples_per_second": 102.822, + "eval_steps_per_second": 0.806, + "step": 394000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.955575942993164, + "loss_rtd": 0.21677181124687195, + "loss_sent": 0.352148175239563, + "loss_sod": 0.018472759053111076, + "loss_total": 0.5873927474021912, + "step": 394099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.223123550415039, + "loss_rtd": 0.17382578551769257, + "loss_sent": 2.585828406154178e-05, + "loss_sod": 0.13978484272956848, + "loss_total": 0.3136364817619324, + "step": 394099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.3951376676559448, + "learning_rate": 3.506144592365645e-08, + "loss": 0.4154, + "step": 394100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.602262020111084, + "loss_rtd": 0.22612063586711884, + "loss_sent": 0.2637924551963806, + "loss_sod": 0.003283230122178793, + "loss_total": 0.4931963086128235, + "step": 394199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.5274152755737305, + "loss_rtd": 0.19322897493839264, + "loss_sent": 0.024348091334104538, + "loss_sod": 0.04663927108049393, + "loss_total": 0.2642163336277008, + "step": 394199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.7509187459945679, + "learning_rate": 3.3883325659428425e-08, + "loss": 0.3897, + "step": 394200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.995658874511719, + "loss_rtd": 0.2020183950662613, + "loss_sent": 0.04633113741874695, + "loss_sod": 0.07576620578765869, + "loss_total": 0.32411572337150574, + "step": 394299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.280943870544434, + "loss_rtd": 0.18137438595294952, + "loss_sent": 2.3182210497907363e-05, + "loss_sod": 0.126439169049263, + "loss_total": 0.30783674120903015, + "step": 394299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.3717312812805176, + "learning_rate": 3.272533174102366e-08, + "loss": 0.4239, + "step": 394300 + }, + { + "epoch": 0.016798, + "loss_gen": 6.185173511505127, + "loss_rtd": 0.21150723099708557, + "loss_sent": 0.03277455270290375, + "loss_sod": 0.04609215632081032, + "loss_total": 0.29037392139434814, + "step": 394399 + }, + { + "epoch": 0.016798, + "loss_gen": 6.213319778442383, + "loss_rtd": 0.2253490686416626, + "loss_sent": 0.1608581840991974, + "loss_sod": 0.02025735192000866, + "loss_total": 0.4064646065235138, + "step": 394399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.6647672057151794, + "learning_rate": 3.1587464634874606e-08, + "loss": 0.4042, + "step": 394400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.9767537117004395, + "loss_rtd": 0.19369633495807648, + "loss_sent": 0.1807086169719696, + "loss_sod": 0.020512813702225685, + "loss_total": 0.3949177861213684, + "step": 394499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.709785461425781, + "loss_rtd": 0.1785837709903717, + "loss_sent": 0.06159829720854759, + "loss_sod": 0.01736762747168541, + "loss_total": 0.2575497031211853, + "step": 394499 + }, + { + "epoch": 0.017, + "grad_norm": 0.7903895974159241, + "learning_rate": 3.0469724799320196e-08, + "loss": 0.4243, + "step": 394500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.914492130279541, + "loss_rtd": 0.19854649901390076, + "loss_sent": 0.08682210743427277, + "loss_sod": 0.042485035955905914, + "loss_total": 0.32785362005233765, + "step": 394599 + }, + { + "epoch": 0.017198, + "loss_gen": 6.035208702087402, + "loss_rtd": 0.21304945647716522, + "loss_sent": 0.20649196207523346, + "loss_sod": 0.017722059041261673, + "loss_total": 0.43726348876953125, + "step": 394599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.6683346033096313, + "learning_rate": 2.937211268458917e-08, + "loss": 0.4165, + "step": 394600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.919938087463379, + "loss_rtd": 0.20923687517642975, + "loss_sent": 0.27039802074432373, + "loss_sod": 0.06865854561328888, + "loss_total": 0.5482934713363647, + "step": 394699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.799861907958984, + "loss_rtd": 0.21269604563713074, + "loss_sent": 0.044693950563669205, + "loss_sod": 0.019913161173462868, + "loss_total": 0.27730315923690796, + "step": 394699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.3340352773666382, + "learning_rate": 2.8294628732788985e-08, + "loss": 0.4298, + "step": 394700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.700779914855957, + "loss_rtd": 0.1747732162475586, + "loss_sent": 0.048937201499938965, + "loss_sod": 0.04359501227736473, + "loss_total": 0.2673054337501526, + "step": 394799 + }, + { + "epoch": 0.017598, + "loss_gen": 6.062201499938965, + "loss_rtd": 0.22227883338928223, + "loss_sent": 0.3737484812736511, + "loss_sod": 0.011067884042859077, + "loss_total": 0.6070951819419861, + "step": 394799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.9948114156723022, + "learning_rate": 2.7237273377944684e-08, + "loss": 0.4083, + "step": 394800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.300630569458008, + "loss_rtd": 0.1731356531381607, + "loss_sent": 0.00145197962410748, + "loss_sod": 0.07960737496614456, + "loss_total": 0.2541950047016144, + "step": 394899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.130007266998291, + "loss_rtd": 0.17352192103862762, + "loss_sent": 2.2366391931427643e-05, + "loss_sod": 0.09852245450019836, + "loss_total": 0.27206671237945557, + "step": 394899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.9013128876686096, + "learning_rate": 2.6200047045943366e-08, + "loss": 0.406, + "step": 394900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.662559509277344, + "loss_rtd": 0.22165432572364807, + "loss_sent": 0.13525241613388062, + "loss_sod": 0.027606867253780365, + "loss_total": 0.38451361656188965, + "step": 394999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.6816792488098145, + "loss_rtd": 0.2266261875629425, + "loss_sent": 0.4238015115261078, + "loss_sod": 0.004062837455421686, + "loss_total": 0.6544905304908752, + "step": 394999 + }, + { + "epoch": 0.018, + "grad_norm": 1.2733234167099, + "learning_rate": 2.5182950154589712e-08, + "loss": 0.411, + "step": 395000 + }, + { + "epoch": 0.018, + "eval_loss": 0.3975522518157959, + "eval_runtime": 150.0529, + "eval_samples_per_second": 102.917, + "eval_steps_per_second": 0.806, + "step": 395000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.792178630828857, + "loss_rtd": 0.21859611570835114, + "loss_sent": 0.13951151072978973, + "loss_sod": 0.05840788781642914, + "loss_total": 0.4165155291557312, + "step": 395099 + }, + { + "epoch": 0.018198, + "loss_gen": 6.0479888916015625, + "loss_rtd": 0.23042891919612885, + "loss_sent": 0.20273616909980774, + "loss_sod": 0.0860833078622818, + "loss_total": 0.519248366355896, + "step": 395099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.0764565467834473, + "learning_rate": 2.4185983113567124e-08, + "loss": 0.428, + "step": 395100 + }, + { + "epoch": 0.018398, + "loss_gen": 6.170469760894775, + "loss_rtd": 0.24229945242404938, + "loss_sent": 0.7802306413650513, + "loss_sod": 0.023285791277885437, + "loss_total": 1.0458159446716309, + "step": 395199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.718085765838623, + "loss_rtd": 0.184598907828331, + "loss_sent": 0.26514795422554016, + "loss_sod": 0.03541194647550583, + "loss_total": 0.4851588010787964, + "step": 395199 + }, + { + "epoch": 0.0184, + "grad_norm": 2.733649492263794, + "learning_rate": 2.320914632445437e-08, + "loss": 0.4093, + "step": 395200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.969252109527588, + "loss_rtd": 0.20024250447750092, + "loss_sent": 0.32519903779029846, + "loss_sod": 0.0019377648131921887, + "loss_total": 0.5273792743682861, + "step": 395299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.480463981628418, + "loss_rtd": 0.18705987930297852, + "loss_sent": 0.034322500228881836, + "loss_sod": 0.06096341088414192, + "loss_total": 0.2823457717895508, + "step": 395299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.9037191271781921, + "learning_rate": 2.2252440180720036e-08, + "loss": 0.4011, + "step": 395300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.389925956726074, + "loss_rtd": 0.17318344116210938, + "loss_sent": 0.0014120283303782344, + "loss_sod": 0.11518971621990204, + "loss_total": 0.2897852063179016, + "step": 395399 + }, + { + "epoch": 0.018798, + "loss_gen": 6.047664642333984, + "loss_rtd": 0.2042461335659027, + "loss_sent": 0.1126847043633461, + "loss_sod": 0.06417940557003021, + "loss_total": 0.38111022114753723, + "step": 395399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.989812970161438, + "learning_rate": 2.131586506772254e-08, + "loss": 0.4059, + "step": 395400 + }, + { + "epoch": 0.018998, + "loss_gen": 6.207770347595215, + "loss_rtd": 0.21371395885944366, + "loss_sent": 0.3242473304271698, + "loss_sod": 0.06220316141843796, + "loss_total": 0.6001644134521484, + "step": 395499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.763589382171631, + "loss_rtd": 0.1895914524793625, + "loss_sent": 0.1785668432712555, + "loss_sod": 0.03489559143781662, + "loss_total": 0.403053879737854, + "step": 395499 + }, + { + "epoch": 0.019, + "grad_norm": 0.7213029265403748, + "learning_rate": 2.0399421362721215e-08, + "loss": 0.4095, + "step": 395500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.1523661613464355, + "loss_rtd": 0.1546034961938858, + "loss_sent": 0.018785694614052773, + "loss_sod": 0.037261370569467545, + "loss_total": 0.21065056324005127, + "step": 395599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.888722896575928, + "loss_rtd": 0.22429607808589935, + "loss_sent": 0.05058183893561363, + "loss_sod": 0.0337141752243042, + "loss_total": 0.3085921108722687, + "step": 395599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.6930906176567078, + "learning_rate": 1.950310943485967e-08, + "loss": 0.404, + "step": 395600 + }, + { + "epoch": 0.019398, + "loss_gen": 6.158482074737549, + "loss_rtd": 0.21151158213615417, + "loss_sent": 0.10480982065200806, + "loss_sod": 0.10025284439325333, + "loss_total": 0.41657423973083496, + "step": 395699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.929201602935791, + "loss_rtd": 0.19908545911312103, + "loss_sent": 0.08760825544595718, + "loss_sod": 0.04331972822546959, + "loss_total": 0.3300134539604187, + "step": 395699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.2213667631149292, + "learning_rate": 1.862692964516022e-08, + "loss": 0.4134, + "step": 395700 + }, + { + "epoch": 0.019598, + "loss_gen": 6.002203464508057, + "loss_rtd": 0.2268359512090683, + "loss_sent": 0.09985633939504623, + "loss_sod": 0.02582608349621296, + "loss_total": 0.35251837968826294, + "step": 395799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.99774694442749, + "loss_rtd": 0.19766496121883392, + "loss_sent": 0.3084019124507904, + "loss_sod": 0.010111101903021336, + "loss_total": 0.5161780118942261, + "step": 395799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.337019681930542, + "learning_rate": 1.7770882346562766e-08, + "loss": 0.4131, + "step": 395800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.453426361083984, + "loss_rtd": 0.19900771975517273, + "loss_sent": 0.03479054197669029, + "loss_sod": 0.039392851293087006, + "loss_total": 0.2731911242008209, + "step": 395899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.1930317878723145, + "loss_rtd": 0.17708735167980194, + "loss_sent": 2.3433634851244278e-05, + "loss_sod": 0.07683393359184265, + "loss_total": 0.2539446949958801, + "step": 395899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.7898704409599304, + "learning_rate": 1.693496788387483e-08, + "loss": 0.4209, + "step": 395900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.888876914978027, + "loss_rtd": 0.21547532081604004, + "loss_sent": 0.22610247135162354, + "loss_sod": 0.06966503709554672, + "loss_total": 0.5112428665161133, + "step": 395999 + }, + { + "epoch": 0.019998, + "loss_gen": 6.246582508087158, + "loss_rtd": 0.22070348262786865, + "loss_sent": 0.20763853192329407, + "loss_sod": 0.015469067730009556, + "loss_total": 0.4438110888004303, + "step": 395999 + }, + { + "epoch": 0.02, + "grad_norm": 0.9909964203834534, + "learning_rate": 1.6119186593804848e-08, + "loss": 0.4382, + "step": 396000 + }, + { + "epoch": 0.02, + "eval_loss": 0.3933391571044922, + "eval_runtime": 151.6017, + "eval_samples_per_second": 101.866, + "eval_steps_per_second": 0.798, + "step": 396000 + }, + { + "epoch": 0.020198, + "loss_gen": 6.113070487976074, + "loss_rtd": 0.2115258425474167, + "loss_sent": 0.20682215690612793, + "loss_sod": 0.042044926434755325, + "loss_total": 0.46039292216300964, + "step": 396099 + }, + { + "epoch": 0.020198, + "loss_gen": 6.27998161315918, + "loss_rtd": 0.20464813709259033, + "loss_sent": 0.31439003348350525, + "loss_sod": 0.11591437458992004, + "loss_total": 0.6349525451660156, + "step": 396099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.7895152568817139, + "learning_rate": 1.5323538804951077e-08, + "loss": 0.4144, + "step": 396100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.5880937576293945, + "loss_rtd": 0.21506370604038239, + "loss_sent": 0.1927960067987442, + "loss_sod": 0.034131310880184174, + "loss_total": 0.44199103116989136, + "step": 396199 + }, + { + "epoch": 0.020398, + "loss_gen": 6.354598522186279, + "loss_rtd": 0.21413680911064148, + "loss_sent": 0.1955765038728714, + "loss_sod": 0.10964728891849518, + "loss_total": 0.5193606019020081, + "step": 396199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.1391600370407104, + "learning_rate": 1.4548024837796048e-08, + "loss": 0.4092, + "step": 396200 + }, + { + "epoch": 0.020598, + "loss_gen": 6.109943866729736, + "loss_rtd": 0.18975844979286194, + "loss_sent": 0.198400616645813, + "loss_sod": 0.09451808780431747, + "loss_total": 0.482677161693573, + "step": 396299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.9123215675354, + "loss_rtd": 0.21667487919330597, + "loss_sent": 0.10699949413537979, + "loss_sod": 0.03720562532544136, + "loss_total": 0.3608799874782562, + "step": 396299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.8118402361869812, + "learning_rate": 1.3792645004717663e-08, + "loss": 0.3997, + "step": 396300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.112740516662598, + "loss_rtd": 0.17680253088474274, + "loss_sent": 2.3321877961279824e-05, + "loss_sod": 0.08652521669864655, + "loss_total": 0.26335108280181885, + "step": 396399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.7683186531066895, + "loss_rtd": 0.20547260344028473, + "loss_sent": 0.10369966924190521, + "loss_sod": 0.02828032150864601, + "loss_total": 0.33745259046554565, + "step": 396399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8942224979400635, + "learning_rate": 1.3057399609983646e-08, + "loss": 0.4087, + "step": 396400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.889406681060791, + "loss_rtd": 0.18509641289710999, + "loss_sent": 0.16096730530261993, + "loss_sod": 0.08021023869514465, + "loss_total": 0.4262739419937134, + "step": 396499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.772846221923828, + "loss_rtd": 0.20384618639945984, + "loss_sent": 0.2623909115791321, + "loss_sod": 0.05246247351169586, + "loss_total": 0.518699586391449, + "step": 396499 + }, + { + "epoch": 0.021, + "grad_norm": 1.794934630393982, + "learning_rate": 1.2342288949757086e-08, + "loss": 0.3897, + "step": 396500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.727966785430908, + "loss_rtd": 0.22208158671855927, + "loss_sent": 0.14549203217029572, + "loss_sod": 0.12180855125188828, + "loss_total": 0.48938214778900146, + "step": 396599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.7613325119018555, + "loss_rtd": 0.19115635752677917, + "loss_sent": 0.0005651103565469384, + "loss_sod": 0.11405406892299652, + "loss_total": 0.30577552318573, + "step": 396599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.111713171005249, + "learning_rate": 1.1647313312074248e-08, + "loss": 0.4166, + "step": 396600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.817107200622559, + "loss_rtd": 0.22824671864509583, + "loss_sent": 0.2634729743003845, + "loss_sod": 0.034896522760391235, + "loss_total": 0.5266162157058716, + "step": 396699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.967624664306641, + "loss_rtd": 0.22594067454338074, + "loss_sent": 0.13916714489459991, + "loss_sod": 0.013836721889674664, + "loss_total": 0.3789445459842682, + "step": 396699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.0892983675003052, + "learning_rate": 1.0972472976872317e-08, + "loss": 0.4199, + "step": 396700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.413094997406006, + "loss_rtd": 0.164296954870224, + "loss_sent": 0.0005756186437793076, + "loss_sod": 0.046316053718328476, + "loss_total": 0.21118862926959991, + "step": 396799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.681094646453857, + "loss_rtd": 0.18285973370075226, + "loss_sent": 0.11255313456058502, + "loss_sod": 0.027348056435585022, + "loss_total": 0.3227609395980835, + "step": 396799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.5557261109352112, + "learning_rate": 1.0317768215983847e-08, + "loss": 0.4111, + "step": 396800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.866828918457031, + "loss_rtd": 0.21312595903873444, + "loss_sent": 0.3726646602153778, + "loss_sod": 0.011665992438793182, + "loss_total": 0.5974565744400024, + "step": 396899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.861220359802246, + "loss_rtd": 0.2063438594341278, + "loss_sent": 0.04638810083270073, + "loss_sod": 0.02496756985783577, + "loss_total": 0.2776995301246643, + "step": 396899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.8123736381530762, + "learning_rate": 9.683199293120116e-09, + "loss": 0.4271, + "step": 396900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.931436061859131, + "loss_rtd": 0.21014858782291412, + "loss_sent": 0.16449576616287231, + "loss_sod": 0.035002026706933975, + "loss_total": 0.4096463918685913, + "step": 396999 + }, + { + "epoch": 0.021998, + "loss_gen": 6.23885440826416, + "loss_rtd": 0.2218223214149475, + "loss_sent": 0.13119150698184967, + "loss_sod": 0.0584576278924942, + "loss_total": 0.411471426486969, + "step": 396999 + }, + { + "epoch": 0.022, + "grad_norm": 0.967769980430603, + "learning_rate": 9.068766463887768e-09, + "loss": 0.4137, + "step": 397000 + }, + { + "epoch": 0.022, + "eval_loss": 0.39533543586730957, + "eval_runtime": 149.965, + "eval_samples_per_second": 102.977, + "eval_steps_per_second": 0.807, + "step": 397000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.757076740264893, + "loss_rtd": 0.21644426882266998, + "loss_sent": 0.36708390712738037, + "loss_sod": 0.004734056536108255, + "loss_total": 0.5882622003555298, + "step": 397099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.524250030517578, + "loss_rtd": 0.2052869349718094, + "loss_sent": 0.29954639077186584, + "loss_sod": 0.05333589389920235, + "loss_total": 0.5581692457199097, + "step": 397099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.7729299068450928, + "learning_rate": 8.47446997577217e-09, + "loss": 0.4073, + "step": 397100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.344290256500244, + "loss_rtd": 0.16685928404331207, + "loss_sent": 0.0002616595884319395, + "loss_sod": 0.03148813545703888, + "loss_total": 0.19860908389091492, + "step": 397199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.928763389587402, + "loss_rtd": 0.21665158867835999, + "loss_sent": 0.2575046122074127, + "loss_sod": 0.11655202507972717, + "loss_total": 0.5907082557678223, + "step": 397199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.2157509326934814, + "learning_rate": 7.900310068165163e-09, + "loss": 0.4283, + "step": 397200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.86064338684082, + "loss_rtd": 0.19511862099170685, + "loss_sent": 0.42830026149749756, + "loss_sod": 0.029919231310486794, + "loss_total": 0.6533381342887878, + "step": 397299 + }, + { + "epoch": 0.022598, + "loss_gen": 6.055613994598389, + "loss_rtd": 0.22604653239250183, + "loss_sent": 0.2757790982723236, + "loss_sod": 0.015424983575940132, + "loss_total": 0.5172505974769592, + "step": 397299 + }, + { + "epoch": 0.0226, + "grad_norm": 2.5486299991607666, + "learning_rate": 7.346286972337302e-09, + "loss": 0.4145, + "step": 397300 + }, + { + "epoch": 0.022798, + "loss_gen": 6.520084381103516, + "loss_rtd": 0.19186510145664215, + "loss_sent": 0.20222032070159912, + "loss_sod": 0.03658706694841385, + "loss_total": 0.4306724965572357, + "step": 397399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.960272789001465, + "loss_rtd": 0.22346623241901398, + "loss_sent": 0.15942415595054626, + "loss_sod": 0.015284555032849312, + "loss_total": 0.3981749415397644, + "step": 397399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.2572697401046753, + "learning_rate": 6.812400911443417e-09, + "loss": 0.4174, + "step": 397400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.888513088226318, + "loss_rtd": 0.20500653982162476, + "loss_sent": 0.08508677035570145, + "loss_sod": 0.2508503496646881, + "loss_total": 0.5409436821937561, + "step": 397499 + }, + { + "epoch": 0.022998, + "loss_gen": 6.165918827056885, + "loss_rtd": 0.23296770453453064, + "loss_sent": 0.1689748466014862, + "loss_sod": 0.031435467302799225, + "loss_total": 0.4333780109882355, + "step": 397499 + }, + { + "epoch": 0.023, + "grad_norm": 1.6528115272521973, + "learning_rate": 6.2986521005392595e-09, + "loss": 0.4172, + "step": 397500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.944231033325195, + "loss_rtd": 0.21620090305805206, + "loss_sent": 0.17428633570671082, + "loss_sod": 0.0337856188416481, + "loss_total": 0.4242728650569916, + "step": 397599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.931299209594727, + "loss_rtd": 0.2072267383337021, + "loss_sent": 0.07298165559768677, + "loss_sod": 0.020796025171875954, + "loss_total": 0.30100440979003906, + "step": 397599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.8798516392707825, + "learning_rate": 5.8050407465537475e-09, + "loss": 0.3918, + "step": 397600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.75743293762207, + "loss_rtd": 0.21875669062137604, + "loss_sent": 0.14590534567832947, + "loss_sod": 0.01297023892402649, + "loss_total": 0.3776322603225708, + "step": 397699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.408660411834717, + "loss_rtd": 0.18407467007637024, + "loss_sent": 0.011094531044363976, + "loss_sod": 0.08246287703514099, + "loss_total": 0.27763208746910095, + "step": 397699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8510765433311462, + "learning_rate": 5.331567048322272e-09, + "loss": 0.405, + "step": 397700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.693921089172363, + "loss_rtd": 0.19635532796382904, + "loss_sent": 0.11468133330345154, + "loss_sod": 0.14380469918251038, + "loss_total": 0.45484134554862976, + "step": 397799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.726851463317871, + "loss_rtd": 0.2062656730413437, + "loss_sent": 0.2271997481584549, + "loss_sod": 0.06671570986509323, + "loss_total": 0.5001811385154724, + "step": 397799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.4799067974090576, + "learning_rate": 4.878231196558947e-09, + "loss": 0.4147, + "step": 397800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.480199337005615, + "loss_rtd": 0.1762045919895172, + "loss_sent": 0.003955208696424961, + "loss_sod": 0.018526971340179443, + "loss_total": 0.19868677854537964, + "step": 397899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.268898963928223, + "loss_rtd": 0.17445386946201324, + "loss_sent": 0.007511140778660774, + "loss_sod": 0.09362047910690308, + "loss_total": 0.27558547258377075, + "step": 397899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.8327092528343201, + "learning_rate": 4.445033373862151e-09, + "loss": 0.4009, + "step": 397900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.45214319229126, + "loss_rtd": 0.16575685143470764, + "loss_sent": 2.3105838408810087e-05, + "loss_sod": 0.10151588916778564, + "loss_total": 0.26729583740234375, + "step": 397999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.442799091339111, + "loss_rtd": 0.16667398810386658, + "loss_sent": 0.009982021525502205, + "loss_sod": 0.03796064108610153, + "loss_total": 0.21461665630340576, + "step": 397999 + }, + { + "epoch": 0.024, + "grad_norm": 0.8787603378295898, + "learning_rate": 4.031973754725637e-09, + "loss": 0.4256, + "step": 398000 + }, + { + "epoch": 0.024, + "eval_loss": 0.3966180086135864, + "eval_runtime": 150.4248, + "eval_samples_per_second": 102.663, + "eval_steps_per_second": 0.804, + "step": 398000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.452498435974121, + "loss_rtd": 0.18792618811130524, + "loss_sent": 0.09348011016845703, + "loss_sod": 0.16174200177192688, + "loss_total": 0.44314831495285034, + "step": 398099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.271835803985596, + "loss_rtd": 0.17350687086582184, + "loss_sent": 2.3092798073776066e-05, + "loss_sod": 0.09141494333744049, + "loss_total": 0.2649449110031128, + "step": 398099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.3214391469955444, + "learning_rate": 3.6390525055329806e-09, + "loss": 0.4086, + "step": 398100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.939271450042725, + "loss_rtd": 0.2222088724374771, + "loss_sent": 0.08363074064254761, + "loss_sod": 0.03482465073466301, + "loss_total": 0.340664267539978, + "step": 398199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.742465496063232, + "loss_rtd": 0.1840306967496872, + "loss_sent": 0.12969596683979034, + "loss_sod": 0.008474668487906456, + "loss_total": 0.32220131158828735, + "step": 398199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.9859786629676819, + "learning_rate": 3.266269784552023e-09, + "loss": 0.414, + "step": 398200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.351289749145508, + "loss_rtd": 0.17522194981575012, + "loss_sent": 0.006022478919476271, + "loss_sod": 0.06655101478099823, + "loss_total": 0.2477954477071762, + "step": 398299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.743028163909912, + "loss_rtd": 0.19148989021778107, + "loss_sent": 0.1353217512369156, + "loss_sod": 0.016871271654963493, + "loss_total": 0.3436829149723053, + "step": 398299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.7481858134269714, + "learning_rate": 2.913625741940429e-09, + "loss": 0.4291, + "step": 398300 + }, + { + "epoch": 0.024798, + "loss_gen": 6.19744348526001, + "loss_rtd": 0.205043762922287, + "loss_sent": 0.2743740975856781, + "loss_sod": 0.006529063917696476, + "loss_total": 0.485946923494339, + "step": 398399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.926233291625977, + "loss_rtd": 0.20683732628822327, + "loss_sent": 0.16888761520385742, + "loss_sod": 0.020709645003080368, + "loss_total": 0.39643460512161255, + "step": 398399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.181447982788086, + "learning_rate": 2.5811205197401322e-09, + "loss": 0.4194, + "step": 398400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.8736042976379395, + "loss_rtd": 0.20080658793449402, + "loss_sent": 0.3153776526451111, + "loss_sod": 0.04780576378107071, + "loss_total": 0.5639899969100952, + "step": 398499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.90634822845459, + "loss_rtd": 0.21827960014343262, + "loss_sent": 0.10579497367143631, + "loss_sod": 0.005877520423382521, + "loss_total": 0.32995209097862244, + "step": 398499 + }, + { + "epoch": 0.025, + "grad_norm": 1.1717947721481323, + "learning_rate": 2.2687542518828874e-09, + "loss": 0.4299, + "step": 398500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.878081798553467, + "loss_rtd": 0.19720390439033508, + "loss_sent": 0.19777674973011017, + "loss_sod": 0.051944032311439514, + "loss_total": 0.44692468643188477, + "step": 398599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.84797477722168, + "loss_rtd": 0.21240682899951935, + "loss_sent": 0.46165311336517334, + "loss_sod": 0.01230304129421711, + "loss_total": 0.6863629817962646, + "step": 398599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.7682371139526367, + "learning_rate": 1.9765270641958213e-09, + "loss": 0.4085, + "step": 398600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.527498245239258, + "loss_rtd": 0.17584048211574554, + "loss_sent": 0.0007072031730785966, + "loss_sod": 0.027694545686244965, + "loss_total": 0.20424222946166992, + "step": 398699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.6394782066345215, + "loss_rtd": 0.19597937166690826, + "loss_sent": 0.027343623340129852, + "loss_sod": 0.07703717052936554, + "loss_total": 0.30036017298698425, + "step": 398699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8241334557533264, + "learning_rate": 1.704439074379227e-09, + "loss": 0.412, + "step": 398700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.8821635246276855, + "loss_rtd": 0.2143273949623108, + "loss_sent": 0.1178353875875473, + "loss_sod": 0.017243245616555214, + "loss_total": 0.34940603375434875, + "step": 398799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.804738521575928, + "loss_rtd": 0.19504877924919128, + "loss_sent": 0.025929627940058708, + "loss_sod": 0.018858319148421288, + "loss_total": 0.23983672261238098, + "step": 398799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.6110307574272156, + "learning_rate": 1.4524903920398736e-09, + "loss": 0.4038, + "step": 398800 + }, + { + "epoch": 0.025798, + "loss_gen": 6.070090293884277, + "loss_rtd": 0.21025177836418152, + "loss_sent": 0.2085135132074356, + "loss_sod": 0.0638621523976326, + "loss_total": 0.4826274514198303, + "step": 398899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.646976947784424, + "loss_rtd": 0.22182682156562805, + "loss_sent": 0.12924838066101074, + "loss_sod": 0.05625419318675995, + "loss_total": 0.40732938051223755, + "step": 398899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.20877206325531, + "learning_rate": 1.2206811186576962e-09, + "loss": 0.4023, + "step": 398900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.952920436859131, + "loss_rtd": 0.2104673832654953, + "loss_sent": 0.19287212193012238, + "loss_sod": 0.07483374327421188, + "loss_total": 0.47817325592041016, + "step": 398999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.882963180541992, + "loss_rtd": 0.1719771921634674, + "loss_sent": 0.07575849443674088, + "loss_sod": 0.1464938074350357, + "loss_total": 0.3942295014858246, + "step": 398999 + }, + { + "epoch": 0.026, + "grad_norm": 1.297067403793335, + "learning_rate": 1.009011347602451e-09, + "loss": 0.4091, + "step": 399000 + }, + { + "epoch": 0.026, + "eval_loss": 0.4002399444580078, + "eval_runtime": 149.9812, + "eval_samples_per_second": 102.966, + "eval_steps_per_second": 0.807, + "step": 399000 + }, + { + "epoch": 0.026198, + "loss_gen": 6.132881164550781, + "loss_rtd": 0.20493392646312714, + "loss_sent": 0.5589383244514465, + "loss_sod": 0.023373253643512726, + "loss_total": 0.787245512008667, + "step": 399099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.744200229644775, + "loss_rtd": 0.19487015902996063, + "loss_sent": 0.027530426159501076, + "loss_sod": 0.04802277684211731, + "loss_total": 0.27042335271835327, + "step": 399099 + }, + { + "epoch": 0.0262, + "grad_norm": 2.5343658924102783, + "learning_rate": 8.174811641392665e-10, + "loss": 0.4239, + "step": 399100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.711893081665039, + "loss_rtd": 0.2208995521068573, + "loss_sent": 0.0921013280749321, + "loss_sod": 0.06058129295706749, + "loss_total": 0.3735821843147278, + "step": 399199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.812560081481934, + "loss_rtd": 0.19347859919071198, + "loss_sent": 0.37179550528526306, + "loss_sod": 0.038559816777706146, + "loss_total": 0.6038339138031006, + "step": 399199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.0608220100402832, + "learning_rate": 6.460906454175408e-10, + "loss": 0.4158, + "step": 399200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.791370868682861, + "loss_rtd": 0.2209886759519577, + "loss_sent": 0.11350365728139877, + "loss_sod": 0.04800304025411606, + "loss_total": 0.38249537348747253, + "step": 399299 + }, + { + "epoch": 0.026598, + "loss_gen": 6.132655143737793, + "loss_rtd": 0.20953939855098724, + "loss_sent": 0.12483220547437668, + "loss_sod": 0.024725060909986496, + "loss_total": 0.3590966761112213, + "step": 399299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.3723679780960083, + "learning_rate": 4.948398604709414e-10, + "loss": 0.4149, + "step": 399300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.434885501861572, + "loss_rtd": 0.17824910581111908, + "loss_sent": 0.02135716937482357, + "loss_sod": 0.12772992253303528, + "loss_total": 0.3273361921310425, + "step": 399399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.362635612487793, + "loss_rtd": 0.17097197473049164, + "loss_sent": 0.22683192789554596, + "loss_sod": 0.01930174231529236, + "loss_total": 0.41710564494132996, + "step": 399399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.625372290611267, + "learning_rate": 3.637288702229569e-10, + "loss": 0.4104, + "step": 399400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.829373836517334, + "loss_rtd": 0.2223738580942154, + "loss_sent": 0.2690161168575287, + "loss_sod": 0.02829951047897339, + "loss_total": 0.5196894407272339, + "step": 399499 + }, + { + "epoch": 0.026998, + "loss_gen": 6.015805244445801, + "loss_rtd": 0.20060022175312042, + "loss_sent": 0.08454405516386032, + "loss_sod": 0.05157846584916115, + "loss_total": 0.3367227613925934, + "step": 399499 + }, + { + "epoch": 0.027, + "grad_norm": 0.8972489833831787, + "learning_rate": 2.527577274868964e-10, + "loss": 0.4207, + "step": 399500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.29140043258667, + "loss_rtd": 0.18922896683216095, + "loss_sent": 0.045968472957611084, + "loss_sod": 0.09831858426332474, + "loss_total": 0.33351603150367737, + "step": 399599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.128223896026611, + "loss_rtd": 0.1511480212211609, + "loss_sent": 2.2403644834412262e-05, + "loss_sod": 0.08321768790483475, + "loss_total": 0.2343880981206894, + "step": 399599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.0857374668121338, + "learning_rate": 1.6192647696033902e-10, + "loss": 0.4028, + "step": 399600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.591905117034912, + "loss_rtd": 0.20010913908481598, + "loss_sent": 0.07157837599515915, + "loss_sod": 0.01919025555253029, + "loss_total": 0.2908777594566345, + "step": 399699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.796738624572754, + "loss_rtd": 0.23129458725452423, + "loss_sent": 0.1561926305294037, + "loss_sod": 0.02767675742506981, + "loss_total": 0.41516396403312683, + "step": 399699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.0839260816574097, + "learning_rate": 9.123515523068449e-11, + "loss": 0.4279, + "step": 399700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.8244242668151855, + "loss_rtd": 0.219486802816391, + "loss_sent": 0.1759643703699112, + "loss_sod": 0.02567342296242714, + "loss_total": 0.42112457752227783, + "step": 399799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.842320919036865, + "loss_rtd": 0.2068764567375183, + "loss_sent": 0.0915793776512146, + "loss_sod": 0.004401105921715498, + "loss_total": 0.3028569221496582, + "step": 399799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.001084327697754, + "learning_rate": 4.068379076960227e-11, + "loss": 0.4144, + "step": 399800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.913634300231934, + "loss_rtd": 0.1969427615404129, + "loss_sent": 0.3520510494709015, + "loss_sod": 0.01477159932255745, + "loss_total": 0.5637654066085815, + "step": 399899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.979221820831299, + "loss_rtd": 0.22889800369739532, + "loss_sent": 0.4575820863246918, + "loss_sod": 0.05908600240945816, + "loss_total": 0.7455661296844482, + "step": 399899 + }, + { + "epoch": 0.0278, + "grad_norm": 2.4349780082702637, + "learning_rate": 1.0272403944133757e-11, + "loss": 0.4252, + "step": 399900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.880891799926758, + "loss_rtd": 0.24432171881198883, + "loss_sent": 0.054657068103551865, + "loss_sod": 0.054939813911914825, + "loss_total": 0.3539186120033264, + "step": 399999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.968878269195557, + "loss_rtd": 0.21166859567165375, + "loss_sent": 0.12592780590057373, + "loss_sod": 0.03586617857217789, + "loss_total": 0.37346258759498596, + "step": 399999 + }, + { + "epoch": 0.028, + "grad_norm": 1.1277533769607544, + "learning_rate": 1.0070000389106326e-15, + "loss": 0.4122, + "step": 400000 + }, + { + "epoch": 0.028, + "eval_loss": 0.3968818187713623, + "eval_runtime": 150.0239, + "eval_samples_per_second": 102.937, + "eval_steps_per_second": 0.807, + "step": 400000 } ], "logging_steps": 100, @@ -78208,7 +103234,7 @@ "attributes": {} } }, - "total_flos": 2.1164774706118656e+19, + "total_flos": 2.79402966417408e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null