diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,9 +2,9 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.006, + "epoch": 0.032, "eval_steps": 1000, - "global_step": 163000, + "global_step": 272000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -42069,6 +42069,28128 @@ "eval_samples_per_second": 102.363, "eval_steps_per_second": 0.802, "step": 163000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.196635723114014, + "loss_rtd": 0.28168556094169617, + "loss_sent": 0.12392954528331757, + "loss_sod": 0.04333566501736641, + "loss_total": 0.44895076751708984, + "step": 163099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.259164810180664, + "loss_rtd": 0.2748069763183594, + "loss_sent": 0.2878701984882355, + "loss_sod": 0.016808802261948586, + "loss_total": 0.5794860124588013, + "step": 163099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.3830218315124512, + "learning_rate": 4.663912786477674e-05, + "loss": 0.4884, + "step": 163100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.5838494300842285, + "loss_rtd": 0.24817979335784912, + "loss_sent": 3.2675852708052844e-05, + "loss_sod": 0.08564233779907227, + "loss_total": 0.3338547945022583, + "step": 163199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.423274993896484, + "loss_rtd": 0.2379945069551468, + "loss_sent": 0.0018292111344635487, + "loss_sod": 0.04787762835621834, + "loss_total": 0.2877013385295868, + "step": 163199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.9870240092277527, + "learning_rate": 4.660746705417474e-05, + "loss": 0.5106, + "step": 163200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.000813007354736, + "loss_rtd": 0.2704791724681854, + "loss_sent": 0.2741572856903076, + "loss_sod": 0.025609789416193962, + "loss_total": 0.5702462196350098, + "step": 163299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.260286331176758, + "loss_rtd": 0.28576281666755676, + "loss_sent": 0.2345261126756668, + "loss_sod": 0.001798760611563921, + "loss_total": 0.5220876932144165, + "step": 163299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.569671630859375, + "learning_rate": 4.657580761008458e-05, + "loss": 0.5042, + "step": 163300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.422150611877441, + "loss_rtd": 0.263324111700058, + "loss_sent": 0.3823015093803406, + "loss_sod": 0.16739143431186676, + "loss_total": 0.8130170702934265, + "step": 163399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.753075122833252, + "loss_rtd": 0.27140310406684875, + "loss_sent": 0.49792271852493286, + "loss_sod": 0.04979398101568222, + "loss_total": 0.8191198110580444, + "step": 163399 + }, + { + "epoch": 0.0068, + "grad_norm": 3.1737489700317383, + "learning_rate": 4.6544149545258725e-05, + "loss": 0.5011, + "step": 163400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.081680774688721, + "loss_rtd": 0.2573992609977722, + "loss_sent": 0.08737410604953766, + "loss_sod": 0.07585644721984863, + "loss_total": 0.4206297993659973, + "step": 163499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.307277202606201, + "loss_rtd": 0.2570751905441284, + "loss_sent": 0.0989130511879921, + "loss_sod": 0.030727503821253777, + "loss_total": 0.38671573996543884, + "step": 163499 + }, + { + "epoch": 0.007, + "grad_norm": 0.6402074098587036, + "learning_rate": 4.6512492872449e-05, + "loss": 0.5052, + "step": 163500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.550046443939209, + "loss_rtd": 0.276483416557312, + "loss_sent": 0.1697588413953781, + "loss_sod": 0.11472378671169281, + "loss_total": 0.5609660744667053, + "step": 163599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.275697231292725, + "loss_rtd": 0.28646954894065857, + "loss_sent": 0.23295924067497253, + "loss_sod": 0.18847021460533142, + "loss_total": 0.7078989744186401, + "step": 163599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.2019567489624023, + "learning_rate": 4.6480837604406755e-05, + "loss": 0.5469, + "step": 163600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.931293964385986, + "loss_rtd": 0.2878555357456207, + "loss_sent": 0.08064966648817062, + "loss_sod": 0.09106098115444183, + "loss_total": 0.4595661759376526, + "step": 163699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.440551280975342, + "loss_rtd": 0.25958216190338135, + "loss_sent": 0.16952340304851532, + "loss_sod": 0.035872530192136765, + "loss_total": 0.46497809886932373, + "step": 163699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.8347538709640503, + "learning_rate": 4.6449183753882683e-05, + "loss": 0.4985, + "step": 163700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.048709392547607, + "loss_rtd": 0.2709319591522217, + "loss_sent": 0.10440225899219513, + "loss_sod": 0.03406666964292526, + "loss_total": 0.4094008803367615, + "step": 163799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.5242414474487305, + "loss_rtd": 0.25989148020744324, + "loss_sent": 0.1897791028022766, + "loss_sod": 0.07422979921102524, + "loss_total": 0.5239003896713257, + "step": 163799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.8669432997703552, + "learning_rate": 4.641753133362697e-05, + "loss": 0.4962, + "step": 163800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.846010684967041, + "loss_rtd": 0.2867366671562195, + "loss_sent": 0.29276242852211, + "loss_sod": 0.08926688879728317, + "loss_total": 0.6687660217285156, + "step": 163899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.45940637588501, + "loss_rtd": 0.25110098719596863, + "loss_sent": 0.3821507692337036, + "loss_sod": 0.0544838011264801, + "loss_total": 0.6877355575561523, + "step": 163899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.5126763582229614, + "learning_rate": 4.638588035638922e-05, + "loss": 0.4943, + "step": 163900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.329362392425537, + "loss_rtd": 0.26091039180755615, + "loss_sent": 0.45977982878685, + "loss_sod": 0.008483264595270157, + "loss_total": 0.729173481464386, + "step": 163999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.4824395179748535, + "loss_rtd": 0.2714334726333618, + "loss_sent": 0.19653542339801788, + "loss_sod": 0.10478407144546509, + "loss_total": 0.5727529525756836, + "step": 163999 + }, + { + "epoch": 0.008, + "grad_norm": 1.234817624092102, + "learning_rate": 4.6354230834918424e-05, + "loss": 0.5003, + "step": 164000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4826383888721466, + "eval_runtime": 150.7695, + "eval_samples_per_second": 102.428, + "eval_steps_per_second": 0.803, + "step": 164000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.691675662994385, + "loss_rtd": 0.24220599234104156, + "loss_sent": 0.021226750686764717, + "loss_sod": 0.05146567523479462, + "loss_total": 0.31489843130111694, + "step": 164099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.425869464874268, + "loss_rtd": 0.25354447960853577, + "loss_sent": 0.05983325466513634, + "loss_sod": 0.05555838346481323, + "loss_total": 0.36893612146377563, + "step": 164099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.5825456380844116, + "learning_rate": 4.632258278196301e-05, + "loss": 0.4999, + "step": 164100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.4635233879089355, + "loss_rtd": 0.26874104142189026, + "loss_sent": 0.5512610673904419, + "loss_sod": 0.0693744421005249, + "loss_total": 0.8893765211105347, + "step": 164199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.497776985168457, + "loss_rtd": 0.2705215513706207, + "loss_sent": 0.11279398947954178, + "loss_sod": 0.15561792254447937, + "loss_total": 0.5389334559440613, + "step": 164199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.5598934888839722, + "learning_rate": 4.6290936210270806e-05, + "loss": 0.5075, + "step": 164200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.348184108734131, + "loss_rtd": 0.2685256004333496, + "loss_sent": 0.20474465191364288, + "loss_sod": 0.07704780250787735, + "loss_total": 0.5503180623054504, + "step": 164299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.261141777038574, + "loss_rtd": 0.27409055829048157, + "loss_sent": 0.1952885091304779, + "loss_sod": 0.04625772684812546, + "loss_total": 0.5156368017196655, + "step": 164299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.8624095320701599, + "learning_rate": 4.6259291132589064e-05, + "loss": 0.5171, + "step": 164300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.83092737197876, + "loss_rtd": 0.2858480215072632, + "loss_sent": 0.12171663343906403, + "loss_sod": 0.09565457701683044, + "loss_total": 0.5032192468643188, + "step": 164399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.413974285125732, + "loss_rtd": 0.28275954723358154, + "loss_sent": 0.22656257450580597, + "loss_sod": 0.050913333892822266, + "loss_total": 0.5602354407310486, + "step": 164399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.6559889316558838, + "learning_rate": 4.62276475616644e-05, + "loss": 0.5072, + "step": 164400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.438117027282715, + "loss_rtd": 0.2606719136238098, + "loss_sent": 0.24627365171909332, + "loss_sod": 0.0357002317905426, + "loss_total": 0.5426458120346069, + "step": 164499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.413086891174316, + "loss_rtd": 0.2698831856250763, + "loss_sent": 0.23739132285118103, + "loss_sod": 0.023256702348589897, + "loss_total": 0.5305311679840088, + "step": 164499 + }, + { + "epoch": 0.009, + "grad_norm": 2.3605706691741943, + "learning_rate": 4.619600551024285e-05, + "loss": 0.51, + "step": 164500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.65707540512085, + "loss_rtd": 0.2757400870323181, + "loss_sent": 0.5702740550041199, + "loss_sod": 0.055693574249744415, + "loss_total": 0.9017077088356018, + "step": 164599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.978064060211182, + "loss_rtd": 0.26737818121910095, + "loss_sent": 0.03427908197045326, + "loss_sod": 0.07687597721815109, + "loss_total": 0.3785332441329956, + "step": 164599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.4472017288208008, + "learning_rate": 4.616436499106982e-05, + "loss": 0.5028, + "step": 164600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.549246311187744, + "loss_rtd": 0.2585373818874359, + "loss_sent": 0.2615335285663605, + "loss_sod": 0.01783927157521248, + "loss_total": 0.5379102230072021, + "step": 164699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.301608085632324, + "loss_rtd": 0.2660742402076721, + "loss_sent": 0.2762836515903473, + "loss_sod": 0.15904825925827026, + "loss_total": 0.7014061212539673, + "step": 164699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.6548930406570435, + "learning_rate": 4.613272601689012e-05, + "loss": 0.5031, + "step": 164700 + }, + { + "epoch": 0.009598, + "loss_gen": 4.4817795753479, + "loss_rtd": 0.23729349672794342, + "loss_sent": 5.854514893144369e-05, + "loss_sod": 0.10458141565322876, + "loss_total": 0.3419334292411804, + "step": 164799 + }, + { + "epoch": 0.009598, + "loss_gen": 4.456570148468018, + "loss_rtd": 0.2427823543548584, + "loss_sent": 0.009673393331468105, + "loss_sod": 0.06102978438138962, + "loss_total": 0.3134855329990387, + "step": 164799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.7266996502876282, + "learning_rate": 4.610108860044791e-05, + "loss": 0.4993, + "step": 164800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.356980323791504, + "loss_rtd": 0.2802791893482208, + "loss_sent": 0.12030818313360214, + "loss_sod": 0.22139334678649902, + "loss_total": 0.6219806671142578, + "step": 164899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.348964214324951, + "loss_rtd": 0.28221219778060913, + "loss_sent": 0.10928641259670258, + "loss_sod": 0.028059374541044235, + "loss_total": 0.41955798864364624, + "step": 164899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.0726114511489868, + "learning_rate": 4.6069452754486756e-05, + "loss": 0.5157, + "step": 164900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.3322014808654785, + "loss_rtd": 0.2652531564235687, + "loss_sent": 0.2010899782180786, + "loss_sod": 0.04203135892748833, + "loss_total": 0.5083744525909424, + "step": 164999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.87101411819458, + "loss_rtd": 0.27641060948371887, + "loss_sent": 0.17419025301933289, + "loss_sod": 0.03616292029619217, + "loss_total": 0.48676377534866333, + "step": 164999 + }, + { + "epoch": 0.01, + "grad_norm": 0.9331060647964478, + "learning_rate": 4.603781849174955e-05, + "loss": 0.473, + "step": 165000 + }, + { + "epoch": 0.01, + "eval_loss": 0.48438504338264465, + "eval_runtime": 150.7866, + "eval_samples_per_second": 102.416, + "eval_steps_per_second": 0.802, + "step": 165000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.626122951507568, + "loss_rtd": 0.2546291649341583, + "loss_sent": 0.13072346150875092, + "loss_sod": 0.06794248521327972, + "loss_total": 0.45329511165618896, + "step": 165099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.788132190704346, + "loss_rtd": 0.25939905643463135, + "loss_sent": 0.24285702407360077, + "loss_sod": 0.040133036673069, + "loss_total": 0.5423890948295593, + "step": 165099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.0221394300460815, + "learning_rate": 4.6006185824978594e-05, + "loss": 0.5015, + "step": 165100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.292180061340332, + "loss_rtd": 0.27583184838294983, + "loss_sent": 0.26557138562202454, + "loss_sod": 0.04793336242437363, + "loss_total": 0.589336633682251, + "step": 165199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.339547634124756, + "loss_rtd": 0.2673541009426117, + "loss_sent": 0.03542206063866615, + "loss_sod": 0.012089189141988754, + "loss_total": 0.3148653507232666, + "step": 165199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.7367468476295471, + "learning_rate": 4.59745547669155e-05, + "loss": 0.5038, + "step": 165200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.372686386108398, + "loss_rtd": 0.2524415850639343, + "loss_sent": 0.09043261408805847, + "loss_sod": 0.07902982831001282, + "loss_total": 0.4219040274620056, + "step": 165299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.473160266876221, + "loss_rtd": 0.28483524918556213, + "loss_sent": 0.10204581171274185, + "loss_sod": 0.0278621893376112, + "loss_total": 0.41474324464797974, + "step": 165299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9719716310501099, + "learning_rate": 4.5942925330301276e-05, + "loss": 0.4997, + "step": 165300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.557486057281494, + "loss_rtd": 0.2818976640701294, + "loss_sent": 0.18971668183803558, + "loss_sod": 0.10618086159229279, + "loss_total": 0.5777952075004578, + "step": 165399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.35322380065918, + "loss_rtd": 0.24993188679218292, + "loss_sent": 0.21613375842571259, + "loss_sod": 0.10320958495140076, + "loss_total": 0.5692752599716187, + "step": 165399 + }, + { + "epoch": 0.0108, + "grad_norm": 2.0916054248809814, + "learning_rate": 4.591129752787623e-05, + "loss": 0.5098, + "step": 165400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.265660762786865, + "loss_rtd": 0.2649977505207062, + "loss_sent": 0.1313401162624359, + "loss_sod": 0.04304903373122215, + "loss_total": 0.43938690423965454, + "step": 165499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.371414661407471, + "loss_rtd": 0.2730584740638733, + "loss_sent": 0.1434060037136078, + "loss_sod": 0.05730576068162918, + "loss_total": 0.47377023100852966, + "step": 165499 + }, + { + "epoch": 0.011, + "grad_norm": 0.6318996548652649, + "learning_rate": 4.587967137238006e-05, + "loss": 0.4905, + "step": 165500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.278937339782715, + "loss_rtd": 0.25847798585891724, + "loss_sent": 0.35574769973754883, + "loss_sod": 0.050757866352796555, + "loss_total": 0.6649835109710693, + "step": 165599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.451294422149658, + "loss_rtd": 0.27524012327194214, + "loss_sent": 0.0910453274846077, + "loss_sod": 0.019244834780693054, + "loss_total": 0.3855302929878235, + "step": 165599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9767748713493347, + "learning_rate": 4.584804687655177e-05, + "loss": 0.5019, + "step": 165600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.455461502075195, + "loss_rtd": 0.2628023326396942, + "loss_sent": 0.20049241185188293, + "loss_sod": 0.09067504107952118, + "loss_total": 0.5539697408676147, + "step": 165699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.385140895843506, + "loss_rtd": 0.2677285373210907, + "loss_sent": 0.17031317949295044, + "loss_sod": 0.13305577635765076, + "loss_total": 0.5710974931716919, + "step": 165699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.1536784172058105, + "learning_rate": 4.58164240531297e-05, + "loss": 0.4898, + "step": 165700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.205896377563477, + "loss_rtd": 0.2467249035835266, + "loss_sent": 0.25560465455055237, + "loss_sod": 0.01789039932191372, + "loss_total": 0.5202199220657349, + "step": 165799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.474554061889648, + "loss_rtd": 0.25408485531806946, + "loss_sent": 0.34175625443458557, + "loss_sod": 0.028084468096494675, + "loss_total": 0.6239255666732788, + "step": 165799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.2400084733963013, + "learning_rate": 4.578480291485152e-05, + "loss": 0.5018, + "step": 165800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.407284736633301, + "loss_rtd": 0.2712644338607788, + "loss_sent": 0.07869881391525269, + "loss_sod": 0.06259606778621674, + "loss_total": 0.4125593304634094, + "step": 165899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.107211589813232, + "loss_rtd": 0.2766292691230774, + "loss_sent": 0.05480150878429413, + "loss_sod": 0.04472660645842552, + "loss_total": 0.37615740299224854, + "step": 165899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.9775351285934448, + "learning_rate": 4.575318347445422e-05, + "loss": 0.5107, + "step": 165900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.451925277709961, + "loss_rtd": 0.2789785861968994, + "loss_sent": 0.34577539563179016, + "loss_sod": 0.1541379690170288, + "loss_total": 0.778891921043396, + "step": 165999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.297626495361328, + "loss_rtd": 0.2855328321456909, + "loss_sent": 0.24290558695793152, + "loss_sod": 0.0163103174418211, + "loss_total": 0.5447487235069275, + "step": 165999 + }, + { + "epoch": 0.012, + "grad_norm": 1.4010405540466309, + "learning_rate": 4.572156574467411e-05, + "loss": 0.5089, + "step": 166000 + }, + { + "epoch": 0.012, + "eval_loss": 0.48603564500808716, + "eval_runtime": 150.8488, + "eval_samples_per_second": 102.374, + "eval_steps_per_second": 0.802, + "step": 166000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.359498023986816, + "loss_rtd": 0.2447972595691681, + "loss_sent": 0.13807053864002228, + "loss_sod": 0.09395666420459747, + "loss_total": 0.47682446241378784, + "step": 166099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.2059855461120605, + "loss_rtd": 0.26789209246635437, + "loss_sent": 0.1320495903491974, + "loss_sod": 0.06434500217437744, + "loss_total": 0.4642866849899292, + "step": 166099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.9840983748435974, + "learning_rate": 4.56899497382468e-05, + "loss": 0.4948, + "step": 166100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.42720365524292, + "loss_rtd": 0.27576813101768494, + "loss_sent": 0.16738727688789368, + "loss_sod": 0.06169932335615158, + "loss_total": 0.5048547387123108, + "step": 166199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.688323497772217, + "loss_rtd": 0.25756192207336426, + "loss_sent": 0.5139428377151489, + "loss_sod": 0.050605274736881256, + "loss_total": 0.8221100568771362, + "step": 166199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.7862499952316284, + "learning_rate": 4.565833546790723e-05, + "loss": 0.4845, + "step": 166200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.421172142028809, + "loss_rtd": 0.2712525427341461, + "loss_sent": 0.4319884777069092, + "loss_sod": 0.019644085317850113, + "loss_total": 0.7228851318359375, + "step": 166299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.947851657867432, + "loss_rtd": 0.2702462375164032, + "loss_sent": 0.17605744302272797, + "loss_sod": 0.042434852570295334, + "loss_total": 0.4887385368347168, + "step": 166299 + }, + { + "epoch": 0.0126, + "grad_norm": 2.0962629318237305, + "learning_rate": 4.56267229463896e-05, + "loss": 0.5121, + "step": 166300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.406226634979248, + "loss_rtd": 0.2810554802417755, + "loss_sent": 0.18005436658859253, + "loss_sod": 0.0701325386762619, + "loss_total": 0.5312423706054688, + "step": 166399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.280158042907715, + "loss_rtd": 0.2932305932044983, + "loss_sent": 0.17078426480293274, + "loss_sod": 0.008367412723600864, + "loss_total": 0.4723822772502899, + "step": 166399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.598619818687439, + "learning_rate": 4.5595112186427465e-05, + "loss": 0.5043, + "step": 166400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.456410884857178, + "loss_rtd": 0.23915442824363708, + "loss_sent": 0.014395859092473984, + "loss_sod": 0.056887514889240265, + "loss_total": 0.31043779850006104, + "step": 166499 + }, + { + "epoch": 0.012998, + "loss_gen": 4.590512275695801, + "loss_rtd": 0.23342490196228027, + "loss_sent": 0.08713645488023758, + "loss_sod": 0.02067759819328785, + "loss_total": 0.34123894572257996, + "step": 166499 + }, + { + "epoch": 0.013, + "grad_norm": 0.6391474604606628, + "learning_rate": 4.556350320075359e-05, + "loss": 0.4972, + "step": 166500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.363429069519043, + "loss_rtd": 0.25685250759124756, + "loss_sent": 0.41547098755836487, + "loss_sod": 0.0345754399895668, + "loss_total": 0.7068989276885986, + "step": 166599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.199674606323242, + "loss_rtd": 0.27524057030677795, + "loss_sent": 0.2515944540500641, + "loss_sod": 0.02120119519531727, + "loss_total": 0.5480362176895142, + "step": 166599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0128406286239624, + "learning_rate": 4.553189600210012e-05, + "loss": 0.5052, + "step": 166600 + }, + { + "epoch": 0.013398, + "loss_gen": 4.539990425109863, + "loss_rtd": 0.25559335947036743, + "loss_sent": 0.006525717210024595, + "loss_sod": 0.13782614469528198, + "loss_total": 0.3999452292919159, + "step": 166699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.329376220703125, + "loss_rtd": 0.27464374899864197, + "loss_sent": 0.115913987159729, + "loss_sod": 0.19408178329467773, + "loss_total": 0.5846395492553711, + "step": 166699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.9887056946754456, + "learning_rate": 4.550029060319839e-05, + "loss": 0.509, + "step": 166700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.409414291381836, + "loss_rtd": 0.28682273626327515, + "loss_sent": 0.09505373239517212, + "loss_sod": 0.03986112400889397, + "loss_total": 0.42173758149147034, + "step": 166799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.381486415863037, + "loss_rtd": 0.2774446904659271, + "loss_sent": 0.09368741512298584, + "loss_sod": 0.0027217199094593525, + "loss_total": 0.3738538324832916, + "step": 166799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.7898857593536377, + "learning_rate": 4.546868701677908e-05, + "loss": 0.49, + "step": 166800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.2110490798950195, + "loss_rtd": 0.2563081383705139, + "loss_sent": 0.2264343649148941, + "loss_sod": 0.0321497805416584, + "loss_total": 0.5148922801017761, + "step": 166899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.506667613983154, + "loss_rtd": 0.25266268849372864, + "loss_sent": 0.2526794672012329, + "loss_sod": 0.009198658168315887, + "loss_total": 0.5145407915115356, + "step": 166899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.664481520652771, + "learning_rate": 4.543708525557208e-05, + "loss": 0.4972, + "step": 166900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.3129191398620605, + "loss_rtd": 0.25302985310554504, + "loss_sent": 0.1839773803949356, + "loss_sod": 0.09276404231786728, + "loss_total": 0.5297712683677673, + "step": 166999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.687230587005615, + "loss_rtd": 0.28435543179512024, + "loss_sent": 0.20702335238456726, + "loss_sod": 0.07819317281246185, + "loss_total": 0.5695719718933105, + "step": 166999 + }, + { + "epoch": 0.014, + "grad_norm": 0.9783918857574463, + "learning_rate": 4.540548533230661e-05, + "loss": 0.508, + "step": 167000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4755215644836426, + "eval_runtime": 150.7223, + "eval_samples_per_second": 102.46, + "eval_steps_per_second": 0.803, + "step": 167000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.118136405944824, + "loss_rtd": 0.2836233675479889, + "loss_sent": 0.12645424902439117, + "loss_sod": 0.024192065000534058, + "loss_total": 0.43426966667175293, + "step": 167099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.48662805557251, + "loss_rtd": 0.24392926692962646, + "loss_sent": 0.25345009565353394, + "loss_sod": 0.09135273844003677, + "loss_total": 0.588732123374939, + "step": 167099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.121079921722412, + "learning_rate": 4.5373887259711103e-05, + "loss": 0.4988, + "step": 167100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.632580280303955, + "loss_rtd": 0.2665417492389679, + "loss_sent": 0.29506707191467285, + "loss_sod": 0.06013890355825424, + "loss_total": 0.6217477321624756, + "step": 167199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.352197647094727, + "loss_rtd": 0.27785807847976685, + "loss_sent": 0.14311152696609497, + "loss_sod": 0.026262005791068077, + "loss_total": 0.44723162055015564, + "step": 167199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.9423695802688599, + "learning_rate": 4.5342291050513254e-05, + "loss": 0.5061, + "step": 167200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.549313068389893, + "loss_rtd": 0.2749606966972351, + "loss_sent": 0.09180915355682373, + "loss_sod": 0.057210523635149, + "loss_total": 0.42398038506507874, + "step": 167299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.151215076446533, + "loss_rtd": 0.2457694560289383, + "loss_sent": 0.11216197162866592, + "loss_sod": 0.11463813483715057, + "loss_total": 0.4725695550441742, + "step": 167299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.8585280776023865, + "learning_rate": 4.531069671743999e-05, + "loss": 0.4956, + "step": 167300 + }, + { + "epoch": 0.014798, + "loss_gen": 4.729015827178955, + "loss_rtd": 0.2535562217235565, + "loss_sent": 0.24296793341636658, + "loss_sod": 0.0640372484922409, + "loss_total": 0.5605614185333252, + "step": 167399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.214290142059326, + "loss_rtd": 0.2587656080722809, + "loss_sent": 0.09210658818483353, + "loss_sod": 0.06939534842967987, + "loss_total": 0.4202675223350525, + "step": 167399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.9528436660766602, + "learning_rate": 4.527910427321755e-05, + "loss": 0.5027, + "step": 167400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.571860313415527, + "loss_rtd": 0.2504604756832123, + "loss_sent": 0.20218954980373383, + "loss_sod": 0.03219134360551834, + "loss_total": 0.48484134674072266, + "step": 167499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.189939975738525, + "loss_rtd": 0.2828872501850128, + "loss_sent": 0.1269908845424652, + "loss_sod": 0.005901483818888664, + "loss_total": 0.41577962040901184, + "step": 167499 + }, + { + "epoch": 0.015, + "grad_norm": 0.5682773590087891, + "learning_rate": 4.524751373057132e-05, + "loss": 0.5047, + "step": 167500 + }, + { + "epoch": 0.015198, + "loss_gen": 4.9674530029296875, + "loss_rtd": 0.23083911836147308, + "loss_sent": 4.700662975665182e-05, + "loss_sod": 0.12337478995323181, + "loss_total": 0.3542608916759491, + "step": 167599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.8469929695129395, + "loss_rtd": 0.23121331632137299, + "loss_sent": 3.6799596273340285e-05, + "loss_sod": 0.16222378611564636, + "loss_total": 0.39347389340400696, + "step": 167599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.1110172271728516, + "learning_rate": 4.521592510222601e-05, + "loss": 0.4965, + "step": 167600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.174350261688232, + "loss_rtd": 0.261982798576355, + "loss_sent": 0.1439199447631836, + "loss_sod": 0.10705357044935226, + "loss_total": 0.5129563212394714, + "step": 167699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.304527282714844, + "loss_rtd": 0.2757393717765808, + "loss_sent": 0.19051194190979004, + "loss_sod": 0.043662890791893005, + "loss_total": 0.509914219379425, + "step": 167699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.111415982246399, + "learning_rate": 4.518433840090549e-05, + "loss": 0.501, + "step": 167700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.373919486999512, + "loss_rtd": 0.25809842348098755, + "loss_sent": 0.28425711393356323, + "loss_sod": 0.07867135852575302, + "loss_total": 0.621026873588562, + "step": 167799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.168500900268555, + "loss_rtd": 0.25599929690361023, + "loss_sent": 0.3453138768672943, + "loss_sod": 0.029682748019695282, + "loss_total": 0.6309959292411804, + "step": 167799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.4504157304763794, + "learning_rate": 4.51527536393329e-05, + "loss": 0.5124, + "step": 167800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.251155853271484, + "loss_rtd": 0.2272103726863861, + "loss_sent": 0.0014535936061292887, + "loss_sod": 0.10316424071788788, + "loss_total": 0.33182820677757263, + "step": 167899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.352320671081543, + "loss_rtd": 0.2797969579696655, + "loss_sent": 0.24996641278266907, + "loss_sod": 0.02973112277686596, + "loss_total": 0.5594944953918457, + "step": 167899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.9157557487487793, + "learning_rate": 4.512117083023054e-05, + "loss": 0.4988, + "step": 167900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.048002243041992, + "loss_rtd": 0.22969184815883636, + "loss_sent": 0.2293103188276291, + "loss_sod": 0.019478891044855118, + "loss_total": 0.4784810543060303, + "step": 167999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.313672065734863, + "loss_rtd": 0.2619876563549042, + "loss_sent": 0.3204754590988159, + "loss_sod": 0.0028961533680558205, + "loss_total": 0.5853592753410339, + "step": 167999 + }, + { + "epoch": 0.016, + "grad_norm": 2.1524903774261475, + "learning_rate": 4.508958998632e-05, + "loss": 0.4873, + "step": 168000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4858248829841614, + "eval_runtime": 150.7798, + "eval_samples_per_second": 102.421, + "eval_steps_per_second": 0.802, + "step": 168000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.309937477111816, + "loss_rtd": 0.27981308102607727, + "loss_sent": 0.21948282420635223, + "loss_sod": 0.06391419470310211, + "loss_total": 0.563210129737854, + "step": 168099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.210428714752197, + "loss_rtd": 0.26737168431282043, + "loss_sent": 0.05904927849769592, + "loss_sod": 0.07481005787849426, + "loss_total": 0.4012310206890106, + "step": 168099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.7795582413673401, + "learning_rate": 4.505801112032202e-05, + "loss": 0.4954, + "step": 168100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.683972358703613, + "loss_rtd": 0.27294522523880005, + "loss_sent": 0.29927825927734375, + "loss_sod": 0.098269522190094, + "loss_total": 0.6704930067062378, + "step": 168199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.382657051086426, + "loss_rtd": 0.25094324350357056, + "loss_sent": 0.34905773401260376, + "loss_sod": 0.033952027559280396, + "loss_total": 0.6339529752731323, + "step": 168199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.7303627729415894, + "learning_rate": 4.502643424495658e-05, + "loss": 0.5193, + "step": 168200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.351790428161621, + "loss_rtd": 0.25140300393104553, + "loss_sent": 0.11114335060119629, + "loss_sod": 0.09140322357416153, + "loss_total": 0.45394957065582275, + "step": 168299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.532053470611572, + "loss_rtd": 0.2671525776386261, + "loss_sent": 0.24703453481197357, + "loss_sod": 0.01089945062994957, + "loss_total": 0.5250865817070007, + "step": 168299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.757623016834259, + "learning_rate": 4.499485937294282e-05, + "loss": 0.5073, + "step": 168300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.328295707702637, + "loss_rtd": 0.27258703112602234, + "loss_sent": 0.4864695966243744, + "loss_sod": 0.04786607250571251, + "loss_total": 0.8069226741790771, + "step": 168399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.371199131011963, + "loss_rtd": 0.28276970982551575, + "loss_sent": 0.24851186573505402, + "loss_sod": 0.04643501341342926, + "loss_total": 0.577716588973999, + "step": 168399 + }, + { + "epoch": 0.0168, + "grad_norm": 2.028629779815674, + "learning_rate": 4.4963286516999114e-05, + "loss": 0.4871, + "step": 168400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.465271472930908, + "loss_rtd": 0.2482798546552658, + "loss_sent": 6.206209218362346e-05, + "loss_sod": 0.22675487399101257, + "loss_total": 0.47509676218032837, + "step": 168499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.391423225402832, + "loss_rtd": 0.2677866518497467, + "loss_sent": 0.06388792395591736, + "loss_sod": 0.023668289184570312, + "loss_total": 0.3553428649902344, + "step": 168499 + }, + { + "epoch": 0.017, + "grad_norm": 1.0805388689041138, + "learning_rate": 4.4931715689843e-05, + "loss": 0.4799, + "step": 168500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.520055294036865, + "loss_rtd": 0.2823135256767273, + "loss_sent": 0.17781849205493927, + "loss_sod": 0.04104011505842209, + "loss_total": 0.5011721253395081, + "step": 168599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.64277982711792, + "loss_rtd": 0.27174386382102966, + "loss_sent": 0.39020928740501404, + "loss_sod": 0.07014597207307816, + "loss_total": 0.7320991158485413, + "step": 168599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.7376092672348022, + "learning_rate": 4.490014690419119e-05, + "loss": 0.502, + "step": 168600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.833861827850342, + "loss_rtd": 0.2307732254266739, + "loss_sent": 0.0436985120177269, + "loss_sod": 0.06335127353668213, + "loss_total": 0.3378230035305023, + "step": 168699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.434229850769043, + "loss_rtd": 0.2786445617675781, + "loss_sent": 0.2816252112388611, + "loss_sod": 0.04005259647965431, + "loss_total": 0.6003223657608032, + "step": 168699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.1425764560699463, + "learning_rate": 4.4868580172759605e-05, + "loss": 0.5021, + "step": 168700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.347623348236084, + "loss_rtd": 0.25195398926734924, + "loss_sent": 0.14637000858783722, + "loss_sod": 0.009355948306620121, + "loss_total": 0.4076799750328064, + "step": 168799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.267827033996582, + "loss_rtd": 0.2708859443664551, + "loss_sent": 0.38428178429603577, + "loss_sod": 0.0482797846198082, + "loss_total": 0.7034475207328796, + "step": 168799 + }, + { + "epoch": 0.0176, + "grad_norm": 2.426145553588867, + "learning_rate": 4.483701550826331e-05, + "loss": 0.5021, + "step": 168800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.149459362030029, + "loss_rtd": 0.24636590480804443, + "loss_sent": 0.05017540231347084, + "loss_sod": 0.02266959473490715, + "loss_total": 0.31921088695526123, + "step": 168899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.458242416381836, + "loss_rtd": 0.23602870106697083, + "loss_sent": 4.74040352855809e-05, + "loss_sod": 0.05188628286123276, + "loss_total": 0.287962406873703, + "step": 168899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.7804234623908997, + "learning_rate": 4.4805452923416554e-05, + "loss": 0.5093, + "step": 168900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.327387809753418, + "loss_rtd": 0.26298099756240845, + "loss_sent": 0.2780497968196869, + "loss_sod": 0.026178279891610146, + "loss_total": 0.5672090649604797, + "step": 168999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.3800129890441895, + "loss_rtd": 0.24750502407550812, + "loss_sent": 0.37032008171081543, + "loss_sod": 0.019295981153845787, + "loss_total": 0.6371210813522339, + "step": 168999 + }, + { + "epoch": 0.018, + "grad_norm": 1.1320600509643555, + "learning_rate": 4.477389243093273e-05, + "loss": 0.5065, + "step": 169000 + }, + { + "epoch": 0.018, + "eval_loss": 0.48476919531822205, + "eval_runtime": 150.7082, + "eval_samples_per_second": 102.47, + "eval_steps_per_second": 0.803, + "step": 169000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.689855575561523, + "loss_rtd": 0.27795419096946716, + "loss_sent": 0.18249726295471191, + "loss_sod": 0.07154709845781326, + "loss_total": 0.5319985151290894, + "step": 169099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.256010055541992, + "loss_rtd": 0.2706320583820343, + "loss_sent": 0.10705152153968811, + "loss_sod": 0.10184080898761749, + "loss_total": 0.4795244038105011, + "step": 169099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.9365662932395935, + "learning_rate": 4.4742334043524415e-05, + "loss": 0.4944, + "step": 169100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.54203462600708, + "loss_rtd": 0.2633820176124573, + "loss_sent": 0.17439861595630646, + "loss_sod": 0.12042141705751419, + "loss_total": 0.5582020282745361, + "step": 169199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.297516822814941, + "loss_rtd": 0.2625052034854889, + "loss_sent": 0.05075225606560707, + "loss_sod": 0.04686944559216499, + "loss_total": 0.36012691259384155, + "step": 169199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.8866571187973022, + "learning_rate": 4.471077777390331e-05, + "loss": 0.5, + "step": 169200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.463126182556152, + "loss_rtd": 0.2389262318611145, + "loss_sent": 0.000489122059661895, + "loss_sod": 0.17280098795890808, + "loss_total": 0.41221633553504944, + "step": 169299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.5432891845703125, + "loss_rtd": 0.26551342010498047, + "loss_sent": 0.21926134824752808, + "loss_sod": 0.07455364614725113, + "loss_total": 0.5593284368515015, + "step": 169299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.9207517504692078, + "learning_rate": 4.46792236347803e-05, + "loss": 0.5015, + "step": 169300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.219318389892578, + "loss_rtd": 0.24935311079025269, + "loss_sent": 0.10047516226768494, + "loss_sod": 0.008159383200109005, + "loss_total": 0.357987642288208, + "step": 169399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.312473297119141, + "loss_rtd": 0.26895448565483093, + "loss_sent": 0.2097213864326477, + "loss_sod": 0.061198119074106216, + "loss_total": 0.5398739576339722, + "step": 169399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.3753437995910645, + "learning_rate": 4.464767163886536e-05, + "loss": 0.5024, + "step": 169400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.333802700042725, + "loss_rtd": 0.25046104192733765, + "loss_sent": 0.1616601198911667, + "loss_sod": 0.03301946818828583, + "loss_total": 0.44514065980911255, + "step": 169499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.260376930236816, + "loss_rtd": 0.26193368434906006, + "loss_sent": 0.22181886434555054, + "loss_sod": 0.06733492016792297, + "loss_total": 0.551087498664856, + "step": 169499 + }, + { + "epoch": 0.019, + "grad_norm": 0.9431465268135071, + "learning_rate": 4.461612179886766e-05, + "loss": 0.494, + "step": 169500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.238470077514648, + "loss_rtd": 0.27454906702041626, + "loss_sent": 0.16887615621089935, + "loss_sod": 0.09713076800107956, + "loss_total": 0.540556013584137, + "step": 169599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.788388252258301, + "loss_rtd": 0.23497284948825836, + "loss_sent": 0.031695254147052765, + "loss_sod": 0.12693434953689575, + "loss_total": 0.3936024308204651, + "step": 169599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.3158783912658691, + "learning_rate": 4.458457412749544e-05, + "loss": 0.4959, + "step": 169600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.679224967956543, + "loss_rtd": 0.24525420367717743, + "loss_sent": 6.181051867315546e-05, + "loss_sod": 0.21537888050079346, + "loss_total": 0.46069490909576416, + "step": 169699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.540152549743652, + "loss_rtd": 0.24027235805988312, + "loss_sent": 0.12506097555160522, + "loss_sod": 0.14578180015087128, + "loss_total": 0.5111151337623596, + "step": 169699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.4084185361862183, + "learning_rate": 4.455302863745613e-05, + "loss": 0.513, + "step": 169700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.2467041015625, + "loss_rtd": 0.2628336250782013, + "loss_sent": 0.07544388622045517, + "loss_sod": 0.029422901570796967, + "loss_total": 0.3677004277706146, + "step": 169799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.480223178863525, + "loss_rtd": 0.26126691699028015, + "loss_sent": 0.6259580254554749, + "loss_sod": 0.08103892207145691, + "loss_total": 0.9682638645172119, + "step": 169799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.288264274597168, + "learning_rate": 4.4521485341456216e-05, + "loss": 0.5023, + "step": 169800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.5037970542907715, + "loss_rtd": 0.27007076144218445, + "loss_sent": 0.032417405396699905, + "loss_sod": 0.06645283102989197, + "loss_total": 0.3689410090446472, + "step": 169899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.211860179901123, + "loss_rtd": 0.2855651080608368, + "loss_sent": 0.0825471356511116, + "loss_sod": 0.05850375443696976, + "loss_total": 0.42661598324775696, + "step": 169899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.0605272054672241, + "learning_rate": 4.4489944252201366e-05, + "loss": 0.4976, + "step": 169900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.047159194946289, + "loss_rtd": 0.2544962763786316, + "loss_sent": 0.001588489511050284, + "loss_sod": 0.20521697402000427, + "loss_total": 0.4613017439842224, + "step": 169999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.681049823760986, + "loss_rtd": 0.23406003415584564, + "loss_sent": 0.00014909173478372395, + "loss_sod": 0.13247479498386383, + "loss_total": 0.3666839301586151, + "step": 169999 + }, + { + "epoch": 0.02, + "grad_norm": 1.2742552757263184, + "learning_rate": 4.44584053823963e-05, + "loss": 0.5199, + "step": 170000 + }, + { + "epoch": 0.02, + "eval_loss": 0.4731971323490143, + "eval_runtime": 150.8635, + "eval_samples_per_second": 102.364, + "eval_steps_per_second": 0.802, + "step": 170000 + }, + { + "epoch": 0.020198, + "loss_gen": 4.544429779052734, + "loss_rtd": 0.23796328902244568, + "loss_sent": 8.894162601791322e-05, + "loss_sod": 0.13107258081436157, + "loss_total": 0.36912479996681213, + "step": 170099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.360555648803711, + "loss_rtd": 0.281270295381546, + "loss_sent": 0.14059801399707794, + "loss_sod": 0.04083305224776268, + "loss_total": 0.46270138025283813, + "step": 170099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.9566983580589294, + "learning_rate": 4.4426868744744895e-05, + "loss": 0.5035, + "step": 170100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.065962791442871, + "loss_rtd": 0.26063400506973267, + "loss_sent": 0.04239511117339134, + "loss_sod": 0.03537307679653168, + "loss_total": 0.3384022116661072, + "step": 170199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.020777702331543, + "loss_rtd": 0.28646722435951233, + "loss_sent": 0.05823962390422821, + "loss_sod": 0.06766140460968018, + "loss_total": 0.4123682379722595, + "step": 170199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.9063208699226379, + "learning_rate": 4.439533435195009e-05, + "loss": 0.4982, + "step": 170200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.676692962646484, + "loss_rtd": 0.24276001751422882, + "loss_sent": 3.965487121604383e-05, + "loss_sod": 0.06598778814077377, + "loss_total": 0.30878746509552, + "step": 170299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.554323196411133, + "loss_rtd": 0.2563796043395996, + "loss_sent": 0.5975649952888489, + "loss_sod": 0.16358384490013123, + "loss_total": 1.0175284147262573, + "step": 170299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.785658597946167, + "learning_rate": 4.436380221671393e-05, + "loss": 0.4946, + "step": 170300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.870201587677002, + "loss_rtd": 0.27057531476020813, + "loss_sent": 0.13948950171470642, + "loss_sod": 0.044493697583675385, + "loss_total": 0.45455852150917053, + "step": 170399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.158511638641357, + "loss_rtd": 0.2867234945297241, + "loss_sent": 0.05372565984725952, + "loss_sod": 0.038248710334300995, + "loss_total": 0.37869787216186523, + "step": 170399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8261120915412903, + "learning_rate": 4.433227235173757e-05, + "loss": 0.5119, + "step": 170400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.268481731414795, + "loss_rtd": 0.25351619720458984, + "loss_sent": 0.08443231880664825, + "loss_sod": 0.007908103987574577, + "loss_total": 0.34585660696029663, + "step": 170499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.773632526397705, + "loss_rtd": 0.27786916494369507, + "loss_sent": 0.052542515099048615, + "loss_sod": 0.06800629198551178, + "loss_total": 0.39841794967651367, + "step": 170499 + }, + { + "epoch": 0.021, + "grad_norm": 0.6295098066329956, + "learning_rate": 4.430074476972122e-05, + "loss": 0.4798, + "step": 170500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.5034284591674805, + "loss_rtd": 0.2871614396572113, + "loss_sent": 0.08345489948987961, + "loss_sod": 0.1271916627883911, + "loss_total": 0.4978080093860626, + "step": 170599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.310825347900391, + "loss_rtd": 0.27726760506629944, + "loss_sent": 0.20777426660060883, + "loss_sod": 0.0776047632098198, + "loss_total": 0.5626466274261475, + "step": 170599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.0804089307785034, + "learning_rate": 4.426921948336421e-05, + "loss": 0.4862, + "step": 170600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.445140838623047, + "loss_rtd": 0.2714994549751282, + "loss_sent": 0.17374204099178314, + "loss_sod": 0.029518041759729385, + "loss_total": 0.4747595191001892, + "step": 170699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.439637660980225, + "loss_rtd": 0.2526755630970001, + "loss_sent": 0.021121647208929062, + "loss_sod": 0.027989905327558517, + "loss_total": 0.3017871081829071, + "step": 170699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.5975282788276672, + "learning_rate": 4.423769650536489e-05, + "loss": 0.4932, + "step": 170700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.500450134277344, + "loss_rtd": 0.2770961821079254, + "loss_sent": 0.2747577726840973, + "loss_sod": 0.10403533279895782, + "loss_total": 0.6558892726898193, + "step": 170799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.309975624084473, + "loss_rtd": 0.2784741520881653, + "loss_sent": 0.1761515885591507, + "loss_sod": 0.04092315584421158, + "loss_total": 0.49554890394210815, + "step": 170799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.0437424182891846, + "learning_rate": 4.420617584842074e-05, + "loss": 0.5029, + "step": 170800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.936534881591797, + "loss_rtd": 0.24867531657218933, + "loss_sent": 0.042488064616918564, + "loss_sod": 0.10822486132383347, + "loss_total": 0.39938825368881226, + "step": 170899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.563939094543457, + "loss_rtd": 0.25593051314353943, + "loss_sent": 0.21076306700706482, + "loss_sod": 0.04818826913833618, + "loss_total": 0.5148818492889404, + "step": 170899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.859067440032959, + "learning_rate": 4.4174657525228256e-05, + "loss": 0.4988, + "step": 170900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.604702949523926, + "loss_rtd": 0.25139927864074707, + "loss_sent": 0.0008354461169801652, + "loss_sod": 0.1703825294971466, + "loss_total": 0.42261725664138794, + "step": 170999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.240743637084961, + "loss_rtd": 0.27437034249305725, + "loss_sent": 0.2517975866794586, + "loss_sod": 0.06107909604907036, + "loss_total": 0.5872470140457153, + "step": 170999 + }, + { + "epoch": 0.022, + "grad_norm": 1.3069241046905518, + "learning_rate": 4.414314154848304e-05, + "loss": 0.4868, + "step": 171000 + }, + { + "epoch": 0.022, + "eval_loss": 0.476881742477417, + "eval_runtime": 152.174, + "eval_samples_per_second": 101.483, + "eval_steps_per_second": 0.795, + "step": 171000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.357883930206299, + "loss_rtd": 0.26406019926071167, + "loss_sent": 0.12234348803758621, + "loss_sod": 0.05356413125991821, + "loss_total": 0.4399678111076355, + "step": 171099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.282578468322754, + "loss_rtd": 0.2464466094970703, + "loss_sent": 6.460111035266891e-05, + "loss_sod": 0.19680382311344147, + "loss_total": 0.4433150291442871, + "step": 171099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.176934003829956, + "learning_rate": 4.4111627930879695e-05, + "loss": 0.4837, + "step": 171100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.41828727722168, + "loss_rtd": 0.26423409581184387, + "loss_sent": 0.1900784969329834, + "loss_sod": 0.06894370913505554, + "loss_total": 0.5232563018798828, + "step": 171199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.591996669769287, + "loss_rtd": 0.2510789930820465, + "loss_sent": 0.06349091231822968, + "loss_sod": 0.0813186913728714, + "loss_total": 0.39588862657546997, + "step": 171199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.8740753531455994, + "learning_rate": 4.408011668511192e-05, + "loss": 0.4891, + "step": 171200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.379201412200928, + "loss_rtd": 0.2552388906478882, + "loss_sent": 0.10088750720024109, + "loss_sod": 0.12585505843162537, + "loss_total": 0.48198145627975464, + "step": 171299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.958032608032227, + "loss_rtd": 0.24648171663284302, + "loss_sent": 0.025942375883460045, + "loss_sod": 0.027695482596755028, + "loss_total": 0.3001195788383484, + "step": 171299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.8427684903144836, + "learning_rate": 4.404860782387243e-05, + "loss": 0.4992, + "step": 171300 + }, + { + "epoch": 0.022798, + "loss_gen": 4.767716884613037, + "loss_rtd": 0.24949835240840912, + "loss_sent": 0.012483463622629642, + "loss_sod": 0.12245252728462219, + "loss_total": 0.3844343423843384, + "step": 171399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.250916004180908, + "loss_rtd": 0.2629123628139496, + "loss_sent": 0.1811651885509491, + "loss_sod": 0.035274289548397064, + "loss_total": 0.47935184836387634, + "step": 171399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.0522243976593018, + "learning_rate": 4.401710135985301e-05, + "loss": 0.5094, + "step": 171400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.062880516052246, + "loss_rtd": 0.24714313447475433, + "loss_sent": 0.0644451156258583, + "loss_sod": 0.09545697271823883, + "loss_total": 0.4070452153682709, + "step": 171499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.03845739364624, + "loss_rtd": 0.2493828982114792, + "loss_sent": 0.05711861327290535, + "loss_sod": 0.13982105255126953, + "loss_total": 0.44632259011268616, + "step": 171499 + }, + { + "epoch": 0.023, + "grad_norm": 1.0879409313201904, + "learning_rate": 4.398559730574443e-05, + "loss": 0.5012, + "step": 171500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.422330379486084, + "loss_rtd": 0.26781049370765686, + "loss_sent": 0.10690589994192123, + "loss_sod": 0.03012928180396557, + "loss_total": 0.4048456847667694, + "step": 171599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.345040321350098, + "loss_rtd": 0.27778422832489014, + "loss_sent": 0.19501087069511414, + "loss_sod": 0.07717195153236389, + "loss_total": 0.5499670505523682, + "step": 171599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.8802943229675293, + "learning_rate": 4.395409567423655e-05, + "loss": 0.5057, + "step": 171600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.491530895233154, + "loss_rtd": 0.2580206096172333, + "loss_sent": 0.23020225763320923, + "loss_sod": 0.06939679384231567, + "loss_total": 0.5576196908950806, + "step": 171699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.020575523376465, + "loss_rtd": 0.2541915774345398, + "loss_sent": 0.3434763252735138, + "loss_sod": 0.028169114142656326, + "loss_total": 0.6258370280265808, + "step": 171699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.243648886680603, + "learning_rate": 4.3922596478018207e-05, + "loss": 0.5004, + "step": 171700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.0843825340271, + "loss_rtd": 0.2561759352684021, + "loss_sent": 0.1246255487203598, + "loss_sod": 0.025484293699264526, + "loss_total": 0.40628576278686523, + "step": 171799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.655697345733643, + "loss_rtd": 0.24287694692611694, + "loss_sent": 0.17661504447460175, + "loss_sod": 0.05493897944688797, + "loss_total": 0.47443097829818726, + "step": 171799 + }, + { + "epoch": 0.0236, + "grad_norm": 2.0405614376068115, + "learning_rate": 4.389109972977727e-05, + "loss": 0.4722, + "step": 171800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.065279483795166, + "loss_rtd": 0.2325080782175064, + "loss_sent": 0.0006211738218553364, + "loss_sod": 0.08426440507173538, + "loss_total": 0.3173936605453491, + "step": 171899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.632493019104004, + "loss_rtd": 0.2621689736843109, + "loss_sent": 0.3588075339794159, + "loss_sod": 0.0310895387083292, + "loss_total": 0.6520660519599915, + "step": 171899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.0510684251785278, + "learning_rate": 4.385960544220064e-05, + "loss": 0.491, + "step": 171900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.561727046966553, + "loss_rtd": 0.2848459780216217, + "loss_sent": 0.513619065284729, + "loss_sod": 0.06750833243131638, + "loss_total": 0.8659733533859253, + "step": 171999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.594300746917725, + "loss_rtd": 0.25993654131889343, + "loss_sent": 0.20714308321475983, + "loss_sod": 0.029170267283916473, + "loss_total": 0.4962499141693115, + "step": 171999 + }, + { + "epoch": 0.024, + "grad_norm": 1.7951167821884155, + "learning_rate": 4.382811362797419e-05, + "loss": 0.501, + "step": 172000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4790232181549072, + "eval_runtime": 150.8946, + "eval_samples_per_second": 102.343, + "eval_steps_per_second": 0.802, + "step": 172000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.671760559082031, + "loss_rtd": 0.29092535376548767, + "loss_sent": 0.26654955744743347, + "loss_sod": 0.0813579335808754, + "loss_total": 0.6388328075408936, + "step": 172099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.701687335968018, + "loss_rtd": 0.24037468433380127, + "loss_sent": 0.363858163356781, + "loss_sod": 0.05815407633781433, + "loss_total": 0.6623868942260742, + "step": 172099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.230000615119934, + "learning_rate": 4.379662429978285e-05, + "loss": 0.505, + "step": 172100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.224126815795898, + "loss_rtd": 0.2466423064470291, + "loss_sent": 0.5247039198875427, + "loss_sod": 0.0947648212313652, + "loss_total": 0.8661110401153564, + "step": 172199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.056763172149658, + "loss_rtd": 0.25852951407432556, + "loss_sent": 7.622349949087948e-05, + "loss_sod": 0.21016332507133484, + "loss_total": 0.4687690734863281, + "step": 172199 + }, + { + "epoch": 0.0244, + "grad_norm": 2.1720242500305176, + "learning_rate": 4.376513747031048e-05, + "loss": 0.4989, + "step": 172200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.5568108558654785, + "loss_rtd": 0.266933411359787, + "loss_sent": 0.1446159929037094, + "loss_sod": 0.05012991651892662, + "loss_total": 0.4616793394088745, + "step": 172299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.212480545043945, + "loss_rtd": 0.2525312304496765, + "loss_sent": 0.08446284383535385, + "loss_sod": 0.0036936099641025066, + "loss_total": 0.34068769216537476, + "step": 172299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.7302485704421997, + "learning_rate": 4.373365315224001e-05, + "loss": 0.4953, + "step": 172300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.336203098297119, + "loss_rtd": 0.2691044509410858, + "loss_sent": 0.19347639381885529, + "loss_sod": 0.012682327069342136, + "loss_total": 0.47526317834854126, + "step": 172399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.143266677856445, + "loss_rtd": 0.2894757390022278, + "loss_sent": 0.20344778895378113, + "loss_sod": 0.054581418633461, + "loss_total": 0.5475049018859863, + "step": 172399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.216090202331543, + "learning_rate": 4.370217135825329e-05, + "loss": 0.4782, + "step": 172400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.914358615875244, + "loss_rtd": 0.2621120810508728, + "loss_sent": 0.21592973172664642, + "loss_sod": 0.050652455538511276, + "loss_total": 0.5286942720413208, + "step": 172499 + }, + { + "epoch": 0.024998, + "loss_gen": 4.816646099090576, + "loss_rtd": 0.24619466066360474, + "loss_sent": 0.039638955146074295, + "loss_sod": 0.17656829953193665, + "loss_total": 0.4624019265174866, + "step": 172499 + }, + { + "epoch": 0.025, + "grad_norm": 1.1692919731140137, + "learning_rate": 4.3670692101031196e-05, + "loss": 0.4834, + "step": 172500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.635056972503662, + "loss_rtd": 0.2667495310306549, + "loss_sent": 0.14194297790527344, + "loss_sod": 0.008958159014582634, + "loss_total": 0.41765066981315613, + "step": 172599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.464961051940918, + "loss_rtd": 0.25216853618621826, + "loss_sent": 0.46573954820632935, + "loss_sod": 0.007270245812833309, + "loss_total": 0.7251783609390259, + "step": 172599 + }, + { + "epoch": 0.0252, + "grad_norm": 2.3285279273986816, + "learning_rate": 4.363921539325356e-05, + "loss": 0.5006, + "step": 172600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.531739234924316, + "loss_rtd": 0.2669811248779297, + "loss_sent": 0.37888938188552856, + "loss_sod": 0.13690614700317383, + "loss_total": 0.7827766537666321, + "step": 172699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.290124416351318, + "loss_rtd": 0.2700974643230438, + "loss_sent": 0.20026355981826782, + "loss_sod": 0.05382388457655907, + "loss_total": 0.5241849422454834, + "step": 172699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.828365445137024, + "learning_rate": 4.360774124759922e-05, + "loss": 0.501, + "step": 172700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.625349521636963, + "loss_rtd": 0.2544606924057007, + "loss_sent": 0.1294373720884323, + "loss_sod": 0.02067280374467373, + "loss_total": 0.40457087755203247, + "step": 172799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.215242862701416, + "loss_rtd": 0.2510789632797241, + "loss_sent": 0.10542890429496765, + "loss_sod": 0.06537258625030518, + "loss_total": 0.42188045382499695, + "step": 172799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.9845788478851318, + "learning_rate": 4.357626967674593e-05, + "loss": 0.4907, + "step": 172800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.444211006164551, + "loss_rtd": 0.24663518369197845, + "loss_sent": 0.15665069222450256, + "loss_sod": 0.02020771987736225, + "loss_total": 0.4234935939311981, + "step": 172899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.450448036193848, + "loss_rtd": 0.26696035265922546, + "loss_sent": 0.23775306344032288, + "loss_sod": 0.049146123230457306, + "loss_total": 0.553859531879425, + "step": 172899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.5564359426498413, + "learning_rate": 4.354480069337045e-05, + "loss": 0.4727, + "step": 172900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.63621711730957, + "loss_rtd": 0.2305292934179306, + "loss_sent": 0.02157679945230484, + "loss_sod": 0.13029345870018005, + "loss_total": 0.3823995292186737, + "step": 172999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.68080997467041, + "loss_rtd": 0.25236624479293823, + "loss_sent": 3.4659493394428864e-05, + "loss_sod": 0.10431516170501709, + "loss_total": 0.3567160665988922, + "step": 172999 + }, + { + "epoch": 0.026, + "grad_norm": 0.7032691240310669, + "learning_rate": 4.351333431014847e-05, + "loss": 0.492, + "step": 173000 + }, + { + "epoch": 0.026, + "eval_loss": 0.4772503674030304, + "eval_runtime": 150.7744, + "eval_samples_per_second": 102.425, + "eval_steps_per_second": 0.803, + "step": 173000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.988316059112549, + "loss_rtd": 0.2611532509326935, + "loss_sent": 0.02122604288160801, + "loss_sod": 0.11611946672201157, + "loss_total": 0.3984987735748291, + "step": 173099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.792057991027832, + "loss_rtd": 0.23634378612041473, + "loss_sent": 0.0003018920833710581, + "loss_sod": 0.08330892026424408, + "loss_total": 0.31995460391044617, + "step": 173099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.8967685699462891, + "learning_rate": 4.348187053975467e-05, + "loss": 0.5081, + "step": 173100 + }, + { + "epoch": 0.026398, + "loss_gen": 4.4217848777771, + "loss_rtd": 0.22614915668964386, + "loss_sent": 0.007760841865092516, + "loss_sod": 0.10313044488430023, + "loss_total": 0.3370404541492462, + "step": 173199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.512134552001953, + "loss_rtd": 0.2666032314300537, + "loss_sent": 0.16551974415779114, + "loss_sod": 0.03663212060928345, + "loss_total": 0.4687550961971283, + "step": 173199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.209341049194336, + "learning_rate": 4.3450409394862614e-05, + "loss": 0.4981, + "step": 173200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.36017370223999, + "loss_rtd": 0.24220968782901764, + "loss_sent": 0.19550010561943054, + "loss_sod": 0.04168718680739403, + "loss_total": 0.4793969690799713, + "step": 173299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.390031337738037, + "loss_rtd": 0.25432056188583374, + "loss_sent": 0.27294614911079407, + "loss_sod": 0.05045618116855621, + "loss_total": 0.5777229070663452, + "step": 173299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.0157606601715088, + "learning_rate": 4.341895088814489e-05, + "loss": 0.4785, + "step": 173300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.332516670227051, + "loss_rtd": 0.2581424117088318, + "loss_sent": 0.0722486823797226, + "loss_sod": 0.14761140942573547, + "loss_total": 0.47800248861312866, + "step": 173399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.863432884216309, + "loss_rtd": 0.2590324878692627, + "loss_sent": 0.06837914884090424, + "loss_sod": 0.12218590825796127, + "loss_total": 0.4495975375175476, + "step": 173399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.0803948640823364, + "learning_rate": 4.338749503227296e-05, + "loss": 0.5071, + "step": 173400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.350805759429932, + "loss_rtd": 0.25680214166641235, + "loss_sent": 0.37348273396492004, + "loss_sod": 0.018537752330303192, + "loss_total": 0.6488226652145386, + "step": 173499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.221343040466309, + "loss_rtd": 0.2781735360622406, + "loss_sent": 0.13024096190929413, + "loss_sod": 0.010937447659671307, + "loss_total": 0.4193519353866577, + "step": 173499 + }, + { + "epoch": 0.027, + "grad_norm": 0.9971233606338501, + "learning_rate": 4.335604183991723e-05, + "loss": 0.498, + "step": 173500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.203567981719971, + "loss_rtd": 0.25005918741226196, + "loss_sent": 0.07693670690059662, + "loss_sod": 0.07172539830207825, + "loss_total": 0.39872127771377563, + "step": 173599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.419286251068115, + "loss_rtd": 0.2811583876609802, + "loss_sent": 0.5652903318405151, + "loss_sod": 0.02947412058711052, + "loss_total": 0.8759227991104126, + "step": 173599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.73147714138031, + "learning_rate": 4.332459132374707e-05, + "loss": 0.5077, + "step": 173600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.219594955444336, + "loss_rtd": 0.28303781151771545, + "loss_sent": 0.08994757384061813, + "loss_sod": 0.046749409288167953, + "loss_total": 0.41973480582237244, + "step": 173699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.612659931182861, + "loss_rtd": 0.27426472306251526, + "loss_sent": 0.22041532397270203, + "loss_sod": 0.026184238493442535, + "loss_total": 0.5208642482757568, + "step": 173699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.706559956073761, + "learning_rate": 4.32931434964307e-05, + "loss": 0.5095, + "step": 173700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.5038838386535645, + "loss_rtd": 0.22788257896900177, + "loss_sent": 0.02189287357032299, + "loss_sod": 0.0732315331697464, + "loss_total": 0.3230069875717163, + "step": 173799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.356247425079346, + "loss_rtd": 0.24853888154029846, + "loss_sent": 0.10135073214769363, + "loss_sod": 0.030710332095623016, + "loss_total": 0.3805999457836151, + "step": 173799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8105595707893372, + "learning_rate": 4.3261698370635354e-05, + "loss": 0.5031, + "step": 173800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.5633416175842285, + "loss_rtd": 0.23870515823364258, + "loss_sent": 0.006563057191669941, + "loss_sod": 0.06918448954820633, + "loss_total": 0.3144527077674866, + "step": 173899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.257259845733643, + "loss_rtd": 0.27255484461784363, + "loss_sent": 0.5081937313079834, + "loss_sod": 0.013570001348853111, + "loss_total": 0.7943185567855835, + "step": 173899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.7526987791061401, + "learning_rate": 4.3230255959027076e-05, + "loss": 0.5088, + "step": 173900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.683145999908447, + "loss_rtd": 0.25262314081192017, + "loss_sent": 0.12697406113147736, + "loss_sod": 0.10950789600610733, + "loss_total": 0.48910510540008545, + "step": 173999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.555135250091553, + "loss_rtd": 0.2704862654209137, + "loss_sent": 0.12093016505241394, + "loss_sod": 0.045832931995391846, + "loss_total": 0.4372493624687195, + "step": 173999 + }, + { + "epoch": 0.028, + "grad_norm": 0.8709926009178162, + "learning_rate": 4.31988162742709e-05, + "loss": 0.4925, + "step": 174000 + }, + { + "epoch": 0.028, + "eval_loss": 0.47378090023994446, + "eval_runtime": 151.0282, + "eval_samples_per_second": 102.252, + "eval_steps_per_second": 0.801, + "step": 174000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.1633710861206055, + "loss_rtd": 0.28782790899276733, + "loss_sent": 0.2325538694858551, + "loss_sod": 0.07180501520633698, + "loss_total": 0.5921868085861206, + "step": 174099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.622278690338135, + "loss_rtd": 0.26133668422698975, + "loss_sent": 0.16414345800876617, + "loss_sod": 0.04548267275094986, + "loss_total": 0.4709628224372864, + "step": 174099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.924146831035614, + "learning_rate": 4.316737932903071e-05, + "loss": 0.4921, + "step": 174100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.44661808013916, + "loss_rtd": 0.24095293879508972, + "loss_sent": 0.2738695740699768, + "loss_sod": 0.07141793519258499, + "loss_total": 0.5862404108047485, + "step": 174199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.276996612548828, + "loss_rtd": 0.2431168407201767, + "loss_sent": 0.2242770493030548, + "loss_sod": 0.017125261947512627, + "loss_total": 0.4845191538333893, + "step": 174199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.4760338068008423, + "learning_rate": 4.313594513596932e-05, + "loss": 0.514, + "step": 174200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.371870994567871, + "loss_rtd": 0.2607460021972656, + "loss_sent": 0.16423146426677704, + "loss_sod": 0.030310600996017456, + "loss_total": 0.4552880525588989, + "step": 174299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.095975875854492, + "loss_rtd": 0.23831218481063843, + "loss_sent": 0.011028303764760494, + "loss_sod": 0.10951460897922516, + "loss_total": 0.35885506868362427, + "step": 174299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.9950005412101746, + "learning_rate": 4.31045137077484e-05, + "loss": 0.4863, + "step": 174300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.386153221130371, + "loss_rtd": 0.2543492317199707, + "loss_sent": 0.08969032764434814, + "loss_sod": 0.12441924959421158, + "loss_total": 0.46845880150794983, + "step": 174399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.415390968322754, + "loss_rtd": 0.2741059958934784, + "loss_sent": 0.18275922536849976, + "loss_sod": 0.10143411159515381, + "loss_total": 0.5582993030548096, + "step": 174399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.3448522090911865, + "learning_rate": 4.307308505702853e-05, + "loss": 0.508, + "step": 174400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.306718349456787, + "loss_rtd": 0.2631789445877075, + "loss_sent": 0.18015454709529877, + "loss_sod": 0.025235353037714958, + "loss_total": 0.4685688614845276, + "step": 174499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.371037006378174, + "loss_rtd": 0.2609129250049591, + "loss_sent": 0.03724387660622597, + "loss_sod": 0.019226595759391785, + "loss_total": 0.31738337874412537, + "step": 174499 + }, + { + "epoch": 0.029, + "grad_norm": 1.099623680114746, + "learning_rate": 4.3041659196469176e-05, + "loss": 0.482, + "step": 174500 + }, + { + "epoch": 0.029198, + "loss_gen": 4.8854498863220215, + "loss_rtd": 0.2525550127029419, + "loss_sent": 0.03448490798473358, + "loss_sod": 0.08429908752441406, + "loss_total": 0.37133902311325073, + "step": 174599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.215135097503662, + "loss_rtd": 0.2650979161262512, + "loss_sent": 0.05643727630376816, + "loss_sod": 0.13042433559894562, + "loss_total": 0.4519595205783844, + "step": 174599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.1190494298934937, + "learning_rate": 4.301023613872867e-05, + "loss": 0.5, + "step": 174600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.834264278411865, + "loss_rtd": 0.24064302444458008, + "loss_sent": 0.00012111241812817752, + "loss_sod": 0.18033990263938904, + "loss_total": 0.4211040437221527, + "step": 174699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.916804313659668, + "loss_rtd": 0.23712149262428284, + "loss_sent": 0.11205428093671799, + "loss_sod": 0.05833221226930618, + "loss_total": 0.407507985830307, + "step": 174699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.8764186501502991, + "learning_rate": 4.2978815896464195e-05, + "loss": 0.4956, + "step": 174700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.5481743812561035, + "loss_rtd": 0.2715030908584595, + "loss_sent": 0.4839355945587158, + "loss_sod": 0.13712286949157715, + "loss_total": 0.8925615549087524, + "step": 174799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.568472385406494, + "loss_rtd": 0.26322945952415466, + "loss_sent": 0.2807634174823761, + "loss_sod": 0.057357851415872574, + "loss_total": 0.601350724697113, + "step": 174799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.5966711044311523, + "learning_rate": 4.2947398482331856e-05, + "loss": 0.5002, + "step": 174800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.3092041015625, + "loss_rtd": 0.27356022596359253, + "loss_sent": 0.11498356610536575, + "loss_sod": 0.05010610073804855, + "loss_total": 0.43864989280700684, + "step": 174899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.309630870819092, + "loss_rtd": 0.2655850350856781, + "loss_sent": 0.03646130859851837, + "loss_sod": 0.07756256312131882, + "loss_total": 0.3796089291572571, + "step": 174899 + }, + { + "epoch": 0.0298, + "grad_norm": 0.6131798028945923, + "learning_rate": 4.291598390898657e-05, + "loss": 0.4947, + "step": 174900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.312253475189209, + "loss_rtd": 0.25848621129989624, + "loss_sent": 0.141913041472435, + "loss_sod": 0.030754581093788147, + "loss_total": 0.4311538338661194, + "step": 174999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.19513463973999, + "loss_rtd": 0.24765127897262573, + "loss_sent": 0.46720418334007263, + "loss_sod": 0.04283073544502258, + "loss_total": 0.757686197757721, + "step": 174999 + }, + { + "epoch": 0.03, + "grad_norm": 1.4432408809661865, + "learning_rate": 4.28845721890821e-05, + "loss": 0.509, + "step": 175000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4798588752746582, + "eval_runtime": 150.7622, + "eval_samples_per_second": 102.433, + "eval_steps_per_second": 0.803, + "step": 175000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.340457439422607, + "loss_rtd": 0.27221396565437317, + "loss_sent": 0.3090207278728485, + "loss_sod": 0.023055512458086014, + "loss_total": 0.604290246963501, + "step": 175099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.338098049163818, + "loss_rtd": 0.26297610998153687, + "loss_sent": 0.11111314594745636, + "loss_sod": 0.07260318845510483, + "loss_total": 0.44669246673583984, + "step": 175099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.9103385210037231, + "learning_rate": 4.2853163335271115e-05, + "loss": 0.5014, + "step": 175100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.2835564613342285, + "loss_rtd": 0.25003665685653687, + "loss_sent": 0.21480363607406616, + "loss_sod": 0.055279094725847244, + "loss_total": 0.5201194286346436, + "step": 175199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.523531436920166, + "loss_rtd": 0.2706020474433899, + "loss_sent": 0.15565568208694458, + "loss_sod": 0.01964074932038784, + "loss_total": 0.44589847326278687, + "step": 175199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.6668602824211121, + "learning_rate": 4.282175736020507e-05, + "loss": 0.4832, + "step": 175200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.030451774597168, + "loss_rtd": 0.2588375210762024, + "loss_sent": 0.01697034016251564, + "loss_sod": 0.13009503483772278, + "loss_total": 0.4059028923511505, + "step": 175299 + }, + { + "epoch": 0.030598, + "loss_gen": 4.95477819442749, + "loss_rtd": 0.24549439549446106, + "loss_sent": 0.00011641019955277443, + "loss_sod": 0.15176764130592346, + "loss_total": 0.39737844467163086, + "step": 175299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.9259080290794373, + "learning_rate": 4.279035427653431e-05, + "loss": 0.5048, + "step": 175300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.685121059417725, + "loss_rtd": 0.26787543296813965, + "loss_sent": 0.08151499927043915, + "loss_sod": 0.04920577257871628, + "loss_total": 0.39859622716903687, + "step": 175399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.650103569030762, + "loss_rtd": 0.2859199345111847, + "loss_sent": 0.11438175290822983, + "loss_sod": 0.14111463725566864, + "loss_total": 0.5414162874221802, + "step": 175399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.007842779159546, + "learning_rate": 4.275895409690798e-05, + "loss": 0.4863, + "step": 175400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.223883628845215, + "loss_rtd": 0.24045738577842712, + "loss_sent": 0.18959611654281616, + "loss_sod": 0.03346420079469681, + "loss_total": 0.4635176956653595, + "step": 175499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.113461017608643, + "loss_rtd": 0.251642107963562, + "loss_sent": 0.15249517560005188, + "loss_sod": 0.10661154985427856, + "loss_total": 0.5107488632202148, + "step": 175499 + }, + { + "epoch": 0.031, + "grad_norm": 1.7224534749984741, + "learning_rate": 4.272755683397408e-05, + "loss": 0.5096, + "step": 175500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.483402252197266, + "loss_rtd": 0.2544477581977844, + "loss_sent": 0.15212436020374298, + "loss_sod": 0.18544356524944305, + "loss_total": 0.5920156836509705, + "step": 175599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.230832099914551, + "loss_rtd": 0.2887153923511505, + "loss_sent": 0.17754162847995758, + "loss_sod": 0.0485423281788826, + "loss_total": 0.5147993564605713, + "step": 175599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.8059684038162231, + "learning_rate": 4.269616250037941e-05, + "loss": 0.5049, + "step": 175600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.94041109085083, + "loss_rtd": 0.2362675815820694, + "loss_sent": 3.4851342206820846e-05, + "loss_sod": 0.13205501437187195, + "loss_total": 0.36835744976997375, + "step": 175699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.082764148712158, + "loss_rtd": 0.24880072474479675, + "loss_sent": 5.622784374281764e-05, + "loss_sod": 0.10536079853773117, + "loss_total": 0.3542177677154541, + "step": 175699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.0555180311203003, + "learning_rate": 4.266477110876963e-05, + "loss": 0.4887, + "step": 175700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.065995693206787, + "loss_rtd": 0.26552313566207886, + "loss_sent": 0.29504725337028503, + "loss_sod": 0.050535283982753754, + "loss_total": 0.6111056804656982, + "step": 175799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.396198749542236, + "loss_rtd": 0.2697710692882538, + "loss_sent": 0.15708176791667938, + "loss_sod": 0.04377313703298569, + "loss_total": 0.47062599658966064, + "step": 175799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.0791652202606201, + "learning_rate": 4.2633382671789164e-05, + "loss": 0.5038, + "step": 175800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.311132907867432, + "loss_rtd": 0.24325776100158691, + "loss_sent": 0.19141052663326263, + "loss_sod": 0.13933482766151428, + "loss_total": 0.5740031003952026, + "step": 175899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.138039588928223, + "loss_rtd": 0.2622635066509247, + "loss_sent": 0.12972286343574524, + "loss_sod": 0.047259338200092316, + "loss_total": 0.43924570083618164, + "step": 175899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.4694644212722778, + "learning_rate": 4.260199720208126e-05, + "loss": 0.5041, + "step": 175900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.145990371704102, + "loss_rtd": 0.2639693319797516, + "loss_sent": 0.07845783233642578, + "loss_sod": 0.02447548508644104, + "loss_total": 0.3669026494026184, + "step": 175999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.343972682952881, + "loss_rtd": 0.24482199549674988, + "loss_sent": 0.45106545090675354, + "loss_sod": 0.027471786364912987, + "loss_total": 0.723359227180481, + "step": 175999 + }, + { + "epoch": 0.032, + "grad_norm": 1.1580651998519897, + "learning_rate": 4.257061471228802e-05, + "loss": 0.4906, + "step": 176000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4756234884262085, + "eval_runtime": 151.418, + "eval_samples_per_second": 101.989, + "eval_steps_per_second": 0.799, + "step": 176000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.3917460441589355, + "loss_rtd": 0.26709726452827454, + "loss_sent": 0.3204467296600342, + "loss_sod": 0.061376411467790604, + "loss_total": 0.6489204168319702, + "step": 176099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.494623184204102, + "loss_rtd": 0.2769303023815155, + "loss_sent": 0.4546057879924774, + "loss_sod": 0.07944586873054504, + "loss_total": 0.8109819889068604, + "step": 176099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.1192471981048584, + "learning_rate": 4.2539235215050264e-05, + "loss": 0.5013, + "step": 176100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.089158058166504, + "loss_rtd": 0.2508448660373688, + "loss_sent": 0.06241251528263092, + "loss_sod": 0.1598670482635498, + "loss_total": 0.4731244444847107, + "step": 176199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.992786884307861, + "loss_rtd": 0.26904919743537903, + "loss_sent": 0.1746479719877243, + "loss_sod": 0.05742231011390686, + "loss_total": 0.5011194944381714, + "step": 176199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.2808656692504883, + "learning_rate": 4.2507858723007685e-05, + "loss": 0.4925, + "step": 176200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.298864364624023, + "loss_rtd": 0.25586748123168945, + "loss_sent": 0.19043675065040588, + "loss_sod": 0.036146149039268494, + "loss_total": 0.482450395822525, + "step": 176299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.539154052734375, + "loss_rtd": 0.2653588652610779, + "loss_sent": 0.1710675209760666, + "loss_sod": 0.01716458611190319, + "loss_total": 0.453590989112854, + "step": 176299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.7630979418754578, + "learning_rate": 4.2476485248798714e-05, + "loss": 0.5024, + "step": 176300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.059012413024902, + "loss_rtd": 0.24951209127902985, + "loss_sent": 0.0003721616230905056, + "loss_sod": 0.07661294937133789, + "loss_total": 0.3264971971511841, + "step": 176399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.338247776031494, + "loss_rtd": 0.2792041599750519, + "loss_sent": 0.3772808909416199, + "loss_sod": 0.036255113780498505, + "loss_total": 0.6927402019500732, + "step": 176399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.6497650146484375, + "learning_rate": 4.2445114805060584e-05, + "loss": 0.4829, + "step": 176400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.673123836517334, + "loss_rtd": 0.23918534815311432, + "loss_sent": 0.022573119029402733, + "loss_sod": 0.06347043067216873, + "loss_total": 0.32522889971733093, + "step": 176499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.806493759155273, + "loss_rtd": 0.25529617071151733, + "loss_sent": 0.004772007931023836, + "loss_sod": 0.13478165864944458, + "loss_total": 0.39484983682632446, + "step": 176499 + }, + { + "epoch": 0.001, + "grad_norm": 0.7160961031913757, + "learning_rate": 4.24137474044293e-05, + "loss": 0.5093, + "step": 176500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.41826868057251, + "loss_rtd": 0.2571258544921875, + "loss_sent": 0.18246565759181976, + "loss_sod": 0.05885126441717148, + "loss_total": 0.49844276905059814, + "step": 176599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.055743217468262, + "loss_rtd": 0.2348114401102066, + "loss_sent": 0.03232239559292793, + "loss_sod": 0.0971403568983078, + "loss_total": 0.36427420377731323, + "step": 176599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.7421179413795471, + "learning_rate": 4.238238305953966e-05, + "loss": 0.4903, + "step": 176600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.529196739196777, + "loss_rtd": 0.27083462476730347, + "loss_sent": 0.2703145146369934, + "loss_sod": 0.023448172956705093, + "loss_total": 0.5645973086357117, + "step": 176699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.448575496673584, + "loss_rtd": 0.2592054307460785, + "loss_sent": 0.14654381573200226, + "loss_sod": 0.008700167760252953, + "loss_total": 0.41444939374923706, + "step": 176699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.1424907445907593, + "learning_rate": 4.235102178302522e-05, + "loss": 0.5184, + "step": 176700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.341691493988037, + "loss_rtd": 0.2590598464012146, + "loss_sent": 0.04613998532295227, + "loss_sod": 0.14152613282203674, + "loss_total": 0.4467259645462036, + "step": 176799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.94687032699585, + "loss_rtd": 0.23710812628269196, + "loss_sent": 3.762466803891584e-05, + "loss_sod": 0.17916914820671082, + "loss_total": 0.41631489992141724, + "step": 176799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.1884740591049194, + "learning_rate": 4.2319663587518274e-05, + "loss": 0.4724, + "step": 176800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.708977699279785, + "loss_rtd": 0.2611987888813019, + "loss_sent": 0.2741192579269409, + "loss_sod": 0.08952027559280396, + "loss_total": 0.6248383522033691, + "step": 176899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.4857025146484375, + "loss_rtd": 0.2508212924003601, + "loss_sent": 0.44258952140808105, + "loss_sod": 0.026850054040551186, + "loss_total": 0.7202608585357666, + "step": 176899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.1517118215560913, + "learning_rate": 4.228830848564993e-05, + "loss": 0.503, + "step": 176900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.066939353942871, + "loss_rtd": 0.2523457109928131, + "loss_sent": 0.07493351399898529, + "loss_sod": 0.03503037989139557, + "loss_total": 0.36230963468551636, + "step": 176999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.708009719848633, + "loss_rtd": 0.28032973408699036, + "loss_sent": 0.11090180277824402, + "loss_sod": 0.010316354222595692, + "loss_total": 0.4015478789806366, + "step": 176999 + }, + { + "epoch": 0.002, + "grad_norm": 0.9930636286735535, + "learning_rate": 4.225695649005e-05, + "loss": 0.4841, + "step": 177000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4668843448162079, + "eval_runtime": 154.4239, + "eval_samples_per_second": 100.004, + "eval_steps_per_second": 0.784, + "step": 177000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.652185440063477, + "loss_rtd": 0.290635347366333, + "loss_sent": 0.18285208940505981, + "loss_sod": 0.18549787998199463, + "loss_total": 0.6589853167533875, + "step": 177099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.1103973388671875, + "loss_rtd": 0.25475525856018066, + "loss_sent": 0.011581259779632092, + "loss_sod": 0.18181636929512024, + "loss_total": 0.44815289974212646, + "step": 177099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.280313491821289, + "learning_rate": 4.222560761334708e-05, + "loss": 0.4888, + "step": 177100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.472076416015625, + "loss_rtd": 0.268470823764801, + "loss_sent": 0.14005210995674133, + "loss_sod": 0.03243196755647659, + "loss_total": 0.44095489382743835, + "step": 177199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.537106513977051, + "loss_rtd": 0.2637845575809479, + "loss_sent": 0.20055530965328217, + "loss_sod": 0.010856792330741882, + "loss_total": 0.4751966595649719, + "step": 177199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.9776031970977783, + "learning_rate": 4.219426186816847e-05, + "loss": 0.4898, + "step": 177200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.7137532234191895, + "loss_rtd": 0.23269148170948029, + "loss_sent": 0.0030476772226393223, + "loss_sod": 0.08300338685512543, + "loss_total": 0.3187425434589386, + "step": 177299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.946521282196045, + "loss_rtd": 0.2618849575519562, + "loss_sent": 0.0312032587826252, + "loss_sod": 0.06359866261482239, + "loss_total": 0.35668689012527466, + "step": 177299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.864230215549469, + "learning_rate": 4.2162919267140254e-05, + "loss": 0.4938, + "step": 177300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.496953010559082, + "loss_rtd": 0.26852214336395264, + "loss_sent": 0.1591894030570984, + "loss_sod": 0.07635277509689331, + "loss_total": 0.5040643215179443, + "step": 177399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.572754859924316, + "loss_rtd": 0.25595274567604065, + "loss_sent": 0.16189433634281158, + "loss_sod": 0.06928539276123047, + "loss_total": 0.4871324896812439, + "step": 177399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.822463035583496, + "learning_rate": 4.2131579822887213e-05, + "loss": 0.5024, + "step": 177400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.361823081970215, + "loss_rtd": 0.2623514235019684, + "loss_sent": 0.16954083740711212, + "loss_sod": 0.05207022279500961, + "loss_total": 0.4839624762535095, + "step": 177499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.58965539932251, + "loss_rtd": 0.25201404094696045, + "loss_sent": 0.08452600985765457, + "loss_sod": 0.13741786777973175, + "loss_total": 0.473957896232605, + "step": 177499 + }, + { + "epoch": 0.003, + "grad_norm": 0.725857138633728, + "learning_rate": 4.210024354803288e-05, + "loss": 0.5007, + "step": 177500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.413411617279053, + "loss_rtd": 0.24122172594070435, + "loss_sent": 0.2514476776123047, + "loss_sod": 0.04914465546607971, + "loss_total": 0.5418140888214111, + "step": 177599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.256341934204102, + "loss_rtd": 0.28153109550476074, + "loss_sent": 0.21093186736106873, + "loss_sod": 0.0345311164855957, + "loss_total": 0.5269941091537476, + "step": 177599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.6663022041320801, + "learning_rate": 4.2068910455199504e-05, + "loss": 0.5017, + "step": 177600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.281067848205566, + "loss_rtd": 0.260381817817688, + "loss_sent": 0.28563469648361206, + "loss_sod": 0.024251481518149376, + "loss_total": 0.5702680349349976, + "step": 177699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.571688652038574, + "loss_rtd": 0.28116393089294434, + "loss_sent": 0.22319813072681427, + "loss_sod": 0.035796672105789185, + "loss_total": 0.540158748626709, + "step": 177699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.2563155889511108, + "learning_rate": 4.203758055700806e-05, + "loss": 0.5026, + "step": 177700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.343420505523682, + "loss_rtd": 0.2577490508556366, + "loss_sent": 0.10741659998893738, + "loss_sod": 0.02538968436419964, + "loss_total": 0.39055532217025757, + "step": 177799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.449235439300537, + "loss_rtd": 0.2673318684101105, + "loss_sent": 0.1105814129114151, + "loss_sod": 0.016446208581328392, + "loss_total": 0.3943594694137573, + "step": 177799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.7080693244934082, + "learning_rate": 4.2006253866078194e-05, + "loss": 0.4801, + "step": 177800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.616039752960205, + "loss_rtd": 0.2546175420284271, + "loss_sent": 0.1314704418182373, + "loss_sod": 0.04876062646508217, + "loss_total": 0.4348486065864563, + "step": 177899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.463183403015137, + "loss_rtd": 0.2529369294643402, + "loss_sent": 0.17607368528842926, + "loss_sod": 0.08975831419229507, + "loss_total": 0.5187689065933228, + "step": 177899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.1900516748428345, + "learning_rate": 4.197493039502833e-05, + "loss": 0.5193, + "step": 177900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.278634548187256, + "loss_rtd": 0.2694304883480072, + "loss_sent": 0.22764815390110016, + "loss_sod": 0.05485042184591293, + "loss_total": 0.5519290566444397, + "step": 177999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.351944446563721, + "loss_rtd": 0.26608723402023315, + "loss_sent": 0.30641230940818787, + "loss_sod": 0.0295640267431736, + "loss_total": 0.6020635366439819, + "step": 177999 + }, + { + "epoch": 0.004, + "grad_norm": 1.2334996461868286, + "learning_rate": 4.1943610156475544e-05, + "loss": 0.4883, + "step": 178000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4764252007007599, + "eval_runtime": 151.2918, + "eval_samples_per_second": 102.074, + "eval_steps_per_second": 0.8, + "step": 178000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.586325645446777, + "loss_rtd": 0.27039051055908203, + "loss_sent": 0.17318874597549438, + "loss_sod": 0.05760243162512779, + "loss_total": 0.5011817216873169, + "step": 178099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.69460916519165, + "loss_rtd": 0.2655748128890991, + "loss_sent": 0.11303507536649704, + "loss_sod": 0.039691053330898285, + "loss_total": 0.41830095648765564, + "step": 178099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2535382509231567, + "learning_rate": 4.191229316303561e-05, + "loss": 0.4997, + "step": 178100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.459695339202881, + "loss_rtd": 0.2589976489543915, + "loss_sent": 0.21007460355758667, + "loss_sod": 0.08964826166629791, + "loss_total": 0.5587205290794373, + "step": 178199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.936141014099121, + "loss_rtd": 0.23948220908641815, + "loss_sent": 0.07174783200025558, + "loss_sod": 0.052694909274578094, + "loss_total": 0.36392495036125183, + "step": 178199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.7968523502349854, + "learning_rate": 4.1880979427323037e-05, + "loss": 0.4961, + "step": 178200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.5288987159729, + "loss_rtd": 0.24514004588127136, + "loss_sent": 0.14824345707893372, + "loss_sod": 0.07209758460521698, + "loss_total": 0.46548107266426086, + "step": 178299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.431944370269775, + "loss_rtd": 0.27769768238067627, + "loss_sent": 0.2189095914363861, + "loss_sod": 0.1527351289987564, + "loss_total": 0.64934241771698, + "step": 178299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.2251418828964233, + "learning_rate": 4.1849668961950964e-05, + "loss": 0.503, + "step": 178300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.546605110168457, + "loss_rtd": 0.27432477474212646, + "loss_sent": 0.32855531573295593, + "loss_sod": 0.06549499183893204, + "loss_total": 0.6683750748634338, + "step": 178399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.0015130043029785, + "loss_rtd": 0.2546570897102356, + "loss_sent": 0.026251059025526047, + "loss_sod": 0.07344754040241241, + "loss_total": 0.35435566306114197, + "step": 178399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.289629340171814, + "learning_rate": 4.181836177953127e-05, + "loss": 0.5061, + "step": 178400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.554134845733643, + "loss_rtd": 0.2642400860786438, + "loss_sent": 0.5396769642829895, + "loss_sod": 0.053843654692173004, + "loss_total": 0.8577606678009033, + "step": 178499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.628774166107178, + "loss_rtd": 0.26725253462791443, + "loss_sent": 0.05701502412557602, + "loss_sod": 0.04202532395720482, + "loss_total": 0.36629289388656616, + "step": 178499 + }, + { + "epoch": 0.005, + "grad_norm": 1.825698971748352, + "learning_rate": 4.1787057892674465e-05, + "loss": 0.4892, + "step": 178500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.714921474456787, + "loss_rtd": 0.24926084280014038, + "loss_sent": 0.45414772629737854, + "loss_sod": 0.06611594557762146, + "loss_total": 0.7695245146751404, + "step": 178599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.350859642028809, + "loss_rtd": 0.26421958208084106, + "loss_sent": 0.21245747804641724, + "loss_sod": 0.06240715831518173, + "loss_total": 0.5390841960906982, + "step": 178599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.542447805404663, + "learning_rate": 4.175575731398977e-05, + "loss": 0.4853, + "step": 178600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.297928810119629, + "loss_rtd": 0.26276102662086487, + "loss_sent": 0.2245301455259323, + "loss_sod": 0.006228235084563494, + "loss_total": 0.4935194253921509, + "step": 178699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.591362953186035, + "loss_rtd": 0.26682981848716736, + "loss_sent": 0.04091120511293411, + "loss_sod": 0.14690172672271729, + "loss_total": 0.45464274287223816, + "step": 178699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.9810351133346558, + "learning_rate": 4.172446005608503e-05, + "loss": 0.483, + "step": 178700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.2232160568237305, + "loss_rtd": 0.251579225063324, + "loss_sent": 0.10386347025632858, + "loss_sod": 0.07500104606151581, + "loss_total": 0.43044376373291016, + "step": 178799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.377779960632324, + "loss_rtd": 0.2628142535686493, + "loss_sent": 0.27205297350883484, + "loss_sod": 0.044300567358732224, + "loss_total": 0.5791677832603455, + "step": 178799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.4532432556152344, + "learning_rate": 4.1693166131566805e-05, + "loss": 0.5065, + "step": 178800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.490054607391357, + "loss_rtd": 0.26628002524375916, + "loss_sent": 0.09744316339492798, + "loss_sod": 0.022234242409467697, + "loss_total": 0.38595741987228394, + "step": 178899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.5699782371521, + "loss_rtd": 0.2603760361671448, + "loss_sent": 0.34774866700172424, + "loss_sod": 0.18945448100566864, + "loss_total": 0.7975791692733765, + "step": 178899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.9477487206459045, + "learning_rate": 4.166187555304025e-05, + "loss": 0.4933, + "step": 178900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.482988357543945, + "loss_rtd": 0.23788413405418396, + "loss_sent": 0.059080567210912704, + "loss_sod": 0.049302082508802414, + "loss_total": 0.3462667763233185, + "step": 178999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.442384243011475, + "loss_rtd": 0.26769599318504333, + "loss_sent": 0.227765753865242, + "loss_sod": 0.038329411298036575, + "loss_total": 0.533791184425354, + "step": 178999 + }, + { + "epoch": 0.006, + "grad_norm": 2.0118491649627686, + "learning_rate": 4.163058833310925e-05, + "loss": 0.5041, + "step": 179000 + }, + { + "epoch": 0.006, + "eval_loss": 0.47466304898262024, + "eval_runtime": 151.4685, + "eval_samples_per_second": 101.955, + "eval_steps_per_second": 0.799, + "step": 179000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.188037872314453, + "loss_rtd": 0.2617184519767761, + "loss_sent": 0.12243036925792694, + "loss_sod": 0.03435511887073517, + "loss_total": 0.41850394010543823, + "step": 179099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.017541885375977, + "loss_rtd": 0.24198447167873383, + "loss_sent": 0.055074840784072876, + "loss_sod": 0.06318394839763641, + "loss_total": 0.3602432608604431, + "step": 179099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.8135080337524414, + "learning_rate": 4.159930448437624e-05, + "loss": 0.4773, + "step": 179100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.485675811767578, + "loss_rtd": 0.2606898844242096, + "loss_sent": 0.23423993587493896, + "loss_sod": 0.040725454688072205, + "loss_total": 0.5356552600860596, + "step": 179199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.839269638061523, + "loss_rtd": 0.26704105734825134, + "loss_sent": 0.11773671954870224, + "loss_sod": 0.1408788114786148, + "loss_total": 0.5256565809249878, + "step": 179199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.3560131788253784, + "learning_rate": 4.15680240194424e-05, + "loss": 0.5046, + "step": 179200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.47376012802124, + "loss_rtd": 0.25602632761001587, + "loss_sent": 0.30784136056900024, + "loss_sod": 0.0686689242720604, + "loss_total": 0.6325366497039795, + "step": 179299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.46110200881958, + "loss_rtd": 0.2672787606716156, + "loss_sent": 0.20248740911483765, + "loss_sod": 0.035606883466243744, + "loss_total": 0.5053730607032776, + "step": 179299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.325631022453308, + "learning_rate": 4.153674695090746e-05, + "loss": 0.4956, + "step": 179300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.34942102432251, + "loss_rtd": 0.27311182022094727, + "loss_sent": 0.23604953289031982, + "loss_sod": 0.011576816439628601, + "loss_total": 0.5207381844520569, + "step": 179399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.549720287322998, + "loss_rtd": 0.27027398347854614, + "loss_sent": 0.0977923646569252, + "loss_sod": 0.10007601231336594, + "loss_total": 0.4681423604488373, + "step": 179399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.9811376929283142, + "learning_rate": 4.150547329136985e-05, + "loss": 0.5003, + "step": 179400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.362544536590576, + "loss_rtd": 0.26804307103157043, + "loss_sent": 0.06654703617095947, + "loss_sod": 0.03614526987075806, + "loss_total": 0.37073537707328796, + "step": 179499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.273104667663574, + "loss_rtd": 0.24291948974132538, + "loss_sent": 0.16729098558425903, + "loss_sod": 0.005854018032550812, + "loss_total": 0.4160645008087158, + "step": 179499 + }, + { + "epoch": 0.007, + "grad_norm": 0.7588452100753784, + "learning_rate": 4.147420305342659e-05, + "loss": 0.5043, + "step": 179500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.419837951660156, + "loss_rtd": 0.2564307749271393, + "loss_sent": 0.14678673446178436, + "loss_sod": 0.06157728284597397, + "loss_total": 0.4647948145866394, + "step": 179599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.458054542541504, + "loss_rtd": 0.27689340710639954, + "loss_sent": 0.1271832138299942, + "loss_sod": 0.14260171353816986, + "loss_total": 0.5466783046722412, + "step": 179599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.20590341091156, + "learning_rate": 4.1442936249673296e-05, + "loss": 0.4819, + "step": 179600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.793631076812744, + "loss_rtd": 0.22719790041446686, + "loss_sent": 0.060469288378953934, + "loss_sod": 0.03978104516863823, + "loss_total": 0.3274482488632202, + "step": 179699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.353108882904053, + "loss_rtd": 0.25455164909362793, + "loss_sent": 0.05941108986735344, + "loss_sod": 0.07290526479482651, + "loss_total": 0.3868680000305176, + "step": 179699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.9167174696922302, + "learning_rate": 4.141167289270428e-05, + "loss": 0.4904, + "step": 179700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.078159332275391, + "loss_rtd": 0.24838924407958984, + "loss_sent": 0.11320249736309052, + "loss_sod": 0.04460389167070389, + "loss_total": 0.40619564056396484, + "step": 179799 + }, + { + "epoch": 0.007598, + "loss_gen": 4.7999491691589355, + "loss_rtd": 0.2368369698524475, + "loss_sent": 0.04544038325548172, + "loss_sod": 0.08653011918067932, + "loss_total": 0.36880746483802795, + "step": 179799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.9799928665161133, + "learning_rate": 4.138041299511238e-05, + "loss": 0.4912, + "step": 179800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.345700263977051, + "loss_rtd": 0.26462382078170776, + "loss_sent": 0.11898131668567657, + "loss_sod": 0.044064655900001526, + "loss_total": 0.4276697635650635, + "step": 179899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.21361780166626, + "loss_rtd": 0.2546546161174774, + "loss_sent": 0.12579309940338135, + "loss_sod": 0.011989301070570946, + "loss_total": 0.39243701100349426, + "step": 179899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.027785062789917, + "learning_rate": 4.13491565694891e-05, + "loss": 0.4954, + "step": 179900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.613238334655762, + "loss_rtd": 0.2553330957889557, + "loss_sent": 0.17620572447776794, + "loss_sod": 0.21121367812156677, + "loss_total": 0.6427525281906128, + "step": 179999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.368664264678955, + "loss_rtd": 0.24394778907299042, + "loss_sent": 0.183329775929451, + "loss_sod": 0.009965645149350166, + "loss_total": 0.4372432231903076, + "step": 179999 + }, + { + "epoch": 0.008, + "grad_norm": 1.0747244358062744, + "learning_rate": 4.131790362842451e-05, + "loss": 0.4804, + "step": 180000 + }, + { + "epoch": 0.008, + "eval_loss": 0.47713226079940796, + "eval_runtime": 151.3287, + "eval_samples_per_second": 102.049, + "eval_steps_per_second": 0.8, + "step": 180000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.487625598907471, + "loss_rtd": 0.2641645669937134, + "loss_sent": 0.13404642045497894, + "loss_sod": 0.022700896486639977, + "loss_total": 0.42091190814971924, + "step": 180099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.319086074829102, + "loss_rtd": 0.2597549557685852, + "loss_sent": 0.01480947993695736, + "loss_sod": 0.13523250818252563, + "loss_total": 0.40979695320129395, + "step": 180099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.92105633020401, + "learning_rate": 4.128665418450732e-05, + "loss": 0.488, + "step": 180100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.736827850341797, + "loss_rtd": 0.2725524604320526, + "loss_sent": 0.08031554520130157, + "loss_sod": 0.09078691899776459, + "loss_total": 0.4436548948287964, + "step": 180199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.926023960113525, + "loss_rtd": 0.27341535687446594, + "loss_sent": 0.14149793982505798, + "loss_sod": 0.1236058920621872, + "loss_total": 0.5385191440582275, + "step": 180199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.8921319842338562, + "learning_rate": 4.1255408250324765e-05, + "loss": 0.4881, + "step": 180200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.5424723625183105, + "loss_rtd": 0.2582395374774933, + "loss_sent": 0.07435081154108047, + "loss_sod": 0.030729809775948524, + "loss_total": 0.36332014203071594, + "step": 180299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.650275230407715, + "loss_rtd": 0.27444303035736084, + "loss_sent": 0.07789462059736252, + "loss_sod": 0.048112284392118454, + "loss_total": 0.4004499316215515, + "step": 180299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.9056849479675293, + "learning_rate": 4.122416583846274e-05, + "loss": 0.5023, + "step": 180300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.1777873039245605, + "loss_rtd": 0.2654229402542114, + "loss_sent": 0.35161760449409485, + "loss_sod": 0.01783064752817154, + "loss_total": 0.6348711848258972, + "step": 180399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.614340305328369, + "loss_rtd": 0.25416260957717896, + "loss_sent": 0.10317254066467285, + "loss_sod": 0.08120696246623993, + "loss_total": 0.43854212760925293, + "step": 180399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.9221227169036865, + "learning_rate": 4.119292696150564e-05, + "loss": 0.5029, + "step": 180400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.872322082519531, + "loss_rtd": 0.2462892383337021, + "loss_sent": 0.05622588098049164, + "loss_sod": 0.18008814752101898, + "loss_total": 0.4826032519340515, + "step": 180499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.6165876388549805, + "loss_rtd": 0.2476365566253662, + "loss_sent": 3.859668504446745e-05, + "loss_sod": 0.05294607952237129, + "loss_total": 0.30062124133110046, + "step": 180499 + }, + { + "epoch": 0.009, + "grad_norm": 0.8333690762519836, + "learning_rate": 4.116169163203653e-05, + "loss": 0.4848, + "step": 180500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.586027145385742, + "loss_rtd": 0.23426824808120728, + "loss_sent": 0.241379976272583, + "loss_sod": 0.01346066314727068, + "loss_total": 0.4891088902950287, + "step": 180599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.472120761871338, + "loss_rtd": 0.24811071157455444, + "loss_sent": 0.2098441869020462, + "loss_sod": 0.01939151994884014, + "loss_total": 0.47734642028808594, + "step": 180599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.9169313311576843, + "learning_rate": 4.113045986263696e-05, + "loss": 0.5004, + "step": 180600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.667011737823486, + "loss_rtd": 0.23707003891468048, + "loss_sent": 0.21612732112407684, + "loss_sod": 0.06380543112754822, + "loss_total": 0.5170028209686279, + "step": 180699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.365878105163574, + "loss_rtd": 0.2620985805988312, + "loss_sent": 0.10187751799821854, + "loss_sod": 0.026803283020853996, + "loss_total": 0.39077937602996826, + "step": 180699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.8551573753356934, + "learning_rate": 4.1099231665887104e-05, + "loss": 0.5003, + "step": 180700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.391913414001465, + "loss_rtd": 0.23210972547531128, + "loss_sent": 0.3293698728084564, + "loss_sod": 0.10571935772895813, + "loss_total": 0.6671989560127258, + "step": 180799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.294342041015625, + "loss_rtd": 0.24984751641750336, + "loss_sent": 0.5203132033348083, + "loss_sod": 0.06893518567085266, + "loss_total": 0.8390958905220032, + "step": 180799 + }, + { + "epoch": 0.0096, + "grad_norm": 2.281083822250366, + "learning_rate": 4.106800705436566e-05, + "loss": 0.4873, + "step": 180800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.3256707191467285, + "loss_rtd": 0.24510520696640015, + "loss_sent": 0.25402265787124634, + "loss_sod": 0.06134971231222153, + "loss_total": 0.560477614402771, + "step": 180899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.4293036460876465, + "loss_rtd": 0.250690221786499, + "loss_sent": 0.1888970136642456, + "loss_sod": 0.07212607562541962, + "loss_total": 0.5117133259773254, + "step": 180899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.920924961566925, + "learning_rate": 4.103678604064992e-05, + "loss": 0.5026, + "step": 180900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.400794982910156, + "loss_rtd": 0.27316975593566895, + "loss_sent": 0.2487497329711914, + "loss_sod": 0.03013472445309162, + "loss_total": 0.552054226398468, + "step": 180999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.666261196136475, + "loss_rtd": 0.2672635316848755, + "loss_sent": 0.16726598143577576, + "loss_sod": 0.076224185526371, + "loss_total": 0.5107536911964417, + "step": 180999 + }, + { + "epoch": 0.01, + "grad_norm": 1.0255016088485718, + "learning_rate": 4.100556863731567e-05, + "loss": 0.5075, + "step": 181000 + }, + { + "epoch": 0.01, + "eval_loss": 0.46509864926338196, + "eval_runtime": 151.2879, + "eval_samples_per_second": 102.077, + "eval_steps_per_second": 0.8, + "step": 181000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.197551727294922, + "loss_rtd": 0.2652503252029419, + "loss_sent": 0.044459570199251175, + "loss_sod": 0.0072874571196734905, + "loss_total": 0.31699734926223755, + "step": 181099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.716944694519043, + "loss_rtd": 0.25120916962623596, + "loss_sent": 0.2240513563156128, + "loss_sod": 0.014923347160220146, + "loss_total": 0.49018386006355286, + "step": 181099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8222607970237732, + "learning_rate": 4.0974354856937316e-05, + "loss": 0.4921, + "step": 181100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.415718078613281, + "loss_rtd": 0.2567186653614044, + "loss_sent": 0.09884947538375854, + "loss_sod": 0.057101938873529434, + "loss_total": 0.4126700758934021, + "step": 181199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.394846439361572, + "loss_rtd": 0.27208247780799866, + "loss_sent": 0.4067822992801666, + "loss_sod": 0.031822025775909424, + "loss_total": 0.7106868028640747, + "step": 181199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.0264217853546143, + "learning_rate": 4.094314471208775e-05, + "loss": 0.4865, + "step": 181200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.2518744468688965, + "loss_rtd": 0.286484032869339, + "loss_sent": 0.12352120131254196, + "loss_sod": 0.11138713359832764, + "loss_total": 0.5213923454284668, + "step": 181299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.881196022033691, + "loss_rtd": 0.2354147732257843, + "loss_sent": 0.00039958898560144007, + "loss_sod": 0.20208677649497986, + "loss_total": 0.4379011392593384, + "step": 181299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.3067972660064697, + "learning_rate": 4.0911938215338395e-05, + "loss": 0.4775, + "step": 181300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.660808086395264, + "loss_rtd": 0.235980823636055, + "loss_sent": 0.20990155637264252, + "loss_sod": 0.031419988721609116, + "loss_total": 0.4773023724555969, + "step": 181399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.864224433898926, + "loss_rtd": 0.23932556807994843, + "loss_sent": 0.0018415855010971427, + "loss_sod": 0.13941384851932526, + "loss_total": 0.38058099150657654, + "step": 181399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.9057682156562805, + "learning_rate": 4.088073537925925e-05, + "loss": 0.4996, + "step": 181400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.179491996765137, + "loss_rtd": 0.24789611995220184, + "loss_sent": 0.19952335953712463, + "loss_sod": 0.0029267133213579655, + "loss_total": 0.4503462016582489, + "step": 181499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.132256507873535, + "loss_rtd": 0.28106802701950073, + "loss_sent": 0.24645671248435974, + "loss_sod": 0.011169906705617905, + "loss_total": 0.5386946201324463, + "step": 181499 + }, + { + "epoch": 0.011, + "grad_norm": 0.9796513319015503, + "learning_rate": 4.0849536216418796e-05, + "loss": 0.4914, + "step": 181500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.230510234832764, + "loss_rtd": 0.2636624276638031, + "loss_sent": 0.22412417829036713, + "loss_sod": 0.08131483197212219, + "loss_total": 0.5691014528274536, + "step": 181599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.510939598083496, + "loss_rtd": 0.25225135684013367, + "loss_sent": 0.14450867474079132, + "loss_sod": 0.004225961863994598, + "loss_total": 0.4009860157966614, + "step": 181599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9009349346160889, + "learning_rate": 4.081834073938406e-05, + "loss": 0.495, + "step": 181600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.600374221801758, + "loss_rtd": 0.2612046003341675, + "loss_sent": 0.2270781695842743, + "loss_sod": 0.1035432517528534, + "loss_total": 0.5918260216712952, + "step": 181699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.734739303588867, + "loss_rtd": 0.2507054805755615, + "loss_sent": 0.3365671932697296, + "loss_sod": 0.04251658171415329, + "loss_total": 0.6297892332077026, + "step": 181699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.9514797925949097, + "learning_rate": 4.078714896072058e-05, + "loss": 0.4988, + "step": 181700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.431485652923584, + "loss_rtd": 0.24863086640834808, + "loss_sent": 0.22148184478282928, + "loss_sod": 0.07801975309848785, + "loss_total": 0.5481324791908264, + "step": 181799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.48687744140625, + "loss_rtd": 0.2456839680671692, + "loss_sent": 0.12878286838531494, + "loss_sod": 0.04740482196211815, + "loss_total": 0.4218716621398926, + "step": 181799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.2794034481048584, + "learning_rate": 4.0755960892992404e-05, + "loss": 0.4957, + "step": 181800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.224306583404541, + "loss_rtd": 0.2649737000465393, + "loss_sent": 0.08195577561855316, + "loss_sod": 0.016276869922876358, + "loss_total": 0.36320632696151733, + "step": 181899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.511739730834961, + "loss_rtd": 0.2753254771232605, + "loss_sent": 0.12046878784894943, + "loss_sod": 0.07207190990447998, + "loss_total": 0.4678661823272705, + "step": 181899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.8595014214515686, + "learning_rate": 4.072477654876206e-05, + "loss": 0.4949, + "step": 181900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.5958638191223145, + "loss_rtd": 0.25696098804473877, + "loss_sent": 0.21077921986579895, + "loss_sod": 0.0512884184718132, + "loss_total": 0.5190286636352539, + "step": 181999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.1753411293029785, + "loss_rtd": 0.2576918601989746, + "loss_sent": 0.13037826120853424, + "loss_sod": 0.0631154403090477, + "loss_total": 0.45118558406829834, + "step": 181999 + }, + { + "epoch": 0.012, + "grad_norm": 0.8823849558830261, + "learning_rate": 4.069359594059062e-05, + "loss": 0.4758, + "step": 182000 + }, + { + "epoch": 0.012, + "eval_loss": 0.4631200432777405, + "eval_runtime": 151.5039, + "eval_samples_per_second": 101.931, + "eval_steps_per_second": 0.799, + "step": 182000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.830898761749268, + "loss_rtd": 0.2535586953163147, + "loss_sent": 2.959699122584425e-05, + "loss_sod": 0.11356370896100998, + "loss_total": 0.36715200543403625, + "step": 182099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.701071262359619, + "loss_rtd": 0.22336901724338531, + "loss_sent": 0.017257556319236755, + "loss_sod": 0.08801577240228653, + "loss_total": 0.3286423683166504, + "step": 182099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.6836785078048706, + "learning_rate": 4.0662419081037625e-05, + "loss": 0.4756, + "step": 182100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.572444915771484, + "loss_rtd": 0.24312740564346313, + "loss_sent": 0.17422907054424286, + "loss_sod": 0.051759544759988785, + "loss_total": 0.4691160321235657, + "step": 182199 + }, + { + "epoch": 0.012398, + "loss_gen": 4.937934398651123, + "loss_rtd": 0.24445945024490356, + "loss_sent": 3.690571975312196e-05, + "loss_sod": 0.24471774697303772, + "loss_total": 0.48921409249305725, + "step": 182199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.4495713710784912, + "learning_rate": 4.063124598266111e-05, + "loss": 0.4993, + "step": 182200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.658036231994629, + "loss_rtd": 0.27451783418655396, + "loss_sent": 0.07577437162399292, + "loss_sod": 0.04801433905959129, + "loss_total": 0.39830654859542847, + "step": 182299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.250896453857422, + "loss_rtd": 0.25234609842300415, + "loss_sent": 0.03949786722660065, + "loss_sod": 0.0794113352894783, + "loss_total": 0.3712552785873413, + "step": 182299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.6161572933197021, + "learning_rate": 4.0600076658017585e-05, + "loss": 0.5026, + "step": 182300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.730467319488525, + "loss_rtd": 0.2254515141248703, + "loss_sent": 0.05113282427191734, + "loss_sod": 0.035083699971437454, + "loss_total": 0.3116680383682251, + "step": 182399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.1474151611328125, + "loss_rtd": 0.26398155093193054, + "loss_sent": 0.07230521738529205, + "loss_sod": 0.16289299726486206, + "loss_total": 0.49917978048324585, + "step": 182399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.1920756101608276, + "learning_rate": 4.056891111966206e-05, + "loss": 0.4929, + "step": 182400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.696252822875977, + "loss_rtd": 0.21865539252758026, + "loss_sent": 0.09347908943891525, + "loss_sod": 0.008299498818814754, + "loss_total": 0.32043397426605225, + "step": 182499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.6855878829956055, + "loss_rtd": 0.27629354596138, + "loss_sent": 0.39232394099235535, + "loss_sod": 0.10279744863510132, + "loss_total": 0.7714149355888367, + "step": 182499 + }, + { + "epoch": 0.013, + "grad_norm": 1.159462571144104, + "learning_rate": 4.0537749380148004e-05, + "loss": 0.4804, + "step": 182500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.757350444793701, + "loss_rtd": 0.2363877296447754, + "loss_sent": 4.058789636474103e-05, + "loss_sod": 0.06571846455335617, + "loss_total": 0.3021467924118042, + "step": 182599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.5546159744262695, + "loss_rtd": 0.27724331617355347, + "loss_sent": 0.12912122905254364, + "loss_sod": 0.03324735909700394, + "loss_total": 0.43961191177368164, + "step": 182599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.6910777688026428, + "learning_rate": 4.050659145202738e-05, + "loss": 0.5107, + "step": 182600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.621369361877441, + "loss_rtd": 0.2423516809940338, + "loss_sent": 0.04023056849837303, + "loss_sod": 0.006830750964581966, + "loss_total": 0.2894130051136017, + "step": 182699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.406081199645996, + "loss_rtd": 0.26067420840263367, + "loss_sent": 0.380702406167984, + "loss_sod": 0.03927621245384216, + "loss_total": 0.6806528568267822, + "step": 182699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.6373041868209839, + "learning_rate": 4.0475437347850577e-05, + "loss": 0.4763, + "step": 182700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.552177906036377, + "loss_rtd": 0.2496611773967743, + "loss_sent": 0.17501670122146606, + "loss_sod": 0.05193847045302391, + "loss_total": 0.47661635279655457, + "step": 182799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.976564407348633, + "loss_rtd": 0.2528514862060547, + "loss_sent": 0.08602333813905716, + "loss_sod": 0.02177412249147892, + "loss_total": 0.3606489300727844, + "step": 182799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.255135416984558, + "learning_rate": 4.0444287080166464e-05, + "loss": 0.4871, + "step": 182800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.524647235870361, + "loss_rtd": 0.2574880123138428, + "loss_sent": 0.12596097588539124, + "loss_sod": 0.037886302918195724, + "loss_total": 0.42133527994155884, + "step": 182899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.371420860290527, + "loss_rtd": 0.26169314980506897, + "loss_sent": 0.41575688123703003, + "loss_sod": 0.015264102257788181, + "loss_total": 0.6927140951156616, + "step": 182899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.9581928253173828, + "learning_rate": 4.041314066152239e-05, + "loss": 0.4787, + "step": 182900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.1283745765686035, + "loss_rtd": 0.24160684645175934, + "loss_sent": 0.0006477711140178144, + "loss_sod": 0.1458633840084076, + "loss_total": 0.38811802864074707, + "step": 182999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.760714530944824, + "loss_rtd": 0.2214888632297516, + "loss_sent": 0.01441890373826027, + "loss_sod": 0.02126622013747692, + "loss_total": 0.25717398524284363, + "step": 182999 + }, + { + "epoch": 0.014, + "grad_norm": 0.6300406455993652, + "learning_rate": 4.038199810446409e-05, + "loss": 0.5115, + "step": 183000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4704357087612152, + "eval_runtime": 151.3242, + "eval_samples_per_second": 102.052, + "eval_steps_per_second": 0.8, + "step": 183000 + }, + { + "epoch": 0.014198, + "loss_gen": 4.839997291564941, + "loss_rtd": 0.22678792476654053, + "loss_sent": 0.018909098580479622, + "loss_sod": 0.19718509912490845, + "loss_total": 0.44288212060928345, + "step": 183099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.5360822677612305, + "loss_rtd": 0.2616807818412781, + "loss_sent": 0.29152411222457886, + "loss_sod": 0.020168419927358627, + "loss_total": 0.5733733177185059, + "step": 183099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.2153898477554321, + "learning_rate": 4.0350859421535814e-05, + "loss": 0.4827, + "step": 183100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.435664653778076, + "loss_rtd": 0.27038609981536865, + "loss_sent": 0.18372918665409088, + "loss_sod": 0.12099272757768631, + "loss_total": 0.575107991695404, + "step": 183199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.528284549713135, + "loss_rtd": 0.2692619264125824, + "loss_sent": 0.12430182099342346, + "loss_sod": 0.1124880239367485, + "loss_total": 0.506051778793335, + "step": 183199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.2037572860717773, + "learning_rate": 4.03197246252802e-05, + "loss": 0.4835, + "step": 183200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.204357624053955, + "loss_rtd": 0.25782856345176697, + "loss_sent": 0.08069150149822235, + "loss_sod": 0.004195802845060825, + "loss_total": 0.342715859413147, + "step": 183299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.3771233558654785, + "loss_rtd": 0.25820738077163696, + "loss_sent": 0.2556789219379425, + "loss_sod": 0.06680300831794739, + "loss_total": 0.5806893110275269, + "step": 183299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.8360551595687866, + "learning_rate": 4.0288593728238365e-05, + "loss": 0.5018, + "step": 183300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.616715431213379, + "loss_rtd": 0.26079732179641724, + "loss_sent": 0.091511569917202, + "loss_sod": 0.11728174984455109, + "loss_total": 0.4695906341075897, + "step": 183399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.630971908569336, + "loss_rtd": 0.2716662287712097, + "loss_sent": 0.22483932971954346, + "loss_sod": 0.02582927793264389, + "loss_total": 0.5223348140716553, + "step": 183399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.7448164820671082, + "learning_rate": 4.02574667429498e-05, + "loss": 0.4973, + "step": 183400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.2737627029418945, + "loss_rtd": 0.2512357831001282, + "loss_sent": 3.664237738121301e-05, + "loss_sod": 0.17651206254959106, + "loss_total": 0.4277845025062561, + "step": 183499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.630550384521484, + "loss_rtd": 0.21566666662693024, + "loss_sent": 0.005458462052047253, + "loss_sod": 0.0444282591342926, + "loss_total": 0.26555341482162476, + "step": 183499 + }, + { + "epoch": 0.015, + "grad_norm": 0.8602021336555481, + "learning_rate": 4.0226343681952476e-05, + "loss": 0.4891, + "step": 183500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.610905647277832, + "loss_rtd": 0.27766552567481995, + "loss_sent": 0.32495835423469543, + "loss_sod": 0.009545616805553436, + "loss_total": 0.6121695041656494, + "step": 183599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.8337273597717285, + "loss_rtd": 0.26680153608322144, + "loss_sent": 0.43907541036605835, + "loss_sod": 0.10988657176494598, + "loss_total": 0.815763533115387, + "step": 183599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.4352822303771973, + "learning_rate": 4.019522455778274e-05, + "loss": 0.4855, + "step": 183600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.38031005859375, + "loss_rtd": 0.2854062616825104, + "loss_sent": 0.1864727884531021, + "loss_sod": 0.0777948647737503, + "loss_total": 0.5496739149093628, + "step": 183699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.878078937530518, + "loss_rtd": 0.2514983117580414, + "loss_sent": 0.21822580695152283, + "loss_sod": 0.054971419274806976, + "loss_total": 0.5246955156326294, + "step": 183699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.4945223331451416, + "learning_rate": 4.016410938297539e-05, + "loss": 0.5119, + "step": 183700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.504877090454102, + "loss_rtd": 0.2742302119731903, + "loss_sent": 0.1993817538022995, + "loss_sod": 0.06181372329592705, + "loss_total": 0.5354256629943848, + "step": 183799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.478941917419434, + "loss_rtd": 0.24994368851184845, + "loss_sent": 0.14488546550273895, + "loss_sod": 0.039909228682518005, + "loss_total": 0.4347383975982666, + "step": 183799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.8672297596931458, + "learning_rate": 4.0132998170063594e-05, + "loss": 0.4911, + "step": 183800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.2837934494018555, + "loss_rtd": 0.2422766536474228, + "loss_sent": 0.05480147525668144, + "loss_sod": 0.021374644711613655, + "loss_total": 0.31845277547836304, + "step": 183899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.804450988769531, + "loss_rtd": 0.2299848198890686, + "loss_sent": 3.797487443080172e-05, + "loss_sod": 0.22849178314208984, + "loss_total": 0.45851457118988037, + "step": 183899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.9032144546508789, + "learning_rate": 4.010189093157896e-05, + "loss": 0.49, + "step": 183900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.608768463134766, + "loss_rtd": 0.26588255167007446, + "loss_sent": 0.2749767005443573, + "loss_sod": 0.04174520820379257, + "loss_total": 0.5826044678688049, + "step": 183999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.7578229904174805, + "loss_rtd": 0.2552870810031891, + "loss_sent": 0.29340633749961853, + "loss_sod": 0.042127273976802826, + "loss_total": 0.5908206701278687, + "step": 183999 + }, + { + "epoch": 0.016, + "grad_norm": 1.2859172821044922, + "learning_rate": 4.007078768005147e-05, + "loss": 0.4885, + "step": 184000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4621444642543793, + "eval_runtime": 151.5087, + "eval_samples_per_second": 101.928, + "eval_steps_per_second": 0.799, + "step": 184000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.459926128387451, + "loss_rtd": 0.2595793902873993, + "loss_sent": 0.09777318686246872, + "loss_sod": 0.030045513063669205, + "loss_total": 0.3873980939388275, + "step": 184099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.62075138092041, + "loss_rtd": 0.25962185859680176, + "loss_sent": 0.16660597920417786, + "loss_sod": 0.020659856498241425, + "loss_total": 0.44688770174980164, + "step": 184099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.9403879046440125, + "learning_rate": 4.0039688428009525e-05, + "loss": 0.4838, + "step": 184100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.1393914222717285, + "loss_rtd": 0.26254382729530334, + "loss_sent": 0.08186690509319305, + "loss_sod": 0.013753924518823624, + "loss_total": 0.3581646680831909, + "step": 184199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.615752220153809, + "loss_rtd": 0.2803412675857544, + "loss_sent": 0.1581527590751648, + "loss_sod": 0.0893164724111557, + "loss_total": 0.5278105139732361, + "step": 184199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.6435991525650024, + "learning_rate": 4.00085931879799e-05, + "loss": 0.5036, + "step": 184200 + }, + { + "epoch": 0.016598, + "loss_gen": 4.898801326751709, + "loss_rtd": 0.22874201834201813, + "loss_sent": 4.691241338150576e-05, + "loss_sod": 0.16978690028190613, + "loss_total": 0.3985758423805237, + "step": 184299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.128859996795654, + "loss_rtd": 0.24762988090515137, + "loss_sent": 0.027703799307346344, + "loss_sod": 0.12636427581310272, + "loss_total": 0.40169796347618103, + "step": 184299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.0342738628387451, + "learning_rate": 3.997750197248773e-05, + "loss": 0.4855, + "step": 184300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.697004795074463, + "loss_rtd": 0.2553969919681549, + "loss_sent": 0.16735699772834778, + "loss_sod": 0.04648858308792114, + "loss_total": 0.46924257278442383, + "step": 184399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.459543228149414, + "loss_rtd": 0.25131621956825256, + "loss_sent": 0.32132408022880554, + "loss_sod": 0.05537897348403931, + "loss_total": 0.6280192732810974, + "step": 184399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.027738094329834, + "learning_rate": 3.9946414794056565e-05, + "loss": 0.4747, + "step": 184400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.9221954345703125, + "loss_rtd": 0.2270478904247284, + "loss_sent": 0.08424238860607147, + "loss_sod": 0.08059877157211304, + "loss_total": 0.3918890357017517, + "step": 184499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.468795299530029, + "loss_rtd": 0.2783259451389313, + "loss_sent": 0.2450186163187027, + "loss_sod": 0.043618764728307724, + "loss_total": 0.5669633150100708, + "step": 184499 + }, + { + "epoch": 0.017, + "grad_norm": 0.5893915295600891, + "learning_rate": 3.9915331665208325e-05, + "loss": 0.4938, + "step": 184500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.752498626708984, + "loss_rtd": 0.23344717919826508, + "loss_sent": 3.408768679946661e-05, + "loss_sod": 0.12836860120296478, + "loss_total": 0.3618498742580414, + "step": 184599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.045048713684082, + "loss_rtd": 0.23287439346313477, + "loss_sent": 0.0720604881644249, + "loss_sod": 0.04457763582468033, + "loss_total": 0.34951251745224, + "step": 184599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.9258651733398438, + "learning_rate": 3.988425259846327e-05, + "loss": 0.4748, + "step": 184600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.031115531921387, + "loss_rtd": 0.2072264552116394, + "loss_sent": 0.007756704930216074, + "loss_sod": 0.07516558468341827, + "loss_total": 0.2901487350463867, + "step": 184699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.351471900939941, + "loss_rtd": 0.22140014171600342, + "loss_sent": 0.04059907793998718, + "loss_sod": 0.12634029984474182, + "loss_total": 0.3883395195007324, + "step": 184699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.6309583187103271, + "learning_rate": 3.9853177606340075e-05, + "loss": 0.4765, + "step": 184700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.614597797393799, + "loss_rtd": 0.2522928714752197, + "loss_sent": 0.18731844425201416, + "loss_sod": 0.017789138481020927, + "loss_total": 0.45740044116973877, + "step": 184799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.453588485717773, + "loss_rtd": 0.25674304366111755, + "loss_sent": 0.16259916126728058, + "loss_sod": 0.04139901325106621, + "loss_total": 0.46074122190475464, + "step": 184799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.7350119948387146, + "learning_rate": 3.982210670135571e-05, + "loss": 0.5014, + "step": 184800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.427696704864502, + "loss_rtd": 0.25103387236595154, + "loss_sent": 0.13740447163581848, + "loss_sod": 0.026184070855379105, + "loss_total": 0.41462242603302, + "step": 184899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.288742542266846, + "loss_rtd": 0.26934170722961426, + "loss_sent": 0.33357757329940796, + "loss_sod": 0.054238948971033096, + "loss_total": 0.6571582555770874, + "step": 184899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.1365247964859009, + "learning_rate": 3.979103989602556e-05, + "loss": 0.4931, + "step": 184900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.507558345794678, + "loss_rtd": 0.2580697238445282, + "loss_sent": 0.12666617333889008, + "loss_sod": 0.013586482033133507, + "loss_total": 0.3983224034309387, + "step": 184999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.5863542556762695, + "loss_rtd": 0.2636576294898987, + "loss_sent": 0.1636495143175125, + "loss_sod": 0.029072415083646774, + "loss_total": 0.4563795328140259, + "step": 184999 + }, + { + "epoch": 0.018, + "grad_norm": 0.5589444637298584, + "learning_rate": 3.9759977202863316e-05, + "loss": 0.4866, + "step": 185000 + }, + { + "epoch": 0.018, + "eval_loss": 0.46582549810409546, + "eval_runtime": 151.4957, + "eval_samples_per_second": 101.937, + "eval_steps_per_second": 0.799, + "step": 185000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.513877868652344, + "loss_rtd": 0.2644221782684326, + "loss_sent": 0.09160961955785751, + "loss_sod": 0.13490232825279236, + "loss_total": 0.4909341335296631, + "step": 185099 + }, + { + "epoch": 0.018198, + "loss_gen": 4.994869709014893, + "loss_rtd": 0.23290346562862396, + "loss_sent": 3.2664669561199844e-05, + "loss_sod": 0.18036779761314392, + "loss_total": 0.4133039116859436, + "step": 185099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.1618928909301758, + "learning_rate": 3.972891863438104e-05, + "loss": 0.4791, + "step": 185100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.438427448272705, + "loss_rtd": 0.2602479159832001, + "loss_sent": 0.1740417629480362, + "loss_sod": 0.03438429906964302, + "loss_total": 0.4686740040779114, + "step": 185199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.651998519897461, + "loss_rtd": 0.2555427849292755, + "loss_sent": 0.33565443754196167, + "loss_sod": 0.07781211286783218, + "loss_total": 0.6690093278884888, + "step": 185199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.7313363552093506, + "learning_rate": 3.9697864203089085e-05, + "loss": 0.4762, + "step": 185200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.649327278137207, + "loss_rtd": 0.2665024399757385, + "loss_sent": 0.25976502895355225, + "loss_sod": 0.05779499560594559, + "loss_total": 0.5840624570846558, + "step": 185299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.586805820465088, + "loss_rtd": 0.23661339282989502, + "loss_sent": 0.18426039814949036, + "loss_sod": 0.0241120345890522, + "loss_total": 0.44498583674430847, + "step": 185299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.3442031145095825, + "learning_rate": 3.9666813921496223e-05, + "loss": 0.4937, + "step": 185300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.644951820373535, + "loss_rtd": 0.2642596364021301, + "loss_sent": 0.1667267233133316, + "loss_sod": 0.10745980590581894, + "loss_total": 0.5384461879730225, + "step": 185399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.103083610534668, + "loss_rtd": 0.2206515669822693, + "loss_sent": 0.2605799436569214, + "loss_sod": 0.09676875919103622, + "loss_total": 0.5780003070831299, + "step": 185399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.1460782289505005, + "learning_rate": 3.963576780210946e-05, + "loss": 0.493, + "step": 185400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.5820770263671875, + "loss_rtd": 0.2645975947380066, + "loss_sent": 0.24656212329864502, + "loss_sod": 0.007974594831466675, + "loss_total": 0.5191342830657959, + "step": 185499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.377520561218262, + "loss_rtd": 0.25580769777297974, + "loss_sent": 0.2565309405326843, + "loss_sod": 0.06911972165107727, + "loss_total": 0.581458330154419, + "step": 185499 + }, + { + "epoch": 0.019, + "grad_norm": 1.3609293699264526, + "learning_rate": 3.96047258574342e-05, + "loss": 0.4871, + "step": 185500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.632157325744629, + "loss_rtd": 0.2597460448741913, + "loss_sent": 0.11746742576360703, + "loss_sod": 0.049807947129011154, + "loss_total": 0.42702141404151917, + "step": 185599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.603592872619629, + "loss_rtd": 0.2609822452068329, + "loss_sent": 0.26343026757240295, + "loss_sod": 0.06601596623659134, + "loss_total": 0.5904284715652466, + "step": 185599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.9038184881210327, + "learning_rate": 3.9573688099974104e-05, + "loss": 0.4896, + "step": 185600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.080387592315674, + "loss_rtd": 0.23782046139240265, + "loss_sent": 0.16632144153118134, + "loss_sod": 0.14350368082523346, + "loss_total": 0.5476455688476562, + "step": 185699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.929356575012207, + "loss_rtd": 0.2298208326101303, + "loss_sent": 0.023580890148878098, + "loss_sod": 0.10661250352859497, + "loss_total": 0.3600142300128937, + "step": 185699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.179927110671997, + "learning_rate": 3.954265454223121e-05, + "loss": 0.4804, + "step": 185700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.3133625984191895, + "loss_rtd": 0.24351274967193604, + "loss_sent": 0.07976383715867996, + "loss_sod": 0.10921406745910645, + "loss_total": 0.43249064683914185, + "step": 185799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.280283451080322, + "loss_rtd": 0.22837214171886444, + "loss_sent": 0.10248099267482758, + "loss_sod": 0.08554579317569733, + "loss_total": 0.41639894247055054, + "step": 185799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.0985273122787476, + "learning_rate": 3.951162519670582e-05, + "loss": 0.4867, + "step": 185800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.408891201019287, + "loss_rtd": 0.2592429220676422, + "loss_sent": 0.1581694781780243, + "loss_sod": 0.06999105215072632, + "loss_total": 0.4874034523963928, + "step": 185899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.758256435394287, + "loss_rtd": 0.22770388424396515, + "loss_sent": 3.780725091928616e-05, + "loss_sod": 0.1224948838353157, + "loss_total": 0.35023659467697144, + "step": 185899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.8321103453636169, + "learning_rate": 3.948060007589653e-05, + "loss": 0.486, + "step": 185900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.374885559082031, + "loss_rtd": 0.2390490621328354, + "loss_sent": 0.203518807888031, + "loss_sod": 0.00301058660261333, + "loss_total": 0.4455784559249878, + "step": 185999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.598089694976807, + "loss_rtd": 0.24036353826522827, + "loss_sent": 0.3474893271923065, + "loss_sod": 0.09969167411327362, + "loss_total": 0.687544584274292, + "step": 185999 + }, + { + "epoch": 0.02, + "grad_norm": 1.2725980281829834, + "learning_rate": 3.944957919230029e-05, + "loss": 0.4839, + "step": 186000 + }, + { + "epoch": 0.02, + "eval_loss": 0.4647587239742279, + "eval_runtime": 151.4244, + "eval_samples_per_second": 101.985, + "eval_steps_per_second": 0.799, + "step": 186000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.6196465492248535, + "loss_rtd": 0.2646197974681854, + "loss_sent": 0.03576711192727089, + "loss_sod": 0.1355540156364441, + "loss_total": 0.4359409213066101, + "step": 186099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.504579544067383, + "loss_rtd": 0.26363444328308105, + "loss_sent": 0.12544412910938263, + "loss_sod": 0.038709308952093124, + "loss_total": 0.4277878999710083, + "step": 186099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.8783823251724243, + "learning_rate": 3.941856255841227e-05, + "loss": 0.4881, + "step": 186100 + }, + { + "epoch": 0.020398, + "loss_gen": 6.232545375823975, + "loss_rtd": 0.26752662658691406, + "loss_sent": 0.057205189019441605, + "loss_sod": 0.06630943715572357, + "loss_total": 0.39104127883911133, + "step": 186199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.357983589172363, + "loss_rtd": 0.26961809396743774, + "loss_sent": 0.08901049196720123, + "loss_sod": 0.013747490011155605, + "loss_total": 0.37237608432769775, + "step": 186199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.9054022431373596, + "learning_rate": 3.9387550186726e-05, + "loss": 0.4857, + "step": 186200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.58601713180542, + "loss_rtd": 0.27239173650741577, + "loss_sent": 0.6295097470283508, + "loss_sod": 0.08182334899902344, + "loss_total": 0.98372483253479, + "step": 186299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.3033342361450195, + "loss_rtd": 0.24794980883598328, + "loss_sent": 0.28165143728256226, + "loss_sod": 0.04188890755176544, + "loss_total": 0.5714901685714722, + "step": 186299 + }, + { + "epoch": 0.0206, + "grad_norm": 2.7101213932037354, + "learning_rate": 3.935654208973323e-05, + "loss": 0.4739, + "step": 186300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.419743061065674, + "loss_rtd": 0.2780866026878357, + "loss_sent": 0.08253250271081924, + "loss_sod": 0.011677983216941357, + "loss_total": 0.372297078371048, + "step": 186399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.571310997009277, + "loss_rtd": 0.27073201537132263, + "loss_sent": 0.09081586450338364, + "loss_sod": 0.090709388256073, + "loss_total": 0.45225727558135986, + "step": 186399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.7526413798332214, + "learning_rate": 3.932553827992406e-05, + "loss": 0.4776, + "step": 186400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.313012599945068, + "loss_rtd": 0.2521372139453888, + "loss_sent": 0.20292626321315765, + "loss_sod": 0.015023418702185154, + "loss_total": 0.47008687257766724, + "step": 186499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.843327045440674, + "loss_rtd": 0.2717064321041107, + "loss_sent": 0.06547203660011292, + "loss_sod": 0.023575402796268463, + "loss_total": 0.3607538640499115, + "step": 186499 + }, + { + "epoch": 0.021, + "grad_norm": 1.1148160696029663, + "learning_rate": 3.929453876978677e-05, + "loss": 0.4906, + "step": 186500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.784761428833008, + "loss_rtd": 0.2746570408344269, + "loss_sent": 0.24680086970329285, + "loss_sod": 0.08704239130020142, + "loss_total": 0.6085003018379211, + "step": 186599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.373242378234863, + "loss_rtd": 0.2574537694454193, + "loss_sent": 0.11793971061706543, + "loss_sod": 0.010002588853240013, + "loss_total": 0.3853960633277893, + "step": 186599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.017259955406189, + "learning_rate": 3.9263543571807994e-05, + "loss": 0.4784, + "step": 186600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.470976829528809, + "loss_rtd": 0.24996253848075867, + "loss_sent": 0.140133798122406, + "loss_sod": 0.08862268179655075, + "loss_total": 0.478719025850296, + "step": 186699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.5595598220825195, + "loss_rtd": 0.2608106732368469, + "loss_sent": 0.11437699943780899, + "loss_sod": 0.06650028377771378, + "loss_total": 0.4416879415512085, + "step": 186699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.0855633020401, + "learning_rate": 3.9232552698472574e-05, + "loss": 0.4993, + "step": 186700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.088070392608643, + "loss_rtd": 0.24716244637966156, + "loss_sent": 0.01046920008957386, + "loss_sod": 0.038748372346162796, + "loss_total": 0.29638001322746277, + "step": 186799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.764645099639893, + "loss_rtd": 0.22984711825847626, + "loss_sent": 4.883740984951146e-05, + "loss_sod": 0.14717769622802734, + "loss_total": 0.3770736753940582, + "step": 186799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.70028156042099, + "learning_rate": 3.920156616226365e-05, + "loss": 0.4732, + "step": 186800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.1860880851745605, + "loss_rtd": 0.2700851857662201, + "loss_sent": 0.13081170618534088, + "loss_sod": 0.09594961255788803, + "loss_total": 0.4968464970588684, + "step": 186899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.540929794311523, + "loss_rtd": 0.2529226243495941, + "loss_sent": 0.31104522943496704, + "loss_sod": 0.08700490742921829, + "loss_total": 0.6509727239608765, + "step": 186899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.1717571020126343, + "learning_rate": 3.917058397566258e-05, + "loss": 0.4787, + "step": 186900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.626912593841553, + "loss_rtd": 0.2688499689102173, + "loss_sent": 0.16953960061073303, + "loss_sod": 0.03840908408164978, + "loss_total": 0.4767986536026001, + "step": 186999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.451415061950684, + "loss_rtd": 0.2621687650680542, + "loss_sent": 0.12659521400928497, + "loss_sod": 0.04100191965699196, + "loss_total": 0.42976588010787964, + "step": 186999 + }, + { + "epoch": 0.022, + "grad_norm": 1.1987330913543701, + "learning_rate": 3.9139606151148994e-05, + "loss": 0.4877, + "step": 187000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4629695415496826, + "eval_runtime": 151.2984, + "eval_samples_per_second": 102.07, + "eval_steps_per_second": 0.8, + "step": 187000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.563031196594238, + "loss_rtd": 0.26130300760269165, + "loss_sent": 0.10228981822729111, + "loss_sod": 0.016945987939834595, + "loss_total": 0.38053882122039795, + "step": 187099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.131287574768066, + "loss_rtd": 0.2218714952468872, + "loss_sent": 0.0543176643550396, + "loss_sod": 0.17131146788597107, + "loss_total": 0.447500616312027, + "step": 187099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.127787709236145, + "learning_rate": 3.910863270120074e-05, + "loss": 0.4608, + "step": 187100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.79901123046875, + "loss_rtd": 0.2656002640724182, + "loss_sent": 0.21989504992961884, + "loss_sod": 0.2511252760887146, + "loss_total": 0.7366206049919128, + "step": 187199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.371663570404053, + "loss_rtd": 0.24986638128757477, + "loss_sent": 0.10776374489068985, + "loss_sod": 0.005952136591076851, + "loss_total": 0.3635822534561157, + "step": 187199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.1022467613220215, + "learning_rate": 3.907766363829395e-05, + "loss": 0.4834, + "step": 187200 + }, + { + "epoch": 0.022598, + "loss_gen": 6.133777141571045, + "loss_rtd": 0.2625170946121216, + "loss_sent": 0.05752617120742798, + "loss_sod": 0.039537668228149414, + "loss_total": 0.359580934047699, + "step": 187299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.570786952972412, + "loss_rtd": 0.24772319197654724, + "loss_sent": 0.3918899595737457, + "loss_sod": 0.07816148549318314, + "loss_total": 0.7177746295928955, + "step": 187299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.1749955415725708, + "learning_rate": 3.904669897490293e-05, + "loss": 0.4795, + "step": 187300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.406407833099365, + "loss_rtd": 0.26402488350868225, + "loss_sent": 0.14382319152355194, + "loss_sod": 0.05829557031393051, + "loss_total": 0.4661436676979065, + "step": 187399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.629527568817139, + "loss_rtd": 0.2584216594696045, + "loss_sent": 0.31577810645103455, + "loss_sod": 0.030520280823111534, + "loss_total": 0.6047200560569763, + "step": 187399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.1226608753204346, + "learning_rate": 3.901573872350025e-05, + "loss": 0.5154, + "step": 187400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.973224639892578, + "loss_rtd": 0.23897667229175568, + "loss_sent": 0.2134232223033905, + "loss_sod": 0.015605310909450054, + "loss_total": 0.4680052101612091, + "step": 187499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.37889289855957, + "loss_rtd": 0.2571919560432434, + "loss_sent": 0.06486905366182327, + "loss_sod": 0.11300955712795258, + "loss_total": 0.43507057428359985, + "step": 187499 + }, + { + "epoch": 0.023, + "grad_norm": 0.9117709994316101, + "learning_rate": 3.8984782896556704e-05, + "loss": 0.4859, + "step": 187500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.2087836265563965, + "loss_rtd": 0.2483009546995163, + "loss_sent": 0.05193908140063286, + "loss_sod": 0.024950454011559486, + "loss_total": 0.3251904845237732, + "step": 187599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.2132415771484375, + "loss_rtd": 0.2532327473163605, + "loss_sent": 0.008457361720502377, + "loss_sod": 0.08348232507705688, + "loss_total": 0.3451724350452423, + "step": 187599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.7311539053916931, + "learning_rate": 3.8953831506541286e-05, + "loss": 0.4816, + "step": 187600 + }, + { + "epoch": 0.023398, + "loss_gen": 4.942038536071777, + "loss_rtd": 0.23786434531211853, + "loss_sent": 0.09772311896085739, + "loss_sod": 0.05206605792045593, + "loss_total": 0.38765352964401245, + "step": 187699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.612724781036377, + "loss_rtd": 0.22827434539794922, + "loss_sent": 0.00022661521506961435, + "loss_sod": 0.14668779075145721, + "loss_total": 0.37518876791000366, + "step": 187699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8188888430595398, + "learning_rate": 3.892288456592123e-05, + "loss": 0.4891, + "step": 187700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.408532619476318, + "loss_rtd": 0.24254950881004333, + "loss_sent": 0.2602873742580414, + "loss_sod": 0.019567377865314484, + "loss_total": 0.5224042534828186, + "step": 187799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.257552623748779, + "loss_rtd": 0.2503557801246643, + "loss_sent": 0.5016323924064636, + "loss_sod": 0.11872883141040802, + "loss_total": 0.8707169890403748, + "step": 187799 + }, + { + "epoch": 0.0236, + "grad_norm": 2.0080339908599854, + "learning_rate": 3.889194208716192e-05, + "loss": 0.4653, + "step": 187800 + }, + { + "epoch": 0.023798, + "loss_gen": 6.010610580444336, + "loss_rtd": 0.25839945673942566, + "loss_sent": 0.3183378279209137, + "loss_sod": 0.06243967264890671, + "loss_total": 0.6391769647598267, + "step": 187899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.619524955749512, + "loss_rtd": 0.2607441842556, + "loss_sent": 0.1995314210653305, + "loss_sod": 0.12462512403726578, + "loss_total": 0.5849007368087769, + "step": 187899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.2173618078231812, + "learning_rate": 3.886100408272703e-05, + "loss": 0.4991, + "step": 187900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.060142517089844, + "loss_rtd": 0.220561683177948, + "loss_sent": 0.09204896539449692, + "loss_sod": 0.16276516020298004, + "loss_total": 0.47537580132484436, + "step": 187999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.676015377044678, + "loss_rtd": 0.24254687130451202, + "loss_sent": 0.22284887731075287, + "loss_sod": 0.02567087486386299, + "loss_total": 0.4910666346549988, + "step": 187999 + }, + { + "epoch": 0.024, + "grad_norm": 1.120131015777588, + "learning_rate": 3.883007056507835e-05, + "loss": 0.4827, + "step": 188000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4598945379257202, + "eval_runtime": 151.472, + "eval_samples_per_second": 101.953, + "eval_steps_per_second": 0.799, + "step": 188000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.747088432312012, + "loss_rtd": 0.2507705092430115, + "loss_sent": 0.2397606372833252, + "loss_sod": 0.09798656404018402, + "loss_total": 0.5885177254676819, + "step": 188099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.377871513366699, + "loss_rtd": 0.24825069308280945, + "loss_sent": 0.2851286232471466, + "loss_sod": 0.044352252036333084, + "loss_total": 0.5777316093444824, + "step": 188099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.453224778175354, + "learning_rate": 3.879914154667592e-05, + "loss": 0.5039, + "step": 188100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.2848005294799805, + "loss_rtd": 0.23623868823051453, + "loss_sent": 0.021470773965120316, + "loss_sod": 0.08070395886898041, + "loss_total": 0.33841341733932495, + "step": 188199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.011479377746582, + "loss_rtd": 0.22047826647758484, + "loss_sent": 9.895951370708644e-05, + "loss_sod": 0.20309217274188995, + "loss_total": 0.4236694276332855, + "step": 188199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.9086915850639343, + "learning_rate": 3.876821703997793e-05, + "loss": 0.4814, + "step": 188200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.45997428894043, + "loss_rtd": 0.21449197828769684, + "loss_sent": 7.156938954722136e-05, + "loss_sod": 0.11868256330490112, + "loss_total": 0.3332460820674896, + "step": 188299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.46342658996582, + "loss_rtd": 0.26590511202812195, + "loss_sent": 0.15254537761211395, + "loss_sod": 0.07487458735704422, + "loss_total": 0.49332505464553833, + "step": 188299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.0269204378128052, + "learning_rate": 3.873729705744078e-05, + "loss": 0.4733, + "step": 188300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.391395568847656, + "loss_rtd": 0.2536605894565582, + "loss_sent": 0.6708694696426392, + "loss_sod": 0.04845999926328659, + "loss_total": 0.9729900360107422, + "step": 188399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.521356105804443, + "loss_rtd": 0.24665305018424988, + "loss_sent": 0.3473989963531494, + "loss_sod": 0.03050447255373001, + "loss_total": 0.6245565414428711, + "step": 188399 + }, + { + "epoch": 0.0248, + "grad_norm": 2.669387102127075, + "learning_rate": 3.8706381611519015e-05, + "loss": 0.4823, + "step": 188400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.504930019378662, + "loss_rtd": 0.26366689801216125, + "loss_sent": 0.07109244167804718, + "loss_sod": 0.05478106439113617, + "loss_total": 0.3895403742790222, + "step": 188499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.711468696594238, + "loss_rtd": 0.2695043683052063, + "loss_sent": 0.1970943659543991, + "loss_sod": 0.039382901042699814, + "loss_total": 0.5059816241264343, + "step": 188499 + }, + { + "epoch": 0.025, + "grad_norm": 1.0042864084243774, + "learning_rate": 3.8675470714665405e-05, + "loss": 0.4904, + "step": 188500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.217864036560059, + "loss_rtd": 0.25182682275772095, + "loss_sent": 0.029164280742406845, + "loss_sod": 0.04378075152635574, + "loss_total": 0.32477185130119324, + "step": 188599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.344483375549316, + "loss_rtd": 0.23775868117809296, + "loss_sent": 0.16007396578788757, + "loss_sod": 0.031165238469839096, + "loss_total": 0.42899787425994873, + "step": 188599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.3527297973632812, + "learning_rate": 3.864456437933082e-05, + "loss": 0.4846, + "step": 188600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.737148284912109, + "loss_rtd": 0.22901023924350739, + "loss_sent": 0.000305487890727818, + "loss_sod": 0.309353768825531, + "loss_total": 0.5386694669723511, + "step": 188699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.505973815917969, + "loss_rtd": 0.2795254588127136, + "loss_sent": 0.10963069647550583, + "loss_sod": 0.05984332039952278, + "loss_total": 0.4489994943141937, + "step": 188699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.2695363759994507, + "learning_rate": 3.8613662617964354e-05, + "loss": 0.4848, + "step": 188700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.010315418243408, + "loss_rtd": 0.23522193729877472, + "loss_sent": 0.09733253717422485, + "loss_sod": 0.015465503558516502, + "loss_total": 0.3480199873447418, + "step": 188799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.887147903442383, + "loss_rtd": 0.24651817977428436, + "loss_sent": 0.036465562880039215, + "loss_sod": 0.060762643814086914, + "loss_total": 0.3437463939189911, + "step": 188799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.6803804039955139, + "learning_rate": 3.858276544301321e-05, + "loss": 0.4927, + "step": 188800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.247392654418945, + "loss_rtd": 0.22698451578617096, + "loss_sent": 0.0394565612077713, + "loss_sod": 0.10808434337377548, + "loss_total": 0.37452542781829834, + "step": 188899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.169305324554443, + "loss_rtd": 0.2783455550670624, + "loss_sent": 0.0984940379858017, + "loss_sod": 0.03639579564332962, + "loss_total": 0.4132353663444519, + "step": 188899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.8834724426269531, + "learning_rate": 3.85518728669228e-05, + "loss": 0.4945, + "step": 188900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.514176368713379, + "loss_rtd": 0.21182015538215637, + "loss_sent": 3.6810768506256863e-05, + "loss_sod": 0.18472716212272644, + "loss_total": 0.3965841233730316, + "step": 188999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.477997303009033, + "loss_rtd": 0.22783230245113373, + "loss_sent": 0.11864908039569855, + "loss_sod": 0.11274217814207077, + "loss_total": 0.45922356843948364, + "step": 188999 + }, + { + "epoch": 0.026, + "grad_norm": 1.195783257484436, + "learning_rate": 3.852098490213663e-05, + "loss": 0.4854, + "step": 189000 + }, + { + "epoch": 0.026, + "eval_loss": 0.45650702714920044, + "eval_runtime": 151.2912, + "eval_samples_per_second": 102.075, + "eval_steps_per_second": 0.8, + "step": 189000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.517990589141846, + "loss_rtd": 0.2650156617164612, + "loss_sent": 0.13934160768985748, + "loss_sod": 0.009648462757468224, + "loss_total": 0.41400575637817383, + "step": 189099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.422589302062988, + "loss_rtd": 0.23975032567977905, + "loss_sent": 0.17650927603244781, + "loss_sod": 0.10824856907129288, + "loss_total": 0.5245081782341003, + "step": 189099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9580986499786377, + "learning_rate": 3.849010156109635e-05, + "loss": 0.4954, + "step": 189100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.617785930633545, + "loss_rtd": 0.27601534128189087, + "loss_sent": 0.10777447372674942, + "loss_sod": 0.05619753524661064, + "loss_total": 0.43998733162879944, + "step": 189199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.626599311828613, + "loss_rtd": 0.2704137861728668, + "loss_sent": 0.1494636982679367, + "loss_sod": 0.10374113917350769, + "loss_total": 0.5236186385154724, + "step": 189199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.9200600981712341, + "learning_rate": 3.845922285624181e-05, + "loss": 0.4844, + "step": 189200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.618977069854736, + "loss_rtd": 0.2506709694862366, + "loss_sent": 0.5057516098022461, + "loss_sod": 0.10134395956993103, + "loss_total": 0.8577665090560913, + "step": 189299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.481948375701904, + "loss_rtd": 0.25752022862434387, + "loss_sent": 0.26329484581947327, + "loss_sod": 0.06825120747089386, + "loss_total": 0.5890662670135498, + "step": 189299 + }, + { + "epoch": 0.0266, + "grad_norm": 2.118957042694092, + "learning_rate": 3.84283488000109e-05, + "loss": 0.4767, + "step": 189300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.345311164855957, + "loss_rtd": 0.24202558398246765, + "loss_sent": 0.18557079136371613, + "loss_sod": 0.057000309228897095, + "loss_total": 0.4845966696739197, + "step": 189399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.465376377105713, + "loss_rtd": 0.24598413705825806, + "loss_sent": 0.15849705040454865, + "loss_sod": 0.13609986007213593, + "loss_total": 0.5405810475349426, + "step": 189399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.19861900806427, + "learning_rate": 3.839747940483972e-05, + "loss": 0.4783, + "step": 189400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.405307769775391, + "loss_rtd": 0.23371540009975433, + "loss_sent": 0.10167175531387329, + "loss_sod": 0.02658150903880596, + "loss_total": 0.36196866631507874, + "step": 189499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.3316779136657715, + "loss_rtd": 0.27084195613861084, + "loss_sent": 0.1450253427028656, + "loss_sod": 0.04817929491400719, + "loss_total": 0.4640465974807739, + "step": 189499 + }, + { + "epoch": 0.027, + "grad_norm": 1.349503517150879, + "learning_rate": 3.836661468316244e-05, + "loss": 0.4964, + "step": 189500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.54126501083374, + "loss_rtd": 0.25931403040885925, + "loss_sent": 0.12344910949468613, + "loss_sod": 0.15993519127368927, + "loss_total": 0.542698323726654, + "step": 189599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.141296863555908, + "loss_rtd": 0.23213070631027222, + "loss_sent": 0.08870422840118408, + "loss_sod": 0.07632371038198471, + "loss_total": 0.3971586525440216, + "step": 189599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.0059212446212769, + "learning_rate": 3.833575464741139e-05, + "loss": 0.4847, + "step": 189600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.827796459197998, + "loss_rtd": 0.24768830835819244, + "loss_sent": 0.13497382402420044, + "loss_sod": 0.007975916378200054, + "loss_total": 0.3906380534172058, + "step": 189699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.769606590270996, + "loss_rtd": 0.2651318311691284, + "loss_sent": 0.2790696322917938, + "loss_sod": 0.056333690881729126, + "loss_total": 0.6005351543426514, + "step": 189699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.6840248107910156, + "learning_rate": 3.8304899310016956e-05, + "loss": 0.4949, + "step": 189700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.565116882324219, + "loss_rtd": 0.24932719767093658, + "loss_sent": 0.2298298180103302, + "loss_sod": 0.03319466486573219, + "loss_total": 0.5123516917228699, + "step": 189799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.100515365600586, + "loss_rtd": 0.2612711787223816, + "loss_sent": 0.16838902235031128, + "loss_sod": 0.0048184944316744804, + "loss_total": 0.4344787001609802, + "step": 189799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8379236459732056, + "learning_rate": 3.82740486834077e-05, + "loss": 0.5004, + "step": 189800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.489904403686523, + "loss_rtd": 0.2707536816596985, + "loss_sent": 0.1192086786031723, + "loss_sod": 0.013609429821372032, + "loss_total": 0.4035717844963074, + "step": 189899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.503792762756348, + "loss_rtd": 0.24232688546180725, + "loss_sent": 0.3279736340045929, + "loss_sod": 0.011655149981379509, + "loss_total": 0.5819556713104248, + "step": 189899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.9022887349128723, + "learning_rate": 3.824320278001021e-05, + "loss": 0.4742, + "step": 189900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.60560941696167, + "loss_rtd": 0.26560530066490173, + "loss_sent": 0.1791583150625229, + "loss_sod": 0.059306129813194275, + "loss_total": 0.5040697455406189, + "step": 189999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.363497257232666, + "loss_rtd": 0.23657403886318207, + "loss_sent": 0.14551042020320892, + "loss_sod": 0.020474769175052643, + "loss_total": 0.40255922079086304, + "step": 189999 + }, + { + "epoch": 0.028, + "grad_norm": 0.9289900064468384, + "learning_rate": 3.821236161224925e-05, + "loss": 0.4938, + "step": 190000 + }, + { + "epoch": 0.028, + "eval_loss": 0.4670124351978302, + "eval_runtime": 151.7151, + "eval_samples_per_second": 101.789, + "eval_steps_per_second": 0.798, + "step": 190000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.427304267883301, + "loss_rtd": 0.23809626698493958, + "loss_sent": 0.3506697714328766, + "loss_sod": 0.06381907314062119, + "loss_total": 0.6525851488113403, + "step": 190099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.639505386352539, + "loss_rtd": 0.2414628118276596, + "loss_sent": 0.14909490942955017, + "loss_sod": 0.02899942733347416, + "loss_total": 0.4195571541786194, + "step": 190099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.8516379594802856, + "learning_rate": 3.818152519254762e-05, + "loss": 0.4853, + "step": 190100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.43601131439209, + "loss_rtd": 0.26161977648735046, + "loss_sent": 0.19055239856243134, + "loss_sod": 0.1804932951927185, + "loss_total": 0.6326654553413391, + "step": 190199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.528129577636719, + "loss_rtd": 0.24942205846309662, + "loss_sent": 0.22511687874794006, + "loss_sod": 0.09172326326370239, + "loss_total": 0.5662622451782227, + "step": 190199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.180493712425232, + "learning_rate": 3.815069353332623e-05, + "loss": 0.4916, + "step": 190200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.4941534996032715, + "loss_rtd": 0.2445591390132904, + "loss_sent": 0.1911228448152542, + "loss_sod": 0.058923400938510895, + "loss_total": 0.4946053624153137, + "step": 190299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.534848690032959, + "loss_rtd": 0.2617496848106384, + "loss_sent": 0.24936708807945251, + "loss_sod": 0.13276061415672302, + "loss_total": 0.643877387046814, + "step": 190299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.6018524169921875, + "learning_rate": 3.811986664700406e-05, + "loss": 0.4897, + "step": 190300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.680985927581787, + "loss_rtd": 0.25227612257003784, + "loss_sent": 0.2787964344024658, + "loss_sod": 0.01892828568816185, + "loss_total": 0.5500008463859558, + "step": 190399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.267366886138916, + "loss_rtd": 0.24123340845108032, + "loss_sent": 0.10007834434509277, + "loss_sod": 0.044746894389390945, + "loss_total": 0.38605865836143494, + "step": 190399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.5841699242591858, + "learning_rate": 3.808904454599819e-05, + "loss": 0.4849, + "step": 190400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.461845397949219, + "loss_rtd": 0.2444579303264618, + "loss_sent": 0.4447803199291229, + "loss_sod": 0.0374327078461647, + "loss_total": 0.7266709804534912, + "step": 190499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.268030166625977, + "loss_rtd": 0.23218882083892822, + "loss_sent": 0.12373758852481842, + "loss_sod": 0.03155789524316788, + "loss_total": 0.3874843120574951, + "step": 190499 + }, + { + "epoch": 0.029, + "grad_norm": 1.5703704357147217, + "learning_rate": 3.805822724272376e-05, + "loss": 0.4973, + "step": 190500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.516118049621582, + "loss_rtd": 0.2561403214931488, + "loss_sent": 0.16193555295467377, + "loss_sod": 0.053360715508461, + "loss_total": 0.47143661975860596, + "step": 190599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.603597640991211, + "loss_rtd": 0.24327020347118378, + "loss_sent": 0.20391519367694855, + "loss_sod": 0.02561803162097931, + "loss_total": 0.47280341386795044, + "step": 190599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.8055551648139954, + "learning_rate": 3.8027414749593956e-05, + "loss": 0.489, + "step": 190600 + }, + { + "epoch": 0.029398, + "loss_gen": 6.1082258224487305, + "loss_rtd": 0.24867743253707886, + "loss_sent": 0.04290452226996422, + "loss_sod": 0.10100595653057098, + "loss_total": 0.39258792996406555, + "step": 190699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.504403114318848, + "loss_rtd": 0.23535284399986267, + "loss_sent": 0.16792336106300354, + "loss_sod": 0.060683224350214005, + "loss_total": 0.4639594256877899, + "step": 190699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.6784458160400391, + "learning_rate": 3.799660707902007e-05, + "loss": 0.4914, + "step": 190700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.532922267913818, + "loss_rtd": 0.2595323622226715, + "loss_sent": 0.09610046446323395, + "loss_sod": 0.049020085483789444, + "loss_total": 0.4046528935432434, + "step": 190799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.673886775970459, + "loss_rtd": 0.23863846063613892, + "loss_sent": 0.09060431271791458, + "loss_sod": 0.03512513265013695, + "loss_total": 0.36436790227890015, + "step": 190799 + }, + { + "epoch": 0.0296, + "grad_norm": 0.8528914451599121, + "learning_rate": 3.79658042434114e-05, + "loss": 0.4752, + "step": 190800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.7321014404296875, + "loss_rtd": 0.26183241605758667, + "loss_sent": 0.07424481213092804, + "loss_sod": 0.07845449447631836, + "loss_total": 0.4145317077636719, + "step": 190899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.275577545166016, + "loss_rtd": 0.25554800033569336, + "loss_sent": 0.23322120308876038, + "loss_sod": 0.06550353020429611, + "loss_total": 0.5542727708816528, + "step": 190899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.780219554901123, + "learning_rate": 3.793500625517536e-05, + "loss": 0.4749, + "step": 190900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.511038780212402, + "loss_rtd": 0.2445359081029892, + "loss_sent": 0.16977296769618988, + "loss_sod": 0.008282160386443138, + "loss_total": 0.42259103059768677, + "step": 190999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.605917930603027, + "loss_rtd": 0.2625270485877991, + "loss_sent": 0.3814159631729126, + "loss_sod": 0.046406544744968414, + "loss_total": 0.6903495788574219, + "step": 190999 + }, + { + "epoch": 0.03, + "grad_norm": 2.1270103454589844, + "learning_rate": 3.7904213126717346e-05, + "loss": 0.4942, + "step": 191000 + }, + { + "epoch": 0.03, + "eval_loss": 0.46074697375297546, + "eval_runtime": 151.3683, + "eval_samples_per_second": 102.023, + "eval_steps_per_second": 0.799, + "step": 191000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.758920192718506, + "loss_rtd": 0.26381030678749084, + "loss_sent": 0.18165838718414307, + "loss_sod": 0.09162928909063339, + "loss_total": 0.5370979905128479, + "step": 191099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.745611190795898, + "loss_rtd": 0.2590659260749817, + "loss_sent": 0.200775608420372, + "loss_sod": 0.042787425220012665, + "loss_total": 0.5026289820671082, + "step": 191099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.7340903878211975, + "learning_rate": 3.7873424870440846e-05, + "loss": 0.4867, + "step": 191100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.6203413009643555, + "loss_rtd": 0.2579295039176941, + "loss_sent": 0.161702960729599, + "loss_sod": 0.017896367236971855, + "loss_total": 0.4375288188457489, + "step": 191199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.602794170379639, + "loss_rtd": 0.2508043050765991, + "loss_sent": 0.30941370129585266, + "loss_sod": 0.08232814073562622, + "loss_total": 0.6425461769104004, + "step": 191199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.153231143951416, + "learning_rate": 3.7842641498747346e-05, + "loss": 0.484, + "step": 191200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.551522731781006, + "loss_rtd": 0.25297266244888306, + "loss_sent": 0.2190672904253006, + "loss_sod": 0.037652745842933655, + "loss_total": 0.5096926689147949, + "step": 191299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.213325500488281, + "loss_rtd": 0.27947014570236206, + "loss_sent": 0.45438188314437866, + "loss_sod": 0.011225146241486073, + "loss_total": 0.7450771331787109, + "step": 191299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.9284847974777222, + "learning_rate": 3.781186302403641e-05, + "loss": 0.4784, + "step": 191300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.694090843200684, + "loss_rtd": 0.24617375433444977, + "loss_sent": 0.052540965378284454, + "loss_sod": 0.03280903026461601, + "loss_total": 0.33152374625205994, + "step": 191399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.137330055236816, + "loss_rtd": 0.2720976173877716, + "loss_sent": 0.2972986698150635, + "loss_sod": 0.04297982156276703, + "loss_total": 0.6123760938644409, + "step": 191399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.1373209953308105, + "learning_rate": 3.778108945870558e-05, + "loss": 0.4877, + "step": 191400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.3562331199646, + "loss_rtd": 0.2669129967689514, + "loss_sent": 0.4924572706222534, + "loss_sod": 0.0187371838837862, + "loss_total": 0.7781074643135071, + "step": 191499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.238649845123291, + "loss_rtd": 0.25423958897590637, + "loss_sent": 0.09695713967084885, + "loss_sod": 0.04322301968932152, + "loss_total": 0.39441975951194763, + "step": 191499 + }, + { + "epoch": 0.031, + "grad_norm": 1.3981800079345703, + "learning_rate": 3.775032081515046e-05, + "loss": 0.4929, + "step": 191500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.644824981689453, + "loss_rtd": 0.2523040175437927, + "loss_sent": 0.44738587737083435, + "loss_sod": 0.055231668055057526, + "loss_total": 0.754921555519104, + "step": 191599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.5427117347717285, + "loss_rtd": 0.24850618839263916, + "loss_sent": 0.26900747418403625, + "loss_sod": 0.03090936876833439, + "loss_total": 0.5484230518341064, + "step": 191599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.1982362270355225, + "learning_rate": 3.7719557105764647e-05, + "loss": 0.4834, + "step": 191600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.413008689880371, + "loss_rtd": 0.2563272714614868, + "loss_sent": 0.03943207487463951, + "loss_sod": 0.17963504791259766, + "loss_total": 0.4753943979740143, + "step": 191699 + }, + { + "epoch": 0.031398, + "loss_gen": 4.911994934082031, + "loss_rtd": 0.22217217087745667, + "loss_sent": 0.019519370049238205, + "loss_sod": 0.06098729372024536, + "loss_total": 0.30267882347106934, + "step": 191699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.8048076629638672, + "learning_rate": 3.768879834293977e-05, + "loss": 0.4927, + "step": 191700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.637204647064209, + "loss_rtd": 0.2501910626888275, + "loss_sent": 0.3050091862678528, + "loss_sod": 0.09607332944869995, + "loss_total": 0.6512736082077026, + "step": 191799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.50172758102417, + "loss_rtd": 0.26181814074516296, + "loss_sent": 0.2631334662437439, + "loss_sod": 0.05659785866737366, + "loss_total": 0.5815494656562805, + "step": 191799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.9879064559936523, + "learning_rate": 3.765804453906544e-05, + "loss": 0.5056, + "step": 191800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.14209508895874, + "loss_rtd": 0.23424683511257172, + "loss_sent": 0.08079767227172852, + "loss_sod": 0.1852322518825531, + "loss_total": 0.5002767443656921, + "step": 191899 + }, + { + "epoch": 0.031798, + "loss_gen": 4.655786514282227, + "loss_rtd": 0.21770267188549042, + "loss_sent": 0.04880133271217346, + "loss_sod": 0.05604922026395798, + "loss_total": 0.32255321741104126, + "step": 191899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.380016565322876, + "learning_rate": 3.762729570652931e-05, + "loss": 0.5068, + "step": 191900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.879818916320801, + "loss_rtd": 0.24440042674541473, + "loss_sent": 0.07798027992248535, + "loss_sod": 0.07537493854761124, + "loss_total": 0.3977556526660919, + "step": 191999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.795013427734375, + "loss_rtd": 0.2571924328804016, + "loss_sent": 0.15835008025169373, + "loss_sod": 0.03647574782371521, + "loss_total": 0.45201826095581055, + "step": 191999 + }, + { + "epoch": 0.032, + "grad_norm": 1.2892613410949707, + "learning_rate": 3.7596551857716965e-05, + "loss": 0.497, + "step": 192000 + }, + { + "epoch": 0.032, + "eval_loss": 0.46215012669563293, + "eval_runtime": 151.4113, + "eval_samples_per_second": 101.994, + "eval_steps_per_second": 0.799, + "step": 192000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.767733097076416, + "loss_rtd": 0.26180925965309143, + "loss_sent": 0.07043591886758804, + "loss_sod": 0.0524330735206604, + "loss_total": 0.3846782445907593, + "step": 192099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.635196685791016, + "loss_rtd": 0.2711675465106964, + "loss_sent": 0.193241149187088, + "loss_sod": 0.0742715373635292, + "loss_total": 0.5386801958084106, + "step": 192099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.2892861366271973, + "learning_rate": 3.756581300501207e-05, + "loss": 0.485, + "step": 192100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.3160624504089355, + "loss_rtd": 0.25533467531204224, + "loss_sent": 0.6479426622390747, + "loss_sod": 0.05005098134279251, + "loss_total": 0.9533283114433289, + "step": 192199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.530961513519287, + "loss_rtd": 0.23777180910110474, + "loss_sent": 0.25572335720062256, + "loss_sod": 0.014990163967013359, + "loss_total": 0.5084853172302246, + "step": 192199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.9177531003952026, + "learning_rate": 3.75350791607962e-05, + "loss": 0.4737, + "step": 192200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.648280143737793, + "loss_rtd": 0.24090732634067535, + "loss_sent": 0.1316465139389038, + "loss_sod": 0.027110133320093155, + "loss_total": 0.3996639847755432, + "step": 192299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.590279579162598, + "loss_rtd": 0.2609612047672272, + "loss_sent": 0.18384705483913422, + "loss_sod": 0.010000656358897686, + "loss_total": 0.45480889081954956, + "step": 192299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.9537299871444702, + "learning_rate": 3.750435033744896e-05, + "loss": 0.4991, + "step": 192300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.547919273376465, + "loss_rtd": 0.24577993154525757, + "loss_sent": 0.31399571895599365, + "loss_sod": 0.08307299017906189, + "loss_total": 0.6428486108779907, + "step": 192399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.422171592712402, + "loss_rtd": 0.26553598046302795, + "loss_sent": 0.4638215899467468, + "loss_sod": 0.014869781211018562, + "loss_total": 0.7442273497581482, + "step": 192399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.79023277759552, + "learning_rate": 3.7473626547347904e-05, + "loss": 0.4946, + "step": 192400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.781009197235107, + "loss_rtd": 0.21600483357906342, + "loss_sent": 3.5161818232154474e-05, + "loss_sod": 0.156089186668396, + "loss_total": 0.3721292018890381, + "step": 192499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.706577777862549, + "loss_rtd": 0.22244736552238464, + "loss_sent": 0.013634216971695423, + "loss_sod": 0.043591342866420746, + "loss_total": 0.27967292070388794, + "step": 192499 + }, + { + "epoch": 0.001, + "grad_norm": 1.0063154697418213, + "learning_rate": 3.7442907802868574e-05, + "loss": 0.4752, + "step": 192500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.6759114265441895, + "loss_rtd": 0.2586769759654999, + "loss_sent": 0.23568876087665558, + "loss_sod": 0.04188964143395424, + "loss_total": 0.5362553596496582, + "step": 192599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.485069274902344, + "loss_rtd": 0.25091472268104553, + "loss_sent": 0.21204343438148499, + "loss_sod": 0.008346015587449074, + "loss_total": 0.47130417823791504, + "step": 192599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.9378650188446045, + "learning_rate": 3.7412194116384486e-05, + "loss": 0.4787, + "step": 192600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.312941074371338, + "loss_rtd": 0.2663998603820801, + "loss_sent": 0.12301906198263168, + "loss_sod": 0.06201421469449997, + "loss_total": 0.4514331519603729, + "step": 192699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.562491416931152, + "loss_rtd": 0.2644144594669342, + "loss_sent": 0.1517217457294464, + "loss_sod": 0.016208041459321976, + "loss_total": 0.4323442578315735, + "step": 192699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.7727892994880676, + "learning_rate": 3.738148550026711e-05, + "loss": 0.4632, + "step": 192700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.586191654205322, + "loss_rtd": 0.25658947229385376, + "loss_sent": 0.1271830052137375, + "loss_sod": 0.03610118106007576, + "loss_total": 0.4198736548423767, + "step": 192799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.758675575256348, + "loss_rtd": 0.26218181848526, + "loss_sent": 0.15812864899635315, + "loss_sod": 0.04964819177985191, + "loss_total": 0.46995866298675537, + "step": 192799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.7383379936218262, + "learning_rate": 3.735078196688585e-05, + "loss": 0.4839, + "step": 192800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.558610439300537, + "loss_rtd": 0.2237866073846817, + "loss_sent": 4.1192786738974974e-05, + "loss_sod": 0.06296208500862122, + "loss_total": 0.2867898941040039, + "step": 192899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.737308502197266, + "loss_rtd": 0.22315563261508942, + "loss_sent": 4.613067358150147e-05, + "loss_sod": 0.04467999190092087, + "loss_total": 0.26788175106048584, + "step": 192899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.7159841060638428, + "learning_rate": 3.732008352860811e-05, + "loss": 0.4753, + "step": 192900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.871915817260742, + "loss_rtd": 0.24594026803970337, + "loss_sent": 0.03623334318399429, + "loss_sod": 0.17563214898109436, + "loss_total": 0.4578057527542114, + "step": 192999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.12923526763916, + "loss_rtd": 0.24965007603168488, + "loss_sent": 9.992629202315584e-05, + "loss_sod": 0.15130211412906647, + "loss_total": 0.4010521173477173, + "step": 192999 + }, + { + "epoch": 0.002, + "grad_norm": 0.8945534229278564, + "learning_rate": 3.7289390197799203e-05, + "loss": 0.487, + "step": 193000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4674343764781952, + "eval_runtime": 153.7357, + "eval_samples_per_second": 100.452, + "eval_steps_per_second": 0.787, + "step": 193000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.556958198547363, + "loss_rtd": 0.2575167119503021, + "loss_sent": 0.14814774692058563, + "loss_sod": 0.053643129765987396, + "loss_total": 0.45930761098861694, + "step": 193099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.765564441680908, + "loss_rtd": 0.23487484455108643, + "loss_sent": 0.27274149656295776, + "loss_sod": 0.08774063736200333, + "loss_total": 0.5953569412231445, + "step": 193099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.7784550189971924, + "learning_rate": 3.7258701986822405e-05, + "loss": 0.4796, + "step": 193100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.610030174255371, + "loss_rtd": 0.2565581798553467, + "loss_sent": 0.11878179758787155, + "loss_sod": 0.02501131221652031, + "loss_total": 0.40035128593444824, + "step": 193199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.386488914489746, + "loss_rtd": 0.25022369623184204, + "loss_sent": 0.07101274281740189, + "loss_sod": 0.07276440411806107, + "loss_total": 0.3940008282661438, + "step": 193199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.0038328170776367, + "learning_rate": 3.722801890803892e-05, + "loss": 0.4982, + "step": 193200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.366093635559082, + "loss_rtd": 0.26855507493019104, + "loss_sent": 0.14672008156776428, + "loss_sod": 0.07533880323171616, + "loss_total": 0.4906139671802521, + "step": 193299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.110126972198486, + "loss_rtd": 0.24855197966098785, + "loss_sent": 0.07789073139429092, + "loss_sod": 0.20015639066696167, + "loss_total": 0.526599109172821, + "step": 193299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.3545650243759155, + "learning_rate": 3.71973409738079e-05, + "loss": 0.4714, + "step": 193300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.646805286407471, + "loss_rtd": 0.23973631858825684, + "loss_sent": 0.2123149037361145, + "loss_sod": 0.06509586423635483, + "loss_total": 0.5171470642089844, + "step": 193399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.550223350524902, + "loss_rtd": 0.25887084007263184, + "loss_sent": 0.08267856389284134, + "loss_sod": 0.09744960069656372, + "loss_total": 0.4389989972114563, + "step": 193399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.0914514064788818, + "learning_rate": 3.716666819648639e-05, + "loss": 0.4825, + "step": 193400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.455780029296875, + "loss_rtd": 0.23686279356479645, + "loss_sent": 0.18024176359176636, + "loss_sod": 0.012053391896188259, + "loss_total": 0.42915794253349304, + "step": 193499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.336060523986816, + "loss_rtd": 0.2880454361438751, + "loss_sent": 0.18380507826805115, + "loss_sod": 0.06229345127940178, + "loss_total": 0.5341439247131348, + "step": 193499 + }, + { + "epoch": 0.003, + "grad_norm": 0.9820837378501892, + "learning_rate": 3.7136000588429416e-05, + "loss": 0.475, + "step": 193500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.634300708770752, + "loss_rtd": 0.2416241616010666, + "loss_sent": 0.19586947560310364, + "loss_sod": 0.04599007964134216, + "loss_total": 0.4834837019443512, + "step": 193599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.745166778564453, + "loss_rtd": 0.26596733927726746, + "loss_sent": 0.28147685527801514, + "loss_sod": 0.025701254606246948, + "loss_total": 0.5731454491615295, + "step": 193599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.973056972026825, + "learning_rate": 3.7105338161989856e-05, + "loss": 0.489, + "step": 193600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.152373790740967, + "loss_rtd": 0.23187947273254395, + "loss_sent": 0.10266685485839844, + "loss_sod": 0.12113480269908905, + "loss_total": 0.4556811451911926, + "step": 193699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.242809295654297, + "loss_rtd": 0.24947988986968994, + "loss_sent": 4.132475805818103e-05, + "loss_sod": 0.07227280735969543, + "loss_total": 0.3217940330505371, + "step": 193699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.9321451187133789, + "learning_rate": 3.707468092951854e-05, + "loss": 0.472, + "step": 193700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.641763687133789, + "loss_rtd": 0.24687477946281433, + "loss_sent": 0.12674228847026825, + "loss_sod": 0.06126464158296585, + "loss_total": 0.43488168716430664, + "step": 193799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.771834373474121, + "loss_rtd": 0.26061493158340454, + "loss_sent": 0.14301107823848724, + "loss_sod": 0.10927961766719818, + "loss_total": 0.5129056572914124, + "step": 193799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.1897246837615967, + "learning_rate": 3.7044028903364206e-05, + "loss": 0.476, + "step": 193800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.51343297958374, + "loss_rtd": 0.25676435232162476, + "loss_sent": 0.26321089267730713, + "loss_sod": 0.051310814917087555, + "loss_total": 0.5712860822677612, + "step": 193899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.367305278778076, + "loss_rtd": 0.28087079524993896, + "loss_sent": 0.03771647810935974, + "loss_sod": 0.0246761292219162, + "loss_total": 0.3432634174823761, + "step": 193899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.9496843218803406, + "learning_rate": 3.7013382095873475e-05, + "loss": 0.4967, + "step": 193900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.242870807647705, + "loss_rtd": 0.24983222782611847, + "loss_sent": 0.09496738016605377, + "loss_sod": 0.0035881041549146175, + "loss_total": 0.3483877182006836, + "step": 193999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.543667793273926, + "loss_rtd": 0.25194668769836426, + "loss_sent": 0.06284943968057632, + "loss_sod": 0.11689009517431259, + "loss_total": 0.4316862225532532, + "step": 193999 + }, + { + "epoch": 0.004, + "grad_norm": 0.7864642143249512, + "learning_rate": 3.698274051939088e-05, + "loss": 0.4887, + "step": 194000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4566684365272522, + "eval_runtime": 150.8137, + "eval_samples_per_second": 102.398, + "eval_steps_per_second": 0.802, + "step": 194000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.840829372406006, + "loss_rtd": 0.20772796869277954, + "loss_sent": 0.02202727273106575, + "loss_sod": 0.04938127100467682, + "loss_total": 0.2791365087032318, + "step": 194099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.729475021362305, + "loss_rtd": 0.2559773027896881, + "loss_sent": 0.1774718016386032, + "loss_sod": 0.026250924915075302, + "loss_total": 0.4597000479698181, + "step": 194099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.7378675937652588, + "learning_rate": 3.695210418625885e-05, + "loss": 0.4677, + "step": 194100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.102241516113281, + "loss_rtd": 0.24229399859905243, + "loss_sent": 0.14018964767456055, + "loss_sod": 0.10332176834344864, + "loss_total": 0.4858054220676422, + "step": 194199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.999631881713867, + "loss_rtd": 0.23838362097740173, + "loss_sent": 0.044546131044626236, + "loss_sod": 0.09325068444013596, + "loss_total": 0.3761804401874542, + "step": 194199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.932612955570221, + "learning_rate": 3.6921473108817694e-05, + "loss": 0.4791, + "step": 194200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.322678565979004, + "loss_rtd": 0.2504326105117798, + "loss_sent": 0.22586561739444733, + "loss_sod": 0.06177980825304985, + "loss_total": 0.5380780100822449, + "step": 194299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.666067123413086, + "loss_rtd": 0.2541863024234772, + "loss_sent": 0.3421761989593506, + "loss_sod": 0.06002382934093475, + "loss_total": 0.6563863158226013, + "step": 194299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.124372124671936, + "learning_rate": 3.6890847299405606e-05, + "loss": 0.4965, + "step": 194300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.198084831237793, + "loss_rtd": 0.24170055985450745, + "loss_sent": 0.06232727691531181, + "loss_sod": 0.03245178610086441, + "loss_total": 0.3364796042442322, + "step": 194399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.713399887084961, + "loss_rtd": 0.23639996349811554, + "loss_sent": 0.2283087968826294, + "loss_sod": 0.04151545464992523, + "loss_total": 0.5062242150306702, + "step": 194399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.6989062428474426, + "learning_rate": 3.6860226770358665e-05, + "loss": 0.4893, + "step": 194400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.571877479553223, + "loss_rtd": 0.25060853362083435, + "loss_sent": 0.11890272796154022, + "loss_sod": 0.0195518359541893, + "loss_total": 0.38906311988830566, + "step": 194499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.988839626312256, + "loss_rtd": 0.2519717216491699, + "loss_sent": 0.2186650186777115, + "loss_sod": 0.01724029891192913, + "loss_total": 0.4878770112991333, + "step": 194499 + }, + { + "epoch": 0.005, + "grad_norm": 1.1391091346740723, + "learning_rate": 3.682961153401079e-05, + "loss": 0.4908, + "step": 194500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.439150333404541, + "loss_rtd": 0.26181721687316895, + "loss_sent": 0.130909264087677, + "loss_sod": 0.01662658341228962, + "loss_total": 0.4093530774116516, + "step": 194599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.506926536560059, + "loss_rtd": 0.24847464263439178, + "loss_sent": 0.10631046444177628, + "loss_sod": 0.032716698944568634, + "loss_total": 0.3875018060207367, + "step": 194599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.6705906987190247, + "learning_rate": 3.679900160269384e-05, + "loss": 0.4979, + "step": 194600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.596723556518555, + "loss_rtd": 0.24961379170417786, + "loss_sent": 0.2417415827512741, + "loss_sod": 0.04811094328761101, + "loss_total": 0.5394663214683533, + "step": 194699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.437923908233643, + "loss_rtd": 0.24580544233322144, + "loss_sent": 0.056771207600831985, + "loss_sod": 0.03742823004722595, + "loss_total": 0.34000489115715027, + "step": 194699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.057392954826355, + "learning_rate": 3.676839698873744e-05, + "loss": 0.4881, + "step": 194700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.729447364807129, + "loss_rtd": 0.2617315649986267, + "loss_sent": 0.13916468620300293, + "loss_sod": 0.056938085705041885, + "loss_total": 0.4578343331813812, + "step": 194799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.596599578857422, + "loss_rtd": 0.2584453523159027, + "loss_sent": 0.25563913583755493, + "loss_sod": 0.0812462866306305, + "loss_total": 0.5953307747840881, + "step": 194799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.0611262321472168, + "learning_rate": 3.673779770446917e-05, + "loss": 0.4968, + "step": 194800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.796848773956299, + "loss_rtd": 0.2665272057056427, + "loss_sent": 0.24075986444950104, + "loss_sod": 0.026238219812512398, + "loss_total": 0.533525288105011, + "step": 194899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.1608662605285645, + "loss_rtd": 0.2742592990398407, + "loss_sent": 0.094719298183918, + "loss_sod": 0.018144870176911354, + "loss_total": 0.3871234655380249, + "step": 194899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.689104437828064, + "learning_rate": 3.670720376221439e-05, + "loss": 0.4718, + "step": 194900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.2695817947387695, + "loss_rtd": 0.24876317381858826, + "loss_sent": 0.12489506602287292, + "loss_sod": 0.05009578540921211, + "loss_total": 0.4237540364265442, + "step": 194999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.184060573577881, + "loss_rtd": 0.23411627113819122, + "loss_sent": 0.2702539563179016, + "loss_sod": 0.029751798138022423, + "loss_total": 0.5341219902038574, + "step": 194999 + }, + { + "epoch": 0.006, + "grad_norm": 0.7266300916671753, + "learning_rate": 3.667661517429635e-05, + "loss": 0.4924, + "step": 195000 + }, + { + "epoch": 0.006, + "eval_loss": 0.4612562954425812, + "eval_runtime": 152.1017, + "eval_samples_per_second": 101.531, + "eval_steps_per_second": 0.796, + "step": 195000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.542816162109375, + "loss_rtd": 0.27036115527153015, + "loss_sent": 0.2015909105539322, + "loss_sod": 0.02594519406557083, + "loss_total": 0.49789726734161377, + "step": 195099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.973165512084961, + "loss_rtd": 0.25184711813926697, + "loss_sent": 0.260888934135437, + "loss_sod": 0.04519077017903328, + "loss_total": 0.5579268336296082, + "step": 195099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.7794356346130371, + "learning_rate": 3.6646031953036125e-05, + "loss": 0.4899, + "step": 195100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.378973007202148, + "loss_rtd": 0.2437082976102829, + "loss_sent": 0.17770645022392273, + "loss_sod": 0.03263450786471367, + "loss_total": 0.4540492594242096, + "step": 195199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.3555097579956055, + "loss_rtd": 0.244486004114151, + "loss_sent": 0.28214266896247864, + "loss_sod": 0.07187218964099884, + "loss_total": 0.5985008478164673, + "step": 195199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.9849734306335449, + "learning_rate": 3.6615454110752624e-05, + "loss": 0.4725, + "step": 195200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.869761943817139, + "loss_rtd": 0.27491292357444763, + "loss_sent": 0.1574680358171463, + "loss_sod": 0.03220804035663605, + "loss_total": 0.46458899974823, + "step": 195299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.750090599060059, + "loss_rtd": 0.2437335103750229, + "loss_sent": 0.0854685828089714, + "loss_sod": 0.011373812332749367, + "loss_total": 0.3405759036540985, + "step": 195299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.5817446708679199, + "learning_rate": 3.658488165976261e-05, + "loss": 0.4895, + "step": 195300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.721882343292236, + "loss_rtd": 0.23500916361808777, + "loss_sent": 0.235703706741333, + "loss_sod": 0.030631324276328087, + "loss_total": 0.5013442039489746, + "step": 195399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.700669765472412, + "loss_rtd": 0.2499621957540512, + "loss_sent": 0.19793350994586945, + "loss_sod": 0.14146006107330322, + "loss_total": 0.5893557667732239, + "step": 195399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.9907777905464172, + "learning_rate": 3.655431461238066e-05, + "loss": 0.4936, + "step": 195400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.884043216705322, + "loss_rtd": 0.2326071858406067, + "loss_sent": 0.03339262679219246, + "loss_sod": 0.035985104739665985, + "loss_total": 0.3019849359989166, + "step": 195499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.64555549621582, + "loss_rtd": 0.2768833637237549, + "loss_sent": 0.2613065540790558, + "loss_sod": 0.04346334934234619, + "loss_total": 0.5816532373428345, + "step": 195499 + }, + { + "epoch": 0.007, + "grad_norm": 0.7556312680244446, + "learning_rate": 3.652375298091918e-05, + "loss": 0.4794, + "step": 195500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.760225772857666, + "loss_rtd": 0.24749693274497986, + "loss_sent": 0.051286693662405014, + "loss_sod": 0.2220277190208435, + "loss_total": 0.5208113193511963, + "step": 195599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.8289947509765625, + "loss_rtd": 0.21202883124351501, + "loss_sent": 0.028503786772489548, + "loss_sod": 0.03376930579543114, + "loss_total": 0.2743019163608551, + "step": 195599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.8307656645774841, + "learning_rate": 3.649319677768838e-05, + "loss": 0.4776, + "step": 195600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.164693355560303, + "loss_rtd": 0.21437622606754303, + "loss_sent": 0.06534356623888016, + "loss_sod": 0.16298896074295044, + "loss_total": 0.44270873069763184, + "step": 195699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.572518348693848, + "loss_rtd": 0.25925710797309875, + "loss_sent": 0.27719762921333313, + "loss_sod": 0.05138392373919487, + "loss_total": 0.5878386497497559, + "step": 195699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.486840009689331, + "learning_rate": 3.6462646014996317e-05, + "loss": 0.4763, + "step": 195700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.479996681213379, + "loss_rtd": 0.26533398032188416, + "loss_sent": 0.11786812543869019, + "loss_sod": 0.006996192038059235, + "loss_total": 0.390198290348053, + "step": 195799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.574079513549805, + "loss_rtd": 0.24514523148536682, + "loss_sent": 0.22541922330856323, + "loss_sod": 0.06325840950012207, + "loss_total": 0.5338228940963745, + "step": 195799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.063891887664795, + "learning_rate": 3.6432100705148796e-05, + "loss": 0.48, + "step": 195800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.226762294769287, + "loss_rtd": 0.24794931709766388, + "loss_sent": 0.18562978506088257, + "loss_sod": 0.05482611805200577, + "loss_total": 0.4884052276611328, + "step": 195899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.682345390319824, + "loss_rtd": 0.2275385856628418, + "loss_sent": 0.16026000678539276, + "loss_sod": 0.0994885042309761, + "loss_total": 0.48728710412979126, + "step": 195899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.9148600697517395, + "learning_rate": 3.640156086044951e-05, + "loss": 0.4788, + "step": 195900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.2727460861206055, + "loss_rtd": 0.24048146605491638, + "loss_sent": 0.41567525267601013, + "loss_sod": 0.06500697135925293, + "loss_total": 0.7211636900901794, + "step": 195999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.807211875915527, + "loss_rtd": 0.24590934813022614, + "loss_sent": 0.38504141569137573, + "loss_sod": 0.05467440187931061, + "loss_total": 0.6856251955032349, + "step": 195999 + }, + { + "epoch": 0.008, + "grad_norm": 1.636039137840271, + "learning_rate": 3.637102649319987e-05, + "loss": 0.4716, + "step": 196000 + }, + { + "epoch": 0.008, + "eval_loss": 0.45426931977272034, + "eval_runtime": 150.8469, + "eval_samples_per_second": 102.375, + "eval_steps_per_second": 0.802, + "step": 196000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.539201736450195, + "loss_rtd": 0.2573156952857971, + "loss_sent": 0.07600665837526321, + "loss_sod": 0.03368259221315384, + "loss_total": 0.367004930973053, + "step": 196099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.819047927856445, + "loss_rtd": 0.25339609384536743, + "loss_sent": 0.364510178565979, + "loss_sod": 0.02152218297123909, + "loss_total": 0.6394284963607788, + "step": 196099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.717594563961029, + "learning_rate": 3.634049761569914e-05, + "loss": 0.4637, + "step": 196100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.560566425323486, + "loss_rtd": 0.25853967666625977, + "loss_sent": 0.2929643988609314, + "loss_sod": 0.02935452200472355, + "loss_total": 0.580858588218689, + "step": 196199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.567770004272461, + "loss_rtd": 0.25819912552833557, + "loss_sent": 0.244191512465477, + "loss_sod": 0.08314001560211182, + "loss_total": 0.5855306386947632, + "step": 196199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.7716864347457886, + "learning_rate": 3.6309974240244326e-05, + "loss": 0.4732, + "step": 196200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.5980119705200195, + "loss_rtd": 0.2434452772140503, + "loss_sent": 0.17914623022079468, + "loss_sod": 0.015952210873365402, + "loss_total": 0.4385437071323395, + "step": 196299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.570590972900391, + "loss_rtd": 0.2433573305606842, + "loss_sent": 0.17294123768806458, + "loss_sod": 0.029650865122675896, + "loss_total": 0.4459494352340698, + "step": 196299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.742214024066925, + "learning_rate": 3.6279456379130263e-05, + "loss": 0.4854, + "step": 196300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.177133083343506, + "loss_rtd": 0.23149006068706512, + "loss_sent": 0.005693112034350634, + "loss_sod": 0.09032922983169556, + "loss_total": 0.3275124132633209, + "step": 196399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.464236259460449, + "loss_rtd": 0.23897162079811096, + "loss_sent": 0.389897882938385, + "loss_sod": 0.039472367614507675, + "loss_total": 0.668341875076294, + "step": 196399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.2947161197662354, + "learning_rate": 3.624894404464951e-05, + "loss": 0.4892, + "step": 196400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.962997913360596, + "loss_rtd": 0.22442328929901123, + "loss_sent": 0.01633565127849579, + "loss_sod": 0.13329781591892242, + "loss_total": 0.37405675649642944, + "step": 196499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.450702667236328, + "loss_rtd": 0.24412818253040314, + "loss_sent": 0.32243049144744873, + "loss_sod": 0.02968277968466282, + "loss_total": 0.5962414741516113, + "step": 196499 + }, + { + "epoch": 0.009, + "grad_norm": 1.5923128128051758, + "learning_rate": 3.6218437249092474e-05, + "loss": 0.4792, + "step": 196500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.418288230895996, + "loss_rtd": 0.263407826423645, + "loss_sent": 0.4554015100002289, + "loss_sod": 0.06364298611879349, + "loss_total": 0.7824523448944092, + "step": 196599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.380255222320557, + "loss_rtd": 0.24268421530723572, + "loss_sent": 0.12270950525999069, + "loss_sod": 0.09936580806970596, + "loss_total": 0.4647595286369324, + "step": 196599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.7431308031082153, + "learning_rate": 3.6187936004747245e-05, + "loss": 0.475, + "step": 196600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.645377159118652, + "loss_rtd": 0.25081971287727356, + "loss_sent": 0.2932474613189697, + "loss_sod": 0.0416770838201046, + "loss_total": 0.5857442617416382, + "step": 196699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.848897457122803, + "loss_rtd": 0.22943075001239777, + "loss_sent": 0.0503636933863163, + "loss_sod": 0.06484819203615189, + "loss_total": 0.34464263916015625, + "step": 196699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.7886890769004822, + "learning_rate": 3.615744032389976e-05, + "loss": 0.4823, + "step": 196700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.389031887054443, + "loss_rtd": 0.2638556659221649, + "loss_sent": 0.11603834480047226, + "loss_sod": 0.07925192266702652, + "loss_total": 0.4591459333896637, + "step": 196799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.689804553985596, + "loss_rtd": 0.2472999393939972, + "loss_sent": 0.10590516775846481, + "loss_sod": 0.07291129976511002, + "loss_total": 0.426116406917572, + "step": 196799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.5039016008377075, + "learning_rate": 3.612695021883366e-05, + "loss": 0.4964, + "step": 196800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.939685821533203, + "loss_rtd": 0.23807507753372192, + "loss_sent": 0.004213188774883747, + "loss_sod": 0.06944707036018372, + "loss_total": 0.3117353320121765, + "step": 196899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.869617938995361, + "loss_rtd": 0.22580496966838837, + "loss_sent": 0.004214581102132797, + "loss_sod": 0.1402873396873474, + "loss_total": 0.37030690908432007, + "step": 196899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.9130590558052063, + "learning_rate": 3.609646570183033e-05, + "loss": 0.4772, + "step": 196900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.155139446258545, + "loss_rtd": 0.2475593388080597, + "loss_sent": 0.24393866956233978, + "loss_sod": 0.11925341188907623, + "loss_total": 0.6107514500617981, + "step": 196999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.7448320388793945, + "loss_rtd": 0.2576092779636383, + "loss_sent": 0.19933441281318665, + "loss_sod": 0.021516328677535057, + "loss_total": 0.47846001386642456, + "step": 196999 + }, + { + "epoch": 0.01, + "grad_norm": 1.9248608350753784, + "learning_rate": 3.606598678516897e-05, + "loss": 0.4839, + "step": 197000 + }, + { + "epoch": 0.01, + "eval_loss": 0.45972615480422974, + "eval_runtime": 150.855, + "eval_samples_per_second": 102.37, + "eval_steps_per_second": 0.802, + "step": 197000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.392733097076416, + "loss_rtd": 0.24464982748031616, + "loss_sent": 0.26841703057289124, + "loss_sod": 0.09383723884820938, + "loss_total": 0.6069040894508362, + "step": 197099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.601526737213135, + "loss_rtd": 0.25071266293525696, + "loss_sent": 0.13358637690544128, + "loss_sod": 0.026629824191331863, + "loss_total": 0.410928875207901, + "step": 197099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.9692697525024414, + "learning_rate": 3.603551348112646e-05, + "loss": 0.4612, + "step": 197100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.954998016357422, + "loss_rtd": 0.2352873533964157, + "loss_sent": 0.01572391204535961, + "loss_sod": 0.03777249529957771, + "loss_total": 0.2887837588787079, + "step": 197199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.429396629333496, + "loss_rtd": 0.26083338260650635, + "loss_sent": 0.06994747370481491, + "loss_sod": 0.11270473152399063, + "loss_total": 0.4434855878353119, + "step": 197199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.049393892288208, + "learning_rate": 3.600504580197746e-05, + "loss": 0.4724, + "step": 197200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.400674343109131, + "loss_rtd": 0.2626436948776245, + "loss_sent": 0.7087818384170532, + "loss_sod": 0.10867201536893845, + "loss_total": 1.0800975561141968, + "step": 197299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.000377178192139, + "loss_rtd": 0.226999431848526, + "loss_sent": 0.2798299193382263, + "loss_sod": 0.036855921149253845, + "loss_total": 0.543685257434845, + "step": 197299 + }, + { + "epoch": 0.0106, + "grad_norm": 2.1214144229888916, + "learning_rate": 3.597458375999432e-05, + "loss": 0.4891, + "step": 197300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.523964881896973, + "loss_rtd": 0.2674618363380432, + "loss_sent": 0.2527769207954407, + "loss_sod": 0.037127744406461716, + "loss_total": 0.5573664903640747, + "step": 197399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.307008743286133, + "loss_rtd": 0.24416065216064453, + "loss_sent": 0.6054388284683228, + "loss_sod": 0.014565913006663322, + "loss_total": 0.8641654253005981, + "step": 197399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.8312748670578003, + "learning_rate": 3.5944127367447176e-05, + "loss": 0.4766, + "step": 197400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.318966865539551, + "loss_rtd": 0.23821872472763062, + "loss_sent": 0.08623597025871277, + "loss_sod": 0.00946864951401949, + "loss_total": 0.33392333984375, + "step": 197499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.584272861480713, + "loss_rtd": 0.23597374558448792, + "loss_sent": 0.330825537443161, + "loss_sod": 0.08103427290916443, + "loss_total": 0.6478335857391357, + "step": 197499 + }, + { + "epoch": 0.011, + "grad_norm": 1.9473567008972168, + "learning_rate": 3.591367663660384e-05, + "loss": 0.4811, + "step": 197500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.993553161621094, + "loss_rtd": 0.2804194986820221, + "loss_sent": 0.05455870181322098, + "loss_sod": 0.03569484502077103, + "loss_total": 0.3706730306148529, + "step": 197599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.681451320648193, + "loss_rtd": 0.2488894909620285, + "loss_sent": 0.3947000801563263, + "loss_sod": 0.06296908110380173, + "loss_total": 0.7065586447715759, + "step": 197599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9109740853309631, + "learning_rate": 3.588323157972988e-05, + "loss": 0.4914, + "step": 197600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.435794353485107, + "loss_rtd": 0.25761738419532776, + "loss_sent": 0.2595762312412262, + "loss_sod": 0.04328524321317673, + "loss_total": 0.5604788661003113, + "step": 197699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.800150394439697, + "loss_rtd": 0.24064289033412933, + "loss_sent": 0.1380920112133026, + "loss_sod": 0.046730682253837585, + "loss_total": 0.42546558380126953, + "step": 197699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.6069086790084839, + "learning_rate": 3.585279220908854e-05, + "loss": 0.4721, + "step": 197700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.557639122009277, + "loss_rtd": 0.246734157204628, + "loss_sent": 0.1797482669353485, + "loss_sod": 0.05293069779872894, + "loss_total": 0.47941312193870544, + "step": 197799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.796323776245117, + "loss_rtd": 0.23539410531520844, + "loss_sent": 0.24460507929325104, + "loss_sod": 0.0563870333135128, + "loss_total": 0.5363861918449402, + "step": 197799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.7594579458236694, + "learning_rate": 3.582235853694082e-05, + "loss": 0.476, + "step": 197800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.968145370483398, + "loss_rtd": 0.2510281801223755, + "loss_sent": 0.10709671676158905, + "loss_sod": 0.018377184867858887, + "loss_total": 0.3765020966529846, + "step": 197899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.510294437408447, + "loss_rtd": 0.24616675078868866, + "loss_sent": 0.37081557512283325, + "loss_sod": 0.03428245335817337, + "loss_total": 0.6512647867202759, + "step": 197899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.0171705484390259, + "learning_rate": 3.5791930575545377e-05, + "loss": 0.481, + "step": 197900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.905046463012695, + "loss_rtd": 0.22450122237205505, + "loss_sent": 0.013790993019938469, + "loss_sod": 0.0523202121257782, + "loss_total": 0.29061242938041687, + "step": 197999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.369077205657959, + "loss_rtd": 0.23604454100131989, + "loss_sent": 0.01143306028097868, + "loss_sod": 0.13096845149993896, + "loss_total": 0.3784460425376892, + "step": 197999 + }, + { + "epoch": 0.012, + "grad_norm": 0.7122476100921631, + "learning_rate": 3.57615083371586e-05, + "loss": 0.488, + "step": 198000 + }, + { + "epoch": 0.012, + "eval_loss": 0.44978225231170654, + "eval_runtime": 150.9294, + "eval_samples_per_second": 102.319, + "eval_steps_per_second": 0.802, + "step": 198000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.49887752532959, + "loss_rtd": 0.2237449735403061, + "loss_sent": 0.20722432434558868, + "loss_sod": 0.12202988564968109, + "loss_total": 0.5529991984367371, + "step": 198099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.660391330718994, + "loss_rtd": 0.21641302108764648, + "loss_sent": 0.019336359575390816, + "loss_sod": 0.016802601516246796, + "loss_total": 0.25255200266838074, + "step": 198099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.9483657479286194, + "learning_rate": 3.573109183403456e-05, + "loss": 0.4686, + "step": 198100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.383784294128418, + "loss_rtd": 0.2824476361274719, + "loss_sent": 0.3660046458244324, + "loss_sod": 0.030124733224511147, + "loss_total": 0.6785770058631897, + "step": 198199 + }, + { + "epoch": 0.012398, + "loss_gen": 6.0347113609313965, + "loss_rtd": 0.25417864322662354, + "loss_sent": 0.08456467092037201, + "loss_sod": 0.08676651865243912, + "loss_total": 0.42550981044769287, + "step": 198199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.4950041770935059, + "learning_rate": 3.570068107842503e-05, + "loss": 0.4627, + "step": 198200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.719231128692627, + "loss_rtd": 0.2484939843416214, + "loss_sent": 0.09971030056476593, + "loss_sod": 0.10634118318557739, + "loss_total": 0.45454543828964233, + "step": 198299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.544730186462402, + "loss_rtd": 0.2531845271587372, + "loss_sent": 0.22829610109329224, + "loss_sod": 0.1020435094833374, + "loss_total": 0.5835241079330444, + "step": 198299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.1413614749908447, + "learning_rate": 3.567027608257945e-05, + "loss": 0.4828, + "step": 198300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.189231872558594, + "loss_rtd": 0.2279975712299347, + "loss_sent": 0.06486871838569641, + "loss_sod": 0.00765463849529624, + "loss_total": 0.3005209267139435, + "step": 198399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.541438102722168, + "loss_rtd": 0.23647157847881317, + "loss_sent": 0.036401890218257904, + "loss_sod": 0.03704839199781418, + "loss_total": 0.30992186069488525, + "step": 198399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.569990873336792, + "learning_rate": 3.5639876858744945e-05, + "loss": 0.4726, + "step": 198400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.509321689605713, + "loss_rtd": 0.25453636050224304, + "loss_sent": 0.2132350355386734, + "loss_sod": 0.02199200913310051, + "loss_total": 0.48976337909698486, + "step": 198499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.463100433349609, + "loss_rtd": 0.24901433289051056, + "loss_sent": 0.2661890685558319, + "loss_sod": 0.028923040255904198, + "loss_total": 0.5441264510154724, + "step": 198499 + }, + { + "epoch": 0.013, + "grad_norm": 1.5464208126068115, + "learning_rate": 3.5609483419166335e-05, + "loss": 0.483, + "step": 198500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.566218376159668, + "loss_rtd": 0.24107764661312103, + "loss_sent": 0.0769277960062027, + "loss_sod": 0.05446183308959007, + "loss_total": 0.3724672794342041, + "step": 198599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.630220413208008, + "loss_rtd": 0.2496069222688675, + "loss_sent": 0.005942351184785366, + "loss_sod": 0.18867163360118866, + "loss_total": 0.4442209005355835, + "step": 198599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.7546682953834534, + "learning_rate": 3.557909577608607e-05, + "loss": 0.4859, + "step": 198600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.5324506759643555, + "loss_rtd": 0.25724464654922485, + "loss_sent": 0.2177625298500061, + "loss_sod": 0.04412589967250824, + "loss_total": 0.5191330909729004, + "step": 198699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.545251846313477, + "loss_rtd": 0.24980393052101135, + "loss_sent": 0.08665682375431061, + "loss_sod": 0.021374624222517014, + "loss_total": 0.3578353524208069, + "step": 198699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.0699278116226196, + "learning_rate": 3.5548713941744305e-05, + "loss": 0.4846, + "step": 198700 + }, + { + "epoch": 0.013598, + "loss_gen": 6.274804592132568, + "loss_rtd": 0.24994046986103058, + "loss_sent": 0.1418921798467636, + "loss_sod": 0.10209321230649948, + "loss_total": 0.49392586946487427, + "step": 198799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.959682464599609, + "loss_rtd": 0.26186472177505493, + "loss_sent": 0.11118325591087341, + "loss_sod": 0.021411027759313583, + "loss_total": 0.3944590091705322, + "step": 198799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.8901879787445068, + "learning_rate": 3.551833792837883e-05, + "loss": 0.4829, + "step": 198800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.676726818084717, + "loss_rtd": 0.24532736837863922, + "loss_sent": 0.21729162335395813, + "loss_sod": 0.014285311102867126, + "loss_total": 0.4769043028354645, + "step": 198899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.671530723571777, + "loss_rtd": 0.269016832113266, + "loss_sent": 0.22904743254184723, + "loss_sod": 0.029550496488809586, + "loss_total": 0.5276147723197937, + "step": 198899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.8333650231361389, + "learning_rate": 3.5487967748225124e-05, + "loss": 0.4979, + "step": 198900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.445819854736328, + "loss_rtd": 0.25361278653144836, + "loss_sent": 0.18403823673725128, + "loss_sod": 0.03664318472146988, + "loss_total": 0.47429418563842773, + "step": 198999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.973983287811279, + "loss_rtd": 0.2397686392068863, + "loss_sent": 0.04354400560259819, + "loss_sod": 0.14111952483654022, + "loss_total": 0.4244321584701538, + "step": 198999 + }, + { + "epoch": 0.014, + "grad_norm": 0.7778320908546448, + "learning_rate": 3.545760341351625e-05, + "loss": 0.4703, + "step": 199000 + }, + { + "epoch": 0.014, + "eval_loss": 0.46094122529029846, + "eval_runtime": 150.9227, + "eval_samples_per_second": 102.324, + "eval_steps_per_second": 0.802, + "step": 199000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.699524879455566, + "loss_rtd": 0.25735366344451904, + "loss_sent": 0.4843277335166931, + "loss_sod": 0.019896212965250015, + "loss_total": 0.7615776062011719, + "step": 199099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.988870143890381, + "loss_rtd": 0.22112753987312317, + "loss_sent": 0.07448185980319977, + "loss_sod": 0.1123475506901741, + "loss_total": 0.40795695781707764, + "step": 199099 + }, + { + "epoch": 0.0142, + "grad_norm": 2.0350053310394287, + "learning_rate": 3.542724493648301e-05, + "loss": 0.4829, + "step": 199100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.903702735900879, + "loss_rtd": 0.21473678946495056, + "loss_sent": 0.135924831032753, + "loss_sod": 0.035691987723112106, + "loss_total": 0.38635361194610596, + "step": 199199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.458212375640869, + "loss_rtd": 0.2825135886669159, + "loss_sent": 0.2972809672355652, + "loss_sod": 0.09804816544055939, + "loss_total": 0.6778427362442017, + "step": 199199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.107661485671997, + "learning_rate": 3.5396892329353737e-05, + "loss": 0.476, + "step": 199200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.409221172332764, + "loss_rtd": 0.23438437283039093, + "loss_sent": 0.16067595779895782, + "loss_sod": 0.02327580936253071, + "loss_total": 0.4183361530303955, + "step": 199299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.573988914489746, + "loss_rtd": 0.23705151677131653, + "loss_sent": 0.25113776326179504, + "loss_sod": 0.0523638054728508, + "loss_total": 0.540553092956543, + "step": 199299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.7476825714111328, + "learning_rate": 3.536654560435451e-05, + "loss": 0.4751, + "step": 199300 + }, + { + "epoch": 0.014798, + "loss_gen": 4.950931072235107, + "loss_rtd": 0.21000342071056366, + "loss_sent": 0.07651349902153015, + "loss_sod": 0.13793537020683289, + "loss_total": 0.4244522750377655, + "step": 199399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.381289005279541, + "loss_rtd": 0.23568496108055115, + "loss_sent": 0.20836390554904938, + "loss_sod": 0.02635544165968895, + "loss_total": 0.47040432691574097, + "step": 199399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.5315524339675903, + "learning_rate": 3.533620477370895e-05, + "loss": 0.4691, + "step": 199400 + }, + { + "epoch": 0.014998, + "loss_gen": 6.159007549285889, + "loss_rtd": 0.2743111252784729, + "loss_sent": 0.06190488860011101, + "loss_sod": 0.1761845052242279, + "loss_total": 0.5124005079269409, + "step": 199499 + }, + { + "epoch": 0.014998, + "loss_gen": 6.015754699707031, + "loss_rtd": 0.24755114316940308, + "loss_sent": 0.17315152287483215, + "loss_sod": 0.09640425443649292, + "loss_total": 0.5171068906784058, + "step": 199499 + }, + { + "epoch": 0.015, + "grad_norm": 1.2112517356872559, + "learning_rate": 3.5305869849638365e-05, + "loss": 0.4766, + "step": 199500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.525232315063477, + "loss_rtd": 0.25431257486343384, + "loss_sent": 0.06290171295404434, + "loss_sod": 0.027303146198391914, + "loss_total": 0.34451743960380554, + "step": 199599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.814101219177246, + "loss_rtd": 0.23179247975349426, + "loss_sent": 0.015164760872721672, + "loss_sod": 0.08381253480911255, + "loss_total": 0.33076977729797363, + "step": 199599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.9078371524810791, + "learning_rate": 3.527554084436163e-05, + "loss": 0.4777, + "step": 199600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.657907485961914, + "loss_rtd": 0.2604738473892212, + "loss_sent": 0.33068281412124634, + "loss_sod": 0.017987214028835297, + "loss_total": 0.609143853187561, + "step": 199699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.430020809173584, + "loss_rtd": 0.261460542678833, + "loss_sent": 0.08397623896598816, + "loss_sod": 0.0045923409052193165, + "loss_total": 0.3500291109085083, + "step": 199699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.057021975517273, + "learning_rate": 3.52452177700953e-05, + "loss": 0.4704, + "step": 199700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.489025592803955, + "loss_rtd": 0.2676151990890503, + "loss_sent": 0.2924831509590149, + "loss_sod": 0.03531592711806297, + "loss_total": 0.5954142808914185, + "step": 199799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.577322483062744, + "loss_rtd": 0.23318113386631012, + "loss_sent": 0.1402457058429718, + "loss_sod": 0.010187160223722458, + "loss_total": 0.3836140036582947, + "step": 199799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.8963132500648499, + "learning_rate": 3.5214900639053474e-05, + "loss": 0.4863, + "step": 199800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.575000762939453, + "loss_rtd": 0.25459951162338257, + "loss_sent": 0.09506849944591522, + "loss_sod": 0.046479783952236176, + "loss_total": 0.39614778757095337, + "step": 199899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.3990478515625, + "loss_rtd": 0.27562594413757324, + "loss_sent": 0.3519396483898163, + "loss_sod": 0.021528389304876328, + "loss_total": 0.6490939855575562, + "step": 199899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.0373303890228271, + "learning_rate": 3.5184589463447916e-05, + "loss": 0.4584, + "step": 199900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.785987377166748, + "loss_rtd": 0.23527689278125763, + "loss_sent": 0.3650191128253937, + "loss_sod": 0.07376965880393982, + "loss_total": 0.6740657091140747, + "step": 199999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.684935569763184, + "loss_rtd": 0.2589579224586487, + "loss_sent": 0.2833992838859558, + "loss_sod": 0.014439928345382214, + "loss_total": 0.5567971467971802, + "step": 199999 + }, + { + "epoch": 0.016, + "grad_norm": 1.6609793901443481, + "learning_rate": 3.5154284255487945e-05, + "loss": 0.4624, + "step": 200000 + }, + { + "epoch": 0.016, + "eval_loss": 0.45440730452537537, + "eval_runtime": 151.123, + "eval_samples_per_second": 102.188, + "eval_steps_per_second": 0.801, + "step": 200000 + }, + { + "epoch": 0.016198, + "loss_gen": 6.422415256500244, + "loss_rtd": 0.2642577886581421, + "loss_sent": 0.052832260727882385, + "loss_sod": 0.13694913685321808, + "loss_total": 0.45403915643692017, + "step": 200099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.789581298828125, + "loss_rtd": 0.25363168120384216, + "loss_sent": 0.171161949634552, + "loss_sod": 0.011497782543301582, + "loss_total": 0.4362914264202118, + "step": 200099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.7341986894607544, + "learning_rate": 3.51239850273805e-05, + "loss": 0.4692, + "step": 200100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.698653221130371, + "loss_rtd": 0.26942676305770874, + "loss_sent": 0.24518704414367676, + "loss_sod": 0.006007281132042408, + "loss_total": 0.5206210613250732, + "step": 200199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.7527923583984375, + "loss_rtd": 0.2350677251815796, + "loss_sent": 0.25851503014564514, + "loss_sod": 0.04473987966775894, + "loss_total": 0.5383226275444031, + "step": 200199 + }, + { + "epoch": 0.0164, + "grad_norm": 2.361886501312256, + "learning_rate": 3.509369179133011e-05, + "loss": 0.4804, + "step": 200200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.395769119262695, + "loss_rtd": 0.26046210527420044, + "loss_sent": 0.14961999654769897, + "loss_sod": 0.03154686838388443, + "loss_total": 0.44162896275520325, + "step": 200299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.759584426879883, + "loss_rtd": 0.2427106350660324, + "loss_sent": 0.36849498748779297, + "loss_sod": 0.07145502418279648, + "loss_total": 0.6826606392860413, + "step": 200299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.9213209748268127, + "learning_rate": 3.506340455953887e-05, + "loss": 0.4711, + "step": 200300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.742183685302734, + "loss_rtd": 0.23548494279384613, + "loss_sent": 0.14038434624671936, + "loss_sod": 0.0410086065530777, + "loss_total": 0.4168778955936432, + "step": 200399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.646134376525879, + "loss_rtd": 0.23521903157234192, + "loss_sent": 0.10856125503778458, + "loss_sod": 0.13441947102546692, + "loss_total": 0.4781997501850128, + "step": 200399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.8273062705993652, + "learning_rate": 3.50331233442065e-05, + "loss": 0.4698, + "step": 200400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.391009330749512, + "loss_rtd": 0.25922268629074097, + "loss_sent": 0.22874972224235535, + "loss_sod": 0.159585639834404, + "loss_total": 0.6475580930709839, + "step": 200499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.653717994689941, + "loss_rtd": 0.2337595820426941, + "loss_sent": 0.10643959790468216, + "loss_sod": 0.009374690242111683, + "loss_total": 0.34957388043403625, + "step": 200499 + }, + { + "epoch": 0.017, + "grad_norm": 1.0434125661849976, + "learning_rate": 3.500284815753025e-05, + "loss": 0.492, + "step": 200500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.748563766479492, + "loss_rtd": 0.22266308963298798, + "loss_sent": 3.562410347512923e-05, + "loss_sod": 0.1355685293674469, + "loss_total": 0.358267217874527, + "step": 200599 + }, + { + "epoch": 0.017198, + "loss_gen": 4.907251358032227, + "loss_rtd": 0.21454456448554993, + "loss_sent": 0.02917483262717724, + "loss_sod": 0.17345693707466125, + "loss_total": 0.41717633605003357, + "step": 200599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.0818666219711304, + "learning_rate": 3.497257901170497e-05, + "loss": 0.4957, + "step": 200600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.9142842292785645, + "loss_rtd": 0.25485530495643616, + "loss_sent": 0.09352286159992218, + "loss_sod": 0.028299875557422638, + "loss_total": 0.3766780495643616, + "step": 200699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.5983428955078125, + "loss_rtd": 0.25049030780792236, + "loss_sent": 0.19364747405052185, + "loss_sod": 0.04774875566363335, + "loss_total": 0.49188652634620667, + "step": 200699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.9354696273803711, + "learning_rate": 3.494231591892307e-05, + "loss": 0.4801, + "step": 200700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.382093906402588, + "loss_rtd": 0.24713316559791565, + "loss_sent": 0.032326988875865936, + "loss_sod": 0.07704608142375946, + "loss_total": 0.35650622844696045, + "step": 200799 + }, + { + "epoch": 0.017598, + "loss_gen": 4.812257289886475, + "loss_rtd": 0.21914120018482208, + "loss_sent": 2.922447310993448e-05, + "loss_sod": 0.12229741364717484, + "loss_total": 0.34146785736083984, + "step": 200799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.7055344581604004, + "learning_rate": 3.4912058891374525e-05, + "loss": 0.465, + "step": 200800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.679133892059326, + "loss_rtd": 0.2276810258626938, + "loss_sent": 0.15405045449733734, + "loss_sod": 0.02448256127536297, + "loss_total": 0.40621405839920044, + "step": 200899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.473849773406982, + "loss_rtd": 0.2811950147151947, + "loss_sent": 0.43161195516586304, + "loss_sod": 0.017364630475640297, + "loss_total": 0.7301715612411499, + "step": 200899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.1705329418182373, + "learning_rate": 3.4881807941246844e-05, + "loss": 0.488, + "step": 200900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.494246006011963, + "loss_rtd": 0.26030996441841125, + "loss_sent": 0.24041537940502167, + "loss_sod": 0.01098925806581974, + "loss_total": 0.5117145776748657, + "step": 200999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.916234970092773, + "loss_rtd": 0.26386576890945435, + "loss_sent": 0.11231882870197296, + "loss_sod": 0.08995687961578369, + "loss_total": 0.4661414623260498, + "step": 200999 + }, + { + "epoch": 0.018, + "grad_norm": 0.74242103099823, + "learning_rate": 3.485156308072512e-05, + "loss": 0.4635, + "step": 201000 + }, + { + "epoch": 0.018, + "eval_loss": 0.45967555046081543, + "eval_runtime": 152.3511, + "eval_samples_per_second": 101.365, + "eval_steps_per_second": 0.794, + "step": 201000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.238211154937744, + "loss_rtd": 0.22699862718582153, + "loss_sent": 0.16258226335048676, + "loss_sod": 0.06895344704389572, + "loss_total": 0.4585343599319458, + "step": 201099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.443732738494873, + "loss_rtd": 0.2511044442653656, + "loss_sent": 0.18242928385734558, + "loss_sod": 0.00869907345622778, + "loss_total": 0.44223278760910034, + "step": 201099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.223291277885437, + "learning_rate": 3.482132432199197e-05, + "loss": 0.4699, + "step": 201100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.6898393630981445, + "loss_rtd": 0.2735462188720703, + "loss_sent": 0.25486263632774353, + "loss_sod": 0.017370786517858505, + "loss_total": 0.5457796454429626, + "step": 201199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.367513179779053, + "loss_rtd": 0.25148075819015503, + "loss_sent": 0.19145554304122925, + "loss_sod": 0.024947544559836388, + "loss_total": 0.4678838551044464, + "step": 201199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.9793940186500549, + "learning_rate": 3.479109167722757e-05, + "loss": 0.4631, + "step": 201200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.831352710723877, + "loss_rtd": 0.2631663382053375, + "loss_sent": 0.4253993332386017, + "loss_sod": 0.08521652966737747, + "loss_total": 0.7737821936607361, + "step": 201299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.7043585777282715, + "loss_rtd": 0.2361105978488922, + "loss_sent": 0.02994850091636181, + "loss_sod": 0.09454520046710968, + "loss_total": 0.36060431599617004, + "step": 201299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.8844496011734009, + "learning_rate": 3.476086515860965e-05, + "loss": 0.478, + "step": 201300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.68744421005249, + "loss_rtd": 0.2573264539241791, + "loss_sent": 0.070601686835289, + "loss_sod": 0.02096596546471119, + "loss_total": 0.3488941192626953, + "step": 201399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.503346920013428, + "loss_rtd": 0.254792183637619, + "loss_sent": 0.0630236342549324, + "loss_sod": 0.04250772297382355, + "loss_total": 0.36032354831695557, + "step": 201399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.7962280511856079, + "learning_rate": 3.47306447783134e-05, + "loss": 0.4777, + "step": 201400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.533423900604248, + "loss_rtd": 0.2535945773124695, + "loss_sent": 0.2537531852722168, + "loss_sod": 0.04877634719014168, + "loss_total": 0.5561240911483765, + "step": 201499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.3803863525390625, + "loss_rtd": 0.23618975281715393, + "loss_sent": 0.10412287712097168, + "loss_sod": 0.02499981038272381, + "loss_total": 0.3653124272823334, + "step": 201499 + }, + { + "epoch": 0.019, + "grad_norm": 0.7263472676277161, + "learning_rate": 3.47004305485116e-05, + "loss": 0.4762, + "step": 201500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.453300476074219, + "loss_rtd": 0.2398272007703781, + "loss_sent": 0.25099968910217285, + "loss_sod": 0.010356992483139038, + "loss_total": 0.5011838674545288, + "step": 201599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.467883110046387, + "loss_rtd": 0.25183042883872986, + "loss_sent": 0.34147679805755615, + "loss_sod": 0.054517049342393875, + "loss_total": 0.6478242874145508, + "step": 201599 + }, + { + "epoch": 0.0192, + "grad_norm": 2.130498170852661, + "learning_rate": 3.467022248137455e-05, + "loss": 0.4992, + "step": 201600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.547444820404053, + "loss_rtd": 0.24827061593532562, + "loss_sent": 0.0749385729432106, + "loss_sod": 0.02044161595404148, + "loss_total": 0.34365078806877136, + "step": 201699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.6619343757629395, + "loss_rtd": 0.2587766945362091, + "loss_sent": 0.15140384435653687, + "loss_sod": 0.041210610419511795, + "loss_total": 0.45139116048812866, + "step": 201699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.8851449489593506, + "learning_rate": 3.464002058907004e-05, + "loss": 0.4673, + "step": 201700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.297020435333252, + "loss_rtd": 0.2192527800798416, + "loss_sent": 0.10825800150632858, + "loss_sod": 0.02537507191300392, + "loss_total": 0.3528858423233032, + "step": 201799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.737192153930664, + "loss_rtd": 0.24014760553836823, + "loss_sent": 0.2475651055574417, + "loss_sod": 0.0290079228579998, + "loss_total": 0.5167206525802612, + "step": 201799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.6454467177391052, + "learning_rate": 3.460982488376342e-05, + "loss": 0.4925, + "step": 201800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.807116508483887, + "loss_rtd": 0.23687048256397247, + "loss_sent": 0.10783880949020386, + "loss_sod": 0.05488898605108261, + "loss_total": 0.39959827065467834, + "step": 201899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.60234260559082, + "loss_rtd": 0.24860233068466187, + "loss_sent": 0.6529000401496887, + "loss_sod": 0.05441371351480484, + "loss_total": 0.9559160470962524, + "step": 201899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.5367225408554077, + "learning_rate": 3.4579635377617485e-05, + "loss": 0.4866, + "step": 201900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.0976762771606445, + "loss_rtd": 0.2277284413576126, + "loss_sent": 3.441545050009154e-05, + "loss_sod": 0.19301730394363403, + "loss_total": 0.42078015208244324, + "step": 201999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.853547096252441, + "loss_rtd": 0.22184228897094727, + "loss_sent": 0.0005072517087683082, + "loss_sod": 0.10036781430244446, + "loss_total": 0.3227173686027527, + "step": 201999 + }, + { + "epoch": 0.02, + "grad_norm": 0.8136307597160339, + "learning_rate": 3.4549452082792585e-05, + "loss": 0.457, + "step": 202000 + }, + { + "epoch": 0.02, + "eval_loss": 0.45421433448791504, + "eval_runtime": 151.0851, + "eval_samples_per_second": 102.214, + "eval_steps_per_second": 0.801, + "step": 202000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.407601356506348, + "loss_rtd": 0.25276073813438416, + "loss_sent": 0.2210267335176468, + "loss_sod": 0.01045980490744114, + "loss_total": 0.48424726724624634, + "step": 202099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.363122940063477, + "loss_rtd": 0.257572740316391, + "loss_sent": 0.1861880123615265, + "loss_sod": 0.0553722158074379, + "loss_total": 0.4991329610347748, + "step": 202099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.7221114039421082, + "learning_rate": 3.451927501144653e-05, + "loss": 0.4872, + "step": 202100 + }, + { + "epoch": 0.020398, + "loss_gen": 4.728961944580078, + "loss_rtd": 0.21321223676204681, + "loss_sent": 3.206491965102032e-05, + "loss_sod": 0.2221163958311081, + "loss_total": 0.43536069989204407, + "step": 202199 + }, + { + "epoch": 0.020398, + "loss_gen": 4.939293384552002, + "loss_rtd": 0.24339261651039124, + "loss_sent": 0.06533413380384445, + "loss_sod": 0.051401764154434204, + "loss_total": 0.3601285219192505, + "step": 202199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.0307354927062988, + "learning_rate": 3.448910417573465e-05, + "loss": 0.4602, + "step": 202200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.139678955078125, + "loss_rtd": 0.23202811181545258, + "loss_sent": 4.979508958058432e-05, + "loss_sod": 0.07965496182441711, + "loss_total": 0.31173285841941833, + "step": 202299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.023827075958252, + "loss_rtd": 0.24008287489414215, + "loss_sent": 0.012041668407619, + "loss_sod": 0.16286291182041168, + "loss_total": 0.4149874448776245, + "step": 202299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.039016842842102, + "learning_rate": 3.4458939587809745e-05, + "loss": 0.4503, + "step": 202300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.28715705871582, + "loss_rtd": 0.22881212830543518, + "loss_sent": 0.24012714624404907, + "loss_sod": 0.004217217210680246, + "loss_total": 0.47315651178359985, + "step": 202399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.761211395263672, + "loss_rtd": 0.25692808628082275, + "loss_sent": 0.15823429822921753, + "loss_sod": 0.07715623825788498, + "loss_total": 0.49231863021850586, + "step": 202399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.0622472763061523, + "learning_rate": 3.442878125982213e-05, + "loss": 0.4734, + "step": 202400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.585818290710449, + "loss_rtd": 0.26084500551223755, + "loss_sent": 0.0931270495057106, + "loss_sod": 0.03289801999926567, + "loss_total": 0.38687005639076233, + "step": 202499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.775491714477539, + "loss_rtd": 0.24642148613929749, + "loss_sent": 0.20359787344932556, + "loss_sod": 0.07437972724437714, + "loss_total": 0.5243990421295166, + "step": 202499 + }, + { + "epoch": 0.021, + "grad_norm": 1.4548580646514893, + "learning_rate": 3.4398629203919556e-05, + "loss": 0.4586, + "step": 202500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.792977333068848, + "loss_rtd": 0.23596957325935364, + "loss_sent": 0.09734531491994858, + "loss_sod": 0.07019216567277908, + "loss_total": 0.4035070538520813, + "step": 202599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.22594690322876, + "loss_rtd": 0.2360369712114334, + "loss_sent": 4.6116518205963075e-05, + "loss_sod": 0.14883092045783997, + "loss_total": 0.38491401076316833, + "step": 202599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.2377222776412964, + "learning_rate": 3.436848343224727e-05, + "loss": 0.4865, + "step": 202600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.328969955444336, + "loss_rtd": 0.2357320785522461, + "loss_sent": 0.021743187680840492, + "loss_sod": 0.08085143566131592, + "loss_total": 0.33832669258117676, + "step": 202699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.124855041503906, + "loss_rtd": 0.216110497713089, + "loss_sent": 0.007868066430091858, + "loss_sod": 0.09717868268489838, + "loss_total": 0.32115721702575684, + "step": 202699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.7213184237480164, + "learning_rate": 3.433834395694799e-05, + "loss": 0.4691, + "step": 202700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.366321563720703, + "loss_rtd": 0.2519897222518921, + "loss_sent": 0.015164789743721485, + "loss_sod": 0.06233523041009903, + "loss_total": 0.32948973774909973, + "step": 202799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.971346855163574, + "loss_rtd": 0.22204498946666718, + "loss_sent": 3.21301122312434e-05, + "loss_sod": 0.08021879196166992, + "loss_total": 0.3022959232330322, + "step": 202799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.6737499237060547, + "learning_rate": 3.43082107901619e-05, + "loss": 0.4849, + "step": 202800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.622530460357666, + "loss_rtd": 0.24170541763305664, + "loss_sent": 0.056620679795742035, + "loss_sod": 0.04395980015397072, + "loss_total": 0.3422859013080597, + "step": 202899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.886425971984863, + "loss_rtd": 0.24952994287014008, + "loss_sent": 0.13775122165679932, + "loss_sod": 0.02643515169620514, + "loss_total": 0.41371631622314453, + "step": 202899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.9930436015129089, + "learning_rate": 3.427808394402661e-05, + "loss": 0.4652, + "step": 202900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.827584266662598, + "loss_rtd": 0.23784901201725006, + "loss_sent": 0.3198770582675934, + "loss_sod": 0.07419580966234207, + "loss_total": 0.6319218873977661, + "step": 202999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.748804569244385, + "loss_rtd": 0.2546565532684326, + "loss_sent": 0.37579435110092163, + "loss_sod": 0.047191210091114044, + "loss_total": 0.6776421070098877, + "step": 202999 + }, + { + "epoch": 0.022, + "grad_norm": 1.7178469896316528, + "learning_rate": 3.424796343067724e-05, + "loss": 0.4531, + "step": 203000 + }, + { + "epoch": 0.022, + "eval_loss": 0.44866427779197693, + "eval_runtime": 150.7737, + "eval_samples_per_second": 102.425, + "eval_steps_per_second": 0.803, + "step": 203000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.684000492095947, + "loss_rtd": 0.25918105244636536, + "loss_sent": 0.0925687924027443, + "loss_sod": 0.014433680102229118, + "loss_total": 0.3661835193634033, + "step": 203099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.693911075592041, + "loss_rtd": 0.24773980677127838, + "loss_sent": 0.053045522421598434, + "loss_sod": 0.04356825351715088, + "loss_total": 0.3443535566329956, + "step": 203099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.1181321144104004, + "learning_rate": 3.421784926224632e-05, + "loss": 0.473, + "step": 203100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.202003479003906, + "loss_rtd": 0.21969114243984222, + "loss_sent": 0.028610864654183388, + "loss_sod": 0.14271146059036255, + "loss_total": 0.3910134732723236, + "step": 203199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.214725017547607, + "loss_rtd": 0.23188306391239166, + "loss_sent": 0.041248664259910583, + "loss_sod": 0.0785137414932251, + "loss_total": 0.35164546966552734, + "step": 203199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.9796826839447021, + "learning_rate": 3.418774145086382e-05, + "loss": 0.481, + "step": 203200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.901376247406006, + "loss_rtd": 0.2233228087425232, + "loss_sent": 0.0004384858184494078, + "loss_sod": 0.13029982149600983, + "loss_total": 0.354061096906662, + "step": 203299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.945909023284912, + "loss_rtd": 0.23735138773918152, + "loss_sent": 0.004065847024321556, + "loss_sod": 0.1614292860031128, + "loss_total": 0.4028465151786804, + "step": 203299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.9993460774421692, + "learning_rate": 3.4157640008657174e-05, + "loss": 0.4838, + "step": 203300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.35711145401001, + "loss_rtd": 0.27121755480766296, + "loss_sent": 0.13728365302085876, + "loss_sod": 0.016950685530900955, + "loss_total": 0.4254519045352936, + "step": 203399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.855948448181152, + "loss_rtd": 0.2642308473587036, + "loss_sent": 0.0945664644241333, + "loss_sod": 0.03819169104099274, + "loss_total": 0.39698898792266846, + "step": 203399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.8500588536262512, + "learning_rate": 3.412754494775123e-05, + "loss": 0.4711, + "step": 203400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.399954795837402, + "loss_rtd": 0.26527848839759827, + "loss_sent": 0.16523225605487823, + "loss_sod": 0.052225202322006226, + "loss_total": 0.48273593187332153, + "step": 203499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.9042792320251465, + "loss_rtd": 0.2657572329044342, + "loss_sent": 0.09081815183162689, + "loss_sod": 0.07401497662067413, + "loss_total": 0.4305903911590576, + "step": 203499 + }, + { + "epoch": 0.023, + "grad_norm": 0.8275566101074219, + "learning_rate": 3.4097456280268304e-05, + "loss": 0.4863, + "step": 203500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.517064094543457, + "loss_rtd": 0.24983039498329163, + "loss_sent": 0.5964022278785706, + "loss_sod": 0.012449709698557854, + "loss_total": 0.8586823344230652, + "step": 203599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.628481388092041, + "loss_rtd": 0.2514532506465912, + "loss_sent": 0.161366805434227, + "loss_sod": 0.021773474290966988, + "loss_total": 0.4345935583114624, + "step": 203599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.1252191066741943, + "learning_rate": 3.4067374018328066e-05, + "loss": 0.4583, + "step": 203600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.702511787414551, + "loss_rtd": 0.24112685024738312, + "loss_sent": 0.26646703481674194, + "loss_sod": 0.034668125212192535, + "loss_total": 0.5422620177268982, + "step": 203699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.327409267425537, + "loss_rtd": 0.23813338577747345, + "loss_sent": 0.0379524864256382, + "loss_sod": 0.1470402032136917, + "loss_total": 0.42312607169151306, + "step": 203699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.257358431816101, + "learning_rate": 3.403729817404768e-05, + "loss": 0.4791, + "step": 203700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.931430816650391, + "loss_rtd": 0.2473500519990921, + "loss_sent": 0.1528945118188858, + "loss_sod": 0.028097284957766533, + "loss_total": 0.4283418655395508, + "step": 203799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.680041790008545, + "loss_rtd": 0.2385464310646057, + "loss_sent": 0.04751960188150406, + "loss_sod": 0.03606370836496353, + "loss_total": 0.3221297264099121, + "step": 203799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.6562179327011108, + "learning_rate": 3.400722875954168e-05, + "loss": 0.4803, + "step": 203800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.830761909484863, + "loss_rtd": 0.23952393233776093, + "loss_sent": 0.11855100095272064, + "loss_sod": 0.06821095943450928, + "loss_total": 0.42628592252731323, + "step": 203899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.562859535217285, + "loss_rtd": 0.23691979050636292, + "loss_sent": 0.08273235708475113, + "loss_sod": 0.03769238665699959, + "loss_total": 0.35734453797340393, + "step": 203899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.172888994216919, + "learning_rate": 3.3977165786922016e-05, + "loss": 0.4758, + "step": 203900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.457442283630371, + "loss_rtd": 0.2446732521057129, + "loss_sent": 0.25208550691604614, + "loss_sod": 0.028653493151068687, + "loss_total": 0.5254122614860535, + "step": 203999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.817096710205078, + "loss_rtd": 0.2458757609128952, + "loss_sent": 0.14045284688472748, + "loss_sod": 0.018014002591371536, + "loss_total": 0.4043425917625427, + "step": 203999 + }, + { + "epoch": 0.024, + "grad_norm": 0.9994099736213684, + "learning_rate": 3.394710926829806e-05, + "loss": 0.4809, + "step": 204000 + }, + { + "epoch": 0.024, + "eval_loss": 0.44341379404067993, + "eval_runtime": 151.3617, + "eval_samples_per_second": 102.027, + "eval_steps_per_second": 0.799, + "step": 204000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.479134559631348, + "loss_rtd": 0.25226548314094543, + "loss_sent": 0.21348440647125244, + "loss_sod": 0.08062466979026794, + "loss_total": 0.5463745594024658, + "step": 204099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.48226261138916, + "loss_rtd": 0.25293290615081787, + "loss_sent": 0.19629566371440887, + "loss_sod": 0.09361980855464935, + "loss_total": 0.5428484082221985, + "step": 204099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.9648359417915344, + "learning_rate": 3.391705921577658e-05, + "loss": 0.4906, + "step": 204100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.709273338317871, + "loss_rtd": 0.23943577706813812, + "loss_sent": 0.1579311639070511, + "loss_sod": 0.028613269329071045, + "loss_total": 0.42598021030426025, + "step": 204199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.064204692840576, + "loss_rtd": 0.22587241232395172, + "loss_sent": 4.577033905661665e-05, + "loss_sod": 0.18077200651168823, + "loss_total": 0.40669018030166626, + "step": 204199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.1610053777694702, + "learning_rate": 3.388701564146171e-05, + "loss": 0.4618, + "step": 204200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.936402320861816, + "loss_rtd": 0.2294440120458603, + "loss_sent": 0.166824609041214, + "loss_sod": 0.029074087738990784, + "loss_total": 0.42534270882606506, + "step": 204299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.647017478942871, + "loss_rtd": 0.24903364479541779, + "loss_sent": 0.16078977286815643, + "loss_sod": 0.068865567445755, + "loss_total": 0.4786890149116516, + "step": 204299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.1673539876937866, + "learning_rate": 3.385697855745502e-05, + "loss": 0.4718, + "step": 204300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.273296356201172, + "loss_rtd": 0.23775988817214966, + "loss_sent": 0.21562601625919342, + "loss_sod": 0.019672540947794914, + "loss_total": 0.47305846214294434, + "step": 204399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.458662986755371, + "loss_rtd": 0.24080781638622284, + "loss_sent": 0.07909756898880005, + "loss_sod": 0.02410779520869255, + "loss_total": 0.34401318430900574, + "step": 204399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.3262639045715332, + "learning_rate": 3.3826947975855425e-05, + "loss": 0.4762, + "step": 204400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.764666557312012, + "loss_rtd": 0.2508697807788849, + "loss_sent": 0.11904556304216385, + "loss_sod": 0.12209869921207428, + "loss_total": 0.4920140504837036, + "step": 204499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.455235481262207, + "loss_rtd": 0.2474861890077591, + "loss_sent": 0.2829621732234955, + "loss_sod": 0.045313261449337006, + "loss_total": 0.575761616230011, + "step": 204499 + }, + { + "epoch": 0.025, + "grad_norm": 0.8297535181045532, + "learning_rate": 3.379692390875927e-05, + "loss": 0.4805, + "step": 204500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.957808017730713, + "loss_rtd": 0.2471088021993637, + "loss_sent": 0.05789091810584068, + "loss_sod": 0.0650874674320221, + "loss_total": 0.3700871765613556, + "step": 204599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.533000469207764, + "loss_rtd": 0.24974395334720612, + "loss_sent": 0.1626661866903305, + "loss_sod": 0.0023819920606911182, + "loss_total": 0.41479212045669556, + "step": 204599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.7717977166175842, + "learning_rate": 3.376690636826023e-05, + "loss": 0.479, + "step": 204600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.416359901428223, + "loss_rtd": 0.24626386165618896, + "loss_sent": 0.0010380190797150135, + "loss_sod": 0.10930918902158737, + "loss_total": 0.35661107301712036, + "step": 204699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.751550197601318, + "loss_rtd": 0.21348457038402557, + "loss_sent": 3.4875549317803234e-05, + "loss_sod": 0.07302307337522507, + "loss_total": 0.28654250502586365, + "step": 204699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.7635507583618164, + "learning_rate": 3.373689536644934e-05, + "loss": 0.4802, + "step": 204700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.804357528686523, + "loss_rtd": 0.22278271615505219, + "loss_sent": 0.10689501464366913, + "loss_sod": 0.033710166811943054, + "loss_total": 0.3633878827095032, + "step": 204799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.579931735992432, + "loss_rtd": 0.2518373727798462, + "loss_sent": 0.34698089957237244, + "loss_sod": 0.05123288929462433, + "loss_total": 0.6500511169433594, + "step": 204799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.9754551649093628, + "learning_rate": 3.3706890915415076e-05, + "loss": 0.4751, + "step": 204800 + }, + { + "epoch": 0.025798, + "loss_gen": 4.703709602355957, + "loss_rtd": 0.22530224919319153, + "loss_sent": 0.006922869477421045, + "loss_sod": 0.047425776720047, + "loss_total": 0.27965089678764343, + "step": 204899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.458637237548828, + "loss_rtd": 0.21392790973186493, + "loss_sent": 0.10577099770307541, + "loss_sod": 0.04395810514688492, + "loss_total": 0.36365702748298645, + "step": 204899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.8675275444984436, + "learning_rate": 3.3676893027243185e-05, + "loss": 0.4619, + "step": 204900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.478752613067627, + "loss_rtd": 0.2437988519668579, + "loss_sent": 0.23692891001701355, + "loss_sod": 0.03953733295202255, + "loss_total": 0.5202651023864746, + "step": 204999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.675318717956543, + "loss_rtd": 0.23829564452171326, + "loss_sent": 0.21041101217269897, + "loss_sod": 0.0461123026907444, + "loss_total": 0.49481895565986633, + "step": 204999 + }, + { + "epoch": 0.026, + "grad_norm": 1.635738492012024, + "learning_rate": 3.3646901714016846e-05, + "loss": 0.4671, + "step": 205000 + }, + { + "epoch": 0.026, + "eval_loss": 0.4509928524494171, + "eval_runtime": 150.9297, + "eval_samples_per_second": 102.319, + "eval_steps_per_second": 0.802, + "step": 205000 + }, + { + "epoch": 0.026198, + "loss_gen": 6.056728839874268, + "loss_rtd": 0.23301714658737183, + "loss_sent": 0.17012298107147217, + "loss_sod": 0.09039507061243057, + "loss_total": 0.49353519082069397, + "step": 205099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.634973049163818, + "loss_rtd": 0.21394062042236328, + "loss_sent": 0.11980615556240082, + "loss_sod": 0.048588450998067856, + "loss_total": 0.38233524560928345, + "step": 205099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.3641608953475952, + "learning_rate": 3.3616916987816515e-05, + "loss": 0.467, + "step": 205100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.877918720245361, + "loss_rtd": 0.2511952519416809, + "loss_sent": 0.20603446662425995, + "loss_sod": 0.0371844507753849, + "loss_total": 0.49441415071487427, + "step": 205199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.692530632019043, + "loss_rtd": 0.24305275082588196, + "loss_sent": 0.11774775385856628, + "loss_sod": 0.07681696861982346, + "loss_total": 0.4376174807548523, + "step": 205199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.165781855583191, + "learning_rate": 3.3586938860720084e-05, + "loss": 0.4688, + "step": 205200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.86293363571167, + "loss_rtd": 0.2578810751438141, + "loss_sent": 0.14761590957641602, + "loss_sod": 0.03640330582857132, + "loss_total": 0.4419002830982208, + "step": 205299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.422086238861084, + "loss_rtd": 0.23155608773231506, + "loss_sent": 0.22479580342769623, + "loss_sod": 0.002203958109021187, + "loss_total": 0.45855584740638733, + "step": 205299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.6977037191390991, + "learning_rate": 3.355696734480271e-05, + "loss": 0.4774, + "step": 205300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.680825710296631, + "loss_rtd": 0.23180951178073883, + "loss_sent": 0.11733356863260269, + "loss_sod": 0.08922179043292999, + "loss_total": 0.4383648633956909, + "step": 205399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.45111083984375, + "loss_rtd": 0.22984595596790314, + "loss_sent": 0.27929097414016724, + "loss_sod": 0.04875180870294571, + "loss_total": 0.5578887462615967, + "step": 205399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.9854266047477722, + "learning_rate": 3.352700245213693e-05, + "loss": 0.4862, + "step": 205400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.818627834320068, + "loss_rtd": 0.2522910535335541, + "loss_sent": 0.1124449223279953, + "loss_sod": 0.04227959364652634, + "loss_total": 0.4070155620574951, + "step": 205499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.588534832000732, + "loss_rtd": 0.2587710916996002, + "loss_sent": 0.2575885057449341, + "loss_sod": 0.10087103396654129, + "loss_total": 0.6172306537628174, + "step": 205499 + }, + { + "epoch": 0.027, + "grad_norm": 1.0960627794265747, + "learning_rate": 3.349704419479258e-05, + "loss": 0.4731, + "step": 205500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.415334224700928, + "loss_rtd": 0.230610191822052, + "loss_sent": 0.112730011343956, + "loss_sod": 0.05337172746658325, + "loss_total": 0.39671194553375244, + "step": 205599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.831805229187012, + "loss_rtd": 0.19442704319953918, + "loss_sent": 0.0017129608895629644, + "loss_sod": 0.06312575191259384, + "loss_total": 0.2592657506465912, + "step": 205599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.7432999014854431, + "learning_rate": 3.346709258483687e-05, + "loss": 0.4788, + "step": 205600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.799887180328369, + "loss_rtd": 0.24671250581741333, + "loss_sent": 0.10987219959497452, + "loss_sod": 0.17109833657741547, + "loss_total": 0.5276830792427063, + "step": 205699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.3035759925842285, + "loss_rtd": 0.2797275185585022, + "loss_sent": 0.16637729108333588, + "loss_sod": 0.1263885498046875, + "loss_total": 0.5724933743476868, + "step": 205699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.4277757406234741, + "learning_rate": 3.3437147634334274e-05, + "loss": 0.4798, + "step": 205700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.149640083312988, + "loss_rtd": 0.2670230269432068, + "loss_sent": 0.18647268414497375, + "loss_sod": 0.12289707362651825, + "loss_total": 0.5763927698135376, + "step": 205799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.793913841247559, + "loss_rtd": 0.25202977657318115, + "loss_sent": 0.19943426549434662, + "loss_sod": 0.039625078439712524, + "loss_total": 0.4910891056060791, + "step": 205799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8974087834358215, + "learning_rate": 3.3407209355346644e-05, + "loss": 0.4759, + "step": 205800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.44106912612915, + "loss_rtd": 0.24474363029003143, + "loss_sent": 0.13920697569847107, + "loss_sod": 0.03924485296010971, + "loss_total": 0.4231954514980316, + "step": 205899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.045462131500244, + "loss_rtd": 0.22086864709854126, + "loss_sent": 0.007559431251138449, + "loss_sod": 0.16332033276557922, + "loss_total": 0.3917483985424042, + "step": 205899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.0281552076339722, + "learning_rate": 3.337727775993309e-05, + "loss": 0.4721, + "step": 205900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.836759090423584, + "loss_rtd": 0.20352813601493835, + "loss_sent": 0.01286726351827383, + "loss_sod": 0.07332977652549744, + "loss_total": 0.2897251844406128, + "step": 205999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.748128890991211, + "loss_rtd": 0.21712294220924377, + "loss_sent": 8.002267713891342e-05, + "loss_sod": 0.14717672765254974, + "loss_total": 0.3643796741962433, + "step": 205999 + }, + { + "epoch": 0.028, + "grad_norm": 1.1035751104354858, + "learning_rate": 3.334735286015007e-05, + "loss": 0.4822, + "step": 206000 + }, + { + "epoch": 0.028, + "eval_loss": 0.4547508656978607, + "eval_runtime": 151.0983, + "eval_samples_per_second": 102.205, + "eval_steps_per_second": 0.801, + "step": 206000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.244474411010742, + "loss_rtd": 0.2550910413265228, + "loss_sent": 0.2698170244693756, + "loss_sod": 0.024612342938780785, + "loss_total": 0.5495203733444214, + "step": 206099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.359042644500732, + "loss_rtd": 0.2508499026298523, + "loss_sent": 0.22385083138942719, + "loss_sod": 0.07547710835933685, + "loss_total": 0.5501778721809387, + "step": 206099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.1150691509246826, + "learning_rate": 3.331743466805133e-05, + "loss": 0.4684, + "step": 206100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.788224697113037, + "loss_rtd": 0.26031696796417236, + "loss_sent": 0.19803516566753387, + "loss_sod": 0.08414338529109955, + "loss_total": 0.5424955487251282, + "step": 206199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.410754680633545, + "loss_rtd": 0.28402501344680786, + "loss_sent": 0.08933217823505402, + "loss_sod": 0.008630115538835526, + "loss_total": 0.3819873332977295, + "step": 206199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.027699589729309, + "learning_rate": 3.3287523195687907e-05, + "loss": 0.4682, + "step": 206200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.61097526550293, + "loss_rtd": 0.26154080033302307, + "loss_sent": 0.24920663237571716, + "loss_sod": 0.002796958899125457, + "loss_total": 0.5135443806648254, + "step": 206299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.758017539978027, + "loss_rtd": 0.241214781999588, + "loss_sent": 0.2503407895565033, + "loss_sod": 0.056327708065509796, + "loss_total": 0.5478832721710205, + "step": 206299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.1785681247711182, + "learning_rate": 3.3257618455108154e-05, + "loss": 0.4525, + "step": 206300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.683103084564209, + "loss_rtd": 0.23636199533939362, + "loss_sent": 0.07750055938959122, + "loss_sod": 0.044951193034648895, + "loss_total": 0.35881373286247253, + "step": 206399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.5561628341674805, + "loss_rtd": 0.22717030346393585, + "loss_sent": 0.17105694115161896, + "loss_sod": 0.06201820448040962, + "loss_total": 0.46024543046951294, + "step": 206399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.4344044923782349, + "learning_rate": 3.322772045835767e-05, + "loss": 0.4784, + "step": 206400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.1489176750183105, + "loss_rtd": 0.2281053066253662, + "loss_sent": 0.08055388182401657, + "loss_sod": 0.04174261540174484, + "loss_total": 0.3504018187522888, + "step": 206499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.431298732757568, + "loss_rtd": 0.2533523142337799, + "loss_sent": 0.39194077253341675, + "loss_sod": 0.05683267489075661, + "loss_total": 0.7021257877349854, + "step": 206499 + }, + { + "epoch": 0.029, + "grad_norm": 1.2416197061538696, + "learning_rate": 3.319782921747939e-05, + "loss": 0.4852, + "step": 206500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.777912139892578, + "loss_rtd": 0.2684161365032196, + "loss_sent": 0.6214912533760071, + "loss_sod": 0.10799536108970642, + "loss_total": 0.9979027509689331, + "step": 206599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.7680253982543945, + "loss_rtd": 0.23439471423625946, + "loss_sent": 0.26999813318252563, + "loss_sod": 0.02371506206691265, + "loss_total": 0.5281078815460205, + "step": 206599 + }, + { + "epoch": 0.0292, + "grad_norm": 4.099353790283203, + "learning_rate": 3.316794474451348e-05, + "loss": 0.4565, + "step": 206600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.284374713897705, + "loss_rtd": 0.22979433834552765, + "loss_sent": 0.13422654569149017, + "loss_sod": 0.021393032744526863, + "loss_total": 0.3854139447212219, + "step": 206699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.886496543884277, + "loss_rtd": 0.25662046670913696, + "loss_sent": 0.14732879400253296, + "loss_sod": 0.118320994079113, + "loss_total": 0.5222702622413635, + "step": 206699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.9259575009346008, + "learning_rate": 3.3138067051497425e-05, + "loss": 0.4689, + "step": 206700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.3440470695495605, + "loss_rtd": 0.25219035148620605, + "loss_sent": 0.26719051599502563, + "loss_sod": 0.014408763498067856, + "loss_total": 0.5337896347045898, + "step": 206799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.437257766723633, + "loss_rtd": 0.2473822981119156, + "loss_sent": 0.1943555474281311, + "loss_sod": 0.10320235788822174, + "loss_total": 0.5449402332305908, + "step": 206799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.2678300142288208, + "learning_rate": 3.3108196150465935e-05, + "loss": 0.4548, + "step": 206800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.510725021362305, + "loss_rtd": 0.23764798045158386, + "loss_sent": 0.13177217543125153, + "loss_sod": 0.005060961470007896, + "loss_total": 0.37448111176490784, + "step": 206899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.919556617736816, + "loss_rtd": 0.2585257887840271, + "loss_sent": 0.1994444876909256, + "loss_sod": 0.10045526921749115, + "loss_total": 0.5584255456924438, + "step": 206899 + }, + { + "epoch": 0.0298, + "grad_norm": 0.8215299844741821, + "learning_rate": 3.307833205345103e-05, + "loss": 0.4792, + "step": 206900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.876476287841797, + "loss_rtd": 0.25320500135421753, + "loss_sent": 0.4009746015071869, + "loss_sod": 0.0036950299981981516, + "loss_total": 0.6578746438026428, + "step": 206999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.347562313079834, + "loss_rtd": 0.2552586495876312, + "loss_sent": 0.13582611083984375, + "loss_sod": 0.021963156759738922, + "loss_total": 0.4130479097366333, + "step": 206999 + }, + { + "epoch": 0.03, + "grad_norm": 2.0394182205200195, + "learning_rate": 3.304847477248193e-05, + "loss": 0.4797, + "step": 207000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4456648826599121, + "eval_runtime": 150.9782, + "eval_samples_per_second": 102.286, + "eval_steps_per_second": 0.801, + "step": 207000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.376842975616455, + "loss_rtd": 0.2462959587574005, + "loss_sent": 0.2018681764602661, + "loss_sod": 0.008978866040706635, + "loss_total": 0.45714300870895386, + "step": 207099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.584357738494873, + "loss_rtd": 0.23943877220153809, + "loss_sent": 0.2790125906467438, + "loss_sod": 0.021808486431837082, + "loss_total": 0.540259838104248, + "step": 207099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.0433752536773682, + "learning_rate": 3.301862431958519e-05, + "loss": 0.4795, + "step": 207100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.446674823760986, + "loss_rtd": 0.23925071954727173, + "loss_sent": 0.224978506565094, + "loss_sod": 0.03975839912891388, + "loss_total": 0.5039876103401184, + "step": 207199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.688730716705322, + "loss_rtd": 0.24019432067871094, + "loss_sent": 0.18840710818767548, + "loss_sod": 0.11322510987520218, + "loss_total": 0.5418265461921692, + "step": 207199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.1642003059387207, + "learning_rate": 3.2988780706784515e-05, + "loss": 0.4705, + "step": 207200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.471463203430176, + "loss_rtd": 0.2537890374660492, + "loss_sent": 0.08549313992261887, + "loss_sod": 0.02495107799768448, + "loss_total": 0.36423325538635254, + "step": 207299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.5565032958984375, + "loss_rtd": 0.24373923242092133, + "loss_sent": 0.1720479428768158, + "loss_sod": 0.057914912700653076, + "loss_total": 0.4737021028995514, + "step": 207299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.752086877822876, + "learning_rate": 3.2958943946100963e-05, + "loss": 0.4674, + "step": 207300 + }, + { + "epoch": 0.030798, + "loss_gen": 6.340982913970947, + "loss_rtd": 0.26111024618148804, + "loss_sent": 0.21188463270664215, + "loss_sod": 0.1566849648952484, + "loss_total": 0.6296798586845398, + "step": 207399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.478623867034912, + "loss_rtd": 0.24548162519931793, + "loss_sent": 0.027345668524503708, + "loss_sod": 0.22641485929489136, + "loss_total": 0.4992421269416809, + "step": 207399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.1571487188339233, + "learning_rate": 3.292911404955273e-05, + "loss": 0.4796, + "step": 207400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.6921892166137695, + "loss_rtd": 0.24621668457984924, + "loss_sent": 0.07063555717468262, + "loss_sod": 0.08260297775268555, + "loss_total": 0.3994552195072174, + "step": 207499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.785663604736328, + "loss_rtd": 0.2260243445634842, + "loss_sent": 0.10995034873485565, + "loss_sod": 0.06233404576778412, + "loss_total": 0.39830875396728516, + "step": 207499 + }, + { + "epoch": 0.031, + "grad_norm": 1.4793405532836914, + "learning_rate": 3.2899291029155335e-05, + "loss": 0.4597, + "step": 207500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.756110668182373, + "loss_rtd": 0.25878915190696716, + "loss_sent": 0.2612974941730499, + "loss_sod": 0.02312314696609974, + "loss_total": 0.5432097911834717, + "step": 207599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.803622245788574, + "loss_rtd": 0.2384912073612213, + "loss_sent": 0.31777098774909973, + "loss_sod": 0.023026280105113983, + "loss_total": 0.5792884826660156, + "step": 207599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.9261624813079834, + "learning_rate": 3.286947489692145e-05, + "loss": 0.4743, + "step": 207600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.427712917327881, + "loss_rtd": 0.26596778631210327, + "loss_sent": 0.29693910479545593, + "loss_sod": 0.037313319742679596, + "loss_total": 0.6002202033996582, + "step": 207699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.803351879119873, + "loss_rtd": 0.2527596354484558, + "loss_sent": 0.27306580543518066, + "loss_sod": 0.09535633027553558, + "loss_total": 0.6211817860603333, + "step": 207699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.2428370714187622, + "learning_rate": 3.2839665664861044e-05, + "loss": 0.472, + "step": 207700 + }, + { + "epoch": 0.031598, + "loss_gen": 4.810303688049316, + "loss_rtd": 0.21520909667015076, + "loss_sent": 0.0006225758697837591, + "loss_sod": 0.07345493137836456, + "loss_total": 0.28928661346435547, + "step": 207799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.153961658477783, + "loss_rtd": 0.21760773658752441, + "loss_sent": 0.002753297798335552, + "loss_sod": 0.11247245222330093, + "loss_total": 0.332833468914032, + "step": 207799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.764484703540802, + "learning_rate": 3.280986334498125e-05, + "loss": 0.4687, + "step": 207800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.668840408325195, + "loss_rtd": 0.2228214293718338, + "loss_sent": 0.21543559432029724, + "loss_sod": 0.010019434615969658, + "loss_total": 0.44827646017074585, + "step": 207899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.782387733459473, + "loss_rtd": 0.2503328025341034, + "loss_sent": 0.20673413574695587, + "loss_sod": 0.01645244099199772, + "loss_total": 0.47351938486099243, + "step": 207899 + }, + { + "epoch": 0.0318, + "grad_norm": 0.7568258047103882, + "learning_rate": 3.2780067949286444e-05, + "loss": 0.4886, + "step": 207900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.21542501449585, + "loss_rtd": 0.24553513526916504, + "loss_sent": 0.10508822649717331, + "loss_sod": 0.06286120414733887, + "loss_total": 0.4134845733642578, + "step": 207999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.18034029006958, + "loss_rtd": 0.22578170895576477, + "loss_sent": 0.14467483758926392, + "loss_sod": 0.1189010739326477, + "loss_total": 0.4893576204776764, + "step": 207999 + }, + { + "epoch": 0.032, + "grad_norm": 1.0519402027130127, + "learning_rate": 3.2750279489778214e-05, + "loss": 0.4812, + "step": 208000 + }, + { + "epoch": 0.032, + "eval_loss": 0.44441160559654236, + "eval_runtime": 152.4424, + "eval_samples_per_second": 101.304, + "eval_steps_per_second": 0.794, + "step": 208000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.8635687828063965, + "loss_rtd": 0.2613697946071625, + "loss_sent": 0.1858176738023758, + "loss_sod": 0.013185801915824413, + "loss_total": 0.46037328243255615, + "step": 208099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.366179466247559, + "loss_rtd": 0.2074943631887436, + "loss_sent": 9.578260505804792e-05, + "loss_sod": 0.08147788792848587, + "loss_total": 0.2890680432319641, + "step": 208099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.0053433179855347, + "learning_rate": 3.272049797845533e-05, + "loss": 0.4755, + "step": 208100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.5080413818359375, + "loss_rtd": 0.22976088523864746, + "loss_sent": 0.014457907527685165, + "loss_sod": 0.037182632833719254, + "loss_total": 0.2814014256000519, + "step": 208199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.42368221282959, + "loss_rtd": 0.2559351623058319, + "loss_sent": 0.2956313192844391, + "loss_sod": 0.0034990981221199036, + "loss_total": 0.5550655722618103, + "step": 208199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.9731106758117676, + "learning_rate": 3.269072342731381e-05, + "loss": 0.4751, + "step": 208200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.340611934661865, + "loss_rtd": 0.24798448383808136, + "loss_sent": 0.1278972327709198, + "loss_sod": 0.014393225312232971, + "loss_total": 0.39027494192123413, + "step": 208299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.826351165771484, + "loss_rtd": 0.23806066811084747, + "loss_sent": 0.326007217168808, + "loss_sod": 0.0613991841673851, + "loss_total": 0.62546706199646, + "step": 208299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.7958987355232239, + "learning_rate": 3.2660955848346805e-05, + "loss": 0.4828, + "step": 208300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.886526584625244, + "loss_rtd": 0.21395230293273926, + "loss_sent": 0.010712208226323128, + "loss_sod": 0.07605178654193878, + "loss_total": 0.3007162809371948, + "step": 208399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.741994857788086, + "loss_rtd": 0.2044481337070465, + "loss_sent": 0.009149810299277306, + "loss_sod": 0.05806032568216324, + "loss_total": 0.2716582715511322, + "step": 208399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.676787257194519, + "learning_rate": 3.263119525354473e-05, + "loss": 0.4691, + "step": 208400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.5689239501953125, + "loss_rtd": 0.26971539855003357, + "loss_sent": 0.3627955913543701, + "loss_sod": 0.0074113160371780396, + "loss_total": 0.6399223208427429, + "step": 208499 + }, + { + "epoch": 0.000998, + "loss_gen": 6.180447578430176, + "loss_rtd": 0.24745963513851166, + "loss_sent": 0.25532370805740356, + "loss_sod": 0.043171755969524384, + "loss_total": 0.5459550619125366, + "step": 208499 + }, + { + "epoch": 0.001, + "grad_norm": 0.9868326187133789, + "learning_rate": 3.260144165489511e-05, + "loss": 0.472, + "step": 208500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.486894130706787, + "loss_rtd": 0.24865777790546417, + "loss_sent": 0.2057969570159912, + "loss_sod": 0.038115762174129486, + "loss_total": 0.4925704896450043, + "step": 208599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.663079261779785, + "loss_rtd": 0.2541826069355011, + "loss_sent": 0.08111198246479034, + "loss_sod": 0.042758356779813766, + "loss_total": 0.3780529499053955, + "step": 208599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.9215517044067383, + "learning_rate": 3.257169506438273e-05, + "loss": 0.4752, + "step": 208600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.853135585784912, + "loss_rtd": 0.26189929246902466, + "loss_sent": 0.11170561611652374, + "loss_sod": 0.028330225497484207, + "loss_total": 0.4019351601600647, + "step": 208699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.584386348724365, + "loss_rtd": 0.25824618339538574, + "loss_sent": 0.3483213484287262, + "loss_sod": 0.018394894897937775, + "loss_total": 0.6249624490737915, + "step": 208699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.4105433225631714, + "learning_rate": 3.254195549398948e-05, + "loss": 0.4563, + "step": 208700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.602505683898926, + "loss_rtd": 0.2587122619152069, + "loss_sent": 0.1277967244386673, + "loss_sod": 0.034992266446352005, + "loss_total": 0.4215012788772583, + "step": 208799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.469400405883789, + "loss_rtd": 0.23010583221912384, + "loss_sent": 0.33312076330184937, + "loss_sod": 0.04757145047187805, + "loss_total": 0.6107980608940125, + "step": 208799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.2992291450500488, + "learning_rate": 3.251222295569448e-05, + "loss": 0.4715, + "step": 208800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.667748928070068, + "loss_rtd": 0.24374772608280182, + "loss_sent": 0.25694599747657776, + "loss_sod": 0.014960157684981823, + "loss_total": 0.5156538486480713, + "step": 208899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.3918232917785645, + "loss_rtd": 0.25772157311439514, + "loss_sent": 0.08614183962345123, + "loss_sod": 0.03495020419359207, + "loss_total": 0.37881362438201904, + "step": 208899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.7250854969024658, + "learning_rate": 3.248249746147397e-05, + "loss": 0.4614, + "step": 208900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.483517169952393, + "loss_rtd": 0.25373467803001404, + "loss_sent": 0.1412976086139679, + "loss_sod": 0.07849239557981491, + "loss_total": 0.47352468967437744, + "step": 208999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.205908298492432, + "loss_rtd": 0.22909490764141083, + "loss_sent": 3.0029106710571796e-05, + "loss_sod": 0.38783594965934753, + "loss_total": 0.616960883140564, + "step": 208999 + }, + { + "epoch": 0.002, + "grad_norm": 1.5134930610656738, + "learning_rate": 3.245277902330139e-05, + "loss": 0.4744, + "step": 209000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4447585344314575, + "eval_runtime": 154.5293, + "eval_samples_per_second": 99.936, + "eval_steps_per_second": 0.783, + "step": 209000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.121287822723389, + "loss_rtd": 0.2065429985523224, + "loss_sent": 0.04182280972599983, + "loss_sod": 0.07478819787502289, + "loss_total": 0.3231540024280548, + "step": 209099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.722884178161621, + "loss_rtd": 0.26243090629577637, + "loss_sent": 0.24262161552906036, + "loss_sod": 0.09496884047985077, + "loss_total": 0.6000213623046875, + "step": 209099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.2191126346588135, + "learning_rate": 3.2423067653147324e-05, + "loss": 0.4696, + "step": 209100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.343850612640381, + "loss_rtd": 0.24848255515098572, + "loss_sent": 0.16229428350925446, + "loss_sod": 0.020043738186359406, + "loss_total": 0.4308205842971802, + "step": 209199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.992334842681885, + "loss_rtd": 0.21648763120174408, + "loss_sent": 0.06389337033033371, + "loss_sod": 0.062385477125644684, + "loss_total": 0.34276649355888367, + "step": 209199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.6456282734870911, + "learning_rate": 3.239336336297951e-05, + "loss": 0.4718, + "step": 209200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.863377571105957, + "loss_rtd": 0.24766705930233002, + "loss_sent": 0.20583513379096985, + "loss_sod": 0.015658937394618988, + "loss_total": 0.46916112303733826, + "step": 209299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.271611213684082, + "loss_rtd": 0.23583559691905975, + "loss_sent": 0.13610097765922546, + "loss_sod": 0.00965056847780943, + "loss_total": 0.3815871477127075, + "step": 209299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.9501907229423523, + "learning_rate": 3.2363666164762826e-05, + "loss": 0.4752, + "step": 209300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.05283784866333, + "loss_rtd": 0.22583165764808655, + "loss_sent": 0.026940269395709038, + "loss_sod": 0.05996691435575485, + "loss_total": 0.312738835811615, + "step": 209399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.836438179016113, + "loss_rtd": 0.21261976659297943, + "loss_sent": 0.05323530361056328, + "loss_sod": 0.1903487741947174, + "loss_total": 0.456203818321228, + "step": 209399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.0178892612457275, + "learning_rate": 3.2333976070459304e-05, + "loss": 0.4769, + "step": 209400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.641650199890137, + "loss_rtd": 0.2465897500514984, + "loss_sent": 0.2687963545322418, + "loss_sod": 0.01301610004156828, + "loss_total": 0.5284022092819214, + "step": 209499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.4567365646362305, + "loss_rtd": 0.20831464231014252, + "loss_sent": 0.09384509176015854, + "loss_sod": 0.020197510719299316, + "loss_total": 0.3223572373390198, + "step": 209499 + }, + { + "epoch": 0.003, + "grad_norm": 0.6911271810531616, + "learning_rate": 3.2304293092028106e-05, + "loss": 0.475, + "step": 209500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.885210990905762, + "loss_rtd": 0.22602224349975586, + "loss_sent": 0.04062027856707573, + "loss_sod": 0.08307226747274399, + "loss_total": 0.3497147858142853, + "step": 209599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.531671047210693, + "loss_rtd": 0.2223314642906189, + "loss_sent": 0.36238762736320496, + "loss_sod": 0.03387190029025078, + "loss_total": 0.6185909509658813, + "step": 209599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.709784746170044, + "learning_rate": 3.227461724142553e-05, + "loss": 0.4709, + "step": 209600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.497905254364014, + "loss_rtd": 0.25370118021965027, + "loss_sent": 0.13840118050575256, + "loss_sod": 0.023736033588647842, + "loss_total": 0.4158383905887604, + "step": 209699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.420224189758301, + "loss_rtd": 0.23246806859970093, + "loss_sent": 0.13259181380271912, + "loss_sod": 0.009585598483681679, + "loss_total": 0.374645471572876, + "step": 209699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.8438321948051453, + "learning_rate": 3.224494853060502e-05, + "loss": 0.459, + "step": 209700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.336059093475342, + "loss_rtd": 0.21610666811466217, + "loss_sent": 0.06361129134893417, + "loss_sod": 0.08039283007383347, + "loss_total": 0.3601107895374298, + "step": 209799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.975579261779785, + "loss_rtd": 0.24493789672851562, + "loss_sent": 0.04117688909173012, + "loss_sod": 0.049076810479164124, + "loss_total": 0.3351915776729584, + "step": 209799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.7402128577232361, + "learning_rate": 3.221528697151712e-05, + "loss": 0.4558, + "step": 209800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.895811080932617, + "loss_rtd": 0.22647565603256226, + "loss_sent": 0.0059408931992948055, + "loss_sod": 0.1335909366607666, + "loss_total": 0.3660074770450592, + "step": 209899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.193243980407715, + "loss_rtd": 0.22750423848628998, + "loss_sent": 0.023350337520241737, + "loss_sod": 0.07141244411468506, + "loss_total": 0.3222670257091522, + "step": 209899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.917251467704773, + "learning_rate": 3.218563257610949e-05, + "loss": 0.4787, + "step": 209900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.881529808044434, + "loss_rtd": 0.2474154531955719, + "loss_sent": 0.1599976271390915, + "loss_sod": 0.046758297830820084, + "loss_total": 0.454171359539032, + "step": 209999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.793908596038818, + "loss_rtd": 0.2482430636882782, + "loss_sent": 0.23379755020141602, + "loss_sod": 0.11410799622535706, + "loss_total": 0.5961486101150513, + "step": 209999 + }, + { + "epoch": 0.004, + "grad_norm": 1.2906124591827393, + "learning_rate": 3.2155985356326934e-05, + "loss": 0.4858, + "step": 210000 + }, + { + "epoch": 0.004, + "eval_loss": 0.44805896282196045, + "eval_runtime": 151.337, + "eval_samples_per_second": 102.044, + "eval_steps_per_second": 0.8, + "step": 210000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.477323532104492, + "loss_rtd": 0.22297251224517822, + "loss_sent": 0.0769631415605545, + "loss_sod": 0.04961954057216644, + "loss_total": 0.34955519437789917, + "step": 210099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.684615612030029, + "loss_rtd": 0.24644732475280762, + "loss_sent": 0.10543947666883469, + "loss_sod": 0.06935250014066696, + "loss_total": 0.42123931646347046, + "step": 210099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.0123628377914429, + "learning_rate": 3.212634532411133e-05, + "loss": 0.4537, + "step": 210100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.729521751403809, + "loss_rtd": 0.23756033182144165, + "loss_sent": 0.19801343977451324, + "loss_sod": 0.07395317405462265, + "loss_total": 0.5095269680023193, + "step": 210199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.19659423828125, + "loss_rtd": 0.2312103807926178, + "loss_sent": 0.03671765699982643, + "loss_sod": 0.11782599985599518, + "loss_total": 0.3857540488243103, + "step": 210199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.8746253252029419, + "learning_rate": 3.209671249140171e-05, + "loss": 0.4708, + "step": 210200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.612518310546875, + "loss_rtd": 0.24346370995044708, + "loss_sent": 0.37320858240127563, + "loss_sod": 0.01649416610598564, + "loss_total": 0.6331664323806763, + "step": 210299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.739737033843994, + "loss_rtd": 0.253539502620697, + "loss_sent": 0.02703433856368065, + "loss_sod": 0.05502090975642204, + "loss_total": 0.3355947434902191, + "step": 210299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.2408486604690552, + "learning_rate": 3.206708687013414e-05, + "loss": 0.4774, + "step": 210300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.601258754730225, + "loss_rtd": 0.2331903576850891, + "loss_sent": 0.12105364352464676, + "loss_sod": 0.09930294007062912, + "loss_total": 0.453546941280365, + "step": 210399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.636895179748535, + "loss_rtd": 0.2383427917957306, + "loss_sent": 0.16379112005233765, + "loss_sod": 0.012728769332170486, + "loss_total": 0.4148626923561096, + "step": 210399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.1434956789016724, + "learning_rate": 3.203746847224185e-05, + "loss": 0.4732, + "step": 210400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.337172508239746, + "loss_rtd": 0.21960227191448212, + "loss_sent": 0.20413963496685028, + "loss_sod": 0.08904212713241577, + "loss_total": 0.5127840638160706, + "step": 210499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.146918296813965, + "loss_rtd": 0.20090530812740326, + "loss_sent": 0.12208490073680878, + "loss_sod": 0.06480234861373901, + "loss_total": 0.38779258728027344, + "step": 210499 + }, + { + "epoch": 0.005, + "grad_norm": 1.1932604312896729, + "learning_rate": 3.20078573096551e-05, + "loss": 0.4668, + "step": 210500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.558023452758789, + "loss_rtd": 0.24844017624855042, + "loss_sent": 0.1205759197473526, + "loss_sod": 0.003456368576735258, + "loss_total": 0.37247246503829956, + "step": 210599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.625892162322998, + "loss_rtd": 0.24933810532093048, + "loss_sent": 0.06842228025197983, + "loss_sod": 0.11991280317306519, + "loss_total": 0.4376731812953949, + "step": 210599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.8431463837623596, + "learning_rate": 3.197825339430128e-05, + "loss": 0.4666, + "step": 210600 + }, + { + "epoch": 0.005398, + "loss_gen": 6.091310024261475, + "loss_rtd": 0.2611507773399353, + "loss_sent": 0.5253362655639648, + "loss_sod": 0.12528419494628906, + "loss_total": 0.9117712378501892, + "step": 210699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.429983139038086, + "loss_rtd": 0.24127110838890076, + "loss_sent": 0.33283531665802, + "loss_sod": 0.09813092648983002, + "loss_total": 0.6722373366355896, + "step": 210699 + }, + { + "epoch": 0.0054, + "grad_norm": 2.887056350708008, + "learning_rate": 3.194865673810483e-05, + "loss": 0.4643, + "step": 210700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.702674388885498, + "loss_rtd": 0.2532796263694763, + "loss_sent": 0.2713761329650879, + "loss_sod": 0.1055951863527298, + "loss_total": 0.6302509307861328, + "step": 210799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.545117378234863, + "loss_rtd": 0.25731727480888367, + "loss_sent": 0.09072793275117874, + "loss_sod": 0.050606194883584976, + "loss_total": 0.3986513912677765, + "step": 210799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.428985834121704, + "learning_rate": 3.191906735298729e-05, + "loss": 0.471, + "step": 210800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.364992618560791, + "loss_rtd": 0.2524144649505615, + "loss_sent": 0.08133655041456223, + "loss_sod": 0.08424169570207596, + "loss_total": 0.4179927110671997, + "step": 210899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.39879846572876, + "loss_rtd": 0.24170148372650146, + "loss_sent": 0.18533536791801453, + "loss_sod": 0.06258462369441986, + "loss_total": 0.48962149024009705, + "step": 210899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.7437417507171631, + "learning_rate": 3.1889485250867265e-05, + "loss": 0.48, + "step": 210900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.75720739364624, + "loss_rtd": 0.25696682929992676, + "loss_sent": 0.22219933569431305, + "loss_sod": 0.03509850800037384, + "loss_total": 0.5142646431922913, + "step": 210999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.4086527824401855, + "loss_rtd": 0.22480274736881256, + "loss_sent": 0.4734490215778351, + "loss_sod": 0.03117639757692814, + "loss_total": 0.7294281721115112, + "step": 210999 + }, + { + "epoch": 0.006, + "grad_norm": 1.9705997705459595, + "learning_rate": 3.1859910443660405e-05, + "loss": 0.4565, + "step": 211000 + }, + { + "epoch": 0.006, + "eval_loss": 0.4437921643257141, + "eval_runtime": 151.2311, + "eval_samples_per_second": 102.115, + "eval_steps_per_second": 0.8, + "step": 211000 + }, + { + "epoch": 0.006198, + "loss_gen": 6.2043352127075195, + "loss_rtd": 0.2633081376552582, + "loss_sent": 0.07349584996700287, + "loss_sod": 0.15775038301944733, + "loss_total": 0.49455440044403076, + "step": 211099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.431703090667725, + "loss_rtd": 0.2513551115989685, + "loss_sent": 0.044419560581445694, + "loss_sod": 0.017070291563868523, + "loss_total": 0.3128449618816376, + "step": 211099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.8284661173820496, + "learning_rate": 3.183034294327946e-05, + "loss": 0.4516, + "step": 211100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.562857151031494, + "loss_rtd": 0.2520465552806854, + "loss_sent": 0.3894054889678955, + "loss_sod": 0.11704543232917786, + "loss_total": 0.7584974765777588, + "step": 211199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.668973445892334, + "loss_rtd": 0.25732630491256714, + "loss_sent": 0.20670296251773834, + "loss_sod": 0.05067497491836548, + "loss_total": 0.5147042274475098, + "step": 211199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.652125597000122, + "learning_rate": 3.18007827616342e-05, + "loss": 0.4687, + "step": 211200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.724977016448975, + "loss_rtd": 0.2492094337940216, + "loss_sent": 0.16580648720264435, + "loss_sod": 0.017729351297020912, + "loss_total": 0.4327452778816223, + "step": 211299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.394907474517822, + "loss_rtd": 0.24357140064239502, + "loss_sent": 0.23959758877754211, + "loss_sod": 0.04065662622451782, + "loss_total": 0.5238256454467773, + "step": 211299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.1622283458709717, + "learning_rate": 3.177122991063146e-05, + "loss": 0.4833, + "step": 211300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.4965925216674805, + "loss_rtd": 0.24693022668361664, + "loss_sent": 0.15063565969467163, + "loss_sod": 0.020055752247571945, + "loss_total": 0.4176216423511505, + "step": 211399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.532601356506348, + "loss_rtd": 0.2507868707180023, + "loss_sent": 0.14384964108467102, + "loss_sod": 0.03301244601607323, + "loss_total": 0.42764896154403687, + "step": 211399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.9734103083610535, + "learning_rate": 3.174168440217514e-05, + "loss": 0.4807, + "step": 211400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.537749767303467, + "loss_rtd": 0.2611582577228546, + "loss_sent": 0.2690557539463043, + "loss_sod": 0.07254573702812195, + "loss_total": 0.6027597188949585, + "step": 211499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.112955570220947, + "loss_rtd": 0.21495188772678375, + "loss_sent": 0.022779934108257294, + "loss_sod": 0.2705698013305664, + "loss_total": 0.5083016157150269, + "step": 211499 + }, + { + "epoch": 0.007, + "grad_norm": 1.480070948600769, + "learning_rate": 3.171214624816616e-05, + "loss": 0.4694, + "step": 211500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.822688579559326, + "loss_rtd": 0.19313618540763855, + "loss_sent": 8.413719478994608e-05, + "loss_sod": 0.17681023478507996, + "loss_total": 0.37003055214881897, + "step": 211599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.394720554351807, + "loss_rtd": 0.23504839837551117, + "loss_sent": 0.09636963903903961, + "loss_sod": 0.07450568675994873, + "loss_total": 0.4059237241744995, + "step": 211599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.0673494338989258, + "learning_rate": 3.1682615460502474e-05, + "loss": 0.4766, + "step": 211600 + }, + { + "epoch": 0.007398, + "loss_gen": 6.004249572753906, + "loss_rtd": 0.25233152508735657, + "loss_sent": 0.05808437243103981, + "loss_sod": 0.05536285787820816, + "loss_total": 0.36577874422073364, + "step": 211699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.868964195251465, + "loss_rtd": 0.24131612479686737, + "loss_sent": 0.17636896669864655, + "loss_sod": 0.035525646060705185, + "loss_total": 0.453210711479187, + "step": 211699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.89599609375, + "learning_rate": 3.1653092051079104e-05, + "loss": 0.4694, + "step": 211700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.826862812042236, + "loss_rtd": 0.2411191612482071, + "loss_sent": 0.15905123949050903, + "loss_sod": 0.08179174363613129, + "loss_total": 0.4819621443748474, + "step": 211799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.79934024810791, + "loss_rtd": 0.23552261292934418, + "loss_sent": 0.16693347692489624, + "loss_sod": 0.048653826117515564, + "loss_total": 0.451109915971756, + "step": 211799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.0250190496444702, + "learning_rate": 3.162357603178805e-05, + "loss": 0.4608, + "step": 211800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.756589889526367, + "loss_rtd": 0.22902975976467133, + "loss_sent": 0.22040459513664246, + "loss_sod": 0.05062992870807648, + "loss_total": 0.5000642538070679, + "step": 211899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.970505714416504, + "loss_rtd": 0.240070179104805, + "loss_sent": 0.3103477656841278, + "loss_sod": 0.036664918065071106, + "loss_total": 0.5870828628540039, + "step": 211899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.3604886531829834, + "learning_rate": 3.159406741451838e-05, + "loss": 0.4661, + "step": 211900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.4524245262146, + "loss_rtd": 0.22372019290924072, + "loss_sent": 0.22474953532218933, + "loss_sod": 0.057086482644081116, + "loss_total": 0.5055562257766724, + "step": 211999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.881251335144043, + "loss_rtd": 0.24504905939102173, + "loss_sent": 0.10071533173322678, + "loss_sod": 0.09807638823986053, + "loss_total": 0.44384080171585083, + "step": 211999 + }, + { + "epoch": 0.008, + "grad_norm": 2.14951491355896, + "learning_rate": 3.156456621115615e-05, + "loss": 0.4727, + "step": 212000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4430355131626129, + "eval_runtime": 152.9269, + "eval_samples_per_second": 100.983, + "eval_steps_per_second": 0.791, + "step": 212000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.8363118171691895, + "loss_rtd": 0.21359658241271973, + "loss_sent": 0.019240180030465126, + "loss_sod": 0.11113185435533524, + "loss_total": 0.34396860003471375, + "step": 212099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.640757083892822, + "loss_rtd": 0.25492802262306213, + "loss_sent": 0.1972058117389679, + "loss_sod": 0.03543057292699814, + "loss_total": 0.48756441473960876, + "step": 212099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.6279405951499939, + "learning_rate": 3.153507243358445e-05, + "loss": 0.4818, + "step": 212100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.463850975036621, + "loss_rtd": 0.23923315107822418, + "loss_sent": 0.17068266868591309, + "loss_sod": 0.06000015512108803, + "loss_total": 0.4699159860610962, + "step": 212199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.92637825012207, + "loss_rtd": 0.2557355463504791, + "loss_sent": 0.206705704331398, + "loss_sod": 0.07056833058595657, + "loss_total": 0.5330095887184143, + "step": 212199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.1855320930480957, + "learning_rate": 3.150558609368336e-05, + "loss": 0.4519, + "step": 212200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.1901140213012695, + "loss_rtd": 0.21692410111427307, + "loss_sent": 0.03507065400481224, + "loss_sod": 0.01112939789891243, + "loss_total": 0.26312413811683655, + "step": 212299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.64489221572876, + "loss_rtd": 0.22842562198638916, + "loss_sent": 0.20782798528671265, + "loss_sod": 0.06458929926156998, + "loss_total": 0.5008429288864136, + "step": 212299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.7970889210700989, + "learning_rate": 3.147610720332998e-05, + "loss": 0.4621, + "step": 212300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.243655204772949, + "loss_rtd": 0.25343963503837585, + "loss_sent": 0.19937506318092346, + "loss_sod": 0.006845582742244005, + "loss_total": 0.45966029167175293, + "step": 212399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.747071266174316, + "loss_rtd": 0.262832373380661, + "loss_sent": 0.09505753964185715, + "loss_sod": 0.07253297418355942, + "loss_total": 0.43042290210723877, + "step": 212399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.6185807585716248, + "learning_rate": 3.144663577439842e-05, + "loss": 0.4588, + "step": 212400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.078952312469482, + "loss_rtd": 0.1971844732761383, + "loss_sent": 6.208645936567336e-05, + "loss_sod": 0.06430843472480774, + "loss_total": 0.2615549862384796, + "step": 212499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.409107208251953, + "loss_rtd": 0.19337983429431915, + "loss_sent": 3.095667852903716e-05, + "loss_sod": 0.17032751441001892, + "loss_total": 0.3637383282184601, + "step": 212499 + }, + { + "epoch": 0.009, + "grad_norm": 1.218471884727478, + "learning_rate": 3.141717181875973e-05, + "loss": 0.4688, + "step": 212500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.532618999481201, + "loss_rtd": 0.24023999273777008, + "loss_sent": 0.2433645874261856, + "loss_sod": 0.05891285836696625, + "loss_total": 0.5425174236297607, + "step": 212599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.952725887298584, + "loss_rtd": 0.2123037874698639, + "loss_sent": 0.038688044995069504, + "loss_sod": 0.08232495933771133, + "loss_total": 0.33331677317619324, + "step": 212599 + }, + { + "epoch": 0.0092, + "grad_norm": 2.4606971740722656, + "learning_rate": 3.1387715348282023e-05, + "loss": 0.4663, + "step": 212600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.907297134399414, + "loss_rtd": 0.1979868859052658, + "loss_sent": 4.540507507044822e-05, + "loss_sod": 0.10434964299201965, + "loss_total": 0.3023819327354431, + "step": 212699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.306654930114746, + "loss_rtd": 0.2169453650712967, + "loss_sent": 0.06280342489480972, + "loss_sod": 0.041878774762153625, + "loss_total": 0.32162755727767944, + "step": 212699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.6663098931312561, + "learning_rate": 3.1358266374830336e-05, + "loss": 0.4744, + "step": 212700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.580843925476074, + "loss_rtd": 0.2426462471485138, + "loss_sent": 0.5985530018806458, + "loss_sod": 0.07179553806781769, + "loss_total": 0.9129948019981384, + "step": 212799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.386800289154053, + "loss_rtd": 0.24027827382087708, + "loss_sent": 0.2527490258216858, + "loss_sod": 0.05511070787906647, + "loss_total": 0.5481380224227905, + "step": 212799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.92095148563385, + "learning_rate": 3.132882491026674e-05, + "loss": 0.4909, + "step": 212800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.439431190490723, + "loss_rtd": 0.24224305152893066, + "loss_sent": 0.11520251631736755, + "loss_sod": 0.061743177473545074, + "loss_total": 0.4191887378692627, + "step": 212899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.694076061248779, + "loss_rtd": 0.23508226871490479, + "loss_sent": 0.05061827227473259, + "loss_sod": 0.006715849507600069, + "loss_total": 0.29241639375686646, + "step": 212899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.7011203765869141, + "learning_rate": 3.1299390966450213e-05, + "loss": 0.4667, + "step": 212900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.152209758758545, + "loss_rtd": 0.22610633075237274, + "loss_sent": 3.1172719900496304e-05, + "loss_sod": 0.19216427206993103, + "loss_total": 0.4183017611503601, + "step": 212999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.097952365875244, + "loss_rtd": 0.21032899618148804, + "loss_sent": 3.7628306017722934e-05, + "loss_sod": 0.054617222398519516, + "loss_total": 0.2649838626384735, + "step": 212999 + }, + { + "epoch": 0.01, + "grad_norm": 0.7068494558334351, + "learning_rate": 3.1269964555236784e-05, + "loss": 0.4581, + "step": 213000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4454001784324646, + "eval_runtime": 151.4406, + "eval_samples_per_second": 101.974, + "eval_steps_per_second": 0.799, + "step": 213000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.744257926940918, + "loss_rtd": 0.25936928391456604, + "loss_sent": 0.047065265476703644, + "loss_sod": 0.03533713519573212, + "loss_total": 0.34177166223526, + "step": 213099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.667468070983887, + "loss_rtd": 0.23790179193019867, + "loss_sent": 0.3409661054611206, + "loss_sod": 0.03238163888454437, + "loss_total": 0.611249566078186, + "step": 213099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8624411821365356, + "learning_rate": 3.1240545688479394e-05, + "loss": 0.4715, + "step": 213100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.707870960235596, + "loss_rtd": 0.21306991577148438, + "loss_sent": 0.13571777939796448, + "loss_sod": 0.0220950897783041, + "loss_total": 0.3708827793598175, + "step": 213199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.601797580718994, + "loss_rtd": 0.24627207219600677, + "loss_sent": 0.15918178856372833, + "loss_sod": 0.00783846527338028, + "loss_total": 0.4132923483848572, + "step": 213199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.8530075550079346, + "learning_rate": 3.1211134378027955e-05, + "loss": 0.4818, + "step": 213200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.771250247955322, + "loss_rtd": 0.2444213181734085, + "loss_sent": 0.09979578107595444, + "loss_sod": 0.06902863830327988, + "loss_total": 0.4132457375526428, + "step": 213299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.803530693054199, + "loss_rtd": 0.26237183809280396, + "loss_sent": 0.2752484977245331, + "loss_sod": 0.02671178989112377, + "loss_total": 0.564332127571106, + "step": 213299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.8731642961502075, + "learning_rate": 3.118173063572933e-05, + "loss": 0.4707, + "step": 213300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.060603618621826, + "loss_rtd": 0.19803661108016968, + "loss_sent": 0.011174539104104042, + "loss_sod": 0.053331535309553146, + "loss_total": 0.2625426650047302, + "step": 213399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.629602909088135, + "loss_rtd": 0.2579532563686371, + "loss_sent": 0.0589432455599308, + "loss_sod": 0.046761203557252884, + "loss_total": 0.36365771293640137, + "step": 213399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.7239234447479248, + "learning_rate": 3.115233447342738e-05, + "loss": 0.4585, + "step": 213400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.775127410888672, + "loss_rtd": 0.25830426812171936, + "loss_sent": 0.17954021692276, + "loss_sod": 0.062176622450351715, + "loss_total": 0.5000211000442505, + "step": 213499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.7465338706970215, + "loss_rtd": 0.24478895962238312, + "loss_sent": 0.24995005130767822, + "loss_sod": 0.08784323185682297, + "loss_total": 0.5825822353363037, + "step": 213499 + }, + { + "epoch": 0.011, + "grad_norm": 0.9594159722328186, + "learning_rate": 3.112294590296283e-05, + "loss": 0.4485, + "step": 213500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.562532424926758, + "loss_rtd": 0.23189584910869598, + "loss_sent": 0.7027842402458191, + "loss_sod": 0.018722033128142357, + "loss_total": 0.9534021615982056, + "step": 213599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.744312763214111, + "loss_rtd": 0.24609750509262085, + "loss_sent": 0.21287801861763, + "loss_sod": 0.016435619443655014, + "loss_total": 0.47541114687919617, + "step": 213599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.6305056810379028, + "learning_rate": 3.109356493617341e-05, + "loss": 0.4674, + "step": 213600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.308304786682129, + "loss_rtd": 0.24485275149345398, + "loss_sent": 0.21994268894195557, + "loss_sod": 0.05680780112743378, + "loss_total": 0.5216032266616821, + "step": 213699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.640525817871094, + "loss_rtd": 0.2537594139575958, + "loss_sent": 0.2038225680589676, + "loss_sod": 0.01056704856455326, + "loss_total": 0.46814900636672974, + "step": 213699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.0676085948944092, + "learning_rate": 3.106419158489379e-05, + "loss": 0.4471, + "step": 213700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.560603141784668, + "loss_rtd": 0.23428739607334137, + "loss_sent": 0.4096875488758087, + "loss_sod": 0.05208686739206314, + "loss_total": 0.6960618495941162, + "step": 213799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.416528701782227, + "loss_rtd": 0.24423688650131226, + "loss_sent": 0.13170087337493896, + "loss_sod": 0.038248151540756226, + "loss_total": 0.41418591141700745, + "step": 213799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.8993825912475586, + "learning_rate": 3.1034825860955524e-05, + "loss": 0.4694, + "step": 213800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.59212064743042, + "loss_rtd": 0.2265229970216751, + "loss_sent": 0.10345450788736343, + "loss_sod": 0.09530478715896606, + "loss_total": 0.4252822697162628, + "step": 213899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.533003807067871, + "loss_rtd": 0.24870604276657104, + "loss_sent": 0.2663210928440094, + "loss_sod": 0.04454107582569122, + "loss_total": 0.5595681667327881, + "step": 213899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.0270558595657349, + "learning_rate": 3.100546777618713e-05, + "loss": 0.4778, + "step": 213900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.885112285614014, + "loss_rtd": 0.2112845927476883, + "loss_sent": 0.02935801073908806, + "loss_sod": 0.04289938136935234, + "loss_total": 0.2835419774055481, + "step": 213999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.518467903137207, + "loss_rtd": 0.2470613420009613, + "loss_sent": 0.09954142570495605, + "loss_sod": 0.04146134480834007, + "loss_total": 0.38806411623954773, + "step": 213999 + }, + { + "epoch": 0.012, + "grad_norm": 0.7365216016769409, + "learning_rate": 3.0976117342414054e-05, + "loss": 0.462, + "step": 214000 + }, + { + "epoch": 0.012, + "eval_loss": 0.44605186581611633, + "eval_runtime": 151.7656, + "eval_samples_per_second": 101.756, + "eval_steps_per_second": 0.797, + "step": 214000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.769169330596924, + "loss_rtd": 0.24741266667842865, + "loss_sent": 0.40118569135665894, + "loss_sod": 0.010234126821160316, + "loss_total": 0.6588324904441833, + "step": 214099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.8204545974731445, + "loss_rtd": 0.23688404262065887, + "loss_sent": 0.23825155198574066, + "loss_sod": 0.0842704251408577, + "loss_total": 0.559406042098999, + "step": 214099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.2068729400634766, + "learning_rate": 3.0946774571458634e-05, + "loss": 0.4635, + "step": 214100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.7868146896362305, + "loss_rtd": 0.24271126091480255, + "loss_sent": 0.19455036520957947, + "loss_sod": 0.06991995871067047, + "loss_total": 0.5071815848350525, + "step": 214199 + }, + { + "epoch": 0.012398, + "loss_gen": 6.01089334487915, + "loss_rtd": 0.2621612846851349, + "loss_sent": 0.09373494237661362, + "loss_sod": 0.10824918001890182, + "loss_total": 0.4641454219818115, + "step": 214199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.5242117643356323, + "learning_rate": 3.091743947514013e-05, + "loss": 0.461, + "step": 214200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.009282112121582, + "loss_rtd": 0.21023614704608917, + "loss_sent": 2.888362178055104e-05, + "loss_sod": 0.12804462015628815, + "loss_total": 0.338309645652771, + "step": 214299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.0859150886535645, + "loss_rtd": 0.2234998345375061, + "loss_sent": 0.23565302789211273, + "loss_sod": 0.03161570057272911, + "loss_total": 0.49076855182647705, + "step": 214299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.975635826587677, + "learning_rate": 3.088811206527474e-05, + "loss": 0.4556, + "step": 214300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.517728805541992, + "loss_rtd": 0.22543208301067352, + "loss_sent": 0.13504022359848022, + "loss_sod": 0.054832931607961655, + "loss_total": 0.4153052270412445, + "step": 214399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.704839706420898, + "loss_rtd": 0.24344506859779358, + "loss_sent": 0.15011349320411682, + "loss_sod": 0.029339928179979324, + "loss_total": 0.4228985011577606, + "step": 214399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.6345607042312622, + "learning_rate": 3.085879235367551e-05, + "loss": 0.4556, + "step": 214400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.6054511070251465, + "loss_rtd": 0.2383522391319275, + "loss_sent": 0.1786447912454605, + "loss_sod": 0.015524017624557018, + "loss_total": 0.4325210452079773, + "step": 214499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.389135360717773, + "loss_rtd": 0.24175548553466797, + "loss_sent": 0.14701464772224426, + "loss_sod": 0.05823016166687012, + "loss_total": 0.44700029492378235, + "step": 214499 + }, + { + "epoch": 0.013, + "grad_norm": 1.3787164688110352, + "learning_rate": 3.082948035215244e-05, + "loss": 0.4727, + "step": 214500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.542291164398193, + "loss_rtd": 0.22595494985580444, + "loss_sent": 0.1399892121553421, + "loss_sod": 0.032616619020700455, + "loss_total": 0.3985607624053955, + "step": 214599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.504732608795166, + "loss_rtd": 0.2170952707529068, + "loss_sent": 0.29960954189300537, + "loss_sod": 0.07303205132484436, + "loss_total": 0.5897368788719177, + "step": 214599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.189300775527954, + "learning_rate": 3.0800176072512374e-05, + "loss": 0.4733, + "step": 214600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.516160011291504, + "loss_rtd": 0.22539576888084412, + "loss_sent": 0.02846885845065117, + "loss_sod": 0.08762237429618835, + "loss_total": 0.34148699045181274, + "step": 214699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.72625207901001, + "loss_rtd": 0.2533041536808014, + "loss_sent": 0.28508323431015015, + "loss_sod": 0.02012855000793934, + "loss_total": 0.5585159063339233, + "step": 214699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.8739172220230103, + "learning_rate": 3.077087952655911e-05, + "loss": 0.4547, + "step": 214700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.518967151641846, + "loss_rtd": 0.233483225107193, + "loss_sent": 0.09542209655046463, + "loss_sod": 0.09760531038045883, + "loss_total": 0.42651063203811646, + "step": 214799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.508381366729736, + "loss_rtd": 0.24900829792022705, + "loss_sent": 0.5820159316062927, + "loss_sod": 0.01064244844019413, + "loss_total": 0.8416666984558105, + "step": 214799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.896393895149231, + "learning_rate": 3.074159072609326e-05, + "loss": 0.4751, + "step": 214800 + }, + { + "epoch": 0.013798, + "loss_gen": 6.465732574462891, + "loss_rtd": 0.2200365960597992, + "loss_sent": 0.11021783202886581, + "loss_sod": 0.13790738582611084, + "loss_total": 0.46816182136535645, + "step": 214899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.558566093444824, + "loss_rtd": 0.2123665064573288, + "loss_sent": 0.07727959752082825, + "loss_sod": 0.10284897685050964, + "loss_total": 0.3924950659275055, + "step": 214899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.7778891324996948, + "learning_rate": 3.071230968291238e-05, + "loss": 0.4701, + "step": 214900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.915468692779541, + "loss_rtd": 0.24788525700569153, + "loss_sent": 0.10812418162822723, + "loss_sod": 0.06854336708784103, + "loss_total": 0.4245527982711792, + "step": 214999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.7604546546936035, + "loss_rtd": 0.23790998756885529, + "loss_sent": 0.21076975762844086, + "loss_sod": 0.04442159831523895, + "loss_total": 0.4931013584136963, + "step": 214999 + }, + { + "epoch": 0.014, + "grad_norm": 0.8312272429466248, + "learning_rate": 3.068303640881083e-05, + "loss": 0.4646, + "step": 215000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4383068084716797, + "eval_runtime": 151.4463, + "eval_samples_per_second": 101.97, + "eval_steps_per_second": 0.799, + "step": 215000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.824502944946289, + "loss_rtd": 0.25133025646209717, + "loss_sent": 0.21859335899353027, + "loss_sod": 0.06594441831111908, + "loss_total": 0.5358680486679077, + "step": 215099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.6651177406311035, + "loss_rtd": 0.23057860136032104, + "loss_sent": 0.18085774779319763, + "loss_sod": 0.06961269676685333, + "loss_total": 0.4810490310192108, + "step": 215099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.818749189376831, + "learning_rate": 3.065377091557993e-05, + "loss": 0.462, + "step": 215100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.954616546630859, + "loss_rtd": 0.2156609445810318, + "loss_sent": 3.6461002309806645e-05, + "loss_sod": 0.14588233828544617, + "loss_total": 0.3615797460079193, + "step": 215199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.063547611236572, + "loss_rtd": 0.21089325845241547, + "loss_sent": 0.07102958858013153, + "loss_sod": 0.047408923506736755, + "loss_total": 0.32933175563812256, + "step": 215199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.8162667155265808, + "learning_rate": 3.062451321500777e-05, + "loss": 0.4598, + "step": 215200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.6063737869262695, + "loss_rtd": 0.24424877762794495, + "loss_sent": 0.10496512800455093, + "loss_sod": 0.07444434612989426, + "loss_total": 0.42365825176239014, + "step": 215299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.617539405822754, + "loss_rtd": 0.24557539820671082, + "loss_sent": 0.09547129273414612, + "loss_sod": 0.023936942219734192, + "loss_total": 0.3649836480617523, + "step": 215299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.67758709192276, + "learning_rate": 3.059526331887938e-05, + "loss": 0.4583, + "step": 215300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.776050090789795, + "loss_rtd": 0.23236040771007538, + "loss_sent": 0.19112221896648407, + "loss_sod": 0.12273884564638138, + "loss_total": 0.5462214946746826, + "step": 215399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.921604156494141, + "loss_rtd": 0.25309666991233826, + "loss_sent": 0.376557856798172, + "loss_sod": 0.1523684859275818, + "loss_total": 0.782023012638092, + "step": 215399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.8483816385269165, + "learning_rate": 3.056602123897659e-05, + "loss": 0.4733, + "step": 215400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.675897598266602, + "loss_rtd": 0.2582680284976959, + "loss_sent": 0.17964158952236176, + "loss_sod": 0.02270853891968727, + "loss_total": 0.46061813831329346, + "step": 215499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.974243640899658, + "loss_rtd": 0.2546817362308502, + "loss_sent": 0.05891652777791023, + "loss_sod": 0.15968403220176697, + "loss_total": 0.4732823073863983, + "step": 215499 + }, + { + "epoch": 0.015, + "grad_norm": 0.9689565896987915, + "learning_rate": 3.0536786987078146e-05, + "loss": 0.4789, + "step": 215500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.560499668121338, + "loss_rtd": 0.24256254732608795, + "loss_sent": 0.14838965237140656, + "loss_sod": 0.04218659549951553, + "loss_total": 0.43313878774642944, + "step": 215599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.735309600830078, + "loss_rtd": 0.23040291666984558, + "loss_sent": 0.1612355262041092, + "loss_sod": 0.13364312052726746, + "loss_total": 0.525281548500061, + "step": 215599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.5442880392074585, + "learning_rate": 3.050756057495957e-05, + "loss": 0.4799, + "step": 215600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.064610958099365, + "loss_rtd": 0.22517243027687073, + "loss_sent": 0.057054758071899414, + "loss_sod": 0.07848721742630005, + "loss_total": 0.3607144057750702, + "step": 215699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.957058906555176, + "loss_rtd": 0.20025552809238434, + "loss_sent": 0.0676516443490982, + "loss_sod": 0.05821962654590607, + "loss_total": 0.3261268138885498, + "step": 215699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.7563294768333435, + "learning_rate": 3.0478342014393245e-05, + "loss": 0.473, + "step": 215700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.815528869628906, + "loss_rtd": 0.24377046525478363, + "loss_sent": 0.13200443983078003, + "loss_sod": 0.08974200487136841, + "loss_total": 0.46551692485809326, + "step": 215799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.621831893920898, + "loss_rtd": 0.24012672901153564, + "loss_sent": 0.1662404090166092, + "loss_sod": 0.004909290932118893, + "loss_total": 0.4112764298915863, + "step": 215799 + }, + { + "epoch": 0.0156, + "grad_norm": 2.114844560623169, + "learning_rate": 3.044913131714844e-05, + "loss": 0.4698, + "step": 215800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.884778022766113, + "loss_rtd": 0.23560364544391632, + "loss_sent": 0.07110671699047089, + "loss_sod": 0.13425633311271667, + "loss_total": 0.44096672534942627, + "step": 215899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.541790008544922, + "loss_rtd": 0.2510862648487091, + "loss_sent": 0.2857402563095093, + "loss_sod": 0.030730176717042923, + "loss_total": 0.5675567388534546, + "step": 215899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.1384119987487793, + "learning_rate": 3.041992849499119e-05, + "loss": 0.4574, + "step": 215900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.798852443695068, + "loss_rtd": 0.25009506940841675, + "loss_sent": 0.23937390744686127, + "loss_sod": 0.01792096719145775, + "loss_total": 0.5073899626731873, + "step": 215999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.422009468078613, + "loss_rtd": 0.23141589760780334, + "loss_sent": 0.04138416051864624, + "loss_sod": 0.0043543362990021706, + "loss_total": 0.2771543860435486, + "step": 215999 + }, + { + "epoch": 0.016, + "grad_norm": 0.6650428175926208, + "learning_rate": 3.0390733559684413e-05, + "loss": 0.4648, + "step": 216000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4469378590583801, + "eval_runtime": 151.9435, + "eval_samples_per_second": 101.636, + "eval_steps_per_second": 0.796, + "step": 216000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.508726596832275, + "loss_rtd": 0.24046586453914642, + "loss_sent": 0.10226985067129135, + "loss_sod": 0.054271720349788666, + "loss_total": 0.39700743556022644, + "step": 216099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.240207672119141, + "loss_rtd": 0.24352088570594788, + "loss_sent": 6.022010711603798e-05, + "loss_sod": 0.11144635826349258, + "loss_total": 0.3550274670124054, + "step": 216099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.9859812259674072, + "learning_rate": 3.0361546522987804e-05, + "loss": 0.4586, + "step": 216100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.6960859298706055, + "loss_rtd": 0.2316541224718094, + "loss_sent": 0.08846792578697205, + "loss_sod": 0.04236774891614914, + "loss_total": 0.36248978972435, + "step": 216199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.228146076202393, + "loss_rtd": 0.24127553403377533, + "loss_sent": 8.796445763437077e-05, + "loss_sod": 0.17378482222557068, + "loss_total": 0.4151483178138733, + "step": 216199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.9168807864189148, + "learning_rate": 3.0332367396657914e-05, + "loss": 0.478, + "step": 216200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.7294535636901855, + "loss_rtd": 0.24016699194908142, + "loss_sent": 0.22297443449497223, + "loss_sod": 0.038633398711681366, + "loss_total": 0.5017748475074768, + "step": 216299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.633352279663086, + "loss_rtd": 0.22915062308311462, + "loss_sent": 0.2604297697544098, + "loss_sod": 0.09406111389398575, + "loss_total": 0.583641529083252, + "step": 216299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.8506834506988525, + "learning_rate": 3.0303196192448075e-05, + "loss": 0.4725, + "step": 216300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.817175388336182, + "loss_rtd": 0.24774198234081268, + "loss_sent": 0.08555918186903, + "loss_sod": 0.04189221188426018, + "loss_total": 0.37519338726997375, + "step": 216399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.58889627456665, + "loss_rtd": 0.257211297750473, + "loss_sent": 0.40883737802505493, + "loss_sod": 0.029644910246133804, + "loss_total": 0.6956936120986938, + "step": 216399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.9701164364814758, + "learning_rate": 3.0274032922108465e-05, + "loss": 0.4728, + "step": 216400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.23891544342041, + "loss_rtd": 0.23464882373809814, + "loss_sent": 0.029688643291592598, + "loss_sod": 0.08234697580337524, + "loss_total": 0.34668445587158203, + "step": 216499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.186557292938232, + "loss_rtd": 0.22171097993850708, + "loss_sent": 0.0829959437251091, + "loss_sod": 0.014110023155808449, + "loss_total": 0.3188169598579407, + "step": 216499 + }, + { + "epoch": 0.017, + "grad_norm": 0.6848690509796143, + "learning_rate": 3.024487759738602e-05, + "loss": 0.4425, + "step": 216500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.832528591156006, + "loss_rtd": 0.23400872945785522, + "loss_sent": 0.061081450432538986, + "loss_sod": 0.055188536643981934, + "loss_total": 0.35027870535850525, + "step": 216599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.990908622741699, + "loss_rtd": 0.25657305121421814, + "loss_sent": 0.15799954533576965, + "loss_sod": 0.04473128914833069, + "loss_total": 0.4593038856983185, + "step": 216599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.826411783695221, + "learning_rate": 3.021573023002453e-05, + "loss": 0.449, + "step": 216600 + }, + { + "epoch": 0.017398, + "loss_gen": 6.292687892913818, + "loss_rtd": 0.24653609097003937, + "loss_sent": 0.1989891529083252, + "loss_sod": 0.1252911388874054, + "loss_total": 0.5708163976669312, + "step": 216699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.649014472961426, + "loss_rtd": 0.24311646819114685, + "loss_sent": 0.10323931276798248, + "loss_sod": 0.03465801477432251, + "loss_total": 0.38101381063461304, + "step": 216699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.5302157402038574, + "learning_rate": 3.0186590831764534e-05, + "loss": 0.467, + "step": 216700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.707921981811523, + "loss_rtd": 0.2469438761472702, + "loss_sent": 0.4266524314880371, + "loss_sod": 0.0603007897734642, + "loss_total": 0.7338970899581909, + "step": 216799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.979853630065918, + "loss_rtd": 0.25102096796035767, + "loss_sent": 0.1520538479089737, + "loss_sod": 0.01745704561471939, + "loss_total": 0.42053186893463135, + "step": 216799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.536612629890442, + "learning_rate": 3.015745941434338e-05, + "loss": 0.4784, + "step": 216800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.374979019165039, + "loss_rtd": 0.21934618055820465, + "loss_sent": 0.08120442926883698, + "loss_sod": 0.0021703036036342382, + "loss_total": 0.30272090435028076, + "step": 216899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.738278865814209, + "loss_rtd": 0.24041594564914703, + "loss_sent": 0.19931069016456604, + "loss_sod": 0.009540720842778683, + "loss_total": 0.44926735758781433, + "step": 216899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.6462528705596924, + "learning_rate": 3.0128335989495205e-05, + "loss": 0.4729, + "step": 216900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.295807838439941, + "loss_rtd": 0.21960827708244324, + "loss_sent": 0.11986997723579407, + "loss_sod": 0.11057955026626587, + "loss_total": 0.4500578045845032, + "step": 216999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.198234558105469, + "loss_rtd": 0.22012540698051453, + "loss_sent": 0.00014326247037388384, + "loss_sod": 0.13883469998836517, + "loss_total": 0.35910335183143616, + "step": 216999 + }, + { + "epoch": 0.018, + "grad_norm": 0.9709426760673523, + "learning_rate": 3.0099220568950915e-05, + "loss": 0.4694, + "step": 217000 + }, + { + "epoch": 0.018, + "eval_loss": 0.4433152377605438, + "eval_runtime": 152.6746, + "eval_samples_per_second": 101.15, + "eval_steps_per_second": 0.793, + "step": 217000 + }, + { + "epoch": 0.018198, + "loss_gen": 4.91729736328125, + "loss_rtd": 0.20390455424785614, + "loss_sent": 3.083745832554996e-05, + "loss_sod": 0.11846113204956055, + "loss_total": 0.32239654660224915, + "step": 217099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.067019939422607, + "loss_rtd": 0.2096415013074875, + "loss_sent": 0.00343311601318419, + "loss_sod": 0.10725976526737213, + "loss_total": 0.32033437490463257, + "step": 217099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.031674861907959, + "learning_rate": 3.0070113164438197e-05, + "loss": 0.4601, + "step": 217100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.6049909591674805, + "loss_rtd": 0.23252132534980774, + "loss_sent": 0.13186942040920258, + "loss_sod": 0.02871989831328392, + "loss_total": 0.39311063289642334, + "step": 217199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.454476356506348, + "loss_rtd": 0.22120122611522675, + "loss_sent": 0.17035096883773804, + "loss_sod": 0.028199102729558945, + "loss_total": 0.41975128650665283, + "step": 217199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.1801053285598755, + "learning_rate": 3.0041013787681516e-05, + "loss": 0.455, + "step": 217200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.490360260009766, + "loss_rtd": 0.24848321080207825, + "loss_sent": 0.18496976792812347, + "loss_sod": 0.03594896197319031, + "loss_total": 0.4694019556045532, + "step": 217299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.519717693328857, + "loss_rtd": 0.20764923095703125, + "loss_sent": 0.09661871194839478, + "loss_sod": 0.046689342707395554, + "loss_total": 0.3509572744369507, + "step": 217299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.180071234703064, + "learning_rate": 3.0011922450402096e-05, + "loss": 0.4556, + "step": 217300 + }, + { + "epoch": 0.018798, + "loss_gen": 6.0195794105529785, + "loss_rtd": 0.25551044940948486, + "loss_sent": 0.23748035728931427, + "loss_sod": 0.05200285464525223, + "loss_total": 0.5449936389923096, + "step": 217399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.733720302581787, + "loss_rtd": 0.23086370527744293, + "loss_sent": 0.08993496000766754, + "loss_sod": 0.019307363778352737, + "loss_total": 0.3401060104370117, + "step": 217399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.8362905979156494, + "learning_rate": 2.9982839164317917e-05, + "loss": 0.4708, + "step": 217400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.469005584716797, + "loss_rtd": 0.2607032060623169, + "loss_sent": 0.31628668308258057, + "loss_sod": 0.019380606710910797, + "loss_total": 0.5963704586029053, + "step": 217499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.559659481048584, + "loss_rtd": 0.25695350766181946, + "loss_sent": 0.27798107266426086, + "loss_sod": 0.09437217563390732, + "loss_total": 0.6293067932128906, + "step": 217499 + }, + { + "epoch": 0.019, + "grad_norm": 1.2468453645706177, + "learning_rate": 2.9953763941143735e-05, + "loss": 0.4776, + "step": 217500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.993227005004883, + "loss_rtd": 0.22093051671981812, + "loss_sent": 3.197171099600382e-05, + "loss_sod": 0.10044729709625244, + "loss_total": 0.32140979170799255, + "step": 217599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.977768898010254, + "loss_rtd": 0.22226789593696594, + "loss_sent": 3.39422476827167e-05, + "loss_sod": 0.10714846849441528, + "loss_total": 0.3294503092765808, + "step": 217599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.0557632446289062, + "learning_rate": 2.9924696792591034e-05, + "loss": 0.4656, + "step": 217600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.729475021362305, + "loss_rtd": 0.24771659076213837, + "loss_sent": 0.19890882074832916, + "loss_sod": 0.014270318672060966, + "loss_total": 0.46089571714401245, + "step": 217699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.104670524597168, + "loss_rtd": 0.2094206064939499, + "loss_sent": 0.010839566588401794, + "loss_sod": 0.17914612591266632, + "loss_total": 0.3994063138961792, + "step": 217699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.872069239616394, + "learning_rate": 2.989563773036807e-05, + "loss": 0.4609, + "step": 217700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.684610843658447, + "loss_rtd": 0.23152419924736023, + "loss_sent": 0.2436731457710266, + "loss_sod": 0.037578895688056946, + "loss_total": 0.512776255607605, + "step": 217799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.681646823883057, + "loss_rtd": 0.22867180407047272, + "loss_sent": 0.13524675369262695, + "loss_sod": 0.02578415721654892, + "loss_total": 0.389702707529068, + "step": 217799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.6499189138412476, + "learning_rate": 2.9866586766179815e-05, + "loss": 0.4668, + "step": 217800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.7797136306762695, + "loss_rtd": 0.22893859446048737, + "loss_sent": 0.08922241628170013, + "loss_sod": 0.049116350710392, + "loss_total": 0.3672773838043213, + "step": 217899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.348376750946045, + "loss_rtd": 0.22028933465480804, + "loss_sent": 0.009248141199350357, + "loss_sod": 0.09586643427610397, + "loss_total": 0.3254038989543915, + "step": 217899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.9610381722450256, + "learning_rate": 2.9837543911728017e-05, + "loss": 0.4883, + "step": 217900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.335222244262695, + "loss_rtd": 0.2449258416891098, + "loss_sent": 0.3830544054508209, + "loss_sod": 0.04035145789384842, + "loss_total": 0.6683317422866821, + "step": 217999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.649767875671387, + "loss_rtd": 0.2364692986011505, + "loss_sent": 0.21286125481128693, + "loss_sod": 0.0946025475859642, + "loss_total": 0.543933093547821, + "step": 217999 + }, + { + "epoch": 0.02, + "grad_norm": 1.0370113849639893, + "learning_rate": 2.980850917871111e-05, + "loss": 0.4561, + "step": 218000 + }, + { + "epoch": 0.02, + "eval_loss": 0.44908398389816284, + "eval_runtime": 154.2902, + "eval_samples_per_second": 100.091, + "eval_steps_per_second": 0.784, + "step": 218000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.748846530914307, + "loss_rtd": 0.25088027119636536, + "loss_sent": 0.7614478468894958, + "loss_sod": 0.08117273449897766, + "loss_total": 1.0935008525848389, + "step": 218099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.721073627471924, + "loss_rtd": 0.25364333391189575, + "loss_sent": 0.12141552567481995, + "loss_sod": 0.02959049493074417, + "loss_total": 0.4046493470668793, + "step": 218099 + }, + { + "epoch": 0.0202, + "grad_norm": 3.803954601287842, + "learning_rate": 2.977948257882429e-05, + "loss": 0.4768, + "step": 218100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.563726902008057, + "loss_rtd": 0.24683329463005066, + "loss_sent": 0.17280761897563934, + "loss_sod": 0.041086986660957336, + "loss_total": 0.4607279300689697, + "step": 218199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.701605796813965, + "loss_rtd": 0.24776841700077057, + "loss_sent": 0.22824575006961823, + "loss_sod": 0.09738124161958694, + "loss_total": 0.5733954310417175, + "step": 218199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.22172212600708, + "learning_rate": 2.975046412375947e-05, + "loss": 0.4874, + "step": 218200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.798618793487549, + "loss_rtd": 0.26276931166648865, + "loss_sent": 0.3364769220352173, + "loss_sod": 0.030972769483923912, + "loss_total": 0.6302189826965332, + "step": 218299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.930161476135254, + "loss_rtd": 0.2547016441822052, + "loss_sent": 0.22650833427906036, + "loss_sod": 0.15774983167648315, + "loss_total": 0.6389598250389099, + "step": 218299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.2376837730407715, + "learning_rate": 2.9721453825205286e-05, + "loss": 0.4583, + "step": 218300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.668736934661865, + "loss_rtd": 0.23904027044773102, + "loss_sent": 0.32486769556999207, + "loss_sod": 0.019937217235565186, + "loss_total": 0.5838451385498047, + "step": 218399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.828061103820801, + "loss_rtd": 0.22369907796382904, + "loss_sent": 0.18765747547149658, + "loss_sod": 0.023204054683446884, + "loss_total": 0.4345605969429016, + "step": 218399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8148056864738464, + "learning_rate": 2.969245169484709e-05, + "loss": 0.4571, + "step": 218400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.631268501281738, + "loss_rtd": 0.24726520478725433, + "loss_sent": 0.08655589818954468, + "loss_sod": 0.015350056812167168, + "loss_total": 0.34917116165161133, + "step": 218499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.873820781707764, + "loss_rtd": 0.24602143466472626, + "loss_sent": 0.1784927397966385, + "loss_sod": 0.039597295224666595, + "loss_total": 0.46411144733428955, + "step": 218499 + }, + { + "epoch": 0.021, + "grad_norm": 0.5884883403778076, + "learning_rate": 2.9663457744366896e-05, + "loss": 0.4745, + "step": 218500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.680235862731934, + "loss_rtd": 0.23177313804626465, + "loss_sent": 0.2758804261684418, + "loss_sod": 0.02068045549094677, + "loss_total": 0.5283340215682983, + "step": 218599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.674758434295654, + "loss_rtd": 0.23199044167995453, + "loss_sent": 0.010449514724314213, + "loss_sod": 0.14128607511520386, + "loss_total": 0.3837260603904724, + "step": 218599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.9589932560920715, + "learning_rate": 2.96344719854435e-05, + "loss": 0.4651, + "step": 218600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.597902297973633, + "loss_rtd": 0.23603561520576477, + "loss_sent": 0.2469548135995865, + "loss_sod": 0.05467989668250084, + "loss_total": 0.5376703143119812, + "step": 218699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.73699426651001, + "loss_rtd": 0.25086814165115356, + "loss_sent": 0.09403558075428009, + "loss_sod": 0.07857215404510498, + "loss_total": 0.42347586154937744, + "step": 218699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.5063176155090332, + "learning_rate": 2.960549442975236e-05, + "loss": 0.4792, + "step": 218700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.861974716186523, + "loss_rtd": 0.2633379101753235, + "loss_sent": 0.0994805097579956, + "loss_sod": 0.16226577758789062, + "loss_total": 0.5250841975212097, + "step": 218799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.505153179168701, + "loss_rtd": 0.24079662561416626, + "loss_sent": 0.1348225623369217, + "loss_sod": 0.04609474539756775, + "loss_total": 0.4217139482498169, + "step": 218799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.833949625492096, + "learning_rate": 2.957652508896561e-05, + "loss": 0.4548, + "step": 218800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.6427226066589355, + "loss_rtd": 0.24502190947532654, + "loss_sent": 0.05857028812170029, + "loss_sod": 0.025192689150571823, + "loss_total": 0.32878488302230835, + "step": 218899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.44520378112793, + "loss_rtd": 0.24716676771640778, + "loss_sent": 0.12982669472694397, + "loss_sod": 0.08698557317256927, + "loss_total": 0.463979035615921, + "step": 218899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.9194590449333191, + "learning_rate": 2.9547563974752123e-05, + "loss": 0.4728, + "step": 218900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.841884613037109, + "loss_rtd": 0.23181861639022827, + "loss_sent": 0.027558909729123116, + "loss_sod": 0.11836151778697968, + "loss_total": 0.3777390718460083, + "step": 218999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.55942440032959, + "loss_rtd": 0.2494911253452301, + "loss_sent": 0.27481240034103394, + "loss_sod": 0.06376910954713821, + "loss_total": 0.588072657585144, + "step": 218999 + }, + { + "epoch": 0.022, + "grad_norm": 1.2120897769927979, + "learning_rate": 2.9518611098777417e-05, + "loss": 0.4712, + "step": 219000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4352158010005951, + "eval_runtime": 152.1307, + "eval_samples_per_second": 101.511, + "eval_steps_per_second": 0.795, + "step": 219000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.482469081878662, + "loss_rtd": 0.22782304883003235, + "loss_sent": 0.2615703046321869, + "loss_sod": 0.032145604491233826, + "loss_total": 0.5215389728546143, + "step": 219099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.551905155181885, + "loss_rtd": 0.24674688279628754, + "loss_sent": 0.03384366258978844, + "loss_sod": 0.0707610696554184, + "loss_total": 0.35135161876678467, + "step": 219099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.7032610774040222, + "learning_rate": 2.9489666472703714e-05, + "loss": 0.4543, + "step": 219100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.787640571594238, + "loss_rtd": 0.2396640032529831, + "loss_sent": 0.3986879587173462, + "loss_sod": 0.1018485352396965, + "loss_total": 0.7402005195617676, + "step": 219199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.247024059295654, + "loss_rtd": 0.24130845069885254, + "loss_sent": 0.08370120078325272, + "loss_sod": 0.040330611169338226, + "loss_total": 0.3653402626514435, + "step": 219199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.0427066087722778, + "learning_rate": 2.9460730108189895e-05, + "loss": 0.4808, + "step": 219200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.795830249786377, + "loss_rtd": 0.24341337382793427, + "loss_sent": 0.07364793866872787, + "loss_sod": 0.0308077372610569, + "loss_total": 0.34786906838417053, + "step": 219299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.655640125274658, + "loss_rtd": 0.24741683900356293, + "loss_sent": 0.29452285170555115, + "loss_sod": 0.13073895871639252, + "loss_total": 0.6726786494255066, + "step": 219299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.4652475118637085, + "learning_rate": 2.943180201689154e-05, + "loss": 0.4561, + "step": 219300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.646549224853516, + "loss_rtd": 0.23614893853664398, + "loss_sent": 0.4527502655982971, + "loss_sod": 0.0017168745398521423, + "loss_total": 0.6906160712242126, + "step": 219399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.653653144836426, + "loss_rtd": 0.2556692957878113, + "loss_sent": 0.07633604109287262, + "loss_sod": 0.021083682775497437, + "loss_total": 0.35308903455734253, + "step": 219399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.9115483164787292, + "learning_rate": 2.940288221046087e-05, + "loss": 0.4611, + "step": 219400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.757050514221191, + "loss_rtd": 0.240886852145195, + "loss_sent": 0.09002269059419632, + "loss_sod": 0.015488551929593086, + "loss_total": 0.34639808535575867, + "step": 219499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.640953063964844, + "loss_rtd": 0.22654040157794952, + "loss_sent": 0.17745435237884521, + "loss_sod": 0.012498008087277412, + "loss_total": 0.416492760181427, + "step": 219499 + }, + { + "epoch": 0.023, + "grad_norm": 0.859093964099884, + "learning_rate": 2.9373970700546783e-05, + "loss": 0.4575, + "step": 219500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.375519752502441, + "loss_rtd": 0.21631726622581482, + "loss_sent": 0.04595290496945381, + "loss_sod": 0.06255972385406494, + "loss_total": 0.32482990622520447, + "step": 219599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.680385112762451, + "loss_rtd": 0.22726675868034363, + "loss_sent": 0.10205487906932831, + "loss_sod": 0.06129787489771843, + "loss_total": 0.39061951637268066, + "step": 219599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.7443668842315674, + "learning_rate": 2.9345067498794827e-05, + "loss": 0.4789, + "step": 219600 + }, + { + "epoch": 0.023398, + "loss_gen": 4.975682258605957, + "loss_rtd": 0.23272277414798737, + "loss_sent": 2.9425633329083212e-05, + "loss_sod": 0.18261456489562988, + "loss_total": 0.4153667390346527, + "step": 219699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.370975494384766, + "loss_rtd": 0.2152707278728485, + "loss_sent": 0.03215698525309563, + "loss_sod": 0.04176726192235947, + "loss_total": 0.2891949713230133, + "step": 219699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.7752969264984131, + "learning_rate": 2.9316172616847216e-05, + "loss": 0.4752, + "step": 219700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.661818981170654, + "loss_rtd": 0.2615804672241211, + "loss_sent": 0.3608298897743225, + "loss_sod": 0.018724482506513596, + "loss_total": 0.6411348581314087, + "step": 219799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.536441326141357, + "loss_rtd": 0.2420029491186142, + "loss_sent": 0.17182713747024536, + "loss_sod": 0.12364213168621063, + "loss_total": 0.5374722480773926, + "step": 219799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.6802945137023926, + "learning_rate": 2.92872860663428e-05, + "loss": 0.4949, + "step": 219800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.811741352081299, + "loss_rtd": 0.24488957226276398, + "loss_sent": 0.5066856741905212, + "loss_sod": 0.03712468221783638, + "loss_total": 0.7886999249458313, + "step": 219899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.2717061042785645, + "loss_rtd": 0.21546435356140137, + "loss_sent": 0.028289644047617912, + "loss_sod": 0.07698732614517212, + "loss_total": 0.32074132561683655, + "step": 219899 + }, + { + "epoch": 0.0238, + "grad_norm": 2.3763620853424072, + "learning_rate": 2.925840785891708e-05, + "loss": 0.453, + "step": 219900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.716613292694092, + "loss_rtd": 0.21320635080337524, + "loss_sent": 0.00033508040360175073, + "loss_sod": 0.1457812786102295, + "loss_total": 0.3593226969242096, + "step": 219999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.380373477935791, + "loss_rtd": 0.19935832917690277, + "loss_sent": 0.0003447000926826149, + "loss_sod": 0.29420581459999084, + "loss_total": 0.4939088225364685, + "step": 219999 + }, + { + "epoch": 0.024, + "grad_norm": 1.152647852897644, + "learning_rate": 2.9229538006202195e-05, + "loss": 0.4615, + "step": 220000 + }, + { + "epoch": 0.024, + "eval_loss": 0.44127845764160156, + "eval_runtime": 152.7968, + "eval_samples_per_second": 101.069, + "eval_steps_per_second": 0.792, + "step": 220000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.725831508636475, + "loss_rtd": 0.2538098990917206, + "loss_sent": 0.6349098086357117, + "loss_sod": 0.015414290130138397, + "loss_total": 0.9041340351104736, + "step": 220099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.566556453704834, + "loss_rtd": 0.246371790766716, + "loss_sent": 0.28911134600639343, + "loss_sod": 0.014819586649537086, + "loss_total": 0.5503027439117432, + "step": 220099 + }, + { + "epoch": 0.0242, + "grad_norm": 2.324589729309082, + "learning_rate": 2.920067651982692e-05, + "loss": 0.4619, + "step": 220100 + }, + { + "epoch": 0.024398, + "loss_gen": 4.885288715362549, + "loss_rtd": 0.20006316900253296, + "loss_sent": 0.00020059717644471675, + "loss_sod": 0.133328378200531, + "loss_total": 0.33359214663505554, + "step": 220199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.525906085968018, + "loss_rtd": 0.2554265558719635, + "loss_sent": 0.14584718644618988, + "loss_sod": 0.056197769939899445, + "loss_total": 0.45747148990631104, + "step": 220199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.9548836350440979, + "learning_rate": 2.917182341141665e-05, + "loss": 0.4787, + "step": 220200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.831428050994873, + "loss_rtd": 0.22891171276569366, + "loss_sent": 0.16352984309196472, + "loss_sod": 0.045935772359371185, + "loss_total": 0.438377320766449, + "step": 220299 + }, + { + "epoch": 0.024598, + "loss_gen": 4.988731384277344, + "loss_rtd": 0.21196728944778442, + "loss_sent": 0.07420089095830917, + "loss_sod": 0.03989127650856972, + "loss_total": 0.3260594606399536, + "step": 220299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.7816998362541199, + "learning_rate": 2.9142978692593435e-05, + "loss": 0.4558, + "step": 220300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.981966018676758, + "loss_rtd": 0.23611585795879364, + "loss_sent": 0.0951041504740715, + "loss_sod": 0.04287397116422653, + "loss_total": 0.3740939795970917, + "step": 220399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.942696571350098, + "loss_rtd": 0.25359106063842773, + "loss_sent": 0.12361510097980499, + "loss_sod": 0.20155510306358337, + "loss_total": 0.5787612795829773, + "step": 220399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.8744722604751587, + "learning_rate": 2.911414237497589e-05, + "loss": 0.4598, + "step": 220400 + }, + { + "epoch": 0.024998, + "loss_gen": 6.028652191162109, + "loss_rtd": 0.24679096043109894, + "loss_sent": 0.15192911028862, + "loss_sod": 0.032262980937957764, + "loss_total": 0.4309830367565155, + "step": 220499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.859983444213867, + "loss_rtd": 0.22552786767482758, + "loss_sent": 0.21342353522777557, + "loss_sod": 0.05349688231945038, + "loss_total": 0.49244827032089233, + "step": 220499 + }, + { + "epoch": 0.025, + "grad_norm": 1.909841537475586, + "learning_rate": 2.9085314470179298e-05, + "loss": 0.4581, + "step": 220500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.959765434265137, + "loss_rtd": 0.248510479927063, + "loss_sent": 0.3497915267944336, + "loss_sod": 0.04938121140003204, + "loss_total": 0.6476832032203674, + "step": 220599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.7115397453308105, + "loss_rtd": 0.24150577187538147, + "loss_sent": 0.1365508884191513, + "loss_sod": 0.11778974533081055, + "loss_total": 0.49584639072418213, + "step": 220599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.3502163887023926, + "learning_rate": 2.9056494989815535e-05, + "loss": 0.4622, + "step": 220600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.561402797698975, + "loss_rtd": 0.24348825216293335, + "loss_sent": 0.2194337546825409, + "loss_sod": 0.006533219013363123, + "loss_total": 0.46945521235466003, + "step": 220699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.872133255004883, + "loss_rtd": 0.2291983813047409, + "loss_sent": 0.12904326617717743, + "loss_sod": 0.018771955743432045, + "loss_total": 0.377013623714447, + "step": 220699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8810868263244629, + "learning_rate": 2.902768394549311e-05, + "loss": 0.4449, + "step": 220700 + }, + { + "epoch": 0.025598, + "loss_gen": 6.152884483337402, + "loss_rtd": 0.24259310960769653, + "loss_sent": 0.03288838267326355, + "loss_sod": 0.3722909092903137, + "loss_total": 0.6477724313735962, + "step": 220799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.881531715393066, + "loss_rtd": 0.22413040697574615, + "loss_sent": 0.21983295679092407, + "loss_sod": 0.02162952907383442, + "loss_total": 0.4655928909778595, + "step": 220799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.0890847444534302, + "learning_rate": 2.8998881348817057e-05, + "loss": 0.4836, + "step": 220800 + }, + { + "epoch": 0.025798, + "loss_gen": 6.111624240875244, + "loss_rtd": 0.2526547312736511, + "loss_sent": 0.07042557746171951, + "loss_sod": 0.07130489498376846, + "loss_total": 0.3943851888179779, + "step": 220899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.344731330871582, + "loss_rtd": 0.23291535675525665, + "loss_sent": 0.10674972087144852, + "loss_sod": 0.020333917811512947, + "loss_total": 0.35999900102615356, + "step": 220899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.694638192653656, + "learning_rate": 2.897008721138909e-05, + "loss": 0.4656, + "step": 220900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.454096794128418, + "loss_rtd": 0.23449811339378357, + "loss_sent": 0.0007230049232020974, + "loss_sod": 0.15796786546707153, + "loss_total": 0.3931889832019806, + "step": 220999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.9117350578308105, + "loss_rtd": 0.20750699937343597, + "loss_sent": 0.00227547250688076, + "loss_sod": 0.05858577787876129, + "loss_total": 0.2683682441711426, + "step": 220999 + }, + { + "epoch": 0.026, + "grad_norm": 1.0255084037780762, + "learning_rate": 2.894130154480748e-05, + "loss": 0.4585, + "step": 221000 + }, + { + "epoch": 0.026, + "eval_loss": 0.4414041340351105, + "eval_runtime": 151.9668, + "eval_samples_per_second": 101.621, + "eval_steps_per_second": 0.796, + "step": 221000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.129405975341797, + "loss_rtd": 0.2178030163049698, + "loss_sent": 0.12021706998348236, + "loss_sod": 0.10975439846515656, + "loss_total": 0.4477744698524475, + "step": 221099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.916364669799805, + "loss_rtd": 0.22973603010177612, + "loss_sent": 0.03431417793035507, + "loss_sod": 0.03355777636170387, + "loss_total": 0.29760798811912537, + "step": 221099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9078941345214844, + "learning_rate": 2.8912524360667104e-05, + "loss": 0.4424, + "step": 221100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.452906131744385, + "loss_rtd": 0.24439242482185364, + "loss_sent": 0.17114460468292236, + "loss_sod": 0.01148926094174385, + "loss_total": 0.42702630162239075, + "step": 221199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.052069187164307, + "loss_rtd": 0.2224331945180893, + "loss_sent": 2.770273022179026e-05, + "loss_sod": 0.10630861669778824, + "loss_total": 0.3287695348262787, + "step": 221199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.8697308897972107, + "learning_rate": 2.8883755670559382e-05, + "loss": 0.4663, + "step": 221200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.732661247253418, + "loss_rtd": 0.21897049248218536, + "loss_sent": 0.4717823565006256, + "loss_sod": 0.039517201483249664, + "loss_total": 0.7302700281143188, + "step": 221299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.809877872467041, + "loss_rtd": 0.20634424686431885, + "loss_sent": 0.3764127492904663, + "loss_sod": 0.06832806766033173, + "loss_total": 0.6510850787162781, + "step": 221299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.94827139377594, + "learning_rate": 2.8854995486072344e-05, + "loss": 0.4781, + "step": 221300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.611896991729736, + "loss_rtd": 0.22673384845256805, + "loss_sent": 0.25589561462402344, + "loss_sod": 0.09249468892812729, + "loss_total": 0.5751241445541382, + "step": 221399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.919143199920654, + "loss_rtd": 0.2417079657316208, + "loss_sent": 0.4991658329963684, + "loss_sod": 0.023417092859745026, + "loss_total": 0.7642909288406372, + "step": 221399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.8854585886001587, + "learning_rate": 2.8826243818790598e-05, + "loss": 0.4629, + "step": 221400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.547589302062988, + "loss_rtd": 0.23697513341903687, + "loss_sent": 0.05889887735247612, + "loss_sod": 0.1516200751066208, + "loss_total": 0.4474940896034241, + "step": 221499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.313899040222168, + "loss_rtd": 0.22380518913269043, + "loss_sent": 0.04938521981239319, + "loss_sod": 0.10741880536079407, + "loss_total": 0.3806092143058777, + "step": 221499 + }, + { + "epoch": 0.027, + "grad_norm": 1.0613423585891724, + "learning_rate": 2.879750068029533e-05, + "loss": 0.4561, + "step": 221500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.850945472717285, + "loss_rtd": 0.25582897663116455, + "loss_sent": 0.11795809864997864, + "loss_sod": 0.037380725145339966, + "loss_total": 0.41116780042648315, + "step": 221599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.520864009857178, + "loss_rtd": 0.22643856704235077, + "loss_sent": 0.07080437242984772, + "loss_sod": 0.03870118409395218, + "loss_total": 0.33594411611557007, + "step": 221599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.8865393996238708, + "learning_rate": 2.8768766082164234e-05, + "loss": 0.4598, + "step": 221600 + }, + { + "epoch": 0.027398, + "loss_gen": 4.99382209777832, + "loss_rtd": 0.2131546288728714, + "loss_sent": 0.04601503536105156, + "loss_sod": 0.015454077161848545, + "loss_total": 0.2746237516403198, + "step": 221699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.551748752593994, + "loss_rtd": 0.22426831722259521, + "loss_sent": 0.09755738079547882, + "loss_sod": 0.07161170989274979, + "loss_total": 0.39343738555908203, + "step": 221699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.6296889185905457, + "learning_rate": 2.8740040035971614e-05, + "loss": 0.4535, + "step": 221700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.719409465789795, + "loss_rtd": 0.24622197449207306, + "loss_sent": 0.1148734763264656, + "loss_sod": 0.007248458918184042, + "loss_total": 0.36834388971328735, + "step": 221799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.579176425933838, + "loss_rtd": 0.24331387877464294, + "loss_sent": 0.04704314470291138, + "loss_sod": 0.04200097545981407, + "loss_total": 0.3323580026626587, + "step": 221799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.6344034075737, + "learning_rate": 2.8711322553288356e-05, + "loss": 0.474, + "step": 221800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.555872917175293, + "loss_rtd": 0.24041315913200378, + "loss_sent": 0.21580737829208374, + "loss_sod": 0.09671332687139511, + "loss_total": 0.5529338717460632, + "step": 221899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.872961521148682, + "loss_rtd": 0.23050495982170105, + "loss_sent": 0.28679120540618896, + "loss_sod": 0.15477249026298523, + "loss_total": 0.6720686554908752, + "step": 221899 + }, + { + "epoch": 0.0278, + "grad_norm": 2.2413880825042725, + "learning_rate": 2.86826136456818e-05, + "loss": 0.4668, + "step": 221900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.610729694366455, + "loss_rtd": 0.23461250960826874, + "loss_sent": 0.16489671170711517, + "loss_sod": 0.06333743035793304, + "loss_total": 0.46284663677215576, + "step": 221999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.9616851806640625, + "loss_rtd": 0.25797396898269653, + "loss_sent": 0.2884034514427185, + "loss_sod": 0.09543274343013763, + "loss_total": 0.6418101787567139, + "step": 221999 + }, + { + "epoch": 0.028, + "grad_norm": 1.0590890645980835, + "learning_rate": 2.865391332471592e-05, + "loss": 0.4597, + "step": 222000 + }, + { + "epoch": 0.028, + "eval_loss": 0.43902838230133057, + "eval_runtime": 151.969, + "eval_samples_per_second": 101.619, + "eval_steps_per_second": 0.796, + "step": 222000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.0284013748168945, + "loss_rtd": 0.23429106175899506, + "loss_sent": 2.5784262106753886e-05, + "loss_sod": 0.10376732796430588, + "loss_total": 0.3380841612815857, + "step": 222099 + }, + { + "epoch": 0.028198, + "loss_gen": 4.849533557891846, + "loss_rtd": 0.2006891965866089, + "loss_sent": 0.04447287321090698, + "loss_sod": 0.03498782962560654, + "loss_total": 0.280149906873703, + "step": 222099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.6721543073654175, + "learning_rate": 2.8625221601951203e-05, + "loss": 0.4444, + "step": 222100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.572941780090332, + "loss_rtd": 0.2390994429588318, + "loss_sent": 0.08765707910060883, + "loss_sod": 0.08156536519527435, + "loss_total": 0.40832191705703735, + "step": 222199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.066463947296143, + "loss_rtd": 0.20003828406333923, + "loss_sent": 0.0006903017056174576, + "loss_sod": 0.06834739446640015, + "loss_total": 0.26907598972320557, + "step": 222199 + }, + { + "epoch": 0.0284, + "grad_norm": 0.6420572400093079, + "learning_rate": 2.859653848894468e-05, + "loss": 0.466, + "step": 222200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.858935832977295, + "loss_rtd": 0.23716403543949127, + "loss_sent": 0.1687023937702179, + "loss_sod": 0.1139901876449585, + "loss_total": 0.5198565721511841, + "step": 222299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.699175834655762, + "loss_rtd": 0.25139546394348145, + "loss_sent": 0.182742640376091, + "loss_sod": 0.06551109999418259, + "loss_total": 0.4996492266654968, + "step": 222299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.1013721227645874, + "learning_rate": 2.8567863997249877e-05, + "loss": 0.4816, + "step": 222300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.0963897705078125, + "loss_rtd": 0.2102944552898407, + "loss_sent": 0.024991141632199287, + "loss_sod": 0.089943528175354, + "loss_total": 0.32522913813591003, + "step": 222399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.822452545166016, + "loss_rtd": 0.21162298321723938, + "loss_sent": 0.17472924292087555, + "loss_sod": 0.0625510960817337, + "loss_total": 0.44890332221984863, + "step": 222399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.0727407932281494, + "learning_rate": 2.85391981384169e-05, + "loss": 0.4592, + "step": 222400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.756165981292725, + "loss_rtd": 0.24165068566799164, + "loss_sent": 0.25815513730049133, + "loss_sod": 0.01099803950637579, + "loss_total": 0.5108038187026978, + "step": 222499 + }, + { + "epoch": 0.028998, + "loss_gen": 6.250514030456543, + "loss_rtd": 0.2295823097229004, + "loss_sent": 0.08269616961479187, + "loss_sod": 0.059744883328676224, + "loss_total": 0.3720233738422394, + "step": 222499 + }, + { + "epoch": 0.029, + "grad_norm": 1.047006368637085, + "learning_rate": 2.8510540923992342e-05, + "loss": 0.4624, + "step": 222500 + }, + { + "epoch": 0.029198, + "loss_gen": 6.0060272216796875, + "loss_rtd": 0.2313002347946167, + "loss_sent": 0.12705208361148834, + "loss_sod": 0.0676863044500351, + "loss_total": 0.42603862285614014, + "step": 222599 + }, + { + "epoch": 0.029198, + "loss_gen": 6.095234394073486, + "loss_rtd": 0.25022509694099426, + "loss_sent": 0.16193456947803497, + "loss_sod": 0.050696954131126404, + "loss_total": 0.46285659074783325, + "step": 222599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.7585253715515137, + "learning_rate": 2.8481892365519346e-05, + "loss": 0.461, + "step": 222600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.729217529296875, + "loss_rtd": 0.20401251316070557, + "loss_sent": 0.0006949233938939869, + "loss_sod": 0.015092398971319199, + "loss_total": 0.21979984641075134, + "step": 222699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.775670528411865, + "loss_rtd": 0.24301104247570038, + "loss_sent": 0.37751805782318115, + "loss_sod": 0.06195702776312828, + "loss_total": 0.6824861168861389, + "step": 222699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.2970914840698242, + "learning_rate": 2.8453252474537516e-05, + "loss": 0.4537, + "step": 222700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.738706588745117, + "loss_rtd": 0.21717077493667603, + "loss_sent": 0.5664131045341492, + "loss_sod": 0.03647429123520851, + "loss_total": 0.8200581669807434, + "step": 222799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.725508213043213, + "loss_rtd": 0.2441895753145218, + "loss_sent": 0.3335700035095215, + "loss_sod": 0.1268690526485443, + "loss_total": 0.7046286463737488, + "step": 222799 + }, + { + "epoch": 0.0296, + "grad_norm": 3.333723783493042, + "learning_rate": 2.842462126258302e-05, + "loss": 0.4587, + "step": 222800 + }, + { + "epoch": 0.029798, + "loss_gen": 6.071145534515381, + "loss_rtd": 0.2494257241487503, + "loss_sent": 0.10561519861221313, + "loss_sod": 0.05293821170926094, + "loss_total": 0.4079791307449341, + "step": 222899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.638516902923584, + "loss_rtd": 0.22418633103370667, + "loss_sent": 0.595332145690918, + "loss_sod": 0.08956931531429291, + "loss_total": 0.9090877771377563, + "step": 222899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.1928447484970093, + "learning_rate": 2.839599874118849e-05, + "loss": 0.4661, + "step": 222900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.490230083465576, + "loss_rtd": 0.2168295681476593, + "loss_sent": 0.18578708171844482, + "loss_sod": 0.021466167643666267, + "loss_total": 0.42408281564712524, + "step": 222999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.702321529388428, + "loss_rtd": 0.21663211286067963, + "loss_sent": 0.21463170647621155, + "loss_sod": 0.10396278649568558, + "loss_total": 0.535226583480835, + "step": 222999 + }, + { + "epoch": 0.03, + "grad_norm": 1.1903234720230103, + "learning_rate": 2.83673849218831e-05, + "loss": 0.4731, + "step": 223000 + }, + { + "epoch": 0.03, + "eval_loss": 0.43680015206336975, + "eval_runtime": 152.7027, + "eval_samples_per_second": 101.131, + "eval_steps_per_second": 0.792, + "step": 223000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.65090799331665, + "loss_rtd": 0.2267831563949585, + "loss_sent": 0.06457176059484482, + "loss_sod": 0.030351784080266953, + "loss_total": 0.32170671224594116, + "step": 223099 + }, + { + "epoch": 0.030198, + "loss_gen": 4.975265979766846, + "loss_rtd": 0.1984621286392212, + "loss_sent": 0.023878054693341255, + "loss_sod": 0.0772026777267456, + "loss_total": 0.2995428740978241, + "step": 223099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.8430375456809998, + "learning_rate": 2.8338779816192464e-05, + "loss": 0.4688, + "step": 223100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.6817626953125, + "loss_rtd": 0.23664012551307678, + "loss_sent": 0.4942237436771393, + "loss_sod": 0.03030521608889103, + "loss_total": 0.7611690759658813, + "step": 223199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.505131721496582, + "loss_rtd": 0.2329002320766449, + "loss_sent": 0.1338961273431778, + "loss_sod": 0.04271288588643074, + "loss_total": 0.40950924158096313, + "step": 223199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.7884716987609863, + "learning_rate": 2.8310183435638727e-05, + "loss": 0.4496, + "step": 223200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.743865013122559, + "loss_rtd": 0.23399142920970917, + "loss_sent": 0.5928657650947571, + "loss_sod": 0.07470916211605072, + "loss_total": 0.9015663862228394, + "step": 223299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.657670021057129, + "loss_rtd": 0.24267837405204773, + "loss_sent": 0.0339723564684391, + "loss_sod": 0.12458410114049911, + "loss_total": 0.40123483538627625, + "step": 223299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.2713204622268677, + "learning_rate": 2.82815957917405e-05, + "loss": 0.4807, + "step": 223300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.44834041595459, + "loss_rtd": 0.22362852096557617, + "loss_sent": 0.1513085812330246, + "loss_sod": 0.03652561083436012, + "loss_total": 0.4114627242088318, + "step": 223399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.5224151611328125, + "loss_rtd": 0.24534864723682404, + "loss_sent": 0.10560718923807144, + "loss_sod": 0.10585036873817444, + "loss_total": 0.4568062126636505, + "step": 223399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.9982521533966064, + "learning_rate": 2.8253016896012918e-05, + "loss": 0.4798, + "step": 223400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.6275129318237305, + "loss_rtd": 0.2267647683620453, + "loss_sent": 0.2952210009098053, + "loss_sod": 0.12058262526988983, + "loss_total": 0.6425683498382568, + "step": 223499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.795315265655518, + "loss_rtd": 0.2462725043296814, + "loss_sent": 0.17645324766635895, + "loss_sod": 0.011587779968976974, + "loss_total": 0.4343135356903076, + "step": 223499 + }, + { + "epoch": 0.031, + "grad_norm": 0.9927622675895691, + "learning_rate": 2.822444675996751e-05, + "loss": 0.4525, + "step": 223500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.699234485626221, + "loss_rtd": 0.22923468053340912, + "loss_sent": 0.2931504249572754, + "loss_sod": 0.027988888323307037, + "loss_total": 0.5503740310668945, + "step": 223599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.68513298034668, + "loss_rtd": 0.23517471551895142, + "loss_sent": 0.19641363620758057, + "loss_sod": 0.024201134219765663, + "loss_total": 0.4557894766330719, + "step": 223599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.7236945629119873, + "learning_rate": 2.8195885395112343e-05, + "loss": 0.4786, + "step": 223600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.589235305786133, + "loss_rtd": 0.23837798833847046, + "loss_sent": 0.2533192038536072, + "loss_sod": 0.09866724908351898, + "loss_total": 0.5903644561767578, + "step": 223699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.589600563049316, + "loss_rtd": 0.24535444378852844, + "loss_sent": 0.18812699615955353, + "loss_sod": 0.043122511357069016, + "loss_total": 0.4766039252281189, + "step": 223699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.8318504691123962, + "learning_rate": 2.816733281295195e-05, + "loss": 0.4659, + "step": 223700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.715664863586426, + "loss_rtd": 0.24356691539287567, + "loss_sent": 0.2261410802602768, + "loss_sod": 0.04271980747580528, + "loss_total": 0.512427806854248, + "step": 223799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.816354751586914, + "loss_rtd": 0.2568530738353729, + "loss_sent": 0.15311940014362335, + "loss_sod": 0.02241531014442444, + "loss_total": 0.4323877692222595, + "step": 223799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.9049277305603027, + "learning_rate": 2.8138789024987268e-05, + "loss": 0.4566, + "step": 223800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.5496673583984375, + "loss_rtd": 0.2470230907201767, + "loss_sent": 0.06891334056854248, + "loss_sod": 0.033637940883636475, + "loss_total": 0.34957438707351685, + "step": 223899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.751945972442627, + "loss_rtd": 0.23640091717243195, + "loss_sent": 0.331116646528244, + "loss_sod": 0.027254968881607056, + "loss_total": 0.5947725772857666, + "step": 223899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.6053645610809326, + "learning_rate": 2.8110254042715755e-05, + "loss": 0.4764, + "step": 223900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.880710601806641, + "loss_rtd": 0.20072686672210693, + "loss_sent": 0.0037962477654218674, + "loss_sod": 0.05845049023628235, + "loss_total": 0.2629736065864563, + "step": 223999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.543124198913574, + "loss_rtd": 0.2402266263961792, + "loss_sent": 0.1997024565935135, + "loss_sod": 0.027427462860941887, + "loss_total": 0.4673565626144409, + "step": 223999 + }, + { + "epoch": 0.032, + "grad_norm": 0.7811095118522644, + "learning_rate": 2.808172787763129e-05, + "loss": 0.4638, + "step": 224000 + }, + { + "epoch": 0.032, + "eval_loss": 0.44494882225990295, + "eval_runtime": 152.2057, + "eval_samples_per_second": 101.461, + "eval_steps_per_second": 0.795, + "step": 224000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.569873809814453, + "loss_rtd": 0.22610104084014893, + "loss_sent": 0.2411336749792099, + "loss_sod": 0.002095532836392522, + "loss_total": 0.4693302512168884, + "step": 224099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.069498062133789, + "loss_rtd": 0.21059513092041016, + "loss_sent": 0.013547113165259361, + "loss_sod": 0.04915773868560791, + "loss_total": 0.2732999920845032, + "step": 224099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.0114434957504272, + "learning_rate": 2.805321054122424e-05, + "loss": 0.465, + "step": 224100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.417413234710693, + "loss_rtd": 0.24213016033172607, + "loss_sent": 0.24051572382450104, + "loss_sod": 0.051152754575014114, + "loss_total": 0.5337986350059509, + "step": 224199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.953667640686035, + "loss_rtd": 0.23942674696445465, + "loss_sent": 0.3299728035926819, + "loss_sod": 0.09776980429887772, + "loss_total": 0.6671693325042725, + "step": 224199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.6033778190612793, + "learning_rate": 2.8024702044981344e-05, + "loss": 0.4628, + "step": 224200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.387984752655029, + "loss_rtd": 0.2271862030029297, + "loss_sent": 0.10984363406896591, + "loss_sod": 0.03416328877210617, + "loss_total": 0.37119314074516296, + "step": 224299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.715670108795166, + "loss_rtd": 0.257066547870636, + "loss_sent": 0.16404929757118225, + "loss_sod": 0.06787629425525665, + "loss_total": 0.4889921247959137, + "step": 224299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.3464434146881104, + "learning_rate": 2.799620240038583e-05, + "loss": 0.4691, + "step": 224300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.593616008758545, + "loss_rtd": 0.2560146152973175, + "loss_sent": 0.27091580629348755, + "loss_sod": 0.001017265603877604, + "loss_total": 0.5279476642608643, + "step": 224399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.599079132080078, + "loss_rtd": 0.23365263640880585, + "loss_sent": 0.18014585971832275, + "loss_sod": 0.0022646132856607437, + "loss_total": 0.416063129901886, + "step": 224399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.9168931245803833, + "learning_rate": 2.796771161891736e-05, + "loss": 0.4508, + "step": 224400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.647077560424805, + "loss_rtd": 0.21314236521720886, + "loss_sent": 0.047262877225875854, + "loss_sod": 0.011531857773661613, + "loss_total": 0.2719371020793915, + "step": 224499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.37584924697876, + "loss_rtd": 0.2328566461801529, + "loss_sent": 0.00010984270920744166, + "loss_sod": 0.12468120455741882, + "loss_total": 0.35764771699905396, + "step": 224499 + }, + { + "epoch": 0.001, + "grad_norm": 0.8750359416007996, + "learning_rate": 2.7939229712052028e-05, + "loss": 0.4745, + "step": 224500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.534361839294434, + "loss_rtd": 0.227226123213768, + "loss_sent": 0.14308471977710724, + "loss_sod": 0.08522327244281769, + "loss_total": 0.45553410053253174, + "step": 224599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.645907402038574, + "loss_rtd": 0.23368482291698456, + "loss_sent": 0.16105742752552032, + "loss_sod": 0.06207191199064255, + "loss_total": 0.456814169883728, + "step": 224599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.4348220825195312, + "learning_rate": 2.7910756691262318e-05, + "loss": 0.4722, + "step": 224600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.638808250427246, + "loss_rtd": 0.23300603032112122, + "loss_sent": 0.20091569423675537, + "loss_sod": 0.08018143475055695, + "loss_total": 0.5141031742095947, + "step": 224699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.642586708068848, + "loss_rtd": 0.23759806156158447, + "loss_sent": 0.11396326124668121, + "loss_sod": 0.02553461492061615, + "loss_total": 0.37709593772888184, + "step": 224699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.8900947570800781, + "learning_rate": 2.7882292568017164e-05, + "loss": 0.4791, + "step": 224700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.708366870880127, + "loss_rtd": 0.2390284687280655, + "loss_sent": 0.4308047592639923, + "loss_sod": 0.05724124237895012, + "loss_total": 0.7270745038986206, + "step": 224799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.39437198638916, + "loss_rtd": 0.26441359519958496, + "loss_sent": 0.25949522852897644, + "loss_sod": 0.07557901740074158, + "loss_total": 0.599487841129303, + "step": 224799 + }, + { + "epoch": 0.0016, + "grad_norm": 2.3639681339263916, + "learning_rate": 2.785383735378193e-05, + "loss": 0.4634, + "step": 224800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.926591396331787, + "loss_rtd": 0.24465681612491608, + "loss_sent": 0.10229752212762833, + "loss_sod": 0.01764935441315174, + "loss_total": 0.3646036982536316, + "step": 224899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.601808547973633, + "loss_rtd": 0.23416711390018463, + "loss_sent": 0.11619371175765991, + "loss_sod": 0.02902691438794136, + "loss_total": 0.3793877363204956, + "step": 224899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.9203702211380005, + "learning_rate": 2.7825391060018368e-05, + "loss": 0.4805, + "step": 224900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.661350250244141, + "loss_rtd": 0.22943970561027527, + "loss_sent": 0.1653285026550293, + "loss_sod": 0.0028651938773691654, + "loss_total": 0.3976334035396576, + "step": 224999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.711034774780273, + "loss_rtd": 0.2437131106853485, + "loss_sent": 0.07447220385074615, + "loss_sod": 0.0342186838388443, + "loss_total": 0.35240399837493896, + "step": 224999 + }, + { + "epoch": 0.002, + "grad_norm": 0.6741482615470886, + "learning_rate": 2.7796953698184623e-05, + "loss": 0.4732, + "step": 225000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4311069846153259, + "eval_runtime": 154.154, + "eval_samples_per_second": 100.179, + "eval_steps_per_second": 0.785, + "step": 225000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.568168640136719, + "loss_rtd": 0.23405276238918304, + "loss_sent": 0.16396518051624298, + "loss_sod": 0.06602217257022858, + "loss_total": 0.4640401005744934, + "step": 225099 + }, + { + "epoch": 0.002198, + "loss_gen": 6.030465126037598, + "loss_rtd": 0.23309405148029327, + "loss_sent": 0.24881760776042938, + "loss_sod": 0.03785564377903938, + "loss_total": 0.5197672843933105, + "step": 225099 + }, + { + "epoch": 0.0022, + "grad_norm": 0.8508840203285217, + "learning_rate": 2.7768525279735276e-05, + "loss": 0.4629, + "step": 225100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.613748550415039, + "loss_rtd": 0.2495390772819519, + "loss_sent": 0.344651997089386, + "loss_sod": 0.05255025625228882, + "loss_total": 0.6467413306236267, + "step": 225199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.8420000076293945, + "loss_rtd": 0.24935126304626465, + "loss_sent": 0.15570217370986938, + "loss_sod": 0.022254379466176033, + "loss_total": 0.4273078143596649, + "step": 225199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.4552719593048096, + "learning_rate": 2.7740105816121302e-05, + "loss": 0.4833, + "step": 225200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.60266637802124, + "loss_rtd": 0.23874659836292267, + "loss_sent": 0.17598579823970795, + "loss_sod": 0.0027498353738337755, + "loss_total": 0.4174822270870209, + "step": 225299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.583761692047119, + "loss_rtd": 0.25399646162986755, + "loss_sent": 0.2547561526298523, + "loss_sod": 0.027600456029176712, + "loss_total": 0.5363531112670898, + "step": 225299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.054810881614685, + "learning_rate": 2.7711695318790025e-05, + "loss": 0.4494, + "step": 225300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.915513038635254, + "loss_rtd": 0.2317160665988922, + "loss_sent": 0.30354616045951843, + "loss_sod": 0.04480939358472824, + "loss_total": 0.5800716280937195, + "step": 225399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.447020530700684, + "loss_rtd": 0.23885852098464966, + "loss_sent": 0.18317927420139313, + "loss_sod": 0.09525464475154877, + "loss_total": 0.5172924399375916, + "step": 225399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.0005134344100952, + "learning_rate": 2.7683293799185205e-05, + "loss": 0.4468, + "step": 225400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.440904140472412, + "loss_rtd": 0.2239619344472885, + "loss_sent": 0.14300444722175598, + "loss_sod": 0.0059273578226566315, + "loss_total": 0.372893750667572, + "step": 225499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.547121524810791, + "loss_rtd": 0.23636527359485626, + "loss_sent": 0.31065142154693604, + "loss_sod": 0.0795908272266388, + "loss_total": 0.6266075372695923, + "step": 225499 + }, + { + "epoch": 0.003, + "grad_norm": 1.2681756019592285, + "learning_rate": 2.765490126874698e-05, + "loss": 0.44, + "step": 225500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.618387699127197, + "loss_rtd": 0.20254798233509064, + "loss_sent": 0.143963024020195, + "loss_sod": 0.07344525307416916, + "loss_total": 0.4199562668800354, + "step": 225599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.438107967376709, + "loss_rtd": 0.24515216052532196, + "loss_sent": 0.07458990067243576, + "loss_sod": 0.0852646604180336, + "loss_total": 0.4050067365169525, + "step": 225599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.8462616205215454, + "learning_rate": 2.7626517738911872e-05, + "loss": 0.4658, + "step": 225600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.462686538696289, + "loss_rtd": 0.23882129788398743, + "loss_sent": 0.2343187928199768, + "loss_sod": 0.04691701382398605, + "loss_total": 0.5200570821762085, + "step": 225699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.620092391967773, + "loss_rtd": 0.23274467885494232, + "loss_sent": 0.1371069848537445, + "loss_sod": 0.022337134927511215, + "loss_total": 0.39218878746032715, + "step": 225699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.7880659699440002, + "learning_rate": 2.7598143221112716e-05, + "loss": 0.4689, + "step": 225700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.799817085266113, + "loss_rtd": 0.2242693454027176, + "loss_sent": 0.06231445074081421, + "loss_sod": 0.01739829033613205, + "loss_total": 0.30398207902908325, + "step": 225799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.818936347961426, + "loss_rtd": 0.2236141562461853, + "loss_sent": 0.1971997618675232, + "loss_sod": 0.12863728404045105, + "loss_total": 0.5494512319564819, + "step": 225799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.2365944385528564, + "learning_rate": 2.75697777267788e-05, + "loss": 0.4662, + "step": 225800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.77978515625, + "loss_rtd": 0.23254472017288208, + "loss_sent": 0.06714111566543579, + "loss_sod": 0.02295173704624176, + "loss_total": 0.32263755798339844, + "step": 225899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.553442001342773, + "loss_rtd": 0.24642743170261383, + "loss_sent": 0.07728710025548935, + "loss_sod": 0.02444956637918949, + "loss_total": 0.3481641113758087, + "step": 225899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.9199406504631042, + "learning_rate": 2.7541421267335725e-05, + "loss": 0.4765, + "step": 225900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.896374702453613, + "loss_rtd": 0.24322231113910675, + "loss_sent": 0.30534428358078003, + "loss_sod": 0.06029155105352402, + "loss_total": 0.6088581085205078, + "step": 225999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.502675533294678, + "loss_rtd": 0.24224092066287994, + "loss_sent": 0.10783141106367111, + "loss_sod": 0.042063526809215546, + "loss_total": 0.3921358585357666, + "step": 225999 + }, + { + "epoch": 0.004, + "grad_norm": 1.3127118349075317, + "learning_rate": 2.7513073854205506e-05, + "loss": 0.47, + "step": 226000 + }, + { + "epoch": 0.004, + "eval_loss": 0.44110894203186035, + "eval_runtime": 150.8165, + "eval_samples_per_second": 102.396, + "eval_steps_per_second": 0.802, + "step": 226000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.643917083740234, + "loss_rtd": 0.24464350938796997, + "loss_sent": 0.16674335300922394, + "loss_sod": 0.021549327298998833, + "loss_total": 0.4329361915588379, + "step": 226099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.616582870483398, + "loss_rtd": 0.23230820894241333, + "loss_sent": 0.3552858531475067, + "loss_sod": 0.009614404290914536, + "loss_total": 0.5972084999084473, + "step": 226099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.6828440427780151, + "learning_rate": 2.7484735498806424e-05, + "loss": 0.467, + "step": 226100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.5748209953308105, + "loss_rtd": 0.2176102250814438, + "loss_sent": 0.06763680279254913, + "loss_sod": 0.013852190226316452, + "loss_total": 0.2990992069244385, + "step": 226199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.857899188995361, + "loss_rtd": 0.24582800269126892, + "loss_sent": 0.21359440684318542, + "loss_sod": 0.008314840495586395, + "loss_total": 0.46773725748062134, + "step": 226199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.6256184577941895, + "learning_rate": 2.7456406212553187e-05, + "loss": 0.4501, + "step": 226200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.72310733795166, + "loss_rtd": 0.23177634179592133, + "loss_sent": 0.1454453319311142, + "loss_sod": 0.05210348963737488, + "loss_total": 0.4293251633644104, + "step": 226299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.581999778747559, + "loss_rtd": 0.2323111593723297, + "loss_sent": 0.3031417429447174, + "loss_sod": 0.0475931242108345, + "loss_total": 0.583046019077301, + "step": 226299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.0376886129379272, + "learning_rate": 2.742808600685684e-05, + "loss": 0.4846, + "step": 226300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.795795917510986, + "loss_rtd": 0.24001413583755493, + "loss_sent": 0.16974866390228271, + "loss_sod": 0.04025164991617203, + "loss_total": 0.4500144422054291, + "step": 226399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.709367275238037, + "loss_rtd": 0.22698765993118286, + "loss_sent": 0.21126671135425568, + "loss_sod": 0.051406316459178925, + "loss_total": 0.48966068029403687, + "step": 226399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.9794959425926208, + "learning_rate": 2.7399774893124764e-05, + "loss": 0.4534, + "step": 226400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.647364616394043, + "loss_rtd": 0.2729233205318451, + "loss_sent": 0.2768864929676056, + "loss_sod": 0.04806673899292946, + "loss_total": 0.5978765487670898, + "step": 226499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.6918110847473145, + "loss_rtd": 0.2299945205450058, + "loss_sent": 0.2196802943944931, + "loss_sod": 0.07942471653223038, + "loss_total": 0.5290995240211487, + "step": 226499 + }, + { + "epoch": 0.005, + "grad_norm": 1.4714350700378418, + "learning_rate": 2.737147288276064e-05, + "loss": 0.449, + "step": 226500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.895983695983887, + "loss_rtd": 0.22006776928901672, + "loss_sent": 0.34701797366142273, + "loss_sod": 0.03792410343885422, + "loss_total": 0.6050098538398743, + "step": 226599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.722021579742432, + "loss_rtd": 0.2389376312494278, + "loss_sent": 0.09975945949554443, + "loss_sod": 0.09337402880191803, + "loss_total": 0.43207111954689026, + "step": 226599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.0441040992736816, + "learning_rate": 2.7343179987164535e-05, + "loss": 0.4641, + "step": 226600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.523041725158691, + "loss_rtd": 0.24761639535427094, + "loss_sent": 0.11091520637273788, + "loss_sod": 0.07680056989192963, + "loss_total": 0.43533217906951904, + "step": 226699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.493579387664795, + "loss_rtd": 0.22481165826320648, + "loss_sent": 0.35022062063217163, + "loss_sod": 0.00831974670290947, + "loss_total": 0.5833520293235779, + "step": 226699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.5081793069839478, + "learning_rate": 2.7314896217732845e-05, + "loss": 0.4534, + "step": 226700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.182254314422607, + "loss_rtd": 0.20410116016864777, + "loss_sent": 0.02833179570734501, + "loss_sod": 0.08729544281959534, + "loss_total": 0.31972840428352356, + "step": 226799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.498484134674072, + "loss_rtd": 0.24284105002880096, + "loss_sent": 0.16004407405853271, + "loss_sod": 0.021331269294023514, + "loss_total": 0.4242163896560669, + "step": 226799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.0090997219085693, + "learning_rate": 2.728662158585822e-05, + "loss": 0.4531, + "step": 226800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.486617565155029, + "loss_rtd": 0.23351794481277466, + "loss_sent": 0.17372311651706696, + "loss_sod": 0.017822682857513428, + "loss_total": 0.42506372928619385, + "step": 226899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.614632606506348, + "loss_rtd": 0.2119576781988144, + "loss_sent": 0.2688595652580261, + "loss_sod": 0.008249020203948021, + "loss_total": 0.4890662431716919, + "step": 226899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.02785062789917, + "learning_rate": 2.7258356102929715e-05, + "loss": 0.4585, + "step": 226900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.825535297393799, + "loss_rtd": 0.24155889451503754, + "loss_sent": 0.08116856962442398, + "loss_sod": 0.04631891846656799, + "loss_total": 0.3690463602542877, + "step": 226999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.491581916809082, + "loss_rtd": 0.23882105946540833, + "loss_sent": 0.23398028314113617, + "loss_sod": 0.003059498965740204, + "loss_total": 0.4758608341217041, + "step": 226999 + }, + { + "epoch": 0.006, + "grad_norm": 0.8642366528511047, + "learning_rate": 2.7230099780332646e-05, + "loss": 0.4496, + "step": 227000 + }, + { + "epoch": 0.006, + "eval_loss": 0.43922483921051025, + "eval_runtime": 150.8422, + "eval_samples_per_second": 102.379, + "eval_steps_per_second": 0.802, + "step": 227000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.555595874786377, + "loss_rtd": 0.22712871432304382, + "loss_sent": 0.27140527963638306, + "loss_sod": 0.04689645767211914, + "loss_total": 0.5454304218292236, + "step": 227099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.713353633880615, + "loss_rtd": 0.21562498807907104, + "loss_sent": 0.0435381643474102, + "loss_sod": 0.06617502123117447, + "loss_total": 0.3253381550312042, + "step": 227099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.3460780382156372, + "learning_rate": 2.720185262944866e-05, + "loss": 0.4588, + "step": 227100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.15493106842041, + "loss_rtd": 0.2117948830127716, + "loss_sent": 0.05631496384739876, + "loss_sod": 0.10051491856575012, + "loss_total": 0.3686247766017914, + "step": 227199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.011689186096191, + "loss_rtd": 0.19459405541419983, + "loss_sent": 4.475473542697728e-05, + "loss_sod": 0.050213322043418884, + "loss_total": 0.24485212564468384, + "step": 227199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.80864417552948, + "learning_rate": 2.7173614661655723e-05, + "loss": 0.4605, + "step": 227200 + }, + { + "epoch": 0.006598, + "loss_gen": 6.317282676696777, + "loss_rtd": 0.26973089575767517, + "loss_sent": 0.13002526760101318, + "loss_sod": 0.08195135742425919, + "loss_total": 0.48170751333236694, + "step": 227299 + }, + { + "epoch": 0.006598, + "loss_gen": 6.011381149291992, + "loss_rtd": 0.23621657490730286, + "loss_sent": 0.19482728838920593, + "loss_sod": 0.09912626445293427, + "loss_total": 0.5301700830459595, + "step": 227299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.8855258226394653, + "learning_rate": 2.7145385888328058e-05, + "loss": 0.4613, + "step": 227300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.7408270835876465, + "loss_rtd": 0.24157613515853882, + "loss_sent": 0.3369632959365845, + "loss_sod": 0.01694389060139656, + "loss_total": 0.5954833030700684, + "step": 227399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.954455852508545, + "loss_rtd": 0.25100675225257874, + "loss_sent": 0.12181692570447922, + "loss_sod": 0.00487709604203701, + "loss_total": 0.3777007758617401, + "step": 227399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.208707571029663, + "learning_rate": 2.7117166320836218e-05, + "loss": 0.4686, + "step": 227400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.6229143142700195, + "loss_rtd": 0.23978972434997559, + "loss_sent": 0.21991673111915588, + "loss_sod": 0.025293273851275444, + "loss_total": 0.48499971628189087, + "step": 227499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.387685298919678, + "loss_rtd": 0.22592461109161377, + "loss_sent": 0.12020763754844666, + "loss_sod": 0.05308495834469795, + "loss_total": 0.3992172181606293, + "step": 227499 + }, + { + "epoch": 0.007, + "grad_norm": 1.3162251710891724, + "learning_rate": 2.708895597054705e-05, + "loss": 0.4528, + "step": 227500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.599020004272461, + "loss_rtd": 0.21718180179595947, + "loss_sent": 0.26812273263931274, + "loss_sod": 0.006038974970579147, + "loss_total": 0.49134349822998047, + "step": 227599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.435451507568359, + "loss_rtd": 0.2204170674085617, + "loss_sent": 0.20077107846736908, + "loss_sod": 0.11586703360080719, + "loss_total": 0.5370551943778992, + "step": 227599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.3090988397598267, + "learning_rate": 2.7060754848823698e-05, + "loss": 0.432, + "step": 227600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.618217945098877, + "loss_rtd": 0.2524552643299103, + "loss_sent": 0.2500818073749542, + "loss_sod": 0.00473436014726758, + "loss_total": 0.507271409034729, + "step": 227699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.735246658325195, + "loss_rtd": 0.23610767722129822, + "loss_sent": 0.20814943313598633, + "loss_sod": 0.13017351925373077, + "loss_total": 0.5744306445121765, + "step": 227699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.1152544021606445, + "learning_rate": 2.703256296702553e-05, + "loss": 0.4549, + "step": 227700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.446821212768555, + "loss_rtd": 0.23186981678009033, + "loss_sent": 0.28947314620018005, + "loss_sod": 0.09214643388986588, + "loss_total": 0.6134893894195557, + "step": 227799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.690222263336182, + "loss_rtd": 0.23537524044513702, + "loss_sent": 0.2620966136455536, + "loss_sod": 0.05944913625717163, + "loss_total": 0.5569210052490234, + "step": 227799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.403546929359436, + "learning_rate": 2.700438033650825e-05, + "loss": 0.468, + "step": 227800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.735042095184326, + "loss_rtd": 0.24326685070991516, + "loss_sent": 0.4053393602371216, + "loss_sod": 0.06040772795677185, + "loss_total": 0.7090139389038086, + "step": 227899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.652089595794678, + "loss_rtd": 0.23130719363689423, + "loss_sent": 0.1046670451760292, + "loss_sod": 0.044817693531513214, + "loss_total": 0.38079193234443665, + "step": 227899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.8818656206130981, + "learning_rate": 2.697620696862382e-05, + "loss": 0.4564, + "step": 227900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.593469142913818, + "loss_rtd": 0.2454930990934372, + "loss_sent": 0.32072532176971436, + "loss_sod": 0.014206699095666409, + "loss_total": 0.5804251432418823, + "step": 227999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.961065769195557, + "loss_rtd": 0.23668722808361053, + "loss_sent": 0.40386414527893066, + "loss_sod": 0.13456673920154572, + "loss_total": 0.7751181125640869, + "step": 227999 + }, + { + "epoch": 0.008, + "grad_norm": 3.1582767963409424, + "learning_rate": 2.694804287472049e-05, + "loss": 0.4459, + "step": 228000 + }, + { + "epoch": 0.008, + "eval_loss": 0.43808892369270325, + "eval_runtime": 152.4528, + "eval_samples_per_second": 101.297, + "eval_steps_per_second": 0.794, + "step": 228000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.379452228546143, + "loss_rtd": 0.23132821917533875, + "loss_sent": 0.4139622747898102, + "loss_sod": 0.011506144888699055, + "loss_total": 0.6567966341972351, + "step": 228099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.633175849914551, + "loss_rtd": 0.2480832189321518, + "loss_sent": 0.17119170725345612, + "loss_sod": 0.031854670494794846, + "loss_total": 0.45112961530685425, + "step": 228099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.5515848398208618, + "learning_rate": 2.691988806614272e-05, + "loss": 0.4632, + "step": 228100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.950217247009277, + "loss_rtd": 0.23259751498699188, + "loss_sent": 0.40340232849121094, + "loss_sod": 0.05920649319887161, + "loss_total": 0.695206344127655, + "step": 228199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.799584865570068, + "loss_rtd": 0.2343636304140091, + "loss_sent": 0.07602745294570923, + "loss_sod": 0.044330716133117676, + "loss_total": 0.3547217845916748, + "step": 228199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.4872395992279053, + "learning_rate": 2.689174255423127e-05, + "loss": 0.4471, + "step": 228200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.84426736831665, + "loss_rtd": 0.19541485607624054, + "loss_sent": 0.0004303819441702217, + "loss_sod": 0.09736961126327515, + "loss_total": 0.2932148277759552, + "step": 228299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.628753185272217, + "loss_rtd": 0.23462453484535217, + "loss_sent": 0.14476704597473145, + "loss_sod": 0.020943686366081238, + "loss_total": 0.40033525228500366, + "step": 228299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.5867993831634521, + "learning_rate": 2.6863606350323172e-05, + "loss": 0.4561, + "step": 228300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.858457088470459, + "loss_rtd": 0.24009667336940765, + "loss_sent": 0.4131287634372711, + "loss_sod": 0.029319915920495987, + "loss_total": 0.6825453639030457, + "step": 228399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.606655597686768, + "loss_rtd": 0.2331199049949646, + "loss_sent": 0.4195597171783447, + "loss_sod": 0.0031043491326272488, + "loss_total": 0.6557839512825012, + "step": 228399 + }, + { + "epoch": 0.0088, + "grad_norm": 2.3644115924835205, + "learning_rate": 2.6835479465751657e-05, + "loss": 0.4719, + "step": 228400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.616539001464844, + "loss_rtd": 0.24378356337547302, + "loss_sent": 0.13728450238704681, + "loss_sod": 0.013642529025673866, + "loss_total": 0.39471060037612915, + "step": 228499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.42510986328125, + "loss_rtd": 0.21945352852344513, + "loss_sent": 0.06915150582790375, + "loss_sod": 0.10679539293050766, + "loss_total": 0.39540040493011475, + "step": 228499 + }, + { + "epoch": 0.009, + "grad_norm": 1.0487242937088013, + "learning_rate": 2.680736191184624e-05, + "loss": 0.4668, + "step": 228500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.891288757324219, + "loss_rtd": 0.24702602624893188, + "loss_sent": 0.4161568284034729, + "loss_sod": 0.052584581077098846, + "loss_total": 0.7157674431800842, + "step": 228599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.679673671722412, + "loss_rtd": 0.2405959665775299, + "loss_sent": 0.32089099287986755, + "loss_sod": 0.0328717865049839, + "loss_total": 0.5943587422370911, + "step": 228599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.6369969844818115, + "learning_rate": 2.677925369993267e-05, + "loss": 0.4677, + "step": 228600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.320028305053711, + "loss_rtd": 0.2504448890686035, + "loss_sent": 0.23768854141235352, + "loss_sod": 0.0029685497283935547, + "loss_total": 0.4911019802093506, + "step": 228699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.746073246002197, + "loss_rtd": 0.24318954348564148, + "loss_sent": 0.4006673991680145, + "loss_sod": 0.01529262587428093, + "loss_total": 0.6591495275497437, + "step": 228699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.0088990926742554, + "learning_rate": 2.6751154841332954e-05, + "loss": 0.4672, + "step": 228700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.174074172973633, + "loss_rtd": 0.23185382783412933, + "loss_sent": 0.022533010691404343, + "loss_sod": 0.039733707904815674, + "loss_total": 0.29412055015563965, + "step": 228799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.656975746154785, + "loss_rtd": 0.2337799072265625, + "loss_sent": 0.1169809028506279, + "loss_sod": 0.003765667788684368, + "loss_total": 0.35452648997306824, + "step": 228799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.5050992965698242, + "learning_rate": 2.6723065347365267e-05, + "loss": 0.4492, + "step": 228800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.612466335296631, + "loss_rtd": 0.22140438854694366, + "loss_sent": 0.1858435720205307, + "loss_sod": 0.01725723221898079, + "loss_total": 0.42450517416000366, + "step": 228899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.555539608001709, + "loss_rtd": 0.24305252730846405, + "loss_sent": 0.030688712373375893, + "loss_sod": 0.015331541188061237, + "loss_total": 0.28907278180122375, + "step": 228899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.7471132278442383, + "learning_rate": 2.6694985229344077e-05, + "loss": 0.4495, + "step": 228900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.858891010284424, + "loss_rtd": 0.23836539685726166, + "loss_sent": 0.055644478648900986, + "loss_sod": 0.18495967984199524, + "loss_total": 0.478969544172287, + "step": 228999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.894556522369385, + "loss_rtd": 0.20134000480175018, + "loss_sent": 0.0005428654258139431, + "loss_sod": 0.051950592547655106, + "loss_total": 0.25383347272872925, + "step": 228999 + }, + { + "epoch": 0.01, + "grad_norm": 1.039613127708435, + "learning_rate": 2.6666914498580048e-05, + "loss": 0.4672, + "step": 229000 + }, + { + "epoch": 0.01, + "eval_loss": 0.43447065353393555, + "eval_runtime": 150.9883, + "eval_samples_per_second": 102.279, + "eval_steps_per_second": 0.801, + "step": 229000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.497626781463623, + "loss_rtd": 0.2218395620584488, + "loss_sent": 0.07860352843999863, + "loss_sod": 0.07573240995407104, + "loss_total": 0.37617549300193787, + "step": 229099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.706343650817871, + "loss_rtd": 0.24197185039520264, + "loss_sent": 0.13926245272159576, + "loss_sod": 0.09395718574523926, + "loss_total": 0.47519147396087646, + "step": 229099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.1389895677566528, + "learning_rate": 2.6638853166380085e-05, + "loss": 0.476, + "step": 229100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.648293972015381, + "loss_rtd": 0.23363643884658813, + "loss_sent": 0.3106616139411926, + "loss_sod": 0.12124307453632355, + "loss_total": 0.6655411124229431, + "step": 229199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.569979190826416, + "loss_rtd": 0.23628993332386017, + "loss_sent": 0.15998664498329163, + "loss_sod": 0.03313560411334038, + "loss_total": 0.42941218614578247, + "step": 229199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.2090204954147339, + "learning_rate": 2.6610801244047257e-05, + "loss": 0.4747, + "step": 229200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.099374771118164, + "loss_rtd": 0.20434343814849854, + "loss_sent": 0.014487138949334621, + "loss_sod": 0.04945603758096695, + "loss_total": 0.2682866156101227, + "step": 229299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.644153594970703, + "loss_rtd": 0.2047325223684311, + "loss_sent": 0.18202875554561615, + "loss_sod": 0.06680737435817719, + "loss_total": 0.45356863737106323, + "step": 229299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.3708080053329468, + "learning_rate": 2.6582758742880893e-05, + "loss": 0.4778, + "step": 229300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.399141311645508, + "loss_rtd": 0.2253955751657486, + "loss_sent": 0.13051699101924896, + "loss_sod": 0.049406036734580994, + "loss_total": 0.40531861782073975, + "step": 229399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.877711772918701, + "loss_rtd": 0.2553017735481262, + "loss_sent": 0.45392152667045593, + "loss_sod": 0.06354334950447083, + "loss_total": 0.772766649723053, + "step": 229399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.1260013580322266, + "learning_rate": 2.6554725674176505e-05, + "loss": 0.4771, + "step": 229400 + }, + { + "epoch": 0.010998, + "loss_gen": 6.0335798263549805, + "loss_rtd": 0.24716512858867645, + "loss_sent": 0.23472942411899567, + "loss_sod": 0.09684835374355316, + "loss_total": 0.5787429213523865, + "step": 229499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.672654151916504, + "loss_rtd": 0.2530180513858795, + "loss_sent": 0.18808555603027344, + "loss_sod": 0.02407405897974968, + "loss_total": 0.46517765522003174, + "step": 229499 + }, + { + "epoch": 0.011, + "grad_norm": 0.8587880730628967, + "learning_rate": 2.6526702049225828e-05, + "loss": 0.4449, + "step": 229500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.884972095489502, + "loss_rtd": 0.22826610505580902, + "loss_sent": 0.2614843249320984, + "loss_sod": 0.11151456832885742, + "loss_total": 0.601265013217926, + "step": 229599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.78701114654541, + "loss_rtd": 0.2326907217502594, + "loss_sent": 0.1665249764919281, + "loss_sod": 0.04708373546600342, + "loss_total": 0.4462994337081909, + "step": 229599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.0387909412384033, + "learning_rate": 2.6498687879316743e-05, + "loss": 0.4706, + "step": 229600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.727572441101074, + "loss_rtd": 0.2457440346479416, + "loss_sent": 0.5038110017776489, + "loss_sod": 0.032166820019483566, + "loss_total": 0.781721830368042, + "step": 229699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.695270538330078, + "loss_rtd": 0.2344040870666504, + "loss_sent": 0.13566479086875916, + "loss_sod": 0.03081917017698288, + "loss_total": 0.400888055562973, + "step": 229699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.3107898235321045, + "learning_rate": 2.6470683175733367e-05, + "loss": 0.4619, + "step": 229700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.772567272186279, + "loss_rtd": 0.24603179097175598, + "loss_sent": 0.21451306343078613, + "loss_sod": 0.04447333514690399, + "loss_total": 0.5050181746482849, + "step": 229799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.984659194946289, + "loss_rtd": 0.2422512024641037, + "loss_sent": 0.022173278033733368, + "loss_sod": 0.10229814052581787, + "loss_total": 0.36672264337539673, + "step": 229799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.7812786102294922, + "learning_rate": 2.644268794975602e-05, + "loss": 0.4442, + "step": 229800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.654200553894043, + "loss_rtd": 0.2162037342786789, + "loss_sent": 0.06268905103206635, + "loss_sod": 0.009348021820187569, + "loss_total": 0.28824079036712646, + "step": 229899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.920285224914551, + "loss_rtd": 0.19752170145511627, + "loss_sent": 0.0001401292538503185, + "loss_sod": 0.12207889556884766, + "loss_total": 0.3197407126426697, + "step": 229899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.8136213421821594, + "learning_rate": 2.6414702212661118e-05, + "loss": 0.4463, + "step": 229900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.861421585083008, + "loss_rtd": 0.23023930191993713, + "loss_sent": 0.4636632800102234, + "loss_sod": 0.050620581954717636, + "loss_total": 0.7445231676101685, + "step": 229999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.83852481842041, + "loss_rtd": 0.2317688912153244, + "loss_sent": 0.39547643065452576, + "loss_sod": 0.034720465540885925, + "loss_total": 0.6619657874107361, + "step": 229999 + }, + { + "epoch": 0.012, + "grad_norm": 3.6912200450897217, + "learning_rate": 2.638672597572135e-05, + "loss": 0.4472, + "step": 230000 + }, + { + "epoch": 0.012, + "eval_loss": 0.43895989656448364, + "eval_runtime": 151.09, + "eval_samples_per_second": 102.211, + "eval_steps_per_second": 0.801, + "step": 230000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.356983184814453, + "loss_rtd": 0.24663342535495758, + "loss_sent": 0.1497732400894165, + "loss_sod": 0.03361300379037857, + "loss_total": 0.43001967668533325, + "step": 230099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.665292739868164, + "loss_rtd": 0.2385682612657547, + "loss_sent": 0.23744936287403107, + "loss_sod": 0.05865897983312607, + "loss_total": 0.5346766114234924, + "step": 230099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.7897059917449951, + "learning_rate": 2.635875925020554e-05, + "loss": 0.4518, + "step": 230100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.827788352966309, + "loss_rtd": 0.24381259083747864, + "loss_sent": 0.08867710828781128, + "loss_sod": 0.03537292033433914, + "loss_total": 0.36786261200904846, + "step": 230199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.340538501739502, + "loss_rtd": 0.2554982900619507, + "loss_sent": 0.19783081114292145, + "loss_sod": 0.019756725057959557, + "loss_total": 0.47308582067489624, + "step": 230199 + }, + { + "epoch": 0.0124, + "grad_norm": 0.998620331287384, + "learning_rate": 2.6330802047378687e-05, + "loss": 0.4695, + "step": 230200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.6338653564453125, + "loss_rtd": 0.22210107743740082, + "loss_sent": 0.10173848271369934, + "loss_sod": 0.07229706645011902, + "loss_total": 0.396136611700058, + "step": 230299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.754301071166992, + "loss_rtd": 0.24328972399234772, + "loss_sent": 0.23832380771636963, + "loss_sod": 0.011281922459602356, + "loss_total": 0.4928954541683197, + "step": 230299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.0170916318893433, + "learning_rate": 2.630285437850193e-05, + "loss": 0.4553, + "step": 230300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.837557792663574, + "loss_rtd": 0.22751744091510773, + "loss_sent": 0.11446940153837204, + "loss_sod": 0.050170376896858215, + "loss_total": 0.3921572268009186, + "step": 230399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.505778789520264, + "loss_rtd": 0.21539092063903809, + "loss_sent": 0.8965586423873901, + "loss_sod": 0.012855786830186844, + "loss_total": 1.1248053312301636, + "step": 230399 + }, + { + "epoch": 0.0128, + "grad_norm": 2.706059694290161, + "learning_rate": 2.6274916254832595e-05, + "loss": 0.4592, + "step": 230400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.705245018005371, + "loss_rtd": 0.25718510150909424, + "loss_sent": 0.1807137131690979, + "loss_sod": 0.03812996298074722, + "loss_total": 0.47602877020835876, + "step": 230499 + }, + { + "epoch": 0.012998, + "loss_gen": 4.976123332977295, + "loss_rtd": 0.20817071199417114, + "loss_sent": 0.01200772449374199, + "loss_sod": 0.05684669315814972, + "loss_total": 0.27702510356903076, + "step": 230499 + }, + { + "epoch": 0.013, + "grad_norm": 1.1367182731628418, + "learning_rate": 2.6246987687624148e-05, + "loss": 0.4902, + "step": 230500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.764529705047607, + "loss_rtd": 0.25021564960479736, + "loss_sent": 0.36481043696403503, + "loss_sod": 0.05366312712430954, + "loss_total": 0.6686892509460449, + "step": 230599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.755175590515137, + "loss_rtd": 0.24264858663082123, + "loss_sent": 0.10518502444028854, + "loss_sod": 0.06821787357330322, + "loss_total": 0.4160515069961548, + "step": 230599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.6509652137756348, + "learning_rate": 2.6219068688126236e-05, + "loss": 0.4575, + "step": 230600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.475387096405029, + "loss_rtd": 0.23707376420497894, + "loss_sent": 0.3856346607208252, + "loss_sod": 0.06012444570660591, + "loss_total": 0.6828328371047974, + "step": 230699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.969977378845215, + "loss_rtd": 0.26010188460350037, + "loss_sent": 0.3575820028781891, + "loss_sod": 0.08605533093214035, + "loss_total": 0.7037392258644104, + "step": 230699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.7759782075881958, + "learning_rate": 2.61911592675846e-05, + "loss": 0.4555, + "step": 230700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.816752910614014, + "loss_rtd": 0.20228004455566406, + "loss_sent": 0.0006019301945343614, + "loss_sod": 0.17747433483600616, + "loss_total": 0.3803562819957733, + "step": 230799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.356697082519531, + "loss_rtd": 0.21131305396556854, + "loss_sent": 0.054963912814855576, + "loss_sod": 0.04144435003399849, + "loss_total": 0.3077213168144226, + "step": 230799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.0606520175933838, + "learning_rate": 2.616325943724116e-05, + "loss": 0.4354, + "step": 230800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.535309791564941, + "loss_rtd": 0.23430833220481873, + "loss_sent": 0.17811453342437744, + "loss_sod": 0.060794614255428314, + "loss_total": 0.4732174873352051, + "step": 230899 + }, + { + "epoch": 0.013798, + "loss_gen": 4.870096683502197, + "loss_rtd": 0.1986638754606247, + "loss_sent": 2.7410307666286826e-05, + "loss_sod": 0.1807681769132614, + "loss_total": 0.3794594705104828, + "step": 230899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.3568741083145142, + "learning_rate": 2.6135369208333976e-05, + "loss": 0.4482, + "step": 230900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.768224239349365, + "loss_rtd": 0.242709681391716, + "loss_sent": 0.11642098426818848, + "loss_sod": 0.019703611731529236, + "loss_total": 0.3788342773914337, + "step": 230999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.88675594329834, + "loss_rtd": 0.24185074865818024, + "loss_sent": 0.6008553504943848, + "loss_sod": 0.043352626264095306, + "loss_total": 0.8860586881637573, + "step": 230999 + }, + { + "epoch": 0.014, + "grad_norm": 1.21649968624115, + "learning_rate": 2.6107488592097234e-05, + "loss": 0.4607, + "step": 231000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4299858510494232, + "eval_runtime": 151.0551, + "eval_samples_per_second": 102.234, + "eval_steps_per_second": 0.801, + "step": 231000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.451724529266357, + "loss_rtd": 0.19294089078903198, + "loss_sent": 0.06297950446605682, + "loss_sod": 0.07656969875097275, + "loss_total": 0.33249008655548096, + "step": 231099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.753503322601318, + "loss_rtd": 0.23417335748672485, + "loss_sent": 0.1358361691236496, + "loss_sod": 0.008586164563894272, + "loss_total": 0.3785957098007202, + "step": 231099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.9893803596496582, + "learning_rate": 2.6079617599761207e-05, + "loss": 0.4486, + "step": 231100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.810847759246826, + "loss_rtd": 0.2284773886203766, + "loss_sent": 0.23063786327838898, + "loss_sod": 0.03316286578774452, + "loss_total": 0.4922780990600586, + "step": 231199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.693573474884033, + "loss_rtd": 0.24732337892055511, + "loss_sent": 0.18036647140979767, + "loss_sod": 0.031402960419654846, + "loss_total": 0.45909279584884644, + "step": 231199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.2883961200714111, + "learning_rate": 2.605175624255236e-05, + "loss": 0.4579, + "step": 231200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.7976393699646, + "loss_rtd": 0.2478954792022705, + "loss_sent": 0.06113943085074425, + "loss_sod": 0.02013206295669079, + "loss_total": 0.329166978597641, + "step": 231299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.587930202484131, + "loss_rtd": 0.2226967215538025, + "loss_sent": 0.0834789052605629, + "loss_sod": 0.1132524386048317, + "loss_total": 0.4194280505180359, + "step": 231299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.7868592143058777, + "learning_rate": 2.602390453169325e-05, + "loss": 0.4532, + "step": 231300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.7209577560424805, + "loss_rtd": 0.24389250576496124, + "loss_sent": 0.10512908548116684, + "loss_sod": 0.01837059110403061, + "loss_total": 0.3673921823501587, + "step": 231399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.937654495239258, + "loss_rtd": 0.24715158343315125, + "loss_sent": 0.04203484579920769, + "loss_sod": 0.04600105062127113, + "loss_total": 0.33518746495246887, + "step": 231399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.9294385313987732, + "learning_rate": 2.5996062478402504e-05, + "loss": 0.4501, + "step": 231400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.7035231590271, + "loss_rtd": 0.2375149130821228, + "loss_sent": 0.46221548318862915, + "loss_sod": 0.129900261759758, + "loss_total": 0.8296306729316711, + "step": 231499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.928682327270508, + "loss_rtd": 0.22469770908355713, + "loss_sent": 0.1515629142522812, + "loss_sod": 0.05415157601237297, + "loss_total": 0.4304121732711792, + "step": 231499 + }, + { + "epoch": 0.015, + "grad_norm": 1.9025779962539673, + "learning_rate": 2.5968230093894925e-05, + "loss": 0.4556, + "step": 231500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.560404300689697, + "loss_rtd": 0.24419137835502625, + "loss_sent": 0.5162179470062256, + "loss_sod": 0.014479400590062141, + "loss_total": 0.7748887538909912, + "step": 231599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.86340856552124, + "loss_rtd": 0.2088865488767624, + "loss_sent": 0.11260949075222015, + "loss_sod": 0.0717419907450676, + "loss_total": 0.39323800802230835, + "step": 231599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.3364534378051758, + "learning_rate": 2.5940407389381387e-05, + "loss": 0.4457, + "step": 231600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.0402021408081055, + "loss_rtd": 0.20991869270801544, + "loss_sent": 2.816465712385252e-05, + "loss_sod": 0.1331070512533188, + "loss_total": 0.3430539071559906, + "step": 231699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.167065620422363, + "loss_rtd": 0.2097914069890976, + "loss_sent": 0.07106874883174896, + "loss_sod": 0.07669724524021149, + "loss_total": 0.35755741596221924, + "step": 231699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.8033815026283264, + "learning_rate": 2.59125943760689e-05, + "loss": 0.4508, + "step": 231700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.450754165649414, + "loss_rtd": 0.23916415870189667, + "loss_sent": 0.023154791444540024, + "loss_sod": 0.12429704517126083, + "loss_total": 0.3866159915924072, + "step": 231799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.072955131530762, + "loss_rtd": 0.20164482295513153, + "loss_sent": 0.02033694088459015, + "loss_sod": 0.06791871786117554, + "loss_total": 0.2899004817008972, + "step": 231799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.8766674995422363, + "learning_rate": 2.5884791065160495e-05, + "loss": 0.4577, + "step": 231800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.317257881164551, + "loss_rtd": 0.1965210884809494, + "loss_sent": 0.05714843422174454, + "loss_sod": 0.026518816128373146, + "loss_total": 0.28018835186958313, + "step": 231899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.720180511474609, + "loss_rtd": 0.23176634311676025, + "loss_sent": 0.29128673672676086, + "loss_sod": 0.07481521368026733, + "loss_total": 0.5978683233261108, + "step": 231899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.9235107898712158, + "learning_rate": 2.5856997467855364e-05, + "loss": 0.456, + "step": 231900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.634468078613281, + "loss_rtd": 0.2609516382217407, + "loss_sent": 0.15540874004364014, + "loss_sod": 0.0736379325389862, + "loss_total": 0.48999831080436707, + "step": 231999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.1410932540893555, + "loss_rtd": 0.23574668169021606, + "loss_sent": 0.0065481094643473625, + "loss_sod": 0.2120421677827835, + "loss_total": 0.454336941242218, + "step": 231999 + }, + { + "epoch": 0.016, + "grad_norm": 1.40248703956604, + "learning_rate": 2.5829213595348768e-05, + "loss": 0.4481, + "step": 232000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4332537055015564, + "eval_runtime": 151.7164, + "eval_samples_per_second": 101.789, + "eval_steps_per_second": 0.798, + "step": 232000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.714728355407715, + "loss_rtd": 0.2596420645713806, + "loss_sent": 0.21808409690856934, + "loss_sod": 0.035067614167928696, + "loss_total": 0.512793779373169, + "step": 232099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.517312526702881, + "loss_rtd": 0.2269272357225418, + "loss_sent": 0.0748443752527237, + "loss_sod": 0.11009599268436432, + "loss_total": 0.411867618560791, + "step": 232099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.9759652614593506, + "learning_rate": 2.5801439458832066e-05, + "loss": 0.4423, + "step": 232100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.762374401092529, + "loss_rtd": 0.23316055536270142, + "loss_sent": 0.2530592978000641, + "loss_sod": 0.08535408973693848, + "loss_total": 0.5715739727020264, + "step": 232199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.8840532302856445, + "loss_rtd": 0.19623419642448425, + "loss_sent": 0.004442990757524967, + "loss_sod": 0.024791399016976357, + "loss_total": 0.22546859085559845, + "step": 232199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.9774951338768005, + "learning_rate": 2.577367506949263e-05, + "loss": 0.4472, + "step": 232200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.8422651290893555, + "loss_rtd": 0.21944259107112885, + "loss_sent": 0.32135623693466187, + "loss_sod": 0.09976760298013687, + "loss_total": 0.6405664682388306, + "step": 232299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.7531890869140625, + "loss_rtd": 0.23951628804206848, + "loss_sent": 0.1844344586133957, + "loss_sod": 0.04384145885705948, + "loss_total": 0.46779221296310425, + "step": 232299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.98200523853302, + "learning_rate": 2.5745920438513983e-05, + "loss": 0.458, + "step": 232300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.926414489746094, + "loss_rtd": 0.22466085851192474, + "loss_sent": 0.47860443592071533, + "loss_sod": 0.0931425467133522, + "loss_total": 0.7964078187942505, + "step": 232399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.955382823944092, + "loss_rtd": 0.25543341040611267, + "loss_sent": 0.13194069266319275, + "loss_sod": 0.060727186501026154, + "loss_total": 0.448101282119751, + "step": 232399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.3394931554794312, + "learning_rate": 2.571817557707569e-05, + "loss": 0.4455, + "step": 232400 + }, + { + "epoch": 0.016998, + "loss_gen": 6.0583977699279785, + "loss_rtd": 0.23822690546512604, + "loss_sent": 0.16945557296276093, + "loss_sod": 0.18277540802955627, + "loss_total": 0.5904579162597656, + "step": 232499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.711584568023682, + "loss_rtd": 0.22483934462070465, + "loss_sent": 0.13381531834602356, + "loss_sod": 0.04649697244167328, + "loss_total": 0.4051516354084015, + "step": 232499 + }, + { + "epoch": 0.017, + "grad_norm": 0.924994945526123, + "learning_rate": 2.569044049635338e-05, + "loss": 0.4808, + "step": 232500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.650406360626221, + "loss_rtd": 0.23745349049568176, + "loss_sent": 0.3752727508544922, + "loss_sod": 0.034239865839481354, + "loss_total": 0.6469660997390747, + "step": 232599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.752997398376465, + "loss_rtd": 0.22762267291545868, + "loss_sent": 0.49283188581466675, + "loss_sod": 0.007687894627451897, + "loss_total": 0.7281424403190613, + "step": 232599 + }, + { + "epoch": 0.0172, + "grad_norm": 2.121411085128784, + "learning_rate": 2.5662715207518717e-05, + "loss": 0.4599, + "step": 232600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.526933193206787, + "loss_rtd": 0.21745210886001587, + "loss_sent": 0.17164161801338196, + "loss_sod": 0.057460881769657135, + "loss_total": 0.44655460119247437, + "step": 232699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.517461776733398, + "loss_rtd": 0.25354766845703125, + "loss_sent": 0.11019515246152878, + "loss_sod": 0.06326562166213989, + "loss_total": 0.4270084500312805, + "step": 232699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.162143349647522, + "learning_rate": 2.563499972173945e-05, + "loss": 0.455, + "step": 232700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.907709121704102, + "loss_rtd": 0.23863568902015686, + "loss_sent": 0.22896018624305725, + "loss_sod": 0.028900478035211563, + "loss_total": 0.4964963495731354, + "step": 232799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.922534465789795, + "loss_rtd": 0.23425829410552979, + "loss_sent": 0.0690067708492279, + "loss_sod": 0.10086705535650253, + "loss_total": 0.4041321277618408, + "step": 232799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.8419181704521179, + "learning_rate": 2.560729405017941e-05, + "loss": 0.4515, + "step": 232800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.123891353607178, + "loss_rtd": 0.22470834851264954, + "loss_sent": 0.30141136050224304, + "loss_sod": 0.015585193410515785, + "loss_total": 0.5417048931121826, + "step": 232899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.59957218170166, + "loss_rtd": 0.21619682013988495, + "loss_sent": 0.12326356768608093, + "loss_sod": 0.05799437686800957, + "loss_total": 0.39745476841926575, + "step": 232899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.230122447013855, + "learning_rate": 2.5579598203998388e-05, + "loss": 0.4716, + "step": 232900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.746820449829102, + "loss_rtd": 0.24394603073596954, + "loss_sent": 0.24958030879497528, + "loss_sod": 0.10353004187345505, + "loss_total": 0.5970563888549805, + "step": 232999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.907955646514893, + "loss_rtd": 0.22782622277736664, + "loss_sent": 0.3023303747177124, + "loss_sod": 0.046588096767663956, + "loss_total": 0.5767446756362915, + "step": 232999 + }, + { + "epoch": 0.018, + "grad_norm": 1.1948094367980957, + "learning_rate": 2.5551912194352284e-05, + "loss": 0.4554, + "step": 233000 + }, + { + "epoch": 0.018, + "eval_loss": 0.4314315617084503, + "eval_runtime": 151.2764, + "eval_samples_per_second": 102.085, + "eval_steps_per_second": 0.8, + "step": 233000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.260778903961182, + "loss_rtd": 0.22205084562301636, + "loss_sent": 3.8594189391005784e-05, + "loss_sod": 0.0917004719376564, + "loss_total": 0.31378990411758423, + "step": 233099 + }, + { + "epoch": 0.018198, + "loss_gen": 4.854741096496582, + "loss_rtd": 0.18209616839885712, + "loss_sent": 0.03757604956626892, + "loss_sod": 0.07991458475589752, + "loss_total": 0.29958680272102356, + "step": 233099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.9120252728462219, + "learning_rate": 2.5524236032393027e-05, + "loss": 0.4404, + "step": 233100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.615264892578125, + "loss_rtd": 0.21891269087791443, + "loss_sent": 0.30986180901527405, + "loss_sod": 0.010211745277047157, + "loss_total": 0.5389862656593323, + "step": 233199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.439765453338623, + "loss_rtd": 0.233239084482193, + "loss_sent": 0.2163098156452179, + "loss_sod": 0.014463482424616814, + "loss_total": 0.46401238441467285, + "step": 233199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.9287422895431519, + "learning_rate": 2.5496569729268592e-05, + "loss": 0.4458, + "step": 233200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.251770496368408, + "loss_rtd": 0.21084089577198029, + "loss_sent": 0.053106699138879776, + "loss_sod": 0.09045236557722092, + "loss_total": 0.3543999493122101, + "step": 233299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.034013271331787, + "loss_rtd": 0.20131589472293854, + "loss_sent": 0.05645310506224632, + "loss_sod": 0.009659279137849808, + "loss_total": 0.26742827892303467, + "step": 233299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.6519246697425842, + "learning_rate": 2.546891329612292e-05, + "loss": 0.4481, + "step": 233300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.29550313949585, + "loss_rtd": 0.23290880024433136, + "loss_sent": 0.10205116868019104, + "loss_sod": 0.03483670949935913, + "loss_total": 0.36979666352272034, + "step": 233399 + }, + { + "epoch": 0.018798, + "loss_gen": 6.386826515197754, + "loss_rtd": 0.2479952573776245, + "loss_sent": 0.06440846621990204, + "loss_sod": 0.14350193738937378, + "loss_total": 0.4559056758880615, + "step": 233399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.9469907879829407, + "learning_rate": 2.544126674409604e-05, + "loss": 0.4541, + "step": 233400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.965794086456299, + "loss_rtd": 0.23737753927707672, + "loss_sent": 0.15652404725551605, + "loss_sod": 0.06936980783939362, + "loss_total": 0.4632713794708252, + "step": 233499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.870352268218994, + "loss_rtd": 0.22798781096935272, + "loss_sent": 0.24696271121501923, + "loss_sod": 0.043010540306568146, + "loss_total": 0.5179610848426819, + "step": 233499 + }, + { + "epoch": 0.019, + "grad_norm": 1.350039005279541, + "learning_rate": 2.541363008432397e-05, + "loss": 0.4394, + "step": 233500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.645421981811523, + "loss_rtd": 0.2259354293346405, + "loss_sent": 0.1398894488811493, + "loss_sod": 0.12675334513187408, + "loss_total": 0.4925782382488251, + "step": 233599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.55348539352417, + "loss_rtd": 0.24399907886981964, + "loss_sent": 0.25538408756256104, + "loss_sod": 0.10012035816907883, + "loss_total": 0.5995035171508789, + "step": 233599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.2693923711776733, + "learning_rate": 2.538600332793879e-05, + "loss": 0.4603, + "step": 233600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.721219062805176, + "loss_rtd": 0.2351340800523758, + "loss_sent": 0.32560208439826965, + "loss_sod": 0.057719096541404724, + "loss_total": 0.6184552907943726, + "step": 233699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.946829795837402, + "loss_rtd": 0.17942406237125397, + "loss_sent": 0.024305157363414764, + "loss_sod": 0.06219222769141197, + "loss_total": 0.2659214437007904, + "step": 233699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.2101151943206787, + "learning_rate": 2.5358386486068498e-05, + "loss": 0.4592, + "step": 233700 + }, + { + "epoch": 0.019598, + "loss_gen": 6.033141136169434, + "loss_rtd": 0.25547918677330017, + "loss_sent": 0.20957908034324646, + "loss_sod": 0.17501670122146606, + "loss_total": 0.6400749683380127, + "step": 233799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.559689044952393, + "loss_rtd": 0.23313714563846588, + "loss_sent": 0.11533534526824951, + "loss_sod": 0.1217268705368042, + "loss_total": 0.4701993465423584, + "step": 233799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.3568018674850464, + "learning_rate": 2.5330779569837194e-05, + "loss": 0.4377, + "step": 233800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.204100608825684, + "loss_rtd": 0.2192050963640213, + "loss_sent": 0.018752919510006905, + "loss_sod": 0.09112323075532913, + "loss_total": 0.329081267118454, + "step": 233899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.559121608734131, + "loss_rtd": 0.23450756072998047, + "loss_sent": 0.13573385775089264, + "loss_sod": 0.04589318856596947, + "loss_total": 0.4161345958709717, + "step": 233899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.0575929880142212, + "learning_rate": 2.5303182590364914e-05, + "loss": 0.4641, + "step": 233900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.20557165145874, + "loss_rtd": 0.1950269639492035, + "loss_sent": 0.02659563161432743, + "loss_sod": 0.04482380300760269, + "loss_total": 0.26644638180732727, + "step": 233999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.707209587097168, + "loss_rtd": 0.23177239298820496, + "loss_sent": 0.27904412150382996, + "loss_sod": 0.10986566543579102, + "loss_total": 0.6206821799278259, + "step": 233999 + }, + { + "epoch": 0.02, + "grad_norm": 1.2919104099273682, + "learning_rate": 2.5275595558767763e-05, + "loss": 0.4631, + "step": 234000 + }, + { + "epoch": 0.02, + "eval_loss": 0.43134596943855286, + "eval_runtime": 151.1975, + "eval_samples_per_second": 102.138, + "eval_steps_per_second": 0.8, + "step": 234000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.622669696807861, + "loss_rtd": 0.23707415163516998, + "loss_sent": 0.09233508259057999, + "loss_sod": 0.0379769429564476, + "loss_total": 0.3673861622810364, + "step": 234099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.688944339752197, + "loss_rtd": 0.23022818565368652, + "loss_sent": 0.6720576286315918, + "loss_sod": 0.1273057758808136, + "loss_total": 1.0295915603637695, + "step": 234099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.4382449388504028, + "learning_rate": 2.5248018486157744e-05, + "loss": 0.4499, + "step": 234100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.51674747467041, + "loss_rtd": 0.2293349653482437, + "loss_sent": 0.1584416925907135, + "loss_sod": 0.023466818034648895, + "loss_total": 0.4112434685230255, + "step": 234199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.76281213760376, + "loss_rtd": 0.23749059438705444, + "loss_sent": 0.15755978226661682, + "loss_sod": 0.0425865575671196, + "loss_total": 0.43763694167137146, + "step": 234199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.8715875148773193, + "learning_rate": 2.522045138364292e-05, + "loss": 0.4713, + "step": 234200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.291765213012695, + "loss_rtd": 0.20291708409786224, + "loss_sent": 0.009411037899553776, + "loss_sod": 0.07422708719968796, + "loss_total": 0.2865552008152008, + "step": 234299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.929440498352051, + "loss_rtd": 0.23512855172157288, + "loss_sent": 0.13337145745754242, + "loss_sod": 0.03451795130968094, + "loss_total": 0.40301793813705444, + "step": 234299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.7912243008613586, + "learning_rate": 2.5192894262327314e-05, + "loss": 0.4739, + "step": 234300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.522777557373047, + "loss_rtd": 0.24809530377388, + "loss_sent": 0.05782622843980789, + "loss_sod": 0.023543953895568848, + "loss_total": 0.32946547865867615, + "step": 234399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.438991546630859, + "loss_rtd": 0.22391508519649506, + "loss_sent": 0.08650373667478561, + "loss_sod": 0.07383397966623306, + "loss_total": 0.3842528164386749, + "step": 234399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.7679288983345032, + "learning_rate": 2.5165347133310948e-05, + "loss": 0.4711, + "step": 234400 + }, + { + "epoch": 0.020998, + "loss_gen": 6.136439323425293, + "loss_rtd": 0.2450696974992752, + "loss_sent": 0.12871153652668, + "loss_sod": 0.04998399317264557, + "loss_total": 0.42376524209976196, + "step": 234499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.780904769897461, + "loss_rtd": 0.24080272018909454, + "loss_sent": 0.11224455386400223, + "loss_sod": 0.014613127335906029, + "loss_total": 0.36766040325164795, + "step": 234499 + }, + { + "epoch": 0.021, + "grad_norm": 0.852159857749939, + "learning_rate": 2.513781000768977e-05, + "loss": 0.4631, + "step": 234500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.776484489440918, + "loss_rtd": 0.2173469066619873, + "loss_sent": 0.014870740473270416, + "loss_sod": 0.15193238854408264, + "loss_total": 0.38415002822875977, + "step": 234599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.964664936065674, + "loss_rtd": 0.19436417520046234, + "loss_sent": 2.915369623224251e-05, + "loss_sod": 0.0699116662144661, + "loss_total": 0.2643049955368042, + "step": 234599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.8685846328735352, + "learning_rate": 2.5110282896555748e-05, + "loss": 0.4455, + "step": 234600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.800841808319092, + "loss_rtd": 0.2289271503686905, + "loss_sent": 0.009817521087825298, + "loss_sod": 0.09922560304403305, + "loss_total": 0.3379702568054199, + "step": 234699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.550946235656738, + "loss_rtd": 0.2376554161310196, + "loss_sent": 0.239140585064888, + "loss_sod": 0.03818279504776001, + "loss_total": 0.5149787664413452, + "step": 234699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.8645015954971313, + "learning_rate": 2.5082765810996822e-05, + "loss": 0.467, + "step": 234700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.73333740234375, + "loss_rtd": 0.2413206696510315, + "loss_sent": 0.2613426446914673, + "loss_sod": 0.044991590082645416, + "loss_total": 0.5476548671722412, + "step": 234799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.499999523162842, + "loss_rtd": 0.23359303176403046, + "loss_sent": 0.038986802101135254, + "loss_sod": 0.027301784604787827, + "loss_total": 0.29988160729408264, + "step": 234799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.6654761433601379, + "learning_rate": 2.5055258762096822e-05, + "loss": 0.4473, + "step": 234800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.706062316894531, + "loss_rtd": 0.2492617815732956, + "loss_sent": 0.08323502540588379, + "loss_sod": 0.01766759529709816, + "loss_total": 0.35016441345214844, + "step": 234899 + }, + { + "epoch": 0.021798, + "loss_gen": 6.086641788482666, + "loss_rtd": 0.2328205555677414, + "loss_sent": 0.15146449208259583, + "loss_sod": 0.01626696065068245, + "loss_total": 0.40055200457572937, + "step": 234899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.5309454202651978, + "learning_rate": 2.5027761760935614e-05, + "loss": 0.4512, + "step": 234900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.860592365264893, + "loss_rtd": 0.19987404346466064, + "loss_sent": 0.00031333829974755645, + "loss_sod": 0.06769369542598724, + "loss_total": 0.2678810954093933, + "step": 234999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.4687676429748535, + "loss_rtd": 0.2330511510372162, + "loss_sent": 0.13229608535766602, + "loss_sod": 0.05977775901556015, + "loss_total": 0.42512500286102295, + "step": 234999 + }, + { + "epoch": 0.022, + "grad_norm": 0.8150344491004944, + "learning_rate": 2.5000274818588975e-05, + "loss": 0.4579, + "step": 235000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4349405765533447, + "eval_runtime": 151.1347, + "eval_samples_per_second": 102.18, + "eval_steps_per_second": 0.801, + "step": 235000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.707245349884033, + "loss_rtd": 0.226992666721344, + "loss_sent": 0.10455288738012314, + "loss_sod": 0.02676062285900116, + "loss_total": 0.3583061993122101, + "step": 235099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.896261692047119, + "loss_rtd": 0.22141416370868683, + "loss_sent": 0.17445340752601624, + "loss_sod": 0.02341574989259243, + "loss_total": 0.41928333044052124, + "step": 235099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.5898993015289307, + "learning_rate": 2.4972797946128678e-05, + "loss": 0.4608, + "step": 235100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.678945064544678, + "loss_rtd": 0.2522539496421814, + "loss_sent": 0.40994736552238464, + "loss_sod": 0.03658227622509003, + "loss_total": 0.6987836360931396, + "step": 235199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.730838775634766, + "loss_rtd": 0.2313205897808075, + "loss_sent": 0.3588387668132782, + "loss_sod": 0.13311989605426788, + "loss_total": 0.7232792377471924, + "step": 235199 + }, + { + "epoch": 0.0224, + "grad_norm": 2.3968727588653564, + "learning_rate": 2.4945331154622352e-05, + "loss": 0.4369, + "step": 235200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.283631801605225, + "loss_rtd": 0.19868652522563934, + "loss_sent": 0.021283335983753204, + "loss_sod": 0.06252605468034744, + "loss_total": 0.28249591588974, + "step": 235299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.268820762634277, + "loss_rtd": 0.20874838531017303, + "loss_sent": 0.05531521514058113, + "loss_sod": 0.09486284852027893, + "loss_total": 0.3589264452457428, + "step": 235299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.848966658115387, + "learning_rate": 2.4917874455133638e-05, + "loss": 0.4586, + "step": 235300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.539623260498047, + "loss_rtd": 0.23312102258205414, + "loss_sent": 0.2591060996055603, + "loss_sod": 0.0032930118031799793, + "loss_total": 0.49552011489868164, + "step": 235399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.957262992858887, + "loss_rtd": 0.23715601861476898, + "loss_sent": 0.03702971339225769, + "loss_sod": 0.024175569415092468, + "loss_total": 0.29836130142211914, + "step": 235399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.5907749533653259, + "learning_rate": 2.489042785872212e-05, + "loss": 0.4454, + "step": 235400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.736026763916016, + "loss_rtd": 0.24415920674800873, + "loss_sent": 0.17802608013153076, + "loss_sod": 0.01555958017706871, + "loss_total": 0.4377448558807373, + "step": 235499 + }, + { + "epoch": 0.022998, + "loss_gen": 6.277961254119873, + "loss_rtd": 0.2379772812128067, + "loss_sent": 0.22978819906711578, + "loss_sod": 0.16743631660938263, + "loss_total": 0.6352018117904663, + "step": 235499 + }, + { + "epoch": 0.023, + "grad_norm": 1.01682710647583, + "learning_rate": 2.4862991376443235e-05, + "loss": 0.462, + "step": 235500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.370872497558594, + "loss_rtd": 0.21043279767036438, + "loss_sent": 3.229017966077663e-05, + "loss_sod": 0.18214809894561768, + "loss_total": 0.39261317253112793, + "step": 235599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.350832462310791, + "loss_rtd": 0.18735279142856598, + "loss_sent": 3.1079565815161914e-05, + "loss_sod": 0.10657824575901031, + "loss_total": 0.29396212100982666, + "step": 235599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.0765697956085205, + "learning_rate": 2.4835565019348432e-05, + "loss": 0.4678, + "step": 235600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.570460796356201, + "loss_rtd": 0.23438499867916107, + "loss_sent": 0.13533693552017212, + "loss_sod": 0.05307968705892563, + "loss_total": 0.4228016138076782, + "step": 235699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.606940269470215, + "loss_rtd": 0.23377205431461334, + "loss_sent": 0.007051699794828892, + "loss_sod": 0.14392854273319244, + "loss_total": 0.3847523033618927, + "step": 235699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.1008973121643066, + "learning_rate": 2.480814879848502e-05, + "loss": 0.4452, + "step": 235700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.1537065505981445, + "loss_rtd": 0.23854967951774597, + "loss_sent": 0.04164435714483261, + "loss_sod": 0.19953332841396332, + "loss_total": 0.4797273576259613, + "step": 235799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.616333961486816, + "loss_rtd": 0.21906889975070953, + "loss_sent": 0.19090819358825684, + "loss_sod": 0.034674208611249924, + "loss_total": 0.4446513056755066, + "step": 235799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.268100380897522, + "learning_rate": 2.478074272489625e-05, + "loss": 0.4483, + "step": 235800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.753663539886475, + "loss_rtd": 0.2341436892747879, + "loss_sent": 0.09450148046016693, + "loss_sod": 0.011313949711620808, + "loss_total": 0.33995911478996277, + "step": 235899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.697127819061279, + "loss_rtd": 0.2478364259004593, + "loss_sent": 0.10682282596826553, + "loss_sod": 0.043401964008808136, + "loss_total": 0.39806121587753296, + "step": 235899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.7656726241111755, + "learning_rate": 2.475334680962132e-05, + "loss": 0.4711, + "step": 235900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.891186237335205, + "loss_rtd": 0.25196000933647156, + "loss_sent": 0.18737146258354187, + "loss_sod": 0.025355849415063858, + "loss_total": 0.464687317609787, + "step": 235999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.931027412414551, + "loss_rtd": 0.22270405292510986, + "loss_sent": 0.30568718910217285, + "loss_sod": 0.012507490813732147, + "loss_total": 0.5408987402915955, + "step": 235999 + }, + { + "epoch": 0.024, + "grad_norm": 0.8812655210494995, + "learning_rate": 2.472596106369525e-05, + "loss": 0.4556, + "step": 236000 + }, + { + "epoch": 0.024, + "eval_loss": 0.43395423889160156, + "eval_runtime": 151.4616, + "eval_samples_per_second": 101.96, + "eval_steps_per_second": 0.799, + "step": 236000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.555566310882568, + "loss_rtd": 0.2515144646167755, + "loss_sent": 0.10076847672462463, + "loss_sod": 0.08022458106279373, + "loss_total": 0.4325075149536133, + "step": 236099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.315033912658691, + "loss_rtd": 0.23150426149368286, + "loss_sent": 0.11643737554550171, + "loss_sod": 0.03707926720380783, + "loss_total": 0.385020911693573, + "step": 236099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.7560243010520935, + "learning_rate": 2.469858549814905e-05, + "loss": 0.4497, + "step": 236100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.5409674644470215, + "loss_rtd": 0.20531043410301208, + "loss_sent": 0.053728409111499786, + "loss_sod": 0.028350792825222015, + "loss_total": 0.2873896360397339, + "step": 236199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.594228744506836, + "loss_rtd": 0.2509137690067291, + "loss_sent": 0.1735769659280777, + "loss_sod": 0.06944534927606583, + "loss_total": 0.49393606185913086, + "step": 236199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.7966400980949402, + "learning_rate": 2.467122012400958e-05, + "loss": 0.4455, + "step": 236200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.3407883644104, + "loss_rtd": 0.2251061350107193, + "loss_sent": 0.0012118516024202108, + "loss_sod": 0.18864163756370544, + "loss_total": 0.4149596393108368, + "step": 236299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.124298095703125, + "loss_rtd": 0.20701512694358826, + "loss_sent": 0.024271734058856964, + "loss_sod": 0.11601647734642029, + "loss_total": 0.3473033308982849, + "step": 236299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.288283109664917, + "learning_rate": 2.464386495229964e-05, + "loss": 0.4338, + "step": 236300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.575831413269043, + "loss_rtd": 0.23613017797470093, + "loss_sent": 0.08976863324642181, + "loss_sod": 0.008835520595312119, + "loss_total": 0.33473432064056396, + "step": 236399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.69340705871582, + "loss_rtd": 0.23825500905513763, + "loss_sent": 0.04495564475655556, + "loss_sod": 0.12028937041759491, + "loss_total": 0.4035000205039978, + "step": 236399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.7958593368530273, + "learning_rate": 2.461651999403784e-05, + "loss": 0.4593, + "step": 236400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.22304105758667, + "loss_rtd": 0.20367762446403503, + "loss_sent": 4.95214517286513e-05, + "loss_sod": 0.12968656420707703, + "loss_total": 0.3334137201309204, + "step": 236499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.314144611358643, + "loss_rtd": 0.2104502022266388, + "loss_sent": 0.02240786887705326, + "loss_sod": 0.15137213468551636, + "loss_total": 0.38423019647598267, + "step": 236499 + }, + { + "epoch": 0.025, + "grad_norm": 1.090668797492981, + "learning_rate": 2.458918526023875e-05, + "loss": 0.4823, + "step": 236500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.5973219871521, + "loss_rtd": 0.2358320951461792, + "loss_sent": 0.27655020356178284, + "loss_sod": 0.02151513658463955, + "loss_total": 0.5338973999023438, + "step": 236599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.743614196777344, + "loss_rtd": 0.23042291402816772, + "loss_sent": 0.07522018998861313, + "loss_sod": 0.003805323503911495, + "loss_total": 0.3094484210014343, + "step": 236599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.8916210532188416, + "learning_rate": 2.4561860761912804e-05, + "loss": 0.4426, + "step": 236600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.573662757873535, + "loss_rtd": 0.24415111541748047, + "loss_sent": 0.1414310783147812, + "loss_sod": 0.08143579959869385, + "loss_total": 0.4670180082321167, + "step": 236699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.633249282836914, + "loss_rtd": 0.21293377876281738, + "loss_sent": 0.12949199974536896, + "loss_sod": 0.025542840361595154, + "loss_total": 0.3679686188697815, + "step": 236699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.5825505256652832, + "learning_rate": 2.4534546510066313e-05, + "loss": 0.445, + "step": 236700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.501196384429932, + "loss_rtd": 0.24929967522621155, + "loss_sent": 0.15651912987232208, + "loss_sod": 0.042097192257642746, + "loss_total": 0.4479159712791443, + "step": 236799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.555871486663818, + "loss_rtd": 0.23386947810649872, + "loss_sent": 0.12060832977294922, + "loss_sod": 0.020886603742837906, + "loss_total": 0.37536442279815674, + "step": 236799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.7640842795372009, + "learning_rate": 2.4507242515701427e-05, + "loss": 0.4627, + "step": 236800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.898460388183594, + "loss_rtd": 0.2226713001728058, + "loss_sent": 0.18890473246574402, + "loss_sod": 0.05953853949904442, + "loss_total": 0.4711145758628845, + "step": 236899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.6854963302612305, + "loss_rtd": 0.20722654461860657, + "loss_sent": 0.6310082674026489, + "loss_sod": 0.04074262082576752, + "loss_total": 0.8789774179458618, + "step": 236899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.4738171100616455, + "learning_rate": 2.4479948789816204e-05, + "loss": 0.4917, + "step": 236900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.284900665283203, + "loss_rtd": 0.21622498333454132, + "loss_sent": 2.8011925678583793e-05, + "loss_sod": 0.06487318128347397, + "loss_total": 0.2811261713504791, + "step": 236999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.151595592498779, + "loss_rtd": 0.1989092230796814, + "loss_sent": 3.269418084528297e-05, + "loss_sod": 0.15551862120628357, + "loss_total": 0.35446053743362427, + "step": 236999 + }, + { + "epoch": 0.026, + "grad_norm": 0.8244456648826599, + "learning_rate": 2.4452665343404563e-05, + "loss": 0.4576, + "step": 237000 + }, + { + "epoch": 0.026, + "eval_loss": 0.42557063698768616, + "eval_runtime": 151.1388, + "eval_samples_per_second": 102.178, + "eval_steps_per_second": 0.801, + "step": 237000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.628950119018555, + "loss_rtd": 0.24218463897705078, + "loss_sent": 0.6823144555091858, + "loss_sod": 0.02316117286682129, + "loss_total": 0.9476602673530579, + "step": 237099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.59042501449585, + "loss_rtd": 0.24450884759426117, + "loss_sent": 0.129317507147789, + "loss_sod": 0.012200526893138885, + "loss_total": 0.38602685928344727, + "step": 237099 + }, + { + "epoch": 0.0262, + "grad_norm": 3.2174065113067627, + "learning_rate": 2.4425392187456282e-05, + "loss": 0.4537, + "step": 237100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.622735500335693, + "loss_rtd": 0.2481285035610199, + "loss_sent": 0.0748935118317604, + "loss_sod": 0.05360139533877373, + "loss_total": 0.37662339210510254, + "step": 237199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.744678974151611, + "loss_rtd": 0.2219824492931366, + "loss_sent": 0.28047963976860046, + "loss_sod": 0.012201274745166302, + "loss_total": 0.5146633386611938, + "step": 237199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.0077773332595825, + "learning_rate": 2.4398129332956948e-05, + "loss": 0.4746, + "step": 237200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.693736553192139, + "loss_rtd": 0.23063986003398895, + "loss_sent": 0.39893093705177307, + "loss_sod": 0.042699843645095825, + "loss_total": 0.672270655632019, + "step": 237299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.515918731689453, + "loss_rtd": 0.23402729630470276, + "loss_sent": 0.05968880280852318, + "loss_sod": 0.05745239183306694, + "loss_total": 0.3511684834957123, + "step": 237299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.8940922021865845, + "learning_rate": 2.4370876790888058e-05, + "loss": 0.4293, + "step": 237300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.96881628036499, + "loss_rtd": 0.22738666832447052, + "loss_sent": 0.028789600357413292, + "loss_sod": 0.045895736664533615, + "loss_total": 0.30207201838493347, + "step": 237399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.594783782958984, + "loss_rtd": 0.21820645034313202, + "loss_sent": 0.19008494913578033, + "loss_sod": 0.011797965504229069, + "loss_total": 0.42008936405181885, + "step": 237399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.6380093097686768, + "learning_rate": 2.4343634572226953e-05, + "loss": 0.4538, + "step": 237400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.770845890045166, + "loss_rtd": 0.243596151471138, + "loss_sent": 0.12701734900474548, + "loss_sod": 0.15806862711906433, + "loss_total": 0.5286821126937866, + "step": 237499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.631127834320068, + "loss_rtd": 0.22995781898498535, + "loss_sent": 0.13623708486557007, + "loss_sod": 0.013269779272377491, + "loss_total": 0.37946468591690063, + "step": 237499 + }, + { + "epoch": 0.027, + "grad_norm": 1.062378168106079, + "learning_rate": 2.4316402687946755e-05, + "loss": 0.4411, + "step": 237500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.318966388702393, + "loss_rtd": 0.2044476717710495, + "loss_sent": 0.027167636901140213, + "loss_sod": 0.1754985898733139, + "loss_total": 0.4071139097213745, + "step": 237599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.660177230834961, + "loss_rtd": 0.2145908623933792, + "loss_sent": 0.19793200492858887, + "loss_sod": 0.050246983766555786, + "loss_total": 0.46276986598968506, + "step": 237599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.5831185579299927, + "learning_rate": 2.4289181149016483e-05, + "loss": 0.4455, + "step": 237600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.6185126304626465, + "loss_rtd": 0.19363583624362946, + "loss_sent": 0.09224008023738861, + "loss_sod": 0.01841435208916664, + "loss_total": 0.3042902648448944, + "step": 237699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.913099765777588, + "loss_rtd": 0.23436789214611053, + "loss_sent": 0.3045767843723297, + "loss_sod": 0.06714023649692535, + "loss_total": 0.606084942817688, + "step": 237699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.714023768901825, + "learning_rate": 2.426196996640097e-05, + "loss": 0.4609, + "step": 237700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.34343147277832, + "loss_rtd": 0.1957351565361023, + "loss_sent": 0.016168508678674698, + "loss_sod": 0.05185885727405548, + "loss_total": 0.26376253366470337, + "step": 237799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.109145164489746, + "loss_rtd": 0.20797386765480042, + "loss_sent": 0.003358804387971759, + "loss_sod": 0.15427103638648987, + "loss_total": 0.3656037151813507, + "step": 237799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8577890992164612, + "learning_rate": 2.423476915106089e-05, + "loss": 0.4653, + "step": 237800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.908792018890381, + "loss_rtd": 0.2229354977607727, + "loss_sent": 0.1619836539030075, + "loss_sod": 0.019400130957365036, + "loss_total": 0.40431928634643555, + "step": 237899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.720446586608887, + "loss_rtd": 0.2260797917842865, + "loss_sent": 0.09854258596897125, + "loss_sod": 0.11681941896677017, + "loss_total": 0.44144177436828613, + "step": 237899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.316367268562317, + "learning_rate": 2.4207578713952706e-05, + "loss": 0.4573, + "step": 237900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.694923400878906, + "loss_rtd": 0.24636176228523254, + "loss_sent": 0.20768165588378906, + "loss_sod": 0.015640396624803543, + "loss_total": 0.46968382596969604, + "step": 237999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.449057579040527, + "loss_rtd": 0.24466529488563538, + "loss_sent": 0.3436650037765503, + "loss_sod": 0.028440985828638077, + "loss_total": 0.6167712807655334, + "step": 237999 + }, + { + "epoch": 0.028, + "grad_norm": 0.8595840930938721, + "learning_rate": 2.4180398666028726e-05, + "loss": 0.4659, + "step": 238000 + }, + { + "epoch": 0.028, + "eval_loss": 0.4316830039024353, + "eval_runtime": 151.3884, + "eval_samples_per_second": 102.009, + "eval_steps_per_second": 0.799, + "step": 238000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.5816473960876465, + "loss_rtd": 0.23535099625587463, + "loss_sent": 0.05623890459537506, + "loss_sod": 0.11616479605436325, + "loss_total": 0.40775471925735474, + "step": 238099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.348033905029297, + "loss_rtd": 0.21179944276809692, + "loss_sent": 3.586645470932126e-05, + "loss_sod": 0.2043694257736206, + "loss_total": 0.4162047207355499, + "step": 238099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.2257155179977417, + "learning_rate": 2.4153229018237074e-05, + "loss": 0.4564, + "step": 238100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.727396488189697, + "loss_rtd": 0.2434481382369995, + "loss_sent": 0.32008200883865356, + "loss_sod": 0.028842560946941376, + "loss_total": 0.592372715473175, + "step": 238199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.8877692222595215, + "loss_rtd": 0.23744194209575653, + "loss_sent": 0.24149754643440247, + "loss_sod": 0.17500735819339752, + "loss_total": 0.6539468765258789, + "step": 238199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.5632820129394531, + "learning_rate": 2.41260697815217e-05, + "loss": 0.4441, + "step": 238200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.408656120300293, + "loss_rtd": 0.21569550037384033, + "loss_sent": 0.040420565754175186, + "loss_sod": 0.16368474066257477, + "loss_total": 0.4198007881641388, + "step": 238299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.322138786315918, + "loss_rtd": 0.22217020392417908, + "loss_sent": 0.09246724843978882, + "loss_sod": 0.023493852466344833, + "loss_total": 0.338131308555603, + "step": 238299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.2043564319610596, + "learning_rate": 2.4098920966822307e-05, + "loss": 0.4621, + "step": 238300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.709166526794434, + "loss_rtd": 0.23016920685768127, + "loss_sent": 0.20664529502391815, + "loss_sod": 0.0580989234149456, + "loss_total": 0.49491339921951294, + "step": 238399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.634028911590576, + "loss_rtd": 0.22805720567703247, + "loss_sent": 0.11374135315418243, + "loss_sod": 0.0163797028362751, + "loss_total": 0.3581782579421997, + "step": 238399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.6835154891014099, + "learning_rate": 2.4071782585074455e-05, + "loss": 0.448, + "step": 238400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.901325702667236, + "loss_rtd": 0.2259879857301712, + "loss_sent": 0.4070032238960266, + "loss_sod": 0.11817653477191925, + "loss_total": 0.7511677742004395, + "step": 238499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.890055179595947, + "loss_rtd": 0.24064171314239502, + "loss_sent": 0.08593276143074036, + "loss_sod": 0.1841154545545578, + "loss_total": 0.5106899738311768, + "step": 238499 + }, + { + "epoch": 0.029, + "grad_norm": 2.0252127647399902, + "learning_rate": 2.404465464720947e-05, + "loss": 0.452, + "step": 238500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.779674530029297, + "loss_rtd": 0.2189570665359497, + "loss_sent": 0.410211443901062, + "loss_sod": 0.04524721950292587, + "loss_total": 0.6744157075881958, + "step": 238599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.71017599105835, + "loss_rtd": 0.22937899827957153, + "loss_sent": 0.3298669755458832, + "loss_sod": 0.09960901737213135, + "loss_total": 0.6588549613952637, + "step": 238599 + }, + { + "epoch": 0.0292, + "grad_norm": 3.146510124206543, + "learning_rate": 2.4017537164154514e-05, + "loss": 0.4532, + "step": 238600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.4319281578063965, + "loss_rtd": 0.21059221029281616, + "loss_sent": 0.09813720732927322, + "loss_sod": 0.03410068154335022, + "loss_total": 0.342830091714859, + "step": 238699 + }, + { + "epoch": 0.029398, + "loss_gen": 6.038654327392578, + "loss_rtd": 0.22054323554039001, + "loss_sent": 0.17490822076797485, + "loss_sod": 0.1179456114768982, + "loss_total": 0.5133970975875854, + "step": 238699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.0059821605682373, + "learning_rate": 2.399043014683246e-05, + "loss": 0.4392, + "step": 238700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.929017066955566, + "loss_rtd": 0.21582269668579102, + "loss_sent": 0.1245991513133049, + "loss_sod": 0.014260164462029934, + "loss_total": 0.35468199849128723, + "step": 238799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.320408821105957, + "loss_rtd": 0.23964263498783112, + "loss_sent": 0.18945762515068054, + "loss_sod": 0.004603613168001175, + "loss_total": 0.43370386958122253, + "step": 238799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.510923147201538, + "learning_rate": 2.396333360616203e-05, + "loss": 0.4559, + "step": 238800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.772350788116455, + "loss_rtd": 0.2370069921016693, + "loss_sent": 0.094133660197258, + "loss_sod": 0.004237771034240723, + "loss_total": 0.33537840843200684, + "step": 238899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.60974645614624, + "loss_rtd": 0.23583225905895233, + "loss_sent": 0.23875966668128967, + "loss_sod": 0.028905808925628662, + "loss_total": 0.5034977197647095, + "step": 238899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.3758431673049927, + "learning_rate": 2.393624755305771e-05, + "loss": 0.4491, + "step": 238900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.795708656311035, + "loss_rtd": 0.22569571435451508, + "loss_sent": 0.1735101342201233, + "loss_sod": 0.009773963131010532, + "loss_total": 0.4089798331260681, + "step": 238999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.156543254852295, + "loss_rtd": 0.22884215414524078, + "loss_sent": 0.06918361783027649, + "loss_sod": 0.06429454684257507, + "loss_total": 0.36232033371925354, + "step": 238999 + }, + { + "epoch": 0.03, + "grad_norm": 0.682130753993988, + "learning_rate": 2.390917199842978e-05, + "loss": 0.4673, + "step": 239000 + }, + { + "epoch": 0.03, + "eval_loss": 0.42803001403808594, + "eval_runtime": 151.3306, + "eval_samples_per_second": 102.048, + "eval_steps_per_second": 0.8, + "step": 239000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.893220901489258, + "loss_rtd": 0.23933978378772736, + "loss_sent": 0.16924291849136353, + "loss_sod": 0.051185209304094315, + "loss_total": 0.4597679078578949, + "step": 239099 + }, + { + "epoch": 0.030198, + "loss_gen": 6.047658920288086, + "loss_rtd": 0.23218569159507751, + "loss_sent": 0.07819077372550964, + "loss_sod": 0.12189389020204544, + "loss_total": 0.432270348072052, + "step": 239099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.4540042877197266, + "learning_rate": 2.388210695318423e-05, + "loss": 0.4578, + "step": 239100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.5395588874816895, + "loss_rtd": 0.22231055796146393, + "loss_sent": 0.19725970923900604, + "loss_sod": 0.05616962909698486, + "loss_total": 0.47573989629745483, + "step": 239199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.67051887512207, + "loss_rtd": 0.23646663129329681, + "loss_sent": 0.06493733823299408, + "loss_sod": 0.006037513259798288, + "loss_total": 0.30744147300720215, + "step": 239199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.253751516342163, + "learning_rate": 2.385505242822288e-05, + "loss": 0.4412, + "step": 239200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.815771579742432, + "loss_rtd": 0.23468433320522308, + "loss_sent": 0.14919671416282654, + "loss_sod": 0.14313432574272156, + "loss_total": 0.5270153284072876, + "step": 239299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.409769535064697, + "loss_rtd": 0.2294633984565735, + "loss_sent": 0.1363288015127182, + "loss_sod": 0.01859310083091259, + "loss_total": 0.38438528776168823, + "step": 239299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.8529370427131653, + "learning_rate": 2.38280084344433e-05, + "loss": 0.4479, + "step": 239300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.452807903289795, + "loss_rtd": 0.20699205994606018, + "loss_sent": 0.5443159937858582, + "loss_sod": 0.0021774633787572384, + "loss_total": 0.7534855008125305, + "step": 239399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.706867694854736, + "loss_rtd": 0.25227901339530945, + "loss_sent": 0.26974064111709595, + "loss_sod": 0.08123065531253815, + "loss_total": 0.6032503247261047, + "step": 239399 + }, + { + "epoch": 0.0308, + "grad_norm": 2.7138097286224365, + "learning_rate": 2.3800974982738773e-05, + "loss": 0.4626, + "step": 239400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.703469276428223, + "loss_rtd": 0.2405497282743454, + "loss_sent": 0.11308270692825317, + "loss_sod": 0.09160737693309784, + "loss_total": 0.4452398121356964, + "step": 239499 + }, + { + "epoch": 0.030998, + "loss_gen": 4.877888202667236, + "loss_rtd": 0.1974320113658905, + "loss_sent": 0.0006430040230043232, + "loss_sod": 0.08650462329387665, + "loss_total": 0.28457963466644287, + "step": 239499 + }, + { + "epoch": 0.031, + "grad_norm": 0.8747193217277527, + "learning_rate": 2.3773952083998392e-05, + "loss": 0.4376, + "step": 239500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.368193626403809, + "loss_rtd": 0.21049441397190094, + "loss_sent": 0.055834781378507614, + "loss_sod": 0.006824597716331482, + "loss_total": 0.27315378189086914, + "step": 239599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.744250297546387, + "loss_rtd": 0.2513865828514099, + "loss_sent": 0.14889192581176758, + "loss_sod": 0.11034315824508667, + "loss_total": 0.5106216669082642, + "step": 239599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.6689745187759399, + "learning_rate": 2.374693974910697e-05, + "loss": 0.4722, + "step": 239600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.915278911590576, + "loss_rtd": 0.2438460886478424, + "loss_sent": 0.19906951487064362, + "loss_sod": 0.010084887966513634, + "loss_total": 0.4530004858970642, + "step": 239699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.869354724884033, + "loss_rtd": 0.23033317923545837, + "loss_sent": 0.09512707591056824, + "loss_sod": 0.13668489456176758, + "loss_total": 0.4621451497077942, + "step": 239699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.1120641231536865, + "learning_rate": 2.3719937988945102e-05, + "loss": 0.4479, + "step": 239700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.956075668334961, + "loss_rtd": 0.24548287689685822, + "loss_sent": 0.21017757058143616, + "loss_sod": 0.01185193657875061, + "loss_total": 0.4675123989582062, + "step": 239799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.417455673217773, + "loss_rtd": 0.22980546951293945, + "loss_sent": 0.1534159779548645, + "loss_sod": 0.01895293965935707, + "loss_total": 0.4021743834018707, + "step": 239799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.22420072555542, + "learning_rate": 2.3692946814389043e-05, + "loss": 0.4707, + "step": 239800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.628900051116943, + "loss_rtd": 0.2454957365989685, + "loss_sent": 0.1552024483680725, + "loss_sod": 0.003533473936840892, + "loss_total": 0.4042316675186157, + "step": 239899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.544217586517334, + "loss_rtd": 0.2314174324274063, + "loss_sent": 0.1561063677072525, + "loss_sod": 0.0257144495844841, + "loss_total": 0.4132382273674011, + "step": 239899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.113447904586792, + "learning_rate": 2.3665966236310873e-05, + "loss": 0.4529, + "step": 239900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.736084938049316, + "loss_rtd": 0.2461896687746048, + "loss_sent": 0.11952412873506546, + "loss_sod": 0.02386702038347721, + "loss_total": 0.3895808160305023, + "step": 239999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.892647743225098, + "loss_rtd": 0.2216283679008484, + "loss_sent": 0.3390713334083557, + "loss_sod": 0.021314382553100586, + "loss_total": 0.5820140838623047, + "step": 239999 + }, + { + "epoch": 0.032, + "grad_norm": 0.8471458554267883, + "learning_rate": 2.3638996265578345e-05, + "loss": 0.4609, + "step": 240000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4270385801792145, + "eval_runtime": 151.4497, + "eval_samples_per_second": 101.968, + "eval_steps_per_second": 0.799, + "step": 240000 + }, + { + "epoch": 0.000198, + "loss_gen": 6.174821853637695, + "loss_rtd": 0.23269867897033691, + "loss_sent": 0.04734470322728157, + "loss_sod": 0.13130277395248413, + "loss_total": 0.4113461673259735, + "step": 240099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.896604537963867, + "loss_rtd": 0.2383555769920349, + "loss_sent": 0.0619867779314518, + "loss_sod": 0.04733799025416374, + "loss_total": 0.34768033027648926, + "step": 240099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.7818575501441956, + "learning_rate": 2.361203691305499e-05, + "loss": 0.4703, + "step": 240100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.213702201843262, + "loss_rtd": 0.19562645256519318, + "loss_sent": 0.09218383580446243, + "loss_sod": 0.12223028391599655, + "loss_total": 0.41004055738449097, + "step": 240199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.583803653717041, + "loss_rtd": 0.22966305911540985, + "loss_sent": 0.19943755865097046, + "loss_sod": 0.010032592341303825, + "loss_total": 0.4391332268714905, + "step": 240199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.1320916414260864, + "learning_rate": 2.358508818959999e-05, + "loss": 0.4418, + "step": 240200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.706082344055176, + "loss_rtd": 0.2443791776895523, + "loss_sent": 0.0584988035261631, + "loss_sod": 0.11845959722995758, + "loss_total": 0.4213375747203827, + "step": 240299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.8394269943237305, + "loss_rtd": 0.22650183737277985, + "loss_sent": 0.4121139347553253, + "loss_sod": 0.1565793752670288, + "loss_total": 0.7951951026916504, + "step": 240299 + }, + { + "epoch": 0.0006, + "grad_norm": 2.023341655731201, + "learning_rate": 2.355815010606831e-05, + "loss": 0.4467, + "step": 240300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.811026573181152, + "loss_rtd": 0.24636968970298767, + "loss_sent": 0.21656563878059387, + "loss_sod": 0.10607089102268219, + "loss_total": 0.5690062046051025, + "step": 240399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.745444297790527, + "loss_rtd": 0.21473713219165802, + "loss_sent": 0.29169970750808716, + "loss_sod": 0.057059645652770996, + "loss_total": 0.563496470451355, + "step": 240399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.0170907974243164, + "learning_rate": 2.353122267331061e-05, + "loss": 0.449, + "step": 240400 + }, + { + "epoch": 0.000998, + "loss_gen": 6.04369592666626, + "loss_rtd": 0.24859841167926788, + "loss_sent": 0.27929022908210754, + "loss_sod": 0.012861400842666626, + "loss_total": 0.5407500267028809, + "step": 240499 + }, + { + "epoch": 0.000998, + "loss_gen": 6.0217509269714355, + "loss_rtd": 0.23393367230892181, + "loss_sent": 0.15887829661369324, + "loss_sod": 0.0831235870718956, + "loss_total": 0.47593554854393005, + "step": 240499 + }, + { + "epoch": 0.001, + "grad_norm": 0.9431577920913696, + "learning_rate": 2.3504305902173262e-05, + "loss": 0.4288, + "step": 240500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.6050124168396, + "loss_rtd": 0.2500074803829193, + "loss_sent": 0.1043907105922699, + "loss_sod": 0.06603623926639557, + "loss_total": 0.42043444514274597, + "step": 240599 + }, + { + "epoch": 0.001198, + "loss_gen": 6.001707553863525, + "loss_rtd": 0.23530079424381256, + "loss_sent": 0.6919125914573669, + "loss_sod": 0.05928611755371094, + "loss_total": 0.9864994883537292, + "step": 240599 + }, + { + "epoch": 0.0012, + "grad_norm": 3.281320095062256, + "learning_rate": 2.3477399803498314e-05, + "loss": 0.4645, + "step": 240600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.266785621643066, + "loss_rtd": 0.20185698568820953, + "loss_sent": 0.037269651889801025, + "loss_sod": 0.01602785289287567, + "loss_total": 0.25515449047088623, + "step": 240699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.644423007965088, + "loss_rtd": 0.24058449268341064, + "loss_sent": 0.09521439671516418, + "loss_sod": 0.0025251905899494886, + "loss_total": 0.33832406997680664, + "step": 240699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.5941767692565918, + "learning_rate": 2.345050438812355e-05, + "loss": 0.4535, + "step": 240700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.874456882476807, + "loss_rtd": 0.22525304555892944, + "loss_sent": 0.2878045439720154, + "loss_sod": 0.09540347754955292, + "loss_total": 0.6084610819816589, + "step": 240799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.894911766052246, + "loss_rtd": 0.22562730312347412, + "loss_sent": 0.3028585612773895, + "loss_sod": 0.020743096247315407, + "loss_total": 0.5492289662361145, + "step": 240799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.1248347759246826, + "learning_rate": 2.342361966688247e-05, + "loss": 0.4589, + "step": 240800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.601938247680664, + "loss_rtd": 0.23857371509075165, + "loss_sent": 0.1302148997783661, + "loss_sod": 0.009533866308629513, + "loss_total": 0.3783224821090698, + "step": 240899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.81471061706543, + "loss_rtd": 0.2276061475276947, + "loss_sent": 0.10715650767087936, + "loss_sod": 0.024203572422266006, + "loss_total": 0.35896623134613037, + "step": 240899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.6274192929267883, + "learning_rate": 2.3396745650604186e-05, + "loss": 0.4586, + "step": 240900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.213769912719727, + "loss_rtd": 0.20258015394210815, + "loss_sent": 0.028924955055117607, + "loss_sod": 0.012560616247355938, + "loss_total": 0.24406573176383972, + "step": 240999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.9080657958984375, + "loss_rtd": 0.23435091972351074, + "loss_sent": 0.07439293712377548, + "loss_sod": 0.03413313999772072, + "loss_total": 0.34287700057029724, + "step": 240999 + }, + { + "epoch": 0.002, + "grad_norm": 0.6711193919181824, + "learning_rate": 2.336988235011357e-05, + "loss": 0.4375, + "step": 241000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4283449351787567, + "eval_runtime": 162.6705, + "eval_samples_per_second": 94.934, + "eval_steps_per_second": 0.744, + "step": 241000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.6881303787231445, + "loss_rtd": 0.22605517506599426, + "loss_sent": 0.39668649435043335, + "loss_sod": 0.09255596995353699, + "loss_total": 0.7152976393699646, + "step": 241099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.743150234222412, + "loss_rtd": 0.22890755534172058, + "loss_sent": 0.4766629636287689, + "loss_sod": 0.08745452016592026, + "loss_total": 0.793025016784668, + "step": 241099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.7100263833999634, + "learning_rate": 2.3343029776231163e-05, + "loss": 0.4559, + "step": 241100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.571623802185059, + "loss_rtd": 0.22446729242801666, + "loss_sent": 0.1801891028881073, + "loss_sod": 0.024750633165240288, + "loss_total": 0.4294070303440094, + "step": 241199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.958050727844238, + "loss_rtd": 0.20256225764751434, + "loss_sent": 0.01209980994462967, + "loss_sod": 0.20996412634849548, + "loss_total": 0.4246261715888977, + "step": 241199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.115786075592041, + "learning_rate": 2.3316187939773192e-05, + "loss": 0.4641, + "step": 241200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.70155668258667, + "loss_rtd": 0.1794353872537613, + "loss_sent": 0.07801483571529388, + "loss_sod": 0.03325256332755089, + "loss_total": 0.29070279002189636, + "step": 241299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.621855735778809, + "loss_rtd": 0.2466362863779068, + "loss_sent": 0.15442420542240143, + "loss_sod": 0.08801429718732834, + "loss_total": 0.4890747666358948, + "step": 241299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.6202239394187927, + "learning_rate": 2.328935685155151e-05, + "loss": 0.4706, + "step": 241300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.332050800323486, + "loss_rtd": 0.18241283297538757, + "loss_sent": 2.6445484763826244e-05, + "loss_sod": 0.050217967480421066, + "loss_total": 0.23265725374221802, + "step": 241399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.652956485748291, + "loss_rtd": 0.18314598500728607, + "loss_sent": 2.7188658350496553e-05, + "loss_sod": 0.2116314172744751, + "loss_total": 0.39480459690093994, + "step": 241399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.1327468156814575, + "learning_rate": 2.326253652237369e-05, + "loss": 0.4472, + "step": 241400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.280188083648682, + "loss_rtd": 0.21391335129737854, + "loss_sent": 0.02823665179312229, + "loss_sod": 0.02851317636668682, + "loss_total": 0.27066317200660706, + "step": 241499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.112308979034424, + "loss_rtd": 0.21453484892845154, + "loss_sent": 5.337989568943158e-05, + "loss_sod": 0.18758845329284668, + "loss_total": 0.4021766781806946, + "step": 241499 + }, + { + "epoch": 0.003, + "grad_norm": 1.0647000074386597, + "learning_rate": 2.3235726963042966e-05, + "loss": 0.4404, + "step": 241500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.487748146057129, + "loss_rtd": 0.2085038721561432, + "loss_sent": 0.09526938945055008, + "loss_sod": 0.0808418020606041, + "loss_total": 0.38461506366729736, + "step": 241599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.592741966247559, + "loss_rtd": 0.243381530046463, + "loss_sent": 0.07295112311840057, + "loss_sod": 0.06469008326530457, + "loss_total": 0.38102275133132935, + "step": 241599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9712144732475281, + "learning_rate": 2.3208928184358236e-05, + "loss": 0.4447, + "step": 241600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.652876377105713, + "loss_rtd": 0.21591812372207642, + "loss_sent": 0.35284423828125, + "loss_sod": 0.020741023123264313, + "loss_total": 0.5895034074783325, + "step": 241699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.119246006011963, + "loss_rtd": 0.20154471695423126, + "loss_sent": 0.07837800681591034, + "loss_sod": 0.021901678293943405, + "loss_total": 0.3018243908882141, + "step": 241699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.6129971742630005, + "learning_rate": 2.3182140197114012e-05, + "loss": 0.4575, + "step": 241700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.8978400230407715, + "loss_rtd": 0.22588253021240234, + "loss_sent": 0.16254116594791412, + "loss_sod": 0.016494540497660637, + "loss_total": 0.40491825342178345, + "step": 241799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.733808517456055, + "loss_rtd": 0.23193776607513428, + "loss_sent": 0.07303871959447861, + "loss_sod": 0.03720690310001373, + "loss_total": 0.342183381319046, + "step": 241799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.6855172514915466, + "learning_rate": 2.3155363012100507e-05, + "loss": 0.4711, + "step": 241800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.7418437004089355, + "loss_rtd": 0.24587838351726532, + "loss_sent": 0.09250931441783905, + "loss_sod": 0.04317227005958557, + "loss_total": 0.38155996799468994, + "step": 241899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.823653221130371, + "loss_rtd": 0.22120451927185059, + "loss_sent": 0.22145628929138184, + "loss_sod": 0.04665272682905197, + "loss_total": 0.489313542842865, + "step": 241899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.5941023230552673, + "learning_rate": 2.3128596640103574e-05, + "loss": 0.4611, + "step": 241900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.7284016609191895, + "loss_rtd": 0.2311260998249054, + "loss_sent": 0.40789496898651123, + "loss_sod": 0.013776625506579876, + "loss_total": 0.6527976989746094, + "step": 241999 + }, + { + "epoch": 0.003998, + "loss_gen": 6.094147682189941, + "loss_rtd": 0.24135150015354156, + "loss_sent": 0.10780978947877884, + "loss_sod": 0.06081417575478554, + "loss_total": 0.40997546911239624, + "step": 241999 + }, + { + "epoch": 0.004, + "grad_norm": 1.6425068378448486, + "learning_rate": 2.310184109190472e-05, + "loss": 0.4361, + "step": 242000 + }, + { + "epoch": 0.004, + "eval_loss": 0.42943087220191956, + "eval_runtime": 151.5831, + "eval_samples_per_second": 101.878, + "eval_steps_per_second": 0.798, + "step": 242000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.632208347320557, + "loss_rtd": 0.2249802052974701, + "loss_sent": 0.4374160170555115, + "loss_sod": 0.023674190044403076, + "loss_total": 0.686070442199707, + "step": 242099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.471686840057373, + "loss_rtd": 0.23182249069213867, + "loss_sent": 0.16629771888256073, + "loss_sod": 0.03183560073375702, + "loss_total": 0.4299558401107788, + "step": 242099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2165805101394653, + "learning_rate": 2.3075096378281036e-05, + "loss": 0.4352, + "step": 242100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.546979904174805, + "loss_rtd": 0.23156070709228516, + "loss_sent": 0.16346849501132965, + "loss_sod": 0.04800717160105705, + "loss_total": 0.44303637742996216, + "step": 242199 + }, + { + "epoch": 0.004398, + "loss_gen": 6.060455322265625, + "loss_rtd": 0.23157013952732086, + "loss_sent": 0.17300662398338318, + "loss_sod": 0.04722176492214203, + "loss_total": 0.45179852843284607, + "step": 242199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.2987682819366455, + "learning_rate": 2.304836251000531e-05, + "loss": 0.4454, + "step": 242200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.9557905197143555, + "loss_rtd": 0.21919752657413483, + "loss_sent": 0.18476250767707825, + "loss_sod": 0.05176394432783127, + "loss_total": 0.45572397112846375, + "step": 242299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.481745719909668, + "loss_rtd": 0.22046954929828644, + "loss_sent": 0.09188221395015717, + "loss_sod": 0.03570885211229324, + "loss_total": 0.34806060791015625, + "step": 242299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.0712467432022095, + "learning_rate": 2.3021639497845966e-05, + "loss": 0.4677, + "step": 242300 + }, + { + "epoch": 0.004798, + "loss_gen": 6.09511137008667, + "loss_rtd": 0.2507239580154419, + "loss_sent": 0.2072407603263855, + "loss_sod": 0.12444519996643066, + "loss_total": 0.5824099183082581, + "step": 242399 + }, + { + "epoch": 0.004798, + "loss_gen": 6.472561359405518, + "loss_rtd": 0.24314600229263306, + "loss_sent": 0.0769466683268547, + "loss_sod": 0.066665418446064, + "loss_total": 0.38675808906555176, + "step": 242399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.6674302816390991, + "learning_rate": 2.299492735256698e-05, + "loss": 0.4405, + "step": 242400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.394045829772949, + "loss_rtd": 0.2016497105360031, + "loss_sent": 0.01999494433403015, + "loss_sod": 0.1257443130016327, + "loss_total": 0.34738895297050476, + "step": 242499 + }, + { + "epoch": 0.004998, + "loss_gen": 6.0678019523620605, + "loss_rtd": 0.23026862740516663, + "loss_sent": 0.09744556248188019, + "loss_sod": 0.06970666348934174, + "loss_total": 0.39742082357406616, + "step": 242499 + }, + { + "epoch": 0.005, + "grad_norm": 1.0150680541992188, + "learning_rate": 2.2968226084928035e-05, + "loss": 0.456, + "step": 242500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.373544692993164, + "loss_rtd": 0.1858280897140503, + "loss_sent": 0.06773902475833893, + "loss_sod": 0.12823840975761414, + "loss_total": 0.38180553913116455, + "step": 242599 + }, + { + "epoch": 0.005198, + "loss_gen": 6.14440393447876, + "loss_rtd": 0.2186332494020462, + "loss_sent": 0.1912665218114853, + "loss_sod": 0.03565386310219765, + "loss_total": 0.44555366039276123, + "step": 242599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.85086989402771, + "learning_rate": 2.2941535705684385e-05, + "loss": 0.4544, + "step": 242600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.905035972595215, + "loss_rtd": 0.2487083375453949, + "loss_sent": 0.39679527282714844, + "loss_sod": 0.07764381915330887, + "loss_total": 0.7231473922729492, + "step": 242699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.728309154510498, + "loss_rtd": 0.2311766892671585, + "loss_sent": 0.2341116964817047, + "loss_sod": 0.004367607645690441, + "loss_total": 0.46965599060058594, + "step": 242699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.6963841915130615, + "learning_rate": 2.2914856225586933e-05, + "loss": 0.4399, + "step": 242700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.7613043785095215, + "loss_rtd": 0.23951976001262665, + "loss_sent": 0.2711679935455322, + "loss_sod": 0.031781427562236786, + "loss_total": 0.5424691438674927, + "step": 242799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.814478397369385, + "loss_rtd": 0.2483694702386856, + "loss_sent": 0.12304899096488953, + "loss_sod": 0.03288950026035309, + "loss_total": 0.4043079614639282, + "step": 242799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.7700718641281128, + "learning_rate": 2.2888187655382144e-05, + "loss": 0.4451, + "step": 242800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.66154670715332, + "loss_rtd": 0.24562153220176697, + "loss_sent": 0.14087800681591034, + "loss_sod": 0.02246302366256714, + "loss_total": 0.40896254777908325, + "step": 242899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.781575679779053, + "loss_rtd": 0.21482908725738525, + "loss_sent": 0.27028486132621765, + "loss_sod": 0.056306540966033936, + "loss_total": 0.5414204597473145, + "step": 242899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.9976038932800293, + "learning_rate": 2.286153000581212e-05, + "loss": 0.4508, + "step": 242900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.461580276489258, + "loss_rtd": 0.21401362121105194, + "loss_sent": 0.11155346781015396, + "loss_sod": 0.04492160305380821, + "loss_total": 0.3704886734485626, + "step": 242999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.641631126403809, + "loss_rtd": 0.25520673394203186, + "loss_sent": 0.19426937401294708, + "loss_sod": 0.012615029700100422, + "loss_total": 0.4620911478996277, + "step": 242999 + }, + { + "epoch": 0.006, + "grad_norm": 0.9921756982803345, + "learning_rate": 2.283488328761456e-05, + "loss": 0.461, + "step": 243000 + }, + { + "epoch": 0.006, + "eval_loss": 0.42579010128974915, + "eval_runtime": 152.9012, + "eval_samples_per_second": 101.0, + "eval_steps_per_second": 0.791, + "step": 243000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.39358377456665, + "loss_rtd": 0.22952860593795776, + "loss_sent": 0.19048072397708893, + "loss_sod": 0.03400341421365738, + "loss_total": 0.45401275157928467, + "step": 243099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.478324890136719, + "loss_rtd": 0.2199968844652176, + "loss_sent": 0.14730414748191833, + "loss_sod": 0.022844431921839714, + "loss_total": 0.390145480632782, + "step": 243099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.6951247453689575, + "learning_rate": 2.280824751152279e-05, + "loss": 0.4597, + "step": 243100 + }, + { + "epoch": 0.006398, + "loss_gen": 6.15172815322876, + "loss_rtd": 0.2365787923336029, + "loss_sent": 0.192633256316185, + "loss_sod": 0.05162573605775833, + "loss_total": 0.48083776235580444, + "step": 243199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.5926833152771, + "loss_rtd": 0.21989813446998596, + "loss_sent": 0.06581819802522659, + "loss_sod": 0.013996992260217667, + "loss_total": 0.2997133135795593, + "step": 243199 + }, + { + "epoch": 0.0064, + "grad_norm": 2.6375739574432373, + "learning_rate": 2.2781622688265646e-05, + "loss": 0.4613, + "step": 243200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.6530585289001465, + "loss_rtd": 0.25011229515075684, + "loss_sent": 0.14732234179973602, + "loss_sod": 0.00843762420117855, + "loss_total": 0.40587228536605835, + "step": 243299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.328200340270996, + "loss_rtd": 0.24131344258785248, + "loss_sent": 0.07536239176988602, + "loss_sod": 0.04264714941382408, + "loss_total": 0.3593229651451111, + "step": 243299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.6082994341850281, + "learning_rate": 2.2755008828567632e-05, + "loss": 0.4387, + "step": 243300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.879546642303467, + "loss_rtd": 0.23271173238754272, + "loss_sent": 0.07562201470136642, + "loss_sod": 0.06916868686676025, + "loss_total": 0.37750244140625, + "step": 243399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.528488636016846, + "loss_rtd": 0.21066364645957947, + "loss_sent": 6.91495297360234e-05, + "loss_sod": 0.08674241602420807, + "loss_total": 0.2974752187728882, + "step": 243399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.7027512192726135, + "learning_rate": 2.27284059431488e-05, + "loss": 0.4565, + "step": 243400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.234533309936523, + "loss_rtd": 0.1966201215982437, + "loss_sent": 0.08713880181312561, + "loss_sod": 0.2176421582698822, + "loss_total": 0.5014010667800903, + "step": 243499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.716063976287842, + "loss_rtd": 0.25576815009117126, + "loss_sent": 0.28784382343292236, + "loss_sod": 0.04102654382586479, + "loss_total": 0.5846384763717651, + "step": 243499 + }, + { + "epoch": 0.007, + "grad_norm": 1.3425332307815552, + "learning_rate": 2.2701814042724818e-05, + "loss": 0.437, + "step": 243500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.361954689025879, + "loss_rtd": 0.23103067278862, + "loss_sent": 0.23926769196987152, + "loss_sod": 0.020815439522266388, + "loss_total": 0.4911137819290161, + "step": 243599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.695514678955078, + "loss_rtd": 0.23887500166893005, + "loss_sent": 0.21414825320243835, + "loss_sod": 0.06075389310717583, + "loss_total": 0.5137771368026733, + "step": 243599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.6319363117218018, + "learning_rate": 2.267523313800685e-05, + "loss": 0.4598, + "step": 243600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.656366348266602, + "loss_rtd": 0.21935313940048218, + "loss_sent": 0.07514968514442444, + "loss_sod": 0.07651719450950623, + "loss_total": 0.37102001905441284, + "step": 243699 + }, + { + "epoch": 0.007398, + "loss_gen": 6.36208963394165, + "loss_rtd": 0.21408240497112274, + "loss_sent": 0.08127298951148987, + "loss_sod": 0.110919289290905, + "loss_total": 0.406274676322937, + "step": 243699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.9099557399749756, + "learning_rate": 2.2648663239701712e-05, + "loss": 0.4535, + "step": 243700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.040685653686523, + "loss_rtd": 0.18819379806518555, + "loss_sent": 0.013052689842879772, + "loss_sod": 0.08438065648078918, + "loss_total": 0.285627156496048, + "step": 243799 + }, + { + "epoch": 0.007598, + "loss_gen": 6.159510612487793, + "loss_rtd": 0.239523246884346, + "loss_sent": 0.21420960128307343, + "loss_sod": 0.07605443894863129, + "loss_total": 0.5297873020172119, + "step": 243799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.7743996977806091, + "learning_rate": 2.2622104358511742e-05, + "loss": 0.4621, + "step": 243800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.7327985763549805, + "loss_rtd": 0.2255195677280426, + "loss_sent": 0.09750816226005554, + "loss_sod": 0.09191425144672394, + "loss_total": 0.4149419963359833, + "step": 243899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.552008152008057, + "loss_rtd": 0.23785394430160522, + "loss_sent": 0.17532411217689514, + "loss_sod": 0.047099769115448, + "loss_total": 0.46027782559394836, + "step": 243899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.9805784225463867, + "learning_rate": 2.2595556505134885e-05, + "loss": 0.4528, + "step": 243900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.487459659576416, + "loss_rtd": 0.24633139371871948, + "loss_sent": 0.25000327825546265, + "loss_sod": 0.00778064364567399, + "loss_total": 0.5041153430938721, + "step": 243999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.831045150756836, + "loss_rtd": 0.24189917743206024, + "loss_sent": 0.2143426239490509, + "loss_sod": 0.032830022275447845, + "loss_total": 0.4890718162059784, + "step": 243999 + }, + { + "epoch": 0.008, + "grad_norm": 1.0033119916915894, + "learning_rate": 2.2569019690264593e-05, + "loss": 0.455, + "step": 244000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4203929901123047, + "eval_runtime": 151.4455, + "eval_samples_per_second": 101.971, + "eval_steps_per_second": 0.799, + "step": 244000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.603832244873047, + "loss_rtd": 0.23946131765842438, + "loss_sent": 0.12526585161685944, + "loss_sod": 0.01837044395506382, + "loss_total": 0.3830975890159607, + "step": 244099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.641284942626953, + "loss_rtd": 0.22863775491714478, + "loss_sent": 0.18284723162651062, + "loss_sod": 0.01325727254152298, + "loss_total": 0.4247422516345978, + "step": 244099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.6582899689674377, + "learning_rate": 2.254249392458987e-05, + "loss": 0.4511, + "step": 244100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.689499855041504, + "loss_rtd": 0.23861071467399597, + "loss_sent": 0.3746132552623749, + "loss_sod": 0.011193893849849701, + "loss_total": 0.6244179010391235, + "step": 244199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.840762138366699, + "loss_rtd": 0.24167542159557343, + "loss_sent": 0.1296040117740631, + "loss_sod": 0.02931944653391838, + "loss_total": 0.4005988836288452, + "step": 244199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.9416965842247009, + "learning_rate": 2.2515979218795307e-05, + "loss": 0.4384, + "step": 244200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.463130950927734, + "loss_rtd": 0.2265370935201645, + "loss_sent": 0.18656446039676666, + "loss_sod": 0.06559178233146667, + "loss_total": 0.47869330644607544, + "step": 244299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.253119945526123, + "loss_rtd": 0.21707893908023834, + "loss_sent": 0.1347392499446869, + "loss_sod": 0.0169514212757349, + "loss_total": 0.3687696158885956, + "step": 244299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.8346599340438843, + "learning_rate": 2.248947558356105e-05, + "loss": 0.4481, + "step": 244300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.3741230964660645, + "loss_rtd": 0.2366834282875061, + "loss_sent": 0.10139898955821991, + "loss_sod": 0.004884220659732819, + "loss_total": 0.34296661615371704, + "step": 244399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.582619667053223, + "loss_rtd": 0.2204575091600418, + "loss_sent": 0.015588854439556599, + "loss_sod": 0.04638000950217247, + "loss_total": 0.2824263572692871, + "step": 244399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.822998046875, + "learning_rate": 2.246298302956272e-05, + "loss": 0.4303, + "step": 244400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.60807991027832, + "loss_rtd": 0.2295462042093277, + "loss_sent": 0.09245659410953522, + "loss_sod": 0.019276469945907593, + "loss_total": 0.3412792682647705, + "step": 244499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.777159214019775, + "loss_rtd": 0.22936400771141052, + "loss_sent": 0.2728959918022156, + "loss_sod": 0.08682326227426529, + "loss_total": 0.5890832543373108, + "step": 244499 + }, + { + "epoch": 0.009, + "grad_norm": 0.8801591992378235, + "learning_rate": 2.243650156747153e-05, + "loss": 0.4516, + "step": 244500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.761781215667725, + "loss_rtd": 0.24474243819713593, + "loss_sent": 0.2567749321460724, + "loss_sod": 0.10763518512248993, + "loss_total": 0.6091525554656982, + "step": 244599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.992979526519775, + "loss_rtd": 0.22405067086219788, + "loss_sent": 0.09581495076417923, + "loss_sod": 0.01569833606481552, + "loss_total": 0.3355639576911926, + "step": 244599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.0448980331420898, + "learning_rate": 2.2410031207954214e-05, + "loss": 0.4446, + "step": 244600 + }, + { + "epoch": 0.009398, + "loss_gen": 6.007643222808838, + "loss_rtd": 0.2292904257774353, + "loss_sent": 0.11359409242868423, + "loss_sod": 0.04000445455312729, + "loss_total": 0.3828889727592468, + "step": 244699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.532634258270264, + "loss_rtd": 0.21435268223285675, + "loss_sent": 0.0600326769053936, + "loss_sod": 0.010471741668879986, + "loss_total": 0.2848570942878723, + "step": 244699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.7944142818450928, + "learning_rate": 2.2383571961673044e-05, + "loss": 0.4619, + "step": 244700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.7671685218811035, + "loss_rtd": 0.2227422147989273, + "loss_sent": 0.06705641746520996, + "loss_sod": 0.14935719966888428, + "loss_total": 0.43915581703186035, + "step": 244799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.227385520935059, + "loss_rtd": 0.2073536068201065, + "loss_sent": 7.633544009877369e-05, + "loss_sod": 0.07124359160661697, + "loss_total": 0.27867352962493896, + "step": 244799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.815395712852478, + "learning_rate": 2.235712383928577e-05, + "loss": 0.4528, + "step": 244800 + }, + { + "epoch": 0.009798, + "loss_gen": 6.026076316833496, + "loss_rtd": 0.25128811597824097, + "loss_sent": 0.25853249430656433, + "loss_sod": 0.08229227364063263, + "loss_total": 0.5921128988265991, + "step": 244899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.640193939208984, + "loss_rtd": 0.2260315716266632, + "loss_sent": 0.2339726835489273, + "loss_sod": 0.11494414508342743, + "loss_total": 0.5749483704566956, + "step": 244899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.3601337671279907, + "learning_rate": 2.2330686851445702e-05, + "loss": 0.4435, + "step": 244900 + }, + { + "epoch": 0.009998, + "loss_gen": 6.0089850425720215, + "loss_rtd": 0.2364109456539154, + "loss_sent": 0.7876885533332825, + "loss_sod": 0.03947332873940468, + "loss_total": 1.063572883605957, + "step": 244999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.786645412445068, + "loss_rtd": 0.2300453782081604, + "loss_sent": 0.10783194750547409, + "loss_sod": 0.018082313239574432, + "loss_total": 0.3559596538543701, + "step": 244999 + }, + { + "epoch": 0.01, + "grad_norm": 2.2042479515075684, + "learning_rate": 2.2304261008801668e-05, + "loss": 0.4485, + "step": 245000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4339143633842468, + "eval_runtime": 151.4764, + "eval_samples_per_second": 101.95, + "eval_steps_per_second": 0.799, + "step": 245000 + }, + { + "epoch": 0.010198, + "loss_gen": 4.967748641967773, + "loss_rtd": 0.20206503570079803, + "loss_sent": 0.0022498390171676874, + "loss_sod": 0.08894479274749756, + "loss_total": 0.2932596802711487, + "step": 245099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.727007865905762, + "loss_rtd": 0.23747678101062775, + "loss_sent": 0.0795101746916771, + "loss_sod": 0.09804389625787735, + "loss_total": 0.4150308668613434, + "step": 245099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8656148314476013, + "learning_rate": 2.2277846321998004e-05, + "loss": 0.4565, + "step": 245100 + }, + { + "epoch": 0.010398, + "loss_gen": 6.024504661560059, + "loss_rtd": 0.24439726769924164, + "loss_sent": 0.22530032694339752, + "loss_sod": 0.0551542304456234, + "loss_total": 0.5248517990112305, + "step": 245199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.88653564453125, + "loss_rtd": 0.23230203986167908, + "loss_sent": 0.08770927786827087, + "loss_sod": 0.021674348041415215, + "loss_total": 0.3416856527328491, + "step": 245199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.6006246209144592, + "learning_rate": 2.225144280167451e-05, + "loss": 0.4623, + "step": 245200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.808145523071289, + "loss_rtd": 0.23848189413547516, + "loss_sent": 0.2995993494987488, + "loss_sod": 0.056915100663900375, + "loss_total": 0.5949963331222534, + "step": 245299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.709053993225098, + "loss_rtd": 0.2434210479259491, + "loss_sent": 0.22623975574970245, + "loss_sod": 0.0472450815141201, + "loss_total": 0.5169059038162231, + "step": 245299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.4242582321166992, + "learning_rate": 2.2225050458466535e-05, + "loss": 0.448, + "step": 245300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.238818645477295, + "loss_rtd": 0.20706391334533691, + "loss_sent": 0.058697737753391266, + "loss_sod": 0.040166862308979034, + "loss_total": 0.305928498506546, + "step": 245399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.6264166831970215, + "loss_rtd": 0.22170746326446533, + "loss_sent": 0.3966951072216034, + "loss_sod": 0.049822889268398285, + "loss_total": 0.6682254672050476, + "step": 245399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.1491999626159668, + "learning_rate": 2.2198669303004932e-05, + "loss": 0.452, + "step": 245400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.834951877593994, + "loss_rtd": 0.23625729978084564, + "loss_sent": 0.07587853074073792, + "loss_sod": 0.10082794725894928, + "loss_total": 0.41296377778053284, + "step": 245499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.6272478103637695, + "loss_rtd": 0.20926325023174286, + "loss_sent": 0.00026445966796018183, + "loss_sod": 0.1732027232646942, + "loss_total": 0.3827304244041443, + "step": 245499 + }, + { + "epoch": 0.011, + "grad_norm": 1.1503254175186157, + "learning_rate": 2.2172299345915986e-05, + "loss": 0.4421, + "step": 245500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.753690719604492, + "loss_rtd": 0.25438231229782104, + "loss_sent": 0.07825588434934616, + "loss_sod": 0.007483081892132759, + "loss_total": 0.3401212990283966, + "step": 245599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.800004005432129, + "loss_rtd": 0.2196556031703949, + "loss_sent": 0.15227054059505463, + "loss_sod": 0.10250011086463928, + "loss_total": 0.47442626953125, + "step": 245599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.2017216682434082, + "learning_rate": 2.214594059782154e-05, + "loss": 0.4585, + "step": 245600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.600679874420166, + "loss_rtd": 0.20575957000255585, + "loss_sent": 0.0019685945007950068, + "loss_sod": 0.10293687880039215, + "loss_total": 0.3106650412082672, + "step": 245699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.707396507263184, + "loss_rtd": 0.23821589350700378, + "loss_sent": 0.08778704702854156, + "loss_sod": 0.02279324270784855, + "loss_total": 0.34879618883132935, + "step": 245699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.8220459818840027, + "learning_rate": 2.2119593069338895e-05, + "loss": 0.4446, + "step": 245700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.885300636291504, + "loss_rtd": 0.2289028763771057, + "loss_sent": 0.16664643585681915, + "loss_sod": 0.020120171830058098, + "loss_total": 0.4156695008277893, + "step": 245799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.484748363494873, + "loss_rtd": 0.21507808566093445, + "loss_sent": 0.08143315464258194, + "loss_sod": 0.06266427040100098, + "loss_total": 0.35917550325393677, + "step": 245799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.9669427871704102, + "learning_rate": 2.2093256771080838e-05, + "loss": 0.4268, + "step": 245800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.960178375244141, + "loss_rtd": 0.24078325927257538, + "loss_sent": 0.24635076522827148, + "loss_sod": 0.046413302421569824, + "loss_total": 0.5335473418235779, + "step": 245899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.852598667144775, + "loss_rtd": 0.22588245570659637, + "loss_sent": 0.11066222935914993, + "loss_sod": 0.0523991584777832, + "loss_total": 0.3889438509941101, + "step": 245899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.851073145866394, + "learning_rate": 2.2066931713655604e-05, + "loss": 0.4327, + "step": 245900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.353276252746582, + "loss_rtd": 0.19924487173557281, + "loss_sent": 0.16762703657150269, + "loss_sod": 0.13295862078666687, + "loss_total": 0.49983054399490356, + "step": 245999 + }, + { + "epoch": 0.011998, + "loss_gen": 6.044212341308594, + "loss_rtd": 0.22048138082027435, + "loss_sent": 0.32683029770851135, + "loss_sod": 0.10784997045993805, + "loss_total": 0.6551616191864014, + "step": 245999 + }, + { + "epoch": 0.012, + "grad_norm": 1.317622423171997, + "learning_rate": 2.2040617907666934e-05, + "loss": 0.4653, + "step": 246000 + }, + { + "epoch": 0.012, + "eval_loss": 0.42428115010261536, + "eval_runtime": 151.8415, + "eval_samples_per_second": 101.705, + "eval_steps_per_second": 0.797, + "step": 246000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.779860973358154, + "loss_rtd": 0.222878098487854, + "loss_sent": 0.25464460253715515, + "loss_sod": 0.11167255789041519, + "loss_total": 0.5891952514648438, + "step": 246099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.683056354522705, + "loss_rtd": 0.23052029311656952, + "loss_sent": 0.11374194920063019, + "loss_sod": 0.017238955944776535, + "loss_total": 0.36150121688842773, + "step": 246099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.1351783275604248, + "learning_rate": 2.201431536371402e-05, + "loss": 0.4619, + "step": 246100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.850572109222412, + "loss_rtd": 0.2256869077682495, + "loss_sent": 0.3070152699947357, + "loss_sod": 0.01178007572889328, + "loss_total": 0.5444822311401367, + "step": 246199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.7851386070251465, + "loss_rtd": 0.21720394492149353, + "loss_sent": 0.2863521873950958, + "loss_sod": 0.024845104664564133, + "loss_total": 0.528401255607605, + "step": 246199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.4040193557739258, + "learning_rate": 2.1988024092391558e-05, + "loss": 0.4495, + "step": 246200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.6477861404418945, + "loss_rtd": 0.24160976707935333, + "loss_sent": 0.33366549015045166, + "loss_sod": 0.0467105396091938, + "loss_total": 0.6219857931137085, + "step": 246299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.879884243011475, + "loss_rtd": 0.23690751194953918, + "loss_sent": 0.35896140336990356, + "loss_sod": 0.08702407032251358, + "loss_total": 0.6828929781913757, + "step": 246299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.3787297010421753, + "learning_rate": 2.1961744104289618e-05, + "loss": 0.4664, + "step": 246300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.86262321472168, + "loss_rtd": 0.23806585371494293, + "loss_sent": 0.2090684324502945, + "loss_sod": 0.019638799130916595, + "loss_total": 0.4667730927467346, + "step": 246399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.701531887054443, + "loss_rtd": 0.2358715981245041, + "loss_sent": 0.2644566297531128, + "loss_sod": 0.013847066089510918, + "loss_total": 0.514175295829773, + "step": 246399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.3796790838241577, + "learning_rate": 2.19354754099938e-05, + "loss": 0.4634, + "step": 246400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.696658134460449, + "loss_rtd": 0.2243136763572693, + "loss_sent": 0.35249295830726624, + "loss_sod": 0.03702107071876526, + "loss_total": 0.6138277053833008, + "step": 246499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.864236354827881, + "loss_rtd": 0.21939466893672943, + "loss_sent": 0.2594083845615387, + "loss_sod": 0.024586467072367668, + "loss_total": 0.5033895373344421, + "step": 246499 + }, + { + "epoch": 0.013, + "grad_norm": 1.9242634773254395, + "learning_rate": 2.1909218020085136e-05, + "loss": 0.4575, + "step": 246500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.756565570831299, + "loss_rtd": 0.2348550707101822, + "loss_sent": 0.2771225571632385, + "loss_sod": 0.06357685476541519, + "loss_total": 0.5755544900894165, + "step": 246599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.748210906982422, + "loss_rtd": 0.23125533759593964, + "loss_sent": 0.06457473337650299, + "loss_sod": 0.004369403701275587, + "loss_total": 0.3001994788646698, + "step": 246599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0573989152908325, + "learning_rate": 2.1882971945140106e-05, + "loss": 0.4386, + "step": 246600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.897297382354736, + "loss_rtd": 0.22446118295192719, + "loss_sent": 0.5125123858451843, + "loss_sod": 0.07384099066257477, + "loss_total": 0.8108145594596863, + "step": 246699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.738885402679443, + "loss_rtd": 0.20799201726913452, + "loss_sent": 0.140020489692688, + "loss_sod": 0.16425809264183044, + "loss_total": 0.5122705698013306, + "step": 246699 + }, + { + "epoch": 0.0134, + "grad_norm": 2.0589182376861572, + "learning_rate": 2.1856737195730596e-05, + "loss": 0.4539, + "step": 246700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.783360481262207, + "loss_rtd": 0.2284352034330368, + "loss_sent": 0.2370624542236328, + "loss_sod": 0.07437717914581299, + "loss_total": 0.5398748517036438, + "step": 246799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.760488986968994, + "loss_rtd": 0.2174978256225586, + "loss_sent": 0.36561107635498047, + "loss_sod": 0.08976496011018753, + "loss_total": 0.672873854637146, + "step": 246799 + }, + { + "epoch": 0.0136, + "grad_norm": 2.8151371479034424, + "learning_rate": 2.1830513782423984e-05, + "loss": 0.4682, + "step": 246800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.592465877532959, + "loss_rtd": 0.23686639964580536, + "loss_sent": 0.08548545837402344, + "loss_sod": 0.0479353666305542, + "loss_total": 0.3702872395515442, + "step": 246899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.187298774719238, + "loss_rtd": 0.23485742509365082, + "loss_sent": 0.00015608601097483188, + "loss_sod": 0.1666051298379898, + "loss_total": 0.401618629693985, + "step": 246899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.1387323141098022, + "learning_rate": 2.1804301715783064e-05, + "loss": 0.4444, + "step": 246900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.925267696380615, + "loss_rtd": 0.23347876965999603, + "loss_sent": 0.15267211198806763, + "loss_sod": 0.049669016152620316, + "loss_total": 0.4358198940753937, + "step": 246999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.698176383972168, + "loss_rtd": 0.2147320955991745, + "loss_sent": 0.04833608865737915, + "loss_sod": 0.012624384835362434, + "loss_total": 0.27569258213043213, + "step": 246999 + }, + { + "epoch": 0.014, + "grad_norm": 0.7666748762130737, + "learning_rate": 2.1778101006366032e-05, + "loss": 0.4434, + "step": 247000 + }, + { + "epoch": 0.014, + "eval_loss": 0.42115986347198486, + "eval_runtime": 151.5978, + "eval_samples_per_second": 101.868, + "eval_steps_per_second": 0.798, + "step": 247000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.583296775817871, + "loss_rtd": 0.21369054913520813, + "loss_sent": 0.4174363911151886, + "loss_sod": 0.04971133545041084, + "loss_total": 0.6808382868766785, + "step": 247099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.939522743225098, + "loss_rtd": 0.23565998673439026, + "loss_sent": 0.3900221586227417, + "loss_sod": 0.04761674255132675, + "loss_total": 0.6732988953590393, + "step": 247099 + }, + { + "epoch": 0.0142, + "grad_norm": 3.3717472553253174, + "learning_rate": 2.175191166472653e-05, + "loss": 0.446, + "step": 247100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.71102237701416, + "loss_rtd": 0.2300696223974228, + "loss_sent": 0.18655012547969818, + "loss_sod": 0.04130156710743904, + "loss_total": 0.4579213261604309, + "step": 247199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.667944431304932, + "loss_rtd": 0.22791703045368195, + "loss_sent": 0.09452652931213379, + "loss_sod": 0.006408519111573696, + "loss_total": 0.3288520574569702, + "step": 247199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.7964329123497009, + "learning_rate": 2.1725733701413636e-05, + "loss": 0.445, + "step": 247200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.576109409332275, + "loss_rtd": 0.2410363107919693, + "loss_sent": 0.34477588534355164, + "loss_sod": 0.16448423266410828, + "loss_total": 0.7502964735031128, + "step": 247299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.662108421325684, + "loss_rtd": 0.2261047661304474, + "loss_sent": 0.1713200956583023, + "loss_sod": 0.005798437632620335, + "loss_total": 0.40322330594062805, + "step": 247299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.8646548986434937, + "learning_rate": 2.1699567126971843e-05, + "loss": 0.4599, + "step": 247300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.640431880950928, + "loss_rtd": 0.23056964576244354, + "loss_sent": 0.1417578011751175, + "loss_sod": 0.02476336620748043, + "loss_total": 0.3970907926559448, + "step": 247399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.29046106338501, + "loss_rtd": 0.19295956194400787, + "loss_sent": 3.200511127943173e-05, + "loss_sod": 0.0558713860809803, + "loss_total": 0.24886295199394226, + "step": 247399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.0499322414398193, + "learning_rate": 2.167341195194101e-05, + "loss": 0.4416, + "step": 247400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.772815704345703, + "loss_rtd": 0.20959270000457764, + "loss_sent": 0.00048504251753911376, + "loss_sod": 0.1760254204273224, + "loss_total": 0.38610315322875977, + "step": 247499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.480262756347656, + "loss_rtd": 0.19009432196617126, + "loss_sent": 7.621695112902671e-05, + "loss_sod": 0.14515167474746704, + "loss_total": 0.33532220125198364, + "step": 247499 + }, + { + "epoch": 0.015, + "grad_norm": 1.1378663778305054, + "learning_rate": 2.1647268186856452e-05, + "loss": 0.4597, + "step": 247500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.816526889801025, + "loss_rtd": 0.23251159489154816, + "loss_sent": 0.30066436529159546, + "loss_sod": 0.072323277592659, + "loss_total": 0.605499267578125, + "step": 247599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.729894161224365, + "loss_rtd": 0.2259802520275116, + "loss_sent": 0.14471116662025452, + "loss_sod": 0.056692980229854584, + "loss_total": 0.4273844063282013, + "step": 247599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.9972118139266968, + "learning_rate": 2.1621135842248872e-05, + "loss": 0.4519, + "step": 247600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.717024326324463, + "loss_rtd": 0.22244328260421753, + "loss_sent": 0.37024953961372375, + "loss_sod": 0.17716263234615326, + "loss_total": 0.7698554992675781, + "step": 247699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.46368408203125, + "loss_rtd": 0.22375303506851196, + "loss_sent": 0.07559648901224136, + "loss_sod": 0.004048466682434082, + "loss_total": 0.3033979833126068, + "step": 247699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.8086750507354736, + "learning_rate": 2.1595014928644406e-05, + "loss": 0.4459, + "step": 247700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.685397624969482, + "loss_rtd": 0.2309625893831253, + "loss_sent": 0.30750802159309387, + "loss_sod": 0.08355244249105453, + "loss_total": 0.6220230460166931, + "step": 247799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.512424468994141, + "loss_rtd": 0.2207656353712082, + "loss_sent": 0.20813463628292084, + "loss_sod": 0.04484511539340019, + "loss_total": 0.4737454056739807, + "step": 247799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.8984826803207397, + "learning_rate": 2.1568905456564514e-05, + "loss": 0.4439, + "step": 247800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.573122024536133, + "loss_rtd": 0.2387402057647705, + "loss_sent": 0.21148760616779327, + "loss_sod": 0.028351351618766785, + "loss_total": 0.47857916355133057, + "step": 247899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.622769832611084, + "loss_rtd": 0.20884425938129425, + "loss_sent": 0.3075295090675354, + "loss_sod": 0.03448361158370972, + "loss_total": 0.5508573651313782, + "step": 247899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.7045705914497375, + "learning_rate": 2.1542807436526102e-05, + "loss": 0.45, + "step": 247900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.8512678146362305, + "loss_rtd": 0.22766973078250885, + "loss_sent": 0.2784850597381592, + "loss_sod": 0.05045732855796814, + "loss_total": 0.5566121339797974, + "step": 247999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.559403896331787, + "loss_rtd": 0.2467317134141922, + "loss_sent": 0.23199117183685303, + "loss_sod": 0.012821109034121037, + "loss_total": 0.4915440082550049, + "step": 247999 + }, + { + "epoch": 0.016, + "grad_norm": 0.8777672052383423, + "learning_rate": 2.1516720879041453e-05, + "loss": 0.4508, + "step": 248000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4283154308795929, + "eval_runtime": 151.5003, + "eval_samples_per_second": 101.934, + "eval_steps_per_second": 0.799, + "step": 248000 + }, + { + "epoch": 0.016198, + "loss_gen": 6.132227897644043, + "loss_rtd": 0.2485898733139038, + "loss_sent": 0.11270882934331894, + "loss_sod": 0.11559832096099854, + "loss_total": 0.4768970310688019, + "step": 248099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.833548069000244, + "loss_rtd": 0.20343995094299316, + "loss_sent": 0.22904229164123535, + "loss_sod": 0.03247417137026787, + "loss_total": 0.4649564027786255, + "step": 248099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.696287989616394, + "learning_rate": 2.1490645794618247e-05, + "loss": 0.4396, + "step": 248100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.809331893920898, + "loss_rtd": 0.21478372812271118, + "loss_sent": 0.10742197930812836, + "loss_sod": 0.054235316812992096, + "loss_total": 0.37644100189208984, + "step": 248199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.897103786468506, + "loss_rtd": 0.2588536739349365, + "loss_sent": 0.2113327831029892, + "loss_sod": 0.009779886342585087, + "loss_total": 0.47996634244918823, + "step": 248199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.5072851181030273, + "learning_rate": 2.146458219375948e-05, + "loss": 0.4351, + "step": 248200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.730291366577148, + "loss_rtd": 0.2134108990430832, + "loss_sent": 0.11594606935977936, + "loss_sod": 0.023139316588640213, + "loss_total": 0.35249626636505127, + "step": 248299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.768892765045166, + "loss_rtd": 0.22115463018417358, + "loss_sent": 0.25451236963272095, + "loss_sod": 0.08053763210773468, + "loss_total": 0.556204617023468, + "step": 248299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.1511993408203125, + "learning_rate": 2.14385300869636e-05, + "loss": 0.4436, + "step": 248300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.109781742095947, + "loss_rtd": 0.19637225568294525, + "loss_sent": 0.06141408905386925, + "loss_sod": 0.10375300794839859, + "loss_total": 0.361539363861084, + "step": 248399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.668107509613037, + "loss_rtd": 0.21653003990650177, + "loss_sent": 0.2802726924419403, + "loss_sod": 0.1111130565404892, + "loss_total": 0.6079157590866089, + "step": 248399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.1210267543792725, + "learning_rate": 2.1412489484724384e-05, + "loss": 0.448, + "step": 248400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.797348976135254, + "loss_rtd": 0.2264557033777237, + "loss_sent": 0.23551037907600403, + "loss_sod": 0.005144703201949596, + "loss_total": 0.4671107828617096, + "step": 248499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.7537665367126465, + "loss_rtd": 0.21819786727428436, + "loss_sent": 0.3020470142364502, + "loss_sod": 0.0521635077893734, + "loss_total": 0.5724083781242371, + "step": 248499 + }, + { + "epoch": 0.017, + "grad_norm": 2.03287410736084, + "learning_rate": 2.1386460397531e-05, + "loss": 0.4355, + "step": 248500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.910955429077148, + "loss_rtd": 0.23703964054584503, + "loss_sent": 0.055078279227018356, + "loss_sod": 0.07126298546791077, + "loss_total": 0.36338090896606445, + "step": 248599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.778095245361328, + "loss_rtd": 0.2506910562515259, + "loss_sent": 0.18475283682346344, + "loss_sod": 0.09531427919864655, + "loss_total": 0.5307581424713135, + "step": 248599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.9766395688056946, + "learning_rate": 2.1360442835867926e-05, + "loss": 0.4514, + "step": 248600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.89865255355835, + "loss_rtd": 0.22510012984275818, + "loss_sent": 0.13692350685596466, + "loss_sod": 0.041749171912670135, + "loss_total": 0.40377283096313477, + "step": 248699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.703371047973633, + "loss_rtd": 0.21377354860305786, + "loss_sent": 0.09150873869657516, + "loss_sod": 0.08654826879501343, + "loss_total": 0.39183056354522705, + "step": 248699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.8470425605773926, + "learning_rate": 2.133443681021506e-05, + "loss": 0.4387, + "step": 248700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.854787349700928, + "loss_rtd": 0.22065089643001556, + "loss_sent": 0.14864091575145721, + "loss_sod": 0.011109406128525734, + "loss_total": 0.38040122389793396, + "step": 248799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.786114692687988, + "loss_rtd": 0.2307480275630951, + "loss_sent": 0.050676070153713226, + "loss_sod": 0.027572056278586388, + "loss_total": 0.30899617075920105, + "step": 248799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.424483060836792, + "learning_rate": 2.1308442331047634e-05, + "loss": 0.439, + "step": 248800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.779053688049316, + "loss_rtd": 0.2416631579399109, + "loss_sent": 0.14630766212940216, + "loss_sod": 0.06027863174676895, + "loss_total": 0.4482494592666626, + "step": 248899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.042469024658203, + "loss_rtd": 0.19791507720947266, + "loss_sent": 0.006845710799098015, + "loss_sod": 0.05875186622142792, + "loss_total": 0.26351267099380493, + "step": 248899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.9350420236587524, + "learning_rate": 2.1282459408836186e-05, + "loss": 0.4493, + "step": 248900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.95554256439209, + "loss_rtd": 0.2389046549797058, + "loss_sent": 0.6801117062568665, + "loss_sod": 0.030125457793474197, + "loss_total": 0.9491418600082397, + "step": 248999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.599197864532471, + "loss_rtd": 0.225342258810997, + "loss_sent": 0.22508233785629272, + "loss_sod": 0.06279405206441879, + "loss_total": 0.5132186412811279, + "step": 248999 + }, + { + "epoch": 0.018, + "grad_norm": 2.1206676959991455, + "learning_rate": 2.1256488054046658e-05, + "loss": 0.4438, + "step": 249000 + }, + { + "epoch": 0.018, + "eval_loss": 0.42814549803733826, + "eval_runtime": 153.1422, + "eval_samples_per_second": 100.841, + "eval_steps_per_second": 0.79, + "step": 249000 + }, + { + "epoch": 0.018198, + "loss_gen": 6.016199588775635, + "loss_rtd": 0.22620250284671783, + "loss_sent": 0.17632102966308594, + "loss_sod": 0.04826517403125763, + "loss_total": 0.4507887065410614, + "step": 249099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.909693241119385, + "loss_rtd": 0.2267966866493225, + "loss_sent": 0.2964528501033783, + "loss_sod": 0.11600951105356216, + "loss_total": 0.6392590403556824, + "step": 249099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.9242974519729614, + "learning_rate": 2.12305282771403e-05, + "loss": 0.4525, + "step": 249100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.695147514343262, + "loss_rtd": 0.2238437980413437, + "loss_sent": 0.24166767299175262, + "loss_sod": 0.010288280434906483, + "loss_total": 0.4757997393608093, + "step": 249199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.540353775024414, + "loss_rtd": 0.22267025709152222, + "loss_sent": 0.07612654566764832, + "loss_sod": 0.020782165229320526, + "loss_total": 0.31957897543907166, + "step": 249199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.4858189821243286, + "learning_rate": 2.1204580088573733e-05, + "loss": 0.4557, + "step": 249200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.780377388000488, + "loss_rtd": 0.24228976666927338, + "loss_sent": 0.0438273586332798, + "loss_sod": 0.09033120423555374, + "loss_total": 0.3764483332633972, + "step": 249299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.685077667236328, + "loss_rtd": 0.24057133495807648, + "loss_sent": 0.23263029754161835, + "loss_sod": 0.09057365357875824, + "loss_total": 0.5637753009796143, + "step": 249299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.1972358226776123, + "learning_rate": 2.117864349879884e-05, + "loss": 0.4493, + "step": 249300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.7665815353393555, + "loss_rtd": 0.2430795282125473, + "loss_sent": 0.07919485867023468, + "loss_sod": 0.05886111408472061, + "loss_total": 0.3811355233192444, + "step": 249399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.909588813781738, + "loss_rtd": 0.2451922595500946, + "loss_sent": 0.29324105381965637, + "loss_sod": 0.1357181817293167, + "loss_total": 0.6741515398025513, + "step": 249399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.2323169708251953, + "learning_rate": 2.1152718518262903e-05, + "loss": 0.4347, + "step": 249400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.551820755004883, + "loss_rtd": 0.24110427498817444, + "loss_sent": 0.1510792225599289, + "loss_sod": 0.01303178258240223, + "loss_total": 0.4052152633666992, + "step": 249499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.698292255401611, + "loss_rtd": 0.2367483377456665, + "loss_sent": 0.37549740076065063, + "loss_sod": 0.06762387603521347, + "loss_total": 0.6798696517944336, + "step": 249499 + }, + { + "epoch": 0.019, + "grad_norm": 1.4118274450302124, + "learning_rate": 2.1126805157408496e-05, + "loss": 0.4365, + "step": 249500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.511271953582764, + "loss_rtd": 0.20768186450004578, + "loss_sent": 0.14879639446735382, + "loss_sod": 0.07677138596773148, + "loss_total": 0.43324965238571167, + "step": 249599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.441946983337402, + "loss_rtd": 0.24837626516819, + "loss_sent": 0.08175481110811234, + "loss_sod": 0.005567528773099184, + "loss_total": 0.33569860458374023, + "step": 249599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.5600844025611877, + "learning_rate": 2.1100903426673536e-05, + "loss": 0.444, + "step": 249600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.156698703765869, + "loss_rtd": 0.2006874829530716, + "loss_sent": 2.6125118893105537e-05, + "loss_sod": 0.14785784482955933, + "loss_total": 0.34857144951820374, + "step": 249699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.049666404724121, + "loss_rtd": 0.18938666582107544, + "loss_sent": 2.5378220016136765e-05, + "loss_sod": 0.12293696403503418, + "loss_total": 0.31234902143478394, + "step": 249699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.7393021583557129, + "learning_rate": 2.10750133364912e-05, + "loss": 0.4475, + "step": 249700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.47639274597168, + "loss_rtd": 0.21608366072177887, + "loss_sent": 0.03439813107252121, + "loss_sod": 0.12057603895664215, + "loss_total": 0.3710578382015228, + "step": 249799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.983134746551514, + "loss_rtd": 0.1847182959318161, + "loss_sent": 0.03477868810296059, + "loss_sod": 0.050732582807540894, + "loss_total": 0.2702295780181885, + "step": 249799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.823193371295929, + "learning_rate": 2.1049134897290036e-05, + "loss": 0.4292, + "step": 249800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.758688926696777, + "loss_rtd": 0.2276776134967804, + "loss_sent": 0.181236132979393, + "loss_sod": 0.041420139372348785, + "loss_total": 0.4503338932991028, + "step": 249899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.971752166748047, + "loss_rtd": 0.2346062958240509, + "loss_sent": 0.17096562683582306, + "loss_sod": 0.06036635488271713, + "loss_total": 0.4659382700920105, + "step": 249899 + }, + { + "epoch": 0.0198, + "grad_norm": 2.3099324703216553, + "learning_rate": 2.102326811949387e-05, + "loss": 0.4802, + "step": 249900 + }, + { + "epoch": 0.019998, + "loss_gen": 6.1688151359558105, + "loss_rtd": 0.22333301603794098, + "loss_sent": 0.18693652749061584, + "loss_sod": 0.18587328493595123, + "loss_total": 0.5961428284645081, + "step": 249999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.6526384353637695, + "loss_rtd": 0.20717786252498627, + "loss_sent": 0.05291612073779106, + "loss_sod": 0.14070719480514526, + "loss_total": 0.4008011817932129, + "step": 249999 + }, + { + "epoch": 0.02, + "grad_norm": 1.6782922744750977, + "learning_rate": 2.0997413013521867e-05, + "loss": 0.4637, + "step": 250000 + }, + { + "epoch": 0.02, + "eval_loss": 0.4257298409938812, + "eval_runtime": 151.7832, + "eval_samples_per_second": 101.744, + "eval_steps_per_second": 0.797, + "step": 250000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.887287139892578, + "loss_rtd": 0.22037333250045776, + "loss_sent": 0.0781002938747406, + "loss_sod": 0.02560676634311676, + "loss_total": 0.3240804076194763, + "step": 250099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.702293395996094, + "loss_rtd": 0.2283933460712433, + "loss_sent": 0.3841685354709625, + "loss_sod": 0.044053301215171814, + "loss_total": 0.656615138053894, + "step": 250099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.5517466068267822, + "learning_rate": 2.097156958978841e-05, + "loss": 0.455, + "step": 250100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.590933799743652, + "loss_rtd": 0.23131448030471802, + "loss_sent": 0.10019551217556, + "loss_sod": 0.0016662365524098277, + "loss_total": 0.33317622542381287, + "step": 250199 + }, + { + "epoch": 0.020398, + "loss_gen": 6.149798393249512, + "loss_rtd": 0.23659397661685944, + "loss_sent": 0.09647634625434875, + "loss_sod": 0.10330408811569214, + "loss_total": 0.43637439608573914, + "step": 250199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.1645917892456055, + "learning_rate": 2.0945737858703245e-05, + "loss": 0.4274, + "step": 250200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.983415603637695, + "loss_rtd": 0.22915597259998322, + "loss_sent": 0.20124390721321106, + "loss_sod": 0.11508305370807648, + "loss_total": 0.5454829335212708, + "step": 250299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.548913955688477, + "loss_rtd": 0.23363099992275238, + "loss_sent": 0.17175786197185516, + "loss_sod": 0.018295908346772194, + "loss_total": 0.4236847758293152, + "step": 250299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.2545076608657837, + "learning_rate": 2.091991783067142e-05, + "loss": 0.4347, + "step": 250300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.855342864990234, + "loss_rtd": 0.22722971439361572, + "loss_sent": 0.14149326086044312, + "loss_sod": 0.0870337188243866, + "loss_total": 0.45575669407844543, + "step": 250399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.924689292907715, + "loss_rtd": 0.224134162068367, + "loss_sent": 0.02492489479482174, + "loss_sod": 0.03921440616250038, + "loss_total": 0.28827348351478577, + "step": 250399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8212274312973022, + "learning_rate": 2.0894109516093195e-05, + "loss": 0.4465, + "step": 250400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.6750359535217285, + "loss_rtd": 0.2115013748407364, + "loss_sent": 0.0660208910703659, + "loss_sod": 0.07971183955669403, + "loss_total": 0.3572341203689575, + "step": 250499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.898503303527832, + "loss_rtd": 0.22151196002960205, + "loss_sent": 0.5142325758934021, + "loss_sod": 0.05417371541261673, + "loss_total": 0.7899182438850403, + "step": 250499 + }, + { + "epoch": 0.021, + "grad_norm": 2.3125884532928467, + "learning_rate": 2.086831292536418e-05, + "loss": 0.4684, + "step": 250500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.5948405265808105, + "loss_rtd": 0.21999555826187134, + "loss_sent": 0.06595755368471146, + "loss_sod": 0.006651579402387142, + "loss_total": 0.2926046848297119, + "step": 250599 + }, + { + "epoch": 0.021198, + "loss_gen": 6.026972770690918, + "loss_rtd": 0.23303301632404327, + "loss_sent": 0.19365358352661133, + "loss_sod": 0.09179709106683731, + "loss_total": 0.5184836983680725, + "step": 250599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.8277121186256409, + "learning_rate": 2.0842528068875233e-05, + "loss": 0.4451, + "step": 250600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.363009452819824, + "loss_rtd": 0.20621538162231445, + "loss_sent": 0.0007631806074641645, + "loss_sod": 0.21446320414543152, + "loss_total": 0.4214417636394501, + "step": 250699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.159182071685791, + "loss_rtd": 0.1883380264043808, + "loss_sent": 0.00517320167273283, + "loss_sod": 0.0313698910176754, + "loss_total": 0.2248811274766922, + "step": 250699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.0292924642562866, + "learning_rate": 2.0816754957012506e-05, + "loss": 0.4328, + "step": 250700 + }, + { + "epoch": 0.021598, + "loss_gen": 6.009695529937744, + "loss_rtd": 0.21848805248737335, + "loss_sent": 0.14705587923526764, + "loss_sod": 0.1392502337694168, + "loss_total": 0.504794180393219, + "step": 250799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.875698089599609, + "loss_rtd": 0.24496762454509735, + "loss_sent": 0.055516812950372696, + "loss_sod": 0.05094952508807182, + "loss_total": 0.35143396258354187, + "step": 250799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.8969025015830994, + "learning_rate": 2.0790993600157384e-05, + "loss": 0.4484, + "step": 250800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.118767738342285, + "loss_rtd": 0.18534214794635773, + "loss_sent": 0.008448443375527859, + "loss_sod": 0.19007673859596252, + "loss_total": 0.3838673532009125, + "step": 250899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.62872838973999, + "loss_rtd": 0.2451535165309906, + "loss_sent": 0.10492289066314697, + "loss_sod": 0.03780888020992279, + "loss_total": 0.38788527250289917, + "step": 250899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.2935609817504883, + "learning_rate": 2.076524400868654e-05, + "loss": 0.4448, + "step": 250900 + }, + { + "epoch": 0.021998, + "loss_gen": 7.03744649887085, + "loss_rtd": 0.24140574038028717, + "loss_sent": 0.04671387001872063, + "loss_sod": 0.10378556698560715, + "loss_total": 0.39190515875816345, + "step": 250999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.519412517547607, + "loss_rtd": 0.20484577119350433, + "loss_sent": 0.20768705010414124, + "loss_sod": 0.05683238059282303, + "loss_total": 0.4693652093410492, + "step": 250999 + }, + { + "epoch": 0.022, + "grad_norm": 1.482284665107727, + "learning_rate": 2.0739506192971913e-05, + "loss": 0.4452, + "step": 251000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4228754937648773, + "eval_runtime": 151.7485, + "eval_samples_per_second": 101.767, + "eval_steps_per_second": 0.797, + "step": 251000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.65935754776001, + "loss_rtd": 0.1884467601776123, + "loss_sent": 0.1918184906244278, + "loss_sod": 0.10814428329467773, + "loss_total": 0.48840951919555664, + "step": 251099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.581549167633057, + "loss_rtd": 0.23618453741073608, + "loss_sent": 0.17425058782100677, + "loss_sod": 0.05884721875190735, + "loss_total": 0.469282329082489, + "step": 251099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.955230712890625, + "learning_rate": 2.0713780163380712e-05, + "loss": 0.4599, + "step": 251100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.499749660491943, + "loss_rtd": 0.22472791373729706, + "loss_sent": 0.1220015436410904, + "loss_sod": 0.06044648587703705, + "loss_total": 0.4071759581565857, + "step": 251199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.156343460083008, + "loss_rtd": 0.19477254152297974, + "loss_sent": 0.035995736718177795, + "loss_sod": 0.03143073618412018, + "loss_total": 0.2621990144252777, + "step": 251199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.7173367738723755, + "learning_rate": 2.068806593027534e-05, + "loss": 0.4417, + "step": 251200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.773978233337402, + "loss_rtd": 0.260987788438797, + "loss_sent": 0.26252150535583496, + "loss_sod": 0.02137020230293274, + "loss_total": 0.5448794960975647, + "step": 251299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.492867946624756, + "loss_rtd": 0.22154872119426727, + "loss_sent": 0.09729105979204178, + "loss_sod": 0.03462834656238556, + "loss_total": 0.353468120098114, + "step": 251299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.8783969283103943, + "learning_rate": 2.066236350401351e-05, + "loss": 0.4547, + "step": 251300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.690173149108887, + "loss_rtd": 0.2423451989889145, + "loss_sent": 0.5937155485153198, + "loss_sod": 0.03639678657054901, + "loss_total": 0.8724575042724609, + "step": 251399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.853487968444824, + "loss_rtd": 0.21431252360343933, + "loss_sent": 0.2413485199213028, + "loss_sod": 0.21916517615318298, + "loss_total": 0.6748262047767639, + "step": 251399 + }, + { + "epoch": 0.0228, + "grad_norm": 2.0817222595214844, + "learning_rate": 2.063667289494815e-05, + "loss": 0.4588, + "step": 251400 + }, + { + "epoch": 0.022998, + "loss_gen": 6.574832439422607, + "loss_rtd": 0.2396440953016281, + "loss_sent": 0.10621442645788193, + "loss_sod": 0.11426478624343872, + "loss_total": 0.46012333035469055, + "step": 251499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.385630130767822, + "loss_rtd": 0.2209346890449524, + "loss_sent": 0.20252737402915955, + "loss_sod": 0.08124950528144836, + "loss_total": 0.5047115683555603, + "step": 251499 + }, + { + "epoch": 0.023, + "grad_norm": 1.0906034708023071, + "learning_rate": 2.0610994113427452e-05, + "loss": 0.4311, + "step": 251500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.799736022949219, + "loss_rtd": 0.22718507051467896, + "loss_sent": 0.0844174399971962, + "loss_sod": 0.0614926777780056, + "loss_total": 0.37309518456459045, + "step": 251599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.91364860534668, + "loss_rtd": 0.24203763902187347, + "loss_sent": 0.1268770843744278, + "loss_sod": 0.04052715748548508, + "loss_total": 0.40944188833236694, + "step": 251599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.8985000848770142, + "learning_rate": 2.0585327169794793e-05, + "loss": 0.4283, + "step": 251600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.676454544067383, + "loss_rtd": 0.24494819343090057, + "loss_sent": 0.24845416843891144, + "loss_sod": 0.10490782558917999, + "loss_total": 0.5983101725578308, + "step": 251699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.693519115447998, + "loss_rtd": 0.22302430868148804, + "loss_sent": 0.18203288316726685, + "loss_sod": 0.02259897254407406, + "loss_total": 0.4276561737060547, + "step": 251699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.0740926265716553, + "learning_rate": 2.0559672074388835e-05, + "loss": 0.452, + "step": 251700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.71986722946167, + "loss_rtd": 0.21114739775657654, + "loss_sent": 0.38429152965545654, + "loss_sod": 0.03487692400813103, + "loss_total": 0.6303158402442932, + "step": 251799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.775424003601074, + "loss_rtd": 0.24753491580486298, + "loss_sent": 0.1396598368883133, + "loss_sod": 0.023601790890097618, + "loss_total": 0.41079652309417725, + "step": 251799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.0082423686981201, + "learning_rate": 2.053402883754346e-05, + "loss": 0.4547, + "step": 251800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.8386945724487305, + "loss_rtd": 0.23833312094211578, + "loss_sent": 0.39287030696868896, + "loss_sod": 0.045166682451963425, + "loss_total": 0.6763701438903809, + "step": 251899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.735254287719727, + "loss_rtd": 0.21878089010715485, + "loss_sent": 0.24877341091632843, + "loss_sod": 0.04096533730626106, + "loss_total": 0.5085196495056152, + "step": 251899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.951898992061615, + "learning_rate": 2.050839746958773e-05, + "loss": 0.4387, + "step": 251900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.8343682289123535, + "loss_rtd": 0.23846673965454102, + "loss_sent": 0.30951541662216187, + "loss_sod": 0.012829523533582687, + "loss_total": 0.5608116388320923, + "step": 251999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.8856072425842285, + "loss_rtd": 0.21703128516674042, + "loss_sent": 0.37415531277656555, + "loss_sod": 0.09952997416257858, + "loss_total": 0.690716564655304, + "step": 251999 + }, + { + "epoch": 0.024, + "grad_norm": 1.9577677249908447, + "learning_rate": 2.0482777980845972e-05, + "loss": 0.4187, + "step": 252000 + }, + { + "epoch": 0.024, + "eval_loss": 0.42289337515830994, + "eval_runtime": 151.7939, + "eval_samples_per_second": 101.737, + "eval_steps_per_second": 0.797, + "step": 252000 + }, + { + "epoch": 0.024198, + "loss_gen": 4.916977405548096, + "loss_rtd": 0.1900949478149414, + "loss_sent": 2.561290420999285e-05, + "loss_sod": 0.044411174952983856, + "loss_total": 0.23453174531459808, + "step": 252099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.1336259841918945, + "loss_rtd": 0.18254713714122772, + "loss_sent": 0.09441950917243958, + "loss_sod": 0.05666785687208176, + "loss_total": 0.33363449573516846, + "step": 252099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.7168747186660767, + "learning_rate": 2.0457170381637714e-05, + "loss": 0.4347, + "step": 252100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.757220268249512, + "loss_rtd": 0.23494116961956024, + "loss_sent": 0.12362934648990631, + "loss_sod": 0.057746171951293945, + "loss_total": 0.4163166880607605, + "step": 252199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.632613658905029, + "loss_rtd": 0.2236497849225998, + "loss_sent": 0.15610367059707642, + "loss_sod": 0.02144138514995575, + "loss_total": 0.40119484066963196, + "step": 252199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.8283018469810486, + "learning_rate": 2.043157468227771e-05, + "loss": 0.4548, + "step": 252200 + }, + { + "epoch": 0.024598, + "loss_gen": 6.04722261428833, + "loss_rtd": 0.19264057278633118, + "loss_sent": 0.20455722510814667, + "loss_sod": 0.04163656011223793, + "loss_total": 0.43883436918258667, + "step": 252299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.652451515197754, + "loss_rtd": 0.22907041013240814, + "loss_sent": 0.13255144655704498, + "loss_sod": 0.027676325291395187, + "loss_total": 0.3892982006072998, + "step": 252299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.3295665979385376, + "learning_rate": 2.0405990893075866e-05, + "loss": 0.4442, + "step": 252300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.768548965454102, + "loss_rtd": 0.23619456589221954, + "loss_sent": 0.07642235606908798, + "loss_sod": 0.06291034072637558, + "loss_total": 0.3755272626876831, + "step": 252399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.375941276550293, + "loss_rtd": 0.19022639095783234, + "loss_sent": 0.054528672248125076, + "loss_sod": 0.14079278707504272, + "loss_total": 0.38554784655570984, + "step": 252399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.0614683628082275, + "learning_rate": 2.0380419024337355e-05, + "loss": 0.4582, + "step": 252400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.842750072479248, + "loss_rtd": 0.231532484292984, + "loss_sent": 0.11322799324989319, + "loss_sod": 0.006990238558501005, + "loss_total": 0.3517507314682007, + "step": 252499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.830024242401123, + "loss_rtd": 0.21234385669231415, + "loss_sent": 0.1968112587928772, + "loss_sod": 0.031806040555238724, + "loss_total": 0.44096115231513977, + "step": 252499 + }, + { + "epoch": 0.025, + "grad_norm": 0.9424448609352112, + "learning_rate": 2.035485908636254e-05, + "loss": 0.4404, + "step": 252500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.690739154815674, + "loss_rtd": 0.20290978252887726, + "loss_sent": 0.15227241814136505, + "loss_sod": 0.12506070733070374, + "loss_total": 0.48024290800094604, + "step": 252599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.893576622009277, + "loss_rtd": 0.22877617180347443, + "loss_sent": 0.13091930747032166, + "loss_sod": 0.03059357777237892, + "loss_total": 0.3902890682220459, + "step": 252599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.1129803657531738, + "learning_rate": 2.032931108944692e-05, + "loss": 0.4487, + "step": 252600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.083102703094482, + "loss_rtd": 0.17864498496055603, + "loss_sent": 2.9816606911481358e-05, + "loss_sod": 0.037687476724386215, + "loss_total": 0.21636228263378143, + "step": 252699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.865263938903809, + "loss_rtd": 0.16908150911331177, + "loss_sent": 0.011248045600950718, + "loss_sod": 0.03134654462337494, + "loss_total": 0.21167610585689545, + "step": 252699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.7066148519515991, + "learning_rate": 2.0303775043881255e-05, + "loss": 0.4485, + "step": 252700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.265369892120361, + "loss_rtd": 0.1809772104024887, + "loss_sent": 0.05907053127884865, + "loss_sod": 0.04577813670039177, + "loss_total": 0.2858258783817291, + "step": 252799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.592155933380127, + "loss_rtd": 0.2288004457950592, + "loss_sent": 0.06423830986022949, + "loss_sod": 0.024152319878339767, + "loss_total": 0.31719106435775757, + "step": 252799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.5810658931732178, + "learning_rate": 2.0278250959951443e-05, + "loss": 0.4433, + "step": 252800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.966113090515137, + "loss_rtd": 0.21657642722129822, + "loss_sent": 0.11611036211252213, + "loss_sod": 0.046724990010261536, + "loss_total": 0.3794117867946625, + "step": 252899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.758605003356934, + "loss_rtd": 0.21248145401477814, + "loss_sent": 0.24654428660869598, + "loss_sod": 0.09136464446783066, + "loss_total": 0.550390362739563, + "step": 252899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.7527257204055786, + "learning_rate": 2.0252738847938585e-05, + "loss": 0.457, + "step": 252900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.563623905181885, + "loss_rtd": 0.2416469007730484, + "loss_sent": 0.3352299630641937, + "loss_sod": 0.01914307475090027, + "loss_total": 0.596019983291626, + "step": 252999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.356666564941406, + "loss_rtd": 0.2195223718881607, + "loss_sent": 0.06701061874628067, + "loss_sod": 0.10076329112052917, + "loss_total": 0.38729628920555115, + "step": 252999 + }, + { + "epoch": 0.026, + "grad_norm": 1.4748008251190186, + "learning_rate": 2.0227238718118963e-05, + "loss": 0.4332, + "step": 253000 + }, + { + "epoch": 0.026, + "eval_loss": 0.4322918951511383, + "eval_runtime": 151.7236, + "eval_samples_per_second": 101.784, + "eval_steps_per_second": 0.798, + "step": 253000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.863963603973389, + "loss_rtd": 0.2454257756471634, + "loss_sent": 0.1348862200975418, + "loss_sod": 0.04228346049785614, + "loss_total": 0.42259544134140015, + "step": 253099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.711928367614746, + "loss_rtd": 0.22430557012557983, + "loss_sent": 0.3639461398124695, + "loss_sod": 0.04680056497454643, + "loss_total": 0.6350522637367249, + "step": 253099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.656769871711731, + "learning_rate": 2.0201750580764044e-05, + "loss": 0.4645, + "step": 253100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.750680923461914, + "loss_rtd": 0.23668131232261658, + "loss_sent": 0.24485036730766296, + "loss_sod": 0.0677449107170105, + "loss_total": 0.54927659034729, + "step": 253199 + }, + { + "epoch": 0.026398, + "loss_gen": 6.049872398376465, + "loss_rtd": 0.2207106202840805, + "loss_sent": 0.10593386739492416, + "loss_sod": 0.053735025227069855, + "loss_total": 0.3803795278072357, + "step": 253199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.709084153175354, + "learning_rate": 2.017627444614041e-05, + "loss": 0.444, + "step": 253200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.8751959800720215, + "loss_rtd": 0.2263498157262802, + "loss_sent": 0.18755370378494263, + "loss_sod": 0.0030357346404343843, + "loss_total": 0.41693925857543945, + "step": 253299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.824941635131836, + "loss_rtd": 0.2340388298034668, + "loss_sent": 0.10973584651947021, + "loss_sod": 0.06521715223789215, + "loss_total": 0.40899181365966797, + "step": 253299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.080283284187317, + "learning_rate": 2.015081032450986e-05, + "loss": 0.4371, + "step": 253300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.92127799987793, + "loss_rtd": 0.22395184636116028, + "loss_sent": 0.18231117725372314, + "loss_sod": 0.10265080630779266, + "loss_total": 0.5089138150215149, + "step": 253399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.888629913330078, + "loss_rtd": 0.23281361162662506, + "loss_sent": 0.08794432878494263, + "loss_sod": 0.13615782558918, + "loss_total": 0.4569157660007477, + "step": 253399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.3039835691452026, + "learning_rate": 2.012535822612936e-05, + "loss": 0.4643, + "step": 253400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.305265426635742, + "loss_rtd": 0.20122742652893066, + "loss_sent": 0.027874859049916267, + "loss_sod": 0.18004044890403748, + "loss_total": 0.40914273262023926, + "step": 253499 + }, + { + "epoch": 0.026998, + "loss_gen": 6.316204071044922, + "loss_rtd": 0.2375180572271347, + "loss_sent": 0.14653708040714264, + "loss_sod": 0.13144049048423767, + "loss_total": 0.5154955983161926, + "step": 253499 + }, + { + "epoch": 0.027, + "grad_norm": 1.2195602655410767, + "learning_rate": 2.009991816125098e-05, + "loss": 0.4438, + "step": 253500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.774242401123047, + "loss_rtd": 0.22510267794132233, + "loss_sent": 0.13734416663646698, + "loss_sod": 0.01843191497027874, + "loss_total": 0.380878746509552, + "step": 253599 + }, + { + "epoch": 0.027198, + "loss_gen": 6.295194149017334, + "loss_rtd": 0.24192148447036743, + "loss_sent": 0.13870932161808014, + "loss_sod": 0.06025253236293793, + "loss_total": 0.4408833384513855, + "step": 253599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.4461230039596558, + "learning_rate": 2.0074490140121982e-05, + "loss": 0.463, + "step": 253600 + }, + { + "epoch": 0.027398, + "loss_gen": 6.187361717224121, + "loss_rtd": 0.22875021398067474, + "loss_sent": 0.10544523596763611, + "loss_sod": 0.017582543194293976, + "loss_total": 0.3517780005931854, + "step": 253699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.74911642074585, + "loss_rtd": 0.23467105627059937, + "loss_sent": 0.18539391458034515, + "loss_sod": 0.0055173709988594055, + "loss_total": 0.4255823493003845, + "step": 253699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.4206382036209106, + "learning_rate": 2.004907417298478e-05, + "loss": 0.4209, + "step": 253700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.139222145080566, + "loss_rtd": 0.2642526626586914, + "loss_sent": 0.0941305011510849, + "loss_sod": 0.1769954264163971, + "loss_total": 0.5353785753250122, + "step": 253799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.594113826751709, + "loss_rtd": 0.1976829469203949, + "loss_sent": 0.057075586169958115, + "loss_sod": 0.023945963010191917, + "loss_total": 0.2787044942378998, + "step": 253799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.4231631755828857, + "learning_rate": 2.0023670270076937e-05, + "loss": 0.4389, + "step": 253800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.666767597198486, + "loss_rtd": 0.23870056867599487, + "loss_sent": 0.48226192593574524, + "loss_sod": 0.013401873409748077, + "loss_total": 0.73436439037323, + "step": 253899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.971577167510986, + "loss_rtd": 0.20716333389282227, + "loss_sent": 0.24025537073612213, + "loss_sod": 0.07430551946163177, + "loss_total": 0.5217242240905762, + "step": 253899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.488202452659607, + "learning_rate": 1.9998278441631108e-05, + "loss": 0.4418, + "step": 253900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.235413551330566, + "loss_rtd": 0.1910182386636734, + "loss_sent": 3.3657182939350605e-05, + "loss_sod": 0.11094896495342255, + "loss_total": 0.30200085043907166, + "step": 253999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.100149631500244, + "loss_rtd": 0.19616694748401642, + "loss_sent": 0.0688726156949997, + "loss_sod": 0.05267348885536194, + "loss_total": 0.31771305203437805, + "step": 253999 + }, + { + "epoch": 0.028, + "grad_norm": 0.7923054695129395, + "learning_rate": 1.9972898697875135e-05, + "loss": 0.4481, + "step": 254000 + }, + { + "epoch": 0.028, + "eval_loss": 0.41873565316200256, + "eval_runtime": 151.8254, + "eval_samples_per_second": 101.716, + "eval_steps_per_second": 0.797, + "step": 254000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.996367454528809, + "loss_rtd": 0.1675935685634613, + "loss_sent": 3.037181159015745e-05, + "loss_sod": 0.1063089668750763, + "loss_total": 0.27393290400505066, + "step": 254099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.810243129730225, + "loss_rtd": 0.210595965385437, + "loss_sent": 0.3463844954967499, + "loss_sod": 0.040098853409290314, + "loss_total": 0.5970792770385742, + "step": 254099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.048243522644043, + "learning_rate": 1.9947531049031976e-05, + "loss": 0.4489, + "step": 254100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.795935153961182, + "loss_rtd": 0.22435301542282104, + "loss_sent": 0.14384007453918457, + "loss_sod": 0.03588526323437691, + "loss_total": 0.4040783643722534, + "step": 254199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.829339981079102, + "loss_rtd": 0.22517512738704681, + "loss_sent": 0.16799873113632202, + "loss_sod": 0.05237983912229538, + "loss_total": 0.4455536901950836, + "step": 254199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.508608341217041, + "learning_rate": 1.9922175505319733e-05, + "loss": 0.4457, + "step": 254200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.382936000823975, + "loss_rtd": 0.21463479101657867, + "loss_sent": 0.05007823184132576, + "loss_sod": 0.01350468024611473, + "loss_total": 0.27821770310401917, + "step": 254299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.707792282104492, + "loss_rtd": 0.23562106490135193, + "loss_sent": 0.1486457735300064, + "loss_sod": 0.0814094990491867, + "loss_total": 0.46567630767822266, + "step": 254299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.9088292121887207, + "learning_rate": 1.989683207695158e-05, + "loss": 0.4587, + "step": 254300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.139110565185547, + "loss_rtd": 0.1864052563905716, + "loss_sent": 3.037551141460426e-05, + "loss_sod": 0.06559682637453079, + "loss_total": 0.25203245878219604, + "step": 254399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.914858818054199, + "loss_rtd": 0.23055309057235718, + "loss_sent": 0.23952604830265045, + "loss_sod": 0.017169222235679626, + "loss_total": 0.48724836111068726, + "step": 254399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.832278311252594, + "learning_rate": 1.987150077413587e-05, + "loss": 0.4368, + "step": 254400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.868295669555664, + "loss_rtd": 0.21565981209278107, + "loss_sent": 0.13931824266910553, + "loss_sod": 0.09656614810228348, + "loss_total": 0.45154422521591187, + "step": 254499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.699411392211914, + "loss_rtd": 0.21974077820777893, + "loss_sent": 0.2226129174232483, + "loss_sod": 0.04936359450221062, + "loss_total": 0.49171727895736694, + "step": 254499 + }, + { + "epoch": 0.029, + "grad_norm": 2.1245124340057373, + "learning_rate": 1.9846181607076043e-05, + "loss": 0.4423, + "step": 254500 + }, + { + "epoch": 0.029198, + "loss_gen": 6.034182071685791, + "loss_rtd": 0.22618551552295685, + "loss_sent": 0.15084312856197357, + "loss_sod": 0.04063883423805237, + "loss_total": 0.4176675081253052, + "step": 254599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.342142105102539, + "loss_rtd": 0.2164769172668457, + "loss_sent": 0.0017474597552791238, + "loss_sod": 0.11026746779680252, + "loss_total": 0.32849183678627014, + "step": 254599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.2419036626815796, + "learning_rate": 1.982087458597068e-05, + "loss": 0.4458, + "step": 254600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.955076694488525, + "loss_rtd": 0.18739967048168182, + "loss_sent": 0.021308621391654015, + "loss_sod": 0.06322623789310455, + "loss_total": 0.27193453907966614, + "step": 254699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.2264604568481445, + "loss_rtd": 0.19921955466270447, + "loss_sent": 0.07856092602014542, + "loss_sod": 0.011125471442937851, + "loss_total": 0.28890594840049744, + "step": 254699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.5246447920799255, + "learning_rate": 1.97955797210134e-05, + "loss": 0.463, + "step": 254700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.651065826416016, + "loss_rtd": 0.21787527203559875, + "loss_sent": 0.097113236784935, + "loss_sod": 0.048450201749801636, + "loss_total": 0.3634387254714966, + "step": 254799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.7161664962768555, + "loss_rtd": 0.22050975263118744, + "loss_sent": 0.05476810038089752, + "loss_sod": 0.08707212656736374, + "loss_total": 0.3623499870300293, + "step": 254799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.0568890571594238, + "learning_rate": 1.9770297022393004e-05, + "loss": 0.4364, + "step": 254800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.602153778076172, + "loss_rtd": 0.23433373868465424, + "loss_sent": 0.18747679889202118, + "loss_sod": 0.006510594859719276, + "loss_total": 0.42832112312316895, + "step": 254899 + }, + { + "epoch": 0.029798, + "loss_gen": 6.1688151359558105, + "loss_rtd": 0.21581362187862396, + "loss_sent": 0.1440548151731491, + "loss_sod": 0.05405785143375397, + "loss_total": 0.41392630338668823, + "step": 254899 + }, + { + "epoch": 0.0298, + "grad_norm": 0.7897472977638245, + "learning_rate": 1.9745026500293362e-05, + "loss": 0.4545, + "step": 254900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.436579704284668, + "loss_rtd": 0.2115718275308609, + "loss_sent": 0.0863354280591011, + "loss_sod": 0.12983065843582153, + "loss_total": 0.42773789167404175, + "step": 254999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.243215560913086, + "loss_rtd": 0.19306324422359467, + "loss_sent": 0.015153673477470875, + "loss_sod": 0.08352413773536682, + "loss_total": 0.2917410433292389, + "step": 254999 + }, + { + "epoch": 0.03, + "grad_norm": 0.9842661023139954, + "learning_rate": 1.9719768164893415e-05, + "loss": 0.4232, + "step": 255000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4207229018211365, + "eval_runtime": 151.5625, + "eval_samples_per_second": 101.892, + "eval_steps_per_second": 0.798, + "step": 255000 + }, + { + "epoch": 0.030198, + "loss_gen": 6.143173694610596, + "loss_rtd": 0.2157437652349472, + "loss_sent": 0.27789658308029175, + "loss_sod": 0.06362170726060867, + "loss_total": 0.5572620630264282, + "step": 255099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.865470886230469, + "loss_rtd": 0.2301105409860611, + "loss_sent": 0.34119436144828796, + "loss_sod": 0.1609756201505661, + "loss_total": 0.7322804927825928, + "step": 255099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.4151833057403564, + "learning_rate": 1.969452202636723e-05, + "loss": 0.4399, + "step": 255100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.303222179412842, + "loss_rtd": 0.18868422508239746, + "loss_sent": 0.0045908973552286625, + "loss_sod": 0.03284907341003418, + "loss_total": 0.22612419724464417, + "step": 255199 + }, + { + "epoch": 0.030398, + "loss_gen": 6.221683502197266, + "loss_rtd": 0.23939965665340424, + "loss_sent": 0.06910251080989838, + "loss_sod": 0.12874291837215424, + "loss_total": 0.43724507093429565, + "step": 255199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.8739762902259827, + "learning_rate": 1.966928809488395e-05, + "loss": 0.4363, + "step": 255200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.833194255828857, + "loss_rtd": 0.23057061433792114, + "loss_sent": 0.3710896968841553, + "loss_sod": 0.11740000545978546, + "loss_total": 0.7190603017807007, + "step": 255299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.44817590713501, + "loss_rtd": 0.234102264046669, + "loss_sent": 0.0429513044655323, + "loss_sod": 0.004230175167322159, + "loss_total": 0.28128373622894287, + "step": 255299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.7727651596069336, + "learning_rate": 1.964406638060781e-05, + "loss": 0.4217, + "step": 255300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.4819183349609375, + "loss_rtd": 0.22465725243091583, + "loss_sent": 0.1288895606994629, + "loss_sod": 0.059488095343112946, + "loss_total": 0.41303491592407227, + "step": 255399 + }, + { + "epoch": 0.030798, + "loss_gen": 6.006296634674072, + "loss_rtd": 0.22454136610031128, + "loss_sent": 0.07526542991399765, + "loss_sod": 0.05416024476289749, + "loss_total": 0.3539670407772064, + "step": 255399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.1668645143508911, + "learning_rate": 1.961885689369809e-05, + "loss": 0.4417, + "step": 255400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.520474433898926, + "loss_rtd": 0.21842928230762482, + "loss_sent": 0.0738108828663826, + "loss_sod": 0.04817997291684151, + "loss_total": 0.340420126914978, + "step": 255499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.870036602020264, + "loss_rtd": 0.21882574260234833, + "loss_sent": 0.25362205505371094, + "loss_sod": 0.12334321439266205, + "loss_total": 0.5957909822463989, + "step": 255499 + }, + { + "epoch": 0.031, + "grad_norm": 1.005359172821045, + "learning_rate": 1.9593659644309177e-05, + "loss": 0.4443, + "step": 255500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.490281581878662, + "loss_rtd": 0.22614344954490662, + "loss_sent": 0.09881580621004105, + "loss_sod": 0.011842755600810051, + "loss_total": 0.33680200576782227, + "step": 255599 + }, + { + "epoch": 0.031198, + "loss_gen": 6.003447532653809, + "loss_rtd": 0.23837314546108246, + "loss_sent": 0.1303938329219818, + "loss_sod": 0.032098740339279175, + "loss_total": 0.40086570382118225, + "step": 255599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.5261101722717285, + "learning_rate": 1.9568474642590523e-05, + "loss": 0.4544, + "step": 255600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.562786102294922, + "loss_rtd": 0.23503659665584564, + "loss_sent": 0.1858574002981186, + "loss_sod": 0.1714448630809784, + "loss_total": 0.5923388600349426, + "step": 255699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.179942607879639, + "loss_rtd": 0.18859492242336273, + "loss_sent": 0.05462285876274109, + "loss_sod": 0.15915504097938538, + "loss_total": 0.402372807264328, + "step": 255699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.346409559249878, + "learning_rate": 1.9543301898686662e-05, + "loss": 0.4607, + "step": 255700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.731593132019043, + "loss_rtd": 0.2347627878189087, + "loss_sent": 0.4166901409626007, + "loss_sod": 0.03454044088721275, + "loss_total": 0.6859933733940125, + "step": 255799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.529477119445801, + "loss_rtd": 0.2211005538702011, + "loss_sent": 0.07924675196409225, + "loss_sod": 0.05242312699556351, + "loss_total": 0.35277044773101807, + "step": 255799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.2263169288635254, + "learning_rate": 1.9518141422737136e-05, + "loss": 0.4349, + "step": 255800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.775720119476318, + "loss_rtd": 0.2190198451280594, + "loss_sent": 0.21844342350959778, + "loss_sod": 0.016671624034643173, + "loss_total": 0.45413488149642944, + "step": 255899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.653126239776611, + "loss_rtd": 0.2120610624551773, + "loss_sent": 0.1320323646068573, + "loss_sod": 0.046516623347997665, + "loss_total": 0.3906100392341614, + "step": 255899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.464066505432129, + "learning_rate": 1.9492993224876597e-05, + "loss": 0.4434, + "step": 255900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.5872626304626465, + "loss_rtd": 0.22028420865535736, + "loss_sent": 0.14101046323776245, + "loss_sod": 0.11387832462787628, + "loss_total": 0.4751729965209961, + "step": 255999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.998569011688232, + "loss_rtd": 0.21319493651390076, + "loss_sent": 0.19112010300159454, + "loss_sod": 0.07989027351140976, + "loss_total": 0.48420530557632446, + "step": 255999 + }, + { + "epoch": 0.032, + "grad_norm": 1.0494569540023804, + "learning_rate": 1.9467857315234746e-05, + "loss": 0.4404, + "step": 256000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4182792007923126, + "eval_runtime": 151.8291, + "eval_samples_per_second": 101.713, + "eval_steps_per_second": 0.797, + "step": 256000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.719137191772461, + "loss_rtd": 0.2249094843864441, + "loss_sent": 0.11407400667667389, + "loss_sod": 0.015464075841009617, + "loss_total": 0.3544475734233856, + "step": 256099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.7993974685668945, + "loss_rtd": 0.23655447363853455, + "loss_sent": 0.1094830185174942, + "loss_sod": 0.0687658041715622, + "loss_total": 0.41480326652526855, + "step": 256099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.841873824596405, + "learning_rate": 1.944273370393633e-05, + "loss": 0.4443, + "step": 256100 + }, + { + "epoch": 0.000398, + "loss_gen": 6.1038689613342285, + "loss_rtd": 0.23958365619182587, + "loss_sent": 0.35503125190734863, + "loss_sod": 0.028593841940164566, + "loss_total": 0.62320876121521, + "step": 256199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.77200984954834, + "loss_rtd": 0.2335517406463623, + "loss_sent": 0.3759221136569977, + "loss_sod": 0.15240080654621124, + "loss_total": 0.7618746757507324, + "step": 256199 + }, + { + "epoch": 0.0004, + "grad_norm": 2.4883439540863037, + "learning_rate": 1.9417622401101104e-05, + "loss": 0.4547, + "step": 256200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.6341776847839355, + "loss_rtd": 0.20687498152256012, + "loss_sent": 0.10849983990192413, + "loss_sod": 0.0372319258749485, + "loss_total": 0.35260674357414246, + "step": 256299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.907930374145508, + "loss_rtd": 0.20953242480754852, + "loss_sent": 0.14768798649311066, + "loss_sod": 0.03266579285264015, + "loss_total": 0.38988620042800903, + "step": 256299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.8542525768280029, + "learning_rate": 1.939252341684392e-05, + "loss": 0.4305, + "step": 256300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.550402641296387, + "loss_rtd": 0.21594884991645813, + "loss_sent": 0.01939331367611885, + "loss_sod": 0.010465777479112148, + "loss_total": 0.2458079308271408, + "step": 256399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.908761978149414, + "loss_rtd": 0.18330790102481842, + "loss_sent": 2.562065128586255e-05, + "loss_sod": 0.07614215463399887, + "loss_total": 0.2594756782054901, + "step": 256399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.6345007419586182, + "learning_rate": 1.936743676127466e-05, + "loss": 0.442, + "step": 256400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.735369682312012, + "loss_rtd": 0.23683825135231018, + "loss_sent": 0.15582871437072754, + "loss_sod": 0.08239433169364929, + "loss_total": 0.475061297416687, + "step": 256499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.574130058288574, + "loss_rtd": 0.24696214497089386, + "loss_sent": 0.13589608669281006, + "loss_sod": 0.016467146575450897, + "loss_total": 0.3993253707885742, + "step": 256499 + }, + { + "epoch": 0.001, + "grad_norm": 1.1938645839691162, + "learning_rate": 1.9342362444498197e-05, + "loss": 0.4357, + "step": 256500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.991654872894287, + "loss_rtd": 0.2191253900527954, + "loss_sent": 0.3768891394138336, + "loss_sod": 0.06542088091373444, + "loss_total": 0.6614353656768799, + "step": 256599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.852071762084961, + "loss_rtd": 0.21143855154514313, + "loss_sent": 0.11855126172304153, + "loss_sod": 0.06887871026992798, + "loss_total": 0.39886850118637085, + "step": 256599 + }, + { + "epoch": 0.0012, + "grad_norm": 2.1177027225494385, + "learning_rate": 1.931730047661447e-05, + "loss": 0.437, + "step": 256600 + }, + { + "epoch": 0.001398, + "loss_gen": 6.183917999267578, + "loss_rtd": 0.22021916508674622, + "loss_sent": 0.08833926171064377, + "loss_sod": 0.024696988984942436, + "loss_total": 0.333255410194397, + "step": 256699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.445835590362549, + "loss_rtd": 0.22125393152236938, + "loss_sent": 0.08685436099767685, + "loss_sod": 0.099408358335495, + "loss_total": 0.4075166583061218, + "step": 256699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.3161251544952393, + "learning_rate": 1.9292250867718442e-05, + "loss": 0.4439, + "step": 256700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.84780740737915, + "loss_rtd": 0.21963860094547272, + "loss_sent": 0.2052403688430786, + "loss_sod": 0.008837012574076653, + "loss_total": 0.4337159991264343, + "step": 256799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.553376197814941, + "loss_rtd": 0.23662590980529785, + "loss_sent": 0.10431526601314545, + "loss_sod": 0.005601783748716116, + "loss_total": 0.34654295444488525, + "step": 256799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.6843005418777466, + "learning_rate": 1.926721362790011e-05, + "loss": 0.4529, + "step": 256800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.042369842529297, + "loss_rtd": 0.19062404334545135, + "loss_sent": 0.009145115502178669, + "loss_sod": 0.04888478294014931, + "loss_total": 0.24865393340587616, + "step": 256899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.888213157653809, + "loss_rtd": 0.20920048654079437, + "loss_sent": 0.3349314332008362, + "loss_sod": 0.05534731596708298, + "loss_total": 0.5994791984558105, + "step": 256899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.5948286056518555, + "learning_rate": 1.9242188767244433e-05, + "loss": 0.436, + "step": 256900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.962906360626221, + "loss_rtd": 0.22339452803134918, + "loss_sent": 0.2463454008102417, + "loss_sod": 0.014831135049462318, + "loss_total": 0.48457106947898865, + "step": 256999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.909237861633301, + "loss_rtd": 0.2261003702878952, + "loss_sent": 0.14804089069366455, + "loss_sod": 0.010999368503689766, + "loss_total": 0.38514062762260437, + "step": 256999 + }, + { + "epoch": 0.002, + "grad_norm": 1.479538083076477, + "learning_rate": 1.921717629583145e-05, + "loss": 0.4491, + "step": 257000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4171997904777527, + "eval_runtime": 153.5453, + "eval_samples_per_second": 100.576, + "eval_steps_per_second": 0.788, + "step": 257000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.606085300445557, + "loss_rtd": 0.2263501137495041, + "loss_sent": 0.2513013780117035, + "loss_sod": 0.006674719508737326, + "loss_total": 0.48432621359825134, + "step": 257099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.469945907592773, + "loss_rtd": 0.21970972418785095, + "loss_sent": 0.29935726523399353, + "loss_sod": 0.06727207452058792, + "loss_total": 0.5863390564918518, + "step": 257099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.4244379997253418, + "learning_rate": 1.919217622373617e-05, + "loss": 0.4308, + "step": 257100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.54227876663208, + "loss_rtd": 0.24072355031967163, + "loss_sent": 0.22241312265396118, + "loss_sod": 0.11213670670986176, + "loss_total": 0.5752733945846558, + "step": 257199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.939525127410889, + "loss_rtd": 0.22330863773822784, + "loss_sent": 0.1421503871679306, + "loss_sod": 0.12618714570999146, + "loss_total": 0.4916461706161499, + "step": 257199 + }, + { + "epoch": 0.0024, + "grad_norm": 1.5518525838851929, + "learning_rate": 1.9167188561028636e-05, + "loss": 0.4608, + "step": 257200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.404356956481934, + "loss_rtd": 0.2190902829170227, + "loss_sent": 0.191134974360466, + "loss_sod": 0.015676403418183327, + "loss_total": 0.4259016513824463, + "step": 257299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.670596122741699, + "loss_rtd": 0.2514934241771698, + "loss_sent": 0.323307603597641, + "loss_sod": 0.0061821406707167625, + "loss_total": 0.5809831619262695, + "step": 257299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.0254656076431274, + "learning_rate": 1.914221331777385e-05, + "loss": 0.4324, + "step": 257300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.744279384613037, + "loss_rtd": 0.22491784393787384, + "loss_sent": 0.234115868806839, + "loss_sod": 0.039530716836452484, + "loss_total": 0.4985644221305847, + "step": 257399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.65138578414917, + "loss_rtd": 0.22519871592521667, + "loss_sent": 0.03546035289764404, + "loss_sod": 0.03364551067352295, + "loss_total": 0.29430457949638367, + "step": 257399 + }, + { + "epoch": 0.0028, + "grad_norm": 0.7370484471321106, + "learning_rate": 1.911725050403185e-05, + "loss": 0.4411, + "step": 257400 + }, + { + "epoch": 0.002998, + "loss_gen": 6.109411716461182, + "loss_rtd": 0.23645177483558655, + "loss_sent": 0.3965546786785126, + "loss_sod": 0.12229511141777039, + "loss_total": 0.7553015947341919, + "step": 257499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.601687908172607, + "loss_rtd": 0.21877025067806244, + "loss_sent": 0.21668651700019836, + "loss_sod": 0.03357876092195511, + "loss_total": 0.4690355360507965, + "step": 257499 + }, + { + "epoch": 0.003, + "grad_norm": 1.3516509532928467, + "learning_rate": 1.909230012985765e-05, + "loss": 0.4495, + "step": 257500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.896432399749756, + "loss_rtd": 0.2194606363773346, + "loss_sent": 0.11421237885951996, + "loss_sod": 0.1204255074262619, + "loss_total": 0.45409852266311646, + "step": 257599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.730504035949707, + "loss_rtd": 0.23530344665050507, + "loss_sent": 0.08894983679056168, + "loss_sod": 0.12989532947540283, + "loss_total": 0.45414862036705017, + "step": 257599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9213800430297852, + "learning_rate": 1.906736220530128e-05, + "loss": 0.4471, + "step": 257600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.863643646240234, + "loss_rtd": 0.24199290573596954, + "loss_sent": 0.2199094593524933, + "loss_sod": 0.017921190708875656, + "loss_total": 0.4798235595226288, + "step": 257699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.4639058113098145, + "loss_rtd": 0.20808997750282288, + "loss_sent": 0.03998017683625221, + "loss_sod": 0.014196610078215599, + "loss_total": 0.26226675510406494, + "step": 257699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.0237022638320923, + "learning_rate": 1.90424367404077e-05, + "loss": 0.4435, + "step": 257700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.857865810394287, + "loss_rtd": 0.22634819149971008, + "loss_sent": 0.15773418545722961, + "loss_sod": 0.05826546251773834, + "loss_total": 0.44234785437583923, + "step": 257799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.956275939941406, + "loss_rtd": 0.23653164505958557, + "loss_sent": 0.15997876226902008, + "loss_sod": 0.03316502645611763, + "loss_total": 0.42967545986175537, + "step": 257799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.0987085103988647, + "learning_rate": 1.90175237452169e-05, + "loss": 0.4323, + "step": 257800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.447580814361572, + "loss_rtd": 0.24280719459056854, + "loss_sent": 0.26339027285575867, + "loss_sod": 0.05107992887496948, + "loss_total": 0.5572774410247803, + "step": 257899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.788817405700684, + "loss_rtd": 0.23417778313159943, + "loss_sent": 0.058433547616004944, + "loss_sod": 0.06375335156917572, + "loss_total": 0.3563646674156189, + "step": 257899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.2282878160476685, + "learning_rate": 1.899262322976384e-05, + "loss": 0.4527, + "step": 257900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.897584915161133, + "loss_rtd": 0.2256196290254593, + "loss_sent": 0.1502881795167923, + "loss_sod": 0.09983941167593002, + "loss_total": 0.4757472276687622, + "step": 257999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.642777919769287, + "loss_rtd": 0.21140892803668976, + "loss_sent": 0.17053905129432678, + "loss_sod": 0.05413663387298584, + "loss_total": 0.4360845983028412, + "step": 257999 + }, + { + "epoch": 0.004, + "grad_norm": 1.1078600883483887, + "learning_rate": 1.8967735204078423e-05, + "loss": 0.4454, + "step": 258000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4140926003456116, + "eval_runtime": 150.2666, + "eval_samples_per_second": 102.771, + "eval_steps_per_second": 0.805, + "step": 258000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.197231769561768, + "loss_rtd": 0.18180225789546967, + "loss_sent": 2.8777447369066067e-05, + "loss_sod": 0.09882558882236481, + "loss_total": 0.280656635761261, + "step": 258099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.277012348175049, + "loss_rtd": 0.21173299849033356, + "loss_sent": 4.875852391705848e-05, + "loss_sod": 0.07263106107711792, + "loss_total": 0.28441280126571655, + "step": 258099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.8212816715240479, + "learning_rate": 1.8942859678185554e-05, + "loss": 0.423, + "step": 258100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.801881313323975, + "loss_rtd": 0.2259974628686905, + "loss_sent": 0.21458883583545685, + "loss_sod": 0.05621238425374031, + "loss_total": 0.49679869413375854, + "step": 258199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.9818267822265625, + "loss_rtd": 0.22572585940361023, + "loss_sent": 0.0223389845341444, + "loss_sod": 0.05908418819308281, + "loss_total": 0.3071490228176117, + "step": 258199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.1999270915985107, + "learning_rate": 1.8917996662105092e-05, + "loss": 0.4313, + "step": 258200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.425445079803467, + "loss_rtd": 0.2140994518995285, + "loss_sent": 0.15417629480361938, + "loss_sod": 0.004347951151430607, + "loss_total": 0.3726236820220947, + "step": 258299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.947686672210693, + "loss_rtd": 0.23279711604118347, + "loss_sent": 0.29078197479248047, + "loss_sod": 0.013146903365850449, + "loss_total": 0.5367259979248047, + "step": 258299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.7846020460128784, + "learning_rate": 1.8893146165851876e-05, + "loss": 0.4564, + "step": 258300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.848681926727295, + "loss_rtd": 0.2188025861978531, + "loss_sent": 0.2246469408273697, + "loss_sod": 0.08644305914640427, + "loss_total": 0.5298925638198853, + "step": 258399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.731697082519531, + "loss_rtd": 0.23489528894424438, + "loss_sent": 0.17494313418865204, + "loss_sod": 0.0680108591914177, + "loss_total": 0.4778493046760559, + "step": 258399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.1933016777038574, + "learning_rate": 1.8868308199435648e-05, + "loss": 0.4424, + "step": 258400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.807486534118652, + "loss_rtd": 0.23709937930107117, + "loss_sent": 0.09492206573486328, + "loss_sod": 0.04649343714118004, + "loss_total": 0.3785148859024048, + "step": 258499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.812468528747559, + "loss_rtd": 0.22758077085018158, + "loss_sent": 0.24587169289588928, + "loss_sod": 0.012292643077671528, + "loss_total": 0.4857451021671295, + "step": 258499 + }, + { + "epoch": 0.005, + "grad_norm": 1.2961477041244507, + "learning_rate": 1.884348277286115e-05, + "loss": 0.455, + "step": 258500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.979040622711182, + "loss_rtd": 0.257358193397522, + "loss_sent": 0.09835020452737808, + "loss_sod": 0.06101030483841896, + "loss_total": 0.4167186915874481, + "step": 258599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.869791507720947, + "loss_rtd": 0.23474963009357452, + "loss_sent": 0.12189159542322159, + "loss_sod": 0.07525230199098587, + "loss_total": 0.431893527507782, + "step": 258599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.9264788031578064, + "learning_rate": 1.8818669896128066e-05, + "loss": 0.4374, + "step": 258600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.952756881713867, + "loss_rtd": 0.2469499260187149, + "loss_sent": 0.131473109126091, + "loss_sod": 0.06519979238510132, + "loss_total": 0.4436228275299072, + "step": 258699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.444943428039551, + "loss_rtd": 0.2175222784280777, + "loss_sent": 0.023357335478067398, + "loss_sod": 0.1995525360107422, + "loss_total": 0.4404321312904358, + "step": 258699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.4726243019104004, + "learning_rate": 1.8793869579231038e-05, + "loss": 0.4478, + "step": 258700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.217036247253418, + "loss_rtd": 0.20182210206985474, + "loss_sent": 0.0020134393125772476, + "loss_sod": 0.07372411340475082, + "loss_total": 0.27755966782569885, + "step": 258799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.066666603088379, + "loss_rtd": 0.18927043676376343, + "loss_sent": 0.00011247151996940374, + "loss_sod": 0.091739222407341, + "loss_total": 0.2811221480369568, + "step": 258799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.811972439289093, + "learning_rate": 1.8769081832159595e-05, + "loss": 0.4523, + "step": 258800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.779342174530029, + "loss_rtd": 0.20872755348682404, + "loss_sent": 0.4420732855796814, + "loss_sod": 0.017633313313126564, + "loss_total": 0.6684341430664062, + "step": 258899 + }, + { + "epoch": 0.005798, + "loss_gen": 6.020589351654053, + "loss_rtd": 0.2380535751581192, + "loss_sent": 0.21202102303504944, + "loss_sod": 0.010953258723020554, + "loss_total": 0.4610278606414795, + "step": 258899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.0986590385437012, + "learning_rate": 1.8744306664898254e-05, + "loss": 0.4355, + "step": 258900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.813717365264893, + "loss_rtd": 0.23481868207454681, + "loss_sent": 0.3383063077926636, + "loss_sod": 0.0625721737742424, + "loss_total": 0.6356971263885498, + "step": 258999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.8133697509765625, + "loss_rtd": 0.23310688138008118, + "loss_sent": 0.17369568347930908, + "loss_sod": 0.06796613335609436, + "loss_total": 0.4747686982154846, + "step": 258999 + }, + { + "epoch": 0.006, + "grad_norm": 1.0945936441421509, + "learning_rate": 1.871954408742645e-05, + "loss": 0.4318, + "step": 259000 + }, + { + "epoch": 0.006, + "eval_loss": 0.41480275988578796, + "eval_runtime": 149.8703, + "eval_samples_per_second": 103.042, + "eval_steps_per_second": 0.807, + "step": 259000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.685275554656982, + "loss_rtd": 0.23449456691741943, + "loss_sent": 0.20681414008140564, + "loss_sod": 0.06243611499667168, + "loss_total": 0.5037448406219482, + "step": 259099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.759958267211914, + "loss_rtd": 0.21148507297039032, + "loss_sent": 0.09391295164823532, + "loss_sod": 0.032898806035518646, + "loss_total": 0.3382968306541443, + "step": 259099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.5001825094223022, + "learning_rate": 1.8694794109718566e-05, + "loss": 0.4559, + "step": 259100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.817416191101074, + "loss_rtd": 0.22993461787700653, + "loss_sent": 0.5977669358253479, + "loss_sod": 0.05000462383031845, + "loss_total": 0.8777061700820923, + "step": 259199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.833678722381592, + "loss_rtd": 0.24896228313446045, + "loss_sent": 0.12470057606697083, + "loss_sod": 0.02391231432557106, + "loss_total": 0.39757516980171204, + "step": 259199 + }, + { + "epoch": 0.0064, + "grad_norm": 2.281923770904541, + "learning_rate": 1.867005674174385e-05, + "loss": 0.4536, + "step": 259200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.6843647956848145, + "loss_rtd": 0.2394198775291443, + "loss_sent": 0.2764662504196167, + "loss_sod": 0.015383703634142876, + "loss_total": 0.5312697887420654, + "step": 259299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.640019416809082, + "loss_rtd": 0.19201642274856567, + "loss_sent": 0.14057159423828125, + "loss_sod": 0.011013489216566086, + "loss_total": 0.3436015248298645, + "step": 259299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.6997259259223938, + "learning_rate": 1.8645331993466537e-05, + "loss": 0.4352, + "step": 259300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.74385404586792, + "loss_rtd": 0.22940178215503693, + "loss_sent": 0.20560473203659058, + "loss_sod": 0.006837142165750265, + "loss_total": 0.4418436586856842, + "step": 259399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.917254447937012, + "loss_rtd": 0.2073005735874176, + "loss_sent": 0.24624089896678925, + "loss_sod": 0.023274298757314682, + "loss_total": 0.47681576013565063, + "step": 259399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.60682213306427, + "learning_rate": 1.8620619874845746e-05, + "loss": 0.4441, + "step": 259400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.354649543762207, + "loss_rtd": 0.2093132585287094, + "loss_sent": 0.0009695728658698499, + "loss_sod": 0.2127683162689209, + "loss_total": 0.4230511486530304, + "step": 259499 + }, + { + "epoch": 0.006998, + "loss_gen": 4.940433502197266, + "loss_rtd": 0.16785122454166412, + "loss_sent": 0.0041999006643891335, + "loss_sod": 0.016859542578458786, + "loss_total": 0.18891067802906036, + "step": 259499 + }, + { + "epoch": 0.007, + "grad_norm": 0.8403108716011047, + "learning_rate": 1.8595920395835532e-05, + "loss": 0.4378, + "step": 259500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.662302017211914, + "loss_rtd": 0.21706236898899078, + "loss_sent": 0.13679741322994232, + "loss_sod": 0.0185824166983366, + "loss_total": 0.37244218587875366, + "step": 259599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.968424320220947, + "loss_rtd": 0.2268364429473877, + "loss_sent": 0.19327765703201294, + "loss_sod": 0.07941228896379471, + "loss_total": 0.49952638149261475, + "step": 259599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.5976347327232361, + "learning_rate": 1.857123356638481e-05, + "loss": 0.4344, + "step": 259600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.7492146492004395, + "loss_rtd": 0.22425615787506104, + "loss_sent": 0.10312268882989883, + "loss_sod": 0.16404588520526886, + "loss_total": 0.49142470955848694, + "step": 259699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.878417015075684, + "loss_rtd": 0.23800617456436157, + "loss_sent": 0.09379882365465164, + "loss_sod": 0.10820753872394562, + "loss_total": 0.44001251459121704, + "step": 259699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.623661994934082, + "learning_rate": 1.854655939643745e-05, + "loss": 0.4282, + "step": 259700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.168807029724121, + "loss_rtd": 0.18829461932182312, + "loss_sent": 2.6924166377284564e-05, + "loss_sod": 0.05271182209253311, + "loss_total": 0.24103336036205292, + "step": 259799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.36625337600708, + "loss_rtd": 0.20180922746658325, + "loss_sent": 0.054681196808815, + "loss_sod": 0.030089624226093292, + "loss_total": 0.28658002614974976, + "step": 259799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.7371716499328613, + "learning_rate": 1.8521897895932222e-05, + "loss": 0.4344, + "step": 259800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.196185111999512, + "loss_rtd": 0.1803692877292633, + "loss_sent": 0.0004966436536051333, + "loss_sod": 0.10203516483306885, + "loss_total": 0.2829011082649231, + "step": 259899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.8036580085754395, + "loss_rtd": 0.22227637469768524, + "loss_sent": 0.12897543609142303, + "loss_sod": 0.07313300669193268, + "loss_total": 0.42438483238220215, + "step": 259899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.0138169527053833, + "learning_rate": 1.8497249074802737e-05, + "loss": 0.4485, + "step": 259900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.907511234283447, + "loss_rtd": 0.22554604709148407, + "loss_sent": 0.1830756664276123, + "loss_sod": 0.1009528711438179, + "loss_total": 0.5095745921134949, + "step": 259999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.93868350982666, + "loss_rtd": 0.22718757390975952, + "loss_sent": 0.1629742980003357, + "loss_sod": 0.023497367277741432, + "loss_total": 0.4136592447757721, + "step": 259999 + }, + { + "epoch": 0.008, + "grad_norm": 0.7653558254241943, + "learning_rate": 1.8472612942977558e-05, + "loss": 0.4424, + "step": 260000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4210491478443146, + "eval_runtime": 151.5593, + "eval_samples_per_second": 101.894, + "eval_steps_per_second": 0.798, + "step": 260000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.790144920349121, + "loss_rtd": 0.21712996065616608, + "loss_sent": 0.14015312492847443, + "loss_sod": 0.12637047469615936, + "loss_total": 0.48365354537963867, + "step": 260099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.493537902832031, + "loss_rtd": 0.20442447066307068, + "loss_sent": 0.00033282683580182493, + "loss_sod": 0.08371838182210922, + "loss_total": 0.2884756922721863, + "step": 260099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.9041178822517395, + "learning_rate": 1.8447989510380116e-05, + "loss": 0.4436, + "step": 260100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.798304080963135, + "loss_rtd": 0.23409560322761536, + "loss_sent": 0.07618724554777145, + "loss_sod": 0.09535852074623108, + "loss_total": 0.4056413769721985, + "step": 260199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.546088218688965, + "loss_rtd": 0.21580104529857635, + "loss_sent": 0.07127001136541367, + "loss_sod": 0.08646036684513092, + "loss_total": 0.37353143095970154, + "step": 260199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.2357592582702637, + "learning_rate": 1.842337878692874e-05, + "loss": 0.4302, + "step": 260200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.51783561706543, + "loss_rtd": 0.19924241304397583, + "loss_sent": 0.07657813280820847, + "loss_sod": 0.17297813296318054, + "loss_total": 0.44879868626594543, + "step": 260299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.854599475860596, + "loss_rtd": 0.23725444078445435, + "loss_sent": 0.38655468821525574, + "loss_sod": 0.06673618406057358, + "loss_total": 0.6905453205108643, + "step": 260299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.7616416215896606, + "learning_rate": 1.8398780782536602e-05, + "loss": 0.4622, + "step": 260300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.7527265548706055, + "loss_rtd": 0.21186961233615875, + "loss_sent": 0.04811631515622139, + "loss_sod": 0.02911721169948578, + "loss_total": 0.2891031503677368, + "step": 260399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.9584245681762695, + "loss_rtd": 0.23307180404663086, + "loss_sent": 0.11670845746994019, + "loss_sod": 0.03522536903619766, + "loss_total": 0.3850056231021881, + "step": 260399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.0394291877746582, + "learning_rate": 1.837419550711178e-05, + "loss": 0.4443, + "step": 260400 + }, + { + "epoch": 0.008998, + "loss_gen": 6.225304126739502, + "loss_rtd": 0.2223932147026062, + "loss_sent": 0.3683706521987915, + "loss_sod": 0.08619772642850876, + "loss_total": 0.6769616007804871, + "step": 260499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.999357223510742, + "loss_rtd": 0.2185937911272049, + "loss_sent": 0.10943987220525742, + "loss_sod": 0.10983487218618393, + "loss_total": 0.43786853551864624, + "step": 260499 + }, + { + "epoch": 0.009, + "grad_norm": 1.2048826217651367, + "learning_rate": 1.8349622970557227e-05, + "loss": 0.4661, + "step": 260500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.170165061950684, + "loss_rtd": 0.19268710911273956, + "loss_sent": 0.04321198910474777, + "loss_sod": 0.07380912452936172, + "loss_total": 0.30970823764801025, + "step": 260599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.835212230682373, + "loss_rtd": 0.226507306098938, + "loss_sent": 0.09596236795186996, + "loss_sod": 0.09300627559423447, + "loss_total": 0.4154759645462036, + "step": 260599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.9575132727622986, + "learning_rate": 1.8325063182770774e-05, + "loss": 0.4337, + "step": 260600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.77083683013916, + "loss_rtd": 0.21723388135433197, + "loss_sent": 0.2323702722787857, + "loss_sod": 0.006748107261955738, + "loss_total": 0.45635226368904114, + "step": 260699 + }, + { + "epoch": 0.009398, + "loss_gen": 6.030009746551514, + "loss_rtd": 0.23521681129932404, + "loss_sent": 0.20554371178150177, + "loss_sod": 0.04541389271616936, + "loss_total": 0.48617440462112427, + "step": 260699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.8153152465820312, + "learning_rate": 1.830051615364507e-05, + "loss": 0.4336, + "step": 260700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.599565029144287, + "loss_rtd": 0.24138863384723663, + "loss_sent": 0.0987170934677124, + "loss_sod": 0.010491971857845783, + "loss_total": 0.35059770941734314, + "step": 260799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.788488864898682, + "loss_rtd": 0.23278270661830902, + "loss_sent": 0.24295656383037567, + "loss_sod": 0.060434430837631226, + "loss_total": 0.5361737012863159, + "step": 260799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.104736566543579, + "learning_rate": 1.827598189306766e-05, + "loss": 0.4353, + "step": 260800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.7677154541015625, + "loss_rtd": 0.23435553908348083, + "loss_sent": 0.2477198988199234, + "loss_sod": 0.021104900166392326, + "loss_total": 0.5031803250312805, + "step": 260899 + }, + { + "epoch": 0.009798, + "loss_gen": 6.119668960571289, + "loss_rtd": 0.23140205442905426, + "loss_sent": 0.08643033355474472, + "loss_sod": 0.08407612890005112, + "loss_total": 0.4019085168838501, + "step": 260899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.2241572141647339, + "learning_rate": 1.8251460410920955e-05, + "loss": 0.4435, + "step": 260900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.381664752960205, + "loss_rtd": 0.1852429211139679, + "loss_sent": 0.0017346754902973771, + "loss_sod": 0.039578549563884735, + "loss_total": 0.22655615210533142, + "step": 260999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.7723517417907715, + "loss_rtd": 0.2153102159500122, + "loss_sent": 0.10258069634437561, + "loss_sod": 0.12844592332839966, + "loss_total": 0.4463368356227875, + "step": 260999 + }, + { + "epoch": 0.01, + "grad_norm": 0.9164645075798035, + "learning_rate": 1.8226951717082236e-05, + "loss": 0.4296, + "step": 261000 + }, + { + "epoch": 0.01, + "eval_loss": 0.42220592498779297, + "eval_runtime": 150.1395, + "eval_samples_per_second": 102.858, + "eval_steps_per_second": 0.806, + "step": 261000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.770923614501953, + "loss_rtd": 0.22430983185768127, + "loss_sent": 0.15456733107566833, + "loss_sod": 0.08047390729188919, + "loss_total": 0.4593510627746582, + "step": 261099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.349076747894287, + "loss_rtd": 0.1937120109796524, + "loss_sent": 0.010808099992573261, + "loss_sod": 0.06667657941579819, + "loss_total": 0.2711966931819916, + "step": 261099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.9448351263999939, + "learning_rate": 1.820245582142353e-05, + "loss": 0.4284, + "step": 261100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.942781925201416, + "loss_rtd": 0.22013670206069946, + "loss_sent": 0.2352485954761505, + "loss_sod": 0.09197963774204254, + "loss_total": 0.5473649501800537, + "step": 261199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.960268497467041, + "loss_rtd": 0.23670102655887604, + "loss_sent": 0.17174559831619263, + "loss_sod": 0.07600845396518707, + "loss_total": 0.48445507884025574, + "step": 261199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.0700087547302246, + "learning_rate": 1.8177972733811816e-05, + "loss": 0.4491, + "step": 261200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.878331184387207, + "loss_rtd": 0.23307335376739502, + "loss_sent": 0.1402657926082611, + "loss_sod": 0.01706000044941902, + "loss_total": 0.39039915800094604, + "step": 261299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.737022399902344, + "loss_rtd": 0.2178761512041092, + "loss_sent": 0.3490902781486511, + "loss_sod": 0.018797334283590317, + "loss_total": 0.5857637524604797, + "step": 261299 + }, + { + "epoch": 0.0106, + "grad_norm": 1.3782583475112915, + "learning_rate": 1.8153502464108878e-05, + "loss": 0.4297, + "step": 261300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.765710353851318, + "loss_rtd": 0.24119292199611664, + "loss_sent": 0.2397157996892929, + "loss_sod": 0.01448000967502594, + "loss_total": 0.4953887462615967, + "step": 261399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.466281890869141, + "loss_rtd": 0.25056958198547363, + "loss_sent": 0.34469687938690186, + "loss_sod": 0.007440236397087574, + "loss_total": 0.6027066707611084, + "step": 261399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.3865495920181274, + "learning_rate": 1.8129045022171354e-05, + "loss": 0.4478, + "step": 261400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.893030166625977, + "loss_rtd": 0.23232214152812958, + "loss_sent": 0.14779751002788544, + "loss_sod": 0.019544130191206932, + "loss_total": 0.3996638059616089, + "step": 261499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.448110580444336, + "loss_rtd": 0.1919664740562439, + "loss_sent": 0.013673014007508755, + "loss_sod": 0.04023461788892746, + "loss_total": 0.24587410688400269, + "step": 261499 + }, + { + "epoch": 0.011, + "grad_norm": 0.8250448703765869, + "learning_rate": 1.810460041785067e-05, + "loss": 0.4321, + "step": 261500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.925307750701904, + "loss_rtd": 0.22781497240066528, + "loss_sent": 0.3265842795372009, + "loss_sod": 0.02865752950310707, + "loss_total": 0.5830568075180054, + "step": 261599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.71012544631958, + "loss_rtd": 0.21731022000312805, + "loss_sent": 0.409697562456131, + "loss_sod": 0.04533015564084053, + "loss_total": 0.6723379492759705, + "step": 261599 + }, + { + "epoch": 0.0112, + "grad_norm": 3.7671751976013184, + "learning_rate": 1.8080168660993124e-05, + "loss": 0.4422, + "step": 261600 + }, + { + "epoch": 0.011398, + "loss_gen": 6.314149379730225, + "loss_rtd": 0.24056515097618103, + "loss_sent": 0.17447257041931152, + "loss_sod": 0.039770349860191345, + "loss_total": 0.4548080563545227, + "step": 261699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.892195701599121, + "loss_rtd": 0.21015846729278564, + "loss_sent": 0.3552035987377167, + "loss_sod": 0.05402024835348129, + "loss_total": 0.6193823218345642, + "step": 261699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.7854501008987427, + "learning_rate": 1.8055749761439822e-05, + "loss": 0.4344, + "step": 261700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.551679611206055, + "loss_rtd": 0.21133549511432648, + "loss_sent": 0.001736497855745256, + "loss_sod": 0.15424686670303345, + "loss_total": 0.3673188388347626, + "step": 261799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.148797512054443, + "loss_rtd": 0.17374002933502197, + "loss_sent": 2.777163899736479e-05, + "loss_sod": 0.13416633009910583, + "loss_total": 0.30793413519859314, + "step": 261799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.1299976110458374, + "learning_rate": 1.803134372902671e-05, + "loss": 0.4462, + "step": 261800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.854678153991699, + "loss_rtd": 0.23862631618976593, + "loss_sent": 0.1387609839439392, + "loss_sod": 0.022210635244846344, + "loss_total": 0.3995979428291321, + "step": 261899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.819527626037598, + "loss_rtd": 0.21107836067676544, + "loss_sent": 0.19799655675888062, + "loss_sod": 0.13092638552188873, + "loss_total": 0.5400012731552124, + "step": 261899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.9039456248283386, + "learning_rate": 1.8006950573584514e-05, + "loss": 0.4433, + "step": 261900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.435007572174072, + "loss_rtd": 0.19027191400527954, + "loss_sent": 6.607301475014538e-05, + "loss_sod": 0.1539205014705658, + "loss_total": 0.34425848722457886, + "step": 261999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.698180675506592, + "loss_rtd": 0.1957779973745346, + "loss_sent": 0.013719347305595875, + "loss_sod": 0.13571713864803314, + "loss_total": 0.34521448612213135, + "step": 261999 + }, + { + "epoch": 0.012, + "grad_norm": 1.1269047260284424, + "learning_rate": 1.798257030493879e-05, + "loss": 0.4432, + "step": 262000 + }, + { + "epoch": 0.012, + "eval_loss": 0.41653695702552795, + "eval_runtime": 150.2225, + "eval_samples_per_second": 102.801, + "eval_steps_per_second": 0.805, + "step": 262000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.099584102630615, + "loss_rtd": 0.22928960621356964, + "loss_sent": 0.2986092269420624, + "loss_sod": 0.04986407607793808, + "loss_total": 0.5777629017829895, + "step": 262099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.942395210266113, + "loss_rtd": 0.2586112916469574, + "loss_sent": 0.08630359172821045, + "loss_sod": 0.016102178022265434, + "loss_total": 0.36101704835891724, + "step": 262099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.3678045272827148, + "learning_rate": 1.7958202932909924e-05, + "loss": 0.4451, + "step": 262100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.783684730529785, + "loss_rtd": 0.2295273095369339, + "loss_sent": 0.4989902079105377, + "loss_sod": 0.03363502770662308, + "loss_total": 0.7621525526046753, + "step": 262199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.774890422821045, + "loss_rtd": 0.22113268077373505, + "loss_sent": 0.15401792526245117, + "loss_sod": 0.037544529885053635, + "loss_total": 0.41269513964653015, + "step": 262199 + }, + { + "epoch": 0.0124, + "grad_norm": 2.615041971206665, + "learning_rate": 1.7933848467313104e-05, + "loss": 0.4259, + "step": 262200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.877339839935303, + "loss_rtd": 0.22457431256771088, + "loss_sent": 0.28257301449775696, + "loss_sod": 0.0448489785194397, + "loss_total": 0.5519963502883911, + "step": 262299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.756425380706787, + "loss_rtd": 0.21935823559761047, + "loss_sent": 0.12605126202106476, + "loss_sod": 0.0387648269534111, + "loss_total": 0.3841743469238281, + "step": 262299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.8164938688278198, + "learning_rate": 1.7909506917958263e-05, + "loss": 0.4672, + "step": 262300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.550956726074219, + "loss_rtd": 0.21570302546024323, + "loss_sent": 0.11563875526189804, + "loss_sod": 0.07274002581834793, + "loss_total": 0.4040818214416504, + "step": 262399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.435138702392578, + "loss_rtd": 0.2403213530778885, + "loss_sent": 0.10892730951309204, + "loss_sod": 0.007292766589671373, + "loss_total": 0.3565414249897003, + "step": 262399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.2493977546691895, + "learning_rate": 1.78851782946502e-05, + "loss": 0.4354, + "step": 262400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.865581035614014, + "loss_rtd": 0.227000892162323, + "loss_sent": 0.1196775734424591, + "loss_sod": 0.03792010247707367, + "loss_total": 0.38459858298301697, + "step": 262499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.682101726531982, + "loss_rtd": 0.23281818628311157, + "loss_sent": 0.1366461217403412, + "loss_sod": 0.26875850558280945, + "loss_total": 0.6382228136062622, + "step": 262499 + }, + { + "epoch": 0.013, + "grad_norm": 1.4447888135910034, + "learning_rate": 1.78608626071885e-05, + "loss": 0.4301, + "step": 262500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.692393779754639, + "loss_rtd": 0.23628023266792297, + "loss_sent": 0.11476903408765793, + "loss_sod": 0.07787059247493744, + "loss_total": 0.42891985177993774, + "step": 262599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.288466930389404, + "loss_rtd": 0.19595885276794434, + "loss_sent": 0.040694888681173325, + "loss_sod": 0.09491953998804092, + "loss_total": 0.3315732777118683, + "step": 262599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.9164319038391113, + "learning_rate": 1.783655986536748e-05, + "loss": 0.4444, + "step": 262600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.399204730987549, + "loss_rtd": 0.19503091275691986, + "loss_sent": 0.06113690510392189, + "loss_sod": 0.06586134433746338, + "loss_total": 0.322029173374176, + "step": 262699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.644018173217773, + "loss_rtd": 0.23641744256019592, + "loss_sent": 0.29530683159828186, + "loss_sod": 0.04435921087861061, + "loss_total": 0.5760834813117981, + "step": 262699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.6195112466812134, + "learning_rate": 1.7812270078976295e-05, + "loss": 0.4474, + "step": 262700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.9916300773620605, + "loss_rtd": 0.23040005564689636, + "loss_sent": 0.12761488556861877, + "loss_sod": 0.03742263466119766, + "loss_total": 0.3954375684261322, + "step": 262799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.535221099853516, + "loss_rtd": 0.2318277657032013, + "loss_sent": 0.1480415314435959, + "loss_sod": 0.013859817758202553, + "loss_total": 0.3937291204929352, + "step": 262799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.9762578010559082, + "learning_rate": 1.778799325779888e-05, + "loss": 0.4293, + "step": 262800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.449334144592285, + "loss_rtd": 0.2052937150001526, + "loss_sent": 0.11499036848545074, + "loss_sod": 0.007230848073959351, + "loss_total": 0.3275149464607239, + "step": 262899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.600157260894775, + "loss_rtd": 0.21847011148929596, + "loss_sent": 0.2771449089050293, + "loss_sod": 0.02621576003730297, + "loss_total": 0.5218307971954346, + "step": 262899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.9009488821029663, + "learning_rate": 1.7763729411613943e-05, + "loss": 0.4548, + "step": 262900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.869690895080566, + "loss_rtd": 0.22194433212280273, + "loss_sent": 0.2831106185913086, + "loss_sod": 0.06669734418392181, + "loss_total": 0.5717523097991943, + "step": 262999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.614821910858154, + "loss_rtd": 0.2193174958229065, + "loss_sent": 0.18294291198253632, + "loss_sod": 0.017757045105099678, + "loss_total": 0.42001745104789734, + "step": 262999 + }, + { + "epoch": 0.014, + "grad_norm": 0.8729706406593323, + "learning_rate": 1.7739478550194928e-05, + "loss": 0.4354, + "step": 263000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4197606146335602, + "eval_runtime": 150.1876, + "eval_samples_per_second": 102.825, + "eval_steps_per_second": 0.806, + "step": 263000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.513040542602539, + "loss_rtd": 0.20443759858608246, + "loss_sent": 0.26258519291877747, + "loss_sod": 0.019108762964606285, + "loss_total": 0.48613154888153076, + "step": 263099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.50419807434082, + "loss_rtd": 0.18771882355213165, + "loss_sent": 0.01435436587780714, + "loss_sod": 0.06438340246677399, + "loss_total": 0.26645660400390625, + "step": 263099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.1289606094360352, + "learning_rate": 1.771524068331009e-05, + "loss": 0.4311, + "step": 263100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.496088981628418, + "loss_rtd": 0.19501489400863647, + "loss_sent": 0.07023801654577255, + "loss_sod": 0.003959077410399914, + "loss_total": 0.2692119777202606, + "step": 263199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.87968635559082, + "loss_rtd": 0.20503193140029907, + "loss_sent": 0.2680976688861847, + "loss_sod": 0.0630282461643219, + "loss_total": 0.5361578464508057, + "step": 263199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.8410677909851074, + "learning_rate": 1.7691015820722445e-05, + "loss": 0.4433, + "step": 263200 + }, + { + "epoch": 0.014598, + "loss_gen": 6.038082122802734, + "loss_rtd": 0.21350103616714478, + "loss_sent": 0.11250603944063187, + "loss_sod": 0.07663729786872864, + "loss_total": 0.4026443660259247, + "step": 263299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.946733474731445, + "loss_rtd": 0.24036170542240143, + "loss_sent": 0.0938752144575119, + "loss_sod": 0.025490881875157356, + "loss_total": 0.35972779989242554, + "step": 263299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.75941401720047, + "learning_rate": 1.7666803972189787e-05, + "loss": 0.4478, + "step": 263300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.910594463348389, + "loss_rtd": 0.22480866312980652, + "loss_sent": 0.1361338347196579, + "loss_sod": 0.10118836164474487, + "loss_total": 0.4621308445930481, + "step": 263399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.547285079956055, + "loss_rtd": 0.2045281082391739, + "loss_sent": 0.0008704860811121762, + "loss_sod": 0.07897263020277023, + "loss_total": 0.2843712270259857, + "step": 263399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.7293432950973511, + "learning_rate": 1.7642605147464604e-05, + "loss": 0.4582, + "step": 263400 + }, + { + "epoch": 0.014998, + "loss_gen": 6.056944847106934, + "loss_rtd": 0.2077402025461197, + "loss_sent": 0.2911964952945709, + "loss_sod": 0.058053940534591675, + "loss_total": 0.5569906234741211, + "step": 263499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.862424850463867, + "loss_rtd": 0.21458794176578522, + "loss_sent": 0.18894952535629272, + "loss_sod": 0.012409215793013573, + "loss_total": 0.4159466624259949, + "step": 263499 + }, + { + "epoch": 0.015, + "grad_norm": 1.3289551734924316, + "learning_rate": 1.761841935629419e-05, + "loss": 0.4299, + "step": 263500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.703284740447998, + "loss_rtd": 0.22918656468391418, + "loss_sent": 0.12732811272144318, + "loss_sod": 0.0786266028881073, + "loss_total": 0.43514126539230347, + "step": 263599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.550841331481934, + "loss_rtd": 0.21863055229187012, + "loss_sent": 0.08928092569112778, + "loss_sod": 0.05062025040388107, + "loss_total": 0.35853174328804016, + "step": 263599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.434622883796692, + "learning_rate": 1.7594246608420596e-05, + "loss": 0.4486, + "step": 263600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.5538787841796875, + "loss_rtd": 0.2212289571762085, + "loss_sent": 0.009823180735111237, + "loss_sod": 0.06804470717906952, + "loss_total": 0.29909685254096985, + "step": 263699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.048125743865967, + "loss_rtd": 0.18726715445518494, + "loss_sent": 2.449906969559379e-05, + "loss_sod": 0.14628250896930695, + "loss_total": 0.33357417583465576, + "step": 263699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.281462550163269, + "learning_rate": 1.7570086913580604e-05, + "loss": 0.4288, + "step": 263700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.608348846435547, + "loss_rtd": 0.20947839319705963, + "loss_sent": 0.24628858268260956, + "loss_sod": 0.00575850298628211, + "loss_total": 0.46152549982070923, + "step": 263799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.382534980773926, + "loss_rtd": 0.20266138017177582, + "loss_sent": 0.016107751056551933, + "loss_sod": 0.14555230736732483, + "loss_total": 0.36432141065597534, + "step": 263799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.0276693105697632, + "learning_rate": 1.7545940281505708e-05, + "loss": 0.4304, + "step": 263800 + }, + { + "epoch": 0.015798, + "loss_gen": 6.2425856590271, + "loss_rtd": 0.2131475806236267, + "loss_sent": 0.10438407212495804, + "loss_sod": 0.15258373320102692, + "loss_total": 0.4701153635978699, + "step": 263899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.785247802734375, + "loss_rtd": 0.23983649909496307, + "loss_sent": 0.13914045691490173, + "loss_sod": 0.005039280280470848, + "loss_total": 0.384016215801239, + "step": 263899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.2071220874786377, + "learning_rate": 1.752180672192219e-05, + "loss": 0.4325, + "step": 263900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.54254150390625, + "loss_rtd": 0.23202620446681976, + "loss_sent": 0.18760034441947937, + "loss_sod": 0.03395552933216095, + "loss_total": 0.4535820782184601, + "step": 263999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.472883701324463, + "loss_rtd": 0.19441907107830048, + "loss_sent": 0.02146313339471817, + "loss_sod": 0.10616981238126755, + "loss_total": 0.322052001953125, + "step": 263999 + }, + { + "epoch": 0.016, + "grad_norm": 0.976776659488678, + "learning_rate": 1.7497686244551038e-05, + "loss": 0.4453, + "step": 264000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4193997383117676, + "eval_runtime": 150.0928, + "eval_samples_per_second": 102.89, + "eval_steps_per_second": 0.806, + "step": 264000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.838901519775391, + "loss_rtd": 0.2291000485420227, + "loss_sent": 0.211017906665802, + "loss_sod": 0.03950728476047516, + "loss_total": 0.47962522506713867, + "step": 264099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.698509216308594, + "loss_rtd": 0.23480352759361267, + "loss_sent": 0.3536294400691986, + "loss_sod": 0.010975979268550873, + "loss_total": 0.5994089841842651, + "step": 264099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.448573350906372, + "learning_rate": 1.7473578859107996e-05, + "loss": 0.4548, + "step": 264100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.902383804321289, + "loss_rtd": 0.2170427292585373, + "loss_sent": 0.14012722671031952, + "loss_sod": 0.004921960178762674, + "loss_total": 0.36209189891815186, + "step": 264199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.668992042541504, + "loss_rtd": 0.2066890150308609, + "loss_sent": 0.04021590203046799, + "loss_sod": 0.16928046941757202, + "loss_total": 0.4161853790283203, + "step": 264199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.0368317365646362, + "learning_rate": 1.7449484575303483e-05, + "loss": 0.4307, + "step": 264200 + }, + { + "epoch": 0.016598, + "loss_gen": 6.014760971069336, + "loss_rtd": 0.23033563792705536, + "loss_sent": 0.161784365773201, + "loss_sod": 0.0634281262755394, + "loss_total": 0.45554810762405396, + "step": 264299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.611240386962891, + "loss_rtd": 0.21924929320812225, + "loss_sent": 0.08862759917974472, + "loss_sod": 0.0011986112222075462, + "loss_total": 0.3090755045413971, + "step": 264299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.8183719515800476, + "learning_rate": 1.742540340284269e-05, + "loss": 0.4435, + "step": 264300 + }, + { + "epoch": 0.016798, + "loss_gen": 6.161042213439941, + "loss_rtd": 0.21823842823505402, + "loss_sent": 0.1290140599012375, + "loss_sod": 0.054388489574193954, + "loss_total": 0.40164095163345337, + "step": 264399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.768616676330566, + "loss_rtd": 0.2210206836462021, + "loss_sent": 0.1565665900707245, + "loss_sod": 0.04180103540420532, + "loss_total": 0.4193883240222931, + "step": 264399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.7145875692367554, + "learning_rate": 1.7401335351425528e-05, + "loss": 0.4419, + "step": 264400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.702146530151367, + "loss_rtd": 0.23462967574596405, + "loss_sent": 0.13261710107326508, + "loss_sod": 0.050790537148714066, + "loss_total": 0.4180372953414917, + "step": 264499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.901893615722656, + "loss_rtd": 0.23588888347148895, + "loss_sent": 0.16363589465618134, + "loss_sod": 0.0845356285572052, + "loss_total": 0.4840604066848755, + "step": 264499 + }, + { + "epoch": 0.017, + "grad_norm": 0.9297024011611938, + "learning_rate": 1.7377280430746573e-05, + "loss": 0.44, + "step": 264500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.765101909637451, + "loss_rtd": 0.23689806461334229, + "loss_sent": 0.15025775134563446, + "loss_sod": 0.013489529490470886, + "loss_total": 0.40064537525177, + "step": 264599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.952276706695557, + "loss_rtd": 0.2074059545993805, + "loss_sent": 0.017983173951506615, + "loss_sod": 0.09613238275051117, + "loss_total": 0.32152149081230164, + "step": 264599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.932601273059845, + "learning_rate": 1.7353238650495156e-05, + "loss": 0.4379, + "step": 264600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.697495460510254, + "loss_rtd": 0.19966207444667816, + "loss_sent": 0.09372258186340332, + "loss_sod": 0.036938801407814026, + "loss_total": 0.3303234577178955, + "step": 264699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.791696548461914, + "loss_rtd": 0.20207498967647552, + "loss_sent": 0.4867168068885803, + "loss_sod": 0.06019856780767441, + "loss_total": 0.7489903569221497, + "step": 264699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.1397545337677002, + "learning_rate": 1.7329210020355307e-05, + "loss": 0.4252, + "step": 264700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.878633975982666, + "loss_rtd": 0.19591793417930603, + "loss_sent": 0.08669566363096237, + "loss_sod": 0.025733646005392075, + "loss_total": 0.30834725499153137, + "step": 264799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.755843162536621, + "loss_rtd": 0.24229732155799866, + "loss_sent": 0.18416090309619904, + "loss_sod": 0.018144188448786736, + "loss_total": 0.44460242986679077, + "step": 264799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.7541510462760925, + "learning_rate": 1.7305194550005776e-05, + "loss": 0.45, + "step": 264800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.819304943084717, + "loss_rtd": 0.20940464735031128, + "loss_sent": 0.14331218600273132, + "loss_sod": 0.046574972569942474, + "loss_total": 0.3992918133735657, + "step": 264899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.564984321594238, + "loss_rtd": 0.22418752312660217, + "loss_sent": 0.3626002371311188, + "loss_sod": 0.04436088353395462, + "loss_total": 0.631148636341095, + "step": 264899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.0071419477462769, + "learning_rate": 1.728119224911995e-05, + "loss": 0.4449, + "step": 264900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.87677526473999, + "loss_rtd": 0.19869399070739746, + "loss_sent": 0.5174181461334229, + "loss_sod": 0.03321876749396324, + "loss_total": 0.7493308782577515, + "step": 264999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.963561058044434, + "loss_rtd": 0.2295539677143097, + "loss_sent": 0.06396544724702835, + "loss_sod": 0.03682500496506691, + "loss_total": 0.33034440875053406, + "step": 264999 + }, + { + "epoch": 0.018, + "grad_norm": 1.95595383644104, + "learning_rate": 1.7257203127365972e-05, + "loss": 0.4391, + "step": 265000 + }, + { + "epoch": 0.018, + "eval_loss": 0.41752806305885315, + "eval_runtime": 150.3532, + "eval_samples_per_second": 102.711, + "eval_steps_per_second": 0.805, + "step": 265000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.199895858764648, + "loss_rtd": 0.18209359049797058, + "loss_sent": 0.012325072661042213, + "loss_sod": 0.2070392221212387, + "loss_total": 0.40145787596702576, + "step": 265099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.887097358703613, + "loss_rtd": 0.2314467579126358, + "loss_sent": 0.17575231194496155, + "loss_sod": 0.007308542262762785, + "loss_total": 0.4145076274871826, + "step": 265099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.245650291442871, + "learning_rate": 1.7233227194406665e-05, + "loss": 0.4219, + "step": 265100 + }, + { + "epoch": 0.018398, + "loss_gen": 6.223198413848877, + "loss_rtd": 0.24890851974487305, + "loss_sent": 0.09819793701171875, + "loss_sod": 0.07153752446174622, + "loss_total": 0.418643981218338, + "step": 265199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.632750034332275, + "loss_rtd": 0.22274403274059296, + "loss_sent": 0.11046051979064941, + "loss_sod": 0.06312073767185211, + "loss_total": 0.3963252902030945, + "step": 265199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.1298741102218628, + "learning_rate": 1.7209264459899537e-05, + "loss": 0.438, + "step": 265200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.852601051330566, + "loss_rtd": 0.206861674785614, + "loss_sent": 0.06961745023727417, + "loss_sod": 0.013976778835058212, + "loss_total": 0.2904559075832367, + "step": 265299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.838532447814941, + "loss_rtd": 0.230842724442482, + "loss_sent": 0.293140172958374, + "loss_sod": 0.025453124195337296, + "loss_total": 0.5494360327720642, + "step": 265299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.7868773341178894, + "learning_rate": 1.7185314933496744e-05, + "loss": 0.4466, + "step": 265300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.1168532371521, + "loss_rtd": 0.1774856299161911, + "loss_sent": 0.0019256524974480271, + "loss_sod": 0.016026396304368973, + "loss_total": 0.19543766975402832, + "step": 265399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.6769561767578125, + "loss_rtd": 0.22008772194385529, + "loss_sent": 0.148380845785141, + "loss_sod": 0.05477447807788849, + "loss_total": 0.42324304580688477, + "step": 265399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.9292224049568176, + "learning_rate": 1.7161378624845175e-05, + "loss": 0.4324, + "step": 265400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.493320941925049, + "loss_rtd": 0.20276357233524323, + "loss_sent": 0.22482146322727203, + "loss_sod": 0.002522670431062579, + "loss_total": 0.4301077127456665, + "step": 265499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.925997734069824, + "loss_rtd": 0.2251824587583542, + "loss_sent": 0.11820624768733978, + "loss_sod": 0.028906188905239105, + "loss_total": 0.37229490280151367, + "step": 265499 + }, + { + "epoch": 0.019, + "grad_norm": 1.5733423233032227, + "learning_rate": 1.7137455543586372e-05, + "loss": 0.4297, + "step": 265500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.135681629180908, + "loss_rtd": 0.17314018309116364, + "loss_sent": 0.00036163756158202887, + "loss_sod": 0.09965495020151138, + "loss_total": 0.2731567919254303, + "step": 265599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.846146106719971, + "loss_rtd": 0.2163507491350174, + "loss_sent": 0.14655791223049164, + "loss_sod": 0.09526507556438446, + "loss_total": 0.4581737518310547, + "step": 265599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.3445922136306763, + "learning_rate": 1.711354569935656e-05, + "loss": 0.4375, + "step": 265600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.286101818084717, + "loss_rtd": 0.1882425844669342, + "loss_sent": 5.8458357671042904e-05, + "loss_sod": 0.04162990301847458, + "loss_total": 0.22993095219135284, + "step": 265699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.513396263122559, + "loss_rtd": 0.19958806037902832, + "loss_sent": 0.06855539232492447, + "loss_sod": 0.1291506141424179, + "loss_total": 0.3972940742969513, + "step": 265699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.7645038366317749, + "learning_rate": 1.7089649101786588e-05, + "loss": 0.4433, + "step": 265700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.727639675140381, + "loss_rtd": 0.23569880425930023, + "loss_sent": 0.39408543705940247, + "loss_sod": 0.05898343026638031, + "loss_total": 0.688767671585083, + "step": 265799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.962072372436523, + "loss_rtd": 0.22397878766059875, + "loss_sent": 0.05560656636953354, + "loss_sod": 0.05383361876010895, + "loss_total": 0.33341899514198303, + "step": 265799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.8413041830062866, + "learning_rate": 1.7065765760502022e-05, + "loss": 0.429, + "step": 265800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.179636478424072, + "loss_rtd": 0.1974020004272461, + "loss_sent": 0.03730133920907974, + "loss_sod": 0.11626625061035156, + "loss_total": 0.3509695827960968, + "step": 265899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.240450859069824, + "loss_rtd": 0.18879199028015137, + "loss_sent": 0.07717913389205933, + "loss_sod": 0.0672200545668602, + "loss_total": 0.3331911861896515, + "step": 265899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.9509782195091248, + "learning_rate": 1.7041895685123087e-05, + "loss": 0.4245, + "step": 265900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.625738620758057, + "loss_rtd": 0.22091302275657654, + "loss_sent": 0.27037566900253296, + "loss_sod": 0.006796187721192837, + "loss_total": 0.4980848729610443, + "step": 265999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.942508697509766, + "loss_rtd": 0.2323511242866516, + "loss_sent": 0.1803351491689682, + "loss_sod": 0.11847847700119019, + "loss_total": 0.5311647653579712, + "step": 265999 + }, + { + "epoch": 0.02, + "grad_norm": 1.4394222497940063, + "learning_rate": 1.7018038885264615e-05, + "loss": 0.4177, + "step": 266000 + }, + { + "epoch": 0.02, + "eval_loss": 0.42416712641716003, + "eval_runtime": 151.8511, + "eval_samples_per_second": 101.698, + "eval_steps_per_second": 0.797, + "step": 266000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.478032112121582, + "loss_rtd": 0.2207171618938446, + "loss_sent": 0.11589272320270538, + "loss_sod": 0.030967382714152336, + "loss_total": 0.3675772547721863, + "step": 266099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.847688674926758, + "loss_rtd": 0.20218083262443542, + "loss_sent": 0.16010279953479767, + "loss_sod": 0.04950456693768501, + "loss_total": 0.4117882251739502, + "step": 266099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.408447265625, + "learning_rate": 1.6994195370536135e-05, + "loss": 0.4339, + "step": 266100 + }, + { + "epoch": 0.020398, + "loss_gen": 6.003078937530518, + "loss_rtd": 0.23656605184078217, + "loss_sent": 0.19200684130191803, + "loss_sod": 0.02124079503118992, + "loss_total": 0.4498136639595032, + "step": 266199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.59977388381958, + "loss_rtd": 0.2051219791173935, + "loss_sent": 0.11943111568689346, + "loss_sod": 0.018068883568048477, + "loss_total": 0.34262198209762573, + "step": 266199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.6684094667434692, + "learning_rate": 1.697036515054181e-05, + "loss": 0.4203, + "step": 266200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.6577277183532715, + "loss_rtd": 0.2120039314031601, + "loss_sent": 0.12950928509235382, + "loss_sod": 0.029606901109218597, + "loss_total": 0.3711200952529907, + "step": 266299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.689751148223877, + "loss_rtd": 0.22746476531028748, + "loss_sent": 0.2197207361459732, + "loss_sod": 0.08924826979637146, + "loss_total": 0.536433756351471, + "step": 266299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.1296987533569336, + "learning_rate": 1.694654823488047e-05, + "loss": 0.4351, + "step": 266300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.731989860534668, + "loss_rtd": 0.21588566899299622, + "loss_sent": 0.10215010493993759, + "loss_sod": 0.06963024288415909, + "loss_total": 0.3876660168170929, + "step": 266399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.85683536529541, + "loss_rtd": 0.21849097311496735, + "loss_sent": 0.20090869069099426, + "loss_sod": 0.0825258195400238, + "loss_total": 0.5019254684448242, + "step": 266399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.3961708545684814, + "learning_rate": 1.692274463314553e-05, + "loss": 0.4575, + "step": 266400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.548666954040527, + "loss_rtd": 0.22105520963668823, + "loss_sent": 0.1915527731180191, + "loss_sod": 0.03421821817755699, + "loss_total": 0.4468262195587158, + "step": 266499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.296370506286621, + "loss_rtd": 0.1955137401819229, + "loss_sent": 0.031068984419107437, + "loss_sod": 0.10109543800354004, + "loss_total": 0.3276781439781189, + "step": 266499 + }, + { + "epoch": 0.021, + "grad_norm": 0.9188606142997742, + "learning_rate": 1.68989543549251e-05, + "loss": 0.4355, + "step": 266500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.99031400680542, + "loss_rtd": 0.2285676747560501, + "loss_sent": 0.18636761605739594, + "loss_sod": 0.03560897707939148, + "loss_total": 0.4505442976951599, + "step": 266599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.960809230804443, + "loss_rtd": 0.20881226658821106, + "loss_sent": 0.0602426715195179, + "loss_sod": 0.1764306277036667, + "loss_total": 0.44548556208610535, + "step": 266599 + }, + { + "epoch": 0.0212, + "grad_norm": 2.0185892581939697, + "learning_rate": 1.6875177409801897e-05, + "loss": 0.4338, + "step": 266600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.464375972747803, + "loss_rtd": 0.19333864748477936, + "loss_sent": 0.023246033117175102, + "loss_sod": 0.08310497552156448, + "loss_total": 0.2996896505355835, + "step": 266699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.380824089050293, + "loss_rtd": 0.1889808624982834, + "loss_sent": 0.08529441058635712, + "loss_sod": 0.0023418976925313473, + "loss_total": 0.276617169380188, + "step": 266699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.6752312183380127, + "learning_rate": 1.685141380735329e-05, + "loss": 0.4415, + "step": 266700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.404900074005127, + "loss_rtd": 0.22304946184158325, + "loss_sent": 0.00990715716034174, + "loss_sod": 0.0994158387184143, + "loss_total": 0.3323724567890167, + "step": 266799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.072579383850098, + "loss_rtd": 0.18661810457706451, + "loss_sent": 2.7130905436933972e-05, + "loss_sod": 0.1396275758743286, + "loss_total": 0.32627278566360474, + "step": 266799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.9894542098045349, + "learning_rate": 1.682766355715122e-05, + "loss": 0.45, + "step": 266800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.838479995727539, + "loss_rtd": 0.21540221571922302, + "loss_sent": 0.3221489191055298, + "loss_sod": 0.012889444828033447, + "loss_total": 0.5504405498504639, + "step": 266899 + }, + { + "epoch": 0.021798, + "loss_gen": 6.1297125816345215, + "loss_rtd": 0.22166520357131958, + "loss_sent": 0.15729407966136932, + "loss_sod": 0.019165389239788055, + "loss_total": 0.39812469482421875, + "step": 266899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.8149746656417847, + "learning_rate": 1.6803926668762298e-05, + "loss": 0.4442, + "step": 266900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.621804714202881, + "loss_rtd": 0.23044511675834656, + "loss_sent": 0.23261255025863647, + "loss_sod": 0.02542426437139511, + "loss_total": 0.48848193883895874, + "step": 266999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.649903297424316, + "loss_rtd": 0.21168476343154907, + "loss_sent": 0.059327382594347, + "loss_sod": 0.11903805285692215, + "loss_total": 0.3900502026081085, + "step": 266999 + }, + { + "epoch": 0.022, + "grad_norm": 1.2215574979782104, + "learning_rate": 1.6780203151747742e-05, + "loss": 0.4099, + "step": 267000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4148196280002594, + "eval_runtime": 150.5299, + "eval_samples_per_second": 102.591, + "eval_steps_per_second": 0.804, + "step": 267000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.30557918548584, + "loss_rtd": 0.1764380931854248, + "loss_sent": 2.36851137742633e-05, + "loss_sod": 0.050875790417194366, + "loss_total": 0.2273375540971756, + "step": 267099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.727357387542725, + "loss_rtd": 0.19363459944725037, + "loss_sent": 0.5238352417945862, + "loss_sod": 0.13281087577342987, + "loss_total": 0.8502807021141052, + "step": 267099 + }, + { + "epoch": 0.0222, + "grad_norm": 2.4402108192443848, + "learning_rate": 1.6756493015663403e-05, + "loss": 0.4301, + "step": 267100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.480380535125732, + "loss_rtd": 0.23758484423160553, + "loss_sent": 0.12472793459892273, + "loss_sod": 0.014917224645614624, + "loss_total": 0.3772300183773041, + "step": 267199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.757543563842773, + "loss_rtd": 0.22850534319877625, + "loss_sent": 0.16525429487228394, + "loss_sod": 0.03858501464128494, + "loss_total": 0.4323446452617645, + "step": 267199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.9831203818321228, + "learning_rate": 1.6732796270059693e-05, + "loss": 0.4452, + "step": 267200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.9338250160217285, + "loss_rtd": 0.22473864257335663, + "loss_sent": 0.10696947574615479, + "loss_sod": 0.07062780857086182, + "loss_total": 0.4023359417915344, + "step": 267299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.790135860443115, + "loss_rtd": 0.23071084916591644, + "loss_sent": 0.15764625370502472, + "loss_sod": 0.14289075136184692, + "loss_total": 0.5312478542327881, + "step": 267299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.2271019220352173, + "learning_rate": 1.6709112924481657e-05, + "loss": 0.4342, + "step": 267300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.565337657928467, + "loss_rtd": 0.20565111935138702, + "loss_sent": 0.041761934757232666, + "loss_sod": 0.15724752843379974, + "loss_total": 0.40466058254241943, + "step": 267399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.711415767669678, + "loss_rtd": 0.20858651399612427, + "loss_sent": 0.15100525319576263, + "loss_sod": 0.027561167255043983, + "loss_total": 0.38715294003486633, + "step": 267399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.9476865530014038, + "learning_rate": 1.6685442988468973e-05, + "loss": 0.436, + "step": 267400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.75596284866333, + "loss_rtd": 0.2451847940683365, + "loss_sent": 0.17701643705368042, + "loss_sod": 0.04414738714694977, + "loss_total": 0.4663486182689667, + "step": 267499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.96491813659668, + "loss_rtd": 0.22007696330547333, + "loss_sent": 0.22976228594779968, + "loss_sod": 0.014164028689265251, + "loss_total": 0.4640032649040222, + "step": 267499 + }, + { + "epoch": 0.023, + "grad_norm": 1.0047032833099365, + "learning_rate": 1.6661786471555858e-05, + "loss": 0.4352, + "step": 267500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.881296634674072, + "loss_rtd": 0.2165652960538864, + "loss_sent": 0.03391829505562782, + "loss_sod": 0.07588096708059311, + "loss_total": 0.32636454701423645, + "step": 267599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.825904846191406, + "loss_rtd": 0.2303563356399536, + "loss_sent": 0.32303234934806824, + "loss_sod": 0.03734510391950607, + "loss_total": 0.5907337665557861, + "step": 267599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.319616436958313, + "learning_rate": 1.663814338327116e-05, + "loss": 0.4351, + "step": 267600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.730740547180176, + "loss_rtd": 0.2254457324743271, + "loss_sent": 0.2892443835735321, + "loss_sod": 0.08354109525680542, + "loss_total": 0.5982311964035034, + "step": 267699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.685034275054932, + "loss_rtd": 0.21458850800991058, + "loss_sent": 0.09530942142009735, + "loss_sod": 0.061370886862277985, + "loss_total": 0.3712688088417053, + "step": 267699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.315625548362732, + "learning_rate": 1.661451373313832e-05, + "loss": 0.4301, + "step": 267700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.6471662521362305, + "loss_rtd": 0.2073613703250885, + "loss_sent": 0.1655789017677307, + "loss_sod": 0.026781873777508736, + "loss_total": 0.399722158908844, + "step": 267799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.882993221282959, + "loss_rtd": 0.23457126319408417, + "loss_sent": 0.07836773246526718, + "loss_sod": 0.01736392267048359, + "loss_total": 0.3303029239177704, + "step": 267799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.6371013522148132, + "learning_rate": 1.659089753067537e-05, + "loss": 0.4307, + "step": 267800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.863724708557129, + "loss_rtd": 0.22295472025871277, + "loss_sent": 0.23114438354969025, + "loss_sod": 0.013383528217673302, + "loss_total": 0.46748262643814087, + "step": 267899 + }, + { + "epoch": 0.023798, + "loss_gen": 6.320718288421631, + "loss_rtd": 0.23582635819911957, + "loss_sent": 0.10020633786916733, + "loss_sod": 0.21261908113956451, + "loss_total": 0.5486517548561096, + "step": 267899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.1309590339660645, + "learning_rate": 1.656729478539488e-05, + "loss": 0.434, + "step": 267900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.900601387023926, + "loss_rtd": 0.20932823419570923, + "loss_sent": 0.2272641807794571, + "loss_sod": 0.12037929892539978, + "loss_total": 0.5569717288017273, + "step": 267999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.909946441650391, + "loss_rtd": 0.21609345078468323, + "loss_sent": 0.16744546592235565, + "loss_sod": 0.02023504301905632, + "loss_total": 0.4037739634513855, + "step": 267999 + }, + { + "epoch": 0.024, + "grad_norm": 1.2057437896728516, + "learning_rate": 1.6543705506804057e-05, + "loss": 0.4413, + "step": 268000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4166734218597412, + "eval_runtime": 150.2464, + "eval_samples_per_second": 102.784, + "eval_steps_per_second": 0.805, + "step": 268000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.234063625335693, + "loss_rtd": 0.18069347739219666, + "loss_sent": 0.0002493959618732333, + "loss_sod": 0.12024861574172974, + "loss_total": 0.3011914789676666, + "step": 268099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.2875213623046875, + "loss_rtd": 0.1950591504573822, + "loss_sent": 0.01434609480202198, + "loss_sod": 0.0864410549402237, + "loss_total": 0.2958463132381439, + "step": 268099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.6516631245613098, + "learning_rate": 1.6520129704404658e-05, + "loss": 0.4361, + "step": 268100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.475121974945068, + "loss_rtd": 0.2370995432138443, + "loss_sent": 0.21873275935649872, + "loss_sod": 0.010058843530714512, + "loss_total": 0.46589115262031555, + "step": 268199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.579318523406982, + "loss_rtd": 0.23786893486976624, + "loss_sent": 0.09398513287305832, + "loss_sod": 0.014935585670173168, + "loss_total": 0.3467896580696106, + "step": 268199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.305841326713562, + "learning_rate": 1.6496567387693018e-05, + "loss": 0.4131, + "step": 268200 + }, + { + "epoch": 0.024598, + "loss_gen": 6.101864337921143, + "loss_rtd": 0.19255444407463074, + "loss_sent": 0.35791492462158203, + "loss_sod": 0.17142772674560547, + "loss_total": 0.7218971252441406, + "step": 268299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.433445453643799, + "loss_rtd": 0.19646601378917694, + "loss_sent": 0.0854458138346672, + "loss_sod": 0.03310718014836311, + "loss_total": 0.31501901149749756, + "step": 268299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.7531074285507202, + "learning_rate": 1.647301856616002e-05, + "loss": 0.4563, + "step": 268300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.8197102546691895, + "loss_rtd": 0.21163439750671387, + "loss_sent": 0.13387049734592438, + "loss_sod": 0.035824965685606, + "loss_total": 0.38132986426353455, + "step": 268399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.995186805725098, + "loss_rtd": 0.21819134056568146, + "loss_sent": 0.26021432876586914, + "loss_sod": 0.05272424966096878, + "loss_total": 0.5311299562454224, + "step": 268399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.9215118885040283, + "learning_rate": 1.644948324929113e-05, + "loss": 0.4256, + "step": 268400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.807307243347168, + "loss_rtd": 0.21854138374328613, + "loss_sent": 0.10618340969085693, + "loss_sod": 0.035427503287792206, + "loss_total": 0.36015230417251587, + "step": 268499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.153491497039795, + "loss_rtd": 0.20173749327659607, + "loss_sent": 2.4461814973619767e-05, + "loss_sod": 0.125558003783226, + "loss_total": 0.3273199498653412, + "step": 268499 + }, + { + "epoch": 0.025, + "grad_norm": 0.9242141842842102, + "learning_rate": 1.6425961446566373e-05, + "loss": 0.4461, + "step": 268500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.697330951690674, + "loss_rtd": 0.2073066085577011, + "loss_sent": 0.007205627392977476, + "loss_sod": 0.06607302278280258, + "loss_total": 0.28058525919914246, + "step": 268599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.58998441696167, + "loss_rtd": 0.23148037493228912, + "loss_sent": 0.18772272765636444, + "loss_sod": 0.006988976616412401, + "loss_total": 0.4261920750141144, + "step": 268599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.9894757270812988, + "learning_rate": 1.640245316746035e-05, + "loss": 0.4383, + "step": 268600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.730067729949951, + "loss_rtd": 0.2177078127861023, + "loss_sent": 0.2573537528514862, + "loss_sod": 0.038831986486911774, + "loss_total": 0.5138935446739197, + "step": 268699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.883787631988525, + "loss_rtd": 0.21808990836143494, + "loss_sent": 0.4537474811077118, + "loss_sod": 0.010962520726025105, + "loss_total": 0.6827999353408813, + "step": 268699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8552432656288147, + "learning_rate": 1.6378958421442153e-05, + "loss": 0.4247, + "step": 268700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.448304176330566, + "loss_rtd": 0.21249639987945557, + "loss_sent": 0.0010201549157500267, + "loss_sod": 0.07972157001495361, + "loss_total": 0.2932381331920624, + "step": 268799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.408642768859863, + "loss_rtd": 0.18969331681728363, + "loss_sent": 0.008647771552205086, + "loss_sod": 0.052934639155864716, + "loss_total": 0.2512757182121277, + "step": 268799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.7549141645431519, + "learning_rate": 1.635547721797549e-05, + "loss": 0.4484, + "step": 268800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.7269816398620605, + "loss_rtd": 0.19920344650745392, + "loss_sent": 0.2688048779964447, + "loss_sod": 0.013905920088291168, + "loss_total": 0.4819142520427704, + "step": 268899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.738521575927734, + "loss_rtd": 0.20524026453495026, + "loss_sent": 0.046123404055833817, + "loss_sod": 0.009331931360065937, + "loss_total": 0.26069560647010803, + "step": 268899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.1248387098312378, + "learning_rate": 1.633200956651859e-05, + "loss": 0.4308, + "step": 268900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.629986763000488, + "loss_rtd": 0.20220881700515747, + "loss_sent": 0.13901971280574799, + "loss_sod": 0.012678693048655987, + "loss_total": 0.3539072275161743, + "step": 268999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.598387241363525, + "loss_rtd": 0.20648714900016785, + "loss_sent": 0.08330265432596207, + "loss_sod": 0.017148887738585472, + "loss_total": 0.30693867802619934, + "step": 268999 + }, + { + "epoch": 0.026, + "grad_norm": 0.48872238397598267, + "learning_rate": 1.6308555476524194e-05, + "loss": 0.4399, + "step": 269000 + }, + { + "epoch": 0.026, + "eval_loss": 0.41795945167541504, + "eval_runtime": 150.0528, + "eval_samples_per_second": 102.917, + "eval_steps_per_second": 0.806, + "step": 269000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.742762088775635, + "loss_rtd": 0.2292107194662094, + "loss_sent": 0.17488723993301392, + "loss_sod": 0.027320269495248795, + "loss_total": 0.431418240070343, + "step": 269099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.613757133483887, + "loss_rtd": 0.24387536942958832, + "loss_sent": 0.39201292395591736, + "loss_sod": 0.11247213184833527, + "loss_total": 0.7483603954315186, + "step": 269099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.2894322872161865, + "learning_rate": 1.628511495743963e-05, + "loss": 0.4444, + "step": 269100 + }, + { + "epoch": 0.026398, + "loss_gen": 6.1032843589782715, + "loss_rtd": 0.23158025741577148, + "loss_sent": 0.21154244244098663, + "loss_sod": 0.018028700724244118, + "loss_total": 0.4611514210700989, + "step": 269199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.8828535079956055, + "loss_rtd": 0.20052839815616608, + "loss_sent": 0.2413274645805359, + "loss_sod": 0.01449059322476387, + "loss_total": 0.45634645223617554, + "step": 269199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.0119686126708984, + "learning_rate": 1.6261688018706724e-05, + "loss": 0.4329, + "step": 269200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.8638834953308105, + "loss_rtd": 0.22481068968772888, + "loss_sent": 0.2950937747955322, + "loss_sod": 0.007474738638848066, + "loss_total": 0.5273792147636414, + "step": 269299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.876376152038574, + "loss_rtd": 0.22316375374794006, + "loss_sent": 0.04835449531674385, + "loss_sod": 0.009289991110563278, + "loss_total": 0.2808082401752472, + "step": 269299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.6763526201248169, + "learning_rate": 1.6238274669761866e-05, + "loss": 0.448, + "step": 269300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.312898635864258, + "loss_rtd": 0.18635858595371246, + "loss_sent": 6.018896237947047e-05, + "loss_sod": 0.09727039188146591, + "loss_total": 0.2836891710758209, + "step": 269399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.71761417388916, + "loss_rtd": 0.21085000038146973, + "loss_sent": 0.14621832966804504, + "loss_sod": 0.06339356303215027, + "loss_total": 0.42046189308166504, + "step": 269399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.9217313528060913, + "learning_rate": 1.6214874920035917e-05, + "loss": 0.4303, + "step": 269400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.7887749671936035, + "loss_rtd": 0.21253244578838348, + "loss_sent": 0.07061567157506943, + "loss_sod": 0.06723160296678543, + "loss_total": 0.35037973523139954, + "step": 269499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.745434761047363, + "loss_rtd": 0.2194966971874237, + "loss_sent": 0.0950508713722229, + "loss_sod": 0.01831759139895439, + "loss_total": 0.3328651785850525, + "step": 269499 + }, + { + "epoch": 0.027, + "grad_norm": 0.7986164093017578, + "learning_rate": 1.619148877895431e-05, + "loss": 0.4529, + "step": 269500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.825835704803467, + "loss_rtd": 0.2061392366886139, + "loss_sent": 0.021851060912013054, + "loss_sod": 0.013518210500478745, + "loss_total": 0.24150851368904114, + "step": 269599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.209312438964844, + "loss_rtd": 0.1861337423324585, + "loss_sent": 0.003053348045796156, + "loss_sod": 0.1544070541858673, + "loss_total": 0.34359413385391235, + "step": 269599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.8490173816680908, + "learning_rate": 1.6168116255936994e-05, + "loss": 0.4473, + "step": 269600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.419999599456787, + "loss_rtd": 0.17292170226573944, + "loss_sent": 0.0003229019930586219, + "loss_sod": 0.1488613784313202, + "loss_total": 0.3221059739589691, + "step": 269699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.54251766204834, + "loss_rtd": 0.19121377170085907, + "loss_sent": 0.07150860875844955, + "loss_sod": 0.10690651088953018, + "loss_total": 0.36962890625, + "step": 269699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.1455020904541016, + "learning_rate": 1.6144757360398395e-05, + "loss": 0.4556, + "step": 269700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.758680820465088, + "loss_rtd": 0.20524001121520996, + "loss_sent": 0.4888244867324829, + "loss_sod": 0.007920067757368088, + "loss_total": 0.7019845843315125, + "step": 269799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.787389755249023, + "loss_rtd": 0.22080378234386444, + "loss_sent": 0.10038325935602188, + "loss_sod": 0.1250607669353485, + "loss_total": 0.44624778628349304, + "step": 269799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.2480300664901733, + "learning_rate": 1.6121412101747492e-05, + "loss": 0.4397, + "step": 269800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.59853982925415, + "loss_rtd": 0.20155419409275055, + "loss_sent": 0.05726367607712746, + "loss_sod": 0.02039262093603611, + "loss_total": 0.27921050786972046, + "step": 269899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.393312931060791, + "loss_rtd": 0.169960156083107, + "loss_sent": 0.020561737939715385, + "loss_sod": 0.04915458336472511, + "loss_total": 0.23967647552490234, + "step": 269899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.6546497344970703, + "learning_rate": 1.609808048938773e-05, + "loss": 0.4419, + "step": 269900 + }, + { + "epoch": 0.027998, + "loss_gen": 6.180161952972412, + "loss_rtd": 0.22595210373401642, + "loss_sent": 0.15271803736686707, + "loss_sod": 0.09914088249206543, + "loss_total": 0.4778110086917877, + "step": 269999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.168148517608643, + "loss_rtd": 0.20173440873622894, + "loss_sent": 0.005457804538309574, + "loss_sod": 0.11283750087022781, + "loss_total": 0.32002973556518555, + "step": 269999 + }, + { + "epoch": 0.028, + "grad_norm": 1.3075617551803589, + "learning_rate": 1.6074762532717093e-05, + "loss": 0.4638, + "step": 270000 + }, + { + "epoch": 0.028, + "eval_loss": 0.4133795201778412, + "eval_runtime": 150.3009, + "eval_samples_per_second": 102.747, + "eval_steps_per_second": 0.805, + "step": 270000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.982174873352051, + "loss_rtd": 0.22061192989349365, + "loss_sent": 0.1923549324274063, + "loss_sod": 0.04667946323752403, + "loss_total": 0.4596463441848755, + "step": 270099 + }, + { + "epoch": 0.028198, + "loss_gen": 6.057941436767578, + "loss_rtd": 0.21350853145122528, + "loss_sent": 0.08760643750429153, + "loss_sod": 0.08947840332984924, + "loss_total": 0.39059334993362427, + "step": 270099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.9949202537536621, + "learning_rate": 1.605145824112805e-05, + "loss": 0.412, + "step": 270100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.641507625579834, + "loss_rtd": 0.1965770423412323, + "loss_sent": 0.08672265708446503, + "loss_sod": 0.05908845737576485, + "loss_total": 0.3423881530761719, + "step": 270199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.62833833694458, + "loss_rtd": 0.2123500108718872, + "loss_sent": 0.06860460340976715, + "loss_sod": 0.16088838875293732, + "loss_total": 0.4418429732322693, + "step": 270199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.074608564376831, + "learning_rate": 1.602816762400758e-05, + "loss": 0.4417, + "step": 270200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.634223461151123, + "loss_rtd": 0.2132130265235901, + "loss_sent": 0.058612026274204254, + "loss_sod": 0.09744498133659363, + "loss_total": 0.3692700266838074, + "step": 270299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.164697647094727, + "loss_rtd": 0.18826591968536377, + "loss_sent": 0.004804539028555155, + "loss_sod": 0.13274680078029633, + "loss_total": 0.3258172869682312, + "step": 270299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.1823171377182007, + "learning_rate": 1.6004890690737112e-05, + "loss": 0.457, + "step": 270300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.896481990814209, + "loss_rtd": 0.22735892236232758, + "loss_sent": 0.08205220848321915, + "loss_sod": 0.1463623344898224, + "loss_total": 0.4557734429836273, + "step": 270399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.64373779296875, + "loss_rtd": 0.22357432544231415, + "loss_sent": 0.2212490439414978, + "loss_sod": 0.017883561551570892, + "loss_total": 0.46270692348480225, + "step": 270399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.1900618076324463, + "learning_rate": 1.5981627450692614e-05, + "loss": 0.4442, + "step": 270400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.767705917358398, + "loss_rtd": 0.22999972105026245, + "loss_sent": 0.15789847075939178, + "loss_sod": 0.14105084538459778, + "loss_total": 0.5289490222930908, + "step": 270499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.778688907623291, + "loss_rtd": 0.2190682291984558, + "loss_sent": 0.40757206082344055, + "loss_sod": 0.02697756141424179, + "loss_total": 0.6536178588867188, + "step": 270499 + }, + { + "epoch": 0.029, + "grad_norm": 1.4509148597717285, + "learning_rate": 1.5958377913244527e-05, + "loss": 0.4339, + "step": 270500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.53188943862915, + "loss_rtd": 0.19379432499408722, + "loss_sent": 0.09786945581436157, + "loss_sod": 0.08292634785175323, + "loss_total": 0.374590128660202, + "step": 270599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.495702266693115, + "loss_rtd": 0.20815308392047882, + "loss_sent": 0.01570098102092743, + "loss_sod": 0.1423245370388031, + "loss_total": 0.36617863178253174, + "step": 270599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.8742807507514954, + "learning_rate": 1.5935142087757727e-05, + "loss": 0.4349, + "step": 270600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.954782962799072, + "loss_rtd": 0.20175915956497192, + "loss_sent": 0.12556029856204987, + "loss_sod": 0.018054986372590065, + "loss_total": 0.3453744649887085, + "step": 270699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.508944511413574, + "loss_rtd": 0.19804103672504425, + "loss_sent": 0.00024759970256127417, + "loss_sod": 0.03680426999926567, + "loss_total": 0.2350929081439972, + "step": 270699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.7660701870918274, + "learning_rate": 1.5911919983591617e-05, + "loss": 0.4444, + "step": 270700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.674312591552734, + "loss_rtd": 0.22166262567043304, + "loss_sent": 0.0702732726931572, + "loss_sod": 0.008209237828850746, + "loss_total": 0.30014514923095703, + "step": 270799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.849605560302734, + "loss_rtd": 0.22538764774799347, + "loss_sent": 0.2957608103752136, + "loss_sod": 0.013990513049066067, + "loss_total": 0.5351389646530151, + "step": 270799 + }, + { + "epoch": 0.0296, + "grad_norm": 0.6954144239425659, + "learning_rate": 1.5888711610100064e-05, + "loss": 0.4263, + "step": 270800 + }, + { + "epoch": 0.029798, + "loss_gen": 6.219542503356934, + "loss_rtd": 0.2291915863752365, + "loss_sent": 0.15675795078277588, + "loss_sod": 0.08937126398086548, + "loss_total": 0.47532081604003906, + "step": 270899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.773324966430664, + "loss_rtd": 0.21983198821544647, + "loss_sent": 0.11543253064155579, + "loss_sod": 0.0073675187304615974, + "loss_total": 0.3426320552825928, + "step": 270899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.270186185836792, + "learning_rate": 1.586551697663141e-05, + "loss": 0.4308, + "step": 270900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.716264247894287, + "loss_rtd": 0.21419492363929749, + "loss_sent": 0.38679981231689453, + "loss_sod": 0.019500968977808952, + "loss_total": 0.6204956769943237, + "step": 270999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.190142631530762, + "loss_rtd": 0.18350224196910858, + "loss_sent": 0.007931055501103401, + "loss_sod": 0.023862555623054504, + "loss_total": 0.21529585123062134, + "step": 270999 + }, + { + "epoch": 0.03, + "grad_norm": 1.137611985206604, + "learning_rate": 1.5842336092528427e-05, + "loss": 0.4465, + "step": 271000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4173237383365631, + "eval_runtime": 150.629, + "eval_samples_per_second": 102.523, + "eval_steps_per_second": 0.803, + "step": 271000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.4995574951171875, + "loss_rtd": 0.20884856581687927, + "loss_sent": 0.08446690440177917, + "loss_sod": 0.06681458652019501, + "loss_total": 0.36013004183769226, + "step": 271099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.772039890289307, + "loss_rtd": 0.2209097146987915, + "loss_sent": 0.1933259665966034, + "loss_sod": 0.04324163496494293, + "loss_total": 0.45747730135917664, + "step": 271099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.000438928604126, + "learning_rate": 1.5819168967128374e-05, + "loss": 0.4593, + "step": 271100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.74977445602417, + "loss_rtd": 0.21794189512729645, + "loss_sent": 0.21439877152442932, + "loss_sod": 0.029228707775473595, + "loss_total": 0.4615693688392639, + "step": 271199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.9667439460754395, + "loss_rtd": 0.23573808372020721, + "loss_sent": 0.11964467912912369, + "loss_sod": 0.04007008671760559, + "loss_total": 0.3954528272151947, + "step": 271199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.8552267551422119, + "learning_rate": 1.579601560976297e-05, + "loss": 0.4342, + "step": 271200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.5353007316589355, + "loss_rtd": 0.21769912540912628, + "loss_sent": 0.3156224489212036, + "loss_sod": 0.018426664173603058, + "loss_total": 0.5517482757568359, + "step": 271299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.710964679718018, + "loss_rtd": 0.22503423690795898, + "loss_sent": 0.09214138984680176, + "loss_sod": 0.07522615045309067, + "loss_total": 0.392401784658432, + "step": 271299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.2099968194961548, + "learning_rate": 1.577287602975841e-05, + "loss": 0.4298, + "step": 271300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.57111120223999, + "loss_rtd": 0.23344328999519348, + "loss_sent": 0.09366744011640549, + "loss_sod": 0.03955375403165817, + "loss_total": 0.36666449904441833, + "step": 271399 + }, + { + "epoch": 0.030798, + "loss_gen": 6.4007673263549805, + "loss_rtd": 0.24450141191482544, + "loss_sent": 0.08490917831659317, + "loss_sod": 0.10845999419689178, + "loss_total": 0.437870591878891, + "step": 271399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.1461840867996216, + "learning_rate": 1.5749750236435277e-05, + "loss": 0.4505, + "step": 271400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.678263187408447, + "loss_rtd": 0.20214150846004486, + "loss_sent": 0.12077659368515015, + "loss_sod": 0.03934522718191147, + "loss_total": 0.3622633218765259, + "step": 271499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.953507423400879, + "loss_rtd": 0.23510979115962982, + "loss_sent": 0.19212287664413452, + "loss_sod": 0.012832150794565678, + "loss_total": 0.44006481766700745, + "step": 271499 + }, + { + "epoch": 0.031, + "grad_norm": 0.7390113472938538, + "learning_rate": 1.572663823910865e-05, + "loss": 0.4259, + "step": 271500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.757784366607666, + "loss_rtd": 0.21268510818481445, + "loss_sent": 0.28529173135757446, + "loss_sod": 0.005190334282815456, + "loss_total": 0.5031671524047852, + "step": 271599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.3992133140563965, + "loss_rtd": 0.1795322597026825, + "loss_sent": 0.006780500989407301, + "loss_sod": 0.031529609113931656, + "loss_total": 0.21784237027168274, + "step": 271599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.2566139698028564, + "learning_rate": 1.5703540047088045e-05, + "loss": 0.4158, + "step": 271600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.956812381744385, + "loss_rtd": 0.22287671267986298, + "loss_sent": 0.5607706904411316, + "loss_sod": 0.15211720764636993, + "loss_total": 0.9357646107673645, + "step": 271699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.977387428283691, + "loss_rtd": 0.2216854840517044, + "loss_sent": 0.08405521512031555, + "loss_sod": 0.011146768927574158, + "loss_total": 0.3168874680995941, + "step": 271699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.8207440376281738, + "learning_rate": 1.5680455669677418e-05, + "loss": 0.4327, + "step": 271700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.83808708190918, + "loss_rtd": 0.2178107053041458, + "loss_sent": 0.20109620690345764, + "loss_sod": 0.006562143564224243, + "loss_total": 0.4254690408706665, + "step": 271799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.626862525939941, + "loss_rtd": 0.23068249225616455, + "loss_sent": 0.7458118200302124, + "loss_sod": 0.006968685891479254, + "loss_total": 0.9834629893302917, + "step": 271799 + }, + { + "epoch": 0.0316, + "grad_norm": 2.781471014022827, + "learning_rate": 1.5657385116175132e-05, + "loss": 0.425, + "step": 271800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.72520637512207, + "loss_rtd": 0.23127830028533936, + "loss_sent": 0.08770978450775146, + "loss_sod": 0.056782398372888565, + "loss_total": 0.3757704794406891, + "step": 271899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.877103805541992, + "loss_rtd": 0.22334948182106018, + "loss_sent": 0.20821848511695862, + "loss_sod": 0.028325015679001808, + "loss_total": 0.45989298820495605, + "step": 271899 + }, + { + "epoch": 0.0318, + "grad_norm": 0.7804876565933228, + "learning_rate": 1.563432839587401e-05, + "loss": 0.4408, + "step": 271900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.407350540161133, + "loss_rtd": 0.22078903019428253, + "loss_sent": 0.20672528445720673, + "loss_sod": 0.012127239257097244, + "loss_total": 0.439641535282135, + "step": 271999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.709309101104736, + "loss_rtd": 0.24471458792686462, + "loss_sent": 0.08863084763288498, + "loss_sod": 0.019763953983783722, + "loss_total": 0.3531093895435333, + "step": 271999 + }, + { + "epoch": 0.032, + "grad_norm": 0.6426042318344116, + "learning_rate": 1.561128551806132e-05, + "loss": 0.4359, + "step": 272000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4193708598613739, + "eval_runtime": 150.3918, + "eval_samples_per_second": 102.685, + "eval_steps_per_second": 0.805, + "step": 272000 } ], "logging_steps": 100, @@ -42088,7 +70210,7 @@ "attributes": {} } }, - "total_flos": 1.1385670881509376e+19, + "total_flos": 1.8999401716383744e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null