diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,9 +2,9 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.034, + "epoch": 0.006, "eval_steps": 1000, - "global_step": 31000, + "global_step": 163000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -8013,6 +8013,34062 @@ "eval_samples_per_second": 101.792, "eval_steps_per_second": 0.798, "step": 31000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.8133978843688965, + "loss_rtd": 0.3870883584022522, + "loss_sent": 0.17000341415405273, + "loss_sod": 0.06947870552539825, + "loss_total": 0.626570463180542, + "step": 31099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.526205062866211, + "loss_rtd": 0.42248114943504333, + "loss_sent": 0.15724748373031616, + "loss_sod": 0.04115546867251396, + "loss_total": 0.6208840608596802, + "step": 31099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.4922997951507568, + "learning_rate": 8.482434219565431e-05, + "loss": 0.6904, + "step": 31100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.535189151763916, + "loss_rtd": 0.4071005880832672, + "loss_sent": 0.22625060379505157, + "loss_sod": 0.015866361558437347, + "loss_total": 0.6492175459861755, + "step": 31199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.785840034484863, + "loss_rtd": 0.3867778480052948, + "loss_sent": 0.1283811330795288, + "loss_sod": 0.013246990740299225, + "loss_total": 0.5284059643745422, + "step": 31199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.9023923873901367, + "learning_rate": 8.480156436271101e-05, + "loss": 0.7022, + "step": 31200 + }, + { + "epoch": 0.000598, + "loss_gen": 4.289353370666504, + "loss_rtd": 0.4006703495979309, + "loss_sent": 0.3113102316856384, + "loss_sod": 0.06139662116765976, + "loss_total": 0.7733771800994873, + "step": 31299 + }, + { + "epoch": 0.000598, + "loss_gen": 4.495286464691162, + "loss_rtd": 0.40194377303123474, + "loss_sent": 0.2873789668083191, + "loss_sod": 0.06476514041423798, + "loss_total": 0.7540878653526306, + "step": 31299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.169629454612732, + "learning_rate": 8.477877251170174e-05, + "loss": 0.674, + "step": 31300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.043064594268799, + "loss_rtd": 0.38696688413619995, + "loss_sent": 0.10985315591096878, + "loss_sod": 0.1856946051120758, + "loss_total": 0.6825146675109863, + "step": 31399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.523093223571777, + "loss_rtd": 0.416436105966568, + "loss_sent": 0.1800312101840973, + "loss_sod": 0.1480741798877716, + "loss_total": 0.7445415258407593, + "step": 31399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.948949933052063, + "learning_rate": 8.475596665180707e-05, + "loss": 0.6757, + "step": 31400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.102592468261719, + "loss_rtd": 0.3981564939022064, + "loss_sent": 0.12449570745229721, + "loss_sod": 0.31758543848991394, + "loss_total": 0.8402376174926758, + "step": 31499 + }, + { + "epoch": 0.000998, + "loss_gen": 3.6268177032470703, + "loss_rtd": 0.38327154517173767, + "loss_sent": 0.002468185033649206, + "loss_sod": 0.4803374409675598, + "loss_total": 0.8660771250724792, + "step": 31499 + }, + { + "epoch": 0.001, + "grad_norm": 1.330782413482666, + "learning_rate": 8.47331467922132e-05, + "loss": 0.6954, + "step": 31500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.7002363204956055, + "loss_rtd": 0.41255107522010803, + "loss_sent": 0.11588738858699799, + "loss_sod": 0.18582235276699066, + "loss_total": 0.7142608165740967, + "step": 31599 + }, + { + "epoch": 0.001198, + "loss_gen": 4.524577617645264, + "loss_rtd": 0.3973040282726288, + "loss_sent": 0.1471736878156662, + "loss_sod": 0.06015242636203766, + "loss_total": 0.604630172252655, + "step": 31599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.8479200601577759, + "learning_rate": 8.471031294211194e-05, + "loss": 0.6919, + "step": 31600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.201780319213867, + "loss_rtd": 0.3961066007614136, + "loss_sent": 0.18251606822013855, + "loss_sod": 0.08499738574028015, + "loss_total": 0.6636200547218323, + "step": 31699 + }, + { + "epoch": 0.001398, + "loss_gen": 4.5241546630859375, + "loss_rtd": 0.4064065217971802, + "loss_sent": 0.22355028986930847, + "loss_sod": 0.03173723816871643, + "loss_total": 0.6616940498352051, + "step": 31699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.7753614783287048, + "learning_rate": 8.468746511070076e-05, + "loss": 0.6906, + "step": 31700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.587357997894287, + "loss_rtd": 0.40675556659698486, + "loss_sent": 0.3111591637134552, + "loss_sod": 0.06384904682636261, + "loss_total": 0.7817637920379639, + "step": 31799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.497931003570557, + "loss_rtd": 0.39460289478302, + "loss_sent": 0.320936381816864, + "loss_sod": 0.1028224378824234, + "loss_total": 0.8183616995811462, + "step": 31799 + }, + { + "epoch": 0.0016, + "grad_norm": 2.3936991691589355, + "learning_rate": 8.46646033071828e-05, + "loss": 0.6833, + "step": 31800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.862859725952148, + "loss_rtd": 0.39885783195495605, + "loss_sent": 0.08758208155632019, + "loss_sod": 0.1417115330696106, + "loss_total": 0.6281514167785645, + "step": 31899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.395966053009033, + "loss_rtd": 0.4056219160556793, + "loss_sent": 0.2164677232503891, + "loss_sod": 0.04559243470430374, + "loss_total": 0.6676820516586304, + "step": 31899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.1251325607299805, + "learning_rate": 8.464172754076679e-05, + "loss": 0.676, + "step": 31900 + }, + { + "epoch": 0.001998, + "loss_gen": 4.678037166595459, + "loss_rtd": 0.3924082815647125, + "loss_sent": 0.07607917487621307, + "loss_sod": 0.10162509977817535, + "loss_total": 0.5701125860214233, + "step": 31999 + }, + { + "epoch": 0.001998, + "loss_gen": 4.401127815246582, + "loss_rtd": 0.40330788493156433, + "loss_sent": 0.3779938519001007, + "loss_sod": 0.018371429294347763, + "loss_total": 0.7996731996536255, + "step": 31999 + }, + { + "epoch": 0.002, + "grad_norm": 1.875430703163147, + "learning_rate": 8.461883782066705e-05, + "loss": 0.6782, + "step": 32000 + }, + { + "epoch": 0.002, + "eval_loss": 0.6673182845115662, + "eval_runtime": 157.6853, + "eval_samples_per_second": 97.936, + "eval_steps_per_second": 0.767, + "step": 32000 + }, + { + "epoch": 0.002198, + "loss_gen": 4.48441743850708, + "loss_rtd": 0.40013572573661804, + "loss_sent": 0.11351187527179718, + "loss_sod": 0.25612175464630127, + "loss_total": 0.7697693705558777, + "step": 32099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.7465925216674805, + "loss_rtd": 0.4035055637359619, + "loss_sent": 0.298244446516037, + "loss_sod": 0.03680422157049179, + "loss_total": 0.7385542392730713, + "step": 32099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.017162799835205, + "learning_rate": 8.459593415610359e-05, + "loss": 0.6839, + "step": 32100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.524750709533691, + "loss_rtd": 0.4050101041793823, + "loss_sent": 0.37396883964538574, + "loss_sod": 0.03519045189023018, + "loss_total": 0.8141694068908691, + "step": 32199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.473918437957764, + "loss_rtd": 0.40793168544769287, + "loss_sent": 0.27542105317115784, + "loss_sod": 0.06677190959453583, + "loss_total": 0.7501246929168701, + "step": 32199 + }, + { + "epoch": 0.0024, + "grad_norm": 3.0965168476104736, + "learning_rate": 8.457301655630196e-05, + "loss": 0.6885, + "step": 32200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.752264499664307, + "loss_rtd": 0.4250895082950592, + "loss_sent": 0.12414298951625824, + "loss_sod": 0.08618341386318207, + "loss_total": 0.6354159116744995, + "step": 32299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.138677597045898, + "loss_rtd": 0.40253880620002747, + "loss_sent": 0.01680273376405239, + "loss_sod": 0.23747926950454712, + "loss_total": 0.6568208336830139, + "step": 32299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.1575840711593628, + "learning_rate": 8.455008503049342e-05, + "loss": 0.6679, + "step": 32300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.358146667480469, + "loss_rtd": 0.40618860721588135, + "loss_sent": 0.05179322510957718, + "loss_sod": 0.22525230050086975, + "loss_total": 0.6832340955734253, + "step": 32399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.595084190368652, + "loss_rtd": 0.39770156145095825, + "loss_sent": 0.1839921921491623, + "loss_sod": 0.036973558366298676, + "loss_total": 0.6186673045158386, + "step": 32399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.462870717048645, + "learning_rate": 8.452713958791477e-05, + "loss": 0.6767, + "step": 32400 + }, + { + "epoch": 0.002998, + "loss_gen": 4.643150806427002, + "loss_rtd": 0.40366634726524353, + "loss_sent": 0.532322347164154, + "loss_sod": 0.17457786202430725, + "loss_total": 1.1105666160583496, + "step": 32499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.605870723724365, + "loss_rtd": 0.3856964707374573, + "loss_sent": 0.13303084671497345, + "loss_sod": 0.08971034735441208, + "loss_total": 0.6084376573562622, + "step": 32499 + }, + { + "epoch": 0.003, + "grad_norm": 2.468165636062622, + "learning_rate": 8.450418023780839e-05, + "loss": 0.6946, + "step": 32500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.285338401794434, + "loss_rtd": 0.40404659509658813, + "loss_sent": 0.04896371811628342, + "loss_sod": 0.11847255378961563, + "loss_total": 0.5714828968048096, + "step": 32599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.491969108581543, + "loss_rtd": 0.3990192711353302, + "loss_sent": 0.08510634303092957, + "loss_sod": 0.02022891864180565, + "loss_total": 0.5043545365333557, + "step": 32599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.7647584080696106, + "learning_rate": 8.448120698942237e-05, + "loss": 0.6792, + "step": 32600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.343130111694336, + "loss_rtd": 0.41738593578338623, + "loss_sent": 0.33898279070854187, + "loss_sod": 0.1235346645116806, + "loss_total": 0.8799034357070923, + "step": 32699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.7674994468688965, + "loss_rtd": 0.3970383107662201, + "loss_sent": 0.2672957181930542, + "loss_sod": 0.03581714630126953, + "loss_total": 0.7001512050628662, + "step": 32699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.5911026000976562, + "learning_rate": 8.445821985201028e-05, + "loss": 0.698, + "step": 32700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.213858604431152, + "loss_rtd": 0.3942485749721527, + "loss_sent": 0.2171388417482376, + "loss_sod": 0.014159854501485825, + "loss_total": 0.6255472898483276, + "step": 32799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.432817459106445, + "loss_rtd": 0.3746455907821655, + "loss_sent": 0.11468903720378876, + "loss_sod": 0.03782479837536812, + "loss_total": 0.5271594524383545, + "step": 32799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.3355274200439453, + "learning_rate": 8.443521883483136e-05, + "loss": 0.6654, + "step": 32800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.383444309234619, + "loss_rtd": 0.3847176432609558, + "loss_sent": 0.14574401080608368, + "loss_sod": 0.1299697458744049, + "loss_total": 0.660431444644928, + "step": 32899 + }, + { + "epoch": 0.003798, + "loss_gen": 4.477179050445557, + "loss_rtd": 0.3920685946941376, + "loss_sent": 0.2143167108297348, + "loss_sod": 0.005493156611919403, + "loss_total": 0.6118784546852112, + "step": 32899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.910675585269928, + "learning_rate": 8.44122039471504e-05, + "loss": 0.6831, + "step": 32900 + }, + { + "epoch": 0.003998, + "loss_gen": 3.8524093627929688, + "loss_rtd": 0.39162158966064453, + "loss_sent": 0.06359441578388214, + "loss_sod": 0.03046536259353161, + "loss_total": 0.48568135499954224, + "step": 32999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.761472702026367, + "loss_rtd": 0.4120732247829437, + "loss_sent": 0.19595538079738617, + "loss_sod": 0.14814525842666626, + "loss_total": 0.756173849105835, + "step": 32999 + }, + { + "epoch": 0.004, + "grad_norm": 0.9662920832633972, + "learning_rate": 8.438917519823782e-05, + "loss": 0.6769, + "step": 33000 + }, + { + "epoch": 0.004, + "eval_loss": 0.6491342782974243, + "eval_runtime": 150.6483, + "eval_samples_per_second": 102.51, + "eval_steps_per_second": 0.803, + "step": 33000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.339219093322754, + "loss_rtd": 0.3928168714046478, + "loss_sent": 0.1766635775566101, + "loss_sod": 0.020830631256103516, + "loss_total": 0.5903110504150391, + "step": 33099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.55030632019043, + "loss_rtd": 0.4039199650287628, + "loss_sent": 0.13374005258083344, + "loss_sod": 0.1401435136795044, + "loss_total": 0.6778035163879395, + "step": 33099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.040810227394104, + "learning_rate": 8.436613259736958e-05, + "loss": 0.6924, + "step": 33100 + }, + { + "epoch": 0.004398, + "loss_gen": 4.651073455810547, + "loss_rtd": 0.3909187316894531, + "loss_sent": 0.237695574760437, + "loss_sod": 0.054423652589321136, + "loss_total": 0.6830379962921143, + "step": 33199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.0702948570251465, + "loss_rtd": 0.39740777015686035, + "loss_sent": 7.554675539722666e-05, + "loss_sod": 0.35460132360458374, + "loss_total": 0.7520846128463745, + "step": 33199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.9790658354759216, + "learning_rate": 8.434307615382724e-05, + "loss": 0.6559, + "step": 33200 + }, + { + "epoch": 0.004598, + "loss_gen": 3.6085634231567383, + "loss_rtd": 0.3859209716320038, + "loss_sent": 0.04533249884843826, + "loss_sod": 0.14941275119781494, + "loss_total": 0.5806662440299988, + "step": 33299 + }, + { + "epoch": 0.004598, + "loss_gen": 4.367456436157227, + "loss_rtd": 0.39892175793647766, + "loss_sent": 0.14764036238193512, + "loss_sod": 0.012713832780718803, + "loss_total": 0.5592759251594543, + "step": 33299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.9494178295135498, + "learning_rate": 8.432000587689792e-05, + "loss": 0.68, + "step": 33300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.770630359649658, + "loss_rtd": 0.38550376892089844, + "loss_sent": 0.08192545175552368, + "loss_sod": 0.010527098551392555, + "loss_total": 0.4779563248157501, + "step": 33399 + }, + { + "epoch": 0.004798, + "loss_gen": 4.432707786560059, + "loss_rtd": 0.4145929515361786, + "loss_sent": 0.12205509841442108, + "loss_sod": 0.0853184163570404, + "loss_total": 0.6219664812088013, + "step": 33399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.7924001812934875, + "learning_rate": 8.429692177587435e-05, + "loss": 0.6786, + "step": 33400 + }, + { + "epoch": 0.004998, + "loss_gen": 3.980473756790161, + "loss_rtd": 0.3727272152900696, + "loss_sent": 0.0033619177993386984, + "loss_sod": 0.2604028582572937, + "loss_total": 0.6364920139312744, + "step": 33499 + }, + { + "epoch": 0.004998, + "loss_gen": 4.709641456604004, + "loss_rtd": 0.3890208899974823, + "loss_sent": 0.13390415906906128, + "loss_sod": 0.021367769688367844, + "loss_total": 0.5442928075790405, + "step": 33499 + }, + { + "epoch": 0.005, + "grad_norm": 1.1371712684631348, + "learning_rate": 8.427382386005477e-05, + "loss": 0.6749, + "step": 33500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.686338901519775, + "loss_rtd": 0.40714138746261597, + "loss_sent": 0.23763643205165863, + "loss_sod": 0.08537831157445908, + "loss_total": 0.7301561236381531, + "step": 33599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.592874050140381, + "loss_rtd": 0.4045478403568268, + "loss_sent": 0.3227478563785553, + "loss_sod": 0.20993739366531372, + "loss_total": 0.9372330904006958, + "step": 33599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.5759620666503906, + "learning_rate": 8.425071213874308e-05, + "loss": 0.6726, + "step": 33600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.828529357910156, + "loss_rtd": 0.391663134098053, + "loss_sent": 0.2265581488609314, + "loss_sod": 0.15814724564552307, + "loss_total": 0.7763685584068298, + "step": 33699 + }, + { + "epoch": 0.005398, + "loss_gen": 4.472051620483398, + "loss_rtd": 0.3902159333229065, + "loss_sent": 0.12034723907709122, + "loss_sod": 0.020356986671686172, + "loss_total": 0.530920147895813, + "step": 33699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.8813192248344421, + "learning_rate": 8.422758662124858e-05, + "loss": 0.6617, + "step": 33700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.132448196411133, + "loss_rtd": 0.38475725054740906, + "loss_sent": 0.057623039931058884, + "loss_sod": 0.10686932504177094, + "loss_total": 0.5492495894432068, + "step": 33799 + }, + { + "epoch": 0.005598, + "loss_gen": 4.987173080444336, + "loss_rtd": 0.3826753497123718, + "loss_sent": 0.14800354838371277, + "loss_sod": 0.0643434152007103, + "loss_total": 0.5950223207473755, + "step": 33799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.1166493892669678, + "learning_rate": 8.420444731688633e-05, + "loss": 0.676, + "step": 33800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.062664985656738, + "loss_rtd": 0.38509470224380493, + "loss_sent": 0.10830825567245483, + "loss_sod": 0.15515291690826416, + "loss_total": 0.6485558748245239, + "step": 33899 + }, + { + "epoch": 0.005798, + "loss_gen": 3.500702381134033, + "loss_rtd": 0.38332781195640564, + "loss_sent": 0.005528334993869066, + "loss_sod": 0.12189139425754547, + "loss_total": 0.5107475519180298, + "step": 33899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.8776567578315735, + "learning_rate": 8.418129423497677e-05, + "loss": 0.6831, + "step": 33900 + }, + { + "epoch": 0.005998, + "loss_gen": 4.376248359680176, + "loss_rtd": 0.39717715978622437, + "loss_sent": 0.24568745493888855, + "loss_sod": 0.017836254090070724, + "loss_total": 0.6607008576393127, + "step": 33999 + }, + { + "epoch": 0.005998, + "loss_gen": 4.339692115783691, + "loss_rtd": 0.4054630398750305, + "loss_sent": 0.17948582768440247, + "loss_sod": 0.026773886755108833, + "loss_total": 0.6117227673530579, + "step": 33999 + }, + { + "epoch": 0.006, + "grad_norm": 1.105279803276062, + "learning_rate": 8.415812738484599e-05, + "loss": 0.656, + "step": 34000 + }, + { + "epoch": 0.006, + "eval_loss": 0.6587226986885071, + "eval_runtime": 150.7649, + "eval_samples_per_second": 102.431, + "eval_steps_per_second": 0.803, + "step": 34000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.84349250793457, + "loss_rtd": 0.3853917419910431, + "loss_sent": 0.2796160578727722, + "loss_sod": 0.18595334887504578, + "loss_total": 0.8509611487388611, + "step": 34099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.1158857345581055, + "loss_rtd": 0.4004764258861542, + "loss_sent": 0.16802577674388885, + "loss_sod": 0.07047758996486664, + "loss_total": 0.6389797925949097, + "step": 34099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.1270217895507812, + "learning_rate": 8.413494677582558e-05, + "loss": 0.6873, + "step": 34100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.219707489013672, + "loss_rtd": 0.3944161534309387, + "loss_sent": 0.15331660211086273, + "loss_sod": 0.0021382453851401806, + "loss_total": 0.549871027469635, + "step": 34199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.6819562911987305, + "loss_rtd": 0.4039584994316101, + "loss_sent": 0.5780501961708069, + "loss_sod": 0.11269272863864899, + "loss_total": 1.0947014093399048, + "step": 34199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.1589312553405762, + "learning_rate": 8.411175241725268e-05, + "loss": 0.6923, + "step": 34200 + }, + { + "epoch": 0.006598, + "loss_gen": 3.6634464263916016, + "loss_rtd": 0.39810386300086975, + "loss_sent": 0.022716881707310677, + "loss_sod": 0.36733466386795044, + "loss_total": 0.7881554365158081, + "step": 34299 + }, + { + "epoch": 0.006598, + "loss_gen": 4.041938304901123, + "loss_rtd": 0.39642512798309326, + "loss_sent": 0.07152106612920761, + "loss_sod": 0.08808917552232742, + "loss_total": 0.5560353398323059, + "step": 34299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.8713497519493103, + "learning_rate": 8.408854431847e-05, + "loss": 0.6774, + "step": 34300 + }, + { + "epoch": 0.006798, + "loss_gen": 4.1711907386779785, + "loss_rtd": 0.3844755291938782, + "loss_sent": 0.12414493411779404, + "loss_sod": 0.12355874478816986, + "loss_total": 0.6321792006492615, + "step": 34399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.556699275970459, + "loss_rtd": 0.38386863470077515, + "loss_sent": 0.08765093982219696, + "loss_sod": 0.10667753964662552, + "loss_total": 0.5781971216201782, + "step": 34399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.401253581047058, + "learning_rate": 8.406532248882573e-05, + "loss": 0.6888, + "step": 34400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.466337203979492, + "loss_rtd": 0.3804284930229187, + "loss_sent": 0.2287617027759552, + "loss_sod": 0.049220383167266846, + "loss_total": 0.6584105491638184, + "step": 34499 + }, + { + "epoch": 0.006998, + "loss_gen": 4.712965488433838, + "loss_rtd": 0.38402697443962097, + "loss_sent": 0.3427942991256714, + "loss_sod": 0.13551780581474304, + "loss_total": 0.8623390793800354, + "step": 34499 + }, + { + "epoch": 0.007, + "grad_norm": 2.533766984939575, + "learning_rate": 8.404208693767365e-05, + "loss": 0.6791, + "step": 34500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.2892608642578125, + "loss_rtd": 0.3866655230522156, + "loss_sent": 0.2026486098766327, + "loss_sod": 0.10786180198192596, + "loss_total": 0.6971759796142578, + "step": 34599 + }, + { + "epoch": 0.007198, + "loss_gen": 3.8161721229553223, + "loss_rtd": 0.42922765016555786, + "loss_sent": 0.0005910772597417235, + "loss_sod": 0.3839712142944336, + "loss_total": 0.813789963722229, + "step": 34599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.6854496002197266, + "learning_rate": 8.401883767437302e-05, + "loss": 0.6762, + "step": 34600 + }, + { + "epoch": 0.007398, + "loss_gen": 3.7486488819122314, + "loss_rtd": 0.3906943202018738, + "loss_sent": 0.0001909230777528137, + "loss_sod": 0.3329179883003235, + "loss_total": 0.7238032221794128, + "step": 34699 + }, + { + "epoch": 0.007398, + "loss_gen": 4.021856307983398, + "loss_rtd": 0.3831009268760681, + "loss_sent": 0.11416365206241608, + "loss_sod": 0.19775055348873138, + "loss_total": 0.6950151324272156, + "step": 34699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.3915011882781982, + "learning_rate": 8.399557470828863e-05, + "loss": 0.6615, + "step": 34700 + }, + { + "epoch": 0.007598, + "loss_gen": 4.43755578994751, + "loss_rtd": 0.39801058173179626, + "loss_sent": 0.15207479894161224, + "loss_sod": 0.04855339601635933, + "loss_total": 0.5986387729644775, + "step": 34799 + }, + { + "epoch": 0.007598, + "loss_gen": 4.49984884262085, + "loss_rtd": 0.3986855447292328, + "loss_sent": 0.1514422595500946, + "loss_sod": 0.05110839381814003, + "loss_total": 0.6012362241744995, + "step": 34799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.69097501039505, + "learning_rate": 8.397229804879084e-05, + "loss": 0.6594, + "step": 34800 + }, + { + "epoch": 0.007798, + "loss_gen": 4.759007930755615, + "loss_rtd": 0.38655874133110046, + "loss_sent": 0.21478146314620972, + "loss_sod": 0.04757128655910492, + "loss_total": 0.6489114761352539, + "step": 34899 + }, + { + "epoch": 0.007798, + "loss_gen": 3.825989246368408, + "loss_rtd": 0.39063310623168945, + "loss_sent": 0.015799371525645256, + "loss_sod": 0.2723545432090759, + "loss_total": 0.6787869930267334, + "step": 34899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.1858468055725098, + "learning_rate": 8.394900770525544e-05, + "loss": 0.6762, + "step": 34900 + }, + { + "epoch": 0.007998, + "loss_gen": 4.985413551330566, + "loss_rtd": 0.3810838460922241, + "loss_sent": 0.06006404012441635, + "loss_sod": 0.06051886826753616, + "loss_total": 0.5016667246818542, + "step": 34999 + }, + { + "epoch": 0.007998, + "loss_gen": 4.667548179626465, + "loss_rtd": 0.38158246874809265, + "loss_sent": 0.08527212589979172, + "loss_sod": 0.0968073233962059, + "loss_total": 0.5636619329452515, + "step": 34999 + }, + { + "epoch": 0.008, + "grad_norm": 0.9259592890739441, + "learning_rate": 8.392570368706379e-05, + "loss": 0.6555, + "step": 35000 + }, + { + "epoch": 0.008, + "eval_loss": 0.6558483242988586, + "eval_runtime": 152.0046, + "eval_samples_per_second": 101.596, + "eval_steps_per_second": 0.796, + "step": 35000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.283824443817139, + "loss_rtd": 0.3852701485157013, + "loss_sent": 0.20818640291690826, + "loss_sod": 0.020567061379551888, + "loss_total": 0.6140236258506775, + "step": 35099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.41359281539917, + "loss_rtd": 0.3706549108028412, + "loss_sent": 0.1305951327085495, + "loss_sod": 0.12105558067560196, + "loss_total": 0.6223056316375732, + "step": 35099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.8833096623420715, + "learning_rate": 8.390238600360276e-05, + "loss": 0.6708, + "step": 35100 + }, + { + "epoch": 0.008398, + "loss_gen": 3.6215407848358154, + "loss_rtd": 0.4050199091434479, + "loss_sent": 0.0112560810521245, + "loss_sod": 0.2151399552822113, + "loss_total": 0.6314159631729126, + "step": 35199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.06948184967041, + "loss_rtd": 0.41497963666915894, + "loss_sent": 0.08226001262664795, + "loss_sod": 0.14170867204666138, + "loss_total": 0.6389483213424683, + "step": 35199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.0082366466522217, + "learning_rate": 8.38790546642647e-05, + "loss": 0.6589, + "step": 35200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.5622711181640625, + "loss_rtd": 0.3707863390445709, + "loss_sent": 0.17080971598625183, + "loss_sod": 0.022889114916324615, + "loss_total": 0.5644851922988892, + "step": 35299 + }, + { + "epoch": 0.008598, + "loss_gen": 3.8004684448242188, + "loss_rtd": 0.37701067328453064, + "loss_sent": 0.001095216372050345, + "loss_sod": 0.3147203028202057, + "loss_total": 0.6928262114524841, + "step": 35299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.8075337409973145, + "learning_rate": 8.385570967844747e-05, + "loss": 0.6783, + "step": 35300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.531389236450195, + "loss_rtd": 0.3917960822582245, + "loss_sent": 0.3814413845539093, + "loss_sod": 0.029178844764828682, + "loss_total": 0.8024163246154785, + "step": 35399 + }, + { + "epoch": 0.008798, + "loss_gen": 4.834838390350342, + "loss_rtd": 0.3757188022136688, + "loss_sent": 0.09382227808237076, + "loss_sod": 0.1580665558576584, + "loss_total": 0.6276076436042786, + "step": 35399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.1063945293426514, + "learning_rate": 8.383235105555445e-05, + "loss": 0.6743, + "step": 35400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.395575523376465, + "loss_rtd": 0.40566393733024597, + "loss_sent": 0.07591883838176727, + "loss_sod": 0.06056656688451767, + "loss_total": 0.5421493649482727, + "step": 35499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.709528923034668, + "loss_rtd": 0.38671427965164185, + "loss_sent": 0.1463877558708191, + "loss_sod": 0.0882708728313446, + "loss_total": 0.6213729381561279, + "step": 35499 + }, + { + "epoch": 0.009, + "grad_norm": 0.64141446352005, + "learning_rate": 8.380897880499445e-05, + "loss": 0.6711, + "step": 35500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.119242191314697, + "loss_rtd": 0.3939517140388489, + "loss_sent": 0.053018298000097275, + "loss_sod": 0.22627955675125122, + "loss_total": 0.6732495427131653, + "step": 35599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.300588607788086, + "loss_rtd": 0.38656580448150635, + "loss_sent": 0.10431955754756927, + "loss_sod": 0.021519005298614502, + "loss_total": 0.5124043822288513, + "step": 35599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.8485056161880493, + "learning_rate": 8.378559293618183e-05, + "loss": 0.6813, + "step": 35600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.3555216789245605, + "loss_rtd": 0.3831770122051239, + "loss_sent": 0.14471060037612915, + "loss_sod": 0.005934491753578186, + "loss_total": 0.5338221192359924, + "step": 35699 + }, + { + "epoch": 0.009398, + "loss_gen": 4.078190326690674, + "loss_rtd": 0.3944447636604309, + "loss_sent": 0.12811420857906342, + "loss_sod": 0.13279440999031067, + "loss_total": 0.6553534269332886, + "step": 35699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.3532829284667969, + "learning_rate": 8.376219345853642e-05, + "loss": 0.6694, + "step": 35700 + }, + { + "epoch": 0.009598, + "loss_gen": 4.143390655517578, + "loss_rtd": 0.40090420842170715, + "loss_sent": 0.00558770215138793, + "loss_sod": 0.2737257480621338, + "loss_total": 0.6802176237106323, + "step": 35799 + }, + { + "epoch": 0.009598, + "loss_gen": 4.052029132843018, + "loss_rtd": 0.3747907280921936, + "loss_sent": 0.08215665817260742, + "loss_sod": 0.07205349206924438, + "loss_total": 0.5290008783340454, + "step": 35799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.2565505504608154, + "learning_rate": 8.373878038148353e-05, + "loss": 0.6617, + "step": 35800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.08327579498291, + "loss_rtd": 0.3714620769023895, + "loss_sent": 0.3929314911365509, + "loss_sod": 0.050866659730672836, + "loss_total": 0.8152602314949036, + "step": 35899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.571104526519775, + "loss_rtd": 0.42713063955307007, + "loss_sent": 0.25466254353523254, + "loss_sod": 0.014409082010388374, + "loss_total": 0.696202278137207, + "step": 35899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.9676883220672607, + "learning_rate": 8.371535371445392e-05, + "loss": 0.6728, + "step": 35900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.5458760261535645, + "loss_rtd": 0.38131198287010193, + "loss_sent": 0.09672313928604126, + "loss_sod": 0.04669678956270218, + "loss_total": 0.5247318744659424, + "step": 35999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.419743537902832, + "loss_rtd": 0.39327701926231384, + "loss_sent": 0.14998316764831543, + "loss_sod": 0.05913712829351425, + "loss_total": 0.6023973226547241, + "step": 35999 + }, + { + "epoch": 0.01, + "grad_norm": 0.7521686553955078, + "learning_rate": 8.369191346688389e-05, + "loss": 0.6615, + "step": 36000 + }, + { + "epoch": 0.01, + "eval_loss": 0.657751739025116, + "eval_runtime": 150.6017, + "eval_samples_per_second": 102.542, + "eval_steps_per_second": 0.803, + "step": 36000 + }, + { + "epoch": 0.010198, + "loss_gen": 4.568459987640381, + "loss_rtd": 0.38394075632095337, + "loss_sent": 0.2844971716403961, + "loss_sod": 0.012531893327832222, + "loss_total": 0.6809698343276978, + "step": 36099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.570716857910156, + "loss_rtd": 0.3938712775707245, + "loss_sent": 0.18808907270431519, + "loss_sod": 0.008191872388124466, + "loss_total": 0.5901522636413574, + "step": 36099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8684414625167847, + "learning_rate": 8.366845964821512e-05, + "loss": 0.6718, + "step": 36100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.625585079193115, + "loss_rtd": 0.38836559653282166, + "loss_sent": 0.1486426293849945, + "loss_sod": 0.03211354464292526, + "loss_total": 0.569121778011322, + "step": 36199 + }, + { + "epoch": 0.010398, + "loss_gen": 4.721551895141602, + "loss_rtd": 0.3992859423160553, + "loss_sent": 0.0891035795211792, + "loss_sod": 0.07581065595149994, + "loss_total": 0.5642001628875732, + "step": 36199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.025210976600647, + "learning_rate": 8.364499226789485e-05, + "loss": 0.6698, + "step": 36200 + }, + { + "epoch": 0.010598, + "loss_gen": 4.264175891876221, + "loss_rtd": 0.39955437183380127, + "loss_sent": 0.18356764316558838, + "loss_sod": 0.056868620216846466, + "loss_total": 0.6399906277656555, + "step": 36299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.184117794036865, + "loss_rtd": 0.38804009556770325, + "loss_sent": 0.08347688615322113, + "loss_sod": 0.12625752389431, + "loss_total": 0.5977745056152344, + "step": 36299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9175646305084229, + "learning_rate": 8.362151133537571e-05, + "loss": 0.6667, + "step": 36300 + }, + { + "epoch": 0.010798, + "loss_gen": 4.556824207305908, + "loss_rtd": 0.382408082485199, + "loss_sent": 0.25232207775115967, + "loss_sod": 0.043600019067525864, + "loss_total": 0.6783301830291748, + "step": 36399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.242853164672852, + "loss_rtd": 0.38585180044174194, + "loss_sent": 0.2861935794353485, + "loss_sod": 0.19454966485500336, + "loss_total": 0.8665950298309326, + "step": 36399 + }, + { + "epoch": 0.0108, + "grad_norm": 2.2240793704986572, + "learning_rate": 8.359801686011582e-05, + "loss": 0.6845, + "step": 36400 + }, + { + "epoch": 0.010998, + "loss_gen": 4.583993434906006, + "loss_rtd": 0.39570391178131104, + "loss_sent": 0.07827243953943253, + "loss_sod": 0.08733754605054855, + "loss_total": 0.5613139271736145, + "step": 36499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.440932750701904, + "loss_rtd": 0.3896424472332001, + "loss_sent": 0.25126516819000244, + "loss_sod": 0.08420707285404205, + "loss_total": 0.7251147031784058, + "step": 36499 + }, + { + "epoch": 0.011, + "grad_norm": 1.1776618957519531, + "learning_rate": 8.357450885157876e-05, + "loss": 0.6628, + "step": 36500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.803018093109131, + "loss_rtd": 0.4053913950920105, + "loss_sent": 0.23469845950603485, + "loss_sod": 0.1651773452758789, + "loss_total": 0.8052672147750854, + "step": 36599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.398932456970215, + "loss_rtd": 0.39668285846710205, + "loss_sent": 0.3532399833202362, + "loss_sod": 0.05229349806904793, + "loss_total": 0.8022163510322571, + "step": 36599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.701512336730957, + "learning_rate": 8.355098731923357e-05, + "loss": 0.6937, + "step": 36600 + }, + { + "epoch": 0.011398, + "loss_gen": 3.498446464538574, + "loss_rtd": 0.3683341145515442, + "loss_sent": 0.024808038026094437, + "loss_sod": 0.2724393904209137, + "loss_total": 0.6655815839767456, + "step": 36699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.199820041656494, + "loss_rtd": 0.38264238834381104, + "loss_sent": 0.1850346475839615, + "loss_sod": 0.042693350464105606, + "loss_total": 0.610370397567749, + "step": 36699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.188109278678894, + "learning_rate": 8.352745227255467e-05, + "loss": 0.6865, + "step": 36700 + }, + { + "epoch": 0.011598, + "loss_gen": 4.962965965270996, + "loss_rtd": 0.3960812985897064, + "loss_sent": 0.22524335980415344, + "loss_sod": 0.1821294128894806, + "loss_total": 0.8034540414810181, + "step": 36799 + }, + { + "epoch": 0.011598, + "loss_gen": 4.930484771728516, + "loss_rtd": 0.385154128074646, + "loss_sent": 0.27918630838394165, + "loss_sod": 0.0164080411195755, + "loss_total": 0.680748462677002, + "step": 36799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.9394999742507935, + "learning_rate": 8.350390372102201e-05, + "loss": 0.6652, + "step": 36800 + }, + { + "epoch": 0.011798, + "loss_gen": 4.238772392272949, + "loss_rtd": 0.3739355504512787, + "loss_sent": 0.1498599648475647, + "loss_sod": 0.0525074228644371, + "loss_total": 0.5763029456138611, + "step": 36899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.5995635986328125, + "loss_rtd": 0.39057114720344543, + "loss_sent": 0.3132609724998474, + "loss_sod": 0.03634927421808243, + "loss_total": 0.7401813864707947, + "step": 36899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.9696013927459717, + "learning_rate": 8.348034167412094e-05, + "loss": 0.6802, + "step": 36900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.623749732971191, + "loss_rtd": 0.37866002321243286, + "loss_sent": 0.30305925011634827, + "loss_sod": 0.01630227267742157, + "loss_total": 0.6980215311050415, + "step": 36999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.484660625457764, + "loss_rtd": 0.3995550870895386, + "loss_sent": 0.1641751378774643, + "loss_sod": 0.0898779034614563, + "loss_total": 0.6536081433296204, + "step": 36999 + }, + { + "epoch": 0.012, + "grad_norm": 2.3883414268493652, + "learning_rate": 8.345676614134226e-05, + "loss": 0.6647, + "step": 37000 + }, + { + "epoch": 0.012, + "eval_loss": 0.6482465267181396, + "eval_runtime": 151.0444, + "eval_samples_per_second": 102.241, + "eval_steps_per_second": 0.801, + "step": 37000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.435266017913818, + "loss_rtd": 0.3864968419075012, + "loss_sent": 0.20693446695804596, + "loss_sod": 0.024848148226737976, + "loss_total": 0.6182794570922852, + "step": 37099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.258318901062012, + "loss_rtd": 0.3999250829219818, + "loss_sent": 0.011927814222872257, + "loss_sod": 0.17478594183921814, + "loss_total": 0.586638867855072, + "step": 37099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.085282802581787, + "learning_rate": 8.343317713218217e-05, + "loss": 0.6692, + "step": 37100 + }, + { + "epoch": 0.012398, + "loss_gen": 3.623622179031372, + "loss_rtd": 0.3829926550388336, + "loss_sent": 0.00037199995131231844, + "loss_sod": 0.31614941358566284, + "loss_total": 0.6995140910148621, + "step": 37199 + }, + { + "epoch": 0.012398, + "loss_gen": 3.788888454437256, + "loss_rtd": 0.3892027735710144, + "loss_sent": 0.1038016527891159, + "loss_sod": 0.014133607037365437, + "loss_total": 0.5071380138397217, + "step": 37199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.115685224533081, + "learning_rate": 8.340957465614233e-05, + "loss": 0.6759, + "step": 37200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.963636875152588, + "loss_rtd": 0.3884185552597046, + "loss_sent": 0.4500541090965271, + "loss_sod": 0.053494930267333984, + "loss_total": 0.8919675946235657, + "step": 37299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.432641506195068, + "loss_rtd": 0.38547518849372864, + "loss_sent": 0.19582559168338776, + "loss_sod": 0.029296714812517166, + "loss_total": 0.6105974912643433, + "step": 37299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.4357924461364746, + "learning_rate": 8.33859587227298e-05, + "loss": 0.6745, + "step": 37300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.658502578735352, + "loss_rtd": 0.3811958432197571, + "loss_sent": 0.15638990700244904, + "loss_sod": 0.14238518476486206, + "loss_total": 0.679970920085907, + "step": 37399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.4922027587890625, + "loss_rtd": 0.4064105749130249, + "loss_sent": 0.10675998777151108, + "loss_sod": 0.04498648643493652, + "loss_total": 0.5581570267677307, + "step": 37399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.8286881446838379, + "learning_rate": 8.33623293414571e-05, + "loss": 0.6565, + "step": 37400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.656504154205322, + "loss_rtd": 0.4109244644641876, + "loss_sent": 0.1238277480006218, + "loss_sod": 0.061936601996421814, + "loss_total": 0.5966888070106506, + "step": 37499 + }, + { + "epoch": 0.012998, + "loss_gen": 4.614788055419922, + "loss_rtd": 0.38480427861213684, + "loss_sent": 0.13607770204544067, + "loss_sod": 0.043309397995471954, + "loss_total": 0.5641913414001465, + "step": 37499 + }, + { + "epoch": 0.013, + "grad_norm": 0.7024715542793274, + "learning_rate": 8.333868652184216e-05, + "loss": 0.6546, + "step": 37500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.133355617523193, + "loss_rtd": 0.3928178548812866, + "loss_sent": 0.10050570219755173, + "loss_sod": 0.03258616849780083, + "loss_total": 0.5259097218513489, + "step": 37599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.330506324768066, + "loss_rtd": 0.40831834077835083, + "loss_sent": 0.07231014221906662, + "loss_sod": 0.05504155904054642, + "loss_total": 0.5356700420379639, + "step": 37599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.8137696981430054, + "learning_rate": 8.331503027340824e-05, + "loss": 0.6499, + "step": 37600 + }, + { + "epoch": 0.013398, + "loss_gen": 4.40916633605957, + "loss_rtd": 0.38783833384513855, + "loss_sent": 0.27936530113220215, + "loss_sod": 0.176782488822937, + "loss_total": 0.8439861536026001, + "step": 37699 + }, + { + "epoch": 0.013398, + "loss_gen": 4.736486911773682, + "loss_rtd": 0.38095638155937195, + "loss_sent": 0.24123525619506836, + "loss_sod": 0.06507010757923126, + "loss_total": 0.6872617602348328, + "step": 37699 + }, + { + "epoch": 0.0134, + "grad_norm": 3.4207136631011963, + "learning_rate": 8.329136060568412e-05, + "loss": 0.6706, + "step": 37700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.38934326171875, + "loss_rtd": 0.4075080156326294, + "loss_sent": 0.26131579279899597, + "loss_sod": 0.09944939613342285, + "loss_total": 0.7682732343673706, + "step": 37799 + }, + { + "epoch": 0.013598, + "loss_gen": 4.540380001068115, + "loss_rtd": 0.3892989456653595, + "loss_sent": 0.10794064402580261, + "loss_sod": 0.09877896308898926, + "loss_total": 0.5960185527801514, + "step": 37799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.143424391746521, + "learning_rate": 8.326767752820392e-05, + "loss": 0.6694, + "step": 37800 + }, + { + "epoch": 0.013798, + "loss_gen": 3.6130313873291016, + "loss_rtd": 0.3840130567550659, + "loss_sent": 0.0001633672945899889, + "loss_sod": 0.34217336773872375, + "loss_total": 0.7263497710227966, + "step": 37899 + }, + { + "epoch": 0.013798, + "loss_gen": 3.312906265258789, + "loss_rtd": 0.3689209520816803, + "loss_sent": 0.00043438852299004793, + "loss_sod": 0.4225320518016815, + "loss_total": 0.7918874025344849, + "step": 37899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.6495534181594849, + "learning_rate": 8.32439810505072e-05, + "loss": 0.6577, + "step": 37900 + }, + { + "epoch": 0.013998, + "loss_gen": 4.597347736358643, + "loss_rtd": 0.38990819454193115, + "loss_sent": 0.1358875185251236, + "loss_sod": 0.11874350160360336, + "loss_total": 0.6445391774177551, + "step": 37999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.741082668304443, + "loss_rtd": 0.39351511001586914, + "loss_sent": 0.06590181589126587, + "loss_sod": 0.06430280953645706, + "loss_total": 0.5237197279930115, + "step": 37999 + }, + { + "epoch": 0.014, + "grad_norm": 1.036952257156372, + "learning_rate": 8.322027118213888e-05, + "loss": 0.6548, + "step": 38000 + }, + { + "epoch": 0.014, + "eval_loss": 0.6540851593017578, + "eval_runtime": 150.9211, + "eval_samples_per_second": 102.325, + "eval_steps_per_second": 0.802, + "step": 38000 + }, + { + "epoch": 0.014198, + "loss_gen": 4.49874210357666, + "loss_rtd": 0.38914498686790466, + "loss_sent": 0.1910504251718521, + "loss_sod": 0.1836278736591339, + "loss_total": 0.7638232707977295, + "step": 38099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.400293827056885, + "loss_rtd": 0.3808160424232483, + "loss_sent": 0.07757776230573654, + "loss_sod": 0.0072662136517465115, + "loss_total": 0.465660035610199, + "step": 38099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.0191900730133057, + "learning_rate": 8.319654793264932e-05, + "loss": 0.6852, + "step": 38100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.592637538909912, + "loss_rtd": 0.3863178491592407, + "loss_sent": 0.0930735245347023, + "loss_sod": 0.05370680242776871, + "loss_total": 0.5330981612205505, + "step": 38199 + }, + { + "epoch": 0.014398, + "loss_gen": 3.6214210987091064, + "loss_rtd": 0.38091063499450684, + "loss_sent": 0.02442455105483532, + "loss_sod": 0.23057721555233002, + "loss_total": 0.6359124183654785, + "step": 38199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.9923510551452637, + "learning_rate": 8.31728113115942e-05, + "loss": 0.6628, + "step": 38200 + }, + { + "epoch": 0.014598, + "loss_gen": 4.115151405334473, + "loss_rtd": 0.38642704486846924, + "loss_sent": 0.04713524878025055, + "loss_sod": 0.09587834030389786, + "loss_total": 0.5294406414031982, + "step": 38299 + }, + { + "epoch": 0.014598, + "loss_gen": 3.4967713356018066, + "loss_rtd": 0.3726522624492645, + "loss_sent": 0.00012220637290738523, + "loss_sod": 0.4132821559906006, + "loss_total": 0.786056637763977, + "step": 38299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.0016167163848877, + "learning_rate": 8.314906132853466e-05, + "loss": 0.6608, + "step": 38300 + }, + { + "epoch": 0.014798, + "loss_gen": 4.444309711456299, + "loss_rtd": 0.3885973393917084, + "loss_sent": 0.264399915933609, + "loss_sod": 0.03305451199412346, + "loss_total": 0.6860517263412476, + "step": 38399 + }, + { + "epoch": 0.014798, + "loss_gen": 4.453540802001953, + "loss_rtd": 0.3865545988082886, + "loss_sent": 0.13625246286392212, + "loss_sod": 0.14132162928581238, + "loss_total": 0.6641287207603455, + "step": 38399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.8410499095916748, + "learning_rate": 8.312529799303719e-05, + "loss": 0.6666, + "step": 38400 + }, + { + "epoch": 0.014998, + "loss_gen": 4.6401472091674805, + "loss_rtd": 0.3889612853527069, + "loss_sent": 0.1997268944978714, + "loss_sod": 0.055990178138017654, + "loss_total": 0.6446783542633057, + "step": 38499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.636585235595703, + "loss_rtd": 0.3822641968727112, + "loss_sent": 0.27123889327049255, + "loss_sod": 0.02250303141772747, + "loss_total": 0.6760060787200928, + "step": 38499 + }, + { + "epoch": 0.015, + "grad_norm": 1.5303524732589722, + "learning_rate": 8.310152131467364e-05, + "loss": 0.6587, + "step": 38500 + }, + { + "epoch": 0.015198, + "loss_gen": 4.518683910369873, + "loss_rtd": 0.3969024419784546, + "loss_sent": 0.08406098186969757, + "loss_sod": 0.03365109860897064, + "loss_total": 0.5146145224571228, + "step": 38599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.361326217651367, + "loss_rtd": 0.37997597455978394, + "loss_sent": 0.09411624819040298, + "loss_sod": 0.12055913358926773, + "loss_total": 0.5946514010429382, + "step": 38599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.276465654373169, + "learning_rate": 8.307773130302126e-05, + "loss": 0.6576, + "step": 38600 + }, + { + "epoch": 0.015398, + "loss_gen": 4.824479103088379, + "loss_rtd": 0.3833303451538086, + "loss_sent": 0.08850919455289841, + "loss_sod": 0.049159783869981766, + "loss_total": 0.5209993124008179, + "step": 38699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.024838924407959, + "loss_rtd": 0.3834380507469177, + "loss_sent": 0.1682136356830597, + "loss_sod": 0.14718347787857056, + "loss_total": 0.6988351345062256, + "step": 38699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.8864040970802307, + "learning_rate": 8.305392796766266e-05, + "loss": 0.6651, + "step": 38700 + }, + { + "epoch": 0.015598, + "loss_gen": 4.777594089508057, + "loss_rtd": 0.38975656032562256, + "loss_sent": 0.20690903067588806, + "loss_sod": 0.07318736612796783, + "loss_total": 0.6698529720306396, + "step": 38799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.332812309265137, + "loss_rtd": 0.3724307417869568, + "loss_sent": 0.2214450240135193, + "loss_sod": 0.1805974543094635, + "loss_total": 0.774473249912262, + "step": 38799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.9250850677490234, + "learning_rate": 8.303011131818585e-05, + "loss": 0.6602, + "step": 38800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.956061840057373, + "loss_rtd": 0.3984396457672119, + "loss_sent": 0.061900459229946136, + "loss_sod": 0.08385099470615387, + "loss_total": 0.5441910624504089, + "step": 38899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.553220272064209, + "loss_rtd": 0.3981330096721649, + "loss_sent": 0.21911245584487915, + "loss_sod": 0.09834859520196915, + "loss_total": 0.7155940532684326, + "step": 38899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.7907063364982605, + "learning_rate": 8.300628136418415e-05, + "loss": 0.668, + "step": 38900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.462879657745361, + "loss_rtd": 0.399784654378891, + "loss_sent": 0.2433241605758667, + "loss_sod": 0.010670976713299751, + "loss_total": 0.6537798047065735, + "step": 38999 + }, + { + "epoch": 0.015998, + "loss_gen": 4.294644832611084, + "loss_rtd": 0.3938490152359009, + "loss_sent": 0.2199404537677765, + "loss_sod": 0.01899193599820137, + "loss_total": 0.6327813863754272, + "step": 38999 + }, + { + "epoch": 0.016, + "grad_norm": 1.236820936203003, + "learning_rate": 8.298243811525626e-05, + "loss": 0.6774, + "step": 39000 + }, + { + "epoch": 0.016, + "eval_loss": 0.6467982530593872, + "eval_runtime": 151.0106, + "eval_samples_per_second": 102.264, + "eval_steps_per_second": 0.801, + "step": 39000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.289419174194336, + "loss_rtd": 0.39666029810905457, + "loss_sent": 0.11812765151262283, + "loss_sod": 0.01028955727815628, + "loss_total": 0.5250775218009949, + "step": 39099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.909334182739258, + "loss_rtd": 0.40946900844573975, + "loss_sent": 0.1141701266169548, + "loss_sod": 0.1173042356967926, + "loss_total": 0.6409433484077454, + "step": 39099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.7614564299583435, + "learning_rate": 8.295858158100623e-05, + "loss": 0.6598, + "step": 39100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.207708358764648, + "loss_rtd": 0.38203272223472595, + "loss_sent": 0.29075103998184204, + "loss_sod": 0.021450048312544823, + "loss_total": 0.694233775138855, + "step": 39199 + }, + { + "epoch": 0.016398, + "loss_gen": 3.9822723865509033, + "loss_rtd": 0.39657244086265564, + "loss_sent": 0.21307756006717682, + "loss_sod": 0.0807478129863739, + "loss_total": 0.6903977990150452, + "step": 39199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.3364311456680298, + "learning_rate": 8.293471177104349e-05, + "loss": 0.6551, + "step": 39200 + }, + { + "epoch": 0.016598, + "loss_gen": 4.271689414978027, + "loss_rtd": 0.39816737174987793, + "loss_sent": 0.15250374376773834, + "loss_sod": 0.07506150007247925, + "loss_total": 0.6257326006889343, + "step": 39299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.7730560302734375, + "loss_rtd": 0.40210118889808655, + "loss_sent": 0.21555040776729584, + "loss_sod": 0.05588348209857941, + "loss_total": 0.6735351085662842, + "step": 39299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.8487595319747925, + "learning_rate": 8.291082869498277e-05, + "loss": 0.6613, + "step": 39300 + }, + { + "epoch": 0.016798, + "loss_gen": 4.5574421882629395, + "loss_rtd": 0.3984720706939697, + "loss_sent": 0.26848775148391724, + "loss_sod": 0.007420409470796585, + "loss_total": 0.6743802428245544, + "step": 39399 + }, + { + "epoch": 0.016798, + "loss_gen": 4.480246543884277, + "loss_rtd": 0.3950853645801544, + "loss_sent": 0.13288316130638123, + "loss_sod": 0.030220378190279007, + "loss_total": 0.5581889152526855, + "step": 39399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.7920759320259094, + "learning_rate": 8.28869323624442e-05, + "loss": 0.671, + "step": 39400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.565373420715332, + "loss_rtd": 0.37720078229904175, + "loss_sent": 0.35734039545059204, + "loss_sod": 0.032200008630752563, + "loss_total": 0.766741156578064, + "step": 39499 + }, + { + "epoch": 0.016998, + "loss_gen": 4.659275531768799, + "loss_rtd": 0.3958361744880676, + "loss_sent": 0.09898357838392258, + "loss_sod": 0.05647943168878555, + "loss_total": 0.5512991547584534, + "step": 39499 + }, + { + "epoch": 0.017, + "grad_norm": 0.8385859727859497, + "learning_rate": 8.28630227830532e-05, + "loss": 0.6656, + "step": 39500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.545830249786377, + "loss_rtd": 0.3760372996330261, + "loss_sent": 0.3367275595664978, + "loss_sod": 0.11878657341003418, + "loss_total": 0.8315514326095581, + "step": 39599 + }, + { + "epoch": 0.017198, + "loss_gen": 4.430626392364502, + "loss_rtd": 0.3921814560890198, + "loss_sent": 0.17195403575897217, + "loss_sod": 0.019803347066044807, + "loss_total": 0.5839388370513916, + "step": 39599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.8410652875900269, + "learning_rate": 8.283909996644057e-05, + "loss": 0.6566, + "step": 39600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.776976585388184, + "loss_rtd": 0.3909640312194824, + "loss_sent": 0.20697557926177979, + "loss_sod": 0.04605009779334068, + "loss_total": 0.6439896821975708, + "step": 39699 + }, + { + "epoch": 0.017398, + "loss_gen": 4.416382789611816, + "loss_rtd": 0.3838307857513428, + "loss_sent": 0.2624322772026062, + "loss_sod": 0.012035838328301907, + "loss_total": 0.658298909664154, + "step": 39699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.9887232780456543, + "learning_rate": 8.281516392224238e-05, + "loss": 0.6769, + "step": 39700 + }, + { + "epoch": 0.017598, + "loss_gen": 4.665676116943359, + "loss_rtd": 0.3965701758861542, + "loss_sent": 0.34119102358818054, + "loss_sod": 0.06708301603794098, + "loss_total": 0.8048442602157593, + "step": 39799 + }, + { + "epoch": 0.017598, + "loss_gen": 4.204485893249512, + "loss_rtd": 0.39091309905052185, + "loss_sent": 0.21135586500167847, + "loss_sod": 0.07284900546073914, + "loss_total": 0.6751179695129395, + "step": 39799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.4561817646026611, + "learning_rate": 8.279121466010011e-05, + "loss": 0.6674, + "step": 39800 + }, + { + "epoch": 0.017798, + "loss_gen": 4.904703140258789, + "loss_rtd": 0.37332049012184143, + "loss_sent": 0.4987711012363434, + "loss_sod": 0.03765245899558067, + "loss_total": 0.9097440242767334, + "step": 39899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.342363357543945, + "loss_rtd": 0.37406766414642334, + "loss_sent": 0.12862245738506317, + "loss_sod": 0.07009097933769226, + "loss_total": 0.5727810859680176, + "step": 39899 + }, + { + "epoch": 0.0178, + "grad_norm": 2.1194393634796143, + "learning_rate": 8.276725218966049e-05, + "loss": 0.6456, + "step": 39900 + }, + { + "epoch": 0.017998, + "loss_gen": 4.648935317993164, + "loss_rtd": 0.383938729763031, + "loss_sent": 0.08299577981233597, + "loss_sod": 0.019381307065486908, + "loss_total": 0.4863158166408539, + "step": 39999 + }, + { + "epoch": 0.017998, + "loss_gen": 4.655354976654053, + "loss_rtd": 0.3713081181049347, + "loss_sent": 0.11958049982786179, + "loss_sod": 0.04544627666473389, + "loss_total": 0.5363348722457886, + "step": 39999 + }, + { + "epoch": 0.018, + "grad_norm": 0.8464807868003845, + "learning_rate": 8.274327652057558e-05, + "loss": 0.6587, + "step": 40000 + }, + { + "epoch": 0.018, + "eval_loss": 0.6380000114440918, + "eval_runtime": 150.7999, + "eval_samples_per_second": 102.407, + "eval_steps_per_second": 0.802, + "step": 40000 + }, + { + "epoch": 0.018198, + "loss_gen": 4.573928356170654, + "loss_rtd": 0.4032638370990753, + "loss_sent": 0.3652266263961792, + "loss_sod": 0.047281235456466675, + "loss_total": 0.8157716989517212, + "step": 40099 + }, + { + "epoch": 0.018198, + "loss_gen": 4.420799732208252, + "loss_rtd": 0.40008658170700073, + "loss_sent": 0.08159566670656204, + "loss_sod": 0.04224081709980965, + "loss_total": 0.5239230990409851, + "step": 40099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.7178969383239746, + "learning_rate": 8.271928766250283e-05, + "loss": 0.6505, + "step": 40100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.2330427169799805, + "loss_rtd": 0.38497084379196167, + "loss_sent": 0.11211797595024109, + "loss_sod": 0.18959033489227295, + "loss_total": 0.6866791248321533, + "step": 40199 + }, + { + "epoch": 0.018398, + "loss_gen": 4.203384876251221, + "loss_rtd": 0.3897349238395691, + "loss_sent": 0.002613413380458951, + "loss_sod": 0.2865219712257385, + "loss_total": 0.6788703203201294, + "step": 40199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.4291338920593262, + "learning_rate": 8.269528562510493e-05, + "loss": 0.6542, + "step": 40200 + }, + { + "epoch": 0.018598, + "loss_gen": 3.9043827056884766, + "loss_rtd": 0.392133891582489, + "loss_sent": 6.662686064373702e-05, + "loss_sod": 0.48413851857185364, + "loss_total": 0.8763390779495239, + "step": 40299 + }, + { + "epoch": 0.018598, + "loss_gen": 3.9214248657226562, + "loss_rtd": 0.3792304992675781, + "loss_sent": 0.00023043343389872462, + "loss_sod": 0.20601791143417358, + "loss_total": 0.5854788422584534, + "step": 40299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.0580034255981445, + "learning_rate": 8.267127041804987e-05, + "loss": 0.6687, + "step": 40300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.402094841003418, + "loss_rtd": 0.3716152012348175, + "loss_sent": 0.32779261469841003, + "loss_sod": 0.07380812615156174, + "loss_total": 0.7732159495353699, + "step": 40399 + }, + { + "epoch": 0.018798, + "loss_gen": 4.562647342681885, + "loss_rtd": 0.37056246399879456, + "loss_sent": 0.06721348315477371, + "loss_sod": 0.06720122694969177, + "loss_total": 0.5049771666526794, + "step": 40399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.3488694429397583, + "learning_rate": 8.2647242051011e-05, + "loss": 0.6542, + "step": 40400 + }, + { + "epoch": 0.018998, + "loss_gen": 3.678180694580078, + "loss_rtd": 0.3809404671192169, + "loss_sent": 8.662456821184605e-05, + "loss_sod": 0.22844916582107544, + "loss_total": 0.6094762682914734, + "step": 40499 + }, + { + "epoch": 0.018998, + "loss_gen": 3.860581159591675, + "loss_rtd": 0.3823385536670685, + "loss_sent": 0.0644868016242981, + "loss_sod": 0.05698993802070618, + "loss_total": 0.5038152933120728, + "step": 40499 + }, + { + "epoch": 0.019, + "grad_norm": 1.0200388431549072, + "learning_rate": 8.262320053366693e-05, + "loss": 0.6568, + "step": 40500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.615798473358154, + "loss_rtd": 0.3954371213912964, + "loss_sent": 0.3814672529697418, + "loss_sod": 0.09009584039449692, + "loss_total": 0.8670002222061157, + "step": 40599 + }, + { + "epoch": 0.019198, + "loss_gen": 3.917062997817993, + "loss_rtd": 0.3857502341270447, + "loss_sent": 0.016199423000216484, + "loss_sod": 0.18647600710391998, + "loss_total": 0.5884256362915039, + "step": 40599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.7290745973587036, + "learning_rate": 8.259914587570159e-05, + "loss": 0.6559, + "step": 40600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.480384349822998, + "loss_rtd": 0.39397501945495605, + "loss_sent": 0.3567149043083191, + "loss_sod": 0.012110976502299309, + "loss_total": 0.762800931930542, + "step": 40699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.576913833618164, + "loss_rtd": 0.38175448775291443, + "loss_sent": 0.13982127606868744, + "loss_sod": 0.07496733218431473, + "loss_total": 0.5965430736541748, + "step": 40699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.3119646310806274, + "learning_rate": 8.257507808680421e-05, + "loss": 0.664, + "step": 40700 + }, + { + "epoch": 0.019598, + "loss_gen": 4.674173355102539, + "loss_rtd": 0.3807498514652252, + "loss_sent": 0.2061619758605957, + "loss_sod": 0.10963135212659836, + "loss_total": 0.6965432167053223, + "step": 40799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.5892133712768555, + "loss_rtd": 0.41577914357185364, + "loss_sent": 0.20196275413036346, + "loss_sod": 0.04171139746904373, + "loss_total": 0.659453272819519, + "step": 40799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.5848276615142822, + "learning_rate": 8.255099717666923e-05, + "loss": 0.6517, + "step": 40800 + }, + { + "epoch": 0.019798, + "loss_gen": 4.5105791091918945, + "loss_rtd": 0.3716451823711395, + "loss_sent": 0.051084186881780624, + "loss_sod": 0.11713321506977081, + "loss_total": 0.5398625731468201, + "step": 40899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.010336399078369, + "loss_rtd": 0.3702070415019989, + "loss_sent": 0.003787236986681819, + "loss_sod": 0.3948768377304077, + "loss_total": 0.7688711285591125, + "step": 40899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.2466219663619995, + "learning_rate": 8.252690315499651e-05, + "loss": 0.6687, + "step": 40900 + }, + { + "epoch": 0.019998, + "loss_gen": 4.213362216949463, + "loss_rtd": 0.38417360186576843, + "loss_sent": 0.13808989524841309, + "loss_sod": 0.06279731541872025, + "loss_total": 0.5850608348846436, + "step": 40999 + }, + { + "epoch": 0.019998, + "loss_gen": 3.9070096015930176, + "loss_rtd": 0.40882429480552673, + "loss_sent": 0.003071530256420374, + "loss_sod": 0.19416505098342896, + "loss_total": 0.6060608625411987, + "step": 40999 + }, + { + "epoch": 0.02, + "grad_norm": 1.2292420864105225, + "learning_rate": 8.25027960314911e-05, + "loss": 0.6623, + "step": 41000 + }, + { + "epoch": 0.02, + "eval_loss": 0.6412881016731262, + "eval_runtime": 151.2282, + "eval_samples_per_second": 102.117, + "eval_steps_per_second": 0.8, + "step": 41000 + }, + { + "epoch": 0.020198, + "loss_gen": 3.7225356101989746, + "loss_rtd": 0.38106489181518555, + "loss_sent": 0.00014135816309135407, + "loss_sod": 0.3407840132713318, + "loss_total": 0.7219902873039246, + "step": 41099 + }, + { + "epoch": 0.020198, + "loss_gen": 4.466882228851318, + "loss_rtd": 0.3669414222240448, + "loss_sent": 0.11593437939882278, + "loss_sod": 0.1539105772972107, + "loss_total": 0.6367863416671753, + "step": 41099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.1810585260391235, + "learning_rate": 8.24786758158633e-05, + "loss": 0.669, + "step": 41100 + }, + { + "epoch": 0.020398, + "loss_gen": 3.9555904865264893, + "loss_rtd": 0.38175278902053833, + "loss_sent": 0.04201050475239754, + "loss_sod": 0.16164356470108032, + "loss_total": 0.5854068398475647, + "step": 41199 + }, + { + "epoch": 0.020398, + "loss_gen": 4.237810134887695, + "loss_rtd": 0.3647710978984833, + "loss_sent": 0.2045086920261383, + "loss_sod": 0.27279776334762573, + "loss_total": 0.8420774936676025, + "step": 41199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.3842684030532837, + "learning_rate": 8.245454251782878e-05, + "loss": 0.6532, + "step": 41200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.629668712615967, + "loss_rtd": 0.3939113914966583, + "loss_sent": 0.08818431198596954, + "loss_sod": 0.09862308204174042, + "loss_total": 0.5807187557220459, + "step": 41299 + }, + { + "epoch": 0.020598, + "loss_gen": 4.260357856750488, + "loss_rtd": 0.3583470582962036, + "loss_sent": 0.1607215255498886, + "loss_sod": 0.021704211831092834, + "loss_total": 0.5407727956771851, + "step": 41299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.976425290107727, + "learning_rate": 8.243039614710844e-05, + "loss": 0.6492, + "step": 41300 + }, + { + "epoch": 0.020798, + "loss_gen": 4.663462162017822, + "loss_rtd": 0.3940715193748474, + "loss_sent": 0.1713400036096573, + "loss_sod": 0.0544615238904953, + "loss_total": 0.619873046875, + "step": 41399 + }, + { + "epoch": 0.020798, + "loss_gen": 4.41485071182251, + "loss_rtd": 0.3642936050891876, + "loss_sent": 0.14602665603160858, + "loss_sod": 0.015467479825019836, + "loss_total": 0.5257877707481384, + "step": 41399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.2612237930297852, + "learning_rate": 8.240623671342837e-05, + "loss": 0.6541, + "step": 41400 + }, + { + "epoch": 0.020998, + "loss_gen": 4.343321800231934, + "loss_rtd": 0.3765054941177368, + "loss_sent": 0.22087278962135315, + "loss_sod": 0.016782084479928017, + "loss_total": 0.6141603589057922, + "step": 41499 + }, + { + "epoch": 0.020998, + "loss_gen": 4.469653129577637, + "loss_rtd": 0.3886878490447998, + "loss_sent": 0.02628425508737564, + "loss_sod": 0.18656085431575775, + "loss_total": 0.6015329957008362, + "step": 41499 + }, + { + "epoch": 0.021, + "grad_norm": 1.2931349277496338, + "learning_rate": 8.238206422652006e-05, + "loss": 0.6537, + "step": 41500 + }, + { + "epoch": 0.021198, + "loss_gen": 4.51466178894043, + "loss_rtd": 0.36613962054252625, + "loss_sent": 0.14363856613636017, + "loss_sod": 0.17767947912216187, + "loss_total": 0.6874576210975647, + "step": 41599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.485960006713867, + "loss_rtd": 0.3810034394264221, + "loss_sent": 0.1728469878435135, + "loss_sod": 0.08899247646331787, + "loss_total": 0.6428428888320923, + "step": 41599 + }, + { + "epoch": 0.0212, + "grad_norm": 2.514805793762207, + "learning_rate": 8.235787869612012e-05, + "loss": 0.6714, + "step": 41600 + }, + { + "epoch": 0.021398, + "loss_gen": 4.567913055419922, + "loss_rtd": 0.3841160237789154, + "loss_sent": 0.1377597600221634, + "loss_sod": 0.05175473913550377, + "loss_total": 0.5736305117607117, + "step": 41699 + }, + { + "epoch": 0.021398, + "loss_gen": 4.057435512542725, + "loss_rtd": 0.3718874156475067, + "loss_sent": 0.04041086509823799, + "loss_sod": 0.15216533839702606, + "loss_total": 0.5644636154174805, + "step": 41699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.4682799577713013, + "learning_rate": 8.233368013197053e-05, + "loss": 0.6523, + "step": 41700 + }, + { + "epoch": 0.021598, + "loss_gen": 3.9765806198120117, + "loss_rtd": 0.39956530928611755, + "loss_sent": 0.029685061424970627, + "loss_sod": 0.19594547152519226, + "loss_total": 0.6251958608627319, + "step": 41799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.426865577697754, + "loss_rtd": 0.37698954343795776, + "loss_sent": 0.18985331058502197, + "loss_sod": 0.0343150869011879, + "loss_total": 0.6011579036712646, + "step": 41799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.8145066499710083, + "learning_rate": 8.230946854381846e-05, + "loss": 0.6501, + "step": 41800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.632156848907471, + "loss_rtd": 0.3796764314174652, + "loss_sent": 0.07298461347818375, + "loss_sod": 0.12913213670253754, + "loss_total": 0.5817931890487671, + "step": 41899 + }, + { + "epoch": 0.021798, + "loss_gen": 4.973066806793213, + "loss_rtd": 0.40505293011665344, + "loss_sent": 0.15712273120880127, + "loss_sod": 0.07550527155399323, + "loss_total": 0.6376809477806091, + "step": 41899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.2040433883666992, + "learning_rate": 8.22852439414163e-05, + "loss": 0.6691, + "step": 41900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.504978179931641, + "loss_rtd": 0.38156020641326904, + "loss_sent": 0.26390254497528076, + "loss_sod": 0.09385176002979279, + "loss_total": 0.7393144965171814, + "step": 41999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.452895164489746, + "loss_rtd": 0.4032492935657501, + "loss_sent": 0.2142591029405594, + "loss_sod": 0.024533722549676895, + "loss_total": 0.6420421004295349, + "step": 41999 + }, + { + "epoch": 0.022, + "grad_norm": 1.5839908123016357, + "learning_rate": 8.226100633452176e-05, + "loss": 0.6579, + "step": 42000 + }, + { + "epoch": 0.022, + "eval_loss": 0.6314432621002197, + "eval_runtime": 150.9027, + "eval_samples_per_second": 102.337, + "eval_steps_per_second": 0.802, + "step": 42000 + }, + { + "epoch": 0.022198, + "loss_gen": 4.108220100402832, + "loss_rtd": 0.3855191469192505, + "loss_sent": 0.03595998138189316, + "loss_sod": 0.04322711378335953, + "loss_total": 0.4647062420845032, + "step": 42099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.688653469085693, + "loss_rtd": 0.37979236245155334, + "loss_sent": 0.11569111794233322, + "loss_sod": 0.13051004707813263, + "loss_total": 0.625993549823761, + "step": 42099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.9881030917167664, + "learning_rate": 8.223675573289773e-05, + "loss": 0.6565, + "step": 42100 + }, + { + "epoch": 0.022398, + "loss_gen": 3.745572566986084, + "loss_rtd": 0.36591315269470215, + "loss_sent": 5.752767174271867e-05, + "loss_sod": 0.41352346539497375, + "loss_total": 0.7794941067695618, + "step": 42199 + }, + { + "epoch": 0.022398, + "loss_gen": 4.244234085083008, + "loss_rtd": 0.3592204451560974, + "loss_sent": 0.06797172129154205, + "loss_sod": 0.10003990679979324, + "loss_total": 0.5272320508956909, + "step": 42199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.06648850440979, + "learning_rate": 8.221249214631233e-05, + "loss": 0.6539, + "step": 42200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.342432975769043, + "loss_rtd": 0.4121549427509308, + "loss_sent": 0.35916224122047424, + "loss_sod": 0.1422906517982483, + "loss_total": 0.9136078357696533, + "step": 42299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.415210723876953, + "loss_rtd": 0.3874806761741638, + "loss_sent": 0.31633779406547546, + "loss_sod": 0.11523738503456116, + "loss_total": 0.8190559148788452, + "step": 42299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.351127028465271, + "learning_rate": 8.218821558453896e-05, + "loss": 0.6481, + "step": 42300 + }, + { + "epoch": 0.022798, + "loss_gen": 4.327655792236328, + "loss_rtd": 0.36464032530784607, + "loss_sent": 0.022839125245809555, + "loss_sod": 0.2916712462902069, + "loss_total": 0.6791507005691528, + "step": 42399 + }, + { + "epoch": 0.022798, + "loss_gen": 4.3175883293151855, + "loss_rtd": 0.36989280581474304, + "loss_sent": 0.000921736063901335, + "loss_sod": 0.2789615988731384, + "loss_total": 0.6497761011123657, + "step": 42399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.007271647453308, + "learning_rate": 8.216392605735618e-05, + "loss": 0.6694, + "step": 42400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.200122356414795, + "loss_rtd": 0.3771202266216278, + "loss_sent": 0.30438563227653503, + "loss_sod": 0.11443760991096497, + "loss_total": 0.7959434986114502, + "step": 42499 + }, + { + "epoch": 0.022998, + "loss_gen": 4.341174125671387, + "loss_rtd": 0.3801506757736206, + "loss_sent": 0.10273100435733795, + "loss_sod": 0.053897298872470856, + "loss_total": 0.53677898645401, + "step": 42499 + }, + { + "epoch": 0.023, + "grad_norm": 1.900464415550232, + "learning_rate": 8.213962357454785e-05, + "loss": 0.6531, + "step": 42500 + }, + { + "epoch": 0.023198, + "loss_gen": 4.481957912445068, + "loss_rtd": 0.35730868577957153, + "loss_sent": 0.42418715357780457, + "loss_sod": 0.03608470782637596, + "loss_total": 0.8175805807113647, + "step": 42599 + }, + { + "epoch": 0.023198, + "loss_gen": 4.262242317199707, + "loss_rtd": 0.3994123935699463, + "loss_sent": 0.0772051140666008, + "loss_sod": 0.09249553084373474, + "loss_total": 0.56911301612854, + "step": 42599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.4650466442108154, + "learning_rate": 8.211530814590298e-05, + "loss": 0.6535, + "step": 42600 + }, + { + "epoch": 0.023398, + "loss_gen": 4.358861446380615, + "loss_rtd": 0.3849829137325287, + "loss_sent": 0.20576530694961548, + "loss_sod": 0.011316204443573952, + "loss_total": 0.6020644307136536, + "step": 42699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.429322719573975, + "loss_rtd": 0.3777335584163666, + "loss_sent": 0.1580859273672104, + "loss_sod": 0.01792304590344429, + "loss_total": 0.553742527961731, + "step": 42699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8418339490890503, + "learning_rate": 8.209097978121583e-05, + "loss": 0.6579, + "step": 42700 + }, + { + "epoch": 0.023598, + "loss_gen": 3.279528856277466, + "loss_rtd": 0.3709106743335724, + "loss_sent": 0.00022582203382626176, + "loss_sod": 0.20592719316482544, + "loss_total": 0.5770637392997742, + "step": 42799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.744197845458984, + "loss_rtd": 0.37123891711235046, + "loss_sent": 0.10743427276611328, + "loss_sod": 0.04356180876493454, + "loss_total": 0.5222350358963013, + "step": 42799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.0691636800765991, + "learning_rate": 8.206663849028587e-05, + "loss": 0.648, + "step": 42800 + }, + { + "epoch": 0.023798, + "loss_gen": 3.5588996410369873, + "loss_rtd": 0.37456318736076355, + "loss_sent": 5.979636625852436e-05, + "loss_sod": 0.2636725902557373, + "loss_total": 0.638295590877533, + "step": 42899 + }, + { + "epoch": 0.023798, + "loss_gen": 3.632612705230713, + "loss_rtd": 0.38829219341278076, + "loss_sent": 0.0013291510986164212, + "loss_sod": 0.27269303798675537, + "loss_total": 0.6623143553733826, + "step": 42899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.4013932943344116, + "learning_rate": 8.204228428291775e-05, + "loss": 0.6574, + "step": 42900 + }, + { + "epoch": 0.023998, + "loss_gen": 4.848650932312012, + "loss_rtd": 0.373313844203949, + "loss_sent": 0.17515422403812408, + "loss_sod": 0.04122958332300186, + "loss_total": 0.5896976590156555, + "step": 42999 + }, + { + "epoch": 0.023998, + "loss_gen": 4.167661666870117, + "loss_rtd": 0.3895135521888733, + "loss_sent": 0.006315239239484072, + "loss_sod": 0.15132783353328705, + "loss_total": 0.5471566319465637, + "step": 42999 + }, + { + "epoch": 0.024, + "grad_norm": 0.897146999835968, + "learning_rate": 8.201791716892136e-05, + "loss": 0.6511, + "step": 43000 + }, + { + "epoch": 0.024, + "eval_loss": 0.6402202844619751, + "eval_runtime": 151.004, + "eval_samples_per_second": 102.269, + "eval_steps_per_second": 0.801, + "step": 43000 + }, + { + "epoch": 0.024198, + "loss_gen": 4.613114833831787, + "loss_rtd": 0.36502689123153687, + "loss_sent": 0.1664440780878067, + "loss_sod": 0.020670989528298378, + "loss_total": 0.5521419644355774, + "step": 43099 + }, + { + "epoch": 0.024198, + "loss_gen": 4.534543991088867, + "loss_rtd": 0.38365063071250916, + "loss_sent": 0.31674572825431824, + "loss_sod": 0.04986383020877838, + "loss_total": 0.7502602338790894, + "step": 43099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.2234196662902832, + "learning_rate": 8.199353715811176e-05, + "loss": 0.6472, + "step": 43100 + }, + { + "epoch": 0.024398, + "loss_gen": 4.381618499755859, + "loss_rtd": 0.38506895303726196, + "loss_sent": 0.2596307694911957, + "loss_sod": 0.010949251241981983, + "loss_total": 0.655648946762085, + "step": 43199 + }, + { + "epoch": 0.024398, + "loss_gen": 4.481594085693359, + "loss_rtd": 0.38360145688056946, + "loss_sent": 0.32707130908966064, + "loss_sod": 0.013929128646850586, + "loss_total": 0.7246018648147583, + "step": 43199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.4966225624084473, + "learning_rate": 8.196914426030921e-05, + "loss": 0.6386, + "step": 43200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.606259346008301, + "loss_rtd": 0.3694329857826233, + "loss_sent": 0.21232984960079193, + "loss_sod": 0.05285738408565521, + "loss_total": 0.6346202492713928, + "step": 43299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.581188201904297, + "loss_rtd": 0.3795831799507141, + "loss_sent": 0.203518807888031, + "loss_sod": 0.26025161147117615, + "loss_total": 0.8433535695075989, + "step": 43299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.1756949424743652, + "learning_rate": 8.194473848533919e-05, + "loss": 0.659, + "step": 43300 + }, + { + "epoch": 0.024798, + "loss_gen": 4.864691734313965, + "loss_rtd": 0.3677074909210205, + "loss_sent": 0.15385890007019043, + "loss_sod": 0.04495445638895035, + "loss_total": 0.5665208101272583, + "step": 43399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.599943161010742, + "loss_rtd": 0.3858634829521179, + "loss_sent": 0.11128430813550949, + "loss_sod": 0.1456134021282196, + "loss_total": 0.6427611708641052, + "step": 43399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.0346415042877197, + "learning_rate": 8.192031984303232e-05, + "loss": 0.6602, + "step": 43400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.85927677154541, + "loss_rtd": 0.377153217792511, + "loss_sent": 0.12804071605205536, + "loss_sod": 0.028983604162931442, + "loss_total": 0.5341775417327881, + "step": 43499 + }, + { + "epoch": 0.024998, + "loss_gen": 4.69577693939209, + "loss_rtd": 0.40115371346473694, + "loss_sent": 0.08152621239423752, + "loss_sod": 0.19556081295013428, + "loss_total": 0.6782407164573669, + "step": 43499 + }, + { + "epoch": 0.025, + "grad_norm": 1.173714518547058, + "learning_rate": 8.189588834322444e-05, + "loss": 0.6385, + "step": 43500 + }, + { + "epoch": 0.025198, + "loss_gen": 4.078179836273193, + "loss_rtd": 0.40786659717559814, + "loss_sent": 6.961300823604688e-05, + "loss_sod": 0.2843131422996521, + "loss_total": 0.6922493577003479, + "step": 43599 + }, + { + "epoch": 0.025198, + "loss_gen": 3.5158584117889404, + "loss_rtd": 0.37952902913093567, + "loss_sent": 0.00015645030362065881, + "loss_sod": 0.2368762046098709, + "loss_total": 0.6165617108345032, + "step": 43599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.9594609141349792, + "learning_rate": 8.187144399575655e-05, + "loss": 0.6584, + "step": 43600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.753521919250488, + "loss_rtd": 0.3814672529697418, + "loss_sent": 0.387495756149292, + "loss_sod": 0.028873810544610023, + "loss_total": 0.7978367805480957, + "step": 43699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.257978439331055, + "loss_rtd": 0.4045831561088562, + "loss_sent": 0.20893022418022156, + "loss_sod": 0.03061862289905548, + "loss_total": 0.6441320180892944, + "step": 43699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.4797601699829102, + "learning_rate": 8.184698681047482e-05, + "loss": 0.6522, + "step": 43700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.600385665893555, + "loss_rtd": 0.384109228849411, + "loss_sent": 0.19618237018585205, + "loss_sod": 0.04927710443735123, + "loss_total": 0.6295686960220337, + "step": 43799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.504488945007324, + "loss_rtd": 0.3831881582736969, + "loss_sent": 0.40422946214675903, + "loss_sod": 0.07176312804222107, + "loss_total": 0.859180748462677, + "step": 43799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.76749849319458, + "learning_rate": 8.182251679723061e-05, + "loss": 0.6624, + "step": 43800 + }, + { + "epoch": 0.025798, + "loss_gen": 3.675482988357544, + "loss_rtd": 0.37020596861839294, + "loss_sent": 0.0027885735034942627, + "loss_sod": 0.20921216905117035, + "loss_total": 0.5822067260742188, + "step": 43899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.866572380065918, + "loss_rtd": 0.37793922424316406, + "loss_sent": 0.0779816284775734, + "loss_sod": 0.036181047558784485, + "loss_total": 0.49210187792778015, + "step": 43899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.9174070358276367, + "learning_rate": 8.179803396588045e-05, + "loss": 0.6474, + "step": 43900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.741182327270508, + "loss_rtd": 0.38066208362579346, + "loss_sent": 0.13676688075065613, + "loss_sod": 0.039525195956230164, + "loss_total": 0.5569541454315186, + "step": 43999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.582489490509033, + "loss_rtd": 0.3616386950016022, + "loss_sent": 0.15300214290618896, + "loss_sod": 0.029031649231910706, + "loss_total": 0.543672502040863, + "step": 43999 + }, + { + "epoch": 0.026, + "grad_norm": 0.9032507538795471, + "learning_rate": 8.177353832628602e-05, + "loss": 0.6548, + "step": 44000 + }, + { + "epoch": 0.026, + "eval_loss": 0.6352246999740601, + "eval_runtime": 150.8784, + "eval_samples_per_second": 102.354, + "eval_steps_per_second": 0.802, + "step": 44000 + }, + { + "epoch": 0.026198, + "loss_gen": 3.75815749168396, + "loss_rtd": 0.3719606399536133, + "loss_sent": 0.004499775357544422, + "loss_sod": 0.23574712872505188, + "loss_total": 0.6122075319290161, + "step": 44099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.299135684967041, + "loss_rtd": 0.3848499059677124, + "loss_sent": 0.11957715451717377, + "loss_sod": 0.06262027472257614, + "loss_total": 0.5670473575592041, + "step": 44099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.8012509942054749, + "learning_rate": 8.174902988831413e-05, + "loss": 0.6529, + "step": 44100 + }, + { + "epoch": 0.026398, + "loss_gen": 4.834580898284912, + "loss_rtd": 0.37917640805244446, + "loss_sent": 0.1776326447725296, + "loss_sod": 0.06561632454395294, + "loss_total": 0.622425377368927, + "step": 44199 + }, + { + "epoch": 0.026398, + "loss_gen": 4.1323957443237305, + "loss_rtd": 0.38789719343185425, + "loss_sent": 0.22935126721858978, + "loss_sod": 0.009114162065088749, + "loss_total": 0.6263626217842102, + "step": 44199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.483168125152588, + "learning_rate": 8.17245086618368e-05, + "loss": 0.6529, + "step": 44200 + }, + { + "epoch": 0.026598, + "loss_gen": 4.796681880950928, + "loss_rtd": 0.38072118163108826, + "loss_sent": 0.11894837021827698, + "loss_sod": 0.10300540924072266, + "loss_total": 0.6026749610900879, + "step": 44299 + }, + { + "epoch": 0.026598, + "loss_gen": 4.623732566833496, + "loss_rtd": 0.3622994124889374, + "loss_sent": 0.13840840756893158, + "loss_sod": 0.021766219288110733, + "loss_total": 0.5224740505218506, + "step": 44299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.7771408557891846, + "learning_rate": 8.16999746567312e-05, + "loss": 0.6443, + "step": 44300 + }, + { + "epoch": 0.026798, + "loss_gen": 3.522340774536133, + "loss_rtd": 0.36471617221832275, + "loss_sent": 0.0024779404047876596, + "loss_sod": 0.11392658203840256, + "loss_total": 0.4811207056045532, + "step": 44399 + }, + { + "epoch": 0.026798, + "loss_gen": 4.686045169830322, + "loss_rtd": 0.3902490735054016, + "loss_sent": 0.16535663604736328, + "loss_sod": 0.022858766838908195, + "loss_total": 0.5784645080566406, + "step": 44399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.6933805346488953, + "learning_rate": 8.167542788287958e-05, + "loss": 0.6555, + "step": 44400 + }, + { + "epoch": 0.026998, + "loss_gen": 4.474067687988281, + "loss_rtd": 0.3878956139087677, + "loss_sent": 0.10972359776496887, + "loss_sod": 0.04626302421092987, + "loss_total": 0.5438822507858276, + "step": 44499 + }, + { + "epoch": 0.026998, + "loss_gen": 4.703509330749512, + "loss_rtd": 0.39577972888946533, + "loss_sent": 0.22541356086730957, + "loss_sod": 0.03512066975235939, + "loss_total": 0.656313955783844, + "step": 44499 + }, + { + "epoch": 0.027, + "grad_norm": 0.9493730068206787, + "learning_rate": 8.165086835016939e-05, + "loss": 0.6493, + "step": 44500 + }, + { + "epoch": 0.027198, + "loss_gen": 4.679075241088867, + "loss_rtd": 0.37580394744873047, + "loss_sent": 0.47782081365585327, + "loss_sod": 0.08848084509372711, + "loss_total": 0.9421055912971497, + "step": 44599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.700301170349121, + "loss_rtd": 0.38203302025794983, + "loss_sent": 0.18475648760795593, + "loss_sod": 0.0023707542568445206, + "loss_total": 0.5691602230072021, + "step": 44599 + }, + { + "epoch": 0.0272, + "grad_norm": 2.114959716796875, + "learning_rate": 8.162629606849323e-05, + "loss": 0.6635, + "step": 44600 + }, + { + "epoch": 0.027398, + "loss_gen": 3.892876386642456, + "loss_rtd": 0.3716607689857483, + "loss_sent": 0.015241903252899647, + "loss_sod": 0.16945964097976685, + "loss_total": 0.5563623309135437, + "step": 44699 + }, + { + "epoch": 0.027398, + "loss_gen": 4.450325012207031, + "loss_rtd": 0.373519629240036, + "loss_sent": 0.3957299590110779, + "loss_sod": 0.04284271225333214, + "loss_total": 0.8120923042297363, + "step": 44699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.280011534690857, + "learning_rate": 8.160171104774879e-05, + "loss": 0.6662, + "step": 44700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.850058555603027, + "loss_rtd": 0.3819859027862549, + "loss_sent": 0.14247000217437744, + "loss_sod": 0.07389207929372787, + "loss_total": 0.5983480215072632, + "step": 44799 + }, + { + "epoch": 0.027598, + "loss_gen": 4.781891345977783, + "loss_rtd": 0.38648825883865356, + "loss_sent": 0.34172558784484863, + "loss_sod": 0.09410925209522247, + "loss_total": 0.8223230838775635, + "step": 44799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.891017735004425, + "learning_rate": 8.15771132978389e-05, + "loss": 0.6625, + "step": 44800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.8174285888671875, + "loss_rtd": 0.3941290080547333, + "loss_sent": 0.5728538632392883, + "loss_sod": 0.042764414101839066, + "loss_total": 1.0097472667694092, + "step": 44899 + }, + { + "epoch": 0.027798, + "loss_gen": 3.689896821975708, + "loss_rtd": 0.3721725344657898, + "loss_sent": 0.0008906761649996042, + "loss_sod": 0.18504759669303894, + "loss_total": 0.558110773563385, + "step": 44899 + }, + { + "epoch": 0.0278, + "grad_norm": 2.401081085205078, + "learning_rate": 8.155250282867157e-05, + "loss": 0.6667, + "step": 44900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.586578369140625, + "loss_rtd": 0.38676008582115173, + "loss_sent": 0.2055395245552063, + "loss_sod": 0.032622065395116806, + "loss_total": 0.6249216794967651, + "step": 44999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.673305511474609, + "loss_rtd": 0.3887365758419037, + "loss_sent": 0.28746530413627625, + "loss_sod": 0.025677867233753204, + "loss_total": 0.7018797397613525, + "step": 44999 + }, + { + "epoch": 0.028, + "grad_norm": 1.0690522193908691, + "learning_rate": 8.152787965015988e-05, + "loss": 0.6643, + "step": 45000 + }, + { + "epoch": 0.028, + "eval_loss": 0.6301302313804626, + "eval_runtime": 150.9721, + "eval_samples_per_second": 102.29, + "eval_steps_per_second": 0.801, + "step": 45000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.572504997253418, + "loss_rtd": 0.3817649483680725, + "loss_sent": 0.13866949081420898, + "loss_sod": 0.08782115578651428, + "loss_total": 0.6082556247711182, + "step": 45099 + }, + { + "epoch": 0.028198, + "loss_gen": 4.704024791717529, + "loss_rtd": 0.3800975978374481, + "loss_sent": 0.11726216971874237, + "loss_sod": 0.06253600120544434, + "loss_total": 0.5598957538604736, + "step": 45099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.9835987091064453, + "learning_rate": 8.150324377222202e-05, + "loss": 0.6621, + "step": 45100 + }, + { + "epoch": 0.028398, + "loss_gen": 4.337993144989014, + "loss_rtd": 0.37821826338768005, + "loss_sent": 0.17172084748744965, + "loss_sod": 0.012256279587745667, + "loss_total": 0.5621954202651978, + "step": 45199 + }, + { + "epoch": 0.028398, + "loss_gen": 4.574587821960449, + "loss_rtd": 0.3861466944217682, + "loss_sent": 0.16793416440486908, + "loss_sod": 0.029096323996782303, + "loss_total": 0.5831772089004517, + "step": 45199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.1708933115005493, + "learning_rate": 8.147859520478134e-05, + "loss": 0.6795, + "step": 45200 + }, + { + "epoch": 0.028598, + "loss_gen": 4.570967197418213, + "loss_rtd": 0.3870660364627838, + "loss_sent": 0.4017711281776428, + "loss_sod": 0.1306699514389038, + "loss_total": 0.9195070862770081, + "step": 45299 + }, + { + "epoch": 0.028598, + "loss_gen": 4.816206932067871, + "loss_rtd": 0.3569832444190979, + "loss_sent": 0.11378684639930725, + "loss_sod": 0.10787219554185867, + "loss_total": 0.5786422491073608, + "step": 45299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.1042001247406006, + "learning_rate": 8.145393395776629e-05, + "loss": 0.66, + "step": 45300 + }, + { + "epoch": 0.028798, + "loss_gen": 4.471880912780762, + "loss_rtd": 0.36207079887390137, + "loss_sent": 0.00018560764146968722, + "loss_sod": 0.32203108072280884, + "loss_total": 0.6842874884605408, + "step": 45399 + }, + { + "epoch": 0.028798, + "loss_gen": 3.6233901977539062, + "loss_rtd": 0.361145943403244, + "loss_sent": 0.006995246745646, + "loss_sod": 0.13985320925712585, + "loss_total": 0.5079944133758545, + "step": 45399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.1605628728866577, + "learning_rate": 8.14292600411104e-05, + "loss": 0.646, + "step": 45400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.472077369689941, + "loss_rtd": 0.3926493525505066, + "loss_sent": 0.24979284405708313, + "loss_sod": 0.018284296616911888, + "loss_total": 0.6607264876365662, + "step": 45499 + }, + { + "epoch": 0.028998, + "loss_gen": 4.657073974609375, + "loss_rtd": 0.37766319513320923, + "loss_sent": 0.13722217082977295, + "loss_sod": 0.05151546746492386, + "loss_total": 0.5664008259773254, + "step": 45499 + }, + { + "epoch": 0.029, + "grad_norm": 1.3693609237670898, + "learning_rate": 8.140457346475232e-05, + "loss": 0.6611, + "step": 45500 + }, + { + "epoch": 0.029198, + "loss_gen": 4.655327796936035, + "loss_rtd": 0.37780165672302246, + "loss_sent": 0.17282724380493164, + "loss_sod": 0.015296641737222672, + "loss_total": 0.5659255385398865, + "step": 45599 + }, + { + "epoch": 0.029198, + "loss_gen": 4.242246627807617, + "loss_rtd": 0.39474278688430786, + "loss_sent": 0.5056430101394653, + "loss_sod": 0.007861332967877388, + "loss_total": 0.9082471132278442, + "step": 45599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.5870112180709839, + "learning_rate": 8.13798742386358e-05, + "loss": 0.6651, + "step": 45600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.367142677307129, + "loss_rtd": 0.38583049178123474, + "loss_sent": 0.14037981629371643, + "loss_sod": 0.041490476578474045, + "loss_total": 0.5677007436752319, + "step": 45699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.504812240600586, + "loss_rtd": 0.3695089519023895, + "loss_sent": 0.19455914199352264, + "loss_sod": 0.0048032961785793304, + "loss_total": 0.5688713788986206, + "step": 45699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.2032526731491089, + "learning_rate": 8.135516237270969e-05, + "loss": 0.6674, + "step": 45700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.352217197418213, + "loss_rtd": 0.3749980926513672, + "loss_sent": 0.2131139487028122, + "loss_sod": 0.18811655044555664, + "loss_total": 0.7762286067008972, + "step": 45799 + }, + { + "epoch": 0.029598, + "loss_gen": 4.451251029968262, + "loss_rtd": 0.3717048466205597, + "loss_sent": 0.18724147975444794, + "loss_sod": 0.07915185391902924, + "loss_total": 0.6380981802940369, + "step": 45799 + }, + { + "epoch": 0.0296, + "grad_norm": 2.019467353820801, + "learning_rate": 8.133043787692794e-05, + "loss": 0.6611, + "step": 45800 + }, + { + "epoch": 0.029798, + "loss_gen": 4.293920993804932, + "loss_rtd": 0.3777517080307007, + "loss_sent": 0.20406286418437958, + "loss_sod": 0.009192945435643196, + "loss_total": 0.5910075306892395, + "step": 45899 + }, + { + "epoch": 0.029798, + "loss_gen": 4.806717872619629, + "loss_rtd": 0.369988352060318, + "loss_sent": 0.07092718034982681, + "loss_sod": 0.06475816667079926, + "loss_total": 0.5056737065315247, + "step": 45899 + }, + { + "epoch": 0.0298, + "grad_norm": 0.9810512661933899, + "learning_rate": 8.130570076124953e-05, + "loss": 0.6566, + "step": 45900 + }, + { + "epoch": 0.029998, + "loss_gen": 4.326590061187744, + "loss_rtd": 0.3545214831829071, + "loss_sent": 0.18595679104328156, + "loss_sod": 0.016033081337809563, + "loss_total": 0.5565113425254822, + "step": 45999 + }, + { + "epoch": 0.029998, + "loss_gen": 4.375558376312256, + "loss_rtd": 0.3825295865535736, + "loss_sent": 0.18366511166095734, + "loss_sod": 0.05017731338739395, + "loss_total": 0.6163719892501831, + "step": 45999 + }, + { + "epoch": 0.03, + "grad_norm": 0.9984399676322937, + "learning_rate": 8.128095103563862e-05, + "loss": 0.6358, + "step": 46000 + }, + { + "epoch": 0.03, + "eval_loss": 0.6287549138069153, + "eval_runtime": 150.7756, + "eval_samples_per_second": 102.424, + "eval_steps_per_second": 0.803, + "step": 46000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.699979305267334, + "loss_rtd": 0.3794102370738983, + "loss_sent": 0.10060257464647293, + "loss_sod": 0.011888833716511726, + "loss_total": 0.4919016361236572, + "step": 46099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.633126258850098, + "loss_rtd": 0.3745104670524597, + "loss_sent": 0.033742573112249374, + "loss_sod": 0.21771006286144257, + "loss_total": 0.6259630918502808, + "step": 46099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.7809391021728516, + "learning_rate": 8.125618871006438e-05, + "loss": 0.6447, + "step": 46100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.639756202697754, + "loss_rtd": 0.4024973511695862, + "loss_sent": 0.11776583641767502, + "loss_sod": 0.1854410469532013, + "loss_total": 0.7057042717933655, + "step": 46199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.52336311340332, + "loss_rtd": 0.3740673065185547, + "loss_sent": 0.2092219889163971, + "loss_sod": 0.11175394058227539, + "loss_total": 0.6950432062149048, + "step": 46199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.1094110012054443, + "learning_rate": 8.123141379450103e-05, + "loss": 0.6424, + "step": 46200 + }, + { + "epoch": 0.000598, + "loss_gen": 3.938033103942871, + "loss_rtd": 0.3785296678543091, + "loss_sent": 0.0002721761120483279, + "loss_sod": 0.5357171297073364, + "loss_total": 0.9145189523696899, + "step": 46299 + }, + { + "epoch": 0.000598, + "loss_gen": 4.006030559539795, + "loss_rtd": 0.37514546513557434, + "loss_sent": 0.10706964135169983, + "loss_sod": 0.14030012488365173, + "loss_total": 0.6225152015686035, + "step": 46299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.5383400917053223, + "learning_rate": 8.120662629892797e-05, + "loss": 0.6485, + "step": 46300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.286434173583984, + "loss_rtd": 0.374002069234848, + "loss_sent": 0.40777915716171265, + "loss_sod": 0.021501773968338966, + "loss_total": 0.8032829761505127, + "step": 46399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.167953968048096, + "loss_rtd": 0.38599392771720886, + "loss_sent": 0.16690266132354736, + "loss_sod": 0.06689120084047318, + "loss_total": 0.6197878122329712, + "step": 46399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.7189841270446777, + "learning_rate": 8.118182623332958e-05, + "loss": 0.6618, + "step": 46400 + }, + { + "epoch": 0.000998, + "loss_gen": 3.8184378147125244, + "loss_rtd": 0.38276156783103943, + "loss_sent": 0.03661860525608063, + "loss_sod": 0.15901032090187073, + "loss_total": 0.5783904790878296, + "step": 46499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.098611831665039, + "loss_rtd": 0.38754209876060486, + "loss_sent": 0.15005142986774445, + "loss_sod": 0.04999028146266937, + "loss_total": 0.5875837802886963, + "step": 46499 + }, + { + "epoch": 0.001, + "grad_norm": 1.010020136833191, + "learning_rate": 8.115701360769527e-05, + "loss": 0.634, + "step": 46500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.693360805511475, + "loss_rtd": 0.3871980905532837, + "loss_sent": 0.08103898912668228, + "loss_sod": 0.07677353918552399, + "loss_total": 0.5450106263160706, + "step": 46599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.113877296447754, + "loss_rtd": 0.3724420368671417, + "loss_sent": 0.3423876464366913, + "loss_sod": 0.06063517928123474, + "loss_total": 0.7754648923873901, + "step": 46599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.7423297166824341, + "learning_rate": 8.113218843201964e-05, + "loss": 0.6648, + "step": 46600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.010592937469482, + "loss_rtd": 0.36755380034446716, + "loss_sent": 0.1772671639919281, + "loss_sod": 0.07207248359918594, + "loss_total": 0.6168934106826782, + "step": 46699 + }, + { + "epoch": 0.001398, + "loss_gen": 4.1527791023254395, + "loss_rtd": 0.3596525490283966, + "loss_sent": 0.012780096381902695, + "loss_sod": 0.08750712871551514, + "loss_total": 0.45993977785110474, + "step": 46699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.2632046937942505, + "learning_rate": 8.110735071630223e-05, + "loss": 0.6344, + "step": 46700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.986955165863037, + "loss_rtd": 0.37553104758262634, + "loss_sent": 0.40427976846694946, + "loss_sod": 0.20503658056259155, + "loss_total": 0.984847366809845, + "step": 46799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.4725422859191895, + "loss_rtd": 0.3738175928592682, + "loss_sent": 0.16060931980609894, + "loss_sod": 0.005055803805589676, + "loss_total": 0.5394827127456665, + "step": 46799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.3313921689987183, + "learning_rate": 8.108250047054763e-05, + "loss": 0.6589, + "step": 46800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.980199813842773, + "loss_rtd": 0.3782990872859955, + "loss_sent": 0.14346514642238617, + "loss_sod": 0.06798546016216278, + "loss_total": 0.5897496938705444, + "step": 46899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.360611438751221, + "loss_rtd": 0.36665692925453186, + "loss_sent": 0.14531777799129486, + "loss_sod": 0.11170309782028198, + "loss_total": 0.6236777901649475, + "step": 46899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.2182822227478027, + "learning_rate": 8.10576377047656e-05, + "loss": 0.6338, + "step": 46900 + }, + { + "epoch": 0.001998, + "loss_gen": 4.34437370300293, + "loss_rtd": 0.3584587872028351, + "loss_sent": 0.15274320542812347, + "loss_sod": 0.09027344733476639, + "loss_total": 0.6014754176139832, + "step": 46999 + }, + { + "epoch": 0.001998, + "loss_gen": 3.8140687942504883, + "loss_rtd": 0.3624805510044098, + "loss_sent": 0.015718601644039154, + "loss_sod": 0.11918334662914276, + "loss_total": 0.4973824918270111, + "step": 46999 + }, + { + "epoch": 0.002, + "grad_norm": 0.7836332321166992, + "learning_rate": 8.103276242897081e-05, + "loss": 0.6446, + "step": 47000 + }, + { + "epoch": 0.002, + "eval_loss": 0.631763756275177, + "eval_runtime": 153.7159, + "eval_samples_per_second": 100.465, + "eval_steps_per_second": 0.787, + "step": 47000 + }, + { + "epoch": 0.002198, + "loss_gen": 4.455953121185303, + "loss_rtd": 0.3843330144882202, + "loss_sent": 0.17107711732387543, + "loss_sod": 0.04746630787849426, + "loss_total": 0.6028764247894287, + "step": 47099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.564654350280762, + "loss_rtd": 0.39598891139030457, + "loss_sent": 0.13187776505947113, + "loss_sod": 0.0783923864364624, + "loss_total": 0.6062590479850769, + "step": 47099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.115729808807373, + "learning_rate": 8.100787465318303e-05, + "loss": 0.656, + "step": 47100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.290502548217773, + "loss_rtd": 0.36489036679267883, + "loss_sent": 0.23262187838554382, + "loss_sod": 0.020622704178094864, + "loss_total": 0.6181349754333496, + "step": 47199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.5484747886657715, + "loss_rtd": 0.3793143033981323, + "loss_sent": 0.05421583727002144, + "loss_sod": 0.04414691403508186, + "loss_total": 0.4776770770549774, + "step": 47199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.9512256979942322, + "learning_rate": 8.098297438742703e-05, + "loss": 0.657, + "step": 47200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.363588809967041, + "loss_rtd": 0.3838651180267334, + "loss_sent": 0.08407268673181534, + "loss_sod": 0.036666594445705414, + "loss_total": 0.5046043992042542, + "step": 47299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.633561611175537, + "loss_rtd": 0.36992567777633667, + "loss_sent": 0.16519032418727875, + "loss_sod": 0.040895238518714905, + "loss_total": 0.5760112404823303, + "step": 47299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.8617016077041626, + "learning_rate": 8.095806164173265e-05, + "loss": 0.6538, + "step": 47300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.396857738494873, + "loss_rtd": 0.38025814294815063, + "loss_sent": 0.24536250531673431, + "loss_sod": 0.07464287430047989, + "loss_total": 0.700263500213623, + "step": 47399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.5637030601501465, + "loss_rtd": 0.3803219795227051, + "loss_sent": 0.12097058445215225, + "loss_sod": 0.10877542942762375, + "loss_total": 0.6100680232048035, + "step": 47399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.07035493850708, + "learning_rate": 8.093313642613476e-05, + "loss": 0.6548, + "step": 47400 + }, + { + "epoch": 0.002998, + "loss_gen": 4.511462211608887, + "loss_rtd": 0.3990772068500519, + "loss_sent": 0.2437988668680191, + "loss_sod": 0.029308389872312546, + "loss_total": 0.6721844673156738, + "step": 47499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.698146343231201, + "loss_rtd": 0.3864766061306, + "loss_sent": 0.21681182086467743, + "loss_sod": 0.15329721570014954, + "loss_total": 0.7565856575965881, + "step": 47499 + }, + { + "epoch": 0.003, + "grad_norm": 0.8426814079284668, + "learning_rate": 8.090819875067322e-05, + "loss": 0.6479, + "step": 47500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.979746341705322, + "loss_rtd": 0.3795184791088104, + "loss_sent": 0.2567595839500427, + "loss_sod": 0.09550534188747406, + "loss_total": 0.731783390045166, + "step": 47599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.728386878967285, + "loss_rtd": 0.3797706663608551, + "loss_sent": 0.10465622693300247, + "loss_sod": 0.16287440061569214, + "loss_total": 0.6473013162612915, + "step": 47599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.1490349769592285, + "learning_rate": 8.088324862539289e-05, + "loss": 0.6489, + "step": 47600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.331116676330566, + "loss_rtd": 0.36512210965156555, + "loss_sent": 0.03072904609143734, + "loss_sod": 0.030262088403105736, + "loss_total": 0.4261132478713989, + "step": 47699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.617870330810547, + "loss_rtd": 0.3699868321418762, + "loss_sent": 0.20203708112239838, + "loss_sod": 0.07554800808429718, + "loss_total": 0.6475719213485718, + "step": 47699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.831415593624115, + "learning_rate": 8.085828606034374e-05, + "loss": 0.6402, + "step": 47700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.519674777984619, + "loss_rtd": 0.3722618520259857, + "loss_sent": 0.2476036697626114, + "loss_sod": 0.07768885791301727, + "loss_total": 0.697554349899292, + "step": 47799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.585041046142578, + "loss_rtd": 0.3761773705482483, + "loss_sent": 0.09144699573516846, + "loss_sod": 0.0625835731625557, + "loss_total": 0.5302079319953918, + "step": 47799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.1098629236221313, + "learning_rate": 8.083331106558063e-05, + "loss": 0.6385, + "step": 47800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.606651782989502, + "loss_rtd": 0.3894237279891968, + "loss_sent": 0.14827781915664673, + "loss_sod": 0.08602949231863022, + "loss_total": 0.6237310171127319, + "step": 47899 + }, + { + "epoch": 0.003798, + "loss_gen": 4.438484191894531, + "loss_rtd": 0.3605833649635315, + "loss_sent": 0.24012890458106995, + "loss_sod": 0.02536821737885475, + "loss_total": 0.6260805130004883, + "step": 47899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.7561204433441162, + "learning_rate": 8.080832365116353e-05, + "loss": 0.6584, + "step": 47900 + }, + { + "epoch": 0.003998, + "loss_gen": 4.2903242111206055, + "loss_rtd": 0.36389920115470886, + "loss_sent": 0.08843392133712769, + "loss_sod": 0.031522657722234726, + "loss_total": 0.4838557839393616, + "step": 47999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.898620128631592, + "loss_rtd": 0.3597998321056366, + "loss_sent": 0.16160465776920319, + "loss_sod": 0.10634209215641022, + "loss_total": 0.62774658203125, + "step": 47999 + }, + { + "epoch": 0.004, + "grad_norm": 0.942060112953186, + "learning_rate": 8.078332382715734e-05, + "loss": 0.6421, + "step": 48000 + }, + { + "epoch": 0.004, + "eval_loss": 0.6217095851898193, + "eval_runtime": 151.3111, + "eval_samples_per_second": 102.061, + "eval_steps_per_second": 0.8, + "step": 48000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.645877361297607, + "loss_rtd": 0.39046603441238403, + "loss_sent": 0.23971006274223328, + "loss_sod": 0.0026833200827240944, + "loss_total": 0.6328594088554382, + "step": 48099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.066583633422852, + "loss_rtd": 0.364145427942276, + "loss_sent": 0.3097667396068573, + "loss_sod": 0.05506974458694458, + "loss_total": 0.7289819121360779, + "step": 48099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.9875011444091797, + "learning_rate": 8.075831160363199e-05, + "loss": 0.647, + "step": 48100 + }, + { + "epoch": 0.004398, + "loss_gen": 4.433474063873291, + "loss_rtd": 0.3740593492984772, + "loss_sent": 0.2557953894138336, + "loss_sod": 0.040519848465919495, + "loss_total": 0.6703746318817139, + "step": 48199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.800586700439453, + "loss_rtd": 0.37522590160369873, + "loss_sent": 0.21444040536880493, + "loss_sod": 0.023426318541169167, + "loss_total": 0.6130926609039307, + "step": 48199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.2088860273361206, + "learning_rate": 8.073328699066241e-05, + "loss": 0.6474, + "step": 48200 + }, + { + "epoch": 0.004598, + "loss_gen": 4.785979747772217, + "loss_rtd": 0.38683080673217773, + "loss_sent": 0.27298280596733093, + "loss_sod": 0.11252139508724213, + "loss_total": 0.7723350524902344, + "step": 48299 + }, + { + "epoch": 0.004598, + "loss_gen": 4.735990524291992, + "loss_rtd": 0.38740187883377075, + "loss_sent": 0.33720114827156067, + "loss_sod": 0.06438102573156357, + "loss_total": 0.7889840602874756, + "step": 48299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.7969639301300049, + "learning_rate": 8.07082499983285e-05, + "loss": 0.6591, + "step": 48300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.53634786605835, + "loss_rtd": 0.3495190739631653, + "loss_sent": 0.26451629400253296, + "loss_sod": 0.1641450822353363, + "loss_total": 0.7781804203987122, + "step": 48399 + }, + { + "epoch": 0.004798, + "loss_gen": 4.199224948883057, + "loss_rtd": 0.38749638199806213, + "loss_sent": 0.24783599376678467, + "loss_sod": 0.08294946700334549, + "loss_total": 0.7182818651199341, + "step": 48399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.0061957836151123, + "learning_rate": 8.068320063671517e-05, + "loss": 0.6358, + "step": 48400 + }, + { + "epoch": 0.004998, + "loss_gen": 4.236148834228516, + "loss_rtd": 0.384494423866272, + "loss_sent": 0.24837873876094818, + "loss_sod": 0.011858688667416573, + "loss_total": 0.644731879234314, + "step": 48499 + }, + { + "epoch": 0.004998, + "loss_gen": 4.68426513671875, + "loss_rtd": 0.3773494362831116, + "loss_sent": 0.05412619560956955, + "loss_sod": 0.009332123212516308, + "loss_total": 0.4408077597618103, + "step": 48499 + }, + { + "epoch": 0.005, + "grad_norm": 0.7440939545631409, + "learning_rate": 8.065813891591229e-05, + "loss": 0.6548, + "step": 48500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.069899559020996, + "loss_rtd": 0.3818332850933075, + "loss_sent": 0.2279142588376999, + "loss_sod": 0.05160973593592644, + "loss_total": 0.6613572835922241, + "step": 48599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.452792167663574, + "loss_rtd": 0.3705146610736847, + "loss_sent": 0.25298821926116943, + "loss_sod": 0.10445769131183624, + "loss_total": 0.7279605865478516, + "step": 48599 + }, + { + "epoch": 0.0052, + "grad_norm": 2.2869088649749756, + "learning_rate": 8.063306484601472e-05, + "loss": 0.6486, + "step": 48600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.08968448638916, + "loss_rtd": 0.38225290179252625, + "loss_sent": 0.07657121121883392, + "loss_sod": 0.21210332214832306, + "loss_total": 0.6709274649620056, + "step": 48699 + }, + { + "epoch": 0.005398, + "loss_gen": 3.7614517211914062, + "loss_rtd": 0.36560240387916565, + "loss_sent": 6.131923146313056e-05, + "loss_sod": 0.2490212619304657, + "loss_total": 0.6146849989891052, + "step": 48699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.0264867544174194, + "learning_rate": 8.06079784371223e-05, + "loss": 0.6448, + "step": 48700 + }, + { + "epoch": 0.005598, + "loss_gen": 4.340548038482666, + "loss_rtd": 0.39176028966903687, + "loss_sent": 0.17097064852714539, + "loss_sod": 0.01702743023633957, + "loss_total": 0.5797584056854248, + "step": 48799 + }, + { + "epoch": 0.005598, + "loss_gen": 4.355989933013916, + "loss_rtd": 0.384242445230484, + "loss_sent": 0.21529555320739746, + "loss_sod": 0.03836958855390549, + "loss_total": 0.6379076242446899, + "step": 48799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.3593602180480957, + "learning_rate": 8.05828796993398e-05, + "loss": 0.644, + "step": 48800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.421856880187988, + "loss_rtd": 0.37311193346977234, + "loss_sent": 0.46581143140792847, + "loss_sod": 0.016459614038467407, + "loss_total": 0.8553829789161682, + "step": 48899 + }, + { + "epoch": 0.005798, + "loss_gen": 4.63135290145874, + "loss_rtd": 0.3631671667098999, + "loss_sent": 0.17805886268615723, + "loss_sod": 0.02936861291527748, + "loss_total": 0.5705946683883667, + "step": 48899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.2808173894882202, + "learning_rate": 8.055776864277704e-05, + "loss": 0.644, + "step": 48900 + }, + { + "epoch": 0.005998, + "loss_gen": 4.752480506896973, + "loss_rtd": 0.3505268692970276, + "loss_sent": 0.11548473685979843, + "loss_sod": 0.0524880513548851, + "loss_total": 0.5184996724128723, + "step": 48999 + }, + { + "epoch": 0.005998, + "loss_gen": 4.753820896148682, + "loss_rtd": 0.3656466007232666, + "loss_sent": 0.16045430302619934, + "loss_sod": 0.01331318262964487, + "loss_total": 0.5394140481948853, + "step": 48999 + }, + { + "epoch": 0.006, + "grad_norm": 1.23256516456604, + "learning_rate": 8.053264527754871e-05, + "loss": 0.6408, + "step": 49000 + }, + { + "epoch": 0.006, + "eval_loss": 0.625141441822052, + "eval_runtime": 152.3065, + "eval_samples_per_second": 101.394, + "eval_steps_per_second": 0.794, + "step": 49000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.363494873046875, + "loss_rtd": 0.36784136295318604, + "loss_sent": 0.3462200164794922, + "loss_sod": 0.017456237226724625, + "loss_total": 0.7315176129341125, + "step": 49099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.641960620880127, + "loss_rtd": 0.3982604444026947, + "loss_sent": 0.400336354970932, + "loss_sod": 0.012596643529832363, + "loss_total": 0.8111934661865234, + "step": 49099 + }, + { + "epoch": 0.0062, + "grad_norm": 2.1316230297088623, + "learning_rate": 8.050750961377454e-05, + "loss": 0.6428, + "step": 49100 + }, + { + "epoch": 0.006398, + "loss_gen": 3.52602219581604, + "loss_rtd": 0.3481273949146271, + "loss_sent": 0.03384261578321457, + "loss_sod": 0.1904320865869522, + "loss_total": 0.5724021196365356, + "step": 49199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.659766674041748, + "loss_rtd": 0.3823029696941376, + "loss_sent": 0.3093835413455963, + "loss_sod": 0.13419994711875916, + "loss_total": 0.8258864879608154, + "step": 49199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.3852447271347046, + "learning_rate": 8.048236166157912e-05, + "loss": 0.6398, + "step": 49200 + }, + { + "epoch": 0.006598, + "loss_gen": 3.8257486820220947, + "loss_rtd": 0.3698168098926544, + "loss_sent": 0.042054060846567154, + "loss_sod": 0.16035039722919464, + "loss_total": 0.5722212791442871, + "step": 49299 + }, + { + "epoch": 0.006598, + "loss_gen": 4.6005539894104, + "loss_rtd": 0.3794531226158142, + "loss_sent": 0.2771409749984741, + "loss_sod": 0.052813343703746796, + "loss_total": 0.7094074487686157, + "step": 49299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.4007338285446167, + "learning_rate": 8.045720143109208e-05, + "loss": 0.6283, + "step": 49300 + }, + { + "epoch": 0.006798, + "loss_gen": 4.497256278991699, + "loss_rtd": 0.3654901087284088, + "loss_sent": 0.270648330450058, + "loss_sod": 0.2049960047006607, + "loss_total": 0.8411344289779663, + "step": 49399 + }, + { + "epoch": 0.006798, + "loss_gen": 4.614016532897949, + "loss_rtd": 0.3822830319404602, + "loss_sent": 0.19587868452072144, + "loss_sod": 0.032051846385002136, + "loss_total": 0.610213577747345, + "step": 49399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.296242594718933, + "learning_rate": 8.043202893244793e-05, + "loss": 0.6611, + "step": 49400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.415952682495117, + "loss_rtd": 0.3904218375682831, + "loss_sent": 0.1924910992383957, + "loss_sod": 0.0072877234779298306, + "loss_total": 0.590200662612915, + "step": 49499 + }, + { + "epoch": 0.006998, + "loss_gen": 4.368340969085693, + "loss_rtd": 0.3858594000339508, + "loss_sent": 0.47357892990112305, + "loss_sod": 0.024555889889597893, + "loss_total": 0.8839942216873169, + "step": 49499 + }, + { + "epoch": 0.007, + "grad_norm": 1.4348816871643066, + "learning_rate": 8.040684417578617e-05, + "loss": 0.6278, + "step": 49500 + }, + { + "epoch": 0.007198, + "loss_gen": 3.6858749389648438, + "loss_rtd": 0.37742456793785095, + "loss_sent": 4.36127302236855e-05, + "loss_sod": 0.22768960893154144, + "loss_total": 0.6051577925682068, + "step": 49599 + }, + { + "epoch": 0.007198, + "loss_gen": 3.604168653488159, + "loss_rtd": 0.3806535005569458, + "loss_sent": 6.0545422456925735e-05, + "loss_sod": 0.1864035576581955, + "loss_total": 0.5671176314353943, + "step": 49599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.1366004943847656, + "learning_rate": 8.038164717125123e-05, + "loss": 0.6474, + "step": 49600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.598033905029297, + "loss_rtd": 0.3724918067455292, + "loss_sent": 0.15030938386917114, + "loss_sod": 0.06815709173679352, + "loss_total": 0.590958297252655, + "step": 49699 + }, + { + "epoch": 0.007398, + "loss_gen": 4.401187896728516, + "loss_rtd": 0.3982204496860504, + "loss_sent": 0.2785221338272095, + "loss_sod": 0.030482318252325058, + "loss_total": 0.7072249054908752, + "step": 49699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.4378366470336914, + "learning_rate": 8.035643792899243e-05, + "loss": 0.6496, + "step": 49700 + }, + { + "epoch": 0.007598, + "loss_gen": 4.564473628997803, + "loss_rtd": 0.3646455407142639, + "loss_sent": 0.40471068024635315, + "loss_sod": 0.06631335616111755, + "loss_total": 0.8356695771217346, + "step": 49799 + }, + { + "epoch": 0.007598, + "loss_gen": 4.528884410858154, + "loss_rtd": 0.3850195109844208, + "loss_sent": 0.250173956155777, + "loss_sod": 0.07861298322677612, + "loss_total": 0.7138064503669739, + "step": 49799 + }, + { + "epoch": 0.0076, + "grad_norm": 2.3664045333862305, + "learning_rate": 8.033121645916407e-05, + "loss": 0.6501, + "step": 49800 + }, + { + "epoch": 0.007798, + "loss_gen": 4.976553440093994, + "loss_rtd": 0.38571128249168396, + "loss_sent": 0.4492168426513672, + "loss_sod": 0.05952766165137291, + "loss_total": 0.8944557905197144, + "step": 49899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.6225996017456055, + "loss_rtd": 0.3705814778804779, + "loss_sent": 0.19304804503917694, + "loss_sod": 0.059937480837106705, + "loss_total": 0.6235669851303101, + "step": 49899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.488413691520691, + "learning_rate": 8.030598277192533e-05, + "loss": 0.6436, + "step": 49900 + }, + { + "epoch": 0.007998, + "loss_gen": 4.49429178237915, + "loss_rtd": 0.38548919558525085, + "loss_sent": 0.34847933053970337, + "loss_sod": 0.031637679785490036, + "loss_total": 0.765606164932251, + "step": 49999 + }, + { + "epoch": 0.007998, + "loss_gen": 4.457889556884766, + "loss_rtd": 0.37796905636787415, + "loss_sent": 0.15737062692642212, + "loss_sod": 0.05866087228059769, + "loss_total": 0.5940005779266357, + "step": 49999 + }, + { + "epoch": 0.008, + "grad_norm": 1.426696538925171, + "learning_rate": 8.028073687744037e-05, + "loss": 0.6535, + "step": 50000 + }, + { + "epoch": 0.008, + "eval_loss": 0.6143267154693604, + "eval_runtime": 151.025, + "eval_samples_per_second": 102.255, + "eval_steps_per_second": 0.801, + "step": 50000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.058149814605713, + "loss_rtd": 0.3683951795101166, + "loss_sent": 0.30114972591400146, + "loss_sod": 0.03265371918678284, + "loss_total": 0.7021986246109009, + "step": 50099 + }, + { + "epoch": 0.008198, + "loss_gen": 3.9967827796936035, + "loss_rtd": 0.375331312417984, + "loss_sent": 0.007260024547576904, + "loss_sod": 0.33007389307022095, + "loss_total": 0.7126652598381042, + "step": 50099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.1502575874328613, + "learning_rate": 8.025547878587822e-05, + "loss": 0.6521, + "step": 50100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.803343772888184, + "loss_rtd": 0.37026453018188477, + "loss_sent": 1.1277397871017456, + "loss_sod": 0.06365317106246948, + "loss_total": 1.561657428741455, + "step": 50199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.387077808380127, + "loss_rtd": 0.3710021674633026, + "loss_sent": 0.3957918584346771, + "loss_sod": 0.05120411515235901, + "loss_total": 0.8179981708526611, + "step": 50199 + }, + { + "epoch": 0.0084, + "grad_norm": 4.342261791229248, + "learning_rate": 8.023020850741283e-05, + "loss": 0.6595, + "step": 50200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.672738552093506, + "loss_rtd": 0.3915119171142578, + "loss_sent": 0.21460790932178497, + "loss_sod": 0.18659816682338715, + "loss_total": 0.7927179932594299, + "step": 50299 + }, + { + "epoch": 0.008598, + "loss_gen": 3.972071886062622, + "loss_rtd": 0.3741694390773773, + "loss_sent": 0.0057837218046188354, + "loss_sod": 0.14437425136566162, + "loss_total": 0.5243274569511414, + "step": 50299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.3088226318359375, + "learning_rate": 8.020492605222307e-05, + "loss": 0.6495, + "step": 50300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.626949787139893, + "loss_rtd": 0.3816995620727539, + "loss_sent": 0.16593150794506073, + "loss_sod": 0.075348399579525, + "loss_total": 0.622979462146759, + "step": 50399 + }, + { + "epoch": 0.008798, + "loss_gen": 4.743112087249756, + "loss_rtd": 0.36963286995887756, + "loss_sent": 0.3240692913532257, + "loss_sod": 0.026848290115594864, + "loss_total": 0.7205504179000854, + "step": 50399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.5165996551513672, + "learning_rate": 8.01796314304927e-05, + "loss": 0.6421, + "step": 50400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.586883544921875, + "loss_rtd": 0.3631702661514282, + "loss_sent": 0.3149803578853607, + "loss_sod": 0.027500925585627556, + "loss_total": 0.7056515216827393, + "step": 50499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.823122501373291, + "loss_rtd": 0.3803825378417969, + "loss_sent": 0.15825589001178741, + "loss_sod": 0.09417456388473511, + "loss_total": 0.6328129768371582, + "step": 50499 + }, + { + "epoch": 0.009, + "grad_norm": 2.1964752674102783, + "learning_rate": 8.015432465241039e-05, + "loss": 0.6484, + "step": 50500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.64653205871582, + "loss_rtd": 0.3533076047897339, + "loss_sent": 0.27814698219299316, + "loss_sod": 0.06675729155540466, + "loss_total": 0.6982119083404541, + "step": 50599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.527213096618652, + "loss_rtd": 0.35981041193008423, + "loss_sent": 0.10821152478456497, + "loss_sod": 0.14620551466941833, + "loss_total": 0.6142274141311646, + "step": 50599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.4024051427841187, + "learning_rate": 8.012900572816973e-05, + "loss": 0.6389, + "step": 50600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.538909435272217, + "loss_rtd": 0.3922789394855499, + "loss_sent": 0.1413494348526001, + "loss_sod": 0.025570761412382126, + "loss_total": 0.5591990947723389, + "step": 50699 + }, + { + "epoch": 0.009398, + "loss_gen": 4.608863830566406, + "loss_rtd": 0.3743917644023895, + "loss_sent": 0.1731317639350891, + "loss_sod": 0.029530156403779984, + "loss_total": 0.5770536661148071, + "step": 50699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.3676615953445435, + "learning_rate": 8.010367466796917e-05, + "loss": 0.6389, + "step": 50700 + }, + { + "epoch": 0.009598, + "loss_gen": 3.994856834411621, + "loss_rtd": 0.3614933490753174, + "loss_sent": 0.005994033999741077, + "loss_sod": 0.2948499321937561, + "loss_total": 0.6623373031616211, + "step": 50799 + }, + { + "epoch": 0.009598, + "loss_gen": 4.143596649169922, + "loss_rtd": 0.36337488889694214, + "loss_sent": 0.004016831982880831, + "loss_sod": 0.23833590745925903, + "loss_total": 0.6057276129722595, + "step": 50799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.218487024307251, + "learning_rate": 8.007833148201205e-05, + "loss": 0.656, + "step": 50800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.584593772888184, + "loss_rtd": 0.3748578727245331, + "loss_sent": 0.11570402979850769, + "loss_sod": 0.010673943907022476, + "loss_total": 0.501235842704773, + "step": 50899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.303126335144043, + "loss_rtd": 0.38027480244636536, + "loss_sent": 0.27084439992904663, + "loss_sod": 0.04908788576722145, + "loss_total": 0.7002071142196655, + "step": 50899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.1877902746200562, + "learning_rate": 8.00529761805066e-05, + "loss": 0.6305, + "step": 50900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.418270587921143, + "loss_rtd": 0.3844428062438965, + "loss_sent": 0.1653502732515335, + "loss_sod": 0.14827539026737213, + "loss_total": 0.6980684399604797, + "step": 50999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.313439846038818, + "loss_rtd": 0.3674570620059967, + "loss_sent": 0.015818167477846146, + "loss_sod": 0.13971030712127686, + "loss_total": 0.5229855179786682, + "step": 50999 + }, + { + "epoch": 0.01, + "grad_norm": 0.9348272085189819, + "learning_rate": 8.002760877366594e-05, + "loss": 0.6529, + "step": 51000 + }, + { + "epoch": 0.01, + "eval_loss": 0.6201492547988892, + "eval_runtime": 150.9105, + "eval_samples_per_second": 102.332, + "eval_steps_per_second": 0.802, + "step": 51000 + }, + { + "epoch": 0.010198, + "loss_gen": 4.538188934326172, + "loss_rtd": 0.38228562474250793, + "loss_sent": 0.21149754524230957, + "loss_sod": 0.057641901075839996, + "loss_total": 0.6514250636100769, + "step": 51099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.536489963531494, + "loss_rtd": 0.3845718801021576, + "loss_sent": 0.13618037104606628, + "loss_sod": 0.01992986351251602, + "loss_total": 0.5406820774078369, + "step": 51099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.323643684387207, + "learning_rate": 8.000222927170806e-05, + "loss": 0.6422, + "step": 51100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.168093204498291, + "loss_rtd": 0.3785075545310974, + "loss_sent": 0.10471253842115402, + "loss_sod": 0.003981872461736202, + "loss_total": 0.4872019588947296, + "step": 51199 + }, + { + "epoch": 0.010398, + "loss_gen": 4.457294464111328, + "loss_rtd": 0.3768188953399658, + "loss_sent": 0.1765183061361313, + "loss_sod": 0.11947241425514221, + "loss_total": 0.6728096008300781, + "step": 51199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.7407876253128052, + "learning_rate": 7.997683768485582e-05, + "loss": 0.6439, + "step": 51200 + }, + { + "epoch": 0.010598, + "loss_gen": 4.623007774353027, + "loss_rtd": 0.36841443181037903, + "loss_sent": 0.3081963062286377, + "loss_sod": 0.06185394525527954, + "loss_total": 0.7384647130966187, + "step": 51299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.464491844177246, + "loss_rtd": 0.3853316009044647, + "loss_sent": 0.20332349836826324, + "loss_sod": 0.07168055325746536, + "loss_total": 0.6603356599807739, + "step": 51299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9847090840339661, + "learning_rate": 7.995143402333693e-05, + "loss": 0.6452, + "step": 51300 + }, + { + "epoch": 0.010798, + "loss_gen": 4.834001064300537, + "loss_rtd": 0.37550100684165955, + "loss_sent": 0.1689801961183548, + "loss_sod": 0.017282772809267044, + "loss_total": 0.5617640018463135, + "step": 51399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.595366477966309, + "loss_rtd": 0.36696791648864746, + "loss_sent": 0.08832325786352158, + "loss_sod": 0.02172490954399109, + "loss_total": 0.4770160913467407, + "step": 51399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.0254968404769897, + "learning_rate": 7.992601829738401e-05, + "loss": 0.6249, + "step": 51400 + }, + { + "epoch": 0.010998, + "loss_gen": 4.401004314422607, + "loss_rtd": 0.37882962822914124, + "loss_sent": 0.18529866635799408, + "loss_sod": 0.15116427838802338, + "loss_total": 0.7152925729751587, + "step": 51499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.549426078796387, + "loss_rtd": 0.3743341565132141, + "loss_sent": 0.3842788636684418, + "loss_sod": 0.043490782380104065, + "loss_total": 0.8021037578582764, + "step": 51499 + }, + { + "epoch": 0.011, + "grad_norm": 2.514681816101074, + "learning_rate": 7.99005905172345e-05, + "loss": 0.6283, + "step": 51500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.237088680267334, + "loss_rtd": 0.36371999979019165, + "loss_sent": 0.20528589189052582, + "loss_sod": 0.025060106068849564, + "loss_total": 0.5940660238265991, + "step": 51599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.785157203674316, + "loss_rtd": 0.35488858819007874, + "loss_sent": 0.22521014511585236, + "loss_sod": 0.08783333748579025, + "loss_total": 0.6679320931434631, + "step": 51599 + }, + { + "epoch": 0.0112, + "grad_norm": 2.1390838623046875, + "learning_rate": 7.98751506931307e-05, + "loss": 0.6395, + "step": 51600 + }, + { + "epoch": 0.011398, + "loss_gen": 3.7305500507354736, + "loss_rtd": 0.36545830965042114, + "loss_sent": 0.09047655761241913, + "loss_sod": 0.060882482677698135, + "loss_total": 0.5168173313140869, + "step": 51699 + }, + { + "epoch": 0.011398, + "loss_gen": 3.937504768371582, + "loss_rtd": 0.3714247941970825, + "loss_sent": 0.03422936052083969, + "loss_sod": 0.1963171362876892, + "loss_total": 0.6019712686538696, + "step": 51699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.8457944393157959, + "learning_rate": 7.984969883531977e-05, + "loss": 0.6354, + "step": 51700 + }, + { + "epoch": 0.011598, + "loss_gen": 4.4090070724487305, + "loss_rtd": 0.3685651123523712, + "loss_sent": 0.2501409649848938, + "loss_sod": 0.10854386538267136, + "loss_total": 0.7272499799728394, + "step": 51799 + }, + { + "epoch": 0.011598, + "loss_gen": 4.1526689529418945, + "loss_rtd": 0.3585469722747803, + "loss_sent": 0.05364396050572395, + "loss_sod": 0.046622730791568756, + "loss_total": 0.4588136672973633, + "step": 51799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.0936580896377563, + "learning_rate": 7.982423495405373e-05, + "loss": 0.6519, + "step": 51800 + }, + { + "epoch": 0.011798, + "loss_gen": 4.608326435089111, + "loss_rtd": 0.36813756823539734, + "loss_sent": 0.13579003512859344, + "loss_sod": 0.029388481751084328, + "loss_total": 0.5333160758018494, + "step": 51899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.717540264129639, + "loss_rtd": 0.3761679530143738, + "loss_sent": 0.14211294054985046, + "loss_sod": 0.05682980641722679, + "loss_total": 0.575110673904419, + "step": 51899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.7849305868148804, + "learning_rate": 7.979875905958942e-05, + "loss": 0.6327, + "step": 51900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.289188385009766, + "loss_rtd": 0.3613738715648651, + "loss_sent": 0.03220400959253311, + "loss_sod": 0.12358319014310837, + "loss_total": 0.5171610713005066, + "step": 51999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.805292129516602, + "loss_rtd": 0.37166833877563477, + "loss_sent": 0.46547287702560425, + "loss_sod": 0.08356957137584686, + "loss_total": 0.9207108020782471, + "step": 51999 + }, + { + "epoch": 0.012, + "grad_norm": 1.9747730493545532, + "learning_rate": 7.977327116218851e-05, + "loss": 0.638, + "step": 52000 + }, + { + "epoch": 0.012, + "eval_loss": 0.617628276348114, + "eval_runtime": 151.2384, + "eval_samples_per_second": 102.11, + "eval_steps_per_second": 0.8, + "step": 52000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.451638221740723, + "loss_rtd": 0.3651028871536255, + "loss_sent": 0.4473474323749542, + "loss_sod": 0.07271867990493774, + "loss_total": 0.8851690292358398, + "step": 52099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.706387996673584, + "loss_rtd": 0.3877633512020111, + "loss_sent": 0.375333696603775, + "loss_sod": 0.03720249608159065, + "loss_total": 0.8002995252609253, + "step": 52099 + }, + { + "epoch": 0.0122, + "grad_norm": 2.543621301651001, + "learning_rate": 7.974777127211755e-05, + "loss": 0.6345, + "step": 52100 + }, + { + "epoch": 0.012398, + "loss_gen": 4.720627784729004, + "loss_rtd": 0.37399521470069885, + "loss_sent": 0.37357261776924133, + "loss_sod": 0.06452462822198868, + "loss_total": 0.8120924234390259, + "step": 52199 + }, + { + "epoch": 0.012398, + "loss_gen": 4.308831691741943, + "loss_rtd": 0.37242117524147034, + "loss_sent": 0.09280207008123398, + "loss_sod": 0.16130012273788452, + "loss_total": 0.6265233755111694, + "step": 52199 + }, + { + "epoch": 0.0124, + "grad_norm": 2.0667686462402344, + "learning_rate": 7.972225939964786e-05, + "loss": 0.6393, + "step": 52200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.100876808166504, + "loss_rtd": 0.39659249782562256, + "loss_sent": 0.15490469336509705, + "loss_sod": 0.011322874575853348, + "loss_total": 0.5628200769424438, + "step": 52299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.57742166519165, + "loss_rtd": 0.38593554496765137, + "loss_sent": 0.3413234353065491, + "loss_sod": 0.039375368505716324, + "loss_total": 0.7666343450546265, + "step": 52299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.801899790763855, + "learning_rate": 7.969673555505566e-05, + "loss": 0.6517, + "step": 52300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.279021263122559, + "loss_rtd": 0.3841288685798645, + "loss_sent": 0.08024395257234573, + "loss_sod": 0.001899349270388484, + "loss_total": 0.46627217531204224, + "step": 52399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.565447807312012, + "loss_rtd": 0.3720090687274933, + "loss_sent": 0.06411219388246536, + "loss_sod": 0.05488793924450874, + "loss_total": 0.4910092055797577, + "step": 52399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.7367493510246277, + "learning_rate": 7.967119974862192e-05, + "loss": 0.6567, + "step": 52400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.1065802574157715, + "loss_rtd": 0.3532801568508148, + "loss_sent": 0.23175480961799622, + "loss_sod": 0.10666516423225403, + "loss_total": 0.6917001008987427, + "step": 52499 + }, + { + "epoch": 0.012998, + "loss_gen": 4.823935508728027, + "loss_rtd": 0.37401440739631653, + "loss_sent": 0.1524634212255478, + "loss_sod": 0.04514380916953087, + "loss_total": 0.5716216564178467, + "step": 52499 + }, + { + "epoch": 0.013, + "grad_norm": 1.4354084730148315, + "learning_rate": 7.964565199063246e-05, + "loss": 0.6451, + "step": 52500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.656192302703857, + "loss_rtd": 0.37465882301330566, + "loss_sent": 0.4301958978176117, + "loss_sod": 0.02238663285970688, + "loss_total": 0.8272413611412048, + "step": 52599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.415437698364258, + "loss_rtd": 0.3755865693092346, + "loss_sent": 0.17707963287830353, + "loss_sod": 0.10570264607667923, + "loss_total": 0.6583688259124756, + "step": 52599 + }, + { + "epoch": 0.0132, + "grad_norm": 2.0551586151123047, + "learning_rate": 7.962009229137794e-05, + "loss": 0.6374, + "step": 52600 + }, + { + "epoch": 0.013398, + "loss_gen": 4.528020858764648, + "loss_rtd": 0.36643391847610474, + "loss_sent": 0.2107040286064148, + "loss_sod": 0.08042527735233307, + "loss_total": 0.6575632095336914, + "step": 52699 + }, + { + "epoch": 0.013398, + "loss_gen": 4.34305477142334, + "loss_rtd": 0.3671649396419525, + "loss_sent": 0.11882704496383667, + "loss_sod": 0.03933439403772354, + "loss_total": 0.5253263711929321, + "step": 52699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.9808692336082458, + "learning_rate": 7.959452066115378e-05, + "loss": 0.6453, + "step": 52700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.5166168212890625, + "loss_rtd": 0.3739977777004242, + "loss_sent": 0.05713487043976784, + "loss_sod": 0.024063939228653908, + "loss_total": 0.4551965892314911, + "step": 52799 + }, + { + "epoch": 0.013598, + "loss_gen": 4.536118030548096, + "loss_rtd": 0.3654595911502838, + "loss_sent": 0.12023632973432541, + "loss_sod": 0.027032244950532913, + "loss_total": 0.5127281546592712, + "step": 52799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.7415221333503723, + "learning_rate": 7.956893711026023e-05, + "loss": 0.6476, + "step": 52800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.027712821960449, + "loss_rtd": 0.3675023317337036, + "loss_sent": 0.14814315736293793, + "loss_sod": 0.0724860429763794, + "loss_total": 0.5881315469741821, + "step": 52899 + }, + { + "epoch": 0.013798, + "loss_gen": 4.500967979431152, + "loss_rtd": 0.3762381672859192, + "loss_sent": 0.3494221866130829, + "loss_sod": 0.03951923921704292, + "loss_total": 0.7651796340942383, + "step": 52899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.3134151697158813, + "learning_rate": 7.954334164900235e-05, + "loss": 0.647, + "step": 52900 + }, + { + "epoch": 0.013998, + "loss_gen": 4.35737943649292, + "loss_rtd": 0.36826416850090027, + "loss_sent": 0.05364976450800896, + "loss_sod": 0.006631971336901188, + "loss_total": 0.42854589223861694, + "step": 52999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.280889987945557, + "loss_rtd": 0.37097907066345215, + "loss_sent": 0.06889402866363525, + "loss_sod": 0.1725533902645111, + "loss_total": 0.6124265193939209, + "step": 52999 + }, + { + "epoch": 0.014, + "grad_norm": 0.7555345892906189, + "learning_rate": 7.951773428769001e-05, + "loss": 0.6255, + "step": 53000 + }, + { + "epoch": 0.014, + "eval_loss": 0.62068110704422, + "eval_runtime": 150.9954, + "eval_samples_per_second": 102.275, + "eval_steps_per_second": 0.801, + "step": 53000 + }, + { + "epoch": 0.014198, + "loss_gen": 4.6845703125, + "loss_rtd": 0.3711201548576355, + "loss_sent": 0.24300257861614227, + "loss_sod": 0.1730903834104538, + "loss_total": 0.787213146686554, + "step": 53099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.5488152503967285, + "loss_rtd": 0.36780834197998047, + "loss_sent": 0.07398200035095215, + "loss_sod": 0.06251496821641922, + "loss_total": 0.5043053030967712, + "step": 53099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.8355894088745117, + "learning_rate": 7.94921150366378e-05, + "loss": 0.647, + "step": 53100 + }, + { + "epoch": 0.014398, + "loss_gen": 3.4567711353302, + "loss_rtd": 0.3385269343852997, + "loss_sent": 0.0021842813584953547, + "loss_sod": 0.16379128396511078, + "loss_total": 0.5045024752616882, + "step": 53199 + }, + { + "epoch": 0.014398, + "loss_gen": 4.636423587799072, + "loss_rtd": 0.3755619525909424, + "loss_sent": 0.08017344772815704, + "loss_sod": 0.09557287395000458, + "loss_total": 0.551308274269104, + "step": 53199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.9463667273521423, + "learning_rate": 7.94664839061652e-05, + "loss": 0.6457, + "step": 53200 + }, + { + "epoch": 0.014598, + "loss_gen": 4.0381269454956055, + "loss_rtd": 0.360150545835495, + "loss_sent": 0.050466492772102356, + "loss_sod": 0.0955638661980629, + "loss_total": 0.5061808824539185, + "step": 53299 + }, + { + "epoch": 0.014598, + "loss_gen": 4.728142261505127, + "loss_rtd": 0.3685055673122406, + "loss_sent": 0.19894526898860931, + "loss_sod": 0.02157244086265564, + "loss_total": 0.5890232920646667, + "step": 53299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.9717921018600464, + "learning_rate": 7.944084090659637e-05, + "loss": 0.6279, + "step": 53300 + }, + { + "epoch": 0.014798, + "loss_gen": 4.582457542419434, + "loss_rtd": 0.38222405314445496, + "loss_sent": 0.5896296501159668, + "loss_sod": 0.04997697472572327, + "loss_total": 1.021830677986145, + "step": 53399 + }, + { + "epoch": 0.014798, + "loss_gen": 4.844758033752441, + "loss_rtd": 0.3863065540790558, + "loss_sent": 0.3215869665145874, + "loss_sod": 0.02089056558907032, + "loss_total": 0.7287840843200684, + "step": 53399 + }, + { + "epoch": 0.0148, + "grad_norm": 2.3949897289276123, + "learning_rate": 7.941518604826039e-05, + "loss": 0.6434, + "step": 53400 + }, + { + "epoch": 0.014998, + "loss_gen": 4.715126991271973, + "loss_rtd": 0.3721052408218384, + "loss_sent": 0.1290234476327896, + "loss_sod": 0.05557714402675629, + "loss_total": 0.5567058324813843, + "step": 53499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.461250305175781, + "loss_rtd": 0.3816608488559723, + "loss_sent": 0.22881415486335754, + "loss_sod": 0.04375752806663513, + "loss_total": 0.6542325019836426, + "step": 53499 + }, + { + "epoch": 0.015, + "grad_norm": 0.9130325317382812, + "learning_rate": 7.938951934149096e-05, + "loss": 0.6398, + "step": 53500 + }, + { + "epoch": 0.015198, + "loss_gen": 4.400059223175049, + "loss_rtd": 0.3567712903022766, + "loss_sent": 0.19402320683002472, + "loss_sod": 0.011536471545696259, + "loss_total": 0.562330961227417, + "step": 53599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.636370658874512, + "loss_rtd": 0.36850762367248535, + "loss_sent": 0.1500333547592163, + "loss_sod": 0.04156222939491272, + "loss_total": 0.560103178024292, + "step": 53599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.8094152212142944, + "learning_rate": 7.936384079662666e-05, + "loss": 0.6531, + "step": 53600 + }, + { + "epoch": 0.015398, + "loss_gen": 4.712112903594971, + "loss_rtd": 0.3494918644428253, + "loss_sent": 0.40711623430252075, + "loss_sod": 0.022313233464956284, + "loss_total": 0.778921365737915, + "step": 53699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.587061405181885, + "loss_rtd": 0.3805294930934906, + "loss_sent": 0.1808282434940338, + "loss_sod": 0.06312863528728485, + "loss_total": 0.6244863271713257, + "step": 53699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.9854497313499451, + "learning_rate": 7.93381504240108e-05, + "loss": 0.633, + "step": 53700 + }, + { + "epoch": 0.015598, + "loss_gen": 4.5896477699279785, + "loss_rtd": 0.375474750995636, + "loss_sent": 0.1698981076478958, + "loss_sod": 0.11692309379577637, + "loss_total": 0.662295937538147, + "step": 53799 + }, + { + "epoch": 0.015598, + "loss_gen": 3.8224892616271973, + "loss_rtd": 0.37994661927223206, + "loss_sent": 0.0004034222802147269, + "loss_sod": 0.294214129447937, + "loss_total": 0.6745641231536865, + "step": 53799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.1310073137283325, + "learning_rate": 7.931244823399147e-05, + "loss": 0.6466, + "step": 53800 + }, + { + "epoch": 0.015798, + "loss_gen": 3.5543196201324463, + "loss_rtd": 0.35554906725883484, + "loss_sent": 0.020370911806821823, + "loss_sod": 0.13921767473220825, + "loss_total": 0.5151376724243164, + "step": 53899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.5510029792785645, + "loss_rtd": 0.38497135043144226, + "loss_sent": 0.2585177719593048, + "loss_sod": 0.026156924664974213, + "loss_total": 0.6696460247039795, + "step": 53899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.1140100955963135, + "learning_rate": 7.928673423692148e-05, + "loss": 0.6351, + "step": 53900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.142115592956543, + "loss_rtd": 0.3558996617794037, + "loss_sent": 0.1105511337518692, + "loss_sod": 0.06997261941432953, + "loss_total": 0.53642338514328, + "step": 53999 + }, + { + "epoch": 0.015998, + "loss_gen": 4.631337642669678, + "loss_rtd": 0.38000327348709106, + "loss_sent": 0.2932110130786896, + "loss_sod": 0.028590209782123566, + "loss_total": 0.701804518699646, + "step": 53999 + }, + { + "epoch": 0.016, + "grad_norm": 0.8721669316291809, + "learning_rate": 7.926100844315844e-05, + "loss": 0.6569, + "step": 54000 + }, + { + "epoch": 0.016, + "eval_loss": 0.6154483556747437, + "eval_runtime": 151.2289, + "eval_samples_per_second": 102.117, + "eval_steps_per_second": 0.8, + "step": 54000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.182412147521973, + "loss_rtd": 0.3678227365016937, + "loss_sent": 0.10248752683401108, + "loss_sod": 0.04535181447863579, + "loss_total": 0.5156620740890503, + "step": 54099 + }, + { + "epoch": 0.016198, + "loss_gen": 3.6269350051879883, + "loss_rtd": 0.36269044876098633, + "loss_sent": 0.0004897566395811737, + "loss_sod": 0.22491425275802612, + "loss_total": 0.5880944728851318, + "step": 54099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.8176355361938477, + "learning_rate": 7.923527086306472e-05, + "loss": 0.6424, + "step": 54100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.458475112915039, + "loss_rtd": 0.3739497661590576, + "loss_sent": 0.2738564610481262, + "loss_sod": 0.015809426084160805, + "loss_total": 0.6636156439781189, + "step": 54199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.506946086883545, + "loss_rtd": 0.36992794275283813, + "loss_sent": 0.21589812636375427, + "loss_sod": 0.029434029012918472, + "loss_total": 0.615260124206543, + "step": 54199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.8911906480789185, + "learning_rate": 7.920952150700738e-05, + "loss": 0.6457, + "step": 54200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.334949493408203, + "loss_rtd": 0.3890224099159241, + "loss_sent": 0.11779635399580002, + "loss_sod": 0.06975976377725601, + "loss_total": 0.5765784978866577, + "step": 54299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.676621437072754, + "loss_rtd": 0.3606879711151123, + "loss_sent": 0.21355892717838287, + "loss_sod": 0.03849518671631813, + "loss_total": 0.6127420663833618, + "step": 54299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.5208903551101685, + "learning_rate": 7.918376038535827e-05, + "loss": 0.6406, + "step": 54300 + }, + { + "epoch": 0.016798, + "loss_gen": 4.254660606384277, + "loss_rtd": 0.3675227463245392, + "loss_sent": 0.1284371018409729, + "loss_sod": 0.011969479732215405, + "loss_total": 0.5079293251037598, + "step": 54399 + }, + { + "epoch": 0.016798, + "loss_gen": 4.763401508331299, + "loss_rtd": 0.37080642580986023, + "loss_sent": 0.2208104133605957, + "loss_sod": 0.04189425706863403, + "loss_total": 0.6335110664367676, + "step": 54399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.5267845392227173, + "learning_rate": 7.915798750849396e-05, + "loss": 0.6357, + "step": 54400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.387042045593262, + "loss_rtd": 0.3640294373035431, + "loss_sent": 0.46577194333076477, + "loss_sod": 0.0832526832818985, + "loss_total": 0.9130541086196899, + "step": 54499 + }, + { + "epoch": 0.016998, + "loss_gen": 4.913600921630859, + "loss_rtd": 0.3470486104488373, + "loss_sent": 0.1457686871290207, + "loss_sod": 0.02201038785278797, + "loss_total": 0.5148276686668396, + "step": 54499 + }, + { + "epoch": 0.017, + "grad_norm": 1.0791354179382324, + "learning_rate": 7.913220288679577e-05, + "loss": 0.6379, + "step": 54500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.393093109130859, + "loss_rtd": 0.3685963749885559, + "loss_sent": 0.07685188949108124, + "loss_sod": 0.008272483013570309, + "loss_total": 0.45372074842453003, + "step": 54599 + }, + { + "epoch": 0.017198, + "loss_gen": 4.638741970062256, + "loss_rtd": 0.37237581610679626, + "loss_sent": 0.08207528293132782, + "loss_sod": 0.018077945336699486, + "loss_total": 0.4725290536880493, + "step": 54599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.7968005537986755, + "learning_rate": 7.910640653064974e-05, + "loss": 0.6465, + "step": 54600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.1174421310424805, + "loss_rtd": 0.3648335039615631, + "loss_sent": 9.457358828512952e-05, + "loss_sod": 0.29401132464408875, + "loss_total": 0.6589394211769104, + "step": 54699 + }, + { + "epoch": 0.017398, + "loss_gen": 3.519338607788086, + "loss_rtd": 0.36666354537010193, + "loss_sent": 4.878496110904962e-05, + "loss_sod": 0.35522928833961487, + "loss_total": 0.7219415903091431, + "step": 54699 + }, + { + "epoch": 0.0174, + "grad_norm": 2.1006596088409424, + "learning_rate": 7.908059845044665e-05, + "loss": 0.63, + "step": 54700 + }, + { + "epoch": 0.017598, + "loss_gen": 3.734240770339966, + "loss_rtd": 0.3810313045978546, + "loss_sent": 0.00022152572637423873, + "loss_sod": 0.2045402228832245, + "loss_total": 0.5857930779457092, + "step": 54799 + }, + { + "epoch": 0.017598, + "loss_gen": 4.664052963256836, + "loss_rtd": 0.3665785789489746, + "loss_sent": 0.38680845499038696, + "loss_sod": 0.046340540051460266, + "loss_total": 0.7997275590896606, + "step": 54799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.4892998933792114, + "learning_rate": 7.905477865658197e-05, + "loss": 0.6366, + "step": 54800 + }, + { + "epoch": 0.017798, + "loss_gen": 4.481685638427734, + "loss_rtd": 0.3766678273677826, + "loss_sent": 0.03785740211606026, + "loss_sod": 0.19254451990127563, + "loss_total": 0.607069730758667, + "step": 54899 + }, + { + "epoch": 0.017798, + "loss_gen": 3.6889336109161377, + "loss_rtd": 0.3576793372631073, + "loss_sent": 0.03508095443248749, + "loss_sod": 0.24248912930488586, + "loss_total": 0.6352494359016418, + "step": 54899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.9492436647415161, + "learning_rate": 7.902894715945593e-05, + "loss": 0.6338, + "step": 54900 + }, + { + "epoch": 0.017998, + "loss_gen": 4.717471599578857, + "loss_rtd": 0.37920039892196655, + "loss_sent": 0.12316140532493591, + "loss_sod": 0.058044034987688065, + "loss_total": 0.5604058504104614, + "step": 54999 + }, + { + "epoch": 0.017998, + "loss_gen": 4.401419162750244, + "loss_rtd": 0.37455177307128906, + "loss_sent": 0.30304399132728577, + "loss_sod": 0.052812084555625916, + "loss_total": 0.7304078340530396, + "step": 54999 + }, + { + "epoch": 0.018, + "grad_norm": 1.7150174379348755, + "learning_rate": 7.900310396947344e-05, + "loss": 0.6365, + "step": 55000 + }, + { + "epoch": 0.018, + "eval_loss": 0.6184331774711609, + "eval_runtime": 151.0815, + "eval_samples_per_second": 102.216, + "eval_steps_per_second": 0.801, + "step": 55000 + }, + { + "epoch": 0.018198, + "loss_gen": 3.9587795734405518, + "loss_rtd": 0.3572849631309509, + "loss_sent": 0.000930636830162257, + "loss_sod": 0.18941888213157654, + "loss_total": 0.5476344227790833, + "step": 55099 + }, + { + "epoch": 0.018198, + "loss_gen": 4.033356189727783, + "loss_rtd": 0.3687863349914551, + "loss_sent": 0.007204096298664808, + "loss_sod": 0.22562572360038757, + "loss_total": 0.6016162037849426, + "step": 55099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.3715091943740845, + "learning_rate": 7.897724909704417e-05, + "loss": 0.6453, + "step": 55100 + }, + { + "epoch": 0.018398, + "loss_gen": 4.626516342163086, + "loss_rtd": 0.38258397579193115, + "loss_sent": 0.1983485072851181, + "loss_sod": 0.17736437916755676, + "loss_total": 0.7582968473434448, + "step": 55199 + }, + { + "epoch": 0.018398, + "loss_gen": 4.2777252197265625, + "loss_rtd": 0.3886966109275818, + "loss_sent": 0.24301214516162872, + "loss_sod": 0.025996133685112, + "loss_total": 0.6577048897743225, + "step": 55199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.7724279165267944, + "learning_rate": 7.895138255258238e-05, + "loss": 0.6507, + "step": 55200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.166768550872803, + "loss_rtd": 0.3680054843425751, + "loss_sent": 0.04852308705449104, + "loss_sod": 0.14644241333007812, + "loss_total": 0.5629709959030151, + "step": 55299 + }, + { + "epoch": 0.018598, + "loss_gen": 4.600881576538086, + "loss_rtd": 0.3678843080997467, + "loss_sent": 0.3402821719646454, + "loss_sod": 0.06393858790397644, + "loss_total": 0.7721050977706909, + "step": 55299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.0595279932022095, + "learning_rate": 7.892550434650718e-05, + "loss": 0.6396, + "step": 55300 + }, + { + "epoch": 0.018798, + "loss_gen": 3.6643149852752686, + "loss_rtd": 0.36610931158065796, + "loss_sent": 0.10220718383789062, + "loss_sod": 0.17650368809700012, + "loss_total": 0.6448202133178711, + "step": 55399 + }, + { + "epoch": 0.018798, + "loss_gen": 4.971153259277344, + "loss_rtd": 0.36842429637908936, + "loss_sent": 0.1988043189048767, + "loss_sod": 0.08470700681209564, + "loss_total": 0.6519356369972229, + "step": 55399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.5318913459777832, + "learning_rate": 7.88996144892423e-05, + "loss": 0.6186, + "step": 55400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.143435001373291, + "loss_rtd": 0.3610016703605652, + "loss_sent": 0.12808094918727875, + "loss_sod": 0.11209128797054291, + "loss_total": 0.6011739373207092, + "step": 55499 + }, + { + "epoch": 0.018998, + "loss_gen": 4.701988220214844, + "loss_rtd": 0.39345940947532654, + "loss_sent": 0.22166474163532257, + "loss_sod": 0.06643322855234146, + "loss_total": 0.6815573573112488, + "step": 55499 + }, + { + "epoch": 0.019, + "grad_norm": 1.1434299945831299, + "learning_rate": 7.887371299121616e-05, + "loss": 0.6239, + "step": 55500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.342228412628174, + "loss_rtd": 0.37801849842071533, + "loss_sent": 0.19338516891002655, + "loss_sod": 0.014940367080271244, + "loss_total": 0.5863440632820129, + "step": 55599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.5737223625183105, + "loss_rtd": 0.35960954427719116, + "loss_sent": 0.4485333263874054, + "loss_sod": 0.0636458471417427, + "loss_total": 0.871788740158081, + "step": 55599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.7248344421386719, + "learning_rate": 7.884779986286186e-05, + "loss": 0.6235, + "step": 55600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.677097797393799, + "loss_rtd": 0.36578962206840515, + "loss_sent": 0.2834576368331909, + "loss_sod": 0.12220462411642075, + "loss_total": 0.7714518904685974, + "step": 55699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.656655788421631, + "loss_rtd": 0.3725653886795044, + "loss_sent": 0.28585928678512573, + "loss_sod": 0.021390490233898163, + "loss_total": 0.6798151731491089, + "step": 55699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.4569215774536133, + "learning_rate": 7.882187511461723e-05, + "loss": 0.6348, + "step": 55700 + }, + { + "epoch": 0.019598, + "loss_gen": 3.6917800903320312, + "loss_rtd": 0.3651793599128723, + "loss_sent": 0.0032284788321703672, + "loss_sod": 0.10444990545511246, + "loss_total": 0.4728577435016632, + "step": 55799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.053613662719727, + "loss_rtd": 0.3636980652809143, + "loss_sent": 0.13399262726306915, + "loss_sod": 0.02854270115494728, + "loss_total": 0.5262333750724792, + "step": 55799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.8296945691108704, + "learning_rate": 7.879593875692476e-05, + "loss": 0.6373, + "step": 55800 + }, + { + "epoch": 0.019798, + "loss_gen": 4.553261756896973, + "loss_rtd": 0.3637148439884186, + "loss_sent": 0.09166393429040909, + "loss_sod": 0.09912621229887009, + "loss_total": 0.5545049905776978, + "step": 55899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.008322238922119, + "loss_rtd": 0.3714514374732971, + "loss_sent": 0.24092429876327515, + "loss_sod": 0.12934193015098572, + "loss_total": 0.7417176365852356, + "step": 55899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.8420190811157227, + "learning_rate": 7.876999080023159e-05, + "loss": 0.631, + "step": 55900 + }, + { + "epoch": 0.019998, + "loss_gen": 4.874432563781738, + "loss_rtd": 0.38109180331230164, + "loss_sent": 0.12003308534622192, + "loss_sod": 0.10766370594501495, + "loss_total": 0.6087886095046997, + "step": 55999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.570915699005127, + "loss_rtd": 0.36038196086883545, + "loss_sent": 0.20860238373279572, + "loss_sod": 0.03632885217666626, + "loss_total": 0.6053131818771362, + "step": 55999 + }, + { + "epoch": 0.02, + "grad_norm": 1.397096037864685, + "learning_rate": 7.874403125498958e-05, + "loss": 0.6225, + "step": 56000 + }, + { + "epoch": 0.02, + "eval_loss": 0.6109273433685303, + "eval_runtime": 151.4077, + "eval_samples_per_second": 101.996, + "eval_steps_per_second": 0.799, + "step": 56000 + }, + { + "epoch": 0.020198, + "loss_gen": 3.914543867111206, + "loss_rtd": 0.3707915246486664, + "loss_sent": 0.004192584194242954, + "loss_sod": 0.21228839457035065, + "loss_total": 0.5872725248336792, + "step": 56099 + }, + { + "epoch": 0.020198, + "loss_gen": 3.5228183269500732, + "loss_rtd": 0.34751108288764954, + "loss_sent": 0.002179651753976941, + "loss_sod": 0.249548077583313, + "loss_total": 0.5992387533187866, + "step": 56099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.8863370418548584, + "learning_rate": 7.871806013165522e-05, + "loss": 0.6343, + "step": 56100 + }, + { + "epoch": 0.020398, + "loss_gen": 3.588907480239868, + "loss_rtd": 0.36278581619262695, + "loss_sent": 0.005457947961986065, + "loss_sod": 0.2697388529777527, + "loss_total": 0.6379826068878174, + "step": 56199 + }, + { + "epoch": 0.020398, + "loss_gen": 4.755928039550781, + "loss_rtd": 0.35757341980934143, + "loss_sent": 0.06363532692193985, + "loss_sod": 0.08482104539871216, + "loss_total": 0.5060297846794128, + "step": 56199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.069838523864746, + "learning_rate": 7.869207744068966e-05, + "loss": 0.6402, + "step": 56200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.537457466125488, + "loss_rtd": 0.36283206939697266, + "loss_sent": 0.173272967338562, + "loss_sod": 0.06157602369785309, + "loss_total": 0.5976810455322266, + "step": 56299 + }, + { + "epoch": 0.020598, + "loss_gen": 4.561739921569824, + "loss_rtd": 0.3555087149143219, + "loss_sent": 0.05402013659477234, + "loss_sod": 0.06651479005813599, + "loss_total": 0.4760436415672302, + "step": 56299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.89546799659729, + "learning_rate": 7.866608319255875e-05, + "loss": 0.6262, + "step": 56300 + }, + { + "epoch": 0.020798, + "loss_gen": 4.26029109954834, + "loss_rtd": 0.35518962144851685, + "loss_sent": 0.22561852633953094, + "loss_sod": 0.017394546419382095, + "loss_total": 0.5982027053833008, + "step": 56399 + }, + { + "epoch": 0.020798, + "loss_gen": 4.655737400054932, + "loss_rtd": 0.3703760802745819, + "loss_sent": 0.07087548822164536, + "loss_sod": 0.02960325963795185, + "loss_total": 0.47085484862327576, + "step": 56399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.39506196975708, + "learning_rate": 7.864007739773295e-05, + "loss": 0.629, + "step": 56400 + }, + { + "epoch": 0.020998, + "loss_gen": 3.8033533096313477, + "loss_rtd": 0.3684219717979431, + "loss_sent": 7.553644536528736e-05, + "loss_sod": 0.24581684172153473, + "loss_total": 0.6143143177032471, + "step": 56499 + }, + { + "epoch": 0.020998, + "loss_gen": 3.8116445541381836, + "loss_rtd": 0.35867562890052795, + "loss_sent": 0.07484734803438187, + "loss_sod": 0.12182188779115677, + "loss_total": 0.5553448796272278, + "step": 56499 + }, + { + "epoch": 0.021, + "grad_norm": 1.1752865314483643, + "learning_rate": 7.861406006668739e-05, + "loss": 0.6416, + "step": 56500 + }, + { + "epoch": 0.021198, + "loss_gen": 3.7026193141937256, + "loss_rtd": 0.35439664125442505, + "loss_sent": 0.048916835337877274, + "loss_sod": 0.3023568093776703, + "loss_total": 0.7056702375411987, + "step": 56599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.53428840637207, + "loss_rtd": 0.3755502700805664, + "loss_sent": 0.17246706783771515, + "loss_sod": 0.008769787847995758, + "loss_total": 0.5567871332168579, + "step": 56599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.2310662269592285, + "learning_rate": 7.858803120990188e-05, + "loss": 0.6205, + "step": 56600 + }, + { + "epoch": 0.021398, + "loss_gen": 4.742209434509277, + "loss_rtd": 0.37581825256347656, + "loss_sent": 0.2694573700428009, + "loss_sod": 0.10139670968055725, + "loss_total": 0.7466723322868347, + "step": 56699 + }, + { + "epoch": 0.021398, + "loss_gen": 3.6188018321990967, + "loss_rtd": 0.35414817929267883, + "loss_sent": 0.01109377946704626, + "loss_sod": 0.12507717311382294, + "loss_total": 0.4903191030025482, + "step": 56699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.840541660785675, + "learning_rate": 7.85619908378608e-05, + "loss": 0.6472, + "step": 56700 + }, + { + "epoch": 0.021598, + "loss_gen": 4.646273136138916, + "loss_rtd": 0.3598632216453552, + "loss_sent": 0.6560623645782471, + "loss_sod": 0.014731225557625294, + "loss_total": 1.0306568145751953, + "step": 56799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.867937088012695, + "loss_rtd": 0.3471471965312958, + "loss_sent": 0.4271682798862457, + "loss_sod": 0.081779345870018, + "loss_total": 0.8560948371887207, + "step": 56799 + }, + { + "epoch": 0.0216, + "grad_norm": 4.364806652069092, + "learning_rate": 7.853593896105323e-05, + "loss": 0.6382, + "step": 56800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.655259132385254, + "loss_rtd": 0.3678421676158905, + "loss_sent": 0.08329112082719803, + "loss_sod": 0.03640985116362572, + "loss_total": 0.48754313588142395, + "step": 56899 + }, + { + "epoch": 0.021798, + "loss_gen": 3.8237650394439697, + "loss_rtd": 0.368927925825119, + "loss_sent": 0.0001940029178513214, + "loss_sod": 0.2146022617816925, + "loss_total": 0.5837242007255554, + "step": 56899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.9214321374893188, + "learning_rate": 7.850987558997287e-05, + "loss": 0.6355, + "step": 56900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.45928955078125, + "loss_rtd": 0.3921184837818146, + "loss_sent": 0.2075405865907669, + "loss_sod": 0.02730563096702099, + "loss_total": 0.6269646883010864, + "step": 56999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.576908588409424, + "loss_rtd": 0.3429858386516571, + "loss_sent": 0.20230966806411743, + "loss_sod": 0.014626307412981987, + "loss_total": 0.5599218010902405, + "step": 56999 + }, + { + "epoch": 0.022, + "grad_norm": 0.9983424544334412, + "learning_rate": 7.848380073511802e-05, + "loss": 0.6291, + "step": 57000 + }, + { + "epoch": 0.022, + "eval_loss": 0.6036836504936218, + "eval_runtime": 151.1538, + "eval_samples_per_second": 102.167, + "eval_steps_per_second": 0.801, + "step": 57000 + }, + { + "epoch": 0.022198, + "loss_gen": 4.728472709655762, + "loss_rtd": 0.34702467918395996, + "loss_sent": 0.1587972193956375, + "loss_sod": 0.0453263595700264, + "loss_total": 0.5511482357978821, + "step": 57099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.708425521850586, + "loss_rtd": 0.33947470784187317, + "loss_sent": 0.11597699671983719, + "loss_sod": 0.1268012523651123, + "loss_total": 0.5822529792785645, + "step": 57099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.8465123772621155, + "learning_rate": 7.845771440699164e-05, + "loss": 0.6327, + "step": 57100 + }, + { + "epoch": 0.022398, + "loss_gen": 3.7659003734588623, + "loss_rtd": 0.3444095551967621, + "loss_sent": 0.10276321321725845, + "loss_sod": 0.07046366482973099, + "loss_total": 0.5176364183425903, + "step": 57199 + }, + { + "epoch": 0.022398, + "loss_gen": 4.462311744689941, + "loss_rtd": 0.3790939748287201, + "loss_sent": 0.10643347352743149, + "loss_sod": 0.08552703261375427, + "loss_total": 0.5710544586181641, + "step": 57199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.8902172446250916, + "learning_rate": 7.84316166161013e-05, + "loss": 0.6375, + "step": 57200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.8482232093811035, + "loss_rtd": 0.36452606320381165, + "loss_sent": 0.216790571808815, + "loss_sod": 0.06441571563482285, + "loss_total": 0.6457323431968689, + "step": 57299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.491093158721924, + "loss_rtd": 0.36763903498649597, + "loss_sent": 0.3399943709373474, + "loss_sod": 0.08514704555273056, + "loss_total": 0.7927804589271545, + "step": 57299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.0277565717697144, + "learning_rate": 7.840550737295919e-05, + "loss": 0.6327, + "step": 57300 + }, + { + "epoch": 0.022798, + "loss_gen": 4.362370491027832, + "loss_rtd": 0.3496209681034088, + "loss_sent": 0.15771304070949554, + "loss_sod": 0.024754945188760757, + "loss_total": 0.5320889353752136, + "step": 57399 + }, + { + "epoch": 0.022798, + "loss_gen": 4.630321502685547, + "loss_rtd": 0.37803417444229126, + "loss_sent": 0.24064858257770538, + "loss_sod": 0.06453673541545868, + "loss_total": 0.6832194924354553, + "step": 57399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.3916471004486084, + "learning_rate": 7.837938668808211e-05, + "loss": 0.624, + "step": 57400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.793696880340576, + "loss_rtd": 0.3645938038825989, + "loss_sent": 0.31480395793914795, + "loss_sod": 0.06923617422580719, + "loss_total": 0.7486339211463928, + "step": 57499 + }, + { + "epoch": 0.022998, + "loss_gen": 4.454331398010254, + "loss_rtd": 0.35629791021347046, + "loss_sent": 0.12270854413509369, + "loss_sod": 0.01729443669319153, + "loss_total": 0.4963008761405945, + "step": 57499 + }, + { + "epoch": 0.023, + "grad_norm": 0.9017834067344666, + "learning_rate": 7.835325457199146e-05, + "loss": 0.6221, + "step": 57500 + }, + { + "epoch": 0.023198, + "loss_gen": 4.476206302642822, + "loss_rtd": 0.3627890646457672, + "loss_sent": 0.18494051694869995, + "loss_sod": 0.050898827612400055, + "loss_total": 0.5986284017562866, + "step": 57599 + }, + { + "epoch": 0.023198, + "loss_gen": 4.25529670715332, + "loss_rtd": 0.36866647005081177, + "loss_sent": 0.4510160982608795, + "loss_sod": 0.09126748889684677, + "loss_total": 0.9109500646591187, + "step": 57599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.077824592590332, + "learning_rate": 7.832711103521326e-05, + "loss": 0.6272, + "step": 57600 + }, + { + "epoch": 0.023398, + "loss_gen": 4.585660934448242, + "loss_rtd": 0.3672980070114136, + "loss_sent": 0.20580396056175232, + "loss_sod": 0.022966112941503525, + "loss_total": 0.5960680842399597, + "step": 57699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.617955207824707, + "loss_rtd": 0.3689972162246704, + "loss_sent": 0.27105477452278137, + "loss_sod": 0.007499829400330782, + "loss_total": 0.6475518345832825, + "step": 57699 + }, + { + "epoch": 0.0234, + "grad_norm": 2.2827484607696533, + "learning_rate": 7.830095608827813e-05, + "loss": 0.641, + "step": 57700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.636845111846924, + "loss_rtd": 0.3683159053325653, + "loss_sent": 0.22516517341136932, + "loss_sod": 0.09461245685815811, + "loss_total": 0.6880935430526733, + "step": 57799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.369560718536377, + "loss_rtd": 0.35372239351272583, + "loss_sent": 0.23294277489185333, + "loss_sod": 0.017427930608391762, + "loss_total": 0.604093074798584, + "step": 57799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.006242275238037, + "learning_rate": 7.827478974172127e-05, + "loss": 0.6255, + "step": 57800 + }, + { + "epoch": 0.023798, + "loss_gen": 4.858678340911865, + "loss_rtd": 0.3479278087615967, + "loss_sent": 0.32929107546806335, + "loss_sod": 0.05031493306159973, + "loss_total": 0.7275338172912598, + "step": 57899 + }, + { + "epoch": 0.023798, + "loss_gen": 4.78029727935791, + "loss_rtd": 0.3700944483280182, + "loss_sent": 0.17167387902736664, + "loss_sod": 0.007030355744063854, + "loss_total": 0.548798680305481, + "step": 57899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.0834317207336426, + "learning_rate": 7.824861200608248e-05, + "loss": 0.6438, + "step": 57900 + }, + { + "epoch": 0.023998, + "loss_gen": 4.542178153991699, + "loss_rtd": 0.3664270043373108, + "loss_sent": 0.10557856410741806, + "loss_sod": 0.02531070075929165, + "loss_total": 0.49731627106666565, + "step": 57999 + }, + { + "epoch": 0.023998, + "loss_gen": 4.939555644989014, + "loss_rtd": 0.36791694164276123, + "loss_sent": 0.1434306651353836, + "loss_sod": 0.15800848603248596, + "loss_total": 0.6693560481071472, + "step": 57999 + }, + { + "epoch": 0.024, + "grad_norm": 0.8415298461914062, + "learning_rate": 7.822242289190616e-05, + "loss": 0.6129, + "step": 58000 + }, + { + "epoch": 0.024, + "eval_loss": 0.6122521162033081, + "eval_runtime": 151.2846, + "eval_samples_per_second": 102.079, + "eval_steps_per_second": 0.8, + "step": 58000 + }, + { + "epoch": 0.024198, + "loss_gen": 3.856839895248413, + "loss_rtd": 0.35103926062583923, + "loss_sent": 7.347911014221609e-05, + "loss_sod": 0.18446674942970276, + "loss_total": 0.53557950258255, + "step": 58099 + }, + { + "epoch": 0.024198, + "loss_gen": 4.428738594055176, + "loss_rtd": 0.37424057722091675, + "loss_sent": 0.24140650033950806, + "loss_sod": 0.058345548808574677, + "loss_total": 0.6739926338195801, + "step": 58099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.5646039247512817, + "learning_rate": 7.819622240974125e-05, + "loss": 0.6211, + "step": 58100 + }, + { + "epoch": 0.024398, + "loss_gen": 4.742285251617432, + "loss_rtd": 0.37152335047721863, + "loss_sent": 0.25615110993385315, + "loss_sod": 0.033854153007268906, + "loss_total": 0.6615285873413086, + "step": 58199 + }, + { + "epoch": 0.024398, + "loss_gen": 4.426832675933838, + "loss_rtd": 0.3590032756328583, + "loss_sent": 0.16081035137176514, + "loss_sod": 0.0034500528126955032, + "loss_total": 0.523263692855835, + "step": 58199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.7670844197273254, + "learning_rate": 7.817001057014135e-05, + "loss": 0.6157, + "step": 58200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.695468425750732, + "loss_rtd": 0.36778417229652405, + "loss_sent": 0.16970743238925934, + "loss_sod": 0.09776318073272705, + "loss_total": 0.6352548003196716, + "step": 58299 + }, + { + "epoch": 0.024598, + "loss_gen": 4.1878275871276855, + "loss_rtd": 0.3674927353858948, + "loss_sent": 0.0856841579079628, + "loss_sod": 0.15574391186237335, + "loss_total": 0.6089208126068115, + "step": 58299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.017652153968811, + "learning_rate": 7.814378738366456e-05, + "loss": 0.6372, + "step": 58300 + }, + { + "epoch": 0.024798, + "loss_gen": 4.5369181632995605, + "loss_rtd": 0.3620288372039795, + "loss_sent": 0.23065121471881866, + "loss_sod": 0.03505735471844673, + "loss_total": 0.6277374029159546, + "step": 58399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.5468316078186035, + "loss_rtd": 0.37129703164100647, + "loss_sent": 0.13053631782531738, + "loss_sod": 0.042834796011447906, + "loss_total": 0.5446681380271912, + "step": 58399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.0757381916046143, + "learning_rate": 7.811755286087356e-05, + "loss": 0.6286, + "step": 58400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.764802932739258, + "loss_rtd": 0.3671434819698334, + "loss_sent": 0.23353952169418335, + "loss_sod": 0.043985750526189804, + "loss_total": 0.6446687579154968, + "step": 58499 + }, + { + "epoch": 0.024998, + "loss_gen": 4.488729476928711, + "loss_rtd": 0.3664853870868683, + "loss_sent": 0.043898455798625946, + "loss_sod": 0.17564155161380768, + "loss_total": 0.5860254168510437, + "step": 58499 + }, + { + "epoch": 0.025, + "grad_norm": 1.4052540063858032, + "learning_rate": 7.809130701233565e-05, + "loss": 0.6303, + "step": 58500 + }, + { + "epoch": 0.025198, + "loss_gen": 4.284855365753174, + "loss_rtd": 0.35478779673576355, + "loss_sent": 0.011805405840277672, + "loss_sod": 0.12428386509418488, + "loss_total": 0.49087709188461304, + "step": 58599 + }, + { + "epoch": 0.025198, + "loss_gen": 4.3681640625, + "loss_rtd": 0.36329731345176697, + "loss_sent": 0.31201988458633423, + "loss_sod": 0.09334906190633774, + "loss_total": 0.7686662673950195, + "step": 58599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.0215644836425781, + "learning_rate": 7.806504984862263e-05, + "loss": 0.6371, + "step": 58600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.680645942687988, + "loss_rtd": 0.3676464259624481, + "loss_sent": 0.21945630013942719, + "loss_sod": 0.05643542483448982, + "loss_total": 0.6435381770133972, + "step": 58699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.2610979080200195, + "loss_rtd": 0.3559699058532715, + "loss_sent": 0.09809956699609756, + "loss_sod": 0.09417600929737091, + "loss_total": 0.5482454895973206, + "step": 58699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.2106742858886719, + "learning_rate": 7.803878138031088e-05, + "loss": 0.6292, + "step": 58700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.729255199432373, + "loss_rtd": 0.3560835123062134, + "loss_sent": 0.14522771537303925, + "loss_sod": 0.005209613591432571, + "loss_total": 0.5065208673477173, + "step": 58799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.687342166900635, + "loss_rtd": 0.37340936064720154, + "loss_sent": 0.505208432674408, + "loss_sod": 0.01523881871253252, + "loss_total": 0.8938566446304321, + "step": 58799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.73228120803833, + "learning_rate": 7.801250161798135e-05, + "loss": 0.6207, + "step": 58800 + }, + { + "epoch": 0.025798, + "loss_gen": 4.603366851806641, + "loss_rtd": 0.36248043179512024, + "loss_sent": 0.2825396656990051, + "loss_sod": 0.046890988945961, + "loss_total": 0.6919111013412476, + "step": 58899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.493285179138184, + "loss_rtd": 0.3563370108604431, + "loss_sent": 0.3288453221321106, + "loss_sod": 0.07662333548069, + "loss_total": 0.7618056535720825, + "step": 58899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.0256446599960327, + "learning_rate": 7.798621057221951e-05, + "loss": 0.6249, + "step": 58900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.566259384155273, + "loss_rtd": 0.36538976430892944, + "loss_sent": 0.29362866282463074, + "loss_sod": 0.05600784346461296, + "loss_total": 0.7150262594223022, + "step": 58999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.62779426574707, + "loss_rtd": 0.3808223009109497, + "loss_sent": 0.11313124746084213, + "loss_sod": 0.047821350395679474, + "loss_total": 0.5417748689651489, + "step": 58999 + }, + { + "epoch": 0.026, + "grad_norm": 0.9359037280082703, + "learning_rate": 7.79599082536154e-05, + "loss": 0.6375, + "step": 59000 + }, + { + "epoch": 0.026, + "eval_loss": 0.6099367737770081, + "eval_runtime": 151.1862, + "eval_samples_per_second": 102.146, + "eval_steps_per_second": 0.8, + "step": 59000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.435936450958252, + "loss_rtd": 0.3585285246372223, + "loss_sent": 0.17895811796188354, + "loss_sod": 0.05199865251779556, + "loss_total": 0.5894852876663208, + "step": 59099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.619828224182129, + "loss_rtd": 0.3510432243347168, + "loss_sent": 0.2641523480415344, + "loss_sod": 0.2132890522480011, + "loss_total": 0.8284845948219299, + "step": 59099 + }, + { + "epoch": 0.0262, + "grad_norm": 3.9642646312713623, + "learning_rate": 7.79335946727636e-05, + "loss": 0.6246, + "step": 59100 + }, + { + "epoch": 0.026398, + "loss_gen": 4.73977518081665, + "loss_rtd": 0.3586340844631195, + "loss_sent": 0.2495546191930771, + "loss_sod": 0.13427351415157318, + "loss_total": 0.7424622178077698, + "step": 59199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.060482025146484, + "loss_rtd": 0.3705763816833496, + "loss_sent": 0.15017586946487427, + "loss_sod": 0.14380647242069244, + "loss_total": 0.6645587086677551, + "step": 59199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.0570859909057617, + "learning_rate": 7.79072698402632e-05, + "loss": 0.6263, + "step": 59200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.457797527313232, + "loss_rtd": 0.35689282417297363, + "loss_sent": 0.12019678950309753, + "loss_sod": 0.14792628586292267, + "loss_total": 0.6250158548355103, + "step": 59299 + }, + { + "epoch": 0.026598, + "loss_gen": 4.3164262771606445, + "loss_rtd": 0.35765039920806885, + "loss_sent": 0.25349053740501404, + "loss_sod": 0.09530863910913467, + "loss_total": 0.706449568271637, + "step": 59299 + }, + { + "epoch": 0.0266, + "grad_norm": 2.9818403720855713, + "learning_rate": 7.788093376671783e-05, + "loss": 0.6255, + "step": 59300 + }, + { + "epoch": 0.026798, + "loss_gen": 3.90621018409729, + "loss_rtd": 0.3626609146595001, + "loss_sent": 0.015791015699505806, + "loss_sod": 0.23246951401233673, + "loss_total": 0.6109214425086975, + "step": 59399 + }, + { + "epoch": 0.026798, + "loss_gen": 4.519261360168457, + "loss_rtd": 0.3742714822292328, + "loss_sent": 0.145407572388649, + "loss_sod": 0.05256856605410576, + "loss_total": 0.5722476243972778, + "step": 59399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.2181992530822754, + "learning_rate": 7.785458646273569e-05, + "loss": 0.6208, + "step": 59400 + }, + { + "epoch": 0.026998, + "loss_gen": 4.5629377365112305, + "loss_rtd": 0.36785122752189636, + "loss_sent": 0.21749372780323029, + "loss_sod": 0.0407673604786396, + "loss_total": 0.6261123418807983, + "step": 59499 + }, + { + "epoch": 0.026998, + "loss_gen": 4.774982452392578, + "loss_rtd": 0.3666840195655823, + "loss_sent": 0.05598453804850578, + "loss_sod": 0.04872807487845421, + "loss_total": 0.47139662504196167, + "step": 59499 + }, + { + "epoch": 0.027, + "grad_norm": 0.6574075222015381, + "learning_rate": 7.782822793892945e-05, + "loss": 0.6235, + "step": 59500 + }, + { + "epoch": 0.027198, + "loss_gen": 4.934159755706787, + "loss_rtd": 0.35682305693626404, + "loss_sent": 0.14898377656936646, + "loss_sod": 0.07212354242801666, + "loss_total": 0.5779303908348083, + "step": 59599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.615574836730957, + "loss_rtd": 0.355925977230072, + "loss_sent": 0.2697262763977051, + "loss_sod": 0.057983674108982086, + "loss_total": 0.683635950088501, + "step": 59599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.7389049530029297, + "learning_rate": 7.780185820591632e-05, + "loss": 0.6298, + "step": 59600 + }, + { + "epoch": 0.027398, + "loss_gen": 4.8682146072387695, + "loss_rtd": 0.38151177763938904, + "loss_sent": 0.20392993092536926, + "loss_sod": 0.07457822561264038, + "loss_total": 0.6600199341773987, + "step": 59699 + }, + { + "epoch": 0.027398, + "loss_gen": 4.836795806884766, + "loss_rtd": 0.3634702265262604, + "loss_sent": 0.10432275384664536, + "loss_sod": 0.05495322495698929, + "loss_total": 0.522746205329895, + "step": 59699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.6321349740028381, + "learning_rate": 7.777547727431804e-05, + "loss": 0.6298, + "step": 59700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.784069061279297, + "loss_rtd": 0.3644939363002777, + "loss_sent": 0.16687586903572083, + "loss_sod": 0.05650252476334572, + "loss_total": 0.587872326374054, + "step": 59799 + }, + { + "epoch": 0.027598, + "loss_gen": 4.443631172180176, + "loss_rtd": 0.3605464994907379, + "loss_sent": 0.3157513737678528, + "loss_sod": 0.009228970855474472, + "loss_total": 0.6855268478393555, + "step": 59799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.874180793762207, + "learning_rate": 7.774908515476082e-05, + "loss": 0.6379, + "step": 59800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.536407947540283, + "loss_rtd": 0.36572104692459106, + "loss_sent": 0.3959828317165375, + "loss_sod": 0.07241517305374146, + "loss_total": 0.8341190814971924, + "step": 59899 + }, + { + "epoch": 0.027798, + "loss_gen": 4.399185657501221, + "loss_rtd": 0.3434516191482544, + "loss_sent": 0.011317362077534199, + "loss_sod": 0.10456545650959015, + "loss_total": 0.45933443307876587, + "step": 59899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.2676671743392944, + "learning_rate": 7.772268185787543e-05, + "loss": 0.6232, + "step": 59900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.64289665222168, + "loss_rtd": 0.36277514696121216, + "loss_sent": 0.333320289850235, + "loss_sod": 0.050307150930166245, + "loss_total": 0.7464026212692261, + "step": 59999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.631900787353516, + "loss_rtd": 0.3668534457683563, + "loss_sent": 0.2586413025856018, + "loss_sod": 0.016919543966650963, + "loss_total": 0.6424143314361572, + "step": 59999 + }, + { + "epoch": 0.028, + "grad_norm": 2.2221970558166504, + "learning_rate": 7.76962673942971e-05, + "loss": 0.6286, + "step": 60000 + }, + { + "epoch": 0.028, + "eval_loss": 0.6064568758010864, + "eval_runtime": 151.3435, + "eval_samples_per_second": 102.039, + "eval_steps_per_second": 0.8, + "step": 60000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.419715881347656, + "loss_rtd": 0.3511432707309723, + "loss_sent": 0.05077521502971649, + "loss_sod": 0.0357656329870224, + "loss_total": 0.4376841187477112, + "step": 60099 + }, + { + "epoch": 0.028198, + "loss_gen": 4.582746982574463, + "loss_rtd": 0.37409496307373047, + "loss_sent": 0.204301118850708, + "loss_sod": 0.00987776555120945, + "loss_total": 0.5882738828659058, + "step": 60099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.7696623206138611, + "learning_rate": 7.766984177466559e-05, + "loss": 0.6248, + "step": 60100 + }, + { + "epoch": 0.028398, + "loss_gen": 4.308511257171631, + "loss_rtd": 0.344106525182724, + "loss_sent": 0.05806731805205345, + "loss_sod": 0.07151489704847336, + "loss_total": 0.4736887514591217, + "step": 60199 + }, + { + "epoch": 0.028398, + "loss_gen": 4.259953022003174, + "loss_rtd": 0.3588264286518097, + "loss_sent": 0.15039996802806854, + "loss_sod": 0.1075981855392456, + "loss_total": 0.6168245673179626, + "step": 60199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.1939679384231567, + "learning_rate": 7.764340500962511e-05, + "loss": 0.6322, + "step": 60200 + }, + { + "epoch": 0.028598, + "loss_gen": 4.882040500640869, + "loss_rtd": 0.36036819219589233, + "loss_sent": 0.23449385166168213, + "loss_sod": 0.05917336791753769, + "loss_total": 0.6540354490280151, + "step": 60299 + }, + { + "epoch": 0.028598, + "loss_gen": 4.768974304199219, + "loss_rtd": 0.3655626177787781, + "loss_sent": 0.056659433990716934, + "loss_sod": 0.11761739104986191, + "loss_total": 0.5398394465446472, + "step": 60299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.6359830498695374, + "learning_rate": 7.761695710982439e-05, + "loss": 0.6314, + "step": 60300 + }, + { + "epoch": 0.028798, + "loss_gen": 4.6344313621521, + "loss_rtd": 0.3701755106449127, + "loss_sent": 0.14321716129779816, + "loss_sod": 0.0878496766090393, + "loss_total": 0.6012423634529114, + "step": 60399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.201408386230469, + "loss_rtd": 0.36076819896698, + "loss_sent": 0.2366686463356018, + "loss_sod": 0.028163466602563858, + "loss_total": 0.6256003379821777, + "step": 60399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.2316277027130127, + "learning_rate": 7.759049808591665e-05, + "loss": 0.6298, + "step": 60400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.660600185394287, + "loss_rtd": 0.36998021602630615, + "loss_sent": 0.1629168838262558, + "loss_sod": 0.050505802035331726, + "loss_total": 0.5834029316902161, + "step": 60499 + }, + { + "epoch": 0.028998, + "loss_gen": 4.8611159324646, + "loss_rtd": 0.36074256896972656, + "loss_sent": 0.2991497814655304, + "loss_sod": 0.07882288098335266, + "loss_total": 0.7387152314186096, + "step": 60499 + }, + { + "epoch": 0.029, + "grad_norm": 0.8937557935714722, + "learning_rate": 7.75640279485596e-05, + "loss": 0.6143, + "step": 60500 + }, + { + "epoch": 0.029198, + "loss_gen": 4.770934104919434, + "loss_rtd": 0.3545058071613312, + "loss_sent": 0.1732805073261261, + "loss_sod": 0.12196668982505798, + "loss_total": 0.6497529745101929, + "step": 60599 + }, + { + "epoch": 0.029198, + "loss_gen": 4.689424991607666, + "loss_rtd": 0.37039607763290405, + "loss_sent": 0.051986388862133026, + "loss_sod": 0.08831780403852463, + "loss_total": 0.5107002854347229, + "step": 60599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.8047595024108887, + "learning_rate": 7.753754670841535e-05, + "loss": 0.6325, + "step": 60600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.782720565795898, + "loss_rtd": 0.360862672328949, + "loss_sent": 0.13212287425994873, + "loss_sod": 0.08254893124103546, + "loss_total": 0.575534462928772, + "step": 60699 + }, + { + "epoch": 0.029398, + "loss_gen": 3.8673269748687744, + "loss_rtd": 0.35040780901908875, + "loss_sent": 0.015188871882855892, + "loss_sod": 0.08386120200157166, + "loss_total": 0.44945788383483887, + "step": 60699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.2101595401763916, + "learning_rate": 7.751105437615062e-05, + "loss": 0.631, + "step": 60700 + }, + { + "epoch": 0.029598, + "loss_gen": 4.655178070068359, + "loss_rtd": 0.3434184491634369, + "loss_sent": 0.13284368813037872, + "loss_sod": 0.17460592091083527, + "loss_total": 0.6508680582046509, + "step": 60799 + }, + { + "epoch": 0.029598, + "loss_gen": 4.486229419708252, + "loss_rtd": 0.3898225724697113, + "loss_sent": 0.3177969455718994, + "loss_sod": 0.06404908001422882, + "loss_total": 0.7716686129570007, + "step": 60799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.3122541904449463, + "learning_rate": 7.748455096243645e-05, + "loss": 0.6281, + "step": 60800 + }, + { + "epoch": 0.029798, + "loss_gen": 4.25994348526001, + "loss_rtd": 0.36359405517578125, + "loss_sent": 0.371652215719223, + "loss_sod": 0.03927462920546532, + "loss_total": 0.7745208740234375, + "step": 60899 + }, + { + "epoch": 0.029798, + "loss_gen": 4.894041061401367, + "loss_rtd": 0.360346257686615, + "loss_sent": 0.132888525724411, + "loss_sod": 0.06288320571184158, + "loss_total": 0.5561180114746094, + "step": 60899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.4946010112762451, + "learning_rate": 7.745803647794845e-05, + "loss": 0.6318, + "step": 60900 + }, + { + "epoch": 0.029998, + "loss_gen": 4.927550315856934, + "loss_rtd": 0.3632243275642395, + "loss_sent": 0.2353924959897995, + "loss_sod": 0.0985734835267067, + "loss_total": 0.6971902847290039, + "step": 60999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.230792999267578, + "loss_rtd": 0.3565824627876282, + "loss_sent": 0.10895363241434097, + "loss_sod": 0.28270161151885986, + "loss_total": 0.7482377290725708, + "step": 60999 + }, + { + "epoch": 0.03, + "grad_norm": 1.2049927711486816, + "learning_rate": 7.743151093336664e-05, + "loss": 0.6274, + "step": 61000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5990291237831116, + "eval_runtime": 151.1472, + "eval_samples_per_second": 102.172, + "eval_steps_per_second": 0.801, + "step": 61000 + }, + { + "epoch": 0.030198, + "loss_gen": 4.337743759155273, + "loss_rtd": 0.35195744037628174, + "loss_sent": 0.005699401255697012, + "loss_sod": 0.12330635637044907, + "loss_total": 0.48096320033073425, + "step": 61099 + }, + { + "epoch": 0.030198, + "loss_gen": 3.908996105194092, + "loss_rtd": 0.3597874641418457, + "loss_sent": 0.056060317903757095, + "loss_sod": 0.11038318276405334, + "loss_total": 0.5262309908866882, + "step": 61099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.8100944757461548, + "learning_rate": 7.74049743393755e-05, + "loss": 0.6284, + "step": 61100 + }, + { + "epoch": 0.030398, + "loss_gen": 4.535096645355225, + "loss_rtd": 0.359036922454834, + "loss_sent": 0.25981956720352173, + "loss_sod": 0.023562589660286903, + "loss_total": 0.6424190998077393, + "step": 61199 + }, + { + "epoch": 0.030398, + "loss_gen": 4.725579738616943, + "loss_rtd": 0.3722909688949585, + "loss_sent": 0.10179479420185089, + "loss_sod": 0.06360937654972076, + "loss_total": 0.5376951098442078, + "step": 61199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.8150643706321716, + "learning_rate": 7.7378426706664e-05, + "loss": 0.616, + "step": 61200 + }, + { + "epoch": 0.030598, + "loss_gen": 4.490610599517822, + "loss_rtd": 0.35507073998451233, + "loss_sent": 0.25802454352378845, + "loss_sod": 0.005174014251679182, + "loss_total": 0.6182693243026733, + "step": 61299 + }, + { + "epoch": 0.030598, + "loss_gen": 4.592755317687988, + "loss_rtd": 0.3682295083999634, + "loss_sent": 0.16518935561180115, + "loss_sod": 0.017999812960624695, + "loss_total": 0.551418662071228, + "step": 61299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.8812500238418579, + "learning_rate": 7.735186804592546e-05, + "loss": 0.6163, + "step": 61300 + }, + { + "epoch": 0.030798, + "loss_gen": 4.962682247161865, + "loss_rtd": 0.3762686848640442, + "loss_sent": 0.19632530212402344, + "loss_sod": 0.1151101291179657, + "loss_total": 0.6877040863037109, + "step": 61399 + }, + { + "epoch": 0.030798, + "loss_gen": 4.625997066497803, + "loss_rtd": 0.3505260944366455, + "loss_sent": 0.23315012454986572, + "loss_sod": 0.09739446640014648, + "loss_total": 0.6810706853866577, + "step": 61399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.640834093093872, + "learning_rate": 7.732529836785777e-05, + "loss": 0.6237, + "step": 61400 + }, + { + "epoch": 0.030998, + "loss_gen": 3.8268961906433105, + "loss_rtd": 0.3546771705150604, + "loss_sent": 0.0011191426310688257, + "loss_sod": 0.30724337697029114, + "loss_total": 0.6630396842956543, + "step": 61499 + }, + { + "epoch": 0.030998, + "loss_gen": 3.3745572566986084, + "loss_rtd": 0.3258892595767975, + "loss_sent": 0.0037176625337451696, + "loss_sod": 0.15428975224494934, + "loss_total": 0.4838966727256775, + "step": 61499 + }, + { + "epoch": 0.031, + "grad_norm": 1.0252587795257568, + "learning_rate": 7.729871768316315e-05, + "loss": 0.6217, + "step": 61500 + }, + { + "epoch": 0.031198, + "loss_gen": 4.474167823791504, + "loss_rtd": 0.35145077109336853, + "loss_sent": 0.27659541368484497, + "loss_sod": 0.037010371685028076, + "loss_total": 0.665056586265564, + "step": 61599 + }, + { + "epoch": 0.031198, + "loss_gen": 4.455756664276123, + "loss_rtd": 0.37024450302124023, + "loss_sent": 0.1990279108285904, + "loss_sod": 0.04633166640996933, + "loss_total": 0.6156041026115417, + "step": 61599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.8076386451721191, + "learning_rate": 7.727212600254832e-05, + "loss": 0.6372, + "step": 61600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.811530113220215, + "loss_rtd": 0.3651669919490814, + "loss_sent": 0.04905230551958084, + "loss_sod": 0.01257830485701561, + "loss_total": 0.4267975986003876, + "step": 61699 + }, + { + "epoch": 0.031398, + "loss_gen": 4.341268539428711, + "loss_rtd": 0.36992210149765015, + "loss_sent": 0.15841986238956451, + "loss_sod": 0.16322530806064606, + "loss_total": 0.6915672421455383, + "step": 61699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.2156025171279907, + "learning_rate": 7.724552333672439e-05, + "loss": 0.6139, + "step": 61700 + }, + { + "epoch": 0.031598, + "loss_gen": 4.670220851898193, + "loss_rtd": 0.3613542914390564, + "loss_sent": 0.09052611887454987, + "loss_sod": 0.13286900520324707, + "loss_total": 0.5847494006156921, + "step": 61799 + }, + { + "epoch": 0.031598, + "loss_gen": 4.732316970825195, + "loss_rtd": 0.3727900981903076, + "loss_sent": 0.2103252112865448, + "loss_sod": 0.005296154413372278, + "loss_total": 0.5884114503860474, + "step": 61799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.2746527194976807, + "learning_rate": 7.721890969640693e-05, + "loss": 0.6172, + "step": 61800 + }, + { + "epoch": 0.031798, + "loss_gen": 4.899127006530762, + "loss_rtd": 0.3722045123577118, + "loss_sent": 0.05811246484518051, + "loss_sod": 0.018387533724308014, + "loss_total": 0.4487045109272003, + "step": 61899 + }, + { + "epoch": 0.031798, + "loss_gen": 3.7219502925872803, + "loss_rtd": 0.346815288066864, + "loss_sent": 0.0038633050862699747, + "loss_sod": 0.20953437685966492, + "loss_total": 0.5602129697799683, + "step": 61899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.3523370027542114, + "learning_rate": 7.719228509231589e-05, + "loss": 0.6351, + "step": 61900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.3909592628479, + "loss_rtd": 0.3624614477157593, + "loss_sent": 0.4772406816482544, + "loss_sod": 0.07452499866485596, + "loss_total": 0.9142271280288696, + "step": 61999 + }, + { + "epoch": 0.031998, + "loss_gen": 4.466653823852539, + "loss_rtd": 0.3784707188606262, + "loss_sent": 0.2602320909500122, + "loss_sod": 0.03414306789636612, + "loss_total": 0.6728458404541016, + "step": 61999 + }, + { + "epoch": 0.032, + "grad_norm": 2.9149506092071533, + "learning_rate": 7.716564953517567e-05, + "loss": 0.6262, + "step": 62000 + }, + { + "epoch": 0.032, + "eval_loss": 0.5999614596366882, + "eval_runtime": 152.9369, + "eval_samples_per_second": 100.976, + "eval_steps_per_second": 0.791, + "step": 62000 + }, + { + "epoch": 0.032198, + "loss_gen": 3.7183635234832764, + "loss_rtd": 0.35874229669570923, + "loss_sent": 0.05439189448952675, + "loss_sod": 0.05234738066792488, + "loss_total": 0.46548157930374146, + "step": 62099 + }, + { + "epoch": 0.032198, + "loss_gen": 4.890871047973633, + "loss_rtd": 0.3613589406013489, + "loss_sent": 0.029185831546783447, + "loss_sod": 0.16612508893013, + "loss_total": 0.5566698908805847, + "step": 62099 + }, + { + "epoch": 0.0322, + "grad_norm": 0.6717313528060913, + "learning_rate": 7.713900303571505e-05, + "loss": 0.6359, + "step": 62100 + }, + { + "epoch": 0.032398, + "loss_gen": 3.771449565887451, + "loss_rtd": 0.35726627707481384, + "loss_sent": 4.493523738346994e-05, + "loss_sod": 0.16263878345489502, + "loss_total": 0.5199500322341919, + "step": 62199 + }, + { + "epoch": 0.032398, + "loss_gen": 5.023044586181641, + "loss_rtd": 0.35243162512779236, + "loss_sent": 0.3986659646034241, + "loss_sod": 0.21003739535808563, + "loss_total": 0.9611349701881409, + "step": 62199 + }, + { + "epoch": 0.0324, + "grad_norm": 1.1961450576782227, + "learning_rate": 7.711234560466727e-05, + "loss": 0.6203, + "step": 62200 + }, + { + "epoch": 0.032598, + "loss_gen": 3.678945541381836, + "loss_rtd": 0.35275405645370483, + "loss_sent": 0.0027436709497123957, + "loss_sod": 0.16715922951698303, + "loss_total": 0.5226569771766663, + "step": 62299 + }, + { + "epoch": 0.032598, + "loss_gen": 5.011557579040527, + "loss_rtd": 0.3473483920097351, + "loss_sent": 0.1250336468219757, + "loss_sod": 0.05513975769281387, + "loss_total": 0.5275217890739441, + "step": 62299 + }, + { + "epoch": 0.0326, + "grad_norm": 1.1040332317352295, + "learning_rate": 7.708567725276992e-05, + "loss": 0.6153, + "step": 62300 + }, + { + "epoch": 0.032798, + "loss_gen": 4.394141674041748, + "loss_rtd": 0.3745782971382141, + "loss_sent": 0.23874713480472565, + "loss_sod": 0.06496084481477737, + "loss_total": 0.6782862544059753, + "step": 62399 + }, + { + "epoch": 0.032798, + "loss_gen": 4.451708793640137, + "loss_rtd": 0.35331302881240845, + "loss_sent": 0.14257824420928955, + "loss_sod": 0.009480955079197884, + "loss_total": 0.5053722262382507, + "step": 62399 + }, + { + "epoch": 0.0328, + "grad_norm": 1.6813021898269653, + "learning_rate": 7.705899799076501e-05, + "loss": 0.6318, + "step": 62400 + }, + { + "epoch": 0.032998, + "loss_gen": 3.518160343170166, + "loss_rtd": 0.34264475107192993, + "loss_sent": 6.070993185858242e-05, + "loss_sod": 0.20465955138206482, + "loss_total": 0.5473650097846985, + "step": 62499 + }, + { + "epoch": 0.032998, + "loss_gen": 4.318394660949707, + "loss_rtd": 0.36519360542297363, + "loss_sent": 0.1040429025888443, + "loss_sod": 0.11054766178131104, + "loss_total": 0.5797841548919678, + "step": 62499 + }, + { + "epoch": 0.033, + "grad_norm": 1.4231469631195068, + "learning_rate": 7.703230782939894e-05, + "loss": 0.6213, + "step": 62500 + }, + { + "epoch": 0.033198, + "loss_gen": 4.303335666656494, + "loss_rtd": 0.36913084983825684, + "loss_sent": 0.041935522109270096, + "loss_sod": 0.039499327540397644, + "loss_total": 0.45056572556495667, + "step": 62599 + }, + { + "epoch": 0.033198, + "loss_gen": 3.836183786392212, + "loss_rtd": 0.35064151883125305, + "loss_sent": 0.032056838274002075, + "loss_sod": 0.06515227258205414, + "loss_total": 0.44785061478614807, + "step": 62599 + }, + { + "epoch": 0.0332, + "grad_norm": 0.8263905048370361, + "learning_rate": 7.700560677942252e-05, + "loss": 0.6232, + "step": 62600 + }, + { + "epoch": 0.033398, + "loss_gen": 4.548916339874268, + "loss_rtd": 0.35985687375068665, + "loss_sent": 0.17908038198947906, + "loss_sod": 0.04548419639468193, + "loss_total": 0.5844214558601379, + "step": 62699 + }, + { + "epoch": 0.033398, + "loss_gen": 4.711582183837891, + "loss_rtd": 0.36192235350608826, + "loss_sent": 0.05882194638252258, + "loss_sod": 0.0021437264513224363, + "loss_total": 0.42288804054260254, + "step": 62699 + }, + { + "epoch": 0.0334, + "grad_norm": 0.6532236337661743, + "learning_rate": 7.697889485159092e-05, + "loss": 0.6264, + "step": 62700 + }, + { + "epoch": 0.033598, + "loss_gen": 4.946453094482422, + "loss_rtd": 0.3616791367530823, + "loss_sent": 0.18877077102661133, + "loss_sod": 0.1229364424943924, + "loss_total": 0.6733863353729248, + "step": 62799 + }, + { + "epoch": 0.033598, + "loss_gen": 4.773370742797852, + "loss_rtd": 0.3507744371891022, + "loss_sent": 0.18613027036190033, + "loss_sod": 0.06355984508991241, + "loss_total": 0.6004645824432373, + "step": 62799 + }, + { + "epoch": 0.0336, + "grad_norm": 3.045973300933838, + "learning_rate": 7.69521720566637e-05, + "loss": 0.627, + "step": 62800 + }, + { + "epoch": 0.033798, + "loss_gen": 3.7490742206573486, + "loss_rtd": 0.3479219675064087, + "loss_sent": 0.03341430798172951, + "loss_sod": 0.3173055052757263, + "loss_total": 0.6986417770385742, + "step": 62899 + }, + { + "epoch": 0.033798, + "loss_gen": 4.756951808929443, + "loss_rtd": 0.36681756377220154, + "loss_sent": 0.17297106981277466, + "loss_sod": 0.08782077580690384, + "loss_total": 0.627609372138977, + "step": 62899 + }, + { + "epoch": 0.0338, + "grad_norm": 1.7359693050384521, + "learning_rate": 7.692543840540478e-05, + "loss": 0.6237, + "step": 62900 + }, + { + "epoch": 0.033998, + "loss_gen": 4.279255390167236, + "loss_rtd": 0.36131471395492554, + "loss_sent": 0.009380928240716457, + "loss_sod": 0.1650504171848297, + "loss_total": 0.5357460975646973, + "step": 62999 + }, + { + "epoch": 0.033998, + "loss_gen": 4.05281400680542, + "loss_rtd": 0.3557135760784149, + "loss_sent": 0.016067415475845337, + "loss_sod": 0.10457506030797958, + "loss_total": 0.47635605931282043, + "step": 62999 + }, + { + "epoch": 0.034, + "grad_norm": 0.7685233950614929, + "learning_rate": 7.689869390858251e-05, + "loss": 0.6241, + "step": 63000 + }, + { + "epoch": 0.034, + "eval_loss": 0.6013754606246948, + "eval_runtime": 151.1274, + "eval_samples_per_second": 102.185, + "eval_steps_per_second": 0.801, + "step": 63000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.0773468017578125, + "loss_rtd": 0.3411266505718231, + "loss_sent": 0.02308918535709381, + "loss_sod": 0.09176616370677948, + "loss_total": 0.455981969833374, + "step": 63099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.9239935874938965, + "loss_rtd": 0.34385135769844055, + "loss_sent": 0.056067075580358505, + "loss_sod": 0.14729167520999908, + "loss_total": 0.5472100973129272, + "step": 63099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.7968565225601196, + "learning_rate": 7.687193857696954e-05, + "loss": 0.6061, + "step": 63100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.347064018249512, + "loss_rtd": 0.3503779470920563, + "loss_sent": 0.0765359178185463, + "loss_sod": 0.029360976070165634, + "loss_total": 0.4562748372554779, + "step": 63199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.632389545440674, + "loss_rtd": 0.35120922327041626, + "loss_sent": 0.1494688093662262, + "loss_sod": 0.017021406441926956, + "loss_total": 0.5176994800567627, + "step": 63199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.7096732258796692, + "learning_rate": 7.684517242134292e-05, + "loss": 0.6253, + "step": 63200 + }, + { + "epoch": 0.000598, + "loss_gen": 4.462292671203613, + "loss_rtd": 0.37799072265625, + "loss_sent": 0.230128213763237, + "loss_sod": 0.06469544023275375, + "loss_total": 0.6728143692016602, + "step": 63299 + }, + { + "epoch": 0.000598, + "loss_gen": 4.9514946937561035, + "loss_rtd": 0.351814329624176, + "loss_sent": 0.2730381488800049, + "loss_sod": 0.04776401072740555, + "loss_total": 0.6726164817810059, + "step": 63299 + }, + { + "epoch": 0.0006, + "grad_norm": 2.0112061500549316, + "learning_rate": 7.681839545248408e-05, + "loss": 0.6235, + "step": 63300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.555823802947998, + "loss_rtd": 0.3706224858760834, + "loss_sent": 0.09607721120119095, + "loss_sod": 0.0519188717007637, + "loss_total": 0.5186185836791992, + "step": 63399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.698625564575195, + "loss_rtd": 0.37373045086860657, + "loss_sent": 0.08867265284061432, + "loss_sod": 0.07897308468818665, + "loss_total": 0.5413761734962463, + "step": 63399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.7768045663833618, + "learning_rate": 7.679160768117875e-05, + "loss": 0.6233, + "step": 63400 + }, + { + "epoch": 0.000998, + "loss_gen": 3.7820255756378174, + "loss_rtd": 0.36425188183784485, + "loss_sent": 0.030947675928473473, + "loss_sod": 0.07450937479734421, + "loss_total": 0.46970894932746887, + "step": 63499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.292600154876709, + "loss_rtd": 0.3501809239387512, + "loss_sent": 0.12530605494976044, + "loss_sod": 0.07304050773382187, + "loss_total": 0.5485274791717529, + "step": 63499 + }, + { + "epoch": 0.001, + "grad_norm": 0.7421267628669739, + "learning_rate": 7.676480911821705e-05, + "loss": 0.6161, + "step": 63500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.7239179611206055, + "loss_rtd": 0.3634018301963806, + "loss_sent": 0.44304990768432617, + "loss_sod": 0.11866919696331024, + "loss_total": 0.9251209497451782, + "step": 63599 + }, + { + "epoch": 0.001198, + "loss_gen": 4.428805828094482, + "loss_rtd": 0.3359716236591339, + "loss_sent": 0.5018662810325623, + "loss_sod": 0.07080628722906113, + "loss_total": 0.9086441993713379, + "step": 63599 + }, + { + "epoch": 0.0012, + "grad_norm": 2.007418394088745, + "learning_rate": 7.673799977439342e-05, + "loss": 0.6302, + "step": 63600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.5692524909973145, + "loss_rtd": 0.3591201603412628, + "loss_sent": 0.16385026276111603, + "loss_sod": 0.13043148815631866, + "loss_total": 0.6534019112586975, + "step": 63699 + }, + { + "epoch": 0.001398, + "loss_gen": 4.602705955505371, + "loss_rtd": 0.3531390130519867, + "loss_sent": 0.65873783826828, + "loss_sod": 0.0991470068693161, + "loss_total": 1.1110239028930664, + "step": 63699 + }, + { + "epoch": 0.0014, + "grad_norm": 3.1703555583953857, + "learning_rate": 7.671117966050669e-05, + "loss": 0.6213, + "step": 63700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.764593601226807, + "loss_rtd": 0.3667794466018677, + "loss_sent": 0.12115845829248428, + "loss_sod": 0.002873638179153204, + "loss_total": 0.4908115267753601, + "step": 63799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.480521202087402, + "loss_rtd": 0.349360853433609, + "loss_sent": 0.187759131193161, + "loss_sod": 0.05906372889876366, + "loss_total": 0.596183717250824, + "step": 63799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.0501720905303955, + "learning_rate": 7.668434878736e-05, + "loss": 0.6061, + "step": 63800 + }, + { + "epoch": 0.001798, + "loss_gen": 3.776412010192871, + "loss_rtd": 0.35311639308929443, + "loss_sent": 9.88018509815447e-05, + "loss_sod": 0.17744015157222748, + "loss_total": 0.5306553840637207, + "step": 63899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.243601322174072, + "loss_rtd": 0.3713495135307312, + "loss_sent": 0.12143199145793915, + "loss_sod": 0.06371863931417465, + "loss_total": 0.5565001368522644, + "step": 63899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.7900858521461487, + "learning_rate": 7.665750716576079e-05, + "loss": 0.628, + "step": 63900 + }, + { + "epoch": 0.001998, + "loss_gen": 4.197118759155273, + "loss_rtd": 0.3397866189479828, + "loss_sent": 0.1800970882177353, + "loss_sod": 0.05564101040363312, + "loss_total": 0.5755247473716736, + "step": 63999 + }, + { + "epoch": 0.001998, + "loss_gen": 4.659060478210449, + "loss_rtd": 0.36526429653167725, + "loss_sent": 0.20626680552959442, + "loss_sod": 0.043784722685813904, + "loss_total": 0.615315854549408, + "step": 63999 + }, + { + "epoch": 0.002, + "grad_norm": 1.2787045240402222, + "learning_rate": 7.66306548065209e-05, + "loss": 0.6301, + "step": 64000 + }, + { + "epoch": 0.002, + "eval_loss": 0.5960562229156494, + "eval_runtime": 154.0597, + "eval_samples_per_second": 100.24, + "eval_steps_per_second": 0.785, + "step": 64000 + }, + { + "epoch": 0.002198, + "loss_gen": 4.280486583709717, + "loss_rtd": 0.3794573247432709, + "loss_sent": 0.2574203610420227, + "loss_sod": 0.009277268312871456, + "loss_total": 0.6461549401283264, + "step": 64099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.851013660430908, + "loss_rtd": 0.356160968542099, + "loss_sent": 0.33749884366989136, + "loss_sod": 0.16863031685352325, + "loss_total": 0.8622901439666748, + "step": 64099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.2959072589874268, + "learning_rate": 7.660379172045642e-05, + "loss": 0.6318, + "step": 64100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.21964168548584, + "loss_rtd": 0.3674074709415436, + "loss_sent": 0.001419869135133922, + "loss_sod": 0.19931061565876007, + "loss_total": 0.5681379437446594, + "step": 64199 + }, + { + "epoch": 0.002398, + "loss_gen": 3.719330072402954, + "loss_rtd": 0.3511691093444824, + "loss_sent": 3.7132998841116205e-05, + "loss_sod": 0.19852951169013977, + "loss_total": 0.5497357249259949, + "step": 64199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.7420927882194519, + "learning_rate": 7.657691791838783e-05, + "loss": 0.6208, + "step": 64200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.600236415863037, + "loss_rtd": 0.34968018531799316, + "loss_sent": 0.24622157216072083, + "loss_sod": 0.04593350738286972, + "loss_total": 0.6418352723121643, + "step": 64299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.608397006988525, + "loss_rtd": 0.35003018379211426, + "loss_sent": 0.29476937651634216, + "loss_sod": 0.04235994815826416, + "loss_total": 0.687159538269043, + "step": 64299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.8296270370483398, + "learning_rate": 7.655003341113987e-05, + "loss": 0.6072, + "step": 64300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.674376487731934, + "loss_rtd": 0.35226747393608093, + "loss_sent": 0.1678522676229477, + "loss_sod": 0.06918580830097198, + "loss_total": 0.589305579662323, + "step": 64399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.135908603668213, + "loss_rtd": 0.3713374137878418, + "loss_sent": 0.06879798322916031, + "loss_sod": 0.18518736958503723, + "loss_total": 0.6253227591514587, + "step": 64399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.7070022821426392, + "learning_rate": 7.652313820954163e-05, + "loss": 0.6276, + "step": 64400 + }, + { + "epoch": 0.002998, + "loss_gen": 4.676963806152344, + "loss_rtd": 0.3604086637496948, + "loss_sent": 0.2059246301651001, + "loss_sod": 0.04678652435541153, + "loss_total": 0.6131198406219482, + "step": 64499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.29622220993042, + "loss_rtd": 0.37331321835517883, + "loss_sent": 0.20497316122055054, + "loss_sod": 0.015880735591053963, + "loss_total": 0.5941671133041382, + "step": 64499 + }, + { + "epoch": 0.003, + "grad_norm": 0.8023879528045654, + "learning_rate": 7.649623232442651e-05, + "loss": 0.6182, + "step": 64500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.412691593170166, + "loss_rtd": 0.37671002745628357, + "loss_sent": 0.10626865923404694, + "loss_sod": 0.011743715032935143, + "loss_total": 0.4947224259376526, + "step": 64599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.241797924041748, + "loss_rtd": 0.3560134172439575, + "loss_sent": 0.22045773267745972, + "loss_sod": 0.012969817966222763, + "loss_total": 0.5894409418106079, + "step": 64599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9714013934135437, + "learning_rate": 7.646931576663214e-05, + "loss": 0.6075, + "step": 64600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.545714378356934, + "loss_rtd": 0.3601056933403015, + "loss_sent": 0.3741488456726074, + "loss_sod": 0.010922128334641457, + "loss_total": 0.7451766729354858, + "step": 64699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.675589561462402, + "loss_rtd": 0.35947301983833313, + "loss_sent": 0.34950560331344604, + "loss_sod": 0.0069966865703463554, + "loss_total": 0.715975284576416, + "step": 64699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.301599144935608, + "learning_rate": 7.644238854700059e-05, + "loss": 0.6364, + "step": 64700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.894478797912598, + "loss_rtd": 0.3421265780925751, + "loss_sent": 0.271994948387146, + "loss_sod": 0.025406386703252792, + "loss_total": 0.6395279169082642, + "step": 64799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.560560703277588, + "loss_rtd": 0.3420966863632202, + "loss_sent": 0.23488765954971313, + "loss_sod": 0.08738589286804199, + "loss_total": 0.6643702387809753, + "step": 64799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.433797836303711, + "learning_rate": 7.641545067637806e-05, + "loss": 0.6162, + "step": 64800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.559416770935059, + "loss_rtd": 0.34533098340034485, + "loss_sent": 0.09186774492263794, + "loss_sod": 0.030342355370521545, + "loss_total": 0.4675410985946655, + "step": 64899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.012601852416992, + "loss_rtd": 0.36505126953125, + "loss_sent": 0.2887916564941406, + "loss_sod": 0.05366521328687668, + "loss_total": 0.7075081467628479, + "step": 64899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.9611746072769165, + "learning_rate": 7.638850216561518e-05, + "loss": 0.6303, + "step": 64900 + }, + { + "epoch": 0.003998, + "loss_gen": 4.474420070648193, + "loss_rtd": 0.3590322732925415, + "loss_sent": 0.11335202306509018, + "loss_sod": 0.018347179517149925, + "loss_total": 0.49073147773742676, + "step": 64999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.450010776519775, + "loss_rtd": 0.3534477949142456, + "loss_sent": 0.08659390360116959, + "loss_sod": 0.05938224121928215, + "loss_total": 0.49942392110824585, + "step": 64999 + }, + { + "epoch": 0.004, + "grad_norm": 0.5334826111793518, + "learning_rate": 7.636154302556676e-05, + "loss": 0.6099, + "step": 65000 + }, + { + "epoch": 0.004, + "eval_loss": 0.5954232811927795, + "eval_runtime": 151.2911, + "eval_samples_per_second": 102.075, + "eval_steps_per_second": 0.8, + "step": 65000 + }, + { + "epoch": 0.004198, + "loss_gen": 3.7600913047790527, + "loss_rtd": 0.35678860545158386, + "loss_sent": 0.00042318625492043793, + "loss_sod": 0.28913211822509766, + "loss_total": 0.6463439464569092, + "step": 65099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.273679733276367, + "loss_rtd": 0.3608115017414093, + "loss_sent": 0.07257717847824097, + "loss_sod": 0.1048695370554924, + "loss_total": 0.5382581949234009, + "step": 65099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.8813515305519104, + "learning_rate": 7.633457326709198e-05, + "loss": 0.6271, + "step": 65100 + }, + { + "epoch": 0.004398, + "loss_gen": 3.7220046520233154, + "loss_rtd": 0.3242909014225006, + "loss_sent": 0.0042475066147744656, + "loss_sod": 0.3289870619773865, + "loss_total": 0.6575254797935486, + "step": 65199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.362858295440674, + "loss_rtd": 0.33517253398895264, + "loss_sent": 0.1916651576757431, + "loss_sod": 0.04640581086277962, + "loss_total": 0.5732434988021851, + "step": 65199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.1218812465667725, + "learning_rate": 7.630759290105422e-05, + "loss": 0.6086, + "step": 65200 + }, + { + "epoch": 0.004598, + "loss_gen": 4.771598815917969, + "loss_rtd": 0.365702748298645, + "loss_sent": 0.22391684353351593, + "loss_sod": 0.1459675431251526, + "loss_total": 0.7355871200561523, + "step": 65299 + }, + { + "epoch": 0.004598, + "loss_gen": 4.376883506774902, + "loss_rtd": 0.35041317343711853, + "loss_sent": 0.14419321715831757, + "loss_sod": 0.03334677591919899, + "loss_total": 0.5279531478881836, + "step": 65299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.9339886903762817, + "learning_rate": 7.62806019383212e-05, + "loss": 0.6084, + "step": 65300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.824034214019775, + "loss_rtd": 0.3548614978790283, + "loss_sent": 0.19835662841796875, + "loss_sod": 0.02828032895922661, + "loss_total": 0.5814984440803528, + "step": 65399 + }, + { + "epoch": 0.004798, + "loss_gen": 4.8600592613220215, + "loss_rtd": 0.35373997688293457, + "loss_sent": 0.05740976706147194, + "loss_sod": 0.2584027945995331, + "loss_total": 0.6695525646209717, + "step": 65399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.6910472512245178, + "learning_rate": 7.625360038976486e-05, + "loss": 0.6054, + "step": 65400 + }, + { + "epoch": 0.004998, + "loss_gen": 3.788112163543701, + "loss_rtd": 0.3388946056365967, + "loss_sent": 0.000608363188803196, + "loss_sod": 0.2595179080963135, + "loss_total": 0.5990208983421326, + "step": 65499 + }, + { + "epoch": 0.004998, + "loss_gen": 3.8532238006591797, + "loss_rtd": 0.32595545053482056, + "loss_sent": 0.09683480858802795, + "loss_sod": 0.011593611910939217, + "loss_total": 0.4343838691711426, + "step": 65499 + }, + { + "epoch": 0.005, + "grad_norm": 1.0176689624786377, + "learning_rate": 7.622658826626144e-05, + "loss": 0.6187, + "step": 65500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.517738342285156, + "loss_rtd": 0.3676239252090454, + "loss_sent": 0.05257377773523331, + "loss_sod": 0.10262086987495422, + "loss_total": 0.5228185653686523, + "step": 65599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.969003677368164, + "loss_rtd": 0.3514210283756256, + "loss_sent": 0.264767050743103, + "loss_sod": 0.03280862420797348, + "loss_total": 0.6489967107772827, + "step": 65599 + }, + { + "epoch": 0.0052, + "grad_norm": 2.6527786254882812, + "learning_rate": 7.619956557869136e-05, + "loss": 0.6206, + "step": 65600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.2397847175598145, + "loss_rtd": 0.3516516089439392, + "loss_sent": 8.085014269454405e-05, + "loss_sod": 0.2125997245311737, + "loss_total": 0.564332127571106, + "step": 65699 + }, + { + "epoch": 0.005398, + "loss_gen": 3.795886278152466, + "loss_rtd": 0.3487846553325653, + "loss_sent": 0.00012464348401408643, + "loss_sod": 0.19986745715141296, + "loss_total": 0.5487767457962036, + "step": 65699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.0295031070709229, + "learning_rate": 7.617253233793944e-05, + "loss": 0.6183, + "step": 65700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.007923126220703, + "loss_rtd": 0.36578112840652466, + "loss_sent": 0.0788542702794075, + "loss_sod": 0.06385549157857895, + "loss_total": 0.5084909200668335, + "step": 65799 + }, + { + "epoch": 0.005598, + "loss_gen": 4.588578701019287, + "loss_rtd": 0.3535233438014984, + "loss_sent": 0.31957945227622986, + "loss_sod": 0.09898321330547333, + "loss_total": 0.7720860242843628, + "step": 65799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.2556023597717285, + "learning_rate": 7.61454885548946e-05, + "loss": 0.6096, + "step": 65800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.651038646697998, + "loss_rtd": 0.34410956501960754, + "loss_sent": 0.0990416631102562, + "loss_sod": 0.040140315890312195, + "loss_total": 0.48329153656959534, + "step": 65899 + }, + { + "epoch": 0.005798, + "loss_gen": 4.929686546325684, + "loss_rtd": 0.36061891913414, + "loss_sent": 0.2854152023792267, + "loss_sod": 0.02335675247013569, + "loss_total": 0.6693909168243408, + "step": 65899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.4057626724243164, + "learning_rate": 7.611843424045011e-05, + "loss": 0.61, + "step": 65900 + }, + { + "epoch": 0.005998, + "loss_gen": 3.417036294937134, + "loss_rtd": 0.3334285616874695, + "loss_sent": 0.049593642354011536, + "loss_sod": 0.06800346076488495, + "loss_total": 0.45102566480636597, + "step": 65999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.139853000640869, + "loss_rtd": 0.34877824783325195, + "loss_sent": 0.2039397805929184, + "loss_sod": 0.17971526086330414, + "loss_total": 0.7324333190917969, + "step": 65999 + }, + { + "epoch": 0.006, + "grad_norm": 0.6943939328193665, + "learning_rate": 7.609136940550343e-05, + "loss": 0.6335, + "step": 66000 + }, + { + "epoch": 0.006, + "eval_loss": 0.5987781882286072, + "eval_runtime": 152.7139, + "eval_samples_per_second": 101.124, + "eval_steps_per_second": 0.792, + "step": 66000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.310494422912598, + "loss_rtd": 0.35313376784324646, + "loss_sent": 0.11600424349308014, + "loss_sod": 0.010630443692207336, + "loss_total": 0.47976845502853394, + "step": 66099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.730156898498535, + "loss_rtd": 0.3578748106956482, + "loss_sent": 0.10235317796468735, + "loss_sod": 0.051168106496334076, + "loss_total": 0.5113961100578308, + "step": 66099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.5719876885414124, + "learning_rate": 7.606429406095626e-05, + "loss": 0.6179, + "step": 66100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.188300609588623, + "loss_rtd": 0.3529491126537323, + "loss_sent": 0.16361874341964722, + "loss_sod": 0.0830625593662262, + "loss_total": 0.5996304154396057, + "step": 66199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.628381729125977, + "loss_rtd": 0.35839518904685974, + "loss_sent": 0.0803244486451149, + "loss_sod": 0.04144023358821869, + "loss_total": 0.48015984892845154, + "step": 66199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.5547677874565125, + "learning_rate": 7.603720821771457e-05, + "loss": 0.6283, + "step": 66200 + }, + { + "epoch": 0.006598, + "loss_gen": 4.736541748046875, + "loss_rtd": 0.35284411907196045, + "loss_sent": 0.1723390817642212, + "loss_sod": 0.11351539194583893, + "loss_total": 0.6386985778808594, + "step": 66299 + }, + { + "epoch": 0.006598, + "loss_gen": 4.497254848480225, + "loss_rtd": 0.34798967838287354, + "loss_sent": 0.264619380235672, + "loss_sod": 0.04229599982500076, + "loss_total": 0.6549050807952881, + "step": 66299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.5063583850860596, + "learning_rate": 7.601011188668851e-05, + "loss": 0.6046, + "step": 66300 + }, + { + "epoch": 0.006798, + "loss_gen": 4.981752872467041, + "loss_rtd": 0.3534925878047943, + "loss_sent": 0.18722547590732574, + "loss_sod": 0.11359981447458267, + "loss_total": 0.6543178558349609, + "step": 66399 + }, + { + "epoch": 0.006798, + "loss_gen": 4.6029462814331055, + "loss_rtd": 0.3494454622268677, + "loss_sent": 0.23802660405635834, + "loss_sod": 0.14635378122329712, + "loss_total": 0.7338258624076843, + "step": 66399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.9086915254592896, + "learning_rate": 7.598300507879252e-05, + "loss": 0.6134, + "step": 66400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.720383167266846, + "loss_rtd": 0.3627254366874695, + "loss_sent": 0.2635696828365326, + "loss_sod": 0.09105813503265381, + "loss_total": 0.7173532247543335, + "step": 66499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.396498680114746, + "loss_rtd": 0.368895024061203, + "loss_sent": 0.16009920835494995, + "loss_sod": 0.12825074791908264, + "loss_total": 0.6572449803352356, + "step": 66499 + }, + { + "epoch": 0.007, + "grad_norm": 0.8973502516746521, + "learning_rate": 7.595588780494517e-05, + "loss": 0.6199, + "step": 66500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.397541522979736, + "loss_rtd": 0.35902175307273865, + "loss_sent": 0.31910210847854614, + "loss_sod": 0.09253882616758347, + "loss_total": 0.7706626653671265, + "step": 66599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.694241046905518, + "loss_rtd": 0.3520123362541199, + "loss_sent": 0.3550555109977722, + "loss_sod": 0.04164835065603256, + "loss_total": 0.7487162351608276, + "step": 66599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.9792577624320984, + "learning_rate": 7.592876007606933e-05, + "loss": 0.6145, + "step": 66600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.712134838104248, + "loss_rtd": 0.35215991735458374, + "loss_sent": 0.07199952751398087, + "loss_sod": 0.05022723227739334, + "loss_total": 0.47438666224479675, + "step": 66699 + }, + { + "epoch": 0.007398, + "loss_gen": 4.361044406890869, + "loss_rtd": 0.3654535114765167, + "loss_sent": 0.23767469823360443, + "loss_sod": 0.08877260237932205, + "loss_total": 0.6919007897377014, + "step": 66699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.9028485417366028, + "learning_rate": 7.590162190309202e-05, + "loss": 0.6217, + "step": 66700 + }, + { + "epoch": 0.007598, + "loss_gen": 4.741976261138916, + "loss_rtd": 0.3726173937320709, + "loss_sent": 0.11401088535785675, + "loss_sod": 0.08064589649438858, + "loss_total": 0.5672741532325745, + "step": 66799 + }, + { + "epoch": 0.007598, + "loss_gen": 4.381463050842285, + "loss_rtd": 0.36060744524002075, + "loss_sent": 0.1998521238565445, + "loss_sod": 0.011398155242204666, + "loss_total": 0.571857750415802, + "step": 66799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.2918533086776733, + "learning_rate": 7.587447329694451e-05, + "loss": 0.6049, + "step": 66800 + }, + { + "epoch": 0.007798, + "loss_gen": 4.1675333976745605, + "loss_rtd": 0.36102527379989624, + "loss_sent": 0.19719304144382477, + "loss_sod": 0.014241417869925499, + "loss_total": 0.5724597573280334, + "step": 66899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.874257564544678, + "loss_rtd": 0.36321237683296204, + "loss_sent": 0.24794764816761017, + "loss_sod": 0.021412856876850128, + "loss_total": 0.6325728893280029, + "step": 66899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.7860808372497559, + "learning_rate": 7.584731426856226e-05, + "loss": 0.6226, + "step": 66900 + }, + { + "epoch": 0.007998, + "loss_gen": 4.2689924240112305, + "loss_rtd": 0.35132095217704773, + "loss_sent": 0.10176771879196167, + "loss_sod": 0.020253252238035202, + "loss_total": 0.4733419120311737, + "step": 66999 + }, + { + "epoch": 0.007998, + "loss_gen": 4.7362236976623535, + "loss_rtd": 0.3590451180934906, + "loss_sent": 0.4144623577594757, + "loss_sod": 0.017209095880389214, + "loss_total": 0.7907165288925171, + "step": 66999 + }, + { + "epoch": 0.008, + "grad_norm": 0.8132656216621399, + "learning_rate": 7.58201448288849e-05, + "loss": 0.6109, + "step": 67000 + }, + { + "epoch": 0.008, + "eval_loss": 0.594206690788269, + "eval_runtime": 151.5032, + "eval_samples_per_second": 101.932, + "eval_steps_per_second": 0.799, + "step": 67000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.255455493927002, + "loss_rtd": 0.3566700220108032, + "loss_sent": 0.06086091697216034, + "loss_sod": 0.1735401749610901, + "loss_total": 0.5910711288452148, + "step": 67099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.768855571746826, + "loss_rtd": 0.3584344685077667, + "loss_sent": 0.263040155172348, + "loss_sod": 0.08959699422121048, + "loss_total": 0.7110716104507446, + "step": 67099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.561359167098999, + "learning_rate": 7.579296498885629e-05, + "loss": 0.6237, + "step": 67100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.59312105178833, + "loss_rtd": 0.3655880391597748, + "loss_sent": 0.2877505421638489, + "loss_sod": 0.05011430382728577, + "loss_total": 0.7034528851509094, + "step": 67199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.395310878753662, + "loss_rtd": 0.333885133266449, + "loss_sent": 0.39168989658355713, + "loss_sod": 0.038761869072914124, + "loss_total": 0.764336884021759, + "step": 67199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.665055751800537, + "learning_rate": 7.576577475942447e-05, + "loss": 0.6257, + "step": 67200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.306066513061523, + "loss_rtd": 0.34404680132865906, + "loss_sent": 0.0048809596337378025, + "loss_sod": 0.2033056765794754, + "loss_total": 0.5522334575653076, + "step": 67299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.293673038482666, + "loss_rtd": 0.3350405693054199, + "loss_sent": 0.07155311852693558, + "loss_sod": 0.041877102106809616, + "loss_total": 0.4484707713127136, + "step": 67299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.913288414478302, + "learning_rate": 7.573857415154166e-05, + "loss": 0.617, + "step": 67300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.843385219573975, + "loss_rtd": 0.34204018115997314, + "loss_sent": 0.14883247017860413, + "loss_sod": 0.06675544381141663, + "loss_total": 0.5576280951499939, + "step": 67399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.128697395324707, + "loss_rtd": 0.36374631524086, + "loss_sent": 0.29251644015312195, + "loss_sod": 0.06548918783664703, + "loss_total": 0.7217519283294678, + "step": 67399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.7872377634048462, + "learning_rate": 7.571136317616425e-05, + "loss": 0.6271, + "step": 67400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.786539077758789, + "loss_rtd": 0.3563820421695709, + "loss_sent": 0.42853304743766785, + "loss_sod": 0.054787375032901764, + "loss_total": 0.8397024869918823, + "step": 67499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.070417881011963, + "loss_rtd": 0.33073729276657104, + "loss_sent": 0.16601036489009857, + "loss_sod": 0.06587125360965729, + "loss_total": 0.5626189112663269, + "step": 67499 + }, + { + "epoch": 0.009, + "grad_norm": 1.2672414779663086, + "learning_rate": 7.568414184425283e-05, + "loss": 0.6094, + "step": 67500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.705150127410889, + "loss_rtd": 0.3587769865989685, + "loss_sent": 0.09281128644943237, + "loss_sod": 0.06759106367826462, + "loss_total": 0.5191793441772461, + "step": 67599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.746580123901367, + "loss_rtd": 0.35041725635528564, + "loss_sent": 0.31421762704849243, + "loss_sod": 0.026292871683835983, + "loss_total": 0.6909277439117432, + "step": 67599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.3968737125396729, + "learning_rate": 7.565691016677216e-05, + "loss": 0.6102, + "step": 67600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.4377241134643555, + "loss_rtd": 0.3586418032646179, + "loss_sent": 0.2569618821144104, + "loss_sod": 0.06580895185470581, + "loss_total": 0.6814126372337341, + "step": 67699 + }, + { + "epoch": 0.009398, + "loss_gen": 4.565977096557617, + "loss_rtd": 0.34834152460098267, + "loss_sent": 0.3570059537887573, + "loss_sod": 0.06186792254447937, + "loss_total": 0.767215371131897, + "step": 67699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.1630606651306152, + "learning_rate": 7.562966815469114e-05, + "loss": 0.6205, + "step": 67700 + }, + { + "epoch": 0.009598, + "loss_gen": 4.7827911376953125, + "loss_rtd": 0.34308144450187683, + "loss_sent": 0.1217210441827774, + "loss_sod": 0.11103591322898865, + "loss_total": 0.5758383870124817, + "step": 67799 + }, + { + "epoch": 0.009598, + "loss_gen": 4.414473056793213, + "loss_rtd": 0.3497660160064697, + "loss_sent": 0.23417848348617554, + "loss_sod": 0.017716476693749428, + "loss_total": 0.601660966873169, + "step": 67799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.7495279312133789, + "learning_rate": 7.560241581898284e-05, + "loss": 0.62, + "step": 67800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.713752269744873, + "loss_rtd": 0.3629918694496155, + "loss_sent": 0.19341717660427094, + "loss_sod": 0.04620201140642166, + "loss_total": 0.6026110649108887, + "step": 67899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.554504871368408, + "loss_rtd": 0.3645625114440918, + "loss_sent": 0.19659636914730072, + "loss_sod": 0.025591988116502762, + "loss_total": 0.586750864982605, + "step": 67899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.5260523557662964, + "learning_rate": 7.55751531706245e-05, + "loss": 0.6138, + "step": 67900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.0989251136779785, + "loss_rtd": 0.35533761978149414, + "loss_sent": 0.09594675153493881, + "loss_sod": 0.036903925240039825, + "loss_total": 0.4881882965564728, + "step": 67999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.533815383911133, + "loss_rtd": 0.3532601296901703, + "loss_sent": 0.3286660611629486, + "loss_sod": 0.1737453192472458, + "loss_total": 0.8556715250015259, + "step": 67999 + }, + { + "epoch": 0.01, + "grad_norm": 0.9284948110580444, + "learning_rate": 7.554788022059757e-05, + "loss": 0.617, + "step": 68000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5927685499191284, + "eval_runtime": 151.3074, + "eval_samples_per_second": 102.064, + "eval_steps_per_second": 0.8, + "step": 68000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.1164350509643555, + "loss_rtd": 0.337686151266098, + "loss_sent": 0.08159107714891434, + "loss_sod": 0.09356731176376343, + "loss_total": 0.5128445625305176, + "step": 68099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.555169582366943, + "loss_rtd": 0.35074982047080994, + "loss_sent": 0.1417180746793747, + "loss_sod": 0.10723769664764404, + "loss_total": 0.5997055768966675, + "step": 68099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.1322089433670044, + "learning_rate": 7.55205969798875e-05, + "loss": 0.6282, + "step": 68100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.946879863739014, + "loss_rtd": 0.3475859761238098, + "loss_sent": 0.19958215951919556, + "loss_sod": 0.15003390610218048, + "loss_total": 0.6972020268440247, + "step": 68199 + }, + { + "epoch": 0.010398, + "loss_gen": 4.611652851104736, + "loss_rtd": 0.3355503976345062, + "loss_sent": 0.20881244540214539, + "loss_sod": 0.006465624086558819, + "loss_total": 0.5508284568786621, + "step": 68199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.731116771697998, + "learning_rate": 7.549330345948403e-05, + "loss": 0.625, + "step": 68200 + }, + { + "epoch": 0.010598, + "loss_gen": 3.7777929306030273, + "loss_rtd": 0.33989417552948, + "loss_sent": 0.0103762187063694, + "loss_sod": 0.15077877044677734, + "loss_total": 0.5010491609573364, + "step": 68299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.680449485778809, + "loss_rtd": 0.35290950536727905, + "loss_sent": 0.21351496875286102, + "loss_sod": 0.012890908867120743, + "loss_total": 0.5793153643608093, + "step": 68299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9339681267738342, + "learning_rate": 7.546599967038098e-05, + "loss": 0.6124, + "step": 68300 + }, + { + "epoch": 0.010798, + "loss_gen": 3.854318618774414, + "loss_rtd": 0.34512147307395935, + "loss_sent": 4.1809904359979555e-05, + "loss_sod": 0.1997881531715393, + "loss_total": 0.5449513792991638, + "step": 68399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.7505669593811035, + "loss_rtd": 0.3413781523704529, + "loss_sent": 0.12651513516902924, + "loss_sod": 0.04629211872816086, + "loss_total": 0.5141854286193848, + "step": 68399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.1004862785339355, + "learning_rate": 7.54386856235763e-05, + "loss": 0.5988, + "step": 68400 + }, + { + "epoch": 0.010998, + "loss_gen": 4.606143951416016, + "loss_rtd": 0.34316402673721313, + "loss_sent": 0.018881957978010178, + "loss_sod": 0.1613437682390213, + "loss_total": 0.5233897566795349, + "step": 68499 + }, + { + "epoch": 0.010998, + "loss_gen": 3.749549388885498, + "loss_rtd": 0.32255205512046814, + "loss_sent": 0.00953242089599371, + "loss_sod": 0.08570539951324463, + "loss_total": 0.41778987646102905, + "step": 68499 + }, + { + "epoch": 0.011, + "grad_norm": 0.9866381883621216, + "learning_rate": 7.54113613300721e-05, + "loss": 0.6168, + "step": 68500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.0278639793396, + "loss_rtd": 0.35034534335136414, + "loss_sent": 6.925057823536918e-05, + "loss_sod": 0.3125148415565491, + "loss_total": 0.6629294157028198, + "step": 68599 + }, + { + "epoch": 0.011198, + "loss_gen": 3.742182970046997, + "loss_rtd": 0.3397414982318878, + "loss_sent": 5.0243801524629816e-05, + "loss_sod": 0.13007307052612305, + "loss_total": 0.4698648154735565, + "step": 68599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.2500969171524048, + "learning_rate": 7.53840268008746e-05, + "loss": 0.6263, + "step": 68600 + }, + { + "epoch": 0.011398, + "loss_gen": 4.967545032501221, + "loss_rtd": 0.35615482926368713, + "loss_sent": 0.237074613571167, + "loss_sod": 0.04892565682530403, + "loss_total": 0.642155110836029, + "step": 68699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.54293966293335, + "loss_rtd": 0.3437325358390808, + "loss_sent": 0.20231592655181885, + "loss_sod": 0.02160598337650299, + "loss_total": 0.5676544308662415, + "step": 68699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.12941575050354, + "learning_rate": 7.535668204699413e-05, + "loss": 0.6202, + "step": 68700 + }, + { + "epoch": 0.011598, + "loss_gen": 4.625636577606201, + "loss_rtd": 0.35658740997314453, + "loss_sent": 0.22827327251434326, + "loss_sod": 0.02449220046401024, + "loss_total": 0.6093528866767883, + "step": 68799 + }, + { + "epoch": 0.011598, + "loss_gen": 4.631955146789551, + "loss_rtd": 0.34243470430374146, + "loss_sent": 0.13356204330921173, + "loss_sod": 0.0317053347826004, + "loss_total": 0.5077020525932312, + "step": 68799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.6647234559059143, + "learning_rate": 7.532932707944515e-05, + "loss": 0.6109, + "step": 68800 + }, + { + "epoch": 0.011798, + "loss_gen": 4.35915470123291, + "loss_rtd": 0.34421539306640625, + "loss_sent": 0.012710307724773884, + "loss_sod": 0.1049477681517601, + "loss_total": 0.46187347173690796, + "step": 68899 + }, + { + "epoch": 0.011798, + "loss_gen": 3.6791775226593018, + "loss_rtd": 0.34611740708351135, + "loss_sent": 0.0006580319022759795, + "loss_sod": 0.23809869587421417, + "loss_total": 0.584874153137207, + "step": 68899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.886480450630188, + "learning_rate": 7.530196190924628e-05, + "loss": 0.6084, + "step": 68900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.0167646408081055, + "loss_rtd": 0.340043306350708, + "loss_sent": 0.11419618874788284, + "loss_sod": 0.08260629326105118, + "loss_total": 0.5368458032608032, + "step": 68999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.605703353881836, + "loss_rtd": 0.3495825529098511, + "loss_sent": 0.27643582224845886, + "loss_sod": 0.0530291423201561, + "loss_total": 0.6790475249290466, + "step": 68999 + }, + { + "epoch": 0.012, + "grad_norm": 1.7575035095214844, + "learning_rate": 7.527458654742017e-05, + "loss": 0.6009, + "step": 69000 + }, + { + "epoch": 0.012, + "eval_loss": 0.5909610986709595, + "eval_runtime": 151.308, + "eval_samples_per_second": 102.063, + "eval_steps_per_second": 0.8, + "step": 69000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.608206748962402, + "loss_rtd": 0.36003121733665466, + "loss_sent": 0.14249297976493835, + "loss_sod": 0.035286448895931244, + "loss_total": 0.5378106832504272, + "step": 69099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.639560699462891, + "loss_rtd": 0.3554588556289673, + "loss_sent": 0.27541911602020264, + "loss_sod": 0.0017712387489154935, + "loss_total": 0.6326491832733154, + "step": 69099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.5627210736274719, + "learning_rate": 7.524720100499361e-05, + "loss": 0.6167, + "step": 69100 + }, + { + "epoch": 0.012398, + "loss_gen": 4.804170608520508, + "loss_rtd": 0.3510792553424835, + "loss_sent": 0.11709165573120117, + "loss_sod": 0.09820735454559326, + "loss_total": 0.5663782358169556, + "step": 69199 + }, + { + "epoch": 0.012398, + "loss_gen": 4.468132972717285, + "loss_rtd": 0.34883058071136475, + "loss_sent": 0.31851422786712646, + "loss_sod": 0.024485470727086067, + "loss_total": 0.6918302774429321, + "step": 69199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.4534741640090942, + "learning_rate": 7.52198052929975e-05, + "loss": 0.6186, + "step": 69200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.745293617248535, + "loss_rtd": 0.36391720175743103, + "loss_sent": 0.5158696174621582, + "loss_sod": 0.005640673916786909, + "loss_total": 0.8854274749755859, + "step": 69299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.706986904144287, + "loss_rtd": 0.36713218688964844, + "loss_sent": 0.17896877229213715, + "loss_sod": 0.018621649593114853, + "loss_total": 0.5647225975990295, + "step": 69299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.0246957540512085, + "learning_rate": 7.519239942246686e-05, + "loss": 0.617, + "step": 69300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.811370849609375, + "loss_rtd": 0.35900723934173584, + "loss_sent": 0.26550769805908203, + "loss_sod": 0.06882263720035553, + "loss_total": 0.6933375597000122, + "step": 69399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.900637149810791, + "loss_rtd": 0.343448668718338, + "loss_sent": 0.33858102560043335, + "loss_sod": 0.023398658260703087, + "loss_total": 0.7054283618927002, + "step": 69399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.4369068145751953, + "learning_rate": 7.516498340444071e-05, + "loss": 0.6112, + "step": 69400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.447149276733398, + "loss_rtd": 0.35762926936149597, + "loss_sent": 0.10391967743635178, + "loss_sod": 0.006824937183409929, + "loss_total": 0.4683738946914673, + "step": 69499 + }, + { + "epoch": 0.012998, + "loss_gen": 3.9780890941619873, + "loss_rtd": 0.3181975483894348, + "loss_sent": 0.034344203770160675, + "loss_sod": 0.05337222293019295, + "loss_total": 0.40591397881507874, + "step": 69499 + }, + { + "epoch": 0.013, + "grad_norm": 0.8781808614730835, + "learning_rate": 7.51375572499623e-05, + "loss": 0.607, + "step": 69500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.7805328369140625, + "loss_rtd": 0.37241312861442566, + "loss_sent": 0.1631665676832199, + "loss_sod": 0.024414723739027977, + "loss_total": 0.5599943995475769, + "step": 69599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.68084192276001, + "loss_rtd": 0.3549376428127289, + "loss_sent": 0.13597002625465393, + "loss_sod": 0.15555113554000854, + "loss_total": 0.6464587450027466, + "step": 69599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0803793668746948, + "learning_rate": 7.51101209700788e-05, + "loss": 0.6164, + "step": 69600 + }, + { + "epoch": 0.013398, + "loss_gen": 4.543577194213867, + "loss_rtd": 0.3576522171497345, + "loss_sent": 0.049495793879032135, + "loss_sod": 0.18203045427799225, + "loss_total": 0.5891785025596619, + "step": 69699 + }, + { + "epoch": 0.013398, + "loss_gen": 3.8307535648345947, + "loss_rtd": 0.32948005199432373, + "loss_sent": 0.06662531197071075, + "loss_sod": 0.12097512930631638, + "loss_total": 0.5170804858207703, + "step": 69699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.8176936507225037, + "learning_rate": 7.50826745758416e-05, + "loss": 0.614, + "step": 69700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.719297885894775, + "loss_rtd": 0.3517528474330902, + "loss_sent": 0.08722758293151855, + "loss_sod": 0.08633407205343246, + "loss_total": 0.5253145098686218, + "step": 69799 + }, + { + "epoch": 0.013598, + "loss_gen": 4.140933513641357, + "loss_rtd": 0.35081884264945984, + "loss_sent": 0.041073914617300034, + "loss_sod": 0.15672434866428375, + "loss_total": 0.5486171245574951, + "step": 69799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.0456980466842651, + "learning_rate": 7.505521807830604e-05, + "loss": 0.6383, + "step": 69800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.080830097198486, + "loss_rtd": 0.350803017616272, + "loss_sent": 0.3331204652786255, + "loss_sod": 0.020194532349705696, + "loss_total": 0.704118013381958, + "step": 69899 + }, + { + "epoch": 0.013798, + "loss_gen": 4.873362064361572, + "loss_rtd": 0.3467556834220886, + "loss_sent": 0.07918145507574081, + "loss_sod": 0.06439277529716492, + "loss_total": 0.49032992124557495, + "step": 69899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.6474930644035339, + "learning_rate": 7.502775148853167e-05, + "loss": 0.6277, + "step": 69900 + }, + { + "epoch": 0.013998, + "loss_gen": 4.525168418884277, + "loss_rtd": 0.3627609610557556, + "loss_sent": 0.5461922287940979, + "loss_sod": 0.02388431876897812, + "loss_total": 0.9328374862670898, + "step": 69999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.732813835144043, + "loss_rtd": 0.35742875933647156, + "loss_sent": 0.22035260498523712, + "loss_sod": 0.043894391506910324, + "loss_total": 0.6216757297515869, + "step": 69999 + }, + { + "epoch": 0.014, + "grad_norm": 1.2363808155059814, + "learning_rate": 7.5000274817582e-05, + "loss": 0.6089, + "step": 70000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5933198928833008, + "eval_runtime": 151.4068, + "eval_samples_per_second": 101.997, + "eval_steps_per_second": 0.799, + "step": 70000 + }, + { + "epoch": 0.014198, + "loss_gen": 3.4616024494171143, + "loss_rtd": 0.3362026512622833, + "loss_sent": 0.0014539293479174376, + "loss_sod": 0.2485821694135666, + "loss_total": 0.5862387418746948, + "step": 70099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.8870673179626465, + "loss_rtd": 0.3374359607696533, + "loss_sent": 0.27305692434310913, + "loss_sod": 0.07673107087612152, + "loss_total": 0.6872239708900452, + "step": 70099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.3238862752914429, + "learning_rate": 7.49727880765246e-05, + "loss": 0.6184, + "step": 70100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.444095134735107, + "loss_rtd": 0.3639775514602661, + "loss_sent": 0.2643648386001587, + "loss_sod": 0.01581292226910591, + "loss_total": 0.6441553235054016, + "step": 70199 + }, + { + "epoch": 0.014398, + "loss_gen": 4.8016886711120605, + "loss_rtd": 0.347025603055954, + "loss_sent": 0.12922294437885284, + "loss_sod": 0.058164454996585846, + "loss_total": 0.5344129800796509, + "step": 70199 + }, + { + "epoch": 0.0144, + "grad_norm": 2.101402759552002, + "learning_rate": 7.494529127643116e-05, + "loss": 0.6069, + "step": 70200 + }, + { + "epoch": 0.014598, + "loss_gen": 4.9519267082214355, + "loss_rtd": 0.36667752265930176, + "loss_sent": 0.15208236873149872, + "loss_sod": 0.10472363233566284, + "loss_total": 0.6234835386276245, + "step": 70299 + }, + { + "epoch": 0.014598, + "loss_gen": 4.56881046295166, + "loss_rtd": 0.3670293390750885, + "loss_sent": 0.20608092844486237, + "loss_sod": 0.00635911151766777, + "loss_total": 0.5794693827629089, + "step": 70299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.5491467714309692, + "learning_rate": 7.491778442837737e-05, + "loss": 0.6247, + "step": 70300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.125654220581055, + "loss_rtd": 0.34238919615745544, + "loss_sent": 0.02770579606294632, + "loss_sod": 0.07730180770158768, + "loss_total": 0.44739678502082825, + "step": 70399 + }, + { + "epoch": 0.014798, + "loss_gen": 4.639283180236816, + "loss_rtd": 0.3573998510837555, + "loss_sent": 0.004309282172471285, + "loss_sod": 0.395590603351593, + "loss_total": 0.7572997808456421, + "step": 70399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.4109572172164917, + "learning_rate": 7.4890267543443e-05, + "loss": 0.62, + "step": 70400 + }, + { + "epoch": 0.014998, + "loss_gen": 3.8224353790283203, + "loss_rtd": 0.31847918033599854, + "loss_sent": 0.08672991394996643, + "loss_sod": 0.08530547469854355, + "loss_total": 0.4905145764350891, + "step": 70499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.5329508781433105, + "loss_rtd": 0.35414499044418335, + "loss_sent": 0.050933536142110825, + "loss_sod": 0.12525150179862976, + "loss_total": 0.5303300023078918, + "step": 70499 + }, + { + "epoch": 0.015, + "grad_norm": 0.8013997673988342, + "learning_rate": 7.486274063271183e-05, + "loss": 0.6037, + "step": 70500 + }, + { + "epoch": 0.015198, + "loss_gen": 3.8157787322998047, + "loss_rtd": 0.32829922437667847, + "loss_sent": 0.002050962997600436, + "loss_sod": 0.1017485186457634, + "loss_total": 0.43209871649742126, + "step": 70599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.587977886199951, + "loss_rtd": 0.3350503146648407, + "loss_sent": 0.21401993930339813, + "loss_sod": 0.0497254952788353, + "loss_total": 0.5987957715988159, + "step": 70599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.8044083118438721, + "learning_rate": 7.483520370727171e-05, + "loss": 0.6199, + "step": 70600 + }, + { + "epoch": 0.015398, + "loss_gen": 4.2190842628479, + "loss_rtd": 0.35025885701179504, + "loss_sent": 0.06589227169752121, + "loss_sod": 0.11934252083301544, + "loss_total": 0.5354936718940735, + "step": 70699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.106986045837402, + "loss_rtd": 0.34259411692619324, + "loss_sent": 0.024853527545928955, + "loss_sod": 0.08085648715496063, + "loss_total": 0.44830411672592163, + "step": 70699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.6379954218864441, + "learning_rate": 7.480765677821451e-05, + "loss": 0.6126, + "step": 70700 + }, + { + "epoch": 0.015598, + "loss_gen": 4.338307857513428, + "loss_rtd": 0.36540743708610535, + "loss_sent": 0.28503185510635376, + "loss_sod": 0.009250342845916748, + "loss_total": 0.6596896648406982, + "step": 70799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.614833831787109, + "loss_rtd": 0.3449106216430664, + "loss_sent": 0.15405747294425964, + "loss_sod": 0.1161065399646759, + "loss_total": 0.615074634552002, + "step": 70799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.7099796533584595, + "learning_rate": 7.478009985663613e-05, + "loss": 0.6094, + "step": 70800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.494375705718994, + "loss_rtd": 0.3482241630554199, + "loss_sent": 0.11123603582382202, + "loss_sod": 0.03280208632349968, + "loss_total": 0.4922622740268707, + "step": 70899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.529008865356445, + "loss_rtd": 0.3492968678474426, + "loss_sent": 0.25559794902801514, + "loss_sod": 0.06588475406169891, + "loss_total": 0.6707795858383179, + "step": 70899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.142425298690796, + "learning_rate": 7.475253295363648e-05, + "loss": 0.6302, + "step": 70900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.294867515563965, + "loss_rtd": 0.33559533953666687, + "loss_sent": 0.027024684473872185, + "loss_sod": 0.20596477389335632, + "loss_total": 0.5685847997665405, + "step": 70999 + }, + { + "epoch": 0.015998, + "loss_gen": 4.484335422515869, + "loss_rtd": 0.3317863345146179, + "loss_sent": 0.24554044008255005, + "loss_sod": 0.043406642973423004, + "loss_total": 0.620733380317688, + "step": 70999 + }, + { + "epoch": 0.016, + "grad_norm": 1.1793019771575928, + "learning_rate": 7.472495608031953e-05, + "loss": 0.6204, + "step": 71000 + }, + { + "epoch": 0.016, + "eval_loss": 0.5913823843002319, + "eval_runtime": 151.6947, + "eval_samples_per_second": 101.803, + "eval_steps_per_second": 0.798, + "step": 71000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.870697975158691, + "loss_rtd": 0.36172083020210266, + "loss_sent": 0.13929325342178345, + "loss_sod": 0.028338346630334854, + "loss_total": 0.5293524265289307, + "step": 71099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.943262577056885, + "loss_rtd": 0.34843337535858154, + "loss_sent": 0.1354500949382782, + "loss_sod": 0.0597839392721653, + "loss_total": 0.5436674356460571, + "step": 71099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.7821417450904846, + "learning_rate": 7.469736924779324e-05, + "loss": 0.6059, + "step": 71100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.828202724456787, + "loss_rtd": 0.35496997833251953, + "loss_sent": 0.1153426542878151, + "loss_sod": 0.108072429895401, + "loss_total": 0.578385055065155, + "step": 71199 + }, + { + "epoch": 0.016398, + "loss_gen": 3.7728540897369385, + "loss_rtd": 0.3343942165374756, + "loss_sent": 0.00378451868891716, + "loss_sod": 0.1482536345720291, + "loss_total": 0.48643234372138977, + "step": 71199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.8646527528762817, + "learning_rate": 7.466977246716955e-05, + "loss": 0.6138, + "step": 71200 + }, + { + "epoch": 0.016598, + "loss_gen": 4.1918864250183105, + "loss_rtd": 0.34998562932014465, + "loss_sent": 0.03945707529783249, + "loss_sod": 0.2179742455482483, + "loss_total": 0.6074169874191284, + "step": 71299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.605656147003174, + "loss_rtd": 0.35405847430229187, + "loss_sent": 0.3797968626022339, + "loss_sod": 0.0821184515953064, + "loss_total": 0.8159737586975098, + "step": 71299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.2149840593338013, + "learning_rate": 7.464216574956446e-05, + "loss": 0.5955, + "step": 71300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.070539474487305, + "loss_rtd": 0.35759761929512024, + "loss_sent": 0.24154625833034515, + "loss_sod": 0.07218272984027863, + "loss_total": 0.6713266372680664, + "step": 71399 + }, + { + "epoch": 0.016798, + "loss_gen": 4.95283842086792, + "loss_rtd": 0.3476433753967285, + "loss_sent": 0.22640782594680786, + "loss_sod": 0.018956400454044342, + "loss_total": 0.5930075645446777, + "step": 71399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.3016432523727417, + "learning_rate": 7.461454910609795e-05, + "loss": 0.6202, + "step": 71400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.5588812828063965, + "loss_rtd": 0.34922486543655396, + "loss_sent": 0.11128672957420349, + "loss_sod": 0.12964019179344177, + "loss_total": 0.5901517868041992, + "step": 71499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.221651554107666, + "loss_rtd": 0.3547467589378357, + "loss_sent": 0.3745213449001312, + "loss_sod": 0.08372054249048233, + "loss_total": 0.8129886388778687, + "step": 71499 + }, + { + "epoch": 0.017, + "grad_norm": 1.3753210306167603, + "learning_rate": 7.458692254789401e-05, + "loss": 0.6123, + "step": 71500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.223153114318848, + "loss_rtd": 0.34834831953048706, + "loss_sent": 0.04446294903755188, + "loss_sod": 0.20041140913963318, + "loss_total": 0.5932226181030273, + "step": 71599 + }, + { + "epoch": 0.017198, + "loss_gen": 3.967151165008545, + "loss_rtd": 0.3504388630390167, + "loss_sent": 0.0001261267752852291, + "loss_sod": 0.36733120679855347, + "loss_total": 0.7178962230682373, + "step": 71599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.626511812210083, + "learning_rate": 7.455928608608061e-05, + "loss": 0.6192, + "step": 71600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.821959018707275, + "loss_rtd": 0.35648804903030396, + "loss_sent": 0.1773880422115326, + "loss_sod": 0.20540033280849457, + "loss_total": 0.7392764091491699, + "step": 71699 + }, + { + "epoch": 0.017398, + "loss_gen": 4.832204818725586, + "loss_rtd": 0.35967719554901123, + "loss_sent": 0.07360906898975372, + "loss_sod": 0.006231877952814102, + "loss_total": 0.43951815366744995, + "step": 71699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.7945804595947266, + "learning_rate": 7.45316397317897e-05, + "loss": 0.6075, + "step": 71700 + }, + { + "epoch": 0.017598, + "loss_gen": 4.836548805236816, + "loss_rtd": 0.34330615401268005, + "loss_sent": 0.14278709888458252, + "loss_sod": 0.015522902831435204, + "loss_total": 0.5016161203384399, + "step": 71799 + }, + { + "epoch": 0.017598, + "loss_gen": 4.607902526855469, + "loss_rtd": 0.3659072518348694, + "loss_sent": 0.1836635023355484, + "loss_sod": 0.024699239060282707, + "loss_total": 0.5742700099945068, + "step": 71799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.4797484874725342, + "learning_rate": 7.450398349615726e-05, + "loss": 0.6072, + "step": 71800 + }, + { + "epoch": 0.017798, + "loss_gen": 4.702144145965576, + "loss_rtd": 0.3708803653717041, + "loss_sent": 0.07195448875427246, + "loss_sod": 0.028032181784510612, + "loss_total": 0.4708670377731323, + "step": 71899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.1101155281066895, + "loss_rtd": 0.35019639134407043, + "loss_sent": 0.0927276536822319, + "loss_sod": 0.10751849412918091, + "loss_total": 0.5504425168037415, + "step": 71899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.7477824687957764, + "learning_rate": 7.447631739032318e-05, + "loss": 0.5924, + "step": 71900 + }, + { + "epoch": 0.017998, + "loss_gen": 4.389316082000732, + "loss_rtd": 0.33602374792099, + "loss_sent": 0.009403475560247898, + "loss_sod": 0.25647178292274475, + "loss_total": 0.6018990278244019, + "step": 71999 + }, + { + "epoch": 0.017998, + "loss_gen": 3.5910627841949463, + "loss_rtd": 0.31313490867614746, + "loss_sent": 0.03344634920358658, + "loss_sod": 0.09169971197843552, + "loss_total": 0.43828096985816956, + "step": 71999 + }, + { + "epoch": 0.018, + "grad_norm": 0.6587882041931152, + "learning_rate": 7.44486414254314e-05, + "loss": 0.6089, + "step": 72000 + }, + { + "epoch": 0.018, + "eval_loss": 0.5882743000984192, + "eval_runtime": 152.8817, + "eval_samples_per_second": 101.013, + "eval_steps_per_second": 0.791, + "step": 72000 + }, + { + "epoch": 0.018198, + "loss_gen": 4.688161373138428, + "loss_rtd": 0.3436170816421509, + "loss_sent": 0.2503986954689026, + "loss_sod": 0.00963823776692152, + "loss_total": 0.6036540269851685, + "step": 72099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.017253398895264, + "loss_rtd": 0.34326061606407166, + "loss_sent": 0.0735774114727974, + "loss_sod": 0.09248524904251099, + "loss_total": 0.5093232989311218, + "step": 72099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.6841739416122437, + "learning_rate": 7.442095561262975e-05, + "loss": 0.607, + "step": 72100 + }, + { + "epoch": 0.018398, + "loss_gen": 4.958353042602539, + "loss_rtd": 0.36189010739326477, + "loss_sent": 0.09843690693378448, + "loss_sod": 0.02413778007030487, + "loss_total": 0.48446476459503174, + "step": 72199 + }, + { + "epoch": 0.018398, + "loss_gen": 4.871512413024902, + "loss_rtd": 0.35542696714401245, + "loss_sent": 0.19521720707416534, + "loss_sod": 0.017067646607756615, + "loss_total": 0.5677118301391602, + "step": 72199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.6388457417488098, + "learning_rate": 7.439325996307012e-05, + "loss": 0.6128, + "step": 72200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.331342697143555, + "loss_rtd": 0.33741307258605957, + "loss_sent": 0.08115275949239731, + "loss_sod": 0.005808874499052763, + "loss_total": 0.42437469959259033, + "step": 72299 + }, + { + "epoch": 0.018598, + "loss_gen": 4.409862518310547, + "loss_rtd": 0.35305461287498474, + "loss_sent": 0.08265908807516098, + "loss_sod": 0.0333339087665081, + "loss_total": 0.4690476059913635, + "step": 72299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.7669657468795776, + "learning_rate": 7.436555448790829e-05, + "loss": 0.6054, + "step": 72300 + }, + { + "epoch": 0.018798, + "loss_gen": 4.626320838928223, + "loss_rtd": 0.37120702862739563, + "loss_sent": 0.34755128622055054, + "loss_sod": 0.010918952524662018, + "loss_total": 0.7296772599220276, + "step": 72399 + }, + { + "epoch": 0.018798, + "loss_gen": 4.711775302886963, + "loss_rtd": 0.3592338562011719, + "loss_sent": 0.10267001390457153, + "loss_sod": 0.14617079496383667, + "loss_total": 0.6080746650695801, + "step": 72399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.2369270324707031, + "learning_rate": 7.433783919830403e-05, + "loss": 0.6104, + "step": 72400 + }, + { + "epoch": 0.018998, + "loss_gen": 4.396215915679932, + "loss_rtd": 0.3596164584159851, + "loss_sent": 0.08987735211849213, + "loss_sod": 0.04751113802194595, + "loss_total": 0.4970049262046814, + "step": 72499 + }, + { + "epoch": 0.018998, + "loss_gen": 3.75136661529541, + "loss_rtd": 0.3371661305427551, + "loss_sent": 0.00012291154416743666, + "loss_sod": 0.12435232102870941, + "loss_total": 0.4616413712501526, + "step": 72499 + }, + { + "epoch": 0.019, + "grad_norm": 0.6749967932701111, + "learning_rate": 7.431011410542105e-05, + "loss": 0.6132, + "step": 72500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.698030471801758, + "loss_rtd": 0.3466373682022095, + "loss_sent": 0.14236655831336975, + "loss_sod": 0.11202895641326904, + "loss_total": 0.6010328531265259, + "step": 72599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.294450759887695, + "loss_rtd": 0.36601531505584717, + "loss_sent": 0.027178332209587097, + "loss_sod": 0.15719231963157654, + "loss_total": 0.5503860116004944, + "step": 72599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.9426364302635193, + "learning_rate": 7.4282379220427e-05, + "loss": 0.6202, + "step": 72600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.937078952789307, + "loss_rtd": 0.3478560149669647, + "loss_sent": 0.1771114319562912, + "loss_sod": 0.002633861731737852, + "loss_total": 0.5276013016700745, + "step": 72699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.70350980758667, + "loss_rtd": 0.34218764305114746, + "loss_sent": 0.13848990201950073, + "loss_sod": 0.0959969088435173, + "loss_total": 0.5766744613647461, + "step": 72699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.6248541474342346, + "learning_rate": 7.425463455449353e-05, + "loss": 0.6066, + "step": 72700 + }, + { + "epoch": 0.019598, + "loss_gen": 4.646872520446777, + "loss_rtd": 0.3477177321910858, + "loss_sent": 0.34237056970596313, + "loss_sod": 0.08231464773416519, + "loss_total": 0.7724029421806335, + "step": 72799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.0671916007995605, + "loss_rtd": 0.3369097411632538, + "loss_sent": 0.006389040965586901, + "loss_sod": 0.08553760498762131, + "loss_total": 0.4288364052772522, + "step": 72799 + }, + { + "epoch": 0.0196, + "grad_norm": 1.6249091625213623, + "learning_rate": 7.422688011879614e-05, + "loss": 0.6151, + "step": 72800 + }, + { + "epoch": 0.019798, + "loss_gen": 4.565310955047607, + "loss_rtd": 0.3453063368797302, + "loss_sent": 0.2012784332036972, + "loss_sod": 0.023004647344350815, + "loss_total": 0.5695894360542297, + "step": 72899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.667540550231934, + "loss_rtd": 0.3517211079597473, + "loss_sent": 0.14475028216838837, + "loss_sod": 0.2036372721195221, + "loss_total": 0.7001087069511414, + "step": 72899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.9508171677589417, + "learning_rate": 7.419911592451434e-05, + "loss": 0.6122, + "step": 72900 + }, + { + "epoch": 0.019998, + "loss_gen": 4.509414196014404, + "loss_rtd": 0.3496207594871521, + "loss_sent": 0.2732815444469452, + "loss_sod": 0.009184077382087708, + "loss_total": 0.6320863962173462, + "step": 72999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.850524425506592, + "loss_rtd": 0.3499402105808258, + "loss_sent": 0.24237596988677979, + "loss_sod": 0.1104980856180191, + "loss_total": 0.7028142809867859, + "step": 72999 + }, + { + "epoch": 0.02, + "grad_norm": 1.4842668771743774, + "learning_rate": 7.417134198283156e-05, + "loss": 0.6163, + "step": 73000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5847336053848267, + "eval_runtime": 151.7735, + "eval_samples_per_second": 101.75, + "eval_steps_per_second": 0.797, + "step": 73000 + }, + { + "epoch": 0.020198, + "loss_gen": 4.465516567230225, + "loss_rtd": 0.33586585521698, + "loss_sent": 0.19374719262123108, + "loss_sod": 0.045669347047805786, + "loss_total": 0.5752823948860168, + "step": 73099 + }, + { + "epoch": 0.020198, + "loss_gen": 4.804156303405762, + "loss_rtd": 0.37062567472457886, + "loss_sent": 0.7627204060554504, + "loss_sod": 0.046356506645679474, + "loss_total": 1.1797025203704834, + "step": 73099 + }, + { + "epoch": 0.0202, + "grad_norm": 3.475369453430176, + "learning_rate": 7.41435583049351e-05, + "loss": 0.6248, + "step": 73100 + }, + { + "epoch": 0.020398, + "loss_gen": 4.678688049316406, + "loss_rtd": 0.35629740357398987, + "loss_sent": 0.28247150778770447, + "loss_sod": 0.007911695167422295, + "loss_total": 0.6466805934906006, + "step": 73199 + }, + { + "epoch": 0.020398, + "loss_gen": 4.716961860656738, + "loss_rtd": 0.34720107913017273, + "loss_sent": 0.30606597661972046, + "loss_sod": 0.03888232260942459, + "loss_total": 0.6921494007110596, + "step": 73199 + }, + { + "epoch": 0.0204, + "grad_norm": 2.1669692993164062, + "learning_rate": 7.411576490201624e-05, + "loss": 0.6046, + "step": 73200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.407477378845215, + "loss_rtd": 0.3413894474506378, + "loss_sent": 0.22684229910373688, + "loss_sod": 0.03058023564517498, + "loss_total": 0.5988119840621948, + "step": 73299 + }, + { + "epoch": 0.020598, + "loss_gen": 4.551497459411621, + "loss_rtd": 0.34881627559661865, + "loss_sent": 0.12238585948944092, + "loss_sod": 0.03570621833205223, + "loss_total": 0.5069083571434021, + "step": 73299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.0146186351776123, + "learning_rate": 7.408796178527017e-05, + "loss": 0.6063, + "step": 73300 + }, + { + "epoch": 0.020798, + "loss_gen": 4.718207359313965, + "loss_rtd": 0.33871084451675415, + "loss_sent": 0.41996413469314575, + "loss_sod": 0.08324310183525085, + "loss_total": 0.8419181108474731, + "step": 73399 + }, + { + "epoch": 0.020798, + "loss_gen": 4.675392150878906, + "loss_rtd": 0.3622107207775116, + "loss_sent": 0.12315183877944946, + "loss_sod": 0.11210007965564728, + "loss_total": 0.5974626541137695, + "step": 73399 + }, + { + "epoch": 0.0208, + "grad_norm": 2.3092925548553467, + "learning_rate": 7.406014896589597e-05, + "loss": 0.6116, + "step": 73400 + }, + { + "epoch": 0.020998, + "loss_gen": 4.315270900726318, + "loss_rtd": 0.351446270942688, + "loss_sent": 0.008908976800739765, + "loss_sod": 0.14602422714233398, + "loss_total": 0.5063794851303101, + "step": 73499 + }, + { + "epoch": 0.020998, + "loss_gen": 4.223549842834473, + "loss_rtd": 0.3434906303882599, + "loss_sent": 0.09560151398181915, + "loss_sod": 0.09820383787155151, + "loss_total": 0.5372959971427917, + "step": 73499 + }, + { + "epoch": 0.021, + "grad_norm": 0.8053382039070129, + "learning_rate": 7.403232645509665e-05, + "loss": 0.6137, + "step": 73500 + }, + { + "epoch": 0.021198, + "loss_gen": 4.870631694793701, + "loss_rtd": 0.3493156135082245, + "loss_sent": 0.16123583912849426, + "loss_sod": 0.016129961237311363, + "loss_total": 0.5266814231872559, + "step": 73599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.504601001739502, + "loss_rtd": 0.3525901436805725, + "loss_sent": 0.23618851602077484, + "loss_sod": 0.05389292910695076, + "loss_total": 0.6426715850830078, + "step": 73599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.8500021696090698, + "learning_rate": 7.400449426407909e-05, + "loss": 0.6043, + "step": 73600 + }, + { + "epoch": 0.021398, + "loss_gen": 4.457568168640137, + "loss_rtd": 0.3443717360496521, + "loss_sent": 0.0008905435097403824, + "loss_sod": 0.17282648384571075, + "loss_total": 0.5180887579917908, + "step": 73699 + }, + { + "epoch": 0.021398, + "loss_gen": 3.4980556964874268, + "loss_rtd": 0.32986247539520264, + "loss_sent": 0.0009524160996079445, + "loss_sod": 0.17944881319999695, + "loss_total": 0.5102637410163879, + "step": 73699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.0395961999893188, + "learning_rate": 7.397665240405413e-05, + "loss": 0.6086, + "step": 73700 + }, + { + "epoch": 0.021598, + "loss_gen": 4.717159748077393, + "loss_rtd": 0.37082016468048096, + "loss_sent": 0.3229347765445709, + "loss_sod": 0.08952424675226212, + "loss_total": 0.7832791805267334, + "step": 73799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.845605850219727, + "loss_rtd": 0.3581559956073761, + "loss_sent": 0.2923746407032013, + "loss_sod": 0.028529290109872818, + "loss_total": 0.6790599226951599, + "step": 73799 + }, + { + "epoch": 0.0216, + "grad_norm": 2.5028345584869385, + "learning_rate": 7.394880088623644e-05, + "loss": 0.6238, + "step": 73800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.317142486572266, + "loss_rtd": 0.35131940245628357, + "loss_sent": 0.32754117250442505, + "loss_sod": 0.05299842357635498, + "loss_total": 0.7318589687347412, + "step": 73899 + }, + { + "epoch": 0.021798, + "loss_gen": 4.551251411437988, + "loss_rtd": 0.35017669200897217, + "loss_sent": 0.19271309673786163, + "loss_sod": 0.018432054668664932, + "loss_total": 0.5613218545913696, + "step": 73899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.3386276960372925, + "learning_rate": 7.392093972184462e-05, + "loss": 0.6067, + "step": 73900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.816708564758301, + "loss_rtd": 0.3660191595554352, + "loss_sent": 0.2075343281030655, + "loss_sod": 0.16501009464263916, + "loss_total": 0.7385635375976562, + "step": 73999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.9030070304870605, + "loss_rtd": 0.3595043420791626, + "loss_sent": 0.22702062129974365, + "loss_sod": 0.03965284675359726, + "loss_total": 0.6261777877807617, + "step": 73999 + }, + { + "epoch": 0.022, + "grad_norm": 1.3727046251296997, + "learning_rate": 7.389306892210115e-05, + "loss": 0.6122, + "step": 74000 + }, + { + "epoch": 0.022, + "eval_loss": 0.5883642435073853, + "eval_runtime": 151.4391, + "eval_samples_per_second": 101.975, + "eval_steps_per_second": 0.799, + "step": 74000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.003251075744629, + "loss_rtd": 0.3404412567615509, + "loss_sent": 0.0867854505777359, + "loss_sod": 0.025416646152734756, + "loss_total": 0.45264333486557007, + "step": 74099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.423887252807617, + "loss_rtd": 0.33721449971199036, + "loss_sent": 0.310514897108078, + "loss_sod": 0.004518180154263973, + "loss_total": 0.6522475481033325, + "step": 74099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.9278171062469482, + "learning_rate": 7.386518849823235e-05, + "loss": 0.6154, + "step": 74100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.123299598693848, + "loss_rtd": 0.3594000041484833, + "loss_sent": 0.1910119503736496, + "loss_sod": 0.13898342847824097, + "loss_total": 0.6893953680992126, + "step": 74199 + }, + { + "epoch": 0.022398, + "loss_gen": 4.525036811828613, + "loss_rtd": 0.37645843625068665, + "loss_sent": 0.12674348056316376, + "loss_sod": 0.11194515973329544, + "loss_total": 0.615147054195404, + "step": 74199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.8637129664421082, + "learning_rate": 7.383729846146849e-05, + "loss": 0.5955, + "step": 74200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.2129034996032715, + "loss_rtd": 0.3292183578014374, + "loss_sent": 0.14378657937049866, + "loss_sod": 0.012567362748086452, + "loss_total": 0.48557230830192566, + "step": 74299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.629938125610352, + "loss_rtd": 0.3502351641654968, + "loss_sent": 0.2462940216064453, + "loss_sod": 0.05217970535159111, + "loss_total": 0.6487088799476624, + "step": 74299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.1983137130737305, + "learning_rate": 7.380939882304368e-05, + "loss": 0.6137, + "step": 74300 + }, + { + "epoch": 0.022798, + "loss_gen": 4.627171993255615, + "loss_rtd": 0.3490825891494751, + "loss_sent": 0.23272109031677246, + "loss_sod": 0.004489724058657885, + "loss_total": 0.5862933993339539, + "step": 74399 + }, + { + "epoch": 0.022798, + "loss_gen": 4.706835746765137, + "loss_rtd": 0.34307944774627686, + "loss_sent": 0.1921033263206482, + "loss_sod": 0.08980696648359299, + "loss_total": 0.6249897480010986, + "step": 74399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.6464195251464844, + "learning_rate": 7.378148959419585e-05, + "loss": 0.6032, + "step": 74400 + }, + { + "epoch": 0.022998, + "loss_gen": 3.9812397956848145, + "loss_rtd": 0.3321714997291565, + "loss_sent": 0.0303138829767704, + "loss_sod": 0.09572997689247131, + "loss_total": 0.4582153558731079, + "step": 74499 + }, + { + "epoch": 0.022998, + "loss_gen": 4.2348527908325195, + "loss_rtd": 0.3658457398414612, + "loss_sent": 0.028739361092448235, + "loss_sod": 0.01832672953605652, + "loss_total": 0.4129118323326111, + "step": 74499 + }, + { + "epoch": 0.023, + "grad_norm": 0.6190145611763, + "learning_rate": 7.375357078616685e-05, + "loss": 0.5969, + "step": 74500 + }, + { + "epoch": 0.023198, + "loss_gen": 4.848466873168945, + "loss_rtd": 0.34901705384254456, + "loss_sent": 0.2657622694969177, + "loss_sod": 0.03693791851401329, + "loss_total": 0.6517172455787659, + "step": 74599 + }, + { + "epoch": 0.023198, + "loss_gen": 4.7964606285095215, + "loss_rtd": 0.35357391834259033, + "loss_sent": 0.12477079033851624, + "loss_sod": 0.06684249639511108, + "loss_total": 0.54518723487854, + "step": 74599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.4587454795837402, + "learning_rate": 7.372564241020239e-05, + "loss": 0.5989, + "step": 74600 + }, + { + "epoch": 0.023398, + "loss_gen": 4.390159606933594, + "loss_rtd": 0.3433459997177124, + "loss_sent": 0.2119998186826706, + "loss_sod": 0.001375580090098083, + "loss_total": 0.5567213892936707, + "step": 74699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.818406105041504, + "loss_rtd": 0.36668869853019714, + "loss_sent": 0.3295286297798157, + "loss_sod": 0.07705482840538025, + "loss_total": 0.7732721567153931, + "step": 74699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.1205918788909912, + "learning_rate": 7.3697704477552e-05, + "loss": 0.6106, + "step": 74700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.816539764404297, + "loss_rtd": 0.34127429127693176, + "loss_sent": 0.2649698853492737, + "loss_sod": 0.04257281869649887, + "loss_total": 0.6488170027732849, + "step": 74799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.949727535247803, + "loss_rtd": 0.3543637990951538, + "loss_sent": 0.08813301473855972, + "loss_sod": 0.030118556693196297, + "loss_total": 0.4726153612136841, + "step": 74799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.1401525735855103, + "learning_rate": 7.366975699946908e-05, + "loss": 0.5906, + "step": 74800 + }, + { + "epoch": 0.023798, + "loss_gen": 4.6410322189331055, + "loss_rtd": 0.33168840408325195, + "loss_sent": 0.2278660237789154, + "loss_sod": 0.12386610358953476, + "loss_total": 0.6834205389022827, + "step": 74899 + }, + { + "epoch": 0.023798, + "loss_gen": 4.859090328216553, + "loss_rtd": 0.351549357175827, + "loss_sent": 0.21702858805656433, + "loss_sod": 0.03496665507555008, + "loss_total": 0.6035445928573608, + "step": 74899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.0700111389160156, + "learning_rate": 7.364179998721088e-05, + "loss": 0.6076, + "step": 74900 + }, + { + "epoch": 0.023998, + "loss_gen": 4.151700973510742, + "loss_rtd": 0.3265613913536072, + "loss_sent": 0.029658786952495575, + "loss_sod": 0.23936760425567627, + "loss_total": 0.5955877900123596, + "step": 74999 + }, + { + "epoch": 0.023998, + "loss_gen": 4.144930839538574, + "loss_rtd": 0.35030627250671387, + "loss_sent": 0.0014046452706679702, + "loss_sod": 0.25777551531791687, + "loss_total": 0.60948646068573, + "step": 74999 + }, + { + "epoch": 0.024, + "grad_norm": 0.7320817112922668, + "learning_rate": 7.361383345203848e-05, + "loss": 0.6163, + "step": 75000 + }, + { + "epoch": 0.024, + "eval_loss": 0.5863644480705261, + "eval_runtime": 151.972, + "eval_samples_per_second": 101.617, + "eval_steps_per_second": 0.796, + "step": 75000 + }, + { + "epoch": 0.024198, + "loss_gen": 4.696193218231201, + "loss_rtd": 0.3485161066055298, + "loss_sent": 0.21172170341014862, + "loss_sod": 0.03800595551729202, + "loss_total": 0.598243772983551, + "step": 75099 + }, + { + "epoch": 0.024198, + "loss_gen": 4.1647233963012695, + "loss_rtd": 0.34751391410827637, + "loss_sent": 0.09992515295743942, + "loss_sod": 0.2592490017414093, + "loss_total": 0.7066880464553833, + "step": 75099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.2299166917800903, + "learning_rate": 7.358585740521675e-05, + "loss": 0.6027, + "step": 75100 + }, + { + "epoch": 0.024398, + "loss_gen": 4.2474799156188965, + "loss_rtd": 0.355958491563797, + "loss_sent": 9.39548117457889e-05, + "loss_sod": 0.234136700630188, + "loss_total": 0.5901890993118286, + "step": 75199 + }, + { + "epoch": 0.024398, + "loss_gen": 3.7780745029449463, + "loss_rtd": 0.33295783400535583, + "loss_sent": 5.115962267154828e-05, + "loss_sod": 0.15659479796886444, + "loss_total": 0.48960378766059875, + "step": 75199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.761970579624176, + "learning_rate": 7.355787185801451e-05, + "loss": 0.6033, + "step": 75200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.645076274871826, + "loss_rtd": 0.34952473640441895, + "loss_sent": 0.3229450583457947, + "loss_sod": 0.006602790206670761, + "loss_total": 0.6790726184844971, + "step": 75299 + }, + { + "epoch": 0.024598, + "loss_gen": 4.550302982330322, + "loss_rtd": 0.3657408654689789, + "loss_sent": 0.20307454466819763, + "loss_sod": 0.028181517496705055, + "loss_total": 0.5969969034194946, + "step": 75299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.4382457733154297, + "learning_rate": 7.35298768217043e-05, + "loss": 0.6185, + "step": 75300 + }, + { + "epoch": 0.024798, + "loss_gen": 3.696732521057129, + "loss_rtd": 0.3344995677471161, + "loss_sent": 4.129198350710794e-05, + "loss_sod": 0.25585028529167175, + "loss_total": 0.5903911590576172, + "step": 75399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.009065628051758, + "loss_rtd": 0.3305332362651825, + "loss_sent": 0.0384492464363575, + "loss_sod": 0.19323648512363434, + "loss_total": 0.562218964099884, + "step": 75399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.0108808279037476, + "learning_rate": 7.350187230756253e-05, + "loss": 0.6041, + "step": 75400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.490052223205566, + "loss_rtd": 0.35382014513015747, + "loss_sent": 0.20739054679870605, + "loss_sod": 0.11048437654972076, + "loss_total": 0.6716950535774231, + "step": 75499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.205677509307861, + "loss_rtd": 0.3394111692905426, + "loss_sent": 0.4223472774028778, + "loss_sod": 0.1295999437570572, + "loss_total": 0.8913583755493164, + "step": 75499 + }, + { + "epoch": 0.025, + "grad_norm": 1.3963903188705444, + "learning_rate": 7.347385832686938e-05, + "loss": 0.615, + "step": 75500 + }, + { + "epoch": 0.025198, + "loss_gen": 4.7777791023254395, + "loss_rtd": 0.33643102645874023, + "loss_sent": 0.3857211470603943, + "loss_sod": 0.02439464069902897, + "loss_total": 0.7465468049049377, + "step": 75599 + }, + { + "epoch": 0.025198, + "loss_gen": 4.464804172515869, + "loss_rtd": 0.3587886691093445, + "loss_sent": 0.07014551013708115, + "loss_sod": 0.1717534363269806, + "loss_total": 0.6006876230239868, + "step": 75599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.3431655168533325, + "learning_rate": 7.344583489090893e-05, + "loss": 0.619, + "step": 75600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.4741926193237305, + "loss_rtd": 0.35084760189056396, + "loss_sent": 0.22809773683547974, + "loss_sod": 0.02333931252360344, + "loss_total": 0.6022846698760986, + "step": 75699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.5327467918396, + "loss_rtd": 0.35005471110343933, + "loss_sent": 0.12196867913007736, + "loss_sod": 0.025535210967063904, + "loss_total": 0.49755859375, + "step": 75699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.279105305671692, + "learning_rate": 7.341780201096897e-05, + "loss": 0.6056, + "step": 75700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.242307662963867, + "loss_rtd": 0.3398686647415161, + "loss_sent": 0.3016100227832794, + "loss_sod": 0.055376965552568436, + "loss_total": 0.6968556642532349, + "step": 75799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.900667190551758, + "loss_rtd": 0.3508695662021637, + "loss_sent": 0.14425787329673767, + "loss_sod": 0.04531536251306534, + "loss_total": 0.5404428243637085, + "step": 75799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.2808212041854858, + "learning_rate": 7.338975969834117e-05, + "loss": 0.6097, + "step": 75800 + }, + { + "epoch": 0.025798, + "loss_gen": 4.717090129852295, + "loss_rtd": 0.33705005049705505, + "loss_sent": 0.1795204132795334, + "loss_sod": 0.03254042938351631, + "loss_total": 0.5491108894348145, + "step": 75899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.365910530090332, + "loss_rtd": 0.37813952565193176, + "loss_sent": 0.23682186007499695, + "loss_sod": 0.06760432571172714, + "loss_total": 0.6825656890869141, + "step": 75899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.737987756729126, + "learning_rate": 7.336170796432093e-05, + "loss": 0.6138, + "step": 75900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.509415626525879, + "loss_rtd": 0.33232179284095764, + "loss_sent": 0.15266606211662292, + "loss_sod": 0.01130371168255806, + "loss_total": 0.4962915778160095, + "step": 75999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.437432765960693, + "loss_rtd": 0.34324514865875244, + "loss_sent": 0.2692817449569702, + "loss_sod": 0.1113276407122612, + "loss_total": 0.7238545417785645, + "step": 75999 + }, + { + "epoch": 0.026, + "grad_norm": 1.6792041063308716, + "learning_rate": 7.333364682020755e-05, + "loss": 0.6127, + "step": 76000 + }, + { + "epoch": 0.026, + "eval_loss": 0.5819876790046692, + "eval_runtime": 151.4448, + "eval_samples_per_second": 101.971, + "eval_steps_per_second": 0.799, + "step": 76000 + }, + { + "epoch": 0.026198, + "loss_gen": 3.8935954570770264, + "loss_rtd": 0.33059537410736084, + "loss_sent": 5.3254007070790976e-05, + "loss_sod": 0.14532531797885895, + "loss_total": 0.4759739339351654, + "step": 76099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.468469619750977, + "loss_rtd": 0.33144134283065796, + "loss_sent": 0.21191422641277313, + "loss_sod": 0.08587560802698135, + "loss_total": 0.6292311549186707, + "step": 76099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9422717690467834, + "learning_rate": 7.330557627730402e-05, + "loss": 0.5996, + "step": 76100 + }, + { + "epoch": 0.026398, + "loss_gen": 4.545282363891602, + "loss_rtd": 0.33573615550994873, + "loss_sent": 0.2813408076763153, + "loss_sod": 0.14380568265914917, + "loss_total": 0.7608826160430908, + "step": 76199 + }, + { + "epoch": 0.026398, + "loss_gen": 4.843388557434082, + "loss_rtd": 0.3394545614719391, + "loss_sent": 0.23535408079624176, + "loss_sod": 0.04507937282323837, + "loss_total": 0.6198880076408386, + "step": 76199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.9187272787094116, + "learning_rate": 7.327749634691714e-05, + "loss": 0.6048, + "step": 76200 + }, + { + "epoch": 0.026598, + "loss_gen": 4.975532054901123, + "loss_rtd": 0.3380142152309418, + "loss_sent": 0.3965788185596466, + "loss_sod": 0.06410861015319824, + "loss_total": 0.7987016439437866, + "step": 76299 + }, + { + "epoch": 0.026598, + "loss_gen": 4.570683002471924, + "loss_rtd": 0.3355201184749603, + "loss_sent": 0.1470223218202591, + "loss_sod": 0.15155425667762756, + "loss_total": 0.6340966820716858, + "step": 76299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.0840137004852295, + "learning_rate": 7.324940704035753e-05, + "loss": 0.6044, + "step": 76300 + }, + { + "epoch": 0.026798, + "loss_gen": 4.853896141052246, + "loss_rtd": 0.34713008999824524, + "loss_sent": 0.19798138737678528, + "loss_sod": 0.09696397930383682, + "loss_total": 0.6420754194259644, + "step": 76399 + }, + { + "epoch": 0.026798, + "loss_gen": 4.590819358825684, + "loss_rtd": 0.349762886762619, + "loss_sent": 0.14599496126174927, + "loss_sod": 0.032492972910404205, + "loss_total": 0.5282508134841919, + "step": 76399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.1431488990783691, + "learning_rate": 7.322130836893952e-05, + "loss": 0.5836, + "step": 76400 + }, + { + "epoch": 0.026998, + "loss_gen": 4.64332914352417, + "loss_rtd": 0.3496253192424774, + "loss_sent": 0.6941021084785461, + "loss_sod": 0.010382898151874542, + "loss_total": 1.0541102886199951, + "step": 76499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.089400291442871, + "loss_rtd": 0.34875771403312683, + "loss_sent": 0.17473797500133514, + "loss_sod": 0.10071061551570892, + "loss_total": 0.6242063045501709, + "step": 76499 + }, + { + "epoch": 0.027, + "grad_norm": 2.004483938217163, + "learning_rate": 7.31932003439813e-05, + "loss": 0.6089, + "step": 76500 + }, + { + "epoch": 0.027198, + "loss_gen": 4.623749732971191, + "loss_rtd": 0.35708877444267273, + "loss_sent": 0.269750714302063, + "loss_sod": 0.006428820081055164, + "loss_total": 0.6332682967185974, + "step": 76599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.706696033477783, + "loss_rtd": 0.34746280312538147, + "loss_sent": 0.19256754219532013, + "loss_sod": 0.020543798804283142, + "loss_total": 0.5605741143226624, + "step": 76599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.9635896682739258, + "learning_rate": 7.316508297680474e-05, + "loss": 0.6054, + "step": 76600 + }, + { + "epoch": 0.027398, + "loss_gen": 4.7151079177856445, + "loss_rtd": 0.35019439458847046, + "loss_sent": 0.13037344813346863, + "loss_sod": 0.018380889669060707, + "loss_total": 0.49894872307777405, + "step": 76699 + }, + { + "epoch": 0.027398, + "loss_gen": 4.885163307189941, + "loss_rtd": 0.3396100699901581, + "loss_sent": 0.37537530064582825, + "loss_sod": 0.04548022896051407, + "loss_total": 0.7604656219482422, + "step": 76699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.422677755355835, + "learning_rate": 7.313695627873553e-05, + "loss": 0.6092, + "step": 76700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.78942346572876, + "loss_rtd": 0.3287353217601776, + "loss_sent": 0.25026121735572815, + "loss_sod": 0.051381666213274, + "loss_total": 0.630378246307373, + "step": 76799 + }, + { + "epoch": 0.027598, + "loss_gen": 4.916085243225098, + "loss_rtd": 0.3519432842731476, + "loss_sent": 0.3339703381061554, + "loss_sod": 0.08152005076408386, + "loss_total": 0.7674336433410645, + "step": 76799 + }, + { + "epoch": 0.0276, + "grad_norm": 2.0085551738739014, + "learning_rate": 7.31088202611031e-05, + "loss": 0.6049, + "step": 76800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.992638111114502, + "loss_rtd": 0.33744439482688904, + "loss_sent": 0.3600675165653229, + "loss_sod": 0.1079985573887825, + "loss_total": 0.8055104613304138, + "step": 76899 + }, + { + "epoch": 0.027798, + "loss_gen": 4.506402492523193, + "loss_rtd": 0.3557027280330658, + "loss_sent": 0.07336246967315674, + "loss_sod": 0.006056470330804586, + "loss_total": 0.43512165546417236, + "step": 76899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.325028419494629, + "learning_rate": 7.308067493524064e-05, + "loss": 0.603, + "step": 76900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.883081436157227, + "loss_rtd": 0.34431836009025574, + "loss_sent": 0.11972984671592712, + "loss_sod": 0.15086975693702698, + "loss_total": 0.6149179935455322, + "step": 76999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.85264778137207, + "loss_rtd": 0.36707422137260437, + "loss_sent": 0.12365243583917618, + "loss_sod": 0.06359491497278214, + "loss_total": 0.5543215870857239, + "step": 76999 + }, + { + "epoch": 0.028, + "grad_norm": 0.9523294568061829, + "learning_rate": 7.305252031248506e-05, + "loss": 0.6139, + "step": 77000 + }, + { + "epoch": 0.028, + "eval_loss": 0.5862102508544922, + "eval_runtime": 151.6791, + "eval_samples_per_second": 101.814, + "eval_steps_per_second": 0.798, + "step": 77000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.708960056304932, + "loss_rtd": 0.3252050280570984, + "loss_sent": 0.10101579129695892, + "loss_sod": 0.09263059496879578, + "loss_total": 0.5188513994216919, + "step": 77099 + }, + { + "epoch": 0.028198, + "loss_gen": 3.5701472759246826, + "loss_rtd": 0.31843188405036926, + "loss_sent": 4.417903983267024e-05, + "loss_sod": 0.2799660861492157, + "loss_total": 0.5984421372413635, + "step": 77099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.270837426185608, + "learning_rate": 7.302435640417707e-05, + "loss": 0.6094, + "step": 77100 + }, + { + "epoch": 0.028398, + "loss_gen": 4.561153411865234, + "loss_rtd": 0.34159260988235474, + "loss_sent": 0.11388792842626572, + "loss_sod": 0.0881519764661789, + "loss_total": 0.5436325073242188, + "step": 77199 + }, + { + "epoch": 0.028398, + "loss_gen": 4.914067268371582, + "loss_rtd": 0.3523808419704437, + "loss_sent": 0.4624131917953491, + "loss_sod": 0.03237197920680046, + "loss_total": 0.8471660017967224, + "step": 77199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.6572402715682983, + "learning_rate": 7.299618322166106e-05, + "loss": 0.5905, + "step": 77200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.187253475189209, + "loss_rtd": 0.3432314991950989, + "loss_sent": 0.042851973325014114, + "loss_sod": 0.0935392677783966, + "loss_total": 0.4796227514743805, + "step": 77299 + }, + { + "epoch": 0.028598, + "loss_gen": 4.5806803703308105, + "loss_rtd": 0.3343912661075592, + "loss_sent": 0.22158819437026978, + "loss_sod": 0.02681458368897438, + "loss_total": 0.5827940702438354, + "step": 77299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.66943359375, + "learning_rate": 7.296800077628521e-05, + "loss": 0.6095, + "step": 77300 + }, + { + "epoch": 0.028798, + "loss_gen": 3.7758219242095947, + "loss_rtd": 0.31664010882377625, + "loss_sent": 0.08940772712230682, + "loss_sod": 0.03532155230641365, + "loss_total": 0.4413694143295288, + "step": 77399 + }, + { + "epoch": 0.028798, + "loss_gen": 4.408205032348633, + "loss_rtd": 0.3292091190814972, + "loss_sent": 0.2610619068145752, + "loss_sod": 0.16430658102035522, + "loss_total": 0.75457763671875, + "step": 77399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.8829277157783508, + "learning_rate": 7.293980907940139e-05, + "loss": 0.594, + "step": 77400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.965546131134033, + "loss_rtd": 0.3634222745895386, + "loss_sent": 0.14828786253929138, + "loss_sod": 0.03323981165885925, + "loss_total": 0.5449499487876892, + "step": 77499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.154338359832764, + "loss_rtd": 0.3393701910972595, + "loss_sent": 0.1171560138463974, + "loss_sod": 0.05646669864654541, + "loss_total": 0.5129929184913635, + "step": 77499 + }, + { + "epoch": 0.029, + "grad_norm": 0.6069972515106201, + "learning_rate": 7.291160814236522e-05, + "loss": 0.5909, + "step": 77500 + }, + { + "epoch": 0.029198, + "loss_gen": 4.255629539489746, + "loss_rtd": 0.3524973690509796, + "loss_sent": 0.20539936423301697, + "loss_sod": 0.07957247644662857, + "loss_total": 0.6374691724777222, + "step": 77599 + }, + { + "epoch": 0.029198, + "loss_gen": 4.823246002197266, + "loss_rtd": 0.3374880254268646, + "loss_sent": 0.1319703608751297, + "loss_sod": 0.12082032859325409, + "loss_total": 0.590278685092926, + "step": 77599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.9810872077941895, + "learning_rate": 7.288339797653603e-05, + "loss": 0.6058, + "step": 77600 + }, + { + "epoch": 0.029398, + "loss_gen": 3.8772659301757812, + "loss_rtd": 0.3234400749206543, + "loss_sent": 0.03408446162939072, + "loss_sod": 0.08280368894338608, + "loss_total": 0.4403282403945923, + "step": 77699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.439805507659912, + "loss_rtd": 0.3517507314682007, + "loss_sent": 0.25072789192199707, + "loss_sod": 0.13128891587257385, + "loss_total": 0.7337675094604492, + "step": 77699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.86631178855896, + "learning_rate": 7.285517859327688e-05, + "loss": 0.5862, + "step": 77700 + }, + { + "epoch": 0.029598, + "loss_gen": 4.9372992515563965, + "loss_rtd": 0.3406504988670349, + "loss_sent": 0.32388031482696533, + "loss_sod": 0.026403963565826416, + "loss_total": 0.6909347772598267, + "step": 77799 + }, + { + "epoch": 0.029598, + "loss_gen": 4.953743934631348, + "loss_rtd": 0.33431267738342285, + "loss_sent": 0.38023707270622253, + "loss_sod": 0.07797607034444809, + "loss_total": 0.7925258278846741, + "step": 77799 + }, + { + "epoch": 0.0296, + "grad_norm": 2.2315027713775635, + "learning_rate": 7.282695000395451e-05, + "loss": 0.5938, + "step": 77800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.043262481689453, + "loss_rtd": 0.3420753479003906, + "loss_sent": 0.2619520425796509, + "loss_sod": 0.062093593180179596, + "loss_total": 0.6661210060119629, + "step": 77899 + }, + { + "epoch": 0.029798, + "loss_gen": 4.945333480834961, + "loss_rtd": 0.33110296726226807, + "loss_sent": 0.16390226781368256, + "loss_sod": 0.04249800369143486, + "loss_total": 0.5375032424926758, + "step": 77899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.4338330030441284, + "learning_rate": 7.27987122199394e-05, + "loss": 0.6039, + "step": 77900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.042571544647217, + "loss_rtd": 0.3607284426689148, + "loss_sent": 0.16485467553138733, + "loss_sod": 0.10195044428110123, + "loss_total": 0.6275335550308228, + "step": 77999 + }, + { + "epoch": 0.029998, + "loss_gen": 4.82106351852417, + "loss_rtd": 0.33838844299316406, + "loss_sent": 0.14246927201747894, + "loss_sod": 0.030077558010816574, + "loss_total": 0.5109352469444275, + "step": 77999 + }, + { + "epoch": 0.03, + "grad_norm": 0.8304947018623352, + "learning_rate": 7.277046525260575e-05, + "loss": 0.603, + "step": 78000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5783067345619202, + "eval_runtime": 152.9147, + "eval_samples_per_second": 100.991, + "eval_steps_per_second": 0.791, + "step": 78000 + }, + { + "epoch": 0.030198, + "loss_gen": 4.8893256187438965, + "loss_rtd": 0.33507299423217773, + "loss_sent": 0.3655364215373993, + "loss_sod": 0.17951573431491852, + "loss_total": 0.8801251649856567, + "step": 78099 + }, + { + "epoch": 0.030198, + "loss_gen": 4.547902584075928, + "loss_rtd": 0.3381362557411194, + "loss_sent": 0.012435453943908215, + "loss_sod": 0.04874253273010254, + "loss_total": 0.3993142545223236, + "step": 78099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.5881937742233276, + "learning_rate": 7.274220911333142e-05, + "loss": 0.6111, + "step": 78100 + }, + { + "epoch": 0.030398, + "loss_gen": 4.830233573913574, + "loss_rtd": 0.35028842091560364, + "loss_sent": 0.21340954303741455, + "loss_sod": 0.04156368970870972, + "loss_total": 0.6052616834640503, + "step": 78199 + }, + { + "epoch": 0.030398, + "loss_gen": 4.917245864868164, + "loss_rtd": 0.35128992795944214, + "loss_sent": 0.12125124037265778, + "loss_sod": 0.1287066787481308, + "loss_total": 0.6012478470802307, + "step": 78199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.1110472679138184, + "learning_rate": 7.271394381349797e-05, + "loss": 0.5843, + "step": 78200 + }, + { + "epoch": 0.030598, + "loss_gen": 4.95368766784668, + "loss_rtd": 0.3567056655883789, + "loss_sent": 0.2553303837776184, + "loss_sod": 0.08781591057777405, + "loss_total": 0.6998519897460938, + "step": 78299 + }, + { + "epoch": 0.030598, + "loss_gen": 4.473656177520752, + "loss_rtd": 0.3402126729488373, + "loss_sent": 0.05505591630935669, + "loss_sod": 0.008297743275761604, + "loss_total": 0.4035663306713104, + "step": 78299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.2994519472122192, + "learning_rate": 7.268566936449067e-05, + "loss": 0.6022, + "step": 78300 + }, + { + "epoch": 0.030798, + "loss_gen": 4.774655342102051, + "loss_rtd": 0.33963102102279663, + "loss_sent": 0.09324996173381805, + "loss_sod": 0.1005897969007492, + "loss_total": 0.5334708094596863, + "step": 78399 + }, + { + "epoch": 0.030798, + "loss_gen": 4.81737756729126, + "loss_rtd": 0.33952900767326355, + "loss_sent": 0.29965364933013916, + "loss_sod": 0.015807606279850006, + "loss_total": 0.6549902558326721, + "step": 78399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.7558334469795227, + "learning_rate": 7.265738577769847e-05, + "loss": 0.5992, + "step": 78400 + }, + { + "epoch": 0.030998, + "loss_gen": 4.676304340362549, + "loss_rtd": 0.3421095013618469, + "loss_sent": 0.05747511610388756, + "loss_sod": 0.12205128371715546, + "loss_total": 0.521635890007019, + "step": 78499 + }, + { + "epoch": 0.030998, + "loss_gen": 4.821928024291992, + "loss_rtd": 0.3583402931690216, + "loss_sent": 0.08410129696130753, + "loss_sod": 0.027698632329702377, + "loss_total": 0.4701402187347412, + "step": 78499 + }, + { + "epoch": 0.031, + "grad_norm": 0.8563811779022217, + "learning_rate": 7.262909306451399e-05, + "loss": 0.5979, + "step": 78500 + }, + { + "epoch": 0.031198, + "loss_gen": 4.854304790496826, + "loss_rtd": 0.3506193459033966, + "loss_sent": 0.36575737595558167, + "loss_sod": 0.02470770850777626, + "loss_total": 0.7410844564437866, + "step": 78599 + }, + { + "epoch": 0.031198, + "loss_gen": 4.517398834228516, + "loss_rtd": 0.32580113410949707, + "loss_sent": 0.18032927811145782, + "loss_sod": 0.05777490884065628, + "loss_total": 0.5639052987098694, + "step": 78599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.334820032119751, + "learning_rate": 7.260079123633352e-05, + "loss": 0.593, + "step": 78600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.088646411895752, + "loss_rtd": 0.3596729338169098, + "loss_sent": 0.26877015829086304, + "loss_sod": 0.07989838719367981, + "loss_total": 0.7083414793014526, + "step": 78699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.0591936111450195, + "loss_rtd": 0.332432359457016, + "loss_sent": 0.5234665870666504, + "loss_sod": 0.029768668115139008, + "loss_total": 0.885667622089386, + "step": 78699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.5670020580291748, + "learning_rate": 7.257248030455704e-05, + "loss": 0.6176, + "step": 78700 + }, + { + "epoch": 0.031598, + "loss_gen": 4.583230972290039, + "loss_rtd": 0.3506057858467102, + "loss_sent": 0.40736258029937744, + "loss_sod": 0.01595848798751831, + "loss_total": 0.773926854133606, + "step": 78799 + }, + { + "epoch": 0.031598, + "loss_gen": 4.5218186378479, + "loss_rtd": 0.3393426835536957, + "loss_sent": 0.22740121185779572, + "loss_sod": 0.02407972514629364, + "loss_total": 0.5908235907554626, + "step": 78799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.3964877128601074, + "learning_rate": 7.254416028058822e-05, + "loss": 0.6169, + "step": 78800 + }, + { + "epoch": 0.031798, + "loss_gen": 4.38004207611084, + "loss_rtd": 0.36475300788879395, + "loss_sent": 0.26608026027679443, + "loss_sod": 0.007397185545414686, + "loss_total": 0.6382304430007935, + "step": 78899 + }, + { + "epoch": 0.031798, + "loss_gen": 4.8993144035339355, + "loss_rtd": 0.35095274448394775, + "loss_sent": 0.213242307305336, + "loss_sod": 0.23117947578430176, + "loss_total": 0.7953745126724243, + "step": 78899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.4138163328170776, + "learning_rate": 7.251583117583429e-05, + "loss": 0.5951, + "step": 78900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.560085773468018, + "loss_rtd": 0.3431425094604492, + "loss_sent": 0.16087932884693146, + "loss_sod": 0.01687842607498169, + "loss_total": 0.5209002494812012, + "step": 78999 + }, + { + "epoch": 0.031998, + "loss_gen": 4.905075550079346, + "loss_rtd": 0.35127487778663635, + "loss_sent": 0.12146424502134323, + "loss_sod": 0.029746858403086662, + "loss_total": 0.502485990524292, + "step": 78999 + }, + { + "epoch": 0.032, + "grad_norm": 0.8463312387466431, + "learning_rate": 7.24874930017063e-05, + "loss": 0.5956, + "step": 79000 + }, + { + "epoch": 0.032, + "eval_loss": 0.5794667601585388, + "eval_runtime": 152.0437, + "eval_samples_per_second": 101.569, + "eval_steps_per_second": 0.796, + "step": 79000 + }, + { + "epoch": 0.032198, + "loss_gen": 4.906808376312256, + "loss_rtd": 0.3431001603603363, + "loss_sent": 0.15424330532550812, + "loss_sod": 0.07962116599082947, + "loss_total": 0.5769646167755127, + "step": 79099 + }, + { + "epoch": 0.032198, + "loss_gen": 4.781933784484863, + "loss_rtd": 0.34182798862457275, + "loss_sent": 0.16395731270313263, + "loss_sod": 0.11651577055454254, + "loss_total": 0.6223010420799255, + "step": 79099 + }, + { + "epoch": 0.0322, + "grad_norm": 1.2742360830307007, + "learning_rate": 7.245914576961878e-05, + "loss": 0.5978, + "step": 79100 + }, + { + "epoch": 0.032398, + "loss_gen": 4.805627822875977, + "loss_rtd": 0.33406636118888855, + "loss_sent": 0.10832850635051727, + "loss_sod": 0.029560662806034088, + "loss_total": 0.4719555377960205, + "step": 79199 + }, + { + "epoch": 0.032398, + "loss_gen": 4.88537073135376, + "loss_rtd": 0.3371656537055969, + "loss_sent": 0.29921814799308777, + "loss_sod": 0.05873815715312958, + "loss_total": 0.6951220035552979, + "step": 79199 + }, + { + "epoch": 0.0324, + "grad_norm": 1.185131549835205, + "learning_rate": 7.243078949099006e-05, + "loss": 0.6086, + "step": 79200 + }, + { + "epoch": 0.032598, + "loss_gen": 5.029036521911621, + "loss_rtd": 0.3534056544303894, + "loss_sent": 0.21892966330051422, + "loss_sod": 0.15603232383728027, + "loss_total": 0.7283676266670227, + "step": 79299 + }, + { + "epoch": 0.032598, + "loss_gen": 4.953742504119873, + "loss_rtd": 0.315411776304245, + "loss_sent": 0.435451865196228, + "loss_sod": 0.11127342283725739, + "loss_total": 0.8621370792388916, + "step": 79299 + }, + { + "epoch": 0.0326, + "grad_norm": 1.3502787351608276, + "learning_rate": 7.2402424177242e-05, + "loss": 0.6062, + "step": 79300 + }, + { + "epoch": 0.032798, + "loss_gen": 4.27264928817749, + "loss_rtd": 0.3398166298866272, + "loss_sent": 0.04490416496992111, + "loss_sod": 0.11879505962133408, + "loss_total": 0.5035158395767212, + "step": 79399 + }, + { + "epoch": 0.032798, + "loss_gen": 3.5960450172424316, + "loss_rtd": 0.330872505903244, + "loss_sent": 4.761438685818575e-05, + "loss_sod": 0.1638348400592804, + "loss_total": 0.49475497007369995, + "step": 79399 + }, + { + "epoch": 0.0328, + "grad_norm": 1.1576119661331177, + "learning_rate": 7.237404983980016e-05, + "loss": 0.5923, + "step": 79400 + }, + { + "epoch": 0.032998, + "loss_gen": 4.7262444496154785, + "loss_rtd": 0.37096092104911804, + "loss_sent": 0.12971912324428558, + "loss_sod": 0.06608349829912186, + "loss_total": 0.5667635202407837, + "step": 79499 + }, + { + "epoch": 0.032998, + "loss_gen": 4.8113908767700195, + "loss_rtd": 0.35529083013534546, + "loss_sent": 0.22367742657661438, + "loss_sod": 0.059045784175395966, + "loss_total": 0.6380140781402588, + "step": 79499 + }, + { + "epoch": 0.033, + "grad_norm": 1.82859468460083, + "learning_rate": 7.234566649009373e-05, + "loss": 0.5985, + "step": 79500 + }, + { + "epoch": 0.033198, + "loss_gen": 4.804863452911377, + "loss_rtd": 0.3359534442424774, + "loss_sent": 0.05564633756875992, + "loss_sod": 0.12397048622369766, + "loss_total": 0.5155702829360962, + "step": 79599 + }, + { + "epoch": 0.033198, + "loss_gen": 4.417680263519287, + "loss_rtd": 0.3600488305091858, + "loss_sent": 0.21803125739097595, + "loss_sod": 0.05044897273182869, + "loss_total": 0.6285290718078613, + "step": 79599 + }, + { + "epoch": 0.0332, + "grad_norm": 0.7793765068054199, + "learning_rate": 7.23172741395555e-05, + "loss": 0.6023, + "step": 79600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.278229713439941, + "loss_rtd": 0.33663731813430786, + "loss_sent": 0.5610448122024536, + "loss_sod": 0.15033027529716492, + "loss_total": 1.048012375831604, + "step": 79699 + }, + { + "epoch": 0.033398, + "loss_gen": 4.6742753982543945, + "loss_rtd": 0.3588688373565674, + "loss_sent": 0.35958272218704224, + "loss_sod": 0.020474664866924286, + "loss_total": 0.7389262318611145, + "step": 79699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.4588605165481567, + "learning_rate": 7.228887279962192e-05, + "loss": 0.5974, + "step": 79700 + }, + { + "epoch": 0.033598, + "loss_gen": 4.4923930168151855, + "loss_rtd": 0.3381545841693878, + "loss_sent": 0.05760623887181282, + "loss_sod": 0.07205002009868622, + "loss_total": 0.46781083941459656, + "step": 79799 + }, + { + "epoch": 0.033598, + "loss_gen": 4.729586601257324, + "loss_rtd": 0.34935539960861206, + "loss_sent": 0.13376431167125702, + "loss_sod": 0.04556266963481903, + "loss_total": 0.5286824107170105, + "step": 79799 + }, + { + "epoch": 0.0336, + "grad_norm": 0.8367990255355835, + "learning_rate": 7.226046248173305e-05, + "loss": 0.5974, + "step": 79800 + }, + { + "epoch": 0.033798, + "loss_gen": 4.863954544067383, + "loss_rtd": 0.34549108147621155, + "loss_sent": 0.1215205267071724, + "loss_sod": 0.0367925763130188, + "loss_total": 0.5038042068481445, + "step": 79899 + }, + { + "epoch": 0.033798, + "loss_gen": 4.856892108917236, + "loss_rtd": 0.3340567350387573, + "loss_sent": 0.18412260711193085, + "loss_sod": 0.05267767608165741, + "loss_total": 0.570857048034668, + "step": 79899 + }, + { + "epoch": 0.0338, + "grad_norm": 0.9888171553611755, + "learning_rate": 7.223204319733255e-05, + "loss": 0.5983, + "step": 79900 + }, + { + "epoch": 0.033998, + "loss_gen": 4.183230400085449, + "loss_rtd": 0.3279927968978882, + "loss_sent": 0.09533942490816116, + "loss_sod": 0.07924136519432068, + "loss_total": 0.5025736093521118, + "step": 79999 + }, + { + "epoch": 0.033998, + "loss_gen": 4.862549304962158, + "loss_rtd": 0.3317645192146301, + "loss_sent": 0.26322609186172485, + "loss_sod": 0.04174702987074852, + "loss_total": 0.6367376446723938, + "step": 79999 + }, + { + "epoch": 0.034, + "grad_norm": 1.68937349319458, + "learning_rate": 7.22036149578677e-05, + "loss": 0.6177, + "step": 80000 + }, + { + "epoch": 0.034, + "eval_loss": 0.5796692967414856, + "eval_runtime": 151.847, + "eval_samples_per_second": 101.701, + "eval_steps_per_second": 0.797, + "step": 80000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.622843265533447, + "loss_rtd": 0.3517666459083557, + "loss_sent": 0.09793248027563095, + "loss_sod": 0.050344400107860565, + "loss_total": 0.500043511390686, + "step": 80099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.010815620422363, + "loss_rtd": 0.34234005212783813, + "loss_sent": 0.00033815408824011683, + "loss_sod": 0.21406474709510803, + "loss_total": 0.5567429661750793, + "step": 80099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.9491010308265686, + "learning_rate": 7.21751777747894e-05, + "loss": 0.5969, + "step": 80100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.592875957489014, + "loss_rtd": 0.33668312430381775, + "loss_sent": 0.10726157575845718, + "loss_sod": 0.06045020371675491, + "loss_total": 0.5043948888778687, + "step": 80199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.241170883178711, + "loss_rtd": 0.345996230840683, + "loss_sent": 0.17690055072307587, + "loss_sod": 0.1328912377357483, + "loss_total": 0.655788004398346, + "step": 80199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.9030731320381165, + "learning_rate": 7.214673165955214e-05, + "loss": 0.587, + "step": 80200 + }, + { + "epoch": 0.000598, + "loss_gen": 3.6014344692230225, + "loss_rtd": 0.3160237967967987, + "loss_sent": 8.825836994219571e-05, + "loss_sod": 0.1601894348859787, + "loss_total": 0.4763014614582062, + "step": 80299 + }, + { + "epoch": 0.000598, + "loss_gen": 4.452521800994873, + "loss_rtd": 0.32397112250328064, + "loss_sent": 0.1345147341489792, + "loss_sod": 0.05335891619324684, + "loss_total": 0.5118447542190552, + "step": 80299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.8657112121582031, + "learning_rate": 7.211827662361403e-05, + "loss": 0.6072, + "step": 80300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.304147720336914, + "loss_rtd": 0.3607083857059479, + "loss_sent": 0.2113885134458542, + "loss_sod": 0.004338310100138187, + "loss_total": 0.5764352083206177, + "step": 80399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.526896953582764, + "loss_rtd": 0.35893163084983826, + "loss_sent": 0.12096848338842392, + "loss_sod": 0.053405389189720154, + "loss_total": 0.5333054661750793, + "step": 80399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.575034737586975, + "learning_rate": 7.208981267843675e-05, + "loss": 0.6187, + "step": 80400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.012181282043457, + "loss_rtd": 0.3451538383960724, + "loss_sent": 0.45236727595329285, + "loss_sod": 0.01512182503938675, + "loss_total": 0.8126429319381714, + "step": 80499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.574728488922119, + "loss_rtd": 0.3141675889492035, + "loss_sent": 0.08709835261106491, + "loss_sod": 0.017966214567422867, + "loss_total": 0.41923215985298157, + "step": 80499 + }, + { + "epoch": 0.001, + "grad_norm": 0.944183886051178, + "learning_rate": 7.206133983548554e-05, + "loss": 0.5973, + "step": 80500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.878512859344482, + "loss_rtd": 0.33649691939353943, + "loss_sent": 0.47494471073150635, + "loss_sod": 0.07619231939315796, + "loss_total": 0.8876339197158813, + "step": 80599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.036816596984863, + "loss_rtd": 0.34828975796699524, + "loss_sent": 0.18520109355449677, + "loss_sod": 0.06617250293493271, + "loss_total": 0.5996633768081665, + "step": 80599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.1195414066314697, + "learning_rate": 7.203285810622929e-05, + "loss": 0.5913, + "step": 80600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.697576999664307, + "loss_rtd": 0.32750260829925537, + "loss_sent": 0.19536228477954865, + "loss_sod": 0.014839423820376396, + "loss_total": 0.5377042889595032, + "step": 80699 + }, + { + "epoch": 0.001398, + "loss_gen": 4.68838357925415, + "loss_rtd": 0.35094472765922546, + "loss_sent": 0.10224013775587082, + "loss_sod": 0.006008798256516457, + "loss_total": 0.4591936767101288, + "step": 80699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.7635841965675354, + "learning_rate": 7.200436750214044e-05, + "loss": 0.6122, + "step": 80700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.490006446838379, + "loss_rtd": 0.3497379422187805, + "loss_sent": 0.10162508487701416, + "loss_sod": 0.024359572678804398, + "loss_total": 0.47572261095046997, + "step": 80799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.756316184997559, + "loss_rtd": 0.32693690061569214, + "loss_sent": 0.08548082411289215, + "loss_sod": 0.06649260222911835, + "loss_total": 0.47891032695770264, + "step": 80799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.5970702171325684, + "learning_rate": 7.197586803469499e-05, + "loss": 0.5948, + "step": 80800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.97715950012207, + "loss_rtd": 0.3462950885295868, + "loss_sent": 0.0746036022901535, + "loss_sod": 0.04795315861701965, + "loss_total": 0.46885186433792114, + "step": 80899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.787614345550537, + "loss_rtd": 0.34894487261772156, + "loss_sent": 0.1466822624206543, + "loss_sod": 0.07616492360830307, + "loss_total": 0.5717920660972595, + "step": 80899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.4280049800872803, + "learning_rate": 7.194735971537251e-05, + "loss": 0.6022, + "step": 80900 + }, + { + "epoch": 0.001998, + "loss_gen": 3.9288675785064697, + "loss_rtd": 0.3252773880958557, + "loss_sent": 7.597176590934396e-05, + "loss_sod": 0.11348104476928711, + "loss_total": 0.43883439898490906, + "step": 80999 + }, + { + "epoch": 0.001998, + "loss_gen": 3.8037943840026855, + "loss_rtd": 0.3277899920940399, + "loss_sent": 0.002315351739525795, + "loss_sod": 0.2413269728422165, + "loss_total": 0.5714322924613953, + "step": 80999 + }, + { + "epoch": 0.002, + "grad_norm": 0.8177794814109802, + "learning_rate": 7.191884255565617e-05, + "loss": 0.6041, + "step": 81000 + }, + { + "epoch": 0.002, + "eval_loss": 0.5703736543655396, + "eval_runtime": 154.5666, + "eval_samples_per_second": 99.912, + "eval_steps_per_second": 0.783, + "step": 81000 + }, + { + "epoch": 0.002198, + "loss_gen": 4.518189430236816, + "loss_rtd": 0.321702241897583, + "loss_sent": 0.14045092463493347, + "loss_sod": 0.06171754375100136, + "loss_total": 0.5238707065582275, + "step": 81099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.521371364593506, + "loss_rtd": 0.33750367164611816, + "loss_sent": 0.2126319408416748, + "loss_sod": 0.008465304970741272, + "loss_total": 0.558600902557373, + "step": 81099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.046858787536621, + "learning_rate": 7.189031656703267e-05, + "loss": 0.5956, + "step": 81100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.222639560699463, + "loss_rtd": 0.3334866166114807, + "loss_sent": 0.28419044613838196, + "loss_sod": 0.10069449990987778, + "loss_total": 0.718371570110321, + "step": 81199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.741415023803711, + "loss_rtd": 0.35411888360977173, + "loss_sent": 0.32628849148750305, + "loss_sod": 0.10670779645442963, + "loss_total": 0.787115216255188, + "step": 81199 + }, + { + "epoch": 0.0024, + "grad_norm": 2.5012848377227783, + "learning_rate": 7.186178176099227e-05, + "loss": 0.5877, + "step": 81200 + }, + { + "epoch": 0.002598, + "loss_gen": 3.8994622230529785, + "loss_rtd": 0.3342326581478119, + "loss_sent": 0.00015677422925364226, + "loss_sod": 0.26861628890037537, + "loss_total": 0.6030057072639465, + "step": 81299 + }, + { + "epoch": 0.002598, + "loss_gen": 3.7668027877807617, + "loss_rtd": 0.3269832134246826, + "loss_sent": 0.00885203666985035, + "loss_sod": 0.2654297947883606, + "loss_total": 0.6012650728225708, + "step": 81299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.464532732963562, + "learning_rate": 7.183323814902879e-05, + "loss": 0.6232, + "step": 81300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.685091018676758, + "loss_rtd": 0.3290248215198517, + "loss_sent": 0.1316319704055786, + "loss_sod": 0.18161681294441223, + "loss_total": 0.6422736048698425, + "step": 81399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.897707939147949, + "loss_rtd": 0.3307396471500397, + "loss_sent": 0.07971008867025375, + "loss_sod": 0.18826325237751007, + "loss_total": 0.5987129807472229, + "step": 81399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.3436256647109985, + "learning_rate": 7.18046857426396e-05, + "loss": 0.6005, + "step": 81400 + }, + { + "epoch": 0.002998, + "loss_gen": 4.494446754455566, + "loss_rtd": 0.3431621789932251, + "loss_sent": 0.09104984998703003, + "loss_sod": 0.04677867144346237, + "loss_total": 0.4809907078742981, + "step": 81499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.569034099578857, + "loss_rtd": 0.3323788344860077, + "loss_sent": 0.15404106676578522, + "loss_sod": 0.01700172759592533, + "loss_total": 0.5034216046333313, + "step": 81499 + }, + { + "epoch": 0.003, + "grad_norm": 0.7456111311912537, + "learning_rate": 7.17761245533256e-05, + "loss": 0.5935, + "step": 81500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.801833629608154, + "loss_rtd": 0.33733636140823364, + "loss_sent": 0.2018003612756729, + "loss_sod": 0.014958927407860756, + "loss_total": 0.5540956258773804, + "step": 81599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.605984687805176, + "loss_rtd": 0.32954123616218567, + "loss_sent": 0.32172343134880066, + "loss_sod": 0.027626996859908104, + "loss_total": 0.678891658782959, + "step": 81599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9144284129142761, + "learning_rate": 7.174755459259124e-05, + "loss": 0.5976, + "step": 81600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.538783550262451, + "loss_rtd": 0.3379725515842438, + "loss_sent": 0.26620247960090637, + "loss_sod": 0.06738986819982529, + "loss_total": 0.6715649366378784, + "step": 81699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.728128910064697, + "loss_rtd": 0.376240074634552, + "loss_sent": 0.1057659238576889, + "loss_sod": 0.12661533057689667, + "loss_total": 0.60862135887146, + "step": 81699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.5157244205474854, + "learning_rate": 7.171897587194448e-05, + "loss": 0.5935, + "step": 81700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.843796730041504, + "loss_rtd": 0.35366329550743103, + "loss_sent": 0.06068854779005051, + "loss_sod": 0.15954655408859253, + "loss_total": 0.5738983750343323, + "step": 81799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.083298683166504, + "loss_rtd": 0.3443755805492401, + "loss_sent": 0.2293194681406021, + "loss_sod": 0.223373681306839, + "loss_total": 0.79706871509552, + "step": 81799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.1671299934387207, + "learning_rate": 7.169038840289684e-05, + "loss": 0.6109, + "step": 81800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.942607402801514, + "loss_rtd": 0.3444528877735138, + "loss_sent": 0.08512663841247559, + "loss_sod": 0.06856218725442886, + "loss_total": 0.49814170598983765, + "step": 81899 + }, + { + "epoch": 0.003798, + "loss_gen": 4.8817949295043945, + "loss_rtd": 0.3282775282859802, + "loss_sent": 0.3587074279785156, + "loss_sod": 0.012187833897769451, + "loss_total": 0.6991727948188782, + "step": 81899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.4610754251480103, + "learning_rate": 7.166179219696335e-05, + "loss": 0.5909, + "step": 81900 + }, + { + "epoch": 0.003998, + "loss_gen": 4.809683799743652, + "loss_rtd": 0.34137725830078125, + "loss_sent": 0.22331377863883972, + "loss_sod": 0.033756665885448456, + "loss_total": 0.5984476804733276, + "step": 81999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.722792148590088, + "loss_rtd": 0.3351828455924988, + "loss_sent": 0.2343035638332367, + "loss_sod": 0.020440496504306793, + "loss_total": 0.5899268984794617, + "step": 81999 + }, + { + "epoch": 0.004, + "grad_norm": 1.3949823379516602, + "learning_rate": 7.163318726566255e-05, + "loss": 0.5998, + "step": 82000 + }, + { + "epoch": 0.004, + "eval_loss": 0.5795272588729858, + "eval_runtime": 151.858, + "eval_samples_per_second": 101.694, + "eval_steps_per_second": 0.797, + "step": 82000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.556379795074463, + "loss_rtd": 0.33904021978378296, + "loss_sent": 0.20144030451774597, + "loss_sod": 0.004236500710248947, + "loss_total": 0.544717013835907, + "step": 82099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.782190322875977, + "loss_rtd": 0.34505394101142883, + "loss_sent": 0.41519373655319214, + "loss_sod": 0.18751248717308044, + "loss_total": 0.9477601647377014, + "step": 82099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.381917119026184, + "learning_rate": 7.16045736205165e-05, + "loss": 0.603, + "step": 82100 + }, + { + "epoch": 0.004398, + "loss_gen": 4.838950157165527, + "loss_rtd": 0.32782894372940063, + "loss_sent": 0.11037642508745193, + "loss_sod": 0.015319553203880787, + "loss_total": 0.4535249173641205, + "step": 82199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.70442008972168, + "loss_rtd": 0.3487652838230133, + "loss_sent": 0.11023913323879242, + "loss_sod": 0.19873446226119995, + "loss_total": 0.6577389240264893, + "step": 82199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.1894351243972778, + "learning_rate": 7.157595127305079e-05, + "loss": 0.6084, + "step": 82200 + }, + { + "epoch": 0.004598, + "loss_gen": 4.37827205657959, + "loss_rtd": 0.35872745513916016, + "loss_sent": 0.04632806405425072, + "loss_sod": 0.07619193196296692, + "loss_total": 0.4812474548816681, + "step": 82299 + }, + { + "epoch": 0.004598, + "loss_gen": 3.9130702018737793, + "loss_rtd": 0.32239601016044617, + "loss_sent": 0.007580731529742479, + "loss_sod": 0.10175400972366333, + "loss_total": 0.4317307472229004, + "step": 82299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.706117570400238, + "learning_rate": 7.154732023479448e-05, + "loss": 0.5986, + "step": 82300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.160461902618408, + "loss_rtd": 0.3397037088871002, + "loss_sent": 8.609052747488022e-05, + "loss_sod": 0.2912858724594116, + "loss_total": 0.6310756206512451, + "step": 82399 + }, + { + "epoch": 0.004798, + "loss_gen": 3.7689208984375, + "loss_rtd": 0.3256233036518097, + "loss_sent": 0.003863121848553419, + "loss_sod": 0.1561591774225235, + "loss_total": 0.4856456220149994, + "step": 82399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.3118590116500854, + "learning_rate": 7.151868051728017e-05, + "loss": 0.581, + "step": 82400 + }, + { + "epoch": 0.004998, + "loss_gen": 4.68132209777832, + "loss_rtd": 0.323245644569397, + "loss_sent": 0.057707998901605606, + "loss_sod": 0.1399214267730713, + "loss_total": 0.520875096321106, + "step": 82499 + }, + { + "epoch": 0.004998, + "loss_gen": 4.498316287994385, + "loss_rtd": 0.3447036147117615, + "loss_sent": 0.054062191396951675, + "loss_sod": 0.08559941500425339, + "loss_total": 0.48436522483825684, + "step": 82499 + }, + { + "epoch": 0.005, + "grad_norm": 0.7210830450057983, + "learning_rate": 7.14900321320439e-05, + "loss": 0.5916, + "step": 82500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.19460391998291, + "loss_rtd": 0.3308198153972626, + "loss_sent": 0.2415839582681656, + "loss_sod": 0.07629989087581635, + "loss_total": 0.6487036347389221, + "step": 82599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.732879638671875, + "loss_rtd": 0.3315872251987457, + "loss_sent": 0.022963855415582657, + "loss_sod": 0.11855947226285934, + "loss_total": 0.473110556602478, + "step": 82599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.1002004146575928, + "learning_rate": 7.146137509062527e-05, + "loss": 0.5798, + "step": 82600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.847637176513672, + "loss_rtd": 0.32974302768707275, + "loss_sent": 0.39561349153518677, + "loss_sod": 0.020584065467119217, + "loss_total": 0.7459405660629272, + "step": 82699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.003781795501709, + "loss_rtd": 0.35805708169937134, + "loss_sent": 0.12605199217796326, + "loss_sod": 0.04816053807735443, + "loss_total": 0.5322695970535278, + "step": 82699 + }, + { + "epoch": 0.0054, + "grad_norm": 2.112302303314209, + "learning_rate": 7.14327094045673e-05, + "loss": 0.5841, + "step": 82700 + }, + { + "epoch": 0.005598, + "loss_gen": 4.327634811401367, + "loss_rtd": 0.351259708404541, + "loss_sent": 0.1786668449640274, + "loss_sod": 0.07430240511894226, + "loss_total": 0.6042289733886719, + "step": 82799 + }, + { + "epoch": 0.005598, + "loss_gen": 4.847617149353027, + "loss_rtd": 0.33705267310142517, + "loss_sent": 0.10496676713228226, + "loss_sod": 0.042972419410943985, + "loss_total": 0.4849918484687805, + "step": 82799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.8327969312667847, + "learning_rate": 7.140403508541658e-05, + "loss": 0.6011, + "step": 82800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.828712463378906, + "loss_rtd": 0.3448171615600586, + "loss_sent": 0.13701654970645905, + "loss_sod": 0.10765130072832108, + "loss_total": 0.5894849896430969, + "step": 82899 + }, + { + "epoch": 0.005798, + "loss_gen": 4.773641109466553, + "loss_rtd": 0.328213632106781, + "loss_sent": 0.3089849352836609, + "loss_sod": 0.1221984475851059, + "loss_total": 0.759397029876709, + "step": 82899 + }, + { + "epoch": 0.0058, + "grad_norm": 0.8959248661994934, + "learning_rate": 7.137535214472306e-05, + "loss": 0.5852, + "step": 82900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.394990921020508, + "loss_rtd": 0.3399747610092163, + "loss_sent": 0.15843546390533447, + "loss_sod": 0.08604242652654648, + "loss_total": 0.5844526290893555, + "step": 82999 + }, + { + "epoch": 0.005998, + "loss_gen": 4.741580963134766, + "loss_rtd": 0.3336928188800812, + "loss_sent": 0.17245937883853912, + "loss_sod": 0.02594193071126938, + "loss_total": 0.5320941209793091, + "step": 82999 + }, + { + "epoch": 0.006, + "grad_norm": 1.5453912019729614, + "learning_rate": 7.134666059404028e-05, + "loss": 0.5932, + "step": 83000 + }, + { + "epoch": 0.006, + "eval_loss": 0.576248288154602, + "eval_runtime": 152.0388, + "eval_samples_per_second": 101.573, + "eval_steps_per_second": 0.796, + "step": 83000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.927525043487549, + "loss_rtd": 0.3208247721195221, + "loss_sent": 0.1647900640964508, + "loss_sod": 0.10025301575660706, + "loss_total": 0.5858678817749023, + "step": 83099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.932606220245361, + "loss_rtd": 0.3480580449104309, + "loss_sent": 0.2994418442249298, + "loss_sod": 0.0884096771478653, + "loss_total": 0.7359095811843872, + "step": 83099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.814121663570404, + "learning_rate": 7.131796044492514e-05, + "loss": 0.6067, + "step": 83100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.173018932342529, + "loss_rtd": 0.3376966714859009, + "loss_sent": 0.056200314313173294, + "loss_sod": 0.09993436187505722, + "loss_total": 0.4938313663005829, + "step": 83199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.874149799346924, + "loss_rtd": 0.35732176899909973, + "loss_sent": 0.04529409483075142, + "loss_sod": 0.04716810956597328, + "loss_total": 0.44978398084640503, + "step": 83199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.7071604132652283, + "learning_rate": 7.128925170893809e-05, + "loss": 0.6105, + "step": 83200 + }, + { + "epoch": 0.006598, + "loss_gen": 4.64926290512085, + "loss_rtd": 0.3289829194545746, + "loss_sent": 0.1293676495552063, + "loss_sod": 0.07462191581726074, + "loss_total": 0.5329724550247192, + "step": 83299 + }, + { + "epoch": 0.006598, + "loss_gen": 4.161506175994873, + "loss_rtd": 0.32510906457901, + "loss_sent": 0.09937126934528351, + "loss_sod": 0.16410039365291595, + "loss_total": 0.5885807275772095, + "step": 83299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.2556997537612915, + "learning_rate": 7.126053439764299e-05, + "loss": 0.5866, + "step": 83300 + }, + { + "epoch": 0.006798, + "loss_gen": 4.905492305755615, + "loss_rtd": 0.34571221470832825, + "loss_sent": 0.41267988085746765, + "loss_sod": 0.027991173788905144, + "loss_total": 0.7863832712173462, + "step": 83399 + }, + { + "epoch": 0.006798, + "loss_gen": 4.741423606872559, + "loss_rtd": 0.3393404185771942, + "loss_sent": 0.0038078154902905226, + "loss_sod": 0.28449833393096924, + "loss_total": 0.6276466250419617, + "step": 83399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.1530828475952148, + "learning_rate": 7.123180852260718e-05, + "loss": 0.59, + "step": 83400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.59058141708374, + "loss_rtd": 0.33513790369033813, + "loss_sent": 0.0916479155421257, + "loss_sod": 0.09420442581176758, + "loss_total": 0.520990252494812, + "step": 83499 + }, + { + "epoch": 0.006998, + "loss_gen": 4.141010761260986, + "loss_rtd": 0.32602787017822266, + "loss_sent": 0.0297227930277586, + "loss_sod": 0.17539286613464355, + "loss_total": 0.5311435461044312, + "step": 83499 + }, + { + "epoch": 0.007, + "grad_norm": 1.0477850437164307, + "learning_rate": 7.120307409540146e-05, + "loss": 0.5962, + "step": 83500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.906367778778076, + "loss_rtd": 0.32614704966545105, + "loss_sent": 0.14142878353595734, + "loss_sod": 0.06066673994064331, + "loss_total": 0.5282425880432129, + "step": 83599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.8122663497924805, + "loss_rtd": 0.343631386756897, + "loss_sent": 0.15950128436088562, + "loss_sod": 0.03024749830365181, + "loss_total": 0.5333801507949829, + "step": 83599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.989540159702301, + "learning_rate": 7.11743311276e-05, + "loss": 0.5914, + "step": 83600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.893677234649658, + "loss_rtd": 0.3473113477230072, + "loss_sent": 0.41388821601867676, + "loss_sod": 0.06965136528015137, + "loss_total": 0.8308509588241577, + "step": 83699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.037633895874023, + "loss_rtd": 0.34461310505867004, + "loss_sent": 0.061890531331300735, + "loss_sod": 0.08433335274457932, + "loss_total": 0.4908370077610016, + "step": 83699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.752944827079773, + "learning_rate": 7.11455796307805e-05, + "loss": 0.5874, + "step": 83700 + }, + { + "epoch": 0.007598, + "loss_gen": 4.618000030517578, + "loss_rtd": 0.34183815121650696, + "loss_sent": 0.06785891950130463, + "loss_sod": 0.03274420648813248, + "loss_total": 0.44244128465652466, + "step": 83799 + }, + { + "epoch": 0.007598, + "loss_gen": 4.727540969848633, + "loss_rtd": 0.358920156955719, + "loss_sent": 0.17538397014141083, + "loss_sod": 0.1261889487504959, + "loss_total": 0.6604930758476257, + "step": 83799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.753732442855835, + "learning_rate": 7.111681961652405e-05, + "loss": 0.5912, + "step": 83800 + }, + { + "epoch": 0.007798, + "loss_gen": 4.761882305145264, + "loss_rtd": 0.34194257855415344, + "loss_sent": 0.14010225236415863, + "loss_sod": 0.03127816691994667, + "loss_total": 0.5133230090141296, + "step": 83899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.765944480895996, + "loss_rtd": 0.3241620361804962, + "loss_sent": 0.5859805345535278, + "loss_sod": 0.06969790160655975, + "loss_total": 0.9798404574394226, + "step": 83899 + }, + { + "epoch": 0.0078, + "grad_norm": 3.3188042640686035, + "learning_rate": 7.10880510964152e-05, + "loss": 0.5866, + "step": 83900 + }, + { + "epoch": 0.007998, + "loss_gen": 4.736507892608643, + "loss_rtd": 0.3393866717815399, + "loss_sent": 0.17300938069820404, + "loss_sod": 0.1425882875919342, + "loss_total": 0.6549843549728394, + "step": 83999 + }, + { + "epoch": 0.007998, + "loss_gen": 4.610940456390381, + "loss_rtd": 0.35401055216789246, + "loss_sent": 0.04571661725640297, + "loss_sod": 0.013207420706748962, + "loss_total": 0.4129345715045929, + "step": 83999 + }, + { + "epoch": 0.008, + "grad_norm": 1.0809509754180908, + "learning_rate": 7.105927408204189e-05, + "loss": 0.5765, + "step": 84000 + }, + { + "epoch": 0.008, + "eval_loss": 0.5772362947463989, + "eval_runtime": 153.3887, + "eval_samples_per_second": 100.679, + "eval_steps_per_second": 0.789, + "step": 84000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.888073921203613, + "loss_rtd": 0.35439684987068176, + "loss_sent": 0.328273206949234, + "loss_sod": 0.07193342596292496, + "loss_total": 0.7546035051345825, + "step": 84099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.5528645515441895, + "loss_rtd": 0.3527440130710602, + "loss_sent": 0.19996052980422974, + "loss_sod": 0.0916566327214241, + "loss_total": 0.644361138343811, + "step": 84099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.13083815574646, + "learning_rate": 7.103048858499549e-05, + "loss": 0.5911, + "step": 84100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.745001316070557, + "loss_rtd": 0.3726560175418854, + "loss_sent": 0.14635957777500153, + "loss_sod": 0.0025487099774181843, + "loss_total": 0.5215643048286438, + "step": 84199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.553593158721924, + "loss_rtd": 0.33759889006614685, + "loss_sent": 0.17711028456687927, + "loss_sod": 0.13150401413440704, + "loss_total": 0.646213173866272, + "step": 84199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.7179540991783142, + "learning_rate": 7.100169461687081e-05, + "loss": 0.6069, + "step": 84200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.926650047302246, + "loss_rtd": 0.32800349593162537, + "loss_sent": 0.18810003995895386, + "loss_sod": 0.034881964325904846, + "loss_total": 0.5509855151176453, + "step": 84299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.353402614593506, + "loss_rtd": 0.3569122850894928, + "loss_sent": 0.017280040308833122, + "loss_sod": 0.064621701836586, + "loss_total": 0.43881404399871826, + "step": 84299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.742855429649353, + "learning_rate": 7.097289218926604e-05, + "loss": 0.5819, + "step": 84300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.87427282333374, + "loss_rtd": 0.3532108962535858, + "loss_sent": 0.10535918176174164, + "loss_sod": 0.06047770380973816, + "loss_total": 0.5190477967262268, + "step": 84399 + }, + { + "epoch": 0.008798, + "loss_gen": 4.954662799835205, + "loss_rtd": 0.33628085255622864, + "loss_sent": 0.22558976709842682, + "loss_sod": 0.07964995503425598, + "loss_total": 0.6415205597877502, + "step": 84399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.9289606213569641, + "learning_rate": 7.09440813137828e-05, + "loss": 0.6014, + "step": 84400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.7491679191589355, + "loss_rtd": 0.3386727571487427, + "loss_sent": 0.1393914520740509, + "loss_sod": 0.07014788687229156, + "loss_total": 0.5482120513916016, + "step": 84499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.837072849273682, + "loss_rtd": 0.32627806067466736, + "loss_sent": 0.26598304510116577, + "loss_sod": 0.1486058086156845, + "loss_total": 0.7408668994903564, + "step": 84499 + }, + { + "epoch": 0.009, + "grad_norm": 0.9888080954551697, + "learning_rate": 7.091526200202612e-05, + "loss": 0.6071, + "step": 84500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.859007358551025, + "loss_rtd": 0.32816991209983826, + "loss_sent": 0.22488537430763245, + "loss_sod": 0.04852532222867012, + "loss_total": 0.6015806198120117, + "step": 84599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.959181785583496, + "loss_rtd": 0.34092968702316284, + "loss_sent": 0.17731061577796936, + "loss_sod": 0.17065644264221191, + "loss_total": 0.6888967752456665, + "step": 84599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.6574453711509705, + "learning_rate": 7.08864342656044e-05, + "loss": 0.5941, + "step": 84600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.744875907897949, + "loss_rtd": 0.3381560146808624, + "loss_sent": 0.23297180235385895, + "loss_sod": 0.11416157335042953, + "loss_total": 0.6852893829345703, + "step": 84699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.035859107971191, + "loss_rtd": 0.34597527980804443, + "loss_sent": 0.450090229511261, + "loss_sod": 0.03151167184114456, + "loss_total": 0.8275771737098694, + "step": 84699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.1587839126586914, + "learning_rate": 7.085759811612946e-05, + "loss": 0.5985, + "step": 84700 + }, + { + "epoch": 0.009598, + "loss_gen": 4.791224479675293, + "loss_rtd": 0.3161582946777344, + "loss_sent": 0.226994127035141, + "loss_sod": 0.034145288169384, + "loss_total": 0.5772976875305176, + "step": 84799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.157752513885498, + "loss_rtd": 0.3282153010368347, + "loss_sent": 0.11908372491598129, + "loss_sod": 0.17988857626914978, + "loss_total": 0.6271875500679016, + "step": 84799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.2291240692138672, + "learning_rate": 7.08287535652165e-05, + "loss": 0.5951, + "step": 84800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.31527853012085, + "loss_rtd": 0.3151688873767853, + "loss_sent": 0.11810876429080963, + "loss_sod": 0.05422281101346016, + "loss_total": 0.48750048875808716, + "step": 84899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.165080547332764, + "loss_rtd": 0.3413681983947754, + "loss_sent": 0.1543727070093155, + "loss_sod": 0.046299442648887634, + "loss_total": 0.5420403480529785, + "step": 84899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.8090561628341675, + "learning_rate": 7.07999006244841e-05, + "loss": 0.592, + "step": 84900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.7347002029418945, + "loss_rtd": 0.3390168845653534, + "loss_sent": 0.49473872780799866, + "loss_sod": 0.03233742341399193, + "loss_total": 0.8660930395126343, + "step": 84999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.499658107757568, + "loss_rtd": 0.3307003378868103, + "loss_sent": 0.32836344838142395, + "loss_sod": 0.04098241776227951, + "loss_total": 0.700046181678772, + "step": 84999 + }, + { + "epoch": 0.01, + "grad_norm": 2.6637675762176514, + "learning_rate": 7.077103930555419e-05, + "loss": 0.6071, + "step": 85000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5636082887649536, + "eval_runtime": 151.5121, + "eval_samples_per_second": 101.926, + "eval_steps_per_second": 0.799, + "step": 85000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.155033111572266, + "loss_rtd": 0.3279043436050415, + "loss_sent": 0.10387200862169266, + "loss_sod": 0.08239713311195374, + "loss_total": 0.5141735076904297, + "step": 85099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.681345462799072, + "loss_rtd": 0.3611370623111725, + "loss_sent": 0.11581390351057053, + "loss_sod": 0.07930219173431396, + "loss_total": 0.5562531352043152, + "step": 85099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.8770554661750793, + "learning_rate": 7.074216962005216e-05, + "loss": 0.5828, + "step": 85100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.5408711433410645, + "loss_rtd": 0.3362995982170105, + "loss_sent": 0.29717129468917847, + "loss_sod": 0.0074509442783892155, + "loss_total": 0.6409218311309814, + "step": 85199 + }, + { + "epoch": 0.010398, + "loss_gen": 4.74578332901001, + "loss_rtd": 0.3223089277744293, + "loss_sent": 0.17509596049785614, + "loss_sod": 0.0695834830403328, + "loss_total": 0.5669883489608765, + "step": 85199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.3575395345687866, + "learning_rate": 7.071329157960665e-05, + "loss": 0.593, + "step": 85200 + }, + { + "epoch": 0.010598, + "loss_gen": 4.741410732269287, + "loss_rtd": 0.31915760040283203, + "loss_sent": 0.5397876501083374, + "loss_sod": 0.010888807475566864, + "loss_total": 0.8698340654373169, + "step": 85299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.687405109405518, + "loss_rtd": 0.333869069814682, + "loss_sent": 0.5599907040596008, + "loss_sod": 0.064788818359375, + "loss_total": 0.9586485624313354, + "step": 85299 + }, + { + "epoch": 0.0106, + "grad_norm": 4.997759819030762, + "learning_rate": 7.06844051958498e-05, + "loss": 0.5913, + "step": 85300 + }, + { + "epoch": 0.010798, + "loss_gen": 4.9768595695495605, + "loss_rtd": 0.3349880576133728, + "loss_sent": 0.125346377491951, + "loss_sod": 0.10623002797365189, + "loss_total": 0.5665644407272339, + "step": 85399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.237880229949951, + "loss_rtd": 0.3359336256980896, + "loss_sent": 0.010364835150539875, + "loss_sod": 0.11379797756671906, + "loss_total": 0.46009641885757446, + "step": 85399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.265926718711853, + "learning_rate": 7.0655510480417e-05, + "loss": 0.5838, + "step": 85400 + }, + { + "epoch": 0.010998, + "loss_gen": 3.779320478439331, + "loss_rtd": 0.3096616566181183, + "loss_sent": 0.057744111865758896, + "loss_sod": 0.09589076787233353, + "loss_total": 0.4632965326309204, + "step": 85499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.641038417816162, + "loss_rtd": 0.330924391746521, + "loss_sent": 0.09023583680391312, + "loss_sod": 0.02348215878009796, + "loss_total": 0.4446423649787903, + "step": 85499 + }, + { + "epoch": 0.011, + "grad_norm": 0.806560754776001, + "learning_rate": 7.062660744494706e-05, + "loss": 0.5787, + "step": 85500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.924380779266357, + "loss_rtd": 0.33277904987335205, + "loss_sent": 0.15705890953540802, + "loss_sod": 0.13256730139255524, + "loss_total": 0.6224052309989929, + "step": 85599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.707538604736328, + "loss_rtd": 0.32195743918418884, + "loss_sent": 0.09301640093326569, + "loss_sod": 0.07604361325502396, + "loss_total": 0.4910174608230591, + "step": 85599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9301294684410095, + "learning_rate": 7.05976961010821e-05, + "loss": 0.6062, + "step": 85600 + }, + { + "epoch": 0.011398, + "loss_gen": 4.808781147003174, + "loss_rtd": 0.3219926059246063, + "loss_sent": 0.12298624962568283, + "loss_sod": 0.0509757325053215, + "loss_total": 0.49595460295677185, + "step": 85699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.390843391418457, + "loss_rtd": 0.33405792713165283, + "loss_sent": 0.026021022349596024, + "loss_sod": 0.11794449388980865, + "loss_total": 0.4780234396457672, + "step": 85699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.7896721959114075, + "learning_rate": 7.056877646046761e-05, + "loss": 0.5888, + "step": 85700 + }, + { + "epoch": 0.011598, + "loss_gen": 4.867891788482666, + "loss_rtd": 0.3365158438682556, + "loss_sent": 0.5656376481056213, + "loss_sod": 0.03345107287168503, + "loss_total": 0.9356045722961426, + "step": 85799 + }, + { + "epoch": 0.011598, + "loss_gen": 4.885983943939209, + "loss_rtd": 0.3423565924167633, + "loss_sent": 0.2113242745399475, + "loss_sod": 0.024075839668512344, + "loss_total": 0.5777567028999329, + "step": 85799 + }, + { + "epoch": 0.0116, + "grad_norm": 2.0509936809539795, + "learning_rate": 7.053984853475244e-05, + "loss": 0.5864, + "step": 85800 + }, + { + "epoch": 0.011798, + "loss_gen": 4.7428717613220215, + "loss_rtd": 0.3404030501842499, + "loss_sent": 0.17430885136127472, + "loss_sod": 0.010180543176829815, + "loss_total": 0.5248924493789673, + "step": 85899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.872359275817871, + "loss_rtd": 0.3368551433086395, + "loss_sent": 0.3656493127346039, + "loss_sod": 0.05703877657651901, + "loss_total": 0.759543240070343, + "step": 85899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.3592721223831177, + "learning_rate": 7.051091233558871e-05, + "loss": 0.5825, + "step": 85900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.666917324066162, + "loss_rtd": 0.33322930335998535, + "loss_sent": 0.19591303169727325, + "loss_sod": 0.09397896379232407, + "loss_total": 0.6231213212013245, + "step": 85999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.712637424468994, + "loss_rtd": 0.3511534035205841, + "loss_sent": 0.20152921974658966, + "loss_sod": 0.060484375804662704, + "loss_total": 0.6131669878959656, + "step": 85999 + }, + { + "epoch": 0.012, + "grad_norm": 1.0833466053009033, + "learning_rate": 7.048196787463195e-05, + "loss": 0.5989, + "step": 86000 + }, + { + "epoch": 0.012, + "eval_loss": 0.5701783299446106, + "eval_runtime": 152.5195, + "eval_samples_per_second": 101.253, + "eval_steps_per_second": 0.793, + "step": 86000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.020610332489014, + "loss_rtd": 0.3479492664337158, + "loss_sent": 0.12172229588031769, + "loss_sod": 0.0221400186419487, + "loss_total": 0.4918115735054016, + "step": 86099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.548126697540283, + "loss_rtd": 0.33291855454444885, + "loss_sent": 0.17961755394935608, + "loss_sod": 0.11401254683732986, + "loss_total": 0.6265486478805542, + "step": 86099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.7376876473426819, + "learning_rate": 7.0453015163541e-05, + "loss": 0.5823, + "step": 86100 + }, + { + "epoch": 0.012398, + "loss_gen": 4.535302639007568, + "loss_rtd": 0.34424495697021484, + "loss_sent": 0.2351195067167282, + "loss_sod": 0.2106015384197235, + "loss_total": 0.7899660468101501, + "step": 86199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.479808807373047, + "loss_rtd": 0.34633177518844604, + "loss_sent": 0.04221196100115776, + "loss_sod": 0.11639726161956787, + "loss_total": 0.5049409866333008, + "step": 86199 + }, + { + "epoch": 0.0124, + "grad_norm": 0.8945944905281067, + "learning_rate": 7.042405421397798e-05, + "loss": 0.5869, + "step": 86200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.183826446533203, + "loss_rtd": 0.3244444727897644, + "loss_sent": 0.013029288500547409, + "loss_sod": 0.19251969456672668, + "loss_total": 0.52999347448349, + "step": 86299 + }, + { + "epoch": 0.012598, + "loss_gen": 3.937880039215088, + "loss_rtd": 0.3229083716869354, + "loss_sent": 3.980579276685603e-05, + "loss_sod": 0.24052423238754272, + "loss_total": 0.5634723901748657, + "step": 86299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.1283100843429565, + "learning_rate": 7.039508503760835e-05, + "loss": 0.5928, + "step": 86300 + }, + { + "epoch": 0.012798, + "loss_gen": 3.9103431701660156, + "loss_rtd": 0.31903406977653503, + "loss_sent": 5.407594653661363e-05, + "loss_sod": 0.2617771327495575, + "loss_total": 0.5808652639389038, + "step": 86399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.453827381134033, + "loss_rtd": 0.31779396533966064, + "loss_sent": 0.21239037811756134, + "loss_sod": 0.017334502190351486, + "loss_total": 0.5475188493728638, + "step": 86399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.8633012175559998, + "learning_rate": 7.03661076461009e-05, + "loss": 0.5941, + "step": 86400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.856034278869629, + "loss_rtd": 0.3515087366104126, + "loss_sent": 0.40019503235816956, + "loss_sod": 0.08624620735645294, + "loss_total": 0.8379499912261963, + "step": 86499 + }, + { + "epoch": 0.012998, + "loss_gen": 4.10723352432251, + "loss_rtd": 0.3264789283275604, + "loss_sent": 0.01756051741540432, + "loss_sod": 0.11352347582578659, + "loss_total": 0.4575629234313965, + "step": 86499 + }, + { + "epoch": 0.013, + "grad_norm": 1.4365172386169434, + "learning_rate": 7.033712205112775e-05, + "loss": 0.6133, + "step": 86500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.694050312042236, + "loss_rtd": 0.3362416625022888, + "loss_sent": 0.20183268189430237, + "loss_sod": 0.09752219915390015, + "loss_total": 0.635596513748169, + "step": 86599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.243170261383057, + "loss_rtd": 0.3346792161464691, + "loss_sent": 0.061446767300367355, + "loss_sod": 0.08382999897003174, + "loss_total": 0.4799559712409973, + "step": 86599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.9417069554328918, + "learning_rate": 7.030812826436426e-05, + "loss": 0.5752, + "step": 86600 + }, + { + "epoch": 0.013398, + "loss_gen": 4.410436153411865, + "loss_rtd": 0.335841566324234, + "loss_sent": 0.21186509728431702, + "loss_sod": 0.06897386908531189, + "loss_total": 0.6166805028915405, + "step": 86699 + }, + { + "epoch": 0.013398, + "loss_gen": 4.674890518188477, + "loss_rtd": 0.348044753074646, + "loss_sent": 0.129514679312706, + "loss_sod": 0.033202577382326126, + "loss_total": 0.5107620358467102, + "step": 86699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.644648790359497, + "learning_rate": 7.027912629748913e-05, + "loss": 0.5863, + "step": 86700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.710915565490723, + "loss_rtd": 0.3374066650867462, + "loss_sent": 0.23154044151306152, + "loss_sod": 0.01586066000163555, + "loss_total": 0.5848077535629272, + "step": 86799 + }, + { + "epoch": 0.013598, + "loss_gen": 4.84998083114624, + "loss_rtd": 0.3377188742160797, + "loss_sent": 0.1488795429468155, + "loss_sod": 0.05167201906442642, + "loss_total": 0.5382704138755798, + "step": 86799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.8954354524612427, + "learning_rate": 7.025011616218435e-05, + "loss": 0.5803, + "step": 86800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.269907474517822, + "loss_rtd": 0.3336092531681061, + "loss_sent": 0.05773217976093292, + "loss_sod": 0.08352617919445038, + "loss_total": 0.474867582321167, + "step": 86899 + }, + { + "epoch": 0.013798, + "loss_gen": 4.46759557723999, + "loss_rtd": 0.34029409289360046, + "loss_sent": 0.039328545331954956, + "loss_sod": 0.1799912452697754, + "loss_total": 0.5596139430999756, + "step": 86899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.8178805708885193, + "learning_rate": 7.02210978701352e-05, + "loss": 0.5813, + "step": 86900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.13110876083374, + "loss_rtd": 0.31566160917282104, + "loss_sent": 0.17514754831790924, + "loss_sod": 0.16235847771167755, + "loss_total": 0.6531676054000854, + "step": 86999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.873115539550781, + "loss_rtd": 0.3448944687843323, + "loss_sent": 0.0757671445608139, + "loss_sod": 0.046936094760894775, + "loss_total": 0.46759772300720215, + "step": 86999 + }, + { + "epoch": 0.014, + "grad_norm": 1.4258131980895996, + "learning_rate": 7.019207143303028e-05, + "loss": 0.5802, + "step": 87000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5673538446426392, + "eval_runtime": 151.8075, + "eval_samples_per_second": 101.728, + "eval_steps_per_second": 0.797, + "step": 87000 + }, + { + "epoch": 0.014198, + "loss_gen": 4.761818885803223, + "loss_rtd": 0.33635979890823364, + "loss_sent": 0.15500541031360626, + "loss_sod": 0.0708441287279129, + "loss_total": 0.5622093081474304, + "step": 87099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.59244966506958, + "loss_rtd": 0.3566775321960449, + "loss_sent": 0.22278250753879547, + "loss_sod": 0.0419575572013855, + "loss_total": 0.6214175820350647, + "step": 87099 + }, + { + "epoch": 0.0142, + "grad_norm": 2.196838855743408, + "learning_rate": 7.016303686256137e-05, + "loss": 0.572, + "step": 87100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.7601704597473145, + "loss_rtd": 0.33573299646377563, + "loss_sent": 0.4068352282047272, + "loss_sod": 0.005720850545912981, + "loss_total": 0.7482891082763672, + "step": 87199 + }, + { + "epoch": 0.014398, + "loss_gen": 4.789660453796387, + "loss_rtd": 0.3457402288913727, + "loss_sent": 0.5019651055335999, + "loss_sod": 0.07149482518434525, + "loss_total": 0.9192001819610596, + "step": 87199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.686604380607605, + "learning_rate": 7.013399417042363e-05, + "loss": 0.5796, + "step": 87200 + }, + { + "epoch": 0.014598, + "loss_gen": 4.942568302154541, + "loss_rtd": 0.3394739627838135, + "loss_sent": 0.19377031922340393, + "loss_sod": 0.02111152932047844, + "loss_total": 0.554355800151825, + "step": 87299 + }, + { + "epoch": 0.014598, + "loss_gen": 4.664207935333252, + "loss_rtd": 0.3370613753795624, + "loss_sent": 0.17265434563159943, + "loss_sod": 0.10131214559078217, + "loss_total": 0.6110278367996216, + "step": 87299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.783726453781128, + "learning_rate": 7.010494336831544e-05, + "loss": 0.5785, + "step": 87300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.153042793273926, + "loss_rtd": 0.3529767096042633, + "loss_sent": 0.2563766837120056, + "loss_sod": 0.06033950299024582, + "loss_total": 0.669692873954773, + "step": 87399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.191508769989014, + "loss_rtd": 0.3387785255908966, + "loss_sent": 0.10880298167467117, + "loss_sod": 0.09149737656116486, + "loss_total": 0.5390788912773132, + "step": 87399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.9969197511672974, + "learning_rate": 7.007588446793847e-05, + "loss": 0.5849, + "step": 87400 + }, + { + "epoch": 0.014998, + "loss_gen": 4.172388553619385, + "loss_rtd": 0.32984110713005066, + "loss_sent": 0.0001445196830900386, + "loss_sod": 0.20981809496879578, + "loss_total": 0.5398037433624268, + "step": 87499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.134131908416748, + "loss_rtd": 0.3385279178619385, + "loss_sent": 0.1611495465040207, + "loss_sod": 0.0354672446846962, + "loss_total": 0.5351446866989136, + "step": 87499 + }, + { + "epoch": 0.015, + "grad_norm": 0.898914098739624, + "learning_rate": 7.004681748099764e-05, + "loss": 0.6018, + "step": 87500 + }, + { + "epoch": 0.015198, + "loss_gen": 4.956865310668945, + "loss_rtd": 0.3564647436141968, + "loss_sent": 0.22139987349510193, + "loss_sod": 0.1836312711238861, + "loss_total": 0.7614959478378296, + "step": 87599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.106131076812744, + "loss_rtd": 0.3263373076915741, + "loss_sent": 0.28746137022972107, + "loss_sod": 0.08258932828903198, + "loss_total": 0.6963880062103271, + "step": 87599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.0720531940460205, + "learning_rate": 7.001774241920111e-05, + "loss": 0.5916, + "step": 87600 + }, + { + "epoch": 0.015398, + "loss_gen": 4.984757423400879, + "loss_rtd": 0.3438102602958679, + "loss_sent": 0.41753992438316345, + "loss_sod": 0.04417465254664421, + "loss_total": 0.8055248260498047, + "step": 87699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.551210880279541, + "loss_rtd": 0.34808868169784546, + "loss_sent": 0.1848447322845459, + "loss_sod": 0.032358843833208084, + "loss_total": 0.565292239189148, + "step": 87699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.469988465309143, + "learning_rate": 6.998865929426035e-05, + "loss": 0.595, + "step": 87700 + }, + { + "epoch": 0.015598, + "loss_gen": 4.855138778686523, + "loss_rtd": 0.3485548198223114, + "loss_sent": 0.19279338419437408, + "loss_sod": 0.04516763985157013, + "loss_total": 0.5865158438682556, + "step": 87799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.957738399505615, + "loss_rtd": 0.32220762968063354, + "loss_sent": 0.330213725566864, + "loss_sod": 0.129806786775589, + "loss_total": 0.7822281122207642, + "step": 87799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.1649664640426636, + "learning_rate": 6.995956811789e-05, + "loss": 0.5845, + "step": 87800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.357553482055664, + "loss_rtd": 0.3270278871059418, + "loss_sent": 0.23838768899440765, + "loss_sod": 0.06158880144357681, + "loss_total": 0.6270043849945068, + "step": 87899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.058922290802002, + "loss_rtd": 0.34483423829078674, + "loss_sent": 0.3389914631843567, + "loss_sod": 0.08650758862495422, + "loss_total": 0.7703332901000977, + "step": 87899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.0614956617355347, + "learning_rate": 6.993046890180801e-05, + "loss": 0.574, + "step": 87900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.099646091461182, + "loss_rtd": 0.31961262226104736, + "loss_sent": 0.007533014286309481, + "loss_sod": 0.18244077265262604, + "loss_total": 0.5095863938331604, + "step": 87999 + }, + { + "epoch": 0.015998, + "loss_gen": 3.6884658336639404, + "loss_rtd": 0.3106600046157837, + "loss_sent": 0.007008615881204605, + "loss_sod": 0.320667028427124, + "loss_total": 0.638335645198822, + "step": 87999 + }, + { + "epoch": 0.016, + "grad_norm": 1.3791881799697876, + "learning_rate": 6.990136165773552e-05, + "loss": 0.5809, + "step": 88000 + }, + { + "epoch": 0.016, + "eval_loss": 0.5669590830802917, + "eval_runtime": 151.9205, + "eval_samples_per_second": 101.652, + "eval_steps_per_second": 0.796, + "step": 88000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.847960472106934, + "loss_rtd": 0.34454160928726196, + "loss_sent": 0.25795817375183105, + "loss_sod": 0.0255027636885643, + "loss_total": 0.6280025243759155, + "step": 88099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.912221431732178, + "loss_rtd": 0.33135339617729187, + "loss_sent": 0.2321702241897583, + "loss_sod": 0.017423417419195175, + "loss_total": 0.5809470415115356, + "step": 88099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.772662878036499, + "learning_rate": 6.987224639739695e-05, + "loss": 0.5837, + "step": 88100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.866065502166748, + "loss_rtd": 0.345676451921463, + "loss_sent": 0.24594935774803162, + "loss_sod": 0.03352060541510582, + "loss_total": 0.6251463890075684, + "step": 88199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.942430019378662, + "loss_rtd": 0.35307154059410095, + "loss_sent": 0.28599974513053894, + "loss_sod": 0.030116241425275803, + "loss_total": 0.6691875457763672, + "step": 88199 + }, + { + "epoch": 0.0164, + "grad_norm": 2.106566905975342, + "learning_rate": 6.984312313251989e-05, + "loss": 0.5926, + "step": 88200 + }, + { + "epoch": 0.016598, + "loss_gen": 4.405894756317139, + "loss_rtd": 0.33880099654197693, + "loss_sent": 0.07704459875822067, + "loss_sod": 0.011914386413991451, + "loss_total": 0.42775997519493103, + "step": 88299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.814757347106934, + "loss_rtd": 0.3188076317310333, + "loss_sent": 0.41589853167533875, + "loss_sod": 0.036490026861429214, + "loss_total": 0.771196186542511, + "step": 88299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.1776729822158813, + "learning_rate": 6.981399187483523e-05, + "loss": 0.5907, + "step": 88300 + }, + { + "epoch": 0.016798, + "loss_gen": 4.972283363342285, + "loss_rtd": 0.3257956802845001, + "loss_sent": 0.17019076645374298, + "loss_sod": 0.0564018152654171, + "loss_total": 0.5523882508277893, + "step": 88399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.1294097900390625, + "loss_rtd": 0.3375230133533478, + "loss_sent": 0.04303320497274399, + "loss_sod": 0.10859806090593338, + "loss_total": 0.48915427923202515, + "step": 88399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.6791639924049377, + "learning_rate": 6.978485263607698e-05, + "loss": 0.5946, + "step": 88400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.180440902709961, + "loss_rtd": 0.30871906876564026, + "loss_sent": 0.06477366387844086, + "loss_sod": 0.10586203634738922, + "loss_total": 0.4793547987937927, + "step": 88499 + }, + { + "epoch": 0.016998, + "loss_gen": 4.830018997192383, + "loss_rtd": 0.3306615650653839, + "loss_sent": 0.28171202540397644, + "loss_sod": 0.044385459274053574, + "loss_total": 0.6567590236663818, + "step": 88499 + }, + { + "epoch": 0.017, + "grad_norm": 0.8192684650421143, + "learning_rate": 6.975570542798248e-05, + "loss": 0.5938, + "step": 88500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.691791534423828, + "loss_rtd": 0.3458113968372345, + "loss_sent": 0.04866185411810875, + "loss_sod": 0.18742048740386963, + "loss_total": 0.5818936824798584, + "step": 88599 + }, + { + "epoch": 0.017198, + "loss_gen": 3.6342341899871826, + "loss_rtd": 0.31179022789001465, + "loss_sent": 0.016484718769788742, + "loss_sod": 0.2539399266242981, + "loss_total": 0.582214891910553, + "step": 88599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.6321954727172852, + "learning_rate": 6.972655026229218e-05, + "loss": 0.5972, + "step": 88600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.733677864074707, + "loss_rtd": 0.32928189635276794, + "loss_sent": 0.0740603432059288, + "loss_sod": 0.030074436217546463, + "loss_total": 0.4334166944026947, + "step": 88699 + }, + { + "epoch": 0.017398, + "loss_gen": 4.662412643432617, + "loss_rtd": 0.3300178349018097, + "loss_sent": 0.34945255517959595, + "loss_sod": 0.003907858394086361, + "loss_total": 0.6833782196044922, + "step": 88699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.7026873230934143, + "learning_rate": 6.969738715074981e-05, + "loss": 0.573, + "step": 88700 + }, + { + "epoch": 0.017598, + "loss_gen": 4.659727573394775, + "loss_rtd": 0.33939382433891296, + "loss_sent": 0.028759872540831566, + "loss_sod": 0.07622699439525604, + "loss_total": 0.44438067078590393, + "step": 88799 + }, + { + "epoch": 0.017598, + "loss_gen": 3.7558815479278564, + "loss_rtd": 0.3180695176124573, + "loss_sent": 0.0015709931030869484, + "loss_sod": 0.1858685165643692, + "loss_total": 0.5055090188980103, + "step": 88799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.8093413710594177, + "learning_rate": 6.966821610510222e-05, + "loss": 0.5861, + "step": 88800 + }, + { + "epoch": 0.017798, + "loss_gen": 4.879168510437012, + "loss_rtd": 0.32642388343811035, + "loss_sent": 0.32197320461273193, + "loss_sod": 0.02328118309378624, + "loss_total": 0.6716783046722412, + "step": 88899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.66521692276001, + "loss_rtd": 0.33152955770492554, + "loss_sent": 0.1449562907218933, + "loss_sod": 0.16021357476711273, + "loss_total": 0.6366994380950928, + "step": 88899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.8646384477615356, + "learning_rate": 6.963903713709956e-05, + "loss": 0.5902, + "step": 88900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.182841777801514, + "loss_rtd": 0.33539631962776184, + "loss_sent": 0.06493685394525528, + "loss_sod": 0.14024244248867035, + "loss_total": 0.5405756235122681, + "step": 88999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.200113296508789, + "loss_rtd": 0.3302038609981537, + "loss_sent": 0.1657828539609909, + "loss_sod": 0.09019247442483902, + "loss_total": 0.5861791968345642, + "step": 88999 + }, + { + "epoch": 0.018, + "grad_norm": 1.0806804895401, + "learning_rate": 6.960985025849508e-05, + "loss": 0.5881, + "step": 89000 + }, + { + "epoch": 0.018, + "eval_loss": 0.5663845539093018, + "eval_runtime": 151.8203, + "eval_samples_per_second": 101.719, + "eval_steps_per_second": 0.797, + "step": 89000 + }, + { + "epoch": 0.018198, + "loss_gen": 4.941082000732422, + "loss_rtd": 0.3443474769592285, + "loss_sent": 0.23076364398002625, + "loss_sod": 0.10233885794878006, + "loss_total": 0.6774499416351318, + "step": 89099 + }, + { + "epoch": 0.018198, + "loss_gen": 4.799691200256348, + "loss_rtd": 0.3158910572528839, + "loss_sent": 0.34577134251594543, + "loss_sod": 0.02999667264521122, + "loss_total": 0.6916590929031372, + "step": 89099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.5906386375427246, + "learning_rate": 6.958065548104528e-05, + "loss": 0.5866, + "step": 89100 + }, + { + "epoch": 0.018398, + "loss_gen": 4.602643013000488, + "loss_rtd": 0.3486579358577728, + "loss_sent": 0.35448747873306274, + "loss_sod": 0.017640359699726105, + "loss_total": 0.7207857370376587, + "step": 89199 + }, + { + "epoch": 0.018398, + "loss_gen": 4.9737043380737305, + "loss_rtd": 0.3401373028755188, + "loss_sent": 0.10153168439865112, + "loss_sod": 0.016300665214657784, + "loss_total": 0.45796966552734375, + "step": 89199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.8996251225471497, + "learning_rate": 6.955145281650976e-05, + "loss": 0.5858, + "step": 89200 + }, + { + "epoch": 0.018598, + "loss_gen": 3.8062310218811035, + "loss_rtd": 0.2853853404521942, + "loss_sent": 0.0019416897557675838, + "loss_sod": 0.1541668176651001, + "loss_total": 0.44149383902549744, + "step": 89299 + }, + { + "epoch": 0.018598, + "loss_gen": 4.6544365882873535, + "loss_rtd": 0.3491682708263397, + "loss_sent": 0.31565359234809875, + "loss_sod": 0.028360841795802116, + "loss_total": 0.6931827068328857, + "step": 89299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.7657874822616577, + "learning_rate": 6.952224227665142e-05, + "loss": 0.5646, + "step": 89300 + }, + { + "epoch": 0.018798, + "loss_gen": 3.746079444885254, + "loss_rtd": 0.3051496148109436, + "loss_sent": 0.0015413948567584157, + "loss_sod": 0.15952399373054504, + "loss_total": 0.46621501445770264, + "step": 89399 + }, + { + "epoch": 0.018798, + "loss_gen": 4.637059211730957, + "loss_rtd": 0.3363519012928009, + "loss_sent": 0.44921576976776123, + "loss_sod": 0.039606474339962006, + "loss_total": 0.8251741528511047, + "step": 89399 + }, + { + "epoch": 0.0188, + "grad_norm": 2.19759202003479, + "learning_rate": 6.949302387323621e-05, + "loss": 0.5877, + "step": 89400 + }, + { + "epoch": 0.018998, + "loss_gen": 4.969757080078125, + "loss_rtd": 0.3233579099178314, + "loss_sent": 0.3188636898994446, + "loss_sod": 0.04749014973640442, + "loss_total": 0.6897117495536804, + "step": 89499 + }, + { + "epoch": 0.018998, + "loss_gen": 4.802304267883301, + "loss_rtd": 0.33002758026123047, + "loss_sent": 0.27685385942459106, + "loss_sod": 0.07065742462873459, + "loss_total": 0.6775388717651367, + "step": 89499 + }, + { + "epoch": 0.019, + "grad_norm": 0.8650509715080261, + "learning_rate": 6.946379761803332e-05, + "loss": 0.5794, + "step": 89500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.751745700836182, + "loss_rtd": 0.3350619077682495, + "loss_sent": 0.1835276484489441, + "loss_sod": 0.023965610191226006, + "loss_total": 0.5425551533699036, + "step": 89599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.994669437408447, + "loss_rtd": 0.3365551829338074, + "loss_sent": 0.3081744909286499, + "loss_sod": 0.05738406255841255, + "loss_total": 0.7021137475967407, + "step": 89599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.9256625175476074, + "learning_rate": 6.943456352281507e-05, + "loss": 0.5876, + "step": 89600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.88437557220459, + "loss_rtd": 0.3251269459724426, + "loss_sent": 0.32263243198394775, + "loss_sod": 0.06728115677833557, + "loss_total": 0.7150405645370483, + "step": 89699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.768828392028809, + "loss_rtd": 0.33759233355522156, + "loss_sent": 0.1716608703136444, + "loss_sod": 0.0714825764298439, + "loss_total": 0.5807358026504517, + "step": 89699 + }, + { + "epoch": 0.0194, + "grad_norm": 1.9086744785308838, + "learning_rate": 6.940532159935696e-05, + "loss": 0.593, + "step": 89700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.026336193084717, + "loss_rtd": 0.3272406756877899, + "loss_sent": 0.15711930394172668, + "loss_sod": 0.2609151303768158, + "loss_total": 0.7452751398086548, + "step": 89799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.9076714515686035, + "loss_rtd": 0.3377518355846405, + "loss_sent": 0.1297970563173294, + "loss_sod": 0.03276817873120308, + "loss_total": 0.5003170967102051, + "step": 89799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.7625382542610168, + "learning_rate": 6.937607185943762e-05, + "loss": 0.5953, + "step": 89800 + }, + { + "epoch": 0.019798, + "loss_gen": 4.734294414520264, + "loss_rtd": 0.328772634267807, + "loss_sent": 0.15064159035682678, + "loss_sod": 0.08754176646471024, + "loss_total": 0.5669559836387634, + "step": 89899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.313686370849609, + "loss_rtd": 0.3340799808502197, + "loss_sent": 6.285148265305907e-05, + "loss_sod": 0.16030219197273254, + "loss_total": 0.4944450259208679, + "step": 89899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.1803996562957764, + "learning_rate": 6.934681431483886e-05, + "loss": 0.5879, + "step": 89900 + }, + { + "epoch": 0.019998, + "loss_gen": 4.845009803771973, + "loss_rtd": 0.3295662999153137, + "loss_sent": 0.2918822467327118, + "loss_sod": 0.029903696849942207, + "loss_total": 0.6513522863388062, + "step": 89999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.945878505706787, + "loss_rtd": 0.3441464900970459, + "loss_sent": 0.10085861384868622, + "loss_sod": 0.020352281630039215, + "loss_total": 0.46535736322402954, + "step": 89999 + }, + { + "epoch": 0.02, + "grad_norm": 1.1029510498046875, + "learning_rate": 6.931754897734561e-05, + "loss": 0.5913, + "step": 90000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5658617615699768, + "eval_runtime": 153.6901, + "eval_samples_per_second": 100.481, + "eval_steps_per_second": 0.787, + "step": 90000 + }, + { + "epoch": 0.020198, + "loss_gen": 4.51792573928833, + "loss_rtd": 0.32277560234069824, + "loss_sent": 0.005829704459756613, + "loss_sod": 0.2306119203567505, + "loss_total": 0.5592172145843506, + "step": 90099 + }, + { + "epoch": 0.020198, + "loss_gen": 4.009688854217529, + "loss_rtd": 0.319385826587677, + "loss_sent": 0.07977547496557236, + "loss_sod": 0.12750348448753357, + "loss_total": 0.5266647934913635, + "step": 90099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.484255075454712, + "learning_rate": 6.928827585874593e-05, + "loss": 0.5957, + "step": 90100 + }, + { + "epoch": 0.020398, + "loss_gen": 4.403891563415527, + "loss_rtd": 0.37662163376808167, + "loss_sent": 0.07366303354501724, + "loss_sod": 0.029338199645280838, + "loss_total": 0.47962287068367004, + "step": 90199 + }, + { + "epoch": 0.020398, + "loss_gen": 4.337122440338135, + "loss_rtd": 0.33047062158584595, + "loss_sent": 0.15428656339645386, + "loss_sod": 0.06315929442644119, + "loss_total": 0.5479164719581604, + "step": 90199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.3041845560073853, + "learning_rate": 6.925899497083106e-05, + "loss": 0.5873, + "step": 90200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.499560356140137, + "loss_rtd": 0.34531867504119873, + "loss_sent": 0.07629498094320297, + "loss_sod": 0.10883737355470657, + "loss_total": 0.5304509997367859, + "step": 90299 + }, + { + "epoch": 0.020598, + "loss_gen": 4.623281002044678, + "loss_rtd": 0.3306674063205719, + "loss_sent": 0.06131604686379433, + "loss_sod": 0.10808578133583069, + "loss_total": 0.500069260597229, + "step": 90299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.7871525287628174, + "learning_rate": 6.92297063253953e-05, + "loss": 0.5943, + "step": 90300 + }, + { + "epoch": 0.020798, + "loss_gen": 4.951801300048828, + "loss_rtd": 0.3330695927143097, + "loss_sent": 0.3051595389842987, + "loss_sod": 0.048723429441452026, + "loss_total": 0.6869525909423828, + "step": 90399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.108636379241943, + "loss_rtd": 0.33414900302886963, + "loss_sent": 0.24988193809986115, + "loss_sod": 0.04154475778341293, + "loss_total": 0.6255757212638855, + "step": 90399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.8811945915222168, + "learning_rate": 6.920040993423615e-05, + "loss": 0.578, + "step": 90400 + }, + { + "epoch": 0.020998, + "loss_gen": 4.781215190887451, + "loss_rtd": 0.3221619427204132, + "loss_sent": 0.06880512088537216, + "loss_sod": 0.039578575640916824, + "loss_total": 0.4305456578731537, + "step": 90499 + }, + { + "epoch": 0.020998, + "loss_gen": 4.846989154815674, + "loss_rtd": 0.34084388613700867, + "loss_sent": 0.2732623219490051, + "loss_sod": 0.0029289848171174526, + "loss_total": 0.6170351505279541, + "step": 90499 + }, + { + "epoch": 0.021, + "grad_norm": 1.9614698886871338, + "learning_rate": 6.917110580915416e-05, + "loss": 0.5948, + "step": 90500 + }, + { + "epoch": 0.021198, + "loss_gen": 4.733129501342773, + "loss_rtd": 0.34376993775367737, + "loss_sent": 0.2138681858778, + "loss_sod": 0.09283463656902313, + "loss_total": 0.6504727602005005, + "step": 90599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.80059289932251, + "loss_rtd": 0.3121301829814911, + "loss_sent": 0.41170620918273926, + "loss_sod": 0.03384142369031906, + "loss_total": 0.7576777935028076, + "step": 90599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.6248618364334106, + "learning_rate": 6.914179396195306e-05, + "loss": 0.5848, + "step": 90600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.078394889831543, + "loss_rtd": 0.32173919677734375, + "loss_sent": 0.07046397030353546, + "loss_sod": 0.04545767232775688, + "loss_total": 0.437660813331604, + "step": 90699 + }, + { + "epoch": 0.021398, + "loss_gen": 4.4654693603515625, + "loss_rtd": 0.32393723726272583, + "loss_sent": 0.3042590618133545, + "loss_sod": 0.010264010168612003, + "loss_total": 0.6384602785110474, + "step": 90699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.042040467262268, + "learning_rate": 6.911247440443963e-05, + "loss": 0.5638, + "step": 90700 + }, + { + "epoch": 0.021598, + "loss_gen": 4.693902015686035, + "loss_rtd": 0.3379054665565491, + "loss_sent": 0.1831001490354538, + "loss_sod": 0.06713329255580902, + "loss_total": 0.5881389379501343, + "step": 90799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.526010036468506, + "loss_rtd": 0.33100053668022156, + "loss_sent": 0.2598324120044708, + "loss_sod": 0.0941501259803772, + "loss_total": 0.6849830746650696, + "step": 90799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.4666478633880615, + "learning_rate": 6.908314714842379e-05, + "loss": 0.5887, + "step": 90800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.80063533782959, + "loss_rtd": 0.3257311284542084, + "loss_sent": 0.3028273284435272, + "loss_sod": 0.054644741117954254, + "loss_total": 0.6832032203674316, + "step": 90899 + }, + { + "epoch": 0.021798, + "loss_gen": 4.882693290710449, + "loss_rtd": 0.3200565576553345, + "loss_sent": 0.1715368628501892, + "loss_sod": 0.031077342107892036, + "loss_total": 0.5226707458496094, + "step": 90899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.7058055400848389, + "learning_rate": 6.905381220571857e-05, + "loss": 0.5768, + "step": 90900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.241171836853027, + "loss_rtd": 0.3140244781970978, + "loss_sent": 0.08510010689496994, + "loss_sod": 0.07062557339668274, + "loss_total": 0.46975016593933105, + "step": 90999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.380671977996826, + "loss_rtd": 0.31599661707878113, + "loss_sent": 0.0004729445499833673, + "loss_sod": 0.22579069435596466, + "loss_total": 0.5422602891921997, + "step": 90999 + }, + { + "epoch": 0.022, + "grad_norm": 0.8553369045257568, + "learning_rate": 6.902446958814006e-05, + "loss": 0.6037, + "step": 91000 + }, + { + "epoch": 0.022, + "eval_loss": 0.561314582824707, + "eval_runtime": 151.7474, + "eval_samples_per_second": 101.768, + "eval_steps_per_second": 0.797, + "step": 91000 + }, + { + "epoch": 0.022198, + "loss_gen": 4.4136176109313965, + "loss_rtd": 0.33867254853248596, + "loss_sent": 0.20216551423072815, + "loss_sod": 0.01734425127506256, + "loss_total": 0.5581823587417603, + "step": 91099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.756989479064941, + "loss_rtd": 0.3549559414386749, + "loss_sent": 0.09194394946098328, + "loss_sod": 0.02332470379769802, + "loss_total": 0.4702245891094208, + "step": 91099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.015998363494873, + "learning_rate": 6.899511930750749e-05, + "loss": 0.5732, + "step": 91100 + }, + { + "epoch": 0.022398, + "loss_gen": 4.628757953643799, + "loss_rtd": 0.3276508152484894, + "loss_sent": 0.23581714928150177, + "loss_sod": 0.10900195688009262, + "loss_total": 0.6724699139595032, + "step": 91199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.0903167724609375, + "loss_rtd": 0.3232450485229492, + "loss_sent": 0.08014486730098724, + "loss_sod": 0.04226721078157425, + "loss_total": 0.4456571340560913, + "step": 91199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.8765266537666321, + "learning_rate": 6.896576137564313e-05, + "loss": 0.5939, + "step": 91200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.923919677734375, + "loss_rtd": 0.32763293385505676, + "loss_sent": 0.3958737254142761, + "loss_sod": 0.021579347550868988, + "loss_total": 0.7450860142707825, + "step": 91299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.680777072906494, + "loss_rtd": 0.3213493824005127, + "loss_sent": 0.04074085131287575, + "loss_sod": 0.01941441185772419, + "loss_total": 0.3815046548843384, + "step": 91299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.4765080213546753, + "learning_rate": 6.893639580437236e-05, + "loss": 0.5847, + "step": 91300 + }, + { + "epoch": 0.022798, + "loss_gen": 3.7447922229766846, + "loss_rtd": 0.3144702911376953, + "loss_sent": 6.558249879162759e-05, + "loss_sod": 0.19657494127750397, + "loss_total": 0.511110782623291, + "step": 91399 + }, + { + "epoch": 0.022798, + "loss_gen": 4.804544448852539, + "loss_rtd": 0.32792600989341736, + "loss_sent": 0.106887586414814, + "loss_sod": 0.05512861907482147, + "loss_total": 0.48994219303131104, + "step": 91399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.1063635349273682, + "learning_rate": 6.890702260552361e-05, + "loss": 0.5839, + "step": 91400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.9147491455078125, + "loss_rtd": 0.3242236375808716, + "loss_sent": 0.31382521986961365, + "loss_sod": 0.07273143529891968, + "loss_total": 0.7107802629470825, + "step": 91499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.193991661071777, + "loss_rtd": 0.3225175440311432, + "loss_sent": 0.16223827004432678, + "loss_sod": 0.12314175814390182, + "loss_total": 0.6078975796699524, + "step": 91499 + }, + { + "epoch": 0.023, + "grad_norm": 1.018505573272705, + "learning_rate": 6.887764179092842e-05, + "loss": 0.581, + "step": 91500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.449570655822754, + "loss_rtd": 0.33190277218818665, + "loss_sent": 0.12670950591564178, + "loss_sod": 0.11134722083806992, + "loss_total": 0.5699595212936401, + "step": 91599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.419771671295166, + "loss_rtd": 0.3302202820777893, + "loss_sent": 0.07329118251800537, + "loss_sod": 0.10373926162719727, + "loss_total": 0.5072507262229919, + "step": 91599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.2618904113769531, + "learning_rate": 6.884825337242138e-05, + "loss": 0.5836, + "step": 91600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.094449996948242, + "loss_rtd": 0.3166581988334656, + "loss_sent": 0.10368459671735764, + "loss_sod": 0.043163277208805084, + "loss_total": 0.4635060727596283, + "step": 91699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.795473575592041, + "loss_rtd": 0.3415147066116333, + "loss_sent": 0.35659798979759216, + "loss_sod": 0.04028211534023285, + "loss_total": 0.7383948564529419, + "step": 91699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.4385802745819092, + "learning_rate": 6.881885736184014e-05, + "loss": 0.5678, + "step": 91700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.745820045471191, + "loss_rtd": 0.32357558608055115, + "loss_sent": 0.22468724846839905, + "loss_sod": 0.01799878105521202, + "loss_total": 0.5662616491317749, + "step": 91799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.822248935699463, + "loss_rtd": 0.34313949942588806, + "loss_sent": 0.21366900205612183, + "loss_sod": 0.049441125243902206, + "loss_total": 0.6062496304512024, + "step": 91799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.3255048990249634, + "learning_rate": 6.878945377102539e-05, + "loss": 0.5865, + "step": 91800 + }, + { + "epoch": 0.023798, + "loss_gen": 4.711320877075195, + "loss_rtd": 0.3355475664138794, + "loss_sent": 0.2739960253238678, + "loss_sod": 0.0651506781578064, + "loss_total": 0.674694299697876, + "step": 91899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.000282287597656, + "loss_rtd": 0.32347211241722107, + "loss_sent": 0.10537165403366089, + "loss_sod": 0.06985322386026382, + "loss_total": 0.4986969828605652, + "step": 91899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.4182720184326172, + "learning_rate": 6.876004261182092e-05, + "loss": 0.5817, + "step": 91900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.340452671051025, + "loss_rtd": 0.3391110897064209, + "loss_sent": 0.15645575523376465, + "loss_sod": 0.047599561512470245, + "loss_total": 0.5431663990020752, + "step": 91999 + }, + { + "epoch": 0.023998, + "loss_gen": 3.864002227783203, + "loss_rtd": 0.3019247055053711, + "loss_sent": 0.0006159417098388076, + "loss_sod": 0.18779833614826202, + "loss_total": 0.4903389811515808, + "step": 91999 + }, + { + "epoch": 0.024, + "grad_norm": 1.1301180124282837, + "learning_rate": 6.873062389607352e-05, + "loss": 0.5876, + "step": 92000 + }, + { + "epoch": 0.024, + "eval_loss": 0.5641158223152161, + "eval_runtime": 151.8861, + "eval_samples_per_second": 101.675, + "eval_steps_per_second": 0.797, + "step": 92000 + }, + { + "epoch": 0.024198, + "loss_gen": 4.568058490753174, + "loss_rtd": 0.32689598202705383, + "loss_sent": 0.1527620255947113, + "loss_sod": 0.07747532427310944, + "loss_total": 0.5571333169937134, + "step": 92099 + }, + { + "epoch": 0.024198, + "loss_gen": 4.698720932006836, + "loss_rtd": 0.3107092082500458, + "loss_sent": 0.2789194583892822, + "loss_sod": 0.005637895781546831, + "loss_total": 0.595266580581665, + "step": 92099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.138969898223877, + "learning_rate": 6.870119763563307e-05, + "loss": 0.574, + "step": 92100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.00078821182251, + "loss_rtd": 0.316707968711853, + "loss_sent": 0.4610620439052582, + "loss_sod": 0.12511105835437775, + "loss_total": 0.9028810262680054, + "step": 92199 + }, + { + "epoch": 0.024398, + "loss_gen": 4.84621000289917, + "loss_rtd": 0.317338764667511, + "loss_sent": 0.22897499799728394, + "loss_sod": 0.01574169285595417, + "loss_total": 0.5620554685592651, + "step": 92199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.2326622009277344, + "learning_rate": 6.867176384235243e-05, + "loss": 0.584, + "step": 92200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.925948143005371, + "loss_rtd": 0.33173057436943054, + "loss_sent": 0.0742463544011116, + "loss_sod": 0.15905244648456573, + "loss_total": 0.5650293827056885, + "step": 92299 + }, + { + "epoch": 0.024598, + "loss_gen": 4.965786457061768, + "loss_rtd": 0.32640862464904785, + "loss_sent": 0.18212881684303284, + "loss_sod": 0.09404770284891129, + "loss_total": 0.6025851368904114, + "step": 92299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.2922873497009277, + "learning_rate": 6.864232252808757e-05, + "loss": 0.5905, + "step": 92300 + }, + { + "epoch": 0.024798, + "loss_gen": 4.907704830169678, + "loss_rtd": 0.33255714178085327, + "loss_sent": 0.18304024636745453, + "loss_sod": 0.059389252215623856, + "loss_total": 0.5749866366386414, + "step": 92399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.2335615158081055, + "loss_rtd": 0.33417046070098877, + "loss_sent": 0.07495804131031036, + "loss_sod": 0.1096133142709732, + "loss_total": 0.51874178647995, + "step": 92399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.9164637327194214, + "learning_rate": 6.86128737046974e-05, + "loss": 0.5908, + "step": 92400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.048556327819824, + "loss_rtd": 0.3085005283355713, + "loss_sent": 0.03911376744508743, + "loss_sod": 0.13077664375305176, + "loss_total": 0.4783909320831299, + "step": 92499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.776292324066162, + "loss_rtd": 0.3288393020629883, + "loss_sent": 0.166950985789299, + "loss_sod": 0.29131215810775757, + "loss_total": 0.787102460861206, + "step": 92499 + }, + { + "epoch": 0.025, + "grad_norm": 0.8874497413635254, + "learning_rate": 6.858341738404396e-05, + "loss": 0.5974, + "step": 92500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.173285484313965, + "loss_rtd": 0.3269065022468567, + "loss_sent": 0.00547097297385335, + "loss_sod": 0.14457273483276367, + "loss_total": 0.47695019841194153, + "step": 92599 + }, + { + "epoch": 0.025198, + "loss_gen": 4.434605121612549, + "loss_rtd": 0.3425711989402771, + "loss_sent": 5.5371558119077235e-05, + "loss_sod": 0.2001873254776001, + "loss_total": 0.5428138971328735, + "step": 92599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.0194905996322632, + "learning_rate": 6.85539535779922e-05, + "loss": 0.5897, + "step": 92600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.890368938446045, + "loss_rtd": 0.3072635233402252, + "loss_sent": 0.2147490233182907, + "loss_sod": 0.1317869871854782, + "loss_total": 0.6537995338439941, + "step": 92699 + }, + { + "epoch": 0.025398, + "loss_gen": 4.97855281829834, + "loss_rtd": 0.33050867915153503, + "loss_sent": 0.19271942973136902, + "loss_sod": 0.018159018829464912, + "loss_total": 0.541387140750885, + "step": 92699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8697763681411743, + "learning_rate": 6.852448229841015e-05, + "loss": 0.5787, + "step": 92700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.931724548339844, + "loss_rtd": 0.3266960084438324, + "loss_sent": 0.11171326041221619, + "loss_sod": 0.0046081882901489735, + "loss_total": 0.44301745295524597, + "step": 92799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.95509672164917, + "loss_rtd": 0.3352271318435669, + "loss_sent": 0.2149454653263092, + "loss_sod": 0.028835207223892212, + "loss_total": 0.5790078043937683, + "step": 92799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.7829344868659973, + "learning_rate": 6.849500355716886e-05, + "loss": 0.5752, + "step": 92800 + }, + { + "epoch": 0.025798, + "loss_gen": 4.798049449920654, + "loss_rtd": 0.3433483839035034, + "loss_sent": 0.17305660247802734, + "loss_sod": 0.03813375532627106, + "loss_total": 0.5545387268066406, + "step": 92899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.248742580413818, + "loss_rtd": 0.32993873953819275, + "loss_sent": 0.22511538863182068, + "loss_sod": 0.05407518893480301, + "loss_total": 0.6091293096542358, + "step": 92899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.5971472263336182, + "learning_rate": 6.846551736614233e-05, + "loss": 0.5967, + "step": 92900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.976089954376221, + "loss_rtd": 0.31229016184806824, + "loss_sent": 0.14180786907672882, + "loss_sod": 0.04470566660165787, + "loss_total": 0.49880367517471313, + "step": 92999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.7940287590026855, + "loss_rtd": 0.3305432200431824, + "loss_sent": 0.29722675681114197, + "loss_sod": 0.03651285916566849, + "loss_total": 0.6642827987670898, + "step": 92999 + }, + { + "epoch": 0.026, + "grad_norm": 0.9626067280769348, + "learning_rate": 6.843602373720763e-05, + "loss": 0.5781, + "step": 93000 + }, + { + "epoch": 0.026, + "eval_loss": 0.5562506914138794, + "eval_runtime": 152.1715, + "eval_samples_per_second": 101.484, + "eval_steps_per_second": 0.795, + "step": 93000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.889732360839844, + "loss_rtd": 0.33864760398864746, + "loss_sent": 0.15712913870811462, + "loss_sod": 0.0421319380402565, + "loss_total": 0.537908673286438, + "step": 93099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.961540699005127, + "loss_rtd": 0.3180936872959137, + "loss_sent": 0.016319338232278824, + "loss_sod": 0.058194972574710846, + "loss_total": 0.39260798692703247, + "step": 93099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.9290160536766052, + "learning_rate": 6.840652268224478e-05, + "loss": 0.5976, + "step": 93100 + }, + { + "epoch": 0.026398, + "loss_gen": 4.791418075561523, + "loss_rtd": 0.3520411550998688, + "loss_sent": 0.11683662235736847, + "loss_sod": 0.008030342869460583, + "loss_total": 0.4769081473350525, + "step": 93199 + }, + { + "epoch": 0.026398, + "loss_gen": 4.8290486335754395, + "loss_rtd": 0.3225650191307068, + "loss_sent": 0.056988898664712906, + "loss_sod": 0.03313485532999039, + "loss_total": 0.4126887619495392, + "step": 93199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.7951269745826721, + "learning_rate": 6.837701421313677e-05, + "loss": 0.5914, + "step": 93200 + }, + { + "epoch": 0.026598, + "loss_gen": 4.922800540924072, + "loss_rtd": 0.32993313670158386, + "loss_sent": 0.05176448076963425, + "loss_sod": 0.03638416528701782, + "loss_total": 0.41808179020881653, + "step": 93299 + }, + { + "epoch": 0.026598, + "loss_gen": 4.823805809020996, + "loss_rtd": 0.33049729466438293, + "loss_sent": 0.33578017354011536, + "loss_sod": 0.02971348538994789, + "loss_total": 0.6959909200668335, + "step": 93299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.7036159634590149, + "learning_rate": 6.834749834176965e-05, + "loss": 0.5733, + "step": 93300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.183927536010742, + "loss_rtd": 0.3362314701080322, + "loss_sent": 0.11524573713541031, + "loss_sod": 0.04844234138727188, + "loss_total": 0.4999195337295532, + "step": 93399 + }, + { + "epoch": 0.026798, + "loss_gen": 4.61659049987793, + "loss_rtd": 0.3277224004268646, + "loss_sent": 0.20252130925655365, + "loss_sod": 0.02409757860004902, + "loss_total": 0.5543413162231445, + "step": 93399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.9997643232345581, + "learning_rate": 6.831797508003239e-05, + "loss": 0.5982, + "step": 93400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.1337199211120605, + "loss_rtd": 0.3236249089241028, + "loss_sent": 0.3480924069881439, + "loss_sod": 0.04242559149861336, + "loss_total": 0.714142918586731, + "step": 93499 + }, + { + "epoch": 0.026998, + "loss_gen": 4.656829357147217, + "loss_rtd": 0.31943279504776, + "loss_sent": 0.3870808482170105, + "loss_sod": 0.02070135436952114, + "loss_total": 0.7272149920463562, + "step": 93499 + }, + { + "epoch": 0.027, + "grad_norm": 2.493122100830078, + "learning_rate": 6.828844443981696e-05, + "loss": 0.5771, + "step": 93500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.040384292602539, + "loss_rtd": 0.3554927408695221, + "loss_sent": 0.40386709570884705, + "loss_sod": 0.03719080239534378, + "loss_total": 0.7965506315231323, + "step": 93599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.405703544616699, + "loss_rtd": 0.32774341106414795, + "loss_sent": 0.08388072997331619, + "loss_sod": 0.1241694763302803, + "loss_total": 0.5357936024665833, + "step": 93599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.1916543245315552, + "learning_rate": 6.82589064330183e-05, + "loss": 0.5765, + "step": 93600 + }, + { + "epoch": 0.027398, + "loss_gen": 4.902307510375977, + "loss_rtd": 0.3315292298793793, + "loss_sent": 0.08468205481767654, + "loss_sod": 0.08713705837726593, + "loss_total": 0.5033483505249023, + "step": 93699 + }, + { + "epoch": 0.027398, + "loss_gen": 3.8888163566589355, + "loss_rtd": 0.3110394775867462, + "loss_sent": 0.0002666794753167778, + "loss_sod": 0.07057055830955505, + "loss_total": 0.38187670707702637, + "step": 93699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.7991061806678772, + "learning_rate": 6.82293610715343e-05, + "loss": 0.5754, + "step": 93700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.270509719848633, + "loss_rtd": 0.330045610666275, + "loss_sent": 0.04376628249883652, + "loss_sod": 0.04363022372126579, + "loss_total": 0.41744211316108704, + "step": 93799 + }, + { + "epoch": 0.027598, + "loss_gen": 3.6017343997955322, + "loss_rtd": 0.28588688373565674, + "loss_sent": 0.017953140661120415, + "loss_sod": 0.03629875183105469, + "loss_total": 0.3401387631893158, + "step": 93799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.5313162207603455, + "learning_rate": 6.819980836726585e-05, + "loss": 0.5578, + "step": 93800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.4805474281311035, + "loss_rtd": 0.3350062370300293, + "loss_sent": 0.15716692805290222, + "loss_sod": 0.07830715924501419, + "loss_total": 0.5704803466796875, + "step": 93899 + }, + { + "epoch": 0.027798, + "loss_gen": 4.663735389709473, + "loss_rtd": 0.3246467113494873, + "loss_sent": 0.2916516065597534, + "loss_sod": 0.02006671018898487, + "loss_total": 0.6363650560379028, + "step": 93899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.9600923657417297, + "learning_rate": 6.817024833211674e-05, + "loss": 0.5761, + "step": 93900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.982539653778076, + "loss_rtd": 0.3416353464126587, + "loss_sent": 0.12349400669336319, + "loss_sod": 0.08955147862434387, + "loss_total": 0.5546808242797852, + "step": 93999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.441377639770508, + "loss_rtd": 0.31564581394195557, + "loss_sent": 0.39718303084373474, + "loss_sod": 0.10988418757915497, + "loss_total": 0.8227130174636841, + "step": 93999 + }, + { + "epoch": 0.028, + "grad_norm": 1.1688591241836548, + "learning_rate": 6.814068097799381e-05, + "loss": 0.577, + "step": 94000 + }, + { + "epoch": 0.028, + "eval_loss": 0.5589998960494995, + "eval_runtime": 152.0681, + "eval_samples_per_second": 101.553, + "eval_steps_per_second": 0.796, + "step": 94000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.901253700256348, + "loss_rtd": 0.32753586769104004, + "loss_sent": 0.11455921083688736, + "loss_sod": 0.05945047736167908, + "loss_total": 0.5015455484390259, + "step": 94099 + }, + { + "epoch": 0.028198, + "loss_gen": 4.768004417419434, + "loss_rtd": 0.32225775718688965, + "loss_sent": 0.08686144649982452, + "loss_sod": 0.044164177030324936, + "loss_total": 0.4532833695411682, + "step": 94099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.5498080253601074, + "learning_rate": 6.811110631680674e-05, + "loss": 0.5768, + "step": 94100 + }, + { + "epoch": 0.028398, + "loss_gen": 3.9671852588653564, + "loss_rtd": 0.30639663338661194, + "loss_sent": 0.10128289461135864, + "loss_sod": 0.17533493041992188, + "loss_total": 0.5830144882202148, + "step": 94199 + }, + { + "epoch": 0.028398, + "loss_gen": 4.78854513168335, + "loss_rtd": 0.3263081908226013, + "loss_sent": 0.10896441340446472, + "loss_sod": 0.0634838342666626, + "loss_total": 0.49875643849372864, + "step": 94199 + }, + { + "epoch": 0.0284, + "grad_norm": 0.7613881230354309, + "learning_rate": 6.808152436046821e-05, + "loss": 0.5578, + "step": 94200 + }, + { + "epoch": 0.028598, + "loss_gen": 4.006463527679443, + "loss_rtd": 0.32778647541999817, + "loss_sent": 0.0010425588116049767, + "loss_sod": 0.17045274376869202, + "loss_total": 0.49928176403045654, + "step": 94299 + }, + { + "epoch": 0.028598, + "loss_gen": 3.7664616107940674, + "loss_rtd": 0.30638378858566284, + "loss_sent": 0.010290347971022129, + "loss_sod": 0.11973874270915985, + "loss_total": 0.43641290068626404, + "step": 94299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.8034588694572449, + "learning_rate": 6.805193512089384e-05, + "loss": 0.5811, + "step": 94300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.312290191650391, + "loss_rtd": 0.33947744965553284, + "loss_sent": 0.11955995857715607, + "loss_sod": 0.04985839128494263, + "loss_total": 0.5088958144187927, + "step": 94399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.070069789886475, + "loss_rtd": 0.3127090632915497, + "loss_sent": 0.16472576558589935, + "loss_sod": 0.13579319417476654, + "loss_total": 0.6132280230522156, + "step": 94399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.6400247812271118, + "learning_rate": 6.802233861000213e-05, + "loss": 0.5839, + "step": 94400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.70365047454834, + "loss_rtd": 0.3269539177417755, + "loss_sent": 0.025795940309762955, + "loss_sod": 0.16885893046855927, + "loss_total": 0.5216087698936462, + "step": 94499 + }, + { + "epoch": 0.028998, + "loss_gen": 4.463148593902588, + "loss_rtd": 0.32368203997612, + "loss_sent": 3.795439261011779e-05, + "loss_sod": 0.2021465003490448, + "loss_total": 0.5258665084838867, + "step": 94499 + }, + { + "epoch": 0.029, + "grad_norm": 0.8533647656440735, + "learning_rate": 6.799273483971461e-05, + "loss": 0.566, + "step": 94500 + }, + { + "epoch": 0.029198, + "loss_gen": 4.47703742980957, + "loss_rtd": 0.3162345290184021, + "loss_sent": 0.1702050119638443, + "loss_sod": 0.08295343816280365, + "loss_total": 0.56939297914505, + "step": 94599 + }, + { + "epoch": 0.029198, + "loss_gen": 3.957000494003296, + "loss_rtd": 0.300820529460907, + "loss_sent": 5.911453263252042e-05, + "loss_sod": 0.2539139986038208, + "loss_total": 0.554793655872345, + "step": 94599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.1866121292114258, + "learning_rate": 6.796312382195565e-05, + "loss": 0.5868, + "step": 94600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.326991081237793, + "loss_rtd": 0.3325690031051636, + "loss_sent": 0.11136277765035629, + "loss_sod": 0.12073438614606857, + "loss_total": 0.5646661520004272, + "step": 94699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.1335649490356445, + "loss_rtd": 0.33579036593437195, + "loss_sent": 0.2504735291004181, + "loss_sod": 0.09647265076637268, + "loss_total": 0.6827365159988403, + "step": 94699 + }, + { + "epoch": 0.0294, + "grad_norm": 2.0132827758789062, + "learning_rate": 6.793350556865255e-05, + "loss": 0.5692, + "step": 94700 + }, + { + "epoch": 0.029598, + "loss_gen": 4.99793815612793, + "loss_rtd": 0.32112497091293335, + "loss_sent": 0.761057436466217, + "loss_sod": 0.13677382469177246, + "loss_total": 1.2189562320709229, + "step": 94799 + }, + { + "epoch": 0.029598, + "loss_gen": 4.6057353019714355, + "loss_rtd": 0.33685338497161865, + "loss_sent": 0.25173771381378174, + "loss_sod": 0.010746541433036327, + "loss_total": 0.599337637424469, + "step": 94799 + }, + { + "epoch": 0.0296, + "grad_norm": 3.4236013889312744, + "learning_rate": 6.790388009173556e-05, + "loss": 0.5943, + "step": 94800 + }, + { + "epoch": 0.029798, + "loss_gen": 4.816145420074463, + "loss_rtd": 0.30661526322364807, + "loss_sent": 0.46385112404823303, + "loss_sod": 0.06537486612796783, + "loss_total": 0.8358412981033325, + "step": 94899 + }, + { + "epoch": 0.029798, + "loss_gen": 4.936795711517334, + "loss_rtd": 0.3264653980731964, + "loss_sent": 0.5147636532783508, + "loss_sod": 0.01598704420030117, + "loss_total": 0.8572161197662354, + "step": 94899 + }, + { + "epoch": 0.0298, + "grad_norm": 3.2405478954315186, + "learning_rate": 6.787424740313782e-05, + "loss": 0.5974, + "step": 94900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.354405403137207, + "loss_rtd": 0.3224092125892639, + "loss_sent": 0.2332027405500412, + "loss_sod": 0.05973425507545471, + "loss_total": 0.6153461933135986, + "step": 94999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.362025737762451, + "loss_rtd": 0.32954007387161255, + "loss_sent": 0.14254899322986603, + "loss_sod": 0.0746975690126419, + "loss_total": 0.5467866063117981, + "step": 94999 + }, + { + "epoch": 0.03, + "grad_norm": 0.8183056712150574, + "learning_rate": 6.784460751479533e-05, + "loss": 0.5894, + "step": 95000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5654690861701965, + "eval_runtime": 151.7443, + "eval_samples_per_second": 101.77, + "eval_steps_per_second": 0.797, + "step": 95000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.05227518081665, + "loss_rtd": 0.3259921371936798, + "loss_sent": 0.1098412349820137, + "loss_sod": 0.07160069048404694, + "loss_total": 0.507434070110321, + "step": 95099 + }, + { + "epoch": 0.030198, + "loss_gen": 4.776727676391602, + "loss_rtd": 0.32909905910491943, + "loss_sent": 0.3634227216243744, + "loss_sod": 0.1241193413734436, + "loss_total": 0.816641092300415, + "step": 95099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.8225250244140625, + "learning_rate": 6.78149604386471e-05, + "loss": 0.5771, + "step": 95100 + }, + { + "epoch": 0.030398, + "loss_gen": 3.834552764892578, + "loss_rtd": 0.2728038728237152, + "loss_sent": 0.07849743962287903, + "loss_sod": 0.0710219070315361, + "loss_total": 0.42232322692871094, + "step": 95199 + }, + { + "epoch": 0.030398, + "loss_gen": 4.6927571296691895, + "loss_rtd": 0.32974088191986084, + "loss_sent": 0.11285782605409622, + "loss_sod": 0.008276103995740414, + "loss_total": 0.45087480545043945, + "step": 95199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.921575665473938, + "learning_rate": 6.778530618663493e-05, + "loss": 0.582, + "step": 95200 + }, + { + "epoch": 0.030598, + "loss_gen": 4.853861331939697, + "loss_rtd": 0.3156324326992035, + "loss_sent": 0.05224445462226868, + "loss_sod": 0.045983877032995224, + "loss_total": 0.4138607680797577, + "step": 95299 + }, + { + "epoch": 0.030598, + "loss_gen": 3.976949453353882, + "loss_rtd": 0.31451788544654846, + "loss_sent": 5.422512549557723e-05, + "loss_sod": 0.11671288311481476, + "loss_total": 0.43128499388694763, + "step": 95299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.5733093619346619, + "learning_rate": 6.775564477070353e-05, + "loss": 0.5812, + "step": 95300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.2976155281066895, + "loss_rtd": 0.32160815596580505, + "loss_sent": 0.1605655997991562, + "loss_sod": 0.16856563091278076, + "loss_total": 0.6507393717765808, + "step": 95399 + }, + { + "epoch": 0.030798, + "loss_gen": 4.614623069763184, + "loss_rtd": 0.32714834809303284, + "loss_sent": 0.2806328535079956, + "loss_sod": 0.11139454692602158, + "loss_total": 0.7191757559776306, + "step": 95399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.9181222319602966, + "learning_rate": 6.772597620280057e-05, + "loss": 0.5765, + "step": 95400 + }, + { + "epoch": 0.030998, + "loss_gen": 3.8405728340148926, + "loss_rtd": 0.32943475246429443, + "loss_sent": 3.500963794067502e-05, + "loss_sod": 0.34127986431121826, + "loss_total": 0.6707496047019958, + "step": 95499 + }, + { + "epoch": 0.030998, + "loss_gen": 4.477936744689941, + "loss_rtd": 0.30729934573173523, + "loss_sent": 0.054840900003910065, + "loss_sod": 0.1278393268585205, + "loss_total": 0.4899795651435852, + "step": 95499 + }, + { + "epoch": 0.031, + "grad_norm": 0.944657027721405, + "learning_rate": 6.769630049487651e-05, + "loss": 0.5819, + "step": 95500 + }, + { + "epoch": 0.031198, + "loss_gen": 4.953878402709961, + "loss_rtd": 0.333034485578537, + "loss_sent": 0.09908009320497513, + "loss_sod": 0.09774559736251831, + "loss_total": 0.5298601984977722, + "step": 95599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.242481708526611, + "loss_rtd": 0.3321620225906372, + "loss_sent": 0.15503835678100586, + "loss_sod": 0.08591844141483307, + "loss_total": 0.5731188058853149, + "step": 95599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.3390533924102783, + "learning_rate": 6.766661765888472e-05, + "loss": 0.5943, + "step": 95600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.760883331298828, + "loss_rtd": 0.31884732842445374, + "loss_sent": 0.25945332646369934, + "loss_sod": 0.042491644620895386, + "loss_total": 0.6207922697067261, + "step": 95699 + }, + { + "epoch": 0.031398, + "loss_gen": 4.66824197769165, + "loss_rtd": 0.33300110697746277, + "loss_sent": 0.3370925784111023, + "loss_sod": 0.0452861562371254, + "loss_total": 0.7153798341751099, + "step": 95699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.4447449445724487, + "learning_rate": 6.763692770678145e-05, + "loss": 0.5816, + "step": 95700 + }, + { + "epoch": 0.031598, + "loss_gen": 4.620529651641846, + "loss_rtd": 0.3089185059070587, + "loss_sent": 0.08042269200086594, + "loss_sod": 0.07788525521755219, + "loss_total": 0.46722644567489624, + "step": 95799 + }, + { + "epoch": 0.031598, + "loss_gen": 4.976378440856934, + "loss_rtd": 0.30276939272880554, + "loss_sent": 0.24751774966716766, + "loss_sod": 0.10714822262525558, + "loss_total": 0.6574353575706482, + "step": 95799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.5255494117736816, + "learning_rate": 6.76072306505258e-05, + "loss": 0.5746, + "step": 95800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.37379264831543, + "loss_rtd": 0.3348751366138458, + "loss_sent": 0.06511642038822174, + "loss_sod": 0.09197663515806198, + "loss_total": 0.49196821451187134, + "step": 95899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.007133960723877, + "loss_rtd": 0.3252499997615814, + "loss_sent": 0.28710106015205383, + "loss_sod": 0.04140780121088028, + "loss_total": 0.6537588834762573, + "step": 95899 + }, + { + "epoch": 0.0318, + "grad_norm": 0.8288471698760986, + "learning_rate": 6.757752650207976e-05, + "loss": 0.5714, + "step": 95900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.212557792663574, + "loss_rtd": 0.314740926027298, + "loss_sent": 0.3847140669822693, + "loss_sod": 0.07991741597652435, + "loss_total": 0.7793723940849304, + "step": 95999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.0146050453186035, + "loss_rtd": 0.33511367440223694, + "loss_sent": 0.17582233250141144, + "loss_sod": 0.17594823241233826, + "loss_total": 0.6868842244148254, + "step": 95999 + }, + { + "epoch": 0.032, + "grad_norm": 0.8252917528152466, + "learning_rate": 6.754781527340815e-05, + "loss": 0.5914, + "step": 96000 + }, + { + "epoch": 0.032, + "eval_loss": 0.5543739199638367, + "eval_runtime": 152.0748, + "eval_samples_per_second": 101.549, + "eval_steps_per_second": 0.796, + "step": 96000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.152584075927734, + "loss_rtd": 0.3240143954753876, + "loss_sent": 0.12969470024108887, + "loss_sod": 0.28179270029067993, + "loss_total": 0.735501766204834, + "step": 96099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.5161638259887695, + "loss_rtd": 0.33060047030448914, + "loss_sent": 0.17957797646522522, + "loss_sod": 0.021789319813251495, + "loss_total": 0.5319677591323853, + "step": 96099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.2911592721939087, + "learning_rate": 6.751809697647865e-05, + "loss": 0.5768, + "step": 96100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.449524402618408, + "loss_rtd": 0.3334892690181732, + "loss_sent": 0.09894298762083054, + "loss_sod": 0.12040586769580841, + "loss_total": 0.552838146686554, + "step": 96199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.929290294647217, + "loss_rtd": 0.33274203538894653, + "loss_sent": 0.06985893845558167, + "loss_sod": 0.01726730354130268, + "loss_total": 0.4198682904243469, + "step": 96199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.8315396308898926, + "learning_rate": 6.748837162326176e-05, + "loss": 0.5828, + "step": 96200 + }, + { + "epoch": 0.000598, + "loss_gen": 4.316035747528076, + "loss_rtd": 0.3169655203819275, + "loss_sent": 0.044400569051504135, + "loss_sod": 0.0127729382365942, + "loss_total": 0.37413904070854187, + "step": 96299 + }, + { + "epoch": 0.000598, + "loss_gen": 3.987454891204834, + "loss_rtd": 0.3221747577190399, + "loss_sent": 0.05567139387130737, + "loss_sod": 0.10847879201173782, + "loss_total": 0.4863249361515045, + "step": 96299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.6544968485832214, + "learning_rate": 6.745863922573089e-05, + "loss": 0.5773, + "step": 96300 + }, + { + "epoch": 0.000798, + "loss_gen": 3.916574716567993, + "loss_rtd": 0.30331626534461975, + "loss_sent": 3.7389883800642565e-05, + "loss_sod": 0.16237305104732513, + "loss_total": 0.46572670340538025, + "step": 96399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.333512306213379, + "loss_rtd": 0.33237385749816895, + "loss_sent": 0.20916062593460083, + "loss_sod": 0.0840887576341629, + "loss_total": 0.6256232261657715, + "step": 96399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.012278437614441, + "learning_rate": 6.742889979586223e-05, + "loss": 0.5716, + "step": 96400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.873001575469971, + "loss_rtd": 0.3161269724369049, + "loss_sent": 0.08801141381263733, + "loss_sod": 0.08623507618904114, + "loss_total": 0.4903734624385834, + "step": 96499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.970402240753174, + "loss_rtd": 0.3255588710308075, + "loss_sent": 0.1072477251291275, + "loss_sod": 0.029996495693922043, + "loss_total": 0.46280306577682495, + "step": 96499 + }, + { + "epoch": 0.001, + "grad_norm": 1.0416778326034546, + "learning_rate": 6.73991533456348e-05, + "loss": 0.5758, + "step": 96500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.5595479011535645, + "loss_rtd": 0.30846548080444336, + "loss_sent": 0.1608753800392151, + "loss_sod": 0.028976604342460632, + "loss_total": 0.4983174800872803, + "step": 96599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.133108615875244, + "loss_rtd": 0.32069918513298035, + "loss_sent": 0.10308950394392014, + "loss_sod": 0.03172937035560608, + "loss_total": 0.45551806688308716, + "step": 96599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.594142198562622, + "learning_rate": 6.736939988703051e-05, + "loss": 0.5629, + "step": 96600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.983255863189697, + "loss_rtd": 0.3195769190788269, + "loss_sent": 0.1621912568807602, + "loss_sod": 0.09886207431554794, + "loss_total": 0.5806302428245544, + "step": 96699 + }, + { + "epoch": 0.001398, + "loss_gen": 4.989710330963135, + "loss_rtd": 0.3138113021850586, + "loss_sent": 0.23394593596458435, + "loss_sod": 0.023748688399791718, + "loss_total": 0.5715059041976929, + "step": 96699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.8842947483062744, + "learning_rate": 6.7339639432034e-05, + "loss": 0.5776, + "step": 96700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.149325370788574, + "loss_rtd": 0.3154001235961914, + "loss_sent": 0.048991478979587555, + "loss_sod": 0.047412846237421036, + "loss_total": 0.4118044376373291, + "step": 96799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.415460586547852, + "loss_rtd": 0.3051280081272125, + "loss_sent": 0.0019924018997699022, + "loss_sod": 0.19839507341384888, + "loss_total": 0.5055155158042908, + "step": 96799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.8955238461494446, + "learning_rate": 6.73098719926328e-05, + "loss": 0.5777, + "step": 96800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.44769287109375, + "loss_rtd": 0.3249737322330475, + "loss_sent": 0.24767473340034485, + "loss_sod": 0.16051912307739258, + "loss_total": 0.7331675887107849, + "step": 96899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.257213115692139, + "loss_rtd": 0.3302728533744812, + "loss_sent": 0.10927356034517288, + "loss_sod": 0.019681863486766815, + "loss_total": 0.4592282772064209, + "step": 96899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.026750922203064, + "learning_rate": 6.728009758081725e-05, + "loss": 0.561, + "step": 96900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.501615047454834, + "loss_rtd": 0.3360181450843811, + "loss_sent": 0.12423968315124512, + "loss_sod": 0.13235829770565033, + "loss_total": 0.5926161408424377, + "step": 96999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.110159397125244, + "loss_rtd": 0.335237979888916, + "loss_sent": 0.33716878294944763, + "loss_sod": 0.07254555821418762, + "loss_total": 0.7449523210525513, + "step": 96999 + }, + { + "epoch": 0.002, + "grad_norm": 0.5956766605377197, + "learning_rate": 6.725031620858045e-05, + "loss": 0.5641, + "step": 97000 + }, + { + "epoch": 0.002, + "eval_loss": 0.5578327775001526, + "eval_runtime": 154.3466, + "eval_samples_per_second": 100.054, + "eval_steps_per_second": 0.784, + "step": 97000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.034848213195801, + "loss_rtd": 0.3098868429660797, + "loss_sent": 0.14335517585277557, + "loss_sod": 0.08951293677091599, + "loss_total": 0.5427549481391907, + "step": 97099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.086153030395508, + "loss_rtd": 0.3134371340274811, + "loss_sent": 0.2379942685365677, + "loss_sod": 0.022758232429623604, + "loss_total": 0.5741896629333496, + "step": 97099 + }, + { + "epoch": 0.0022, + "grad_norm": 2.3417766094207764, + "learning_rate": 6.722052788791835e-05, + "loss": 0.5818, + "step": 97100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.786106586456299, + "loss_rtd": 0.33305180072784424, + "loss_sent": 0.10948657989501953, + "loss_sod": 0.13213729858398438, + "loss_total": 0.5746756792068481, + "step": 97199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.835780620574951, + "loss_rtd": 0.30624470114707947, + "loss_sent": 0.1795838326215744, + "loss_sod": 0.003370692953467369, + "loss_total": 0.4891992211341858, + "step": 97199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.9707930684089661, + "learning_rate": 6.719073263082967e-05, + "loss": 0.5685, + "step": 97200 + }, + { + "epoch": 0.002598, + "loss_gen": 4.574395656585693, + "loss_rtd": 0.3340129554271698, + "loss_sent": 0.1329442262649536, + "loss_sod": 0.0605727955698967, + "loss_total": 0.5275299549102783, + "step": 97299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.205588340759277, + "loss_rtd": 0.31502586603164673, + "loss_sent": 0.006520736496895552, + "loss_sod": 0.17753705382347107, + "loss_total": 0.49908366799354553, + "step": 97299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.108286738395691, + "learning_rate": 6.716093044931594e-05, + "loss": 0.5975, + "step": 97300 + }, + { + "epoch": 0.002798, + "loss_gen": 3.865203380584717, + "loss_rtd": 0.28823763132095337, + "loss_sent": 0.1040513888001442, + "loss_sod": 0.03210706636309624, + "loss_total": 0.4243960678577423, + "step": 97399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.896703243255615, + "loss_rtd": 0.3183712065219879, + "loss_sent": 0.173360675573349, + "loss_sod": 0.09247465431690216, + "loss_total": 0.5842065811157227, + "step": 97399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.019538164138794, + "learning_rate": 6.713112135538148e-05, + "loss": 0.5795, + "step": 97400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.013828754425049, + "loss_rtd": 0.32604846358299255, + "loss_sent": 0.09510602056980133, + "loss_sod": 0.05798761546611786, + "loss_total": 0.47914206981658936, + "step": 97499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.215390682220459, + "loss_rtd": 0.32824939489364624, + "loss_sent": 0.015554209239780903, + "loss_sod": 0.15425828099250793, + "loss_total": 0.4980618953704834, + "step": 97499 + }, + { + "epoch": 0.003, + "grad_norm": 1.072590947151184, + "learning_rate": 6.710130536103338e-05, + "loss": 0.5762, + "step": 97500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.738191604614258, + "loss_rtd": 0.3265741765499115, + "loss_sent": 0.06120915710926056, + "loss_sod": 0.06804925948381424, + "loss_total": 0.4558326005935669, + "step": 97599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.706864356994629, + "loss_rtd": 0.34545576572418213, + "loss_sent": 0.23362770676612854, + "loss_sod": 0.022017929702997208, + "loss_total": 0.6011013984680176, + "step": 97599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.2530436515808105, + "learning_rate": 6.707148247828154e-05, + "loss": 0.5752, + "step": 97600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.104136943817139, + "loss_rtd": 0.3150257170200348, + "loss_sent": 0.13220404088497162, + "loss_sod": 0.1294633448123932, + "loss_total": 0.5766931176185608, + "step": 97699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.722529888153076, + "loss_rtd": 0.31569933891296387, + "loss_sent": 0.06980519741773605, + "loss_sod": 0.0539846271276474, + "loss_total": 0.4394891858100891, + "step": 97699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.1341675519943237, + "learning_rate": 6.704165271913858e-05, + "loss": 0.5827, + "step": 97700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.343900203704834, + "loss_rtd": 0.30843839049339294, + "loss_sent": 0.1389574408531189, + "loss_sod": 0.13357022404670715, + "loss_total": 0.580966055393219, + "step": 97799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.162288665771484, + "loss_rtd": 0.3121359050273895, + "loss_sent": 0.12799625098705292, + "loss_sod": 0.06447754800319672, + "loss_total": 0.5046097040176392, + "step": 97799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.9196800589561462, + "learning_rate": 6.701181609561996e-05, + "loss": 0.5951, + "step": 97800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.433358192443848, + "loss_rtd": 0.31221476197242737, + "loss_sent": 0.12727300822734833, + "loss_sod": 0.04596872627735138, + "loss_total": 0.48545652627944946, + "step": 97899 + }, + { + "epoch": 0.003798, + "loss_gen": 4.850100517272949, + "loss_rtd": 0.3153742551803589, + "loss_sent": 0.16493898630142212, + "loss_sod": 0.05727492272853851, + "loss_total": 0.5375881791114807, + "step": 97899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.8402808308601379, + "learning_rate": 6.698197261974383e-05, + "loss": 0.5785, + "step": 97900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.0220723152160645, + "loss_rtd": 0.3187673091888428, + "loss_sent": 0.05181638151407242, + "loss_sod": 0.1913912296295166, + "loss_total": 0.5619749426841736, + "step": 97999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.382528781890869, + "loss_rtd": 0.3159908652305603, + "loss_sent": 0.22155477106571198, + "loss_sod": 0.051169902086257935, + "loss_total": 0.5887155532836914, + "step": 97999 + }, + { + "epoch": 0.004, + "grad_norm": 1.3498995304107666, + "learning_rate": 6.695212230353119e-05, + "loss": 0.5795, + "step": 98000 + }, + { + "epoch": 0.004, + "eval_loss": 0.5507333278656006, + "eval_runtime": 151.1769, + "eval_samples_per_second": 102.152, + "eval_steps_per_second": 0.8, + "step": 98000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.718911170959473, + "loss_rtd": 0.32273924350738525, + "loss_sent": 0.3075748383998871, + "loss_sod": 0.0760340765118599, + "loss_total": 0.706348180770874, + "step": 98099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.744332313537598, + "loss_rtd": 0.3346700370311737, + "loss_sent": 0.3497461676597595, + "loss_sod": 0.011217096820473671, + "loss_total": 0.6956332921981812, + "step": 98099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2334489822387695, + "learning_rate": 6.69222651590057e-05, + "loss": 0.5762, + "step": 98100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.152660369873047, + "loss_rtd": 0.3339768052101135, + "loss_sent": 0.18060199916362762, + "loss_sod": 0.16783888638019562, + "loss_total": 0.6824176907539368, + "step": 98199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.443938255310059, + "loss_rtd": 0.3192080855369568, + "loss_sent": 0.05183064565062523, + "loss_sod": 0.2955458462238312, + "loss_total": 0.6665846109390259, + "step": 98199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.4761645793914795, + "learning_rate": 6.689240119819382e-05, + "loss": 0.582, + "step": 98200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.026210784912109, + "loss_rtd": 0.34066301584243774, + "loss_sent": 0.24418486654758453, + "loss_sod": 0.03400744870305061, + "loss_total": 0.618855357170105, + "step": 98299 + }, + { + "epoch": 0.004598, + "loss_gen": 4.778450012207031, + "loss_rtd": 0.3111923336982727, + "loss_sent": 0.1166359931230545, + "loss_sod": 0.038881730288267136, + "loss_total": 0.46671003103256226, + "step": 98299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.9921392202377319, + "learning_rate": 6.686253043312476e-05, + "loss": 0.5756, + "step": 98300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.739179611206055, + "loss_rtd": 0.33178257942199707, + "loss_sent": 0.2532976567745209, + "loss_sod": 0.1417667120695114, + "loss_total": 0.7268469333648682, + "step": 98399 + }, + { + "epoch": 0.004798, + "loss_gen": 4.754785537719727, + "loss_rtd": 0.3195800185203552, + "loss_sent": 0.021861691027879715, + "loss_sod": 0.03856188803911209, + "loss_total": 0.38000360131263733, + "step": 98399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.7642726898193359, + "learning_rate": 6.683265287583046e-05, + "loss": 0.5802, + "step": 98400 + }, + { + "epoch": 0.004998, + "loss_gen": 4.178973197937012, + "loss_rtd": 0.3167199194431305, + "loss_sent": 7.245481538120657e-05, + "loss_sod": 0.21317259967327118, + "loss_total": 0.5299649834632874, + "step": 98499 + }, + { + "epoch": 0.004998, + "loss_gen": 3.935793876647949, + "loss_rtd": 0.3067960739135742, + "loss_sent": 0.000541093060746789, + "loss_sod": 0.20888842642307281, + "loss_total": 0.5162255764007568, + "step": 98499 + }, + { + "epoch": 0.005, + "grad_norm": 1.3664913177490234, + "learning_rate": 6.68027685383456e-05, + "loss": 0.5627, + "step": 98500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.335430145263672, + "loss_rtd": 0.3187076151371002, + "loss_sent": 0.0009927991777658463, + "loss_sod": 0.1831379532814026, + "loss_total": 0.5028383731842041, + "step": 98599 + }, + { + "epoch": 0.005198, + "loss_gen": 3.8598129749298096, + "loss_rtd": 0.28702089190483093, + "loss_sent": 0.0530681237578392, + "loss_sod": 0.06852472573518753, + "loss_total": 0.40861374139785767, + "step": 98599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.0185030698776245, + "learning_rate": 6.677287743270758e-05, + "loss": 0.5597, + "step": 98600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.972487449645996, + "loss_rtd": 0.30347952246665955, + "loss_sent": 0.5287765264511108, + "loss_sod": 0.03221752494573593, + "loss_total": 0.8644735813140869, + "step": 98699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.005542278289795, + "loss_rtd": 0.3374456763267517, + "loss_sent": 0.14009448885917664, + "loss_sod": 0.020594295114278793, + "loss_total": 0.49813446402549744, + "step": 98699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.627276062965393, + "learning_rate": 6.674297957095651e-05, + "loss": 0.5698, + "step": 98700 + }, + { + "epoch": 0.005598, + "loss_gen": 4.793325424194336, + "loss_rtd": 0.3073770999908447, + "loss_sent": 0.12930980324745178, + "loss_sod": 0.09903208911418915, + "loss_total": 0.5357190370559692, + "step": 98799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.244107723236084, + "loss_rtd": 0.3283325433731079, + "loss_sent": 0.29997268319129944, + "loss_sod": 0.07830352336168289, + "loss_total": 0.706608772277832, + "step": 98799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.0231043100357056, + "learning_rate": 6.671307496513532e-05, + "loss": 0.571, + "step": 98800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.634741306304932, + "loss_rtd": 0.30798089504241943, + "loss_sent": 0.2160707265138626, + "loss_sod": 0.07869061082601547, + "loss_total": 0.6027422547340393, + "step": 98899 + }, + { + "epoch": 0.005798, + "loss_gen": 4.8831634521484375, + "loss_rtd": 0.3266364336013794, + "loss_sent": 0.005406542681157589, + "loss_sod": 0.17728476226329803, + "loss_total": 0.50932776927948, + "step": 98899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.0855956077575684, + "learning_rate": 6.66831636272895e-05, + "loss": 0.585, + "step": 98900 + }, + { + "epoch": 0.005998, + "loss_gen": 4.216243743896484, + "loss_rtd": 0.3120708167552948, + "loss_sent": 0.060384079813957214, + "loss_sod": 0.034039661288261414, + "loss_total": 0.4064945578575134, + "step": 98999 + }, + { + "epoch": 0.005998, + "loss_gen": 4.957681655883789, + "loss_rtd": 0.32467541098594666, + "loss_sent": 0.0837487280368805, + "loss_sod": 0.053761258721351624, + "loss_total": 0.46218541264533997, + "step": 98999 + }, + { + "epoch": 0.006, + "grad_norm": 0.8100439310073853, + "learning_rate": 6.665324556946738e-05, + "loss": 0.5808, + "step": 99000 + }, + { + "epoch": 0.006, + "eval_loss": 0.5498852133750916, + "eval_runtime": 150.8186, + "eval_samples_per_second": 102.395, + "eval_steps_per_second": 0.802, + "step": 99000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.371486186981201, + "loss_rtd": 0.317146360874176, + "loss_sent": 0.03379607945680618, + "loss_sod": 0.10996784269809723, + "loss_total": 0.46091026067733765, + "step": 99099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.400828838348389, + "loss_rtd": 0.32509690523147583, + "loss_sent": 0.15079639852046967, + "loss_sod": 0.028728686273097992, + "loss_total": 0.5046219825744629, + "step": 99099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.9362024068832397, + "learning_rate": 6.662332080371992e-05, + "loss": 0.5783, + "step": 99100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.116110801696777, + "loss_rtd": 0.30363523960113525, + "loss_sent": 0.002137851668521762, + "loss_sod": 0.19607360661029816, + "loss_total": 0.5018466711044312, + "step": 99199 + }, + { + "epoch": 0.006398, + "loss_gen": 4.902525424957275, + "loss_rtd": 0.30980345606803894, + "loss_sent": 0.19927829504013062, + "loss_sod": 0.029844652861356735, + "loss_total": 0.538926362991333, + "step": 99199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.8438217639923096, + "learning_rate": 6.659338934210084e-05, + "loss": 0.5684, + "step": 99200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.058244228363037, + "loss_rtd": 0.3190957009792328, + "loss_sent": 0.15323588252067566, + "loss_sod": 0.032876744866371155, + "loss_total": 0.5052083730697632, + "step": 99299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.0241193771362305, + "loss_rtd": 0.31235265731811523, + "loss_sent": 0.1497867852449417, + "loss_sod": 0.014322447590529919, + "loss_total": 0.47646188735961914, + "step": 99299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.295091986656189, + "learning_rate": 6.656345119666652e-05, + "loss": 0.5846, + "step": 99300 + }, + { + "epoch": 0.006798, + "loss_gen": 3.940835952758789, + "loss_rtd": 0.3246998190879822, + "loss_sent": 3.1785522878635675e-05, + "loss_sod": 0.18714739382266998, + "loss_total": 0.5118789672851562, + "step": 99399 + }, + { + "epoch": 0.006798, + "loss_gen": 3.761812925338745, + "loss_rtd": 0.29748839139938354, + "loss_sent": 0.004839347209781408, + "loss_sod": 0.17168492078781128, + "loss_total": 0.47401267290115356, + "step": 99399 + }, + { + "epoch": 0.0068, + "grad_norm": 1.1156435012817383, + "learning_rate": 6.653350637947602e-05, + "loss": 0.5643, + "step": 99400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.088423728942871, + "loss_rtd": 0.32740095257759094, + "loss_sent": 0.04146308824419975, + "loss_sod": 0.09329129010438919, + "loss_total": 0.4621553122997284, + "step": 99499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.329144477844238, + "loss_rtd": 0.3109143078327179, + "loss_sent": 0.23411983251571655, + "loss_sod": 0.10364553332328796, + "loss_total": 0.6486796736717224, + "step": 99499 + }, + { + "epoch": 0.007, + "grad_norm": 0.8647971153259277, + "learning_rate": 6.650355490259114e-05, + "loss": 0.5795, + "step": 99500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.738002300262451, + "loss_rtd": 0.315083771944046, + "loss_sent": 0.1626637876033783, + "loss_sod": 0.05075232312083244, + "loss_total": 0.5284998416900635, + "step": 99599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.8016252517700195, + "loss_rtd": 0.32618269324302673, + "loss_sent": 0.19932086765766144, + "loss_sod": 0.03520169481635094, + "loss_total": 0.5607052445411682, + "step": 99599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.1057846546173096, + "learning_rate": 6.647359677807634e-05, + "loss": 0.5618, + "step": 99600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.591098308563232, + "loss_rtd": 0.34488070011138916, + "loss_sent": 0.17269062995910645, + "loss_sod": 0.08623214066028595, + "loss_total": 0.6038034558296204, + "step": 99699 + }, + { + "epoch": 0.007398, + "loss_gen": 4.796253204345703, + "loss_rtd": 0.31622114777565, + "loss_sent": 0.08896497637033463, + "loss_sod": 0.07983792573213577, + "loss_total": 0.48502403497695923, + "step": 99699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.8518372774124146, + "learning_rate": 6.64436320179987e-05, + "loss": 0.584, + "step": 99700 + }, + { + "epoch": 0.007598, + "loss_gen": 4.318517208099365, + "loss_rtd": 0.31801772117614746, + "loss_sent": 0.022323038429021835, + "loss_sod": 0.03361869603395462, + "loss_total": 0.3739594519138336, + "step": 99799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.060001373291016, + "loss_rtd": 0.3279321491718292, + "loss_sent": 0.4337468147277832, + "loss_sod": 0.07998764514923096, + "loss_total": 0.841666579246521, + "step": 99799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.227362036705017, + "learning_rate": 6.641366063442805e-05, + "loss": 0.581, + "step": 99800 + }, + { + "epoch": 0.007798, + "loss_gen": 3.996183156967163, + "loss_rtd": 0.311187207698822, + "loss_sent": 0.016353951767086983, + "loss_sod": 0.0697982981801033, + "loss_total": 0.39733946323394775, + "step": 99899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.877110481262207, + "loss_rtd": 0.31006091833114624, + "loss_sent": 0.1758640557527542, + "loss_sod": 0.08931712806224823, + "loss_total": 0.5752421021461487, + "step": 99899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.0441415309906006, + "learning_rate": 6.638368263943687e-05, + "loss": 0.5687, + "step": 99900 + }, + { + "epoch": 0.007998, + "loss_gen": 4.829002857208252, + "loss_rtd": 0.321885883808136, + "loss_sent": 0.20318901538848877, + "loss_sod": 0.10848979651927948, + "loss_total": 0.6335647106170654, + "step": 99999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.311275005340576, + "loss_rtd": 0.3442972004413605, + "loss_sent": 0.272118479013443, + "loss_sod": 0.04463036358356476, + "loss_total": 0.661046028137207, + "step": 99999 + }, + { + "epoch": 0.008, + "grad_norm": 1.10209321975708, + "learning_rate": 6.635369804510027e-05, + "loss": 0.5853, + "step": 100000 + }, + { + "epoch": 0.008, + "eval_loss": 0.5626693964004517, + "eval_runtime": 151.234, + "eval_samples_per_second": 102.113, + "eval_steps_per_second": 0.8, + "step": 100000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.7915239334106445, + "loss_rtd": 0.32424196600914, + "loss_sent": 0.11183413118124008, + "loss_sod": 0.1372000277042389, + "loss_total": 0.5732761025428772, + "step": 100099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.999149322509766, + "loss_rtd": 0.32147786021232605, + "loss_sent": 0.20129071176052094, + "loss_sod": 0.042393386363983154, + "loss_total": 0.565161943435669, + "step": 100099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.762076199054718, + "learning_rate": 6.632370686349608e-05, + "loss": 0.5833, + "step": 100100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.001662731170654, + "loss_rtd": 0.3111487329006195, + "loss_sent": 3.409513374208473e-05, + "loss_sod": 0.15228456258773804, + "loss_total": 0.46346738934516907, + "step": 100199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.259944915771484, + "loss_rtd": 0.30491870641708374, + "loss_sent": 0.04012997820973396, + "loss_sod": 0.12616415321826935, + "loss_total": 0.47121283411979675, + "step": 100199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.8947985768318176, + "learning_rate": 6.62937091067047e-05, + "loss": 0.5696, + "step": 100200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.226615905761719, + "loss_rtd": 0.3265242576599121, + "loss_sent": 0.16229365766048431, + "loss_sod": 0.09461610019207001, + "loss_total": 0.5834340453147888, + "step": 100299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.9019083976745605, + "loss_rtd": 0.32702329754829407, + "loss_sent": 0.2279345840215683, + "loss_sod": 0.044989585876464844, + "loss_total": 0.599947452545166, + "step": 100299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.3629820346832275, + "learning_rate": 6.626370478680923e-05, + "loss": 0.556, + "step": 100300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.851716041564941, + "loss_rtd": 0.31771615147590637, + "loss_sent": 0.25636428594589233, + "loss_sod": 0.09885308891534805, + "loss_total": 0.6729335188865662, + "step": 100399 + }, + { + "epoch": 0.008798, + "loss_gen": 4.763000965118408, + "loss_rtd": 0.3167797923088074, + "loss_sent": 0.2151304930448532, + "loss_sod": 0.09555570781230927, + "loss_total": 0.6274660229682922, + "step": 100399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.6304292678833008, + "learning_rate": 6.623369391589542e-05, + "loss": 0.5755, + "step": 100400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.066193580627441, + "loss_rtd": 0.30663493275642395, + "loss_sent": 0.2264665961265564, + "loss_sod": 0.04555559158325195, + "loss_total": 0.5786571502685547, + "step": 100499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.120555400848389, + "loss_rtd": 0.30996865034103394, + "loss_sent": 0.24104826152324677, + "loss_sod": 0.051908448338508606, + "loss_total": 0.6029253602027893, + "step": 100499 + }, + { + "epoch": 0.009, + "grad_norm": 1.1163640022277832, + "learning_rate": 6.620367650605166e-05, + "loss": 0.5821, + "step": 100500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.402734756469727, + "loss_rtd": 0.3174905478954315, + "loss_sent": 0.2320769727230072, + "loss_sod": 0.04570315405726433, + "loss_total": 0.5952706336975098, + "step": 100599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.212018013000488, + "loss_rtd": 0.32338646054267883, + "loss_sent": 0.00036768606514669955, + "loss_sod": 0.25911781191825867, + "loss_total": 0.5828719735145569, + "step": 100599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.159544825553894, + "learning_rate": 6.617365256936894e-05, + "loss": 0.5817, + "step": 100600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.843101501464844, + "loss_rtd": 0.32027003169059753, + "loss_sent": 0.3335469961166382, + "loss_sod": 0.13191527128219604, + "loss_total": 0.7857322692871094, + "step": 100699 + }, + { + "epoch": 0.009398, + "loss_gen": 4.927363395690918, + "loss_rtd": 0.31417667865753174, + "loss_sent": 0.2652972638607025, + "loss_sod": 0.04378899186849594, + "loss_total": 0.6232629418373108, + "step": 100699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.166442632675171, + "learning_rate": 6.614362211794087e-05, + "loss": 0.5655, + "step": 100700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.136700630187988, + "loss_rtd": 0.3016396760940552, + "loss_sent": 0.22913001477718353, + "loss_sod": 0.04708781838417053, + "loss_total": 0.577857494354248, + "step": 100799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.053333759307861, + "loss_rtd": 0.30575957894325256, + "loss_sent": 0.06288175284862518, + "loss_sod": 0.11969773471355438, + "loss_total": 0.48833906650543213, + "step": 100799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.0079056024551392, + "learning_rate": 6.611358516386377e-05, + "loss": 0.5711, + "step": 100800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.671409606933594, + "loss_rtd": 0.33603939414024353, + "loss_sent": 0.07858309149742126, + "loss_sod": 0.08161260187625885, + "loss_total": 0.49623510241508484, + "step": 100899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.940887451171875, + "loss_rtd": 0.3263009190559387, + "loss_sent": 0.2012975513935089, + "loss_sod": 0.013004586100578308, + "loss_total": 0.5406030416488647, + "step": 100899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.2577089071273804, + "learning_rate": 6.608354171923648e-05, + "loss": 0.5711, + "step": 100900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.780365467071533, + "loss_rtd": 0.3365417420864105, + "loss_sent": 0.2404618114233017, + "loss_sod": 0.021603599190711975, + "loss_total": 0.5986071825027466, + "step": 100999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.838820457458496, + "loss_rtd": 0.31516823172569275, + "loss_sent": 0.42256417870521545, + "loss_sod": 0.04550718888640404, + "loss_total": 0.7832396030426025, + "step": 100999 + }, + { + "epoch": 0.01, + "grad_norm": 1.636043906211853, + "learning_rate": 6.605349179616052e-05, + "loss": 0.5786, + "step": 101000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5521333813667297, + "eval_runtime": 151.1925, + "eval_samples_per_second": 102.141, + "eval_steps_per_second": 0.8, + "step": 101000 + }, + { + "epoch": 0.010198, + "loss_gen": 4.162107467651367, + "loss_rtd": 0.3015623986721039, + "loss_sent": 0.0013494102749973536, + "loss_sod": 0.22462737560272217, + "loss_total": 0.5275391936302185, + "step": 101099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.035129547119141, + "loss_rtd": 0.31379857659339905, + "loss_sent": 0.0006297457148320973, + "loss_sod": 0.21650034189224243, + "loss_total": 0.5309286713600159, + "step": 101099 + }, + { + "epoch": 0.0102, + "grad_norm": 0.972885251045227, + "learning_rate": 6.602343540673999e-05, + "loss": 0.5699, + "step": 101100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.747851371765137, + "loss_rtd": 0.3193017244338989, + "loss_sent": 0.07847078144550323, + "loss_sod": 0.09810219705104828, + "loss_total": 0.49587470293045044, + "step": 101199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.180852890014648, + "loss_rtd": 0.3225772976875305, + "loss_sent": 0.11992615461349487, + "loss_sod": 0.056841302663087845, + "loss_total": 0.49934476613998413, + "step": 101199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.2792707681655884, + "learning_rate": 6.599337256308158e-05, + "loss": 0.5693, + "step": 101200 + }, + { + "epoch": 0.010598, + "loss_gen": 4.498040199279785, + "loss_rtd": 0.3336476683616638, + "loss_sent": 0.09353425353765488, + "loss_sod": 0.03337704762816429, + "loss_total": 0.4605589807033539, + "step": 101299 + }, + { + "epoch": 0.010598, + "loss_gen": 4.985463619232178, + "loss_rtd": 0.3326781392097473, + "loss_sent": 0.1662260740995407, + "loss_sod": 0.029946666210889816, + "loss_total": 0.5288508534431458, + "step": 101299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.8527702689170837, + "learning_rate": 6.596330327729461e-05, + "loss": 0.5775, + "step": 101300 + }, + { + "epoch": 0.010798, + "loss_gen": 4.720818519592285, + "loss_rtd": 0.3480078876018524, + "loss_sent": 0.05008330196142197, + "loss_sod": 0.005968412384390831, + "loss_total": 0.40405961871147156, + "step": 101399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.84202241897583, + "loss_rtd": 0.3052635192871094, + "loss_sent": 0.14516431093215942, + "loss_sod": 0.05550266057252884, + "loss_total": 0.505930483341217, + "step": 101399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.6203334927558899, + "learning_rate": 6.593322756149099e-05, + "loss": 0.5648, + "step": 101400 + }, + { + "epoch": 0.010998, + "loss_gen": 4.84304666519165, + "loss_rtd": 0.3437245190143585, + "loss_sent": 0.1599966436624527, + "loss_sod": 0.15706191956996918, + "loss_total": 0.660783052444458, + "step": 101499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.944455146789551, + "loss_rtd": 0.3024526834487915, + "loss_sent": 0.40022334456443787, + "loss_sod": 0.03615320473909378, + "loss_total": 0.7388292551040649, + "step": 101499 + }, + { + "epoch": 0.011, + "grad_norm": 1.1639444828033447, + "learning_rate": 6.590314542778522e-05, + "loss": 0.5786, + "step": 101500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.69655704498291, + "loss_rtd": 0.32028722763061523, + "loss_sent": 0.03291154280304909, + "loss_sod": 0.16597428917884827, + "loss_total": 0.5191730856895447, + "step": 101599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.082765102386475, + "loss_rtd": 0.2942723333835602, + "loss_sent": 0.05638710409402847, + "loss_sod": 0.02609078399837017, + "loss_total": 0.3767502009868622, + "step": 101599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.8092089295387268, + "learning_rate": 6.587305688829437e-05, + "loss": 0.5818, + "step": 101600 + }, + { + "epoch": 0.011398, + "loss_gen": 4.038250923156738, + "loss_rtd": 0.30816489458084106, + "loss_sent": 5.8082812756765634e-05, + "loss_sod": 0.2049764096736908, + "loss_total": 0.5131993293762207, + "step": 101699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.315676689147949, + "loss_rtd": 0.32112082839012146, + "loss_sent": 0.021004807204008102, + "loss_sod": 0.1235692948102951, + "loss_total": 0.4656949043273926, + "step": 101699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.8982096314430237, + "learning_rate": 6.58429619551381e-05, + "loss": 0.5795, + "step": 101700 + }, + { + "epoch": 0.011598, + "loss_gen": 4.464595794677734, + "loss_rtd": 0.3015983998775482, + "loss_sent": 0.05006067827343941, + "loss_sod": 0.13953593373298645, + "loss_total": 0.49119502305984497, + "step": 101799 + }, + { + "epoch": 0.011598, + "loss_gen": 4.08694314956665, + "loss_rtd": 0.3185790479183197, + "loss_sent": 4.180019459454343e-05, + "loss_sod": 0.1926421821117401, + "loss_total": 0.5112630128860474, + "step": 101799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.1276414394378662, + "learning_rate": 6.581286064043866e-05, + "loss": 0.558, + "step": 101800 + }, + { + "epoch": 0.011798, + "loss_gen": 3.974503993988037, + "loss_rtd": 0.2857121527194977, + "loss_sent": 0.0001361898030154407, + "loss_sod": 0.13948465883731842, + "loss_total": 0.42533302307128906, + "step": 101899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.069895267486572, + "loss_rtd": 0.32346153259277344, + "loss_sent": 0.0006381009006872773, + "loss_sod": 0.20743003487586975, + "loss_total": 0.5315296649932861, + "step": 101899 + }, + { + "epoch": 0.0118, + "grad_norm": 0.807518720626831, + "learning_rate": 6.578275295632084e-05, + "loss": 0.5671, + "step": 101900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.010288715362549, + "loss_rtd": 0.31243669986724854, + "loss_sent": 4.243159855832346e-05, + "loss_sod": 0.2639489471912384, + "loss_total": 0.5764280557632446, + "step": 101999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.833768367767334, + "loss_rtd": 0.32573121786117554, + "loss_sent": 0.12251759320497513, + "loss_sod": 0.0804349035024643, + "loss_total": 0.5286837220191956, + "step": 101999 + }, + { + "epoch": 0.012, + "grad_norm": 1.121748447418213, + "learning_rate": 6.575263891491203e-05, + "loss": 0.5679, + "step": 102000 + }, + { + "epoch": 0.012, + "eval_loss": 0.5446926951408386, + "eval_runtime": 151.3259, + "eval_samples_per_second": 102.051, + "eval_steps_per_second": 0.8, + "step": 102000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.978600978851318, + "loss_rtd": 0.31439170241355896, + "loss_sent": 0.05940273776650429, + "loss_sod": 0.22584794461727142, + "loss_total": 0.5996423959732056, + "step": 102099 + }, + { + "epoch": 0.012198, + "loss_gen": 3.9863944053649902, + "loss_rtd": 0.3023916482925415, + "loss_sent": 4.6820186980767176e-05, + "loss_sod": 0.2726382613182068, + "loss_total": 0.5750767588615417, + "step": 102099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.3223897218704224, + "learning_rate": 6.572251852834212e-05, + "loss": 0.5679, + "step": 102100 + }, + { + "epoch": 0.012398, + "loss_gen": 4.828497886657715, + "loss_rtd": 0.32210204005241394, + "loss_sent": 0.18495376408100128, + "loss_sod": 0.08336347341537476, + "loss_total": 0.5904192924499512, + "step": 102199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.150286674499512, + "loss_rtd": 0.31037473678588867, + "loss_sent": 0.16400249302387238, + "loss_sod": 0.07330995053052902, + "loss_total": 0.5476871728897095, + "step": 102199 + }, + { + "epoch": 0.0124, + "grad_norm": 2.2942142486572266, + "learning_rate": 6.569239180874365e-05, + "loss": 0.5718, + "step": 102200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.675304889678955, + "loss_rtd": 0.3205735981464386, + "loss_sent": 0.14644664525985718, + "loss_sod": 0.028588993474841118, + "loss_total": 0.49560922384262085, + "step": 102299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.031630516052246, + "loss_rtd": 0.3230000436306, + "loss_sent": 0.05868524685502052, + "loss_sod": 0.061777375638484955, + "loss_total": 0.44346266984939575, + "step": 102299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.659148633480072, + "learning_rate": 6.566225876825161e-05, + "loss": 0.566, + "step": 102300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.987382411956787, + "loss_rtd": 0.3295574486255646, + "loss_sent": 0.5667756199836731, + "loss_sod": 0.0292537622153759, + "loss_total": 0.9255868196487427, + "step": 102399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.694916248321533, + "loss_rtd": 0.3182528018951416, + "loss_sent": 0.26894715428352356, + "loss_sod": 0.07731296867132187, + "loss_total": 0.6645129323005676, + "step": 102399 + }, + { + "epoch": 0.0128, + "grad_norm": 2.89612078666687, + "learning_rate": 6.563211941900364e-05, + "loss": 0.5874, + "step": 102400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.287630558013916, + "loss_rtd": 0.31084689497947693, + "loss_sent": 0.011314211413264275, + "loss_sod": 0.08189202845096588, + "loss_total": 0.4040531516075134, + "step": 102499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.082841396331787, + "loss_rtd": 0.30779874324798584, + "loss_sent": 0.23700939118862152, + "loss_sod": 0.10018089413642883, + "loss_total": 0.644989013671875, + "step": 102499 + }, + { + "epoch": 0.013, + "grad_norm": 1.138588786125183, + "learning_rate": 6.560197377313983e-05, + "loss": 0.576, + "step": 102500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.048178195953369, + "loss_rtd": 0.3007459044456482, + "loss_sent": 0.03131717070937157, + "loss_sod": 0.1861349493265152, + "loss_total": 0.5181980133056641, + "step": 102599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.632444858551025, + "loss_rtd": 0.3480101525783539, + "loss_sent": 0.13145510852336884, + "loss_sod": 0.0014071919722482562, + "loss_total": 0.4808724522590637, + "step": 102599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0020616054534912, + "learning_rate": 6.557182184280284e-05, + "loss": 0.5746, + "step": 102600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.145431995391846, + "loss_rtd": 0.3162272274494171, + "loss_sent": 0.15705393254756927, + "loss_sod": 0.2427826076745987, + "loss_total": 0.7160637974739075, + "step": 102699 + }, + { + "epoch": 0.013398, + "loss_gen": 4.439493179321289, + "loss_rtd": 0.3044537901878357, + "loss_sent": 0.12587888538837433, + "loss_sod": 0.1643853783607483, + "loss_total": 0.5947180390357971, + "step": 102699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.0804530382156372, + "learning_rate": 6.554166364013787e-05, + "loss": 0.5721, + "step": 102700 + }, + { + "epoch": 0.013598, + "loss_gen": 4.833267688751221, + "loss_rtd": 0.30929800868034363, + "loss_sent": 0.3013760447502136, + "loss_sod": 0.014710478484630585, + "loss_total": 0.6253845691680908, + "step": 102799 + }, + { + "epoch": 0.013598, + "loss_gen": 4.671603202819824, + "loss_rtd": 0.32912150025367737, + "loss_sent": 0.45154908299446106, + "loss_sod": 0.03300929814577103, + "loss_total": 0.8136798739433289, + "step": 102799 + }, + { + "epoch": 0.0136, + "grad_norm": 2.9260988235473633, + "learning_rate": 6.551149917729267e-05, + "loss": 0.5699, + "step": 102800 + }, + { + "epoch": 0.013798, + "loss_gen": 4.445441722869873, + "loss_rtd": 0.30097082257270813, + "loss_sent": 0.2778117060661316, + "loss_sod": 0.0919363722205162, + "loss_total": 0.6707189083099365, + "step": 102899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.081186294555664, + "loss_rtd": 0.31350207328796387, + "loss_sent": 0.0635070875287056, + "loss_sod": 0.0671958401799202, + "loss_total": 0.44420498609542847, + "step": 102899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.733470618724823, + "learning_rate": 6.548132846641744e-05, + "loss": 0.5678, + "step": 102900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.434316158294678, + "loss_rtd": 0.3185882270336151, + "loss_sent": 0.1743234395980835, + "loss_sod": 0.16610151529312134, + "loss_total": 0.6590131521224976, + "step": 102999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.26799201965332, + "loss_rtd": 0.3111538887023926, + "loss_sent": 0.19061948359012604, + "loss_sod": 0.08537900447845459, + "loss_total": 0.587152361869812, + "step": 102999 + }, + { + "epoch": 0.014, + "grad_norm": 0.9744783043861389, + "learning_rate": 6.545115151966496e-05, + "loss": 0.5717, + "step": 103000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5482023358345032, + "eval_runtime": 151.1963, + "eval_samples_per_second": 102.139, + "eval_steps_per_second": 0.8, + "step": 103000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.193575382232666, + "loss_rtd": 0.3247431814670563, + "loss_sent": 0.3455546200275421, + "loss_sod": 0.08126696199178696, + "loss_total": 0.7515647411346436, + "step": 103099 + }, + { + "epoch": 0.014198, + "loss_gen": 4.203389644622803, + "loss_rtd": 0.3015291392803192, + "loss_sent": 0.004309745505452156, + "loss_sod": 0.1388365924358368, + "loss_total": 0.444675475358963, + "step": 103099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.1028567552566528, + "learning_rate": 6.542096834919049e-05, + "loss": 0.5635, + "step": 103100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.115509510040283, + "loss_rtd": 0.302995502948761, + "loss_sent": 0.0011495847720652819, + "loss_sod": 0.13383348286151886, + "loss_total": 0.4379785656929016, + "step": 103199 + }, + { + "epoch": 0.014398, + "loss_gen": 4.152536869049072, + "loss_rtd": 0.3090682327747345, + "loss_sent": 0.04768054559826851, + "loss_sod": 0.1396777629852295, + "loss_total": 0.4964265525341034, + "step": 103199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.2074131965637207, + "learning_rate": 6.53907789671518e-05, + "loss": 0.593, + "step": 103200 + }, + { + "epoch": 0.014598, + "loss_gen": 4.523934364318848, + "loss_rtd": 0.30799534916877747, + "loss_sent": 0.03921209275722504, + "loss_sod": 0.09137500822544098, + "loss_total": 0.43858247995376587, + "step": 103299 + }, + { + "epoch": 0.014598, + "loss_gen": 3.9856107234954834, + "loss_rtd": 0.30571386218070984, + "loss_sent": 8.002371760085225e-05, + "loss_sod": 0.11510750651359558, + "loss_total": 0.4209013879299164, + "step": 103299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.7983850240707397, + "learning_rate": 6.536058338570922e-05, + "loss": 0.5603, + "step": 103300 + }, + { + "epoch": 0.014798, + "loss_gen": 3.939851760864258, + "loss_rtd": 0.3169386684894562, + "loss_sent": 4.114958574064076e-05, + "loss_sod": 0.09740159660577774, + "loss_total": 0.4143814146518707, + "step": 103399 + }, + { + "epoch": 0.014798, + "loss_gen": 3.738983154296875, + "loss_rtd": 0.31111863255500793, + "loss_sent": 0.009131144732236862, + "loss_sod": 0.07810862362384796, + "loss_total": 0.39835840463638306, + "step": 103399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.48440203070640564, + "learning_rate": 6.533038161702546e-05, + "loss": 0.5662, + "step": 103400 + }, + { + "epoch": 0.014998, + "loss_gen": 4.638041973114014, + "loss_rtd": 0.3122525215148926, + "loss_sent": 0.1494915932416916, + "loss_sod": 0.027382340282201767, + "loss_total": 0.48912644386291504, + "step": 103499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.790919780731201, + "loss_rtd": 0.3246724605560303, + "loss_sent": 0.06974232196807861, + "loss_sod": 0.06924775242805481, + "loss_total": 0.4636625349521637, + "step": 103499 + }, + { + "epoch": 0.015, + "grad_norm": 0.8386632204055786, + "learning_rate": 6.530017367326582e-05, + "loss": 0.5688, + "step": 103500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.208066463470459, + "loss_rtd": 0.3045886754989624, + "loss_sent": 0.5311081409454346, + "loss_sod": 0.11848323047161102, + "loss_total": 0.9541800618171692, + "step": 103599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.914482116699219, + "loss_rtd": 0.3204767405986786, + "loss_sent": 0.2790606915950775, + "loss_sod": 0.033330462872982025, + "loss_total": 0.6328679323196411, + "step": 103599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.759294867515564, + "learning_rate": 6.526995956659806e-05, + "loss": 0.5673, + "step": 103600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.131373405456543, + "loss_rtd": 0.3379717469215393, + "loss_sent": 0.23370657861232758, + "loss_sod": 0.07675453275442123, + "loss_total": 0.6484328508377075, + "step": 103699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.740903377532959, + "loss_rtd": 0.3197406530380249, + "loss_sent": 0.1491633504629135, + "loss_sod": 0.08728623390197754, + "loss_total": 0.5561902523040771, + "step": 103699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.6623493432998657, + "learning_rate": 6.523973930919241e-05, + "loss": 0.5796, + "step": 103700 + }, + { + "epoch": 0.015598, + "loss_gen": 4.825345039367676, + "loss_rtd": 0.32074809074401855, + "loss_sent": 0.37894371151924133, + "loss_sod": 0.03921166807413101, + "loss_total": 0.7389034628868103, + "step": 103799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.758147239685059, + "loss_rtd": 0.3263850510120392, + "loss_sent": 0.17378558218479156, + "loss_sod": 0.049403682351112366, + "loss_total": 0.5495743155479431, + "step": 103799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.2121750116348267, + "learning_rate": 6.52095129132216e-05, + "loss": 0.5656, + "step": 103800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.990795612335205, + "loss_rtd": 0.307752788066864, + "loss_sent": 0.04160727560520172, + "loss_sod": 0.1492750644683838, + "loss_total": 0.49863511323928833, + "step": 103899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.051953315734863, + "loss_rtd": 0.300202876329422, + "loss_sent": 0.0019655979704111814, + "loss_sod": 0.16191361844539642, + "loss_total": 0.46408209204673767, + "step": 103899 + }, + { + "epoch": 0.0158, + "grad_norm": 0.9242538809776306, + "learning_rate": 6.517928039086079e-05, + "loss": 0.5572, + "step": 103900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.933145523071289, + "loss_rtd": 0.32966703176498413, + "loss_sent": 0.11528322100639343, + "loss_sod": 0.013144847005605698, + "loss_total": 0.45809510350227356, + "step": 103999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.144468307495117, + "loss_rtd": 0.30523863434791565, + "loss_sent": 0.20605115592479706, + "loss_sod": 0.03963879495859146, + "loss_total": 0.5509285926818848, + "step": 103999 + }, + { + "epoch": 0.016, + "grad_norm": 0.6265259981155396, + "learning_rate": 6.514904175428766e-05, + "loss": 0.5677, + "step": 104000 + }, + { + "epoch": 0.016, + "eval_loss": 0.5446927547454834, + "eval_runtime": 151.3544, + "eval_samples_per_second": 102.032, + "eval_steps_per_second": 0.799, + "step": 104000 + }, + { + "epoch": 0.016198, + "loss_gen": 3.759413719177246, + "loss_rtd": 0.2940031886100769, + "loss_sent": 0.0001251414796570316, + "loss_sod": 0.26236772537231445, + "loss_total": 0.5564960837364197, + "step": 104099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.728495121002197, + "loss_rtd": 0.3166390061378479, + "loss_sent": 0.04960273578763008, + "loss_sod": 0.036053985357284546, + "loss_total": 0.4022957384586334, + "step": 104099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.973793089389801, + "learning_rate": 6.511879701568233e-05, + "loss": 0.561, + "step": 104100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.024625778198242, + "loss_rtd": 0.3019779622554779, + "loss_sent": 0.37978801131248474, + "loss_sod": 0.08072702586650848, + "loss_total": 0.7624930143356323, + "step": 104199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.858999252319336, + "loss_rtd": 0.32595953345298767, + "loss_sent": 0.08095843344926834, + "loss_sod": 0.09027931839227676, + "loss_total": 0.4971972703933716, + "step": 104199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.5230836868286133, + "learning_rate": 6.508854618722735e-05, + "loss": 0.5772, + "step": 104200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.25753116607666, + "loss_rtd": 0.33190762996673584, + "loss_sent": 0.26229095458984375, + "loss_sod": 0.09572476148605347, + "loss_total": 0.6899233460426331, + "step": 104299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.92232608795166, + "loss_rtd": 0.32525166869163513, + "loss_sent": 0.30636516213417053, + "loss_sod": 0.011416537687182426, + "loss_total": 0.6430333852767944, + "step": 104299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.396575689315796, + "learning_rate": 6.50582892811078e-05, + "loss": 0.5568, + "step": 104300 + }, + { + "epoch": 0.016798, + "loss_gen": 4.894100666046143, + "loss_rtd": 0.31734776496887207, + "loss_sent": 0.19172203540802002, + "loss_sod": 0.05022633820772171, + "loss_total": 0.5592961311340332, + "step": 104399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.057659149169922, + "loss_rtd": 0.31772705912590027, + "loss_sent": 0.31592148542404175, + "loss_sod": 0.06395834684371948, + "loss_total": 0.6976069211959839, + "step": 104399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.9074657559394836, + "learning_rate": 6.502802630951112e-05, + "loss": 0.5639, + "step": 104400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.838658809661865, + "loss_rtd": 0.3140501081943512, + "loss_sent": 0.24971437454223633, + "loss_sod": 0.011981564573943615, + "loss_total": 0.5757460594177246, + "step": 104499 + }, + { + "epoch": 0.016998, + "loss_gen": 4.932409286499023, + "loss_rtd": 0.31491634249687195, + "loss_sent": 0.14830800890922546, + "loss_sod": 0.07958915084600449, + "loss_total": 0.5428135395050049, + "step": 104499 + }, + { + "epoch": 0.017, + "grad_norm": 0.789269745349884, + "learning_rate": 6.499775728462722e-05, + "loss": 0.5899, + "step": 104500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.028350353240967, + "loss_rtd": 0.3054017126560211, + "loss_sent": 0.005753053352236748, + "loss_sod": 0.20317314565181732, + "loss_total": 0.514327883720398, + "step": 104599 + }, + { + "epoch": 0.017198, + "loss_gen": 4.46292781829834, + "loss_rtd": 0.3214676082134247, + "loss_sent": 0.04085097834467888, + "loss_sod": 0.19534572958946228, + "loss_total": 0.5576643347740173, + "step": 104599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.0206047296524048, + "learning_rate": 6.49674822186485e-05, + "loss": 0.5793, + "step": 104600 + }, + { + "epoch": 0.017398, + "loss_gen": 4.8943915367126465, + "loss_rtd": 0.3206595778465271, + "loss_sent": 0.23480308055877686, + "loss_sod": 0.12426088750362396, + "loss_total": 0.6797235608100891, + "step": 104699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.224236011505127, + "loss_rtd": 0.31364619731903076, + "loss_sent": 0.0050000278279185295, + "loss_sod": 0.272971510887146, + "loss_total": 0.59161776304245, + "step": 104699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.2544224262237549, + "learning_rate": 6.493720112376972e-05, + "loss": 0.5868, + "step": 104700 + }, + { + "epoch": 0.017598, + "loss_gen": 4.896785259246826, + "loss_rtd": 0.30121535062789917, + "loss_sent": 0.16163146495819092, + "loss_sod": 0.08443138003349304, + "loss_total": 0.5472781658172607, + "step": 104799 + }, + { + "epoch": 0.017598, + "loss_gen": 4.746344566345215, + "loss_rtd": 0.3232337534427643, + "loss_sent": 0.33305108547210693, + "loss_sod": 0.05574338138103485, + "loss_total": 0.7120282053947449, + "step": 104799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.3743486404418945, + "learning_rate": 6.490691401218812e-05, + "loss": 0.5625, + "step": 104800 + }, + { + "epoch": 0.017798, + "loss_gen": 4.22736930847168, + "loss_rtd": 0.2959963083267212, + "loss_sent": 0.017806321382522583, + "loss_sod": 0.13410848379135132, + "loss_total": 0.4479111135005951, + "step": 104899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.7643022537231445, + "loss_rtd": 0.30367931723594666, + "loss_sent": 0.08337313681840897, + "loss_sod": 0.07327957451343536, + "loss_total": 0.4603320360183716, + "step": 104899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.8151678442955017, + "learning_rate": 6.487662089610334e-05, + "loss": 0.5704, + "step": 104900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.2139892578125, + "loss_rtd": 0.32095271348953247, + "loss_sent": 0.08497481793165207, + "loss_sod": 0.18144731223583221, + "loss_total": 0.5873748660087585, + "step": 104999 + }, + { + "epoch": 0.017998, + "loss_gen": 4.802613258361816, + "loss_rtd": 0.31074824929237366, + "loss_sent": 0.1703757643699646, + "loss_sod": 0.029345963150262833, + "loss_total": 0.5104699730873108, + "step": 104999 + }, + { + "epoch": 0.018, + "grad_norm": 0.9500181674957275, + "learning_rate": 6.484632178771744e-05, + "loss": 0.5592, + "step": 105000 + }, + { + "epoch": 0.018, + "eval_loss": 0.5460320115089417, + "eval_runtime": 152.8021, + "eval_samples_per_second": 101.065, + "eval_steps_per_second": 0.792, + "step": 105000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.208505630493164, + "loss_rtd": 0.32324033975601196, + "loss_sent": 0.2505112588405609, + "loss_sod": 0.07465124130249023, + "loss_total": 0.6484028100967407, + "step": 105099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.328365325927734, + "loss_rtd": 0.33295971155166626, + "loss_sent": 0.10097527503967285, + "loss_sod": 0.202653706073761, + "loss_total": 0.6365886926651001, + "step": 105099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.2256265878677368, + "learning_rate": 6.481601669923489e-05, + "loss": 0.5554, + "step": 105100 + }, + { + "epoch": 0.018398, + "loss_gen": 4.671132564544678, + "loss_rtd": 0.32226237654685974, + "loss_sent": 0.27640920877456665, + "loss_sod": 0.08043090999126434, + "loss_total": 0.6791024804115295, + "step": 105199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.23636531829834, + "loss_rtd": 0.3007037341594696, + "loss_sent": 0.21370506286621094, + "loss_sod": 0.16631944477558136, + "loss_total": 0.6807282567024231, + "step": 105199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.2847132682800293, + "learning_rate": 6.47857056428626e-05, + "loss": 0.5722, + "step": 105200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.904481887817383, + "loss_rtd": 0.33259204030036926, + "loss_sent": 0.1941862255334854, + "loss_sod": 0.05695922672748566, + "loss_total": 0.5837374925613403, + "step": 105299 + }, + { + "epoch": 0.018598, + "loss_gen": 4.8768229484558105, + "loss_rtd": 0.333579957485199, + "loss_sent": 0.08184095472097397, + "loss_sod": 0.02738514356315136, + "loss_total": 0.44280606508255005, + "step": 105299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.8443505167961121, + "learning_rate": 6.475538863080984e-05, + "loss": 0.5535, + "step": 105300 + }, + { + "epoch": 0.018798, + "loss_gen": 4.9819016456604, + "loss_rtd": 0.2957344353199005, + "loss_sent": 0.07932811975479126, + "loss_sod": 0.0848763957619667, + "loss_total": 0.4599389433860779, + "step": 105399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.246075630187988, + "loss_rtd": 0.33596476912498474, + "loss_sent": 0.1679612249135971, + "loss_sod": 0.046787574887275696, + "loss_total": 0.5507135987281799, + "step": 105399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.7843284606933594, + "learning_rate": 6.472506567528832e-05, + "loss": 0.5596, + "step": 105400 + }, + { + "epoch": 0.018998, + "loss_gen": 4.234274864196777, + "loss_rtd": 0.31183311343193054, + "loss_sent": 0.01924183964729309, + "loss_sod": 0.1953190118074417, + "loss_total": 0.5263940095901489, + "step": 105499 + }, + { + "epoch": 0.018998, + "loss_gen": 3.8398239612579346, + "loss_rtd": 0.28846511244773865, + "loss_sent": 3.70305533579085e-05, + "loss_sod": 0.11776828020811081, + "loss_total": 0.40627044439315796, + "step": 105499 + }, + { + "epoch": 0.019, + "grad_norm": 0.8793208003044128, + "learning_rate": 6.469473678851208e-05, + "loss": 0.5771, + "step": 105500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.88959264755249, + "loss_rtd": 0.3144441545009613, + "loss_sent": 0.3775281310081482, + "loss_sod": 0.05158288776874542, + "loss_total": 0.7435551881790161, + "step": 105599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.031152725219727, + "loss_rtd": 0.31768447160720825, + "loss_sent": 0.17155961692333221, + "loss_sod": 0.11411077529191971, + "loss_total": 0.6033548712730408, + "step": 105599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.247576117515564, + "learning_rate": 6.466440198269763e-05, + "loss": 0.563, + "step": 105600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.1013007164001465, + "loss_rtd": 0.30058038234710693, + "loss_sent": 0.014173594303429127, + "loss_sod": 0.14618852734565735, + "loss_total": 0.46094250679016113, + "step": 105699 + }, + { + "epoch": 0.019398, + "loss_gen": 3.6222245693206787, + "loss_rtd": 0.28102585673332214, + "loss_sent": 0.028827032074332237, + "loss_sod": 0.16895148158073425, + "loss_total": 0.4788043797016144, + "step": 105699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.9890584945678711, + "learning_rate": 6.46340612700638e-05, + "loss": 0.5526, + "step": 105700 + }, + { + "epoch": 0.019598, + "loss_gen": 3.9984564781188965, + "loss_rtd": 0.2904631793498993, + "loss_sent": 0.030142538249492645, + "loss_sod": 0.06392544507980347, + "loss_total": 0.384531170129776, + "step": 105799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.746275901794434, + "loss_rtd": 0.3146536648273468, + "loss_sent": 0.09788447618484497, + "loss_sod": 0.04603857174515724, + "loss_total": 0.4585767090320587, + "step": 105799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.7948949337005615, + "learning_rate": 6.460371466283186e-05, + "loss": 0.5676, + "step": 105800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.381058692932129, + "loss_rtd": 0.3200831711292267, + "loss_sent": 0.18164263665676117, + "loss_sod": 0.10055247694253922, + "loss_total": 0.6022782921791077, + "step": 105899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.627455711364746, + "loss_rtd": 0.3433959484100342, + "loss_sent": 0.12217989563941956, + "loss_sod": 0.00880503375083208, + "loss_total": 0.47438088059425354, + "step": 105899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.8509397506713867, + "learning_rate": 6.457336217322539e-05, + "loss": 0.5663, + "step": 105900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.083179950714111, + "loss_rtd": 0.32536083459854126, + "loss_sent": 0.14673031866550446, + "loss_sod": 0.031289633363485336, + "loss_total": 0.5033807754516602, + "step": 105999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.490145206451416, + "loss_rtd": 0.34568482637405396, + "loss_sent": 0.141320139169693, + "loss_sod": 0.03445363789796829, + "loss_total": 0.521458625793457, + "step": 105999 + }, + { + "epoch": 0.02, + "grad_norm": 0.6346383094787598, + "learning_rate": 6.45430038134704e-05, + "loss": 0.5582, + "step": 106000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5525758862495422, + "eval_runtime": 151.5121, + "eval_samples_per_second": 101.926, + "eval_steps_per_second": 0.799, + "step": 106000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.103689670562744, + "loss_rtd": 0.31973427534103394, + "loss_sent": 0.2565765976905823, + "loss_sod": 0.07389902323484421, + "loss_total": 0.650209903717041, + "step": 106099 + }, + { + "epoch": 0.020198, + "loss_gen": 4.885805606842041, + "loss_rtd": 0.32697972655296326, + "loss_sent": 0.1322324126958847, + "loss_sod": 0.06987996399402618, + "loss_total": 0.5290921330451965, + "step": 106099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.8351715803146362, + "learning_rate": 6.451263959579519e-05, + "loss": 0.5658, + "step": 106100 + }, + { + "epoch": 0.020398, + "loss_gen": 4.95379638671875, + "loss_rtd": 0.3120945692062378, + "loss_sent": 0.13482888042926788, + "loss_sod": 0.13001154363155365, + "loss_total": 0.5769349932670593, + "step": 106199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.018063545227051, + "loss_rtd": 0.32068923115730286, + "loss_sent": 0.276826411485672, + "loss_sod": 0.1472255289554596, + "loss_total": 0.7447412014007568, + "step": 106199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.67173171043396, + "learning_rate": 6.44822695324305e-05, + "loss": 0.5718, + "step": 106200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.010893821716309, + "loss_rtd": 0.32713550329208374, + "loss_sent": 0.14166052639484406, + "loss_sod": 0.062498897314071655, + "loss_total": 0.5312949419021606, + "step": 106299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.061027526855469, + "loss_rtd": 0.2854859530925751, + "loss_sent": 0.315373033285141, + "loss_sod": 0.1234944760799408, + "loss_total": 0.7243534326553345, + "step": 106299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.8546362519264221, + "learning_rate": 6.445189363560936e-05, + "loss": 0.5718, + "step": 106300 + }, + { + "epoch": 0.020798, + "loss_gen": 4.929854393005371, + "loss_rtd": 0.3133118748664856, + "loss_sent": 0.1772003173828125, + "loss_sod": 0.07359988242387772, + "loss_total": 0.5641120672225952, + "step": 106399 + }, + { + "epoch": 0.020798, + "loss_gen": 4.964849472045898, + "loss_rtd": 0.30542078614234924, + "loss_sent": 0.23408684134483337, + "loss_sod": 0.02591603808104992, + "loss_total": 0.5654236674308777, + "step": 106399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.9423354268074036, + "learning_rate": 6.44215119175672e-05, + "loss": 0.5752, + "step": 106400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.083122730255127, + "loss_rtd": 0.3306697607040405, + "loss_sent": 0.13572731614112854, + "loss_sod": 0.04980308562517166, + "loss_total": 0.5162001848220825, + "step": 106499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.144300937652588, + "loss_rtd": 0.316855251789093, + "loss_sent": 0.11016371846199036, + "loss_sod": 0.0425821915268898, + "loss_total": 0.4696011543273926, + "step": 106499 + }, + { + "epoch": 0.021, + "grad_norm": 0.7220878005027771, + "learning_rate": 6.439112439054176e-05, + "loss": 0.564, + "step": 106500 + }, + { + "epoch": 0.021198, + "loss_gen": 4.594676971435547, + "loss_rtd": 0.2950240969657898, + "loss_sent": 0.1877138316631317, + "loss_sod": 0.03032473661005497, + "loss_total": 0.5130626559257507, + "step": 106599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.0950751304626465, + "loss_rtd": 0.3105525076389313, + "loss_sent": 0.09172980487346649, + "loss_sod": 0.17428332567214966, + "loss_total": 0.5765656232833862, + "step": 106599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.0337969064712524, + "learning_rate": 6.436073106677315e-05, + "loss": 0.5684, + "step": 106600 + }, + { + "epoch": 0.021398, + "loss_gen": 3.9136695861816406, + "loss_rtd": 0.29337170720100403, + "loss_sent": 5.8220874052494764e-05, + "loss_sod": 0.1815156638622284, + "loss_total": 0.474945604801178, + "step": 106699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.0382795333862305, + "loss_rtd": 0.31321319937705994, + "loss_sent": 0.07537049055099487, + "loss_sod": 0.03697393834590912, + "loss_total": 0.42555761337280273, + "step": 106699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.93421471118927, + "learning_rate": 6.433033195850378e-05, + "loss": 0.5693, + "step": 106700 + }, + { + "epoch": 0.021598, + "loss_gen": 4.9295172691345215, + "loss_rtd": 0.32531750202178955, + "loss_sent": 0.4898681640625, + "loss_sod": 0.09437037259340286, + "loss_total": 0.9095560312271118, + "step": 106799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.337591648101807, + "loss_rtd": 0.31953203678131104, + "loss_sent": 0.16137145459651947, + "loss_sod": 0.08585690706968307, + "loss_total": 0.5667604207992554, + "step": 106799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.7039817571640015, + "learning_rate": 6.429992707797838e-05, + "loss": 0.5643, + "step": 106800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.099813938140869, + "loss_rtd": 0.3315562307834625, + "loss_sent": 0.15979455411434174, + "loss_sod": 0.040022898465394974, + "loss_total": 0.5313736796379089, + "step": 106899 + }, + { + "epoch": 0.021798, + "loss_gen": 4.756868362426758, + "loss_rtd": 0.2984583377838135, + "loss_sent": 0.11247531324625015, + "loss_sod": 0.11603482067584991, + "loss_total": 0.5269684791564941, + "step": 106899 + }, + { + "epoch": 0.0218, + "grad_norm": 0.9488338232040405, + "learning_rate": 6.42695164374441e-05, + "loss": 0.5866, + "step": 106900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.187564373016357, + "loss_rtd": 0.32474082708358765, + "loss_sent": 0.31143543124198914, + "loss_sod": 0.024499859660863876, + "loss_total": 0.660676121711731, + "step": 106999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.2301740646362305, + "loss_rtd": 0.3165074586868286, + "loss_sent": 0.09302461892366409, + "loss_sod": 0.07346178591251373, + "loss_total": 0.48299384117126465, + "step": 106999 + }, + { + "epoch": 0.022, + "grad_norm": 0.9418954253196716, + "learning_rate": 6.423910004915029e-05, + "loss": 0.5586, + "step": 107000 + }, + { + "epoch": 0.022, + "eval_loss": 0.5417913198471069, + "eval_runtime": 151.1415, + "eval_samples_per_second": 102.176, + "eval_steps_per_second": 0.801, + "step": 107000 + }, + { + "epoch": 0.022198, + "loss_gen": 4.9591755867004395, + "loss_rtd": 0.3148433566093445, + "loss_sent": 0.2994132936000824, + "loss_sod": 0.047530196607112885, + "loss_total": 0.6617868542671204, + "step": 107099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.875545978546143, + "loss_rtd": 0.3147640824317932, + "loss_sent": 0.4485037922859192, + "loss_sod": 0.042732372879981995, + "loss_total": 0.8060002326965332, + "step": 107099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.8302114009857178, + "learning_rate": 6.420867792534869e-05, + "loss": 0.5603, + "step": 107100 + }, + { + "epoch": 0.022398, + "loss_gen": 4.373366355895996, + "loss_rtd": 0.30908501148223877, + "loss_sent": 0.3430616855621338, + "loss_sod": 0.0285382941365242, + "loss_total": 0.6806849837303162, + "step": 107199 + }, + { + "epoch": 0.022398, + "loss_gen": 4.641295909881592, + "loss_rtd": 0.3117026388645172, + "loss_sent": 0.1315881460905075, + "loss_sod": 0.016390351578593254, + "loss_total": 0.4596811532974243, + "step": 107199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.9939401745796204, + "learning_rate": 6.417825007829331e-05, + "loss": 0.5817, + "step": 107200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.970827102661133, + "loss_rtd": 0.32075393199920654, + "loss_sent": 0.13476908206939697, + "loss_sod": 0.12782539427280426, + "loss_total": 0.5833483934402466, + "step": 107299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.014376163482666, + "loss_rtd": 0.30996695160865784, + "loss_sent": 0.11436322331428528, + "loss_sod": 0.05173637717962265, + "loss_total": 0.47606655955314636, + "step": 107299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.8865584135055542, + "learning_rate": 6.414781652024051e-05, + "loss": 0.5686, + "step": 107300 + }, + { + "epoch": 0.022798, + "loss_gen": 4.803501605987549, + "loss_rtd": 0.29663944244384766, + "loss_sent": 0.28819388151168823, + "loss_sod": 0.01116451807320118, + "loss_total": 0.5959978103637695, + "step": 107399 + }, + { + "epoch": 0.022798, + "loss_gen": 4.872408866882324, + "loss_rtd": 0.31238287687301636, + "loss_sent": 0.44345003366470337, + "loss_sod": 0.07661646604537964, + "loss_total": 0.8324493765830994, + "step": 107399 + }, + { + "epoch": 0.0228, + "grad_norm": 2.490213394165039, + "learning_rate": 6.411737726344888e-05, + "loss": 0.5807, + "step": 107400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.940255165100098, + "loss_rtd": 0.30571743845939636, + "loss_sent": 0.35065779089927673, + "loss_sod": 0.020769037306308746, + "loss_total": 0.6771442890167236, + "step": 107499 + }, + { + "epoch": 0.022998, + "loss_gen": 4.842052459716797, + "loss_rtd": 0.31405186653137207, + "loss_sent": 0.09119933098554611, + "loss_sod": 0.10954436659812927, + "loss_total": 0.5147955417633057, + "step": 107499 + }, + { + "epoch": 0.023, + "grad_norm": 0.7584764957427979, + "learning_rate": 6.408693232017942e-05, + "loss": 0.5705, + "step": 107500 + }, + { + "epoch": 0.023198, + "loss_gen": 3.9214885234832764, + "loss_rtd": 0.2983987033367157, + "loss_sent": 4.374084892333485e-05, + "loss_sod": 0.18205353617668152, + "loss_total": 0.48049598932266235, + "step": 107599 + }, + { + "epoch": 0.023198, + "loss_gen": 4.829421520233154, + "loss_rtd": 0.31019821763038635, + "loss_sent": 0.2678295969963074, + "loss_sod": 0.009797907434403896, + "loss_total": 0.5878257155418396, + "step": 107599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.0991127490997314, + "learning_rate": 6.405648170269527e-05, + "loss": 0.574, + "step": 107600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.199046611785889, + "loss_rtd": 0.30022376775741577, + "loss_sent": 0.17597688734531403, + "loss_sod": 0.07369022816419601, + "loss_total": 0.5498908758163452, + "step": 107699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.044209003448486, + "loss_rtd": 0.30875247716903687, + "loss_sent": 0.5088533759117126, + "loss_sod": 0.029024748131632805, + "loss_total": 0.8466305732727051, + "step": 107699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.7089482545852661, + "learning_rate": 6.402602542326198e-05, + "loss": 0.5752, + "step": 107700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.051883220672607, + "loss_rtd": 0.2870909869670868, + "loss_sent": 0.04724877327680588, + "loss_sod": 0.07817958295345306, + "loss_total": 0.41251933574676514, + "step": 107799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.491959571838379, + "loss_rtd": 0.28452175855636597, + "loss_sent": 0.06092541664838791, + "loss_sod": 0.140193372964859, + "loss_total": 0.4856405556201935, + "step": 107799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.6783625483512878, + "learning_rate": 6.399556349414733e-05, + "loss": 0.5684, + "step": 107800 + }, + { + "epoch": 0.023798, + "loss_gen": 4.9519362449646, + "loss_rtd": 0.31674841046333313, + "loss_sent": 0.22679011523723602, + "loss_sod": 0.06879362463951111, + "loss_total": 0.6123321652412415, + "step": 107899 + }, + { + "epoch": 0.023798, + "loss_gen": 4.219686985015869, + "loss_rtd": 0.3118826150894165, + "loss_sent": 0.01116969808936119, + "loss_sod": 0.07467565685510635, + "loss_total": 0.39772796630859375, + "step": 107899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.7484124302864075, + "learning_rate": 6.396509592762137e-05, + "loss": 0.5724, + "step": 107900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.6382670402526855, + "loss_rtd": 0.3116965889930725, + "loss_sent": 0.08824295550584793, + "loss_sod": 0.06433819979429245, + "loss_total": 0.4642777442932129, + "step": 107999 + }, + { + "epoch": 0.023998, + "loss_gen": 4.553746223449707, + "loss_rtd": 0.30597829818725586, + "loss_sent": 0.062276970595121384, + "loss_sod": 0.16350418329238892, + "loss_total": 0.5317594408988953, + "step": 107999 + }, + { + "epoch": 0.024, + "grad_norm": 0.9666557312011719, + "learning_rate": 6.393462273595644e-05, + "loss": 0.571, + "step": 108000 + }, + { + "epoch": 0.024, + "eval_loss": 0.5461033582687378, + "eval_runtime": 151.3137, + "eval_samples_per_second": 102.059, + "eval_steps_per_second": 0.8, + "step": 108000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.0266947746276855, + "loss_rtd": 0.29067620635032654, + "loss_sent": 0.1917770653963089, + "loss_sod": 0.01880338229238987, + "loss_total": 0.5012566447257996, + "step": 108099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.079265117645264, + "loss_rtd": 0.32476240396499634, + "loss_sent": 0.09011615067720413, + "loss_sod": 0.16986671090126038, + "loss_total": 0.5847452878952026, + "step": 108099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.7331221103668213, + "learning_rate": 6.390414393142716e-05, + "loss": 0.5669, + "step": 108100 + }, + { + "epoch": 0.024398, + "loss_gen": 4.8616862297058105, + "loss_rtd": 0.30674853920936584, + "loss_sent": 0.06683506071567535, + "loss_sod": 0.036662496626377106, + "loss_total": 0.4102460741996765, + "step": 108199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.296269416809082, + "loss_rtd": 0.31321123242378235, + "loss_sent": 0.1383715271949768, + "loss_sod": 0.11871857941150665, + "loss_total": 0.570301353931427, + "step": 108199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.399257779121399, + "learning_rate": 6.387365952631034e-05, + "loss": 0.5699, + "step": 108200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.698439121246338, + "loss_rtd": 0.3081118166446686, + "loss_sent": 0.12159612774848938, + "loss_sod": 0.041703373193740845, + "loss_total": 0.4714113175868988, + "step": 108299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.076000213623047, + "loss_rtd": 0.32107752561569214, + "loss_sent": 0.11131883412599564, + "loss_sod": 0.03802306577563286, + "loss_total": 0.47041940689086914, + "step": 108299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.8935823440551758, + "learning_rate": 6.384316953288514e-05, + "loss": 0.5625, + "step": 108300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.059536457061768, + "loss_rtd": 0.30067944526672363, + "loss_sent": 0.1602024883031845, + "loss_sod": 0.13888368010520935, + "loss_total": 0.5997655987739563, + "step": 108399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.356949329376221, + "loss_rtd": 0.3023897707462311, + "loss_sent": 6.415346433641389e-05, + "loss_sod": 0.08302780985832214, + "loss_total": 0.38548174500465393, + "step": 108399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.9004961848258972, + "learning_rate": 6.38126739634329e-05, + "loss": 0.5798, + "step": 108400 + }, + { + "epoch": 0.024998, + "loss_gen": 4.912137031555176, + "loss_rtd": 0.3300974369049072, + "loss_sent": 0.2669000029563904, + "loss_sod": 0.017181701958179474, + "loss_total": 0.6141791343688965, + "step": 108499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.501112937927246, + "loss_rtd": 0.31232425570487976, + "loss_sent": 0.0987105593085289, + "loss_sod": 0.0689978376030922, + "loss_total": 0.48003265261650085, + "step": 108499 + }, + { + "epoch": 0.025, + "grad_norm": 1.2480976581573486, + "learning_rate": 6.378217283023726e-05, + "loss": 0.5556, + "step": 108500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.071778774261475, + "loss_rtd": 0.30955711007118225, + "loss_sent": 0.315388023853302, + "loss_sod": 0.025845076888799667, + "loss_total": 0.6507902145385742, + "step": 108599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.081519603729248, + "loss_rtd": 0.33002594113349915, + "loss_sent": 0.24318280816078186, + "loss_sod": 0.07440317422151566, + "loss_total": 0.6476119160652161, + "step": 108599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.0340014696121216, + "learning_rate": 6.375166614558403e-05, + "loss": 0.5763, + "step": 108600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.8062615394592285, + "loss_rtd": 0.30479082465171814, + "loss_sent": 0.3667656183242798, + "loss_sod": 0.09619477391242981, + "loss_total": 0.7677512168884277, + "step": 108699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.325409889221191, + "loss_rtd": 0.30163633823394775, + "loss_sent": 0.1088394746184349, + "loss_sod": 0.07651998102664948, + "loss_total": 0.48699578642845154, + "step": 108699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.312751054763794, + "learning_rate": 6.372115392176132e-05, + "loss": 0.5545, + "step": 108700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.001461505889893, + "loss_rtd": 0.31182214617729187, + "loss_sent": 0.1901216357946396, + "loss_sod": 0.10808823257684708, + "loss_total": 0.6100320219993591, + "step": 108799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.421943187713623, + "loss_rtd": 0.3268759846687317, + "loss_sent": 0.18132103979587555, + "loss_sod": 0.029783664271235466, + "loss_total": 0.5379806756973267, + "step": 108799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.7836019396781921, + "learning_rate": 6.369063617105947e-05, + "loss": 0.5809, + "step": 108800 + }, + { + "epoch": 0.025798, + "loss_gen": 4.891840934753418, + "loss_rtd": 0.3302566111087799, + "loss_sent": 0.06812560558319092, + "loss_sod": 0.018494369462132454, + "loss_total": 0.41687658429145813, + "step": 108899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.9688568115234375, + "loss_rtd": 0.322144478559494, + "loss_sent": 0.1739887148141861, + "loss_sod": 0.11990626156330109, + "loss_total": 0.6160394549369812, + "step": 108899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.6998794674873352, + "learning_rate": 6.366011290577098e-05, + "loss": 0.5624, + "step": 108900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.213585376739502, + "loss_rtd": 0.3215108811855316, + "loss_sent": 0.11756500601768494, + "loss_sod": 0.20318147540092468, + "loss_total": 0.6422573328018188, + "step": 108999 + }, + { + "epoch": 0.025998, + "loss_gen": 4.674625396728516, + "loss_rtd": 0.3096647560596466, + "loss_sent": 0.12946784496307373, + "loss_sod": 0.04208453372120857, + "loss_total": 0.4812171459197998, + "step": 108999 + }, + { + "epoch": 0.026, + "grad_norm": 1.1656898260116577, + "learning_rate": 6.362958413819067e-05, + "loss": 0.5861, + "step": 109000 + }, + { + "epoch": 0.026, + "eval_loss": 0.5389401912689209, + "eval_runtime": 151.381, + "eval_samples_per_second": 102.014, + "eval_steps_per_second": 0.799, + "step": 109000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.878951072692871, + "loss_rtd": 0.3325100839138031, + "loss_sent": 0.1492864340543747, + "loss_sod": 0.06057453528046608, + "loss_total": 0.5423710346221924, + "step": 109099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.216949939727783, + "loss_rtd": 0.31064364314079285, + "loss_sent": 0.18343260884284973, + "loss_sod": 0.09501229971647263, + "loss_total": 0.5890885591506958, + "step": 109099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.1586878299713135, + "learning_rate": 6.359904988061548e-05, + "loss": 0.5564, + "step": 109100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.054717063903809, + "loss_rtd": 0.30129122734069824, + "loss_sent": 0.23554277420043945, + "loss_sod": 0.02523457258939743, + "loss_total": 0.5620685815811157, + "step": 109199 + }, + { + "epoch": 0.026398, + "loss_gen": 4.210378646850586, + "loss_rtd": 0.3084638714790344, + "loss_sent": 0.0177040733397007, + "loss_sod": 0.09642495959997177, + "loss_total": 0.4225929081439972, + "step": 109199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.7733477354049683, + "learning_rate": 6.356851014534464e-05, + "loss": 0.5741, + "step": 109200 + }, + { + "epoch": 0.026598, + "loss_gen": 4.9100871086120605, + "loss_rtd": 0.3333955705165863, + "loss_sent": 0.23439662158489227, + "loss_sod": 0.012360401451587677, + "loss_total": 0.5801525712013245, + "step": 109299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.150571823120117, + "loss_rtd": 0.33575183153152466, + "loss_sent": 0.2101443111896515, + "loss_sod": 0.10955361276865005, + "loss_total": 0.6554497480392456, + "step": 109299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.0756936073303223, + "learning_rate": 6.353796494467952e-05, + "loss": 0.5541, + "step": 109300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.095142841339111, + "loss_rtd": 0.30467578768730164, + "loss_sent": 0.4449251592159271, + "loss_sod": 0.011433375999331474, + "loss_total": 0.7610343098640442, + "step": 109399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.259122848510742, + "loss_rtd": 0.33221355080604553, + "loss_sent": 0.11076716333627701, + "loss_sod": 0.04559096321463585, + "loss_total": 0.4885716736316681, + "step": 109399 + }, + { + "epoch": 0.0268, + "grad_norm": 0.7708160281181335, + "learning_rate": 6.350741429092375e-05, + "loss": 0.5584, + "step": 109400 + }, + { + "epoch": 0.026998, + "loss_gen": 4.958425045013428, + "loss_rtd": 0.3213268220424652, + "loss_sent": 0.23420964181423187, + "loss_sod": 0.027131108567118645, + "loss_total": 0.5826675891876221, + "step": 109499 + }, + { + "epoch": 0.026998, + "loss_gen": 4.767280578613281, + "loss_rtd": 0.31559616327285767, + "loss_sent": 0.04378554970026016, + "loss_sod": 0.0120012778788805, + "loss_total": 0.3713829815387726, + "step": 109499 + }, + { + "epoch": 0.027, + "grad_norm": 0.6379171013832092, + "learning_rate": 6.347685819638313e-05, + "loss": 0.5836, + "step": 109500 + }, + { + "epoch": 0.027198, + "loss_gen": 4.930249214172363, + "loss_rtd": 0.2870037853717804, + "loss_sent": 0.15182381868362427, + "loss_sod": 0.09946365654468536, + "loss_total": 0.5382912755012512, + "step": 109599 + }, + { + "epoch": 0.027198, + "loss_gen": 4.758519649505615, + "loss_rtd": 0.31260746717453003, + "loss_sent": 0.1231798604130745, + "loss_sod": 0.04067111387848854, + "loss_total": 0.47645843029022217, + "step": 109599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.47101891040802, + "learning_rate": 6.344629667336563e-05, + "loss": 0.5544, + "step": 109600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.018040180206299, + "loss_rtd": 0.3060138523578644, + "loss_sent": 0.21603040397167206, + "loss_sod": 0.01728692650794983, + "loss_total": 0.5393311977386475, + "step": 109699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.17416524887085, + "loss_rtd": 0.30740025639533997, + "loss_sent": 0.21488739550113678, + "loss_sod": 0.16485324501991272, + "loss_total": 0.6871408820152283, + "step": 109699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.9327196478843689, + "learning_rate": 6.341572973418142e-05, + "loss": 0.5667, + "step": 109700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.905360698699951, + "loss_rtd": 0.30515894293785095, + "loss_sent": 0.07022082060575485, + "loss_sod": 0.028215806931257248, + "loss_total": 0.40359556674957275, + "step": 109799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.34921407699585, + "loss_rtd": 0.3019445836544037, + "loss_sent": 0.08953826874494553, + "loss_sod": 0.02431022748351097, + "loss_total": 0.4157930910587311, + "step": 109799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.6951218247413635, + "learning_rate": 6.33851573911429e-05, + "loss": 0.561, + "step": 109800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.802526473999023, + "loss_rtd": 0.29821139574050903, + "loss_sent": 0.23770183324813843, + "loss_sod": 0.06610836088657379, + "loss_total": 0.6020215749740601, + "step": 109899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.04287576675415, + "loss_rtd": 0.3214740455150604, + "loss_sent": 0.3329661786556244, + "loss_sod": 0.08987447619438171, + "loss_total": 0.7443146705627441, + "step": 109899 + }, + { + "epoch": 0.0278, + "grad_norm": 3.2666738033294678, + "learning_rate": 6.335457965656459e-05, + "loss": 0.5639, + "step": 109900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.238755226135254, + "loss_rtd": 0.3152182698249817, + "loss_sent": 0.31257209181785583, + "loss_sod": 0.03364112973213196, + "loss_total": 0.6614314913749695, + "step": 109999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.10002326965332, + "loss_rtd": 0.30614298582077026, + "loss_sent": 0.4742734432220459, + "loss_sod": 0.029593631625175476, + "loss_total": 0.8100100755691528, + "step": 109999 + }, + { + "epoch": 0.028, + "grad_norm": 2.491260290145874, + "learning_rate": 6.332399654276318e-05, + "loss": 0.5602, + "step": 110000 + }, + { + "epoch": 0.028, + "eval_loss": 0.5431951880455017, + "eval_runtime": 151.4848, + "eval_samples_per_second": 101.944, + "eval_steps_per_second": 0.799, + "step": 110000 + }, + { + "epoch": 0.028198, + "loss_gen": 4.500039577484131, + "loss_rtd": 0.298180490732193, + "loss_sent": 0.009449800476431847, + "loss_sod": 0.1921154111623764, + "loss_total": 0.4997456967830658, + "step": 110099 + }, + { + "epoch": 0.028198, + "loss_gen": 4.855717658996582, + "loss_rtd": 0.3043445944786072, + "loss_sent": 0.13195885717868805, + "loss_sod": 0.00961886253207922, + "loss_total": 0.445922315120697, + "step": 110099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.0164231061935425, + "learning_rate": 6.329340806205755e-05, + "loss": 0.5541, + "step": 110100 + }, + { + "epoch": 0.028398, + "loss_gen": 4.283051490783691, + "loss_rtd": 0.2776123285293579, + "loss_sent": 0.036811452358961105, + "loss_sod": 0.01744782365858555, + "loss_total": 0.3318715989589691, + "step": 110199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.16132116317749, + "loss_rtd": 0.3195195496082306, + "loss_sent": 0.2174578309059143, + "loss_sod": 0.043812330812215805, + "loss_total": 0.5807896852493286, + "step": 110199 + }, + { + "epoch": 0.0284, + "grad_norm": 0.746578574180603, + "learning_rate": 6.326281422676874e-05, + "loss": 0.5578, + "step": 110200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.145158767700195, + "loss_rtd": 0.32694098353385925, + "loss_sent": 0.2601774334907532, + "loss_sod": 0.08325707167387009, + "loss_total": 0.6703754663467407, + "step": 110299 + }, + { + "epoch": 0.028598, + "loss_gen": 4.930212497711182, + "loss_rtd": 0.3273719251155853, + "loss_sent": 0.05658649280667305, + "loss_sod": 0.15999355912208557, + "loss_total": 0.5439519882202148, + "step": 110299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.4284107685089111, + "learning_rate": 6.323221504921995e-05, + "loss": 0.5784, + "step": 110300 + }, + { + "epoch": 0.028798, + "loss_gen": 3.761293649673462, + "loss_rtd": 0.28234994411468506, + "loss_sent": 0.0022282814607024193, + "loss_sod": 0.23036682605743408, + "loss_total": 0.5149450302124023, + "step": 110399 + }, + { + "epoch": 0.028798, + "loss_gen": 4.16573429107666, + "loss_rtd": 0.29319512844085693, + "loss_sent": 0.009119627065956593, + "loss_sod": 0.20074167847633362, + "loss_total": 0.5030564069747925, + "step": 110399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.2292065620422363, + "learning_rate": 6.320161054173652e-05, + "loss": 0.5606, + "step": 110400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.747781276702881, + "loss_rtd": 0.3284401297569275, + "loss_sent": 0.26554733514785767, + "loss_sod": 0.05696718767285347, + "loss_total": 0.6509546637535095, + "step": 110499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.256872653961182, + "loss_rtd": 0.31553196907043457, + "loss_sent": 0.1591951549053192, + "loss_sod": 0.1461992859840393, + "loss_total": 0.6209263801574707, + "step": 110499 + }, + { + "epoch": 0.029, + "grad_norm": 0.9094834923744202, + "learning_rate": 6.317100071664595e-05, + "loss": 0.5614, + "step": 110500 + }, + { + "epoch": 0.029198, + "loss_gen": 3.910529851913452, + "loss_rtd": 0.2881653904914856, + "loss_sent": 0.0019547692500054836, + "loss_sod": 0.08033779263496399, + "loss_total": 0.3704579472541809, + "step": 110599 + }, + { + "epoch": 0.029198, + "loss_gen": 4.869881629943848, + "loss_rtd": 0.3074661195278168, + "loss_sent": 0.8043997883796692, + "loss_sod": 0.09549353271722794, + "loss_total": 1.2073594331741333, + "step": 110599 + }, + { + "epoch": 0.0292, + "grad_norm": 3.183957099914551, + "learning_rate": 6.314038558627787e-05, + "loss": 0.5579, + "step": 110600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.859715938568115, + "loss_rtd": 0.3090853691101074, + "loss_sent": 0.3077383041381836, + "loss_sod": 0.0855005756020546, + "loss_total": 0.7023242712020874, + "step": 110699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.484714508056641, + "loss_rtd": 0.3146698474884033, + "loss_sent": 0.013457296416163445, + "loss_sod": 0.07126327604055405, + "loss_total": 0.39939042925834656, + "step": 110699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.0308226346969604, + "learning_rate": 6.310976516296403e-05, + "loss": 0.565, + "step": 110700 + }, + { + "epoch": 0.029598, + "loss_gen": 4.919672966003418, + "loss_rtd": 0.32212698459625244, + "loss_sent": 0.0972585380077362, + "loss_sod": 0.021064667031168938, + "loss_total": 0.44045019149780273, + "step": 110799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.028139114379883, + "loss_rtd": 0.3183032274246216, + "loss_sent": 0.15866515040397644, + "loss_sod": 0.09258978813886642, + "loss_total": 0.5695581436157227, + "step": 110799 + }, + { + "epoch": 0.0296, + "grad_norm": 0.5978352427482605, + "learning_rate": 6.307913945903836e-05, + "loss": 0.5615, + "step": 110800 + }, + { + "epoch": 0.029798, + "loss_gen": 4.213213920593262, + "loss_rtd": 0.2699766755104065, + "loss_sent": 0.01592285744845867, + "loss_sod": 0.09502089023590088, + "loss_total": 0.38092041015625, + "step": 110899 + }, + { + "epoch": 0.029798, + "loss_gen": 4.7946553230285645, + "loss_rtd": 0.3044602572917938, + "loss_sent": 0.29190877079963684, + "loss_sod": 0.06971128284931183, + "loss_total": 0.6660803556442261, + "step": 110899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.3571891784667969, + "learning_rate": 6.304850848683688e-05, + "loss": 0.5483, + "step": 110900 + }, + { + "epoch": 0.029998, + "loss_gen": 4.958065986633301, + "loss_rtd": 0.30979248881340027, + "loss_sent": 0.29292795062065125, + "loss_sod": 0.033625528216362, + "loss_total": 0.6363459825515747, + "step": 110999 + }, + { + "epoch": 0.029998, + "loss_gen": 4.840777397155762, + "loss_rtd": 0.31712937355041504, + "loss_sent": 0.16160918772220612, + "loss_sod": 0.09780505299568176, + "loss_total": 0.5765436291694641, + "step": 110999 + }, + { + "epoch": 0.03, + "grad_norm": 1.1442328691482544, + "learning_rate": 6.301787225869774e-05, + "loss": 0.5567, + "step": 111000 + }, + { + "epoch": 0.03, + "eval_loss": 0.540738046169281, + "eval_runtime": 152.7321, + "eval_samples_per_second": 101.112, + "eval_steps_per_second": 0.792, + "step": 111000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.099303245544434, + "loss_rtd": 0.30958980321884155, + "loss_sent": 0.26804983615875244, + "loss_sod": 0.062308840453624725, + "loss_total": 0.6399484872817993, + "step": 111099 + }, + { + "epoch": 0.030198, + "loss_gen": 4.889252662658691, + "loss_rtd": 0.30952370166778564, + "loss_sent": 0.10719144344329834, + "loss_sod": 0.03891471400856972, + "loss_total": 0.4556298553943634, + "step": 111099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.7303214073181152, + "learning_rate": 6.298723078696121e-05, + "loss": 0.5622, + "step": 111100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.152919769287109, + "loss_rtd": 0.3233211040496826, + "loss_sent": 0.2871219217777252, + "loss_sod": 0.07788588851690292, + "loss_total": 0.6883289217948914, + "step": 111199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.525146961212158, + "loss_rtd": 0.3192431628704071, + "loss_sent": 0.11062529683113098, + "loss_sod": 0.12471377104520798, + "loss_total": 0.5545822381973267, + "step": 111199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.2109558582305908, + "learning_rate": 6.295658408396968e-05, + "loss": 0.5503, + "step": 111200 + }, + { + "epoch": 0.030598, + "loss_gen": 4.999670505523682, + "loss_rtd": 0.3378047049045563, + "loss_sent": 0.269733726978302, + "loss_sod": 0.03210064768791199, + "loss_total": 0.6396390795707703, + "step": 111299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.20380163192749, + "loss_rtd": 0.29996007680892944, + "loss_sent": 0.2559049129486084, + "loss_sod": 0.06889547407627106, + "loss_total": 0.6247604489326477, + "step": 111299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.7519530057907104, + "learning_rate": 6.292593216206761e-05, + "loss": 0.5525, + "step": 111300 + }, + { + "epoch": 0.030798, + "loss_gen": 4.638796329498291, + "loss_rtd": 0.29377856850624084, + "loss_sent": 0.019329413771629333, + "loss_sod": 0.1504117250442505, + "loss_total": 0.4635196924209595, + "step": 111399 + }, + { + "epoch": 0.030798, + "loss_gen": 4.285656929016113, + "loss_rtd": 0.3159097731113434, + "loss_sent": 4.849431570619345e-05, + "loss_sod": 0.1864272654056549, + "loss_total": 0.5023855566978455, + "step": 111399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.7244387865066528, + "learning_rate": 6.289527503360162e-05, + "loss": 0.5685, + "step": 111400 + }, + { + "epoch": 0.030998, + "loss_gen": 4.955970287322998, + "loss_rtd": 0.3145253360271454, + "loss_sent": 0.11424721032381058, + "loss_sod": 0.05316298082470894, + "loss_total": 0.4819355309009552, + "step": 111499 + }, + { + "epoch": 0.030998, + "loss_gen": 4.991325378417969, + "loss_rtd": 0.31193283200263977, + "loss_sent": 0.24776963889598846, + "loss_sod": 0.08382590860128403, + "loss_total": 0.643528401851654, + "step": 111499 + }, + { + "epoch": 0.031, + "grad_norm": 1.2693592309951782, + "learning_rate": 6.28646127109204e-05, + "loss": 0.562, + "step": 111500 + }, + { + "epoch": 0.031198, + "loss_gen": 4.383149147033691, + "loss_rtd": 0.30271032452583313, + "loss_sent": 0.0012748053995892406, + "loss_sod": 0.08654949069023132, + "loss_total": 0.3905346095561981, + "step": 111599 + }, + { + "epoch": 0.031198, + "loss_gen": 4.844264507293701, + "loss_rtd": 0.3073599338531494, + "loss_sent": 0.10096272081136703, + "loss_sod": 0.01730138435959816, + "loss_total": 0.4256240427494049, + "step": 111599 + }, + { + "epoch": 0.0312, + "grad_norm": 0.8001429438591003, + "learning_rate": 6.283394520637472e-05, + "loss": 0.5531, + "step": 111600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.880539894104004, + "loss_rtd": 0.3117526173591614, + "loss_sent": 0.2095177322626114, + "loss_sod": 0.10011793673038483, + "loss_total": 0.6213882565498352, + "step": 111699 + }, + { + "epoch": 0.031398, + "loss_gen": 4.566366195678711, + "loss_rtd": 0.294606477022171, + "loss_sent": 0.12573687732219696, + "loss_sod": 0.020580457523465157, + "loss_total": 0.440923810005188, + "step": 111699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.7626075744628906, + "learning_rate": 6.280327253231743e-05, + "loss": 0.5586, + "step": 111700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.112703323364258, + "loss_rtd": 0.2861131727695465, + "loss_sent": 0.3115231692790985, + "loss_sod": 0.07299712300300598, + "loss_total": 0.6706334352493286, + "step": 111799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.371541500091553, + "loss_rtd": 0.313865602016449, + "loss_sent": 0.24734216928482056, + "loss_sod": 0.04733390361070633, + "loss_total": 0.6085416674613953, + "step": 111799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.8598159551620483, + "learning_rate": 6.277259470110351e-05, + "loss": 0.5561, + "step": 111800 + }, + { + "epoch": 0.031798, + "loss_gen": 4.814857482910156, + "loss_rtd": 0.29830074310302734, + "loss_sent": 0.10594409704208374, + "loss_sod": 0.10980981588363647, + "loss_total": 0.5140546560287476, + "step": 111899 + }, + { + "epoch": 0.031798, + "loss_gen": 4.281027793884277, + "loss_rtd": 0.29203304648399353, + "loss_sent": 0.3097190260887146, + "loss_sod": 0.075651615858078, + "loss_total": 0.6774036884307861, + "step": 111899 + }, + { + "epoch": 0.0318, + "grad_norm": 1.5515258312225342, + "learning_rate": 6.274191172508996e-05, + "loss": 0.5396, + "step": 111900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.902201175689697, + "loss_rtd": 0.3031350076198578, + "loss_sent": 0.0972568541765213, + "loss_sod": 0.03203858807682991, + "loss_total": 0.4324304461479187, + "step": 111999 + }, + { + "epoch": 0.031998, + "loss_gen": 4.321255683898926, + "loss_rtd": 0.29954472184181213, + "loss_sent": 0.00010922457295237109, + "loss_sod": 0.12528063356876373, + "loss_total": 0.42493459582328796, + "step": 111999 + }, + { + "epoch": 0.032, + "grad_norm": 0.8171465396881104, + "learning_rate": 6.271122361663589e-05, + "loss": 0.5774, + "step": 112000 + }, + { + "epoch": 0.032, + "eval_loss": 0.5389299392700195, + "eval_runtime": 151.411, + "eval_samples_per_second": 101.994, + "eval_steps_per_second": 0.799, + "step": 112000 + }, + { + "epoch": 0.032198, + "loss_gen": 5.150925636291504, + "loss_rtd": 0.318128377199173, + "loss_sent": 0.16503667831420898, + "loss_sod": 0.12780560553073883, + "loss_total": 0.610970675945282, + "step": 112099 + }, + { + "epoch": 0.032198, + "loss_gen": 4.960903167724609, + "loss_rtd": 0.29868510365486145, + "loss_sent": 0.15749751031398773, + "loss_sod": 0.04956481605768204, + "loss_total": 0.5057474374771118, + "step": 112099 + }, + { + "epoch": 0.0322, + "grad_norm": 0.807860791683197, + "learning_rate": 6.268053038810247e-05, + "loss": 0.5439, + "step": 112100 + }, + { + "epoch": 0.032398, + "loss_gen": 4.996148109436035, + "loss_rtd": 0.32346105575561523, + "loss_sent": 0.11034146696329117, + "loss_sod": 0.12658926844596863, + "loss_total": 0.5603917837142944, + "step": 112199 + }, + { + "epoch": 0.032398, + "loss_gen": 4.908397197723389, + "loss_rtd": 0.2976805865764618, + "loss_sent": 0.035902105271816254, + "loss_sod": 0.16037225723266602, + "loss_total": 0.49395495653152466, + "step": 112199 + }, + { + "epoch": 0.0324, + "grad_norm": 1.1983587741851807, + "learning_rate": 6.264983205185294e-05, + "loss": 0.5619, + "step": 112200 + }, + { + "epoch": 0.032598, + "loss_gen": 4.789726734161377, + "loss_rtd": 0.31488755345344543, + "loss_sent": 0.10511964559555054, + "loss_sod": 0.11555609107017517, + "loss_total": 0.5355632901191711, + "step": 112299 + }, + { + "epoch": 0.032598, + "loss_gen": 4.668126106262207, + "loss_rtd": 0.3064815104007721, + "loss_sent": 0.006027332507073879, + "loss_sod": 0.18640363216400146, + "loss_total": 0.4989124834537506, + "step": 112299 + }, + { + "epoch": 0.0326, + "grad_norm": 0.7658260464668274, + "learning_rate": 6.261912862025256e-05, + "loss": 0.565, + "step": 112300 + }, + { + "epoch": 0.032798, + "loss_gen": 5.2496418952941895, + "loss_rtd": 0.31157806515693665, + "loss_sent": 0.06531276553869247, + "loss_sod": 0.09153740108013153, + "loss_total": 0.46842822432518005, + "step": 112399 + }, + { + "epoch": 0.032798, + "loss_gen": 4.798717975616455, + "loss_rtd": 0.32129183411598206, + "loss_sent": 0.09239664673805237, + "loss_sod": 0.027722157537937164, + "loss_total": 0.441410630941391, + "step": 112399 + }, + { + "epoch": 0.0328, + "grad_norm": 0.933722734451294, + "learning_rate": 6.258842010566868e-05, + "loss": 0.5707, + "step": 112400 + }, + { + "epoch": 0.032998, + "loss_gen": 3.718125104904175, + "loss_rtd": 0.2656061053276062, + "loss_sent": 0.006414363626390696, + "loss_sod": 0.07887257635593414, + "loss_total": 0.3508930206298828, + "step": 112499 + }, + { + "epoch": 0.032998, + "loss_gen": 4.555234909057617, + "loss_rtd": 0.31273484230041504, + "loss_sent": 0.2504809498786926, + "loss_sod": 0.006745964288711548, + "loss_total": 0.5699617862701416, + "step": 112499 + }, + { + "epoch": 0.033, + "grad_norm": 0.8214619755744934, + "learning_rate": 6.255770652047069e-05, + "loss": 0.5472, + "step": 112500 + }, + { + "epoch": 0.033198, + "loss_gen": 3.9582273960113525, + "loss_rtd": 0.2800060510635376, + "loss_sent": 7.444770017173141e-05, + "loss_sod": 0.1791847199201584, + "loss_total": 0.45926523208618164, + "step": 112599 + }, + { + "epoch": 0.033198, + "loss_gen": 4.763845920562744, + "loss_rtd": 0.30309340357780457, + "loss_sent": 0.12869341671466827, + "loss_sod": 0.10470730811357498, + "loss_total": 0.5364941358566284, + "step": 112599 + }, + { + "epoch": 0.0332, + "grad_norm": 1.3438313007354736, + "learning_rate": 6.252698787703002e-05, + "loss": 0.5455, + "step": 112600 + }, + { + "epoch": 0.033398, + "loss_gen": 4.98195219039917, + "loss_rtd": 0.3181473910808563, + "loss_sent": 0.35770562291145325, + "loss_sod": 0.09960955381393433, + "loss_total": 0.7754625678062439, + "step": 112699 + }, + { + "epoch": 0.033398, + "loss_gen": 5.52577018737793, + "loss_rtd": 0.3122628629207611, + "loss_sent": 0.1191747710108757, + "loss_sod": 0.04338350147008896, + "loss_total": 0.47482115030288696, + "step": 112699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.301356554031372, + "learning_rate": 6.249626418772013e-05, + "loss": 0.5689, + "step": 112700 + }, + { + "epoch": 0.033598, + "loss_gen": 4.864354610443115, + "loss_rtd": 0.3088129758834839, + "loss_sent": 0.173867866396904, + "loss_sod": 0.045669399201869965, + "loss_total": 0.5283502340316772, + "step": 112799 + }, + { + "epoch": 0.033598, + "loss_gen": 4.882724761962891, + "loss_rtd": 0.3042221963405609, + "loss_sent": 0.09302534908056259, + "loss_sod": 0.007613973692059517, + "loss_total": 0.40486153960227966, + "step": 112799 + }, + { + "epoch": 0.0336, + "grad_norm": 0.7351897358894348, + "learning_rate": 6.24655354649165e-05, + "loss": 0.5701, + "step": 112800 + }, + { + "epoch": 0.033798, + "loss_gen": 5.298699855804443, + "loss_rtd": 0.30309218168258667, + "loss_sent": 0.1564468890428543, + "loss_sod": 0.06765016913414001, + "loss_total": 0.5271892547607422, + "step": 112899 + }, + { + "epoch": 0.033798, + "loss_gen": 4.842959880828857, + "loss_rtd": 0.299211323261261, + "loss_sent": 0.34119969606399536, + "loss_sod": 0.015285290777683258, + "loss_total": 0.6556962728500366, + "step": 112899 + }, + { + "epoch": 0.0338, + "grad_norm": 1.1486371755599976, + "learning_rate": 6.24348017209967e-05, + "loss": 0.5554, + "step": 112900 + }, + { + "epoch": 0.033998, + "loss_gen": 5.233341693878174, + "loss_rtd": 0.3070437014102936, + "loss_sent": 0.08728650212287903, + "loss_sod": 0.06658341735601425, + "loss_total": 0.46091362833976746, + "step": 112999 + }, + { + "epoch": 0.033998, + "loss_gen": 4.29036808013916, + "loss_rtd": 0.28697866201400757, + "loss_sent": 0.026916533708572388, + "loss_sod": 0.03943333402276039, + "loss_total": 0.35332852602005005, + "step": 112999 + }, + { + "epoch": 0.034, + "grad_norm": 0.8945763111114502, + "learning_rate": 6.240406296834024e-05, + "loss": 0.5482, + "step": 113000 + }, + { + "epoch": 0.034, + "eval_loss": 0.528028130531311, + "eval_runtime": 151.2941, + "eval_samples_per_second": 102.073, + "eval_steps_per_second": 0.8, + "step": 113000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.800609588623047, + "loss_rtd": 0.3146374821662903, + "loss_sent": 0.05099153518676758, + "loss_sod": 0.02965501882135868, + "loss_total": 0.3952840268611908, + "step": 113099 + }, + { + "epoch": 0.000198, + "loss_gen": 4.137120723724365, + "loss_rtd": 0.28727754950523376, + "loss_sent": 0.022033551707863808, + "loss_sod": 0.05693252012133598, + "loss_total": 0.3662436008453369, + "step": 113099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.5575613379478455, + "learning_rate": 6.23733192193287e-05, + "loss": 0.541, + "step": 113100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.011971473693848, + "loss_rtd": 0.29916349053382874, + "loss_sent": 0.14115023612976074, + "loss_sod": 0.015926435589790344, + "loss_total": 0.456240177154541, + "step": 113199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.719551086425781, + "loss_rtd": 0.3118532598018646, + "loss_sent": 0.20628313720226288, + "loss_sod": 0.03100305050611496, + "loss_total": 0.5491394400596619, + "step": 113199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.85649573802948, + "learning_rate": 6.234257048634566e-05, + "loss": 0.5686, + "step": 113200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.200534820556641, + "loss_rtd": 0.3089621067047119, + "loss_sent": 0.21121861040592194, + "loss_sod": 0.07619243115186691, + "loss_total": 0.5963731408119202, + "step": 113299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.303565979003906, + "loss_rtd": 0.31125354766845703, + "loss_sent": 0.07995057851076126, + "loss_sod": 0.15613064169883728, + "loss_total": 0.5473347902297974, + "step": 113299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.0480402708053589, + "learning_rate": 6.231181678177671e-05, + "loss": 0.55, + "step": 113300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.183351993560791, + "loss_rtd": 0.2881074845790863, + "loss_sent": 0.24594196677207947, + "loss_sod": 0.09231461584568024, + "loss_total": 0.6263641119003296, + "step": 113399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.683897495269775, + "loss_rtd": 0.3044544756412506, + "loss_sent": 0.19104251265525818, + "loss_sod": 0.02554282546043396, + "loss_total": 0.5210398435592651, + "step": 113399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.3088388442993164, + "learning_rate": 6.228105811800942e-05, + "loss": 0.5593, + "step": 113400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.025886058807373, + "loss_rtd": 0.3033176362514496, + "loss_sent": 0.29587307572364807, + "loss_sod": 0.08863915503025055, + "loss_total": 0.687829852104187, + "step": 113499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.1141157150268555, + "loss_rtd": 0.3167629539966583, + "loss_sent": 0.18045826256275177, + "loss_sod": 0.02749168500304222, + "loss_total": 0.5247129201889038, + "step": 113499 + }, + { + "epoch": 0.001, + "grad_norm": 1.1944942474365234, + "learning_rate": 6.225029450743341e-05, + "loss": 0.5393, + "step": 113500 + }, + { + "epoch": 0.001198, + "loss_gen": 4.8736982345581055, + "loss_rtd": 0.28981536626815796, + "loss_sent": 0.18818746507167816, + "loss_sod": 0.059310294687747955, + "loss_total": 0.5373131036758423, + "step": 113599 + }, + { + "epoch": 0.001198, + "loss_gen": 4.904609680175781, + "loss_rtd": 0.3106389045715332, + "loss_sent": 0.07906059920787811, + "loss_sod": 0.08784067630767822, + "loss_total": 0.47754019498825073, + "step": 113599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.537190556526184, + "learning_rate": 6.221952596244022e-05, + "loss": 0.5567, + "step": 113600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.23633337020874, + "loss_rtd": 0.2887208163738251, + "loss_sent": 0.007274389732629061, + "loss_sod": 0.179124116897583, + "loss_total": 0.47511932253837585, + "step": 113699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.34153938293457, + "loss_rtd": 0.3088279366493225, + "loss_sent": 0.17111530900001526, + "loss_sod": 0.07337859272956848, + "loss_total": 0.5533218383789062, + "step": 113699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.2073296308517456, + "learning_rate": 6.218875249542343e-05, + "loss": 0.5628, + "step": 113700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.831704616546631, + "loss_rtd": 0.31238698959350586, + "loss_sent": 0.22992931306362152, + "loss_sod": 0.08314387500286102, + "loss_total": 0.6254602074623108, + "step": 113799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.000875949859619, + "loss_rtd": 0.3161376118659973, + "loss_sent": 0.6010262370109558, + "loss_sod": 0.08366889506578445, + "loss_total": 1.0008327960968018, + "step": 113799 + }, + { + "epoch": 0.0016, + "grad_norm": 2.048090934753418, + "learning_rate": 6.215797411877862e-05, + "loss": 0.5543, + "step": 113800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.704843521118164, + "loss_rtd": 0.2941146194934845, + "loss_sent": 0.04738251864910126, + "loss_sod": 0.020208284258842468, + "loss_total": 0.3617054224014282, + "step": 113899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.110398292541504, + "loss_rtd": 0.3091120421886444, + "loss_sent": 0.4693821668624878, + "loss_sod": 0.10883425176143646, + "loss_total": 0.8873284459114075, + "step": 113899 + }, + { + "epoch": 0.0018, + "grad_norm": 2.1799964904785156, + "learning_rate": 6.212719084490326e-05, + "loss": 0.5514, + "step": 113900 + }, + { + "epoch": 0.001998, + "loss_gen": 4.731112003326416, + "loss_rtd": 0.2894507348537445, + "loss_sent": 0.19872452318668365, + "loss_sod": 0.04754795879125595, + "loss_total": 0.5357232093811035, + "step": 113999 + }, + { + "epoch": 0.001998, + "loss_gen": 4.918564796447754, + "loss_rtd": 0.32017087936401367, + "loss_sent": 0.4357863962650299, + "loss_sod": 0.017332345247268677, + "loss_total": 0.7732896208763123, + "step": 113999 + }, + { + "epoch": 0.002, + "grad_norm": 1.3521355390548706, + "learning_rate": 6.20964026861969e-05, + "loss": 0.5652, + "step": 114000 + }, + { + "epoch": 0.002, + "eval_loss": 0.5385438799858093, + "eval_runtime": 153.5069, + "eval_samples_per_second": 100.601, + "eval_steps_per_second": 0.788, + "step": 114000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.050449848175049, + "loss_rtd": 0.3129706084728241, + "loss_sent": 0.256864070892334, + "loss_sod": 0.022321533411741257, + "loss_total": 0.592156171798706, + "step": 114099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.117237567901611, + "loss_rtd": 0.31384947896003723, + "loss_sent": 0.3733851909637451, + "loss_sod": 0.1366913765668869, + "loss_total": 0.8239260315895081, + "step": 114099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.153089165687561, + "learning_rate": 6.206560965506097e-05, + "loss": 0.55, + "step": 114100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.886935710906982, + "loss_rtd": 0.30306994915008545, + "loss_sent": 0.1534023880958557, + "loss_sod": 0.05112428963184357, + "loss_total": 0.5075966119766235, + "step": 114199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.999471664428711, + "loss_rtd": 0.309175044298172, + "loss_sent": 0.09727267175912857, + "loss_sod": 0.06468882411718369, + "loss_total": 0.47113654017448425, + "step": 114199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.8409729599952698, + "learning_rate": 6.203481176389892e-05, + "loss": 0.5552, + "step": 114200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.151288032531738, + "loss_rtd": 0.31213319301605225, + "loss_sent": 0.0629645511507988, + "loss_sod": 0.10300157964229584, + "loss_total": 0.47809934616088867, + "step": 114299 + }, + { + "epoch": 0.002598, + "loss_gen": 4.788854122161865, + "loss_rtd": 0.28913843631744385, + "loss_sent": 0.13088183104991913, + "loss_sod": 0.0693574994802475, + "loss_total": 0.48937779664993286, + "step": 114299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.7904286980628967, + "learning_rate": 6.200400902511612e-05, + "loss": 0.5548, + "step": 114300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.06631326675415, + "loss_rtd": 0.2967974841594696, + "loss_sent": 0.24668163061141968, + "loss_sod": 0.03065936453640461, + "loss_total": 0.5741385221481323, + "step": 114399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.099505424499512, + "loss_rtd": 0.3112536072731018, + "loss_sent": 0.358215868473053, + "loss_sod": 0.03907562047243118, + "loss_total": 0.7085450887680054, + "step": 114399 + }, + { + "epoch": 0.0028, + "grad_norm": 2.1623480319976807, + "learning_rate": 6.197320145111993e-05, + "loss": 0.5565, + "step": 114400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.291693687438965, + "loss_rtd": 0.303223580121994, + "loss_sent": 0.11208082735538483, + "loss_sod": 0.21081554889678955, + "loss_total": 0.6261199712753296, + "step": 114499 + }, + { + "epoch": 0.002998, + "loss_gen": 4.941003322601318, + "loss_rtd": 0.3099227249622345, + "loss_sent": 0.30689841508865356, + "loss_sod": 0.010753561742603779, + "loss_total": 0.6275746822357178, + "step": 114499 + }, + { + "epoch": 0.003, + "grad_norm": 1.994731068611145, + "learning_rate": 6.194238905431963e-05, + "loss": 0.5536, + "step": 114500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.026473522186279, + "loss_rtd": 0.29253116250038147, + "loss_sent": 0.03600431978702545, + "loss_sod": 0.1048341616988182, + "loss_total": 0.43336963653564453, + "step": 114599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.14656925201416, + "loss_rtd": 0.30833864212036133, + "loss_sent": 0.07989537715911865, + "loss_sod": 0.06955140084028244, + "loss_total": 0.457785427570343, + "step": 114599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.9737004637718201, + "learning_rate": 6.191157184712644e-05, + "loss": 0.5342, + "step": 114600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.6400556564331055, + "loss_rtd": 0.323696106672287, + "loss_sent": 0.1208534985780716, + "loss_sod": 0.024927394464612007, + "loss_total": 0.46947699785232544, + "step": 114699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.89691162109375, + "loss_rtd": 0.315727174282074, + "loss_sent": 0.027605533599853516, + "loss_sod": 0.2120855748653412, + "loss_total": 0.5554182529449463, + "step": 114699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.1237143278121948, + "learning_rate": 6.188074984195353e-05, + "loss": 0.551, + "step": 114700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.290353775024414, + "loss_rtd": 0.2860538959503174, + "loss_sent": 4.6586945245508105e-05, + "loss_sod": 0.08435927331447601, + "loss_total": 0.37045976519584656, + "step": 114799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.766724109649658, + "loss_rtd": 0.2988499104976654, + "loss_sent": 0.10280866175889969, + "loss_sod": 0.07156020402908325, + "loss_total": 0.47321876883506775, + "step": 114799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.7292584776878357, + "learning_rate": 6.184992305121601e-05, + "loss": 0.5591, + "step": 114800 + }, + { + "epoch": 0.003798, + "loss_gen": 4.8388991355896, + "loss_rtd": 0.3185732066631317, + "loss_sent": 0.14404888451099396, + "loss_sod": 0.10201037675142288, + "loss_total": 0.5646324753761292, + "step": 114899 + }, + { + "epoch": 0.003798, + "loss_gen": 3.8639140129089355, + "loss_rtd": 0.30034103989601135, + "loss_sent": 0.0003229479189030826, + "loss_sod": 0.1416824460029602, + "loss_total": 0.4423464238643646, + "step": 114899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.0658996105194092, + "learning_rate": 6.181909148733092e-05, + "loss": 0.5491, + "step": 114900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.2712602615356445, + "loss_rtd": 0.28642332553863525, + "loss_sent": 0.15423594415187836, + "loss_sod": 0.10140690207481384, + "loss_total": 0.5420661568641663, + "step": 114999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.609872817993164, + "loss_rtd": 0.31023040413856506, + "loss_sent": 0.05131329968571663, + "loss_sod": 0.0744519978761673, + "loss_total": 0.4359957277774811, + "step": 114999 + }, + { + "epoch": 0.004, + "grad_norm": 1.00832200050354, + "learning_rate": 6.178825516271715e-05, + "loss": 0.5413, + "step": 115000 + }, + { + "epoch": 0.004, + "eval_loss": 0.5329810380935669, + "eval_runtime": 150.7093, + "eval_samples_per_second": 102.469, + "eval_steps_per_second": 0.803, + "step": 115000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.131319046020508, + "loss_rtd": 0.3135809600353241, + "loss_sent": 0.2427971512079239, + "loss_sod": 0.034553416073322296, + "loss_total": 0.5909315347671509, + "step": 115099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.176270961761475, + "loss_rtd": 0.31237703561782837, + "loss_sent": 0.2483011931180954, + "loss_sod": 0.07424858212471008, + "loss_total": 0.6349267959594727, + "step": 115099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.304195523262024, + "learning_rate": 6.175741408979565e-05, + "loss": 0.5562, + "step": 115100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.37276554107666, + "loss_rtd": 0.30123811960220337, + "loss_sent": 0.32682904601097107, + "loss_sod": 0.0769878551363945, + "loss_total": 0.7050549983978271, + "step": 115199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.281090259552002, + "loss_rtd": 0.30877524614334106, + "loss_sent": 0.1706777960062027, + "loss_sod": 0.07811566442251205, + "loss_total": 0.5575687289237976, + "step": 115199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.9951972961425781, + "learning_rate": 6.172656828098915e-05, + "loss": 0.5743, + "step": 115200 + }, + { + "epoch": 0.004598, + "loss_gen": 4.0622968673706055, + "loss_rtd": 0.28530532121658325, + "loss_sent": 0.14061227440834045, + "loss_sod": 0.012159768491983414, + "loss_total": 0.4380773603916168, + "step": 115299 + }, + { + "epoch": 0.004598, + "loss_gen": 4.939615249633789, + "loss_rtd": 0.3039166033267975, + "loss_sent": 0.3648511469364166, + "loss_sod": 0.11913774907588959, + "loss_total": 0.7879054546356201, + "step": 115299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.1858198642730713, + "learning_rate": 6.169571774872234e-05, + "loss": 0.5552, + "step": 115300 + }, + { + "epoch": 0.004798, + "loss_gen": 4.055812358856201, + "loss_rtd": 0.25892534852027893, + "loss_sent": 0.028526559472084045, + "loss_sod": 0.11896795779466629, + "loss_total": 0.40641987323760986, + "step": 115399 + }, + { + "epoch": 0.004798, + "loss_gen": 4.453707695007324, + "loss_rtd": 0.3138894736766815, + "loss_sent": 4.394640927785076e-05, + "loss_sod": 0.17471273243427277, + "loss_total": 0.48864617943763733, + "step": 115399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.8378913402557373, + "learning_rate": 6.166486250542182e-05, + "loss": 0.5437, + "step": 115400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.018705368041992, + "loss_rtd": 0.29178404808044434, + "loss_sent": 0.5226912498474121, + "loss_sod": 0.04983864724636078, + "loss_total": 0.8643139600753784, + "step": 115499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.1382646560668945, + "loss_rtd": 0.32372385263442993, + "loss_sent": 0.07393720000982285, + "loss_sod": 0.0608237162232399, + "loss_total": 0.4584847688674927, + "step": 115499 + }, + { + "epoch": 0.005, + "grad_norm": 1.0321120023727417, + "learning_rate": 6.163400256351608e-05, + "loss": 0.5564, + "step": 115500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.459597110748291, + "loss_rtd": 0.3049749732017517, + "loss_sent": 0.3579116463661194, + "loss_sod": 0.07099738717079163, + "loss_total": 0.7338839769363403, + "step": 115599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.500686168670654, + "loss_rtd": 0.3075568377971649, + "loss_sent": 0.09604629874229431, + "loss_sod": 0.03514803946018219, + "loss_total": 0.4387511909008026, + "step": 115599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.8851786851882935, + "learning_rate": 6.16031379354355e-05, + "loss": 0.5562, + "step": 115600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.096866607666016, + "loss_rtd": 0.28537148237228394, + "loss_sent": 3.223814201191999e-05, + "loss_sod": 0.10718190670013428, + "loss_total": 0.3925856351852417, + "step": 115699 + }, + { + "epoch": 0.005398, + "loss_gen": 4.263040065765381, + "loss_rtd": 0.2905530631542206, + "loss_sent": 0.053427621722221375, + "loss_sod": 0.03622889891266823, + "loss_total": 0.3802095651626587, + "step": 115699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.5757880210876465, + "learning_rate": 6.157226863361236e-05, + "loss": 0.5483, + "step": 115700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.071784973144531, + "loss_rtd": 0.3027852773666382, + "loss_sent": 0.26040104031562805, + "loss_sod": 0.012592491693794727, + "loss_total": 0.5757788419723511, + "step": 115799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.007861614227295, + "loss_rtd": 0.31663382053375244, + "loss_sent": 0.2857656478881836, + "loss_sod": 0.029870562255382538, + "loss_total": 0.6322700381278992, + "step": 115799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.4296653270721436, + "learning_rate": 6.154139467048077e-05, + "loss": 0.551, + "step": 115800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.9681010246276855, + "loss_rtd": 0.31963518261909485, + "loss_sent": 0.377085417509079, + "loss_sod": 0.0771680474281311, + "loss_total": 0.7738886475563049, + "step": 115899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.173583030700684, + "loss_rtd": 0.3045327067375183, + "loss_sent": 0.17333577573299408, + "loss_sod": 0.12360046803951263, + "loss_total": 0.6014689803123474, + "step": 115899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.1803321838378906, + "learning_rate": 6.151051605847681e-05, + "loss": 0.5691, + "step": 115900 + }, + { + "epoch": 0.005998, + "loss_gen": 4.760784149169922, + "loss_rtd": 0.30599844455718994, + "loss_sent": 0.26939308643341064, + "loss_sod": 0.12544703483581543, + "loss_total": 0.700838565826416, + "step": 115999 + }, + { + "epoch": 0.005998, + "loss_gen": 4.810731887817383, + "loss_rtd": 0.3135074973106384, + "loss_sent": 0.2628597617149353, + "loss_sod": 0.02120266482234001, + "loss_total": 0.5975699424743652, + "step": 115999 + }, + { + "epoch": 0.006, + "grad_norm": 1.2998814582824707, + "learning_rate": 6.147963281003835e-05, + "loss": 0.5536, + "step": 116000 + }, + { + "epoch": 0.006, + "eval_loss": 0.5306083559989929, + "eval_runtime": 150.8686, + "eval_samples_per_second": 102.361, + "eval_steps_per_second": 0.802, + "step": 116000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.276411533355713, + "loss_rtd": 0.30166155099868774, + "loss_sent": 4.164973506703973e-05, + "loss_sod": 0.09485423564910889, + "loss_total": 0.39655745029449463, + "step": 116099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.135272026062012, + "loss_rtd": 0.28391721844673157, + "loss_sent": 0.00020394708553794771, + "loss_sod": 0.30215784907341003, + "loss_total": 0.586279034614563, + "step": 116099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.0701336860656738, + "learning_rate": 6.144874493760517e-05, + "loss": 0.5651, + "step": 116100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.803519248962402, + "loss_rtd": 0.3124741017818451, + "loss_sent": 0.28700754046440125, + "loss_sod": 0.05058220773935318, + "loss_total": 0.6500638723373413, + "step": 116199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.236623287200928, + "loss_rtd": 0.3101903796195984, + "loss_sent": 0.1815764456987381, + "loss_sod": 0.056037433445453644, + "loss_total": 0.5478042364120483, + "step": 116199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.485912561416626, + "learning_rate": 6.141785245361891e-05, + "loss": 0.5374, + "step": 116200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.382851600646973, + "loss_rtd": 0.3070049285888672, + "loss_sent": 0.2606680691242218, + "loss_sod": 0.024468395859003067, + "loss_total": 0.5921413898468018, + "step": 116299 + }, + { + "epoch": 0.006598, + "loss_gen": 4.9794721603393555, + "loss_rtd": 0.3133496642112732, + "loss_sent": 0.3018931448459625, + "loss_sod": 0.03662215173244476, + "loss_total": 0.6518650054931641, + "step": 116299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.5523754358291626, + "learning_rate": 6.138695537052302e-05, + "loss": 0.5523, + "step": 116300 + }, + { + "epoch": 0.006798, + "loss_gen": 4.770416259765625, + "loss_rtd": 0.30799728631973267, + "loss_sent": 0.11200794577598572, + "loss_sod": 0.12188903987407684, + "loss_total": 0.5418943166732788, + "step": 116399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.167383670806885, + "loss_rtd": 0.2794514000415802, + "loss_sent": 0.10882844775915146, + "loss_sod": 0.00805368460714817, + "loss_total": 0.3963335454463959, + "step": 116399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.9968624711036682, + "learning_rate": 6.135605370076288e-05, + "loss": 0.5512, + "step": 116400 + }, + { + "epoch": 0.006998, + "loss_gen": 4.758355140686035, + "loss_rtd": 0.3198693096637726, + "loss_sent": 0.002205683384090662, + "loss_sod": 0.14805887639522552, + "loss_total": 0.47013384103775024, + "step": 116499 + }, + { + "epoch": 0.006998, + "loss_gen": 4.639089584350586, + "loss_rtd": 0.305816650390625, + "loss_sent": 3.7118101317901164e-05, + "loss_sod": 0.2026383876800537, + "loss_total": 0.5084921717643738, + "step": 116499 + }, + { + "epoch": 0.007, + "grad_norm": 1.0024669170379639, + "learning_rate": 6.132514745678567e-05, + "loss": 0.5633, + "step": 116500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.15023946762085, + "loss_rtd": 0.316466361284256, + "loss_sent": 0.1617228239774704, + "loss_sod": 0.10378433018922806, + "loss_total": 0.5819734930992126, + "step": 116599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.660497188568115, + "loss_rtd": 0.3126148283481598, + "loss_sent": 0.25780534744262695, + "loss_sod": 0.02918866090476513, + "loss_total": 0.599608838558197, + "step": 116599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.9753177762031555, + "learning_rate": 6.129423665104042e-05, + "loss": 0.5681, + "step": 116600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.929150104522705, + "loss_rtd": 0.29756641387939453, + "loss_sent": 0.16652946174144745, + "loss_sod": 0.0524655245244503, + "loss_total": 0.5165613889694214, + "step": 116699 + }, + { + "epoch": 0.007398, + "loss_gen": 4.864585876464844, + "loss_rtd": 0.3132537007331848, + "loss_sent": 0.2488655000925064, + "loss_sod": 0.00397275248542428, + "loss_total": 0.5660919547080994, + "step": 116699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.6388241052627563, + "learning_rate": 6.1263321295978e-05, + "loss": 0.5628, + "step": 116700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.148667812347412, + "loss_rtd": 0.29710373282432556, + "loss_sent": 0.16809654235839844, + "loss_sod": 0.032282207161188126, + "loss_total": 0.4974824786186218, + "step": 116799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.510430812835693, + "loss_rtd": 0.31945863366127014, + "loss_sent": 0.15705585479736328, + "loss_sod": 0.08870366215705872, + "loss_total": 0.5652181506156921, + "step": 116799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.2833685874938965, + "learning_rate": 6.123240140405111e-05, + "loss": 0.5505, + "step": 116800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.093433856964111, + "loss_rtd": 0.29983484745025635, + "loss_sent": 0.23154324293136597, + "loss_sod": 0.03990485891699791, + "loss_total": 0.5712829828262329, + "step": 116899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.802632808685303, + "loss_rtd": 0.3150176703929901, + "loss_sent": 0.2557350993156433, + "loss_sod": 0.1667909026145935, + "loss_total": 0.7375437021255493, + "step": 116899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.092612624168396, + "learning_rate": 6.120147698771426e-05, + "loss": 0.5582, + "step": 116900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.013883590698242, + "loss_rtd": 0.31911247968673706, + "loss_sent": 0.2507493495941162, + "loss_sod": 0.09466500580310822, + "loss_total": 0.6645268201828003, + "step": 116999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.192552089691162, + "loss_rtd": 0.3006800413131714, + "loss_sent": 0.29077842831611633, + "loss_sod": 0.13088540732860565, + "loss_total": 0.722343921661377, + "step": 116999 + }, + { + "epoch": 0.008, + "grad_norm": 0.9831264615058899, + "learning_rate": 6.117054805942384e-05, + "loss": 0.5622, + "step": 117000 + }, + { + "epoch": 0.008, + "eval_loss": 0.5328510403633118, + "eval_runtime": 151.0378, + "eval_samples_per_second": 102.246, + "eval_steps_per_second": 0.801, + "step": 117000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.035093307495117, + "loss_rtd": 0.28687572479248047, + "loss_sent": 8.938983228290454e-05, + "loss_sod": 0.13432514667510986, + "loss_total": 0.42129024863243103, + "step": 117099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.797860622406006, + "loss_rtd": 0.3159748613834381, + "loss_sent": 0.13346822559833527, + "loss_sod": 0.015710938721895218, + "loss_total": 0.4651540517807007, + "step": 117099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.6174519062042236, + "learning_rate": 6.113961463163797e-05, + "loss": 0.5447, + "step": 117100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.414780616760254, + "loss_rtd": 0.29347509145736694, + "loss_sent": 0.06772622466087341, + "loss_sod": 0.05473160743713379, + "loss_total": 0.41593292355537415, + "step": 117199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.32811975479126, + "loss_rtd": 0.30691400170326233, + "loss_sent": 0.015786901116371155, + "loss_sod": 0.2614777088165283, + "loss_total": 0.5841785669326782, + "step": 117199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.7983853816986084, + "learning_rate": 6.11086767168167e-05, + "loss": 0.5543, + "step": 117200 + }, + { + "epoch": 0.008598, + "loss_gen": 4.570329189300537, + "loss_rtd": 0.2815568149089813, + "loss_sent": 0.0009163393406197429, + "loss_sod": 0.12550219893455505, + "loss_total": 0.40797534584999084, + "step": 117299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.595879077911377, + "loss_rtd": 0.28276875615119934, + "loss_sent": 0.0515110082924366, + "loss_sod": 0.09898176789283752, + "loss_total": 0.43326154351234436, + "step": 117299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.7184866666793823, + "learning_rate": 6.107773432742174e-05, + "loss": 0.5741, + "step": 117300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.7618632316589355, + "loss_rtd": 0.3073684573173523, + "loss_sent": 0.055196598172187805, + "loss_sod": 0.004483198281377554, + "loss_total": 0.3670482635498047, + "step": 117399 + }, + { + "epoch": 0.008798, + "loss_gen": 4.997790336608887, + "loss_rtd": 0.30896130204200745, + "loss_sent": 0.04724084958434105, + "loss_sod": 0.09285259991884232, + "loss_total": 0.4490547478199005, + "step": 117399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.7552955150604248, + "learning_rate": 6.104678747591673e-05, + "loss": 0.5318, + "step": 117400 + }, + { + "epoch": 0.008998, + "loss_gen": 4.978947639465332, + "loss_rtd": 0.2978529930114746, + "loss_sent": 0.21421316266059875, + "loss_sod": 0.043476030230522156, + "loss_total": 0.5555422306060791, + "step": 117499 + }, + { + "epoch": 0.008998, + "loss_gen": 4.688227653503418, + "loss_rtd": 0.3126789629459381, + "loss_sent": 0.09292519092559814, + "loss_sod": 0.011855825781822205, + "loss_total": 0.41745996475219727, + "step": 117499 + }, + { + "epoch": 0.009, + "grad_norm": 1.7240303754806519, + "learning_rate": 6.101583617476705e-05, + "loss": 0.5632, + "step": 117500 + }, + { + "epoch": 0.009198, + "loss_gen": 4.799741744995117, + "loss_rtd": 0.3096998929977417, + "loss_sent": 0.0798083245754242, + "loss_sod": 0.018908588215708733, + "loss_total": 0.4084168076515198, + "step": 117599 + }, + { + "epoch": 0.009198, + "loss_gen": 4.974015712738037, + "loss_rtd": 0.28615111112594604, + "loss_sent": 0.21938830614089966, + "loss_sod": 0.09349527209997177, + "loss_total": 0.5990346670150757, + "step": 117599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.9687650203704834, + "learning_rate": 6.0984880436439884e-05, + "loss": 0.5647, + "step": 117600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.020453929901123, + "loss_rtd": 0.28277814388275146, + "loss_sent": 0.3842502236366272, + "loss_sod": 0.03025163896381855, + "loss_total": 0.6972800493240356, + "step": 117699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.045213222503662, + "loss_rtd": 0.3140573501586914, + "loss_sent": 0.1761348396539688, + "loss_sod": 0.02005627565085888, + "loss_total": 0.5102484822273254, + "step": 117699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.7610554695129395, + "learning_rate": 6.0953920273404184e-05, + "loss": 0.5464, + "step": 117700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.240246295928955, + "loss_rtd": 0.2947600483894348, + "loss_sent": 0.12171149998903275, + "loss_sod": 0.10148908197879791, + "loss_total": 0.5179606080055237, + "step": 117799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.303003311157227, + "loss_rtd": 0.3095209002494812, + "loss_sent": 0.24944280087947845, + "loss_sod": 0.09355980157852173, + "loss_total": 0.6525235176086426, + "step": 117799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.479610800743103, + "learning_rate": 6.0922955698130704e-05, + "loss": 0.5579, + "step": 117800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.901512622833252, + "loss_rtd": 0.3135903775691986, + "loss_sent": 0.08606160432100296, + "loss_sod": 0.013673401437699795, + "loss_total": 0.41332536935806274, + "step": 117899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.495378017425537, + "loss_rtd": 0.3010045289993286, + "loss_sent": 0.1780148297548294, + "loss_sod": 0.0486171655356884, + "loss_total": 0.5276365280151367, + "step": 117899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.8681696057319641, + "learning_rate": 6.089198672309198e-05, + "loss": 0.5457, + "step": 117900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.7274274826049805, + "loss_rtd": 0.31041961908340454, + "loss_sent": 4.1027440602192655e-05, + "loss_sod": 0.20404189825057983, + "loss_total": 0.5145025253295898, + "step": 117999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.6410722732543945, + "loss_rtd": 0.3025372624397278, + "loss_sent": 4.420238110469654e-05, + "loss_sod": 0.21246737241744995, + "loss_total": 0.5150488615036011, + "step": 117999 + }, + { + "epoch": 0.01, + "grad_norm": 1.265177607536316, + "learning_rate": 6.0861013360762284e-05, + "loss": 0.5567, + "step": 118000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5303297638893127, + "eval_runtime": 150.6817, + "eval_samples_per_second": 102.488, + "eval_steps_per_second": 0.803, + "step": 118000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.342700004577637, + "loss_rtd": 0.3074573278427124, + "loss_sent": 0.12914182245731354, + "loss_sod": 0.10512572526931763, + "loss_total": 0.5417248606681824, + "step": 118099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.054635047912598, + "loss_rtd": 0.3276335597038269, + "loss_sent": 0.17085400223731995, + "loss_sod": 0.02813855931162834, + "loss_total": 0.5266261100769043, + "step": 118099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.3616130352020264, + "learning_rate": 6.083003562361774e-05, + "loss": 0.5641, + "step": 118100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.146373748779297, + "loss_rtd": 0.2659860849380493, + "loss_sent": 0.05771252140402794, + "loss_sod": 0.06970370560884476, + "loss_total": 0.3934023082256317, + "step": 118199 + }, + { + "epoch": 0.010398, + "loss_gen": 4.889800548553467, + "loss_rtd": 0.3198871612548828, + "loss_sent": 0.11661746352910995, + "loss_sod": 0.03849222511053085, + "loss_total": 0.4749968647956848, + "step": 118199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.7561426162719727, + "learning_rate": 6.079905352413613e-05, + "loss": 0.5516, + "step": 118200 + }, + { + "epoch": 0.010598, + "loss_gen": 4.492007255554199, + "loss_rtd": 0.3046039044857025, + "loss_sent": 0.1819341778755188, + "loss_sod": 0.024301957339048386, + "loss_total": 0.5108400583267212, + "step": 118299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.142167568206787, + "loss_rtd": 0.3205025792121887, + "loss_sent": 0.20369991660118103, + "loss_sod": 0.07976368814706802, + "loss_total": 0.6039661765098572, + "step": 118299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.8787733912467957, + "learning_rate": 6.076806707479704e-05, + "loss": 0.5444, + "step": 118300 + }, + { + "epoch": 0.010798, + "loss_gen": 4.338025093078613, + "loss_rtd": 0.274977445602417, + "loss_sent": 0.043792724609375, + "loss_sod": 0.04635123163461685, + "loss_total": 0.36512139439582825, + "step": 118399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.427778244018555, + "loss_rtd": 0.31101086735725403, + "loss_sent": 0.04641634225845337, + "loss_sod": 0.07147668302059174, + "loss_total": 0.42890387773513794, + "step": 118399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.7714661955833435, + "learning_rate": 6.073707628808184e-05, + "loss": 0.5514, + "step": 118400 + }, + { + "epoch": 0.010998, + "loss_gen": 4.238531589508057, + "loss_rtd": 0.2738182544708252, + "loss_sent": 0.02748727984726429, + "loss_sod": 0.1467839777469635, + "loss_total": 0.44808951020240784, + "step": 118499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.748021602630615, + "loss_rtd": 0.314196914434433, + "loss_sent": 0.281240314245224, + "loss_sod": 0.014339671470224857, + "loss_total": 0.6097769141197205, + "step": 118499 + }, + { + "epoch": 0.011, + "grad_norm": 0.9258102178573608, + "learning_rate": 6.070608117647358e-05, + "loss": 0.5483, + "step": 118500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.137299060821533, + "loss_rtd": 0.28736162185668945, + "loss_sent": 3.937733708880842e-05, + "loss_sod": 0.2685033679008484, + "loss_total": 0.5559043884277344, + "step": 118599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.600038528442383, + "loss_rtd": 0.27350613474845886, + "loss_sent": 0.0018881767755374312, + "loss_sod": 0.13644471764564514, + "loss_total": 0.4118390381336212, + "step": 118599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9446884989738464, + "learning_rate": 6.067508175245711e-05, + "loss": 0.553, + "step": 118600 + }, + { + "epoch": 0.011398, + "loss_gen": 4.890167236328125, + "loss_rtd": 0.30400070548057556, + "loss_sent": 0.30468428134918213, + "loss_sod": 0.05940008908510208, + "loss_total": 0.6680850982666016, + "step": 118699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.859102249145508, + "loss_rtd": 0.29612302780151367, + "loss_sent": 0.07546553015708923, + "loss_sod": 0.07025572657585144, + "loss_total": 0.44184428453445435, + "step": 118699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.1226309537887573, + "learning_rate": 6.064407802851898e-05, + "loss": 0.5545, + "step": 118700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.093757629394531, + "loss_rtd": 0.3137657940387726, + "loss_sent": 0.14663511514663696, + "loss_sod": 0.18289406597614288, + "loss_total": 0.6432949900627136, + "step": 118799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.48219108581543, + "loss_rtd": 0.2992091774940491, + "loss_sent": 0.11522194743156433, + "loss_sod": 0.058543283492326736, + "loss_total": 0.47297441959381104, + "step": 118799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.0091612339019775, + "learning_rate": 6.0613070017147486e-05, + "loss": 0.539, + "step": 118800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.052651405334473, + "loss_rtd": 0.2983821630477905, + "loss_sent": 0.9284707903862, + "loss_sod": 0.05928806588053703, + "loss_total": 1.286141037940979, + "step": 118899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.213494777679443, + "loss_rtd": 0.3135639429092407, + "loss_sent": 0.08417072147130966, + "loss_sod": 0.0376645028591156, + "loss_total": 0.4353991746902466, + "step": 118899 + }, + { + "epoch": 0.0118, + "grad_norm": 4.361950874328613, + "learning_rate": 6.058205773083268e-05, + "loss": 0.5491, + "step": 118900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.080746650695801, + "loss_rtd": 0.2965928912162781, + "loss_sent": 0.1390780657529831, + "loss_sod": 0.03249840438365936, + "loss_total": 0.4681693911552429, + "step": 118999 + }, + { + "epoch": 0.011998, + "loss_gen": 4.327706336975098, + "loss_rtd": 0.29067936539649963, + "loss_sent": 0.0004974246839992702, + "loss_sod": 0.20749783515930176, + "loss_total": 0.4986746311187744, + "step": 118999 + }, + { + "epoch": 0.012, + "grad_norm": 0.8957967162132263, + "learning_rate": 6.055104118206627e-05, + "loss": 0.5375, + "step": 119000 + }, + { + "epoch": 0.012, + "eval_loss": 0.5409232974052429, + "eval_runtime": 151.0629, + "eval_samples_per_second": 102.229, + "eval_steps_per_second": 0.801, + "step": 119000 + }, + { + "epoch": 0.012198, + "loss_gen": 4.732698917388916, + "loss_rtd": 0.3089301884174347, + "loss_sent": 0.38602128624916077, + "loss_sod": 0.003477014135569334, + "loss_total": 0.6984285116195679, + "step": 119099 + }, + { + "epoch": 0.012198, + "loss_gen": 4.9668049812316895, + "loss_rtd": 0.2914218604564667, + "loss_sent": 0.06754955649375916, + "loss_sod": 0.010460056364536285, + "loss_total": 0.3694314658641815, + "step": 119099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.34317147731781, + "learning_rate": 6.052002038334173e-05, + "loss": 0.5629, + "step": 119100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.735567569732666, + "loss_rtd": 0.2999626100063324, + "loss_sent": 0.16539442539215088, + "loss_sod": 0.09441490471363068, + "loss_total": 0.5597719550132751, + "step": 119199 + }, + { + "epoch": 0.012398, + "loss_gen": 4.99312162399292, + "loss_rtd": 0.31069958209991455, + "loss_sent": 0.1555461436510086, + "loss_sod": 0.0029545840807259083, + "loss_total": 0.4692003130912781, + "step": 119199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.335006594657898, + "learning_rate": 6.048899534715424e-05, + "loss": 0.5551, + "step": 119200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.128385066986084, + "loss_rtd": 0.3325307369232178, + "loss_sent": 0.19470533728599548, + "loss_sod": 0.03076741099357605, + "loss_total": 0.5580034852027893, + "step": 119299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.859645366668701, + "loss_rtd": 0.3197959363460541, + "loss_sent": 0.17254067957401276, + "loss_sod": 0.03450271114706993, + "loss_total": 0.5268393158912659, + "step": 119299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.2715450525283813, + "learning_rate": 6.0457966086000695e-05, + "loss": 0.5415, + "step": 119300 + }, + { + "epoch": 0.012798, + "loss_gen": 4.846061706542969, + "loss_rtd": 0.30711206793785095, + "loss_sent": 0.3994387686252594, + "loss_sod": 0.01650303602218628, + "loss_total": 0.7230538725852966, + "step": 119399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.607169151306152, + "loss_rtd": 0.29206523299217224, + "loss_sent": 0.013358005322515965, + "loss_sod": 0.18210723996162415, + "loss_total": 0.4875304698944092, + "step": 119399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.009638786315918, + "learning_rate": 6.042693261237964e-05, + "loss": 0.5599, + "step": 119400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.192318916320801, + "loss_rtd": 0.28099825978279114, + "loss_sent": 0.015737246721982956, + "loss_sod": 0.12070950120687485, + "loss_total": 0.41744500398635864, + "step": 119499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.28455114364624, + "loss_rtd": 0.30890995264053345, + "loss_sent": 0.026161663234233856, + "loss_sod": 0.1854681819677353, + "loss_total": 0.5205398201942444, + "step": 119499 + }, + { + "epoch": 0.013, + "grad_norm": 0.7967619895935059, + "learning_rate": 6.0395894938791395e-05, + "loss": 0.5471, + "step": 119500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.179881572723389, + "loss_rtd": 0.316481351852417, + "loss_sent": 0.3046930730342865, + "loss_sod": 0.03662659600377083, + "loss_total": 0.6578010320663452, + "step": 119599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.606226444244385, + "loss_rtd": 0.30212709307670593, + "loss_sent": 0.11669275909662247, + "loss_sod": 0.09998899698257446, + "loss_total": 0.5188088417053223, + "step": 119599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.1093003749847412, + "learning_rate": 6.036485307773789e-05, + "loss": 0.5431, + "step": 119600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.087669372558594, + "loss_rtd": 0.32508882880210876, + "loss_sent": 0.12444044649600983, + "loss_sod": 0.11683076620101929, + "loss_total": 0.5663600564002991, + "step": 119699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.0431952476501465, + "loss_rtd": 0.32513174414634705, + "loss_sent": 0.13575586676597595, + "loss_sod": 0.06492830812931061, + "loss_total": 0.5258159637451172, + "step": 119699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.1140689849853516, + "learning_rate": 6.0333807041722824e-05, + "loss": 0.5475, + "step": 119700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.197576999664307, + "loss_rtd": 0.303524911403656, + "loss_sent": 0.2978334128856659, + "loss_sod": 0.07744783163070679, + "loss_total": 0.6788061857223511, + "step": 119799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.200259208679199, + "loss_rtd": 0.28390949964523315, + "loss_sent": 0.39740145206451416, + "loss_sod": 0.04918929561972618, + "loss_total": 0.7305002212524414, + "step": 119799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.7545603513717651, + "learning_rate": 6.030275684325151e-05, + "loss": 0.5512, + "step": 119800 + }, + { + "epoch": 0.013798, + "loss_gen": 4.439004898071289, + "loss_rtd": 0.32057374715805054, + "loss_sent": 0.04900732263922691, + "loss_sod": 0.0249588992446661, + "loss_total": 0.3945399820804596, + "step": 119899 + }, + { + "epoch": 0.013798, + "loss_gen": 4.865222454071045, + "loss_rtd": 0.3066619336605072, + "loss_sent": 0.053667474538087845, + "loss_sod": 0.009524598717689514, + "loss_total": 0.36985403299331665, + "step": 119899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.9675593376159668, + "learning_rate": 6.0271702494830976e-05, + "loss": 0.5408, + "step": 119900 + }, + { + "epoch": 0.013998, + "loss_gen": 4.92477560043335, + "loss_rtd": 0.30895090103149414, + "loss_sent": 0.3618309199810028, + "loss_sod": 0.05889948457479477, + "loss_total": 0.7296813130378723, + "step": 119999 + }, + { + "epoch": 0.013998, + "loss_gen": 4.568111419677734, + "loss_rtd": 0.3014170229434967, + "loss_sent": 0.012722902931272984, + "loss_sod": 0.1176237165927887, + "loss_total": 0.4317636489868164, + "step": 119999 + }, + { + "epoch": 0.014, + "grad_norm": 1.7804460525512695, + "learning_rate": 6.0240644008969904e-05, + "loss": 0.5416, + "step": 120000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5281617641448975, + "eval_runtime": 151.1779, + "eval_samples_per_second": 102.151, + "eval_steps_per_second": 0.8, + "step": 120000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.095440864562988, + "loss_rtd": 0.29845625162124634, + "loss_sent": 0.09549505263566971, + "loss_sod": 0.08383683115243912, + "loss_total": 0.47778812050819397, + "step": 120099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.03488826751709, + "loss_rtd": 0.3122705817222595, + "loss_sent": 0.437841534614563, + "loss_sod": 0.024121161550283432, + "loss_total": 0.7742332816123962, + "step": 120099 + }, + { + "epoch": 0.0142, + "grad_norm": 2.029510021209717, + "learning_rate": 6.020958139817864e-05, + "loss": 0.5411, + "step": 120100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.938586711883545, + "loss_rtd": 0.2876558303833008, + "loss_sent": 0.2451150119304657, + "loss_sod": 0.09866916388273239, + "loss_total": 0.6314400434494019, + "step": 120199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.084557056427002, + "loss_rtd": 0.3087672293186188, + "loss_sent": 0.19489362835884094, + "loss_sod": 0.1311921775341034, + "loss_total": 0.6348530054092407, + "step": 120199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.1793755292892456, + "learning_rate": 6.017851467496922e-05, + "loss": 0.5461, + "step": 120200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.093992710113525, + "loss_rtd": 0.3222237825393677, + "loss_sent": 0.205959290266037, + "loss_sod": 0.021119918674230576, + "loss_total": 0.5493029952049255, + "step": 120299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.074092388153076, + "loss_rtd": 0.32208383083343506, + "loss_sent": 0.24837274849414825, + "loss_sod": 0.012265660800039768, + "loss_total": 0.5827222466468811, + "step": 120299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.5728222131729126, + "learning_rate": 6.01474438518553e-05, + "loss": 0.5437, + "step": 120300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.079484939575195, + "loss_rtd": 0.29105761647224426, + "loss_sent": 0.293789803981781, + "loss_sod": 0.01728013902902603, + "loss_total": 0.6021275520324707, + "step": 120399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.114513874053955, + "loss_rtd": 0.31496596336364746, + "loss_sent": 0.47538989782333374, + "loss_sod": 0.10584516823291779, + "loss_total": 0.8962010145187378, + "step": 120399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.357329249382019, + "learning_rate": 6.011636894135222e-05, + "loss": 0.556, + "step": 120400 + }, + { + "epoch": 0.014998, + "loss_gen": 4.6962761878967285, + "loss_rtd": 0.26938948035240173, + "loss_sent": 0.15451398491859436, + "loss_sod": 0.03728630393743515, + "loss_total": 0.46118977665901184, + "step": 120499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.1061906814575195, + "loss_rtd": 0.2843681871891022, + "loss_sent": 0.021051516756415367, + "loss_sod": 0.10623716562986374, + "loss_total": 0.4116568863391876, + "step": 120499 + }, + { + "epoch": 0.015, + "grad_norm": 0.8589921593666077, + "learning_rate": 6.008528995597692e-05, + "loss": 0.5451, + "step": 120500 + }, + { + "epoch": 0.015198, + "loss_gen": 4.76133918762207, + "loss_rtd": 0.2874845862388611, + "loss_sent": 0.037548765540122986, + "loss_sod": 0.08966411650180817, + "loss_total": 0.41469746828079224, + "step": 120599 + }, + { + "epoch": 0.015198, + "loss_gen": 4.528561592102051, + "loss_rtd": 0.29467278718948364, + "loss_sent": 0.00033994315890595317, + "loss_sod": 0.21030665934085846, + "loss_total": 0.505319356918335, + "step": 120599 + }, + { + "epoch": 0.0152, + "grad_norm": 0.9326037764549255, + "learning_rate": 6.0054206908248054e-05, + "loss": 0.5361, + "step": 120600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.1329851150512695, + "loss_rtd": 0.3232683837413788, + "loss_sent": 0.447965145111084, + "loss_sod": 0.04933439940214157, + "loss_total": 0.8205679655075073, + "step": 120699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.720605373382568, + "loss_rtd": 0.3047685921192169, + "loss_sent": 0.055031027644872665, + "loss_sod": 0.16122189164161682, + "loss_total": 0.5210214853286743, + "step": 120699 + }, + { + "epoch": 0.0154, + "grad_norm": 2.2183480262756348, + "learning_rate": 6.002311981068583e-05, + "loss": 0.5477, + "step": 120700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.154892921447754, + "loss_rtd": 0.3027958869934082, + "loss_sent": 0.2581217586994171, + "loss_sod": 0.026079662144184113, + "loss_total": 0.5869972705841064, + "step": 120799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.97944974899292, + "loss_rtd": 0.31093499064445496, + "loss_sent": 0.3042903244495392, + "loss_sod": 0.058462005108594894, + "loss_total": 0.6736873388290405, + "step": 120799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.1163026094436646, + "learning_rate": 5.999202867581216e-05, + "loss": 0.5503, + "step": 120800 + }, + { + "epoch": 0.015798, + "loss_gen": 4.936832427978516, + "loss_rtd": 0.294286847114563, + "loss_sent": 0.12870286405086517, + "loss_sod": 0.09846531599760056, + "loss_total": 0.5214550495147705, + "step": 120899 + }, + { + "epoch": 0.015798, + "loss_gen": 4.880770206451416, + "loss_rtd": 0.31502896547317505, + "loss_sent": 0.07885976135730743, + "loss_sod": 0.10253088176250458, + "loss_total": 0.49641960859298706, + "step": 120899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.0672459602355957, + "learning_rate": 5.996093351615053e-05, + "loss": 0.5368, + "step": 120900 + }, + { + "epoch": 0.015998, + "loss_gen": 4.134515762329102, + "loss_rtd": 0.2835191488265991, + "loss_sent": 0.07676522433757782, + "loss_sod": 0.0987343117594719, + "loss_total": 0.4590187072753906, + "step": 120999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.60503625869751, + "loss_rtd": 0.31180766224861145, + "loss_sent": 0.17509113252162933, + "loss_sod": 0.143839031457901, + "loss_total": 0.630737841129303, + "step": 120999 + }, + { + "epoch": 0.016, + "grad_norm": 1.1638810634613037, + "learning_rate": 5.992983434422607e-05, + "loss": 0.5428, + "step": 121000 + }, + { + "epoch": 0.016, + "eval_loss": 0.5266171097755432, + "eval_runtime": 151.1401, + "eval_samples_per_second": 102.177, + "eval_steps_per_second": 0.801, + "step": 121000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.856085777282715, + "loss_rtd": 0.3030649721622467, + "loss_sent": 0.1892850399017334, + "loss_sod": 0.06271328032016754, + "loss_total": 0.5550633072853088, + "step": 121099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.909219264984131, + "loss_rtd": 0.3179358243942261, + "loss_sent": 0.2385430932044983, + "loss_sod": 0.02142917737364769, + "loss_total": 0.5779080986976624, + "step": 121099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.5869792699813843, + "learning_rate": 5.9898731172565515e-05, + "loss": 0.5578, + "step": 121100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.8263068199157715, + "loss_rtd": 0.2914373278617859, + "loss_sent": 0.3489281237125397, + "loss_sod": 0.025136850774288177, + "loss_total": 0.6655023097991943, + "step": 121199 + }, + { + "epoch": 0.016398, + "loss_gen": 3.9975028038024902, + "loss_rtd": 0.2875139117240906, + "loss_sent": 0.0004698280245065689, + "loss_sod": 0.1230778768658638, + "loss_total": 0.4110616147518158, + "step": 121199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.3362252712249756, + "learning_rate": 5.986762401369724e-05, + "loss": 0.5514, + "step": 121200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.069579124450684, + "loss_rtd": 0.3111385703086853, + "loss_sent": 0.21980202198028564, + "loss_sod": 0.03434155881404877, + "loss_total": 0.5652821660041809, + "step": 121299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.631121635437012, + "loss_rtd": 0.2858448028564453, + "loss_sent": 0.03684644028544426, + "loss_sod": 0.08502034097909927, + "loss_total": 0.40771156549453735, + "step": 121299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.7875375747680664, + "learning_rate": 5.9836512880151185e-05, + "loss": 0.5644, + "step": 121300 + }, + { + "epoch": 0.016798, + "loss_gen": 4.904500484466553, + "loss_rtd": 0.2946925461292267, + "loss_sent": 0.14646261930465698, + "loss_sod": 0.02115407958626747, + "loss_total": 0.46230924129486084, + "step": 121399 + }, + { + "epoch": 0.016798, + "loss_gen": 4.832746505737305, + "loss_rtd": 0.29741933941841125, + "loss_sent": 0.08594054728746414, + "loss_sod": 0.1111661046743393, + "loss_total": 0.4945259690284729, + "step": 121399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.589100182056427, + "learning_rate": 5.980539778445892e-05, + "loss": 0.5446, + "step": 121400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.133731365203857, + "loss_rtd": 0.29808109998703003, + "loss_sent": 0.23018375039100647, + "loss_sod": 0.1376340538263321, + "loss_total": 0.6658989191055298, + "step": 121499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.038772106170654, + "loss_rtd": 0.3078896105289459, + "loss_sent": 0.31025782227516174, + "loss_sod": 0.00960936862975359, + "loss_total": 0.6277568340301514, + "step": 121499 + }, + { + "epoch": 0.017, + "grad_norm": 0.9874173402786255, + "learning_rate": 5.97742787391536e-05, + "loss": 0.5534, + "step": 121500 + }, + { + "epoch": 0.017198, + "loss_gen": 4.801186561584473, + "loss_rtd": 0.2905040383338928, + "loss_sent": 0.2787076532840729, + "loss_sod": 0.0025419124867767096, + "loss_total": 0.5717536211013794, + "step": 121599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.082403182983398, + "loss_rtd": 0.2908390164375305, + "loss_sent": 0.07650449126958847, + "loss_sod": 0.10242286324501038, + "loss_total": 0.46976637840270996, + "step": 121599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.8432844281196594, + "learning_rate": 5.974315575676998e-05, + "loss": 0.5422, + "step": 121600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.10474967956543, + "loss_rtd": 0.31578487157821655, + "loss_sent": 0.3773306608200073, + "loss_sod": 0.014497784897685051, + "loss_total": 0.7076133489608765, + "step": 121699 + }, + { + "epoch": 0.017398, + "loss_gen": 4.390714645385742, + "loss_rtd": 0.2889076769351959, + "loss_sent": 0.01650119200348854, + "loss_sod": 0.2263110876083374, + "loss_total": 0.5317199230194092, + "step": 121699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.9937688708305359, + "learning_rate": 5.971202884984438e-05, + "loss": 0.545, + "step": 121700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.184569358825684, + "loss_rtd": 0.26924213767051697, + "loss_sent": 0.12040992081165314, + "loss_sod": 0.024413447827100754, + "loss_total": 0.41406548023223877, + "step": 121799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.181044578552246, + "loss_rtd": 0.29509222507476807, + "loss_sent": 0.21284370124340057, + "loss_sod": 0.023889530450105667, + "loss_total": 0.5318254828453064, + "step": 121799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.8882488012313843, + "learning_rate": 5.968089803091471e-05, + "loss": 0.5501, + "step": 121800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.14775276184082, + "loss_rtd": 0.3031477928161621, + "loss_sent": 0.32531747221946716, + "loss_sod": 0.09638293087482452, + "loss_total": 0.7248481512069702, + "step": 121899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.95175313949585, + "loss_rtd": 0.30131402611732483, + "loss_sent": 0.3279035985469818, + "loss_sod": 0.055954333394765854, + "loss_total": 0.6851719617843628, + "step": 121899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.5275734663009644, + "learning_rate": 5.964976331252049e-05, + "loss": 0.5466, + "step": 121900 + }, + { + "epoch": 0.017998, + "loss_gen": 4.066583156585693, + "loss_rtd": 0.28303706645965576, + "loss_sent": 0.027056049555540085, + "loss_sod": 0.1286648064851761, + "loss_total": 0.43875789642333984, + "step": 121999 + }, + { + "epoch": 0.017998, + "loss_gen": 4.806674480438232, + "loss_rtd": 0.31188538670539856, + "loss_sent": 0.14575275778770447, + "loss_sod": 0.059200696647167206, + "loss_total": 0.5168388485908508, + "step": 121999 + }, + { + "epoch": 0.018, + "grad_norm": 0.8213522434234619, + "learning_rate": 5.961862470720274e-05, + "loss": 0.5543, + "step": 122000 + }, + { + "epoch": 0.018, + "eval_loss": 0.526357114315033, + "eval_runtime": 151.087, + "eval_samples_per_second": 102.213, + "eval_steps_per_second": 0.801, + "step": 122000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.63696813583374, + "loss_rtd": 0.2938276529312134, + "loss_sent": 0.15499430894851685, + "loss_sod": 0.11129481345415115, + "loss_total": 0.5601167678833008, + "step": 122099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.119730472564697, + "loss_rtd": 0.3093253970146179, + "loss_sent": 0.21570608019828796, + "loss_sod": 0.04213138669729233, + "loss_total": 0.5671628713607788, + "step": 122099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.5725865364074707, + "learning_rate": 5.9587482227504135e-05, + "loss": 0.5374, + "step": 122100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.021235466003418, + "loss_rtd": 0.30636805295944214, + "loss_sent": 0.5750591158866882, + "loss_sod": 0.22981981933116913, + "loss_total": 1.1112470626831055, + "step": 122199 + }, + { + "epoch": 0.018398, + "loss_gen": 4.6234235763549805, + "loss_rtd": 0.32066893577575684, + "loss_sent": 0.2912712097167969, + "loss_sod": 0.023670032620429993, + "loss_total": 0.6356101632118225, + "step": 122199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.8399732112884521, + "learning_rate": 5.9556335885968816e-05, + "loss": 0.5571, + "step": 122200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.109967231750488, + "loss_rtd": 0.3167197108268738, + "loss_sent": 0.34567931294441223, + "loss_sod": 0.01075891312211752, + "loss_total": 0.6731579303741455, + "step": 122299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.150479316711426, + "loss_rtd": 0.30093443393707275, + "loss_sent": 0.14319652318954468, + "loss_sod": 0.10695550590753555, + "loss_total": 0.55108642578125, + "step": 122299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.9634116888046265, + "learning_rate": 5.952518569514256e-05, + "loss": 0.5425, + "step": 122300 + }, + { + "epoch": 0.018798, + "loss_gen": 4.859994411468506, + "loss_rtd": 0.3014945387840271, + "loss_sent": 0.2702159881591797, + "loss_sod": 0.06617426872253418, + "loss_total": 0.637884795665741, + "step": 122399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.301722049713135, + "loss_rtd": 0.2949070632457733, + "loss_sent": 0.39677849411964417, + "loss_sod": 0.03144046291708946, + "loss_total": 0.7231260538101196, + "step": 122399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.1997106075286865, + "learning_rate": 5.9494031667572634e-05, + "loss": 0.5414, + "step": 122400 + }, + { + "epoch": 0.018998, + "loss_gen": 4.732713222503662, + "loss_rtd": 0.30171406269073486, + "loss_sent": 0.21200843155384064, + "loss_sod": 0.10248461365699768, + "loss_total": 0.6162071228027344, + "step": 122499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.2896952629089355, + "loss_rtd": 0.298050194978714, + "loss_sent": 0.22778217494487762, + "loss_sod": 0.07858137786388397, + "loss_total": 0.6044137477874756, + "step": 122499 + }, + { + "epoch": 0.019, + "grad_norm": 0.8152273893356323, + "learning_rate": 5.946287381580789e-05, + "loss": 0.5435, + "step": 122500 + }, + { + "epoch": 0.019198, + "loss_gen": 4.822017192840576, + "loss_rtd": 0.3023791015148163, + "loss_sent": 0.40086454153060913, + "loss_sod": 0.0457364022731781, + "loss_total": 0.7489800453186035, + "step": 122599 + }, + { + "epoch": 0.019198, + "loss_gen": 4.883655071258545, + "loss_rtd": 0.2954021990299225, + "loss_sent": 0.1451638638973236, + "loss_sod": 0.06186845153570175, + "loss_total": 0.502434492111206, + "step": 122599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.943340539932251, + "learning_rate": 5.94317121523987e-05, + "loss": 0.5519, + "step": 122600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.133467197418213, + "loss_rtd": 0.3176296055316925, + "loss_sent": 0.2999730706214905, + "loss_sod": 0.03264019265770912, + "loss_total": 0.6502428650856018, + "step": 122699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.871667861938477, + "loss_rtd": 0.3009471297264099, + "loss_sent": 0.04241713508963585, + "loss_sod": 0.2269972562789917, + "loss_total": 0.5703614950180054, + "step": 122699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.9830484390258789, + "learning_rate": 5.9400546689897e-05, + "loss": 0.5354, + "step": 122700 + }, + { + "epoch": 0.019598, + "loss_gen": 4.362318992614746, + "loss_rtd": 0.27739039063453674, + "loss_sent": 0.00620456924661994, + "loss_sod": 0.16359837353229523, + "loss_total": 0.44719335436820984, + "step": 122799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.854663372039795, + "loss_rtd": 0.30164459347724915, + "loss_sent": 0.19727568328380585, + "loss_sod": 0.014775708317756653, + "loss_total": 0.513696014881134, + "step": 122799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.8522732257843018, + "learning_rate": 5.936937744085619e-05, + "loss": 0.5399, + "step": 122800 + }, + { + "epoch": 0.019798, + "loss_gen": 4.497557640075684, + "loss_rtd": 0.2768266499042511, + "loss_sent": 0.037511665374040604, + "loss_sod": 0.043739430606365204, + "loss_total": 0.3580777645111084, + "step": 122899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.397903919219971, + "loss_rtd": 0.3055480718612671, + "loss_sent": 0.13608016073703766, + "loss_sod": 0.06525881588459015, + "loss_total": 0.5068870186805725, + "step": 122899 + }, + { + "epoch": 0.0198, + "grad_norm": 0.7463865876197815, + "learning_rate": 5.933820441783129e-05, + "loss": 0.5494, + "step": 122900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.11893892288208, + "loss_rtd": 0.29417848587036133, + "loss_sent": 0.052359383553266525, + "loss_sod": 0.1419471800327301, + "loss_total": 0.48848503828048706, + "step": 122999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.288945198059082, + "loss_rtd": 0.28406262397766113, + "loss_sent": 0.4790675640106201, + "loss_sod": 0.06081649661064148, + "loss_total": 0.8239467144012451, + "step": 122999 + }, + { + "epoch": 0.02, + "grad_norm": 1.3207755088806152, + "learning_rate": 5.930702763337875e-05, + "loss": 0.567, + "step": 123000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5226380825042725, + "eval_runtime": 152.5005, + "eval_samples_per_second": 101.265, + "eval_steps_per_second": 0.793, + "step": 123000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.152369976043701, + "loss_rtd": 0.2915489077568054, + "loss_sent": 0.6095741987228394, + "loss_sod": 0.04649090766906738, + "loss_total": 0.9476140141487122, + "step": 123099 + }, + { + "epoch": 0.020198, + "loss_gen": 4.896316051483154, + "loss_rtd": 0.28835970163345337, + "loss_sent": 0.15937300026416779, + "loss_sod": 0.1062634140253067, + "loss_total": 0.5539960861206055, + "step": 123099 + }, + { + "epoch": 0.0202, + "grad_norm": 3.124485731124878, + "learning_rate": 5.92758471000566e-05, + "loss": 0.5493, + "step": 123100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.231166362762451, + "loss_rtd": 0.3091714084148407, + "loss_sent": 0.3914588987827301, + "loss_sod": 0.023496031761169434, + "loss_total": 0.7241263389587402, + "step": 123199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.008862018585205, + "loss_rtd": 0.31297391653060913, + "loss_sent": 0.3347489535808563, + "loss_sod": 0.07666729390621185, + "loss_total": 0.7243901491165161, + "step": 123199 + }, + { + "epoch": 0.0204, + "grad_norm": 2.304495096206665, + "learning_rate": 5.924466283042435e-05, + "loss": 0.5379, + "step": 123200 + }, + { + "epoch": 0.020598, + "loss_gen": 4.942907333374023, + "loss_rtd": 0.30902355909347534, + "loss_sent": 0.16258026659488678, + "loss_sod": 0.09693579375743866, + "loss_total": 0.5685396194458008, + "step": 123299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.27769136428833, + "loss_rtd": 0.32299166917800903, + "loss_sent": 0.5551480650901794, + "loss_sod": 0.1346132755279541, + "loss_total": 1.0127530097961426, + "step": 123299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.7606263160705566, + "learning_rate": 5.9213474837043014e-05, + "loss": 0.5298, + "step": 123300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.086638450622559, + "loss_rtd": 0.28601691126823425, + "loss_sent": 0.2600572407245636, + "loss_sod": 0.01576514169573784, + "loss_total": 0.5618392825126648, + "step": 123399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.126517295837402, + "loss_rtd": 0.29980188608169556, + "loss_sent": 0.3001590371131897, + "loss_sod": 0.03337424248456955, + "loss_total": 0.6333351731300354, + "step": 123399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.766998529434204, + "learning_rate": 5.918228313247511e-05, + "loss": 0.5352, + "step": 123400 + }, + { + "epoch": 0.020998, + "loss_gen": 4.065799236297607, + "loss_rtd": 0.27878060936927795, + "loss_sent": 0.07709919661283493, + "loss_sod": 0.07623769342899323, + "loss_total": 0.4321175217628479, + "step": 123499 + }, + { + "epoch": 0.020998, + "loss_gen": 4.259700298309326, + "loss_rtd": 0.29394569993019104, + "loss_sent": 0.024310484528541565, + "loss_sod": 0.10083907842636108, + "loss_total": 0.4190952777862549, + "step": 123499 + }, + { + "epoch": 0.021, + "grad_norm": 1.1099777221679688, + "learning_rate": 5.915108772928468e-05, + "loss": 0.5423, + "step": 123500 + }, + { + "epoch": 0.021198, + "loss_gen": 4.254960060119629, + "loss_rtd": 0.2864917516708374, + "loss_sent": 0.0008751353598199785, + "loss_sod": 0.1623045951128006, + "loss_total": 0.4496714770793915, + "step": 123599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.17879581451416, + "loss_rtd": 0.28422412276268005, + "loss_sent": 5.6978515203809366e-05, + "loss_sod": 0.13626736402511597, + "loss_total": 0.4205484688282013, + "step": 123599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.1184039115905762, + "learning_rate": 5.911988864003718e-05, + "loss": 0.5359, + "step": 123600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.00405216217041, + "loss_rtd": 0.3066645562648773, + "loss_sent": 0.06916403770446777, + "loss_sod": 0.027641355991363525, + "loss_total": 0.4034699499607086, + "step": 123699 + }, + { + "epoch": 0.021398, + "loss_gen": 4.681728363037109, + "loss_rtd": 0.2836722135543823, + "loss_sent": 0.029413238167762756, + "loss_sod": 0.16685713827610016, + "loss_total": 0.47994256019592285, + "step": 123699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.7159540057182312, + "learning_rate": 5.9088685877299645e-05, + "loss": 0.528, + "step": 123700 + }, + { + "epoch": 0.021598, + "loss_gen": 4.758907318115234, + "loss_rtd": 0.29878365993499756, + "loss_sent": 0.06928456574678421, + "loss_sod": 0.1059439554810524, + "loss_total": 0.474012166261673, + "step": 123799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.365872383117676, + "loss_rtd": 0.28514567017555237, + "loss_sent": 0.06936918944120407, + "loss_sod": 0.1124887466430664, + "loss_total": 0.46700361371040344, + "step": 123799 + }, + { + "epoch": 0.0216, + "grad_norm": 0.8247285485267639, + "learning_rate": 5.905747945364052e-05, + "loss": 0.5443, + "step": 123800 + }, + { + "epoch": 0.021798, + "loss_gen": 4.873709678649902, + "loss_rtd": 0.2912997007369995, + "loss_sent": 0.14019466936588287, + "loss_sod": 0.09625925868749619, + "loss_total": 0.5277536511421204, + "step": 123899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.2385478019714355, + "loss_rtd": 0.3011288642883301, + "loss_sent": 0.38254106044769287, + "loss_sod": 0.06964413821697235, + "loss_total": 0.7533140778541565, + "step": 123899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.5148694515228271, + "learning_rate": 5.902626938162975e-05, + "loss": 0.554, + "step": 123900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.200740814208984, + "loss_rtd": 0.30683231353759766, + "loss_sent": 0.1646765172481537, + "loss_sod": 0.020718924701213837, + "loss_total": 0.4922277629375458, + "step": 123999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.879190921783447, + "loss_rtd": 0.3014052212238312, + "loss_sent": 0.12611354887485504, + "loss_sod": 0.031445227563381195, + "loss_total": 0.4589639902114868, + "step": 123999 + }, + { + "epoch": 0.022, + "grad_norm": 0.8267918825149536, + "learning_rate": 5.899505567383876e-05, + "loss": 0.5372, + "step": 124000 + }, + { + "epoch": 0.022, + "eval_loss": 0.5149691104888916, + "eval_runtime": 151.0729, + "eval_samples_per_second": 102.222, + "eval_steps_per_second": 0.801, + "step": 124000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.533763885498047, + "loss_rtd": 0.2997266352176666, + "loss_sent": 0.11060944199562073, + "loss_sod": 0.020724087953567505, + "loss_total": 0.43106016516685486, + "step": 124099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.938562393188477, + "loss_rtd": 0.3038523197174072, + "loss_sent": 0.1367499977350235, + "loss_sod": 0.049065910279750824, + "loss_total": 0.48966825008392334, + "step": 124099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.5654602646827698, + "learning_rate": 5.896383834284042e-05, + "loss": 0.5342, + "step": 124100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.137826442718506, + "loss_rtd": 0.32040926814079285, + "loss_sent": 0.47739261388778687, + "loss_sod": 0.10855264961719513, + "loss_total": 0.906354546546936, + "step": 124199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.2313408851623535, + "loss_rtd": 0.27901092171669006, + "loss_sent": 0.21637006103992462, + "loss_sod": 0.1009574681520462, + "loss_total": 0.5963384509086609, + "step": 124199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.7423334121704102, + "learning_rate": 5.893261740120907e-05, + "loss": 0.5487, + "step": 124200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.9825944900512695, + "loss_rtd": 0.31523093581199646, + "loss_sent": 0.09425953775644302, + "loss_sod": 0.10607744753360748, + "loss_total": 0.51556795835495, + "step": 124299 + }, + { + "epoch": 0.022598, + "loss_gen": 3.9386215209960938, + "loss_rtd": 0.2735031843185425, + "loss_sent": 3.841445141006261e-05, + "loss_sod": 0.1777140200138092, + "loss_total": 0.4512556195259094, + "step": 124299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.0147106647491455, + "learning_rate": 5.890139286152048e-05, + "loss": 0.5542, + "step": 124300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.006237506866455, + "loss_rtd": 0.3022223711013794, + "loss_sent": 0.166145458817482, + "loss_sod": 0.010666808113455772, + "loss_total": 0.4790346622467041, + "step": 124399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.274487495422363, + "loss_rtd": 0.2841145694255829, + "loss_sent": 0.2956092655658722, + "loss_sod": 0.056456875056028366, + "loss_total": 0.6361806988716125, + "step": 124399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.7239913940429688, + "learning_rate": 5.887016473635193e-05, + "loss": 0.5355, + "step": 124400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.045144081115723, + "loss_rtd": 0.29968875646591187, + "loss_sent": 0.17614498734474182, + "loss_sod": 0.036587730050086975, + "loss_total": 0.5124214887619019, + "step": 124499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.091627597808838, + "loss_rtd": 0.28908464312553406, + "loss_sent": 0.5740426182746887, + "loss_sod": 0.08704821765422821, + "loss_total": 0.9501754641532898, + "step": 124499 + }, + { + "epoch": 0.023, + "grad_norm": 2.1002988815307617, + "learning_rate": 5.8838933038282075e-05, + "loss": 0.5456, + "step": 124500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.296675682067871, + "loss_rtd": 0.3025449216365814, + "loss_sent": 0.459489107131958, + "loss_sod": 0.10067996382713318, + "loss_total": 0.8627139925956726, + "step": 124599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.145221710205078, + "loss_rtd": 0.3288326561450958, + "loss_sent": 0.0934688001871109, + "loss_sod": 0.03551265969872475, + "loss_total": 0.45781409740448, + "step": 124599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.4806410074234009, + "learning_rate": 5.880769777989106e-05, + "loss": 0.5551, + "step": 124600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.228641033172607, + "loss_rtd": 0.29840022325515747, + "loss_sent": 0.2383067011833191, + "loss_sod": 0.04886143282055855, + "loss_total": 0.585568368434906, + "step": 124699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.166115760803223, + "loss_rtd": 0.28897228837013245, + "loss_sent": 0.2666550874710083, + "loss_sod": 0.024279436096549034, + "loss_total": 0.5799068212509155, + "step": 124699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.4311983585357666, + "learning_rate": 5.877645897376042e-05, + "loss": 0.5439, + "step": 124700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.865180492401123, + "loss_rtd": 0.34005826711654663, + "loss_sent": 0.1311091035604477, + "loss_sod": 0.002296092454344034, + "loss_total": 0.4734634757041931, + "step": 124799 + }, + { + "epoch": 0.023598, + "loss_gen": 4.964910507202148, + "loss_rtd": 0.31438836455345154, + "loss_sent": 0.070197694003582, + "loss_sod": 0.004081283695995808, + "loss_total": 0.38866734504699707, + "step": 124799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.5093366503715515, + "learning_rate": 5.874521663247316e-05, + "loss": 0.5561, + "step": 124800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.0229339599609375, + "loss_rtd": 0.2815239131450653, + "loss_sent": 0.11104641854763031, + "loss_sod": 0.031022746115922928, + "loss_total": 0.42359310388565063, + "step": 124899 + }, + { + "epoch": 0.023798, + "loss_gen": 4.6763691902160645, + "loss_rtd": 0.3147454857826233, + "loss_sent": 0.01773260533809662, + "loss_sod": 0.16944128274917603, + "loss_total": 0.5019193887710571, + "step": 124899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.7633762359619141, + "learning_rate": 5.871397076861368e-05, + "loss": 0.5414, + "step": 124900 + }, + { + "epoch": 0.023998, + "loss_gen": 4.248086452484131, + "loss_rtd": 0.2752586901187897, + "loss_sent": 3.502455001580529e-05, + "loss_sod": 0.15013575553894043, + "loss_total": 0.42542946338653564, + "step": 124999 + }, + { + "epoch": 0.023998, + "loss_gen": 4.334212303161621, + "loss_rtd": 0.2842738926410675, + "loss_sent": 0.00014213178656063974, + "loss_sod": 0.16111718118190765, + "loss_total": 0.4455331861972809, + "step": 124999 + }, + { + "epoch": 0.024, + "grad_norm": 0.9359869360923767, + "learning_rate": 5.868272139476783e-05, + "loss": 0.5328, + "step": 125000 + }, + { + "epoch": 0.024, + "eval_loss": 0.5189536809921265, + "eval_runtime": 151.055, + "eval_samples_per_second": 102.234, + "eval_steps_per_second": 0.801, + "step": 125000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.009149074554443, + "loss_rtd": 0.2947096824645996, + "loss_sent": 0.14182795584201813, + "loss_sod": 0.022032614797353745, + "loss_total": 0.4585702419281006, + "step": 125099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.443232536315918, + "loss_rtd": 0.2923023998737335, + "loss_sent": 0.19547949731349945, + "loss_sod": 0.09092387557029724, + "loss_total": 0.5787057876586914, + "step": 125099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.1454132795333862, + "learning_rate": 5.8651468523522826e-05, + "loss": 0.5491, + "step": 125100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.078240871429443, + "loss_rtd": 0.30490824580192566, + "loss_sent": 0.20921728014945984, + "loss_sod": 0.09317630529403687, + "loss_total": 0.6073018312454224, + "step": 125199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.094516754150391, + "loss_rtd": 0.29713723063468933, + "loss_sent": 0.2630443572998047, + "loss_sod": 0.1428437978029251, + "loss_total": 0.7030254006385803, + "step": 125199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.592337965965271, + "learning_rate": 5.862021216746735e-05, + "loss": 0.5311, + "step": 125200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.01593017578125, + "loss_rtd": 0.2979161739349365, + "loss_sent": 0.23441477119922638, + "loss_sod": 0.06355893611907959, + "loss_total": 0.5958898663520813, + "step": 125299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.215662002563477, + "loss_rtd": 0.30738565325737, + "loss_sent": 0.12065132707357407, + "loss_sod": 0.034862369298934937, + "loss_total": 0.4628993570804596, + "step": 125299 + }, + { + "epoch": 0.0246, + "grad_norm": 0.7183341383934021, + "learning_rate": 5.858895233919143e-05, + "loss": 0.5316, + "step": 125300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.217257022857666, + "loss_rtd": 0.29606208205223083, + "loss_sent": 0.32616057991981506, + "loss_sod": 0.029719607904553413, + "loss_total": 0.651942253112793, + "step": 125399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.335766315460205, + "loss_rtd": 0.2977655529975891, + "loss_sent": 0.3512638807296753, + "loss_sod": 0.10390587151050568, + "loss_total": 0.7529352903366089, + "step": 125399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.5178121328353882, + "learning_rate": 5.855768905128654e-05, + "loss": 0.5522, + "step": 125400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.150040149688721, + "loss_rtd": 0.2950993776321411, + "loss_sent": 0.28894516825675964, + "loss_sod": 0.04238574951887131, + "loss_total": 0.6264302730560303, + "step": 125499 + }, + { + "epoch": 0.024998, + "loss_gen": 4.919865131378174, + "loss_rtd": 0.29594555497169495, + "loss_sent": 0.09065475314855576, + "loss_sod": 0.17580023407936096, + "loss_total": 0.5624005198478699, + "step": 125499 + }, + { + "epoch": 0.025, + "grad_norm": 1.1456927061080933, + "learning_rate": 5.852642231634553e-05, + "loss": 0.5289, + "step": 125500 + }, + { + "epoch": 0.025198, + "loss_gen": 4.215357303619385, + "loss_rtd": 0.273853600025177, + "loss_sent": 3.686283525894396e-05, + "loss_sod": 0.11730808764696121, + "loss_total": 0.3911985456943512, + "step": 125599 + }, + { + "epoch": 0.025198, + "loss_gen": 4.287888526916504, + "loss_rtd": 0.29126518964767456, + "loss_sent": 0.0007547377608716488, + "loss_sod": 0.1250227689743042, + "loss_total": 0.41704270243644714, + "step": 125599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.0336772203445435, + "learning_rate": 5.849515214696262e-05, + "loss": 0.5185, + "step": 125600 + }, + { + "epoch": 0.025398, + "loss_gen": 4.999906539916992, + "loss_rtd": 0.30497992038726807, + "loss_sent": 0.16713660955429077, + "loss_sod": 0.02549305558204651, + "loss_total": 0.49760958552360535, + "step": 125699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.279110908508301, + "loss_rtd": 0.29239341616630554, + "loss_sent": 0.18312755227088928, + "loss_sod": 0.09581852704286575, + "loss_total": 0.57133948802948, + "step": 125699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.8091269135475159, + "learning_rate": 5.846387855573345e-05, + "loss": 0.5448, + "step": 125700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.951651096343994, + "loss_rtd": 0.32205092906951904, + "loss_sent": 0.4923538863658905, + "loss_sod": 0.13187460601329803, + "loss_total": 0.9462794065475464, + "step": 125799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.742814064025879, + "loss_rtd": 0.2959570288658142, + "loss_sent": 0.11529549956321716, + "loss_sod": 0.06500224024057388, + "loss_total": 0.47625476121902466, + "step": 125799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.7001880407333374, + "learning_rate": 5.8432601555254996e-05, + "loss": 0.5498, + "step": 125800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.141140937805176, + "loss_rtd": 0.3066727817058563, + "loss_sent": 0.34323009848594666, + "loss_sod": 0.07900206744670868, + "loss_total": 0.7289049625396729, + "step": 125899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.802655220031738, + "loss_rtd": 0.300791472196579, + "loss_sent": 0.23526684939861298, + "loss_sod": 0.020103124901652336, + "loss_total": 0.5561614632606506, + "step": 125899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.6712987422943115, + "learning_rate": 5.8401321158125666e-05, + "loss": 0.5428, + "step": 125900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.243439674377441, + "loss_rtd": 0.287342369556427, + "loss_sent": 0.41289782524108887, + "loss_sod": 0.03788952901959419, + "loss_total": 0.738129734992981, + "step": 125999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.358307838439941, + "loss_rtd": 0.2985134720802307, + "loss_sent": 0.12002741545438766, + "loss_sod": 0.0510287806391716, + "loss_total": 0.46956968307495117, + "step": 125999 + }, + { + "epoch": 0.026, + "grad_norm": 2.2706379890441895, + "learning_rate": 5.837003737694515e-05, + "loss": 0.5549, + "step": 126000 + }, + { + "epoch": 0.026, + "eval_loss": 0.5160689949989319, + "eval_runtime": 151.3702, + "eval_samples_per_second": 102.021, + "eval_steps_per_second": 0.799, + "step": 126000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.723132610321045, + "loss_rtd": 0.2982438802719116, + "loss_sent": 0.007835360243916512, + "loss_sod": 0.21524950861930847, + "loss_total": 0.5213288068771362, + "step": 126099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.581118106842041, + "loss_rtd": 0.27140286564826965, + "loss_sent": 0.03246323764324188, + "loss_sod": 0.11235076189041138, + "loss_total": 0.4162168502807617, + "step": 126099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.3443397283554077, + "learning_rate": 5.833875022431461e-05, + "loss": 0.5302, + "step": 126100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.162801742553711, + "loss_rtd": 0.2833329737186432, + "loss_sent": 0.2646946310997009, + "loss_sod": 0.06836559623479843, + "loss_total": 0.6163932085037231, + "step": 126199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.058148384094238, + "loss_rtd": 0.29877620935440063, + "loss_sent": 0.3333621323108673, + "loss_sod": 0.06412633508443832, + "loss_total": 0.6962646842002869, + "step": 126199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.4117292165756226, + "learning_rate": 5.830745971283645e-05, + "loss": 0.5333, + "step": 126200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.934667110443115, + "loss_rtd": 0.2820201814174652, + "loss_sent": 0.1041148379445076, + "loss_sod": 0.0714135393500328, + "loss_total": 0.4575485587120056, + "step": 126299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.061122417449951, + "loss_rtd": 0.3041793704032898, + "loss_sent": 0.14399518072605133, + "loss_sod": 0.09221570193767548, + "loss_total": 0.5403902530670166, + "step": 126299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.7418568730354309, + "learning_rate": 5.827616585511453e-05, + "loss": 0.5448, + "step": 126300 + }, + { + "epoch": 0.026798, + "loss_gen": 4.881433486938477, + "loss_rtd": 0.312472403049469, + "loss_sent": 0.19444599747657776, + "loss_sod": 0.06203896924853325, + "loss_total": 0.5689573287963867, + "step": 126399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.254079341888428, + "loss_rtd": 0.304902046918869, + "loss_sent": 0.2852088510990143, + "loss_sod": 0.04830005019903183, + "loss_total": 0.6384109258651733, + "step": 126399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.1201962232589722, + "learning_rate": 5.8244868663753985e-05, + "loss": 0.5564, + "step": 126400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.201193809509277, + "loss_rtd": 0.26028522849082947, + "loss_sent": 0.14957848191261292, + "loss_sod": 0.084686279296875, + "loss_total": 0.4945499897003174, + "step": 126499 + }, + { + "epoch": 0.026998, + "loss_gen": 4.998375415802002, + "loss_rtd": 0.31030091643333435, + "loss_sent": 0.20636168122291565, + "loss_sod": 0.017277412116527557, + "loss_total": 0.5339400172233582, + "step": 126499 + }, + { + "epoch": 0.027, + "grad_norm": 1.3054890632629395, + "learning_rate": 5.821356815136133e-05, + "loss": 0.5452, + "step": 126500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.041162490844727, + "loss_rtd": 0.3027835488319397, + "loss_sent": 0.045284755527973175, + "loss_sod": 0.04813477769494057, + "loss_total": 0.39620307087898254, + "step": 126599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.0087995529174805, + "loss_rtd": 0.3090861141681671, + "loss_sent": 0.2531428933143616, + "loss_sod": 0.013048935681581497, + "loss_total": 0.5752779245376587, + "step": 126599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.8977271914482117, + "learning_rate": 5.818226433054441e-05, + "loss": 0.5542, + "step": 126600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.059216022491455, + "loss_rtd": 0.3061828911304474, + "loss_sent": 0.07738476246595383, + "loss_sod": 0.12489735335111618, + "loss_total": 0.5084649920463562, + "step": 126699 + }, + { + "epoch": 0.027398, + "loss_gen": 4.808566093444824, + "loss_rtd": 0.3014642298221588, + "loss_sent": 0.3923496901988983, + "loss_sod": 0.02318255417048931, + "loss_total": 0.716996431350708, + "step": 126699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.1886048316955566, + "learning_rate": 5.8150957213912406e-05, + "loss": 0.5504, + "step": 126700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.194309234619141, + "loss_rtd": 0.29297277331352234, + "loss_sent": 0.19681888818740845, + "loss_sod": 0.04647182673215866, + "loss_total": 0.5362634658813477, + "step": 126799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.107058048248291, + "loss_rtd": 0.3071816861629486, + "loss_sent": 0.1383209228515625, + "loss_sod": 0.012243765406310558, + "loss_total": 0.45774638652801514, + "step": 126799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.1656361818313599, + "learning_rate": 5.811964681407579e-05, + "loss": 0.5231, + "step": 126800 + }, + { + "epoch": 0.027798, + "loss_gen": 4.980090141296387, + "loss_rtd": 0.3053024113178253, + "loss_sent": 0.10915306210517883, + "loss_sod": 0.015619794838130474, + "loss_total": 0.4300752580165863, + "step": 126899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.233313083648682, + "loss_rtd": 0.28620627522468567, + "loss_sent": 0.08538281917572021, + "loss_sod": 0.054982736706733704, + "loss_total": 0.4265718460083008, + "step": 126899 + }, + { + "epoch": 0.0278, + "grad_norm": 0.9681254029273987, + "learning_rate": 5.808833314364642e-05, + "loss": 0.5424, + "step": 126900 + }, + { + "epoch": 0.027998, + "loss_gen": 4.407595634460449, + "loss_rtd": 0.28838080167770386, + "loss_sent": 0.0010148589499294758, + "loss_sod": 0.16616910696029663, + "loss_total": 0.4555647671222687, + "step": 126999 + }, + { + "epoch": 0.027998, + "loss_gen": 4.3773322105407715, + "loss_rtd": 0.2868803143501282, + "loss_sent": 0.03552349656820297, + "loss_sod": 0.23757776618003845, + "loss_total": 0.5599815249443054, + "step": 126999 + }, + { + "epoch": 0.028, + "grad_norm": 1.0379002094268799, + "learning_rate": 5.8057016215237415e-05, + "loss": 0.5381, + "step": 127000 + }, + { + "epoch": 0.028, + "eval_loss": 0.5150266289710999, + "eval_runtime": 151.2575, + "eval_samples_per_second": 102.097, + "eval_steps_per_second": 0.8, + "step": 127000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.079526901245117, + "loss_rtd": 0.30835166573524475, + "loss_sent": 0.09046214073896408, + "loss_sod": 0.1558559536933899, + "loss_total": 0.5546697378158569, + "step": 127099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.022377014160156, + "loss_rtd": 0.3085956871509552, + "loss_sent": 0.2875477075576782, + "loss_sod": 0.040983445942401886, + "loss_total": 0.6371268033981323, + "step": 127099 + }, + { + "epoch": 0.0282, + "grad_norm": 0.8533868193626404, + "learning_rate": 5.8025696041463264e-05, + "loss": 0.5328, + "step": 127100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.05034875869751, + "loss_rtd": 0.29051896929740906, + "loss_sent": 0.2004084438085556, + "loss_sod": 0.1325872391462326, + "loss_total": 0.6235146522521973, + "step": 127199 + }, + { + "epoch": 0.028398, + "loss_gen": 4.871407508850098, + "loss_rtd": 0.29680755734443665, + "loss_sent": 0.23683467507362366, + "loss_sod": 0.05222240835428238, + "loss_total": 0.5858646631240845, + "step": 127199 + }, + { + "epoch": 0.0284, + "grad_norm": 0.9194096922874451, + "learning_rate": 5.799437263493968e-05, + "loss": 0.5305, + "step": 127200 + }, + { + "epoch": 0.028598, + "loss_gen": 4.758853912353516, + "loss_rtd": 0.2984887659549713, + "loss_sent": 0.19850069284439087, + "loss_sod": 0.02239525318145752, + "loss_total": 0.5193847417831421, + "step": 127299 + }, + { + "epoch": 0.028598, + "loss_gen": 4.938508033752441, + "loss_rtd": 0.2909735143184662, + "loss_sent": 0.18434683978557587, + "loss_sod": 0.05350429564714432, + "loss_total": 0.5288246273994446, + "step": 127299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.3278090953826904, + "learning_rate": 5.796304600828377e-05, + "loss": 0.5242, + "step": 127300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.2907819747924805, + "loss_rtd": 0.3024289906024933, + "loss_sent": 0.1467730700969696, + "loss_sod": 0.11514891684055328, + "loss_total": 0.564350962638855, + "step": 127399 + }, + { + "epoch": 0.028798, + "loss_gen": 4.858901500701904, + "loss_rtd": 0.2916867434978485, + "loss_sent": 0.11312361061573029, + "loss_sod": 0.053617626428604126, + "loss_total": 0.45842796564102173, + "step": 127399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.8836105465888977, + "learning_rate": 5.7931716174113874e-05, + "loss": 0.5525, + "step": 127400 + }, + { + "epoch": 0.028998, + "loss_gen": 4.528642654418945, + "loss_rtd": 0.2835925817489624, + "loss_sent": 0.04004380851984024, + "loss_sod": 0.07841572165489197, + "loss_total": 0.402052104473114, + "step": 127499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.090068340301514, + "loss_rtd": 0.28824785351753235, + "loss_sent": 0.14014855027198792, + "loss_sod": 0.051337309181690216, + "loss_total": 0.4797337055206299, + "step": 127499 + }, + { + "epoch": 0.029, + "grad_norm": 1.31979238986969, + "learning_rate": 5.790038314504966e-05, + "loss": 0.5613, + "step": 127500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.043606758117676, + "loss_rtd": 0.3019232749938965, + "loss_sent": 0.21159958839416504, + "loss_sod": 0.07312679290771484, + "loss_total": 0.5866496562957764, + "step": 127599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.199647426605225, + "loss_rtd": 0.29339706897735596, + "loss_sent": 0.09941595047712326, + "loss_sod": 0.03714088350534439, + "loss_total": 0.4299539029598236, + "step": 127599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.5811394453048706, + "learning_rate": 5.786904693371205e-05, + "loss": 0.5506, + "step": 127600 + }, + { + "epoch": 0.029398, + "loss_gen": 4.513834476470947, + "loss_rtd": 0.282697856426239, + "loss_sent": 0.05248807370662689, + "loss_sod": 0.08066190779209137, + "loss_total": 0.4158478379249573, + "step": 127699 + }, + { + "epoch": 0.029398, + "loss_gen": 4.730311870574951, + "loss_rtd": 0.2756289541721344, + "loss_sent": 0.013358295895159245, + "loss_sod": 0.14654584228992462, + "loss_total": 0.4355331063270569, + "step": 127699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.7642179727554321, + "learning_rate": 5.783770755272329e-05, + "loss": 0.5432, + "step": 127700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.3390398025512695, + "loss_rtd": 0.3092106878757477, + "loss_sent": 0.07087057828903198, + "loss_sod": 0.10748178511857986, + "loss_total": 0.48756304383277893, + "step": 127799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.166284084320068, + "loss_rtd": 0.2996288537979126, + "loss_sent": 0.02704087272286415, + "loss_sod": 0.12750141322612762, + "loss_total": 0.45417115092277527, + "step": 127799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.0137939453125, + "learning_rate": 5.780636501470685e-05, + "loss": 0.5488, + "step": 127800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.168057441711426, + "loss_rtd": 0.28732624650001526, + "loss_sent": 0.6037995219230652, + "loss_sod": 0.05241278186440468, + "loss_total": 0.9435385465621948, + "step": 127899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.055641174316406, + "loss_rtd": 0.290254145860672, + "loss_sent": 0.16115570068359375, + "loss_sod": 0.10485047101974487, + "loss_total": 0.556260347366333, + "step": 127899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.5859217643737793, + "learning_rate": 5.777501933228753e-05, + "loss": 0.5283, + "step": 127900 + }, + { + "epoch": 0.029998, + "loss_gen": 4.708909034729004, + "loss_rtd": 0.29839926958084106, + "loss_sent": 0.2220747321844101, + "loss_sod": 0.061270326375961304, + "loss_total": 0.5817443132400513, + "step": 127999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.2027764320373535, + "loss_rtd": 0.3080592751502991, + "loss_sent": 0.25785529613494873, + "loss_sod": 0.03954096511006355, + "loss_total": 0.6054555177688599, + "step": 127999 + }, + { + "epoch": 0.03, + "grad_norm": 1.232383131980896, + "learning_rate": 5.774367051809134e-05, + "loss": 0.5501, + "step": 128000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5161465406417847, + "eval_runtime": 150.9333, + "eval_samples_per_second": 102.317, + "eval_steps_per_second": 0.802, + "step": 128000 + }, + { + "epoch": 0.030198, + "loss_gen": 4.795260429382324, + "loss_rtd": 0.29082995653152466, + "loss_sent": 0.062463853508234024, + "loss_sod": 0.08286631852388382, + "loss_total": 0.4361601173877716, + "step": 128099 + }, + { + "epoch": 0.030198, + "loss_gen": 4.604288578033447, + "loss_rtd": 0.2832608222961426, + "loss_sent": 0.08235875517129898, + "loss_sod": 0.06919785588979721, + "loss_total": 0.43481743335723877, + "step": 128099 + }, + { + "epoch": 0.0302, + "grad_norm": 0.6762851476669312, + "learning_rate": 5.771231858474559e-05, + "loss": 0.531, + "step": 128100 + }, + { + "epoch": 0.030398, + "loss_gen": 4.632268905639648, + "loss_rtd": 0.31469446420669556, + "loss_sent": 0.160960391163826, + "loss_sod": 0.023928089067339897, + "loss_total": 0.4995829463005066, + "step": 128199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.145657062530518, + "loss_rtd": 0.30525362491607666, + "loss_sent": 0.07963842898607254, + "loss_sod": 0.12434092164039612, + "loss_total": 0.5092329978942871, + "step": 128199 + }, + { + "epoch": 0.0304, + "grad_norm": 0.7570841908454895, + "learning_rate": 5.768096354487885e-05, + "loss": 0.5363, + "step": 128200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.123581886291504, + "loss_rtd": 0.3003334403038025, + "loss_sent": 0.26045313477516174, + "loss_sod": 0.056969910860061646, + "loss_total": 0.6177564859390259, + "step": 128299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.559329986572266, + "loss_rtd": 0.2909456789493561, + "loss_sent": 0.38860034942626953, + "loss_sod": 0.039868880063295364, + "loss_total": 0.7194149494171143, + "step": 128299 + }, + { + "epoch": 0.0306, + "grad_norm": 1.918216347694397, + "learning_rate": 5.76496054111209e-05, + "loss": 0.535, + "step": 128300 + }, + { + "epoch": 0.030798, + "loss_gen": 4.942437648773193, + "loss_rtd": 0.3000963628292084, + "loss_sent": 0.13890379667282104, + "loss_sod": 0.01041445042937994, + "loss_total": 0.44941461086273193, + "step": 128399 + }, + { + "epoch": 0.030798, + "loss_gen": 4.34162712097168, + "loss_rtd": 0.2815767526626587, + "loss_sent": 0.007178016472607851, + "loss_sod": 0.10765630006790161, + "loss_total": 0.39641106128692627, + "step": 128399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.6317238807678223, + "learning_rate": 5.761824419610282e-05, + "loss": 0.5502, + "step": 128400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.209155082702637, + "loss_rtd": 0.29979637265205383, + "loss_sent": 0.06951673328876495, + "loss_sod": 0.15041989088058472, + "loss_total": 0.5197330117225647, + "step": 128499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.554530143737793, + "loss_rtd": 0.307586669921875, + "loss_sent": 0.20848438143730164, + "loss_sod": 0.0821925476193428, + "loss_total": 0.5982636213302612, + "step": 128499 + }, + { + "epoch": 0.031, + "grad_norm": 1.3530056476593018, + "learning_rate": 5.758687991245687e-05, + "loss": 0.5532, + "step": 128500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.005382537841797, + "loss_rtd": 0.2981312870979309, + "loss_sent": 0.37801048159599304, + "loss_sod": 0.07218707352876663, + "loss_total": 0.7483288049697876, + "step": 128599 + }, + { + "epoch": 0.031198, + "loss_gen": 4.880046367645264, + "loss_rtd": 0.30107080936431885, + "loss_sent": 0.4745630919933319, + "loss_sod": 0.038496892899274826, + "loss_total": 0.8141307830810547, + "step": 128599 + }, + { + "epoch": 0.0312, + "grad_norm": 4.0272417068481445, + "learning_rate": 5.7555512572816616e-05, + "loss": 0.5392, + "step": 128600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.588792324066162, + "loss_rtd": 0.290439248085022, + "loss_sent": 0.4006589353084564, + "loss_sod": 0.01840728148818016, + "loss_total": 0.7095054388046265, + "step": 128699 + }, + { + "epoch": 0.031398, + "loss_gen": 4.9264607429504395, + "loss_rtd": 0.2721864879131317, + "loss_sent": 0.12008009105920792, + "loss_sod": 0.046869415789842606, + "loss_total": 0.43913599848747253, + "step": 128699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.8392835855484009, + "learning_rate": 5.7524142189816785e-05, + "loss": 0.5397, + "step": 128700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.434980392456055, + "loss_rtd": 0.2939222753047943, + "loss_sent": 0.24817781150341034, + "loss_sod": 0.04262280464172363, + "loss_total": 0.5847228765487671, + "step": 128799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.024068355560303, + "loss_rtd": 0.28636598587036133, + "loss_sent": 0.14124320447444916, + "loss_sod": 0.05179080367088318, + "loss_total": 0.47939997911453247, + "step": 128799 + }, + { + "epoch": 0.0316, + "grad_norm": 0.8851576447486877, + "learning_rate": 5.7492768776093394e-05, + "loss": 0.5415, + "step": 128800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.2839884757995605, + "loss_rtd": 0.29162558913230896, + "loss_sent": 0.08399336785078049, + "loss_sod": 0.09393030405044556, + "loss_total": 0.4695492684841156, + "step": 128899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.347387790679932, + "loss_rtd": 0.3011826276779175, + "loss_sent": 0.19613268971443176, + "loss_sod": 0.04818310588598251, + "loss_total": 0.5454984307289124, + "step": 128899 + }, + { + "epoch": 0.0318, + "grad_norm": 0.8077017068862915, + "learning_rate": 5.7461392344283626e-05, + "loss": 0.5444, + "step": 128900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.278444766998291, + "loss_rtd": 0.2844964563846588, + "loss_sent": 0.03407386317849159, + "loss_sod": 0.01254432462155819, + "loss_total": 0.33111464977264404, + "step": 128999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.117967128753662, + "loss_rtd": 0.301628053188324, + "loss_sent": 0.10132772475481033, + "loss_sod": 0.052032314240932465, + "loss_total": 0.4549880921840668, + "step": 128999 + }, + { + "epoch": 0.032, + "grad_norm": 0.5169385075569153, + "learning_rate": 5.743001290702592e-05, + "loss": 0.5357, + "step": 129000 + }, + { + "epoch": 0.032, + "eval_loss": 0.5248415470123291, + "eval_runtime": 151.1474, + "eval_samples_per_second": 102.172, + "eval_steps_per_second": 0.801, + "step": 129000 + }, + { + "epoch": 0.032198, + "loss_gen": 4.844819068908691, + "loss_rtd": 0.3168841302394867, + "loss_sent": 0.23479963839054108, + "loss_sod": 0.00928429700434208, + "loss_total": 0.5609680414199829, + "step": 129099 + }, + { + "epoch": 0.032198, + "loss_gen": 5.09084939956665, + "loss_rtd": 0.2808663547039032, + "loss_sent": 0.14822618663311005, + "loss_sod": 0.05094731226563454, + "loss_total": 0.4800398349761963, + "step": 129099 + }, + { + "epoch": 0.0322, + "grad_norm": 2.0065536499023438, + "learning_rate": 5.7398630476959894e-05, + "loss": 0.5392, + "step": 129100 + }, + { + "epoch": 0.032398, + "loss_gen": 4.217450141906738, + "loss_rtd": 0.2663937211036682, + "loss_sent": 0.0009667632402852178, + "loss_sod": 0.18625682592391968, + "loss_total": 0.45361730456352234, + "step": 129199 + }, + { + "epoch": 0.032398, + "loss_gen": 5.381683826446533, + "loss_rtd": 0.2975890636444092, + "loss_sent": 0.31217342615127563, + "loss_sod": 0.03514635190367699, + "loss_total": 0.6449088454246521, + "step": 129199 + }, + { + "epoch": 0.0324, + "grad_norm": 1.4418388605117798, + "learning_rate": 5.7367245066726415e-05, + "loss": 0.5377, + "step": 129200 + }, + { + "epoch": 0.032598, + "loss_gen": 4.9181294441223145, + "loss_rtd": 0.30215147137641907, + "loss_sent": 0.332130491733551, + "loss_sod": 0.03966284915804863, + "loss_total": 0.6739448308944702, + "step": 129299 + }, + { + "epoch": 0.032598, + "loss_gen": 5.0707550048828125, + "loss_rtd": 0.27600985765457153, + "loss_sent": 0.07822196930646896, + "loss_sod": 0.04753671586513519, + "loss_total": 0.4017685651779175, + "step": 129299 + }, + { + "epoch": 0.0326, + "grad_norm": 0.8299680352210999, + "learning_rate": 5.733585668896748e-05, + "loss": 0.5382, + "step": 129300 + }, + { + "epoch": 0.032798, + "loss_gen": 5.268406867980957, + "loss_rtd": 0.2856754660606384, + "loss_sent": 0.419086217880249, + "loss_sod": 0.021928519010543823, + "loss_total": 0.7266901731491089, + "step": 129399 + }, + { + "epoch": 0.032798, + "loss_gen": 5.475241661071777, + "loss_rtd": 0.3006037473678589, + "loss_sent": 0.09587062150239944, + "loss_sod": 0.14535808563232422, + "loss_total": 0.541832447052002, + "step": 129399 + }, + { + "epoch": 0.0328, + "grad_norm": 1.5298182964324951, + "learning_rate": 5.730446535632636e-05, + "loss": 0.5461, + "step": 129400 + }, + { + "epoch": 0.032998, + "loss_gen": 5.033608436584473, + "loss_rtd": 0.2969168722629547, + "loss_sent": 0.055923569947481155, + "loss_sod": 0.0731518417596817, + "loss_total": 0.42599231004714966, + "step": 129499 + }, + { + "epoch": 0.032998, + "loss_gen": 5.0876898765563965, + "loss_rtd": 0.30063095688819885, + "loss_sent": 0.2950197756290436, + "loss_sod": 0.015468468889594078, + "loss_total": 0.6111192107200623, + "step": 129499 + }, + { + "epoch": 0.033, + "grad_norm": 1.4737677574157715, + "learning_rate": 5.727307108144748e-05, + "loss": 0.5369, + "step": 129500 + }, + { + "epoch": 0.033198, + "loss_gen": 5.193936824798584, + "loss_rtd": 0.2994391620159149, + "loss_sent": 0.19632945954799652, + "loss_sod": 0.05846899002790451, + "loss_total": 0.5542376041412354, + "step": 129599 + }, + { + "epoch": 0.033198, + "loss_gen": 4.978903293609619, + "loss_rtd": 0.29046401381492615, + "loss_sent": 0.3001216948032379, + "loss_sod": 0.02204369381070137, + "loss_total": 0.6126294136047363, + "step": 129599 + }, + { + "epoch": 0.0332, + "grad_norm": 1.5114130973815918, + "learning_rate": 5.724167387697643e-05, + "loss": 0.5383, + "step": 129600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.067448616027832, + "loss_rtd": 0.29043594002723694, + "loss_sent": 0.064723439514637, + "loss_sod": 0.23341111838817596, + "loss_total": 0.5885704755783081, + "step": 129699 + }, + { + "epoch": 0.033398, + "loss_gen": 4.303596496582031, + "loss_rtd": 0.2770473062992096, + "loss_sent": 0.02179446630179882, + "loss_sod": 0.07781679183244705, + "loss_total": 0.37665855884552, + "step": 129699 + }, + { + "epoch": 0.0334, + "grad_norm": 0.9134754538536072, + "learning_rate": 5.7210273755560006e-05, + "loss": 0.5321, + "step": 129700 + }, + { + "epoch": 0.033598, + "loss_gen": 5.0098772048950195, + "loss_rtd": 0.28739839792251587, + "loss_sent": 0.03049510531127453, + "loss_sod": 0.27623870968818665, + "loss_total": 0.5941322445869446, + "step": 129799 + }, + { + "epoch": 0.033598, + "loss_gen": 4.247901439666748, + "loss_rtd": 0.26333802938461304, + "loss_sent": 4.661710045184009e-05, + "loss_sod": 0.24826735258102417, + "loss_total": 0.5116519927978516, + "step": 129799 + }, + { + "epoch": 0.0336, + "grad_norm": 1.8941473960876465, + "learning_rate": 5.717887072984617e-05, + "loss": 0.5345, + "step": 129800 + }, + { + "epoch": 0.033798, + "loss_gen": 4.365687847137451, + "loss_rtd": 0.27838221192359924, + "loss_sent": 0.027936099097132683, + "loss_sod": 0.2134028673171997, + "loss_total": 0.5197211503982544, + "step": 129899 + }, + { + "epoch": 0.033798, + "loss_gen": 5.483396530151367, + "loss_rtd": 0.2784916162490845, + "loss_sent": 0.1958129107952118, + "loss_sod": 0.06765724718570709, + "loss_total": 0.5419617891311646, + "step": 129899 + }, + { + "epoch": 0.0338, + "grad_norm": 1.7065073251724243, + "learning_rate": 5.7147464812484075e-05, + "loss": 0.5356, + "step": 129900 + }, + { + "epoch": 0.033998, + "loss_gen": 5.141090393066406, + "loss_rtd": 0.2807604968547821, + "loss_sent": 0.13576620817184448, + "loss_sod": 0.06998756527900696, + "loss_total": 0.48651427030563354, + "step": 129999 + }, + { + "epoch": 0.033998, + "loss_gen": 5.268886089324951, + "loss_rtd": 0.2966271936893463, + "loss_sent": 0.145225390791893, + "loss_sod": 0.10153379291296005, + "loss_total": 0.5433863997459412, + "step": 129999 + }, + { + "epoch": 0.034, + "grad_norm": 1.1913107633590698, + "learning_rate": 5.7116056016124e-05, + "loss": 0.5408, + "step": 130000 + }, + { + "epoch": 0.034, + "eval_loss": 0.5125383138656616, + "eval_runtime": 152.8002, + "eval_samples_per_second": 101.067, + "eval_steps_per_second": 0.792, + "step": 130000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.968357563018799, + "loss_rtd": 0.31810882687568665, + "loss_sent": 0.15254102647304535, + "loss_sod": 0.04814954102039337, + "loss_total": 0.5187994241714478, + "step": 130099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.501764297485352, + "loss_rtd": 0.30586525797843933, + "loss_sent": 0.2929389178752899, + "loss_sod": 0.06635792553424835, + "loss_total": 0.6651620864868164, + "step": 130099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.82964026927948, + "learning_rate": 5.7084644353417415e-05, + "loss": 0.5585, + "step": 130100 + }, + { + "epoch": 0.000398, + "loss_gen": 4.515683174133301, + "loss_rtd": 0.2780984342098236, + "loss_sent": 9.406738536199555e-05, + "loss_sod": 0.10286936163902283, + "loss_total": 0.381061851978302, + "step": 130199 + }, + { + "epoch": 0.000398, + "loss_gen": 4.640872001647949, + "loss_rtd": 0.2829115688800812, + "loss_sent": 0.17435689270496368, + "loss_sod": 0.09970413148403168, + "loss_total": 0.5569725632667542, + "step": 130199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.9950740933418274, + "learning_rate": 5.705322983701692e-05, + "loss": 0.539, + "step": 130200 + }, + { + "epoch": 0.000598, + "loss_gen": 4.904420852661133, + "loss_rtd": 0.31934723258018494, + "loss_sent": 0.3927709460258484, + "loss_sod": 0.00935314130038023, + "loss_total": 0.7214713096618652, + "step": 130299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.066431045532227, + "loss_rtd": 0.30573365092277527, + "loss_sent": 0.29260292649269104, + "loss_sod": 0.09204436838626862, + "loss_total": 0.6903809309005737, + "step": 130299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.1517211198806763, + "learning_rate": 5.702181247957631e-05, + "loss": 0.5413, + "step": 130300 + }, + { + "epoch": 0.000798, + "loss_gen": 4.618826389312744, + "loss_rtd": 0.2726590633392334, + "loss_sent": 6.71393281663768e-05, + "loss_sod": 0.2165694385766983, + "loss_total": 0.48929563164711, + "step": 130399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.029799461364746, + "loss_rtd": 0.28384435176849365, + "loss_sent": 0.13905425369739532, + "loss_sod": 0.10686061531305313, + "loss_total": 0.5297592282295227, + "step": 130399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.1434992551803589, + "learning_rate": 5.699039229375045e-05, + "loss": 0.5423, + "step": 130400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.674434185028076, + "loss_rtd": 0.2828030586242676, + "loss_sent": 0.06495529413223267, + "loss_sod": 0.22607634961605072, + "loss_total": 0.5738347172737122, + "step": 130499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.008847236633301, + "loss_rtd": 0.25200924277305603, + "loss_sent": 0.006744662765413523, + "loss_sod": 0.1287979781627655, + "loss_total": 0.387551873922348, + "step": 130499 + }, + { + "epoch": 0.001, + "grad_norm": 1.069828748703003, + "learning_rate": 5.695896929219543e-05, + "loss": 0.5274, + "step": 130500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.1654534339904785, + "loss_rtd": 0.299507737159729, + "loss_sent": 0.1574164181947708, + "loss_sod": 0.035543106496334076, + "loss_total": 0.4924672842025757, + "step": 130599 + }, + { + "epoch": 0.001198, + "loss_gen": 4.607670783996582, + "loss_rtd": 0.27649277448654175, + "loss_sent": 0.078493133187294, + "loss_sod": 0.15773795545101166, + "loss_total": 0.5127238631248474, + "step": 130599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.8112003803253174, + "learning_rate": 5.6927543487568405e-05, + "loss": 0.5265, + "step": 130600 + }, + { + "epoch": 0.001398, + "loss_gen": 4.456787586212158, + "loss_rtd": 0.2700689733028412, + "loss_sent": 0.053266555070877075, + "loss_sod": 0.09510375559329987, + "loss_total": 0.4184392988681793, + "step": 130699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.192258358001709, + "loss_rtd": 0.2975078523159027, + "loss_sent": 0.12484312057495117, + "loss_sod": 0.02995981276035309, + "loss_total": 0.45231080055236816, + "step": 130699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.5776410102844238, + "learning_rate": 5.6896114892527694e-05, + "loss": 0.525, + "step": 130700 + }, + { + "epoch": 0.001598, + "loss_gen": 4.901659965515137, + "loss_rtd": 0.3314596712589264, + "loss_sent": 0.08643057942390442, + "loss_sod": 0.023916970938444138, + "loss_total": 0.44180721044540405, + "step": 130799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.883615493774414, + "loss_rtd": 0.2808056175708771, + "loss_sent": 0.25803086161613464, + "loss_sod": 0.09379947930574417, + "loss_total": 0.6326359510421753, + "step": 130799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.8937269449234009, + "learning_rate": 5.686468351973272e-05, + "loss": 0.5516, + "step": 130800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.328621864318848, + "loss_rtd": 0.296293169260025, + "loss_sent": 0.11390230059623718, + "loss_sod": 0.05793844163417816, + "loss_total": 0.4681338965892792, + "step": 130899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.119774341583252, + "loss_rtd": 0.2935149371623993, + "loss_sent": 0.26623860001564026, + "loss_sod": 0.06770818680524826, + "loss_total": 0.6274617314338684, + "step": 130899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.0362036228179932, + "learning_rate": 5.683324938184407e-05, + "loss": 0.536, + "step": 130900 + }, + { + "epoch": 0.001998, + "loss_gen": 4.447842597961426, + "loss_rtd": 0.2601456642150879, + "loss_sent": 0.0004762514145113528, + "loss_sod": 0.2477504014968872, + "loss_total": 0.5083723068237305, + "step": 130999 + }, + { + "epoch": 0.001998, + "loss_gen": 4.670206069946289, + "loss_rtd": 0.27382001280784607, + "loss_sent": 0.007198238279670477, + "loss_sod": 0.08487206697463989, + "loss_total": 0.3658903241157532, + "step": 130999 + }, + { + "epoch": 0.002, + "grad_norm": 0.7748968005180359, + "learning_rate": 5.680181249152337e-05, + "loss": 0.5309, + "step": 131000 + }, + { + "epoch": 0.002, + "eval_loss": 0.5239149928092957, + "eval_runtime": 155.5885, + "eval_samples_per_second": 99.255, + "eval_steps_per_second": 0.778, + "step": 131000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.072779178619385, + "loss_rtd": 0.2663709223270416, + "loss_sent": 0.04606501758098602, + "loss_sod": 0.12739084661006927, + "loss_total": 0.4398267865180969, + "step": 131099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.089369773864746, + "loss_rtd": 0.2644404172897339, + "loss_sent": 0.1319088339805603, + "loss_sod": 0.1043473482131958, + "loss_total": 0.50069659948349, + "step": 131099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.181709885597229, + "learning_rate": 5.6770372861433406e-05, + "loss": 0.5282, + "step": 131100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.151736259460449, + "loss_rtd": 0.2952464520931244, + "loss_sent": 0.1472969502210617, + "loss_sod": 0.06669507920742035, + "loss_total": 0.5092384815216064, + "step": 131199 + }, + { + "epoch": 0.002398, + "loss_gen": 4.665799140930176, + "loss_rtd": 0.28812384605407715, + "loss_sent": 0.05638657882809639, + "loss_sod": 0.09353906661272049, + "loss_total": 0.4380494952201843, + "step": 131199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.8514031171798706, + "learning_rate": 5.6738930504238065e-05, + "loss": 0.5335, + "step": 131200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.095651149749756, + "loss_rtd": 0.3047369718551636, + "loss_sent": 0.5476189851760864, + "loss_sod": 0.02074720710515976, + "loss_total": 0.873103141784668, + "step": 131299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.383840560913086, + "loss_rtd": 0.27004507184028625, + "loss_sent": 0.15658144652843475, + "loss_sod": 0.07216206192970276, + "loss_total": 0.49878859519958496, + "step": 131299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.7374162673950195, + "learning_rate": 5.670748543260232e-05, + "loss": 0.5438, + "step": 131300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.274921417236328, + "loss_rtd": 0.30671313405036926, + "loss_sent": 0.4435414969921112, + "loss_sod": 0.07780569046735764, + "loss_total": 0.8280603289604187, + "step": 131399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.974460601806641, + "loss_rtd": 0.2908535301685333, + "loss_sent": 0.2117234170436859, + "loss_sod": 0.12159785628318787, + "loss_total": 0.6241748332977295, + "step": 131399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.4120749235153198, + "learning_rate": 5.667603765919225e-05, + "loss": 0.5412, + "step": 131400 + }, + { + "epoch": 0.002998, + "loss_gen": 4.948692321777344, + "loss_rtd": 0.28530600666999817, + "loss_sent": 0.1581439971923828, + "loss_sod": 0.05736737698316574, + "loss_total": 0.5008174180984497, + "step": 131499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.769708633422852, + "loss_rtd": 0.29132771492004395, + "loss_sent": 0.121072918176651, + "loss_sod": 0.054681532084941864, + "loss_total": 0.4670821726322174, + "step": 131499 + }, + { + "epoch": 0.003, + "grad_norm": 0.7811002135276794, + "learning_rate": 5.6644587196675014e-05, + "loss": 0.5247, + "step": 131500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.725676536560059, + "loss_rtd": 0.2849738597869873, + "loss_sent": 0.0006301194080151618, + "loss_sod": 0.23249486088752747, + "loss_total": 0.5180988311767578, + "step": 131599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.686426162719727, + "loss_rtd": 0.2830125093460083, + "loss_sent": 0.0005858843796886504, + "loss_sod": 0.2647145092487335, + "loss_total": 0.5483129024505615, + "step": 131599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.3104661703109741, + "learning_rate": 5.661313405771884e-05, + "loss": 0.5301, + "step": 131600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.908972263336182, + "loss_rtd": 0.28524264693260193, + "loss_sent": 0.030228758230805397, + "loss_sod": 0.08076919615268707, + "loss_total": 0.39624062180519104, + "step": 131699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.779695987701416, + "loss_rtd": 0.2580753266811371, + "loss_sent": 0.07049515843391418, + "loss_sod": 0.08630006015300751, + "loss_total": 0.4148705303668976, + "step": 131699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.7225167155265808, + "learning_rate": 5.658167825499306e-05, + "loss": 0.543, + "step": 131700 + }, + { + "epoch": 0.003598, + "loss_gen": 4.8812055587768555, + "loss_rtd": 0.29372212290763855, + "loss_sent": 0.11324161291122437, + "loss_sod": 0.08304459601640701, + "loss_total": 0.49000832438468933, + "step": 131799 + }, + { + "epoch": 0.003598, + "loss_gen": 4.897563934326172, + "loss_rtd": 0.30420058965682983, + "loss_sent": 0.2974887788295746, + "loss_sod": 0.01579902321100235, + "loss_total": 0.6174883842468262, + "step": 131799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.8330687284469604, + "learning_rate": 5.655021980116808e-05, + "loss": 0.5481, + "step": 131800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.180691719055176, + "loss_rtd": 0.28755512833595276, + "loss_sent": 0.2041163593530655, + "loss_sod": 0.06895408034324646, + "loss_total": 0.5606255531311035, + "step": 131899 + }, + { + "epoch": 0.003798, + "loss_gen": 4.610910892486572, + "loss_rtd": 0.28476133942604065, + "loss_sent": 0.03329460322856903, + "loss_sod": 0.18745854496955872, + "loss_total": 0.5055145025253296, + "step": 131899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.9010452032089233, + "learning_rate": 5.651875870891533e-05, + "loss": 0.54, + "step": 131900 + }, + { + "epoch": 0.003998, + "loss_gen": 4.279195308685303, + "loss_rtd": 0.26342475414276123, + "loss_sent": 0.03198295086622238, + "loss_sod": 0.0630388855934143, + "loss_total": 0.3584465980529785, + "step": 131999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.077536582946777, + "loss_rtd": 0.313030481338501, + "loss_sent": 0.18453598022460938, + "loss_sod": 0.023974746465682983, + "loss_total": 0.5215412378311157, + "step": 131999 + }, + { + "epoch": 0.004, + "grad_norm": 0.7825824618339539, + "learning_rate": 5.648729499090737e-05, + "loss": 0.5538, + "step": 132000 + }, + { + "epoch": 0.004, + "eval_loss": 0.5140495300292969, + "eval_runtime": 152.2396, + "eval_samples_per_second": 101.439, + "eval_steps_per_second": 0.795, + "step": 132000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.530811786651611, + "loss_rtd": 0.2890695631504059, + "loss_sent": 0.07682177424430847, + "loss_sod": 0.11939701437950134, + "loss_total": 0.4852883517742157, + "step": 132099 + }, + { + "epoch": 0.004198, + "loss_gen": 4.3468499183654785, + "loss_rtd": 0.28686901926994324, + "loss_sent": 0.00015228970733005553, + "loss_sod": 0.12854860723018646, + "loss_total": 0.41556990146636963, + "step": 132099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.906028687953949, + "learning_rate": 5.645582865981773e-05, + "loss": 0.54, + "step": 132100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.194786548614502, + "loss_rtd": 0.29532912373542786, + "loss_sent": 0.11146141588687897, + "loss_sod": 0.007537001743912697, + "loss_total": 0.41432756185531616, + "step": 132199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.131966590881348, + "loss_rtd": 0.291231244802475, + "loss_sent": 0.11104708164930344, + "loss_sod": 0.06896242499351501, + "loss_total": 0.471240758895874, + "step": 132199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.3519681692123413, + "learning_rate": 5.642435972832112e-05, + "loss": 0.5176, + "step": 132200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.20922327041626, + "loss_rtd": 0.3060184121131897, + "loss_sent": 0.03654468432068825, + "loss_sod": 0.052750006318092346, + "loss_total": 0.3953130841255188, + "step": 132299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.11304235458374, + "loss_rtd": 0.30031052231788635, + "loss_sent": 0.05421074479818344, + "loss_sod": 0.013233036734163761, + "loss_total": 0.3677543103694916, + "step": 132299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.6765241622924805, + "learning_rate": 5.639288820909314e-05, + "loss": 0.548, + "step": 132300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.279114723205566, + "loss_rtd": 0.3012498915195465, + "loss_sent": 0.04160747677087784, + "loss_sod": 0.07582254707813263, + "loss_total": 0.4186799228191376, + "step": 132399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.80006217956543, + "loss_rtd": 0.2973664402961731, + "loss_sent": 0.17434334754943848, + "loss_sod": 0.04450562596321106, + "loss_total": 0.516215443611145, + "step": 132399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.5384371280670166, + "learning_rate": 5.636141411481058e-05, + "loss": 0.5242, + "step": 132400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.171237468719482, + "loss_rtd": 0.2945054769515991, + "loss_sent": 0.13472336530685425, + "loss_sod": 0.004508455283939838, + "loss_total": 0.43373730778694153, + "step": 132499 + }, + { + "epoch": 0.004998, + "loss_gen": 4.911259174346924, + "loss_rtd": 0.28879836201667786, + "loss_sent": 0.13032907247543335, + "loss_sod": 0.0037056375294923782, + "loss_total": 0.42283308506011963, + "step": 132499 + }, + { + "epoch": 0.005, + "grad_norm": 0.7335926294326782, + "learning_rate": 5.632993745815116e-05, + "loss": 0.5346, + "step": 132500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.9052534103393555, + "loss_rtd": 0.2912304103374481, + "loss_sent": 0.41629940271377563, + "loss_sod": 0.022761059924960136, + "loss_total": 0.7302908897399902, + "step": 132599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.133388519287109, + "loss_rtd": 0.31143835186958313, + "loss_sent": 0.08793631196022034, + "loss_sod": 0.057130102068185806, + "loss_total": 0.456504762172699, + "step": 132599 + }, + { + "epoch": 0.0052, + "grad_norm": 1.1287411451339722, + "learning_rate": 5.6298458251793705e-05, + "loss": 0.5566, + "step": 132600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.22165584564209, + "loss_rtd": 0.28310316801071167, + "loss_sent": 0.06454163044691086, + "loss_sod": 0.1360900104045868, + "loss_total": 0.4837348163127899, + "step": 132699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.478661060333252, + "loss_rtd": 0.309174507856369, + "loss_sent": 0.14244163036346436, + "loss_sod": 0.045050252228975296, + "loss_total": 0.49666640162467957, + "step": 132699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.8059908747673035, + "learning_rate": 5.626697650841801e-05, + "loss": 0.515, + "step": 132700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.115859508514404, + "loss_rtd": 0.2883886992931366, + "loss_sent": 0.14805959165096283, + "loss_sod": 0.13457569479942322, + "loss_total": 0.5710240006446838, + "step": 132799 + }, + { + "epoch": 0.005598, + "loss_gen": 4.049540042877197, + "loss_rtd": 0.2628515958786011, + "loss_sent": 4.867784446105361e-05, + "loss_sod": 0.2343326359987259, + "loss_total": 0.4972328841686249, + "step": 132799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.0146269798278809, + "learning_rate": 5.6235492240704936e-05, + "loss": 0.5348, + "step": 132800 + }, + { + "epoch": 0.005798, + "loss_gen": 4.973217010498047, + "loss_rtd": 0.2926579713821411, + "loss_sent": 0.596145749092102, + "loss_sod": 0.014550590887665749, + "loss_total": 0.903354287147522, + "step": 132899 + }, + { + "epoch": 0.005798, + "loss_gen": 4.936171054840088, + "loss_rtd": 0.3060975670814514, + "loss_sent": 0.19991983473300934, + "loss_sod": 0.02503911592066288, + "loss_total": 0.5310565233230591, + "step": 132899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.7769293785095215, + "learning_rate": 5.620400546133632e-05, + "loss": 0.5423, + "step": 132900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.438228607177734, + "loss_rtd": 0.28840088844299316, + "loss_sent": 0.49344602227211, + "loss_sod": 0.04564886912703514, + "loss_total": 0.827495813369751, + "step": 132999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.199347019195557, + "loss_rtd": 0.2940641939640045, + "loss_sent": 0.10124754905700684, + "loss_sod": 0.10038881003856659, + "loss_total": 0.49570053815841675, + "step": 132999 + }, + { + "epoch": 0.006, + "grad_norm": 1.180791974067688, + "learning_rate": 5.617251618299505e-05, + "loss": 0.5302, + "step": 133000 + }, + { + "epoch": 0.006, + "eval_loss": 0.517386257648468, + "eval_runtime": 153.7275, + "eval_samples_per_second": 100.457, + "eval_steps_per_second": 0.787, + "step": 133000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.261128902435303, + "loss_rtd": 0.2899162173271179, + "loss_sent": 0.34748998284339905, + "loss_sod": 0.11603307723999023, + "loss_total": 0.7534393072128296, + "step": 133099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.1016364097595215, + "loss_rtd": 0.30007854104042053, + "loss_sent": 0.24995823204517365, + "loss_sod": 0.09962132573127747, + "loss_total": 0.6496580839157104, + "step": 133099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.2698135375976562, + "learning_rate": 5.6141024418365e-05, + "loss": 0.5369, + "step": 133100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.548202037811279, + "loss_rtd": 0.2767568826675415, + "loss_sent": 0.014326936565339565, + "loss_sod": 0.09710343182086945, + "loss_total": 0.3881872296333313, + "step": 133199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.061511039733887, + "loss_rtd": 0.2927514910697937, + "loss_sent": 0.08371397107839584, + "loss_sod": 0.062351688742637634, + "loss_total": 0.43881717324256897, + "step": 133199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.90691077709198, + "learning_rate": 5.6109530180131054e-05, + "loss": 0.5452, + "step": 133200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.073202133178711, + "loss_rtd": 0.2805119454860687, + "loss_sent": 0.03842172771692276, + "loss_sod": 0.0018447530455887318, + "loss_total": 0.32077842950820923, + "step": 133299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.099011421203613, + "loss_rtd": 0.2953358292579651, + "loss_sent": 0.13252925872802734, + "loss_sod": 0.0037807973567396402, + "loss_total": 0.4316459000110626, + "step": 133299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.6164154410362244, + "learning_rate": 5.6078033480979085e-05, + "loss": 0.5346, + "step": 133300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.158677101135254, + "loss_rtd": 0.3126745820045471, + "loss_sent": 0.08241559565067291, + "loss_sod": 0.016511857509613037, + "loss_total": 0.4116020202636719, + "step": 133399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.331038475036621, + "loss_rtd": 0.29681864380836487, + "loss_sent": 0.2278798371553421, + "loss_sod": 0.07774224877357483, + "loss_total": 0.6024407148361206, + "step": 133399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.8526787161827087, + "learning_rate": 5.604653433359594e-05, + "loss": 0.5334, + "step": 133400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.004838466644287, + "loss_rtd": 0.2894722521305084, + "loss_sent": 0.10016779601573944, + "loss_sod": 0.04609420895576477, + "loss_total": 0.43573427200317383, + "step": 133499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.184755802154541, + "loss_rtd": 0.28799888491630554, + "loss_sent": 0.23058097064495087, + "loss_sod": 0.03503584489226341, + "loss_total": 0.5536156892776489, + "step": 133499 + }, + { + "epoch": 0.007, + "grad_norm": 0.6855185031890869, + "learning_rate": 5.6015032750669504e-05, + "loss": 0.5264, + "step": 133500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.565594673156738, + "loss_rtd": 0.2776607275009155, + "loss_sent": 0.011718451045453548, + "loss_sod": 0.08202856034040451, + "loss_total": 0.37140774726867676, + "step": 133599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.21798038482666, + "loss_rtd": 0.2986351549625397, + "loss_sent": 0.06739834696054459, + "loss_sod": 0.03009982593357563, + "loss_total": 0.39613333344459534, + "step": 133599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.8277300596237183, + "learning_rate": 5.598352874488858e-05, + "loss": 0.5169, + "step": 133600 + }, + { + "epoch": 0.007398, + "loss_gen": 4.988482475280762, + "loss_rtd": 0.309760719537735, + "loss_sent": 0.16191567480564117, + "loss_sod": 0.029046613723039627, + "loss_total": 0.5007230043411255, + "step": 133699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.192599296569824, + "loss_rtd": 0.2854505479335785, + "loss_sent": 0.3803899884223938, + "loss_sod": 0.03612706810235977, + "loss_total": 0.7019675970077515, + "step": 133699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.176679015159607, + "learning_rate": 5.595202232894301e-05, + "loss": 0.5316, + "step": 133700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.446816921234131, + "loss_rtd": 0.29135972261428833, + "loss_sent": 0.08156407624483109, + "loss_sod": 0.036502495408058167, + "loss_total": 0.4094262719154358, + "step": 133799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.273036003112793, + "loss_rtd": 0.2708379626274109, + "loss_sent": 0.21031464636325836, + "loss_sod": 0.027876533567905426, + "loss_total": 0.5090291500091553, + "step": 133799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.6264864206314087, + "learning_rate": 5.592051351552354e-05, + "loss": 0.5161, + "step": 133800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.071184158325195, + "loss_rtd": 0.2771450877189636, + "loss_sent": 0.5051693320274353, + "loss_sod": 0.029784109443426132, + "loss_total": 0.812098503112793, + "step": 133899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.102906227111816, + "loss_rtd": 0.29092371463775635, + "loss_sent": 0.18195125460624695, + "loss_sod": 0.026057599112391472, + "loss_total": 0.4989325702190399, + "step": 133899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.306749701499939, + "learning_rate": 5.588900231732196e-05, + "loss": 0.526, + "step": 133900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.075216293334961, + "loss_rtd": 0.30054181814193726, + "loss_sent": 0.3336167335510254, + "loss_sod": 0.06170884892344475, + "loss_total": 0.6958674192428589, + "step": 133999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.275862693786621, + "loss_rtd": 0.30010300874710083, + "loss_sent": 0.7424845695495605, + "loss_sod": 0.05289775878190994, + "loss_total": 1.0954853296279907, + "step": 133999 + }, + { + "epoch": 0.008, + "grad_norm": 2.0897834300994873, + "learning_rate": 5.585748874703093e-05, + "loss": 0.5305, + "step": 134000 + }, + { + "epoch": 0.008, + "eval_loss": 0.5119883418083191, + "eval_runtime": 152.2502, + "eval_samples_per_second": 101.432, + "eval_steps_per_second": 0.795, + "step": 134000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.6806745529174805, + "loss_rtd": 0.27665334939956665, + "loss_sent": 0.0004764352925121784, + "loss_sod": 0.2410624921321869, + "loss_total": 0.5181922316551208, + "step": 134099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.15795373916626, + "loss_rtd": 0.3039684593677521, + "loss_sent": 0.21698664128780365, + "loss_sod": 0.022792495787143707, + "loss_total": 0.54374760389328, + "step": 134099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.1554814577102661, + "learning_rate": 5.582597281734414e-05, + "loss": 0.5259, + "step": 134100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.32167911529541, + "loss_rtd": 0.29960358142852783, + "loss_sent": 0.10849422216415405, + "loss_sod": 0.0041342126205563545, + "loss_total": 0.41223201155662537, + "step": 134199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.886404991149902, + "loss_rtd": 0.2942427694797516, + "loss_sent": 0.2582089602947235, + "loss_sod": 0.08146588504314423, + "loss_total": 0.6339175701141357, + "step": 134199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.9550265669822693, + "learning_rate": 5.5794454540956186e-05, + "loss": 0.5458, + "step": 134200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.064291000366211, + "loss_rtd": 0.292093962430954, + "loss_sent": 0.19798637926578522, + "loss_sod": 0.03719881549477577, + "loss_total": 0.5272791385650635, + "step": 134299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.778872966766357, + "loss_rtd": 0.2855645716190338, + "loss_sent": 0.09939462691545486, + "loss_sod": 0.030383706092834473, + "loss_total": 0.41534289717674255, + "step": 134299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.9824146032333374, + "learning_rate": 5.5762933930562645e-05, + "loss": 0.5183, + "step": 134300 + }, + { + "epoch": 0.008798, + "loss_gen": 4.325845241546631, + "loss_rtd": 0.2759763300418854, + "loss_sent": 0.026905614882707596, + "loss_sod": 0.10722506046295166, + "loss_total": 0.4101070165634155, + "step": 134399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.273506164550781, + "loss_rtd": 0.2808290123939514, + "loss_sent": 0.12383317947387695, + "loss_sod": 0.05283088609576225, + "loss_total": 0.4574930667877197, + "step": 134399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.7307271361351013, + "learning_rate": 5.573141099886e-05, + "loss": 0.5355, + "step": 134400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.2135419845581055, + "loss_rtd": 0.3053134083747864, + "loss_sent": 0.5553669333457947, + "loss_sod": 0.020075494423508644, + "loss_total": 0.8807558417320251, + "step": 134499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.001331806182861, + "loss_rtd": 0.28902408480644226, + "loss_sent": 0.22344139218330383, + "loss_sod": 0.031435299664735794, + "loss_total": 0.5439007878303528, + "step": 134499 + }, + { + "epoch": 0.009, + "grad_norm": 1.4039077758789062, + "learning_rate": 5.56998857585457e-05, + "loss": 0.5438, + "step": 134500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.113071918487549, + "loss_rtd": 0.2908078730106354, + "loss_sent": 0.23563972115516663, + "loss_sod": 0.06931894272565842, + "loss_total": 0.595766544342041, + "step": 134599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.014859676361084, + "loss_rtd": 0.3115485608577728, + "loss_sent": 0.36756986379623413, + "loss_sod": 0.06761464476585388, + "loss_total": 0.7467330694198608, + "step": 134599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.3059592247009277, + "learning_rate": 5.5668358222318084e-05, + "loss": 0.5312, + "step": 134600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.183529376983643, + "loss_rtd": 0.29946181178092957, + "loss_sent": 0.2329072654247284, + "loss_sod": 0.05431223660707474, + "loss_total": 0.5866813063621521, + "step": 134699 + }, + { + "epoch": 0.009398, + "loss_gen": 4.47651481628418, + "loss_rtd": 0.2590901851654053, + "loss_sent": 0.05405307933688164, + "loss_sod": 0.17085470259189606, + "loss_total": 0.4839979410171509, + "step": 134699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.9354749917984009, + "learning_rate": 5.5636828402876475e-05, + "loss": 0.5426, + "step": 134700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.529237270355225, + "loss_rtd": 0.2927532494068146, + "loss_sent": 0.2400491088628769, + "loss_sod": 0.10767121613025665, + "loss_total": 0.6404736042022705, + "step": 134799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.23181676864624, + "loss_rtd": 0.2928999364376068, + "loss_sent": 0.33182835578918457, + "loss_sod": 0.013041868805885315, + "loss_total": 0.6377701759338379, + "step": 134799 + }, + { + "epoch": 0.0096, + "grad_norm": 1.2457292079925537, + "learning_rate": 5.560529631292104e-05, + "loss": 0.5289, + "step": 134800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.881642818450928, + "loss_rtd": 0.262358158826828, + "loss_sent": 0.04763232171535492, + "loss_sod": 0.12649855017662048, + "loss_total": 0.4364890456199646, + "step": 134899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.394132614135742, + "loss_rtd": 0.2918231189250946, + "loss_sent": 0.23514193296432495, + "loss_sod": 0.06371861696243286, + "loss_total": 0.5906836986541748, + "step": 134899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.9727721810340881, + "learning_rate": 5.557376196515294e-05, + "loss": 0.5272, + "step": 134900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.764476299285889, + "loss_rtd": 0.27719348669052124, + "loss_sent": 0.008931392803788185, + "loss_sod": 0.0570664182305336, + "loss_total": 0.3431912958621979, + "step": 134999 + }, + { + "epoch": 0.009998, + "loss_gen": 4.979287624359131, + "loss_rtd": 0.28074929118156433, + "loss_sent": 0.11842507869005203, + "loss_sod": 0.17452594637870789, + "loss_total": 0.5737003087997437, + "step": 134999 + }, + { + "epoch": 0.01, + "grad_norm": 1.1130653619766235, + "learning_rate": 5.554222537227417e-05, + "loss": 0.538, + "step": 135000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5137791037559509, + "eval_runtime": 152.4735, + "eval_samples_per_second": 101.283, + "eval_steps_per_second": 0.794, + "step": 135000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.055010795593262, + "loss_rtd": 0.30108219385147095, + "loss_sent": 0.23061391711235046, + "loss_sod": 0.05377934128046036, + "loss_total": 0.5854754447937012, + "step": 135099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.1281328201293945, + "loss_rtd": 0.290868878364563, + "loss_sent": 0.25150617957115173, + "loss_sod": 0.02241610735654831, + "loss_total": 0.564791202545166, + "step": 135099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.0362716913223267, + "learning_rate": 5.55106865469877e-05, + "loss": 0.5355, + "step": 135100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.109951496124268, + "loss_rtd": 0.29834866523742676, + "loss_sent": 0.37092530727386475, + "loss_sod": 0.15103842318058014, + "loss_total": 0.8203123807907104, + "step": 135199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.356719493865967, + "loss_rtd": 0.2927529811859131, + "loss_sent": 0.16856837272644043, + "loss_sod": 0.11834752559661865, + "loss_total": 0.5796688795089722, + "step": 135199 + }, + { + "epoch": 0.0104, + "grad_norm": 1.686990737915039, + "learning_rate": 5.5479145501997334e-05, + "loss": 0.5572, + "step": 135200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.151969909667969, + "loss_rtd": 0.2973788380622864, + "loss_sent": 0.2360147386789322, + "loss_sod": 0.05193222314119339, + "loss_total": 0.5853257775306702, + "step": 135299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.259129524230957, + "loss_rtd": 0.29523059725761414, + "loss_sent": 0.23155558109283447, + "loss_sod": 0.07976596057415009, + "loss_total": 0.6065521240234375, + "step": 135299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.8935631513595581, + "learning_rate": 5.544760225000781e-05, + "loss": 0.5363, + "step": 135300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.004709720611572, + "loss_rtd": 0.27916789054870605, + "loss_sent": 0.10970665514469147, + "loss_sod": 0.01602090150117874, + "loss_total": 0.4048954248428345, + "step": 135399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.230704307556152, + "loss_rtd": 0.2816089689731598, + "loss_sent": 0.24363799393177032, + "loss_sod": 0.073275126516819, + "loss_total": 0.5985220670700073, + "step": 135399 + }, + { + "epoch": 0.0108, + "grad_norm": 0.7221740484237671, + "learning_rate": 5.541605680372476e-05, + "loss": 0.5478, + "step": 135400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.016166687011719, + "loss_rtd": 0.27321386337280273, + "loss_sent": 0.4934834837913513, + "loss_sod": 0.1191985160112381, + "loss_total": 0.885895848274231, + "step": 135499 + }, + { + "epoch": 0.010998, + "loss_gen": 4.839963912963867, + "loss_rtd": 0.31342312693595886, + "loss_sent": 0.12136463075876236, + "loss_sod": 0.019670138135552406, + "loss_total": 0.4544578790664673, + "step": 135499 + }, + { + "epoch": 0.011, + "grad_norm": 1.7836360931396484, + "learning_rate": 5.538450917585467e-05, + "loss": 0.5274, + "step": 135500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.11860990524292, + "loss_rtd": 0.2798524498939514, + "loss_sent": 0.1161596029996872, + "loss_sod": 0.04325724393129349, + "loss_total": 0.4392693042755127, + "step": 135599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.1634297370910645, + "loss_rtd": 0.2881108522415161, + "loss_sent": 0.22788073122501373, + "loss_sod": 0.01384064368903637, + "loss_total": 0.5298322439193726, + "step": 135599 + }, + { + "epoch": 0.0112, + "grad_norm": 0.9651035070419312, + "learning_rate": 5.535295937910494e-05, + "loss": 0.5408, + "step": 135600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.181997776031494, + "loss_rtd": 0.3008062243461609, + "loss_sent": 0.15601599216461182, + "loss_sod": 0.05230824649333954, + "loss_total": 0.5091304779052734, + "step": 135699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.398614883422852, + "loss_rtd": 0.2960011065006256, + "loss_sent": 0.12754502892494202, + "loss_sod": 0.0128394216299057, + "loss_total": 0.43638554215431213, + "step": 135699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.894529402256012, + "learning_rate": 5.53214074261838e-05, + "loss": 0.5271, + "step": 135700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.05432653427124, + "loss_rtd": 0.2885904908180237, + "loss_sent": 0.1281057894229889, + "loss_sod": 0.03045843169093132, + "loss_total": 0.447154700756073, + "step": 135799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.534729957580566, + "loss_rtd": 0.2890322506427765, + "loss_sent": 0.23375825583934784, + "loss_sod": 0.13331487774848938, + "loss_total": 0.6561053991317749, + "step": 135799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.2520610094070435, + "learning_rate": 5.52898533298004e-05, + "loss": 0.5335, + "step": 135800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.442405700683594, + "loss_rtd": 0.29448550939559937, + "loss_sent": 0.288179874420166, + "loss_sod": 0.13137699663639069, + "loss_total": 0.7140423655509949, + "step": 135899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.789536476135254, + "loss_rtd": 0.27976828813552856, + "loss_sent": 0.07441139221191406, + "loss_sod": 0.014468375593423843, + "loss_total": 0.36864805221557617, + "step": 135899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.348859429359436, + "learning_rate": 5.5258297102664694e-05, + "loss": 0.5339, + "step": 135900 + }, + { + "epoch": 0.011998, + "loss_gen": 4.468254566192627, + "loss_rtd": 0.264314204454422, + "loss_sent": 5.786648398498073e-05, + "loss_sod": 0.08384271711111069, + "loss_total": 0.3482148051261902, + "step": 135999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.028106689453125, + "loss_rtd": 0.27646321058273315, + "loss_sent": 0.316119909286499, + "loss_sod": 0.012395642697811127, + "loss_total": 0.6049787998199463, + "step": 135999 + }, + { + "epoch": 0.012, + "grad_norm": 0.6943278312683105, + "learning_rate": 5.522673875748756e-05, + "loss": 0.5275, + "step": 136000 + }, + { + "epoch": 0.012, + "eval_loss": 0.5100157856941223, + "eval_runtime": 152.671, + "eval_samples_per_second": 101.152, + "eval_steps_per_second": 0.793, + "step": 136000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.106087684631348, + "loss_rtd": 0.30971595644950867, + "loss_sent": 0.3000084161758423, + "loss_sod": 0.06449112296104431, + "loss_total": 0.6742154955863953, + "step": 136099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.165160179138184, + "loss_rtd": 0.28918367624282837, + "loss_sent": 0.15298724174499512, + "loss_sod": 0.08421222865581512, + "loss_total": 0.5263831615447998, + "step": 136099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.0627872943878174, + "learning_rate": 5.519517830698067e-05, + "loss": 0.5292, + "step": 136100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.2184977531433105, + "loss_rtd": 0.2985919713973999, + "loss_sent": 0.3204294741153717, + "loss_sod": 0.10847426950931549, + "loss_total": 0.7274956703186035, + "step": 136199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.046943187713623, + "loss_rtd": 0.2754345238208771, + "loss_sent": 0.21202506124973297, + "loss_sod": 0.010480174794793129, + "loss_total": 0.4979397654533386, + "step": 136199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.6335351467132568, + "learning_rate": 5.516361576385658e-05, + "loss": 0.5363, + "step": 136200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.846960544586182, + "loss_rtd": 0.30323663353919983, + "loss_sent": 0.08127310127019882, + "loss_sod": 0.04320206493139267, + "loss_total": 0.4277118146419525, + "step": 136299 + }, + { + "epoch": 0.012598, + "loss_gen": 4.125184059143066, + "loss_rtd": 0.25880250334739685, + "loss_sent": 0.0005236545694060624, + "loss_sod": 0.07243509590625763, + "loss_total": 0.33176127076148987, + "step": 136299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.8810359835624695, + "learning_rate": 5.5132051140828666e-05, + "loss": 0.5305, + "step": 136300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.3787455558776855, + "loss_rtd": 0.29837551712989807, + "loss_sent": 0.20992742478847504, + "loss_sod": 0.1247561126947403, + "loss_total": 0.6330590844154358, + "step": 136399 + }, + { + "epoch": 0.012798, + "loss_gen": 4.606182098388672, + "loss_rtd": 0.27354809641838074, + "loss_sent": 0.01576041430234909, + "loss_sod": 0.06710290163755417, + "loss_total": 0.3564114272594452, + "step": 136399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.8069556355476379, + "learning_rate": 5.510048445061119e-05, + "loss": 0.5345, + "step": 136400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.970380783081055, + "loss_rtd": 0.29826441407203674, + "loss_sent": 0.4209631681442261, + "loss_sod": 0.02821575477719307, + "loss_total": 0.7474433183670044, + "step": 136499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.184389591217041, + "loss_rtd": 0.3037567138671875, + "loss_sent": 0.1017565131187439, + "loss_sod": 0.023996714502573013, + "loss_total": 0.4295099377632141, + "step": 136499 + }, + { + "epoch": 0.013, + "grad_norm": 0.882291853427887, + "learning_rate": 5.506891570591917e-05, + "loss": 0.5347, + "step": 136500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.901976585388184, + "loss_rtd": 0.2915496230125427, + "loss_sent": 0.3449662923812866, + "loss_sod": 0.017863880842924118, + "loss_total": 0.6543797850608826, + "step": 136599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.934019565582275, + "loss_rtd": 0.2803727090358734, + "loss_sent": 0.2724975645542145, + "loss_sod": 0.005678113549947739, + "loss_total": 0.5585483908653259, + "step": 136599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.5168174505233765, + "learning_rate": 5.503734491946852e-05, + "loss": 0.5495, + "step": 136600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.003833293914795, + "loss_rtd": 0.2940410375595093, + "loss_sent": 0.11700721830129623, + "loss_sod": 0.18890929222106934, + "loss_total": 0.5999575257301331, + "step": 136699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.236407279968262, + "loss_rtd": 0.299206018447876, + "loss_sent": 0.09691785275936127, + "loss_sod": 0.11297590285539627, + "loss_total": 0.5090997815132141, + "step": 136699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.0674422979354858, + "learning_rate": 5.500577210397593e-05, + "loss": 0.5175, + "step": 136700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.100795745849609, + "loss_rtd": 0.2777169942855835, + "loss_sent": 0.30468812584877014, + "loss_sod": 0.1105886697769165, + "loss_total": 0.6929937601089478, + "step": 136799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.732030868530273, + "loss_rtd": 0.2954294979572296, + "loss_sent": 0.12909935414791107, + "loss_sod": 0.16132435202598572, + "loss_total": 0.5858532190322876, + "step": 136799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.5608834028244019, + "learning_rate": 5.497419727215895e-05, + "loss": 0.5173, + "step": 136800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.082647800445557, + "loss_rtd": 0.2739904224872589, + "loss_sent": 0.280626118183136, + "loss_sod": 0.08271697163581848, + "loss_total": 0.6373335123062134, + "step": 136899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.200981616973877, + "loss_rtd": 0.2875075042247772, + "loss_sent": 0.3835589289665222, + "loss_sod": 0.0919804498553276, + "loss_total": 0.7630468606948853, + "step": 136899 + }, + { + "epoch": 0.0138, + "grad_norm": 2.6818933486938477, + "learning_rate": 5.494262043673588e-05, + "loss": 0.5245, + "step": 136900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.0394086837768555, + "loss_rtd": 0.28725799918174744, + "loss_sent": 0.21636411547660828, + "loss_sod": 0.0462038628757, + "loss_total": 0.5498259663581848, + "step": 136999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.304337024688721, + "loss_rtd": 0.280945360660553, + "loss_sent": 0.14623352885246277, + "loss_sod": 0.15014445781707764, + "loss_total": 0.577323317527771, + "step": 136999 + }, + { + "epoch": 0.014, + "grad_norm": 0.8700416684150696, + "learning_rate": 5.49110416104259e-05, + "loss": 0.534, + "step": 137000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5080940127372742, + "eval_runtime": 151.7199, + "eval_samples_per_second": 101.786, + "eval_steps_per_second": 0.798, + "step": 137000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.115293979644775, + "loss_rtd": 0.2914241552352905, + "loss_sent": 0.14293156564235687, + "loss_sod": 0.07873409986495972, + "loss_total": 0.5130898356437683, + "step": 137099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.320815563201904, + "loss_rtd": 0.27097514271736145, + "loss_sent": 0.10093007236719131, + "loss_sod": 0.05424465239048004, + "loss_total": 0.4261498749256134, + "step": 137099 + }, + { + "epoch": 0.0142, + "grad_norm": 0.7981420755386353, + "learning_rate": 5.487946080594895e-05, + "loss": 0.5209, + "step": 137100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.491896152496338, + "loss_rtd": 0.3019807040691376, + "loss_sent": 0.3259780704975128, + "loss_sod": 0.07002832740545273, + "loss_total": 0.6979870796203613, + "step": 137199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.126546382904053, + "loss_rtd": 0.2816140353679657, + "loss_sent": 0.3527289032936096, + "loss_sod": 0.030675236135721207, + "loss_total": 0.6650182008743286, + "step": 137199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.7934132814407349, + "learning_rate": 5.484787803602577e-05, + "loss": 0.5289, + "step": 137200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.0582146644592285, + "loss_rtd": 0.28518033027648926, + "loss_sent": 0.4254242777824402, + "loss_sod": 0.012909738346934319, + "loss_total": 0.7235143184661865, + "step": 137299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.125099182128906, + "loss_rtd": 0.29282599687576294, + "loss_sent": 0.30198314785957336, + "loss_sod": 0.01117524690926075, + "loss_total": 0.6059843897819519, + "step": 137299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.8578848838806152, + "learning_rate": 5.48162933133779e-05, + "loss": 0.5206, + "step": 137300 + }, + { + "epoch": 0.014798, + "loss_gen": 4.208960056304932, + "loss_rtd": 0.2615668475627899, + "loss_sent": 0.032868642359972, + "loss_sod": 0.06202565133571625, + "loss_total": 0.35646113753318787, + "step": 137399 + }, + { + "epoch": 0.014798, + "loss_gen": 4.981863498687744, + "loss_rtd": 0.3050662875175476, + "loss_sent": 0.29135698080062866, + "loss_sod": 0.0854504331946373, + "loss_total": 0.6818736791610718, + "step": 137399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.0216947793960571, + "learning_rate": 5.4784706650727655e-05, + "loss": 0.5247, + "step": 137400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.137484073638916, + "loss_rtd": 0.29767727851867676, + "loss_sent": 0.45579925179481506, + "loss_sod": 0.08855852484703064, + "loss_total": 0.8420350551605225, + "step": 137499 + }, + { + "epoch": 0.014998, + "loss_gen": 4.418776512145996, + "loss_rtd": 0.2826775014400482, + "loss_sent": 0.020374977961182594, + "loss_sod": 0.08248041570186615, + "loss_total": 0.3855328857898712, + "step": 137499 + }, + { + "epoch": 0.015, + "grad_norm": 1.8559634685516357, + "learning_rate": 5.4753118060798146e-05, + "loss": 0.5254, + "step": 137500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.250895977020264, + "loss_rtd": 0.2845058739185333, + "loss_sent": 0.13925084471702576, + "loss_sod": 0.07972454279661179, + "loss_total": 0.5034812688827515, + "step": 137599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.185181617736816, + "loss_rtd": 0.3077811002731323, + "loss_sent": 0.29663532972335815, + "loss_sod": 0.023668231442570686, + "loss_total": 0.628084659576416, + "step": 137599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.198548674583435, + "learning_rate": 5.4721527556313244e-05, + "loss": 0.5228, + "step": 137600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.097166538238525, + "loss_rtd": 0.2978009581565857, + "loss_sent": 0.17746710777282715, + "loss_sod": 0.03157547116279602, + "loss_total": 0.5068435668945312, + "step": 137699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.623559474945068, + "loss_rtd": 0.26392605900764465, + "loss_sent": 0.0842394232749939, + "loss_sod": 0.05295109003782272, + "loss_total": 0.40111657977104187, + "step": 137699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.7644197940826416, + "learning_rate": 5.468993514999762e-05, + "loss": 0.5229, + "step": 137700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.118028163909912, + "loss_rtd": 0.27068865299224854, + "loss_sent": 0.1393316686153412, + "loss_sod": 0.059510473161935806, + "loss_total": 0.46953079104423523, + "step": 137799 + }, + { + "epoch": 0.015598, + "loss_gen": 4.489533424377441, + "loss_rtd": 0.2679705321788788, + "loss_sent": 0.07441765069961548, + "loss_sod": 0.10028500854969025, + "loss_total": 0.4426732063293457, + "step": 137799 + }, + { + "epoch": 0.0156, + "grad_norm": 0.8717600703239441, + "learning_rate": 5.4658340854576664e-05, + "loss": 0.5224, + "step": 137800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.418225288391113, + "loss_rtd": 0.29547515511512756, + "loss_sent": 0.1982797086238861, + "loss_sod": 0.032213449478149414, + "loss_total": 0.5259683132171631, + "step": 137899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.145331382751465, + "loss_rtd": 0.30295661091804504, + "loss_sent": 0.3211835026741028, + "loss_sod": 0.07496554404497147, + "loss_total": 0.6991056203842163, + "step": 137899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.294331669807434, + "learning_rate": 5.462674468277658e-05, + "loss": 0.5173, + "step": 137900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.082634449005127, + "loss_rtd": 0.2707638144493103, + "loss_sent": 0.2992852032184601, + "loss_sod": 0.0839737057685852, + "loss_total": 0.6540226936340332, + "step": 137999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.871114253997803, + "loss_rtd": 0.3043176829814911, + "loss_sent": 0.1752648800611496, + "loss_sod": 0.10215941816568375, + "loss_total": 0.581741988658905, + "step": 137999 + }, + { + "epoch": 0.016, + "grad_norm": 1.2170419692993164, + "learning_rate": 5.459514664732428e-05, + "loss": 0.5068, + "step": 138000 + }, + { + "epoch": 0.016, + "eval_loss": 0.5059153437614441, + "eval_runtime": 151.8064, + "eval_samples_per_second": 101.728, + "eval_steps_per_second": 0.797, + "step": 138000 + }, + { + "epoch": 0.016198, + "loss_gen": 4.806975841522217, + "loss_rtd": 0.2750062346458435, + "loss_sent": 0.1520356386899948, + "loss_sod": 0.07864824682474136, + "loss_total": 0.5056900978088379, + "step": 138099 + }, + { + "epoch": 0.016198, + "loss_gen": 4.340170383453369, + "loss_rtd": 0.26763975620269775, + "loss_sent": 0.036351222544908524, + "loss_sod": 0.078841932117939, + "loss_total": 0.38283291459083557, + "step": 138099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.6640313267707825, + "learning_rate": 5.456354676094747e-05, + "loss": 0.5333, + "step": 138100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.181458473205566, + "loss_rtd": 0.281023234128952, + "loss_sent": 0.03813255950808525, + "loss_sod": 0.10235526412725449, + "loss_total": 0.42151105403900146, + "step": 138199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.608335494995117, + "loss_rtd": 0.2757279574871063, + "loss_sent": 0.0003415390965528786, + "loss_sod": 0.2963537573814392, + "loss_total": 0.5724232196807861, + "step": 138199 + }, + { + "epoch": 0.0164, + "grad_norm": 1.1935975551605225, + "learning_rate": 5.453194503637455e-05, + "loss": 0.5471, + "step": 138200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.142540454864502, + "loss_rtd": 0.2785303294658661, + "loss_sent": 0.29988670349121094, + "loss_sod": 0.01790069229900837, + "loss_total": 0.5963177680969238, + "step": 138299 + }, + { + "epoch": 0.016598, + "loss_gen": 4.787282466888428, + "loss_rtd": 0.3087652325630188, + "loss_sent": 0.17025905847549438, + "loss_sod": 0.020297439768910408, + "loss_total": 0.49932172894477844, + "step": 138299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.8735156655311584, + "learning_rate": 5.450034148633474e-05, + "loss": 0.536, + "step": 138300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.514640808105469, + "loss_rtd": 0.28863558173179626, + "loss_sent": 0.10861130058765411, + "loss_sod": 0.09328439831733704, + "loss_total": 0.4905312657356262, + "step": 138399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.685362815856934, + "loss_rtd": 0.2902231812477112, + "loss_sent": 0.10314501076936722, + "loss_sod": 0.06868387013673782, + "loss_total": 0.46205204725265503, + "step": 138399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.9048056602478027, + "learning_rate": 5.4468736123557896e-05, + "loss": 0.545, + "step": 138400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.267678260803223, + "loss_rtd": 0.2785603702068329, + "loss_sent": 0.3957210183143616, + "loss_sod": 0.024061761796474457, + "loss_total": 0.6983431577682495, + "step": 138499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.309103012084961, + "loss_rtd": 0.26775482296943665, + "loss_sent": 0.10505812615156174, + "loss_sod": 0.1075400561094284, + "loss_total": 0.4803529977798462, + "step": 138499 + }, + { + "epoch": 0.017, + "grad_norm": 1.1361006498336792, + "learning_rate": 5.44371289607747e-05, + "loss": 0.5317, + "step": 138500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.496198654174805, + "loss_rtd": 0.2915562391281128, + "loss_sent": 0.20493806898593903, + "loss_sod": 0.192259281873703, + "loss_total": 0.688753604888916, + "step": 138599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.233489990234375, + "loss_rtd": 0.2855234742164612, + "loss_sent": 0.176621675491333, + "loss_sod": 0.06855890899896622, + "loss_total": 0.5307040214538574, + "step": 138599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.181013822555542, + "learning_rate": 5.440552001071646e-05, + "loss": 0.52, + "step": 138600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.1925835609436035, + "loss_rtd": 0.28075963258743286, + "loss_sent": 0.009259273298084736, + "loss_sod": 0.31399455666542053, + "loss_total": 0.6040135025978088, + "step": 138699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.44363260269165, + "loss_rtd": 0.28415897488594055, + "loss_sent": 0.4156152307987213, + "loss_sod": 0.02586456388235092, + "loss_total": 0.725638747215271, + "step": 138699 + }, + { + "epoch": 0.0174, + "grad_norm": 2.010545015335083, + "learning_rate": 5.437390928611531e-05, + "loss": 0.5278, + "step": 138700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.633986949920654, + "loss_rtd": 0.29162952303886414, + "loss_sent": 0.48899412155151367, + "loss_sod": 0.0521540530025959, + "loss_total": 0.832777738571167, + "step": 138799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.244661331176758, + "loss_rtd": 0.2950066924095154, + "loss_sent": 0.17288966476917267, + "loss_sod": 0.044365983456373215, + "loss_total": 0.5122623443603516, + "step": 138799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.5652868747711182, + "learning_rate": 5.434229679970402e-05, + "loss": 0.5282, + "step": 138800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.057420253753662, + "loss_rtd": 0.27275577187538147, + "loss_sent": 0.0189326424151659, + "loss_sod": 0.04146324470639229, + "loss_total": 0.3331516683101654, + "step": 138899 + }, + { + "epoch": 0.017798, + "loss_gen": 4.323928356170654, + "loss_rtd": 0.26484811305999756, + "loss_sent": 0.0012342464178800583, + "loss_sod": 0.12477214634418488, + "loss_total": 0.39085447788238525, + "step": 138899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.6872423887252808, + "learning_rate": 5.4310682564216086e-05, + "loss": 0.5051, + "step": 138900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.4057297706604, + "loss_rtd": 0.30169445276260376, + "loss_sent": 0.13476087152957916, + "loss_sod": 0.06039680913090706, + "loss_total": 0.49685215950012207, + "step": 138999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.367425918579102, + "loss_rtd": 0.30054301023483276, + "loss_sent": 0.15720084309577942, + "loss_sod": 0.03683830425143242, + "loss_total": 0.4945821464061737, + "step": 138999 + }, + { + "epoch": 0.018, + "grad_norm": 0.7920910716056824, + "learning_rate": 5.427906659238574e-05, + "loss": 0.5208, + "step": 139000 + }, + { + "epoch": 0.018, + "eval_loss": 0.5083724856376648, + "eval_runtime": 151.9605, + "eval_samples_per_second": 101.625, + "eval_steps_per_second": 0.796, + "step": 139000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.02623176574707, + "loss_rtd": 0.28522375226020813, + "loss_sent": 0.1001211553812027, + "loss_sod": 0.10232888907194138, + "loss_total": 0.487673819065094, + "step": 139099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.390545845031738, + "loss_rtd": 0.2911186218261719, + "loss_sent": 0.14162448048591614, + "loss_sod": 0.1621837615966797, + "loss_total": 0.5949268341064453, + "step": 139099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.045466661453247, + "learning_rate": 5.424744889694787e-05, + "loss": 0.5213, + "step": 139100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.249063014984131, + "loss_rtd": 0.2696802318096161, + "loss_sent": 0.48203372955322266, + "loss_sod": 0.0916304886341095, + "loss_total": 0.8433444499969482, + "step": 139199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.152245044708252, + "loss_rtd": 0.28176751732826233, + "loss_sent": 0.18449372053146362, + "loss_sod": 0.0054007526487112045, + "loss_total": 0.4716619849205017, + "step": 139199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.0413336753845215, + "learning_rate": 5.4215829490638105e-05, + "loss": 0.5221, + "step": 139200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.478488922119141, + "loss_rtd": 0.2944413125514984, + "loss_sent": 0.020181145519018173, + "loss_sod": 0.059523675590753555, + "loss_total": 0.37414613366127014, + "step": 139299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.148072242736816, + "loss_rtd": 0.26982197165489197, + "loss_sent": 0.6545564532279968, + "loss_sod": 0.11251619458198547, + "loss_total": 1.0368945598602295, + "step": 139299 + }, + { + "epoch": 0.0186, + "grad_norm": 2.1318886280059814, + "learning_rate": 5.4184208386192716e-05, + "loss": 0.5216, + "step": 139300 + }, + { + "epoch": 0.018798, + "loss_gen": 4.851564884185791, + "loss_rtd": 0.30229687690734863, + "loss_sent": 0.2966586649417877, + "loss_sod": 0.0349770151078701, + "loss_total": 0.6339325904846191, + "step": 139399 + }, + { + "epoch": 0.018798, + "loss_gen": 4.35960578918457, + "loss_rtd": 0.2744034230709076, + "loss_sent": 0.050812479108572006, + "loss_sod": 0.18627747893333435, + "loss_total": 0.5114933848381042, + "step": 139399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.0778703689575195, + "learning_rate": 5.41525855963487e-05, + "loss": 0.5344, + "step": 139400 + }, + { + "epoch": 0.018998, + "loss_gen": 4.415232181549072, + "loss_rtd": 0.27290967106819153, + "loss_sent": 0.02048950456082821, + "loss_sod": 0.1491314023733139, + "loss_total": 0.4425305724143982, + "step": 139499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.161798477172852, + "loss_rtd": 0.281983882188797, + "loss_sent": 0.05633978173136711, + "loss_sod": 0.0970369279384613, + "loss_total": 0.4353605806827545, + "step": 139499 + }, + { + "epoch": 0.019, + "grad_norm": 1.0125378370285034, + "learning_rate": 5.412096113384371e-05, + "loss": 0.5273, + "step": 139500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.189088821411133, + "loss_rtd": 0.285104900598526, + "loss_sent": 0.08446572721004486, + "loss_sod": 0.033265918493270874, + "loss_total": 0.40283656120300293, + "step": 139599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.182796955108643, + "loss_rtd": 0.3001440167427063, + "loss_sent": 0.11930057406425476, + "loss_sod": 0.016159305348992348, + "loss_total": 0.43560388684272766, + "step": 139599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.0447872877120972, + "learning_rate": 5.408933501141605e-05, + "loss": 0.5174, + "step": 139600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.016360759735107, + "loss_rtd": 0.2864854335784912, + "loss_sent": 0.10789066553115845, + "loss_sod": 0.07900240272283554, + "loss_total": 0.4733785092830658, + "step": 139699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.363579273223877, + "loss_rtd": 0.28431788086891174, + "loss_sent": 0.10657691955566406, + "loss_sod": 0.04029475152492523, + "loss_total": 0.43118953704833984, + "step": 139699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.6977967023849487, + "learning_rate": 5.405770724180475e-05, + "loss": 0.5148, + "step": 139700 + }, + { + "epoch": 0.019598, + "loss_gen": 4.978874683380127, + "loss_rtd": 0.29177454113960266, + "loss_sent": 0.3232150077819824, + "loss_sod": 0.04943980276584625, + "loss_total": 0.6644293665885925, + "step": 139799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.377932071685791, + "loss_rtd": 0.28163814544677734, + "loss_sent": 0.09854451566934586, + "loss_sod": 0.0532626211643219, + "loss_total": 0.4334452748298645, + "step": 139799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.7485654354095459, + "learning_rate": 5.4026077837749444e-05, + "loss": 0.5252, + "step": 139800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.053930282592773, + "loss_rtd": 0.28623196482658386, + "loss_sent": 0.33429646492004395, + "loss_sod": 0.04297721013426781, + "loss_total": 0.6635056734085083, + "step": 139899 + }, + { + "epoch": 0.019798, + "loss_gen": 4.4671950340271, + "loss_rtd": 0.2724299132823944, + "loss_sent": 0.012071617878973484, + "loss_sod": 0.060311198234558105, + "loss_total": 0.3448127210140228, + "step": 139899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.485947608947754, + "learning_rate": 5.399444681199046e-05, + "loss": 0.5322, + "step": 139900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.439514636993408, + "loss_rtd": 0.29135048389434814, + "loss_sent": 0.29323235154151917, + "loss_sod": 0.03105130046606064, + "loss_total": 0.6156341433525085, + "step": 139999 + }, + { + "epoch": 0.019998, + "loss_gen": 4.866759300231934, + "loss_rtd": 0.279400110244751, + "loss_sent": 0.22192879021167755, + "loss_sod": 0.0416153222322464, + "loss_total": 0.5429441928863525, + "step": 139999 + }, + { + "epoch": 0.02, + "grad_norm": 0.8952061533927917, + "learning_rate": 5.3962814177268806e-05, + "loss": 0.522, + "step": 140000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5130712985992432, + "eval_runtime": 151.8239, + "eval_samples_per_second": 101.717, + "eval_steps_per_second": 0.797, + "step": 140000 + }, + { + "epoch": 0.020198, + "loss_gen": 4.963345527648926, + "loss_rtd": 0.30678701400756836, + "loss_sent": 0.10555349290370941, + "loss_sod": 0.03217800706624985, + "loss_total": 0.444518506526947, + "step": 140099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.195887565612793, + "loss_rtd": 0.2673446536064148, + "loss_sent": 0.20815463364124298, + "loss_sod": 0.07391124218702316, + "loss_total": 0.5494105219841003, + "step": 140099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.8452274799346924, + "learning_rate": 5.393117994632605e-05, + "loss": 0.5316, + "step": 140100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.17562198638916, + "loss_rtd": 0.3097587525844574, + "loss_sent": 0.06611491739749908, + "loss_sod": 0.020314838737249374, + "loss_total": 0.39618849754333496, + "step": 140199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.2723388671875, + "loss_rtd": 0.3024448752403259, + "loss_sent": 0.13708089292049408, + "loss_sod": 0.01651904545724392, + "loss_total": 0.4560447931289673, + "step": 140199 + }, + { + "epoch": 0.0204, + "grad_norm": 0.7367912530899048, + "learning_rate": 5.38995441319045e-05, + "loss": 0.5359, + "step": 140200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.102669715881348, + "loss_rtd": 0.29721760749816895, + "loss_sent": 0.14544659852981567, + "loss_sod": 0.054101817309856415, + "loss_total": 0.49676603078842163, + "step": 140299 + }, + { + "epoch": 0.020598, + "loss_gen": 4.7257609367370605, + "loss_rtd": 0.26518717408180237, + "loss_sent": 0.10037977993488312, + "loss_sod": 0.057052429765462875, + "loss_total": 0.42261940240859985, + "step": 140299 + }, + { + "epoch": 0.0206, + "grad_norm": 0.5840766429901123, + "learning_rate": 5.386790674674702e-05, + "loss": 0.5214, + "step": 140300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.420653343200684, + "loss_rtd": 0.3184576630592346, + "loss_sent": 0.0775059312582016, + "loss_sod": 0.08341328054666519, + "loss_total": 0.4793768525123596, + "step": 140399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.616556167602539, + "loss_rtd": 0.2992227077484131, + "loss_sent": 0.10827698558568954, + "loss_sod": 0.13227517902851105, + "loss_total": 0.5397748351097107, + "step": 140399 + }, + { + "epoch": 0.0208, + "grad_norm": 1.0775346755981445, + "learning_rate": 5.383626780359719e-05, + "loss": 0.5216, + "step": 140400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.062439441680908, + "loss_rtd": 0.2716071307659149, + "loss_sent": 0.29206931591033936, + "loss_sod": 0.018853861838579178, + "loss_total": 0.5825303196907043, + "step": 140499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.32375431060791, + "loss_rtd": 0.2656872868537903, + "loss_sent": 0.03729403764009476, + "loss_sod": 0.13978254795074463, + "loss_total": 0.4427638649940491, + "step": 140499 + }, + { + "epoch": 0.021, + "grad_norm": 1.158530592918396, + "learning_rate": 5.380462731519912e-05, + "loss": 0.5097, + "step": 140500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.209296226501465, + "loss_rtd": 0.29382750391960144, + "loss_sent": 0.13547280430793762, + "loss_sod": 0.028409291058778763, + "loss_total": 0.4577096104621887, + "step": 140599 + }, + { + "epoch": 0.021198, + "loss_gen": 4.403994083404541, + "loss_rtd": 0.27498820424079895, + "loss_sent": 3.983187343692407e-05, + "loss_sod": 0.309126079082489, + "loss_total": 0.5841541290283203, + "step": 140599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.4097546339035034, + "learning_rate": 5.377298529429764e-05, + "loss": 0.5369, + "step": 140600 + }, + { + "epoch": 0.021398, + "loss_gen": 4.614843368530273, + "loss_rtd": 0.2606056034564972, + "loss_sent": 7.508649287046865e-05, + "loss_sod": 0.20027443766593933, + "loss_total": 0.46095511317253113, + "step": 140699 + }, + { + "epoch": 0.021398, + "loss_gen": 4.698951721191406, + "loss_rtd": 0.27076229453086853, + "loss_sent": 0.00012203287042211741, + "loss_sod": 0.1711643636226654, + "loss_total": 0.44204869866371155, + "step": 140699 + }, + { + "epoch": 0.0214, + "grad_norm": 1.1054785251617432, + "learning_rate": 5.374134175363813e-05, + "loss": 0.5182, + "step": 140700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.098642826080322, + "loss_rtd": 0.28260645270347595, + "loss_sent": 0.2348274439573288, + "loss_sod": 0.16163408756256104, + "loss_total": 0.6790679693222046, + "step": 140799 + }, + { + "epoch": 0.021598, + "loss_gen": 4.961879253387451, + "loss_rtd": 0.30278342962265015, + "loss_sent": 0.1892790049314499, + "loss_sod": 0.02508535422384739, + "loss_total": 0.5171477794647217, + "step": 140799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.704318881034851, + "learning_rate": 5.370969670596662e-05, + "loss": 0.5224, + "step": 140800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.177754878997803, + "loss_rtd": 0.27701535820961, + "loss_sent": 0.27083972096443176, + "loss_sod": 0.07172597944736481, + "loss_total": 0.6195811033248901, + "step": 140899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.3604326248168945, + "loss_rtd": 0.28001242876052856, + "loss_sent": 0.3185557425022125, + "loss_sod": 0.07279413938522339, + "loss_total": 0.6713622808456421, + "step": 140899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.4917004108428955, + "learning_rate": 5.367805016402969e-05, + "loss": 0.5224, + "step": 140900 + }, + { + "epoch": 0.021998, + "loss_gen": 4.1361308097839355, + "loss_rtd": 0.25162193179130554, + "loss_sent": 3.872735032928176e-05, + "loss_sod": 0.10565103590488434, + "loss_total": 0.3573116958141327, + "step": 140999 + }, + { + "epoch": 0.021998, + "loss_gen": 4.701605319976807, + "loss_rtd": 0.2827128469944, + "loss_sent": 0.23710231482982635, + "loss_sod": 0.10039571672677994, + "loss_total": 0.6202108860015869, + "step": 140999 + }, + { + "epoch": 0.022, + "grad_norm": 1.0531132221221924, + "learning_rate": 5.3646402140574625e-05, + "loss": 0.5406, + "step": 141000 + }, + { + "epoch": 0.022, + "eval_loss": 0.5066352486610413, + "eval_runtime": 151.8364, + "eval_samples_per_second": 101.708, + "eval_steps_per_second": 0.797, + "step": 141000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.351595878601074, + "loss_rtd": 0.28690072894096375, + "loss_sent": 0.28563031554222107, + "loss_sod": 0.05796433985233307, + "loss_total": 0.6304954290390015, + "step": 141099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.359902858734131, + "loss_rtd": 0.2727966010570526, + "loss_sent": 0.22008396685123444, + "loss_sod": 0.04198246821761131, + "loss_total": 0.5348630547523499, + "step": 141099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.092651128768921, + "learning_rate": 5.361475264834921e-05, + "loss": 0.5367, + "step": 141100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.347242832183838, + "loss_rtd": 0.2868700623512268, + "loss_sent": 0.049733467400074005, + "loss_sod": 0.07246068865060806, + "loss_total": 0.4090642035007477, + "step": 141199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.100693702697754, + "loss_rtd": 0.27388474345207214, + "loss_sent": 0.26119932532310486, + "loss_sod": 0.03162490203976631, + "loss_total": 0.5667089819908142, + "step": 141199 + }, + { + "epoch": 0.0224, + "grad_norm": 0.895071268081665, + "learning_rate": 5.358310170010184e-05, + "loss": 0.5246, + "step": 141200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.4381794929504395, + "loss_rtd": 0.2834264039993286, + "loss_sent": 0.22384138405323029, + "loss_sod": 0.0389728844165802, + "loss_total": 0.5462406873703003, + "step": 141299 + }, + { + "epoch": 0.022598, + "loss_gen": 5.192421913146973, + "loss_rtd": 0.2665290832519531, + "loss_sent": 0.20966970920562744, + "loss_sod": 0.03119862824678421, + "loss_total": 0.5073974132537842, + "step": 141299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.4381169080734253, + "learning_rate": 5.355144930858156e-05, + "loss": 0.5288, + "step": 141300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.142797946929932, + "loss_rtd": 0.2882399260997772, + "loss_sent": 0.33860427141189575, + "loss_sod": 0.14554527401924133, + "loss_total": 0.7723894715309143, + "step": 141399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.220066070556641, + "loss_rtd": 0.27605000138282776, + "loss_sent": 0.37749025225639343, + "loss_sod": 0.12252858281135559, + "loss_total": 0.7760688066482544, + "step": 141399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.5305678844451904, + "learning_rate": 5.35197954865379e-05, + "loss": 0.5442, + "step": 141400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.572871208190918, + "loss_rtd": 0.29910731315612793, + "loss_sent": 0.12829171121120453, + "loss_sod": 0.07723744958639145, + "loss_total": 0.5046364665031433, + "step": 141499 + }, + { + "epoch": 0.022998, + "loss_gen": 4.925431251525879, + "loss_rtd": 0.2905297875404358, + "loss_sent": 0.3508687913417816, + "loss_sod": 0.09138332307338715, + "loss_total": 0.7327818870544434, + "step": 141499 + }, + { + "epoch": 0.023, + "grad_norm": 1.5656780004501343, + "learning_rate": 5.348814024672105e-05, + "loss": 0.5172, + "step": 141500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.2485809326171875, + "loss_rtd": 0.27613022923469543, + "loss_sent": 0.14428235590457916, + "loss_sod": 0.03354290872812271, + "loss_total": 0.4539554715156555, + "step": 141599 + }, + { + "epoch": 0.023198, + "loss_gen": 4.784273624420166, + "loss_rtd": 0.2983526885509491, + "loss_sent": 0.14410346746444702, + "loss_sod": 0.02310715615749359, + "loss_total": 0.4655632972717285, + "step": 141599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.1338571310043335, + "learning_rate": 5.345648360188172e-05, + "loss": 0.5308, + "step": 141600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.090795040130615, + "loss_rtd": 0.29210251569747925, + "loss_sent": 0.12591008841991425, + "loss_sod": 0.014865301549434662, + "loss_total": 0.43287789821624756, + "step": 141699 + }, + { + "epoch": 0.023398, + "loss_gen": 4.447715759277344, + "loss_rtd": 0.2845127284526825, + "loss_sent": 3.83231854357291e-05, + "loss_sod": 0.11685922741889954, + "loss_total": 0.4014102816581726, + "step": 141699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.6102616786956787, + "learning_rate": 5.342482556477122e-05, + "loss": 0.5348, + "step": 141700 + }, + { + "epoch": 0.023598, + "loss_gen": 4.8323283195495605, + "loss_rtd": 0.2786339521408081, + "loss_sent": 0.31370827555656433, + "loss_sod": 0.06373046338558197, + "loss_total": 0.656072735786438, + "step": 141799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.1615400314331055, + "loss_rtd": 0.2869878113269806, + "loss_sent": 0.140548974275589, + "loss_sod": 0.028873804956674576, + "loss_total": 0.45641058683395386, + "step": 141799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.0132590532302856, + "learning_rate": 5.339316614814138e-05, + "loss": 0.5241, + "step": 141800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.21695613861084, + "loss_rtd": 0.27958694100379944, + "loss_sent": 0.22142477333545685, + "loss_sod": 0.012841064482927322, + "loss_total": 0.5138527750968933, + "step": 141899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.146639823913574, + "loss_rtd": 0.2886291742324829, + "loss_sent": 0.0847259908914566, + "loss_sod": 0.155159592628479, + "loss_total": 0.5285147428512573, + "step": 141899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.0194185972213745, + "learning_rate": 5.336150536474463e-05, + "loss": 0.5183, + "step": 141900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.03857421875, + "loss_rtd": 0.3072211444377899, + "loss_sent": 0.19135643541812897, + "loss_sod": 0.011410113424062729, + "loss_total": 0.5099877119064331, + "step": 141999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.117791175842285, + "loss_rtd": 0.2907784879207611, + "loss_sent": 0.0756467804312706, + "loss_sod": 0.04117099940776825, + "loss_total": 0.40759626030921936, + "step": 141999 + }, + { + "epoch": 0.024, + "grad_norm": 0.7143073678016663, + "learning_rate": 5.332984322733392e-05, + "loss": 0.5268, + "step": 142000 + }, + { + "epoch": 0.024, + "eval_loss": 0.5095360279083252, + "eval_runtime": 151.8864, + "eval_samples_per_second": 101.675, + "eval_steps_per_second": 0.797, + "step": 142000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.314025402069092, + "loss_rtd": 0.2714063227176666, + "loss_sent": 0.17580746114253998, + "loss_sod": 0.07070260494947433, + "loss_total": 0.5179163813591003, + "step": 142099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.130003929138184, + "loss_rtd": 0.2887544631958008, + "loss_sent": 0.14470921456813812, + "loss_sod": 0.028869371861219406, + "loss_total": 0.4623330235481262, + "step": 142099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.853802502155304, + "learning_rate": 5.3298179748662756e-05, + "loss": 0.5407, + "step": 142100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.291382312774658, + "loss_rtd": 0.28734806180000305, + "loss_sent": 0.3452179729938507, + "loss_sod": 0.1100362166762352, + "loss_total": 0.7426022291183472, + "step": 142199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.413672924041748, + "loss_rtd": 0.2905466854572296, + "loss_sent": 0.39333876967430115, + "loss_sod": 0.06980624794960022, + "loss_total": 0.7536916732788086, + "step": 142199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.6220515966415405, + "learning_rate": 5.326651494148518e-05, + "loss": 0.5452, + "step": 142200 + }, + { + "epoch": 0.024598, + "loss_gen": 4.5579915046691895, + "loss_rtd": 0.2797586917877197, + "loss_sent": 0.05823824927210808, + "loss_sod": 0.05107353627681732, + "loss_total": 0.3890704810619354, + "step": 142299 + }, + { + "epoch": 0.024598, + "loss_gen": 4.982907295227051, + "loss_rtd": 0.2790917456150055, + "loss_sent": 0.30364033579826355, + "loss_sod": 0.06816454976797104, + "loss_total": 0.6508966684341431, + "step": 142299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.0212825536727905, + "learning_rate": 5.323484881855578e-05, + "loss": 0.5266, + "step": 142300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.212350845336914, + "loss_rtd": 0.29858461022377014, + "loss_sent": 0.3669554889202118, + "loss_sod": 0.06416751444339752, + "loss_total": 0.7297075986862183, + "step": 142399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.119755744934082, + "loss_rtd": 0.2848243713378906, + "loss_sent": 0.2943706214427948, + "loss_sod": 0.00534034613519907, + "loss_total": 0.5845353603363037, + "step": 142399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.4153069257736206, + "learning_rate": 5.3203181392629655e-05, + "loss": 0.5271, + "step": 142400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.111626625061035, + "loss_rtd": 0.28250032663345337, + "loss_sent": 0.06871972978115082, + "loss_sod": 0.03493531793355942, + "loss_total": 0.386155366897583, + "step": 142499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.229166507720947, + "loss_rtd": 0.2831133306026459, + "loss_sent": 0.14898912608623505, + "loss_sod": 0.037110161036252975, + "loss_total": 0.4692125916481018, + "step": 142499 + }, + { + "epoch": 0.025, + "grad_norm": 0.8726935386657715, + "learning_rate": 5.317151267646246e-05, + "loss": 0.5309, + "step": 142500 + }, + { + "epoch": 0.025198, + "loss_gen": 4.964182376861572, + "loss_rtd": 0.2866588830947876, + "loss_sent": 0.14459633827209473, + "loss_sod": 0.04257828742265701, + "loss_total": 0.47383350133895874, + "step": 142599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.328502178192139, + "loss_rtd": 0.2869490087032318, + "loss_sent": 0.24192702770233154, + "loss_sod": 0.09247876703739166, + "loss_total": 0.6213548183441162, + "step": 142599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.888357400894165, + "learning_rate": 5.3139842682810325e-05, + "loss": 0.5268, + "step": 142600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.31513786315918, + "loss_rtd": 0.30803197622299194, + "loss_sent": 0.24215351045131683, + "loss_sod": 0.17132531106472015, + "loss_total": 0.7215108275413513, + "step": 142699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.110383033752441, + "loss_rtd": 0.3043190836906433, + "loss_sent": 0.29990580677986145, + "loss_sod": 0.02735138311982155, + "loss_total": 0.6315762996673584, + "step": 142699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.414551854133606, + "learning_rate": 5.310817142442994e-05, + "loss": 0.5275, + "step": 142700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.0857696533203125, + "loss_rtd": 0.2900143563747406, + "loss_sent": 0.13079498708248138, + "loss_sod": 0.03388945758342743, + "loss_total": 0.4546988010406494, + "step": 142799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.201300144195557, + "loss_rtd": 0.28961318731307983, + "loss_sent": 0.17543339729309082, + "loss_sod": 0.09287995100021362, + "loss_total": 0.5579265356063843, + "step": 142799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.7257480025291443, + "learning_rate": 5.3076498914078485e-05, + "loss": 0.526, + "step": 142800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.191403865814209, + "loss_rtd": 0.27923136949539185, + "loss_sent": 0.19919177889823914, + "loss_sod": 0.06867915391921997, + "loss_total": 0.5471023321151733, + "step": 142899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.063490390777588, + "loss_rtd": 0.2899428904056549, + "loss_sent": 0.23567284643650055, + "loss_sod": 0.0096047418192029, + "loss_total": 0.5352205038070679, + "step": 142899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.9004678130149841, + "learning_rate": 5.3044825164513625e-05, + "loss": 0.5357, + "step": 142900 + }, + { + "epoch": 0.025998, + "loss_gen": 4.831913948059082, + "loss_rtd": 0.2710915803909302, + "loss_sent": 0.005070207174867392, + "loss_sod": 0.19541436433792114, + "loss_total": 0.47157615423202515, + "step": 142999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.439896583557129, + "loss_rtd": 0.2875880002975464, + "loss_sent": 0.2070181518793106, + "loss_sod": 0.15899549424648285, + "loss_total": 0.6536016464233398, + "step": 142999 + }, + { + "epoch": 0.026, + "grad_norm": 1.2820786237716675, + "learning_rate": 5.3013150188493554e-05, + "loss": 0.5319, + "step": 143000 + }, + { + "epoch": 0.026, + "eval_loss": 0.5012403726577759, + "eval_runtime": 151.7563, + "eval_samples_per_second": 101.762, + "eval_steps_per_second": 0.797, + "step": 143000 + }, + { + "epoch": 0.000198, + "loss_gen": 4.66406774520874, + "loss_rtd": 0.2746325731277466, + "loss_sent": 0.03527475520968437, + "loss_sod": 0.10925401747226715, + "loss_total": 0.419161319732666, + "step": 143099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.168968200683594, + "loss_rtd": 0.3047997057437897, + "loss_sent": 0.1341041475534439, + "loss_sod": 0.022689972072839737, + "loss_total": 0.4615938067436218, + "step": 143099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.9890168905258179, + "learning_rate": 5.298147399877694e-05, + "loss": 0.5203, + "step": 143100 + }, + { + "epoch": 0.000398, + "loss_gen": 6.0309739112854, + "loss_rtd": 0.28600800037384033, + "loss_sent": 0.07655972987413406, + "loss_sod": 0.12908899784088135, + "loss_total": 0.49165672063827515, + "step": 143199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.226163864135742, + "loss_rtd": 0.2816427946090698, + "loss_sent": 0.271378755569458, + "loss_sod": 0.08493401110172272, + "loss_total": 0.6379555463790894, + "step": 143199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.8018764853477478, + "learning_rate": 5.294979660812298e-05, + "loss": 0.5279, + "step": 143200 + }, + { + "epoch": 0.000598, + "loss_gen": 4.230194568634033, + "loss_rtd": 0.2597436010837555, + "loss_sent": 4.242077920935117e-05, + "loss_sod": 0.18114471435546875, + "loss_total": 0.44093072414398193, + "step": 143299 + }, + { + "epoch": 0.000598, + "loss_gen": 4.6562113761901855, + "loss_rtd": 0.29104700684547424, + "loss_sent": 0.13502706587314606, + "loss_sod": 0.06349501013755798, + "loss_total": 0.4895690679550171, + "step": 143299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.9213275909423828, + "learning_rate": 5.2918118029291273e-05, + "loss": 0.5265, + "step": 143300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.273587226867676, + "loss_rtd": 0.28636878728866577, + "loss_sent": 0.2882729172706604, + "loss_sod": 0.051931463181972504, + "loss_total": 0.6265732049942017, + "step": 143399 + }, + { + "epoch": 0.000798, + "loss_gen": 4.987724304199219, + "loss_rtd": 0.28506967425346375, + "loss_sent": 0.15950746834278107, + "loss_sod": 0.05867493897676468, + "loss_total": 0.5032520890235901, + "step": 143399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.0150002241134644, + "learning_rate": 5.288643827504199e-05, + "loss": 0.5162, + "step": 143400 + }, + { + "epoch": 0.000998, + "loss_gen": 4.251161098480225, + "loss_rtd": 0.26213178038597107, + "loss_sent": 0.001973393140360713, + "loss_sod": 0.09073591232299805, + "loss_total": 0.35484108328819275, + "step": 143499 + }, + { + "epoch": 0.000998, + "loss_gen": 4.917688369750977, + "loss_rtd": 0.2969304919242859, + "loss_sent": 0.5181108117103577, + "loss_sod": 0.001427557785063982, + "loss_total": 0.8164688348770142, + "step": 143499 + }, + { + "epoch": 0.001, + "grad_norm": 1.1918784379959106, + "learning_rate": 5.28547573581357e-05, + "loss": 0.536, + "step": 143500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.211683750152588, + "loss_rtd": 0.27299249172210693, + "loss_sent": 0.1254974752664566, + "loss_sod": 0.030453510582447052, + "loss_total": 0.4289434552192688, + "step": 143599 + }, + { + "epoch": 0.001198, + "loss_gen": 4.957193374633789, + "loss_rtd": 0.2765406668186188, + "loss_sent": 0.2259223908185959, + "loss_sod": 0.009444572031497955, + "loss_total": 0.5119076371192932, + "step": 143599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.7084022164344788, + "learning_rate": 5.28230752913335e-05, + "loss": 0.5381, + "step": 143600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.326329708099365, + "loss_rtd": 0.28667306900024414, + "loss_sent": 0.37185096740722656, + "loss_sod": 0.055304475128650665, + "loss_total": 0.7138285040855408, + "step": 143699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.471609115600586, + "loss_rtd": 0.2862567901611328, + "loss_sent": 0.32228535413742065, + "loss_sod": 0.034261059015989304, + "loss_total": 0.6428031921386719, + "step": 143699 + }, + { + "epoch": 0.0014, + "grad_norm": 1.2625130414962769, + "learning_rate": 5.2791392087396916e-05, + "loss": 0.5404, + "step": 143700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.279477596282959, + "loss_rtd": 0.28712037205696106, + "loss_sent": 0.07828990370035172, + "loss_sod": 0.04689822718501091, + "loss_total": 0.4123084843158722, + "step": 143799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.951378345489502, + "loss_rtd": 0.28546950221061707, + "loss_sent": 0.2018580287694931, + "loss_sod": 0.031223274767398834, + "loss_total": 0.5185508131980896, + "step": 143799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.14565908908844, + "learning_rate": 5.275970775908793e-05, + "loss": 0.5412, + "step": 143800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.2541632652282715, + "loss_rtd": 0.2872055768966675, + "loss_sent": 0.0985424593091011, + "loss_sod": 0.09489560127258301, + "loss_total": 0.480643630027771, + "step": 143899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.1686882972717285, + "loss_rtd": 0.28436461091041565, + "loss_sent": 0.2751750648021698, + "loss_sod": 0.08275718986988068, + "loss_total": 0.6422969102859497, + "step": 143899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.7285401821136475, + "learning_rate": 5.272802231916897e-05, + "loss": 0.533, + "step": 143900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.279683589935303, + "loss_rtd": 0.28544238209724426, + "loss_sent": 0.32155296206474304, + "loss_sod": 0.1077994555234909, + "loss_total": 0.7147947549819946, + "step": 143999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.334860324859619, + "loss_rtd": 0.2843003273010254, + "loss_sent": 0.25391697883605957, + "loss_sod": 0.06681793183088303, + "loss_total": 0.6050352454185486, + "step": 143999 + }, + { + "epoch": 0.002, + "grad_norm": 1.001358151435852, + "learning_rate": 5.269633578040296e-05, + "loss": 0.5315, + "step": 144000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4986079931259155, + "eval_runtime": 151.7633, + "eval_samples_per_second": 101.757, + "eval_steps_per_second": 0.797, + "step": 144000 + }, + { + "epoch": 0.002198, + "loss_gen": 4.742333889007568, + "loss_rtd": 0.27811941504478455, + "loss_sent": 0.0001277799456147477, + "loss_sod": 0.28573179244995117, + "loss_total": 0.5639790296554565, + "step": 144099 + }, + { + "epoch": 0.002198, + "loss_gen": 4.326420307159424, + "loss_rtd": 0.2698899507522583, + "loss_sent": 0.007454150356352329, + "loss_sod": 0.05084558576345444, + "loss_total": 0.3281897008419037, + "step": 144099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.4421451091766357, + "learning_rate": 5.266464815555322e-05, + "loss": 0.5063, + "step": 144100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.360074043273926, + "loss_rtd": 0.25610458850860596, + "loss_sent": 0.14470449090003967, + "loss_sod": 0.13214565813541412, + "loss_total": 0.5329546928405762, + "step": 144199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.312962532043457, + "loss_rtd": 0.27623429894447327, + "loss_sent": 0.12307702749967575, + "loss_sod": 0.06466057151556015, + "loss_total": 0.46397191286087036, + "step": 144199 + }, + { + "epoch": 0.0024, + "grad_norm": 2.330955982208252, + "learning_rate": 5.2632959457383524e-05, + "loss": 0.5111, + "step": 144200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.245543479919434, + "loss_rtd": 0.27186232805252075, + "loss_sent": 0.1922900378704071, + "loss_sod": 0.019116532057523727, + "loss_total": 0.4832688868045807, + "step": 144299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.100775241851807, + "loss_rtd": 0.26578307151794434, + "loss_sent": 0.14884836971759796, + "loss_sod": 0.06197121739387512, + "loss_total": 0.4766026735305786, + "step": 144299 + }, + { + "epoch": 0.0026, + "grad_norm": 0.8121767044067383, + "learning_rate": 5.260126969865806e-05, + "loss": 0.5047, + "step": 144300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.928552627563477, + "loss_rtd": 0.29740992188453674, + "loss_sent": 0.06796685606241226, + "loss_sod": 0.013185365125536919, + "loss_total": 0.3785621225833893, + "step": 144399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.756792068481445, + "loss_rtd": 0.2940201461315155, + "loss_sent": 0.12159086763858795, + "loss_sod": 0.11485806107521057, + "loss_total": 0.5304690599441528, + "step": 144399 + }, + { + "epoch": 0.0028, + "grad_norm": 0.7329797744750977, + "learning_rate": 5.256957889214149e-05, + "loss": 0.5407, + "step": 144400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.3688836097717285, + "loss_rtd": 0.27353352308273315, + "loss_sent": 0.20044541358947754, + "loss_sod": 0.11551567912101746, + "loss_total": 0.5894945859909058, + "step": 144499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.121417045593262, + "loss_rtd": 0.30285027623176575, + "loss_sent": 0.17902149260044098, + "loss_sod": 0.04204069823026657, + "loss_total": 0.5239124894142151, + "step": 144499 + }, + { + "epoch": 0.003, + "grad_norm": 1.3801915645599365, + "learning_rate": 5.2537887050598836e-05, + "loss": 0.5248, + "step": 144500 + }, + { + "epoch": 0.003198, + "loss_gen": 4.300551891326904, + "loss_rtd": 0.26581066846847534, + "loss_sent": 0.06156810745596886, + "loss_sod": 0.10824832320213318, + "loss_total": 0.4356271028518677, + "step": 144599 + }, + { + "epoch": 0.003198, + "loss_gen": 4.990353107452393, + "loss_rtd": 0.2805595099925995, + "loss_sent": 0.2781326472759247, + "loss_sod": 0.01689683087170124, + "loss_total": 0.5755890011787415, + "step": 144599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.461370825767517, + "learning_rate": 5.2506194186795585e-05, + "loss": 0.5333, + "step": 144600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.402153015136719, + "loss_rtd": 0.29907524585723877, + "loss_sent": 0.2757262885570526, + "loss_sod": 0.05807660520076752, + "loss_total": 0.6328781843185425, + "step": 144699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.253642559051514, + "loss_rtd": 0.3036414682865143, + "loss_sent": 0.23505249619483948, + "loss_sod": 0.02995115891098976, + "loss_total": 0.5686451196670532, + "step": 144699 + }, + { + "epoch": 0.0034, + "grad_norm": 2.0132596492767334, + "learning_rate": 5.247450031349761e-05, + "loss": 0.5165, + "step": 144700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.567648410797119, + "loss_rtd": 0.2975943088531494, + "loss_sent": 0.12214425206184387, + "loss_sod": 0.04818717762827873, + "loss_total": 0.4679257273674011, + "step": 144799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.476112365722656, + "loss_rtd": 0.2752467691898346, + "loss_sent": 0.2662510573863983, + "loss_sod": 0.08222628384828568, + "loss_total": 0.623724102973938, + "step": 144799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.566232442855835, + "learning_rate": 5.244280544347122e-05, + "loss": 0.5208, + "step": 144800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.422399044036865, + "loss_rtd": 0.2777884006500244, + "loss_sent": 0.30881041288375854, + "loss_sod": 0.06714334338903427, + "loss_total": 0.6537421941757202, + "step": 144899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.092738628387451, + "loss_rtd": 0.27688854932785034, + "loss_sent": 0.09166580438613892, + "loss_sod": 0.020468203350901604, + "loss_total": 0.389022558927536, + "step": 144899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.0747374296188354, + "learning_rate": 5.2411109589483074e-05, + "loss": 0.5349, + "step": 144900 + }, + { + "epoch": 0.003998, + "loss_gen": 4.289102554321289, + "loss_rtd": 0.25185874104499817, + "loss_sent": 0.030722348019480705, + "loss_sod": 0.13470537960529327, + "loss_total": 0.4172864854335785, + "step": 144999 + }, + { + "epoch": 0.003998, + "loss_gen": 4.872633457183838, + "loss_rtd": 0.28845635056495667, + "loss_sent": 0.23248308897018433, + "loss_sod": 0.010248836129903793, + "loss_total": 0.5311882495880127, + "step": 144999 + }, + { + "epoch": 0.004, + "grad_norm": 0.8651681542396545, + "learning_rate": 5.2379412764300286e-05, + "loss": 0.5108, + "step": 145000 + }, + { + "epoch": 0.004, + "eval_loss": 0.49849000573158264, + "eval_runtime": 150.0413, + "eval_samples_per_second": 102.925, + "eval_steps_per_second": 0.806, + "step": 145000 + }, + { + "epoch": 0.004198, + "loss_gen": 4.525643825531006, + "loss_rtd": 0.26510900259017944, + "loss_sent": 4.2476720409467816e-05, + "loss_sod": 0.16975006461143494, + "loss_total": 0.43490153551101685, + "step": 145099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.021480560302734, + "loss_rtd": 0.27532893419265747, + "loss_sent": 0.048536594957113266, + "loss_sod": 0.14325806498527527, + "loss_total": 0.4671235978603363, + "step": 145099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2153644561767578, + "learning_rate": 5.234771498069032e-05, + "loss": 0.5306, + "step": 145100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.092408657073975, + "loss_rtd": 0.29199859499931335, + "loss_sent": 0.1541261076927185, + "loss_sod": 0.02647477388381958, + "loss_total": 0.47259947657585144, + "step": 145199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.244051456451416, + "loss_rtd": 0.2970103621482849, + "loss_sent": 0.3261437714099884, + "loss_sod": 0.07276580482721329, + "loss_total": 0.695919930934906, + "step": 145199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.7709296941757202, + "learning_rate": 5.231601625142103e-05, + "loss": 0.5116, + "step": 145200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.031761169433594, + "loss_rtd": 0.30165672302246094, + "loss_sent": 0.059283364564180374, + "loss_sod": 0.00943625159561634, + "loss_total": 0.3703763484954834, + "step": 145299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.8102126121521, + "loss_rtd": 0.28832292556762695, + "loss_sent": 0.1924618035554886, + "loss_sod": 0.12810344994068146, + "loss_total": 0.6088881492614746, + "step": 145299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.8893154859542847, + "learning_rate": 5.228431658926068e-05, + "loss": 0.5195, + "step": 145300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.59763240814209, + "loss_rtd": 0.29531168937683105, + "loss_sent": 0.11734317243099213, + "loss_sod": 0.024245794862508774, + "loss_total": 0.43690067529678345, + "step": 145399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.305819034576416, + "loss_rtd": 0.2831506133079529, + "loss_sent": 0.14053313434123993, + "loss_sod": 0.0076009538024663925, + "loss_total": 0.43128472566604614, + "step": 145399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.1131749153137207, + "learning_rate": 5.225261600697787e-05, + "loss": 0.5084, + "step": 145400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.338892936706543, + "loss_rtd": 0.2787962555885315, + "loss_sent": 0.10962951928377151, + "loss_sod": 0.028091823682188988, + "loss_total": 0.41651758551597595, + "step": 145499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.347107887268066, + "loss_rtd": 0.2881333529949188, + "loss_sent": 0.2519276738166809, + "loss_sod": 0.02762795239686966, + "loss_total": 0.5676889419555664, + "step": 145499 + }, + { + "epoch": 0.005, + "grad_norm": 0.8285014033317566, + "learning_rate": 5.2220914517341614e-05, + "loss": 0.5065, + "step": 145500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.96449613571167, + "loss_rtd": 0.28018784523010254, + "loss_sent": 0.12971442937850952, + "loss_sod": 0.010564430616796017, + "loss_total": 0.42046669125556946, + "step": 145599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.47280216217041, + "loss_rtd": 0.2938080430030823, + "loss_sent": 0.18062248826026917, + "loss_sod": 0.1176159530878067, + "loss_total": 0.5920464992523193, + "step": 145599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.9586519002914429, + "learning_rate": 5.218921213312125e-05, + "loss": 0.5203, + "step": 145600 + }, + { + "epoch": 0.005398, + "loss_gen": 4.765594482421875, + "loss_rtd": 0.24818666279315948, + "loss_sent": 0.02665616199374199, + "loss_sod": 0.18032819032669067, + "loss_total": 0.45517098903656006, + "step": 145699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.4180588722229, + "loss_rtd": 0.29591622948646545, + "loss_sent": 0.15679924190044403, + "loss_sod": 0.03232501819729805, + "loss_total": 0.48504048585891724, + "step": 145699 + }, + { + "epoch": 0.0054, + "grad_norm": 0.8756905794143677, + "learning_rate": 5.2157508867086505e-05, + "loss": 0.5301, + "step": 145700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.147712230682373, + "loss_rtd": 0.2685620188713074, + "loss_sent": 0.2744750678539276, + "loss_sod": 0.0034240009263157845, + "loss_total": 0.5464611053466797, + "step": 145799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.349913597106934, + "loss_rtd": 0.27673259377479553, + "loss_sent": 0.5106353759765625, + "loss_sod": 0.09146961569786072, + "loss_total": 0.8788375854492188, + "step": 145799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.717309832572937, + "learning_rate": 5.2125804732007444e-05, + "loss": 0.5258, + "step": 145800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.037972450256348, + "loss_rtd": 0.2800423800945282, + "loss_sent": 0.16363272070884705, + "loss_sod": 0.030139204114675522, + "loss_total": 0.47381430864334106, + "step": 145899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.376882553100586, + "loss_rtd": 0.30431145429611206, + "loss_sent": 0.10817435383796692, + "loss_sod": 0.10825251787900925, + "loss_total": 0.5207383632659912, + "step": 145899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.1302735805511475, + "learning_rate": 5.209409974065449e-05, + "loss": 0.5219, + "step": 145900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.465824127197266, + "loss_rtd": 0.2778286337852478, + "loss_sent": 0.34211188554763794, + "loss_sod": 0.05147743597626686, + "loss_total": 0.6714179515838623, + "step": 145999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.824912071228027, + "loss_rtd": 0.2751026451587677, + "loss_sent": 0.0986921563744545, + "loss_sod": 0.11800215393304825, + "loss_total": 0.49179694056510925, + "step": 145999 + }, + { + "epoch": 0.006, + "grad_norm": 1.7354319095611572, + "learning_rate": 5.206239390579841e-05, + "loss": 0.5245, + "step": 146000 + }, + { + "epoch": 0.006, + "eval_loss": 0.49971112608909607, + "eval_runtime": 151.2542, + "eval_samples_per_second": 102.1, + "eval_steps_per_second": 0.8, + "step": 146000 + }, + { + "epoch": 0.006198, + "loss_gen": 4.642544746398926, + "loss_rtd": 0.2521948218345642, + "loss_sent": 0.027225524187088013, + "loss_sod": 0.10969524830579758, + "loss_total": 0.3891156017780304, + "step": 146099 + }, + { + "epoch": 0.006198, + "loss_gen": 4.493594169616699, + "loss_rtd": 0.2724360525608063, + "loss_sent": 6.572721031261608e-05, + "loss_sod": 0.11912119388580322, + "loss_total": 0.39162296056747437, + "step": 146099 + }, + { + "epoch": 0.0062, + "grad_norm": 0.7895417809486389, + "learning_rate": 5.203068724021032e-05, + "loss": 0.5336, + "step": 146100 + }, + { + "epoch": 0.006398, + "loss_gen": 4.546789169311523, + "loss_rtd": 0.2653850018978119, + "loss_sent": 0.07199425995349884, + "loss_sod": 0.19736507534980774, + "loss_total": 0.5347443222999573, + "step": 146199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.211479663848877, + "loss_rtd": 0.2985738515853882, + "loss_sent": 0.31487011909484863, + "loss_sod": 0.03032844513654709, + "loss_total": 0.6437724232673645, + "step": 146199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.3916696310043335, + "learning_rate": 5.199897975666164e-05, + "loss": 0.4996, + "step": 146200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.014381408691406, + "loss_rtd": 0.26531317830085754, + "loss_sent": 0.008006240241229534, + "loss_sod": 0.280905157327652, + "loss_total": 0.5542245507240295, + "step": 146299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.242966175079346, + "loss_rtd": 0.29195889830589294, + "loss_sent": 0.12836714088916779, + "loss_sod": 0.07119995355606079, + "loss_total": 0.4915260076522827, + "step": 146299 + }, + { + "epoch": 0.0066, + "grad_norm": 1.2561445236206055, + "learning_rate": 5.196727146792416e-05, + "loss": 0.5213, + "step": 146300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.0602192878723145, + "loss_rtd": 0.27024582028388977, + "loss_sent": 0.16223858296871185, + "loss_sod": 0.03788085654377937, + "loss_total": 0.4703652858734131, + "step": 146399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.497828960418701, + "loss_rtd": 0.30007460713386536, + "loss_sent": 0.20957550406455994, + "loss_sod": 0.017270918935537338, + "loss_total": 0.5269210338592529, + "step": 146399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.963026762008667, + "learning_rate": 5.193556238676996e-05, + "loss": 0.5167, + "step": 146400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.307521820068359, + "loss_rtd": 0.2788214385509491, + "loss_sent": 0.07746175676584244, + "loss_sod": 0.17130763828754425, + "loss_total": 0.5275908708572388, + "step": 146499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.17738676071167, + "loss_rtd": 0.2861032485961914, + "loss_sent": 0.27327609062194824, + "loss_sod": 0.011359155178070068, + "loss_total": 0.5707384943962097, + "step": 146499 + }, + { + "epoch": 0.007, + "grad_norm": 0.8306548595428467, + "learning_rate": 5.1903852525971476e-05, + "loss": 0.5264, + "step": 146500 + }, + { + "epoch": 0.007198, + "loss_gen": 4.597184181213379, + "loss_rtd": 0.2621597647666931, + "loss_sent": 0.15151676535606384, + "loss_sod": 0.0587867796421051, + "loss_total": 0.47246330976486206, + "step": 146599 + }, + { + "epoch": 0.007198, + "loss_gen": 4.857696533203125, + "loss_rtd": 0.3095194697380066, + "loss_sent": 0.24361759424209595, + "loss_sod": 0.016918929293751717, + "loss_total": 0.5700559616088867, + "step": 146599 + }, + { + "epoch": 0.0072, + "grad_norm": 0.8499969840049744, + "learning_rate": 5.1872141898301405e-05, + "loss": 0.5222, + "step": 146600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.243187427520752, + "loss_rtd": 0.28105396032333374, + "loss_sent": 0.23848485946655273, + "loss_sod": 0.028276845812797546, + "loss_total": 0.5478156805038452, + "step": 146699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.279906272888184, + "loss_rtd": 0.2728053629398346, + "loss_sent": 0.275601327419281, + "loss_sod": 0.031477998942136765, + "loss_total": 0.5798846483230591, + "step": 146699 + }, + { + "epoch": 0.0074, + "grad_norm": 0.8263813257217407, + "learning_rate": 5.184043051653282e-05, + "loss": 0.5411, + "step": 146700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.104190826416016, + "loss_rtd": 0.30905061960220337, + "loss_sent": 0.12589170038700104, + "loss_sod": 0.07200553268194199, + "loss_total": 0.5069478750228882, + "step": 146799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.063407897949219, + "loss_rtd": 0.261229932308197, + "loss_sent": 0.2982919216156006, + "loss_sod": 0.06371574103832245, + "loss_total": 0.6232376098632812, + "step": 146799 + }, + { + "epoch": 0.0076, + "grad_norm": 1.327774167060852, + "learning_rate": 5.180871839343904e-05, + "loss": 0.5035, + "step": 146800 + }, + { + "epoch": 0.007798, + "loss_gen": 4.542552471160889, + "loss_rtd": 0.2761382460594177, + "loss_sent": 0.0015847000759094954, + "loss_sod": 0.0996432900428772, + "loss_total": 0.37736624479293823, + "step": 146899 + }, + { + "epoch": 0.007798, + "loss_gen": 4.099895000457764, + "loss_rtd": 0.2609536945819855, + "loss_sent": 4.1595685615902767e-05, + "loss_sod": 0.12537619471549988, + "loss_total": 0.3863714933395386, + "step": 146899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.9281373023986816, + "learning_rate": 5.17770055417937e-05, + "loss": 0.5355, + "step": 146900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.141694068908691, + "loss_rtd": 0.28546997904777527, + "loss_sent": 0.06943061947822571, + "loss_sod": 0.03025348111987114, + "loss_total": 0.3851540684700012, + "step": 146999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.212350368499756, + "loss_rtd": 0.28410953283309937, + "loss_sent": 0.5911439657211304, + "loss_sod": 0.053531453013420105, + "loss_total": 0.928784966468811, + "step": 146999 + }, + { + "epoch": 0.008, + "grad_norm": 2.583029270172119, + "learning_rate": 5.174529197437075e-05, + "loss": 0.5139, + "step": 147000 + }, + { + "epoch": 0.008, + "eval_loss": 0.5039019584655762, + "eval_runtime": 149.783, + "eval_samples_per_second": 103.102, + "eval_steps_per_second": 0.808, + "step": 147000 + }, + { + "epoch": 0.008198, + "loss_gen": 4.981076717376709, + "loss_rtd": 0.2826388478279114, + "loss_sent": 0.15006348490715027, + "loss_sod": 0.05539526045322418, + "loss_total": 0.48809757828712463, + "step": 147099 + }, + { + "epoch": 0.008198, + "loss_gen": 4.922446250915527, + "loss_rtd": 0.26888060569763184, + "loss_sent": 0.12618999183177948, + "loss_sod": 0.02075822651386261, + "loss_total": 0.4158288240432739, + "step": 147099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.805479884147644, + "learning_rate": 5.1713577703944386e-05, + "loss": 0.534, + "step": 147100 + }, + { + "epoch": 0.008398, + "loss_gen": 4.193459510803223, + "loss_rtd": 0.202713742852211, + "loss_sent": 0.0001042370276991278, + "loss_sod": 0.40677186846733093, + "loss_total": 0.6095898747444153, + "step": 147199 + }, + { + "epoch": 0.008398, + "loss_gen": 4.794753074645996, + "loss_rtd": 0.2630331218242645, + "loss_sent": 0.03350198641419411, + "loss_sod": 0.16700756549835205, + "loss_total": 0.4635426700115204, + "step": 147199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.532089114189148, + "learning_rate": 5.168186274328913e-05, + "loss": 0.5168, + "step": 147200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.296161651611328, + "loss_rtd": 0.2707403004169464, + "loss_sent": 0.127390056848526, + "loss_sod": 0.08690618723630905, + "loss_total": 0.48503655195236206, + "step": 147299 + }, + { + "epoch": 0.008598, + "loss_gen": 4.702284812927246, + "loss_rtd": 0.269621878862381, + "loss_sent": 0.03573767468333244, + "loss_sod": 0.21274027228355408, + "loss_total": 0.518099844455719, + "step": 147299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.3610261678695679, + "learning_rate": 5.165014710517977e-05, + "loss": 0.5347, + "step": 147300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.544619560241699, + "loss_rtd": 0.2703266739845276, + "loss_sent": 0.22265926003456116, + "loss_sod": 0.09505819529294968, + "loss_total": 0.5880441665649414, + "step": 147399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.152088165283203, + "loss_rtd": 0.2840980589389801, + "loss_sent": 0.10082517564296722, + "loss_sod": 0.0299910306930542, + "loss_total": 0.41491425037384033, + "step": 147399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.0200332403182983, + "learning_rate": 5.161843080239135e-05, + "loss": 0.5303, + "step": 147400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.207124710083008, + "loss_rtd": 0.28050926327705383, + "loss_sent": 0.27539655566215515, + "loss_sod": 0.06495991349220276, + "loss_total": 0.6208657026290894, + "step": 147499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.303969383239746, + "loss_rtd": 0.2890775501728058, + "loss_sent": 0.07043773680925369, + "loss_sod": 0.05035356059670448, + "loss_total": 0.40986883640289307, + "step": 147499 + }, + { + "epoch": 0.009, + "grad_norm": 1.070663571357727, + "learning_rate": 5.1586713847699184e-05, + "loss": 0.5378, + "step": 147500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.774738788604736, + "loss_rtd": 0.2902034521102905, + "loss_sent": 0.19283321499824524, + "loss_sod": 0.07166126370429993, + "loss_total": 0.5546979308128357, + "step": 147599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.813538074493408, + "loss_rtd": 0.28299787640571594, + "loss_sent": 0.09115929156541824, + "loss_sod": 0.07303023338317871, + "loss_total": 0.4471873939037323, + "step": 147599 + }, + { + "epoch": 0.0092, + "grad_norm": 0.8816004395484924, + "learning_rate": 5.1554996253878894e-05, + "loss": 0.5038, + "step": 147600 + }, + { + "epoch": 0.009398, + "loss_gen": 4.608497619628906, + "loss_rtd": 0.26772594451904297, + "loss_sent": 0.019445884972810745, + "loss_sod": 0.2451837956905365, + "loss_total": 0.5323556661605835, + "step": 147699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.536288261413574, + "loss_rtd": 0.2630954384803772, + "loss_sent": 0.2004006803035736, + "loss_sod": 0.12198895961046219, + "loss_total": 0.5854851007461548, + "step": 147699 + }, + { + "epoch": 0.0094, + "grad_norm": 0.9539245963096619, + "learning_rate": 5.152327803370628e-05, + "loss": 0.5288, + "step": 147700 + }, + { + "epoch": 0.009598, + "loss_gen": 4.787131309509277, + "loss_rtd": 0.2679953873157501, + "loss_sent": 0.05050016939640045, + "loss_sod": 0.0257180817425251, + "loss_total": 0.34421366453170776, + "step": 147799 + }, + { + "epoch": 0.009598, + "loss_gen": 4.5462260246276855, + "loss_rtd": 0.2658301591873169, + "loss_sent": 0.008703449741005898, + "loss_sod": 0.09948737174272537, + "loss_total": 0.3740209639072418, + "step": 147799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.8293658494949341, + "learning_rate": 5.149155919995747e-05, + "loss": 0.5265, + "step": 147800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.169826030731201, + "loss_rtd": 0.2948521673679352, + "loss_sent": 0.17312173545360565, + "loss_sod": 0.021826203912496567, + "loss_total": 0.4898000955581665, + "step": 147899 + }, + { + "epoch": 0.009798, + "loss_gen": 4.541766166687012, + "loss_rtd": 0.25918081402778625, + "loss_sent": 9.824033622862771e-05, + "loss_sod": 0.06766112148761749, + "loss_total": 0.3269401788711548, + "step": 147899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.8617637753486633, + "learning_rate": 5.145983976540879e-05, + "loss": 0.5106, + "step": 147900 + }, + { + "epoch": 0.009998, + "loss_gen": 4.6215996742248535, + "loss_rtd": 0.26691102981567383, + "loss_sent": 0.07961020618677139, + "loss_sod": 0.038686759769916534, + "loss_total": 0.38520798087120056, + "step": 147999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.167181968688965, + "loss_rtd": 0.2748759984970093, + "loss_sent": 0.03125729411840439, + "loss_sod": 0.051365822553634644, + "loss_total": 0.3574991226196289, + "step": 147999 + }, + { + "epoch": 0.01, + "grad_norm": 0.6762785911560059, + "learning_rate": 5.142811974283683e-05, + "loss": 0.5172, + "step": 148000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4876296818256378, + "eval_runtime": 149.8683, + "eval_samples_per_second": 103.044, + "eval_steps_per_second": 0.807, + "step": 148000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.190354824066162, + "loss_rtd": 0.27201536297798157, + "loss_sent": 0.23600536584854126, + "loss_sod": 0.0445774644613266, + "loss_total": 0.5525981783866882, + "step": 148099 + }, + { + "epoch": 0.010198, + "loss_gen": 4.571619033813477, + "loss_rtd": 0.2665053606033325, + "loss_sent": 0.003586550010368228, + "loss_sod": 0.1969318687915802, + "loss_total": 0.4670237898826599, + "step": 148099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.0617401599884033, + "learning_rate": 5.13963991450184e-05, + "loss": 0.5031, + "step": 148100 + }, + { + "epoch": 0.010398, + "loss_gen": 4.784799575805664, + "loss_rtd": 0.27273911237716675, + "loss_sent": 0.04332485422492027, + "loss_sod": 0.06624819338321686, + "loss_total": 0.382312148809433, + "step": 148199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.042555809020996, + "loss_rtd": 0.27097582817077637, + "loss_sent": 7.135730265872553e-05, + "loss_sod": 0.24795745313167572, + "loss_total": 0.5190046429634094, + "step": 148199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.9661006927490234, + "learning_rate": 5.136467798473057e-05, + "loss": 0.5347, + "step": 148200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.472863674163818, + "loss_rtd": 0.2973729074001312, + "loss_sent": 0.166547954082489, + "loss_sod": 0.0312616229057312, + "loss_total": 0.49518248438835144, + "step": 148299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.32472562789917, + "loss_rtd": 0.2740918695926666, + "loss_sent": 0.15659397840499878, + "loss_sod": 0.09983142465353012, + "loss_total": 0.5305172801017761, + "step": 148299 + }, + { + "epoch": 0.0106, + "grad_norm": 2.0236117839813232, + "learning_rate": 5.13329562747506e-05, + "loss": 0.5305, + "step": 148300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.342022895812988, + "loss_rtd": 0.2709534764289856, + "loss_sent": 0.0804288312792778, + "loss_sod": 0.12894587218761444, + "loss_total": 0.48032820224761963, + "step": 148399 + }, + { + "epoch": 0.010798, + "loss_gen": 4.858490943908691, + "loss_rtd": 0.2701784670352936, + "loss_sent": 0.0011307575041428208, + "loss_sod": 0.1636728048324585, + "loss_total": 0.434982031583786, + "step": 148399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.430707573890686, + "learning_rate": 5.1301234027856e-05, + "loss": 0.5154, + "step": 148400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.170599460601807, + "loss_rtd": 0.2862345278263092, + "loss_sent": 0.2449491024017334, + "loss_sod": 0.006505691446363926, + "loss_total": 0.5376893281936646, + "step": 148499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.065062522888184, + "loss_rtd": 0.25986894965171814, + "loss_sent": 0.25112301111221313, + "loss_sod": 0.0871850922703743, + "loss_total": 0.5981770753860474, + "step": 148499 + }, + { + "epoch": 0.011, + "grad_norm": 1.5618078708648682, + "learning_rate": 5.12695112568245e-05, + "loss": 0.5255, + "step": 148500 + }, + { + "epoch": 0.011198, + "loss_gen": 4.927695274353027, + "loss_rtd": 0.27999284863471985, + "loss_sent": 0.1934816539287567, + "loss_sod": 0.0527733638882637, + "loss_total": 0.5262478590011597, + "step": 148599 + }, + { + "epoch": 0.011198, + "loss_gen": 4.234586238861084, + "loss_rtd": 0.26285597681999207, + "loss_sent": 0.00020559415861498564, + "loss_sod": 0.16865915060043335, + "loss_total": 0.4317207336425781, + "step": 148599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.0820720195770264, + "learning_rate": 5.123778797443402e-05, + "loss": 0.5257, + "step": 148600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.378777503967285, + "loss_rtd": 0.27872249484062195, + "loss_sent": 0.06293470412492752, + "loss_sod": 0.05825507640838623, + "loss_total": 0.3999122679233551, + "step": 148699 + }, + { + "epoch": 0.011398, + "loss_gen": 4.768366813659668, + "loss_rtd": 0.2677273452281952, + "loss_sent": 0.06426917761564255, + "loss_sod": 0.13946032524108887, + "loss_total": 0.4714568555355072, + "step": 148699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.7936626076698303, + "learning_rate": 5.1206064193462677e-05, + "loss": 0.5252, + "step": 148700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.086231708526611, + "loss_rtd": 0.25840625166893005, + "loss_sent": 0.39218491315841675, + "loss_sod": 0.0288742296397686, + "loss_total": 0.6794654130935669, + "step": 148799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.232894420623779, + "loss_rtd": 0.2748814821243286, + "loss_sent": 0.14246445894241333, + "loss_sod": 0.013335615396499634, + "loss_total": 0.4306815564632416, + "step": 148799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.8960897326469421, + "learning_rate": 5.117433992668883e-05, + "loss": 0.5172, + "step": 148800 + }, + { + "epoch": 0.011798, + "loss_gen": 4.525980472564697, + "loss_rtd": 0.26214292645454407, + "loss_sent": 0.00010965510591631755, + "loss_sod": 0.11243387311697006, + "loss_total": 0.37468644976615906, + "step": 148899 + }, + { + "epoch": 0.011798, + "loss_gen": 4.614633083343506, + "loss_rtd": 0.25251060724258423, + "loss_sent": 0.15795734524726868, + "loss_sod": 0.04601619392633438, + "loss_total": 0.4564841389656067, + "step": 148899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.0195109844207764, + "learning_rate": 5.1142615186891e-05, + "loss": 0.5235, + "step": 148900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.448610782623291, + "loss_rtd": 0.3102265000343323, + "loss_sent": 0.15307582914829254, + "loss_sod": 0.12536995112895966, + "loss_total": 0.5886722803115845, + "step": 148999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.651727676391602, + "loss_rtd": 0.2846134603023529, + "loss_sent": 0.1086808368563652, + "loss_sod": 0.020908888429403305, + "loss_total": 0.4142031967639923, + "step": 148999 + }, + { + "epoch": 0.012, + "grad_norm": 1.1462866067886353, + "learning_rate": 5.111088998684791e-05, + "loss": 0.5132, + "step": 149000 + }, + { + "epoch": 0.012, + "eval_loss": 0.49717065691947937, + "eval_runtime": 150.5412, + "eval_samples_per_second": 102.583, + "eval_steps_per_second": 0.804, + "step": 149000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.183509826660156, + "loss_rtd": 0.2786847651004791, + "loss_sent": 0.18089258670806885, + "loss_sod": 0.02995058335363865, + "loss_total": 0.48952794075012207, + "step": 149099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.1459479331970215, + "loss_rtd": 0.2758665978908539, + "loss_sent": 0.38989973068237305, + "loss_sod": 0.061380039900541306, + "loss_total": 0.7271463871002197, + "step": 149099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.0846115350723267, + "learning_rate": 5.107916433933847e-05, + "loss": 0.5286, + "step": 149100 + }, + { + "epoch": 0.012398, + "loss_gen": 4.653554916381836, + "loss_rtd": 0.2821529507637024, + "loss_sent": 4.9881629820447415e-05, + "loss_sod": 0.17509253323078156, + "loss_total": 0.45729535818099976, + "step": 149199 + }, + { + "epoch": 0.012398, + "loss_gen": 4.378239154815674, + "loss_rtd": 0.24975354969501495, + "loss_sent": 4.61439231003169e-05, + "loss_sod": 0.0695633590221405, + "loss_total": 0.3193630576133728, + "step": 149199 + }, + { + "epoch": 0.0124, + "grad_norm": 0.6731809377670288, + "learning_rate": 5.104743825714175e-05, + "loss": 0.5074, + "step": 149200 + }, + { + "epoch": 0.012598, + "loss_gen": 4.971227169036865, + "loss_rtd": 0.28132012486457825, + "loss_sent": 0.20047783851623535, + "loss_sod": 0.005067291669547558, + "loss_total": 0.48686525225639343, + "step": 149299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.21154260635376, + "loss_rtd": 0.27386224269866943, + "loss_sent": 0.12603174149990082, + "loss_sod": 0.047394100576639175, + "loss_total": 0.4472880959510803, + "step": 149299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.994850218296051, + "learning_rate": 5.101571175303704e-05, + "loss": 0.505, + "step": 149300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.278228282928467, + "loss_rtd": 0.26508253812789917, + "loss_sent": 0.2426879107952118, + "loss_sod": 0.026061663404107094, + "loss_total": 0.5338320732116699, + "step": 149399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.448014259338379, + "loss_rtd": 0.2663023769855499, + "loss_sent": 0.11761312186717987, + "loss_sod": 0.034651800990104675, + "loss_total": 0.4185672998428345, + "step": 149399 + }, + { + "epoch": 0.0128, + "grad_norm": 0.8637998104095459, + "learning_rate": 5.0983984839803746e-05, + "loss": 0.5073, + "step": 149400 + }, + { + "epoch": 0.012998, + "loss_gen": 4.440591812133789, + "loss_rtd": 0.25507181882858276, + "loss_sent": 0.024109583348035812, + "loss_sod": 0.1100204810500145, + "loss_total": 0.3892018795013428, + "step": 149499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.3086419105529785, + "loss_rtd": 0.294219046831131, + "loss_sent": 0.23900176584720612, + "loss_sod": 0.04122648388147354, + "loss_total": 0.5744472742080688, + "step": 149499 + }, + { + "epoch": 0.013, + "grad_norm": 1.1947880983352661, + "learning_rate": 5.095225753022149e-05, + "loss": 0.5202, + "step": 149500 + }, + { + "epoch": 0.013198, + "loss_gen": 4.865903854370117, + "loss_rtd": 0.2748209238052368, + "loss_sent": 0.00433766096830368, + "loss_sod": 0.23390433192253113, + "loss_total": 0.5130629539489746, + "step": 149599 + }, + { + "epoch": 0.013198, + "loss_gen": 4.590498924255371, + "loss_rtd": 0.2770313024520874, + "loss_sent": 0.026584025472402573, + "loss_sod": 0.059929199516773224, + "loss_total": 0.3635445237159729, + "step": 149599 + }, + { + "epoch": 0.0132, + "grad_norm": 0.8740405440330505, + "learning_rate": 5.092052983707e-05, + "loss": 0.4994, + "step": 149600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.447321891784668, + "loss_rtd": 0.25785496830940247, + "loss_sent": 0.41757288575172424, + "loss_sod": 0.07028576731681824, + "loss_total": 0.7457135915756226, + "step": 149699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.204119682312012, + "loss_rtd": 0.2789541184902191, + "loss_sent": 0.13769857585430145, + "loss_sod": 0.08741885423660278, + "loss_total": 0.5040715336799622, + "step": 149699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.4651234149932861, + "learning_rate": 5.088880177312921e-05, + "loss": 0.515, + "step": 149700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.082386016845703, + "loss_rtd": 0.3076334297657013, + "loss_sent": 0.1974334418773651, + "loss_sod": 0.05499216169118881, + "loss_total": 0.5600590705871582, + "step": 149799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.488828182220459, + "loss_rtd": 0.2964935600757599, + "loss_sent": 0.17566673457622528, + "loss_sod": 0.041144777089357376, + "loss_total": 0.5133050680160522, + "step": 149799 + }, + { + "epoch": 0.0136, + "grad_norm": 1.3623194694519043, + "learning_rate": 5.0857073351179166e-05, + "loss": 0.5094, + "step": 149800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.439533710479736, + "loss_rtd": 0.2903883159160614, + "loss_sent": 0.2238389253616333, + "loss_sod": 0.12228512018918991, + "loss_total": 0.6365123987197876, + "step": 149899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.538571834564209, + "loss_rtd": 0.26950180530548096, + "loss_sent": 0.43811267614364624, + "loss_sod": 0.03304041177034378, + "loss_total": 0.7406548857688904, + "step": 149899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.3348768949508667, + "learning_rate": 5.082534458400009e-05, + "loss": 0.5455, + "step": 149900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.461939334869385, + "loss_rtd": 0.2724016606807709, + "loss_sent": 0.2284865826368332, + "loss_sod": 0.029714003205299377, + "loss_total": 0.5306022763252258, + "step": 149999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.542205333709717, + "loss_rtd": 0.27906474471092224, + "loss_sent": 0.43526676297187805, + "loss_sod": 0.011782418936491013, + "loss_total": 0.7261139154434204, + "step": 149999 + }, + { + "epoch": 0.014, + "grad_norm": 1.2709500789642334, + "learning_rate": 5.07936154843723e-05, + "loss": 0.5119, + "step": 150000 + }, + { + "epoch": 0.014, + "eval_loss": 0.5034695863723755, + "eval_runtime": 150.354, + "eval_samples_per_second": 102.711, + "eval_steps_per_second": 0.805, + "step": 150000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.530719757080078, + "loss_rtd": 0.28405705094337463, + "loss_sent": 0.41221487522125244, + "loss_sod": 0.06176081299781799, + "loss_total": 0.7580327391624451, + "step": 150099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.354163646697998, + "loss_rtd": 0.2832927405834198, + "loss_sent": 0.12678612768650055, + "loss_sod": 0.08215628564357758, + "loss_total": 0.49223512411117554, + "step": 150099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.3871103525161743, + "learning_rate": 5.07618860650763e-05, + "loss": 0.5174, + "step": 150100 + }, + { + "epoch": 0.014398, + "loss_gen": 4.387948989868164, + "loss_rtd": 0.2604489028453827, + "loss_sent": 0.0833883062005043, + "loss_sod": 0.1408933401107788, + "loss_total": 0.4847305417060852, + "step": 150199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.349581241607666, + "loss_rtd": 0.27376511693000793, + "loss_sent": 0.32408303022384644, + "loss_sod": 0.011156159453094006, + "loss_total": 0.6090043187141418, + "step": 150199 + }, + { + "epoch": 0.0144, + "grad_norm": 1.3786460161209106, + "learning_rate": 5.0730156338892675e-05, + "loss": 0.5, + "step": 150200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.469412326812744, + "loss_rtd": 0.277353435754776, + "loss_sent": 0.15125852823257446, + "loss_sod": 0.018494347110390663, + "loss_total": 0.4471063017845154, + "step": 150299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.344174385070801, + "loss_rtd": 0.27286669611930847, + "loss_sent": 0.11355658620595932, + "loss_sod": 0.06565544009208679, + "loss_total": 0.4520787298679352, + "step": 150299 + }, + { + "epoch": 0.0146, + "grad_norm": 1.7398344278335571, + "learning_rate": 5.0698426318602167e-05, + "loss": 0.5134, + "step": 150300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.567254066467285, + "loss_rtd": 0.2901189625263214, + "loss_sent": 0.08586336672306061, + "loss_sod": 0.0980837270617485, + "loss_total": 0.4740660786628723, + "step": 150399 + }, + { + "epoch": 0.014798, + "loss_gen": 4.967702865600586, + "loss_rtd": 0.2737525701522827, + "loss_sent": 0.025388075038790703, + "loss_sod": 0.06181246042251587, + "loss_total": 0.36095309257507324, + "step": 150399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.6185212135314941, + "learning_rate": 5.0666696016985616e-05, + "loss": 0.498, + "step": 150400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.319883346557617, + "loss_rtd": 0.2838747203350067, + "loss_sent": 0.09052863717079163, + "loss_sod": 0.07014872878789902, + "loss_total": 0.44455209374427795, + "step": 150499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.142828464508057, + "loss_rtd": 0.28203245997428894, + "loss_sent": 0.31190669536590576, + "loss_sod": 0.01716822385787964, + "loss_total": 0.611107349395752, + "step": 150499 + }, + { + "epoch": 0.015, + "grad_norm": 0.7125792503356934, + "learning_rate": 5.0634965446824e-05, + "loss": 0.5025, + "step": 150500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.298783302307129, + "loss_rtd": 0.2770278751850128, + "loss_sent": 0.15093576908111572, + "loss_sod": 0.05704700946807861, + "loss_total": 0.48501065373420715, + "step": 150599 + }, + { + "epoch": 0.015198, + "loss_gen": 5.3972296714782715, + "loss_rtd": 0.2908611595630646, + "loss_sent": 0.11442862451076508, + "loss_sod": 0.08333995938301086, + "loss_total": 0.4886297583580017, + "step": 150599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.4157239198684692, + "learning_rate": 5.060323462089839e-05, + "loss": 0.5384, + "step": 150600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.017396926879883, + "loss_rtd": 0.2765372097492218, + "loss_sent": 0.10029003024101257, + "loss_sod": 0.1045127734541893, + "loss_total": 0.4813400208950043, + "step": 150699 + }, + { + "epoch": 0.015398, + "loss_gen": 4.340902805328369, + "loss_rtd": 0.2603532075881958, + "loss_sent": 3.807917164522223e-05, + "loss_sod": 0.21225422620773315, + "loss_total": 0.47264552116394043, + "step": 150699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.9066276550292969, + "learning_rate": 5.057150355198992e-05, + "loss": 0.5255, + "step": 150700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.14680290222168, + "loss_rtd": 0.27704185247421265, + "loss_sent": 0.08824780583381653, + "loss_sod": 0.09765308350324631, + "loss_total": 0.4629427492618561, + "step": 150799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.663311004638672, + "loss_rtd": 0.26936689019203186, + "loss_sent": 0.1907847821712494, + "loss_sod": 0.09802541136741638, + "loss_total": 0.55817711353302, + "step": 150799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.303207516670227, + "learning_rate": 5.05397722528799e-05, + "loss": 0.5117, + "step": 150800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.496127128601074, + "loss_rtd": 0.29871439933776855, + "loss_sent": 0.15010574460029602, + "loss_sod": 0.11930060386657715, + "loss_total": 0.5681207180023193, + "step": 150899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.232115268707275, + "loss_rtd": 0.25799593329429626, + "loss_sent": 0.10728186368942261, + "loss_sod": 0.015332083217799664, + "loss_total": 0.3806098699569702, + "step": 150899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.2439154386520386, + "learning_rate": 5.050804073634967e-05, + "loss": 0.5134, + "step": 150900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.270485877990723, + "loss_rtd": 0.26713284850120544, + "loss_sent": 0.28044942021369934, + "loss_sod": 0.015043208375573158, + "loss_total": 0.5626254677772522, + "step": 150999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.34695291519165, + "loss_rtd": 0.287723183631897, + "loss_sent": 0.09541434794664383, + "loss_sod": 0.052663177251815796, + "loss_total": 0.435800701379776, + "step": 150999 + }, + { + "epoch": 0.016, + "grad_norm": 0.6401495337486267, + "learning_rate": 5.047630901518071e-05, + "loss": 0.5206, + "step": 151000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4957713484764099, + "eval_runtime": 150.2029, + "eval_samples_per_second": 102.814, + "eval_steps_per_second": 0.806, + "step": 151000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.460110664367676, + "loss_rtd": 0.273301362991333, + "loss_sent": 0.43311354517936707, + "loss_sod": 0.028535090386867523, + "loss_total": 0.7349500060081482, + "step": 151099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.245303630828857, + "loss_rtd": 0.2648560404777527, + "loss_sent": 0.10785567760467529, + "loss_sod": 0.03306739404797554, + "loss_total": 0.4057791233062744, + "step": 151099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.4352444410324097, + "learning_rate": 5.044457710215452e-05, + "loss": 0.5203, + "step": 151100 + }, + { + "epoch": 0.016398, + "loss_gen": 4.798547744750977, + "loss_rtd": 0.2782033085823059, + "loss_sent": 0.00013255204248707741, + "loss_sod": 0.06532851606607437, + "loss_total": 0.34366437792778015, + "step": 151199 + }, + { + "epoch": 0.016398, + "loss_gen": 4.926626682281494, + "loss_rtd": 0.26169103384017944, + "loss_sent": 0.00042506129830144346, + "loss_sod": 0.22389405965805054, + "loss_total": 0.4860101640224457, + "step": 151199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.8608229756355286, + "learning_rate": 5.041284501005273e-05, + "loss": 0.5167, + "step": 151200 + }, + { + "epoch": 0.016598, + "loss_gen": 4.439567565917969, + "loss_rtd": 0.2428324818611145, + "loss_sent": 0.045875802636146545, + "loss_sod": 0.1477464884519577, + "loss_total": 0.43645477294921875, + "step": 151299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.37948751449585, + "loss_rtd": 0.28312432765960693, + "loss_sent": 0.08464525640010834, + "loss_sod": 0.15120011568069458, + "loss_total": 0.518969714641571, + "step": 151299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.1371779441833496, + "learning_rate": 5.038111275165702e-05, + "loss": 0.5127, + "step": 151300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.02803897857666, + "loss_rtd": 0.2808513641357422, + "loss_sent": 0.16122612357139587, + "loss_sod": 0.01682141050696373, + "loss_total": 0.4588989019393921, + "step": 151399 + }, + { + "epoch": 0.016798, + "loss_gen": 4.860019207000732, + "loss_rtd": 0.2671452760696411, + "loss_sent": 0.07295264303684235, + "loss_sod": 0.08556990325450897, + "loss_total": 0.42566782236099243, + "step": 151399 + }, + { + "epoch": 0.0168, + "grad_norm": 0.8448479771614075, + "learning_rate": 5.034938033974915e-05, + "loss": 0.5177, + "step": 151400 + }, + { + "epoch": 0.016998, + "loss_gen": 4.632369518280029, + "loss_rtd": 0.2392517626285553, + "loss_sent": 0.0006165798986330628, + "loss_sod": 0.11644583940505981, + "loss_total": 0.35631418228149414, + "step": 151499 + }, + { + "epoch": 0.016998, + "loss_gen": 4.526658058166504, + "loss_rtd": 0.2356649935245514, + "loss_sent": 0.00011681746400427073, + "loss_sod": 0.2846408188343048, + "loss_total": 0.520422637462616, + "step": 151499 + }, + { + "epoch": 0.017, + "grad_norm": 1.3764458894729614, + "learning_rate": 5.031764778711091e-05, + "loss": 0.5221, + "step": 151500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.24907112121582, + "loss_rtd": 0.24610601365566254, + "loss_sent": 0.5204124450683594, + "loss_sod": 0.01562754064798355, + "loss_total": 0.7821459770202637, + "step": 151599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.313752174377441, + "loss_rtd": 0.28422072529792786, + "loss_sent": 0.10614383965730667, + "loss_sod": 0.05076826736330986, + "loss_total": 0.4411328136920929, + "step": 151599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.5663731098175049, + "learning_rate": 5.0285915106524185e-05, + "loss": 0.521, + "step": 151600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.389764785766602, + "loss_rtd": 0.2796376049518585, + "loss_sent": 0.1030566394329071, + "loss_sod": 0.026172231882810593, + "loss_total": 0.4088664650917053, + "step": 151699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.174036502838135, + "loss_rtd": 0.2925577163696289, + "loss_sent": 0.17829322814941406, + "loss_sod": 0.05444139987230301, + "loss_total": 0.5252923369407654, + "step": 151699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.7920127511024475, + "learning_rate": 5.025418231077088e-05, + "loss": 0.5024, + "step": 151700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.434704780578613, + "loss_rtd": 0.2754076421260834, + "loss_sent": 0.105732262134552, + "loss_sod": 0.09751874208450317, + "loss_total": 0.47865864634513855, + "step": 151799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.483138084411621, + "loss_rtd": 0.27305567264556885, + "loss_sent": 0.30992719531059265, + "loss_sod": 0.19430087506771088, + "loss_total": 0.777283787727356, + "step": 151799 + }, + { + "epoch": 0.0176, + "grad_norm": 2.0217292308807373, + "learning_rate": 5.022244941263298e-05, + "loss": 0.5311, + "step": 151800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.1941399574279785, + "loss_rtd": 0.27134954929351807, + "loss_sent": 0.062282513827085495, + "loss_sod": 0.04314194619655609, + "loss_total": 0.37677401304244995, + "step": 151899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.215184688568115, + "loss_rtd": 0.2767806649208069, + "loss_sent": 0.2362992763519287, + "loss_sod": 0.012753710150718689, + "loss_total": 0.5258336663246155, + "step": 151899 + }, + { + "epoch": 0.0178, + "grad_norm": 0.8731580972671509, + "learning_rate": 5.019071642489248e-05, + "loss": 0.5189, + "step": 151900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.417120456695557, + "loss_rtd": 0.28732550144195557, + "loss_sent": 0.3475547730922699, + "loss_sod": 0.04580339789390564, + "loss_total": 0.6806836724281311, + "step": 151999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.299704551696777, + "loss_rtd": 0.28852763772010803, + "loss_sent": 0.15768270194530487, + "loss_sod": 0.05578818917274475, + "loss_total": 0.5019985437393188, + "step": 151999 + }, + { + "epoch": 0.018, + "grad_norm": 0.7697166800498962, + "learning_rate": 5.0158983360331426e-05, + "loss": 0.5078, + "step": 152000 + }, + { + "epoch": 0.018, + "eval_loss": 0.4934936463832855, + "eval_runtime": 149.9582, + "eval_samples_per_second": 102.982, + "eval_steps_per_second": 0.807, + "step": 152000 + }, + { + "epoch": 0.018198, + "loss_gen": 4.863179683685303, + "loss_rtd": 0.2728963792324066, + "loss_sent": 0.0338527075946331, + "loss_sod": 0.016788605600595474, + "loss_total": 0.323537677526474, + "step": 152099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.3263959884643555, + "loss_rtd": 0.26920780539512634, + "loss_sent": 0.08819323778152466, + "loss_sod": 0.02479766495525837, + "loss_total": 0.3821987211704254, + "step": 152099 + }, + { + "epoch": 0.0182, + "grad_norm": 0.6549220085144043, + "learning_rate": 5.012725023173189e-05, + "loss": 0.514, + "step": 152100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.352543354034424, + "loss_rtd": 0.297717422246933, + "loss_sent": 0.15490016341209412, + "loss_sod": 0.07908543199300766, + "loss_total": 0.531702995300293, + "step": 152199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.592075347900391, + "loss_rtd": 0.28803884983062744, + "loss_sent": 0.0869017019867897, + "loss_sod": 0.024519015103578568, + "loss_total": 0.399459570646286, + "step": 152199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.9082280397415161, + "learning_rate": 5.009551705187599e-05, + "loss": 0.5309, + "step": 152200 + }, + { + "epoch": 0.018598, + "loss_gen": 4.977872371673584, + "loss_rtd": 0.2900993525981903, + "loss_sent": 0.12147052586078644, + "loss_sod": 0.027479641139507294, + "loss_total": 0.43904954195022583, + "step": 152299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.130236625671387, + "loss_rtd": 0.30054759979248047, + "loss_sent": 0.47358593344688416, + "loss_sod": 0.18136243522167206, + "loss_total": 0.9554959535598755, + "step": 152299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.6463603973388672, + "learning_rate": 5.006378383354582e-05, + "loss": 0.5216, + "step": 152300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.2921462059021, + "loss_rtd": 0.2681122124195099, + "loss_sent": 0.17086376249790192, + "loss_sod": 0.04019676148891449, + "loss_total": 0.4791727066040039, + "step": 152399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.256403923034668, + "loss_rtd": 0.28217169642448425, + "loss_sent": 0.07587186992168427, + "loss_sod": 0.043038398027420044, + "loss_total": 0.40108197927474976, + "step": 152399 + }, + { + "epoch": 0.0188, + "grad_norm": 1.5022106170654297, + "learning_rate": 5.0032050589523535e-05, + "loss": 0.5044, + "step": 152400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.432286739349365, + "loss_rtd": 0.2685067057609558, + "loss_sent": 0.5534102916717529, + "loss_sod": 0.040103763341903687, + "loss_total": 0.86202073097229, + "step": 152499 + }, + { + "epoch": 0.018998, + "loss_gen": 4.681586265563965, + "loss_rtd": 0.2693133056163788, + "loss_sent": 0.0029743232298642397, + "loss_sod": 0.0748993381857872, + "loss_total": 0.34718698263168335, + "step": 152499 + }, + { + "epoch": 0.019, + "grad_norm": 2.022700071334839, + "learning_rate": 5.000031733259127e-05, + "loss": 0.5118, + "step": 152500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.357352256774902, + "loss_rtd": 0.28818804025650024, + "loss_sent": 0.2510608434677124, + "loss_sod": 0.0101924492046237, + "loss_total": 0.5494413375854492, + "step": 152599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.540969371795654, + "loss_rtd": 0.27876439690589905, + "loss_sent": 0.19681772589683533, + "loss_sod": 0.07287005335092545, + "loss_total": 0.5484521389007568, + "step": 152599 + }, + { + "epoch": 0.0192, + "grad_norm": 1.7219185829162598, + "learning_rate": 4.996858407553119e-05, + "loss": 0.4923, + "step": 152600 + }, + { + "epoch": 0.019398, + "loss_gen": 4.536776542663574, + "loss_rtd": 0.2535674273967743, + "loss_sent": 0.00014156920951791108, + "loss_sod": 0.1365301012992859, + "loss_total": 0.39023908972740173, + "step": 152699 + }, + { + "epoch": 0.019398, + "loss_gen": 4.447594165802002, + "loss_rtd": 0.24667733907699585, + "loss_sent": 0.0001558194198878482, + "loss_sod": 0.05303407460451126, + "loss_total": 0.2998672127723694, + "step": 152699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.890343964099884, + "learning_rate": 4.9936850831125434e-05, + "loss": 0.5257, + "step": 152700 + }, + { + "epoch": 0.019598, + "loss_gen": 4.976705551147461, + "loss_rtd": 0.2726480960845947, + "loss_sent": 0.019405441358685493, + "loss_sod": 0.0301898792386055, + "loss_total": 0.32224342226982117, + "step": 152799 + }, + { + "epoch": 0.019598, + "loss_gen": 4.595449924468994, + "loss_rtd": 0.2642533481121063, + "loss_sent": 0.0005791126750409603, + "loss_sod": 0.12613129615783691, + "loss_total": 0.39096376299858093, + "step": 152799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.738387405872345, + "learning_rate": 4.990511761215617e-05, + "loss": 0.5046, + "step": 152800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.384093761444092, + "loss_rtd": 0.26410719752311707, + "loss_sent": 0.35313689708709717, + "loss_sod": 0.04660925641655922, + "loss_total": 0.6638533473014832, + "step": 152899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.380703449249268, + "loss_rtd": 0.27725425362586975, + "loss_sent": 0.9195929169654846, + "loss_sod": 0.12258525937795639, + "loss_total": 1.3194324970245361, + "step": 152899 + }, + { + "epoch": 0.0198, + "grad_norm": 5.63961124420166, + "learning_rate": 4.987338443140552e-05, + "loss": 0.5053, + "step": 152900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.3890862464904785, + "loss_rtd": 0.27779167890548706, + "loss_sent": 0.1700344681739807, + "loss_sod": 0.06807170063257217, + "loss_total": 0.5158978700637817, + "step": 152999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.191761016845703, + "loss_rtd": 0.2886083722114563, + "loss_sent": 0.1861131340265274, + "loss_sod": 0.005172345787286758, + "loss_total": 0.47989386320114136, + "step": 152999 + }, + { + "epoch": 0.02, + "grad_norm": 1.5369865894317627, + "learning_rate": 4.9841651301655585e-05, + "loss": 0.5132, + "step": 153000 + }, + { + "epoch": 0.02, + "eval_loss": 0.4946920573711395, + "eval_runtime": 150.4304, + "eval_samples_per_second": 102.659, + "eval_steps_per_second": 0.804, + "step": 153000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.107113361358643, + "loss_rtd": 0.2785392999649048, + "loss_sent": 0.5377865433692932, + "loss_sod": 0.03649866580963135, + "loss_total": 0.8528245091438293, + "step": 153099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.097657203674316, + "loss_rtd": 0.2691618502140045, + "loss_sent": 0.19406843185424805, + "loss_sod": 0.11953267455101013, + "loss_total": 0.5827629566192627, + "step": 153099 + }, + { + "epoch": 0.0202, + "grad_norm": 2.1632916927337646, + "learning_rate": 4.9809918235688505e-05, + "loss": 0.5114, + "step": 153100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.302831649780273, + "loss_rtd": 0.28545865416526794, + "loss_sent": 0.29269009828567505, + "loss_sod": 0.007460000459104776, + "loss_total": 0.585608720779419, + "step": 153199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.287594318389893, + "loss_rtd": 0.278229296207428, + "loss_sent": 0.1570609211921692, + "loss_sod": 0.030832931399345398, + "loss_total": 0.46612316370010376, + "step": 153199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.0404207706451416, + "learning_rate": 4.9778185246286325e-05, + "loss": 0.5202, + "step": 153200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.5488972663879395, + "loss_rtd": 0.271356999874115, + "loss_sent": 0.37172770500183105, + "loss_sod": 0.1260555535554886, + "loss_total": 0.7691402435302734, + "step": 153299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.269132614135742, + "loss_rtd": 0.2762696146965027, + "loss_sent": 0.1427033245563507, + "loss_sod": 0.024955397471785545, + "loss_total": 0.4439283311367035, + "step": 153299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.0403190851211548, + "learning_rate": 4.974645234623111e-05, + "loss": 0.5211, + "step": 153300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.068711280822754, + "loss_rtd": 0.2830943763256073, + "loss_sent": 0.030563218519091606, + "loss_sod": 0.032533757388591766, + "loss_total": 0.3461913466453552, + "step": 153399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.301641941070557, + "loss_rtd": 0.28749141097068787, + "loss_sent": 0.30285027623176575, + "loss_sod": 0.12032678723335266, + "loss_total": 0.7106684446334839, + "step": 153399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.9960215091705322, + "learning_rate": 4.971471954830485e-05, + "loss": 0.5248, + "step": 153400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.454285621643066, + "loss_rtd": 0.2725071310997009, + "loss_sent": 0.31986936926841736, + "loss_sod": 0.08787284046411514, + "loss_total": 0.6802493333816528, + "step": 153499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.364346504211426, + "loss_rtd": 0.29394635558128357, + "loss_sent": 0.11776778846979141, + "loss_sod": 0.08728210628032684, + "loss_total": 0.49899622797966003, + "step": 153499 + }, + { + "epoch": 0.021, + "grad_norm": 0.9756061434745789, + "learning_rate": 4.968298686528953e-05, + "loss": 0.5136, + "step": 153500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.274739742279053, + "loss_rtd": 0.29549235105514526, + "loss_sent": 0.16786660254001617, + "loss_sod": 0.06539119780063629, + "loss_total": 0.5287501215934753, + "step": 153599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.181947708129883, + "loss_rtd": 0.28288233280181885, + "loss_sent": 0.22003786265850067, + "loss_sod": 0.03190217167139053, + "loss_total": 0.5348223447799683, + "step": 153599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.8788776397705078, + "learning_rate": 4.9651254309967056e-05, + "loss": 0.5193, + "step": 153600 + }, + { + "epoch": 0.021398, + "loss_gen": 4.863471984863281, + "loss_rtd": 0.27452632784843445, + "loss_sent": 0.3187580108642578, + "loss_sod": 0.012509215623140335, + "loss_total": 0.6057935953140259, + "step": 153699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.246292591094971, + "loss_rtd": 0.2921764552593231, + "loss_sent": 0.1435055285692215, + "loss_sod": 0.06779582798480988, + "loss_total": 0.5034778118133545, + "step": 153699 + }, + { + "epoch": 0.0214, + "grad_norm": 2.0051217079162598, + "learning_rate": 4.961952189511931e-05, + "loss": 0.5206, + "step": 153700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.377110481262207, + "loss_rtd": 0.28640779852867126, + "loss_sent": 0.1573113650083542, + "loss_sod": 0.024247044697403908, + "loss_total": 0.4679661989212036, + "step": 153799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.602847099304199, + "loss_rtd": 0.2789478600025177, + "loss_sent": 0.14849574863910675, + "loss_sod": 0.09215997159481049, + "loss_total": 0.5196035504341125, + "step": 153799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.800870656967163, + "learning_rate": 4.958778963352809e-05, + "loss": 0.5314, + "step": 153800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.2421793937683105, + "loss_rtd": 0.2638772428035736, + "loss_sent": 0.6456987261772156, + "loss_sod": 0.04866240918636322, + "loss_total": 0.9582383632659912, + "step": 153899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.094288349151611, + "loss_rtd": 0.265754371881485, + "loss_sent": 0.06336957216262817, + "loss_sod": 0.08266130834817886, + "loss_total": 0.4117852449417114, + "step": 153899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.552384376525879, + "learning_rate": 4.9556057537975176e-05, + "loss": 0.5135, + "step": 153900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.359681606292725, + "loss_rtd": 0.2835022211074829, + "loss_sent": 0.252536416053772, + "loss_sod": 0.08727392554283142, + "loss_total": 0.6233125925064087, + "step": 153999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.315378189086914, + "loss_rtd": 0.2870238423347473, + "loss_sent": 0.23657800257205963, + "loss_sod": 0.10807448625564575, + "loss_total": 0.6316763162612915, + "step": 153999 + }, + { + "epoch": 0.022, + "grad_norm": 0.8836574554443359, + "learning_rate": 4.952432562124221e-05, + "loss": 0.5131, + "step": 154000 + }, + { + "epoch": 0.022, + "eval_loss": 0.4933038651943207, + "eval_runtime": 150.2357, + "eval_samples_per_second": 102.792, + "eval_steps_per_second": 0.805, + "step": 154000 + }, + { + "epoch": 0.022198, + "loss_gen": 4.809384346008301, + "loss_rtd": 0.2640708386898041, + "loss_sent": 3.874971662298776e-05, + "loss_sod": 0.13840627670288086, + "loss_total": 0.40251585841178894, + "step": 154099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.730175495147705, + "loss_rtd": 0.27670857310295105, + "loss_sent": 0.003312483662739396, + "loss_sod": 0.09407520294189453, + "loss_total": 0.3740962743759155, + "step": 154099 + }, + { + "epoch": 0.0222, + "grad_norm": 0.8724492788314819, + "learning_rate": 4.9492593896110845e-05, + "loss": 0.5117, + "step": 154100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.300963401794434, + "loss_rtd": 0.2905631959438324, + "loss_sent": 0.29418423771858215, + "loss_sod": 0.023533185943961143, + "loss_total": 0.6082806587219238, + "step": 154199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.291253089904785, + "loss_rtd": 0.2760257422924042, + "loss_sent": 0.4496966004371643, + "loss_sod": 0.04876187443733215, + "loss_total": 0.7744842171669006, + "step": 154199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.068644404411316, + "learning_rate": 4.9460862375362585e-05, + "loss": 0.5162, + "step": 154200 + }, + { + "epoch": 0.022598, + "loss_gen": 4.715968608856201, + "loss_rtd": 0.26204782724380493, + "loss_sent": 0.00037177972262725234, + "loss_sod": 0.19392526149749756, + "loss_total": 0.456344872713089, + "step": 154299 + }, + { + "epoch": 0.022598, + "loss_gen": 4.791236877441406, + "loss_rtd": 0.2713673710823059, + "loss_sent": 0.09709008038043976, + "loss_sod": 0.034875400364398956, + "loss_total": 0.40333282947540283, + "step": 154299 + }, + { + "epoch": 0.0226, + "grad_norm": 0.7773022651672363, + "learning_rate": 4.942913107177891e-05, + "loss": 0.5188, + "step": 154300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.160711288452148, + "loss_rtd": 0.26806074380874634, + "loss_sent": 0.3172636032104492, + "loss_sod": 0.05338805168867111, + "loss_total": 0.6387124061584473, + "step": 154399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.194608688354492, + "loss_rtd": 0.2792544960975647, + "loss_sent": 0.302408903837204, + "loss_sod": 0.07118754088878632, + "loss_total": 0.6528509855270386, + "step": 154399 + }, + { + "epoch": 0.0228, + "grad_norm": 2.052335500717163, + "learning_rate": 4.939739999814115e-05, + "loss": 0.5066, + "step": 154400 + }, + { + "epoch": 0.022998, + "loss_gen": 4.819077968597412, + "loss_rtd": 0.251863032579422, + "loss_sent": 0.2087668478488922, + "loss_sod": 0.11115151643753052, + "loss_total": 0.5717813968658447, + "step": 154499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.3396806716918945, + "loss_rtd": 0.28426268696784973, + "loss_sent": 0.17901720106601715, + "loss_sod": 0.08635630458593369, + "loss_total": 0.54963618516922, + "step": 154499 + }, + { + "epoch": 0.023, + "grad_norm": 1.043086051940918, + "learning_rate": 4.936566916723062e-05, + "loss": 0.5229, + "step": 154500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.2737812995910645, + "loss_rtd": 0.2666214406490326, + "loss_sent": 0.17710836231708527, + "loss_sod": 0.0495685413479805, + "loss_total": 0.49329835176467896, + "step": 154599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.154017448425293, + "loss_rtd": 0.2650807499885559, + "loss_sent": 0.24059459567070007, + "loss_sod": 0.07724697887897491, + "loss_total": 0.5829223394393921, + "step": 154599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.197387456893921, + "learning_rate": 4.933393859182847e-05, + "loss": 0.4993, + "step": 154600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.324120044708252, + "loss_rtd": 0.26337793469429016, + "loss_sent": 0.1251501590013504, + "loss_sod": 0.03473785147070885, + "loss_total": 0.4232659339904785, + "step": 154699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.492222309112549, + "loss_rtd": 0.2757542133331299, + "loss_sent": 0.16035082936286926, + "loss_sod": 0.24490724503993988, + "loss_total": 0.6810122728347778, + "step": 154699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.3781133890151978, + "learning_rate": 4.930220828471576e-05, + "loss": 0.5257, + "step": 154700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.526704788208008, + "loss_rtd": 0.2778828740119934, + "loss_sent": 0.3450658619403839, + "loss_sod": 0.01861502230167389, + "loss_total": 0.6415637731552124, + "step": 154799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.400487899780273, + "loss_rtd": 0.2804326117038727, + "loss_sent": 0.3724420666694641, + "loss_sod": 0.13108769059181213, + "loss_total": 0.7839623689651489, + "step": 154799 + }, + { + "epoch": 0.0236, + "grad_norm": 1.3151750564575195, + "learning_rate": 4.927047825867348e-05, + "loss": 0.506, + "step": 154800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.331615924835205, + "loss_rtd": 0.26284393668174744, + "loss_sent": 0.28544846177101135, + "loss_sod": 0.05874761939048767, + "loss_total": 0.6070400476455688, + "step": 154899 + }, + { + "epoch": 0.023798, + "loss_gen": 4.914802551269531, + "loss_rtd": 0.26309871673583984, + "loss_sent": 0.15676219761371613, + "loss_sod": 0.014258254319429398, + "loss_total": 0.43411916494369507, + "step": 154899 + }, + { + "epoch": 0.0238, + "grad_norm": 1.1727299690246582, + "learning_rate": 4.923874852648247e-05, + "loss": 0.5267, + "step": 154900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.204455375671387, + "loss_rtd": 0.26971036195755005, + "loss_sent": 0.06897718459367752, + "loss_sod": 0.13732947409152985, + "loss_total": 0.4760169982910156, + "step": 154999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.415859699249268, + "loss_rtd": 0.2773853838443756, + "loss_sent": 0.20107945799827576, + "loss_sod": 0.05754546821117401, + "loss_total": 0.5360102653503418, + "step": 154999 + }, + { + "epoch": 0.024, + "grad_norm": 1.0946190357208252, + "learning_rate": 4.920701910092347e-05, + "loss": 0.5325, + "step": 155000 + }, + { + "epoch": 0.024, + "eval_loss": 0.4920775592327118, + "eval_runtime": 150.2738, + "eval_samples_per_second": 102.766, + "eval_steps_per_second": 0.805, + "step": 155000 + }, + { + "epoch": 0.024198, + "loss_gen": 4.494928359985352, + "loss_rtd": 0.2546127140522003, + "loss_sent": 0.0256333164870739, + "loss_sod": 0.1090117022395134, + "loss_total": 0.3892577290534973, + "step": 155099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.4429802894592285, + "loss_rtd": 0.2867959439754486, + "loss_sent": 0.08887942135334015, + "loss_sod": 0.01610971800982952, + "loss_total": 0.3917850852012634, + "step": 155099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.8489281535148621, + "learning_rate": 4.917528999477706e-05, + "loss": 0.5062, + "step": 155100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.064107418060303, + "loss_rtd": 0.27549663186073303, + "loss_sent": 0.3441270589828491, + "loss_sod": 0.008993230760097504, + "loss_total": 0.6286169290542603, + "step": 155199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.197736740112305, + "loss_rtd": 0.2571727931499481, + "loss_sent": 0.11452917009592056, + "loss_sod": 0.07485997676849365, + "loss_total": 0.44656193256378174, + "step": 155199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.1755305528640747, + "learning_rate": 4.914356122082376e-05, + "loss": 0.5236, + "step": 155200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.632621765136719, + "loss_rtd": 0.27535516023635864, + "loss_sent": 0.2456662356853485, + "loss_sod": 0.04502054303884506, + "loss_total": 0.5660419464111328, + "step": 155299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.261656761169434, + "loss_rtd": 0.2716114819049835, + "loss_sent": 0.07773374766111374, + "loss_sod": 0.056362785398960114, + "loss_total": 0.4057080149650574, + "step": 155299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.1455860137939453, + "learning_rate": 4.911183279184389e-05, + "loss": 0.5075, + "step": 155300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.270544528961182, + "loss_rtd": 0.27206915616989136, + "loss_sent": 0.11627724766731262, + "loss_sod": 0.014066686853766441, + "loss_total": 0.40241310000419617, + "step": 155399 + }, + { + "epoch": 0.024798, + "loss_gen": 4.643050670623779, + "loss_rtd": 0.26377764344215393, + "loss_sent": 0.0007975984481163323, + "loss_sod": 0.05068105086684227, + "loss_total": 0.3152562975883484, + "step": 155399 + }, + { + "epoch": 0.0248, + "grad_norm": 0.745100200176239, + "learning_rate": 4.908010472061767e-05, + "loss": 0.5001, + "step": 155400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.607298851013184, + "loss_rtd": 0.2740238308906555, + "loss_sent": 0.7319679260253906, + "loss_sod": 0.12371865659952164, + "loss_total": 1.1297104358673096, + "step": 155499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.345309734344482, + "loss_rtd": 0.2741493582725525, + "loss_sent": 0.1308216154575348, + "loss_sod": 0.05069271847605705, + "loss_total": 0.45566368103027344, + "step": 155499 + }, + { + "epoch": 0.025, + "grad_norm": 3.0212907791137695, + "learning_rate": 4.9048377019925156e-05, + "loss": 0.5232, + "step": 155500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.159890174865723, + "loss_rtd": 0.277555912733078, + "loss_sent": 0.12425858527421951, + "loss_sod": 0.04284006729722023, + "loss_total": 0.44465455412864685, + "step": 155599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.168512344360352, + "loss_rtd": 0.2913990020751953, + "loss_sent": 0.11198101192712784, + "loss_sod": 0.035088904201984406, + "loss_total": 0.43846890330314636, + "step": 155599 + }, + { + "epoch": 0.0252, + "grad_norm": 1.1938484907150269, + "learning_rate": 4.901664970254627e-05, + "loss": 0.5352, + "step": 155600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.350635051727295, + "loss_rtd": 0.26318657398223877, + "loss_sent": 0.09457547217607498, + "loss_sod": 0.04108530655503273, + "loss_total": 0.3988473415374756, + "step": 155699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.57899808883667, + "loss_rtd": 0.2769685983657837, + "loss_sent": 0.035828847438097, + "loss_sod": 0.0684489831328392, + "loss_total": 0.3812464475631714, + "step": 155699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.9996123313903809, + "learning_rate": 4.898492278126076e-05, + "loss": 0.4948, + "step": 155700 + }, + { + "epoch": 0.025598, + "loss_gen": 4.611931800842285, + "loss_rtd": 0.26654088497161865, + "loss_sent": 3.4519805922172964e-05, + "loss_sod": 0.1173800453543663, + "loss_total": 0.3839554488658905, + "step": 155799 + }, + { + "epoch": 0.025598, + "loss_gen": 4.544041633605957, + "loss_rtd": 0.2545827031135559, + "loss_sent": 3.5108379961457103e-05, + "loss_sod": 0.21659061312675476, + "loss_total": 0.4712084233760834, + "step": 155799 + }, + { + "epoch": 0.0256, + "grad_norm": 0.9293721914291382, + "learning_rate": 4.895319626884824e-05, + "loss": 0.5295, + "step": 155800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.0901594161987305, + "loss_rtd": 0.2564566135406494, + "loss_sent": 3.784263753914274e-05, + "loss_sod": 0.16215522587299347, + "loss_total": 0.41864967346191406, + "step": 155899 + }, + { + "epoch": 0.025798, + "loss_gen": 4.958869934082031, + "loss_rtd": 0.253162145614624, + "loss_sent": 3.955057036364451e-05, + "loss_sod": 0.21552357077598572, + "loss_total": 0.4687252640724182, + "step": 155899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.0558736324310303, + "learning_rate": 4.892147017808812e-05, + "loss": 0.5194, + "step": 155900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.430213928222656, + "loss_rtd": 0.2625291645526886, + "loss_sent": 0.31915414333343506, + "loss_sod": 0.04818132147192955, + "loss_total": 0.6298646330833435, + "step": 155999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.714964389801025, + "loss_rtd": 0.28088244795799255, + "loss_sent": 0.18083544075489044, + "loss_sod": 0.05280933156609535, + "loss_total": 0.5145272016525269, + "step": 155999 + }, + { + "epoch": 0.026, + "grad_norm": 1.0524640083312988, + "learning_rate": 4.888974452175969e-05, + "loss": 0.5276, + "step": 156000 + }, + { + "epoch": 0.026, + "eval_loss": 0.48745784163475037, + "eval_runtime": 150.2911, + "eval_samples_per_second": 102.754, + "eval_steps_per_second": 0.805, + "step": 156000 + }, + { + "epoch": 0.026198, + "loss_gen": 4.251652717590332, + "loss_rtd": 0.2523916959762573, + "loss_sent": 3.480105442577042e-05, + "loss_sod": 0.10084492713212967, + "loss_total": 0.3532714247703552, + "step": 156099 + }, + { + "epoch": 0.026198, + "loss_gen": 4.76702880859375, + "loss_rtd": 0.24150487780570984, + "loss_sent": 0.05287511274218559, + "loss_sod": 0.13154476881027222, + "loss_total": 0.42592474818229675, + "step": 156099 + }, + { + "epoch": 0.0262, + "grad_norm": 0.807264506816864, + "learning_rate": 4.8858019312642054e-05, + "loss": 0.5263, + "step": 156100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.219092845916748, + "loss_rtd": 0.27106907963752747, + "loss_sent": 0.11399336159229279, + "loss_sod": 0.012272108346223831, + "loss_total": 0.39733457565307617, + "step": 156199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.637687683105469, + "loss_rtd": 0.2768366038799286, + "loss_sent": 0.3727564811706543, + "loss_sod": 0.039362743496894836, + "loss_total": 0.6889558434486389, + "step": 156199 + }, + { + "epoch": 0.0264, + "grad_norm": 1.6470954418182373, + "learning_rate": 4.882629456351408e-05, + "loss": 0.523, + "step": 156200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.342074394226074, + "loss_rtd": 0.28616639971733093, + "loss_sent": 0.04557182267308235, + "loss_sod": 0.0065534948371350765, + "loss_total": 0.338291734457016, + "step": 156299 + }, + { + "epoch": 0.026598, + "loss_gen": 4.873995780944824, + "loss_rtd": 0.26216158270835876, + "loss_sent": 0.14498597383499146, + "loss_sod": 0.018181968480348587, + "loss_total": 0.4253295361995697, + "step": 156299 + }, + { + "epoch": 0.0266, + "grad_norm": 0.7005792856216431, + "learning_rate": 4.879457028715452e-05, + "loss": 0.5035, + "step": 156300 + }, + { + "epoch": 0.026798, + "loss_gen": 4.46565580368042, + "loss_rtd": 0.23772087693214417, + "loss_sent": 0.06319355219602585, + "loss_sod": 0.17266608774662018, + "loss_total": 0.473580539226532, + "step": 156399 + }, + { + "epoch": 0.026798, + "loss_gen": 4.926111698150635, + "loss_rtd": 0.24819941818714142, + "loss_sent": 0.23541516065597534, + "loss_sod": 0.09991899877786636, + "loss_total": 0.5835335850715637, + "step": 156399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.0653536319732666, + "learning_rate": 4.8762846496341906e-05, + "loss": 0.5123, + "step": 156400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.300265312194824, + "loss_rtd": 0.25348299741744995, + "loss_sent": 0.13196788728237152, + "loss_sod": 0.08344398438930511, + "loss_total": 0.468894898891449, + "step": 156499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.363890171051025, + "loss_rtd": 0.26457250118255615, + "loss_sent": 0.09559301286935806, + "loss_sod": 0.042374856770038605, + "loss_total": 0.4025403559207916, + "step": 156499 + }, + { + "epoch": 0.027, + "grad_norm": 0.7778878808021545, + "learning_rate": 4.873112320385458e-05, + "loss": 0.4891, + "step": 156500 + }, + { + "epoch": 0.027198, + "loss_gen": 4.990156173706055, + "loss_rtd": 0.27784112095832825, + "loss_sent": 0.13252398371696472, + "loss_sod": 0.004448779858648777, + "loss_total": 0.4148138761520386, + "step": 156599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.4462714195251465, + "loss_rtd": 0.28916388750076294, + "loss_sent": 0.14478699862957, + "loss_sod": 0.07963693141937256, + "loss_total": 0.5135878324508667, + "step": 156599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.9037930369377136, + "learning_rate": 4.869940042247066e-05, + "loss": 0.5123, + "step": 156600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.006406784057617, + "loss_rtd": 0.26727545261383057, + "loss_sent": 0.4620921015739441, + "loss_sod": 0.08716131746768951, + "loss_total": 0.816528856754303, + "step": 156699 + }, + { + "epoch": 0.027398, + "loss_gen": 4.661520004272461, + "loss_rtd": 0.2576027512550354, + "loss_sent": 0.008090567775070667, + "loss_sod": 0.11190100014209747, + "loss_total": 0.3775942921638489, + "step": 156699 + }, + { + "epoch": 0.0274, + "grad_norm": 1.406989574432373, + "learning_rate": 4.866767816496812e-05, + "loss": 0.5139, + "step": 156700 + }, + { + "epoch": 0.027598, + "loss_gen": 4.819665431976318, + "loss_rtd": 0.28634992241859436, + "loss_sent": 0.1437319964170456, + "loss_sod": 0.0734187439084053, + "loss_total": 0.5035006403923035, + "step": 156799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.204472064971924, + "loss_rtd": 0.2901288568973541, + "loss_sent": 0.10268744081258774, + "loss_sod": 0.05606215447187424, + "loss_total": 0.4488784670829773, + "step": 156799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8680161237716675, + "learning_rate": 4.863595644412463e-05, + "loss": 0.4933, + "step": 156800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.420330047607422, + "loss_rtd": 0.2791830599308014, + "loss_sent": 0.3732958734035492, + "loss_sod": 0.11190824210643768, + "loss_total": 0.7643871307373047, + "step": 156899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.031901836395264, + "loss_rtd": 0.27225786447525024, + "loss_sent": 0.033356182277202606, + "loss_sod": 0.0836183950304985, + "loss_total": 0.38923245668411255, + "step": 156899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.086056113243103, + "learning_rate": 4.860423527271774e-05, + "loss": 0.5158, + "step": 156900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.104860305786133, + "loss_rtd": 0.24588295817375183, + "loss_sent": 0.1802573800086975, + "loss_sod": 0.010749001987278461, + "loss_total": 0.4368893504142761, + "step": 156999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.1478495597839355, + "loss_rtd": 0.2704768776893616, + "loss_sent": 0.2739458680152893, + "loss_sod": 0.11669597029685974, + "loss_total": 0.661118745803833, + "step": 156999 + }, + { + "epoch": 0.028, + "grad_norm": 1.624053955078125, + "learning_rate": 4.8572514663524704e-05, + "loss": 0.5245, + "step": 157000 + }, + { + "epoch": 0.028, + "eval_loss": 0.48777928948402405, + "eval_runtime": 150.5699, + "eval_samples_per_second": 102.564, + "eval_steps_per_second": 0.804, + "step": 157000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.487461090087891, + "loss_rtd": 0.2601374387741089, + "loss_sent": 0.22656944394111633, + "loss_sod": 0.05389409512281418, + "loss_total": 0.5406010150909424, + "step": 157099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.392871379852295, + "loss_rtd": 0.27915239334106445, + "loss_sent": 0.15948906540870667, + "loss_sod": 0.08141209185123444, + "loss_total": 0.520053505897522, + "step": 157099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.0358128547668457, + "learning_rate": 4.85407946293226e-05, + "loss": 0.5251, + "step": 157100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.279001712799072, + "loss_rtd": 0.2800320088863373, + "loss_sent": 0.15085484087467194, + "loss_sod": 0.018586760386824608, + "loss_total": 0.44947361946105957, + "step": 157199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.3281073570251465, + "loss_rtd": 0.2607080340385437, + "loss_sent": 0.08769252151250839, + "loss_sod": 0.08442763984203339, + "loss_total": 0.43282821774482727, + "step": 157199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.5268245935440063, + "learning_rate": 4.850907518288823e-05, + "loss": 0.5164, + "step": 157200 + }, + { + "epoch": 0.028598, + "loss_gen": 4.504331111907959, + "loss_rtd": 0.22062382102012634, + "loss_sent": 0.02035563997924328, + "loss_sod": 0.12281917780637741, + "loss_total": 0.3637986481189728, + "step": 157299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.401305198669434, + "loss_rtd": 0.2521509826183319, + "loss_sent": 0.1265888661146164, + "loss_sod": 0.04096400737762451, + "loss_total": 0.4197038412094116, + "step": 157299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.0456082820892334, + "learning_rate": 4.8477356336998215e-05, + "loss": 0.51, + "step": 157300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.367915630340576, + "loss_rtd": 0.2643294930458069, + "loss_sent": 0.23415818810462952, + "loss_sod": 0.07469923794269562, + "loss_total": 0.5731868743896484, + "step": 157399 + }, + { + "epoch": 0.028798, + "loss_gen": 5.2256550788879395, + "loss_rtd": 0.28218239545822144, + "loss_sent": 0.14147312939167023, + "loss_sod": 0.06159043684601784, + "loss_total": 0.485245943069458, + "step": 157399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.009660005569458, + "learning_rate": 4.844563810442887e-05, + "loss": 0.5111, + "step": 157400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.380802154541016, + "loss_rtd": 0.2648017108440399, + "loss_sent": 0.06642752885818481, + "loss_sod": 0.16112709045410156, + "loss_total": 0.4923563301563263, + "step": 157499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.134881019592285, + "loss_rtd": 0.24950629472732544, + "loss_sent": 0.016680188477039337, + "loss_sod": 0.1164335161447525, + "loss_total": 0.3826199769973755, + "step": 157499 + }, + { + "epoch": 0.029, + "grad_norm": 1.143450140953064, + "learning_rate": 4.8413920497956326e-05, + "loss": 0.5025, + "step": 157500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.487768650054932, + "loss_rtd": 0.2694796621799469, + "loss_sent": 0.133780837059021, + "loss_sod": 0.03469589725136757, + "loss_total": 0.43795639276504517, + "step": 157599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.598739147186279, + "loss_rtd": 0.28467464447021484, + "loss_sent": 0.17000456154346466, + "loss_sod": 0.05821293592453003, + "loss_total": 0.5128921270370483, + "step": 157599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.8719593286514282, + "learning_rate": 4.83822035303564e-05, + "loss": 0.5118, + "step": 157600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.4392595291137695, + "loss_rtd": 0.26860255002975464, + "loss_sent": 0.33762699365615845, + "loss_sod": 0.05551446974277496, + "loss_total": 0.6617439985275269, + "step": 157699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.3656907081604, + "loss_rtd": 0.2806245982646942, + "loss_sent": 0.2980003356933594, + "loss_sod": 0.016527138650417328, + "loss_total": 0.5951520800590515, + "step": 157699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.9922837018966675, + "learning_rate": 4.8350487214404715e-05, + "loss": 0.5213, + "step": 157700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.197019100189209, + "loss_rtd": 0.29149261116981506, + "loss_sent": 0.09200244396924973, + "loss_sod": 0.04063096642494202, + "loss_total": 0.4241260290145874, + "step": 157799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.526877403259277, + "loss_rtd": 0.26612284779548645, + "loss_sent": 0.33563366532325745, + "loss_sod": 0.03838837891817093, + "loss_total": 0.6401448845863342, + "step": 157799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.1496505737304688, + "learning_rate": 4.831877156287658e-05, + "loss": 0.5094, + "step": 157800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.166398525238037, + "loss_rtd": 0.28753796219825745, + "loss_sent": 0.07045305520296097, + "loss_sod": 0.00541570782661438, + "loss_total": 0.3634067177772522, + "step": 157899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.361014366149902, + "loss_rtd": 0.25920534133911133, + "loss_sent": 0.3058786690235138, + "loss_sod": 0.04083895683288574, + "loss_total": 0.6059229373931885, + "step": 157899 + }, + { + "epoch": 0.0298, + "grad_norm": 2.0139553546905518, + "learning_rate": 4.828705658854705e-05, + "loss": 0.5226, + "step": 157900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.709131717681885, + "loss_rtd": 0.2707286775112152, + "loss_sent": 0.30841881036758423, + "loss_sod": 0.09141042828559875, + "loss_total": 0.6705579161643982, + "step": 157999 + }, + { + "epoch": 0.029998, + "loss_gen": 4.703526973724365, + "loss_rtd": 0.2666856050491333, + "loss_sent": 0.03206224367022514, + "loss_sod": 0.1287064254283905, + "loss_total": 0.42745426297187805, + "step": 157999 + }, + { + "epoch": 0.03, + "grad_norm": 1.1153990030288696, + "learning_rate": 4.8255342304190924e-05, + "loss": 0.4898, + "step": 158000 + }, + { + "epoch": 0.03, + "eval_loss": 0.48881009221076965, + "eval_runtime": 150.3182, + "eval_samples_per_second": 102.735, + "eval_steps_per_second": 0.805, + "step": 158000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.368672847747803, + "loss_rtd": 0.2833646535873413, + "loss_sent": 0.6040071845054626, + "loss_sod": 0.10221802443265915, + "loss_total": 0.9895898699760437, + "step": 158099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.176130771636963, + "loss_rtd": 0.2608996629714966, + "loss_sent": 0.3742767572402954, + "loss_sod": 0.023876942694187164, + "loss_total": 0.6590533256530762, + "step": 158099 + }, + { + "epoch": 0.0302, + "grad_norm": 2.632840871810913, + "learning_rate": 4.8223628722582706e-05, + "loss": 0.4948, + "step": 158100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.298892498016357, + "loss_rtd": 0.271578848361969, + "loss_sent": 0.0672275647521019, + "loss_sod": 0.07478600740432739, + "loss_total": 0.4135924279689789, + "step": 158199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.326475620269775, + "loss_rtd": 0.27241429686546326, + "loss_sent": 0.3747837245464325, + "loss_sod": 0.05368448793888092, + "loss_total": 0.7008825540542603, + "step": 158199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.163684606552124, + "learning_rate": 4.819191585649663e-05, + "loss": 0.5026, + "step": 158200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.390805721282959, + "loss_rtd": 0.2750718891620636, + "loss_sent": 0.1656327098608017, + "loss_sod": 0.10272759199142456, + "loss_total": 0.5434321761131287, + "step": 158299 + }, + { + "epoch": 0.030598, + "loss_gen": 4.812838077545166, + "loss_rtd": 0.24897724390029907, + "loss_sent": 4.013166835648008e-05, + "loss_sod": 0.1467210054397583, + "loss_total": 0.3957383930683136, + "step": 158299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.8710622787475586, + "learning_rate": 4.816020371870663e-05, + "loss": 0.5101, + "step": 158300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.417858123779297, + "loss_rtd": 0.28526821732521057, + "loss_sent": 0.21175134181976318, + "loss_sod": 0.054051898419857025, + "loss_total": 0.5510714650154114, + "step": 158399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.285908222198486, + "loss_rtd": 0.24675749242305756, + "loss_sent": 0.28908753395080566, + "loss_sod": 0.004072052426636219, + "loss_total": 0.5399171113967896, + "step": 158399 + }, + { + "epoch": 0.0308, + "grad_norm": 1.0410542488098145, + "learning_rate": 4.812849232198636e-05, + "loss": 0.5016, + "step": 158400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.419442176818848, + "loss_rtd": 0.2768394947052002, + "loss_sent": 0.3450249433517456, + "loss_sod": 0.058325424790382385, + "loss_total": 0.680189847946167, + "step": 158499 + }, + { + "epoch": 0.030998, + "loss_gen": 5.143611431121826, + "loss_rtd": 0.27038657665252686, + "loss_sent": 0.04221571609377861, + "loss_sod": 0.009236699901521206, + "loss_total": 0.32183897495269775, + "step": 158499 + }, + { + "epoch": 0.031, + "grad_norm": 0.64340740442276, + "learning_rate": 4.8096781679109145e-05, + "loss": 0.5045, + "step": 158500 + }, + { + "epoch": 0.031198, + "loss_gen": 4.824569225311279, + "loss_rtd": 0.274274080991745, + "loss_sent": 0.24172565340995789, + "loss_sod": 0.048117611557245255, + "loss_total": 0.5641173124313354, + "step": 158599 + }, + { + "epoch": 0.031198, + "loss_gen": 4.973732948303223, + "loss_rtd": 0.2760692238807678, + "loss_sent": 0.44531095027923584, + "loss_sod": 0.028519706800580025, + "loss_total": 0.7498998641967773, + "step": 158599 + }, + { + "epoch": 0.0312, + "grad_norm": 2.0706980228424072, + "learning_rate": 4.806507180284806e-05, + "loss": 0.5017, + "step": 158600 + }, + { + "epoch": 0.031398, + "loss_gen": 4.814790725708008, + "loss_rtd": 0.2397366315126419, + "loss_sent": 0.03261179476976395, + "loss_sod": 0.02776399254798889, + "loss_total": 0.30011242628097534, + "step": 158699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.327791213989258, + "loss_rtd": 0.26420092582702637, + "loss_sent": 0.09725827723741531, + "loss_sod": 0.13030242919921875, + "loss_total": 0.49176162481307983, + "step": 158699 + }, + { + "epoch": 0.0314, + "grad_norm": 1.094794511795044, + "learning_rate": 4.8033362705975815e-05, + "loss": 0.4989, + "step": 158700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.1611409187316895, + "loss_rtd": 0.2766934335231781, + "loss_sent": 0.5184255838394165, + "loss_sod": 0.04233116656541824, + "loss_total": 0.8374501466751099, + "step": 158799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.242688179016113, + "loss_rtd": 0.27282342314720154, + "loss_sent": 0.2498510330915451, + "loss_sod": 0.0054869623854756355, + "loss_total": 0.5281614065170288, + "step": 158799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.6587289571762085, + "learning_rate": 4.800165440126483e-05, + "loss": 0.5125, + "step": 158800 + }, + { + "epoch": 0.031798, + "loss_gen": 4.8841633796691895, + "loss_rtd": 0.26241910457611084, + "loss_sent": 0.17879162728786469, + "loss_sod": 0.00880429707467556, + "loss_total": 0.45001500844955444, + "step": 158899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.207898139953613, + "loss_rtd": 0.28028276562690735, + "loss_sent": 0.14197683334350586, + "loss_sod": 0.01219436526298523, + "loss_total": 0.43445396423339844, + "step": 158899 + }, + { + "epoch": 0.0318, + "grad_norm": 0.8651069402694702, + "learning_rate": 4.796994690148722e-05, + "loss": 0.5319, + "step": 158900 + }, + { + "epoch": 0.031998, + "loss_gen": 4.6617841720581055, + "loss_rtd": 0.24260640144348145, + "loss_sent": 0.061801083385944366, + "loss_sod": 0.06671155989170074, + "loss_total": 0.37111902236938477, + "step": 158999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.371520519256592, + "loss_rtd": 0.26576241850852966, + "loss_sent": 0.10865353792905807, + "loss_sod": 0.0148203419521451, + "loss_total": 0.38923630118370056, + "step": 158999 + }, + { + "epoch": 0.032, + "grad_norm": 0.6985596418380737, + "learning_rate": 4.793824021941474e-05, + "loss": 0.4981, + "step": 159000 + }, + { + "epoch": 0.032, + "eval_loss": 0.48707443475723267, + "eval_runtime": 151.6715, + "eval_samples_per_second": 101.819, + "eval_steps_per_second": 0.798, + "step": 159000 + }, + { + "epoch": 0.032198, + "loss_gen": 5.464871406555176, + "loss_rtd": 0.28606587648391724, + "loss_sent": 0.286416620016098, + "loss_sod": 0.022649457678198814, + "loss_total": 0.5951319932937622, + "step": 159099 + }, + { + "epoch": 0.032198, + "loss_gen": 5.388561725616455, + "loss_rtd": 0.26644182205200195, + "loss_sent": 0.09939169138669968, + "loss_sod": 0.028540709987282753, + "loss_total": 0.39437422156333923, + "step": 159099 + }, + { + "epoch": 0.0322, + "grad_norm": 0.666222870349884, + "learning_rate": 4.790653436781887e-05, + "loss": 0.5294, + "step": 159100 + }, + { + "epoch": 0.032398, + "loss_gen": 5.315982341766357, + "loss_rtd": 0.27147552371025085, + "loss_sent": 0.20511852204799652, + "loss_sod": 0.015645399689674377, + "loss_total": 0.49223947525024414, + "step": 159199 + }, + { + "epoch": 0.032398, + "loss_gen": 5.502182483673096, + "loss_rtd": 0.26964810490608215, + "loss_sent": 0.14899982511997223, + "loss_sod": 0.0346234068274498, + "loss_total": 0.4532713294029236, + "step": 159199 + }, + { + "epoch": 0.0324, + "grad_norm": 0.8348574042320251, + "learning_rate": 4.787482935947071e-05, + "loss": 0.515, + "step": 159200 + }, + { + "epoch": 0.032598, + "loss_gen": 5.40325927734375, + "loss_rtd": 0.26339060068130493, + "loss_sent": 0.2452559620141983, + "loss_sod": 0.07802461087703705, + "loss_total": 0.5866711735725403, + "step": 159299 + }, + { + "epoch": 0.032598, + "loss_gen": 5.222452163696289, + "loss_rtd": 0.2770087718963623, + "loss_sent": 0.08895104378461838, + "loss_sod": 0.10624325275421143, + "loss_total": 0.4722030758857727, + "step": 159299 + }, + { + "epoch": 0.0326, + "grad_norm": 1.073285698890686, + "learning_rate": 4.784312520714101e-05, + "loss": 0.5185, + "step": 159300 + }, + { + "epoch": 0.032798, + "loss_gen": 5.449478626251221, + "loss_rtd": 0.26343077421188354, + "loss_sent": 0.0868225023150444, + "loss_sod": 0.022818146273493767, + "loss_total": 0.37307143211364746, + "step": 159399 + }, + { + "epoch": 0.032798, + "loss_gen": 5.341283321380615, + "loss_rtd": 0.2790081202983856, + "loss_sent": 0.2690570652484894, + "loss_sod": 0.026930466294288635, + "loss_total": 0.5749956369400024, + "step": 159399 + }, + { + "epoch": 0.0328, + "grad_norm": 1.0528799295425415, + "learning_rate": 4.781142192360024e-05, + "loss": 0.5081, + "step": 159400 + }, + { + "epoch": 0.032998, + "loss_gen": 4.618107318878174, + "loss_rtd": 0.23057666420936584, + "loss_sent": 0.031134679913520813, + "loss_sod": 0.15663164854049683, + "loss_total": 0.4183430075645447, + "step": 159499 + }, + { + "epoch": 0.032998, + "loss_gen": 5.574121952056885, + "loss_rtd": 0.2753072679042816, + "loss_sent": 0.24265043437480927, + "loss_sod": 0.053256791085004807, + "loss_total": 0.571214497089386, + "step": 159499 + }, + { + "epoch": 0.033, + "grad_norm": 0.9131119847297668, + "learning_rate": 4.777971952161843e-05, + "loss": 0.5085, + "step": 159500 + }, + { + "epoch": 0.033198, + "loss_gen": 5.096642017364502, + "loss_rtd": 0.27004846930503845, + "loss_sent": 0.11180497705936432, + "loss_sod": 0.04232407361268997, + "loss_total": 0.42417752742767334, + "step": 159599 + }, + { + "epoch": 0.033198, + "loss_gen": 5.813295841217041, + "loss_rtd": 0.2599891424179077, + "loss_sent": 0.15825702250003815, + "loss_sod": 0.09351354837417603, + "loss_total": 0.5117596983909607, + "step": 159599 + }, + { + "epoch": 0.0332, + "grad_norm": 1.730204701423645, + "learning_rate": 4.774801801396536e-05, + "loss": 0.509, + "step": 159600 + }, + { + "epoch": 0.033398, + "loss_gen": 5.034669876098633, + "loss_rtd": 0.2789415121078491, + "loss_sent": 0.10340646654367447, + "loss_sod": 0.01877691224217415, + "loss_total": 0.40112489461898804, + "step": 159699 + }, + { + "epoch": 0.033398, + "loss_gen": 5.425487995147705, + "loss_rtd": 0.2687099874019623, + "loss_sent": 0.4645315110683441, + "loss_sod": 0.0642232820391655, + "loss_total": 0.7974647879600525, + "step": 159699 + }, + { + "epoch": 0.0334, + "grad_norm": 1.626502513885498, + "learning_rate": 4.7716317413410336e-05, + "loss": 0.5265, + "step": 159700 + }, + { + "epoch": 0.033598, + "loss_gen": 5.267213821411133, + "loss_rtd": 0.2763698697090149, + "loss_sent": 0.1774713099002838, + "loss_sod": 0.024524521082639694, + "loss_total": 0.4783656895160675, + "step": 159799 + }, + { + "epoch": 0.033598, + "loss_gen": 5.383553504943848, + "loss_rtd": 0.2740878462791443, + "loss_sent": 0.32771018147468567, + "loss_sod": 0.015716755762696266, + "loss_total": 0.6175147891044617, + "step": 159799 + }, + { + "epoch": 0.0336, + "grad_norm": 0.7344114184379578, + "learning_rate": 4.768461773272241e-05, + "loss": 0.5221, + "step": 159800 + }, + { + "epoch": 0.033798, + "loss_gen": 5.088738918304443, + "loss_rtd": 0.2582167387008667, + "loss_sent": 0.12434374541044235, + "loss_sod": 0.08773496001958847, + "loss_total": 0.4702954590320587, + "step": 159899 + }, + { + "epoch": 0.033798, + "loss_gen": 5.8013410568237305, + "loss_rtd": 0.27426230907440186, + "loss_sent": 0.24400825798511505, + "loss_sod": 0.11932605504989624, + "loss_total": 0.637596607208252, + "step": 159899 + }, + { + "epoch": 0.0338, + "grad_norm": 0.6615749001502991, + "learning_rate": 4.765291898467017e-05, + "loss": 0.5045, + "step": 159900 + }, + { + "epoch": 0.033998, + "loss_gen": 5.236286163330078, + "loss_rtd": 0.279506117105484, + "loss_sent": 0.2051544040441513, + "loss_sod": 0.23678992688655853, + "loss_total": 0.7214504480361938, + "step": 159999 + }, + { + "epoch": 0.033998, + "loss_gen": 5.376890659332275, + "loss_rtd": 0.2707577347755432, + "loss_sent": 0.17245976626873016, + "loss_sod": 0.07291204482316971, + "loss_total": 0.5161295533180237, + "step": 159999 + }, + { + "epoch": 0.034, + "grad_norm": 1.7812297344207764, + "learning_rate": 4.762122118202189e-05, + "loss": 0.5276, + "step": 160000 + }, + { + "epoch": 0.034, + "eval_loss": 0.4820318818092346, + "eval_runtime": 150.069, + "eval_samples_per_second": 102.906, + "eval_steps_per_second": 0.806, + "step": 160000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.842165946960449, + "loss_rtd": 0.2776602506637573, + "loss_sent": 0.1528262048959732, + "loss_sod": 0.057652607560157776, + "loss_total": 0.4881390333175659, + "step": 160099 + }, + { + "epoch": 0.000198, + "loss_gen": 5.351126194000244, + "loss_rtd": 0.2911335527896881, + "loss_sent": 0.09747724235057831, + "loss_sod": 0.1224154382944107, + "loss_total": 0.5110262632369995, + "step": 160099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.5827804803848267, + "learning_rate": 4.758952433754543e-05, + "loss": 0.5215, + "step": 160100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.027387619018555, + "loss_rtd": 0.25595375895500183, + "loss_sent": 0.08539359271526337, + "loss_sod": 0.0035551826003938913, + "loss_total": 0.34490251541137695, + "step": 160199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.507497310638428, + "loss_rtd": 0.2640344798564911, + "loss_sent": 0.2868528664112091, + "loss_sod": 0.059601373970508575, + "loss_total": 0.6104887127876282, + "step": 160199 + }, + { + "epoch": 0.0004, + "grad_norm": 0.7635030150413513, + "learning_rate": 4.755782846400828e-05, + "loss": 0.5153, + "step": 160200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.2646870613098145, + "loss_rtd": 0.27357620000839233, + "loss_sent": 0.18477076292037964, + "loss_sod": 0.0682992935180664, + "loss_total": 0.5266462564468384, + "step": 160299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.374934673309326, + "loss_rtd": 0.29369816184043884, + "loss_sent": 0.2698197662830353, + "loss_sod": 0.12471264600753784, + "loss_total": 0.688230574131012, + "step": 160299 + }, + { + "epoch": 0.0006, + "grad_norm": 2.2770004272460938, + "learning_rate": 4.752613357417752e-05, + "loss": 0.5145, + "step": 160300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.457101821899414, + "loss_rtd": 0.2753320336341858, + "loss_sent": 0.12120392173528671, + "loss_sod": 0.05263853073120117, + "loss_total": 0.4491744935512543, + "step": 160399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.722702503204346, + "loss_rtd": 0.29496505856513977, + "loss_sent": 0.1253061443567276, + "loss_sod": 0.1110258400440216, + "loss_total": 0.5312970280647278, + "step": 160399 + }, + { + "epoch": 0.0008, + "grad_norm": 0.7886563539505005, + "learning_rate": 4.749443968081988e-05, + "loss": 0.5156, + "step": 160400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.088737964630127, + "loss_rtd": 0.2770231068134308, + "loss_sent": 0.4365537166595459, + "loss_sod": 0.008964703418314457, + "loss_total": 0.7225415706634521, + "step": 160499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.218410015106201, + "loss_rtd": 0.25978273153305054, + "loss_sent": 0.1902262568473816, + "loss_sod": 0.046633802354335785, + "loss_total": 0.4966427981853485, + "step": 160499 + }, + { + "epoch": 0.001, + "grad_norm": 1.558271050453186, + "learning_rate": 4.746274679670162e-05, + "loss": 0.5024, + "step": 160500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.377699375152588, + "loss_rtd": 0.2715120315551758, + "loss_sent": 0.376115083694458, + "loss_sod": 0.14049793779850006, + "loss_total": 0.7881250381469727, + "step": 160599 + }, + { + "epoch": 0.001198, + "loss_gen": 5.463585376739502, + "loss_rtd": 0.2850705683231354, + "loss_sent": 0.19470594823360443, + "loss_sod": 0.03956345468759537, + "loss_total": 0.5193399786949158, + "step": 160599 + }, + { + "epoch": 0.0012, + "grad_norm": 2.669656991958618, + "learning_rate": 4.7431054934588664e-05, + "loss": 0.5165, + "step": 160600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.164077281951904, + "loss_rtd": 0.2682517170906067, + "loss_sent": 0.20542724430561066, + "loss_sod": 0.027547337114810944, + "loss_total": 0.5012263059616089, + "step": 160699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.316427707672119, + "loss_rtd": 0.2847226560115814, + "loss_sent": 0.11995867639780045, + "loss_sod": 0.027916785329580307, + "loss_total": 0.4325981140136719, + "step": 160699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.6276112794876099, + "learning_rate": 4.7399364107246474e-05, + "loss": 0.5151, + "step": 160700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.611260890960693, + "loss_rtd": 0.27259013056755066, + "loss_sent": 0.14668984711170197, + "loss_sod": 0.027081597596406937, + "loss_total": 0.44636160135269165, + "step": 160799 + }, + { + "epoch": 0.001598, + "loss_gen": 4.734642028808594, + "loss_rtd": 0.25575634837150574, + "loss_sent": 0.03888203203678131, + "loss_sod": 0.026697641238570213, + "loss_total": 0.321336030960083, + "step": 160799 + }, + { + "epoch": 0.0016, + "grad_norm": 0.656639039516449, + "learning_rate": 4.7367674327440094e-05, + "loss": 0.4948, + "step": 160800 + }, + { + "epoch": 0.001798, + "loss_gen": 4.511584281921387, + "loss_rtd": 0.2586683928966522, + "loss_sent": 3.6015455407323316e-05, + "loss_sod": 0.21329265832901, + "loss_total": 0.47199705243110657, + "step": 160899 + }, + { + "epoch": 0.001798, + "loss_gen": 4.865720748901367, + "loss_rtd": 0.2565580904483795, + "loss_sent": 0.2702704966068268, + "loss_sod": 0.07395409047603607, + "loss_total": 0.6007826328277588, + "step": 160899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.0349520444869995, + "learning_rate": 4.733598560793419e-05, + "loss": 0.498, + "step": 160900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.4267072677612305, + "loss_rtd": 0.2575697600841522, + "loss_sent": 0.10051631182432175, + "loss_sod": 0.06149299815297127, + "loss_total": 0.41957908868789673, + "step": 160999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.543146133422852, + "loss_rtd": 0.24666328728199005, + "loss_sent": 0.14442215859889984, + "loss_sod": 0.022993076592683792, + "loss_total": 0.4140785336494446, + "step": 160999 + }, + { + "epoch": 0.002, + "grad_norm": 0.7205025553703308, + "learning_rate": 4.730429796149296e-05, + "loss": 0.4927, + "step": 161000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4876585602760315, + "eval_runtime": 153.5292, + "eval_samples_per_second": 100.587, + "eval_steps_per_second": 0.788, + "step": 161000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.267669677734375, + "loss_rtd": 0.25475186109542847, + "loss_sent": 0.3130422532558441, + "loss_sod": 0.04444386065006256, + "loss_total": 0.6122379302978516, + "step": 161099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.493355751037598, + "loss_rtd": 0.26913684606552124, + "loss_sent": 0.2605651021003723, + "loss_sod": 0.1393851637840271, + "loss_total": 0.6690871119499207, + "step": 161099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.1247471570968628, + "learning_rate": 4.72726114008802e-05, + "loss": 0.4902, + "step": 161100 + }, + { + "epoch": 0.002398, + "loss_gen": 4.755769729614258, + "loss_rtd": 0.2405293583869934, + "loss_sent": 0.0060568442568182945, + "loss_sod": 0.06752968579530716, + "loss_total": 0.31411588191986084, + "step": 161199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.556065559387207, + "loss_rtd": 0.262604683637619, + "loss_sent": 0.5367265939712524, + "loss_sod": 0.01863880269229412, + "loss_total": 0.8179700374603271, + "step": 161199 + }, + { + "epoch": 0.0024, + "grad_norm": 2.352881908416748, + "learning_rate": 4.724092593885922e-05, + "loss": 0.5068, + "step": 161200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.599846839904785, + "loss_rtd": 0.2677103281021118, + "loss_sent": 0.25691384077072144, + "loss_sod": 0.09384635090827942, + "loss_total": 0.6184705495834351, + "step": 161299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.266449928283691, + "loss_rtd": 0.2805553674697876, + "loss_sent": 0.12523576617240906, + "loss_sod": 0.10910908132791519, + "loss_total": 0.5149002075195312, + "step": 161299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.0119141340255737, + "learning_rate": 4.720924158819296e-05, + "loss": 0.5048, + "step": 161300 + }, + { + "epoch": 0.002798, + "loss_gen": 4.974189758300781, + "loss_rtd": 0.2747703790664673, + "loss_sent": 0.04194648563861847, + "loss_sod": 0.24337291717529297, + "loss_total": 0.5600897669792175, + "step": 161399 + }, + { + "epoch": 0.002798, + "loss_gen": 4.487055778503418, + "loss_rtd": 0.25943294167518616, + "loss_sent": 3.886331614921801e-05, + "loss_sod": 0.14297300577163696, + "loss_total": 0.4024448096752167, + "step": 161399 + }, + { + "epoch": 0.0028, + "grad_norm": 0.9917436838150024, + "learning_rate": 4.717755836164384e-05, + "loss": 0.5014, + "step": 161400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.604397773742676, + "loss_rtd": 0.26280122995376587, + "loss_sent": 0.20259395241737366, + "loss_sod": 0.03261889889836311, + "loss_total": 0.49801409244537354, + "step": 161499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.4992594718933105, + "loss_rtd": 0.2787325978279114, + "loss_sent": 0.11834646016359329, + "loss_sod": 0.07163845747709274, + "loss_total": 0.4687175154685974, + "step": 161499 + }, + { + "epoch": 0.003, + "grad_norm": 1.0626286268234253, + "learning_rate": 4.714587627197389e-05, + "loss": 0.4994, + "step": 161500 + }, + { + "epoch": 0.003198, + "loss_gen": 5.513464450836182, + "loss_rtd": 0.2768165171146393, + "loss_sent": 0.3070828318595886, + "loss_sod": 0.08395469188690186, + "loss_total": 0.6678540706634521, + "step": 161599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.214165687561035, + "loss_rtd": 0.256234735250473, + "loss_sent": 0.25648224353790283, + "loss_sod": 0.035651497542858124, + "loss_total": 0.5483684539794922, + "step": 161599 + }, + { + "epoch": 0.0032, + "grad_norm": 0.8552629351615906, + "learning_rate": 4.7114195331944626e-05, + "loss": 0.4924, + "step": 161600 + }, + { + "epoch": 0.003398, + "loss_gen": 4.5428876876831055, + "loss_rtd": 0.2481108009815216, + "loss_sent": 5.068567770649679e-05, + "loss_sod": 0.12272955477237701, + "loss_total": 0.3708910644054413, + "step": 161699 + }, + { + "epoch": 0.003398, + "loss_gen": 4.6056437492370605, + "loss_rtd": 0.25533589720726013, + "loss_sent": 0.11403465270996094, + "loss_sod": 0.06946588307619095, + "loss_total": 0.4388364255428314, + "step": 161699 + }, + { + "epoch": 0.0034, + "grad_norm": 0.7701624631881714, + "learning_rate": 4.708251555431715e-05, + "loss": 0.5092, + "step": 161700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.452862739562988, + "loss_rtd": 0.2653208076953888, + "loss_sent": 0.11454400420188904, + "loss_sod": 0.045678894966840744, + "loss_total": 0.4255436956882477, + "step": 161799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.611103534698486, + "loss_rtd": 0.26636242866516113, + "loss_sent": 0.09849405288696289, + "loss_sod": 0.04249482601881027, + "loss_total": 0.4073513150215149, + "step": 161799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.1957632303237915, + "learning_rate": 4.705083695185204e-05, + "loss": 0.5065, + "step": 161800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.354158878326416, + "loss_rtd": 0.2837497591972351, + "loss_sent": 0.25385022163391113, + "loss_sod": 0.08466369658708572, + "loss_total": 0.6222636699676514, + "step": 161899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.471778869628906, + "loss_rtd": 0.27457547187805176, + "loss_sent": 0.3879691958427429, + "loss_sod": 0.039826177060604095, + "loss_total": 0.7023708820343018, + "step": 161899 + }, + { + "epoch": 0.0038, + "grad_norm": 2.106362819671631, + "learning_rate": 4.701915953730947e-05, + "loss": 0.5016, + "step": 161900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.2265825271606445, + "loss_rtd": 0.28481683135032654, + "loss_sent": 0.22780855000019073, + "loss_sod": 0.012533052824437618, + "loss_total": 0.5251584053039551, + "step": 161999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.280604362487793, + "loss_rtd": 0.25784459710121155, + "loss_sent": 0.23577207326889038, + "loss_sod": 0.042707107961177826, + "loss_total": 0.5363237857818604, + "step": 161999 + }, + { + "epoch": 0.004, + "grad_norm": 0.5745465159416199, + "learning_rate": 4.698748332344907e-05, + "loss": 0.4946, + "step": 162000 + }, + { + "epoch": 0.004, + "eval_loss": 0.48661598563194275, + "eval_runtime": 150.5923, + "eval_samples_per_second": 102.548, + "eval_steps_per_second": 0.803, + "step": 162000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.778889179229736, + "loss_rtd": 0.2813631296157837, + "loss_sent": 0.07085127383470535, + "loss_sod": 0.09804874658584595, + "loss_total": 0.4502631425857544, + "step": 162099 + }, + { + "epoch": 0.004198, + "loss_gen": 5.018377780914307, + "loss_rtd": 0.2641095519065857, + "loss_sent": 0.12971749901771545, + "loss_sod": 0.0829600989818573, + "loss_total": 0.47678714990615845, + "step": 162099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.7387970685958862, + "learning_rate": 4.695580832303004e-05, + "loss": 0.5122, + "step": 162100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.642885684967041, + "loss_rtd": 0.2560461163520813, + "loss_sent": 0.3851100206375122, + "loss_sod": 0.08798560500144958, + "loss_total": 0.7291417121887207, + "step": 162199 + }, + { + "epoch": 0.004398, + "loss_gen": 5.147655010223389, + "loss_rtd": 0.2690267264842987, + "loss_sent": 0.02875726856291294, + "loss_sod": 0.06375060975551605, + "loss_total": 0.36153459548950195, + "step": 162199 + }, + { + "epoch": 0.0044, + "grad_norm": 1.0226855278015137, + "learning_rate": 4.692413454881104e-05, + "loss": 0.5089, + "step": 162200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.4006171226501465, + "loss_rtd": 0.30595433712005615, + "loss_sent": 0.09044289588928223, + "loss_sod": 0.016173701733350754, + "loss_total": 0.41257092356681824, + "step": 162299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.622282981872559, + "loss_rtd": 0.26950570940971375, + "loss_sent": 0.19931748509407043, + "loss_sod": 0.0840272456407547, + "loss_total": 0.5528504848480225, + "step": 162299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.0370724201202393, + "learning_rate": 4.6892462013550286e-05, + "loss": 0.5111, + "step": 162300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.265988826751709, + "loss_rtd": 0.26581963896751404, + "loss_sent": 0.12919701635837555, + "loss_sod": 0.02982659637928009, + "loss_total": 0.4248432517051697, + "step": 162399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.431527614593506, + "loss_rtd": 0.25241127610206604, + "loss_sent": 0.08514315634965897, + "loss_sod": 0.06059649586677551, + "loss_total": 0.3981509208679199, + "step": 162399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.910581111907959, + "learning_rate": 4.686079073000546e-05, + "loss": 0.4977, + "step": 162400 + }, + { + "epoch": 0.004998, + "loss_gen": 4.9854044914245605, + "loss_rtd": 0.25647881627082825, + "loss_sent": 0.019727284088730812, + "loss_sod": 0.030515329912304878, + "loss_total": 0.30672144889831543, + "step": 162499 + }, + { + "epoch": 0.004998, + "loss_gen": 4.734309196472168, + "loss_rtd": 0.2618856430053711, + "loss_sent": 3.370772174093872e-05, + "loss_sod": 0.05488193780183792, + "loss_total": 0.3168012797832489, + "step": 162499 + }, + { + "epoch": 0.005, + "grad_norm": 0.483264297246933, + "learning_rate": 4.682912071093374e-05, + "loss": 0.5109, + "step": 162500 + }, + { + "epoch": 0.005198, + "loss_gen": 4.8724751472473145, + "loss_rtd": 0.2632364332675934, + "loss_sent": 0.00023552450875286013, + "loss_sod": 0.16117876768112183, + "loss_total": 0.42465072870254517, + "step": 162599 + }, + { + "epoch": 0.005198, + "loss_gen": 4.560177326202393, + "loss_rtd": 0.2536315321922302, + "loss_sent": 3.5272158129373565e-05, + "loss_sod": 0.17273250222206116, + "loss_total": 0.4263993203639984, + "step": 162599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.9590781331062317, + "learning_rate": 4.679745196909184e-05, + "loss": 0.4926, + "step": 162600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.54343318939209, + "loss_rtd": 0.2936595380306244, + "loss_sent": 0.05555896461009979, + "loss_sod": 0.08926781266927719, + "loss_total": 0.43848633766174316, + "step": 162699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.347440719604492, + "loss_rtd": 0.28149881958961487, + "loss_sent": 0.10503031313419342, + "loss_sod": 0.08179987967014313, + "loss_total": 0.4683290123939514, + "step": 162699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.0491477251052856, + "learning_rate": 4.676578451723588e-05, + "loss": 0.5042, + "step": 162700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.493664741516113, + "loss_rtd": 0.29408419132232666, + "loss_sent": 0.10495664179325104, + "loss_sod": 0.031579189002513885, + "loss_total": 0.430620014667511, + "step": 162799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.574361801147461, + "loss_rtd": 0.28189975023269653, + "loss_sent": 0.15872269868850708, + "loss_sod": 0.014554636552929878, + "loss_total": 0.45517709851264954, + "step": 162799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.51229727268219, + "learning_rate": 4.673411836812155e-05, + "loss": 0.5045, + "step": 162800 + }, + { + "epoch": 0.005798, + "loss_gen": 5.420064926147461, + "loss_rtd": 0.2710047960281372, + "loss_sent": 0.20507270097732544, + "loss_sod": 0.04919132590293884, + "loss_total": 0.5252687931060791, + "step": 162899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.324102878570557, + "loss_rtd": 0.2755732238292694, + "loss_sent": 0.11353310942649841, + "loss_sod": 0.0448470339179039, + "loss_total": 0.4339533746242523, + "step": 162899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.1079555749893188, + "learning_rate": 4.670245353450394e-05, + "loss": 0.487, + "step": 162900 + }, + { + "epoch": 0.005998, + "loss_gen": 4.567865371704102, + "loss_rtd": 0.23298753798007965, + "loss_sent": 0.19341908395290375, + "loss_sod": 0.028926221653819084, + "loss_total": 0.45533281564712524, + "step": 162999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.623187065124512, + "loss_rtd": 0.2772347033023834, + "loss_sent": 0.20228157937526703, + "loss_sod": 0.1268787682056427, + "loss_total": 0.6063950657844543, + "step": 162999 + }, + { + "epoch": 0.006, + "grad_norm": 1.046578049659729, + "learning_rate": 4.6670790029137655e-05, + "loss": 0.5086, + "step": 163000 + }, + { + "epoch": 0.006, + "eval_loss": 0.48867926001548767, + "eval_runtime": 150.8643, + "eval_samples_per_second": 102.363, + "eval_steps_per_second": 0.802, + "step": 163000 } ], "logging_steps": 100, @@ -8032,7 +42088,7 @@ "attributes": {} } }, - "total_flos": 2.165372989734912e+18, + "total_flos": 1.1385670881509376e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null