diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,9 +2,9 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.032, + "epoch": 0.03, "eval_steps": 1000, - "global_step": 272000, + "global_step": 303000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -70191,6 +70191,8004 @@ "eval_samples_per_second": 102.685, "eval_steps_per_second": 0.805, "step": 272000 + }, + { + "epoch": 0.000198, + "loss_gen": 5.854343414306641, + "loss_rtd": 0.22708803415298462, + "loss_sent": 0.13898998498916626, + "loss_sod": 0.09644470363855362, + "loss_total": 0.4625227153301239, + "step": 272099 + }, + { + "epoch": 0.000198, + "loss_gen": 6.205217361450195, + "loss_rtd": 0.22106121480464935, + "loss_sent": 0.09671042859554291, + "loss_sod": 0.047797903418540955, + "loss_total": 0.365569531917572, + "step": 272099 + }, + { + "epoch": 0.0002, + "grad_norm": 0.7376329898834229, + "learning_rate": 1.5588256492018692e-05, + "loss": 0.4259, + "step": 272100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.971658229827881, + "loss_rtd": 0.2248469591140747, + "loss_sent": 0.1618236005306244, + "loss_sod": 0.04234761744737625, + "loss_total": 0.42901816964149475, + "step": 272199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.041867733001709, + "loss_rtd": 0.18634530901908875, + "loss_sent": 0.0001128903022618033, + "loss_sod": 0.08167225122451782, + "loss_total": 0.26813045144081116, + "step": 272199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.1817041635513306, + "learning_rate": 1.5565241327022233e-05, + "loss": 0.4243, + "step": 272200 + }, + { + "epoch": 0.000598, + "loss_gen": 6.258779048919678, + "loss_rtd": 0.22976508736610413, + "loss_sent": 0.305381178855896, + "loss_sod": 0.03196942061185837, + "loss_total": 0.5671156644821167, + "step": 272299 + }, + { + "epoch": 0.000598, + "loss_gen": 6.000399589538574, + "loss_rtd": 0.21835242211818695, + "loss_sent": 0.4397623538970947, + "loss_sod": 0.08194266259670258, + "loss_total": 0.7400574684143066, + "step": 272299 + }, + { + "epoch": 0.0006, + "grad_norm": 1.7773146629333496, + "learning_rate": 1.5542240032342453e-05, + "loss": 0.4324, + "step": 272300 + }, + { + "epoch": 0.000798, + "loss_gen": 5.836642742156982, + "loss_rtd": 0.208672896027565, + "loss_sent": 0.19265063107013702, + "loss_sod": 0.060387954115867615, + "loss_total": 0.46171146631240845, + "step": 272399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.5153679847717285, + "loss_rtd": 0.22192445397377014, + "loss_sent": 0.26429370045661926, + "loss_sod": 0.07314467430114746, + "loss_total": 0.5593628287315369, + "step": 272399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.0331426858901978, + "learning_rate": 1.5519252617244284e-05, + "loss": 0.4375, + "step": 272400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.724026679992676, + "loss_rtd": 0.2140340656042099, + "loss_sent": 0.31759142875671387, + "loss_sod": 0.11842348426580429, + "loss_total": 0.6500489711761475, + "step": 272499 + }, + { + "epoch": 0.000998, + "loss_gen": 6.262879848480225, + "loss_rtd": 0.22788311541080475, + "loss_sent": 0.30835044384002686, + "loss_sod": 0.06990567594766617, + "loss_total": 0.6061392426490784, + "step": 272499 + }, + { + "epoch": 0.001, + "grad_norm": 1.3533614873886108, + "learning_rate": 1.549627909098702e-05, + "loss": 0.4512, + "step": 272500 + }, + { + "epoch": 0.001198, + "loss_gen": 5.785185813903809, + "loss_rtd": 0.21991954743862152, + "loss_sent": 0.2482890635728836, + "loss_sod": 0.06520332396030426, + "loss_total": 0.5334119200706482, + "step": 272599 + }, + { + "epoch": 0.001198, + "loss_gen": 6.257391929626465, + "loss_rtd": 0.22569477558135986, + "loss_sent": 0.2106040120124817, + "loss_sod": 0.1530519276857376, + "loss_total": 0.589350700378418, + "step": 272599 + }, + { + "epoch": 0.0012, + "grad_norm": 1.1151965856552124, + "learning_rate": 1.5473319462824408e-05, + "loss": 0.4373, + "step": 272600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.766992568969727, + "loss_rtd": 0.22347459197044373, + "loss_sent": 0.19389308989048004, + "loss_sod": 0.03797778859734535, + "loss_total": 0.4553454518318176, + "step": 272699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.842998027801514, + "loss_rtd": 0.23596161603927612, + "loss_sent": 0.07384367287158966, + "loss_sod": 0.04780445992946625, + "loss_total": 0.35760974884033203, + "step": 272699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.8956001400947571, + "learning_rate": 1.5450373742004592e-05, + "loss": 0.4411, + "step": 272700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.876858234405518, + "loss_rtd": 0.21792659163475037, + "loss_sent": 0.347202330827713, + "loss_sod": 0.08309298753738403, + "loss_total": 0.6482219099998474, + "step": 272799 + }, + { + "epoch": 0.001598, + "loss_gen": 6.156166076660156, + "loss_rtd": 0.23301275074481964, + "loss_sent": 0.1636011302471161, + "loss_sod": 0.11997898668050766, + "loss_total": 0.5165928602218628, + "step": 272799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.5213719606399536, + "learning_rate": 1.5427441937770115e-05, + "loss": 0.4334, + "step": 272800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.906069755554199, + "loss_rtd": 0.23371683061122894, + "loss_sent": 0.26300743222236633, + "loss_sod": 0.0921693667769432, + "loss_total": 0.5888936519622803, + "step": 272899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.64545202255249, + "loss_rtd": 0.20052975416183472, + "loss_sent": 0.10251563042402267, + "loss_sod": 0.06725096702575684, + "loss_total": 0.3702963590621948, + "step": 272899 + }, + { + "epoch": 0.0018, + "grad_norm": 1.0158360004425049, + "learning_rate": 1.5404524059357877e-05, + "loss": 0.4327, + "step": 272900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.925593376159668, + "loss_rtd": 0.21296720206737518, + "loss_sent": 0.2139560729265213, + "loss_sod": 0.030827436596155167, + "loss_total": 0.45775070786476135, + "step": 272999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.651845932006836, + "loss_rtd": 0.21196268498897552, + "loss_sent": 0.13291458785533905, + "loss_sod": 0.020824376493692398, + "loss_total": 0.3657016456127167, + "step": 272999 + }, + { + "epoch": 0.002, + "grad_norm": 0.6587467789649963, + "learning_rate": 1.5381620115999214e-05, + "loss": 0.4234, + "step": 273000 + }, + { + "epoch": 0.002, + "eval_loss": 0.41479891538619995, + "eval_runtime": 154.9089, + "eval_samples_per_second": 99.691, + "eval_steps_per_second": 0.781, + "step": 273000 + }, + { + "epoch": 0.002198, + "loss_gen": 6.200292110443115, + "loss_rtd": 0.21195562183856964, + "loss_sent": 0.11321356892585754, + "loss_sod": 0.07670631259679794, + "loss_total": 0.40187549591064453, + "step": 273099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.7697649002075195, + "loss_rtd": 0.220800518989563, + "loss_sent": 0.04641459882259369, + "loss_sod": 0.04055223986506462, + "loss_total": 0.3077673316001892, + "step": 273099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.0034691095352173, + "learning_rate": 1.535873011691982e-05, + "loss": 0.4446, + "step": 273100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.887719631195068, + "loss_rtd": 0.21751265227794647, + "loss_sent": 0.12734873592853546, + "loss_sod": 0.013623987324535847, + "loss_total": 0.3584853708744049, + "step": 273199 + }, + { + "epoch": 0.002398, + "loss_gen": 6.000583171844482, + "loss_rtd": 0.20690982043743134, + "loss_sent": 0.1919780969619751, + "loss_sod": 0.07696212828159332, + "loss_total": 0.47585004568099976, + "step": 273199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.7924743294715881, + "learning_rate": 1.5335854071339813e-05, + "loss": 0.423, + "step": 273200 + }, + { + "epoch": 0.002598, + "loss_gen": 5.654041767120361, + "loss_rtd": 0.22770124673843384, + "loss_sent": 0.11472953855991364, + "loss_sod": 0.038625702261924744, + "loss_total": 0.3810564875602722, + "step": 273299 + }, + { + "epoch": 0.002598, + "loss_gen": 5.648713111877441, + "loss_rtd": 0.19792591035366058, + "loss_sent": 0.3324960768222809, + "loss_sod": 0.0631243959069252, + "loss_total": 0.5935463905334473, + "step": 273299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.7783716917037964, + "learning_rate": 1.5312991988473625e-05, + "loss": 0.4557, + "step": 273300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.617676258087158, + "loss_rtd": 0.23671136796474457, + "loss_sent": 0.3875289261341095, + "loss_sod": 0.07125213742256165, + "loss_total": 0.6954923868179321, + "step": 273399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.685251235961914, + "loss_rtd": 0.21025420725345612, + "loss_sent": 0.10254145413637161, + "loss_sod": 0.10889772325754166, + "loss_total": 0.4216933846473694, + "step": 273399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.5747517347335815, + "learning_rate": 1.529014387753011e-05, + "loss": 0.4236, + "step": 273400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.638803958892822, + "loss_rtd": 0.21310441195964813, + "loss_sent": 0.18038244545459747, + "loss_sod": 0.1423824429512024, + "loss_total": 0.535869300365448, + "step": 273499 + }, + { + "epoch": 0.002998, + "loss_gen": 5.264876365661621, + "loss_rtd": 0.17995695769786835, + "loss_sent": 0.10798300057649612, + "loss_sod": 0.08907205611467361, + "loss_total": 0.3770120143890381, + "step": 273499 + }, + { + "epoch": 0.003, + "grad_norm": 1.2970952987670898, + "learning_rate": 1.5267309747712517e-05, + "loss": 0.423, + "step": 273500 + }, + { + "epoch": 0.003198, + "loss_gen": 6.159759044647217, + "loss_rtd": 0.22437120974063873, + "loss_sent": 0.16736772656440735, + "loss_sod": 0.05562593415379524, + "loss_total": 0.447364866733551, + "step": 273599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.799188613891602, + "loss_rtd": 0.22752384841442108, + "loss_sent": 0.238138347864151, + "loss_sod": 0.008632916025817394, + "loss_total": 0.47429510951042175, + "step": 273599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.5340250730514526, + "learning_rate": 1.5244489608218377e-05, + "loss": 0.4355, + "step": 273600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.6213836669921875, + "loss_rtd": 0.2576653063297272, + "loss_sent": 0.1624908745288849, + "loss_sod": 0.016954997554421425, + "loss_total": 0.43711116909980774, + "step": 273699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.219740390777588, + "loss_rtd": 0.21375170350074768, + "loss_sent": 0.0008478844538331032, + "loss_sod": 0.14455559849739075, + "loss_total": 0.35915517807006836, + "step": 273699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.051249623298645, + "learning_rate": 1.5221683468239673e-05, + "loss": 0.4182, + "step": 273700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.7904839515686035, + "loss_rtd": 0.17814978957176208, + "loss_sent": 0.10398238897323608, + "loss_sod": 0.08456475287675858, + "loss_total": 0.36669692397117615, + "step": 273799 + }, + { + "epoch": 0.003598, + "loss_gen": 6.159872531890869, + "loss_rtd": 0.2229016125202179, + "loss_sent": 0.3075779676437378, + "loss_sod": 0.04954897612333298, + "loss_total": 0.5800285339355469, + "step": 273799 + }, + { + "epoch": 0.0036, + "grad_norm": 0.7286344766616821, + "learning_rate": 1.5198891336962707e-05, + "loss": 0.4353, + "step": 273800 + }, + { + "epoch": 0.003798, + "loss_gen": 5.964946746826172, + "loss_rtd": 0.20447297394275665, + "loss_sent": 0.1900361180305481, + "loss_sod": 0.07651027292013168, + "loss_total": 0.47101935744285583, + "step": 273899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.49271297454834, + "loss_rtd": 0.22054538130760193, + "loss_sent": 0.2098027765750885, + "loss_sod": 0.0044085439294576645, + "loss_total": 0.43475669622421265, + "step": 273899 + }, + { + "epoch": 0.0038, + "grad_norm": 1.0789908170700073, + "learning_rate": 1.5176113223568167e-05, + "loss": 0.4351, + "step": 273900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.062466144561768, + "loss_rtd": 0.1730503886938095, + "loss_sent": 2.5076464226003736e-05, + "loss_sod": 0.14788149297237396, + "loss_total": 0.3209569454193115, + "step": 273999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.458028316497803, + "loss_rtd": 0.19143301248550415, + "loss_sent": 0.17083045840263367, + "loss_sod": 0.0875568687915802, + "loss_total": 0.449820339679718, + "step": 273999 + }, + { + "epoch": 0.004, + "grad_norm": 1.5684807300567627, + "learning_rate": 1.5153349137231038e-05, + "loss": 0.4205, + "step": 274000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4165240526199341, + "eval_runtime": 151.1454, + "eval_samples_per_second": 102.173, + "eval_steps_per_second": 0.801, + "step": 274000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.204282283782959, + "loss_rtd": 0.1609993278980255, + "loss_sent": 0.0012794769136235118, + "loss_sod": 0.05992255359888077, + "loss_total": 0.22220134735107422, + "step": 274099 + }, + { + "epoch": 0.004198, + "loss_gen": 6.008677005767822, + "loss_rtd": 0.23444169759750366, + "loss_sent": 0.4248729348182678, + "loss_sod": 0.09115256369113922, + "loss_total": 0.7504671812057495, + "step": 274099 + }, + { + "epoch": 0.0042, + "grad_norm": 1.2817180156707764, + "learning_rate": 1.5130599087120706e-05, + "loss": 0.428, + "step": 274100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.449244499206543, + "loss_rtd": 0.20360541343688965, + "loss_sent": 0.02436850033700466, + "loss_sod": 0.04385147988796234, + "loss_total": 0.2718254029750824, + "step": 274199 + }, + { + "epoch": 0.004398, + "loss_gen": 4.987967014312744, + "loss_rtd": 0.18337763845920563, + "loss_sent": 2.4480446882080287e-05, + "loss_sod": 0.043488502502441406, + "loss_total": 0.22689062356948853, + "step": 274199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.5992895364761353, + "learning_rate": 1.5107863082400897e-05, + "loss": 0.4311, + "step": 274200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.187564373016357, + "loss_rtd": 0.1814659982919693, + "loss_sent": 3.216753248125315e-05, + "loss_sod": 0.14015816152095795, + "loss_total": 0.32165631651878357, + "step": 274299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.265283584594727, + "loss_rtd": 0.19243568181991577, + "loss_sent": 0.04902365058660507, + "loss_sod": 0.11203384399414062, + "loss_total": 0.35349318385124207, + "step": 274299 + }, + { + "epoch": 0.0046, + "grad_norm": 1.2498400211334229, + "learning_rate": 1.508514113222968e-05, + "loss": 0.4359, + "step": 274300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.494047164916992, + "loss_rtd": 0.19427287578582764, + "loss_sent": 2.600963125587441e-05, + "loss_sod": 0.05721534416079521, + "loss_total": 0.2515142261981964, + "step": 274399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.231760501861572, + "loss_rtd": 0.18359731137752533, + "loss_sent": 2.7589037927100435e-05, + "loss_sod": 0.291170597076416, + "loss_total": 0.47479552030563354, + "step": 274399 + }, + { + "epoch": 0.0048, + "grad_norm": 1.336585283279419, + "learning_rate": 1.5062433245759422e-05, + "loss": 0.4411, + "step": 274400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.843842506408691, + "loss_rtd": 0.22189895808696747, + "loss_sent": 0.10693559050559998, + "loss_sod": 0.06513269990682602, + "loss_total": 0.39396724104881287, + "step": 274499 + }, + { + "epoch": 0.004998, + "loss_gen": 5.820666313171387, + "loss_rtd": 0.19190526008605957, + "loss_sent": 0.2163529247045517, + "loss_sod": 0.027400104328989983, + "loss_total": 0.4356582760810852, + "step": 274499 + }, + { + "epoch": 0.005, + "grad_norm": 1.718505859375, + "learning_rate": 1.5039739432136873e-05, + "loss": 0.4297, + "step": 274500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.920298099517822, + "loss_rtd": 0.23496927320957184, + "loss_sent": 0.08924492448568344, + "loss_sod": 0.02064770646393299, + "loss_total": 0.3448619246482849, + "step": 274599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.958324909210205, + "loss_rtd": 0.19699518382549286, + "loss_sent": 0.24578341841697693, + "loss_sod": 0.05115849897265434, + "loss_total": 0.49393710494041443, + "step": 274599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.8538525700569153, + "learning_rate": 1.5017059700503105e-05, + "loss": 0.438, + "step": 274600 + }, + { + "epoch": 0.005398, + "loss_gen": 6.252610206604004, + "loss_rtd": 0.23672576248645782, + "loss_sent": 0.21985003352165222, + "loss_sod": 0.07795245945453644, + "loss_total": 0.5345282554626465, + "step": 274699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.9698486328125, + "loss_rtd": 0.22775180637836456, + "loss_sent": 0.22323723137378693, + "loss_sod": 0.06044522300362587, + "loss_total": 0.5114342570304871, + "step": 274699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.2575621604919434, + "learning_rate": 1.4994394059993521e-05, + "loss": 0.436, + "step": 274700 + }, + { + "epoch": 0.005598, + "loss_gen": 5.722237586975098, + "loss_rtd": 0.2087721824645996, + "loss_sent": 0.02655068412423134, + "loss_sod": 0.06442528963088989, + "loss_total": 0.29974815249443054, + "step": 274799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.214300632476807, + "loss_rtd": 0.1811797022819519, + "loss_sent": 0.057300493121147156, + "loss_sod": 0.04453400522470474, + "loss_total": 0.283014178276062, + "step": 274799 + }, + { + "epoch": 0.0056, + "grad_norm": 0.7612568736076355, + "learning_rate": 1.4971742519737803e-05, + "loss": 0.421, + "step": 274800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.314690589904785, + "loss_rtd": 0.22435688972473145, + "loss_sent": 0.21978983283042908, + "loss_sod": 0.08311988413333893, + "loss_total": 0.5272666215896606, + "step": 274899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.76157283782959, + "loss_rtd": 0.23554137349128723, + "loss_sent": 0.12696672976016998, + "loss_sod": 0.0958535447716713, + "loss_total": 0.4583616256713867, + "step": 274899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.2189191579818726, + "learning_rate": 1.4949105088860017e-05, + "loss": 0.4249, + "step": 274900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.916494846343994, + "loss_rtd": 0.2172781527042389, + "loss_sent": 0.14282725751399994, + "loss_sod": 0.039861761033535004, + "loss_total": 0.3999671936035156, + "step": 274999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.9629364013671875, + "loss_rtd": 0.21175292134284973, + "loss_sent": 0.1134389191865921, + "loss_sod": 0.001974719576537609, + "loss_total": 0.3271665573120117, + "step": 274999 + }, + { + "epoch": 0.006, + "grad_norm": 1.2094670534133911, + "learning_rate": 1.4926481776478501e-05, + "loss": 0.454, + "step": 275000 + }, + { + "epoch": 0.006, + "eval_loss": 0.41286784410476685, + "eval_runtime": 150.9879, + "eval_samples_per_second": 102.28, + "eval_steps_per_second": 0.801, + "step": 275000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.998263359069824, + "loss_rtd": 0.21866413950920105, + "loss_sent": 0.14763079583644867, + "loss_sod": 0.01852121762931347, + "loss_total": 0.38481616973876953, + "step": 275099 + }, + { + "epoch": 0.006198, + "loss_gen": 6.226556777954102, + "loss_rtd": 0.22671213746070862, + "loss_sent": 0.07838278263807297, + "loss_sod": 0.0725444108247757, + "loss_total": 0.3776393532752991, + "step": 275099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.2292507886886597, + "learning_rate": 1.4903872591705953e-05, + "loss": 0.4469, + "step": 275100 + }, + { + "epoch": 0.006398, + "loss_gen": 5.403130054473877, + "loss_rtd": 0.17612871527671814, + "loss_sent": 0.0024083247408270836, + "loss_sod": 0.14639602601528168, + "loss_total": 0.3249330520629883, + "step": 275199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.9787278175354, + "loss_rtd": 0.21918079257011414, + "loss_sent": 0.14101746678352356, + "loss_sod": 0.002642581705003977, + "loss_total": 0.36284083127975464, + "step": 275199 + }, + { + "epoch": 0.0064, + "grad_norm": 0.9513075947761536, + "learning_rate": 1.4881277543649308e-05, + "loss": 0.4199, + "step": 275200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.5881028175354, + "loss_rtd": 0.20095397531986237, + "loss_sent": 0.11840689182281494, + "loss_sod": 0.06324057281017303, + "loss_total": 0.38260143995285034, + "step": 275299 + }, + { + "epoch": 0.006598, + "loss_gen": 6.146471977233887, + "loss_rtd": 0.22917453944683075, + "loss_sent": 0.0370137095451355, + "loss_sod": 0.1151127964258194, + "loss_total": 0.38130104541778564, + "step": 275299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.9370741844177246, + "learning_rate": 1.485869664140987e-05, + "loss": 0.4374, + "step": 275300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.571595191955566, + "loss_rtd": 0.2085263431072235, + "loss_sent": 0.031222401186823845, + "loss_sod": 0.08115590363740921, + "loss_total": 0.3209046423435211, + "step": 275399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.117570400238037, + "loss_rtd": 0.1735120266675949, + "loss_sent": 2.8771723009413108e-05, + "loss_sod": 0.0636037290096283, + "loss_total": 0.23714452981948853, + "step": 275399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.7947896122932434, + "learning_rate": 1.483612989408324e-05, + "loss": 0.421, + "step": 275400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.947719097137451, + "loss_rtd": 0.2378571629524231, + "loss_sent": 0.1077478751540184, + "loss_sod": 0.06863857805728912, + "loss_total": 0.41424360871315, + "step": 275499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.633233070373535, + "loss_rtd": 0.20518076419830322, + "loss_sent": 0.0270475372672081, + "loss_sod": 0.025233253836631775, + "loss_total": 0.2574615478515625, + "step": 275499 + }, + { + "epoch": 0.007, + "grad_norm": 1.0830098390579224, + "learning_rate": 1.4813577310759268e-05, + "loss": 0.4115, + "step": 275500 + }, + { + "epoch": 0.007198, + "loss_gen": 6.004197597503662, + "loss_rtd": 0.23788532614707947, + "loss_sent": 0.043408095836639404, + "loss_sod": 0.06540364027023315, + "loss_total": 0.346697062253952, + "step": 275599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.529959678649902, + "loss_rtd": 0.23637913167476654, + "loss_sent": 0.25661712884902954, + "loss_sod": 0.030556680634617805, + "loss_total": 0.5235529541969299, + "step": 275599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.00693941116333, + "learning_rate": 1.4791038900522148e-05, + "loss": 0.4383, + "step": 275600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.628551483154297, + "loss_rtd": 0.21775345504283905, + "loss_sent": 0.13816148042678833, + "loss_sod": 0.0368569940328598, + "loss_total": 0.3927719295024872, + "step": 275699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.497834205627441, + "loss_rtd": 0.1999260038137436, + "loss_sent": 0.07742981612682343, + "loss_sod": 0.01064966432750225, + "loss_total": 0.2880054712295532, + "step": 275699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.2652186155319214, + "learning_rate": 1.4768514672450345e-05, + "loss": 0.4254, + "step": 275700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.588418483734131, + "loss_rtd": 0.2098814994096756, + "loss_sent": 0.0544310137629509, + "loss_sod": 0.01398796122521162, + "loss_total": 0.2783004641532898, + "step": 275799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.634303092956543, + "loss_rtd": 0.22205425798892975, + "loss_sent": 0.11263878643512726, + "loss_sod": 0.026592668145895004, + "loss_total": 0.3612857162952423, + "step": 275799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.7957473397254944, + "learning_rate": 1.4746004635616634e-05, + "loss": 0.4164, + "step": 275800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.97379207611084, + "loss_rtd": 0.19933107495307922, + "loss_sent": 0.07186050713062286, + "loss_sod": 0.037373773753643036, + "loss_total": 0.3085653781890869, + "step": 275899 + }, + { + "epoch": 0.007798, + "loss_gen": 5.917050361633301, + "loss_rtd": 0.21200326085090637, + "loss_sent": 0.08479931950569153, + "loss_sod": 0.02335244044661522, + "loss_total": 0.3201550245285034, + "step": 275899 + }, + { + "epoch": 0.0078, + "grad_norm": 0.6725935935974121, + "learning_rate": 1.4723508799088025e-05, + "loss": 0.4428, + "step": 275900 + }, + { + "epoch": 0.007998, + "loss_gen": 5.615390300750732, + "loss_rtd": 0.19833429157733917, + "loss_sent": 0.11061300337314606, + "loss_sod": 0.02802497148513794, + "loss_total": 0.33697226643562317, + "step": 275999 + }, + { + "epoch": 0.007998, + "loss_gen": 5.542318344116211, + "loss_rtd": 0.1746564507484436, + "loss_sent": 0.11866465955972672, + "loss_sod": 0.07623709738254547, + "loss_total": 0.369558185338974, + "step": 275999 + }, + { + "epoch": 0.008, + "grad_norm": 0.777366042137146, + "learning_rate": 1.4701027171925853e-05, + "loss": 0.4344, + "step": 276000 + }, + { + "epoch": 0.008, + "eval_loss": 0.41162198781967163, + "eval_runtime": 152.5004, + "eval_samples_per_second": 101.265, + "eval_steps_per_second": 0.793, + "step": 276000 + }, + { + "epoch": 0.008198, + "loss_gen": 6.17227029800415, + "loss_rtd": 0.22520144283771515, + "loss_sent": 0.05229737237095833, + "loss_sod": 0.04423283040523529, + "loss_total": 0.32173165678977966, + "step": 276099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.640833854675293, + "loss_rtd": 0.22593384981155396, + "loss_sent": 0.17062826454639435, + "loss_sod": 0.12412357330322266, + "loss_total": 0.5206856727600098, + "step": 276099 + }, + { + "epoch": 0.0082, + "grad_norm": 0.7446812391281128, + "learning_rate": 1.4678559763185718e-05, + "loss": 0.4297, + "step": 276100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.91195821762085, + "loss_rtd": 0.21936455368995667, + "loss_sent": 0.34628406167030334, + "loss_sod": 0.015731997787952423, + "loss_total": 0.5813806056976318, + "step": 276199 + }, + { + "epoch": 0.008398, + "loss_gen": 6.316697120666504, + "loss_rtd": 0.2094716876745224, + "loss_sent": 0.1173069104552269, + "loss_sod": 0.06010034680366516, + "loss_total": 0.38687893748283386, + "step": 276199 + }, + { + "epoch": 0.0084, + "grad_norm": 0.9478293657302856, + "learning_rate": 1.46561065819175e-05, + "loss": 0.4225, + "step": 276200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.881045818328857, + "loss_rtd": 0.22816449403762817, + "loss_sent": 0.2730463445186615, + "loss_sod": 0.02581781893968582, + "loss_total": 0.5270286798477173, + "step": 276299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.697487831115723, + "loss_rtd": 0.2018587589263916, + "loss_sent": 0.39347535371780396, + "loss_sod": 0.02456580102443695, + "loss_total": 0.6198999285697937, + "step": 276299 + }, + { + "epoch": 0.0086, + "grad_norm": 1.8602070808410645, + "learning_rate": 1.4633667637165305e-05, + "loss": 0.4302, + "step": 276300 + }, + { + "epoch": 0.008798, + "loss_gen": 6.158581256866455, + "loss_rtd": 0.23338966071605682, + "loss_sent": 0.13927677273750305, + "loss_sod": 0.03054956905543804, + "loss_total": 0.40321600437164307, + "step": 276399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.349296569824219, + "loss_rtd": 0.18747206032276154, + "loss_sent": 0.0666453018784523, + "loss_sod": 0.034182045608758926, + "loss_total": 0.28829941153526306, + "step": 276399 + }, + { + "epoch": 0.0088, + "grad_norm": 0.7663335800170898, + "learning_rate": 1.4611242937967562e-05, + "loss": 0.451, + "step": 276400 + }, + { + "epoch": 0.008998, + "loss_gen": 6.009261131286621, + "loss_rtd": 0.21562351286411285, + "loss_sent": 0.20994015038013458, + "loss_sod": 0.04199356585741043, + "loss_total": 0.46755725145339966, + "step": 276499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.512537002563477, + "loss_rtd": 0.22194944322109222, + "loss_sent": 0.32049813866615295, + "loss_sod": 0.0023547913879156113, + "loss_total": 0.5448023676872253, + "step": 276499 + }, + { + "epoch": 0.009, + "grad_norm": 0.7253670692443848, + "learning_rate": 1.4588832493356924e-05, + "loss": 0.4405, + "step": 276500 + }, + { + "epoch": 0.009198, + "loss_gen": 5.093565940856934, + "loss_rtd": 0.1910579800605774, + "loss_sent": 2.6950241590384394e-05, + "loss_sod": 0.03554120287299156, + "loss_total": 0.22662614285945892, + "step": 276599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.972431659698486, + "loss_rtd": 0.21581493318080902, + "loss_sent": 0.24811503291130066, + "loss_sod": 0.03777027875185013, + "loss_total": 0.5017002820968628, + "step": 276599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.0715969800949097, + "learning_rate": 1.4566436312360349e-05, + "loss": 0.4206, + "step": 276600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.644054889678955, + "loss_rtd": 0.23983755707740784, + "loss_sent": 0.3277575969696045, + "loss_sod": 0.009140715934336185, + "loss_total": 0.5767358541488647, + "step": 276699 + }, + { + "epoch": 0.009398, + "loss_gen": 6.069643020629883, + "loss_rtd": 0.2163091003894806, + "loss_sent": 0.353619784116745, + "loss_sod": 0.09094171971082687, + "loss_total": 0.6608706116676331, + "step": 276699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.3661854267120361, + "learning_rate": 1.4544054403998969e-05, + "loss": 0.4322, + "step": 276700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.873619556427002, + "loss_rtd": 0.19589076936244965, + "loss_sent": 0.27185410261154175, + "loss_sod": 0.01664174534380436, + "loss_total": 0.4843866229057312, + "step": 276799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.717758655548096, + "loss_rtd": 0.1972099393606186, + "loss_sent": 0.16506752371788025, + "loss_sod": 0.11253196001052856, + "loss_total": 0.4748094379901886, + "step": 276799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.8117824196815491, + "learning_rate": 1.4521686777288234e-05, + "loss": 0.4388, + "step": 276800 + }, + { + "epoch": 0.009798, + "loss_gen": 4.978543281555176, + "loss_rtd": 0.18296152353286743, + "loss_sent": 2.4838065655785613e-05, + "loss_sod": 0.1248551458120346, + "loss_total": 0.3078415095806122, + "step": 276899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.24105978012085, + "loss_rtd": 0.18175606429576874, + "loss_sent": 0.02356233075261116, + "loss_sod": 0.011996938847005367, + "loss_total": 0.21731533110141754, + "step": 276899 + }, + { + "epoch": 0.0098, + "grad_norm": 0.7824288606643677, + "learning_rate": 1.4499333441237838e-05, + "loss": 0.4274, + "step": 276900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.631489276885986, + "loss_rtd": 0.22237901389598846, + "loss_sent": 0.20925535261631012, + "loss_sod": 0.15647080540657043, + "loss_total": 0.5881051421165466, + "step": 276999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.795790195465088, + "loss_rtd": 0.2195892632007599, + "loss_sent": 0.1186850517988205, + "loss_sod": 0.015782378613948822, + "loss_total": 0.354056715965271, + "step": 276999 + }, + { + "epoch": 0.01, + "grad_norm": 1.1823580265045166, + "learning_rate": 1.4476994404851668e-05, + "loss": 0.4314, + "step": 277000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4196847379207611, + "eval_runtime": 150.9205, + "eval_samples_per_second": 102.325, + "eval_steps_per_second": 0.802, + "step": 277000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.772769927978516, + "loss_rtd": 0.21531303226947784, + "loss_sent": 0.2676294147968292, + "loss_sod": 0.008648330345749855, + "loss_total": 0.49159079790115356, + "step": 277099 + }, + { + "epoch": 0.010198, + "loss_gen": 6.091549396514893, + "loss_rtd": 0.2283223420381546, + "loss_sent": 0.22223016619682312, + "loss_sod": 0.09155251085758209, + "loss_total": 0.5421050190925598, + "step": 277099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.085469365119934, + "learning_rate": 1.4454669677127907e-05, + "loss": 0.4322, + "step": 277100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.694558620452881, + "loss_rtd": 0.22899405658245087, + "loss_sent": 0.021348869428038597, + "loss_sod": 0.07458329945802689, + "loss_total": 0.3249262273311615, + "step": 277199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.591134071350098, + "loss_rtd": 0.21290600299835205, + "loss_sent": 0.1980232149362564, + "loss_sod": 0.05598120763897896, + "loss_total": 0.4669104218482971, + "step": 277199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.8149976134300232, + "learning_rate": 1.4432359267058953e-05, + "loss": 0.4212, + "step": 277200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.793703079223633, + "loss_rtd": 0.23179537057876587, + "loss_sent": 0.07164697349071503, + "loss_sod": 0.02043539099395275, + "loss_total": 0.32387775182724, + "step": 277299 + }, + { + "epoch": 0.010598, + "loss_gen": 6.0543317794799805, + "loss_rtd": 0.22306634485721588, + "loss_sent": 0.32701098918914795, + "loss_sod": 0.01589788869023323, + "loss_total": 0.5659751892089844, + "step": 277299 + }, + { + "epoch": 0.0106, + "grad_norm": 0.9578184485435486, + "learning_rate": 1.4410063183631446e-05, + "loss": 0.4235, + "step": 277300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.882953643798828, + "loss_rtd": 0.2202696055173874, + "loss_sent": 0.40532752871513367, + "loss_sod": 0.09417147934436798, + "loss_total": 0.7197686433792114, + "step": 277399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.9690117835998535, + "loss_rtd": 0.23239389061927795, + "loss_sent": 0.300907164812088, + "loss_sod": 0.08439088612794876, + "loss_total": 0.6176919341087341, + "step": 277399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.7343910932540894, + "learning_rate": 1.4387781435826215e-05, + "loss": 0.412, + "step": 277400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.794314384460449, + "loss_rtd": 0.21898530423641205, + "loss_sent": 0.11131469160318375, + "loss_sod": 0.05166494846343994, + "loss_total": 0.38196495175361633, + "step": 277499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.808643341064453, + "loss_rtd": 0.22665444016456604, + "loss_sent": 0.30303165316581726, + "loss_sod": 0.0033834788482636213, + "loss_total": 0.5330695509910583, + "step": 277499 + }, + { + "epoch": 0.011, + "grad_norm": 0.6830807328224182, + "learning_rate": 1.436551403261836e-05, + "loss": 0.4503, + "step": 277500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.957104206085205, + "loss_rtd": 0.2045014649629593, + "loss_sent": 0.14599353075027466, + "loss_sod": 0.03382723033428192, + "loss_total": 0.38432222604751587, + "step": 277599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.789407253265381, + "loss_rtd": 0.20713423192501068, + "loss_sent": 0.09987723082304001, + "loss_sod": 0.05185386538505554, + "loss_total": 0.35886532068252563, + "step": 277599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.0491242408752441, + "learning_rate": 1.4343260982977196e-05, + "loss": 0.4421, + "step": 277600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.814051151275635, + "loss_rtd": 0.21798686683177948, + "loss_sent": 0.1862952709197998, + "loss_sod": 0.038201622664928436, + "loss_total": 0.4424837529659271, + "step": 277699 + }, + { + "epoch": 0.011398, + "loss_gen": 5.72694730758667, + "loss_rtd": 0.24146729707717896, + "loss_sent": 0.31808140873908997, + "loss_sod": 0.010205268859863281, + "loss_total": 0.5697540044784546, + "step": 277699 + }, + { + "epoch": 0.0114, + "grad_norm": 0.9509449601173401, + "learning_rate": 1.432102229586626e-05, + "loss": 0.4455, + "step": 277700 + }, + { + "epoch": 0.011598, + "loss_gen": 6.004022598266602, + "loss_rtd": 0.22351793944835663, + "loss_sent": 0.1464354395866394, + "loss_sod": 0.0887962132692337, + "loss_total": 0.45874959230422974, + "step": 277799 + }, + { + "epoch": 0.011598, + "loss_gen": 5.679751396179199, + "loss_rtd": 0.22603632509708405, + "loss_sent": 0.12933529913425446, + "loss_sod": 0.02899974212050438, + "loss_total": 0.3843713402748108, + "step": 277799 + }, + { + "epoch": 0.0116, + "grad_norm": 0.9555836915969849, + "learning_rate": 1.4298797980243255e-05, + "loss": 0.4205, + "step": 277800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.585626125335693, + "loss_rtd": 0.21574117243289948, + "loss_sent": 0.5474934577941895, + "loss_sod": 0.007428249344229698, + "loss_total": 0.7706629037857056, + "step": 277899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.836236953735352, + "loss_rtd": 0.22890616953372955, + "loss_sent": 0.14271174371242523, + "loss_sod": 0.01348471362143755, + "loss_total": 0.38510262966156006, + "step": 277899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.4604941606521606, + "learning_rate": 1.4276588045060163e-05, + "loss": 0.4354, + "step": 277900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.417580604553223, + "loss_rtd": 0.1938585638999939, + "loss_sent": 0.0372602641582489, + "loss_sod": 0.0591491237282753, + "loss_total": 0.2902679443359375, + "step": 277999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.544027328491211, + "loss_rtd": 0.19217461347579956, + "loss_sent": 0.09470445662736893, + "loss_sod": 0.03213660791516304, + "loss_total": 0.3190156817436218, + "step": 277999 + }, + { + "epoch": 0.012, + "grad_norm": 0.6893901824951172, + "learning_rate": 1.425439249926313e-05, + "loss": 0.4261, + "step": 278000 + }, + { + "epoch": 0.012, + "eval_loss": 0.4183647930622101, + "eval_runtime": 151.2535, + "eval_samples_per_second": 102.1, + "eval_steps_per_second": 0.8, + "step": 278000 + }, + { + "epoch": 0.012198, + "loss_gen": 6.114015102386475, + "loss_rtd": 0.22248844802379608, + "loss_sent": 0.32810482382774353, + "loss_sod": 0.04194863140583038, + "loss_total": 0.5925419330596924, + "step": 278099 + }, + { + "epoch": 0.012198, + "loss_gen": 6.096914768218994, + "loss_rtd": 0.22503113746643066, + "loss_sent": 0.1498892605304718, + "loss_sod": 0.02221049927175045, + "loss_total": 0.39713090658187866, + "step": 278099 + }, + { + "epoch": 0.0122, + "grad_norm": 0.7431953549385071, + "learning_rate": 1.4232211351792552e-05, + "loss": 0.4332, + "step": 278100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.778744220733643, + "loss_rtd": 0.20733730494976044, + "loss_sent": 0.12015029788017273, + "loss_sod": 0.06986133754253387, + "loss_total": 0.39734894037246704, + "step": 278199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.296205043792725, + "loss_rtd": 0.18247418105602264, + "loss_sent": 3.088546509388834e-05, + "loss_sod": 0.31866455078125, + "loss_total": 0.5011696219444275, + "step": 278199 + }, + { + "epoch": 0.0124, + "grad_norm": 1.4823925495147705, + "learning_rate": 1.4210044611582934e-05, + "loss": 0.4189, + "step": 278200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.900089263916016, + "loss_rtd": 0.19683802127838135, + "loss_sent": 0.2003399282693863, + "loss_sod": 0.06887827813625336, + "loss_total": 0.466056227684021, + "step": 278299 + }, + { + "epoch": 0.012598, + "loss_gen": 6.01000452041626, + "loss_rtd": 0.20670291781425476, + "loss_sent": 0.03745774179697037, + "loss_sod": 0.016352390870451927, + "loss_total": 0.260513037443161, + "step": 278299 + }, + { + "epoch": 0.0126, + "grad_norm": 0.8375272154808044, + "learning_rate": 1.4187892287563071e-05, + "loss": 0.4273, + "step": 278300 + }, + { + "epoch": 0.012798, + "loss_gen": 6.473291397094727, + "loss_rtd": 0.2656841576099396, + "loss_sent": 0.0679529532790184, + "loss_sod": 0.11885888129472733, + "loss_total": 0.4524959921836853, + "step": 278399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.9420061111450195, + "loss_rtd": 0.2288360446691513, + "loss_sent": 0.09559198468923569, + "loss_sod": 0.09812648594379425, + "loss_total": 0.42255452275276184, + "step": 278399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.2853294610977173, + "learning_rate": 1.4165754388655906e-05, + "loss": 0.4395, + "step": 278400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.745471477508545, + "loss_rtd": 0.2117670774459839, + "loss_sent": 0.18949981033802032, + "loss_sod": 0.0478760227560997, + "loss_total": 0.4491429328918457, + "step": 278499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.499617099761963, + "loss_rtd": 0.2230619192123413, + "loss_sent": 0.097218818962574, + "loss_sod": 0.06726241111755371, + "loss_total": 0.3875431418418884, + "step": 278499 + }, + { + "epoch": 0.013, + "grad_norm": 0.8567191958427429, + "learning_rate": 1.4143630923778606e-05, + "loss": 0.4512, + "step": 278500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.759278774261475, + "loss_rtd": 0.2041715532541275, + "loss_sent": 0.14937297999858856, + "loss_sod": 0.07851532101631165, + "loss_total": 0.4320598840713501, + "step": 278599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.783989906311035, + "loss_rtd": 0.2008306086063385, + "loss_sent": 0.2086365818977356, + "loss_sod": 0.07383053004741669, + "loss_total": 0.4832977056503296, + "step": 278599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.1480988264083862, + "learning_rate": 1.4121521901842467e-05, + "loss": 0.4392, + "step": 278600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.692169666290283, + "loss_rtd": 0.2233029156923294, + "loss_sent": 0.362612247467041, + "loss_sod": 0.03540550917387009, + "loss_total": 0.6213206648826599, + "step": 278699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.673735618591309, + "loss_rtd": 0.20805056393146515, + "loss_sent": 0.03982659429311752, + "loss_sod": 0.12513360381126404, + "loss_total": 0.3730107843875885, + "step": 278699 + }, + { + "epoch": 0.0134, + "grad_norm": 1.594168782234192, + "learning_rate": 1.4099427331753018e-05, + "loss": 0.4476, + "step": 278700 + }, + { + "epoch": 0.013598, + "loss_gen": 5.833976745605469, + "loss_rtd": 0.22022613883018494, + "loss_sent": 0.1856670379638672, + "loss_sod": 0.07268480956554413, + "loss_total": 0.47857797145843506, + "step": 278799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.80198860168457, + "loss_rtd": 0.21828624606132507, + "loss_sent": 0.19001781940460205, + "loss_sod": 0.030553320422768593, + "loss_total": 0.43885737657546997, + "step": 278799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.7171003222465515, + "learning_rate": 1.4077347222409942e-05, + "loss": 0.4406, + "step": 278800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.872027397155762, + "loss_rtd": 0.21578609943389893, + "loss_sent": 0.21043899655342102, + "loss_sod": 0.06564974784851074, + "loss_total": 0.4918748438358307, + "step": 278899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.9985032081604, + "loss_rtd": 0.23049300909042358, + "loss_sent": 0.25634822249412537, + "loss_sod": 0.013149198144674301, + "loss_total": 0.49999043345451355, + "step": 278899 + }, + { + "epoch": 0.0138, + "grad_norm": 1.0039849281311035, + "learning_rate": 1.4055281582707125e-05, + "loss": 0.4331, + "step": 278900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.637964725494385, + "loss_rtd": 0.21313661336898804, + "loss_sent": 0.0539582297205925, + "loss_sod": 0.15895986557006836, + "loss_total": 0.4260547161102295, + "step": 278999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.225949764251709, + "loss_rtd": 0.1816854029893875, + "loss_sent": 5.1824903493979946e-05, + "loss_sod": 0.05483756214380264, + "loss_total": 0.23657479882240295, + "step": 278999 + }, + { + "epoch": 0.014, + "grad_norm": 0.9494348168373108, + "learning_rate": 1.4033230421532574e-05, + "loss": 0.4261, + "step": 279000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4114840030670166, + "eval_runtime": 151.0764, + "eval_samples_per_second": 102.22, + "eval_steps_per_second": 0.801, + "step": 279000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.948075771331787, + "loss_rtd": 0.21769101917743683, + "loss_sent": 0.18114681541919708, + "loss_sod": 0.03720657899975777, + "loss_total": 0.4360443949699402, + "step": 279099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.593774795532227, + "loss_rtd": 0.22905591130256653, + "loss_sent": 0.5004178285598755, + "loss_sod": 0.018901998177170753, + "loss_total": 0.7483757734298706, + "step": 279099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.857199788093567, + "learning_rate": 1.4011193747768509e-05, + "loss": 0.4385, + "step": 279100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.325761795043945, + "loss_rtd": 0.1875769942998886, + "loss_sent": 0.015059086494147778, + "loss_sod": 0.02798033133149147, + "loss_total": 0.23061640560626984, + "step": 279199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.665338516235352, + "loss_rtd": 0.20474572479724884, + "loss_sent": 0.03706784546375275, + "loss_sod": 0.042797110974788666, + "loss_total": 0.28461068868637085, + "step": 279199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.6072078347206116, + "learning_rate": 1.3989171570291294e-05, + "loss": 0.434, + "step": 279200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.405586242675781, + "loss_rtd": 0.19190989434719086, + "loss_sent": 0.003731678007170558, + "loss_sod": 0.07705147564411163, + "loss_total": 0.27269303798675537, + "step": 279299 + }, + { + "epoch": 0.014598, + "loss_gen": 5.41531229019165, + "loss_rtd": 0.21093900501728058, + "loss_sent": 0.014339824207127094, + "loss_sod": 0.09241083264350891, + "loss_total": 0.3176896870136261, + "step": 279299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.7842441201210022, + "learning_rate": 1.396716389797148e-05, + "loss": 0.4327, + "step": 279300 + }, + { + "epoch": 0.014798, + "loss_gen": 5.80268669128418, + "loss_rtd": 0.21594296395778656, + "loss_sent": 0.2316673994064331, + "loss_sod": 0.03718440607190132, + "loss_total": 0.4847947657108307, + "step": 279399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.298433303833008, + "loss_rtd": 0.1938018649816513, + "loss_sent": 0.018638234585523605, + "loss_sod": 0.0383220873773098, + "loss_total": 0.2507621943950653, + "step": 279399 + }, + { + "epoch": 0.0148, + "grad_norm": 0.7067802548408508, + "learning_rate": 1.394517073967373e-05, + "loss": 0.4529, + "step": 279400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.532692909240723, + "loss_rtd": 0.24964453279972076, + "loss_sent": 0.2670353949069977, + "loss_sod": 0.04572862759232521, + "loss_total": 0.5624085664749146, + "step": 279499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.870636463165283, + "loss_rtd": 0.234962597489357, + "loss_sent": 0.17811015248298645, + "loss_sod": 0.041591327637434006, + "loss_total": 0.45466408133506775, + "step": 279499 + }, + { + "epoch": 0.015, + "grad_norm": 1.5451864004135132, + "learning_rate": 1.3923192104256888e-05, + "loss": 0.4283, + "step": 279500 + }, + { + "epoch": 0.015198, + "loss_gen": 6.10763692855835, + "loss_rtd": 0.20867428183555603, + "loss_sent": 0.31970512866973877, + "loss_sod": 0.05931424722075462, + "loss_total": 0.5876936912536621, + "step": 279599 + }, + { + "epoch": 0.015198, + "loss_gen": 6.079599857330322, + "loss_rtd": 0.2157566249370575, + "loss_sent": 0.24810534715652466, + "loss_sod": 0.15353038907051086, + "loss_total": 0.617392361164093, + "step": 279599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.3794596195220947, + "learning_rate": 1.3901228000573951e-05, + "loss": 0.429, + "step": 279600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.296116828918457, + "loss_rtd": 0.1936623603105545, + "loss_sent": 2.509882688173093e-05, + "loss_sod": 0.09577268362045288, + "loss_total": 0.28946012258529663, + "step": 279699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.075648307800293, + "loss_rtd": 0.16984108090400696, + "loss_sent": 2.540243076509796e-05, + "loss_sod": 0.16885748505592346, + "loss_total": 0.33872395753860474, + "step": 279699 + }, + { + "epoch": 0.0154, + "grad_norm": 1.2368967533111572, + "learning_rate": 1.3879278437472083e-05, + "loss": 0.4295, + "step": 279700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.920673370361328, + "loss_rtd": 0.2070206254720688, + "loss_sent": 0.1506125032901764, + "loss_sod": 0.09789955615997314, + "loss_total": 0.4555326998233795, + "step": 279799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.842594146728516, + "loss_rtd": 0.22099550068378448, + "loss_sent": 0.16854983568191528, + "loss_sod": 0.028708353638648987, + "loss_total": 0.41825369000434875, + "step": 279799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.3597524166107178, + "learning_rate": 1.3857343423792518e-05, + "loss": 0.404, + "step": 279800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.681039333343506, + "loss_rtd": 0.20897695422172546, + "loss_sent": 0.016555357724428177, + "loss_sod": 0.05064110457897186, + "loss_total": 0.2761734127998352, + "step": 279899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.172247409820557, + "loss_rtd": 0.16623836755752563, + "loss_sent": 0.000807274307589978, + "loss_sod": 0.1987113505601883, + "loss_total": 0.3657569885253906, + "step": 279899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.2343541383743286, + "learning_rate": 1.3835422968370698e-05, + "loss": 0.4351, + "step": 279900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.726408958435059, + "loss_rtd": 0.2137012481689453, + "loss_sent": 0.2577979564666748, + "loss_sod": 0.01765153743326664, + "loss_total": 0.4891507625579834, + "step": 279999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.963380813598633, + "loss_rtd": 0.23053455352783203, + "loss_sent": 0.3462984263896942, + "loss_sod": 0.11793604493141174, + "loss_total": 0.694769024848938, + "step": 279999 + }, + { + "epoch": 0.016, + "grad_norm": 0.9562790989875793, + "learning_rate": 1.3813517080036198e-05, + "loss": 0.4177, + "step": 280000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4070643186569214, + "eval_runtime": 151.3227, + "eval_samples_per_second": 102.053, + "eval_steps_per_second": 0.8, + "step": 280000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.96182918548584, + "loss_rtd": 0.2096618264913559, + "loss_sent": 0.10885757207870483, + "loss_sod": 0.038305506110191345, + "loss_total": 0.3568249046802521, + "step": 280099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.419727802276611, + "loss_rtd": 0.19863568246364594, + "loss_sent": 0.031493622809648514, + "loss_sod": 0.08233529329299927, + "loss_total": 0.3124646246433258, + "step": 280099 + }, + { + "epoch": 0.0162, + "grad_norm": 0.8464761972427368, + "learning_rate": 1.3791625767612682e-05, + "loss": 0.4365, + "step": 280100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.853739261627197, + "loss_rtd": 0.24195794761180878, + "loss_sent": 0.0944938138127327, + "loss_sod": 0.010652352124452591, + "loss_total": 0.34710410237312317, + "step": 280199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.659969806671143, + "loss_rtd": 0.21010221540927887, + "loss_sent": 0.24196834862232208, + "loss_sod": 0.007421444170176983, + "loss_total": 0.459492027759552, + "step": 280199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.7923970222473145, + "learning_rate": 1.3769749039917968e-05, + "loss": 0.4328, + "step": 280200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.695321083068848, + "loss_rtd": 0.2275717407464981, + "loss_sent": 0.2553408443927765, + "loss_sod": 0.06647907942533493, + "loss_total": 0.5493916273117065, + "step": 280299 + }, + { + "epoch": 0.016598, + "loss_gen": 6.023808002471924, + "loss_rtd": 0.22004935145378113, + "loss_sent": 0.12588292360305786, + "loss_sod": 0.03191075846552849, + "loss_total": 0.3778430223464966, + "step": 280299 + }, + { + "epoch": 0.0166, + "grad_norm": 0.882409930229187, + "learning_rate": 1.3747886905764012e-05, + "loss": 0.4379, + "step": 280300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.480722427368164, + "loss_rtd": 0.19871079921722412, + "loss_sent": 0.09207594394683838, + "loss_sod": 0.009698489680886269, + "loss_total": 0.3004852533340454, + "step": 280399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.713308811187744, + "loss_rtd": 0.22099217772483826, + "loss_sent": 0.2679978311061859, + "loss_sod": 0.006327535957098007, + "loss_total": 0.4953175485134125, + "step": 280399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.1832282543182373, + "learning_rate": 1.372603937395689e-05, + "loss": 0.4526, + "step": 280400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.249807834625244, + "loss_rtd": 0.1779339760541916, + "loss_sent": 0.006705451291054487, + "loss_sod": 0.17092213034629822, + "loss_total": 0.3555615544319153, + "step": 280499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.696319103240967, + "loss_rtd": 0.20037059485912323, + "loss_sent": 0.13759541511535645, + "loss_sod": 0.05141723155975342, + "loss_total": 0.3893832564353943, + "step": 280499 + }, + { + "epoch": 0.017, + "grad_norm": 1.077949047088623, + "learning_rate": 1.370420645329676e-05, + "loss": 0.4188, + "step": 280500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.335850238800049, + "loss_rtd": 0.18321716785430908, + "loss_sent": 0.0345289409160614, + "loss_sod": 0.04553137719631195, + "loss_total": 0.26327747106552124, + "step": 280599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.804455280303955, + "loss_rtd": 0.22036710381507874, + "loss_sent": 0.6789878010749817, + "loss_sod": 0.03309963271021843, + "loss_total": 0.932454526424408, + "step": 280599 + }, + { + "epoch": 0.0172, + "grad_norm": 1.4698665142059326, + "learning_rate": 1.3682388152577924e-05, + "loss": 0.4288, + "step": 280600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.792542934417725, + "loss_rtd": 0.2209930270910263, + "loss_sent": 0.2628214955329895, + "loss_sod": 0.145041823387146, + "loss_total": 0.628856360912323, + "step": 280699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.430458068847656, + "loss_rtd": 0.22566501796245575, + "loss_sent": 0.09811260551214218, + "loss_sod": 0.008618427440524101, + "loss_total": 0.3323960602283478, + "step": 280699 + }, + { + "epoch": 0.0174, + "grad_norm": 1.1568013429641724, + "learning_rate": 1.3660584480588795e-05, + "loss": 0.4272, + "step": 280700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.936997413635254, + "loss_rtd": 0.21900537610054016, + "loss_sent": 0.11840333044528961, + "loss_sod": 0.04137301817536354, + "loss_total": 0.3787817358970642, + "step": 280799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.852680206298828, + "loss_rtd": 0.19177745282649994, + "loss_sent": 0.10055841505527496, + "loss_sod": 0.09903798252344131, + "loss_total": 0.391373872756958, + "step": 280799 + }, + { + "epoch": 0.0176, + "grad_norm": 0.954412579536438, + "learning_rate": 1.3638795446111913e-05, + "loss": 0.4445, + "step": 280800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.223379135131836, + "loss_rtd": 0.18811871111392975, + "loss_sent": 2.899908577091992e-05, + "loss_sod": 0.05685259774327278, + "loss_total": 0.24500030279159546, + "step": 280899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.524837970733643, + "loss_rtd": 0.19736768305301666, + "loss_sent": 0.14851026237010956, + "loss_sod": 0.08419989049434662, + "loss_total": 0.43007785081863403, + "step": 280899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.0047430992126465, + "learning_rate": 1.3617021057923856e-05, + "loss": 0.4326, + "step": 280900 + }, + { + "epoch": 0.017998, + "loss_gen": 6.013608455657959, + "loss_rtd": 0.1934678703546524, + "loss_sent": 0.205018550157547, + "loss_sod": 0.03125808387994766, + "loss_total": 0.42974451184272766, + "step": 280999 + }, + { + "epoch": 0.017998, + "loss_gen": 6.036141395568848, + "loss_rtd": 0.22736336290836334, + "loss_sent": 0.20304203033447266, + "loss_sod": 0.0939517617225647, + "loss_total": 0.5243571400642395, + "step": 280999 + }, + { + "epoch": 0.018, + "grad_norm": 1.1123294830322266, + "learning_rate": 1.3595261324795366e-05, + "loss": 0.4394, + "step": 281000 + }, + { + "epoch": 0.018, + "eval_loss": 0.4104762673377991, + "eval_runtime": 151.2003, + "eval_samples_per_second": 102.136, + "eval_steps_per_second": 0.8, + "step": 281000 + }, + { + "epoch": 0.018198, + "loss_gen": 5.797642230987549, + "loss_rtd": 0.21336081624031067, + "loss_sent": 0.1785113662481308, + "loss_sod": 0.023090077564120293, + "loss_total": 0.4149622619152069, + "step": 281099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.4549336433410645, + "loss_rtd": 0.20158612728118896, + "loss_sent": 0.027963347733020782, + "loss_sod": 0.11027807742357254, + "loss_total": 0.3398275375366211, + "step": 281099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.2796478271484375, + "learning_rate": 1.3573516255491265e-05, + "loss": 0.4314, + "step": 281100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.885648250579834, + "loss_rtd": 0.2175597995519638, + "loss_sent": 0.4733242988586426, + "loss_sod": 0.014459663070738316, + "loss_total": 0.7053437829017639, + "step": 281199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.595763206481934, + "loss_rtd": 0.2076541930437088, + "loss_sent": 0.0002502041752450168, + "loss_sod": 0.15390491485595703, + "loss_total": 0.36180928349494934, + "step": 281199 + }, + { + "epoch": 0.0184, + "grad_norm": 1.9548704624176025, + "learning_rate": 1.3551785858770478e-05, + "loss": 0.4098, + "step": 281200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.988670349121094, + "loss_rtd": 0.21267130970954895, + "loss_sent": 0.3423349857330322, + "loss_sod": 0.018251899629831314, + "loss_total": 0.5732581615447998, + "step": 281299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.636219501495361, + "loss_rtd": 0.2078903615474701, + "loss_sent": 0.13064752519130707, + "loss_sod": 0.0047988081350922585, + "loss_total": 0.34333670139312744, + "step": 281299 + }, + { + "epoch": 0.0186, + "grad_norm": 0.9335140585899353, + "learning_rate": 1.3530070143385965e-05, + "loss": 0.43, + "step": 281300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.231248378753662, + "loss_rtd": 0.17432983219623566, + "loss_sent": 0.05204940587282181, + "loss_sod": 0.02497088722884655, + "loss_total": 0.25135013461112976, + "step": 281399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.542165279388428, + "loss_rtd": 0.2278299331665039, + "loss_sent": 0.13975077867507935, + "loss_sod": 0.02129683829843998, + "loss_total": 0.3888775706291199, + "step": 281399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.9037200808525085, + "learning_rate": 1.350836911808484e-05, + "loss": 0.4472, + "step": 281400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.556024551391602, + "loss_rtd": 0.20617425441741943, + "loss_sent": 0.04082641005516052, + "loss_sod": 0.06385300308465958, + "loss_total": 0.31085366010665894, + "step": 281499 + }, + { + "epoch": 0.018998, + "loss_gen": 6.012040615081787, + "loss_rtd": 0.21958351135253906, + "loss_sent": 0.09644078463315964, + "loss_sod": 0.11470630764961243, + "loss_total": 0.4307306110858917, + "step": 281499 + }, + { + "epoch": 0.019, + "grad_norm": 1.2389469146728516, + "learning_rate": 1.3486682791608285e-05, + "loss": 0.4261, + "step": 281500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.968993663787842, + "loss_rtd": 0.2420225739479065, + "loss_sent": 0.18818706274032593, + "loss_sod": 0.15060660243034363, + "loss_total": 0.5808162689208984, + "step": 281599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.804429531097412, + "loss_rtd": 0.2363513559103012, + "loss_sent": 0.6853828430175781, + "loss_sod": 0.01946105621755123, + "loss_total": 0.9411952495574951, + "step": 281599 + }, + { + "epoch": 0.0192, + "grad_norm": 2.3807199001312256, + "learning_rate": 1.3465011172691521e-05, + "loss": 0.4191, + "step": 281600 + }, + { + "epoch": 0.019398, + "loss_gen": 6.050154209136963, + "loss_rtd": 0.21635742485523224, + "loss_sent": 0.05861207842826843, + "loss_sod": 0.02883468195796013, + "loss_total": 0.3038041889667511, + "step": 281699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.7222676277160645, + "loss_rtd": 0.21847565472126007, + "loss_sent": 0.1909605860710144, + "loss_sod": 0.11452765762805939, + "loss_total": 0.5239639282226562, + "step": 281699 + }, + { + "epoch": 0.0194, + "grad_norm": 2.022014856338501, + "learning_rate": 1.344335427006389e-05, + "loss": 0.4236, + "step": 281700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.700893878936768, + "loss_rtd": 0.20058487355709076, + "loss_sent": 0.23017999529838562, + "loss_sod": 0.017526116222143173, + "loss_total": 0.44829100370407104, + "step": 281799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.10507345199585, + "loss_rtd": 0.16948948800563812, + "loss_sent": 2.6156780222663656e-05, + "loss_sod": 0.03725551813840866, + "loss_total": 0.2067711502313614, + "step": 281799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.7906410694122314, + "learning_rate": 1.3421712092448784e-05, + "loss": 0.42, + "step": 281800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.543769359588623, + "loss_rtd": 0.2028585523366928, + "loss_sent": 0.011168386787176132, + "loss_sod": 0.0761219710111618, + "loss_total": 0.29014891386032104, + "step": 281899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.394286155700684, + "loss_rtd": 0.17520064115524292, + "loss_sent": 6.80653247400187e-05, + "loss_sod": 0.19853290915489197, + "loss_total": 0.3738016188144684, + "step": 281899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.062142014503479, + "learning_rate": 1.3400084648563687e-05, + "loss": 0.423, + "step": 281900 + }, + { + "epoch": 0.019998, + "loss_gen": 6.219426155090332, + "loss_rtd": 0.21608854830265045, + "loss_sent": 0.15127773582935333, + "loss_sod": 0.039266083389520645, + "loss_total": 0.40663236379623413, + "step": 281999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.831309795379639, + "loss_rtd": 0.2534753382205963, + "loss_sent": 0.3156713843345642, + "loss_sod": 0.053194474428892136, + "loss_total": 0.6223411560058594, + "step": 281999 + }, + { + "epoch": 0.02, + "grad_norm": 1.422544002532959, + "learning_rate": 1.3378471947120108e-05, + "loss": 0.4254, + "step": 282000 + }, + { + "epoch": 0.02, + "eval_loss": 0.4120514988899231, + "eval_runtime": 153.1231, + "eval_samples_per_second": 100.853, + "eval_steps_per_second": 0.79, + "step": 282000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.8682756423950195, + "loss_rtd": 0.19730664789676666, + "loss_sent": 0.2298789918422699, + "loss_sod": 0.051396775990724564, + "loss_total": 0.4785824120044708, + "step": 282099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.661902904510498, + "loss_rtd": 0.22755858302116394, + "loss_sent": 0.5371243953704834, + "loss_sod": 0.0011561757419258356, + "loss_total": 0.765839159488678, + "step": 282099 + }, + { + "epoch": 0.0202, + "grad_norm": 1.3973290920257568, + "learning_rate": 1.3356873996823643e-05, + "loss": 0.4355, + "step": 282100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.888813018798828, + "loss_rtd": 0.20488464832305908, + "loss_sent": 0.2436603158712387, + "loss_sod": 0.023993542417883873, + "loss_total": 0.4725385010242462, + "step": 282199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.726215362548828, + "loss_rtd": 0.22751547396183014, + "loss_sent": 0.15657109022140503, + "loss_sod": 0.08309026807546616, + "loss_total": 0.4671768248081207, + "step": 282199 + }, + { + "epoch": 0.0204, + "grad_norm": 1.1480568647384644, + "learning_rate": 1.3335290806373951e-05, + "loss": 0.4332, + "step": 282200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.7480974197387695, + "loss_rtd": 0.22389169037342072, + "loss_sent": 0.22032272815704346, + "loss_sod": 0.024300508201122284, + "loss_total": 0.46851491928100586, + "step": 282299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.5183515548706055, + "loss_rtd": 0.19824880361557007, + "loss_sent": 0.13213348388671875, + "loss_sod": 0.021483005955815315, + "loss_total": 0.351865291595459, + "step": 282299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.1079515218734741, + "learning_rate": 1.3313722384464756e-05, + "loss": 0.436, + "step": 282300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.875498294830322, + "loss_rtd": 0.20670932531356812, + "loss_sent": 0.09952390938997269, + "loss_sod": 0.05448728799819946, + "loss_total": 0.36072051525115967, + "step": 282399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.664567470550537, + "loss_rtd": 0.18414762616157532, + "loss_sent": 0.04319232329726219, + "loss_sod": 0.030659429728984833, + "loss_total": 0.25799939036369324, + "step": 282399 + }, + { + "epoch": 0.0208, + "grad_norm": 0.8064749240875244, + "learning_rate": 1.3292168739783777e-05, + "loss": 0.4235, + "step": 282400 + }, + { + "epoch": 0.020998, + "loss_gen": 5.922492980957031, + "loss_rtd": 0.22151030600070953, + "loss_sent": 0.24791277945041656, + "loss_sod": 0.06692000478506088, + "loss_total": 0.5363430976867676, + "step": 282499 + }, + { + "epoch": 0.020998, + "loss_gen": 6.015212059020996, + "loss_rtd": 0.21163247525691986, + "loss_sent": 0.32908403873443604, + "loss_sod": 0.0717523992061615, + "loss_total": 0.6124688982963562, + "step": 282499 + }, + { + "epoch": 0.021, + "grad_norm": 1.5593205690383911, + "learning_rate": 1.3270629881012842e-05, + "loss": 0.4279, + "step": 282500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.767723083496094, + "loss_rtd": 0.22423988580703735, + "loss_sent": 0.12806497514247894, + "loss_sod": 0.05011466145515442, + "loss_total": 0.4024195075035095, + "step": 282599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.565870761871338, + "loss_rtd": 0.19891837239265442, + "loss_sent": 0.07426561415195465, + "loss_sod": 0.018944283947348595, + "loss_total": 0.2921282649040222, + "step": 282599 + }, + { + "epoch": 0.0212, + "grad_norm": 0.7791507840156555, + "learning_rate": 1.32491058168278e-05, + "loss": 0.4299, + "step": 282600 + }, + { + "epoch": 0.021398, + "loss_gen": 5.895869731903076, + "loss_rtd": 0.21902136504650116, + "loss_sent": 0.31859588623046875, + "loss_sod": 0.022601434960961342, + "loss_total": 0.5602186918258667, + "step": 282699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.891465187072754, + "loss_rtd": 0.22689184546470642, + "loss_sent": 0.12094026058912277, + "loss_sod": 0.09026947617530823, + "loss_total": 0.438101589679718, + "step": 282699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.9102500081062317, + "learning_rate": 1.3227596555898553e-05, + "loss": 0.4355, + "step": 282700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.7174973487854, + "loss_rtd": 0.18339064717292786, + "loss_sent": 0.15521188080310822, + "loss_sod": 0.12047228217124939, + "loss_total": 0.45907479524612427, + "step": 282799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.60105037689209, + "loss_rtd": 0.2015862911939621, + "loss_sent": 0.2438632994890213, + "loss_sod": 0.00624456163495779, + "loss_total": 0.451694130897522, + "step": 282799 + }, + { + "epoch": 0.0216, + "grad_norm": 1.2852686643600464, + "learning_rate": 1.3206102106889001e-05, + "loss": 0.4376, + "step": 282800 + }, + { + "epoch": 0.021798, + "loss_gen": 6.158794403076172, + "loss_rtd": 0.2291603535413742, + "loss_sent": 0.16960297524929047, + "loss_sod": 0.0897158682346344, + "loss_total": 0.4884791970252991, + "step": 282899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.54933500289917, + "loss_rtd": 0.19277606904506683, + "loss_sent": 0.00838877446949482, + "loss_sod": 0.15760350227355957, + "loss_total": 0.35876837372779846, + "step": 282899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.240827202796936, + "learning_rate": 1.318462247845712e-05, + "loss": 0.4283, + "step": 282900 + }, + { + "epoch": 0.021998, + "loss_gen": 5.909274578094482, + "loss_rtd": 0.23066742718219757, + "loss_sent": 0.340031236410141, + "loss_sod": 0.09343505650758743, + "loss_total": 0.6641337275505066, + "step": 282999 + }, + { + "epoch": 0.021998, + "loss_gen": 5.683666229248047, + "loss_rtd": 0.23398225009441376, + "loss_sent": 0.11925114691257477, + "loss_sod": 0.015400709584355354, + "loss_total": 0.36863410472869873, + "step": 282999 + }, + { + "epoch": 0.022, + "grad_norm": 1.2445647716522217, + "learning_rate": 1.3163157679254918e-05, + "loss": 0.4139, + "step": 283000 + }, + { + "epoch": 0.022, + "eval_loss": 0.40600138902664185, + "eval_runtime": 151.0534, + "eval_samples_per_second": 102.235, + "eval_steps_per_second": 0.801, + "step": 283000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.523969650268555, + "loss_rtd": 0.21689656376838684, + "loss_sent": 0.07073554396629333, + "loss_sod": 0.06892915815114975, + "loss_total": 0.3565612733364105, + "step": 283099 + }, + { + "epoch": 0.022198, + "loss_gen": 4.999656677246094, + "loss_rtd": 0.1698964238166809, + "loss_sent": 2.757607217063196e-05, + "loss_sod": 0.07586808502674103, + "loss_total": 0.24579209089279175, + "step": 283099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.0766270160675049, + "learning_rate": 1.3141707717928381e-05, + "loss": 0.4368, + "step": 283100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.874646186828613, + "loss_rtd": 0.20832401514053345, + "loss_sent": 0.3349180817604065, + "loss_sod": 0.15273280441761017, + "loss_total": 0.6959748864173889, + "step": 283199 + }, + { + "epoch": 0.022398, + "loss_gen": 5.822833061218262, + "loss_rtd": 0.21687422692775726, + "loss_sent": 0.06077207997441292, + "loss_sod": 0.007514716126024723, + "loss_total": 0.28516101837158203, + "step": 283199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.3074407577514648, + "learning_rate": 1.3120272603117573e-05, + "loss": 0.4209, + "step": 283200 + }, + { + "epoch": 0.022598, + "loss_gen": 5.798384666442871, + "loss_rtd": 0.231093168258667, + "loss_sent": 0.30197083950042725, + "loss_sod": 0.08319886028766632, + "loss_total": 0.6162628531455994, + "step": 283299 + }, + { + "epoch": 0.022598, + "loss_gen": 6.18732213973999, + "loss_rtd": 0.21987217664718628, + "loss_sent": 0.1981782764196396, + "loss_sod": 0.04336914047598839, + "loss_total": 0.46141958236694336, + "step": 283299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.1965978145599365, + "learning_rate": 1.3098852343456542e-05, + "loss": 0.4283, + "step": 283300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.856659412384033, + "loss_rtd": 0.22240784764289856, + "loss_sent": 0.026708999648690224, + "loss_sod": 0.11427552998065948, + "loss_total": 0.3633923828601837, + "step": 283399 + }, + { + "epoch": 0.022798, + "loss_gen": 5.88224458694458, + "loss_rtd": 0.20745408535003662, + "loss_sent": 0.43349555134773254, + "loss_sod": 0.012127671390771866, + "loss_total": 0.6530773043632507, + "step": 283399 + }, + { + "epoch": 0.0228, + "grad_norm": 1.0618562698364258, + "learning_rate": 1.3077446947573397e-05, + "loss": 0.4178, + "step": 283400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.896244049072266, + "loss_rtd": 0.2344936728477478, + "loss_sent": 0.11081881076097488, + "loss_sod": 0.07712383568286896, + "loss_total": 0.42243629693984985, + "step": 283499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.9980645179748535, + "loss_rtd": 0.21711502969264984, + "loss_sent": 0.2896641492843628, + "loss_sod": 0.011189509183168411, + "loss_total": 0.5179686546325684, + "step": 283499 + }, + { + "epoch": 0.023, + "grad_norm": 0.949335515499115, + "learning_rate": 1.3056056424090186e-05, + "loss": 0.4196, + "step": 283500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.704510688781738, + "loss_rtd": 0.2115384191274643, + "loss_sent": 0.0277202520519495, + "loss_sod": 0.009425907395780087, + "loss_total": 0.2486845850944519, + "step": 283599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.615524768829346, + "loss_rtd": 0.20658348500728607, + "loss_sent": 0.059819743037223816, + "loss_sod": 0.021676931530237198, + "loss_total": 0.2880801558494568, + "step": 283599 + }, + { + "epoch": 0.0232, + "grad_norm": 0.48402082920074463, + "learning_rate": 1.3034680781623026e-05, + "loss": 0.4319, + "step": 283600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.913975715637207, + "loss_rtd": 0.2053077667951584, + "loss_sent": 0.48148226737976074, + "loss_sod": 0.09669242799282074, + "loss_total": 0.7834824323654175, + "step": 283699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.48197603225708, + "loss_rtd": 0.21156112849712372, + "loss_sent": 0.41472411155700684, + "loss_sod": 0.013013198040425777, + "loss_total": 0.6392984390258789, + "step": 283699 + }, + { + "epoch": 0.0234, + "grad_norm": 1.6488102674484253, + "learning_rate": 1.3013320028782033e-05, + "loss": 0.4235, + "step": 283700 + }, + { + "epoch": 0.023598, + "loss_gen": 6.132323265075684, + "loss_rtd": 0.23241229355335236, + "loss_sent": 0.3086828887462616, + "loss_sod": 0.03744089603424072, + "loss_total": 0.5785360336303711, + "step": 283799 + }, + { + "epoch": 0.023598, + "loss_gen": 5.932138919830322, + "loss_rtd": 0.2254108488559723, + "loss_sent": 0.12026329338550568, + "loss_sod": 0.026655053719878197, + "loss_total": 0.3723291754722595, + "step": 283799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.9617313146591187, + "learning_rate": 1.2991974174171323e-05, + "loss": 0.4376, + "step": 283800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.448973655700684, + "loss_rtd": 0.1710488349199295, + "loss_sent": 0.021012771874666214, + "loss_sod": 0.0922928899526596, + "loss_total": 0.2843545079231262, + "step": 283899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.906702995300293, + "loss_rtd": 0.2369176596403122, + "loss_sent": 0.11955936253070831, + "loss_sod": 0.05116061121225357, + "loss_total": 0.40763765573501587, + "step": 283899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.7681860327720642, + "learning_rate": 1.2970643226388973e-05, + "loss": 0.4387, + "step": 283900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.948052883148193, + "loss_rtd": 0.2227478325366974, + "loss_sent": 0.17543916404247284, + "loss_sod": 0.05872737243771553, + "loss_total": 0.45691436529159546, + "step": 283999 + }, + { + "epoch": 0.023998, + "loss_gen": 6.008965969085693, + "loss_rtd": 0.2151934653520584, + "loss_sent": 0.13839246332645416, + "loss_sod": 0.03402182459831238, + "loss_total": 0.38760775327682495, + "step": 283999 + }, + { + "epoch": 0.024, + "grad_norm": 0.9981189370155334, + "learning_rate": 1.2949327194027105e-05, + "loss": 0.4168, + "step": 284000 + }, + { + "epoch": 0.024, + "eval_loss": 0.41382986307144165, + "eval_runtime": 151.2755, + "eval_samples_per_second": 102.085, + "eval_steps_per_second": 0.8, + "step": 284000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.6349101066589355, + "loss_rtd": 0.2067456692457199, + "loss_sent": 0.3376573920249939, + "loss_sod": 0.0167418010532856, + "loss_total": 0.5611448287963867, + "step": 284099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.6613640785217285, + "loss_rtd": 0.22606918215751648, + "loss_sent": 0.10882196575403214, + "loss_sod": 0.004473487846553326, + "loss_total": 0.3393646478652954, + "step": 284099 + }, + { + "epoch": 0.0242, + "grad_norm": 0.751107394695282, + "learning_rate": 1.2928026085671813e-05, + "loss": 0.4178, + "step": 284100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.774574279785156, + "loss_rtd": 0.20110966265201569, + "loss_sent": 0.18786457180976868, + "loss_sod": 0.09336121380329132, + "loss_total": 0.4823354482650757, + "step": 284199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.376277923583984, + "loss_rtd": 0.1898571401834488, + "loss_sent": 2.540801688155625e-05, + "loss_sod": 0.12208402156829834, + "loss_total": 0.31196656823158264, + "step": 284199 + }, + { + "epoch": 0.0244, + "grad_norm": 1.0591825246810913, + "learning_rate": 1.2906739909903193e-05, + "loss": 0.4433, + "step": 284200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.828518390655518, + "loss_rtd": 0.23971131443977356, + "loss_sent": 0.12309505045413971, + "loss_sod": 0.1512659788131714, + "loss_total": 0.5140723586082458, + "step": 284299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.744897365570068, + "loss_rtd": 0.22822363674640656, + "loss_sent": 0.08411303162574768, + "loss_sod": 0.033215202391147614, + "loss_total": 0.34555187821388245, + "step": 284299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.157599687576294, + "learning_rate": 1.2885468675295287e-05, + "loss": 0.4317, + "step": 284300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.711983680725098, + "loss_rtd": 0.21978799998760223, + "loss_sent": 0.45889976620674133, + "loss_sod": 0.12963168323040009, + "loss_total": 0.8083194494247437, + "step": 284399 + }, + { + "epoch": 0.024798, + "loss_gen": 5.7123517990112305, + "loss_rtd": 0.2189454883337021, + "loss_sent": 0.1288938671350479, + "loss_sod": 0.0427108071744442, + "loss_total": 0.3905501365661621, + "step": 284399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.4148083925247192, + "learning_rate": 1.2864212390416158e-05, + "loss": 0.4569, + "step": 284400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.427609920501709, + "loss_rtd": 0.18476331233978271, + "loss_sent": 0.24138493835926056, + "loss_sod": 0.016914937645196915, + "loss_total": 0.4430631995201111, + "step": 284499 + }, + { + "epoch": 0.024998, + "loss_gen": 6.197299003601074, + "loss_rtd": 0.21423634886741638, + "loss_sent": 0.07877682894468307, + "loss_sod": 0.05729764699935913, + "loss_total": 0.3503108322620392, + "step": 284499 + }, + { + "epoch": 0.025, + "grad_norm": 0.9697398543357849, + "learning_rate": 1.2842971063827857e-05, + "loss": 0.4331, + "step": 284500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.556057453155518, + "loss_rtd": 0.19708921015262604, + "loss_sent": 2.9624856324517168e-05, + "loss_sod": 0.08423587679862976, + "loss_total": 0.28135472536087036, + "step": 284599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.281551837921143, + "loss_rtd": 0.1667519062757492, + "loss_sent": 3.996157829533331e-05, + "loss_sod": 0.07685260474681854, + "loss_total": 0.24364447593688965, + "step": 284599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.7385295033454895, + "learning_rate": 1.2821744704086352e-05, + "loss": 0.4358, + "step": 284600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.991865634918213, + "loss_rtd": 0.21228674054145813, + "loss_sent": 0.26478254795074463, + "loss_sod": 0.030033813789486885, + "loss_total": 0.5071030855178833, + "step": 284699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.828866004943848, + "loss_rtd": 0.2195064127445221, + "loss_sent": 0.19662582874298096, + "loss_sod": 0.03633663430809975, + "loss_total": 0.4524688720703125, + "step": 284699 + }, + { + "epoch": 0.0254, + "grad_norm": 1.1441248655319214, + "learning_rate": 1.2800533319741631e-05, + "loss": 0.419, + "step": 284700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.646084785461426, + "loss_rtd": 0.24002711474895477, + "loss_sent": 0.3304119408130646, + "loss_sod": 0.015011422336101532, + "loss_total": 0.5854504704475403, + "step": 284799 + }, + { + "epoch": 0.025598, + "loss_gen": 5.964494705200195, + "loss_rtd": 0.20331549644470215, + "loss_sent": 0.3138743042945862, + "loss_sod": 0.1335727572441101, + "loss_total": 0.6507625579833984, + "step": 284799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.8249038457870483, + "learning_rate": 1.2779336919337643e-05, + "loss": 0.4332, + "step": 284800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.492527961730957, + "loss_rtd": 0.20996277034282684, + "loss_sent": 0.12192250788211823, + "loss_sod": 0.010068539530038834, + "loss_total": 0.3419538140296936, + "step": 284899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.226845741271973, + "loss_rtd": 0.1744282841682434, + "loss_sent": 0.0313674733042717, + "loss_sod": 0.033174075186252594, + "loss_total": 0.2389698326587677, + "step": 284899 + }, + { + "epoch": 0.0258, + "grad_norm": 0.7089688777923584, + "learning_rate": 1.2758155511412306e-05, + "loss": 0.4292, + "step": 284900 + }, + { + "epoch": 0.025998, + "loss_gen": 6.070541858673096, + "loss_rtd": 0.2007075399160385, + "loss_sent": 0.09279533475637436, + "loss_sod": 0.11625343561172485, + "loss_total": 0.4097563326358795, + "step": 284999 + }, + { + "epoch": 0.025998, + "loss_gen": 5.112553119659424, + "loss_rtd": 0.18466633558273315, + "loss_sent": 2.7075042453361675e-05, + "loss_sod": 0.08028829097747803, + "loss_total": 0.2649816870689392, + "step": 284999 + }, + { + "epoch": 0.026, + "grad_norm": 0.9484739303588867, + "learning_rate": 1.273698910449746e-05, + "loss": 0.4421, + "step": 285000 + }, + { + "epoch": 0.026, + "eval_loss": 0.41398826241493225, + "eval_runtime": 151.3999, + "eval_samples_per_second": 102.001, + "eval_steps_per_second": 0.799, + "step": 285000 + }, + { + "epoch": 0.026198, + "loss_gen": 6.07914924621582, + "loss_rtd": 0.21907606720924377, + "loss_sent": 0.08856616169214249, + "loss_sod": 0.037902090698480606, + "loss_total": 0.34554430842399597, + "step": 285099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.945745468139648, + "loss_rtd": 0.24509187042713165, + "loss_sent": 0.12188887596130371, + "loss_sod": 0.02607342228293419, + "loss_total": 0.39305415749549866, + "step": 285099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.6241201162338257, + "learning_rate": 1.271583770711895e-05, + "loss": 0.4237, + "step": 285100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.871770858764648, + "loss_rtd": 0.1924058347940445, + "loss_sent": 0.07700707763433456, + "loss_sod": 0.03474319353699684, + "loss_total": 0.304156094789505, + "step": 285199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.897307395935059, + "loss_rtd": 0.20265106856822968, + "loss_sent": 0.07986465096473694, + "loss_sod": 0.05640251934528351, + "loss_total": 0.3389182388782501, + "step": 285199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.8830915093421936, + "learning_rate": 1.2694701327796548e-05, + "loss": 0.4342, + "step": 285200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.877799034118652, + "loss_rtd": 0.2351219356060028, + "loss_sent": 0.3728555142879486, + "loss_sod": 0.017333796247839928, + "loss_total": 0.6253112554550171, + "step": 285299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.8381547927856445, + "loss_rtd": 0.21782098710536957, + "loss_sent": 0.2870821952819824, + "loss_sod": 0.07433715462684631, + "loss_total": 0.5792403221130371, + "step": 285299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.999060034751892, + "learning_rate": 1.267357997504401e-05, + "loss": 0.4218, + "step": 285300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.939278602600098, + "loss_rtd": 0.22044366598129272, + "loss_sent": 0.3877010941505432, + "loss_sod": 0.028766902163624763, + "loss_total": 0.6369116306304932, + "step": 285399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.718808174133301, + "loss_rtd": 0.20779336988925934, + "loss_sent": 0.22688840329647064, + "loss_sod": 0.0855991393327713, + "loss_total": 0.5202808976173401, + "step": 285399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.714625597000122, + "learning_rate": 1.2652473657368974e-05, + "loss": 0.4393, + "step": 285400 + }, + { + "epoch": 0.026998, + "loss_gen": 6.065155506134033, + "loss_rtd": 0.1971694380044937, + "loss_sent": 0.5773887038230896, + "loss_sod": 0.02404419332742691, + "loss_total": 0.7986023426055908, + "step": 285499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.852124214172363, + "loss_rtd": 0.21547146141529083, + "loss_sent": 0.13859428465366364, + "loss_sod": 0.05556311458349228, + "loss_total": 0.40962886810302734, + "step": 285499 + }, + { + "epoch": 0.027, + "grad_norm": 1.6814595460891724, + "learning_rate": 1.2631382383273088e-05, + "loss": 0.4486, + "step": 285500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.898143291473389, + "loss_rtd": 0.21077917516231537, + "loss_sent": 0.3432920277118683, + "loss_sod": 0.042259760200977325, + "loss_total": 0.596331000328064, + "step": 285599 + }, + { + "epoch": 0.027198, + "loss_gen": 6.2751288414001465, + "loss_rtd": 0.21900509297847748, + "loss_sent": 0.08064062893390656, + "loss_sod": 0.055173758417367935, + "loss_total": 0.35481947660446167, + "step": 285599 + }, + { + "epoch": 0.0272, + "grad_norm": 1.4933332204818726, + "learning_rate": 1.2610306161251905e-05, + "loss": 0.4231, + "step": 285600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.716874599456787, + "loss_rtd": 0.2126436084508896, + "loss_sent": 0.1443919539451599, + "loss_sod": 0.07392412424087524, + "loss_total": 0.43095970153808594, + "step": 285699 + }, + { + "epoch": 0.027398, + "loss_gen": 5.25924015045166, + "loss_rtd": 0.2138407975435257, + "loss_sent": 0.011563577689230442, + "loss_sod": 0.1020401269197464, + "loss_total": 0.32744449377059937, + "step": 285699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.8555403351783752, + "learning_rate": 1.2589244999794946e-05, + "loss": 0.4172, + "step": 285700 + }, + { + "epoch": 0.027598, + "loss_gen": 6.041465759277344, + "loss_rtd": 0.2162010371685028, + "loss_sent": 0.23427049815654755, + "loss_sod": 0.08856480568647385, + "loss_total": 0.5390363335609436, + "step": 285799 + }, + { + "epoch": 0.027598, + "loss_gen": 5.668476581573486, + "loss_rtd": 0.20259647071361542, + "loss_sent": 0.341512531042099, + "loss_sod": 0.00797621626406908, + "loss_total": 0.5520852208137512, + "step": 285799 + }, + { + "epoch": 0.0276, + "grad_norm": 1.9753674268722534, + "learning_rate": 1.2568198907385609e-05, + "loss": 0.4292, + "step": 285800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.720056533813477, + "loss_rtd": 0.2221757173538208, + "loss_sent": 0.281146377325058, + "loss_sod": 0.03888082504272461, + "loss_total": 0.5422029495239258, + "step": 285899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.756960868835449, + "loss_rtd": 0.2197091430425644, + "loss_sent": 0.3437296152114868, + "loss_sod": 0.013600092381238937, + "loss_total": 0.5770388841629028, + "step": 285899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.7216100692749023, + "learning_rate": 1.2547167892501277e-05, + "loss": 0.423, + "step": 285900 + }, + { + "epoch": 0.027998, + "loss_gen": 5.471036434173584, + "loss_rtd": 0.1858777403831482, + "loss_sent": 0.10051614046096802, + "loss_sod": 0.07392618805170059, + "loss_total": 0.3603200614452362, + "step": 285999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.082937717437744, + "loss_rtd": 0.16976216435432434, + "loss_sent": 0.026819400489330292, + "loss_sod": 0.054379597306251526, + "loss_total": 0.25096115469932556, + "step": 285999 + }, + { + "epoch": 0.028, + "grad_norm": 0.9310762286186218, + "learning_rate": 1.2526151963613242e-05, + "loss": 0.4371, + "step": 286000 + }, + { + "epoch": 0.028, + "eval_loss": 0.40129756927490234, + "eval_runtime": 151.672, + "eval_samples_per_second": 101.818, + "eval_steps_per_second": 0.798, + "step": 286000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.8664326667785645, + "loss_rtd": 0.20758090913295746, + "loss_sent": 0.2003534883260727, + "loss_sod": 0.07129999995231628, + "loss_total": 0.47923439741134644, + "step": 286099 + }, + { + "epoch": 0.028198, + "loss_gen": 5.703149318695068, + "loss_rtd": 0.2316005825996399, + "loss_sent": 0.1504419445991516, + "loss_sod": 0.05148168280720711, + "loss_total": 0.4335242211818695, + "step": 286099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.8415143489837646, + "learning_rate": 1.2505151129186727e-05, + "loss": 0.4379, + "step": 286100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.7814459800720215, + "loss_rtd": 0.18596313893795013, + "loss_sent": 0.218510240316391, + "loss_sod": 0.048116009682416916, + "loss_total": 0.45258939266204834, + "step": 286199 + }, + { + "epoch": 0.028398, + "loss_gen": 6.188547134399414, + "loss_rtd": 0.20028914511203766, + "loss_sent": 0.2166014164686203, + "loss_sod": 0.03200073167681694, + "loss_total": 0.448891282081604, + "step": 286199 + }, + { + "epoch": 0.0284, + "grad_norm": 1.1353873014450073, + "learning_rate": 1.2484165397680841e-05, + "loss": 0.4263, + "step": 286200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.512418270111084, + "loss_rtd": 0.22223679721355438, + "loss_sent": 0.15049083530902863, + "loss_sod": 0.009644124656915665, + "loss_total": 0.3823717534542084, + "step": 286299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.767345905303955, + "loss_rtd": 0.19095493853092194, + "loss_sent": 0.03672843053936958, + "loss_sod": 0.12461046129465103, + "loss_total": 0.35229384899139404, + "step": 286299 + }, + { + "epoch": 0.0286, + "grad_norm": 0.9279076457023621, + "learning_rate": 1.2463194777548642e-05, + "loss": 0.44, + "step": 286300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.638607025146484, + "loss_rtd": 0.21502289175987244, + "loss_sent": 0.09224697202444077, + "loss_sod": 0.036453887820243835, + "loss_total": 0.34372374415397644, + "step": 286399 + }, + { + "epoch": 0.028798, + "loss_gen": 6.230381965637207, + "loss_rtd": 0.21746058762073517, + "loss_sent": 0.10541404783725739, + "loss_sod": 0.031930990517139435, + "loss_total": 0.3548056483268738, + "step": 286399 + }, + { + "epoch": 0.0288, + "grad_norm": 0.8662870526313782, + "learning_rate": 1.2442239277237117e-05, + "loss": 0.4413, + "step": 286400 + }, + { + "epoch": 0.028998, + "loss_gen": 5.631784439086914, + "loss_rtd": 0.22725287079811096, + "loss_sent": 0.07502210140228271, + "loss_sod": 0.0033889992628246546, + "loss_total": 0.30566397309303284, + "step": 286499 + }, + { + "epoch": 0.028998, + "loss_gen": 6.191287517547607, + "loss_rtd": 0.22293871641159058, + "loss_sent": 0.1790841966867447, + "loss_sod": 0.06443575024604797, + "loss_total": 0.46645867824554443, + "step": 286499 + }, + { + "epoch": 0.029, + "grad_norm": 0.5956138968467712, + "learning_rate": 1.24212989051871e-05, + "loss": 0.402, + "step": 286500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.489704132080078, + "loss_rtd": 0.18943743407726288, + "loss_sent": 0.0016923850635066628, + "loss_sod": 0.13636058568954468, + "loss_total": 0.3274904191493988, + "step": 286599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.920446872711182, + "loss_rtd": 0.19623598456382751, + "loss_sent": 0.15697337687015533, + "loss_sod": 0.01677049696445465, + "loss_total": 0.3699798583984375, + "step": 286599 + }, + { + "epoch": 0.0292, + "grad_norm": 0.7704411745071411, + "learning_rate": 1.240037366983341e-05, + "loss": 0.4328, + "step": 286600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.745086193084717, + "loss_rtd": 0.22206702828407288, + "loss_sent": 0.08591610193252563, + "loss_sod": 0.012878085486590862, + "loss_total": 0.32086122035980225, + "step": 286699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.579181671142578, + "loss_rtd": 0.20150712132453918, + "loss_sent": 0.20237770676612854, + "loss_sod": 0.04131292179226875, + "loss_total": 0.4451977610588074, + "step": 286699 + }, + { + "epoch": 0.0294, + "grad_norm": 1.2384002208709717, + "learning_rate": 1.2379463579604689e-05, + "loss": 0.4294, + "step": 286700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.212658405303955, + "loss_rtd": 0.1719597429037094, + "loss_sent": 0.017256371676921844, + "loss_sod": 0.26873350143432617, + "loss_total": 0.4579496383666992, + "step": 286799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.894923686981201, + "loss_rtd": 0.22619961202144623, + "loss_sent": 0.18095217645168304, + "loss_sod": 0.09205351024866104, + "loss_total": 0.4992052912712097, + "step": 286799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.484777569770813, + "learning_rate": 1.2358568642923546e-05, + "loss": 0.4396, + "step": 286800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.847315788269043, + "loss_rtd": 0.2166406661272049, + "loss_sent": 0.16564525663852692, + "loss_sod": 0.011585107073187828, + "loss_total": 0.393871009349823, + "step": 286899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.832507133483887, + "loss_rtd": 0.21148845553398132, + "loss_sent": 0.21389459073543549, + "loss_sod": 0.05752115696668625, + "loss_total": 0.48290419578552246, + "step": 286899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.6614314317703247, + "learning_rate": 1.233768886820646e-05, + "loss": 0.4519, + "step": 286900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.466715335845947, + "loss_rtd": 0.18587522208690643, + "loss_sent": 0.2249709814786911, + "loss_sod": 0.05599728226661682, + "loss_total": 0.46684348583221436, + "step": 286999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.908249378204346, + "loss_rtd": 0.20992301404476166, + "loss_sent": 0.20955248177051544, + "loss_sod": 0.03031115047633648, + "loss_total": 0.4497866630554199, + "step": 286999 + }, + { + "epoch": 0.03, + "grad_norm": 1.157019853591919, + "learning_rate": 1.2316824263863785e-05, + "loss": 0.4248, + "step": 287000 + }, + { + "epoch": 0.03, + "eval_loss": 0.40862521529197693, + "eval_runtime": 151.418, + "eval_samples_per_second": 101.989, + "eval_steps_per_second": 0.799, + "step": 287000 + }, + { + "epoch": 0.030198, + "loss_gen": 5.93383264541626, + "loss_rtd": 0.218502476811409, + "loss_sent": 0.08899467438459396, + "loss_sod": 0.09092241525650024, + "loss_total": 0.3984195590019226, + "step": 287099 + }, + { + "epoch": 0.030198, + "loss_gen": 5.598023414611816, + "loss_rtd": 0.22513043880462646, + "loss_sent": 0.07191810756921768, + "loss_sod": 0.0750078409910202, + "loss_total": 0.37205639481544495, + "step": 287099 + }, + { + "epoch": 0.0302, + "grad_norm": 1.1030205488204956, + "learning_rate": 1.2295974838299785e-05, + "loss": 0.4316, + "step": 287100 + }, + { + "epoch": 0.030398, + "loss_gen": 5.7092180252075195, + "loss_rtd": 0.22398166358470917, + "loss_sent": 0.34127724170684814, + "loss_sod": 0.01627691090106964, + "loss_total": 0.581535816192627, + "step": 287199 + }, + { + "epoch": 0.030398, + "loss_gen": 5.87132453918457, + "loss_rtd": 0.20336180925369263, + "loss_sent": 0.1266353875398636, + "loss_sod": 0.03997209668159485, + "loss_total": 0.36996930837631226, + "step": 287199 + }, + { + "epoch": 0.0304, + "grad_norm": 1.5085489749908447, + "learning_rate": 1.2275140599912616e-05, + "loss": 0.4211, + "step": 287200 + }, + { + "epoch": 0.030598, + "loss_gen": 5.954097270965576, + "loss_rtd": 0.19193975627422333, + "loss_sent": 0.03704122081398964, + "loss_sod": 0.06875681132078171, + "loss_total": 0.2977377772331238, + "step": 287299 + }, + { + "epoch": 0.030598, + "loss_gen": 5.827497482299805, + "loss_rtd": 0.21817778050899506, + "loss_sent": 0.21135996282100677, + "loss_sod": 0.017506320029497147, + "loss_total": 0.4470440745353699, + "step": 287299 + }, + { + "epoch": 0.0306, + "grad_norm": 0.918419361114502, + "learning_rate": 1.2254321557094311e-05, + "loss": 0.4358, + "step": 287300 + }, + { + "epoch": 0.030798, + "loss_gen": 5.492374897003174, + "loss_rtd": 0.20044521987438202, + "loss_sent": 0.07455956190824509, + "loss_sod": 0.017204325646162033, + "loss_total": 0.29220911860466003, + "step": 287399 + }, + { + "epoch": 0.030798, + "loss_gen": 5.504309177398682, + "loss_rtd": 0.18586887419223785, + "loss_sent": 0.013198519125580788, + "loss_sod": 0.055731356143951416, + "loss_total": 0.2547987401485443, + "step": 287399 + }, + { + "epoch": 0.0308, + "grad_norm": 0.9885357022285461, + "learning_rate": 1.2233517718230758e-05, + "loss": 0.4177, + "step": 287400 + }, + { + "epoch": 0.030998, + "loss_gen": 5.919321060180664, + "loss_rtd": 0.21748416125774384, + "loss_sent": 0.10268405824899673, + "loss_sod": 0.018541250377893448, + "loss_total": 0.3387094736099243, + "step": 287499 + }, + { + "epoch": 0.030998, + "loss_gen": 6.114922523498535, + "loss_rtd": 0.22717665135860443, + "loss_sent": 0.17845787107944489, + "loss_sod": 0.011602483689785004, + "loss_total": 0.41723698377609253, + "step": 287499 + }, + { + "epoch": 0.031, + "grad_norm": 0.6173911690711975, + "learning_rate": 1.2212729091701752e-05, + "loss": 0.3988, + "step": 287500 + }, + { + "epoch": 0.031198, + "loss_gen": 5.648708343505859, + "loss_rtd": 0.17013563215732574, + "loss_sent": 0.12543965876102448, + "loss_sod": 0.09033001959323883, + "loss_total": 0.38590532541275024, + "step": 287599 + }, + { + "epoch": 0.031198, + "loss_gen": 5.628352165222168, + "loss_rtd": 0.21228154003620148, + "loss_sent": 0.10157328099012375, + "loss_sod": 0.06106088310480118, + "loss_total": 0.3749157190322876, + "step": 287599 + }, + { + "epoch": 0.0312, + "grad_norm": 1.354960322380066, + "learning_rate": 1.2191955685880963e-05, + "loss": 0.4403, + "step": 287600 + }, + { + "epoch": 0.031398, + "loss_gen": 5.752691268920898, + "loss_rtd": 0.20500053465366364, + "loss_sent": 0.2797483801841736, + "loss_sod": 0.0019102394580841064, + "loss_total": 0.4866591691970825, + "step": 287699 + }, + { + "epoch": 0.031398, + "loss_gen": 5.715508460998535, + "loss_rtd": 0.22658459842205048, + "loss_sent": 0.1587134152650833, + "loss_sod": 0.017449066042900085, + "loss_total": 0.40274709463119507, + "step": 287699 + }, + { + "epoch": 0.0314, + "grad_norm": 0.9837059378623962, + "learning_rate": 1.217119750913589e-05, + "loss": 0.4479, + "step": 287700 + }, + { + "epoch": 0.031598, + "loss_gen": 5.924016952514648, + "loss_rtd": 0.21890516579151154, + "loss_sent": 0.1304733008146286, + "loss_sod": 0.03267096355557442, + "loss_total": 0.38204944133758545, + "step": 287799 + }, + { + "epoch": 0.031598, + "loss_gen": 5.6897969245910645, + "loss_rtd": 0.21855436265468597, + "loss_sent": 0.1375998556613922, + "loss_sod": 0.02110433019697666, + "loss_total": 0.3772585391998291, + "step": 287799 + }, + { + "epoch": 0.0316, + "grad_norm": 1.2844737768173218, + "learning_rate": 1.2150454569827935e-05, + "loss": 0.4113, + "step": 287800 + }, + { + "epoch": 0.031798, + "loss_gen": 5.7530012130737305, + "loss_rtd": 0.22878804802894592, + "loss_sent": 0.4068334698677063, + "loss_sod": 0.023075032979249954, + "loss_total": 0.6586965322494507, + "step": 287899 + }, + { + "epoch": 0.031798, + "loss_gen": 5.890971660614014, + "loss_rtd": 0.22797581553459167, + "loss_sent": 0.24472099542617798, + "loss_sod": 0.05026178061962128, + "loss_total": 0.5229585766792297, + "step": 287899 + }, + { + "epoch": 0.0318, + "grad_norm": 2.550966739654541, + "learning_rate": 1.2129726876312347e-05, + "loss": 0.406, + "step": 287900 + }, + { + "epoch": 0.031998, + "loss_gen": 5.977214336395264, + "loss_rtd": 0.223429337143898, + "loss_sent": 0.10118972510099411, + "loss_sod": 0.022638369351625443, + "loss_total": 0.34725743532180786, + "step": 287999 + }, + { + "epoch": 0.031998, + "loss_gen": 5.822166919708252, + "loss_rtd": 0.22542782127857208, + "loss_sent": 0.20130446553230286, + "loss_sod": 0.03689045459032059, + "loss_total": 0.4636227488517761, + "step": 287999 + }, + { + "epoch": 0.032, + "grad_norm": 1.104121446609497, + "learning_rate": 1.2109014436938265e-05, + "loss": 0.4343, + "step": 288000 + }, + { + "epoch": 0.032, + "eval_loss": 0.4042772054672241, + "eval_runtime": 151.4653, + "eval_samples_per_second": 101.957, + "eval_steps_per_second": 0.799, + "step": 288000 + }, + { + "epoch": 0.000198, + "loss_gen": 6.583367347717285, + "loss_rtd": 0.21817566454410553, + "loss_sent": 0.13085076212882996, + "loss_sod": 0.12323612719774246, + "loss_total": 0.47226256132125854, + "step": 288099 + }, + { + "epoch": 0.000198, + "loss_gen": 6.112459182739258, + "loss_rtd": 0.20037896931171417, + "loss_sent": 0.36410635709762573, + "loss_sod": 0.06308729201555252, + "loss_total": 0.6275726556777954, + "step": 288099 + }, + { + "epoch": 0.0002, + "grad_norm": 1.806197166442871, + "learning_rate": 1.208831726004862e-05, + "loss": 0.4501, + "step": 288100 + }, + { + "epoch": 0.000398, + "loss_gen": 5.959825038909912, + "loss_rtd": 0.2052944004535675, + "loss_sent": 0.2453039139509201, + "loss_sod": 0.05934235453605652, + "loss_total": 0.5099406838417053, + "step": 288199 + }, + { + "epoch": 0.000398, + "loss_gen": 5.293071746826172, + "loss_rtd": 0.1985822468996048, + "loss_sent": 0.04508104547858238, + "loss_sod": 0.06112917512655258, + "loss_total": 0.30479246377944946, + "step": 288199 + }, + { + "epoch": 0.0004, + "grad_norm": 1.0070668458938599, + "learning_rate": 1.206763535398025e-05, + "loss": 0.4345, + "step": 288200 + }, + { + "epoch": 0.000598, + "loss_gen": 5.489973545074463, + "loss_rtd": 0.22991295158863068, + "loss_sent": 0.15148195624351501, + "loss_sod": 0.005384674295783043, + "loss_total": 0.3867795765399933, + "step": 288299 + }, + { + "epoch": 0.000598, + "loss_gen": 5.503274917602539, + "loss_rtd": 0.20294919610023499, + "loss_sent": 0.010002459399402142, + "loss_sod": 0.08979085087776184, + "loss_total": 0.30274251103401184, + "step": 288299 + }, + { + "epoch": 0.0006, + "grad_norm": 0.8200414180755615, + "learning_rate": 1.2046968727063823e-05, + "loss": 0.4195, + "step": 288300 + }, + { + "epoch": 0.000798, + "loss_gen": 6.101378917694092, + "loss_rtd": 0.2206754982471466, + "loss_sent": 0.16157306730747223, + "loss_sod": 0.1463429480791092, + "loss_total": 0.528591513633728, + "step": 288399 + }, + { + "epoch": 0.000798, + "loss_gen": 5.862943172454834, + "loss_rtd": 0.21129311621189117, + "loss_sent": 0.2719140648841858, + "loss_sod": 0.055987320840358734, + "loss_total": 0.5391944646835327, + "step": 288399 + }, + { + "epoch": 0.0008, + "grad_norm": 1.0483568906784058, + "learning_rate": 1.202631738762387e-05, + "loss": 0.4424, + "step": 288400 + }, + { + "epoch": 0.000998, + "loss_gen": 5.596846580505371, + "loss_rtd": 0.19718573987483978, + "loss_sent": 0.12381140142679214, + "loss_sod": 0.07885465025901794, + "loss_total": 0.3998517692089081, + "step": 288499 + }, + { + "epoch": 0.000998, + "loss_gen": 5.635765075683594, + "loss_rtd": 0.1893777996301651, + "loss_sent": 0.21595261991024017, + "loss_sod": 0.014623412862420082, + "loss_total": 0.4199538230895996, + "step": 288499 + }, + { + "epoch": 0.001, + "grad_norm": 1.088371753692627, + "learning_rate": 1.2005681343978713e-05, + "loss": 0.4237, + "step": 288500 + }, + { + "epoch": 0.001198, + "loss_gen": 6.086782455444336, + "loss_rtd": 0.22220873832702637, + "loss_sent": 0.19847138226032257, + "loss_sod": 0.13065265119075775, + "loss_total": 0.5513327717781067, + "step": 288599 + }, + { + "epoch": 0.001198, + "loss_gen": 6.182405471801758, + "loss_rtd": 0.22912736237049103, + "loss_sent": 0.26886099576950073, + "loss_sod": 0.03880622982978821, + "loss_total": 0.5367946028709412, + "step": 288599 + }, + { + "epoch": 0.0012, + "grad_norm": 0.9216969013214111, + "learning_rate": 1.1985060604440574e-05, + "loss": 0.4193, + "step": 288600 + }, + { + "epoch": 0.001398, + "loss_gen": 5.336289405822754, + "loss_rtd": 0.19004136323928833, + "loss_sent": 0.0002211226528743282, + "loss_sod": 0.06926990300416946, + "loss_total": 0.2595323920249939, + "step": 288699 + }, + { + "epoch": 0.001398, + "loss_gen": 5.84647274017334, + "loss_rtd": 0.21220256388187408, + "loss_sent": 0.07044178992509842, + "loss_sod": 0.03766937553882599, + "loss_total": 0.3203137218952179, + "step": 288699 + }, + { + "epoch": 0.0014, + "grad_norm": 0.7235668897628784, + "learning_rate": 1.196445517731547e-05, + "loss": 0.4107, + "step": 288700 + }, + { + "epoch": 0.001598, + "loss_gen": 5.91360330581665, + "loss_rtd": 0.2236490547657013, + "loss_sent": 0.12645292282104492, + "loss_sod": 0.11816971004009247, + "loss_total": 0.4682716727256775, + "step": 288799 + }, + { + "epoch": 0.001598, + "loss_gen": 5.722621917724609, + "loss_rtd": 0.21771691739559174, + "loss_sent": 0.09559899568557739, + "loss_sod": 0.05412505567073822, + "loss_total": 0.36744096875190735, + "step": 288799 + }, + { + "epoch": 0.0016, + "grad_norm": 1.1119811534881592, + "learning_rate": 1.1943865070903294e-05, + "loss": 0.4279, + "step": 288800 + }, + { + "epoch": 0.001798, + "loss_gen": 5.7283244132995605, + "loss_rtd": 0.21435987949371338, + "loss_sent": 0.17511047422885895, + "loss_sod": 0.030356254428625107, + "loss_total": 0.4198266267776489, + "step": 288899 + }, + { + "epoch": 0.001798, + "loss_gen": 5.526970863342285, + "loss_rtd": 0.19748038053512573, + "loss_sent": 0.14994210004806519, + "loss_sod": 0.009142027236521244, + "loss_total": 0.3565645217895508, + "step": 288899 + }, + { + "epoch": 0.0018, + "grad_norm": 0.6695646047592163, + "learning_rate": 1.1923290293497696e-05, + "loss": 0.4262, + "step": 288900 + }, + { + "epoch": 0.001998, + "loss_gen": 5.682656764984131, + "loss_rtd": 0.21027801930904388, + "loss_sent": 0.2659796178340912, + "loss_sod": 0.04973718151450157, + "loss_total": 0.5259947776794434, + "step": 288999 + }, + { + "epoch": 0.001998, + "loss_gen": 5.95244836807251, + "loss_rtd": 0.2203441858291626, + "loss_sent": 0.5933377146720886, + "loss_sod": 0.021773140877485275, + "loss_total": 0.835455060005188, + "step": 288999 + }, + { + "epoch": 0.002, + "grad_norm": 1.7816059589385986, + "learning_rate": 1.1902730853386219e-05, + "loss": 0.4357, + "step": 289000 + }, + { + "epoch": 0.002, + "eval_loss": 0.4045261740684509, + "eval_runtime": 154.5568, + "eval_samples_per_second": 99.918, + "eval_steps_per_second": 0.783, + "step": 289000 + }, + { + "epoch": 0.002198, + "loss_gen": 5.636537075042725, + "loss_rtd": 0.19163213670253754, + "loss_sent": 0.0680651143193245, + "loss_sod": 0.01924016699194908, + "loss_total": 0.278937429189682, + "step": 289099 + }, + { + "epoch": 0.002198, + "loss_gen": 5.6999993324279785, + "loss_rtd": 0.18549351394176483, + "loss_sent": 0.421495258808136, + "loss_sod": 0.05939367786049843, + "loss_total": 0.6663824319839478, + "step": 289099 + }, + { + "epoch": 0.0022, + "grad_norm": 1.0450456142425537, + "learning_rate": 1.1882186758850205e-05, + "loss": 0.419, + "step": 289100 + }, + { + "epoch": 0.002398, + "loss_gen": 5.386354923248291, + "loss_rtd": 0.16911444067955017, + "loss_sent": 0.0005919820396229625, + "loss_sod": 0.0733618512749672, + "loss_total": 0.24306826293468475, + "step": 289199 + }, + { + "epoch": 0.002398, + "loss_gen": 5.968642234802246, + "loss_rtd": 0.20339106023311615, + "loss_sent": 0.1694738119840622, + "loss_sod": 0.05585000291466713, + "loss_total": 0.4287148714065552, + "step": 289199 + }, + { + "epoch": 0.0024, + "grad_norm": 0.9566167593002319, + "learning_rate": 1.1861658018164802e-05, + "loss": 0.4182, + "step": 289200 + }, + { + "epoch": 0.002598, + "loss_gen": 6.1110100746154785, + "loss_rtd": 0.2216099351644516, + "loss_sent": 0.14492638409137726, + "loss_sod": 0.07394503057003021, + "loss_total": 0.44048136472702026, + "step": 289299 + }, + { + "epoch": 0.002598, + "loss_gen": 6.032350540161133, + "loss_rtd": 0.23270705342292786, + "loss_sent": 0.0479719340801239, + "loss_sod": 0.012125834822654724, + "loss_total": 0.2928048372268677, + "step": 289299 + }, + { + "epoch": 0.0026, + "grad_norm": 1.0500189065933228, + "learning_rate": 1.1841144639598977e-05, + "loss": 0.4158, + "step": 289300 + }, + { + "epoch": 0.002798, + "loss_gen": 5.233558654785156, + "loss_rtd": 0.1882786601781845, + "loss_sent": 0.008049456402659416, + "loss_sod": 0.16217921674251556, + "loss_total": 0.35850733518600464, + "step": 289399 + }, + { + "epoch": 0.002798, + "loss_gen": 5.24332332611084, + "loss_rtd": 0.17740464210510254, + "loss_sent": 0.016357656568288803, + "loss_sod": 0.13456860184669495, + "loss_total": 0.3283309042453766, + "step": 289399 + }, + { + "epoch": 0.0028, + "grad_norm": 1.1031545400619507, + "learning_rate": 1.1820646631415538e-05, + "loss": 0.4112, + "step": 289400 + }, + { + "epoch": 0.002998, + "loss_gen": 5.92902946472168, + "loss_rtd": 0.220808744430542, + "loss_sent": 0.12423915416002274, + "loss_sod": 0.07556038349866867, + "loss_total": 0.4206082820892334, + "step": 289499 + }, + { + "epoch": 0.002998, + "loss_gen": 6.194652080535889, + "loss_rtd": 0.21770694851875305, + "loss_sent": 0.6755025386810303, + "loss_sod": 0.060136828571558, + "loss_total": 0.953346312046051, + "step": 289499 + }, + { + "epoch": 0.003, + "grad_norm": 2.728039503097534, + "learning_rate": 1.1800164001871078e-05, + "loss": 0.4284, + "step": 289500 + }, + { + "epoch": 0.003198, + "loss_gen": 6.139127731323242, + "loss_rtd": 0.21810507774353027, + "loss_sent": 0.0803564265370369, + "loss_sod": 0.03863891586661339, + "loss_total": 0.33710041642189026, + "step": 289599 + }, + { + "epoch": 0.003198, + "loss_gen": 5.752684116363525, + "loss_rtd": 0.22759109735488892, + "loss_sent": 0.30329668521881104, + "loss_sod": 0.027985205873847008, + "loss_total": 0.5588729977607727, + "step": 289599 + }, + { + "epoch": 0.0032, + "grad_norm": 1.119472861289978, + "learning_rate": 1.177969675921598e-05, + "loss": 0.4457, + "step": 289600 + }, + { + "epoch": 0.003398, + "loss_gen": 5.589775562286377, + "loss_rtd": 0.22320495545864105, + "loss_sent": 0.23578467965126038, + "loss_sod": 0.007745911367237568, + "loss_total": 0.4667355418205261, + "step": 289699 + }, + { + "epoch": 0.003398, + "loss_gen": 5.557400703430176, + "loss_rtd": 0.2014605849981308, + "loss_sent": 0.140837162733078, + "loss_sod": 0.08160769939422607, + "loss_total": 0.42390546202659607, + "step": 289699 + }, + { + "epoch": 0.0034, + "grad_norm": 1.7268755435943604, + "learning_rate": 1.1759244911694451e-05, + "loss": 0.4394, + "step": 289700 + }, + { + "epoch": 0.003598, + "loss_gen": 5.619295120239258, + "loss_rtd": 0.17604520916938782, + "loss_sent": 0.039584919810295105, + "loss_sod": 0.09962044656276703, + "loss_total": 0.31525057554244995, + "step": 289799 + }, + { + "epoch": 0.003598, + "loss_gen": 5.715934753417969, + "loss_rtd": 0.2389906346797943, + "loss_sent": 0.11277256906032562, + "loss_sod": 0.030528396368026733, + "loss_total": 0.38229161500930786, + "step": 289799 + }, + { + "epoch": 0.0036, + "grad_norm": 1.0768576860427856, + "learning_rate": 1.1738808467544505e-05, + "loss": 0.4088, + "step": 289800 + }, + { + "epoch": 0.003798, + "loss_gen": 6.084167003631592, + "loss_rtd": 0.23010893166065216, + "loss_sent": 0.09768994152545929, + "loss_sod": 0.04006550461053848, + "loss_total": 0.36786437034606934, + "step": 289899 + }, + { + "epoch": 0.003798, + "loss_gen": 5.800467491149902, + "loss_rtd": 0.216391459107399, + "loss_sent": 0.2136368453502655, + "loss_sod": 0.07278364151716232, + "loss_total": 0.5028119087219238, + "step": 289899 + }, + { + "epoch": 0.0038, + "grad_norm": 0.9832937717437744, + "learning_rate": 1.171838743499794e-05, + "loss": 0.4067, + "step": 289900 + }, + { + "epoch": 0.003998, + "loss_gen": 5.556502819061279, + "loss_rtd": 0.21895749866962433, + "loss_sent": 0.06765895336866379, + "loss_sod": 0.038971636444330215, + "loss_total": 0.3255881071090698, + "step": 289999 + }, + { + "epoch": 0.003998, + "loss_gen": 5.713016986846924, + "loss_rtd": 0.21213513612747192, + "loss_sent": 0.14003337919712067, + "loss_sod": 0.018597260117530823, + "loss_total": 0.3707657754421234, + "step": 289999 + }, + { + "epoch": 0.004, + "grad_norm": 0.8992867469787598, + "learning_rate": 1.1697981822280329e-05, + "loss": 0.4336, + "step": 290000 + }, + { + "epoch": 0.004, + "eval_loss": 0.4080997407436371, + "eval_runtime": 151.4259, + "eval_samples_per_second": 101.984, + "eval_steps_per_second": 0.799, + "step": 290000 + }, + { + "epoch": 0.004198, + "loss_gen": 5.927196979522705, + "loss_rtd": 0.21384547650814056, + "loss_sent": 0.28930529952049255, + "loss_sod": 0.053418584167957306, + "loss_total": 0.5565693378448486, + "step": 290099 + }, + { + "epoch": 0.004198, + "loss_gen": 6.024545669555664, + "loss_rtd": 0.2033890038728714, + "loss_sent": 0.049007099121809006, + "loss_sod": 0.024014577269554138, + "loss_total": 0.27641066908836365, + "step": 290099 + }, + { + "epoch": 0.0042, + "grad_norm": 0.8494968414306641, + "learning_rate": 1.1677591637611058e-05, + "loss": 0.4102, + "step": 290100 + }, + { + "epoch": 0.004398, + "loss_gen": 5.861697673797607, + "loss_rtd": 0.1946389377117157, + "loss_sent": 0.2799685597419739, + "loss_sod": 0.01940545253455639, + "loss_total": 0.4940129518508911, + "step": 290199 + }, + { + "epoch": 0.004398, + "loss_gen": 6.297935962677002, + "loss_rtd": 0.22417907416820526, + "loss_sent": 0.13598723709583282, + "loss_sod": 0.07087448984384537, + "loss_total": 0.43104082345962524, + "step": 290199 + }, + { + "epoch": 0.0044, + "grad_norm": 0.7087154388427734, + "learning_rate": 1.1657216889203294e-05, + "loss": 0.4239, + "step": 290200 + }, + { + "epoch": 0.004598, + "loss_gen": 5.808155059814453, + "loss_rtd": 0.21386592090129852, + "loss_sent": 0.13774049282073975, + "loss_sod": 0.07614940404891968, + "loss_total": 0.42775583267211914, + "step": 290299 + }, + { + "epoch": 0.004598, + "loss_gen": 5.876097679138184, + "loss_rtd": 0.21150068938732147, + "loss_sent": 0.08654508739709854, + "loss_sod": 0.030276494100689888, + "loss_total": 0.32832226157188416, + "step": 290299 + }, + { + "epoch": 0.0046, + "grad_norm": 0.7103103995323181, + "learning_rate": 1.1636857585263994e-05, + "loss": 0.4103, + "step": 290300 + }, + { + "epoch": 0.004798, + "loss_gen": 5.536046028137207, + "loss_rtd": 0.18324218690395355, + "loss_sent": 0.18905538320541382, + "loss_sod": 0.018242817372083664, + "loss_total": 0.39054039120674133, + "step": 290399 + }, + { + "epoch": 0.004798, + "loss_gen": 5.71199369430542, + "loss_rtd": 0.23030778765678406, + "loss_sent": 0.19299103319644928, + "loss_sod": 0.15578626096248627, + "loss_total": 0.5790850520133972, + "step": 290399 + }, + { + "epoch": 0.0048, + "grad_norm": 0.9059568047523499, + "learning_rate": 1.1616513733993856e-05, + "loss": 0.4336, + "step": 290400 + }, + { + "epoch": 0.004998, + "loss_gen": 5.776209831237793, + "loss_rtd": 0.20986218750476837, + "loss_sent": 0.1940474957227707, + "loss_sod": 0.03195120394229889, + "loss_total": 0.43586087226867676, + "step": 290499 + }, + { + "epoch": 0.004998, + "loss_gen": 6.1553473472595215, + "loss_rtd": 0.22172221541404724, + "loss_sent": 0.0935099795460701, + "loss_sod": 0.019701147451996803, + "loss_total": 0.334933340549469, + "step": 290499 + }, + { + "epoch": 0.005, + "grad_norm": 1.9684752225875854, + "learning_rate": 1.1596185343587395e-05, + "loss": 0.4315, + "step": 290500 + }, + { + "epoch": 0.005198, + "loss_gen": 5.282568454742432, + "loss_rtd": 0.17847198247909546, + "loss_sent": 0.01683359593153, + "loss_sod": 0.05404090881347656, + "loss_total": 0.24934649467468262, + "step": 290599 + }, + { + "epoch": 0.005198, + "loss_gen": 5.774869918823242, + "loss_rtd": 0.21058449149131775, + "loss_sent": 0.2569732367992401, + "loss_sod": 0.05100230127573013, + "loss_total": 0.5185600519180298, + "step": 290599 + }, + { + "epoch": 0.0052, + "grad_norm": 0.8482804298400879, + "learning_rate": 1.1575872422232892e-05, + "loss": 0.4345, + "step": 290600 + }, + { + "epoch": 0.005398, + "loss_gen": 5.804244518280029, + "loss_rtd": 0.19818954169750214, + "loss_sent": 0.0005043614073656499, + "loss_sod": 0.1376422792673111, + "loss_total": 0.3363361954689026, + "step": 290699 + }, + { + "epoch": 0.005398, + "loss_gen": 5.581037998199463, + "loss_rtd": 0.16705118119716644, + "loss_sent": 6.964366912143305e-05, + "loss_sod": 0.10265404731035233, + "loss_total": 0.2697748839855194, + "step": 290699 + }, + { + "epoch": 0.0054, + "grad_norm": 1.1441339254379272, + "learning_rate": 1.1555574978112387e-05, + "loss": 0.4184, + "step": 290700 + }, + { + "epoch": 0.005598, + "loss_gen": 6.0515217781066895, + "loss_rtd": 0.23304536938667297, + "loss_sent": 0.2999255955219269, + "loss_sod": 0.002595345489680767, + "loss_total": 0.5355663299560547, + "step": 290799 + }, + { + "epoch": 0.005598, + "loss_gen": 5.992300987243652, + "loss_rtd": 0.23110775649547577, + "loss_sent": 0.11044023931026459, + "loss_sod": 0.1251615285873413, + "loss_total": 0.46670955419540405, + "step": 290799 + }, + { + "epoch": 0.0056, + "grad_norm": 1.3418282270431519, + "learning_rate": 1.1535293019401678e-05, + "loss": 0.431, + "step": 290800 + }, + { + "epoch": 0.005798, + "loss_gen": 6.040250301361084, + "loss_rtd": 0.19023177027702332, + "loss_sent": 0.36948129534721375, + "loss_sod": 0.01797431707382202, + "loss_total": 0.5776873826980591, + "step": 290899 + }, + { + "epoch": 0.005798, + "loss_gen": 5.865544319152832, + "loss_rtd": 0.2297186553478241, + "loss_sent": 0.19893589615821838, + "loss_sod": 0.0579451285302639, + "loss_total": 0.4865996837615967, + "step": 290899 + }, + { + "epoch": 0.0058, + "grad_norm": 1.5304007530212402, + "learning_rate": 1.1515026554270336e-05, + "loss": 0.4336, + "step": 290900 + }, + { + "epoch": 0.005998, + "loss_gen": 5.982074737548828, + "loss_rtd": 0.20522068440914154, + "loss_sent": 0.06905671209096909, + "loss_sod": 0.1271514892578125, + "loss_total": 0.4014289081096649, + "step": 290999 + }, + { + "epoch": 0.005998, + "loss_gen": 5.474308013916016, + "loss_rtd": 0.19547529518604279, + "loss_sent": 3.6256878956919536e-05, + "loss_sod": 0.05424252897500992, + "loss_total": 0.24975408613681793, + "step": 290999 + }, + { + "epoch": 0.006, + "grad_norm": 0.7216436862945557, + "learning_rate": 1.1494775590881707e-05, + "loss": 0.439, + "step": 291000 + }, + { + "epoch": 0.006, + "eval_loss": 0.41002848744392395, + "eval_runtime": 152.8536, + "eval_samples_per_second": 101.031, + "eval_steps_per_second": 0.792, + "step": 291000 + }, + { + "epoch": 0.006198, + "loss_gen": 5.9718499183654785, + "loss_rtd": 0.22338102757930756, + "loss_sent": 0.41834065318107605, + "loss_sod": 0.014543645083904266, + "loss_total": 0.6562653183937073, + "step": 291099 + }, + { + "epoch": 0.006198, + "loss_gen": 5.756255149841309, + "loss_rtd": 0.2223668396472931, + "loss_sent": 0.22848108410835266, + "loss_sod": 0.018167486414313316, + "loss_total": 0.4690154194831848, + "step": 291099 + }, + { + "epoch": 0.0062, + "grad_norm": 1.14185631275177, + "learning_rate": 1.1474540137392853e-05, + "loss": 0.4335, + "step": 291100 + }, + { + "epoch": 0.006398, + "loss_gen": 6.031336784362793, + "loss_rtd": 0.20401334762573242, + "loss_sent": 0.31035447120666504, + "loss_sod": 0.011721178889274597, + "loss_total": 0.5260890126228333, + "step": 291199 + }, + { + "epoch": 0.006398, + "loss_gen": 5.556453227996826, + "loss_rtd": 0.19337640702724457, + "loss_sent": 0.06550833582878113, + "loss_sod": 0.022421490401029587, + "loss_total": 0.2813062369823456, + "step": 291199 + }, + { + "epoch": 0.0064, + "grad_norm": 1.1999459266662598, + "learning_rate": 1.1454320201954626e-05, + "loss": 0.4427, + "step": 291200 + }, + { + "epoch": 0.006598, + "loss_gen": 5.517667770385742, + "loss_rtd": 0.2339557409286499, + "loss_sent": 0.07689621299505234, + "loss_sod": 0.017893413081765175, + "loss_total": 0.32874536514282227, + "step": 291299 + }, + { + "epoch": 0.006598, + "loss_gen": 5.999050140380859, + "loss_rtd": 0.23434123396873474, + "loss_sent": 0.09787822514772415, + "loss_sod": 0.0470002181828022, + "loss_total": 0.3792196810245514, + "step": 291299 + }, + { + "epoch": 0.0066, + "grad_norm": 0.653445839881897, + "learning_rate": 1.1434115792711614e-05, + "loss": 0.415, + "step": 291300 + }, + { + "epoch": 0.006798, + "loss_gen": 5.648245334625244, + "loss_rtd": 0.1952313631772995, + "loss_sent": 3.299563832115382e-05, + "loss_sod": 0.12789836525917053, + "loss_total": 0.323162704706192, + "step": 291399 + }, + { + "epoch": 0.006798, + "loss_gen": 5.682374477386475, + "loss_rtd": 0.1928941160440445, + "loss_sent": 0.0007378265727311373, + "loss_sod": 0.09649474918842316, + "loss_total": 0.2901266813278198, + "step": 291399 + }, + { + "epoch": 0.0068, + "grad_norm": 0.8920658826828003, + "learning_rate": 1.1413926917802159e-05, + "loss": 0.4209, + "step": 291400 + }, + { + "epoch": 0.006998, + "loss_gen": 5.852756977081299, + "loss_rtd": 0.212942436337471, + "loss_sent": 0.18852655589580536, + "loss_sod": 0.1215762048959732, + "loss_total": 0.5230451822280884, + "step": 291499 + }, + { + "epoch": 0.006998, + "loss_gen": 5.678630352020264, + "loss_rtd": 0.21963368356227875, + "loss_sent": 0.1962205171585083, + "loss_sod": 0.013026234693825245, + "loss_total": 0.4288804531097412, + "step": 291499 + }, + { + "epoch": 0.007, + "grad_norm": 1.5081746578216553, + "learning_rate": 1.139375358535832e-05, + "loss": 0.4392, + "step": 291500 + }, + { + "epoch": 0.007198, + "loss_gen": 5.75437593460083, + "loss_rtd": 0.2078815996646881, + "loss_sent": 0.3053898513317108, + "loss_sod": 0.08104534447193146, + "loss_total": 0.594316840171814, + "step": 291599 + }, + { + "epoch": 0.007198, + "loss_gen": 5.293683052062988, + "loss_rtd": 0.19586960971355438, + "loss_sent": 3.034544897673186e-05, + "loss_sod": 0.07798950374126434, + "loss_total": 0.2738894522190094, + "step": 291599 + }, + { + "epoch": 0.0072, + "grad_norm": 1.0767451524734497, + "learning_rate": 1.137359580350591e-05, + "loss": 0.4231, + "step": 291600 + }, + { + "epoch": 0.007398, + "loss_gen": 5.748904705047607, + "loss_rtd": 0.2279605269432068, + "loss_sent": 0.13035079836845398, + "loss_sod": 0.05731924623250961, + "loss_total": 0.415630578994751, + "step": 291699 + }, + { + "epoch": 0.007398, + "loss_gen": 5.693149089813232, + "loss_rtd": 0.17812733352184296, + "loss_sent": 0.09372135996818542, + "loss_sod": 0.1162639930844307, + "loss_total": 0.3881126940250397, + "step": 291699 + }, + { + "epoch": 0.0074, + "grad_norm": 1.0027326345443726, + "learning_rate": 1.1353453580364497e-05, + "loss": 0.4276, + "step": 291700 + }, + { + "epoch": 0.007598, + "loss_gen": 5.866568088531494, + "loss_rtd": 0.22758306562900543, + "loss_sent": 0.19470615684986115, + "loss_sod": 0.025758441537618637, + "loss_total": 0.4480476379394531, + "step": 291799 + }, + { + "epoch": 0.007598, + "loss_gen": 5.84188175201416, + "loss_rtd": 0.21974079310894012, + "loss_sent": 0.13410304486751556, + "loss_sod": 0.05712611600756645, + "loss_total": 0.41096997261047363, + "step": 291799 + }, + { + "epoch": 0.0076, + "grad_norm": 0.9332654476165771, + "learning_rate": 1.1333326924047371e-05, + "loss": 0.4407, + "step": 291800 + }, + { + "epoch": 0.007798, + "loss_gen": 5.9282097816467285, + "loss_rtd": 0.2190975695848465, + "loss_sent": 0.23697197437286377, + "loss_sod": 0.008148223161697388, + "loss_total": 0.46421778202056885, + "step": 291899 + }, + { + "epoch": 0.007798, + "loss_gen": 6.181413173675537, + "loss_rtd": 0.21828410029411316, + "loss_sent": 0.27279528975486755, + "loss_sod": 0.02741367369890213, + "loss_total": 0.5184930562973022, + "step": 291899 + }, + { + "epoch": 0.0078, + "grad_norm": 1.0647717714309692, + "learning_rate": 1.1313215842661523e-05, + "loss": 0.4405, + "step": 291900 + }, + { + "epoch": 0.007998, + "loss_gen": 6.085374355316162, + "loss_rtd": 0.22489675879478455, + "loss_sent": 0.20845045149326324, + "loss_sod": 0.06859986484050751, + "loss_total": 0.5019470453262329, + "step": 291999 + }, + { + "epoch": 0.007998, + "loss_gen": 6.197389125823975, + "loss_rtd": 0.21831458806991577, + "loss_sent": 0.21181783080101013, + "loss_sod": 0.015346596948802471, + "loss_total": 0.44547903537750244, + "step": 291999 + }, + { + "epoch": 0.008, + "grad_norm": 0.9735605120658875, + "learning_rate": 1.1293120344307712e-05, + "loss": 0.3987, + "step": 292000 + }, + { + "epoch": 0.008, + "eval_loss": 0.4108790457248688, + "eval_runtime": 151.4209, + "eval_samples_per_second": 101.987, + "eval_steps_per_second": 0.799, + "step": 292000 + }, + { + "epoch": 0.008198, + "loss_gen": 5.955929279327393, + "loss_rtd": 0.23072001338005066, + "loss_sent": 0.2771455943584442, + "loss_sod": 0.020346995443105698, + "loss_total": 0.5282126069068909, + "step": 292099 + }, + { + "epoch": 0.008198, + "loss_gen": 5.931318283081055, + "loss_rtd": 0.22061000764369965, + "loss_sent": 0.3473089635372162, + "loss_sod": 0.04022546112537384, + "loss_total": 0.6081444025039673, + "step": 292099 + }, + { + "epoch": 0.0082, + "grad_norm": 1.074404001235962, + "learning_rate": 1.127304043708039e-05, + "loss": 0.4526, + "step": 292100 + }, + { + "epoch": 0.008398, + "loss_gen": 5.771393299102783, + "loss_rtd": 0.19248847663402557, + "loss_sent": 0.2669306695461273, + "loss_sod": 0.03017154335975647, + "loss_total": 0.48959070444107056, + "step": 292199 + }, + { + "epoch": 0.008398, + "loss_gen": 5.533403396606445, + "loss_rtd": 0.1992393136024475, + "loss_sent": 0.00835365243256092, + "loss_sod": 0.06661911308765411, + "loss_total": 0.2742120623588562, + "step": 292199 + }, + { + "epoch": 0.0084, + "grad_norm": 1.0609321594238281, + "learning_rate": 1.1252976129067767e-05, + "loss": 0.4196, + "step": 292200 + }, + { + "epoch": 0.008598, + "loss_gen": 5.930607318878174, + "loss_rtd": 0.23103652894496918, + "loss_sent": 0.1808062195777893, + "loss_sod": 0.05816710740327835, + "loss_total": 0.47000986337661743, + "step": 292299 + }, + { + "epoch": 0.008598, + "loss_gen": 5.779129981994629, + "loss_rtd": 0.2152262181043625, + "loss_sent": 0.15281803905963898, + "loss_sod": 0.035360969603061676, + "loss_total": 0.40340524911880493, + "step": 292299 + }, + { + "epoch": 0.0086, + "grad_norm": 0.7079684734344482, + "learning_rate": 1.1232927428351713e-05, + "loss": 0.4077, + "step": 292300 + }, + { + "epoch": 0.008798, + "loss_gen": 5.861176013946533, + "loss_rtd": 0.23282544314861298, + "loss_sent": 0.3248097598552704, + "loss_sod": 0.04428011178970337, + "loss_total": 0.6019153594970703, + "step": 292399 + }, + { + "epoch": 0.008798, + "loss_gen": 5.710519790649414, + "loss_rtd": 0.21236442029476166, + "loss_sent": 0.3869825005531311, + "loss_sod": 0.015781860798597336, + "loss_total": 0.615128755569458, + "step": 292399 + }, + { + "epoch": 0.0088, + "grad_norm": 1.3579967021942139, + "learning_rate": 1.1212894343007851e-05, + "loss": 0.4411, + "step": 292400 + }, + { + "epoch": 0.008998, + "loss_gen": 5.598791122436523, + "loss_rtd": 0.2247728407382965, + "loss_sent": 0.10374794155359268, + "loss_sod": 0.006241069175302982, + "loss_total": 0.3347618579864502, + "step": 292499 + }, + { + "epoch": 0.008998, + "loss_gen": 5.96766996383667, + "loss_rtd": 0.2124291956424713, + "loss_sent": 0.04815928265452385, + "loss_sod": 0.011035613715648651, + "loss_total": 0.2716240882873535, + "step": 292499 + }, + { + "epoch": 0.009, + "grad_norm": 0.6185656785964966, + "learning_rate": 1.1192876881105524e-05, + "loss": 0.435, + "step": 292500 + }, + { + "epoch": 0.009198, + "loss_gen": 6.012024402618408, + "loss_rtd": 0.2138938456773758, + "loss_sent": 0.12079918384552002, + "loss_sod": 0.04975533485412598, + "loss_total": 0.3844483494758606, + "step": 292599 + }, + { + "epoch": 0.009198, + "loss_gen": 5.75975227355957, + "loss_rtd": 0.208217591047287, + "loss_sent": 0.13656321167945862, + "loss_sod": 0.05625367909669876, + "loss_total": 0.40103447437286377, + "step": 292599 + }, + { + "epoch": 0.0092, + "grad_norm": 1.2162506580352783, + "learning_rate": 1.1172875050707737e-05, + "loss": 0.4321, + "step": 292600 + }, + { + "epoch": 0.009398, + "loss_gen": 5.854612350463867, + "loss_rtd": 0.22599859535694122, + "loss_sent": 0.21165831387043, + "loss_sod": 0.03499480336904526, + "loss_total": 0.47265172004699707, + "step": 292699 + }, + { + "epoch": 0.009398, + "loss_gen": 5.9117584228515625, + "loss_rtd": 0.2201584130525589, + "loss_sent": 0.27716943621635437, + "loss_sod": 0.010912577621638775, + "loss_total": 0.5082404613494873, + "step": 292699 + }, + { + "epoch": 0.0094, + "grad_norm": 1.9428503513336182, + "learning_rate": 1.115288885987123e-05, + "loss": 0.4168, + "step": 292700 + }, + { + "epoch": 0.009598, + "loss_gen": 5.924159049987793, + "loss_rtd": 0.2209009975194931, + "loss_sent": 0.15670207142829895, + "loss_sod": 0.012711119838058949, + "loss_total": 0.3903141915798187, + "step": 292799 + }, + { + "epoch": 0.009598, + "loss_gen": 5.813932418823242, + "loss_rtd": 0.2028120458126068, + "loss_sent": 0.12386512756347656, + "loss_sod": 0.030841028317809105, + "loss_total": 0.35751819610595703, + "step": 292799 + }, + { + "epoch": 0.0096, + "grad_norm": 0.7272285223007202, + "learning_rate": 1.1132918316646451e-05, + "loss": 0.4278, + "step": 292800 + }, + { + "epoch": 0.009798, + "loss_gen": 5.8489789962768555, + "loss_rtd": 0.2129559963941574, + "loss_sent": 0.3505731225013733, + "loss_sod": 0.10345625877380371, + "loss_total": 0.6669853925704956, + "step": 292899 + }, + { + "epoch": 0.009798, + "loss_gen": 5.843230724334717, + "loss_rtd": 0.19831852614879608, + "loss_sent": 0.372302770614624, + "loss_sod": 0.0443035289645195, + "loss_total": 0.6149247884750366, + "step": 292899 + }, + { + "epoch": 0.0098, + "grad_norm": 1.7858167886734009, + "learning_rate": 1.1112963429077539e-05, + "loss": 0.4316, + "step": 292900 + }, + { + "epoch": 0.009998, + "loss_gen": 5.7075605392456055, + "loss_rtd": 0.18111105263233185, + "loss_sent": 0.009544480592012405, + "loss_sod": 0.06162203848361969, + "loss_total": 0.25227758288383484, + "step": 292999 + }, + { + "epoch": 0.009998, + "loss_gen": 5.608867645263672, + "loss_rtd": 0.1733575314283371, + "loss_sent": 0.06498207896947861, + "loss_sod": 0.019020648673176765, + "loss_total": 0.2573602497577667, + "step": 292999 + }, + { + "epoch": 0.01, + "grad_norm": 0.7675855755805969, + "learning_rate": 1.1093024205202291e-05, + "loss": 0.42, + "step": 293000 + }, + { + "epoch": 0.01, + "eval_loss": 0.4080387055873871, + "eval_runtime": 151.421, + "eval_samples_per_second": 101.987, + "eval_steps_per_second": 0.799, + "step": 293000 + }, + { + "epoch": 0.010198, + "loss_gen": 5.766430854797363, + "loss_rtd": 0.2050667256116867, + "loss_sent": 0.0007737329578958452, + "loss_sod": 0.17520329356193542, + "loss_total": 0.3810437321662903, + "step": 293099 + }, + { + "epoch": 0.010198, + "loss_gen": 5.688209533691406, + "loss_rtd": 0.20277422666549683, + "loss_sent": 0.28075194358825684, + "loss_sod": 0.03886014595627785, + "loss_total": 0.5223863124847412, + "step": 293099 + }, + { + "epoch": 0.0102, + "grad_norm": 1.5843936204910278, + "learning_rate": 1.1073100653052244e-05, + "loss": 0.4179, + "step": 293100 + }, + { + "epoch": 0.010398, + "loss_gen": 5.868280410766602, + "loss_rtd": 0.2234855592250824, + "loss_sent": 0.2485310286283493, + "loss_sod": 0.022049788385629654, + "loss_total": 0.49406635761260986, + "step": 293199 + }, + { + "epoch": 0.010398, + "loss_gen": 5.338741302490234, + "loss_rtd": 0.18713144958019257, + "loss_sent": 0.022464105859398842, + "loss_sod": 0.04646528512239456, + "loss_total": 0.2560608386993408, + "step": 293199 + }, + { + "epoch": 0.0104, + "grad_norm": 0.688319981098175, + "learning_rate": 1.1053192780652594e-05, + "loss": 0.4308, + "step": 293200 + }, + { + "epoch": 0.010598, + "loss_gen": 5.75809907913208, + "loss_rtd": 0.21313640475273132, + "loss_sent": 0.35639873147010803, + "loss_sod": 0.11235233396291733, + "loss_total": 0.6818875074386597, + "step": 293299 + }, + { + "epoch": 0.010598, + "loss_gen": 5.880958080291748, + "loss_rtd": 0.22107014060020447, + "loss_sent": 0.3080473840236664, + "loss_sod": 0.05575653538107872, + "loss_total": 0.5848740339279175, + "step": 293299 + }, + { + "epoch": 0.0106, + "grad_norm": 3.041757583618164, + "learning_rate": 1.103330059602225e-05, + "loss": 0.4356, + "step": 293300 + }, + { + "epoch": 0.010798, + "loss_gen": 5.958160400390625, + "loss_rtd": 0.21889819204807281, + "loss_sent": 0.14059273898601532, + "loss_sod": 0.039761994034051895, + "loss_total": 0.3992529511451721, + "step": 293399 + }, + { + "epoch": 0.010798, + "loss_gen": 5.773192882537842, + "loss_rtd": 0.21891982853412628, + "loss_sent": 0.33050525188446045, + "loss_sod": 0.009372915141284466, + "loss_total": 0.5587980151176453, + "step": 293399 + }, + { + "epoch": 0.0108, + "grad_norm": 1.1098265647888184, + "learning_rate": 1.1013424107173753e-05, + "loss": 0.4228, + "step": 293400 + }, + { + "epoch": 0.010998, + "loss_gen": 5.468547821044922, + "loss_rtd": 0.18723736703395844, + "loss_sent": 0.0818619504570961, + "loss_sod": 0.1300342082977295, + "loss_total": 0.3991335332393646, + "step": 293499 + }, + { + "epoch": 0.010998, + "loss_gen": 5.687875747680664, + "loss_rtd": 0.20722660422325134, + "loss_sent": 0.2695978879928589, + "loss_sod": 0.012449869886040688, + "loss_total": 0.48927438259124756, + "step": 293499 + }, + { + "epoch": 0.011, + "grad_norm": 1.1542943716049194, + "learning_rate": 1.0993563322113365e-05, + "loss": 0.4211, + "step": 293500 + }, + { + "epoch": 0.011198, + "loss_gen": 5.802901268005371, + "loss_rtd": 0.2055222988128662, + "loss_sent": 0.278396338224411, + "loss_sod": 0.040736123919487, + "loss_total": 0.524654746055603, + "step": 293599 + }, + { + "epoch": 0.011198, + "loss_gen": 5.8944783210754395, + "loss_rtd": 0.19163289666175842, + "loss_sent": 0.12466850876808167, + "loss_sod": 0.056496649980545044, + "loss_total": 0.37279805541038513, + "step": 293599 + }, + { + "epoch": 0.0112, + "grad_norm": 1.147995114326477, + "learning_rate": 1.0973718248841003e-05, + "loss": 0.4383, + "step": 293600 + }, + { + "epoch": 0.011398, + "loss_gen": 5.991019248962402, + "loss_rtd": 0.22885239124298096, + "loss_sent": 0.2256946861743927, + "loss_sod": 0.02889326959848404, + "loss_total": 0.4834403395652771, + "step": 293699 + }, + { + "epoch": 0.011398, + "loss_gen": 6.015378475189209, + "loss_rtd": 0.21590931713581085, + "loss_sent": 0.11068647354841232, + "loss_sod": 0.05200549215078354, + "loss_total": 0.3786012828350067, + "step": 293699 + }, + { + "epoch": 0.0114, + "grad_norm": 1.2632439136505127, + "learning_rate": 1.0953888895350279e-05, + "loss": 0.4527, + "step": 293700 + }, + { + "epoch": 0.011598, + "loss_gen": 5.888670921325684, + "loss_rtd": 0.2036859095096588, + "loss_sent": 0.17866621911525726, + "loss_sod": 0.18804116547107697, + "loss_total": 0.5703933238983154, + "step": 293799 + }, + { + "epoch": 0.011598, + "loss_gen": 6.096378326416016, + "loss_rtd": 0.21143315732479095, + "loss_sent": 0.111661896109581, + "loss_sod": 0.028706058859825134, + "loss_total": 0.3518010973930359, + "step": 293799 + }, + { + "epoch": 0.0116, + "grad_norm": 1.6043952703475952, + "learning_rate": 1.0934075269628425e-05, + "loss": 0.4373, + "step": 293800 + }, + { + "epoch": 0.011798, + "loss_gen": 5.471293926239014, + "loss_rtd": 0.20524010062217712, + "loss_sent": 0.3265687823295593, + "loss_sod": 0.06880029290914536, + "loss_total": 0.6006091833114624, + "step": 293899 + }, + { + "epoch": 0.011798, + "loss_gen": 5.927410125732422, + "loss_rtd": 0.1994786560535431, + "loss_sent": 0.2063358873128891, + "loss_sod": 0.08651162683963776, + "loss_total": 0.49232620000839233, + "step": 293899 + }, + { + "epoch": 0.0118, + "grad_norm": 1.4650521278381348, + "learning_rate": 1.091427737965638e-05, + "loss": 0.4264, + "step": 293900 + }, + { + "epoch": 0.011998, + "loss_gen": 5.625615119934082, + "loss_rtd": 0.20205235481262207, + "loss_sent": 0.1508893072605133, + "loss_sod": 0.01654103957116604, + "loss_total": 0.36948269605636597, + "step": 293999 + }, + { + "epoch": 0.011998, + "loss_gen": 5.612279415130615, + "loss_rtd": 0.226791113615036, + "loss_sent": 0.16933803260326385, + "loss_sod": 0.003286023624241352, + "loss_total": 0.39941516518592834, + "step": 293999 + }, + { + "epoch": 0.012, + "grad_norm": 0.9215983152389526, + "learning_rate": 1.0894495233408746e-05, + "loss": 0.4227, + "step": 294000 + }, + { + "epoch": 0.012, + "eval_loss": 0.41339993476867676, + "eval_runtime": 151.3193, + "eval_samples_per_second": 102.056, + "eval_steps_per_second": 0.8, + "step": 294000 + }, + { + "epoch": 0.012198, + "loss_gen": 5.763054847717285, + "loss_rtd": 0.20253047347068787, + "loss_sent": 0.29640471935272217, + "loss_sod": 0.05300295352935791, + "loss_total": 0.5519381761550903, + "step": 294099 + }, + { + "epoch": 0.012198, + "loss_gen": 5.384382247924805, + "loss_rtd": 0.17322170734405518, + "loss_sent": 0.07608072459697723, + "loss_sod": 0.0016368563519790769, + "loss_total": 0.2509393095970154, + "step": 294099 + }, + { + "epoch": 0.0122, + "grad_norm": 1.4361717700958252, + "learning_rate": 1.0874728838853742e-05, + "loss": 0.4371, + "step": 294100 + }, + { + "epoch": 0.012398, + "loss_gen": 5.522751808166504, + "loss_rtd": 0.2118579000234604, + "loss_sent": 0.09907300770282745, + "loss_sod": 0.001598638598807156, + "loss_total": 0.3125295639038086, + "step": 294199 + }, + { + "epoch": 0.012398, + "loss_gen": 5.788977146148682, + "loss_rtd": 0.18781417608261108, + "loss_sent": 0.006302570924162865, + "loss_sod": 0.04807844012975693, + "loss_total": 0.24219518899917603, + "step": 294199 + }, + { + "epoch": 0.0124, + "grad_norm": 0.5275652408599854, + "learning_rate": 1.085497820395328e-05, + "loss": 0.4405, + "step": 294200 + }, + { + "epoch": 0.012598, + "loss_gen": 5.767052173614502, + "loss_rtd": 0.21944154798984528, + "loss_sent": 0.3759983777999878, + "loss_sod": 0.07488761842250824, + "loss_total": 0.6703275442123413, + "step": 294299 + }, + { + "epoch": 0.012598, + "loss_gen": 5.955311298370361, + "loss_rtd": 0.215692937374115, + "loss_sent": 0.29599979519844055, + "loss_sod": 0.051369279623031616, + "loss_total": 0.5630620121955872, + "step": 294299 + }, + { + "epoch": 0.0126, + "grad_norm": 1.6063812971115112, + "learning_rate": 1.083524333666292e-05, + "loss": 0.4139, + "step": 294300 + }, + { + "epoch": 0.012798, + "loss_gen": 5.296679496765137, + "loss_rtd": 0.19681201875209808, + "loss_sent": 0.00028336889226920903, + "loss_sod": 0.1252090483903885, + "loss_total": 0.3223044276237488, + "step": 294399 + }, + { + "epoch": 0.012798, + "loss_gen": 5.031749725341797, + "loss_rtd": 0.17776720225811005, + "loss_sent": 2.7434438379714265e-05, + "loss_sod": 0.12903621792793274, + "loss_total": 0.30683085322380066, + "step": 294399 + }, + { + "epoch": 0.0128, + "grad_norm": 1.2570996284484863, + "learning_rate": 1.0815524244931875e-05, + "loss": 0.4282, + "step": 294400 + }, + { + "epoch": 0.012998, + "loss_gen": 5.5852532386779785, + "loss_rtd": 0.1918531209230423, + "loss_sent": 0.08934935927391052, + "loss_sod": 0.045001521706581116, + "loss_total": 0.32620400190353394, + "step": 294499 + }, + { + "epoch": 0.012998, + "loss_gen": 5.534328937530518, + "loss_rtd": 0.1863044649362564, + "loss_sent": 0.02965555712580681, + "loss_sod": 0.01043771207332611, + "loss_total": 0.22639773786067963, + "step": 294499 + }, + { + "epoch": 0.013, + "grad_norm": 0.5376600027084351, + "learning_rate": 1.0795820936702961e-05, + "loss": 0.4217, + "step": 294500 + }, + { + "epoch": 0.013198, + "loss_gen": 5.873933792114258, + "loss_rtd": 0.21789704263210297, + "loss_sent": 0.07002614438533783, + "loss_sod": 0.013727325946092606, + "loss_total": 0.3016505241394043, + "step": 294599 + }, + { + "epoch": 0.013198, + "loss_gen": 5.904489040374756, + "loss_rtd": 0.21422289311885834, + "loss_sent": 0.1969592124223709, + "loss_sod": 0.07365565001964569, + "loss_total": 0.48483777046203613, + "step": 294599 + }, + { + "epoch": 0.0132, + "grad_norm": 1.0407181978225708, + "learning_rate": 1.0776133419912682e-05, + "loss": 0.4435, + "step": 294600 + }, + { + "epoch": 0.013398, + "loss_gen": 5.62467098236084, + "loss_rtd": 0.18759846687316895, + "loss_sent": 0.017625365406274796, + "loss_sod": 0.03514987975358963, + "loss_total": 0.24037370085716248, + "step": 294699 + }, + { + "epoch": 0.013398, + "loss_gen": 5.553157806396484, + "loss_rtd": 0.1785619556903839, + "loss_sent": 0.01877358928322792, + "loss_sod": 0.03354465216398239, + "loss_total": 0.23088020086288452, + "step": 294699 + }, + { + "epoch": 0.0134, + "grad_norm": 0.5728604793548584, + "learning_rate": 1.0756461702491177e-05, + "loss": 0.4177, + "step": 294700 + }, + { + "epoch": 0.013598, + "loss_gen": 6.215653419494629, + "loss_rtd": 0.21273796260356903, + "loss_sent": 0.17294226586818695, + "loss_sod": 0.01092920545488596, + "loss_total": 0.39660942554473877, + "step": 294799 + }, + { + "epoch": 0.013598, + "loss_gen": 5.096684455871582, + "loss_rtd": 0.17959147691726685, + "loss_sent": 0.021339621394872665, + "loss_sod": 0.11821482330560684, + "loss_total": 0.31914591789245605, + "step": 294799 + }, + { + "epoch": 0.0136, + "grad_norm": 0.9141101837158203, + "learning_rate": 1.0736805792362214e-05, + "loss": 0.4432, + "step": 294800 + }, + { + "epoch": 0.013798, + "loss_gen": 5.342113494873047, + "loss_rtd": 0.19361698627471924, + "loss_sent": 0.09061580151319504, + "loss_sod": 0.0437910333275795, + "loss_total": 0.3280238211154938, + "step": 294899 + }, + { + "epoch": 0.013798, + "loss_gen": 5.73457145690918, + "loss_rtd": 0.20573337376117706, + "loss_sent": 0.11693263053894043, + "loss_sod": 0.01017211563885212, + "loss_total": 0.33283811807632446, + "step": 294899 + }, + { + "epoch": 0.0138, + "grad_norm": 0.8815062046051025, + "learning_rate": 1.0717165697443177e-05, + "loss": 0.4115, + "step": 294900 + }, + { + "epoch": 0.013998, + "loss_gen": 5.697635173797607, + "loss_rtd": 0.18362516164779663, + "loss_sent": 0.0441841259598732, + "loss_sod": 0.0444292277097702, + "loss_total": 0.27223852276802063, + "step": 294999 + }, + { + "epoch": 0.013998, + "loss_gen": 5.7185492515563965, + "loss_rtd": 0.1831914335489273, + "loss_sent": 0.00016801382298581302, + "loss_sod": 0.10571719706058502, + "loss_total": 0.28907665610313416, + "step": 294999 + }, + { + "epoch": 0.014, + "grad_norm": 1.007917881011963, + "learning_rate": 1.069754142564509e-05, + "loss": 0.43, + "step": 295000 + }, + { + "epoch": 0.014, + "eval_loss": 0.4112323522567749, + "eval_runtime": 151.4167, + "eval_samples_per_second": 101.99, + "eval_steps_per_second": 0.799, + "step": 295000 + }, + { + "epoch": 0.014198, + "loss_gen": 5.640534400939941, + "loss_rtd": 0.20620249211788177, + "loss_sent": 0.3413327932357788, + "loss_sod": 0.0013544512912631035, + "loss_total": 0.5488897562026978, + "step": 295099 + }, + { + "epoch": 0.014198, + "loss_gen": 5.677936553955078, + "loss_rtd": 0.2097548544406891, + "loss_sent": 0.1667233407497406, + "loss_sod": 0.07956099510192871, + "loss_total": 0.4560391902923584, + "step": 295099 + }, + { + "epoch": 0.0142, + "grad_norm": 1.303545594215393, + "learning_rate": 1.0677932984872624e-05, + "loss": 0.4405, + "step": 295100 + }, + { + "epoch": 0.014398, + "loss_gen": 5.546933174133301, + "loss_rtd": 0.19512756168842316, + "loss_sent": 0.003282061545178294, + "loss_sod": 0.07110337913036346, + "loss_total": 0.26951301097869873, + "step": 295199 + }, + { + "epoch": 0.014398, + "loss_gen": 5.376771450042725, + "loss_rtd": 0.1766503006219864, + "loss_sent": 0.04354753717780113, + "loss_sod": 0.10782700777053833, + "loss_total": 0.32802483439445496, + "step": 295199 + }, + { + "epoch": 0.0144, + "grad_norm": 0.8968387246131897, + "learning_rate": 1.0658340383024057e-05, + "loss": 0.4263, + "step": 295200 + }, + { + "epoch": 0.014598, + "loss_gen": 5.709486484527588, + "loss_rtd": 0.2513912320137024, + "loss_sent": 0.2460688054561615, + "loss_sod": 0.016093676909804344, + "loss_total": 0.5135537385940552, + "step": 295299 + }, + { + "epoch": 0.014598, + "loss_gen": 6.1804399490356445, + "loss_rtd": 0.22530977427959442, + "loss_sent": 0.14730171859264374, + "loss_sod": 0.03301909938454628, + "loss_total": 0.40563058853149414, + "step": 295299 + }, + { + "epoch": 0.0146, + "grad_norm": 0.877086341381073, + "learning_rate": 1.0638763627991283e-05, + "loss": 0.4127, + "step": 295300 + }, + { + "epoch": 0.014798, + "loss_gen": 6.099516868591309, + "loss_rtd": 0.22501027584075928, + "loss_sent": 0.08149467408657074, + "loss_sod": 0.14560794830322266, + "loss_total": 0.45211291313171387, + "step": 295399 + }, + { + "epoch": 0.014798, + "loss_gen": 5.662152290344238, + "loss_rtd": 0.222167506814003, + "loss_sent": 0.15366816520690918, + "loss_sod": 0.12854395806789398, + "loss_total": 0.5043796300888062, + "step": 295399 + }, + { + "epoch": 0.0148, + "grad_norm": 1.238622784614563, + "learning_rate": 1.06192027276598e-05, + "loss": 0.4047, + "step": 295400 + }, + { + "epoch": 0.014998, + "loss_gen": 5.903112411499023, + "loss_rtd": 0.20635390281677246, + "loss_sent": 0.13178260624408722, + "loss_sod": 0.15884466469287872, + "loss_total": 0.496981143951416, + "step": 295499 + }, + { + "epoch": 0.014998, + "loss_gen": 5.323427677154541, + "loss_rtd": 0.1728198230266571, + "loss_sent": 0.045676738023757935, + "loss_sod": 0.08595232665538788, + "loss_total": 0.3044488728046417, + "step": 295499 + }, + { + "epoch": 0.015, + "grad_norm": 1.3525426387786865, + "learning_rate": 1.0599657689908742e-05, + "loss": 0.4227, + "step": 295500 + }, + { + "epoch": 0.015198, + "loss_gen": 5.918006420135498, + "loss_rtd": 0.21017996966838837, + "loss_sent": 0.12502259016036987, + "loss_sod": 0.0637504905462265, + "loss_total": 0.39895305037498474, + "step": 295599 + }, + { + "epoch": 0.015198, + "loss_gen": 6.010242938995361, + "loss_rtd": 0.20556005835533142, + "loss_sent": 0.19859637320041656, + "loss_sod": 0.034558624029159546, + "loss_total": 0.43871504068374634, + "step": 295599 + }, + { + "epoch": 0.0152, + "grad_norm": 1.5819967985153198, + "learning_rate": 1.0580128522610872e-05, + "loss": 0.4275, + "step": 295600 + }, + { + "epoch": 0.015398, + "loss_gen": 5.248749732971191, + "loss_rtd": 0.18831850588321686, + "loss_sent": 0.030619287863373756, + "loss_sod": 0.05449339747428894, + "loss_total": 0.2734311819076538, + "step": 295699 + }, + { + "epoch": 0.015398, + "loss_gen": 5.176632881164551, + "loss_rtd": 0.16702422499656677, + "loss_sent": 0.03597523272037506, + "loss_sod": 0.0655825212597847, + "loss_total": 0.26858198642730713, + "step": 295699 + }, + { + "epoch": 0.0154, + "grad_norm": 0.7380995750427246, + "learning_rate": 1.056061523363251e-05, + "loss": 0.4084, + "step": 295700 + }, + { + "epoch": 0.015598, + "loss_gen": 5.933668613433838, + "loss_rtd": 0.23030397295951843, + "loss_sent": 0.11890958249568939, + "loss_sod": 0.1161596029996872, + "loss_total": 0.465373158454895, + "step": 295799 + }, + { + "epoch": 0.015598, + "loss_gen": 5.771134376525879, + "loss_rtd": 0.20498719811439514, + "loss_sent": 0.06305620819330215, + "loss_sod": 0.024693351238965988, + "loss_total": 0.2927367687225342, + "step": 295799 + }, + { + "epoch": 0.0156, + "grad_norm": 1.2381393909454346, + "learning_rate": 1.0541117830833608e-05, + "loss": 0.435, + "step": 295800 + }, + { + "epoch": 0.015798, + "loss_gen": 5.445761680603027, + "loss_rtd": 0.19832631945610046, + "loss_sent": 0.09245370328426361, + "loss_sod": 0.06520716845989227, + "loss_total": 0.35598719120025635, + "step": 295899 + }, + { + "epoch": 0.015798, + "loss_gen": 5.892358779907227, + "loss_rtd": 0.22854578495025635, + "loss_sent": 0.16346819698810577, + "loss_sod": 0.04468546062707901, + "loss_total": 0.43669945001602173, + "step": 295899 + }, + { + "epoch": 0.0158, + "grad_norm": 1.5533887147903442, + "learning_rate": 1.052163632206773e-05, + "loss": 0.4148, + "step": 295900 + }, + { + "epoch": 0.015998, + "loss_gen": 5.874584197998047, + "loss_rtd": 0.2053760290145874, + "loss_sent": 0.12736639380455017, + "loss_sod": 0.12023165822029114, + "loss_total": 0.4529740810394287, + "step": 295999 + }, + { + "epoch": 0.015998, + "loss_gen": 5.839260101318359, + "loss_rtd": 0.2121221274137497, + "loss_sent": 0.20069658756256104, + "loss_sod": 0.012942355126142502, + "loss_total": 0.42576107382774353, + "step": 295999 + }, + { + "epoch": 0.016, + "grad_norm": 0.8821128606796265, + "learning_rate": 1.050217071518203e-05, + "loss": 0.4363, + "step": 296000 + }, + { + "epoch": 0.016, + "eval_loss": 0.4061391353607178, + "eval_runtime": 151.6422, + "eval_samples_per_second": 101.838, + "eval_steps_per_second": 0.798, + "step": 296000 + }, + { + "epoch": 0.016198, + "loss_gen": 5.9743757247924805, + "loss_rtd": 0.21355779469013214, + "loss_sent": 0.2628011405467987, + "loss_sod": 0.13888202607631683, + "loss_total": 0.6152409315109253, + "step": 296099 + }, + { + "epoch": 0.016198, + "loss_gen": 5.897843360900879, + "loss_rtd": 0.2111058086156845, + "loss_sent": 0.10183496028184891, + "loss_sod": 0.02770070731639862, + "loss_total": 0.34064146876335144, + "step": 296099 + }, + { + "epoch": 0.0162, + "grad_norm": 1.251771092414856, + "learning_rate": 1.0482721018017232e-05, + "loss": 0.4263, + "step": 296100 + }, + { + "epoch": 0.016398, + "loss_gen": 5.334744453430176, + "loss_rtd": 0.1818494349718094, + "loss_sent": 0.03574901446700096, + "loss_sod": 0.03192409500479698, + "loss_total": 0.24952255189418793, + "step": 296199 + }, + { + "epoch": 0.016398, + "loss_gen": 5.954919338226318, + "loss_rtd": 0.20946823060512543, + "loss_sent": 0.2702799439430237, + "loss_sod": 0.12556950747966766, + "loss_total": 0.6053177118301392, + "step": 296199 + }, + { + "epoch": 0.0164, + "grad_norm": 0.9505832195281982, + "learning_rate": 1.0463287238407682e-05, + "loss": 0.4314, + "step": 296200 + }, + { + "epoch": 0.016598, + "loss_gen": 5.765938758850098, + "loss_rtd": 0.21922503411769867, + "loss_sent": 0.3533184826374054, + "loss_sod": 0.038111329078674316, + "loss_total": 0.6106548309326172, + "step": 296299 + }, + { + "epoch": 0.016598, + "loss_gen": 5.936850547790527, + "loss_rtd": 0.21708908677101135, + "loss_sent": 0.09488590806722641, + "loss_sod": 0.02059009112417698, + "loss_total": 0.3325650990009308, + "step": 296299 + }, + { + "epoch": 0.0166, + "grad_norm": 1.1279613971710205, + "learning_rate": 1.0443869384181304e-05, + "loss": 0.4186, + "step": 296300 + }, + { + "epoch": 0.016798, + "loss_gen": 5.8069682121276855, + "loss_rtd": 0.2173311859369278, + "loss_sent": 0.06469067931175232, + "loss_sod": 0.09630684554576874, + "loss_total": 0.37832871079444885, + "step": 296399 + }, + { + "epoch": 0.016798, + "loss_gen": 5.9797797203063965, + "loss_rtd": 0.2254602611064911, + "loss_sent": 0.3228491246700287, + "loss_sod": 0.08778952062129974, + "loss_total": 0.6360988616943359, + "step": 296399 + }, + { + "epoch": 0.0168, + "grad_norm": 1.264456868171692, + "learning_rate": 1.0424467463159621e-05, + "loss": 0.4301, + "step": 296400 + }, + { + "epoch": 0.016998, + "loss_gen": 5.872979164123535, + "loss_rtd": 0.22613875567913055, + "loss_sent": 0.10173720121383667, + "loss_sod": 0.03349519520998001, + "loss_total": 0.36137115955352783, + "step": 296499 + }, + { + "epoch": 0.016998, + "loss_gen": 5.38588285446167, + "loss_rtd": 0.1842663586139679, + "loss_sent": 0.03717409819364548, + "loss_sod": 0.08778329193592072, + "loss_total": 0.3092237710952759, + "step": 296499 + }, + { + "epoch": 0.017, + "grad_norm": 0.9810932874679565, + "learning_rate": 1.0405081483157698e-05, + "loss": 0.448, + "step": 296500 + }, + { + "epoch": 0.017198, + "loss_gen": 5.761422634124756, + "loss_rtd": 0.19922517240047455, + "loss_sent": 0.15324638783931732, + "loss_sod": 0.028836267068982124, + "loss_total": 0.38130784034729004, + "step": 296599 + }, + { + "epoch": 0.017198, + "loss_gen": 5.834839820861816, + "loss_rtd": 0.18709823489189148, + "loss_sent": 0.20373891294002533, + "loss_sod": 0.014057589694857597, + "loss_total": 0.40489473938941956, + "step": 296599 + }, + { + "epoch": 0.0172, + "grad_norm": 0.6301513314247131, + "learning_rate": 1.0385711451984216e-05, + "loss": 0.3983, + "step": 296600 + }, + { + "epoch": 0.017398, + "loss_gen": 5.727804183959961, + "loss_rtd": 0.20559702813625336, + "loss_sent": 0.25096338987350464, + "loss_sod": 0.012648189440369606, + "loss_total": 0.46920859813690186, + "step": 296699 + }, + { + "epoch": 0.017398, + "loss_gen": 5.767256736755371, + "loss_rtd": 0.22392408549785614, + "loss_sent": 0.2691446542739868, + "loss_sod": 0.016966650262475014, + "loss_total": 0.5100353956222534, + "step": 296699 + }, + { + "epoch": 0.0174, + "grad_norm": 0.9540920257568359, + "learning_rate": 1.0366357377441427e-05, + "loss": 0.4354, + "step": 296700 + }, + { + "epoch": 0.017598, + "loss_gen": 5.80673885345459, + "loss_rtd": 0.21131189167499542, + "loss_sent": 0.20243430137634277, + "loss_sod": 0.08631715923547745, + "loss_total": 0.5000633597373962, + "step": 296799 + }, + { + "epoch": 0.017598, + "loss_gen": 5.06290340423584, + "loss_rtd": 0.1622084677219391, + "loss_sent": 0.00015526461356785148, + "loss_sod": 0.10462068021297455, + "loss_total": 0.26698440313339233, + "step": 296799 + }, + { + "epoch": 0.0176, + "grad_norm": 1.232321858406067, + "learning_rate": 1.0347019267325158e-05, + "loss": 0.423, + "step": 296800 + }, + { + "epoch": 0.017798, + "loss_gen": 5.783798694610596, + "loss_rtd": 0.21064801514148712, + "loss_sent": 0.0831662192940712, + "loss_sod": 0.06500860303640366, + "loss_total": 0.3588228225708008, + "step": 296899 + }, + { + "epoch": 0.017798, + "loss_gen": 5.779657363891602, + "loss_rtd": 0.2202780544757843, + "loss_sent": 0.06787479668855667, + "loss_sod": 0.10200324654579163, + "loss_total": 0.390156090259552, + "step": 296899 + }, + { + "epoch": 0.0178, + "grad_norm": 1.2608566284179688, + "learning_rate": 1.0327697129424774e-05, + "loss": 0.4132, + "step": 296900 + }, + { + "epoch": 0.017998, + "loss_gen": 5.684676647186279, + "loss_rtd": 0.2112094908952713, + "loss_sent": 0.12982314825057983, + "loss_sod": 0.025935960933566093, + "loss_total": 0.3669686019420624, + "step": 296999 + }, + { + "epoch": 0.017998, + "loss_gen": 5.565051078796387, + "loss_rtd": 0.1902083456516266, + "loss_sent": 0.08210460096597672, + "loss_sod": 0.08566206693649292, + "loss_total": 0.3579750061035156, + "step": 296999 + }, + { + "epoch": 0.018, + "grad_norm": 0.8872039318084717, + "learning_rate": 1.030839097152324e-05, + "loss": 0.4287, + "step": 297000 + }, + { + "epoch": 0.018, + "eval_loss": 0.4070330858230591, + "eval_runtime": 151.3494, + "eval_samples_per_second": 102.035, + "eval_steps_per_second": 0.799, + "step": 297000 + }, + { + "epoch": 0.018198, + "loss_gen": 6.026285171508789, + "loss_rtd": 0.24354510009288788, + "loss_sent": 0.14378716051578522, + "loss_sod": 0.07584456354379654, + "loss_total": 0.4631768465042114, + "step": 297099 + }, + { + "epoch": 0.018198, + "loss_gen": 5.670589923858643, + "loss_rtd": 0.21470296382904053, + "loss_sent": 0.07253681868314743, + "loss_sod": 0.14160436391830444, + "loss_total": 0.428844153881073, + "step": 297099 + }, + { + "epoch": 0.0182, + "grad_norm": 1.7557255029678345, + "learning_rate": 1.0289100801397088e-05, + "loss": 0.4385, + "step": 297100 + }, + { + "epoch": 0.018398, + "loss_gen": 5.80548620223999, + "loss_rtd": 0.2085079401731491, + "loss_sent": 0.12764374911785126, + "loss_sod": 0.03700674697756767, + "loss_total": 0.37315845489501953, + "step": 297199 + }, + { + "epoch": 0.018398, + "loss_gen": 5.622036933898926, + "loss_rtd": 0.1856563836336136, + "loss_sent": 0.015428837388753891, + "loss_sod": 0.03319863975048065, + "loss_total": 0.23428386449813843, + "step": 297199 + }, + { + "epoch": 0.0184, + "grad_norm": 0.8043609261512756, + "learning_rate": 1.0269826626816376e-05, + "loss": 0.4314, + "step": 297200 + }, + { + "epoch": 0.018598, + "loss_gen": 5.673922061920166, + "loss_rtd": 0.1946595311164856, + "loss_sent": 0.07288999110460281, + "loss_sod": 0.04901793226599693, + "loss_total": 0.31656745076179504, + "step": 297299 + }, + { + "epoch": 0.018598, + "loss_gen": 5.985547065734863, + "loss_rtd": 0.21650931239128113, + "loss_sent": 0.32444992661476135, + "loss_sod": 0.01558828353881836, + "loss_total": 0.5565475225448608, + "step": 297299 + }, + { + "epoch": 0.0186, + "grad_norm": 1.7502100467681885, + "learning_rate": 1.0250568455544745e-05, + "loss": 0.4293, + "step": 297300 + }, + { + "epoch": 0.018798, + "loss_gen": 5.949221134185791, + "loss_rtd": 0.21529193222522736, + "loss_sent": 0.25843536853790283, + "loss_sod": 0.042320068925619125, + "loss_total": 0.5160473585128784, + "step": 297399 + }, + { + "epoch": 0.018798, + "loss_gen": 5.934944152832031, + "loss_rtd": 0.212518572807312, + "loss_sent": 0.16465464234352112, + "loss_sod": 0.011793924495577812, + "loss_total": 0.3889671564102173, + "step": 297399 + }, + { + "epoch": 0.0188, + "grad_norm": 0.8618829250335693, + "learning_rate": 1.0231326295339388e-05, + "loss": 0.4303, + "step": 297400 + }, + { + "epoch": 0.018998, + "loss_gen": 5.899423122406006, + "loss_rtd": 0.23357856273651123, + "loss_sent": 0.2989017069339752, + "loss_sod": 0.01683850958943367, + "loss_total": 0.549318790435791, + "step": 297499 + }, + { + "epoch": 0.018998, + "loss_gen": 5.954813003540039, + "loss_rtd": 0.22031544148921967, + "loss_sent": 0.10967754572629929, + "loss_sod": 0.02306656539440155, + "loss_total": 0.3530595600605011, + "step": 297499 + }, + { + "epoch": 0.019, + "grad_norm": 0.791570782661438, + "learning_rate": 1.0212100153951054e-05, + "loss": 0.4191, + "step": 297500 + }, + { + "epoch": 0.019198, + "loss_gen": 5.731978416442871, + "loss_rtd": 0.23210537433624268, + "loss_sent": 0.10853546857833862, + "loss_sod": 0.0026697558350861073, + "loss_total": 0.3433105945587158, + "step": 297599 + }, + { + "epoch": 0.019198, + "loss_gen": 5.460188865661621, + "loss_rtd": 0.1825115829706192, + "loss_sent": 0.007262526545673609, + "loss_sod": 0.10843467712402344, + "loss_total": 0.2982087731361389, + "step": 297599 + }, + { + "epoch": 0.0192, + "grad_norm": 0.9947394728660583, + "learning_rate": 1.019289003912401e-05, + "loss": 0.4318, + "step": 297600 + }, + { + "epoch": 0.019398, + "loss_gen": 5.637037754058838, + "loss_rtd": 0.22613970935344696, + "loss_sent": 0.24242305755615234, + "loss_sod": 0.02330154925584793, + "loss_total": 0.49186432361602783, + "step": 297699 + }, + { + "epoch": 0.019398, + "loss_gen": 5.62609338760376, + "loss_rtd": 0.22445453703403473, + "loss_sent": 0.19351904094219208, + "loss_sod": 0.006364143453538418, + "loss_total": 0.4243377149105072, + "step": 297699 + }, + { + "epoch": 0.0194, + "grad_norm": 0.9231829047203064, + "learning_rate": 1.017369595859609e-05, + "loss": 0.4344, + "step": 297700 + }, + { + "epoch": 0.019598, + "loss_gen": 5.795924663543701, + "loss_rtd": 0.2381930649280548, + "loss_sent": 0.13380736112594604, + "loss_sod": 0.0353718027472496, + "loss_total": 0.40737223625183105, + "step": 297799 + }, + { + "epoch": 0.019598, + "loss_gen": 5.828614234924316, + "loss_rtd": 0.216282457113266, + "loss_sent": 0.08772077411413193, + "loss_sod": 0.012895278632640839, + "loss_total": 0.31689852476119995, + "step": 297799 + }, + { + "epoch": 0.0196, + "grad_norm": 0.7130690813064575, + "learning_rate": 1.0154517920098682e-05, + "loss": 0.4276, + "step": 297800 + }, + { + "epoch": 0.019798, + "loss_gen": 5.616603851318359, + "loss_rtd": 0.20068252086639404, + "loss_sent": 0.13653664290905, + "loss_sod": 0.06905515491962433, + "loss_total": 0.40627431869506836, + "step": 297899 + }, + { + "epoch": 0.019798, + "loss_gen": 5.861021995544434, + "loss_rtd": 0.22706495225429535, + "loss_sent": 0.32415828108787537, + "loss_sod": 0.0038043325766921043, + "loss_total": 0.5550275444984436, + "step": 297899 + }, + { + "epoch": 0.0198, + "grad_norm": 1.12308669090271, + "learning_rate": 1.0135355931356705e-05, + "loss": 0.4493, + "step": 297900 + }, + { + "epoch": 0.019998, + "loss_gen": 5.923847675323486, + "loss_rtd": 0.20875504612922668, + "loss_sent": 0.5159587264060974, + "loss_sod": 0.09680992364883423, + "loss_total": 0.8215236663818359, + "step": 297999 + }, + { + "epoch": 0.019998, + "loss_gen": 5.8114190101623535, + "loss_rtd": 0.22599723935127258, + "loss_sent": 0.16716141998767853, + "loss_sod": 0.040851183235645294, + "loss_total": 0.434009850025177, + "step": 297999 + }, + { + "epoch": 0.02, + "grad_norm": 2.635293483734131, + "learning_rate": 1.0116210000088578e-05, + "loss": 0.443, + "step": 298000 + }, + { + "epoch": 0.02, + "eval_loss": 0.40928593277931213, + "eval_runtime": 151.8378, + "eval_samples_per_second": 101.707, + "eval_steps_per_second": 0.797, + "step": 298000 + }, + { + "epoch": 0.020198, + "loss_gen": 5.813126087188721, + "loss_rtd": 0.22400996088981628, + "loss_sent": 0.12310399860143661, + "loss_sod": 0.020084943622350693, + "loss_total": 0.3671989142894745, + "step": 298099 + }, + { + "epoch": 0.020198, + "loss_gen": 5.907942295074463, + "loss_rtd": 0.21236549317836761, + "loss_sent": 0.10100822895765305, + "loss_sod": 0.07614253461360931, + "loss_total": 0.38951626420021057, + "step": 298099 + }, + { + "epoch": 0.0202, + "grad_norm": 0.754381000995636, + "learning_rate": 1.0097080134006286e-05, + "loss": 0.4397, + "step": 298100 + }, + { + "epoch": 0.020398, + "loss_gen": 5.606247425079346, + "loss_rtd": 0.18676139414310455, + "loss_sent": 0.040077440440654755, + "loss_sod": 0.08575140684843063, + "loss_total": 0.31259024143218994, + "step": 298199 + }, + { + "epoch": 0.020398, + "loss_gen": 5.761253356933594, + "loss_rtd": 0.20684626698493958, + "loss_sent": 0.32071366906166077, + "loss_sod": 0.023238133639097214, + "loss_total": 0.5507980585098267, + "step": 298199 + }, + { + "epoch": 0.0204, + "grad_norm": 2.0746564865112305, + "learning_rate": 1.0077966340815354e-05, + "loss": 0.4155, + "step": 298200 + }, + { + "epoch": 0.020598, + "loss_gen": 5.899225234985352, + "loss_rtd": 0.204883873462677, + "loss_sent": 0.31172147393226624, + "loss_sod": 0.05679526552557945, + "loss_total": 0.573400616645813, + "step": 298299 + }, + { + "epoch": 0.020598, + "loss_gen": 5.9609904289245605, + "loss_rtd": 0.21264024078845978, + "loss_sent": 0.1893542855978012, + "loss_sod": 0.043941430747509, + "loss_total": 0.4459359645843506, + "step": 298299 + }, + { + "epoch": 0.0206, + "grad_norm": 1.960498332977295, + "learning_rate": 1.0058868628214813e-05, + "loss": 0.4244, + "step": 298300 + }, + { + "epoch": 0.020798, + "loss_gen": 5.936389446258545, + "loss_rtd": 0.21245436370372772, + "loss_sent": 0.42014551162719727, + "loss_sod": 0.042525287717580795, + "loss_total": 0.6751251220703125, + "step": 298399 + }, + { + "epoch": 0.020798, + "loss_gen": 5.671829700469971, + "loss_rtd": 0.2047274261713028, + "loss_sent": 0.2431950569152832, + "loss_sod": 0.0022957162000238895, + "loss_total": 0.45021820068359375, + "step": 298399 + }, + { + "epoch": 0.0208, + "grad_norm": 2.06866717338562, + "learning_rate": 1.00397870038972e-05, + "loss": 0.4285, + "step": 298400 + }, + { + "epoch": 0.020998, + "loss_gen": 6.019495964050293, + "loss_rtd": 0.22130316495895386, + "loss_sent": 0.10773345828056335, + "loss_sod": 0.06705247610807419, + "loss_total": 0.396089106798172, + "step": 298499 + }, + { + "epoch": 0.020998, + "loss_gen": 5.783188819885254, + "loss_rtd": 0.21490241587162018, + "loss_sent": 0.13539086282253265, + "loss_sod": 0.0357794389128685, + "loss_total": 0.38607269525527954, + "step": 298499 + }, + { + "epoch": 0.021, + "grad_norm": 0.8840310573577881, + "learning_rate": 1.0020721475548606e-05, + "loss": 0.4287, + "step": 298500 + }, + { + "epoch": 0.021198, + "loss_gen": 5.865187644958496, + "loss_rtd": 0.19026094675064087, + "loss_sent": 0.2522822618484497, + "loss_sod": 0.035999853163957596, + "loss_total": 0.4785430431365967, + "step": 298599 + }, + { + "epoch": 0.021198, + "loss_gen": 5.421957492828369, + "loss_rtd": 0.21228951215744019, + "loss_sent": 0.1677059680223465, + "loss_sod": 0.02884584479033947, + "loss_total": 0.4088413119316101, + "step": 298599 + }, + { + "epoch": 0.0212, + "grad_norm": 1.372063398361206, + "learning_rate": 1.0001672050848632e-05, + "loss": 0.432, + "step": 298600 + }, + { + "epoch": 0.021398, + "loss_gen": 6.029733180999756, + "loss_rtd": 0.20245236158370972, + "loss_sent": 0.23813922703266144, + "loss_sod": 0.04080799221992493, + "loss_total": 0.4813995957374573, + "step": 298699 + }, + { + "epoch": 0.021398, + "loss_gen": 5.817180633544922, + "loss_rtd": 0.21810311079025269, + "loss_sent": 0.11740967631340027, + "loss_sod": 0.06908391416072845, + "loss_total": 0.4045967161655426, + "step": 298699 + }, + { + "epoch": 0.0214, + "grad_norm": 0.9485461711883545, + "learning_rate": 9.982638737470358e-06, + "loss": 0.42, + "step": 298700 + }, + { + "epoch": 0.021598, + "loss_gen": 5.792064666748047, + "loss_rtd": 0.2184455245733261, + "loss_sent": 0.5901139378547668, + "loss_sod": 0.0015406090533360839, + "loss_total": 0.8101000785827637, + "step": 298799 + }, + { + "epoch": 0.021598, + "loss_gen": 5.668981552124023, + "loss_rtd": 0.19227340817451477, + "loss_sent": 0.2015993446111679, + "loss_sod": 0.07817303389310837, + "loss_total": 0.47204577922821045, + "step": 298799 + }, + { + "epoch": 0.0216, + "grad_norm": 2.764828681945801, + "learning_rate": 9.963621543080415e-06, + "loss": 0.4254, + "step": 298800 + }, + { + "epoch": 0.021798, + "loss_gen": 5.603916645050049, + "loss_rtd": 0.21229764819145203, + "loss_sent": 0.3499624729156494, + "loss_sod": 0.013875177130103111, + "loss_total": 0.5761352777481079, + "step": 298899 + }, + { + "epoch": 0.021798, + "loss_gen": 5.999195098876953, + "loss_rtd": 0.21483048796653748, + "loss_sent": 0.21669946610927582, + "loss_sod": 0.03287728875875473, + "loss_total": 0.4644072651863098, + "step": 298899 + }, + { + "epoch": 0.0218, + "grad_norm": 1.6463184356689453, + "learning_rate": 9.944620475338928e-06, + "loss": 0.4267, + "step": 298900 + }, + { + "epoch": 0.021998, + "loss_gen": 6.022584438323975, + "loss_rtd": 0.2096121609210968, + "loss_sent": 0.20677828788757324, + "loss_sod": 0.04668641835451126, + "loss_total": 0.4630768597126007, + "step": 298999 + }, + { + "epoch": 0.021998, + "loss_gen": 6.090778827667236, + "loss_rtd": 0.21469448506832123, + "loss_sent": 0.15987078845500946, + "loss_sod": 0.07775652408599854, + "loss_total": 0.4523218274116516, + "step": 298999 + }, + { + "epoch": 0.022, + "grad_norm": 1.2275803089141846, + "learning_rate": 9.925635541899536e-06, + "loss": 0.419, + "step": 299000 + }, + { + "epoch": 0.022, + "eval_loss": 0.40254780650138855, + "eval_runtime": 151.5688, + "eval_samples_per_second": 101.888, + "eval_steps_per_second": 0.798, + "step": 299000 + }, + { + "epoch": 0.022198, + "loss_gen": 5.889149188995361, + "loss_rtd": 0.21863630414009094, + "loss_sent": 0.08945917338132858, + "loss_sod": 0.07114804536104202, + "loss_total": 0.37924352288246155, + "step": 299099 + }, + { + "epoch": 0.022198, + "loss_gen": 5.488976001739502, + "loss_rtd": 0.2114456444978714, + "loss_sent": 0.20754298567771912, + "loss_sod": 0.00454333983361721, + "loss_total": 0.4235319495201111, + "step": 299099 + }, + { + "epoch": 0.0222, + "grad_norm": 1.01533043384552, + "learning_rate": 9.906666750409337e-06, + "loss": 0.4243, + "step": 299100 + }, + { + "epoch": 0.022398, + "loss_gen": 5.907752990722656, + "loss_rtd": 0.21214336156845093, + "loss_sent": 0.24359628558158875, + "loss_sod": 0.043090175837278366, + "loss_total": 0.49882981181144714, + "step": 299199 + }, + { + "epoch": 0.022398, + "loss_gen": 6.102293014526367, + "loss_rtd": 0.2234363704919815, + "loss_sent": 0.3608255386352539, + "loss_sod": 0.062283582985401154, + "loss_total": 0.6465455293655396, + "step": 299199 + }, + { + "epoch": 0.0224, + "grad_norm": 1.8902572393417358, + "learning_rate": 9.887714108508983e-06, + "loss": 0.4465, + "step": 299200 + }, + { + "epoch": 0.022598, + "loss_gen": 6.135397434234619, + "loss_rtd": 0.21637411415576935, + "loss_sent": 0.28540757298469543, + "loss_sod": 0.005812958814203739, + "loss_total": 0.507594645023346, + "step": 299299 + }, + { + "epoch": 0.022598, + "loss_gen": 6.284907341003418, + "loss_rtd": 0.219236820936203, + "loss_sent": 0.11850326508283615, + "loss_sod": 0.01861223578453064, + "loss_total": 0.3563523292541504, + "step": 299299 + }, + { + "epoch": 0.0226, + "grad_norm": 1.237573266029358, + "learning_rate": 9.868777623832586e-06, + "loss": 0.4217, + "step": 299300 + }, + { + "epoch": 0.022798, + "loss_gen": 5.801009178161621, + "loss_rtd": 0.2080560028553009, + "loss_sent": 0.18533608317375183, + "loss_sod": 0.014758003875613213, + "loss_total": 0.4081500768661499, + "step": 299399 + }, + { + "epoch": 0.022798, + "loss_gen": 6.101986408233643, + "loss_rtd": 0.2245248407125473, + "loss_sent": 0.15532957017421722, + "loss_sod": 0.06217414140701294, + "loss_total": 0.44202858209609985, + "step": 299399 + }, + { + "epoch": 0.0228, + "grad_norm": 0.7558884024620056, + "learning_rate": 9.849857304007781e-06, + "loss": 0.4222, + "step": 299400 + }, + { + "epoch": 0.022998, + "loss_gen": 5.513538837432861, + "loss_rtd": 0.21030937135219574, + "loss_sent": 0.20333507657051086, + "loss_sod": 0.029832664877176285, + "loss_total": 0.4434770941734314, + "step": 299499 + }, + { + "epoch": 0.022998, + "loss_gen": 5.658996105194092, + "loss_rtd": 0.20771342515945435, + "loss_sent": 0.07299751043319702, + "loss_sod": 0.12441278994083405, + "loss_total": 0.4051237106323242, + "step": 299499 + }, + { + "epoch": 0.023, + "grad_norm": 0.9803382158279419, + "learning_rate": 9.830953156655636e-06, + "loss": 0.4322, + "step": 299500 + }, + { + "epoch": 0.023198, + "loss_gen": 5.8414483070373535, + "loss_rtd": 0.21803100407123566, + "loss_sent": 0.14960582554340363, + "loss_sod": 0.03182917833328247, + "loss_total": 0.39946600794792175, + "step": 299599 + }, + { + "epoch": 0.023198, + "loss_gen": 5.983527183532715, + "loss_rtd": 0.21602879464626312, + "loss_sent": 0.2488606870174408, + "loss_sod": 0.06502971053123474, + "loss_total": 0.5299191474914551, + "step": 299599 + }, + { + "epoch": 0.0232, + "grad_norm": 1.2604292631149292, + "learning_rate": 9.812065189390756e-06, + "loss": 0.4158, + "step": 299600 + }, + { + "epoch": 0.023398, + "loss_gen": 5.726539134979248, + "loss_rtd": 0.1950404793024063, + "loss_sent": 0.12808853387832642, + "loss_sod": 0.031436990946531296, + "loss_total": 0.3545660078525543, + "step": 299699 + }, + { + "epoch": 0.023398, + "loss_gen": 5.22651481628418, + "loss_rtd": 0.1948186159133911, + "loss_sent": 2.413586298644077e-05, + "loss_sod": 0.15451985597610474, + "loss_total": 0.3493626117706299, + "step": 299699 + }, + { + "epoch": 0.0234, + "grad_norm": 0.8676385283470154, + "learning_rate": 9.79319340982121e-06, + "loss": 0.4277, + "step": 299700 + }, + { + "epoch": 0.023598, + "loss_gen": 5.286085605621338, + "loss_rtd": 0.1636226773262024, + "loss_sent": 0.06757794320583344, + "loss_sod": 0.019085204228758812, + "loss_total": 0.250285804271698, + "step": 299799 + }, + { + "epoch": 0.023598, + "loss_gen": 6.023898601531982, + "loss_rtd": 0.2052106410264969, + "loss_sent": 0.28852471709251404, + "loss_sod": 0.011790897697210312, + "loss_total": 0.5055262446403503, + "step": 299799 + }, + { + "epoch": 0.0236, + "grad_norm": 0.8971786499023438, + "learning_rate": 9.774337825548563e-06, + "loss": 0.4175, + "step": 299800 + }, + { + "epoch": 0.023798, + "loss_gen": 5.704233646392822, + "loss_rtd": 0.22727744281291962, + "loss_sent": 0.12884476780891418, + "loss_sod": 0.04108048230409622, + "loss_total": 0.3972027003765106, + "step": 299899 + }, + { + "epoch": 0.023798, + "loss_gen": 5.376214981079102, + "loss_rtd": 0.17768554389476776, + "loss_sent": 0.033320698887109756, + "loss_sod": 0.008416893891990185, + "loss_total": 0.21942313015460968, + "step": 299899 + }, + { + "epoch": 0.0238, + "grad_norm": 0.7222482562065125, + "learning_rate": 9.75549844416782e-06, + "loss": 0.4323, + "step": 299900 + }, + { + "epoch": 0.023998, + "loss_gen": 5.739912986755371, + "loss_rtd": 0.21299508213996887, + "loss_sent": 0.3322557210922241, + "loss_sod": 0.013067997992038727, + "loss_total": 0.5583187937736511, + "step": 299999 + }, + { + "epoch": 0.023998, + "loss_gen": 5.824176788330078, + "loss_rtd": 0.2032489776611328, + "loss_sent": 0.3767026364803314, + "loss_sod": 0.02217734232544899, + "loss_total": 0.6021289825439453, + "step": 299999 + }, + { + "epoch": 0.024, + "grad_norm": 2.0592379570007324, + "learning_rate": 9.736675273267487e-06, + "loss": 0.4224, + "step": 300000 + }, + { + "epoch": 0.024, + "eval_loss": 0.40290775895118713, + "eval_runtime": 151.5169, + "eval_samples_per_second": 101.923, + "eval_steps_per_second": 0.799, + "step": 300000 + }, + { + "epoch": 0.024198, + "loss_gen": 5.842031478881836, + "loss_rtd": 0.20937180519104004, + "loss_sent": 0.2385185956954956, + "loss_sod": 0.021150756627321243, + "loss_total": 0.4690411686897278, + "step": 300099 + }, + { + "epoch": 0.024198, + "loss_gen": 5.577695369720459, + "loss_rtd": 0.22668272256851196, + "loss_sent": 0.14812883734703064, + "loss_sod": 0.004138198681175709, + "loss_total": 0.37894976139068604, + "step": 300099 + }, + { + "epoch": 0.0242, + "grad_norm": 1.4563050270080566, + "learning_rate": 9.717868320429541e-06, + "loss": 0.4364, + "step": 300100 + }, + { + "epoch": 0.024398, + "loss_gen": 5.9344964027404785, + "loss_rtd": 0.19561925530433655, + "loss_sent": 0.045883551239967346, + "loss_sod": 0.09183825552463531, + "loss_total": 0.3333410620689392, + "step": 300199 + }, + { + "epoch": 0.024398, + "loss_gen": 5.7659478187561035, + "loss_rtd": 0.21479427814483643, + "loss_sent": 0.2246863692998886, + "loss_sod": 0.028010647743940353, + "loss_total": 0.4674912989139557, + "step": 300199 + }, + { + "epoch": 0.0244, + "grad_norm": 0.9620458483695984, + "learning_rate": 9.699077593229434e-06, + "loss": 0.4158, + "step": 300200 + }, + { + "epoch": 0.024598, + "loss_gen": 5.711204528808594, + "loss_rtd": 0.2187155783176422, + "loss_sent": 0.34310054779052734, + "loss_sod": 0.04240068420767784, + "loss_total": 0.6042168140411377, + "step": 300299 + }, + { + "epoch": 0.024598, + "loss_gen": 5.713837623596191, + "loss_rtd": 0.2238655835390091, + "loss_sent": 0.13969789445400238, + "loss_sod": 0.05623272806406021, + "loss_total": 0.4197962284088135, + "step": 300299 + }, + { + "epoch": 0.0246, + "grad_norm": 1.2001155614852905, + "learning_rate": 9.680303099236031e-06, + "loss": 0.4134, + "step": 300300 + }, + { + "epoch": 0.024798, + "loss_gen": 5.767083644866943, + "loss_rtd": 0.20476669073104858, + "loss_sent": 0.1843688040971756, + "loss_sod": 0.05684830993413925, + "loss_total": 0.4459838271141052, + "step": 300399 + }, + { + "epoch": 0.024798, + "loss_gen": 6.067483901977539, + "loss_rtd": 0.20688337087631226, + "loss_sent": 0.2843725383281708, + "loss_sod": 0.05607297271490097, + "loss_total": 0.5473288893699646, + "step": 300399 + }, + { + "epoch": 0.0248, + "grad_norm": 1.3059539794921875, + "learning_rate": 9.661544846011728e-06, + "loss": 0.4266, + "step": 300400 + }, + { + "epoch": 0.024998, + "loss_gen": 5.937251091003418, + "loss_rtd": 0.2108709067106247, + "loss_sent": 0.14311131834983826, + "loss_sod": 0.12280671298503876, + "loss_total": 0.4767889380455017, + "step": 300499 + }, + { + "epoch": 0.024998, + "loss_gen": 5.702668190002441, + "loss_rtd": 0.21346786618232727, + "loss_sent": 0.07040661573410034, + "loss_sod": 0.06024638190865517, + "loss_total": 0.3441208600997925, + "step": 300499 + }, + { + "epoch": 0.025, + "grad_norm": 1.0034197568893433, + "learning_rate": 9.642802841112347e-06, + "loss": 0.4303, + "step": 300500 + }, + { + "epoch": 0.025198, + "loss_gen": 5.861401081085205, + "loss_rtd": 0.18989452719688416, + "loss_sent": 0.19715692102909088, + "loss_sod": 0.03241632133722305, + "loss_total": 0.4194677472114563, + "step": 300599 + }, + { + "epoch": 0.025198, + "loss_gen": 5.596365928649902, + "loss_rtd": 0.21305887401103973, + "loss_sent": 0.06802504509687424, + "loss_sod": 0.02314567193388939, + "loss_total": 0.30422958731651306, + "step": 300599 + }, + { + "epoch": 0.0252, + "grad_norm": 0.781015157699585, + "learning_rate": 9.624077092087142e-06, + "loss": 0.4143, + "step": 300600 + }, + { + "epoch": 0.025398, + "loss_gen": 5.217522144317627, + "loss_rtd": 0.15774831175804138, + "loss_sent": 2.5797296984819695e-05, + "loss_sod": 0.02641209587454796, + "loss_total": 0.1841862052679062, + "step": 300699 + }, + { + "epoch": 0.025398, + "loss_gen": 5.510445594787598, + "loss_rtd": 0.18725579977035522, + "loss_sent": 0.07301981002092361, + "loss_sod": 0.1081763505935669, + "loss_total": 0.36845195293426514, + "step": 300699 + }, + { + "epoch": 0.0254, + "grad_norm": 0.9929467439651489, + "learning_rate": 9.605367606478854e-06, + "loss": 0.419, + "step": 300700 + }, + { + "epoch": 0.025598, + "loss_gen": 5.955038070678711, + "loss_rtd": 0.21208456158638, + "loss_sent": 0.22477084398269653, + "loss_sod": 0.036805808544158936, + "loss_total": 0.4736612141132355, + "step": 300799 + }, + { + "epoch": 0.025598, + "loss_gen": 6.133488178253174, + "loss_rtd": 0.21041129529476166, + "loss_sent": 0.4313626289367676, + "loss_sod": 0.024549473077058792, + "loss_total": 0.6663234233856201, + "step": 300799 + }, + { + "epoch": 0.0256, + "grad_norm": 1.38124680519104, + "learning_rate": 9.586674391823663e-06, + "loss": 0.4271, + "step": 300800 + }, + { + "epoch": 0.025798, + "loss_gen": 5.827110290527344, + "loss_rtd": 0.22941620647907257, + "loss_sent": 0.1237158551812172, + "loss_sod": 0.05527558550238609, + "loss_total": 0.40840762853622437, + "step": 300899 + }, + { + "epoch": 0.025798, + "loss_gen": 5.892513751983643, + "loss_rtd": 0.2249341607093811, + "loss_sent": 0.1240287646651268, + "loss_sod": 0.036415159702301025, + "loss_total": 0.3853780925273895, + "step": 300899 + }, + { + "epoch": 0.0258, + "grad_norm": 1.31509268283844, + "learning_rate": 9.567997455651212e-06, + "loss": 0.4322, + "step": 300900 + }, + { + "epoch": 0.025998, + "loss_gen": 5.883686542510986, + "loss_rtd": 0.21076661348342896, + "loss_sent": 0.20811495184898376, + "loss_sod": 0.03484820947051048, + "loss_total": 0.4537297785282135, + "step": 300999 + }, + { + "epoch": 0.025998, + "loss_gen": 6.016758918762207, + "loss_rtd": 0.24303993582725525, + "loss_sent": 0.12013135850429535, + "loss_sod": 0.006829577032476664, + "loss_total": 0.3700008690357208, + "step": 300999 + }, + { + "epoch": 0.026, + "grad_norm": 1.149437665939331, + "learning_rate": 9.549336805484531e-06, + "loss": 0.4247, + "step": 301000 + }, + { + "epoch": 0.026, + "eval_loss": 0.3994949460029602, + "eval_runtime": 151.57, + "eval_samples_per_second": 101.887, + "eval_steps_per_second": 0.798, + "step": 301000 + }, + { + "epoch": 0.026198, + "loss_gen": 5.778402328491211, + "loss_rtd": 0.20204311609268188, + "loss_sent": 0.19165849685668945, + "loss_sod": 0.01693936064839363, + "loss_total": 0.4106409549713135, + "step": 301099 + }, + { + "epoch": 0.026198, + "loss_gen": 5.913434982299805, + "loss_rtd": 0.21450239419937134, + "loss_sent": 0.33072736859321594, + "loss_sod": 0.04181693121790886, + "loss_total": 0.5870466828346252, + "step": 301099 + }, + { + "epoch": 0.0262, + "grad_norm": 1.1853196620941162, + "learning_rate": 9.53069244884015e-06, + "loss": 0.413, + "step": 301100 + }, + { + "epoch": 0.026398, + "loss_gen": 5.8812713623046875, + "loss_rtd": 0.2307126373052597, + "loss_sent": 0.2065943330526352, + "loss_sod": 0.022692713886499405, + "loss_total": 0.459999680519104, + "step": 301199 + }, + { + "epoch": 0.026398, + "loss_gen": 5.574290752410889, + "loss_rtd": 0.23668262362480164, + "loss_sent": 0.11356791108846664, + "loss_sod": 0.028580449521541595, + "loss_total": 0.37883099913597107, + "step": 301199 + }, + { + "epoch": 0.0264, + "grad_norm": 0.6476990580558777, + "learning_rate": 9.512064393228015e-06, + "loss": 0.4067, + "step": 301200 + }, + { + "epoch": 0.026598, + "loss_gen": 5.341989517211914, + "loss_rtd": 0.16177833080291748, + "loss_sent": 2.711415072553791e-05, + "loss_sod": 0.08934150636196136, + "loss_total": 0.25114697217941284, + "step": 301299 + }, + { + "epoch": 0.026598, + "loss_gen": 5.641251087188721, + "loss_rtd": 0.1875661462545395, + "loss_sent": 0.11462666094303131, + "loss_sod": 0.1264946311712265, + "loss_total": 0.4286874532699585, + "step": 301299 + }, + { + "epoch": 0.0266, + "grad_norm": 1.1820822954177856, + "learning_rate": 9.493452646151506e-06, + "loss": 0.4319, + "step": 301300 + }, + { + "epoch": 0.026798, + "loss_gen": 5.6729512214660645, + "loss_rtd": 0.20079460740089417, + "loss_sent": 0.0888160839676857, + "loss_sod": 0.04933574050664902, + "loss_total": 0.3389464318752289, + "step": 301399 + }, + { + "epoch": 0.026798, + "loss_gen": 5.719766139984131, + "loss_rtd": 0.21247261762619019, + "loss_sent": 0.14630696177482605, + "loss_sod": 0.009837578982114792, + "loss_total": 0.3686171770095825, + "step": 301399 + }, + { + "epoch": 0.0268, + "grad_norm": 1.0473250150680542, + "learning_rate": 9.474857215107419e-06, + "loss": 0.4369, + "step": 301400 + }, + { + "epoch": 0.026998, + "loss_gen": 5.690413475036621, + "loss_rtd": 0.19615985453128815, + "loss_sent": 0.07599996030330658, + "loss_sod": 0.1089341789484024, + "loss_total": 0.38109397888183594, + "step": 301499 + }, + { + "epoch": 0.026998, + "loss_gen": 5.081170082092285, + "loss_rtd": 0.16104573011398315, + "loss_sent": 0.0011152346851304173, + "loss_sod": 0.044768575578927994, + "loss_total": 0.2069295346736908, + "step": 301499 + }, + { + "epoch": 0.027, + "grad_norm": 1.1119778156280518, + "learning_rate": 9.456278107585998e-06, + "loss": 0.4403, + "step": 301500 + }, + { + "epoch": 0.027198, + "loss_gen": 5.589986801147461, + "loss_rtd": 0.1943880319595337, + "loss_sent": 0.004554521758109331, + "loss_sod": 0.1489199846982956, + "loss_total": 0.3478625416755676, + "step": 301599 + }, + { + "epoch": 0.027198, + "loss_gen": 5.281999111175537, + "loss_rtd": 0.17661824822425842, + "loss_sent": 0.005106969270855188, + "loss_sod": 0.039718326181173325, + "loss_total": 0.2214435487985611, + "step": 301599 + }, + { + "epoch": 0.0272, + "grad_norm": 0.9654894471168518, + "learning_rate": 9.437715331070907e-06, + "loss": 0.4415, + "step": 301600 + }, + { + "epoch": 0.027398, + "loss_gen": 5.765803813934326, + "loss_rtd": 0.22063113749027252, + "loss_sent": 0.11184588819742203, + "loss_sod": 0.004072606097906828, + "loss_total": 0.33654963970184326, + "step": 301699 + }, + { + "epoch": 0.027398, + "loss_gen": 6.429196357727051, + "loss_rtd": 0.22575436532497406, + "loss_sent": 0.0653010904788971, + "loss_sod": 0.07813893258571625, + "loss_total": 0.3691943883895874, + "step": 301699 + }, + { + "epoch": 0.0274, + "grad_norm": 0.9896982908248901, + "learning_rate": 9.419168893039242e-06, + "loss": 0.4175, + "step": 301700 + }, + { + "epoch": 0.027598, + "loss_gen": 5.694515228271484, + "loss_rtd": 0.2080574482679367, + "loss_sent": 0.06674934178590775, + "loss_sod": 0.08304727077484131, + "loss_total": 0.35785406827926636, + "step": 301799 + }, + { + "epoch": 0.027598, + "loss_gen": 6.014454364776611, + "loss_rtd": 0.19791358709335327, + "loss_sent": 0.18025633692741394, + "loss_sod": 0.06847569346427917, + "loss_total": 0.4466456174850464, + "step": 301799 + }, + { + "epoch": 0.0276, + "grad_norm": 0.8307927846908569, + "learning_rate": 9.400638800961487e-06, + "loss": 0.4191, + "step": 301800 + }, + { + "epoch": 0.027798, + "loss_gen": 5.827275276184082, + "loss_rtd": 0.2270171046257019, + "loss_sent": 0.39101526141166687, + "loss_sod": 0.03082764521241188, + "loss_total": 0.648859977722168, + "step": 301899 + }, + { + "epoch": 0.027798, + "loss_gen": 5.811234474182129, + "loss_rtd": 0.21190690994262695, + "loss_sent": 0.14939238131046295, + "loss_sod": 0.0018225417006760836, + "loss_total": 0.3631218373775482, + "step": 301899 + }, + { + "epoch": 0.0278, + "grad_norm": 1.2130917310714722, + "learning_rate": 9.382125062301562e-06, + "loss": 0.4287, + "step": 301900 + }, + { + "epoch": 0.027998, + "loss_gen": 6.002170562744141, + "loss_rtd": 0.22525110840797424, + "loss_sent": 0.256336510181427, + "loss_sod": 0.07571940869092941, + "loss_total": 0.5573070049285889, + "step": 301999 + }, + { + "epoch": 0.027998, + "loss_gen": 5.7604594230651855, + "loss_rtd": 0.21847416460514069, + "loss_sent": 0.1460372358560562, + "loss_sod": 0.06556612998247147, + "loss_total": 0.43007755279541016, + "step": 301999 + }, + { + "epoch": 0.028, + "grad_norm": 1.1580942869186401, + "learning_rate": 9.363627684516818e-06, + "loss": 0.4306, + "step": 302000 + }, + { + "epoch": 0.028, + "eval_loss": 0.4082627594470978, + "eval_runtime": 151.8601, + "eval_samples_per_second": 101.692, + "eval_steps_per_second": 0.797, + "step": 302000 + }, + { + "epoch": 0.028198, + "loss_gen": 5.524214744567871, + "loss_rtd": 0.24167396128177643, + "loss_sent": 0.16886498034000397, + "loss_sod": 0.024528495967388153, + "loss_total": 0.43506741523742676, + "step": 302099 + }, + { + "epoch": 0.028198, + "loss_gen": 6.628436088562012, + "loss_rtd": 0.206573948264122, + "loss_sent": 0.064475879073143, + "loss_sod": 0.3051353693008423, + "loss_total": 0.5761852264404297, + "step": 302099 + }, + { + "epoch": 0.0282, + "grad_norm": 1.5883597135543823, + "learning_rate": 9.34514667505797e-06, + "loss": 0.435, + "step": 302100 + }, + { + "epoch": 0.028398, + "loss_gen": 5.717016220092773, + "loss_rtd": 0.21950861811637878, + "loss_sent": 0.4334104359149933, + "loss_sod": 0.01246470957994461, + "loss_total": 0.6653837561607361, + "step": 302199 + }, + { + "epoch": 0.028398, + "loss_gen": 5.838071346282959, + "loss_rtd": 0.19945593178272247, + "loss_sent": 0.22899119555950165, + "loss_sod": 0.014160841703414917, + "loss_total": 0.44260796904563904, + "step": 302199 + }, + { + "epoch": 0.0284, + "grad_norm": 0.9990032315254211, + "learning_rate": 9.326682041369178e-06, + "loss": 0.4218, + "step": 302200 + }, + { + "epoch": 0.028598, + "loss_gen": 5.844183921813965, + "loss_rtd": 0.2198197990655899, + "loss_sent": 0.4132113754749298, + "loss_sod": 0.10050918161869049, + "loss_total": 0.7335403561592102, + "step": 302299 + }, + { + "epoch": 0.028598, + "loss_gen": 5.786034107208252, + "loss_rtd": 0.2089647352695465, + "loss_sent": 0.3188110589981079, + "loss_sod": 0.04004322737455368, + "loss_total": 0.5678189992904663, + "step": 302299 + }, + { + "epoch": 0.0286, + "grad_norm": 1.4748289585113525, + "learning_rate": 9.308233790887999e-06, + "loss": 0.4207, + "step": 302300 + }, + { + "epoch": 0.028798, + "loss_gen": 5.850669860839844, + "loss_rtd": 0.21573230624198914, + "loss_sent": 0.11724942922592163, + "loss_sod": 0.046550020575523376, + "loss_total": 0.37953174114227295, + "step": 302399 + }, + { + "epoch": 0.028798, + "loss_gen": 6.027736663818359, + "loss_rtd": 0.21444594860076904, + "loss_sent": 0.09546088427305222, + "loss_sod": 0.05568915605545044, + "loss_total": 0.3655959963798523, + "step": 302399 + }, + { + "epoch": 0.0288, + "grad_norm": 1.1638667583465576, + "learning_rate": 9.289801931045395e-06, + "loss": 0.4279, + "step": 302400 + }, + { + "epoch": 0.028998, + "loss_gen": 6.195638179779053, + "loss_rtd": 0.21306723356246948, + "loss_sent": 0.37989112734794617, + "loss_sod": 0.10237512737512589, + "loss_total": 0.6953334808349609, + "step": 302499 + }, + { + "epoch": 0.028998, + "loss_gen": 5.978787899017334, + "loss_rtd": 0.20936015248298645, + "loss_sent": 0.049700818955898285, + "loss_sod": 0.08687801659107208, + "loss_total": 0.3459390103816986, + "step": 302499 + }, + { + "epoch": 0.029, + "grad_norm": 1.337684154510498, + "learning_rate": 9.271386469265691e-06, + "loss": 0.4428, + "step": 302500 + }, + { + "epoch": 0.029198, + "loss_gen": 5.901731967926025, + "loss_rtd": 0.21487298607826233, + "loss_sent": 0.11970613151788712, + "loss_sod": 0.005755370482802391, + "loss_total": 0.3403344750404358, + "step": 302599 + }, + { + "epoch": 0.029198, + "loss_gen": 5.754985809326172, + "loss_rtd": 0.19078297913074493, + "loss_sent": 0.12921775877475739, + "loss_sod": 0.07635924220085144, + "loss_total": 0.39635998010635376, + "step": 302599 + }, + { + "epoch": 0.0292, + "grad_norm": 1.9090731143951416, + "learning_rate": 9.252987412966647e-06, + "loss": 0.4289, + "step": 302600 + }, + { + "epoch": 0.029398, + "loss_gen": 5.673803806304932, + "loss_rtd": 0.23954981565475464, + "loss_sent": 0.1688096523284912, + "loss_sod": 0.010437065735459328, + "loss_total": 0.4187965393066406, + "step": 302699 + }, + { + "epoch": 0.029398, + "loss_gen": 5.75961446762085, + "loss_rtd": 0.20999027788639069, + "loss_sent": 0.2323492169380188, + "loss_sod": 0.019814923405647278, + "loss_total": 0.46215441823005676, + "step": 302699 + }, + { + "epoch": 0.0294, + "grad_norm": 0.8130232691764832, + "learning_rate": 9.234604769559401e-06, + "loss": 0.4079, + "step": 302700 + }, + { + "epoch": 0.029598, + "loss_gen": 5.370748043060303, + "loss_rtd": 0.19760359823703766, + "loss_sent": 0.02775844745337963, + "loss_sod": 0.04738260433077812, + "loss_total": 0.27274465560913086, + "step": 302799 + }, + { + "epoch": 0.029598, + "loss_gen": 5.298427581787109, + "loss_rtd": 0.19612470269203186, + "loss_sent": 0.0008724250365048647, + "loss_sod": 0.12282057851552963, + "loss_total": 0.3198177218437195, + "step": 302799 + }, + { + "epoch": 0.0296, + "grad_norm": 1.1167999505996704, + "learning_rate": 9.216238546448492e-06, + "loss": 0.433, + "step": 302800 + }, + { + "epoch": 0.029798, + "loss_gen": 5.976594924926758, + "loss_rtd": 0.21692176163196564, + "loss_sent": 0.15063509345054626, + "loss_sod": 0.015588534064590931, + "loss_total": 0.38314539194107056, + "step": 302899 + }, + { + "epoch": 0.029798, + "loss_gen": 5.5621418952941895, + "loss_rtd": 0.2005169689655304, + "loss_sent": 0.3393249213695526, + "loss_sod": 0.02127825655043125, + "loss_total": 0.5611201524734497, + "step": 302899 + }, + { + "epoch": 0.0298, + "grad_norm": 1.424239993095398, + "learning_rate": 9.197888751031803e-06, + "loss": 0.4257, + "step": 302900 + }, + { + "epoch": 0.029998, + "loss_gen": 5.912893295288086, + "loss_rtd": 0.21181289851665497, + "loss_sent": 0.0844845324754715, + "loss_sod": 0.0034042359329760075, + "loss_total": 0.29970166087150574, + "step": 302999 + }, + { + "epoch": 0.029998, + "loss_gen": 5.859111309051514, + "loss_rtd": 0.20496448874473572, + "loss_sent": 0.03563971444964409, + "loss_sod": 0.17539413273334503, + "loss_total": 0.41599833965301514, + "step": 302999 + }, + { + "epoch": 0.03, + "grad_norm": 0.7764007449150085, + "learning_rate": 9.17955539070065e-06, + "loss": 0.4259, + "step": 303000 + }, + { + "epoch": 0.03, + "eval_loss": 0.4112508296966553, + "eval_runtime": 151.5863, + "eval_samples_per_second": 101.876, + "eval_steps_per_second": 0.798, + "step": 303000 } ], "logging_steps": 100, @@ -70210,7 +78208,7 @@ "attributes": {} } }, - "total_flos": 1.8999401716383744e+19, + "total_flos": 2.1164774706118656e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null