diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,103219 +2,1345 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.028, + "epoch": 0.034, "eval_steps": 1000, - "global_step": 400000, + "global_step": 17000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2e-06, - "grad_norm": 13.825847625732422, - "learning_rate": 9.118382907149165e-05, - "loss": 2.5112, + "grad_norm": 29.506126403808594, + "learning_rate": 0.0, + "loss": 1.5091, "step": 1 }, - { - "epoch": 0.000198, - "loss_gen": 7.338496208190918, - "loss_rtd": 0.3364008367061615, - "loss_sent": 0.6368644833564758, - "loss_sod": 0.48023903369903564, - "loss_total": 1.6714577674865723, - "step": 99 - }, - { - "epoch": 0.000198, - "loss_gen": 7.39376163482666, - "loss_rtd": 0.3249678909778595, - "loss_sent": 0.6036056876182556, - "loss_sod": 0.6825383901596069, - "loss_total": 1.8307068347930908, - "step": 99 - }, { "epoch": 0.0002, - "grad_norm": 2.552236795425415, - "learning_rate": 9.116600623227749e-05, - "loss": 1.6687, + "grad_norm": 7.35781717300415, + "learning_rate": 9.9e-07, + "loss": 1.6562, "step": 100 }, - { - "epoch": 0.000398, - "loss_gen": 7.468382358551025, - "loss_rtd": 0.34856757521629333, - "loss_sent": 0.24916481971740723, - "loss_sod": 0.5723617672920227, - "loss_total": 1.6159565448760986, - "step": 199 - }, - { - "epoch": 0.000398, - "loss_gen": 7.193928241729736, - "loss_rtd": 0.3576851189136505, - "loss_sent": 0.08575951308012009, - "loss_sod": 0.5566340684890747, - "loss_total": 1.4295562505722046, - "step": 199 - }, { "epoch": 0.0004, - "grad_norm": 4.255260467529297, - "learning_rate": 9.11479868656544e-05, - "loss": 1.6235, + "grad_norm": 4.9180989265441895, + "learning_rate": 1.99e-06, + "loss": 1.6176, "step": 200 }, - { - "epoch": 0.000598, - "loss_gen": 6.456632137298584, - "loss_rtd": 0.3662375509738922, - "loss_sent": 0.05986621230840683, - "loss_sod": 0.5555340647697449, - "loss_total": 1.5607976913452148, - "step": 299 - }, - { - "epoch": 0.000598, - "loss_gen": 6.634022235870361, - "loss_rtd": 0.3693581521511078, - "loss_sent": 0.4121516942977905, - "loss_sod": 0.30377814173698425, - "loss_total": 1.6803597211837769, - "step": 299 - }, { "epoch": 0.0006, - "grad_norm": 3.2695043087005615, - "learning_rate": 9.11299509246271e-05, - "loss": 1.5732, + "grad_norm": 1.8868086338043213, + "learning_rate": 2.99e-06, + "loss": 1.548, "step": 300 }, - { - "epoch": 0.000798, - "loss_gen": 6.009239196777344, - "loss_rtd": 0.3889811038970947, - "loss_sent": 0.05189042165875435, - "loss_sod": 0.22444681823253632, - "loss_total": 1.3846241235733032, - "step": 399 - }, - { - "epoch": 0.000798, - "loss_gen": 6.323911666870117, - "loss_rtd": 0.3717780113220215, - "loss_sent": 0.47785434126853943, - "loss_sod": 0.27205729484558105, - "loss_total": 1.878661870956421, - "step": 399 - }, { "epoch": 0.0008, - "grad_norm": 1.4569134712219238, - "learning_rate": 9.111189841646048e-05, - "loss": 1.6297, + "grad_norm": 7.365355491638184, + "learning_rate": 3.99e-06, + "loss": 1.4958, "step": 400 }, - { - "epoch": 0.000998, - "loss_gen": 5.7959980964660645, - "loss_rtd": 0.41233113408088684, - "loss_sent": 0.06202005594968796, - "loss_sod": 0.3987869620323181, - "loss_total": 1.7407991886138916, - "step": 499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.973730564117432, - "loss_rtd": 0.3940364122390747, - "loss_sent": 0.20922060310840607, - "loss_sod": 0.1169593557715416, - "loss_total": 1.6144838333129883, - "step": 499 - }, { "epoch": 0.001, - "grad_norm": 1.5367815494537354, - "learning_rate": 9.109382934842612e-05, - "loss": 1.731, + "grad_norm": 8.965476989746094, + "learning_rate": 4.9900000000000005e-06, + "loss": 1.4918, "step": 500 }, - { - "epoch": 0.001198, - "loss_gen": 5.828492164611816, - "loss_rtd": 0.39731845259666443, - "loss_sent": 0.15445075929164886, - "loss_sod": 0.08247369527816772, - "loss_total": 1.681622862815857, - "step": 599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.8320417404174805, - "loss_rtd": 0.40642160177230835, - "loss_sent": 0.28684261441230774, - "loss_sod": 0.1827951818704605, - "loss_total": 1.9240772724151611, - "step": 599 - }, { "epoch": 0.0012, - "grad_norm": 1.504315733909607, - "learning_rate": 9.10757437278022e-05, - "loss": 1.874, + "grad_norm": 2.2186834812164307, + "learning_rate": 5.99e-06, + "loss": 1.4807, "step": 600 }, - { - "epoch": 0.001398, - "loss_gen": 6.005682945251465, - "loss_rtd": 0.40744271874427795, - "loss_sent": 0.12264258414506912, - "loss_sod": 0.1687479019165039, - "loss_total": 1.9582250118255615, - "step": 699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.766935348510742, - "loss_rtd": 0.4119797646999359, - "loss_sent": 0.10859901458024979, - "loss_sod": 0.2178047150373459, - "loss_total": 1.9477099180221558, - "step": 699 - }, { "epoch": 0.0014, - "grad_norm": 1.2932411432266235, - "learning_rate": 9.105764156187362e-05, - "loss": 1.9748, + "grad_norm": 1.970430850982666, + "learning_rate": 6.990000000000001e-06, + "loss": 1.4312, "step": 700 }, - { - "epoch": 0.001598, - "loss_gen": 5.579442024230957, - "loss_rtd": 0.4019645154476166, - "loss_sent": 0.15888626873493195, - "loss_sod": 0.09037375450134277, - "loss_total": 1.9886168241500854, - "step": 799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.165345668792725, - "loss_rtd": 0.42980027198791504, - "loss_sent": 0.12720005214214325, - "loss_sod": 0.2558518648147583, - "loss_total": 2.05098557472229, - "step": 799 - }, { "epoch": 0.0016, - "grad_norm": 0.9107739329338074, - "learning_rate": 9.103952285793193e-05, - "loss": 2.1122, + "grad_norm": 1.5914119482040405, + "learning_rate": 7.99e-06, + "loss": 1.3848, "step": 800 }, - { - "epoch": 0.001798, - "loss_gen": 5.492117404937744, - "loss_rtd": 0.39895787835121155, - "loss_sent": 0.18793721497058868, - "loss_sod": 0.1065969318151474, - "loss_total": 2.174715995788574, - "step": 899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.479669570922852, - "loss_rtd": 0.39764881134033203, - "loss_sent": 0.1811528503894806, - "loss_sod": 0.1471906304359436, - "loss_total": 2.2038590908050537, - "step": 899 - }, { "epoch": 0.0018, - "grad_norm": 1.1011357307434082, - "learning_rate": 9.102138762327534e-05, - "loss": 2.2494, + "grad_norm": 1.7615679502487183, + "learning_rate": 8.99e-06, + "loss": 1.4126, "step": 900 }, - { - "epoch": 0.001998, - "loss_gen": 5.582163333892822, - "loss_rtd": 0.4162678122520447, - "loss_sent": 0.5247966051101685, - "loss_sod": 0.19601702690124512, - "loss_total": 2.810055732727051, - "step": 999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.055418491363525, - "loss_rtd": 0.4222857356071472, - "loss_sent": 0.23062478005886078, - "loss_sod": 0.21447616815567017, - "loss_total": 2.382495403289795, - "step": 999 - }, { "epoch": 0.002, - "grad_norm": 2.1947152614593506, - "learning_rate": 9.100323586520871e-05, - "loss": 2.3633, + "grad_norm": 1.5981565713882446, + "learning_rate": 9.990000000000001e-06, + "loss": 1.3768, "step": 1000 }, { "epoch": 0.002, - "eval_loss": 2.3820672035217285, - "eval_runtime": 152.1395, - "eval_samples_per_second": 101.506, - "eval_steps_per_second": 0.795, + "eval_loss": 1.1488478183746338, + "eval_runtime": 84.3931, + "eval_samples_per_second": 182.989, + "eval_steps_per_second": 2.868, "step": 1000 }, - { - "epoch": 0.002198, - "loss_gen": 5.590832233428955, - "loss_rtd": 0.44405338168144226, - "loss_sent": 0.17209087312221527, - "loss_sod": 0.13748066127300262, - "loss_total": 2.4303739070892334, - "step": 1099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.535982608795166, - "loss_rtd": 0.436458945274353, - "loss_sent": 0.4832025170326233, - "loss_sod": 0.1516181230545044, - "loss_total": 2.731578826904297, - "step": 1099 - }, { "epoch": 0.0022, - "grad_norm": 1.503093957901001, - "learning_rate": 9.09850675910436e-05, - "loss": 2.4442, + "grad_norm": 1.9463247060775757, + "learning_rate": 1.099e-05, + "loss": 1.4649, "step": 1100 }, - { - "epoch": 0.002398, - "loss_gen": 5.088871955871582, - "loss_rtd": 0.4277195334434509, - "loss_sent": 0.1527119278907776, - "loss_sod": 0.07266837358474731, - "loss_total": 2.177920341491699, - "step": 1199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.475919246673584, - "loss_rtd": 0.4493253827095032, - "loss_sent": 0.2745739817619324, - "loss_sod": 0.11778494715690613, - "loss_total": 2.4824793338775635, - "step": 1199 - }, { "epoch": 0.0024, - "grad_norm": 1.0774881839752197, - "learning_rate": 9.096688280809814e-05, - "loss": 2.3916, + "grad_norm": 1.997353434562683, + "learning_rate": 1.199e-05, + "loss": 1.422, "step": 1200 }, - { - "epoch": 0.002598, - "loss_gen": 5.362448215484619, - "loss_rtd": 0.43012556433677673, - "loss_sent": 0.19409601390361786, - "loss_sod": 0.058121513575315475, - "loss_total": 2.286700487136841, - "step": 1299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.386651515960693, - "loss_rtd": 0.4165177345275879, - "loss_sent": 0.30741289258003235, - "loss_sod": 0.15456417202949524, - "loss_total": 2.490093469619751, - "step": 1299 - }, { "epoch": 0.0026, - "grad_norm": 0.9983309507369995, - "learning_rate": 9.09486815236972e-05, - "loss": 2.3624, + "grad_norm": 2.028587818145752, + "learning_rate": 1.299e-05, + "loss": 1.4101, "step": 1300 }, - { - "epoch": 0.002798, - "loss_gen": 5.062756061553955, - "loss_rtd": 0.4479227364063263, - "loss_sent": 0.1385871171951294, - "loss_sod": 0.06484868377447128, - "loss_total": 2.1628317832946777, - "step": 1399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.175561904907227, - "loss_rtd": 0.4320078492164612, - "loss_sent": 0.23488734662532806, - "loss_sod": 0.12735715508460999, - "loss_total": 2.3394033908843994, - "step": 1399 - }, { "epoch": 0.0028, - "grad_norm": 1.7671256065368652, - "learning_rate": 9.093046374517224e-05, - "loss": 2.325, + "grad_norm": 1.8055784702301025, + "learning_rate": 1.399e-05, + "loss": 1.379, "step": 1400 }, - { - "epoch": 0.002998, - "loss_gen": 5.061497688293457, - "loss_rtd": 0.41403764486312866, - "loss_sent": 0.0569935068488121, - "loss_sod": 0.05051300302147865, - "loss_total": 2.0285050868988037, - "step": 1499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.155898571014404, - "loss_rtd": 0.42731085419654846, - "loss_sent": 0.583581268787384, - "loss_sod": 0.1655517816543579, - "loss_total": 2.7115108966827393, - "step": 1499 - }, { "epoch": 0.003, - "grad_norm": 1.6871683597564697, - "learning_rate": 9.091222947986137e-05, - "loss": 2.3133, + "grad_norm": 2.630389451980591, + "learning_rate": 1.499e-05, + "loss": 1.3915, "step": 1500 }, - { - "epoch": 0.003198, - "loss_gen": 5.307708263397217, - "loss_rtd": 0.4442152976989746, - "loss_sent": 0.12124097347259521, - "loss_sod": 0.15964221954345703, - "loss_total": 2.3000707626342773, - "step": 1599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.3126912117004395, - "loss_rtd": 0.42704111337661743, - "loss_sent": 0.34546709060668945, - "loss_sod": 0.13138636946678162, - "loss_total": 2.4803454875946045, - "step": 1599 - }, { "epoch": 0.0032, - "grad_norm": 1.4612807035446167, - "learning_rate": 9.089397873510937e-05, - "loss": 2.2816, + "grad_norm": 1.4471231698989868, + "learning_rate": 1.599e-05, + "loss": 1.3651, "step": 1600 }, - { - "epoch": 0.003398, - "loss_gen": 4.37318229675293, - "loss_rtd": 0.4542223811149597, - "loss_sent": 0.06731607019901276, - "loss_sod": 0.14954794943332672, - "loss_total": 1.963611125946045, - "step": 1699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.005198001861572, - "loss_rtd": 0.4181549549102783, - "loss_sent": 0.19707848131656647, - "loss_sod": 0.04773856699466705, - "loss_total": 2.1422934532165527, - "step": 1699 - }, { "epoch": 0.0034, - "grad_norm": 1.238786220550537, - "learning_rate": 9.087571151826762e-05, - "loss": 2.2543, + "grad_norm": 1.4115934371948242, + "learning_rate": 1.699e-05, + "loss": 1.3327, "step": 1700 }, - { - "epoch": 0.003598, - "loss_gen": 4.819798469543457, - "loss_rtd": 0.4338776171207428, - "loss_sent": 0.1732659637928009, - "loss_sod": 0.07842444628477097, - "loss_total": 2.1035704612731934, - "step": 1799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.023968696594238, - "loss_rtd": 0.46381649374961853, - "loss_sent": 0.0971527025103569, - "loss_sod": 0.27688997983932495, - "loss_total": 2.021725654602051, - "step": 1799 - }, { "epoch": 0.0036, - "grad_norm": 1.0480766296386719, - "learning_rate": 9.085742783669415e-05, - "loss": 2.2141, + "grad_norm": 1.1099858283996582, + "learning_rate": 1.7990000000000002e-05, + "loss": 1.304, "step": 1800 }, - { - "epoch": 0.003798, - "loss_gen": 5.054291725158691, - "loss_rtd": 0.4202824532985687, - "loss_sent": 0.09639989584684372, - "loss_sod": 0.0953429564833641, - "loss_total": 2.0912883281707764, - "step": 1899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.769758701324463, - "loss_rtd": 0.43368253111839294, - "loss_sent": 0.12541694939136505, - "loss_sod": 0.14418333768844604, - "loss_total": 2.0992703437805176, - "step": 1899 - }, { "epoch": 0.0038, - "grad_norm": 1.2859623432159424, - "learning_rate": 9.083912769775365e-05, - "loss": 2.1992, + "grad_norm": 1.5767651796340942, + "learning_rate": 1.8990000000000003e-05, + "loss": 1.3375, "step": 1900 }, - { - "epoch": 0.003998, - "loss_gen": 5.237246990203857, - "loss_rtd": 0.41872021555900574, - "loss_sent": 0.06527635455131531, - "loss_sod": 0.1806328445672989, - "loss_total": 2.188520669937134, - "step": 1999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.804346561431885, - "loss_rtd": 0.4427565336227417, - "loss_sent": 0.37472474575042725, - "loss_sod": 0.07424280792474747, - "loss_total": 2.2896533012390137, - "step": 1999 - }, { "epoch": 0.004, - "grad_norm": 1.116328477859497, - "learning_rate": 9.082081110881737e-05, - "loss": 2.1936, + "grad_norm": 1.3484268188476562, + "learning_rate": 1.999e-05, + "loss": 1.3746, "step": 2000 }, { "epoch": 0.004, - "eval_loss": 2.1267459392547607, - "eval_runtime": 151.0606, - "eval_samples_per_second": 102.23, - "eval_steps_per_second": 0.801, + "eval_loss": 1.1486531496047974, + "eval_runtime": 76.1223, + "eval_samples_per_second": 202.871, + "eval_steps_per_second": 3.179, "step": 2000 }, - { - "epoch": 0.004198, - "loss_gen": 4.939090728759766, - "loss_rtd": 0.406046062707901, - "loss_sent": 0.177232027053833, - "loss_sod": 0.07092248648405075, - "loss_total": 2.082077980041504, - "step": 2099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.136044025421143, - "loss_rtd": 0.4234750270843506, - "loss_sent": 0.49980977177619934, - "loss_sod": 0.10405763983726501, - "loss_total": 2.512158155441284, - "step": 2099 - }, { "epoch": 0.0042, - "grad_norm": 1.969171404838562, - "learning_rate": 9.080247807726327e-05, - "loss": 2.1834, + "grad_norm": 1.6412079334259033, + "learning_rate": 2.099e-05, + "loss": 1.3931, "step": 2100 }, - { - "epoch": 0.004398, - "loss_gen": 5.003271102905273, - "loss_rtd": 0.4190343916416168, - "loss_sent": 0.21502840518951416, - "loss_sod": 0.1662188321352005, - "loss_total": 2.2364859580993652, - "step": 2199 - }, - { - "epoch": 0.004398, - "loss_gen": 3.9248173236846924, - "loss_rtd": 0.46104612946510315, - "loss_sent": 0.043037645518779755, - "loss_sod": 0.11977896094322205, - "loss_total": 1.7504937648773193, - "step": 2199 - }, { "epoch": 0.0044, - "grad_norm": 0.8276557922363281, - "learning_rate": 9.07841286104759e-05, - "loss": 2.1231, + "grad_norm": 1.17317533493042, + "learning_rate": 2.199e-05, + "loss": 1.3512, "step": 2200 }, - { - "epoch": 0.004598, - "loss_gen": 3.965472459793091, - "loss_rtd": 0.467626690864563, - "loss_sent": 0.049993958324193954, - "loss_sod": 0.29348552227020264, - "loss_total": 1.9406394958496094, - "step": 2299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.1308698654174805, - "loss_rtd": 0.44145962595939636, - "loss_sent": 0.0027684078086167574, - "loss_sod": 0.40908950567245483, - "loss_total": 2.0299630165100098, - "step": 2299 - }, { "epoch": 0.0046, - "grad_norm": 2.2544002532958984, - "learning_rate": 9.076576271584638e-05, - "loss": 2.151, + "grad_norm": 0.8342074751853943, + "learning_rate": 2.2990000000000002e-05, + "loss": 1.3805, "step": 2300 }, - { - "epoch": 0.004798, - "loss_gen": 4.784628868103027, - "loss_rtd": 0.43977904319763184, - "loss_sent": 0.47826358675956726, - "loss_sod": 0.14754629135131836, - "loss_total": 2.4170873165130615, - "step": 2399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.764978885650635, - "loss_rtd": 0.44556763768196106, - "loss_sent": 0.28284481167793274, - "loss_sod": 0.039426274597644806, - "loss_total": 2.1137866973876953, - "step": 2399 - }, { "epoch": 0.0048, - "grad_norm": 3.047312021255493, - "learning_rate": 9.074738040077253e-05, - "loss": 2.1187, + "grad_norm": 1.5843234062194824, + "learning_rate": 2.3990000000000002e-05, + "loss": 1.377, "step": 2400 }, - { - "epoch": 0.004998, - "loss_gen": 4.629762172698975, - "loss_rtd": 0.4105084240436554, - "loss_sent": 0.4007233679294586, - "loss_sod": 0.04058845341205597, - "loss_total": 2.147829532623291, - "step": 2499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.5216569900512695, - "loss_rtd": 0.43388718366622925, - "loss_sent": 0.10170675814151764, - "loss_sod": 0.08774755895137787, - "loss_total": 1.8890888690948486, - "step": 2499 - }, { "epoch": 0.005, - "grad_norm": 1.293394684791565, - "learning_rate": 9.07289816726587e-05, - "loss": 2.1094, + "grad_norm": 1.915511131286621, + "learning_rate": 2.4990000000000003e-05, + "loss": 1.3659, "step": 2500 }, - { - "epoch": 0.005198, - "loss_gen": 4.740551948547363, - "loss_rtd": 0.4234953224658966, - "loss_sent": 0.2437671273946762, - "loss_sod": 0.13527914881706238, - "loss_total": 2.116788148880005, - "step": 2599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.820707321166992, - "loss_rtd": 0.4365331828594208, - "loss_sent": 0.18041883409023285, - "loss_sod": 0.07415901869535446, - "loss_total": 2.0275795459747314, - "step": 2599 - }, { "epoch": 0.0052, - "grad_norm": 1.3240649700164795, - "learning_rate": 9.071056653891595e-05, - "loss": 2.0402, + "grad_norm": 1.6507076025009155, + "learning_rate": 2.5990000000000004e-05, + "loss": 1.2875, "step": 2600 }, - { - "epoch": 0.005398, - "loss_gen": 4.781034469604492, - "loss_rtd": 0.426374226808548, - "loss_sent": 0.32308676838874817, - "loss_sod": 0.16155104339122772, - "loss_total": 2.222855806350708, - "step": 2699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.135765075683594, - "loss_rtd": 0.3981669843196869, - "loss_sent": 0.21877646446228027, - "loss_sod": 0.18226270377635956, - "loss_total": 2.2083826065063477, - "step": 2699 - }, { "epoch": 0.0054, - "grad_norm": 1.8904129266738892, - "learning_rate": 9.069213500696186e-05, - "loss": 2.0394, + "grad_norm": 1.5680265426635742, + "learning_rate": 2.6989999999999997e-05, + "loss": 1.3402, "step": 2700 }, - { - "epoch": 0.005598, - "loss_gen": 3.7506027221679688, - "loss_rtd": 0.4511221647262573, - "loss_sent": 0.028750889003276825, - "loss_sod": 0.32733532786369324, - "loss_total": 1.825059413909912, - "step": 2799 - }, - { - "epoch": 0.005598, - "loss_gen": 3.439668655395508, - "loss_rtd": 0.4633672833442688, - "loss_sent": 0.000510736252181232, - "loss_sod": 0.4404051601886749, - "loss_total": 1.8377517461776733, - "step": 2799 - }, { "epoch": 0.0056, - "grad_norm": 0.7309032678604126, - "learning_rate": 9.067368708422066e-05, - "loss": 2.0247, + "grad_norm": 0.8005309700965881, + "learning_rate": 2.7989999999999998e-05, + "loss": 1.3565, "step": 2800 }, - { - "epoch": 0.005798, - "loss_gen": 4.358363628387451, - "loss_rtd": 0.435396283864975, - "loss_sent": 0.1706308275461197, - "loss_sod": 0.02801288664340973, - "loss_total": 1.8031005859375, - "step": 2899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.692056179046631, - "loss_rtd": 0.4350062608718872, - "loss_sent": 0.135645791888237, - "loss_sod": 0.07625914365053177, - "loss_total": 1.9054793119430542, - "step": 2899 - }, { "epoch": 0.0058, - "grad_norm": 1.024571180343628, - "learning_rate": 9.065522277812314e-05, - "loss": 1.9753, + "grad_norm": 1.664014220237732, + "learning_rate": 2.8990000000000002e-05, + "loss": 1.3118, "step": 2900 }, - { - "epoch": 0.005998, - "loss_gen": 3.3283891677856445, - "loss_rtd": 0.4622822403907776, - "loss_sent": 0.042274124920368195, - "loss_sod": 0.08875666558742523, - "loss_total": 1.4751375913619995, - "step": 2999 - }, - { - "epoch": 0.005998, - "loss_gen": 3.772102117538452, - "loss_rtd": 0.471868634223938, - "loss_sent": 0.03946878761053085, - "loss_sod": 0.13368847966194153, - "loss_total": 1.6444077491760254, - "step": 2999 - }, { "epoch": 0.006, - "grad_norm": 0.7709165215492249, - "learning_rate": 9.063674209610678e-05, - "loss": 1.95, + "grad_norm": 1.1597651243209839, + "learning_rate": 2.9990000000000003e-05, + "loss": 1.3207, "step": 3000 }, { "epoch": 0.006, - "eval_loss": 1.9189565181732178, - "eval_runtime": 151.1801, - "eval_samples_per_second": 102.15, - "eval_steps_per_second": 0.8, + "eval_loss": 1.1344993114471436, + "eval_runtime": 76.5771, + "eval_samples_per_second": 201.666, + "eval_steps_per_second": 3.16, "step": 3000 }, - { - "epoch": 0.006198, - "loss_gen": 4.625843048095703, - "loss_rtd": 0.41452756524086, - "loss_sent": 0.42125964164733887, - "loss_sod": 0.04516763985157013, - "loss_total": 2.090644121170044, - "step": 3099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.561415672302246, - "loss_rtd": 0.4227144420146942, - "loss_sent": 0.24549300968647003, - "loss_sod": 0.09064850211143494, - "loss_total": 1.9516971111297607, - "step": 3099 - }, { "epoch": 0.0062, - "grad_norm": 1.147400975227356, - "learning_rate": 9.061824504561555e-05, - "loss": 1.9453, + "grad_norm": 1.6559661626815796, + "learning_rate": 3.099e-05, + "loss": 1.3103, "step": 3100 }, - { - "epoch": 0.006398, - "loss_gen": 5.0961737632751465, - "loss_rtd": 0.4299827516078949, - "loss_sent": 0.2821832001209259, - "loss_sod": 0.23717480897903442, - "loss_total": 2.263834238052368, - "step": 3199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.924869060516357, - "loss_rtd": 0.4275287985801697, - "loss_sent": 0.1261586993932724, - "loss_sod": 0.17284032702445984, - "loss_total": 1.996835470199585, - "step": 3199 - }, { "epoch": 0.0064, - "grad_norm": 1.0568593740463257, - "learning_rate": 9.059973163410007e-05, - "loss": 1.9416, + "grad_norm": 1.390712857246399, + "learning_rate": 3.1990000000000004e-05, + "loss": 1.3855, "step": 3200 }, - { - "epoch": 0.006598, - "loss_gen": 4.659168243408203, - "loss_rtd": 0.42212942242622375, - "loss_sent": 0.27326828241348267, - "loss_sod": 0.15088553726673126, - "loss_total": 2.030813455581665, - "step": 3299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.539935111999512, - "loss_rtd": 0.4461780786514282, - "loss_sent": 0.3518986105918884, - "loss_sod": 0.0318928137421608, - "loss_total": 1.9841861724853516, - "step": 3299 - }, { "epoch": 0.0066, - "grad_norm": 2.5101399421691895, - "learning_rate": 9.058120186901755e-05, - "loss": 1.8857, + "grad_norm": 1.9980418682098389, + "learning_rate": 3.299e-05, + "loss": 1.3109, "step": 3300 }, - { - "epoch": 0.006798, - "loss_gen": 4.367181777954102, - "loss_rtd": 0.4400678873062134, - "loss_sent": 0.1957220435142517, - "loss_sod": 0.11265156418085098, - "loss_total": 1.8420209884643555, - "step": 3399 - }, - { - "epoch": 0.006798, - "loss_gen": 4.619999408721924, - "loss_rtd": 0.4316225051879883, - "loss_sent": 0.1329963207244873, - "loss_sod": 0.11254145950078964, - "loss_total": 1.8340474367141724, - "step": 3399 - }, { "epoch": 0.0068, - "grad_norm": 1.8781721591949463, - "learning_rate": 9.056265575783176e-05, - "loss": 1.8745, + "grad_norm": 1.2899682521820068, + "learning_rate": 3.399e-05, + "loss": 1.3219, "step": 3400 }, - { - "epoch": 0.006998, - "loss_gen": 4.546133518218994, - "loss_rtd": 0.437592476606369, - "loss_sent": 0.38809189200401306, - "loss_sod": 0.14976780116558075, - "loss_total": 2.095884084701538, - "step": 3499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.594987869262695, - "loss_rtd": 0.43721577525138855, - "loss_sent": 0.3743051588535309, - "loss_sod": 0.022005265578627586, - "loss_total": 1.965998888015747, - "step": 3499 - }, { "epoch": 0.007, - "grad_norm": 3.105059862136841, - "learning_rate": 9.05440933080131e-05, - "loss": 1.855, + "grad_norm": 1.44901704788208, + "learning_rate": 3.499e-05, + "loss": 1.3089, "step": 3500 }, - { - "epoch": 0.007198, - "loss_gen": 5.049171447753906, - "loss_rtd": 0.43322035670280457, - "loss_sent": 0.34750890731811523, - "loss_sod": 0.07064300775527954, - "loss_total": 2.0752432346343994, - "step": 3599 - }, - { - "epoch": 0.007198, - "loss_gen": 3.8683502674102783, - "loss_rtd": 0.4488944113254547, - "loss_sent": 0.08359670639038086, - "loss_sod": 0.15620453655719757, - "loss_total": 1.6263469457626343, - "step": 3599 - }, { "epoch": 0.0072, - "grad_norm": 1.1705760955810547, - "learning_rate": 9.052551452703848e-05, - "loss": 1.8166, + "grad_norm": 1.3377976417541504, + "learning_rate": 3.599e-05, + "loss": 1.2995, "step": 3600 }, - { - "epoch": 0.007398, - "loss_gen": 4.654787063598633, - "loss_rtd": 0.4270414113998413, - "loss_sent": 0.2564600110054016, - "loss_sod": 0.2547892928123474, - "loss_total": 2.0471081733703613, - "step": 3699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.532984733581543, - "loss_rtd": 0.43204793334007263, - "loss_sent": 0.31941744685173035, - "loss_sod": 0.10170117765665054, - "loss_total": 1.9329694509506226, - "step": 3699 - }, { "epoch": 0.0074, - "grad_norm": 1.5379921197891235, - "learning_rate": 9.050691942239147e-05, - "loss": 1.8172, + "grad_norm": 1.5043129920959473, + "learning_rate": 3.699e-05, + "loss": 1.3421, "step": 3700 }, - { - "epoch": 0.007598, - "loss_gen": 4.633910655975342, - "loss_rtd": 0.4312874376773834, - "loss_sent": 0.10443449020385742, - "loss_sod": 0.10198817402124405, - "loss_total": 1.7216852903366089, - "step": 3799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.459100723266602, - "loss_rtd": 0.4371269941329956, - "loss_sent": 0.1382666677236557, - "loss_sod": 0.11707606166601181, - "loss_total": 1.7355529069900513, - "step": 3799 - }, { "epoch": 0.0076, - "grad_norm": 1.3472646474838257, - "learning_rate": 9.048830800156217e-05, - "loss": 1.7869, + "grad_norm": 1.4387165307998657, + "learning_rate": 3.799e-05, + "loss": 1.3337, "step": 3800 }, - { - "epoch": 0.007798, - "loss_gen": 4.800703525543213, - "loss_rtd": 0.43522247672080994, - "loss_sent": 0.1432768851518631, - "loss_sod": 0.1126156821846962, - "loss_total": 1.7930315732955933, - "step": 3899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.192720890045166, - "loss_rtd": 0.443738728761673, - "loss_sent": 0.12932538986206055, - "loss_sod": 0.21474014222621918, - "loss_total": 1.750169038772583, - "step": 3899 - }, { "epoch": 0.0078, - "grad_norm": 0.6541185975074768, - "learning_rate": 9.046968027204723e-05, - "loss": 1.7513, + "grad_norm": 1.1607294082641602, + "learning_rate": 3.8990000000000004e-05, + "loss": 1.2852, "step": 3900 }, - { - "epoch": 0.007998, - "loss_gen": 4.6910858154296875, - "loss_rtd": 0.42430779337882996, - "loss_sent": 0.18219135701656342, - "loss_sod": 0.18787550926208496, - "loss_total": 1.8500816822052002, - "step": 3999 - }, - { - "epoch": 0.007998, - "loss_gen": 3.837690830230713, - "loss_rtd": 0.45955485105514526, - "loss_sent": 0.011964034289121628, - "loss_sod": 0.5141125917434692, - "loss_total": 1.8492859601974487, - "step": 3999 - }, { "epoch": 0.008, - "grad_norm": 1.1112271547317505, - "learning_rate": 9.045103624134992e-05, - "loss": 1.7631, + "grad_norm": 1.0189259052276611, + "learning_rate": 3.999e-05, + "loss": 1.3277, "step": 4000 }, { "epoch": 0.008, - "eval_loss": 1.6880124807357788, - "eval_runtime": 151.9356, - "eval_samples_per_second": 101.642, - "eval_steps_per_second": 0.796, + "eval_loss": 1.1298929452896118, + "eval_runtime": 76.4952, + "eval_samples_per_second": 201.882, + "eval_steps_per_second": 3.164, "step": 4000 }, - { - "epoch": 0.008198, - "loss_gen": 4.3431878089904785, - "loss_rtd": 0.43319565057754517, - "loss_sent": 0.31508129835128784, - "loss_sod": 0.0758034810423851, - "loss_total": 1.7816097736358643, - "step": 4099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.724102020263672, - "loss_rtd": 0.43733254075050354, - "loss_sent": 0.36949220299720764, - "loss_sod": 0.044229187071323395, - "loss_total": 1.8925622701644897, - "step": 4099 - }, { "epoch": 0.0082, - "grad_norm": 2.3654143810272217, - "learning_rate": 9.043237591698004e-05, - "loss": 1.6969, + "grad_norm": 1.6229581832885742, + "learning_rate": 4.099e-05, + "loss": 1.2878, "step": 4100 }, - { - "epoch": 0.008398, - "loss_gen": 4.745401859283447, - "loss_rtd": 0.42455577850341797, - "loss_sent": 0.6431677937507629, - "loss_sod": 0.08375408500432968, - "loss_total": 2.1755483150482178, - "step": 4199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.193449020385742, - "loss_rtd": 0.44740211963653564, - "loss_sent": 0.2250525802373886, - "loss_sod": 0.06576678156852722, - "loss_total": 1.6431792974472046, - "step": 4199 - }, { "epoch": 0.0084, - "grad_norm": 2.3624160289764404, - "learning_rate": 9.0413699306454e-05, - "loss": 1.6817, + "grad_norm": 1.693702220916748, + "learning_rate": 4.199e-05, + "loss": 1.313, "step": 4200 }, - { - "epoch": 0.008598, - "loss_gen": 3.6266229152679443, - "loss_rtd": 0.4430789053440094, - "loss_sent": 0.02863028645515442, - "loss_sod": 0.125107541680336, - "loss_total": 1.3622456789016724, - "step": 4299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.528926849365234, - "loss_rtd": 0.43553048372268677, - "loss_sent": 0.1775393933057785, - "loss_sod": 0.057973627001047134, - "loss_total": 1.6269111633300781, - "step": 4299 - }, { "epoch": 0.0086, - "grad_norm": 0.9962396025657654, - "learning_rate": 9.03950064172947e-05, - "loss": 1.6548, + "grad_norm": 1.169730544090271, + "learning_rate": 4.299e-05, + "loss": 1.2915, "step": 4300 }, - { - "epoch": 0.008798, - "loss_gen": 4.4079437255859375, - "loss_rtd": 0.4349976181983948, - "loss_sent": 0.367504745721817, - "loss_sod": 0.18055737018585205, - "loss_total": 1.892151951789856, - "step": 4399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.170320987701416, - "loss_rtd": 0.46673741936683655, - "loss_sent": 0.29705068469047546, - "loss_sod": 0.09038496017456055, - "loss_total": 1.7142581939697266, - "step": 4399 - }, { "epoch": 0.0088, - "grad_norm": 1.3224109411239624, - "learning_rate": 9.037629725703166e-05, - "loss": 1.6651, + "grad_norm": 1.3561712503433228, + "learning_rate": 4.3990000000000004e-05, + "loss": 1.3337, "step": 4400 }, - { - "epoch": 0.008998, - "loss_gen": 4.52614164352417, - "loss_rtd": 0.4263684153556824, - "loss_sent": 0.19911253452301025, - "loss_sod": 0.07762130349874496, - "loss_total": 1.6144509315490723, - "step": 4499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.717104911804199, - "loss_rtd": 0.4144288897514343, - "loss_sent": 0.4404892921447754, - "loss_sod": 0.05830947682261467, - "loss_total": 1.8630272150039673, - "step": 4499 - }, { "epoch": 0.009, - "grad_norm": 1.3386483192443848, - "learning_rate": 9.035757183320088e-05, - "loss": 1.622, + "grad_norm": 1.4713114500045776, + "learning_rate": 4.499e-05, + "loss": 1.309, "step": 4500 }, - { - "epoch": 0.009198, - "loss_gen": 4.1291399002075195, - "loss_rtd": 0.44797226786613464, - "loss_sent": 0.13307587802410126, - "loss_sod": 0.14009913802146912, - "loss_total": 1.5321199893951416, - "step": 4599 - }, - { - "epoch": 0.009198, - "loss_gen": 3.3122775554656982, - "loss_rtd": 0.4629718065261841, - "loss_sent": 0.0329078771173954, - "loss_sod": 0.2527591586112976, - "loss_total": 1.3991779088974, - "step": 4599 - }, { "epoch": 0.0092, - "grad_norm": 0.636795699596405, - "learning_rate": 9.033883015334501e-05, - "loss": 1.6116, + "grad_norm": 1.0679044723510742, + "learning_rate": 4.599e-05, + "loss": 1.3464, "step": 4600 }, - { - "epoch": 0.009398, - "loss_gen": 4.3619160652160645, - "loss_rtd": 0.3999948799610138, - "loss_sent": 0.1576240509748459, - "loss_sod": 0.0319533571600914, - "loss_total": 1.4244252443313599, - "step": 4699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.300095081329346, - "loss_rtd": 0.45642659068107605, - "loss_sent": 0.17598015069961548, - "loss_sod": 0.09119284152984619, - "loss_total": 1.5466203689575195, - "step": 4699 - }, { "epoch": 0.0094, - "grad_norm": 0.5705883502960205, - "learning_rate": 9.032007222501318e-05, - "loss": 1.6013, + "grad_norm": 1.4595869779586792, + "learning_rate": 4.699e-05, + "loss": 1.3385, "step": 4700 }, - { - "epoch": 0.009598, - "loss_gen": 4.3331685066223145, - "loss_rtd": 0.4457979202270508, - "loss_sent": 0.5673869848251343, - "loss_sod": 0.05349266901612282, - "loss_total": 1.8741161823272705, - "step": 4799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.254533767700195, - "loss_rtd": 0.4539889991283417, - "loss_sent": 0.3765316903591156, - "loss_sod": 0.011228205636143684, - "loss_total": 1.6345348358154297, - "step": 4799 - }, { "epoch": 0.0096, - "grad_norm": 2.2074193954467773, - "learning_rate": 9.030129805576109e-05, - "loss": 1.5485, + "grad_norm": 1.6443949937820435, + "learning_rate": 4.799e-05, + "loss": 1.3287, "step": 4800 }, - { - "epoch": 0.009798, - "loss_gen": 4.519925594329834, - "loss_rtd": 0.42422616481781006, - "loss_sent": 0.1548745185136795, - "loss_sod": 0.11548137664794922, - "loss_total": 1.5137642621994019, - "step": 4899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.240198135375977, - "loss_rtd": 0.4464409649372101, - "loss_sent": 0.13806650042533875, - "loss_sod": 0.06082164868712425, - "loss_total": 1.4138140678405762, - "step": 4899 - }, { "epoch": 0.0098, - "grad_norm": 0.8736671209335327, - "learning_rate": 9.028250765315094e-05, - "loss": 1.5344, + "grad_norm": 1.3524634838104248, + "learning_rate": 4.8990000000000004e-05, + "loss": 1.3224, "step": 4900 }, - { - "epoch": 0.009998, - "loss_gen": 4.306075096130371, - "loss_rtd": 0.4342154860496521, - "loss_sent": 0.3236868381500244, - "loss_sod": 0.03643370792269707, - "loss_total": 1.5526306629180908, - "step": 4999 - }, - { - "epoch": 0.009998, - "loss_gen": 3.5708041191101074, - "loss_rtd": 0.45510080456733704, - "loss_sent": 0.13939236104488373, - "loss_sod": 0.19058340787887573, - "loss_total": 1.4138908386230469, - "step": 4999 - }, { "epoch": 0.01, - "grad_norm": 2.0682897567749023, - "learning_rate": 9.026370102475154e-05, - "loss": 1.5218, + "grad_norm": 1.552986979484558, + "learning_rate": 4.999e-05, + "loss": 1.3256, "step": 5000 }, { "epoch": 0.01, - "eval_loss": 1.4553194046020508, - "eval_runtime": 151.1516, - "eval_samples_per_second": 102.169, - "eval_steps_per_second": 0.801, + "eval_loss": 1.1314986944198608, + "eval_runtime": 76.3433, + "eval_samples_per_second": 202.284, + "eval_steps_per_second": 3.17, "step": 5000 }, - { - "epoch": 0.010198, - "loss_gen": 4.2357563972473145, - "loss_rtd": 0.4142667353153229, - "loss_sent": 0.18892182409763336, - "loss_sod": 0.07149676233530045, - "loss_total": 1.3986937999725342, - "step": 5099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.266040325164795, - "loss_rtd": 0.43612152338027954, - "loss_sent": 0.20986633002758026, - "loss_sod": 0.16188688576221466, - "loss_total": 1.5370596647262573, - "step": 5099 - }, { "epoch": 0.0102, - "grad_norm": 3.3480117321014404, - "learning_rate": 9.024487817813818e-05, - "loss": 1.502, + "grad_norm": 1.1126846075057983, + "learning_rate": 4.9999995065197964e-05, + "loss": 1.3184, "step": 5100 }, - { - "epoch": 0.010398, - "loss_gen": 3.4499049186706543, - "loss_rtd": 0.4549427926540375, - "loss_sent": 0.11068418622016907, - "loss_sod": 0.1226339265704155, - "loss_total": 1.260018229484558, - "step": 5199 - }, - { - "epoch": 0.010398, - "loss_gen": 2.777376890182495, - "loss_rtd": 0.4569548964500427, - "loss_sent": 0.0002673329727258533, - "loss_sod": 0.3624283969402313, - "loss_total": 1.2799490690231323, - "step": 5199 - }, { "epoch": 0.0104, - "grad_norm": 0.6756449937820435, - "learning_rate": 9.02260391208927e-05, - "loss": 1.4573, + "grad_norm": 0.8533400893211365, + "learning_rate": 4.999998006090441e-05, + "loss": 1.3145, "step": 5200 }, - { - "epoch": 0.010598, - "loss_gen": 4.136087417602539, - "loss_rtd": 0.4495868980884552, - "loss_sent": 0.09559452533721924, - "loss_sod": 0.09505974501371384, - "loss_total": 1.3041480779647827, - "step": 5299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.018187046051025, - "loss_rtd": 0.46124017238616943, - "loss_sent": 0.21007047593593597, - "loss_sod": 0.017557775601744652, - "loss_total": 1.3338505029678345, - "step": 5299 - }, { "epoch": 0.0106, - "grad_norm": 0.9290640950202942, - "learning_rate": 9.020718386060347e-05, - "loss": 1.4342, + "grad_norm": 1.6032077074050903, + "learning_rate": 4.9999954986621866e-05, + "loss": 1.2894, "step": 5300 }, - { - "epoch": 0.010798, - "loss_gen": 4.0267462730407715, - "loss_rtd": 0.4358631372451782, - "loss_sent": 0.24202901124954224, - "loss_sod": 0.007630678825080395, - "loss_total": 1.3108251094818115, - "step": 5399 - }, - { - "epoch": 0.010798, - "loss_gen": 3.931260347366333, - "loss_rtd": 0.45226356387138367, - "loss_sent": 0.051588136702775955, - "loss_sod": 0.2738020420074463, - "loss_total": 1.388128399848938, - "step": 5399 - }, { "epoch": 0.0108, - "grad_norm": 0.7409310340881348, - "learning_rate": 9.018831240486539e-05, - "loss": 1.4041, + "grad_norm": 1.2594430446624756, + "learning_rate": 4.999991984236044e-05, + "loss": 1.2515, "step": 5400 }, - { - "epoch": 0.010998, - "loss_gen": 4.556105613708496, - "loss_rtd": 0.44320401549339294, - "loss_sent": 0.22269946336746216, - "loss_sod": 0.029224302619695663, - "loss_total": 1.378782033920288, - "step": 5499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.53646183013916, - "loss_rtd": 0.4213176369667053, - "loss_sent": 0.14112795889377594, - "loss_sod": 0.24416619539260864, - "loss_total": 1.4873186349868774, - "step": 5499 - }, { "epoch": 0.011, - "grad_norm": 1.722217082977295, - "learning_rate": 9.016942476127988e-05, - "loss": 1.3937, + "grad_norm": 1.2169750928878784, + "learning_rate": 4.99998746281343e-05, + "loss": 1.2603, "step": 5500 }, - { - "epoch": 0.011198, - "loss_gen": 4.555620193481445, - "loss_rtd": 0.45380261540412903, - "loss_sent": 0.4497606158256531, - "loss_sod": 0.06671988219022751, - "loss_total": 1.6300160884857178, - "step": 5599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.417397975921631, - "loss_rtd": 0.4402684271335602, - "loss_sent": 0.6401938199996948, - "loss_sod": 0.12325134873390198, - "loss_total": 1.843429684638977, - "step": 5599 - }, { "epoch": 0.0112, - "grad_norm": 2.376171588897705, - "learning_rate": 9.015052093745488e-05, - "loss": 1.382, + "grad_norm": 1.2038013935089111, + "learning_rate": 4.999981934396165e-05, + "loss": 1.3063, "step": 5600 }, - { - "epoch": 0.011398, - "loss_gen": 3.161689519882202, - "loss_rtd": 0.45311564207077026, - "loss_sent": 0.030058998614549637, - "loss_sod": 0.1347379982471466, - "loss_total": 1.059248924255371, - "step": 5699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.352455139160156, - "loss_rtd": 0.42320743203163147, - "loss_sent": 0.2760780155658722, - "loss_sod": 0.05662854015827179, - "loss_total": 1.363467812538147, - "step": 5699 - }, { "epoch": 0.0114, - "grad_norm": 1.3683046102523804, - "learning_rate": 9.013160094100485e-05, - "loss": 1.3398, + "grad_norm": 1.1477010250091553, + "learning_rate": 4.999975398986476e-05, + "loss": 1.3057, "step": 5700 }, - { - "epoch": 0.011598, - "loss_gen": 2.8516530990600586, - "loss_rtd": 0.46841028332710266, - "loss_sent": 0.00027514316025190055, - "loss_sod": 0.3374539613723755, - "loss_total": 1.1893240213394165, - "step": 5799 - }, - { - "epoch": 0.011598, - "loss_gen": 3.1871232986450195, - "loss_rtd": 0.473550945520401, - "loss_sent": 0.05105242878198624, - "loss_sod": 0.2973681092262268, - "loss_total": 1.2502342462539673, - "step": 5799 - }, { "epoch": 0.0116, - "grad_norm": 0.8012044429779053, - "learning_rate": 9.011266477955076e-05, - "loss": 1.3203, + "grad_norm": 0.6725754141807556, + "learning_rate": 4.9999678565869944e-05, + "loss": 1.3211, "step": 5800 }, - { - "epoch": 0.011798, - "loss_gen": 4.541375160217285, - "loss_rtd": 0.43058615922927856, - "loss_sent": 0.12835851311683655, - "loss_sod": 0.0999765694141388, - "loss_total": 1.2455573081970215, - "step": 5899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.475722312927246, - "loss_rtd": 0.4540126919746399, - "loss_sent": 0.04481251910328865, - "loss_sod": 0.10295294225215912, - "loss_total": 1.1799335479736328, - "step": 5899 - }, { "epoch": 0.0118, - "grad_norm": 0.9984089136123657, - "learning_rate": 9.00937124607201e-05, + "grad_norm": 1.5470402240753174, + "learning_rate": 4.99995930720076e-05, "loss": 1.2794, "step": 5900 }, - { - "epoch": 0.011998, - "loss_gen": 4.149008750915527, - "loss_rtd": 0.44907456636428833, - "loss_sent": 0.1539432555437088, - "loss_sod": 0.1032748594880104, - "loss_total": 1.220787763595581, - "step": 5999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.26630163192749, - "loss_rtd": 0.43877696990966797, - "loss_sent": 0.11144015192985535, - "loss_sod": 0.14063358306884766, - "loss_total": 1.2198905944824219, - "step": 5999 - }, { "epoch": 0.012, - "grad_norm": 1.0076442956924438, - "learning_rate": 9.007474399214685e-05, - "loss": 1.2577, + "grad_norm": 1.8079277276992798, + "learning_rate": 4.999949750831215e-05, + "loss": 1.2736, "step": 6000 }, { "epoch": 0.012, - "eval_loss": 1.2371487617492676, - "eval_runtime": 151.5278, - "eval_samples_per_second": 101.915, - "eval_steps_per_second": 0.799, + "eval_loss": 1.1335862874984741, + "eval_runtime": 76.3508, + "eval_samples_per_second": 202.264, + "eval_steps_per_second": 3.17, "step": 6000 }, - { - "epoch": 0.012198, - "loss_gen": 4.438620090484619, - "loss_rtd": 0.44645529985427856, - "loss_sent": 0.31584903597831726, - "loss_sod": 0.038453131914138794, - "loss_total": 1.3283517360687256, - "step": 6099 - }, - { - "epoch": 0.012198, - "loss_gen": 3.957967758178711, - "loss_rtd": 0.4457968771457672, - "loss_sent": 0.2645883560180664, - "loss_sod": 0.07057566940784454, - "loss_total": 1.251422643661499, - "step": 6099 - }, { "epoch": 0.0122, - "grad_norm": 3.596428155899048, - "learning_rate": 9.005575938147153e-05, - "loss": 1.2417, + "grad_norm": 1.4117431640625, + "learning_rate": 4.99993918748221e-05, + "loss": 1.3142, "step": 6100 }, - { - "epoch": 0.012398, - "loss_gen": 4.318472862243652, - "loss_rtd": 0.4407297372817993, - "loss_sent": 0.1350451558828354, - "loss_sod": 0.1250823438167572, - "loss_total": 1.192137598991394, - "step": 6199 - }, - { - "epoch": 0.012398, - "loss_gen": 3.8575875759124756, - "loss_rtd": 0.4555145800113678, - "loss_sent": 0.1910392791032791, - "loss_sod": 0.038792889565229416, - "loss_total": 1.1241956949234009, - "step": 6199 - }, { "epoch": 0.0124, - "grad_norm": 1.5775076150894165, - "learning_rate": 9.003675863634109e-05, - "loss": 1.2279, + "grad_norm": 1.2657192945480347, + "learning_rate": 4.999927617157998e-05, + "loss": 1.3216, "step": 6200 }, - { - "epoch": 0.012598, - "loss_gen": 4.14998722076416, - "loss_rtd": 0.4265194833278656, - "loss_sent": 0.30052652955055237, - "loss_sod": 0.07797949016094208, - "loss_total": 1.2561488151550293, - "step": 6299 - }, - { - "epoch": 0.012598, - "loss_gen": 2.7819416522979736, - "loss_rtd": 0.4592662751674652, - "loss_sent": 0.06214335188269615, - "loss_sod": 0.27429041266441345, - "loss_total": 1.0981101989746094, - "step": 6299 - }, { "epoch": 0.0126, - "grad_norm": 1.3763465881347656, - "learning_rate": 9.001774176440907e-05, - "loss": 1.2032, + "grad_norm": 1.0358809232711792, + "learning_rate": 4.9999150398632425e-05, + "loss": 1.329, "step": 6300 }, - { - "epoch": 0.012798, - "loss_gen": 4.117013931274414, - "loss_rtd": 0.4338371753692627, - "loss_sent": 0.20068877935409546, - "loss_sod": 0.014359413646161556, - "loss_total": 1.0758084058761597, - "step": 6399 - }, - { - "epoch": 0.012798, - "loss_gen": 3.95107364654541, - "loss_rtd": 0.43674370646476746, - "loss_sent": 0.11374907940626144, - "loss_sod": 0.005498822778463364, - "loss_total": 0.9657070636749268, - "step": 6399 - }, { "epoch": 0.0128, - "grad_norm": 1.0737333297729492, - "learning_rate": 8.999870877333546e-05, - "loss": 1.2041, + "grad_norm": 1.6824450492858887, + "learning_rate": 4.999901455603007e-05, + "loss": 1.2911, "step": 6400 }, - { - "epoch": 0.012998, - "loss_gen": 4.177030563354492, - "loss_rtd": 0.4391840994358063, - "loss_sent": 0.13953599333763123, - "loss_sod": 0.06702539324760437, - "loss_total": 1.058211326599121, - "step": 6499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.0403361320495605, - "loss_rtd": 0.4460426867008209, - "loss_sent": 0.4360485076904297, - "loss_sod": 0.0397292897105217, - "loss_total": 1.3207882642745972, - "step": 6499 - }, { "epoch": 0.013, - "grad_norm": 2.090071201324463, - "learning_rate": 8.997965967078675e-05, - "loss": 1.1728, + "grad_norm": 1.5632168054580688, + "learning_rate": 4.9998868643827635e-05, + "loss": 1.3004, "step": 6500 }, - { - "epoch": 0.013198, - "loss_gen": 4.261964797973633, - "loss_rtd": 0.45017364621162415, - "loss_sent": 0.680860161781311, - "loss_sod": 0.02003784291446209, - "loss_total": 1.5510892868041992, - "step": 6599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.514923095703125, - "loss_rtd": 0.4445608854293823, - "loss_sent": 0.15158210694789886, - "loss_sod": 0.28943219780921936, - "loss_total": 1.3093348741531372, - "step": 6599 - }, { "epoch": 0.0132, - "grad_norm": 2.357900857925415, - "learning_rate": 8.996059446443587e-05, - "loss": 1.1415, + "grad_norm": 1.254310131072998, + "learning_rate": 4.99987126620839e-05, + "loss": 1.2981, "step": 6600 }, - { - "epoch": 0.013398, - "loss_gen": 4.206921100616455, - "loss_rtd": 0.45165202021598816, - "loss_sent": 0.37640759348869324, - "loss_sod": 0.11158914864063263, - "loss_total": 1.3142218589782715, - "step": 6699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.138385772705078, - "loss_rtd": 0.4333704113960266, - "loss_sent": 0.19938777387142181, - "loss_sod": 0.07604871690273285, - "loss_total": 1.0772777795791626, - "step": 6699 - }, { "epoch": 0.0134, - "grad_norm": 2.0228612422943115, - "learning_rate": 8.994151316196236e-05, - "loss": 1.1384, + "grad_norm": 1.4540060758590698, + "learning_rate": 4.999854661086171e-05, + "loss": 1.3184, "step": 6700 }, - { - "epoch": 0.013598, - "loss_gen": 4.004166603088379, - "loss_rtd": 0.4367596209049225, - "loss_sent": 0.11661457270383835, - "loss_sod": 0.051644932478666306, - "loss_total": 0.9425358772277832, - "step": 6799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.20042610168457, - "loss_rtd": 0.4377318024635315, - "loss_sent": 0.2091219127178192, - "loss_sod": 0.10864714533090591, - "loss_total": 1.1095606088638306, - "step": 6799 - }, { "epoch": 0.0136, - "grad_norm": 1.7526026964187622, - "learning_rate": 8.992241577105209e-05, - "loss": 1.1267, + "grad_norm": 1.3684179782867432, + "learning_rate": 4.999837049022792e-05, + "loss": 1.2914, "step": 6800 }, - { - "epoch": 0.013798, - "loss_gen": 4.2462592124938965, - "loss_rtd": 0.46197614073753357, - "loss_sent": 0.19782444834709167, - "loss_sod": 0.12475350499153137, - "loss_total": 1.1226646900177002, - "step": 6899 - }, - { - "epoch": 0.013798, - "loss_gen": 3.794999122619629, - "loss_rtd": 0.46073493361473083, - "loss_sent": 0.2632262110710144, - "loss_sod": 0.19318082928657532, - "loss_total": 1.2193207740783691, - "step": 6899 - }, { "epoch": 0.0138, - "grad_norm": 3.328732490539551, - "learning_rate": 8.990330229939755e-05, - "loss": 1.0854, + "grad_norm": 1.474075436592102, + "learning_rate": 4.999818430025349e-05, + "loss": 1.2702, "step": 6900 }, - { - "epoch": 0.013998, - "loss_gen": 4.278293132781982, - "loss_rtd": 0.451276570558548, - "loss_sent": 0.3585604429244995, - "loss_sod": 0.027789060026407242, - "loss_total": 1.1586921215057373, - "step": 6999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.299320697784424, - "loss_rtd": 0.4333668351173401, - "loss_sent": 0.32892492413520813, - "loss_sod": 0.11924998462200165, - "loss_total": 1.2041857242584229, - "step": 6999 - }, { "epoch": 0.014, - "grad_norm": 1.338874101638794, - "learning_rate": 8.988417275469759e-05, - "loss": 1.061, + "grad_norm": 1.3687875270843506, + "learning_rate": 4.999798804101341e-05, + "loss": 1.2388, "step": 7000 }, { "epoch": 0.014, - "eval_loss": 1.0338186025619507, - "eval_runtime": 151.247, - "eval_samples_per_second": 102.104, - "eval_steps_per_second": 0.8, + "eval_loss": 1.1258224248886108, + "eval_runtime": 76.3516, + "eval_samples_per_second": 202.262, + "eval_steps_per_second": 3.17, "step": 7000 }, - { - "epoch": 0.014198, - "loss_gen": 2.923762083053589, - "loss_rtd": 0.4590458869934082, - "loss_sent": 0.00043936684960499406, - "loss_sod": 0.4023163318634033, - "loss_total": 1.0680921077728271, - "step": 7099 - }, - { - "epoch": 0.014198, - "loss_gen": 2.4773359298706055, - "loss_rtd": 0.4419349431991577, - "loss_sent": 0.0001352017861790955, - "loss_sod": 0.2943947911262512, - "loss_total": 0.9112571477890015, - "step": 7099 - }, { "epoch": 0.0142, - "grad_norm": 1.9851511716842651, - "learning_rate": 8.986502714465762e-05, - "loss": 1.0493, + "grad_norm": 0.6668384075164795, + "learning_rate": 4.999778171258675e-05, + "loss": 1.2768, "step": 7100 }, - { - "epoch": 0.014398, - "loss_gen": 4.283316612243652, - "loss_rtd": 0.4622212052345276, - "loss_sent": 0.07737211883068085, - "loss_sod": 0.11309823393821716, - "loss_total": 0.9360949397087097, - "step": 7199 - }, - { - "epoch": 0.014398, - "loss_gen": 3.8199846744537354, - "loss_rtd": 0.4555726945400238, - "loss_sent": 0.07644452154636383, - "loss_sod": 0.06696701049804688, - "loss_total": 0.8517314791679382, - "step": 7199 - }, { "epoch": 0.0144, - "grad_norm": 0.9992061853408813, - "learning_rate": 8.98458654769895e-05, - "loss": 1.0395, + "grad_norm": 1.1303478479385376, + "learning_rate": 4.9997565315056596e-05, + "loss": 1.2639, "step": 7200 }, - { - "epoch": 0.014598, - "loss_gen": 4.160510540008545, - "loss_rtd": 0.46040648221969604, - "loss_sent": 0.23881760239601135, - "loss_sod": 0.019337791949510574, - "loss_total": 0.9759917259216309, - "step": 7299 - }, - { - "epoch": 0.014598, - "loss_gen": 4.084108829498291, - "loss_rtd": 0.43639713525772095, - "loss_sent": 0.39973214268684387, - "loss_sod": 0.009163892827928066, - "loss_total": 1.097995638847351, - "step": 7299 - }, { "epoch": 0.0146, - "grad_norm": 1.4760040044784546, - "learning_rate": 8.98266877594115e-05, - "loss": 1.0213, + "grad_norm": 1.516221046447754, + "learning_rate": 4.999733884851012e-05, + "loss": 1.2805, "step": 7300 }, - { - "epoch": 0.014798, - "loss_gen": 4.018357276916504, - "loss_rtd": 0.4384211301803589, - "loss_sent": 0.30337029695510864, - "loss_sod": 0.1135876476764679, - "loss_total": 1.0872063636779785, - "step": 7399 - }, - { - "epoch": 0.014798, - "loss_gen": 3.9204838275909424, - "loss_rtd": 0.43890058994293213, - "loss_sent": 0.34354591369628906, - "loss_sod": 0.05680248886346817, - "loss_total": 1.0654296875, - "step": 7399 - }, { "epoch": 0.0148, - "grad_norm": 1.4493780136108398, - "learning_rate": 8.980749399964847e-05, - "loss": 0.9879, + "grad_norm": 1.3124428987503052, + "learning_rate": 4.9997102313038544e-05, + "loss": 1.2811, "step": 7400 }, - { - "epoch": 0.014998, - "loss_gen": 4.476629257202148, - "loss_rtd": 0.43758487701416016, - "loss_sent": 0.2716887295246124, - "loss_sod": 0.23893794417381287, - "loss_total": 1.1882572174072266, - "step": 7499 - }, - { - "epoch": 0.014998, - "loss_gen": 3.8585877418518066, - "loss_rtd": 0.45572131872177124, - "loss_sent": 0.3436616063117981, - "loss_sod": 0.14704178273677826, - "loss_total": 1.153329849243164, - "step": 7499 - }, { "epoch": 0.015, - "grad_norm": 1.700055718421936, - "learning_rate": 8.97882842054316e-05, - "loss": 0.9677, + "grad_norm": 1.390687346458435, + "learning_rate": 4.999685570873715e-05, + "loss": 1.2481, "step": 7500 }, - { - "epoch": 0.015198, - "loss_gen": 4.266899585723877, - "loss_rtd": 0.4483409821987152, - "loss_sent": 0.18687587976455688, - "loss_sod": 0.04890352860093117, - "loss_total": 0.8960543870925903, - "step": 7599 - }, - { - "epoch": 0.015198, - "loss_gen": 2.754318952560425, - "loss_rtd": 0.44348207116127014, - "loss_sent": 0.022434303537011147, - "loss_sod": 0.42374366521835327, - "loss_total": 1.0264652967453003, - "step": 7599 - }, { "epoch": 0.0152, - "grad_norm": 1.109738826751709, - "learning_rate": 8.976905838449861e-05, - "loss": 0.9591, + "grad_norm": 0.8783305883407593, + "learning_rate": 4.999659903570526e-05, + "loss": 1.2986, "step": 7600 }, - { - "epoch": 0.015398, - "loss_gen": 3.008835554122925, - "loss_rtd": 0.47243383526802063, - "loss_sent": 0.04867243766784668, - "loss_sod": 0.1572308987379074, - "loss_total": 0.81625896692276, - "step": 7699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.257413864135742, - "loss_rtd": 0.42728057503700256, - "loss_sent": 0.1736775040626526, - "loss_sod": 0.015286522917449474, - "loss_total": 0.8113998174667358, - "step": 7699 - }, { "epoch": 0.0154, - "grad_norm": 1.0539206266403198, - "learning_rate": 8.974981654459366e-05, - "loss": 0.9468, + "grad_norm": 1.0741727352142334, + "learning_rate": 4.999633229404628e-05, + "loss": 1.2784, "step": 7700 }, - { - "epoch": 0.015598, - "loss_gen": 3.177402973175049, - "loss_rtd": 0.45968639850616455, - "loss_sent": 0.009485319256782532, - "loss_sod": 0.3038681149482727, - "loss_total": 0.9069209694862366, - "step": 7799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.26717472076416, - "loss_rtd": 0.4419127404689789, - "loss_sent": 0.07726690173149109, - "loss_sod": 0.03336562216281891, - "loss_total": 0.732344388961792, - "step": 7799 - }, { "epoch": 0.0156, - "grad_norm": 0.9270355701446533, - "learning_rate": 8.973055869346735e-05, - "loss": 0.9286, + "grad_norm": 1.022088885307312, + "learning_rate": 4.999605548386763e-05, + "loss": 1.2869, "step": 7800 }, - { - "epoch": 0.015798, - "loss_gen": 4.384439468383789, - "loss_rtd": 0.44737496972084045, - "loss_sent": 0.1470516175031662, - "loss_sod": 0.1933356523513794, - "loss_total": 0.9568408131599426, - "step": 7899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.084644317626953, - "loss_rtd": 0.4502268433570862, - "loss_sent": 0.14218765497207642, - "loss_sod": 0.06409710645675659, - "loss_total": 0.8140290379524231, - "step": 7899 - }, { "epoch": 0.0158, - "grad_norm": 1.05494225025177, - "learning_rate": 8.971128483887676e-05, - "loss": 0.9236, + "grad_norm": 1.0997594594955444, + "learning_rate": 4.9995768605280826e-05, + "loss": 1.2736, "step": 7900 }, - { - "epoch": 0.015998, - "loss_gen": 4.135157585144043, - "loss_rtd": 0.43412676453590393, - "loss_sent": 0.28543326258659363, - "loss_sod": 0.09229005873203278, - "loss_total": 0.9571057558059692, - "step": 7999 - }, - { - "epoch": 0.015998, - "loss_gen": 3.857150077819824, - "loss_rtd": 0.442083477973938, - "loss_sent": 0.2047467678785324, - "loss_sod": 0.14205977320671082, - "loss_total": 0.9243801236152649, - "step": 7999 - }, { "epoch": 0.016, - "grad_norm": 0.962242841720581, - "learning_rate": 8.96919949885854e-05, - "loss": 0.901, + "grad_norm": 1.191188931465149, + "learning_rate": 4.9995471658401414e-05, + "loss": 1.256, "step": 8000 }, { "epoch": 0.016, - "eval_loss": 0.8876739740371704, - "eval_runtime": 152.9353, - "eval_samples_per_second": 100.977, - "eval_steps_per_second": 0.791, + "eval_loss": 1.1234357357025146, + "eval_runtime": 76.115, + "eval_samples_per_second": 202.89, + "eval_steps_per_second": 3.179, "step": 8000 }, - { - "epoch": 0.016198, - "loss_gen": 2.9248859882354736, - "loss_rtd": 0.4561987817287445, - "loss_sent": 0.00010435195144964382, - "loss_sod": 0.36148083209991455, - "loss_total": 0.9108849763870239, - "step": 8099 - }, - { - "epoch": 0.016198, - "loss_gen": 2.6596930027008057, - "loss_rtd": 0.4671928286552429, - "loss_sent": 0.00011411526065785438, - "loss_sod": 0.3841462731361389, - "loss_total": 0.9361129403114319, - "step": 8099 - }, { "epoch": 0.0162, - "grad_norm": 2.1761651039123535, - "learning_rate": 8.967268915036318e-05, - "loss": 0.9123, + "grad_norm": 0.7304887175559998, + "learning_rate": 4.9995164643349015e-05, + "loss": 1.2717, "step": 8100 }, - { - "epoch": 0.016398, - "loss_gen": 4.17715311050415, - "loss_rtd": 0.451453298330307, - "loss_sent": 0.354988157749176, - "loss_sod": 0.1447845697402954, - "loss_total": 1.0710194110870361, - "step": 8199 - }, - { - "epoch": 0.016398, - "loss_gen": 3.652554512023926, - "loss_rtd": 0.47347867488861084, - "loss_sent": 0.10158122330904007, - "loss_sod": 0.11182337254285812, - "loss_total": 0.7916321158409119, - "step": 8199 - }, { "epoch": 0.0164, - "grad_norm": 1.7405667304992676, - "learning_rate": 8.965336733198653e-05, - "loss": 0.8714, + "grad_norm": 1.2335166931152344, + "learning_rate": 4.9994847560247276e-05, + "loss": 1.2657, "step": 8200 }, - { - "epoch": 0.016598, - "loss_gen": 4.009151935577393, - "loss_rtd": 0.4614793360233307, - "loss_sent": 0.27255332469940186, - "loss_sod": 0.03202224522829056, - "loss_total": 0.8689844608306885, - "step": 8299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.062987804412842, - "loss_rtd": 0.4590751826763153, - "loss_sent": 0.1842603087425232, - "loss_sod": 0.19588200747966766, - "loss_total": 0.9435292482376099, - "step": 8299 - }, { "epoch": 0.0166, - "grad_norm": 1.2510912418365479, - "learning_rate": 8.963402954123825e-05, - "loss": 0.872, + "grad_norm": 1.424973487854004, + "learning_rate": 4.999452040922393e-05, + "loss": 1.3235, "step": 8300 }, - { - "epoch": 0.016798, - "loss_gen": 4.116705894470215, - "loss_rtd": 0.44752010703086853, - "loss_sent": 0.06567193567752838, - "loss_sod": 0.1416502296924591, - "loss_total": 0.7487877607345581, - "step": 8399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.250878810882568, - "loss_rtd": 0.4497721791267395, - "loss_sent": 0.1908683329820633, - "loss_sod": 0.022904343903064728, - "loss_total": 0.760552167892456, - "step": 8399 - }, { "epoch": 0.0168, - "grad_norm": 0.8308121562004089, - "learning_rate": 8.961467578590762e-05, - "loss": 0.8702, + "grad_norm": 1.1544169187545776, + "learning_rate": 4.999418319041076e-05, + "loss": 1.2455, "step": 8400 }, - { - "epoch": 0.016998, - "loss_gen": 3.3889882564544678, - "loss_rtd": 0.4579627513885498, - "loss_sent": 0.20038780570030212, - "loss_sod": 0.14318343997001648, - "loss_total": 0.8697285652160645, - "step": 8499 - }, - { - "epoch": 0.016998, - "loss_gen": 3.9100821018218994, - "loss_rtd": 0.45459499955177307, - "loss_sent": 0.11043836176395416, - "loss_sod": 0.07976808398962021, - "loss_total": 0.7234815359115601, - "step": 8499 - }, { "epoch": 0.017, - "grad_norm": 1.0994970798492432, - "learning_rate": 8.959530607379032e-05, - "loss": 0.8502, + "grad_norm": 1.1393338441848755, + "learning_rate": 4.9993835903943585e-05, + "loss": 1.233, "step": 8500 }, - { - "epoch": 0.017198, - "loss_gen": 4.52324914932251, - "loss_rtd": 0.4543832838535309, - "loss_sent": 0.35701337456703186, - "loss_sod": 0.07280991226434708, - "loss_total": 0.9637364149093628, - "step": 8599 - }, - { - "epoch": 0.017198, - "loss_gen": 3.273129940032959, - "loss_rtd": 0.4423699676990509, - "loss_sent": 0.15605831146240234, - "loss_sod": 0.10610494017601013, - "loss_total": 0.7620828747749329, - "step": 8599 - }, { "epoch": 0.0172, - "grad_norm": 1.2175599336624146, - "learning_rate": 8.957592041268846e-05, - "loss": 0.8415, + "grad_norm": 1.1183439493179321, + "learning_rate": 4.99934785499623e-05, + "loss": 1.2282, "step": 8600 }, - { - "epoch": 0.017398, - "loss_gen": 3.7413508892059326, - "loss_rtd": 0.43950116634368896, - "loss_sent": 0.21325911581516266, - "loss_sod": 0.13741731643676758, - "loss_total": 0.8470605611801147, - "step": 8699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.3951568603515625, - "loss_rtd": 0.4412088394165039, - "loss_sent": 0.30719298124313354, - "loss_sod": 0.1277753859758377, - "loss_total": 0.9430004954338074, - "step": 8699 - }, { "epoch": 0.0174, - "grad_norm": 1.5301543474197388, - "learning_rate": 8.955651881041059e-05, - "loss": 0.827, + "grad_norm": 1.275148868560791, + "learning_rate": 4.999311112861084e-05, + "loss": 1.2665, "step": 8700 }, - { - "epoch": 0.017598, - "loss_gen": 4.006596088409424, - "loss_rtd": 0.4507193863391876, - "loss_sent": 0.3619312047958374, - "loss_sod": 0.005532963667064905, - "loss_total": 0.8702272176742554, - "step": 8799 - }, - { - "epoch": 0.017598, - "loss_gen": 3.811447858810425, - "loss_rtd": 0.4546920955181122, - "loss_sent": 0.0801302045583725, - "loss_sod": 0.25472018122673035, - "loss_total": 0.8390512466430664, - "step": 8799 - }, { "epoch": 0.0176, - "grad_norm": 1.0982296466827393, - "learning_rate": 8.953710127477168e-05, - "loss": 0.8128, + "grad_norm": 1.4136372804641724, + "learning_rate": 4.99927336400372e-05, + "loss": 1.2617, "step": 8800 }, - { - "epoch": 0.017798, - "loss_gen": 3.8621981143951416, - "loss_rtd": 0.442514568567276, - "loss_sent": 0.2113874852657318, - "loss_sod": 0.06732678413391113, - "loss_total": 0.7634892463684082, - "step": 8899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.029360294342041, - "loss_rtd": 0.4374653697013855, - "loss_sent": 0.43539056181907654, - "loss_sod": 0.12874938547611237, - "loss_total": 1.0456947088241577, - "step": 8899 - }, { "epoch": 0.0178, - "grad_norm": 1.562687635421753, - "learning_rate": 8.951766781359311e-05, - "loss": 0.8058, + "grad_norm": 1.392327904701233, + "learning_rate": 4.999234608439345e-05, + "loss": 1.292, "step": 8900 }, - { - "epoch": 0.017998, - "loss_gen": 3.94120717048645, - "loss_rtd": 0.4648306965827942, - "loss_sent": 0.1459241509437561, - "loss_sod": 0.052158210426568985, - "loss_total": 0.6986362934112549, - "step": 8999 - }, - { - "epoch": 0.017998, - "loss_gen": 3.806023359298706, - "loss_rtd": 0.45226743817329407, - "loss_sent": 0.17193950712680817, - "loss_sod": 0.027455970644950867, - "loss_total": 0.6861608028411865, - "step": 8999 - }, { "epoch": 0.018, - "grad_norm": 1.8158535957336426, - "learning_rate": 8.949821843470266e-05, - "loss": 0.7964, + "grad_norm": 1.367475152015686, + "learning_rate": 4.9991948461835685e-05, + "loss": 1.2153, "step": 9000 }, { "epoch": 0.018, - "eval_loss": 0.7903507947921753, - "eval_runtime": 151.2799, - "eval_samples_per_second": 102.082, - "eval_steps_per_second": 0.8, + "eval_loss": 1.1127148866653442, + "eval_runtime": 76.2524, + "eval_samples_per_second": 202.525, + "eval_steps_per_second": 3.174, "step": 9000 }, - { - "epoch": 0.018198, - "loss_gen": 2.4747531414031982, - "loss_rtd": 0.44327718019485474, - "loss_sent": 0.00012714380864053965, - "loss_sod": 0.38547617197036743, - "loss_total": 0.8470889925956726, - "step": 9099 - }, - { - "epoch": 0.018198, - "loss_gen": 3.196993350982666, - "loss_rtd": 0.4125911295413971, - "loss_sent": 0.16173741221427917, - "loss_sod": 0.03193075954914093, - "loss_total": 0.6297818422317505, - "step": 9099 - }, { "epoch": 0.0182, - "grad_norm": 0.7714298963546753, - "learning_rate": 8.947875314593455e-05, - "loss": 0.7744, + "grad_norm": 0.8793131709098816, + "learning_rate": 4.999154077252407e-05, + "loss": 1.2734, "step": 9100 }, - { - "epoch": 0.018398, - "loss_gen": 3.0633838176727295, - "loss_rtd": 0.4389207065105438, - "loss_sent": 0.05032424256205559, - "loss_sod": 0.30537647008895874, - "loss_total": 0.8124662041664124, - "step": 9199 - }, - { - "epoch": 0.018398, - "loss_gen": 2.1238484382629395, - "loss_rtd": 0.4132879376411438, - "loss_sent": 9.437712287763134e-05, - "loss_sod": 0.3491120934486389, - "loss_total": 0.7748661637306213, - "step": 9199 - }, { "epoch": 0.0184, - "grad_norm": 0.7082574367523193, - "learning_rate": 8.94592719551294e-05, - "loss": 0.7931, + "grad_norm": 0.6496739387512207, + "learning_rate": 4.999112301662281e-05, + "loss": 1.2498, "step": 9200 }, - { - "epoch": 0.018598, - "loss_gen": 2.9134955406188965, - "loss_rtd": 0.4698430299758911, - "loss_sent": 0.039316605776548386, - "loss_sod": 0.17961709201335907, - "loss_total": 0.7017951607704163, - "step": 9299 - }, - { - "epoch": 0.018598, - "loss_gen": 3.965733289718628, - "loss_rtd": 0.4487987160682678, - "loss_sent": 0.3107520639896393, - "loss_sod": 0.006400309503078461, - "loss_total": 0.7836712598800659, - "step": 9299 - }, { "epoch": 0.0186, - "grad_norm": 1.1956804990768433, - "learning_rate": 8.943977487013423e-05, - "loss": 0.7891, + "grad_norm": 1.1462939977645874, + "learning_rate": 4.99906951943002e-05, + "loss": 1.2549, "step": 9300 }, - { - "epoch": 0.018798, - "loss_gen": 4.273687362670898, - "loss_rtd": 0.44777828454971313, - "loss_sent": 0.21755613386631012, - "loss_sod": 0.032184895128011703, - "loss_total": 0.7115744352340698, - "step": 9399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.014370918273926, - "loss_rtd": 0.4529024064540863, - "loss_sent": 0.25203970074653625, - "loss_sod": 0.05144047737121582, - "loss_total": 0.7695848941802979, - "step": 9399 - }, { "epoch": 0.0188, - "grad_norm": 0.8677006363868713, - "learning_rate": 8.942026189880244e-05, - "loss": 0.7976, + "grad_norm": 1.520691156387329, + "learning_rate": 4.999025730572854e-05, + "loss": 1.2437, "step": 9400 }, - { - "epoch": 0.018998, - "loss_gen": 3.9230942726135254, - "loss_rtd": 0.4471578299999237, - "loss_sent": 0.12711574137210846, - "loss_sod": 0.011995519511401653, - "loss_total": 0.5952448844909668, - "step": 9499 - }, - { - "epoch": 0.018998, - "loss_gen": 3.7593722343444824, - "loss_rtd": 0.4566221237182617, - "loss_sent": 0.2904764413833618, - "loss_sod": 0.09695194661617279, - "loss_total": 0.8526517152786255, - "step": 9499 - }, { "epoch": 0.019, - "grad_norm": 1.596506953239441, - "learning_rate": 8.940073304899388e-05, - "loss": 0.7528, + "grad_norm": 1.3555136919021606, + "learning_rate": 4.998980935108424e-05, + "loss": 1.2326, "step": 9500 }, - { - "epoch": 0.019198, - "loss_gen": 3.8245770931243896, - "loss_rtd": 0.43909817934036255, - "loss_sent": 0.15130890905857086, - "loss_sod": 0.040884099900722504, - "loss_total": 0.6369021534919739, - "step": 9599 - }, - { - "epoch": 0.019198, - "loss_gen": 3.8713860511779785, - "loss_rtd": 0.45773789286613464, - "loss_sent": 0.33891892433166504, - "loss_sod": 0.02650151401758194, - "loss_total": 0.8288379907608032, - "step": 9599 - }, { "epoch": 0.0192, - "grad_norm": 1.0625760555267334, - "learning_rate": 8.938118832857476e-05, - "loss": 0.7587, + "grad_norm": 1.467217206954956, + "learning_rate": 4.9989351330547715e-05, + "loss": 1.2768, "step": 9600 }, - { - "epoch": 0.019398, - "loss_gen": 3.8790111541748047, - "loss_rtd": 0.4518607258796692, - "loss_sent": 0.25272804498672485, - "loss_sod": 0.11352493613958359, - "loss_total": 0.8213223814964294, - "step": 9699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.176455020904541, - "loss_rtd": 0.44528916478157043, - "loss_sent": 0.3503030240535736, - "loss_sod": 0.09298127889633179, - "loss_total": 0.8920282125473022, - "step": 9699 - }, { "epoch": 0.0194, - "grad_norm": 2.159792184829712, - "learning_rate": 8.93616277454177e-05, - "loss": 0.7643, + "grad_norm": 1.3842765092849731, + "learning_rate": 4.998888324430346e-05, + "loss": 1.2675, "step": 9700 }, - { - "epoch": 0.019598, - "loss_gen": 4.2923502922058105, - "loss_rtd": 0.44386738538742065, - "loss_sent": 0.33425548672676086, - "loss_sod": 0.13649314641952515, - "loss_total": 0.9162001609802246, - "step": 9799 - }, - { - "epoch": 0.019598, - "loss_gen": 3.989419937133789, - "loss_rtd": 0.45562487840652466, - "loss_sent": 0.09749240428209305, - "loss_sod": 0.017574891448020935, - "loss_total": 0.5721644759178162, - "step": 9799 - }, { "epoch": 0.0196, - "grad_norm": 1.0099166631698608, - "learning_rate": 8.934205130740169e-05, - "loss": 0.7612, + "grad_norm": 1.344078540802002, + "learning_rate": 4.998840509254003e-05, + "loss": 1.2619, "step": 9800 }, - { - "epoch": 0.019798, - "loss_gen": 3.232377290725708, - "loss_rtd": 0.44479018449783325, - "loss_sent": 0.07202599197626114, - "loss_sod": 0.2975013256072998, - "loss_total": 0.8146188259124756, - "step": 9899 - }, - { - "epoch": 0.019798, - "loss_gen": 2.5692195892333984, - "loss_rtd": 0.42933931946754456, - "loss_sent": 0.025050949305295944, - "loss_sod": 0.2836762070655823, - "loss_total": 0.7383059859275818, - "step": 9899 - }, { "epoch": 0.0198, - "grad_norm": 1.441141128540039, - "learning_rate": 8.93224590224121e-05, - "loss": 0.7724, + "grad_norm": 0.7567517757415771, + "learning_rate": 4.998791687545001e-05, + "loss": 1.2794, "step": 9900 }, - { - "epoch": 0.019998, - "loss_gen": 4.065705299377441, - "loss_rtd": 0.41851821541786194, - "loss_sent": 0.057108424603939056, - "loss_sod": 0.003937193192541599, - "loss_total": 0.4795638620853424, - "step": 9999 - }, - { - "epoch": 0.019998, - "loss_gen": 3.380124568939209, - "loss_rtd": 0.4455118775367737, - "loss_sent": 0.03728582710027695, - "loss_sod": 0.2612026035785675, - "loss_total": 0.7440003156661987, - "step": 9999 - }, { "epoch": 0.02, - "grad_norm": 1.000209927558899, - "learning_rate": 8.930285089834074e-05, - "loss": 0.7523, + "grad_norm": 0.9987697601318359, + "learning_rate": 4.998741859323006e-05, + "loss": 1.2778, "step": 10000 }, { "epoch": 0.02, - "eval_loss": 0.7508344054222107, - "eval_runtime": 151.4354, - "eval_samples_per_second": 101.977, - "eval_steps_per_second": 0.799, + "eval_loss": 1.1275579929351807, + "eval_runtime": 76.2888, + "eval_samples_per_second": 202.428, + "eval_steps_per_second": 3.172, "step": 10000 }, - { - "epoch": 0.020198, - "loss_gen": 4.481088161468506, - "loss_rtd": 0.44522103667259216, - "loss_sent": 0.23012477159500122, - "loss_sod": 0.10265250504016876, - "loss_total": 0.7779983282089233, - "step": 10099 - }, - { - "epoch": 0.020198, - "loss_gen": 3.8373875617980957, - "loss_rtd": 0.43886369466781616, - "loss_sent": 0.5344423651695251, - "loss_sod": 0.03612484782934189, - "loss_total": 1.0094308853149414, - "step": 10099 - }, { "epoch": 0.0202, - "grad_norm": 2.045971155166626, - "learning_rate": 8.928322694308574e-05, - "loss": 0.7515, + "grad_norm": 1.5212323665618896, + "learning_rate": 4.9986910246080894e-05, + "loss": 1.2884, "step": 10100 }, - { - "epoch": 0.020398, - "loss_gen": 4.168641090393066, - "loss_rtd": 0.44839605689048767, - "loss_sent": 0.2666820287704468, - "loss_sod": 0.07253801822662354, - "loss_total": 0.7876161336898804, - "step": 10199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.216071128845215, - "loss_rtd": 0.4510549008846283, - "loss_sent": 0.2517627477645874, - "loss_sod": 0.1449786126613617, - "loss_total": 0.8477962613105774, - "step": 10199 - }, { "epoch": 0.0204, - "grad_norm": 1.3105542659759521, - "learning_rate": 8.92635871645516e-05, - "loss": 0.7545, + "grad_norm": 1.5730245113372803, + "learning_rate": 4.998639183420727e-05, + "loss": 1.282, "step": 10200 }, - { - "epoch": 0.020598, - "loss_gen": 3.248972177505493, - "loss_rtd": 0.45092254877090454, - "loss_sent": 0.37074267864227295, - "loss_sod": 0.17783309519290924, - "loss_total": 0.9994983077049255, - "step": 10299 - }, - { - "epoch": 0.020598, - "loss_gen": 2.8247666358947754, - "loss_rtd": 0.45638176798820496, - "loss_sent": 0.0008790385909378529, - "loss_sod": 0.4168325364589691, - "loss_total": 0.8740933537483215, - "step": 10299 - }, { "epoch": 0.0206, - "grad_norm": 1.7141914367675781, - "learning_rate": 8.924393157064926e-05, - "loss": 0.7535, + "grad_norm": 0.8342368602752686, + "learning_rate": 4.9985863357818e-05, + "loss": 1.2408, "step": 10300 }, - { - "epoch": 0.020798, - "loss_gen": 4.234827518463135, - "loss_rtd": 0.4640049934387207, - "loss_sent": 0.1334572434425354, - "loss_sod": 0.07902702689170837, - "loss_total": 0.6764892935752869, - "step": 10399 - }, - { - "epoch": 0.020798, - "loss_gen": 3.7969512939453125, - "loss_rtd": 0.44702625274658203, - "loss_sent": 0.33471742272377014, - "loss_sod": 0.09909866005182266, - "loss_total": 0.8808423280715942, - "step": 10399 - }, { "epoch": 0.0208, - "grad_norm": 1.0838115215301514, - "learning_rate": 8.922426016929598e-05, - "loss": 0.7524, + "grad_norm": 1.3672316074371338, + "learning_rate": 4.998532481712596e-05, + "loss": 1.2205, "step": 10400 }, - { - "epoch": 0.020998, - "loss_gen": 3.911032199859619, - "loss_rtd": 0.4394637942314148, - "loss_sent": 0.08548645675182343, - "loss_sod": 0.14958754181861877, - "loss_total": 0.6745378375053406, - "step": 10499 - }, - { - "epoch": 0.020998, - "loss_gen": 4.120211601257324, - "loss_rtd": 0.44440609216690063, - "loss_sent": 0.2759931981563568, - "loss_sod": 0.0870596170425415, - "loss_total": 0.8074588775634766, - "step": 10499 - }, { "epoch": 0.021, - "grad_norm": 0.9914191365242004, - "learning_rate": 8.920457296841538e-05, - "loss": 0.7554, + "grad_norm": 1.1164605617523193, + "learning_rate": 4.998477621234806e-05, + "loss": 1.2817, "step": 10500 }, - { - "epoch": 0.021198, - "loss_gen": 3.870936393737793, - "loss_rtd": 0.4242846965789795, - "loss_sent": 0.21312680840492249, - "loss_sod": 0.09429841488599777, - "loss_total": 0.7317099571228027, - "step": 10599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.131606578826904, - "loss_rtd": 0.43067824840545654, - "loss_sent": 0.28381282091140747, - "loss_sod": 0.0976143404841423, - "loss_total": 0.8121054172515869, - "step": 10599 - }, { "epoch": 0.0212, - "grad_norm": 1.5844788551330566, - "learning_rate": 8.918486997593749e-05, - "loss": 0.7619, + "grad_norm": 1.2867449522018433, + "learning_rate": 4.99842175437053e-05, + "loss": 1.2598, "step": 10600 }, - { - "epoch": 0.021398, - "loss_gen": 3.798642158508301, - "loss_rtd": 0.43546780943870544, - "loss_sent": 0.2033432424068451, - "loss_sod": 0.13071854412555695, - "loss_total": 0.7695295810699463, - "step": 10699 - }, - { - "epoch": 0.021398, - "loss_gen": 3.8915579319000244, - "loss_rtd": 0.4383130967617035, - "loss_sent": 0.19973257184028625, - "loss_sod": 0.0048730941489338875, - "loss_total": 0.6429187655448914, - "step": 10699 - }, { "epoch": 0.0214, - "grad_norm": 1.2073383331298828, - "learning_rate": 8.916515119979866e-05, - "loss": 0.7569, + "grad_norm": 1.6646244525909424, + "learning_rate": 4.99836488114227e-05, + "loss": 1.2163, "step": 10700 }, - { - "epoch": 0.021598, - "loss_gen": 4.278448104858398, - "loss_rtd": 0.4538666605949402, - "loss_sent": 0.4568008482456207, - "loss_sod": 0.08655402809381485, - "loss_total": 0.9972215890884399, - "step": 10799 - }, - { - "epoch": 0.021598, - "loss_gen": 3.7883036136627197, - "loss_rtd": 0.44330617785453796, - "loss_sent": 0.460102915763855, - "loss_sod": 0.04768051952123642, - "loss_total": 0.95108962059021, - "step": 10799 - }, { "epoch": 0.0216, - "grad_norm": 2.6000397205352783, - "learning_rate": 8.91454166479416e-05, - "loss": 0.7608, + "grad_norm": 1.3233399391174316, + "learning_rate": 4.998307001572935e-05, + "loss": 1.2744, "step": 10800 }, - { - "epoch": 0.021798, - "loss_gen": 3.8277363777160645, - "loss_rtd": 0.46418583393096924, - "loss_sent": 0.3211608827114105, - "loss_sod": 0.10692701488733292, - "loss_total": 0.8922737836837769, - "step": 10899 - }, - { - "epoch": 0.021798, - "loss_gen": 4.054731845855713, - "loss_rtd": 0.45032167434692383, - "loss_sent": 0.27219444513320923, - "loss_sod": 0.28465166687965393, - "loss_total": 1.0071678161621094, - "step": 10899 - }, { "epoch": 0.0218, - "grad_norm": 2.0657763481140137, - "learning_rate": 8.912566632831541e-05, - "loss": 0.7661, + "grad_norm": 1.1658077239990234, + "learning_rate": 4.9982481156858385e-05, + "loss": 1.274, "step": 10900 }, - { - "epoch": 0.021998, - "loss_gen": 4.123061656951904, - "loss_rtd": 0.42160412669181824, - "loss_sent": 0.17279018461704254, - "loss_sod": 0.02948286198079586, - "loss_total": 0.6238771677017212, - "step": 10999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.480509281158447, - "loss_rtd": 0.44309404492378235, - "loss_sent": 0.18066415190696716, - "loss_sod": 0.13343116641044617, - "loss_total": 0.7571893930435181, - "step": 10999 - }, { "epoch": 0.022, - "grad_norm": 0.9440023899078369, - "learning_rate": 8.91059002488755e-05, - "loss": 0.764, + "grad_norm": 1.4505467414855957, + "learning_rate": 4.9981882235046995e-05, + "loss": 1.2645, "step": 11000 }, { "epoch": 0.022, - "eval_loss": 0.7398257255554199, - "eval_runtime": 151.466, - "eval_samples_per_second": 101.957, - "eval_steps_per_second": 0.799, + "eval_loss": 1.1138958930969238, + "eval_runtime": 76.7643, + "eval_samples_per_second": 201.174, + "eval_steps_per_second": 3.153, "step": 11000 }, - { - "epoch": 0.022198, - "loss_gen": 3.9331908226013184, - "loss_rtd": 0.45400765538215637, - "loss_sent": 0.2909051179885864, - "loss_sod": 0.15119487047195435, - "loss_total": 0.8961076736450195, - "step": 11099 - }, - { - "epoch": 0.022198, - "loss_gen": 2.9539029598236084, - "loss_rtd": 0.43858084082603455, - "loss_sent": 0.037397708743810654, - "loss_sod": 0.3715130388736725, - "loss_total": 0.8474915623664856, - "step": 11099 - }, { "epoch": 0.0222, - "grad_norm": 1.1551802158355713, - "learning_rate": 8.908611841758363e-05, - "loss": 0.7472, + "grad_norm": 0.8515588641166687, + "learning_rate": 4.998127325053642e-05, + "loss": 1.2359, "step": 11100 }, - { - "epoch": 0.022398, - "loss_gen": 3.897245168685913, - "loss_rtd": 0.42984846234321594, - "loss_sent": 0.22936731576919556, - "loss_sod": 0.021883495151996613, - "loss_total": 0.6810992956161499, - "step": 11199 - }, - { - "epoch": 0.022398, - "loss_gen": 3.969332456588745, - "loss_rtd": 0.44527700543403625, - "loss_sent": 0.3268181383609772, - "loss_sod": 0.09524273872375488, - "loss_total": 0.8673378229141235, - "step": 11199 - }, { "epoch": 0.0224, - "grad_norm": 2.26607608795166, - "learning_rate": 8.906632084240796e-05, - "loss": 0.759, + "grad_norm": 1.4022259712219238, + "learning_rate": 4.9980654203571983e-05, + "loss": 1.2515, "step": 11200 }, - { - "epoch": 0.022598, - "loss_gen": 4.213681221008301, - "loss_rtd": 0.4063725769519806, - "loss_sent": 0.2478422373533249, - "loss_sod": 0.058716077357530594, - "loss_total": 0.7129309177398682, - "step": 11299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.215815544128418, - "loss_rtd": 0.43394604325294495, - "loss_sent": 0.4162815511226654, - "loss_sod": 0.04028809815645218, - "loss_total": 0.8905156850814819, - "step": 11299 - }, { "epoch": 0.0226, - "grad_norm": 1.892191767692566, - "learning_rate": 8.90465075313229e-05, - "loss": 0.7353, + "grad_norm": 1.5902676582336426, + "learning_rate": 4.998002509440301e-05, + "loss": 1.2305, "step": 11300 }, - { - "epoch": 0.022798, - "loss_gen": 2.236022710800171, - "loss_rtd": 0.4328562617301941, - "loss_sent": 0.0001368140656268224, - "loss_sod": 0.2996408939361572, - "loss_total": 0.7326339483261108, - "step": 11399 - }, - { - "epoch": 0.022798, - "loss_gen": 3.444706916809082, - "loss_rtd": 0.41880279779434204, - "loss_sent": 0.12022778391838074, - "loss_sod": 0.06587596237659454, - "loss_total": 0.6049065589904785, - "step": 11399 - }, { "epoch": 0.0228, - "grad_norm": 0.6935063600540161, - "learning_rate": 8.902667849230929e-05, - "loss": 0.7465, + "grad_norm": 0.763087809085846, + "learning_rate": 4.997938592328292e-05, + "loss": 1.2312, "step": 11400 }, - { - "epoch": 0.022998, - "loss_gen": 3.834214448928833, - "loss_rtd": 0.44961848855018616, - "loss_sent": 0.4442434012889862, - "loss_sod": 0.07701162248849869, - "loss_total": 0.9708734750747681, - "step": 11499 - }, - { - "epoch": 0.022998, - "loss_gen": 3.003070831298828, - "loss_rtd": 0.4377424418926239, - "loss_sent": 0.06250195950269699, - "loss_sod": 0.1500367373228073, - "loss_total": 0.6502811312675476, - "step": 11499 - }, { "epoch": 0.023, - "grad_norm": 1.296846866607666, - "learning_rate": 8.900683373335425e-05, - "loss": 0.7588, + "grad_norm": 1.4949332475662231, + "learning_rate": 4.997873669046916e-05, + "loss": 1.2768, "step": 11500 }, - { - "epoch": 0.023198, - "loss_gen": 4.146674633026123, - "loss_rtd": 0.44089600443840027, - "loss_sent": 0.20838849246501923, - "loss_sod": 0.08634405583143234, - "loss_total": 0.7356285452842712, - "step": 11599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.042768955230713, - "loss_rtd": 0.4361976981163025, - "loss_sent": 0.2685220241546631, - "loss_sod": 0.1542719006538391, - "loss_total": 0.8589916229248047, - "step": 11599 - }, { "epoch": 0.0232, - "grad_norm": 0.9552421569824219, - "learning_rate": 8.898697326245124e-05, - "loss": 0.7565, + "grad_norm": 1.0390666723251343, + "learning_rate": 4.9978077396223255e-05, + "loss": 1.2355, "step": 11600 }, - { - "epoch": 0.023398, - "loss_gen": 3.578911781311035, - "loss_rtd": 0.44924330711364746, - "loss_sent": 0.04782826453447342, - "loss_sod": 0.3338066041469574, - "loss_total": 0.8308781981468201, - "step": 11699 - }, - { - "epoch": 0.023398, - "loss_gen": 2.7252180576324463, - "loss_rtd": 0.43577805161476135, - "loss_sent": 0.001131089637055993, - "loss_sod": 0.42657536268234253, - "loss_total": 0.8634845614433289, - "step": 11699 - }, { "epoch": 0.0234, - "grad_norm": 0.6433441638946533, - "learning_rate": 8.896709708760008e-05, - "loss": 0.7481, + "grad_norm": 0.6799549460411072, + "learning_rate": 4.997740804081076e-05, + "loss": 1.264, "step": 11700 }, - { - "epoch": 0.023598, - "loss_gen": 3.7408361434936523, - "loss_rtd": 0.4493173658847809, - "loss_sent": 0.1397567242383957, - "loss_sod": 0.02804996259510517, - "loss_total": 0.617124080657959, - "step": 11799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.120850563049316, - "loss_rtd": 0.43224528431892395, - "loss_sent": 0.17164933681488037, - "loss_sod": 0.07089000195264816, - "loss_total": 0.6747846007347107, - "step": 11799 - }, { "epoch": 0.0236, - "grad_norm": 0.7836123704910278, - "learning_rate": 8.894720521680686e-05, - "loss": 0.7493, + "grad_norm": 1.4702496528625488, + "learning_rate": 4.99767286245013e-05, + "loss": 1.3092, "step": 11800 }, - { - "epoch": 0.023798, - "loss_gen": 4.201231002807617, - "loss_rtd": 0.42488178610801697, - "loss_sent": 0.25421980023384094, - "loss_sod": 0.03894122317433357, - "loss_total": 0.7180428504943848, - "step": 11899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.196627140045166, - "loss_rtd": 0.4310776889324188, - "loss_sent": 0.14267674088478088, - "loss_sod": 0.15274421870708466, - "loss_total": 0.7264986038208008, - "step": 11899 - }, { "epoch": 0.0238, - "grad_norm": 0.8541756272315979, - "learning_rate": 8.892729765808402e-05, - "loss": 0.743, + "grad_norm": 1.3574661016464233, + "learning_rate": 4.997603914756853e-05, + "loss": 1.2654, "step": 11900 }, - { - "epoch": 0.023998, - "loss_gen": 4.17164945602417, - "loss_rtd": 0.4589729905128479, - "loss_sent": 0.12974581122398376, - "loss_sod": 0.11565913259983063, - "loss_total": 0.7043778896331787, - "step": 11999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.203178882598877, - "loss_rtd": 0.4291312098503113, - "loss_sent": 0.38069620728492737, - "loss_sod": 0.1546536386013031, - "loss_total": 0.964480996131897, - "step": 11999 - }, { "epoch": 0.024, - "grad_norm": 0.7374743819236755, - "learning_rate": 8.890737441945037e-05, - "loss": 0.7471, + "grad_norm": 1.1170625686645508, + "learning_rate": 4.9975339610290175e-05, + "loss": 1.2343, "step": 12000 }, { "epoch": 0.024, - "eval_loss": 0.7346014380455017, - "eval_runtime": 151.7074, - "eval_samples_per_second": 101.795, - "eval_steps_per_second": 0.798, + "eval_loss": 1.1109821796417236, + "eval_runtime": 76.4587, + "eval_samples_per_second": 201.978, + "eval_steps_per_second": 3.165, "step": 12000 }, - { - "epoch": 0.024198, - "loss_gen": 4.091280937194824, - "loss_rtd": 0.44482794404029846, - "loss_sent": 0.2618083655834198, - "loss_sod": 0.034425701946020126, - "loss_total": 0.7410620450973511, - "step": 12099 - }, - { - "epoch": 0.024198, - "loss_gen": 3.968428134918213, - "loss_rtd": 0.43132540583610535, - "loss_sent": 0.18958622217178345, - "loss_sod": 0.08942354470491409, - "loss_total": 0.7103351950645447, - "step": 12099 - }, { "epoch": 0.0242, - "grad_norm": 0.7772459983825684, - "learning_rate": 8.888743550893095e-05, - "loss": 0.7556, + "grad_norm": 1.2707583904266357, + "learning_rate": 4.997463001294802e-05, + "loss": 1.2525, "step": 12100 }, - { - "epoch": 0.024398, - "loss_gen": 3.7260987758636475, - "loss_rtd": 0.4445788860321045, - "loss_sent": 0.2562042772769928, - "loss_sod": 0.05302653834223747, - "loss_total": 0.7538096904754639, - "step": 12199 - }, - { - "epoch": 0.024398, - "loss_gen": 4.109078407287598, - "loss_rtd": 0.4398147463798523, - "loss_sent": 0.1253279596567154, - "loss_sod": 0.013701645657420158, - "loss_total": 0.5788443684577942, - "step": 12199 - }, { "epoch": 0.0244, - "grad_norm": 1.5994316339492798, - "learning_rate": 8.886748093455714e-05, - "loss": 0.7373, + "grad_norm": 1.2613739967346191, + "learning_rate": 4.997391035582788e-05, + "loss": 1.2698, "step": 12200 }, - { - "epoch": 0.024598, - "loss_gen": 4.129921913146973, - "loss_rtd": 0.43831920623779297, - "loss_sent": 0.27449101209640503, - "loss_sod": 0.13593009114265442, - "loss_total": 0.84874027967453, - "step": 12299 - }, - { - "epoch": 0.024598, - "loss_gen": 3.085507869720459, - "loss_rtd": 0.43336358666419983, - "loss_sent": 0.17228688299655914, - "loss_sod": 0.15589821338653564, - "loss_total": 0.761548638343811, - "step": 12299 - }, { "epoch": 0.0246, - "grad_norm": 1.5170425176620483, - "learning_rate": 8.884751070436668e-05, - "loss": 0.7545, + "grad_norm": 1.1995183229446411, + "learning_rate": 4.997318063921963e-05, + "loss": 1.237, "step": 12300 }, - { - "epoch": 0.024798, - "loss_gen": 3.849769115447998, - "loss_rtd": 0.45669013261795044, - "loss_sent": 0.03489939868450165, - "loss_sod": 0.32191336154937744, - "loss_total": 0.8135029077529907, - "step": 12399 - }, - { - "epoch": 0.024798, - "loss_gen": 3.112346649169922, - "loss_rtd": 0.44622284173965454, - "loss_sent": 0.0024297181516885757, - "loss_sod": 0.3605993986129761, - "loss_total": 0.8092519640922546, - "step": 12399 - }, { "epoch": 0.0248, - "grad_norm": 1.343874454498291, - "learning_rate": 8.882752482640354e-05, - "loss": 0.7642, + "grad_norm": 0.729535698890686, + "learning_rate": 4.997244086341721e-05, + "loss": 1.2248, "step": 12400 }, - { - "epoch": 0.024998, - "loss_gen": 4.160418510437012, - "loss_rtd": 0.4546605050563812, - "loss_sent": 0.5152992606163025, - "loss_sod": 0.23410066962242126, - "loss_total": 1.204060435295105, - "step": 12499 - }, - { - "epoch": 0.024998, - "loss_gen": 4.379058361053467, - "loss_rtd": 0.44232314825057983, - "loss_sent": 0.32886990904808044, - "loss_sod": 0.14197906851768494, - "loss_total": 0.9131721258163452, - "step": 12499 - }, { "epoch": 0.025, - "grad_norm": 3.379681348800659, - "learning_rate": 8.880752330871805e-05, - "loss": 0.752, + "grad_norm": 1.3250787258148193, + "learning_rate": 4.9971691028718594e-05, + "loss": 1.2617, "step": 12500 }, - { - "epoch": 0.025198, - "loss_gen": 3.828108310699463, - "loss_rtd": 0.4420686662197113, - "loss_sent": 0.607168436050415, - "loss_sod": 0.07426024228334427, - "loss_total": 1.1234973669052124, - "step": 12599 - }, - { - "epoch": 0.025198, - "loss_gen": 4.14678955078125, - "loss_rtd": 0.4491349160671234, - "loss_sent": 0.2777857780456543, - "loss_sod": 0.0520130880177021, - "loss_total": 0.7789337635040283, - "step": 12599 - }, { "epoch": 0.0252, - "grad_norm": 1.5856245756149292, - "learning_rate": 8.87875061593668e-05, - "loss": 0.7549, + "grad_norm": 1.421278476715088, + "learning_rate": 4.997093113542582e-05, + "loss": 1.2321, "step": 12600 }, - { - "epoch": 0.025398, - "loss_gen": 4.272764205932617, - "loss_rtd": 0.4308563470840454, - "loss_sent": 0.31138402223587036, - "loss_sod": 0.04572285711765289, - "loss_total": 0.7879632115364075, - "step": 12699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.319077491760254, - "loss_rtd": 0.45940783619880676, - "loss_sent": 0.1377851366996765, - "loss_sod": 0.08773832768201828, - "loss_total": 0.6849312782287598, - "step": 12699 - }, { "epoch": 0.0254, - "grad_norm": 1.2356271743774414, - "learning_rate": 8.876747338641271e-05, - "loss": 0.7513, + "grad_norm": 1.5168310403823853, + "learning_rate": 4.997016118384497e-05, + "loss": 1.2268, "step": 12700 }, { - "epoch": 0.025598, - "loss_gen": 4.071308612823486, - "loss_rtd": 0.43188050389289856, - "loss_sent": 0.2389868199825287, - "loss_sod": 0.055405814200639725, - "loss_total": 0.7262731790542603, - "step": 12799 - }, - { - "epoch": 0.025598, - "loss_gen": 3.7361228466033936, - "loss_rtd": 0.43908101320266724, - "loss_sent": 0.08752284198999405, - "loss_sod": 0.12760449945926666, - "loss_total": 0.6542083621025085, - "step": 12799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.9795969724655151, - "learning_rate": 8.874742499792499e-05, - "loss": 0.7467, - "step": 12800 - }, - { - "epoch": 0.025798, - "loss_gen": 3.2456018924713135, - "loss_rtd": 0.43680670857429504, - "loss_sent": 0.11466845124959946, - "loss_sod": 0.1494515985250473, - "loss_total": 0.7009267210960388, - "step": 12899 - }, - { - "epoch": 0.025798, - "loss_gen": 2.999803066253662, - "loss_rtd": 0.43333643674850464, - "loss_sent": 0.09659186750650406, - "loss_sod": 0.2513529658317566, - "loss_total": 0.7812812924385071, - "step": 12899 + "epoch": 0.0256, + "grad_norm": 1.045483946800232, + "learning_rate": 4.996938117428618e-05, + "loss": 1.2714, + "step": 12800 }, { "epoch": 0.0258, - "grad_norm": 1.0719258785247803, - "learning_rate": 8.87273610019791e-05, - "loss": 0.7372, + "grad_norm": 0.8379656076431274, + "learning_rate": 4.9968591107063647e-05, + "loss": 1.2792, "step": 12900 }, - { - "epoch": 0.025998, - "loss_gen": 3.9534850120544434, - "loss_rtd": 0.42895567417144775, - "loss_sent": 0.17686723172664642, - "loss_sod": 0.007649438455700874, - "loss_total": 0.6134723424911499, - "step": 12999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.115196228027344, - "loss_rtd": 0.4262949824333191, - "loss_sent": 0.4657926857471466, - "loss_sod": 0.06579498201608658, - "loss_total": 0.9578826427459717, - "step": 12999 - }, { "epoch": 0.026, - "grad_norm": 1.4140347242355347, - "learning_rate": 8.870728140665684e-05, - "loss": 0.7338, + "grad_norm": 1.620133638381958, + "learning_rate": 4.996779098249559e-05, + "loss": 1.2456, "step": 13000 }, { "epoch": 0.026, - "eval_loss": 0.7341647744178772, - "eval_runtime": 151.3683, - "eval_samples_per_second": 102.023, - "eval_steps_per_second": 0.799, + "eval_loss": 1.1081608533859253, + "eval_runtime": 76.4734, + "eval_samples_per_second": 201.939, + "eval_steps_per_second": 3.164, "step": 13000 }, - { - "epoch": 0.026198, - "loss_gen": 4.0114216804504395, - "loss_rtd": 0.4345766007900238, - "loss_sent": 0.3323989510536194, - "loss_sod": 0.07648509740829468, - "loss_total": 0.8434606194496155, - "step": 13099 - }, - { - "epoch": 0.026198, - "loss_gen": 3.950547933578491, - "loss_rtd": 0.43731945753097534, - "loss_sent": 0.16473570466041565, - "loss_sod": 0.13462206721305847, - "loss_total": 0.7366771697998047, - "step": 13099 - }, { "epoch": 0.0262, - "grad_norm": 1.0907080173492432, - "learning_rate": 8.868718622004626e-05, - "loss": 0.7625, + "grad_norm": 1.2181329727172852, + "learning_rate": 4.9966980800904315e-05, + "loss": 1.2187, "step": 13100 }, - { - "epoch": 0.026398, - "loss_gen": 3.7565226554870605, - "loss_rtd": 0.4329445958137512, - "loss_sent": 0.1427980661392212, - "loss_sod": 0.19305050373077393, - "loss_total": 0.7687931656837463, - "step": 13199 - }, - { - "epoch": 0.026398, - "loss_gen": 3.9972236156463623, - "loss_rtd": 0.45005112886428833, - "loss_sent": 0.22460077702999115, - "loss_sod": 0.003133713733404875, - "loss_total": 0.6777856349945068, - "step": 13199 - }, { "epoch": 0.0264, - "grad_norm": 1.207594633102417, - "learning_rate": 8.866707545024169e-05, - "loss": 0.7235, + "grad_norm": 1.4935636520385742, + "learning_rate": 4.996616056261616e-05, + "loss": 1.2405, "step": 13200 }, - { - "epoch": 0.026598, - "loss_gen": 4.161458969116211, - "loss_rtd": 0.4533689618110657, - "loss_sent": 0.267709344625473, - "loss_sod": 0.008421978913247585, - "loss_total": 0.7295002937316895, - "step": 13299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.003698348999023, - "loss_rtd": 0.4371702969074249, - "loss_sent": 0.41131556034088135, - "loss_sod": 0.11664707213640213, - "loss_total": 0.9651329517364502, - "step": 13299 - }, { "epoch": 0.0266, - "grad_norm": 1.1547691822052002, - "learning_rate": 8.864694910534375e-05, - "loss": 0.7531, + "grad_norm": 1.3096436262130737, + "learning_rate": 4.996533026796152e-05, + "loss": 1.2599, "step": 13300 }, - { - "epoch": 0.026798, - "loss_gen": 3.7176673412323, - "loss_rtd": 0.42440980672836304, - "loss_sent": 0.2140468806028366, - "loss_sod": 0.04582613706588745, - "loss_total": 0.6842828392982483, - "step": 13399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.113990783691406, - "loss_rtd": 0.431838721036911, - "loss_sent": 0.1240159422159195, - "loss_sod": 0.019157452508807182, - "loss_total": 0.5750120878219604, - "step": 13399 - }, { "epoch": 0.0268, - "grad_norm": 1.4807409048080444, - "learning_rate": 8.862680719345933e-05, - "loss": 0.7454, + "grad_norm": 1.5392045974731445, + "learning_rate": 4.996448991727483e-05, + "loss": 1.2491, "step": 13400 }, - { - "epoch": 0.026998, - "loss_gen": 3.7197763919830322, - "loss_rtd": 0.4261091351509094, - "loss_sent": 0.14779257774353027, - "loss_sod": 0.028960872441530228, - "loss_total": 0.6028625965118408, - "step": 13499 - }, - { - "epoch": 0.026998, - "loss_gen": 3.850450277328491, - "loss_rtd": 0.44980937242507935, - "loss_sent": 0.1829732209444046, - "loss_sod": 0.09278829395771027, - "loss_total": 0.7255708575248718, - "step": 13499 - }, { "epoch": 0.027, - "grad_norm": 0.9217216372489929, - "learning_rate": 8.860664972270161e-05, - "loss": 0.7366, + "grad_norm": 1.3175737857818604, + "learning_rate": 4.996363951089459e-05, + "loss": 1.2383, "step": 13500 }, - { - "epoch": 0.027198, - "loss_gen": 4.276337146759033, - "loss_rtd": 0.43566420674324036, - "loss_sent": 0.19798988103866577, - "loss_sod": 0.07652521133422852, - "loss_total": 0.7101792693138123, - "step": 13599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.133307456970215, - "loss_rtd": 0.4287024140357971, - "loss_sent": 0.2429627925157547, - "loss_sod": 0.03901500999927521, - "loss_total": 0.7106801867485046, - "step": 13599 - }, { "epoch": 0.0272, - "grad_norm": 0.744624674320221, - "learning_rate": 8.858647670118998e-05, - "loss": 0.7416, + "grad_norm": 1.3839282989501953, + "learning_rate": 4.9962779049163335e-05, + "loss": 1.2739, "step": 13600 }, - { - "epoch": 0.027398, - "loss_gen": 2.6930673122406006, - "loss_rtd": 0.4245775640010834, - "loss_sent": 0.1255381852388382, - "loss_sod": 0.31067463755607605, - "loss_total": 0.8607903718948364, - "step": 13699 - }, - { - "epoch": 0.027398, - "loss_gen": 3.989245653152466, - "loss_rtd": 0.4460928440093994, - "loss_sent": 0.3285417854785919, - "loss_sod": 0.04119309410452843, - "loss_total": 0.8158277273178101, - "step": 13699 - }, { "epoch": 0.0274, - "grad_norm": 1.0839942693710327, - "learning_rate": 8.856628813705014e-05, - "loss": 0.7379, + "grad_norm": 0.8403354287147522, + "learning_rate": 4.996190853242767e-05, + "loss": 1.2378, "step": 13700 }, - { - "epoch": 0.027598, - "loss_gen": 3.781327247619629, - "loss_rtd": 0.45150938630104065, - "loss_sent": 0.15958184003829956, - "loss_sod": 0.09167152643203735, - "loss_total": 0.7027627825737, - "step": 13799 - }, - { - "epoch": 0.027598, - "loss_gen": 4.02975606918335, - "loss_rtd": 0.4286169707775116, - "loss_sent": 0.27698010206222534, - "loss_sod": 0.06525172293186188, - "loss_total": 0.77084881067276, - "step": 13799 - }, { "epoch": 0.0276, - "grad_norm": 1.9123098850250244, - "learning_rate": 8.854608403841407e-05, - "loss": 0.7504, + "grad_norm": 1.2463191747665405, + "learning_rate": 4.996102796103823e-05, + "loss": 1.2248, "step": 13800 }, - { - "epoch": 0.027798, - "loss_gen": 4.6888298988342285, - "loss_rtd": 0.425262987613678, - "loss_sent": 0.05898591876029968, - "loss_sod": 0.2515452802181244, - "loss_total": 0.735794186592102, - "step": 13899 - }, - { - "epoch": 0.027798, - "loss_gen": 3.9502158164978027, - "loss_rtd": 0.4248698651790619, - "loss_sent": 0.14980857074260712, - "loss_sod": 0.017817402258515358, - "loss_total": 0.592495858669281, - "step": 13899 - }, { "epoch": 0.0278, - "grad_norm": 1.2942866086959839, - "learning_rate": 8.852586441341996e-05, - "loss": 0.7301, + "grad_norm": 1.466070294380188, + "learning_rate": 4.996013733534971e-05, + "loss": 1.2567, "step": 13900 }, - { - "epoch": 0.027998, - "loss_gen": 3.0356056690216064, - "loss_rtd": 0.43091312050819397, - "loss_sent": 0.019389711320400238, - "loss_sod": 0.258874773979187, - "loss_total": 0.709177553653717, - "step": 13999 - }, - { - "epoch": 0.027998, - "loss_gen": 2.5347843170166016, - "loss_rtd": 0.4192298948764801, - "loss_sent": 0.01333379466086626, - "loss_sod": 0.25479552149772644, - "loss_total": 0.6873592138290405, - "step": 13999 - }, { "epoch": 0.028, - "grad_norm": 0.7091480493545532, - "learning_rate": 8.850562927021227e-05, - "loss": 0.7375, + "grad_norm": 0.8661775588989258, + "learning_rate": 4.995923665572085e-05, + "loss": 1.2372, "step": 14000 }, { "epoch": 0.028, - "eval_loss": 0.732259213924408, - "eval_runtime": 151.7404, - "eval_samples_per_second": 101.773, - "eval_steps_per_second": 0.797, + "eval_loss": 1.113655686378479, + "eval_runtime": 76.3727, + "eval_samples_per_second": 202.206, + "eval_steps_per_second": 3.169, "step": 14000 }, { - "epoch": 0.000198, - "loss_gen": 3.0679430961608887, - "loss_rtd": 0.43229687213897705, - "loss_sent": 0.03127670660614967, - "loss_sod": 0.18361902236938477, - "loss_total": 0.6471925973892212, - "step": 14099 - }, - { - "epoch": 0.000198, - "loss_gen": 2.065791606903076, - "loss_rtd": 0.3922792375087738, - "loss_sent": 0.023536257445812225, - "loss_sod": 0.3744746744632721, - "loss_total": 0.7902901768684387, - "step": 14099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.5569876432418823, - "learning_rate": 8.84853786169417e-05, - "loss": 0.7265, + "epoch": 0.0282, + "grad_norm": 0.9262897968292236, + "learning_rate": 4.9958325922514466e-05, + "loss": 1.2082, "step": 14100 }, { - "epoch": 0.000398, - "loss_gen": 4.120703220367432, - "loss_rtd": 0.44122856855392456, - "loss_sent": 0.284933477640152, - "loss_sod": 0.08465798199176788, - "loss_total": 0.8108199834823608, - "step": 14199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.25185489654541, - "loss_rtd": 0.41677260398864746, - "loss_sent": 0.35926559567451477, - "loss_sod": 0.024174978956580162, - "loss_total": 0.8002132177352905, - "step": 14199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.4152344465255737, - "learning_rate": 8.846511246176526e-05, - "loss": 0.7529, + "epoch": 0.0284, + "grad_norm": 1.406928539276123, + "learning_rate": 4.995740513609738e-05, + "loss": 1.2576, "step": 14200 }, { - "epoch": 0.000598, - "loss_gen": 3.732604742050171, - "loss_rtd": 0.43974068760871887, - "loss_sent": 0.05152856558561325, - "loss_sod": 0.1073233112692833, - "loss_total": 0.5985925793647766, - "step": 14299 - }, - { - "epoch": 0.000598, - "loss_gen": 3.9523119926452637, - "loss_rtd": 0.41860440373420715, - "loss_sent": 0.2899419665336609, - "loss_sod": 0.018143948167562485, - "loss_total": 0.7266902923583984, - "step": 14299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.4069437980651855, - "learning_rate": 8.844483081284609e-05, - "loss": 0.7454, + "epoch": 0.0286, + "grad_norm": 0.9858616590499878, + "learning_rate": 4.9956474296840485e-05, + "loss": 1.2173, "step": 14300 }, { - "epoch": 0.000798, - "loss_gen": 4.0675506591796875, - "loss_rtd": 0.4320642650127411, - "loss_sent": 0.1469995379447937, - "loss_sod": 0.118939608335495, - "loss_total": 0.6980034112930298, - "step": 14399 - }, - { - "epoch": 0.000798, - "loss_gen": 3.9876673221588135, - "loss_rtd": 0.461688756942749, - "loss_sent": 0.6275394558906555, - "loss_sod": 0.04515424743294716, - "loss_total": 1.1343824863433838, - "step": 14399 - }, - { - "epoch": 0.0008, - "grad_norm": 2.224152088165283, - "learning_rate": 8.842453367835366e-05, - "loss": 0.7377, + "epoch": 0.0288, + "grad_norm": 0.6425116062164307, + "learning_rate": 4.9955533405118725e-05, + "loss": 1.237, "step": 14400 }, { - "epoch": 0.000998, - "loss_gen": 3.8818235397338867, - "loss_rtd": 0.4337279200553894, - "loss_sent": 0.14887529611587524, - "loss_sod": 0.2178276777267456, - "loss_total": 0.8004308938980103, - "step": 14499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.115235328674316, - "loss_rtd": 0.4256025552749634, - "loss_sent": 0.3175339996814728, - "loss_sod": 0.146746426820755, - "loss_total": 0.8898829221725464, - "step": 14499 - }, - { - "epoch": 0.001, - "grad_norm": 1.2925978899002075, - "learning_rate": 8.840422106646368e-05, - "loss": 0.7259, + "epoch": 0.029, + "grad_norm": 0.7704317569732666, + "learning_rate": 4.9954582461311106e-05, + "loss": 1.286, "step": 14500 }, { - "epoch": 0.001198, - "loss_gen": 3.6830289363861084, - "loss_rtd": 0.4349622428417206, - "loss_sent": 0.0974566638469696, - "loss_sod": 0.036854855716228485, - "loss_total": 0.5692737698554993, - "step": 14599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.088166236877441, - "loss_rtd": 0.4255536198616028, - "loss_sent": 0.19805455207824707, - "loss_sod": 0.03682982921600342, - "loss_total": 0.6604380011558533, - "step": 14599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.7997679114341736, - "learning_rate": 8.838389298535805e-05, - "loss": 0.7445, + "epoch": 0.0292, + "grad_norm": 1.2745368480682373, + "learning_rate": 4.995362146580065e-05, + "loss": 1.2553, "step": 14600 }, { - "epoch": 0.001398, - "loss_gen": 3.9595894813537598, - "loss_rtd": 0.4168483018875122, - "loss_sent": 0.2763338088989258, - "loss_sod": 0.04373210668563843, - "loss_total": 0.7369142174720764, - "step": 14699 - }, - { - "epoch": 0.001398, - "loss_gen": 3.125018835067749, - "loss_rtd": 0.42329004406929016, - "loss_sent": 0.008385371416807175, - "loss_sod": 0.38495877385139465, - "loss_total": 0.8166341781616211, - "step": 14699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.3052629232406616, - "learning_rate": 8.83635494432249e-05, - "loss": 0.7247, + "epoch": 0.0294, + "grad_norm": 1.1889222860336304, + "learning_rate": 4.995265041897444e-05, + "loss": 1.2783, "step": 14700 }, { - "epoch": 0.001598, - "loss_gen": 4.314852714538574, - "loss_rtd": 0.431550532579422, - "loss_sent": 0.19600588083267212, - "loss_sod": 0.1637813299894333, - "loss_total": 0.7913377285003662, - "step": 14799 - }, - { - "epoch": 0.001598, - "loss_gen": 3.685572862625122, - "loss_rtd": 0.4151884913444519, - "loss_sent": 0.5205061435699463, - "loss_sod": 0.02497510239481926, - "loss_total": 0.960669755935669, - "step": 14799 - }, - { - "epoch": 0.0016, - "grad_norm": 2.231931686401367, - "learning_rate": 8.834319044825862e-05, - "loss": 0.7334, + "epoch": 0.0296, + "grad_norm": 1.4223252534866333, + "learning_rate": 4.9951669321223645e-05, + "loss": 1.27, "step": 14800 }, { - "epoch": 0.001798, - "loss_gen": 3.1399123668670654, - "loss_rtd": 0.41176992654800415, - "loss_sent": 0.10314608365297318, - "loss_sod": 0.08458153158426285, - "loss_total": 0.5994975566864014, - "step": 14899 - }, - { - "epoch": 0.001798, - "loss_gen": 3.982893705368042, - "loss_rtd": 0.42843541502952576, - "loss_sent": 0.23265258967876434, - "loss_sod": 0.11196303367614746, - "loss_total": 0.7730510830879211, - "step": 14899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.1219249963760376, - "learning_rate": 8.832281600865983e-05, - "loss": 0.7279, + "epoch": 0.0298, + "grad_norm": 1.0991147756576538, + "learning_rate": 4.995067817294342e-05, + "loss": 1.2373, "step": 14900 }, { - "epoch": 0.001998, - "loss_gen": 3.748601198196411, - "loss_rtd": 0.42924025654792786, - "loss_sent": 0.2589420676231384, - "loss_sod": 0.12195281684398651, - "loss_total": 0.8101351261138916, - "step": 14999 - }, - { - "epoch": 0.001998, - "loss_gen": 3.7271108627319336, - "loss_rtd": 0.4306026101112366, - "loss_sent": 0.2542365789413452, - "loss_sod": 0.09196815639734268, - "loss_total": 0.7768073678016663, - "step": 14999 - }, - { - "epoch": 0.002, - "grad_norm": 1.8965760469436646, - "learning_rate": 8.830242613263532e-05, - "loss": 0.7431, + "epoch": 0.03, + "grad_norm": 1.2834559679031372, + "learning_rate": 4.994967697453301e-05, + "loss": 1.2725, "step": 15000 }, { - "epoch": 0.002, - "eval_loss": 0.7258825302124023, - "eval_runtime": 152.5564, - "eval_samples_per_second": 101.228, - "eval_steps_per_second": 0.793, + "epoch": 0.03, + "eval_loss": 1.1147979497909546, + "eval_runtime": 77.4863, + "eval_samples_per_second": 199.3, + "eval_steps_per_second": 3.123, "step": 15000 }, { - "epoch": 0.002198, - "loss_gen": 4.080875396728516, - "loss_rtd": 0.44319817423820496, - "loss_sent": 0.3066878616809845, - "loss_sod": 0.01762264408171177, - "loss_total": 0.7675086855888367, - "step": 15099 - }, - { - "epoch": 0.002198, - "loss_gen": 3.494755744934082, - "loss_rtd": 0.429073303937912, - "loss_sent": 0.21678392589092255, - "loss_sod": 0.06768958270549774, - "loss_total": 0.7135468125343323, - "step": 15099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.2699775695800781, - "learning_rate": 8.828202082839815e-05, - "loss": 0.7201, + "epoch": 0.0302, + "grad_norm": 1.3690969944000244, + "learning_rate": 4.9948665726395705e-05, + "loss": 1.2631, "step": 15100 }, { - "epoch": 0.002398, - "loss_gen": 4.007122993469238, - "loss_rtd": 0.4222743511199951, - "loss_sent": 0.31278446316719055, - "loss_sod": 0.002229036297649145, - "loss_total": 0.7372878789901733, - "step": 15199 - }, - { - "epoch": 0.002398, - "loss_gen": 3.383213996887207, - "loss_rtd": 0.42610523104667664, - "loss_sent": 0.11944480985403061, - "loss_sod": 0.13941094279289246, - "loss_total": 0.6849609613418579, - "step": 15199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.189070224761963, - "learning_rate": 8.826160010416756e-05, - "loss": 0.7106, + "epoch": 0.0304, + "grad_norm": 1.0501981973648071, + "learning_rate": 4.994764442893882e-05, + "loss": 1.2614, "step": 15200 }, { - "epoch": 0.002598, - "loss_gen": 2.794424533843994, - "loss_rtd": 0.36382150650024414, - "loss_sent": 0.12546056509017944, - "loss_sod": 0.06349128484725952, - "loss_total": 0.5527733564376831, - "step": 15299 - }, - { - "epoch": 0.002598, - "loss_gen": 3.5569169521331787, - "loss_rtd": 0.42518049478530884, - "loss_sent": 0.14433139562606812, - "loss_sod": 0.1257479041814804, - "loss_total": 0.6952598094940186, - "step": 15299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.2007133960723877, - "learning_rate": 8.824116396816904e-05, - "loss": 0.7406, + "epoch": 0.0306, + "grad_norm": 1.2085719108581543, + "learning_rate": 4.994661308257375e-05, + "loss": 1.1982, "step": 15300 }, { - "epoch": 0.002798, - "loss_gen": 3.122385263442993, - "loss_rtd": 0.4065248966217041, - "loss_sent": 0.09375770390033722, - "loss_sod": 0.21171408891677856, - "loss_total": 0.7119966745376587, - "step": 15399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.033022403717041, - "loss_rtd": 0.42629608511924744, - "loss_sent": 0.4818168878555298, - "loss_sod": 0.17908814549446106, - "loss_total": 1.0872011184692383, - "step": 15399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.5872125625610352, - "learning_rate": 8.822071242863424e-05, - "loss": 0.736, + "epoch": 0.0308, + "grad_norm": 1.1436259746551514, + "learning_rate": 4.994557168771591e-05, + "loss": 1.2079, "step": 15400 }, { - "epoch": 0.002998, - "loss_gen": 3.652820348739624, - "loss_rtd": 0.4053482413291931, - "loss_sent": 0.255595862865448, - "loss_sod": 0.07709679007530212, - "loss_total": 0.7380409240722656, - "step": 15499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.354541301727295, - "loss_rtd": 0.4241352379322052, - "loss_sent": 0.156655415892601, - "loss_sod": 0.1135091483592987, - "loss_total": 0.6942998170852661, - "step": 15499 - }, - { - "epoch": 0.003, - "grad_norm": 1.6814513206481934, - "learning_rate": 8.820024549380103e-05, - "loss": 0.7249, + "epoch": 0.031, + "grad_norm": 0.8355712890625, + "learning_rate": 4.994452024478478e-05, + "loss": 1.2537, "step": 15500 }, { - "epoch": 0.003198, - "loss_gen": 4.114484786987305, - "loss_rtd": 0.412880003452301, - "loss_sent": 0.3902363181114197, - "loss_sod": 0.024768974632024765, - "loss_total": 0.8278852701187134, - "step": 15599 - }, - { - "epoch": 0.003198, - "loss_gen": 2.7664718627929688, - "loss_rtd": 0.41608572006225586, - "loss_sent": 0.0032381522469222546, - "loss_sod": 0.36812782287597656, - "loss_total": 0.7874516844749451, - "step": 15599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.080322265625, - "learning_rate": 8.817976317191352e-05, - "loss": 0.7262, + "epoch": 0.0312, + "grad_norm": 0.9547547698020935, + "learning_rate": 4.9943458754203875e-05, + "loss": 1.2399, "step": 15600 }, { - "epoch": 0.003398, - "loss_gen": 4.035771369934082, - "loss_rtd": 0.42975643277168274, - "loss_sent": 0.22751572728157043, - "loss_sod": 0.06185830011963844, - "loss_total": 0.7191304564476013, - "step": 15699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.123803615570068, - "loss_rtd": 0.40343937277793884, - "loss_sent": 0.14625126123428345, - "loss_sod": 0.06565766036510468, - "loss_total": 0.6153482794761658, - "step": 15699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.0878139734268188, - "learning_rate": 8.815926547122197e-05, - "loss": 0.7328, + "epoch": 0.0314, + "grad_norm": 1.090165138244629, + "learning_rate": 4.994238721640077e-05, + "loss": 1.2324, "step": 15700 }, { - "epoch": 0.003598, - "loss_gen": 4.012997627258301, - "loss_rtd": 0.4305029809474945, - "loss_sent": 0.22090695798397064, - "loss_sod": 0.0283144973218441, - "loss_total": 0.6797244548797607, - "step": 15799 - }, - { - "epoch": 0.003598, - "loss_gen": 3.9291770458221436, - "loss_rtd": 0.4147172272205353, - "loss_sent": 0.19529950618743896, - "loss_sod": 0.07241620123386383, - "loss_total": 0.6824329495429993, - "step": 15799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.7841458916664124, - "learning_rate": 8.813875239998286e-05, - "loss": 0.7196, + "epoch": 0.0316, + "grad_norm": 0.9351906180381775, + "learning_rate": 4.9941305631807076e-05, + "loss": 1.2431, "step": 15800 }, { - "epoch": 0.003798, - "loss_gen": 4.31983757019043, - "loss_rtd": 0.4128066599369049, - "loss_sent": 0.13574691116809845, - "loss_sod": 0.22686126828193665, - "loss_total": 0.7754148244857788, - "step": 15899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.224883079528809, - "loss_rtd": 0.42368102073669434, - "loss_sent": 0.3318007290363312, - "loss_sod": 0.10107254981994629, - "loss_total": 0.8565542697906494, - "step": 15899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.1637392044067383, - "learning_rate": 8.811822396645881e-05, - "loss": 0.7431, + "epoch": 0.0318, + "grad_norm": 1.3740676641464233, + "learning_rate": 4.9940214000858456e-05, + "loss": 1.2487, "step": 15900 }, { - "epoch": 0.003998, - "loss_gen": 4.401248455047607, - "loss_rtd": 0.4079552888870239, - "loss_sent": 0.1784553825855255, - "loss_sod": 0.05878440663218498, - "loss_total": 0.6451950669288635, - "step": 15999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.055051803588867, - "loss_rtd": 0.42362701892852783, - "loss_sent": 0.28670841455459595, - "loss_sod": 0.0232734065502882, - "loss_total": 0.7336088418960571, - "step": 15999 - }, - { - "epoch": 0.004, - "grad_norm": 1.3481870889663696, - "learning_rate": 8.809768017891873e-05, - "loss": 0.7344, + "epoch": 0.032, + "grad_norm": 0.656019926071167, + "learning_rate": 4.993911232399462e-05, + "loss": 1.2371, "step": 16000 }, { - "epoch": 0.004, - "eval_loss": 0.7216879725456238, - "eval_runtime": 152.7099, - "eval_samples_per_second": 101.126, - "eval_steps_per_second": 0.792, + "epoch": 0.032, + "eval_loss": 1.1028244495391846, + "eval_runtime": 76.4629, + "eval_samples_per_second": 201.967, + "eval_steps_per_second": 3.165, "step": 16000 }, { - "epoch": 0.004198, - "loss_gen": 2.5740246772766113, - "loss_rtd": 0.40873804688453674, - "loss_sent": 0.018980562686920166, - "loss_sod": 0.21097786724567413, - "loss_total": 0.6386964917182922, - "step": 16099 - }, - { - "epoch": 0.004198, - "loss_gen": 3.902594804763794, - "loss_rtd": 0.42422205209732056, - "loss_sent": 0.04060244932770729, - "loss_sod": 0.17612992227077484, - "loss_total": 0.6409544348716736, - "step": 16099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.7013354897499084, - "learning_rate": 8.807712104563763e-05, - "loss": 0.7182, + "epoch": 0.0322, + "grad_norm": 1.20018470287323, + "learning_rate": 4.9938000601659315e-05, + "loss": 1.2547, "step": 16100 }, { - "epoch": 0.004398, - "loss_gen": 2.7281432151794434, - "loss_rtd": 0.39617347717285156, - "loss_sent": 0.07382925599813461, - "loss_sod": 0.17214326560497284, - "loss_total": 0.6421459913253784, - "step": 16199 - }, - { - "epoch": 0.004398, - "loss_gen": 3.9564368724823, - "loss_rtd": 0.4245787560939789, - "loss_sent": 0.13513240218162537, - "loss_sod": 0.1007312759757042, - "loss_total": 0.6604424715042114, - "step": 16199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.2529441118240356, - "learning_rate": 8.805654657489672e-05, - "loss": 0.7395, + "epoch": 0.0324, + "grad_norm": 1.2216906547546387, + "learning_rate": 4.993687883430036e-05, + "loss": 1.2327, "step": 16200 }, { - "epoch": 0.004598, - "loss_gen": 4.069423198699951, - "loss_rtd": 0.42716529965400696, - "loss_sent": 0.20060759782791138, - "loss_sod": 0.2048812359571457, - "loss_total": 0.8326541185379028, - "step": 16299 - }, - { - "epoch": 0.004598, - "loss_gen": 3.879422187805176, - "loss_rtd": 0.40844979882240295, - "loss_sent": 0.09611134231090546, - "loss_sod": 0.04968631640076637, - "loss_total": 0.5542474389076233, - "step": 16299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.2504940032958984, - "learning_rate": 8.803595677498341e-05, - "loss": 0.7247, + "epoch": 0.0326, + "grad_norm": 1.0969616174697876, + "learning_rate": 4.99357470223696e-05, + "loss": 1.2513, "step": 16300 }, { - "epoch": 0.004798, - "loss_gen": 3.7565174102783203, - "loss_rtd": 0.42008933424949646, - "loss_sent": 0.13713136315345764, - "loss_sod": 0.007490306627005339, - "loss_total": 0.5647109746932983, - "step": 16399 - }, - { - "epoch": 0.004798, - "loss_gen": 3.987405776977539, - "loss_rtd": 0.4281255900859833, - "loss_sent": 0.23581212759017944, - "loss_sod": 0.06856285035610199, - "loss_total": 0.7325005531311035, - "step": 16399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.352505087852478, - "learning_rate": 8.801535165419124e-05, - "loss": 0.7326, + "epoch": 0.0328, + "grad_norm": 1.026194453239441, + "learning_rate": 4.99346051663229e-05, + "loss": 1.2508, "step": 16400 }, { - "epoch": 0.004998, - "loss_gen": 4.19617223739624, - "loss_rtd": 0.40191999077796936, - "loss_sent": 0.1798119843006134, - "loss_sod": 0.09213796257972717, - "loss_total": 0.6738699674606323, - "step": 16499 - }, - { - "epoch": 0.004998, - "loss_gen": 3.9264073371887207, - "loss_rtd": 0.42804864048957825, - "loss_sent": 0.10941962897777557, - "loss_sod": 0.09465447813272476, - "loss_total": 0.6321227550506592, - "step": 16499 - }, - { - "epoch": 0.005, - "grad_norm": 1.0817756652832031, - "learning_rate": 8.799473122081999e-05, - "loss": 0.7278, + "epoch": 0.033, + "grad_norm": 1.1246017217636108, + "learning_rate": 4.993345326662023e-05, + "loss": 1.2538, "step": 16500 }, { - "epoch": 0.005198, - "loss_gen": 4.115394115447998, - "loss_rtd": 0.4185012876987457, - "loss_sent": 0.3424651622772217, - "loss_sod": 0.04793938249349594, - "loss_total": 0.808905839920044, - "step": 16599 - }, - { - "epoch": 0.005198, - "loss_gen": 3.8694446086883545, - "loss_rtd": 0.425730437040329, - "loss_sent": 0.35807597637176514, - "loss_sod": 0.08995547145605087, - "loss_total": 0.8737618923187256, - "step": 16599 - }, - { - "epoch": 0.0052, - "grad_norm": 3.3067049980163574, - "learning_rate": 8.797409548317555e-05, - "loss": 0.7214, + "epoch": 0.0332, + "grad_norm": 1.293093204498291, + "learning_rate": 4.993229132372557e-05, + "loss": 1.2236, "step": 16600 }, { - "epoch": 0.005398, - "loss_gen": 4.149951457977295, - "loss_rtd": 0.41732972860336304, - "loss_sent": 0.12850509583950043, - "loss_sod": 0.16574223339557648, - "loss_total": 0.7115770578384399, - "step": 16699 - }, - { - "epoch": 0.005398, - "loss_gen": 3.211554527282715, - "loss_rtd": 0.4234715402126312, - "loss_sent": 0.120115265250206, - "loss_sod": 0.2308167815208435, - "loss_total": 0.7744035720825195, - "step": 16699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.411970853805542, - "learning_rate": 8.795344444956998e-05, - "loss": 0.719, + "epoch": 0.0334, + "grad_norm": 1.208122730255127, + "learning_rate": 4.993111933810695e-05, + "loss": 1.2753, "step": 16700 }, { - "epoch": 0.005598, - "loss_gen": 2.7611095905303955, - "loss_rtd": 0.40610721707344055, - "loss_sent": 0.04416600242257118, - "loss_sod": 0.39784079790115356, - "loss_total": 0.8481140732765198, - "step": 16799 - }, - { - "epoch": 0.005598, - "loss_gen": 3.9168972969055176, - "loss_rtd": 0.3954412043094635, - "loss_sent": 0.38528358936309814, - "loss_sod": 0.04784262180328369, - "loss_total": 0.828567385673523, - "step": 16799 - }, - { - "epoch": 0.0056, - "grad_norm": 3.84426212310791, - "learning_rate": 8.793277812832153e-05, - "loss": 0.717, + "epoch": 0.0336, + "grad_norm": 1.073480248451233, + "learning_rate": 4.992993731023643e-05, + "loss": 1.2665, "step": 16800 }, { - "epoch": 0.005798, - "loss_gen": 2.4084560871124268, - "loss_rtd": 0.39277756214141846, - "loss_sent": 0.00010122371168108657, - "loss_sod": 0.40928834676742554, - "loss_total": 0.8021671175956726, - "step": 16899 - }, - { - "epoch": 0.005798, - "loss_gen": 2.609027624130249, - "loss_rtd": 0.40788689255714417, - "loss_sent": 0.021917378529906273, - "loss_sod": 0.16589200496673584, - "loss_total": 0.5956962704658508, - "step": 16899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.5878534913063049, - "learning_rate": 8.791209652775459e-05, - "loss": 0.7236, + "epoch": 0.0338, + "grad_norm": 1.4211028814315796, + "learning_rate": 4.9928745240590146e-05, + "loss": 1.2388, "step": 16900 }, { - "epoch": 0.005998, - "loss_gen": 4.018768787384033, - "loss_rtd": 0.42293936014175415, - "loss_sent": 0.12526118755340576, - "loss_sod": 0.08860775828361511, - "loss_total": 0.6368082761764526, - "step": 16999 - }, - { - "epoch": 0.005998, - "loss_gen": 3.610734462738037, - "loss_rtd": 0.42120155692100525, - "loss_sent": 0.18627098202705383, - "loss_sod": 0.08135885000228882, - "loss_total": 0.6888313293457031, - "step": 16999 - }, - { - "epoch": 0.006, - "grad_norm": 0.8335890769958496, - "learning_rate": 8.789139965619968e-05, - "loss": 0.7083, + "epoch": 0.034, + "grad_norm": 1.1787285804748535, + "learning_rate": 4.992754312964827e-05, + "loss": 1.2118, "step": 17000 }, { - "epoch": 0.006, - "eval_loss": 0.7154964804649353, - "eval_runtime": 151.3533, - "eval_samples_per_second": 102.033, - "eval_steps_per_second": 0.799, + "epoch": 0.034, + "eval_loss": 1.104814887046814, + "eval_runtime": 76.4454, + "eval_samples_per_second": 202.013, + "eval_steps_per_second": 3.166, "step": 17000 - }, - { - "epoch": 0.006198, - "loss_gen": 2.9088780879974365, - "loss_rtd": 0.4015282094478607, - "loss_sent": 0.03383695334196091, - "loss_sod": 0.19729620218276978, - "loss_total": 0.6326613426208496, - "step": 17099 - }, - { - "epoch": 0.006198, - "loss_gen": 3.805980920791626, - "loss_rtd": 0.41610702872276306, - "loss_sent": 0.3630307912826538, - "loss_sod": 0.033853236585855484, - "loss_total": 0.8129910230636597, - "step": 17099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.3783321380615234, - "learning_rate": 8.787068752199353e-05, - "loss": 0.731, - "step": 17100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.188808917999268, - "loss_rtd": 0.43117642402648926, - "loss_sent": 0.0706799104809761, - "loss_sod": 0.08008696883916855, - "loss_total": 0.5819432735443115, - "step": 17199 - }, - { - "epoch": 0.006398, - "loss_gen": 3.1088032722473145, - "loss_rtd": 0.42036527395248413, - "loss_sent": 0.04297754913568497, - "loss_sod": 0.12957589328289032, - "loss_total": 0.5929187536239624, - "step": 17199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.0201504230499268, - "learning_rate": 8.7849960133479e-05, - "loss": 0.719, - "step": 17200 - }, - { - "epoch": 0.006598, - "loss_gen": 3.864455223083496, - "loss_rtd": 0.41520410776138306, - "loss_sent": 0.37994444370269775, - "loss_sod": 0.05261944234371185, - "loss_total": 0.8477680087089539, - "step": 17299 - }, - { - "epoch": 0.006598, - "loss_gen": 3.6532490253448486, - "loss_rtd": 0.4261130690574646, - "loss_sent": 0.1113138422369957, - "loss_sod": 0.01775341108441353, - "loss_total": 0.5551803112030029, - "step": 17299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.0252410173416138, - "learning_rate": 8.782921749900501e-05, - "loss": 0.7198, - "step": 17300 - }, - { - "epoch": 0.006798, - "loss_gen": 3.8775410652160645, - "loss_rtd": 0.43246281147003174, - "loss_sent": 0.39332959055900574, - "loss_sod": 0.0777227133512497, - "loss_total": 0.903515100479126, - "step": 17399 - }, - { - "epoch": 0.006798, - "loss_gen": 3.4301466941833496, - "loss_rtd": 0.4230908453464508, - "loss_sent": 0.2612267732620239, - "loss_sod": 0.13481563329696655, - "loss_total": 0.8191332817077637, - "step": 17399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.6101447343826294, - "learning_rate": 8.780845962692677e-05, - "loss": 0.7277, - "step": 17400 - }, - { - "epoch": 0.006998, - "loss_gen": 3.934807777404785, - "loss_rtd": 0.42464226484298706, - "loss_sent": 0.15972498059272766, - "loss_sod": 0.1780788153409958, - "loss_total": 0.7624460458755493, - "step": 17499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.128389358520508, - "loss_rtd": 0.425221711397171, - "loss_sent": 0.13899770379066467, - "loss_sod": 0.045398104935884476, - "loss_total": 0.6096175312995911, - "step": 17499 - }, - { - "epoch": 0.007, - "grad_norm": 1.1656321287155151, - "learning_rate": 8.778768652560551e-05, - "loss": 0.7327, - "step": 17500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.306458473205566, - "loss_rtd": 0.4018927216529846, - "loss_sent": 0.27341505885124207, - "loss_sod": 0.028834078460931778, - "loss_total": 0.7041418552398682, - "step": 17599 - }, - { - "epoch": 0.007198, - "loss_gen": 3.6742680072784424, - "loss_rtd": 0.42321765422821045, - "loss_sent": 0.17562679946422577, - "loss_sod": 0.14957062900066376, - "loss_total": 0.7484151124954224, - "step": 17599 - }, - { - "epoch": 0.0072, - "grad_norm": 2.1263222694396973, - "learning_rate": 8.776689820340862e-05, - "loss": 0.7369, - "step": 17600 - }, - { - "epoch": 0.007398, - "loss_gen": 3.879549980163574, - "loss_rtd": 0.40929073095321655, - "loss_sent": 0.2122492492198944, - "loss_sod": 0.012041620910167694, - "loss_total": 0.6335816383361816, - "step": 17699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.083215236663818, - "loss_rtd": 0.4366784691810608, - "loss_sent": 0.16557861864566803, - "loss_sod": 0.016638852655887604, - "loss_total": 0.618895947933197, - "step": 17699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.6774116158485413, - "learning_rate": 8.774609466870966e-05, - "loss": 0.7327, - "step": 17700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.151714324951172, - "loss_rtd": 0.4287411570549011, - "loss_sent": 0.18538439273834229, - "loss_sod": 0.02048429846763611, - "loss_total": 0.6346098184585571, - "step": 17799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.426347732543945, - "loss_rtd": 0.42809152603149414, - "loss_sent": 0.19042737782001495, - "loss_sod": 0.07416960597038269, - "loss_total": 0.692688524723053, - "step": 17799 - }, - { - "epoch": 0.0076, - "grad_norm": 2.4827094078063965, - "learning_rate": 8.772527592988829e-05, - "loss": 0.7175, - "step": 17800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.004205703735352, - "loss_rtd": 0.41530540585517883, - "loss_sent": 0.16743431985378265, - "loss_sod": 0.18945026397705078, - "loss_total": 0.7721899747848511, - "step": 17899 - }, - { - "epoch": 0.007798, - "loss_gen": 3.50095534324646, - "loss_rtd": 0.4054555594921112, - "loss_sent": 0.00267047923989594, - "loss_sod": 0.399612694978714, - "loss_total": 0.807738721370697, - "step": 17899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.0332685708999634, - "learning_rate": 8.770444199533028e-05, - "loss": 0.7162, - "step": 17900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.209635257720947, - "loss_rtd": 0.39917030930519104, - "loss_sent": 0.11306151747703552, - "loss_sod": 0.03569847345352173, - "loss_total": 0.5479303002357483, - "step": 17999 - }, - { - "epoch": 0.007998, - "loss_gen": 3.763282299041748, - "loss_rtd": 0.4105561375617981, - "loss_sent": 0.1345166563987732, - "loss_sod": 0.04830792546272278, - "loss_total": 0.5933806896209717, - "step": 17999 - }, - { - "epoch": 0.008, - "grad_norm": 1.0159022808074951, - "learning_rate": 8.768359287342754e-05, - "loss": 0.7193, - "step": 18000 - }, - { - "epoch": 0.008, - "eval_loss": 0.7110997438430786, - "eval_runtime": 151.5607, - "eval_samples_per_second": 101.893, - "eval_steps_per_second": 0.798, - "step": 18000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.019419193267822, - "loss_rtd": 0.4001118540763855, - "loss_sent": 0.09835583716630936, - "loss_sod": 0.098896324634552, - "loss_total": 0.5973640084266663, - "step": 18099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.136058807373047, - "loss_rtd": 0.42017704248428345, - "loss_sent": 0.17160357534885406, - "loss_sod": 0.02384444698691368, - "loss_total": 0.6156250834465027, - "step": 18099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.8614924550056458, - "learning_rate": 8.766272857257808e-05, - "loss": 0.7178, - "step": 18100 - }, - { - "epoch": 0.008398, - "loss_gen": 3.8973662853240967, - "loss_rtd": 0.4116802215576172, - "loss_sent": 0.22857199609279633, - "loss_sod": 0.08869585394859314, - "loss_total": 0.7289481163024902, - "step": 18199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.000563621520996, - "loss_rtd": 0.4114419221878052, - "loss_sent": 0.11508604139089584, - "loss_sod": 0.019892679527401924, - "loss_total": 0.5464206337928772, - "step": 18199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.9504892826080322, - "learning_rate": 8.764184910118609e-05, - "loss": 0.7141, - "step": 18200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.2602105140686035, - "loss_rtd": 0.4248410165309906, - "loss_sent": 0.1012086570262909, - "loss_sod": 0.13853920996189117, - "loss_total": 0.6645889282226562, - "step": 18299 - }, - { - "epoch": 0.008598, - "loss_gen": 2.558227062225342, - "loss_rtd": 0.39907151460647583, - "loss_sent": 0.007993497885763645, - "loss_sod": 0.31753554940223694, - "loss_total": 0.724600613117218, - "step": 18299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.2532877922058105, - "learning_rate": 8.762095446766176e-05, - "loss": 0.7124, - "step": 18300 - }, - { - "epoch": 0.008798, - "loss_gen": 3.8672940731048584, - "loss_rtd": 0.41844481229782104, - "loss_sent": 0.2934986352920532, - "loss_sod": 0.10334864258766174, - "loss_total": 0.8152921199798584, - "step": 18399 - }, - { - "epoch": 0.008798, - "loss_gen": 3.9968318939208984, - "loss_rtd": 0.40939950942993164, - "loss_sent": 0.17195989191532135, - "loss_sod": 0.03714260458946228, - "loss_total": 0.6185020208358765, - "step": 18399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.0843919515609741, - "learning_rate": 8.760004468042147e-05, - "loss": 0.7133, - "step": 18400 - }, - { - "epoch": 0.008998, - "loss_gen": 3.860063314437866, - "loss_rtd": 0.4077662527561188, - "loss_sent": 0.22858792543411255, - "loss_sod": 0.07041435688734055, - "loss_total": 0.7067685127258301, - "step": 18499 - }, - { - "epoch": 0.008998, - "loss_gen": 3.951281785964966, - "loss_rtd": 0.4170241951942444, - "loss_sent": 0.1347436010837555, - "loss_sod": 0.02688794955611229, - "loss_total": 0.5786557197570801, - "step": 18499 - }, - { - "epoch": 0.009, - "grad_norm": 0.7378911375999451, - "learning_rate": 8.75791197478877e-05, - "loss": 0.7178, - "step": 18500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.030672073364258, - "loss_rtd": 0.42401623725891113, - "loss_sent": 0.0964384451508522, - "loss_sod": 0.010999690741300583, - "loss_total": 0.5314543843269348, - "step": 18599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.139626502990723, - "loss_rtd": 0.4047938287258148, - "loss_sent": 0.2407665252685547, - "loss_sod": 0.09544012695550919, - "loss_total": 0.7410004734992981, - "step": 18599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.2465193271636963, - "learning_rate": 8.755817967848897e-05, - "loss": 0.7071, - "step": 18600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.339874744415283, - "loss_rtd": 0.425040602684021, - "loss_sent": 0.16886325180530548, - "loss_sod": 0.03933661803603172, - "loss_total": 0.6332404613494873, - "step": 18699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.066360950469971, - "loss_rtd": 0.42420873045921326, - "loss_sent": 0.385574609041214, - "loss_sod": 0.0640186294913292, - "loss_total": 0.8738019466400146, - "step": 18699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.517830491065979, - "learning_rate": 8.753722448065996e-05, - "loss": 0.713, - "step": 18700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.080881595611572, - "loss_rtd": 0.4158501625061035, - "loss_sent": 0.04737195372581482, - "loss_sod": 0.004243885166943073, - "loss_total": 0.46746599674224854, - "step": 18799 - }, - { - "epoch": 0.009598, - "loss_gen": 2.9473581314086914, - "loss_rtd": 0.40688350796699524, - "loss_sent": 0.005198832601308823, - "loss_sod": 0.21468880772590637, - "loss_total": 0.6267711520195007, - "step": 18799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.9139093160629272, - "learning_rate": 8.751625416284142e-05, - "loss": 0.7025, - "step": 18800 - }, - { - "epoch": 0.009798, - "loss_gen": 2.641028642654419, - "loss_rtd": 0.3995615839958191, - "loss_sent": 0.00015060137957334518, - "loss_sod": 0.3031446635723114, - "loss_total": 0.7028568387031555, - "step": 18899 - }, - { - "epoch": 0.009798, - "loss_gen": 3.585927724838257, - "loss_rtd": 0.41310915350914, - "loss_sent": 0.03582323342561722, - "loss_sod": 0.17621608078479767, - "loss_total": 0.6251484751701355, - "step": 18899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.656723141670227, - "learning_rate": 8.74952687334802e-05, - "loss": 0.7155, - "step": 18900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.061804294586182, - "loss_rtd": 0.42302989959716797, - "loss_sent": 0.1835642009973526, - "loss_sod": 0.05181720480322838, - "loss_total": 0.6584113240242004, - "step": 18999 - }, - { - "epoch": 0.009998, - "loss_gen": 3.725508451461792, - "loss_rtd": 0.4165676534175873, - "loss_sent": 0.12800826132297516, - "loss_sod": 0.18614144623279572, - "loss_total": 0.7307173609733582, - "step": 18999 - }, - { - "epoch": 0.01, - "grad_norm": 1.5900828838348389, - "learning_rate": 8.74742682010292e-05, - "loss": 0.7148, - "step": 19000 - }, - { - "epoch": 0.01, - "eval_loss": 0.7048465609550476, - "eval_runtime": 151.8862, - "eval_samples_per_second": 101.675, - "eval_steps_per_second": 0.797, - "step": 19000 - }, - { - "epoch": 0.010198, - "loss_gen": 3.7057571411132812, - "loss_rtd": 0.4221038520336151, - "loss_sent": 0.16967017948627472, - "loss_sod": 0.1264842003583908, - "loss_total": 0.7182582020759583, - "step": 19099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.596755027770996, - "loss_rtd": 0.4268624186515808, - "loss_sent": 0.12956024706363678, - "loss_sod": 0.1693856567144394, - "loss_total": 0.725808322429657, - "step": 19099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.438319444656372, - "learning_rate": 8.745325257394747e-05, - "loss": 0.723, - "step": 19100 - }, - { - "epoch": 0.010398, - "loss_gen": 3.9707088470458984, - "loss_rtd": 0.4285086989402771, - "loss_sent": 0.14790266752243042, - "loss_sod": 0.027327165007591248, - "loss_total": 0.60373854637146, - "step": 19199 - }, - { - "epoch": 0.010398, - "loss_gen": 3.938842296600342, - "loss_rtd": 0.39713433384895325, - "loss_sent": 0.18230196833610535, - "loss_sod": 0.0404755175113678, - "loss_total": 0.619911789894104, - "step": 19199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.9801616072654724, - "learning_rate": 8.743222186070006e-05, - "loss": 0.7152, - "step": 19200 - }, - { - "epoch": 0.010598, - "loss_gen": 3.9666779041290283, - "loss_rtd": 0.40703949332237244, - "loss_sent": 0.35698920488357544, - "loss_sod": 0.11823119223117828, - "loss_total": 0.8822599053382874, - "step": 19299 - }, - { - "epoch": 0.010598, - "loss_gen": 2.5196690559387207, - "loss_rtd": 0.3921911418437958, - "loss_sent": 0.030794350430369377, - "loss_sod": 0.2004784196615219, - "loss_total": 0.6234638690948486, - "step": 19299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.678354263305664, - "learning_rate": 8.741117606975817e-05, - "loss": 0.7391, - "step": 19300 - }, - { - "epoch": 0.010798, - "loss_gen": 3.5528931617736816, - "loss_rtd": 0.4105856716632843, - "loss_sent": 0.5260259509086609, - "loss_sod": 0.04325145110487938, - "loss_total": 0.9798630475997925, - "step": 19399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.052915096282959, - "loss_rtd": 0.4151667654514313, - "loss_sent": 0.2871251106262207, - "loss_sod": 0.030132891610264778, - "loss_total": 0.7324247360229492, - "step": 19399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.2643232345581055, - "learning_rate": 8.739011520959904e-05, - "loss": 0.713, - "step": 19400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.023740768432617, - "loss_rtd": 0.40644070506095886, - "loss_sent": 0.2646462321281433, - "loss_sod": 0.09764538705348969, - "loss_total": 0.7687323093414307, - "step": 19499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.029313564300537, - "loss_rtd": 0.4271722137928009, - "loss_sent": 0.2032327502965927, - "loss_sod": 0.027567539364099503, - "loss_total": 0.657972514629364, - "step": 19499 - }, - { - "epoch": 0.011, - "grad_norm": 1.3250114917755127, - "learning_rate": 8.736903928870597e-05, - "loss": 0.7128, - "step": 19500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.008549690246582, - "loss_rtd": 0.4227379560470581, - "loss_sent": 0.20236265659332275, - "loss_sod": 0.052655987441539764, - "loss_total": 0.6777566075325012, - "step": 19599 - }, - { - "epoch": 0.011198, - "loss_gen": 3.985772132873535, - "loss_rtd": 0.43641579151153564, - "loss_sent": 0.27415043115615845, - "loss_sod": 0.19595825672149658, - "loss_total": 0.9065244793891907, - "step": 19599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.8043322563171387, - "learning_rate": 8.734794831556834e-05, - "loss": 0.7076, - "step": 19600 - }, - { - "epoch": 0.011398, - "loss_gen": 4.039675235748291, - "loss_rtd": 0.41741177439689636, - "loss_sent": 0.4203322231769562, - "loss_sod": 0.08601180464029312, - "loss_total": 0.9237557649612427, - "step": 19699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.08049201965332, - "loss_rtd": 0.4036373198032379, - "loss_sent": 0.43353450298309326, - "loss_sod": 0.0564105249941349, - "loss_total": 0.8935823440551758, - "step": 19699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.7929366827011108, - "learning_rate": 8.73268422986816e-05, - "loss": 0.7175, - "step": 19700 - }, - { - "epoch": 0.011598, - "loss_gen": 2.2888705730438232, - "loss_rtd": 0.4113491177558899, - "loss_sent": 0.01757943071424961, - "loss_sod": 0.2767329812049866, - "loss_total": 0.7056615352630615, - "step": 19799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.463761329650879, - "loss_rtd": 0.42178013920783997, - "loss_sent": 0.12542349100112915, - "loss_sod": 0.08447619527578354, - "loss_total": 0.6316798329353333, - "step": 19799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.8132031559944153, - "learning_rate": 8.730572124654725e-05, - "loss": 0.7143, - "step": 19800 - }, - { - "epoch": 0.011798, - "loss_gen": 3.2291722297668457, - "loss_rtd": 0.40131255984306335, - "loss_sent": 0.05474870651960373, - "loss_sod": 0.13893182575702667, - "loss_total": 0.5949931144714355, - "step": 19899 - }, - { - "epoch": 0.011798, - "loss_gen": 2.4490771293640137, - "loss_rtd": 0.3775515854358673, - "loss_sent": 0.06087493896484375, - "loss_sod": 0.21542909741401672, - "loss_total": 0.6538556218147278, - "step": 19899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.6356754899024963, - "learning_rate": 8.728458516767284e-05, - "loss": 0.7232, - "step": 19900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.252781391143799, - "loss_rtd": 0.39302095770835876, - "loss_sent": 0.03697744384407997, - "loss_sod": 0.034853093326091766, - "loss_total": 0.4648514986038208, - "step": 19999 - }, - { - "epoch": 0.011998, - "loss_gen": 3.7617712020874023, - "loss_rtd": 0.40322282910346985, - "loss_sent": 0.1799575388431549, - "loss_sod": 0.02489660680294037, - "loss_total": 0.6080769300460815, - "step": 19999 - }, - { - "epoch": 0.012, - "grad_norm": 0.8684195876121521, - "learning_rate": 8.726343407057197e-05, - "loss": 0.7041, - "step": 20000 - }, - { - "epoch": 0.012, - "eval_loss": 0.7028465270996094, - "eval_runtime": 151.5434, - "eval_samples_per_second": 101.905, - "eval_steps_per_second": 0.798, - "step": 20000 - }, - { - "epoch": 0.012198, - "loss_gen": 3.716078519821167, - "loss_rtd": 0.4142749011516571, - "loss_sent": 0.1574735790491104, - "loss_sod": 0.011068914085626602, - "loss_total": 0.5828173756599426, - "step": 20099 - }, - { - "epoch": 0.012198, - "loss_gen": 3.796159505844116, - "loss_rtd": 0.409680038690567, - "loss_sent": 0.032792288810014725, - "loss_sod": 0.1799483299255371, - "loss_total": 0.622420608997345, - "step": 20099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.7984619140625, - "learning_rate": 8.724226796376433e-05, - "loss": 0.711, - "step": 20100 - }, - { - "epoch": 0.012398, - "loss_gen": 3.9472317695617676, - "loss_rtd": 0.40650129318237305, - "loss_sent": 0.08266563713550568, - "loss_sod": 0.045177169144153595, - "loss_total": 0.5343440771102905, - "step": 20199 - }, - { - "epoch": 0.012398, - "loss_gen": 2.4588019847869873, - "loss_rtd": 0.38946664333343506, - "loss_sent": 0.0076849148608744144, - "loss_sod": 0.44422250986099243, - "loss_total": 0.8413740396499634, - "step": 20199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.0634772777557373, - "learning_rate": 8.72210868557756e-05, - "loss": 0.6964, - "step": 20200 - }, - { - "epoch": 0.012598, - "loss_gen": 2.4628233909606934, - "loss_rtd": 0.3977556526660919, - "loss_sent": 9.53001290326938e-05, - "loss_sod": 0.35795658826828003, - "loss_total": 0.7558075785636902, - "step": 20299 - }, - { - "epoch": 0.012598, - "loss_gen": 2.3897507190704346, - "loss_rtd": 0.4025641083717346, - "loss_sent": 0.001091538928449154, - "loss_sod": 0.42349517345428467, - "loss_total": 0.827150821685791, - "step": 20299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.2861807346343994, - "learning_rate": 8.719989075513753e-05, - "loss": 0.711, - "step": 20300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.174057960510254, - "loss_rtd": 0.4290989935398102, - "loss_sent": 0.09670135378837585, - "loss_sod": 0.023639153689146042, - "loss_total": 0.5494394898414612, - "step": 20399 - }, - { - "epoch": 0.012798, - "loss_gen": 3.941239356994629, - "loss_rtd": 0.3997499644756317, - "loss_sent": 0.12611424922943115, - "loss_sod": 0.07376294583082199, - "loss_total": 0.5996271371841431, - "step": 20399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.0112621784210205, - "learning_rate": 8.717867967038792e-05, - "loss": 0.718, - "step": 20400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.399020195007324, - "loss_rtd": 0.4018489420413971, - "loss_sent": 0.21433569490909576, - "loss_sod": 0.06530171632766724, - "loss_total": 0.6814863681793213, - "step": 20499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.367480754852295, - "loss_rtd": 0.41640469431877136, - "loss_sent": 0.32281652092933655, - "loss_sod": 0.06447701901197433, - "loss_total": 0.8036982417106628, - "step": 20499 - }, - { - "epoch": 0.013, - "grad_norm": 1.4023051261901855, - "learning_rate": 8.715745361007059e-05, - "loss": 0.7078, - "step": 20500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.183026313781738, - "loss_rtd": 0.40223562717437744, - "loss_sent": 0.1258079558610916, - "loss_sod": 0.11566033959388733, - "loss_total": 0.6437038779258728, - "step": 20599 - }, - { - "epoch": 0.013198, - "loss_gen": 3.8825387954711914, - "loss_rtd": 0.4105805456638336, - "loss_sent": 0.21834413707256317, - "loss_sod": 0.037197474390268326, - "loss_total": 0.6661221385002136, - "step": 20599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.2524880170822144, - "learning_rate": 8.713621258273538e-05, - "loss": 0.7103, - "step": 20600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.011019229888916, - "loss_rtd": 0.39504221081733704, - "loss_sent": 0.108099065721035, - "loss_sod": 0.01876910775899887, - "loss_total": 0.5219103693962097, - "step": 20699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.195362091064453, - "loss_rtd": 0.39853763580322266, - "loss_sent": 0.3297787606716156, - "loss_sod": 0.0631496012210846, - "loss_total": 0.7914659976959229, - "step": 20699 - }, - { - "epoch": 0.0134, - "grad_norm": 2.073709726333618, - "learning_rate": 8.711495659693818e-05, - "loss": 0.7065, - "step": 20700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.199145317077637, - "loss_rtd": 0.4101455807685852, - "loss_sent": 0.1480754017829895, - "loss_sod": 0.08632524311542511, - "loss_total": 0.6445462107658386, - "step": 20799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.125156879425049, - "loss_rtd": 0.3986564576625824, - "loss_sent": 0.11776266992092133, - "loss_sod": 0.03158608078956604, - "loss_total": 0.548005223274231, - "step": 20799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.2252863645553589, - "learning_rate": 8.709368566124091e-05, - "loss": 0.7105, - "step": 20800 - }, - { - "epoch": 0.013798, - "loss_gen": 3.4146010875701904, - "loss_rtd": 0.3956667482852936, - "loss_sent": 0.16512219607830048, - "loss_sod": 0.10856941342353821, - "loss_total": 0.6693583726882935, - "step": 20899 - }, - { - "epoch": 0.013798, - "loss_gen": 3.9524085521698, - "loss_rtd": 0.39170873165130615, - "loss_sent": 0.3147831857204437, - "loss_sod": 0.01406177319586277, - "loss_total": 0.7205536961555481, - "step": 20899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.7617398500442505, - "learning_rate": 8.70723997842115e-05, - "loss": 0.7165, - "step": 20900 - }, - { - "epoch": 0.013998, - "loss_gen": 2.8314666748046875, - "loss_rtd": 0.3838663399219513, - "loss_sent": 0.09685777127742767, - "loss_sod": 0.12291301041841507, - "loss_total": 0.603637158870697, - "step": 20999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.050356864929199, - "loss_rtd": 0.41659289598464966, - "loss_sent": 0.16024036705493927, - "loss_sod": 0.0964965671300888, - "loss_total": 0.6733298301696777, - "step": 20999 - }, - { - "epoch": 0.014, - "grad_norm": 0.82136070728302, - "learning_rate": 8.705109897442388e-05, - "loss": 0.6817, - "step": 21000 - }, - { - "epoch": 0.014, - "eval_loss": 0.6961836218833923, - "eval_runtime": 151.3756, - "eval_samples_per_second": 102.018, - "eval_steps_per_second": 0.799, - "step": 21000 - }, - { - "epoch": 0.014198, - "loss_gen": 4.205439567565918, - "loss_rtd": 0.390224426984787, - "loss_sent": 0.43894219398498535, - "loss_sod": 0.0434480756521225, - "loss_total": 0.8726146817207336, - "step": 21099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.209077835083008, - "loss_rtd": 0.4092998206615448, - "loss_sent": 0.49253588914871216, - "loss_sod": 0.06910588592290878, - "loss_total": 0.9709416031837463, - "step": 21099 - }, - { - "epoch": 0.0142, - "grad_norm": 2.717113971710205, - "learning_rate": 8.7029783240458e-05, - "loss": 0.7054, - "step": 21100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.044052600860596, - "loss_rtd": 0.4245973527431488, - "loss_sent": 0.14878441393375397, - "loss_sod": 0.052621856331825256, - "loss_total": 0.626003623008728, - "step": 21199 - }, - { - "epoch": 0.014398, - "loss_gen": 3.825176954269409, - "loss_rtd": 0.4175969064235687, - "loss_sent": 0.2602907419204712, - "loss_sod": 0.1315835416316986, - "loss_total": 0.8094711899757385, - "step": 21199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.7577776312828064, - "learning_rate": 8.700845259089989e-05, - "loss": 0.7134, - "step": 21200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.306161880493164, - "loss_rtd": 0.3955383002758026, - "loss_sent": 0.047602277249097824, - "loss_sod": 0.011172058060765266, - "loss_total": 0.45431262254714966, - "step": 21299 - }, - { - "epoch": 0.014598, - "loss_gen": 3.892268657684326, - "loss_rtd": 0.4082190692424774, - "loss_sent": 0.1741885244846344, - "loss_sod": 0.15817657113075256, - "loss_total": 0.740584135055542, - "step": 21299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.9689379930496216, - "learning_rate": 8.69871070343415e-05, - "loss": 0.6951, - "step": 21300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.0580854415893555, - "loss_rtd": 0.3923904299736023, - "loss_sent": 0.26663270592689514, - "loss_sod": 0.011745485477149487, - "loss_total": 0.6707686185836792, - "step": 21399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.141996383666992, - "loss_rtd": 0.41980284452438354, - "loss_sent": 0.2592174708843231, - "loss_sod": 0.053839970380067825, - "loss_total": 0.7328603267669678, - "step": 21399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.1764981746673584, - "learning_rate": 8.696574657938081e-05, - "loss": 0.7015, - "step": 21400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.1991496086120605, - "loss_rtd": 0.40171363949775696, - "loss_sent": 0.17878541350364685, - "loss_sod": 0.10786039382219315, - "loss_total": 0.6883594989776611, - "step": 21499 - }, - { - "epoch": 0.014998, - "loss_gen": 3.7081212997436523, - "loss_rtd": 0.41223984956741333, - "loss_sent": 0.08660165965557098, - "loss_sod": 0.032381195574998856, - "loss_total": 0.5312227010726929, - "step": 21499 - }, - { - "epoch": 0.015, - "grad_norm": 0.9278017282485962, - "learning_rate": 8.694437123462182e-05, - "loss": 0.715, - "step": 21500 - }, - { - "epoch": 0.015198, - "loss_gen": 3.3395333290100098, - "loss_rtd": 0.399366557598114, - "loss_sent": 0.10116645693778992, - "loss_sod": 0.21150001883506775, - "loss_total": 0.7120330333709717, - "step": 21599 - }, - { - "epoch": 0.015198, - "loss_gen": 2.31325364112854, - "loss_rtd": 0.37933480739593506, - "loss_sent": 0.0013232976198196411, - "loss_sod": 0.27269241213798523, - "loss_total": 0.6533505320549011, - "step": 21599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.8672342300415039, - "learning_rate": 8.692298100867453e-05, - "loss": 0.7099, - "step": 21600 - }, - { - "epoch": 0.015398, - "loss_gen": 3.8823935985565186, - "loss_rtd": 0.4169360399246216, - "loss_sent": 0.088588185608387, - "loss_sod": 0.053140271455049515, - "loss_total": 0.5586645007133484, - "step": 21699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.013762950897217, - "loss_rtd": 0.43377217650413513, - "loss_sent": 0.27658548951148987, - "loss_sod": 0.09921903163194656, - "loss_total": 0.8095767498016357, - "step": 21699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.9942051768302917, - "learning_rate": 8.690157591015489e-05, - "loss": 0.7113, - "step": 21700 - }, - { - "epoch": 0.015598, - "loss_gen": 3.874941110610962, - "loss_rtd": 0.4076584577560425, - "loss_sent": 0.19729302823543549, - "loss_sod": 0.0727071613073349, - "loss_total": 0.6776586771011353, - "step": 21799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.1540727615356445, - "loss_rtd": 0.42252597212791443, - "loss_sent": 0.11478852480649948, - "loss_sod": 0.11771350353956223, - "loss_total": 0.6550279855728149, - "step": 21799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.6577136516571045, - "learning_rate": 8.688015594768488e-05, - "loss": 0.6966, - "step": 21800 - }, - { - "epoch": 0.015798, - "loss_gen": 3.9389901161193848, - "loss_rtd": 0.4259053170681, - "loss_sent": 0.17768579721450806, - "loss_sod": 0.04217896610498428, - "loss_total": 0.6457700729370117, - "step": 21899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.207218647003174, - "loss_rtd": 0.39804330468177795, - "loss_sent": 0.21022315323352814, - "loss_sod": 0.09626258909702301, - "loss_total": 0.7045290470123291, - "step": 21899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.9212267994880676, - "learning_rate": 8.685872112989248e-05, - "loss": 0.7099, - "step": 21900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.139245510101318, - "loss_rtd": 0.4145708382129669, - "loss_sent": 0.17489010095596313, - "loss_sod": 0.03991694003343582, - "loss_total": 0.6293778419494629, - "step": 21999 - }, - { - "epoch": 0.015998, - "loss_gen": 4.0036516189575195, - "loss_rtd": 0.4070430397987366, - "loss_sent": 0.19366881251335144, - "loss_sod": 0.04858838766813278, - "loss_total": 0.649300217628479, - "step": 21999 - }, - { - "epoch": 0.016, - "grad_norm": 0.8448497653007507, - "learning_rate": 8.68372714654116e-05, - "loss": 0.7183, - "step": 22000 - }, - { - "epoch": 0.016, - "eval_loss": 0.6913487315177917, - "eval_runtime": 151.5751, - "eval_samples_per_second": 101.883, - "eval_steps_per_second": 0.798, - "step": 22000 - }, - { - "epoch": 0.016198, - "loss_gen": 3.783210277557373, - "loss_rtd": 0.41268104314804077, - "loss_sent": 0.13717064261436462, - "loss_sod": 0.015744447708129883, - "loss_total": 0.5655961036682129, - "step": 22099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.48651647567749, - "loss_rtd": 0.4079062342643738, - "loss_sent": 0.03336573764681816, - "loss_sod": 0.272824764251709, - "loss_total": 0.71409672498703, - "step": 22099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.8137263655662537, - "learning_rate": 8.681580696288219e-05, - "loss": 0.7106, - "step": 22100 - }, - { - "epoch": 0.016398, - "loss_gen": 3.734351634979248, - "loss_rtd": 0.39496752619743347, - "loss_sent": 0.3425740599632263, - "loss_sod": 0.032268062233924866, - "loss_total": 0.7698096632957458, - "step": 22199 - }, - { - "epoch": 0.016398, - "loss_gen": 2.9667320251464844, - "loss_rtd": 0.41361331939697266, - "loss_sent": 0.007793530356138945, - "loss_sod": 0.19353321194648743, - "loss_total": 0.6149400472640991, - "step": 22199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.9939231276512146, - "learning_rate": 8.679432763095014e-05, - "loss": 0.7061, - "step": 22200 - }, - { - "epoch": 0.016598, - "loss_gen": 3.9864869117736816, - "loss_rtd": 0.41875556111335754, - "loss_sent": 0.29814907908439636, - "loss_sod": 0.05030648410320282, - "loss_total": 0.7672110795974731, - "step": 22299 - }, - { - "epoch": 0.016598, - "loss_gen": 3.6811013221740723, - "loss_rtd": 0.3975054621696472, - "loss_sent": 0.06244634836912155, - "loss_sod": 0.13688090443611145, - "loss_total": 0.5968327522277832, - "step": 22299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8836981654167175, - "learning_rate": 8.677283347826732e-05, - "loss": 0.7077, - "step": 22300 - }, - { - "epoch": 0.016798, - "loss_gen": 3.832756519317627, - "loss_rtd": 0.4089447259902954, - "loss_sent": 0.3234836757183075, - "loss_sod": 0.006607224233448505, - "loss_total": 0.7390356063842773, - "step": 22399 - }, - { - "epoch": 0.016798, - "loss_gen": 3.8547472953796387, - "loss_rtd": 0.39647242426872253, - "loss_sent": 0.15630517899990082, - "loss_sod": 0.08985313773155212, - "loss_total": 0.6426307559013367, - "step": 22399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.8587817549705505, - "learning_rate": 8.675132451349157e-05, - "loss": 0.693, - "step": 22400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.252213954925537, - "loss_rtd": 0.40426650643348694, - "loss_sent": 0.31181442737579346, - "loss_sod": 0.09723158180713654, - "loss_total": 0.8133125305175781, - "step": 22499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.409897804260254, - "loss_rtd": 0.40113937854766846, - "loss_sent": 0.240126371383667, - "loss_sod": 0.06067778170108795, - "loss_total": 0.7019435167312622, - "step": 22499 - }, - { - "epoch": 0.017, - "grad_norm": 1.4950565099716187, - "learning_rate": 8.67298007452867e-05, - "loss": 0.7183, - "step": 22500 - }, - { - "epoch": 0.017198, - "loss_gen": 2.8861172199249268, - "loss_rtd": 0.40829846262931824, - "loss_sent": 0.057722508907318115, - "loss_sod": 0.20995157957077026, - "loss_total": 0.6759725213050842, - "step": 22599 - }, - { - "epoch": 0.017198, - "loss_gen": 4.093786716461182, - "loss_rtd": 0.4014229476451874, - "loss_sent": 0.535052478313446, - "loss_sod": 0.021234866231679916, - "loss_total": 0.9577102661132812, - "step": 22599 - }, - { - "epoch": 0.0172, - "grad_norm": 2.5805695056915283, - "learning_rate": 8.670826218232248e-05, - "loss": 0.6747, - "step": 22600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.194330215454102, - "loss_rtd": 0.3959496319293976, - "loss_sent": 0.21887068450450897, - "loss_sod": 0.055604949593544006, - "loss_total": 0.670425295829773, - "step": 22699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.2793169021606445, - "loss_rtd": 0.39640936255455017, - "loss_sent": 0.27822908759117126, - "loss_sod": 0.029160842299461365, - "loss_total": 0.7037992477416992, - "step": 22699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.0775412321090698, - "learning_rate": 8.668670883327466e-05, - "loss": 0.7011, - "step": 22700 - }, - { - "epoch": 0.017598, - "loss_gen": 3.990032196044922, - "loss_rtd": 0.40130147337913513, - "loss_sent": 0.03100929781794548, - "loss_sod": 0.04583095759153366, - "loss_total": 0.478141725063324, - "step": 22799 - }, - { - "epoch": 0.017598, - "loss_gen": 3.3408172130584717, - "loss_rtd": 0.4124903976917267, - "loss_sent": 0.001292606582865119, - "loss_sod": 0.3106074631214142, - "loss_total": 0.72439044713974, - "step": 22799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.9182292222976685, - "learning_rate": 8.666514070682489e-05, - "loss": 0.6921, - "step": 22800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.052070140838623, - "loss_rtd": 0.41997236013412476, - "loss_sent": 0.4130243957042694, - "loss_sod": 0.05000931769609451, - "loss_total": 0.8830060958862305, - "step": 22899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.178469181060791, - "loss_rtd": 0.39809826016426086, - "loss_sent": 0.18564869463443756, - "loss_sod": 0.11889810860157013, - "loss_total": 0.7026450634002686, - "step": 22899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.1307878494262695, - "learning_rate": 8.664355781166084e-05, - "loss": 0.6988, - "step": 22900 - }, - { - "epoch": 0.017998, - "loss_gen": 3.903327226638794, - "loss_rtd": 0.3989095091819763, - "loss_sent": 0.24173308908939362, - "loss_sod": 0.0987914428114891, - "loss_total": 0.7394340634346008, - "step": 22999 - }, - { - "epoch": 0.017998, - "loss_gen": 3.99613356590271, - "loss_rtd": 0.4007818400859833, - "loss_sent": 0.42962703108787537, - "loss_sod": 0.2058112770318985, - "loss_total": 1.0362201929092407, - "step": 22999 - }, - { - "epoch": 0.018, - "grad_norm": 1.4807555675506592, - "learning_rate": 8.662196015647608e-05, - "loss": 0.699, - "step": 23000 - }, - { - "epoch": 0.018, - "eval_loss": 0.6816897988319397, - "eval_runtime": 151.3149, - "eval_samples_per_second": 102.059, - "eval_steps_per_second": 0.8, - "step": 23000 - }, - { - "epoch": 0.018198, - "loss_gen": 3.8977420330047607, - "loss_rtd": 0.39817938208580017, - "loss_sent": 0.12031625211238861, - "loss_sod": 0.02363927662372589, - "loss_total": 0.5421349406242371, - "step": 23099 - }, - { - "epoch": 0.018198, - "loss_gen": 3.8846075534820557, - "loss_rtd": 0.4139736592769623, - "loss_sent": 0.09967810660600662, - "loss_sod": 0.03566112369298935, - "loss_total": 0.5493128895759583, - "step": 23099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.8791826963424683, - "learning_rate": 8.660034774997014e-05, - "loss": 0.686, - "step": 23100 - }, - { - "epoch": 0.018398, - "loss_gen": 4.103387832641602, - "loss_rtd": 0.39818358421325684, - "loss_sent": 0.24097369611263275, - "loss_sod": 0.0187580157071352, - "loss_total": 0.6579152941703796, - "step": 23199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.216580867767334, - "loss_rtd": 0.39543870091438293, - "loss_sent": 0.4009850323200226, - "loss_sod": 0.06736785918474197, - "loss_total": 0.8637915849685669, - "step": 23199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.7484025955200195, - "learning_rate": 8.657872060084852e-05, - "loss": 0.7072, - "step": 23200 - }, - { - "epoch": 0.018598, - "loss_gen": 3.958742618560791, - "loss_rtd": 0.4209873080253601, - "loss_sent": 0.22494757175445557, - "loss_sod": 0.033440593630075455, - "loss_total": 0.6793754696846008, - "step": 23299 - }, - { - "epoch": 0.018598, - "loss_gen": 3.8709557056427, - "loss_rtd": 0.4054337441921234, - "loss_sent": 0.12235992401838303, - "loss_sod": 0.0359024778008461, - "loss_total": 0.5636961460113525, - "step": 23299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.2961993217468262, - "learning_rate": 8.65570787178226e-05, - "loss": 0.7079, - "step": 23300 - }, - { - "epoch": 0.018798, - "loss_gen": 3.9349918365478516, - "loss_rtd": 0.40120649337768555, - "loss_sent": 0.1885022073984146, - "loss_sod": 0.012976760044693947, - "loss_total": 0.6026854515075684, - "step": 23399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.091781139373779, - "loss_rtd": 0.40365511178970337, - "loss_sent": 0.2327946424484253, - "loss_sod": 0.06162922829389572, - "loss_total": 0.698078989982605, - "step": 23399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.784354567527771, - "learning_rate": 8.653542210960975e-05, - "loss": 0.7064, - "step": 23400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.1265716552734375, - "loss_rtd": 0.3895909786224365, - "loss_sent": 0.1206795871257782, - "loss_sod": 0.17931661009788513, - "loss_total": 0.6895872354507446, - "step": 23499 - }, - { - "epoch": 0.018998, - "loss_gen": 3.7460741996765137, - "loss_rtd": 0.4037736654281616, - "loss_sent": 0.25270503759384155, - "loss_sod": 0.07054421305656433, - "loss_total": 0.7270228862762451, - "step": 23499 - }, - { - "epoch": 0.019, - "grad_norm": 1.0526052713394165, - "learning_rate": 8.651375078493325e-05, - "loss": 0.6982, - "step": 23500 - }, - { - "epoch": 0.019198, - "loss_gen": 3.9294276237487793, - "loss_rtd": 0.4179944694042206, - "loss_sent": 0.4713047742843628, - "loss_sod": 0.07345118373632431, - "loss_total": 0.9627504348754883, - "step": 23599 - }, - { - "epoch": 0.019198, - "loss_gen": 3.983640193939209, - "loss_rtd": 0.3858213424682617, - "loss_sent": 0.08541528880596161, - "loss_sod": 0.12690787017345428, - "loss_total": 0.59814453125, - "step": 23599 - }, - { - "epoch": 0.0192, - "grad_norm": 2.1285250186920166, - "learning_rate": 8.64920647525223e-05, - "loss": 0.6978, - "step": 23600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.192156791687012, - "loss_rtd": 0.4084530770778656, - "loss_sent": 0.38544365763664246, - "loss_sod": 0.018873078748583794, - "loss_total": 0.8127697706222534, - "step": 23699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.107691287994385, - "loss_rtd": 0.4033298194408417, - "loss_sent": 0.21068085730075836, - "loss_sod": 0.11531396210193634, - "loss_total": 0.7293246388435364, - "step": 23699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.0886321067810059, - "learning_rate": 8.647036402111202e-05, - "loss": 0.6809, - "step": 23700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.093698024749756, - "loss_rtd": 0.41472548246383667, - "loss_sent": 0.5435208678245544, - "loss_sod": 0.020486906170845032, - "loss_total": 0.978733241558075, - "step": 23799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.09850549697876, - "loss_rtd": 0.40556395053863525, - "loss_sent": 0.2776758670806885, - "loss_sod": 0.030896909534931183, - "loss_total": 0.7141367197036743, - "step": 23799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.3980774879455566, - "learning_rate": 8.644864859944348e-05, - "loss": 0.7079, - "step": 23800 - }, - { - "epoch": 0.019798, - "loss_gen": 2.1838574409484863, - "loss_rtd": 0.38736552000045776, - "loss_sent": 0.0054249693639576435, - "loss_sod": 0.31862956285476685, - "loss_total": 0.7114200592041016, - "step": 23899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.038309097290039, - "loss_rtd": 0.40513619780540466, - "loss_sent": 0.3140098452568054, - "loss_sod": 0.07896880805492401, - "loss_total": 0.7981148362159729, - "step": 23899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.0776424407958984, - "learning_rate": 8.642691849626364e-05, - "loss": 0.703, - "step": 23900 - }, - { - "epoch": 0.019998, - "loss_gen": 2.5359106063842773, - "loss_rtd": 0.36821162700653076, - "loss_sent": 0.031892210245132446, - "loss_sod": 0.18366624414920807, - "loss_total": 0.5837700366973877, - "step": 23999 - }, - { - "epoch": 0.019998, - "loss_gen": 3.8365397453308105, - "loss_rtd": 0.39642348885536194, - "loss_sent": 0.1128220409154892, - "loss_sod": 0.025802068412303925, - "loss_total": 0.5350475907325745, - "step": 23999 - }, - { - "epoch": 0.02, - "grad_norm": 0.9664748311042786, - "learning_rate": 8.64051737203254e-05, - "loss": 0.7152, - "step": 24000 - }, - { - "epoch": 0.02, - "eval_loss": 0.676652729511261, - "eval_runtime": 151.6275, - "eval_samples_per_second": 101.848, - "eval_steps_per_second": 0.798, - "step": 24000 - }, - { - "epoch": 0.020198, - "loss_gen": 2.9289157390594482, - "loss_rtd": 0.39156660437583923, - "loss_sent": 0.08221442252397537, - "loss_sod": 0.12104234844446182, - "loss_total": 0.5948233604431152, - "step": 24099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.0122151374816895, - "loss_rtd": 0.4064323306083679, - "loss_sent": 0.19350147247314453, - "loss_sod": 0.06628310680389404, - "loss_total": 0.6662169098854065, - "step": 24099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.2565594911575317, - "learning_rate": 8.638341428038752e-05, - "loss": 0.6899, - "step": 24100 - }, - { - "epoch": 0.020398, - "loss_gen": 4.0760273933410645, - "loss_rtd": 0.43153977394104004, - "loss_sent": 0.20869547128677368, - "loss_sod": 0.07006371021270752, - "loss_total": 0.7102989554405212, - "step": 24199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.087414264678955, - "loss_rtd": 0.40784889459609985, - "loss_sent": 0.14950834214687347, - "loss_sod": 0.05528098717331886, - "loss_total": 0.6126382350921631, - "step": 24199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.1879384517669678, - "learning_rate": 8.636164018521473e-05, - "loss": 0.6932, - "step": 24200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.045533180236816, - "loss_rtd": 0.4118155241012573, - "loss_sent": 0.12074345350265503, - "loss_sod": 0.10650644451379776, - "loss_total": 0.6390654444694519, - "step": 24299 - }, - { - "epoch": 0.020598, - "loss_gen": 3.007084608078003, - "loss_rtd": 0.40772879123687744, - "loss_sent": 0.13143962621688843, - "loss_sod": 0.29404184222221375, - "loss_total": 0.833210289478302, - "step": 24299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.9798792600631714, - "learning_rate": 8.633985144357762e-05, - "loss": 0.6961, - "step": 24300 - }, - { - "epoch": 0.020798, - "loss_gen": 3.431025981903076, - "loss_rtd": 0.38585808873176575, - "loss_sent": 0.08852383494377136, - "loss_sod": 0.0714193657040596, - "loss_total": 0.5458012819290161, - "step": 24399 - }, - { - "epoch": 0.020798, - "loss_gen": 3.961956739425659, - "loss_rtd": 0.40572047233581543, - "loss_sent": 0.1551080048084259, - "loss_sod": 0.0599331334233284, - "loss_total": 0.6207616329193115, - "step": 24399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.948943555355072, - "learning_rate": 8.63180480642527e-05, - "loss": 0.7037, - "step": 24400 - }, - { - "epoch": 0.020998, - "loss_gen": 3.923896074295044, - "loss_rtd": 0.38946792483329773, - "loss_sent": 0.3071112036705017, - "loss_sod": 0.03382628783583641, - "loss_total": 0.7304054498672485, - "step": 24499 - }, - { - "epoch": 0.020998, - "loss_gen": 3.72622013092041, - "loss_rtd": 0.40381157398223877, - "loss_sent": 0.1826399266719818, - "loss_sod": 0.024815665557980537, - "loss_total": 0.6112672090530396, - "step": 24499 - }, - { - "epoch": 0.021, - "grad_norm": 0.820321261882782, - "learning_rate": 8.629623005602234e-05, - "loss": 0.6752, - "step": 24500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.044614791870117, - "loss_rtd": 0.4168679714202881, - "loss_sent": 0.23598124086856842, - "loss_sod": 0.16695289313793182, - "loss_total": 0.8198021054267883, - "step": 24599 - }, - { - "epoch": 0.021198, - "loss_gen": 2.699082374572754, - "loss_rtd": 0.3844226598739624, - "loss_sent": 0.015604406595230103, - "loss_sod": 0.23557092249393463, - "loss_total": 0.6355979442596436, - "step": 24599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.1755541563034058, - "learning_rate": 8.627439742767488e-05, - "loss": 0.6817, - "step": 24600 - }, - { - "epoch": 0.021398, - "loss_gen": 3.3173317909240723, - "loss_rtd": 0.38064172863960266, - "loss_sent": 0.07833272218704224, - "loss_sod": 0.08912532776594162, - "loss_total": 0.5480997562408447, - "step": 24699 - }, - { - "epoch": 0.021398, - "loss_gen": 3.951279640197754, - "loss_rtd": 0.40371090173721313, - "loss_sent": 0.1179547980427742, - "loss_sod": 0.03314758092164993, - "loss_total": 0.5548132658004761, - "step": 24699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.7562770843505859, - "learning_rate": 8.625255018800446e-05, - "loss": 0.6785, - "step": 24700 - }, - { - "epoch": 0.021598, - "loss_gen": 2.5105037689208984, - "loss_rtd": 0.3946555554866791, - "loss_sent": 0.010327634401619434, - "loss_sod": 0.3795273005962372, - "loss_total": 0.7845104932785034, - "step": 24799 - }, - { - "epoch": 0.021598, - "loss_gen": 3.9272818565368652, - "loss_rtd": 0.39661532640457153, - "loss_sent": 0.2233789712190628, - "loss_sod": 0.039907559752464294, - "loss_total": 0.6599018573760986, - "step": 24799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.2474645376205444, - "learning_rate": 8.623068834581116e-05, - "loss": 0.7006, - "step": 24800 - }, - { - "epoch": 0.021798, - "loss_gen": 3.11579966545105, - "loss_rtd": 0.40773114562034607, - "loss_sent": 0.07494784891605377, - "loss_sod": 0.16161800920963287, - "loss_total": 0.6442970037460327, - "step": 24899 - }, - { - "epoch": 0.021798, - "loss_gen": 3.445728063583374, - "loss_rtd": 0.4030747413635254, - "loss_sent": 0.014770938083529472, - "loss_sod": 0.19394944608211517, - "loss_total": 0.6117951273918152, - "step": 24899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.9890291094779968, - "learning_rate": 8.620881190990095e-05, - "loss": 0.6908, - "step": 24900 - }, - { - "epoch": 0.021998, - "loss_gen": 3.799651861190796, - "loss_rtd": 0.4000953435897827, - "loss_sent": 0.13671578466892242, - "loss_sod": 0.10781806707382202, - "loss_total": 0.644629180431366, - "step": 24999 - }, - { - "epoch": 0.021998, - "loss_gen": 3.714355230331421, - "loss_rtd": 0.3902835547924042, - "loss_sent": 0.342161625623703, - "loss_sod": 0.006307260133326054, - "loss_total": 0.7387524843215942, - "step": 24999 - }, - { - "epoch": 0.022, - "grad_norm": 2.0926296710968018, - "learning_rate": 8.618692088908561e-05, - "loss": 0.6998, - "step": 25000 - }, - { - "epoch": 0.022, - "eval_loss": 0.6693914532661438, - "eval_runtime": 151.6384, - "eval_samples_per_second": 101.841, - "eval_steps_per_second": 0.798, - "step": 25000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.19512414932251, - "loss_rtd": 0.3900134265422821, - "loss_sent": 0.18684500455856323, - "loss_sod": 0.02522754669189453, - "loss_total": 0.6020859479904175, - "step": 25099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.123718738555908, - "loss_rtd": 0.38469183444976807, - "loss_sent": 0.25549593567848206, - "loss_sod": 0.048402104526758194, - "loss_total": 0.688589870929718, - "step": 25099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.8465349078178406, - "learning_rate": 8.616501529218286e-05, - "loss": 0.6847, - "step": 25100 - }, - { - "epoch": 0.022398, - "loss_gen": 4.1009202003479, - "loss_rtd": 0.4004979431629181, - "loss_sent": 0.2241445779800415, - "loss_sod": 0.06554199010133743, - "loss_total": 0.690184473991394, - "step": 25199 - }, - { - "epoch": 0.022398, - "loss_gen": 4.1883864402771, - "loss_rtd": 0.4093237817287445, - "loss_sent": 0.4629150629043579, - "loss_sod": 0.046529121696949005, - "loss_total": 0.9187679290771484, - "step": 25199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.9161996841430664, - "learning_rate": 8.614309512801628e-05, - "loss": 0.6966, - "step": 25200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.16085958480835, - "loss_rtd": 0.39612576365470886, - "loss_sent": 0.09838993847370148, - "loss_sod": 0.04299217090010643, - "loss_total": 0.5375078916549683, - "step": 25299 - }, - { - "epoch": 0.022598, - "loss_gen": 3.9336602687835693, - "loss_rtd": 0.40497827529907227, - "loss_sent": 0.2555369436740875, - "loss_sod": 0.17616719007492065, - "loss_total": 0.8366824388504028, - "step": 25299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.8919493556022644, - "learning_rate": 8.612116040541531e-05, - "loss": 0.6924, - "step": 25300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.016414642333984, - "loss_rtd": 0.42911723256111145, - "loss_sent": 0.15751326084136963, - "loss_sod": 0.0035857190378010273, - "loss_total": 0.5902162194252014, - "step": 25399 - }, - { - "epoch": 0.022798, - "loss_gen": 3.0825438499450684, - "loss_rtd": 0.38710057735443115, - "loss_sent": 0.023861445486545563, - "loss_sod": 0.08589092642068863, - "loss_total": 0.49685293436050415, - "step": 25399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.7878300547599792, - "learning_rate": 8.609921113321526e-05, - "loss": 0.6836, - "step": 25400 - }, - { - "epoch": 0.022998, - "loss_gen": 2.6240932941436768, - "loss_rtd": 0.38083410263061523, - "loss_sent": 0.0003823730512522161, - "loss_sod": 0.23906835913658142, - "loss_total": 0.6202848553657532, - "step": 25499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.091073513031006, - "loss_rtd": 0.40128275752067566, - "loss_sent": 0.13246385753154755, - "loss_sod": 0.09995909035205841, - "loss_total": 0.633705735206604, - "step": 25499 - }, - { - "epoch": 0.023, - "grad_norm": 1.3569259643554688, - "learning_rate": 8.607724732025726e-05, - "loss": 0.6916, - "step": 25500 - }, - { - "epoch": 0.023198, - "loss_gen": 3.147066354751587, - "loss_rtd": 0.3897458612918854, - "loss_sent": 0.14791350066661835, - "loss_sod": 0.16765955090522766, - "loss_total": 0.7053189277648926, - "step": 25599 - }, - { - "epoch": 0.023198, - "loss_gen": 3.56636381149292, - "loss_rtd": 0.39919835329055786, - "loss_sent": 0.1255677193403244, - "loss_sod": 0.1003207266330719, - "loss_total": 0.625086784362793, - "step": 25599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.9187750816345215, - "learning_rate": 8.605526897538836e-05, - "loss": 0.6918, - "step": 25600 - }, - { - "epoch": 0.023398, - "loss_gen": 3.95824933052063, - "loss_rtd": 0.4032837748527527, - "loss_sent": 0.3386683762073517, - "loss_sod": 0.08816278725862503, - "loss_total": 0.8301149606704712, - "step": 25699 - }, - { - "epoch": 0.023398, - "loss_gen": 3.9838974475860596, - "loss_rtd": 0.40660005807876587, - "loss_sent": 0.04155031964182854, - "loss_sod": 0.06001908332109451, - "loss_total": 0.5081694722175598, - "step": 25699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.4878605604171753, - "learning_rate": 8.603327610746143e-05, - "loss": 0.6751, - "step": 25700 - }, - { - "epoch": 0.023598, - "loss_gen": 2.472942590713501, - "loss_rtd": 0.38380375504493713, - "loss_sent": 0.05147818848490715, - "loss_sod": 0.1970677673816681, - "loss_total": 0.6323497295379639, - "step": 25799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.118562698364258, - "loss_rtd": 0.4042208194732666, - "loss_sent": 0.33144524693489075, - "loss_sod": 0.01570950075984001, - "loss_total": 0.7513755559921265, - "step": 25799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.1134629249572754, - "learning_rate": 8.60112687253352e-05, - "loss": 0.6792, - "step": 25800 - }, - { - "epoch": 0.023798, - "loss_gen": 4.10316801071167, - "loss_rtd": 0.41145727038383484, - "loss_sent": 0.11688307672739029, - "loss_sod": 0.063237763941288, - "loss_total": 0.5915781259536743, - "step": 25899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.139028072357178, - "loss_rtd": 0.38884127140045166, - "loss_sent": 0.15973824262619019, - "loss_sod": 0.020577775314450264, - "loss_total": 0.5691573023796082, - "step": 25899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.445813536643982, - "learning_rate": 8.598924683787423e-05, - "loss": 0.6812, - "step": 25900 - }, - { - "epoch": 0.023998, - "loss_gen": 3.7991557121276855, - "loss_rtd": 0.39667585492134094, - "loss_sent": 0.1311122626066208, - "loss_sod": 0.0969759151339531, - "loss_total": 0.6247640252113342, - "step": 25999 - }, - { - "epoch": 0.023998, - "loss_gen": 2.765401601791382, - "loss_rtd": 0.38930559158325195, - "loss_sent": 0.04718257114291191, - "loss_sod": 0.18341773748397827, - "loss_total": 0.6199058890342712, - "step": 25999 - }, - { - "epoch": 0.024, - "grad_norm": 1.1262786388397217, - "learning_rate": 8.596721045394893e-05, - "loss": 0.6788, - "step": 26000 - }, - { - "epoch": 0.024, - "eval_loss": 0.6740491390228271, - "eval_runtime": 151.6725, - "eval_samples_per_second": 101.818, - "eval_steps_per_second": 0.798, - "step": 26000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.190979957580566, - "loss_rtd": 0.40651756525039673, - "loss_sent": 0.06409557163715363, - "loss_sod": 0.05562091991305351, - "loss_total": 0.5262340307235718, - "step": 26099 - }, - { - "epoch": 0.024198, - "loss_gen": 4.179203510284424, - "loss_rtd": 0.4081471562385559, - "loss_sent": 0.6165047883987427, - "loss_sod": 0.06474655121564865, - "loss_total": 1.0893985033035278, - "step": 26099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.2236292362213135, - "learning_rate": 8.594515958243557e-05, - "loss": 0.6922, - "step": 26100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.126471042633057, - "loss_rtd": 0.4005105197429657, - "loss_sent": 0.11121775209903717, - "loss_sod": 0.04821493476629257, - "loss_total": 0.5599431991577148, - "step": 26199 - }, - { - "epoch": 0.024398, - "loss_gen": 4.1259260177612305, - "loss_rtd": 0.4046095013618469, - "loss_sent": 0.14760328829288483, - "loss_sod": 0.04813776910305023, - "loss_total": 0.600350558757782, - "step": 26199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9959726929664612, - "learning_rate": 8.592309423221622e-05, - "loss": 0.6985, - "step": 26200 - }, - { - "epoch": 0.024598, - "loss_gen": 3.951544761657715, - "loss_rtd": 0.3880527913570404, - "loss_sent": 0.23553051054477692, - "loss_sod": 0.09306551516056061, - "loss_total": 0.7166488170623779, - "step": 26299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.2476115226745605, - "loss_rtd": 0.38585108518600464, - "loss_sent": 0.5140498876571655, - "loss_sod": 0.07745873928070068, - "loss_total": 0.9773597121238708, - "step": 26299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.0858606100082397, - "learning_rate": 8.590101441217881e-05, - "loss": 0.6942, - "step": 26300 - }, - { - "epoch": 0.024798, - "loss_gen": 4.143537521362305, - "loss_rtd": 0.3949359655380249, - "loss_sent": 0.28887373208999634, - "loss_sod": 0.04987058416008949, - "loss_total": 0.733680248260498, - "step": 26399 - }, - { - "epoch": 0.024798, - "loss_gen": 3.4738848209381104, - "loss_rtd": 0.39791178703308105, - "loss_sent": 0.020407138392329216, - "loss_sod": 0.20267105102539062, - "loss_total": 0.620989978313446, - "step": 26399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.310874104499817, - "learning_rate": 8.58789201312171e-05, - "loss": 0.6671, - "step": 26400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.453327655792236, - "loss_rtd": 0.38886758685112, - "loss_sent": 0.2199181467294693, - "loss_sod": 0.10373158007860184, - "loss_total": 0.7125173211097717, - "step": 26499 - }, - { - "epoch": 0.024998, - "loss_gen": 3.7706122398376465, - "loss_rtd": 0.3938748836517334, - "loss_sent": 0.14808478951454163, - "loss_sod": 0.14805863797664642, - "loss_total": 0.6900182962417603, - "step": 26499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8602674603462219, - "learning_rate": 8.585681139823064e-05, - "loss": 0.7111, - "step": 26500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.2173871994018555, - "loss_rtd": 0.37780728936195374, - "loss_sent": 0.2071719616651535, - "loss_sod": 0.026507128030061722, - "loss_total": 0.6114863753318787, - "step": 26599 - }, - { - "epoch": 0.025198, - "loss_gen": 3.8598594665527344, - "loss_rtd": 0.38630515336990356, - "loss_sent": 0.25124266743659973, - "loss_sod": 0.12236283719539642, - "loss_total": 0.7599107027053833, - "step": 26599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.8886291980743408, - "learning_rate": 8.583468822212484e-05, - "loss": 0.6839, - "step": 26600 - }, - { - "epoch": 0.025398, - "loss_gen": 3.7826666831970215, - "loss_rtd": 0.3913278579711914, - "loss_sent": 0.07715705782175064, - "loss_sod": 0.07193191349506378, - "loss_total": 0.5404168367385864, - "step": 26699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.422722816467285, - "loss_rtd": 0.39607706665992737, - "loss_sent": 0.2463354915380478, - "loss_sod": 0.03971979767084122, - "loss_total": 0.682132363319397, - "step": 26699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.9952844381332397, - "learning_rate": 8.58125506118109e-05, - "loss": 0.6941, - "step": 26700 - }, - { - "epoch": 0.025598, - "loss_gen": 3.191692352294922, - "loss_rtd": 0.39240193367004395, - "loss_sent": 0.09603174775838852, - "loss_sod": 0.2056460678577423, - "loss_total": 0.6940796971321106, - "step": 26799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.318501949310303, - "loss_rtd": 0.3884369134902954, - "loss_sent": 0.37728720903396606, - "loss_sod": 0.04980402812361717, - "loss_total": 0.815528154373169, - "step": 26799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.3234130144119263, - "learning_rate": 8.579039857620587e-05, - "loss": 0.6802, - "step": 26800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.028014183044434, - "loss_rtd": 0.4063258469104767, - "loss_sent": 0.4201416075229645, - "loss_sod": 0.16575351357460022, - "loss_total": 0.9922209978103638, - "step": 26899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.132768154144287, - "loss_rtd": 0.39652127027511597, - "loss_sent": 0.11059390008449554, - "loss_sod": 0.2088238000869751, - "loss_total": 0.7159389853477478, - "step": 26899 - }, - { - "epoch": 0.0258, - "grad_norm": 4.884906768798828, - "learning_rate": 8.576823212423258e-05, - "loss": 0.684, - "step": 26900 - }, - { - "epoch": 0.025998, - "loss_gen": 3.7047204971313477, - "loss_rtd": 0.3975735604763031, - "loss_sent": 0.17745862901210785, - "loss_sod": 0.17362171411514282, - "loss_total": 0.7486538887023926, - "step": 26999 - }, - { - "epoch": 0.025998, - "loss_gen": 3.9842333793640137, - "loss_rtd": 0.3972439765930176, - "loss_sent": 0.23744548857212067, - "loss_sod": 0.06872141361236572, - "loss_total": 0.7034108638763428, - "step": 26999 - }, - { - "epoch": 0.026, - "grad_norm": 1.8134444952011108, - "learning_rate": 8.574605126481966e-05, - "loss": 0.6802, - "step": 27000 - }, - { - "epoch": 0.026, - "eval_loss": 0.6658182144165039, - "eval_runtime": 151.5136, - "eval_samples_per_second": 101.925, - "eval_steps_per_second": 0.799, - "step": 27000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.2170023918151855, - "loss_rtd": 0.415958046913147, - "loss_sent": 0.21967723965644836, - "loss_sod": 0.021079879254102707, - "loss_total": 0.6567151546478271, - "step": 27099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.277978897094727, - "loss_rtd": 0.37973552942276, - "loss_sent": 0.14102819561958313, - "loss_sod": 0.03472696989774704, - "loss_total": 0.5554907321929932, - "step": 27099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.7816162109375, - "learning_rate": 8.572385600690156e-05, - "loss": 0.6838, - "step": 27100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.134129524230957, - "loss_rtd": 0.3900187313556671, - "loss_sent": 0.033807508647441864, - "loss_sod": 0.06719149649143219, - "loss_total": 0.49101775884628296, - "step": 27199 - }, - { - "epoch": 0.026398, - "loss_gen": 3.962702751159668, - "loss_rtd": 0.3858363926410675, - "loss_sent": 0.172145813703537, - "loss_sod": 0.12715598940849304, - "loss_total": 0.6851382255554199, - "step": 27199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.6766712665557861, - "learning_rate": 8.570164635941853e-05, - "loss": 0.6921, - "step": 27200 - }, - { - "epoch": 0.026598, - "loss_gen": 3.8449392318725586, - "loss_rtd": 0.3860165774822235, - "loss_sent": 0.18103188276290894, - "loss_sod": 0.014358876273036003, - "loss_total": 0.5814073085784912, - "step": 27299 - }, - { - "epoch": 0.026598, - "loss_gen": 3.932614803314209, - "loss_rtd": 0.39760032296180725, - "loss_sent": 0.2425239533185959, - "loss_sod": 0.023994160816073418, - "loss_total": 0.6641184091567993, - "step": 27299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.8064673542976379, - "learning_rate": 8.567942233131662e-05, - "loss": 0.6864, - "step": 27300 - }, - { - "epoch": 0.026798, - "loss_gen": 2.5391201972961426, - "loss_rtd": 0.3704368770122528, - "loss_sent": 0.01781933382153511, - "loss_sod": 0.17318029701709747, - "loss_total": 0.5614365339279175, - "step": 27399 - }, - { - "epoch": 0.026798, - "loss_gen": 3.9527628421783447, - "loss_rtd": 0.3933136761188507, - "loss_sent": 0.05654369294643402, - "loss_sod": 0.0030567459762096405, - "loss_total": 0.45291411876678467, - "step": 27399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.0445514917373657, - "learning_rate": 8.565718393154767e-05, - "loss": 0.6832, - "step": 27400 - }, - { - "epoch": 0.026998, - "loss_gen": 3.9675824642181396, - "loss_rtd": 0.4080756604671478, - "loss_sent": 0.3663182556629181, - "loss_sod": 0.03932018578052521, - "loss_total": 0.8137141466140747, - "step": 27499 - }, - { - "epoch": 0.026998, - "loss_gen": 3.931001663208008, - "loss_rtd": 0.4211898744106293, - "loss_sent": 0.1906636357307434, - "loss_sod": 0.01331951841711998, - "loss_total": 0.625173032283783, - "step": 27499 - }, - { - "epoch": 0.027, - "grad_norm": 2.134218454360962, - "learning_rate": 8.563493116906929e-05, - "loss": 0.6746, - "step": 27500 - }, - { - "epoch": 0.027198, - "loss_gen": 3.0001442432403564, - "loss_rtd": 0.40689149498939514, - "loss_sent": 8.177244308171794e-05, - "loss_sod": 0.38727009296417236, - "loss_total": 0.794243335723877, - "step": 27599 - }, - { - "epoch": 0.027198, - "loss_gen": 2.654207944869995, - "loss_rtd": 0.38280314207077026, - "loss_sent": 0.00010956126061500981, - "loss_sod": 0.34705692529678345, - "loss_total": 0.7299696207046509, - "step": 27599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.3557177782058716, - "learning_rate": 8.561266405284489e-05, - "loss": 0.6848, - "step": 27600 - }, - { - "epoch": 0.027398, - "loss_gen": 4.053226470947266, - "loss_rtd": 0.4024927318096161, - "loss_sent": 0.08170834183692932, - "loss_sod": 0.09648574143648148, - "loss_total": 0.5806868076324463, - "step": 27699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.063277244567871, - "loss_rtd": 0.40312132239341736, - "loss_sent": 0.08955015242099762, - "loss_sod": 0.036374881863594055, - "loss_total": 0.529046356678009, - "step": 27699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.108843207359314, - "learning_rate": 8.559038259184369e-05, - "loss": 0.6819, - "step": 27700 - }, - { - "epoch": 0.027598, - "loss_gen": 2.528923273086548, - "loss_rtd": 0.3807741105556488, - "loss_sent": 0.01023287232965231, - "loss_sod": 0.3588981032371521, - "loss_total": 0.7499050498008728, - "step": 27799 - }, - { - "epoch": 0.027598, - "loss_gen": 4.049012184143066, - "loss_rtd": 0.4035383462905884, - "loss_sent": 0.2546653747558594, - "loss_sod": 0.01868937723338604, - "loss_total": 0.6768931150436401, - "step": 27799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.410236120223999, - "learning_rate": 8.556808679504063e-05, - "loss": 0.683, - "step": 27800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.2119293212890625, - "loss_rtd": 0.3961019515991211, - "loss_sent": 0.2591469883918762, - "loss_sod": 0.1585664451122284, - "loss_total": 0.8138154149055481, - "step": 27899 - }, - { - "epoch": 0.027798, - "loss_gen": 4.044536590576172, - "loss_rtd": 0.3809899091720581, - "loss_sent": 0.08467037975788116, - "loss_sod": 0.06006523221731186, - "loss_total": 0.5257255434989929, - "step": 27899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.07132089138031, - "learning_rate": 8.554577667141644e-05, - "loss": 0.6724, - "step": 27900 - }, - { - "epoch": 0.027998, - "loss_gen": 3.5283443927764893, - "loss_rtd": 0.3904866576194763, - "loss_sent": 0.16283035278320312, - "loss_sod": 0.005995422601699829, - "loss_total": 0.5593124628067017, - "step": 27999 - }, - { - "epoch": 0.027998, - "loss_gen": 3.9386887550354004, - "loss_rtd": 0.40175795555114746, - "loss_sent": 0.12449917197227478, - "loss_sod": 0.17524287104606628, - "loss_total": 0.7015000581741333, - "step": 27999 - }, - { - "epoch": 0.028, - "grad_norm": 1.200141429901123, - "learning_rate": 8.552345222995768e-05, - "loss": 0.678, - "step": 28000 - }, - { - "epoch": 0.028, - "eval_loss": 0.6660157442092896, - "eval_runtime": 151.7199, - "eval_samples_per_second": 101.786, - "eval_steps_per_second": 0.798, - "step": 28000 - }, - { - "epoch": 0.028198, - "loss_gen": 3.672696590423584, - "loss_rtd": 0.41329845786094666, - "loss_sent": 0.3887781500816345, - "loss_sod": 0.02329905517399311, - "loss_total": 0.8253756761550903, - "step": 28099 - }, - { - "epoch": 0.028198, - "loss_gen": 3.825963258743286, - "loss_rtd": 0.39754387736320496, - "loss_sent": 0.23071692883968353, - "loss_sod": 0.0226123183965683, - "loss_total": 0.6508731245994568, - "step": 28099 - }, - { - "epoch": 0.0282, - "grad_norm": 2.2831573486328125, - "learning_rate": 8.550111347965659e-05, - "loss": 0.6707, - "step": 28100 - }, - { - "epoch": 0.028398, - "loss_gen": 3.5817105770111084, - "loss_rtd": 0.38901486992836, - "loss_sent": 0.13949772715568542, - "loss_sod": 0.17847463488578796, - "loss_total": 0.7069872617721558, - "step": 28199 - }, - { - "epoch": 0.028398, - "loss_gen": 3.9351091384887695, - "loss_rtd": 0.3837580680847168, - "loss_sent": 0.34843626618385315, - "loss_sod": 0.036523815244436264, - "loss_total": 0.7687181234359741, - "step": 28199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.053599238395691, - "learning_rate": 8.547876042951127e-05, - "loss": 0.6821, - "step": 28200 - }, - { - "epoch": 0.028598, - "loss_gen": 3.978748083114624, - "loss_rtd": 0.3998650908470154, - "loss_sent": 0.11169924587011337, - "loss_sod": 0.11000090092420578, - "loss_total": 0.6215652227401733, - "step": 28299 - }, - { - "epoch": 0.028598, - "loss_gen": 3.4779841899871826, - "loss_rtd": 0.39652907848358154, - "loss_sent": 0.13573679327964783, - "loss_sod": 0.15448696911334991, - "loss_total": 0.6867527961730957, - "step": 28299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.2055600881576538, - "learning_rate": 8.545639308852546e-05, - "loss": 0.6904, - "step": 28300 - }, - { - "epoch": 0.028798, - "loss_gen": 3.9777488708496094, - "loss_rtd": 0.40452948212623596, - "loss_sent": 0.16046662628650665, - "loss_sod": 0.05235912278294563, - "loss_total": 0.617355227470398, - "step": 28399 - }, - { - "epoch": 0.028798, - "loss_gen": 3.3435187339782715, - "loss_rtd": 0.39600813388824463, - "loss_sent": 0.18167442083358765, - "loss_sod": 0.09612157940864563, - "loss_total": 0.6738041639328003, - "step": 28399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.6021157503128052, - "learning_rate": 8.543401146570876e-05, - "loss": 0.7038, - "step": 28400 - }, - { - "epoch": 0.028998, - "loss_gen": 3.129559278488159, - "loss_rtd": 0.4089212715625763, - "loss_sent": 0.06733108311891556, - "loss_sod": 0.19300401210784912, - "loss_total": 0.669256329536438, - "step": 28499 - }, - { - "epoch": 0.028998, - "loss_gen": 4.067131519317627, - "loss_rtd": 0.411083459854126, - "loss_sent": 0.19176021218299866, - "loss_sod": 0.019741695374250412, - "loss_total": 0.6225853562355042, - "step": 28499 - }, - { - "epoch": 0.029, - "grad_norm": 1.7359048128128052, - "learning_rate": 8.541161557007649e-05, - "loss": 0.6795, - "step": 28500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.150300025939941, - "loss_rtd": 0.38514629006385803, - "loss_sent": 0.23437869548797607, - "loss_sod": 0.031097739934921265, - "loss_total": 0.6506227254867554, - "step": 28599 - }, - { - "epoch": 0.029198, - "loss_gen": 3.720979928970337, - "loss_rtd": 0.3987027108669281, - "loss_sent": 0.01098732091486454, - "loss_sod": 0.17678982019424438, - "loss_total": 0.5864798426628113, - "step": 28599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.6905227303504944, - "learning_rate": 8.53892054106497e-05, - "loss": 0.6865, - "step": 28600 - }, - { - "epoch": 0.029398, - "loss_gen": 3.9948174953460693, - "loss_rtd": 0.4022224545478821, - "loss_sent": 0.2017492949962616, - "loss_sod": 0.0130801722407341, - "loss_total": 0.6170519590377808, - "step": 28699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.131485939025879, - "loss_rtd": 0.3997971713542938, - "loss_sent": 0.2441902458667755, - "loss_sod": 0.010508487932384014, - "loss_total": 0.654495894908905, - "step": 28699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.1098192930221558, - "learning_rate": 8.536678099645519e-05, - "loss": 0.6859, - "step": 28700 - }, - { - "epoch": 0.029598, - "loss_gen": 3.984785556793213, - "loss_rtd": 0.39890533685684204, - "loss_sent": 0.1698952615261078, - "loss_sod": 0.016041960567235947, - "loss_total": 0.5848425626754761, - "step": 28799 - }, - { - "epoch": 0.029598, - "loss_gen": 3.76471209526062, - "loss_rtd": 0.4008885622024536, - "loss_sent": 0.5502520799636841, - "loss_sod": 0.10835559666156769, - "loss_total": 1.0594961643218994, - "step": 28799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.9490429162979126, - "learning_rate": 8.534434233652554e-05, - "loss": 0.6912, - "step": 28800 - }, - { - "epoch": 0.029798, - "loss_gen": 4.082456588745117, - "loss_rtd": 0.39842408895492554, - "loss_sent": 0.21229511499404907, - "loss_sod": 0.018248165026307106, - "loss_total": 0.6289674043655396, - "step": 28899 - }, - { - "epoch": 0.029798, - "loss_gen": 3.9854977130889893, - "loss_rtd": 0.3764669895172119, - "loss_sent": 0.1423240303993225, - "loss_sod": 0.012563586235046387, - "loss_total": 0.5313546061515808, - "step": 28899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.910893440246582, - "learning_rate": 8.532188943989902e-05, - "loss": 0.6787, - "step": 28900 - }, - { - "epoch": 0.029998, - "loss_gen": 4.123041152954102, - "loss_rtd": 0.3877366781234741, - "loss_sent": 0.15122351050376892, - "loss_sod": 0.06843329221010208, - "loss_total": 0.6073935031890869, - "step": 28999 - }, - { - "epoch": 0.029998, - "loss_gen": 3.9017460346221924, - "loss_rtd": 0.40746182203292847, - "loss_sent": 0.19289076328277588, - "loss_sod": 0.06269969046115875, - "loss_total": 0.6630522608757019, - "step": 28999 - }, - { - "epoch": 0.03, - "grad_norm": 1.3564268350601196, - "learning_rate": 8.529942231561965e-05, - "loss": 0.6793, - "step": 29000 - }, - { - "epoch": 0.03, - "eval_loss": 0.6600340604782104, - "eval_runtime": 151.6015, - "eval_samples_per_second": 101.866, - "eval_steps_per_second": 0.798, - "step": 29000 - }, - { - "epoch": 0.030198, - "loss_gen": 2.629991054534912, - "loss_rtd": 0.3722720146179199, - "loss_sent": 0.02489466778934002, - "loss_sod": 0.2123338282108307, - "loss_total": 0.6095004677772522, - "step": 29099 - }, - { - "epoch": 0.030198, - "loss_gen": 2.5899765491485596, - "loss_rtd": 0.37675192952156067, - "loss_sent": 0.03350969776511192, - "loss_sod": 0.09084537625312805, - "loss_total": 0.5011069774627686, - "step": 29099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.0573091506958008, - "learning_rate": 8.527694097273719e-05, - "loss": 0.6656, - "step": 29100 - }, - { - "epoch": 0.030398, - "loss_gen": 3.877784490585327, - "loss_rtd": 0.39652708172798157, - "loss_sent": 0.12195165455341339, - "loss_sod": 0.03464795649051666, - "loss_total": 0.5531266927719116, - "step": 29199 - }, - { - "epoch": 0.030398, - "loss_gen": 3.8500592708587646, - "loss_rtd": 0.393253356218338, - "loss_sent": 0.07029858976602554, - "loss_sod": 0.1901566982269287, - "loss_total": 0.6537086367607117, - "step": 29199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.2037585973739624, - "learning_rate": 8.525444542030714e-05, - "loss": 0.6772, - "step": 29200 - }, - { - "epoch": 0.030598, - "loss_gen": 4.189395904541016, - "loss_rtd": 0.40149644017219543, - "loss_sent": 0.22139061987400055, - "loss_sod": 0.05354627966880798, - "loss_total": 0.6764333248138428, - "step": 29299 - }, - { - "epoch": 0.030598, - "loss_gen": 4.037633895874023, - "loss_rtd": 0.40255945920944214, - "loss_sent": 0.3815597891807556, - "loss_sod": 0.07546190917491913, - "loss_total": 0.8595811724662781, - "step": 29299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.5219388008117676, - "learning_rate": 8.523193566739069e-05, - "loss": 0.685, - "step": 29300 - }, - { - "epoch": 0.030798, - "loss_gen": 3.7690134048461914, - "loss_rtd": 0.3794300854206085, - "loss_sent": 0.16399529576301575, - "loss_sod": 0.032787956297397614, - "loss_total": 0.5762133598327637, - "step": 29399 - }, - { - "epoch": 0.030798, - "loss_gen": 2.8526697158813477, - "loss_rtd": 0.40557971596717834, - "loss_sent": 0.0011788775445893407, - "loss_sod": 0.24723093211650848, - "loss_total": 0.6539894938468933, - "step": 29399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.0497325658798218, - "learning_rate": 8.520941172305477e-05, - "loss": 0.6907, - "step": 29400 - }, - { - "epoch": 0.030998, - "loss_gen": 4.048665523529053, - "loss_rtd": 0.3874911963939667, - "loss_sent": 0.10516659170389175, - "loss_sod": 0.03333348408341408, - "loss_total": 0.5259912610054016, - "step": 29499 - }, - { - "epoch": 0.030998, - "loss_gen": 4.126290321350098, - "loss_rtd": 0.4164048433303833, - "loss_sent": 0.17039263248443604, - "loss_sod": 0.04183642193675041, - "loss_total": 0.6286338567733765, - "step": 29499 - }, - { - "epoch": 0.031, - "grad_norm": 0.7477266788482666, - "learning_rate": 8.5186873596372e-05, - "loss": 0.6852, - "step": 29500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.251503944396973, - "loss_rtd": 0.4096163809299469, - "loss_sent": 0.14379481971263885, - "loss_sod": 0.07312798500061035, - "loss_total": 0.6265391707420349, - "step": 29599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.066746711730957, - "loss_rtd": 0.41031789779663086, - "loss_sent": 0.2121007889509201, - "loss_sod": 0.01770671084523201, - "loss_total": 0.6401253938674927, - "step": 29599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.068182110786438, - "learning_rate": 8.516432129642076e-05, - "loss": 0.6811, - "step": 29600 - }, - { - "epoch": 0.031398, - "loss_gen": 2.7509026527404785, - "loss_rtd": 0.38142314553260803, - "loss_sent": 0.010356533341109753, - "loss_sod": 0.17461445927619934, - "loss_total": 0.5663941502571106, - "step": 29699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.262149810791016, - "loss_rtd": 0.38366538286209106, - "loss_sent": 0.3922903835773468, - "loss_sod": 0.12595689296722412, - "loss_total": 0.9019126892089844, - "step": 29699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.8687852621078491, - "learning_rate": 8.51417548322851e-05, - "loss": 0.6845, - "step": 29700 - }, - { - "epoch": 0.031598, - "loss_gen": 4.012904167175293, - "loss_rtd": 0.40136775374412537, - "loss_sent": 0.15064215660095215, - "loss_sod": 0.10415194928646088, - "loss_total": 0.6561618447303772, - "step": 29799 - }, - { - "epoch": 0.031598, - "loss_gen": 3.656109571456909, - "loss_rtd": 0.38143277168273926, - "loss_sent": 0.10486055910587311, - "loss_sod": 0.16847120225429535, - "loss_total": 0.6547645330429077, - "step": 29799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.9747915863990784, - "learning_rate": 8.511917421305483e-05, - "loss": 0.6796, - "step": 29800 - }, - { - "epoch": 0.031798, - "loss_gen": 4.163219451904297, - "loss_rtd": 0.38973960280418396, - "loss_sent": 0.17084045708179474, - "loss_sod": 0.040867019444704056, - "loss_total": 0.6014471054077148, - "step": 29899 - }, - { - "epoch": 0.031798, - "loss_gen": 4.002505302429199, - "loss_rtd": 0.3905410170555115, - "loss_sent": 0.16887712478637695, - "loss_sod": 0.04208110272884369, - "loss_total": 0.6014992594718933, - "step": 29899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.9827485680580139, - "learning_rate": 8.509657944782535e-05, - "loss": 0.6857, - "step": 29900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.085021018981934, - "loss_rtd": 0.40430063009262085, - "loss_sent": 0.10190442204475403, - "loss_sod": 0.18283164501190186, - "loss_total": 0.6890367269515991, - "step": 29999 - }, - { - "epoch": 0.031998, - "loss_gen": 4.095636367797852, - "loss_rtd": 0.41734832525253296, - "loss_sent": 0.20207037031650543, - "loss_sod": 0.024344047531485558, - "loss_total": 0.6437627673149109, - "step": 29999 - }, - { - "epoch": 0.032, - "grad_norm": 0.8807776570320129, - "learning_rate": 8.507397054569788e-05, - "loss": 0.6884, - "step": 30000 - }, - { - "epoch": 0.032, - "eval_loss": 0.6608233451843262, - "eval_runtime": 151.6967, - "eval_samples_per_second": 101.802, - "eval_steps_per_second": 0.798, - "step": 30000 - }, - { - "epoch": 0.032198, - "loss_gen": 3.037975549697876, - "loss_rtd": 0.37186214327812195, - "loss_sent": 0.08015337586402893, - "loss_sod": 0.0894375741481781, - "loss_total": 0.5414531230926514, - "step": 30099 - }, - { - "epoch": 0.032198, - "loss_gen": 4.237514972686768, - "loss_rtd": 0.4175643026828766, - "loss_sent": 0.15542326867580414, - "loss_sod": 0.011203978210687637, - "loss_total": 0.5841915607452393, - "step": 30099 - }, - { - "epoch": 0.0322, - "grad_norm": 0.9229232668876648, - "learning_rate": 8.505134751577925e-05, - "loss": 0.6687, - "step": 30100 - }, - { - "epoch": 0.032398, - "loss_gen": 2.4504544734954834, - "loss_rtd": 0.39251017570495605, - "loss_sent": 0.00014123741129878908, - "loss_sod": 0.3881913423538208, - "loss_total": 0.7808427810668945, - "step": 30199 - }, - { - "epoch": 0.032398, - "loss_gen": 2.186908483505249, - "loss_rtd": 0.3671330511569977, - "loss_sent": 0.05180347338318825, - "loss_sod": 0.1732897162437439, - "loss_total": 0.5922262668609619, - "step": 30199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.0616891384124756, - "learning_rate": 8.502871036718206e-05, - "loss": 0.6948, - "step": 30200 - }, - { - "epoch": 0.032598, - "loss_gen": 4.021667957305908, - "loss_rtd": 0.39637523889541626, - "loss_sent": 0.1306372582912445, - "loss_sod": 0.004748234525322914, - "loss_total": 0.5317606925964355, - "step": 30299 - }, - { - "epoch": 0.032598, - "loss_gen": 4.0861287117004395, - "loss_rtd": 0.40060102939605713, - "loss_sent": 0.17788061499595642, - "loss_sod": 0.08653467148542404, - "loss_total": 0.6650162935256958, - "step": 30299 - }, - { - "epoch": 0.0326, - "grad_norm": 1.5607576370239258, - "learning_rate": 8.500605910902451e-05, - "loss": 0.6983, - "step": 30300 - }, - { - "epoch": 0.032798, - "loss_gen": 4.1850266456604, - "loss_rtd": 0.39154505729675293, - "loss_sent": 0.2575682997703552, - "loss_sod": 0.06015142798423767, - "loss_total": 0.7092647552490234, - "step": 30399 - }, - { - "epoch": 0.032798, - "loss_gen": 3.922189474105835, - "loss_rtd": 0.3912644386291504, - "loss_sent": 0.12429532408714294, - "loss_sod": 0.024736901745200157, - "loss_total": 0.5402966737747192, - "step": 30399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.0241069793701172, - "learning_rate": 8.498339375043054e-05, - "loss": 0.6603, - "step": 30400 - }, - { - "epoch": 0.032998, - "loss_gen": 3.650404691696167, - "loss_rtd": 0.4076765775680542, - "loss_sent": 0.4603433310985565, - "loss_sod": 0.19098269939422607, - "loss_total": 1.0590026378631592, - "step": 30499 - }, - { - "epoch": 0.032998, - "loss_gen": 2.541520118713379, - "loss_rtd": 0.3753470480442047, - "loss_sent": 8.163358870660886e-05, - "loss_sod": 0.20873983204364777, - "loss_total": 0.584168553352356, - "step": 30499 - }, - { - "epoch": 0.033, - "grad_norm": 1.3215209245681763, - "learning_rate": 8.496071430052975e-05, - "loss": 0.6851, - "step": 30500 - }, - { - "epoch": 0.033198, - "loss_gen": 4.049553871154785, - "loss_rtd": 0.3839671015739441, - "loss_sent": 0.213576540350914, - "loss_sod": 0.08501975983381271, - "loss_total": 0.6825634241104126, - "step": 30599 - }, - { - "epoch": 0.033198, - "loss_gen": 4.2370758056640625, - "loss_rtd": 0.39378654956817627, - "loss_sent": 0.1530376374721527, - "loss_sod": 0.15263622999191284, - "loss_total": 0.6994603872299194, - "step": 30599 - }, - { - "epoch": 0.0332, - "grad_norm": 1.6667479276657104, - "learning_rate": 8.493802076845741e-05, - "loss": 0.6995, - "step": 30600 - }, - { - "epoch": 0.033398, - "loss_gen": 4.333565711975098, - "loss_rtd": 0.3889717161655426, - "loss_sent": 0.15002062916755676, - "loss_sod": 0.00713471882045269, - "loss_total": 0.5461270809173584, - "step": 30699 - }, - { - "epoch": 0.033398, - "loss_gen": 3.984656572341919, - "loss_rtd": 0.40549373626708984, - "loss_sent": 0.3065290153026581, - "loss_sod": 0.015845568850636482, - "loss_total": 0.7278683185577393, - "step": 30699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.2295725345611572, - "learning_rate": 8.491531316335451e-05, - "loss": 0.678, - "step": 30700 - }, - { - "epoch": 0.033598, - "loss_gen": 2.4938693046569824, - "loss_rtd": 0.3633447587490082, - "loss_sent": 0.03735076263546944, - "loss_sod": 0.3045555651187897, - "loss_total": 0.7052510976791382, - "step": 30799 - }, - { - "epoch": 0.033598, - "loss_gen": 3.8879973888397217, - "loss_rtd": 0.39840835332870483, - "loss_sent": 0.1616201400756836, - "loss_sod": 0.05654360353946686, - "loss_total": 0.6165720820426941, - "step": 30799 - }, - { - "epoch": 0.0336, - "grad_norm": 1.2731208801269531, - "learning_rate": 8.489259149436762e-05, - "loss": 0.6793, - "step": 30800 - }, - { - "epoch": 0.033798, - "loss_gen": 4.189641952514648, - "loss_rtd": 0.40135085582733154, - "loss_sent": 0.3362424075603485, - "loss_sod": 0.07014751434326172, - "loss_total": 0.8077408075332642, - "step": 30899 - }, - { - "epoch": 0.033798, - "loss_gen": 4.071540832519531, - "loss_rtd": 0.3889402449131012, - "loss_sent": 0.2542674243450165, - "loss_sod": 0.02074206806719303, - "loss_total": 0.663949728012085, - "step": 30899 - }, - { - "epoch": 0.0338, - "grad_norm": 1.0032696723937988, - "learning_rate": 8.486985577064905e-05, - "loss": 0.6825, - "step": 30900 - }, - { - "epoch": 0.033998, - "loss_gen": 4.200192928314209, - "loss_rtd": 0.3943803608417511, - "loss_sent": 0.20004616677761078, - "loss_sod": 0.10118408501148224, - "loss_total": 0.6956106424331665, - "step": 30999 - }, - { - "epoch": 0.033998, - "loss_gen": 4.053731441497803, - "loss_rtd": 0.3781401216983795, - "loss_sent": 0.2314639687538147, - "loss_sod": 0.0035957596264779568, - "loss_total": 0.6131998300552368, - "step": 30999 - }, - { - "epoch": 0.034, - "grad_norm": 0.9742544889450073, - "learning_rate": 8.484710600135675e-05, - "loss": 0.6834, - "step": 31000 - }, - { - "epoch": 0.034, - "eval_loss": 0.6606374979019165, - "eval_runtime": 151.7109, - "eval_samples_per_second": 101.792, - "eval_steps_per_second": 0.798, - "step": 31000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.8133978843688965, - "loss_rtd": 0.3870883584022522, - "loss_sent": 0.17000341415405273, - "loss_sod": 0.06947870552539825, - "loss_total": 0.626570463180542, - "step": 31099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.526205062866211, - "loss_rtd": 0.42248114943504333, - "loss_sent": 0.15724748373031616, - "loss_sod": 0.04115546867251396, - "loss_total": 0.6208840608596802, - "step": 31099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.4922997951507568, - "learning_rate": 8.482434219565431e-05, - "loss": 0.6904, - "step": 31100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.535189151763916, - "loss_rtd": 0.4071005880832672, - "loss_sent": 0.22625060379505157, - "loss_sod": 0.015866361558437347, - "loss_total": 0.6492175459861755, - "step": 31199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.785840034484863, - "loss_rtd": 0.3867778480052948, - "loss_sent": 0.1283811330795288, - "loss_sod": 0.013246990740299225, - "loss_total": 0.5284059643745422, - "step": 31199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.9023923873901367, - "learning_rate": 8.480156436271101e-05, - "loss": 0.7022, - "step": 31200 - }, - { - "epoch": 0.000598, - "loss_gen": 4.289353370666504, - "loss_rtd": 0.4006703495979309, - "loss_sent": 0.3113102316856384, - "loss_sod": 0.06139662116765976, - "loss_total": 0.7733771800994873, - "step": 31299 - }, - { - "epoch": 0.000598, - "loss_gen": 4.495286464691162, - "loss_rtd": 0.40194377303123474, - "loss_sent": 0.2873789668083191, - "loss_sod": 0.06476514041423798, - "loss_total": 0.7540878653526306, - "step": 31299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.169629454612732, - "learning_rate": 8.477877251170174e-05, - "loss": 0.674, - "step": 31300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.043064594268799, - "loss_rtd": 0.38696688413619995, - "loss_sent": 0.10985315591096878, - "loss_sod": 0.1856946051120758, - "loss_total": 0.6825146675109863, - "step": 31399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.523093223571777, - "loss_rtd": 0.416436105966568, - "loss_sent": 0.1800312101840973, - "loss_sod": 0.1480741798877716, - "loss_total": 0.7445415258407593, - "step": 31399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.948949933052063, - "learning_rate": 8.475596665180707e-05, - "loss": 0.6757, - "step": 31400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.102592468261719, - "loss_rtd": 0.3981564939022064, - "loss_sent": 0.12449570745229721, - "loss_sod": 0.31758543848991394, - "loss_total": 0.8402376174926758, - "step": 31499 - }, - { - "epoch": 0.000998, - "loss_gen": 3.6268177032470703, - "loss_rtd": 0.38327154517173767, - "loss_sent": 0.002468185033649206, - "loss_sod": 0.4803374409675598, - "loss_total": 0.8660771250724792, - "step": 31499 - }, - { - "epoch": 0.001, - "grad_norm": 1.330782413482666, - "learning_rate": 8.47331467922132e-05, - "loss": 0.6954, - "step": 31500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.7002363204956055, - "loss_rtd": 0.41255107522010803, - "loss_sent": 0.11588738858699799, - "loss_sod": 0.18582235276699066, - "loss_total": 0.7142608165740967, - "step": 31599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.524577617645264, - "loss_rtd": 0.3973040282726288, - "loss_sent": 0.1471736878156662, - "loss_sod": 0.06015242636203766, - "loss_total": 0.604630172252655, - "step": 31599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.8479200601577759, - "learning_rate": 8.471031294211194e-05, - "loss": 0.6919, - "step": 31600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.201780319213867, - "loss_rtd": 0.3961066007614136, - "loss_sent": 0.18251606822013855, - "loss_sod": 0.08499738574028015, - "loss_total": 0.6636200547218323, - "step": 31699 - }, - { - "epoch": 0.001398, - "loss_gen": 4.5241546630859375, - "loss_rtd": 0.4064065217971802, - "loss_sent": 0.22355028986930847, - "loss_sod": 0.03173723816871643, - "loss_total": 0.6616940498352051, - "step": 31699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.7753614783287048, - "learning_rate": 8.468746511070076e-05, - "loss": 0.6906, - "step": 31700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.587357997894287, - "loss_rtd": 0.40675556659698486, - "loss_sent": 0.3111591637134552, - "loss_sod": 0.06384904682636261, - "loss_total": 0.7817637920379639, - "step": 31799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.497931003570557, - "loss_rtd": 0.39460289478302, - "loss_sent": 0.320936381816864, - "loss_sod": 0.1028224378824234, - "loss_total": 0.8183616995811462, - "step": 31799 - }, - { - "epoch": 0.0016, - "grad_norm": 2.3936991691589355, - "learning_rate": 8.46646033071828e-05, - "loss": 0.6833, - "step": 31800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.862859725952148, - "loss_rtd": 0.39885783195495605, - "loss_sent": 0.08758208155632019, - "loss_sod": 0.1417115330696106, - "loss_total": 0.6281514167785645, - "step": 31899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.395966053009033, - "loss_rtd": 0.4056219160556793, - "loss_sent": 0.2164677232503891, - "loss_sod": 0.04559243470430374, - "loss_total": 0.6676820516586304, - "step": 31899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.1251325607299805, - "learning_rate": 8.464172754076679e-05, - "loss": 0.676, - "step": 31900 - }, - { - "epoch": 0.001998, - "loss_gen": 4.678037166595459, - "loss_rtd": 0.3924082815647125, - "loss_sent": 0.07607917487621307, - "loss_sod": 0.10162509977817535, - "loss_total": 0.5701125860214233, - "step": 31999 - }, - { - "epoch": 0.001998, - "loss_gen": 4.401127815246582, - "loss_rtd": 0.40330788493156433, - "loss_sent": 0.3779938519001007, - "loss_sod": 0.018371429294347763, - "loss_total": 0.7996731996536255, - "step": 31999 - }, - { - "epoch": 0.002, - "grad_norm": 1.875430703163147, - "learning_rate": 8.461883782066705e-05, - "loss": 0.6782, - "step": 32000 - }, - { - "epoch": 0.002, - "eval_loss": 0.6673182845115662, - "eval_runtime": 157.6853, - "eval_samples_per_second": 97.936, - "eval_steps_per_second": 0.767, - "step": 32000 - }, - { - "epoch": 0.002198, - "loss_gen": 4.48441743850708, - "loss_rtd": 0.40013572573661804, - "loss_sent": 0.11351187527179718, - "loss_sod": 0.25612175464630127, - "loss_total": 0.7697693705558777, - "step": 32099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.7465925216674805, - "loss_rtd": 0.4035055637359619, - "loss_sent": 0.298244446516037, - "loss_sod": 0.03680422157049179, - "loss_total": 0.7385542392730713, - "step": 32099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.017162799835205, - "learning_rate": 8.459593415610359e-05, - "loss": 0.6839, - "step": 32100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.524750709533691, - "loss_rtd": 0.4050101041793823, - "loss_sent": 0.37396883964538574, - "loss_sod": 0.03519045189023018, - "loss_total": 0.8141694068908691, - "step": 32199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.473918437957764, - "loss_rtd": 0.40793168544769287, - "loss_sent": 0.27542105317115784, - "loss_sod": 0.06677190959453583, - "loss_total": 0.7501246929168701, - "step": 32199 - }, - { - "epoch": 0.0024, - "grad_norm": 3.0965168476104736, - "learning_rate": 8.457301655630196e-05, - "loss": 0.6885, - "step": 32200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.752264499664307, - "loss_rtd": 0.4250895082950592, - "loss_sent": 0.12414298951625824, - "loss_sod": 0.08618341386318207, - "loss_total": 0.6354159116744995, - "step": 32299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.138677597045898, - "loss_rtd": 0.40253880620002747, - "loss_sent": 0.01680273376405239, - "loss_sod": 0.23747926950454712, - "loss_total": 0.6568208336830139, - "step": 32299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.1575840711593628, - "learning_rate": 8.455008503049342e-05, - "loss": 0.6679, - "step": 32300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.358146667480469, - "loss_rtd": 0.40618860721588135, - "loss_sent": 0.05179322510957718, - "loss_sod": 0.22525230050086975, - "loss_total": 0.6832340955734253, - "step": 32399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.595084190368652, - "loss_rtd": 0.39770156145095825, - "loss_sent": 0.1839921921491623, - "loss_sod": 0.036973558366298676, - "loss_total": 0.6186673045158386, - "step": 32399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.462870717048645, - "learning_rate": 8.452713958791477e-05, - "loss": 0.6767, - "step": 32400 - }, - { - "epoch": 0.002998, - "loss_gen": 4.643150806427002, - "loss_rtd": 0.40366634726524353, - "loss_sent": 0.532322347164154, - "loss_sod": 0.17457786202430725, - "loss_total": 1.1105666160583496, - "step": 32499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.605870723724365, - "loss_rtd": 0.3856964707374573, - "loss_sent": 0.13303084671497345, - "loss_sod": 0.08971034735441208, - "loss_total": 0.6084376573562622, - "step": 32499 - }, - { - "epoch": 0.003, - "grad_norm": 2.468165636062622, - "learning_rate": 8.450418023780839e-05, - "loss": 0.6946, - "step": 32500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.285338401794434, - "loss_rtd": 0.40404659509658813, - "loss_sent": 0.04896371811628342, - "loss_sod": 0.11847255378961563, - "loss_total": 0.5714828968048096, - "step": 32599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.491969108581543, - "loss_rtd": 0.3990192711353302, - "loss_sent": 0.08510634303092957, - "loss_sod": 0.02022891864180565, - "loss_total": 0.5043545365333557, - "step": 32599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.7647584080696106, - "learning_rate": 8.448120698942237e-05, - "loss": 0.6792, - "step": 32600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.343130111694336, - "loss_rtd": 0.41738593578338623, - "loss_sent": 0.33898279070854187, - "loss_sod": 0.1235346645116806, - "loss_total": 0.8799034357070923, - "step": 32699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.7674994468688965, - "loss_rtd": 0.3970383107662201, - "loss_sent": 0.2672957181930542, - "loss_sod": 0.03581714630126953, - "loss_total": 0.7001512050628662, - "step": 32699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.5911026000976562, - "learning_rate": 8.445821985201028e-05, - "loss": 0.698, - "step": 32700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.213858604431152, - "loss_rtd": 0.3942485749721527, - "loss_sent": 0.2171388417482376, - "loss_sod": 0.014159854501485825, - "loss_total": 0.6255472898483276, - "step": 32799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.432817459106445, - "loss_rtd": 0.3746455907821655, - "loss_sent": 0.11468903720378876, - "loss_sod": 0.03782479837536812, - "loss_total": 0.5271594524383545, - "step": 32799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.3355274200439453, - "learning_rate": 8.443521883483136e-05, - "loss": 0.6654, - "step": 32800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.383444309234619, - "loss_rtd": 0.3847176432609558, - "loss_sent": 0.14574401080608368, - "loss_sod": 0.1299697458744049, - "loss_total": 0.660431444644928, - "step": 32899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.477179050445557, - "loss_rtd": 0.3920685946941376, - "loss_sent": 0.2143167108297348, - "loss_sod": 0.005493156611919403, - "loss_total": 0.6118784546852112, - "step": 32899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.910675585269928, - "learning_rate": 8.44122039471504e-05, - "loss": 0.6831, - "step": 32900 - }, - { - "epoch": 0.003998, - "loss_gen": 3.8524093627929688, - "loss_rtd": 0.39162158966064453, - "loss_sent": 0.06359441578388214, - "loss_sod": 0.03046536259353161, - "loss_total": 0.48568135499954224, - "step": 32999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.761472702026367, - "loss_rtd": 0.4120732247829437, - "loss_sent": 0.19595538079738617, - "loss_sod": 0.14814525842666626, - "loss_total": 0.756173849105835, - "step": 32999 - }, - { - "epoch": 0.004, - "grad_norm": 0.9662920832633972, - "learning_rate": 8.438917519823782e-05, - "loss": 0.6769, - "step": 33000 - }, - { - "epoch": 0.004, - "eval_loss": 0.6491342782974243, - "eval_runtime": 150.6483, - "eval_samples_per_second": 102.51, - "eval_steps_per_second": 0.803, - "step": 33000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.339219093322754, - "loss_rtd": 0.3928168714046478, - "loss_sent": 0.1766635775566101, - "loss_sod": 0.020830631256103516, - "loss_total": 0.5903110504150391, - "step": 33099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.55030632019043, - "loss_rtd": 0.4039199650287628, - "loss_sent": 0.13374005258083344, - "loss_sod": 0.1401435136795044, - "loss_total": 0.6778035163879395, - "step": 33099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.040810227394104, - "learning_rate": 8.436613259736958e-05, - "loss": 0.6924, - "step": 33100 - }, - { - "epoch": 0.004398, - "loss_gen": 4.651073455810547, - "loss_rtd": 0.3909187316894531, - "loss_sent": 0.237695574760437, - "loss_sod": 0.054423652589321136, - "loss_total": 0.6830379962921143, - "step": 33199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.0702948570251465, - "loss_rtd": 0.39740777015686035, - "loss_sent": 7.554675539722666e-05, - "loss_sod": 0.35460132360458374, - "loss_total": 0.7520846128463745, - "step": 33199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.9790658354759216, - "learning_rate": 8.434307615382724e-05, - "loss": 0.6559, - "step": 33200 - }, - { - "epoch": 0.004598, - "loss_gen": 3.6085634231567383, - "loss_rtd": 0.3859209716320038, - "loss_sent": 0.04533249884843826, - "loss_sod": 0.14941275119781494, - "loss_total": 0.5806662440299988, - "step": 33299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.367456436157227, - "loss_rtd": 0.39892175793647766, - "loss_sent": 0.14764036238193512, - "loss_sod": 0.012713832780718803, - "loss_total": 0.5592759251594543, - "step": 33299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.9494178295135498, - "learning_rate": 8.432000587689792e-05, - "loss": 0.68, - "step": 33300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.770630359649658, - "loss_rtd": 0.38550376892089844, - "loss_sent": 0.08192545175552368, - "loss_sod": 0.010527098551392555, - "loss_total": 0.4779563248157501, - "step": 33399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.432707786560059, - "loss_rtd": 0.4145929515361786, - "loss_sent": 0.12205509841442108, - "loss_sod": 0.0853184163570404, - "loss_total": 0.6219664812088013, - "step": 33399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.7924001812934875, - "learning_rate": 8.429692177587435e-05, - "loss": 0.6786, - "step": 33400 - }, - { - "epoch": 0.004998, - "loss_gen": 3.980473756790161, - "loss_rtd": 0.3727272152900696, - "loss_sent": 0.0033619177993386984, - "loss_sod": 0.2604028582572937, - "loss_total": 0.6364920139312744, - "step": 33499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.709641456604004, - "loss_rtd": 0.3890208899974823, - "loss_sent": 0.13390415906906128, - "loss_sod": 0.021367769688367844, - "loss_total": 0.5442928075790405, - "step": 33499 - }, - { - "epoch": 0.005, - "grad_norm": 1.1371712684631348, - "learning_rate": 8.427382386005477e-05, - "loss": 0.6749, - "step": 33500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.686338901519775, - "loss_rtd": 0.40714138746261597, - "loss_sent": 0.23763643205165863, - "loss_sod": 0.08537831157445908, - "loss_total": 0.7301561236381531, - "step": 33599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.592874050140381, - "loss_rtd": 0.4045478403568268, - "loss_sent": 0.3227478563785553, - "loss_sod": 0.20993739366531372, - "loss_total": 0.9372330904006958, - "step": 33599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.5759620666503906, - "learning_rate": 8.425071213874308e-05, - "loss": 0.6726, - "step": 33600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.828529357910156, - "loss_rtd": 0.391663134098053, - "loss_sent": 0.2265581488609314, - "loss_sod": 0.15814724564552307, - "loss_total": 0.7763685584068298, - "step": 33699 - }, - { - "epoch": 0.005398, - "loss_gen": 4.472051620483398, - "loss_rtd": 0.3902159333229065, - "loss_sent": 0.12034723907709122, - "loss_sod": 0.020356986671686172, - "loss_total": 0.530920147895813, - "step": 33699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.8813192248344421, - "learning_rate": 8.422758662124858e-05, - "loss": 0.6617, - "step": 33700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.132448196411133, - "loss_rtd": 0.38475725054740906, - "loss_sent": 0.057623039931058884, - "loss_sod": 0.10686932504177094, - "loss_total": 0.5492495894432068, - "step": 33799 - }, - { - "epoch": 0.005598, - "loss_gen": 4.987173080444336, - "loss_rtd": 0.3826753497123718, - "loss_sent": 0.14800354838371277, - "loss_sod": 0.0643434152007103, - "loss_total": 0.5950223207473755, - "step": 33799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.1166493892669678, - "learning_rate": 8.420444731688633e-05, - "loss": 0.676, - "step": 33800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.062664985656738, - "loss_rtd": 0.38509470224380493, - "loss_sent": 0.10830825567245483, - "loss_sod": 0.15515291690826416, - "loss_total": 0.6485558748245239, - "step": 33899 - }, - { - "epoch": 0.005798, - "loss_gen": 3.500702381134033, - "loss_rtd": 0.38332781195640564, - "loss_sent": 0.005528334993869066, - "loss_sod": 0.12189139425754547, - "loss_total": 0.5107475519180298, - "step": 33899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.8776567578315735, - "learning_rate": 8.418129423497677e-05, - "loss": 0.6831, - "step": 33900 - }, - { - "epoch": 0.005998, - "loss_gen": 4.376248359680176, - "loss_rtd": 0.39717715978622437, - "loss_sent": 0.24568745493888855, - "loss_sod": 0.017836254090070724, - "loss_total": 0.6607008576393127, - "step": 33999 - }, - { - "epoch": 0.005998, - "loss_gen": 4.339692115783691, - "loss_rtd": 0.4054630398750305, - "loss_sent": 0.17948582768440247, - "loss_sod": 0.026773886755108833, - "loss_total": 0.6117227673530579, - "step": 33999 - }, - { - "epoch": 0.006, - "grad_norm": 1.105279803276062, - "learning_rate": 8.415812738484599e-05, - "loss": 0.656, - "step": 34000 - }, - { - "epoch": 0.006, - "eval_loss": 0.6587226986885071, - "eval_runtime": 150.7649, - "eval_samples_per_second": 102.431, - "eval_steps_per_second": 0.803, - "step": 34000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.84349250793457, - "loss_rtd": 0.3853917419910431, - "loss_sent": 0.2796160578727722, - "loss_sod": 0.18595334887504578, - "loss_total": 0.8509611487388611, - "step": 34099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.1158857345581055, - "loss_rtd": 0.4004764258861542, - "loss_sent": 0.16802577674388885, - "loss_sod": 0.07047758996486664, - "loss_total": 0.6389797925949097, - "step": 34099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.1270217895507812, - "learning_rate": 8.413494677582558e-05, - "loss": 0.6873, - "step": 34100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.219707489013672, - "loss_rtd": 0.3944161534309387, - "loss_sent": 0.15331660211086273, - "loss_sod": 0.0021382453851401806, - "loss_total": 0.549871027469635, - "step": 34199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.6819562911987305, - "loss_rtd": 0.4039584994316101, - "loss_sent": 0.5780501961708069, - "loss_sod": 0.11269272863864899, - "loss_total": 1.0947014093399048, - "step": 34199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.1589312553405762, - "learning_rate": 8.411175241725268e-05, - "loss": 0.6923, - "step": 34200 - }, - { - "epoch": 0.006598, - "loss_gen": 3.6634464263916016, - "loss_rtd": 0.39810386300086975, - "loss_sent": 0.022716881707310677, - "loss_sod": 0.36733466386795044, - "loss_total": 0.7881554365158081, - "step": 34299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.041938304901123, - "loss_rtd": 0.39642512798309326, - "loss_sent": 0.07152106612920761, - "loss_sod": 0.08808917552232742, - "loss_total": 0.5560353398323059, - "step": 34299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.8713497519493103, - "learning_rate": 8.408854431847e-05, - "loss": 0.6774, - "step": 34300 - }, - { - "epoch": 0.006798, - "loss_gen": 4.1711907386779785, - "loss_rtd": 0.3844755291938782, - "loss_sent": 0.12414493411779404, - "loss_sod": 0.12355874478816986, - "loss_total": 0.6321792006492615, - "step": 34399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.556699275970459, - "loss_rtd": 0.38386863470077515, - "loss_sent": 0.08765093982219696, - "loss_sod": 0.10667753964662552, - "loss_total": 0.5781971216201782, - "step": 34399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.401253581047058, - "learning_rate": 8.406532248882573e-05, - "loss": 0.6888, - "step": 34400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.466337203979492, - "loss_rtd": 0.3804284930229187, - "loss_sent": 0.2287617027759552, - "loss_sod": 0.049220383167266846, - "loss_total": 0.6584105491638184, - "step": 34499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.712965488433838, - "loss_rtd": 0.38402697443962097, - "loss_sent": 0.3427942991256714, - "loss_sod": 0.13551780581474304, - "loss_total": 0.8623390793800354, - "step": 34499 - }, - { - "epoch": 0.007, - "grad_norm": 2.533766984939575, - "learning_rate": 8.404208693767365e-05, - "loss": 0.6791, - "step": 34500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.2892608642578125, - "loss_rtd": 0.3866655230522156, - "loss_sent": 0.2026486098766327, - "loss_sod": 0.10786180198192596, - "loss_total": 0.6971759796142578, - "step": 34599 - }, - { - "epoch": 0.007198, - "loss_gen": 3.8161721229553223, - "loss_rtd": 0.42922765016555786, - "loss_sent": 0.0005910772597417235, - "loss_sod": 0.3839712142944336, - "loss_total": 0.813789963722229, - "step": 34599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.6854496002197266, - "learning_rate": 8.401883767437302e-05, - "loss": 0.6762, - "step": 34600 - }, - { - "epoch": 0.007398, - "loss_gen": 3.7486488819122314, - "loss_rtd": 0.3906943202018738, - "loss_sent": 0.0001909230777528137, - "loss_sod": 0.3329179883003235, - "loss_total": 0.7238032221794128, - "step": 34699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.021856307983398, - "loss_rtd": 0.3831009268760681, - "loss_sent": 0.11416365206241608, - "loss_sod": 0.19775055348873138, - "loss_total": 0.6950151324272156, - "step": 34699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.3915011882781982, - "learning_rate": 8.399557470828863e-05, - "loss": 0.6615, - "step": 34700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.43755578994751, - "loss_rtd": 0.39801058173179626, - "loss_sent": 0.15207479894161224, - "loss_sod": 0.04855339601635933, - "loss_total": 0.5986387729644775, - "step": 34799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.49984884262085, - "loss_rtd": 0.3986855447292328, - "loss_sent": 0.1514422595500946, - "loss_sod": 0.05110839381814003, - "loss_total": 0.6012362241744995, - "step": 34799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.69097501039505, - "learning_rate": 8.397229804879084e-05, - "loss": 0.6594, - "step": 34800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.759007930755615, - "loss_rtd": 0.38655874133110046, - "loss_sent": 0.21478146314620972, - "loss_sod": 0.04757128655910492, - "loss_total": 0.6489114761352539, - "step": 34899 - }, - { - "epoch": 0.007798, - "loss_gen": 3.825989246368408, - "loss_rtd": 0.39063310623168945, - "loss_sent": 0.015799371525645256, - "loss_sod": 0.2723545432090759, - "loss_total": 0.6787869930267334, - "step": 34899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.1858468055725098, - "learning_rate": 8.394900770525544e-05, - "loss": 0.6762, - "step": 34900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.985413551330566, - "loss_rtd": 0.3810838460922241, - "loss_sent": 0.06006404012441635, - "loss_sod": 0.06051886826753616, - "loss_total": 0.5016667246818542, - "step": 34999 - }, - { - "epoch": 0.007998, - "loss_gen": 4.667548179626465, - "loss_rtd": 0.38158246874809265, - "loss_sent": 0.08527212589979172, - "loss_sod": 0.0968073233962059, - "loss_total": 0.5636619329452515, - "step": 34999 - }, - { - "epoch": 0.008, - "grad_norm": 0.9259592890739441, - "learning_rate": 8.392570368706379e-05, - "loss": 0.6555, - "step": 35000 - }, - { - "epoch": 0.008, - "eval_loss": 0.6558483242988586, - "eval_runtime": 152.0046, - "eval_samples_per_second": 101.596, - "eval_steps_per_second": 0.796, - "step": 35000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.283824443817139, - "loss_rtd": 0.3852701485157013, - "loss_sent": 0.20818640291690826, - "loss_sod": 0.020567061379551888, - "loss_total": 0.6140236258506775, - "step": 35099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.41359281539917, - "loss_rtd": 0.3706549108028412, - "loss_sent": 0.1305951327085495, - "loss_sod": 0.12105558067560196, - "loss_total": 0.6223056316375732, - "step": 35099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.8833096623420715, - "learning_rate": 8.390238600360276e-05, - "loss": 0.6708, - "step": 35100 - }, - { - "epoch": 0.008398, - "loss_gen": 3.6215407848358154, - "loss_rtd": 0.4050199091434479, - "loss_sent": 0.0112560810521245, - "loss_sod": 0.2151399552822113, - "loss_total": 0.6314159631729126, - "step": 35199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.06948184967041, - "loss_rtd": 0.41497963666915894, - "loss_sent": 0.08226001262664795, - "loss_sod": 0.14170867204666138, - "loss_total": 0.6389483213424683, - "step": 35199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.0082366466522217, - "learning_rate": 8.38790546642647e-05, - "loss": 0.6589, - "step": 35200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.5622711181640625, - "loss_rtd": 0.3707863390445709, - "loss_sent": 0.17080971598625183, - "loss_sod": 0.022889114916324615, - "loss_total": 0.5644851922988892, - "step": 35299 - }, - { - "epoch": 0.008598, - "loss_gen": 3.8004684448242188, - "loss_rtd": 0.37701067328453064, - "loss_sent": 0.001095216372050345, - "loss_sod": 0.3147203028202057, - "loss_total": 0.6928262114524841, - "step": 35299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.8075337409973145, - "learning_rate": 8.385570967844747e-05, - "loss": 0.6783, - "step": 35300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.531389236450195, - "loss_rtd": 0.3917960822582245, - "loss_sent": 0.3814413845539093, - "loss_sod": 0.029178844764828682, - "loss_total": 0.8024163246154785, - "step": 35399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.834838390350342, - "loss_rtd": 0.3757188022136688, - "loss_sent": 0.09382227808237076, - "loss_sod": 0.1580665558576584, - "loss_total": 0.6276076436042786, - "step": 35399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.1063945293426514, - "learning_rate": 8.383235105555445e-05, - "loss": 0.6743, - "step": 35400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.395575523376465, - "loss_rtd": 0.40566393733024597, - "loss_sent": 0.07591883838176727, - "loss_sod": 0.06056656688451767, - "loss_total": 0.5421493649482727, - "step": 35499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.709528923034668, - "loss_rtd": 0.38671427965164185, - "loss_sent": 0.1463877558708191, - "loss_sod": 0.0882708728313446, - "loss_total": 0.6213729381561279, - "step": 35499 - }, - { - "epoch": 0.009, - "grad_norm": 0.64141446352005, - "learning_rate": 8.380897880499445e-05, - "loss": 0.6711, - "step": 35500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.119242191314697, - "loss_rtd": 0.3939517140388489, - "loss_sent": 0.053018298000097275, - "loss_sod": 0.22627955675125122, - "loss_total": 0.6732495427131653, - "step": 35599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.300588607788086, - "loss_rtd": 0.38656580448150635, - "loss_sent": 0.10431955754756927, - "loss_sod": 0.021519005298614502, - "loss_total": 0.5124043822288513, - "step": 35599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.8485056161880493, - "learning_rate": 8.378559293618183e-05, - "loss": 0.6813, - "step": 35600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.3555216789245605, - "loss_rtd": 0.3831770122051239, - "loss_sent": 0.14471060037612915, - "loss_sod": 0.005934491753578186, - "loss_total": 0.5338221192359924, - "step": 35699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.078190326690674, - "loss_rtd": 0.3944447636604309, - "loss_sent": 0.12811420857906342, - "loss_sod": 0.13279440999031067, - "loss_total": 0.6553534269332886, - "step": 35699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.3532829284667969, - "learning_rate": 8.376219345853642e-05, - "loss": 0.6694, - "step": 35700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.143390655517578, - "loss_rtd": 0.40090420842170715, - "loss_sent": 0.00558770215138793, - "loss_sod": 0.2737257480621338, - "loss_total": 0.6802176237106323, - "step": 35799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.052029132843018, - "loss_rtd": 0.3747907280921936, - "loss_sent": 0.08215665817260742, - "loss_sod": 0.07205349206924438, - "loss_total": 0.5290008783340454, - "step": 35799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.2565505504608154, - "learning_rate": 8.373878038148353e-05, - "loss": 0.6617, - "step": 35800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.08327579498291, - "loss_rtd": 0.3714620769023895, - "loss_sent": 0.3929314911365509, - "loss_sod": 0.050866659730672836, - "loss_total": 0.8152602314949036, - "step": 35899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.571104526519775, - "loss_rtd": 0.42713063955307007, - "loss_sent": 0.25466254353523254, - "loss_sod": 0.014409082010388374, - "loss_total": 0.696202278137207, - "step": 35899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.9676883220672607, - "learning_rate": 8.371535371445392e-05, - "loss": 0.6728, - "step": 35900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.5458760261535645, - "loss_rtd": 0.38131198287010193, - "loss_sent": 0.09672313928604126, - "loss_sod": 0.04669678956270218, - "loss_total": 0.5247318744659424, - "step": 35999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.419743537902832, - "loss_rtd": 0.39327701926231384, - "loss_sent": 0.14998316764831543, - "loss_sod": 0.05913712829351425, - "loss_total": 0.6023973226547241, - "step": 35999 - }, - { - "epoch": 0.01, - "grad_norm": 0.7521686553955078, - "learning_rate": 8.369191346688389e-05, - "loss": 0.6615, - "step": 36000 - }, - { - "epoch": 0.01, - "eval_loss": 0.657751739025116, - "eval_runtime": 150.6017, - "eval_samples_per_second": 102.542, - "eval_steps_per_second": 0.803, - "step": 36000 - }, - { - "epoch": 0.010198, - "loss_gen": 4.568459987640381, - "loss_rtd": 0.38394075632095337, - "loss_sent": 0.2844971716403961, - "loss_sod": 0.012531893327832222, - "loss_total": 0.6809698343276978, - "step": 36099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.570716857910156, - "loss_rtd": 0.3938712775707245, - "loss_sent": 0.18808907270431519, - "loss_sod": 0.008191872388124466, - "loss_total": 0.5901522636413574, - "step": 36099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8684414625167847, - "learning_rate": 8.366845964821512e-05, - "loss": 0.6718, - "step": 36100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.625585079193115, - "loss_rtd": 0.38836559653282166, - "loss_sent": 0.1486426293849945, - "loss_sod": 0.03211354464292526, - "loss_total": 0.569121778011322, - "step": 36199 - }, - { - "epoch": 0.010398, - "loss_gen": 4.721551895141602, - "loss_rtd": 0.3992859423160553, - "loss_sent": 0.0891035795211792, - "loss_sod": 0.07581065595149994, - "loss_total": 0.5642001628875732, - "step": 36199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.025210976600647, - "learning_rate": 8.364499226789485e-05, - "loss": 0.6698, - "step": 36200 - }, - { - "epoch": 0.010598, - "loss_gen": 4.264175891876221, - "loss_rtd": 0.39955437183380127, - "loss_sent": 0.18356764316558838, - "loss_sod": 0.056868620216846466, - "loss_total": 0.6399906277656555, - "step": 36299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.184117794036865, - "loss_rtd": 0.38804009556770325, - "loss_sent": 0.08347688615322113, - "loss_sod": 0.12625752389431, - "loss_total": 0.5977745056152344, - "step": 36299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9175646305084229, - "learning_rate": 8.362151133537571e-05, - "loss": 0.6667, - "step": 36300 - }, - { - "epoch": 0.010798, - "loss_gen": 4.556824207305908, - "loss_rtd": 0.382408082485199, - "loss_sent": 0.25232207775115967, - "loss_sod": 0.043600019067525864, - "loss_total": 0.6783301830291748, - "step": 36399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.242853164672852, - "loss_rtd": 0.38585180044174194, - "loss_sent": 0.2861935794353485, - "loss_sod": 0.19454966485500336, - "loss_total": 0.8665950298309326, - "step": 36399 - }, - { - "epoch": 0.0108, - "grad_norm": 2.2240793704986572, - "learning_rate": 8.359801686011582e-05, - "loss": 0.6845, - "step": 36400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.583993434906006, - "loss_rtd": 0.39570391178131104, - "loss_sent": 0.07827243953943253, - "loss_sod": 0.08733754605054855, - "loss_total": 0.5613139271736145, - "step": 36499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.440932750701904, - "loss_rtd": 0.3896424472332001, - "loss_sent": 0.25126516819000244, - "loss_sod": 0.08420707285404205, - "loss_total": 0.7251147031784058, - "step": 36499 - }, - { - "epoch": 0.011, - "grad_norm": 1.1776618957519531, - "learning_rate": 8.357450885157876e-05, - "loss": 0.6628, - "step": 36500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.803018093109131, - "loss_rtd": 0.4053913950920105, - "loss_sent": 0.23469845950603485, - "loss_sod": 0.1651773452758789, - "loss_total": 0.8052672147750854, - "step": 36599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.398932456970215, - "loss_rtd": 0.39668285846710205, - "loss_sent": 0.3532399833202362, - "loss_sod": 0.05229349806904793, - "loss_total": 0.8022163510322571, - "step": 36599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.701512336730957, - "learning_rate": 8.355098731923357e-05, - "loss": 0.6937, - "step": 36600 - }, - { - "epoch": 0.011398, - "loss_gen": 3.498446464538574, - "loss_rtd": 0.3683341145515442, - "loss_sent": 0.024808038026094437, - "loss_sod": 0.2724393904209137, - "loss_total": 0.6655815839767456, - "step": 36699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.199820041656494, - "loss_rtd": 0.38264238834381104, - "loss_sent": 0.1850346475839615, - "loss_sod": 0.042693350464105606, - "loss_total": 0.610370397567749, - "step": 36699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.188109278678894, - "learning_rate": 8.352745227255467e-05, - "loss": 0.6865, - "step": 36700 - }, - { - "epoch": 0.011598, - "loss_gen": 4.962965965270996, - "loss_rtd": 0.3960812985897064, - "loss_sent": 0.22524335980415344, - "loss_sod": 0.1821294128894806, - "loss_total": 0.8034540414810181, - "step": 36799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.930484771728516, - "loss_rtd": 0.385154128074646, - "loss_sent": 0.27918630838394165, - "loss_sod": 0.0164080411195755, - "loss_total": 0.680748462677002, - "step": 36799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.9394999742507935, - "learning_rate": 8.350390372102201e-05, - "loss": 0.6652, - "step": 36800 - }, - { - "epoch": 0.011798, - "loss_gen": 4.238772392272949, - "loss_rtd": 0.3739355504512787, - "loss_sent": 0.1498599648475647, - "loss_sod": 0.0525074228644371, - "loss_total": 0.5763029456138611, - "step": 36899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.5995635986328125, - "loss_rtd": 0.39057114720344543, - "loss_sent": 0.3132609724998474, - "loss_sod": 0.03634927421808243, - "loss_total": 0.7401813864707947, - "step": 36899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.9696013927459717, - "learning_rate": 8.348034167412094e-05, - "loss": 0.6802, - "step": 36900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.623749732971191, - "loss_rtd": 0.37866002321243286, - "loss_sent": 0.30305925011634827, - "loss_sod": 0.01630227267742157, - "loss_total": 0.6980215311050415, - "step": 36999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.484660625457764, - "loss_rtd": 0.3995550870895386, - "loss_sent": 0.1641751378774643, - "loss_sod": 0.0898779034614563, - "loss_total": 0.6536081433296204, - "step": 36999 - }, - { - "epoch": 0.012, - "grad_norm": 2.3883414268493652, - "learning_rate": 8.345676614134226e-05, - "loss": 0.6647, - "step": 37000 - }, - { - "epoch": 0.012, - "eval_loss": 0.6482465267181396, - "eval_runtime": 151.0444, - "eval_samples_per_second": 102.241, - "eval_steps_per_second": 0.801, - "step": 37000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.435266017913818, - "loss_rtd": 0.3864968419075012, - "loss_sent": 0.20693446695804596, - "loss_sod": 0.024848148226737976, - "loss_total": 0.6182794570922852, - "step": 37099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.258318901062012, - "loss_rtd": 0.3999250829219818, - "loss_sent": 0.011927814222872257, - "loss_sod": 0.17478594183921814, - "loss_total": 0.586638867855072, - "step": 37099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.085282802581787, - "learning_rate": 8.343317713218217e-05, - "loss": 0.6692, - "step": 37100 - }, - { - "epoch": 0.012398, - "loss_gen": 3.623622179031372, - "loss_rtd": 0.3829926550388336, - "loss_sent": 0.00037199995131231844, - "loss_sod": 0.31614941358566284, - "loss_total": 0.6995140910148621, - "step": 37199 - }, - { - "epoch": 0.012398, - "loss_gen": 3.788888454437256, - "loss_rtd": 0.3892027735710144, - "loss_sent": 0.1038016527891159, - "loss_sod": 0.014133607037365437, - "loss_total": 0.5071380138397217, - "step": 37199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.115685224533081, - "learning_rate": 8.340957465614233e-05, - "loss": 0.6759, - "step": 37200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.963636875152588, - "loss_rtd": 0.3884185552597046, - "loss_sent": 0.4500541090965271, - "loss_sod": 0.053494930267333984, - "loss_total": 0.8919675946235657, - "step": 37299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.432641506195068, - "loss_rtd": 0.38547518849372864, - "loss_sent": 0.19582559168338776, - "loss_sod": 0.029296714812517166, - "loss_total": 0.6105974912643433, - "step": 37299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.4357924461364746, - "learning_rate": 8.33859587227298e-05, - "loss": 0.6745, - "step": 37300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.658502578735352, - "loss_rtd": 0.3811958432197571, - "loss_sent": 0.15638990700244904, - "loss_sod": 0.14238518476486206, - "loss_total": 0.679970920085907, - "step": 37399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.4922027587890625, - "loss_rtd": 0.4064105749130249, - "loss_sent": 0.10675998777151108, - "loss_sod": 0.04498648643493652, - "loss_total": 0.5581570267677307, - "step": 37399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.8286881446838379, - "learning_rate": 8.33623293414571e-05, - "loss": 0.6565, - "step": 37400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.656504154205322, - "loss_rtd": 0.4109244644641876, - "loss_sent": 0.1238277480006218, - "loss_sod": 0.061936601996421814, - "loss_total": 0.5966888070106506, - "step": 37499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.614788055419922, - "loss_rtd": 0.38480427861213684, - "loss_sent": 0.13607770204544067, - "loss_sod": 0.043309397995471954, - "loss_total": 0.5641913414001465, - "step": 37499 - }, - { - "epoch": 0.013, - "grad_norm": 0.7024715542793274, - "learning_rate": 8.333868652184216e-05, - "loss": 0.6546, - "step": 37500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.133355617523193, - "loss_rtd": 0.3928178548812866, - "loss_sent": 0.10050570219755173, - "loss_sod": 0.03258616849780083, - "loss_total": 0.5259097218513489, - "step": 37599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.330506324768066, - "loss_rtd": 0.40831834077835083, - "loss_sent": 0.07231014221906662, - "loss_sod": 0.05504155904054642, - "loss_total": 0.5356700420379639, - "step": 37599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.8137696981430054, - "learning_rate": 8.331503027340824e-05, - "loss": 0.6499, - "step": 37600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.40916633605957, - "loss_rtd": 0.38783833384513855, - "loss_sent": 0.27936530113220215, - "loss_sod": 0.176782488822937, - "loss_total": 0.8439861536026001, - "step": 37699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.736486911773682, - "loss_rtd": 0.38095638155937195, - "loss_sent": 0.24123525619506836, - "loss_sod": 0.06507010757923126, - "loss_total": 0.6872617602348328, - "step": 37699 - }, - { - "epoch": 0.0134, - "grad_norm": 3.4207136631011963, - "learning_rate": 8.329136060568412e-05, - "loss": 0.6706, - "step": 37700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.38934326171875, - "loss_rtd": 0.4075080156326294, - "loss_sent": 0.26131579279899597, - "loss_sod": 0.09944939613342285, - "loss_total": 0.7682732343673706, - "step": 37799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.540380001068115, - "loss_rtd": 0.3892989456653595, - "loss_sent": 0.10794064402580261, - "loss_sod": 0.09877896308898926, - "loss_total": 0.5960185527801514, - "step": 37799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.143424391746521, - "learning_rate": 8.326767752820392e-05, - "loss": 0.6694, - "step": 37800 - }, - { - "epoch": 0.013798, - "loss_gen": 3.6130313873291016, - "loss_rtd": 0.3840130567550659, - "loss_sent": 0.0001633672945899889, - "loss_sod": 0.34217336773872375, - "loss_total": 0.7263497710227966, - "step": 37899 - }, - { - "epoch": 0.013798, - "loss_gen": 3.312906265258789, - "loss_rtd": 0.3689209520816803, - "loss_sent": 0.00043438852299004793, - "loss_sod": 0.4225320518016815, - "loss_total": 0.7918874025344849, - "step": 37899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.6495534181594849, - "learning_rate": 8.32439810505072e-05, - "loss": 0.6577, - "step": 37900 - }, - { - "epoch": 0.013998, - "loss_gen": 4.597347736358643, - "loss_rtd": 0.38990819454193115, - "loss_sent": 0.1358875185251236, - "loss_sod": 0.11874350160360336, - "loss_total": 0.6445391774177551, - "step": 37999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.741082668304443, - "loss_rtd": 0.39351511001586914, - "loss_sent": 0.06590181589126587, - "loss_sod": 0.06430280953645706, - "loss_total": 0.5237197279930115, - "step": 37999 - }, - { - "epoch": 0.014, - "grad_norm": 1.036952257156372, - "learning_rate": 8.322027118213888e-05, - "loss": 0.6548, - "step": 38000 - }, - { - "epoch": 0.014, - "eval_loss": 0.6540851593017578, - "eval_runtime": 150.9211, - "eval_samples_per_second": 102.325, - "eval_steps_per_second": 0.802, - "step": 38000 - }, - { - "epoch": 0.014198, - "loss_gen": 4.49874210357666, - "loss_rtd": 0.38914498686790466, - "loss_sent": 0.1910504251718521, - "loss_sod": 0.1836278736591339, - "loss_total": 0.7638232707977295, - "step": 38099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.400293827056885, - "loss_rtd": 0.3808160424232483, - "loss_sent": 0.07757776230573654, - "loss_sod": 0.0072662136517465115, - "loss_total": 0.465660035610199, - "step": 38099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.0191900730133057, - "learning_rate": 8.319654793264932e-05, - "loss": 0.6852, - "step": 38100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.592637538909912, - "loss_rtd": 0.3863178491592407, - "loss_sent": 0.0930735245347023, - "loss_sod": 0.05370680242776871, - "loss_total": 0.5330981612205505, - "step": 38199 - }, - { - "epoch": 0.014398, - "loss_gen": 3.6214210987091064, - "loss_rtd": 0.38091063499450684, - "loss_sent": 0.02442455105483532, - "loss_sod": 0.23057721555233002, - "loss_total": 0.6359124183654785, - "step": 38199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.9923510551452637, - "learning_rate": 8.31728113115942e-05, - "loss": 0.6628, - "step": 38200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.115151405334473, - "loss_rtd": 0.38642704486846924, - "loss_sent": 0.04713524878025055, - "loss_sod": 0.09587834030389786, - "loss_total": 0.5294406414031982, - "step": 38299 - }, - { - "epoch": 0.014598, - "loss_gen": 3.4967713356018066, - "loss_rtd": 0.3726522624492645, - "loss_sent": 0.00012220637290738523, - "loss_sod": 0.4132821559906006, - "loss_total": 0.786056637763977, - "step": 38299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.0016167163848877, - "learning_rate": 8.314906132853466e-05, - "loss": 0.6608, - "step": 38300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.444309711456299, - "loss_rtd": 0.3885973393917084, - "loss_sent": 0.264399915933609, - "loss_sod": 0.03305451199412346, - "loss_total": 0.6860517263412476, - "step": 38399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.453540802001953, - "loss_rtd": 0.3865545988082886, - "loss_sent": 0.13625246286392212, - "loss_sod": 0.14132162928581238, - "loss_total": 0.6641287207603455, - "step": 38399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.8410499095916748, - "learning_rate": 8.312529799303719e-05, - "loss": 0.6666, - "step": 38400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.6401472091674805, - "loss_rtd": 0.3889612853527069, - "loss_sent": 0.1997268944978714, - "loss_sod": 0.055990178138017654, - "loss_total": 0.6446783542633057, - "step": 38499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.636585235595703, - "loss_rtd": 0.3822641968727112, - "loss_sent": 0.27123889327049255, - "loss_sod": 0.02250303141772747, - "loss_total": 0.6760060787200928, - "step": 38499 - }, - { - "epoch": 0.015, - "grad_norm": 1.5303524732589722, - "learning_rate": 8.310152131467364e-05, - "loss": 0.6587, - "step": 38500 - }, - { - "epoch": 0.015198, - "loss_gen": 4.518683910369873, - "loss_rtd": 0.3969024419784546, - "loss_sent": 0.08406098186969757, - "loss_sod": 0.03365109860897064, - "loss_total": 0.5146145224571228, - "step": 38599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.361326217651367, - "loss_rtd": 0.37997597455978394, - "loss_sent": 0.09411624819040298, - "loss_sod": 0.12055913358926773, - "loss_total": 0.5946514010429382, - "step": 38599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.276465654373169, - "learning_rate": 8.307773130302126e-05, - "loss": 0.6576, - "step": 38600 - }, - { - "epoch": 0.015398, - "loss_gen": 4.824479103088379, - "loss_rtd": 0.3833303451538086, - "loss_sent": 0.08850919455289841, - "loss_sod": 0.049159783869981766, - "loss_total": 0.5209993124008179, - "step": 38699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.024838924407959, - "loss_rtd": 0.3834380507469177, - "loss_sent": 0.1682136356830597, - "loss_sod": 0.14718347787857056, - "loss_total": 0.6988351345062256, - "step": 38699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.8864040970802307, - "learning_rate": 8.305392796766266e-05, - "loss": 0.6651, - "step": 38700 - }, - { - "epoch": 0.015598, - "loss_gen": 4.777594089508057, - "loss_rtd": 0.38975656032562256, - "loss_sent": 0.20690903067588806, - "loss_sod": 0.07318736612796783, - "loss_total": 0.6698529720306396, - "step": 38799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.332812309265137, - "loss_rtd": 0.3724307417869568, - "loss_sent": 0.2214450240135193, - "loss_sod": 0.1805974543094635, - "loss_total": 0.774473249912262, - "step": 38799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.9250850677490234, - "learning_rate": 8.303011131818585e-05, - "loss": 0.6602, - "step": 38800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.956061840057373, - "loss_rtd": 0.3984396457672119, - "loss_sent": 0.061900459229946136, - "loss_sod": 0.08385099470615387, - "loss_total": 0.5441910624504089, - "step": 38899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.553220272064209, - "loss_rtd": 0.3981330096721649, - "loss_sent": 0.21911245584487915, - "loss_sod": 0.09834859520196915, - "loss_total": 0.7155940532684326, - "step": 38899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.7907063364982605, - "learning_rate": 8.300628136418415e-05, - "loss": 0.668, - "step": 38900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.462879657745361, - "loss_rtd": 0.399784654378891, - "loss_sent": 0.2433241605758667, - "loss_sod": 0.010670976713299751, - "loss_total": 0.6537798047065735, - "step": 38999 - }, - { - "epoch": 0.015998, - "loss_gen": 4.294644832611084, - "loss_rtd": 0.3938490152359009, - "loss_sent": 0.2199404537677765, - "loss_sod": 0.01899193599820137, - "loss_total": 0.6327813863754272, - "step": 38999 - }, - { - "epoch": 0.016, - "grad_norm": 1.236820936203003, - "learning_rate": 8.298243811525626e-05, - "loss": 0.6774, - "step": 39000 - }, - { - "epoch": 0.016, - "eval_loss": 0.6467982530593872, - "eval_runtime": 151.0106, - "eval_samples_per_second": 102.264, - "eval_steps_per_second": 0.801, - "step": 39000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.289419174194336, - "loss_rtd": 0.39666029810905457, - "loss_sent": 0.11812765151262283, - "loss_sod": 0.01028955727815628, - "loss_total": 0.5250775218009949, - "step": 39099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.909334182739258, - "loss_rtd": 0.40946900844573975, - "loss_sent": 0.1141701266169548, - "loss_sod": 0.1173042356967926, - "loss_total": 0.6409433484077454, - "step": 39099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.7614564299583435, - "learning_rate": 8.295858158100623e-05, - "loss": 0.6598, - "step": 39100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.207708358764648, - "loss_rtd": 0.38203272223472595, - "loss_sent": 0.29075103998184204, - "loss_sod": 0.021450048312544823, - "loss_total": 0.694233775138855, - "step": 39199 - }, - { - "epoch": 0.016398, - "loss_gen": 3.9822723865509033, - "loss_rtd": 0.39657244086265564, - "loss_sent": 0.21307756006717682, - "loss_sod": 0.0807478129863739, - "loss_total": 0.6903977990150452, - "step": 39199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.3364311456680298, - "learning_rate": 8.293471177104349e-05, - "loss": 0.6551, - "step": 39200 - }, - { - "epoch": 0.016598, - "loss_gen": 4.271689414978027, - "loss_rtd": 0.39816737174987793, - "loss_sent": 0.15250374376773834, - "loss_sod": 0.07506150007247925, - "loss_total": 0.6257326006889343, - "step": 39299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.7730560302734375, - "loss_rtd": 0.40210118889808655, - "loss_sent": 0.21555040776729584, - "loss_sod": 0.05588348209857941, - "loss_total": 0.6735351085662842, - "step": 39299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8487595319747925, - "learning_rate": 8.291082869498277e-05, - "loss": 0.6613, - "step": 39300 - }, - { - "epoch": 0.016798, - "loss_gen": 4.5574421882629395, - "loss_rtd": 0.3984720706939697, - "loss_sent": 0.26848775148391724, - "loss_sod": 0.007420409470796585, - "loss_total": 0.6743802428245544, - "step": 39399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.480246543884277, - "loss_rtd": 0.3950853645801544, - "loss_sent": 0.13288316130638123, - "loss_sod": 0.030220378190279007, - "loss_total": 0.5581889152526855, - "step": 39399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.7920759320259094, - "learning_rate": 8.28869323624442e-05, - "loss": 0.671, - "step": 39400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.565373420715332, - "loss_rtd": 0.37720078229904175, - "loss_sent": 0.35734039545059204, - "loss_sod": 0.032200008630752563, - "loss_total": 0.766741156578064, - "step": 39499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.659275531768799, - "loss_rtd": 0.3958361744880676, - "loss_sent": 0.09898357838392258, - "loss_sod": 0.05647943168878555, - "loss_total": 0.5512991547584534, - "step": 39499 - }, - { - "epoch": 0.017, - "grad_norm": 0.8385859727859497, - "learning_rate": 8.28630227830532e-05, - "loss": 0.6656, - "step": 39500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.545830249786377, - "loss_rtd": 0.3760372996330261, - "loss_sent": 0.3367275595664978, - "loss_sod": 0.11878657341003418, - "loss_total": 0.8315514326095581, - "step": 39599 - }, - { - "epoch": 0.017198, - "loss_gen": 4.430626392364502, - "loss_rtd": 0.3921814560890198, - "loss_sent": 0.17195403575897217, - "loss_sod": 0.019803347066044807, - "loss_total": 0.5839388370513916, - "step": 39599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.8410652875900269, - "learning_rate": 8.283909996644057e-05, - "loss": 0.6566, - "step": 39600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.776976585388184, - "loss_rtd": 0.3909640312194824, - "loss_sent": 0.20697557926177979, - "loss_sod": 0.04605009779334068, - "loss_total": 0.6439896821975708, - "step": 39699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.416382789611816, - "loss_rtd": 0.3838307857513428, - "loss_sent": 0.2624322772026062, - "loss_sod": 0.012035838328301907, - "loss_total": 0.658298909664154, - "step": 39699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.9887232780456543, - "learning_rate": 8.281516392224238e-05, - "loss": 0.6769, - "step": 39700 - }, - { - "epoch": 0.017598, - "loss_gen": 4.665676116943359, - "loss_rtd": 0.3965701758861542, - "loss_sent": 0.34119102358818054, - "loss_sod": 0.06708301603794098, - "loss_total": 0.8048442602157593, - "step": 39799 - }, - { - "epoch": 0.017598, - "loss_gen": 4.204485893249512, - "loss_rtd": 0.39091309905052185, - "loss_sent": 0.21135586500167847, - "loss_sod": 0.07284900546073914, - "loss_total": 0.6751179695129395, - "step": 39799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.4561817646026611, - "learning_rate": 8.279121466010011e-05, - "loss": 0.6674, - "step": 39800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.904703140258789, - "loss_rtd": 0.37332049012184143, - "loss_sent": 0.4987711012363434, - "loss_sod": 0.03765245899558067, - "loss_total": 0.9097440242767334, - "step": 39899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.342363357543945, - "loss_rtd": 0.37406766414642334, - "loss_sent": 0.12862245738506317, - "loss_sod": 0.07009097933769226, - "loss_total": 0.5727810859680176, - "step": 39899 - }, - { - "epoch": 0.0178, - "grad_norm": 2.1194393634796143, - "learning_rate": 8.276725218966049e-05, - "loss": 0.6456, - "step": 39900 - }, - { - "epoch": 0.017998, - "loss_gen": 4.648935317993164, - "loss_rtd": 0.383938729763031, - "loss_sent": 0.08299577981233597, - "loss_sod": 0.019381307065486908, - "loss_total": 0.4863158166408539, - "step": 39999 - }, - { - "epoch": 0.017998, - "loss_gen": 4.655354976654053, - "loss_rtd": 0.3713081181049347, - "loss_sent": 0.11958049982786179, - "loss_sod": 0.04544627666473389, - "loss_total": 0.5363348722457886, - "step": 39999 - }, - { - "epoch": 0.018, - "grad_norm": 0.8464807868003845, - "learning_rate": 8.274327652057558e-05, - "loss": 0.6587, - "step": 40000 - }, - { - "epoch": 0.018, - "eval_loss": 0.6380000114440918, - "eval_runtime": 150.7999, - "eval_samples_per_second": 102.407, - "eval_steps_per_second": 0.802, - "step": 40000 - }, - { - "epoch": 0.018198, - "loss_gen": 4.573928356170654, - "loss_rtd": 0.4032638370990753, - "loss_sent": 0.3652266263961792, - "loss_sod": 0.047281235456466675, - "loss_total": 0.8157716989517212, - "step": 40099 - }, - { - "epoch": 0.018198, - "loss_gen": 4.420799732208252, - "loss_rtd": 0.40008658170700073, - "loss_sent": 0.08159566670656204, - "loss_sod": 0.04224081709980965, - "loss_total": 0.5239230990409851, - "step": 40099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.7178969383239746, - "learning_rate": 8.271928766250283e-05, - "loss": 0.6505, - "step": 40100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.2330427169799805, - "loss_rtd": 0.38497084379196167, - "loss_sent": 0.11211797595024109, - "loss_sod": 0.18959033489227295, - "loss_total": 0.6866791248321533, - "step": 40199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.203384876251221, - "loss_rtd": 0.3897349238395691, - "loss_sent": 0.002613413380458951, - "loss_sod": 0.2865219712257385, - "loss_total": 0.6788703203201294, - "step": 40199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.4291338920593262, - "learning_rate": 8.269528562510493e-05, - "loss": 0.6542, - "step": 40200 - }, - { - "epoch": 0.018598, - "loss_gen": 3.9043827056884766, - "loss_rtd": 0.392133891582489, - "loss_sent": 6.662686064373702e-05, - "loss_sod": 0.48413851857185364, - "loss_total": 0.8763390779495239, - "step": 40299 - }, - { - "epoch": 0.018598, - "loss_gen": 3.9214248657226562, - "loss_rtd": 0.3792304992675781, - "loss_sent": 0.00023043343389872462, - "loss_sod": 0.20601791143417358, - "loss_total": 0.5854788422584534, - "step": 40299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.0580034255981445, - "learning_rate": 8.267127041804987e-05, - "loss": 0.6687, - "step": 40300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.402094841003418, - "loss_rtd": 0.3716152012348175, - "loss_sent": 0.32779261469841003, - "loss_sod": 0.07380812615156174, - "loss_total": 0.7732159495353699, - "step": 40399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.562647342681885, - "loss_rtd": 0.37056246399879456, - "loss_sent": 0.06721348315477371, - "loss_sod": 0.06720122694969177, - "loss_total": 0.5049771666526794, - "step": 40399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.3488694429397583, - "learning_rate": 8.2647242051011e-05, - "loss": 0.6542, - "step": 40400 - }, - { - "epoch": 0.018998, - "loss_gen": 3.678180694580078, - "loss_rtd": 0.3809404671192169, - "loss_sent": 8.662456821184605e-05, - "loss_sod": 0.22844916582107544, - "loss_total": 0.6094762682914734, - "step": 40499 - }, - { - "epoch": 0.018998, - "loss_gen": 3.860581159591675, - "loss_rtd": 0.3823385536670685, - "loss_sent": 0.0644868016242981, - "loss_sod": 0.05698993802070618, - "loss_total": 0.5038152933120728, - "step": 40499 - }, - { - "epoch": 0.019, - "grad_norm": 1.0200388431549072, - "learning_rate": 8.262320053366693e-05, - "loss": 0.6568, - "step": 40500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.615798473358154, - "loss_rtd": 0.3954371213912964, - "loss_sent": 0.3814672529697418, - "loss_sod": 0.09009584039449692, - "loss_total": 0.8670002222061157, - "step": 40599 - }, - { - "epoch": 0.019198, - "loss_gen": 3.917062997817993, - "loss_rtd": 0.3857502341270447, - "loss_sent": 0.016199423000216484, - "loss_sod": 0.18647600710391998, - "loss_total": 0.5884256362915039, - "step": 40599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.7290745973587036, - "learning_rate": 8.259914587570159e-05, - "loss": 0.6559, - "step": 40600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.480384349822998, - "loss_rtd": 0.39397501945495605, - "loss_sent": 0.3567149043083191, - "loss_sod": 0.012110976502299309, - "loss_total": 0.762800931930542, - "step": 40699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.576913833618164, - "loss_rtd": 0.38175448775291443, - "loss_sent": 0.13982127606868744, - "loss_sod": 0.07496733218431473, - "loss_total": 0.5965430736541748, - "step": 40699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.3119646310806274, - "learning_rate": 8.257507808680421e-05, - "loss": 0.664, - "step": 40700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.674173355102539, - "loss_rtd": 0.3807498514652252, - "loss_sent": 0.2061619758605957, - "loss_sod": 0.10963135212659836, - "loss_total": 0.6965432167053223, - "step": 40799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.5892133712768555, - "loss_rtd": 0.41577914357185364, - "loss_sent": 0.20196275413036346, - "loss_sod": 0.04171139746904373, - "loss_total": 0.659453272819519, - "step": 40799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.5848276615142822, - "learning_rate": 8.255099717666923e-05, - "loss": 0.6517, - "step": 40800 - }, - { - "epoch": 0.019798, - "loss_gen": 4.5105791091918945, - "loss_rtd": 0.3716451823711395, - "loss_sent": 0.051084186881780624, - "loss_sod": 0.11713321506977081, - "loss_total": 0.5398625731468201, - "step": 40899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.010336399078369, - "loss_rtd": 0.3702070415019989, - "loss_sent": 0.003787236986681819, - "loss_sod": 0.3948768377304077, - "loss_total": 0.7688711285591125, - "step": 40899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.2466219663619995, - "learning_rate": 8.252690315499651e-05, - "loss": 0.6687, - "step": 40900 - }, - { - "epoch": 0.019998, - "loss_gen": 4.213362216949463, - "loss_rtd": 0.38417360186576843, - "loss_sent": 0.13808989524841309, - "loss_sod": 0.06279731541872025, - "loss_total": 0.5850608348846436, - "step": 40999 - }, - { - "epoch": 0.019998, - "loss_gen": 3.9070096015930176, - "loss_rtd": 0.40882429480552673, - "loss_sent": 0.003071530256420374, - "loss_sod": 0.19416505098342896, - "loss_total": 0.6060608625411987, - "step": 40999 - }, - { - "epoch": 0.02, - "grad_norm": 1.2292420864105225, - "learning_rate": 8.25027960314911e-05, - "loss": 0.6623, - "step": 41000 - }, - { - "epoch": 0.02, - "eval_loss": 0.6412881016731262, - "eval_runtime": 151.2282, - "eval_samples_per_second": 102.117, - "eval_steps_per_second": 0.8, - "step": 41000 - }, - { - "epoch": 0.020198, - "loss_gen": 3.7225356101989746, - "loss_rtd": 0.38106489181518555, - "loss_sent": 0.00014135816309135407, - "loss_sod": 0.3407840132713318, - "loss_total": 0.7219902873039246, - "step": 41099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.466882228851318, - "loss_rtd": 0.3669414222240448, - "loss_sent": 0.11593437939882278, - "loss_sod": 0.1539105772972107, - "loss_total": 0.6367863416671753, - "step": 41099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.1810585260391235, - "learning_rate": 8.24786758158633e-05, - "loss": 0.669, - "step": 41100 - }, - { - "epoch": 0.020398, - "loss_gen": 3.9555904865264893, - "loss_rtd": 0.38175278902053833, - "loss_sent": 0.04201050475239754, - "loss_sod": 0.16164356470108032, - "loss_total": 0.5854068398475647, - "step": 41199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.237810134887695, - "loss_rtd": 0.3647710978984833, - "loss_sent": 0.2045086920261383, - "loss_sod": 0.27279776334762573, - "loss_total": 0.8420774936676025, - "step": 41199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.3842684030532837, - "learning_rate": 8.245454251782878e-05, - "loss": 0.6532, - "step": 41200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.629668712615967, - "loss_rtd": 0.3939113914966583, - "loss_sent": 0.08818431198596954, - "loss_sod": 0.09862308204174042, - "loss_total": 0.5807187557220459, - "step": 41299 - }, - { - "epoch": 0.020598, - "loss_gen": 4.260357856750488, - "loss_rtd": 0.3583470582962036, - "loss_sent": 0.1607215255498886, - "loss_sod": 0.021704211831092834, - "loss_total": 0.5407727956771851, - "step": 41299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.976425290107727, - "learning_rate": 8.243039614710844e-05, - "loss": 0.6492, - "step": 41300 - }, - { - "epoch": 0.020798, - "loss_gen": 4.663462162017822, - "loss_rtd": 0.3940715193748474, - "loss_sent": 0.1713400036096573, - "loss_sod": 0.0544615238904953, - "loss_total": 0.619873046875, - "step": 41399 - }, - { - "epoch": 0.020798, - "loss_gen": 4.41485071182251, - "loss_rtd": 0.3642936050891876, - "loss_sent": 0.14602665603160858, - "loss_sod": 0.015467479825019836, - "loss_total": 0.5257877707481384, - "step": 41399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.2612237930297852, - "learning_rate": 8.240623671342837e-05, - "loss": 0.6541, - "step": 41400 - }, - { - "epoch": 0.020998, - "loss_gen": 4.343321800231934, - "loss_rtd": 0.3765054941177368, - "loss_sent": 0.22087278962135315, - "loss_sod": 0.016782084479928017, - "loss_total": 0.6141603589057922, - "step": 41499 - }, - { - "epoch": 0.020998, - "loss_gen": 4.469653129577637, - "loss_rtd": 0.3886878490447998, - "loss_sent": 0.02628425508737564, - "loss_sod": 0.18656085431575775, - "loss_total": 0.6015329957008362, - "step": 41499 - }, - { - "epoch": 0.021, - "grad_norm": 1.2931349277496338, - "learning_rate": 8.238206422652006e-05, - "loss": 0.6537, - "step": 41500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.51466178894043, - "loss_rtd": 0.36613962054252625, - "loss_sent": 0.14363856613636017, - "loss_sod": 0.17767947912216187, - "loss_total": 0.6874576210975647, - "step": 41599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.485960006713867, - "loss_rtd": 0.3810034394264221, - "loss_sent": 0.1728469878435135, - "loss_sod": 0.08899247646331787, - "loss_total": 0.6428428888320923, - "step": 41599 - }, - { - "epoch": 0.0212, - "grad_norm": 2.514805793762207, - "learning_rate": 8.235787869612012e-05, - "loss": 0.6714, - "step": 41600 - }, - { - "epoch": 0.021398, - "loss_gen": 4.567913055419922, - "loss_rtd": 0.3841160237789154, - "loss_sent": 0.1377597600221634, - "loss_sod": 0.05175473913550377, - "loss_total": 0.5736305117607117, - "step": 41699 - }, - { - "epoch": 0.021398, - "loss_gen": 4.057435512542725, - "loss_rtd": 0.3718874156475067, - "loss_sent": 0.04041086509823799, - "loss_sod": 0.15216533839702606, - "loss_total": 0.5644636154174805, - "step": 41699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.4682799577713013, - "learning_rate": 8.233368013197053e-05, - "loss": 0.6523, - "step": 41700 - }, - { - "epoch": 0.021598, - "loss_gen": 3.9765806198120117, - "loss_rtd": 0.39956530928611755, - "loss_sent": 0.029685061424970627, - "loss_sod": 0.19594547152519226, - "loss_total": 0.6251958608627319, - "step": 41799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.426865577697754, - "loss_rtd": 0.37698954343795776, - "loss_sent": 0.18985331058502197, - "loss_sod": 0.0343150869011879, - "loss_total": 0.6011579036712646, - "step": 41799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.8145066499710083, - "learning_rate": 8.230946854381846e-05, - "loss": 0.6501, - "step": 41800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.632156848907471, - "loss_rtd": 0.3796764314174652, - "loss_sent": 0.07298461347818375, - "loss_sod": 0.12913213670253754, - "loss_total": 0.5817931890487671, - "step": 41899 - }, - { - "epoch": 0.021798, - "loss_gen": 4.973066806793213, - "loss_rtd": 0.40505293011665344, - "loss_sent": 0.15712273120880127, - "loss_sod": 0.07550527155399323, - "loss_total": 0.6376809477806091, - "step": 41899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.2040433883666992, - "learning_rate": 8.22852439414163e-05, - "loss": 0.6691, - "step": 41900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.504978179931641, - "loss_rtd": 0.38156020641326904, - "loss_sent": 0.26390254497528076, - "loss_sod": 0.09385176002979279, - "loss_total": 0.7393144965171814, - "step": 41999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.452895164489746, - "loss_rtd": 0.4032492935657501, - "loss_sent": 0.2142591029405594, - "loss_sod": 0.024533722549676895, - "loss_total": 0.6420421004295349, - "step": 41999 - }, - { - "epoch": 0.022, - "grad_norm": 1.5839908123016357, - "learning_rate": 8.226100633452176e-05, - "loss": 0.6579, - "step": 42000 - }, - { - "epoch": 0.022, - "eval_loss": 0.6314432621002197, - "eval_runtime": 150.9027, - "eval_samples_per_second": 102.337, - "eval_steps_per_second": 0.802, - "step": 42000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.108220100402832, - "loss_rtd": 0.3855191469192505, - "loss_sent": 0.03595998138189316, - "loss_sod": 0.04322711378335953, - "loss_total": 0.4647062420845032, - "step": 42099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.688653469085693, - "loss_rtd": 0.37979236245155334, - "loss_sent": 0.11569111794233322, - "loss_sod": 0.13051004707813263, - "loss_total": 0.625993549823761, - "step": 42099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.9881030917167664, - "learning_rate": 8.223675573289773e-05, - "loss": 0.6565, - "step": 42100 - }, - { - "epoch": 0.022398, - "loss_gen": 3.745572566986084, - "loss_rtd": 0.36591315269470215, - "loss_sent": 5.752767174271867e-05, - "loss_sod": 0.41352346539497375, - "loss_total": 0.7794941067695618, - "step": 42199 - }, - { - "epoch": 0.022398, - "loss_gen": 4.244234085083008, - "loss_rtd": 0.3592204451560974, - "loss_sent": 0.06797172129154205, - "loss_sod": 0.10003990679979324, - "loss_total": 0.5272320508956909, - "step": 42199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.06648850440979, - "learning_rate": 8.221249214631233e-05, - "loss": 0.6539, - "step": 42200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.342432975769043, - "loss_rtd": 0.4121549427509308, - "loss_sent": 0.35916224122047424, - "loss_sod": 0.1422906517982483, - "loss_total": 0.9136078357696533, - "step": 42299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.415210723876953, - "loss_rtd": 0.3874806761741638, - "loss_sent": 0.31633779406547546, - "loss_sod": 0.11523738503456116, - "loss_total": 0.8190559148788452, - "step": 42299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.351127028465271, - "learning_rate": 8.218821558453896e-05, - "loss": 0.6481, - "step": 42300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.327655792236328, - "loss_rtd": 0.36464032530784607, - "loss_sent": 0.022839125245809555, - "loss_sod": 0.2916712462902069, - "loss_total": 0.6791507005691528, - "step": 42399 - }, - { - "epoch": 0.022798, - "loss_gen": 4.3175883293151855, - "loss_rtd": 0.36989280581474304, - "loss_sent": 0.000921736063901335, - "loss_sod": 0.2789615988731384, - "loss_total": 0.6497761011123657, - "step": 42399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.007271647453308, - "learning_rate": 8.216392605735618e-05, - "loss": 0.6694, - "step": 42400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.200122356414795, - "loss_rtd": 0.3771202266216278, - "loss_sent": 0.30438563227653503, - "loss_sod": 0.11443760991096497, - "loss_total": 0.7959434986114502, - "step": 42499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.341174125671387, - "loss_rtd": 0.3801506757736206, - "loss_sent": 0.10273100435733795, - "loss_sod": 0.053897298872470856, - "loss_total": 0.53677898645401, - "step": 42499 - }, - { - "epoch": 0.023, - "grad_norm": 1.900464415550232, - "learning_rate": 8.213962357454785e-05, - "loss": 0.6531, - "step": 42500 - }, - { - "epoch": 0.023198, - "loss_gen": 4.481957912445068, - "loss_rtd": 0.35730868577957153, - "loss_sent": 0.42418715357780457, - "loss_sod": 0.03608470782637596, - "loss_total": 0.8175805807113647, - "step": 42599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.262242317199707, - "loss_rtd": 0.3994123935699463, - "loss_sent": 0.0772051140666008, - "loss_sod": 0.09249553084373474, - "loss_total": 0.56911301612854, - "step": 42599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.4650466442108154, - "learning_rate": 8.211530814590298e-05, - "loss": 0.6535, - "step": 42600 - }, - { - "epoch": 0.023398, - "loss_gen": 4.358861446380615, - "loss_rtd": 0.3849829137325287, - "loss_sent": 0.20576530694961548, - "loss_sod": 0.011316204443573952, - "loss_total": 0.6020644307136536, - "step": 42699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.429322719573975, - "loss_rtd": 0.3777335584163666, - "loss_sent": 0.1580859273672104, - "loss_sod": 0.01792304590344429, - "loss_total": 0.553742527961731, - "step": 42699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8418339490890503, - "learning_rate": 8.209097978121583e-05, - "loss": 0.6579, - "step": 42700 - }, - { - "epoch": 0.023598, - "loss_gen": 3.279528856277466, - "loss_rtd": 0.3709106743335724, - "loss_sent": 0.00022582203382626176, - "loss_sod": 0.20592719316482544, - "loss_total": 0.5770637392997742, - "step": 42799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.744197845458984, - "loss_rtd": 0.37123891711235046, - "loss_sent": 0.10743427276611328, - "loss_sod": 0.04356180876493454, - "loss_total": 0.5222350358963013, - "step": 42799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.0691636800765991, - "learning_rate": 8.206663849028587e-05, - "loss": 0.648, - "step": 42800 - }, - { - "epoch": 0.023798, - "loss_gen": 3.5588996410369873, - "loss_rtd": 0.37456318736076355, - "loss_sent": 5.979636625852436e-05, - "loss_sod": 0.2636725902557373, - "loss_total": 0.638295590877533, - "step": 42899 - }, - { - "epoch": 0.023798, - "loss_gen": 3.632612705230713, - "loss_rtd": 0.38829219341278076, - "loss_sent": 0.0013291510986164212, - "loss_sod": 0.27269303798675537, - "loss_total": 0.6623143553733826, - "step": 42899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.4013932943344116, - "learning_rate": 8.204228428291775e-05, - "loss": 0.6574, - "step": 42900 - }, - { - "epoch": 0.023998, - "loss_gen": 4.848650932312012, - "loss_rtd": 0.373313844203949, - "loss_sent": 0.17515422403812408, - "loss_sod": 0.04122958332300186, - "loss_total": 0.5896976590156555, - "step": 42999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.167661666870117, - "loss_rtd": 0.3895135521888733, - "loss_sent": 0.006315239239484072, - "loss_sod": 0.15132783353328705, - "loss_total": 0.5471566319465637, - "step": 42999 - }, - { - "epoch": 0.024, - "grad_norm": 0.897146999835968, - "learning_rate": 8.201791716892136e-05, - "loss": 0.6511, - "step": 43000 - }, - { - "epoch": 0.024, - "eval_loss": 0.6402202844619751, - "eval_runtime": 151.004, - "eval_samples_per_second": 102.269, - "eval_steps_per_second": 0.801, - "step": 43000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.613114833831787, - "loss_rtd": 0.36502689123153687, - "loss_sent": 0.1664440780878067, - "loss_sod": 0.020670989528298378, - "loss_total": 0.5521419644355774, - "step": 43099 - }, - { - "epoch": 0.024198, - "loss_gen": 4.534543991088867, - "loss_rtd": 0.38365063071250916, - "loss_sent": 0.31674572825431824, - "loss_sod": 0.04986383020877838, - "loss_total": 0.7502602338790894, - "step": 43099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.2234196662902832, - "learning_rate": 8.199353715811176e-05, - "loss": 0.6472, - "step": 43100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.381618499755859, - "loss_rtd": 0.38506895303726196, - "loss_sent": 0.2596307694911957, - "loss_sod": 0.010949251241981983, - "loss_total": 0.655648946762085, - "step": 43199 - }, - { - "epoch": 0.024398, - "loss_gen": 4.481594085693359, - "loss_rtd": 0.38360145688056946, - "loss_sent": 0.32707130908966064, - "loss_sod": 0.013929128646850586, - "loss_total": 0.7246018648147583, - "step": 43199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.4966225624084473, - "learning_rate": 8.196914426030921e-05, - "loss": 0.6386, - "step": 43200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.606259346008301, - "loss_rtd": 0.3694329857826233, - "loss_sent": 0.21232984960079193, - "loss_sod": 0.05285738408565521, - "loss_total": 0.6346202492713928, - "step": 43299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.581188201904297, - "loss_rtd": 0.3795831799507141, - "loss_sent": 0.203518807888031, - "loss_sod": 0.26025161147117615, - "loss_total": 0.8433535695075989, - "step": 43299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.1756949424743652, - "learning_rate": 8.194473848533919e-05, - "loss": 0.659, - "step": 43300 - }, - { - "epoch": 0.024798, - "loss_gen": 4.864691734313965, - "loss_rtd": 0.3677074909210205, - "loss_sent": 0.15385890007019043, - "loss_sod": 0.04495445638895035, - "loss_total": 0.5665208101272583, - "step": 43399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.599943161010742, - "loss_rtd": 0.3858634829521179, - "loss_sent": 0.11128430813550949, - "loss_sod": 0.1456134021282196, - "loss_total": 0.6427611708641052, - "step": 43399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.0346415042877197, - "learning_rate": 8.192031984303232e-05, - "loss": 0.6602, - "step": 43400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.85927677154541, - "loss_rtd": 0.377153217792511, - "loss_sent": 0.12804071605205536, - "loss_sod": 0.028983604162931442, - "loss_total": 0.5341775417327881, - "step": 43499 - }, - { - "epoch": 0.024998, - "loss_gen": 4.69577693939209, - "loss_rtd": 0.40115371346473694, - "loss_sent": 0.08152621239423752, - "loss_sod": 0.19556081295013428, - "loss_total": 0.6782407164573669, - "step": 43499 - }, - { - "epoch": 0.025, - "grad_norm": 1.173714518547058, - "learning_rate": 8.189588834322444e-05, - "loss": 0.6385, - "step": 43500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.078179836273193, - "loss_rtd": 0.40786659717559814, - "loss_sent": 6.961300823604688e-05, - "loss_sod": 0.2843131422996521, - "loss_total": 0.6922493577003479, - "step": 43599 - }, - { - "epoch": 0.025198, - "loss_gen": 3.5158584117889404, - "loss_rtd": 0.37952902913093567, - "loss_sent": 0.00015645030362065881, - "loss_sod": 0.2368762046098709, - "loss_total": 0.6165617108345032, - "step": 43599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.9594609141349792, - "learning_rate": 8.187144399575655e-05, - "loss": 0.6584, - "step": 43600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.753521919250488, - "loss_rtd": 0.3814672529697418, - "loss_sent": 0.387495756149292, - "loss_sod": 0.028873810544610023, - "loss_total": 0.7978367805480957, - "step": 43699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.257978439331055, - "loss_rtd": 0.4045831561088562, - "loss_sent": 0.20893022418022156, - "loss_sod": 0.03061862289905548, - "loss_total": 0.6441320180892944, - "step": 43699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.4797601699829102, - "learning_rate": 8.184698681047482e-05, - "loss": 0.6522, - "step": 43700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.600385665893555, - "loss_rtd": 0.384109228849411, - "loss_sent": 0.19618237018585205, - "loss_sod": 0.04927710443735123, - "loss_total": 0.6295686960220337, - "step": 43799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.504488945007324, - "loss_rtd": 0.3831881582736969, - "loss_sent": 0.40422946214675903, - "loss_sod": 0.07176312804222107, - "loss_total": 0.859180748462677, - "step": 43799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.76749849319458, - "learning_rate": 8.182251679723061e-05, - "loss": 0.6624, - "step": 43800 - }, - { - "epoch": 0.025798, - "loss_gen": 3.675482988357544, - "loss_rtd": 0.37020596861839294, - "loss_sent": 0.0027885735034942627, - "loss_sod": 0.20921216905117035, - "loss_total": 0.5822067260742188, - "step": 43899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.866572380065918, - "loss_rtd": 0.37793922424316406, - "loss_sent": 0.0779816284775734, - "loss_sod": 0.036181047558784485, - "loss_total": 0.49210187792778015, - "step": 43899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.9174070358276367, - "learning_rate": 8.179803396588045e-05, - "loss": 0.6474, - "step": 43900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.741182327270508, - "loss_rtd": 0.38066208362579346, - "loss_sent": 0.13676688075065613, - "loss_sod": 0.039525195956230164, - "loss_total": 0.5569541454315186, - "step": 43999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.582489490509033, - "loss_rtd": 0.3616386950016022, - "loss_sent": 0.15300214290618896, - "loss_sod": 0.029031649231910706, - "loss_total": 0.543672502040863, - "step": 43999 - }, - { - "epoch": 0.026, - "grad_norm": 0.9032507538795471, - "learning_rate": 8.177353832628602e-05, - "loss": 0.6548, - "step": 44000 - }, - { - "epoch": 0.026, - "eval_loss": 0.6352246999740601, - "eval_runtime": 150.8784, - "eval_samples_per_second": 102.354, - "eval_steps_per_second": 0.802, - "step": 44000 - }, - { - "epoch": 0.026198, - "loss_gen": 3.75815749168396, - "loss_rtd": 0.3719606399536133, - "loss_sent": 0.004499775357544422, - "loss_sod": 0.23574712872505188, - "loss_total": 0.6122075319290161, - "step": 44099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.299135684967041, - "loss_rtd": 0.3848499059677124, - "loss_sent": 0.11957715451717377, - "loss_sod": 0.06262027472257614, - "loss_total": 0.5670473575592041, - "step": 44099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.8012509942054749, - "learning_rate": 8.174902988831413e-05, - "loss": 0.6529, - "step": 44100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.834580898284912, - "loss_rtd": 0.37917640805244446, - "loss_sent": 0.1776326447725296, - "loss_sod": 0.06561632454395294, - "loss_total": 0.622425377368927, - "step": 44199 - }, - { - "epoch": 0.026398, - "loss_gen": 4.1323957443237305, - "loss_rtd": 0.38789719343185425, - "loss_sent": 0.22935126721858978, - "loss_sod": 0.009114162065088749, - "loss_total": 0.6263626217842102, - "step": 44199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.483168125152588, - "learning_rate": 8.17245086618368e-05, - "loss": 0.6529, - "step": 44200 - }, - { - "epoch": 0.026598, - "loss_gen": 4.796681880950928, - "loss_rtd": 0.38072118163108826, - "loss_sent": 0.11894837021827698, - "loss_sod": 0.10300540924072266, - "loss_total": 0.6026749610900879, - "step": 44299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.623732566833496, - "loss_rtd": 0.3622994124889374, - "loss_sent": 0.13840840756893158, - "loss_sod": 0.021766219288110733, - "loss_total": 0.5224740505218506, - "step": 44299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.7771408557891846, - "learning_rate": 8.16999746567312e-05, - "loss": 0.6443, - "step": 44300 - }, - { - "epoch": 0.026798, - "loss_gen": 3.522340774536133, - "loss_rtd": 0.36471617221832275, - "loss_sent": 0.0024779404047876596, - "loss_sod": 0.11392658203840256, - "loss_total": 0.4811207056045532, - "step": 44399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.686045169830322, - "loss_rtd": 0.3902490735054016, - "loss_sent": 0.16535663604736328, - "loss_sod": 0.022858766838908195, - "loss_total": 0.5784645080566406, - "step": 44399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.6933805346488953, - "learning_rate": 8.167542788287958e-05, - "loss": 0.6555, - "step": 44400 - }, - { - "epoch": 0.026998, - "loss_gen": 4.474067687988281, - "loss_rtd": 0.3878956139087677, - "loss_sent": 0.10972359776496887, - "loss_sod": 0.04626302421092987, - "loss_total": 0.5438822507858276, - "step": 44499 - }, - { - "epoch": 0.026998, - "loss_gen": 4.703509330749512, - "loss_rtd": 0.39577972888946533, - "loss_sent": 0.22541356086730957, - "loss_sod": 0.03512066975235939, - "loss_total": 0.656313955783844, - "step": 44499 - }, - { - "epoch": 0.027, - "grad_norm": 0.9493730068206787, - "learning_rate": 8.165086835016939e-05, - "loss": 0.6493, - "step": 44500 - }, - { - "epoch": 0.027198, - "loss_gen": 4.679075241088867, - "loss_rtd": 0.37580394744873047, - "loss_sent": 0.47782081365585327, - "loss_sod": 0.08848084509372711, - "loss_total": 0.9421055912971497, - "step": 44599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.700301170349121, - "loss_rtd": 0.38203302025794983, - "loss_sent": 0.18475648760795593, - "loss_sod": 0.0023707542568445206, - "loss_total": 0.5691602230072021, - "step": 44599 - }, - { - "epoch": 0.0272, - "grad_norm": 2.114959716796875, - "learning_rate": 8.162629606849323e-05, - "loss": 0.6635, - "step": 44600 - }, - { - "epoch": 0.027398, - "loss_gen": 3.892876386642456, - "loss_rtd": 0.3716607689857483, - "loss_sent": 0.015241903252899647, - "loss_sod": 0.16945964097976685, - "loss_total": 0.5563623309135437, - "step": 44699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.450325012207031, - "loss_rtd": 0.373519629240036, - "loss_sent": 0.3957299590110779, - "loss_sod": 0.04284271225333214, - "loss_total": 0.8120923042297363, - "step": 44699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.280011534690857, - "learning_rate": 8.160171104774879e-05, - "loss": 0.6662, - "step": 44700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.850058555603027, - "loss_rtd": 0.3819859027862549, - "loss_sent": 0.14247000217437744, - "loss_sod": 0.07389207929372787, - "loss_total": 0.5983480215072632, - "step": 44799 - }, - { - "epoch": 0.027598, - "loss_gen": 4.781891345977783, - "loss_rtd": 0.38648825883865356, - "loss_sent": 0.34172558784484863, - "loss_sod": 0.09410925209522247, - "loss_total": 0.8223230838775635, - "step": 44799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.891017735004425, - "learning_rate": 8.15771132978389e-05, - "loss": 0.6625, - "step": 44800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.8174285888671875, - "loss_rtd": 0.3941290080547333, - "loss_sent": 0.5728538632392883, - "loss_sod": 0.042764414101839066, - "loss_total": 1.0097472667694092, - "step": 44899 - }, - { - "epoch": 0.027798, - "loss_gen": 3.689896821975708, - "loss_rtd": 0.3721725344657898, - "loss_sent": 0.0008906761649996042, - "loss_sod": 0.18504759669303894, - "loss_total": 0.558110773563385, - "step": 44899 - }, - { - "epoch": 0.0278, - "grad_norm": 2.401081085205078, - "learning_rate": 8.155250282867157e-05, - "loss": 0.6667, - "step": 44900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.586578369140625, - "loss_rtd": 0.38676008582115173, - "loss_sent": 0.2055395245552063, - "loss_sod": 0.032622065395116806, - "loss_total": 0.6249216794967651, - "step": 44999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.673305511474609, - "loss_rtd": 0.3887365758419037, - "loss_sent": 0.28746530413627625, - "loss_sod": 0.025677867233753204, - "loss_total": 0.7018797397613525, - "step": 44999 - }, - { - "epoch": 0.028, - "grad_norm": 1.0690522193908691, - "learning_rate": 8.152787965015988e-05, - "loss": 0.6643, - "step": 45000 - }, - { - "epoch": 0.028, - "eval_loss": 0.6301302313804626, - "eval_runtime": 150.9721, - "eval_samples_per_second": 102.29, - "eval_steps_per_second": 0.801, - "step": 45000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.572504997253418, - "loss_rtd": 0.3817649483680725, - "loss_sent": 0.13866949081420898, - "loss_sod": 0.08782115578651428, - "loss_total": 0.6082556247711182, - "step": 45099 - }, - { - "epoch": 0.028198, - "loss_gen": 4.704024791717529, - "loss_rtd": 0.3800975978374481, - "loss_sent": 0.11726216971874237, - "loss_sod": 0.06253600120544434, - "loss_total": 0.5598957538604736, - "step": 45099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.9835987091064453, - "learning_rate": 8.150324377222202e-05, - "loss": 0.6621, - "step": 45100 - }, - { - "epoch": 0.028398, - "loss_gen": 4.337993144989014, - "loss_rtd": 0.37821826338768005, - "loss_sent": 0.17172084748744965, - "loss_sod": 0.012256279587745667, - "loss_total": 0.5621954202651978, - "step": 45199 - }, - { - "epoch": 0.028398, - "loss_gen": 4.574587821960449, - "loss_rtd": 0.3861466944217682, - "loss_sent": 0.16793416440486908, - "loss_sod": 0.029096323996782303, - "loss_total": 0.5831772089004517, - "step": 45199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.1708933115005493, - "learning_rate": 8.147859520478134e-05, - "loss": 0.6795, - "step": 45200 - }, - { - "epoch": 0.028598, - "loss_gen": 4.570967197418213, - "loss_rtd": 0.3870660364627838, - "loss_sent": 0.4017711281776428, - "loss_sod": 0.1306699514389038, - "loss_total": 0.9195070862770081, - "step": 45299 - }, - { - "epoch": 0.028598, - "loss_gen": 4.816206932067871, - "loss_rtd": 0.3569832444190979, - "loss_sent": 0.11378684639930725, - "loss_sod": 0.10787219554185867, - "loss_total": 0.5786422491073608, - "step": 45299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.1042001247406006, - "learning_rate": 8.145393395776629e-05, - "loss": 0.66, - "step": 45300 - }, - { - "epoch": 0.028798, - "loss_gen": 4.471880912780762, - "loss_rtd": 0.36207079887390137, - "loss_sent": 0.00018560764146968722, - "loss_sod": 0.32203108072280884, - "loss_total": 0.6842874884605408, - "step": 45399 - }, - { - "epoch": 0.028798, - "loss_gen": 3.6233901977539062, - "loss_rtd": 0.361145943403244, - "loss_sent": 0.006995246745646, - "loss_sod": 0.13985320925712585, - "loss_total": 0.5079944133758545, - "step": 45399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.1605628728866577, - "learning_rate": 8.14292600411104e-05, - "loss": 0.646, - "step": 45400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.472077369689941, - "loss_rtd": 0.3926493525505066, - "loss_sent": 0.24979284405708313, - "loss_sod": 0.018284296616911888, - "loss_total": 0.6607264876365662, - "step": 45499 - }, - { - "epoch": 0.028998, - "loss_gen": 4.657073974609375, - "loss_rtd": 0.37766319513320923, - "loss_sent": 0.13722217082977295, - "loss_sod": 0.05151546746492386, - "loss_total": 0.5664008259773254, - "step": 45499 - }, - { - "epoch": 0.029, - "grad_norm": 1.3693609237670898, - "learning_rate": 8.140457346475232e-05, - "loss": 0.6611, - "step": 45500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.655327796936035, - "loss_rtd": 0.37780165672302246, - "loss_sent": 0.17282724380493164, - "loss_sod": 0.015296641737222672, - "loss_total": 0.5659255385398865, - "step": 45599 - }, - { - "epoch": 0.029198, - "loss_gen": 4.242246627807617, - "loss_rtd": 0.39474278688430786, - "loss_sent": 0.5056430101394653, - "loss_sod": 0.007861332967877388, - "loss_total": 0.9082471132278442, - "step": 45599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.5870112180709839, - "learning_rate": 8.13798742386358e-05, - "loss": 0.6651, - "step": 45600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.367142677307129, - "loss_rtd": 0.38583049178123474, - "loss_sent": 0.14037981629371643, - "loss_sod": 0.041490476578474045, - "loss_total": 0.5677007436752319, - "step": 45699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.504812240600586, - "loss_rtd": 0.3695089519023895, - "loss_sent": 0.19455914199352264, - "loss_sod": 0.0048032961785793304, - "loss_total": 0.5688713788986206, - "step": 45699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.2032526731491089, - "learning_rate": 8.135516237270969e-05, - "loss": 0.6674, - "step": 45700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.352217197418213, - "loss_rtd": 0.3749980926513672, - "loss_sent": 0.2131139487028122, - "loss_sod": 0.18811655044555664, - "loss_total": 0.7762286067008972, - "step": 45799 - }, - { - "epoch": 0.029598, - "loss_gen": 4.451251029968262, - "loss_rtd": 0.3717048466205597, - "loss_sent": 0.18724147975444794, - "loss_sod": 0.07915185391902924, - "loss_total": 0.6380981802940369, - "step": 45799 - }, - { - "epoch": 0.0296, - "grad_norm": 2.019467353820801, - "learning_rate": 8.133043787692794e-05, - "loss": 0.6611, - "step": 45800 - }, - { - "epoch": 0.029798, - "loss_gen": 4.293920993804932, - "loss_rtd": 0.3777517080307007, - "loss_sent": 0.20406286418437958, - "loss_sod": 0.009192945435643196, - "loss_total": 0.5910075306892395, - "step": 45899 - }, - { - "epoch": 0.029798, - "loss_gen": 4.806717872619629, - "loss_rtd": 0.369988352060318, - "loss_sent": 0.07092718034982681, - "loss_sod": 0.06475816667079926, - "loss_total": 0.5056737065315247, - "step": 45899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.9810512661933899, - "learning_rate": 8.130570076124953e-05, - "loss": 0.6566, - "step": 45900 - }, - { - "epoch": 0.029998, - "loss_gen": 4.326590061187744, - "loss_rtd": 0.3545214831829071, - "loss_sent": 0.18595679104328156, - "loss_sod": 0.016033081337809563, - "loss_total": 0.5565113425254822, - "step": 45999 - }, - { - "epoch": 0.029998, - "loss_gen": 4.375558376312256, - "loss_rtd": 0.3825295865535736, - "loss_sent": 0.18366511166095734, - "loss_sod": 0.05017731338739395, - "loss_total": 0.6163719892501831, - "step": 45999 - }, - { - "epoch": 0.03, - "grad_norm": 0.9984399676322937, - "learning_rate": 8.128095103563862e-05, - "loss": 0.6358, - "step": 46000 - }, - { - "epoch": 0.03, - "eval_loss": 0.6287549138069153, - "eval_runtime": 150.7756, - "eval_samples_per_second": 102.424, - "eval_steps_per_second": 0.803, - "step": 46000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.699979305267334, - "loss_rtd": 0.3794102370738983, - "loss_sent": 0.10060257464647293, - "loss_sod": 0.011888833716511726, - "loss_total": 0.4919016361236572, - "step": 46099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.633126258850098, - "loss_rtd": 0.3745104670524597, - "loss_sent": 0.033742573112249374, - "loss_sod": 0.21771006286144257, - "loss_total": 0.6259630918502808, - "step": 46099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.7809391021728516, - "learning_rate": 8.125618871006438e-05, - "loss": 0.6447, - "step": 46100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.639756202697754, - "loss_rtd": 0.4024973511695862, - "loss_sent": 0.11776583641767502, - "loss_sod": 0.1854410469532013, - "loss_total": 0.7057042717933655, - "step": 46199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.52336311340332, - "loss_rtd": 0.3740673065185547, - "loss_sent": 0.2092219889163971, - "loss_sod": 0.11175394058227539, - "loss_total": 0.6950432062149048, - "step": 46199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.1094110012054443, - "learning_rate": 8.123141379450103e-05, - "loss": 0.6424, - "step": 46200 - }, - { - "epoch": 0.000598, - "loss_gen": 3.938033103942871, - "loss_rtd": 0.3785296678543091, - "loss_sent": 0.0002721761120483279, - "loss_sod": 0.5357171297073364, - "loss_total": 0.9145189523696899, - "step": 46299 - }, - { - "epoch": 0.000598, - "loss_gen": 4.006030559539795, - "loss_rtd": 0.37514546513557434, - "loss_sent": 0.10706964135169983, - "loss_sod": 0.14030012488365173, - "loss_total": 0.6225152015686035, - "step": 46299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.5383400917053223, - "learning_rate": 8.120662629892797e-05, - "loss": 0.6485, - "step": 46300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.286434173583984, - "loss_rtd": 0.374002069234848, - "loss_sent": 0.40777915716171265, - "loss_sod": 0.021501773968338966, - "loss_total": 0.8032829761505127, - "step": 46399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.167953968048096, - "loss_rtd": 0.38599392771720886, - "loss_sent": 0.16690266132354736, - "loss_sod": 0.06689120084047318, - "loss_total": 0.6197878122329712, - "step": 46399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.7189841270446777, - "learning_rate": 8.118182623332958e-05, - "loss": 0.6618, - "step": 46400 - }, - { - "epoch": 0.000998, - "loss_gen": 3.8184378147125244, - "loss_rtd": 0.38276156783103943, - "loss_sent": 0.03661860525608063, - "loss_sod": 0.15901032090187073, - "loss_total": 0.5783904790878296, - "step": 46499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.098611831665039, - "loss_rtd": 0.38754209876060486, - "loss_sent": 0.15005142986774445, - "loss_sod": 0.04999028146266937, - "loss_total": 0.5875837802886963, - "step": 46499 - }, - { - "epoch": 0.001, - "grad_norm": 1.010020136833191, - "learning_rate": 8.115701360769527e-05, - "loss": 0.634, - "step": 46500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.693360805511475, - "loss_rtd": 0.3871980905532837, - "loss_sent": 0.08103898912668228, - "loss_sod": 0.07677353918552399, - "loss_total": 0.5450106263160706, - "step": 46599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.113877296447754, - "loss_rtd": 0.3724420368671417, - "loss_sent": 0.3423876464366913, - "loss_sod": 0.06063517928123474, - "loss_total": 0.7754648923873901, - "step": 46599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.7423297166824341, - "learning_rate": 8.113218843201964e-05, - "loss": 0.6648, - "step": 46600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.010592937469482, - "loss_rtd": 0.36755380034446716, - "loss_sent": 0.1772671639919281, - "loss_sod": 0.07207248359918594, - "loss_total": 0.6168934106826782, - "step": 46699 - }, - { - "epoch": 0.001398, - "loss_gen": 4.1527791023254395, - "loss_rtd": 0.3596525490283966, - "loss_sent": 0.012780096381902695, - "loss_sod": 0.08750712871551514, - "loss_total": 0.45993977785110474, - "step": 46699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.2632046937942505, - "learning_rate": 8.110735071630223e-05, - "loss": 0.6344, - "step": 46700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.986955165863037, - "loss_rtd": 0.37553104758262634, - "loss_sent": 0.40427976846694946, - "loss_sod": 0.20503658056259155, - "loss_total": 0.984847366809845, - "step": 46799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.4725422859191895, - "loss_rtd": 0.3738175928592682, - "loss_sent": 0.16060931980609894, - "loss_sod": 0.005055803805589676, - "loss_total": 0.5394827127456665, - "step": 46799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.3313921689987183, - "learning_rate": 8.108250047054763e-05, - "loss": 0.6589, - "step": 46800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.980199813842773, - "loss_rtd": 0.3782990872859955, - "loss_sent": 0.14346514642238617, - "loss_sod": 0.06798546016216278, - "loss_total": 0.5897496938705444, - "step": 46899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.360611438751221, - "loss_rtd": 0.36665692925453186, - "loss_sent": 0.14531777799129486, - "loss_sod": 0.11170309782028198, - "loss_total": 0.6236777901649475, - "step": 46899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.2182822227478027, - "learning_rate": 8.10576377047656e-05, - "loss": 0.6338, - "step": 46900 - }, - { - "epoch": 0.001998, - "loss_gen": 4.34437370300293, - "loss_rtd": 0.3584587872028351, - "loss_sent": 0.15274320542812347, - "loss_sod": 0.09027344733476639, - "loss_total": 0.6014754176139832, - "step": 46999 - }, - { - "epoch": 0.001998, - "loss_gen": 3.8140687942504883, - "loss_rtd": 0.3624805510044098, - "loss_sent": 0.015718601644039154, - "loss_sod": 0.11918334662914276, - "loss_total": 0.4973824918270111, - "step": 46999 - }, - { - "epoch": 0.002, - "grad_norm": 0.7836332321166992, - "learning_rate": 8.103276242897081e-05, - "loss": 0.6446, - "step": 47000 - }, - { - "epoch": 0.002, - "eval_loss": 0.631763756275177, - "eval_runtime": 153.7159, - "eval_samples_per_second": 100.465, - "eval_steps_per_second": 0.787, - "step": 47000 - }, - { - "epoch": 0.002198, - "loss_gen": 4.455953121185303, - "loss_rtd": 0.3843330144882202, - "loss_sent": 0.17107711732387543, - "loss_sod": 0.04746630787849426, - "loss_total": 0.6028764247894287, - "step": 47099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.564654350280762, - "loss_rtd": 0.39598891139030457, - "loss_sent": 0.13187776505947113, - "loss_sod": 0.0783923864364624, - "loss_total": 0.6062590479850769, - "step": 47099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.115729808807373, - "learning_rate": 8.100787465318303e-05, - "loss": 0.656, - "step": 47100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.290502548217773, - "loss_rtd": 0.36489036679267883, - "loss_sent": 0.23262187838554382, - "loss_sod": 0.020622704178094864, - "loss_total": 0.6181349754333496, - "step": 47199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.5484747886657715, - "loss_rtd": 0.3793143033981323, - "loss_sent": 0.05421583727002144, - "loss_sod": 0.04414691403508186, - "loss_total": 0.4776770770549774, - "step": 47199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.9512256979942322, - "learning_rate": 8.098297438742703e-05, - "loss": 0.657, - "step": 47200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.363588809967041, - "loss_rtd": 0.3838651180267334, - "loss_sent": 0.08407268673181534, - "loss_sod": 0.036666594445705414, - "loss_total": 0.5046043992042542, - "step": 47299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.633561611175537, - "loss_rtd": 0.36992567777633667, - "loss_sent": 0.16519032418727875, - "loss_sod": 0.040895238518714905, - "loss_total": 0.5760112404823303, - "step": 47299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.8617016077041626, - "learning_rate": 8.095806164173265e-05, - "loss": 0.6538, - "step": 47300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.396857738494873, - "loss_rtd": 0.38025814294815063, - "loss_sent": 0.24536250531673431, - "loss_sod": 0.07464287430047989, - "loss_total": 0.700263500213623, - "step": 47399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.5637030601501465, - "loss_rtd": 0.3803219795227051, - "loss_sent": 0.12097058445215225, - "loss_sod": 0.10877542942762375, - "loss_total": 0.6100680232048035, - "step": 47399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.07035493850708, - "learning_rate": 8.093313642613476e-05, - "loss": 0.6548, - "step": 47400 - }, - { - "epoch": 0.002998, - "loss_gen": 4.511462211608887, - "loss_rtd": 0.3990772068500519, - "loss_sent": 0.2437988668680191, - "loss_sod": 0.029308389872312546, - "loss_total": 0.6721844673156738, - "step": 47499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.698146343231201, - "loss_rtd": 0.3864766061306, - "loss_sent": 0.21681182086467743, - "loss_sod": 0.15329721570014954, - "loss_total": 0.7565856575965881, - "step": 47499 - }, - { - "epoch": 0.003, - "grad_norm": 0.8426814079284668, - "learning_rate": 8.090819875067322e-05, - "loss": 0.6479, - "step": 47500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.979746341705322, - "loss_rtd": 0.3795184791088104, - "loss_sent": 0.2567595839500427, - "loss_sod": 0.09550534188747406, - "loss_total": 0.731783390045166, - "step": 47599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.728386878967285, - "loss_rtd": 0.3797706663608551, - "loss_sent": 0.10465622693300247, - "loss_sod": 0.16287440061569214, - "loss_total": 0.6473013162612915, - "step": 47599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.1490349769592285, - "learning_rate": 8.088324862539289e-05, - "loss": 0.6489, - "step": 47600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.331116676330566, - "loss_rtd": 0.36512210965156555, - "loss_sent": 0.03072904609143734, - "loss_sod": 0.030262088403105736, - "loss_total": 0.4261132478713989, - "step": 47699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.617870330810547, - "loss_rtd": 0.3699868321418762, - "loss_sent": 0.20203708112239838, - "loss_sod": 0.07554800808429718, - "loss_total": 0.6475719213485718, - "step": 47699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.831415593624115, - "learning_rate": 8.085828606034374e-05, - "loss": 0.6402, - "step": 47700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.519674777984619, - "loss_rtd": 0.3722618520259857, - "loss_sent": 0.2476036697626114, - "loss_sod": 0.07768885791301727, - "loss_total": 0.697554349899292, - "step": 47799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.585041046142578, - "loss_rtd": 0.3761773705482483, - "loss_sent": 0.09144699573516846, - "loss_sod": 0.0625835731625557, - "loss_total": 0.5302079319953918, - "step": 47799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.1098629236221313, - "learning_rate": 8.083331106558063e-05, - "loss": 0.6385, - "step": 47800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.606651782989502, - "loss_rtd": 0.3894237279891968, - "loss_sent": 0.14827781915664673, - "loss_sod": 0.08602949231863022, - "loss_total": 0.6237310171127319, - "step": 47899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.438484191894531, - "loss_rtd": 0.3605833649635315, - "loss_sent": 0.24012890458106995, - "loss_sod": 0.02536821737885475, - "loss_total": 0.6260805130004883, - "step": 47899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.7561204433441162, - "learning_rate": 8.080832365116353e-05, - "loss": 0.6584, - "step": 47900 - }, - { - "epoch": 0.003998, - "loss_gen": 4.2903242111206055, - "loss_rtd": 0.36389920115470886, - "loss_sent": 0.08843392133712769, - "loss_sod": 0.031522657722234726, - "loss_total": 0.4838557839393616, - "step": 47999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.898620128631592, - "loss_rtd": 0.3597998321056366, - "loss_sent": 0.16160465776920319, - "loss_sod": 0.10634209215641022, - "loss_total": 0.62774658203125, - "step": 47999 - }, - { - "epoch": 0.004, - "grad_norm": 0.942060112953186, - "learning_rate": 8.078332382715734e-05, - "loss": 0.6421, - "step": 48000 - }, - { - "epoch": 0.004, - "eval_loss": 0.6217095851898193, - "eval_runtime": 151.3111, - "eval_samples_per_second": 102.061, - "eval_steps_per_second": 0.8, - "step": 48000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.645877361297607, - "loss_rtd": 0.39046603441238403, - "loss_sent": 0.23971006274223328, - "loss_sod": 0.0026833200827240944, - "loss_total": 0.6328594088554382, - "step": 48099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.066583633422852, - "loss_rtd": 0.364145427942276, - "loss_sent": 0.3097667396068573, - "loss_sod": 0.05506974458694458, - "loss_total": 0.7289819121360779, - "step": 48099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.9875011444091797, - "learning_rate": 8.075831160363199e-05, - "loss": 0.647, - "step": 48100 - }, - { - "epoch": 0.004398, - "loss_gen": 4.433474063873291, - "loss_rtd": 0.3740593492984772, - "loss_sent": 0.2557953894138336, - "loss_sod": 0.040519848465919495, - "loss_total": 0.6703746318817139, - "step": 48199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.800586700439453, - "loss_rtd": 0.37522590160369873, - "loss_sent": 0.21444040536880493, - "loss_sod": 0.023426318541169167, - "loss_total": 0.6130926609039307, - "step": 48199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.2088860273361206, - "learning_rate": 8.073328699066241e-05, - "loss": 0.6474, - "step": 48200 - }, - { - "epoch": 0.004598, - "loss_gen": 4.785979747772217, - "loss_rtd": 0.38683080673217773, - "loss_sent": 0.27298280596733093, - "loss_sod": 0.11252139508724213, - "loss_total": 0.7723350524902344, - "step": 48299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.735990524291992, - "loss_rtd": 0.38740187883377075, - "loss_sent": 0.33720114827156067, - "loss_sod": 0.06438102573156357, - "loss_total": 0.7889840602874756, - "step": 48299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.7969639301300049, - "learning_rate": 8.07082499983285e-05, - "loss": 0.6591, - "step": 48300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.53634786605835, - "loss_rtd": 0.3495190739631653, - "loss_sent": 0.26451629400253296, - "loss_sod": 0.1641450822353363, - "loss_total": 0.7781804203987122, - "step": 48399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.199224948883057, - "loss_rtd": 0.38749638199806213, - "loss_sent": 0.24783599376678467, - "loss_sod": 0.08294946700334549, - "loss_total": 0.7182818651199341, - "step": 48399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.0061957836151123, - "learning_rate": 8.068320063671517e-05, - "loss": 0.6358, - "step": 48400 - }, - { - "epoch": 0.004998, - "loss_gen": 4.236148834228516, - "loss_rtd": 0.384494423866272, - "loss_sent": 0.24837873876094818, - "loss_sod": 0.011858688667416573, - "loss_total": 0.644731879234314, - "step": 48499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.68426513671875, - "loss_rtd": 0.3773494362831116, - "loss_sent": 0.05412619560956955, - "loss_sod": 0.009332123212516308, - "loss_total": 0.4408077597618103, - "step": 48499 - }, - { - "epoch": 0.005, - "grad_norm": 0.7440939545631409, - "learning_rate": 8.065813891591229e-05, - "loss": 0.6548, - "step": 48500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.069899559020996, - "loss_rtd": 0.3818332850933075, - "loss_sent": 0.2279142588376999, - "loss_sod": 0.05160973593592644, - "loss_total": 0.6613572835922241, - "step": 48599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.452792167663574, - "loss_rtd": 0.3705146610736847, - "loss_sent": 0.25298821926116943, - "loss_sod": 0.10445769131183624, - "loss_total": 0.7279605865478516, - "step": 48599 - }, - { - "epoch": 0.0052, - "grad_norm": 2.2869088649749756, - "learning_rate": 8.063306484601472e-05, - "loss": 0.6486, - "step": 48600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.08968448638916, - "loss_rtd": 0.38225290179252625, - "loss_sent": 0.07657121121883392, - "loss_sod": 0.21210332214832306, - "loss_total": 0.6709274649620056, - "step": 48699 - }, - { - "epoch": 0.005398, - "loss_gen": 3.7614517211914062, - "loss_rtd": 0.36560240387916565, - "loss_sent": 6.131923146313056e-05, - "loss_sod": 0.2490212619304657, - "loss_total": 0.6146849989891052, - "step": 48699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.0264867544174194, - "learning_rate": 8.06079784371223e-05, - "loss": 0.6448, - "step": 48700 - }, - { - "epoch": 0.005598, - "loss_gen": 4.340548038482666, - "loss_rtd": 0.39176028966903687, - "loss_sent": 0.17097064852714539, - "loss_sod": 0.01702743023633957, - "loss_total": 0.5797584056854248, - "step": 48799 - }, - { - "epoch": 0.005598, - "loss_gen": 4.355989933013916, - "loss_rtd": 0.384242445230484, - "loss_sent": 0.21529555320739746, - "loss_sod": 0.03836958855390549, - "loss_total": 0.6379076242446899, - "step": 48799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.3593602180480957, - "learning_rate": 8.05828796993398e-05, - "loss": 0.644, - "step": 48800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.421856880187988, - "loss_rtd": 0.37311193346977234, - "loss_sent": 0.46581143140792847, - "loss_sod": 0.016459614038467407, - "loss_total": 0.8553829789161682, - "step": 48899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.63135290145874, - "loss_rtd": 0.3631671667098999, - "loss_sent": 0.17805886268615723, - "loss_sod": 0.02936861291527748, - "loss_total": 0.5705946683883667, - "step": 48899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.2808173894882202, - "learning_rate": 8.055776864277704e-05, - "loss": 0.644, - "step": 48900 - }, - { - "epoch": 0.005998, - "loss_gen": 4.752480506896973, - "loss_rtd": 0.3505268692970276, - "loss_sent": 0.11548473685979843, - "loss_sod": 0.0524880513548851, - "loss_total": 0.5184996724128723, - "step": 48999 - }, - { - "epoch": 0.005998, - "loss_gen": 4.753820896148682, - "loss_rtd": 0.3656466007232666, - "loss_sent": 0.16045430302619934, - "loss_sod": 0.01331318262964487, - "loss_total": 0.5394140481948853, - "step": 48999 - }, - { - "epoch": 0.006, - "grad_norm": 1.23256516456604, - "learning_rate": 8.053264527754871e-05, - "loss": 0.6408, - "step": 49000 - }, - { - "epoch": 0.006, - "eval_loss": 0.625141441822052, - "eval_runtime": 152.3065, - "eval_samples_per_second": 101.394, - "eval_steps_per_second": 0.794, - "step": 49000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.363494873046875, - "loss_rtd": 0.36784136295318604, - "loss_sent": 0.3462200164794922, - "loss_sod": 0.017456237226724625, - "loss_total": 0.7315176129341125, - "step": 49099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.641960620880127, - "loss_rtd": 0.3982604444026947, - "loss_sent": 0.400336354970932, - "loss_sod": 0.012596643529832363, - "loss_total": 0.8111934661865234, - "step": 49099 - }, - { - "epoch": 0.0062, - "grad_norm": 2.1316230297088623, - "learning_rate": 8.050750961377454e-05, - "loss": 0.6428, - "step": 49100 - }, - { - "epoch": 0.006398, - "loss_gen": 3.52602219581604, - "loss_rtd": 0.3481273949146271, - "loss_sent": 0.03384261578321457, - "loss_sod": 0.1904320865869522, - "loss_total": 0.5724021196365356, - "step": 49199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.659766674041748, - "loss_rtd": 0.3823029696941376, - "loss_sent": 0.3093835413455963, - "loss_sod": 0.13419994711875916, - "loss_total": 0.8258864879608154, - "step": 49199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.3852447271347046, - "learning_rate": 8.048236166157912e-05, - "loss": 0.6398, - "step": 49200 - }, - { - "epoch": 0.006598, - "loss_gen": 3.8257486820220947, - "loss_rtd": 0.3698168098926544, - "loss_sent": 0.042054060846567154, - "loss_sod": 0.16035039722919464, - "loss_total": 0.5722212791442871, - "step": 49299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.6005539894104, - "loss_rtd": 0.3794531226158142, - "loss_sent": 0.2771409749984741, - "loss_sod": 0.052813343703746796, - "loss_total": 0.7094074487686157, - "step": 49299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.4007338285446167, - "learning_rate": 8.045720143109208e-05, - "loss": 0.6283, - "step": 49300 - }, - { - "epoch": 0.006798, - "loss_gen": 4.497256278991699, - "loss_rtd": 0.3654901087284088, - "loss_sent": 0.270648330450058, - "loss_sod": 0.2049960047006607, - "loss_total": 0.8411344289779663, - "step": 49399 - }, - { - "epoch": 0.006798, - "loss_gen": 4.614016532897949, - "loss_rtd": 0.3822830319404602, - "loss_sent": 0.19587868452072144, - "loss_sod": 0.032051846385002136, - "loss_total": 0.610213577747345, - "step": 49399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.296242594718933, - "learning_rate": 8.043202893244793e-05, - "loss": 0.6611, - "step": 49400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.415952682495117, - "loss_rtd": 0.3904218375682831, - "loss_sent": 0.1924910992383957, - "loss_sod": 0.0072877234779298306, - "loss_total": 0.590200662612915, - "step": 49499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.368340969085693, - "loss_rtd": 0.3858594000339508, - "loss_sent": 0.47357892990112305, - "loss_sod": 0.024555889889597893, - "loss_total": 0.8839942216873169, - "step": 49499 - }, - { - "epoch": 0.007, - "grad_norm": 1.4348816871643066, - "learning_rate": 8.040684417578617e-05, - "loss": 0.6278, - "step": 49500 - }, - { - "epoch": 0.007198, - "loss_gen": 3.6858749389648438, - "loss_rtd": 0.37742456793785095, - "loss_sent": 4.36127302236855e-05, - "loss_sod": 0.22768960893154144, - "loss_total": 0.6051577925682068, - "step": 49599 - }, - { - "epoch": 0.007198, - "loss_gen": 3.604168653488159, - "loss_rtd": 0.3806535005569458, - "loss_sent": 6.0545422456925735e-05, - "loss_sod": 0.1864035576581955, - "loss_total": 0.5671176314353943, - "step": 49599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.1366004943847656, - "learning_rate": 8.038164717125123e-05, - "loss": 0.6474, - "step": 49600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.598033905029297, - "loss_rtd": 0.3724918067455292, - "loss_sent": 0.15030938386917114, - "loss_sod": 0.06815709173679352, - "loss_total": 0.590958297252655, - "step": 49699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.401187896728516, - "loss_rtd": 0.3982204496860504, - "loss_sent": 0.2785221338272095, - "loss_sod": 0.030482318252325058, - "loss_total": 0.7072249054908752, - "step": 49699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.4378366470336914, - "learning_rate": 8.035643792899243e-05, - "loss": 0.6496, - "step": 49700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.564473628997803, - "loss_rtd": 0.3646455407142639, - "loss_sent": 0.40471068024635315, - "loss_sod": 0.06631335616111755, - "loss_total": 0.8356695771217346, - "step": 49799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.528884410858154, - "loss_rtd": 0.3850195109844208, - "loss_sent": 0.250173956155777, - "loss_sod": 0.07861298322677612, - "loss_total": 0.7138064503669739, - "step": 49799 - }, - { - "epoch": 0.0076, - "grad_norm": 2.3664045333862305, - "learning_rate": 8.033121645916407e-05, - "loss": 0.6501, - "step": 49800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.976553440093994, - "loss_rtd": 0.38571128249168396, - "loss_sent": 0.4492168426513672, - "loss_sod": 0.05952766165137291, - "loss_total": 0.8944557905197144, - "step": 49899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.6225996017456055, - "loss_rtd": 0.3705814778804779, - "loss_sent": 0.19304804503917694, - "loss_sod": 0.059937480837106705, - "loss_total": 0.6235669851303101, - "step": 49899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.488413691520691, - "learning_rate": 8.030598277192533e-05, - "loss": 0.6436, - "step": 49900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.49429178237915, - "loss_rtd": 0.38548919558525085, - "loss_sent": 0.34847933053970337, - "loss_sod": 0.031637679785490036, - "loss_total": 0.765606164932251, - "step": 49999 - }, - { - "epoch": 0.007998, - "loss_gen": 4.457889556884766, - "loss_rtd": 0.37796905636787415, - "loss_sent": 0.15737062692642212, - "loss_sod": 0.05866087228059769, - "loss_total": 0.5940005779266357, - "step": 49999 - }, - { - "epoch": 0.008, - "grad_norm": 1.426696538925171, - "learning_rate": 8.028073687744037e-05, - "loss": 0.6535, - "step": 50000 - }, - { - "epoch": 0.008, - "eval_loss": 0.6143267154693604, - "eval_runtime": 151.025, - "eval_samples_per_second": 102.255, - "eval_steps_per_second": 0.801, - "step": 50000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.058149814605713, - "loss_rtd": 0.3683951795101166, - "loss_sent": 0.30114972591400146, - "loss_sod": 0.03265371918678284, - "loss_total": 0.7021986246109009, - "step": 50099 - }, - { - "epoch": 0.008198, - "loss_gen": 3.9967827796936035, - "loss_rtd": 0.375331312417984, - "loss_sent": 0.007260024547576904, - "loss_sod": 0.33007389307022095, - "loss_total": 0.7126652598381042, - "step": 50099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.1502575874328613, - "learning_rate": 8.025547878587822e-05, - "loss": 0.6521, - "step": 50100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.803343772888184, - "loss_rtd": 0.37026453018188477, - "loss_sent": 1.1277397871017456, - "loss_sod": 0.06365317106246948, - "loss_total": 1.561657428741455, - "step": 50199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.387077808380127, - "loss_rtd": 0.3710021674633026, - "loss_sent": 0.3957918584346771, - "loss_sod": 0.05120411515235901, - "loss_total": 0.8179981708526611, - "step": 50199 - }, - { - "epoch": 0.0084, - "grad_norm": 4.342261791229248, - "learning_rate": 8.023020850741283e-05, - "loss": 0.6595, - "step": 50200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.672738552093506, - "loss_rtd": 0.3915119171142578, - "loss_sent": 0.21460790932178497, - "loss_sod": 0.18659816682338715, - "loss_total": 0.7927179932594299, - "step": 50299 - }, - { - "epoch": 0.008598, - "loss_gen": 3.972071886062622, - "loss_rtd": 0.3741694390773773, - "loss_sent": 0.0057837218046188354, - "loss_sod": 0.14437425136566162, - "loss_total": 0.5243274569511414, - "step": 50299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.3088226318359375, - "learning_rate": 8.020492605222307e-05, - "loss": 0.6495, - "step": 50300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.626949787139893, - "loss_rtd": 0.3816995620727539, - "loss_sent": 0.16593150794506073, - "loss_sod": 0.075348399579525, - "loss_total": 0.622979462146759, - "step": 50399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.743112087249756, - "loss_rtd": 0.36963286995887756, - "loss_sent": 0.3240692913532257, - "loss_sod": 0.026848290115594864, - "loss_total": 0.7205504179000854, - "step": 50399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.5165996551513672, - "learning_rate": 8.01796314304927e-05, - "loss": 0.6421, - "step": 50400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.586883544921875, - "loss_rtd": 0.3631702661514282, - "loss_sent": 0.3149803578853607, - "loss_sod": 0.027500925585627556, - "loss_total": 0.7056515216827393, - "step": 50499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.823122501373291, - "loss_rtd": 0.3803825378417969, - "loss_sent": 0.15825589001178741, - "loss_sod": 0.09417456388473511, - "loss_total": 0.6328129768371582, - "step": 50499 - }, - { - "epoch": 0.009, - "grad_norm": 2.1964752674102783, - "learning_rate": 8.015432465241039e-05, - "loss": 0.6484, - "step": 50500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.64653205871582, - "loss_rtd": 0.3533076047897339, - "loss_sent": 0.27814698219299316, - "loss_sod": 0.06675729155540466, - "loss_total": 0.6982119083404541, - "step": 50599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.527213096618652, - "loss_rtd": 0.35981041193008423, - "loss_sent": 0.10821152478456497, - "loss_sod": 0.14620551466941833, - "loss_total": 0.6142274141311646, - "step": 50599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.4024051427841187, - "learning_rate": 8.012900572816973e-05, - "loss": 0.6389, - "step": 50600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.538909435272217, - "loss_rtd": 0.3922789394855499, - "loss_sent": 0.1413494348526001, - "loss_sod": 0.025570761412382126, - "loss_total": 0.5591990947723389, - "step": 50699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.608863830566406, - "loss_rtd": 0.3743917644023895, - "loss_sent": 0.1731317639350891, - "loss_sod": 0.029530156403779984, - "loss_total": 0.5770536661148071, - "step": 50699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.3676615953445435, - "learning_rate": 8.010367466796917e-05, - "loss": 0.6389, - "step": 50700 - }, - { - "epoch": 0.009598, - "loss_gen": 3.994856834411621, - "loss_rtd": 0.3614933490753174, - "loss_sent": 0.005994033999741077, - "loss_sod": 0.2948499321937561, - "loss_total": 0.6623373031616211, - "step": 50799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.143596649169922, - "loss_rtd": 0.36337488889694214, - "loss_sent": 0.004016831982880831, - "loss_sod": 0.23833590745925903, - "loss_total": 0.6057276129722595, - "step": 50799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.218487024307251, - "learning_rate": 8.007833148201205e-05, - "loss": 0.656, - "step": 50800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.584593772888184, - "loss_rtd": 0.3748578727245331, - "loss_sent": 0.11570402979850769, - "loss_sod": 0.010673943907022476, - "loss_total": 0.501235842704773, - "step": 50899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.303126335144043, - "loss_rtd": 0.38027480244636536, - "loss_sent": 0.27084439992904663, - "loss_sod": 0.04908788576722145, - "loss_total": 0.7002071142196655, - "step": 50899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.1877902746200562, - "learning_rate": 8.00529761805066e-05, - "loss": 0.6305, - "step": 50900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.418270587921143, - "loss_rtd": 0.3844428062438965, - "loss_sent": 0.1653502732515335, - "loss_sod": 0.14827539026737213, - "loss_total": 0.6980684399604797, - "step": 50999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.313439846038818, - "loss_rtd": 0.3674570620059967, - "loss_sent": 0.015818167477846146, - "loss_sod": 0.13971030712127686, - "loss_total": 0.5229855179786682, - "step": 50999 - }, - { - "epoch": 0.01, - "grad_norm": 0.9348272085189819, - "learning_rate": 8.002760877366594e-05, - "loss": 0.6529, - "step": 51000 - }, - { - "epoch": 0.01, - "eval_loss": 0.6201492547988892, - "eval_runtime": 150.9105, - "eval_samples_per_second": 102.332, - "eval_steps_per_second": 0.802, - "step": 51000 - }, - { - "epoch": 0.010198, - "loss_gen": 4.538188934326172, - "loss_rtd": 0.38228562474250793, - "loss_sent": 0.21149754524230957, - "loss_sod": 0.057641901075839996, - "loss_total": 0.6514250636100769, - "step": 51099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.536489963531494, - "loss_rtd": 0.3845718801021576, - "loss_sent": 0.13618037104606628, - "loss_sod": 0.01992986351251602, - "loss_total": 0.5406820774078369, - "step": 51099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.323643684387207, - "learning_rate": 8.000222927170806e-05, - "loss": 0.6422, - "step": 51100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.168093204498291, - "loss_rtd": 0.3785075545310974, - "loss_sent": 0.10471253842115402, - "loss_sod": 0.003981872461736202, - "loss_total": 0.4872019588947296, - "step": 51199 - }, - { - "epoch": 0.010398, - "loss_gen": 4.457294464111328, - "loss_rtd": 0.3768188953399658, - "loss_sent": 0.1765183061361313, - "loss_sod": 0.11947241425514221, - "loss_total": 0.6728096008300781, - "step": 51199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.7407876253128052, - "learning_rate": 7.997683768485582e-05, - "loss": 0.6439, - "step": 51200 - }, - { - "epoch": 0.010598, - "loss_gen": 4.623007774353027, - "loss_rtd": 0.36841443181037903, - "loss_sent": 0.3081963062286377, - "loss_sod": 0.06185394525527954, - "loss_total": 0.7384647130966187, - "step": 51299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.464491844177246, - "loss_rtd": 0.3853316009044647, - "loss_sent": 0.20332349836826324, - "loss_sod": 0.07168055325746536, - "loss_total": 0.6603356599807739, - "step": 51299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9847090840339661, - "learning_rate": 7.995143402333693e-05, - "loss": 0.6452, - "step": 51300 - }, - { - "epoch": 0.010798, - "loss_gen": 4.834001064300537, - "loss_rtd": 0.37550100684165955, - "loss_sent": 0.1689801961183548, - "loss_sod": 0.017282772809267044, - "loss_total": 0.5617640018463135, - "step": 51399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.595366477966309, - "loss_rtd": 0.36696791648864746, - "loss_sent": 0.08832325786352158, - "loss_sod": 0.02172490954399109, - "loss_total": 0.4770160913467407, - "step": 51399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.0254968404769897, - "learning_rate": 7.992601829738401e-05, - "loss": 0.6249, - "step": 51400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.401004314422607, - "loss_rtd": 0.37882962822914124, - "loss_sent": 0.18529866635799408, - "loss_sod": 0.15116427838802338, - "loss_total": 0.7152925729751587, - "step": 51499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.549426078796387, - "loss_rtd": 0.3743341565132141, - "loss_sent": 0.3842788636684418, - "loss_sod": 0.043490782380104065, - "loss_total": 0.8021037578582764, - "step": 51499 - }, - { - "epoch": 0.011, - "grad_norm": 2.514681816101074, - "learning_rate": 7.99005905172345e-05, - "loss": 0.6283, - "step": 51500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.237088680267334, - "loss_rtd": 0.36371999979019165, - "loss_sent": 0.20528589189052582, - "loss_sod": 0.025060106068849564, - "loss_total": 0.5940660238265991, - "step": 51599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.785157203674316, - "loss_rtd": 0.35488858819007874, - "loss_sent": 0.22521014511585236, - "loss_sod": 0.08783333748579025, - "loss_total": 0.6679320931434631, - "step": 51599 - }, - { - "epoch": 0.0112, - "grad_norm": 2.1390838623046875, - "learning_rate": 7.98751506931307e-05, - "loss": 0.6395, - "step": 51600 - }, - { - "epoch": 0.011398, - "loss_gen": 3.7305500507354736, - "loss_rtd": 0.36545830965042114, - "loss_sent": 0.09047655761241913, - "loss_sod": 0.060882482677698135, - "loss_total": 0.5168173313140869, - "step": 51699 - }, - { - "epoch": 0.011398, - "loss_gen": 3.937504768371582, - "loss_rtd": 0.3714247941970825, - "loss_sent": 0.03422936052083969, - "loss_sod": 0.1963171362876892, - "loss_total": 0.6019712686538696, - "step": 51699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.8457944393157959, - "learning_rate": 7.984969883531977e-05, - "loss": 0.6354, - "step": 51700 - }, - { - "epoch": 0.011598, - "loss_gen": 4.4090070724487305, - "loss_rtd": 0.3685651123523712, - "loss_sent": 0.2501409649848938, - "loss_sod": 0.10854386538267136, - "loss_total": 0.7272499799728394, - "step": 51799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.1526689529418945, - "loss_rtd": 0.3585469722747803, - "loss_sent": 0.05364396050572395, - "loss_sod": 0.046622730791568756, - "loss_total": 0.4588136672973633, - "step": 51799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.0936580896377563, - "learning_rate": 7.982423495405373e-05, - "loss": 0.6519, - "step": 51800 - }, - { - "epoch": 0.011798, - "loss_gen": 4.608326435089111, - "loss_rtd": 0.36813756823539734, - "loss_sent": 0.13579003512859344, - "loss_sod": 0.029388481751084328, - "loss_total": 0.5333160758018494, - "step": 51899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.717540264129639, - "loss_rtd": 0.3761679530143738, - "loss_sent": 0.14211294054985046, - "loss_sod": 0.05682980641722679, - "loss_total": 0.575110673904419, - "step": 51899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.7849305868148804, - "learning_rate": 7.979875905958942e-05, - "loss": 0.6327, - "step": 51900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.289188385009766, - "loss_rtd": 0.3613738715648651, - "loss_sent": 0.03220400959253311, - "loss_sod": 0.12358319014310837, - "loss_total": 0.5171610713005066, - "step": 51999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.805292129516602, - "loss_rtd": 0.37166833877563477, - "loss_sent": 0.46547287702560425, - "loss_sod": 0.08356957137584686, - "loss_total": 0.9207108020782471, - "step": 51999 - }, - { - "epoch": 0.012, - "grad_norm": 1.9747730493545532, - "learning_rate": 7.977327116218851e-05, - "loss": 0.638, - "step": 52000 - }, - { - "epoch": 0.012, - "eval_loss": 0.617628276348114, - "eval_runtime": 151.2384, - "eval_samples_per_second": 102.11, - "eval_steps_per_second": 0.8, - "step": 52000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.451638221740723, - "loss_rtd": 0.3651028871536255, - "loss_sent": 0.4473474323749542, - "loss_sod": 0.07271867990493774, - "loss_total": 0.8851690292358398, - "step": 52099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.706387996673584, - "loss_rtd": 0.3877633512020111, - "loss_sent": 0.375333696603775, - "loss_sod": 0.03720249608159065, - "loss_total": 0.8002995252609253, - "step": 52099 - }, - { - "epoch": 0.0122, - "grad_norm": 2.543621301651001, - "learning_rate": 7.974777127211755e-05, - "loss": 0.6345, - "step": 52100 - }, - { - "epoch": 0.012398, - "loss_gen": 4.720627784729004, - "loss_rtd": 0.37399521470069885, - "loss_sent": 0.37357261776924133, - "loss_sod": 0.06452462822198868, - "loss_total": 0.8120924234390259, - "step": 52199 - }, - { - "epoch": 0.012398, - "loss_gen": 4.308831691741943, - "loss_rtd": 0.37242117524147034, - "loss_sent": 0.09280207008123398, - "loss_sod": 0.16130012273788452, - "loss_total": 0.6265233755111694, - "step": 52199 - }, - { - "epoch": 0.0124, - "grad_norm": 2.0667686462402344, - "learning_rate": 7.972225939964786e-05, - "loss": 0.6393, - "step": 52200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.100876808166504, - "loss_rtd": 0.39659249782562256, - "loss_sent": 0.15490469336509705, - "loss_sod": 0.011322874575853348, - "loss_total": 0.5628200769424438, - "step": 52299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.57742166519165, - "loss_rtd": 0.38593554496765137, - "loss_sent": 0.3413234353065491, - "loss_sod": 0.039375368505716324, - "loss_total": 0.7666343450546265, - "step": 52299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.801899790763855, - "learning_rate": 7.969673555505566e-05, - "loss": 0.6517, - "step": 52300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.279021263122559, - "loss_rtd": 0.3841288685798645, - "loss_sent": 0.08024395257234573, - "loss_sod": 0.001899349270388484, - "loss_total": 0.46627217531204224, - "step": 52399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.565447807312012, - "loss_rtd": 0.3720090687274933, - "loss_sent": 0.06411219388246536, - "loss_sod": 0.05488793924450874, - "loss_total": 0.4910092055797577, - "step": 52399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.7367493510246277, - "learning_rate": 7.967119974862192e-05, - "loss": 0.6567, - "step": 52400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.1065802574157715, - "loss_rtd": 0.3532801568508148, - "loss_sent": 0.23175480961799622, - "loss_sod": 0.10666516423225403, - "loss_total": 0.6917001008987427, - "step": 52499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.823935508728027, - "loss_rtd": 0.37401440739631653, - "loss_sent": 0.1524634212255478, - "loss_sod": 0.04514380916953087, - "loss_total": 0.5716216564178467, - "step": 52499 - }, - { - "epoch": 0.013, - "grad_norm": 1.4354084730148315, - "learning_rate": 7.964565199063246e-05, - "loss": 0.6451, - "step": 52500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.656192302703857, - "loss_rtd": 0.37465882301330566, - "loss_sent": 0.4301958978176117, - "loss_sod": 0.02238663285970688, - "loss_total": 0.8272413611412048, - "step": 52599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.415437698364258, - "loss_rtd": 0.3755865693092346, - "loss_sent": 0.17707963287830353, - "loss_sod": 0.10570264607667923, - "loss_total": 0.6583688259124756, - "step": 52599 - }, - { - "epoch": 0.0132, - "grad_norm": 2.0551586151123047, - "learning_rate": 7.962009229137794e-05, - "loss": 0.6374, - "step": 52600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.528020858764648, - "loss_rtd": 0.36643391847610474, - "loss_sent": 0.2107040286064148, - "loss_sod": 0.08042527735233307, - "loss_total": 0.6575632095336914, - "step": 52699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.34305477142334, - "loss_rtd": 0.3671649396419525, - "loss_sent": 0.11882704496383667, - "loss_sod": 0.03933439403772354, - "loss_total": 0.5253263711929321, - "step": 52699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.9808692336082458, - "learning_rate": 7.959452066115378e-05, - "loss": 0.6453, - "step": 52700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.5166168212890625, - "loss_rtd": 0.3739977777004242, - "loss_sent": 0.05713487043976784, - "loss_sod": 0.024063939228653908, - "loss_total": 0.4551965892314911, - "step": 52799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.536118030548096, - "loss_rtd": 0.3654595911502838, - "loss_sent": 0.12023632973432541, - "loss_sod": 0.027032244950532913, - "loss_total": 0.5127281546592712, - "step": 52799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.7415221333503723, - "learning_rate": 7.956893711026023e-05, - "loss": 0.6476, - "step": 52800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.027712821960449, - "loss_rtd": 0.3675023317337036, - "loss_sent": 0.14814315736293793, - "loss_sod": 0.0724860429763794, - "loss_total": 0.5881315469741821, - "step": 52899 - }, - { - "epoch": 0.013798, - "loss_gen": 4.500967979431152, - "loss_rtd": 0.3762381672859192, - "loss_sent": 0.3494221866130829, - "loss_sod": 0.03951923921704292, - "loss_total": 0.7651796340942383, - "step": 52899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.3134151697158813, - "learning_rate": 7.954334164900235e-05, - "loss": 0.647, - "step": 52900 - }, - { - "epoch": 0.013998, - "loss_gen": 4.35737943649292, - "loss_rtd": 0.36826416850090027, - "loss_sent": 0.05364976450800896, - "loss_sod": 0.006631971336901188, - "loss_total": 0.42854589223861694, - "step": 52999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.280889987945557, - "loss_rtd": 0.37097907066345215, - "loss_sent": 0.06889402866363525, - "loss_sod": 0.1725533902645111, - "loss_total": 0.6124265193939209, - "step": 52999 - }, - { - "epoch": 0.014, - "grad_norm": 0.7555345892906189, - "learning_rate": 7.951773428769001e-05, - "loss": 0.6255, - "step": 53000 - }, - { - "epoch": 0.014, - "eval_loss": 0.62068110704422, - "eval_runtime": 150.9954, - "eval_samples_per_second": 102.275, - "eval_steps_per_second": 0.801, - "step": 53000 - }, - { - "epoch": 0.014198, - "loss_gen": 4.6845703125, - "loss_rtd": 0.3711201548576355, - "loss_sent": 0.24300257861614227, - "loss_sod": 0.1730903834104538, - "loss_total": 0.787213146686554, - "step": 53099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.5488152503967285, - "loss_rtd": 0.36780834197998047, - "loss_sent": 0.07398200035095215, - "loss_sod": 0.06251496821641922, - "loss_total": 0.5043053030967712, - "step": 53099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.8355894088745117, - "learning_rate": 7.94921150366378e-05, - "loss": 0.647, - "step": 53100 - }, - { - "epoch": 0.014398, - "loss_gen": 3.4567711353302, - "loss_rtd": 0.3385269343852997, - "loss_sent": 0.0021842813584953547, - "loss_sod": 0.16379128396511078, - "loss_total": 0.5045024752616882, - "step": 53199 - }, - { - "epoch": 0.014398, - "loss_gen": 4.636423587799072, - "loss_rtd": 0.3755619525909424, - "loss_sent": 0.08017344772815704, - "loss_sod": 0.09557287395000458, - "loss_total": 0.551308274269104, - "step": 53199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.9463667273521423, - "learning_rate": 7.94664839061652e-05, - "loss": 0.6457, - "step": 53200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.0381269454956055, - "loss_rtd": 0.360150545835495, - "loss_sent": 0.050466492772102356, - "loss_sod": 0.0955638661980629, - "loss_total": 0.5061808824539185, - "step": 53299 - }, - { - "epoch": 0.014598, - "loss_gen": 4.728142261505127, - "loss_rtd": 0.3685055673122406, - "loss_sent": 0.19894526898860931, - "loss_sod": 0.02157244086265564, - "loss_total": 0.5890232920646667, - "step": 53299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.9717921018600464, - "learning_rate": 7.944084090659637e-05, - "loss": 0.6279, - "step": 53300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.582457542419434, - "loss_rtd": 0.38222405314445496, - "loss_sent": 0.5896296501159668, - "loss_sod": 0.04997697472572327, - "loss_total": 1.021830677986145, - "step": 53399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.844758033752441, - "loss_rtd": 0.3863065540790558, - "loss_sent": 0.3215869665145874, - "loss_sod": 0.02089056558907032, - "loss_total": 0.7287840843200684, - "step": 53399 - }, - { - "epoch": 0.0148, - "grad_norm": 2.3949897289276123, - "learning_rate": 7.941518604826039e-05, - "loss": 0.6434, - "step": 53400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.715126991271973, - "loss_rtd": 0.3721052408218384, - "loss_sent": 0.1290234476327896, - "loss_sod": 0.05557714402675629, - "loss_total": 0.5567058324813843, - "step": 53499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.461250305175781, - "loss_rtd": 0.3816608488559723, - "loss_sent": 0.22881415486335754, - "loss_sod": 0.04375752806663513, - "loss_total": 0.6542325019836426, - "step": 53499 - }, - { - "epoch": 0.015, - "grad_norm": 0.9130325317382812, - "learning_rate": 7.938951934149096e-05, - "loss": 0.6398, - "step": 53500 - }, - { - "epoch": 0.015198, - "loss_gen": 4.400059223175049, - "loss_rtd": 0.3567712903022766, - "loss_sent": 0.19402320683002472, - "loss_sod": 0.011536471545696259, - "loss_total": 0.562330961227417, - "step": 53599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.636370658874512, - "loss_rtd": 0.36850762367248535, - "loss_sent": 0.1500333547592163, - "loss_sod": 0.04156222939491272, - "loss_total": 0.560103178024292, - "step": 53599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.8094152212142944, - "learning_rate": 7.936384079662666e-05, - "loss": 0.6531, - "step": 53600 - }, - { - "epoch": 0.015398, - "loss_gen": 4.712112903594971, - "loss_rtd": 0.3494918644428253, - "loss_sent": 0.40711623430252075, - "loss_sod": 0.022313233464956284, - "loss_total": 0.778921365737915, - "step": 53699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.587061405181885, - "loss_rtd": 0.3805294930934906, - "loss_sent": 0.1808282434940338, - "loss_sod": 0.06312863528728485, - "loss_total": 0.6244863271713257, - "step": 53699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.9854497313499451, - "learning_rate": 7.93381504240108e-05, - "loss": 0.633, - "step": 53700 - }, - { - "epoch": 0.015598, - "loss_gen": 4.5896477699279785, - "loss_rtd": 0.375474750995636, - "loss_sent": 0.1698981076478958, - "loss_sod": 0.11692309379577637, - "loss_total": 0.662295937538147, - "step": 53799 - }, - { - "epoch": 0.015598, - "loss_gen": 3.8224892616271973, - "loss_rtd": 0.37994661927223206, - "loss_sent": 0.0004034222802147269, - "loss_sod": 0.294214129447937, - "loss_total": 0.6745641231536865, - "step": 53799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.1310073137283325, - "learning_rate": 7.931244823399147e-05, - "loss": 0.6466, - "step": 53800 - }, - { - "epoch": 0.015798, - "loss_gen": 3.5543196201324463, - "loss_rtd": 0.35554906725883484, - "loss_sent": 0.020370911806821823, - "loss_sod": 0.13921767473220825, - "loss_total": 0.5151376724243164, - "step": 53899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.5510029792785645, - "loss_rtd": 0.38497135043144226, - "loss_sent": 0.2585177719593048, - "loss_sod": 0.026156924664974213, - "loss_total": 0.6696460247039795, - "step": 53899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.1140100955963135, - "learning_rate": 7.928673423692148e-05, - "loss": 0.6351, - "step": 53900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.142115592956543, - "loss_rtd": 0.3558996617794037, - "loss_sent": 0.1105511337518692, - "loss_sod": 0.06997261941432953, - "loss_total": 0.53642338514328, - "step": 53999 - }, - { - "epoch": 0.015998, - "loss_gen": 4.631337642669678, - "loss_rtd": 0.38000327348709106, - "loss_sent": 0.2932110130786896, - "loss_sod": 0.028590209782123566, - "loss_total": 0.701804518699646, - "step": 53999 - }, - { - "epoch": 0.016, - "grad_norm": 0.8721669316291809, - "learning_rate": 7.926100844315844e-05, - "loss": 0.6569, - "step": 54000 - }, - { - "epoch": 0.016, - "eval_loss": 0.6154483556747437, - "eval_runtime": 151.2289, - "eval_samples_per_second": 102.117, - "eval_steps_per_second": 0.8, - "step": 54000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.182412147521973, - "loss_rtd": 0.3678227365016937, - "loss_sent": 0.10248752683401108, - "loss_sod": 0.04535181447863579, - "loss_total": 0.5156620740890503, - "step": 54099 - }, - { - "epoch": 0.016198, - "loss_gen": 3.6269350051879883, - "loss_rtd": 0.36269044876098633, - "loss_sent": 0.0004897566395811737, - "loss_sod": 0.22491425275802612, - "loss_total": 0.5880944728851318, - "step": 54099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.8176355361938477, - "learning_rate": 7.923527086306472e-05, - "loss": 0.6424, - "step": 54100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.458475112915039, - "loss_rtd": 0.3739497661590576, - "loss_sent": 0.2738564610481262, - "loss_sod": 0.015809426084160805, - "loss_total": 0.6636156439781189, - "step": 54199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.506946086883545, - "loss_rtd": 0.36992794275283813, - "loss_sent": 0.21589812636375427, - "loss_sod": 0.029434029012918472, - "loss_total": 0.615260124206543, - "step": 54199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.8911906480789185, - "learning_rate": 7.920952150700738e-05, - "loss": 0.6457, - "step": 54200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.334949493408203, - "loss_rtd": 0.3890224099159241, - "loss_sent": 0.11779635399580002, - "loss_sod": 0.06975976377725601, - "loss_total": 0.5765784978866577, - "step": 54299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.676621437072754, - "loss_rtd": 0.3606879711151123, - "loss_sent": 0.21355892717838287, - "loss_sod": 0.03849518671631813, - "loss_total": 0.6127420663833618, - "step": 54299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.5208903551101685, - "learning_rate": 7.918376038535827e-05, - "loss": 0.6406, - "step": 54300 - }, - { - "epoch": 0.016798, - "loss_gen": 4.254660606384277, - "loss_rtd": 0.3675227463245392, - "loss_sent": 0.1284371018409729, - "loss_sod": 0.011969479732215405, - "loss_total": 0.5079293251037598, - "step": 54399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.763401508331299, - "loss_rtd": 0.37080642580986023, - "loss_sent": 0.2208104133605957, - "loss_sod": 0.04189425706863403, - "loss_total": 0.6335110664367676, - "step": 54399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.5267845392227173, - "learning_rate": 7.915798750849396e-05, - "loss": 0.6357, - "step": 54400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.387042045593262, - "loss_rtd": 0.3640294373035431, - "loss_sent": 0.46577194333076477, - "loss_sod": 0.0832526832818985, - "loss_total": 0.9130541086196899, - "step": 54499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.913600921630859, - "loss_rtd": 0.3470486104488373, - "loss_sent": 0.1457686871290207, - "loss_sod": 0.02201038785278797, - "loss_total": 0.5148276686668396, - "step": 54499 - }, - { - "epoch": 0.017, - "grad_norm": 1.0791354179382324, - "learning_rate": 7.913220288679577e-05, - "loss": 0.6379, - "step": 54500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.393093109130859, - "loss_rtd": 0.3685963749885559, - "loss_sent": 0.07685188949108124, - "loss_sod": 0.008272483013570309, - "loss_total": 0.45372074842453003, - "step": 54599 - }, - { - "epoch": 0.017198, - "loss_gen": 4.638741970062256, - "loss_rtd": 0.37237581610679626, - "loss_sent": 0.08207528293132782, - "loss_sod": 0.018077945336699486, - "loss_total": 0.4725290536880493, - "step": 54599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.7968005537986755, - "learning_rate": 7.910640653064974e-05, - "loss": 0.6465, - "step": 54600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.1174421310424805, - "loss_rtd": 0.3648335039615631, - "loss_sent": 9.457358828512952e-05, - "loss_sod": 0.29401132464408875, - "loss_total": 0.6589394211769104, - "step": 54699 - }, - { - "epoch": 0.017398, - "loss_gen": 3.519338607788086, - "loss_rtd": 0.36666354537010193, - "loss_sent": 4.878496110904962e-05, - "loss_sod": 0.35522928833961487, - "loss_total": 0.7219415903091431, - "step": 54699 - }, - { - "epoch": 0.0174, - "grad_norm": 2.1006596088409424, - "learning_rate": 7.908059845044665e-05, - "loss": 0.63, - "step": 54700 - }, - { - "epoch": 0.017598, - "loss_gen": 3.734240770339966, - "loss_rtd": 0.3810313045978546, - "loss_sent": 0.00022152572637423873, - "loss_sod": 0.2045402228832245, - "loss_total": 0.5857930779457092, - "step": 54799 - }, - { - "epoch": 0.017598, - "loss_gen": 4.664052963256836, - "loss_rtd": 0.3665785789489746, - "loss_sent": 0.38680845499038696, - "loss_sod": 0.046340540051460266, - "loss_total": 0.7997275590896606, - "step": 54799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.4892998933792114, - "learning_rate": 7.905477865658197e-05, - "loss": 0.6366, - "step": 54800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.481685638427734, - "loss_rtd": 0.3766678273677826, - "loss_sent": 0.03785740211606026, - "loss_sod": 0.19254451990127563, - "loss_total": 0.607069730758667, - "step": 54899 - }, - { - "epoch": 0.017798, - "loss_gen": 3.6889336109161377, - "loss_rtd": 0.3576793372631073, - "loss_sent": 0.03508095443248749, - "loss_sod": 0.24248912930488586, - "loss_total": 0.6352494359016418, - "step": 54899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.9492436647415161, - "learning_rate": 7.902894715945593e-05, - "loss": 0.6338, - "step": 54900 - }, - { - "epoch": 0.017998, - "loss_gen": 4.717471599578857, - "loss_rtd": 0.37920039892196655, - "loss_sent": 0.12316140532493591, - "loss_sod": 0.058044034987688065, - "loss_total": 0.5604058504104614, - "step": 54999 - }, - { - "epoch": 0.017998, - "loss_gen": 4.401419162750244, - "loss_rtd": 0.37455177307128906, - "loss_sent": 0.30304399132728577, - "loss_sod": 0.052812084555625916, - "loss_total": 0.7304078340530396, - "step": 54999 - }, - { - "epoch": 0.018, - "grad_norm": 1.7150174379348755, - "learning_rate": 7.900310396947344e-05, - "loss": 0.6365, - "step": 55000 - }, - { - "epoch": 0.018, - "eval_loss": 0.6184331774711609, - "eval_runtime": 151.0815, - "eval_samples_per_second": 102.216, - "eval_steps_per_second": 0.801, - "step": 55000 - }, - { - "epoch": 0.018198, - "loss_gen": 3.9587795734405518, - "loss_rtd": 0.3572849631309509, - "loss_sent": 0.000930636830162257, - "loss_sod": 0.18941888213157654, - "loss_total": 0.5476344227790833, - "step": 55099 - }, - { - "epoch": 0.018198, - "loss_gen": 4.033356189727783, - "loss_rtd": 0.3687863349914551, - "loss_sent": 0.007204096298664808, - "loss_sod": 0.22562572360038757, - "loss_total": 0.6016162037849426, - "step": 55099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.3715091943740845, - "learning_rate": 7.897724909704417e-05, - "loss": 0.6453, - "step": 55100 - }, - { - "epoch": 0.018398, - "loss_gen": 4.626516342163086, - "loss_rtd": 0.38258397579193115, - "loss_sent": 0.1983485072851181, - "loss_sod": 0.17736437916755676, - "loss_total": 0.7582968473434448, - "step": 55199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.2777252197265625, - "loss_rtd": 0.3886966109275818, - "loss_sent": 0.24301214516162872, - "loss_sod": 0.025996133685112, - "loss_total": 0.6577048897743225, - "step": 55199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.7724279165267944, - "learning_rate": 7.895138255258238e-05, - "loss": 0.6507, - "step": 55200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.166768550872803, - "loss_rtd": 0.3680054843425751, - "loss_sent": 0.04852308705449104, - "loss_sod": 0.14644241333007812, - "loss_total": 0.5629709959030151, - "step": 55299 - }, - { - "epoch": 0.018598, - "loss_gen": 4.600881576538086, - "loss_rtd": 0.3678843080997467, - "loss_sent": 0.3402821719646454, - "loss_sod": 0.06393858790397644, - "loss_total": 0.7721050977706909, - "step": 55299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.0595279932022095, - "learning_rate": 7.892550434650718e-05, - "loss": 0.6396, - "step": 55300 - }, - { - "epoch": 0.018798, - "loss_gen": 3.6643149852752686, - "loss_rtd": 0.36610931158065796, - "loss_sent": 0.10220718383789062, - "loss_sod": 0.17650368809700012, - "loss_total": 0.6448202133178711, - "step": 55399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.971153259277344, - "loss_rtd": 0.36842429637908936, - "loss_sent": 0.1988043189048767, - "loss_sod": 0.08470700681209564, - "loss_total": 0.6519356369972229, - "step": 55399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.5318913459777832, - "learning_rate": 7.88996144892423e-05, - "loss": 0.6186, - "step": 55400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.143435001373291, - "loss_rtd": 0.3610016703605652, - "loss_sent": 0.12808094918727875, - "loss_sod": 0.11209128797054291, - "loss_total": 0.6011739373207092, - "step": 55499 - }, - { - "epoch": 0.018998, - "loss_gen": 4.701988220214844, - "loss_rtd": 0.39345940947532654, - "loss_sent": 0.22166474163532257, - "loss_sod": 0.06643322855234146, - "loss_total": 0.6815573573112488, - "step": 55499 - }, - { - "epoch": 0.019, - "grad_norm": 1.1434299945831299, - "learning_rate": 7.887371299121616e-05, - "loss": 0.6239, - "step": 55500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.342228412628174, - "loss_rtd": 0.37801849842071533, - "loss_sent": 0.19338516891002655, - "loss_sod": 0.014940367080271244, - "loss_total": 0.5863440632820129, - "step": 55599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.5737223625183105, - "loss_rtd": 0.35960954427719116, - "loss_sent": 0.4485333263874054, - "loss_sod": 0.0636458471417427, - "loss_total": 0.871788740158081, - "step": 55599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.7248344421386719, - "learning_rate": 7.884779986286186e-05, - "loss": 0.6235, - "step": 55600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.677097797393799, - "loss_rtd": 0.36578962206840515, - "loss_sent": 0.2834576368331909, - "loss_sod": 0.12220462411642075, - "loss_total": 0.7714518904685974, - "step": 55699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.656655788421631, - "loss_rtd": 0.3725653886795044, - "loss_sent": 0.28585928678512573, - "loss_sod": 0.021390490233898163, - "loss_total": 0.6798151731491089, - "step": 55699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.4569215774536133, - "learning_rate": 7.882187511461723e-05, - "loss": 0.6348, - "step": 55700 - }, - { - "epoch": 0.019598, - "loss_gen": 3.6917800903320312, - "loss_rtd": 0.3651793599128723, - "loss_sent": 0.0032284788321703672, - "loss_sod": 0.10444990545511246, - "loss_total": 0.4728577435016632, - "step": 55799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.053613662719727, - "loss_rtd": 0.3636980652809143, - "loss_sent": 0.13399262726306915, - "loss_sod": 0.02854270115494728, - "loss_total": 0.5262333750724792, - "step": 55799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.8296945691108704, - "learning_rate": 7.879593875692476e-05, - "loss": 0.6373, - "step": 55800 - }, - { - "epoch": 0.019798, - "loss_gen": 4.553261756896973, - "loss_rtd": 0.3637148439884186, - "loss_sent": 0.09166393429040909, - "loss_sod": 0.09912621229887009, - "loss_total": 0.5545049905776978, - "step": 55899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.008322238922119, - "loss_rtd": 0.3714514374732971, - "loss_sent": 0.24092429876327515, - "loss_sod": 0.12934193015098572, - "loss_total": 0.7417176365852356, - "step": 55899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.8420190811157227, - "learning_rate": 7.876999080023159e-05, - "loss": 0.631, - "step": 55900 - }, - { - "epoch": 0.019998, - "loss_gen": 4.874432563781738, - "loss_rtd": 0.38109180331230164, - "loss_sent": 0.12003308534622192, - "loss_sod": 0.10766370594501495, - "loss_total": 0.6087886095046997, - "step": 55999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.570915699005127, - "loss_rtd": 0.36038196086883545, - "loss_sent": 0.20860238373279572, - "loss_sod": 0.03632885217666626, - "loss_total": 0.6053131818771362, - "step": 55999 - }, - { - "epoch": 0.02, - "grad_norm": 1.397096037864685, - "learning_rate": 7.874403125498958e-05, - "loss": 0.6225, - "step": 56000 - }, - { - "epoch": 0.02, - "eval_loss": 0.6109273433685303, - "eval_runtime": 151.4077, - "eval_samples_per_second": 101.996, - "eval_steps_per_second": 0.799, - "step": 56000 - }, - { - "epoch": 0.020198, - "loss_gen": 3.914543867111206, - "loss_rtd": 0.3707915246486664, - "loss_sent": 0.004192584194242954, - "loss_sod": 0.21228839457035065, - "loss_total": 0.5872725248336792, - "step": 56099 - }, - { - "epoch": 0.020198, - "loss_gen": 3.5228183269500732, - "loss_rtd": 0.34751108288764954, - "loss_sent": 0.002179651753976941, - "loss_sod": 0.249548077583313, - "loss_total": 0.5992387533187866, - "step": 56099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.8863370418548584, - "learning_rate": 7.871806013165522e-05, - "loss": 0.6343, - "step": 56100 - }, - { - "epoch": 0.020398, - "loss_gen": 3.588907480239868, - "loss_rtd": 0.36278581619262695, - "loss_sent": 0.005457947961986065, - "loss_sod": 0.2697388529777527, - "loss_total": 0.6379826068878174, - "step": 56199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.755928039550781, - "loss_rtd": 0.35757341980934143, - "loss_sent": 0.06363532692193985, - "loss_sod": 0.08482104539871216, - "loss_total": 0.5060297846794128, - "step": 56199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.069838523864746, - "learning_rate": 7.869207744068966e-05, - "loss": 0.6402, - "step": 56200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.537457466125488, - "loss_rtd": 0.36283206939697266, - "loss_sent": 0.173272967338562, - "loss_sod": 0.06157602369785309, - "loss_total": 0.5976810455322266, - "step": 56299 - }, - { - "epoch": 0.020598, - "loss_gen": 4.561739921569824, - "loss_rtd": 0.3555087149143219, - "loss_sent": 0.05402013659477234, - "loss_sod": 0.06651479005813599, - "loss_total": 0.4760436415672302, - "step": 56299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.89546799659729, - "learning_rate": 7.866608319255875e-05, - "loss": 0.6262, - "step": 56300 - }, - { - "epoch": 0.020798, - "loss_gen": 4.26029109954834, - "loss_rtd": 0.35518962144851685, - "loss_sent": 0.22561852633953094, - "loss_sod": 0.017394546419382095, - "loss_total": 0.5982027053833008, - "step": 56399 - }, - { - "epoch": 0.020798, - "loss_gen": 4.655737400054932, - "loss_rtd": 0.3703760802745819, - "loss_sent": 0.07087548822164536, - "loss_sod": 0.02960325963795185, - "loss_total": 0.47085484862327576, - "step": 56399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.39506196975708, - "learning_rate": 7.864007739773295e-05, - "loss": 0.629, - "step": 56400 - }, - { - "epoch": 0.020998, - "loss_gen": 3.8033533096313477, - "loss_rtd": 0.3684219717979431, - "loss_sent": 7.553644536528736e-05, - "loss_sod": 0.24581684172153473, - "loss_total": 0.6143143177032471, - "step": 56499 - }, - { - "epoch": 0.020998, - "loss_gen": 3.8116445541381836, - "loss_rtd": 0.35867562890052795, - "loss_sent": 0.07484734803438187, - "loss_sod": 0.12182188779115677, - "loss_total": 0.5553448796272278, - "step": 56499 - }, - { - "epoch": 0.021, - "grad_norm": 1.1752865314483643, - "learning_rate": 7.861406006668739e-05, - "loss": 0.6416, - "step": 56500 - }, - { - "epoch": 0.021198, - "loss_gen": 3.7026193141937256, - "loss_rtd": 0.35439664125442505, - "loss_sent": 0.048916835337877274, - "loss_sod": 0.3023568093776703, - "loss_total": 0.7056702375411987, - "step": 56599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.53428840637207, - "loss_rtd": 0.3755502700805664, - "loss_sent": 0.17246706783771515, - "loss_sod": 0.008769787847995758, - "loss_total": 0.5567871332168579, - "step": 56599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.2310662269592285, - "learning_rate": 7.858803120990188e-05, - "loss": 0.6205, - "step": 56600 - }, - { - "epoch": 0.021398, - "loss_gen": 4.742209434509277, - "loss_rtd": 0.37581825256347656, - "loss_sent": 0.2694573700428009, - "loss_sod": 0.10139670968055725, - "loss_total": 0.7466723322868347, - "step": 56699 - }, - { - "epoch": 0.021398, - "loss_gen": 3.6188018321990967, - "loss_rtd": 0.35414817929267883, - "loss_sent": 0.01109377946704626, - "loss_sod": 0.12507717311382294, - "loss_total": 0.4903191030025482, - "step": 56699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.840541660785675, - "learning_rate": 7.85619908378608e-05, - "loss": 0.6472, - "step": 56700 - }, - { - "epoch": 0.021598, - "loss_gen": 4.646273136138916, - "loss_rtd": 0.3598632216453552, - "loss_sent": 0.6560623645782471, - "loss_sod": 0.014731225557625294, - "loss_total": 1.0306568145751953, - "step": 56799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.867937088012695, - "loss_rtd": 0.3471471965312958, - "loss_sent": 0.4271682798862457, - "loss_sod": 0.081779345870018, - "loss_total": 0.8560948371887207, - "step": 56799 - }, - { - "epoch": 0.0216, - "grad_norm": 4.364806652069092, - "learning_rate": 7.853593896105323e-05, - "loss": 0.6382, - "step": 56800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.655259132385254, - "loss_rtd": 0.3678421676158905, - "loss_sent": 0.08329112082719803, - "loss_sod": 0.03640985116362572, - "loss_total": 0.48754313588142395, - "step": 56899 - }, - { - "epoch": 0.021798, - "loss_gen": 3.8237650394439697, - "loss_rtd": 0.368927925825119, - "loss_sent": 0.0001940029178513214, - "loss_sod": 0.2146022617816925, - "loss_total": 0.5837242007255554, - "step": 56899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.9214321374893188, - "learning_rate": 7.850987558997287e-05, - "loss": 0.6355, - "step": 56900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.45928955078125, - "loss_rtd": 0.3921184837818146, - "loss_sent": 0.2075405865907669, - "loss_sod": 0.02730563096702099, - "loss_total": 0.6269646883010864, - "step": 56999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.576908588409424, - "loss_rtd": 0.3429858386516571, - "loss_sent": 0.20230966806411743, - "loss_sod": 0.014626307412981987, - "loss_total": 0.5599218010902405, - "step": 56999 - }, - { - "epoch": 0.022, - "grad_norm": 0.9983424544334412, - "learning_rate": 7.848380073511802e-05, - "loss": 0.6291, - "step": 57000 - }, - { - "epoch": 0.022, - "eval_loss": 0.6036836504936218, - "eval_runtime": 151.1538, - "eval_samples_per_second": 102.167, - "eval_steps_per_second": 0.801, - "step": 57000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.728472709655762, - "loss_rtd": 0.34702467918395996, - "loss_sent": 0.1587972193956375, - "loss_sod": 0.0453263595700264, - "loss_total": 0.5511482357978821, - "step": 57099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.708425521850586, - "loss_rtd": 0.33947470784187317, - "loss_sent": 0.11597699671983719, - "loss_sod": 0.1268012523651123, - "loss_total": 0.5822529792785645, - "step": 57099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.8465123772621155, - "learning_rate": 7.845771440699164e-05, - "loss": 0.6327, - "step": 57100 - }, - { - "epoch": 0.022398, - "loss_gen": 3.7659003734588623, - "loss_rtd": 0.3444095551967621, - "loss_sent": 0.10276321321725845, - "loss_sod": 0.07046366482973099, - "loss_total": 0.5176364183425903, - "step": 57199 - }, - { - "epoch": 0.022398, - "loss_gen": 4.462311744689941, - "loss_rtd": 0.3790939748287201, - "loss_sent": 0.10643347352743149, - "loss_sod": 0.08552703261375427, - "loss_total": 0.5710544586181641, - "step": 57199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.8902172446250916, - "learning_rate": 7.84316166161013e-05, - "loss": 0.6375, - "step": 57200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.8482232093811035, - "loss_rtd": 0.36452606320381165, - "loss_sent": 0.216790571808815, - "loss_sod": 0.06441571563482285, - "loss_total": 0.6457323431968689, - "step": 57299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.491093158721924, - "loss_rtd": 0.36763903498649597, - "loss_sent": 0.3399943709373474, - "loss_sod": 0.08514704555273056, - "loss_total": 0.7927804589271545, - "step": 57299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.0277565717697144, - "learning_rate": 7.840550737295919e-05, - "loss": 0.6327, - "step": 57300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.362370491027832, - "loss_rtd": 0.3496209681034088, - "loss_sent": 0.15771304070949554, - "loss_sod": 0.024754945188760757, - "loss_total": 0.5320889353752136, - "step": 57399 - }, - { - "epoch": 0.022798, - "loss_gen": 4.630321502685547, - "loss_rtd": 0.37803417444229126, - "loss_sent": 0.24064858257770538, - "loss_sod": 0.06453673541545868, - "loss_total": 0.6832194924354553, - "step": 57399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.3916471004486084, - "learning_rate": 7.837938668808211e-05, - "loss": 0.624, - "step": 57400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.793696880340576, - "loss_rtd": 0.3645938038825989, - "loss_sent": 0.31480395793914795, - "loss_sod": 0.06923617422580719, - "loss_total": 0.7486339211463928, - "step": 57499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.454331398010254, - "loss_rtd": 0.35629791021347046, - "loss_sent": 0.12270854413509369, - "loss_sod": 0.01729443669319153, - "loss_total": 0.4963008761405945, - "step": 57499 - }, - { - "epoch": 0.023, - "grad_norm": 0.9017834067344666, - "learning_rate": 7.835325457199146e-05, - "loss": 0.6221, - "step": 57500 - }, - { - "epoch": 0.023198, - "loss_gen": 4.476206302642822, - "loss_rtd": 0.3627890646457672, - "loss_sent": 0.18494051694869995, - "loss_sod": 0.050898827612400055, - "loss_total": 0.5986284017562866, - "step": 57599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.25529670715332, - "loss_rtd": 0.36866647005081177, - "loss_sent": 0.4510160982608795, - "loss_sod": 0.09126748889684677, - "loss_total": 0.9109500646591187, - "step": 57599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.077824592590332, - "learning_rate": 7.832711103521326e-05, - "loss": 0.6272, - "step": 57600 - }, - { - "epoch": 0.023398, - "loss_gen": 4.585660934448242, - "loss_rtd": 0.3672980070114136, - "loss_sent": 0.20580396056175232, - "loss_sod": 0.022966112941503525, - "loss_total": 0.5960680842399597, - "step": 57699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.617955207824707, - "loss_rtd": 0.3689972162246704, - "loss_sent": 0.27105477452278137, - "loss_sod": 0.007499829400330782, - "loss_total": 0.6475518345832825, - "step": 57699 - }, - { - "epoch": 0.0234, - "grad_norm": 2.2827484607696533, - "learning_rate": 7.830095608827813e-05, - "loss": 0.641, - "step": 57700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.636845111846924, - "loss_rtd": 0.3683159053325653, - "loss_sent": 0.22516517341136932, - "loss_sod": 0.09461245685815811, - "loss_total": 0.6880935430526733, - "step": 57799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.369560718536377, - "loss_rtd": 0.35372239351272583, - "loss_sent": 0.23294277489185333, - "loss_sod": 0.017427930608391762, - "loss_total": 0.604093074798584, - "step": 57799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.006242275238037, - "learning_rate": 7.827478974172127e-05, - "loss": 0.6255, - "step": 57800 - }, - { - "epoch": 0.023798, - "loss_gen": 4.858678340911865, - "loss_rtd": 0.3479278087615967, - "loss_sent": 0.32929107546806335, - "loss_sod": 0.05031493306159973, - "loss_total": 0.7275338172912598, - "step": 57899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.78029727935791, - "loss_rtd": 0.3700944483280182, - "loss_sent": 0.17167387902736664, - "loss_sod": 0.007030355744063854, - "loss_total": 0.548798680305481, - "step": 57899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.0834317207336426, - "learning_rate": 7.824861200608248e-05, - "loss": 0.6438, - "step": 57900 - }, - { - "epoch": 0.023998, - "loss_gen": 4.542178153991699, - "loss_rtd": 0.3664270043373108, - "loss_sent": 0.10557856410741806, - "loss_sod": 0.02531070075929165, - "loss_total": 0.49731627106666565, - "step": 57999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.939555644989014, - "loss_rtd": 0.36791694164276123, - "loss_sent": 0.1434306651353836, - "loss_sod": 0.15800848603248596, - "loss_total": 0.6693560481071472, - "step": 57999 - }, - { - "epoch": 0.024, - "grad_norm": 0.8415298461914062, - "learning_rate": 7.822242289190616e-05, - "loss": 0.6129, - "step": 58000 - }, - { - "epoch": 0.024, - "eval_loss": 0.6122521162033081, - "eval_runtime": 151.2846, - "eval_samples_per_second": 102.079, - "eval_steps_per_second": 0.8, - "step": 58000 - }, - { - "epoch": 0.024198, - "loss_gen": 3.856839895248413, - "loss_rtd": 0.35103926062583923, - "loss_sent": 7.347911014221609e-05, - "loss_sod": 0.18446674942970276, - "loss_total": 0.53557950258255, - "step": 58099 - }, - { - "epoch": 0.024198, - "loss_gen": 4.428738594055176, - "loss_rtd": 0.37424057722091675, - "loss_sent": 0.24140650033950806, - "loss_sod": 0.058345548808574677, - "loss_total": 0.6739926338195801, - "step": 58099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.5646039247512817, - "learning_rate": 7.819622240974125e-05, - "loss": 0.6211, - "step": 58100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.742285251617432, - "loss_rtd": 0.37152335047721863, - "loss_sent": 0.25615110993385315, - "loss_sod": 0.033854153007268906, - "loss_total": 0.6615285873413086, - "step": 58199 - }, - { - "epoch": 0.024398, - "loss_gen": 4.426832675933838, - "loss_rtd": 0.3590032756328583, - "loss_sent": 0.16081035137176514, - "loss_sod": 0.0034500528126955032, - "loss_total": 0.523263692855835, - "step": 58199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.7670844197273254, - "learning_rate": 7.817001057014135e-05, - "loss": 0.6157, - "step": 58200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.695468425750732, - "loss_rtd": 0.36778417229652405, - "loss_sent": 0.16970743238925934, - "loss_sod": 0.09776318073272705, - "loss_total": 0.6352548003196716, - "step": 58299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.1878275871276855, - "loss_rtd": 0.3674927353858948, - "loss_sent": 0.0856841579079628, - "loss_sod": 0.15574391186237335, - "loss_total": 0.6089208126068115, - "step": 58299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.017652153968811, - "learning_rate": 7.814378738366456e-05, - "loss": 0.6372, - "step": 58300 - }, - { - "epoch": 0.024798, - "loss_gen": 4.5369181632995605, - "loss_rtd": 0.3620288372039795, - "loss_sent": 0.23065121471881866, - "loss_sod": 0.03505735471844673, - "loss_total": 0.6277374029159546, - "step": 58399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.5468316078186035, - "loss_rtd": 0.37129703164100647, - "loss_sent": 0.13053631782531738, - "loss_sod": 0.042834796011447906, - "loss_total": 0.5446681380271912, - "step": 58399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.0757381916046143, - "learning_rate": 7.811755286087356e-05, - "loss": 0.6286, - "step": 58400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.764802932739258, - "loss_rtd": 0.3671434819698334, - "loss_sent": 0.23353952169418335, - "loss_sod": 0.043985750526189804, - "loss_total": 0.6446687579154968, - "step": 58499 - }, - { - "epoch": 0.024998, - "loss_gen": 4.488729476928711, - "loss_rtd": 0.3664853870868683, - "loss_sent": 0.043898455798625946, - "loss_sod": 0.17564155161380768, - "loss_total": 0.5860254168510437, - "step": 58499 - }, - { - "epoch": 0.025, - "grad_norm": 1.4052540063858032, - "learning_rate": 7.809130701233565e-05, - "loss": 0.6303, - "step": 58500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.284855365753174, - "loss_rtd": 0.35478779673576355, - "loss_sent": 0.011805405840277672, - "loss_sod": 0.12428386509418488, - "loss_total": 0.49087709188461304, - "step": 58599 - }, - { - "epoch": 0.025198, - "loss_gen": 4.3681640625, - "loss_rtd": 0.36329731345176697, - "loss_sent": 0.31201988458633423, - "loss_sod": 0.09334906190633774, - "loss_total": 0.7686662673950195, - "step": 58599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.0215644836425781, - "learning_rate": 7.806504984862263e-05, - "loss": 0.6371, - "step": 58600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.680645942687988, - "loss_rtd": 0.3676464259624481, - "loss_sent": 0.21945630013942719, - "loss_sod": 0.05643542483448982, - "loss_total": 0.6435381770133972, - "step": 58699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.2610979080200195, - "loss_rtd": 0.3559699058532715, - "loss_sent": 0.09809956699609756, - "loss_sod": 0.09417600929737091, - "loss_total": 0.5482454895973206, - "step": 58699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.2106742858886719, - "learning_rate": 7.803878138031088e-05, - "loss": 0.6292, - "step": 58700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.729255199432373, - "loss_rtd": 0.3560835123062134, - "loss_sent": 0.14522771537303925, - "loss_sod": 0.005209613591432571, - "loss_total": 0.5065208673477173, - "step": 58799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.687342166900635, - "loss_rtd": 0.37340936064720154, - "loss_sent": 0.505208432674408, - "loss_sod": 0.01523881871253252, - "loss_total": 0.8938566446304321, - "step": 58799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.73228120803833, - "learning_rate": 7.801250161798135e-05, - "loss": 0.6207, - "step": 58800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.603366851806641, - "loss_rtd": 0.36248043179512024, - "loss_sent": 0.2825396656990051, - "loss_sod": 0.046890988945961, - "loss_total": 0.6919111013412476, - "step": 58899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.493285179138184, - "loss_rtd": 0.3563370108604431, - "loss_sent": 0.3288453221321106, - "loss_sod": 0.07662333548069, - "loss_total": 0.7618056535720825, - "step": 58899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.0256446599960327, - "learning_rate": 7.798621057221951e-05, - "loss": 0.6249, - "step": 58900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.566259384155273, - "loss_rtd": 0.36538976430892944, - "loss_sent": 0.29362866282463074, - "loss_sod": 0.05600784346461296, - "loss_total": 0.7150262594223022, - "step": 58999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.62779426574707, - "loss_rtd": 0.3808223009109497, - "loss_sent": 0.11313124746084213, - "loss_sod": 0.047821350395679474, - "loss_total": 0.5417748689651489, - "step": 58999 - }, - { - "epoch": 0.026, - "grad_norm": 0.9359037280082703, - "learning_rate": 7.79599082536154e-05, - "loss": 0.6375, - "step": 59000 - }, - { - "epoch": 0.026, - "eval_loss": 0.6099367737770081, - "eval_runtime": 151.1862, - "eval_samples_per_second": 102.146, - "eval_steps_per_second": 0.8, - "step": 59000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.435936450958252, - "loss_rtd": 0.3585285246372223, - "loss_sent": 0.17895811796188354, - "loss_sod": 0.05199865251779556, - "loss_total": 0.5894852876663208, - "step": 59099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.619828224182129, - "loss_rtd": 0.3510432243347168, - "loss_sent": 0.2641523480415344, - "loss_sod": 0.2132890522480011, - "loss_total": 0.8284845948219299, - "step": 59099 - }, - { - "epoch": 0.0262, - "grad_norm": 3.9642646312713623, - "learning_rate": 7.79335946727636e-05, - "loss": 0.6246, - "step": 59100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.73977518081665, - "loss_rtd": 0.3586340844631195, - "loss_sent": 0.2495546191930771, - "loss_sod": 0.13427351415157318, - "loss_total": 0.7424622178077698, - "step": 59199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.060482025146484, - "loss_rtd": 0.3705763816833496, - "loss_sent": 0.15017586946487427, - "loss_sod": 0.14380647242069244, - "loss_total": 0.6645587086677551, - "step": 59199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.0570859909057617, - "learning_rate": 7.79072698402632e-05, - "loss": 0.6263, - "step": 59200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.457797527313232, - "loss_rtd": 0.35689282417297363, - "loss_sent": 0.12019678950309753, - "loss_sod": 0.14792628586292267, - "loss_total": 0.6250158548355103, - "step": 59299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.3164262771606445, - "loss_rtd": 0.35765039920806885, - "loss_sent": 0.25349053740501404, - "loss_sod": 0.09530863910913467, - "loss_total": 0.706449568271637, - "step": 59299 - }, - { - "epoch": 0.0266, - "grad_norm": 2.9818403720855713, - "learning_rate": 7.788093376671783e-05, - "loss": 0.6255, - "step": 59300 - }, - { - "epoch": 0.026798, - "loss_gen": 3.90621018409729, - "loss_rtd": 0.3626609146595001, - "loss_sent": 0.015791015699505806, - "loss_sod": 0.23246951401233673, - "loss_total": 0.6109214425086975, - "step": 59399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.519261360168457, - "loss_rtd": 0.3742714822292328, - "loss_sent": 0.145407572388649, - "loss_sod": 0.05256856605410576, - "loss_total": 0.5722476243972778, - "step": 59399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.2181992530822754, - "learning_rate": 7.785458646273569e-05, - "loss": 0.6208, - "step": 59400 - }, - { - "epoch": 0.026998, - "loss_gen": 4.5629377365112305, - "loss_rtd": 0.36785122752189636, - "loss_sent": 0.21749372780323029, - "loss_sod": 0.0407673604786396, - "loss_total": 0.6261123418807983, - "step": 59499 - }, - { - "epoch": 0.026998, - "loss_gen": 4.774982452392578, - "loss_rtd": 0.3666840195655823, - "loss_sent": 0.05598453804850578, - "loss_sod": 0.04872807487845421, - "loss_total": 0.47139662504196167, - "step": 59499 - }, - { - "epoch": 0.027, - "grad_norm": 0.6574075222015381, - "learning_rate": 7.782822793892945e-05, - "loss": 0.6235, - "step": 59500 - }, - { - "epoch": 0.027198, - "loss_gen": 4.934159755706787, - "loss_rtd": 0.35682305693626404, - "loss_sent": 0.14898377656936646, - "loss_sod": 0.07212354242801666, - "loss_total": 0.5779303908348083, - "step": 59599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.615574836730957, - "loss_rtd": 0.355925977230072, - "loss_sent": 0.2697262763977051, - "loss_sod": 0.057983674108982086, - "loss_total": 0.683635950088501, - "step": 59599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.7389049530029297, - "learning_rate": 7.780185820591632e-05, - "loss": 0.6298, - "step": 59600 - }, - { - "epoch": 0.027398, - "loss_gen": 4.8682146072387695, - "loss_rtd": 0.38151177763938904, - "loss_sent": 0.20392993092536926, - "loss_sod": 0.07457822561264038, - "loss_total": 0.6600199341773987, - "step": 59699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.836795806884766, - "loss_rtd": 0.3634702265262604, - "loss_sent": 0.10432275384664536, - "loss_sod": 0.05495322495698929, - "loss_total": 0.522746205329895, - "step": 59699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.6321349740028381, - "learning_rate": 7.777547727431804e-05, - "loss": 0.6298, - "step": 59700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.784069061279297, - "loss_rtd": 0.3644939363002777, - "loss_sent": 0.16687586903572083, - "loss_sod": 0.05650252476334572, - "loss_total": 0.587872326374054, - "step": 59799 - }, - { - "epoch": 0.027598, - "loss_gen": 4.443631172180176, - "loss_rtd": 0.3605464994907379, - "loss_sent": 0.3157513737678528, - "loss_sod": 0.009228970855474472, - "loss_total": 0.6855268478393555, - "step": 59799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.874180793762207, - "learning_rate": 7.774908515476082e-05, - "loss": 0.6379, - "step": 59800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.536407947540283, - "loss_rtd": 0.36572104692459106, - "loss_sent": 0.3959828317165375, - "loss_sod": 0.07241517305374146, - "loss_total": 0.8341190814971924, - "step": 59899 - }, - { - "epoch": 0.027798, - "loss_gen": 4.399185657501221, - "loss_rtd": 0.3434516191482544, - "loss_sent": 0.011317362077534199, - "loss_sod": 0.10456545650959015, - "loss_total": 0.45933443307876587, - "step": 59899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.2676671743392944, - "learning_rate": 7.772268185787543e-05, - "loss": 0.6232, - "step": 59900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.64289665222168, - "loss_rtd": 0.36277514696121216, - "loss_sent": 0.333320289850235, - "loss_sod": 0.050307150930166245, - "loss_total": 0.7464026212692261, - "step": 59999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.631900787353516, - "loss_rtd": 0.3668534457683563, - "loss_sent": 0.2586413025856018, - "loss_sod": 0.016919543966650963, - "loss_total": 0.6424143314361572, - "step": 59999 - }, - { - "epoch": 0.028, - "grad_norm": 2.2221970558166504, - "learning_rate": 7.76962673942971e-05, - "loss": 0.6286, - "step": 60000 - }, - { - "epoch": 0.028, - "eval_loss": 0.6064568758010864, - "eval_runtime": 151.3435, - "eval_samples_per_second": 102.039, - "eval_steps_per_second": 0.8, - "step": 60000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.419715881347656, - "loss_rtd": 0.3511432707309723, - "loss_sent": 0.05077521502971649, - "loss_sod": 0.0357656329870224, - "loss_total": 0.4376841187477112, - "step": 60099 - }, - { - "epoch": 0.028198, - "loss_gen": 4.582746982574463, - "loss_rtd": 0.37409496307373047, - "loss_sent": 0.204301118850708, - "loss_sod": 0.00987776555120945, - "loss_total": 0.5882738828659058, - "step": 60099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.7696623206138611, - "learning_rate": 7.766984177466559e-05, - "loss": 0.6248, - "step": 60100 - }, - { - "epoch": 0.028398, - "loss_gen": 4.308511257171631, - "loss_rtd": 0.344106525182724, - "loss_sent": 0.05806731805205345, - "loss_sod": 0.07151489704847336, - "loss_total": 0.4736887514591217, - "step": 60199 - }, - { - "epoch": 0.028398, - "loss_gen": 4.259953022003174, - "loss_rtd": 0.3588264286518097, - "loss_sent": 0.15039996802806854, - "loss_sod": 0.1075981855392456, - "loss_total": 0.6168245673179626, - "step": 60199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.1939679384231567, - "learning_rate": 7.764340500962511e-05, - "loss": 0.6322, - "step": 60200 - }, - { - "epoch": 0.028598, - "loss_gen": 4.882040500640869, - "loss_rtd": 0.36036819219589233, - "loss_sent": 0.23449385166168213, - "loss_sod": 0.05917336791753769, - "loss_total": 0.6540354490280151, - "step": 60299 - }, - { - "epoch": 0.028598, - "loss_gen": 4.768974304199219, - "loss_rtd": 0.3655626177787781, - "loss_sent": 0.056659433990716934, - "loss_sod": 0.11761739104986191, - "loss_total": 0.5398394465446472, - "step": 60299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.6359830498695374, - "learning_rate": 7.761695710982439e-05, - "loss": 0.6314, - "step": 60300 - }, - { - "epoch": 0.028798, - "loss_gen": 4.6344313621521, - "loss_rtd": 0.3701755106449127, - "loss_sent": 0.14321716129779816, - "loss_sod": 0.0878496766090393, - "loss_total": 0.6012423634529114, - "step": 60399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.201408386230469, - "loss_rtd": 0.36076819896698, - "loss_sent": 0.2366686463356018, - "loss_sod": 0.028163466602563858, - "loss_total": 0.6256003379821777, - "step": 60399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.2316277027130127, - "learning_rate": 7.759049808591665e-05, - "loss": 0.6298, - "step": 60400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.660600185394287, - "loss_rtd": 0.36998021602630615, - "loss_sent": 0.1629168838262558, - "loss_sod": 0.050505802035331726, - "loss_total": 0.5834029316902161, - "step": 60499 - }, - { - "epoch": 0.028998, - "loss_gen": 4.8611159324646, - "loss_rtd": 0.36074256896972656, - "loss_sent": 0.2991497814655304, - "loss_sod": 0.07882288098335266, - "loss_total": 0.7387152314186096, - "step": 60499 - }, - { - "epoch": 0.029, - "grad_norm": 0.8937557935714722, - "learning_rate": 7.75640279485596e-05, - "loss": 0.6143, - "step": 60500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.770934104919434, - "loss_rtd": 0.3545058071613312, - "loss_sent": 0.1732805073261261, - "loss_sod": 0.12196668982505798, - "loss_total": 0.6497529745101929, - "step": 60599 - }, - { - "epoch": 0.029198, - "loss_gen": 4.689424991607666, - "loss_rtd": 0.37039607763290405, - "loss_sent": 0.051986388862133026, - "loss_sod": 0.08831780403852463, - "loss_total": 0.5107002854347229, - "step": 60599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.8047595024108887, - "learning_rate": 7.753754670841535e-05, - "loss": 0.6325, - "step": 60600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.782720565795898, - "loss_rtd": 0.360862672328949, - "loss_sent": 0.13212287425994873, - "loss_sod": 0.08254893124103546, - "loss_total": 0.575534462928772, - "step": 60699 - }, - { - "epoch": 0.029398, - "loss_gen": 3.8673269748687744, - "loss_rtd": 0.35040780901908875, - "loss_sent": 0.015188871882855892, - "loss_sod": 0.08386120200157166, - "loss_total": 0.44945788383483887, - "step": 60699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.2101595401763916, - "learning_rate": 7.751105437615062e-05, - "loss": 0.631, - "step": 60700 - }, - { - "epoch": 0.029598, - "loss_gen": 4.655178070068359, - "loss_rtd": 0.3434184491634369, - "loss_sent": 0.13284368813037872, - "loss_sod": 0.17460592091083527, - "loss_total": 0.6508680582046509, - "step": 60799 - }, - { - "epoch": 0.029598, - "loss_gen": 4.486229419708252, - "loss_rtd": 0.3898225724697113, - "loss_sent": 0.3177969455718994, - "loss_sod": 0.06404908001422882, - "loss_total": 0.7716686129570007, - "step": 60799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.3122541904449463, - "learning_rate": 7.748455096243645e-05, - "loss": 0.6281, - "step": 60800 - }, - { - "epoch": 0.029798, - "loss_gen": 4.25994348526001, - "loss_rtd": 0.36359405517578125, - "loss_sent": 0.371652215719223, - "loss_sod": 0.03927462920546532, - "loss_total": 0.7745208740234375, - "step": 60899 - }, - { - "epoch": 0.029798, - "loss_gen": 4.894041061401367, - "loss_rtd": 0.360346257686615, - "loss_sent": 0.132888525724411, - "loss_sod": 0.06288320571184158, - "loss_total": 0.5561180114746094, - "step": 60899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.4946010112762451, - "learning_rate": 7.745803647794845e-05, - "loss": 0.6318, - "step": 60900 - }, - { - "epoch": 0.029998, - "loss_gen": 4.927550315856934, - "loss_rtd": 0.3632243275642395, - "loss_sent": 0.2353924959897995, - "loss_sod": 0.0985734835267067, - "loss_total": 0.6971902847290039, - "step": 60999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.230792999267578, - "loss_rtd": 0.3565824627876282, - "loss_sent": 0.10895363241434097, - "loss_sod": 0.28270161151885986, - "loss_total": 0.7482377290725708, - "step": 60999 - }, - { - "epoch": 0.03, - "grad_norm": 1.2049927711486816, - "learning_rate": 7.743151093336664e-05, - "loss": 0.6274, - "step": 61000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5990291237831116, - "eval_runtime": 151.1472, - "eval_samples_per_second": 102.172, - "eval_steps_per_second": 0.801, - "step": 61000 - }, - { - "epoch": 0.030198, - "loss_gen": 4.337743759155273, - "loss_rtd": 0.35195744037628174, - "loss_sent": 0.005699401255697012, - "loss_sod": 0.12330635637044907, - "loss_total": 0.48096320033073425, - "step": 61099 - }, - { - "epoch": 0.030198, - "loss_gen": 3.908996105194092, - "loss_rtd": 0.3597874641418457, - "loss_sent": 0.056060317903757095, - "loss_sod": 0.11038318276405334, - "loss_total": 0.5262309908866882, - "step": 61099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.8100944757461548, - "learning_rate": 7.74049743393755e-05, - "loss": 0.6284, - "step": 61100 - }, - { - "epoch": 0.030398, - "loss_gen": 4.535096645355225, - "loss_rtd": 0.359036922454834, - "loss_sent": 0.25981956720352173, - "loss_sod": 0.023562589660286903, - "loss_total": 0.6424190998077393, - "step": 61199 - }, - { - "epoch": 0.030398, - "loss_gen": 4.725579738616943, - "loss_rtd": 0.3722909688949585, - "loss_sent": 0.10179479420185089, - "loss_sod": 0.06360937654972076, - "loss_total": 0.5376951098442078, - "step": 61199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.8150643706321716, - "learning_rate": 7.7378426706664e-05, - "loss": 0.616, - "step": 61200 - }, - { - "epoch": 0.030598, - "loss_gen": 4.490610599517822, - "loss_rtd": 0.35507073998451233, - "loss_sent": 0.25802454352378845, - "loss_sod": 0.005174014251679182, - "loss_total": 0.6182693243026733, - "step": 61299 - }, - { - "epoch": 0.030598, - "loss_gen": 4.592755317687988, - "loss_rtd": 0.3682295083999634, - "loss_sent": 0.16518935561180115, - "loss_sod": 0.017999812960624695, - "loss_total": 0.551418662071228, - "step": 61299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.8812500238418579, - "learning_rate": 7.735186804592546e-05, - "loss": 0.6163, - "step": 61300 - }, - { - "epoch": 0.030798, - "loss_gen": 4.962682247161865, - "loss_rtd": 0.3762686848640442, - "loss_sent": 0.19632530212402344, - "loss_sod": 0.1151101291179657, - "loss_total": 0.6877040863037109, - "step": 61399 - }, - { - "epoch": 0.030798, - "loss_gen": 4.625997066497803, - "loss_rtd": 0.3505260944366455, - "loss_sent": 0.23315012454986572, - "loss_sod": 0.09739446640014648, - "loss_total": 0.6810706853866577, - "step": 61399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.640834093093872, - "learning_rate": 7.732529836785777e-05, - "loss": 0.6237, - "step": 61400 - }, - { - "epoch": 0.030998, - "loss_gen": 3.8268961906433105, - "loss_rtd": 0.3546771705150604, - "loss_sent": 0.0011191426310688257, - "loss_sod": 0.30724337697029114, - "loss_total": 0.6630396842956543, - "step": 61499 - }, - { - "epoch": 0.030998, - "loss_gen": 3.3745572566986084, - "loss_rtd": 0.3258892595767975, - "loss_sent": 0.0037176625337451696, - "loss_sod": 0.15428975224494934, - "loss_total": 0.4838966727256775, - "step": 61499 - }, - { - "epoch": 0.031, - "grad_norm": 1.0252587795257568, - "learning_rate": 7.729871768316315e-05, - "loss": 0.6217, - "step": 61500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.474167823791504, - "loss_rtd": 0.35145077109336853, - "loss_sent": 0.27659541368484497, - "loss_sod": 0.037010371685028076, - "loss_total": 0.665056586265564, - "step": 61599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.455756664276123, - "loss_rtd": 0.37024450302124023, - "loss_sent": 0.1990279108285904, - "loss_sod": 0.04633166640996933, - "loss_total": 0.6156041026115417, - "step": 61599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.8076386451721191, - "learning_rate": 7.727212600254832e-05, - "loss": 0.6372, - "step": 61600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.811530113220215, - "loss_rtd": 0.3651669919490814, - "loss_sent": 0.04905230551958084, - "loss_sod": 0.01257830485701561, - "loss_total": 0.4267975986003876, - "step": 61699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.341268539428711, - "loss_rtd": 0.36992210149765015, - "loss_sent": 0.15841986238956451, - "loss_sod": 0.16322530806064606, - "loss_total": 0.6915672421455383, - "step": 61699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.2156025171279907, - "learning_rate": 7.724552333672439e-05, - "loss": 0.6139, - "step": 61700 - }, - { - "epoch": 0.031598, - "loss_gen": 4.670220851898193, - "loss_rtd": 0.3613542914390564, - "loss_sent": 0.09052611887454987, - "loss_sod": 0.13286900520324707, - "loss_total": 0.5847494006156921, - "step": 61799 - }, - { - "epoch": 0.031598, - "loss_gen": 4.732316970825195, - "loss_rtd": 0.3727900981903076, - "loss_sent": 0.2103252112865448, - "loss_sod": 0.005296154413372278, - "loss_total": 0.5884114503860474, - "step": 61799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.2746527194976807, - "learning_rate": 7.721890969640693e-05, - "loss": 0.6172, - "step": 61800 - }, - { - "epoch": 0.031798, - "loss_gen": 4.899127006530762, - "loss_rtd": 0.3722045123577118, - "loss_sent": 0.05811246484518051, - "loss_sod": 0.018387533724308014, - "loss_total": 0.4487045109272003, - "step": 61899 - }, - { - "epoch": 0.031798, - "loss_gen": 3.7219502925872803, - "loss_rtd": 0.346815288066864, - "loss_sent": 0.0038633050862699747, - "loss_sod": 0.20953437685966492, - "loss_total": 0.5602129697799683, - "step": 61899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.3523370027542114, - "learning_rate": 7.719228509231589e-05, - "loss": 0.6351, - "step": 61900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.3909592628479, - "loss_rtd": 0.3624614477157593, - "loss_sent": 0.4772406816482544, - "loss_sod": 0.07452499866485596, - "loss_total": 0.9142271280288696, - "step": 61999 - }, - { - "epoch": 0.031998, - "loss_gen": 4.466653823852539, - "loss_rtd": 0.3784707188606262, - "loss_sent": 0.2602320909500122, - "loss_sod": 0.03414306789636612, - "loss_total": 0.6728458404541016, - "step": 61999 - }, - { - "epoch": 0.032, - "grad_norm": 2.9149506092071533, - "learning_rate": 7.716564953517567e-05, - "loss": 0.6262, - "step": 62000 - }, - { - "epoch": 0.032, - "eval_loss": 0.5999614596366882, - "eval_runtime": 152.9369, - "eval_samples_per_second": 100.976, - "eval_steps_per_second": 0.791, - "step": 62000 - }, - { - "epoch": 0.032198, - "loss_gen": 3.7183635234832764, - "loss_rtd": 0.35874229669570923, - "loss_sent": 0.05439189448952675, - "loss_sod": 0.05234738066792488, - "loss_total": 0.46548157930374146, - "step": 62099 - }, - { - "epoch": 0.032198, - "loss_gen": 4.890871047973633, - "loss_rtd": 0.3613589406013489, - "loss_sent": 0.029185831546783447, - "loss_sod": 0.16612508893013, - "loss_total": 0.5566698908805847, - "step": 62099 - }, - { - "epoch": 0.0322, - "grad_norm": 0.6717313528060913, - "learning_rate": 7.713900303571505e-05, - "loss": 0.6359, - "step": 62100 - }, - { - "epoch": 0.032398, - "loss_gen": 3.771449565887451, - "loss_rtd": 0.35726627707481384, - "loss_sent": 4.493523738346994e-05, - "loss_sod": 0.16263878345489502, - "loss_total": 0.5199500322341919, - "step": 62199 - }, - { - "epoch": 0.032398, - "loss_gen": 5.023044586181641, - "loss_rtd": 0.35243162512779236, - "loss_sent": 0.3986659646034241, - "loss_sod": 0.21003739535808563, - "loss_total": 0.9611349701881409, - "step": 62199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.1961450576782227, - "learning_rate": 7.711234560466727e-05, - "loss": 0.6203, - "step": 62200 - }, - { - "epoch": 0.032598, - "loss_gen": 3.678945541381836, - "loss_rtd": 0.35275405645370483, - "loss_sent": 0.0027436709497123957, - "loss_sod": 0.16715922951698303, - "loss_total": 0.5226569771766663, - "step": 62299 - }, - { - "epoch": 0.032598, - "loss_gen": 5.011557579040527, - "loss_rtd": 0.3473483920097351, - "loss_sent": 0.1250336468219757, - "loss_sod": 0.05513975769281387, - "loss_total": 0.5275217890739441, - "step": 62299 - }, - { - "epoch": 0.0326, - "grad_norm": 1.1040332317352295, - "learning_rate": 7.708567725276992e-05, - "loss": 0.6153, - "step": 62300 - }, - { - "epoch": 0.032798, - "loss_gen": 4.394141674041748, - "loss_rtd": 0.3745782971382141, - "loss_sent": 0.23874713480472565, - "loss_sod": 0.06496084481477737, - "loss_total": 0.6782862544059753, - "step": 62399 - }, - { - "epoch": 0.032798, - "loss_gen": 4.451708793640137, - "loss_rtd": 0.35331302881240845, - "loss_sent": 0.14257824420928955, - "loss_sod": 0.009480955079197884, - "loss_total": 0.5053722262382507, - "step": 62399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.6813021898269653, - "learning_rate": 7.705899799076501e-05, - "loss": 0.6318, - "step": 62400 - }, - { - "epoch": 0.032998, - "loss_gen": 3.518160343170166, - "loss_rtd": 0.34264475107192993, - "loss_sent": 6.070993185858242e-05, - "loss_sod": 0.20465955138206482, - "loss_total": 0.5473650097846985, - "step": 62499 - }, - { - "epoch": 0.032998, - "loss_gen": 4.318394660949707, - "loss_rtd": 0.36519360542297363, - "loss_sent": 0.1040429025888443, - "loss_sod": 0.11054766178131104, - "loss_total": 0.5797841548919678, - "step": 62499 - }, - { - "epoch": 0.033, - "grad_norm": 1.4231469631195068, - "learning_rate": 7.703230782939894e-05, - "loss": 0.6213, - "step": 62500 - }, - { - "epoch": 0.033198, - "loss_gen": 4.303335666656494, - "loss_rtd": 0.36913084983825684, - "loss_sent": 0.041935522109270096, - "loss_sod": 0.039499327540397644, - "loss_total": 0.45056572556495667, - "step": 62599 - }, - { - "epoch": 0.033198, - "loss_gen": 3.836183786392212, - "loss_rtd": 0.35064151883125305, - "loss_sent": 0.032056838274002075, - "loss_sod": 0.06515227258205414, - "loss_total": 0.44785061478614807, - "step": 62599 - }, - { - "epoch": 0.0332, - "grad_norm": 0.8263905048370361, - "learning_rate": 7.700560677942252e-05, - "loss": 0.6232, - "step": 62600 - }, - { - "epoch": 0.033398, - "loss_gen": 4.548916339874268, - "loss_rtd": 0.35985687375068665, - "loss_sent": 0.17908038198947906, - "loss_sod": 0.04548419639468193, - "loss_total": 0.5844214558601379, - "step": 62699 - }, - { - "epoch": 0.033398, - "loss_gen": 4.711582183837891, - "loss_rtd": 0.36192235350608826, - "loss_sent": 0.05882194638252258, - "loss_sod": 0.0021437264513224363, - "loss_total": 0.42288804054260254, - "step": 62699 - }, - { - "epoch": 0.0334, - "grad_norm": 0.6532236337661743, - "learning_rate": 7.697889485159092e-05, - "loss": 0.6264, - "step": 62700 - }, - { - "epoch": 0.033598, - "loss_gen": 4.946453094482422, - "loss_rtd": 0.3616791367530823, - "loss_sent": 0.18877077102661133, - "loss_sod": 0.1229364424943924, - "loss_total": 0.6733863353729248, - "step": 62799 - }, - { - "epoch": 0.033598, - "loss_gen": 4.773370742797852, - "loss_rtd": 0.3507744371891022, - "loss_sent": 0.18613027036190033, - "loss_sod": 0.06355984508991241, - "loss_total": 0.6004645824432373, - "step": 62799 - }, - { - "epoch": 0.0336, - "grad_norm": 3.045973300933838, - "learning_rate": 7.69521720566637e-05, - "loss": 0.627, - "step": 62800 - }, - { - "epoch": 0.033798, - "loss_gen": 3.7490742206573486, - "loss_rtd": 0.3479219675064087, - "loss_sent": 0.03341430798172951, - "loss_sod": 0.3173055052757263, - "loss_total": 0.6986417770385742, - "step": 62899 - }, - { - "epoch": 0.033798, - "loss_gen": 4.756951808929443, - "loss_rtd": 0.36681756377220154, - "loss_sent": 0.17297106981277466, - "loss_sod": 0.08782077580690384, - "loss_total": 0.627609372138977, - "step": 62899 - }, - { - "epoch": 0.0338, - "grad_norm": 1.7359693050384521, - "learning_rate": 7.692543840540478e-05, - "loss": 0.6237, - "step": 62900 - }, - { - "epoch": 0.033998, - "loss_gen": 4.279255390167236, - "loss_rtd": 0.36131471395492554, - "loss_sent": 0.009380928240716457, - "loss_sod": 0.1650504171848297, - "loss_total": 0.5357460975646973, - "step": 62999 - }, - { - "epoch": 0.033998, - "loss_gen": 4.05281400680542, - "loss_rtd": 0.3557135760784149, - "loss_sent": 0.016067415475845337, - "loss_sod": 0.10457506030797958, - "loss_total": 0.47635605931282043, - "step": 62999 - }, - { - "epoch": 0.034, - "grad_norm": 0.7685233950614929, - "learning_rate": 7.689869390858251e-05, - "loss": 0.6241, - "step": 63000 - }, - { - "epoch": 0.034, - "eval_loss": 0.6013754606246948, - "eval_runtime": 151.1274, - "eval_samples_per_second": 102.185, - "eval_steps_per_second": 0.801, - "step": 63000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.0773468017578125, - "loss_rtd": 0.3411266505718231, - "loss_sent": 0.02308918535709381, - "loss_sod": 0.09176616370677948, - "loss_total": 0.455981969833374, - "step": 63099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.9239935874938965, - "loss_rtd": 0.34385135769844055, - "loss_sent": 0.056067075580358505, - "loss_sod": 0.14729167520999908, - "loss_total": 0.5472100973129272, - "step": 63099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.7968565225601196, - "learning_rate": 7.687193857696954e-05, - "loss": 0.6061, - "step": 63100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.347064018249512, - "loss_rtd": 0.3503779470920563, - "loss_sent": 0.0765359178185463, - "loss_sod": 0.029360976070165634, - "loss_total": 0.4562748372554779, - "step": 63199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.632389545440674, - "loss_rtd": 0.35120922327041626, - "loss_sent": 0.1494688093662262, - "loss_sod": 0.017021406441926956, - "loss_total": 0.5176994800567627, - "step": 63199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.7096732258796692, - "learning_rate": 7.684517242134292e-05, - "loss": 0.6253, - "step": 63200 - }, - { - "epoch": 0.000598, - "loss_gen": 4.462292671203613, - "loss_rtd": 0.37799072265625, - "loss_sent": 0.230128213763237, - "loss_sod": 0.06469544023275375, - "loss_total": 0.6728143692016602, - "step": 63299 - }, - { - "epoch": 0.000598, - "loss_gen": 4.9514946937561035, - "loss_rtd": 0.351814329624176, - "loss_sent": 0.2730381488800049, - "loss_sod": 0.04776401072740555, - "loss_total": 0.6726164817810059, - "step": 63299 - }, - { - "epoch": 0.0006, - "grad_norm": 2.0112061500549316, - "learning_rate": 7.681839545248408e-05, - "loss": 0.6235, - "step": 63300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.555823802947998, - "loss_rtd": 0.3706224858760834, - "loss_sent": 0.09607721120119095, - "loss_sod": 0.0519188717007637, - "loss_total": 0.5186185836791992, - "step": 63399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.698625564575195, - "loss_rtd": 0.37373045086860657, - "loss_sent": 0.08867265284061432, - "loss_sod": 0.07897308468818665, - "loss_total": 0.5413761734962463, - "step": 63399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.7768045663833618, - "learning_rate": 7.679160768117875e-05, - "loss": 0.6233, - "step": 63400 - }, - { - "epoch": 0.000998, - "loss_gen": 3.7820255756378174, - "loss_rtd": 0.36425188183784485, - "loss_sent": 0.030947675928473473, - "loss_sod": 0.07450937479734421, - "loss_total": 0.46970894932746887, - "step": 63499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.292600154876709, - "loss_rtd": 0.3501809239387512, - "loss_sent": 0.12530605494976044, - "loss_sod": 0.07304050773382187, - "loss_total": 0.5485274791717529, - "step": 63499 - }, - { - "epoch": 0.001, - "grad_norm": 0.7421267628669739, - "learning_rate": 7.676480911821705e-05, - "loss": 0.6161, - "step": 63500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.7239179611206055, - "loss_rtd": 0.3634018301963806, - "loss_sent": 0.44304990768432617, - "loss_sod": 0.11866919696331024, - "loss_total": 0.9251209497451782, - "step": 63599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.428805828094482, - "loss_rtd": 0.3359716236591339, - "loss_sent": 0.5018662810325623, - "loss_sod": 0.07080628722906113, - "loss_total": 0.9086441993713379, - "step": 63599 - }, - { - "epoch": 0.0012, - "grad_norm": 2.007418394088745, - "learning_rate": 7.673799977439342e-05, - "loss": 0.6302, - "step": 63600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.5692524909973145, - "loss_rtd": 0.3591201603412628, - "loss_sent": 0.16385026276111603, - "loss_sod": 0.13043148815631866, - "loss_total": 0.6534019112586975, - "step": 63699 - }, - { - "epoch": 0.001398, - "loss_gen": 4.602705955505371, - "loss_rtd": 0.3531390130519867, - "loss_sent": 0.65873783826828, - "loss_sod": 0.0991470068693161, - "loss_total": 1.1110239028930664, - "step": 63699 - }, - { - "epoch": 0.0014, - "grad_norm": 3.1703555583953857, - "learning_rate": 7.671117966050669e-05, - "loss": 0.6213, - "step": 63700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.764593601226807, - "loss_rtd": 0.3667794466018677, - "loss_sent": 0.12115845829248428, - "loss_sod": 0.002873638179153204, - "loss_total": 0.4908115267753601, - "step": 63799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.480521202087402, - "loss_rtd": 0.349360853433609, - "loss_sent": 0.187759131193161, - "loss_sod": 0.05906372889876366, - "loss_total": 0.596183717250824, - "step": 63799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.0501720905303955, - "learning_rate": 7.668434878736e-05, - "loss": 0.6061, - "step": 63800 - }, - { - "epoch": 0.001798, - "loss_gen": 3.776412010192871, - "loss_rtd": 0.35311639308929443, - "loss_sent": 9.88018509815447e-05, - "loss_sod": 0.17744015157222748, - "loss_total": 0.5306553840637207, - "step": 63899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.243601322174072, - "loss_rtd": 0.3713495135307312, - "loss_sent": 0.12143199145793915, - "loss_sod": 0.06371863931417465, - "loss_total": 0.5565001368522644, - "step": 63899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.7900858521461487, - "learning_rate": 7.665750716576079e-05, - "loss": 0.628, - "step": 63900 - }, - { - "epoch": 0.001998, - "loss_gen": 4.197118759155273, - "loss_rtd": 0.3397866189479828, - "loss_sent": 0.1800970882177353, - "loss_sod": 0.05564101040363312, - "loss_total": 0.5755247473716736, - "step": 63999 - }, - { - "epoch": 0.001998, - "loss_gen": 4.659060478210449, - "loss_rtd": 0.36526429653167725, - "loss_sent": 0.20626680552959442, - "loss_sod": 0.043784722685813904, - "loss_total": 0.615315854549408, - "step": 63999 - }, - { - "epoch": 0.002, - "grad_norm": 1.2787045240402222, - "learning_rate": 7.66306548065209e-05, - "loss": 0.6301, - "step": 64000 - }, - { - "epoch": 0.002, - "eval_loss": 0.5960562229156494, - "eval_runtime": 154.0597, - "eval_samples_per_second": 100.24, - "eval_steps_per_second": 0.785, - "step": 64000 - }, - { - "epoch": 0.002198, - "loss_gen": 4.280486583709717, - "loss_rtd": 0.3794573247432709, - "loss_sent": 0.2574203610420227, - "loss_sod": 0.009277268312871456, - "loss_total": 0.6461549401283264, - "step": 64099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.851013660430908, - "loss_rtd": 0.356160968542099, - "loss_sent": 0.33749884366989136, - "loss_sod": 0.16863031685352325, - "loss_total": 0.8622901439666748, - "step": 64099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.2959072589874268, - "learning_rate": 7.660379172045642e-05, - "loss": 0.6318, - "step": 64100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.21964168548584, - "loss_rtd": 0.3674074709415436, - "loss_sent": 0.001419869135133922, - "loss_sod": 0.19931061565876007, - "loss_total": 0.5681379437446594, - "step": 64199 - }, - { - "epoch": 0.002398, - "loss_gen": 3.719330072402954, - "loss_rtd": 0.3511691093444824, - "loss_sent": 3.7132998841116205e-05, - "loss_sod": 0.19852951169013977, - "loss_total": 0.5497357249259949, - "step": 64199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.7420927882194519, - "learning_rate": 7.657691791838783e-05, - "loss": 0.6208, - "step": 64200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.600236415863037, - "loss_rtd": 0.34968018531799316, - "loss_sent": 0.24622157216072083, - "loss_sod": 0.04593350738286972, - "loss_total": 0.6418352723121643, - "step": 64299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.608397006988525, - "loss_rtd": 0.35003018379211426, - "loss_sent": 0.29476937651634216, - "loss_sod": 0.04235994815826416, - "loss_total": 0.687159538269043, - "step": 64299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.8296270370483398, - "learning_rate": 7.655003341113987e-05, - "loss": 0.6072, - "step": 64300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.674376487731934, - "loss_rtd": 0.35226747393608093, - "loss_sent": 0.1678522676229477, - "loss_sod": 0.06918580830097198, - "loss_total": 0.589305579662323, - "step": 64399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.135908603668213, - "loss_rtd": 0.3713374137878418, - "loss_sent": 0.06879798322916031, - "loss_sod": 0.18518736958503723, - "loss_total": 0.6253227591514587, - "step": 64399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.7070022821426392, - "learning_rate": 7.652313820954163e-05, - "loss": 0.6276, - "step": 64400 - }, - { - "epoch": 0.002998, - "loss_gen": 4.676963806152344, - "loss_rtd": 0.3604086637496948, - "loss_sent": 0.2059246301651001, - "loss_sod": 0.04678652435541153, - "loss_total": 0.6131198406219482, - "step": 64499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.29622220993042, - "loss_rtd": 0.37331321835517883, - "loss_sent": 0.20497316122055054, - "loss_sod": 0.015880735591053963, - "loss_total": 0.5941671133041382, - "step": 64499 - }, - { - "epoch": 0.003, - "grad_norm": 0.8023879528045654, - "learning_rate": 7.649623232442651e-05, - "loss": 0.6182, - "step": 64500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.412691593170166, - "loss_rtd": 0.37671002745628357, - "loss_sent": 0.10626865923404694, - "loss_sod": 0.011743715032935143, - "loss_total": 0.4947224259376526, - "step": 64599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.241797924041748, - "loss_rtd": 0.3560134172439575, - "loss_sent": 0.22045773267745972, - "loss_sod": 0.012969817966222763, - "loss_total": 0.5894409418106079, - "step": 64599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9714013934135437, - "learning_rate": 7.646931576663214e-05, - "loss": 0.6075, - "step": 64600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.545714378356934, - "loss_rtd": 0.3601056933403015, - "loss_sent": 0.3741488456726074, - "loss_sod": 0.010922128334641457, - "loss_total": 0.7451766729354858, - "step": 64699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.675589561462402, - "loss_rtd": 0.35947301983833313, - "loss_sent": 0.34950560331344604, - "loss_sod": 0.0069966865703463554, - "loss_total": 0.715975284576416, - "step": 64699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.301599144935608, - "learning_rate": 7.644238854700059e-05, - "loss": 0.6364, - "step": 64700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.894478797912598, - "loss_rtd": 0.3421265780925751, - "loss_sent": 0.271994948387146, - "loss_sod": 0.025406386703252792, - "loss_total": 0.6395279169082642, - "step": 64799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.560560703277588, - "loss_rtd": 0.3420966863632202, - "loss_sent": 0.23488765954971313, - "loss_sod": 0.08738589286804199, - "loss_total": 0.6643702387809753, - "step": 64799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.433797836303711, - "learning_rate": 7.641545067637806e-05, - "loss": 0.6162, - "step": 64800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.559416770935059, - "loss_rtd": 0.34533098340034485, - "loss_sent": 0.09186774492263794, - "loss_sod": 0.030342355370521545, - "loss_total": 0.4675410985946655, - "step": 64899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.012601852416992, - "loss_rtd": 0.36505126953125, - "loss_sent": 0.2887916564941406, - "loss_sod": 0.05366521328687668, - "loss_total": 0.7075081467628479, - "step": 64899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.9611746072769165, - "learning_rate": 7.638850216561518e-05, - "loss": 0.6303, - "step": 64900 - }, - { - "epoch": 0.003998, - "loss_gen": 4.474420070648193, - "loss_rtd": 0.3590322732925415, - "loss_sent": 0.11335202306509018, - "loss_sod": 0.018347179517149925, - "loss_total": 0.49073147773742676, - "step": 64999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.450010776519775, - "loss_rtd": 0.3534477949142456, - "loss_sent": 0.08659390360116959, - "loss_sod": 0.05938224121928215, - "loss_total": 0.49942392110824585, - "step": 64999 - }, - { - "epoch": 0.004, - "grad_norm": 0.5334826111793518, - "learning_rate": 7.636154302556676e-05, - "loss": 0.6099, - "step": 65000 - }, - { - "epoch": 0.004, - "eval_loss": 0.5954232811927795, - "eval_runtime": 151.2911, - "eval_samples_per_second": 102.075, - "eval_steps_per_second": 0.8, - "step": 65000 - }, - { - "epoch": 0.004198, - "loss_gen": 3.7600913047790527, - "loss_rtd": 0.35678860545158386, - "loss_sent": 0.00042318625492043793, - "loss_sod": 0.28913211822509766, - "loss_total": 0.6463439464569092, - "step": 65099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.273679733276367, - "loss_rtd": 0.3608115017414093, - "loss_sent": 0.07257717847824097, - "loss_sod": 0.1048695370554924, - "loss_total": 0.5382581949234009, - "step": 65099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.8813515305519104, - "learning_rate": 7.633457326709198e-05, - "loss": 0.6271, - "step": 65100 - }, - { - "epoch": 0.004398, - "loss_gen": 3.7220046520233154, - "loss_rtd": 0.3242909014225006, - "loss_sent": 0.0042475066147744656, - "loss_sod": 0.3289870619773865, - "loss_total": 0.6575254797935486, - "step": 65199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.362858295440674, - "loss_rtd": 0.33517253398895264, - "loss_sent": 0.1916651576757431, - "loss_sod": 0.04640581086277962, - "loss_total": 0.5732434988021851, - "step": 65199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.1218812465667725, - "learning_rate": 7.630759290105422e-05, - "loss": 0.6086, - "step": 65200 - }, - { - "epoch": 0.004598, - "loss_gen": 4.771598815917969, - "loss_rtd": 0.365702748298645, - "loss_sent": 0.22391684353351593, - "loss_sod": 0.1459675431251526, - "loss_total": 0.7355871200561523, - "step": 65299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.376883506774902, - "loss_rtd": 0.35041317343711853, - "loss_sent": 0.14419321715831757, - "loss_sod": 0.03334677591919899, - "loss_total": 0.5279531478881836, - "step": 65299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.9339886903762817, - "learning_rate": 7.62806019383212e-05, - "loss": 0.6084, - "step": 65300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.824034214019775, - "loss_rtd": 0.3548614978790283, - "loss_sent": 0.19835662841796875, - "loss_sod": 0.02828032895922661, - "loss_total": 0.5814984440803528, - "step": 65399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.8600592613220215, - "loss_rtd": 0.35373997688293457, - "loss_sent": 0.05740976706147194, - "loss_sod": 0.2584027945995331, - "loss_total": 0.6695525646209717, - "step": 65399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.6910472512245178, - "learning_rate": 7.625360038976486e-05, - "loss": 0.6054, - "step": 65400 - }, - { - "epoch": 0.004998, - "loss_gen": 3.788112163543701, - "loss_rtd": 0.3388946056365967, - "loss_sent": 0.000608363188803196, - "loss_sod": 0.2595179080963135, - "loss_total": 0.5990208983421326, - "step": 65499 - }, - { - "epoch": 0.004998, - "loss_gen": 3.8532238006591797, - "loss_rtd": 0.32595545053482056, - "loss_sent": 0.09683480858802795, - "loss_sod": 0.011593611910939217, - "loss_total": 0.4343838691711426, - "step": 65499 - }, - { - "epoch": 0.005, - "grad_norm": 1.0176689624786377, - "learning_rate": 7.622658826626144e-05, - "loss": 0.6187, - "step": 65500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.517738342285156, - "loss_rtd": 0.3676239252090454, - "loss_sent": 0.05257377773523331, - "loss_sod": 0.10262086987495422, - "loss_total": 0.5228185653686523, - "step": 65599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.969003677368164, - "loss_rtd": 0.3514210283756256, - "loss_sent": 0.264767050743103, - "loss_sod": 0.03280862420797348, - "loss_total": 0.6489967107772827, - "step": 65599 - }, - { - "epoch": 0.0052, - "grad_norm": 2.6527786254882812, - "learning_rate": 7.619956557869136e-05, - "loss": 0.6206, - "step": 65600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.2397847175598145, - "loss_rtd": 0.3516516089439392, - "loss_sent": 8.085014269454405e-05, - "loss_sod": 0.2125997245311737, - "loss_total": 0.564332127571106, - "step": 65699 - }, - { - "epoch": 0.005398, - "loss_gen": 3.795886278152466, - "loss_rtd": 0.3487846553325653, - "loss_sent": 0.00012464348401408643, - "loss_sod": 0.19986745715141296, - "loss_total": 0.5487767457962036, - "step": 65699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.0295031070709229, - "learning_rate": 7.617253233793944e-05, - "loss": 0.6183, - "step": 65700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.007923126220703, - "loss_rtd": 0.36578112840652466, - "loss_sent": 0.0788542702794075, - "loss_sod": 0.06385549157857895, - "loss_total": 0.5084909200668335, - "step": 65799 - }, - { - "epoch": 0.005598, - "loss_gen": 4.588578701019287, - "loss_rtd": 0.3535233438014984, - "loss_sent": 0.31957945227622986, - "loss_sod": 0.09898321330547333, - "loss_total": 0.7720860242843628, - "step": 65799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.2556023597717285, - "learning_rate": 7.61454885548946e-05, - "loss": 0.6096, - "step": 65800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.651038646697998, - "loss_rtd": 0.34410956501960754, - "loss_sent": 0.0990416631102562, - "loss_sod": 0.040140315890312195, - "loss_total": 0.48329153656959534, - "step": 65899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.929686546325684, - "loss_rtd": 0.36061891913414, - "loss_sent": 0.2854152023792267, - "loss_sod": 0.02335675247013569, - "loss_total": 0.6693909168243408, - "step": 65899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.4057626724243164, - "learning_rate": 7.611843424045011e-05, - "loss": 0.61, - "step": 65900 - }, - { - "epoch": 0.005998, - "loss_gen": 3.417036294937134, - "loss_rtd": 0.3334285616874695, - "loss_sent": 0.049593642354011536, - "loss_sod": 0.06800346076488495, - "loss_total": 0.45102566480636597, - "step": 65999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.139853000640869, - "loss_rtd": 0.34877824783325195, - "loss_sent": 0.2039397805929184, - "loss_sod": 0.17971526086330414, - "loss_total": 0.7324333190917969, - "step": 65999 - }, - { - "epoch": 0.006, - "grad_norm": 0.6943939328193665, - "learning_rate": 7.609136940550343e-05, - "loss": 0.6335, - "step": 66000 - }, - { - "epoch": 0.006, - "eval_loss": 0.5987781882286072, - "eval_runtime": 152.7139, - "eval_samples_per_second": 101.124, - "eval_steps_per_second": 0.792, - "step": 66000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.310494422912598, - "loss_rtd": 0.35313376784324646, - "loss_sent": 0.11600424349308014, - "loss_sod": 0.010630443692207336, - "loss_total": 0.47976845502853394, - "step": 66099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.730156898498535, - "loss_rtd": 0.3578748106956482, - "loss_sent": 0.10235317796468735, - "loss_sod": 0.051168106496334076, - "loss_total": 0.5113961100578308, - "step": 66099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.5719876885414124, - "learning_rate": 7.606429406095626e-05, - "loss": 0.6179, - "step": 66100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.188300609588623, - "loss_rtd": 0.3529491126537323, - "loss_sent": 0.16361874341964722, - "loss_sod": 0.0830625593662262, - "loss_total": 0.5996304154396057, - "step": 66199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.628381729125977, - "loss_rtd": 0.35839518904685974, - "loss_sent": 0.0803244486451149, - "loss_sod": 0.04144023358821869, - "loss_total": 0.48015984892845154, - "step": 66199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.5547677874565125, - "learning_rate": 7.603720821771457e-05, - "loss": 0.6283, - "step": 66200 - }, - { - "epoch": 0.006598, - "loss_gen": 4.736541748046875, - "loss_rtd": 0.35284411907196045, - "loss_sent": 0.1723390817642212, - "loss_sod": 0.11351539194583893, - "loss_total": 0.6386985778808594, - "step": 66299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.497254848480225, - "loss_rtd": 0.34798967838287354, - "loss_sent": 0.264619380235672, - "loss_sod": 0.04229599982500076, - "loss_total": 0.6549050807952881, - "step": 66299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.5063583850860596, - "learning_rate": 7.601011188668851e-05, - "loss": 0.6046, - "step": 66300 - }, - { - "epoch": 0.006798, - "loss_gen": 4.981752872467041, - "loss_rtd": 0.3534925878047943, - "loss_sent": 0.18722547590732574, - "loss_sod": 0.11359981447458267, - "loss_total": 0.6543178558349609, - "step": 66399 - }, - { - "epoch": 0.006798, - "loss_gen": 4.6029462814331055, - "loss_rtd": 0.3494454622268677, - "loss_sent": 0.23802660405635834, - "loss_sod": 0.14635378122329712, - "loss_total": 0.7338258624076843, - "step": 66399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.9086915254592896, - "learning_rate": 7.598300507879252e-05, - "loss": 0.6134, - "step": 66400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.720383167266846, - "loss_rtd": 0.3627254366874695, - "loss_sent": 0.2635696828365326, - "loss_sod": 0.09105813503265381, - "loss_total": 0.7173532247543335, - "step": 66499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.396498680114746, - "loss_rtd": 0.368895024061203, - "loss_sent": 0.16009920835494995, - "loss_sod": 0.12825074791908264, - "loss_total": 0.6572449803352356, - "step": 66499 - }, - { - "epoch": 0.007, - "grad_norm": 0.8973502516746521, - "learning_rate": 7.595588780494517e-05, - "loss": 0.6199, - "step": 66500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.397541522979736, - "loss_rtd": 0.35902175307273865, - "loss_sent": 0.31910210847854614, - "loss_sod": 0.09253882616758347, - "loss_total": 0.7706626653671265, - "step": 66599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.694241046905518, - "loss_rtd": 0.3520123362541199, - "loss_sent": 0.3550555109977722, - "loss_sod": 0.04164835065603256, - "loss_total": 0.7487162351608276, - "step": 66599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.9792577624320984, - "learning_rate": 7.592876007606933e-05, - "loss": 0.6145, - "step": 66600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.712134838104248, - "loss_rtd": 0.35215991735458374, - "loss_sent": 0.07199952751398087, - "loss_sod": 0.05022723227739334, - "loss_total": 0.47438666224479675, - "step": 66699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.361044406890869, - "loss_rtd": 0.3654535114765167, - "loss_sent": 0.23767469823360443, - "loss_sod": 0.08877260237932205, - "loss_total": 0.6919007897377014, - "step": 66699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.9028485417366028, - "learning_rate": 7.590162190309202e-05, - "loss": 0.6217, - "step": 66700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.741976261138916, - "loss_rtd": 0.3726173937320709, - "loss_sent": 0.11401088535785675, - "loss_sod": 0.08064589649438858, - "loss_total": 0.5672741532325745, - "step": 66799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.381463050842285, - "loss_rtd": 0.36060744524002075, - "loss_sent": 0.1998521238565445, - "loss_sod": 0.011398155242204666, - "loss_total": 0.571857750415802, - "step": 66799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.2918533086776733, - "learning_rate": 7.587447329694451e-05, - "loss": 0.6049, - "step": 66800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.1675333976745605, - "loss_rtd": 0.36102527379989624, - "loss_sent": 0.19719304144382477, - "loss_sod": 0.014241417869925499, - "loss_total": 0.5724597573280334, - "step": 66899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.874257564544678, - "loss_rtd": 0.36321237683296204, - "loss_sent": 0.24794764816761017, - "loss_sod": 0.021412856876850128, - "loss_total": 0.6325728893280029, - "step": 66899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.7860808372497559, - "learning_rate": 7.584731426856226e-05, - "loss": 0.6226, - "step": 66900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.2689924240112305, - "loss_rtd": 0.35132095217704773, - "loss_sent": 0.10176771879196167, - "loss_sod": 0.020253252238035202, - "loss_total": 0.4733419120311737, - "step": 66999 - }, - { - "epoch": 0.007998, - "loss_gen": 4.7362236976623535, - "loss_rtd": 0.3590451180934906, - "loss_sent": 0.4144623577594757, - "loss_sod": 0.017209095880389214, - "loss_total": 0.7907165288925171, - "step": 66999 - }, - { - "epoch": 0.008, - "grad_norm": 0.8132656216621399, - "learning_rate": 7.58201448288849e-05, - "loss": 0.6109, - "step": 67000 - }, - { - "epoch": 0.008, - "eval_loss": 0.594206690788269, - "eval_runtime": 151.5032, - "eval_samples_per_second": 101.932, - "eval_steps_per_second": 0.799, - "step": 67000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.255455493927002, - "loss_rtd": 0.3566700220108032, - "loss_sent": 0.06086091697216034, - "loss_sod": 0.1735401749610901, - "loss_total": 0.5910711288452148, - "step": 67099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.768855571746826, - "loss_rtd": 0.3584344685077667, - "loss_sent": 0.263040155172348, - "loss_sod": 0.08959699422121048, - "loss_total": 0.7110716104507446, - "step": 67099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.561359167098999, - "learning_rate": 7.579296498885629e-05, - "loss": 0.6237, - "step": 67100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.59312105178833, - "loss_rtd": 0.3655880391597748, - "loss_sent": 0.2877505421638489, - "loss_sod": 0.05011430382728577, - "loss_total": 0.7034528851509094, - "step": 67199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.395310878753662, - "loss_rtd": 0.333885133266449, - "loss_sent": 0.39168989658355713, - "loss_sod": 0.038761869072914124, - "loss_total": 0.764336884021759, - "step": 67199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.665055751800537, - "learning_rate": 7.576577475942447e-05, - "loss": 0.6257, - "step": 67200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.306066513061523, - "loss_rtd": 0.34404680132865906, - "loss_sent": 0.0048809596337378025, - "loss_sod": 0.2033056765794754, - "loss_total": 0.5522334575653076, - "step": 67299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.293673038482666, - "loss_rtd": 0.3350405693054199, - "loss_sent": 0.07155311852693558, - "loss_sod": 0.041877102106809616, - "loss_total": 0.4484707713127136, - "step": 67299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.913288414478302, - "learning_rate": 7.573857415154166e-05, - "loss": 0.617, - "step": 67300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.843385219573975, - "loss_rtd": 0.34204018115997314, - "loss_sent": 0.14883247017860413, - "loss_sod": 0.06675544381141663, - "loss_total": 0.5576280951499939, - "step": 67399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.128697395324707, - "loss_rtd": 0.36374631524086, - "loss_sent": 0.29251644015312195, - "loss_sod": 0.06548918783664703, - "loss_total": 0.7217519283294678, - "step": 67399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.7872377634048462, - "learning_rate": 7.571136317616425e-05, - "loss": 0.6271, - "step": 67400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.786539077758789, - "loss_rtd": 0.3563820421695709, - "loss_sent": 0.42853304743766785, - "loss_sod": 0.054787375032901764, - "loss_total": 0.8397024869918823, - "step": 67499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.070417881011963, - "loss_rtd": 0.33073729276657104, - "loss_sent": 0.16601036489009857, - "loss_sod": 0.06587125360965729, - "loss_total": 0.5626189112663269, - "step": 67499 - }, - { - "epoch": 0.009, - "grad_norm": 1.2672414779663086, - "learning_rate": 7.568414184425283e-05, - "loss": 0.6094, - "step": 67500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.705150127410889, - "loss_rtd": 0.3587769865989685, - "loss_sent": 0.09281128644943237, - "loss_sod": 0.06759106367826462, - "loss_total": 0.5191793441772461, - "step": 67599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.746580123901367, - "loss_rtd": 0.35041725635528564, - "loss_sent": 0.31421762704849243, - "loss_sod": 0.026292871683835983, - "loss_total": 0.6909277439117432, - "step": 67599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.3968737125396729, - "learning_rate": 7.565691016677216e-05, - "loss": 0.6102, - "step": 67600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.4377241134643555, - "loss_rtd": 0.3586418032646179, - "loss_sent": 0.2569618821144104, - "loss_sod": 0.06580895185470581, - "loss_total": 0.6814126372337341, - "step": 67699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.565977096557617, - "loss_rtd": 0.34834152460098267, - "loss_sent": 0.3570059537887573, - "loss_sod": 0.06186792254447937, - "loss_total": 0.767215371131897, - "step": 67699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.1630606651306152, - "learning_rate": 7.562966815469114e-05, - "loss": 0.6205, - "step": 67700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.7827911376953125, - "loss_rtd": 0.34308144450187683, - "loss_sent": 0.1217210441827774, - "loss_sod": 0.11103591322898865, - "loss_total": 0.5758383870124817, - "step": 67799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.414473056793213, - "loss_rtd": 0.3497660160064697, - "loss_sent": 0.23417848348617554, - "loss_sod": 0.017716476693749428, - "loss_total": 0.601660966873169, - "step": 67799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.7495279312133789, - "learning_rate": 7.560241581898284e-05, - "loss": 0.62, - "step": 67800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.713752269744873, - "loss_rtd": 0.3629918694496155, - "loss_sent": 0.19341717660427094, - "loss_sod": 0.04620201140642166, - "loss_total": 0.6026110649108887, - "step": 67899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.554504871368408, - "loss_rtd": 0.3645625114440918, - "loss_sent": 0.19659636914730072, - "loss_sod": 0.025591988116502762, - "loss_total": 0.586750864982605, - "step": 67899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.5260523557662964, - "learning_rate": 7.55751531706245e-05, - "loss": 0.6138, - "step": 67900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.0989251136779785, - "loss_rtd": 0.35533761978149414, - "loss_sent": 0.09594675153493881, - "loss_sod": 0.036903925240039825, - "loss_total": 0.4881882965564728, - "step": 67999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.533815383911133, - "loss_rtd": 0.3532601296901703, - "loss_sent": 0.3286660611629486, - "loss_sod": 0.1737453192472458, - "loss_total": 0.8556715250015259, - "step": 67999 - }, - { - "epoch": 0.01, - "grad_norm": 0.9284948110580444, - "learning_rate": 7.554788022059757e-05, - "loss": 0.617, - "step": 68000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5927685499191284, - "eval_runtime": 151.3074, - "eval_samples_per_second": 102.064, - "eval_steps_per_second": 0.8, - "step": 68000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.1164350509643555, - "loss_rtd": 0.337686151266098, - "loss_sent": 0.08159107714891434, - "loss_sod": 0.09356731176376343, - "loss_total": 0.5128445625305176, - "step": 68099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.555169582366943, - "loss_rtd": 0.35074982047080994, - "loss_sent": 0.1417180746793747, - "loss_sod": 0.10723769664764404, - "loss_total": 0.5997055768966675, - "step": 68099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.1322089433670044, - "learning_rate": 7.55205969798875e-05, - "loss": 0.6282, - "step": 68100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.946879863739014, - "loss_rtd": 0.3475859761238098, - "loss_sent": 0.19958215951919556, - "loss_sod": 0.15003390610218048, - "loss_total": 0.6972020268440247, - "step": 68199 - }, - { - "epoch": 0.010398, - "loss_gen": 4.611652851104736, - "loss_rtd": 0.3355503976345062, - "loss_sent": 0.20881244540214539, - "loss_sod": 0.006465624086558819, - "loss_total": 0.5508284568786621, - "step": 68199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.731116771697998, - "learning_rate": 7.549330345948403e-05, - "loss": 0.625, - "step": 68200 - }, - { - "epoch": 0.010598, - "loss_gen": 3.7777929306030273, - "loss_rtd": 0.33989417552948, - "loss_sent": 0.0103762187063694, - "loss_sod": 0.15077877044677734, - "loss_total": 0.5010491609573364, - "step": 68299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.680449485778809, - "loss_rtd": 0.35290950536727905, - "loss_sent": 0.21351496875286102, - "loss_sod": 0.012890908867120743, - "loss_total": 0.5793153643608093, - "step": 68299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9339681267738342, - "learning_rate": 7.546599967038098e-05, - "loss": 0.6124, - "step": 68300 - }, - { - "epoch": 0.010798, - "loss_gen": 3.854318618774414, - "loss_rtd": 0.34512147307395935, - "loss_sent": 4.1809904359979555e-05, - "loss_sod": 0.1997881531715393, - "loss_total": 0.5449513792991638, - "step": 68399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.7505669593811035, - "loss_rtd": 0.3413781523704529, - "loss_sent": 0.12651513516902924, - "loss_sod": 0.04629211872816086, - "loss_total": 0.5141854286193848, - "step": 68399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.1004862785339355, - "learning_rate": 7.54386856235763e-05, - "loss": 0.5988, - "step": 68400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.606143951416016, - "loss_rtd": 0.34316402673721313, - "loss_sent": 0.018881957978010178, - "loss_sod": 0.1613437682390213, - "loss_total": 0.5233897566795349, - "step": 68499 - }, - { - "epoch": 0.010998, - "loss_gen": 3.749549388885498, - "loss_rtd": 0.32255205512046814, - "loss_sent": 0.00953242089599371, - "loss_sod": 0.08570539951324463, - "loss_total": 0.41778987646102905, - "step": 68499 - }, - { - "epoch": 0.011, - "grad_norm": 0.9866381883621216, - "learning_rate": 7.54113613300721e-05, - "loss": 0.6168, - "step": 68500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.0278639793396, - "loss_rtd": 0.35034534335136414, - "loss_sent": 6.925057823536918e-05, - "loss_sod": 0.3125148415565491, - "loss_total": 0.6629294157028198, - "step": 68599 - }, - { - "epoch": 0.011198, - "loss_gen": 3.742182970046997, - "loss_rtd": 0.3397414982318878, - "loss_sent": 5.0243801524629816e-05, - "loss_sod": 0.13007307052612305, - "loss_total": 0.4698648154735565, - "step": 68599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.2500969171524048, - "learning_rate": 7.53840268008746e-05, - "loss": 0.6263, - "step": 68600 - }, - { - "epoch": 0.011398, - "loss_gen": 4.967545032501221, - "loss_rtd": 0.35615482926368713, - "loss_sent": 0.237074613571167, - "loss_sod": 0.04892565682530403, - "loss_total": 0.642155110836029, - "step": 68699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.54293966293335, - "loss_rtd": 0.3437325358390808, - "loss_sent": 0.20231592655181885, - "loss_sod": 0.02160598337650299, - "loss_total": 0.5676544308662415, - "step": 68699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.12941575050354, - "learning_rate": 7.535668204699413e-05, - "loss": 0.6202, - "step": 68700 - }, - { - "epoch": 0.011598, - "loss_gen": 4.625636577606201, - "loss_rtd": 0.35658740997314453, - "loss_sent": 0.22827327251434326, - "loss_sod": 0.02449220046401024, - "loss_total": 0.6093528866767883, - "step": 68799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.631955146789551, - "loss_rtd": 0.34243470430374146, - "loss_sent": 0.13356204330921173, - "loss_sod": 0.0317053347826004, - "loss_total": 0.5077020525932312, - "step": 68799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.6647234559059143, - "learning_rate": 7.532932707944515e-05, - "loss": 0.6109, - "step": 68800 - }, - { - "epoch": 0.011798, - "loss_gen": 4.35915470123291, - "loss_rtd": 0.34421539306640625, - "loss_sent": 0.012710307724773884, - "loss_sod": 0.1049477681517601, - "loss_total": 0.46187347173690796, - "step": 68899 - }, - { - "epoch": 0.011798, - "loss_gen": 3.6791775226593018, - "loss_rtd": 0.34611740708351135, - "loss_sent": 0.0006580319022759795, - "loss_sod": 0.23809869587421417, - "loss_total": 0.584874153137207, - "step": 68899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.886480450630188, - "learning_rate": 7.530196190924628e-05, - "loss": 0.6084, - "step": 68900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.0167646408081055, - "loss_rtd": 0.340043306350708, - "loss_sent": 0.11419618874788284, - "loss_sod": 0.08260629326105118, - "loss_total": 0.5368458032608032, - "step": 68999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.605703353881836, - "loss_rtd": 0.3495825529098511, - "loss_sent": 0.27643582224845886, - "loss_sod": 0.0530291423201561, - "loss_total": 0.6790475249290466, - "step": 68999 - }, - { - "epoch": 0.012, - "grad_norm": 1.7575035095214844, - "learning_rate": 7.527458654742017e-05, - "loss": 0.6009, - "step": 69000 - }, - { - "epoch": 0.012, - "eval_loss": 0.5909610986709595, - "eval_runtime": 151.308, - "eval_samples_per_second": 102.063, - "eval_steps_per_second": 0.8, - "step": 69000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.608206748962402, - "loss_rtd": 0.36003121733665466, - "loss_sent": 0.14249297976493835, - "loss_sod": 0.035286448895931244, - "loss_total": 0.5378106832504272, - "step": 69099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.639560699462891, - "loss_rtd": 0.3554588556289673, - "loss_sent": 0.27541911602020264, - "loss_sod": 0.0017712387489154935, - "loss_total": 0.6326491832733154, - "step": 69099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.5627210736274719, - "learning_rate": 7.524720100499361e-05, - "loss": 0.6167, - "step": 69100 - }, - { - "epoch": 0.012398, - "loss_gen": 4.804170608520508, - "loss_rtd": 0.3510792553424835, - "loss_sent": 0.11709165573120117, - "loss_sod": 0.09820735454559326, - "loss_total": 0.5663782358169556, - "step": 69199 - }, - { - "epoch": 0.012398, - "loss_gen": 4.468132972717285, - "loss_rtd": 0.34883058071136475, - "loss_sent": 0.31851422786712646, - "loss_sod": 0.024485470727086067, - "loss_total": 0.6918302774429321, - "step": 69199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.4534741640090942, - "learning_rate": 7.52198052929975e-05, - "loss": 0.6186, - "step": 69200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.745293617248535, - "loss_rtd": 0.36391720175743103, - "loss_sent": 0.5158696174621582, - "loss_sod": 0.005640673916786909, - "loss_total": 0.8854274749755859, - "step": 69299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.706986904144287, - "loss_rtd": 0.36713218688964844, - "loss_sent": 0.17896877229213715, - "loss_sod": 0.018621649593114853, - "loss_total": 0.5647225975990295, - "step": 69299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.0246957540512085, - "learning_rate": 7.519239942246686e-05, - "loss": 0.617, - "step": 69300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.811370849609375, - "loss_rtd": 0.35900723934173584, - "loss_sent": 0.26550769805908203, - "loss_sod": 0.06882263720035553, - "loss_total": 0.6933375597000122, - "step": 69399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.900637149810791, - "loss_rtd": 0.343448668718338, - "loss_sent": 0.33858102560043335, - "loss_sod": 0.023398658260703087, - "loss_total": 0.7054283618927002, - "step": 69399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.4369068145751953, - "learning_rate": 7.516498340444071e-05, - "loss": 0.6112, - "step": 69400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.447149276733398, - "loss_rtd": 0.35762926936149597, - "loss_sent": 0.10391967743635178, - "loss_sod": 0.006824937183409929, - "loss_total": 0.4683738946914673, - "step": 69499 - }, - { - "epoch": 0.012998, - "loss_gen": 3.9780890941619873, - "loss_rtd": 0.3181975483894348, - "loss_sent": 0.034344203770160675, - "loss_sod": 0.05337222293019295, - "loss_total": 0.40591397881507874, - "step": 69499 - }, - { - "epoch": 0.013, - "grad_norm": 0.8781808614730835, - "learning_rate": 7.51375572499623e-05, - "loss": 0.607, - "step": 69500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.7805328369140625, - "loss_rtd": 0.37241312861442566, - "loss_sent": 0.1631665676832199, - "loss_sod": 0.024414723739027977, - "loss_total": 0.5599943995475769, - "step": 69599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.68084192276001, - "loss_rtd": 0.3549376428127289, - "loss_sent": 0.13597002625465393, - "loss_sod": 0.15555113554000854, - "loss_total": 0.6464587450027466, - "step": 69599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0803793668746948, - "learning_rate": 7.51101209700788e-05, - "loss": 0.6164, - "step": 69600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.543577194213867, - "loss_rtd": 0.3576522171497345, - "loss_sent": 0.049495793879032135, - "loss_sod": 0.18203045427799225, - "loss_total": 0.5891785025596619, - "step": 69699 - }, - { - "epoch": 0.013398, - "loss_gen": 3.8307535648345947, - "loss_rtd": 0.32948005199432373, - "loss_sent": 0.06662531197071075, - "loss_sod": 0.12097512930631638, - "loss_total": 0.5170804858207703, - "step": 69699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.8176936507225037, - "learning_rate": 7.50826745758416e-05, - "loss": 0.614, - "step": 69700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.719297885894775, - "loss_rtd": 0.3517528474330902, - "loss_sent": 0.08722758293151855, - "loss_sod": 0.08633407205343246, - "loss_total": 0.5253145098686218, - "step": 69799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.140933513641357, - "loss_rtd": 0.35081884264945984, - "loss_sent": 0.041073914617300034, - "loss_sod": 0.15672434866428375, - "loss_total": 0.5486171245574951, - "step": 69799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.0456980466842651, - "learning_rate": 7.505521807830604e-05, - "loss": 0.6383, - "step": 69800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.080830097198486, - "loss_rtd": 0.350803017616272, - "loss_sent": 0.3331204652786255, - "loss_sod": 0.020194532349705696, - "loss_total": 0.704118013381958, - "step": 69899 - }, - { - "epoch": 0.013798, - "loss_gen": 4.873362064361572, - "loss_rtd": 0.3467556834220886, - "loss_sent": 0.07918145507574081, - "loss_sod": 0.06439277529716492, - "loss_total": 0.49032992124557495, - "step": 69899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.6474930644035339, - "learning_rate": 7.502775148853167e-05, - "loss": 0.6277, - "step": 69900 - }, - { - "epoch": 0.013998, - "loss_gen": 4.525168418884277, - "loss_rtd": 0.3627609610557556, - "loss_sent": 0.5461922287940979, - "loss_sod": 0.02388431876897812, - "loss_total": 0.9328374862670898, - "step": 69999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.732813835144043, - "loss_rtd": 0.35742875933647156, - "loss_sent": 0.22035260498523712, - "loss_sod": 0.043894391506910324, - "loss_total": 0.6216757297515869, - "step": 69999 - }, - { - "epoch": 0.014, - "grad_norm": 1.2363808155059814, - "learning_rate": 7.5000274817582e-05, - "loss": 0.6089, - "step": 70000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5933198928833008, - "eval_runtime": 151.4068, - "eval_samples_per_second": 101.997, - "eval_steps_per_second": 0.799, - "step": 70000 - }, - { - "epoch": 0.014198, - "loss_gen": 3.4616024494171143, - "loss_rtd": 0.3362026512622833, - "loss_sent": 0.0014539293479174376, - "loss_sod": 0.2485821694135666, - "loss_total": 0.5862387418746948, - "step": 70099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.8870673179626465, - "loss_rtd": 0.3374359607696533, - "loss_sent": 0.27305692434310913, - "loss_sod": 0.07673107087612152, - "loss_total": 0.6872239708900452, - "step": 70099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.3238862752914429, - "learning_rate": 7.49727880765246e-05, - "loss": 0.6184, - "step": 70100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.444095134735107, - "loss_rtd": 0.3639775514602661, - "loss_sent": 0.2643648386001587, - "loss_sod": 0.01581292226910591, - "loss_total": 0.6441553235054016, - "step": 70199 - }, - { - "epoch": 0.014398, - "loss_gen": 4.8016886711120605, - "loss_rtd": 0.347025603055954, - "loss_sent": 0.12922294437885284, - "loss_sod": 0.058164454996585846, - "loss_total": 0.5344129800796509, - "step": 70199 - }, - { - "epoch": 0.0144, - "grad_norm": 2.101402759552002, - "learning_rate": 7.494529127643116e-05, - "loss": 0.6069, - "step": 70200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.9519267082214355, - "loss_rtd": 0.36667752265930176, - "loss_sent": 0.15208236873149872, - "loss_sod": 0.10472363233566284, - "loss_total": 0.6234835386276245, - "step": 70299 - }, - { - "epoch": 0.014598, - "loss_gen": 4.56881046295166, - "loss_rtd": 0.3670293390750885, - "loss_sent": 0.20608092844486237, - "loss_sod": 0.00635911151766777, - "loss_total": 0.5794693827629089, - "step": 70299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.5491467714309692, - "learning_rate": 7.491778442837737e-05, - "loss": 0.6247, - "step": 70300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.125654220581055, - "loss_rtd": 0.34238919615745544, - "loss_sent": 0.02770579606294632, - "loss_sod": 0.07730180770158768, - "loss_total": 0.44739678502082825, - "step": 70399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.639283180236816, - "loss_rtd": 0.3573998510837555, - "loss_sent": 0.004309282172471285, - "loss_sod": 0.395590603351593, - "loss_total": 0.7572997808456421, - "step": 70399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.4109572172164917, - "learning_rate": 7.4890267543443e-05, - "loss": 0.62, - "step": 70400 - }, - { - "epoch": 0.014998, - "loss_gen": 3.8224353790283203, - "loss_rtd": 0.31847918033599854, - "loss_sent": 0.08672991394996643, - "loss_sod": 0.08530547469854355, - "loss_total": 0.4905145764350891, - "step": 70499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.5329508781433105, - "loss_rtd": 0.35414499044418335, - "loss_sent": 0.050933536142110825, - "loss_sod": 0.12525150179862976, - "loss_total": 0.5303300023078918, - "step": 70499 - }, - { - "epoch": 0.015, - "grad_norm": 0.8013997673988342, - "learning_rate": 7.486274063271183e-05, - "loss": 0.6037, - "step": 70500 - }, - { - "epoch": 0.015198, - "loss_gen": 3.8157787322998047, - "loss_rtd": 0.32829922437667847, - "loss_sent": 0.002050962997600436, - "loss_sod": 0.1017485186457634, - "loss_total": 0.43209871649742126, - "step": 70599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.587977886199951, - "loss_rtd": 0.3350503146648407, - "loss_sent": 0.21401993930339813, - "loss_sod": 0.0497254952788353, - "loss_total": 0.5987957715988159, - "step": 70599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.8044083118438721, - "learning_rate": 7.483520370727171e-05, - "loss": 0.6199, - "step": 70600 - }, - { - "epoch": 0.015398, - "loss_gen": 4.2190842628479, - "loss_rtd": 0.35025885701179504, - "loss_sent": 0.06589227169752121, - "loss_sod": 0.11934252083301544, - "loss_total": 0.5354936718940735, - "step": 70699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.106986045837402, - "loss_rtd": 0.34259411692619324, - "loss_sent": 0.024853527545928955, - "loss_sod": 0.08085648715496063, - "loss_total": 0.44830411672592163, - "step": 70699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.6379954218864441, - "learning_rate": 7.480765677821451e-05, - "loss": 0.6126, - "step": 70700 - }, - { - "epoch": 0.015598, - "loss_gen": 4.338307857513428, - "loss_rtd": 0.36540743708610535, - "loss_sent": 0.28503185510635376, - "loss_sod": 0.009250342845916748, - "loss_total": 0.6596896648406982, - "step": 70799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.614833831787109, - "loss_rtd": 0.3449106216430664, - "loss_sent": 0.15405747294425964, - "loss_sod": 0.1161065399646759, - "loss_total": 0.615074634552002, - "step": 70799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.7099796533584595, - "learning_rate": 7.478009985663613e-05, - "loss": 0.6094, - "step": 70800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.494375705718994, - "loss_rtd": 0.3482241630554199, - "loss_sent": 0.11123603582382202, - "loss_sod": 0.03280208632349968, - "loss_total": 0.4922622740268707, - "step": 70899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.529008865356445, - "loss_rtd": 0.3492968678474426, - "loss_sent": 0.25559794902801514, - "loss_sod": 0.06588475406169891, - "loss_total": 0.6707795858383179, - "step": 70899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.142425298690796, - "learning_rate": 7.475253295363648e-05, - "loss": 0.6302, - "step": 70900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.294867515563965, - "loss_rtd": 0.33559533953666687, - "loss_sent": 0.027024684473872185, - "loss_sod": 0.20596477389335632, - "loss_total": 0.5685847997665405, - "step": 70999 - }, - { - "epoch": 0.015998, - "loss_gen": 4.484335422515869, - "loss_rtd": 0.3317863345146179, - "loss_sent": 0.24554044008255005, - "loss_sod": 0.043406642973423004, - "loss_total": 0.620733380317688, - "step": 70999 - }, - { - "epoch": 0.016, - "grad_norm": 1.1793019771575928, - "learning_rate": 7.472495608031953e-05, - "loss": 0.6204, - "step": 71000 - }, - { - "epoch": 0.016, - "eval_loss": 0.5913823843002319, - "eval_runtime": 151.6947, - "eval_samples_per_second": 101.803, - "eval_steps_per_second": 0.798, - "step": 71000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.870697975158691, - "loss_rtd": 0.36172083020210266, - "loss_sent": 0.13929325342178345, - "loss_sod": 0.028338346630334854, - "loss_total": 0.5293524265289307, - "step": 71099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.943262577056885, - "loss_rtd": 0.34843337535858154, - "loss_sent": 0.1354500949382782, - "loss_sod": 0.0597839392721653, - "loss_total": 0.5436674356460571, - "step": 71099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.7821417450904846, - "learning_rate": 7.469736924779324e-05, - "loss": 0.6059, - "step": 71100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.828202724456787, - "loss_rtd": 0.35496997833251953, - "loss_sent": 0.1153426542878151, - "loss_sod": 0.108072429895401, - "loss_total": 0.578385055065155, - "step": 71199 - }, - { - "epoch": 0.016398, - "loss_gen": 3.7728540897369385, - "loss_rtd": 0.3343942165374756, - "loss_sent": 0.00378451868891716, - "loss_sod": 0.1482536345720291, - "loss_total": 0.48643234372138977, - "step": 71199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.8646527528762817, - "learning_rate": 7.466977246716955e-05, - "loss": 0.6138, - "step": 71200 - }, - { - "epoch": 0.016598, - "loss_gen": 4.1918864250183105, - "loss_rtd": 0.34998562932014465, - "loss_sent": 0.03945707529783249, - "loss_sod": 0.2179742455482483, - "loss_total": 0.6074169874191284, - "step": 71299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.605656147003174, - "loss_rtd": 0.35405847430229187, - "loss_sent": 0.3797968626022339, - "loss_sod": 0.0821184515953064, - "loss_total": 0.8159737586975098, - "step": 71299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.2149840593338013, - "learning_rate": 7.464216574956446e-05, - "loss": 0.5955, - "step": 71300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.070539474487305, - "loss_rtd": 0.35759761929512024, - "loss_sent": 0.24154625833034515, - "loss_sod": 0.07218272984027863, - "loss_total": 0.6713266372680664, - "step": 71399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.95283842086792, - "loss_rtd": 0.3476433753967285, - "loss_sent": 0.22640782594680786, - "loss_sod": 0.018956400454044342, - "loss_total": 0.5930075645446777, - "step": 71399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.3016432523727417, - "learning_rate": 7.461454910609795e-05, - "loss": 0.6202, - "step": 71400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.5588812828063965, - "loss_rtd": 0.34922486543655396, - "loss_sent": 0.11128672957420349, - "loss_sod": 0.12964019179344177, - "loss_total": 0.5901517868041992, - "step": 71499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.221651554107666, - "loss_rtd": 0.3547467589378357, - "loss_sent": 0.3745213449001312, - "loss_sod": 0.08372054249048233, - "loss_total": 0.8129886388778687, - "step": 71499 - }, - { - "epoch": 0.017, - "grad_norm": 1.3753210306167603, - "learning_rate": 7.458692254789401e-05, - "loss": 0.6123, - "step": 71500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.223153114318848, - "loss_rtd": 0.34834831953048706, - "loss_sent": 0.04446294903755188, - "loss_sod": 0.20041140913963318, - "loss_total": 0.5932226181030273, - "step": 71599 - }, - { - "epoch": 0.017198, - "loss_gen": 3.967151165008545, - "loss_rtd": 0.3504388630390167, - "loss_sent": 0.0001261267752852291, - "loss_sod": 0.36733120679855347, - "loss_total": 0.7178962230682373, - "step": 71599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.626511812210083, - "learning_rate": 7.455928608608061e-05, - "loss": 0.6192, - "step": 71600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.821959018707275, - "loss_rtd": 0.35648804903030396, - "loss_sent": 0.1773880422115326, - "loss_sod": 0.20540033280849457, - "loss_total": 0.7392764091491699, - "step": 71699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.832204818725586, - "loss_rtd": 0.35967719554901123, - "loss_sent": 0.07360906898975372, - "loss_sod": 0.006231877952814102, - "loss_total": 0.43951815366744995, - "step": 71699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.7945804595947266, - "learning_rate": 7.45316397317897e-05, - "loss": 0.6075, - "step": 71700 - }, - { - "epoch": 0.017598, - "loss_gen": 4.836548805236816, - "loss_rtd": 0.34330615401268005, - "loss_sent": 0.14278709888458252, - "loss_sod": 0.015522902831435204, - "loss_total": 0.5016161203384399, - "step": 71799 - }, - { - "epoch": 0.017598, - "loss_gen": 4.607902526855469, - "loss_rtd": 0.3659072518348694, - "loss_sent": 0.1836635023355484, - "loss_sod": 0.024699239060282707, - "loss_total": 0.5742700099945068, - "step": 71799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.4797484874725342, - "learning_rate": 7.450398349615726e-05, - "loss": 0.6072, - "step": 71800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.702144145965576, - "loss_rtd": 0.3708803653717041, - "loss_sent": 0.07195448875427246, - "loss_sod": 0.028032181784510612, - "loss_total": 0.4708670377731323, - "step": 71899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.1101155281066895, - "loss_rtd": 0.35019639134407043, - "loss_sent": 0.0927276536822319, - "loss_sod": 0.10751849412918091, - "loss_total": 0.5504425168037415, - "step": 71899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.7477824687957764, - "learning_rate": 7.447631739032318e-05, - "loss": 0.5924, - "step": 71900 - }, - { - "epoch": 0.017998, - "loss_gen": 4.389316082000732, - "loss_rtd": 0.33602374792099, - "loss_sent": 0.009403475560247898, - "loss_sod": 0.25647178292274475, - "loss_total": 0.6018990278244019, - "step": 71999 - }, - { - "epoch": 0.017998, - "loss_gen": 3.5910627841949463, - "loss_rtd": 0.31313490867614746, - "loss_sent": 0.03344634920358658, - "loss_sod": 0.09169971197843552, - "loss_total": 0.43828096985816956, - "step": 71999 - }, - { - "epoch": 0.018, - "grad_norm": 0.6587882041931152, - "learning_rate": 7.44486414254314e-05, - "loss": 0.6089, - "step": 72000 - }, - { - "epoch": 0.018, - "eval_loss": 0.5882743000984192, - "eval_runtime": 152.8817, - "eval_samples_per_second": 101.013, - "eval_steps_per_second": 0.791, - "step": 72000 - }, - { - "epoch": 0.018198, - "loss_gen": 4.688161373138428, - "loss_rtd": 0.3436170816421509, - "loss_sent": 0.2503986954689026, - "loss_sod": 0.00963823776692152, - "loss_total": 0.6036540269851685, - "step": 72099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.017253398895264, - "loss_rtd": 0.34326061606407166, - "loss_sent": 0.0735774114727974, - "loss_sod": 0.09248524904251099, - "loss_total": 0.5093232989311218, - "step": 72099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.6841739416122437, - "learning_rate": 7.442095561262975e-05, - "loss": 0.607, - "step": 72100 - }, - { - "epoch": 0.018398, - "loss_gen": 4.958353042602539, - "loss_rtd": 0.36189010739326477, - "loss_sent": 0.09843690693378448, - "loss_sod": 0.02413778007030487, - "loss_total": 0.48446476459503174, - "step": 72199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.871512413024902, - "loss_rtd": 0.35542696714401245, - "loss_sent": 0.19521720707416534, - "loss_sod": 0.017067646607756615, - "loss_total": 0.5677118301391602, - "step": 72199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.6388457417488098, - "learning_rate": 7.439325996307012e-05, - "loss": 0.6128, - "step": 72200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.331342697143555, - "loss_rtd": 0.33741307258605957, - "loss_sent": 0.08115275949239731, - "loss_sod": 0.005808874499052763, - "loss_total": 0.42437469959259033, - "step": 72299 - }, - { - "epoch": 0.018598, - "loss_gen": 4.409862518310547, - "loss_rtd": 0.35305461287498474, - "loss_sent": 0.08265908807516098, - "loss_sod": 0.0333339087665081, - "loss_total": 0.4690476059913635, - "step": 72299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.7669657468795776, - "learning_rate": 7.436555448790829e-05, - "loss": 0.6054, - "step": 72300 - }, - { - "epoch": 0.018798, - "loss_gen": 4.626320838928223, - "loss_rtd": 0.37120702862739563, - "loss_sent": 0.34755128622055054, - "loss_sod": 0.010918952524662018, - "loss_total": 0.7296772599220276, - "step": 72399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.711775302886963, - "loss_rtd": 0.3592338562011719, - "loss_sent": 0.10267001390457153, - "loss_sod": 0.14617079496383667, - "loss_total": 0.6080746650695801, - "step": 72399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.2369270324707031, - "learning_rate": 7.433783919830403e-05, - "loss": 0.6104, - "step": 72400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.396215915679932, - "loss_rtd": 0.3596164584159851, - "loss_sent": 0.08987735211849213, - "loss_sod": 0.04751113802194595, - "loss_total": 0.4970049262046814, - "step": 72499 - }, - { - "epoch": 0.018998, - "loss_gen": 3.75136661529541, - "loss_rtd": 0.3371661305427551, - "loss_sent": 0.00012291154416743666, - "loss_sod": 0.12435232102870941, - "loss_total": 0.4616413712501526, - "step": 72499 - }, - { - "epoch": 0.019, - "grad_norm": 0.6749967932701111, - "learning_rate": 7.431011410542105e-05, - "loss": 0.6132, - "step": 72500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.698030471801758, - "loss_rtd": 0.3466373682022095, - "loss_sent": 0.14236655831336975, - "loss_sod": 0.11202895641326904, - "loss_total": 0.6010328531265259, - "step": 72599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.294450759887695, - "loss_rtd": 0.36601531505584717, - "loss_sent": 0.027178332209587097, - "loss_sod": 0.15719231963157654, - "loss_total": 0.5503860116004944, - "step": 72599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.9426364302635193, - "learning_rate": 7.4282379220427e-05, - "loss": 0.6202, - "step": 72600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.937078952789307, - "loss_rtd": 0.3478560149669647, - "loss_sent": 0.1771114319562912, - "loss_sod": 0.002633861731737852, - "loss_total": 0.5276013016700745, - "step": 72699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.70350980758667, - "loss_rtd": 0.34218764305114746, - "loss_sent": 0.13848990201950073, - "loss_sod": 0.0959969088435173, - "loss_total": 0.5766744613647461, - "step": 72699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.6248541474342346, - "learning_rate": 7.425463455449353e-05, - "loss": 0.6066, - "step": 72700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.646872520446777, - "loss_rtd": 0.3477177321910858, - "loss_sent": 0.34237056970596313, - "loss_sod": 0.08231464773416519, - "loss_total": 0.7724029421806335, - "step": 72799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.0671916007995605, - "loss_rtd": 0.3369097411632538, - "loss_sent": 0.006389040965586901, - "loss_sod": 0.08553760498762131, - "loss_total": 0.4288364052772522, - "step": 72799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.6249091625213623, - "learning_rate": 7.422688011879614e-05, - "loss": 0.6151, - "step": 72800 - }, - { - "epoch": 0.019798, - "loss_gen": 4.565310955047607, - "loss_rtd": 0.3453063368797302, - "loss_sent": 0.2012784332036972, - "loss_sod": 0.023004647344350815, - "loss_total": 0.5695894360542297, - "step": 72899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.667540550231934, - "loss_rtd": 0.3517211079597473, - "loss_sent": 0.14475028216838837, - "loss_sod": 0.2036372721195221, - "loss_total": 0.7001087069511414, - "step": 72899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.9508171677589417, - "learning_rate": 7.419911592451434e-05, - "loss": 0.6122, - "step": 72900 - }, - { - "epoch": 0.019998, - "loss_gen": 4.509414196014404, - "loss_rtd": 0.3496207594871521, - "loss_sent": 0.2732815444469452, - "loss_sod": 0.009184077382087708, - "loss_total": 0.6320863962173462, - "step": 72999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.850524425506592, - "loss_rtd": 0.3499402105808258, - "loss_sent": 0.24237596988677979, - "loss_sod": 0.1104980856180191, - "loss_total": 0.7028142809867859, - "step": 72999 - }, - { - "epoch": 0.02, - "grad_norm": 1.4842668771743774, - "learning_rate": 7.417134198283156e-05, - "loss": 0.6163, - "step": 73000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5847336053848267, - "eval_runtime": 151.7735, - "eval_samples_per_second": 101.75, - "eval_steps_per_second": 0.797, - "step": 73000 - }, - { - "epoch": 0.020198, - "loss_gen": 4.465516567230225, - "loss_rtd": 0.33586585521698, - "loss_sent": 0.19374719262123108, - "loss_sod": 0.045669347047805786, - "loss_total": 0.5752823948860168, - "step": 73099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.804156303405762, - "loss_rtd": 0.37062567472457886, - "loss_sent": 0.7627204060554504, - "loss_sod": 0.046356506645679474, - "loss_total": 1.1797025203704834, - "step": 73099 - }, - { - "epoch": 0.0202, - "grad_norm": 3.475369453430176, - "learning_rate": 7.41435583049351e-05, - "loss": 0.6248, - "step": 73100 - }, - { - "epoch": 0.020398, - "loss_gen": 4.678688049316406, - "loss_rtd": 0.35629740357398987, - "loss_sent": 0.28247150778770447, - "loss_sod": 0.007911695167422295, - "loss_total": 0.6466805934906006, - "step": 73199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.716961860656738, - "loss_rtd": 0.34720107913017273, - "loss_sent": 0.30606597661972046, - "loss_sod": 0.03888232260942459, - "loss_total": 0.6921494007110596, - "step": 73199 - }, - { - "epoch": 0.0204, - "grad_norm": 2.1669692993164062, - "learning_rate": 7.411576490201624e-05, - "loss": 0.6046, - "step": 73200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.407477378845215, - "loss_rtd": 0.3413894474506378, - "loss_sent": 0.22684229910373688, - "loss_sod": 0.03058023564517498, - "loss_total": 0.5988119840621948, - "step": 73299 - }, - { - "epoch": 0.020598, - "loss_gen": 4.551497459411621, - "loss_rtd": 0.34881627559661865, - "loss_sent": 0.12238585948944092, - "loss_sod": 0.03570621833205223, - "loss_total": 0.5069083571434021, - "step": 73299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.0146186351776123, - "learning_rate": 7.408796178527017e-05, - "loss": 0.6063, - "step": 73300 - }, - { - "epoch": 0.020798, - "loss_gen": 4.718207359313965, - "loss_rtd": 0.33871084451675415, - "loss_sent": 0.41996413469314575, - "loss_sod": 0.08324310183525085, - "loss_total": 0.8419181108474731, - "step": 73399 - }, - { - "epoch": 0.020798, - "loss_gen": 4.675392150878906, - "loss_rtd": 0.3622107207775116, - "loss_sent": 0.12315183877944946, - "loss_sod": 0.11210007965564728, - "loss_total": 0.5974626541137695, - "step": 73399 - }, - { - "epoch": 0.0208, - "grad_norm": 2.3092925548553467, - "learning_rate": 7.406014896589597e-05, - "loss": 0.6116, - "step": 73400 - }, - { - "epoch": 0.020998, - "loss_gen": 4.315270900726318, - "loss_rtd": 0.351446270942688, - "loss_sent": 0.008908976800739765, - "loss_sod": 0.14602422714233398, - "loss_total": 0.5063794851303101, - "step": 73499 - }, - { - "epoch": 0.020998, - "loss_gen": 4.223549842834473, - "loss_rtd": 0.3434906303882599, - "loss_sent": 0.09560151398181915, - "loss_sod": 0.09820383787155151, - "loss_total": 0.5372959971427917, - "step": 73499 - }, - { - "epoch": 0.021, - "grad_norm": 0.8053382039070129, - "learning_rate": 7.403232645509665e-05, - "loss": 0.6137, - "step": 73500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.870631694793701, - "loss_rtd": 0.3493156135082245, - "loss_sent": 0.16123583912849426, - "loss_sod": 0.016129961237311363, - "loss_total": 0.5266814231872559, - "step": 73599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.504601001739502, - "loss_rtd": 0.3525901436805725, - "loss_sent": 0.23618851602077484, - "loss_sod": 0.05389292910695076, - "loss_total": 0.6426715850830078, - "step": 73599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.8500021696090698, - "learning_rate": 7.400449426407909e-05, - "loss": 0.6043, - "step": 73600 - }, - { - "epoch": 0.021398, - "loss_gen": 4.457568168640137, - "loss_rtd": 0.3443717360496521, - "loss_sent": 0.0008905435097403824, - "loss_sod": 0.17282648384571075, - "loss_total": 0.5180887579917908, - "step": 73699 - }, - { - "epoch": 0.021398, - "loss_gen": 3.4980556964874268, - "loss_rtd": 0.32986247539520264, - "loss_sent": 0.0009524160996079445, - "loss_sod": 0.17944881319999695, - "loss_total": 0.5102637410163879, - "step": 73699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.0395961999893188, - "learning_rate": 7.397665240405413e-05, - "loss": 0.6086, - "step": 73700 - }, - { - "epoch": 0.021598, - "loss_gen": 4.717159748077393, - "loss_rtd": 0.37082016468048096, - "loss_sent": 0.3229347765445709, - "loss_sod": 0.08952424675226212, - "loss_total": 0.7832791805267334, - "step": 73799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.845605850219727, - "loss_rtd": 0.3581559956073761, - "loss_sent": 0.2923746407032013, - "loss_sod": 0.028529290109872818, - "loss_total": 0.6790599226951599, - "step": 73799 - }, - { - "epoch": 0.0216, - "grad_norm": 2.5028345584869385, - "learning_rate": 7.394880088623644e-05, - "loss": 0.6238, - "step": 73800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.317142486572266, - "loss_rtd": 0.35131940245628357, - "loss_sent": 0.32754117250442505, - "loss_sod": 0.05299842357635498, - "loss_total": 0.7318589687347412, - "step": 73899 - }, - { - "epoch": 0.021798, - "loss_gen": 4.551251411437988, - "loss_rtd": 0.35017669200897217, - "loss_sent": 0.19271309673786163, - "loss_sod": 0.018432054668664932, - "loss_total": 0.5613218545913696, - "step": 73899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.3386276960372925, - "learning_rate": 7.392093972184462e-05, - "loss": 0.6067, - "step": 73900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.816708564758301, - "loss_rtd": 0.3660191595554352, - "loss_sent": 0.2075343281030655, - "loss_sod": 0.16501009464263916, - "loss_total": 0.7385635375976562, - "step": 73999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.9030070304870605, - "loss_rtd": 0.3595043420791626, - "loss_sent": 0.22702062129974365, - "loss_sod": 0.03965284675359726, - "loss_total": 0.6261777877807617, - "step": 73999 - }, - { - "epoch": 0.022, - "grad_norm": 1.3727046251296997, - "learning_rate": 7.389306892210115e-05, - "loss": 0.6122, - "step": 74000 - }, - { - "epoch": 0.022, - "eval_loss": 0.5883642435073853, - "eval_runtime": 151.4391, - "eval_samples_per_second": 101.975, - "eval_steps_per_second": 0.799, - "step": 74000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.003251075744629, - "loss_rtd": 0.3404412567615509, - "loss_sent": 0.0867854505777359, - "loss_sod": 0.025416646152734756, - "loss_total": 0.45264333486557007, - "step": 74099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.423887252807617, - "loss_rtd": 0.33721449971199036, - "loss_sent": 0.310514897108078, - "loss_sod": 0.004518180154263973, - "loss_total": 0.6522475481033325, - "step": 74099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.9278171062469482, - "learning_rate": 7.386518849823235e-05, - "loss": 0.6154, - "step": 74100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.123299598693848, - "loss_rtd": 0.3594000041484833, - "loss_sent": 0.1910119503736496, - "loss_sod": 0.13898342847824097, - "loss_total": 0.6893953680992126, - "step": 74199 - }, - { - "epoch": 0.022398, - "loss_gen": 4.525036811828613, - "loss_rtd": 0.37645843625068665, - "loss_sent": 0.12674348056316376, - "loss_sod": 0.11194515973329544, - "loss_total": 0.615147054195404, - "step": 74199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.8637129664421082, - "learning_rate": 7.383729846146849e-05, - "loss": 0.5955, - "step": 74200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.2129034996032715, - "loss_rtd": 0.3292183578014374, - "loss_sent": 0.14378657937049866, - "loss_sod": 0.012567362748086452, - "loss_total": 0.48557230830192566, - "step": 74299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.629938125610352, - "loss_rtd": 0.3502351641654968, - "loss_sent": 0.2462940216064453, - "loss_sod": 0.05217970535159111, - "loss_total": 0.6487088799476624, - "step": 74299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.1983137130737305, - "learning_rate": 7.380939882304368e-05, - "loss": 0.6137, - "step": 74300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.627171993255615, - "loss_rtd": 0.3490825891494751, - "loss_sent": 0.23272109031677246, - "loss_sod": 0.004489724058657885, - "loss_total": 0.5862933993339539, - "step": 74399 - }, - { - "epoch": 0.022798, - "loss_gen": 4.706835746765137, - "loss_rtd": 0.34307944774627686, - "loss_sent": 0.1921033263206482, - "loss_sod": 0.08980696648359299, - "loss_total": 0.6249897480010986, - "step": 74399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.6464195251464844, - "learning_rate": 7.378148959419585e-05, - "loss": 0.6032, - "step": 74400 - }, - { - "epoch": 0.022998, - "loss_gen": 3.9812397956848145, - "loss_rtd": 0.3321714997291565, - "loss_sent": 0.0303138829767704, - "loss_sod": 0.09572997689247131, - "loss_total": 0.4582153558731079, - "step": 74499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.2348527908325195, - "loss_rtd": 0.3658457398414612, - "loss_sent": 0.028739361092448235, - "loss_sod": 0.01832672953605652, - "loss_total": 0.4129118323326111, - "step": 74499 - }, - { - "epoch": 0.023, - "grad_norm": 0.6190145611763, - "learning_rate": 7.375357078616685e-05, - "loss": 0.5969, - "step": 74500 - }, - { - "epoch": 0.023198, - "loss_gen": 4.848466873168945, - "loss_rtd": 0.34901705384254456, - "loss_sent": 0.2657622694969177, - "loss_sod": 0.03693791851401329, - "loss_total": 0.6517172455787659, - "step": 74599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.7964606285095215, - "loss_rtd": 0.35357391834259033, - "loss_sent": 0.12477079033851624, - "loss_sod": 0.06684249639511108, - "loss_total": 0.54518723487854, - "step": 74599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.4587454795837402, - "learning_rate": 7.372564241020239e-05, - "loss": 0.5989, - "step": 74600 - }, - { - "epoch": 0.023398, - "loss_gen": 4.390159606933594, - "loss_rtd": 0.3433459997177124, - "loss_sent": 0.2119998186826706, - "loss_sod": 0.001375580090098083, - "loss_total": 0.5567213892936707, - "step": 74699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.818406105041504, - "loss_rtd": 0.36668869853019714, - "loss_sent": 0.3295286297798157, - "loss_sod": 0.07705482840538025, - "loss_total": 0.7732721567153931, - "step": 74699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.1205918788909912, - "learning_rate": 7.3697704477552e-05, - "loss": 0.6106, - "step": 74700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.816539764404297, - "loss_rtd": 0.34127429127693176, - "loss_sent": 0.2649698853492737, - "loss_sod": 0.04257281869649887, - "loss_total": 0.6488170027732849, - "step": 74799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.949727535247803, - "loss_rtd": 0.3543637990951538, - "loss_sent": 0.08813301473855972, - "loss_sod": 0.030118556693196297, - "loss_total": 0.4726153612136841, - "step": 74799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.1401525735855103, - "learning_rate": 7.366975699946908e-05, - "loss": 0.5906, - "step": 74800 - }, - { - "epoch": 0.023798, - "loss_gen": 4.6410322189331055, - "loss_rtd": 0.33168840408325195, - "loss_sent": 0.2278660237789154, - "loss_sod": 0.12386610358953476, - "loss_total": 0.6834205389022827, - "step": 74899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.859090328216553, - "loss_rtd": 0.351549357175827, - "loss_sent": 0.21702858805656433, - "loss_sod": 0.03496665507555008, - "loss_total": 0.6035445928573608, - "step": 74899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.0700111389160156, - "learning_rate": 7.364179998721088e-05, - "loss": 0.6076, - "step": 74900 - }, - { - "epoch": 0.023998, - "loss_gen": 4.151700973510742, - "loss_rtd": 0.3265613913536072, - "loss_sent": 0.029658786952495575, - "loss_sod": 0.23936760425567627, - "loss_total": 0.5955877900123596, - "step": 74999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.144930839538574, - "loss_rtd": 0.35030627250671387, - "loss_sent": 0.0014046452706679702, - "loss_sod": 0.25777551531791687, - "loss_total": 0.60948646068573, - "step": 74999 - }, - { - "epoch": 0.024, - "grad_norm": 0.7320817112922668, - "learning_rate": 7.361383345203848e-05, - "loss": 0.6163, - "step": 75000 - }, - { - "epoch": 0.024, - "eval_loss": 0.5863644480705261, - "eval_runtime": 151.972, - "eval_samples_per_second": 101.617, - "eval_steps_per_second": 0.796, - "step": 75000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.696193218231201, - "loss_rtd": 0.3485161066055298, - "loss_sent": 0.21172170341014862, - "loss_sod": 0.03800595551729202, - "loss_total": 0.598243772983551, - "step": 75099 - }, - { - "epoch": 0.024198, - "loss_gen": 4.1647233963012695, - "loss_rtd": 0.34751391410827637, - "loss_sent": 0.09992515295743942, - "loss_sod": 0.2592490017414093, - "loss_total": 0.7066880464553833, - "step": 75099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.2299166917800903, - "learning_rate": 7.358585740521675e-05, - "loss": 0.6027, - "step": 75100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.2474799156188965, - "loss_rtd": 0.355958491563797, - "loss_sent": 9.39548117457889e-05, - "loss_sod": 0.234136700630188, - "loss_total": 0.5901890993118286, - "step": 75199 - }, - { - "epoch": 0.024398, - "loss_gen": 3.7780745029449463, - "loss_rtd": 0.33295783400535583, - "loss_sent": 5.115962267154828e-05, - "loss_sod": 0.15659479796886444, - "loss_total": 0.48960378766059875, - "step": 75199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.761970579624176, - "learning_rate": 7.355787185801451e-05, - "loss": 0.6033, - "step": 75200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.645076274871826, - "loss_rtd": 0.34952473640441895, - "loss_sent": 0.3229450583457947, - "loss_sod": 0.006602790206670761, - "loss_total": 0.6790726184844971, - "step": 75299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.550302982330322, - "loss_rtd": 0.3657408654689789, - "loss_sent": 0.20307454466819763, - "loss_sod": 0.028181517496705055, - "loss_total": 0.5969969034194946, - "step": 75299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.4382457733154297, - "learning_rate": 7.35298768217043e-05, - "loss": 0.6185, - "step": 75300 - }, - { - "epoch": 0.024798, - "loss_gen": 3.696732521057129, - "loss_rtd": 0.3344995677471161, - "loss_sent": 4.129198350710794e-05, - "loss_sod": 0.25585028529167175, - "loss_total": 0.5903911590576172, - "step": 75399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.009065628051758, - "loss_rtd": 0.3305332362651825, - "loss_sent": 0.0384492464363575, - "loss_sod": 0.19323648512363434, - "loss_total": 0.562218964099884, - "step": 75399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.0108808279037476, - "learning_rate": 7.350187230756253e-05, - "loss": 0.6041, - "step": 75400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.490052223205566, - "loss_rtd": 0.35382014513015747, - "loss_sent": 0.20739054679870605, - "loss_sod": 0.11048437654972076, - "loss_total": 0.6716950535774231, - "step": 75499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.205677509307861, - "loss_rtd": 0.3394111692905426, - "loss_sent": 0.4223472774028778, - "loss_sod": 0.1295999437570572, - "loss_total": 0.8913583755493164, - "step": 75499 - }, - { - "epoch": 0.025, - "grad_norm": 1.3963903188705444, - "learning_rate": 7.347385832686938e-05, - "loss": 0.615, - "step": 75500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.7777791023254395, - "loss_rtd": 0.33643102645874023, - "loss_sent": 0.3857211470603943, - "loss_sod": 0.02439464069902897, - "loss_total": 0.7465468049049377, - "step": 75599 - }, - { - "epoch": 0.025198, - "loss_gen": 4.464804172515869, - "loss_rtd": 0.3587886691093445, - "loss_sent": 0.07014551013708115, - "loss_sod": 0.1717534363269806, - "loss_total": 0.6006876230239868, - "step": 75599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.3431655168533325, - "learning_rate": 7.344583489090893e-05, - "loss": 0.619, - "step": 75600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.4741926193237305, - "loss_rtd": 0.35084760189056396, - "loss_sent": 0.22809773683547974, - "loss_sod": 0.02333931252360344, - "loss_total": 0.6022846698760986, - "step": 75699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.5327467918396, - "loss_rtd": 0.35005471110343933, - "loss_sent": 0.12196867913007736, - "loss_sod": 0.025535210967063904, - "loss_total": 0.49755859375, - "step": 75699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.279105305671692, - "learning_rate": 7.341780201096897e-05, - "loss": 0.6056, - "step": 75700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.242307662963867, - "loss_rtd": 0.3398686647415161, - "loss_sent": 0.3016100227832794, - "loss_sod": 0.055376965552568436, - "loss_total": 0.6968556642532349, - "step": 75799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.900667190551758, - "loss_rtd": 0.3508695662021637, - "loss_sent": 0.14425787329673767, - "loss_sod": 0.04531536251306534, - "loss_total": 0.5404428243637085, - "step": 75799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.2808212041854858, - "learning_rate": 7.338975969834117e-05, - "loss": 0.6097, - "step": 75800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.717090129852295, - "loss_rtd": 0.33705005049705505, - "loss_sent": 0.1795204132795334, - "loss_sod": 0.03254042938351631, - "loss_total": 0.5491108894348145, - "step": 75899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.365910530090332, - "loss_rtd": 0.37813952565193176, - "loss_sent": 0.23682186007499695, - "loss_sod": 0.06760432571172714, - "loss_total": 0.6825656890869141, - "step": 75899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.737987756729126, - "learning_rate": 7.336170796432093e-05, - "loss": 0.6138, - "step": 75900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.509415626525879, - "loss_rtd": 0.33232179284095764, - "loss_sent": 0.15266606211662292, - "loss_sod": 0.01130371168255806, - "loss_total": 0.4962915778160095, - "step": 75999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.437432765960693, - "loss_rtd": 0.34324514865875244, - "loss_sent": 0.2692817449569702, - "loss_sod": 0.1113276407122612, - "loss_total": 0.7238545417785645, - "step": 75999 - }, - { - "epoch": 0.026, - "grad_norm": 1.6792041063308716, - "learning_rate": 7.333364682020755e-05, - "loss": 0.6127, - "step": 76000 - }, - { - "epoch": 0.026, - "eval_loss": 0.5819876790046692, - "eval_runtime": 151.4448, - "eval_samples_per_second": 101.971, - "eval_steps_per_second": 0.799, - "step": 76000 - }, - { - "epoch": 0.026198, - "loss_gen": 3.8935954570770264, - "loss_rtd": 0.33059537410736084, - "loss_sent": 5.3254007070790976e-05, - "loss_sod": 0.14532531797885895, - "loss_total": 0.4759739339351654, - "step": 76099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.468469619750977, - "loss_rtd": 0.33144134283065796, - "loss_sent": 0.21191422641277313, - "loss_sod": 0.08587560802698135, - "loss_total": 0.6292311549186707, - "step": 76099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9422717690467834, - "learning_rate": 7.330557627730402e-05, - "loss": 0.5996, - "step": 76100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.545282363891602, - "loss_rtd": 0.33573615550994873, - "loss_sent": 0.2813408076763153, - "loss_sod": 0.14380568265914917, - "loss_total": 0.7608826160430908, - "step": 76199 - }, - { - "epoch": 0.026398, - "loss_gen": 4.843388557434082, - "loss_rtd": 0.3394545614719391, - "loss_sent": 0.23535408079624176, - "loss_sod": 0.04507937282323837, - "loss_total": 0.6198880076408386, - "step": 76199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.9187272787094116, - "learning_rate": 7.327749634691714e-05, - "loss": 0.6048, - "step": 76200 - }, - { - "epoch": 0.026598, - "loss_gen": 4.975532054901123, - "loss_rtd": 0.3380142152309418, - "loss_sent": 0.3965788185596466, - "loss_sod": 0.06410861015319824, - "loss_total": 0.7987016439437866, - "step": 76299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.570683002471924, - "loss_rtd": 0.3355201184749603, - "loss_sent": 0.1470223218202591, - "loss_sod": 0.15155425667762756, - "loss_total": 0.6340966820716858, - "step": 76299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.0840137004852295, - "learning_rate": 7.324940704035753e-05, - "loss": 0.6044, - "step": 76300 - }, - { - "epoch": 0.026798, - "loss_gen": 4.853896141052246, - "loss_rtd": 0.34713008999824524, - "loss_sent": 0.19798138737678528, - "loss_sod": 0.09696397930383682, - "loss_total": 0.6420754194259644, - "step": 76399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.590819358825684, - "loss_rtd": 0.349762886762619, - "loss_sent": 0.14599496126174927, - "loss_sod": 0.032492972910404205, - "loss_total": 0.5282508134841919, - "step": 76399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.1431488990783691, - "learning_rate": 7.322130836893952e-05, - "loss": 0.5836, - "step": 76400 - }, - { - "epoch": 0.026998, - "loss_gen": 4.64332914352417, - "loss_rtd": 0.3496253192424774, - "loss_sent": 0.6941021084785461, - "loss_sod": 0.010382898151874542, - "loss_total": 1.0541102886199951, - "step": 76499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.089400291442871, - "loss_rtd": 0.34875771403312683, - "loss_sent": 0.17473797500133514, - "loss_sod": 0.10071061551570892, - "loss_total": 0.6242063045501709, - "step": 76499 - }, - { - "epoch": 0.027, - "grad_norm": 2.004483938217163, - "learning_rate": 7.31932003439813e-05, - "loss": 0.6089, - "step": 76500 - }, - { - "epoch": 0.027198, - "loss_gen": 4.623749732971191, - "loss_rtd": 0.35708877444267273, - "loss_sent": 0.269750714302063, - "loss_sod": 0.006428820081055164, - "loss_total": 0.6332682967185974, - "step": 76599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.706696033477783, - "loss_rtd": 0.34746280312538147, - "loss_sent": 0.19256754219532013, - "loss_sod": 0.020543798804283142, - "loss_total": 0.5605741143226624, - "step": 76599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.9635896682739258, - "learning_rate": 7.316508297680474e-05, - "loss": 0.6054, - "step": 76600 - }, - { - "epoch": 0.027398, - "loss_gen": 4.7151079177856445, - "loss_rtd": 0.35019439458847046, - "loss_sent": 0.13037344813346863, - "loss_sod": 0.018380889669060707, - "loss_total": 0.49894872307777405, - "step": 76699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.885163307189941, - "loss_rtd": 0.3396100699901581, - "loss_sent": 0.37537530064582825, - "loss_sod": 0.04548022896051407, - "loss_total": 0.7604656219482422, - "step": 76699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.422677755355835, - "learning_rate": 7.313695627873553e-05, - "loss": 0.6092, - "step": 76700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.78942346572876, - "loss_rtd": 0.3287353217601776, - "loss_sent": 0.25026121735572815, - "loss_sod": 0.051381666213274, - "loss_total": 0.630378246307373, - "step": 76799 - }, - { - "epoch": 0.027598, - "loss_gen": 4.916085243225098, - "loss_rtd": 0.3519432842731476, - "loss_sent": 0.3339703381061554, - "loss_sod": 0.08152005076408386, - "loss_total": 0.7674336433410645, - "step": 76799 - }, - { - "epoch": 0.0276, - "grad_norm": 2.0085551738739014, - "learning_rate": 7.31088202611031e-05, - "loss": 0.6049, - "step": 76800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.992638111114502, - "loss_rtd": 0.33744439482688904, - "loss_sent": 0.3600675165653229, - "loss_sod": 0.1079985573887825, - "loss_total": 0.8055104613304138, - "step": 76899 - }, - { - "epoch": 0.027798, - "loss_gen": 4.506402492523193, - "loss_rtd": 0.3557027280330658, - "loss_sent": 0.07336246967315674, - "loss_sod": 0.006056470330804586, - "loss_total": 0.43512165546417236, - "step": 76899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.325028419494629, - "learning_rate": 7.308067493524064e-05, - "loss": 0.603, - "step": 76900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.883081436157227, - "loss_rtd": 0.34431836009025574, - "loss_sent": 0.11972984671592712, - "loss_sod": 0.15086975693702698, - "loss_total": 0.6149179935455322, - "step": 76999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.85264778137207, - "loss_rtd": 0.36707422137260437, - "loss_sent": 0.12365243583917618, - "loss_sod": 0.06359491497278214, - "loss_total": 0.5543215870857239, - "step": 76999 - }, - { - "epoch": 0.028, - "grad_norm": 0.9523294568061829, - "learning_rate": 7.305252031248506e-05, - "loss": 0.6139, - "step": 77000 - }, - { - "epoch": 0.028, - "eval_loss": 0.5862102508544922, - "eval_runtime": 151.6791, - "eval_samples_per_second": 101.814, - "eval_steps_per_second": 0.798, - "step": 77000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.708960056304932, - "loss_rtd": 0.3252050280570984, - "loss_sent": 0.10101579129695892, - "loss_sod": 0.09263059496879578, - "loss_total": 0.5188513994216919, - "step": 77099 - }, - { - "epoch": 0.028198, - "loss_gen": 3.5701472759246826, - "loss_rtd": 0.31843188405036926, - "loss_sent": 4.417903983267024e-05, - "loss_sod": 0.2799660861492157, - "loss_total": 0.5984421372413635, - "step": 77099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.270837426185608, - "learning_rate": 7.302435640417707e-05, - "loss": 0.6094, - "step": 77100 - }, - { - "epoch": 0.028398, - "loss_gen": 4.561153411865234, - "loss_rtd": 0.34159260988235474, - "loss_sent": 0.11388792842626572, - "loss_sod": 0.0881519764661789, - "loss_total": 0.5436325073242188, - "step": 77199 - }, - { - "epoch": 0.028398, - "loss_gen": 4.914067268371582, - "loss_rtd": 0.3523808419704437, - "loss_sent": 0.4624131917953491, - "loss_sod": 0.03237197920680046, - "loss_total": 0.8471660017967224, - "step": 77199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.6572402715682983, - "learning_rate": 7.299618322166106e-05, - "loss": 0.5905, - "step": 77200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.187253475189209, - "loss_rtd": 0.3432314991950989, - "loss_sent": 0.042851973325014114, - "loss_sod": 0.0935392677783966, - "loss_total": 0.4796227514743805, - "step": 77299 - }, - { - "epoch": 0.028598, - "loss_gen": 4.5806803703308105, - "loss_rtd": 0.3343912661075592, - "loss_sent": 0.22158819437026978, - "loss_sod": 0.02681458368897438, - "loss_total": 0.5827940702438354, - "step": 77299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.66943359375, - "learning_rate": 7.296800077628521e-05, - "loss": 0.6095, - "step": 77300 - }, - { - "epoch": 0.028798, - "loss_gen": 3.7758219242095947, - "loss_rtd": 0.31664010882377625, - "loss_sent": 0.08940772712230682, - "loss_sod": 0.03532155230641365, - "loss_total": 0.4413694143295288, - "step": 77399 - }, - { - "epoch": 0.028798, - "loss_gen": 4.408205032348633, - "loss_rtd": 0.3292091190814972, - "loss_sent": 0.2610619068145752, - "loss_sod": 0.16430658102035522, - "loss_total": 0.75457763671875, - "step": 77399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.8829277157783508, - "learning_rate": 7.293980907940139e-05, - "loss": 0.594, - "step": 77400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.965546131134033, - "loss_rtd": 0.3634222745895386, - "loss_sent": 0.14828786253929138, - "loss_sod": 0.03323981165885925, - "loss_total": 0.5449499487876892, - "step": 77499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.154338359832764, - "loss_rtd": 0.3393701910972595, - "loss_sent": 0.1171560138463974, - "loss_sod": 0.05646669864654541, - "loss_total": 0.5129929184913635, - "step": 77499 - }, - { - "epoch": 0.029, - "grad_norm": 0.6069972515106201, - "learning_rate": 7.291160814236522e-05, - "loss": 0.5909, - "step": 77500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.255629539489746, - "loss_rtd": 0.3524973690509796, - "loss_sent": 0.20539936423301697, - "loss_sod": 0.07957247644662857, - "loss_total": 0.6374691724777222, - "step": 77599 - }, - { - "epoch": 0.029198, - "loss_gen": 4.823246002197266, - "loss_rtd": 0.3374880254268646, - "loss_sent": 0.1319703608751297, - "loss_sod": 0.12082032859325409, - "loss_total": 0.590278685092926, - "step": 77599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.9810872077941895, - "learning_rate": 7.288339797653603e-05, - "loss": 0.6058, - "step": 77600 - }, - { - "epoch": 0.029398, - "loss_gen": 3.8772659301757812, - "loss_rtd": 0.3234400749206543, - "loss_sent": 0.03408446162939072, - "loss_sod": 0.08280368894338608, - "loss_total": 0.4403282403945923, - "step": 77699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.439805507659912, - "loss_rtd": 0.3517507314682007, - "loss_sent": 0.25072789192199707, - "loss_sod": 0.13128891587257385, - "loss_total": 0.7337675094604492, - "step": 77699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.86631178855896, - "learning_rate": 7.285517859327688e-05, - "loss": 0.5862, - "step": 77700 - }, - { - "epoch": 0.029598, - "loss_gen": 4.9372992515563965, - "loss_rtd": 0.3406504988670349, - "loss_sent": 0.32388031482696533, - "loss_sod": 0.026403963565826416, - "loss_total": 0.6909347772598267, - "step": 77799 - }, - { - "epoch": 0.029598, - "loss_gen": 4.953743934631348, - "loss_rtd": 0.33431267738342285, - "loss_sent": 0.38023707270622253, - "loss_sod": 0.07797607034444809, - "loss_total": 0.7925258278846741, - "step": 77799 - }, - { - "epoch": 0.0296, - "grad_norm": 2.2315027713775635, - "learning_rate": 7.282695000395451e-05, - "loss": 0.5938, - "step": 77800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.043262481689453, - "loss_rtd": 0.3420753479003906, - "loss_sent": 0.2619520425796509, - "loss_sod": 0.062093593180179596, - "loss_total": 0.6661210060119629, - "step": 77899 - }, - { - "epoch": 0.029798, - "loss_gen": 4.945333480834961, - "loss_rtd": 0.33110296726226807, - "loss_sent": 0.16390226781368256, - "loss_sod": 0.04249800369143486, - "loss_total": 0.5375032424926758, - "step": 77899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.4338330030441284, - "learning_rate": 7.27987122199394e-05, - "loss": 0.6039, - "step": 77900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.042571544647217, - "loss_rtd": 0.3607284426689148, - "loss_sent": 0.16485467553138733, - "loss_sod": 0.10195044428110123, - "loss_total": 0.6275335550308228, - "step": 77999 - }, - { - "epoch": 0.029998, - "loss_gen": 4.82106351852417, - "loss_rtd": 0.33838844299316406, - "loss_sent": 0.14246927201747894, - "loss_sod": 0.030077558010816574, - "loss_total": 0.5109352469444275, - "step": 77999 - }, - { - "epoch": 0.03, - "grad_norm": 0.8304947018623352, - "learning_rate": 7.277046525260575e-05, - "loss": 0.603, - "step": 78000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5783067345619202, - "eval_runtime": 152.9147, - "eval_samples_per_second": 100.991, - "eval_steps_per_second": 0.791, - "step": 78000 - }, - { - "epoch": 0.030198, - "loss_gen": 4.8893256187438965, - "loss_rtd": 0.33507299423217773, - "loss_sent": 0.3655364215373993, - "loss_sod": 0.17951573431491852, - "loss_total": 0.8801251649856567, - "step": 78099 - }, - { - "epoch": 0.030198, - "loss_gen": 4.547902584075928, - "loss_rtd": 0.3381362557411194, - "loss_sent": 0.012435453943908215, - "loss_sod": 0.04874253273010254, - "loss_total": 0.3993142545223236, - "step": 78099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.5881937742233276, - "learning_rate": 7.274220911333142e-05, - "loss": 0.6111, - "step": 78100 - }, - { - "epoch": 0.030398, - "loss_gen": 4.830233573913574, - "loss_rtd": 0.35028842091560364, - "loss_sent": 0.21340954303741455, - "loss_sod": 0.04156368970870972, - "loss_total": 0.6052616834640503, - "step": 78199 - }, - { - "epoch": 0.030398, - "loss_gen": 4.917245864868164, - "loss_rtd": 0.35128992795944214, - "loss_sent": 0.12125124037265778, - "loss_sod": 0.1287066787481308, - "loss_total": 0.6012478470802307, - "step": 78199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.1110472679138184, - "learning_rate": 7.271394381349797e-05, - "loss": 0.5843, - "step": 78200 - }, - { - "epoch": 0.030598, - "loss_gen": 4.95368766784668, - "loss_rtd": 0.3567056655883789, - "loss_sent": 0.2553303837776184, - "loss_sod": 0.08781591057777405, - "loss_total": 0.6998519897460938, - "step": 78299 - }, - { - "epoch": 0.030598, - "loss_gen": 4.473656177520752, - "loss_rtd": 0.3402126729488373, - "loss_sent": 0.05505591630935669, - "loss_sod": 0.008297743275761604, - "loss_total": 0.4035663306713104, - "step": 78299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.2994519472122192, - "learning_rate": 7.268566936449067e-05, - "loss": 0.6022, - "step": 78300 - }, - { - "epoch": 0.030798, - "loss_gen": 4.774655342102051, - "loss_rtd": 0.33963102102279663, - "loss_sent": 0.09324996173381805, - "loss_sod": 0.1005897969007492, - "loss_total": 0.5334708094596863, - "step": 78399 - }, - { - "epoch": 0.030798, - "loss_gen": 4.81737756729126, - "loss_rtd": 0.33952900767326355, - "loss_sent": 0.29965364933013916, - "loss_sod": 0.015807606279850006, - "loss_total": 0.6549902558326721, - "step": 78399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.7558334469795227, - "learning_rate": 7.265738577769847e-05, - "loss": 0.5992, - "step": 78400 - }, - { - "epoch": 0.030998, - "loss_gen": 4.676304340362549, - "loss_rtd": 0.3421095013618469, - "loss_sent": 0.05747511610388756, - "loss_sod": 0.12205128371715546, - "loss_total": 0.521635890007019, - "step": 78499 - }, - { - "epoch": 0.030998, - "loss_gen": 4.821928024291992, - "loss_rtd": 0.3583402931690216, - "loss_sent": 0.08410129696130753, - "loss_sod": 0.027698632329702377, - "loss_total": 0.4701402187347412, - "step": 78499 - }, - { - "epoch": 0.031, - "grad_norm": 0.8563811779022217, - "learning_rate": 7.262909306451399e-05, - "loss": 0.5979, - "step": 78500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.854304790496826, - "loss_rtd": 0.3506193459033966, - "loss_sent": 0.36575737595558167, - "loss_sod": 0.02470770850777626, - "loss_total": 0.7410844564437866, - "step": 78599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.517398834228516, - "loss_rtd": 0.32580113410949707, - "loss_sent": 0.18032927811145782, - "loss_sod": 0.05777490884065628, - "loss_total": 0.5639052987098694, - "step": 78599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.334820032119751, - "learning_rate": 7.260079123633352e-05, - "loss": 0.593, - "step": 78600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.088646411895752, - "loss_rtd": 0.3596729338169098, - "loss_sent": 0.26877015829086304, - "loss_sod": 0.07989838719367981, - "loss_total": 0.7083414793014526, - "step": 78699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.0591936111450195, - "loss_rtd": 0.332432359457016, - "loss_sent": 0.5234665870666504, - "loss_sod": 0.029768668115139008, - "loss_total": 0.885667622089386, - "step": 78699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.5670020580291748, - "learning_rate": 7.257248030455704e-05, - "loss": 0.6176, - "step": 78700 - }, - { - "epoch": 0.031598, - "loss_gen": 4.583230972290039, - "loss_rtd": 0.3506057858467102, - "loss_sent": 0.40736258029937744, - "loss_sod": 0.01595848798751831, - "loss_total": 0.773926854133606, - "step": 78799 - }, - { - "epoch": 0.031598, - "loss_gen": 4.5218186378479, - "loss_rtd": 0.3393426835536957, - "loss_sent": 0.22740121185779572, - "loss_sod": 0.02407972514629364, - "loss_total": 0.5908235907554626, - "step": 78799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.3964877128601074, - "learning_rate": 7.254416028058822e-05, - "loss": 0.6169, - "step": 78800 - }, - { - "epoch": 0.031798, - "loss_gen": 4.38004207611084, - "loss_rtd": 0.36475300788879395, - "loss_sent": 0.26608026027679443, - "loss_sod": 0.007397185545414686, - "loss_total": 0.6382304430007935, - "step": 78899 - }, - { - "epoch": 0.031798, - "loss_gen": 4.8993144035339355, - "loss_rtd": 0.35095274448394775, - "loss_sent": 0.213242307305336, - "loss_sod": 0.23117947578430176, - "loss_total": 0.7953745126724243, - "step": 78899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.4138163328170776, - "learning_rate": 7.251583117583429e-05, - "loss": 0.5951, - "step": 78900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.560085773468018, - "loss_rtd": 0.3431425094604492, - "loss_sent": 0.16087932884693146, - "loss_sod": 0.01687842607498169, - "loss_total": 0.5209002494812012, - "step": 78999 - }, - { - "epoch": 0.031998, - "loss_gen": 4.905075550079346, - "loss_rtd": 0.35127487778663635, - "loss_sent": 0.12146424502134323, - "loss_sod": 0.029746858403086662, - "loss_total": 0.502485990524292, - "step": 78999 - }, - { - "epoch": 0.032, - "grad_norm": 0.8463312387466431, - "learning_rate": 7.24874930017063e-05, - "loss": 0.5956, - "step": 79000 - }, - { - "epoch": 0.032, - "eval_loss": 0.5794667601585388, - "eval_runtime": 152.0437, - "eval_samples_per_second": 101.569, - "eval_steps_per_second": 0.796, - "step": 79000 - }, - { - "epoch": 0.032198, - "loss_gen": 4.906808376312256, - "loss_rtd": 0.3431001603603363, - "loss_sent": 0.15424330532550812, - "loss_sod": 0.07962116599082947, - "loss_total": 0.5769646167755127, - "step": 79099 - }, - { - "epoch": 0.032198, - "loss_gen": 4.781933784484863, - "loss_rtd": 0.34182798862457275, - "loss_sent": 0.16395731270313263, - "loss_sod": 0.11651577055454254, - "loss_total": 0.6223010420799255, - "step": 79099 - }, - { - "epoch": 0.0322, - "grad_norm": 1.2742360830307007, - "learning_rate": 7.245914576961878e-05, - "loss": 0.5978, - "step": 79100 - }, - { - "epoch": 0.032398, - "loss_gen": 4.805627822875977, - "loss_rtd": 0.33406636118888855, - "loss_sent": 0.10832850635051727, - "loss_sod": 0.029560662806034088, - "loss_total": 0.4719555377960205, - "step": 79199 - }, - { - "epoch": 0.032398, - "loss_gen": 4.88537073135376, - "loss_rtd": 0.3371656537055969, - "loss_sent": 0.29921814799308777, - "loss_sod": 0.05873815715312958, - "loss_total": 0.6951220035552979, - "step": 79199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.185131549835205, - "learning_rate": 7.243078949099006e-05, - "loss": 0.6086, - "step": 79200 - }, - { - "epoch": 0.032598, - "loss_gen": 5.029036521911621, - "loss_rtd": 0.3534056544303894, - "loss_sent": 0.21892966330051422, - "loss_sod": 0.15603232383728027, - "loss_total": 0.7283676266670227, - "step": 79299 - }, - { - "epoch": 0.032598, - "loss_gen": 4.953742504119873, - "loss_rtd": 0.315411776304245, - "loss_sent": 0.435451865196228, - "loss_sod": 0.11127342283725739, - "loss_total": 0.8621370792388916, - "step": 79299 - }, - { - "epoch": 0.0326, - "grad_norm": 1.3502787351608276, - "learning_rate": 7.2402424177242e-05, - "loss": 0.6062, - "step": 79300 - }, - { - "epoch": 0.032798, - "loss_gen": 4.27264928817749, - "loss_rtd": 0.3398166298866272, - "loss_sent": 0.04490416496992111, - "loss_sod": 0.11879505962133408, - "loss_total": 0.5035158395767212, - "step": 79399 - }, - { - "epoch": 0.032798, - "loss_gen": 3.5960450172424316, - "loss_rtd": 0.330872505903244, - "loss_sent": 4.761438685818575e-05, - "loss_sod": 0.1638348400592804, - "loss_total": 0.49475497007369995, - "step": 79399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.1576119661331177, - "learning_rate": 7.237404983980016e-05, - "loss": 0.5923, - "step": 79400 - }, - { - "epoch": 0.032998, - "loss_gen": 4.7262444496154785, - "loss_rtd": 0.37096092104911804, - "loss_sent": 0.12971912324428558, - "loss_sod": 0.06608349829912186, - "loss_total": 0.5667635202407837, - "step": 79499 - }, - { - "epoch": 0.032998, - "loss_gen": 4.8113908767700195, - "loss_rtd": 0.35529083013534546, - "loss_sent": 0.22367742657661438, - "loss_sod": 0.059045784175395966, - "loss_total": 0.6380140781402588, - "step": 79499 - }, - { - "epoch": 0.033, - "grad_norm": 1.82859468460083, - "learning_rate": 7.234566649009373e-05, - "loss": 0.5985, - "step": 79500 - }, - { - "epoch": 0.033198, - "loss_gen": 4.804863452911377, - "loss_rtd": 0.3359534442424774, - "loss_sent": 0.05564633756875992, - "loss_sod": 0.12397048622369766, - "loss_total": 0.5155702829360962, - "step": 79599 - }, - { - "epoch": 0.033198, - "loss_gen": 4.417680263519287, - "loss_rtd": 0.3600488305091858, - "loss_sent": 0.21803125739097595, - "loss_sod": 0.05044897273182869, - "loss_total": 0.6285290718078613, - "step": 79599 - }, - { - "epoch": 0.0332, - "grad_norm": 0.7793765068054199, - "learning_rate": 7.23172741395555e-05, - "loss": 0.6023, - "step": 79600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.278229713439941, - "loss_rtd": 0.33663731813430786, - "loss_sent": 0.5610448122024536, - "loss_sod": 0.15033027529716492, - "loss_total": 1.048012375831604, - "step": 79699 - }, - { - "epoch": 0.033398, - "loss_gen": 4.6742753982543945, - "loss_rtd": 0.3588688373565674, - "loss_sent": 0.35958272218704224, - "loss_sod": 0.020474664866924286, - "loss_total": 0.7389262318611145, - "step": 79699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.4588605165481567, - "learning_rate": 7.228887279962192e-05, - "loss": 0.5974, - "step": 79700 - }, - { - "epoch": 0.033598, - "loss_gen": 4.4923930168151855, - "loss_rtd": 0.3381545841693878, - "loss_sent": 0.05760623887181282, - "loss_sod": 0.07205002009868622, - "loss_total": 0.46781083941459656, - "step": 79799 - }, - { - "epoch": 0.033598, - "loss_gen": 4.729586601257324, - "loss_rtd": 0.34935539960861206, - "loss_sent": 0.13376431167125702, - "loss_sod": 0.04556266963481903, - "loss_total": 0.5286824107170105, - "step": 79799 - }, - { - "epoch": 0.0336, - "grad_norm": 0.8367990255355835, - "learning_rate": 7.226046248173305e-05, - "loss": 0.5974, - "step": 79800 - }, - { - "epoch": 0.033798, - "loss_gen": 4.863954544067383, - "loss_rtd": 0.34549108147621155, - "loss_sent": 0.1215205267071724, - "loss_sod": 0.0367925763130188, - "loss_total": 0.5038042068481445, - "step": 79899 - }, - { - "epoch": 0.033798, - "loss_gen": 4.856892108917236, - "loss_rtd": 0.3340567350387573, - "loss_sent": 0.18412260711193085, - "loss_sod": 0.05267767608165741, - "loss_total": 0.570857048034668, - "step": 79899 - }, - { - "epoch": 0.0338, - "grad_norm": 0.9888171553611755, - "learning_rate": 7.223204319733255e-05, - "loss": 0.5983, - "step": 79900 - }, - { - "epoch": 0.033998, - "loss_gen": 4.183230400085449, - "loss_rtd": 0.3279927968978882, - "loss_sent": 0.09533942490816116, - "loss_sod": 0.07924136519432068, - "loss_total": 0.5025736093521118, - "step": 79999 - }, - { - "epoch": 0.033998, - "loss_gen": 4.862549304962158, - "loss_rtd": 0.3317645192146301, - "loss_sent": 0.26322609186172485, - "loss_sod": 0.04174702987074852, - "loss_total": 0.6367376446723938, - "step": 79999 - }, - { - "epoch": 0.034, - "grad_norm": 1.68937349319458, - "learning_rate": 7.22036149578677e-05, - "loss": 0.6177, - "step": 80000 - }, - { - "epoch": 0.034, - "eval_loss": 0.5796692967414856, - "eval_runtime": 151.847, - "eval_samples_per_second": 101.701, - "eval_steps_per_second": 0.797, - "step": 80000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.622843265533447, - "loss_rtd": 0.3517666459083557, - "loss_sent": 0.09793248027563095, - "loss_sod": 0.050344400107860565, - "loss_total": 0.500043511390686, - "step": 80099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.010815620422363, - "loss_rtd": 0.34234005212783813, - "loss_sent": 0.00033815408824011683, - "loss_sod": 0.21406474709510803, - "loss_total": 0.5567429661750793, - "step": 80099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.9491010308265686, - "learning_rate": 7.21751777747894e-05, - "loss": 0.5969, - "step": 80100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.592875957489014, - "loss_rtd": 0.33668312430381775, - "loss_sent": 0.10726157575845718, - "loss_sod": 0.06045020371675491, - "loss_total": 0.5043948888778687, - "step": 80199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.241170883178711, - "loss_rtd": 0.345996230840683, - "loss_sent": 0.17690055072307587, - "loss_sod": 0.1328912377357483, - "loss_total": 0.655788004398346, - "step": 80199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.9030731320381165, - "learning_rate": 7.214673165955214e-05, - "loss": 0.587, - "step": 80200 - }, - { - "epoch": 0.000598, - "loss_gen": 3.6014344692230225, - "loss_rtd": 0.3160237967967987, - "loss_sent": 8.825836994219571e-05, - "loss_sod": 0.1601894348859787, - "loss_total": 0.4763014614582062, - "step": 80299 - }, - { - "epoch": 0.000598, - "loss_gen": 4.452521800994873, - "loss_rtd": 0.32397112250328064, - "loss_sent": 0.1345147341489792, - "loss_sod": 0.05335891619324684, - "loss_total": 0.5118447542190552, - "step": 80299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.8657112121582031, - "learning_rate": 7.211827662361403e-05, - "loss": 0.6072, - "step": 80300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.304147720336914, - "loss_rtd": 0.3607083857059479, - "loss_sent": 0.2113885134458542, - "loss_sod": 0.004338310100138187, - "loss_total": 0.5764352083206177, - "step": 80399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.526896953582764, - "loss_rtd": 0.35893163084983826, - "loss_sent": 0.12096848338842392, - "loss_sod": 0.053405389189720154, - "loss_total": 0.5333054661750793, - "step": 80399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.575034737586975, - "learning_rate": 7.208981267843675e-05, - "loss": 0.6187, - "step": 80400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.012181282043457, - "loss_rtd": 0.3451538383960724, - "loss_sent": 0.45236727595329285, - "loss_sod": 0.01512182503938675, - "loss_total": 0.8126429319381714, - "step": 80499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.574728488922119, - "loss_rtd": 0.3141675889492035, - "loss_sent": 0.08709835261106491, - "loss_sod": 0.017966214567422867, - "loss_total": 0.41923215985298157, - "step": 80499 - }, - { - "epoch": 0.001, - "grad_norm": 0.944183886051178, - "learning_rate": 7.206133983548554e-05, - "loss": 0.5973, - "step": 80500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.878512859344482, - "loss_rtd": 0.33649691939353943, - "loss_sent": 0.47494471073150635, - "loss_sod": 0.07619231939315796, - "loss_total": 0.8876339197158813, - "step": 80599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.036816596984863, - "loss_rtd": 0.34828975796699524, - "loss_sent": 0.18520109355449677, - "loss_sod": 0.06617250293493271, - "loss_total": 0.5996633768081665, - "step": 80599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.1195414066314697, - "learning_rate": 7.203285810622929e-05, - "loss": 0.5913, - "step": 80600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.697576999664307, - "loss_rtd": 0.32750260829925537, - "loss_sent": 0.19536228477954865, - "loss_sod": 0.014839423820376396, - "loss_total": 0.5377042889595032, - "step": 80699 - }, - { - "epoch": 0.001398, - "loss_gen": 4.68838357925415, - "loss_rtd": 0.35094472765922546, - "loss_sent": 0.10224013775587082, - "loss_sod": 0.006008798256516457, - "loss_total": 0.4591936767101288, - "step": 80699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.7635841965675354, - "learning_rate": 7.200436750214044e-05, - "loss": 0.6122, - "step": 80700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.490006446838379, - "loss_rtd": 0.3497379422187805, - "loss_sent": 0.10162508487701416, - "loss_sod": 0.024359572678804398, - "loss_total": 0.47572261095046997, - "step": 80799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.756316184997559, - "loss_rtd": 0.32693690061569214, - "loss_sent": 0.08548082411289215, - "loss_sod": 0.06649260222911835, - "loss_total": 0.47891032695770264, - "step": 80799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.5970702171325684, - "learning_rate": 7.197586803469499e-05, - "loss": 0.5948, - "step": 80800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.97715950012207, - "loss_rtd": 0.3462950885295868, - "loss_sent": 0.0746036022901535, - "loss_sod": 0.04795315861701965, - "loss_total": 0.46885186433792114, - "step": 80899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.787614345550537, - "loss_rtd": 0.34894487261772156, - "loss_sent": 0.1466822624206543, - "loss_sod": 0.07616492360830307, - "loss_total": 0.5717920660972595, - "step": 80899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.4280049800872803, - "learning_rate": 7.194735971537251e-05, - "loss": 0.6022, - "step": 80900 - }, - { - "epoch": 0.001998, - "loss_gen": 3.9288675785064697, - "loss_rtd": 0.3252773880958557, - "loss_sent": 7.597176590934396e-05, - "loss_sod": 0.11348104476928711, - "loss_total": 0.43883439898490906, - "step": 80999 - }, - { - "epoch": 0.001998, - "loss_gen": 3.8037943840026855, - "loss_rtd": 0.3277899920940399, - "loss_sent": 0.002315351739525795, - "loss_sod": 0.2413269728422165, - "loss_total": 0.5714322924613953, - "step": 80999 - }, - { - "epoch": 0.002, - "grad_norm": 0.8177794814109802, - "learning_rate": 7.191884255565617e-05, - "loss": 0.6041, - "step": 81000 - }, - { - "epoch": 0.002, - "eval_loss": 0.5703736543655396, - "eval_runtime": 154.5666, - "eval_samples_per_second": 99.912, - "eval_steps_per_second": 0.783, - "step": 81000 - }, - { - "epoch": 0.002198, - "loss_gen": 4.518189430236816, - "loss_rtd": 0.321702241897583, - "loss_sent": 0.14045092463493347, - "loss_sod": 0.06171754375100136, - "loss_total": 0.5238707065582275, - "step": 81099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.521371364593506, - "loss_rtd": 0.33750367164611816, - "loss_sent": 0.2126319408416748, - "loss_sod": 0.008465304970741272, - "loss_total": 0.558600902557373, - "step": 81099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.046858787536621, - "learning_rate": 7.189031656703267e-05, - "loss": 0.5956, - "step": 81100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.222639560699463, - "loss_rtd": 0.3334866166114807, - "loss_sent": 0.28419044613838196, - "loss_sod": 0.10069449990987778, - "loss_total": 0.718371570110321, - "step": 81199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.741415023803711, - "loss_rtd": 0.35411888360977173, - "loss_sent": 0.32628849148750305, - "loss_sod": 0.10670779645442963, - "loss_total": 0.787115216255188, - "step": 81199 - }, - { - "epoch": 0.0024, - "grad_norm": 2.5012848377227783, - "learning_rate": 7.186178176099227e-05, - "loss": 0.5877, - "step": 81200 - }, - { - "epoch": 0.002598, - "loss_gen": 3.8994622230529785, - "loss_rtd": 0.3342326581478119, - "loss_sent": 0.00015677422925364226, - "loss_sod": 0.26861628890037537, - "loss_total": 0.6030057072639465, - "step": 81299 - }, - { - "epoch": 0.002598, - "loss_gen": 3.7668027877807617, - "loss_rtd": 0.3269832134246826, - "loss_sent": 0.00885203666985035, - "loss_sod": 0.2654297947883606, - "loss_total": 0.6012650728225708, - "step": 81299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.464532732963562, - "learning_rate": 7.183323814902879e-05, - "loss": 0.6232, - "step": 81300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.685091018676758, - "loss_rtd": 0.3290248215198517, - "loss_sent": 0.1316319704055786, - "loss_sod": 0.18161681294441223, - "loss_total": 0.6422736048698425, - "step": 81399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.897707939147949, - "loss_rtd": 0.3307396471500397, - "loss_sent": 0.07971008867025375, - "loss_sod": 0.18826325237751007, - "loss_total": 0.5987129807472229, - "step": 81399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.3436256647109985, - "learning_rate": 7.18046857426396e-05, - "loss": 0.6005, - "step": 81400 - }, - { - "epoch": 0.002998, - "loss_gen": 4.494446754455566, - "loss_rtd": 0.3431621789932251, - "loss_sent": 0.09104984998703003, - "loss_sod": 0.04677867144346237, - "loss_total": 0.4809907078742981, - "step": 81499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.569034099578857, - "loss_rtd": 0.3323788344860077, - "loss_sent": 0.15404106676578522, - "loss_sod": 0.01700172759592533, - "loss_total": 0.5034216046333313, - "step": 81499 - }, - { - "epoch": 0.003, - "grad_norm": 0.7456111311912537, - "learning_rate": 7.17761245533256e-05, - "loss": 0.5935, - "step": 81500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.801833629608154, - "loss_rtd": 0.33733636140823364, - "loss_sent": 0.2018003612756729, - "loss_sod": 0.014958927407860756, - "loss_total": 0.5540956258773804, - "step": 81599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.605984687805176, - "loss_rtd": 0.32954123616218567, - "loss_sent": 0.32172343134880066, - "loss_sod": 0.027626996859908104, - "loss_total": 0.678891658782959, - "step": 81599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9144284129142761, - "learning_rate": 7.174755459259124e-05, - "loss": 0.5976, - "step": 81600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.538783550262451, - "loss_rtd": 0.3379725515842438, - "loss_sent": 0.26620247960090637, - "loss_sod": 0.06738986819982529, - "loss_total": 0.6715649366378784, - "step": 81699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.728128910064697, - "loss_rtd": 0.376240074634552, - "loss_sent": 0.1057659238576889, - "loss_sod": 0.12661533057689667, - "loss_total": 0.60862135887146, - "step": 81699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.5157244205474854, - "learning_rate": 7.171897587194448e-05, - "loss": 0.5935, - "step": 81700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.843796730041504, - "loss_rtd": 0.35366329550743103, - "loss_sent": 0.06068854779005051, - "loss_sod": 0.15954655408859253, - "loss_total": 0.5738983750343323, - "step": 81799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.083298683166504, - "loss_rtd": 0.3443755805492401, - "loss_sent": 0.2293194681406021, - "loss_sod": 0.223373681306839, - "loss_total": 0.79706871509552, - "step": 81799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.1671299934387207, - "learning_rate": 7.169038840289684e-05, - "loss": 0.6109, - "step": 81800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.942607402801514, - "loss_rtd": 0.3444528877735138, - "loss_sent": 0.08512663841247559, - "loss_sod": 0.06856218725442886, - "loss_total": 0.49814170598983765, - "step": 81899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.8817949295043945, - "loss_rtd": 0.3282775282859802, - "loss_sent": 0.3587074279785156, - "loss_sod": 0.012187833897769451, - "loss_total": 0.6991727948188782, - "step": 81899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.4610754251480103, - "learning_rate": 7.166179219696335e-05, - "loss": 0.5909, - "step": 81900 - }, - { - "epoch": 0.003998, - "loss_gen": 4.809683799743652, - "loss_rtd": 0.34137725830078125, - "loss_sent": 0.22331377863883972, - "loss_sod": 0.033756665885448456, - "loss_total": 0.5984476804733276, - "step": 81999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.722792148590088, - "loss_rtd": 0.3351828455924988, - "loss_sent": 0.2343035638332367, - "loss_sod": 0.020440496504306793, - "loss_total": 0.5899268984794617, - "step": 81999 - }, - { - "epoch": 0.004, - "grad_norm": 1.3949823379516602, - "learning_rate": 7.163318726566255e-05, - "loss": 0.5998, - "step": 82000 - }, - { - "epoch": 0.004, - "eval_loss": 0.5795272588729858, - "eval_runtime": 151.858, - "eval_samples_per_second": 101.694, - "eval_steps_per_second": 0.797, - "step": 82000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.556379795074463, - "loss_rtd": 0.33904021978378296, - "loss_sent": 0.20144030451774597, - "loss_sod": 0.004236500710248947, - "loss_total": 0.544717013835907, - "step": 82099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.782190322875977, - "loss_rtd": 0.34505394101142883, - "loss_sent": 0.41519373655319214, - "loss_sod": 0.18751248717308044, - "loss_total": 0.9477601647377014, - "step": 82099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.381917119026184, - "learning_rate": 7.16045736205165e-05, - "loss": 0.603, - "step": 82100 - }, - { - "epoch": 0.004398, - "loss_gen": 4.838950157165527, - "loss_rtd": 0.32782894372940063, - "loss_sent": 0.11037642508745193, - "loss_sod": 0.015319553203880787, - "loss_total": 0.4535249173641205, - "step": 82199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.70442008972168, - "loss_rtd": 0.3487652838230133, - "loss_sent": 0.11023913323879242, - "loss_sod": 0.19873446226119995, - "loss_total": 0.6577389240264893, - "step": 82199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.1894351243972778, - "learning_rate": 7.157595127305079e-05, - "loss": 0.6084, - "step": 82200 - }, - { - "epoch": 0.004598, - "loss_gen": 4.37827205657959, - "loss_rtd": 0.35872745513916016, - "loss_sent": 0.04632806405425072, - "loss_sod": 0.07619193196296692, - "loss_total": 0.4812474548816681, - "step": 82299 - }, - { - "epoch": 0.004598, - "loss_gen": 3.9130702018737793, - "loss_rtd": 0.32239601016044617, - "loss_sent": 0.007580731529742479, - "loss_sod": 0.10175400972366333, - "loss_total": 0.4317307472229004, - "step": 82299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.706117570400238, - "learning_rate": 7.154732023479448e-05, - "loss": 0.5986, - "step": 82300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.160461902618408, - "loss_rtd": 0.3397037088871002, - "loss_sent": 8.609052747488022e-05, - "loss_sod": 0.2912858724594116, - "loss_total": 0.6310756206512451, - "step": 82399 - }, - { - "epoch": 0.004798, - "loss_gen": 3.7689208984375, - "loss_rtd": 0.3256233036518097, - "loss_sent": 0.003863121848553419, - "loss_sod": 0.1561591774225235, - "loss_total": 0.4856456220149994, - "step": 82399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.3118590116500854, - "learning_rate": 7.151868051728017e-05, - "loss": 0.581, - "step": 82400 - }, - { - "epoch": 0.004998, - "loss_gen": 4.68132209777832, - "loss_rtd": 0.323245644569397, - "loss_sent": 0.057707998901605606, - "loss_sod": 0.1399214267730713, - "loss_total": 0.520875096321106, - "step": 82499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.498316287994385, - "loss_rtd": 0.3447036147117615, - "loss_sent": 0.054062191396951675, - "loss_sod": 0.08559941500425339, - "loss_total": 0.48436522483825684, - "step": 82499 - }, - { - "epoch": 0.005, - "grad_norm": 0.7210830450057983, - "learning_rate": 7.14900321320439e-05, - "loss": 0.5916, - "step": 82500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.19460391998291, - "loss_rtd": 0.3308198153972626, - "loss_sent": 0.2415839582681656, - "loss_sod": 0.07629989087581635, - "loss_total": 0.6487036347389221, - "step": 82599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.732879638671875, - "loss_rtd": 0.3315872251987457, - "loss_sent": 0.022963855415582657, - "loss_sod": 0.11855947226285934, - "loss_total": 0.473110556602478, - "step": 82599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.1002004146575928, - "learning_rate": 7.146137509062527e-05, - "loss": 0.5798, - "step": 82600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.847637176513672, - "loss_rtd": 0.32974302768707275, - "loss_sent": 0.39561349153518677, - "loss_sod": 0.020584065467119217, - "loss_total": 0.7459405660629272, - "step": 82699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.003781795501709, - "loss_rtd": 0.35805708169937134, - "loss_sent": 0.12605199217796326, - "loss_sod": 0.04816053807735443, - "loss_total": 0.5322695970535278, - "step": 82699 - }, - { - "epoch": 0.0054, - "grad_norm": 2.112302303314209, - "learning_rate": 7.14327094045673e-05, - "loss": 0.5841, - "step": 82700 - }, - { - "epoch": 0.005598, - "loss_gen": 4.327634811401367, - "loss_rtd": 0.351259708404541, - "loss_sent": 0.1786668449640274, - "loss_sod": 0.07430240511894226, - "loss_total": 0.6042289733886719, - "step": 82799 - }, - { - "epoch": 0.005598, - "loss_gen": 4.847617149353027, - "loss_rtd": 0.33705267310142517, - "loss_sent": 0.10496676713228226, - "loss_sod": 0.042972419410943985, - "loss_total": 0.4849918484687805, - "step": 82799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.8327969312667847, - "learning_rate": 7.140403508541658e-05, - "loss": 0.6011, - "step": 82800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.828712463378906, - "loss_rtd": 0.3448171615600586, - "loss_sent": 0.13701654970645905, - "loss_sod": 0.10765130072832108, - "loss_total": 0.5894849896430969, - "step": 82899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.773641109466553, - "loss_rtd": 0.328213632106781, - "loss_sent": 0.3089849352836609, - "loss_sod": 0.1221984475851059, - "loss_total": 0.759397029876709, - "step": 82899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.8959248661994934, - "learning_rate": 7.137535214472306e-05, - "loss": 0.5852, - "step": 82900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.394990921020508, - "loss_rtd": 0.3399747610092163, - "loss_sent": 0.15843546390533447, - "loss_sod": 0.08604242652654648, - "loss_total": 0.5844526290893555, - "step": 82999 - }, - { - "epoch": 0.005998, - "loss_gen": 4.741580963134766, - "loss_rtd": 0.3336928188800812, - "loss_sent": 0.17245937883853912, - "loss_sod": 0.02594193071126938, - "loss_total": 0.5320941209793091, - "step": 82999 - }, - { - "epoch": 0.006, - "grad_norm": 1.5453912019729614, - "learning_rate": 7.134666059404028e-05, - "loss": 0.5932, - "step": 83000 - }, - { - "epoch": 0.006, - "eval_loss": 0.576248288154602, - "eval_runtime": 152.0388, - "eval_samples_per_second": 101.573, - "eval_steps_per_second": 0.796, - "step": 83000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.927525043487549, - "loss_rtd": 0.3208247721195221, - "loss_sent": 0.1647900640964508, - "loss_sod": 0.10025301575660706, - "loss_total": 0.5858678817749023, - "step": 83099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.932606220245361, - "loss_rtd": 0.3480580449104309, - "loss_sent": 0.2994418442249298, - "loss_sod": 0.0884096771478653, - "loss_total": 0.7359095811843872, - "step": 83099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.814121663570404, - "learning_rate": 7.131796044492514e-05, - "loss": 0.6067, - "step": 83100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.173018932342529, - "loss_rtd": 0.3376966714859009, - "loss_sent": 0.056200314313173294, - "loss_sod": 0.09993436187505722, - "loss_total": 0.4938313663005829, - "step": 83199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.874149799346924, - "loss_rtd": 0.35732176899909973, - "loss_sent": 0.04529409483075142, - "loss_sod": 0.04716810956597328, - "loss_total": 0.44978398084640503, - "step": 83199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.7071604132652283, - "learning_rate": 7.128925170893809e-05, - "loss": 0.6105, - "step": 83200 - }, - { - "epoch": 0.006598, - "loss_gen": 4.64926290512085, - "loss_rtd": 0.3289829194545746, - "loss_sent": 0.1293676495552063, - "loss_sod": 0.07462191581726074, - "loss_total": 0.5329724550247192, - "step": 83299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.161506175994873, - "loss_rtd": 0.32510906457901, - "loss_sent": 0.09937126934528351, - "loss_sod": 0.16410039365291595, - "loss_total": 0.5885807275772095, - "step": 83299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.2556997537612915, - "learning_rate": 7.126053439764299e-05, - "loss": 0.5866, - "step": 83300 - }, - { - "epoch": 0.006798, - "loss_gen": 4.905492305755615, - "loss_rtd": 0.34571221470832825, - "loss_sent": 0.41267988085746765, - "loss_sod": 0.027991173788905144, - "loss_total": 0.7863832712173462, - "step": 83399 - }, - { - "epoch": 0.006798, - "loss_gen": 4.741423606872559, - "loss_rtd": 0.3393404185771942, - "loss_sent": 0.0038078154902905226, - "loss_sod": 0.28449833393096924, - "loss_total": 0.6276466250419617, - "step": 83399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.1530828475952148, - "learning_rate": 7.123180852260718e-05, - "loss": 0.59, - "step": 83400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.59058141708374, - "loss_rtd": 0.33513790369033813, - "loss_sent": 0.0916479155421257, - "loss_sod": 0.09420442581176758, - "loss_total": 0.520990252494812, - "step": 83499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.141010761260986, - "loss_rtd": 0.32602787017822266, - "loss_sent": 0.0297227930277586, - "loss_sod": 0.17539286613464355, - "loss_total": 0.5311435461044312, - "step": 83499 - }, - { - "epoch": 0.007, - "grad_norm": 1.0477850437164307, - "learning_rate": 7.120307409540146e-05, - "loss": 0.5962, - "step": 83500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.906367778778076, - "loss_rtd": 0.32614704966545105, - "loss_sent": 0.14142878353595734, - "loss_sod": 0.06066673994064331, - "loss_total": 0.5282425880432129, - "step": 83599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.8122663497924805, - "loss_rtd": 0.343631386756897, - "loss_sent": 0.15950128436088562, - "loss_sod": 0.03024749830365181, - "loss_total": 0.5333801507949829, - "step": 83599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.989540159702301, - "learning_rate": 7.11743311276e-05, - "loss": 0.5914, - "step": 83600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.893677234649658, - "loss_rtd": 0.3473113477230072, - "loss_sent": 0.41388821601867676, - "loss_sod": 0.06965136528015137, - "loss_total": 0.8308509588241577, - "step": 83699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.037633895874023, - "loss_rtd": 0.34461310505867004, - "loss_sent": 0.061890531331300735, - "loss_sod": 0.08433335274457932, - "loss_total": 0.4908370077610016, - "step": 83699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.752944827079773, - "learning_rate": 7.11455796307805e-05, - "loss": 0.5874, - "step": 83700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.618000030517578, - "loss_rtd": 0.34183815121650696, - "loss_sent": 0.06785891950130463, - "loss_sod": 0.03274420648813248, - "loss_total": 0.44244128465652466, - "step": 83799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.727540969848633, - "loss_rtd": 0.358920156955719, - "loss_sent": 0.17538397014141083, - "loss_sod": 0.1261889487504959, - "loss_total": 0.6604930758476257, - "step": 83799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.753732442855835, - "learning_rate": 7.111681961652405e-05, - "loss": 0.5912, - "step": 83800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.761882305145264, - "loss_rtd": 0.34194257855415344, - "loss_sent": 0.14010225236415863, - "loss_sod": 0.03127816691994667, - "loss_total": 0.5133230090141296, - "step": 83899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.765944480895996, - "loss_rtd": 0.3241620361804962, - "loss_sent": 0.5859805345535278, - "loss_sod": 0.06969790160655975, - "loss_total": 0.9798404574394226, - "step": 83899 - }, - { - "epoch": 0.0078, - "grad_norm": 3.3188042640686035, - "learning_rate": 7.10880510964152e-05, - "loss": 0.5866, - "step": 83900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.736507892608643, - "loss_rtd": 0.3393866717815399, - "loss_sent": 0.17300938069820404, - "loss_sod": 0.1425882875919342, - "loss_total": 0.6549843549728394, - "step": 83999 - }, - { - "epoch": 0.007998, - "loss_gen": 4.610940456390381, - "loss_rtd": 0.35401055216789246, - "loss_sent": 0.04571661725640297, - "loss_sod": 0.013207420706748962, - "loss_total": 0.4129345715045929, - "step": 83999 - }, - { - "epoch": 0.008, - "grad_norm": 1.0809509754180908, - "learning_rate": 7.105927408204189e-05, - "loss": 0.5765, - "step": 84000 - }, - { - "epoch": 0.008, - "eval_loss": 0.5772362947463989, - "eval_runtime": 153.3887, - "eval_samples_per_second": 100.679, - "eval_steps_per_second": 0.789, - "step": 84000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.888073921203613, - "loss_rtd": 0.35439684987068176, - "loss_sent": 0.328273206949234, - "loss_sod": 0.07193342596292496, - "loss_total": 0.7546035051345825, - "step": 84099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.5528645515441895, - "loss_rtd": 0.3527440130710602, - "loss_sent": 0.19996052980422974, - "loss_sod": 0.0916566327214241, - "loss_total": 0.644361138343811, - "step": 84099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.13083815574646, - "learning_rate": 7.103048858499549e-05, - "loss": 0.5911, - "step": 84100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.745001316070557, - "loss_rtd": 0.3726560175418854, - "loss_sent": 0.14635957777500153, - "loss_sod": 0.0025487099774181843, - "loss_total": 0.5215643048286438, - "step": 84199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.553593158721924, - "loss_rtd": 0.33759889006614685, - "loss_sent": 0.17711028456687927, - "loss_sod": 0.13150401413440704, - "loss_total": 0.646213173866272, - "step": 84199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.7179540991783142, - "learning_rate": 7.100169461687081e-05, - "loss": 0.6069, - "step": 84200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.926650047302246, - "loss_rtd": 0.32800349593162537, - "loss_sent": 0.18810003995895386, - "loss_sod": 0.034881964325904846, - "loss_total": 0.5509855151176453, - "step": 84299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.353402614593506, - "loss_rtd": 0.3569122850894928, - "loss_sent": 0.017280040308833122, - "loss_sod": 0.064621701836586, - "loss_total": 0.43881404399871826, - "step": 84299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.742855429649353, - "learning_rate": 7.097289218926604e-05, - "loss": 0.5819, - "step": 84300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.87427282333374, - "loss_rtd": 0.3532108962535858, - "loss_sent": 0.10535918176174164, - "loss_sod": 0.06047770380973816, - "loss_total": 0.5190477967262268, - "step": 84399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.954662799835205, - "loss_rtd": 0.33628085255622864, - "loss_sent": 0.22558976709842682, - "loss_sod": 0.07964995503425598, - "loss_total": 0.6415205597877502, - "step": 84399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.9289606213569641, - "learning_rate": 7.09440813137828e-05, - "loss": 0.6014, - "step": 84400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.7491679191589355, - "loss_rtd": 0.3386727571487427, - "loss_sent": 0.1393914520740509, - "loss_sod": 0.07014788687229156, - "loss_total": 0.5482120513916016, - "step": 84499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.837072849273682, - "loss_rtd": 0.32627806067466736, - "loss_sent": 0.26598304510116577, - "loss_sod": 0.1486058086156845, - "loss_total": 0.7408668994903564, - "step": 84499 - }, - { - "epoch": 0.009, - "grad_norm": 0.9888080954551697, - "learning_rate": 7.091526200202612e-05, - "loss": 0.6071, - "step": 84500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.859007358551025, - "loss_rtd": 0.32816991209983826, - "loss_sent": 0.22488537430763245, - "loss_sod": 0.04852532222867012, - "loss_total": 0.6015806198120117, - "step": 84599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.959181785583496, - "loss_rtd": 0.34092968702316284, - "loss_sent": 0.17731061577796936, - "loss_sod": 0.17065644264221191, - "loss_total": 0.6888967752456665, - "step": 84599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.6574453711509705, - "learning_rate": 7.08864342656044e-05, - "loss": 0.5941, - "step": 84600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.744875907897949, - "loss_rtd": 0.3381560146808624, - "loss_sent": 0.23297180235385895, - "loss_sod": 0.11416157335042953, - "loss_total": 0.6852893829345703, - "step": 84699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.035859107971191, - "loss_rtd": 0.34597527980804443, - "loss_sent": 0.450090229511261, - "loss_sod": 0.03151167184114456, - "loss_total": 0.8275771737098694, - "step": 84699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.1587839126586914, - "learning_rate": 7.085759811612946e-05, - "loss": 0.5985, - "step": 84700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.791224479675293, - "loss_rtd": 0.3161582946777344, - "loss_sent": 0.226994127035141, - "loss_sod": 0.034145288169384, - "loss_total": 0.5772976875305176, - "step": 84799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.157752513885498, - "loss_rtd": 0.3282153010368347, - "loss_sent": 0.11908372491598129, - "loss_sod": 0.17988857626914978, - "loss_total": 0.6271875500679016, - "step": 84799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.2291240692138672, - "learning_rate": 7.08287535652165e-05, - "loss": 0.5951, - "step": 84800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.31527853012085, - "loss_rtd": 0.3151688873767853, - "loss_sent": 0.11810876429080963, - "loss_sod": 0.05422281101346016, - "loss_total": 0.48750048875808716, - "step": 84899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.165080547332764, - "loss_rtd": 0.3413681983947754, - "loss_sent": 0.1543727070093155, - "loss_sod": 0.046299442648887634, - "loss_total": 0.5420403480529785, - "step": 84899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.8090561628341675, - "learning_rate": 7.07999006244841e-05, - "loss": 0.592, - "step": 84900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.7347002029418945, - "loss_rtd": 0.3390168845653534, - "loss_sent": 0.49473872780799866, - "loss_sod": 0.03233742341399193, - "loss_total": 0.8660930395126343, - "step": 84999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.499658107757568, - "loss_rtd": 0.3307003378868103, - "loss_sent": 0.32836344838142395, - "loss_sod": 0.04098241776227951, - "loss_total": 0.700046181678772, - "step": 84999 - }, - { - "epoch": 0.01, - "grad_norm": 2.6637675762176514, - "learning_rate": 7.077103930555419e-05, - "loss": 0.6071, - "step": 85000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5636082887649536, - "eval_runtime": 151.5121, - "eval_samples_per_second": 101.926, - "eval_steps_per_second": 0.799, - "step": 85000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.155033111572266, - "loss_rtd": 0.3279043436050415, - "loss_sent": 0.10387200862169266, - "loss_sod": 0.08239713311195374, - "loss_total": 0.5141735076904297, - "step": 85099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.681345462799072, - "loss_rtd": 0.3611370623111725, - "loss_sent": 0.11581390351057053, - "loss_sod": 0.07930219173431396, - "loss_total": 0.5562531352043152, - "step": 85099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8770554661750793, - "learning_rate": 7.074216962005216e-05, - "loss": 0.5828, - "step": 85100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.5408711433410645, - "loss_rtd": 0.3362995982170105, - "loss_sent": 0.29717129468917847, - "loss_sod": 0.0074509442783892155, - "loss_total": 0.6409218311309814, - "step": 85199 - }, - { - "epoch": 0.010398, - "loss_gen": 4.74578332901001, - "loss_rtd": 0.3223089277744293, - "loss_sent": 0.17509596049785614, - "loss_sod": 0.0695834830403328, - "loss_total": 0.5669883489608765, - "step": 85199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.3575395345687866, - "learning_rate": 7.071329157960665e-05, - "loss": 0.593, - "step": 85200 - }, - { - "epoch": 0.010598, - "loss_gen": 4.741410732269287, - "loss_rtd": 0.31915760040283203, - "loss_sent": 0.5397876501083374, - "loss_sod": 0.010888807475566864, - "loss_total": 0.8698340654373169, - "step": 85299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.687405109405518, - "loss_rtd": 0.333869069814682, - "loss_sent": 0.5599907040596008, - "loss_sod": 0.064788818359375, - "loss_total": 0.9586485624313354, - "step": 85299 - }, - { - "epoch": 0.0106, - "grad_norm": 4.997759819030762, - "learning_rate": 7.06844051958498e-05, - "loss": 0.5913, - "step": 85300 - }, - { - "epoch": 0.010798, - "loss_gen": 4.9768595695495605, - "loss_rtd": 0.3349880576133728, - "loss_sent": 0.125346377491951, - "loss_sod": 0.10623002797365189, - "loss_total": 0.5665644407272339, - "step": 85399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.237880229949951, - "loss_rtd": 0.3359336256980896, - "loss_sent": 0.010364835150539875, - "loss_sod": 0.11379797756671906, - "loss_total": 0.46009641885757446, - "step": 85399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.265926718711853, - "learning_rate": 7.0655510480417e-05, - "loss": 0.5838, - "step": 85400 - }, - { - "epoch": 0.010998, - "loss_gen": 3.779320478439331, - "loss_rtd": 0.3096616566181183, - "loss_sent": 0.057744111865758896, - "loss_sod": 0.09589076787233353, - "loss_total": 0.4632965326309204, - "step": 85499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.641038417816162, - "loss_rtd": 0.330924391746521, - "loss_sent": 0.09023583680391312, - "loss_sod": 0.02348215878009796, - "loss_total": 0.4446423649787903, - "step": 85499 - }, - { - "epoch": 0.011, - "grad_norm": 0.806560754776001, - "learning_rate": 7.062660744494706e-05, - "loss": 0.5787, - "step": 85500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.924380779266357, - "loss_rtd": 0.33277904987335205, - "loss_sent": 0.15705890953540802, - "loss_sod": 0.13256730139255524, - "loss_total": 0.6224052309989929, - "step": 85599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.707538604736328, - "loss_rtd": 0.32195743918418884, - "loss_sent": 0.09301640093326569, - "loss_sod": 0.07604361325502396, - "loss_total": 0.4910174608230591, - "step": 85599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9301294684410095, - "learning_rate": 7.05976961010821e-05, - "loss": 0.6062, - "step": 85600 - }, - { - "epoch": 0.011398, - "loss_gen": 4.808781147003174, - "loss_rtd": 0.3219926059246063, - "loss_sent": 0.12298624962568283, - "loss_sod": 0.0509757325053215, - "loss_total": 0.49595460295677185, - "step": 85699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.390843391418457, - "loss_rtd": 0.33405792713165283, - "loss_sent": 0.026021022349596024, - "loss_sod": 0.11794449388980865, - "loss_total": 0.4780234396457672, - "step": 85699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.7896721959114075, - "learning_rate": 7.056877646046761e-05, - "loss": 0.5888, - "step": 85700 - }, - { - "epoch": 0.011598, - "loss_gen": 4.867891788482666, - "loss_rtd": 0.3365158438682556, - "loss_sent": 0.5656376481056213, - "loss_sod": 0.03345107287168503, - "loss_total": 0.9356045722961426, - "step": 85799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.885983943939209, - "loss_rtd": 0.3423565924167633, - "loss_sent": 0.2113242745399475, - "loss_sod": 0.024075839668512344, - "loss_total": 0.5777567028999329, - "step": 85799 - }, - { - "epoch": 0.0116, - "grad_norm": 2.0509936809539795, - "learning_rate": 7.053984853475244e-05, - "loss": 0.5864, - "step": 85800 - }, - { - "epoch": 0.011798, - "loss_gen": 4.7428717613220215, - "loss_rtd": 0.3404030501842499, - "loss_sent": 0.17430885136127472, - "loss_sod": 0.010180543176829815, - "loss_total": 0.5248924493789673, - "step": 85899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.872359275817871, - "loss_rtd": 0.3368551433086395, - "loss_sent": 0.3656493127346039, - "loss_sod": 0.05703877657651901, - "loss_total": 0.759543240070343, - "step": 85899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.3592721223831177, - "learning_rate": 7.051091233558871e-05, - "loss": 0.5825, - "step": 85900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.666917324066162, - "loss_rtd": 0.33322930335998535, - "loss_sent": 0.19591303169727325, - "loss_sod": 0.09397896379232407, - "loss_total": 0.6231213212013245, - "step": 85999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.712637424468994, - "loss_rtd": 0.3511534035205841, - "loss_sent": 0.20152921974658966, - "loss_sod": 0.060484375804662704, - "loss_total": 0.6131669878959656, - "step": 85999 - }, - { - "epoch": 0.012, - "grad_norm": 1.0833466053009033, - "learning_rate": 7.048196787463195e-05, - "loss": 0.5989, - "step": 86000 - }, - { - "epoch": 0.012, - "eval_loss": 0.5701783299446106, - "eval_runtime": 152.5195, - "eval_samples_per_second": 101.253, - "eval_steps_per_second": 0.793, - "step": 86000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.020610332489014, - "loss_rtd": 0.3479492664337158, - "loss_sent": 0.12172229588031769, - "loss_sod": 0.0221400186419487, - "loss_total": 0.4918115735054016, - "step": 86099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.548126697540283, - "loss_rtd": 0.33291855454444885, - "loss_sent": 0.17961755394935608, - "loss_sod": 0.11401254683732986, - "loss_total": 0.6265486478805542, - "step": 86099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.7376876473426819, - "learning_rate": 7.0453015163541e-05, - "loss": 0.5823, - "step": 86100 - }, - { - "epoch": 0.012398, - "loss_gen": 4.535302639007568, - "loss_rtd": 0.34424495697021484, - "loss_sent": 0.2351195067167282, - "loss_sod": 0.2106015384197235, - "loss_total": 0.7899660468101501, - "step": 86199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.479808807373047, - "loss_rtd": 0.34633177518844604, - "loss_sent": 0.04221196100115776, - "loss_sod": 0.11639726161956787, - "loss_total": 0.5049409866333008, - "step": 86199 - }, - { - "epoch": 0.0124, - "grad_norm": 0.8945944905281067, - "learning_rate": 7.042405421397798e-05, - "loss": 0.5869, - "step": 86200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.183826446533203, - "loss_rtd": 0.3244444727897644, - "loss_sent": 0.013029288500547409, - "loss_sod": 0.19251969456672668, - "loss_total": 0.52999347448349, - "step": 86299 - }, - { - "epoch": 0.012598, - "loss_gen": 3.937880039215088, - "loss_rtd": 0.3229083716869354, - "loss_sent": 3.980579276685603e-05, - "loss_sod": 0.24052423238754272, - "loss_total": 0.5634723901748657, - "step": 86299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.1283100843429565, - "learning_rate": 7.039508503760835e-05, - "loss": 0.5928, - "step": 86300 - }, - { - "epoch": 0.012798, - "loss_gen": 3.9103431701660156, - "loss_rtd": 0.31903406977653503, - "loss_sent": 5.407594653661363e-05, - "loss_sod": 0.2617771327495575, - "loss_total": 0.5808652639389038, - "step": 86399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.453827381134033, - "loss_rtd": 0.31779396533966064, - "loss_sent": 0.21239037811756134, - "loss_sod": 0.017334502190351486, - "loss_total": 0.5475188493728638, - "step": 86399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.8633012175559998, - "learning_rate": 7.03661076461009e-05, - "loss": 0.5941, - "step": 86400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.856034278869629, - "loss_rtd": 0.3515087366104126, - "loss_sent": 0.40019503235816956, - "loss_sod": 0.08624620735645294, - "loss_total": 0.8379499912261963, - "step": 86499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.10723352432251, - "loss_rtd": 0.3264789283275604, - "loss_sent": 0.01756051741540432, - "loss_sod": 0.11352347582578659, - "loss_total": 0.4575629234313965, - "step": 86499 - }, - { - "epoch": 0.013, - "grad_norm": 1.4365172386169434, - "learning_rate": 7.033712205112775e-05, - "loss": 0.6133, - "step": 86500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.694050312042236, - "loss_rtd": 0.3362416625022888, - "loss_sent": 0.20183268189430237, - "loss_sod": 0.09752219915390015, - "loss_total": 0.635596513748169, - "step": 86599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.243170261383057, - "loss_rtd": 0.3346792161464691, - "loss_sent": 0.061446767300367355, - "loss_sod": 0.08382999897003174, - "loss_total": 0.4799559712409973, - "step": 86599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.9417069554328918, - "learning_rate": 7.030812826436426e-05, - "loss": 0.5752, - "step": 86600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.410436153411865, - "loss_rtd": 0.335841566324234, - "loss_sent": 0.21186509728431702, - "loss_sod": 0.06897386908531189, - "loss_total": 0.6166805028915405, - "step": 86699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.674890518188477, - "loss_rtd": 0.348044753074646, - "loss_sent": 0.129514679312706, - "loss_sod": 0.033202577382326126, - "loss_total": 0.5107620358467102, - "step": 86699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.644648790359497, - "learning_rate": 7.027912629748913e-05, - "loss": 0.5863, - "step": 86700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.710915565490723, - "loss_rtd": 0.3374066650867462, - "loss_sent": 0.23154044151306152, - "loss_sod": 0.01586066000163555, - "loss_total": 0.5848077535629272, - "step": 86799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.84998083114624, - "loss_rtd": 0.3377188742160797, - "loss_sent": 0.1488795429468155, - "loss_sod": 0.05167201906442642, - "loss_total": 0.5382704138755798, - "step": 86799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.8954354524612427, - "learning_rate": 7.025011616218435e-05, - "loss": 0.5803, - "step": 86800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.269907474517822, - "loss_rtd": 0.3336092531681061, - "loss_sent": 0.05773217976093292, - "loss_sod": 0.08352617919445038, - "loss_total": 0.474867582321167, - "step": 86899 - }, - { - "epoch": 0.013798, - "loss_gen": 4.46759557723999, - "loss_rtd": 0.34029409289360046, - "loss_sent": 0.039328545331954956, - "loss_sod": 0.1799912452697754, - "loss_total": 0.5596139430999756, - "step": 86899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.8178805708885193, - "learning_rate": 7.02210978701352e-05, - "loss": 0.5813, - "step": 86900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.13110876083374, - "loss_rtd": 0.31566160917282104, - "loss_sent": 0.17514754831790924, - "loss_sod": 0.16235847771167755, - "loss_total": 0.6531676054000854, - "step": 86999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.873115539550781, - "loss_rtd": 0.3448944687843323, - "loss_sent": 0.0757671445608139, - "loss_sod": 0.046936094760894775, - "loss_total": 0.46759772300720215, - "step": 86999 - }, - { - "epoch": 0.014, - "grad_norm": 1.4258131980895996, - "learning_rate": 7.019207143303028e-05, - "loss": 0.5802, - "step": 87000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5673538446426392, - "eval_runtime": 151.8075, - "eval_samples_per_second": 101.728, - "eval_steps_per_second": 0.797, - "step": 87000 - }, - { - "epoch": 0.014198, - "loss_gen": 4.761818885803223, - "loss_rtd": 0.33635979890823364, - "loss_sent": 0.15500541031360626, - "loss_sod": 0.0708441287279129, - "loss_total": 0.5622093081474304, - "step": 87099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.59244966506958, - "loss_rtd": 0.3566775321960449, - "loss_sent": 0.22278250753879547, - "loss_sod": 0.0419575572013855, - "loss_total": 0.6214175820350647, - "step": 87099 - }, - { - "epoch": 0.0142, - "grad_norm": 2.196838855743408, - "learning_rate": 7.016303686256137e-05, - "loss": 0.572, - "step": 87100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.7601704597473145, - "loss_rtd": 0.33573299646377563, - "loss_sent": 0.4068352282047272, - "loss_sod": 0.005720850545912981, - "loss_total": 0.7482891082763672, - "step": 87199 - }, - { - "epoch": 0.014398, - "loss_gen": 4.789660453796387, - "loss_rtd": 0.3457402288913727, - "loss_sent": 0.5019651055335999, - "loss_sod": 0.07149482518434525, - "loss_total": 0.9192001819610596, - "step": 87199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.686604380607605, - "learning_rate": 7.013399417042363e-05, - "loss": 0.5796, - "step": 87200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.942568302154541, - "loss_rtd": 0.3394739627838135, - "loss_sent": 0.19377031922340393, - "loss_sod": 0.02111152932047844, - "loss_total": 0.554355800151825, - "step": 87299 - }, - { - "epoch": 0.014598, - "loss_gen": 4.664207935333252, - "loss_rtd": 0.3370613753795624, - "loss_sent": 0.17265434563159943, - "loss_sod": 0.10131214559078217, - "loss_total": 0.6110278367996216, - "step": 87299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.783726453781128, - "learning_rate": 7.010494336831544e-05, - "loss": 0.5785, - "step": 87300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.153042793273926, - "loss_rtd": 0.3529767096042633, - "loss_sent": 0.2563766837120056, - "loss_sod": 0.06033950299024582, - "loss_total": 0.669692873954773, - "step": 87399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.191508769989014, - "loss_rtd": 0.3387785255908966, - "loss_sent": 0.10880298167467117, - "loss_sod": 0.09149737656116486, - "loss_total": 0.5390788912773132, - "step": 87399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.9969197511672974, - "learning_rate": 7.007588446793847e-05, - "loss": 0.5849, - "step": 87400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.172388553619385, - "loss_rtd": 0.32984110713005066, - "loss_sent": 0.0001445196830900386, - "loss_sod": 0.20981809496879578, - "loss_total": 0.5398037433624268, - "step": 87499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.134131908416748, - "loss_rtd": 0.3385279178619385, - "loss_sent": 0.1611495465040207, - "loss_sod": 0.0354672446846962, - "loss_total": 0.5351446866989136, - "step": 87499 - }, - { - "epoch": 0.015, - "grad_norm": 0.898914098739624, - "learning_rate": 7.004681748099764e-05, - "loss": 0.6018, - "step": 87500 - }, - { - "epoch": 0.015198, - "loss_gen": 4.956865310668945, - "loss_rtd": 0.3564647436141968, - "loss_sent": 0.22139987349510193, - "loss_sod": 0.1836312711238861, - "loss_total": 0.7614959478378296, - "step": 87599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.106131076812744, - "loss_rtd": 0.3263373076915741, - "loss_sent": 0.28746137022972107, - "loss_sod": 0.08258932828903198, - "loss_total": 0.6963880062103271, - "step": 87599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.0720531940460205, - "learning_rate": 7.001774241920111e-05, - "loss": 0.5916, - "step": 87600 - }, - { - "epoch": 0.015398, - "loss_gen": 4.984757423400879, - "loss_rtd": 0.3438102602958679, - "loss_sent": 0.41753992438316345, - "loss_sod": 0.04417465254664421, - "loss_total": 0.8055248260498047, - "step": 87699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.551210880279541, - "loss_rtd": 0.34808868169784546, - "loss_sent": 0.1848447322845459, - "loss_sod": 0.032358843833208084, - "loss_total": 0.565292239189148, - "step": 87699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.469988465309143, - "learning_rate": 6.998865929426035e-05, - "loss": 0.595, - "step": 87700 - }, - { - "epoch": 0.015598, - "loss_gen": 4.855138778686523, - "loss_rtd": 0.3485548198223114, - "loss_sent": 0.19279338419437408, - "loss_sod": 0.04516763985157013, - "loss_total": 0.5865158438682556, - "step": 87799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.957738399505615, - "loss_rtd": 0.32220762968063354, - "loss_sent": 0.330213725566864, - "loss_sod": 0.129806786775589, - "loss_total": 0.7822281122207642, - "step": 87799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.1649664640426636, - "learning_rate": 6.995956811789e-05, - "loss": 0.5845, - "step": 87800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.357553482055664, - "loss_rtd": 0.3270278871059418, - "loss_sent": 0.23838768899440765, - "loss_sod": 0.06158880144357681, - "loss_total": 0.6270043849945068, - "step": 87899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.058922290802002, - "loss_rtd": 0.34483423829078674, - "loss_sent": 0.3389914631843567, - "loss_sod": 0.08650758862495422, - "loss_total": 0.7703332901000977, - "step": 87899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.0614956617355347, - "learning_rate": 6.993046890180801e-05, - "loss": 0.574, - "step": 87900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.099646091461182, - "loss_rtd": 0.31961262226104736, - "loss_sent": 0.007533014286309481, - "loss_sod": 0.18244077265262604, - "loss_total": 0.5095863938331604, - "step": 87999 - }, - { - "epoch": 0.015998, - "loss_gen": 3.6884658336639404, - "loss_rtd": 0.3106600046157837, - "loss_sent": 0.007008615881204605, - "loss_sod": 0.320667028427124, - "loss_total": 0.638335645198822, - "step": 87999 - }, - { - "epoch": 0.016, - "grad_norm": 1.3791881799697876, - "learning_rate": 6.990136165773552e-05, - "loss": 0.5809, - "step": 88000 - }, - { - "epoch": 0.016, - "eval_loss": 0.5669590830802917, - "eval_runtime": 151.9205, - "eval_samples_per_second": 101.652, - "eval_steps_per_second": 0.796, - "step": 88000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.847960472106934, - "loss_rtd": 0.34454160928726196, - "loss_sent": 0.25795817375183105, - "loss_sod": 0.0255027636885643, - "loss_total": 0.6280025243759155, - "step": 88099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.912221431732178, - "loss_rtd": 0.33135339617729187, - "loss_sent": 0.2321702241897583, - "loss_sod": 0.017423417419195175, - "loss_total": 0.5809470415115356, - "step": 88099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.772662878036499, - "learning_rate": 6.987224639739695e-05, - "loss": 0.5837, - "step": 88100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.866065502166748, - "loss_rtd": 0.345676451921463, - "loss_sent": 0.24594935774803162, - "loss_sod": 0.03352060541510582, - "loss_total": 0.6251463890075684, - "step": 88199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.942430019378662, - "loss_rtd": 0.35307154059410095, - "loss_sent": 0.28599974513053894, - "loss_sod": 0.030116241425275803, - "loss_total": 0.6691875457763672, - "step": 88199 - }, - { - "epoch": 0.0164, - "grad_norm": 2.106566905975342, - "learning_rate": 6.984312313251989e-05, - "loss": 0.5926, - "step": 88200 - }, - { - "epoch": 0.016598, - "loss_gen": 4.405894756317139, - "loss_rtd": 0.33880099654197693, - "loss_sent": 0.07704459875822067, - "loss_sod": 0.011914386413991451, - "loss_total": 0.42775997519493103, - "step": 88299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.814757347106934, - "loss_rtd": 0.3188076317310333, - "loss_sent": 0.41589853167533875, - "loss_sod": 0.036490026861429214, - "loss_total": 0.771196186542511, - "step": 88299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.1776729822158813, - "learning_rate": 6.981399187483523e-05, - "loss": 0.5907, - "step": 88300 - }, - { - "epoch": 0.016798, - "loss_gen": 4.972283363342285, - "loss_rtd": 0.3257956802845001, - "loss_sent": 0.17019076645374298, - "loss_sod": 0.0564018152654171, - "loss_total": 0.5523882508277893, - "step": 88399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.1294097900390625, - "loss_rtd": 0.3375230133533478, - "loss_sent": 0.04303320497274399, - "loss_sod": 0.10859806090593338, - "loss_total": 0.48915427923202515, - "step": 88399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.6791639924049377, - "learning_rate": 6.978485263607698e-05, - "loss": 0.5946, - "step": 88400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.180440902709961, - "loss_rtd": 0.30871906876564026, - "loss_sent": 0.06477366387844086, - "loss_sod": 0.10586203634738922, - "loss_total": 0.4793547987937927, - "step": 88499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.830018997192383, - "loss_rtd": 0.3306615650653839, - "loss_sent": 0.28171202540397644, - "loss_sod": 0.044385459274053574, - "loss_total": 0.6567590236663818, - "step": 88499 - }, - { - "epoch": 0.017, - "grad_norm": 0.8192684650421143, - "learning_rate": 6.975570542798248e-05, - "loss": 0.5938, - "step": 88500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.691791534423828, - "loss_rtd": 0.3458113968372345, - "loss_sent": 0.04866185411810875, - "loss_sod": 0.18742048740386963, - "loss_total": 0.5818936824798584, - "step": 88599 - }, - { - "epoch": 0.017198, - "loss_gen": 3.6342341899871826, - "loss_rtd": 0.31179022789001465, - "loss_sent": 0.016484718769788742, - "loss_sod": 0.2539399266242981, - "loss_total": 0.582214891910553, - "step": 88599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.6321954727172852, - "learning_rate": 6.972655026229218e-05, - "loss": 0.5972, - "step": 88600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.733677864074707, - "loss_rtd": 0.32928189635276794, - "loss_sent": 0.0740603432059288, - "loss_sod": 0.030074436217546463, - "loss_total": 0.4334166944026947, - "step": 88699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.662412643432617, - "loss_rtd": 0.3300178349018097, - "loss_sent": 0.34945255517959595, - "loss_sod": 0.003907858394086361, - "loss_total": 0.6833782196044922, - "step": 88699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.7026873230934143, - "learning_rate": 6.969738715074981e-05, - "loss": 0.573, - "step": 88700 - }, - { - "epoch": 0.017598, - "loss_gen": 4.659727573394775, - "loss_rtd": 0.33939382433891296, - "loss_sent": 0.028759872540831566, - "loss_sod": 0.07622699439525604, - "loss_total": 0.44438067078590393, - "step": 88799 - }, - { - "epoch": 0.017598, - "loss_gen": 3.7558815479278564, - "loss_rtd": 0.3180695176124573, - "loss_sent": 0.0015709931030869484, - "loss_sod": 0.1858685165643692, - "loss_total": 0.5055090188980103, - "step": 88799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.8093413710594177, - "learning_rate": 6.966821610510222e-05, - "loss": 0.5861, - "step": 88800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.879168510437012, - "loss_rtd": 0.32642388343811035, - "loss_sent": 0.32197320461273193, - "loss_sod": 0.02328118309378624, - "loss_total": 0.6716783046722412, - "step": 88899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.66521692276001, - "loss_rtd": 0.33152955770492554, - "loss_sent": 0.1449562907218933, - "loss_sod": 0.16021357476711273, - "loss_total": 0.6366994380950928, - "step": 88899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.8646384477615356, - "learning_rate": 6.963903713709956e-05, - "loss": 0.5902, - "step": 88900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.182841777801514, - "loss_rtd": 0.33539631962776184, - "loss_sent": 0.06493685394525528, - "loss_sod": 0.14024244248867035, - "loss_total": 0.5405756235122681, - "step": 88999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.200113296508789, - "loss_rtd": 0.3302038609981537, - "loss_sent": 0.1657828539609909, - "loss_sod": 0.09019247442483902, - "loss_total": 0.5861791968345642, - "step": 88999 - }, - { - "epoch": 0.018, - "grad_norm": 1.0806804895401, - "learning_rate": 6.960985025849508e-05, - "loss": 0.5881, - "step": 89000 - }, - { - "epoch": 0.018, - "eval_loss": 0.5663845539093018, - "eval_runtime": 151.8203, - "eval_samples_per_second": 101.719, - "eval_steps_per_second": 0.797, - "step": 89000 - }, - { - "epoch": 0.018198, - "loss_gen": 4.941082000732422, - "loss_rtd": 0.3443474769592285, - "loss_sent": 0.23076364398002625, - "loss_sod": 0.10233885794878006, - "loss_total": 0.6774499416351318, - "step": 89099 - }, - { - "epoch": 0.018198, - "loss_gen": 4.799691200256348, - "loss_rtd": 0.3158910572528839, - "loss_sent": 0.34577134251594543, - "loss_sod": 0.02999667264521122, - "loss_total": 0.6916590929031372, - "step": 89099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.5906386375427246, - "learning_rate": 6.958065548104528e-05, - "loss": 0.5866, - "step": 89100 - }, - { - "epoch": 0.018398, - "loss_gen": 4.602643013000488, - "loss_rtd": 0.3486579358577728, - "loss_sent": 0.35448747873306274, - "loss_sod": 0.017640359699726105, - "loss_total": 0.7207857370376587, - "step": 89199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.9737043380737305, - "loss_rtd": 0.3401373028755188, - "loss_sent": 0.10153168439865112, - "loss_sod": 0.016300665214657784, - "loss_total": 0.45796966552734375, - "step": 89199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.8996251225471497, - "learning_rate": 6.955145281650976e-05, - "loss": 0.5858, - "step": 89200 - }, - { - "epoch": 0.018598, - "loss_gen": 3.8062310218811035, - "loss_rtd": 0.2853853404521942, - "loss_sent": 0.0019416897557675838, - "loss_sod": 0.1541668176651001, - "loss_total": 0.44149383902549744, - "step": 89299 - }, - { - "epoch": 0.018598, - "loss_gen": 4.6544365882873535, - "loss_rtd": 0.3491682708263397, - "loss_sent": 0.31565359234809875, - "loss_sod": 0.028360841795802116, - "loss_total": 0.6931827068328857, - "step": 89299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.7657874822616577, - "learning_rate": 6.952224227665142e-05, - "loss": 0.5646, - "step": 89300 - }, - { - "epoch": 0.018798, - "loss_gen": 3.746079444885254, - "loss_rtd": 0.3051496148109436, - "loss_sent": 0.0015413948567584157, - "loss_sod": 0.15952399373054504, - "loss_total": 0.46621501445770264, - "step": 89399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.637059211730957, - "loss_rtd": 0.3363519012928009, - "loss_sent": 0.44921576976776123, - "loss_sod": 0.039606474339962006, - "loss_total": 0.8251741528511047, - "step": 89399 - }, - { - "epoch": 0.0188, - "grad_norm": 2.19759202003479, - "learning_rate": 6.949302387323621e-05, - "loss": 0.5877, - "step": 89400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.969757080078125, - "loss_rtd": 0.3233579099178314, - "loss_sent": 0.3188636898994446, - "loss_sod": 0.04749014973640442, - "loss_total": 0.6897117495536804, - "step": 89499 - }, - { - "epoch": 0.018998, - "loss_gen": 4.802304267883301, - "loss_rtd": 0.33002758026123047, - "loss_sent": 0.27685385942459106, - "loss_sod": 0.07065742462873459, - "loss_total": 0.6775388717651367, - "step": 89499 - }, - { - "epoch": 0.019, - "grad_norm": 0.8650509715080261, - "learning_rate": 6.946379761803332e-05, - "loss": 0.5794, - "step": 89500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.751745700836182, - "loss_rtd": 0.3350619077682495, - "loss_sent": 0.1835276484489441, - "loss_sod": 0.023965610191226006, - "loss_total": 0.5425551533699036, - "step": 89599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.994669437408447, - "loss_rtd": 0.3365551829338074, - "loss_sent": 0.3081744909286499, - "loss_sod": 0.05738406255841255, - "loss_total": 0.7021137475967407, - "step": 89599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.9256625175476074, - "learning_rate": 6.943456352281507e-05, - "loss": 0.5876, - "step": 89600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.88437557220459, - "loss_rtd": 0.3251269459724426, - "loss_sent": 0.32263243198394775, - "loss_sod": 0.06728115677833557, - "loss_total": 0.7150405645370483, - "step": 89699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.768828392028809, - "loss_rtd": 0.33759233355522156, - "loss_sent": 0.1716608703136444, - "loss_sod": 0.0714825764298439, - "loss_total": 0.5807358026504517, - "step": 89699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.9086744785308838, - "learning_rate": 6.940532159935696e-05, - "loss": 0.593, - "step": 89700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.026336193084717, - "loss_rtd": 0.3272406756877899, - "loss_sent": 0.15711930394172668, - "loss_sod": 0.2609151303768158, - "loss_total": 0.7452751398086548, - "step": 89799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.9076714515686035, - "loss_rtd": 0.3377518355846405, - "loss_sent": 0.1297970563173294, - "loss_sod": 0.03276817873120308, - "loss_total": 0.5003170967102051, - "step": 89799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.7625382542610168, - "learning_rate": 6.937607185943762e-05, - "loss": 0.5953, - "step": 89800 - }, - { - "epoch": 0.019798, - "loss_gen": 4.734294414520264, - "loss_rtd": 0.328772634267807, - "loss_sent": 0.15064159035682678, - "loss_sod": 0.08754176646471024, - "loss_total": 0.5669559836387634, - "step": 89899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.313686370849609, - "loss_rtd": 0.3340799808502197, - "loss_sent": 6.285148265305907e-05, - "loss_sod": 0.16030219197273254, - "loss_total": 0.4944450259208679, - "step": 89899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.1803996562957764, - "learning_rate": 6.934681431483886e-05, - "loss": 0.5879, - "step": 89900 - }, - { - "epoch": 0.019998, - "loss_gen": 4.845009803771973, - "loss_rtd": 0.3295662999153137, - "loss_sent": 0.2918822467327118, - "loss_sod": 0.029903696849942207, - "loss_total": 0.6513522863388062, - "step": 89999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.945878505706787, - "loss_rtd": 0.3441464900970459, - "loss_sent": 0.10085861384868622, - "loss_sod": 0.020352281630039215, - "loss_total": 0.46535736322402954, - "step": 89999 - }, - { - "epoch": 0.02, - "grad_norm": 1.1029510498046875, - "learning_rate": 6.931754897734561e-05, - "loss": 0.5913, - "step": 90000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5658617615699768, - "eval_runtime": 153.6901, - "eval_samples_per_second": 100.481, - "eval_steps_per_second": 0.787, - "step": 90000 - }, - { - "epoch": 0.020198, - "loss_gen": 4.51792573928833, - "loss_rtd": 0.32277560234069824, - "loss_sent": 0.005829704459756613, - "loss_sod": 0.2306119203567505, - "loss_total": 0.5592172145843506, - "step": 90099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.009688854217529, - "loss_rtd": 0.319385826587677, - "loss_sent": 0.07977547496557236, - "loss_sod": 0.12750348448753357, - "loss_total": 0.5266647934913635, - "step": 90099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.484255075454712, - "learning_rate": 6.928827585874593e-05, - "loss": 0.5957, - "step": 90100 - }, - { - "epoch": 0.020398, - "loss_gen": 4.403891563415527, - "loss_rtd": 0.37662163376808167, - "loss_sent": 0.07366303354501724, - "loss_sod": 0.029338199645280838, - "loss_total": 0.47962287068367004, - "step": 90199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.337122440338135, - "loss_rtd": 0.33047062158584595, - "loss_sent": 0.15428656339645386, - "loss_sod": 0.06315929442644119, - "loss_total": 0.5479164719581604, - "step": 90199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.3041845560073853, - "learning_rate": 6.925899497083106e-05, - "loss": 0.5873, - "step": 90200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.499560356140137, - "loss_rtd": 0.34531867504119873, - "loss_sent": 0.07629498094320297, - "loss_sod": 0.10883737355470657, - "loss_total": 0.5304509997367859, - "step": 90299 - }, - { - "epoch": 0.020598, - "loss_gen": 4.623281002044678, - "loss_rtd": 0.3306674063205719, - "loss_sent": 0.06131604686379433, - "loss_sod": 0.10808578133583069, - "loss_total": 0.500069260597229, - "step": 90299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.7871525287628174, - "learning_rate": 6.92297063253953e-05, - "loss": 0.5943, - "step": 90300 - }, - { - "epoch": 0.020798, - "loss_gen": 4.951801300048828, - "loss_rtd": 0.3330695927143097, - "loss_sent": 0.3051595389842987, - "loss_sod": 0.048723429441452026, - "loss_total": 0.6869525909423828, - "step": 90399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.108636379241943, - "loss_rtd": 0.33414900302886963, - "loss_sent": 0.24988193809986115, - "loss_sod": 0.04154475778341293, - "loss_total": 0.6255757212638855, - "step": 90399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.8811945915222168, - "learning_rate": 6.920040993423615e-05, - "loss": 0.578, - "step": 90400 - }, - { - "epoch": 0.020998, - "loss_gen": 4.781215190887451, - "loss_rtd": 0.3221619427204132, - "loss_sent": 0.06880512088537216, - "loss_sod": 0.039578575640916824, - "loss_total": 0.4305456578731537, - "step": 90499 - }, - { - "epoch": 0.020998, - "loss_gen": 4.846989154815674, - "loss_rtd": 0.34084388613700867, - "loss_sent": 0.2732623219490051, - "loss_sod": 0.0029289848171174526, - "loss_total": 0.6170351505279541, - "step": 90499 - }, - { - "epoch": 0.021, - "grad_norm": 1.9614698886871338, - "learning_rate": 6.917110580915416e-05, - "loss": 0.5948, - "step": 90500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.733129501342773, - "loss_rtd": 0.34376993775367737, - "loss_sent": 0.2138681858778, - "loss_sod": 0.09283463656902313, - "loss_total": 0.6504727602005005, - "step": 90599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.80059289932251, - "loss_rtd": 0.3121301829814911, - "loss_sent": 0.41170620918273926, - "loss_sod": 0.03384142369031906, - "loss_total": 0.7576777935028076, - "step": 90599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.6248618364334106, - "learning_rate": 6.914179396195306e-05, - "loss": 0.5848, - "step": 90600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.078394889831543, - "loss_rtd": 0.32173919677734375, - "loss_sent": 0.07046397030353546, - "loss_sod": 0.04545767232775688, - "loss_total": 0.437660813331604, - "step": 90699 - }, - { - "epoch": 0.021398, - "loss_gen": 4.4654693603515625, - "loss_rtd": 0.32393723726272583, - "loss_sent": 0.3042590618133545, - "loss_sod": 0.010264010168612003, - "loss_total": 0.6384602785110474, - "step": 90699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.042040467262268, - "learning_rate": 6.911247440443963e-05, - "loss": 0.5638, - "step": 90700 - }, - { - "epoch": 0.021598, - "loss_gen": 4.693902015686035, - "loss_rtd": 0.3379054665565491, - "loss_sent": 0.1831001490354538, - "loss_sod": 0.06713329255580902, - "loss_total": 0.5881389379501343, - "step": 90799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.526010036468506, - "loss_rtd": 0.33100053668022156, - "loss_sent": 0.2598324120044708, - "loss_sod": 0.0941501259803772, - "loss_total": 0.6849830746650696, - "step": 90799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.4666478633880615, - "learning_rate": 6.908314714842379e-05, - "loss": 0.5887, - "step": 90800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.80063533782959, - "loss_rtd": 0.3257311284542084, - "loss_sent": 0.3028273284435272, - "loss_sod": 0.054644741117954254, - "loss_total": 0.6832032203674316, - "step": 90899 - }, - { - "epoch": 0.021798, - "loss_gen": 4.882693290710449, - "loss_rtd": 0.3200565576553345, - "loss_sent": 0.1715368628501892, - "loss_sod": 0.031077342107892036, - "loss_total": 0.5226707458496094, - "step": 90899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.7058055400848389, - "learning_rate": 6.905381220571857e-05, - "loss": 0.5768, - "step": 90900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.241171836853027, - "loss_rtd": 0.3140244781970978, - "loss_sent": 0.08510010689496994, - "loss_sod": 0.07062557339668274, - "loss_total": 0.46975016593933105, - "step": 90999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.380671977996826, - "loss_rtd": 0.31599661707878113, - "loss_sent": 0.0004729445499833673, - "loss_sod": 0.22579069435596466, - "loss_total": 0.5422602891921997, - "step": 90999 - }, - { - "epoch": 0.022, - "grad_norm": 0.8553369045257568, - "learning_rate": 6.902446958814006e-05, - "loss": 0.6037, - "step": 91000 - }, - { - "epoch": 0.022, - "eval_loss": 0.561314582824707, - "eval_runtime": 151.7474, - "eval_samples_per_second": 101.768, - "eval_steps_per_second": 0.797, - "step": 91000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.4136176109313965, - "loss_rtd": 0.33867254853248596, - "loss_sent": 0.20216551423072815, - "loss_sod": 0.01734425127506256, - "loss_total": 0.5581823587417603, - "step": 91099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.756989479064941, - "loss_rtd": 0.3549559414386749, - "loss_sent": 0.09194394946098328, - "loss_sod": 0.02332470379769802, - "loss_total": 0.4702245891094208, - "step": 91099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.015998363494873, - "learning_rate": 6.899511930750749e-05, - "loss": 0.5732, - "step": 91100 - }, - { - "epoch": 0.022398, - "loss_gen": 4.628757953643799, - "loss_rtd": 0.3276508152484894, - "loss_sent": 0.23581714928150177, - "loss_sod": 0.10900195688009262, - "loss_total": 0.6724699139595032, - "step": 91199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.0903167724609375, - "loss_rtd": 0.3232450485229492, - "loss_sent": 0.08014486730098724, - "loss_sod": 0.04226721078157425, - "loss_total": 0.4456571340560913, - "step": 91199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.8765266537666321, - "learning_rate": 6.896576137564313e-05, - "loss": 0.5939, - "step": 91200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.923919677734375, - "loss_rtd": 0.32763293385505676, - "loss_sent": 0.3958737254142761, - "loss_sod": 0.021579347550868988, - "loss_total": 0.7450860142707825, - "step": 91299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.680777072906494, - "loss_rtd": 0.3213493824005127, - "loss_sent": 0.04074085131287575, - "loss_sod": 0.01941441185772419, - "loss_total": 0.3815046548843384, - "step": 91299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.4765080213546753, - "learning_rate": 6.893639580437236e-05, - "loss": 0.5847, - "step": 91300 - }, - { - "epoch": 0.022798, - "loss_gen": 3.7447922229766846, - "loss_rtd": 0.3144702911376953, - "loss_sent": 6.558249879162759e-05, - "loss_sod": 0.19657494127750397, - "loss_total": 0.511110782623291, - "step": 91399 - }, - { - "epoch": 0.022798, - "loss_gen": 4.804544448852539, - "loss_rtd": 0.32792600989341736, - "loss_sent": 0.106887586414814, - "loss_sod": 0.05512861907482147, - "loss_total": 0.48994219303131104, - "step": 91399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.1063635349273682, - "learning_rate": 6.890702260552361e-05, - "loss": 0.5839, - "step": 91400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.9147491455078125, - "loss_rtd": 0.3242236375808716, - "loss_sent": 0.31382521986961365, - "loss_sod": 0.07273143529891968, - "loss_total": 0.7107802629470825, - "step": 91499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.193991661071777, - "loss_rtd": 0.3225175440311432, - "loss_sent": 0.16223827004432678, - "loss_sod": 0.12314175814390182, - "loss_total": 0.6078975796699524, - "step": 91499 - }, - { - "epoch": 0.023, - "grad_norm": 1.018505573272705, - "learning_rate": 6.887764179092842e-05, - "loss": 0.581, - "step": 91500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.449570655822754, - "loss_rtd": 0.33190277218818665, - "loss_sent": 0.12670950591564178, - "loss_sod": 0.11134722083806992, - "loss_total": 0.5699595212936401, - "step": 91599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.419771671295166, - "loss_rtd": 0.3302202820777893, - "loss_sent": 0.07329118251800537, - "loss_sod": 0.10373926162719727, - "loss_total": 0.5072507262229919, - "step": 91599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.2618904113769531, - "learning_rate": 6.884825337242138e-05, - "loss": 0.5836, - "step": 91600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.094449996948242, - "loss_rtd": 0.3166581988334656, - "loss_sent": 0.10368459671735764, - "loss_sod": 0.043163277208805084, - "loss_total": 0.4635060727596283, - "step": 91699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.795473575592041, - "loss_rtd": 0.3415147066116333, - "loss_sent": 0.35659798979759216, - "loss_sod": 0.04028211534023285, - "loss_total": 0.7383948564529419, - "step": 91699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.4385802745819092, - "learning_rate": 6.881885736184014e-05, - "loss": 0.5678, - "step": 91700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.745820045471191, - "loss_rtd": 0.32357558608055115, - "loss_sent": 0.22468724846839905, - "loss_sod": 0.01799878105521202, - "loss_total": 0.5662616491317749, - "step": 91799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.822248935699463, - "loss_rtd": 0.34313949942588806, - "loss_sent": 0.21366900205612183, - "loss_sod": 0.049441125243902206, - "loss_total": 0.6062496304512024, - "step": 91799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.3255048990249634, - "learning_rate": 6.878945377102539e-05, - "loss": 0.5865, - "step": 91800 - }, - { - "epoch": 0.023798, - "loss_gen": 4.711320877075195, - "loss_rtd": 0.3355475664138794, - "loss_sent": 0.2739960253238678, - "loss_sod": 0.0651506781578064, - "loss_total": 0.674694299697876, - "step": 91899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.000282287597656, - "loss_rtd": 0.32347211241722107, - "loss_sent": 0.10537165403366089, - "loss_sod": 0.06985322386026382, - "loss_total": 0.4986969828605652, - "step": 91899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.4182720184326172, - "learning_rate": 6.876004261182092e-05, - "loss": 0.5817, - "step": 91900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.340452671051025, - "loss_rtd": 0.3391110897064209, - "loss_sent": 0.15645575523376465, - "loss_sod": 0.047599561512470245, - "loss_total": 0.5431663990020752, - "step": 91999 - }, - { - "epoch": 0.023998, - "loss_gen": 3.864002227783203, - "loss_rtd": 0.3019247055053711, - "loss_sent": 0.0006159417098388076, - "loss_sod": 0.18779833614826202, - "loss_total": 0.4903389811515808, - "step": 91999 - }, - { - "epoch": 0.024, - "grad_norm": 1.1301180124282837, - "learning_rate": 6.873062389607352e-05, - "loss": 0.5876, - "step": 92000 - }, - { - "epoch": 0.024, - "eval_loss": 0.5641158223152161, - "eval_runtime": 151.8861, - "eval_samples_per_second": 101.675, - "eval_steps_per_second": 0.797, - "step": 92000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.568058490753174, - "loss_rtd": 0.32689598202705383, - "loss_sent": 0.1527620255947113, - "loss_sod": 0.07747532427310944, - "loss_total": 0.5571333169937134, - "step": 92099 - }, - { - "epoch": 0.024198, - "loss_gen": 4.698720932006836, - "loss_rtd": 0.3107092082500458, - "loss_sent": 0.2789194583892822, - "loss_sod": 0.005637895781546831, - "loss_total": 0.595266580581665, - "step": 92099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.138969898223877, - "learning_rate": 6.870119763563307e-05, - "loss": 0.574, - "step": 92100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.00078821182251, - "loss_rtd": 0.316707968711853, - "loss_sent": 0.4610620439052582, - "loss_sod": 0.12511105835437775, - "loss_total": 0.9028810262680054, - "step": 92199 - }, - { - "epoch": 0.024398, - "loss_gen": 4.84621000289917, - "loss_rtd": 0.317338764667511, - "loss_sent": 0.22897499799728394, - "loss_sod": 0.01574169285595417, - "loss_total": 0.5620554685592651, - "step": 92199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.2326622009277344, - "learning_rate": 6.867176384235243e-05, - "loss": 0.584, - "step": 92200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.925948143005371, - "loss_rtd": 0.33173057436943054, - "loss_sent": 0.0742463544011116, - "loss_sod": 0.15905244648456573, - "loss_total": 0.5650293827056885, - "step": 92299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.965786457061768, - "loss_rtd": 0.32640862464904785, - "loss_sent": 0.18212881684303284, - "loss_sod": 0.09404770284891129, - "loss_total": 0.6025851368904114, - "step": 92299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.2922873497009277, - "learning_rate": 6.864232252808757e-05, - "loss": 0.5905, - "step": 92300 - }, - { - "epoch": 0.024798, - "loss_gen": 4.907704830169678, - "loss_rtd": 0.33255714178085327, - "loss_sent": 0.18304024636745453, - "loss_sod": 0.059389252215623856, - "loss_total": 0.5749866366386414, - "step": 92399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.2335615158081055, - "loss_rtd": 0.33417046070098877, - "loss_sent": 0.07495804131031036, - "loss_sod": 0.1096133142709732, - "loss_total": 0.51874178647995, - "step": 92399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.9164637327194214, - "learning_rate": 6.86128737046974e-05, - "loss": 0.5908, - "step": 92400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.048556327819824, - "loss_rtd": 0.3085005283355713, - "loss_sent": 0.03911376744508743, - "loss_sod": 0.13077664375305176, - "loss_total": 0.4783909320831299, - "step": 92499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.776292324066162, - "loss_rtd": 0.3288393020629883, - "loss_sent": 0.166950985789299, - "loss_sod": 0.29131215810775757, - "loss_total": 0.787102460861206, - "step": 92499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8874497413635254, - "learning_rate": 6.858341738404396e-05, - "loss": 0.5974, - "step": 92500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.173285484313965, - "loss_rtd": 0.3269065022468567, - "loss_sent": 0.00547097297385335, - "loss_sod": 0.14457273483276367, - "loss_total": 0.47695019841194153, - "step": 92599 - }, - { - "epoch": 0.025198, - "loss_gen": 4.434605121612549, - "loss_rtd": 0.3425711989402771, - "loss_sent": 5.5371558119077235e-05, - "loss_sod": 0.2001873254776001, - "loss_total": 0.5428138971328735, - "step": 92599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.0194905996322632, - "learning_rate": 6.85539535779922e-05, - "loss": 0.5897, - "step": 92600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.890368938446045, - "loss_rtd": 0.3072635233402252, - "loss_sent": 0.2147490233182907, - "loss_sod": 0.1317869871854782, - "loss_total": 0.6537995338439941, - "step": 92699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.97855281829834, - "loss_rtd": 0.33050867915153503, - "loss_sent": 0.19271942973136902, - "loss_sod": 0.018159018829464912, - "loss_total": 0.541387140750885, - "step": 92699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8697763681411743, - "learning_rate": 6.852448229841015e-05, - "loss": 0.5787, - "step": 92700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.931724548339844, - "loss_rtd": 0.3266960084438324, - "loss_sent": 0.11171326041221619, - "loss_sod": 0.0046081882901489735, - "loss_total": 0.44301745295524597, - "step": 92799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.95509672164917, - "loss_rtd": 0.3352271318435669, - "loss_sent": 0.2149454653263092, - "loss_sod": 0.028835207223892212, - "loss_total": 0.5790078043937683, - "step": 92799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.7829344868659973, - "learning_rate": 6.849500355716886e-05, - "loss": 0.5752, - "step": 92800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.798049449920654, - "loss_rtd": 0.3433483839035034, - "loss_sent": 0.17305660247802734, - "loss_sod": 0.03813375532627106, - "loss_total": 0.5545387268066406, - "step": 92899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.248742580413818, - "loss_rtd": 0.32993873953819275, - "loss_sent": 0.22511538863182068, - "loss_sod": 0.05407518893480301, - "loss_total": 0.6091293096542358, - "step": 92899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.5971472263336182, - "learning_rate": 6.846551736614233e-05, - "loss": 0.5967, - "step": 92900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.976089954376221, - "loss_rtd": 0.31229016184806824, - "loss_sent": 0.14180786907672882, - "loss_sod": 0.04470566660165787, - "loss_total": 0.49880367517471313, - "step": 92999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.7940287590026855, - "loss_rtd": 0.3305432200431824, - "loss_sent": 0.29722675681114197, - "loss_sod": 0.03651285916566849, - "loss_total": 0.6642827987670898, - "step": 92999 - }, - { - "epoch": 0.026, - "grad_norm": 0.9626067280769348, - "learning_rate": 6.843602373720763e-05, - "loss": 0.5781, - "step": 93000 - }, - { - "epoch": 0.026, - "eval_loss": 0.5562506914138794, - "eval_runtime": 152.1715, - "eval_samples_per_second": 101.484, - "eval_steps_per_second": 0.795, - "step": 93000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.889732360839844, - "loss_rtd": 0.33864760398864746, - "loss_sent": 0.15712913870811462, - "loss_sod": 0.0421319380402565, - "loss_total": 0.537908673286438, - "step": 93099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.961540699005127, - "loss_rtd": 0.3180936872959137, - "loss_sent": 0.016319338232278824, - "loss_sod": 0.058194972574710846, - "loss_total": 0.39260798692703247, - "step": 93099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9290160536766052, - "learning_rate": 6.840652268224478e-05, - "loss": 0.5976, - "step": 93100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.791418075561523, - "loss_rtd": 0.3520411550998688, - "loss_sent": 0.11683662235736847, - "loss_sod": 0.008030342869460583, - "loss_total": 0.4769081473350525, - "step": 93199 - }, - { - "epoch": 0.026398, - "loss_gen": 4.8290486335754395, - "loss_rtd": 0.3225650191307068, - "loss_sent": 0.056988898664712906, - "loss_sod": 0.03313485532999039, - "loss_total": 0.4126887619495392, - "step": 93199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.7951269745826721, - "learning_rate": 6.837701421313677e-05, - "loss": 0.5914, - "step": 93200 - }, - { - "epoch": 0.026598, - "loss_gen": 4.922800540924072, - "loss_rtd": 0.32993313670158386, - "loss_sent": 0.05176448076963425, - "loss_sod": 0.03638416528701782, - "loss_total": 0.41808179020881653, - "step": 93299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.823805809020996, - "loss_rtd": 0.33049729466438293, - "loss_sent": 0.33578017354011536, - "loss_sod": 0.02971348538994789, - "loss_total": 0.6959909200668335, - "step": 93299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.7036159634590149, - "learning_rate": 6.834749834176965e-05, - "loss": 0.5733, - "step": 93300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.183927536010742, - "loss_rtd": 0.3362314701080322, - "loss_sent": 0.11524573713541031, - "loss_sod": 0.04844234138727188, - "loss_total": 0.4999195337295532, - "step": 93399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.61659049987793, - "loss_rtd": 0.3277224004268646, - "loss_sent": 0.20252130925655365, - "loss_sod": 0.02409757860004902, - "loss_total": 0.5543413162231445, - "step": 93399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.9997643232345581, - "learning_rate": 6.831797508003239e-05, - "loss": 0.5982, - "step": 93400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.1337199211120605, - "loss_rtd": 0.3236249089241028, - "loss_sent": 0.3480924069881439, - "loss_sod": 0.04242559149861336, - "loss_total": 0.714142918586731, - "step": 93499 - }, - { - "epoch": 0.026998, - "loss_gen": 4.656829357147217, - "loss_rtd": 0.31943279504776, - "loss_sent": 0.3870808482170105, - "loss_sod": 0.02070135436952114, - "loss_total": 0.7272149920463562, - "step": 93499 - }, - { - "epoch": 0.027, - "grad_norm": 2.493122100830078, - "learning_rate": 6.828844443981696e-05, - "loss": 0.5771, - "step": 93500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.040384292602539, - "loss_rtd": 0.3554927408695221, - "loss_sent": 0.40386709570884705, - "loss_sod": 0.03719080239534378, - "loss_total": 0.7965506315231323, - "step": 93599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.405703544616699, - "loss_rtd": 0.32774341106414795, - "loss_sent": 0.08388072997331619, - "loss_sod": 0.1241694763302803, - "loss_total": 0.5357936024665833, - "step": 93599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.1916543245315552, - "learning_rate": 6.82589064330183e-05, - "loss": 0.5765, - "step": 93600 - }, - { - "epoch": 0.027398, - "loss_gen": 4.902307510375977, - "loss_rtd": 0.3315292298793793, - "loss_sent": 0.08468205481767654, - "loss_sod": 0.08713705837726593, - "loss_total": 0.5033483505249023, - "step": 93699 - }, - { - "epoch": 0.027398, - "loss_gen": 3.8888163566589355, - "loss_rtd": 0.3110394775867462, - "loss_sent": 0.0002666794753167778, - "loss_sod": 0.07057055830955505, - "loss_total": 0.38187670707702637, - "step": 93699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.7991061806678772, - "learning_rate": 6.82293610715343e-05, - "loss": 0.5754, - "step": 93700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.270509719848633, - "loss_rtd": 0.330045610666275, - "loss_sent": 0.04376628249883652, - "loss_sod": 0.04363022372126579, - "loss_total": 0.41744211316108704, - "step": 93799 - }, - { - "epoch": 0.027598, - "loss_gen": 3.6017343997955322, - "loss_rtd": 0.28588688373565674, - "loss_sent": 0.017953140661120415, - "loss_sod": 0.03629875183105469, - "loss_total": 0.3401387631893158, - "step": 93799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.5313162207603455, - "learning_rate": 6.819980836726585e-05, - "loss": 0.5578, - "step": 93800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.4805474281311035, - "loss_rtd": 0.3350062370300293, - "loss_sent": 0.15716692805290222, - "loss_sod": 0.07830715924501419, - "loss_total": 0.5704803466796875, - "step": 93899 - }, - { - "epoch": 0.027798, - "loss_gen": 4.663735389709473, - "loss_rtd": 0.3246467113494873, - "loss_sent": 0.2916516065597534, - "loss_sod": 0.02006671018898487, - "loss_total": 0.6363650560379028, - "step": 93899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.9600923657417297, - "learning_rate": 6.817024833211674e-05, - "loss": 0.5761, - "step": 93900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.982539653778076, - "loss_rtd": 0.3416353464126587, - "loss_sent": 0.12349400669336319, - "loss_sod": 0.08955147862434387, - "loss_total": 0.5546808242797852, - "step": 93999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.441377639770508, - "loss_rtd": 0.31564581394195557, - "loss_sent": 0.39718303084373474, - "loss_sod": 0.10988418757915497, - "loss_total": 0.8227130174636841, - "step": 93999 - }, - { - "epoch": 0.028, - "grad_norm": 1.1688591241836548, - "learning_rate": 6.814068097799381e-05, - "loss": 0.577, - "step": 94000 - }, - { - "epoch": 0.028, - "eval_loss": 0.5589998960494995, - "eval_runtime": 152.0681, - "eval_samples_per_second": 101.553, - "eval_steps_per_second": 0.796, - "step": 94000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.901253700256348, - "loss_rtd": 0.32753586769104004, - "loss_sent": 0.11455921083688736, - "loss_sod": 0.05945047736167908, - "loss_total": 0.5015455484390259, - "step": 94099 - }, - { - "epoch": 0.028198, - "loss_gen": 4.768004417419434, - "loss_rtd": 0.32225775718688965, - "loss_sent": 0.08686144649982452, - "loss_sod": 0.044164177030324936, - "loss_total": 0.4532833695411682, - "step": 94099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.5498080253601074, - "learning_rate": 6.811110631680674e-05, - "loss": 0.5768, - "step": 94100 - }, - { - "epoch": 0.028398, - "loss_gen": 3.9671852588653564, - "loss_rtd": 0.30639663338661194, - "loss_sent": 0.10128289461135864, - "loss_sod": 0.17533493041992188, - "loss_total": 0.5830144882202148, - "step": 94199 - }, - { - "epoch": 0.028398, - "loss_gen": 4.78854513168335, - "loss_rtd": 0.3263081908226013, - "loss_sent": 0.10896441340446472, - "loss_sod": 0.0634838342666626, - "loss_total": 0.49875643849372864, - "step": 94199 - }, - { - "epoch": 0.0284, - "grad_norm": 0.7613881230354309, - "learning_rate": 6.808152436046821e-05, - "loss": 0.5578, - "step": 94200 - }, - { - "epoch": 0.028598, - "loss_gen": 4.006463527679443, - "loss_rtd": 0.32778647541999817, - "loss_sent": 0.0010425588116049767, - "loss_sod": 0.17045274376869202, - "loss_total": 0.49928176403045654, - "step": 94299 - }, - { - "epoch": 0.028598, - "loss_gen": 3.7664616107940674, - "loss_rtd": 0.30638378858566284, - "loss_sent": 0.010290347971022129, - "loss_sod": 0.11973874270915985, - "loss_total": 0.43641290068626404, - "step": 94299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.8034588694572449, - "learning_rate": 6.805193512089384e-05, - "loss": 0.5811, - "step": 94300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.312290191650391, - "loss_rtd": 0.33947744965553284, - "loss_sent": 0.11955995857715607, - "loss_sod": 0.04985839128494263, - "loss_total": 0.5088958144187927, - "step": 94399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.070069789886475, - "loss_rtd": 0.3127090632915497, - "loss_sent": 0.16472576558589935, - "loss_sod": 0.13579319417476654, - "loss_total": 0.6132280230522156, - "step": 94399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.6400247812271118, - "learning_rate": 6.802233861000213e-05, - "loss": 0.5839, - "step": 94400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.70365047454834, - "loss_rtd": 0.3269539177417755, - "loss_sent": 0.025795940309762955, - "loss_sod": 0.16885893046855927, - "loss_total": 0.5216087698936462, - "step": 94499 - }, - { - "epoch": 0.028998, - "loss_gen": 4.463148593902588, - "loss_rtd": 0.32368203997612, - "loss_sent": 3.795439261011779e-05, - "loss_sod": 0.2021465003490448, - "loss_total": 0.5258665084838867, - "step": 94499 - }, - { - "epoch": 0.029, - "grad_norm": 0.8533647656440735, - "learning_rate": 6.799273483971461e-05, - "loss": 0.566, - "step": 94500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.47703742980957, - "loss_rtd": 0.3162345290184021, - "loss_sent": 0.1702050119638443, - "loss_sod": 0.08295343816280365, - "loss_total": 0.56939297914505, - "step": 94599 - }, - { - "epoch": 0.029198, - "loss_gen": 3.957000494003296, - "loss_rtd": 0.300820529460907, - "loss_sent": 5.911453263252042e-05, - "loss_sod": 0.2539139986038208, - "loss_total": 0.554793655872345, - "step": 94599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.1866121292114258, - "learning_rate": 6.796312382195565e-05, - "loss": 0.5868, - "step": 94600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.326991081237793, - "loss_rtd": 0.3325690031051636, - "loss_sent": 0.11136277765035629, - "loss_sod": 0.12073438614606857, - "loss_total": 0.5646661520004272, - "step": 94699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.1335649490356445, - "loss_rtd": 0.33579036593437195, - "loss_sent": 0.2504735291004181, - "loss_sod": 0.09647265076637268, - "loss_total": 0.6827365159988403, - "step": 94699 - }, - { - "epoch": 0.0294, - "grad_norm": 2.0132827758789062, - "learning_rate": 6.793350556865255e-05, - "loss": 0.5692, - "step": 94700 - }, - { - "epoch": 0.029598, - "loss_gen": 4.99793815612793, - "loss_rtd": 0.32112497091293335, - "loss_sent": 0.761057436466217, - "loss_sod": 0.13677382469177246, - "loss_total": 1.2189562320709229, - "step": 94799 - }, - { - "epoch": 0.029598, - "loss_gen": 4.6057353019714355, - "loss_rtd": 0.33685338497161865, - "loss_sent": 0.25173771381378174, - "loss_sod": 0.010746541433036327, - "loss_total": 0.599337637424469, - "step": 94799 - }, - { - "epoch": 0.0296, - "grad_norm": 3.4236013889312744, - "learning_rate": 6.790388009173556e-05, - "loss": 0.5943, - "step": 94800 - }, - { - "epoch": 0.029798, - "loss_gen": 4.816145420074463, - "loss_rtd": 0.30661526322364807, - "loss_sent": 0.46385112404823303, - "loss_sod": 0.06537486612796783, - "loss_total": 0.8358412981033325, - "step": 94899 - }, - { - "epoch": 0.029798, - "loss_gen": 4.936795711517334, - "loss_rtd": 0.3264653980731964, - "loss_sent": 0.5147636532783508, - "loss_sod": 0.01598704420030117, - "loss_total": 0.8572161197662354, - "step": 94899 - }, - { - "epoch": 0.0298, - "grad_norm": 3.2405478954315186, - "learning_rate": 6.787424740313782e-05, - "loss": 0.5974, - "step": 94900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.354405403137207, - "loss_rtd": 0.3224092125892639, - "loss_sent": 0.2332027405500412, - "loss_sod": 0.05973425507545471, - "loss_total": 0.6153461933135986, - "step": 94999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.362025737762451, - "loss_rtd": 0.32954007387161255, - "loss_sent": 0.14254899322986603, - "loss_sod": 0.0746975690126419, - "loss_total": 0.5467866063117981, - "step": 94999 - }, - { - "epoch": 0.03, - "grad_norm": 0.8183056712150574, - "learning_rate": 6.784460751479533e-05, - "loss": 0.5894, - "step": 95000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5654690861701965, - "eval_runtime": 151.7443, - "eval_samples_per_second": 101.77, - "eval_steps_per_second": 0.797, - "step": 95000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.05227518081665, - "loss_rtd": 0.3259921371936798, - "loss_sent": 0.1098412349820137, - "loss_sod": 0.07160069048404694, - "loss_total": 0.507434070110321, - "step": 95099 - }, - { - "epoch": 0.030198, - "loss_gen": 4.776727676391602, - "loss_rtd": 0.32909905910491943, - "loss_sent": 0.3634227216243744, - "loss_sod": 0.1241193413734436, - "loss_total": 0.816641092300415, - "step": 95099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.8225250244140625, - "learning_rate": 6.78149604386471e-05, - "loss": 0.5771, - "step": 95100 - }, - { - "epoch": 0.030398, - "loss_gen": 3.834552764892578, - "loss_rtd": 0.2728038728237152, - "loss_sent": 0.07849743962287903, - "loss_sod": 0.0710219070315361, - "loss_total": 0.42232322692871094, - "step": 95199 - }, - { - "epoch": 0.030398, - "loss_gen": 4.6927571296691895, - "loss_rtd": 0.32974088191986084, - "loss_sent": 0.11285782605409622, - "loss_sod": 0.008276103995740414, - "loss_total": 0.45087480545043945, - "step": 95199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.921575665473938, - "learning_rate": 6.778530618663493e-05, - "loss": 0.582, - "step": 95200 - }, - { - "epoch": 0.030598, - "loss_gen": 4.853861331939697, - "loss_rtd": 0.3156324326992035, - "loss_sent": 0.05224445462226868, - "loss_sod": 0.045983877032995224, - "loss_total": 0.4138607680797577, - "step": 95299 - }, - { - "epoch": 0.030598, - "loss_gen": 3.976949453353882, - "loss_rtd": 0.31451788544654846, - "loss_sent": 5.422512549557723e-05, - "loss_sod": 0.11671288311481476, - "loss_total": 0.43128499388694763, - "step": 95299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.5733093619346619, - "learning_rate": 6.775564477070353e-05, - "loss": 0.5812, - "step": 95300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.2976155281066895, - "loss_rtd": 0.32160815596580505, - "loss_sent": 0.1605655997991562, - "loss_sod": 0.16856563091278076, - "loss_total": 0.6507393717765808, - "step": 95399 - }, - { - "epoch": 0.030798, - "loss_gen": 4.614623069763184, - "loss_rtd": 0.32714834809303284, - "loss_sent": 0.2806328535079956, - "loss_sod": 0.11139454692602158, - "loss_total": 0.7191757559776306, - "step": 95399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.9181222319602966, - "learning_rate": 6.772597620280057e-05, - "loss": 0.5765, - "step": 95400 - }, - { - "epoch": 0.030998, - "loss_gen": 3.8405728340148926, - "loss_rtd": 0.32943475246429443, - "loss_sent": 3.500963794067502e-05, - "loss_sod": 0.34127986431121826, - "loss_total": 0.6707496047019958, - "step": 95499 - }, - { - "epoch": 0.030998, - "loss_gen": 4.477936744689941, - "loss_rtd": 0.30729934573173523, - "loss_sent": 0.054840900003910065, - "loss_sod": 0.1278393268585205, - "loss_total": 0.4899795651435852, - "step": 95499 - }, - { - "epoch": 0.031, - "grad_norm": 0.944657027721405, - "learning_rate": 6.769630049487651e-05, - "loss": 0.5819, - "step": 95500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.953878402709961, - "loss_rtd": 0.333034485578537, - "loss_sent": 0.09908009320497513, - "loss_sod": 0.09774559736251831, - "loss_total": 0.5298601984977722, - "step": 95599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.242481708526611, - "loss_rtd": 0.3321620225906372, - "loss_sent": 0.15503835678100586, - "loss_sod": 0.08591844141483307, - "loss_total": 0.5731188058853149, - "step": 95599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.3390533924102783, - "learning_rate": 6.766661765888472e-05, - "loss": 0.5943, - "step": 95600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.760883331298828, - "loss_rtd": 0.31884732842445374, - "loss_sent": 0.25945332646369934, - "loss_sod": 0.042491644620895386, - "loss_total": 0.6207922697067261, - "step": 95699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.66824197769165, - "loss_rtd": 0.33300110697746277, - "loss_sent": 0.3370925784111023, - "loss_sod": 0.0452861562371254, - "loss_total": 0.7153798341751099, - "step": 95699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.4447449445724487, - "learning_rate": 6.763692770678145e-05, - "loss": 0.5816, - "step": 95700 - }, - { - "epoch": 0.031598, - "loss_gen": 4.620529651641846, - "loss_rtd": 0.3089185059070587, - "loss_sent": 0.08042269200086594, - "loss_sod": 0.07788525521755219, - "loss_total": 0.46722644567489624, - "step": 95799 - }, - { - "epoch": 0.031598, - "loss_gen": 4.976378440856934, - "loss_rtd": 0.30276939272880554, - "loss_sent": 0.24751774966716766, - "loss_sod": 0.10714822262525558, - "loss_total": 0.6574353575706482, - "step": 95799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.5255494117736816, - "learning_rate": 6.76072306505258e-05, - "loss": 0.5746, - "step": 95800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.37379264831543, - "loss_rtd": 0.3348751366138458, - "loss_sent": 0.06511642038822174, - "loss_sod": 0.09197663515806198, - "loss_total": 0.49196821451187134, - "step": 95899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.007133960723877, - "loss_rtd": 0.3252499997615814, - "loss_sent": 0.28710106015205383, - "loss_sod": 0.04140780121088028, - "loss_total": 0.6537588834762573, - "step": 95899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.8288471698760986, - "learning_rate": 6.757752650207976e-05, - "loss": 0.5714, - "step": 95900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.212557792663574, - "loss_rtd": 0.314740926027298, - "loss_sent": 0.3847140669822693, - "loss_sod": 0.07991741597652435, - "loss_total": 0.7793723940849304, - "step": 95999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.0146050453186035, - "loss_rtd": 0.33511367440223694, - "loss_sent": 0.17582233250141144, - "loss_sod": 0.17594823241233826, - "loss_total": 0.6868842244148254, - "step": 95999 - }, - { - "epoch": 0.032, - "grad_norm": 0.8252917528152466, - "learning_rate": 6.754781527340815e-05, - "loss": 0.5914, - "step": 96000 - }, - { - "epoch": 0.032, - "eval_loss": 0.5543739199638367, - "eval_runtime": 152.0748, - "eval_samples_per_second": 101.549, - "eval_steps_per_second": 0.796, - "step": 96000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.152584075927734, - "loss_rtd": 0.3240143954753876, - "loss_sent": 0.12969470024108887, - "loss_sod": 0.28179270029067993, - "loss_total": 0.735501766204834, - "step": 96099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.5161638259887695, - "loss_rtd": 0.33060047030448914, - "loss_sent": 0.17957797646522522, - "loss_sod": 0.021789319813251495, - "loss_total": 0.5319677591323853, - "step": 96099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.2911592721939087, - "learning_rate": 6.751809697647865e-05, - "loss": 0.5768, - "step": 96100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.449524402618408, - "loss_rtd": 0.3334892690181732, - "loss_sent": 0.09894298762083054, - "loss_sod": 0.12040586769580841, - "loss_total": 0.552838146686554, - "step": 96199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.929290294647217, - "loss_rtd": 0.33274203538894653, - "loss_sent": 0.06985893845558167, - "loss_sod": 0.01726730354130268, - "loss_total": 0.4198682904243469, - "step": 96199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.8315396308898926, - "learning_rate": 6.748837162326176e-05, - "loss": 0.5828, - "step": 96200 - }, - { - "epoch": 0.000598, - "loss_gen": 4.316035747528076, - "loss_rtd": 0.3169655203819275, - "loss_sent": 0.044400569051504135, - "loss_sod": 0.0127729382365942, - "loss_total": 0.37413904070854187, - "step": 96299 - }, - { - "epoch": 0.000598, - "loss_gen": 3.987454891204834, - "loss_rtd": 0.3221747577190399, - "loss_sent": 0.05567139387130737, - "loss_sod": 0.10847879201173782, - "loss_total": 0.4863249361515045, - "step": 96299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.6544968485832214, - "learning_rate": 6.745863922573089e-05, - "loss": 0.5773, - "step": 96300 - }, - { - "epoch": 0.000798, - "loss_gen": 3.916574716567993, - "loss_rtd": 0.30331626534461975, - "loss_sent": 3.7389883800642565e-05, - "loss_sod": 0.16237305104732513, - "loss_total": 0.46572670340538025, - "step": 96399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.333512306213379, - "loss_rtd": 0.33237385749816895, - "loss_sent": 0.20916062593460083, - "loss_sod": 0.0840887576341629, - "loss_total": 0.6256232261657715, - "step": 96399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.012278437614441, - "learning_rate": 6.742889979586223e-05, - "loss": 0.5716, - "step": 96400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.873001575469971, - "loss_rtd": 0.3161269724369049, - "loss_sent": 0.08801141381263733, - "loss_sod": 0.08623507618904114, - "loss_total": 0.4903734624385834, - "step": 96499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.970402240753174, - "loss_rtd": 0.3255588710308075, - "loss_sent": 0.1072477251291275, - "loss_sod": 0.029996495693922043, - "loss_total": 0.46280306577682495, - "step": 96499 - }, - { - "epoch": 0.001, - "grad_norm": 1.0416778326034546, - "learning_rate": 6.73991533456348e-05, - "loss": 0.5758, - "step": 96500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.5595479011535645, - "loss_rtd": 0.30846548080444336, - "loss_sent": 0.1608753800392151, - "loss_sod": 0.028976604342460632, - "loss_total": 0.4983174800872803, - "step": 96599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.133108615875244, - "loss_rtd": 0.32069918513298035, - "loss_sent": 0.10308950394392014, - "loss_sod": 0.03172937035560608, - "loss_total": 0.45551806688308716, - "step": 96599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.594142198562622, - "learning_rate": 6.736939988703051e-05, - "loss": 0.5629, - "step": 96600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.983255863189697, - "loss_rtd": 0.3195769190788269, - "loss_sent": 0.1621912568807602, - "loss_sod": 0.09886207431554794, - "loss_total": 0.5806302428245544, - "step": 96699 - }, - { - "epoch": 0.001398, - "loss_gen": 4.989710330963135, - "loss_rtd": 0.3138113021850586, - "loss_sent": 0.23394593596458435, - "loss_sod": 0.023748688399791718, - "loss_total": 0.5715059041976929, - "step": 96699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.8842947483062744, - "learning_rate": 6.7339639432034e-05, - "loss": 0.5776, - "step": 96700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.149325370788574, - "loss_rtd": 0.3154001235961914, - "loss_sent": 0.048991478979587555, - "loss_sod": 0.047412846237421036, - "loss_total": 0.4118044376373291, - "step": 96799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.415460586547852, - "loss_rtd": 0.3051280081272125, - "loss_sent": 0.0019924018997699022, - "loss_sod": 0.19839507341384888, - "loss_total": 0.5055155158042908, - "step": 96799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.8955238461494446, - "learning_rate": 6.73098719926328e-05, - "loss": 0.5777, - "step": 96800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.44769287109375, - "loss_rtd": 0.3249737322330475, - "loss_sent": 0.24767473340034485, - "loss_sod": 0.16051912307739258, - "loss_total": 0.7331675887107849, - "step": 96899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.257213115692139, - "loss_rtd": 0.3302728533744812, - "loss_sent": 0.10927356034517288, - "loss_sod": 0.019681863486766815, - "loss_total": 0.4592282772064209, - "step": 96899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.026750922203064, - "learning_rate": 6.728009758081725e-05, - "loss": 0.561, - "step": 96900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.501615047454834, - "loss_rtd": 0.3360181450843811, - "loss_sent": 0.12423968315124512, - "loss_sod": 0.13235829770565033, - "loss_total": 0.5926161408424377, - "step": 96999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.110159397125244, - "loss_rtd": 0.335237979888916, - "loss_sent": 0.33716878294944763, - "loss_sod": 0.07254555821418762, - "loss_total": 0.7449523210525513, - "step": 96999 - }, - { - "epoch": 0.002, - "grad_norm": 0.5956766605377197, - "learning_rate": 6.725031620858045e-05, - "loss": 0.5641, - "step": 97000 - }, - { - "epoch": 0.002, - "eval_loss": 0.5578327775001526, - "eval_runtime": 154.3466, - "eval_samples_per_second": 100.054, - "eval_steps_per_second": 0.784, - "step": 97000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.034848213195801, - "loss_rtd": 0.3098868429660797, - "loss_sent": 0.14335517585277557, - "loss_sod": 0.08951293677091599, - "loss_total": 0.5427549481391907, - "step": 97099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.086153030395508, - "loss_rtd": 0.3134371340274811, - "loss_sent": 0.2379942685365677, - "loss_sod": 0.022758232429623604, - "loss_total": 0.5741896629333496, - "step": 97099 - }, - { - "epoch": 0.0022, - "grad_norm": 2.3417766094207764, - "learning_rate": 6.722052788791835e-05, - "loss": 0.5818, - "step": 97100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.786106586456299, - "loss_rtd": 0.33305180072784424, - "loss_sent": 0.10948657989501953, - "loss_sod": 0.13213729858398438, - "loss_total": 0.5746756792068481, - "step": 97199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.835780620574951, - "loss_rtd": 0.30624470114707947, - "loss_sent": 0.1795838326215744, - "loss_sod": 0.003370692953467369, - "loss_total": 0.4891992211341858, - "step": 97199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.9707930684089661, - "learning_rate": 6.719073263082967e-05, - "loss": 0.5685, - "step": 97200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.574395656585693, - "loss_rtd": 0.3340129554271698, - "loss_sent": 0.1329442262649536, - "loss_sod": 0.0605727955698967, - "loss_total": 0.5275299549102783, - "step": 97299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.205588340759277, - "loss_rtd": 0.31502586603164673, - "loss_sent": 0.006520736496895552, - "loss_sod": 0.17753705382347107, - "loss_total": 0.49908366799354553, - "step": 97299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.108286738395691, - "learning_rate": 6.716093044931594e-05, - "loss": 0.5975, - "step": 97300 - }, - { - "epoch": 0.002798, - "loss_gen": 3.865203380584717, - "loss_rtd": 0.28823763132095337, - "loss_sent": 0.1040513888001442, - "loss_sod": 0.03210706636309624, - "loss_total": 0.4243960678577423, - "step": 97399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.896703243255615, - "loss_rtd": 0.3183712065219879, - "loss_sent": 0.173360675573349, - "loss_sod": 0.09247465431690216, - "loss_total": 0.5842065811157227, - "step": 97399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.019538164138794, - "learning_rate": 6.713112135538148e-05, - "loss": 0.5795, - "step": 97400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.013828754425049, - "loss_rtd": 0.32604846358299255, - "loss_sent": 0.09510602056980133, - "loss_sod": 0.05798761546611786, - "loss_total": 0.47914206981658936, - "step": 97499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.215390682220459, - "loss_rtd": 0.32824939489364624, - "loss_sent": 0.015554209239780903, - "loss_sod": 0.15425828099250793, - "loss_total": 0.4980618953704834, - "step": 97499 - }, - { - "epoch": 0.003, - "grad_norm": 1.072590947151184, - "learning_rate": 6.710130536103338e-05, - "loss": 0.5762, - "step": 97500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.738191604614258, - "loss_rtd": 0.3265741765499115, - "loss_sent": 0.06120915710926056, - "loss_sod": 0.06804925948381424, - "loss_total": 0.4558326005935669, - "step": 97599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.706864356994629, - "loss_rtd": 0.34545576572418213, - "loss_sent": 0.23362770676612854, - "loss_sod": 0.022017929702997208, - "loss_total": 0.6011013984680176, - "step": 97599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.2530436515808105, - "learning_rate": 6.707148247828154e-05, - "loss": 0.5752, - "step": 97600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.104136943817139, - "loss_rtd": 0.3150257170200348, - "loss_sent": 0.13220404088497162, - "loss_sod": 0.1294633448123932, - "loss_total": 0.5766931176185608, - "step": 97699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.722529888153076, - "loss_rtd": 0.31569933891296387, - "loss_sent": 0.06980519741773605, - "loss_sod": 0.0539846271276474, - "loss_total": 0.4394891858100891, - "step": 97699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.1341675519943237, - "learning_rate": 6.704165271913858e-05, - "loss": 0.5827, - "step": 97700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.343900203704834, - "loss_rtd": 0.30843839049339294, - "loss_sent": 0.1389574408531189, - "loss_sod": 0.13357022404670715, - "loss_total": 0.580966055393219, - "step": 97799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.162288665771484, - "loss_rtd": 0.3121359050273895, - "loss_sent": 0.12799625098705292, - "loss_sod": 0.06447754800319672, - "loss_total": 0.5046097040176392, - "step": 97799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.9196800589561462, - "learning_rate": 6.701181609561996e-05, - "loss": 0.5951, - "step": 97800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.433358192443848, - "loss_rtd": 0.31221476197242737, - "loss_sent": 0.12727300822734833, - "loss_sod": 0.04596872627735138, - "loss_total": 0.48545652627944946, - "step": 97899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.850100517272949, - "loss_rtd": 0.3153742551803589, - "loss_sent": 0.16493898630142212, - "loss_sod": 0.05727492272853851, - "loss_total": 0.5375881791114807, - "step": 97899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.8402808308601379, - "learning_rate": 6.698197261974383e-05, - "loss": 0.5785, - "step": 97900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.0220723152160645, - "loss_rtd": 0.3187673091888428, - "loss_sent": 0.05181638151407242, - "loss_sod": 0.1913912296295166, - "loss_total": 0.5619749426841736, - "step": 97999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.382528781890869, - "loss_rtd": 0.3159908652305603, - "loss_sent": 0.22155477106571198, - "loss_sod": 0.051169902086257935, - "loss_total": 0.5887155532836914, - "step": 97999 - }, - { - "epoch": 0.004, - "grad_norm": 1.3498995304107666, - "learning_rate": 6.695212230353119e-05, - "loss": 0.5795, - "step": 98000 - }, - { - "epoch": 0.004, - "eval_loss": 0.5507333278656006, - "eval_runtime": 151.1769, - "eval_samples_per_second": 102.152, - "eval_steps_per_second": 0.8, - "step": 98000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.718911170959473, - "loss_rtd": 0.32273924350738525, - "loss_sent": 0.3075748383998871, - "loss_sod": 0.0760340765118599, - "loss_total": 0.706348180770874, - "step": 98099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.744332313537598, - "loss_rtd": 0.3346700370311737, - "loss_sent": 0.3497461676597595, - "loss_sod": 0.011217096820473671, - "loss_total": 0.6956332921981812, - "step": 98099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2334489822387695, - "learning_rate": 6.69222651590057e-05, - "loss": 0.5762, - "step": 98100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.152660369873047, - "loss_rtd": 0.3339768052101135, - "loss_sent": 0.18060199916362762, - "loss_sod": 0.16783888638019562, - "loss_total": 0.6824176907539368, - "step": 98199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.443938255310059, - "loss_rtd": 0.3192080855369568, - "loss_sent": 0.05183064565062523, - "loss_sod": 0.2955458462238312, - "loss_total": 0.6665846109390259, - "step": 98199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.4761645793914795, - "learning_rate": 6.689240119819382e-05, - "loss": 0.582, - "step": 98200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.026210784912109, - "loss_rtd": 0.34066301584243774, - "loss_sent": 0.24418486654758453, - "loss_sod": 0.03400744870305061, - "loss_total": 0.618855357170105, - "step": 98299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.778450012207031, - "loss_rtd": 0.3111923336982727, - "loss_sent": 0.1166359931230545, - "loss_sod": 0.038881730288267136, - "loss_total": 0.46671003103256226, - "step": 98299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.9921392202377319, - "learning_rate": 6.686253043312476e-05, - "loss": 0.5756, - "step": 98300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.739179611206055, - "loss_rtd": 0.33178257942199707, - "loss_sent": 0.2532976567745209, - "loss_sod": 0.1417667120695114, - "loss_total": 0.7268469333648682, - "step": 98399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.754785537719727, - "loss_rtd": 0.3195800185203552, - "loss_sent": 0.021861691027879715, - "loss_sod": 0.03856188803911209, - "loss_total": 0.38000360131263733, - "step": 98399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.7642726898193359, - "learning_rate": 6.683265287583046e-05, - "loss": 0.5802, - "step": 98400 - }, - { - "epoch": 0.004998, - "loss_gen": 4.178973197937012, - "loss_rtd": 0.3167199194431305, - "loss_sent": 7.245481538120657e-05, - "loss_sod": 0.21317259967327118, - "loss_total": 0.5299649834632874, - "step": 98499 - }, - { - "epoch": 0.004998, - "loss_gen": 3.935793876647949, - "loss_rtd": 0.3067960739135742, - "loss_sent": 0.000541093060746789, - "loss_sod": 0.20888842642307281, - "loss_total": 0.5162255764007568, - "step": 98499 - }, - { - "epoch": 0.005, - "grad_norm": 1.3664913177490234, - "learning_rate": 6.68027685383456e-05, - "loss": 0.5627, - "step": 98500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.335430145263672, - "loss_rtd": 0.3187076151371002, - "loss_sent": 0.0009927991777658463, - "loss_sod": 0.1831379532814026, - "loss_total": 0.5028383731842041, - "step": 98599 - }, - { - "epoch": 0.005198, - "loss_gen": 3.8598129749298096, - "loss_rtd": 0.28702089190483093, - "loss_sent": 0.0530681237578392, - "loss_sod": 0.06852472573518753, - "loss_total": 0.40861374139785767, - "step": 98599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.0185030698776245, - "learning_rate": 6.677287743270758e-05, - "loss": 0.5597, - "step": 98600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.972487449645996, - "loss_rtd": 0.30347952246665955, - "loss_sent": 0.5287765264511108, - "loss_sod": 0.03221752494573593, - "loss_total": 0.8644735813140869, - "step": 98699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.005542278289795, - "loss_rtd": 0.3374456763267517, - "loss_sent": 0.14009448885917664, - "loss_sod": 0.020594295114278793, - "loss_total": 0.49813446402549744, - "step": 98699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.627276062965393, - "learning_rate": 6.674297957095651e-05, - "loss": 0.5698, - "step": 98700 - }, - { - "epoch": 0.005598, - "loss_gen": 4.793325424194336, - "loss_rtd": 0.3073770999908447, - "loss_sent": 0.12930980324745178, - "loss_sod": 0.09903208911418915, - "loss_total": 0.5357190370559692, - "step": 98799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.244107723236084, - "loss_rtd": 0.3283325433731079, - "loss_sent": 0.29997268319129944, - "loss_sod": 0.07830352336168289, - "loss_total": 0.706608772277832, - "step": 98799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.0231043100357056, - "learning_rate": 6.671307496513532e-05, - "loss": 0.571, - "step": 98800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.634741306304932, - "loss_rtd": 0.30798089504241943, - "loss_sent": 0.2160707265138626, - "loss_sod": 0.07869061082601547, - "loss_total": 0.6027422547340393, - "step": 98899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.8831634521484375, - "loss_rtd": 0.3266364336013794, - "loss_sent": 0.005406542681157589, - "loss_sod": 0.17728476226329803, - "loss_total": 0.50932776927948, - "step": 98899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.0855956077575684, - "learning_rate": 6.66831636272895e-05, - "loss": 0.585, - "step": 98900 - }, - { - "epoch": 0.005998, - "loss_gen": 4.216243743896484, - "loss_rtd": 0.3120708167552948, - "loss_sent": 0.060384079813957214, - "loss_sod": 0.034039661288261414, - "loss_total": 0.4064945578575134, - "step": 98999 - }, - { - "epoch": 0.005998, - "loss_gen": 4.957681655883789, - "loss_rtd": 0.32467541098594666, - "loss_sent": 0.0837487280368805, - "loss_sod": 0.053761258721351624, - "loss_total": 0.46218541264533997, - "step": 98999 - }, - { - "epoch": 0.006, - "grad_norm": 0.8100439310073853, - "learning_rate": 6.665324556946738e-05, - "loss": 0.5808, - "step": 99000 - }, - { - "epoch": 0.006, - "eval_loss": 0.5498852133750916, - "eval_runtime": 150.8186, - "eval_samples_per_second": 102.395, - "eval_steps_per_second": 0.802, - "step": 99000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.371486186981201, - "loss_rtd": 0.317146360874176, - "loss_sent": 0.03379607945680618, - "loss_sod": 0.10996784269809723, - "loss_total": 0.46091026067733765, - "step": 99099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.400828838348389, - "loss_rtd": 0.32509690523147583, - "loss_sent": 0.15079639852046967, - "loss_sod": 0.028728686273097992, - "loss_total": 0.5046219825744629, - "step": 99099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.9362024068832397, - "learning_rate": 6.662332080371992e-05, - "loss": 0.5783, - "step": 99100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.116110801696777, - "loss_rtd": 0.30363523960113525, - "loss_sent": 0.002137851668521762, - "loss_sod": 0.19607360661029816, - "loss_total": 0.5018466711044312, - "step": 99199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.902525424957275, - "loss_rtd": 0.30980345606803894, - "loss_sent": 0.19927829504013062, - "loss_sod": 0.029844652861356735, - "loss_total": 0.538926362991333, - "step": 99199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.8438217639923096, - "learning_rate": 6.659338934210084e-05, - "loss": 0.5684, - "step": 99200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.058244228363037, - "loss_rtd": 0.3190957009792328, - "loss_sent": 0.15323588252067566, - "loss_sod": 0.032876744866371155, - "loss_total": 0.5052083730697632, - "step": 99299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.0241193771362305, - "loss_rtd": 0.31235265731811523, - "loss_sent": 0.1497867852449417, - "loss_sod": 0.014322447590529919, - "loss_total": 0.47646188735961914, - "step": 99299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.295091986656189, - "learning_rate": 6.656345119666652e-05, - "loss": 0.5846, - "step": 99300 - }, - { - "epoch": 0.006798, - "loss_gen": 3.940835952758789, - "loss_rtd": 0.3246998190879822, - "loss_sent": 3.1785522878635675e-05, - "loss_sod": 0.18714739382266998, - "loss_total": 0.5118789672851562, - "step": 99399 - }, - { - "epoch": 0.006798, - "loss_gen": 3.761812925338745, - "loss_rtd": 0.29748839139938354, - "loss_sent": 0.004839347209781408, - "loss_sod": 0.17168492078781128, - "loss_total": 0.47401267290115356, - "step": 99399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.1156435012817383, - "learning_rate": 6.653350637947602e-05, - "loss": 0.5643, - "step": 99400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.088423728942871, - "loss_rtd": 0.32740095257759094, - "loss_sent": 0.04146308824419975, - "loss_sod": 0.09329129010438919, - "loss_total": 0.4621553122997284, - "step": 99499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.329144477844238, - "loss_rtd": 0.3109143078327179, - "loss_sent": 0.23411983251571655, - "loss_sod": 0.10364553332328796, - "loss_total": 0.6486796736717224, - "step": 99499 - }, - { - "epoch": 0.007, - "grad_norm": 0.8647971153259277, - "learning_rate": 6.650355490259114e-05, - "loss": 0.5795, - "step": 99500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.738002300262451, - "loss_rtd": 0.315083771944046, - "loss_sent": 0.1626637876033783, - "loss_sod": 0.05075232312083244, - "loss_total": 0.5284998416900635, - "step": 99599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.8016252517700195, - "loss_rtd": 0.32618269324302673, - "loss_sent": 0.19932086765766144, - "loss_sod": 0.03520169481635094, - "loss_total": 0.5607052445411682, - "step": 99599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.1057846546173096, - "learning_rate": 6.647359677807634e-05, - "loss": 0.5618, - "step": 99600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.591098308563232, - "loss_rtd": 0.34488070011138916, - "loss_sent": 0.17269062995910645, - "loss_sod": 0.08623214066028595, - "loss_total": 0.6038034558296204, - "step": 99699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.796253204345703, - "loss_rtd": 0.31622114777565, - "loss_sent": 0.08896497637033463, - "loss_sod": 0.07983792573213577, - "loss_total": 0.48502403497695923, - "step": 99699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.8518372774124146, - "learning_rate": 6.64436320179987e-05, - "loss": 0.584, - "step": 99700 - }, - { - "epoch": 0.007598, - "loss_gen": 4.318517208099365, - "loss_rtd": 0.31801772117614746, - "loss_sent": 0.022323038429021835, - "loss_sod": 0.03361869603395462, - "loss_total": 0.3739594519138336, - "step": 99799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.060001373291016, - "loss_rtd": 0.3279321491718292, - "loss_sent": 0.4337468147277832, - "loss_sod": 0.07998764514923096, - "loss_total": 0.841666579246521, - "step": 99799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.227362036705017, - "learning_rate": 6.641366063442805e-05, - "loss": 0.581, - "step": 99800 - }, - { - "epoch": 0.007798, - "loss_gen": 3.996183156967163, - "loss_rtd": 0.311187207698822, - "loss_sent": 0.016353951767086983, - "loss_sod": 0.0697982981801033, - "loss_total": 0.39733946323394775, - "step": 99899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.877110481262207, - "loss_rtd": 0.31006091833114624, - "loss_sent": 0.1758640557527542, - "loss_sod": 0.08931712806224823, - "loss_total": 0.5752421021461487, - "step": 99899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.0441415309906006, - "learning_rate": 6.638368263943687e-05, - "loss": 0.5687, - "step": 99900 - }, - { - "epoch": 0.007998, - "loss_gen": 4.829002857208252, - "loss_rtd": 0.321885883808136, - "loss_sent": 0.20318901538848877, - "loss_sod": 0.10848979651927948, - "loss_total": 0.6335647106170654, - "step": 99999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.311275005340576, - "loss_rtd": 0.3442972004413605, - "loss_sent": 0.272118479013443, - "loss_sod": 0.04463036358356476, - "loss_total": 0.661046028137207, - "step": 99999 - }, - { - "epoch": 0.008, - "grad_norm": 1.10209321975708, - "learning_rate": 6.635369804510027e-05, - "loss": 0.5853, - "step": 100000 - }, - { - "epoch": 0.008, - "eval_loss": 0.5626693964004517, - "eval_runtime": 151.234, - "eval_samples_per_second": 102.113, - "eval_steps_per_second": 0.8, - "step": 100000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.7915239334106445, - "loss_rtd": 0.32424196600914, - "loss_sent": 0.11183413118124008, - "loss_sod": 0.1372000277042389, - "loss_total": 0.5732761025428772, - "step": 100099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.999149322509766, - "loss_rtd": 0.32147786021232605, - "loss_sent": 0.20129071176052094, - "loss_sod": 0.042393386363983154, - "loss_total": 0.565161943435669, - "step": 100099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.762076199054718, - "learning_rate": 6.632370686349608e-05, - "loss": 0.5833, - "step": 100100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.001662731170654, - "loss_rtd": 0.3111487329006195, - "loss_sent": 3.409513374208473e-05, - "loss_sod": 0.15228456258773804, - "loss_total": 0.46346738934516907, - "step": 100199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.259944915771484, - "loss_rtd": 0.30491870641708374, - "loss_sent": 0.04012997820973396, - "loss_sod": 0.12616415321826935, - "loss_total": 0.47121283411979675, - "step": 100199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.8947985768318176, - "learning_rate": 6.62937091067047e-05, - "loss": 0.5696, - "step": 100200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.226615905761719, - "loss_rtd": 0.3265242576599121, - "loss_sent": 0.16229365766048431, - "loss_sod": 0.09461610019207001, - "loss_total": 0.5834340453147888, - "step": 100299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.9019083976745605, - "loss_rtd": 0.32702329754829407, - "loss_sent": 0.2279345840215683, - "loss_sod": 0.044989585876464844, - "loss_total": 0.599947452545166, - "step": 100299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.3629820346832275, - "learning_rate": 6.626370478680923e-05, - "loss": 0.556, - "step": 100300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.851716041564941, - "loss_rtd": 0.31771615147590637, - "loss_sent": 0.25636428594589233, - "loss_sod": 0.09885308891534805, - "loss_total": 0.6729335188865662, - "step": 100399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.763000965118408, - "loss_rtd": 0.3167797923088074, - "loss_sent": 0.2151304930448532, - "loss_sod": 0.09555570781230927, - "loss_total": 0.6274660229682922, - "step": 100399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.6304292678833008, - "learning_rate": 6.623369391589542e-05, - "loss": 0.5755, - "step": 100400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.066193580627441, - "loss_rtd": 0.30663493275642395, - "loss_sent": 0.2264665961265564, - "loss_sod": 0.04555559158325195, - "loss_total": 0.5786571502685547, - "step": 100499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.120555400848389, - "loss_rtd": 0.30996865034103394, - "loss_sent": 0.24104826152324677, - "loss_sod": 0.051908448338508606, - "loss_total": 0.6029253602027893, - "step": 100499 - }, - { - "epoch": 0.009, - "grad_norm": 1.1163640022277832, - "learning_rate": 6.620367650605166e-05, - "loss": 0.5821, - "step": 100500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.402734756469727, - "loss_rtd": 0.3174905478954315, - "loss_sent": 0.2320769727230072, - "loss_sod": 0.04570315405726433, - "loss_total": 0.5952706336975098, - "step": 100599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.212018013000488, - "loss_rtd": 0.32338646054267883, - "loss_sent": 0.00036768606514669955, - "loss_sod": 0.25911781191825867, - "loss_total": 0.5828719735145569, - "step": 100599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.159544825553894, - "learning_rate": 6.617365256936894e-05, - "loss": 0.5817, - "step": 100600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.843101501464844, - "loss_rtd": 0.32027003169059753, - "loss_sent": 0.3335469961166382, - "loss_sod": 0.13191527128219604, - "loss_total": 0.7857322692871094, - "step": 100699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.927363395690918, - "loss_rtd": 0.31417667865753174, - "loss_sent": 0.2652972638607025, - "loss_sod": 0.04378899186849594, - "loss_total": 0.6232629418373108, - "step": 100699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.166442632675171, - "learning_rate": 6.614362211794087e-05, - "loss": 0.5655, - "step": 100700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.136700630187988, - "loss_rtd": 0.3016396760940552, - "loss_sent": 0.22913001477718353, - "loss_sod": 0.04708781838417053, - "loss_total": 0.577857494354248, - "step": 100799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.053333759307861, - "loss_rtd": 0.30575957894325256, - "loss_sent": 0.06288175284862518, - "loss_sod": 0.11969773471355438, - "loss_total": 0.48833906650543213, - "step": 100799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.0079056024551392, - "learning_rate": 6.611358516386377e-05, - "loss": 0.5711, - "step": 100800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.671409606933594, - "loss_rtd": 0.33603939414024353, - "loss_sent": 0.07858309149742126, - "loss_sod": 0.08161260187625885, - "loss_total": 0.49623510241508484, - "step": 100899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.940887451171875, - "loss_rtd": 0.3263009190559387, - "loss_sent": 0.2012975513935089, - "loss_sod": 0.013004586100578308, - "loss_total": 0.5406030416488647, - "step": 100899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.2577089071273804, - "learning_rate": 6.608354171923648e-05, - "loss": 0.5711, - "step": 100900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.780365467071533, - "loss_rtd": 0.3365417420864105, - "loss_sent": 0.2404618114233017, - "loss_sod": 0.021603599190711975, - "loss_total": 0.5986071825027466, - "step": 100999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.838820457458496, - "loss_rtd": 0.31516823172569275, - "loss_sent": 0.42256417870521545, - "loss_sod": 0.04550718888640404, - "loss_total": 0.7832396030426025, - "step": 100999 - }, - { - "epoch": 0.01, - "grad_norm": 1.636043906211853, - "learning_rate": 6.605349179616052e-05, - "loss": 0.5786, - "step": 101000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5521333813667297, - "eval_runtime": 151.1925, - "eval_samples_per_second": 102.141, - "eval_steps_per_second": 0.8, - "step": 101000 - }, - { - "epoch": 0.010198, - "loss_gen": 4.162107467651367, - "loss_rtd": 0.3015623986721039, - "loss_sent": 0.0013494102749973536, - "loss_sod": 0.22462737560272217, - "loss_total": 0.5275391936302185, - "step": 101099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.035129547119141, - "loss_rtd": 0.31379857659339905, - "loss_sent": 0.0006297457148320973, - "loss_sod": 0.21650034189224243, - "loss_total": 0.5309286713600159, - "step": 101099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.972885251045227, - "learning_rate": 6.602343540673999e-05, - "loss": 0.5699, - "step": 101100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.747851371765137, - "loss_rtd": 0.3193017244338989, - "loss_sent": 0.07847078144550323, - "loss_sod": 0.09810219705104828, - "loss_total": 0.49587470293045044, - "step": 101199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.180852890014648, - "loss_rtd": 0.3225772976875305, - "loss_sent": 0.11992615461349487, - "loss_sod": 0.056841302663087845, - "loss_total": 0.49934476613998413, - "step": 101199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.2792707681655884, - "learning_rate": 6.599337256308158e-05, - "loss": 0.5693, - "step": 101200 - }, - { - "epoch": 0.010598, - "loss_gen": 4.498040199279785, - "loss_rtd": 0.3336476683616638, - "loss_sent": 0.09353425353765488, - "loss_sod": 0.03337704762816429, - "loss_total": 0.4605589807033539, - "step": 101299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.985463619232178, - "loss_rtd": 0.3326781392097473, - "loss_sent": 0.1662260740995407, - "loss_sod": 0.029946666210889816, - "loss_total": 0.5288508534431458, - "step": 101299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.8527702689170837, - "learning_rate": 6.596330327729461e-05, - "loss": 0.5775, - "step": 101300 - }, - { - "epoch": 0.010798, - "loss_gen": 4.720818519592285, - "loss_rtd": 0.3480078876018524, - "loss_sent": 0.05008330196142197, - "loss_sod": 0.005968412384390831, - "loss_total": 0.40405961871147156, - "step": 101399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.84202241897583, - "loss_rtd": 0.3052635192871094, - "loss_sent": 0.14516431093215942, - "loss_sod": 0.05550266057252884, - "loss_total": 0.505930483341217, - "step": 101399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.6203334927558899, - "learning_rate": 6.593322756149099e-05, - "loss": 0.5648, - "step": 101400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.84304666519165, - "loss_rtd": 0.3437245190143585, - "loss_sent": 0.1599966436624527, - "loss_sod": 0.15706191956996918, - "loss_total": 0.660783052444458, - "step": 101499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.944455146789551, - "loss_rtd": 0.3024526834487915, - "loss_sent": 0.40022334456443787, - "loss_sod": 0.03615320473909378, - "loss_total": 0.7388292551040649, - "step": 101499 - }, - { - "epoch": 0.011, - "grad_norm": 1.1639444828033447, - "learning_rate": 6.590314542778522e-05, - "loss": 0.5786, - "step": 101500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.69655704498291, - "loss_rtd": 0.32028722763061523, - "loss_sent": 0.03291154280304909, - "loss_sod": 0.16597428917884827, - "loss_total": 0.5191730856895447, - "step": 101599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.082765102386475, - "loss_rtd": 0.2942723333835602, - "loss_sent": 0.05638710409402847, - "loss_sod": 0.02609078399837017, - "loss_total": 0.3767502009868622, - "step": 101599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.8092089295387268, - "learning_rate": 6.587305688829437e-05, - "loss": 0.5818, - "step": 101600 - }, - { - "epoch": 0.011398, - "loss_gen": 4.038250923156738, - "loss_rtd": 0.30816489458084106, - "loss_sent": 5.8082812756765634e-05, - "loss_sod": 0.2049764096736908, - "loss_total": 0.5131993293762207, - "step": 101699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.315676689147949, - "loss_rtd": 0.32112082839012146, - "loss_sent": 0.021004807204008102, - "loss_sod": 0.1235692948102951, - "loss_total": 0.4656949043273926, - "step": 101699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.8982096314430237, - "learning_rate": 6.58429619551381e-05, - "loss": 0.5795, - "step": 101700 - }, - { - "epoch": 0.011598, - "loss_gen": 4.464595794677734, - "loss_rtd": 0.3015983998775482, - "loss_sent": 0.05006067827343941, - "loss_sod": 0.13953593373298645, - "loss_total": 0.49119502305984497, - "step": 101799 - }, - { - "epoch": 0.011598, - "loss_gen": 4.08694314956665, - "loss_rtd": 0.3185790479183197, - "loss_sent": 4.180019459454343e-05, - "loss_sod": 0.1926421821117401, - "loss_total": 0.5112630128860474, - "step": 101799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.1276414394378662, - "learning_rate": 6.581286064043866e-05, - "loss": 0.558, - "step": 101800 - }, - { - "epoch": 0.011798, - "loss_gen": 3.974503993988037, - "loss_rtd": 0.2857121527194977, - "loss_sent": 0.0001361898030154407, - "loss_sod": 0.13948465883731842, - "loss_total": 0.42533302307128906, - "step": 101899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.069895267486572, - "loss_rtd": 0.32346153259277344, - "loss_sent": 0.0006381009006872773, - "loss_sod": 0.20743003487586975, - "loss_total": 0.5315296649932861, - "step": 101899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.807518720626831, - "learning_rate": 6.578275295632084e-05, - "loss": 0.5671, - "step": 101900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.010288715362549, - "loss_rtd": 0.31243669986724854, - "loss_sent": 4.243159855832346e-05, - "loss_sod": 0.2639489471912384, - "loss_total": 0.5764280557632446, - "step": 101999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.833768367767334, - "loss_rtd": 0.32573121786117554, - "loss_sent": 0.12251759320497513, - "loss_sod": 0.0804349035024643, - "loss_total": 0.5286837220191956, - "step": 101999 - }, - { - "epoch": 0.012, - "grad_norm": 1.121748447418213, - "learning_rate": 6.575263891491203e-05, - "loss": 0.5679, - "step": 102000 - }, - { - "epoch": 0.012, - "eval_loss": 0.5446926951408386, - "eval_runtime": 151.3259, - "eval_samples_per_second": 102.051, - "eval_steps_per_second": 0.8, - "step": 102000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.978600978851318, - "loss_rtd": 0.31439170241355896, - "loss_sent": 0.05940273776650429, - "loss_sod": 0.22584794461727142, - "loss_total": 0.5996423959732056, - "step": 102099 - }, - { - "epoch": 0.012198, - "loss_gen": 3.9863944053649902, - "loss_rtd": 0.3023916482925415, - "loss_sent": 4.6820186980767176e-05, - "loss_sod": 0.2726382613182068, - "loss_total": 0.5750767588615417, - "step": 102099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.3223897218704224, - "learning_rate": 6.572251852834212e-05, - "loss": 0.5679, - "step": 102100 - }, - { - "epoch": 0.012398, - "loss_gen": 4.828497886657715, - "loss_rtd": 0.32210204005241394, - "loss_sent": 0.18495376408100128, - "loss_sod": 0.08336347341537476, - "loss_total": 0.5904192924499512, - "step": 102199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.150286674499512, - "loss_rtd": 0.31037473678588867, - "loss_sent": 0.16400249302387238, - "loss_sod": 0.07330995053052902, - "loss_total": 0.5476871728897095, - "step": 102199 - }, - { - "epoch": 0.0124, - "grad_norm": 2.2942142486572266, - "learning_rate": 6.569239180874365e-05, - "loss": 0.5718, - "step": 102200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.675304889678955, - "loss_rtd": 0.3205735981464386, - "loss_sent": 0.14644664525985718, - "loss_sod": 0.028588993474841118, - "loss_total": 0.49560922384262085, - "step": 102299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.031630516052246, - "loss_rtd": 0.3230000436306, - "loss_sent": 0.05868524685502052, - "loss_sod": 0.061777375638484955, - "loss_total": 0.44346266984939575, - "step": 102299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.659148633480072, - "learning_rate": 6.566225876825161e-05, - "loss": 0.566, - "step": 102300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.987382411956787, - "loss_rtd": 0.3295574486255646, - "loss_sent": 0.5667756199836731, - "loss_sod": 0.0292537622153759, - "loss_total": 0.9255868196487427, - "step": 102399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.694916248321533, - "loss_rtd": 0.3182528018951416, - "loss_sent": 0.26894715428352356, - "loss_sod": 0.07731296867132187, - "loss_total": 0.6645129323005676, - "step": 102399 - }, - { - "epoch": 0.0128, - "grad_norm": 2.89612078666687, - "learning_rate": 6.563211941900364e-05, - "loss": 0.5874, - "step": 102400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.287630558013916, - "loss_rtd": 0.31084689497947693, - "loss_sent": 0.011314211413264275, - "loss_sod": 0.08189202845096588, - "loss_total": 0.4040531516075134, - "step": 102499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.082841396331787, - "loss_rtd": 0.30779874324798584, - "loss_sent": 0.23700939118862152, - "loss_sod": 0.10018089413642883, - "loss_total": 0.644989013671875, - "step": 102499 - }, - { - "epoch": 0.013, - "grad_norm": 1.138588786125183, - "learning_rate": 6.560197377313983e-05, - "loss": 0.576, - "step": 102500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.048178195953369, - "loss_rtd": 0.3007459044456482, - "loss_sent": 0.03131717070937157, - "loss_sod": 0.1861349493265152, - "loss_total": 0.5181980133056641, - "step": 102599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.632444858551025, - "loss_rtd": 0.3480101525783539, - "loss_sent": 0.13145510852336884, - "loss_sod": 0.0014071919722482562, - "loss_total": 0.4808724522590637, - "step": 102599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0020616054534912, - "learning_rate": 6.557182184280284e-05, - "loss": 0.5746, - "step": 102600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.145431995391846, - "loss_rtd": 0.3162272274494171, - "loss_sent": 0.15705393254756927, - "loss_sod": 0.2427826076745987, - "loss_total": 0.7160637974739075, - "step": 102699 - }, - { - "epoch": 0.013398, - "loss_gen": 4.439493179321289, - "loss_rtd": 0.3044537901878357, - "loss_sent": 0.12587888538837433, - "loss_sod": 0.1643853783607483, - "loss_total": 0.5947180390357971, - "step": 102699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.0804530382156372, - "learning_rate": 6.554166364013787e-05, - "loss": 0.5721, - "step": 102700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.833267688751221, - "loss_rtd": 0.30929800868034363, - "loss_sent": 0.3013760447502136, - "loss_sod": 0.014710478484630585, - "loss_total": 0.6253845691680908, - "step": 102799 - }, - { - "epoch": 0.013598, - "loss_gen": 4.671603202819824, - "loss_rtd": 0.32912150025367737, - "loss_sent": 0.45154908299446106, - "loss_sod": 0.03300929814577103, - "loss_total": 0.8136798739433289, - "step": 102799 - }, - { - "epoch": 0.0136, - "grad_norm": 2.9260988235473633, - "learning_rate": 6.551149917729267e-05, - "loss": 0.5699, - "step": 102800 - }, - { - "epoch": 0.013798, - "loss_gen": 4.445441722869873, - "loss_rtd": 0.30097082257270813, - "loss_sent": 0.2778117060661316, - "loss_sod": 0.0919363722205162, - "loss_total": 0.6707189083099365, - "step": 102899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.081186294555664, - "loss_rtd": 0.31350207328796387, - "loss_sent": 0.0635070875287056, - "loss_sod": 0.0671958401799202, - "loss_total": 0.44420498609542847, - "step": 102899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.733470618724823, - "learning_rate": 6.548132846641744e-05, - "loss": 0.5678, - "step": 102900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.434316158294678, - "loss_rtd": 0.3185882270336151, - "loss_sent": 0.1743234395980835, - "loss_sod": 0.16610151529312134, - "loss_total": 0.6590131521224976, - "step": 102999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.26799201965332, - "loss_rtd": 0.3111538887023926, - "loss_sent": 0.19061948359012604, - "loss_sod": 0.08537900447845459, - "loss_total": 0.587152361869812, - "step": 102999 - }, - { - "epoch": 0.014, - "grad_norm": 0.9744783043861389, - "learning_rate": 6.545115151966496e-05, - "loss": 0.5717, - "step": 103000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5482023358345032, - "eval_runtime": 151.1963, - "eval_samples_per_second": 102.139, - "eval_steps_per_second": 0.8, - "step": 103000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.193575382232666, - "loss_rtd": 0.3247431814670563, - "loss_sent": 0.3455546200275421, - "loss_sod": 0.08126696199178696, - "loss_total": 0.7515647411346436, - "step": 103099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.203389644622803, - "loss_rtd": 0.3015291392803192, - "loss_sent": 0.004309745505452156, - "loss_sod": 0.1388365924358368, - "loss_total": 0.444675475358963, - "step": 103099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.1028567552566528, - "learning_rate": 6.542096834919049e-05, - "loss": 0.5635, - "step": 103100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.115509510040283, - "loss_rtd": 0.302995502948761, - "loss_sent": 0.0011495847720652819, - "loss_sod": 0.13383348286151886, - "loss_total": 0.4379785656929016, - "step": 103199 - }, - { - "epoch": 0.014398, - "loss_gen": 4.152536869049072, - "loss_rtd": 0.3090682327747345, - "loss_sent": 0.04768054559826851, - "loss_sod": 0.1396777629852295, - "loss_total": 0.4964265525341034, - "step": 103199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.2074131965637207, - "learning_rate": 6.53907789671518e-05, - "loss": 0.593, - "step": 103200 - }, - { - "epoch": 0.014598, - "loss_gen": 4.523934364318848, - "loss_rtd": 0.30799534916877747, - "loss_sent": 0.03921209275722504, - "loss_sod": 0.09137500822544098, - "loss_total": 0.43858247995376587, - "step": 103299 - }, - { - "epoch": 0.014598, - "loss_gen": 3.9856107234954834, - "loss_rtd": 0.30571386218070984, - "loss_sent": 8.002371760085225e-05, - "loss_sod": 0.11510750651359558, - "loss_total": 0.4209013879299164, - "step": 103299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.7983850240707397, - "learning_rate": 6.536058338570922e-05, - "loss": 0.5603, - "step": 103300 - }, - { - "epoch": 0.014798, - "loss_gen": 3.939851760864258, - "loss_rtd": 0.3169386684894562, - "loss_sent": 4.114958574064076e-05, - "loss_sod": 0.09740159660577774, - "loss_total": 0.4143814146518707, - "step": 103399 - }, - { - "epoch": 0.014798, - "loss_gen": 3.738983154296875, - "loss_rtd": 0.31111863255500793, - "loss_sent": 0.009131144732236862, - "loss_sod": 0.07810862362384796, - "loss_total": 0.39835840463638306, - "step": 103399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.48440203070640564, - "learning_rate": 6.533038161702546e-05, - "loss": 0.5662, - "step": 103400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.638041973114014, - "loss_rtd": 0.3122525215148926, - "loss_sent": 0.1494915932416916, - "loss_sod": 0.027382340282201767, - "loss_total": 0.48912644386291504, - "step": 103499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.790919780731201, - "loss_rtd": 0.3246724605560303, - "loss_sent": 0.06974232196807861, - "loss_sod": 0.06924775242805481, - "loss_total": 0.4636625349521637, - "step": 103499 - }, - { - "epoch": 0.015, - "grad_norm": 0.8386632204055786, - "learning_rate": 6.530017367326582e-05, - "loss": 0.5688, - "step": 103500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.208066463470459, - "loss_rtd": 0.3045886754989624, - "loss_sent": 0.5311081409454346, - "loss_sod": 0.11848323047161102, - "loss_total": 0.9541800618171692, - "step": 103599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.914482116699219, - "loss_rtd": 0.3204767405986786, - "loss_sent": 0.2790606915950775, - "loss_sod": 0.033330462872982025, - "loss_total": 0.6328679323196411, - "step": 103599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.759294867515564, - "learning_rate": 6.526995956659806e-05, - "loss": 0.5673, - "step": 103600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.131373405456543, - "loss_rtd": 0.3379717469215393, - "loss_sent": 0.23370657861232758, - "loss_sod": 0.07675453275442123, - "loss_total": 0.6484328508377075, - "step": 103699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.740903377532959, - "loss_rtd": 0.3197406530380249, - "loss_sent": 0.1491633504629135, - "loss_sod": 0.08728623390197754, - "loss_total": 0.5561902523040771, - "step": 103699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.6623493432998657, - "learning_rate": 6.523973930919241e-05, - "loss": 0.5796, - "step": 103700 - }, - { - "epoch": 0.015598, - "loss_gen": 4.825345039367676, - "loss_rtd": 0.32074809074401855, - "loss_sent": 0.37894371151924133, - "loss_sod": 0.03921166807413101, - "loss_total": 0.7389034628868103, - "step": 103799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.758147239685059, - "loss_rtd": 0.3263850510120392, - "loss_sent": 0.17378558218479156, - "loss_sod": 0.049403682351112366, - "loss_total": 0.5495743155479431, - "step": 103799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.2121750116348267, - "learning_rate": 6.52095129132216e-05, - "loss": 0.5656, - "step": 103800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.990795612335205, - "loss_rtd": 0.307752788066864, - "loss_sent": 0.04160727560520172, - "loss_sod": 0.1492750644683838, - "loss_total": 0.49863511323928833, - "step": 103899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.051953315734863, - "loss_rtd": 0.300202876329422, - "loss_sent": 0.0019655979704111814, - "loss_sod": 0.16191361844539642, - "loss_total": 0.46408209204673767, - "step": 103899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.9242538809776306, - "learning_rate": 6.517928039086079e-05, - "loss": 0.5572, - "step": 103900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.933145523071289, - "loss_rtd": 0.32966703176498413, - "loss_sent": 0.11528322100639343, - "loss_sod": 0.013144847005605698, - "loss_total": 0.45809510350227356, - "step": 103999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.144468307495117, - "loss_rtd": 0.30523863434791565, - "loss_sent": 0.20605115592479706, - "loss_sod": 0.03963879495859146, - "loss_total": 0.5509285926818848, - "step": 103999 - }, - { - "epoch": 0.016, - "grad_norm": 0.6265259981155396, - "learning_rate": 6.514904175428766e-05, - "loss": 0.5677, - "step": 104000 - }, - { - "epoch": 0.016, - "eval_loss": 0.5446927547454834, - "eval_runtime": 151.3544, - "eval_samples_per_second": 102.032, - "eval_steps_per_second": 0.799, - "step": 104000 - }, - { - "epoch": 0.016198, - "loss_gen": 3.759413719177246, - "loss_rtd": 0.2940031886100769, - "loss_sent": 0.0001251414796570316, - "loss_sod": 0.26236772537231445, - "loss_total": 0.5564960837364197, - "step": 104099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.728495121002197, - "loss_rtd": 0.3166390061378479, - "loss_sent": 0.04960273578763008, - "loss_sod": 0.036053985357284546, - "loss_total": 0.4022957384586334, - "step": 104099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.973793089389801, - "learning_rate": 6.511879701568233e-05, - "loss": 0.561, - "step": 104100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.024625778198242, - "loss_rtd": 0.3019779622554779, - "loss_sent": 0.37978801131248474, - "loss_sod": 0.08072702586650848, - "loss_total": 0.7624930143356323, - "step": 104199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.858999252319336, - "loss_rtd": 0.32595953345298767, - "loss_sent": 0.08095843344926834, - "loss_sod": 0.09027931839227676, - "loss_total": 0.4971972703933716, - "step": 104199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.5230836868286133, - "learning_rate": 6.508854618722735e-05, - "loss": 0.5772, - "step": 104200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.25753116607666, - "loss_rtd": 0.33190762996673584, - "loss_sent": 0.26229095458984375, - "loss_sod": 0.09572476148605347, - "loss_total": 0.6899233460426331, - "step": 104299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.92232608795166, - "loss_rtd": 0.32525166869163513, - "loss_sent": 0.30636516213417053, - "loss_sod": 0.011416537687182426, - "loss_total": 0.6430333852767944, - "step": 104299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.396575689315796, - "learning_rate": 6.50582892811078e-05, - "loss": 0.5568, - "step": 104300 - }, - { - "epoch": 0.016798, - "loss_gen": 4.894100666046143, - "loss_rtd": 0.31734776496887207, - "loss_sent": 0.19172203540802002, - "loss_sod": 0.05022633820772171, - "loss_total": 0.5592961311340332, - "step": 104399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.057659149169922, - "loss_rtd": 0.31772705912590027, - "loss_sent": 0.31592148542404175, - "loss_sod": 0.06395834684371948, - "loss_total": 0.6976069211959839, - "step": 104399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.9074657559394836, - "learning_rate": 6.502802630951112e-05, - "loss": 0.5639, - "step": 104400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.838658809661865, - "loss_rtd": 0.3140501081943512, - "loss_sent": 0.24971437454223633, - "loss_sod": 0.011981564573943615, - "loss_total": 0.5757460594177246, - "step": 104499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.932409286499023, - "loss_rtd": 0.31491634249687195, - "loss_sent": 0.14830800890922546, - "loss_sod": 0.07958915084600449, - "loss_total": 0.5428135395050049, - "step": 104499 - }, - { - "epoch": 0.017, - "grad_norm": 0.789269745349884, - "learning_rate": 6.499775728462722e-05, - "loss": 0.5899, - "step": 104500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.028350353240967, - "loss_rtd": 0.3054017126560211, - "loss_sent": 0.005753053352236748, - "loss_sod": 0.20317314565181732, - "loss_total": 0.514327883720398, - "step": 104599 - }, - { - "epoch": 0.017198, - "loss_gen": 4.46292781829834, - "loss_rtd": 0.3214676082134247, - "loss_sent": 0.04085097834467888, - "loss_sod": 0.19534572958946228, - "loss_total": 0.5576643347740173, - "step": 104599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.0206047296524048, - "learning_rate": 6.49674822186485e-05, - "loss": 0.5793, - "step": 104600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.8943915367126465, - "loss_rtd": 0.3206595778465271, - "loss_sent": 0.23480308055877686, - "loss_sod": 0.12426088750362396, - "loss_total": 0.6797235608100891, - "step": 104699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.224236011505127, - "loss_rtd": 0.31364619731903076, - "loss_sent": 0.0050000278279185295, - "loss_sod": 0.272971510887146, - "loss_total": 0.59161776304245, - "step": 104699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.2544224262237549, - "learning_rate": 6.493720112376972e-05, - "loss": 0.5868, - "step": 104700 - }, - { - "epoch": 0.017598, - "loss_gen": 4.896785259246826, - "loss_rtd": 0.30121535062789917, - "loss_sent": 0.16163146495819092, - "loss_sod": 0.08443138003349304, - "loss_total": 0.5472781658172607, - "step": 104799 - }, - { - "epoch": 0.017598, - "loss_gen": 4.746344566345215, - "loss_rtd": 0.3232337534427643, - "loss_sent": 0.33305108547210693, - "loss_sod": 0.05574338138103485, - "loss_total": 0.7120282053947449, - "step": 104799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.3743486404418945, - "learning_rate": 6.490691401218812e-05, - "loss": 0.5625, - "step": 104800 - }, - { - "epoch": 0.017798, - "loss_gen": 4.22736930847168, - "loss_rtd": 0.2959963083267212, - "loss_sent": 0.017806321382522583, - "loss_sod": 0.13410848379135132, - "loss_total": 0.4479111135005951, - "step": 104899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.7643022537231445, - "loss_rtd": 0.30367931723594666, - "loss_sent": 0.08337313681840897, - "loss_sod": 0.07327957451343536, - "loss_total": 0.4603320360183716, - "step": 104899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.8151678442955017, - "learning_rate": 6.487662089610334e-05, - "loss": 0.5704, - "step": 104900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.2139892578125, - "loss_rtd": 0.32095271348953247, - "loss_sent": 0.08497481793165207, - "loss_sod": 0.18144731223583221, - "loss_total": 0.5873748660087585, - "step": 104999 - }, - { - "epoch": 0.017998, - "loss_gen": 4.802613258361816, - "loss_rtd": 0.31074824929237366, - "loss_sent": 0.1703757643699646, - "loss_sod": 0.029345963150262833, - "loss_total": 0.5104699730873108, - "step": 104999 - }, - { - "epoch": 0.018, - "grad_norm": 0.9500181674957275, - "learning_rate": 6.484632178771744e-05, - "loss": 0.5592, - "step": 105000 - }, - { - "epoch": 0.018, - "eval_loss": 0.5460320115089417, - "eval_runtime": 152.8021, - "eval_samples_per_second": 101.065, - "eval_steps_per_second": 0.792, - "step": 105000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.208505630493164, - "loss_rtd": 0.32324033975601196, - "loss_sent": 0.2505112588405609, - "loss_sod": 0.07465124130249023, - "loss_total": 0.6484028100967407, - "step": 105099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.328365325927734, - "loss_rtd": 0.33295971155166626, - "loss_sent": 0.10097527503967285, - "loss_sod": 0.202653706073761, - "loss_total": 0.6365886926651001, - "step": 105099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.2256265878677368, - "learning_rate": 6.481601669923489e-05, - "loss": 0.5554, - "step": 105100 - }, - { - "epoch": 0.018398, - "loss_gen": 4.671132564544678, - "loss_rtd": 0.32226237654685974, - "loss_sent": 0.27640920877456665, - "loss_sod": 0.08043090999126434, - "loss_total": 0.6791024804115295, - "step": 105199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.23636531829834, - "loss_rtd": 0.3007037341594696, - "loss_sent": 0.21370506286621094, - "loss_sod": 0.16631944477558136, - "loss_total": 0.6807282567024231, - "step": 105199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.2847132682800293, - "learning_rate": 6.47857056428626e-05, - "loss": 0.5722, - "step": 105200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.904481887817383, - "loss_rtd": 0.33259204030036926, - "loss_sent": 0.1941862255334854, - "loss_sod": 0.05695922672748566, - "loss_total": 0.5837374925613403, - "step": 105299 - }, - { - "epoch": 0.018598, - "loss_gen": 4.8768229484558105, - "loss_rtd": 0.333579957485199, - "loss_sent": 0.08184095472097397, - "loss_sod": 0.02738514356315136, - "loss_total": 0.44280606508255005, - "step": 105299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.8443505167961121, - "learning_rate": 6.475538863080984e-05, - "loss": 0.5535, - "step": 105300 - }, - { - "epoch": 0.018798, - "loss_gen": 4.9819016456604, - "loss_rtd": 0.2957344353199005, - "loss_sent": 0.07932811975479126, - "loss_sod": 0.0848763957619667, - "loss_total": 0.4599389433860779, - "step": 105399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.246075630187988, - "loss_rtd": 0.33596476912498474, - "loss_sent": 0.1679612249135971, - "loss_sod": 0.046787574887275696, - "loss_total": 0.5507135987281799, - "step": 105399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.7843284606933594, - "learning_rate": 6.472506567528832e-05, - "loss": 0.5596, - "step": 105400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.234274864196777, - "loss_rtd": 0.31183311343193054, - "loss_sent": 0.01924183964729309, - "loss_sod": 0.1953190118074417, - "loss_total": 0.5263940095901489, - "step": 105499 - }, - { - "epoch": 0.018998, - "loss_gen": 3.8398239612579346, - "loss_rtd": 0.28846511244773865, - "loss_sent": 3.70305533579085e-05, - "loss_sod": 0.11776828020811081, - "loss_total": 0.40627044439315796, - "step": 105499 - }, - { - "epoch": 0.019, - "grad_norm": 0.8793208003044128, - "learning_rate": 6.469473678851208e-05, - "loss": 0.5771, - "step": 105500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.88959264755249, - "loss_rtd": 0.3144441545009613, - "loss_sent": 0.3775281310081482, - "loss_sod": 0.05158288776874542, - "loss_total": 0.7435551881790161, - "step": 105599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.031152725219727, - "loss_rtd": 0.31768447160720825, - "loss_sent": 0.17155961692333221, - "loss_sod": 0.11411077529191971, - "loss_total": 0.6033548712730408, - "step": 105599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.247576117515564, - "learning_rate": 6.466440198269763e-05, - "loss": 0.563, - "step": 105600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.1013007164001465, - "loss_rtd": 0.30058038234710693, - "loss_sent": 0.014173594303429127, - "loss_sod": 0.14618852734565735, - "loss_total": 0.46094250679016113, - "step": 105699 - }, - { - "epoch": 0.019398, - "loss_gen": 3.6222245693206787, - "loss_rtd": 0.28102585673332214, - "loss_sent": 0.028827032074332237, - "loss_sod": 0.16895148158073425, - "loss_total": 0.4788043797016144, - "step": 105699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.9890584945678711, - "learning_rate": 6.46340612700638e-05, - "loss": 0.5526, - "step": 105700 - }, - { - "epoch": 0.019598, - "loss_gen": 3.9984564781188965, - "loss_rtd": 0.2904631793498993, - "loss_sent": 0.030142538249492645, - "loss_sod": 0.06392544507980347, - "loss_total": 0.384531170129776, - "step": 105799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.746275901794434, - "loss_rtd": 0.3146536648273468, - "loss_sent": 0.09788447618484497, - "loss_sod": 0.04603857174515724, - "loss_total": 0.4585767090320587, - "step": 105799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.7948949337005615, - "learning_rate": 6.460371466283186e-05, - "loss": 0.5676, - "step": 105800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.381058692932129, - "loss_rtd": 0.3200831711292267, - "loss_sent": 0.18164263665676117, - "loss_sod": 0.10055247694253922, - "loss_total": 0.6022782921791077, - "step": 105899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.627455711364746, - "loss_rtd": 0.3433959484100342, - "loss_sent": 0.12217989563941956, - "loss_sod": 0.00880503375083208, - "loss_total": 0.47438088059425354, - "step": 105899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.8509397506713867, - "learning_rate": 6.457336217322539e-05, - "loss": 0.5663, - "step": 105900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.083179950714111, - "loss_rtd": 0.32536083459854126, - "loss_sent": 0.14673031866550446, - "loss_sod": 0.031289633363485336, - "loss_total": 0.5033807754516602, - "step": 105999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.490145206451416, - "loss_rtd": 0.34568482637405396, - "loss_sent": 0.141320139169693, - "loss_sod": 0.03445363789796829, - "loss_total": 0.521458625793457, - "step": 105999 - }, - { - "epoch": 0.02, - "grad_norm": 0.6346383094787598, - "learning_rate": 6.45430038134704e-05, - "loss": 0.5582, - "step": 106000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5525758862495422, - "eval_runtime": 151.5121, - "eval_samples_per_second": 101.926, - "eval_steps_per_second": 0.799, - "step": 106000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.103689670562744, - "loss_rtd": 0.31973427534103394, - "loss_sent": 0.2565765976905823, - "loss_sod": 0.07389902323484421, - "loss_total": 0.650209903717041, - "step": 106099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.885805606842041, - "loss_rtd": 0.32697972655296326, - "loss_sent": 0.1322324126958847, - "loss_sod": 0.06987996399402618, - "loss_total": 0.5290921330451965, - "step": 106099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.8351715803146362, - "learning_rate": 6.451263959579519e-05, - "loss": 0.5658, - "step": 106100 - }, - { - "epoch": 0.020398, - "loss_gen": 4.95379638671875, - "loss_rtd": 0.3120945692062378, - "loss_sent": 0.13482888042926788, - "loss_sod": 0.13001154363155365, - "loss_total": 0.5769349932670593, - "step": 106199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.018063545227051, - "loss_rtd": 0.32068923115730286, - "loss_sent": 0.276826411485672, - "loss_sod": 0.1472255289554596, - "loss_total": 0.7447412014007568, - "step": 106199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.67173171043396, - "learning_rate": 6.44822695324305e-05, - "loss": 0.5718, - "step": 106200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.010893821716309, - "loss_rtd": 0.32713550329208374, - "loss_sent": 0.14166052639484406, - "loss_sod": 0.062498897314071655, - "loss_total": 0.5312949419021606, - "step": 106299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.061027526855469, - "loss_rtd": 0.2854859530925751, - "loss_sent": 0.315373033285141, - "loss_sod": 0.1234944760799408, - "loss_total": 0.7243534326553345, - "step": 106299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.8546362519264221, - "learning_rate": 6.445189363560936e-05, - "loss": 0.5718, - "step": 106300 - }, - { - "epoch": 0.020798, - "loss_gen": 4.929854393005371, - "loss_rtd": 0.3133118748664856, - "loss_sent": 0.1772003173828125, - "loss_sod": 0.07359988242387772, - "loss_total": 0.5641120672225952, - "step": 106399 - }, - { - "epoch": 0.020798, - "loss_gen": 4.964849472045898, - "loss_rtd": 0.30542078614234924, - "loss_sent": 0.23408684134483337, - "loss_sod": 0.02591603808104992, - "loss_total": 0.5654236674308777, - "step": 106399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.9423354268074036, - "learning_rate": 6.44215119175672e-05, - "loss": 0.5752, - "step": 106400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.083122730255127, - "loss_rtd": 0.3306697607040405, - "loss_sent": 0.13572731614112854, - "loss_sod": 0.04980308562517166, - "loss_total": 0.5162001848220825, - "step": 106499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.144300937652588, - "loss_rtd": 0.316855251789093, - "loss_sent": 0.11016371846199036, - "loss_sod": 0.0425821915268898, - "loss_total": 0.4696011543273926, - "step": 106499 - }, - { - "epoch": 0.021, - "grad_norm": 0.7220878005027771, - "learning_rate": 6.439112439054176e-05, - "loss": 0.564, - "step": 106500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.594676971435547, - "loss_rtd": 0.2950240969657898, - "loss_sent": 0.1877138316631317, - "loss_sod": 0.03032473661005497, - "loss_total": 0.5130626559257507, - "step": 106599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.0950751304626465, - "loss_rtd": 0.3105525076389313, - "loss_sent": 0.09172980487346649, - "loss_sod": 0.17428332567214966, - "loss_total": 0.5765656232833862, - "step": 106599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.0337969064712524, - "learning_rate": 6.436073106677315e-05, - "loss": 0.5684, - "step": 106600 - }, - { - "epoch": 0.021398, - "loss_gen": 3.9136695861816406, - "loss_rtd": 0.29337170720100403, - "loss_sent": 5.8220874052494764e-05, - "loss_sod": 0.1815156638622284, - "loss_total": 0.474945604801178, - "step": 106699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.0382795333862305, - "loss_rtd": 0.31321319937705994, - "loss_sent": 0.07537049055099487, - "loss_sod": 0.03697393834590912, - "loss_total": 0.42555761337280273, - "step": 106699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.93421471118927, - "learning_rate": 6.433033195850378e-05, - "loss": 0.5693, - "step": 106700 - }, - { - "epoch": 0.021598, - "loss_gen": 4.9295172691345215, - "loss_rtd": 0.32531750202178955, - "loss_sent": 0.4898681640625, - "loss_sod": 0.09437037259340286, - "loss_total": 0.9095560312271118, - "step": 106799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.337591648101807, - "loss_rtd": 0.31953203678131104, - "loss_sent": 0.16137145459651947, - "loss_sod": 0.08585690706968307, - "loss_total": 0.5667604207992554, - "step": 106799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.7039817571640015, - "learning_rate": 6.429992707797838e-05, - "loss": 0.5643, - "step": 106800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.099813938140869, - "loss_rtd": 0.3315562307834625, - "loss_sent": 0.15979455411434174, - "loss_sod": 0.040022898465394974, - "loss_total": 0.5313736796379089, - "step": 106899 - }, - { - "epoch": 0.021798, - "loss_gen": 4.756868362426758, - "loss_rtd": 0.2984583377838135, - "loss_sent": 0.11247531324625015, - "loss_sod": 0.11603482067584991, - "loss_total": 0.5269684791564941, - "step": 106899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.9488338232040405, - "learning_rate": 6.42695164374441e-05, - "loss": 0.5866, - "step": 106900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.187564373016357, - "loss_rtd": 0.32474082708358765, - "loss_sent": 0.31143543124198914, - "loss_sod": 0.024499859660863876, - "loss_total": 0.660676121711731, - "step": 106999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.2301740646362305, - "loss_rtd": 0.3165074586868286, - "loss_sent": 0.09302461892366409, - "loss_sod": 0.07346178591251373, - "loss_total": 0.48299384117126465, - "step": 106999 - }, - { - "epoch": 0.022, - "grad_norm": 0.9418954253196716, - "learning_rate": 6.423910004915029e-05, - "loss": 0.5586, - "step": 107000 - }, - { - "epoch": 0.022, - "eval_loss": 0.5417913198471069, - "eval_runtime": 151.1415, - "eval_samples_per_second": 102.176, - "eval_steps_per_second": 0.801, - "step": 107000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.9591755867004395, - "loss_rtd": 0.3148433566093445, - "loss_sent": 0.2994132936000824, - "loss_sod": 0.047530196607112885, - "loss_total": 0.6617868542671204, - "step": 107099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.875545978546143, - "loss_rtd": 0.3147640824317932, - "loss_sent": 0.4485037922859192, - "loss_sod": 0.042732372879981995, - "loss_total": 0.8060002326965332, - "step": 107099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.8302114009857178, - "learning_rate": 6.420867792534869e-05, - "loss": 0.5603, - "step": 107100 - }, - { - "epoch": 0.022398, - "loss_gen": 4.373366355895996, - "loss_rtd": 0.30908501148223877, - "loss_sent": 0.3430616855621338, - "loss_sod": 0.0285382941365242, - "loss_total": 0.6806849837303162, - "step": 107199 - }, - { - "epoch": 0.022398, - "loss_gen": 4.641295909881592, - "loss_rtd": 0.3117026388645172, - "loss_sent": 0.1315881460905075, - "loss_sod": 0.016390351578593254, - "loss_total": 0.4596811532974243, - "step": 107199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.9939401745796204, - "learning_rate": 6.417825007829331e-05, - "loss": 0.5817, - "step": 107200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.970827102661133, - "loss_rtd": 0.32075393199920654, - "loss_sent": 0.13476908206939697, - "loss_sod": 0.12782539427280426, - "loss_total": 0.5833483934402466, - "step": 107299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.014376163482666, - "loss_rtd": 0.30996695160865784, - "loss_sent": 0.11436322331428528, - "loss_sod": 0.05173637717962265, - "loss_total": 0.47606655955314636, - "step": 107299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.8865584135055542, - "learning_rate": 6.414781652024051e-05, - "loss": 0.5686, - "step": 107300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.803501605987549, - "loss_rtd": 0.29663944244384766, - "loss_sent": 0.28819388151168823, - "loss_sod": 0.01116451807320118, - "loss_total": 0.5959978103637695, - "step": 107399 - }, - { - "epoch": 0.022798, - "loss_gen": 4.872408866882324, - "loss_rtd": 0.31238287687301636, - "loss_sent": 0.44345003366470337, - "loss_sod": 0.07661646604537964, - "loss_total": 0.8324493765830994, - "step": 107399 - }, - { - "epoch": 0.0228, - "grad_norm": 2.490213394165039, - "learning_rate": 6.411737726344888e-05, - "loss": 0.5807, - "step": 107400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.940255165100098, - "loss_rtd": 0.30571743845939636, - "loss_sent": 0.35065779089927673, - "loss_sod": 0.020769037306308746, - "loss_total": 0.6771442890167236, - "step": 107499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.842052459716797, - "loss_rtd": 0.31405186653137207, - "loss_sent": 0.09119933098554611, - "loss_sod": 0.10954436659812927, - "loss_total": 0.5147955417633057, - "step": 107499 - }, - { - "epoch": 0.023, - "grad_norm": 0.7584764957427979, - "learning_rate": 6.408693232017942e-05, - "loss": 0.5705, - "step": 107500 - }, - { - "epoch": 0.023198, - "loss_gen": 3.9214885234832764, - "loss_rtd": 0.2983987033367157, - "loss_sent": 4.374084892333485e-05, - "loss_sod": 0.18205353617668152, - "loss_total": 0.48049598932266235, - "step": 107599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.829421520233154, - "loss_rtd": 0.31019821763038635, - "loss_sent": 0.2678295969963074, - "loss_sod": 0.009797907434403896, - "loss_total": 0.5878257155418396, - "step": 107599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.0991127490997314, - "learning_rate": 6.405648170269527e-05, - "loss": 0.574, - "step": 107600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.199046611785889, - "loss_rtd": 0.30022376775741577, - "loss_sent": 0.17597688734531403, - "loss_sod": 0.07369022816419601, - "loss_total": 0.5498908758163452, - "step": 107699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.044209003448486, - "loss_rtd": 0.30875247716903687, - "loss_sent": 0.5088533759117126, - "loss_sod": 0.029024748131632805, - "loss_total": 0.8466305732727051, - "step": 107699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.7089482545852661, - "learning_rate": 6.402602542326198e-05, - "loss": 0.5752, - "step": 107700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.051883220672607, - "loss_rtd": 0.2870909869670868, - "loss_sent": 0.04724877327680588, - "loss_sod": 0.07817958295345306, - "loss_total": 0.41251933574676514, - "step": 107799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.491959571838379, - "loss_rtd": 0.28452175855636597, - "loss_sent": 0.06092541664838791, - "loss_sod": 0.140193372964859, - "loss_total": 0.4856405556201935, - "step": 107799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.6783625483512878, - "learning_rate": 6.399556349414733e-05, - "loss": 0.5684, - "step": 107800 - }, - { - "epoch": 0.023798, - "loss_gen": 4.9519362449646, - "loss_rtd": 0.31674841046333313, - "loss_sent": 0.22679011523723602, - "loss_sod": 0.06879362463951111, - "loss_total": 0.6123321652412415, - "step": 107899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.219686985015869, - "loss_rtd": 0.3118826150894165, - "loss_sent": 0.01116969808936119, - "loss_sod": 0.07467565685510635, - "loss_total": 0.39772796630859375, - "step": 107899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.7484124302864075, - "learning_rate": 6.396509592762137e-05, - "loss": 0.5724, - "step": 107900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.6382670402526855, - "loss_rtd": 0.3116965889930725, - "loss_sent": 0.08824295550584793, - "loss_sod": 0.06433819979429245, - "loss_total": 0.4642777442932129, - "step": 107999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.553746223449707, - "loss_rtd": 0.30597829818725586, - "loss_sent": 0.062276970595121384, - "loss_sod": 0.16350418329238892, - "loss_total": 0.5317594408988953, - "step": 107999 - }, - { - "epoch": 0.024, - "grad_norm": 0.9666557312011719, - "learning_rate": 6.393462273595644e-05, - "loss": 0.571, - "step": 108000 - }, - { - "epoch": 0.024, - "eval_loss": 0.5461033582687378, - "eval_runtime": 151.3137, - "eval_samples_per_second": 102.059, - "eval_steps_per_second": 0.8, - "step": 108000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.0266947746276855, - "loss_rtd": 0.29067620635032654, - "loss_sent": 0.1917770653963089, - "loss_sod": 0.01880338229238987, - "loss_total": 0.5012566447257996, - "step": 108099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.079265117645264, - "loss_rtd": 0.32476240396499634, - "loss_sent": 0.09011615067720413, - "loss_sod": 0.16986671090126038, - "loss_total": 0.5847452878952026, - "step": 108099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.7331221103668213, - "learning_rate": 6.390414393142716e-05, - "loss": 0.5669, - "step": 108100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.8616862297058105, - "loss_rtd": 0.30674853920936584, - "loss_sent": 0.06683506071567535, - "loss_sod": 0.036662496626377106, - "loss_total": 0.4102460741996765, - "step": 108199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.296269416809082, - "loss_rtd": 0.31321123242378235, - "loss_sent": 0.1383715271949768, - "loss_sod": 0.11871857941150665, - "loss_total": 0.570301353931427, - "step": 108199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.399257779121399, - "learning_rate": 6.387365952631034e-05, - "loss": 0.5699, - "step": 108200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.698439121246338, - "loss_rtd": 0.3081118166446686, - "loss_sent": 0.12159612774848938, - "loss_sod": 0.041703373193740845, - "loss_total": 0.4714113175868988, - "step": 108299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.076000213623047, - "loss_rtd": 0.32107752561569214, - "loss_sent": 0.11131883412599564, - "loss_sod": 0.03802306577563286, - "loss_total": 0.47041940689086914, - "step": 108299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.8935823440551758, - "learning_rate": 6.384316953288514e-05, - "loss": 0.5625, - "step": 108300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.059536457061768, - "loss_rtd": 0.30067944526672363, - "loss_sent": 0.1602024883031845, - "loss_sod": 0.13888368010520935, - "loss_total": 0.5997655987739563, - "step": 108399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.356949329376221, - "loss_rtd": 0.3023897707462311, - "loss_sent": 6.415346433641389e-05, - "loss_sod": 0.08302780985832214, - "loss_total": 0.38548174500465393, - "step": 108399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.9004961848258972, - "learning_rate": 6.38126739634329e-05, - "loss": 0.5798, - "step": 108400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.912137031555176, - "loss_rtd": 0.3300974369049072, - "loss_sent": 0.2669000029563904, - "loss_sod": 0.017181701958179474, - "loss_total": 0.6141791343688965, - "step": 108499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.501112937927246, - "loss_rtd": 0.31232425570487976, - "loss_sent": 0.0987105593085289, - "loss_sod": 0.0689978376030922, - "loss_total": 0.48003265261650085, - "step": 108499 - }, - { - "epoch": 0.025, - "grad_norm": 1.2480976581573486, - "learning_rate": 6.378217283023726e-05, - "loss": 0.5556, - "step": 108500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.071778774261475, - "loss_rtd": 0.30955711007118225, - "loss_sent": 0.315388023853302, - "loss_sod": 0.025845076888799667, - "loss_total": 0.6507902145385742, - "step": 108599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.081519603729248, - "loss_rtd": 0.33002594113349915, - "loss_sent": 0.24318280816078186, - "loss_sod": 0.07440317422151566, - "loss_total": 0.6476119160652161, - "step": 108599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.0340014696121216, - "learning_rate": 6.375166614558403e-05, - "loss": 0.5763, - "step": 108600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.8062615394592285, - "loss_rtd": 0.30479082465171814, - "loss_sent": 0.3667656183242798, - "loss_sod": 0.09619477391242981, - "loss_total": 0.7677512168884277, - "step": 108699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.325409889221191, - "loss_rtd": 0.30163633823394775, - "loss_sent": 0.1088394746184349, - "loss_sod": 0.07651998102664948, - "loss_total": 0.48699578642845154, - "step": 108699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.312751054763794, - "learning_rate": 6.372115392176132e-05, - "loss": 0.5545, - "step": 108700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.001461505889893, - "loss_rtd": 0.31182214617729187, - "loss_sent": 0.1901216357946396, - "loss_sod": 0.10808823257684708, - "loss_total": 0.6100320219993591, - "step": 108799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.421943187713623, - "loss_rtd": 0.3268759846687317, - "loss_sent": 0.18132103979587555, - "loss_sod": 0.029783664271235466, - "loss_total": 0.5379806756973267, - "step": 108799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.7836019396781921, - "learning_rate": 6.369063617105947e-05, - "loss": 0.5809, - "step": 108800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.891840934753418, - "loss_rtd": 0.3302566111087799, - "loss_sent": 0.06812560558319092, - "loss_sod": 0.018494369462132454, - "loss_total": 0.41687658429145813, - "step": 108899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.9688568115234375, - "loss_rtd": 0.322144478559494, - "loss_sent": 0.1739887148141861, - "loss_sod": 0.11990626156330109, - "loss_total": 0.6160394549369812, - "step": 108899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.6998794674873352, - "learning_rate": 6.366011290577098e-05, - "loss": 0.5624, - "step": 108900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.213585376739502, - "loss_rtd": 0.3215108811855316, - "loss_sent": 0.11756500601768494, - "loss_sod": 0.20318147540092468, - "loss_total": 0.6422573328018188, - "step": 108999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.674625396728516, - "loss_rtd": 0.3096647560596466, - "loss_sent": 0.12946784496307373, - "loss_sod": 0.04208453372120857, - "loss_total": 0.4812171459197998, - "step": 108999 - }, - { - "epoch": 0.026, - "grad_norm": 1.1656898260116577, - "learning_rate": 6.362958413819067e-05, - "loss": 0.5861, - "step": 109000 - }, - { - "epoch": 0.026, - "eval_loss": 0.5389401912689209, - "eval_runtime": 151.381, - "eval_samples_per_second": 102.014, - "eval_steps_per_second": 0.799, - "step": 109000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.878951072692871, - "loss_rtd": 0.3325100839138031, - "loss_sent": 0.1492864340543747, - "loss_sod": 0.06057453528046608, - "loss_total": 0.5423710346221924, - "step": 109099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.216949939727783, - "loss_rtd": 0.31064364314079285, - "loss_sent": 0.18343260884284973, - "loss_sod": 0.09501229971647263, - "loss_total": 0.5890885591506958, - "step": 109099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.1586878299713135, - "learning_rate": 6.359904988061548e-05, - "loss": 0.5564, - "step": 109100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.054717063903809, - "loss_rtd": 0.30129122734069824, - "loss_sent": 0.23554277420043945, - "loss_sod": 0.02523457258939743, - "loss_total": 0.5620685815811157, - "step": 109199 - }, - { - "epoch": 0.026398, - "loss_gen": 4.210378646850586, - "loss_rtd": 0.3084638714790344, - "loss_sent": 0.0177040733397007, - "loss_sod": 0.09642495959997177, - "loss_total": 0.4225929081439972, - "step": 109199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.7733477354049683, - "learning_rate": 6.356851014534464e-05, - "loss": 0.5741, - "step": 109200 - }, - { - "epoch": 0.026598, - "loss_gen": 4.9100871086120605, - "loss_rtd": 0.3333955705165863, - "loss_sent": 0.23439662158489227, - "loss_sod": 0.012360401451587677, - "loss_total": 0.5801525712013245, - "step": 109299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.150571823120117, - "loss_rtd": 0.33575183153152466, - "loss_sent": 0.2101443111896515, - "loss_sod": 0.10955361276865005, - "loss_total": 0.6554497480392456, - "step": 109299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.0756936073303223, - "learning_rate": 6.353796494467952e-05, - "loss": 0.5541, - "step": 109300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.095142841339111, - "loss_rtd": 0.30467578768730164, - "loss_sent": 0.4449251592159271, - "loss_sod": 0.011433375999331474, - "loss_total": 0.7610343098640442, - "step": 109399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.259122848510742, - "loss_rtd": 0.33221355080604553, - "loss_sent": 0.11076716333627701, - "loss_sod": 0.04559096321463585, - "loss_total": 0.4885716736316681, - "step": 109399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.7708160281181335, - "learning_rate": 6.350741429092375e-05, - "loss": 0.5584, - "step": 109400 - }, - { - "epoch": 0.026998, - "loss_gen": 4.958425045013428, - "loss_rtd": 0.3213268220424652, - "loss_sent": 0.23420964181423187, - "loss_sod": 0.027131108567118645, - "loss_total": 0.5826675891876221, - "step": 109499 - }, - { - "epoch": 0.026998, - "loss_gen": 4.767280578613281, - "loss_rtd": 0.31559616327285767, - "loss_sent": 0.04378554970026016, - "loss_sod": 0.0120012778788805, - "loss_total": 0.3713829815387726, - "step": 109499 - }, - { - "epoch": 0.027, - "grad_norm": 0.6379171013832092, - "learning_rate": 6.347685819638313e-05, - "loss": 0.5836, - "step": 109500 - }, - { - "epoch": 0.027198, - "loss_gen": 4.930249214172363, - "loss_rtd": 0.2870037853717804, - "loss_sent": 0.15182381868362427, - "loss_sod": 0.09946365654468536, - "loss_total": 0.5382912755012512, - "step": 109599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.758519649505615, - "loss_rtd": 0.31260746717453003, - "loss_sent": 0.1231798604130745, - "loss_sod": 0.04067111387848854, - "loss_total": 0.47645843029022217, - "step": 109599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.47101891040802, - "learning_rate": 6.344629667336563e-05, - "loss": 0.5544, - "step": 109600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.018040180206299, - "loss_rtd": 0.3060138523578644, - "loss_sent": 0.21603040397167206, - "loss_sod": 0.01728692650794983, - "loss_total": 0.5393311977386475, - "step": 109699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.17416524887085, - "loss_rtd": 0.30740025639533997, - "loss_sent": 0.21488739550113678, - "loss_sod": 0.16485324501991272, - "loss_total": 0.6871408820152283, - "step": 109699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.9327196478843689, - "learning_rate": 6.341572973418142e-05, - "loss": 0.5667, - "step": 109700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.905360698699951, - "loss_rtd": 0.30515894293785095, - "loss_sent": 0.07022082060575485, - "loss_sod": 0.028215806931257248, - "loss_total": 0.40359556674957275, - "step": 109799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.34921407699585, - "loss_rtd": 0.3019445836544037, - "loss_sent": 0.08953826874494553, - "loss_sod": 0.02431022748351097, - "loss_total": 0.4157930910587311, - "step": 109799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.6951218247413635, - "learning_rate": 6.33851573911429e-05, - "loss": 0.561, - "step": 109800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.802526473999023, - "loss_rtd": 0.29821139574050903, - "loss_sent": 0.23770183324813843, - "loss_sod": 0.06610836088657379, - "loss_total": 0.6020215749740601, - "step": 109899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.04287576675415, - "loss_rtd": 0.3214740455150604, - "loss_sent": 0.3329661786556244, - "loss_sod": 0.08987447619438171, - "loss_total": 0.7443146705627441, - "step": 109899 - }, - { - "epoch": 0.0278, - "grad_norm": 3.2666738033294678, - "learning_rate": 6.335457965656459e-05, - "loss": 0.5639, - "step": 109900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.238755226135254, - "loss_rtd": 0.3152182698249817, - "loss_sent": 0.31257209181785583, - "loss_sod": 0.03364112973213196, - "loss_total": 0.6614314913749695, - "step": 109999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.10002326965332, - "loss_rtd": 0.30614298582077026, - "loss_sent": 0.4742734432220459, - "loss_sod": 0.029593631625175476, - "loss_total": 0.8100100755691528, - "step": 109999 - }, - { - "epoch": 0.028, - "grad_norm": 2.491260290145874, - "learning_rate": 6.332399654276318e-05, - "loss": 0.5602, - "step": 110000 - }, - { - "epoch": 0.028, - "eval_loss": 0.5431951880455017, - "eval_runtime": 151.4848, - "eval_samples_per_second": 101.944, - "eval_steps_per_second": 0.799, - "step": 110000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.500039577484131, - "loss_rtd": 0.298180490732193, - "loss_sent": 0.009449800476431847, - "loss_sod": 0.1921154111623764, - "loss_total": 0.4997456967830658, - "step": 110099 - }, - { - "epoch": 0.028198, - "loss_gen": 4.855717658996582, - "loss_rtd": 0.3043445944786072, - "loss_sent": 0.13195885717868805, - "loss_sod": 0.00961886253207922, - "loss_total": 0.445922315120697, - "step": 110099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.0164231061935425, - "learning_rate": 6.329340806205755e-05, - "loss": 0.5541, - "step": 110100 - }, - { - "epoch": 0.028398, - "loss_gen": 4.283051490783691, - "loss_rtd": 0.2776123285293579, - "loss_sent": 0.036811452358961105, - "loss_sod": 0.01744782365858555, - "loss_total": 0.3318715989589691, - "step": 110199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.16132116317749, - "loss_rtd": 0.3195195496082306, - "loss_sent": 0.2174578309059143, - "loss_sod": 0.043812330812215805, - "loss_total": 0.5807896852493286, - "step": 110199 - }, - { - "epoch": 0.0284, - "grad_norm": 0.746578574180603, - "learning_rate": 6.326281422676874e-05, - "loss": 0.5578, - "step": 110200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.145158767700195, - "loss_rtd": 0.32694098353385925, - "loss_sent": 0.2601774334907532, - "loss_sod": 0.08325707167387009, - "loss_total": 0.6703754663467407, - "step": 110299 - }, - { - "epoch": 0.028598, - "loss_gen": 4.930212497711182, - "loss_rtd": 0.3273719251155853, - "loss_sent": 0.05658649280667305, - "loss_sod": 0.15999355912208557, - "loss_total": 0.5439519882202148, - "step": 110299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.4284107685089111, - "learning_rate": 6.323221504921995e-05, - "loss": 0.5784, - "step": 110300 - }, - { - "epoch": 0.028798, - "loss_gen": 3.761293649673462, - "loss_rtd": 0.28234994411468506, - "loss_sent": 0.0022282814607024193, - "loss_sod": 0.23036682605743408, - "loss_total": 0.5149450302124023, - "step": 110399 - }, - { - "epoch": 0.028798, - "loss_gen": 4.16573429107666, - "loss_rtd": 0.29319512844085693, - "loss_sent": 0.009119627065956593, - "loss_sod": 0.20074167847633362, - "loss_total": 0.5030564069747925, - "step": 110399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.2292065620422363, - "learning_rate": 6.320161054173652e-05, - "loss": 0.5606, - "step": 110400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.747781276702881, - "loss_rtd": 0.3284401297569275, - "loss_sent": 0.26554733514785767, - "loss_sod": 0.05696718767285347, - "loss_total": 0.6509546637535095, - "step": 110499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.256872653961182, - "loss_rtd": 0.31553196907043457, - "loss_sent": 0.1591951549053192, - "loss_sod": 0.1461992859840393, - "loss_total": 0.6209263801574707, - "step": 110499 - }, - { - "epoch": 0.029, - "grad_norm": 0.9094834923744202, - "learning_rate": 6.317100071664595e-05, - "loss": 0.5614, - "step": 110500 - }, - { - "epoch": 0.029198, - "loss_gen": 3.910529851913452, - "loss_rtd": 0.2881653904914856, - "loss_sent": 0.0019547692500054836, - "loss_sod": 0.08033779263496399, - "loss_total": 0.3704579472541809, - "step": 110599 - }, - { - "epoch": 0.029198, - "loss_gen": 4.869881629943848, - "loss_rtd": 0.3074661195278168, - "loss_sent": 0.8043997883796692, - "loss_sod": 0.09549353271722794, - "loss_total": 1.2073594331741333, - "step": 110599 - }, - { - "epoch": 0.0292, - "grad_norm": 3.183957099914551, - "learning_rate": 6.314038558627787e-05, - "loss": 0.5579, - "step": 110600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.859715938568115, - "loss_rtd": 0.3090853691101074, - "loss_sent": 0.3077383041381836, - "loss_sod": 0.0855005756020546, - "loss_total": 0.7023242712020874, - "step": 110699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.484714508056641, - "loss_rtd": 0.3146698474884033, - "loss_sent": 0.013457296416163445, - "loss_sod": 0.07126327604055405, - "loss_total": 0.39939042925834656, - "step": 110699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.0308226346969604, - "learning_rate": 6.310976516296403e-05, - "loss": 0.565, - "step": 110700 - }, - { - "epoch": 0.029598, - "loss_gen": 4.919672966003418, - "loss_rtd": 0.32212698459625244, - "loss_sent": 0.0972585380077362, - "loss_sod": 0.021064667031168938, - "loss_total": 0.44045019149780273, - "step": 110799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.028139114379883, - "loss_rtd": 0.3183032274246216, - "loss_sent": 0.15866515040397644, - "loss_sod": 0.09258978813886642, - "loss_total": 0.5695581436157227, - "step": 110799 - }, - { - "epoch": 0.0296, - "grad_norm": 0.5978352427482605, - "learning_rate": 6.307913945903836e-05, - "loss": 0.5615, - "step": 110800 - }, - { - "epoch": 0.029798, - "loss_gen": 4.213213920593262, - "loss_rtd": 0.2699766755104065, - "loss_sent": 0.01592285744845867, - "loss_sod": 0.09502089023590088, - "loss_total": 0.38092041015625, - "step": 110899 - }, - { - "epoch": 0.029798, - "loss_gen": 4.7946553230285645, - "loss_rtd": 0.3044602572917938, - "loss_sent": 0.29190877079963684, - "loss_sod": 0.06971128284931183, - "loss_total": 0.6660803556442261, - "step": 110899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.3571891784667969, - "learning_rate": 6.304850848683688e-05, - "loss": 0.5483, - "step": 110900 - }, - { - "epoch": 0.029998, - "loss_gen": 4.958065986633301, - "loss_rtd": 0.30979248881340027, - "loss_sent": 0.29292795062065125, - "loss_sod": 0.033625528216362, - "loss_total": 0.6363459825515747, - "step": 110999 - }, - { - "epoch": 0.029998, - "loss_gen": 4.840777397155762, - "loss_rtd": 0.31712937355041504, - "loss_sent": 0.16160918772220612, - "loss_sod": 0.09780505299568176, - "loss_total": 0.5765436291694641, - "step": 110999 - }, - { - "epoch": 0.03, - "grad_norm": 1.1442328691482544, - "learning_rate": 6.301787225869774e-05, - "loss": 0.5567, - "step": 111000 - }, - { - "epoch": 0.03, - "eval_loss": 0.540738046169281, - "eval_runtime": 152.7321, - "eval_samples_per_second": 101.112, - "eval_steps_per_second": 0.792, - "step": 111000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.099303245544434, - "loss_rtd": 0.30958980321884155, - "loss_sent": 0.26804983615875244, - "loss_sod": 0.062308840453624725, - "loss_total": 0.6399484872817993, - "step": 111099 - }, - { - "epoch": 0.030198, - "loss_gen": 4.889252662658691, - "loss_rtd": 0.30952370166778564, - "loss_sent": 0.10719144344329834, - "loss_sod": 0.03891471400856972, - "loss_total": 0.4556298553943634, - "step": 111099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.7303214073181152, - "learning_rate": 6.298723078696121e-05, - "loss": 0.5622, - "step": 111100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.152919769287109, - "loss_rtd": 0.3233211040496826, - "loss_sent": 0.2871219217777252, - "loss_sod": 0.07788588851690292, - "loss_total": 0.6883289217948914, - "step": 111199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.525146961212158, - "loss_rtd": 0.3192431628704071, - "loss_sent": 0.11062529683113098, - "loss_sod": 0.12471377104520798, - "loss_total": 0.5545822381973267, - "step": 111199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.2109558582305908, - "learning_rate": 6.295658408396968e-05, - "loss": 0.5503, - "step": 111200 - }, - { - "epoch": 0.030598, - "loss_gen": 4.999670505523682, - "loss_rtd": 0.3378047049045563, - "loss_sent": 0.269733726978302, - "loss_sod": 0.03210064768791199, - "loss_total": 0.6396390795707703, - "step": 111299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.20380163192749, - "loss_rtd": 0.29996007680892944, - "loss_sent": 0.2559049129486084, - "loss_sod": 0.06889547407627106, - "loss_total": 0.6247604489326477, - "step": 111299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.7519530057907104, - "learning_rate": 6.292593216206761e-05, - "loss": 0.5525, - "step": 111300 - }, - { - "epoch": 0.030798, - "loss_gen": 4.638796329498291, - "loss_rtd": 0.29377856850624084, - "loss_sent": 0.019329413771629333, - "loss_sod": 0.1504117250442505, - "loss_total": 0.4635196924209595, - "step": 111399 - }, - { - "epoch": 0.030798, - "loss_gen": 4.285656929016113, - "loss_rtd": 0.3159097731113434, - "loss_sent": 4.849431570619345e-05, - "loss_sod": 0.1864272654056549, - "loss_total": 0.5023855566978455, - "step": 111399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.7244387865066528, - "learning_rate": 6.289527503360162e-05, - "loss": 0.5685, - "step": 111400 - }, - { - "epoch": 0.030998, - "loss_gen": 4.955970287322998, - "loss_rtd": 0.3145253360271454, - "loss_sent": 0.11424721032381058, - "loss_sod": 0.05316298082470894, - "loss_total": 0.4819355309009552, - "step": 111499 - }, - { - "epoch": 0.030998, - "loss_gen": 4.991325378417969, - "loss_rtd": 0.31193283200263977, - "loss_sent": 0.24776963889598846, - "loss_sod": 0.08382590860128403, - "loss_total": 0.643528401851654, - "step": 111499 - }, - { - "epoch": 0.031, - "grad_norm": 1.2693592309951782, - "learning_rate": 6.28646127109204e-05, - "loss": 0.562, - "step": 111500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.383149147033691, - "loss_rtd": 0.30271032452583313, - "loss_sent": 0.0012748053995892406, - "loss_sod": 0.08654949069023132, - "loss_total": 0.3905346095561981, - "step": 111599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.844264507293701, - "loss_rtd": 0.3073599338531494, - "loss_sent": 0.10096272081136703, - "loss_sod": 0.01730138435959816, - "loss_total": 0.4256240427494049, - "step": 111599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.8001429438591003, - "learning_rate": 6.283394520637472e-05, - "loss": 0.5531, - "step": 111600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.880539894104004, - "loss_rtd": 0.3117526173591614, - "loss_sent": 0.2095177322626114, - "loss_sod": 0.10011793673038483, - "loss_total": 0.6213882565498352, - "step": 111699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.566366195678711, - "loss_rtd": 0.294606477022171, - "loss_sent": 0.12573687732219696, - "loss_sod": 0.020580457523465157, - "loss_total": 0.440923810005188, - "step": 111699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.7626075744628906, - "learning_rate": 6.280327253231743e-05, - "loss": 0.5586, - "step": 111700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.112703323364258, - "loss_rtd": 0.2861131727695465, - "loss_sent": 0.3115231692790985, - "loss_sod": 0.07299712300300598, - "loss_total": 0.6706334352493286, - "step": 111799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.371541500091553, - "loss_rtd": 0.313865602016449, - "loss_sent": 0.24734216928482056, - "loss_sod": 0.04733390361070633, - "loss_total": 0.6085416674613953, - "step": 111799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.8598159551620483, - "learning_rate": 6.277259470110351e-05, - "loss": 0.5561, - "step": 111800 - }, - { - "epoch": 0.031798, - "loss_gen": 4.814857482910156, - "loss_rtd": 0.29830074310302734, - "loss_sent": 0.10594409704208374, - "loss_sod": 0.10980981588363647, - "loss_total": 0.5140546560287476, - "step": 111899 - }, - { - "epoch": 0.031798, - "loss_gen": 4.281027793884277, - "loss_rtd": 0.29203304648399353, - "loss_sent": 0.3097190260887146, - "loss_sod": 0.075651615858078, - "loss_total": 0.6774036884307861, - "step": 111899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.5515258312225342, - "learning_rate": 6.274191172508996e-05, - "loss": 0.5396, - "step": 111900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.902201175689697, - "loss_rtd": 0.3031350076198578, - "loss_sent": 0.0972568541765213, - "loss_sod": 0.03203858807682991, - "loss_total": 0.4324304461479187, - "step": 111999 - }, - { - "epoch": 0.031998, - "loss_gen": 4.321255683898926, - "loss_rtd": 0.29954472184181213, - "loss_sent": 0.00010922457295237109, - "loss_sod": 0.12528063356876373, - "loss_total": 0.42493459582328796, - "step": 111999 - }, - { - "epoch": 0.032, - "grad_norm": 0.8171465396881104, - "learning_rate": 6.271122361663589e-05, - "loss": 0.5774, - "step": 112000 - }, - { - "epoch": 0.032, - "eval_loss": 0.5389299392700195, - "eval_runtime": 151.411, - "eval_samples_per_second": 101.994, - "eval_steps_per_second": 0.799, - "step": 112000 - }, - { - "epoch": 0.032198, - "loss_gen": 5.150925636291504, - "loss_rtd": 0.318128377199173, - "loss_sent": 0.16503667831420898, - "loss_sod": 0.12780560553073883, - "loss_total": 0.610970675945282, - "step": 112099 - }, - { - "epoch": 0.032198, - "loss_gen": 4.960903167724609, - "loss_rtd": 0.29868510365486145, - "loss_sent": 0.15749751031398773, - "loss_sod": 0.04956481605768204, - "loss_total": 0.5057474374771118, - "step": 112099 - }, - { - "epoch": 0.0322, - "grad_norm": 0.807860791683197, - "learning_rate": 6.268053038810247e-05, - "loss": 0.5439, - "step": 112100 - }, - { - "epoch": 0.032398, - "loss_gen": 4.996148109436035, - "loss_rtd": 0.32346105575561523, - "loss_sent": 0.11034146696329117, - "loss_sod": 0.12658926844596863, - "loss_total": 0.5603917837142944, - "step": 112199 - }, - { - "epoch": 0.032398, - "loss_gen": 4.908397197723389, - "loss_rtd": 0.2976805865764618, - "loss_sent": 0.035902105271816254, - "loss_sod": 0.16037225723266602, - "loss_total": 0.49395495653152466, - "step": 112199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.1983587741851807, - "learning_rate": 6.264983205185294e-05, - "loss": 0.5619, - "step": 112200 - }, - { - "epoch": 0.032598, - "loss_gen": 4.789726734161377, - "loss_rtd": 0.31488755345344543, - "loss_sent": 0.10511964559555054, - "loss_sod": 0.11555609107017517, - "loss_total": 0.5355632901191711, - "step": 112299 - }, - { - "epoch": 0.032598, - "loss_gen": 4.668126106262207, - "loss_rtd": 0.3064815104007721, - "loss_sent": 0.006027332507073879, - "loss_sod": 0.18640363216400146, - "loss_total": 0.4989124834537506, - "step": 112299 - }, - { - "epoch": 0.0326, - "grad_norm": 0.7658260464668274, - "learning_rate": 6.261912862025256e-05, - "loss": 0.565, - "step": 112300 - }, - { - "epoch": 0.032798, - "loss_gen": 5.2496418952941895, - "loss_rtd": 0.31157806515693665, - "loss_sent": 0.06531276553869247, - "loss_sod": 0.09153740108013153, - "loss_total": 0.46842822432518005, - "step": 112399 - }, - { - "epoch": 0.032798, - "loss_gen": 4.798717975616455, - "loss_rtd": 0.32129183411598206, - "loss_sent": 0.09239664673805237, - "loss_sod": 0.027722157537937164, - "loss_total": 0.441410630941391, - "step": 112399 - }, - { - "epoch": 0.0328, - "grad_norm": 0.933722734451294, - "learning_rate": 6.258842010566868e-05, - "loss": 0.5707, - "step": 112400 - }, - { - "epoch": 0.032998, - "loss_gen": 3.718125104904175, - "loss_rtd": 0.2656061053276062, - "loss_sent": 0.006414363626390696, - "loss_sod": 0.07887257635593414, - "loss_total": 0.3508930206298828, - "step": 112499 - }, - { - "epoch": 0.032998, - "loss_gen": 4.555234909057617, - "loss_rtd": 0.31273484230041504, - "loss_sent": 0.2504809498786926, - "loss_sod": 0.006745964288711548, - "loss_total": 0.5699617862701416, - "step": 112499 - }, - { - "epoch": 0.033, - "grad_norm": 0.8214619755744934, - "learning_rate": 6.255770652047069e-05, - "loss": 0.5472, - "step": 112500 - }, - { - "epoch": 0.033198, - "loss_gen": 3.9582273960113525, - "loss_rtd": 0.2800060510635376, - "loss_sent": 7.444770017173141e-05, - "loss_sod": 0.1791847199201584, - "loss_total": 0.45926523208618164, - "step": 112599 - }, - { - "epoch": 0.033198, - "loss_gen": 4.763845920562744, - "loss_rtd": 0.30309340357780457, - "loss_sent": 0.12869341671466827, - "loss_sod": 0.10470730811357498, - "loss_total": 0.5364941358566284, - "step": 112599 - }, - { - "epoch": 0.0332, - "grad_norm": 1.3438313007354736, - "learning_rate": 6.252698787703002e-05, - "loss": 0.5455, - "step": 112600 - }, - { - "epoch": 0.033398, - "loss_gen": 4.98195219039917, - "loss_rtd": 0.3181473910808563, - "loss_sent": 0.35770562291145325, - "loss_sod": 0.09960955381393433, - "loss_total": 0.7754625678062439, - "step": 112699 - }, - { - "epoch": 0.033398, - "loss_gen": 5.52577018737793, - "loss_rtd": 0.3122628629207611, - "loss_sent": 0.1191747710108757, - "loss_sod": 0.04338350147008896, - "loss_total": 0.47482115030288696, - "step": 112699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.301356554031372, - "learning_rate": 6.249626418772013e-05, - "loss": 0.5689, - "step": 112700 - }, - { - "epoch": 0.033598, - "loss_gen": 4.864354610443115, - "loss_rtd": 0.3088129758834839, - "loss_sent": 0.173867866396904, - "loss_sod": 0.045669399201869965, - "loss_total": 0.5283502340316772, - "step": 112799 - }, - { - "epoch": 0.033598, - "loss_gen": 4.882724761962891, - "loss_rtd": 0.3042221963405609, - "loss_sent": 0.09302534908056259, - "loss_sod": 0.007613973692059517, - "loss_total": 0.40486153960227966, - "step": 112799 - }, - { - "epoch": 0.0336, - "grad_norm": 0.7351897358894348, - "learning_rate": 6.24655354649165e-05, - "loss": 0.5701, - "step": 112800 - }, - { - "epoch": 0.033798, - "loss_gen": 5.298699855804443, - "loss_rtd": 0.30309218168258667, - "loss_sent": 0.1564468890428543, - "loss_sod": 0.06765016913414001, - "loss_total": 0.5271892547607422, - "step": 112899 - }, - { - "epoch": 0.033798, - "loss_gen": 4.842959880828857, - "loss_rtd": 0.299211323261261, - "loss_sent": 0.34119969606399536, - "loss_sod": 0.015285290777683258, - "loss_total": 0.6556962728500366, - "step": 112899 - }, - { - "epoch": 0.0338, - "grad_norm": 1.1486371755599976, - "learning_rate": 6.24348017209967e-05, - "loss": 0.5554, - "step": 112900 - }, - { - "epoch": 0.033998, - "loss_gen": 5.233341693878174, - "loss_rtd": 0.3070437014102936, - "loss_sent": 0.08728650212287903, - "loss_sod": 0.06658341735601425, - "loss_total": 0.46091362833976746, - "step": 112999 - }, - { - "epoch": 0.033998, - "loss_gen": 4.29036808013916, - "loss_rtd": 0.28697866201400757, - "loss_sent": 0.026916533708572388, - "loss_sod": 0.03943333402276039, - "loss_total": 0.35332852602005005, - "step": 112999 - }, - { - "epoch": 0.034, - "grad_norm": 0.8945763111114502, - "learning_rate": 6.240406296834024e-05, - "loss": 0.5482, - "step": 113000 - }, - { - "epoch": 0.034, - "eval_loss": 0.528028130531311, - "eval_runtime": 151.2941, - "eval_samples_per_second": 102.073, - "eval_steps_per_second": 0.8, - "step": 113000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.800609588623047, - "loss_rtd": 0.3146374821662903, - "loss_sent": 0.05099153518676758, - "loss_sod": 0.02965501882135868, - "loss_total": 0.3952840268611908, - "step": 113099 - }, - { - "epoch": 0.000198, - "loss_gen": 4.137120723724365, - "loss_rtd": 0.28727754950523376, - "loss_sent": 0.022033551707863808, - "loss_sod": 0.05693252012133598, - "loss_total": 0.3662436008453369, - "step": 113099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.5575613379478455, - "learning_rate": 6.23733192193287e-05, - "loss": 0.541, - "step": 113100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.011971473693848, - "loss_rtd": 0.29916349053382874, - "loss_sent": 0.14115023612976074, - "loss_sod": 0.015926435589790344, - "loss_total": 0.456240177154541, - "step": 113199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.719551086425781, - "loss_rtd": 0.3118532598018646, - "loss_sent": 0.20628313720226288, - "loss_sod": 0.03100305050611496, - "loss_total": 0.5491394400596619, - "step": 113199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.85649573802948, - "learning_rate": 6.234257048634566e-05, - "loss": 0.5686, - "step": 113200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.200534820556641, - "loss_rtd": 0.3089621067047119, - "loss_sent": 0.21121861040592194, - "loss_sod": 0.07619243115186691, - "loss_total": 0.5963731408119202, - "step": 113299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.303565979003906, - "loss_rtd": 0.31125354766845703, - "loss_sent": 0.07995057851076126, - "loss_sod": 0.15613064169883728, - "loss_total": 0.5473347902297974, - "step": 113299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.0480402708053589, - "learning_rate": 6.231181678177671e-05, - "loss": 0.55, - "step": 113300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.183351993560791, - "loss_rtd": 0.2881074845790863, - "loss_sent": 0.24594196677207947, - "loss_sod": 0.09231461584568024, - "loss_total": 0.6263641119003296, - "step": 113399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.683897495269775, - "loss_rtd": 0.3044544756412506, - "loss_sent": 0.19104251265525818, - "loss_sod": 0.02554282546043396, - "loss_total": 0.5210398435592651, - "step": 113399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.3088388442993164, - "learning_rate": 6.228105811800942e-05, - "loss": 0.5593, - "step": 113400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.025886058807373, - "loss_rtd": 0.3033176362514496, - "loss_sent": 0.29587307572364807, - "loss_sod": 0.08863915503025055, - "loss_total": 0.687829852104187, - "step": 113499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.1141157150268555, - "loss_rtd": 0.3167629539966583, - "loss_sent": 0.18045826256275177, - "loss_sod": 0.02749168500304222, - "loss_total": 0.5247129201889038, - "step": 113499 - }, - { - "epoch": 0.001, - "grad_norm": 1.1944942474365234, - "learning_rate": 6.225029450743341e-05, - "loss": 0.5393, - "step": 113500 - }, - { - "epoch": 0.001198, - "loss_gen": 4.8736982345581055, - "loss_rtd": 0.28981536626815796, - "loss_sent": 0.18818746507167816, - "loss_sod": 0.059310294687747955, - "loss_total": 0.5373131036758423, - "step": 113599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.904609680175781, - "loss_rtd": 0.3106389045715332, - "loss_sent": 0.07906059920787811, - "loss_sod": 0.08784067630767822, - "loss_total": 0.47754019498825073, - "step": 113599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.537190556526184, - "learning_rate": 6.221952596244022e-05, - "loss": 0.5567, - "step": 113600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.23633337020874, - "loss_rtd": 0.2887208163738251, - "loss_sent": 0.007274389732629061, - "loss_sod": 0.179124116897583, - "loss_total": 0.47511932253837585, - "step": 113699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.34153938293457, - "loss_rtd": 0.3088279366493225, - "loss_sent": 0.17111530900001526, - "loss_sod": 0.07337859272956848, - "loss_total": 0.5533218383789062, - "step": 113699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.2073296308517456, - "learning_rate": 6.218875249542343e-05, - "loss": 0.5628, - "step": 113700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.831704616546631, - "loss_rtd": 0.31238698959350586, - "loss_sent": 0.22992931306362152, - "loss_sod": 0.08314387500286102, - "loss_total": 0.6254602074623108, - "step": 113799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.000875949859619, - "loss_rtd": 0.3161376118659973, - "loss_sent": 0.6010262370109558, - "loss_sod": 0.08366889506578445, - "loss_total": 1.0008327960968018, - "step": 113799 - }, - { - "epoch": 0.0016, - "grad_norm": 2.048090934753418, - "learning_rate": 6.215797411877862e-05, - "loss": 0.5543, - "step": 113800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.704843521118164, - "loss_rtd": 0.2941146194934845, - "loss_sent": 0.04738251864910126, - "loss_sod": 0.020208284258842468, - "loss_total": 0.3617054224014282, - "step": 113899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.110398292541504, - "loss_rtd": 0.3091120421886444, - "loss_sent": 0.4693821668624878, - "loss_sod": 0.10883425176143646, - "loss_total": 0.8873284459114075, - "step": 113899 - }, - { - "epoch": 0.0018, - "grad_norm": 2.1799964904785156, - "learning_rate": 6.212719084490326e-05, - "loss": 0.5514, - "step": 113900 - }, - { - "epoch": 0.001998, - "loss_gen": 4.731112003326416, - "loss_rtd": 0.2894507348537445, - "loss_sent": 0.19872452318668365, - "loss_sod": 0.04754795879125595, - "loss_total": 0.5357232093811035, - "step": 113999 - }, - { - "epoch": 0.001998, - "loss_gen": 4.918564796447754, - "loss_rtd": 0.32017087936401367, - "loss_sent": 0.4357863962650299, - "loss_sod": 0.017332345247268677, - "loss_total": 0.7732896208763123, - "step": 113999 - }, - { - "epoch": 0.002, - "grad_norm": 1.3521355390548706, - "learning_rate": 6.20964026861969e-05, - "loss": 0.5652, - "step": 114000 - }, - { - "epoch": 0.002, - "eval_loss": 0.5385438799858093, - "eval_runtime": 153.5069, - "eval_samples_per_second": 100.601, - "eval_steps_per_second": 0.788, - "step": 114000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.050449848175049, - "loss_rtd": 0.3129706084728241, - "loss_sent": 0.256864070892334, - "loss_sod": 0.022321533411741257, - "loss_total": 0.592156171798706, - "step": 114099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.117237567901611, - "loss_rtd": 0.31384947896003723, - "loss_sent": 0.3733851909637451, - "loss_sod": 0.1366913765668869, - "loss_total": 0.8239260315895081, - "step": 114099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.153089165687561, - "learning_rate": 6.206560965506097e-05, - "loss": 0.55, - "step": 114100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.886935710906982, - "loss_rtd": 0.30306994915008545, - "loss_sent": 0.1534023880958557, - "loss_sod": 0.05112428963184357, - "loss_total": 0.5075966119766235, - "step": 114199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.999471664428711, - "loss_rtd": 0.309175044298172, - "loss_sent": 0.09727267175912857, - "loss_sod": 0.06468882411718369, - "loss_total": 0.47113654017448425, - "step": 114199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.8409729599952698, - "learning_rate": 6.203481176389892e-05, - "loss": 0.5552, - "step": 114200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.151288032531738, - "loss_rtd": 0.31213319301605225, - "loss_sent": 0.0629645511507988, - "loss_sod": 0.10300157964229584, - "loss_total": 0.47809934616088867, - "step": 114299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.788854122161865, - "loss_rtd": 0.28913843631744385, - "loss_sent": 0.13088183104991913, - "loss_sod": 0.0693574994802475, - "loss_total": 0.48937779664993286, - "step": 114299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.7904286980628967, - "learning_rate": 6.200400902511612e-05, - "loss": 0.5548, - "step": 114300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.06631326675415, - "loss_rtd": 0.2967974841594696, - "loss_sent": 0.24668163061141968, - "loss_sod": 0.03065936453640461, - "loss_total": 0.5741385221481323, - "step": 114399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.099505424499512, - "loss_rtd": 0.3112536072731018, - "loss_sent": 0.358215868473053, - "loss_sod": 0.03907562047243118, - "loss_total": 0.7085450887680054, - "step": 114399 - }, - { - "epoch": 0.0028, - "grad_norm": 2.1623480319976807, - "learning_rate": 6.197320145111993e-05, - "loss": 0.5565, - "step": 114400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.291693687438965, - "loss_rtd": 0.303223580121994, - "loss_sent": 0.11208082735538483, - "loss_sod": 0.21081554889678955, - "loss_total": 0.6261199712753296, - "step": 114499 - }, - { - "epoch": 0.002998, - "loss_gen": 4.941003322601318, - "loss_rtd": 0.3099227249622345, - "loss_sent": 0.30689841508865356, - "loss_sod": 0.010753561742603779, - "loss_total": 0.6275746822357178, - "step": 114499 - }, - { - "epoch": 0.003, - "grad_norm": 1.994731068611145, - "learning_rate": 6.194238905431963e-05, - "loss": 0.5536, - "step": 114500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.026473522186279, - "loss_rtd": 0.29253116250038147, - "loss_sent": 0.03600431978702545, - "loss_sod": 0.1048341616988182, - "loss_total": 0.43336963653564453, - "step": 114599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.14656925201416, - "loss_rtd": 0.30833864212036133, - "loss_sent": 0.07989537715911865, - "loss_sod": 0.06955140084028244, - "loss_total": 0.457785427570343, - "step": 114599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9737004637718201, - "learning_rate": 6.191157184712644e-05, - "loss": 0.5342, - "step": 114600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.6400556564331055, - "loss_rtd": 0.323696106672287, - "loss_sent": 0.1208534985780716, - "loss_sod": 0.024927394464612007, - "loss_total": 0.46947699785232544, - "step": 114699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.89691162109375, - "loss_rtd": 0.315727174282074, - "loss_sent": 0.027605533599853516, - "loss_sod": 0.2120855748653412, - "loss_total": 0.5554182529449463, - "step": 114699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.1237143278121948, - "learning_rate": 6.188074984195353e-05, - "loss": 0.551, - "step": 114700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.290353775024414, - "loss_rtd": 0.2860538959503174, - "loss_sent": 4.6586945245508105e-05, - "loss_sod": 0.08435927331447601, - "loss_total": 0.37045976519584656, - "step": 114799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.766724109649658, - "loss_rtd": 0.2988499104976654, - "loss_sent": 0.10280866175889969, - "loss_sod": 0.07156020402908325, - "loss_total": 0.47321876883506775, - "step": 114799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.7292584776878357, - "learning_rate": 6.184992305121601e-05, - "loss": 0.5591, - "step": 114800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.8388991355896, - "loss_rtd": 0.3185732066631317, - "loss_sent": 0.14404888451099396, - "loss_sod": 0.10201037675142288, - "loss_total": 0.5646324753761292, - "step": 114899 - }, - { - "epoch": 0.003798, - "loss_gen": 3.8639140129089355, - "loss_rtd": 0.30034103989601135, - "loss_sent": 0.0003229479189030826, - "loss_sod": 0.1416824460029602, - "loss_total": 0.4423464238643646, - "step": 114899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.0658996105194092, - "learning_rate": 6.181909148733092e-05, - "loss": 0.5491, - "step": 114900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.2712602615356445, - "loss_rtd": 0.28642332553863525, - "loss_sent": 0.15423594415187836, - "loss_sod": 0.10140690207481384, - "loss_total": 0.5420661568641663, - "step": 114999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.609872817993164, - "loss_rtd": 0.31023040413856506, - "loss_sent": 0.05131329968571663, - "loss_sod": 0.0744519978761673, - "loss_total": 0.4359957277774811, - "step": 114999 - }, - { - "epoch": 0.004, - "grad_norm": 1.00832200050354, - "learning_rate": 6.178825516271715e-05, - "loss": 0.5413, - "step": 115000 - }, - { - "epoch": 0.004, - "eval_loss": 0.5329810380935669, - "eval_runtime": 150.7093, - "eval_samples_per_second": 102.469, - "eval_steps_per_second": 0.803, - "step": 115000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.131319046020508, - "loss_rtd": 0.3135809600353241, - "loss_sent": 0.2427971512079239, - "loss_sod": 0.034553416073322296, - "loss_total": 0.5909315347671509, - "step": 115099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.176270961761475, - "loss_rtd": 0.31237703561782837, - "loss_sent": 0.2483011931180954, - "loss_sod": 0.07424858212471008, - "loss_total": 0.6349267959594727, - "step": 115099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.304195523262024, - "learning_rate": 6.175741408979565e-05, - "loss": 0.5562, - "step": 115100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.37276554107666, - "loss_rtd": 0.30123811960220337, - "loss_sent": 0.32682904601097107, - "loss_sod": 0.0769878551363945, - "loss_total": 0.7050549983978271, - "step": 115199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.281090259552002, - "loss_rtd": 0.30877524614334106, - "loss_sent": 0.1706777960062027, - "loss_sod": 0.07811566442251205, - "loss_total": 0.5575687289237976, - "step": 115199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.9951972961425781, - "learning_rate": 6.172656828098915e-05, - "loss": 0.5743, - "step": 115200 - }, - { - "epoch": 0.004598, - "loss_gen": 4.0622968673706055, - "loss_rtd": 0.28530532121658325, - "loss_sent": 0.14061227440834045, - "loss_sod": 0.012159768491983414, - "loss_total": 0.4380773603916168, - "step": 115299 - }, - { - "epoch": 0.004598, - "loss_gen": 4.939615249633789, - "loss_rtd": 0.3039166033267975, - "loss_sent": 0.3648511469364166, - "loss_sod": 0.11913774907588959, - "loss_total": 0.7879054546356201, - "step": 115299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.1858198642730713, - "learning_rate": 6.169571774872234e-05, - "loss": 0.5552, - "step": 115300 - }, - { - "epoch": 0.004798, - "loss_gen": 4.055812358856201, - "loss_rtd": 0.25892534852027893, - "loss_sent": 0.028526559472084045, - "loss_sod": 0.11896795779466629, - "loss_total": 0.40641987323760986, - "step": 115399 - }, - { - "epoch": 0.004798, - "loss_gen": 4.453707695007324, - "loss_rtd": 0.3138894736766815, - "loss_sent": 4.394640927785076e-05, - "loss_sod": 0.17471273243427277, - "loss_total": 0.48864617943763733, - "step": 115399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.8378913402557373, - "learning_rate": 6.166486250542182e-05, - "loss": 0.5437, - "step": 115400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.018705368041992, - "loss_rtd": 0.29178404808044434, - "loss_sent": 0.5226912498474121, - "loss_sod": 0.04983864724636078, - "loss_total": 0.8643139600753784, - "step": 115499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.1382646560668945, - "loss_rtd": 0.32372385263442993, - "loss_sent": 0.07393720000982285, - "loss_sod": 0.0608237162232399, - "loss_total": 0.4584847688674927, - "step": 115499 - }, - { - "epoch": 0.005, - "grad_norm": 1.0321120023727417, - "learning_rate": 6.163400256351608e-05, - "loss": 0.5564, - "step": 115500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.459597110748291, - "loss_rtd": 0.3049749732017517, - "loss_sent": 0.3579116463661194, - "loss_sod": 0.07099738717079163, - "loss_total": 0.7338839769363403, - "step": 115599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.500686168670654, - "loss_rtd": 0.3075568377971649, - "loss_sent": 0.09604629874229431, - "loss_sod": 0.03514803946018219, - "loss_total": 0.4387511909008026, - "step": 115599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.8851786851882935, - "learning_rate": 6.16031379354355e-05, - "loss": 0.5562, - "step": 115600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.096866607666016, - "loss_rtd": 0.28537148237228394, - "loss_sent": 3.223814201191999e-05, - "loss_sod": 0.10718190670013428, - "loss_total": 0.3925856351852417, - "step": 115699 - }, - { - "epoch": 0.005398, - "loss_gen": 4.263040065765381, - "loss_rtd": 0.2905530631542206, - "loss_sent": 0.053427621722221375, - "loss_sod": 0.03622889891266823, - "loss_total": 0.3802095651626587, - "step": 115699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.5757880210876465, - "learning_rate": 6.157226863361236e-05, - "loss": 0.5483, - "step": 115700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.071784973144531, - "loss_rtd": 0.3027852773666382, - "loss_sent": 0.26040104031562805, - "loss_sod": 0.012592491693794727, - "loss_total": 0.5757788419723511, - "step": 115799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.007861614227295, - "loss_rtd": 0.31663382053375244, - "loss_sent": 0.2857656478881836, - "loss_sod": 0.029870562255382538, - "loss_total": 0.6322700381278992, - "step": 115799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.4296653270721436, - "learning_rate": 6.154139467048077e-05, - "loss": 0.551, - "step": 115800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.9681010246276855, - "loss_rtd": 0.31963518261909485, - "loss_sent": 0.377085417509079, - "loss_sod": 0.0771680474281311, - "loss_total": 0.7738886475563049, - "step": 115899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.173583030700684, - "loss_rtd": 0.3045327067375183, - "loss_sent": 0.17333577573299408, - "loss_sod": 0.12360046803951263, - "loss_total": 0.6014689803123474, - "step": 115899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.1803321838378906, - "learning_rate": 6.151051605847681e-05, - "loss": 0.5691, - "step": 115900 - }, - { - "epoch": 0.005998, - "loss_gen": 4.760784149169922, - "loss_rtd": 0.30599844455718994, - "loss_sent": 0.26939308643341064, - "loss_sod": 0.12544703483581543, - "loss_total": 0.700838565826416, - "step": 115999 - }, - { - "epoch": 0.005998, - "loss_gen": 4.810731887817383, - "loss_rtd": 0.3135074973106384, - "loss_sent": 0.2628597617149353, - "loss_sod": 0.02120266482234001, - "loss_total": 0.5975699424743652, - "step": 115999 - }, - { - "epoch": 0.006, - "grad_norm": 1.2998814582824707, - "learning_rate": 6.147963281003835e-05, - "loss": 0.5536, - "step": 116000 - }, - { - "epoch": 0.006, - "eval_loss": 0.5306083559989929, - "eval_runtime": 150.8686, - "eval_samples_per_second": 102.361, - "eval_steps_per_second": 0.802, - "step": 116000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.276411533355713, - "loss_rtd": 0.30166155099868774, - "loss_sent": 4.164973506703973e-05, - "loss_sod": 0.09485423564910889, - "loss_total": 0.39655745029449463, - "step": 116099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.135272026062012, - "loss_rtd": 0.28391721844673157, - "loss_sent": 0.00020394708553794771, - "loss_sod": 0.30215784907341003, - "loss_total": 0.586279034614563, - "step": 116099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.0701336860656738, - "learning_rate": 6.144874493760517e-05, - "loss": 0.5651, - "step": 116100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.803519248962402, - "loss_rtd": 0.3124741017818451, - "loss_sent": 0.28700754046440125, - "loss_sod": 0.05058220773935318, - "loss_total": 0.6500638723373413, - "step": 116199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.236623287200928, - "loss_rtd": 0.3101903796195984, - "loss_sent": 0.1815764456987381, - "loss_sod": 0.056037433445453644, - "loss_total": 0.5478042364120483, - "step": 116199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.485912561416626, - "learning_rate": 6.141785245361891e-05, - "loss": 0.5374, - "step": 116200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.382851600646973, - "loss_rtd": 0.3070049285888672, - "loss_sent": 0.2606680691242218, - "loss_sod": 0.024468395859003067, - "loss_total": 0.5921413898468018, - "step": 116299 - }, - { - "epoch": 0.006598, - "loss_gen": 4.9794721603393555, - "loss_rtd": 0.3133496642112732, - "loss_sent": 0.3018931448459625, - "loss_sod": 0.03662215173244476, - "loss_total": 0.6518650054931641, - "step": 116299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.5523754358291626, - "learning_rate": 6.138695537052302e-05, - "loss": 0.5523, - "step": 116300 - }, - { - "epoch": 0.006798, - "loss_gen": 4.770416259765625, - "loss_rtd": 0.30799728631973267, - "loss_sent": 0.11200794577598572, - "loss_sod": 0.12188903987407684, - "loss_total": 0.5418943166732788, - "step": 116399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.167383670806885, - "loss_rtd": 0.2794514000415802, - "loss_sent": 0.10882844775915146, - "loss_sod": 0.00805368460714817, - "loss_total": 0.3963335454463959, - "step": 116399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.9968624711036682, - "learning_rate": 6.135605370076288e-05, - "loss": 0.5512, - "step": 116400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.758355140686035, - "loss_rtd": 0.3198693096637726, - "loss_sent": 0.002205683384090662, - "loss_sod": 0.14805887639522552, - "loss_total": 0.47013384103775024, - "step": 116499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.639089584350586, - "loss_rtd": 0.305816650390625, - "loss_sent": 3.7118101317901164e-05, - "loss_sod": 0.2026383876800537, - "loss_total": 0.5084921717643738, - "step": 116499 - }, - { - "epoch": 0.007, - "grad_norm": 1.0024669170379639, - "learning_rate": 6.132514745678567e-05, - "loss": 0.5633, - "step": 116500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.15023946762085, - "loss_rtd": 0.316466361284256, - "loss_sent": 0.1617228239774704, - "loss_sod": 0.10378433018922806, - "loss_total": 0.5819734930992126, - "step": 116599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.660497188568115, - "loss_rtd": 0.3126148283481598, - "loss_sent": 0.25780534744262695, - "loss_sod": 0.02918866090476513, - "loss_total": 0.599608838558197, - "step": 116599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.9753177762031555, - "learning_rate": 6.129423665104042e-05, - "loss": 0.5681, - "step": 116600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.929150104522705, - "loss_rtd": 0.29756641387939453, - "loss_sent": 0.16652946174144745, - "loss_sod": 0.0524655245244503, - "loss_total": 0.5165613889694214, - "step": 116699 - }, - { - "epoch": 0.007398, - "loss_gen": 4.864585876464844, - "loss_rtd": 0.3132537007331848, - "loss_sent": 0.2488655000925064, - "loss_sod": 0.00397275248542428, - "loss_total": 0.5660919547080994, - "step": 116699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.6388241052627563, - "learning_rate": 6.1263321295978e-05, - "loss": 0.5628, - "step": 116700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.148667812347412, - "loss_rtd": 0.29710373282432556, - "loss_sent": 0.16809654235839844, - "loss_sod": 0.032282207161188126, - "loss_total": 0.4974824786186218, - "step": 116799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.510430812835693, - "loss_rtd": 0.31945863366127014, - "loss_sent": 0.15705585479736328, - "loss_sod": 0.08870366215705872, - "loss_total": 0.5652181506156921, - "step": 116799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.2833685874938965, - "learning_rate": 6.123240140405111e-05, - "loss": 0.5505, - "step": 116800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.093433856964111, - "loss_rtd": 0.29983484745025635, - "loss_sent": 0.23154324293136597, - "loss_sod": 0.03990485891699791, - "loss_total": 0.5712829828262329, - "step": 116899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.802632808685303, - "loss_rtd": 0.3150176703929901, - "loss_sent": 0.2557350993156433, - "loss_sod": 0.1667909026145935, - "loss_total": 0.7375437021255493, - "step": 116899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.092612624168396, - "learning_rate": 6.120147698771426e-05, - "loss": 0.5582, - "step": 116900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.013883590698242, - "loss_rtd": 0.31911247968673706, - "loss_sent": 0.2507493495941162, - "loss_sod": 0.09466500580310822, - "loss_total": 0.6645268201828003, - "step": 116999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.192552089691162, - "loss_rtd": 0.3006800413131714, - "loss_sent": 0.29077842831611633, - "loss_sod": 0.13088540732860565, - "loss_total": 0.722343921661377, - "step": 116999 - }, - { - "epoch": 0.008, - "grad_norm": 0.9831264615058899, - "learning_rate": 6.117054805942384e-05, - "loss": 0.5622, - "step": 117000 - }, - { - "epoch": 0.008, - "eval_loss": 0.5328510403633118, - "eval_runtime": 151.0378, - "eval_samples_per_second": 102.246, - "eval_steps_per_second": 0.801, - "step": 117000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.035093307495117, - "loss_rtd": 0.28687572479248047, - "loss_sent": 8.938983228290454e-05, - "loss_sod": 0.13432514667510986, - "loss_total": 0.42129024863243103, - "step": 117099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.797860622406006, - "loss_rtd": 0.3159748613834381, - "loss_sent": 0.13346822559833527, - "loss_sod": 0.015710938721895218, - "loss_total": 0.4651540517807007, - "step": 117099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.6174519062042236, - "learning_rate": 6.113961463163797e-05, - "loss": 0.5447, - "step": 117100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.414780616760254, - "loss_rtd": 0.29347509145736694, - "loss_sent": 0.06772622466087341, - "loss_sod": 0.05473160743713379, - "loss_total": 0.41593292355537415, - "step": 117199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.32811975479126, - "loss_rtd": 0.30691400170326233, - "loss_sent": 0.015786901116371155, - "loss_sod": 0.2614777088165283, - "loss_total": 0.5841785669326782, - "step": 117199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.7983853816986084, - "learning_rate": 6.11086767168167e-05, - "loss": 0.5543, - "step": 117200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.570329189300537, - "loss_rtd": 0.2815568149089813, - "loss_sent": 0.0009163393406197429, - "loss_sod": 0.12550219893455505, - "loss_total": 0.40797534584999084, - "step": 117299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.595879077911377, - "loss_rtd": 0.28276875615119934, - "loss_sent": 0.0515110082924366, - "loss_sod": 0.09898176789283752, - "loss_total": 0.43326154351234436, - "step": 117299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.7184866666793823, - "learning_rate": 6.107773432742174e-05, - "loss": 0.5741, - "step": 117300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.7618632316589355, - "loss_rtd": 0.3073684573173523, - "loss_sent": 0.055196598172187805, - "loss_sod": 0.004483198281377554, - "loss_total": 0.3670482635498047, - "step": 117399 - }, - { - "epoch": 0.008798, - "loss_gen": 4.997790336608887, - "loss_rtd": 0.30896130204200745, - "loss_sent": 0.04724084958434105, - "loss_sod": 0.09285259991884232, - "loss_total": 0.4490547478199005, - "step": 117399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.7552955150604248, - "learning_rate": 6.104678747591673e-05, - "loss": 0.5318, - "step": 117400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.978947639465332, - "loss_rtd": 0.2978529930114746, - "loss_sent": 0.21421316266059875, - "loss_sod": 0.043476030230522156, - "loss_total": 0.5555422306060791, - "step": 117499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.688227653503418, - "loss_rtd": 0.3126789629459381, - "loss_sent": 0.09292519092559814, - "loss_sod": 0.011855825781822205, - "loss_total": 0.41745996475219727, - "step": 117499 - }, - { - "epoch": 0.009, - "grad_norm": 1.7240303754806519, - "learning_rate": 6.101583617476705e-05, - "loss": 0.5632, - "step": 117500 - }, - { - "epoch": 0.009198, - "loss_gen": 4.799741744995117, - "loss_rtd": 0.3096998929977417, - "loss_sent": 0.0798083245754242, - "loss_sod": 0.018908588215708733, - "loss_total": 0.4084168076515198, - "step": 117599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.974015712738037, - "loss_rtd": 0.28615111112594604, - "loss_sent": 0.21938830614089966, - "loss_sod": 0.09349527209997177, - "loss_total": 0.5990346670150757, - "step": 117599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.9687650203704834, - "learning_rate": 6.0984880436439884e-05, - "loss": 0.5647, - "step": 117600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.020453929901123, - "loss_rtd": 0.28277814388275146, - "loss_sent": 0.3842502236366272, - "loss_sod": 0.03025163896381855, - "loss_total": 0.6972800493240356, - "step": 117699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.045213222503662, - "loss_rtd": 0.3140573501586914, - "loss_sent": 0.1761348396539688, - "loss_sod": 0.02005627565085888, - "loss_total": 0.5102484822273254, - "step": 117699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.7610554695129395, - "learning_rate": 6.0953920273404184e-05, - "loss": 0.5464, - "step": 117700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.240246295928955, - "loss_rtd": 0.2947600483894348, - "loss_sent": 0.12171149998903275, - "loss_sod": 0.10148908197879791, - "loss_total": 0.5179606080055237, - "step": 117799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.303003311157227, - "loss_rtd": 0.3095209002494812, - "loss_sent": 0.24944280087947845, - "loss_sod": 0.09355980157852173, - "loss_total": 0.6525235176086426, - "step": 117799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.479610800743103, - "learning_rate": 6.0922955698130704e-05, - "loss": 0.5579, - "step": 117800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.901512622833252, - "loss_rtd": 0.3135903775691986, - "loss_sent": 0.08606160432100296, - "loss_sod": 0.013673401437699795, - "loss_total": 0.41332536935806274, - "step": 117899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.495378017425537, - "loss_rtd": 0.3010045289993286, - "loss_sent": 0.1780148297548294, - "loss_sod": 0.0486171655356884, - "loss_total": 0.5276365280151367, - "step": 117899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.8681696057319641, - "learning_rate": 6.089198672309198e-05, - "loss": 0.5457, - "step": 117900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.7274274826049805, - "loss_rtd": 0.31041961908340454, - "loss_sent": 4.1027440602192655e-05, - "loss_sod": 0.20404189825057983, - "loss_total": 0.5145025253295898, - "step": 117999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.6410722732543945, - "loss_rtd": 0.3025372624397278, - "loss_sent": 4.420238110469654e-05, - "loss_sod": 0.21246737241744995, - "loss_total": 0.5150488615036011, - "step": 117999 - }, - { - "epoch": 0.01, - "grad_norm": 1.265177607536316, - "learning_rate": 6.0861013360762284e-05, - "loss": 0.5567, - "step": 118000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5303297638893127, - "eval_runtime": 150.6817, - "eval_samples_per_second": 102.488, - "eval_steps_per_second": 0.803, - "step": 118000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.342700004577637, - "loss_rtd": 0.3074573278427124, - "loss_sent": 0.12914182245731354, - "loss_sod": 0.10512572526931763, - "loss_total": 0.5417248606681824, - "step": 118099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.054635047912598, - "loss_rtd": 0.3276335597038269, - "loss_sent": 0.17085400223731995, - "loss_sod": 0.02813855931162834, - "loss_total": 0.5266261100769043, - "step": 118099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.3616130352020264, - "learning_rate": 6.083003562361774e-05, - "loss": 0.5641, - "step": 118100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.146373748779297, - "loss_rtd": 0.2659860849380493, - "loss_sent": 0.05771252140402794, - "loss_sod": 0.06970370560884476, - "loss_total": 0.3934023082256317, - "step": 118199 - }, - { - "epoch": 0.010398, - "loss_gen": 4.889800548553467, - "loss_rtd": 0.3198871612548828, - "loss_sent": 0.11661746352910995, - "loss_sod": 0.03849222511053085, - "loss_total": 0.4749968647956848, - "step": 118199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.7561426162719727, - "learning_rate": 6.079905352413613e-05, - "loss": 0.5516, - "step": 118200 - }, - { - "epoch": 0.010598, - "loss_gen": 4.492007255554199, - "loss_rtd": 0.3046039044857025, - "loss_sent": 0.1819341778755188, - "loss_sod": 0.024301957339048386, - "loss_total": 0.5108400583267212, - "step": 118299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.142167568206787, - "loss_rtd": 0.3205025792121887, - "loss_sent": 0.20369991660118103, - "loss_sod": 0.07976368814706802, - "loss_total": 0.6039661765098572, - "step": 118299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.8787733912467957, - "learning_rate": 6.076806707479704e-05, - "loss": 0.5444, - "step": 118300 - }, - { - "epoch": 0.010798, - "loss_gen": 4.338025093078613, - "loss_rtd": 0.274977445602417, - "loss_sent": 0.043792724609375, - "loss_sod": 0.04635123163461685, - "loss_total": 0.36512139439582825, - "step": 118399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.427778244018555, - "loss_rtd": 0.31101086735725403, - "loss_sent": 0.04641634225845337, - "loss_sod": 0.07147668302059174, - "loss_total": 0.42890387773513794, - "step": 118399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.7714661955833435, - "learning_rate": 6.073707628808184e-05, - "loss": 0.5514, - "step": 118400 - }, - { - "epoch": 0.010998, - "loss_gen": 4.238531589508057, - "loss_rtd": 0.2738182544708252, - "loss_sent": 0.02748727984726429, - "loss_sod": 0.1467839777469635, - "loss_total": 0.44808951020240784, - "step": 118499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.748021602630615, - "loss_rtd": 0.314196914434433, - "loss_sent": 0.281240314245224, - "loss_sod": 0.014339671470224857, - "loss_total": 0.6097769141197205, - "step": 118499 - }, - { - "epoch": 0.011, - "grad_norm": 0.9258102178573608, - "learning_rate": 6.070608117647358e-05, - "loss": 0.5483, - "step": 118500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.137299060821533, - "loss_rtd": 0.28736162185668945, - "loss_sent": 3.937733708880842e-05, - "loss_sod": 0.2685033679008484, - "loss_total": 0.5559043884277344, - "step": 118599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.600038528442383, - "loss_rtd": 0.27350613474845886, - "loss_sent": 0.0018881767755374312, - "loss_sod": 0.13644471764564514, - "loss_total": 0.4118390381336212, - "step": 118599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9446884989738464, - "learning_rate": 6.067508175245711e-05, - "loss": 0.553, - "step": 118600 - }, - { - "epoch": 0.011398, - "loss_gen": 4.890167236328125, - "loss_rtd": 0.30400070548057556, - "loss_sent": 0.30468428134918213, - "loss_sod": 0.05940008908510208, - "loss_total": 0.6680850982666016, - "step": 118699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.859102249145508, - "loss_rtd": 0.29612302780151367, - "loss_sent": 0.07546553015708923, - "loss_sod": 0.07025572657585144, - "loss_total": 0.44184428453445435, - "step": 118699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.1226309537887573, - "learning_rate": 6.064407802851898e-05, - "loss": 0.5545, - "step": 118700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.093757629394531, - "loss_rtd": 0.3137657940387726, - "loss_sent": 0.14663511514663696, - "loss_sod": 0.18289406597614288, - "loss_total": 0.6432949900627136, - "step": 118799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.48219108581543, - "loss_rtd": 0.2992091774940491, - "loss_sent": 0.11522194743156433, - "loss_sod": 0.058543283492326736, - "loss_total": 0.47297441959381104, - "step": 118799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.0091612339019775, - "learning_rate": 6.0613070017147486e-05, - "loss": 0.539, - "step": 118800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.052651405334473, - "loss_rtd": 0.2983821630477905, - "loss_sent": 0.9284707903862, - "loss_sod": 0.05928806588053703, - "loss_total": 1.286141037940979, - "step": 118899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.213494777679443, - "loss_rtd": 0.3135639429092407, - "loss_sent": 0.08417072147130966, - "loss_sod": 0.0376645028591156, - "loss_total": 0.4353991746902466, - "step": 118899 - }, - { - "epoch": 0.0118, - "grad_norm": 4.361950874328613, - "learning_rate": 6.058205773083268e-05, - "loss": 0.5491, - "step": 118900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.080746650695801, - "loss_rtd": 0.2965928912162781, - "loss_sent": 0.1390780657529831, - "loss_sod": 0.03249840438365936, - "loss_total": 0.4681693911552429, - "step": 118999 - }, - { - "epoch": 0.011998, - "loss_gen": 4.327706336975098, - "loss_rtd": 0.29067936539649963, - "loss_sent": 0.0004974246839992702, - "loss_sod": 0.20749783515930176, - "loss_total": 0.4986746311187744, - "step": 118999 - }, - { - "epoch": 0.012, - "grad_norm": 0.8957967162132263, - "learning_rate": 6.055104118206627e-05, - "loss": 0.5375, - "step": 119000 - }, - { - "epoch": 0.012, - "eval_loss": 0.5409232974052429, - "eval_runtime": 151.0629, - "eval_samples_per_second": 102.229, - "eval_steps_per_second": 0.801, - "step": 119000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.732698917388916, - "loss_rtd": 0.3089301884174347, - "loss_sent": 0.38602128624916077, - "loss_sod": 0.003477014135569334, - "loss_total": 0.6984285116195679, - "step": 119099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.9668049812316895, - "loss_rtd": 0.2914218604564667, - "loss_sent": 0.06754955649375916, - "loss_sod": 0.010460056364536285, - "loss_total": 0.3694314658641815, - "step": 119099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.34317147731781, - "learning_rate": 6.052002038334173e-05, - "loss": 0.5629, - "step": 119100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.735567569732666, - "loss_rtd": 0.2999626100063324, - "loss_sent": 0.16539442539215088, - "loss_sod": 0.09441490471363068, - "loss_total": 0.5597719550132751, - "step": 119199 - }, - { - "epoch": 0.012398, - "loss_gen": 4.99312162399292, - "loss_rtd": 0.31069958209991455, - "loss_sent": 0.1555461436510086, - "loss_sod": 0.0029545840807259083, - "loss_total": 0.4692003130912781, - "step": 119199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.335006594657898, - "learning_rate": 6.048899534715424e-05, - "loss": 0.5551, - "step": 119200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.128385066986084, - "loss_rtd": 0.3325307369232178, - "loss_sent": 0.19470533728599548, - "loss_sod": 0.03076741099357605, - "loss_total": 0.5580034852027893, - "step": 119299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.859645366668701, - "loss_rtd": 0.3197959363460541, - "loss_sent": 0.17254067957401276, - "loss_sod": 0.03450271114706993, - "loss_total": 0.5268393158912659, - "step": 119299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.2715450525283813, - "learning_rate": 6.0457966086000695e-05, - "loss": 0.5415, - "step": 119300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.846061706542969, - "loss_rtd": 0.30711206793785095, - "loss_sent": 0.3994387686252594, - "loss_sod": 0.01650303602218628, - "loss_total": 0.7230538725852966, - "step": 119399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.607169151306152, - "loss_rtd": 0.29206523299217224, - "loss_sent": 0.013358005322515965, - "loss_sod": 0.18210723996162415, - "loss_total": 0.4875304698944092, - "step": 119399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.009638786315918, - "learning_rate": 6.042693261237964e-05, - "loss": 0.5599, - "step": 119400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.192318916320801, - "loss_rtd": 0.28099825978279114, - "loss_sent": 0.015737246721982956, - "loss_sod": 0.12070950120687485, - "loss_total": 0.41744500398635864, - "step": 119499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.28455114364624, - "loss_rtd": 0.30890995264053345, - "loss_sent": 0.026161663234233856, - "loss_sod": 0.1854681819677353, - "loss_total": 0.5205398201942444, - "step": 119499 - }, - { - "epoch": 0.013, - "grad_norm": 0.7967619895935059, - "learning_rate": 6.0395894938791395e-05, - "loss": 0.5471, - "step": 119500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.179881572723389, - "loss_rtd": 0.316481351852417, - "loss_sent": 0.3046930730342865, - "loss_sod": 0.03662659600377083, - "loss_total": 0.6578010320663452, - "step": 119599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.606226444244385, - "loss_rtd": 0.30212709307670593, - "loss_sent": 0.11669275909662247, - "loss_sod": 0.09998899698257446, - "loss_total": 0.5188088417053223, - "step": 119599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.1093003749847412, - "learning_rate": 6.036485307773789e-05, - "loss": 0.5431, - "step": 119600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.087669372558594, - "loss_rtd": 0.32508882880210876, - "loss_sent": 0.12444044649600983, - "loss_sod": 0.11683076620101929, - "loss_total": 0.5663600564002991, - "step": 119699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.0431952476501465, - "loss_rtd": 0.32513174414634705, - "loss_sent": 0.13575586676597595, - "loss_sod": 0.06492830812931061, - "loss_total": 0.5258159637451172, - "step": 119699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.1140689849853516, - "learning_rate": 6.0333807041722824e-05, - "loss": 0.5475, - "step": 119700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.197576999664307, - "loss_rtd": 0.303524911403656, - "loss_sent": 0.2978334128856659, - "loss_sod": 0.07744783163070679, - "loss_total": 0.6788061857223511, - "step": 119799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.200259208679199, - "loss_rtd": 0.28390949964523315, - "loss_sent": 0.39740145206451416, - "loss_sod": 0.04918929561972618, - "loss_total": 0.7305002212524414, - "step": 119799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.7545603513717651, - "learning_rate": 6.030275684325151e-05, - "loss": 0.5512, - "step": 119800 - }, - { - "epoch": 0.013798, - "loss_gen": 4.439004898071289, - "loss_rtd": 0.32057374715805054, - "loss_sent": 0.04900732263922691, - "loss_sod": 0.0249588992446661, - "loss_total": 0.3945399820804596, - "step": 119899 - }, - { - "epoch": 0.013798, - "loss_gen": 4.865222454071045, - "loss_rtd": 0.3066619336605072, - "loss_sent": 0.053667474538087845, - "loss_sod": 0.009524598717689514, - "loss_total": 0.36985403299331665, - "step": 119899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.9675593376159668, - "learning_rate": 6.0271702494830976e-05, - "loss": 0.5408, - "step": 119900 - }, - { - "epoch": 0.013998, - "loss_gen": 4.92477560043335, - "loss_rtd": 0.30895090103149414, - "loss_sent": 0.3618309199810028, - "loss_sod": 0.05889948457479477, - "loss_total": 0.7296813130378723, - "step": 119999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.568111419677734, - "loss_rtd": 0.3014170229434967, - "loss_sent": 0.012722902931272984, - "loss_sod": 0.1176237165927887, - "loss_total": 0.4317636489868164, - "step": 119999 - }, - { - "epoch": 0.014, - "grad_norm": 1.7804460525512695, - "learning_rate": 6.0240644008969904e-05, - "loss": 0.5416, - "step": 120000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5281617641448975, - "eval_runtime": 151.1779, - "eval_samples_per_second": 102.151, - "eval_steps_per_second": 0.8, - "step": 120000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.095440864562988, - "loss_rtd": 0.29845625162124634, - "loss_sent": 0.09549505263566971, - "loss_sod": 0.08383683115243912, - "loss_total": 0.47778812050819397, - "step": 120099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.03488826751709, - "loss_rtd": 0.3122705817222595, - "loss_sent": 0.437841534614563, - "loss_sod": 0.024121161550283432, - "loss_total": 0.7742332816123962, - "step": 120099 - }, - { - "epoch": 0.0142, - "grad_norm": 2.029510021209717, - "learning_rate": 6.020958139817864e-05, - "loss": 0.5411, - "step": 120100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.938586711883545, - "loss_rtd": 0.2876558303833008, - "loss_sent": 0.2451150119304657, - "loss_sod": 0.09866916388273239, - "loss_total": 0.6314400434494019, - "step": 120199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.084557056427002, - "loss_rtd": 0.3087672293186188, - "loss_sent": 0.19489362835884094, - "loss_sod": 0.1311921775341034, - "loss_total": 0.6348530054092407, - "step": 120199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.1793755292892456, - "learning_rate": 6.017851467496922e-05, - "loss": 0.5461, - "step": 120200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.093992710113525, - "loss_rtd": 0.3222237825393677, - "loss_sent": 0.205959290266037, - "loss_sod": 0.021119918674230576, - "loss_total": 0.5493029952049255, - "step": 120299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.074092388153076, - "loss_rtd": 0.32208383083343506, - "loss_sent": 0.24837274849414825, - "loss_sod": 0.012265660800039768, - "loss_total": 0.5827222466468811, - "step": 120299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.5728222131729126, - "learning_rate": 6.01474438518553e-05, - "loss": 0.5437, - "step": 120300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.079484939575195, - "loss_rtd": 0.29105761647224426, - "loss_sent": 0.293789803981781, - "loss_sod": 0.01728013902902603, - "loss_total": 0.6021275520324707, - "step": 120399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.114513874053955, - "loss_rtd": 0.31496596336364746, - "loss_sent": 0.47538989782333374, - "loss_sod": 0.10584516823291779, - "loss_total": 0.8962010145187378, - "step": 120399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.357329249382019, - "learning_rate": 6.011636894135222e-05, - "loss": 0.556, - "step": 120400 - }, - { - "epoch": 0.014998, - "loss_gen": 4.6962761878967285, - "loss_rtd": 0.26938948035240173, - "loss_sent": 0.15451398491859436, - "loss_sod": 0.03728630393743515, - "loss_total": 0.46118977665901184, - "step": 120499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.1061906814575195, - "loss_rtd": 0.2843681871891022, - "loss_sent": 0.021051516756415367, - "loss_sod": 0.10623716562986374, - "loss_total": 0.4116568863391876, - "step": 120499 - }, - { - "epoch": 0.015, - "grad_norm": 0.8589921593666077, - "learning_rate": 6.008528995597692e-05, - "loss": 0.5451, - "step": 120500 - }, - { - "epoch": 0.015198, - "loss_gen": 4.76133918762207, - "loss_rtd": 0.2874845862388611, - "loss_sent": 0.037548765540122986, - "loss_sod": 0.08966411650180817, - "loss_total": 0.41469746828079224, - "step": 120599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.528561592102051, - "loss_rtd": 0.29467278718948364, - "loss_sent": 0.00033994315890595317, - "loss_sod": 0.21030665934085846, - "loss_total": 0.505319356918335, - "step": 120599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.9326037764549255, - "learning_rate": 6.0054206908248054e-05, - "loss": 0.5361, - "step": 120600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.1329851150512695, - "loss_rtd": 0.3232683837413788, - "loss_sent": 0.447965145111084, - "loss_sod": 0.04933439940214157, - "loss_total": 0.8205679655075073, - "step": 120699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.720605373382568, - "loss_rtd": 0.3047685921192169, - "loss_sent": 0.055031027644872665, - "loss_sod": 0.16122189164161682, - "loss_total": 0.5210214853286743, - "step": 120699 - }, - { - "epoch": 0.0154, - "grad_norm": 2.2183480262756348, - "learning_rate": 6.002311981068583e-05, - "loss": 0.5477, - "step": 120700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.154892921447754, - "loss_rtd": 0.3027958869934082, - "loss_sent": 0.2581217586994171, - "loss_sod": 0.026079662144184113, - "loss_total": 0.5869972705841064, - "step": 120799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.97944974899292, - "loss_rtd": 0.31093499064445496, - "loss_sent": 0.3042903244495392, - "loss_sod": 0.058462005108594894, - "loss_total": 0.6736873388290405, - "step": 120799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.1163026094436646, - "learning_rate": 5.999202867581216e-05, - "loss": 0.5503, - "step": 120800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.936832427978516, - "loss_rtd": 0.294286847114563, - "loss_sent": 0.12870286405086517, - "loss_sod": 0.09846531599760056, - "loss_total": 0.5214550495147705, - "step": 120899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.880770206451416, - "loss_rtd": 0.31502896547317505, - "loss_sent": 0.07885976135730743, - "loss_sod": 0.10253088176250458, - "loss_total": 0.49641960859298706, - "step": 120899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.0672459602355957, - "learning_rate": 5.996093351615053e-05, - "loss": 0.5368, - "step": 120900 - }, - { - "epoch": 0.015998, - "loss_gen": 4.134515762329102, - "loss_rtd": 0.2835191488265991, - "loss_sent": 0.07676522433757782, - "loss_sod": 0.0987343117594719, - "loss_total": 0.4590187072753906, - "step": 120999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.60503625869751, - "loss_rtd": 0.31180766224861145, - "loss_sent": 0.17509113252162933, - "loss_sod": 0.143839031457901, - "loss_total": 0.630737841129303, - "step": 120999 - }, - { - "epoch": 0.016, - "grad_norm": 1.1638810634613037, - "learning_rate": 5.992983434422607e-05, - "loss": 0.5428, - "step": 121000 - }, - { - "epoch": 0.016, - "eval_loss": 0.5266171097755432, - "eval_runtime": 151.1401, - "eval_samples_per_second": 102.177, - "eval_steps_per_second": 0.801, - "step": 121000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.856085777282715, - "loss_rtd": 0.3030649721622467, - "loss_sent": 0.1892850399017334, - "loss_sod": 0.06271328032016754, - "loss_total": 0.5550633072853088, - "step": 121099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.909219264984131, - "loss_rtd": 0.3179358243942261, - "loss_sent": 0.2385430932044983, - "loss_sod": 0.02142917737364769, - "loss_total": 0.5779080986976624, - "step": 121099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.5869792699813843, - "learning_rate": 5.9898731172565515e-05, - "loss": 0.5578, - "step": 121100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.8263068199157715, - "loss_rtd": 0.2914373278617859, - "loss_sent": 0.3489281237125397, - "loss_sod": 0.025136850774288177, - "loss_total": 0.6655023097991943, - "step": 121199 - }, - { - "epoch": 0.016398, - "loss_gen": 3.9975028038024902, - "loss_rtd": 0.2875139117240906, - "loss_sent": 0.0004698280245065689, - "loss_sod": 0.1230778768658638, - "loss_total": 0.4110616147518158, - "step": 121199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.3362252712249756, - "learning_rate": 5.986762401369724e-05, - "loss": 0.5514, - "step": 121200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.069579124450684, - "loss_rtd": 0.3111385703086853, - "loss_sent": 0.21980202198028564, - "loss_sod": 0.03434155881404877, - "loss_total": 0.5652821660041809, - "step": 121299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.631121635437012, - "loss_rtd": 0.2858448028564453, - "loss_sent": 0.03684644028544426, - "loss_sod": 0.08502034097909927, - "loss_total": 0.40771156549453735, - "step": 121299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.7875375747680664, - "learning_rate": 5.9836512880151185e-05, - "loss": 0.5644, - "step": 121300 - }, - { - "epoch": 0.016798, - "loss_gen": 4.904500484466553, - "loss_rtd": 0.2946925461292267, - "loss_sent": 0.14646261930465698, - "loss_sod": 0.02115407958626747, - "loss_total": 0.46230924129486084, - "step": 121399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.832746505737305, - "loss_rtd": 0.29741933941841125, - "loss_sent": 0.08594054728746414, - "loss_sod": 0.1111661046743393, - "loss_total": 0.4945259690284729, - "step": 121399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.589100182056427, - "learning_rate": 5.980539778445892e-05, - "loss": 0.5446, - "step": 121400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.133731365203857, - "loss_rtd": 0.29808109998703003, - "loss_sent": 0.23018375039100647, - "loss_sod": 0.1376340538263321, - "loss_total": 0.6658989191055298, - "step": 121499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.038772106170654, - "loss_rtd": 0.3078896105289459, - "loss_sent": 0.31025782227516174, - "loss_sod": 0.00960936862975359, - "loss_total": 0.6277568340301514, - "step": 121499 - }, - { - "epoch": 0.017, - "grad_norm": 0.9874173402786255, - "learning_rate": 5.97742787391536e-05, - "loss": 0.5534, - "step": 121500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.801186561584473, - "loss_rtd": 0.2905040383338928, - "loss_sent": 0.2787076532840729, - "loss_sod": 0.0025419124867767096, - "loss_total": 0.5717536211013794, - "step": 121599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.082403182983398, - "loss_rtd": 0.2908390164375305, - "loss_sent": 0.07650449126958847, - "loss_sod": 0.10242286324501038, - "loss_total": 0.46976637840270996, - "step": 121599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.8432844281196594, - "learning_rate": 5.974315575676998e-05, - "loss": 0.5422, - "step": 121600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.10474967956543, - "loss_rtd": 0.31578487157821655, - "loss_sent": 0.3773306608200073, - "loss_sod": 0.014497784897685051, - "loss_total": 0.7076133489608765, - "step": 121699 - }, - { - "epoch": 0.017398, - "loss_gen": 4.390714645385742, - "loss_rtd": 0.2889076769351959, - "loss_sent": 0.01650119200348854, - "loss_sod": 0.2263110876083374, - "loss_total": 0.5317199230194092, - "step": 121699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.9937688708305359, - "learning_rate": 5.971202884984438e-05, - "loss": 0.545, - "step": 121700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.184569358825684, - "loss_rtd": 0.26924213767051697, - "loss_sent": 0.12040992081165314, - "loss_sod": 0.024413447827100754, - "loss_total": 0.41406548023223877, - "step": 121799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.181044578552246, - "loss_rtd": 0.29509222507476807, - "loss_sent": 0.21284370124340057, - "loss_sod": 0.023889530450105667, - "loss_total": 0.5318254828453064, - "step": 121799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.8882488012313843, - "learning_rate": 5.968089803091471e-05, - "loss": 0.5501, - "step": 121800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.14775276184082, - "loss_rtd": 0.3031477928161621, - "loss_sent": 0.32531747221946716, - "loss_sod": 0.09638293087482452, - "loss_total": 0.7248481512069702, - "step": 121899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.95175313949585, - "loss_rtd": 0.30131402611732483, - "loss_sent": 0.3279035985469818, - "loss_sod": 0.055954333394765854, - "loss_total": 0.6851719617843628, - "step": 121899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.5275734663009644, - "learning_rate": 5.964976331252049e-05, - "loss": 0.5466, - "step": 121900 - }, - { - "epoch": 0.017998, - "loss_gen": 4.066583156585693, - "loss_rtd": 0.28303706645965576, - "loss_sent": 0.027056049555540085, - "loss_sod": 0.1286648064851761, - "loss_total": 0.43875789642333984, - "step": 121999 - }, - { - "epoch": 0.017998, - "loss_gen": 4.806674480438232, - "loss_rtd": 0.31188538670539856, - "loss_sent": 0.14575275778770447, - "loss_sod": 0.059200696647167206, - "loss_total": 0.5168388485908508, - "step": 121999 - }, - { - "epoch": 0.018, - "grad_norm": 0.8213522434234619, - "learning_rate": 5.961862470720274e-05, - "loss": 0.5543, - "step": 122000 - }, - { - "epoch": 0.018, - "eval_loss": 0.526357114315033, - "eval_runtime": 151.087, - "eval_samples_per_second": 102.213, - "eval_steps_per_second": 0.801, - "step": 122000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.63696813583374, - "loss_rtd": 0.2938276529312134, - "loss_sent": 0.15499430894851685, - "loss_sod": 0.11129481345415115, - "loss_total": 0.5601167678833008, - "step": 122099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.119730472564697, - "loss_rtd": 0.3093253970146179, - "loss_sent": 0.21570608019828796, - "loss_sod": 0.04213138669729233, - "loss_total": 0.5671628713607788, - "step": 122099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.5725865364074707, - "learning_rate": 5.9587482227504135e-05, - "loss": 0.5374, - "step": 122100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.021235466003418, - "loss_rtd": 0.30636805295944214, - "loss_sent": 0.5750591158866882, - "loss_sod": 0.22981981933116913, - "loss_total": 1.1112470626831055, - "step": 122199 - }, - { - "epoch": 0.018398, - "loss_gen": 4.6234235763549805, - "loss_rtd": 0.32066893577575684, - "loss_sent": 0.2912712097167969, - "loss_sod": 0.023670032620429993, - "loss_total": 0.6356101632118225, - "step": 122199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.8399732112884521, - "learning_rate": 5.9556335885968816e-05, - "loss": 0.5571, - "step": 122200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.109967231750488, - "loss_rtd": 0.3167197108268738, - "loss_sent": 0.34567931294441223, - "loss_sod": 0.01075891312211752, - "loss_total": 0.6731579303741455, - "step": 122299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.150479316711426, - "loss_rtd": 0.30093443393707275, - "loss_sent": 0.14319652318954468, - "loss_sod": 0.10695550590753555, - "loss_total": 0.55108642578125, - "step": 122299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.9634116888046265, - "learning_rate": 5.952518569514256e-05, - "loss": 0.5425, - "step": 122300 - }, - { - "epoch": 0.018798, - "loss_gen": 4.859994411468506, - "loss_rtd": 0.3014945387840271, - "loss_sent": 0.2702159881591797, - "loss_sod": 0.06617426872253418, - "loss_total": 0.637884795665741, - "step": 122399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.301722049713135, - "loss_rtd": 0.2949070632457733, - "loss_sent": 0.39677849411964417, - "loss_sod": 0.03144046291708946, - "loss_total": 0.7231260538101196, - "step": 122399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.1997106075286865, - "learning_rate": 5.9494031667572634e-05, - "loss": 0.5414, - "step": 122400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.732713222503662, - "loss_rtd": 0.30171406269073486, - "loss_sent": 0.21200843155384064, - "loss_sod": 0.10248461365699768, - "loss_total": 0.6162071228027344, - "step": 122499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.2896952629089355, - "loss_rtd": 0.298050194978714, - "loss_sent": 0.22778217494487762, - "loss_sod": 0.07858137786388397, - "loss_total": 0.6044137477874756, - "step": 122499 - }, - { - "epoch": 0.019, - "grad_norm": 0.8152273893356323, - "learning_rate": 5.946287381580789e-05, - "loss": 0.5435, - "step": 122500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.822017192840576, - "loss_rtd": 0.3023791015148163, - "loss_sent": 0.40086454153060913, - "loss_sod": 0.0457364022731781, - "loss_total": 0.7489800453186035, - "step": 122599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.883655071258545, - "loss_rtd": 0.2954021990299225, - "loss_sent": 0.1451638638973236, - "loss_sod": 0.06186845153570175, - "loss_total": 0.502434492111206, - "step": 122599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.943340539932251, - "learning_rate": 5.94317121523987e-05, - "loss": 0.5519, - "step": 122600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.133467197418213, - "loss_rtd": 0.3176296055316925, - "loss_sent": 0.2999730706214905, - "loss_sod": 0.03264019265770912, - "loss_total": 0.6502428650856018, - "step": 122699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.871667861938477, - "loss_rtd": 0.3009471297264099, - "loss_sent": 0.04241713508963585, - "loss_sod": 0.2269972562789917, - "loss_total": 0.5703614950180054, - "step": 122699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.9830484390258789, - "learning_rate": 5.9400546689897e-05, - "loss": 0.5354, - "step": 122700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.362318992614746, - "loss_rtd": 0.27739039063453674, - "loss_sent": 0.00620456924661994, - "loss_sod": 0.16359837353229523, - "loss_total": 0.44719335436820984, - "step": 122799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.854663372039795, - "loss_rtd": 0.30164459347724915, - "loss_sent": 0.19727568328380585, - "loss_sod": 0.014775708317756653, - "loss_total": 0.513696014881134, - "step": 122799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.8522732257843018, - "learning_rate": 5.936937744085619e-05, - "loss": 0.5399, - "step": 122800 - }, - { - "epoch": 0.019798, - "loss_gen": 4.497557640075684, - "loss_rtd": 0.2768266499042511, - "loss_sent": 0.037511665374040604, - "loss_sod": 0.043739430606365204, - "loss_total": 0.3580777645111084, - "step": 122899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.397903919219971, - "loss_rtd": 0.3055480718612671, - "loss_sent": 0.13608016073703766, - "loss_sod": 0.06525881588459015, - "loss_total": 0.5068870186805725, - "step": 122899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.7463865876197815, - "learning_rate": 5.933820441783129e-05, - "loss": 0.5494, - "step": 122900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.11893892288208, - "loss_rtd": 0.29417848587036133, - "loss_sent": 0.052359383553266525, - "loss_sod": 0.1419471800327301, - "loss_total": 0.48848503828048706, - "step": 122999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.288945198059082, - "loss_rtd": 0.28406262397766113, - "loss_sent": 0.4790675640106201, - "loss_sod": 0.06081649661064148, - "loss_total": 0.8239467144012451, - "step": 122999 - }, - { - "epoch": 0.02, - "grad_norm": 1.3207755088806152, - "learning_rate": 5.930702763337875e-05, - "loss": 0.567, - "step": 123000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5226380825042725, - "eval_runtime": 152.5005, - "eval_samples_per_second": 101.265, - "eval_steps_per_second": 0.793, - "step": 123000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.152369976043701, - "loss_rtd": 0.2915489077568054, - "loss_sent": 0.6095741987228394, - "loss_sod": 0.04649090766906738, - "loss_total": 0.9476140141487122, - "step": 123099 - }, - { - "epoch": 0.020198, - "loss_gen": 4.896316051483154, - "loss_rtd": 0.28835970163345337, - "loss_sent": 0.15937300026416779, - "loss_sod": 0.1062634140253067, - "loss_total": 0.5539960861206055, - "step": 123099 - }, - { - "epoch": 0.0202, - "grad_norm": 3.124485731124878, - "learning_rate": 5.92758471000566e-05, - "loss": 0.5493, - "step": 123100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.231166362762451, - "loss_rtd": 0.3091714084148407, - "loss_sent": 0.3914588987827301, - "loss_sod": 0.023496031761169434, - "loss_total": 0.7241263389587402, - "step": 123199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.008862018585205, - "loss_rtd": 0.31297391653060913, - "loss_sent": 0.3347489535808563, - "loss_sod": 0.07666729390621185, - "loss_total": 0.7243901491165161, - "step": 123199 - }, - { - "epoch": 0.0204, - "grad_norm": 2.304495096206665, - "learning_rate": 5.924466283042435e-05, - "loss": 0.5379, - "step": 123200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.942907333374023, - "loss_rtd": 0.30902355909347534, - "loss_sent": 0.16258026659488678, - "loss_sod": 0.09693579375743866, - "loss_total": 0.5685396194458008, - "step": 123299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.27769136428833, - "loss_rtd": 0.32299166917800903, - "loss_sent": 0.5551480650901794, - "loss_sod": 0.1346132755279541, - "loss_total": 1.0127530097961426, - "step": 123299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.7606263160705566, - "learning_rate": 5.9213474837043014e-05, - "loss": 0.5298, - "step": 123300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.086638450622559, - "loss_rtd": 0.28601691126823425, - "loss_sent": 0.2600572407245636, - "loss_sod": 0.01576514169573784, - "loss_total": 0.5618392825126648, - "step": 123399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.126517295837402, - "loss_rtd": 0.29980188608169556, - "loss_sent": 0.3001590371131897, - "loss_sod": 0.03337424248456955, - "loss_total": 0.6333351731300354, - "step": 123399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.766998529434204, - "learning_rate": 5.918228313247511e-05, - "loss": 0.5352, - "step": 123400 - }, - { - "epoch": 0.020998, - "loss_gen": 4.065799236297607, - "loss_rtd": 0.27878060936927795, - "loss_sent": 0.07709919661283493, - "loss_sod": 0.07623769342899323, - "loss_total": 0.4321175217628479, - "step": 123499 - }, - { - "epoch": 0.020998, - "loss_gen": 4.259700298309326, - "loss_rtd": 0.29394569993019104, - "loss_sent": 0.024310484528541565, - "loss_sod": 0.10083907842636108, - "loss_total": 0.4190952777862549, - "step": 123499 - }, - { - "epoch": 0.021, - "grad_norm": 1.1099777221679688, - "learning_rate": 5.915108772928468e-05, - "loss": 0.5423, - "step": 123500 - }, - { - "epoch": 0.021198, - "loss_gen": 4.254960060119629, - "loss_rtd": 0.2864917516708374, - "loss_sent": 0.0008751353598199785, - "loss_sod": 0.1623045951128006, - "loss_total": 0.4496714770793915, - "step": 123599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.17879581451416, - "loss_rtd": 0.28422412276268005, - "loss_sent": 5.6978515203809366e-05, - "loss_sod": 0.13626736402511597, - "loss_total": 0.4205484688282013, - "step": 123599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.1184039115905762, - "learning_rate": 5.911988864003718e-05, - "loss": 0.5359, - "step": 123600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.00405216217041, - "loss_rtd": 0.3066645562648773, - "loss_sent": 0.06916403770446777, - "loss_sod": 0.027641355991363525, - "loss_total": 0.4034699499607086, - "step": 123699 - }, - { - "epoch": 0.021398, - "loss_gen": 4.681728363037109, - "loss_rtd": 0.2836722135543823, - "loss_sent": 0.029413238167762756, - "loss_sod": 0.16685713827610016, - "loss_total": 0.47994256019592285, - "step": 123699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.7159540057182312, - "learning_rate": 5.9088685877299645e-05, - "loss": 0.528, - "step": 123700 - }, - { - "epoch": 0.021598, - "loss_gen": 4.758907318115234, - "loss_rtd": 0.29878365993499756, - "loss_sent": 0.06928456574678421, - "loss_sod": 0.1059439554810524, - "loss_total": 0.474012166261673, - "step": 123799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.365872383117676, - "loss_rtd": 0.28514567017555237, - "loss_sent": 0.06936918944120407, - "loss_sod": 0.1124887466430664, - "loss_total": 0.46700361371040344, - "step": 123799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.8247285485267639, - "learning_rate": 5.905747945364052e-05, - "loss": 0.5443, - "step": 123800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.873709678649902, - "loss_rtd": 0.2912997007369995, - "loss_sent": 0.14019466936588287, - "loss_sod": 0.09625925868749619, - "loss_total": 0.5277536511421204, - "step": 123899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.2385478019714355, - "loss_rtd": 0.3011288642883301, - "loss_sent": 0.38254106044769287, - "loss_sod": 0.06964413821697235, - "loss_total": 0.7533140778541565, - "step": 123899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.5148694515228271, - "learning_rate": 5.902626938162975e-05, - "loss": 0.554, - "step": 123900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.200740814208984, - "loss_rtd": 0.30683231353759766, - "loss_sent": 0.1646765172481537, - "loss_sod": 0.020718924701213837, - "loss_total": 0.4922277629375458, - "step": 123999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.879190921783447, - "loss_rtd": 0.3014052212238312, - "loss_sent": 0.12611354887485504, - "loss_sod": 0.031445227563381195, - "loss_total": 0.4589639902114868, - "step": 123999 - }, - { - "epoch": 0.022, - "grad_norm": 0.8267918825149536, - "learning_rate": 5.899505567383876e-05, - "loss": 0.5372, - "step": 124000 - }, - { - "epoch": 0.022, - "eval_loss": 0.5149691104888916, - "eval_runtime": 151.0729, - "eval_samples_per_second": 102.222, - "eval_steps_per_second": 0.801, - "step": 124000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.533763885498047, - "loss_rtd": 0.2997266352176666, - "loss_sent": 0.11060944199562073, - "loss_sod": 0.020724087953567505, - "loss_total": 0.43106016516685486, - "step": 124099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.938562393188477, - "loss_rtd": 0.3038523197174072, - "loss_sent": 0.1367499977350235, - "loss_sod": 0.049065910279750824, - "loss_total": 0.48966825008392334, - "step": 124099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.5654602646827698, - "learning_rate": 5.896383834284042e-05, - "loss": 0.5342, - "step": 124100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.137826442718506, - "loss_rtd": 0.32040926814079285, - "loss_sent": 0.47739261388778687, - "loss_sod": 0.10855264961719513, - "loss_total": 0.906354546546936, - "step": 124199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.2313408851623535, - "loss_rtd": 0.27901092171669006, - "loss_sent": 0.21637006103992462, - "loss_sod": 0.1009574681520462, - "loss_total": 0.5963384509086609, - "step": 124199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.7423334121704102, - "learning_rate": 5.893261740120907e-05, - "loss": 0.5487, - "step": 124200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.9825944900512695, - "loss_rtd": 0.31523093581199646, - "loss_sent": 0.09425953775644302, - "loss_sod": 0.10607744753360748, - "loss_total": 0.51556795835495, - "step": 124299 - }, - { - "epoch": 0.022598, - "loss_gen": 3.9386215209960938, - "loss_rtd": 0.2735031843185425, - "loss_sent": 3.841445141006261e-05, - "loss_sod": 0.1777140200138092, - "loss_total": 0.4512556195259094, - "step": 124299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.0147106647491455, - "learning_rate": 5.890139286152048e-05, - "loss": 0.5542, - "step": 124300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.006237506866455, - "loss_rtd": 0.3022223711013794, - "loss_sent": 0.166145458817482, - "loss_sod": 0.010666808113455772, - "loss_total": 0.4790346622467041, - "step": 124399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.274487495422363, - "loss_rtd": 0.2841145694255829, - "loss_sent": 0.2956092655658722, - "loss_sod": 0.056456875056028366, - "loss_total": 0.6361806988716125, - "step": 124399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.7239913940429688, - "learning_rate": 5.887016473635193e-05, - "loss": 0.5355, - "step": 124400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.045144081115723, - "loss_rtd": 0.29968875646591187, - "loss_sent": 0.17614498734474182, - "loss_sod": 0.036587730050086975, - "loss_total": 0.5124214887619019, - "step": 124499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.091627597808838, - "loss_rtd": 0.28908464312553406, - "loss_sent": 0.5740426182746887, - "loss_sod": 0.08704821765422821, - "loss_total": 0.9501754641532898, - "step": 124499 - }, - { - "epoch": 0.023, - "grad_norm": 2.1002988815307617, - "learning_rate": 5.8838933038282075e-05, - "loss": 0.5456, - "step": 124500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.296675682067871, - "loss_rtd": 0.3025449216365814, - "loss_sent": 0.459489107131958, - "loss_sod": 0.10067996382713318, - "loss_total": 0.8627139925956726, - "step": 124599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.145221710205078, - "loss_rtd": 0.3288326561450958, - "loss_sent": 0.0934688001871109, - "loss_sod": 0.03551265969872475, - "loss_total": 0.45781409740448, - "step": 124599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.4806410074234009, - "learning_rate": 5.880769777989106e-05, - "loss": 0.5551, - "step": 124600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.228641033172607, - "loss_rtd": 0.29840022325515747, - "loss_sent": 0.2383067011833191, - "loss_sod": 0.04886143282055855, - "loss_total": 0.585568368434906, - "step": 124699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.166115760803223, - "loss_rtd": 0.28897228837013245, - "loss_sent": 0.2666550874710083, - "loss_sod": 0.024279436096549034, - "loss_total": 0.5799068212509155, - "step": 124699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.4311983585357666, - "learning_rate": 5.877645897376042e-05, - "loss": 0.5439, - "step": 124700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.865180492401123, - "loss_rtd": 0.34005826711654663, - "loss_sent": 0.1311091035604477, - "loss_sod": 0.002296092454344034, - "loss_total": 0.4734634757041931, - "step": 124799 - }, - { - "epoch": 0.023598, - "loss_gen": 4.964910507202148, - "loss_rtd": 0.31438836455345154, - "loss_sent": 0.070197694003582, - "loss_sod": 0.004081283695995808, - "loss_total": 0.38866734504699707, - "step": 124799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.5093366503715515, - "learning_rate": 5.874521663247316e-05, - "loss": 0.5561, - "step": 124800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.0229339599609375, - "loss_rtd": 0.2815239131450653, - "loss_sent": 0.11104641854763031, - "loss_sod": 0.031022746115922928, - "loss_total": 0.42359310388565063, - "step": 124899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.6763691902160645, - "loss_rtd": 0.3147454857826233, - "loss_sent": 0.01773260533809662, - "loss_sod": 0.16944128274917603, - "loss_total": 0.5019193887710571, - "step": 124899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.7633762359619141, - "learning_rate": 5.871397076861368e-05, - "loss": 0.5414, - "step": 124900 - }, - { - "epoch": 0.023998, - "loss_gen": 4.248086452484131, - "loss_rtd": 0.2752586901187897, - "loss_sent": 3.502455001580529e-05, - "loss_sod": 0.15013575553894043, - "loss_total": 0.42542946338653564, - "step": 124999 - }, - { - "epoch": 0.023998, - "loss_gen": 4.334212303161621, - "loss_rtd": 0.2842738926410675, - "loss_sent": 0.00014213178656063974, - "loss_sod": 0.16111718118190765, - "loss_total": 0.4455331861972809, - "step": 124999 - }, - { - "epoch": 0.024, - "grad_norm": 0.9359869360923767, - "learning_rate": 5.868272139476783e-05, - "loss": 0.5328, - "step": 125000 - }, - { - "epoch": 0.024, - "eval_loss": 0.5189536809921265, - "eval_runtime": 151.055, - "eval_samples_per_second": 102.234, - "eval_steps_per_second": 0.801, - "step": 125000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.009149074554443, - "loss_rtd": 0.2947096824645996, - "loss_sent": 0.14182795584201813, - "loss_sod": 0.022032614797353745, - "loss_total": 0.4585702419281006, - "step": 125099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.443232536315918, - "loss_rtd": 0.2923023998737335, - "loss_sent": 0.19547949731349945, - "loss_sod": 0.09092387557029724, - "loss_total": 0.5787057876586914, - "step": 125099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.1454132795333862, - "learning_rate": 5.8651468523522826e-05, - "loss": 0.5491, - "step": 125100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.078240871429443, - "loss_rtd": 0.30490824580192566, - "loss_sent": 0.20921728014945984, - "loss_sod": 0.09317630529403687, - "loss_total": 0.6073018312454224, - "step": 125199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.094516754150391, - "loss_rtd": 0.29713723063468933, - "loss_sent": 0.2630443572998047, - "loss_sod": 0.1428437978029251, - "loss_total": 0.7030254006385803, - "step": 125199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.592337965965271, - "learning_rate": 5.862021216746735e-05, - "loss": 0.5311, - "step": 125200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.01593017578125, - "loss_rtd": 0.2979161739349365, - "loss_sent": 0.23441477119922638, - "loss_sod": 0.06355893611907959, - "loss_total": 0.5958898663520813, - "step": 125299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.215662002563477, - "loss_rtd": 0.30738565325737, - "loss_sent": 0.12065132707357407, - "loss_sod": 0.034862369298934937, - "loss_total": 0.4628993570804596, - "step": 125299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.7183341383934021, - "learning_rate": 5.858895233919143e-05, - "loss": 0.5316, - "step": 125300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.217257022857666, - "loss_rtd": 0.29606208205223083, - "loss_sent": 0.32616057991981506, - "loss_sod": 0.029719607904553413, - "loss_total": 0.651942253112793, - "step": 125399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.335766315460205, - "loss_rtd": 0.2977655529975891, - "loss_sent": 0.3512638807296753, - "loss_sod": 0.10390587151050568, - "loss_total": 0.7529352903366089, - "step": 125399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.5178121328353882, - "learning_rate": 5.855768905128654e-05, - "loss": 0.5522, - "step": 125400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.150040149688721, - "loss_rtd": 0.2950993776321411, - "loss_sent": 0.28894516825675964, - "loss_sod": 0.04238574951887131, - "loss_total": 0.6264302730560303, - "step": 125499 - }, - { - "epoch": 0.024998, - "loss_gen": 4.919865131378174, - "loss_rtd": 0.29594555497169495, - "loss_sent": 0.09065475314855576, - "loss_sod": 0.17580023407936096, - "loss_total": 0.5624005198478699, - "step": 125499 - }, - { - "epoch": 0.025, - "grad_norm": 1.1456927061080933, - "learning_rate": 5.852642231634553e-05, - "loss": 0.5289, - "step": 125500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.215357303619385, - "loss_rtd": 0.273853600025177, - "loss_sent": 3.686283525894396e-05, - "loss_sod": 0.11730808764696121, - "loss_total": 0.3911985456943512, - "step": 125599 - }, - { - "epoch": 0.025198, - "loss_gen": 4.287888526916504, - "loss_rtd": 0.29126518964767456, - "loss_sent": 0.0007547377608716488, - "loss_sod": 0.1250227689743042, - "loss_total": 0.41704270243644714, - "step": 125599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.0336772203445435, - "learning_rate": 5.849515214696262e-05, - "loss": 0.5185, - "step": 125600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.999906539916992, - "loss_rtd": 0.30497992038726807, - "loss_sent": 0.16713660955429077, - "loss_sod": 0.02549305558204651, - "loss_total": 0.49760958552360535, - "step": 125699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.279110908508301, - "loss_rtd": 0.29239341616630554, - "loss_sent": 0.18312755227088928, - "loss_sod": 0.09581852704286575, - "loss_total": 0.57133948802948, - "step": 125699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8091269135475159, - "learning_rate": 5.846387855573345e-05, - "loss": 0.5448, - "step": 125700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.951651096343994, - "loss_rtd": 0.32205092906951904, - "loss_sent": 0.4923538863658905, - "loss_sod": 0.13187460601329803, - "loss_total": 0.9462794065475464, - "step": 125799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.742814064025879, - "loss_rtd": 0.2959570288658142, - "loss_sent": 0.11529549956321716, - "loss_sod": 0.06500224024057388, - "loss_total": 0.47625476121902466, - "step": 125799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.7001880407333374, - "learning_rate": 5.8432601555254996e-05, - "loss": 0.5498, - "step": 125800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.141140937805176, - "loss_rtd": 0.3066727817058563, - "loss_sent": 0.34323009848594666, - "loss_sod": 0.07900206744670868, - "loss_total": 0.7289049625396729, - "step": 125899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.802655220031738, - "loss_rtd": 0.300791472196579, - "loss_sent": 0.23526684939861298, - "loss_sod": 0.020103124901652336, - "loss_total": 0.5561614632606506, - "step": 125899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.6712987422943115, - "learning_rate": 5.8401321158125666e-05, - "loss": 0.5428, - "step": 125900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.243439674377441, - "loss_rtd": 0.287342369556427, - "loss_sent": 0.41289782524108887, - "loss_sod": 0.03788952901959419, - "loss_total": 0.738129734992981, - "step": 125999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.358307838439941, - "loss_rtd": 0.2985134720802307, - "loss_sent": 0.12002741545438766, - "loss_sod": 0.0510287806391716, - "loss_total": 0.46956968307495117, - "step": 125999 - }, - { - "epoch": 0.026, - "grad_norm": 2.2706379890441895, - "learning_rate": 5.837003737694515e-05, - "loss": 0.5549, - "step": 126000 - }, - { - "epoch": 0.026, - "eval_loss": 0.5160689949989319, - "eval_runtime": 151.3702, - "eval_samples_per_second": 102.021, - "eval_steps_per_second": 0.799, - "step": 126000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.723132610321045, - "loss_rtd": 0.2982438802719116, - "loss_sent": 0.007835360243916512, - "loss_sod": 0.21524950861930847, - "loss_total": 0.5213288068771362, - "step": 126099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.581118106842041, - "loss_rtd": 0.27140286564826965, - "loss_sent": 0.03246323764324188, - "loss_sod": 0.11235076189041138, - "loss_total": 0.4162168502807617, - "step": 126099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.3443397283554077, - "learning_rate": 5.833875022431461e-05, - "loss": 0.5302, - "step": 126100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.162801742553711, - "loss_rtd": 0.2833329737186432, - "loss_sent": 0.2646946310997009, - "loss_sod": 0.06836559623479843, - "loss_total": 0.6163932085037231, - "step": 126199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.058148384094238, - "loss_rtd": 0.29877620935440063, - "loss_sent": 0.3333621323108673, - "loss_sod": 0.06412633508443832, - "loss_total": 0.6962646842002869, - "step": 126199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.4117292165756226, - "learning_rate": 5.830745971283645e-05, - "loss": 0.5333, - "step": 126200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.934667110443115, - "loss_rtd": 0.2820201814174652, - "loss_sent": 0.1041148379445076, - "loss_sod": 0.0714135393500328, - "loss_total": 0.4575485587120056, - "step": 126299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.061122417449951, - "loss_rtd": 0.3041793704032898, - "loss_sent": 0.14399518072605133, - "loss_sod": 0.09221570193767548, - "loss_total": 0.5403902530670166, - "step": 126299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.7418568730354309, - "learning_rate": 5.827616585511453e-05, - "loss": 0.5448, - "step": 126300 - }, - { - "epoch": 0.026798, - "loss_gen": 4.881433486938477, - "loss_rtd": 0.312472403049469, - "loss_sent": 0.19444599747657776, - "loss_sod": 0.06203896924853325, - "loss_total": 0.5689573287963867, - "step": 126399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.254079341888428, - "loss_rtd": 0.304902046918869, - "loss_sent": 0.2852088510990143, - "loss_sod": 0.04830005019903183, - "loss_total": 0.6384109258651733, - "step": 126399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.1201962232589722, - "learning_rate": 5.8244868663753985e-05, - "loss": 0.5564, - "step": 126400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.201193809509277, - "loss_rtd": 0.26028522849082947, - "loss_sent": 0.14957848191261292, - "loss_sod": 0.084686279296875, - "loss_total": 0.4945499897003174, - "step": 126499 - }, - { - "epoch": 0.026998, - "loss_gen": 4.998375415802002, - "loss_rtd": 0.31030091643333435, - "loss_sent": 0.20636168122291565, - "loss_sod": 0.017277412116527557, - "loss_total": 0.5339400172233582, - "step": 126499 - }, - { - "epoch": 0.027, - "grad_norm": 1.3054890632629395, - "learning_rate": 5.821356815136133e-05, - "loss": 0.5452, - "step": 126500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.041162490844727, - "loss_rtd": 0.3027835488319397, - "loss_sent": 0.045284755527973175, - "loss_sod": 0.04813477769494057, - "loss_total": 0.39620307087898254, - "step": 126599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.0087995529174805, - "loss_rtd": 0.3090861141681671, - "loss_sent": 0.2531428933143616, - "loss_sod": 0.013048935681581497, - "loss_total": 0.5752779245376587, - "step": 126599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.8977271914482117, - "learning_rate": 5.818226433054441e-05, - "loss": 0.5542, - "step": 126600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.059216022491455, - "loss_rtd": 0.3061828911304474, - "loss_sent": 0.07738476246595383, - "loss_sod": 0.12489735335111618, - "loss_total": 0.5084649920463562, - "step": 126699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.808566093444824, - "loss_rtd": 0.3014642298221588, - "loss_sent": 0.3923496901988983, - "loss_sod": 0.02318255417048931, - "loss_total": 0.716996431350708, - "step": 126699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.1886048316955566, - "learning_rate": 5.8150957213912406e-05, - "loss": 0.5504, - "step": 126700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.194309234619141, - "loss_rtd": 0.29297277331352234, - "loss_sent": 0.19681888818740845, - "loss_sod": 0.04647182673215866, - "loss_total": 0.5362634658813477, - "step": 126799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.107058048248291, - "loss_rtd": 0.3071816861629486, - "loss_sent": 0.1383209228515625, - "loss_sod": 0.012243765406310558, - "loss_total": 0.45774638652801514, - "step": 126799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.1656361818313599, - "learning_rate": 5.811964681407579e-05, - "loss": 0.5231, - "step": 126800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.980090141296387, - "loss_rtd": 0.3053024113178253, - "loss_sent": 0.10915306210517883, - "loss_sod": 0.015619794838130474, - "loss_total": 0.4300752580165863, - "step": 126899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.233313083648682, - "loss_rtd": 0.28620627522468567, - "loss_sent": 0.08538281917572021, - "loss_sod": 0.054982736706733704, - "loss_total": 0.4265718460083008, - "step": 126899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.9681254029273987, - "learning_rate": 5.808833314364642e-05, - "loss": 0.5424, - "step": 126900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.407595634460449, - "loss_rtd": 0.28838080167770386, - "loss_sent": 0.0010148589499294758, - "loss_sod": 0.16616910696029663, - "loss_total": 0.4555647671222687, - "step": 126999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.3773322105407715, - "loss_rtd": 0.2868803143501282, - "loss_sent": 0.03552349656820297, - "loss_sod": 0.23757776618003845, - "loss_total": 0.5599815249443054, - "step": 126999 - }, - { - "epoch": 0.028, - "grad_norm": 1.0379002094268799, - "learning_rate": 5.8057016215237415e-05, - "loss": 0.5381, - "step": 127000 - }, - { - "epoch": 0.028, - "eval_loss": 0.5150266289710999, - "eval_runtime": 151.2575, - "eval_samples_per_second": 102.097, - "eval_steps_per_second": 0.8, - "step": 127000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.079526901245117, - "loss_rtd": 0.30835166573524475, - "loss_sent": 0.09046214073896408, - "loss_sod": 0.1558559536933899, - "loss_total": 0.5546697378158569, - "step": 127099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.022377014160156, - "loss_rtd": 0.3085956871509552, - "loss_sent": 0.2875477075576782, - "loss_sod": 0.040983445942401886, - "loss_total": 0.6371268033981323, - "step": 127099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.8533868193626404, - "learning_rate": 5.8025696041463264e-05, - "loss": 0.5328, - "step": 127100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.05034875869751, - "loss_rtd": 0.29051896929740906, - "loss_sent": 0.2004084438085556, - "loss_sod": 0.1325872391462326, - "loss_total": 0.6235146522521973, - "step": 127199 - }, - { - "epoch": 0.028398, - "loss_gen": 4.871407508850098, - "loss_rtd": 0.29680755734443665, - "loss_sent": 0.23683467507362366, - "loss_sod": 0.05222240835428238, - "loss_total": 0.5858646631240845, - "step": 127199 - }, - { - "epoch": 0.0284, - "grad_norm": 0.9194096922874451, - "learning_rate": 5.799437263493968e-05, - "loss": 0.5305, - "step": 127200 - }, - { - "epoch": 0.028598, - "loss_gen": 4.758853912353516, - "loss_rtd": 0.2984887659549713, - "loss_sent": 0.19850069284439087, - "loss_sod": 0.02239525318145752, - "loss_total": 0.5193847417831421, - "step": 127299 - }, - { - "epoch": 0.028598, - "loss_gen": 4.938508033752441, - "loss_rtd": 0.2909735143184662, - "loss_sent": 0.18434683978557587, - "loss_sod": 0.05350429564714432, - "loss_total": 0.5288246273994446, - "step": 127299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.3278090953826904, - "learning_rate": 5.796304600828377e-05, - "loss": 0.5242, - "step": 127300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.2907819747924805, - "loss_rtd": 0.3024289906024933, - "loss_sent": 0.1467730700969696, - "loss_sod": 0.11514891684055328, - "loss_total": 0.564350962638855, - "step": 127399 - }, - { - "epoch": 0.028798, - "loss_gen": 4.858901500701904, - "loss_rtd": 0.2916867434978485, - "loss_sent": 0.11312361061573029, - "loss_sod": 0.053617626428604126, - "loss_total": 0.45842796564102173, - "step": 127399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.8836105465888977, - "learning_rate": 5.7931716174113874e-05, - "loss": 0.5525, - "step": 127400 - }, - { - "epoch": 0.028998, - "loss_gen": 4.528642654418945, - "loss_rtd": 0.2835925817489624, - "loss_sent": 0.04004380851984024, - "loss_sod": 0.07841572165489197, - "loss_total": 0.402052104473114, - "step": 127499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.090068340301514, - "loss_rtd": 0.28824785351753235, - "loss_sent": 0.14014855027198792, - "loss_sod": 0.051337309181690216, - "loss_total": 0.4797337055206299, - "step": 127499 - }, - { - "epoch": 0.029, - "grad_norm": 1.31979238986969, - "learning_rate": 5.790038314504966e-05, - "loss": 0.5613, - "step": 127500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.043606758117676, - "loss_rtd": 0.3019232749938965, - "loss_sent": 0.21159958839416504, - "loss_sod": 0.07312679290771484, - "loss_total": 0.5866496562957764, - "step": 127599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.199647426605225, - "loss_rtd": 0.29339706897735596, - "loss_sent": 0.09941595047712326, - "loss_sod": 0.03714088350534439, - "loss_total": 0.4299539029598236, - "step": 127599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.5811394453048706, - "learning_rate": 5.786904693371205e-05, - "loss": 0.5506, - "step": 127600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.513834476470947, - "loss_rtd": 0.282697856426239, - "loss_sent": 0.05248807370662689, - "loss_sod": 0.08066190779209137, - "loss_total": 0.4158478379249573, - "step": 127699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.730311870574951, - "loss_rtd": 0.2756289541721344, - "loss_sent": 0.013358295895159245, - "loss_sod": 0.14654584228992462, - "loss_total": 0.4355331063270569, - "step": 127699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.7642179727554321, - "learning_rate": 5.783770755272329e-05, - "loss": 0.5432, - "step": 127700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.3390398025512695, - "loss_rtd": 0.3092106878757477, - "loss_sent": 0.07087057828903198, - "loss_sod": 0.10748178511857986, - "loss_total": 0.48756304383277893, - "step": 127799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.166284084320068, - "loss_rtd": 0.2996288537979126, - "loss_sent": 0.02704087272286415, - "loss_sod": 0.12750141322612762, - "loss_total": 0.45417115092277527, - "step": 127799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.0137939453125, - "learning_rate": 5.780636501470685e-05, - "loss": 0.5488, - "step": 127800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.168057441711426, - "loss_rtd": 0.28732624650001526, - "loss_sent": 0.6037995219230652, - "loss_sod": 0.05241278186440468, - "loss_total": 0.9435385465621948, - "step": 127899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.055641174316406, - "loss_rtd": 0.290254145860672, - "loss_sent": 0.16115570068359375, - "loss_sod": 0.10485047101974487, - "loss_total": 0.556260347366333, - "step": 127899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.5859217643737793, - "learning_rate": 5.777501933228753e-05, - "loss": 0.5283, - "step": 127900 - }, - { - "epoch": 0.029998, - "loss_gen": 4.708909034729004, - "loss_rtd": 0.29839926958084106, - "loss_sent": 0.2220747321844101, - "loss_sod": 0.061270326375961304, - "loss_total": 0.5817443132400513, - "step": 127999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.2027764320373535, - "loss_rtd": 0.3080592751502991, - "loss_sent": 0.25785529613494873, - "loss_sod": 0.03954096511006355, - "loss_total": 0.6054555177688599, - "step": 127999 - }, - { - "epoch": 0.03, - "grad_norm": 1.232383131980896, - "learning_rate": 5.774367051809134e-05, - "loss": 0.5501, - "step": 128000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5161465406417847, - "eval_runtime": 150.9333, - "eval_samples_per_second": 102.317, - "eval_steps_per_second": 0.802, - "step": 128000 - }, - { - "epoch": 0.030198, - "loss_gen": 4.795260429382324, - "loss_rtd": 0.29082995653152466, - "loss_sent": 0.062463853508234024, - "loss_sod": 0.08286631852388382, - "loss_total": 0.4361601173877716, - "step": 128099 - }, - { - "epoch": 0.030198, - "loss_gen": 4.604288578033447, - "loss_rtd": 0.2832608222961426, - "loss_sent": 0.08235875517129898, - "loss_sod": 0.06919785588979721, - "loss_total": 0.43481743335723877, - "step": 128099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.6762851476669312, - "learning_rate": 5.771231858474559e-05, - "loss": 0.531, - "step": 128100 - }, - { - "epoch": 0.030398, - "loss_gen": 4.632268905639648, - "loss_rtd": 0.31469446420669556, - "loss_sent": 0.160960391163826, - "loss_sod": 0.023928089067339897, - "loss_total": 0.4995829463005066, - "step": 128199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.145657062530518, - "loss_rtd": 0.30525362491607666, - "loss_sent": 0.07963842898607254, - "loss_sod": 0.12434092164039612, - "loss_total": 0.5092329978942871, - "step": 128199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.7570841908454895, - "learning_rate": 5.768096354487885e-05, - "loss": 0.5363, - "step": 128200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.123581886291504, - "loss_rtd": 0.3003334403038025, - "loss_sent": 0.26045313477516174, - "loss_sod": 0.056969910860061646, - "loss_total": 0.6177564859390259, - "step": 128299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.559329986572266, - "loss_rtd": 0.2909456789493561, - "loss_sent": 0.38860034942626953, - "loss_sod": 0.039868880063295364, - "loss_total": 0.7194149494171143, - "step": 128299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.918216347694397, - "learning_rate": 5.76496054111209e-05, - "loss": 0.535, - "step": 128300 - }, - { - "epoch": 0.030798, - "loss_gen": 4.942437648773193, - "loss_rtd": 0.3000963628292084, - "loss_sent": 0.13890379667282104, - "loss_sod": 0.01041445042937994, - "loss_total": 0.44941461086273193, - "step": 128399 - }, - { - "epoch": 0.030798, - "loss_gen": 4.34162712097168, - "loss_rtd": 0.2815767526626587, - "loss_sent": 0.007178016472607851, - "loss_sod": 0.10765630006790161, - "loss_total": 0.39641106128692627, - "step": 128399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.6317238807678223, - "learning_rate": 5.761824419610282e-05, - "loss": 0.5502, - "step": 128400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.209155082702637, - "loss_rtd": 0.29979637265205383, - "loss_sent": 0.06951673328876495, - "loss_sod": 0.15041989088058472, - "loss_total": 0.5197330117225647, - "step": 128499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.554530143737793, - "loss_rtd": 0.307586669921875, - "loss_sent": 0.20848438143730164, - "loss_sod": 0.0821925476193428, - "loss_total": 0.5982636213302612, - "step": 128499 - }, - { - "epoch": 0.031, - "grad_norm": 1.3530056476593018, - "learning_rate": 5.758687991245687e-05, - "loss": 0.5532, - "step": 128500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.005382537841797, - "loss_rtd": 0.2981312870979309, - "loss_sent": 0.37801048159599304, - "loss_sod": 0.07218707352876663, - "loss_total": 0.7483288049697876, - "step": 128599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.880046367645264, - "loss_rtd": 0.30107080936431885, - "loss_sent": 0.4745630919933319, - "loss_sod": 0.038496892899274826, - "loss_total": 0.8141307830810547, - "step": 128599 - }, - { - "epoch": 0.0312, - "grad_norm": 4.0272417068481445, - "learning_rate": 5.7555512572816616e-05, - "loss": 0.5392, - "step": 128600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.588792324066162, - "loss_rtd": 0.290439248085022, - "loss_sent": 0.4006589353084564, - "loss_sod": 0.01840728148818016, - "loss_total": 0.7095054388046265, - "step": 128699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.9264607429504395, - "loss_rtd": 0.2721864879131317, - "loss_sent": 0.12008009105920792, - "loss_sod": 0.046869415789842606, - "loss_total": 0.43913599848747253, - "step": 128699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.8392835855484009, - "learning_rate": 5.7524142189816785e-05, - "loss": 0.5397, - "step": 128700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.434980392456055, - "loss_rtd": 0.2939222753047943, - "loss_sent": 0.24817781150341034, - "loss_sod": 0.04262280464172363, - "loss_total": 0.5847228765487671, - "step": 128799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.024068355560303, - "loss_rtd": 0.28636598587036133, - "loss_sent": 0.14124320447444916, - "loss_sod": 0.05179080367088318, - "loss_total": 0.47939997911453247, - "step": 128799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.8851576447486877, - "learning_rate": 5.7492768776093394e-05, - "loss": 0.5415, - "step": 128800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.2839884757995605, - "loss_rtd": 0.29162558913230896, - "loss_sent": 0.08399336785078049, - "loss_sod": 0.09393030405044556, - "loss_total": 0.4695492684841156, - "step": 128899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.347387790679932, - "loss_rtd": 0.3011826276779175, - "loss_sent": 0.19613268971443176, - "loss_sod": 0.04818310588598251, - "loss_total": 0.5454984307289124, - "step": 128899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.8077017068862915, - "learning_rate": 5.7461392344283626e-05, - "loss": 0.5444, - "step": 128900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.278444766998291, - "loss_rtd": 0.2844964563846588, - "loss_sent": 0.03407386317849159, - "loss_sod": 0.01254432462155819, - "loss_total": 0.33111464977264404, - "step": 128999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.117967128753662, - "loss_rtd": 0.301628053188324, - "loss_sent": 0.10132772475481033, - "loss_sod": 0.052032314240932465, - "loss_total": 0.4549880921840668, - "step": 128999 - }, - { - "epoch": 0.032, - "grad_norm": 0.5169385075569153, - "learning_rate": 5.743001290702592e-05, - "loss": 0.5357, - "step": 129000 - }, - { - "epoch": 0.032, - "eval_loss": 0.5248415470123291, - "eval_runtime": 151.1474, - "eval_samples_per_second": 102.172, - "eval_steps_per_second": 0.801, - "step": 129000 - }, - { - "epoch": 0.032198, - "loss_gen": 4.844819068908691, - "loss_rtd": 0.3168841302394867, - "loss_sent": 0.23479963839054108, - "loss_sod": 0.00928429700434208, - "loss_total": 0.5609680414199829, - "step": 129099 - }, - { - "epoch": 0.032198, - "loss_gen": 5.09084939956665, - "loss_rtd": 0.2808663547039032, - "loss_sent": 0.14822618663311005, - "loss_sod": 0.05094731226563454, - "loss_total": 0.4800398349761963, - "step": 129099 - }, - { - "epoch": 0.0322, - "grad_norm": 2.0065536499023438, - "learning_rate": 5.7398630476959894e-05, - "loss": 0.5392, - "step": 129100 - }, - { - "epoch": 0.032398, - "loss_gen": 4.217450141906738, - "loss_rtd": 0.2663937211036682, - "loss_sent": 0.0009667632402852178, - "loss_sod": 0.18625682592391968, - "loss_total": 0.45361730456352234, - "step": 129199 - }, - { - "epoch": 0.032398, - "loss_gen": 5.381683826446533, - "loss_rtd": 0.2975890636444092, - "loss_sent": 0.31217342615127563, - "loss_sod": 0.03514635190367699, - "loss_total": 0.6449088454246521, - "step": 129199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.4418388605117798, - "learning_rate": 5.7367245066726415e-05, - "loss": 0.5377, - "step": 129200 - }, - { - "epoch": 0.032598, - "loss_gen": 4.9181294441223145, - "loss_rtd": 0.30215147137641907, - "loss_sent": 0.332130491733551, - "loss_sod": 0.03966284915804863, - "loss_total": 0.6739448308944702, - "step": 129299 - }, - { - "epoch": 0.032598, - "loss_gen": 5.0707550048828125, - "loss_rtd": 0.27600985765457153, - "loss_sent": 0.07822196930646896, - "loss_sod": 0.04753671586513519, - "loss_total": 0.4017685651779175, - "step": 129299 - }, - { - "epoch": 0.0326, - "grad_norm": 0.8299680352210999, - "learning_rate": 5.733585668896748e-05, - "loss": 0.5382, - "step": 129300 - }, - { - "epoch": 0.032798, - "loss_gen": 5.268406867980957, - "loss_rtd": 0.2856754660606384, - "loss_sent": 0.419086217880249, - "loss_sod": 0.021928519010543823, - "loss_total": 0.7266901731491089, - "step": 129399 - }, - { - "epoch": 0.032798, - "loss_gen": 5.475241661071777, - "loss_rtd": 0.3006037473678589, - "loss_sent": 0.09587062150239944, - "loss_sod": 0.14535808563232422, - "loss_total": 0.541832447052002, - "step": 129399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.5298182964324951, - "learning_rate": 5.730446535632636e-05, - "loss": 0.5461, - "step": 129400 - }, - { - "epoch": 0.032998, - "loss_gen": 5.033608436584473, - "loss_rtd": 0.2969168722629547, - "loss_sent": 0.055923569947481155, - "loss_sod": 0.0731518417596817, - "loss_total": 0.42599231004714966, - "step": 129499 - }, - { - "epoch": 0.032998, - "loss_gen": 5.0876898765563965, - "loss_rtd": 0.30063095688819885, - "loss_sent": 0.2950197756290436, - "loss_sod": 0.015468468889594078, - "loss_total": 0.6111192107200623, - "step": 129499 - }, - { - "epoch": 0.033, - "grad_norm": 1.4737677574157715, - "learning_rate": 5.727307108144748e-05, - "loss": 0.5369, - "step": 129500 - }, - { - "epoch": 0.033198, - "loss_gen": 5.193936824798584, - "loss_rtd": 0.2994391620159149, - "loss_sent": 0.19632945954799652, - "loss_sod": 0.05846899002790451, - "loss_total": 0.5542376041412354, - "step": 129599 - }, - { - "epoch": 0.033198, - "loss_gen": 4.978903293609619, - "loss_rtd": 0.29046401381492615, - "loss_sent": 0.3001216948032379, - "loss_sod": 0.02204369381070137, - "loss_total": 0.6126294136047363, - "step": 129599 - }, - { - "epoch": 0.0332, - "grad_norm": 1.5114130973815918, - "learning_rate": 5.724167387697643e-05, - "loss": 0.5383, - "step": 129600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.067448616027832, - "loss_rtd": 0.29043594002723694, - "loss_sent": 0.064723439514637, - "loss_sod": 0.23341111838817596, - "loss_total": 0.5885704755783081, - "step": 129699 - }, - { - "epoch": 0.033398, - "loss_gen": 4.303596496582031, - "loss_rtd": 0.2770473062992096, - "loss_sent": 0.02179446630179882, - "loss_sod": 0.07781679183244705, - "loss_total": 0.37665855884552, - "step": 129699 - }, - { - "epoch": 0.0334, - "grad_norm": 0.9134754538536072, - "learning_rate": 5.7210273755560006e-05, - "loss": 0.5321, - "step": 129700 - }, - { - "epoch": 0.033598, - "loss_gen": 5.0098772048950195, - "loss_rtd": 0.28739839792251587, - "loss_sent": 0.03049510531127453, - "loss_sod": 0.27623870968818665, - "loss_total": 0.5941322445869446, - "step": 129799 - }, - { - "epoch": 0.033598, - "loss_gen": 4.247901439666748, - "loss_rtd": 0.26333802938461304, - "loss_sent": 4.661710045184009e-05, - "loss_sod": 0.24826735258102417, - "loss_total": 0.5116519927978516, - "step": 129799 - }, - { - "epoch": 0.0336, - "grad_norm": 1.8941473960876465, - "learning_rate": 5.717887072984617e-05, - "loss": 0.5345, - "step": 129800 - }, - { - "epoch": 0.033798, - "loss_gen": 4.365687847137451, - "loss_rtd": 0.27838221192359924, - "loss_sent": 0.027936099097132683, - "loss_sod": 0.2134028673171997, - "loss_total": 0.5197211503982544, - "step": 129899 - }, - { - "epoch": 0.033798, - "loss_gen": 5.483396530151367, - "loss_rtd": 0.2784916162490845, - "loss_sent": 0.1958129107952118, - "loss_sod": 0.06765724718570709, - "loss_total": 0.5419617891311646, - "step": 129899 - }, - { - "epoch": 0.0338, - "grad_norm": 1.7065073251724243, - "learning_rate": 5.7147464812484075e-05, - "loss": 0.5356, - "step": 129900 - }, - { - "epoch": 0.033998, - "loss_gen": 5.141090393066406, - "loss_rtd": 0.2807604968547821, - "loss_sent": 0.13576620817184448, - "loss_sod": 0.06998756527900696, - "loss_total": 0.48651427030563354, - "step": 129999 - }, - { - "epoch": 0.033998, - "loss_gen": 5.268886089324951, - "loss_rtd": 0.2966271936893463, - "loss_sent": 0.145225390791893, - "loss_sod": 0.10153379291296005, - "loss_total": 0.5433863997459412, - "step": 129999 - }, - { - "epoch": 0.034, - "grad_norm": 1.1913107633590698, - "learning_rate": 5.7116056016124e-05, - "loss": 0.5408, - "step": 130000 - }, - { - "epoch": 0.034, - "eval_loss": 0.5125383138656616, - "eval_runtime": 152.8002, - "eval_samples_per_second": 101.067, - "eval_steps_per_second": 0.792, - "step": 130000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.968357563018799, - "loss_rtd": 0.31810882687568665, - "loss_sent": 0.15254102647304535, - "loss_sod": 0.04814954102039337, - "loss_total": 0.5187994241714478, - "step": 130099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.501764297485352, - "loss_rtd": 0.30586525797843933, - "loss_sent": 0.2929389178752899, - "loss_sod": 0.06635792553424835, - "loss_total": 0.6651620864868164, - "step": 130099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.82964026927948, - "learning_rate": 5.7084644353417415e-05, - "loss": 0.5585, - "step": 130100 - }, - { - "epoch": 0.000398, - "loss_gen": 4.515683174133301, - "loss_rtd": 0.2780984342098236, - "loss_sent": 9.406738536199555e-05, - "loss_sod": 0.10286936163902283, - "loss_total": 0.381061851978302, - "step": 130199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.640872001647949, - "loss_rtd": 0.2829115688800812, - "loss_sent": 0.17435689270496368, - "loss_sod": 0.09970413148403168, - "loss_total": 0.5569725632667542, - "step": 130199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.9950740933418274, - "learning_rate": 5.705322983701692e-05, - "loss": 0.539, - "step": 130200 - }, - { - "epoch": 0.000598, - "loss_gen": 4.904420852661133, - "loss_rtd": 0.31934723258018494, - "loss_sent": 0.3927709460258484, - "loss_sod": 0.00935314130038023, - "loss_total": 0.7214713096618652, - "step": 130299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.066431045532227, - "loss_rtd": 0.30573365092277527, - "loss_sent": 0.29260292649269104, - "loss_sod": 0.09204436838626862, - "loss_total": 0.6903809309005737, - "step": 130299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.1517211198806763, - "learning_rate": 5.702181247957631e-05, - "loss": 0.5413, - "step": 130300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.618826389312744, - "loss_rtd": 0.2726590633392334, - "loss_sent": 6.71393281663768e-05, - "loss_sod": 0.2165694385766983, - "loss_total": 0.48929563164711, - "step": 130399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.029799461364746, - "loss_rtd": 0.28384435176849365, - "loss_sent": 0.13905425369739532, - "loss_sod": 0.10686061531305313, - "loss_total": 0.5297592282295227, - "step": 130399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.1434992551803589, - "learning_rate": 5.699039229375045e-05, - "loss": 0.5423, - "step": 130400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.674434185028076, - "loss_rtd": 0.2828030586242676, - "loss_sent": 0.06495529413223267, - "loss_sod": 0.22607634961605072, - "loss_total": 0.5738347172737122, - "step": 130499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.008847236633301, - "loss_rtd": 0.25200924277305603, - "loss_sent": 0.006744662765413523, - "loss_sod": 0.1287979781627655, - "loss_total": 0.387551873922348, - "step": 130499 - }, - { - "epoch": 0.001, - "grad_norm": 1.069828748703003, - "learning_rate": 5.695896929219543e-05, - "loss": 0.5274, - "step": 130500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.1654534339904785, - "loss_rtd": 0.299507737159729, - "loss_sent": 0.1574164181947708, - "loss_sod": 0.035543106496334076, - "loss_total": 0.4924672842025757, - "step": 130599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.607670783996582, - "loss_rtd": 0.27649277448654175, - "loss_sent": 0.078493133187294, - "loss_sod": 0.15773795545101166, - "loss_total": 0.5127238631248474, - "step": 130599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.8112003803253174, - "learning_rate": 5.6927543487568405e-05, - "loss": 0.5265, - "step": 130600 - }, - { - "epoch": 0.001398, - "loss_gen": 4.456787586212158, - "loss_rtd": 0.2700689733028412, - "loss_sent": 0.053266555070877075, - "loss_sod": 0.09510375559329987, - "loss_total": 0.4184392988681793, - "step": 130699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.192258358001709, - "loss_rtd": 0.2975078523159027, - "loss_sent": 0.12484312057495117, - "loss_sod": 0.02995981276035309, - "loss_total": 0.45231080055236816, - "step": 130699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.5776410102844238, - "learning_rate": 5.6896114892527694e-05, - "loss": 0.525, - "step": 130700 - }, - { - "epoch": 0.001598, - "loss_gen": 4.901659965515137, - "loss_rtd": 0.3314596712589264, - "loss_sent": 0.08643057942390442, - "loss_sod": 0.023916970938444138, - "loss_total": 0.44180721044540405, - "step": 130799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.883615493774414, - "loss_rtd": 0.2808056175708771, - "loss_sent": 0.25803086161613464, - "loss_sod": 0.09379947930574417, - "loss_total": 0.6326359510421753, - "step": 130799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.8937269449234009, - "learning_rate": 5.686468351973272e-05, - "loss": 0.5516, - "step": 130800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.328621864318848, - "loss_rtd": 0.296293169260025, - "loss_sent": 0.11390230059623718, - "loss_sod": 0.05793844163417816, - "loss_total": 0.4681338965892792, - "step": 130899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.119774341583252, - "loss_rtd": 0.2935149371623993, - "loss_sent": 0.26623860001564026, - "loss_sod": 0.06770818680524826, - "loss_total": 0.6274617314338684, - "step": 130899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.0362036228179932, - "learning_rate": 5.683324938184407e-05, - "loss": 0.536, - "step": 130900 - }, - { - "epoch": 0.001998, - "loss_gen": 4.447842597961426, - "loss_rtd": 0.2601456642150879, - "loss_sent": 0.0004762514145113528, - "loss_sod": 0.2477504014968872, - "loss_total": 0.5083723068237305, - "step": 130999 - }, - { - "epoch": 0.001998, - "loss_gen": 4.670206069946289, - "loss_rtd": 0.27382001280784607, - "loss_sent": 0.007198238279670477, - "loss_sod": 0.08487206697463989, - "loss_total": 0.3658903241157532, - "step": 130999 - }, - { - "epoch": 0.002, - "grad_norm": 0.7748968005180359, - "learning_rate": 5.680181249152337e-05, - "loss": 0.5309, - "step": 131000 - }, - { - "epoch": 0.002, - "eval_loss": 0.5239149928092957, - "eval_runtime": 155.5885, - "eval_samples_per_second": 99.255, - "eval_steps_per_second": 0.778, - "step": 131000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.072779178619385, - "loss_rtd": 0.2663709223270416, - "loss_sent": 0.04606501758098602, - "loss_sod": 0.12739084661006927, - "loss_total": 0.4398267865180969, - "step": 131099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.089369773864746, - "loss_rtd": 0.2644404172897339, - "loss_sent": 0.1319088339805603, - "loss_sod": 0.1043473482131958, - "loss_total": 0.50069659948349, - "step": 131099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.181709885597229, - "learning_rate": 5.6770372861433406e-05, - "loss": 0.5282, - "step": 131100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.151736259460449, - "loss_rtd": 0.2952464520931244, - "loss_sent": 0.1472969502210617, - "loss_sod": 0.06669507920742035, - "loss_total": 0.5092384815216064, - "step": 131199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.665799140930176, - "loss_rtd": 0.28812384605407715, - "loss_sent": 0.05638657882809639, - "loss_sod": 0.09353906661272049, - "loss_total": 0.4380494952201843, - "step": 131199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.8514031171798706, - "learning_rate": 5.6738930504238065e-05, - "loss": 0.5335, - "step": 131200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.095651149749756, - "loss_rtd": 0.3047369718551636, - "loss_sent": 0.5476189851760864, - "loss_sod": 0.02074720710515976, - "loss_total": 0.873103141784668, - "step": 131299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.383840560913086, - "loss_rtd": 0.27004507184028625, - "loss_sent": 0.15658144652843475, - "loss_sod": 0.07216206192970276, - "loss_total": 0.49878859519958496, - "step": 131299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.7374162673950195, - "learning_rate": 5.670748543260232e-05, - "loss": 0.5438, - "step": 131300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.274921417236328, - "loss_rtd": 0.30671313405036926, - "loss_sent": 0.4435414969921112, - "loss_sod": 0.07780569046735764, - "loss_total": 0.8280603289604187, - "step": 131399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.974460601806641, - "loss_rtd": 0.2908535301685333, - "loss_sent": 0.2117234170436859, - "loss_sod": 0.12159785628318787, - "loss_total": 0.6241748332977295, - "step": 131399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.4120749235153198, - "learning_rate": 5.667603765919225e-05, - "loss": 0.5412, - "step": 131400 - }, - { - "epoch": 0.002998, - "loss_gen": 4.948692321777344, - "loss_rtd": 0.28530600666999817, - "loss_sent": 0.1581439971923828, - "loss_sod": 0.05736737698316574, - "loss_total": 0.5008174180984497, - "step": 131499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.769708633422852, - "loss_rtd": 0.29132771492004395, - "loss_sent": 0.121072918176651, - "loss_sod": 0.054681532084941864, - "loss_total": 0.4670821726322174, - "step": 131499 - }, - { - "epoch": 0.003, - "grad_norm": 0.7811002135276794, - "learning_rate": 5.6644587196675014e-05, - "loss": 0.5247, - "step": 131500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.725676536560059, - "loss_rtd": 0.2849738597869873, - "loss_sent": 0.0006301194080151618, - "loss_sod": 0.23249486088752747, - "loss_total": 0.5180988311767578, - "step": 131599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.686426162719727, - "loss_rtd": 0.2830125093460083, - "loss_sent": 0.0005858843796886504, - "loss_sod": 0.2647145092487335, - "loss_total": 0.5483129024505615, - "step": 131599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.3104661703109741, - "learning_rate": 5.661313405771884e-05, - "loss": 0.5301, - "step": 131600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.908972263336182, - "loss_rtd": 0.28524264693260193, - "loss_sent": 0.030228758230805397, - "loss_sod": 0.08076919615268707, - "loss_total": 0.39624062180519104, - "step": 131699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.779695987701416, - "loss_rtd": 0.2580753266811371, - "loss_sent": 0.07049515843391418, - "loss_sod": 0.08630006015300751, - "loss_total": 0.4148705303668976, - "step": 131699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.7225167155265808, - "learning_rate": 5.658167825499306e-05, - "loss": 0.543, - "step": 131700 - }, - { - "epoch": 0.003598, - "loss_gen": 4.8812055587768555, - "loss_rtd": 0.29372212290763855, - "loss_sent": 0.11324161291122437, - "loss_sod": 0.08304459601640701, - "loss_total": 0.49000832438468933, - "step": 131799 - }, - { - "epoch": 0.003598, - "loss_gen": 4.897563934326172, - "loss_rtd": 0.30420058965682983, - "loss_sent": 0.2974887788295746, - "loss_sod": 0.01579902321100235, - "loss_total": 0.6174883842468262, - "step": 131799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.8330687284469604, - "learning_rate": 5.655021980116808e-05, - "loss": 0.5481, - "step": 131800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.180691719055176, - "loss_rtd": 0.28755512833595276, - "loss_sent": 0.2041163593530655, - "loss_sod": 0.06895408034324646, - "loss_total": 0.5606255531311035, - "step": 131899 - }, - { - "epoch": 0.003798, - "loss_gen": 4.610910892486572, - "loss_rtd": 0.28476133942604065, - "loss_sent": 0.03329460322856903, - "loss_sod": 0.18745854496955872, - "loss_total": 0.5055145025253296, - "step": 131899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.9010452032089233, - "learning_rate": 5.651875870891533e-05, - "loss": 0.54, - "step": 131900 - }, - { - "epoch": 0.003998, - "loss_gen": 4.279195308685303, - "loss_rtd": 0.26342475414276123, - "loss_sent": 0.03198295086622238, - "loss_sod": 0.0630388855934143, - "loss_total": 0.3584465980529785, - "step": 131999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.077536582946777, - "loss_rtd": 0.313030481338501, - "loss_sent": 0.18453598022460938, - "loss_sod": 0.023974746465682983, - "loss_total": 0.5215412378311157, - "step": 131999 - }, - { - "epoch": 0.004, - "grad_norm": 0.7825824618339539, - "learning_rate": 5.648729499090737e-05, - "loss": 0.5538, - "step": 132000 - }, - { - "epoch": 0.004, - "eval_loss": 0.5140495300292969, - "eval_runtime": 152.2396, - "eval_samples_per_second": 101.439, - "eval_steps_per_second": 0.795, - "step": 132000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.530811786651611, - "loss_rtd": 0.2890695631504059, - "loss_sent": 0.07682177424430847, - "loss_sod": 0.11939701437950134, - "loss_total": 0.4852883517742157, - "step": 132099 - }, - { - "epoch": 0.004198, - "loss_gen": 4.3468499183654785, - "loss_rtd": 0.28686901926994324, - "loss_sent": 0.00015228970733005553, - "loss_sod": 0.12854860723018646, - "loss_total": 0.41556990146636963, - "step": 132099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.906028687953949, - "learning_rate": 5.645582865981773e-05, - "loss": 0.54, - "step": 132100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.194786548614502, - "loss_rtd": 0.29532912373542786, - "loss_sent": 0.11146141588687897, - "loss_sod": 0.007537001743912697, - "loss_total": 0.41432756185531616, - "step": 132199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.131966590881348, - "loss_rtd": 0.291231244802475, - "loss_sent": 0.11104708164930344, - "loss_sod": 0.06896242499351501, - "loss_total": 0.471240758895874, - "step": 132199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.3519681692123413, - "learning_rate": 5.642435972832112e-05, - "loss": 0.5176, - "step": 132200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.20922327041626, - "loss_rtd": 0.3060184121131897, - "loss_sent": 0.03654468432068825, - "loss_sod": 0.052750006318092346, - "loss_total": 0.3953130841255188, - "step": 132299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.11304235458374, - "loss_rtd": 0.30031052231788635, - "loss_sent": 0.05421074479818344, - "loss_sod": 0.013233036734163761, - "loss_total": 0.3677543103694916, - "step": 132299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.6765241622924805, - "learning_rate": 5.639288820909314e-05, - "loss": 0.548, - "step": 132300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.279114723205566, - "loss_rtd": 0.3012498915195465, - "loss_sent": 0.04160747677087784, - "loss_sod": 0.07582254707813263, - "loss_total": 0.4186799228191376, - "step": 132399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.80006217956543, - "loss_rtd": 0.2973664402961731, - "loss_sent": 0.17434334754943848, - "loss_sod": 0.04450562596321106, - "loss_total": 0.516215443611145, - "step": 132399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.5384371280670166, - "learning_rate": 5.636141411481058e-05, - "loss": 0.5242, - "step": 132400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.171237468719482, - "loss_rtd": 0.2945054769515991, - "loss_sent": 0.13472336530685425, - "loss_sod": 0.004508455283939838, - "loss_total": 0.43373730778694153, - "step": 132499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.911259174346924, - "loss_rtd": 0.28879836201667786, - "loss_sent": 0.13032907247543335, - "loss_sod": 0.0037056375294923782, - "loss_total": 0.42283308506011963, - "step": 132499 - }, - { - "epoch": 0.005, - "grad_norm": 0.7335926294326782, - "learning_rate": 5.632993745815116e-05, - "loss": 0.5346, - "step": 132500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.9052534103393555, - "loss_rtd": 0.2912304103374481, - "loss_sent": 0.41629940271377563, - "loss_sod": 0.022761059924960136, - "loss_total": 0.7302908897399902, - "step": 132599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.133388519287109, - "loss_rtd": 0.31143835186958313, - "loss_sent": 0.08793631196022034, - "loss_sod": 0.057130102068185806, - "loss_total": 0.456504762172699, - "step": 132599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.1287411451339722, - "learning_rate": 5.6298458251793705e-05, - "loss": 0.5566, - "step": 132600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.22165584564209, - "loss_rtd": 0.28310316801071167, - "loss_sent": 0.06454163044691086, - "loss_sod": 0.1360900104045868, - "loss_total": 0.4837348163127899, - "step": 132699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.478661060333252, - "loss_rtd": 0.309174507856369, - "loss_sent": 0.14244163036346436, - "loss_sod": 0.045050252228975296, - "loss_total": 0.49666640162467957, - "step": 132699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.8059908747673035, - "learning_rate": 5.626697650841801e-05, - "loss": 0.515, - "step": 132700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.115859508514404, - "loss_rtd": 0.2883886992931366, - "loss_sent": 0.14805959165096283, - "loss_sod": 0.13457569479942322, - "loss_total": 0.5710240006446838, - "step": 132799 - }, - { - "epoch": 0.005598, - "loss_gen": 4.049540042877197, - "loss_rtd": 0.2628515958786011, - "loss_sent": 4.867784446105361e-05, - "loss_sod": 0.2343326359987259, - "loss_total": 0.4972328841686249, - "step": 132799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.0146269798278809, - "learning_rate": 5.6235492240704936e-05, - "loss": 0.5348, - "step": 132800 - }, - { - "epoch": 0.005798, - "loss_gen": 4.973217010498047, - "loss_rtd": 0.2926579713821411, - "loss_sent": 0.596145749092102, - "loss_sod": 0.014550590887665749, - "loss_total": 0.903354287147522, - "step": 132899 - }, - { - "epoch": 0.005798, - "loss_gen": 4.936171054840088, - "loss_rtd": 0.3060975670814514, - "loss_sent": 0.19991983473300934, - "loss_sod": 0.02503911592066288, - "loss_total": 0.5310565233230591, - "step": 132899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.7769293785095215, - "learning_rate": 5.620400546133632e-05, - "loss": 0.5423, - "step": 132900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.438228607177734, - "loss_rtd": 0.28840088844299316, - "loss_sent": 0.49344602227211, - "loss_sod": 0.04564886912703514, - "loss_total": 0.827495813369751, - "step": 132999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.199347019195557, - "loss_rtd": 0.2940641939640045, - "loss_sent": 0.10124754905700684, - "loss_sod": 0.10038881003856659, - "loss_total": 0.49570053815841675, - "step": 132999 - }, - { - "epoch": 0.006, - "grad_norm": 1.180791974067688, - "learning_rate": 5.617251618299505e-05, - "loss": 0.5302, - "step": 133000 - }, - { - "epoch": 0.006, - "eval_loss": 0.517386257648468, - "eval_runtime": 153.7275, - "eval_samples_per_second": 100.457, - "eval_steps_per_second": 0.787, - "step": 133000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.261128902435303, - "loss_rtd": 0.2899162173271179, - "loss_sent": 0.34748998284339905, - "loss_sod": 0.11603307723999023, - "loss_total": 0.7534393072128296, - "step": 133099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.1016364097595215, - "loss_rtd": 0.30007854104042053, - "loss_sent": 0.24995823204517365, - "loss_sod": 0.09962132573127747, - "loss_total": 0.6496580839157104, - "step": 133099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.2698135375976562, - "learning_rate": 5.6141024418365e-05, - "loss": 0.5369, - "step": 133100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.548202037811279, - "loss_rtd": 0.2767568826675415, - "loss_sent": 0.014326936565339565, - "loss_sod": 0.09710343182086945, - "loss_total": 0.3881872296333313, - "step": 133199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.061511039733887, - "loss_rtd": 0.2927514910697937, - "loss_sent": 0.08371397107839584, - "loss_sod": 0.062351688742637634, - "loss_total": 0.43881717324256897, - "step": 133199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.90691077709198, - "learning_rate": 5.6109530180131054e-05, - "loss": 0.5452, - "step": 133200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.073202133178711, - "loss_rtd": 0.2805119454860687, - "loss_sent": 0.03842172771692276, - "loss_sod": 0.0018447530455887318, - "loss_total": 0.32077842950820923, - "step": 133299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.099011421203613, - "loss_rtd": 0.2953358292579651, - "loss_sent": 0.13252925872802734, - "loss_sod": 0.0037807973567396402, - "loss_total": 0.4316459000110626, - "step": 133299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.6164154410362244, - "learning_rate": 5.6078033480979085e-05, - "loss": 0.5346, - "step": 133300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.158677101135254, - "loss_rtd": 0.3126745820045471, - "loss_sent": 0.08241559565067291, - "loss_sod": 0.016511857509613037, - "loss_total": 0.4116020202636719, - "step": 133399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.331038475036621, - "loss_rtd": 0.29681864380836487, - "loss_sent": 0.2278798371553421, - "loss_sod": 0.07774224877357483, - "loss_total": 0.6024407148361206, - "step": 133399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.8526787161827087, - "learning_rate": 5.604653433359594e-05, - "loss": 0.5334, - "step": 133400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.004838466644287, - "loss_rtd": 0.2894722521305084, - "loss_sent": 0.10016779601573944, - "loss_sod": 0.04609420895576477, - "loss_total": 0.43573427200317383, - "step": 133499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.184755802154541, - "loss_rtd": 0.28799888491630554, - "loss_sent": 0.23058097064495087, - "loss_sod": 0.03503584489226341, - "loss_total": 0.5536156892776489, - "step": 133499 - }, - { - "epoch": 0.007, - "grad_norm": 0.6855185031890869, - "learning_rate": 5.6015032750669504e-05, - "loss": 0.5264, - "step": 133500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.565594673156738, - "loss_rtd": 0.2776607275009155, - "loss_sent": 0.011718451045453548, - "loss_sod": 0.08202856034040451, - "loss_total": 0.37140774726867676, - "step": 133599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.21798038482666, - "loss_rtd": 0.2986351549625397, - "loss_sent": 0.06739834696054459, - "loss_sod": 0.03009982593357563, - "loss_total": 0.39613333344459534, - "step": 133599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.8277300596237183, - "learning_rate": 5.598352874488858e-05, - "loss": 0.5169, - "step": 133600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.988482475280762, - "loss_rtd": 0.309760719537735, - "loss_sent": 0.16191567480564117, - "loss_sod": 0.029046613723039627, - "loss_total": 0.5007230043411255, - "step": 133699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.192599296569824, - "loss_rtd": 0.2854505479335785, - "loss_sent": 0.3803899884223938, - "loss_sod": 0.03612706810235977, - "loss_total": 0.7019675970077515, - "step": 133699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.176679015159607, - "learning_rate": 5.595202232894301e-05, - "loss": 0.5316, - "step": 133700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.446816921234131, - "loss_rtd": 0.29135972261428833, - "loss_sent": 0.08156407624483109, - "loss_sod": 0.036502495408058167, - "loss_total": 0.4094262719154358, - "step": 133799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.273036003112793, - "loss_rtd": 0.2708379626274109, - "loss_sent": 0.21031464636325836, - "loss_sod": 0.027876533567905426, - "loss_total": 0.5090291500091553, - "step": 133799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.6264864206314087, - "learning_rate": 5.592051351552354e-05, - "loss": 0.5161, - "step": 133800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.071184158325195, - "loss_rtd": 0.2771450877189636, - "loss_sent": 0.5051693320274353, - "loss_sod": 0.029784109443426132, - "loss_total": 0.812098503112793, - "step": 133899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.102906227111816, - "loss_rtd": 0.29092371463775635, - "loss_sent": 0.18195125460624695, - "loss_sod": 0.026057599112391472, - "loss_total": 0.4989325702190399, - "step": 133899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.306749701499939, - "learning_rate": 5.588900231732196e-05, - "loss": 0.526, - "step": 133900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.075216293334961, - "loss_rtd": 0.30054181814193726, - "loss_sent": 0.3336167335510254, - "loss_sod": 0.06170884892344475, - "loss_total": 0.6958674192428589, - "step": 133999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.275862693786621, - "loss_rtd": 0.30010300874710083, - "loss_sent": 0.7424845695495605, - "loss_sod": 0.05289775878190994, - "loss_total": 1.0954853296279907, - "step": 133999 - }, - { - "epoch": 0.008, - "grad_norm": 2.0897834300994873, - "learning_rate": 5.585748874703093e-05, - "loss": 0.5305, - "step": 134000 - }, - { - "epoch": 0.008, - "eval_loss": 0.5119883418083191, - "eval_runtime": 152.2502, - "eval_samples_per_second": 101.432, - "eval_steps_per_second": 0.795, - "step": 134000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.6806745529174805, - "loss_rtd": 0.27665334939956665, - "loss_sent": 0.0004764352925121784, - "loss_sod": 0.2410624921321869, - "loss_total": 0.5181922316551208, - "step": 134099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.15795373916626, - "loss_rtd": 0.3039684593677521, - "loss_sent": 0.21698664128780365, - "loss_sod": 0.022792495787143707, - "loss_total": 0.54374760389328, - "step": 134099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.1554814577102661, - "learning_rate": 5.582597281734414e-05, - "loss": 0.5259, - "step": 134100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.32167911529541, - "loss_rtd": 0.29960358142852783, - "loss_sent": 0.10849422216415405, - "loss_sod": 0.0041342126205563545, - "loss_total": 0.41223201155662537, - "step": 134199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.886404991149902, - "loss_rtd": 0.2942427694797516, - "loss_sent": 0.2582089602947235, - "loss_sod": 0.08146588504314423, - "loss_total": 0.6339175701141357, - "step": 134199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.9550265669822693, - "learning_rate": 5.5794454540956186e-05, - "loss": 0.5458, - "step": 134200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.064291000366211, - "loss_rtd": 0.292093962430954, - "loss_sent": 0.19798637926578522, - "loss_sod": 0.03719881549477577, - "loss_total": 0.5272791385650635, - "step": 134299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.778872966766357, - "loss_rtd": 0.2855645716190338, - "loss_sent": 0.09939462691545486, - "loss_sod": 0.030383706092834473, - "loss_total": 0.41534289717674255, - "step": 134299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.9824146032333374, - "learning_rate": 5.5762933930562645e-05, - "loss": 0.5183, - "step": 134300 - }, - { - "epoch": 0.008798, - "loss_gen": 4.325845241546631, - "loss_rtd": 0.2759763300418854, - "loss_sent": 0.026905614882707596, - "loss_sod": 0.10722506046295166, - "loss_total": 0.4101070165634155, - "step": 134399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.273506164550781, - "loss_rtd": 0.2808290123939514, - "loss_sent": 0.12383317947387695, - "loss_sod": 0.05283088609576225, - "loss_total": 0.4574930667877197, - "step": 134399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.7307271361351013, - "learning_rate": 5.573141099886e-05, - "loss": 0.5355, - "step": 134400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.2135419845581055, - "loss_rtd": 0.3053134083747864, - "loss_sent": 0.5553669333457947, - "loss_sod": 0.020075494423508644, - "loss_total": 0.8807558417320251, - "step": 134499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.001331806182861, - "loss_rtd": 0.28902408480644226, - "loss_sent": 0.22344139218330383, - "loss_sod": 0.031435299664735794, - "loss_total": 0.5439007878303528, - "step": 134499 - }, - { - "epoch": 0.009, - "grad_norm": 1.4039077758789062, - "learning_rate": 5.56998857585457e-05, - "loss": 0.5438, - "step": 134500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.113071918487549, - "loss_rtd": 0.2908078730106354, - "loss_sent": 0.23563972115516663, - "loss_sod": 0.06931894272565842, - "loss_total": 0.595766544342041, - "step": 134599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.014859676361084, - "loss_rtd": 0.3115485608577728, - "loss_sent": 0.36756986379623413, - "loss_sod": 0.06761464476585388, - "loss_total": 0.7467330694198608, - "step": 134599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.3059592247009277, - "learning_rate": 5.5668358222318084e-05, - "loss": 0.5312, - "step": 134600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.183529376983643, - "loss_rtd": 0.29946181178092957, - "loss_sent": 0.2329072654247284, - "loss_sod": 0.05431223660707474, - "loss_total": 0.5866813063621521, - "step": 134699 - }, - { - "epoch": 0.009398, - "loss_gen": 4.47651481628418, - "loss_rtd": 0.2590901851654053, - "loss_sent": 0.05405307933688164, - "loss_sod": 0.17085470259189606, - "loss_total": 0.4839979410171509, - "step": 134699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.9354749917984009, - "learning_rate": 5.5636828402876475e-05, - "loss": 0.5426, - "step": 134700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.529237270355225, - "loss_rtd": 0.2927532494068146, - "loss_sent": 0.2400491088628769, - "loss_sod": 0.10767121613025665, - "loss_total": 0.6404736042022705, - "step": 134799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.23181676864624, - "loss_rtd": 0.2928999364376068, - "loss_sent": 0.33182835578918457, - "loss_sod": 0.013041868805885315, - "loss_total": 0.6377701759338379, - "step": 134799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.2457292079925537, - "learning_rate": 5.560529631292104e-05, - "loss": 0.5289, - "step": 134800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.881642818450928, - "loss_rtd": 0.262358158826828, - "loss_sent": 0.04763232171535492, - "loss_sod": 0.12649855017662048, - "loss_total": 0.4364890456199646, - "step": 134899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.394132614135742, - "loss_rtd": 0.2918231189250946, - "loss_sent": 0.23514193296432495, - "loss_sod": 0.06371861696243286, - "loss_total": 0.5906836986541748, - "step": 134899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.9727721810340881, - "learning_rate": 5.557376196515294e-05, - "loss": 0.5272, - "step": 134900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.764476299285889, - "loss_rtd": 0.27719348669052124, - "loss_sent": 0.008931392803788185, - "loss_sod": 0.0570664182305336, - "loss_total": 0.3431912958621979, - "step": 134999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.979287624359131, - "loss_rtd": 0.28074929118156433, - "loss_sent": 0.11842507869005203, - "loss_sod": 0.17452594637870789, - "loss_total": 0.5737003087997437, - "step": 134999 - }, - { - "epoch": 0.01, - "grad_norm": 1.1130653619766235, - "learning_rate": 5.554222537227417e-05, - "loss": 0.538, - "step": 135000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5137791037559509, - "eval_runtime": 152.4735, - "eval_samples_per_second": 101.283, - "eval_steps_per_second": 0.794, - "step": 135000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.055010795593262, - "loss_rtd": 0.30108219385147095, - "loss_sent": 0.23061391711235046, - "loss_sod": 0.05377934128046036, - "loss_total": 0.5854754447937012, - "step": 135099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.1281328201293945, - "loss_rtd": 0.290868878364563, - "loss_sent": 0.25150617957115173, - "loss_sod": 0.02241610735654831, - "loss_total": 0.564791202545166, - "step": 135099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.0362716913223267, - "learning_rate": 5.55106865469877e-05, - "loss": 0.5355, - "step": 135100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.109951496124268, - "loss_rtd": 0.29834866523742676, - "loss_sent": 0.37092530727386475, - "loss_sod": 0.15103842318058014, - "loss_total": 0.8203123807907104, - "step": 135199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.356719493865967, - "loss_rtd": 0.2927529811859131, - "loss_sent": 0.16856837272644043, - "loss_sod": 0.11834752559661865, - "loss_total": 0.5796688795089722, - "step": 135199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.686990737915039, - "learning_rate": 5.5479145501997334e-05, - "loss": 0.5572, - "step": 135200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.151969909667969, - "loss_rtd": 0.2973788380622864, - "loss_sent": 0.2360147386789322, - "loss_sod": 0.05193222314119339, - "loss_total": 0.5853257775306702, - "step": 135299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.259129524230957, - "loss_rtd": 0.29523059725761414, - "loss_sent": 0.23155558109283447, - "loss_sod": 0.07976596057415009, - "loss_total": 0.6065521240234375, - "step": 135299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.8935631513595581, - "learning_rate": 5.544760225000781e-05, - "loss": 0.5363, - "step": 135300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.004709720611572, - "loss_rtd": 0.27916789054870605, - "loss_sent": 0.10970665514469147, - "loss_sod": 0.01602090150117874, - "loss_total": 0.4048954248428345, - "step": 135399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.230704307556152, - "loss_rtd": 0.2816089689731598, - "loss_sent": 0.24363799393177032, - "loss_sod": 0.073275126516819, - "loss_total": 0.5985220670700073, - "step": 135399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.7221740484237671, - "learning_rate": 5.541605680372476e-05, - "loss": 0.5478, - "step": 135400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.016166687011719, - "loss_rtd": 0.27321386337280273, - "loss_sent": 0.4934834837913513, - "loss_sod": 0.1191985160112381, - "loss_total": 0.885895848274231, - "step": 135499 - }, - { - "epoch": 0.010998, - "loss_gen": 4.839963912963867, - "loss_rtd": 0.31342312693595886, - "loss_sent": 0.12136463075876236, - "loss_sod": 0.019670138135552406, - "loss_total": 0.4544578790664673, - "step": 135499 - }, - { - "epoch": 0.011, - "grad_norm": 1.7836360931396484, - "learning_rate": 5.538450917585467e-05, - "loss": 0.5274, - "step": 135500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.11860990524292, - "loss_rtd": 0.2798524498939514, - "loss_sent": 0.1161596029996872, - "loss_sod": 0.04325724393129349, - "loss_total": 0.4392693042755127, - "step": 135599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.1634297370910645, - "loss_rtd": 0.2881108522415161, - "loss_sent": 0.22788073122501373, - "loss_sod": 0.01384064368903637, - "loss_total": 0.5298322439193726, - "step": 135599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9651035070419312, - "learning_rate": 5.535295937910494e-05, - "loss": 0.5408, - "step": 135600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.181997776031494, - "loss_rtd": 0.3008062243461609, - "loss_sent": 0.15601599216461182, - "loss_sod": 0.05230824649333954, - "loss_total": 0.5091304779052734, - "step": 135699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.398614883422852, - "loss_rtd": 0.2960011065006256, - "loss_sent": 0.12754502892494202, - "loss_sod": 0.0128394216299057, - "loss_total": 0.43638554215431213, - "step": 135699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.894529402256012, - "learning_rate": 5.53214074261838e-05, - "loss": 0.5271, - "step": 135700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.05432653427124, - "loss_rtd": 0.2885904908180237, - "loss_sent": 0.1281057894229889, - "loss_sod": 0.03045843169093132, - "loss_total": 0.447154700756073, - "step": 135799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.534729957580566, - "loss_rtd": 0.2890322506427765, - "loss_sent": 0.23375825583934784, - "loss_sod": 0.13331487774848938, - "loss_total": 0.6561053991317749, - "step": 135799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.2520610094070435, - "learning_rate": 5.52898533298004e-05, - "loss": 0.5335, - "step": 135800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.442405700683594, - "loss_rtd": 0.29448550939559937, - "loss_sent": 0.288179874420166, - "loss_sod": 0.13137699663639069, - "loss_total": 0.7140423655509949, - "step": 135899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.789536476135254, - "loss_rtd": 0.27976828813552856, - "loss_sent": 0.07441139221191406, - "loss_sod": 0.014468375593423843, - "loss_total": 0.36864805221557617, - "step": 135899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.348859429359436, - "learning_rate": 5.5258297102664694e-05, - "loss": 0.5339, - "step": 135900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.468254566192627, - "loss_rtd": 0.264314204454422, - "loss_sent": 5.786648398498073e-05, - "loss_sod": 0.08384271711111069, - "loss_total": 0.3482148051261902, - "step": 135999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.028106689453125, - "loss_rtd": 0.27646321058273315, - "loss_sent": 0.316119909286499, - "loss_sod": 0.012395642697811127, - "loss_total": 0.6049787998199463, - "step": 135999 - }, - { - "epoch": 0.012, - "grad_norm": 0.6943278312683105, - "learning_rate": 5.522673875748756e-05, - "loss": 0.5275, - "step": 136000 - }, - { - "epoch": 0.012, - "eval_loss": 0.5100157856941223, - "eval_runtime": 152.671, - "eval_samples_per_second": 101.152, - "eval_steps_per_second": 0.793, - "step": 136000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.106087684631348, - "loss_rtd": 0.30971595644950867, - "loss_sent": 0.3000084161758423, - "loss_sod": 0.06449112296104431, - "loss_total": 0.6742154955863953, - "step": 136099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.165160179138184, - "loss_rtd": 0.28918367624282837, - "loss_sent": 0.15298724174499512, - "loss_sod": 0.08421222865581512, - "loss_total": 0.5263831615447998, - "step": 136099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.0627872943878174, - "learning_rate": 5.519517830698067e-05, - "loss": 0.5292, - "step": 136100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.2184977531433105, - "loss_rtd": 0.2985919713973999, - "loss_sent": 0.3204294741153717, - "loss_sod": 0.10847426950931549, - "loss_total": 0.7274956703186035, - "step": 136199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.046943187713623, - "loss_rtd": 0.2754345238208771, - "loss_sent": 0.21202506124973297, - "loss_sod": 0.010480174794793129, - "loss_total": 0.4979397654533386, - "step": 136199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.6335351467132568, - "learning_rate": 5.516361576385658e-05, - "loss": 0.5363, - "step": 136200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.846960544586182, - "loss_rtd": 0.30323663353919983, - "loss_sent": 0.08127310127019882, - "loss_sod": 0.04320206493139267, - "loss_total": 0.4277118146419525, - "step": 136299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.125184059143066, - "loss_rtd": 0.25880250334739685, - "loss_sent": 0.0005236545694060624, - "loss_sod": 0.07243509590625763, - "loss_total": 0.33176127076148987, - "step": 136299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.8810359835624695, - "learning_rate": 5.5132051140828666e-05, - "loss": 0.5305, - "step": 136300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.3787455558776855, - "loss_rtd": 0.29837551712989807, - "loss_sent": 0.20992742478847504, - "loss_sod": 0.1247561126947403, - "loss_total": 0.6330590844154358, - "step": 136399 - }, - { - "epoch": 0.012798, - "loss_gen": 4.606182098388672, - "loss_rtd": 0.27354809641838074, - "loss_sent": 0.01576041430234909, - "loss_sod": 0.06710290163755417, - "loss_total": 0.3564114272594452, - "step": 136399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.8069556355476379, - "learning_rate": 5.510048445061119e-05, - "loss": 0.5345, - "step": 136400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.970380783081055, - "loss_rtd": 0.29826441407203674, - "loss_sent": 0.4209631681442261, - "loss_sod": 0.02821575477719307, - "loss_total": 0.7474433183670044, - "step": 136499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.184389591217041, - "loss_rtd": 0.3037567138671875, - "loss_sent": 0.1017565131187439, - "loss_sod": 0.023996714502573013, - "loss_total": 0.4295099377632141, - "step": 136499 - }, - { - "epoch": 0.013, - "grad_norm": 0.882291853427887, - "learning_rate": 5.506891570591917e-05, - "loss": 0.5347, - "step": 136500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.901976585388184, - "loss_rtd": 0.2915496230125427, - "loss_sent": 0.3449662923812866, - "loss_sod": 0.017863880842924118, - "loss_total": 0.6543797850608826, - "step": 136599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.934019565582275, - "loss_rtd": 0.2803727090358734, - "loss_sent": 0.2724975645542145, - "loss_sod": 0.005678113549947739, - "loss_total": 0.5585483908653259, - "step": 136599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.5168174505233765, - "learning_rate": 5.503734491946852e-05, - "loss": 0.5495, - "step": 136600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.003833293914795, - "loss_rtd": 0.2940410375595093, - "loss_sent": 0.11700721830129623, - "loss_sod": 0.18890929222106934, - "loss_total": 0.5999575257301331, - "step": 136699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.236407279968262, - "loss_rtd": 0.299206018447876, - "loss_sent": 0.09691785275936127, - "loss_sod": 0.11297590285539627, - "loss_total": 0.5090997815132141, - "step": 136699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.0674422979354858, - "learning_rate": 5.500577210397593e-05, - "loss": 0.5175, - "step": 136700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.100795745849609, - "loss_rtd": 0.2777169942855835, - "loss_sent": 0.30468812584877014, - "loss_sod": 0.1105886697769165, - "loss_total": 0.6929937601089478, - "step": 136799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.732030868530273, - "loss_rtd": 0.2954294979572296, - "loss_sent": 0.12909935414791107, - "loss_sod": 0.16132435202598572, - "loss_total": 0.5858532190322876, - "step": 136799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.5608834028244019, - "learning_rate": 5.497419727215895e-05, - "loss": 0.5173, - "step": 136800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.082647800445557, - "loss_rtd": 0.2739904224872589, - "loss_sent": 0.280626118183136, - "loss_sod": 0.08271697163581848, - "loss_total": 0.6373335123062134, - "step": 136899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.200981616973877, - "loss_rtd": 0.2875075042247772, - "loss_sent": 0.3835589289665222, - "loss_sod": 0.0919804498553276, - "loss_total": 0.7630468606948853, - "step": 136899 - }, - { - "epoch": 0.0138, - "grad_norm": 2.6818933486938477, - "learning_rate": 5.494262043673588e-05, - "loss": 0.5245, - "step": 136900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.0394086837768555, - "loss_rtd": 0.28725799918174744, - "loss_sent": 0.21636411547660828, - "loss_sod": 0.0462038628757, - "loss_total": 0.5498259663581848, - "step": 136999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.304337024688721, - "loss_rtd": 0.280945360660553, - "loss_sent": 0.14623352885246277, - "loss_sod": 0.15014445781707764, - "loss_total": 0.577323317527771, - "step": 136999 - }, - { - "epoch": 0.014, - "grad_norm": 0.8700416684150696, - "learning_rate": 5.49110416104259e-05, - "loss": 0.534, - "step": 137000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5080940127372742, - "eval_runtime": 151.7199, - "eval_samples_per_second": 101.786, - "eval_steps_per_second": 0.798, - "step": 137000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.115293979644775, - "loss_rtd": 0.2914241552352905, - "loss_sent": 0.14293156564235687, - "loss_sod": 0.07873409986495972, - "loss_total": 0.5130898356437683, - "step": 137099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.320815563201904, - "loss_rtd": 0.27097514271736145, - "loss_sent": 0.10093007236719131, - "loss_sod": 0.05424465239048004, - "loss_total": 0.4261498749256134, - "step": 137099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.7981420755386353, - "learning_rate": 5.487946080594895e-05, - "loss": 0.5209, - "step": 137100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.491896152496338, - "loss_rtd": 0.3019807040691376, - "loss_sent": 0.3259780704975128, - "loss_sod": 0.07002832740545273, - "loss_total": 0.6979870796203613, - "step": 137199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.126546382904053, - "loss_rtd": 0.2816140353679657, - "loss_sent": 0.3527289032936096, - "loss_sod": 0.030675236135721207, - "loss_total": 0.6650182008743286, - "step": 137199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.7934132814407349, - "learning_rate": 5.484787803602577e-05, - "loss": 0.5289, - "step": 137200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.0582146644592285, - "loss_rtd": 0.28518033027648926, - "loss_sent": 0.4254242777824402, - "loss_sod": 0.012909738346934319, - "loss_total": 0.7235143184661865, - "step": 137299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.125099182128906, - "loss_rtd": 0.29282599687576294, - "loss_sent": 0.30198314785957336, - "loss_sod": 0.01117524690926075, - "loss_total": 0.6059843897819519, - "step": 137299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.8578848838806152, - "learning_rate": 5.48162933133779e-05, - "loss": 0.5206, - "step": 137300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.208960056304932, - "loss_rtd": 0.2615668475627899, - "loss_sent": 0.032868642359972, - "loss_sod": 0.06202565133571625, - "loss_total": 0.35646113753318787, - "step": 137399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.981863498687744, - "loss_rtd": 0.3050662875175476, - "loss_sent": 0.29135698080062866, - "loss_sod": 0.0854504331946373, - "loss_total": 0.6818736791610718, - "step": 137399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.0216947793960571, - "learning_rate": 5.4784706650727655e-05, - "loss": 0.5247, - "step": 137400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.137484073638916, - "loss_rtd": 0.29767727851867676, - "loss_sent": 0.45579925179481506, - "loss_sod": 0.08855852484703064, - "loss_total": 0.8420350551605225, - "step": 137499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.418776512145996, - "loss_rtd": 0.2826775014400482, - "loss_sent": 0.020374977961182594, - "loss_sod": 0.08248041570186615, - "loss_total": 0.3855328857898712, - "step": 137499 - }, - { - "epoch": 0.015, - "grad_norm": 1.8559634685516357, - "learning_rate": 5.4753118060798146e-05, - "loss": 0.5254, - "step": 137500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.250895977020264, - "loss_rtd": 0.2845058739185333, - "loss_sent": 0.13925084471702576, - "loss_sod": 0.07972454279661179, - "loss_total": 0.5034812688827515, - "step": 137599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.185181617736816, - "loss_rtd": 0.3077811002731323, - "loss_sent": 0.29663532972335815, - "loss_sod": 0.023668231442570686, - "loss_total": 0.628084659576416, - "step": 137599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.198548674583435, - "learning_rate": 5.4721527556313244e-05, - "loss": 0.5228, - "step": 137600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.097166538238525, - "loss_rtd": 0.2978009581565857, - "loss_sent": 0.17746710777282715, - "loss_sod": 0.03157547116279602, - "loss_total": 0.5068435668945312, - "step": 137699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.623559474945068, - "loss_rtd": 0.26392605900764465, - "loss_sent": 0.0842394232749939, - "loss_sod": 0.05295109003782272, - "loss_total": 0.40111657977104187, - "step": 137699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.7644197940826416, - "learning_rate": 5.468993514999762e-05, - "loss": 0.5229, - "step": 137700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.118028163909912, - "loss_rtd": 0.27068865299224854, - "loss_sent": 0.1393316686153412, - "loss_sod": 0.059510473161935806, - "loss_total": 0.46953079104423523, - "step": 137799 - }, - { - "epoch": 0.015598, - "loss_gen": 4.489533424377441, - "loss_rtd": 0.2679705321788788, - "loss_sent": 0.07441765069961548, - "loss_sod": 0.10028500854969025, - "loss_total": 0.4426732063293457, - "step": 137799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.8717600703239441, - "learning_rate": 5.4658340854576664e-05, - "loss": 0.5224, - "step": 137800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.418225288391113, - "loss_rtd": 0.29547515511512756, - "loss_sent": 0.1982797086238861, - "loss_sod": 0.032213449478149414, - "loss_total": 0.5259683132171631, - "step": 137899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.145331382751465, - "loss_rtd": 0.30295661091804504, - "loss_sent": 0.3211835026741028, - "loss_sod": 0.07496554404497147, - "loss_total": 0.6991056203842163, - "step": 137899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.294331669807434, - "learning_rate": 5.462674468277658e-05, - "loss": 0.5173, - "step": 137900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.082634449005127, - "loss_rtd": 0.2707638144493103, - "loss_sent": 0.2992852032184601, - "loss_sod": 0.0839737057685852, - "loss_total": 0.6540226936340332, - "step": 137999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.871114253997803, - "loss_rtd": 0.3043176829814911, - "loss_sent": 0.1752648800611496, - "loss_sod": 0.10215941816568375, - "loss_total": 0.581741988658905, - "step": 137999 - }, - { - "epoch": 0.016, - "grad_norm": 1.2170419692993164, - "learning_rate": 5.459514664732428e-05, - "loss": 0.5068, - "step": 138000 - }, - { - "epoch": 0.016, - "eval_loss": 0.5059153437614441, - "eval_runtime": 151.8064, - "eval_samples_per_second": 101.728, - "eval_steps_per_second": 0.797, - "step": 138000 - }, - { - "epoch": 0.016198, - "loss_gen": 4.806975841522217, - "loss_rtd": 0.2750062346458435, - "loss_sent": 0.1520356386899948, - "loss_sod": 0.07864824682474136, - "loss_total": 0.5056900978088379, - "step": 138099 - }, - { - "epoch": 0.016198, - "loss_gen": 4.340170383453369, - "loss_rtd": 0.26763975620269775, - "loss_sent": 0.036351222544908524, - "loss_sod": 0.078841932117939, - "loss_total": 0.38283291459083557, - "step": 138099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.6640313267707825, - "learning_rate": 5.456354676094747e-05, - "loss": 0.5333, - "step": 138100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.181458473205566, - "loss_rtd": 0.281023234128952, - "loss_sent": 0.03813255950808525, - "loss_sod": 0.10235526412725449, - "loss_total": 0.42151105403900146, - "step": 138199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.608335494995117, - "loss_rtd": 0.2757279574871063, - "loss_sent": 0.0003415390965528786, - "loss_sod": 0.2963537573814392, - "loss_total": 0.5724232196807861, - "step": 138199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.1935975551605225, - "learning_rate": 5.453194503637455e-05, - "loss": 0.5471, - "step": 138200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.142540454864502, - "loss_rtd": 0.2785303294658661, - "loss_sent": 0.29988670349121094, - "loss_sod": 0.01790069229900837, - "loss_total": 0.5963177680969238, - "step": 138299 - }, - { - "epoch": 0.016598, - "loss_gen": 4.787282466888428, - "loss_rtd": 0.3087652325630188, - "loss_sent": 0.17025905847549438, - "loss_sod": 0.020297439768910408, - "loss_total": 0.49932172894477844, - "step": 138299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8735156655311584, - "learning_rate": 5.450034148633474e-05, - "loss": 0.536, - "step": 138300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.514640808105469, - "loss_rtd": 0.28863558173179626, - "loss_sent": 0.10861130058765411, - "loss_sod": 0.09328439831733704, - "loss_total": 0.4905312657356262, - "step": 138399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.685362815856934, - "loss_rtd": 0.2902231812477112, - "loss_sent": 0.10314501076936722, - "loss_sod": 0.06868387013673782, - "loss_total": 0.46205204725265503, - "step": 138399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.9048056602478027, - "learning_rate": 5.4468736123557896e-05, - "loss": 0.545, - "step": 138400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.267678260803223, - "loss_rtd": 0.2785603702068329, - "loss_sent": 0.3957210183143616, - "loss_sod": 0.024061761796474457, - "loss_total": 0.6983431577682495, - "step": 138499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.309103012084961, - "loss_rtd": 0.26775482296943665, - "loss_sent": 0.10505812615156174, - "loss_sod": 0.1075400561094284, - "loss_total": 0.4803529977798462, - "step": 138499 - }, - { - "epoch": 0.017, - "grad_norm": 1.1361006498336792, - "learning_rate": 5.44371289607747e-05, - "loss": 0.5317, - "step": 138500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.496198654174805, - "loss_rtd": 0.2915562391281128, - "loss_sent": 0.20493806898593903, - "loss_sod": 0.192259281873703, - "loss_total": 0.688753604888916, - "step": 138599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.233489990234375, - "loss_rtd": 0.2855234742164612, - "loss_sent": 0.176621675491333, - "loss_sod": 0.06855890899896622, - "loss_total": 0.5307040214538574, - "step": 138599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.181013822555542, - "learning_rate": 5.440552001071646e-05, - "loss": 0.52, - "step": 138600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.1925835609436035, - "loss_rtd": 0.28075963258743286, - "loss_sent": 0.009259273298084736, - "loss_sod": 0.31399455666542053, - "loss_total": 0.6040135025978088, - "step": 138699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.44363260269165, - "loss_rtd": 0.28415897488594055, - "loss_sent": 0.4156152307987213, - "loss_sod": 0.02586456388235092, - "loss_total": 0.725638747215271, - "step": 138699 - }, - { - "epoch": 0.0174, - "grad_norm": 2.010545015335083, - "learning_rate": 5.437390928611531e-05, - "loss": 0.5278, - "step": 138700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.633986949920654, - "loss_rtd": 0.29162952303886414, - "loss_sent": 0.48899412155151367, - "loss_sod": 0.0521540530025959, - "loss_total": 0.832777738571167, - "step": 138799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.244661331176758, - "loss_rtd": 0.2950066924095154, - "loss_sent": 0.17288966476917267, - "loss_sod": 0.044365983456373215, - "loss_total": 0.5122623443603516, - "step": 138799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.5652868747711182, - "learning_rate": 5.434229679970402e-05, - "loss": 0.5282, - "step": 138800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.057420253753662, - "loss_rtd": 0.27275577187538147, - "loss_sent": 0.0189326424151659, - "loss_sod": 0.04146324470639229, - "loss_total": 0.3331516683101654, - "step": 138899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.323928356170654, - "loss_rtd": 0.26484811305999756, - "loss_sent": 0.0012342464178800583, - "loss_sod": 0.12477214634418488, - "loss_total": 0.39085447788238525, - "step": 138899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.6872423887252808, - "learning_rate": 5.4310682564216086e-05, - "loss": 0.5051, - "step": 138900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.4057297706604, - "loss_rtd": 0.30169445276260376, - "loss_sent": 0.13476087152957916, - "loss_sod": 0.06039680913090706, - "loss_total": 0.49685215950012207, - "step": 138999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.367425918579102, - "loss_rtd": 0.30054301023483276, - "loss_sent": 0.15720084309577942, - "loss_sod": 0.03683830425143242, - "loss_total": 0.4945821464061737, - "step": 138999 - }, - { - "epoch": 0.018, - "grad_norm": 0.7920910716056824, - "learning_rate": 5.427906659238574e-05, - "loss": 0.5208, - "step": 139000 - }, - { - "epoch": 0.018, - "eval_loss": 0.5083724856376648, - "eval_runtime": 151.9605, - "eval_samples_per_second": 101.625, - "eval_steps_per_second": 0.796, - "step": 139000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.02623176574707, - "loss_rtd": 0.28522375226020813, - "loss_sent": 0.1001211553812027, - "loss_sod": 0.10232888907194138, - "loss_total": 0.487673819065094, - "step": 139099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.390545845031738, - "loss_rtd": 0.2911186218261719, - "loss_sent": 0.14162448048591614, - "loss_sod": 0.1621837615966797, - "loss_total": 0.5949268341064453, - "step": 139099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.045466661453247, - "learning_rate": 5.424744889694787e-05, - "loss": 0.5213, - "step": 139100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.249063014984131, - "loss_rtd": 0.2696802318096161, - "loss_sent": 0.48203372955322266, - "loss_sod": 0.0916304886341095, - "loss_total": 0.8433444499969482, - "step": 139199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.152245044708252, - "loss_rtd": 0.28176751732826233, - "loss_sent": 0.18449372053146362, - "loss_sod": 0.0054007526487112045, - "loss_total": 0.4716619849205017, - "step": 139199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.0413336753845215, - "learning_rate": 5.4215829490638105e-05, - "loss": 0.5221, - "step": 139200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.478488922119141, - "loss_rtd": 0.2944413125514984, - "loss_sent": 0.020181145519018173, - "loss_sod": 0.059523675590753555, - "loss_total": 0.37414613366127014, - "step": 139299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.148072242736816, - "loss_rtd": 0.26982197165489197, - "loss_sent": 0.6545564532279968, - "loss_sod": 0.11251619458198547, - "loss_total": 1.0368945598602295, - "step": 139299 - }, - { - "epoch": 0.0186, - "grad_norm": 2.1318886280059814, - "learning_rate": 5.4184208386192716e-05, - "loss": 0.5216, - "step": 139300 - }, - { - "epoch": 0.018798, - "loss_gen": 4.851564884185791, - "loss_rtd": 0.30229687690734863, - "loss_sent": 0.2966586649417877, - "loss_sod": 0.0349770151078701, - "loss_total": 0.6339325904846191, - "step": 139399 - }, - { - "epoch": 0.018798, - "loss_gen": 4.35960578918457, - "loss_rtd": 0.2744034230709076, - "loss_sent": 0.050812479108572006, - "loss_sod": 0.18627747893333435, - "loss_total": 0.5114933848381042, - "step": 139399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.0778703689575195, - "learning_rate": 5.41525855963487e-05, - "loss": 0.5344, - "step": 139400 - }, - { - "epoch": 0.018998, - "loss_gen": 4.415232181549072, - "loss_rtd": 0.27290967106819153, - "loss_sent": 0.02048950456082821, - "loss_sod": 0.1491314023733139, - "loss_total": 0.4425305724143982, - "step": 139499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.161798477172852, - "loss_rtd": 0.281983882188797, - "loss_sent": 0.05633978173136711, - "loss_sod": 0.0970369279384613, - "loss_total": 0.4353605806827545, - "step": 139499 - }, - { - "epoch": 0.019, - "grad_norm": 1.0125378370285034, - "learning_rate": 5.412096113384371e-05, - "loss": 0.5273, - "step": 139500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.189088821411133, - "loss_rtd": 0.285104900598526, - "loss_sent": 0.08446572721004486, - "loss_sod": 0.033265918493270874, - "loss_total": 0.40283656120300293, - "step": 139599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.182796955108643, - "loss_rtd": 0.3001440167427063, - "loss_sent": 0.11930057406425476, - "loss_sod": 0.016159305348992348, - "loss_total": 0.43560388684272766, - "step": 139599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.0447872877120972, - "learning_rate": 5.408933501141605e-05, - "loss": 0.5174, - "step": 139600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.016360759735107, - "loss_rtd": 0.2864854335784912, - "loss_sent": 0.10789066553115845, - "loss_sod": 0.07900240272283554, - "loss_total": 0.4733785092830658, - "step": 139699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.363579273223877, - "loss_rtd": 0.28431788086891174, - "loss_sent": 0.10657691955566406, - "loss_sod": 0.04029475152492523, - "loss_total": 0.43118953704833984, - "step": 139699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.6977967023849487, - "learning_rate": 5.405770724180475e-05, - "loss": 0.5148, - "step": 139700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.978874683380127, - "loss_rtd": 0.29177454113960266, - "loss_sent": 0.3232150077819824, - "loss_sod": 0.04943980276584625, - "loss_total": 0.6644293665885925, - "step": 139799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.377932071685791, - "loss_rtd": 0.28163814544677734, - "loss_sent": 0.09854451566934586, - "loss_sod": 0.0532626211643219, - "loss_total": 0.4334452748298645, - "step": 139799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.7485654354095459, - "learning_rate": 5.4026077837749444e-05, - "loss": 0.5252, - "step": 139800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.053930282592773, - "loss_rtd": 0.28623196482658386, - "loss_sent": 0.33429646492004395, - "loss_sod": 0.04297721013426781, - "loss_total": 0.6635056734085083, - "step": 139899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.4671950340271, - "loss_rtd": 0.2724299132823944, - "loss_sent": 0.012071617878973484, - "loss_sod": 0.060311198234558105, - "loss_total": 0.3448127210140228, - "step": 139899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.485947608947754, - "learning_rate": 5.399444681199046e-05, - "loss": 0.5322, - "step": 139900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.439514636993408, - "loss_rtd": 0.29135048389434814, - "loss_sent": 0.29323235154151917, - "loss_sod": 0.03105130046606064, - "loss_total": 0.6156341433525085, - "step": 139999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.866759300231934, - "loss_rtd": 0.279400110244751, - "loss_sent": 0.22192879021167755, - "loss_sod": 0.0416153222322464, - "loss_total": 0.5429441928863525, - "step": 139999 - }, - { - "epoch": 0.02, - "grad_norm": 0.8952061533927917, - "learning_rate": 5.3962814177268806e-05, - "loss": 0.522, - "step": 140000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5130712985992432, - "eval_runtime": 151.8239, - "eval_samples_per_second": 101.717, - "eval_steps_per_second": 0.797, - "step": 140000 - }, - { - "epoch": 0.020198, - "loss_gen": 4.963345527648926, - "loss_rtd": 0.30678701400756836, - "loss_sent": 0.10555349290370941, - "loss_sod": 0.03217800706624985, - "loss_total": 0.444518506526947, - "step": 140099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.195887565612793, - "loss_rtd": 0.2673446536064148, - "loss_sent": 0.20815463364124298, - "loss_sod": 0.07391124218702316, - "loss_total": 0.5494105219841003, - "step": 140099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.8452274799346924, - "learning_rate": 5.393117994632605e-05, - "loss": 0.5316, - "step": 140100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.17562198638916, - "loss_rtd": 0.3097587525844574, - "loss_sent": 0.06611491739749908, - "loss_sod": 0.020314838737249374, - "loss_total": 0.39618849754333496, - "step": 140199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.2723388671875, - "loss_rtd": 0.3024448752403259, - "loss_sent": 0.13708089292049408, - "loss_sod": 0.01651904545724392, - "loss_total": 0.4560447931289673, - "step": 140199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.7367912530899048, - "learning_rate": 5.38995441319045e-05, - "loss": 0.5359, - "step": 140200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.102669715881348, - "loss_rtd": 0.29721760749816895, - "loss_sent": 0.14544659852981567, - "loss_sod": 0.054101817309856415, - "loss_total": 0.49676603078842163, - "step": 140299 - }, - { - "epoch": 0.020598, - "loss_gen": 4.7257609367370605, - "loss_rtd": 0.26518717408180237, - "loss_sent": 0.10037977993488312, - "loss_sod": 0.057052429765462875, - "loss_total": 0.42261940240859985, - "step": 140299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.5840766429901123, - "learning_rate": 5.386790674674702e-05, - "loss": 0.5214, - "step": 140300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.420653343200684, - "loss_rtd": 0.3184576630592346, - "loss_sent": 0.0775059312582016, - "loss_sod": 0.08341328054666519, - "loss_total": 0.4793768525123596, - "step": 140399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.616556167602539, - "loss_rtd": 0.2992227077484131, - "loss_sent": 0.10827698558568954, - "loss_sod": 0.13227517902851105, - "loss_total": 0.5397748351097107, - "step": 140399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.0775346755981445, - "learning_rate": 5.383626780359719e-05, - "loss": 0.5216, - "step": 140400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.062439441680908, - "loss_rtd": 0.2716071307659149, - "loss_sent": 0.29206931591033936, - "loss_sod": 0.018853861838579178, - "loss_total": 0.5825303196907043, - "step": 140499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.32375431060791, - "loss_rtd": 0.2656872868537903, - "loss_sent": 0.03729403764009476, - "loss_sod": 0.13978254795074463, - "loss_total": 0.4427638649940491, - "step": 140499 - }, - { - "epoch": 0.021, - "grad_norm": 1.158530592918396, - "learning_rate": 5.380462731519912e-05, - "loss": 0.5097, - "step": 140500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.209296226501465, - "loss_rtd": 0.29382750391960144, - "loss_sent": 0.13547280430793762, - "loss_sod": 0.028409291058778763, - "loss_total": 0.4577096104621887, - "step": 140599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.403994083404541, - "loss_rtd": 0.27498820424079895, - "loss_sent": 3.983187343692407e-05, - "loss_sod": 0.309126079082489, - "loss_total": 0.5841541290283203, - "step": 140599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.4097546339035034, - "learning_rate": 5.377298529429764e-05, - "loss": 0.5369, - "step": 140600 - }, - { - "epoch": 0.021398, - "loss_gen": 4.614843368530273, - "loss_rtd": 0.2606056034564972, - "loss_sent": 7.508649287046865e-05, - "loss_sod": 0.20027443766593933, - "loss_total": 0.46095511317253113, - "step": 140699 - }, - { - "epoch": 0.021398, - "loss_gen": 4.698951721191406, - "loss_rtd": 0.27076229453086853, - "loss_sent": 0.00012203287042211741, - "loss_sod": 0.1711643636226654, - "loss_total": 0.44204869866371155, - "step": 140699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.1054785251617432, - "learning_rate": 5.374134175363813e-05, - "loss": 0.5182, - "step": 140700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.098642826080322, - "loss_rtd": 0.28260645270347595, - "loss_sent": 0.2348274439573288, - "loss_sod": 0.16163408756256104, - "loss_total": 0.6790679693222046, - "step": 140799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.961879253387451, - "loss_rtd": 0.30278342962265015, - "loss_sent": 0.1892790049314499, - "loss_sod": 0.02508535422384739, - "loss_total": 0.5171477794647217, - "step": 140799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.704318881034851, - "learning_rate": 5.370969670596662e-05, - "loss": 0.5224, - "step": 140800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.177754878997803, - "loss_rtd": 0.27701535820961, - "loss_sent": 0.27083972096443176, - "loss_sod": 0.07172597944736481, - "loss_total": 0.6195811033248901, - "step": 140899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.3604326248168945, - "loss_rtd": 0.28001242876052856, - "loss_sent": 0.3185557425022125, - "loss_sod": 0.07279413938522339, - "loss_total": 0.6713622808456421, - "step": 140899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.4917004108428955, - "learning_rate": 5.367805016402969e-05, - "loss": 0.5224, - "step": 140900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.1361308097839355, - "loss_rtd": 0.25162193179130554, - "loss_sent": 3.872735032928176e-05, - "loss_sod": 0.10565103590488434, - "loss_total": 0.3573116958141327, - "step": 140999 - }, - { - "epoch": 0.021998, - "loss_gen": 4.701605319976807, - "loss_rtd": 0.2827128469944, - "loss_sent": 0.23710231482982635, - "loss_sod": 0.10039571672677994, - "loss_total": 0.6202108860015869, - "step": 140999 - }, - { - "epoch": 0.022, - "grad_norm": 1.0531132221221924, - "learning_rate": 5.3646402140574625e-05, - "loss": 0.5406, - "step": 141000 - }, - { - "epoch": 0.022, - "eval_loss": 0.5066352486610413, - "eval_runtime": 151.8364, - "eval_samples_per_second": 101.708, - "eval_steps_per_second": 0.797, - "step": 141000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.351595878601074, - "loss_rtd": 0.28690072894096375, - "loss_sent": 0.28563031554222107, - "loss_sod": 0.05796433985233307, - "loss_total": 0.6304954290390015, - "step": 141099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.359902858734131, - "loss_rtd": 0.2727966010570526, - "loss_sent": 0.22008396685123444, - "loss_sod": 0.04198246821761131, - "loss_total": 0.5348630547523499, - "step": 141099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.092651128768921, - "learning_rate": 5.361475264834921e-05, - "loss": 0.5367, - "step": 141100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.347242832183838, - "loss_rtd": 0.2868700623512268, - "loss_sent": 0.049733467400074005, - "loss_sod": 0.07246068865060806, - "loss_total": 0.4090642035007477, - "step": 141199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.100693702697754, - "loss_rtd": 0.27388474345207214, - "loss_sent": 0.26119932532310486, - "loss_sod": 0.03162490203976631, - "loss_total": 0.5667089819908142, - "step": 141199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.895071268081665, - "learning_rate": 5.358310170010184e-05, - "loss": 0.5246, - "step": 141200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.4381794929504395, - "loss_rtd": 0.2834264039993286, - "loss_sent": 0.22384138405323029, - "loss_sod": 0.0389728844165802, - "loss_total": 0.5462406873703003, - "step": 141299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.192421913146973, - "loss_rtd": 0.2665290832519531, - "loss_sent": 0.20966970920562744, - "loss_sod": 0.03119862824678421, - "loss_total": 0.5073974132537842, - "step": 141299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.4381169080734253, - "learning_rate": 5.355144930858156e-05, - "loss": 0.5288, - "step": 141300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.142797946929932, - "loss_rtd": 0.2882399260997772, - "loss_sent": 0.33860427141189575, - "loss_sod": 0.14554527401924133, - "loss_total": 0.7723894715309143, - "step": 141399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.220066070556641, - "loss_rtd": 0.27605000138282776, - "loss_sent": 0.37749025225639343, - "loss_sod": 0.12252858281135559, - "loss_total": 0.7760688066482544, - "step": 141399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.5305678844451904, - "learning_rate": 5.35197954865379e-05, - "loss": 0.5442, - "step": 141400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.572871208190918, - "loss_rtd": 0.29910731315612793, - "loss_sent": 0.12829171121120453, - "loss_sod": 0.07723744958639145, - "loss_total": 0.5046364665031433, - "step": 141499 - }, - { - "epoch": 0.022998, - "loss_gen": 4.925431251525879, - "loss_rtd": 0.2905297875404358, - "loss_sent": 0.3508687913417816, - "loss_sod": 0.09138332307338715, - "loss_total": 0.7327818870544434, - "step": 141499 - }, - { - "epoch": 0.023, - "grad_norm": 1.5656780004501343, - "learning_rate": 5.348814024672105e-05, - "loss": 0.5172, - "step": 141500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.2485809326171875, - "loss_rtd": 0.27613022923469543, - "loss_sent": 0.14428235590457916, - "loss_sod": 0.03354290872812271, - "loss_total": 0.4539554715156555, - "step": 141599 - }, - { - "epoch": 0.023198, - "loss_gen": 4.784273624420166, - "loss_rtd": 0.2983526885509491, - "loss_sent": 0.14410346746444702, - "loss_sod": 0.02310715615749359, - "loss_total": 0.4655632972717285, - "step": 141599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.1338571310043335, - "learning_rate": 5.345648360188172e-05, - "loss": 0.5308, - "step": 141600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.090795040130615, - "loss_rtd": 0.29210251569747925, - "loss_sent": 0.12591008841991425, - "loss_sod": 0.014865301549434662, - "loss_total": 0.43287789821624756, - "step": 141699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.447715759277344, - "loss_rtd": 0.2845127284526825, - "loss_sent": 3.83231854357291e-05, - "loss_sod": 0.11685922741889954, - "loss_total": 0.4014102816581726, - "step": 141699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.6102616786956787, - "learning_rate": 5.342482556477122e-05, - "loss": 0.5348, - "step": 141700 - }, - { - "epoch": 0.023598, - "loss_gen": 4.8323283195495605, - "loss_rtd": 0.2786339521408081, - "loss_sent": 0.31370827555656433, - "loss_sod": 0.06373046338558197, - "loss_total": 0.656072735786438, - "step": 141799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.1615400314331055, - "loss_rtd": 0.2869878113269806, - "loss_sent": 0.140548974275589, - "loss_sod": 0.028873804956674576, - "loss_total": 0.45641058683395386, - "step": 141799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.0132590532302856, - "learning_rate": 5.339316614814138e-05, - "loss": 0.5241, - "step": 141800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.21695613861084, - "loss_rtd": 0.27958694100379944, - "loss_sent": 0.22142477333545685, - "loss_sod": 0.012841064482927322, - "loss_total": 0.5138527750968933, - "step": 141899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.146639823913574, - "loss_rtd": 0.2886291742324829, - "loss_sent": 0.0847259908914566, - "loss_sod": 0.155159592628479, - "loss_total": 0.5285147428512573, - "step": 141899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.0194185972213745, - "learning_rate": 5.336150536474463e-05, - "loss": 0.5183, - "step": 141900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.03857421875, - "loss_rtd": 0.3072211444377899, - "loss_sent": 0.19135643541812897, - "loss_sod": 0.011410113424062729, - "loss_total": 0.5099877119064331, - "step": 141999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.117791175842285, - "loss_rtd": 0.2907784879207611, - "loss_sent": 0.0756467804312706, - "loss_sod": 0.04117099940776825, - "loss_total": 0.40759626030921936, - "step": 141999 - }, - { - "epoch": 0.024, - "grad_norm": 0.7143073678016663, - "learning_rate": 5.332984322733392e-05, - "loss": 0.5268, - "step": 142000 - }, - { - "epoch": 0.024, - "eval_loss": 0.5095360279083252, - "eval_runtime": 151.8864, - "eval_samples_per_second": 101.675, - "eval_steps_per_second": 0.797, - "step": 142000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.314025402069092, - "loss_rtd": 0.2714063227176666, - "loss_sent": 0.17580746114253998, - "loss_sod": 0.07070260494947433, - "loss_total": 0.5179163813591003, - "step": 142099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.130003929138184, - "loss_rtd": 0.2887544631958008, - "loss_sent": 0.14470921456813812, - "loss_sod": 0.028869371861219406, - "loss_total": 0.4623330235481262, - "step": 142099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.853802502155304, - "learning_rate": 5.3298179748662756e-05, - "loss": 0.5407, - "step": 142100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.291382312774658, - "loss_rtd": 0.28734806180000305, - "loss_sent": 0.3452179729938507, - "loss_sod": 0.1100362166762352, - "loss_total": 0.7426022291183472, - "step": 142199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.413672924041748, - "loss_rtd": 0.2905466854572296, - "loss_sent": 0.39333876967430115, - "loss_sod": 0.06980624794960022, - "loss_total": 0.7536916732788086, - "step": 142199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.6220515966415405, - "learning_rate": 5.326651494148518e-05, - "loss": 0.5452, - "step": 142200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.5579915046691895, - "loss_rtd": 0.2797586917877197, - "loss_sent": 0.05823824927210808, - "loss_sod": 0.05107353627681732, - "loss_total": 0.3890704810619354, - "step": 142299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.982907295227051, - "loss_rtd": 0.2790917456150055, - "loss_sent": 0.30364033579826355, - "loss_sod": 0.06816454976797104, - "loss_total": 0.6508966684341431, - "step": 142299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.0212825536727905, - "learning_rate": 5.323484881855578e-05, - "loss": 0.5266, - "step": 142300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.212350845336914, - "loss_rtd": 0.29858461022377014, - "loss_sent": 0.3669554889202118, - "loss_sod": 0.06416751444339752, - "loss_total": 0.7297075986862183, - "step": 142399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.119755744934082, - "loss_rtd": 0.2848243713378906, - "loss_sent": 0.2943706214427948, - "loss_sod": 0.00534034613519907, - "loss_total": 0.5845353603363037, - "step": 142399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.4153069257736206, - "learning_rate": 5.3203181392629655e-05, - "loss": 0.5271, - "step": 142400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.111626625061035, - "loss_rtd": 0.28250032663345337, - "loss_sent": 0.06871972978115082, - "loss_sod": 0.03493531793355942, - "loss_total": 0.386155366897583, - "step": 142499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.229166507720947, - "loss_rtd": 0.2831133306026459, - "loss_sent": 0.14898912608623505, - "loss_sod": 0.037110161036252975, - "loss_total": 0.4692125916481018, - "step": 142499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8726935386657715, - "learning_rate": 5.317151267646246e-05, - "loss": 0.5309, - "step": 142500 - }, - { - "epoch": 0.025198, - "loss_gen": 4.964182376861572, - "loss_rtd": 0.2866588830947876, - "loss_sent": 0.14459633827209473, - "loss_sod": 0.04257828742265701, - "loss_total": 0.47383350133895874, - "step": 142599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.328502178192139, - "loss_rtd": 0.2869490087032318, - "loss_sent": 0.24192702770233154, - "loss_sod": 0.09247876703739166, - "loss_total": 0.6213548183441162, - "step": 142599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.888357400894165, - "learning_rate": 5.3139842682810325e-05, - "loss": 0.5268, - "step": 142600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.31513786315918, - "loss_rtd": 0.30803197622299194, - "loss_sent": 0.24215351045131683, - "loss_sod": 0.17132531106472015, - "loss_total": 0.7215108275413513, - "step": 142699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.110383033752441, - "loss_rtd": 0.3043190836906433, - "loss_sent": 0.29990580677986145, - "loss_sod": 0.02735138311982155, - "loss_total": 0.6315762996673584, - "step": 142699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.414551854133606, - "learning_rate": 5.310817142442994e-05, - "loss": 0.5275, - "step": 142700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.0857696533203125, - "loss_rtd": 0.2900143563747406, - "loss_sent": 0.13079498708248138, - "loss_sod": 0.03388945758342743, - "loss_total": 0.4546988010406494, - "step": 142799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.201300144195557, - "loss_rtd": 0.28961318731307983, - "loss_sent": 0.17543339729309082, - "loss_sod": 0.09287995100021362, - "loss_total": 0.5579265356063843, - "step": 142799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.7257480025291443, - "learning_rate": 5.3076498914078485e-05, - "loss": 0.526, - "step": 142800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.191403865814209, - "loss_rtd": 0.27923136949539185, - "loss_sent": 0.19919177889823914, - "loss_sod": 0.06867915391921997, - "loss_total": 0.5471023321151733, - "step": 142899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.063490390777588, - "loss_rtd": 0.2899428904056549, - "loss_sent": 0.23567284643650055, - "loss_sod": 0.0096047418192029, - "loss_total": 0.5352205038070679, - "step": 142899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.9004678130149841, - "learning_rate": 5.3044825164513625e-05, - "loss": 0.5357, - "step": 142900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.831913948059082, - "loss_rtd": 0.2710915803909302, - "loss_sent": 0.005070207174867392, - "loss_sod": 0.19541436433792114, - "loss_total": 0.47157615423202515, - "step": 142999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.439896583557129, - "loss_rtd": 0.2875880002975464, - "loss_sent": 0.2070181518793106, - "loss_sod": 0.15899549424648285, - "loss_total": 0.6536016464233398, - "step": 142999 - }, - { - "epoch": 0.026, - "grad_norm": 1.2820786237716675, - "learning_rate": 5.3013150188493554e-05, - "loss": 0.5319, - "step": 143000 - }, - { - "epoch": 0.026, - "eval_loss": 0.5012403726577759, - "eval_runtime": 151.7563, - "eval_samples_per_second": 101.762, - "eval_steps_per_second": 0.797, - "step": 143000 - }, - { - "epoch": 0.000198, - "loss_gen": 4.66406774520874, - "loss_rtd": 0.2746325731277466, - "loss_sent": 0.03527475520968437, - "loss_sod": 0.10925401747226715, - "loss_total": 0.419161319732666, - "step": 143099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.168968200683594, - "loss_rtd": 0.3047997057437897, - "loss_sent": 0.1341041475534439, - "loss_sod": 0.022689972072839737, - "loss_total": 0.4615938067436218, - "step": 143099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.9890168905258179, - "learning_rate": 5.298147399877694e-05, - "loss": 0.5203, - "step": 143100 - }, - { - "epoch": 0.000398, - "loss_gen": 6.0309739112854, - "loss_rtd": 0.28600800037384033, - "loss_sent": 0.07655972987413406, - "loss_sod": 0.12908899784088135, - "loss_total": 0.49165672063827515, - "step": 143199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.226163864135742, - "loss_rtd": 0.2816427946090698, - "loss_sent": 0.271378755569458, - "loss_sod": 0.08493401110172272, - "loss_total": 0.6379555463790894, - "step": 143199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.8018764853477478, - "learning_rate": 5.294979660812298e-05, - "loss": 0.5279, - "step": 143200 - }, - { - "epoch": 0.000598, - "loss_gen": 4.230194568634033, - "loss_rtd": 0.2597436010837555, - "loss_sent": 4.242077920935117e-05, - "loss_sod": 0.18114471435546875, - "loss_total": 0.44093072414398193, - "step": 143299 - }, - { - "epoch": 0.000598, - "loss_gen": 4.6562113761901855, - "loss_rtd": 0.29104700684547424, - "loss_sent": 0.13502706587314606, - "loss_sod": 0.06349501013755798, - "loss_total": 0.4895690679550171, - "step": 143299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.9213275909423828, - "learning_rate": 5.2918118029291273e-05, - "loss": 0.5265, - "step": 143300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.273587226867676, - "loss_rtd": 0.28636878728866577, - "loss_sent": 0.2882729172706604, - "loss_sod": 0.051931463181972504, - "loss_total": 0.6265732049942017, - "step": 143399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.987724304199219, - "loss_rtd": 0.28506967425346375, - "loss_sent": 0.15950746834278107, - "loss_sod": 0.05867493897676468, - "loss_total": 0.5032520890235901, - "step": 143399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.0150002241134644, - "learning_rate": 5.288643827504199e-05, - "loss": 0.5162, - "step": 143400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.251161098480225, - "loss_rtd": 0.26213178038597107, - "loss_sent": 0.001973393140360713, - "loss_sod": 0.09073591232299805, - "loss_total": 0.35484108328819275, - "step": 143499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.917688369750977, - "loss_rtd": 0.2969304919242859, - "loss_sent": 0.5181108117103577, - "loss_sod": 0.001427557785063982, - "loss_total": 0.8164688348770142, - "step": 143499 - }, - { - "epoch": 0.001, - "grad_norm": 1.1918784379959106, - "learning_rate": 5.28547573581357e-05, - "loss": 0.536, - "step": 143500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.211683750152588, - "loss_rtd": 0.27299249172210693, - "loss_sent": 0.1254974752664566, - "loss_sod": 0.030453510582447052, - "loss_total": 0.4289434552192688, - "step": 143599 - }, - { - "epoch": 0.001198, - "loss_gen": 4.957193374633789, - "loss_rtd": 0.2765406668186188, - "loss_sent": 0.2259223908185959, - "loss_sod": 0.009444572031497955, - "loss_total": 0.5119076371192932, - "step": 143599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.7084022164344788, - "learning_rate": 5.28230752913335e-05, - "loss": 0.5381, - "step": 143600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.326329708099365, - "loss_rtd": 0.28667306900024414, - "loss_sent": 0.37185096740722656, - "loss_sod": 0.055304475128650665, - "loss_total": 0.7138285040855408, - "step": 143699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.471609115600586, - "loss_rtd": 0.2862567901611328, - "loss_sent": 0.32228535413742065, - "loss_sod": 0.034261059015989304, - "loss_total": 0.6428031921386719, - "step": 143699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.2625130414962769, - "learning_rate": 5.2791392087396916e-05, - "loss": 0.5404, - "step": 143700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.279477596282959, - "loss_rtd": 0.28712037205696106, - "loss_sent": 0.07828990370035172, - "loss_sod": 0.04689822718501091, - "loss_total": 0.4123084843158722, - "step": 143799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.951378345489502, - "loss_rtd": 0.28546950221061707, - "loss_sent": 0.2018580287694931, - "loss_sod": 0.031223274767398834, - "loss_total": 0.5185508131980896, - "step": 143799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.14565908908844, - "learning_rate": 5.275970775908793e-05, - "loss": 0.5412, - "step": 143800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.2541632652282715, - "loss_rtd": 0.2872055768966675, - "loss_sent": 0.0985424593091011, - "loss_sod": 0.09489560127258301, - "loss_total": 0.480643630027771, - "step": 143899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.1686882972717285, - "loss_rtd": 0.28436461091041565, - "loss_sent": 0.2751750648021698, - "loss_sod": 0.08275718986988068, - "loss_total": 0.6422969102859497, - "step": 143899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.7285401821136475, - "learning_rate": 5.272802231916897e-05, - "loss": 0.533, - "step": 143900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.279683589935303, - "loss_rtd": 0.28544238209724426, - "loss_sent": 0.32155296206474304, - "loss_sod": 0.1077994555234909, - "loss_total": 0.7147947549819946, - "step": 143999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.334860324859619, - "loss_rtd": 0.2843003273010254, - "loss_sent": 0.25391697883605957, - "loss_sod": 0.06681793183088303, - "loss_total": 0.6050352454185486, - "step": 143999 - }, - { - "epoch": 0.002, - "grad_norm": 1.001358151435852, - "learning_rate": 5.269633578040296e-05, - "loss": 0.5315, - "step": 144000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4986079931259155, - "eval_runtime": 151.7633, - "eval_samples_per_second": 101.757, - "eval_steps_per_second": 0.797, - "step": 144000 - }, - { - "epoch": 0.002198, - "loss_gen": 4.742333889007568, - "loss_rtd": 0.27811941504478455, - "loss_sent": 0.0001277799456147477, - "loss_sod": 0.28573179244995117, - "loss_total": 0.5639790296554565, - "step": 144099 - }, - { - "epoch": 0.002198, - "loss_gen": 4.326420307159424, - "loss_rtd": 0.2698899507522583, - "loss_sent": 0.007454150356352329, - "loss_sod": 0.05084558576345444, - "loss_total": 0.3281897008419037, - "step": 144099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.4421451091766357, - "learning_rate": 5.266464815555322e-05, - "loss": 0.5063, - "step": 144100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.360074043273926, - "loss_rtd": 0.25610458850860596, - "loss_sent": 0.14470449090003967, - "loss_sod": 0.13214565813541412, - "loss_total": 0.5329546928405762, - "step": 144199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.312962532043457, - "loss_rtd": 0.27623429894447327, - "loss_sent": 0.12307702749967575, - "loss_sod": 0.06466057151556015, - "loss_total": 0.46397191286087036, - "step": 144199 - }, - { - "epoch": 0.0024, - "grad_norm": 2.330955982208252, - "learning_rate": 5.2632959457383524e-05, - "loss": 0.5111, - "step": 144200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.245543479919434, - "loss_rtd": 0.27186232805252075, - "loss_sent": 0.1922900378704071, - "loss_sod": 0.019116532057523727, - "loss_total": 0.4832688868045807, - "step": 144299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.100775241851807, - "loss_rtd": 0.26578307151794434, - "loss_sent": 0.14884836971759796, - "loss_sod": 0.06197121739387512, - "loss_total": 0.4766026735305786, - "step": 144299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.8121767044067383, - "learning_rate": 5.260126969865806e-05, - "loss": 0.5047, - "step": 144300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.928552627563477, - "loss_rtd": 0.29740992188453674, - "loss_sent": 0.06796685606241226, - "loss_sod": 0.013185365125536919, - "loss_total": 0.3785621225833893, - "step": 144399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.756792068481445, - "loss_rtd": 0.2940201461315155, - "loss_sent": 0.12159086763858795, - "loss_sod": 0.11485806107521057, - "loss_total": 0.5304690599441528, - "step": 144399 - }, - { - "epoch": 0.0028, - "grad_norm": 0.7329797744750977, - "learning_rate": 5.256957889214149e-05, - "loss": 0.5407, - "step": 144400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.3688836097717285, - "loss_rtd": 0.27353352308273315, - "loss_sent": 0.20044541358947754, - "loss_sod": 0.11551567912101746, - "loss_total": 0.5894945859909058, - "step": 144499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.121417045593262, - "loss_rtd": 0.30285027623176575, - "loss_sent": 0.17902149260044098, - "loss_sod": 0.04204069823026657, - "loss_total": 0.5239124894142151, - "step": 144499 - }, - { - "epoch": 0.003, - "grad_norm": 1.3801915645599365, - "learning_rate": 5.2537887050598836e-05, - "loss": 0.5248, - "step": 144500 - }, - { - "epoch": 0.003198, - "loss_gen": 4.300551891326904, - "loss_rtd": 0.26581066846847534, - "loss_sent": 0.06156810745596886, - "loss_sod": 0.10824832320213318, - "loss_total": 0.4356271028518677, - "step": 144599 - }, - { - "epoch": 0.003198, - "loss_gen": 4.990353107452393, - "loss_rtd": 0.2805595099925995, - "loss_sent": 0.2781326472759247, - "loss_sod": 0.01689683087170124, - "loss_total": 0.5755890011787415, - "step": 144599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.461370825767517, - "learning_rate": 5.2506194186795585e-05, - "loss": 0.5333, - "step": 144600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.402153015136719, - "loss_rtd": 0.29907524585723877, - "loss_sent": 0.2757262885570526, - "loss_sod": 0.05807660520076752, - "loss_total": 0.6328781843185425, - "step": 144699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.253642559051514, - "loss_rtd": 0.3036414682865143, - "loss_sent": 0.23505249619483948, - "loss_sod": 0.02995115891098976, - "loss_total": 0.5686451196670532, - "step": 144699 - }, - { - "epoch": 0.0034, - "grad_norm": 2.0132596492767334, - "learning_rate": 5.247450031349761e-05, - "loss": 0.5165, - "step": 144700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.567648410797119, - "loss_rtd": 0.2975943088531494, - "loss_sent": 0.12214425206184387, - "loss_sod": 0.04818717762827873, - "loss_total": 0.4679257273674011, - "step": 144799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.476112365722656, - "loss_rtd": 0.2752467691898346, - "loss_sent": 0.2662510573863983, - "loss_sod": 0.08222628384828568, - "loss_total": 0.623724102973938, - "step": 144799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.566232442855835, - "learning_rate": 5.244280544347122e-05, - "loss": 0.5208, - "step": 144800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.422399044036865, - "loss_rtd": 0.2777884006500244, - "loss_sent": 0.30881041288375854, - "loss_sod": 0.06714334338903427, - "loss_total": 0.6537421941757202, - "step": 144899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.092738628387451, - "loss_rtd": 0.27688854932785034, - "loss_sent": 0.09166580438613892, - "loss_sod": 0.020468203350901604, - "loss_total": 0.389022558927536, - "step": 144899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.0747374296188354, - "learning_rate": 5.2411109589483074e-05, - "loss": 0.5349, - "step": 144900 - }, - { - "epoch": 0.003998, - "loss_gen": 4.289102554321289, - "loss_rtd": 0.25185874104499817, - "loss_sent": 0.030722348019480705, - "loss_sod": 0.13470537960529327, - "loss_total": 0.4172864854335785, - "step": 144999 - }, - { - "epoch": 0.003998, - "loss_gen": 4.872633457183838, - "loss_rtd": 0.28845635056495667, - "loss_sent": 0.23248308897018433, - "loss_sod": 0.010248836129903793, - "loss_total": 0.5311882495880127, - "step": 144999 - }, - { - "epoch": 0.004, - "grad_norm": 0.8651681542396545, - "learning_rate": 5.2379412764300286e-05, - "loss": 0.5108, - "step": 145000 - }, - { - "epoch": 0.004, - "eval_loss": 0.49849000573158264, - "eval_runtime": 150.0413, - "eval_samples_per_second": 102.925, - "eval_steps_per_second": 0.806, - "step": 145000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.525643825531006, - "loss_rtd": 0.26510900259017944, - "loss_sent": 4.2476720409467816e-05, - "loss_sod": 0.16975006461143494, - "loss_total": 0.43490153551101685, - "step": 145099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.021480560302734, - "loss_rtd": 0.27532893419265747, - "loss_sent": 0.048536594957113266, - "loss_sod": 0.14325806498527527, - "loss_total": 0.4671235978603363, - "step": 145099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2153644561767578, - "learning_rate": 5.234771498069032e-05, - "loss": 0.5306, - "step": 145100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.092408657073975, - "loss_rtd": 0.29199859499931335, - "loss_sent": 0.1541261076927185, - "loss_sod": 0.02647477388381958, - "loss_total": 0.47259947657585144, - "step": 145199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.244051456451416, - "loss_rtd": 0.2970103621482849, - "loss_sent": 0.3261437714099884, - "loss_sod": 0.07276580482721329, - "loss_total": 0.695919930934906, - "step": 145199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.7709296941757202, - "learning_rate": 5.231601625142103e-05, - "loss": 0.5116, - "step": 145200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.031761169433594, - "loss_rtd": 0.30165672302246094, - "loss_sent": 0.059283364564180374, - "loss_sod": 0.00943625159561634, - "loss_total": 0.3703763484954834, - "step": 145299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.8102126121521, - "loss_rtd": 0.28832292556762695, - "loss_sent": 0.1924618035554886, - "loss_sod": 0.12810344994068146, - "loss_total": 0.6088881492614746, - "step": 145299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.8893154859542847, - "learning_rate": 5.228431658926068e-05, - "loss": 0.5195, - "step": 145300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.59763240814209, - "loss_rtd": 0.29531168937683105, - "loss_sent": 0.11734317243099213, - "loss_sod": 0.024245794862508774, - "loss_total": 0.43690067529678345, - "step": 145399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.305819034576416, - "loss_rtd": 0.2831506133079529, - "loss_sent": 0.14053313434123993, - "loss_sod": 0.0076009538024663925, - "loss_total": 0.43128472566604614, - "step": 145399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.1131749153137207, - "learning_rate": 5.225261600697787e-05, - "loss": 0.5084, - "step": 145400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.338892936706543, - "loss_rtd": 0.2787962555885315, - "loss_sent": 0.10962951928377151, - "loss_sod": 0.028091823682188988, - "loss_total": 0.41651758551597595, - "step": 145499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.347107887268066, - "loss_rtd": 0.2881333529949188, - "loss_sent": 0.2519276738166809, - "loss_sod": 0.02762795239686966, - "loss_total": 0.5676889419555664, - "step": 145499 - }, - { - "epoch": 0.005, - "grad_norm": 0.8285014033317566, - "learning_rate": 5.2220914517341614e-05, - "loss": 0.5065, - "step": 145500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.96449613571167, - "loss_rtd": 0.28018784523010254, - "loss_sent": 0.12971442937850952, - "loss_sod": 0.010564430616796017, - "loss_total": 0.42046669125556946, - "step": 145599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.47280216217041, - "loss_rtd": 0.2938080430030823, - "loss_sent": 0.18062248826026917, - "loss_sod": 0.1176159530878067, - "loss_total": 0.5920464992523193, - "step": 145599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.9586519002914429, - "learning_rate": 5.218921213312125e-05, - "loss": 0.5203, - "step": 145600 - }, - { - "epoch": 0.005398, - "loss_gen": 4.765594482421875, - "loss_rtd": 0.24818666279315948, - "loss_sent": 0.02665616199374199, - "loss_sod": 0.18032819032669067, - "loss_total": 0.45517098903656006, - "step": 145699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.4180588722229, - "loss_rtd": 0.29591622948646545, - "loss_sent": 0.15679924190044403, - "loss_sod": 0.03232501819729805, - "loss_total": 0.48504048585891724, - "step": 145699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.8756905794143677, - "learning_rate": 5.2157508867086505e-05, - "loss": 0.5301, - "step": 145700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.147712230682373, - "loss_rtd": 0.2685620188713074, - "loss_sent": 0.2744750678539276, - "loss_sod": 0.0034240009263157845, - "loss_total": 0.5464611053466797, - "step": 145799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.349913597106934, - "loss_rtd": 0.27673259377479553, - "loss_sent": 0.5106353759765625, - "loss_sod": 0.09146961569786072, - "loss_total": 0.8788375854492188, - "step": 145799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.717309832572937, - "learning_rate": 5.2125804732007444e-05, - "loss": 0.5258, - "step": 145800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.037972450256348, - "loss_rtd": 0.2800423800945282, - "loss_sent": 0.16363272070884705, - "loss_sod": 0.030139204114675522, - "loss_total": 0.47381430864334106, - "step": 145899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.376882553100586, - "loss_rtd": 0.30431145429611206, - "loss_sent": 0.10817435383796692, - "loss_sod": 0.10825251787900925, - "loss_total": 0.5207383632659912, - "step": 145899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.1302735805511475, - "learning_rate": 5.209409974065449e-05, - "loss": 0.5219, - "step": 145900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.465824127197266, - "loss_rtd": 0.2778286337852478, - "loss_sent": 0.34211188554763794, - "loss_sod": 0.05147743597626686, - "loss_total": 0.6714179515838623, - "step": 145999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.824912071228027, - "loss_rtd": 0.2751026451587677, - "loss_sent": 0.0986921563744545, - "loss_sod": 0.11800215393304825, - "loss_total": 0.49179694056510925, - "step": 145999 - }, - { - "epoch": 0.006, - "grad_norm": 1.7354319095611572, - "learning_rate": 5.206239390579841e-05, - "loss": 0.5245, - "step": 146000 - }, - { - "epoch": 0.006, - "eval_loss": 0.49971112608909607, - "eval_runtime": 151.2542, - "eval_samples_per_second": 102.1, - "eval_steps_per_second": 0.8, - "step": 146000 - }, - { - "epoch": 0.006198, - "loss_gen": 4.642544746398926, - "loss_rtd": 0.2521948218345642, - "loss_sent": 0.027225524187088013, - "loss_sod": 0.10969524830579758, - "loss_total": 0.3891156017780304, - "step": 146099 - }, - { - "epoch": 0.006198, - "loss_gen": 4.493594169616699, - "loss_rtd": 0.2724360525608063, - "loss_sent": 6.572721031261608e-05, - "loss_sod": 0.11912119388580322, - "loss_total": 0.39162296056747437, - "step": 146099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.7895417809486389, - "learning_rate": 5.203068724021032e-05, - "loss": 0.5336, - "step": 146100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.546789169311523, - "loss_rtd": 0.2653850018978119, - "loss_sent": 0.07199425995349884, - "loss_sod": 0.19736507534980774, - "loss_total": 0.5347443222999573, - "step": 146199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.211479663848877, - "loss_rtd": 0.2985738515853882, - "loss_sent": 0.31487011909484863, - "loss_sod": 0.03032844513654709, - "loss_total": 0.6437724232673645, - "step": 146199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.3916696310043335, - "learning_rate": 5.199897975666164e-05, - "loss": 0.4996, - "step": 146200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.014381408691406, - "loss_rtd": 0.26531317830085754, - "loss_sent": 0.008006240241229534, - "loss_sod": 0.280905157327652, - "loss_total": 0.5542245507240295, - "step": 146299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.242966175079346, - "loss_rtd": 0.29195889830589294, - "loss_sent": 0.12836714088916779, - "loss_sod": 0.07119995355606079, - "loss_total": 0.4915260076522827, - "step": 146299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.2561445236206055, - "learning_rate": 5.196727146792416e-05, - "loss": 0.5213, - "step": 146300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.0602192878723145, - "loss_rtd": 0.27024582028388977, - "loss_sent": 0.16223858296871185, - "loss_sod": 0.03788085654377937, - "loss_total": 0.4703652858734131, - "step": 146399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.497828960418701, - "loss_rtd": 0.30007460713386536, - "loss_sent": 0.20957550406455994, - "loss_sod": 0.017270918935537338, - "loss_total": 0.5269210338592529, - "step": 146399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.963026762008667, - "learning_rate": 5.193556238676996e-05, - "loss": 0.5167, - "step": 146400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.307521820068359, - "loss_rtd": 0.2788214385509491, - "loss_sent": 0.07746175676584244, - "loss_sod": 0.17130763828754425, - "loss_total": 0.5275908708572388, - "step": 146499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.17738676071167, - "loss_rtd": 0.2861032485961914, - "loss_sent": 0.27327609062194824, - "loss_sod": 0.011359155178070068, - "loss_total": 0.5707384943962097, - "step": 146499 - }, - { - "epoch": 0.007, - "grad_norm": 0.8306548595428467, - "learning_rate": 5.1903852525971476e-05, - "loss": 0.5264, - "step": 146500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.597184181213379, - "loss_rtd": 0.2621597647666931, - "loss_sent": 0.15151676535606384, - "loss_sod": 0.0587867796421051, - "loss_total": 0.47246330976486206, - "step": 146599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.857696533203125, - "loss_rtd": 0.3095194697380066, - "loss_sent": 0.24361759424209595, - "loss_sod": 0.016918929293751717, - "loss_total": 0.5700559616088867, - "step": 146599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.8499969840049744, - "learning_rate": 5.1872141898301405e-05, - "loss": 0.5222, - "step": 146600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.243187427520752, - "loss_rtd": 0.28105396032333374, - "loss_sent": 0.23848485946655273, - "loss_sod": 0.028276845812797546, - "loss_total": 0.5478156805038452, - "step": 146699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.279906272888184, - "loss_rtd": 0.2728053629398346, - "loss_sent": 0.275601327419281, - "loss_sod": 0.031477998942136765, - "loss_total": 0.5798846483230591, - "step": 146699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.8263813257217407, - "learning_rate": 5.184043051653282e-05, - "loss": 0.5411, - "step": 146700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.104190826416016, - "loss_rtd": 0.30905061960220337, - "loss_sent": 0.12589170038700104, - "loss_sod": 0.07200553268194199, - "loss_total": 0.5069478750228882, - "step": 146799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.063407897949219, - "loss_rtd": 0.261229932308197, - "loss_sent": 0.2982919216156006, - "loss_sod": 0.06371574103832245, - "loss_total": 0.6232376098632812, - "step": 146799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.327774167060852, - "learning_rate": 5.180871839343904e-05, - "loss": 0.5035, - "step": 146800 - }, - { - "epoch": 0.007798, - "loss_gen": 4.542552471160889, - "loss_rtd": 0.2761382460594177, - "loss_sent": 0.0015847000759094954, - "loss_sod": 0.0996432900428772, - "loss_total": 0.37736624479293823, - "step": 146899 - }, - { - "epoch": 0.007798, - "loss_gen": 4.099895000457764, - "loss_rtd": 0.2609536945819855, - "loss_sent": 4.1595685615902767e-05, - "loss_sod": 0.12537619471549988, - "loss_total": 0.3863714933395386, - "step": 146899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.9281373023986816, - "learning_rate": 5.17770055417937e-05, - "loss": 0.5355, - "step": 146900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.141694068908691, - "loss_rtd": 0.28546997904777527, - "loss_sent": 0.06943061947822571, - "loss_sod": 0.03025348111987114, - "loss_total": 0.3851540684700012, - "step": 146999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.212350368499756, - "loss_rtd": 0.28410953283309937, - "loss_sent": 0.5911439657211304, - "loss_sod": 0.053531453013420105, - "loss_total": 0.928784966468811, - "step": 146999 - }, - { - "epoch": 0.008, - "grad_norm": 2.583029270172119, - "learning_rate": 5.174529197437075e-05, - "loss": 0.5139, - "step": 147000 - }, - { - "epoch": 0.008, - "eval_loss": 0.5039019584655762, - "eval_runtime": 149.783, - "eval_samples_per_second": 103.102, - "eval_steps_per_second": 0.808, - "step": 147000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.981076717376709, - "loss_rtd": 0.2826388478279114, - "loss_sent": 0.15006348490715027, - "loss_sod": 0.05539526045322418, - "loss_total": 0.48809757828712463, - "step": 147099 - }, - { - "epoch": 0.008198, - "loss_gen": 4.922446250915527, - "loss_rtd": 0.26888060569763184, - "loss_sent": 0.12618999183177948, - "loss_sod": 0.02075822651386261, - "loss_total": 0.4158288240432739, - "step": 147099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.805479884147644, - "learning_rate": 5.1713577703944386e-05, - "loss": 0.534, - "step": 147100 - }, - { - "epoch": 0.008398, - "loss_gen": 4.193459510803223, - "loss_rtd": 0.202713742852211, - "loss_sent": 0.0001042370276991278, - "loss_sod": 0.40677186846733093, - "loss_total": 0.6095898747444153, - "step": 147199 - }, - { - "epoch": 0.008398, - "loss_gen": 4.794753074645996, - "loss_rtd": 0.2630331218242645, - "loss_sent": 0.03350198641419411, - "loss_sod": 0.16700756549835205, - "loss_total": 0.4635426700115204, - "step": 147199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.532089114189148, - "learning_rate": 5.168186274328913e-05, - "loss": 0.5168, - "step": 147200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.296161651611328, - "loss_rtd": 0.2707403004169464, - "loss_sent": 0.127390056848526, - "loss_sod": 0.08690618723630905, - "loss_total": 0.48503655195236206, - "step": 147299 - }, - { - "epoch": 0.008598, - "loss_gen": 4.702284812927246, - "loss_rtd": 0.269621878862381, - "loss_sent": 0.03573767468333244, - "loss_sod": 0.21274027228355408, - "loss_total": 0.518099844455719, - "step": 147299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.3610261678695679, - "learning_rate": 5.165014710517977e-05, - "loss": 0.5347, - "step": 147300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.544619560241699, - "loss_rtd": 0.2703266739845276, - "loss_sent": 0.22265926003456116, - "loss_sod": 0.09505819529294968, - "loss_total": 0.5880441665649414, - "step": 147399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.152088165283203, - "loss_rtd": 0.2840980589389801, - "loss_sent": 0.10082517564296722, - "loss_sod": 0.0299910306930542, - "loss_total": 0.41491425037384033, - "step": 147399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.0200332403182983, - "learning_rate": 5.161843080239135e-05, - "loss": 0.5303, - "step": 147400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.207124710083008, - "loss_rtd": 0.28050926327705383, - "loss_sent": 0.27539655566215515, - "loss_sod": 0.06495991349220276, - "loss_total": 0.6208657026290894, - "step": 147499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.303969383239746, - "loss_rtd": 0.2890775501728058, - "loss_sent": 0.07043773680925369, - "loss_sod": 0.05035356059670448, - "loss_total": 0.40986883640289307, - "step": 147499 - }, - { - "epoch": 0.009, - "grad_norm": 1.070663571357727, - "learning_rate": 5.1586713847699184e-05, - "loss": 0.5378, - "step": 147500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.774738788604736, - "loss_rtd": 0.2902034521102905, - "loss_sent": 0.19283321499824524, - "loss_sod": 0.07166126370429993, - "loss_total": 0.5546979308128357, - "step": 147599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.813538074493408, - "loss_rtd": 0.28299787640571594, - "loss_sent": 0.09115929156541824, - "loss_sod": 0.07303023338317871, - "loss_total": 0.4471873939037323, - "step": 147599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.8816004395484924, - "learning_rate": 5.1554996253878894e-05, - "loss": 0.5038, - "step": 147600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.608497619628906, - "loss_rtd": 0.26772594451904297, - "loss_sent": 0.019445884972810745, - "loss_sod": 0.2451837956905365, - "loss_total": 0.5323556661605835, - "step": 147699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.536288261413574, - "loss_rtd": 0.2630954384803772, - "loss_sent": 0.2004006803035736, - "loss_sod": 0.12198895961046219, - "loss_total": 0.5854851007461548, - "step": 147699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.9539245963096619, - "learning_rate": 5.152327803370628e-05, - "loss": 0.5288, - "step": 147700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.787131309509277, - "loss_rtd": 0.2679953873157501, - "loss_sent": 0.05050016939640045, - "loss_sod": 0.0257180817425251, - "loss_total": 0.34421366453170776, - "step": 147799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.5462260246276855, - "loss_rtd": 0.2658301591873169, - "loss_sent": 0.008703449741005898, - "loss_sod": 0.09948737174272537, - "loss_total": 0.3740209639072418, - "step": 147799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.8293658494949341, - "learning_rate": 5.149155919995747e-05, - "loss": 0.5265, - "step": 147800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.169826030731201, - "loss_rtd": 0.2948521673679352, - "loss_sent": 0.17312173545360565, - "loss_sod": 0.021826203912496567, - "loss_total": 0.4898000955581665, - "step": 147899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.541766166687012, - "loss_rtd": 0.25918081402778625, - "loss_sent": 9.824033622862771e-05, - "loss_sod": 0.06766112148761749, - "loss_total": 0.3269401788711548, - "step": 147899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.8617637753486633, - "learning_rate": 5.145983976540879e-05, - "loss": 0.5106, - "step": 147900 - }, - { - "epoch": 0.009998, - "loss_gen": 4.6215996742248535, - "loss_rtd": 0.26691102981567383, - "loss_sent": 0.07961020618677139, - "loss_sod": 0.038686759769916534, - "loss_total": 0.38520798087120056, - "step": 147999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.167181968688965, - "loss_rtd": 0.2748759984970093, - "loss_sent": 0.03125729411840439, - "loss_sod": 0.051365822553634644, - "loss_total": 0.3574991226196289, - "step": 147999 - }, - { - "epoch": 0.01, - "grad_norm": 0.6762785911560059, - "learning_rate": 5.142811974283683e-05, - "loss": 0.5172, - "step": 148000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4876296818256378, - "eval_runtime": 149.8683, - "eval_samples_per_second": 103.044, - "eval_steps_per_second": 0.807, - "step": 148000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.190354824066162, - "loss_rtd": 0.27201536297798157, - "loss_sent": 0.23600536584854126, - "loss_sod": 0.0445774644613266, - "loss_total": 0.5525981783866882, - "step": 148099 - }, - { - "epoch": 0.010198, - "loss_gen": 4.571619033813477, - "loss_rtd": 0.2665053606033325, - "loss_sent": 0.003586550010368228, - "loss_sod": 0.1969318687915802, - "loss_total": 0.4670237898826599, - "step": 148099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.0617401599884033, - "learning_rate": 5.13963991450184e-05, - "loss": 0.5031, - "step": 148100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.784799575805664, - "loss_rtd": 0.27273911237716675, - "loss_sent": 0.04332485422492027, - "loss_sod": 0.06624819338321686, - "loss_total": 0.382312148809433, - "step": 148199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.042555809020996, - "loss_rtd": 0.27097582817077637, - "loss_sent": 7.135730265872553e-05, - "loss_sod": 0.24795745313167572, - "loss_total": 0.5190046429634094, - "step": 148199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.9661006927490234, - "learning_rate": 5.136467798473057e-05, - "loss": 0.5347, - "step": 148200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.472863674163818, - "loss_rtd": 0.2973729074001312, - "loss_sent": 0.166547954082489, - "loss_sod": 0.0312616229057312, - "loss_total": 0.49518248438835144, - "step": 148299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.32472562789917, - "loss_rtd": 0.2740918695926666, - "loss_sent": 0.15659397840499878, - "loss_sod": 0.09983142465353012, - "loss_total": 0.5305172801017761, - "step": 148299 - }, - { - "epoch": 0.0106, - "grad_norm": 2.0236117839813232, - "learning_rate": 5.13329562747506e-05, - "loss": 0.5305, - "step": 148300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.342022895812988, - "loss_rtd": 0.2709534764289856, - "loss_sent": 0.0804288312792778, - "loss_sod": 0.12894587218761444, - "loss_total": 0.48032820224761963, - "step": 148399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.858490943908691, - "loss_rtd": 0.2701784670352936, - "loss_sent": 0.0011307575041428208, - "loss_sod": 0.1636728048324585, - "loss_total": 0.434982031583786, - "step": 148399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.430707573890686, - "learning_rate": 5.1301234027856e-05, - "loss": 0.5154, - "step": 148400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.170599460601807, - "loss_rtd": 0.2862345278263092, - "loss_sent": 0.2449491024017334, - "loss_sod": 0.006505691446363926, - "loss_total": 0.5376893281936646, - "step": 148499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.065062522888184, - "loss_rtd": 0.25986894965171814, - "loss_sent": 0.25112301111221313, - "loss_sod": 0.0871850922703743, - "loss_total": 0.5981770753860474, - "step": 148499 - }, - { - "epoch": 0.011, - "grad_norm": 1.5618078708648682, - "learning_rate": 5.12695112568245e-05, - "loss": 0.5255, - "step": 148500 - }, - { - "epoch": 0.011198, - "loss_gen": 4.927695274353027, - "loss_rtd": 0.27999284863471985, - "loss_sent": 0.1934816539287567, - "loss_sod": 0.0527733638882637, - "loss_total": 0.5262478590011597, - "step": 148599 - }, - { - "epoch": 0.011198, - "loss_gen": 4.234586238861084, - "loss_rtd": 0.26285597681999207, - "loss_sent": 0.00020559415861498564, - "loss_sod": 0.16865915060043335, - "loss_total": 0.4317207336425781, - "step": 148599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.0820720195770264, - "learning_rate": 5.123778797443402e-05, - "loss": 0.5257, - "step": 148600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.378777503967285, - "loss_rtd": 0.27872249484062195, - "loss_sent": 0.06293470412492752, - "loss_sod": 0.05825507640838623, - "loss_total": 0.3999122679233551, - "step": 148699 - }, - { - "epoch": 0.011398, - "loss_gen": 4.768366813659668, - "loss_rtd": 0.2677273452281952, - "loss_sent": 0.06426917761564255, - "loss_sod": 0.13946032524108887, - "loss_total": 0.4714568555355072, - "step": 148699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.7936626076698303, - "learning_rate": 5.1206064193462677e-05, - "loss": 0.5252, - "step": 148700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.086231708526611, - "loss_rtd": 0.25840625166893005, - "loss_sent": 0.39218491315841675, - "loss_sod": 0.0288742296397686, - "loss_total": 0.6794654130935669, - "step": 148799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.232894420623779, - "loss_rtd": 0.2748814821243286, - "loss_sent": 0.14246445894241333, - "loss_sod": 0.013335615396499634, - "loss_total": 0.4306815564632416, - "step": 148799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.8960897326469421, - "learning_rate": 5.117433992668883e-05, - "loss": 0.5172, - "step": 148800 - }, - { - "epoch": 0.011798, - "loss_gen": 4.525980472564697, - "loss_rtd": 0.26214292645454407, - "loss_sent": 0.00010965510591631755, - "loss_sod": 0.11243387311697006, - "loss_total": 0.37468644976615906, - "step": 148899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.614633083343506, - "loss_rtd": 0.25251060724258423, - "loss_sent": 0.15795734524726868, - "loss_sod": 0.04601619392633438, - "loss_total": 0.4564841389656067, - "step": 148899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.0195109844207764, - "learning_rate": 5.1142615186891e-05, - "loss": 0.5235, - "step": 148900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.448610782623291, - "loss_rtd": 0.3102265000343323, - "loss_sent": 0.15307582914829254, - "loss_sod": 0.12536995112895966, - "loss_total": 0.5886722803115845, - "step": 148999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.651727676391602, - "loss_rtd": 0.2846134603023529, - "loss_sent": 0.1086808368563652, - "loss_sod": 0.020908888429403305, - "loss_total": 0.4142031967639923, - "step": 148999 - }, - { - "epoch": 0.012, - "grad_norm": 1.1462866067886353, - "learning_rate": 5.111088998684791e-05, - "loss": 0.5132, - "step": 149000 - }, - { - "epoch": 0.012, - "eval_loss": 0.49717065691947937, - "eval_runtime": 150.5412, - "eval_samples_per_second": 102.583, - "eval_steps_per_second": 0.804, - "step": 149000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.183509826660156, - "loss_rtd": 0.2786847651004791, - "loss_sent": 0.18089258670806885, - "loss_sod": 0.02995058335363865, - "loss_total": 0.48952794075012207, - "step": 149099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.1459479331970215, - "loss_rtd": 0.2758665978908539, - "loss_sent": 0.38989973068237305, - "loss_sod": 0.061380039900541306, - "loss_total": 0.7271463871002197, - "step": 149099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.0846115350723267, - "learning_rate": 5.107916433933847e-05, - "loss": 0.5286, - "step": 149100 - }, - { - "epoch": 0.012398, - "loss_gen": 4.653554916381836, - "loss_rtd": 0.2821529507637024, - "loss_sent": 4.9881629820447415e-05, - "loss_sod": 0.17509253323078156, - "loss_total": 0.45729535818099976, - "step": 149199 - }, - { - "epoch": 0.012398, - "loss_gen": 4.378239154815674, - "loss_rtd": 0.24975354969501495, - "loss_sent": 4.61439231003169e-05, - "loss_sod": 0.0695633590221405, - "loss_total": 0.3193630576133728, - "step": 149199 - }, - { - "epoch": 0.0124, - "grad_norm": 0.6731809377670288, - "learning_rate": 5.104743825714175e-05, - "loss": 0.5074, - "step": 149200 - }, - { - "epoch": 0.012598, - "loss_gen": 4.971227169036865, - "loss_rtd": 0.28132012486457825, - "loss_sent": 0.20047783851623535, - "loss_sod": 0.005067291669547558, - "loss_total": 0.48686525225639343, - "step": 149299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.21154260635376, - "loss_rtd": 0.27386224269866943, - "loss_sent": 0.12603174149990082, - "loss_sod": 0.047394100576639175, - "loss_total": 0.4472880959510803, - "step": 149299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.994850218296051, - "learning_rate": 5.101571175303704e-05, - "loss": 0.505, - "step": 149300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.278228282928467, - "loss_rtd": 0.26508253812789917, - "loss_sent": 0.2426879107952118, - "loss_sod": 0.026061663404107094, - "loss_total": 0.5338320732116699, - "step": 149399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.448014259338379, - "loss_rtd": 0.2663023769855499, - "loss_sent": 0.11761312186717987, - "loss_sod": 0.034651800990104675, - "loss_total": 0.4185672998428345, - "step": 149399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.8637998104095459, - "learning_rate": 5.0983984839803746e-05, - "loss": 0.5073, - "step": 149400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.440591812133789, - "loss_rtd": 0.25507181882858276, - "loss_sent": 0.024109583348035812, - "loss_sod": 0.1100204810500145, - "loss_total": 0.3892018795013428, - "step": 149499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.3086419105529785, - "loss_rtd": 0.294219046831131, - "loss_sent": 0.23900176584720612, - "loss_sod": 0.04122648388147354, - "loss_total": 0.5744472742080688, - "step": 149499 - }, - { - "epoch": 0.013, - "grad_norm": 1.1947880983352661, - "learning_rate": 5.095225753022149e-05, - "loss": 0.5202, - "step": 149500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.865903854370117, - "loss_rtd": 0.2748209238052368, - "loss_sent": 0.00433766096830368, - "loss_sod": 0.23390433192253113, - "loss_total": 0.5130629539489746, - "step": 149599 - }, - { - "epoch": 0.013198, - "loss_gen": 4.590498924255371, - "loss_rtd": 0.2770313024520874, - "loss_sent": 0.026584025472402573, - "loss_sod": 0.059929199516773224, - "loss_total": 0.3635445237159729, - "step": 149599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.8740405440330505, - "learning_rate": 5.092052983707e-05, - "loss": 0.4994, - "step": 149600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.447321891784668, - "loss_rtd": 0.25785496830940247, - "loss_sent": 0.41757288575172424, - "loss_sod": 0.07028576731681824, - "loss_total": 0.7457135915756226, - "step": 149699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.204119682312012, - "loss_rtd": 0.2789541184902191, - "loss_sent": 0.13769857585430145, - "loss_sod": 0.08741885423660278, - "loss_total": 0.5040715336799622, - "step": 149699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.4651234149932861, - "learning_rate": 5.088880177312921e-05, - "loss": 0.515, - "step": 149700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.082386016845703, - "loss_rtd": 0.3076334297657013, - "loss_sent": 0.1974334418773651, - "loss_sod": 0.05499216169118881, - "loss_total": 0.5600590705871582, - "step": 149799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.488828182220459, - "loss_rtd": 0.2964935600757599, - "loss_sent": 0.17566673457622528, - "loss_sod": 0.041144777089357376, - "loss_total": 0.5133050680160522, - "step": 149799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.3623194694519043, - "learning_rate": 5.0857073351179166e-05, - "loss": 0.5094, - "step": 149800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.439533710479736, - "loss_rtd": 0.2903883159160614, - "loss_sent": 0.2238389253616333, - "loss_sod": 0.12228512018918991, - "loss_total": 0.6365123987197876, - "step": 149899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.538571834564209, - "loss_rtd": 0.26950180530548096, - "loss_sent": 0.43811267614364624, - "loss_sod": 0.03304041177034378, - "loss_total": 0.7406548857688904, - "step": 149899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.3348768949508667, - "learning_rate": 5.082534458400009e-05, - "loss": 0.5455, - "step": 149900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.461939334869385, - "loss_rtd": 0.2724016606807709, - "loss_sent": 0.2284865826368332, - "loss_sod": 0.029714003205299377, - "loss_total": 0.5306022763252258, - "step": 149999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.542205333709717, - "loss_rtd": 0.27906474471092224, - "loss_sent": 0.43526676297187805, - "loss_sod": 0.011782418936491013, - "loss_total": 0.7261139154434204, - "step": 149999 - }, - { - "epoch": 0.014, - "grad_norm": 1.2709500789642334, - "learning_rate": 5.07936154843723e-05, - "loss": 0.5119, - "step": 150000 - }, - { - "epoch": 0.014, - "eval_loss": 0.5034695863723755, - "eval_runtime": 150.354, - "eval_samples_per_second": 102.711, - "eval_steps_per_second": 0.805, - "step": 150000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.530719757080078, - "loss_rtd": 0.28405705094337463, - "loss_sent": 0.41221487522125244, - "loss_sod": 0.06176081299781799, - "loss_total": 0.7580327391624451, - "step": 150099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.354163646697998, - "loss_rtd": 0.2832927405834198, - "loss_sent": 0.12678612768650055, - "loss_sod": 0.08215628564357758, - "loss_total": 0.49223512411117554, - "step": 150099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.3871103525161743, - "learning_rate": 5.07618860650763e-05, - "loss": 0.5174, - "step": 150100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.387948989868164, - "loss_rtd": 0.2604489028453827, - "loss_sent": 0.0833883062005043, - "loss_sod": 0.1408933401107788, - "loss_total": 0.4847305417060852, - "step": 150199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.349581241607666, - "loss_rtd": 0.27376511693000793, - "loss_sent": 0.32408303022384644, - "loss_sod": 0.011156159453094006, - "loss_total": 0.6090043187141418, - "step": 150199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.3786460161209106, - "learning_rate": 5.0730156338892675e-05, - "loss": 0.5, - "step": 150200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.469412326812744, - "loss_rtd": 0.277353435754776, - "loss_sent": 0.15125852823257446, - "loss_sod": 0.018494347110390663, - "loss_total": 0.4471063017845154, - "step": 150299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.344174385070801, - "loss_rtd": 0.27286669611930847, - "loss_sent": 0.11355658620595932, - "loss_sod": 0.06565544009208679, - "loss_total": 0.4520787298679352, - "step": 150299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.7398344278335571, - "learning_rate": 5.0698426318602167e-05, - "loss": 0.5134, - "step": 150300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.567254066467285, - "loss_rtd": 0.2901189625263214, - "loss_sent": 0.08586336672306061, - "loss_sod": 0.0980837270617485, - "loss_total": 0.4740660786628723, - "step": 150399 - }, - { - "epoch": 0.014798, - "loss_gen": 4.967702865600586, - "loss_rtd": 0.2737525701522827, - "loss_sent": 0.025388075038790703, - "loss_sod": 0.06181246042251587, - "loss_total": 0.36095309257507324, - "step": 150399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.6185212135314941, - "learning_rate": 5.0666696016985616e-05, - "loss": 0.498, - "step": 150400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.319883346557617, - "loss_rtd": 0.2838747203350067, - "loss_sent": 0.09052863717079163, - "loss_sod": 0.07014872878789902, - "loss_total": 0.44455209374427795, - "step": 150499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.142828464508057, - "loss_rtd": 0.28203245997428894, - "loss_sent": 0.31190669536590576, - "loss_sod": 0.01716822385787964, - "loss_total": 0.611107349395752, - "step": 150499 - }, - { - "epoch": 0.015, - "grad_norm": 0.7125792503356934, - "learning_rate": 5.0634965446824e-05, - "loss": 0.5025, - "step": 150500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.298783302307129, - "loss_rtd": 0.2770278751850128, - "loss_sent": 0.15093576908111572, - "loss_sod": 0.05704700946807861, - "loss_total": 0.48501065373420715, - "step": 150599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.3972296714782715, - "loss_rtd": 0.2908611595630646, - "loss_sent": 0.11442862451076508, - "loss_sod": 0.08333995938301086, - "loss_total": 0.4886297583580017, - "step": 150599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.4157239198684692, - "learning_rate": 5.060323462089839e-05, - "loss": 0.5384, - "step": 150600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.017396926879883, - "loss_rtd": 0.2765372097492218, - "loss_sent": 0.10029003024101257, - "loss_sod": 0.1045127734541893, - "loss_total": 0.4813400208950043, - "step": 150699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.340902805328369, - "loss_rtd": 0.2603532075881958, - "loss_sent": 3.807917164522223e-05, - "loss_sod": 0.21225422620773315, - "loss_total": 0.47264552116394043, - "step": 150699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.9066276550292969, - "learning_rate": 5.057150355198992e-05, - "loss": 0.5255, - "step": 150700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.14680290222168, - "loss_rtd": 0.27704185247421265, - "loss_sent": 0.08824780583381653, - "loss_sod": 0.09765308350324631, - "loss_total": 0.4629427492618561, - "step": 150799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.663311004638672, - "loss_rtd": 0.26936689019203186, - "loss_sent": 0.1907847821712494, - "loss_sod": 0.09802541136741638, - "loss_total": 0.55817711353302, - "step": 150799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.303207516670227, - "learning_rate": 5.05397722528799e-05, - "loss": 0.5117, - "step": 150800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.496127128601074, - "loss_rtd": 0.29871439933776855, - "loss_sent": 0.15010574460029602, - "loss_sod": 0.11930060386657715, - "loss_total": 0.5681207180023193, - "step": 150899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.232115268707275, - "loss_rtd": 0.25799593329429626, - "loss_sent": 0.10728186368942261, - "loss_sod": 0.015332083217799664, - "loss_total": 0.3806098699569702, - "step": 150899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.2439154386520386, - "learning_rate": 5.050804073634967e-05, - "loss": 0.5134, - "step": 150900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.270485877990723, - "loss_rtd": 0.26713284850120544, - "loss_sent": 0.28044942021369934, - "loss_sod": 0.015043208375573158, - "loss_total": 0.5626254677772522, - "step": 150999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.34695291519165, - "loss_rtd": 0.287723183631897, - "loss_sent": 0.09541434794664383, - "loss_sod": 0.052663177251815796, - "loss_total": 0.435800701379776, - "step": 150999 - }, - { - "epoch": 0.016, - "grad_norm": 0.6401495337486267, - "learning_rate": 5.047630901518071e-05, - "loss": 0.5206, - "step": 151000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4957713484764099, - "eval_runtime": 150.2029, - "eval_samples_per_second": 102.814, - "eval_steps_per_second": 0.806, - "step": 151000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.460110664367676, - "loss_rtd": 0.273301362991333, - "loss_sent": 0.43311354517936707, - "loss_sod": 0.028535090386867523, - "loss_total": 0.7349500060081482, - "step": 151099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.245303630828857, - "loss_rtd": 0.2648560404777527, - "loss_sent": 0.10785567760467529, - "loss_sod": 0.03306739404797554, - "loss_total": 0.4057791233062744, - "step": 151099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.4352444410324097, - "learning_rate": 5.044457710215452e-05, - "loss": 0.5203, - "step": 151100 - }, - { - "epoch": 0.016398, - "loss_gen": 4.798547744750977, - "loss_rtd": 0.2782033085823059, - "loss_sent": 0.00013255204248707741, - "loss_sod": 0.06532851606607437, - "loss_total": 0.34366437792778015, - "step": 151199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.926626682281494, - "loss_rtd": 0.26169103384017944, - "loss_sent": 0.00042506129830144346, - "loss_sod": 0.22389405965805054, - "loss_total": 0.4860101640224457, - "step": 151199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.8608229756355286, - "learning_rate": 5.041284501005273e-05, - "loss": 0.5167, - "step": 151200 - }, - { - "epoch": 0.016598, - "loss_gen": 4.439567565917969, - "loss_rtd": 0.2428324818611145, - "loss_sent": 0.045875802636146545, - "loss_sod": 0.1477464884519577, - "loss_total": 0.43645477294921875, - "step": 151299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.37948751449585, - "loss_rtd": 0.28312432765960693, - "loss_sent": 0.08464525640010834, - "loss_sod": 0.15120011568069458, - "loss_total": 0.518969714641571, - "step": 151299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.1371779441833496, - "learning_rate": 5.038111275165702e-05, - "loss": 0.5127, - "step": 151300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.02803897857666, - "loss_rtd": 0.2808513641357422, - "loss_sent": 0.16122612357139587, - "loss_sod": 0.01682141050696373, - "loss_total": 0.4588989019393921, - "step": 151399 - }, - { - "epoch": 0.016798, - "loss_gen": 4.860019207000732, - "loss_rtd": 0.2671452760696411, - "loss_sent": 0.07295264303684235, - "loss_sod": 0.08556990325450897, - "loss_total": 0.42566782236099243, - "step": 151399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.8448479771614075, - "learning_rate": 5.034938033974915e-05, - "loss": 0.5177, - "step": 151400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.632369518280029, - "loss_rtd": 0.2392517626285553, - "loss_sent": 0.0006165798986330628, - "loss_sod": 0.11644583940505981, - "loss_total": 0.35631418228149414, - "step": 151499 - }, - { - "epoch": 0.016998, - "loss_gen": 4.526658058166504, - "loss_rtd": 0.2356649935245514, - "loss_sent": 0.00011681746400427073, - "loss_sod": 0.2846408188343048, - "loss_total": 0.520422637462616, - "step": 151499 - }, - { - "epoch": 0.017, - "grad_norm": 1.3764458894729614, - "learning_rate": 5.031764778711091e-05, - "loss": 0.5221, - "step": 151500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.24907112121582, - "loss_rtd": 0.24610601365566254, - "loss_sent": 0.5204124450683594, - "loss_sod": 0.01562754064798355, - "loss_total": 0.7821459770202637, - "step": 151599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.313752174377441, - "loss_rtd": 0.28422072529792786, - "loss_sent": 0.10614383965730667, - "loss_sod": 0.05076826736330986, - "loss_total": 0.4411328136920929, - "step": 151599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.5663731098175049, - "learning_rate": 5.0285915106524185e-05, - "loss": 0.521, - "step": 151600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.389764785766602, - "loss_rtd": 0.2796376049518585, - "loss_sent": 0.1030566394329071, - "loss_sod": 0.026172231882810593, - "loss_total": 0.4088664650917053, - "step": 151699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.174036502838135, - "loss_rtd": 0.2925577163696289, - "loss_sent": 0.17829322814941406, - "loss_sod": 0.05444139987230301, - "loss_total": 0.5252923369407654, - "step": 151699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.7920127511024475, - "learning_rate": 5.025418231077088e-05, - "loss": 0.5024, - "step": 151700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.434704780578613, - "loss_rtd": 0.2754076421260834, - "loss_sent": 0.105732262134552, - "loss_sod": 0.09751874208450317, - "loss_total": 0.47865864634513855, - "step": 151799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.483138084411621, - "loss_rtd": 0.27305567264556885, - "loss_sent": 0.30992719531059265, - "loss_sod": 0.19430087506771088, - "loss_total": 0.777283787727356, - "step": 151799 - }, - { - "epoch": 0.0176, - "grad_norm": 2.0217292308807373, - "learning_rate": 5.022244941263298e-05, - "loss": 0.5311, - "step": 151800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.1941399574279785, - "loss_rtd": 0.27134954929351807, - "loss_sent": 0.062282513827085495, - "loss_sod": 0.04314194619655609, - "loss_total": 0.37677401304244995, - "step": 151899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.215184688568115, - "loss_rtd": 0.2767806649208069, - "loss_sent": 0.2362992763519287, - "loss_sod": 0.012753710150718689, - "loss_total": 0.5258336663246155, - "step": 151899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.8731580972671509, - "learning_rate": 5.019071642489248e-05, - "loss": 0.5189, - "step": 151900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.417120456695557, - "loss_rtd": 0.28732550144195557, - "loss_sent": 0.3475547730922699, - "loss_sod": 0.04580339789390564, - "loss_total": 0.6806836724281311, - "step": 151999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.299704551696777, - "loss_rtd": 0.28852763772010803, - "loss_sent": 0.15768270194530487, - "loss_sod": 0.05578818917274475, - "loss_total": 0.5019985437393188, - "step": 151999 - }, - { - "epoch": 0.018, - "grad_norm": 0.7697166800498962, - "learning_rate": 5.0158983360331426e-05, - "loss": 0.5078, - "step": 152000 - }, - { - "epoch": 0.018, - "eval_loss": 0.4934936463832855, - "eval_runtime": 149.9582, - "eval_samples_per_second": 102.982, - "eval_steps_per_second": 0.807, - "step": 152000 - }, - { - "epoch": 0.018198, - "loss_gen": 4.863179683685303, - "loss_rtd": 0.2728963792324066, - "loss_sent": 0.0338527075946331, - "loss_sod": 0.016788605600595474, - "loss_total": 0.323537677526474, - "step": 152099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.3263959884643555, - "loss_rtd": 0.26920780539512634, - "loss_sent": 0.08819323778152466, - "loss_sod": 0.02479766495525837, - "loss_total": 0.3821987211704254, - "step": 152099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.6549220085144043, - "learning_rate": 5.012725023173189e-05, - "loss": 0.514, - "step": 152100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.352543354034424, - "loss_rtd": 0.297717422246933, - "loss_sent": 0.15490016341209412, - "loss_sod": 0.07908543199300766, - "loss_total": 0.531702995300293, - "step": 152199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.592075347900391, - "loss_rtd": 0.28803884983062744, - "loss_sent": 0.0869017019867897, - "loss_sod": 0.024519015103578568, - "loss_total": 0.399459570646286, - "step": 152199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.9082280397415161, - "learning_rate": 5.009551705187599e-05, - "loss": 0.5309, - "step": 152200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.977872371673584, - "loss_rtd": 0.2900993525981903, - "loss_sent": 0.12147052586078644, - "loss_sod": 0.027479641139507294, - "loss_total": 0.43904954195022583, - "step": 152299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.130236625671387, - "loss_rtd": 0.30054759979248047, - "loss_sent": 0.47358593344688416, - "loss_sod": 0.18136243522167206, - "loss_total": 0.9554959535598755, - "step": 152299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.6463603973388672, - "learning_rate": 5.006378383354582e-05, - "loss": 0.5216, - "step": 152300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.2921462059021, - "loss_rtd": 0.2681122124195099, - "loss_sent": 0.17086376249790192, - "loss_sod": 0.04019676148891449, - "loss_total": 0.4791727066040039, - "step": 152399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.256403923034668, - "loss_rtd": 0.28217169642448425, - "loss_sent": 0.07587186992168427, - "loss_sod": 0.043038398027420044, - "loss_total": 0.40108197927474976, - "step": 152399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.5022106170654297, - "learning_rate": 5.0032050589523535e-05, - "loss": 0.5044, - "step": 152400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.432286739349365, - "loss_rtd": 0.2685067057609558, - "loss_sent": 0.5534102916717529, - "loss_sod": 0.040103763341903687, - "loss_total": 0.86202073097229, - "step": 152499 - }, - { - "epoch": 0.018998, - "loss_gen": 4.681586265563965, - "loss_rtd": 0.2693133056163788, - "loss_sent": 0.0029743232298642397, - "loss_sod": 0.0748993381857872, - "loss_total": 0.34718698263168335, - "step": 152499 - }, - { - "epoch": 0.019, - "grad_norm": 2.022700071334839, - "learning_rate": 5.000031733259127e-05, - "loss": 0.5118, - "step": 152500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.357352256774902, - "loss_rtd": 0.28818804025650024, - "loss_sent": 0.2510608434677124, - "loss_sod": 0.0101924492046237, - "loss_total": 0.5494413375854492, - "step": 152599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.540969371795654, - "loss_rtd": 0.27876439690589905, - "loss_sent": 0.19681772589683533, - "loss_sod": 0.07287005335092545, - "loss_total": 0.5484521389007568, - "step": 152599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.7219185829162598, - "learning_rate": 4.996858407553119e-05, - "loss": 0.4923, - "step": 152600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.536776542663574, - "loss_rtd": 0.2535674273967743, - "loss_sent": 0.00014156920951791108, - "loss_sod": 0.1365301012992859, - "loss_total": 0.39023908972740173, - "step": 152699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.447594165802002, - "loss_rtd": 0.24667733907699585, - "loss_sent": 0.0001558194198878482, - "loss_sod": 0.05303407460451126, - "loss_total": 0.2998672127723694, - "step": 152699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.890343964099884, - "learning_rate": 4.9936850831125434e-05, - "loss": 0.5257, - "step": 152700 - }, - { - "epoch": 0.019598, - "loss_gen": 4.976705551147461, - "loss_rtd": 0.2726480960845947, - "loss_sent": 0.019405441358685493, - "loss_sod": 0.0301898792386055, - "loss_total": 0.32224342226982117, - "step": 152799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.595449924468994, - "loss_rtd": 0.2642533481121063, - "loss_sent": 0.0005791126750409603, - "loss_sod": 0.12613129615783691, - "loss_total": 0.39096376299858093, - "step": 152799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.738387405872345, - "learning_rate": 4.990511761215617e-05, - "loss": 0.5046, - "step": 152800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.384093761444092, - "loss_rtd": 0.26410719752311707, - "loss_sent": 0.35313689708709717, - "loss_sod": 0.04660925641655922, - "loss_total": 0.6638533473014832, - "step": 152899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.380703449249268, - "loss_rtd": 0.27725425362586975, - "loss_sent": 0.9195929169654846, - "loss_sod": 0.12258525937795639, - "loss_total": 1.3194324970245361, - "step": 152899 - }, - { - "epoch": 0.0198, - "grad_norm": 5.63961124420166, - "learning_rate": 4.987338443140552e-05, - "loss": 0.5053, - "step": 152900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.3890862464904785, - "loss_rtd": 0.27779167890548706, - "loss_sent": 0.1700344681739807, - "loss_sod": 0.06807170063257217, - "loss_total": 0.5158978700637817, - "step": 152999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.191761016845703, - "loss_rtd": 0.2886083722114563, - "loss_sent": 0.1861131340265274, - "loss_sod": 0.005172345787286758, - "loss_total": 0.47989386320114136, - "step": 152999 - }, - { - "epoch": 0.02, - "grad_norm": 1.5369865894317627, - "learning_rate": 4.9841651301655585e-05, - "loss": 0.5132, - "step": 153000 - }, - { - "epoch": 0.02, - "eval_loss": 0.4946920573711395, - "eval_runtime": 150.4304, - "eval_samples_per_second": 102.659, - "eval_steps_per_second": 0.804, - "step": 153000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.107113361358643, - "loss_rtd": 0.2785392999649048, - "loss_sent": 0.5377865433692932, - "loss_sod": 0.03649866580963135, - "loss_total": 0.8528245091438293, - "step": 153099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.097657203674316, - "loss_rtd": 0.2691618502140045, - "loss_sent": 0.19406843185424805, - "loss_sod": 0.11953267455101013, - "loss_total": 0.5827629566192627, - "step": 153099 - }, - { - "epoch": 0.0202, - "grad_norm": 2.1632916927337646, - "learning_rate": 4.9809918235688505e-05, - "loss": 0.5114, - "step": 153100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.302831649780273, - "loss_rtd": 0.28545865416526794, - "loss_sent": 0.29269009828567505, - "loss_sod": 0.007460000459104776, - "loss_total": 0.585608720779419, - "step": 153199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.287594318389893, - "loss_rtd": 0.278229296207428, - "loss_sent": 0.1570609211921692, - "loss_sod": 0.030832931399345398, - "loss_total": 0.46612316370010376, - "step": 153199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.0404207706451416, - "learning_rate": 4.9778185246286325e-05, - "loss": 0.5202, - "step": 153200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.5488972663879395, - "loss_rtd": 0.271356999874115, - "loss_sent": 0.37172770500183105, - "loss_sod": 0.1260555535554886, - "loss_total": 0.7691402435302734, - "step": 153299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.269132614135742, - "loss_rtd": 0.2762696146965027, - "loss_sent": 0.1427033245563507, - "loss_sod": 0.024955397471785545, - "loss_total": 0.4439283311367035, - "step": 153299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.0403190851211548, - "learning_rate": 4.974645234623111e-05, - "loss": 0.5211, - "step": 153300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.068711280822754, - "loss_rtd": 0.2830943763256073, - "loss_sent": 0.030563218519091606, - "loss_sod": 0.032533757388591766, - "loss_total": 0.3461913466453552, - "step": 153399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.301641941070557, - "loss_rtd": 0.28749141097068787, - "loss_sent": 0.30285027623176575, - "loss_sod": 0.12032678723335266, - "loss_total": 0.7106684446334839, - "step": 153399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.9960215091705322, - "learning_rate": 4.971471954830485e-05, - "loss": 0.5248, - "step": 153400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.454285621643066, - "loss_rtd": 0.2725071310997009, - "loss_sent": 0.31986936926841736, - "loss_sod": 0.08787284046411514, - "loss_total": 0.6802493333816528, - "step": 153499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.364346504211426, - "loss_rtd": 0.29394635558128357, - "loss_sent": 0.11776778846979141, - "loss_sod": 0.08728210628032684, - "loss_total": 0.49899622797966003, - "step": 153499 - }, - { - "epoch": 0.021, - "grad_norm": 0.9756061434745789, - "learning_rate": 4.968298686528953e-05, - "loss": 0.5136, - "step": 153500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.274739742279053, - "loss_rtd": 0.29549235105514526, - "loss_sent": 0.16786660254001617, - "loss_sod": 0.06539119780063629, - "loss_total": 0.5287501215934753, - "step": 153599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.181947708129883, - "loss_rtd": 0.28288233280181885, - "loss_sent": 0.22003786265850067, - "loss_sod": 0.03190217167139053, - "loss_total": 0.5348223447799683, - "step": 153599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.8788776397705078, - "learning_rate": 4.9651254309967056e-05, - "loss": 0.5193, - "step": 153600 - }, - { - "epoch": 0.021398, - "loss_gen": 4.863471984863281, - "loss_rtd": 0.27452632784843445, - "loss_sent": 0.3187580108642578, - "loss_sod": 0.012509215623140335, - "loss_total": 0.6057935953140259, - "step": 153699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.246292591094971, - "loss_rtd": 0.2921764552593231, - "loss_sent": 0.1435055285692215, - "loss_sod": 0.06779582798480988, - "loss_total": 0.5034778118133545, - "step": 153699 - }, - { - "epoch": 0.0214, - "grad_norm": 2.0051217079162598, - "learning_rate": 4.961952189511931e-05, - "loss": 0.5206, - "step": 153700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.377110481262207, - "loss_rtd": 0.28640779852867126, - "loss_sent": 0.1573113650083542, - "loss_sod": 0.024247044697403908, - "loss_total": 0.4679661989212036, - "step": 153799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.602847099304199, - "loss_rtd": 0.2789478600025177, - "loss_sent": 0.14849574863910675, - "loss_sod": 0.09215997159481049, - "loss_total": 0.5196035504341125, - "step": 153799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.800870656967163, - "learning_rate": 4.958778963352809e-05, - "loss": 0.5314, - "step": 153800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.2421793937683105, - "loss_rtd": 0.2638772428035736, - "loss_sent": 0.6456987261772156, - "loss_sod": 0.04866240918636322, - "loss_total": 0.9582383632659912, - "step": 153899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.094288349151611, - "loss_rtd": 0.265754371881485, - "loss_sent": 0.06336957216262817, - "loss_sod": 0.08266130834817886, - "loss_total": 0.4117852449417114, - "step": 153899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.552384376525879, - "learning_rate": 4.9556057537975176e-05, - "loss": 0.5135, - "step": 153900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.359681606292725, - "loss_rtd": 0.2835022211074829, - "loss_sent": 0.252536416053772, - "loss_sod": 0.08727392554283142, - "loss_total": 0.6233125925064087, - "step": 153999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.315378189086914, - "loss_rtd": 0.2870238423347473, - "loss_sent": 0.23657800257205963, - "loss_sod": 0.10807448625564575, - "loss_total": 0.6316763162612915, - "step": 153999 - }, - { - "epoch": 0.022, - "grad_norm": 0.8836574554443359, - "learning_rate": 4.952432562124221e-05, - "loss": 0.5131, - "step": 154000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4933038651943207, - "eval_runtime": 150.2357, - "eval_samples_per_second": 102.792, - "eval_steps_per_second": 0.805, - "step": 154000 - }, - { - "epoch": 0.022198, - "loss_gen": 4.809384346008301, - "loss_rtd": 0.2640708386898041, - "loss_sent": 3.874971662298776e-05, - "loss_sod": 0.13840627670288086, - "loss_total": 0.40251585841178894, - "step": 154099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.730175495147705, - "loss_rtd": 0.27670857310295105, - "loss_sent": 0.003312483662739396, - "loss_sod": 0.09407520294189453, - "loss_total": 0.3740962743759155, - "step": 154099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.8724492788314819, - "learning_rate": 4.9492593896110845e-05, - "loss": 0.5117, - "step": 154100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.300963401794434, - "loss_rtd": 0.2905631959438324, - "loss_sent": 0.29418423771858215, - "loss_sod": 0.023533185943961143, - "loss_total": 0.6082806587219238, - "step": 154199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.291253089904785, - "loss_rtd": 0.2760257422924042, - "loss_sent": 0.4496966004371643, - "loss_sod": 0.04876187443733215, - "loss_total": 0.7744842171669006, - "step": 154199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.068644404411316, - "learning_rate": 4.9460862375362585e-05, - "loss": 0.5162, - "step": 154200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.715968608856201, - "loss_rtd": 0.26204782724380493, - "loss_sent": 0.00037177972262725234, - "loss_sod": 0.19392526149749756, - "loss_total": 0.456344872713089, - "step": 154299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.791236877441406, - "loss_rtd": 0.2713673710823059, - "loss_sent": 0.09709008038043976, - "loss_sod": 0.034875400364398956, - "loss_total": 0.40333282947540283, - "step": 154299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.7773022651672363, - "learning_rate": 4.942913107177891e-05, - "loss": 0.5188, - "step": 154300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.160711288452148, - "loss_rtd": 0.26806074380874634, - "loss_sent": 0.3172636032104492, - "loss_sod": 0.05338805168867111, - "loss_total": 0.6387124061584473, - "step": 154399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.194608688354492, - "loss_rtd": 0.2792544960975647, - "loss_sent": 0.302408903837204, - "loss_sod": 0.07118754088878632, - "loss_total": 0.6528509855270386, - "step": 154399 - }, - { - "epoch": 0.0228, - "grad_norm": 2.052335500717163, - "learning_rate": 4.939739999814115e-05, - "loss": 0.5066, - "step": 154400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.819077968597412, - "loss_rtd": 0.251863032579422, - "loss_sent": 0.2087668478488922, - "loss_sod": 0.11115151643753052, - "loss_total": 0.5717813968658447, - "step": 154499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.3396806716918945, - "loss_rtd": 0.28426268696784973, - "loss_sent": 0.17901720106601715, - "loss_sod": 0.08635630458593369, - "loss_total": 0.54963618516922, - "step": 154499 - }, - { - "epoch": 0.023, - "grad_norm": 1.043086051940918, - "learning_rate": 4.936566916723062e-05, - "loss": 0.5229, - "step": 154500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.2737812995910645, - "loss_rtd": 0.2666214406490326, - "loss_sent": 0.17710836231708527, - "loss_sod": 0.0495685413479805, - "loss_total": 0.49329835176467896, - "step": 154599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.154017448425293, - "loss_rtd": 0.2650807499885559, - "loss_sent": 0.24059459567070007, - "loss_sod": 0.07724697887897491, - "loss_total": 0.5829223394393921, - "step": 154599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.197387456893921, - "learning_rate": 4.933393859182847e-05, - "loss": 0.4993, - "step": 154600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.324120044708252, - "loss_rtd": 0.26337793469429016, - "loss_sent": 0.1251501590013504, - "loss_sod": 0.03473785147070885, - "loss_total": 0.4232659339904785, - "step": 154699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.492222309112549, - "loss_rtd": 0.2757542133331299, - "loss_sent": 0.16035082936286926, - "loss_sod": 0.24490724503993988, - "loss_total": 0.6810122728347778, - "step": 154699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.3781133890151978, - "learning_rate": 4.930220828471576e-05, - "loss": 0.5257, - "step": 154700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.526704788208008, - "loss_rtd": 0.2778828740119934, - "loss_sent": 0.3450658619403839, - "loss_sod": 0.01861502230167389, - "loss_total": 0.6415637731552124, - "step": 154799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.400487899780273, - "loss_rtd": 0.2804326117038727, - "loss_sent": 0.3724420666694641, - "loss_sod": 0.13108769059181213, - "loss_total": 0.7839623689651489, - "step": 154799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.3151750564575195, - "learning_rate": 4.927047825867348e-05, - "loss": 0.506, - "step": 154800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.331615924835205, - "loss_rtd": 0.26284393668174744, - "loss_sent": 0.28544846177101135, - "loss_sod": 0.05874761939048767, - "loss_total": 0.6070400476455688, - "step": 154899 - }, - { - "epoch": 0.023798, - "loss_gen": 4.914802551269531, - "loss_rtd": 0.26309871673583984, - "loss_sent": 0.15676219761371613, - "loss_sod": 0.014258254319429398, - "loss_total": 0.43411916494369507, - "step": 154899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.1727299690246582, - "learning_rate": 4.923874852648247e-05, - "loss": 0.5267, - "step": 154900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.204455375671387, - "loss_rtd": 0.26971036195755005, - "loss_sent": 0.06897718459367752, - "loss_sod": 0.13732947409152985, - "loss_total": 0.4760169982910156, - "step": 154999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.415859699249268, - "loss_rtd": 0.2773853838443756, - "loss_sent": 0.20107945799827576, - "loss_sod": 0.05754546821117401, - "loss_total": 0.5360102653503418, - "step": 154999 - }, - { - "epoch": 0.024, - "grad_norm": 1.0946190357208252, - "learning_rate": 4.920701910092347e-05, - "loss": 0.5325, - "step": 155000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4920775592327118, - "eval_runtime": 150.2738, - "eval_samples_per_second": 102.766, - "eval_steps_per_second": 0.805, - "step": 155000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.494928359985352, - "loss_rtd": 0.2546127140522003, - "loss_sent": 0.0256333164870739, - "loss_sod": 0.1090117022395134, - "loss_total": 0.3892577290534973, - "step": 155099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.4429802894592285, - "loss_rtd": 0.2867959439754486, - "loss_sent": 0.08887942135334015, - "loss_sod": 0.01610971800982952, - "loss_total": 0.3917850852012634, - "step": 155099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.8489281535148621, - "learning_rate": 4.917528999477706e-05, - "loss": 0.5062, - "step": 155100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.064107418060303, - "loss_rtd": 0.27549663186073303, - "loss_sent": 0.3441270589828491, - "loss_sod": 0.008993230760097504, - "loss_total": 0.6286169290542603, - "step": 155199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.197736740112305, - "loss_rtd": 0.2571727931499481, - "loss_sent": 0.11452917009592056, - "loss_sod": 0.07485997676849365, - "loss_total": 0.44656193256378174, - "step": 155199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.1755305528640747, - "learning_rate": 4.914356122082376e-05, - "loss": 0.5236, - "step": 155200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.632621765136719, - "loss_rtd": 0.27535516023635864, - "loss_sent": 0.2456662356853485, - "loss_sod": 0.04502054303884506, - "loss_total": 0.5660419464111328, - "step": 155299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.261656761169434, - "loss_rtd": 0.2716114819049835, - "loss_sent": 0.07773374766111374, - "loss_sod": 0.056362785398960114, - "loss_total": 0.4057080149650574, - "step": 155299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.1455860137939453, - "learning_rate": 4.911183279184389e-05, - "loss": 0.5075, - "step": 155300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.270544528961182, - "loss_rtd": 0.27206915616989136, - "loss_sent": 0.11627724766731262, - "loss_sod": 0.014066686853766441, - "loss_total": 0.40241310000419617, - "step": 155399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.643050670623779, - "loss_rtd": 0.26377764344215393, - "loss_sent": 0.0007975984481163323, - "loss_sod": 0.05068105086684227, - "loss_total": 0.3152562975883484, - "step": 155399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.745100200176239, - "learning_rate": 4.908010472061767e-05, - "loss": 0.5001, - "step": 155400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.607298851013184, - "loss_rtd": 0.2740238308906555, - "loss_sent": 0.7319679260253906, - "loss_sod": 0.12371865659952164, - "loss_total": 1.1297104358673096, - "step": 155499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.345309734344482, - "loss_rtd": 0.2741493582725525, - "loss_sent": 0.1308216154575348, - "loss_sod": 0.05069271847605705, - "loss_total": 0.45566368103027344, - "step": 155499 - }, - { - "epoch": 0.025, - "grad_norm": 3.0212907791137695, - "learning_rate": 4.9048377019925156e-05, - "loss": 0.5232, - "step": 155500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.159890174865723, - "loss_rtd": 0.277555912733078, - "loss_sent": 0.12425858527421951, - "loss_sod": 0.04284006729722023, - "loss_total": 0.44465455412864685, - "step": 155599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.168512344360352, - "loss_rtd": 0.2913990020751953, - "loss_sent": 0.11198101192712784, - "loss_sod": 0.035088904201984406, - "loss_total": 0.43846890330314636, - "step": 155599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.1938484907150269, - "learning_rate": 4.901664970254627e-05, - "loss": 0.5352, - "step": 155600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.350635051727295, - "loss_rtd": 0.26318657398223877, - "loss_sent": 0.09457547217607498, - "loss_sod": 0.04108530655503273, - "loss_total": 0.3988473415374756, - "step": 155699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.57899808883667, - "loss_rtd": 0.2769685983657837, - "loss_sent": 0.035828847438097, - "loss_sod": 0.0684489831328392, - "loss_total": 0.3812464475631714, - "step": 155699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.9996123313903809, - "learning_rate": 4.898492278126076e-05, - "loss": 0.4948, - "step": 155700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.611931800842285, - "loss_rtd": 0.26654088497161865, - "loss_sent": 3.4519805922172964e-05, - "loss_sod": 0.1173800453543663, - "loss_total": 0.3839554488658905, - "step": 155799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.544041633605957, - "loss_rtd": 0.2545827031135559, - "loss_sent": 3.5108379961457103e-05, - "loss_sod": 0.21659061312675476, - "loss_total": 0.4712084233760834, - "step": 155799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.9293721914291382, - "learning_rate": 4.895319626884824e-05, - "loss": 0.5295, - "step": 155800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.0901594161987305, - "loss_rtd": 0.2564566135406494, - "loss_sent": 3.784263753914274e-05, - "loss_sod": 0.16215522587299347, - "loss_total": 0.41864967346191406, - "step": 155899 - }, - { - "epoch": 0.025798, - "loss_gen": 4.958869934082031, - "loss_rtd": 0.253162145614624, - "loss_sent": 3.955057036364451e-05, - "loss_sod": 0.21552357077598572, - "loss_total": 0.4687252640724182, - "step": 155899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.0558736324310303, - "learning_rate": 4.892147017808812e-05, - "loss": 0.5194, - "step": 155900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.430213928222656, - "loss_rtd": 0.2625291645526886, - "loss_sent": 0.31915414333343506, - "loss_sod": 0.04818132147192955, - "loss_total": 0.6298646330833435, - "step": 155999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.714964389801025, - "loss_rtd": 0.28088244795799255, - "loss_sent": 0.18083544075489044, - "loss_sod": 0.05280933156609535, - "loss_total": 0.5145272016525269, - "step": 155999 - }, - { - "epoch": 0.026, - "grad_norm": 1.0524640083312988, - "learning_rate": 4.888974452175969e-05, - "loss": 0.5276, - "step": 156000 - }, - { - "epoch": 0.026, - "eval_loss": 0.48745784163475037, - "eval_runtime": 150.2911, - "eval_samples_per_second": 102.754, - "eval_steps_per_second": 0.805, - "step": 156000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.251652717590332, - "loss_rtd": 0.2523916959762573, - "loss_sent": 3.480105442577042e-05, - "loss_sod": 0.10084492713212967, - "loss_total": 0.3532714247703552, - "step": 156099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.76702880859375, - "loss_rtd": 0.24150487780570984, - "loss_sent": 0.05287511274218559, - "loss_sod": 0.13154476881027222, - "loss_total": 0.42592474818229675, - "step": 156099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.807264506816864, - "learning_rate": 4.8858019312642054e-05, - "loss": 0.5263, - "step": 156100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.219092845916748, - "loss_rtd": 0.27106907963752747, - "loss_sent": 0.11399336159229279, - "loss_sod": 0.012272108346223831, - "loss_total": 0.39733457565307617, - "step": 156199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.637687683105469, - "loss_rtd": 0.2768366038799286, - "loss_sent": 0.3727564811706543, - "loss_sod": 0.039362743496894836, - "loss_total": 0.6889558434486389, - "step": 156199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.6470954418182373, - "learning_rate": 4.882629456351408e-05, - "loss": 0.523, - "step": 156200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.342074394226074, - "loss_rtd": 0.28616639971733093, - "loss_sent": 0.04557182267308235, - "loss_sod": 0.0065534948371350765, - "loss_total": 0.338291734457016, - "step": 156299 - }, - { - "epoch": 0.026598, - "loss_gen": 4.873995780944824, - "loss_rtd": 0.26216158270835876, - "loss_sent": 0.14498597383499146, - "loss_sod": 0.018181968480348587, - "loss_total": 0.4253295361995697, - "step": 156299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.7005792856216431, - "learning_rate": 4.879457028715452e-05, - "loss": 0.5035, - "step": 156300 - }, - { - "epoch": 0.026798, - "loss_gen": 4.46565580368042, - "loss_rtd": 0.23772087693214417, - "loss_sent": 0.06319355219602585, - "loss_sod": 0.17266608774662018, - "loss_total": 0.473580539226532, - "step": 156399 - }, - { - "epoch": 0.026798, - "loss_gen": 4.926111698150635, - "loss_rtd": 0.24819941818714142, - "loss_sent": 0.23541516065597534, - "loss_sod": 0.09991899877786636, - "loss_total": 0.5835335850715637, - "step": 156399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.0653536319732666, - "learning_rate": 4.8762846496341906e-05, - "loss": 0.5123, - "step": 156400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.300265312194824, - "loss_rtd": 0.25348299741744995, - "loss_sent": 0.13196788728237152, - "loss_sod": 0.08344398438930511, - "loss_total": 0.468894898891449, - "step": 156499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.363890171051025, - "loss_rtd": 0.26457250118255615, - "loss_sent": 0.09559301286935806, - "loss_sod": 0.042374856770038605, - "loss_total": 0.4025403559207916, - "step": 156499 - }, - { - "epoch": 0.027, - "grad_norm": 0.7778878808021545, - "learning_rate": 4.873112320385458e-05, - "loss": 0.4891, - "step": 156500 - }, - { - "epoch": 0.027198, - "loss_gen": 4.990156173706055, - "loss_rtd": 0.27784112095832825, - "loss_sent": 0.13252398371696472, - "loss_sod": 0.004448779858648777, - "loss_total": 0.4148138761520386, - "step": 156599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.4462714195251465, - "loss_rtd": 0.28916388750076294, - "loss_sent": 0.14478699862957, - "loss_sod": 0.07963693141937256, - "loss_total": 0.5135878324508667, - "step": 156599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.9037930369377136, - "learning_rate": 4.869940042247066e-05, - "loss": 0.5123, - "step": 156600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.006406784057617, - "loss_rtd": 0.26727545261383057, - "loss_sent": 0.4620921015739441, - "loss_sod": 0.08716131746768951, - "loss_total": 0.816528856754303, - "step": 156699 - }, - { - "epoch": 0.027398, - "loss_gen": 4.661520004272461, - "loss_rtd": 0.2576027512550354, - "loss_sent": 0.008090567775070667, - "loss_sod": 0.11190100014209747, - "loss_total": 0.3775942921638489, - "step": 156699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.406989574432373, - "learning_rate": 4.866767816496812e-05, - "loss": 0.5139, - "step": 156700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.819665431976318, - "loss_rtd": 0.28634992241859436, - "loss_sent": 0.1437319964170456, - "loss_sod": 0.0734187439084053, - "loss_total": 0.5035006403923035, - "step": 156799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.204472064971924, - "loss_rtd": 0.2901288568973541, - "loss_sent": 0.10268744081258774, - "loss_sod": 0.05606215447187424, - "loss_total": 0.4488784670829773, - "step": 156799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8680161237716675, - "learning_rate": 4.863595644412463e-05, - "loss": 0.4933, - "step": 156800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.420330047607422, - "loss_rtd": 0.2791830599308014, - "loss_sent": 0.3732958734035492, - "loss_sod": 0.11190824210643768, - "loss_total": 0.7643871307373047, - "step": 156899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.031901836395264, - "loss_rtd": 0.27225786447525024, - "loss_sent": 0.033356182277202606, - "loss_sod": 0.0836183950304985, - "loss_total": 0.38923245668411255, - "step": 156899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.086056113243103, - "learning_rate": 4.860423527271774e-05, - "loss": 0.5158, - "step": 156900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.104860305786133, - "loss_rtd": 0.24588295817375183, - "loss_sent": 0.1802573800086975, - "loss_sod": 0.010749001987278461, - "loss_total": 0.4368893504142761, - "step": 156999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.1478495597839355, - "loss_rtd": 0.2704768776893616, - "loss_sent": 0.2739458680152893, - "loss_sod": 0.11669597029685974, - "loss_total": 0.661118745803833, - "step": 156999 - }, - { - "epoch": 0.028, - "grad_norm": 1.624053955078125, - "learning_rate": 4.8572514663524704e-05, - "loss": 0.5245, - "step": 157000 - }, - { - "epoch": 0.028, - "eval_loss": 0.48777928948402405, - "eval_runtime": 150.5699, - "eval_samples_per_second": 102.564, - "eval_steps_per_second": 0.804, - "step": 157000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.487461090087891, - "loss_rtd": 0.2601374387741089, - "loss_sent": 0.22656944394111633, - "loss_sod": 0.05389409512281418, - "loss_total": 0.5406010150909424, - "step": 157099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.392871379852295, - "loss_rtd": 0.27915239334106445, - "loss_sent": 0.15948906540870667, - "loss_sod": 0.08141209185123444, - "loss_total": 0.520053505897522, - "step": 157099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.0358128547668457, - "learning_rate": 4.85407946293226e-05, - "loss": 0.5251, - "step": 157100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.279001712799072, - "loss_rtd": 0.2800320088863373, - "loss_sent": 0.15085484087467194, - "loss_sod": 0.018586760386824608, - "loss_total": 0.44947361946105957, - "step": 157199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.3281073570251465, - "loss_rtd": 0.2607080340385437, - "loss_sent": 0.08769252151250839, - "loss_sod": 0.08442763984203339, - "loss_total": 0.43282821774482727, - "step": 157199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.5268245935440063, - "learning_rate": 4.850907518288823e-05, - "loss": 0.5164, - "step": 157200 - }, - { - "epoch": 0.028598, - "loss_gen": 4.504331111907959, - "loss_rtd": 0.22062382102012634, - "loss_sent": 0.02035563997924328, - "loss_sod": 0.12281917780637741, - "loss_total": 0.3637986481189728, - "step": 157299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.401305198669434, - "loss_rtd": 0.2521509826183319, - "loss_sent": 0.1265888661146164, - "loss_sod": 0.04096400737762451, - "loss_total": 0.4197038412094116, - "step": 157299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.0456082820892334, - "learning_rate": 4.8477356336998215e-05, - "loss": 0.51, - "step": 157300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.367915630340576, - "loss_rtd": 0.2643294930458069, - "loss_sent": 0.23415818810462952, - "loss_sod": 0.07469923794269562, - "loss_total": 0.5731868743896484, - "step": 157399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.2256550788879395, - "loss_rtd": 0.28218239545822144, - "loss_sent": 0.14147312939167023, - "loss_sod": 0.06159043684601784, - "loss_total": 0.485245943069458, - "step": 157399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.009660005569458, - "learning_rate": 4.844563810442887e-05, - "loss": 0.5111, - "step": 157400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.380802154541016, - "loss_rtd": 0.2648017108440399, - "loss_sent": 0.06642752885818481, - "loss_sod": 0.16112709045410156, - "loss_total": 0.4923563301563263, - "step": 157499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.134881019592285, - "loss_rtd": 0.24950629472732544, - "loss_sent": 0.016680188477039337, - "loss_sod": 0.1164335161447525, - "loss_total": 0.3826199769973755, - "step": 157499 - }, - { - "epoch": 0.029, - "grad_norm": 1.143450140953064, - "learning_rate": 4.8413920497956326e-05, - "loss": 0.5025, - "step": 157500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.487768650054932, - "loss_rtd": 0.2694796621799469, - "loss_sent": 0.133780837059021, - "loss_sod": 0.03469589725136757, - "loss_total": 0.43795639276504517, - "step": 157599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.598739147186279, - "loss_rtd": 0.28467464447021484, - "loss_sent": 0.17000456154346466, - "loss_sod": 0.05821293592453003, - "loss_total": 0.5128921270370483, - "step": 157599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.8719593286514282, - "learning_rate": 4.83822035303564e-05, - "loss": 0.5118, - "step": 157600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.4392595291137695, - "loss_rtd": 0.26860255002975464, - "loss_sent": 0.33762699365615845, - "loss_sod": 0.05551446974277496, - "loss_total": 0.6617439985275269, - "step": 157699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.3656907081604, - "loss_rtd": 0.2806245982646942, - "loss_sent": 0.2980003356933594, - "loss_sod": 0.016527138650417328, - "loss_total": 0.5951520800590515, - "step": 157699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.9922837018966675, - "learning_rate": 4.8350487214404715e-05, - "loss": 0.5213, - "step": 157700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.197019100189209, - "loss_rtd": 0.29149261116981506, - "loss_sent": 0.09200244396924973, - "loss_sod": 0.04063096642494202, - "loss_total": 0.4241260290145874, - "step": 157799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.526877403259277, - "loss_rtd": 0.26612284779548645, - "loss_sent": 0.33563366532325745, - "loss_sod": 0.03838837891817093, - "loss_total": 0.6401448845863342, - "step": 157799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.1496505737304688, - "learning_rate": 4.831877156287658e-05, - "loss": 0.5094, - "step": 157800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.166398525238037, - "loss_rtd": 0.28753796219825745, - "loss_sent": 0.07045305520296097, - "loss_sod": 0.00541570782661438, - "loss_total": 0.3634067177772522, - "step": 157899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.361014366149902, - "loss_rtd": 0.25920534133911133, - "loss_sent": 0.3058786690235138, - "loss_sod": 0.04083895683288574, - "loss_total": 0.6059229373931885, - "step": 157899 - }, - { - "epoch": 0.0298, - "grad_norm": 2.0139553546905518, - "learning_rate": 4.828705658854705e-05, - "loss": 0.5226, - "step": 157900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.709131717681885, - "loss_rtd": 0.2707286775112152, - "loss_sent": 0.30841881036758423, - "loss_sod": 0.09141042828559875, - "loss_total": 0.6705579161643982, - "step": 157999 - }, - { - "epoch": 0.029998, - "loss_gen": 4.703526973724365, - "loss_rtd": 0.2666856050491333, - "loss_sent": 0.03206224367022514, - "loss_sod": 0.1287064254283905, - "loss_total": 0.42745426297187805, - "step": 157999 - }, - { - "epoch": 0.03, - "grad_norm": 1.1153990030288696, - "learning_rate": 4.8255342304190924e-05, - "loss": 0.4898, - "step": 158000 - }, - { - "epoch": 0.03, - "eval_loss": 0.48881009221076965, - "eval_runtime": 150.3182, - "eval_samples_per_second": 102.735, - "eval_steps_per_second": 0.805, - "step": 158000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.368672847747803, - "loss_rtd": 0.2833646535873413, - "loss_sent": 0.6040071845054626, - "loss_sod": 0.10221802443265915, - "loss_total": 0.9895898699760437, - "step": 158099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.176130771636963, - "loss_rtd": 0.2608996629714966, - "loss_sent": 0.3742767572402954, - "loss_sod": 0.023876942694187164, - "loss_total": 0.6590533256530762, - "step": 158099 - }, - { - "epoch": 0.0302, - "grad_norm": 2.632840871810913, - "learning_rate": 4.8223628722582706e-05, - "loss": 0.4948, - "step": 158100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.298892498016357, - "loss_rtd": 0.271578848361969, - "loss_sent": 0.0672275647521019, - "loss_sod": 0.07478600740432739, - "loss_total": 0.4135924279689789, - "step": 158199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.326475620269775, - "loss_rtd": 0.27241429686546326, - "loss_sent": 0.3747837245464325, - "loss_sod": 0.05368448793888092, - "loss_total": 0.7008825540542603, - "step": 158199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.163684606552124, - "learning_rate": 4.819191585649663e-05, - "loss": 0.5026, - "step": 158200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.390805721282959, - "loss_rtd": 0.2750718891620636, - "loss_sent": 0.1656327098608017, - "loss_sod": 0.10272759199142456, - "loss_total": 0.5434321761131287, - "step": 158299 - }, - { - "epoch": 0.030598, - "loss_gen": 4.812838077545166, - "loss_rtd": 0.24897724390029907, - "loss_sent": 4.013166835648008e-05, - "loss_sod": 0.1467210054397583, - "loss_total": 0.3957383930683136, - "step": 158299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.8710622787475586, - "learning_rate": 4.816020371870663e-05, - "loss": 0.5101, - "step": 158300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.417858123779297, - "loss_rtd": 0.28526821732521057, - "loss_sent": 0.21175134181976318, - "loss_sod": 0.054051898419857025, - "loss_total": 0.5510714650154114, - "step": 158399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.285908222198486, - "loss_rtd": 0.24675749242305756, - "loss_sent": 0.28908753395080566, - "loss_sod": 0.004072052426636219, - "loss_total": 0.5399171113967896, - "step": 158399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.0410542488098145, - "learning_rate": 4.812849232198636e-05, - "loss": 0.5016, - "step": 158400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.419442176818848, - "loss_rtd": 0.2768394947052002, - "loss_sent": 0.3450249433517456, - "loss_sod": 0.058325424790382385, - "loss_total": 0.680189847946167, - "step": 158499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.143611431121826, - "loss_rtd": 0.27038657665252686, - "loss_sent": 0.04221571609377861, - "loss_sod": 0.009236699901521206, - "loss_total": 0.32183897495269775, - "step": 158499 - }, - { - "epoch": 0.031, - "grad_norm": 0.64340740442276, - "learning_rate": 4.8096781679109145e-05, - "loss": 0.5045, - "step": 158500 - }, - { - "epoch": 0.031198, - "loss_gen": 4.824569225311279, - "loss_rtd": 0.274274080991745, - "loss_sent": 0.24172565340995789, - "loss_sod": 0.048117611557245255, - "loss_total": 0.5641173124313354, - "step": 158599 - }, - { - "epoch": 0.031198, - "loss_gen": 4.973732948303223, - "loss_rtd": 0.2760692238807678, - "loss_sent": 0.44531095027923584, - "loss_sod": 0.028519706800580025, - "loss_total": 0.7498998641967773, - "step": 158599 - }, - { - "epoch": 0.0312, - "grad_norm": 2.0706980228424072, - "learning_rate": 4.806507180284806e-05, - "loss": 0.5017, - "step": 158600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.814790725708008, - "loss_rtd": 0.2397366315126419, - "loss_sent": 0.03261179476976395, - "loss_sod": 0.02776399254798889, - "loss_total": 0.30011242628097534, - "step": 158699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.327791213989258, - "loss_rtd": 0.26420092582702637, - "loss_sent": 0.09725827723741531, - "loss_sod": 0.13030242919921875, - "loss_total": 0.49176162481307983, - "step": 158699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.094794511795044, - "learning_rate": 4.8033362705975815e-05, - "loss": 0.4989, - "step": 158700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.1611409187316895, - "loss_rtd": 0.2766934335231781, - "loss_sent": 0.5184255838394165, - "loss_sod": 0.04233116656541824, - "loss_total": 0.8374501466751099, - "step": 158799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.242688179016113, - "loss_rtd": 0.27282342314720154, - "loss_sent": 0.2498510330915451, - "loss_sod": 0.0054869623854756355, - "loss_total": 0.5281614065170288, - "step": 158799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.6587289571762085, - "learning_rate": 4.800165440126483e-05, - "loss": 0.5125, - "step": 158800 - }, - { - "epoch": 0.031798, - "loss_gen": 4.8841633796691895, - "loss_rtd": 0.26241910457611084, - "loss_sent": 0.17879162728786469, - "loss_sod": 0.00880429707467556, - "loss_total": 0.45001500844955444, - "step": 158899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.207898139953613, - "loss_rtd": 0.28028276562690735, - "loss_sent": 0.14197683334350586, - "loss_sod": 0.01219436526298523, - "loss_total": 0.43445396423339844, - "step": 158899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.8651069402694702, - "learning_rate": 4.796994690148722e-05, - "loss": 0.5319, - "step": 158900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.6617841720581055, - "loss_rtd": 0.24260640144348145, - "loss_sent": 0.061801083385944366, - "loss_sod": 0.06671155989170074, - "loss_total": 0.37111902236938477, - "step": 158999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.371520519256592, - "loss_rtd": 0.26576241850852966, - "loss_sent": 0.10865353792905807, - "loss_sod": 0.0148203419521451, - "loss_total": 0.38923630118370056, - "step": 158999 - }, - { - "epoch": 0.032, - "grad_norm": 0.6985596418380737, - "learning_rate": 4.793824021941474e-05, - "loss": 0.4981, - "step": 159000 - }, - { - "epoch": 0.032, - "eval_loss": 0.48707443475723267, - "eval_runtime": 151.6715, - "eval_samples_per_second": 101.819, - "eval_steps_per_second": 0.798, - "step": 159000 - }, - { - "epoch": 0.032198, - "loss_gen": 5.464871406555176, - "loss_rtd": 0.28606587648391724, - "loss_sent": 0.286416620016098, - "loss_sod": 0.022649457678198814, - "loss_total": 0.5951319932937622, - "step": 159099 - }, - { - "epoch": 0.032198, - "loss_gen": 5.388561725616455, - "loss_rtd": 0.26644182205200195, - "loss_sent": 0.09939169138669968, - "loss_sod": 0.028540709987282753, - "loss_total": 0.39437422156333923, - "step": 159099 - }, - { - "epoch": 0.0322, - "grad_norm": 0.666222870349884, - "learning_rate": 4.790653436781887e-05, - "loss": 0.5294, - "step": 159100 - }, - { - "epoch": 0.032398, - "loss_gen": 5.315982341766357, - "loss_rtd": 0.27147552371025085, - "loss_sent": 0.20511852204799652, - "loss_sod": 0.015645399689674377, - "loss_total": 0.49223947525024414, - "step": 159199 - }, - { - "epoch": 0.032398, - "loss_gen": 5.502182483673096, - "loss_rtd": 0.26964810490608215, - "loss_sent": 0.14899982511997223, - "loss_sod": 0.0346234068274498, - "loss_total": 0.4532713294029236, - "step": 159199 - }, - { - "epoch": 0.0324, - "grad_norm": 0.8348574042320251, - "learning_rate": 4.787482935947071e-05, - "loss": 0.515, - "step": 159200 - }, - { - "epoch": 0.032598, - "loss_gen": 5.40325927734375, - "loss_rtd": 0.26339060068130493, - "loss_sent": 0.2452559620141983, - "loss_sod": 0.07802461087703705, - "loss_total": 0.5866711735725403, - "step": 159299 - }, - { - "epoch": 0.032598, - "loss_gen": 5.222452163696289, - "loss_rtd": 0.2770087718963623, - "loss_sent": 0.08895104378461838, - "loss_sod": 0.10624325275421143, - "loss_total": 0.4722030758857727, - "step": 159299 - }, - { - "epoch": 0.0326, - "grad_norm": 1.073285698890686, - "learning_rate": 4.784312520714101e-05, - "loss": 0.5185, - "step": 159300 - }, - { - "epoch": 0.032798, - "loss_gen": 5.449478626251221, - "loss_rtd": 0.26343077421188354, - "loss_sent": 0.0868225023150444, - "loss_sod": 0.022818146273493767, - "loss_total": 0.37307143211364746, - "step": 159399 - }, - { - "epoch": 0.032798, - "loss_gen": 5.341283321380615, - "loss_rtd": 0.2790081202983856, - "loss_sent": 0.2690570652484894, - "loss_sod": 0.026930466294288635, - "loss_total": 0.5749956369400024, - "step": 159399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.0528799295425415, - "learning_rate": 4.781142192360024e-05, - "loss": 0.5081, - "step": 159400 - }, - { - "epoch": 0.032998, - "loss_gen": 4.618107318878174, - "loss_rtd": 0.23057666420936584, - "loss_sent": 0.031134679913520813, - "loss_sod": 0.15663164854049683, - "loss_total": 0.4183430075645447, - "step": 159499 - }, - { - "epoch": 0.032998, - "loss_gen": 5.574121952056885, - "loss_rtd": 0.2753072679042816, - "loss_sent": 0.24265043437480927, - "loss_sod": 0.053256791085004807, - "loss_total": 0.571214497089386, - "step": 159499 - }, - { - "epoch": 0.033, - "grad_norm": 0.9131119847297668, - "learning_rate": 4.777971952161843e-05, - "loss": 0.5085, - "step": 159500 - }, - { - "epoch": 0.033198, - "loss_gen": 5.096642017364502, - "loss_rtd": 0.27004846930503845, - "loss_sent": 0.11180497705936432, - "loss_sod": 0.04232407361268997, - "loss_total": 0.42417752742767334, - "step": 159599 - }, - { - "epoch": 0.033198, - "loss_gen": 5.813295841217041, - "loss_rtd": 0.2599891424179077, - "loss_sent": 0.15825702250003815, - "loss_sod": 0.09351354837417603, - "loss_total": 0.5117596983909607, - "step": 159599 - }, - { - "epoch": 0.0332, - "grad_norm": 1.730204701423645, - "learning_rate": 4.774801801396536e-05, - "loss": 0.509, - "step": 159600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.034669876098633, - "loss_rtd": 0.2789415121078491, - "loss_sent": 0.10340646654367447, - "loss_sod": 0.01877691224217415, - "loss_total": 0.40112489461898804, - "step": 159699 - }, - { - "epoch": 0.033398, - "loss_gen": 5.425487995147705, - "loss_rtd": 0.2687099874019623, - "loss_sent": 0.4645315110683441, - "loss_sod": 0.0642232820391655, - "loss_total": 0.7974647879600525, - "step": 159699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.626502513885498, - "learning_rate": 4.7716317413410336e-05, - "loss": 0.5265, - "step": 159700 - }, - { - "epoch": 0.033598, - "loss_gen": 5.267213821411133, - "loss_rtd": 0.2763698697090149, - "loss_sent": 0.1774713099002838, - "loss_sod": 0.024524521082639694, - "loss_total": 0.4783656895160675, - "step": 159799 - }, - { - "epoch": 0.033598, - "loss_gen": 5.383553504943848, - "loss_rtd": 0.2740878462791443, - "loss_sent": 0.32771018147468567, - "loss_sod": 0.015716755762696266, - "loss_total": 0.6175147891044617, - "step": 159799 - }, - { - "epoch": 0.0336, - "grad_norm": 0.7344114184379578, - "learning_rate": 4.768461773272241e-05, - "loss": 0.5221, - "step": 159800 - }, - { - "epoch": 0.033798, - "loss_gen": 5.088738918304443, - "loss_rtd": 0.2582167387008667, - "loss_sent": 0.12434374541044235, - "loss_sod": 0.08773496001958847, - "loss_total": 0.4702954590320587, - "step": 159899 - }, - { - "epoch": 0.033798, - "loss_gen": 5.8013410568237305, - "loss_rtd": 0.27426230907440186, - "loss_sent": 0.24400825798511505, - "loss_sod": 0.11932605504989624, - "loss_total": 0.637596607208252, - "step": 159899 - }, - { - "epoch": 0.0338, - "grad_norm": 0.6615749001502991, - "learning_rate": 4.765291898467017e-05, - "loss": 0.5045, - "step": 159900 - }, - { - "epoch": 0.033998, - "loss_gen": 5.236286163330078, - "loss_rtd": 0.279506117105484, - "loss_sent": 0.2051544040441513, - "loss_sod": 0.23678992688655853, - "loss_total": 0.7214504480361938, - "step": 159999 - }, - { - "epoch": 0.033998, - "loss_gen": 5.376890659332275, - "loss_rtd": 0.2707577347755432, - "loss_sent": 0.17245976626873016, - "loss_sod": 0.07291204482316971, - "loss_total": 0.5161295533180237, - "step": 159999 - }, - { - "epoch": 0.034, - "grad_norm": 1.7812297344207764, - "learning_rate": 4.762122118202189e-05, - "loss": 0.5276, - "step": 160000 - }, - { - "epoch": 0.034, - "eval_loss": 0.4820318818092346, - "eval_runtime": 150.069, - "eval_samples_per_second": 102.906, - "eval_steps_per_second": 0.806, - "step": 160000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.842165946960449, - "loss_rtd": 0.2776602506637573, - "loss_sent": 0.1528262048959732, - "loss_sod": 0.057652607560157776, - "loss_total": 0.4881390333175659, - "step": 160099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.351126194000244, - "loss_rtd": 0.2911335527896881, - "loss_sent": 0.09747724235057831, - "loss_sod": 0.1224154382944107, - "loss_total": 0.5110262632369995, - "step": 160099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.5827804803848267, - "learning_rate": 4.758952433754543e-05, - "loss": 0.5215, - "step": 160100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.027387619018555, - "loss_rtd": 0.25595375895500183, - "loss_sent": 0.08539359271526337, - "loss_sod": 0.0035551826003938913, - "loss_total": 0.34490251541137695, - "step": 160199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.507497310638428, - "loss_rtd": 0.2640344798564911, - "loss_sent": 0.2868528664112091, - "loss_sod": 0.059601373970508575, - "loss_total": 0.6104887127876282, - "step": 160199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.7635030150413513, - "learning_rate": 4.755782846400828e-05, - "loss": 0.5153, - "step": 160200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.2646870613098145, - "loss_rtd": 0.27357620000839233, - "loss_sent": 0.18477076292037964, - "loss_sod": 0.0682992935180664, - "loss_total": 0.5266462564468384, - "step": 160299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.374934673309326, - "loss_rtd": 0.29369816184043884, - "loss_sent": 0.2698197662830353, - "loss_sod": 0.12471264600753784, - "loss_total": 0.688230574131012, - "step": 160299 - }, - { - "epoch": 0.0006, - "grad_norm": 2.2770004272460938, - "learning_rate": 4.752613357417752e-05, - "loss": 0.5145, - "step": 160300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.457101821899414, - "loss_rtd": 0.2753320336341858, - "loss_sent": 0.12120392173528671, - "loss_sod": 0.05263853073120117, - "loss_total": 0.4491744935512543, - "step": 160399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.722702503204346, - "loss_rtd": 0.29496505856513977, - "loss_sent": 0.1253061443567276, - "loss_sod": 0.1110258400440216, - "loss_total": 0.5312970280647278, - "step": 160399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.7886563539505005, - "learning_rate": 4.749443968081988e-05, - "loss": 0.5156, - "step": 160400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.088737964630127, - "loss_rtd": 0.2770231068134308, - "loss_sent": 0.4365537166595459, - "loss_sod": 0.008964703418314457, - "loss_total": 0.7225415706634521, - "step": 160499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.218410015106201, - "loss_rtd": 0.25978273153305054, - "loss_sent": 0.1902262568473816, - "loss_sod": 0.046633802354335785, - "loss_total": 0.4966427981853485, - "step": 160499 - }, - { - "epoch": 0.001, - "grad_norm": 1.558271050453186, - "learning_rate": 4.746274679670162e-05, - "loss": 0.5024, - "step": 160500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.377699375152588, - "loss_rtd": 0.2715120315551758, - "loss_sent": 0.376115083694458, - "loss_sod": 0.14049793779850006, - "loss_total": 0.7881250381469727, - "step": 160599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.463585376739502, - "loss_rtd": 0.2850705683231354, - "loss_sent": 0.19470594823360443, - "loss_sod": 0.03956345468759537, - "loss_total": 0.5193399786949158, - "step": 160599 - }, - { - "epoch": 0.0012, - "grad_norm": 2.669656991958618, - "learning_rate": 4.7431054934588664e-05, - "loss": 0.5165, - "step": 160600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.164077281951904, - "loss_rtd": 0.2682517170906067, - "loss_sent": 0.20542724430561066, - "loss_sod": 0.027547337114810944, - "loss_total": 0.5012263059616089, - "step": 160699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.316427707672119, - "loss_rtd": 0.2847226560115814, - "loss_sent": 0.11995867639780045, - "loss_sod": 0.027916785329580307, - "loss_total": 0.4325981140136719, - "step": 160699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.6276112794876099, - "learning_rate": 4.7399364107246474e-05, - "loss": 0.5151, - "step": 160700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.611260890960693, - "loss_rtd": 0.27259013056755066, - "loss_sent": 0.14668984711170197, - "loss_sod": 0.027081597596406937, - "loss_total": 0.44636160135269165, - "step": 160799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.734642028808594, - "loss_rtd": 0.25575634837150574, - "loss_sent": 0.03888203203678131, - "loss_sod": 0.026697641238570213, - "loss_total": 0.321336030960083, - "step": 160799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.656639039516449, - "learning_rate": 4.7367674327440094e-05, - "loss": 0.4948, - "step": 160800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.511584281921387, - "loss_rtd": 0.2586683928966522, - "loss_sent": 3.6015455407323316e-05, - "loss_sod": 0.21329265832901, - "loss_total": 0.47199705243110657, - "step": 160899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.865720748901367, - "loss_rtd": 0.2565580904483795, - "loss_sent": 0.2702704966068268, - "loss_sod": 0.07395409047603607, - "loss_total": 0.6007826328277588, - "step": 160899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.0349520444869995, - "learning_rate": 4.733598560793419e-05, - "loss": 0.498, - "step": 160900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.4267072677612305, - "loss_rtd": 0.2575697600841522, - "loss_sent": 0.10051631182432175, - "loss_sod": 0.06149299815297127, - "loss_total": 0.41957908868789673, - "step": 160999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.543146133422852, - "loss_rtd": 0.24666328728199005, - "loss_sent": 0.14442215859889984, - "loss_sod": 0.022993076592683792, - "loss_total": 0.4140785336494446, - "step": 160999 - }, - { - "epoch": 0.002, - "grad_norm": 0.7205025553703308, - "learning_rate": 4.730429796149296e-05, - "loss": 0.4927, - "step": 161000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4876585602760315, - "eval_runtime": 153.5292, - "eval_samples_per_second": 100.587, - "eval_steps_per_second": 0.788, - "step": 161000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.267669677734375, - "loss_rtd": 0.25475186109542847, - "loss_sent": 0.3130422532558441, - "loss_sod": 0.04444386065006256, - "loss_total": 0.6122379302978516, - "step": 161099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.493355751037598, - "loss_rtd": 0.26913684606552124, - "loss_sent": 0.2605651021003723, - "loss_sod": 0.1393851637840271, - "loss_total": 0.6690871119499207, - "step": 161099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.1247471570968628, - "learning_rate": 4.72726114008802e-05, - "loss": 0.4902, - "step": 161100 - }, - { - "epoch": 0.002398, - "loss_gen": 4.755769729614258, - "loss_rtd": 0.2405293583869934, - "loss_sent": 0.0060568442568182945, - "loss_sod": 0.06752968579530716, - "loss_total": 0.31411588191986084, - "step": 161199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.556065559387207, - "loss_rtd": 0.262604683637619, - "loss_sent": 0.5367265939712524, - "loss_sod": 0.01863880269229412, - "loss_total": 0.8179700374603271, - "step": 161199 - }, - { - "epoch": 0.0024, - "grad_norm": 2.352881908416748, - "learning_rate": 4.724092593885922e-05, - "loss": 0.5068, - "step": 161200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.599846839904785, - "loss_rtd": 0.2677103281021118, - "loss_sent": 0.25691384077072144, - "loss_sod": 0.09384635090827942, - "loss_total": 0.6184705495834351, - "step": 161299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.266449928283691, - "loss_rtd": 0.2805553674697876, - "loss_sent": 0.12523576617240906, - "loss_sod": 0.10910908132791519, - "loss_total": 0.5149002075195312, - "step": 161299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.0119141340255737, - "learning_rate": 4.720924158819296e-05, - "loss": 0.5048, - "step": 161300 - }, - { - "epoch": 0.002798, - "loss_gen": 4.974189758300781, - "loss_rtd": 0.2747703790664673, - "loss_sent": 0.04194648563861847, - "loss_sod": 0.24337291717529297, - "loss_total": 0.5600897669792175, - "step": 161399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.487055778503418, - "loss_rtd": 0.25943294167518616, - "loss_sent": 3.886331614921801e-05, - "loss_sod": 0.14297300577163696, - "loss_total": 0.4024448096752167, - "step": 161399 - }, - { - "epoch": 0.0028, - "grad_norm": 0.9917436838150024, - "learning_rate": 4.717755836164384e-05, - "loss": 0.5014, - "step": 161400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.604397773742676, - "loss_rtd": 0.26280122995376587, - "loss_sent": 0.20259395241737366, - "loss_sod": 0.03261889889836311, - "loss_total": 0.49801409244537354, - "step": 161499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.4992594718933105, - "loss_rtd": 0.2787325978279114, - "loss_sent": 0.11834646016359329, - "loss_sod": 0.07163845747709274, - "loss_total": 0.4687175154685974, - "step": 161499 - }, - { - "epoch": 0.003, - "grad_norm": 1.0626286268234253, - "learning_rate": 4.714587627197389e-05, - "loss": 0.4994, - "step": 161500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.513464450836182, - "loss_rtd": 0.2768165171146393, - "loss_sent": 0.3070828318595886, - "loss_sod": 0.08395469188690186, - "loss_total": 0.6678540706634521, - "step": 161599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.214165687561035, - "loss_rtd": 0.256234735250473, - "loss_sent": 0.25648224353790283, - "loss_sod": 0.035651497542858124, - "loss_total": 0.5483684539794922, - "step": 161599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.8552629351615906, - "learning_rate": 4.7114195331944626e-05, - "loss": 0.4924, - "step": 161600 - }, - { - "epoch": 0.003398, - "loss_gen": 4.5428876876831055, - "loss_rtd": 0.2481108009815216, - "loss_sent": 5.068567770649679e-05, - "loss_sod": 0.12272955477237701, - "loss_total": 0.3708910644054413, - "step": 161699 - }, - { - "epoch": 0.003398, - "loss_gen": 4.6056437492370605, - "loss_rtd": 0.25533589720726013, - "loss_sent": 0.11403465270996094, - "loss_sod": 0.06946588307619095, - "loss_total": 0.4388364255428314, - "step": 161699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.7701624631881714, - "learning_rate": 4.708251555431715e-05, - "loss": 0.5092, - "step": 161700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.452862739562988, - "loss_rtd": 0.2653208076953888, - "loss_sent": 0.11454400420188904, - "loss_sod": 0.045678894966840744, - "loss_total": 0.4255436956882477, - "step": 161799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.611103534698486, - "loss_rtd": 0.26636242866516113, - "loss_sent": 0.09849405288696289, - "loss_sod": 0.04249482601881027, - "loss_total": 0.4073513150215149, - "step": 161799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.1957632303237915, - "learning_rate": 4.705083695185204e-05, - "loss": 0.5065, - "step": 161800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.354158878326416, - "loss_rtd": 0.2837497591972351, - "loss_sent": 0.25385022163391113, - "loss_sod": 0.08466369658708572, - "loss_total": 0.6222636699676514, - "step": 161899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.471778869628906, - "loss_rtd": 0.27457547187805176, - "loss_sent": 0.3879691958427429, - "loss_sod": 0.039826177060604095, - "loss_total": 0.7023708820343018, - "step": 161899 - }, - { - "epoch": 0.0038, - "grad_norm": 2.106362819671631, - "learning_rate": 4.701915953730947e-05, - "loss": 0.5016, - "step": 161900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.2265825271606445, - "loss_rtd": 0.28481683135032654, - "loss_sent": 0.22780855000019073, - "loss_sod": 0.012533052824437618, - "loss_total": 0.5251584053039551, - "step": 161999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.280604362487793, - "loss_rtd": 0.25784459710121155, - "loss_sent": 0.23577207326889038, - "loss_sod": 0.042707107961177826, - "loss_total": 0.5363237857818604, - "step": 161999 - }, - { - "epoch": 0.004, - "grad_norm": 0.5745465159416199, - "learning_rate": 4.698748332344907e-05, - "loss": 0.4946, - "step": 162000 - }, - { - "epoch": 0.004, - "eval_loss": 0.48661598563194275, - "eval_runtime": 150.5923, - "eval_samples_per_second": 102.548, - "eval_steps_per_second": 0.803, - "step": 162000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.778889179229736, - "loss_rtd": 0.2813631296157837, - "loss_sent": 0.07085127383470535, - "loss_sod": 0.09804874658584595, - "loss_total": 0.4502631425857544, - "step": 162099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.018377780914307, - "loss_rtd": 0.2641095519065857, - "loss_sent": 0.12971749901771545, - "loss_sod": 0.0829600989818573, - "loss_total": 0.47678714990615845, - "step": 162099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.7387970685958862, - "learning_rate": 4.695580832303004e-05, - "loss": 0.5122, - "step": 162100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.642885684967041, - "loss_rtd": 0.2560461163520813, - "loss_sent": 0.3851100206375122, - "loss_sod": 0.08798560500144958, - "loss_total": 0.7291417121887207, - "step": 162199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.147655010223389, - "loss_rtd": 0.2690267264842987, - "loss_sent": 0.02875726856291294, - "loss_sod": 0.06375060975551605, - "loss_total": 0.36153459548950195, - "step": 162199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.0226855278015137, - "learning_rate": 4.692413454881104e-05, - "loss": 0.5089, - "step": 162200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.4006171226501465, - "loss_rtd": 0.30595433712005615, - "loss_sent": 0.09044289588928223, - "loss_sod": 0.016173701733350754, - "loss_total": 0.41257092356681824, - "step": 162299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.622282981872559, - "loss_rtd": 0.26950570940971375, - "loss_sent": 0.19931748509407043, - "loss_sod": 0.0840272456407547, - "loss_total": 0.5528504848480225, - "step": 162299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.0370724201202393, - "learning_rate": 4.6892462013550286e-05, - "loss": 0.5111, - "step": 162300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.265988826751709, - "loss_rtd": 0.26581963896751404, - "loss_sent": 0.12919701635837555, - "loss_sod": 0.02982659637928009, - "loss_total": 0.4248432517051697, - "step": 162399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.431527614593506, - "loss_rtd": 0.25241127610206604, - "loss_sent": 0.08514315634965897, - "loss_sod": 0.06059649586677551, - "loss_total": 0.3981509208679199, - "step": 162399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.910581111907959, - "learning_rate": 4.686079073000546e-05, - "loss": 0.4977, - "step": 162400 - }, - { - "epoch": 0.004998, - "loss_gen": 4.9854044914245605, - "loss_rtd": 0.25647881627082825, - "loss_sent": 0.019727284088730812, - "loss_sod": 0.030515329912304878, - "loss_total": 0.30672144889831543, - "step": 162499 - }, - { - "epoch": 0.004998, - "loss_gen": 4.734309196472168, - "loss_rtd": 0.2618856430053711, - "loss_sent": 3.370772174093872e-05, - "loss_sod": 0.05488193780183792, - "loss_total": 0.3168012797832489, - "step": 162499 - }, - { - "epoch": 0.005, - "grad_norm": 0.483264297246933, - "learning_rate": 4.682912071093374e-05, - "loss": 0.5109, - "step": 162500 - }, - { - "epoch": 0.005198, - "loss_gen": 4.8724751472473145, - "loss_rtd": 0.2632364332675934, - "loss_sent": 0.00023552450875286013, - "loss_sod": 0.16117876768112183, - "loss_total": 0.42465072870254517, - "step": 162599 - }, - { - "epoch": 0.005198, - "loss_gen": 4.560177326202393, - "loss_rtd": 0.2536315321922302, - "loss_sent": 3.5272158129373565e-05, - "loss_sod": 0.17273250222206116, - "loss_total": 0.4263993203639984, - "step": 162599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.9590781331062317, - "learning_rate": 4.679745196909184e-05, - "loss": 0.4926, - "step": 162600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.54343318939209, - "loss_rtd": 0.2936595380306244, - "loss_sent": 0.05555896461009979, - "loss_sod": 0.08926781266927719, - "loss_total": 0.43848633766174316, - "step": 162699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.347440719604492, - "loss_rtd": 0.28149881958961487, - "loss_sent": 0.10503031313419342, - "loss_sod": 0.08179987967014313, - "loss_total": 0.4683290123939514, - "step": 162699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.0491477251052856, - "learning_rate": 4.676578451723588e-05, - "loss": 0.5042, - "step": 162700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.493664741516113, - "loss_rtd": 0.29408419132232666, - "loss_sent": 0.10495664179325104, - "loss_sod": 0.031579189002513885, - "loss_total": 0.430620014667511, - "step": 162799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.574361801147461, - "loss_rtd": 0.28189975023269653, - "loss_sent": 0.15872269868850708, - "loss_sod": 0.014554636552929878, - "loss_total": 0.45517709851264954, - "step": 162799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.51229727268219, - "learning_rate": 4.673411836812155e-05, - "loss": 0.5045, - "step": 162800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.420064926147461, - "loss_rtd": 0.2710047960281372, - "loss_sent": 0.20507270097732544, - "loss_sod": 0.04919132590293884, - "loss_total": 0.5252687931060791, - "step": 162899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.324102878570557, - "loss_rtd": 0.2755732238292694, - "loss_sent": 0.11353310942649841, - "loss_sod": 0.0448470339179039, - "loss_total": 0.4339533746242523, - "step": 162899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.1079555749893188, - "learning_rate": 4.670245353450394e-05, - "loss": 0.487, - "step": 162900 - }, - { - "epoch": 0.005998, - "loss_gen": 4.567865371704102, - "loss_rtd": 0.23298753798007965, - "loss_sent": 0.19341908395290375, - "loss_sod": 0.028926221653819084, - "loss_total": 0.45533281564712524, - "step": 162999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.623187065124512, - "loss_rtd": 0.2772347033023834, - "loss_sent": 0.20228157937526703, - "loss_sod": 0.1268787682056427, - "loss_total": 0.6063950657844543, - "step": 162999 - }, - { - "epoch": 0.006, - "grad_norm": 1.046578049659729, - "learning_rate": 4.6670790029137655e-05, - "loss": 0.5086, - "step": 163000 - }, - { - "epoch": 0.006, - "eval_loss": 0.48867926001548767, - "eval_runtime": 150.8643, - "eval_samples_per_second": 102.363, - "eval_steps_per_second": 0.802, - "step": 163000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.196635723114014, - "loss_rtd": 0.28168556094169617, - "loss_sent": 0.12392954528331757, - "loss_sod": 0.04333566501736641, - "loss_total": 0.44895076751708984, - "step": 163099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.259164810180664, - "loss_rtd": 0.2748069763183594, - "loss_sent": 0.2878701984882355, - "loss_sod": 0.016808802261948586, - "loss_total": 0.5794860124588013, - "step": 163099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.3830218315124512, - "learning_rate": 4.663912786477674e-05, - "loss": 0.4884, - "step": 163100 - }, - { - "epoch": 0.006398, - "loss_gen": 4.5838494300842285, - "loss_rtd": 0.24817979335784912, - "loss_sent": 3.2675852708052844e-05, - "loss_sod": 0.08564233779907227, - "loss_total": 0.3338547945022583, - "step": 163199 - }, - { - "epoch": 0.006398, - "loss_gen": 4.423274993896484, - "loss_rtd": 0.2379945069551468, - "loss_sent": 0.0018292111344635487, - "loss_sod": 0.04787762835621834, - "loss_total": 0.2877013385295868, - "step": 163199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.9870240092277527, - "learning_rate": 4.660746705417474e-05, - "loss": 0.5106, - "step": 163200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.000813007354736, - "loss_rtd": 0.2704791724681854, - "loss_sent": 0.2741572856903076, - "loss_sod": 0.025609789416193962, - "loss_total": 0.5702462196350098, - "step": 163299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.260286331176758, - "loss_rtd": 0.28576281666755676, - "loss_sent": 0.2345261126756668, - "loss_sod": 0.001798760611563921, - "loss_total": 0.5220876932144165, - "step": 163299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.569671630859375, - "learning_rate": 4.657580761008458e-05, - "loss": 0.5042, - "step": 163300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.422150611877441, - "loss_rtd": 0.263324111700058, - "loss_sent": 0.3823015093803406, - "loss_sod": 0.16739143431186676, - "loss_total": 0.8130170702934265, - "step": 163399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.753075122833252, - "loss_rtd": 0.27140310406684875, - "loss_sent": 0.49792271852493286, - "loss_sod": 0.04979398101568222, - "loss_total": 0.8191198110580444, - "step": 163399 - }, - { - "epoch": 0.0068, - "grad_norm": 3.1737489700317383, - "learning_rate": 4.6544149545258725e-05, - "loss": 0.5011, - "step": 163400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.081680774688721, - "loss_rtd": 0.2573992609977722, - "loss_sent": 0.08737410604953766, - "loss_sod": 0.07585644721984863, - "loss_total": 0.4206297993659973, - "step": 163499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.307277202606201, - "loss_rtd": 0.2570751905441284, - "loss_sent": 0.0989130511879921, - "loss_sod": 0.030727503821253777, - "loss_total": 0.38671573996543884, - "step": 163499 - }, - { - "epoch": 0.007, - "grad_norm": 0.6402074098587036, - "learning_rate": 4.6512492872449e-05, - "loss": 0.5052, - "step": 163500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.550046443939209, - "loss_rtd": 0.276483416557312, - "loss_sent": 0.1697588413953781, - "loss_sod": 0.11472378671169281, - "loss_total": 0.5609660744667053, - "step": 163599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.275697231292725, - "loss_rtd": 0.28646954894065857, - "loss_sent": 0.23295924067497253, - "loss_sod": 0.18847021460533142, - "loss_total": 0.7078989744186401, - "step": 163599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.2019567489624023, - "learning_rate": 4.6480837604406755e-05, - "loss": 0.5469, - "step": 163600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.931293964385986, - "loss_rtd": 0.2878555357456207, - "loss_sent": 0.08064966648817062, - "loss_sod": 0.09106098115444183, - "loss_total": 0.4595661759376526, - "step": 163699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.440551280975342, - "loss_rtd": 0.25958216190338135, - "loss_sent": 0.16952340304851532, - "loss_sod": 0.035872530192136765, - "loss_total": 0.46497809886932373, - "step": 163699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.8347538709640503, - "learning_rate": 4.6449183753882683e-05, - "loss": 0.4985, - "step": 163700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.048709392547607, - "loss_rtd": 0.2709319591522217, - "loss_sent": 0.10440225899219513, - "loss_sod": 0.03406666964292526, - "loss_total": 0.4094008803367615, - "step": 163799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.5242414474487305, - "loss_rtd": 0.25989148020744324, - "loss_sent": 0.1897791028022766, - "loss_sod": 0.07422979921102524, - "loss_total": 0.5239003896713257, - "step": 163799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.8669432997703552, - "learning_rate": 4.641753133362697e-05, - "loss": 0.4962, - "step": 163800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.846010684967041, - "loss_rtd": 0.2867366671562195, - "loss_sent": 0.29276242852211, - "loss_sod": 0.08926688879728317, - "loss_total": 0.6687660217285156, - "step": 163899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.45940637588501, - "loss_rtd": 0.25110098719596863, - "loss_sent": 0.3821507692337036, - "loss_sod": 0.0544838011264801, - "loss_total": 0.6877355575561523, - "step": 163899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.5126763582229614, - "learning_rate": 4.638588035638922e-05, - "loss": 0.4943, - "step": 163900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.329362392425537, - "loss_rtd": 0.26091039180755615, - "loss_sent": 0.45977982878685, - "loss_sod": 0.008483264595270157, - "loss_total": 0.729173481464386, - "step": 163999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.4824395179748535, - "loss_rtd": 0.2714334726333618, - "loss_sent": 0.19653542339801788, - "loss_sod": 0.10478407144546509, - "loss_total": 0.5727529525756836, - "step": 163999 - }, - { - "epoch": 0.008, - "grad_norm": 1.234817624092102, - "learning_rate": 4.6354230834918424e-05, - "loss": 0.5003, - "step": 164000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4826383888721466, - "eval_runtime": 150.7695, - "eval_samples_per_second": 102.428, - "eval_steps_per_second": 0.803, - "step": 164000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.691675662994385, - "loss_rtd": 0.24220599234104156, - "loss_sent": 0.021226750686764717, - "loss_sod": 0.05146567523479462, - "loss_total": 0.31489843130111694, - "step": 164099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.425869464874268, - "loss_rtd": 0.25354447960853577, - "loss_sent": 0.05983325466513634, - "loss_sod": 0.05555838346481323, - "loss_total": 0.36893612146377563, - "step": 164099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.5825456380844116, - "learning_rate": 4.632258278196301e-05, - "loss": 0.4999, - "step": 164100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.4635233879089355, - "loss_rtd": 0.26874104142189026, - "loss_sent": 0.5512610673904419, - "loss_sod": 0.0693744421005249, - "loss_total": 0.8893765211105347, - "step": 164199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.497776985168457, - "loss_rtd": 0.2705215513706207, - "loss_sent": 0.11279398947954178, - "loss_sod": 0.15561792254447937, - "loss_total": 0.5389334559440613, - "step": 164199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.5598934888839722, - "learning_rate": 4.6290936210270806e-05, - "loss": 0.5075, - "step": 164200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.348184108734131, - "loss_rtd": 0.2685256004333496, - "loss_sent": 0.20474465191364288, - "loss_sod": 0.07704780250787735, - "loss_total": 0.5503180623054504, - "step": 164299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.261141777038574, - "loss_rtd": 0.27409055829048157, - "loss_sent": 0.1952885091304779, - "loss_sod": 0.04625772684812546, - "loss_total": 0.5156368017196655, - "step": 164299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.8624095320701599, - "learning_rate": 4.6259291132589064e-05, - "loss": 0.5171, - "step": 164300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.83092737197876, - "loss_rtd": 0.2858480215072632, - "loss_sent": 0.12171663343906403, - "loss_sod": 0.09565457701683044, - "loss_total": 0.5032192468643188, - "step": 164399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.413974285125732, - "loss_rtd": 0.28275954723358154, - "loss_sent": 0.22656257450580597, - "loss_sod": 0.050913333892822266, - "loss_total": 0.5602354407310486, - "step": 164399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.6559889316558838, - "learning_rate": 4.62276475616644e-05, - "loss": 0.5072, - "step": 164400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.438117027282715, - "loss_rtd": 0.2606719136238098, - "loss_sent": 0.24627365171909332, - "loss_sod": 0.0357002317905426, - "loss_total": 0.5426458120346069, - "step": 164499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.413086891174316, - "loss_rtd": 0.2698831856250763, - "loss_sent": 0.23739132285118103, - "loss_sod": 0.023256702348589897, - "loss_total": 0.5305311679840088, - "step": 164499 - }, - { - "epoch": 0.009, - "grad_norm": 2.3605706691741943, - "learning_rate": 4.619600551024285e-05, - "loss": 0.51, - "step": 164500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.65707540512085, - "loss_rtd": 0.2757400870323181, - "loss_sent": 0.5702740550041199, - "loss_sod": 0.055693574249744415, - "loss_total": 0.9017077088356018, - "step": 164599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.978064060211182, - "loss_rtd": 0.26737818121910095, - "loss_sent": 0.03427908197045326, - "loss_sod": 0.07687597721815109, - "loss_total": 0.3785332441329956, - "step": 164599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.4472017288208008, - "learning_rate": 4.616436499106982e-05, - "loss": 0.5028, - "step": 164600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.549246311187744, - "loss_rtd": 0.2585373818874359, - "loss_sent": 0.2615335285663605, - "loss_sod": 0.01783927157521248, - "loss_total": 0.5379102230072021, - "step": 164699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.301608085632324, - "loss_rtd": 0.2660742402076721, - "loss_sent": 0.2762836515903473, - "loss_sod": 0.15904825925827026, - "loss_total": 0.7014061212539673, - "step": 164699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.6548930406570435, - "learning_rate": 4.613272601689012e-05, - "loss": 0.5031, - "step": 164700 - }, - { - "epoch": 0.009598, - "loss_gen": 4.4817795753479, - "loss_rtd": 0.23729349672794342, - "loss_sent": 5.854514893144369e-05, - "loss_sod": 0.10458141565322876, - "loss_total": 0.3419334292411804, - "step": 164799 - }, - { - "epoch": 0.009598, - "loss_gen": 4.456570148468018, - "loss_rtd": 0.2427823543548584, - "loss_sent": 0.009673393331468105, - "loss_sod": 0.06102978438138962, - "loss_total": 0.3134855329990387, - "step": 164799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.7266996502876282, - "learning_rate": 4.610108860044791e-05, - "loss": 0.4993, - "step": 164800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.356980323791504, - "loss_rtd": 0.2802791893482208, - "loss_sent": 0.12030818313360214, - "loss_sod": 0.22139334678649902, - "loss_total": 0.6219806671142578, - "step": 164899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.348964214324951, - "loss_rtd": 0.28221219778060913, - "loss_sent": 0.10928641259670258, - "loss_sod": 0.028059374541044235, - "loss_total": 0.41955798864364624, - "step": 164899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.0726114511489868, - "learning_rate": 4.6069452754486756e-05, - "loss": 0.5157, - "step": 164900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.3322014808654785, - "loss_rtd": 0.2652531564235687, - "loss_sent": 0.2010899782180786, - "loss_sod": 0.04203135892748833, - "loss_total": 0.5083744525909424, - "step": 164999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.87101411819458, - "loss_rtd": 0.27641060948371887, - "loss_sent": 0.17419025301933289, - "loss_sod": 0.03616292029619217, - "loss_total": 0.48676377534866333, - "step": 164999 - }, - { - "epoch": 0.01, - "grad_norm": 0.9331060647964478, - "learning_rate": 4.603781849174955e-05, - "loss": 0.473, - "step": 165000 - }, - { - "epoch": 0.01, - "eval_loss": 0.48438504338264465, - "eval_runtime": 150.7866, - "eval_samples_per_second": 102.416, - "eval_steps_per_second": 0.802, - "step": 165000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.626122951507568, - "loss_rtd": 0.2546291649341583, - "loss_sent": 0.13072346150875092, - "loss_sod": 0.06794248521327972, - "loss_total": 0.45329511165618896, - "step": 165099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.788132190704346, - "loss_rtd": 0.25939905643463135, - "loss_sent": 0.24285702407360077, - "loss_sod": 0.040133036673069, - "loss_total": 0.5423890948295593, - "step": 165099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.0221394300460815, - "learning_rate": 4.6006185824978594e-05, - "loss": 0.5015, - "step": 165100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.292180061340332, - "loss_rtd": 0.27583184838294983, - "loss_sent": 0.26557138562202454, - "loss_sod": 0.04793336242437363, - "loss_total": 0.589336633682251, - "step": 165199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.339547634124756, - "loss_rtd": 0.2673541009426117, - "loss_sent": 0.03542206063866615, - "loss_sod": 0.012089189141988754, - "loss_total": 0.3148653507232666, - "step": 165199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.7367468476295471, - "learning_rate": 4.59745547669155e-05, - "loss": 0.5038, - "step": 165200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.372686386108398, - "loss_rtd": 0.2524415850639343, - "loss_sent": 0.09043261408805847, - "loss_sod": 0.07902982831001282, - "loss_total": 0.4219040274620056, - "step": 165299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.473160266876221, - "loss_rtd": 0.28483524918556213, - "loss_sent": 0.10204581171274185, - "loss_sod": 0.0278621893376112, - "loss_total": 0.41474324464797974, - "step": 165299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9719716310501099, - "learning_rate": 4.5942925330301276e-05, - "loss": 0.4997, - "step": 165300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.557486057281494, - "loss_rtd": 0.2818976640701294, - "loss_sent": 0.18971668183803558, - "loss_sod": 0.10618086159229279, - "loss_total": 0.5777952075004578, - "step": 165399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.35322380065918, - "loss_rtd": 0.24993188679218292, - "loss_sent": 0.21613375842571259, - "loss_sod": 0.10320958495140076, - "loss_total": 0.5692752599716187, - "step": 165399 - }, - { - "epoch": 0.0108, - "grad_norm": 2.0916054248809814, - "learning_rate": 4.591129752787623e-05, - "loss": 0.5098, - "step": 165400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.265660762786865, - "loss_rtd": 0.2649977505207062, - "loss_sent": 0.1313401162624359, - "loss_sod": 0.04304903373122215, - "loss_total": 0.43938690423965454, - "step": 165499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.371414661407471, - "loss_rtd": 0.2730584740638733, - "loss_sent": 0.1434060037136078, - "loss_sod": 0.05730576068162918, - "loss_total": 0.47377023100852966, - "step": 165499 - }, - { - "epoch": 0.011, - "grad_norm": 0.6318996548652649, - "learning_rate": 4.587967137238006e-05, - "loss": 0.4905, - "step": 165500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.278937339782715, - "loss_rtd": 0.25847798585891724, - "loss_sent": 0.35574769973754883, - "loss_sod": 0.050757866352796555, - "loss_total": 0.6649835109710693, - "step": 165599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.451294422149658, - "loss_rtd": 0.27524012327194214, - "loss_sent": 0.0910453274846077, - "loss_sod": 0.019244834780693054, - "loss_total": 0.3855302929878235, - "step": 165599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9767748713493347, - "learning_rate": 4.584804687655177e-05, - "loss": 0.5019, - "step": 165600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.455461502075195, - "loss_rtd": 0.2628023326396942, - "loss_sent": 0.20049241185188293, - "loss_sod": 0.09067504107952118, - "loss_total": 0.5539697408676147, - "step": 165699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.385140895843506, - "loss_rtd": 0.2677285373210907, - "loss_sent": 0.17031317949295044, - "loss_sod": 0.13305577635765076, - "loss_total": 0.5710974931716919, - "step": 165699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.1536784172058105, - "learning_rate": 4.58164240531297e-05, - "loss": 0.4898, - "step": 165700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.205896377563477, - "loss_rtd": 0.2467249035835266, - "loss_sent": 0.25560465455055237, - "loss_sod": 0.01789039932191372, - "loss_total": 0.5202199220657349, - "step": 165799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.474554061889648, - "loss_rtd": 0.25408485531806946, - "loss_sent": 0.34175625443458557, - "loss_sod": 0.028084468096494675, - "loss_total": 0.6239255666732788, - "step": 165799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.2400084733963013, - "learning_rate": 4.578480291485152e-05, - "loss": 0.5018, - "step": 165800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.407284736633301, - "loss_rtd": 0.2712644338607788, - "loss_sent": 0.07869881391525269, - "loss_sod": 0.06259606778621674, - "loss_total": 0.4125593304634094, - "step": 165899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.107211589813232, - "loss_rtd": 0.2766292691230774, - "loss_sent": 0.05480150878429413, - "loss_sod": 0.04472660645842552, - "loss_total": 0.37615740299224854, - "step": 165899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.9775351285934448, - "learning_rate": 4.575318347445422e-05, - "loss": 0.5107, - "step": 165900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.451925277709961, - "loss_rtd": 0.2789785861968994, - "loss_sent": 0.34577539563179016, - "loss_sod": 0.1541379690170288, - "loss_total": 0.778891921043396, - "step": 165999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.297626495361328, - "loss_rtd": 0.2855328321456909, - "loss_sent": 0.24290558695793152, - "loss_sod": 0.0163103174418211, - "loss_total": 0.5447487235069275, - "step": 165999 - }, - { - "epoch": 0.012, - "grad_norm": 1.4010405540466309, - "learning_rate": 4.572156574467411e-05, - "loss": 0.5089, - "step": 166000 - }, - { - "epoch": 0.012, - "eval_loss": 0.48603564500808716, - "eval_runtime": 150.8488, - "eval_samples_per_second": 102.374, - "eval_steps_per_second": 0.802, - "step": 166000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.359498023986816, - "loss_rtd": 0.2447972595691681, - "loss_sent": 0.13807053864002228, - "loss_sod": 0.09395666420459747, - "loss_total": 0.47682446241378784, - "step": 166099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.2059855461120605, - "loss_rtd": 0.26789209246635437, - "loss_sent": 0.1320495903491974, - "loss_sod": 0.06434500217437744, - "loss_total": 0.4642866849899292, - "step": 166099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.9840983748435974, - "learning_rate": 4.56899497382468e-05, - "loss": 0.4948, - "step": 166100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.42720365524292, - "loss_rtd": 0.27576813101768494, - "loss_sent": 0.16738727688789368, - "loss_sod": 0.06169932335615158, - "loss_total": 0.5048547387123108, - "step": 166199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.688323497772217, - "loss_rtd": 0.25756192207336426, - "loss_sent": 0.5139428377151489, - "loss_sod": 0.050605274736881256, - "loss_total": 0.8221100568771362, - "step": 166199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.7862499952316284, - "learning_rate": 4.565833546790723e-05, - "loss": 0.4845, - "step": 166200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.421172142028809, - "loss_rtd": 0.2712525427341461, - "loss_sent": 0.4319884777069092, - "loss_sod": 0.019644085317850113, - "loss_total": 0.7228851318359375, - "step": 166299 - }, - { - "epoch": 0.012598, - "loss_gen": 4.947851657867432, - "loss_rtd": 0.2702462375164032, - "loss_sent": 0.17605744302272797, - "loss_sod": 0.042434852570295334, - "loss_total": 0.4887385368347168, - "step": 166299 - }, - { - "epoch": 0.0126, - "grad_norm": 2.0962629318237305, - "learning_rate": 4.56267229463896e-05, - "loss": 0.5121, - "step": 166300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.406226634979248, - "loss_rtd": 0.2810554802417755, - "loss_sent": 0.18005436658859253, - "loss_sod": 0.0701325386762619, - "loss_total": 0.5312423706054688, - "step": 166399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.280158042907715, - "loss_rtd": 0.2932305932044983, - "loss_sent": 0.17078426480293274, - "loss_sod": 0.008367412723600864, - "loss_total": 0.4723822772502899, - "step": 166399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.598619818687439, - "learning_rate": 4.5595112186427465e-05, - "loss": 0.5043, - "step": 166400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.456410884857178, - "loss_rtd": 0.23915442824363708, - "loss_sent": 0.014395859092473984, - "loss_sod": 0.056887514889240265, - "loss_total": 0.31043779850006104, - "step": 166499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.590512275695801, - "loss_rtd": 0.23342490196228027, - "loss_sent": 0.08713645488023758, - "loss_sod": 0.02067759819328785, - "loss_total": 0.34123894572257996, - "step": 166499 - }, - { - "epoch": 0.013, - "grad_norm": 0.6391474604606628, - "learning_rate": 4.556350320075359e-05, - "loss": 0.4972, - "step": 166500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.363429069519043, - "loss_rtd": 0.25685250759124756, - "loss_sent": 0.41547098755836487, - "loss_sod": 0.0345754399895668, - "loss_total": 0.7068989276885986, - "step": 166599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.199674606323242, - "loss_rtd": 0.27524057030677795, - "loss_sent": 0.2515944540500641, - "loss_sod": 0.02120119519531727, - "loss_total": 0.5480362176895142, - "step": 166599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0128406286239624, - "learning_rate": 4.553189600210012e-05, - "loss": 0.5052, - "step": 166600 - }, - { - "epoch": 0.013398, - "loss_gen": 4.539990425109863, - "loss_rtd": 0.25559335947036743, - "loss_sent": 0.006525717210024595, - "loss_sod": 0.13782614469528198, - "loss_total": 0.3999452292919159, - "step": 166699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.329376220703125, - "loss_rtd": 0.27464374899864197, - "loss_sent": 0.115913987159729, - "loss_sod": 0.19408178329467773, - "loss_total": 0.5846395492553711, - "step": 166699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.9887056946754456, - "learning_rate": 4.550029060319839e-05, - "loss": 0.509, - "step": 166700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.409414291381836, - "loss_rtd": 0.28682273626327515, - "loss_sent": 0.09505373239517212, - "loss_sod": 0.03986112400889397, - "loss_total": 0.42173758149147034, - "step": 166799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.381486415863037, - "loss_rtd": 0.2774446904659271, - "loss_sent": 0.09368741512298584, - "loss_sod": 0.0027217199094593525, - "loss_total": 0.3738538324832916, - "step": 166799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.7898857593536377, - "learning_rate": 4.546868701677908e-05, - "loss": 0.49, - "step": 166800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.2110490798950195, - "loss_rtd": 0.2563081383705139, - "loss_sent": 0.2264343649148941, - "loss_sod": 0.0321497805416584, - "loss_total": 0.5148922801017761, - "step": 166899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.506667613983154, - "loss_rtd": 0.25266268849372864, - "loss_sent": 0.2526794672012329, - "loss_sod": 0.009198658168315887, - "loss_total": 0.5145407915115356, - "step": 166899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.664481520652771, - "learning_rate": 4.543708525557208e-05, - "loss": 0.4972, - "step": 166900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.3129191398620605, - "loss_rtd": 0.25302985310554504, - "loss_sent": 0.1839773803949356, - "loss_sod": 0.09276404231786728, - "loss_total": 0.5297712683677673, - "step": 166999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.687230587005615, - "loss_rtd": 0.28435543179512024, - "loss_sent": 0.20702335238456726, - "loss_sod": 0.07819317281246185, - "loss_total": 0.5695719718933105, - "step": 166999 - }, - { - "epoch": 0.014, - "grad_norm": 0.9783918857574463, - "learning_rate": 4.540548533230661e-05, - "loss": 0.508, - "step": 167000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4755215644836426, - "eval_runtime": 150.7223, - "eval_samples_per_second": 102.46, - "eval_steps_per_second": 0.803, - "step": 167000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.118136405944824, - "loss_rtd": 0.2836233675479889, - "loss_sent": 0.12645424902439117, - "loss_sod": 0.024192065000534058, - "loss_total": 0.43426966667175293, - "step": 167099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.48662805557251, - "loss_rtd": 0.24392926692962646, - "loss_sent": 0.25345009565353394, - "loss_sod": 0.09135273844003677, - "loss_total": 0.588732123374939, - "step": 167099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.121079921722412, - "learning_rate": 4.5373887259711103e-05, - "loss": 0.4988, - "step": 167100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.632580280303955, - "loss_rtd": 0.2665417492389679, - "loss_sent": 0.29506707191467285, - "loss_sod": 0.06013890355825424, - "loss_total": 0.6217477321624756, - "step": 167199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.352197647094727, - "loss_rtd": 0.27785807847976685, - "loss_sent": 0.14311152696609497, - "loss_sod": 0.026262005791068077, - "loss_total": 0.44723162055015564, - "step": 167199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.9423695802688599, - "learning_rate": 4.5342291050513254e-05, - "loss": 0.5061, - "step": 167200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.549313068389893, - "loss_rtd": 0.2749606966972351, - "loss_sent": 0.09180915355682373, - "loss_sod": 0.057210523635149, - "loss_total": 0.42398038506507874, - "step": 167299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.151215076446533, - "loss_rtd": 0.2457694560289383, - "loss_sent": 0.11216197162866592, - "loss_sod": 0.11463813483715057, - "loss_total": 0.4725695550441742, - "step": 167299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.8585280776023865, - "learning_rate": 4.531069671743999e-05, - "loss": 0.4956, - "step": 167300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.729015827178955, - "loss_rtd": 0.2535562217235565, - "loss_sent": 0.24296793341636658, - "loss_sod": 0.0640372484922409, - "loss_total": 0.5605614185333252, - "step": 167399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.214290142059326, - "loss_rtd": 0.2587656080722809, - "loss_sent": 0.09210658818483353, - "loss_sod": 0.06939534842967987, - "loss_total": 0.4202675223350525, - "step": 167399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.9528436660766602, - "learning_rate": 4.527910427321755e-05, - "loss": 0.5027, - "step": 167400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.571860313415527, - "loss_rtd": 0.2504604756832123, - "loss_sent": 0.20218954980373383, - "loss_sod": 0.03219134360551834, - "loss_total": 0.48484134674072266, - "step": 167499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.189939975738525, - "loss_rtd": 0.2828872501850128, - "loss_sent": 0.1269908845424652, - "loss_sod": 0.005901483818888664, - "loss_total": 0.41577962040901184, - "step": 167499 - }, - { - "epoch": 0.015, - "grad_norm": 0.5682773590087891, - "learning_rate": 4.524751373057132e-05, - "loss": 0.5047, - "step": 167500 - }, - { - "epoch": 0.015198, - "loss_gen": 4.9674530029296875, - "loss_rtd": 0.23083911836147308, - "loss_sent": 4.700662975665182e-05, - "loss_sod": 0.12337478995323181, - "loss_total": 0.3542608916759491, - "step": 167599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.8469929695129395, - "loss_rtd": 0.23121331632137299, - "loss_sent": 3.6799596273340285e-05, - "loss_sod": 0.16222378611564636, - "loss_total": 0.39347389340400696, - "step": 167599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.1110172271728516, - "learning_rate": 4.521592510222601e-05, - "loss": 0.4965, - "step": 167600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.174350261688232, - "loss_rtd": 0.261982798576355, - "loss_sent": 0.1439199447631836, - "loss_sod": 0.10705357044935226, - "loss_total": 0.5129563212394714, - "step": 167699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.304527282714844, - "loss_rtd": 0.2757393717765808, - "loss_sent": 0.19051194190979004, - "loss_sod": 0.043662890791893005, - "loss_total": 0.509914219379425, - "step": 167699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.111415982246399, - "learning_rate": 4.518433840090549e-05, - "loss": 0.501, - "step": 167700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.373919486999512, - "loss_rtd": 0.25809842348098755, - "loss_sent": 0.28425711393356323, - "loss_sod": 0.07867135852575302, - "loss_total": 0.621026873588562, - "step": 167799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.168500900268555, - "loss_rtd": 0.25599929690361023, - "loss_sent": 0.3453138768672943, - "loss_sod": 0.029682748019695282, - "loss_total": 0.6309959292411804, - "step": 167799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.4504157304763794, - "learning_rate": 4.51527536393329e-05, - "loss": 0.5124, - "step": 167800 - }, - { - "epoch": 0.015798, - "loss_gen": 4.251155853271484, - "loss_rtd": 0.2272103726863861, - "loss_sent": 0.0014535936061292887, - "loss_sod": 0.10316424071788788, - "loss_total": 0.33182820677757263, - "step": 167899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.352320671081543, - "loss_rtd": 0.2797969579696655, - "loss_sent": 0.24996641278266907, - "loss_sod": 0.02973112277686596, - "loss_total": 0.5594944953918457, - "step": 167899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.9157557487487793, - "learning_rate": 4.512117083023054e-05, - "loss": 0.4988, - "step": 167900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.048002243041992, - "loss_rtd": 0.22969184815883636, - "loss_sent": 0.2293103188276291, - "loss_sod": 0.019478891044855118, - "loss_total": 0.4784810543060303, - "step": 167999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.313672065734863, - "loss_rtd": 0.2619876563549042, - "loss_sent": 0.3204754590988159, - "loss_sod": 0.0028961533680558205, - "loss_total": 0.5853592753410339, - "step": 167999 - }, - { - "epoch": 0.016, - "grad_norm": 2.1524903774261475, - "learning_rate": 4.508958998632e-05, - "loss": 0.4873, - "step": 168000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4858248829841614, - "eval_runtime": 150.7798, - "eval_samples_per_second": 102.421, - "eval_steps_per_second": 0.802, - "step": 168000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.309937477111816, - "loss_rtd": 0.27981308102607727, - "loss_sent": 0.21948282420635223, - "loss_sod": 0.06391419470310211, - "loss_total": 0.563210129737854, - "step": 168099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.210428714752197, - "loss_rtd": 0.26737168431282043, - "loss_sent": 0.05904927849769592, - "loss_sod": 0.07481005787849426, - "loss_total": 0.4012310206890106, - "step": 168099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.7795582413673401, - "learning_rate": 4.505801112032202e-05, - "loss": 0.4954, - "step": 168100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.683972358703613, - "loss_rtd": 0.27294522523880005, - "loss_sent": 0.29927825927734375, - "loss_sod": 0.098269522190094, - "loss_total": 0.6704930067062378, - "step": 168199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.382657051086426, - "loss_rtd": 0.25094324350357056, - "loss_sent": 0.34905773401260376, - "loss_sod": 0.033952027559280396, - "loss_total": 0.6339529752731323, - "step": 168199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.7303627729415894, - "learning_rate": 4.502643424495658e-05, - "loss": 0.5193, - "step": 168200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.351790428161621, - "loss_rtd": 0.25140300393104553, - "loss_sent": 0.11114335060119629, - "loss_sod": 0.09140322357416153, - "loss_total": 0.45394957065582275, - "step": 168299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.532053470611572, - "loss_rtd": 0.2671525776386261, - "loss_sent": 0.24703453481197357, - "loss_sod": 0.01089945062994957, - "loss_total": 0.5250865817070007, - "step": 168299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.757623016834259, - "learning_rate": 4.499485937294282e-05, - "loss": 0.5073, - "step": 168300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.328295707702637, - "loss_rtd": 0.27258703112602234, - "loss_sent": 0.4864695966243744, - "loss_sod": 0.04786607250571251, - "loss_total": 0.8069226741790771, - "step": 168399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.371199131011963, - "loss_rtd": 0.28276970982551575, - "loss_sent": 0.24851186573505402, - "loss_sod": 0.04643501341342926, - "loss_total": 0.577716588973999, - "step": 168399 - }, - { - "epoch": 0.0168, - "grad_norm": 2.028629779815674, - "learning_rate": 4.4963286516999114e-05, - "loss": 0.4871, - "step": 168400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.465271472930908, - "loss_rtd": 0.2482798546552658, - "loss_sent": 6.206209218362346e-05, - "loss_sod": 0.22675487399101257, - "loss_total": 0.47509676218032837, - "step": 168499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.391423225402832, - "loss_rtd": 0.2677866518497467, - "loss_sent": 0.06388792395591736, - "loss_sod": 0.023668289184570312, - "loss_total": 0.3553428649902344, - "step": 168499 - }, - { - "epoch": 0.017, - "grad_norm": 1.0805388689041138, - "learning_rate": 4.4931715689843e-05, - "loss": 0.4799, - "step": 168500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.520055294036865, - "loss_rtd": 0.2823135256767273, - "loss_sent": 0.17781849205493927, - "loss_sod": 0.04104011505842209, - "loss_total": 0.5011721253395081, - "step": 168599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.64277982711792, - "loss_rtd": 0.27174386382102966, - "loss_sent": 0.39020928740501404, - "loss_sod": 0.07014597207307816, - "loss_total": 0.7320991158485413, - "step": 168599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.7376092672348022, - "learning_rate": 4.490014690419119e-05, - "loss": 0.502, - "step": 168600 - }, - { - "epoch": 0.017398, - "loss_gen": 4.833861827850342, - "loss_rtd": 0.2307732254266739, - "loss_sent": 0.0436985120177269, - "loss_sod": 0.06335127353668213, - "loss_total": 0.3378230035305023, - "step": 168699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.434229850769043, - "loss_rtd": 0.2786445617675781, - "loss_sent": 0.2816252112388611, - "loss_sod": 0.04005259647965431, - "loss_total": 0.6003223657608032, - "step": 168699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.1425764560699463, - "learning_rate": 4.4868580172759605e-05, - "loss": 0.5021, - "step": 168700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.347623348236084, - "loss_rtd": 0.25195398926734924, - "loss_sent": 0.14637000858783722, - "loss_sod": 0.009355948306620121, - "loss_total": 0.4076799750328064, - "step": 168799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.267827033996582, - "loss_rtd": 0.2708859443664551, - "loss_sent": 0.38428178429603577, - "loss_sod": 0.0482797846198082, - "loss_total": 0.7034475207328796, - "step": 168799 - }, - { - "epoch": 0.0176, - "grad_norm": 2.426145553588867, - "learning_rate": 4.483701550826331e-05, - "loss": 0.5021, - "step": 168800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.149459362030029, - "loss_rtd": 0.24636590480804443, - "loss_sent": 0.05017540231347084, - "loss_sod": 0.02266959473490715, - "loss_total": 0.31921088695526123, - "step": 168899 - }, - { - "epoch": 0.017798, - "loss_gen": 4.458242416381836, - "loss_rtd": 0.23602870106697083, - "loss_sent": 4.74040352855809e-05, - "loss_sod": 0.05188628286123276, - "loss_total": 0.287962406873703, - "step": 168899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.7804234623908997, - "learning_rate": 4.4805452923416554e-05, - "loss": 0.5093, - "step": 168900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.327387809753418, - "loss_rtd": 0.26298099756240845, - "loss_sent": 0.2780497968196869, - "loss_sod": 0.026178279891610146, - "loss_total": 0.5672090649604797, - "step": 168999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.3800129890441895, - "loss_rtd": 0.24750502407550812, - "loss_sent": 0.37032008171081543, - "loss_sod": 0.019295981153845787, - "loss_total": 0.6371210813522339, - "step": 168999 - }, - { - "epoch": 0.018, - "grad_norm": 1.1320600509643555, - "learning_rate": 4.477389243093273e-05, - "loss": 0.5065, - "step": 169000 - }, - { - "epoch": 0.018, - "eval_loss": 0.48476919531822205, - "eval_runtime": 150.7082, - "eval_samples_per_second": 102.47, - "eval_steps_per_second": 0.803, - "step": 169000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.689855575561523, - "loss_rtd": 0.27795419096946716, - "loss_sent": 0.18249726295471191, - "loss_sod": 0.07154709845781326, - "loss_total": 0.5319985151290894, - "step": 169099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.256010055541992, - "loss_rtd": 0.2706320583820343, - "loss_sent": 0.10705152153968811, - "loss_sod": 0.10184080898761749, - "loss_total": 0.4795244038105011, - "step": 169099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.9365662932395935, - "learning_rate": 4.4742334043524415e-05, - "loss": 0.4944, - "step": 169100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.54203462600708, - "loss_rtd": 0.2633820176124573, - "loss_sent": 0.17439861595630646, - "loss_sod": 0.12042141705751419, - "loss_total": 0.5582020282745361, - "step": 169199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.297516822814941, - "loss_rtd": 0.2625052034854889, - "loss_sent": 0.05075225606560707, - "loss_sod": 0.04686944559216499, - "loss_total": 0.36012691259384155, - "step": 169199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.8866571187973022, - "learning_rate": 4.471077777390331e-05, - "loss": 0.5, - "step": 169200 - }, - { - "epoch": 0.018598, - "loss_gen": 4.463126182556152, - "loss_rtd": 0.2389262318611145, - "loss_sent": 0.000489122059661895, - "loss_sod": 0.17280098795890808, - "loss_total": 0.41221633553504944, - "step": 169299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.5432891845703125, - "loss_rtd": 0.26551342010498047, - "loss_sent": 0.21926134824752808, - "loss_sod": 0.07455364614725113, - "loss_total": 0.5593284368515015, - "step": 169299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.9207517504692078, - "learning_rate": 4.46792236347803e-05, - "loss": 0.5015, - "step": 169300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.219318389892578, - "loss_rtd": 0.24935311079025269, - "loss_sent": 0.10047516226768494, - "loss_sod": 0.008159383200109005, - "loss_total": 0.357987642288208, - "step": 169399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.312473297119141, - "loss_rtd": 0.26895448565483093, - "loss_sent": 0.2097213864326477, - "loss_sod": 0.061198119074106216, - "loss_total": 0.5398739576339722, - "step": 169399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.3753437995910645, - "learning_rate": 4.464767163886536e-05, - "loss": 0.5024, - "step": 169400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.333802700042725, - "loss_rtd": 0.25046104192733765, - "loss_sent": 0.1616601198911667, - "loss_sod": 0.03301946818828583, - "loss_total": 0.44514065980911255, - "step": 169499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.260376930236816, - "loss_rtd": 0.26193368434906006, - "loss_sent": 0.22181886434555054, - "loss_sod": 0.06733492016792297, - "loss_total": 0.551087498664856, - "step": 169499 - }, - { - "epoch": 0.019, - "grad_norm": 0.9431465268135071, - "learning_rate": 4.461612179886766e-05, - "loss": 0.494, - "step": 169500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.238470077514648, - "loss_rtd": 0.27454906702041626, - "loss_sent": 0.16887615621089935, - "loss_sod": 0.09713076800107956, - "loss_total": 0.540556013584137, - "step": 169599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.788388252258301, - "loss_rtd": 0.23497284948825836, - "loss_sent": 0.031695254147052765, - "loss_sod": 0.12693434953689575, - "loss_total": 0.3936024308204651, - "step": 169599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.3158783912658691, - "learning_rate": 4.458457412749544e-05, - "loss": 0.4959, - "step": 169600 - }, - { - "epoch": 0.019398, - "loss_gen": 4.679224967956543, - "loss_rtd": 0.24525420367717743, - "loss_sent": 6.181051867315546e-05, - "loss_sod": 0.21537888050079346, - "loss_total": 0.46069490909576416, - "step": 169699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.540152549743652, - "loss_rtd": 0.24027235805988312, - "loss_sent": 0.12506097555160522, - "loss_sod": 0.14578180015087128, - "loss_total": 0.5111151337623596, - "step": 169699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.4084185361862183, - "learning_rate": 4.455302863745613e-05, - "loss": 0.513, - "step": 169700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.2467041015625, - "loss_rtd": 0.2628336250782013, - "loss_sent": 0.07544388622045517, - "loss_sod": 0.029422901570796967, - "loss_total": 0.3677004277706146, - "step": 169799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.480223178863525, - "loss_rtd": 0.26126691699028015, - "loss_sent": 0.6259580254554749, - "loss_sod": 0.08103892207145691, - "loss_total": 0.9682638645172119, - "step": 169799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.288264274597168, - "learning_rate": 4.4521485341456216e-05, - "loss": 0.5023, - "step": 169800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.5037970542907715, - "loss_rtd": 0.27007076144218445, - "loss_sent": 0.032417405396699905, - "loss_sod": 0.06645283102989197, - "loss_total": 0.3689410090446472, - "step": 169899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.211860179901123, - "loss_rtd": 0.2855651080608368, - "loss_sent": 0.0825471356511116, - "loss_sod": 0.05850375443696976, - "loss_total": 0.42661598324775696, - "step": 169899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.0605272054672241, - "learning_rate": 4.4489944252201366e-05, - "loss": 0.4976, - "step": 169900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.047159194946289, - "loss_rtd": 0.2544962763786316, - "loss_sent": 0.001588489511050284, - "loss_sod": 0.20521697402000427, - "loss_total": 0.4613017439842224, - "step": 169999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.681049823760986, - "loss_rtd": 0.23406003415584564, - "loss_sent": 0.00014909173478372395, - "loss_sod": 0.13247479498386383, - "loss_total": 0.3666839301586151, - "step": 169999 - }, - { - "epoch": 0.02, - "grad_norm": 1.2742552757263184, - "learning_rate": 4.44584053823963e-05, - "loss": 0.5199, - "step": 170000 - }, - { - "epoch": 0.02, - "eval_loss": 0.4731971323490143, - "eval_runtime": 150.8635, - "eval_samples_per_second": 102.364, - "eval_steps_per_second": 0.802, - "step": 170000 - }, - { - "epoch": 0.020198, - "loss_gen": 4.544429779052734, - "loss_rtd": 0.23796328902244568, - "loss_sent": 8.894162601791322e-05, - "loss_sod": 0.13107258081436157, - "loss_total": 0.36912479996681213, - "step": 170099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.360555648803711, - "loss_rtd": 0.281270295381546, - "loss_sent": 0.14059801399707794, - "loss_sod": 0.04083305224776268, - "loss_total": 0.46270138025283813, - "step": 170099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.9566983580589294, - "learning_rate": 4.4426868744744895e-05, - "loss": 0.5035, - "step": 170100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.065962791442871, - "loss_rtd": 0.26063400506973267, - "loss_sent": 0.04239511117339134, - "loss_sod": 0.03537307679653168, - "loss_total": 0.3384022116661072, - "step": 170199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.020777702331543, - "loss_rtd": 0.28646722435951233, - "loss_sent": 0.05823962390422821, - "loss_sod": 0.06766140460968018, - "loss_total": 0.4123682379722595, - "step": 170199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.9063208699226379, - "learning_rate": 4.439533435195009e-05, - "loss": 0.4982, - "step": 170200 - }, - { - "epoch": 0.020598, - "loss_gen": 4.676692962646484, - "loss_rtd": 0.24276001751422882, - "loss_sent": 3.965487121604383e-05, - "loss_sod": 0.06598778814077377, - "loss_total": 0.30878746509552, - "step": 170299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.554323196411133, - "loss_rtd": 0.2563796043395996, - "loss_sent": 0.5975649952888489, - "loss_sod": 0.16358384490013123, - "loss_total": 1.0175284147262573, - "step": 170299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.785658597946167, - "learning_rate": 4.436380221671393e-05, - "loss": 0.4946, - "step": 170300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.870201587677002, - "loss_rtd": 0.27057531476020813, - "loss_sent": 0.13948950171470642, - "loss_sod": 0.044493697583675385, - "loss_total": 0.45455852150917053, - "step": 170399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.158511638641357, - "loss_rtd": 0.2867234945297241, - "loss_sent": 0.05372565984725952, - "loss_sod": 0.038248710334300995, - "loss_total": 0.37869787216186523, - "step": 170399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8261120915412903, - "learning_rate": 4.433227235173757e-05, - "loss": 0.5119, - "step": 170400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.268481731414795, - "loss_rtd": 0.25351619720458984, - "loss_sent": 0.08443231880664825, - "loss_sod": 0.007908103987574577, - "loss_total": 0.34585660696029663, - "step": 170499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.773632526397705, - "loss_rtd": 0.27786916494369507, - "loss_sent": 0.052542515099048615, - "loss_sod": 0.06800629198551178, - "loss_total": 0.39841794967651367, - "step": 170499 - }, - { - "epoch": 0.021, - "grad_norm": 0.6295098066329956, - "learning_rate": 4.430074476972122e-05, - "loss": 0.4798, - "step": 170500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.5034284591674805, - "loss_rtd": 0.2871614396572113, - "loss_sent": 0.08345489948987961, - "loss_sod": 0.1271916627883911, - "loss_total": 0.4978080093860626, - "step": 170599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.310825347900391, - "loss_rtd": 0.27726760506629944, - "loss_sent": 0.20777426660060883, - "loss_sod": 0.0776047632098198, - "loss_total": 0.5626466274261475, - "step": 170599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.0804089307785034, - "learning_rate": 4.426921948336421e-05, - "loss": 0.4862, - "step": 170600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.445140838623047, - "loss_rtd": 0.2714994549751282, - "loss_sent": 0.17374204099178314, - "loss_sod": 0.029518041759729385, - "loss_total": 0.4747595191001892, - "step": 170699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.439637660980225, - "loss_rtd": 0.2526755630970001, - "loss_sent": 0.021121647208929062, - "loss_sod": 0.027989905327558517, - "loss_total": 0.3017871081829071, - "step": 170699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.5975282788276672, - "learning_rate": 4.423769650536489e-05, - "loss": 0.4932, - "step": 170700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.500450134277344, - "loss_rtd": 0.2770961821079254, - "loss_sent": 0.2747577726840973, - "loss_sod": 0.10403533279895782, - "loss_total": 0.6558892726898193, - "step": 170799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.309975624084473, - "loss_rtd": 0.2784741520881653, - "loss_sent": 0.1761515885591507, - "loss_sod": 0.04092315584421158, - "loss_total": 0.49554890394210815, - "step": 170799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.0437424182891846, - "learning_rate": 4.420617584842074e-05, - "loss": 0.5029, - "step": 170800 - }, - { - "epoch": 0.021798, - "loss_gen": 4.936534881591797, - "loss_rtd": 0.24867531657218933, - "loss_sent": 0.042488064616918564, - "loss_sod": 0.10822486132383347, - "loss_total": 0.39938825368881226, - "step": 170899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.563939094543457, - "loss_rtd": 0.25593051314353943, - "loss_sent": 0.21076306700706482, - "loss_sod": 0.04818826913833618, - "loss_total": 0.5148818492889404, - "step": 170899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.859067440032959, - "learning_rate": 4.4174657525228256e-05, - "loss": 0.4988, - "step": 170900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.604702949523926, - "loss_rtd": 0.25139927864074707, - "loss_sent": 0.0008354461169801652, - "loss_sod": 0.1703825294971466, - "loss_total": 0.42261725664138794, - "step": 170999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.240743637084961, - "loss_rtd": 0.27437034249305725, - "loss_sent": 0.2517975866794586, - "loss_sod": 0.06107909604907036, - "loss_total": 0.5872470140457153, - "step": 170999 - }, - { - "epoch": 0.022, - "grad_norm": 1.3069241046905518, - "learning_rate": 4.414314154848304e-05, - "loss": 0.4868, - "step": 171000 - }, - { - "epoch": 0.022, - "eval_loss": 0.476881742477417, - "eval_runtime": 152.174, - "eval_samples_per_second": 101.483, - "eval_steps_per_second": 0.795, - "step": 171000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.357883930206299, - "loss_rtd": 0.26406019926071167, - "loss_sent": 0.12234348803758621, - "loss_sod": 0.05356413125991821, - "loss_total": 0.4399678111076355, - "step": 171099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.282578468322754, - "loss_rtd": 0.2464466094970703, - "loss_sent": 6.460111035266891e-05, - "loss_sod": 0.19680382311344147, - "loss_total": 0.4433150291442871, - "step": 171099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.176934003829956, - "learning_rate": 4.4111627930879695e-05, - "loss": 0.4837, - "step": 171100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.41828727722168, - "loss_rtd": 0.26423409581184387, - "loss_sent": 0.1900784969329834, - "loss_sod": 0.06894370913505554, - "loss_total": 0.5232563018798828, - "step": 171199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.591996669769287, - "loss_rtd": 0.2510789930820465, - "loss_sent": 0.06349091231822968, - "loss_sod": 0.0813186913728714, - "loss_total": 0.39588862657546997, - "step": 171199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.8740753531455994, - "learning_rate": 4.408011668511192e-05, - "loss": 0.4891, - "step": 171200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.379201412200928, - "loss_rtd": 0.2552388906478882, - "loss_sent": 0.10088750720024109, - "loss_sod": 0.12585505843162537, - "loss_total": 0.48198145627975464, - "step": 171299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.958032608032227, - "loss_rtd": 0.24648171663284302, - "loss_sent": 0.025942375883460045, - "loss_sod": 0.027695482596755028, - "loss_total": 0.3001195788383484, - "step": 171299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.8427684903144836, - "learning_rate": 4.404860782387243e-05, - "loss": 0.4992, - "step": 171300 - }, - { - "epoch": 0.022798, - "loss_gen": 4.767716884613037, - "loss_rtd": 0.24949835240840912, - "loss_sent": 0.012483463622629642, - "loss_sod": 0.12245252728462219, - "loss_total": 0.3844343423843384, - "step": 171399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.250916004180908, - "loss_rtd": 0.2629123628139496, - "loss_sent": 0.1811651885509491, - "loss_sod": 0.035274289548397064, - "loss_total": 0.47935184836387634, - "step": 171399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.0522243976593018, - "learning_rate": 4.401710135985301e-05, - "loss": 0.5094, - "step": 171400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.062880516052246, - "loss_rtd": 0.24714313447475433, - "loss_sent": 0.0644451156258583, - "loss_sod": 0.09545697271823883, - "loss_total": 0.4070452153682709, - "step": 171499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.03845739364624, - "loss_rtd": 0.2493828982114792, - "loss_sent": 0.05711861327290535, - "loss_sod": 0.13982105255126953, - "loss_total": 0.44632259011268616, - "step": 171499 - }, - { - "epoch": 0.023, - "grad_norm": 1.0879409313201904, - "learning_rate": 4.398559730574443e-05, - "loss": 0.5012, - "step": 171500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.422330379486084, - "loss_rtd": 0.26781049370765686, - "loss_sent": 0.10690589994192123, - "loss_sod": 0.03012928180396557, - "loss_total": 0.4048456847667694, - "step": 171599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.345040321350098, - "loss_rtd": 0.27778422832489014, - "loss_sent": 0.19501087069511414, - "loss_sod": 0.07717195153236389, - "loss_total": 0.5499670505523682, - "step": 171599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.8802943229675293, - "learning_rate": 4.395409567423655e-05, - "loss": 0.5057, - "step": 171600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.491530895233154, - "loss_rtd": 0.2580206096172333, - "loss_sent": 0.23020225763320923, - "loss_sod": 0.06939679384231567, - "loss_total": 0.5576196908950806, - "step": 171699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.020575523376465, - "loss_rtd": 0.2541915774345398, - "loss_sent": 0.3434763252735138, - "loss_sod": 0.028169114142656326, - "loss_total": 0.6258370280265808, - "step": 171699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.243648886680603, - "learning_rate": 4.3922596478018207e-05, - "loss": 0.5004, - "step": 171700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.0843825340271, - "loss_rtd": 0.2561759352684021, - "loss_sent": 0.1246255487203598, - "loss_sod": 0.025484293699264526, - "loss_total": 0.40628576278686523, - "step": 171799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.655697345733643, - "loss_rtd": 0.24287694692611694, - "loss_sent": 0.17661504447460175, - "loss_sod": 0.05493897944688797, - "loss_total": 0.47443097829818726, - "step": 171799 - }, - { - "epoch": 0.0236, - "grad_norm": 2.0405614376068115, - "learning_rate": 4.389109972977727e-05, - "loss": 0.4722, - "step": 171800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.065279483795166, - "loss_rtd": 0.2325080782175064, - "loss_sent": 0.0006211738218553364, - "loss_sod": 0.08426440507173538, - "loss_total": 0.3173936605453491, - "step": 171899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.632493019104004, - "loss_rtd": 0.2621689736843109, - "loss_sent": 0.3588075339794159, - "loss_sod": 0.0310895387083292, - "loss_total": 0.6520660519599915, - "step": 171899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.0510684251785278, - "learning_rate": 4.385960544220064e-05, - "loss": 0.491, - "step": 171900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.561727046966553, - "loss_rtd": 0.2848459780216217, - "loss_sent": 0.513619065284729, - "loss_sod": 0.06750833243131638, - "loss_total": 0.8659733533859253, - "step": 171999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.594300746917725, - "loss_rtd": 0.25993654131889343, - "loss_sent": 0.20714308321475983, - "loss_sod": 0.029170267283916473, - "loss_total": 0.4962499141693115, - "step": 171999 - }, - { - "epoch": 0.024, - "grad_norm": 1.7951167821884155, - "learning_rate": 4.382811362797419e-05, - "loss": 0.501, - "step": 172000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4790232181549072, - "eval_runtime": 150.8946, - "eval_samples_per_second": 102.343, - "eval_steps_per_second": 0.802, - "step": 172000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.671760559082031, - "loss_rtd": 0.29092535376548767, - "loss_sent": 0.26654955744743347, - "loss_sod": 0.0813579335808754, - "loss_total": 0.6388328075408936, - "step": 172099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.701687335968018, - "loss_rtd": 0.24037468433380127, - "loss_sent": 0.363858163356781, - "loss_sod": 0.05815407633781433, - "loss_total": 0.6623868942260742, - "step": 172099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.230000615119934, - "learning_rate": 4.379662429978285e-05, - "loss": 0.505, - "step": 172100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.224126815795898, - "loss_rtd": 0.2466423064470291, - "loss_sent": 0.5247039198875427, - "loss_sod": 0.0947648212313652, - "loss_total": 0.8661110401153564, - "step": 172199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.056763172149658, - "loss_rtd": 0.25852951407432556, - "loss_sent": 7.622349949087948e-05, - "loss_sod": 0.21016332507133484, - "loss_total": 0.4687690734863281, - "step": 172199 - }, - { - "epoch": 0.0244, - "grad_norm": 2.1720242500305176, - "learning_rate": 4.376513747031048e-05, - "loss": 0.4989, - "step": 172200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.5568108558654785, - "loss_rtd": 0.266933411359787, - "loss_sent": 0.1446159929037094, - "loss_sod": 0.05012991651892662, - "loss_total": 0.4616793394088745, - "step": 172299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.212480545043945, - "loss_rtd": 0.2525312304496765, - "loss_sent": 0.08446284383535385, - "loss_sod": 0.0036936099641025066, - "loss_total": 0.34068769216537476, - "step": 172299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.7302485704421997, - "learning_rate": 4.373365315224001e-05, - "loss": 0.4953, - "step": 172300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.336203098297119, - "loss_rtd": 0.2691044509410858, - "loss_sent": 0.19347639381885529, - "loss_sod": 0.012682327069342136, - "loss_total": 0.47526317834854126, - "step": 172399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.143266677856445, - "loss_rtd": 0.2894757390022278, - "loss_sent": 0.20344778895378113, - "loss_sod": 0.054581418633461, - "loss_total": 0.5475049018859863, - "step": 172399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.216090202331543, - "learning_rate": 4.370217135825329e-05, - "loss": 0.4782, - "step": 172400 - }, - { - "epoch": 0.024998, - "loss_gen": 4.914358615875244, - "loss_rtd": 0.2621120810508728, - "loss_sent": 0.21592973172664642, - "loss_sod": 0.050652455538511276, - "loss_total": 0.5286942720413208, - "step": 172499 - }, - { - "epoch": 0.024998, - "loss_gen": 4.816646099090576, - "loss_rtd": 0.24619466066360474, - "loss_sent": 0.039638955146074295, - "loss_sod": 0.17656829953193665, - "loss_total": 0.4624019265174866, - "step": 172499 - }, - { - "epoch": 0.025, - "grad_norm": 1.1692919731140137, - "learning_rate": 4.3670692101031196e-05, - "loss": 0.4834, - "step": 172500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.635056972503662, - "loss_rtd": 0.2667495310306549, - "loss_sent": 0.14194297790527344, - "loss_sod": 0.008958159014582634, - "loss_total": 0.41765066981315613, - "step": 172599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.464961051940918, - "loss_rtd": 0.25216853618621826, - "loss_sent": 0.46573954820632935, - "loss_sod": 0.007270245812833309, - "loss_total": 0.7251783609390259, - "step": 172599 - }, - { - "epoch": 0.0252, - "grad_norm": 2.3285279273986816, - "learning_rate": 4.363921539325356e-05, - "loss": 0.5006, - "step": 172600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.531739234924316, - "loss_rtd": 0.2669811248779297, - "loss_sent": 0.37888938188552856, - "loss_sod": 0.13690614700317383, - "loss_total": 0.7827766537666321, - "step": 172699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.290124416351318, - "loss_rtd": 0.2700974643230438, - "loss_sent": 0.20026355981826782, - "loss_sod": 0.05382388457655907, - "loss_total": 0.5241849422454834, - "step": 172699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.828365445137024, - "learning_rate": 4.360774124759922e-05, - "loss": 0.501, - "step": 172700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.625349521636963, - "loss_rtd": 0.2544606924057007, - "loss_sent": 0.1294373720884323, - "loss_sod": 0.02067280374467373, - "loss_total": 0.40457087755203247, - "step": 172799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.215242862701416, - "loss_rtd": 0.2510789632797241, - "loss_sent": 0.10542890429496765, - "loss_sod": 0.06537258625030518, - "loss_total": 0.42188045382499695, - "step": 172799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.9845788478851318, - "learning_rate": 4.357626967674593e-05, - "loss": 0.4907, - "step": 172800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.444211006164551, - "loss_rtd": 0.24663518369197845, - "loss_sent": 0.15665069222450256, - "loss_sod": 0.02020771987736225, - "loss_total": 0.4234935939311981, - "step": 172899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.450448036193848, - "loss_rtd": 0.26696035265922546, - "loss_sent": 0.23775306344032288, - "loss_sod": 0.049146123230457306, - "loss_total": 0.553859531879425, - "step": 172899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.5564359426498413, - "learning_rate": 4.354480069337045e-05, - "loss": 0.4727, - "step": 172900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.63621711730957, - "loss_rtd": 0.2305292934179306, - "loss_sent": 0.02157679945230484, - "loss_sod": 0.13029345870018005, - "loss_total": 0.3823995292186737, - "step": 172999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.68080997467041, - "loss_rtd": 0.25236624479293823, - "loss_sent": 3.4659493394428864e-05, - "loss_sod": 0.10431516170501709, - "loss_total": 0.3567160665988922, - "step": 172999 - }, - { - "epoch": 0.026, - "grad_norm": 0.7032691240310669, - "learning_rate": 4.351333431014847e-05, - "loss": 0.492, - "step": 173000 - }, - { - "epoch": 0.026, - "eval_loss": 0.4772503674030304, - "eval_runtime": 150.7744, - "eval_samples_per_second": 102.425, - "eval_steps_per_second": 0.803, - "step": 173000 - }, - { - "epoch": 0.026198, - "loss_gen": 4.988316059112549, - "loss_rtd": 0.2611532509326935, - "loss_sent": 0.02122604288160801, - "loss_sod": 0.11611946672201157, - "loss_total": 0.3984987735748291, - "step": 173099 - }, - { - "epoch": 0.026198, - "loss_gen": 4.792057991027832, - "loss_rtd": 0.23634378612041473, - "loss_sent": 0.0003018920833710581, - "loss_sod": 0.08330892026424408, - "loss_total": 0.31995460391044617, - "step": 173099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.8967685699462891, - "learning_rate": 4.348187053975467e-05, - "loss": 0.5081, - "step": 173100 - }, - { - "epoch": 0.026398, - "loss_gen": 4.4217848777771, - "loss_rtd": 0.22614915668964386, - "loss_sent": 0.007760841865092516, - "loss_sod": 0.10313044488430023, - "loss_total": 0.3370404541492462, - "step": 173199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.512134552001953, - "loss_rtd": 0.2666032314300537, - "loss_sent": 0.16551974415779114, - "loss_sod": 0.03663212060928345, - "loss_total": 0.4687550961971283, - "step": 173199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.209341049194336, - "learning_rate": 4.3450409394862614e-05, - "loss": 0.4981, - "step": 173200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.36017370223999, - "loss_rtd": 0.24220968782901764, - "loss_sent": 0.19550010561943054, - "loss_sod": 0.04168718680739403, - "loss_total": 0.4793969690799713, - "step": 173299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.390031337738037, - "loss_rtd": 0.25432056188583374, - "loss_sent": 0.27294614911079407, - "loss_sod": 0.05045618116855621, - "loss_total": 0.5777229070663452, - "step": 173299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.0157606601715088, - "learning_rate": 4.341895088814489e-05, - "loss": 0.4785, - "step": 173300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.332516670227051, - "loss_rtd": 0.2581424117088318, - "loss_sent": 0.0722486823797226, - "loss_sod": 0.14761140942573547, - "loss_total": 0.47800248861312866, - "step": 173399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.863432884216309, - "loss_rtd": 0.2590324878692627, - "loss_sent": 0.06837914884090424, - "loss_sod": 0.12218590825796127, - "loss_total": 0.4495975375175476, - "step": 173399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.0803948640823364, - "learning_rate": 4.338749503227296e-05, - "loss": 0.5071, - "step": 173400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.350805759429932, - "loss_rtd": 0.25680214166641235, - "loss_sent": 0.37348273396492004, - "loss_sod": 0.018537752330303192, - "loss_total": 0.6488226652145386, - "step": 173499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.221343040466309, - "loss_rtd": 0.2781735360622406, - "loss_sent": 0.13024096190929413, - "loss_sod": 0.010937447659671307, - "loss_total": 0.4193519353866577, - "step": 173499 - }, - { - "epoch": 0.027, - "grad_norm": 0.9971233606338501, - "learning_rate": 4.335604183991723e-05, - "loss": 0.498, - "step": 173500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.203567981719971, - "loss_rtd": 0.25005918741226196, - "loss_sent": 0.07693670690059662, - "loss_sod": 0.07172539830207825, - "loss_total": 0.39872127771377563, - "step": 173599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.419286251068115, - "loss_rtd": 0.2811583876609802, - "loss_sent": 0.5652903318405151, - "loss_sod": 0.02947412058711052, - "loss_total": 0.8759227991104126, - "step": 173599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.73147714138031, - "learning_rate": 4.332459132374707e-05, - "loss": 0.5077, - "step": 173600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.219594955444336, - "loss_rtd": 0.28303781151771545, - "loss_sent": 0.08994757384061813, - "loss_sod": 0.046749409288167953, - "loss_total": 0.41973480582237244, - "step": 173699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.612659931182861, - "loss_rtd": 0.27426472306251526, - "loss_sent": 0.22041532397270203, - "loss_sod": 0.026184238493442535, - "loss_total": 0.5208642482757568, - "step": 173699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.706559956073761, - "learning_rate": 4.32931434964307e-05, - "loss": 0.5095, - "step": 173700 - }, - { - "epoch": 0.027598, - "loss_gen": 4.5038838386535645, - "loss_rtd": 0.22788257896900177, - "loss_sent": 0.02189287357032299, - "loss_sod": 0.0732315331697464, - "loss_total": 0.3230069875717163, - "step": 173799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.356247425079346, - "loss_rtd": 0.24853888154029846, - "loss_sent": 0.10135073214769363, - "loss_sod": 0.030710332095623016, - "loss_total": 0.3805999457836151, - "step": 173799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8105595707893372, - "learning_rate": 4.3261698370635354e-05, - "loss": 0.5031, - "step": 173800 - }, - { - "epoch": 0.027798, - "loss_gen": 4.5633416175842285, - "loss_rtd": 0.23870515823364258, - "loss_sent": 0.006563057191669941, - "loss_sod": 0.06918448954820633, - "loss_total": 0.3144527077674866, - "step": 173899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.257259845733643, - "loss_rtd": 0.27255484461784363, - "loss_sent": 0.5081937313079834, - "loss_sod": 0.013570001348853111, - "loss_total": 0.7943185567855835, - "step": 173899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.7526987791061401, - "learning_rate": 4.3230255959027076e-05, - "loss": 0.5088, - "step": 173900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.683145999908447, - "loss_rtd": 0.25262314081192017, - "loss_sent": 0.12697406113147736, - "loss_sod": 0.10950789600610733, - "loss_total": 0.48910510540008545, - "step": 173999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.555135250091553, - "loss_rtd": 0.2704862654209137, - "loss_sent": 0.12093016505241394, - "loss_sod": 0.045832931995391846, - "loss_total": 0.4372493624687195, - "step": 173999 - }, - { - "epoch": 0.028, - "grad_norm": 0.8709926009178162, - "learning_rate": 4.31988162742709e-05, - "loss": 0.4925, - "step": 174000 - }, - { - "epoch": 0.028, - "eval_loss": 0.47378090023994446, - "eval_runtime": 151.0282, - "eval_samples_per_second": 102.252, - "eval_steps_per_second": 0.801, - "step": 174000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.1633710861206055, - "loss_rtd": 0.28782790899276733, - "loss_sent": 0.2325538694858551, - "loss_sod": 0.07180501520633698, - "loss_total": 0.5921868085861206, - "step": 174099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.622278690338135, - "loss_rtd": 0.26133668422698975, - "loss_sent": 0.16414345800876617, - "loss_sod": 0.04548267275094986, - "loss_total": 0.4709628224372864, - "step": 174099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.924146831035614, - "learning_rate": 4.316737932903071e-05, - "loss": 0.4921, - "step": 174100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.44661808013916, - "loss_rtd": 0.24095293879508972, - "loss_sent": 0.2738695740699768, - "loss_sod": 0.07141793519258499, - "loss_total": 0.5862404108047485, - "step": 174199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.276996612548828, - "loss_rtd": 0.2431168407201767, - "loss_sent": 0.2242770493030548, - "loss_sod": 0.017125261947512627, - "loss_total": 0.4845191538333893, - "step": 174199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.4760338068008423, - "learning_rate": 4.313594513596932e-05, - "loss": 0.514, - "step": 174200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.371870994567871, - "loss_rtd": 0.2607460021972656, - "loss_sent": 0.16423146426677704, - "loss_sod": 0.030310600996017456, - "loss_total": 0.4552880525588989, - "step": 174299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.095975875854492, - "loss_rtd": 0.23831218481063843, - "loss_sent": 0.011028303764760494, - "loss_sod": 0.10951460897922516, - "loss_total": 0.35885506868362427, - "step": 174299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.9950005412101746, - "learning_rate": 4.31045137077484e-05, - "loss": 0.4863, - "step": 174300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.386153221130371, - "loss_rtd": 0.2543492317199707, - "loss_sent": 0.08969032764434814, - "loss_sod": 0.12441924959421158, - "loss_total": 0.46845880150794983, - "step": 174399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.415390968322754, - "loss_rtd": 0.2741059958934784, - "loss_sent": 0.18275922536849976, - "loss_sod": 0.10143411159515381, - "loss_total": 0.5582993030548096, - "step": 174399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.3448522090911865, - "learning_rate": 4.307308505702853e-05, - "loss": 0.508, - "step": 174400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.306718349456787, - "loss_rtd": 0.2631789445877075, - "loss_sent": 0.18015454709529877, - "loss_sod": 0.025235353037714958, - "loss_total": 0.4685688614845276, - "step": 174499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.371037006378174, - "loss_rtd": 0.2609129250049591, - "loss_sent": 0.03724387660622597, - "loss_sod": 0.019226595759391785, - "loss_total": 0.31738337874412537, - "step": 174499 - }, - { - "epoch": 0.029, - "grad_norm": 1.099623680114746, - "learning_rate": 4.3041659196469176e-05, - "loss": 0.482, - "step": 174500 - }, - { - "epoch": 0.029198, - "loss_gen": 4.8854498863220215, - "loss_rtd": 0.2525550127029419, - "loss_sent": 0.03448490798473358, - "loss_sod": 0.08429908752441406, - "loss_total": 0.37133902311325073, - "step": 174599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.215135097503662, - "loss_rtd": 0.2650979161262512, - "loss_sent": 0.05643727630376816, - "loss_sod": 0.13042433559894562, - "loss_total": 0.4519595205783844, - "step": 174599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.1190494298934937, - "learning_rate": 4.301023613872867e-05, - "loss": 0.5, - "step": 174600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.834264278411865, - "loss_rtd": 0.24064302444458008, - "loss_sent": 0.00012111241812817752, - "loss_sod": 0.18033990263938904, - "loss_total": 0.4211040437221527, - "step": 174699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.916804313659668, - "loss_rtd": 0.23712149262428284, - "loss_sent": 0.11205428093671799, - "loss_sod": 0.05833221226930618, - "loss_total": 0.407507985830307, - "step": 174699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.8764186501502991, - "learning_rate": 4.2978815896464195e-05, - "loss": 0.4956, - "step": 174700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.5481743812561035, - "loss_rtd": 0.2715030908584595, - "loss_sent": 0.4839355945587158, - "loss_sod": 0.13712286949157715, - "loss_total": 0.8925615549087524, - "step": 174799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.568472385406494, - "loss_rtd": 0.26322945952415466, - "loss_sent": 0.2807634174823761, - "loss_sod": 0.057357851415872574, - "loss_total": 0.601350724697113, - "step": 174799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.5966711044311523, - "learning_rate": 4.2947398482331856e-05, - "loss": 0.5002, - "step": 174800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.3092041015625, - "loss_rtd": 0.27356022596359253, - "loss_sent": 0.11498356610536575, - "loss_sod": 0.05010610073804855, - "loss_total": 0.43864989280700684, - "step": 174899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.309630870819092, - "loss_rtd": 0.2655850350856781, - "loss_sent": 0.03646130859851837, - "loss_sod": 0.07756256312131882, - "loss_total": 0.3796089291572571, - "step": 174899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.6131798028945923, - "learning_rate": 4.291598390898657e-05, - "loss": 0.4947, - "step": 174900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.312253475189209, - "loss_rtd": 0.25848621129989624, - "loss_sent": 0.141913041472435, - "loss_sod": 0.030754581093788147, - "loss_total": 0.4311538338661194, - "step": 174999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.19513463973999, - "loss_rtd": 0.24765127897262573, - "loss_sent": 0.46720418334007263, - "loss_sod": 0.04283073544502258, - "loss_total": 0.757686197757721, - "step": 174999 - }, - { - "epoch": 0.03, - "grad_norm": 1.4432408809661865, - "learning_rate": 4.28845721890821e-05, - "loss": 0.509, - "step": 175000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4798588752746582, - "eval_runtime": 150.7622, - "eval_samples_per_second": 102.433, - "eval_steps_per_second": 0.803, - "step": 175000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.340457439422607, - "loss_rtd": 0.27221396565437317, - "loss_sent": 0.3090207278728485, - "loss_sod": 0.023055512458086014, - "loss_total": 0.604290246963501, - "step": 175099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.338098049163818, - "loss_rtd": 0.26297610998153687, - "loss_sent": 0.11111314594745636, - "loss_sod": 0.07260318845510483, - "loss_total": 0.44669246673583984, - "step": 175099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.9103385210037231, - "learning_rate": 4.2853163335271115e-05, - "loss": 0.5014, - "step": 175100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.2835564613342285, - "loss_rtd": 0.25003665685653687, - "loss_sent": 0.21480363607406616, - "loss_sod": 0.055279094725847244, - "loss_total": 0.5201194286346436, - "step": 175199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.523531436920166, - "loss_rtd": 0.2706020474433899, - "loss_sent": 0.15565568208694458, - "loss_sod": 0.01964074932038784, - "loss_total": 0.44589847326278687, - "step": 175199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.6668602824211121, - "learning_rate": 4.282175736020507e-05, - "loss": 0.4832, - "step": 175200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.030451774597168, - "loss_rtd": 0.2588375210762024, - "loss_sent": 0.01697034016251564, - "loss_sod": 0.13009503483772278, - "loss_total": 0.4059028923511505, - "step": 175299 - }, - { - "epoch": 0.030598, - "loss_gen": 4.95477819442749, - "loss_rtd": 0.24549439549446106, - "loss_sent": 0.00011641019955277443, - "loss_sod": 0.15176764130592346, - "loss_total": 0.39737844467163086, - "step": 175299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.9259080290794373, - "learning_rate": 4.279035427653431e-05, - "loss": 0.5048, - "step": 175300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.685121059417725, - "loss_rtd": 0.26787543296813965, - "loss_sent": 0.08151499927043915, - "loss_sod": 0.04920577257871628, - "loss_total": 0.39859622716903687, - "step": 175399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.650103569030762, - "loss_rtd": 0.2859199345111847, - "loss_sent": 0.11438175290822983, - "loss_sod": 0.14111463725566864, - "loss_total": 0.5414162874221802, - "step": 175399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.007842779159546, - "learning_rate": 4.275895409690798e-05, - "loss": 0.4863, - "step": 175400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.223883628845215, - "loss_rtd": 0.24045738577842712, - "loss_sent": 0.18959611654281616, - "loss_sod": 0.03346420079469681, - "loss_total": 0.4635176956653595, - "step": 175499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.113461017608643, - "loss_rtd": 0.251642107963562, - "loss_sent": 0.15249517560005188, - "loss_sod": 0.10661154985427856, - "loss_total": 0.5107488632202148, - "step": 175499 - }, - { - "epoch": 0.031, - "grad_norm": 1.7224534749984741, - "learning_rate": 4.272755683397408e-05, - "loss": 0.5096, - "step": 175500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.483402252197266, - "loss_rtd": 0.2544477581977844, - "loss_sent": 0.15212436020374298, - "loss_sod": 0.18544356524944305, - "loss_total": 0.5920156836509705, - "step": 175599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.230832099914551, - "loss_rtd": 0.2887153923511505, - "loss_sent": 0.17754162847995758, - "loss_sod": 0.0485423281788826, - "loss_total": 0.5147993564605713, - "step": 175599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.8059684038162231, - "learning_rate": 4.269616250037941e-05, - "loss": 0.5049, - "step": 175600 - }, - { - "epoch": 0.031398, - "loss_gen": 4.94041109085083, - "loss_rtd": 0.2362675815820694, - "loss_sent": 3.4851342206820846e-05, - "loss_sod": 0.13205501437187195, - "loss_total": 0.36835744976997375, - "step": 175699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.082764148712158, - "loss_rtd": 0.24880072474479675, - "loss_sent": 5.622784374281764e-05, - "loss_sod": 0.10536079853773117, - "loss_total": 0.3542177677154541, - "step": 175699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.0555180311203003, - "learning_rate": 4.266477110876963e-05, - "loss": 0.4887, - "step": 175700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.065995693206787, - "loss_rtd": 0.26552313566207886, - "loss_sent": 0.29504725337028503, - "loss_sod": 0.050535283982753754, - "loss_total": 0.6111056804656982, - "step": 175799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.396198749542236, - "loss_rtd": 0.2697710692882538, - "loss_sent": 0.15708176791667938, - "loss_sod": 0.04377313703298569, - "loss_total": 0.47062599658966064, - "step": 175799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.0791652202606201, - "learning_rate": 4.2633382671789164e-05, - "loss": 0.5038, - "step": 175800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.311132907867432, - "loss_rtd": 0.24325776100158691, - "loss_sent": 0.19141052663326263, - "loss_sod": 0.13933482766151428, - "loss_total": 0.5740031003952026, - "step": 175899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.138039588928223, - "loss_rtd": 0.2622635066509247, - "loss_sent": 0.12972286343574524, - "loss_sod": 0.047259338200092316, - "loss_total": 0.43924570083618164, - "step": 175899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.4694644212722778, - "learning_rate": 4.260199720208126e-05, - "loss": 0.5041, - "step": 175900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.145990371704102, - "loss_rtd": 0.2639693319797516, - "loss_sent": 0.07845783233642578, - "loss_sod": 0.02447548508644104, - "loss_total": 0.3669026494026184, - "step": 175999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.343972682952881, - "loss_rtd": 0.24482199549674988, - "loss_sent": 0.45106545090675354, - "loss_sod": 0.027471786364912987, - "loss_total": 0.723359227180481, - "step": 175999 - }, - { - "epoch": 0.032, - "grad_norm": 1.1580651998519897, - "learning_rate": 4.257061471228802e-05, - "loss": 0.4906, - "step": 176000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4756234884262085, - "eval_runtime": 151.418, - "eval_samples_per_second": 101.989, - "eval_steps_per_second": 0.799, - "step": 176000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.3917460441589355, - "loss_rtd": 0.26709726452827454, - "loss_sent": 0.3204467296600342, - "loss_sod": 0.061376411467790604, - "loss_total": 0.6489204168319702, - "step": 176099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.494623184204102, - "loss_rtd": 0.2769303023815155, - "loss_sent": 0.4546057879924774, - "loss_sod": 0.07944586873054504, - "loss_total": 0.8109819889068604, - "step": 176099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.1192471981048584, - "learning_rate": 4.2539235215050264e-05, - "loss": 0.5013, - "step": 176100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.089158058166504, - "loss_rtd": 0.2508448660373688, - "loss_sent": 0.06241251528263092, - "loss_sod": 0.1598670482635498, - "loss_total": 0.4731244444847107, - "step": 176199 - }, - { - "epoch": 0.000398, - "loss_gen": 4.992786884307861, - "loss_rtd": 0.26904919743537903, - "loss_sent": 0.1746479719877243, - "loss_sod": 0.05742231011390686, - "loss_total": 0.5011194944381714, - "step": 176199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.2808656692504883, - "learning_rate": 4.2507858723007685e-05, - "loss": 0.4925, - "step": 176200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.298864364624023, - "loss_rtd": 0.25586748123168945, - "loss_sent": 0.19043675065040588, - "loss_sod": 0.036146149039268494, - "loss_total": 0.482450395822525, - "step": 176299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.539154052734375, - "loss_rtd": 0.2653588652610779, - "loss_sent": 0.1710675209760666, - "loss_sod": 0.01716458611190319, - "loss_total": 0.453590989112854, - "step": 176299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.7630979418754578, - "learning_rate": 4.2476485248798714e-05, - "loss": 0.5024, - "step": 176300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.059012413024902, - "loss_rtd": 0.24951209127902985, - "loss_sent": 0.0003721616230905056, - "loss_sod": 0.07661294937133789, - "loss_total": 0.3264971971511841, - "step": 176399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.338247776031494, - "loss_rtd": 0.2792041599750519, - "loss_sent": 0.3772808909416199, - "loss_sod": 0.036255113780498505, - "loss_total": 0.6927402019500732, - "step": 176399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.6497650146484375, - "learning_rate": 4.2445114805060584e-05, - "loss": 0.4829, - "step": 176400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.673123836517334, - "loss_rtd": 0.23918534815311432, - "loss_sent": 0.022573119029402733, - "loss_sod": 0.06347043067216873, - "loss_total": 0.32522889971733093, - "step": 176499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.806493759155273, - "loss_rtd": 0.25529617071151733, - "loss_sent": 0.004772007931023836, - "loss_sod": 0.13478165864944458, - "loss_total": 0.39484983682632446, - "step": 176499 - }, - { - "epoch": 0.001, - "grad_norm": 0.7160961031913757, - "learning_rate": 4.24137474044293e-05, - "loss": 0.5093, - "step": 176500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.41826868057251, - "loss_rtd": 0.2571258544921875, - "loss_sent": 0.18246565759181976, - "loss_sod": 0.05885126441717148, - "loss_total": 0.49844276905059814, - "step": 176599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.055743217468262, - "loss_rtd": 0.2348114401102066, - "loss_sent": 0.03232239559292793, - "loss_sod": 0.0971403568983078, - "loss_total": 0.36427420377731323, - "step": 176599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.7421179413795471, - "learning_rate": 4.238238305953966e-05, - "loss": 0.4903, - "step": 176600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.529196739196777, - "loss_rtd": 0.27083462476730347, - "loss_sent": 0.2703145146369934, - "loss_sod": 0.023448172956705093, - "loss_total": 0.5645973086357117, - "step": 176699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.448575496673584, - "loss_rtd": 0.2592054307460785, - "loss_sent": 0.14654381573200226, - "loss_sod": 0.008700167760252953, - "loss_total": 0.41444939374923706, - "step": 176699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.1424907445907593, - "learning_rate": 4.235102178302522e-05, - "loss": 0.5184, - "step": 176700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.341691493988037, - "loss_rtd": 0.2590598464012146, - "loss_sent": 0.04613998532295227, - "loss_sod": 0.14152613282203674, - "loss_total": 0.4467259645462036, - "step": 176799 - }, - { - "epoch": 0.001598, - "loss_gen": 4.94687032699585, - "loss_rtd": 0.23710812628269196, - "loss_sent": 3.762466803891584e-05, - "loss_sod": 0.17916914820671082, - "loss_total": 0.41631489992141724, - "step": 176799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.1884740591049194, - "learning_rate": 4.2319663587518274e-05, - "loss": 0.4724, - "step": 176800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.708977699279785, - "loss_rtd": 0.2611987888813019, - "loss_sent": 0.2741192579269409, - "loss_sod": 0.08952027559280396, - "loss_total": 0.6248383522033691, - "step": 176899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.4857025146484375, - "loss_rtd": 0.2508212924003601, - "loss_sent": 0.44258952140808105, - "loss_sod": 0.026850054040551186, - "loss_total": 0.7202608585357666, - "step": 176899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.1517118215560913, - "learning_rate": 4.228830848564993e-05, - "loss": 0.503, - "step": 176900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.066939353942871, - "loss_rtd": 0.2523457109928131, - "loss_sent": 0.07493351399898529, - "loss_sod": 0.03503037989139557, - "loss_total": 0.36230963468551636, - "step": 176999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.708009719848633, - "loss_rtd": 0.28032973408699036, - "loss_sent": 0.11090180277824402, - "loss_sod": 0.010316354222595692, - "loss_total": 0.4015478789806366, - "step": 176999 - }, - { - "epoch": 0.002, - "grad_norm": 0.9930636286735535, - "learning_rate": 4.225695649005e-05, - "loss": 0.4841, - "step": 177000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4668843448162079, - "eval_runtime": 154.4239, - "eval_samples_per_second": 100.004, - "eval_steps_per_second": 0.784, - "step": 177000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.652185440063477, - "loss_rtd": 0.290635347366333, - "loss_sent": 0.18285208940505981, - "loss_sod": 0.18549787998199463, - "loss_total": 0.6589853167533875, - "step": 177099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.1103973388671875, - "loss_rtd": 0.25475525856018066, - "loss_sent": 0.011581259779632092, - "loss_sod": 0.18181636929512024, - "loss_total": 0.44815289974212646, - "step": 177099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.280313491821289, - "learning_rate": 4.222560761334708e-05, - "loss": 0.4888, - "step": 177100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.472076416015625, - "loss_rtd": 0.268470823764801, - "loss_sent": 0.14005210995674133, - "loss_sod": 0.03243196755647659, - "loss_total": 0.44095489382743835, - "step": 177199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.537106513977051, - "loss_rtd": 0.2637845575809479, - "loss_sent": 0.20055530965328217, - "loss_sod": 0.010856792330741882, - "loss_total": 0.4751966595649719, - "step": 177199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.9776031970977783, - "learning_rate": 4.219426186816847e-05, - "loss": 0.4898, - "step": 177200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.7137532234191895, - "loss_rtd": 0.23269148170948029, - "loss_sent": 0.0030476772226393223, - "loss_sod": 0.08300338685512543, - "loss_total": 0.3187425434589386, - "step": 177299 - }, - { - "epoch": 0.002598, - "loss_gen": 4.946521282196045, - "loss_rtd": 0.2618849575519562, - "loss_sent": 0.0312032587826252, - "loss_sod": 0.06359866261482239, - "loss_total": 0.35668689012527466, - "step": 177299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.864230215549469, - "learning_rate": 4.2162919267140254e-05, - "loss": 0.4938, - "step": 177300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.496953010559082, - "loss_rtd": 0.26852214336395264, - "loss_sent": 0.1591894030570984, - "loss_sod": 0.07635277509689331, - "loss_total": 0.5040643215179443, - "step": 177399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.572754859924316, - "loss_rtd": 0.25595274567604065, - "loss_sent": 0.16189433634281158, - "loss_sod": 0.06928539276123047, - "loss_total": 0.4871324896812439, - "step": 177399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.822463035583496, - "learning_rate": 4.2131579822887213e-05, - "loss": 0.5024, - "step": 177400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.361823081970215, - "loss_rtd": 0.2623514235019684, - "loss_sent": 0.16954083740711212, - "loss_sod": 0.05207022279500961, - "loss_total": 0.4839624762535095, - "step": 177499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.58965539932251, - "loss_rtd": 0.25201404094696045, - "loss_sent": 0.08452600985765457, - "loss_sod": 0.13741786777973175, - "loss_total": 0.473957896232605, - "step": 177499 - }, - { - "epoch": 0.003, - "grad_norm": 0.725857138633728, - "learning_rate": 4.210024354803288e-05, - "loss": 0.5007, - "step": 177500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.413411617279053, - "loss_rtd": 0.24122172594070435, - "loss_sent": 0.2514476776123047, - "loss_sod": 0.04914465546607971, - "loss_total": 0.5418140888214111, - "step": 177599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.256341934204102, - "loss_rtd": 0.28153109550476074, - "loss_sent": 0.21093186736106873, - "loss_sod": 0.0345311164855957, - "loss_total": 0.5269941091537476, - "step": 177599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.6663022041320801, - "learning_rate": 4.2068910455199504e-05, - "loss": 0.5017, - "step": 177600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.281067848205566, - "loss_rtd": 0.260381817817688, - "loss_sent": 0.28563469648361206, - "loss_sod": 0.024251481518149376, - "loss_total": 0.5702680349349976, - "step": 177699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.571688652038574, - "loss_rtd": 0.28116393089294434, - "loss_sent": 0.22319813072681427, - "loss_sod": 0.035796672105789185, - "loss_total": 0.540158748626709, - "step": 177699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.2563155889511108, - "learning_rate": 4.203758055700806e-05, - "loss": 0.5026, - "step": 177700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.343420505523682, - "loss_rtd": 0.2577490508556366, - "loss_sent": 0.10741659998893738, - "loss_sod": 0.02538968436419964, - "loss_total": 0.39055532217025757, - "step": 177799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.449235439300537, - "loss_rtd": 0.2673318684101105, - "loss_sent": 0.1105814129114151, - "loss_sod": 0.016446208581328392, - "loss_total": 0.3943594694137573, - "step": 177799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.7080693244934082, - "learning_rate": 4.2006253866078194e-05, - "loss": 0.4801, - "step": 177800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.616039752960205, - "loss_rtd": 0.2546175420284271, - "loss_sent": 0.1314704418182373, - "loss_sod": 0.04876062646508217, - "loss_total": 0.4348486065864563, - "step": 177899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.463183403015137, - "loss_rtd": 0.2529369294643402, - "loss_sent": 0.17607368528842926, - "loss_sod": 0.08975831419229507, - "loss_total": 0.5187689065933228, - "step": 177899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.1900516748428345, - "learning_rate": 4.197493039502833e-05, - "loss": 0.5193, - "step": 177900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.278634548187256, - "loss_rtd": 0.2694304883480072, - "loss_sent": 0.22764815390110016, - "loss_sod": 0.05485042184591293, - "loss_total": 0.5519290566444397, - "step": 177999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.351944446563721, - "loss_rtd": 0.26608723402023315, - "loss_sent": 0.30641230940818787, - "loss_sod": 0.0295640267431736, - "loss_total": 0.6020635366439819, - "step": 177999 - }, - { - "epoch": 0.004, - "grad_norm": 1.2334996461868286, - "learning_rate": 4.1943610156475544e-05, - "loss": 0.4883, - "step": 178000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4764252007007599, - "eval_runtime": 151.2918, - "eval_samples_per_second": 102.074, - "eval_steps_per_second": 0.8, - "step": 178000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.586325645446777, - "loss_rtd": 0.27039051055908203, - "loss_sent": 0.17318874597549438, - "loss_sod": 0.05760243162512779, - "loss_total": 0.5011817216873169, - "step": 178099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.69460916519165, - "loss_rtd": 0.2655748128890991, - "loss_sent": 0.11303507536649704, - "loss_sod": 0.039691053330898285, - "loss_total": 0.41830095648765564, - "step": 178099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2535382509231567, - "learning_rate": 4.191229316303561e-05, - "loss": 0.4997, - "step": 178100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.459695339202881, - "loss_rtd": 0.2589976489543915, - "loss_sent": 0.21007460355758667, - "loss_sod": 0.08964826166629791, - "loss_total": 0.5587205290794373, - "step": 178199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.936141014099121, - "loss_rtd": 0.23948220908641815, - "loss_sent": 0.07174783200025558, - "loss_sod": 0.052694909274578094, - "loss_total": 0.36392495036125183, - "step": 178199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.7968523502349854, - "learning_rate": 4.1880979427323037e-05, - "loss": 0.4961, - "step": 178200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.5288987159729, - "loss_rtd": 0.24514004588127136, - "loss_sent": 0.14824345707893372, - "loss_sod": 0.07209758460521698, - "loss_total": 0.46548107266426086, - "step": 178299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.431944370269775, - "loss_rtd": 0.27769768238067627, - "loss_sent": 0.2189095914363861, - "loss_sod": 0.1527351289987564, - "loss_total": 0.64934241771698, - "step": 178299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.2251418828964233, - "learning_rate": 4.1849668961950964e-05, - "loss": 0.503, - "step": 178300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.546605110168457, - "loss_rtd": 0.27432477474212646, - "loss_sent": 0.32855531573295593, - "loss_sod": 0.06549499183893204, - "loss_total": 0.6683750748634338, - "step": 178399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.0015130043029785, - "loss_rtd": 0.2546570897102356, - "loss_sent": 0.026251059025526047, - "loss_sod": 0.07344754040241241, - "loss_total": 0.35435566306114197, - "step": 178399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.289629340171814, - "learning_rate": 4.181836177953127e-05, - "loss": 0.5061, - "step": 178400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.554134845733643, - "loss_rtd": 0.2642400860786438, - "loss_sent": 0.5396769642829895, - "loss_sod": 0.053843654692173004, - "loss_total": 0.8577606678009033, - "step": 178499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.628774166107178, - "loss_rtd": 0.26725253462791443, - "loss_sent": 0.05701502412557602, - "loss_sod": 0.04202532395720482, - "loss_total": 0.36629289388656616, - "step": 178499 - }, - { - "epoch": 0.005, - "grad_norm": 1.825698971748352, - "learning_rate": 4.1787057892674465e-05, - "loss": 0.4892, - "step": 178500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.714921474456787, - "loss_rtd": 0.24926084280014038, - "loss_sent": 0.45414772629737854, - "loss_sod": 0.06611594557762146, - "loss_total": 0.7695245146751404, - "step": 178599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.350859642028809, - "loss_rtd": 0.26421958208084106, - "loss_sent": 0.21245747804641724, - "loss_sod": 0.06240715831518173, - "loss_total": 0.5390841960906982, - "step": 178599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.542447805404663, - "learning_rate": 4.175575731398977e-05, - "loss": 0.4853, - "step": 178600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.297928810119629, - "loss_rtd": 0.26276102662086487, - "loss_sent": 0.2245301455259323, - "loss_sod": 0.006228235084563494, - "loss_total": 0.4935194253921509, - "step": 178699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.591362953186035, - "loss_rtd": 0.26682981848716736, - "loss_sent": 0.04091120511293411, - "loss_sod": 0.14690172672271729, - "loss_total": 0.45464274287223816, - "step": 178699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.9810351133346558, - "learning_rate": 4.172446005608503e-05, - "loss": 0.483, - "step": 178700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.2232160568237305, - "loss_rtd": 0.251579225063324, - "loss_sent": 0.10386347025632858, - "loss_sod": 0.07500104606151581, - "loss_total": 0.43044376373291016, - "step": 178799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.377779960632324, - "loss_rtd": 0.2628142535686493, - "loss_sent": 0.27205297350883484, - "loss_sod": 0.044300567358732224, - "loss_total": 0.5791677832603455, - "step": 178799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.4532432556152344, - "learning_rate": 4.1693166131566805e-05, - "loss": 0.5065, - "step": 178800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.490054607391357, - "loss_rtd": 0.26628002524375916, - "loss_sent": 0.09744316339492798, - "loss_sod": 0.022234242409467697, - "loss_total": 0.38595741987228394, - "step": 178899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.5699782371521, - "loss_rtd": 0.2603760361671448, - "loss_sent": 0.34774866700172424, - "loss_sod": 0.18945448100566864, - "loss_total": 0.7975791692733765, - "step": 178899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.9477487206459045, - "learning_rate": 4.166187555304025e-05, - "loss": 0.4933, - "step": 178900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.482988357543945, - "loss_rtd": 0.23788413405418396, - "loss_sent": 0.059080567210912704, - "loss_sod": 0.049302082508802414, - "loss_total": 0.3462667763233185, - "step": 178999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.442384243011475, - "loss_rtd": 0.26769599318504333, - "loss_sent": 0.227765753865242, - "loss_sod": 0.038329411298036575, - "loss_total": 0.533791184425354, - "step": 178999 - }, - { - "epoch": 0.006, - "grad_norm": 2.0118491649627686, - "learning_rate": 4.163058833310925e-05, - "loss": 0.5041, - "step": 179000 - }, - { - "epoch": 0.006, - "eval_loss": 0.47466304898262024, - "eval_runtime": 151.4685, - "eval_samples_per_second": 101.955, - "eval_steps_per_second": 0.799, - "step": 179000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.188037872314453, - "loss_rtd": 0.2617184519767761, - "loss_sent": 0.12243036925792694, - "loss_sod": 0.03435511887073517, - "loss_total": 0.41850394010543823, - "step": 179099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.017541885375977, - "loss_rtd": 0.24198447167873383, - "loss_sent": 0.055074840784072876, - "loss_sod": 0.06318394839763641, - "loss_total": 0.3602432608604431, - "step": 179099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.8135080337524414, - "learning_rate": 4.159930448437624e-05, - "loss": 0.4773, - "step": 179100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.485675811767578, - "loss_rtd": 0.2606898844242096, - "loss_sent": 0.23423993587493896, - "loss_sod": 0.040725454688072205, - "loss_total": 0.5356552600860596, - "step": 179199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.839269638061523, - "loss_rtd": 0.26704105734825134, - "loss_sent": 0.11773671954870224, - "loss_sod": 0.1408788114786148, - "loss_total": 0.5256565809249878, - "step": 179199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.3560131788253784, - "learning_rate": 4.15680240194424e-05, - "loss": 0.5046, - "step": 179200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.47376012802124, - "loss_rtd": 0.25602632761001587, - "loss_sent": 0.30784136056900024, - "loss_sod": 0.0686689242720604, - "loss_total": 0.6325366497039795, - "step": 179299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.46110200881958, - "loss_rtd": 0.2672787606716156, - "loss_sent": 0.20248740911483765, - "loss_sod": 0.035606883466243744, - "loss_total": 0.5053730607032776, - "step": 179299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.325631022453308, - "learning_rate": 4.153674695090746e-05, - "loss": 0.4956, - "step": 179300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.34942102432251, - "loss_rtd": 0.27311182022094727, - "loss_sent": 0.23604953289031982, - "loss_sod": 0.011576816439628601, - "loss_total": 0.5207381844520569, - "step": 179399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.549720287322998, - "loss_rtd": 0.27027398347854614, - "loss_sent": 0.0977923646569252, - "loss_sod": 0.10007601231336594, - "loss_total": 0.4681423604488373, - "step": 179399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.9811376929283142, - "learning_rate": 4.150547329136985e-05, - "loss": 0.5003, - "step": 179400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.362544536590576, - "loss_rtd": 0.26804307103157043, - "loss_sent": 0.06654703617095947, - "loss_sod": 0.03614526987075806, - "loss_total": 0.37073537707328796, - "step": 179499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.273104667663574, - "loss_rtd": 0.24291948974132538, - "loss_sent": 0.16729098558425903, - "loss_sod": 0.005854018032550812, - "loss_total": 0.4160645008087158, - "step": 179499 - }, - { - "epoch": 0.007, - "grad_norm": 0.7588452100753784, - "learning_rate": 4.147420305342659e-05, - "loss": 0.5043, - "step": 179500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.419837951660156, - "loss_rtd": 0.2564307749271393, - "loss_sent": 0.14678673446178436, - "loss_sod": 0.06157728284597397, - "loss_total": 0.4647948145866394, - "step": 179599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.458054542541504, - "loss_rtd": 0.27689340710639954, - "loss_sent": 0.1271832138299942, - "loss_sod": 0.14260171353816986, - "loss_total": 0.5466783046722412, - "step": 179599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.20590341091156, - "learning_rate": 4.1442936249673296e-05, - "loss": 0.4819, - "step": 179600 - }, - { - "epoch": 0.007398, - "loss_gen": 4.793631076812744, - "loss_rtd": 0.22719790041446686, - "loss_sent": 0.060469288378953934, - "loss_sod": 0.03978104516863823, - "loss_total": 0.3274482488632202, - "step": 179699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.353108882904053, - "loss_rtd": 0.25455164909362793, - "loss_sent": 0.05941108986735344, - "loss_sod": 0.07290526479482651, - "loss_total": 0.3868680000305176, - "step": 179699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.9167174696922302, - "learning_rate": 4.141167289270428e-05, - "loss": 0.4904, - "step": 179700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.078159332275391, - "loss_rtd": 0.24838924407958984, - "loss_sent": 0.11320249736309052, - "loss_sod": 0.04460389167070389, - "loss_total": 0.40619564056396484, - "step": 179799 - }, - { - "epoch": 0.007598, - "loss_gen": 4.7999491691589355, - "loss_rtd": 0.2368369698524475, - "loss_sent": 0.04544038325548172, - "loss_sod": 0.08653011918067932, - "loss_total": 0.36880746483802795, - "step": 179799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.9799928665161133, - "learning_rate": 4.138041299511238e-05, - "loss": 0.4912, - "step": 179800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.345700263977051, - "loss_rtd": 0.26462382078170776, - "loss_sent": 0.11898131668567657, - "loss_sod": 0.044064655900001526, - "loss_total": 0.4276697635650635, - "step": 179899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.21361780166626, - "loss_rtd": 0.2546546161174774, - "loss_sent": 0.12579309940338135, - "loss_sod": 0.011989301070570946, - "loss_total": 0.39243701100349426, - "step": 179899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.027785062789917, - "learning_rate": 4.13491565694891e-05, - "loss": 0.4954, - "step": 179900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.613238334655762, - "loss_rtd": 0.2553330957889557, - "loss_sent": 0.17620572447776794, - "loss_sod": 0.21121367812156677, - "loss_total": 0.6427525281906128, - "step": 179999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.368664264678955, - "loss_rtd": 0.24394778907299042, - "loss_sent": 0.183329775929451, - "loss_sod": 0.009965645149350166, - "loss_total": 0.4372432231903076, - "step": 179999 - }, - { - "epoch": 0.008, - "grad_norm": 1.0747244358062744, - "learning_rate": 4.131790362842451e-05, - "loss": 0.4804, - "step": 180000 - }, - { - "epoch": 0.008, - "eval_loss": 0.47713226079940796, - "eval_runtime": 151.3287, - "eval_samples_per_second": 102.049, - "eval_steps_per_second": 0.8, - "step": 180000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.487625598907471, - "loss_rtd": 0.2641645669937134, - "loss_sent": 0.13404642045497894, - "loss_sod": 0.022700896486639977, - "loss_total": 0.42091190814971924, - "step": 180099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.319086074829102, - "loss_rtd": 0.2597549557685852, - "loss_sent": 0.01480947993695736, - "loss_sod": 0.13523250818252563, - "loss_total": 0.40979695320129395, - "step": 180099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.92105633020401, - "learning_rate": 4.128665418450732e-05, - "loss": 0.488, - "step": 180100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.736827850341797, - "loss_rtd": 0.2725524604320526, - "loss_sent": 0.08031554520130157, - "loss_sod": 0.09078691899776459, - "loss_total": 0.4436548948287964, - "step": 180199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.926023960113525, - "loss_rtd": 0.27341535687446594, - "loss_sent": 0.14149793982505798, - "loss_sod": 0.1236058920621872, - "loss_total": 0.5385191440582275, - "step": 180199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.8921319842338562, - "learning_rate": 4.1255408250324765e-05, - "loss": 0.4881, - "step": 180200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.5424723625183105, - "loss_rtd": 0.2582395374774933, - "loss_sent": 0.07435081154108047, - "loss_sod": 0.030729809775948524, - "loss_total": 0.36332014203071594, - "step": 180299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.650275230407715, - "loss_rtd": 0.27444303035736084, - "loss_sent": 0.07789462059736252, - "loss_sod": 0.048112284392118454, - "loss_total": 0.4004499316215515, - "step": 180299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.9056849479675293, - "learning_rate": 4.122416583846274e-05, - "loss": 0.5023, - "step": 180300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.1777873039245605, - "loss_rtd": 0.2654229402542114, - "loss_sent": 0.35161760449409485, - "loss_sod": 0.01783064752817154, - "loss_total": 0.6348711848258972, - "step": 180399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.614340305328369, - "loss_rtd": 0.25416260957717896, - "loss_sent": 0.10317254066467285, - "loss_sod": 0.08120696246623993, - "loss_total": 0.43854212760925293, - "step": 180399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.9221227169036865, - "learning_rate": 4.119292696150564e-05, - "loss": 0.5029, - "step": 180400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.872322082519531, - "loss_rtd": 0.2462892383337021, - "loss_sent": 0.05622588098049164, - "loss_sod": 0.18008814752101898, - "loss_total": 0.4826032519340515, - "step": 180499 - }, - { - "epoch": 0.008998, - "loss_gen": 4.6165876388549805, - "loss_rtd": 0.2476365566253662, - "loss_sent": 3.859668504446745e-05, - "loss_sod": 0.05294607952237129, - "loss_total": 0.30062124133110046, - "step": 180499 - }, - { - "epoch": 0.009, - "grad_norm": 0.8333690762519836, - "learning_rate": 4.116169163203653e-05, - "loss": 0.4848, - "step": 180500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.586027145385742, - "loss_rtd": 0.23426824808120728, - "loss_sent": 0.241379976272583, - "loss_sod": 0.01346066314727068, - "loss_total": 0.4891088902950287, - "step": 180599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.472120761871338, - "loss_rtd": 0.24811071157455444, - "loss_sent": 0.2098441869020462, - "loss_sod": 0.01939151994884014, - "loss_total": 0.47734642028808594, - "step": 180599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.9169313311576843, - "learning_rate": 4.113045986263696e-05, - "loss": 0.5004, - "step": 180600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.667011737823486, - "loss_rtd": 0.23707003891468048, - "loss_sent": 0.21612732112407684, - "loss_sod": 0.06380543112754822, - "loss_total": 0.5170028209686279, - "step": 180699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.365878105163574, - "loss_rtd": 0.2620985805988312, - "loss_sent": 0.10187751799821854, - "loss_sod": 0.026803283020853996, - "loss_total": 0.39077937602996826, - "step": 180699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.8551573753356934, - "learning_rate": 4.1099231665887104e-05, - "loss": 0.5003, - "step": 180700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.391913414001465, - "loss_rtd": 0.23210972547531128, - "loss_sent": 0.3293698728084564, - "loss_sod": 0.10571935772895813, - "loss_total": 0.6671989560127258, - "step": 180799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.294342041015625, - "loss_rtd": 0.24984751641750336, - "loss_sent": 0.5203132033348083, - "loss_sod": 0.06893518567085266, - "loss_total": 0.8390958905220032, - "step": 180799 - }, - { - "epoch": 0.0096, - "grad_norm": 2.281083822250366, - "learning_rate": 4.106800705436566e-05, - "loss": 0.4873, - "step": 180800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.3256707191467285, - "loss_rtd": 0.24510520696640015, - "loss_sent": 0.25402265787124634, - "loss_sod": 0.06134971231222153, - "loss_total": 0.560477614402771, - "step": 180899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.4293036460876465, - "loss_rtd": 0.250690221786499, - "loss_sent": 0.1888970136642456, - "loss_sod": 0.07212607562541962, - "loss_total": 0.5117133259773254, - "step": 180899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.920924961566925, - "learning_rate": 4.103678604064992e-05, - "loss": 0.5026, - "step": 180900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.400794982910156, - "loss_rtd": 0.27316975593566895, - "loss_sent": 0.2487497329711914, - "loss_sod": 0.03013472445309162, - "loss_total": 0.552054226398468, - "step": 180999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.666261196136475, - "loss_rtd": 0.2672635316848755, - "loss_sent": 0.16726598143577576, - "loss_sod": 0.076224185526371, - "loss_total": 0.5107536911964417, - "step": 180999 - }, - { - "epoch": 0.01, - "grad_norm": 1.0255016088485718, - "learning_rate": 4.100556863731567e-05, - "loss": 0.5075, - "step": 181000 - }, - { - "epoch": 0.01, - "eval_loss": 0.46509864926338196, - "eval_runtime": 151.2879, - "eval_samples_per_second": 102.077, - "eval_steps_per_second": 0.8, - "step": 181000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.197551727294922, - "loss_rtd": 0.2652503252029419, - "loss_sent": 0.044459570199251175, - "loss_sod": 0.0072874571196734905, - "loss_total": 0.31699734926223755, - "step": 181099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.716944694519043, - "loss_rtd": 0.25120916962623596, - "loss_sent": 0.2240513563156128, - "loss_sod": 0.014923347160220146, - "loss_total": 0.49018386006355286, - "step": 181099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8222607970237732, - "learning_rate": 4.0974354856937316e-05, - "loss": 0.4921, - "step": 181100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.415718078613281, - "loss_rtd": 0.2567186653614044, - "loss_sent": 0.09884947538375854, - "loss_sod": 0.057101938873529434, - "loss_total": 0.4126700758934021, - "step": 181199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.394846439361572, - "loss_rtd": 0.27208247780799866, - "loss_sent": 0.4067822992801666, - "loss_sod": 0.031822025775909424, - "loss_total": 0.7106868028640747, - "step": 181199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.0264217853546143, - "learning_rate": 4.094314471208775e-05, - "loss": 0.4865, - "step": 181200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.2518744468688965, - "loss_rtd": 0.286484032869339, - "loss_sent": 0.12352120131254196, - "loss_sod": 0.11138713359832764, - "loss_total": 0.5213923454284668, - "step": 181299 - }, - { - "epoch": 0.010598, - "loss_gen": 4.881196022033691, - "loss_rtd": 0.2354147732257843, - "loss_sent": 0.00039958898560144007, - "loss_sod": 0.20208677649497986, - "loss_total": 0.4379011392593384, - "step": 181299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.3067972660064697, - "learning_rate": 4.0911938215338395e-05, - "loss": 0.4775, - "step": 181300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.660808086395264, - "loss_rtd": 0.235980823636055, - "loss_sent": 0.20990155637264252, - "loss_sod": 0.031419988721609116, - "loss_total": 0.4773023724555969, - "step": 181399 - }, - { - "epoch": 0.010798, - "loss_gen": 4.864224433898926, - "loss_rtd": 0.23932556807994843, - "loss_sent": 0.0018415855010971427, - "loss_sod": 0.13941384851932526, - "loss_total": 0.38058099150657654, - "step": 181399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.9057682156562805, - "learning_rate": 4.088073537925925e-05, - "loss": 0.4996, - "step": 181400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.179491996765137, - "loss_rtd": 0.24789611995220184, - "loss_sent": 0.19952335953712463, - "loss_sod": 0.0029267133213579655, - "loss_total": 0.4503462016582489, - "step": 181499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.132256507873535, - "loss_rtd": 0.28106802701950073, - "loss_sent": 0.24645671248435974, - "loss_sod": 0.011169906705617905, - "loss_total": 0.5386946201324463, - "step": 181499 - }, - { - "epoch": 0.011, - "grad_norm": 0.9796513319015503, - "learning_rate": 4.0849536216418796e-05, - "loss": 0.4914, - "step": 181500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.230510234832764, - "loss_rtd": 0.2636624276638031, - "loss_sent": 0.22412417829036713, - "loss_sod": 0.08131483197212219, - "loss_total": 0.5691014528274536, - "step": 181599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.510939598083496, - "loss_rtd": 0.25225135684013367, - "loss_sent": 0.14450867474079132, - "loss_sod": 0.004225961863994598, - "loss_total": 0.4009860157966614, - "step": 181599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9009349346160889, - "learning_rate": 4.081834073938406e-05, - "loss": 0.495, - "step": 181600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.600374221801758, - "loss_rtd": 0.2612046003341675, - "loss_sent": 0.2270781695842743, - "loss_sod": 0.1035432517528534, - "loss_total": 0.5918260216712952, - "step": 181699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.734739303588867, - "loss_rtd": 0.2507054805755615, - "loss_sent": 0.3365671932697296, - "loss_sod": 0.04251658171415329, - "loss_total": 0.6297892332077026, - "step": 181699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.9514797925949097, - "learning_rate": 4.078714896072058e-05, - "loss": 0.4988, - "step": 181700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.431485652923584, - "loss_rtd": 0.24863086640834808, - "loss_sent": 0.22148184478282928, - "loss_sod": 0.07801975309848785, - "loss_total": 0.5481324791908264, - "step": 181799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.48687744140625, - "loss_rtd": 0.2456839680671692, - "loss_sent": 0.12878286838531494, - "loss_sod": 0.04740482196211815, - "loss_total": 0.4218716621398926, - "step": 181799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.2794034481048584, - "learning_rate": 4.0755960892992404e-05, - "loss": 0.4957, - "step": 181800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.224306583404541, - "loss_rtd": 0.2649737000465393, - "loss_sent": 0.08195577561855316, - "loss_sod": 0.016276869922876358, - "loss_total": 0.36320632696151733, - "step": 181899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.511739730834961, - "loss_rtd": 0.2753254771232605, - "loss_sent": 0.12046878784894943, - "loss_sod": 0.07207190990447998, - "loss_total": 0.4678661823272705, - "step": 181899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.8595014214515686, - "learning_rate": 4.072477654876206e-05, - "loss": 0.4949, - "step": 181900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.5958638191223145, - "loss_rtd": 0.25696098804473877, - "loss_sent": 0.21077921986579895, - "loss_sod": 0.0512884184718132, - "loss_total": 0.5190286636352539, - "step": 181999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.1753411293029785, - "loss_rtd": 0.2576918601989746, - "loss_sent": 0.13037826120853424, - "loss_sod": 0.0631154403090477, - "loss_total": 0.45118558406829834, - "step": 181999 - }, - { - "epoch": 0.012, - "grad_norm": 0.8823849558830261, - "learning_rate": 4.069359594059062e-05, - "loss": 0.4758, - "step": 182000 - }, - { - "epoch": 0.012, - "eval_loss": 0.4631200432777405, - "eval_runtime": 151.5039, - "eval_samples_per_second": 101.931, - "eval_steps_per_second": 0.799, - "step": 182000 - }, - { - "epoch": 0.012198, - "loss_gen": 4.830898761749268, - "loss_rtd": 0.2535586953163147, - "loss_sent": 2.959699122584425e-05, - "loss_sod": 0.11356370896100998, - "loss_total": 0.36715200543403625, - "step": 182099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.701071262359619, - "loss_rtd": 0.22336901724338531, - "loss_sent": 0.017257556319236755, - "loss_sod": 0.08801577240228653, - "loss_total": 0.3286423683166504, - "step": 182099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.6836785078048706, - "learning_rate": 4.0662419081037625e-05, - "loss": 0.4756, - "step": 182100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.572444915771484, - "loss_rtd": 0.24312740564346313, - "loss_sent": 0.17422907054424286, - "loss_sod": 0.051759544759988785, - "loss_total": 0.4691160321235657, - "step": 182199 - }, - { - "epoch": 0.012398, - "loss_gen": 4.937934398651123, - "loss_rtd": 0.24445945024490356, - "loss_sent": 3.690571975312196e-05, - "loss_sod": 0.24471774697303772, - "loss_total": 0.48921409249305725, - "step": 182199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.4495713710784912, - "learning_rate": 4.063124598266111e-05, - "loss": 0.4993, - "step": 182200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.658036231994629, - "loss_rtd": 0.27451783418655396, - "loss_sent": 0.07577437162399292, - "loss_sod": 0.04801433905959129, - "loss_total": 0.39830654859542847, - "step": 182299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.250896453857422, - "loss_rtd": 0.25234609842300415, - "loss_sent": 0.03949786722660065, - "loss_sod": 0.0794113352894783, - "loss_total": 0.3712552785873413, - "step": 182299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.6161572933197021, - "learning_rate": 4.0600076658017585e-05, - "loss": 0.5026, - "step": 182300 - }, - { - "epoch": 0.012798, - "loss_gen": 4.730467319488525, - "loss_rtd": 0.2254515141248703, - "loss_sent": 0.05113282427191734, - "loss_sod": 0.035083699971437454, - "loss_total": 0.3116680383682251, - "step": 182399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.1474151611328125, - "loss_rtd": 0.26398155093193054, - "loss_sent": 0.07230521738529205, - "loss_sod": 0.16289299726486206, - "loss_total": 0.49917978048324585, - "step": 182399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.1920756101608276, - "learning_rate": 4.056891111966206e-05, - "loss": 0.4929, - "step": 182400 - }, - { - "epoch": 0.012998, - "loss_gen": 4.696252822875977, - "loss_rtd": 0.21865539252758026, - "loss_sent": 0.09347908943891525, - "loss_sod": 0.008299498818814754, - "loss_total": 0.32043397426605225, - "step": 182499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.6855878829956055, - "loss_rtd": 0.27629354596138, - "loss_sent": 0.39232394099235535, - "loss_sod": 0.10279744863510132, - "loss_total": 0.7714149355888367, - "step": 182499 - }, - { - "epoch": 0.013, - "grad_norm": 1.159462571144104, - "learning_rate": 4.0537749380148004e-05, - "loss": 0.4804, - "step": 182500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.757350444793701, - "loss_rtd": 0.2363877296447754, - "loss_sent": 4.058789636474103e-05, - "loss_sod": 0.06571846455335617, - "loss_total": 0.3021467924118042, - "step": 182599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.5546159744262695, - "loss_rtd": 0.27724331617355347, - "loss_sent": 0.12912122905254364, - "loss_sod": 0.03324735909700394, - "loss_total": 0.43961191177368164, - "step": 182599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.6910777688026428, - "learning_rate": 4.050659145202738e-05, - "loss": 0.5107, - "step": 182600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.621369361877441, - "loss_rtd": 0.2423516809940338, - "loss_sent": 0.04023056849837303, - "loss_sod": 0.006830750964581966, - "loss_total": 0.2894130051136017, - "step": 182699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.406081199645996, - "loss_rtd": 0.26067420840263367, - "loss_sent": 0.380702406167984, - "loss_sod": 0.03927621245384216, - "loss_total": 0.6806528568267822, - "step": 182699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.6373041868209839, - "learning_rate": 4.0475437347850577e-05, - "loss": 0.4763, - "step": 182700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.552177906036377, - "loss_rtd": 0.2496611773967743, - "loss_sent": 0.17501670122146606, - "loss_sod": 0.05193847045302391, - "loss_total": 0.47661635279655457, - "step": 182799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.976564407348633, - "loss_rtd": 0.2528514862060547, - "loss_sent": 0.08602333813905716, - "loss_sod": 0.02177412249147892, - "loss_total": 0.3606489300727844, - "step": 182799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.255135416984558, - "learning_rate": 4.0444287080166464e-05, - "loss": 0.4871, - "step": 182800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.524647235870361, - "loss_rtd": 0.2574880123138428, - "loss_sent": 0.12596097588539124, - "loss_sod": 0.037886302918195724, - "loss_total": 0.42133527994155884, - "step": 182899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.371420860290527, - "loss_rtd": 0.26169314980506897, - "loss_sent": 0.41575688123703003, - "loss_sod": 0.015264102257788181, - "loss_total": 0.6927140951156616, - "step": 182899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.9581928253173828, - "learning_rate": 4.041314066152239e-05, - "loss": 0.4787, - "step": 182900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.1283745765686035, - "loss_rtd": 0.24160684645175934, - "loss_sent": 0.0006477711140178144, - "loss_sod": 0.1458633840084076, - "loss_total": 0.38811802864074707, - "step": 182999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.760714530944824, - "loss_rtd": 0.2214888632297516, - "loss_sent": 0.01441890373826027, - "loss_sod": 0.02126622013747692, - "loss_total": 0.25717398524284363, - "step": 182999 - }, - { - "epoch": 0.014, - "grad_norm": 0.6300406455993652, - "learning_rate": 4.038199810446409e-05, - "loss": 0.5115, - "step": 183000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4704357087612152, - "eval_runtime": 151.3242, - "eval_samples_per_second": 102.052, - "eval_steps_per_second": 0.8, - "step": 183000 - }, - { - "epoch": 0.014198, - "loss_gen": 4.839997291564941, - "loss_rtd": 0.22678792476654053, - "loss_sent": 0.018909098580479622, - "loss_sod": 0.19718509912490845, - "loss_total": 0.44288212060928345, - "step": 183099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.5360822677612305, - "loss_rtd": 0.2616807818412781, - "loss_sent": 0.29152411222457886, - "loss_sod": 0.020168419927358627, - "loss_total": 0.5733733177185059, - "step": 183099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.2153898477554321, - "learning_rate": 4.0350859421535814e-05, - "loss": 0.4827, - "step": 183100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.435664653778076, - "loss_rtd": 0.27038609981536865, - "loss_sent": 0.18372918665409088, - "loss_sod": 0.12099272757768631, - "loss_total": 0.575107991695404, - "step": 183199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.528284549713135, - "loss_rtd": 0.2692619264125824, - "loss_sent": 0.12430182099342346, - "loss_sod": 0.1124880239367485, - "loss_total": 0.506051778793335, - "step": 183199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.2037572860717773, - "learning_rate": 4.03197246252802e-05, - "loss": 0.4835, - "step": 183200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.204357624053955, - "loss_rtd": 0.25782856345176697, - "loss_sent": 0.08069150149822235, - "loss_sod": 0.004195802845060825, - "loss_total": 0.342715859413147, - "step": 183299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.3771233558654785, - "loss_rtd": 0.25820738077163696, - "loss_sent": 0.2556789219379425, - "loss_sod": 0.06680300831794739, - "loss_total": 0.5806893110275269, - "step": 183299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.8360551595687866, - "learning_rate": 4.0288593728238365e-05, - "loss": 0.5018, - "step": 183300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.616715431213379, - "loss_rtd": 0.26079732179641724, - "loss_sent": 0.091511569917202, - "loss_sod": 0.11728174984455109, - "loss_total": 0.4695906341075897, - "step": 183399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.630971908569336, - "loss_rtd": 0.2716662287712097, - "loss_sent": 0.22483932971954346, - "loss_sod": 0.02582927793264389, - "loss_total": 0.5223348140716553, - "step": 183399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.7448164820671082, - "learning_rate": 4.02574667429498e-05, - "loss": 0.4973, - "step": 183400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.2737627029418945, - "loss_rtd": 0.2512357831001282, - "loss_sent": 3.664237738121301e-05, - "loss_sod": 0.17651206254959106, - "loss_total": 0.4277845025062561, - "step": 183499 - }, - { - "epoch": 0.014998, - "loss_gen": 4.630550384521484, - "loss_rtd": 0.21566666662693024, - "loss_sent": 0.005458462052047253, - "loss_sod": 0.0444282591342926, - "loss_total": 0.26555341482162476, - "step": 183499 - }, - { - "epoch": 0.015, - "grad_norm": 0.8602021336555481, - "learning_rate": 4.0226343681952476e-05, - "loss": 0.4891, - "step": 183500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.610905647277832, - "loss_rtd": 0.27766552567481995, - "loss_sent": 0.32495835423469543, - "loss_sod": 0.009545616805553436, - "loss_total": 0.6121695041656494, - "step": 183599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.8337273597717285, - "loss_rtd": 0.26680153608322144, - "loss_sent": 0.43907541036605835, - "loss_sod": 0.10988657176494598, - "loss_total": 0.815763533115387, - "step": 183599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.4352822303771973, - "learning_rate": 4.019522455778274e-05, - "loss": 0.4855, - "step": 183600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.38031005859375, - "loss_rtd": 0.2854062616825104, - "loss_sent": 0.1864727884531021, - "loss_sod": 0.0777948647737503, - "loss_total": 0.5496739149093628, - "step": 183699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.878078937530518, - "loss_rtd": 0.2514983117580414, - "loss_sent": 0.21822580695152283, - "loss_sod": 0.054971419274806976, - "loss_total": 0.5246955156326294, - "step": 183699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.4945223331451416, - "learning_rate": 4.016410938297539e-05, - "loss": 0.5119, - "step": 183700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.504877090454102, - "loss_rtd": 0.2742302119731903, - "loss_sent": 0.1993817538022995, - "loss_sod": 0.06181372329592705, - "loss_total": 0.5354256629943848, - "step": 183799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.478941917419434, - "loss_rtd": 0.24994368851184845, - "loss_sent": 0.14488546550273895, - "loss_sod": 0.039909228682518005, - "loss_total": 0.4347383975982666, - "step": 183799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.8672297596931458, - "learning_rate": 4.0132998170063594e-05, - "loss": 0.4911, - "step": 183800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.2837934494018555, - "loss_rtd": 0.2422766536474228, - "loss_sent": 0.05480147525668144, - "loss_sod": 0.021374644711613655, - "loss_total": 0.31845277547836304, - "step": 183899 - }, - { - "epoch": 0.015798, - "loss_gen": 4.804450988769531, - "loss_rtd": 0.2299848198890686, - "loss_sent": 3.797487443080172e-05, - "loss_sod": 0.22849178314208984, - "loss_total": 0.45851457118988037, - "step": 183899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.9032144546508789, - "learning_rate": 4.010189093157896e-05, - "loss": 0.49, - "step": 183900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.608768463134766, - "loss_rtd": 0.26588255167007446, - "loss_sent": 0.2749767005443573, - "loss_sod": 0.04174520820379257, - "loss_total": 0.5826044678688049, - "step": 183999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.7578229904174805, - "loss_rtd": 0.2552870810031891, - "loss_sent": 0.29340633749961853, - "loss_sod": 0.042127273976802826, - "loss_total": 0.5908206701278687, - "step": 183999 - }, - { - "epoch": 0.016, - "grad_norm": 1.2859172821044922, - "learning_rate": 4.007078768005147e-05, - "loss": 0.4885, - "step": 184000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4621444642543793, - "eval_runtime": 151.5087, - "eval_samples_per_second": 101.928, - "eval_steps_per_second": 0.799, - "step": 184000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.459926128387451, - "loss_rtd": 0.2595793902873993, - "loss_sent": 0.09777318686246872, - "loss_sod": 0.030045513063669205, - "loss_total": 0.3873980939388275, - "step": 184099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.62075138092041, - "loss_rtd": 0.25962185859680176, - "loss_sent": 0.16660597920417786, - "loss_sod": 0.020659856498241425, - "loss_total": 0.44688770174980164, - "step": 184099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.9403879046440125, - "learning_rate": 4.0039688428009525e-05, - "loss": 0.4838, - "step": 184100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.1393914222717285, - "loss_rtd": 0.26254382729530334, - "loss_sent": 0.08186690509319305, - "loss_sod": 0.013753924518823624, - "loss_total": 0.3581646680831909, - "step": 184199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.615752220153809, - "loss_rtd": 0.2803412675857544, - "loss_sent": 0.1581527590751648, - "loss_sod": 0.0893164724111557, - "loss_total": 0.5278105139732361, - "step": 184199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.6435991525650024, - "learning_rate": 4.00085931879799e-05, - "loss": 0.5036, - "step": 184200 - }, - { - "epoch": 0.016598, - "loss_gen": 4.898801326751709, - "loss_rtd": 0.22874201834201813, - "loss_sent": 4.691241338150576e-05, - "loss_sod": 0.16978690028190613, - "loss_total": 0.3985758423805237, - "step": 184299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.128859996795654, - "loss_rtd": 0.24762988090515137, - "loss_sent": 0.027703799307346344, - "loss_sod": 0.12636427581310272, - "loss_total": 0.40169796347618103, - "step": 184299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.0342738628387451, - "learning_rate": 3.997750197248773e-05, - "loss": 0.4855, - "step": 184300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.697004795074463, - "loss_rtd": 0.2553969919681549, - "loss_sent": 0.16735699772834778, - "loss_sod": 0.04648858308792114, - "loss_total": 0.46924257278442383, - "step": 184399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.459543228149414, - "loss_rtd": 0.25131621956825256, - "loss_sent": 0.32132408022880554, - "loss_sod": 0.05537897348403931, - "loss_total": 0.6280192732810974, - "step": 184399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.027738094329834, - "learning_rate": 3.9946414794056565e-05, - "loss": 0.4747, - "step": 184400 - }, - { - "epoch": 0.016998, - "loss_gen": 4.9221954345703125, - "loss_rtd": 0.2270478904247284, - "loss_sent": 0.08424238860607147, - "loss_sod": 0.08059877157211304, - "loss_total": 0.3918890357017517, - "step": 184499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.468795299530029, - "loss_rtd": 0.2783259451389313, - "loss_sent": 0.2450186163187027, - "loss_sod": 0.043618764728307724, - "loss_total": 0.5669633150100708, - "step": 184499 - }, - { - "epoch": 0.017, - "grad_norm": 0.5893915295600891, - "learning_rate": 3.9915331665208325e-05, - "loss": 0.4938, - "step": 184500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.752498626708984, - "loss_rtd": 0.23344717919826508, - "loss_sent": 3.408768679946661e-05, - "loss_sod": 0.12836860120296478, - "loss_total": 0.3618498742580414, - "step": 184599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.045048713684082, - "loss_rtd": 0.23287439346313477, - "loss_sent": 0.0720604881644249, - "loss_sod": 0.04457763582468033, - "loss_total": 0.34951251745224, - "step": 184599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.9258651733398438, - "learning_rate": 3.988425259846327e-05, - "loss": 0.4748, - "step": 184600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.031115531921387, - "loss_rtd": 0.2072264552116394, - "loss_sent": 0.007756704930216074, - "loss_sod": 0.07516558468341827, - "loss_total": 0.2901487350463867, - "step": 184699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.351471900939941, - "loss_rtd": 0.22140014171600342, - "loss_sent": 0.04059907793998718, - "loss_sod": 0.12634029984474182, - "loss_total": 0.3883395195007324, - "step": 184699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.6309583187103271, - "learning_rate": 3.9853177606340075e-05, - "loss": 0.4765, - "step": 184700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.614597797393799, - "loss_rtd": 0.2522928714752197, - "loss_sent": 0.18731844425201416, - "loss_sod": 0.017789138481020927, - "loss_total": 0.45740044116973877, - "step": 184799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.453588485717773, - "loss_rtd": 0.25674304366111755, - "loss_sent": 0.16259916126728058, - "loss_sod": 0.04139901325106621, - "loss_total": 0.46074122190475464, - "step": 184799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.7350119948387146, - "learning_rate": 3.982210670135571e-05, - "loss": 0.5014, - "step": 184800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.427696704864502, - "loss_rtd": 0.25103387236595154, - "loss_sent": 0.13740447163581848, - "loss_sod": 0.026184070855379105, - "loss_total": 0.41462242603302, - "step": 184899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.288742542266846, - "loss_rtd": 0.26934170722961426, - "loss_sent": 0.33357757329940796, - "loss_sod": 0.054238948971033096, - "loss_total": 0.6571582555770874, - "step": 184899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.1365247964859009, - "learning_rate": 3.979103989602556e-05, - "loss": 0.4931, - "step": 184900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.507558345794678, - "loss_rtd": 0.2580697238445282, - "loss_sent": 0.12666617333889008, - "loss_sod": 0.013586482033133507, - "loss_total": 0.3983224034309387, - "step": 184999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.5863542556762695, - "loss_rtd": 0.2636576294898987, - "loss_sent": 0.1636495143175125, - "loss_sod": 0.029072415083646774, - "loss_total": 0.4563795328140259, - "step": 184999 - }, - { - "epoch": 0.018, - "grad_norm": 0.5589444637298584, - "learning_rate": 3.9759977202863316e-05, - "loss": 0.4866, - "step": 185000 - }, - { - "epoch": 0.018, - "eval_loss": 0.46582549810409546, - "eval_runtime": 151.4957, - "eval_samples_per_second": 101.937, - "eval_steps_per_second": 0.799, - "step": 185000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.513877868652344, - "loss_rtd": 0.2644221782684326, - "loss_sent": 0.09160961955785751, - "loss_sod": 0.13490232825279236, - "loss_total": 0.4909341335296631, - "step": 185099 - }, - { - "epoch": 0.018198, - "loss_gen": 4.994869709014893, - "loss_rtd": 0.23290346562862396, - "loss_sent": 3.2664669561199844e-05, - "loss_sod": 0.18036779761314392, - "loss_total": 0.4133039116859436, - "step": 185099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.1618928909301758, - "learning_rate": 3.972891863438104e-05, - "loss": 0.4791, - "step": 185100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.438427448272705, - "loss_rtd": 0.2602479159832001, - "loss_sent": 0.1740417629480362, - "loss_sod": 0.03438429906964302, - "loss_total": 0.4686740040779114, - "step": 185199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.651998519897461, - "loss_rtd": 0.2555427849292755, - "loss_sent": 0.33565443754196167, - "loss_sod": 0.07781211286783218, - "loss_total": 0.6690093278884888, - "step": 185199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.7313363552093506, - "learning_rate": 3.9697864203089085e-05, - "loss": 0.4762, - "step": 185200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.649327278137207, - "loss_rtd": 0.2665024399757385, - "loss_sent": 0.25976502895355225, - "loss_sod": 0.05779499560594559, - "loss_total": 0.5840624570846558, - "step": 185299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.586805820465088, - "loss_rtd": 0.23661339282989502, - "loss_sent": 0.18426039814949036, - "loss_sod": 0.0241120345890522, - "loss_total": 0.44498583674430847, - "step": 185299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.3442031145095825, - "learning_rate": 3.9666813921496223e-05, - "loss": 0.4937, - "step": 185300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.644951820373535, - "loss_rtd": 0.2642596364021301, - "loss_sent": 0.1667267233133316, - "loss_sod": 0.10745980590581894, - "loss_total": 0.5384461879730225, - "step": 185399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.103083610534668, - "loss_rtd": 0.2206515669822693, - "loss_sent": 0.2605799436569214, - "loss_sod": 0.09676875919103622, - "loss_total": 0.5780003070831299, - "step": 185399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.1460782289505005, - "learning_rate": 3.963576780210946e-05, - "loss": 0.493, - "step": 185400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.5820770263671875, - "loss_rtd": 0.2645975947380066, - "loss_sent": 0.24656212329864502, - "loss_sod": 0.007974594831466675, - "loss_total": 0.5191342830657959, - "step": 185499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.377520561218262, - "loss_rtd": 0.25580769777297974, - "loss_sent": 0.2565309405326843, - "loss_sod": 0.06911972165107727, - "loss_total": 0.581458330154419, - "step": 185499 - }, - { - "epoch": 0.019, - "grad_norm": 1.3609293699264526, - "learning_rate": 3.96047258574342e-05, - "loss": 0.4871, - "step": 185500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.632157325744629, - "loss_rtd": 0.2597460448741913, - "loss_sent": 0.11746742576360703, - "loss_sod": 0.049807947129011154, - "loss_total": 0.42702141404151917, - "step": 185599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.603592872619629, - "loss_rtd": 0.2609822452068329, - "loss_sent": 0.26343026757240295, - "loss_sod": 0.06601596623659134, - "loss_total": 0.5904284715652466, - "step": 185599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.9038184881210327, - "learning_rate": 3.9573688099974104e-05, - "loss": 0.4896, - "step": 185600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.080387592315674, - "loss_rtd": 0.23782046139240265, - "loss_sent": 0.16632144153118134, - "loss_sod": 0.14350368082523346, - "loss_total": 0.5476455688476562, - "step": 185699 - }, - { - "epoch": 0.019398, - "loss_gen": 4.929356575012207, - "loss_rtd": 0.2298208326101303, - "loss_sent": 0.023580890148878098, - "loss_sod": 0.10661250352859497, - "loss_total": 0.3600142300128937, - "step": 185699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.179927110671997, - "learning_rate": 3.954265454223121e-05, - "loss": 0.4804, - "step": 185700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.3133625984191895, - "loss_rtd": 0.24351274967193604, - "loss_sent": 0.07976383715867996, - "loss_sod": 0.10921406745910645, - "loss_total": 0.43249064683914185, - "step": 185799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.280283451080322, - "loss_rtd": 0.22837214171886444, - "loss_sent": 0.10248099267482758, - "loss_sod": 0.08554579317569733, - "loss_total": 0.41639894247055054, - "step": 185799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.0985273122787476, - "learning_rate": 3.951162519670582e-05, - "loss": 0.4867, - "step": 185800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.408891201019287, - "loss_rtd": 0.2592429220676422, - "loss_sent": 0.1581694781780243, - "loss_sod": 0.06999105215072632, - "loss_total": 0.4874034523963928, - "step": 185899 - }, - { - "epoch": 0.019798, - "loss_gen": 4.758256435394287, - "loss_rtd": 0.22770388424396515, - "loss_sent": 3.780725091928616e-05, - "loss_sod": 0.1224948838353157, - "loss_total": 0.35023659467697144, - "step": 185899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.8321103453636169, - "learning_rate": 3.948060007589653e-05, - "loss": 0.486, - "step": 185900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.374885559082031, - "loss_rtd": 0.2390490621328354, - "loss_sent": 0.203518807888031, - "loss_sod": 0.00301058660261333, - "loss_total": 0.4455784559249878, - "step": 185999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.598089694976807, - "loss_rtd": 0.24036353826522827, - "loss_sent": 0.3474893271923065, - "loss_sod": 0.09969167411327362, - "loss_total": 0.687544584274292, - "step": 185999 - }, - { - "epoch": 0.02, - "grad_norm": 1.2725980281829834, - "learning_rate": 3.944957919230029e-05, - "loss": 0.4839, - "step": 186000 - }, - { - "epoch": 0.02, - "eval_loss": 0.4647587239742279, - "eval_runtime": 151.4244, - "eval_samples_per_second": 101.985, - "eval_steps_per_second": 0.799, - "step": 186000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.6196465492248535, - "loss_rtd": 0.2646197974681854, - "loss_sent": 0.03576711192727089, - "loss_sod": 0.1355540156364441, - "loss_total": 0.4359409213066101, - "step": 186099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.504579544067383, - "loss_rtd": 0.26363444328308105, - "loss_sent": 0.12544412910938263, - "loss_sod": 0.038709308952093124, - "loss_total": 0.4277878999710083, - "step": 186099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.8783823251724243, - "learning_rate": 3.941856255841227e-05, - "loss": 0.4881, - "step": 186100 - }, - { - "epoch": 0.020398, - "loss_gen": 6.232545375823975, - "loss_rtd": 0.26752662658691406, - "loss_sent": 0.057205189019441605, - "loss_sod": 0.06630943715572357, - "loss_total": 0.39104127883911133, - "step": 186199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.357983589172363, - "loss_rtd": 0.26961809396743774, - "loss_sent": 0.08901049196720123, - "loss_sod": 0.013747490011155605, - "loss_total": 0.37237608432769775, - "step": 186199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.9054022431373596, - "learning_rate": 3.9387550186726e-05, - "loss": 0.4857, - "step": 186200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.58601713180542, - "loss_rtd": 0.27239173650741577, - "loss_sent": 0.6295097470283508, - "loss_sod": 0.08182334899902344, - "loss_total": 0.98372483253479, - "step": 186299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.3033342361450195, - "loss_rtd": 0.24794980883598328, - "loss_sent": 0.28165143728256226, - "loss_sod": 0.04188890755176544, - "loss_total": 0.5714901685714722, - "step": 186299 - }, - { - "epoch": 0.0206, - "grad_norm": 2.7101213932037354, - "learning_rate": 3.935654208973323e-05, - "loss": 0.4739, - "step": 186300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.419743061065674, - "loss_rtd": 0.2780866026878357, - "loss_sent": 0.08253250271081924, - "loss_sod": 0.011677983216941357, - "loss_total": 0.372297078371048, - "step": 186399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.571310997009277, - "loss_rtd": 0.27073201537132263, - "loss_sent": 0.09081586450338364, - "loss_sod": 0.090709388256073, - "loss_total": 0.45225727558135986, - "step": 186399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.7526413798332214, - "learning_rate": 3.932553827992406e-05, - "loss": 0.4776, - "step": 186400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.313012599945068, - "loss_rtd": 0.2521372139453888, - "loss_sent": 0.20292626321315765, - "loss_sod": 0.015023418702185154, - "loss_total": 0.47008687257766724, - "step": 186499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.843327045440674, - "loss_rtd": 0.2717064321041107, - "loss_sent": 0.06547203660011292, - "loss_sod": 0.023575402796268463, - "loss_total": 0.3607538640499115, - "step": 186499 - }, - { - "epoch": 0.021, - "grad_norm": 1.1148160696029663, - "learning_rate": 3.929453876978677e-05, - "loss": 0.4906, - "step": 186500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.784761428833008, - "loss_rtd": 0.2746570408344269, - "loss_sent": 0.24680086970329285, - "loss_sod": 0.08704239130020142, - "loss_total": 0.6085003018379211, - "step": 186599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.373242378234863, - "loss_rtd": 0.2574537694454193, - "loss_sent": 0.11793971061706543, - "loss_sod": 0.010002588853240013, - "loss_total": 0.3853960633277893, - "step": 186599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.017259955406189, - "learning_rate": 3.9263543571807994e-05, - "loss": 0.4784, - "step": 186600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.470976829528809, - "loss_rtd": 0.24996253848075867, - "loss_sent": 0.140133798122406, - "loss_sod": 0.08862268179655075, - "loss_total": 0.478719025850296, - "step": 186699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.5595598220825195, - "loss_rtd": 0.2608106732368469, - "loss_sent": 0.11437699943780899, - "loss_sod": 0.06650028377771378, - "loss_total": 0.4416879415512085, - "step": 186699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.0855633020401, - "learning_rate": 3.9232552698472574e-05, - "loss": 0.4993, - "step": 186700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.088070392608643, - "loss_rtd": 0.24716244637966156, - "loss_sent": 0.01046920008957386, - "loss_sod": 0.038748372346162796, - "loss_total": 0.29638001322746277, - "step": 186799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.764645099639893, - "loss_rtd": 0.22984711825847626, - "loss_sent": 4.883740984951146e-05, - "loss_sod": 0.14717769622802734, - "loss_total": 0.3770736753940582, - "step": 186799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.70028156042099, - "learning_rate": 3.920156616226365e-05, - "loss": 0.4732, - "step": 186800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.1860880851745605, - "loss_rtd": 0.2700851857662201, - "loss_sent": 0.13081170618534088, - "loss_sod": 0.09594961255788803, - "loss_total": 0.4968464970588684, - "step": 186899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.540929794311523, - "loss_rtd": 0.2529226243495941, - "loss_sent": 0.31104522943496704, - "loss_sod": 0.08700490742921829, - "loss_total": 0.6509727239608765, - "step": 186899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.1717571020126343, - "learning_rate": 3.917058397566258e-05, - "loss": 0.4787, - "step": 186900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.626912593841553, - "loss_rtd": 0.2688499689102173, - "loss_sent": 0.16953960061073303, - "loss_sod": 0.03840908408164978, - "loss_total": 0.4767986536026001, - "step": 186999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.451415061950684, - "loss_rtd": 0.2621687650680542, - "loss_sent": 0.12659521400928497, - "loss_sod": 0.04100191965699196, - "loss_total": 0.42976588010787964, - "step": 186999 - }, - { - "epoch": 0.022, - "grad_norm": 1.1987330913543701, - "learning_rate": 3.9139606151148994e-05, - "loss": 0.4877, - "step": 187000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4629695415496826, - "eval_runtime": 151.2984, - "eval_samples_per_second": 102.07, - "eval_steps_per_second": 0.8, - "step": 187000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.563031196594238, - "loss_rtd": 0.26130300760269165, - "loss_sent": 0.10228981822729111, - "loss_sod": 0.016945987939834595, - "loss_total": 0.38053882122039795, - "step": 187099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.131287574768066, - "loss_rtd": 0.2218714952468872, - "loss_sent": 0.0543176643550396, - "loss_sod": 0.17131146788597107, - "loss_total": 0.447500616312027, - "step": 187099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.127787709236145, - "learning_rate": 3.910863270120074e-05, - "loss": 0.4608, - "step": 187100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.79901123046875, - "loss_rtd": 0.2656002640724182, - "loss_sent": 0.21989504992961884, - "loss_sod": 0.2511252760887146, - "loss_total": 0.7366206049919128, - "step": 187199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.371663570404053, - "loss_rtd": 0.24986638128757477, - "loss_sent": 0.10776374489068985, - "loss_sod": 0.005952136591076851, - "loss_total": 0.3635822534561157, - "step": 187199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.1022467613220215, - "learning_rate": 3.907766363829395e-05, - "loss": 0.4834, - "step": 187200 - }, - { - "epoch": 0.022598, - "loss_gen": 6.133777141571045, - "loss_rtd": 0.2625170946121216, - "loss_sent": 0.05752617120742798, - "loss_sod": 0.039537668228149414, - "loss_total": 0.359580934047699, - "step": 187299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.570786952972412, - "loss_rtd": 0.24772319197654724, - "loss_sent": 0.3918899595737457, - "loss_sod": 0.07816148549318314, - "loss_total": 0.7177746295928955, - "step": 187299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.1749955415725708, - "learning_rate": 3.904669897490293e-05, - "loss": 0.4795, - "step": 187300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.406407833099365, - "loss_rtd": 0.26402488350868225, - "loss_sent": 0.14382319152355194, - "loss_sod": 0.05829557031393051, - "loss_total": 0.4661436676979065, - "step": 187399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.629527568817139, - "loss_rtd": 0.2584216594696045, - "loss_sent": 0.31577810645103455, - "loss_sod": 0.030520280823111534, - "loss_total": 0.6047200560569763, - "step": 187399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.1226608753204346, - "learning_rate": 3.901573872350025e-05, - "loss": 0.5154, - "step": 187400 - }, - { - "epoch": 0.022998, - "loss_gen": 4.973224639892578, - "loss_rtd": 0.23897667229175568, - "loss_sent": 0.2134232223033905, - "loss_sod": 0.015605310909450054, - "loss_total": 0.4680052101612091, - "step": 187499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.37889289855957, - "loss_rtd": 0.2571919560432434, - "loss_sent": 0.06486905366182327, - "loss_sod": 0.11300955712795258, - "loss_total": 0.43507057428359985, - "step": 187499 - }, - { - "epoch": 0.023, - "grad_norm": 0.9117709994316101, - "learning_rate": 3.8984782896556704e-05, - "loss": 0.4859, - "step": 187500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.2087836265563965, - "loss_rtd": 0.2483009546995163, - "loss_sent": 0.05193908140063286, - "loss_sod": 0.024950454011559486, - "loss_total": 0.3251904845237732, - "step": 187599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.2132415771484375, - "loss_rtd": 0.2532327473163605, - "loss_sent": 0.008457361720502377, - "loss_sod": 0.08348232507705688, - "loss_total": 0.3451724350452423, - "step": 187599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.7311539053916931, - "learning_rate": 3.8953831506541286e-05, - "loss": 0.4816, - "step": 187600 - }, - { - "epoch": 0.023398, - "loss_gen": 4.942038536071777, - "loss_rtd": 0.23786434531211853, - "loss_sent": 0.09772311896085739, - "loss_sod": 0.05206605792045593, - "loss_total": 0.38765352964401245, - "step": 187699 - }, - { - "epoch": 0.023398, - "loss_gen": 4.612724781036377, - "loss_rtd": 0.22827434539794922, - "loss_sent": 0.00022661521506961435, - "loss_sod": 0.14668779075145721, - "loss_total": 0.37518876791000366, - "step": 187699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8188888430595398, - "learning_rate": 3.892288456592123e-05, - "loss": 0.4891, - "step": 187700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.408532619476318, - "loss_rtd": 0.24254950881004333, - "loss_sent": 0.2602873742580414, - "loss_sod": 0.019567377865314484, - "loss_total": 0.5224042534828186, - "step": 187799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.257552623748779, - "loss_rtd": 0.2503557801246643, - "loss_sent": 0.5016323924064636, - "loss_sod": 0.11872883141040802, - "loss_total": 0.8707169890403748, - "step": 187799 - }, - { - "epoch": 0.0236, - "grad_norm": 2.0080339908599854, - "learning_rate": 3.889194208716192e-05, - "loss": 0.4653, - "step": 187800 - }, - { - "epoch": 0.023798, - "loss_gen": 6.010610580444336, - "loss_rtd": 0.25839945673942566, - "loss_sent": 0.3183378279209137, - "loss_sod": 0.06243967264890671, - "loss_total": 0.6391769647598267, - "step": 187899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.619524955749512, - "loss_rtd": 0.2607441842556, - "loss_sent": 0.1995314210653305, - "loss_sod": 0.12462512403726578, - "loss_total": 0.5849007368087769, - "step": 187899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.2173618078231812, - "learning_rate": 3.886100408272703e-05, - "loss": 0.4991, - "step": 187900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.060142517089844, - "loss_rtd": 0.220561683177948, - "loss_sent": 0.09204896539449692, - "loss_sod": 0.16276516020298004, - "loss_total": 0.47537580132484436, - "step": 187999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.676015377044678, - "loss_rtd": 0.24254687130451202, - "loss_sent": 0.22284887731075287, - "loss_sod": 0.02567087486386299, - "loss_total": 0.4910666346549988, - "step": 187999 - }, - { - "epoch": 0.024, - "grad_norm": 1.120131015777588, - "learning_rate": 3.883007056507835e-05, - "loss": 0.4827, - "step": 188000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4598945379257202, - "eval_runtime": 151.472, - "eval_samples_per_second": 101.953, - "eval_steps_per_second": 0.799, - "step": 188000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.747088432312012, - "loss_rtd": 0.2507705092430115, - "loss_sent": 0.2397606372833252, - "loss_sod": 0.09798656404018402, - "loss_total": 0.5885177254676819, - "step": 188099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.377871513366699, - "loss_rtd": 0.24825069308280945, - "loss_sent": 0.2851286232471466, - "loss_sod": 0.044352252036333084, - "loss_total": 0.5777316093444824, - "step": 188099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.453224778175354, - "learning_rate": 3.879914154667592e-05, - "loss": 0.5039, - "step": 188100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.2848005294799805, - "loss_rtd": 0.23623868823051453, - "loss_sent": 0.021470773965120316, - "loss_sod": 0.08070395886898041, - "loss_total": 0.33841341733932495, - "step": 188199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.011479377746582, - "loss_rtd": 0.22047826647758484, - "loss_sent": 9.895951370708644e-05, - "loss_sod": 0.20309217274188995, - "loss_total": 0.4236694276332855, - "step": 188199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9086915850639343, - "learning_rate": 3.876821703997793e-05, - "loss": 0.4814, - "step": 188200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.45997428894043, - "loss_rtd": 0.21449197828769684, - "loss_sent": 7.156938954722136e-05, - "loss_sod": 0.11868256330490112, - "loss_total": 0.3332460820674896, - "step": 188299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.46342658996582, - "loss_rtd": 0.26590511202812195, - "loss_sent": 0.15254537761211395, - "loss_sod": 0.07487458735704422, - "loss_total": 0.49332505464553833, - "step": 188299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.0269204378128052, - "learning_rate": 3.873729705744078e-05, - "loss": 0.4733, - "step": 188300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.391395568847656, - "loss_rtd": 0.2536605894565582, - "loss_sent": 0.6708694696426392, - "loss_sod": 0.04845999926328659, - "loss_total": 0.9729900360107422, - "step": 188399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.521356105804443, - "loss_rtd": 0.24665305018424988, - "loss_sent": 0.3473989963531494, - "loss_sod": 0.03050447255373001, - "loss_total": 0.6245565414428711, - "step": 188399 - }, - { - "epoch": 0.0248, - "grad_norm": 2.669387102127075, - "learning_rate": 3.8706381611519015e-05, - "loss": 0.4823, - "step": 188400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.504930019378662, - "loss_rtd": 0.26366689801216125, - "loss_sent": 0.07109244167804718, - "loss_sod": 0.05478106439113617, - "loss_total": 0.3895403742790222, - "step": 188499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.711468696594238, - "loss_rtd": 0.2695043683052063, - "loss_sent": 0.1970943659543991, - "loss_sod": 0.039382901042699814, - "loss_total": 0.5059816241264343, - "step": 188499 - }, - { - "epoch": 0.025, - "grad_norm": 1.0042864084243774, - "learning_rate": 3.8675470714665405e-05, - "loss": 0.4904, - "step": 188500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.217864036560059, - "loss_rtd": 0.25182682275772095, - "loss_sent": 0.029164280742406845, - "loss_sod": 0.04378075152635574, - "loss_total": 0.32477185130119324, - "step": 188599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.344483375549316, - "loss_rtd": 0.23775868117809296, - "loss_sent": 0.16007396578788757, - "loss_sod": 0.031165238469839096, - "loss_total": 0.42899787425994873, - "step": 188599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.3527297973632812, - "learning_rate": 3.864456437933082e-05, - "loss": 0.4846, - "step": 188600 - }, - { - "epoch": 0.025398, - "loss_gen": 4.737148284912109, - "loss_rtd": 0.22901023924350739, - "loss_sent": 0.000305487890727818, - "loss_sod": 0.309353768825531, - "loss_total": 0.5386694669723511, - "step": 188699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.505973815917969, - "loss_rtd": 0.2795254588127136, - "loss_sent": 0.10963069647550583, - "loss_sod": 0.05984332039952278, - "loss_total": 0.4489994943141937, - "step": 188699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.2695363759994507, - "learning_rate": 3.8613662617964354e-05, - "loss": 0.4848, - "step": 188700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.010315418243408, - "loss_rtd": 0.23522193729877472, - "loss_sent": 0.09733253717422485, - "loss_sod": 0.015465503558516502, - "loss_total": 0.3480199873447418, - "step": 188799 - }, - { - "epoch": 0.025598, - "loss_gen": 4.887147903442383, - "loss_rtd": 0.24651817977428436, - "loss_sent": 0.036465562880039215, - "loss_sod": 0.060762643814086914, - "loss_total": 0.3437463939189911, - "step": 188799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.6803804039955139, - "learning_rate": 3.858276544301321e-05, - "loss": 0.4927, - "step": 188800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.247392654418945, - "loss_rtd": 0.22698451578617096, - "loss_sent": 0.0394565612077713, - "loss_sod": 0.10808434337377548, - "loss_total": 0.37452542781829834, - "step": 188899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.169305324554443, - "loss_rtd": 0.2783455550670624, - "loss_sent": 0.0984940379858017, - "loss_sod": 0.03639579564332962, - "loss_total": 0.4132353663444519, - "step": 188899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.8834724426269531, - "learning_rate": 3.85518728669228e-05, - "loss": 0.4945, - "step": 188900 - }, - { - "epoch": 0.025998, - "loss_gen": 4.514176368713379, - "loss_rtd": 0.21182015538215637, - "loss_sent": 3.6810768506256863e-05, - "loss_sod": 0.18472716212272644, - "loss_total": 0.3965841233730316, - "step": 188999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.477997303009033, - "loss_rtd": 0.22783230245113373, - "loss_sent": 0.11864908039569855, - "loss_sod": 0.11274217814207077, - "loss_total": 0.45922356843948364, - "step": 188999 - }, - { - "epoch": 0.026, - "grad_norm": 1.195783257484436, - "learning_rate": 3.852098490213663e-05, - "loss": 0.4854, - "step": 189000 - }, - { - "epoch": 0.026, - "eval_loss": 0.45650702714920044, - "eval_runtime": 151.2912, - "eval_samples_per_second": 102.075, - "eval_steps_per_second": 0.8, - "step": 189000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.517990589141846, - "loss_rtd": 0.2650156617164612, - "loss_sent": 0.13934160768985748, - "loss_sod": 0.009648462757468224, - "loss_total": 0.41400575637817383, - "step": 189099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.422589302062988, - "loss_rtd": 0.23975032567977905, - "loss_sent": 0.17650927603244781, - "loss_sod": 0.10824856907129288, - "loss_total": 0.5245081782341003, - "step": 189099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9580986499786377, - "learning_rate": 3.849010156109635e-05, - "loss": 0.4954, - "step": 189100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.617785930633545, - "loss_rtd": 0.27601534128189087, - "loss_sent": 0.10777447372674942, - "loss_sod": 0.05619753524661064, - "loss_total": 0.43998733162879944, - "step": 189199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.626599311828613, - "loss_rtd": 0.2704137861728668, - "loss_sent": 0.1494636982679367, - "loss_sod": 0.10374113917350769, - "loss_total": 0.5236186385154724, - "step": 189199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.9200600981712341, - "learning_rate": 3.845922285624181e-05, - "loss": 0.4844, - "step": 189200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.618977069854736, - "loss_rtd": 0.2506709694862366, - "loss_sent": 0.5057516098022461, - "loss_sod": 0.10134395956993103, - "loss_total": 0.8577665090560913, - "step": 189299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.481948375701904, - "loss_rtd": 0.25752022862434387, - "loss_sent": 0.26329484581947327, - "loss_sod": 0.06825120747089386, - "loss_total": 0.5890662670135498, - "step": 189299 - }, - { - "epoch": 0.0266, - "grad_norm": 2.118957042694092, - "learning_rate": 3.84283488000109e-05, - "loss": 0.4767, - "step": 189300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.345311164855957, - "loss_rtd": 0.24202558398246765, - "loss_sent": 0.18557079136371613, - "loss_sod": 0.057000309228897095, - "loss_total": 0.4845966696739197, - "step": 189399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.465376377105713, - "loss_rtd": 0.24598413705825806, - "loss_sent": 0.15849705040454865, - "loss_sod": 0.13609986007213593, - "loss_total": 0.5405810475349426, - "step": 189399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.19861900806427, - "learning_rate": 3.839747940483972e-05, - "loss": 0.4783, - "step": 189400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.405307769775391, - "loss_rtd": 0.23371540009975433, - "loss_sent": 0.10167175531387329, - "loss_sod": 0.02658150903880596, - "loss_total": 0.36196866631507874, - "step": 189499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.3316779136657715, - "loss_rtd": 0.27084195613861084, - "loss_sent": 0.1450253427028656, - "loss_sod": 0.04817929491400719, - "loss_total": 0.4640465974807739, - "step": 189499 - }, - { - "epoch": 0.027, - "grad_norm": 1.349503517150879, - "learning_rate": 3.836661468316244e-05, - "loss": 0.4964, - "step": 189500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.54126501083374, - "loss_rtd": 0.25931403040885925, - "loss_sent": 0.12344910949468613, - "loss_sod": 0.15993519127368927, - "loss_total": 0.542698323726654, - "step": 189599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.141296863555908, - "loss_rtd": 0.23213070631027222, - "loss_sent": 0.08870422840118408, - "loss_sod": 0.07632371038198471, - "loss_total": 0.3971586525440216, - "step": 189599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.0059212446212769, - "learning_rate": 3.833575464741139e-05, - "loss": 0.4847, - "step": 189600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.827796459197998, - "loss_rtd": 0.24768830835819244, - "loss_sent": 0.13497382402420044, - "loss_sod": 0.007975916378200054, - "loss_total": 0.3906380534172058, - "step": 189699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.769606590270996, - "loss_rtd": 0.2651318311691284, - "loss_sent": 0.2790696322917938, - "loss_sod": 0.056333690881729126, - "loss_total": 0.6005351543426514, - "step": 189699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.6840248107910156, - "learning_rate": 3.8304899310016956e-05, - "loss": 0.4949, - "step": 189700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.565116882324219, - "loss_rtd": 0.24932719767093658, - "loss_sent": 0.2298298180103302, - "loss_sod": 0.03319466486573219, - "loss_total": 0.5123516917228699, - "step": 189799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.100515365600586, - "loss_rtd": 0.2612711787223816, - "loss_sent": 0.16838902235031128, - "loss_sod": 0.0048184944316744804, - "loss_total": 0.4344787001609802, - "step": 189799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8379236459732056, - "learning_rate": 3.82740486834077e-05, - "loss": 0.5004, - "step": 189800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.489904403686523, - "loss_rtd": 0.2707536816596985, - "loss_sent": 0.1192086786031723, - "loss_sod": 0.013609429821372032, - "loss_total": 0.4035717844963074, - "step": 189899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.503792762756348, - "loss_rtd": 0.24232688546180725, - "loss_sent": 0.3279736340045929, - "loss_sod": 0.011655149981379509, - "loss_total": 0.5819556713104248, - "step": 189899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.9022887349128723, - "learning_rate": 3.824320278001021e-05, - "loss": 0.4742, - "step": 189900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.60560941696167, - "loss_rtd": 0.26560530066490173, - "loss_sent": 0.1791583150625229, - "loss_sod": 0.059306129813194275, - "loss_total": 0.5040697455406189, - "step": 189999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.363497257232666, - "loss_rtd": 0.23657403886318207, - "loss_sent": 0.14551042020320892, - "loss_sod": 0.020474769175052643, - "loss_total": 0.40255922079086304, - "step": 189999 - }, - { - "epoch": 0.028, - "grad_norm": 0.9289900064468384, - "learning_rate": 3.821236161224925e-05, - "loss": 0.4938, - "step": 190000 - }, - { - "epoch": 0.028, - "eval_loss": 0.4670124351978302, - "eval_runtime": 151.7151, - "eval_samples_per_second": 101.789, - "eval_steps_per_second": 0.798, - "step": 190000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.427304267883301, - "loss_rtd": 0.23809626698493958, - "loss_sent": 0.3506697714328766, - "loss_sod": 0.06381907314062119, - "loss_total": 0.6525851488113403, - "step": 190099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.639505386352539, - "loss_rtd": 0.2414628118276596, - "loss_sent": 0.14909490942955017, - "loss_sod": 0.02899942733347416, - "loss_total": 0.4195571541786194, - "step": 190099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.8516379594802856, - "learning_rate": 3.818152519254762e-05, - "loss": 0.4853, - "step": 190100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.43601131439209, - "loss_rtd": 0.26161977648735046, - "loss_sent": 0.19055239856243134, - "loss_sod": 0.1804932951927185, - "loss_total": 0.6326654553413391, - "step": 190199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.528129577636719, - "loss_rtd": 0.24942205846309662, - "loss_sent": 0.22511687874794006, - "loss_sod": 0.09172326326370239, - "loss_total": 0.5662622451782227, - "step": 190199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.180493712425232, - "learning_rate": 3.815069353332623e-05, - "loss": 0.4916, - "step": 190200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.4941534996032715, - "loss_rtd": 0.2445591390132904, - "loss_sent": 0.1911228448152542, - "loss_sod": 0.058923400938510895, - "loss_total": 0.4946053624153137, - "step": 190299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.534848690032959, - "loss_rtd": 0.2617496848106384, - "loss_sent": 0.24936708807945251, - "loss_sod": 0.13276061415672302, - "loss_total": 0.643877387046814, - "step": 190299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.6018524169921875, - "learning_rate": 3.811986664700406e-05, - "loss": 0.4897, - "step": 190300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.680985927581787, - "loss_rtd": 0.25227612257003784, - "loss_sent": 0.2787964344024658, - "loss_sod": 0.01892828568816185, - "loss_total": 0.5500008463859558, - "step": 190399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.267366886138916, - "loss_rtd": 0.24123340845108032, - "loss_sent": 0.10007834434509277, - "loss_sod": 0.044746894389390945, - "loss_total": 0.38605865836143494, - "step": 190399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.5841699242591858, - "learning_rate": 3.808904454599819e-05, - "loss": 0.4849, - "step": 190400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.461845397949219, - "loss_rtd": 0.2444579303264618, - "loss_sent": 0.4447803199291229, - "loss_sod": 0.0374327078461647, - "loss_total": 0.7266709804534912, - "step": 190499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.268030166625977, - "loss_rtd": 0.23218882083892822, - "loss_sent": 0.12373758852481842, - "loss_sod": 0.03155789524316788, - "loss_total": 0.3874843120574951, - "step": 190499 - }, - { - "epoch": 0.029, - "grad_norm": 1.5703704357147217, - "learning_rate": 3.805822724272376e-05, - "loss": 0.4973, - "step": 190500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.516118049621582, - "loss_rtd": 0.2561403214931488, - "loss_sent": 0.16193555295467377, - "loss_sod": 0.053360715508461, - "loss_total": 0.47143661975860596, - "step": 190599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.603597640991211, - "loss_rtd": 0.24327020347118378, - "loss_sent": 0.20391519367694855, - "loss_sod": 0.02561803162097931, - "loss_total": 0.47280341386795044, - "step": 190599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.8055551648139954, - "learning_rate": 3.8027414749593956e-05, - "loss": 0.489, - "step": 190600 - }, - { - "epoch": 0.029398, - "loss_gen": 6.1082258224487305, - "loss_rtd": 0.24867743253707886, - "loss_sent": 0.04290452226996422, - "loss_sod": 0.10100595653057098, - "loss_total": 0.39258792996406555, - "step": 190699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.504403114318848, - "loss_rtd": 0.23535284399986267, - "loss_sent": 0.16792336106300354, - "loss_sod": 0.060683224350214005, - "loss_total": 0.4639594256877899, - "step": 190699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.6784458160400391, - "learning_rate": 3.799660707902007e-05, - "loss": 0.4914, - "step": 190700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.532922267913818, - "loss_rtd": 0.2595323622226715, - "loss_sent": 0.09610046446323395, - "loss_sod": 0.049020085483789444, - "loss_total": 0.4046528935432434, - "step": 190799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.673886775970459, - "loss_rtd": 0.23863846063613892, - "loss_sent": 0.09060431271791458, - "loss_sod": 0.03512513265013695, - "loss_total": 0.36436790227890015, - "step": 190799 - }, - { - "epoch": 0.0296, - "grad_norm": 0.8528914451599121, - "learning_rate": 3.79658042434114e-05, - "loss": 0.4752, - "step": 190800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.7321014404296875, - "loss_rtd": 0.26183241605758667, - "loss_sent": 0.07424481213092804, - "loss_sod": 0.07845449447631836, - "loss_total": 0.4145317077636719, - "step": 190899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.275577545166016, - "loss_rtd": 0.25554800033569336, - "loss_sent": 0.23322120308876038, - "loss_sod": 0.06550353020429611, - "loss_total": 0.5542727708816528, - "step": 190899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.780219554901123, - "learning_rate": 3.793500625517536e-05, - "loss": 0.4749, - "step": 190900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.511038780212402, - "loss_rtd": 0.2445359081029892, - "loss_sent": 0.16977296769618988, - "loss_sod": 0.008282160386443138, - "loss_total": 0.42259103059768677, - "step": 190999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.605917930603027, - "loss_rtd": 0.2625270485877991, - "loss_sent": 0.3814159631729126, - "loss_sod": 0.046406544744968414, - "loss_total": 0.6903495788574219, - "step": 190999 - }, - { - "epoch": 0.03, - "grad_norm": 2.1270103454589844, - "learning_rate": 3.7904213126717346e-05, - "loss": 0.4942, - "step": 191000 - }, - { - "epoch": 0.03, - "eval_loss": 0.46074697375297546, - "eval_runtime": 151.3683, - "eval_samples_per_second": 102.023, - "eval_steps_per_second": 0.799, - "step": 191000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.758920192718506, - "loss_rtd": 0.26381030678749084, - "loss_sent": 0.18165838718414307, - "loss_sod": 0.09162928909063339, - "loss_total": 0.5370979905128479, - "step": 191099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.745611190795898, - "loss_rtd": 0.2590659260749817, - "loss_sent": 0.200775608420372, - "loss_sod": 0.042787425220012665, - "loss_total": 0.5026289820671082, - "step": 191099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.7340903878211975, - "learning_rate": 3.7873424870440846e-05, - "loss": 0.4867, - "step": 191100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.6203413009643555, - "loss_rtd": 0.2579295039176941, - "loss_sent": 0.161702960729599, - "loss_sod": 0.017896367236971855, - "loss_total": 0.4375288188457489, - "step": 191199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.602794170379639, - "loss_rtd": 0.2508043050765991, - "loss_sent": 0.30941370129585266, - "loss_sod": 0.08232814073562622, - "loss_total": 0.6425461769104004, - "step": 191199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.153231143951416, - "learning_rate": 3.7842641498747346e-05, - "loss": 0.484, - "step": 191200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.551522731781006, - "loss_rtd": 0.25297266244888306, - "loss_sent": 0.2190672904253006, - "loss_sod": 0.037652745842933655, - "loss_total": 0.5096926689147949, - "step": 191299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.213325500488281, - "loss_rtd": 0.27947014570236206, - "loss_sent": 0.45438188314437866, - "loss_sod": 0.011225146241486073, - "loss_total": 0.7450771331787109, - "step": 191299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.9284847974777222, - "learning_rate": 3.781186302403641e-05, - "loss": 0.4784, - "step": 191300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.694090843200684, - "loss_rtd": 0.24617375433444977, - "loss_sent": 0.052540965378284454, - "loss_sod": 0.03280903026461601, - "loss_total": 0.33152374625205994, - "step": 191399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.137330055236816, - "loss_rtd": 0.2720976173877716, - "loss_sent": 0.2972986698150635, - "loss_sod": 0.04297982156276703, - "loss_total": 0.6123760938644409, - "step": 191399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.1373209953308105, - "learning_rate": 3.778108945870558e-05, - "loss": 0.4877, - "step": 191400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.3562331199646, - "loss_rtd": 0.2669129967689514, - "loss_sent": 0.4924572706222534, - "loss_sod": 0.0187371838837862, - "loss_total": 0.7781074643135071, - "step": 191499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.238649845123291, - "loss_rtd": 0.25423958897590637, - "loss_sent": 0.09695713967084885, - "loss_sod": 0.04322301968932152, - "loss_total": 0.39441975951194763, - "step": 191499 - }, - { - "epoch": 0.031, - "grad_norm": 1.3981800079345703, - "learning_rate": 3.775032081515046e-05, - "loss": 0.4929, - "step": 191500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.644824981689453, - "loss_rtd": 0.2523040175437927, - "loss_sent": 0.44738587737083435, - "loss_sod": 0.055231668055057526, - "loss_total": 0.754921555519104, - "step": 191599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.5427117347717285, - "loss_rtd": 0.24850618839263916, - "loss_sent": 0.26900747418403625, - "loss_sod": 0.03090936876833439, - "loss_total": 0.5484230518341064, - "step": 191599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.1982362270355225, - "learning_rate": 3.7719557105764647e-05, - "loss": 0.4834, - "step": 191600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.413008689880371, - "loss_rtd": 0.2563272714614868, - "loss_sent": 0.03943207487463951, - "loss_sod": 0.17963504791259766, - "loss_total": 0.4753943979740143, - "step": 191699 - }, - { - "epoch": 0.031398, - "loss_gen": 4.911994934082031, - "loss_rtd": 0.22217217087745667, - "loss_sent": 0.019519370049238205, - "loss_sod": 0.06098729372024536, - "loss_total": 0.30267882347106934, - "step": 191699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.8048076629638672, - "learning_rate": 3.768879834293977e-05, - "loss": 0.4927, - "step": 191700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.637204647064209, - "loss_rtd": 0.2501910626888275, - "loss_sent": 0.3050091862678528, - "loss_sod": 0.09607332944869995, - "loss_total": 0.6512736082077026, - "step": 191799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.50172758102417, - "loss_rtd": 0.26181814074516296, - "loss_sent": 0.2631334662437439, - "loss_sod": 0.05659785866737366, - "loss_total": 0.5815494656562805, - "step": 191799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.9879064559936523, - "learning_rate": 3.765804453906544e-05, - "loss": 0.5056, - "step": 191800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.14209508895874, - "loss_rtd": 0.23424683511257172, - "loss_sent": 0.08079767227172852, - "loss_sod": 0.1852322518825531, - "loss_total": 0.5002767443656921, - "step": 191899 - }, - { - "epoch": 0.031798, - "loss_gen": 4.655786514282227, - "loss_rtd": 0.21770267188549042, - "loss_sent": 0.04880133271217346, - "loss_sod": 0.05604922026395798, - "loss_total": 0.32255321741104126, - "step": 191899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.380016565322876, - "learning_rate": 3.762729570652931e-05, - "loss": 0.5068, - "step": 191900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.879818916320801, - "loss_rtd": 0.24440042674541473, - "loss_sent": 0.07798027992248535, - "loss_sod": 0.07537493854761124, - "loss_total": 0.3977556526660919, - "step": 191999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.795013427734375, - "loss_rtd": 0.2571924328804016, - "loss_sent": 0.15835008025169373, - "loss_sod": 0.03647574782371521, - "loss_total": 0.45201826095581055, - "step": 191999 - }, - { - "epoch": 0.032, - "grad_norm": 1.2892613410949707, - "learning_rate": 3.7596551857716965e-05, - "loss": 0.497, - "step": 192000 - }, - { - "epoch": 0.032, - "eval_loss": 0.46215012669563293, - "eval_runtime": 151.4113, - "eval_samples_per_second": 101.994, - "eval_steps_per_second": 0.799, - "step": 192000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.767733097076416, - "loss_rtd": 0.26180925965309143, - "loss_sent": 0.07043591886758804, - "loss_sod": 0.0524330735206604, - "loss_total": 0.3846782445907593, - "step": 192099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.635196685791016, - "loss_rtd": 0.2711675465106964, - "loss_sent": 0.193241149187088, - "loss_sod": 0.0742715373635292, - "loss_total": 0.5386801958084106, - "step": 192099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.2892861366271973, - "learning_rate": 3.756581300501207e-05, - "loss": 0.485, - "step": 192100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.3160624504089355, - "loss_rtd": 0.25533467531204224, - "loss_sent": 0.6479426622390747, - "loss_sod": 0.05005098134279251, - "loss_total": 0.9533283114433289, - "step": 192199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.530961513519287, - "loss_rtd": 0.23777180910110474, - "loss_sent": 0.25572335720062256, - "loss_sod": 0.014990163967013359, - "loss_total": 0.5084853172302246, - "step": 192199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.9177531003952026, - "learning_rate": 3.75350791607962e-05, - "loss": 0.4737, - "step": 192200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.648280143737793, - "loss_rtd": 0.24090732634067535, - "loss_sent": 0.1316465139389038, - "loss_sod": 0.027110133320093155, - "loss_total": 0.3996639847755432, - "step": 192299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.590279579162598, - "loss_rtd": 0.2609612047672272, - "loss_sent": 0.18384705483913422, - "loss_sod": 0.010000656358897686, - "loss_total": 0.45480889081954956, - "step": 192299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.9537299871444702, - "learning_rate": 3.750435033744896e-05, - "loss": 0.4991, - "step": 192300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.547919273376465, - "loss_rtd": 0.24577993154525757, - "loss_sent": 0.31399571895599365, - "loss_sod": 0.08307299017906189, - "loss_total": 0.6428486108779907, - "step": 192399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.422171592712402, - "loss_rtd": 0.26553598046302795, - "loss_sent": 0.4638215899467468, - "loss_sod": 0.014869781211018562, - "loss_total": 0.7442273497581482, - "step": 192399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.79023277759552, - "learning_rate": 3.7473626547347904e-05, - "loss": 0.4946, - "step": 192400 - }, - { - "epoch": 0.000998, - "loss_gen": 4.781009197235107, - "loss_rtd": 0.21600483357906342, - "loss_sent": 3.5161818232154474e-05, - "loss_sod": 0.156089186668396, - "loss_total": 0.3721292018890381, - "step": 192499 - }, - { - "epoch": 0.000998, - "loss_gen": 4.706577777862549, - "loss_rtd": 0.22244736552238464, - "loss_sent": 0.013634216971695423, - "loss_sod": 0.043591342866420746, - "loss_total": 0.27967292070388794, - "step": 192499 - }, - { - "epoch": 0.001, - "grad_norm": 1.0063154697418213, - "learning_rate": 3.7442907802868574e-05, - "loss": 0.4752, - "step": 192500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.6759114265441895, - "loss_rtd": 0.2586769759654999, - "loss_sent": 0.23568876087665558, - "loss_sod": 0.04188964143395424, - "loss_total": 0.5362553596496582, - "step": 192599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.485069274902344, - "loss_rtd": 0.25091472268104553, - "loss_sent": 0.21204343438148499, - "loss_sod": 0.008346015587449074, - "loss_total": 0.47130417823791504, - "step": 192599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.9378650188446045, - "learning_rate": 3.7412194116384486e-05, - "loss": 0.4787, - "step": 192600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.312941074371338, - "loss_rtd": 0.2663998603820801, - "loss_sent": 0.12301906198263168, - "loss_sod": 0.06201421469449997, - "loss_total": 0.4514331519603729, - "step": 192699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.562491416931152, - "loss_rtd": 0.2644144594669342, - "loss_sent": 0.1517217457294464, - "loss_sod": 0.016208041459321976, - "loss_total": 0.4323442578315735, - "step": 192699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.7727892994880676, - "learning_rate": 3.738148550026711e-05, - "loss": 0.4632, - "step": 192700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.586191654205322, - "loss_rtd": 0.25658947229385376, - "loss_sent": 0.1271830052137375, - "loss_sod": 0.03610118106007576, - "loss_total": 0.4198736548423767, - "step": 192799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.758675575256348, - "loss_rtd": 0.26218181848526, - "loss_sent": 0.15812864899635315, - "loss_sod": 0.04964819177985191, - "loss_total": 0.46995866298675537, - "step": 192799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.7383379936218262, - "learning_rate": 3.735078196688585e-05, - "loss": 0.4839, - "step": 192800 - }, - { - "epoch": 0.001798, - "loss_gen": 4.558610439300537, - "loss_rtd": 0.2237866073846817, - "loss_sent": 4.1192786738974974e-05, - "loss_sod": 0.06296208500862122, - "loss_total": 0.2867898941040039, - "step": 192899 - }, - { - "epoch": 0.001798, - "loss_gen": 4.737308502197266, - "loss_rtd": 0.22315563261508942, - "loss_sent": 4.613067358150147e-05, - "loss_sod": 0.04467999190092087, - "loss_total": 0.26788175106048584, - "step": 192899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.7159841060638428, - "learning_rate": 3.732008352860811e-05, - "loss": 0.4753, - "step": 192900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.871915817260742, - "loss_rtd": 0.24594026803970337, - "loss_sent": 0.03623334318399429, - "loss_sod": 0.17563214898109436, - "loss_total": 0.4578057527542114, - "step": 192999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.12923526763916, - "loss_rtd": 0.24965007603168488, - "loss_sent": 9.992629202315584e-05, - "loss_sod": 0.15130211412906647, - "loss_total": 0.4010521173477173, - "step": 192999 - }, - { - "epoch": 0.002, - "grad_norm": 0.8945534229278564, - "learning_rate": 3.7289390197799203e-05, - "loss": 0.487, - "step": 193000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4674343764781952, - "eval_runtime": 153.7357, - "eval_samples_per_second": 100.452, - "eval_steps_per_second": 0.787, - "step": 193000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.556958198547363, - "loss_rtd": 0.2575167119503021, - "loss_sent": 0.14814774692058563, - "loss_sod": 0.053643129765987396, - "loss_total": 0.45930761098861694, - "step": 193099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.765564441680908, - "loss_rtd": 0.23487484455108643, - "loss_sent": 0.27274149656295776, - "loss_sod": 0.08774063736200333, - "loss_total": 0.5953569412231445, - "step": 193099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.7784550189971924, - "learning_rate": 3.7258701986822405e-05, - "loss": 0.4796, - "step": 193100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.610030174255371, - "loss_rtd": 0.2565581798553467, - "loss_sent": 0.11878179758787155, - "loss_sod": 0.02501131221652031, - "loss_total": 0.40035128593444824, - "step": 193199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.386488914489746, - "loss_rtd": 0.25022369623184204, - "loss_sent": 0.07101274281740189, - "loss_sod": 0.07276440411806107, - "loss_total": 0.3940008282661438, - "step": 193199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.0038328170776367, - "learning_rate": 3.722801890803892e-05, - "loss": 0.4982, - "step": 193200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.366093635559082, - "loss_rtd": 0.26855507493019104, - "loss_sent": 0.14672008156776428, - "loss_sod": 0.07533880323171616, - "loss_total": 0.4906139671802521, - "step": 193299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.110126972198486, - "loss_rtd": 0.24855197966098785, - "loss_sent": 0.07789073139429092, - "loss_sod": 0.20015639066696167, - "loss_total": 0.526599109172821, - "step": 193299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.3545650243759155, - "learning_rate": 3.71973409738079e-05, - "loss": 0.4714, - "step": 193300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.646805286407471, - "loss_rtd": 0.23973631858825684, - "loss_sent": 0.2123149037361145, - "loss_sod": 0.06509586423635483, - "loss_total": 0.5171470642089844, - "step": 193399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.550223350524902, - "loss_rtd": 0.25887084007263184, - "loss_sent": 0.08267856389284134, - "loss_sod": 0.09744960069656372, - "loss_total": 0.4389989972114563, - "step": 193399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.0914514064788818, - "learning_rate": 3.716666819648639e-05, - "loss": 0.4825, - "step": 193400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.455780029296875, - "loss_rtd": 0.23686279356479645, - "loss_sent": 0.18024176359176636, - "loss_sod": 0.012053391896188259, - "loss_total": 0.42915794253349304, - "step": 193499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.336060523986816, - "loss_rtd": 0.2880454361438751, - "loss_sent": 0.18380507826805115, - "loss_sod": 0.06229345127940178, - "loss_total": 0.5341439247131348, - "step": 193499 - }, - { - "epoch": 0.003, - "grad_norm": 0.9820837378501892, - "learning_rate": 3.7136000588429416e-05, - "loss": 0.475, - "step": 193500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.634300708770752, - "loss_rtd": 0.2416241616010666, - "loss_sent": 0.19586947560310364, - "loss_sod": 0.04599007964134216, - "loss_total": 0.4834837019443512, - "step": 193599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.745166778564453, - "loss_rtd": 0.26596733927726746, - "loss_sent": 0.28147685527801514, - "loss_sod": 0.025701254606246948, - "loss_total": 0.5731454491615295, - "step": 193599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.973056972026825, - "learning_rate": 3.7105338161989856e-05, - "loss": 0.489, - "step": 193600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.152373790740967, - "loss_rtd": 0.23187947273254395, - "loss_sent": 0.10266685485839844, - "loss_sod": 0.12113480269908905, - "loss_total": 0.4556811451911926, - "step": 193699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.242809295654297, - "loss_rtd": 0.24947988986968994, - "loss_sent": 4.132475805818103e-05, - "loss_sod": 0.07227280735969543, - "loss_total": 0.3217940330505371, - "step": 193699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.9321451187133789, - "learning_rate": 3.707468092951854e-05, - "loss": 0.472, - "step": 193700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.641763687133789, - "loss_rtd": 0.24687477946281433, - "loss_sent": 0.12674228847026825, - "loss_sod": 0.06126464158296585, - "loss_total": 0.43488168716430664, - "step": 193799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.771834373474121, - "loss_rtd": 0.26061493158340454, - "loss_sent": 0.14301107823848724, - "loss_sod": 0.10927961766719818, - "loss_total": 0.5129056572914124, - "step": 193799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.1897246837615967, - "learning_rate": 3.7044028903364206e-05, - "loss": 0.476, - "step": 193800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.51343297958374, - "loss_rtd": 0.25676435232162476, - "loss_sent": 0.26321089267730713, - "loss_sod": 0.051310814917087555, - "loss_total": 0.5712860822677612, - "step": 193899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.367305278778076, - "loss_rtd": 0.28087079524993896, - "loss_sent": 0.03771647810935974, - "loss_sod": 0.0246761292219162, - "loss_total": 0.3432634174823761, - "step": 193899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.9496843218803406, - "learning_rate": 3.7013382095873475e-05, - "loss": 0.4967, - "step": 193900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.242870807647705, - "loss_rtd": 0.24983222782611847, - "loss_sent": 0.09496738016605377, - "loss_sod": 0.0035881041549146175, - "loss_total": 0.3483877182006836, - "step": 193999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.543667793273926, - "loss_rtd": 0.25194668769836426, - "loss_sent": 0.06284943968057632, - "loss_sod": 0.11689009517431259, - "loss_total": 0.4316862225532532, - "step": 193999 - }, - { - "epoch": 0.004, - "grad_norm": 0.7864642143249512, - "learning_rate": 3.698274051939088e-05, - "loss": 0.4887, - "step": 194000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4566684365272522, - "eval_runtime": 150.8137, - "eval_samples_per_second": 102.398, - "eval_steps_per_second": 0.802, - "step": 194000 - }, - { - "epoch": 0.004198, - "loss_gen": 4.840829372406006, - "loss_rtd": 0.20772796869277954, - "loss_sent": 0.02202727273106575, - "loss_sod": 0.04938127100467682, - "loss_total": 0.2791365087032318, - "step": 194099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.729475021362305, - "loss_rtd": 0.2559773027896881, - "loss_sent": 0.1774718016386032, - "loss_sod": 0.026250924915075302, - "loss_total": 0.4597000479698181, - "step": 194099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.7378675937652588, - "learning_rate": 3.695210418625885e-05, - "loss": 0.4677, - "step": 194100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.102241516113281, - "loss_rtd": 0.24229399859905243, - "loss_sent": 0.14018964767456055, - "loss_sod": 0.10332176834344864, - "loss_total": 0.4858054220676422, - "step": 194199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.999631881713867, - "loss_rtd": 0.23838362097740173, - "loss_sent": 0.044546131044626236, - "loss_sod": 0.09325068444013596, - "loss_total": 0.3761804401874542, - "step": 194199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.932612955570221, - "learning_rate": 3.6921473108817694e-05, - "loss": 0.4791, - "step": 194200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.322678565979004, - "loss_rtd": 0.2504326105117798, - "loss_sent": 0.22586561739444733, - "loss_sod": 0.06177980825304985, - "loss_total": 0.5380780100822449, - "step": 194299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.666067123413086, - "loss_rtd": 0.2541863024234772, - "loss_sent": 0.3421761989593506, - "loss_sod": 0.06002382934093475, - "loss_total": 0.6563863158226013, - "step": 194299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.124372124671936, - "learning_rate": 3.6890847299405606e-05, - "loss": 0.4965, - "step": 194300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.198084831237793, - "loss_rtd": 0.24170055985450745, - "loss_sent": 0.06232727691531181, - "loss_sod": 0.03245178610086441, - "loss_total": 0.3364796042442322, - "step": 194399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.713399887084961, - "loss_rtd": 0.23639996349811554, - "loss_sent": 0.2283087968826294, - "loss_sod": 0.04151545464992523, - "loss_total": 0.5062242150306702, - "step": 194399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.6989062428474426, - "learning_rate": 3.6860226770358665e-05, - "loss": 0.4893, - "step": 194400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.571877479553223, - "loss_rtd": 0.25060853362083435, - "loss_sent": 0.11890272796154022, - "loss_sod": 0.0195518359541893, - "loss_total": 0.38906311988830566, - "step": 194499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.988839626312256, - "loss_rtd": 0.2519717216491699, - "loss_sent": 0.2186650186777115, - "loss_sod": 0.01724029891192913, - "loss_total": 0.4878770112991333, - "step": 194499 - }, - { - "epoch": 0.005, - "grad_norm": 1.1391091346740723, - "learning_rate": 3.682961153401079e-05, - "loss": 0.4908, - "step": 194500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.439150333404541, - "loss_rtd": 0.26181721687316895, - "loss_sent": 0.130909264087677, - "loss_sod": 0.01662658341228962, - "loss_total": 0.4093530774116516, - "step": 194599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.506926536560059, - "loss_rtd": 0.24847464263439178, - "loss_sent": 0.10631046444177628, - "loss_sod": 0.032716698944568634, - "loss_total": 0.3875018060207367, - "step": 194599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.6705906987190247, - "learning_rate": 3.679900160269384e-05, - "loss": 0.4979, - "step": 194600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.596723556518555, - "loss_rtd": 0.24961379170417786, - "loss_sent": 0.2417415827512741, - "loss_sod": 0.04811094328761101, - "loss_total": 0.5394663214683533, - "step": 194699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.437923908233643, - "loss_rtd": 0.24580544233322144, - "loss_sent": 0.056771207600831985, - "loss_sod": 0.03742823004722595, - "loss_total": 0.34000489115715027, - "step": 194699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.057392954826355, - "learning_rate": 3.676839698873744e-05, - "loss": 0.4881, - "step": 194700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.729447364807129, - "loss_rtd": 0.2617315649986267, - "loss_sent": 0.13916468620300293, - "loss_sod": 0.056938085705041885, - "loss_total": 0.4578343331813812, - "step": 194799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.596599578857422, - "loss_rtd": 0.2584453523159027, - "loss_sent": 0.25563913583755493, - "loss_sod": 0.0812462866306305, - "loss_total": 0.5953307747840881, - "step": 194799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.0611262321472168, - "learning_rate": 3.673779770446917e-05, - "loss": 0.4968, - "step": 194800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.796848773956299, - "loss_rtd": 0.2665272057056427, - "loss_sent": 0.24075986444950104, - "loss_sod": 0.026238219812512398, - "loss_total": 0.533525288105011, - "step": 194899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.1608662605285645, - "loss_rtd": 0.2742592990398407, - "loss_sent": 0.094719298183918, - "loss_sod": 0.018144870176911354, - "loss_total": 0.3871234655380249, - "step": 194899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.689104437828064, - "learning_rate": 3.670720376221439e-05, - "loss": 0.4718, - "step": 194900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.2695817947387695, - "loss_rtd": 0.24876317381858826, - "loss_sent": 0.12489506602287292, - "loss_sod": 0.05009578540921211, - "loss_total": 0.4237540364265442, - "step": 194999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.184060573577881, - "loss_rtd": 0.23411627113819122, - "loss_sent": 0.2702539563179016, - "loss_sod": 0.029751798138022423, - "loss_total": 0.5341219902038574, - "step": 194999 - }, - { - "epoch": 0.006, - "grad_norm": 0.7266300916671753, - "learning_rate": 3.667661517429635e-05, - "loss": 0.4924, - "step": 195000 - }, - { - "epoch": 0.006, - "eval_loss": 0.4612562954425812, - "eval_runtime": 152.1017, - "eval_samples_per_second": 101.531, - "eval_steps_per_second": 0.796, - "step": 195000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.542816162109375, - "loss_rtd": 0.27036115527153015, - "loss_sent": 0.2015909105539322, - "loss_sod": 0.02594519406557083, - "loss_total": 0.49789726734161377, - "step": 195099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.973165512084961, - "loss_rtd": 0.25184711813926697, - "loss_sent": 0.260888934135437, - "loss_sod": 0.04519077017903328, - "loss_total": 0.5579268336296082, - "step": 195099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.7794356346130371, - "learning_rate": 3.6646031953036125e-05, - "loss": 0.4899, - "step": 195100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.378973007202148, - "loss_rtd": 0.2437082976102829, - "loss_sent": 0.17770645022392273, - "loss_sod": 0.03263450786471367, - "loss_total": 0.4540492594242096, - "step": 195199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.3555097579956055, - "loss_rtd": 0.244486004114151, - "loss_sent": 0.28214266896247864, - "loss_sod": 0.07187218964099884, - "loss_total": 0.5985008478164673, - "step": 195199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.9849734306335449, - "learning_rate": 3.6615454110752624e-05, - "loss": 0.4725, - "step": 195200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.869761943817139, - "loss_rtd": 0.27491292357444763, - "loss_sent": 0.1574680358171463, - "loss_sod": 0.03220804035663605, - "loss_total": 0.46458899974823, - "step": 195299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.750090599060059, - "loss_rtd": 0.2437335103750229, - "loss_sent": 0.0854685828089714, - "loss_sod": 0.011373812332749367, - "loss_total": 0.3405759036540985, - "step": 195299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.5817446708679199, - "learning_rate": 3.658488165976261e-05, - "loss": 0.4895, - "step": 195300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.721882343292236, - "loss_rtd": 0.23500916361808777, - "loss_sent": 0.235703706741333, - "loss_sod": 0.030631324276328087, - "loss_total": 0.5013442039489746, - "step": 195399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.700669765472412, - "loss_rtd": 0.2499621957540512, - "loss_sent": 0.19793350994586945, - "loss_sod": 0.14146006107330322, - "loss_total": 0.5893557667732239, - "step": 195399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.9907777905464172, - "learning_rate": 3.655431461238066e-05, - "loss": 0.4936, - "step": 195400 - }, - { - "epoch": 0.006998, - "loss_gen": 4.884043216705322, - "loss_rtd": 0.2326071858406067, - "loss_sent": 0.03339262679219246, - "loss_sod": 0.035985104739665985, - "loss_total": 0.3019849359989166, - "step": 195499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.64555549621582, - "loss_rtd": 0.2768833637237549, - "loss_sent": 0.2613065540790558, - "loss_sod": 0.04346334934234619, - "loss_total": 0.5816532373428345, - "step": 195499 - }, - { - "epoch": 0.007, - "grad_norm": 0.7556312680244446, - "learning_rate": 3.652375298091918e-05, - "loss": 0.4794, - "step": 195500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.760225772857666, - "loss_rtd": 0.24749693274497986, - "loss_sent": 0.051286693662405014, - "loss_sod": 0.2220277190208435, - "loss_total": 0.5208113193511963, - "step": 195599 - }, - { - "epoch": 0.007198, - "loss_gen": 4.8289947509765625, - "loss_rtd": 0.21202883124351501, - "loss_sent": 0.028503786772489548, - "loss_sod": 0.03376930579543114, - "loss_total": 0.2743019163608551, - "step": 195599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.8307656645774841, - "learning_rate": 3.649319677768838e-05, - "loss": 0.4776, - "step": 195600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.164693355560303, - "loss_rtd": 0.21437622606754303, - "loss_sent": 0.06534356623888016, - "loss_sod": 0.16298896074295044, - "loss_total": 0.44270873069763184, - "step": 195699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.572518348693848, - "loss_rtd": 0.25925710797309875, - "loss_sent": 0.27719762921333313, - "loss_sod": 0.05138392373919487, - "loss_total": 0.5878386497497559, - "step": 195699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.486840009689331, - "learning_rate": 3.6462646014996317e-05, - "loss": 0.4763, - "step": 195700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.479996681213379, - "loss_rtd": 0.26533398032188416, - "loss_sent": 0.11786812543869019, - "loss_sod": 0.006996192038059235, - "loss_total": 0.390198290348053, - "step": 195799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.574079513549805, - "loss_rtd": 0.24514523148536682, - "loss_sent": 0.22541922330856323, - "loss_sod": 0.06325840950012207, - "loss_total": 0.5338228940963745, - "step": 195799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.063891887664795, - "learning_rate": 3.6432100705148796e-05, - "loss": 0.48, - "step": 195800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.226762294769287, - "loss_rtd": 0.24794931709766388, - "loss_sent": 0.18562978506088257, - "loss_sod": 0.05482611805200577, - "loss_total": 0.4884052276611328, - "step": 195899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.682345390319824, - "loss_rtd": 0.2275385856628418, - "loss_sent": 0.16026000678539276, - "loss_sod": 0.0994885042309761, - "loss_total": 0.48728710412979126, - "step": 195899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.9148600697517395, - "learning_rate": 3.640156086044951e-05, - "loss": 0.4788, - "step": 195900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.2727460861206055, - "loss_rtd": 0.24048146605491638, - "loss_sent": 0.41567525267601013, - "loss_sod": 0.06500697135925293, - "loss_total": 0.7211636900901794, - "step": 195999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.807211875915527, - "loss_rtd": 0.24590934813022614, - "loss_sent": 0.38504141569137573, - "loss_sod": 0.05467440187931061, - "loss_total": 0.6856251955032349, - "step": 195999 - }, - { - "epoch": 0.008, - "grad_norm": 1.636039137840271, - "learning_rate": 3.637102649319987e-05, - "loss": 0.4716, - "step": 196000 - }, - { - "epoch": 0.008, - "eval_loss": 0.45426931977272034, - "eval_runtime": 150.8469, - "eval_samples_per_second": 102.375, - "eval_steps_per_second": 0.802, - "step": 196000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.539201736450195, - "loss_rtd": 0.2573156952857971, - "loss_sent": 0.07600665837526321, - "loss_sod": 0.03368259221315384, - "loss_total": 0.367004930973053, - "step": 196099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.819047927856445, - "loss_rtd": 0.25339609384536743, - "loss_sent": 0.364510178565979, - "loss_sod": 0.02152218297123909, - "loss_total": 0.6394284963607788, - "step": 196099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.717594563961029, - "learning_rate": 3.634049761569914e-05, - "loss": 0.4637, - "step": 196100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.560566425323486, - "loss_rtd": 0.25853967666625977, - "loss_sent": 0.2929643988609314, - "loss_sod": 0.02935452200472355, - "loss_total": 0.580858588218689, - "step": 196199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.567770004272461, - "loss_rtd": 0.25819912552833557, - "loss_sent": 0.244191512465477, - "loss_sod": 0.08314001560211182, - "loss_total": 0.5855306386947632, - "step": 196199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.7716864347457886, - "learning_rate": 3.6309974240244326e-05, - "loss": 0.4732, - "step": 196200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.5980119705200195, - "loss_rtd": 0.2434452772140503, - "loss_sent": 0.17914623022079468, - "loss_sod": 0.015952210873365402, - "loss_total": 0.4385437071323395, - "step": 196299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.570590972900391, - "loss_rtd": 0.2433573305606842, - "loss_sent": 0.17294123768806458, - "loss_sod": 0.029650865122675896, - "loss_total": 0.4459494352340698, - "step": 196299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.742214024066925, - "learning_rate": 3.6279456379130263e-05, - "loss": 0.4854, - "step": 196300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.177133083343506, - "loss_rtd": 0.23149006068706512, - "loss_sent": 0.005693112034350634, - "loss_sod": 0.09032922983169556, - "loss_total": 0.3275124132633209, - "step": 196399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.464236259460449, - "loss_rtd": 0.23897162079811096, - "loss_sent": 0.389897882938385, - "loss_sod": 0.039472367614507675, - "loss_total": 0.668341875076294, - "step": 196399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.2947161197662354, - "learning_rate": 3.624894404464951e-05, - "loss": 0.4892, - "step": 196400 - }, - { - "epoch": 0.008998, - "loss_gen": 4.962997913360596, - "loss_rtd": 0.22442328929901123, - "loss_sent": 0.01633565127849579, - "loss_sod": 0.13329781591892242, - "loss_total": 0.37405675649642944, - "step": 196499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.450702667236328, - "loss_rtd": 0.24412818253040314, - "loss_sent": 0.32243049144744873, - "loss_sod": 0.02968277968466282, - "loss_total": 0.5962414741516113, - "step": 196499 - }, - { - "epoch": 0.009, - "grad_norm": 1.5923128128051758, - "learning_rate": 3.6218437249092474e-05, - "loss": 0.4792, - "step": 196500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.418288230895996, - "loss_rtd": 0.263407826423645, - "loss_sent": 0.4554015100002289, - "loss_sod": 0.06364298611879349, - "loss_total": 0.7824523448944092, - "step": 196599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.380255222320557, - "loss_rtd": 0.24268421530723572, - "loss_sent": 0.12270950525999069, - "loss_sod": 0.09936580806970596, - "loss_total": 0.4647595286369324, - "step": 196599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.7431308031082153, - "learning_rate": 3.6187936004747245e-05, - "loss": 0.475, - "step": 196600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.645377159118652, - "loss_rtd": 0.25081971287727356, - "loss_sent": 0.2932474613189697, - "loss_sod": 0.0416770838201046, - "loss_total": 0.5857442617416382, - "step": 196699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.848897457122803, - "loss_rtd": 0.22943075001239777, - "loss_sent": 0.0503636933863163, - "loss_sod": 0.06484819203615189, - "loss_total": 0.34464263916015625, - "step": 196699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.7886890769004822, - "learning_rate": 3.615744032389976e-05, - "loss": 0.4823, - "step": 196700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.389031887054443, - "loss_rtd": 0.2638556659221649, - "loss_sent": 0.11603834480047226, - "loss_sod": 0.07925192266702652, - "loss_total": 0.4591459333896637, - "step": 196799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.689804553985596, - "loss_rtd": 0.2472999393939972, - "loss_sent": 0.10590516775846481, - "loss_sod": 0.07291129976511002, - "loss_total": 0.426116406917572, - "step": 196799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.5039016008377075, - "learning_rate": 3.612695021883366e-05, - "loss": 0.4964, - "step": 196800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.939685821533203, - "loss_rtd": 0.23807507753372192, - "loss_sent": 0.004213188774883747, - "loss_sod": 0.06944707036018372, - "loss_total": 0.3117353320121765, - "step": 196899 - }, - { - "epoch": 0.009798, - "loss_gen": 4.869617938995361, - "loss_rtd": 0.22580496966838837, - "loss_sent": 0.004214581102132797, - "loss_sod": 0.1402873396873474, - "loss_total": 0.37030690908432007, - "step": 196899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.9130590558052063, - "learning_rate": 3.609646570183033e-05, - "loss": 0.4772, - "step": 196900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.155139446258545, - "loss_rtd": 0.2475593388080597, - "loss_sent": 0.24393866956233978, - "loss_sod": 0.11925341188907623, - "loss_total": 0.6107514500617981, - "step": 196999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.7448320388793945, - "loss_rtd": 0.2576092779636383, - "loss_sent": 0.19933441281318665, - "loss_sod": 0.021516328677535057, - "loss_total": 0.47846001386642456, - "step": 196999 - }, - { - "epoch": 0.01, - "grad_norm": 1.9248608350753784, - "learning_rate": 3.606598678516897e-05, - "loss": 0.4839, - "step": 197000 - }, - { - "epoch": 0.01, - "eval_loss": 0.45972615480422974, - "eval_runtime": 150.855, - "eval_samples_per_second": 102.37, - "eval_steps_per_second": 0.802, - "step": 197000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.392733097076416, - "loss_rtd": 0.24464982748031616, - "loss_sent": 0.26841703057289124, - "loss_sod": 0.09383723884820938, - "loss_total": 0.6069040894508362, - "step": 197099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.601526737213135, - "loss_rtd": 0.25071266293525696, - "loss_sent": 0.13358637690544128, - "loss_sod": 0.026629824191331863, - "loss_total": 0.410928875207901, - "step": 197099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.9692697525024414, - "learning_rate": 3.603551348112646e-05, - "loss": 0.4612, - "step": 197100 - }, - { - "epoch": 0.010398, - "loss_gen": 4.954998016357422, - "loss_rtd": 0.2352873533964157, - "loss_sent": 0.01572391204535961, - "loss_sod": 0.03777249529957771, - "loss_total": 0.2887837588787079, - "step": 197199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.429396629333496, - "loss_rtd": 0.26083338260650635, - "loss_sent": 0.06994747370481491, - "loss_sod": 0.11270473152399063, - "loss_total": 0.4434855878353119, - "step": 197199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.049393892288208, - "learning_rate": 3.600504580197746e-05, - "loss": 0.4724, - "step": 197200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.400674343109131, - "loss_rtd": 0.2626436948776245, - "loss_sent": 0.7087818384170532, - "loss_sod": 0.10867201536893845, - "loss_total": 1.0800975561141968, - "step": 197299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.000377178192139, - "loss_rtd": 0.226999431848526, - "loss_sent": 0.2798299193382263, - "loss_sod": 0.036855921149253845, - "loss_total": 0.543685257434845, - "step": 197299 - }, - { - "epoch": 0.0106, - "grad_norm": 2.1214144229888916, - "learning_rate": 3.597458375999432e-05, - "loss": 0.4891, - "step": 197300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.523964881896973, - "loss_rtd": 0.2674618363380432, - "loss_sent": 0.2527769207954407, - "loss_sod": 0.037127744406461716, - "loss_total": 0.5573664903640747, - "step": 197399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.307008743286133, - "loss_rtd": 0.24416065216064453, - "loss_sent": 0.6054388284683228, - "loss_sod": 0.014565913006663322, - "loss_total": 0.8641654253005981, - "step": 197399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.8312748670578003, - "learning_rate": 3.5944127367447176e-05, - "loss": 0.4766, - "step": 197400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.318966865539551, - "loss_rtd": 0.23821872472763062, - "loss_sent": 0.08623597025871277, - "loss_sod": 0.00946864951401949, - "loss_total": 0.33392333984375, - "step": 197499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.584272861480713, - "loss_rtd": 0.23597374558448792, - "loss_sent": 0.330825537443161, - "loss_sod": 0.08103427290916443, - "loss_total": 0.6478335857391357, - "step": 197499 - }, - { - "epoch": 0.011, - "grad_norm": 1.9473567008972168, - "learning_rate": 3.591367663660384e-05, - "loss": 0.4811, - "step": 197500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.993553161621094, - "loss_rtd": 0.2804194986820221, - "loss_sent": 0.05455870181322098, - "loss_sod": 0.03569484502077103, - "loss_total": 0.3706730306148529, - "step": 197599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.681451320648193, - "loss_rtd": 0.2488894909620285, - "loss_sent": 0.3947000801563263, - "loss_sod": 0.06296908110380173, - "loss_total": 0.7065586447715759, - "step": 197599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.9109740853309631, - "learning_rate": 3.588323157972988e-05, - "loss": 0.4914, - "step": 197600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.435794353485107, - "loss_rtd": 0.25761738419532776, - "loss_sent": 0.2595762312412262, - "loss_sod": 0.04328524321317673, - "loss_total": 0.5604788661003113, - "step": 197699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.800150394439697, - "loss_rtd": 0.24064289033412933, - "loss_sent": 0.1380920112133026, - "loss_sod": 0.046730682253837585, - "loss_total": 0.42546558380126953, - "step": 197699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.6069086790084839, - "learning_rate": 3.585279220908854e-05, - "loss": 0.4721, - "step": 197700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.557639122009277, - "loss_rtd": 0.246734157204628, - "loss_sent": 0.1797482669353485, - "loss_sod": 0.05293069779872894, - "loss_total": 0.47941312193870544, - "step": 197799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.796323776245117, - "loss_rtd": 0.23539410531520844, - "loss_sent": 0.24460507929325104, - "loss_sod": 0.0563870333135128, - "loss_total": 0.5363861918449402, - "step": 197799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.7594579458236694, - "learning_rate": 3.582235853694082e-05, - "loss": 0.476, - "step": 197800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.968145370483398, - "loss_rtd": 0.2510281801223755, - "loss_sent": 0.10709671676158905, - "loss_sod": 0.018377184867858887, - "loss_total": 0.3765020966529846, - "step": 197899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.510294437408447, - "loss_rtd": 0.24616675078868866, - "loss_sent": 0.37081557512283325, - "loss_sod": 0.03428245335817337, - "loss_total": 0.6512647867202759, - "step": 197899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.0171705484390259, - "learning_rate": 3.5791930575545377e-05, - "loss": 0.481, - "step": 197900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.905046463012695, - "loss_rtd": 0.22450122237205505, - "loss_sent": 0.013790993019938469, - "loss_sod": 0.0523202121257782, - "loss_total": 0.29061242938041687, - "step": 197999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.369077205657959, - "loss_rtd": 0.23604454100131989, - "loss_sent": 0.01143306028097868, - "loss_sod": 0.13096845149993896, - "loss_total": 0.3784460425376892, - "step": 197999 - }, - { - "epoch": 0.012, - "grad_norm": 0.7122476100921631, - "learning_rate": 3.57615083371586e-05, - "loss": 0.488, - "step": 198000 - }, - { - "epoch": 0.012, - "eval_loss": 0.44978225231170654, - "eval_runtime": 150.9294, - "eval_samples_per_second": 102.319, - "eval_steps_per_second": 0.802, - "step": 198000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.49887752532959, - "loss_rtd": 0.2237449735403061, - "loss_sent": 0.20722432434558868, - "loss_sod": 0.12202988564968109, - "loss_total": 0.5529991984367371, - "step": 198099 - }, - { - "epoch": 0.012198, - "loss_gen": 4.660391330718994, - "loss_rtd": 0.21641302108764648, - "loss_sent": 0.019336359575390816, - "loss_sod": 0.016802601516246796, - "loss_total": 0.25255200266838074, - "step": 198099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.9483657479286194, - "learning_rate": 3.573109183403456e-05, - "loss": 0.4686, - "step": 198100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.383784294128418, - "loss_rtd": 0.2824476361274719, - "loss_sent": 0.3660046458244324, - "loss_sod": 0.030124733224511147, - "loss_total": 0.6785770058631897, - "step": 198199 - }, - { - "epoch": 0.012398, - "loss_gen": 6.0347113609313965, - "loss_rtd": 0.25417864322662354, - "loss_sent": 0.08456467092037201, - "loss_sod": 0.08676651865243912, - "loss_total": 0.42550981044769287, - "step": 198199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.4950041770935059, - "learning_rate": 3.570068107842503e-05, - "loss": 0.4627, - "step": 198200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.719231128692627, - "loss_rtd": 0.2484939843416214, - "loss_sent": 0.09971030056476593, - "loss_sod": 0.10634118318557739, - "loss_total": 0.45454543828964233, - "step": 198299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.544730186462402, - "loss_rtd": 0.2531845271587372, - "loss_sent": 0.22829610109329224, - "loss_sod": 0.1020435094833374, - "loss_total": 0.5835241079330444, - "step": 198299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.1413614749908447, - "learning_rate": 3.567027608257945e-05, - "loss": 0.4828, - "step": 198300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.189231872558594, - "loss_rtd": 0.2279975712299347, - "loss_sent": 0.06486871838569641, - "loss_sod": 0.00765463849529624, - "loss_total": 0.3005209267139435, - "step": 198399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.541438102722168, - "loss_rtd": 0.23647157847881317, - "loss_sent": 0.036401890218257904, - "loss_sod": 0.03704839199781418, - "loss_total": 0.30992186069488525, - "step": 198399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.569990873336792, - "learning_rate": 3.5639876858744945e-05, - "loss": 0.4726, - "step": 198400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.509321689605713, - "loss_rtd": 0.25453636050224304, - "loss_sent": 0.2132350355386734, - "loss_sod": 0.02199200913310051, - "loss_total": 0.48976337909698486, - "step": 198499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.463100433349609, - "loss_rtd": 0.24901433289051056, - "loss_sent": 0.2661890685558319, - "loss_sod": 0.028923040255904198, - "loss_total": 0.5441264510154724, - "step": 198499 - }, - { - "epoch": 0.013, - "grad_norm": 1.5464208126068115, - "learning_rate": 3.5609483419166335e-05, - "loss": 0.483, - "step": 198500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.566218376159668, - "loss_rtd": 0.24107764661312103, - "loss_sent": 0.0769277960062027, - "loss_sod": 0.05446183308959007, - "loss_total": 0.3724672794342041, - "step": 198599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.630220413208008, - "loss_rtd": 0.2496069222688675, - "loss_sent": 0.005942351184785366, - "loss_sod": 0.18867163360118866, - "loss_total": 0.4442209005355835, - "step": 198599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.7546682953834534, - "learning_rate": 3.557909577608607e-05, - "loss": 0.4859, - "step": 198600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.5324506759643555, - "loss_rtd": 0.25724464654922485, - "loss_sent": 0.2177625298500061, - "loss_sod": 0.04412589967250824, - "loss_total": 0.5191330909729004, - "step": 198699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.545251846313477, - "loss_rtd": 0.24980393052101135, - "loss_sent": 0.08665682375431061, - "loss_sod": 0.021374624222517014, - "loss_total": 0.3578353524208069, - "step": 198699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.0699278116226196, - "learning_rate": 3.5548713941744305e-05, - "loss": 0.4846, - "step": 198700 - }, - { - "epoch": 0.013598, - "loss_gen": 6.274804592132568, - "loss_rtd": 0.24994046986103058, - "loss_sent": 0.1418921798467636, - "loss_sod": 0.10209321230649948, - "loss_total": 0.49392586946487427, - "step": 198799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.959682464599609, - "loss_rtd": 0.26186472177505493, - "loss_sent": 0.11118325591087341, - "loss_sod": 0.021411027759313583, - "loss_total": 0.3944590091705322, - "step": 198799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.8901879787445068, - "learning_rate": 3.551833792837883e-05, - "loss": 0.4829, - "step": 198800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.676726818084717, - "loss_rtd": 0.24532736837863922, - "loss_sent": 0.21729162335395813, - "loss_sod": 0.014285311102867126, - "loss_total": 0.4769043028354645, - "step": 198899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.671530723571777, - "loss_rtd": 0.269016832113266, - "loss_sent": 0.22904743254184723, - "loss_sod": 0.029550496488809586, - "loss_total": 0.5276147723197937, - "step": 198899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.8333650231361389, - "learning_rate": 3.5487967748225124e-05, - "loss": 0.4979, - "step": 198900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.445819854736328, - "loss_rtd": 0.25361278653144836, - "loss_sent": 0.18403823673725128, - "loss_sod": 0.03664318472146988, - "loss_total": 0.47429418563842773, - "step": 198999 - }, - { - "epoch": 0.013998, - "loss_gen": 4.973983287811279, - "loss_rtd": 0.2397686392068863, - "loss_sent": 0.04354400560259819, - "loss_sod": 0.14111952483654022, - "loss_total": 0.4244321584701538, - "step": 198999 - }, - { - "epoch": 0.014, - "grad_norm": 0.7778320908546448, - "learning_rate": 3.545760341351625e-05, - "loss": 0.4703, - "step": 199000 - }, - { - "epoch": 0.014, - "eval_loss": 0.46094122529029846, - "eval_runtime": 150.9227, - "eval_samples_per_second": 102.324, - "eval_steps_per_second": 0.802, - "step": 199000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.699524879455566, - "loss_rtd": 0.25735366344451904, - "loss_sent": 0.4843277335166931, - "loss_sod": 0.019896212965250015, - "loss_total": 0.7615776062011719, - "step": 199099 - }, - { - "epoch": 0.014198, - "loss_gen": 4.988870143890381, - "loss_rtd": 0.22112753987312317, - "loss_sent": 0.07448185980319977, - "loss_sod": 0.1123475506901741, - "loss_total": 0.40795695781707764, - "step": 199099 - }, - { - "epoch": 0.0142, - "grad_norm": 2.0350053310394287, - "learning_rate": 3.542724493648301e-05, - "loss": 0.4829, - "step": 199100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.903702735900879, - "loss_rtd": 0.21473678946495056, - "loss_sent": 0.135924831032753, - "loss_sod": 0.035691987723112106, - "loss_total": 0.38635361194610596, - "step": 199199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.458212375640869, - "loss_rtd": 0.2825135886669159, - "loss_sent": 0.2972809672355652, - "loss_sod": 0.09804816544055939, - "loss_total": 0.6778427362442017, - "step": 199199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.107661485671997, - "learning_rate": 3.5396892329353737e-05, - "loss": 0.476, - "step": 199200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.409221172332764, - "loss_rtd": 0.23438437283039093, - "loss_sent": 0.16067595779895782, - "loss_sod": 0.02327580936253071, - "loss_total": 0.4183361530303955, - "step": 199299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.573988914489746, - "loss_rtd": 0.23705151677131653, - "loss_sent": 0.25113776326179504, - "loss_sod": 0.0523638054728508, - "loss_total": 0.540553092956543, - "step": 199299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.7476825714111328, - "learning_rate": 3.536654560435451e-05, - "loss": 0.4751, - "step": 199300 - }, - { - "epoch": 0.014798, - "loss_gen": 4.950931072235107, - "loss_rtd": 0.21000342071056366, - "loss_sent": 0.07651349902153015, - "loss_sod": 0.13793537020683289, - "loss_total": 0.4244522750377655, - "step": 199399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.381289005279541, - "loss_rtd": 0.23568496108055115, - "loss_sent": 0.20836390554904938, - "loss_sod": 0.02635544165968895, - "loss_total": 0.47040432691574097, - "step": 199399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.5315524339675903, - "learning_rate": 3.533620477370895e-05, - "loss": 0.4691, - "step": 199400 - }, - { - "epoch": 0.014998, - "loss_gen": 6.159007549285889, - "loss_rtd": 0.2743111252784729, - "loss_sent": 0.06190488860011101, - "loss_sod": 0.1761845052242279, - "loss_total": 0.5124005079269409, - "step": 199499 - }, - { - "epoch": 0.014998, - "loss_gen": 6.015754699707031, - "loss_rtd": 0.24755114316940308, - "loss_sent": 0.17315152287483215, - "loss_sod": 0.09640425443649292, - "loss_total": 0.5171068906784058, - "step": 199499 - }, - { - "epoch": 0.015, - "grad_norm": 1.2112517356872559, - "learning_rate": 3.5305869849638365e-05, - "loss": 0.4766, - "step": 199500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.525232315063477, - "loss_rtd": 0.25431257486343384, - "loss_sent": 0.06290171295404434, - "loss_sod": 0.027303146198391914, - "loss_total": 0.34451743960380554, - "step": 199599 - }, - { - "epoch": 0.015198, - "loss_gen": 4.814101219177246, - "loss_rtd": 0.23179247975349426, - "loss_sent": 0.015164760872721672, - "loss_sod": 0.08381253480911255, - "loss_total": 0.33076977729797363, - "step": 199599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.9078371524810791, - "learning_rate": 3.527554084436163e-05, - "loss": 0.4777, - "step": 199600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.657907485961914, - "loss_rtd": 0.2604738473892212, - "loss_sent": 0.33068281412124634, - "loss_sod": 0.017987214028835297, - "loss_total": 0.609143853187561, - "step": 199699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.430020809173584, - "loss_rtd": 0.261460542678833, - "loss_sent": 0.08397623896598816, - "loss_sod": 0.0045923409052193165, - "loss_total": 0.3500291109085083, - "step": 199699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.057021975517273, - "learning_rate": 3.52452177700953e-05, - "loss": 0.4704, - "step": 199700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.489025592803955, - "loss_rtd": 0.2676151990890503, - "loss_sent": 0.2924831509590149, - "loss_sod": 0.03531592711806297, - "loss_total": 0.5954142808914185, - "step": 199799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.577322483062744, - "loss_rtd": 0.23318113386631012, - "loss_sent": 0.1402457058429718, - "loss_sod": 0.010187160223722458, - "loss_total": 0.3836140036582947, - "step": 199799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.8963132500648499, - "learning_rate": 3.5214900639053474e-05, - "loss": 0.4863, - "step": 199800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.575000762939453, - "loss_rtd": 0.25459951162338257, - "loss_sent": 0.09506849944591522, - "loss_sod": 0.046479783952236176, - "loss_total": 0.39614778757095337, - "step": 199899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.3990478515625, - "loss_rtd": 0.27562594413757324, - "loss_sent": 0.3519396483898163, - "loss_sod": 0.021528389304876328, - "loss_total": 0.6490939855575562, - "step": 199899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.0373303890228271, - "learning_rate": 3.5184589463447916e-05, - "loss": 0.4584, - "step": 199900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.785987377166748, - "loss_rtd": 0.23527689278125763, - "loss_sent": 0.3650191128253937, - "loss_sod": 0.07376965880393982, - "loss_total": 0.6740657091140747, - "step": 199999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.684935569763184, - "loss_rtd": 0.2589579224586487, - "loss_sent": 0.2833992838859558, - "loss_sod": 0.014439928345382214, - "loss_total": 0.5567971467971802, - "step": 199999 - }, - { - "epoch": 0.016, - "grad_norm": 1.6609793901443481, - "learning_rate": 3.5154284255487945e-05, - "loss": 0.4624, - "step": 200000 - }, - { - "epoch": 0.016, - "eval_loss": 0.45440730452537537, - "eval_runtime": 151.123, - "eval_samples_per_second": 102.188, - "eval_steps_per_second": 0.801, - "step": 200000 - }, - { - "epoch": 0.016198, - "loss_gen": 6.422415256500244, - "loss_rtd": 0.2642577886581421, - "loss_sent": 0.052832260727882385, - "loss_sod": 0.13694913685321808, - "loss_total": 0.45403915643692017, - "step": 200099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.789581298828125, - "loss_rtd": 0.25363168120384216, - "loss_sent": 0.171161949634552, - "loss_sod": 0.011497782543301582, - "loss_total": 0.4362914264202118, - "step": 200099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.7341986894607544, - "learning_rate": 3.51239850273805e-05, - "loss": 0.4692, - "step": 200100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.698653221130371, - "loss_rtd": 0.26942676305770874, - "loss_sent": 0.24518704414367676, - "loss_sod": 0.006007281132042408, - "loss_total": 0.5206210613250732, - "step": 200199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.7527923583984375, - "loss_rtd": 0.2350677251815796, - "loss_sent": 0.25851503014564514, - "loss_sod": 0.04473987966775894, - "loss_total": 0.5383226275444031, - "step": 200199 - }, - { - "epoch": 0.0164, - "grad_norm": 2.361886501312256, - "learning_rate": 3.509369179133011e-05, - "loss": 0.4804, - "step": 200200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.395769119262695, - "loss_rtd": 0.26046210527420044, - "loss_sent": 0.14961999654769897, - "loss_sod": 0.03154686838388443, - "loss_total": 0.44162896275520325, - "step": 200299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.759584426879883, - "loss_rtd": 0.2427106350660324, - "loss_sent": 0.36849498748779297, - "loss_sod": 0.07145502418279648, - "loss_total": 0.6826606392860413, - "step": 200299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.9213209748268127, - "learning_rate": 3.506340455953887e-05, - "loss": 0.4711, - "step": 200300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.742183685302734, - "loss_rtd": 0.23548494279384613, - "loss_sent": 0.14038434624671936, - "loss_sod": 0.0410086065530777, - "loss_total": 0.4168778955936432, - "step": 200399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.646134376525879, - "loss_rtd": 0.23521903157234192, - "loss_sent": 0.10856125503778458, - "loss_sod": 0.13441947102546692, - "loss_total": 0.4781997501850128, - "step": 200399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.8273062705993652, - "learning_rate": 3.50331233442065e-05, - "loss": 0.4698, - "step": 200400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.391009330749512, - "loss_rtd": 0.25922268629074097, - "loss_sent": 0.22874972224235535, - "loss_sod": 0.159585639834404, - "loss_total": 0.6475580930709839, - "step": 200499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.653717994689941, - "loss_rtd": 0.2337595820426941, - "loss_sent": 0.10643959790468216, - "loss_sod": 0.009374690242111683, - "loss_total": 0.34957388043403625, - "step": 200499 - }, - { - "epoch": 0.017, - "grad_norm": 1.0434125661849976, - "learning_rate": 3.500284815753025e-05, - "loss": 0.492, - "step": 200500 - }, - { - "epoch": 0.017198, - "loss_gen": 4.748563766479492, - "loss_rtd": 0.22266308963298798, - "loss_sent": 3.562410347512923e-05, - "loss_sod": 0.1355685293674469, - "loss_total": 0.358267217874527, - "step": 200599 - }, - { - "epoch": 0.017198, - "loss_gen": 4.907251358032227, - "loss_rtd": 0.21454456448554993, - "loss_sent": 0.02917483262717724, - "loss_sod": 0.17345693707466125, - "loss_total": 0.41717633605003357, - "step": 200599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.0818666219711304, - "learning_rate": 3.497257901170497e-05, - "loss": 0.4957, - "step": 200600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.9142842292785645, - "loss_rtd": 0.25485530495643616, - "loss_sent": 0.09352286159992218, - "loss_sod": 0.028299875557422638, - "loss_total": 0.3766780495643616, - "step": 200699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.5983428955078125, - "loss_rtd": 0.25049030780792236, - "loss_sent": 0.19364747405052185, - "loss_sod": 0.04774875566363335, - "loss_total": 0.49188652634620667, - "step": 200699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.9354696273803711, - "learning_rate": 3.494231591892307e-05, - "loss": 0.4801, - "step": 200700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.382093906402588, - "loss_rtd": 0.24713316559791565, - "loss_sent": 0.032326988875865936, - "loss_sod": 0.07704608142375946, - "loss_total": 0.35650622844696045, - "step": 200799 - }, - { - "epoch": 0.017598, - "loss_gen": 4.812257289886475, - "loss_rtd": 0.21914120018482208, - "loss_sent": 2.922447310993448e-05, - "loss_sod": 0.12229741364717484, - "loss_total": 0.34146785736083984, - "step": 200799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.7055344581604004, - "learning_rate": 3.4912058891374525e-05, - "loss": 0.465, - "step": 200800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.679133892059326, - "loss_rtd": 0.2276810258626938, - "loss_sent": 0.15405045449733734, - "loss_sod": 0.02448256127536297, - "loss_total": 0.40621405839920044, - "step": 200899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.473849773406982, - "loss_rtd": 0.2811950147151947, - "loss_sent": 0.43161195516586304, - "loss_sod": 0.017364630475640297, - "loss_total": 0.7301715612411499, - "step": 200899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.1705329418182373, - "learning_rate": 3.4881807941246844e-05, - "loss": 0.488, - "step": 200900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.494246006011963, - "loss_rtd": 0.26030996441841125, - "loss_sent": 0.24041537940502167, - "loss_sod": 0.01098925806581974, - "loss_total": 0.5117145776748657, - "step": 200999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.916234970092773, - "loss_rtd": 0.26386576890945435, - "loss_sent": 0.11231882870197296, - "loss_sod": 0.08995687961578369, - "loss_total": 0.4661414623260498, - "step": 200999 - }, - { - "epoch": 0.018, - "grad_norm": 0.74242103099823, - "learning_rate": 3.485156308072512e-05, - "loss": 0.4635, - "step": 201000 - }, - { - "epoch": 0.018, - "eval_loss": 0.45967555046081543, - "eval_runtime": 152.3511, - "eval_samples_per_second": 101.365, - "eval_steps_per_second": 0.794, - "step": 201000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.238211154937744, - "loss_rtd": 0.22699862718582153, - "loss_sent": 0.16258226335048676, - "loss_sod": 0.06895344704389572, - "loss_total": 0.4585343599319458, - "step": 201099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.443732738494873, - "loss_rtd": 0.2511044442653656, - "loss_sent": 0.18242928385734558, - "loss_sod": 0.00869907345622778, - "loss_total": 0.44223278760910034, - "step": 201099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.223291277885437, - "learning_rate": 3.482132432199197e-05, - "loss": 0.4699, - "step": 201100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.6898393630981445, - "loss_rtd": 0.2735462188720703, - "loss_sent": 0.25486263632774353, - "loss_sod": 0.017370786517858505, - "loss_total": 0.5457796454429626, - "step": 201199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.367513179779053, - "loss_rtd": 0.25148075819015503, - "loss_sent": 0.19145554304122925, - "loss_sod": 0.024947544559836388, - "loss_total": 0.4678838551044464, - "step": 201199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.9793940186500549, - "learning_rate": 3.479109167722757e-05, - "loss": 0.4631, - "step": 201200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.831352710723877, - "loss_rtd": 0.2631663382053375, - "loss_sent": 0.4253993332386017, - "loss_sod": 0.08521652966737747, - "loss_total": 0.7737821936607361, - "step": 201299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.7043585777282715, - "loss_rtd": 0.2361105978488922, - "loss_sent": 0.02994850091636181, - "loss_sod": 0.09454520046710968, - "loss_total": 0.36060431599617004, - "step": 201299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.8844496011734009, - "learning_rate": 3.476086515860965e-05, - "loss": 0.478, - "step": 201300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.68744421005249, - "loss_rtd": 0.2573264539241791, - "loss_sent": 0.070601686835289, - "loss_sod": 0.02096596546471119, - "loss_total": 0.3488941192626953, - "step": 201399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.503346920013428, - "loss_rtd": 0.254792183637619, - "loss_sent": 0.0630236342549324, - "loss_sod": 0.04250772297382355, - "loss_total": 0.36032354831695557, - "step": 201399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.7962280511856079, - "learning_rate": 3.47306447783134e-05, - "loss": 0.4777, - "step": 201400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.533423900604248, - "loss_rtd": 0.2535945773124695, - "loss_sent": 0.2537531852722168, - "loss_sod": 0.04877634719014168, - "loss_total": 0.5561240911483765, - "step": 201499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.3803863525390625, - "loss_rtd": 0.23618975281715393, - "loss_sent": 0.10412287712097168, - "loss_sod": 0.02499981038272381, - "loss_total": 0.3653124272823334, - "step": 201499 - }, - { - "epoch": 0.019, - "grad_norm": 0.7263472676277161, - "learning_rate": 3.47004305485116e-05, - "loss": 0.4762, - "step": 201500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.453300476074219, - "loss_rtd": 0.2398272007703781, - "loss_sent": 0.25099968910217285, - "loss_sod": 0.010356992483139038, - "loss_total": 0.5011838674545288, - "step": 201599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.467883110046387, - "loss_rtd": 0.25183042883872986, - "loss_sent": 0.34147679805755615, - "loss_sod": 0.054517049342393875, - "loss_total": 0.6478242874145508, - "step": 201599 - }, - { - "epoch": 0.0192, - "grad_norm": 2.130498170852661, - "learning_rate": 3.467022248137455e-05, - "loss": 0.4992, - "step": 201600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.547444820404053, - "loss_rtd": 0.24827061593532562, - "loss_sent": 0.0749385729432106, - "loss_sod": 0.02044161595404148, - "loss_total": 0.34365078806877136, - "step": 201699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.6619343757629395, - "loss_rtd": 0.2587766945362091, - "loss_sent": 0.15140384435653687, - "loss_sod": 0.041210610419511795, - "loss_total": 0.45139116048812866, - "step": 201699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.8851449489593506, - "learning_rate": 3.464002058907004e-05, - "loss": 0.4673, - "step": 201700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.297020435333252, - "loss_rtd": 0.2192527800798416, - "loss_sent": 0.10825800150632858, - "loss_sod": 0.02537507191300392, - "loss_total": 0.3528858423233032, - "step": 201799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.737192153930664, - "loss_rtd": 0.24014760553836823, - "loss_sent": 0.2475651055574417, - "loss_sod": 0.0290079228579998, - "loss_total": 0.5167206525802612, - "step": 201799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.6454467177391052, - "learning_rate": 3.460982488376342e-05, - "loss": 0.4925, - "step": 201800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.807116508483887, - "loss_rtd": 0.23687048256397247, - "loss_sent": 0.10783880949020386, - "loss_sod": 0.05488898605108261, - "loss_total": 0.39959827065467834, - "step": 201899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.60234260559082, - "loss_rtd": 0.24860233068466187, - "loss_sent": 0.6529000401496887, - "loss_sod": 0.05441371351480484, - "loss_total": 0.9559160470962524, - "step": 201899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.5367225408554077, - "learning_rate": 3.4579635377617485e-05, - "loss": 0.4866, - "step": 201900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.0976762771606445, - "loss_rtd": 0.2277284413576126, - "loss_sent": 3.441545050009154e-05, - "loss_sod": 0.19301730394363403, - "loss_total": 0.42078015208244324, - "step": 201999 - }, - { - "epoch": 0.019998, - "loss_gen": 4.853547096252441, - "loss_rtd": 0.22184228897094727, - "loss_sent": 0.0005072517087683082, - "loss_sod": 0.10036781430244446, - "loss_total": 0.3227173686027527, - "step": 201999 - }, - { - "epoch": 0.02, - "grad_norm": 0.8136307597160339, - "learning_rate": 3.4549452082792585e-05, - "loss": 0.457, - "step": 202000 - }, - { - "epoch": 0.02, - "eval_loss": 0.45421433448791504, - "eval_runtime": 151.0851, - "eval_samples_per_second": 102.214, - "eval_steps_per_second": 0.801, - "step": 202000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.407601356506348, - "loss_rtd": 0.25276073813438416, - "loss_sent": 0.2210267335176468, - "loss_sod": 0.01045980490744114, - "loss_total": 0.48424726724624634, - "step": 202099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.363122940063477, - "loss_rtd": 0.257572740316391, - "loss_sent": 0.1861880123615265, - "loss_sod": 0.0553722158074379, - "loss_total": 0.4991329610347748, - "step": 202099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.7221114039421082, - "learning_rate": 3.451927501144653e-05, - "loss": 0.4872, - "step": 202100 - }, - { - "epoch": 0.020398, - "loss_gen": 4.728961944580078, - "loss_rtd": 0.21321223676204681, - "loss_sent": 3.206491965102032e-05, - "loss_sod": 0.2221163958311081, - "loss_total": 0.43536069989204407, - "step": 202199 - }, - { - "epoch": 0.020398, - "loss_gen": 4.939293384552002, - "loss_rtd": 0.24339261651039124, - "loss_sent": 0.06533413380384445, - "loss_sod": 0.051401764154434204, - "loss_total": 0.3601285219192505, - "step": 202199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.0307354927062988, - "learning_rate": 3.448910417573465e-05, - "loss": 0.4602, - "step": 202200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.139678955078125, - "loss_rtd": 0.23202811181545258, - "loss_sent": 4.979508958058432e-05, - "loss_sod": 0.07965496182441711, - "loss_total": 0.31173285841941833, - "step": 202299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.023827075958252, - "loss_rtd": 0.24008287489414215, - "loss_sent": 0.012041668407619, - "loss_sod": 0.16286291182041168, - "loss_total": 0.4149874448776245, - "step": 202299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.039016842842102, - "learning_rate": 3.4458939587809745e-05, - "loss": 0.4503, - "step": 202300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.28715705871582, - "loss_rtd": 0.22881212830543518, - "loss_sent": 0.24012714624404907, - "loss_sod": 0.004217217210680246, - "loss_total": 0.47315651178359985, - "step": 202399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.761211395263672, - "loss_rtd": 0.25692808628082275, - "loss_sent": 0.15823429822921753, - "loss_sod": 0.07715623825788498, - "loss_total": 0.49231863021850586, - "step": 202399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.0622472763061523, - "learning_rate": 3.442878125982213e-05, - "loss": 0.4734, - "step": 202400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.585818290710449, - "loss_rtd": 0.26084500551223755, - "loss_sent": 0.0931270495057106, - "loss_sod": 0.03289801999926567, - "loss_total": 0.38687005639076233, - "step": 202499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.775491714477539, - "loss_rtd": 0.24642148613929749, - "loss_sent": 0.20359787344932556, - "loss_sod": 0.07437972724437714, - "loss_total": 0.5243990421295166, - "step": 202499 - }, - { - "epoch": 0.021, - "grad_norm": 1.4548580646514893, - "learning_rate": 3.4398629203919556e-05, - "loss": 0.4586, - "step": 202500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.792977333068848, - "loss_rtd": 0.23596957325935364, - "loss_sent": 0.09734531491994858, - "loss_sod": 0.07019216567277908, - "loss_total": 0.4035070538520813, - "step": 202599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.22594690322876, - "loss_rtd": 0.2360369712114334, - "loss_sent": 4.6116518205963075e-05, - "loss_sod": 0.14883092045783997, - "loss_total": 0.38491401076316833, - "step": 202599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.2377222776412964, - "learning_rate": 3.436848343224727e-05, - "loss": 0.4865, - "step": 202600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.328969955444336, - "loss_rtd": 0.2357320785522461, - "loss_sent": 0.021743187680840492, - "loss_sod": 0.08085143566131592, - "loss_total": 0.33832669258117676, - "step": 202699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.124855041503906, - "loss_rtd": 0.216110497713089, - "loss_sent": 0.007868066430091858, - "loss_sod": 0.09717868268489838, - "loss_total": 0.32115721702575684, - "step": 202699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.7213184237480164, - "learning_rate": 3.433834395694799e-05, - "loss": 0.4691, - "step": 202700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.366321563720703, - "loss_rtd": 0.2519897222518921, - "loss_sent": 0.015164789743721485, - "loss_sod": 0.06233523041009903, - "loss_total": 0.32948973774909973, - "step": 202799 - }, - { - "epoch": 0.021598, - "loss_gen": 4.971346855163574, - "loss_rtd": 0.22204498946666718, - "loss_sent": 3.21301122312434e-05, - "loss_sod": 0.08021879196166992, - "loss_total": 0.3022959232330322, - "step": 202799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.6737499237060547, - "learning_rate": 3.43082107901619e-05, - "loss": 0.4849, - "step": 202800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.622530460357666, - "loss_rtd": 0.24170541763305664, - "loss_sent": 0.056620679795742035, - "loss_sod": 0.04395980015397072, - "loss_total": 0.3422859013080597, - "step": 202899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.886425971984863, - "loss_rtd": 0.24952994287014008, - "loss_sent": 0.13775122165679932, - "loss_sod": 0.02643515169620514, - "loss_total": 0.41371631622314453, - "step": 202899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.9930436015129089, - "learning_rate": 3.427808394402661e-05, - "loss": 0.4652, - "step": 202900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.827584266662598, - "loss_rtd": 0.23784901201725006, - "loss_sent": 0.3198770582675934, - "loss_sod": 0.07419580966234207, - "loss_total": 0.6319218873977661, - "step": 202999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.748804569244385, - "loss_rtd": 0.2546565532684326, - "loss_sent": 0.37579435110092163, - "loss_sod": 0.047191210091114044, - "loss_total": 0.6776421070098877, - "step": 202999 - }, - { - "epoch": 0.022, - "grad_norm": 1.7178469896316528, - "learning_rate": 3.424796343067724e-05, - "loss": 0.4531, - "step": 203000 - }, - { - "epoch": 0.022, - "eval_loss": 0.44866427779197693, - "eval_runtime": 150.7737, - "eval_samples_per_second": 102.425, - "eval_steps_per_second": 0.803, - "step": 203000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.684000492095947, - "loss_rtd": 0.25918105244636536, - "loss_sent": 0.0925687924027443, - "loss_sod": 0.014433680102229118, - "loss_total": 0.3661835193634033, - "step": 203099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.693911075592041, - "loss_rtd": 0.24773980677127838, - "loss_sent": 0.053045522421598434, - "loss_sod": 0.04356825351715088, - "loss_total": 0.3443535566329956, - "step": 203099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.1181321144104004, - "learning_rate": 3.421784926224632e-05, - "loss": 0.473, - "step": 203100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.202003479003906, - "loss_rtd": 0.21969114243984222, - "loss_sent": 0.028610864654183388, - "loss_sod": 0.14271146059036255, - "loss_total": 0.3910134732723236, - "step": 203199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.214725017547607, - "loss_rtd": 0.23188306391239166, - "loss_sent": 0.041248664259910583, - "loss_sod": 0.0785137414932251, - "loss_total": 0.35164546966552734, - "step": 203199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.9796826839447021, - "learning_rate": 3.418774145086382e-05, - "loss": 0.481, - "step": 203200 - }, - { - "epoch": 0.022598, - "loss_gen": 4.901376247406006, - "loss_rtd": 0.2233228087425232, - "loss_sent": 0.0004384858184494078, - "loss_sod": 0.13029982149600983, - "loss_total": 0.354061096906662, - "step": 203299 - }, - { - "epoch": 0.022598, - "loss_gen": 4.945909023284912, - "loss_rtd": 0.23735138773918152, - "loss_sent": 0.004065847024321556, - "loss_sod": 0.1614292860031128, - "loss_total": 0.4028465151786804, - "step": 203299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.9993460774421692, - "learning_rate": 3.4157640008657174e-05, - "loss": 0.4838, - "step": 203300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.35711145401001, - "loss_rtd": 0.27121755480766296, - "loss_sent": 0.13728365302085876, - "loss_sod": 0.016950685530900955, - "loss_total": 0.4254519045352936, - "step": 203399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.855948448181152, - "loss_rtd": 0.2642308473587036, - "loss_sent": 0.0945664644241333, - "loss_sod": 0.03819169104099274, - "loss_total": 0.39698898792266846, - "step": 203399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.8500588536262512, - "learning_rate": 3.412754494775123e-05, - "loss": 0.4711, - "step": 203400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.399954795837402, - "loss_rtd": 0.26527848839759827, - "loss_sent": 0.16523225605487823, - "loss_sod": 0.052225202322006226, - "loss_total": 0.48273593187332153, - "step": 203499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.9042792320251465, - "loss_rtd": 0.2657572329044342, - "loss_sent": 0.09081815183162689, - "loss_sod": 0.07401497662067413, - "loss_total": 0.4305903911590576, - "step": 203499 - }, - { - "epoch": 0.023, - "grad_norm": 0.8275566101074219, - "learning_rate": 3.4097456280268304e-05, - "loss": 0.4863, - "step": 203500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.517064094543457, - "loss_rtd": 0.24983039498329163, - "loss_sent": 0.5964022278785706, - "loss_sod": 0.012449709698557854, - "loss_total": 0.8586823344230652, - "step": 203599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.628481388092041, - "loss_rtd": 0.2514532506465912, - "loss_sent": 0.161366805434227, - "loss_sod": 0.021773474290966988, - "loss_total": 0.4345935583114624, - "step": 203599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.1252191066741943, - "learning_rate": 3.4067374018328066e-05, - "loss": 0.4583, - "step": 203600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.702511787414551, - "loss_rtd": 0.24112685024738312, - "loss_sent": 0.26646703481674194, - "loss_sod": 0.034668125212192535, - "loss_total": 0.5422620177268982, - "step": 203699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.327409267425537, - "loss_rtd": 0.23813338577747345, - "loss_sent": 0.0379524864256382, - "loss_sod": 0.1470402032136917, - "loss_total": 0.42312607169151306, - "step": 203699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.257358431816101, - "learning_rate": 3.403729817404768e-05, - "loss": 0.4791, - "step": 203700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.931430816650391, - "loss_rtd": 0.2473500519990921, - "loss_sent": 0.1528945118188858, - "loss_sod": 0.028097284957766533, - "loss_total": 0.4283418655395508, - "step": 203799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.680041790008545, - "loss_rtd": 0.2385464310646057, - "loss_sent": 0.04751960188150406, - "loss_sod": 0.03606370836496353, - "loss_total": 0.3221297264099121, - "step": 203799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.6562179327011108, - "learning_rate": 3.400722875954168e-05, - "loss": 0.4803, - "step": 203800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.830761909484863, - "loss_rtd": 0.23952393233776093, - "loss_sent": 0.11855100095272064, - "loss_sod": 0.06821095943450928, - "loss_total": 0.42628592252731323, - "step": 203899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.562859535217285, - "loss_rtd": 0.23691979050636292, - "loss_sent": 0.08273235708475113, - "loss_sod": 0.03769238665699959, - "loss_total": 0.35734453797340393, - "step": 203899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.172888994216919, - "learning_rate": 3.3977165786922016e-05, - "loss": 0.4758, - "step": 203900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.457442283630371, - "loss_rtd": 0.2446732521057129, - "loss_sent": 0.25208550691604614, - "loss_sod": 0.028653493151068687, - "loss_total": 0.5254122614860535, - "step": 203999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.817096710205078, - "loss_rtd": 0.2458757609128952, - "loss_sent": 0.14045284688472748, - "loss_sod": 0.018014002591371536, - "loss_total": 0.4043425917625427, - "step": 203999 - }, - { - "epoch": 0.024, - "grad_norm": 0.9994099736213684, - "learning_rate": 3.394710926829806e-05, - "loss": 0.4809, - "step": 204000 - }, - { - "epoch": 0.024, - "eval_loss": 0.44341379404067993, - "eval_runtime": 151.3617, - "eval_samples_per_second": 102.027, - "eval_steps_per_second": 0.799, - "step": 204000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.479134559631348, - "loss_rtd": 0.25226548314094543, - "loss_sent": 0.21348440647125244, - "loss_sod": 0.08062466979026794, - "loss_total": 0.5463745594024658, - "step": 204099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.48226261138916, - "loss_rtd": 0.25293290615081787, - "loss_sent": 0.19629566371440887, - "loss_sod": 0.09361980855464935, - "loss_total": 0.5428484082221985, - "step": 204099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.9648359417915344, - "learning_rate": 3.391705921577658e-05, - "loss": 0.4906, - "step": 204100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.709273338317871, - "loss_rtd": 0.23943577706813812, - "loss_sent": 0.1579311639070511, - "loss_sod": 0.028613269329071045, - "loss_total": 0.42598021030426025, - "step": 204199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.064204692840576, - "loss_rtd": 0.22587241232395172, - "loss_sent": 4.577033905661665e-05, - "loss_sod": 0.18077200651168823, - "loss_total": 0.40669018030166626, - "step": 204199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.1610053777694702, - "learning_rate": 3.388701564146171e-05, - "loss": 0.4618, - "step": 204200 - }, - { - "epoch": 0.024598, - "loss_gen": 4.936402320861816, - "loss_rtd": 0.2294440120458603, - "loss_sent": 0.166824609041214, - "loss_sod": 0.029074087738990784, - "loss_total": 0.42534270882606506, - "step": 204299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.647017478942871, - "loss_rtd": 0.24903364479541779, - "loss_sent": 0.16078977286815643, - "loss_sod": 0.068865567445755, - "loss_total": 0.4786890149116516, - "step": 204299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.1673539876937866, - "learning_rate": 3.385697855745502e-05, - "loss": 0.4718, - "step": 204300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.273296356201172, - "loss_rtd": 0.23775988817214966, - "loss_sent": 0.21562601625919342, - "loss_sod": 0.019672540947794914, - "loss_total": 0.47305846214294434, - "step": 204399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.458662986755371, - "loss_rtd": 0.24080781638622284, - "loss_sent": 0.07909756898880005, - "loss_sod": 0.02410779520869255, - "loss_total": 0.34401318430900574, - "step": 204399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.3262639045715332, - "learning_rate": 3.3826947975855425e-05, - "loss": 0.4762, - "step": 204400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.764666557312012, - "loss_rtd": 0.2508697807788849, - "loss_sent": 0.11904556304216385, - "loss_sod": 0.12209869921207428, - "loss_total": 0.4920140504837036, - "step": 204499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.455235481262207, - "loss_rtd": 0.2474861890077591, - "loss_sent": 0.2829621732234955, - "loss_sod": 0.045313261449337006, - "loss_total": 0.575761616230011, - "step": 204499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8297535181045532, - "learning_rate": 3.379692390875927e-05, - "loss": 0.4805, - "step": 204500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.957808017730713, - "loss_rtd": 0.2471088021993637, - "loss_sent": 0.05789091810584068, - "loss_sod": 0.0650874674320221, - "loss_total": 0.3700871765613556, - "step": 204599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.533000469207764, - "loss_rtd": 0.24974395334720612, - "loss_sent": 0.1626661866903305, - "loss_sod": 0.0023819920606911182, - "loss_total": 0.41479212045669556, - "step": 204599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.7717977166175842, - "learning_rate": 3.376690636826023e-05, - "loss": 0.479, - "step": 204600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.416359901428223, - "loss_rtd": 0.24626386165618896, - "loss_sent": 0.0010380190797150135, - "loss_sod": 0.10930918902158737, - "loss_total": 0.35661107301712036, - "step": 204699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.751550197601318, - "loss_rtd": 0.21348457038402557, - "loss_sent": 3.4875549317803234e-05, - "loss_sod": 0.07302307337522507, - "loss_total": 0.28654250502586365, - "step": 204699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.7635507583618164, - "learning_rate": 3.373689536644934e-05, - "loss": 0.4802, - "step": 204700 - }, - { - "epoch": 0.025598, - "loss_gen": 4.804357528686523, - "loss_rtd": 0.22278271615505219, - "loss_sent": 0.10689501464366913, - "loss_sod": 0.033710166811943054, - "loss_total": 0.3633878827095032, - "step": 204799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.579931735992432, - "loss_rtd": 0.2518373727798462, - "loss_sent": 0.34698089957237244, - "loss_sod": 0.05123288929462433, - "loss_total": 0.6500511169433594, - "step": 204799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.9754551649093628, - "learning_rate": 3.3706890915415076e-05, - "loss": 0.4751, - "step": 204800 - }, - { - "epoch": 0.025798, - "loss_gen": 4.703709602355957, - "loss_rtd": 0.22530224919319153, - "loss_sent": 0.006922869477421045, - "loss_sod": 0.047425776720047, - "loss_total": 0.27965089678764343, - "step": 204899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.458637237548828, - "loss_rtd": 0.21392790973186493, - "loss_sent": 0.10577099770307541, - "loss_sod": 0.04395810514688492, - "loss_total": 0.36365702748298645, - "step": 204899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.8675275444984436, - "learning_rate": 3.3676893027243185e-05, - "loss": 0.4619, - "step": 204900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.478752613067627, - "loss_rtd": 0.2437988519668579, - "loss_sent": 0.23692891001701355, - "loss_sod": 0.03953733295202255, - "loss_total": 0.5202651023864746, - "step": 204999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.675318717956543, - "loss_rtd": 0.23829564452171326, - "loss_sent": 0.21041101217269897, - "loss_sod": 0.0461123026907444, - "loss_total": 0.49481895565986633, - "step": 204999 - }, - { - "epoch": 0.026, - "grad_norm": 1.635738492012024, - "learning_rate": 3.3646901714016846e-05, - "loss": 0.4671, - "step": 205000 - }, - { - "epoch": 0.026, - "eval_loss": 0.4509928524494171, - "eval_runtime": 150.9297, - "eval_samples_per_second": 102.319, - "eval_steps_per_second": 0.802, - "step": 205000 - }, - { - "epoch": 0.026198, - "loss_gen": 6.056728839874268, - "loss_rtd": 0.23301714658737183, - "loss_sent": 0.17012298107147217, - "loss_sod": 0.09039507061243057, - "loss_total": 0.49353519082069397, - "step": 205099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.634973049163818, - "loss_rtd": 0.21394062042236328, - "loss_sent": 0.11980615556240082, - "loss_sod": 0.048588450998067856, - "loss_total": 0.38233524560928345, - "step": 205099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.3641608953475952, - "learning_rate": 3.3616916987816515e-05, - "loss": 0.467, - "step": 205100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.877918720245361, - "loss_rtd": 0.2511952519416809, - "loss_sent": 0.20603446662425995, - "loss_sod": 0.0371844507753849, - "loss_total": 0.49441415071487427, - "step": 205199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.692530632019043, - "loss_rtd": 0.24305275082588196, - "loss_sent": 0.11774775385856628, - "loss_sod": 0.07681696861982346, - "loss_total": 0.4376174807548523, - "step": 205199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.165781855583191, - "learning_rate": 3.3586938860720084e-05, - "loss": 0.4688, - "step": 205200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.86293363571167, - "loss_rtd": 0.2578810751438141, - "loss_sent": 0.14761590957641602, - "loss_sod": 0.03640330582857132, - "loss_total": 0.4419002830982208, - "step": 205299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.422086238861084, - "loss_rtd": 0.23155608773231506, - "loss_sent": 0.22479580342769623, - "loss_sod": 0.002203958109021187, - "loss_total": 0.45855584740638733, - "step": 205299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.6977037191390991, - "learning_rate": 3.355696734480271e-05, - "loss": 0.4774, - "step": 205300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.680825710296631, - "loss_rtd": 0.23180951178073883, - "loss_sent": 0.11733356863260269, - "loss_sod": 0.08922179043292999, - "loss_total": 0.4383648633956909, - "step": 205399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.45111083984375, - "loss_rtd": 0.22984595596790314, - "loss_sent": 0.27929097414016724, - "loss_sod": 0.04875180870294571, - "loss_total": 0.5578887462615967, - "step": 205399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.9854266047477722, - "learning_rate": 3.352700245213693e-05, - "loss": 0.4862, - "step": 205400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.818627834320068, - "loss_rtd": 0.2522910535335541, - "loss_sent": 0.1124449223279953, - "loss_sod": 0.04227959364652634, - "loss_total": 0.4070155620574951, - "step": 205499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.588534832000732, - "loss_rtd": 0.2587710916996002, - "loss_sent": 0.2575885057449341, - "loss_sod": 0.10087103396654129, - "loss_total": 0.6172306537628174, - "step": 205499 - }, - { - "epoch": 0.027, - "grad_norm": 1.0960627794265747, - "learning_rate": 3.349704419479258e-05, - "loss": 0.4731, - "step": 205500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.415334224700928, - "loss_rtd": 0.230610191822052, - "loss_sent": 0.112730011343956, - "loss_sod": 0.05337172746658325, - "loss_total": 0.39671194553375244, - "step": 205599 - }, - { - "epoch": 0.027198, - "loss_gen": 4.831805229187012, - "loss_rtd": 0.19442704319953918, - "loss_sent": 0.0017129608895629644, - "loss_sod": 0.06312575191259384, - "loss_total": 0.2592657506465912, - "step": 205599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.7432999014854431, - "learning_rate": 3.346709258483687e-05, - "loss": 0.4788, - "step": 205600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.799887180328369, - "loss_rtd": 0.24671250581741333, - "loss_sent": 0.10987219959497452, - "loss_sod": 0.17109833657741547, - "loss_total": 0.5276830792427063, - "step": 205699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.3035759925842285, - "loss_rtd": 0.2797275185585022, - "loss_sent": 0.16637729108333588, - "loss_sod": 0.1263885498046875, - "loss_total": 0.5724933743476868, - "step": 205699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.4277757406234741, - "learning_rate": 3.3437147634334274e-05, - "loss": 0.4798, - "step": 205700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.149640083312988, - "loss_rtd": 0.2670230269432068, - "loss_sent": 0.18647268414497375, - "loss_sod": 0.12289707362651825, - "loss_total": 0.5763927698135376, - "step": 205799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.793913841247559, - "loss_rtd": 0.25202977657318115, - "loss_sent": 0.19943426549434662, - "loss_sod": 0.039625078439712524, - "loss_total": 0.4910891056060791, - "step": 205799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8974087834358215, - "learning_rate": 3.3407209355346644e-05, - "loss": 0.4759, - "step": 205800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.44106912612915, - "loss_rtd": 0.24474363029003143, - "loss_sent": 0.13920697569847107, - "loss_sod": 0.03924485296010971, - "loss_total": 0.4231954514980316, - "step": 205899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.045462131500244, - "loss_rtd": 0.22086864709854126, - "loss_sent": 0.007559431251138449, - "loss_sod": 0.16332033276557922, - "loss_total": 0.3917483985424042, - "step": 205899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.0281552076339722, - "learning_rate": 3.337727775993309e-05, - "loss": 0.4721, - "step": 205900 - }, - { - "epoch": 0.027998, - "loss_gen": 4.836759090423584, - "loss_rtd": 0.20352813601493835, - "loss_sent": 0.01286726351827383, - "loss_sod": 0.07332977652549744, - "loss_total": 0.2897251844406128, - "step": 205999 - }, - { - "epoch": 0.027998, - "loss_gen": 4.748128890991211, - "loss_rtd": 0.21712294220924377, - "loss_sent": 8.002267713891342e-05, - "loss_sod": 0.14717672765254974, - "loss_total": 0.3643796741962433, - "step": 205999 - }, - { - "epoch": 0.028, - "grad_norm": 1.1035751104354858, - "learning_rate": 3.334735286015007e-05, - "loss": 0.4822, - "step": 206000 - }, - { - "epoch": 0.028, - "eval_loss": 0.4547508656978607, - "eval_runtime": 151.0983, - "eval_samples_per_second": 102.205, - "eval_steps_per_second": 0.801, - "step": 206000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.244474411010742, - "loss_rtd": 0.2550910413265228, - "loss_sent": 0.2698170244693756, - "loss_sod": 0.024612342938780785, - "loss_total": 0.5495203733444214, - "step": 206099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.359042644500732, - "loss_rtd": 0.2508499026298523, - "loss_sent": 0.22385083138942719, - "loss_sod": 0.07547710835933685, - "loss_total": 0.5501778721809387, - "step": 206099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.1150691509246826, - "learning_rate": 3.331743466805133e-05, - "loss": 0.4684, - "step": 206100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.788224697113037, - "loss_rtd": 0.26031696796417236, - "loss_sent": 0.19803516566753387, - "loss_sod": 0.08414338529109955, - "loss_total": 0.5424955487251282, - "step": 206199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.410754680633545, - "loss_rtd": 0.28402501344680786, - "loss_sent": 0.08933217823505402, - "loss_sod": 0.008630115538835526, - "loss_total": 0.3819873332977295, - "step": 206199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.027699589729309, - "learning_rate": 3.3287523195687907e-05, - "loss": 0.4682, - "step": 206200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.61097526550293, - "loss_rtd": 0.26154080033302307, - "loss_sent": 0.24920663237571716, - "loss_sod": 0.002796958899125457, - "loss_total": 0.5135443806648254, - "step": 206299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.758017539978027, - "loss_rtd": 0.241214781999588, - "loss_sent": 0.2503407895565033, - "loss_sod": 0.056327708065509796, - "loss_total": 0.5478832721710205, - "step": 206299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.1785681247711182, - "learning_rate": 3.3257618455108154e-05, - "loss": 0.4525, - "step": 206300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.683103084564209, - "loss_rtd": 0.23636199533939362, - "loss_sent": 0.07750055938959122, - "loss_sod": 0.044951193034648895, - "loss_total": 0.35881373286247253, - "step": 206399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.5561628341674805, - "loss_rtd": 0.22717030346393585, - "loss_sent": 0.17105694115161896, - "loss_sod": 0.06201820448040962, - "loss_total": 0.46024543046951294, - "step": 206399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.4344044923782349, - "learning_rate": 3.322772045835767e-05, - "loss": 0.4784, - "step": 206400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.1489176750183105, - "loss_rtd": 0.2281053066253662, - "loss_sent": 0.08055388182401657, - "loss_sod": 0.04174261540174484, - "loss_total": 0.3504018187522888, - "step": 206499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.431298732757568, - "loss_rtd": 0.2533523142337799, - "loss_sent": 0.39194077253341675, - "loss_sod": 0.05683267489075661, - "loss_total": 0.7021257877349854, - "step": 206499 - }, - { - "epoch": 0.029, - "grad_norm": 1.2416197061538696, - "learning_rate": 3.319782921747939e-05, - "loss": 0.4852, - "step": 206500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.777912139892578, - "loss_rtd": 0.2684161365032196, - "loss_sent": 0.6214912533760071, - "loss_sod": 0.10799536108970642, - "loss_total": 0.9979027509689331, - "step": 206599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.7680253982543945, - "loss_rtd": 0.23439471423625946, - "loss_sent": 0.26999813318252563, - "loss_sod": 0.02371506206691265, - "loss_total": 0.5281078815460205, - "step": 206599 - }, - { - "epoch": 0.0292, - "grad_norm": 4.099353790283203, - "learning_rate": 3.316794474451348e-05, - "loss": 0.4565, - "step": 206600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.284374713897705, - "loss_rtd": 0.22979433834552765, - "loss_sent": 0.13422654569149017, - "loss_sod": 0.021393032744526863, - "loss_total": 0.3854139447212219, - "step": 206699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.886496543884277, - "loss_rtd": 0.25662046670913696, - "loss_sent": 0.14732879400253296, - "loss_sod": 0.118320994079113, - "loss_total": 0.5222702622413635, - "step": 206699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.9259575009346008, - "learning_rate": 3.3138067051497425e-05, - "loss": 0.4689, - "step": 206700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.3440470695495605, - "loss_rtd": 0.25219035148620605, - "loss_sent": 0.26719051599502563, - "loss_sod": 0.014408763498067856, - "loss_total": 0.5337896347045898, - "step": 206799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.437257766723633, - "loss_rtd": 0.2473822981119156, - "loss_sent": 0.1943555474281311, - "loss_sod": 0.10320235788822174, - "loss_total": 0.5449402332305908, - "step": 206799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.2678300142288208, - "learning_rate": 3.3108196150465935e-05, - "loss": 0.4548, - "step": 206800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.510725021362305, - "loss_rtd": 0.23764798045158386, - "loss_sent": 0.13177217543125153, - "loss_sod": 0.005060961470007896, - "loss_total": 0.37448111176490784, - "step": 206899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.919556617736816, - "loss_rtd": 0.2585257887840271, - "loss_sent": 0.1994444876909256, - "loss_sod": 0.10045526921749115, - "loss_total": 0.5584255456924438, - "step": 206899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.8215299844741821, - "learning_rate": 3.307833205345103e-05, - "loss": 0.4792, - "step": 206900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.876476287841797, - "loss_rtd": 0.25320500135421753, - "loss_sent": 0.4009746015071869, - "loss_sod": 0.0036950299981981516, - "loss_total": 0.6578746438026428, - "step": 206999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.347562313079834, - "loss_rtd": 0.2552586495876312, - "loss_sent": 0.13582611083984375, - "loss_sod": 0.021963156759738922, - "loss_total": 0.4130479097366333, - "step": 206999 - }, - { - "epoch": 0.03, - "grad_norm": 2.0394182205200195, - "learning_rate": 3.304847477248193e-05, - "loss": 0.4797, - "step": 207000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4456648826599121, - "eval_runtime": 150.9782, - "eval_samples_per_second": 102.286, - "eval_steps_per_second": 0.801, - "step": 207000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.376842975616455, - "loss_rtd": 0.2462959587574005, - "loss_sent": 0.2018681764602661, - "loss_sod": 0.008978866040706635, - "loss_total": 0.45714300870895386, - "step": 207099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.584357738494873, - "loss_rtd": 0.23943877220153809, - "loss_sent": 0.2790125906467438, - "loss_sod": 0.021808486431837082, - "loss_total": 0.540259838104248, - "step": 207099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.0433752536773682, - "learning_rate": 3.301862431958519e-05, - "loss": 0.4795, - "step": 207100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.446674823760986, - "loss_rtd": 0.23925071954727173, - "loss_sent": 0.224978506565094, - "loss_sod": 0.03975839912891388, - "loss_total": 0.5039876103401184, - "step": 207199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.688730716705322, - "loss_rtd": 0.24019432067871094, - "loss_sent": 0.18840710818767548, - "loss_sod": 0.11322510987520218, - "loss_total": 0.5418265461921692, - "step": 207199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.1642003059387207, - "learning_rate": 3.2988780706784515e-05, - "loss": 0.4705, - "step": 207200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.471463203430176, - "loss_rtd": 0.2537890374660492, - "loss_sent": 0.08549313992261887, - "loss_sod": 0.02495107799768448, - "loss_total": 0.36423325538635254, - "step": 207299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.5565032958984375, - "loss_rtd": 0.24373923242092133, - "loss_sent": 0.1720479428768158, - "loss_sod": 0.057914912700653076, - "loss_total": 0.4737021028995514, - "step": 207299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.752086877822876, - "learning_rate": 3.2958943946100963e-05, - "loss": 0.4674, - "step": 207300 - }, - { - "epoch": 0.030798, - "loss_gen": 6.340982913970947, - "loss_rtd": 0.26111024618148804, - "loss_sent": 0.21188463270664215, - "loss_sod": 0.1566849648952484, - "loss_total": 0.6296798586845398, - "step": 207399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.478623867034912, - "loss_rtd": 0.24548162519931793, - "loss_sent": 0.027345668524503708, - "loss_sod": 0.22641485929489136, - "loss_total": 0.4992421269416809, - "step": 207399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.1571487188339233, - "learning_rate": 3.292911404955273e-05, - "loss": 0.4796, - "step": 207400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.6921892166137695, - "loss_rtd": 0.24621668457984924, - "loss_sent": 0.07063555717468262, - "loss_sod": 0.08260297775268555, - "loss_total": 0.3994552195072174, - "step": 207499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.785663604736328, - "loss_rtd": 0.2260243445634842, - "loss_sent": 0.10995034873485565, - "loss_sod": 0.06233404576778412, - "loss_total": 0.39830875396728516, - "step": 207499 - }, - { - "epoch": 0.031, - "grad_norm": 1.4793405532836914, - "learning_rate": 3.2899291029155335e-05, - "loss": 0.4597, - "step": 207500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.756110668182373, - "loss_rtd": 0.25878915190696716, - "loss_sent": 0.2612974941730499, - "loss_sod": 0.02312314696609974, - "loss_total": 0.5432097911834717, - "step": 207599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.803622245788574, - "loss_rtd": 0.2384912073612213, - "loss_sent": 0.31777098774909973, - "loss_sod": 0.023026280105113983, - "loss_total": 0.5792884826660156, - "step": 207599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.9261624813079834, - "learning_rate": 3.286947489692145e-05, - "loss": 0.4743, - "step": 207600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.427712917327881, - "loss_rtd": 0.26596778631210327, - "loss_sent": 0.29693910479545593, - "loss_sod": 0.037313319742679596, - "loss_total": 0.6002202033996582, - "step": 207699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.803351879119873, - "loss_rtd": 0.2527596354484558, - "loss_sent": 0.27306580543518066, - "loss_sod": 0.09535633027553558, - "loss_total": 0.6211817860603333, - "step": 207699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.2428370714187622, - "learning_rate": 3.2839665664861044e-05, - "loss": 0.472, - "step": 207700 - }, - { - "epoch": 0.031598, - "loss_gen": 4.810303688049316, - "loss_rtd": 0.21520909667015076, - "loss_sent": 0.0006225758697837591, - "loss_sod": 0.07345493137836456, - "loss_total": 0.28928661346435547, - "step": 207799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.153961658477783, - "loss_rtd": 0.21760773658752441, - "loss_sent": 0.002753297798335552, - "loss_sod": 0.11247245222330093, - "loss_total": 0.332833468914032, - "step": 207799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.764484703540802, - "learning_rate": 3.280986334498125e-05, - "loss": 0.4687, - "step": 207800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.668840408325195, - "loss_rtd": 0.2228214293718338, - "loss_sent": 0.21543559432029724, - "loss_sod": 0.010019434615969658, - "loss_total": 0.44827646017074585, - "step": 207899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.782387733459473, - "loss_rtd": 0.2503328025341034, - "loss_sent": 0.20673413574695587, - "loss_sod": 0.01645244099199772, - "loss_total": 0.47351938486099243, - "step": 207899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.7568258047103882, - "learning_rate": 3.2780067949286444e-05, - "loss": 0.4886, - "step": 207900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.21542501449585, - "loss_rtd": 0.24553513526916504, - "loss_sent": 0.10508822649717331, - "loss_sod": 0.06286120414733887, - "loss_total": 0.4134845733642578, - "step": 207999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.18034029006958, - "loss_rtd": 0.22578170895576477, - "loss_sent": 0.14467483758926392, - "loss_sod": 0.1189010739326477, - "loss_total": 0.4893576204776764, - "step": 207999 - }, - { - "epoch": 0.032, - "grad_norm": 1.0519402027130127, - "learning_rate": 3.2750279489778214e-05, - "loss": 0.4812, - "step": 208000 - }, - { - "epoch": 0.032, - "eval_loss": 0.44441160559654236, - "eval_runtime": 152.4424, - "eval_samples_per_second": 101.304, - "eval_steps_per_second": 0.794, - "step": 208000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.8635687828063965, - "loss_rtd": 0.2613697946071625, - "loss_sent": 0.1858176738023758, - "loss_sod": 0.013185801915824413, - "loss_total": 0.46037328243255615, - "step": 208099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.366179466247559, - "loss_rtd": 0.2074943631887436, - "loss_sent": 9.578260505804792e-05, - "loss_sod": 0.08147788792848587, - "loss_total": 0.2890680432319641, - "step": 208099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.0053433179855347, - "learning_rate": 3.272049797845533e-05, - "loss": 0.4755, - "step": 208100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.5080413818359375, - "loss_rtd": 0.22976088523864746, - "loss_sent": 0.014457907527685165, - "loss_sod": 0.037182632833719254, - "loss_total": 0.2814014256000519, - "step": 208199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.42368221282959, - "loss_rtd": 0.2559351623058319, - "loss_sent": 0.2956313192844391, - "loss_sod": 0.0034990981221199036, - "loss_total": 0.5550655722618103, - "step": 208199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.9731106758117676, - "learning_rate": 3.269072342731381e-05, - "loss": 0.4751, - "step": 208200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.340611934661865, - "loss_rtd": 0.24798448383808136, - "loss_sent": 0.1278972327709198, - "loss_sod": 0.014393225312232971, - "loss_total": 0.39027494192123413, - "step": 208299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.826351165771484, - "loss_rtd": 0.23806066811084747, - "loss_sent": 0.326007217168808, - "loss_sod": 0.0613991841673851, - "loss_total": 0.62546706199646, - "step": 208299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.7958987355232239, - "learning_rate": 3.2660955848346805e-05, - "loss": 0.4828, - "step": 208300 - }, - { - "epoch": 0.000798, - "loss_gen": 4.886526584625244, - "loss_rtd": 0.21395230293273926, - "loss_sent": 0.010712208226323128, - "loss_sod": 0.07605178654193878, - "loss_total": 0.3007162809371948, - "step": 208399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.741994857788086, - "loss_rtd": 0.2044481337070465, - "loss_sent": 0.009149810299277306, - "loss_sod": 0.05806032568216324, - "loss_total": 0.2716582715511322, - "step": 208399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.676787257194519, - "learning_rate": 3.263119525354473e-05, - "loss": 0.4691, - "step": 208400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.5689239501953125, - "loss_rtd": 0.26971539855003357, - "loss_sent": 0.3627955913543701, - "loss_sod": 0.0074113160371780396, - "loss_total": 0.6399223208427429, - "step": 208499 - }, - { - "epoch": 0.000998, - "loss_gen": 6.180447578430176, - "loss_rtd": 0.24745963513851166, - "loss_sent": 0.25532370805740356, - "loss_sod": 0.043171755969524384, - "loss_total": 0.5459550619125366, - "step": 208499 - }, - { - "epoch": 0.001, - "grad_norm": 0.9868326187133789, - "learning_rate": 3.260144165489511e-05, - "loss": 0.472, - "step": 208500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.486894130706787, - "loss_rtd": 0.24865777790546417, - "loss_sent": 0.2057969570159912, - "loss_sod": 0.038115762174129486, - "loss_total": 0.4925704896450043, - "step": 208599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.663079261779785, - "loss_rtd": 0.2541826069355011, - "loss_sent": 0.08111198246479034, - "loss_sod": 0.042758356779813766, - "loss_total": 0.3780529499053955, - "step": 208599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.9215517044067383, - "learning_rate": 3.257169506438273e-05, - "loss": 0.4752, - "step": 208600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.853135585784912, - "loss_rtd": 0.26189929246902466, - "loss_sent": 0.11170561611652374, - "loss_sod": 0.028330225497484207, - "loss_total": 0.4019351601600647, - "step": 208699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.584386348724365, - "loss_rtd": 0.25824618339538574, - "loss_sent": 0.3483213484287262, - "loss_sod": 0.018394894897937775, - "loss_total": 0.6249624490737915, - "step": 208699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.4105433225631714, - "learning_rate": 3.254195549398948e-05, - "loss": 0.4563, - "step": 208700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.602505683898926, - "loss_rtd": 0.2587122619152069, - "loss_sent": 0.1277967244386673, - "loss_sod": 0.034992266446352005, - "loss_total": 0.4215012788772583, - "step": 208799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.469400405883789, - "loss_rtd": 0.23010583221912384, - "loss_sent": 0.33312076330184937, - "loss_sod": 0.04757145047187805, - "loss_total": 0.6107980608940125, - "step": 208799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.2992291450500488, - "learning_rate": 3.251222295569448e-05, - "loss": 0.4715, - "step": 208800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.667748928070068, - "loss_rtd": 0.24374772608280182, - "loss_sent": 0.25694599747657776, - "loss_sod": 0.014960157684981823, - "loss_total": 0.5156538486480713, - "step": 208899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.3918232917785645, - "loss_rtd": 0.25772157311439514, - "loss_sent": 0.08614183962345123, - "loss_sod": 0.03495020419359207, - "loss_total": 0.37881362438201904, - "step": 208899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.7250854969024658, - "learning_rate": 3.248249746147397e-05, - "loss": 0.4614, - "step": 208900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.483517169952393, - "loss_rtd": 0.25373467803001404, - "loss_sent": 0.1412976086139679, - "loss_sod": 0.07849239557981491, - "loss_total": 0.47352468967437744, - "step": 208999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.205908298492432, - "loss_rtd": 0.22909490764141083, - "loss_sent": 3.0029106710571796e-05, - "loss_sod": 0.38783594965934753, - "loss_total": 0.616960883140564, - "step": 208999 - }, - { - "epoch": 0.002, - "grad_norm": 1.5134930610656738, - "learning_rate": 3.245277902330139e-05, - "loss": 0.4744, - "step": 209000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4447585344314575, - "eval_runtime": 154.5293, - "eval_samples_per_second": 99.936, - "eval_steps_per_second": 0.783, - "step": 209000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.121287822723389, - "loss_rtd": 0.2065429985523224, - "loss_sent": 0.04182280972599983, - "loss_sod": 0.07478819787502289, - "loss_total": 0.3231540024280548, - "step": 209099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.722884178161621, - "loss_rtd": 0.26243090629577637, - "loss_sent": 0.24262161552906036, - "loss_sod": 0.09496884047985077, - "loss_total": 0.6000213623046875, - "step": 209099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.2191126346588135, - "learning_rate": 3.2423067653147324e-05, - "loss": 0.4696, - "step": 209100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.343850612640381, - "loss_rtd": 0.24848255515098572, - "loss_sent": 0.16229428350925446, - "loss_sod": 0.020043738186359406, - "loss_total": 0.4308205842971802, - "step": 209199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.992334842681885, - "loss_rtd": 0.21648763120174408, - "loss_sent": 0.06389337033033371, - "loss_sod": 0.062385477125644684, - "loss_total": 0.34276649355888367, - "step": 209199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.6456282734870911, - "learning_rate": 3.239336336297951e-05, - "loss": 0.4718, - "step": 209200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.863377571105957, - "loss_rtd": 0.24766705930233002, - "loss_sent": 0.20583513379096985, - "loss_sod": 0.015658937394618988, - "loss_total": 0.46916112303733826, - "step": 209299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.271611213684082, - "loss_rtd": 0.23583559691905975, - "loss_sent": 0.13610097765922546, - "loss_sod": 0.00965056847780943, - "loss_total": 0.3815871477127075, - "step": 209299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.9501907229423523, - "learning_rate": 3.2363666164762826e-05, - "loss": 0.4752, - "step": 209300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.05283784866333, - "loss_rtd": 0.22583165764808655, - "loss_sent": 0.026940269395709038, - "loss_sod": 0.05996691435575485, - "loss_total": 0.312738835811615, - "step": 209399 - }, - { - "epoch": 0.002798, - "loss_gen": 4.836438179016113, - "loss_rtd": 0.21261976659297943, - "loss_sent": 0.05323530361056328, - "loss_sod": 0.1903487741947174, - "loss_total": 0.456203818321228, - "step": 209399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.0178892612457275, - "learning_rate": 3.2333976070459304e-05, - "loss": 0.4769, - "step": 209400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.641650199890137, - "loss_rtd": 0.2465897500514984, - "loss_sent": 0.2687963545322418, - "loss_sod": 0.01301610004156828, - "loss_total": 0.5284022092819214, - "step": 209499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.4567365646362305, - "loss_rtd": 0.20831464231014252, - "loss_sent": 0.09384509176015854, - "loss_sod": 0.020197510719299316, - "loss_total": 0.3223572373390198, - "step": 209499 - }, - { - "epoch": 0.003, - "grad_norm": 0.6911271810531616, - "learning_rate": 3.2304293092028106e-05, - "loss": 0.475, - "step": 209500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.885210990905762, - "loss_rtd": 0.22602224349975586, - "loss_sent": 0.04062027856707573, - "loss_sod": 0.08307226747274399, - "loss_total": 0.3497147858142853, - "step": 209599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.531671047210693, - "loss_rtd": 0.2223314642906189, - "loss_sent": 0.36238762736320496, - "loss_sod": 0.03387190029025078, - "loss_total": 0.6185909509658813, - "step": 209599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.709784746170044, - "learning_rate": 3.227461724142553e-05, - "loss": 0.4709, - "step": 209600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.497905254364014, - "loss_rtd": 0.25370118021965027, - "loss_sent": 0.13840118050575256, - "loss_sod": 0.023736033588647842, - "loss_total": 0.4158383905887604, - "step": 209699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.420224189758301, - "loss_rtd": 0.23246806859970093, - "loss_sent": 0.13259181380271912, - "loss_sod": 0.009585598483681679, - "loss_total": 0.374645471572876, - "step": 209699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.8438321948051453, - "learning_rate": 3.224494853060502e-05, - "loss": 0.459, - "step": 209700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.336059093475342, - "loss_rtd": 0.21610666811466217, - "loss_sent": 0.06361129134893417, - "loss_sod": 0.08039283007383347, - "loss_total": 0.3601107895374298, - "step": 209799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.975579261779785, - "loss_rtd": 0.24493789672851562, - "loss_sent": 0.04117688909173012, - "loss_sod": 0.049076810479164124, - "loss_total": 0.3351915776729584, - "step": 209799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.7402128577232361, - "learning_rate": 3.221528697151712e-05, - "loss": 0.4558, - "step": 209800 - }, - { - "epoch": 0.003798, - "loss_gen": 4.895811080932617, - "loss_rtd": 0.22647565603256226, - "loss_sent": 0.0059408931992948055, - "loss_sod": 0.1335909366607666, - "loss_total": 0.3660074770450592, - "step": 209899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.193243980407715, - "loss_rtd": 0.22750423848628998, - "loss_sent": 0.023350337520241737, - "loss_sod": 0.07141244411468506, - "loss_total": 0.3222670257091522, - "step": 209899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.917251467704773, - "learning_rate": 3.218563257610949e-05, - "loss": 0.4787, - "step": 209900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.881529808044434, - "loss_rtd": 0.2474154531955719, - "loss_sent": 0.1599976271390915, - "loss_sod": 0.046758297830820084, - "loss_total": 0.454171359539032, - "step": 209999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.793908596038818, - "loss_rtd": 0.2482430636882782, - "loss_sent": 0.23379755020141602, - "loss_sod": 0.11410799622535706, - "loss_total": 0.5961486101150513, - "step": 209999 - }, - { - "epoch": 0.004, - "grad_norm": 1.2906124591827393, - "learning_rate": 3.2155985356326934e-05, - "loss": 0.4858, - "step": 210000 - }, - { - "epoch": 0.004, - "eval_loss": 0.44805896282196045, - "eval_runtime": 151.337, - "eval_samples_per_second": 102.044, - "eval_steps_per_second": 0.8, - "step": 210000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.477323532104492, - "loss_rtd": 0.22297251224517822, - "loss_sent": 0.0769631415605545, - "loss_sod": 0.04961954057216644, - "loss_total": 0.34955519437789917, - "step": 210099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.684615612030029, - "loss_rtd": 0.24644732475280762, - "loss_sent": 0.10543947666883469, - "loss_sod": 0.06935250014066696, - "loss_total": 0.42123931646347046, - "step": 210099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.0123628377914429, - "learning_rate": 3.212634532411133e-05, - "loss": 0.4537, - "step": 210100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.729521751403809, - "loss_rtd": 0.23756033182144165, - "loss_sent": 0.19801343977451324, - "loss_sod": 0.07395317405462265, - "loss_total": 0.5095269680023193, - "step": 210199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.19659423828125, - "loss_rtd": 0.2312103807926178, - "loss_sent": 0.03671765699982643, - "loss_sod": 0.11782599985599518, - "loss_total": 0.3857540488243103, - "step": 210199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.8746253252029419, - "learning_rate": 3.209671249140171e-05, - "loss": 0.4708, - "step": 210200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.612518310546875, - "loss_rtd": 0.24346370995044708, - "loss_sent": 0.37320858240127563, - "loss_sod": 0.01649416610598564, - "loss_total": 0.6331664323806763, - "step": 210299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.739737033843994, - "loss_rtd": 0.253539502620697, - "loss_sent": 0.02703433856368065, - "loss_sod": 0.05502090975642204, - "loss_total": 0.3355947434902191, - "step": 210299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.2408486604690552, - "learning_rate": 3.206708687013414e-05, - "loss": 0.4774, - "step": 210300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.601258754730225, - "loss_rtd": 0.2331903576850891, - "loss_sent": 0.12105364352464676, - "loss_sod": 0.09930294007062912, - "loss_total": 0.453546941280365, - "step": 210399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.636895179748535, - "loss_rtd": 0.2383427917957306, - "loss_sent": 0.16379112005233765, - "loss_sod": 0.012728769332170486, - "loss_total": 0.4148626923561096, - "step": 210399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.1434956789016724, - "learning_rate": 3.203746847224185e-05, - "loss": 0.4732, - "step": 210400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.337172508239746, - "loss_rtd": 0.21960227191448212, - "loss_sent": 0.20413963496685028, - "loss_sod": 0.08904212713241577, - "loss_total": 0.5127840638160706, - "step": 210499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.146918296813965, - "loss_rtd": 0.20090530812740326, - "loss_sent": 0.12208490073680878, - "loss_sod": 0.06480234861373901, - "loss_total": 0.38779258728027344, - "step": 210499 - }, - { - "epoch": 0.005, - "grad_norm": 1.1932604312896729, - "learning_rate": 3.20078573096551e-05, - "loss": 0.4668, - "step": 210500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.558023452758789, - "loss_rtd": 0.24844017624855042, - "loss_sent": 0.1205759197473526, - "loss_sod": 0.003456368576735258, - "loss_total": 0.37247246503829956, - "step": 210599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.625892162322998, - "loss_rtd": 0.24933810532093048, - "loss_sent": 0.06842228025197983, - "loss_sod": 0.11991280317306519, - "loss_total": 0.4376731812953949, - "step": 210599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.8431463837623596, - "learning_rate": 3.197825339430128e-05, - "loss": 0.4666, - "step": 210600 - }, - { - "epoch": 0.005398, - "loss_gen": 6.091310024261475, - "loss_rtd": 0.2611507773399353, - "loss_sent": 0.5253362655639648, - "loss_sod": 0.12528419494628906, - "loss_total": 0.9117712378501892, - "step": 210699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.429983139038086, - "loss_rtd": 0.24127110838890076, - "loss_sent": 0.33283531665802, - "loss_sod": 0.09813092648983002, - "loss_total": 0.6722373366355896, - "step": 210699 - }, - { - "epoch": 0.0054, - "grad_norm": 2.887056350708008, - "learning_rate": 3.194865673810483e-05, - "loss": 0.4643, - "step": 210700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.702674388885498, - "loss_rtd": 0.2532796263694763, - "loss_sent": 0.2713761329650879, - "loss_sod": 0.1055951863527298, - "loss_total": 0.6302509307861328, - "step": 210799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.545117378234863, - "loss_rtd": 0.25731727480888367, - "loss_sent": 0.09072793275117874, - "loss_sod": 0.050606194883584976, - "loss_total": 0.3986513912677765, - "step": 210799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.428985834121704, - "learning_rate": 3.191906735298729e-05, - "loss": 0.471, - "step": 210800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.364992618560791, - "loss_rtd": 0.2524144649505615, - "loss_sent": 0.08133655041456223, - "loss_sod": 0.08424169570207596, - "loss_total": 0.4179927110671997, - "step": 210899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.39879846572876, - "loss_rtd": 0.24170148372650146, - "loss_sent": 0.18533536791801453, - "loss_sod": 0.06258462369441986, - "loss_total": 0.48962149024009705, - "step": 210899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.7437417507171631, - "learning_rate": 3.1889485250867265e-05, - "loss": 0.48, - "step": 210900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.75720739364624, - "loss_rtd": 0.25696682929992676, - "loss_sent": 0.22219933569431305, - "loss_sod": 0.03509850800037384, - "loss_total": 0.5142646431922913, - "step": 210999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.4086527824401855, - "loss_rtd": 0.22480274736881256, - "loss_sent": 0.4734490215778351, - "loss_sod": 0.03117639757692814, - "loss_total": 0.7294281721115112, - "step": 210999 - }, - { - "epoch": 0.006, - "grad_norm": 1.9705997705459595, - "learning_rate": 3.1859910443660405e-05, - "loss": 0.4565, - "step": 211000 - }, - { - "epoch": 0.006, - "eval_loss": 0.4437921643257141, - "eval_runtime": 151.2311, - "eval_samples_per_second": 102.115, - "eval_steps_per_second": 0.8, - "step": 211000 - }, - { - "epoch": 0.006198, - "loss_gen": 6.2043352127075195, - "loss_rtd": 0.2633081376552582, - "loss_sent": 0.07349584996700287, - "loss_sod": 0.15775038301944733, - "loss_total": 0.49455440044403076, - "step": 211099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.431703090667725, - "loss_rtd": 0.2513551115989685, - "loss_sent": 0.044419560581445694, - "loss_sod": 0.017070291563868523, - "loss_total": 0.3128449618816376, - "step": 211099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.8284661173820496, - "learning_rate": 3.183034294327946e-05, - "loss": 0.4516, - "step": 211100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.562857151031494, - "loss_rtd": 0.2520465552806854, - "loss_sent": 0.3894054889678955, - "loss_sod": 0.11704543232917786, - "loss_total": 0.7584974765777588, - "step": 211199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.668973445892334, - "loss_rtd": 0.25732630491256714, - "loss_sent": 0.20670296251773834, - "loss_sod": 0.05067497491836548, - "loss_total": 0.5147042274475098, - "step": 211199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.652125597000122, - "learning_rate": 3.18007827616342e-05, - "loss": 0.4687, - "step": 211200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.724977016448975, - "loss_rtd": 0.2492094337940216, - "loss_sent": 0.16580648720264435, - "loss_sod": 0.017729351297020912, - "loss_total": 0.4327452778816223, - "step": 211299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.394907474517822, - "loss_rtd": 0.24357140064239502, - "loss_sent": 0.23959758877754211, - "loss_sod": 0.04065662622451782, - "loss_total": 0.5238256454467773, - "step": 211299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.1622283458709717, - "learning_rate": 3.177122991063146e-05, - "loss": 0.4833, - "step": 211300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.4965925216674805, - "loss_rtd": 0.24693022668361664, - "loss_sent": 0.15063565969467163, - "loss_sod": 0.020055752247571945, - "loss_total": 0.4176216423511505, - "step": 211399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.532601356506348, - "loss_rtd": 0.2507868707180023, - "loss_sent": 0.14384964108467102, - "loss_sod": 0.03301244601607323, - "loss_total": 0.42764896154403687, - "step": 211399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.9734103083610535, - "learning_rate": 3.174168440217514e-05, - "loss": 0.4807, - "step": 211400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.537749767303467, - "loss_rtd": 0.2611582577228546, - "loss_sent": 0.2690557539463043, - "loss_sod": 0.07254573702812195, - "loss_total": 0.6027597188949585, - "step": 211499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.112955570220947, - "loss_rtd": 0.21495188772678375, - "loss_sent": 0.022779934108257294, - "loss_sod": 0.2705698013305664, - "loss_total": 0.5083016157150269, - "step": 211499 - }, - { - "epoch": 0.007, - "grad_norm": 1.480070948600769, - "learning_rate": 3.171214624816616e-05, - "loss": 0.4694, - "step": 211500 - }, - { - "epoch": 0.007198, - "loss_gen": 4.822688579559326, - "loss_rtd": 0.19313618540763855, - "loss_sent": 8.413719478994608e-05, - "loss_sod": 0.17681023478507996, - "loss_total": 0.37003055214881897, - "step": 211599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.394720554351807, - "loss_rtd": 0.23504839837551117, - "loss_sent": 0.09636963903903961, - "loss_sod": 0.07450568675994873, - "loss_total": 0.4059237241744995, - "step": 211599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.0673494338989258, - "learning_rate": 3.1682615460502474e-05, - "loss": 0.4766, - "step": 211600 - }, - { - "epoch": 0.007398, - "loss_gen": 6.004249572753906, - "loss_rtd": 0.25233152508735657, - "loss_sent": 0.05808437243103981, - "loss_sod": 0.05536285787820816, - "loss_total": 0.36577874422073364, - "step": 211699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.868964195251465, - "loss_rtd": 0.24131612479686737, - "loss_sent": 0.17636896669864655, - "loss_sod": 0.035525646060705185, - "loss_total": 0.453210711479187, - "step": 211699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.89599609375, - "learning_rate": 3.1653092051079104e-05, - "loss": 0.4694, - "step": 211700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.826862812042236, - "loss_rtd": 0.2411191612482071, - "loss_sent": 0.15905123949050903, - "loss_sod": 0.08179174363613129, - "loss_total": 0.4819621443748474, - "step": 211799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.79934024810791, - "loss_rtd": 0.23552261292934418, - "loss_sent": 0.16693347692489624, - "loss_sod": 0.048653826117515564, - "loss_total": 0.451109915971756, - "step": 211799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.0250190496444702, - "learning_rate": 3.162357603178805e-05, - "loss": 0.4608, - "step": 211800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.756589889526367, - "loss_rtd": 0.22902975976467133, - "loss_sent": 0.22040459513664246, - "loss_sod": 0.05062992870807648, - "loss_total": 0.5000642538070679, - "step": 211899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.970505714416504, - "loss_rtd": 0.240070179104805, - "loss_sent": 0.3103477656841278, - "loss_sod": 0.036664918065071106, - "loss_total": 0.5870828628540039, - "step": 211899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.3604886531829834, - "learning_rate": 3.159406741451838e-05, - "loss": 0.4661, - "step": 211900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.4524245262146, - "loss_rtd": 0.22372019290924072, - "loss_sent": 0.22474953532218933, - "loss_sod": 0.057086482644081116, - "loss_total": 0.5055562257766724, - "step": 211999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.881251335144043, - "loss_rtd": 0.24504905939102173, - "loss_sent": 0.10071533173322678, - "loss_sod": 0.09807638823986053, - "loss_total": 0.44384080171585083, - "step": 211999 - }, - { - "epoch": 0.008, - "grad_norm": 2.14951491355896, - "learning_rate": 3.156456621115615e-05, - "loss": 0.4727, - "step": 212000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4430355131626129, - "eval_runtime": 152.9269, - "eval_samples_per_second": 100.983, - "eval_steps_per_second": 0.791, - "step": 212000 - }, - { - "epoch": 0.008198, - "loss_gen": 4.8363118171691895, - "loss_rtd": 0.21359658241271973, - "loss_sent": 0.019240180030465126, - "loss_sod": 0.11113185435533524, - "loss_total": 0.34396860003471375, - "step": 212099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.640757083892822, - "loss_rtd": 0.25492802262306213, - "loss_sent": 0.1972058117389679, - "loss_sod": 0.03543057292699814, - "loss_total": 0.48756441473960876, - "step": 212099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.6279405951499939, - "learning_rate": 3.153507243358445e-05, - "loss": 0.4818, - "step": 212100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.463850975036621, - "loss_rtd": 0.23923315107822418, - "loss_sent": 0.17068266868591309, - "loss_sod": 0.06000015512108803, - "loss_total": 0.4699159860610962, - "step": 212199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.92637825012207, - "loss_rtd": 0.2557355463504791, - "loss_sent": 0.206705704331398, - "loss_sod": 0.07056833058595657, - "loss_total": 0.5330095887184143, - "step": 212199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.1855320930480957, - "learning_rate": 3.150558609368336e-05, - "loss": 0.4519, - "step": 212200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.1901140213012695, - "loss_rtd": 0.21692410111427307, - "loss_sent": 0.03507065400481224, - "loss_sod": 0.01112939789891243, - "loss_total": 0.26312413811683655, - "step": 212299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.64489221572876, - "loss_rtd": 0.22842562198638916, - "loss_sent": 0.20782798528671265, - "loss_sod": 0.06458929926156998, - "loss_total": 0.5008429288864136, - "step": 212299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.7970889210700989, - "learning_rate": 3.147610720332998e-05, - "loss": 0.4621, - "step": 212300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.243655204772949, - "loss_rtd": 0.25343963503837585, - "loss_sent": 0.19937506318092346, - "loss_sod": 0.006845582742244005, - "loss_total": 0.45966029167175293, - "step": 212399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.747071266174316, - "loss_rtd": 0.262832373380661, - "loss_sent": 0.09505753964185715, - "loss_sod": 0.07253297418355942, - "loss_total": 0.43042290210723877, - "step": 212399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.6185807585716248, - "learning_rate": 3.144663577439842e-05, - "loss": 0.4588, - "step": 212400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.078952312469482, - "loss_rtd": 0.1971844732761383, - "loss_sent": 6.208645936567336e-05, - "loss_sod": 0.06430843472480774, - "loss_total": 0.2615549862384796, - "step": 212499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.409107208251953, - "loss_rtd": 0.19337983429431915, - "loss_sent": 3.095667852903716e-05, - "loss_sod": 0.17032751441001892, - "loss_total": 0.3637383282184601, - "step": 212499 - }, - { - "epoch": 0.009, - "grad_norm": 1.218471884727478, - "learning_rate": 3.141717181875973e-05, - "loss": 0.4688, - "step": 212500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.532618999481201, - "loss_rtd": 0.24023999273777008, - "loss_sent": 0.2433645874261856, - "loss_sod": 0.05891285836696625, - "loss_total": 0.5425174236297607, - "step": 212599 - }, - { - "epoch": 0.009198, - "loss_gen": 4.952725887298584, - "loss_rtd": 0.2123037874698639, - "loss_sent": 0.038688044995069504, - "loss_sod": 0.08232495933771133, - "loss_total": 0.33331677317619324, - "step": 212599 - }, - { - "epoch": 0.0092, - "grad_norm": 2.4606971740722656, - "learning_rate": 3.1387715348282023e-05, - "loss": 0.4663, - "step": 212600 - }, - { - "epoch": 0.009398, - "loss_gen": 4.907297134399414, - "loss_rtd": 0.1979868859052658, - "loss_sent": 4.540507507044822e-05, - "loss_sod": 0.10434964299201965, - "loss_total": 0.3023819327354431, - "step": 212699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.306654930114746, - "loss_rtd": 0.2169453650712967, - "loss_sent": 0.06280342489480972, - "loss_sod": 0.041878774762153625, - "loss_total": 0.32162755727767944, - "step": 212699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.6663098931312561, - "learning_rate": 3.1358266374830336e-05, - "loss": 0.4744, - "step": 212700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.580843925476074, - "loss_rtd": 0.2426462471485138, - "loss_sent": 0.5985530018806458, - "loss_sod": 0.07179553806781769, - "loss_total": 0.9129948019981384, - "step": 212799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.386800289154053, - "loss_rtd": 0.24027827382087708, - "loss_sent": 0.2527490258216858, - "loss_sod": 0.05511070787906647, - "loss_total": 0.5481380224227905, - "step": 212799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.92095148563385, - "learning_rate": 3.132882491026674e-05, - "loss": 0.4909, - "step": 212800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.439431190490723, - "loss_rtd": 0.24224305152893066, - "loss_sent": 0.11520251631736755, - "loss_sod": 0.061743177473545074, - "loss_total": 0.4191887378692627, - "step": 212899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.694076061248779, - "loss_rtd": 0.23508226871490479, - "loss_sent": 0.05061827227473259, - "loss_sod": 0.006715849507600069, - "loss_total": 0.29241639375686646, - "step": 212899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.7011203765869141, - "learning_rate": 3.1299390966450213e-05, - "loss": 0.4667, - "step": 212900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.152209758758545, - "loss_rtd": 0.22610633075237274, - "loss_sent": 3.1172719900496304e-05, - "loss_sod": 0.19216427206993103, - "loss_total": 0.4183017611503601, - "step": 212999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.097952365875244, - "loss_rtd": 0.21032899618148804, - "loss_sent": 3.7628306017722934e-05, - "loss_sod": 0.054617222398519516, - "loss_total": 0.2649838626384735, - "step": 212999 - }, - { - "epoch": 0.01, - "grad_norm": 0.7068494558334351, - "learning_rate": 3.1269964555236784e-05, - "loss": 0.4581, - "step": 213000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4454001784324646, - "eval_runtime": 151.4406, - "eval_samples_per_second": 101.974, - "eval_steps_per_second": 0.799, - "step": 213000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.744257926940918, - "loss_rtd": 0.25936928391456604, - "loss_sent": 0.047065265476703644, - "loss_sod": 0.03533713519573212, - "loss_total": 0.34177166223526, - "step": 213099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.667468070983887, - "loss_rtd": 0.23790179193019867, - "loss_sent": 0.3409661054611206, - "loss_sod": 0.03238163888454437, - "loss_total": 0.611249566078186, - "step": 213099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8624411821365356, - "learning_rate": 3.1240545688479394e-05, - "loss": 0.4715, - "step": 213100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.707870960235596, - "loss_rtd": 0.21306991577148438, - "loss_sent": 0.13571777939796448, - "loss_sod": 0.0220950897783041, - "loss_total": 0.3708827793598175, - "step": 213199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.601797580718994, - "loss_rtd": 0.24627207219600677, - "loss_sent": 0.15918178856372833, - "loss_sod": 0.00783846527338028, - "loss_total": 0.4132923483848572, - "step": 213199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.8530075550079346, - "learning_rate": 3.1211134378027955e-05, - "loss": 0.4818, - "step": 213200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.771250247955322, - "loss_rtd": 0.2444213181734085, - "loss_sent": 0.09979578107595444, - "loss_sod": 0.06902863830327988, - "loss_total": 0.4132457375526428, - "step": 213299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.803530693054199, - "loss_rtd": 0.26237183809280396, - "loss_sent": 0.2752484977245331, - "loss_sod": 0.02671178989112377, - "loss_total": 0.564332127571106, - "step": 213299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.8731642961502075, - "learning_rate": 3.118173063572933e-05, - "loss": 0.4707, - "step": 213300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.060603618621826, - "loss_rtd": 0.19803661108016968, - "loss_sent": 0.011174539104104042, - "loss_sod": 0.053331535309553146, - "loss_total": 0.2625426650047302, - "step": 213399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.629602909088135, - "loss_rtd": 0.2579532563686371, - "loss_sent": 0.0589432455599308, - "loss_sod": 0.046761203557252884, - "loss_total": 0.36365771293640137, - "step": 213399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.7239234447479248, - "learning_rate": 3.115233447342738e-05, - "loss": 0.4585, - "step": 213400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.775127410888672, - "loss_rtd": 0.25830426812171936, - "loss_sent": 0.17954021692276, - "loss_sod": 0.062176622450351715, - "loss_total": 0.5000211000442505, - "step": 213499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.7465338706970215, - "loss_rtd": 0.24478895962238312, - "loss_sent": 0.24995005130767822, - "loss_sod": 0.08784323185682297, - "loss_total": 0.5825822353363037, - "step": 213499 - }, - { - "epoch": 0.011, - "grad_norm": 0.9594159722328186, - "learning_rate": 3.112294590296283e-05, - "loss": 0.4485, - "step": 213500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.562532424926758, - "loss_rtd": 0.23189584910869598, - "loss_sent": 0.7027842402458191, - "loss_sod": 0.018722033128142357, - "loss_total": 0.9534021615982056, - "step": 213599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.744312763214111, - "loss_rtd": 0.24609750509262085, - "loss_sent": 0.21287801861763, - "loss_sod": 0.016435619443655014, - "loss_total": 0.47541114687919617, - "step": 213599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.6305056810379028, - "learning_rate": 3.109356493617341e-05, - "loss": 0.4674, - "step": 213600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.308304786682129, - "loss_rtd": 0.24485275149345398, - "loss_sent": 0.21994268894195557, - "loss_sod": 0.05680780112743378, - "loss_total": 0.5216032266616821, - "step": 213699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.640525817871094, - "loss_rtd": 0.2537594139575958, - "loss_sent": 0.2038225680589676, - "loss_sod": 0.01056704856455326, - "loss_total": 0.46814900636672974, - "step": 213699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.0676085948944092, - "learning_rate": 3.106419158489379e-05, - "loss": 0.4471, - "step": 213700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.560603141784668, - "loss_rtd": 0.23428739607334137, - "loss_sent": 0.4096875488758087, - "loss_sod": 0.05208686739206314, - "loss_total": 0.6960618495941162, - "step": 213799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.416528701782227, - "loss_rtd": 0.24423688650131226, - "loss_sent": 0.13170087337493896, - "loss_sod": 0.038248151540756226, - "loss_total": 0.41418591141700745, - "step": 213799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.8993825912475586, - "learning_rate": 3.1034825860955524e-05, - "loss": 0.4694, - "step": 213800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.59212064743042, - "loss_rtd": 0.2265229970216751, - "loss_sent": 0.10345450788736343, - "loss_sod": 0.09530478715896606, - "loss_total": 0.4252822697162628, - "step": 213899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.533003807067871, - "loss_rtd": 0.24870604276657104, - "loss_sent": 0.2663210928440094, - "loss_sod": 0.04454107582569122, - "loss_total": 0.5595681667327881, - "step": 213899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.0270558595657349, - "learning_rate": 3.100546777618713e-05, - "loss": 0.4778, - "step": 213900 - }, - { - "epoch": 0.011998, - "loss_gen": 4.885112285614014, - "loss_rtd": 0.2112845927476883, - "loss_sent": 0.02935801073908806, - "loss_sod": 0.04289938136935234, - "loss_total": 0.2835419774055481, - "step": 213999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.518467903137207, - "loss_rtd": 0.2470613420009613, - "loss_sent": 0.09954142570495605, - "loss_sod": 0.04146134480834007, - "loss_total": 0.38806411623954773, - "step": 213999 - }, - { - "epoch": 0.012, - "grad_norm": 0.7365216016769409, - "learning_rate": 3.0976117342414054e-05, - "loss": 0.462, - "step": 214000 - }, - { - "epoch": 0.012, - "eval_loss": 0.44605186581611633, - "eval_runtime": 151.7656, - "eval_samples_per_second": 101.756, - "eval_steps_per_second": 0.797, - "step": 214000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.769169330596924, - "loss_rtd": 0.24741266667842865, - "loss_sent": 0.40118569135665894, - "loss_sod": 0.010234126821160316, - "loss_total": 0.6588324904441833, - "step": 214099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.8204545974731445, - "loss_rtd": 0.23688404262065887, - "loss_sent": 0.23825155198574066, - "loss_sod": 0.0842704251408577, - "loss_total": 0.559406042098999, - "step": 214099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.2068729400634766, - "learning_rate": 3.0946774571458634e-05, - "loss": 0.4635, - "step": 214100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.7868146896362305, - "loss_rtd": 0.24271126091480255, - "loss_sent": 0.19455036520957947, - "loss_sod": 0.06991995871067047, - "loss_total": 0.5071815848350525, - "step": 214199 - }, - { - "epoch": 0.012398, - "loss_gen": 6.01089334487915, - "loss_rtd": 0.2621612846851349, - "loss_sent": 0.09373494237661362, - "loss_sod": 0.10824918001890182, - "loss_total": 0.4641454219818115, - "step": 214199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.5242117643356323, - "learning_rate": 3.091743947514013e-05, - "loss": 0.461, - "step": 214200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.009282112121582, - "loss_rtd": 0.21023614704608917, - "loss_sent": 2.888362178055104e-05, - "loss_sod": 0.12804462015628815, - "loss_total": 0.338309645652771, - "step": 214299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.0859150886535645, - "loss_rtd": 0.2234998345375061, - "loss_sent": 0.23565302789211273, - "loss_sod": 0.03161570057272911, - "loss_total": 0.49076855182647705, - "step": 214299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.975635826587677, - "learning_rate": 3.088811206527474e-05, - "loss": 0.4556, - "step": 214300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.517728805541992, - "loss_rtd": 0.22543208301067352, - "loss_sent": 0.13504022359848022, - "loss_sod": 0.054832931607961655, - "loss_total": 0.4153052270412445, - "step": 214399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.704839706420898, - "loss_rtd": 0.24344506859779358, - "loss_sent": 0.15011349320411682, - "loss_sod": 0.029339928179979324, - "loss_total": 0.4228985011577606, - "step": 214399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.6345607042312622, - "learning_rate": 3.085879235367551e-05, - "loss": 0.4556, - "step": 214400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.6054511070251465, - "loss_rtd": 0.2383522391319275, - "loss_sent": 0.1786447912454605, - "loss_sod": 0.015524017624557018, - "loss_total": 0.4325210452079773, - "step": 214499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.389135360717773, - "loss_rtd": 0.24175548553466797, - "loss_sent": 0.14701464772224426, - "loss_sod": 0.05823016166687012, - "loss_total": 0.44700029492378235, - "step": 214499 - }, - { - "epoch": 0.013, - "grad_norm": 1.3787164688110352, - "learning_rate": 3.082948035215244e-05, - "loss": 0.4727, - "step": 214500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.542291164398193, - "loss_rtd": 0.22595494985580444, - "loss_sent": 0.1399892121553421, - "loss_sod": 0.032616619020700455, - "loss_total": 0.3985607624053955, - "step": 214599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.504732608795166, - "loss_rtd": 0.2170952707529068, - "loss_sent": 0.29960954189300537, - "loss_sod": 0.07303205132484436, - "loss_total": 0.5897368788719177, - "step": 214599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.189300775527954, - "learning_rate": 3.0800176072512374e-05, - "loss": 0.4733, - "step": 214600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.516160011291504, - "loss_rtd": 0.22539576888084412, - "loss_sent": 0.02846885845065117, - "loss_sod": 0.08762237429618835, - "loss_total": 0.34148699045181274, - "step": 214699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.72625207901001, - "loss_rtd": 0.2533041536808014, - "loss_sent": 0.28508323431015015, - "loss_sod": 0.02012855000793934, - "loss_total": 0.5585159063339233, - "step": 214699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.8739172220230103, - "learning_rate": 3.077087952655911e-05, - "loss": 0.4547, - "step": 214700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.518967151641846, - "loss_rtd": 0.233483225107193, - "loss_sent": 0.09542209655046463, - "loss_sod": 0.09760531038045883, - "loss_total": 0.42651063203811646, - "step": 214799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.508381366729736, - "loss_rtd": 0.24900829792022705, - "loss_sent": 0.5820159316062927, - "loss_sod": 0.01064244844019413, - "loss_total": 0.8416666984558105, - "step": 214799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.896393895149231, - "learning_rate": 3.074159072609326e-05, - "loss": 0.4751, - "step": 214800 - }, - { - "epoch": 0.013798, - "loss_gen": 6.465732574462891, - "loss_rtd": 0.2200365960597992, - "loss_sent": 0.11021783202886581, - "loss_sod": 0.13790738582611084, - "loss_total": 0.46816182136535645, - "step": 214899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.558566093444824, - "loss_rtd": 0.2123665064573288, - "loss_sent": 0.07727959752082825, - "loss_sod": 0.10284897685050964, - "loss_total": 0.3924950659275055, - "step": 214899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.7778891324996948, - "learning_rate": 3.071230968291238e-05, - "loss": 0.4701, - "step": 214900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.915468692779541, - "loss_rtd": 0.24788525700569153, - "loss_sent": 0.10812418162822723, - "loss_sod": 0.06854336708784103, - "loss_total": 0.4245527982711792, - "step": 214999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.7604546546936035, - "loss_rtd": 0.23790998756885529, - "loss_sent": 0.21076975762844086, - "loss_sod": 0.04442159831523895, - "loss_total": 0.4931013584136963, - "step": 214999 - }, - { - "epoch": 0.014, - "grad_norm": 0.8312272429466248, - "learning_rate": 3.068303640881083e-05, - "loss": 0.4646, - "step": 215000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4383068084716797, - "eval_runtime": 151.4463, - "eval_samples_per_second": 101.97, - "eval_steps_per_second": 0.799, - "step": 215000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.824502944946289, - "loss_rtd": 0.25133025646209717, - "loss_sent": 0.21859335899353027, - "loss_sod": 0.06594441831111908, - "loss_total": 0.5358680486679077, - "step": 215099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.6651177406311035, - "loss_rtd": 0.23057860136032104, - "loss_sent": 0.18085774779319763, - "loss_sod": 0.06961269676685333, - "loss_total": 0.4810490310192108, - "step": 215099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.818749189376831, - "learning_rate": 3.065377091557993e-05, - "loss": 0.462, - "step": 215100 - }, - { - "epoch": 0.014398, - "loss_gen": 4.954616546630859, - "loss_rtd": 0.2156609445810318, - "loss_sent": 3.6461002309806645e-05, - "loss_sod": 0.14588233828544617, - "loss_total": 0.3615797460079193, - "step": 215199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.063547611236572, - "loss_rtd": 0.21089325845241547, - "loss_sent": 0.07102958858013153, - "loss_sod": 0.047408923506736755, - "loss_total": 0.32933175563812256, - "step": 215199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.8162667155265808, - "learning_rate": 3.062451321500777e-05, - "loss": 0.4598, - "step": 215200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.6063737869262695, - "loss_rtd": 0.24424877762794495, - "loss_sent": 0.10496512800455093, - "loss_sod": 0.07444434612989426, - "loss_total": 0.42365825176239014, - "step": 215299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.617539405822754, - "loss_rtd": 0.24557539820671082, - "loss_sent": 0.09547129273414612, - "loss_sod": 0.023936942219734192, - "loss_total": 0.3649836480617523, - "step": 215299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.67758709192276, - "learning_rate": 3.059526331887938e-05, - "loss": 0.4583, - "step": 215300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.776050090789795, - "loss_rtd": 0.23236040771007538, - "loss_sent": 0.19112221896648407, - "loss_sod": 0.12273884564638138, - "loss_total": 0.5462214946746826, - "step": 215399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.921604156494141, - "loss_rtd": 0.25309666991233826, - "loss_sent": 0.376557856798172, - "loss_sod": 0.1523684859275818, - "loss_total": 0.782023012638092, - "step": 215399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.8483816385269165, - "learning_rate": 3.056602123897659e-05, - "loss": 0.4733, - "step": 215400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.675897598266602, - "loss_rtd": 0.2582680284976959, - "loss_sent": 0.17964158952236176, - "loss_sod": 0.02270853891968727, - "loss_total": 0.46061813831329346, - "step": 215499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.974243640899658, - "loss_rtd": 0.2546817362308502, - "loss_sent": 0.05891652777791023, - "loss_sod": 0.15968403220176697, - "loss_total": 0.4732823073863983, - "step": 215499 - }, - { - "epoch": 0.015, - "grad_norm": 0.9689565896987915, - "learning_rate": 3.0536786987078146e-05, - "loss": 0.4789, - "step": 215500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.560499668121338, - "loss_rtd": 0.24256254732608795, - "loss_sent": 0.14838965237140656, - "loss_sod": 0.04218659549951553, - "loss_total": 0.43313878774642944, - "step": 215599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.735309600830078, - "loss_rtd": 0.23040291666984558, - "loss_sent": 0.1612355262041092, - "loss_sod": 0.13364312052726746, - "loss_total": 0.525281548500061, - "step": 215599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.5442880392074585, - "learning_rate": 3.050756057495957e-05, - "loss": 0.4799, - "step": 215600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.064610958099365, - "loss_rtd": 0.22517243027687073, - "loss_sent": 0.057054758071899414, - "loss_sod": 0.07848721742630005, - "loss_total": 0.3607144057750702, - "step": 215699 - }, - { - "epoch": 0.015398, - "loss_gen": 4.957058906555176, - "loss_rtd": 0.20025552809238434, - "loss_sent": 0.0676516443490982, - "loss_sod": 0.05821962654590607, - "loss_total": 0.3261268138885498, - "step": 215699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.7563294768333435, - "learning_rate": 3.0478342014393245e-05, - "loss": 0.473, - "step": 215700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.815528869628906, - "loss_rtd": 0.24377046525478363, - "loss_sent": 0.13200443983078003, - "loss_sod": 0.08974200487136841, - "loss_total": 0.46551692485809326, - "step": 215799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.621831893920898, - "loss_rtd": 0.24012672901153564, - "loss_sent": 0.1662404090166092, - "loss_sod": 0.004909290932118893, - "loss_total": 0.4112764298915863, - "step": 215799 - }, - { - "epoch": 0.0156, - "grad_norm": 2.114844560623169, - "learning_rate": 3.044913131714844e-05, - "loss": 0.4698, - "step": 215800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.884778022766113, - "loss_rtd": 0.23560364544391632, - "loss_sent": 0.07110671699047089, - "loss_sod": 0.13425633311271667, - "loss_total": 0.44096672534942627, - "step": 215899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.541790008544922, - "loss_rtd": 0.2510862648487091, - "loss_sent": 0.2857402563095093, - "loss_sod": 0.030730176717042923, - "loss_total": 0.5675567388534546, - "step": 215899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.1384119987487793, - "learning_rate": 3.041992849499119e-05, - "loss": 0.4574, - "step": 215900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.798852443695068, - "loss_rtd": 0.25009506940841675, - "loss_sent": 0.23937390744686127, - "loss_sod": 0.01792096719145775, - "loss_total": 0.5073899626731873, - "step": 215999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.422009468078613, - "loss_rtd": 0.23141589760780334, - "loss_sent": 0.04138416051864624, - "loss_sod": 0.0043543362990021706, - "loss_total": 0.2771543860435486, - "step": 215999 - }, - { - "epoch": 0.016, - "grad_norm": 0.6650428175926208, - "learning_rate": 3.0390733559684413e-05, - "loss": 0.4648, - "step": 216000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4469378590583801, - "eval_runtime": 151.9435, - "eval_samples_per_second": 101.636, - "eval_steps_per_second": 0.796, - "step": 216000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.508726596832275, - "loss_rtd": 0.24046586453914642, - "loss_sent": 0.10226985067129135, - "loss_sod": 0.054271720349788666, - "loss_total": 0.39700743556022644, - "step": 216099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.240207672119141, - "loss_rtd": 0.24352088570594788, - "loss_sent": 6.022010711603798e-05, - "loss_sod": 0.11144635826349258, - "loss_total": 0.3550274670124054, - "step": 216099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.9859812259674072, - "learning_rate": 3.0361546522987804e-05, - "loss": 0.4586, - "step": 216100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.6960859298706055, - "loss_rtd": 0.2316541224718094, - "loss_sent": 0.08846792578697205, - "loss_sod": 0.04236774891614914, - "loss_total": 0.36248978972435, - "step": 216199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.228146076202393, - "loss_rtd": 0.24127553403377533, - "loss_sent": 8.796445763437077e-05, - "loss_sod": 0.17378482222557068, - "loss_total": 0.4151483178138733, - "step": 216199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.9168807864189148, - "learning_rate": 3.0332367396657914e-05, - "loss": 0.478, - "step": 216200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.7294535636901855, - "loss_rtd": 0.24016699194908142, - "loss_sent": 0.22297443449497223, - "loss_sod": 0.038633398711681366, - "loss_total": 0.5017748475074768, - "step": 216299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.633352279663086, - "loss_rtd": 0.22915062308311462, - "loss_sent": 0.2604297697544098, - "loss_sod": 0.09406111389398575, - "loss_total": 0.583641529083252, - "step": 216299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8506834506988525, - "learning_rate": 3.0303196192448075e-05, - "loss": 0.4725, - "step": 216300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.817175388336182, - "loss_rtd": 0.24774198234081268, - "loss_sent": 0.08555918186903, - "loss_sod": 0.04189221188426018, - "loss_total": 0.37519338726997375, - "step": 216399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.58889627456665, - "loss_rtd": 0.257211297750473, - "loss_sent": 0.40883737802505493, - "loss_sod": 0.029644910246133804, - "loss_total": 0.6956936120986938, - "step": 216399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.9701164364814758, - "learning_rate": 3.0274032922108465e-05, - "loss": 0.4728, - "step": 216400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.23891544342041, - "loss_rtd": 0.23464882373809814, - "loss_sent": 0.029688643291592598, - "loss_sod": 0.08234697580337524, - "loss_total": 0.34668445587158203, - "step": 216499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.186557292938232, - "loss_rtd": 0.22171097993850708, - "loss_sent": 0.0829959437251091, - "loss_sod": 0.014110023155808449, - "loss_total": 0.3188169598579407, - "step": 216499 - }, - { - "epoch": 0.017, - "grad_norm": 0.6848690509796143, - "learning_rate": 3.024487759738602e-05, - "loss": 0.4425, - "step": 216500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.832528591156006, - "loss_rtd": 0.23400872945785522, - "loss_sent": 0.061081450432538986, - "loss_sod": 0.055188536643981934, - "loss_total": 0.35027870535850525, - "step": 216599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.990908622741699, - "loss_rtd": 0.25657305121421814, - "loss_sent": 0.15799954533576965, - "loss_sod": 0.04473128914833069, - "loss_total": 0.4593038856983185, - "step": 216599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.826411783695221, - "learning_rate": 3.021573023002453e-05, - "loss": 0.449, - "step": 216600 - }, - { - "epoch": 0.017398, - "loss_gen": 6.292687892913818, - "loss_rtd": 0.24653609097003937, - "loss_sent": 0.1989891529083252, - "loss_sod": 0.1252911388874054, - "loss_total": 0.5708163976669312, - "step": 216699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.649014472961426, - "loss_rtd": 0.24311646819114685, - "loss_sent": 0.10323931276798248, - "loss_sod": 0.03465801477432251, - "loss_total": 0.38101381063461304, - "step": 216699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.5302157402038574, - "learning_rate": 3.0186590831764534e-05, - "loss": 0.467, - "step": 216700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.707921981811523, - "loss_rtd": 0.2469438761472702, - "loss_sent": 0.4266524314880371, - "loss_sod": 0.0603007897734642, - "loss_total": 0.7338970899581909, - "step": 216799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.979853630065918, - "loss_rtd": 0.25102096796035767, - "loss_sent": 0.1520538479089737, - "loss_sod": 0.01745704561471939, - "loss_total": 0.42053186893463135, - "step": 216799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.536612629890442, - "learning_rate": 3.015745941434338e-05, - "loss": 0.4784, - "step": 216800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.374979019165039, - "loss_rtd": 0.21934618055820465, - "loss_sent": 0.08120442926883698, - "loss_sod": 0.0021703036036342382, - "loss_total": 0.30272090435028076, - "step": 216899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.738278865814209, - "loss_rtd": 0.24041594564914703, - "loss_sent": 0.19931069016456604, - "loss_sod": 0.009540720842778683, - "loss_total": 0.44926735758781433, - "step": 216899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.6462528705596924, - "learning_rate": 3.0128335989495205e-05, - "loss": 0.4729, - "step": 216900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.295807838439941, - "loss_rtd": 0.21960827708244324, - "loss_sent": 0.11986997723579407, - "loss_sod": 0.11057955026626587, - "loss_total": 0.4500578045845032, - "step": 216999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.198234558105469, - "loss_rtd": 0.22012540698051453, - "loss_sent": 0.00014326247037388384, - "loss_sod": 0.13883469998836517, - "loss_total": 0.35910335183143616, - "step": 216999 - }, - { - "epoch": 0.018, - "grad_norm": 0.9709426760673523, - "learning_rate": 3.0099220568950915e-05, - "loss": 0.4694, - "step": 217000 - }, - { - "epoch": 0.018, - "eval_loss": 0.4433152377605438, - "eval_runtime": 152.6746, - "eval_samples_per_second": 101.15, - "eval_steps_per_second": 0.793, - "step": 217000 - }, - { - "epoch": 0.018198, - "loss_gen": 4.91729736328125, - "loss_rtd": 0.20390455424785614, - "loss_sent": 3.083745832554996e-05, - "loss_sod": 0.11846113204956055, - "loss_total": 0.32239654660224915, - "step": 217099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.067019939422607, - "loss_rtd": 0.2096415013074875, - "loss_sent": 0.00343311601318419, - "loss_sod": 0.10725976526737213, - "loss_total": 0.32033437490463257, - "step": 217099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.031674861907959, - "learning_rate": 3.0070113164438197e-05, - "loss": 0.4601, - "step": 217100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.6049909591674805, - "loss_rtd": 0.23252132534980774, - "loss_sent": 0.13186942040920258, - "loss_sod": 0.02871989831328392, - "loss_total": 0.39311063289642334, - "step": 217199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.454476356506348, - "loss_rtd": 0.22120122611522675, - "loss_sent": 0.17035096883773804, - "loss_sod": 0.028199102729558945, - "loss_total": 0.41975128650665283, - "step": 217199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.1801053285598755, - "learning_rate": 3.0041013787681516e-05, - "loss": 0.455, - "step": 217200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.490360260009766, - "loss_rtd": 0.24848321080207825, - "loss_sent": 0.18496976792812347, - "loss_sod": 0.03594896197319031, - "loss_total": 0.4694019556045532, - "step": 217299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.519717693328857, - "loss_rtd": 0.20764923095703125, - "loss_sent": 0.09661871194839478, - "loss_sod": 0.046689342707395554, - "loss_total": 0.3509572744369507, - "step": 217299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.180071234703064, - "learning_rate": 3.0011922450402096e-05, - "loss": 0.4556, - "step": 217300 - }, - { - "epoch": 0.018798, - "loss_gen": 6.0195794105529785, - "loss_rtd": 0.25551044940948486, - "loss_sent": 0.23748035728931427, - "loss_sod": 0.05200285464525223, - "loss_total": 0.5449936389923096, - "step": 217399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.733720302581787, - "loss_rtd": 0.23086370527744293, - "loss_sent": 0.08993496000766754, - "loss_sod": 0.019307363778352737, - "loss_total": 0.3401060104370117, - "step": 217399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.8362905979156494, - "learning_rate": 2.9982839164317917e-05, - "loss": 0.4708, - "step": 217400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.469005584716797, - "loss_rtd": 0.2607032060623169, - "loss_sent": 0.31628668308258057, - "loss_sod": 0.019380606710910797, - "loss_total": 0.5963704586029053, - "step": 217499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.559659481048584, - "loss_rtd": 0.25695350766181946, - "loss_sent": 0.27798107266426086, - "loss_sod": 0.09437217563390732, - "loss_total": 0.6293067932128906, - "step": 217499 - }, - { - "epoch": 0.019, - "grad_norm": 1.2468453645706177, - "learning_rate": 2.9953763941143735e-05, - "loss": 0.4776, - "step": 217500 - }, - { - "epoch": 0.019198, - "loss_gen": 4.993227005004883, - "loss_rtd": 0.22093051671981812, - "loss_sent": 3.197171099600382e-05, - "loss_sod": 0.10044729709625244, - "loss_total": 0.32140979170799255, - "step": 217599 - }, - { - "epoch": 0.019198, - "loss_gen": 4.977768898010254, - "loss_rtd": 0.22226789593696594, - "loss_sent": 3.39422476827167e-05, - "loss_sod": 0.10714846849441528, - "loss_total": 0.3294503092765808, - "step": 217599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.0557632446289062, - "learning_rate": 2.9924696792591034e-05, - "loss": 0.4656, - "step": 217600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.729475021362305, - "loss_rtd": 0.24771659076213837, - "loss_sent": 0.19890882074832916, - "loss_sod": 0.014270318672060966, - "loss_total": 0.46089571714401245, - "step": 217699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.104670524597168, - "loss_rtd": 0.2094206064939499, - "loss_sent": 0.010839566588401794, - "loss_sod": 0.17914612591266632, - "loss_total": 0.3994063138961792, - "step": 217699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.872069239616394, - "learning_rate": 2.989563773036807e-05, - "loss": 0.4609, - "step": 217700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.684610843658447, - "loss_rtd": 0.23152419924736023, - "loss_sent": 0.2436731457710266, - "loss_sod": 0.037578895688056946, - "loss_total": 0.512776255607605, - "step": 217799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.681646823883057, - "loss_rtd": 0.22867180407047272, - "loss_sent": 0.13524675369262695, - "loss_sod": 0.02578415721654892, - "loss_total": 0.389702707529068, - "step": 217799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.6499189138412476, - "learning_rate": 2.9866586766179815e-05, - "loss": 0.4668, - "step": 217800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.7797136306762695, - "loss_rtd": 0.22893859446048737, - "loss_sent": 0.08922241628170013, - "loss_sod": 0.049116350710392, - "loss_total": 0.3672773838043213, - "step": 217899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.348376750946045, - "loss_rtd": 0.22028933465480804, - "loss_sent": 0.009248141199350357, - "loss_sod": 0.09586643427610397, - "loss_total": 0.3254038989543915, - "step": 217899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.9610381722450256, - "learning_rate": 2.9837543911728017e-05, - "loss": 0.4883, - "step": 217900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.335222244262695, - "loss_rtd": 0.2449258416891098, - "loss_sent": 0.3830544054508209, - "loss_sod": 0.04035145789384842, - "loss_total": 0.6683317422866821, - "step": 217999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.649767875671387, - "loss_rtd": 0.2364692986011505, - "loss_sent": 0.21286125481128693, - "loss_sod": 0.0946025475859642, - "loss_total": 0.543933093547821, - "step": 217999 - }, - { - "epoch": 0.02, - "grad_norm": 1.0370113849639893, - "learning_rate": 2.980850917871111e-05, - "loss": 0.4561, - "step": 218000 - }, - { - "epoch": 0.02, - "eval_loss": 0.44908398389816284, - "eval_runtime": 154.2902, - "eval_samples_per_second": 100.091, - "eval_steps_per_second": 0.784, - "step": 218000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.748846530914307, - "loss_rtd": 0.25088027119636536, - "loss_sent": 0.7614478468894958, - "loss_sod": 0.08117273449897766, - "loss_total": 1.0935008525848389, - "step": 218099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.721073627471924, - "loss_rtd": 0.25364333391189575, - "loss_sent": 0.12141552567481995, - "loss_sod": 0.02959049493074417, - "loss_total": 0.4046493470668793, - "step": 218099 - }, - { - "epoch": 0.0202, - "grad_norm": 3.803954601287842, - "learning_rate": 2.977948257882429e-05, - "loss": 0.4768, - "step": 218100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.563726902008057, - "loss_rtd": 0.24683329463005066, - "loss_sent": 0.17280761897563934, - "loss_sod": 0.041086986660957336, - "loss_total": 0.4607279300689697, - "step": 218199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.701605796813965, - "loss_rtd": 0.24776841700077057, - "loss_sent": 0.22824575006961823, - "loss_sod": 0.09738124161958694, - "loss_total": 0.5733954310417175, - "step": 218199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.22172212600708, - "learning_rate": 2.975046412375947e-05, - "loss": 0.4874, - "step": 218200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.798618793487549, - "loss_rtd": 0.26276931166648865, - "loss_sent": 0.3364769220352173, - "loss_sod": 0.030972769483923912, - "loss_total": 0.6302189826965332, - "step": 218299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.930161476135254, - "loss_rtd": 0.2547016441822052, - "loss_sent": 0.22650833427906036, - "loss_sod": 0.15774983167648315, - "loss_total": 0.6389598250389099, - "step": 218299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.2376837730407715, - "learning_rate": 2.9721453825205286e-05, - "loss": 0.4583, - "step": 218300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.668736934661865, - "loss_rtd": 0.23904027044773102, - "loss_sent": 0.32486769556999207, - "loss_sod": 0.019937217235565186, - "loss_total": 0.5838451385498047, - "step": 218399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.828061103820801, - "loss_rtd": 0.22369907796382904, - "loss_sent": 0.18765747547149658, - "loss_sod": 0.023204054683446884, - "loss_total": 0.4345605969429016, - "step": 218399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8148056864738464, - "learning_rate": 2.969245169484709e-05, - "loss": 0.4571, - "step": 218400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.631268501281738, - "loss_rtd": 0.24726520478725433, - "loss_sent": 0.08655589818954468, - "loss_sod": 0.015350056812167168, - "loss_total": 0.34917116165161133, - "step": 218499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.873820781707764, - "loss_rtd": 0.24602143466472626, - "loss_sent": 0.1784927397966385, - "loss_sod": 0.039597295224666595, - "loss_total": 0.46411144733428955, - "step": 218499 - }, - { - "epoch": 0.021, - "grad_norm": 0.5884883403778076, - "learning_rate": 2.9663457744366896e-05, - "loss": 0.4745, - "step": 218500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.680235862731934, - "loss_rtd": 0.23177313804626465, - "loss_sent": 0.2758804261684418, - "loss_sod": 0.02068045549094677, - "loss_total": 0.5283340215682983, - "step": 218599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.674758434295654, - "loss_rtd": 0.23199044167995453, - "loss_sent": 0.010449514724314213, - "loss_sod": 0.14128607511520386, - "loss_total": 0.3837260603904724, - "step": 218599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.9589932560920715, - "learning_rate": 2.96344719854435e-05, - "loss": 0.4651, - "step": 218600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.597902297973633, - "loss_rtd": 0.23603561520576477, - "loss_sent": 0.2469548135995865, - "loss_sod": 0.05467989668250084, - "loss_total": 0.5376703143119812, - "step": 218699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.73699426651001, - "loss_rtd": 0.25086814165115356, - "loss_sent": 0.09403558075428009, - "loss_sod": 0.07857215404510498, - "loss_total": 0.42347586154937744, - "step": 218699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.5063176155090332, - "learning_rate": 2.960549442975236e-05, - "loss": 0.4792, - "step": 218700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.861974716186523, - "loss_rtd": 0.2633379101753235, - "loss_sent": 0.0994805097579956, - "loss_sod": 0.16226577758789062, - "loss_total": 0.5250841975212097, - "step": 218799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.505153179168701, - "loss_rtd": 0.24079662561416626, - "loss_sent": 0.1348225623369217, - "loss_sod": 0.04609474539756775, - "loss_total": 0.4217139482498169, - "step": 218799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.833949625492096, - "learning_rate": 2.957652508896561e-05, - "loss": 0.4548, - "step": 218800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.6427226066589355, - "loss_rtd": 0.24502190947532654, - "loss_sent": 0.05857028812170029, - "loss_sod": 0.025192689150571823, - "loss_total": 0.32878488302230835, - "step": 218899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.44520378112793, - "loss_rtd": 0.24716676771640778, - "loss_sent": 0.12982669472694397, - "loss_sod": 0.08698557317256927, - "loss_total": 0.463979035615921, - "step": 218899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.9194590449333191, - "learning_rate": 2.9547563974752123e-05, - "loss": 0.4728, - "step": 218900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.841884613037109, - "loss_rtd": 0.23181861639022827, - "loss_sent": 0.027558909729123116, - "loss_sod": 0.11836151778697968, - "loss_total": 0.3777390718460083, - "step": 218999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.55942440032959, - "loss_rtd": 0.2494911253452301, - "loss_sent": 0.27481240034103394, - "loss_sod": 0.06376910954713821, - "loss_total": 0.588072657585144, - "step": 218999 - }, - { - "epoch": 0.022, - "grad_norm": 1.2120897769927979, - "learning_rate": 2.9518611098777417e-05, - "loss": 0.4712, - "step": 219000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4352158010005951, - "eval_runtime": 152.1307, - "eval_samples_per_second": 101.511, - "eval_steps_per_second": 0.795, - "step": 219000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.482469081878662, - "loss_rtd": 0.22782304883003235, - "loss_sent": 0.2615703046321869, - "loss_sod": 0.032145604491233826, - "loss_total": 0.5215389728546143, - "step": 219099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.551905155181885, - "loss_rtd": 0.24674688279628754, - "loss_sent": 0.03384366258978844, - "loss_sod": 0.0707610696554184, - "loss_total": 0.35135161876678467, - "step": 219099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.7032610774040222, - "learning_rate": 2.9489666472703714e-05, - "loss": 0.4543, - "step": 219100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.787640571594238, - "loss_rtd": 0.2396640032529831, - "loss_sent": 0.3986879587173462, - "loss_sod": 0.1018485352396965, - "loss_total": 0.7402005195617676, - "step": 219199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.247024059295654, - "loss_rtd": 0.24130845069885254, - "loss_sent": 0.08370120078325272, - "loss_sod": 0.040330611169338226, - "loss_total": 0.3653402626514435, - "step": 219199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.0427066087722778, - "learning_rate": 2.9460730108189895e-05, - "loss": 0.4808, - "step": 219200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.795830249786377, - "loss_rtd": 0.24341337382793427, - "loss_sent": 0.07364793866872787, - "loss_sod": 0.0308077372610569, - "loss_total": 0.34786906838417053, - "step": 219299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.655640125274658, - "loss_rtd": 0.24741683900356293, - "loss_sent": 0.29452285170555115, - "loss_sod": 0.13073895871639252, - "loss_total": 0.6726786494255066, - "step": 219299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.4652475118637085, - "learning_rate": 2.943180201689154e-05, - "loss": 0.4561, - "step": 219300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.646549224853516, - "loss_rtd": 0.23614893853664398, - "loss_sent": 0.4527502655982971, - "loss_sod": 0.0017168745398521423, - "loss_total": 0.6906160712242126, - "step": 219399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.653653144836426, - "loss_rtd": 0.2556692957878113, - "loss_sent": 0.07633604109287262, - "loss_sod": 0.021083682775497437, - "loss_total": 0.35308903455734253, - "step": 219399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.9115483164787292, - "learning_rate": 2.940288221046087e-05, - "loss": 0.4611, - "step": 219400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.757050514221191, - "loss_rtd": 0.240886852145195, - "loss_sent": 0.09002269059419632, - "loss_sod": 0.015488551929593086, - "loss_total": 0.34639808535575867, - "step": 219499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.640953063964844, - "loss_rtd": 0.22654040157794952, - "loss_sent": 0.17745435237884521, - "loss_sod": 0.012498008087277412, - "loss_total": 0.416492760181427, - "step": 219499 - }, - { - "epoch": 0.023, - "grad_norm": 0.859093964099884, - "learning_rate": 2.9373970700546783e-05, - "loss": 0.4575, - "step": 219500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.375519752502441, - "loss_rtd": 0.21631726622581482, - "loss_sent": 0.04595290496945381, - "loss_sod": 0.06255972385406494, - "loss_total": 0.32482990622520447, - "step": 219599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.680385112762451, - "loss_rtd": 0.22726675868034363, - "loss_sent": 0.10205487906932831, - "loss_sod": 0.06129787489771843, - "loss_total": 0.39061951637268066, - "step": 219599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.7443668842315674, - "learning_rate": 2.9345067498794827e-05, - "loss": 0.4789, - "step": 219600 - }, - { - "epoch": 0.023398, - "loss_gen": 4.975682258605957, - "loss_rtd": 0.23272277414798737, - "loss_sent": 2.9425633329083212e-05, - "loss_sod": 0.18261456489562988, - "loss_total": 0.4153667390346527, - "step": 219699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.370975494384766, - "loss_rtd": 0.2152707278728485, - "loss_sent": 0.03215698525309563, - "loss_sod": 0.04176726192235947, - "loss_total": 0.2891949713230133, - "step": 219699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.7752969264984131, - "learning_rate": 2.9316172616847216e-05, - "loss": 0.4752, - "step": 219700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.661818981170654, - "loss_rtd": 0.2615804672241211, - "loss_sent": 0.3608298897743225, - "loss_sod": 0.018724482506513596, - "loss_total": 0.6411348581314087, - "step": 219799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.536441326141357, - "loss_rtd": 0.2420029491186142, - "loss_sent": 0.17182713747024536, - "loss_sod": 0.12364213168621063, - "loss_total": 0.5374722480773926, - "step": 219799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.6802945137023926, - "learning_rate": 2.92872860663428e-05, - "loss": 0.4949, - "step": 219800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.811741352081299, - "loss_rtd": 0.24488957226276398, - "loss_sent": 0.5066856741905212, - "loss_sod": 0.03712468221783638, - "loss_total": 0.7886999249458313, - "step": 219899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.2717061042785645, - "loss_rtd": 0.21546435356140137, - "loss_sent": 0.028289644047617912, - "loss_sod": 0.07698732614517212, - "loss_total": 0.32074132561683655, - "step": 219899 - }, - { - "epoch": 0.0238, - "grad_norm": 2.3763620853424072, - "learning_rate": 2.925840785891708e-05, - "loss": 0.453, - "step": 219900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.716613292694092, - "loss_rtd": 0.21320635080337524, - "loss_sent": 0.00033508040360175073, - "loss_sod": 0.1457812786102295, - "loss_total": 0.3593226969242096, - "step": 219999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.380373477935791, - "loss_rtd": 0.19935832917690277, - "loss_sent": 0.0003447000926826149, - "loss_sod": 0.29420581459999084, - "loss_total": 0.4939088225364685, - "step": 219999 - }, - { - "epoch": 0.024, - "grad_norm": 1.152647852897644, - "learning_rate": 2.9229538006202195e-05, - "loss": 0.4615, - "step": 220000 - }, - { - "epoch": 0.024, - "eval_loss": 0.44127845764160156, - "eval_runtime": 152.7968, - "eval_samples_per_second": 101.069, - "eval_steps_per_second": 0.792, - "step": 220000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.725831508636475, - "loss_rtd": 0.2538098990917206, - "loss_sent": 0.6349098086357117, - "loss_sod": 0.015414290130138397, - "loss_total": 0.9041340351104736, - "step": 220099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.566556453704834, - "loss_rtd": 0.246371790766716, - "loss_sent": 0.28911134600639343, - "loss_sod": 0.014819586649537086, - "loss_total": 0.5503027439117432, - "step": 220099 - }, - { - "epoch": 0.0242, - "grad_norm": 2.324589729309082, - "learning_rate": 2.920067651982692e-05, - "loss": 0.4619, - "step": 220100 - }, - { - "epoch": 0.024398, - "loss_gen": 4.885288715362549, - "loss_rtd": 0.20006316900253296, - "loss_sent": 0.00020059717644471675, - "loss_sod": 0.133328378200531, - "loss_total": 0.33359214663505554, - "step": 220199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.525906085968018, - "loss_rtd": 0.2554265558719635, - "loss_sent": 0.14584718644618988, - "loss_sod": 0.056197769939899445, - "loss_total": 0.45747148990631104, - "step": 220199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9548836350440979, - "learning_rate": 2.917182341141665e-05, - "loss": 0.4787, - "step": 220200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.831428050994873, - "loss_rtd": 0.22891171276569366, - "loss_sent": 0.16352984309196472, - "loss_sod": 0.045935772359371185, - "loss_total": 0.438377320766449, - "step": 220299 - }, - { - "epoch": 0.024598, - "loss_gen": 4.988731384277344, - "loss_rtd": 0.21196728944778442, - "loss_sent": 0.07420089095830917, - "loss_sod": 0.03989127650856972, - "loss_total": 0.3260594606399536, - "step": 220299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.7816998362541199, - "learning_rate": 2.9142978692593435e-05, - "loss": 0.4558, - "step": 220300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.981966018676758, - "loss_rtd": 0.23611585795879364, - "loss_sent": 0.0951041504740715, - "loss_sod": 0.04287397116422653, - "loss_total": 0.3740939795970917, - "step": 220399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.942696571350098, - "loss_rtd": 0.25359106063842773, - "loss_sent": 0.12361510097980499, - "loss_sod": 0.20155510306358337, - "loss_total": 0.5787612795829773, - "step": 220399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.8744722604751587, - "learning_rate": 2.911414237497589e-05, - "loss": 0.4598, - "step": 220400 - }, - { - "epoch": 0.024998, - "loss_gen": 6.028652191162109, - "loss_rtd": 0.24679096043109894, - "loss_sent": 0.15192911028862, - "loss_sod": 0.032262980937957764, - "loss_total": 0.4309830367565155, - "step": 220499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.859983444213867, - "loss_rtd": 0.22552786767482758, - "loss_sent": 0.21342353522777557, - "loss_sod": 0.05349688231945038, - "loss_total": 0.49244827032089233, - "step": 220499 - }, - { - "epoch": 0.025, - "grad_norm": 1.909841537475586, - "learning_rate": 2.9085314470179298e-05, - "loss": 0.4581, - "step": 220500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.959765434265137, - "loss_rtd": 0.248510479927063, - "loss_sent": 0.3497915267944336, - "loss_sod": 0.04938121140003204, - "loss_total": 0.6476832032203674, - "step": 220599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.7115397453308105, - "loss_rtd": 0.24150577187538147, - "loss_sent": 0.1365508884191513, - "loss_sod": 0.11778974533081055, - "loss_total": 0.49584639072418213, - "step": 220599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.3502163887023926, - "learning_rate": 2.9056494989815535e-05, - "loss": 0.4622, - "step": 220600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.561402797698975, - "loss_rtd": 0.24348825216293335, - "loss_sent": 0.2194337546825409, - "loss_sod": 0.006533219013363123, - "loss_total": 0.46945521235466003, - "step": 220699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.872133255004883, - "loss_rtd": 0.2291983813047409, - "loss_sent": 0.12904326617717743, - "loss_sod": 0.018771955743432045, - "loss_total": 0.377013623714447, - "step": 220699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8810868263244629, - "learning_rate": 2.902768394549311e-05, - "loss": 0.4449, - "step": 220700 - }, - { - "epoch": 0.025598, - "loss_gen": 6.152884483337402, - "loss_rtd": 0.24259310960769653, - "loss_sent": 0.03288838267326355, - "loss_sod": 0.3722909092903137, - "loss_total": 0.6477724313735962, - "step": 220799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.881531715393066, - "loss_rtd": 0.22413040697574615, - "loss_sent": 0.21983295679092407, - "loss_sod": 0.02162952907383442, - "loss_total": 0.4655928909778595, - "step": 220799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.0890847444534302, - "learning_rate": 2.8998881348817057e-05, - "loss": 0.4836, - "step": 220800 - }, - { - "epoch": 0.025798, - "loss_gen": 6.111624240875244, - "loss_rtd": 0.2526547312736511, - "loss_sent": 0.07042557746171951, - "loss_sod": 0.07130489498376846, - "loss_total": 0.3943851888179779, - "step": 220899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.344731330871582, - "loss_rtd": 0.23291535675525665, - "loss_sent": 0.10674972087144852, - "loss_sod": 0.020333917811512947, - "loss_total": 0.35999900102615356, - "step": 220899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.694638192653656, - "learning_rate": 2.897008721138909e-05, - "loss": 0.4656, - "step": 220900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.454096794128418, - "loss_rtd": 0.23449811339378357, - "loss_sent": 0.0007230049232020974, - "loss_sod": 0.15796786546707153, - "loss_total": 0.3931889832019806, - "step": 220999 - }, - { - "epoch": 0.025998, - "loss_gen": 4.9117350578308105, - "loss_rtd": 0.20750699937343597, - "loss_sent": 0.00227547250688076, - "loss_sod": 0.05858577787876129, - "loss_total": 0.2683682441711426, - "step": 220999 - }, - { - "epoch": 0.026, - "grad_norm": 1.0255084037780762, - "learning_rate": 2.894130154480748e-05, - "loss": 0.4585, - "step": 221000 - }, - { - "epoch": 0.026, - "eval_loss": 0.4414041340351105, - "eval_runtime": 151.9668, - "eval_samples_per_second": 101.621, - "eval_steps_per_second": 0.796, - "step": 221000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.129405975341797, - "loss_rtd": 0.2178030163049698, - "loss_sent": 0.12021706998348236, - "loss_sod": 0.10975439846515656, - "loss_total": 0.4477744698524475, - "step": 221099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.916364669799805, - "loss_rtd": 0.22973603010177612, - "loss_sent": 0.03431417793035507, - "loss_sod": 0.03355777636170387, - "loss_total": 0.29760798811912537, - "step": 221099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9078941345214844, - "learning_rate": 2.8912524360667104e-05, - "loss": 0.4424, - "step": 221100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.452906131744385, - "loss_rtd": 0.24439242482185364, - "loss_sent": 0.17114460468292236, - "loss_sod": 0.01148926094174385, - "loss_total": 0.42702630162239075, - "step": 221199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.052069187164307, - "loss_rtd": 0.2224331945180893, - "loss_sent": 2.770273022179026e-05, - "loss_sod": 0.10630861669778824, - "loss_total": 0.3287695348262787, - "step": 221199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.8697308897972107, - "learning_rate": 2.8883755670559382e-05, - "loss": 0.4663, - "step": 221200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.732661247253418, - "loss_rtd": 0.21897049248218536, - "loss_sent": 0.4717823565006256, - "loss_sod": 0.039517201483249664, - "loss_total": 0.7302700281143188, - "step": 221299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.809877872467041, - "loss_rtd": 0.20634424686431885, - "loss_sent": 0.3764127492904663, - "loss_sod": 0.06832806766033173, - "loss_total": 0.6510850787162781, - "step": 221299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.94827139377594, - "learning_rate": 2.8854995486072344e-05, - "loss": 0.4781, - "step": 221300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.611896991729736, - "loss_rtd": 0.22673384845256805, - "loss_sent": 0.25589561462402344, - "loss_sod": 0.09249468892812729, - "loss_total": 0.5751241445541382, - "step": 221399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.919143199920654, - "loss_rtd": 0.2417079657316208, - "loss_sent": 0.4991658329963684, - "loss_sod": 0.023417092859745026, - "loss_total": 0.7642909288406372, - "step": 221399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.8854585886001587, - "learning_rate": 2.8826243818790598e-05, - "loss": 0.4629, - "step": 221400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.547589302062988, - "loss_rtd": 0.23697513341903687, - "loss_sent": 0.05889887735247612, - "loss_sod": 0.1516200751066208, - "loss_total": 0.4474940896034241, - "step": 221499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.313899040222168, - "loss_rtd": 0.22380518913269043, - "loss_sent": 0.04938521981239319, - "loss_sod": 0.10741880536079407, - "loss_total": 0.3806092143058777, - "step": 221499 - }, - { - "epoch": 0.027, - "grad_norm": 1.0613423585891724, - "learning_rate": 2.879750068029533e-05, - "loss": 0.4561, - "step": 221500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.850945472717285, - "loss_rtd": 0.25582897663116455, - "loss_sent": 0.11795809864997864, - "loss_sod": 0.037380725145339966, - "loss_total": 0.41116780042648315, - "step": 221599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.520864009857178, - "loss_rtd": 0.22643856704235077, - "loss_sent": 0.07080437242984772, - "loss_sod": 0.03870118409395218, - "loss_total": 0.33594411611557007, - "step": 221599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.8865393996238708, - "learning_rate": 2.8768766082164234e-05, - "loss": 0.4598, - "step": 221600 - }, - { - "epoch": 0.027398, - "loss_gen": 4.99382209777832, - "loss_rtd": 0.2131546288728714, - "loss_sent": 0.04601503536105156, - "loss_sod": 0.015454077161848545, - "loss_total": 0.2746237516403198, - "step": 221699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.551748752593994, - "loss_rtd": 0.22426831722259521, - "loss_sent": 0.09755738079547882, - "loss_sod": 0.07161170989274979, - "loss_total": 0.39343738555908203, - "step": 221699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.6296889185905457, - "learning_rate": 2.8740040035971614e-05, - "loss": 0.4535, - "step": 221700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.719409465789795, - "loss_rtd": 0.24622197449207306, - "loss_sent": 0.1148734763264656, - "loss_sod": 0.007248458918184042, - "loss_total": 0.36834388971328735, - "step": 221799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.579176425933838, - "loss_rtd": 0.24331387877464294, - "loss_sent": 0.04704314470291138, - "loss_sod": 0.04200097545981407, - "loss_total": 0.3323580026626587, - "step": 221799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.6344034075737, - "learning_rate": 2.8711322553288356e-05, - "loss": 0.474, - "step": 221800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.555872917175293, - "loss_rtd": 0.24041315913200378, - "loss_sent": 0.21580737829208374, - "loss_sod": 0.09671332687139511, - "loss_total": 0.5529338717460632, - "step": 221899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.872961521148682, - "loss_rtd": 0.23050495982170105, - "loss_sent": 0.28679120540618896, - "loss_sod": 0.15477249026298523, - "loss_total": 0.6720686554908752, - "step": 221899 - }, - { - "epoch": 0.0278, - "grad_norm": 2.2413880825042725, - "learning_rate": 2.86826136456818e-05, - "loss": 0.4668, - "step": 221900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.610729694366455, - "loss_rtd": 0.23461250960826874, - "loss_sent": 0.16489671170711517, - "loss_sod": 0.06333743035793304, - "loss_total": 0.46284663677215576, - "step": 221999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.9616851806640625, - "loss_rtd": 0.25797396898269653, - "loss_sent": 0.2884034514427185, - "loss_sod": 0.09543274343013763, - "loss_total": 0.6418101787567139, - "step": 221999 - }, - { - "epoch": 0.028, - "grad_norm": 1.0590890645980835, - "learning_rate": 2.865391332471592e-05, - "loss": 0.4597, - "step": 222000 - }, - { - "epoch": 0.028, - "eval_loss": 0.43902838230133057, - "eval_runtime": 151.969, - "eval_samples_per_second": 101.619, - "eval_steps_per_second": 0.796, - "step": 222000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.0284013748168945, - "loss_rtd": 0.23429106175899506, - "loss_sent": 2.5784262106753886e-05, - "loss_sod": 0.10376732796430588, - "loss_total": 0.3380841612815857, - "step": 222099 - }, - { - "epoch": 0.028198, - "loss_gen": 4.849533557891846, - "loss_rtd": 0.2006891965866089, - "loss_sent": 0.04447287321090698, - "loss_sod": 0.03498782962560654, - "loss_total": 0.280149906873703, - "step": 222099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.6721543073654175, - "learning_rate": 2.8625221601951203e-05, - "loss": 0.4444, - "step": 222100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.572941780090332, - "loss_rtd": 0.2390994429588318, - "loss_sent": 0.08765707910060883, - "loss_sod": 0.08156536519527435, - "loss_total": 0.40832191705703735, - "step": 222199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.066463947296143, - "loss_rtd": 0.20003828406333923, - "loss_sent": 0.0006903017056174576, - "loss_sod": 0.06834739446640015, - "loss_total": 0.26907598972320557, - "step": 222199 - }, - { - "epoch": 0.0284, - "grad_norm": 0.6420572400093079, - "learning_rate": 2.859653848894468e-05, - "loss": 0.466, - "step": 222200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.858935832977295, - "loss_rtd": 0.23716403543949127, - "loss_sent": 0.1687023937702179, - "loss_sod": 0.1139901876449585, - "loss_total": 0.5198565721511841, - "step": 222299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.699175834655762, - "loss_rtd": 0.25139546394348145, - "loss_sent": 0.182742640376091, - "loss_sod": 0.06551109999418259, - "loss_total": 0.4996492266654968, - "step": 222299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.1013721227645874, - "learning_rate": 2.8567863997249877e-05, - "loss": 0.4816, - "step": 222300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.0963897705078125, - "loss_rtd": 0.2102944552898407, - "loss_sent": 0.024991141632199287, - "loss_sod": 0.089943528175354, - "loss_total": 0.32522913813591003, - "step": 222399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.822452545166016, - "loss_rtd": 0.21162298321723938, - "loss_sent": 0.17472924292087555, - "loss_sod": 0.0625510960817337, - "loss_total": 0.44890332221984863, - "step": 222399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.0727407932281494, - "learning_rate": 2.85391981384169e-05, - "loss": 0.4592, - "step": 222400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.756165981292725, - "loss_rtd": 0.24165068566799164, - "loss_sent": 0.25815513730049133, - "loss_sod": 0.01099803950637579, - "loss_total": 0.5108038187026978, - "step": 222499 - }, - { - "epoch": 0.028998, - "loss_gen": 6.250514030456543, - "loss_rtd": 0.2295823097229004, - "loss_sent": 0.08269616961479187, - "loss_sod": 0.059744883328676224, - "loss_total": 0.3720233738422394, - "step": 222499 - }, - { - "epoch": 0.029, - "grad_norm": 1.047006368637085, - "learning_rate": 2.8510540923992342e-05, - "loss": 0.4624, - "step": 222500 - }, - { - "epoch": 0.029198, - "loss_gen": 6.0060272216796875, - "loss_rtd": 0.2313002347946167, - "loss_sent": 0.12705208361148834, - "loss_sod": 0.0676863044500351, - "loss_total": 0.42603862285614014, - "step": 222599 - }, - { - "epoch": 0.029198, - "loss_gen": 6.095234394073486, - "loss_rtd": 0.25022509694099426, - "loss_sent": 0.16193456947803497, - "loss_sod": 0.050696954131126404, - "loss_total": 0.46285659074783325, - "step": 222599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.7585253715515137, - "learning_rate": 2.8481892365519346e-05, - "loss": 0.461, - "step": 222600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.729217529296875, - "loss_rtd": 0.20401251316070557, - "loss_sent": 0.0006949233938939869, - "loss_sod": 0.015092398971319199, - "loss_total": 0.21979984641075134, - "step": 222699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.775670528411865, - "loss_rtd": 0.24301104247570038, - "loss_sent": 0.37751805782318115, - "loss_sod": 0.06195702776312828, - "loss_total": 0.6824861168861389, - "step": 222699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.2970914840698242, - "learning_rate": 2.8453252474537516e-05, - "loss": 0.4537, - "step": 222700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.738706588745117, - "loss_rtd": 0.21717077493667603, - "loss_sent": 0.5664131045341492, - "loss_sod": 0.03647429123520851, - "loss_total": 0.8200581669807434, - "step": 222799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.725508213043213, - "loss_rtd": 0.2441895753145218, - "loss_sent": 0.3335700035095215, - "loss_sod": 0.1268690526485443, - "loss_total": 0.7046286463737488, - "step": 222799 - }, - { - "epoch": 0.0296, - "grad_norm": 3.333723783493042, - "learning_rate": 2.842462126258302e-05, - "loss": 0.4587, - "step": 222800 - }, - { - "epoch": 0.029798, - "loss_gen": 6.071145534515381, - "loss_rtd": 0.2494257241487503, - "loss_sent": 0.10561519861221313, - "loss_sod": 0.05293821170926094, - "loss_total": 0.4079791307449341, - "step": 222899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.638516902923584, - "loss_rtd": 0.22418633103370667, - "loss_sent": 0.595332145690918, - "loss_sod": 0.08956931531429291, - "loss_total": 0.9090877771377563, - "step": 222899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.1928447484970093, - "learning_rate": 2.839599874118849e-05, - "loss": 0.4661, - "step": 222900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.490230083465576, - "loss_rtd": 0.2168295681476593, - "loss_sent": 0.18578708171844482, - "loss_sod": 0.021466167643666267, - "loss_total": 0.42408281564712524, - "step": 222999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.702321529388428, - "loss_rtd": 0.21663211286067963, - "loss_sent": 0.21463170647621155, - "loss_sod": 0.10396278649568558, - "loss_total": 0.535226583480835, - "step": 222999 - }, - { - "epoch": 0.03, - "grad_norm": 1.1903234720230103, - "learning_rate": 2.83673849218831e-05, - "loss": 0.4731, - "step": 223000 - }, - { - "epoch": 0.03, - "eval_loss": 0.43680015206336975, - "eval_runtime": 152.7027, - "eval_samples_per_second": 101.131, - "eval_steps_per_second": 0.792, - "step": 223000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.65090799331665, - "loss_rtd": 0.2267831563949585, - "loss_sent": 0.06457176059484482, - "loss_sod": 0.030351784080266953, - "loss_total": 0.32170671224594116, - "step": 223099 - }, - { - "epoch": 0.030198, - "loss_gen": 4.975265979766846, - "loss_rtd": 0.1984621286392212, - "loss_sent": 0.023878054693341255, - "loss_sod": 0.0772026777267456, - "loss_total": 0.2995428740978241, - "step": 223099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.8430375456809998, - "learning_rate": 2.8338779816192464e-05, - "loss": 0.4688, - "step": 223100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.6817626953125, - "loss_rtd": 0.23664012551307678, - "loss_sent": 0.4942237436771393, - "loss_sod": 0.03030521608889103, - "loss_total": 0.7611690759658813, - "step": 223199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.505131721496582, - "loss_rtd": 0.2329002320766449, - "loss_sent": 0.1338961273431778, - "loss_sod": 0.04271288588643074, - "loss_total": 0.40950924158096313, - "step": 223199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.7884716987609863, - "learning_rate": 2.8310183435638727e-05, - "loss": 0.4496, - "step": 223200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.743865013122559, - "loss_rtd": 0.23399142920970917, - "loss_sent": 0.5928657650947571, - "loss_sod": 0.07470916211605072, - "loss_total": 0.9015663862228394, - "step": 223299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.657670021057129, - "loss_rtd": 0.24267837405204773, - "loss_sent": 0.0339723564684391, - "loss_sod": 0.12458410114049911, - "loss_total": 0.40123483538627625, - "step": 223299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.2713204622268677, - "learning_rate": 2.82815957917405e-05, - "loss": 0.4807, - "step": 223300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.44834041595459, - "loss_rtd": 0.22362852096557617, - "loss_sent": 0.1513085812330246, - "loss_sod": 0.03652561083436012, - "loss_total": 0.4114627242088318, - "step": 223399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.5224151611328125, - "loss_rtd": 0.24534864723682404, - "loss_sent": 0.10560718923807144, - "loss_sod": 0.10585036873817444, - "loss_total": 0.4568062126636505, - "step": 223399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.9982521533966064, - "learning_rate": 2.8253016896012918e-05, - "loss": 0.4798, - "step": 223400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.6275129318237305, - "loss_rtd": 0.2267647683620453, - "loss_sent": 0.2952210009098053, - "loss_sod": 0.12058262526988983, - "loss_total": 0.6425683498382568, - "step": 223499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.795315265655518, - "loss_rtd": 0.2462725043296814, - "loss_sent": 0.17645324766635895, - "loss_sod": 0.011587779968976974, - "loss_total": 0.4343135356903076, - "step": 223499 - }, - { - "epoch": 0.031, - "grad_norm": 0.9927622675895691, - "learning_rate": 2.822444675996751e-05, - "loss": 0.4525, - "step": 223500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.699234485626221, - "loss_rtd": 0.22923468053340912, - "loss_sent": 0.2931504249572754, - "loss_sod": 0.027988888323307037, - "loss_total": 0.5503740310668945, - "step": 223599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.68513298034668, - "loss_rtd": 0.23517471551895142, - "loss_sent": 0.19641363620758057, - "loss_sod": 0.024201134219765663, - "loss_total": 0.4557894766330719, - "step": 223599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.7236945629119873, - "learning_rate": 2.8195885395112343e-05, - "loss": 0.4786, - "step": 223600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.589235305786133, - "loss_rtd": 0.23837798833847046, - "loss_sent": 0.2533192038536072, - "loss_sod": 0.09866724908351898, - "loss_total": 0.5903644561767578, - "step": 223699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.589600563049316, - "loss_rtd": 0.24535444378852844, - "loss_sent": 0.18812699615955353, - "loss_sod": 0.043122511357069016, - "loss_total": 0.4766039252281189, - "step": 223699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.8318504691123962, - "learning_rate": 2.816733281295195e-05, - "loss": 0.4659, - "step": 223700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.715664863586426, - "loss_rtd": 0.24356691539287567, - "loss_sent": 0.2261410802602768, - "loss_sod": 0.04271980747580528, - "loss_total": 0.512427806854248, - "step": 223799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.816354751586914, - "loss_rtd": 0.2568530738353729, - "loss_sent": 0.15311940014362335, - "loss_sod": 0.02241531014442444, - "loss_total": 0.4323877692222595, - "step": 223799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.9049277305603027, - "learning_rate": 2.8138789024987268e-05, - "loss": 0.4566, - "step": 223800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.5496673583984375, - "loss_rtd": 0.2470230907201767, - "loss_sent": 0.06891334056854248, - "loss_sod": 0.033637940883636475, - "loss_total": 0.34957438707351685, - "step": 223899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.751945972442627, - "loss_rtd": 0.23640091717243195, - "loss_sent": 0.331116646528244, - "loss_sod": 0.027254968881607056, - "loss_total": 0.5947725772857666, - "step": 223899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.6053645610809326, - "learning_rate": 2.8110254042715755e-05, - "loss": 0.4764, - "step": 223900 - }, - { - "epoch": 0.031998, - "loss_gen": 4.880710601806641, - "loss_rtd": 0.20072686672210693, - "loss_sent": 0.0037962477654218674, - "loss_sod": 0.05845049023628235, - "loss_total": 0.2629736065864563, - "step": 223999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.543124198913574, - "loss_rtd": 0.2402266263961792, - "loss_sent": 0.1997024565935135, - "loss_sod": 0.027427462860941887, - "loss_total": 0.4673565626144409, - "step": 223999 - }, - { - "epoch": 0.032, - "grad_norm": 0.7811095118522644, - "learning_rate": 2.808172787763129e-05, - "loss": 0.4638, - "step": 224000 - }, - { - "epoch": 0.032, - "eval_loss": 0.44494882225990295, - "eval_runtime": 152.2057, - "eval_samples_per_second": 101.461, - "eval_steps_per_second": 0.795, - "step": 224000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.569873809814453, - "loss_rtd": 0.22610104084014893, - "loss_sent": 0.2411336749792099, - "loss_sod": 0.002095532836392522, - "loss_total": 0.4693302512168884, - "step": 224099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.069498062133789, - "loss_rtd": 0.21059513092041016, - "loss_sent": 0.013547113165259361, - "loss_sod": 0.04915773868560791, - "loss_total": 0.2732999920845032, - "step": 224099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.0114434957504272, - "learning_rate": 2.805321054122424e-05, - "loss": 0.465, - "step": 224100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.417413234710693, - "loss_rtd": 0.24213016033172607, - "loss_sent": 0.24051572382450104, - "loss_sod": 0.051152754575014114, - "loss_total": 0.5337986350059509, - "step": 224199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.953667640686035, - "loss_rtd": 0.23942674696445465, - "loss_sent": 0.3299728035926819, - "loss_sod": 0.09776980429887772, - "loss_total": 0.6671693325042725, - "step": 224199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.6033778190612793, - "learning_rate": 2.8024702044981344e-05, - "loss": 0.4628, - "step": 224200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.387984752655029, - "loss_rtd": 0.2271862030029297, - "loss_sent": 0.10984363406896591, - "loss_sod": 0.03416328877210617, - "loss_total": 0.37119314074516296, - "step": 224299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.715670108795166, - "loss_rtd": 0.257066547870636, - "loss_sent": 0.16404929757118225, - "loss_sod": 0.06787629425525665, - "loss_total": 0.4889921247959137, - "step": 224299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.3464434146881104, - "learning_rate": 2.799620240038583e-05, - "loss": 0.4691, - "step": 224300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.593616008758545, - "loss_rtd": 0.2560146152973175, - "loss_sent": 0.27091580629348755, - "loss_sod": 0.001017265603877604, - "loss_total": 0.5279476642608643, - "step": 224399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.599079132080078, - "loss_rtd": 0.23365263640880585, - "loss_sent": 0.18014585971832275, - "loss_sod": 0.0022646132856607437, - "loss_total": 0.416063129901886, - "step": 224399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.9168931245803833, - "learning_rate": 2.796771161891736e-05, - "loss": 0.4508, - "step": 224400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.647077560424805, - "loss_rtd": 0.21314236521720886, - "loss_sent": 0.047262877225875854, - "loss_sod": 0.011531857773661613, - "loss_total": 0.2719371020793915, - "step": 224499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.37584924697876, - "loss_rtd": 0.2328566461801529, - "loss_sent": 0.00010984270920744166, - "loss_sod": 0.12468120455741882, - "loss_total": 0.35764771699905396, - "step": 224499 - }, - { - "epoch": 0.001, - "grad_norm": 0.8750359416007996, - "learning_rate": 2.7939229712052028e-05, - "loss": 0.4745, - "step": 224500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.534361839294434, - "loss_rtd": 0.227226123213768, - "loss_sent": 0.14308471977710724, - "loss_sod": 0.08522327244281769, - "loss_total": 0.45553410053253174, - "step": 224599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.645907402038574, - "loss_rtd": 0.23368482291698456, - "loss_sent": 0.16105742752552032, - "loss_sod": 0.06207191199064255, - "loss_total": 0.456814169883728, - "step": 224599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.4348220825195312, - "learning_rate": 2.7910756691262318e-05, - "loss": 0.4722, - "step": 224600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.638808250427246, - "loss_rtd": 0.23300603032112122, - "loss_sent": 0.20091569423675537, - "loss_sod": 0.08018143475055695, - "loss_total": 0.5141031742095947, - "step": 224699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.642586708068848, - "loss_rtd": 0.23759806156158447, - "loss_sent": 0.11396326124668121, - "loss_sod": 0.02553461492061615, - "loss_total": 0.37709593772888184, - "step": 224699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.8900947570800781, - "learning_rate": 2.7882292568017164e-05, - "loss": 0.4791, - "step": 224700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.708366870880127, - "loss_rtd": 0.2390284687280655, - "loss_sent": 0.4308047592639923, - "loss_sod": 0.05724124237895012, - "loss_total": 0.7270745038986206, - "step": 224799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.39437198638916, - "loss_rtd": 0.26441359519958496, - "loss_sent": 0.25949522852897644, - "loss_sod": 0.07557901740074158, - "loss_total": 0.599487841129303, - "step": 224799 - }, - { - "epoch": 0.0016, - "grad_norm": 2.3639681339263916, - "learning_rate": 2.785383735378193e-05, - "loss": 0.4634, - "step": 224800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.926591396331787, - "loss_rtd": 0.24465681612491608, - "loss_sent": 0.10229752212762833, - "loss_sod": 0.01764935441315174, - "loss_total": 0.3646036982536316, - "step": 224899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.601808547973633, - "loss_rtd": 0.23416711390018463, - "loss_sent": 0.11619371175765991, - "loss_sod": 0.02902691438794136, - "loss_total": 0.3793877363204956, - "step": 224899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.9203702211380005, - "learning_rate": 2.7825391060018368e-05, - "loss": 0.4805, - "step": 224900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.661350250244141, - "loss_rtd": 0.22943970561027527, - "loss_sent": 0.1653285026550293, - "loss_sod": 0.0028651938773691654, - "loss_total": 0.3976334035396576, - "step": 224999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.711034774780273, - "loss_rtd": 0.2437131106853485, - "loss_sent": 0.07447220385074615, - "loss_sod": 0.0342186838388443, - "loss_total": 0.35240399837493896, - "step": 224999 - }, - { - "epoch": 0.002, - "grad_norm": 0.6741482615470886, - "learning_rate": 2.7796953698184623e-05, - "loss": 0.4732, - "step": 225000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4311069846153259, - "eval_runtime": 154.154, - "eval_samples_per_second": 100.179, - "eval_steps_per_second": 0.785, - "step": 225000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.568168640136719, - "loss_rtd": 0.23405276238918304, - "loss_sent": 0.16396518051624298, - "loss_sod": 0.06602217257022858, - "loss_total": 0.4640401005744934, - "step": 225099 - }, - { - "epoch": 0.002198, - "loss_gen": 6.030465126037598, - "loss_rtd": 0.23309405148029327, - "loss_sent": 0.24881760776042938, - "loss_sod": 0.03785564377903938, - "loss_total": 0.5197672843933105, - "step": 225099 - }, - { - "epoch": 0.0022, - "grad_norm": 0.8508840203285217, - "learning_rate": 2.7768525279735276e-05, - "loss": 0.4629, - "step": 225100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.613748550415039, - "loss_rtd": 0.2495390772819519, - "loss_sent": 0.344651997089386, - "loss_sod": 0.05255025625228882, - "loss_total": 0.6467413306236267, - "step": 225199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.8420000076293945, - "loss_rtd": 0.24935126304626465, - "loss_sent": 0.15570217370986938, - "loss_sod": 0.022254379466176033, - "loss_total": 0.4273078143596649, - "step": 225199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.4552719593048096, - "learning_rate": 2.7740105816121302e-05, - "loss": 0.4833, - "step": 225200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.60266637802124, - "loss_rtd": 0.23874659836292267, - "loss_sent": 0.17598579823970795, - "loss_sod": 0.0027498353738337755, - "loss_total": 0.4174822270870209, - "step": 225299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.583761692047119, - "loss_rtd": 0.25399646162986755, - "loss_sent": 0.2547561526298523, - "loss_sod": 0.027600456029176712, - "loss_total": 0.5363531112670898, - "step": 225299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.054810881614685, - "learning_rate": 2.7711695318790025e-05, - "loss": 0.4494, - "step": 225300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.915513038635254, - "loss_rtd": 0.2317160665988922, - "loss_sent": 0.30354616045951843, - "loss_sod": 0.04480939358472824, - "loss_total": 0.5800716280937195, - "step": 225399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.447020530700684, - "loss_rtd": 0.23885852098464966, - "loss_sent": 0.18317927420139313, - "loss_sod": 0.09525464475154877, - "loss_total": 0.5172924399375916, - "step": 225399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.0005134344100952, - "learning_rate": 2.7683293799185205e-05, - "loss": 0.4468, - "step": 225400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.440904140472412, - "loss_rtd": 0.2239619344472885, - "loss_sent": 0.14300444722175598, - "loss_sod": 0.0059273578226566315, - "loss_total": 0.372893750667572, - "step": 225499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.547121524810791, - "loss_rtd": 0.23636527359485626, - "loss_sent": 0.31065142154693604, - "loss_sod": 0.0795908272266388, - "loss_total": 0.6266075372695923, - "step": 225499 - }, - { - "epoch": 0.003, - "grad_norm": 1.2681756019592285, - "learning_rate": 2.765490126874698e-05, - "loss": 0.44, - "step": 225500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.618387699127197, - "loss_rtd": 0.20254798233509064, - "loss_sent": 0.143963024020195, - "loss_sod": 0.07344525307416916, - "loss_total": 0.4199562668800354, - "step": 225599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.438107967376709, - "loss_rtd": 0.24515216052532196, - "loss_sent": 0.07458990067243576, - "loss_sod": 0.0852646604180336, - "loss_total": 0.4050067365169525, - "step": 225599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.8462616205215454, - "learning_rate": 2.7626517738911872e-05, - "loss": 0.4658, - "step": 225600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.462686538696289, - "loss_rtd": 0.23882129788398743, - "loss_sent": 0.2343187928199768, - "loss_sod": 0.04691701382398605, - "loss_total": 0.5200570821762085, - "step": 225699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.620092391967773, - "loss_rtd": 0.23274467885494232, - "loss_sent": 0.1371069848537445, - "loss_sod": 0.022337134927511215, - "loss_total": 0.39218878746032715, - "step": 225699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.7880659699440002, - "learning_rate": 2.7598143221112716e-05, - "loss": 0.4689, - "step": 225700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.799817085266113, - "loss_rtd": 0.2242693454027176, - "loss_sent": 0.06231445074081421, - "loss_sod": 0.01739829033613205, - "loss_total": 0.30398207902908325, - "step": 225799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.818936347961426, - "loss_rtd": 0.2236141562461853, - "loss_sent": 0.1971997618675232, - "loss_sod": 0.12863728404045105, - "loss_total": 0.5494512319564819, - "step": 225799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.2365944385528564, - "learning_rate": 2.75697777267788e-05, - "loss": 0.4662, - "step": 225800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.77978515625, - "loss_rtd": 0.23254472017288208, - "loss_sent": 0.06714111566543579, - "loss_sod": 0.02295173704624176, - "loss_total": 0.32263755798339844, - "step": 225899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.553442001342773, - "loss_rtd": 0.24642743170261383, - "loss_sent": 0.07728710025548935, - "loss_sod": 0.02444956637918949, - "loss_total": 0.3481641113758087, - "step": 225899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.9199406504631042, - "learning_rate": 2.7541421267335725e-05, - "loss": 0.4765, - "step": 225900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.896374702453613, - "loss_rtd": 0.24322231113910675, - "loss_sent": 0.30534428358078003, - "loss_sod": 0.06029155105352402, - "loss_total": 0.6088581085205078, - "step": 225999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.502675533294678, - "loss_rtd": 0.24224092066287994, - "loss_sent": 0.10783141106367111, - "loss_sod": 0.042063526809215546, - "loss_total": 0.3921358585357666, - "step": 225999 - }, - { - "epoch": 0.004, - "grad_norm": 1.3127118349075317, - "learning_rate": 2.7513073854205506e-05, - "loss": 0.47, - "step": 226000 - }, - { - "epoch": 0.004, - "eval_loss": 0.44110894203186035, - "eval_runtime": 150.8165, - "eval_samples_per_second": 102.396, - "eval_steps_per_second": 0.802, - "step": 226000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.643917083740234, - "loss_rtd": 0.24464350938796997, - "loss_sent": 0.16674335300922394, - "loss_sod": 0.021549327298998833, - "loss_total": 0.4329361915588379, - "step": 226099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.616582870483398, - "loss_rtd": 0.23230820894241333, - "loss_sent": 0.3552858531475067, - "loss_sod": 0.009614404290914536, - "loss_total": 0.5972084999084473, - "step": 226099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.6828440427780151, - "learning_rate": 2.7484735498806424e-05, - "loss": 0.467, - "step": 226100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.5748209953308105, - "loss_rtd": 0.2176102250814438, - "loss_sent": 0.06763680279254913, - "loss_sod": 0.013852190226316452, - "loss_total": 0.2990992069244385, - "step": 226199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.857899188995361, - "loss_rtd": 0.24582800269126892, - "loss_sent": 0.21359440684318542, - "loss_sod": 0.008314840495586395, - "loss_total": 0.46773725748062134, - "step": 226199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.6256184577941895, - "learning_rate": 2.7456406212553187e-05, - "loss": 0.4501, - "step": 226200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.72310733795166, - "loss_rtd": 0.23177634179592133, - "loss_sent": 0.1454453319311142, - "loss_sod": 0.05210348963737488, - "loss_total": 0.4293251633644104, - "step": 226299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.581999778747559, - "loss_rtd": 0.2323111593723297, - "loss_sent": 0.3031417429447174, - "loss_sod": 0.0475931242108345, - "loss_total": 0.583046019077301, - "step": 226299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.0376886129379272, - "learning_rate": 2.742808600685684e-05, - "loss": 0.4846, - "step": 226300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.795795917510986, - "loss_rtd": 0.24001413583755493, - "loss_sent": 0.16974866390228271, - "loss_sod": 0.04025164991617203, - "loss_total": 0.4500144422054291, - "step": 226399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.709367275238037, - "loss_rtd": 0.22698765993118286, - "loss_sent": 0.21126671135425568, - "loss_sod": 0.051406316459178925, - "loss_total": 0.48966068029403687, - "step": 226399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.9794959425926208, - "learning_rate": 2.7399774893124764e-05, - "loss": 0.4534, - "step": 226400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.647364616394043, - "loss_rtd": 0.2729233205318451, - "loss_sent": 0.2768864929676056, - "loss_sod": 0.04806673899292946, - "loss_total": 0.5978765487670898, - "step": 226499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.6918110847473145, - "loss_rtd": 0.2299945205450058, - "loss_sent": 0.2196802943944931, - "loss_sod": 0.07942471653223038, - "loss_total": 0.5290995240211487, - "step": 226499 - }, - { - "epoch": 0.005, - "grad_norm": 1.4714350700378418, - "learning_rate": 2.737147288276064e-05, - "loss": 0.449, - "step": 226500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.895983695983887, - "loss_rtd": 0.22006776928901672, - "loss_sent": 0.34701797366142273, - "loss_sod": 0.03792410343885422, - "loss_total": 0.6050098538398743, - "step": 226599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.722021579742432, - "loss_rtd": 0.2389376312494278, - "loss_sent": 0.09975945949554443, - "loss_sod": 0.09337402880191803, - "loss_total": 0.43207111954689026, - "step": 226599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.0441040992736816, - "learning_rate": 2.7343179987164535e-05, - "loss": 0.4641, - "step": 226600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.523041725158691, - "loss_rtd": 0.24761639535427094, - "loss_sent": 0.11091520637273788, - "loss_sod": 0.07680056989192963, - "loss_total": 0.43533217906951904, - "step": 226699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.493579387664795, - "loss_rtd": 0.22481165826320648, - "loss_sent": 0.35022062063217163, - "loss_sod": 0.00831974670290947, - "loss_total": 0.5833520293235779, - "step": 226699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.5081793069839478, - "learning_rate": 2.7314896217732845e-05, - "loss": 0.4534, - "step": 226700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.182254314422607, - "loss_rtd": 0.20410116016864777, - "loss_sent": 0.02833179570734501, - "loss_sod": 0.08729544281959534, - "loss_total": 0.31972840428352356, - "step": 226799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.498484134674072, - "loss_rtd": 0.24284105002880096, - "loss_sent": 0.16004407405853271, - "loss_sod": 0.021331269294023514, - "loss_total": 0.4242163896560669, - "step": 226799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.0090997219085693, - "learning_rate": 2.728662158585822e-05, - "loss": 0.4531, - "step": 226800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.486617565155029, - "loss_rtd": 0.23351794481277466, - "loss_sent": 0.17372311651706696, - "loss_sod": 0.017822682857513428, - "loss_total": 0.42506372928619385, - "step": 226899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.614632606506348, - "loss_rtd": 0.2119576781988144, - "loss_sent": 0.2688595652580261, - "loss_sod": 0.008249020203948021, - "loss_total": 0.4890662431716919, - "step": 226899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.02785062789917, - "learning_rate": 2.7258356102929715e-05, - "loss": 0.4585, - "step": 226900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.825535297393799, - "loss_rtd": 0.24155889451503754, - "loss_sent": 0.08116856962442398, - "loss_sod": 0.04631891846656799, - "loss_total": 0.3690463602542877, - "step": 226999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.491581916809082, - "loss_rtd": 0.23882105946540833, - "loss_sent": 0.23398028314113617, - "loss_sod": 0.003059498965740204, - "loss_total": 0.4758608341217041, - "step": 226999 - }, - { - "epoch": 0.006, - "grad_norm": 0.8642366528511047, - "learning_rate": 2.7230099780332646e-05, - "loss": 0.4496, - "step": 227000 - }, - { - "epoch": 0.006, - "eval_loss": 0.43922483921051025, - "eval_runtime": 150.8422, - "eval_samples_per_second": 102.379, - "eval_steps_per_second": 0.802, - "step": 227000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.555595874786377, - "loss_rtd": 0.22712871432304382, - "loss_sent": 0.27140527963638306, - "loss_sod": 0.04689645767211914, - "loss_total": 0.5454304218292236, - "step": 227099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.713353633880615, - "loss_rtd": 0.21562498807907104, - "loss_sent": 0.0435381643474102, - "loss_sod": 0.06617502123117447, - "loss_total": 0.3253381550312042, - "step": 227099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.3460780382156372, - "learning_rate": 2.720185262944866e-05, - "loss": 0.4588, - "step": 227100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.15493106842041, - "loss_rtd": 0.2117948830127716, - "loss_sent": 0.05631496384739876, - "loss_sod": 0.10051491856575012, - "loss_total": 0.3686247766017914, - "step": 227199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.011689186096191, - "loss_rtd": 0.19459405541419983, - "loss_sent": 4.475473542697728e-05, - "loss_sod": 0.050213322043418884, - "loss_total": 0.24485212564468384, - "step": 227199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.80864417552948, - "learning_rate": 2.7173614661655723e-05, - "loss": 0.4605, - "step": 227200 - }, - { - "epoch": 0.006598, - "loss_gen": 6.317282676696777, - "loss_rtd": 0.26973089575767517, - "loss_sent": 0.13002526760101318, - "loss_sod": 0.08195135742425919, - "loss_total": 0.48170751333236694, - "step": 227299 - }, - { - "epoch": 0.006598, - "loss_gen": 6.011381149291992, - "loss_rtd": 0.23621657490730286, - "loss_sent": 0.19482728838920593, - "loss_sod": 0.09912626445293427, - "loss_total": 0.5301700830459595, - "step": 227299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.8855258226394653, - "learning_rate": 2.7145385888328058e-05, - "loss": 0.4613, - "step": 227300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.7408270835876465, - "loss_rtd": 0.24157613515853882, - "loss_sent": 0.3369632959365845, - "loss_sod": 0.01694389060139656, - "loss_total": 0.5954833030700684, - "step": 227399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.954455852508545, - "loss_rtd": 0.25100675225257874, - "loss_sent": 0.12181692570447922, - "loss_sod": 0.00487709604203701, - "loss_total": 0.3777007758617401, - "step": 227399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.208707571029663, - "learning_rate": 2.7117166320836218e-05, - "loss": 0.4686, - "step": 227400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.6229143142700195, - "loss_rtd": 0.23978972434997559, - "loss_sent": 0.21991673111915588, - "loss_sod": 0.025293273851275444, - "loss_total": 0.48499971628189087, - "step": 227499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.387685298919678, - "loss_rtd": 0.22592461109161377, - "loss_sent": 0.12020763754844666, - "loss_sod": 0.05308495834469795, - "loss_total": 0.3992172181606293, - "step": 227499 - }, - { - "epoch": 0.007, - "grad_norm": 1.3162251710891724, - "learning_rate": 2.708895597054705e-05, - "loss": 0.4528, - "step": 227500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.599020004272461, - "loss_rtd": 0.21718180179595947, - "loss_sent": 0.26812273263931274, - "loss_sod": 0.006038974970579147, - "loss_total": 0.49134349822998047, - "step": 227599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.435451507568359, - "loss_rtd": 0.2204170674085617, - "loss_sent": 0.20077107846736908, - "loss_sod": 0.11586703360080719, - "loss_total": 0.5370551943778992, - "step": 227599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.3090988397598267, - "learning_rate": 2.7060754848823698e-05, - "loss": 0.432, - "step": 227600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.618217945098877, - "loss_rtd": 0.2524552643299103, - "loss_sent": 0.2500818073749542, - "loss_sod": 0.00473436014726758, - "loss_total": 0.507271409034729, - "step": 227699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.735246658325195, - "loss_rtd": 0.23610767722129822, - "loss_sent": 0.20814943313598633, - "loss_sod": 0.13017351925373077, - "loss_total": 0.5744306445121765, - "step": 227699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.1152544021606445, - "learning_rate": 2.703256296702553e-05, - "loss": 0.4549, - "step": 227700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.446821212768555, - "loss_rtd": 0.23186981678009033, - "loss_sent": 0.28947314620018005, - "loss_sod": 0.09214643388986588, - "loss_total": 0.6134893894195557, - "step": 227799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.690222263336182, - "loss_rtd": 0.23537524044513702, - "loss_sent": 0.2620966136455536, - "loss_sod": 0.05944913625717163, - "loss_total": 0.5569210052490234, - "step": 227799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.403546929359436, - "learning_rate": 2.700438033650825e-05, - "loss": 0.468, - "step": 227800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.735042095184326, - "loss_rtd": 0.24326685070991516, - "loss_sent": 0.4053393602371216, - "loss_sod": 0.06040772795677185, - "loss_total": 0.7090139389038086, - "step": 227899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.652089595794678, - "loss_rtd": 0.23130719363689423, - "loss_sent": 0.1046670451760292, - "loss_sod": 0.044817693531513214, - "loss_total": 0.38079193234443665, - "step": 227899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.8818656206130981, - "learning_rate": 2.697620696862382e-05, - "loss": 0.4564, - "step": 227900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.593469142913818, - "loss_rtd": 0.2454930990934372, - "loss_sent": 0.32072532176971436, - "loss_sod": 0.014206699095666409, - "loss_total": 0.5804251432418823, - "step": 227999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.961065769195557, - "loss_rtd": 0.23668722808361053, - "loss_sent": 0.40386414527893066, - "loss_sod": 0.13456673920154572, - "loss_total": 0.7751181125640869, - "step": 227999 - }, - { - "epoch": 0.008, - "grad_norm": 3.1582767963409424, - "learning_rate": 2.694804287472049e-05, - "loss": 0.4459, - "step": 228000 - }, - { - "epoch": 0.008, - "eval_loss": 0.43808892369270325, - "eval_runtime": 152.4528, - "eval_samples_per_second": 101.297, - "eval_steps_per_second": 0.794, - "step": 228000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.379452228546143, - "loss_rtd": 0.23132821917533875, - "loss_sent": 0.4139622747898102, - "loss_sod": 0.011506144888699055, - "loss_total": 0.6567966341972351, - "step": 228099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.633175849914551, - "loss_rtd": 0.2480832189321518, - "loss_sent": 0.17119170725345612, - "loss_sod": 0.031854670494794846, - "loss_total": 0.45112961530685425, - "step": 228099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.5515848398208618, - "learning_rate": 2.691988806614272e-05, - "loss": 0.4632, - "step": 228100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.950217247009277, - "loss_rtd": 0.23259751498699188, - "loss_sent": 0.40340232849121094, - "loss_sod": 0.05920649319887161, - "loss_total": 0.695206344127655, - "step": 228199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.799584865570068, - "loss_rtd": 0.2343636304140091, - "loss_sent": 0.07602745294570923, - "loss_sod": 0.044330716133117676, - "loss_total": 0.3547217845916748, - "step": 228199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.4872395992279053, - "learning_rate": 2.689174255423127e-05, - "loss": 0.4471, - "step": 228200 - }, - { - "epoch": 0.008598, - "loss_gen": 4.84426736831665, - "loss_rtd": 0.19541485607624054, - "loss_sent": 0.0004303819441702217, - "loss_sod": 0.09736961126327515, - "loss_total": 0.2932148277759552, - "step": 228299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.628753185272217, - "loss_rtd": 0.23462453484535217, - "loss_sent": 0.14476704597473145, - "loss_sod": 0.020943686366081238, - "loss_total": 0.40033525228500366, - "step": 228299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.5867993831634521, - "learning_rate": 2.6863606350323172e-05, - "loss": 0.4561, - "step": 228300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.858457088470459, - "loss_rtd": 0.24009667336940765, - "loss_sent": 0.4131287634372711, - "loss_sod": 0.029319915920495987, - "loss_total": 0.6825453639030457, - "step": 228399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.606655597686768, - "loss_rtd": 0.2331199049949646, - "loss_sent": 0.4195597171783447, - "loss_sod": 0.0031043491326272488, - "loss_total": 0.6557839512825012, - "step": 228399 - }, - { - "epoch": 0.0088, - "grad_norm": 2.3644115924835205, - "learning_rate": 2.6835479465751657e-05, - "loss": 0.4719, - "step": 228400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.616539001464844, - "loss_rtd": 0.24378356337547302, - "loss_sent": 0.13728450238704681, - "loss_sod": 0.013642529025673866, - "loss_total": 0.39471060037612915, - "step": 228499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.42510986328125, - "loss_rtd": 0.21945352852344513, - "loss_sent": 0.06915150582790375, - "loss_sod": 0.10679539293050766, - "loss_total": 0.39540040493011475, - "step": 228499 - }, - { - "epoch": 0.009, - "grad_norm": 1.0487242937088013, - "learning_rate": 2.680736191184624e-05, - "loss": 0.4668, - "step": 228500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.891288757324219, - "loss_rtd": 0.24702602624893188, - "loss_sent": 0.4161568284034729, - "loss_sod": 0.052584581077098846, - "loss_total": 0.7157674431800842, - "step": 228599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.679673671722412, - "loss_rtd": 0.2405959665775299, - "loss_sent": 0.32089099287986755, - "loss_sod": 0.0328717865049839, - "loss_total": 0.5943587422370911, - "step": 228599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.6369969844818115, - "learning_rate": 2.677925369993267e-05, - "loss": 0.4677, - "step": 228600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.320028305053711, - "loss_rtd": 0.2504448890686035, - "loss_sent": 0.23768854141235352, - "loss_sod": 0.0029685497283935547, - "loss_total": 0.4911019802093506, - "step": 228699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.746073246002197, - "loss_rtd": 0.24318954348564148, - "loss_sent": 0.4006673991680145, - "loss_sod": 0.01529262587428093, - "loss_total": 0.6591495275497437, - "step": 228699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.0088990926742554, - "learning_rate": 2.6751154841332954e-05, - "loss": 0.4672, - "step": 228700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.174074172973633, - "loss_rtd": 0.23185382783412933, - "loss_sent": 0.022533010691404343, - "loss_sod": 0.039733707904815674, - "loss_total": 0.29412055015563965, - "step": 228799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.656975746154785, - "loss_rtd": 0.2337799072265625, - "loss_sent": 0.1169809028506279, - "loss_sod": 0.003765667788684368, - "loss_total": 0.35452648997306824, - "step": 228799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.5050992965698242, - "learning_rate": 2.6723065347365267e-05, - "loss": 0.4492, - "step": 228800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.612466335296631, - "loss_rtd": 0.22140438854694366, - "loss_sent": 0.1858435720205307, - "loss_sod": 0.01725723221898079, - "loss_total": 0.42450517416000366, - "step": 228899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.555539608001709, - "loss_rtd": 0.24305252730846405, - "loss_sent": 0.030688712373375893, - "loss_sod": 0.015331541188061237, - "loss_total": 0.28907278180122375, - "step": 228899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.7471132278442383, - "learning_rate": 2.6694985229344077e-05, - "loss": 0.4495, - "step": 228900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.858891010284424, - "loss_rtd": 0.23836539685726166, - "loss_sent": 0.055644478648900986, - "loss_sod": 0.18495967984199524, - "loss_total": 0.478969544172287, - "step": 228999 - }, - { - "epoch": 0.009998, - "loss_gen": 4.894556522369385, - "loss_rtd": 0.20134000480175018, - "loss_sent": 0.0005428654258139431, - "loss_sod": 0.051950592547655106, - "loss_total": 0.25383347272872925, - "step": 228999 - }, - { - "epoch": 0.01, - "grad_norm": 1.039613127708435, - "learning_rate": 2.6666914498580048e-05, - "loss": 0.4672, - "step": 229000 - }, - { - "epoch": 0.01, - "eval_loss": 0.43447065353393555, - "eval_runtime": 150.9883, - "eval_samples_per_second": 102.279, - "eval_steps_per_second": 0.801, - "step": 229000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.497626781463623, - "loss_rtd": 0.2218395620584488, - "loss_sent": 0.07860352843999863, - "loss_sod": 0.07573240995407104, - "loss_total": 0.37617549300193787, - "step": 229099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.706343650817871, - "loss_rtd": 0.24197185039520264, - "loss_sent": 0.13926245272159576, - "loss_sod": 0.09395718574523926, - "loss_total": 0.47519147396087646, - "step": 229099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.1389895677566528, - "learning_rate": 2.6638853166380085e-05, - "loss": 0.476, - "step": 229100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.648293972015381, - "loss_rtd": 0.23363643884658813, - "loss_sent": 0.3106616139411926, - "loss_sod": 0.12124307453632355, - "loss_total": 0.6655411124229431, - "step": 229199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.569979190826416, - "loss_rtd": 0.23628993332386017, - "loss_sent": 0.15998664498329163, - "loss_sod": 0.03313560411334038, - "loss_total": 0.42941218614578247, - "step": 229199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.2090204954147339, - "learning_rate": 2.6610801244047257e-05, - "loss": 0.4747, - "step": 229200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.099374771118164, - "loss_rtd": 0.20434343814849854, - "loss_sent": 0.014487138949334621, - "loss_sod": 0.04945603758096695, - "loss_total": 0.2682866156101227, - "step": 229299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.644153594970703, - "loss_rtd": 0.2047325223684311, - "loss_sent": 0.18202875554561615, - "loss_sod": 0.06680737435817719, - "loss_total": 0.45356863737106323, - "step": 229299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.3708080053329468, - "learning_rate": 2.6582758742880893e-05, - "loss": 0.4778, - "step": 229300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.399141311645508, - "loss_rtd": 0.2253955751657486, - "loss_sent": 0.13051699101924896, - "loss_sod": 0.049406036734580994, - "loss_total": 0.40531861782073975, - "step": 229399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.877711772918701, - "loss_rtd": 0.2553017735481262, - "loss_sent": 0.45392152667045593, - "loss_sod": 0.06354334950447083, - "loss_total": 0.772766649723053, - "step": 229399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.1260013580322266, - "learning_rate": 2.6554725674176505e-05, - "loss": 0.4771, - "step": 229400 - }, - { - "epoch": 0.010998, - "loss_gen": 6.0335798263549805, - "loss_rtd": 0.24716512858867645, - "loss_sent": 0.23472942411899567, - "loss_sod": 0.09684835374355316, - "loss_total": 0.5787429213523865, - "step": 229499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.672654151916504, - "loss_rtd": 0.2530180513858795, - "loss_sent": 0.18808555603027344, - "loss_sod": 0.02407405897974968, - "loss_total": 0.46517765522003174, - "step": 229499 - }, - { - "epoch": 0.011, - "grad_norm": 0.8587880730628967, - "learning_rate": 2.6526702049225828e-05, - "loss": 0.4449, - "step": 229500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.884972095489502, - "loss_rtd": 0.22826610505580902, - "loss_sent": 0.2614843249320984, - "loss_sod": 0.11151456832885742, - "loss_total": 0.601265013217926, - "step": 229599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.78701114654541, - "loss_rtd": 0.2326907217502594, - "loss_sent": 0.1665249764919281, - "loss_sod": 0.04708373546600342, - "loss_total": 0.4462994337081909, - "step": 229599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.0387909412384033, - "learning_rate": 2.6498687879316743e-05, - "loss": 0.4706, - "step": 229600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.727572441101074, - "loss_rtd": 0.2457440346479416, - "loss_sent": 0.5038110017776489, - "loss_sod": 0.032166820019483566, - "loss_total": 0.781721830368042, - "step": 229699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.695270538330078, - "loss_rtd": 0.2344040870666504, - "loss_sent": 0.13566479086875916, - "loss_sod": 0.03081917017698288, - "loss_total": 0.400888055562973, - "step": 229699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.3107898235321045, - "learning_rate": 2.6470683175733367e-05, - "loss": 0.4619, - "step": 229700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.772567272186279, - "loss_rtd": 0.24603179097175598, - "loss_sent": 0.21451306343078613, - "loss_sod": 0.04447333514690399, - "loss_total": 0.5050181746482849, - "step": 229799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.984659194946289, - "loss_rtd": 0.2422512024641037, - "loss_sent": 0.022173278033733368, - "loss_sod": 0.10229814052581787, - "loss_total": 0.36672264337539673, - "step": 229799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.7812786102294922, - "learning_rate": 2.644268794975602e-05, - "loss": 0.4442, - "step": 229800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.654200553894043, - "loss_rtd": 0.2162037342786789, - "loss_sent": 0.06268905103206635, - "loss_sod": 0.009348021820187569, - "loss_total": 0.28824079036712646, - "step": 229899 - }, - { - "epoch": 0.011798, - "loss_gen": 4.920285224914551, - "loss_rtd": 0.19752170145511627, - "loss_sent": 0.0001401292538503185, - "loss_sod": 0.12207889556884766, - "loss_total": 0.3197407126426697, - "step": 229899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.8136213421821594, - "learning_rate": 2.6414702212661118e-05, - "loss": 0.4463, - "step": 229900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.861421585083008, - "loss_rtd": 0.23023930191993713, - "loss_sent": 0.4636632800102234, - "loss_sod": 0.050620581954717636, - "loss_total": 0.7445231676101685, - "step": 229999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.83852481842041, - "loss_rtd": 0.2317688912153244, - "loss_sent": 0.39547643065452576, - "loss_sod": 0.034720465540885925, - "loss_total": 0.6619657874107361, - "step": 229999 - }, - { - "epoch": 0.012, - "grad_norm": 3.6912200450897217, - "learning_rate": 2.638672597572135e-05, - "loss": 0.4472, - "step": 230000 - }, - { - "epoch": 0.012, - "eval_loss": 0.43895989656448364, - "eval_runtime": 151.09, - "eval_samples_per_second": 102.211, - "eval_steps_per_second": 0.801, - "step": 230000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.356983184814453, - "loss_rtd": 0.24663342535495758, - "loss_sent": 0.1497732400894165, - "loss_sod": 0.03361300379037857, - "loss_total": 0.43001967668533325, - "step": 230099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.665292739868164, - "loss_rtd": 0.2385682612657547, - "loss_sent": 0.23744936287403107, - "loss_sod": 0.05865897983312607, - "loss_total": 0.5346766114234924, - "step": 230099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.7897059917449951, - "learning_rate": 2.635875925020554e-05, - "loss": 0.4518, - "step": 230100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.827788352966309, - "loss_rtd": 0.24381259083747864, - "loss_sent": 0.08867710828781128, - "loss_sod": 0.03537292033433914, - "loss_total": 0.36786261200904846, - "step": 230199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.340538501739502, - "loss_rtd": 0.2554982900619507, - "loss_sent": 0.19783081114292145, - "loss_sod": 0.019756725057959557, - "loss_total": 0.47308582067489624, - "step": 230199 - }, - { - "epoch": 0.0124, - "grad_norm": 0.998620331287384, - "learning_rate": 2.6330802047378687e-05, - "loss": 0.4695, - "step": 230200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.6338653564453125, - "loss_rtd": 0.22210107743740082, - "loss_sent": 0.10173848271369934, - "loss_sod": 0.07229706645011902, - "loss_total": 0.396136611700058, - "step": 230299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.754301071166992, - "loss_rtd": 0.24328972399234772, - "loss_sent": 0.23832380771636963, - "loss_sod": 0.011281922459602356, - "loss_total": 0.4928954541683197, - "step": 230299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.0170916318893433, - "learning_rate": 2.630285437850193e-05, - "loss": 0.4553, - "step": 230300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.837557792663574, - "loss_rtd": 0.22751744091510773, - "loss_sent": 0.11446940153837204, - "loss_sod": 0.050170376896858215, - "loss_total": 0.3921572268009186, - "step": 230399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.505778789520264, - "loss_rtd": 0.21539092063903809, - "loss_sent": 0.8965586423873901, - "loss_sod": 0.012855786830186844, - "loss_total": 1.1248053312301636, - "step": 230399 - }, - { - "epoch": 0.0128, - "grad_norm": 2.706059694290161, - "learning_rate": 2.6274916254832595e-05, - "loss": 0.4592, - "step": 230400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.705245018005371, - "loss_rtd": 0.25718510150909424, - "loss_sent": 0.1807137131690979, - "loss_sod": 0.03812996298074722, - "loss_total": 0.47602877020835876, - "step": 230499 - }, - { - "epoch": 0.012998, - "loss_gen": 4.976123332977295, - "loss_rtd": 0.20817071199417114, - "loss_sent": 0.01200772449374199, - "loss_sod": 0.05684669315814972, - "loss_total": 0.27702510356903076, - "step": 230499 - }, - { - "epoch": 0.013, - "grad_norm": 1.1367182731628418, - "learning_rate": 2.6246987687624148e-05, - "loss": 0.4902, - "step": 230500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.764529705047607, - "loss_rtd": 0.25021564960479736, - "loss_sent": 0.36481043696403503, - "loss_sod": 0.05366312712430954, - "loss_total": 0.6686892509460449, - "step": 230599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.755175590515137, - "loss_rtd": 0.24264858663082123, - "loss_sent": 0.10518502444028854, - "loss_sod": 0.06821787357330322, - "loss_total": 0.4160515069961548, - "step": 230599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.6509652137756348, - "learning_rate": 2.6219068688126236e-05, - "loss": 0.4575, - "step": 230600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.475387096405029, - "loss_rtd": 0.23707376420497894, - "loss_sent": 0.3856346607208252, - "loss_sod": 0.06012444570660591, - "loss_total": 0.6828328371047974, - "step": 230699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.969977378845215, - "loss_rtd": 0.26010188460350037, - "loss_sent": 0.3575820028781891, - "loss_sod": 0.08605533093214035, - "loss_total": 0.7037392258644104, - "step": 230699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.7759782075881958, - "learning_rate": 2.61911592675846e-05, - "loss": 0.4555, - "step": 230700 - }, - { - "epoch": 0.013598, - "loss_gen": 4.816752910614014, - "loss_rtd": 0.20228004455566406, - "loss_sent": 0.0006019301945343614, - "loss_sod": 0.17747433483600616, - "loss_total": 0.3803562819957733, - "step": 230799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.356697082519531, - "loss_rtd": 0.21131305396556854, - "loss_sent": 0.054963912814855576, - "loss_sod": 0.04144435003399849, - "loss_total": 0.3077213168144226, - "step": 230799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.0606520175933838, - "learning_rate": 2.616325943724116e-05, - "loss": 0.4354, - "step": 230800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.535309791564941, - "loss_rtd": 0.23430833220481873, - "loss_sent": 0.17811453342437744, - "loss_sod": 0.060794614255428314, - "loss_total": 0.4732174873352051, - "step": 230899 - }, - { - "epoch": 0.013798, - "loss_gen": 4.870096683502197, - "loss_rtd": 0.1986638754606247, - "loss_sent": 2.7410307666286826e-05, - "loss_sod": 0.1807681769132614, - "loss_total": 0.3794594705104828, - "step": 230899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.3568741083145142, - "learning_rate": 2.6135369208333976e-05, - "loss": 0.4482, - "step": 230900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.768224239349365, - "loss_rtd": 0.242709681391716, - "loss_sent": 0.11642098426818848, - "loss_sod": 0.019703611731529236, - "loss_total": 0.3788342773914337, - "step": 230999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.88675594329834, - "loss_rtd": 0.24185074865818024, - "loss_sent": 0.6008553504943848, - "loss_sod": 0.043352626264095306, - "loss_total": 0.8860586881637573, - "step": 230999 - }, - { - "epoch": 0.014, - "grad_norm": 1.21649968624115, - "learning_rate": 2.6107488592097234e-05, - "loss": 0.4607, - "step": 231000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4299858510494232, - "eval_runtime": 151.0551, - "eval_samples_per_second": 102.234, - "eval_steps_per_second": 0.801, - "step": 231000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.451724529266357, - "loss_rtd": 0.19294089078903198, - "loss_sent": 0.06297950446605682, - "loss_sod": 0.07656969875097275, - "loss_total": 0.33249008655548096, - "step": 231099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.753503322601318, - "loss_rtd": 0.23417335748672485, - "loss_sent": 0.1358361691236496, - "loss_sod": 0.008586164563894272, - "loss_total": 0.3785957098007202, - "step": 231099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.9893803596496582, - "learning_rate": 2.6079617599761207e-05, - "loss": 0.4486, - "step": 231100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.810847759246826, - "loss_rtd": 0.2284773886203766, - "loss_sent": 0.23063786327838898, - "loss_sod": 0.03316286578774452, - "loss_total": 0.4922780990600586, - "step": 231199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.693573474884033, - "loss_rtd": 0.24732337892055511, - "loss_sent": 0.18036647140979767, - "loss_sod": 0.031402960419654846, - "loss_total": 0.45909279584884644, - "step": 231199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.2883961200714111, - "learning_rate": 2.605175624255236e-05, - "loss": 0.4579, - "step": 231200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.7976393699646, - "loss_rtd": 0.2478954792022705, - "loss_sent": 0.06113943085074425, - "loss_sod": 0.02013206295669079, - "loss_total": 0.329166978597641, - "step": 231299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.587930202484131, - "loss_rtd": 0.2226967215538025, - "loss_sent": 0.0834789052605629, - "loss_sod": 0.1132524386048317, - "loss_total": 0.4194280505180359, - "step": 231299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.7868592143058777, - "learning_rate": 2.602390453169325e-05, - "loss": 0.4532, - "step": 231300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.7209577560424805, - "loss_rtd": 0.24389250576496124, - "loss_sent": 0.10512908548116684, - "loss_sod": 0.01837059110403061, - "loss_total": 0.3673921823501587, - "step": 231399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.937654495239258, - "loss_rtd": 0.24715158343315125, - "loss_sent": 0.04203484579920769, - "loss_sod": 0.04600105062127113, - "loss_total": 0.33518746495246887, - "step": 231399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.9294385313987732, - "learning_rate": 2.5996062478402504e-05, - "loss": 0.4501, - "step": 231400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.7035231590271, - "loss_rtd": 0.2375149130821228, - "loss_sent": 0.46221548318862915, - "loss_sod": 0.129900261759758, - "loss_total": 0.8296306729316711, - "step": 231499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.928682327270508, - "loss_rtd": 0.22469770908355713, - "loss_sent": 0.1515629142522812, - "loss_sod": 0.05415157601237297, - "loss_total": 0.4304121732711792, - "step": 231499 - }, - { - "epoch": 0.015, - "grad_norm": 1.9025779962539673, - "learning_rate": 2.5968230093894925e-05, - "loss": 0.4556, - "step": 231500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.560404300689697, - "loss_rtd": 0.24419137835502625, - "loss_sent": 0.5162179470062256, - "loss_sod": 0.014479400590062141, - "loss_total": 0.7748887538909912, - "step": 231599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.86340856552124, - "loss_rtd": 0.2088865488767624, - "loss_sent": 0.11260949075222015, - "loss_sod": 0.0717419907450676, - "loss_total": 0.39323800802230835, - "step": 231599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.3364534378051758, - "learning_rate": 2.5940407389381387e-05, - "loss": 0.4457, - "step": 231600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.0402021408081055, - "loss_rtd": 0.20991869270801544, - "loss_sent": 2.816465712385252e-05, - "loss_sod": 0.1331070512533188, - "loss_total": 0.3430539071559906, - "step": 231699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.167065620422363, - "loss_rtd": 0.2097914069890976, - "loss_sent": 0.07106874883174896, - "loss_sod": 0.07669724524021149, - "loss_total": 0.35755741596221924, - "step": 231699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.8033815026283264, - "learning_rate": 2.59125943760689e-05, - "loss": 0.4508, - "step": 231700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.450754165649414, - "loss_rtd": 0.23916415870189667, - "loss_sent": 0.023154791444540024, - "loss_sod": 0.12429704517126083, - "loss_total": 0.3866159915924072, - "step": 231799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.072955131530762, - "loss_rtd": 0.20164482295513153, - "loss_sent": 0.02033694088459015, - "loss_sod": 0.06791871786117554, - "loss_total": 0.2899004817008972, - "step": 231799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.8766674995422363, - "learning_rate": 2.5884791065160495e-05, - "loss": 0.4577, - "step": 231800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.317257881164551, - "loss_rtd": 0.1965210884809494, - "loss_sent": 0.05714843422174454, - "loss_sod": 0.026518816128373146, - "loss_total": 0.28018835186958313, - "step": 231899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.720180511474609, - "loss_rtd": 0.23176634311676025, - "loss_sent": 0.29128673672676086, - "loss_sod": 0.07481521368026733, - "loss_total": 0.5978683233261108, - "step": 231899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.9235107898712158, - "learning_rate": 2.5856997467855364e-05, - "loss": 0.456, - "step": 231900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.634468078613281, - "loss_rtd": 0.2609516382217407, - "loss_sent": 0.15540874004364014, - "loss_sod": 0.0736379325389862, - "loss_total": 0.48999831080436707, - "step": 231999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.1410932540893555, - "loss_rtd": 0.23574668169021606, - "loss_sent": 0.0065481094643473625, - "loss_sod": 0.2120421677827835, - "loss_total": 0.454336941242218, - "step": 231999 - }, - { - "epoch": 0.016, - "grad_norm": 1.40248703956604, - "learning_rate": 2.5829213595348768e-05, - "loss": 0.4481, - "step": 232000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4332537055015564, - "eval_runtime": 151.7164, - "eval_samples_per_second": 101.789, - "eval_steps_per_second": 0.798, - "step": 232000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.714728355407715, - "loss_rtd": 0.2596420645713806, - "loss_sent": 0.21808409690856934, - "loss_sod": 0.035067614167928696, - "loss_total": 0.512793779373169, - "step": 232099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.517312526702881, - "loss_rtd": 0.2269272357225418, - "loss_sent": 0.0748443752527237, - "loss_sod": 0.11009599268436432, - "loss_total": 0.411867618560791, - "step": 232099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.9759652614593506, - "learning_rate": 2.5801439458832066e-05, - "loss": 0.4423, - "step": 232100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.762374401092529, - "loss_rtd": 0.23316055536270142, - "loss_sent": 0.2530592978000641, - "loss_sod": 0.08535408973693848, - "loss_total": 0.5715739727020264, - "step": 232199 - }, - { - "epoch": 0.016398, - "loss_gen": 4.8840532302856445, - "loss_rtd": 0.19623419642448425, - "loss_sent": 0.004442990757524967, - "loss_sod": 0.024791399016976357, - "loss_total": 0.22546859085559845, - "step": 232199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.9774951338768005, - "learning_rate": 2.577367506949263e-05, - "loss": 0.4472, - "step": 232200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.8422651290893555, - "loss_rtd": 0.21944259107112885, - "loss_sent": 0.32135623693466187, - "loss_sod": 0.09976760298013687, - "loss_total": 0.6405664682388306, - "step": 232299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.7531890869140625, - "loss_rtd": 0.23951628804206848, - "loss_sent": 0.1844344586133957, - "loss_sod": 0.04384145885705948, - "loss_total": 0.46779221296310425, - "step": 232299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.98200523853302, - "learning_rate": 2.5745920438513983e-05, - "loss": 0.458, - "step": 232300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.926414489746094, - "loss_rtd": 0.22466085851192474, - "loss_sent": 0.47860443592071533, - "loss_sod": 0.0931425467133522, - "loss_total": 0.7964078187942505, - "step": 232399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.955382823944092, - "loss_rtd": 0.25543341040611267, - "loss_sent": 0.13194069266319275, - "loss_sod": 0.060727186501026154, - "loss_total": 0.448101282119751, - "step": 232399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.3394931554794312, - "learning_rate": 2.571817557707569e-05, - "loss": 0.4455, - "step": 232400 - }, - { - "epoch": 0.016998, - "loss_gen": 6.0583977699279785, - "loss_rtd": 0.23822690546512604, - "loss_sent": 0.16945557296276093, - "loss_sod": 0.18277540802955627, - "loss_total": 0.5904579162597656, - "step": 232499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.711584568023682, - "loss_rtd": 0.22483934462070465, - "loss_sent": 0.13381531834602356, - "loss_sod": 0.04649697244167328, - "loss_total": 0.4051516354084015, - "step": 232499 - }, - { - "epoch": 0.017, - "grad_norm": 0.924994945526123, - "learning_rate": 2.569044049635338e-05, - "loss": 0.4808, - "step": 232500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.650406360626221, - "loss_rtd": 0.23745349049568176, - "loss_sent": 0.3752727508544922, - "loss_sod": 0.034239865839481354, - "loss_total": 0.6469660997390747, - "step": 232599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.752997398376465, - "loss_rtd": 0.22762267291545868, - "loss_sent": 0.49283188581466675, - "loss_sod": 0.007687894627451897, - "loss_total": 0.7281424403190613, - "step": 232599 - }, - { - "epoch": 0.0172, - "grad_norm": 2.121411085128784, - "learning_rate": 2.5662715207518717e-05, - "loss": 0.4599, - "step": 232600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.526933193206787, - "loss_rtd": 0.21745210886001587, - "loss_sent": 0.17164161801338196, - "loss_sod": 0.057460881769657135, - "loss_total": 0.44655460119247437, - "step": 232699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.517461776733398, - "loss_rtd": 0.25354766845703125, - "loss_sent": 0.11019515246152878, - "loss_sod": 0.06326562166213989, - "loss_total": 0.4270084500312805, - "step": 232699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.162143349647522, - "learning_rate": 2.563499972173945e-05, - "loss": 0.455, - "step": 232700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.907709121704102, - "loss_rtd": 0.23863568902015686, - "loss_sent": 0.22896018624305725, - "loss_sod": 0.028900478035211563, - "loss_total": 0.4964963495731354, - "step": 232799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.922534465789795, - "loss_rtd": 0.23425829410552979, - "loss_sent": 0.0690067708492279, - "loss_sod": 0.10086705535650253, - "loss_total": 0.4041321277618408, - "step": 232799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.8419181704521179, - "learning_rate": 2.560729405017941e-05, - "loss": 0.4515, - "step": 232800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.123891353607178, - "loss_rtd": 0.22470834851264954, - "loss_sent": 0.30141136050224304, - "loss_sod": 0.015585193410515785, - "loss_total": 0.5417048931121826, - "step": 232899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.59957218170166, - "loss_rtd": 0.21619682013988495, - "loss_sent": 0.12326356768608093, - "loss_sod": 0.05799437686800957, - "loss_total": 0.39745476841926575, - "step": 232899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.230122447013855, - "learning_rate": 2.5579598203998388e-05, - "loss": 0.4716, - "step": 232900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.746820449829102, - "loss_rtd": 0.24394603073596954, - "loss_sent": 0.24958030879497528, - "loss_sod": 0.10353004187345505, - "loss_total": 0.5970563888549805, - "step": 232999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.907955646514893, - "loss_rtd": 0.22782622277736664, - "loss_sent": 0.3023303747177124, - "loss_sod": 0.046588096767663956, - "loss_total": 0.5767446756362915, - "step": 232999 - }, - { - "epoch": 0.018, - "grad_norm": 1.1948094367980957, - "learning_rate": 2.5551912194352284e-05, - "loss": 0.4554, - "step": 233000 - }, - { - "epoch": 0.018, - "eval_loss": 0.4314315617084503, - "eval_runtime": 151.2764, - "eval_samples_per_second": 102.085, - "eval_steps_per_second": 0.8, - "step": 233000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.260778903961182, - "loss_rtd": 0.22205084562301636, - "loss_sent": 3.8594189391005784e-05, - "loss_sod": 0.0917004719376564, - "loss_total": 0.31378990411758423, - "step": 233099 - }, - { - "epoch": 0.018198, - "loss_gen": 4.854741096496582, - "loss_rtd": 0.18209616839885712, - "loss_sent": 0.03757604956626892, - "loss_sod": 0.07991458475589752, - "loss_total": 0.29958680272102356, - "step": 233099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.9120252728462219, - "learning_rate": 2.5524236032393027e-05, - "loss": 0.4404, - "step": 233100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.615264892578125, - "loss_rtd": 0.21891269087791443, - "loss_sent": 0.30986180901527405, - "loss_sod": 0.010211745277047157, - "loss_total": 0.5389862656593323, - "step": 233199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.439765453338623, - "loss_rtd": 0.233239084482193, - "loss_sent": 0.2163098156452179, - "loss_sod": 0.014463482424616814, - "loss_total": 0.46401238441467285, - "step": 233199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.9287422895431519, - "learning_rate": 2.5496569729268592e-05, - "loss": 0.4458, - "step": 233200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.251770496368408, - "loss_rtd": 0.21084089577198029, - "loss_sent": 0.053106699138879776, - "loss_sod": 0.09045236557722092, - "loss_total": 0.3543999493122101, - "step": 233299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.034013271331787, - "loss_rtd": 0.20131589472293854, - "loss_sent": 0.05645310506224632, - "loss_sod": 0.009659279137849808, - "loss_total": 0.26742827892303467, - "step": 233299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.6519246697425842, - "learning_rate": 2.546891329612292e-05, - "loss": 0.4481, - "step": 233300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.29550313949585, - "loss_rtd": 0.23290880024433136, - "loss_sent": 0.10205116868019104, - "loss_sod": 0.03483670949935913, - "loss_total": 0.36979666352272034, - "step": 233399 - }, - { - "epoch": 0.018798, - "loss_gen": 6.386826515197754, - "loss_rtd": 0.2479952573776245, - "loss_sent": 0.06440846621990204, - "loss_sod": 0.14350193738937378, - "loss_total": 0.4559056758880615, - "step": 233399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.9469907879829407, - "learning_rate": 2.544126674409604e-05, - "loss": 0.4541, - "step": 233400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.965794086456299, - "loss_rtd": 0.23737753927707672, - "loss_sent": 0.15652404725551605, - "loss_sod": 0.06936980783939362, - "loss_total": 0.4632713794708252, - "step": 233499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.870352268218994, - "loss_rtd": 0.22798781096935272, - "loss_sent": 0.24696271121501923, - "loss_sod": 0.043010540306568146, - "loss_total": 0.5179610848426819, - "step": 233499 - }, - { - "epoch": 0.019, - "grad_norm": 1.350039005279541, - "learning_rate": 2.541363008432397e-05, - "loss": 0.4394, - "step": 233500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.645421981811523, - "loss_rtd": 0.2259354293346405, - "loss_sent": 0.1398894488811493, - "loss_sod": 0.12675334513187408, - "loss_total": 0.4925782382488251, - "step": 233599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.55348539352417, - "loss_rtd": 0.24399907886981964, - "loss_sent": 0.25538408756256104, - "loss_sod": 0.10012035816907883, - "loss_total": 0.5995035171508789, - "step": 233599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.2693923711776733, - "learning_rate": 2.538600332793879e-05, - "loss": 0.4603, - "step": 233600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.721219062805176, - "loss_rtd": 0.2351340800523758, - "loss_sent": 0.32560208439826965, - "loss_sod": 0.057719096541404724, - "loss_total": 0.6184552907943726, - "step": 233699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.946829795837402, - "loss_rtd": 0.17942406237125397, - "loss_sent": 0.024305157363414764, - "loss_sod": 0.06219222769141197, - "loss_total": 0.2659214437007904, - "step": 233699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.2101151943206787, - "learning_rate": 2.5358386486068498e-05, - "loss": 0.4592, - "step": 233700 - }, - { - "epoch": 0.019598, - "loss_gen": 6.033141136169434, - "loss_rtd": 0.25547918677330017, - "loss_sent": 0.20957908034324646, - "loss_sod": 0.17501670122146606, - "loss_total": 0.6400749683380127, - "step": 233799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.559689044952393, - "loss_rtd": 0.23313714563846588, - "loss_sent": 0.11533534526824951, - "loss_sod": 0.1217268705368042, - "loss_total": 0.4701993465423584, - "step": 233799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.3568018674850464, - "learning_rate": 2.5330779569837194e-05, - "loss": 0.4377, - "step": 233800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.204100608825684, - "loss_rtd": 0.2192050963640213, - "loss_sent": 0.018752919510006905, - "loss_sod": 0.09112323075532913, - "loss_total": 0.329081267118454, - "step": 233899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.559121608734131, - "loss_rtd": 0.23450756072998047, - "loss_sent": 0.13573385775089264, - "loss_sod": 0.04589318856596947, - "loss_total": 0.4161345958709717, - "step": 233899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.0575929880142212, - "learning_rate": 2.5303182590364914e-05, - "loss": 0.4641, - "step": 233900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.20557165145874, - "loss_rtd": 0.1950269639492035, - "loss_sent": 0.02659563161432743, - "loss_sod": 0.04482380300760269, - "loss_total": 0.26644638180732727, - "step": 233999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.707209587097168, - "loss_rtd": 0.23177239298820496, - "loss_sent": 0.27904412150382996, - "loss_sod": 0.10986566543579102, - "loss_total": 0.6206821799278259, - "step": 233999 - }, - { - "epoch": 0.02, - "grad_norm": 1.2919104099273682, - "learning_rate": 2.5275595558767763e-05, - "loss": 0.4631, - "step": 234000 - }, - { - "epoch": 0.02, - "eval_loss": 0.43134596943855286, - "eval_runtime": 151.1975, - "eval_samples_per_second": 102.138, - "eval_steps_per_second": 0.8, - "step": 234000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.622669696807861, - "loss_rtd": 0.23707415163516998, - "loss_sent": 0.09233508259057999, - "loss_sod": 0.0379769429564476, - "loss_total": 0.3673861622810364, - "step": 234099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.688944339752197, - "loss_rtd": 0.23022818565368652, - "loss_sent": 0.6720576286315918, - "loss_sod": 0.1273057758808136, - "loss_total": 1.0295915603637695, - "step": 234099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.4382449388504028, - "learning_rate": 2.5248018486157744e-05, - "loss": 0.4499, - "step": 234100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.51674747467041, - "loss_rtd": 0.2293349653482437, - "loss_sent": 0.1584416925907135, - "loss_sod": 0.023466818034648895, - "loss_total": 0.4112434685230255, - "step": 234199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.76281213760376, - "loss_rtd": 0.23749059438705444, - "loss_sent": 0.15755978226661682, - "loss_sod": 0.0425865575671196, - "loss_total": 0.43763694167137146, - "step": 234199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.8715875148773193, - "learning_rate": 2.522045138364292e-05, - "loss": 0.4713, - "step": 234200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.291765213012695, - "loss_rtd": 0.20291708409786224, - "loss_sent": 0.009411037899553776, - "loss_sod": 0.07422708719968796, - "loss_total": 0.2865552008152008, - "step": 234299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.929440498352051, - "loss_rtd": 0.23512855172157288, - "loss_sent": 0.13337145745754242, - "loss_sod": 0.03451795130968094, - "loss_total": 0.40301793813705444, - "step": 234299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.7912243008613586, - "learning_rate": 2.5192894262327314e-05, - "loss": 0.4739, - "step": 234300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.522777557373047, - "loss_rtd": 0.24809530377388, - "loss_sent": 0.05782622843980789, - "loss_sod": 0.023543953895568848, - "loss_total": 0.32946547865867615, - "step": 234399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.438991546630859, - "loss_rtd": 0.22391508519649506, - "loss_sent": 0.08650373667478561, - "loss_sod": 0.07383397966623306, - "loss_total": 0.3842528164386749, - "step": 234399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.7679288983345032, - "learning_rate": 2.5165347133310948e-05, - "loss": 0.4711, - "step": 234400 - }, - { - "epoch": 0.020998, - "loss_gen": 6.136439323425293, - "loss_rtd": 0.2450696974992752, - "loss_sent": 0.12871153652668, - "loss_sod": 0.04998399317264557, - "loss_total": 0.42376524209976196, - "step": 234499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.780904769897461, - "loss_rtd": 0.24080272018909454, - "loss_sent": 0.11224455386400223, - "loss_sod": 0.014613127335906029, - "loss_total": 0.36766040325164795, - "step": 234499 - }, - { - "epoch": 0.021, - "grad_norm": 0.852159857749939, - "learning_rate": 2.513781000768977e-05, - "loss": 0.4631, - "step": 234500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.776484489440918, - "loss_rtd": 0.2173469066619873, - "loss_sent": 0.014870740473270416, - "loss_sod": 0.15193238854408264, - "loss_total": 0.38415002822875977, - "step": 234599 - }, - { - "epoch": 0.021198, - "loss_gen": 4.964664936065674, - "loss_rtd": 0.19436417520046234, - "loss_sent": 2.915369623224251e-05, - "loss_sod": 0.0699116662144661, - "loss_total": 0.2643049955368042, - "step": 234599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.8685846328735352, - "learning_rate": 2.5110282896555748e-05, - "loss": 0.4455, - "step": 234600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.800841808319092, - "loss_rtd": 0.2289271503686905, - "loss_sent": 0.009817521087825298, - "loss_sod": 0.09922560304403305, - "loss_total": 0.3379702568054199, - "step": 234699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.550946235656738, - "loss_rtd": 0.2376554161310196, - "loss_sent": 0.239140585064888, - "loss_sod": 0.03818279504776001, - "loss_total": 0.5149787664413452, - "step": 234699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.8645015954971313, - "learning_rate": 2.5082765810996822e-05, - "loss": 0.467, - "step": 234700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.73333740234375, - "loss_rtd": 0.2413206696510315, - "loss_sent": 0.2613426446914673, - "loss_sod": 0.044991590082645416, - "loss_total": 0.5476548671722412, - "step": 234799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.499999523162842, - "loss_rtd": 0.23359303176403046, - "loss_sent": 0.038986802101135254, - "loss_sod": 0.027301784604787827, - "loss_total": 0.29988160729408264, - "step": 234799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.6654761433601379, - "learning_rate": 2.5055258762096822e-05, - "loss": 0.4473, - "step": 234800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.706062316894531, - "loss_rtd": 0.2492617815732956, - "loss_sent": 0.08323502540588379, - "loss_sod": 0.01766759529709816, - "loss_total": 0.35016441345214844, - "step": 234899 - }, - { - "epoch": 0.021798, - "loss_gen": 6.086641788482666, - "loss_rtd": 0.2328205555677414, - "loss_sent": 0.15146449208259583, - "loss_sod": 0.01626696065068245, - "loss_total": 0.40055200457572937, - "step": 234899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.5309454202651978, - "learning_rate": 2.5027761760935614e-05, - "loss": 0.4512, - "step": 234900 - }, - { - "epoch": 0.021998, - "loss_gen": 4.860592365264893, - "loss_rtd": 0.19987404346466064, - "loss_sent": 0.00031333829974755645, - "loss_sod": 0.06769369542598724, - "loss_total": 0.2678810954093933, - "step": 234999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.4687676429748535, - "loss_rtd": 0.2330511510372162, - "loss_sent": 0.13229608535766602, - "loss_sod": 0.05977775901556015, - "loss_total": 0.42512500286102295, - "step": 234999 - }, - { - "epoch": 0.022, - "grad_norm": 0.8150344491004944, - "learning_rate": 2.5000274818588975e-05, - "loss": 0.4579, - "step": 235000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4349405765533447, - "eval_runtime": 151.1347, - "eval_samples_per_second": 102.18, - "eval_steps_per_second": 0.801, - "step": 235000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.707245349884033, - "loss_rtd": 0.226992666721344, - "loss_sent": 0.10455288738012314, - "loss_sod": 0.02676062285900116, - "loss_total": 0.3583061993122101, - "step": 235099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.896261692047119, - "loss_rtd": 0.22141416370868683, - "loss_sent": 0.17445340752601624, - "loss_sod": 0.02341574989259243, - "loss_total": 0.41928333044052124, - "step": 235099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.5898993015289307, - "learning_rate": 2.4972797946128678e-05, - "loss": 0.4608, - "step": 235100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.678945064544678, - "loss_rtd": 0.2522539496421814, - "loss_sent": 0.40994736552238464, - "loss_sod": 0.03658227622509003, - "loss_total": 0.6987836360931396, - "step": 235199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.730838775634766, - "loss_rtd": 0.2313205897808075, - "loss_sent": 0.3588387668132782, - "loss_sod": 0.13311989605426788, - "loss_total": 0.7232792377471924, - "step": 235199 - }, - { - "epoch": 0.0224, - "grad_norm": 2.3968727588653564, - "learning_rate": 2.4945331154622352e-05, - "loss": 0.4369, - "step": 235200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.283631801605225, - "loss_rtd": 0.19868652522563934, - "loss_sent": 0.021283335983753204, - "loss_sod": 0.06252605468034744, - "loss_total": 0.28249591588974, - "step": 235299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.268820762634277, - "loss_rtd": 0.20874838531017303, - "loss_sent": 0.05531521514058113, - "loss_sod": 0.09486284852027893, - "loss_total": 0.3589264452457428, - "step": 235299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.848966658115387, - "learning_rate": 2.4917874455133638e-05, - "loss": 0.4586, - "step": 235300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.539623260498047, - "loss_rtd": 0.23312102258205414, - "loss_sent": 0.2591060996055603, - "loss_sod": 0.0032930118031799793, - "loss_total": 0.49552011489868164, - "step": 235399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.957262992858887, - "loss_rtd": 0.23715601861476898, - "loss_sent": 0.03702971339225769, - "loss_sod": 0.024175569415092468, - "loss_total": 0.29836130142211914, - "step": 235399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.5907749533653259, - "learning_rate": 2.489042785872212e-05, - "loss": 0.4454, - "step": 235400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.736026763916016, - "loss_rtd": 0.24415920674800873, - "loss_sent": 0.17802608013153076, - "loss_sod": 0.01555958017706871, - "loss_total": 0.4377448558807373, - "step": 235499 - }, - { - "epoch": 0.022998, - "loss_gen": 6.277961254119873, - "loss_rtd": 0.2379772812128067, - "loss_sent": 0.22978819906711578, - "loss_sod": 0.16743631660938263, - "loss_total": 0.6352018117904663, - "step": 235499 - }, - { - "epoch": 0.023, - "grad_norm": 1.01682710647583, - "learning_rate": 2.4862991376443235e-05, - "loss": 0.462, - "step": 235500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.370872497558594, - "loss_rtd": 0.21043279767036438, - "loss_sent": 3.229017966077663e-05, - "loss_sod": 0.18214809894561768, - "loss_total": 0.39261317253112793, - "step": 235599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.350832462310791, - "loss_rtd": 0.18735279142856598, - "loss_sent": 3.1079565815161914e-05, - "loss_sod": 0.10657824575901031, - "loss_total": 0.29396212100982666, - "step": 235599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.0765697956085205, - "learning_rate": 2.4835565019348432e-05, - "loss": 0.4678, - "step": 235600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.570460796356201, - "loss_rtd": 0.23438499867916107, - "loss_sent": 0.13533693552017212, - "loss_sod": 0.05307968705892563, - "loss_total": 0.4228016138076782, - "step": 235699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.606940269470215, - "loss_rtd": 0.23377205431461334, - "loss_sent": 0.007051699794828892, - "loss_sod": 0.14392854273319244, - "loss_total": 0.3847523033618927, - "step": 235699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.1008973121643066, - "learning_rate": 2.480814879848502e-05, - "loss": 0.4452, - "step": 235700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.1537065505981445, - "loss_rtd": 0.23854967951774597, - "loss_sent": 0.04164435714483261, - "loss_sod": 0.19953332841396332, - "loss_total": 0.4797273576259613, - "step": 235799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.616333961486816, - "loss_rtd": 0.21906889975070953, - "loss_sent": 0.19090819358825684, - "loss_sod": 0.034674208611249924, - "loss_total": 0.4446513056755066, - "step": 235799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.268100380897522, - "learning_rate": 2.478074272489625e-05, - "loss": 0.4483, - "step": 235800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.753663539886475, - "loss_rtd": 0.2341436892747879, - "loss_sent": 0.09450148046016693, - "loss_sod": 0.011313949711620808, - "loss_total": 0.33995911478996277, - "step": 235899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.697127819061279, - "loss_rtd": 0.2478364259004593, - "loss_sent": 0.10682282596826553, - "loss_sod": 0.043401964008808136, - "loss_total": 0.39806121587753296, - "step": 235899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.7656726241111755, - "learning_rate": 2.475334680962132e-05, - "loss": 0.4711, - "step": 235900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.891186237335205, - "loss_rtd": 0.25196000933647156, - "loss_sent": 0.18737146258354187, - "loss_sod": 0.025355849415063858, - "loss_total": 0.464687317609787, - "step": 235999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.931027412414551, - "loss_rtd": 0.22270405292510986, - "loss_sent": 0.30568718910217285, - "loss_sod": 0.012507490813732147, - "loss_total": 0.5408987402915955, - "step": 235999 - }, - { - "epoch": 0.024, - "grad_norm": 0.8812655210494995, - "learning_rate": 2.472596106369525e-05, - "loss": 0.4556, - "step": 236000 - }, - { - "epoch": 0.024, - "eval_loss": 0.43395423889160156, - "eval_runtime": 151.4616, - "eval_samples_per_second": 101.96, - "eval_steps_per_second": 0.799, - "step": 236000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.555566310882568, - "loss_rtd": 0.2515144646167755, - "loss_sent": 0.10076847672462463, - "loss_sod": 0.08022458106279373, - "loss_total": 0.4325075149536133, - "step": 236099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.315033912658691, - "loss_rtd": 0.23150426149368286, - "loss_sent": 0.11643737554550171, - "loss_sod": 0.03707926720380783, - "loss_total": 0.385020911693573, - "step": 236099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.7560243010520935, - "learning_rate": 2.469858549814905e-05, - "loss": 0.4497, - "step": 236100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.5409674644470215, - "loss_rtd": 0.20531043410301208, - "loss_sent": 0.053728409111499786, - "loss_sod": 0.028350792825222015, - "loss_total": 0.2873896360397339, - "step": 236199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.594228744506836, - "loss_rtd": 0.2509137690067291, - "loss_sent": 0.1735769659280777, - "loss_sod": 0.06944534927606583, - "loss_total": 0.49393606185913086, - "step": 236199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.7966400980949402, - "learning_rate": 2.467122012400958e-05, - "loss": 0.4455, - "step": 236200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.3407883644104, - "loss_rtd": 0.2251061350107193, - "loss_sent": 0.0012118516024202108, - "loss_sod": 0.18864163756370544, - "loss_total": 0.4149596393108368, - "step": 236299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.124298095703125, - "loss_rtd": 0.20701512694358826, - "loss_sent": 0.024271734058856964, - "loss_sod": 0.11601647734642029, - "loss_total": 0.3473033308982849, - "step": 236299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.288283109664917, - "learning_rate": 2.464386495229964e-05, - "loss": 0.4338, - "step": 236300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.575831413269043, - "loss_rtd": 0.23613017797470093, - "loss_sent": 0.08976863324642181, - "loss_sod": 0.008835520595312119, - "loss_total": 0.33473432064056396, - "step": 236399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.69340705871582, - "loss_rtd": 0.23825500905513763, - "loss_sent": 0.04495564475655556, - "loss_sod": 0.12028937041759491, - "loss_total": 0.4035000205039978, - "step": 236399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.7958593368530273, - "learning_rate": 2.461651999403784e-05, - "loss": 0.4593, - "step": 236400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.22304105758667, - "loss_rtd": 0.20367762446403503, - "loss_sent": 4.95214517286513e-05, - "loss_sod": 0.12968656420707703, - "loss_total": 0.3334137201309204, - "step": 236499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.314144611358643, - "loss_rtd": 0.2104502022266388, - "loss_sent": 0.02240786887705326, - "loss_sod": 0.15137213468551636, - "loss_total": 0.38423019647598267, - "step": 236499 - }, - { - "epoch": 0.025, - "grad_norm": 1.090668797492981, - "learning_rate": 2.458918526023875e-05, - "loss": 0.4823, - "step": 236500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.5973219871521, - "loss_rtd": 0.2358320951461792, - "loss_sent": 0.27655020356178284, - "loss_sod": 0.02151513658463955, - "loss_total": 0.5338973999023438, - "step": 236599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.743614196777344, - "loss_rtd": 0.23042291402816772, - "loss_sent": 0.07522018998861313, - "loss_sod": 0.003805323503911495, - "loss_total": 0.3094484210014343, - "step": 236599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.8916210532188416, - "learning_rate": 2.4561860761912804e-05, - "loss": 0.4426, - "step": 236600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.573662757873535, - "loss_rtd": 0.24415111541748047, - "loss_sent": 0.1414310783147812, - "loss_sod": 0.08143579959869385, - "loss_total": 0.4670180082321167, - "step": 236699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.633249282836914, - "loss_rtd": 0.21293377876281738, - "loss_sent": 0.12949199974536896, - "loss_sod": 0.025542840361595154, - "loss_total": 0.3679686188697815, - "step": 236699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.5825505256652832, - "learning_rate": 2.4534546510066313e-05, - "loss": 0.445, - "step": 236700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.501196384429932, - "loss_rtd": 0.24929967522621155, - "loss_sent": 0.15651912987232208, - "loss_sod": 0.042097192257642746, - "loss_total": 0.4479159712791443, - "step": 236799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.555871486663818, - "loss_rtd": 0.23386947810649872, - "loss_sent": 0.12060832977294922, - "loss_sod": 0.020886603742837906, - "loss_total": 0.37536442279815674, - "step": 236799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.7640842795372009, - "learning_rate": 2.4507242515701427e-05, - "loss": 0.4627, - "step": 236800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.898460388183594, - "loss_rtd": 0.2226713001728058, - "loss_sent": 0.18890473246574402, - "loss_sod": 0.05953853949904442, - "loss_total": 0.4711145758628845, - "step": 236899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.6854963302612305, - "loss_rtd": 0.20722654461860657, - "loss_sent": 0.6310082674026489, - "loss_sod": 0.04074262082576752, - "loss_total": 0.8789774179458618, - "step": 236899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.4738171100616455, - "learning_rate": 2.4479948789816204e-05, - "loss": 0.4917, - "step": 236900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.284900665283203, - "loss_rtd": 0.21622498333454132, - "loss_sent": 2.8011925678583793e-05, - "loss_sod": 0.06487318128347397, - "loss_total": 0.2811261713504791, - "step": 236999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.151595592498779, - "loss_rtd": 0.1989092230796814, - "loss_sent": 3.269418084528297e-05, - "loss_sod": 0.15551862120628357, - "loss_total": 0.35446053743362427, - "step": 236999 - }, - { - "epoch": 0.026, - "grad_norm": 0.8244456648826599, - "learning_rate": 2.4452665343404563e-05, - "loss": 0.4576, - "step": 237000 - }, - { - "epoch": 0.026, - "eval_loss": 0.42557063698768616, - "eval_runtime": 151.1388, - "eval_samples_per_second": 102.178, - "eval_steps_per_second": 0.801, - "step": 237000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.628950119018555, - "loss_rtd": 0.24218463897705078, - "loss_sent": 0.6823144555091858, - "loss_sod": 0.02316117286682129, - "loss_total": 0.9476602673530579, - "step": 237099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.59042501449585, - "loss_rtd": 0.24450884759426117, - "loss_sent": 0.129317507147789, - "loss_sod": 0.012200526893138885, - "loss_total": 0.38602685928344727, - "step": 237099 - }, - { - "epoch": 0.0262, - "grad_norm": 3.2174065113067627, - "learning_rate": 2.4425392187456282e-05, - "loss": 0.4537, - "step": 237100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.622735500335693, - "loss_rtd": 0.2481285035610199, - "loss_sent": 0.0748935118317604, - "loss_sod": 0.05360139533877373, - "loss_total": 0.37662339210510254, - "step": 237199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.744678974151611, - "loss_rtd": 0.2219824492931366, - "loss_sent": 0.28047963976860046, - "loss_sod": 0.012201274745166302, - "loss_total": 0.5146633386611938, - "step": 237199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.0077773332595825, - "learning_rate": 2.4398129332956948e-05, - "loss": 0.4746, - "step": 237200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.693736553192139, - "loss_rtd": 0.23063986003398895, - "loss_sent": 0.39893093705177307, - "loss_sod": 0.042699843645095825, - "loss_total": 0.672270655632019, - "step": 237299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.515918731689453, - "loss_rtd": 0.23402729630470276, - "loss_sent": 0.05968880280852318, - "loss_sod": 0.05745239183306694, - "loss_total": 0.3511684834957123, - "step": 237299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.8940922021865845, - "learning_rate": 2.4370876790888058e-05, - "loss": 0.4293, - "step": 237300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.96881628036499, - "loss_rtd": 0.22738666832447052, - "loss_sent": 0.028789600357413292, - "loss_sod": 0.045895736664533615, - "loss_total": 0.30207201838493347, - "step": 237399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.594783782958984, - "loss_rtd": 0.21820645034313202, - "loss_sent": 0.19008494913578033, - "loss_sod": 0.011797965504229069, - "loss_total": 0.42008936405181885, - "step": 237399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.6380093097686768, - "learning_rate": 2.4343634572226953e-05, - "loss": 0.4538, - "step": 237400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.770845890045166, - "loss_rtd": 0.243596151471138, - "loss_sent": 0.12701734900474548, - "loss_sod": 0.15806862711906433, - "loss_total": 0.5286821126937866, - "step": 237499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.631127834320068, - "loss_rtd": 0.22995781898498535, - "loss_sent": 0.13623708486557007, - "loss_sod": 0.013269779272377491, - "loss_total": 0.37946468591690063, - "step": 237499 - }, - { - "epoch": 0.027, - "grad_norm": 1.062378168106079, - "learning_rate": 2.4316402687946755e-05, - "loss": 0.4411, - "step": 237500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.318966388702393, - "loss_rtd": 0.2044476717710495, - "loss_sent": 0.027167636901140213, - "loss_sod": 0.1754985898733139, - "loss_total": 0.4071139097213745, - "step": 237599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.660177230834961, - "loss_rtd": 0.2145908623933792, - "loss_sent": 0.19793200492858887, - "loss_sod": 0.050246983766555786, - "loss_total": 0.46276986598968506, - "step": 237599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.5831185579299927, - "learning_rate": 2.4289181149016483e-05, - "loss": 0.4455, - "step": 237600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.6185126304626465, - "loss_rtd": 0.19363583624362946, - "loss_sent": 0.09224008023738861, - "loss_sod": 0.01841435208916664, - "loss_total": 0.3042902648448944, - "step": 237699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.913099765777588, - "loss_rtd": 0.23436789214611053, - "loss_sent": 0.3045767843723297, - "loss_sod": 0.06714023649692535, - "loss_total": 0.606084942817688, - "step": 237699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.714023768901825, - "learning_rate": 2.426196996640097e-05, - "loss": 0.4609, - "step": 237700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.34343147277832, - "loss_rtd": 0.1957351565361023, - "loss_sent": 0.016168508678674698, - "loss_sod": 0.05185885727405548, - "loss_total": 0.26376253366470337, - "step": 237799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.109145164489746, - "loss_rtd": 0.20797386765480042, - "loss_sent": 0.003358804387971759, - "loss_sod": 0.15427103638648987, - "loss_total": 0.3656037151813507, - "step": 237799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8577890992164612, - "learning_rate": 2.423476915106089e-05, - "loss": 0.4653, - "step": 237800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.908792018890381, - "loss_rtd": 0.2229354977607727, - "loss_sent": 0.1619836539030075, - "loss_sod": 0.019400130957365036, - "loss_total": 0.40431928634643555, - "step": 237899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.720446586608887, - "loss_rtd": 0.2260797917842865, - "loss_sent": 0.09854258596897125, - "loss_sod": 0.11681941896677017, - "loss_total": 0.44144177436828613, - "step": 237899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.316367268562317, - "learning_rate": 2.4207578713952706e-05, - "loss": 0.4573, - "step": 237900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.694923400878906, - "loss_rtd": 0.24636176228523254, - "loss_sent": 0.20768165588378906, - "loss_sod": 0.015640396624803543, - "loss_total": 0.46968382596969604, - "step": 237999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.449057579040527, - "loss_rtd": 0.24466529488563538, - "loss_sent": 0.3436650037765503, - "loss_sod": 0.028440985828638077, - "loss_total": 0.6167712807655334, - "step": 237999 - }, - { - "epoch": 0.028, - "grad_norm": 0.8595840930938721, - "learning_rate": 2.4180398666028726e-05, - "loss": 0.4659, - "step": 238000 - }, - { - "epoch": 0.028, - "eval_loss": 0.4316830039024353, - "eval_runtime": 151.3884, - "eval_samples_per_second": 102.009, - "eval_steps_per_second": 0.799, - "step": 238000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.5816473960876465, - "loss_rtd": 0.23535099625587463, - "loss_sent": 0.05623890459537506, - "loss_sod": 0.11616479605436325, - "loss_total": 0.40775471925735474, - "step": 238099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.348033905029297, - "loss_rtd": 0.21179944276809692, - "loss_sent": 3.586645470932126e-05, - "loss_sod": 0.2043694257736206, - "loss_total": 0.4162047207355499, - "step": 238099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.2257155179977417, - "learning_rate": 2.4153229018237074e-05, - "loss": 0.4564, - "step": 238100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.727396488189697, - "loss_rtd": 0.2434481382369995, - "loss_sent": 0.32008200883865356, - "loss_sod": 0.028842560946941376, - "loss_total": 0.592372715473175, - "step": 238199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.8877692222595215, - "loss_rtd": 0.23744194209575653, - "loss_sent": 0.24149754643440247, - "loss_sod": 0.17500735819339752, - "loss_total": 0.6539468765258789, - "step": 238199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.5632820129394531, - "learning_rate": 2.41260697815217e-05, - "loss": 0.4441, - "step": 238200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.408656120300293, - "loss_rtd": 0.21569550037384033, - "loss_sent": 0.040420565754175186, - "loss_sod": 0.16368474066257477, - "loss_total": 0.4198007881641388, - "step": 238299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.322138786315918, - "loss_rtd": 0.22217020392417908, - "loss_sent": 0.09246724843978882, - "loss_sod": 0.023493852466344833, - "loss_total": 0.338131308555603, - "step": 238299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.2043564319610596, - "learning_rate": 2.4098920966822307e-05, - "loss": 0.4621, - "step": 238300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.709166526794434, - "loss_rtd": 0.23016920685768127, - "loss_sent": 0.20664529502391815, - "loss_sod": 0.0580989234149456, - "loss_total": 0.49491339921951294, - "step": 238399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.634028911590576, - "loss_rtd": 0.22805720567703247, - "loss_sent": 0.11374135315418243, - "loss_sod": 0.0163797028362751, - "loss_total": 0.3581782579421997, - "step": 238399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.6835154891014099, - "learning_rate": 2.4071782585074455e-05, - "loss": 0.448, - "step": 238400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.901325702667236, - "loss_rtd": 0.2259879857301712, - "loss_sent": 0.4070032238960266, - "loss_sod": 0.11817653477191925, - "loss_total": 0.7511677742004395, - "step": 238499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.890055179595947, - "loss_rtd": 0.24064171314239502, - "loss_sent": 0.08593276143074036, - "loss_sod": 0.1841154545545578, - "loss_total": 0.5106899738311768, - "step": 238499 - }, - { - "epoch": 0.029, - "grad_norm": 2.0252127647399902, - "learning_rate": 2.404465464720947e-05, - "loss": 0.452, - "step": 238500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.779674530029297, - "loss_rtd": 0.2189570665359497, - "loss_sent": 0.410211443901062, - "loss_sod": 0.04524721950292587, - "loss_total": 0.6744157075881958, - "step": 238599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.71017599105835, - "loss_rtd": 0.22937899827957153, - "loss_sent": 0.3298669755458832, - "loss_sod": 0.09960901737213135, - "loss_total": 0.6588549613952637, - "step": 238599 - }, - { - "epoch": 0.0292, - "grad_norm": 3.146510124206543, - "learning_rate": 2.4017537164154514e-05, - "loss": 0.4532, - "step": 238600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.4319281578063965, - "loss_rtd": 0.21059221029281616, - "loss_sent": 0.09813720732927322, - "loss_sod": 0.03410068154335022, - "loss_total": 0.342830091714859, - "step": 238699 - }, - { - "epoch": 0.029398, - "loss_gen": 6.038654327392578, - "loss_rtd": 0.22054323554039001, - "loss_sent": 0.17490822076797485, - "loss_sod": 0.1179456114768982, - "loss_total": 0.5133970975875854, - "step": 238699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.0059821605682373, - "learning_rate": 2.399043014683246e-05, - "loss": 0.4392, - "step": 238700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.929017066955566, - "loss_rtd": 0.21582269668579102, - "loss_sent": 0.1245991513133049, - "loss_sod": 0.014260164462029934, - "loss_total": 0.35468199849128723, - "step": 238799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.320408821105957, - "loss_rtd": 0.23964263498783112, - "loss_sent": 0.18945762515068054, - "loss_sod": 0.004603613168001175, - "loss_total": 0.43370386958122253, - "step": 238799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.510923147201538, - "learning_rate": 2.396333360616203e-05, - "loss": 0.4559, - "step": 238800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.772350788116455, - "loss_rtd": 0.2370069921016693, - "loss_sent": 0.094133660197258, - "loss_sod": 0.004237771034240723, - "loss_total": 0.33537840843200684, - "step": 238899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.60974645614624, - "loss_rtd": 0.23583225905895233, - "loss_sent": 0.23875966668128967, - "loss_sod": 0.028905808925628662, - "loss_total": 0.5034977197647095, - "step": 238899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.3758431673049927, - "learning_rate": 2.393624755305771e-05, - "loss": 0.4491, - "step": 238900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.795708656311035, - "loss_rtd": 0.22569571435451508, - "loss_sent": 0.1735101342201233, - "loss_sod": 0.009773963131010532, - "loss_total": 0.4089798331260681, - "step": 238999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.156543254852295, - "loss_rtd": 0.22884215414524078, - "loss_sent": 0.06918361783027649, - "loss_sod": 0.06429454684257507, - "loss_total": 0.36232033371925354, - "step": 238999 - }, - { - "epoch": 0.03, - "grad_norm": 0.682130753993988, - "learning_rate": 2.390917199842978e-05, - "loss": 0.4673, - "step": 239000 - }, - { - "epoch": 0.03, - "eval_loss": 0.42803001403808594, - "eval_runtime": 151.3306, - "eval_samples_per_second": 102.048, - "eval_steps_per_second": 0.8, - "step": 239000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.893220901489258, - "loss_rtd": 0.23933978378772736, - "loss_sent": 0.16924291849136353, - "loss_sod": 0.051185209304094315, - "loss_total": 0.4597679078578949, - "step": 239099 - }, - { - "epoch": 0.030198, - "loss_gen": 6.047658920288086, - "loss_rtd": 0.23218569159507751, - "loss_sent": 0.07819077372550964, - "loss_sod": 0.12189389020204544, - "loss_total": 0.432270348072052, - "step": 239099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.4540042877197266, - "learning_rate": 2.388210695318423e-05, - "loss": 0.4578, - "step": 239100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.5395588874816895, - "loss_rtd": 0.22231055796146393, - "loss_sent": 0.19725970923900604, - "loss_sod": 0.05616962909698486, - "loss_total": 0.47573989629745483, - "step": 239199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.67051887512207, - "loss_rtd": 0.23646663129329681, - "loss_sent": 0.06493733823299408, - "loss_sod": 0.006037513259798288, - "loss_total": 0.30744147300720215, - "step": 239199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.253751516342163, - "learning_rate": 2.385505242822288e-05, - "loss": 0.4412, - "step": 239200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.815771579742432, - "loss_rtd": 0.23468433320522308, - "loss_sent": 0.14919671416282654, - "loss_sod": 0.14313432574272156, - "loss_total": 0.5270153284072876, - "step": 239299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.409769535064697, - "loss_rtd": 0.2294633984565735, - "loss_sent": 0.1363288015127182, - "loss_sod": 0.01859310083091259, - "loss_total": 0.38438528776168823, - "step": 239299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.8529370427131653, - "learning_rate": 2.38280084344433e-05, - "loss": 0.4479, - "step": 239300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.452807903289795, - "loss_rtd": 0.20699205994606018, - "loss_sent": 0.5443159937858582, - "loss_sod": 0.0021774633787572384, - "loss_total": 0.7534855008125305, - "step": 239399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.706867694854736, - "loss_rtd": 0.25227901339530945, - "loss_sent": 0.26974064111709595, - "loss_sod": 0.08123065531253815, - "loss_total": 0.6032503247261047, - "step": 239399 - }, - { - "epoch": 0.0308, - "grad_norm": 2.7138097286224365, - "learning_rate": 2.3800974982738773e-05, - "loss": 0.4626, - "step": 239400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.703469276428223, - "loss_rtd": 0.2405497282743454, - "loss_sent": 0.11308270692825317, - "loss_sod": 0.09160737693309784, - "loss_total": 0.4452398121356964, - "step": 239499 - }, - { - "epoch": 0.030998, - "loss_gen": 4.877888202667236, - "loss_rtd": 0.1974320113658905, - "loss_sent": 0.0006430040230043232, - "loss_sod": 0.08650462329387665, - "loss_total": 0.28457963466644287, - "step": 239499 - }, - { - "epoch": 0.031, - "grad_norm": 0.8747193217277527, - "learning_rate": 2.3773952083998392e-05, - "loss": 0.4376, - "step": 239500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.368193626403809, - "loss_rtd": 0.21049441397190094, - "loss_sent": 0.055834781378507614, - "loss_sod": 0.006824597716331482, - "loss_total": 0.27315378189086914, - "step": 239599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.744250297546387, - "loss_rtd": 0.2513865828514099, - "loss_sent": 0.14889192581176758, - "loss_sod": 0.11034315824508667, - "loss_total": 0.5106216669082642, - "step": 239599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.6689745187759399, - "learning_rate": 2.374693974910697e-05, - "loss": 0.4722, - "step": 239600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.915278911590576, - "loss_rtd": 0.2438460886478424, - "loss_sent": 0.19906951487064362, - "loss_sod": 0.010084887966513634, - "loss_total": 0.4530004858970642, - "step": 239699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.869354724884033, - "loss_rtd": 0.23033317923545837, - "loss_sent": 0.09512707591056824, - "loss_sod": 0.13668489456176758, - "loss_total": 0.4621451497077942, - "step": 239699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.1120641231536865, - "learning_rate": 2.3719937988945102e-05, - "loss": 0.4479, - "step": 239700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.956075668334961, - "loss_rtd": 0.24548287689685822, - "loss_sent": 0.21017757058143616, - "loss_sod": 0.01185193657875061, - "loss_total": 0.4675123989582062, - "step": 239799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.417455673217773, - "loss_rtd": 0.22980546951293945, - "loss_sent": 0.1534159779548645, - "loss_sod": 0.01895293965935707, - "loss_total": 0.4021743834018707, - "step": 239799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.22420072555542, - "learning_rate": 2.3692946814389043e-05, - "loss": 0.4707, - "step": 239800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.628900051116943, - "loss_rtd": 0.2454957365989685, - "loss_sent": 0.1552024483680725, - "loss_sod": 0.003533473936840892, - "loss_total": 0.4042316675186157, - "step": 239899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.544217586517334, - "loss_rtd": 0.2314174324274063, - "loss_sent": 0.1561063677072525, - "loss_sod": 0.0257144495844841, - "loss_total": 0.4132382273674011, - "step": 239899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.113447904586792, - "learning_rate": 2.3665966236310873e-05, - "loss": 0.4529, - "step": 239900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.736084938049316, - "loss_rtd": 0.2461896687746048, - "loss_sent": 0.11952412873506546, - "loss_sod": 0.02386702038347721, - "loss_total": 0.3895808160305023, - "step": 239999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.892647743225098, - "loss_rtd": 0.2216283679008484, - "loss_sent": 0.3390713334083557, - "loss_sod": 0.021314382553100586, - "loss_total": 0.5820140838623047, - "step": 239999 - }, - { - "epoch": 0.032, - "grad_norm": 0.8471458554267883, - "learning_rate": 2.3638996265578345e-05, - "loss": 0.4609, - "step": 240000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4270385801792145, - "eval_runtime": 151.4497, - "eval_samples_per_second": 101.968, - "eval_steps_per_second": 0.799, - "step": 240000 - }, - { - "epoch": 0.000198, - "loss_gen": 6.174821853637695, - "loss_rtd": 0.23269867897033691, - "loss_sent": 0.04734470322728157, - "loss_sod": 0.13130277395248413, - "loss_total": 0.4113461673259735, - "step": 240099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.896604537963867, - "loss_rtd": 0.2383555769920349, - "loss_sent": 0.0619867779314518, - "loss_sod": 0.04733799025416374, - "loss_total": 0.34768033027648926, - "step": 240099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.7818575501441956, - "learning_rate": 2.361203691305499e-05, - "loss": 0.4703, - "step": 240100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.213702201843262, - "loss_rtd": 0.19562645256519318, - "loss_sent": 0.09218383580446243, - "loss_sod": 0.12223028391599655, - "loss_total": 0.41004055738449097, - "step": 240199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.583803653717041, - "loss_rtd": 0.22966305911540985, - "loss_sent": 0.19943755865097046, - "loss_sod": 0.010032592341303825, - "loss_total": 0.4391332268714905, - "step": 240199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.1320916414260864, - "learning_rate": 2.358508818959999e-05, - "loss": 0.4418, - "step": 240200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.706082344055176, - "loss_rtd": 0.2443791776895523, - "loss_sent": 0.0584988035261631, - "loss_sod": 0.11845959722995758, - "loss_total": 0.4213375747203827, - "step": 240299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.8394269943237305, - "loss_rtd": 0.22650183737277985, - "loss_sent": 0.4121139347553253, - "loss_sod": 0.1565793752670288, - "loss_total": 0.7951951026916504, - "step": 240299 - }, - { - "epoch": 0.0006, - "grad_norm": 2.023341655731201, - "learning_rate": 2.355815010606831e-05, - "loss": 0.4467, - "step": 240300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.811026573181152, - "loss_rtd": 0.24636968970298767, - "loss_sent": 0.21656563878059387, - "loss_sod": 0.10607089102268219, - "loss_total": 0.5690062046051025, - "step": 240399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.745444297790527, - "loss_rtd": 0.21473713219165802, - "loss_sent": 0.29169970750808716, - "loss_sod": 0.057059645652770996, - "loss_total": 0.563496470451355, - "step": 240399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.0170907974243164, - "learning_rate": 2.353122267331061e-05, - "loss": 0.449, - "step": 240400 - }, - { - "epoch": 0.000998, - "loss_gen": 6.04369592666626, - "loss_rtd": 0.24859841167926788, - "loss_sent": 0.27929022908210754, - "loss_sod": 0.012861400842666626, - "loss_total": 0.5407500267028809, - "step": 240499 - }, - { - "epoch": 0.000998, - "loss_gen": 6.0217509269714355, - "loss_rtd": 0.23393367230892181, - "loss_sent": 0.15887829661369324, - "loss_sod": 0.0831235870718956, - "loss_total": 0.47593554854393005, - "step": 240499 - }, - { - "epoch": 0.001, - "grad_norm": 0.9431577920913696, - "learning_rate": 2.3504305902173262e-05, - "loss": 0.4288, - "step": 240500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.6050124168396, - "loss_rtd": 0.2500074803829193, - "loss_sent": 0.1043907105922699, - "loss_sod": 0.06603623926639557, - "loss_total": 0.42043444514274597, - "step": 240599 - }, - { - "epoch": 0.001198, - "loss_gen": 6.001707553863525, - "loss_rtd": 0.23530079424381256, - "loss_sent": 0.6919125914573669, - "loss_sod": 0.05928611755371094, - "loss_total": 0.9864994883537292, - "step": 240599 - }, - { - "epoch": 0.0012, - "grad_norm": 3.281320095062256, - "learning_rate": 2.3477399803498314e-05, - "loss": 0.4645, - "step": 240600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.266785621643066, - "loss_rtd": 0.20185698568820953, - "loss_sent": 0.037269651889801025, - "loss_sod": 0.01602785289287567, - "loss_total": 0.25515449047088623, - "step": 240699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.644423007965088, - "loss_rtd": 0.24058449268341064, - "loss_sent": 0.09521439671516418, - "loss_sod": 0.0025251905899494886, - "loss_total": 0.33832406997680664, - "step": 240699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.5941767692565918, - "learning_rate": 2.345050438812355e-05, - "loss": 0.4535, - "step": 240700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.874456882476807, - "loss_rtd": 0.22525304555892944, - "loss_sent": 0.2878045439720154, - "loss_sod": 0.09540347754955292, - "loss_total": 0.6084610819816589, - "step": 240799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.894911766052246, - "loss_rtd": 0.22562730312347412, - "loss_sent": 0.3028585612773895, - "loss_sod": 0.020743096247315407, - "loss_total": 0.5492289662361145, - "step": 240799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.1248347759246826, - "learning_rate": 2.342361966688247e-05, - "loss": 0.4589, - "step": 240800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.601938247680664, - "loss_rtd": 0.23857371509075165, - "loss_sent": 0.1302148997783661, - "loss_sod": 0.009533866308629513, - "loss_total": 0.3783224821090698, - "step": 240899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.81471061706543, - "loss_rtd": 0.2276061475276947, - "loss_sent": 0.10715650767087936, - "loss_sod": 0.024203572422266006, - "loss_total": 0.35896623134613037, - "step": 240899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.6274192929267883, - "learning_rate": 2.3396745650604186e-05, - "loss": 0.4586, - "step": 240900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.213769912719727, - "loss_rtd": 0.20258015394210815, - "loss_sent": 0.028924955055117607, - "loss_sod": 0.012560616247355938, - "loss_total": 0.24406573176383972, - "step": 240999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.9080657958984375, - "loss_rtd": 0.23435091972351074, - "loss_sent": 0.07439293712377548, - "loss_sod": 0.03413313999772072, - "loss_total": 0.34287700057029724, - "step": 240999 - }, - { - "epoch": 0.002, - "grad_norm": 0.6711193919181824, - "learning_rate": 2.336988235011357e-05, - "loss": 0.4375, - "step": 241000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4283449351787567, - "eval_runtime": 162.6705, - "eval_samples_per_second": 94.934, - "eval_steps_per_second": 0.744, - "step": 241000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.6881303787231445, - "loss_rtd": 0.22605517506599426, - "loss_sent": 0.39668649435043335, - "loss_sod": 0.09255596995353699, - "loss_total": 0.7152976393699646, - "step": 241099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.743150234222412, - "loss_rtd": 0.22890755534172058, - "loss_sent": 0.4766629636287689, - "loss_sod": 0.08745452016592026, - "loss_total": 0.793025016784668, - "step": 241099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.7100263833999634, - "learning_rate": 2.3343029776231163e-05, - "loss": 0.4559, - "step": 241100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.571623802185059, - "loss_rtd": 0.22446729242801666, - "loss_sent": 0.1801891028881073, - "loss_sod": 0.024750633165240288, - "loss_total": 0.4294070303440094, - "step": 241199 - }, - { - "epoch": 0.002398, - "loss_gen": 4.958050727844238, - "loss_rtd": 0.20256225764751434, - "loss_sent": 0.01209980994462967, - "loss_sod": 0.20996412634849548, - "loss_total": 0.4246261715888977, - "step": 241199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.115786075592041, - "learning_rate": 2.3316187939773192e-05, - "loss": 0.4641, - "step": 241200 - }, - { - "epoch": 0.002598, - "loss_gen": 4.70155668258667, - "loss_rtd": 0.1794353872537613, - "loss_sent": 0.07801483571529388, - "loss_sod": 0.03325256332755089, - "loss_total": 0.29070279002189636, - "step": 241299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.621855735778809, - "loss_rtd": 0.2466362863779068, - "loss_sent": 0.15442420542240143, - "loss_sod": 0.08801429718732834, - "loss_total": 0.4890747666358948, - "step": 241299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.6202239394187927, - "learning_rate": 2.328935685155151e-05, - "loss": 0.4706, - "step": 241300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.332050800323486, - "loss_rtd": 0.18241283297538757, - "loss_sent": 2.6445484763826244e-05, - "loss_sod": 0.050217967480421066, - "loss_total": 0.23265725374221802, - "step": 241399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.652956485748291, - "loss_rtd": 0.18314598500728607, - "loss_sent": 2.7188658350496553e-05, - "loss_sod": 0.2116314172744751, - "loss_total": 0.39480459690093994, - "step": 241399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.1327468156814575, - "learning_rate": 2.326253652237369e-05, - "loss": 0.4472, - "step": 241400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.280188083648682, - "loss_rtd": 0.21391335129737854, - "loss_sent": 0.02823665179312229, - "loss_sod": 0.02851317636668682, - "loss_total": 0.27066317200660706, - "step": 241499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.112308979034424, - "loss_rtd": 0.21453484892845154, - "loss_sent": 5.337989568943158e-05, - "loss_sod": 0.18758845329284668, - "loss_total": 0.4021766781806946, - "step": 241499 - }, - { - "epoch": 0.003, - "grad_norm": 1.0647000074386597, - "learning_rate": 2.3235726963042966e-05, - "loss": 0.4404, - "step": 241500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.487748146057129, - "loss_rtd": 0.2085038721561432, - "loss_sent": 0.09526938945055008, - "loss_sod": 0.0808418020606041, - "loss_total": 0.38461506366729736, - "step": 241599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.592741966247559, - "loss_rtd": 0.243381530046463, - "loss_sent": 0.07295112311840057, - "loss_sod": 0.06469008326530457, - "loss_total": 0.38102275133132935, - "step": 241599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9712144732475281, - "learning_rate": 2.3208928184358236e-05, - "loss": 0.4447, - "step": 241600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.652876377105713, - "loss_rtd": 0.21591812372207642, - "loss_sent": 0.35284423828125, - "loss_sod": 0.020741023123264313, - "loss_total": 0.5895034074783325, - "step": 241699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.119246006011963, - "loss_rtd": 0.20154471695423126, - "loss_sent": 0.07837800681591034, - "loss_sod": 0.021901678293943405, - "loss_total": 0.3018243908882141, - "step": 241699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.6129971742630005, - "learning_rate": 2.3182140197114012e-05, - "loss": 0.4575, - "step": 241700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.8978400230407715, - "loss_rtd": 0.22588253021240234, - "loss_sent": 0.16254116594791412, - "loss_sod": 0.016494540497660637, - "loss_total": 0.40491825342178345, - "step": 241799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.733808517456055, - "loss_rtd": 0.23193776607513428, - "loss_sent": 0.07303871959447861, - "loss_sod": 0.03720690310001373, - "loss_total": 0.342183381319046, - "step": 241799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.6855172514915466, - "learning_rate": 2.3155363012100507e-05, - "loss": 0.4711, - "step": 241800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.7418437004089355, - "loss_rtd": 0.24587838351726532, - "loss_sent": 0.09250931441783905, - "loss_sod": 0.04317227005958557, - "loss_total": 0.38155996799468994, - "step": 241899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.823653221130371, - "loss_rtd": 0.22120451927185059, - "loss_sent": 0.22145628929138184, - "loss_sod": 0.04665272682905197, - "loss_total": 0.489313542842865, - "step": 241899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.5941023230552673, - "learning_rate": 2.3128596640103574e-05, - "loss": 0.4611, - "step": 241900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.7284016609191895, - "loss_rtd": 0.2311260998249054, - "loss_sent": 0.40789496898651123, - "loss_sod": 0.013776625506579876, - "loss_total": 0.6527976989746094, - "step": 241999 - }, - { - "epoch": 0.003998, - "loss_gen": 6.094147682189941, - "loss_rtd": 0.24135150015354156, - "loss_sent": 0.10780978947877884, - "loss_sod": 0.06081417575478554, - "loss_total": 0.40997546911239624, - "step": 241999 - }, - { - "epoch": 0.004, - "grad_norm": 1.6425068378448486, - "learning_rate": 2.310184109190472e-05, - "loss": 0.4361, - "step": 242000 - }, - { - "epoch": 0.004, - "eval_loss": 0.42943087220191956, - "eval_runtime": 151.5831, - "eval_samples_per_second": 101.878, - "eval_steps_per_second": 0.798, - "step": 242000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.632208347320557, - "loss_rtd": 0.2249802052974701, - "loss_sent": 0.4374160170555115, - "loss_sod": 0.023674190044403076, - "loss_total": 0.686070442199707, - "step": 242099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.471686840057373, - "loss_rtd": 0.23182249069213867, - "loss_sent": 0.16629771888256073, - "loss_sod": 0.03183560073375702, - "loss_total": 0.4299558401107788, - "step": 242099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2165805101394653, - "learning_rate": 2.3075096378281036e-05, - "loss": 0.4352, - "step": 242100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.546979904174805, - "loss_rtd": 0.23156070709228516, - "loss_sent": 0.16346849501132965, - "loss_sod": 0.04800717160105705, - "loss_total": 0.44303637742996216, - "step": 242199 - }, - { - "epoch": 0.004398, - "loss_gen": 6.060455322265625, - "loss_rtd": 0.23157013952732086, - "loss_sent": 0.17300662398338318, - "loss_sod": 0.04722176492214203, - "loss_total": 0.45179852843284607, - "step": 242199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.2987682819366455, - "learning_rate": 2.304836251000531e-05, - "loss": 0.4454, - "step": 242200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.9557905197143555, - "loss_rtd": 0.21919752657413483, - "loss_sent": 0.18476250767707825, - "loss_sod": 0.05176394432783127, - "loss_total": 0.45572397112846375, - "step": 242299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.481745719909668, - "loss_rtd": 0.22046954929828644, - "loss_sent": 0.09188221395015717, - "loss_sod": 0.03570885211229324, - "loss_total": 0.34806060791015625, - "step": 242299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.0712467432022095, - "learning_rate": 2.3021639497845966e-05, - "loss": 0.4677, - "step": 242300 - }, - { - "epoch": 0.004798, - "loss_gen": 6.09511137008667, - "loss_rtd": 0.2507239580154419, - "loss_sent": 0.2072407603263855, - "loss_sod": 0.12444519996643066, - "loss_total": 0.5824099183082581, - "step": 242399 - }, - { - "epoch": 0.004798, - "loss_gen": 6.472561359405518, - "loss_rtd": 0.24314600229263306, - "loss_sent": 0.0769466683268547, - "loss_sod": 0.066665418446064, - "loss_total": 0.38675808906555176, - "step": 242399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.6674302816390991, - "learning_rate": 2.299492735256698e-05, - "loss": 0.4405, - "step": 242400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.394045829772949, - "loss_rtd": 0.2016497105360031, - "loss_sent": 0.01999494433403015, - "loss_sod": 0.1257443130016327, - "loss_total": 0.34738895297050476, - "step": 242499 - }, - { - "epoch": 0.004998, - "loss_gen": 6.0678019523620605, - "loss_rtd": 0.23026862740516663, - "loss_sent": 0.09744556248188019, - "loss_sod": 0.06970666348934174, - "loss_total": 0.39742082357406616, - "step": 242499 - }, - { - "epoch": 0.005, - "grad_norm": 1.0150680541992188, - "learning_rate": 2.2968226084928035e-05, - "loss": 0.456, - "step": 242500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.373544692993164, - "loss_rtd": 0.1858280897140503, - "loss_sent": 0.06773902475833893, - "loss_sod": 0.12823840975761414, - "loss_total": 0.38180553913116455, - "step": 242599 - }, - { - "epoch": 0.005198, - "loss_gen": 6.14440393447876, - "loss_rtd": 0.2186332494020462, - "loss_sent": 0.1912665218114853, - "loss_sod": 0.03565386310219765, - "loss_total": 0.44555366039276123, - "step": 242599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.85086989402771, - "learning_rate": 2.2941535705684385e-05, - "loss": 0.4544, - "step": 242600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.905035972595215, - "loss_rtd": 0.2487083375453949, - "loss_sent": 0.39679527282714844, - "loss_sod": 0.07764381915330887, - "loss_total": 0.7231473922729492, - "step": 242699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.728309154510498, - "loss_rtd": 0.2311766892671585, - "loss_sent": 0.2341116964817047, - "loss_sod": 0.004367607645690441, - "loss_total": 0.46965599060058594, - "step": 242699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.6963841915130615, - "learning_rate": 2.2914856225586933e-05, - "loss": 0.4399, - "step": 242700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.7613043785095215, - "loss_rtd": 0.23951976001262665, - "loss_sent": 0.2711679935455322, - "loss_sod": 0.031781427562236786, - "loss_total": 0.5424691438674927, - "step": 242799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.814478397369385, - "loss_rtd": 0.2483694702386856, - "loss_sent": 0.12304899096488953, - "loss_sod": 0.03288950026035309, - "loss_total": 0.4043079614639282, - "step": 242799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.7700718641281128, - "learning_rate": 2.2888187655382144e-05, - "loss": 0.4451, - "step": 242800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.66154670715332, - "loss_rtd": 0.24562153220176697, - "loss_sent": 0.14087800681591034, - "loss_sod": 0.02246302366256714, - "loss_total": 0.40896254777908325, - "step": 242899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.781575679779053, - "loss_rtd": 0.21482908725738525, - "loss_sent": 0.27028486132621765, - "loss_sod": 0.056306540966033936, - "loss_total": 0.5414204597473145, - "step": 242899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.9976038932800293, - "learning_rate": 2.286153000581212e-05, - "loss": 0.4508, - "step": 242900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.461580276489258, - "loss_rtd": 0.21401362121105194, - "loss_sent": 0.11155346781015396, - "loss_sod": 0.04492160305380821, - "loss_total": 0.3704886734485626, - "step": 242999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.641631126403809, - "loss_rtd": 0.25520673394203186, - "loss_sent": 0.19426937401294708, - "loss_sod": 0.012615029700100422, - "loss_total": 0.4620911478996277, - "step": 242999 - }, - { - "epoch": 0.006, - "grad_norm": 0.9921756982803345, - "learning_rate": 2.283488328761456e-05, - "loss": 0.461, - "step": 243000 - }, - { - "epoch": 0.006, - "eval_loss": 0.42579010128974915, - "eval_runtime": 152.9012, - "eval_samples_per_second": 101.0, - "eval_steps_per_second": 0.791, - "step": 243000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.39358377456665, - "loss_rtd": 0.22952860593795776, - "loss_sent": 0.19048072397708893, - "loss_sod": 0.03400341421365738, - "loss_total": 0.45401275157928467, - "step": 243099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.478324890136719, - "loss_rtd": 0.2199968844652176, - "loss_sent": 0.14730414748191833, - "loss_sod": 0.022844431921839714, - "loss_total": 0.390145480632782, - "step": 243099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.6951247453689575, - "learning_rate": 2.280824751152279e-05, - "loss": 0.4597, - "step": 243100 - }, - { - "epoch": 0.006398, - "loss_gen": 6.15172815322876, - "loss_rtd": 0.2365787923336029, - "loss_sent": 0.192633256316185, - "loss_sod": 0.05162573605775833, - "loss_total": 0.48083776235580444, - "step": 243199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.5926833152771, - "loss_rtd": 0.21989813446998596, - "loss_sent": 0.06581819802522659, - "loss_sod": 0.013996992260217667, - "loss_total": 0.2997133135795593, - "step": 243199 - }, - { - "epoch": 0.0064, - "grad_norm": 2.6375739574432373, - "learning_rate": 2.2781622688265646e-05, - "loss": 0.4613, - "step": 243200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.6530585289001465, - "loss_rtd": 0.25011229515075684, - "loss_sent": 0.14732234179973602, - "loss_sod": 0.00843762420117855, - "loss_total": 0.40587228536605835, - "step": 243299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.328200340270996, - "loss_rtd": 0.24131344258785248, - "loss_sent": 0.07536239176988602, - "loss_sod": 0.04264714941382408, - "loss_total": 0.3593229651451111, - "step": 243299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.6082994341850281, - "learning_rate": 2.2755008828567632e-05, - "loss": 0.4387, - "step": 243300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.879546642303467, - "loss_rtd": 0.23271173238754272, - "loss_sent": 0.07562201470136642, - "loss_sod": 0.06916868686676025, - "loss_total": 0.37750244140625, - "step": 243399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.528488636016846, - "loss_rtd": 0.21066364645957947, - "loss_sent": 6.91495297360234e-05, - "loss_sod": 0.08674241602420807, - "loss_total": 0.2974752187728882, - "step": 243399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.7027512192726135, - "learning_rate": 2.27284059431488e-05, - "loss": 0.4565, - "step": 243400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.234533309936523, - "loss_rtd": 0.1966201215982437, - "loss_sent": 0.08713880181312561, - "loss_sod": 0.2176421582698822, - "loss_total": 0.5014010667800903, - "step": 243499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.716063976287842, - "loss_rtd": 0.25576815009117126, - "loss_sent": 0.28784382343292236, - "loss_sod": 0.04102654382586479, - "loss_total": 0.5846384763717651, - "step": 243499 - }, - { - "epoch": 0.007, - "grad_norm": 1.3425332307815552, - "learning_rate": 2.2701814042724818e-05, - "loss": 0.437, - "step": 243500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.361954689025879, - "loss_rtd": 0.23103067278862, - "loss_sent": 0.23926769196987152, - "loss_sod": 0.020815439522266388, - "loss_total": 0.4911137819290161, - "step": 243599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.695514678955078, - "loss_rtd": 0.23887500166893005, - "loss_sent": 0.21414825320243835, - "loss_sod": 0.06075389310717583, - "loss_total": 0.5137771368026733, - "step": 243599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.6319363117218018, - "learning_rate": 2.267523313800685e-05, - "loss": 0.4598, - "step": 243600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.656366348266602, - "loss_rtd": 0.21935313940048218, - "loss_sent": 0.07514968514442444, - "loss_sod": 0.07651719450950623, - "loss_total": 0.37102001905441284, - "step": 243699 - }, - { - "epoch": 0.007398, - "loss_gen": 6.36208963394165, - "loss_rtd": 0.21408240497112274, - "loss_sent": 0.08127298951148987, - "loss_sod": 0.110919289290905, - "loss_total": 0.406274676322937, - "step": 243699 - }, - { - "epoch": 0.0074, - "grad_norm": 0.9099557399749756, - "learning_rate": 2.2648663239701712e-05, - "loss": 0.4535, - "step": 243700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.040685653686523, - "loss_rtd": 0.18819379806518555, - "loss_sent": 0.013052689842879772, - "loss_sod": 0.08438065648078918, - "loss_total": 0.285627156496048, - "step": 243799 - }, - { - "epoch": 0.007598, - "loss_gen": 6.159510612487793, - "loss_rtd": 0.239523246884346, - "loss_sent": 0.21420960128307343, - "loss_sod": 0.07605443894863129, - "loss_total": 0.5297873020172119, - "step": 243799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.7743996977806091, - "learning_rate": 2.2622104358511742e-05, - "loss": 0.4621, - "step": 243800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.7327985763549805, - "loss_rtd": 0.2255195677280426, - "loss_sent": 0.09750816226005554, - "loss_sod": 0.09191425144672394, - "loss_total": 0.4149419963359833, - "step": 243899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.552008152008057, - "loss_rtd": 0.23785394430160522, - "loss_sent": 0.17532411217689514, - "loss_sod": 0.047099769115448, - "loss_total": 0.46027782559394836, - "step": 243899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.9805784225463867, - "learning_rate": 2.2595556505134885e-05, - "loss": 0.4528, - "step": 243900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.487459659576416, - "loss_rtd": 0.24633139371871948, - "loss_sent": 0.25000327825546265, - "loss_sod": 0.00778064364567399, - "loss_total": 0.5041153430938721, - "step": 243999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.831045150756836, - "loss_rtd": 0.24189917743206024, - "loss_sent": 0.2143426239490509, - "loss_sod": 0.032830022275447845, - "loss_total": 0.4890718162059784, - "step": 243999 - }, - { - "epoch": 0.008, - "grad_norm": 1.0033119916915894, - "learning_rate": 2.2569019690264593e-05, - "loss": 0.455, - "step": 244000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4203929901123047, - "eval_runtime": 151.4455, - "eval_samples_per_second": 101.971, - "eval_steps_per_second": 0.799, - "step": 244000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.603832244873047, - "loss_rtd": 0.23946131765842438, - "loss_sent": 0.12526585161685944, - "loss_sod": 0.01837044395506382, - "loss_total": 0.3830975890159607, - "step": 244099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.641284942626953, - "loss_rtd": 0.22863775491714478, - "loss_sent": 0.18284723162651062, - "loss_sod": 0.01325727254152298, - "loss_total": 0.4247422516345978, - "step": 244099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.6582899689674377, - "learning_rate": 2.254249392458987e-05, - "loss": 0.4511, - "step": 244100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.689499855041504, - "loss_rtd": 0.23861071467399597, - "loss_sent": 0.3746132552623749, - "loss_sod": 0.011193893849849701, - "loss_total": 0.6244179010391235, - "step": 244199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.840762138366699, - "loss_rtd": 0.24167542159557343, - "loss_sent": 0.1296040117740631, - "loss_sod": 0.02931944653391838, - "loss_total": 0.4005988836288452, - "step": 244199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.9416965842247009, - "learning_rate": 2.2515979218795307e-05, - "loss": 0.4384, - "step": 244200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.463130950927734, - "loss_rtd": 0.2265370935201645, - "loss_sent": 0.18656446039676666, - "loss_sod": 0.06559178233146667, - "loss_total": 0.47869330644607544, - "step": 244299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.253119945526123, - "loss_rtd": 0.21707893908023834, - "loss_sent": 0.1347392499446869, - "loss_sod": 0.0169514212757349, - "loss_total": 0.3687696158885956, - "step": 244299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.8346599340438843, - "learning_rate": 2.248947558356105e-05, - "loss": 0.4481, - "step": 244300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.3741230964660645, - "loss_rtd": 0.2366834282875061, - "loss_sent": 0.10139898955821991, - "loss_sod": 0.004884220659732819, - "loss_total": 0.34296661615371704, - "step": 244399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.582619667053223, - "loss_rtd": 0.2204575091600418, - "loss_sent": 0.015588854439556599, - "loss_sod": 0.04638000950217247, - "loss_total": 0.2824263572692871, - "step": 244399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.822998046875, - "learning_rate": 2.246298302956272e-05, - "loss": 0.4303, - "step": 244400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.60807991027832, - "loss_rtd": 0.2295462042093277, - "loss_sent": 0.09245659410953522, - "loss_sod": 0.019276469945907593, - "loss_total": 0.3412792682647705, - "step": 244499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.777159214019775, - "loss_rtd": 0.22936400771141052, - "loss_sent": 0.2728959918022156, - "loss_sod": 0.08682326227426529, - "loss_total": 0.5890832543373108, - "step": 244499 - }, - { - "epoch": 0.009, - "grad_norm": 0.8801591992378235, - "learning_rate": 2.243650156747153e-05, - "loss": 0.4516, - "step": 244500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.761781215667725, - "loss_rtd": 0.24474243819713593, - "loss_sent": 0.2567749321460724, - "loss_sod": 0.10763518512248993, - "loss_total": 0.6091525554656982, - "step": 244599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.992979526519775, - "loss_rtd": 0.22405067086219788, - "loss_sent": 0.09581495076417923, - "loss_sod": 0.01569833606481552, - "loss_total": 0.3355639576911926, - "step": 244599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.0448980331420898, - "learning_rate": 2.2410031207954214e-05, - "loss": 0.4446, - "step": 244600 - }, - { - "epoch": 0.009398, - "loss_gen": 6.007643222808838, - "loss_rtd": 0.2292904257774353, - "loss_sent": 0.11359409242868423, - "loss_sod": 0.04000445455312729, - "loss_total": 0.3828889727592468, - "step": 244699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.532634258270264, - "loss_rtd": 0.21435268223285675, - "loss_sent": 0.0600326769053936, - "loss_sod": 0.010471741668879986, - "loss_total": 0.2848570942878723, - "step": 244699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.7944142818450928, - "learning_rate": 2.2383571961673044e-05, - "loss": 0.4619, - "step": 244700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.7671685218811035, - "loss_rtd": 0.2227422147989273, - "loss_sent": 0.06705641746520996, - "loss_sod": 0.14935719966888428, - "loss_total": 0.43915581703186035, - "step": 244799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.227385520935059, - "loss_rtd": 0.2073536068201065, - "loss_sent": 7.633544009877369e-05, - "loss_sod": 0.07124359160661697, - "loss_total": 0.27867352962493896, - "step": 244799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.815395712852478, - "learning_rate": 2.235712383928577e-05, - "loss": 0.4528, - "step": 244800 - }, - { - "epoch": 0.009798, - "loss_gen": 6.026076316833496, - "loss_rtd": 0.25128811597824097, - "loss_sent": 0.25853249430656433, - "loss_sod": 0.08229227364063263, - "loss_total": 0.5921128988265991, - "step": 244899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.640193939208984, - "loss_rtd": 0.2260315716266632, - "loss_sent": 0.2339726835489273, - "loss_sod": 0.11494414508342743, - "loss_total": 0.5749483704566956, - "step": 244899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.3601337671279907, - "learning_rate": 2.2330686851445702e-05, - "loss": 0.4435, - "step": 244900 - }, - { - "epoch": 0.009998, - "loss_gen": 6.0089850425720215, - "loss_rtd": 0.2364109456539154, - "loss_sent": 0.7876885533332825, - "loss_sod": 0.03947332873940468, - "loss_total": 1.063572883605957, - "step": 244999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.786645412445068, - "loss_rtd": 0.2300453782081604, - "loss_sent": 0.10783194750547409, - "loss_sod": 0.018082313239574432, - "loss_total": 0.3559596538543701, - "step": 244999 - }, - { - "epoch": 0.01, - "grad_norm": 2.2042479515075684, - "learning_rate": 2.2304261008801668e-05, - "loss": 0.4485, - "step": 245000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4339143633842468, - "eval_runtime": 151.4764, - "eval_samples_per_second": 101.95, - "eval_steps_per_second": 0.799, - "step": 245000 - }, - { - "epoch": 0.010198, - "loss_gen": 4.967748641967773, - "loss_rtd": 0.20206503570079803, - "loss_sent": 0.0022498390171676874, - "loss_sod": 0.08894479274749756, - "loss_total": 0.2932596802711487, - "step": 245099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.727007865905762, - "loss_rtd": 0.23747678101062775, - "loss_sent": 0.0795101746916771, - "loss_sod": 0.09804389625787735, - "loss_total": 0.4150308668613434, - "step": 245099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8656148314476013, - "learning_rate": 2.2277846321998004e-05, - "loss": 0.4565, - "step": 245100 - }, - { - "epoch": 0.010398, - "loss_gen": 6.024504661560059, - "loss_rtd": 0.24439726769924164, - "loss_sent": 0.22530032694339752, - "loss_sod": 0.0551542304456234, - "loss_total": 0.5248517990112305, - "step": 245199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.88653564453125, - "loss_rtd": 0.23230203986167908, - "loss_sent": 0.08770927786827087, - "loss_sod": 0.021674348041415215, - "loss_total": 0.3416856527328491, - "step": 245199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.6006246209144592, - "learning_rate": 2.225144280167451e-05, - "loss": 0.4623, - "step": 245200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.808145523071289, - "loss_rtd": 0.23848189413547516, - "loss_sent": 0.2995993494987488, - "loss_sod": 0.056915100663900375, - "loss_total": 0.5949963331222534, - "step": 245299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.709053993225098, - "loss_rtd": 0.2434210479259491, - "loss_sent": 0.22623975574970245, - "loss_sod": 0.0472450815141201, - "loss_total": 0.5169059038162231, - "step": 245299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.4242582321166992, - "learning_rate": 2.2225050458466535e-05, - "loss": 0.448, - "step": 245300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.238818645477295, - "loss_rtd": 0.20706391334533691, - "loss_sent": 0.058697737753391266, - "loss_sod": 0.040166862308979034, - "loss_total": 0.305928498506546, - "step": 245399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.6264166831970215, - "loss_rtd": 0.22170746326446533, - "loss_sent": 0.3966951072216034, - "loss_sod": 0.049822889268398285, - "loss_total": 0.6682254672050476, - "step": 245399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.1491999626159668, - "learning_rate": 2.2198669303004932e-05, - "loss": 0.452, - "step": 245400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.834951877593994, - "loss_rtd": 0.23625729978084564, - "loss_sent": 0.07587853074073792, - "loss_sod": 0.10082794725894928, - "loss_total": 0.41296377778053284, - "step": 245499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.6272478103637695, - "loss_rtd": 0.20926325023174286, - "loss_sent": 0.00026445966796018183, - "loss_sod": 0.1732027232646942, - "loss_total": 0.3827304244041443, - "step": 245499 - }, - { - "epoch": 0.011, - "grad_norm": 1.1503254175186157, - "learning_rate": 2.2172299345915986e-05, - "loss": 0.4421, - "step": 245500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.753690719604492, - "loss_rtd": 0.25438231229782104, - "loss_sent": 0.07825588434934616, - "loss_sod": 0.007483081892132759, - "loss_total": 0.3401212990283966, - "step": 245599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.800004005432129, - "loss_rtd": 0.2196556031703949, - "loss_sent": 0.15227054059505463, - "loss_sod": 0.10250011086463928, - "loss_total": 0.47442626953125, - "step": 245599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.2017216682434082, - "learning_rate": 2.214594059782154e-05, - "loss": 0.4585, - "step": 245600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.600679874420166, - "loss_rtd": 0.20575957000255585, - "loss_sent": 0.0019685945007950068, - "loss_sod": 0.10293687880039215, - "loss_total": 0.3106650412082672, - "step": 245699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.707396507263184, - "loss_rtd": 0.23821589350700378, - "loss_sent": 0.08778704702854156, - "loss_sod": 0.02279324270784855, - "loss_total": 0.34879618883132935, - "step": 245699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.8220459818840027, - "learning_rate": 2.2119593069338895e-05, - "loss": 0.4446, - "step": 245700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.885300636291504, - "loss_rtd": 0.2289028763771057, - "loss_sent": 0.16664643585681915, - "loss_sod": 0.020120171830058098, - "loss_total": 0.4156695008277893, - "step": 245799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.484748363494873, - "loss_rtd": 0.21507808566093445, - "loss_sent": 0.08143315464258194, - "loss_sod": 0.06266427040100098, - "loss_total": 0.35917550325393677, - "step": 245799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.9669427871704102, - "learning_rate": 2.2093256771080838e-05, - "loss": 0.4268, - "step": 245800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.960178375244141, - "loss_rtd": 0.24078325927257538, - "loss_sent": 0.24635076522827148, - "loss_sod": 0.046413302421569824, - "loss_total": 0.5335473418235779, - "step": 245899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.852598667144775, - "loss_rtd": 0.22588245570659637, - "loss_sent": 0.11066222935914993, - "loss_sod": 0.0523991584777832, - "loss_total": 0.3889438509941101, - "step": 245899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.851073145866394, - "learning_rate": 2.2066931713655604e-05, - "loss": 0.4327, - "step": 245900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.353276252746582, - "loss_rtd": 0.19924487173557281, - "loss_sent": 0.16762703657150269, - "loss_sod": 0.13295862078666687, - "loss_total": 0.49983054399490356, - "step": 245999 - }, - { - "epoch": 0.011998, - "loss_gen": 6.044212341308594, - "loss_rtd": 0.22048138082027435, - "loss_sent": 0.32683029770851135, - "loss_sod": 0.10784997045993805, - "loss_total": 0.6551616191864014, - "step": 245999 - }, - { - "epoch": 0.012, - "grad_norm": 1.317622423171997, - "learning_rate": 2.2040617907666934e-05, - "loss": 0.4653, - "step": 246000 - }, - { - "epoch": 0.012, - "eval_loss": 0.42428115010261536, - "eval_runtime": 151.8415, - "eval_samples_per_second": 101.705, - "eval_steps_per_second": 0.797, - "step": 246000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.779860973358154, - "loss_rtd": 0.222878098487854, - "loss_sent": 0.25464460253715515, - "loss_sod": 0.11167255789041519, - "loss_total": 0.5891952514648438, - "step": 246099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.683056354522705, - "loss_rtd": 0.23052029311656952, - "loss_sent": 0.11374194920063019, - "loss_sod": 0.017238955944776535, - "loss_total": 0.36150121688842773, - "step": 246099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.1351783275604248, - "learning_rate": 2.201431536371402e-05, - "loss": 0.4619, - "step": 246100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.850572109222412, - "loss_rtd": 0.2256869077682495, - "loss_sent": 0.3070152699947357, - "loss_sod": 0.01178007572889328, - "loss_total": 0.5444822311401367, - "step": 246199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.7851386070251465, - "loss_rtd": 0.21720394492149353, - "loss_sent": 0.2863521873950958, - "loss_sod": 0.024845104664564133, - "loss_total": 0.528401255607605, - "step": 246199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.4040193557739258, - "learning_rate": 2.1988024092391558e-05, - "loss": 0.4495, - "step": 246200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.6477861404418945, - "loss_rtd": 0.24160976707935333, - "loss_sent": 0.33366549015045166, - "loss_sod": 0.0467105396091938, - "loss_total": 0.6219857931137085, - "step": 246299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.879884243011475, - "loss_rtd": 0.23690751194953918, - "loss_sent": 0.35896140336990356, - "loss_sod": 0.08702407032251358, - "loss_total": 0.6828929781913757, - "step": 246299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.3787297010421753, - "learning_rate": 2.1961744104289618e-05, - "loss": 0.4664, - "step": 246300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.86262321472168, - "loss_rtd": 0.23806585371494293, - "loss_sent": 0.2090684324502945, - "loss_sod": 0.019638799130916595, - "loss_total": 0.4667730927467346, - "step": 246399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.701531887054443, - "loss_rtd": 0.2358715981245041, - "loss_sent": 0.2644566297531128, - "loss_sod": 0.013847066089510918, - "loss_total": 0.514175295829773, - "step": 246399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.3796790838241577, - "learning_rate": 2.19354754099938e-05, - "loss": 0.4634, - "step": 246400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.696658134460449, - "loss_rtd": 0.2243136763572693, - "loss_sent": 0.35249295830726624, - "loss_sod": 0.03702107071876526, - "loss_total": 0.6138277053833008, - "step": 246499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.864236354827881, - "loss_rtd": 0.21939466893672943, - "loss_sent": 0.2594083845615387, - "loss_sod": 0.024586467072367668, - "loss_total": 0.5033895373344421, - "step": 246499 - }, - { - "epoch": 0.013, - "grad_norm": 1.9242634773254395, - "learning_rate": 2.1909218020085136e-05, - "loss": 0.4575, - "step": 246500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.756565570831299, - "loss_rtd": 0.2348550707101822, - "loss_sent": 0.2771225571632385, - "loss_sod": 0.06357685476541519, - "loss_total": 0.5755544900894165, - "step": 246599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.748210906982422, - "loss_rtd": 0.23125533759593964, - "loss_sent": 0.06457473337650299, - "loss_sod": 0.004369403701275587, - "loss_total": 0.3001994788646698, - "step": 246599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0573989152908325, - "learning_rate": 2.1882971945140106e-05, - "loss": 0.4386, - "step": 246600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.897297382354736, - "loss_rtd": 0.22446118295192719, - "loss_sent": 0.5125123858451843, - "loss_sod": 0.07384099066257477, - "loss_total": 0.8108145594596863, - "step": 246699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.738885402679443, - "loss_rtd": 0.20799201726913452, - "loss_sent": 0.140020489692688, - "loss_sod": 0.16425809264183044, - "loss_total": 0.5122705698013306, - "step": 246699 - }, - { - "epoch": 0.0134, - "grad_norm": 2.0589182376861572, - "learning_rate": 2.1856737195730596e-05, - "loss": 0.4539, - "step": 246700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.783360481262207, - "loss_rtd": 0.2284352034330368, - "loss_sent": 0.2370624542236328, - "loss_sod": 0.07437717914581299, - "loss_total": 0.5398748517036438, - "step": 246799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.760488986968994, - "loss_rtd": 0.2174978256225586, - "loss_sent": 0.36561107635498047, - "loss_sod": 0.08976496011018753, - "loss_total": 0.672873854637146, - "step": 246799 - }, - { - "epoch": 0.0136, - "grad_norm": 2.8151371479034424, - "learning_rate": 2.1830513782423984e-05, - "loss": 0.4682, - "step": 246800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.592465877532959, - "loss_rtd": 0.23686639964580536, - "loss_sent": 0.08548545837402344, - "loss_sod": 0.0479353666305542, - "loss_total": 0.3702872395515442, - "step": 246899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.187298774719238, - "loss_rtd": 0.23485742509365082, - "loss_sent": 0.00015608601097483188, - "loss_sod": 0.1666051298379898, - "loss_total": 0.401618629693985, - "step": 246899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.1387323141098022, - "learning_rate": 2.1804301715783064e-05, - "loss": 0.4444, - "step": 246900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.925267696380615, - "loss_rtd": 0.23347876965999603, - "loss_sent": 0.15267211198806763, - "loss_sod": 0.049669016152620316, - "loss_total": 0.4358198940753937, - "step": 246999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.698176383972168, - "loss_rtd": 0.2147320955991745, - "loss_sent": 0.04833608865737915, - "loss_sod": 0.012624384835362434, - "loss_total": 0.27569258213043213, - "step": 246999 - }, - { - "epoch": 0.014, - "grad_norm": 0.7666748762130737, - "learning_rate": 2.1778101006366032e-05, - "loss": 0.4434, - "step": 247000 - }, - { - "epoch": 0.014, - "eval_loss": 0.42115986347198486, - "eval_runtime": 151.5978, - "eval_samples_per_second": 101.868, - "eval_steps_per_second": 0.798, - "step": 247000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.583296775817871, - "loss_rtd": 0.21369054913520813, - "loss_sent": 0.4174363911151886, - "loss_sod": 0.04971133545041084, - "loss_total": 0.6808382868766785, - "step": 247099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.939522743225098, - "loss_rtd": 0.23565998673439026, - "loss_sent": 0.3900221586227417, - "loss_sod": 0.04761674255132675, - "loss_total": 0.6732988953590393, - "step": 247099 - }, - { - "epoch": 0.0142, - "grad_norm": 3.3717472553253174, - "learning_rate": 2.175191166472653e-05, - "loss": 0.446, - "step": 247100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.71102237701416, - "loss_rtd": 0.2300696223974228, - "loss_sent": 0.18655012547969818, - "loss_sod": 0.04130156710743904, - "loss_total": 0.4579213261604309, - "step": 247199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.667944431304932, - "loss_rtd": 0.22791703045368195, - "loss_sent": 0.09452652931213379, - "loss_sod": 0.006408519111573696, - "loss_total": 0.3288520574569702, - "step": 247199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.7964329123497009, - "learning_rate": 2.1725733701413636e-05, - "loss": 0.445, - "step": 247200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.576109409332275, - "loss_rtd": 0.2410363107919693, - "loss_sent": 0.34477588534355164, - "loss_sod": 0.16448423266410828, - "loss_total": 0.7502964735031128, - "step": 247299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.662108421325684, - "loss_rtd": 0.2261047661304474, - "loss_sent": 0.1713200956583023, - "loss_sod": 0.005798437632620335, - "loss_total": 0.40322330594062805, - "step": 247299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.8646548986434937, - "learning_rate": 2.1699567126971843e-05, - "loss": 0.4599, - "step": 247300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.640431880950928, - "loss_rtd": 0.23056964576244354, - "loss_sent": 0.1417578011751175, - "loss_sod": 0.02476336620748043, - "loss_total": 0.3970907926559448, - "step": 247399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.29046106338501, - "loss_rtd": 0.19295956194400787, - "loss_sent": 3.200511127943173e-05, - "loss_sod": 0.0558713860809803, - "loss_total": 0.24886295199394226, - "step": 247399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.0499322414398193, - "learning_rate": 2.167341195194101e-05, - "loss": 0.4416, - "step": 247400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.772815704345703, - "loss_rtd": 0.20959270000457764, - "loss_sent": 0.00048504251753911376, - "loss_sod": 0.1760254204273224, - "loss_total": 0.38610315322875977, - "step": 247499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.480262756347656, - "loss_rtd": 0.19009432196617126, - "loss_sent": 7.621695112902671e-05, - "loss_sod": 0.14515167474746704, - "loss_total": 0.33532220125198364, - "step": 247499 - }, - { - "epoch": 0.015, - "grad_norm": 1.1378663778305054, - "learning_rate": 2.1647268186856452e-05, - "loss": 0.4597, - "step": 247500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.816526889801025, - "loss_rtd": 0.23251159489154816, - "loss_sent": 0.30066436529159546, - "loss_sod": 0.072323277592659, - "loss_total": 0.605499267578125, - "step": 247599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.729894161224365, - "loss_rtd": 0.2259802520275116, - "loss_sent": 0.14471116662025452, - "loss_sod": 0.056692980229854584, - "loss_total": 0.4273844063282013, - "step": 247599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.9972118139266968, - "learning_rate": 2.1621135842248872e-05, - "loss": 0.4519, - "step": 247600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.717024326324463, - "loss_rtd": 0.22244328260421753, - "loss_sent": 0.37024953961372375, - "loss_sod": 0.17716263234615326, - "loss_total": 0.7698554992675781, - "step": 247699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.46368408203125, - "loss_rtd": 0.22375303506851196, - "loss_sent": 0.07559648901224136, - "loss_sod": 0.004048466682434082, - "loss_total": 0.3033979833126068, - "step": 247699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.8086750507354736, - "learning_rate": 2.1595014928644406e-05, - "loss": 0.4459, - "step": 247700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.685397624969482, - "loss_rtd": 0.2309625893831253, - "loss_sent": 0.30750802159309387, - "loss_sod": 0.08355244249105453, - "loss_total": 0.6220230460166931, - "step": 247799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.512424468994141, - "loss_rtd": 0.2207656353712082, - "loss_sent": 0.20813463628292084, - "loss_sod": 0.04484511539340019, - "loss_total": 0.4737454056739807, - "step": 247799 - }, - { - "epoch": 0.0156, - "grad_norm": 0.8984826803207397, - "learning_rate": 2.1568905456564514e-05, - "loss": 0.4439, - "step": 247800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.573122024536133, - "loss_rtd": 0.2387402057647705, - "loss_sent": 0.21148760616779327, - "loss_sod": 0.028351351618766785, - "loss_total": 0.47857916355133057, - "step": 247899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.622769832611084, - "loss_rtd": 0.20884425938129425, - "loss_sent": 0.3075295090675354, - "loss_sod": 0.03448361158370972, - "loss_total": 0.5508573651313782, - "step": 247899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.7045705914497375, - "learning_rate": 2.1542807436526102e-05, - "loss": 0.45, - "step": 247900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.8512678146362305, - "loss_rtd": 0.22766973078250885, - "loss_sent": 0.2784850597381592, - "loss_sod": 0.05045732855796814, - "loss_total": 0.5566121339797974, - "step": 247999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.559403896331787, - "loss_rtd": 0.2467317134141922, - "loss_sent": 0.23199117183685303, - "loss_sod": 0.012821109034121037, - "loss_total": 0.4915440082550049, - "step": 247999 - }, - { - "epoch": 0.016, - "grad_norm": 0.8777672052383423, - "learning_rate": 2.1516720879041453e-05, - "loss": 0.4508, - "step": 248000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4283154308795929, - "eval_runtime": 151.5003, - "eval_samples_per_second": 101.934, - "eval_steps_per_second": 0.799, - "step": 248000 - }, - { - "epoch": 0.016198, - "loss_gen": 6.132227897644043, - "loss_rtd": 0.2485898733139038, - "loss_sent": 0.11270882934331894, - "loss_sod": 0.11559832096099854, - "loss_total": 0.4768970310688019, - "step": 248099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.833548069000244, - "loss_rtd": 0.20343995094299316, - "loss_sent": 0.22904229164123535, - "loss_sod": 0.03247417137026787, - "loss_total": 0.4649564027786255, - "step": 248099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.696287989616394, - "learning_rate": 2.1490645794618247e-05, - "loss": 0.4396, - "step": 248100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.809331893920898, - "loss_rtd": 0.21478372812271118, - "loss_sent": 0.10742197930812836, - "loss_sod": 0.054235316812992096, - "loss_total": 0.37644100189208984, - "step": 248199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.897103786468506, - "loss_rtd": 0.2588536739349365, - "loss_sent": 0.2113327831029892, - "loss_sod": 0.009779886342585087, - "loss_total": 0.47996634244918823, - "step": 248199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.5072851181030273, - "learning_rate": 2.146458219375948e-05, - "loss": 0.4351, - "step": 248200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.730291366577148, - "loss_rtd": 0.2134108990430832, - "loss_sent": 0.11594606935977936, - "loss_sod": 0.023139316588640213, - "loss_total": 0.35249626636505127, - "step": 248299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.768892765045166, - "loss_rtd": 0.22115463018417358, - "loss_sent": 0.25451236963272095, - "loss_sod": 0.08053763210773468, - "loss_total": 0.556204617023468, - "step": 248299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.1511993408203125, - "learning_rate": 2.14385300869636e-05, - "loss": 0.4436, - "step": 248300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.109781742095947, - "loss_rtd": 0.19637225568294525, - "loss_sent": 0.06141408905386925, - "loss_sod": 0.10375300794839859, - "loss_total": 0.361539363861084, - "step": 248399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.668107509613037, - "loss_rtd": 0.21653003990650177, - "loss_sent": 0.2802726924419403, - "loss_sod": 0.1111130565404892, - "loss_total": 0.6079157590866089, - "step": 248399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.1210267543792725, - "learning_rate": 2.1412489484724384e-05, - "loss": 0.448, - "step": 248400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.797348976135254, - "loss_rtd": 0.2264557033777237, - "loss_sent": 0.23551037907600403, - "loss_sod": 0.005144703201949596, - "loss_total": 0.4671107828617096, - "step": 248499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.7537665367126465, - "loss_rtd": 0.21819786727428436, - "loss_sent": 0.3020470142364502, - "loss_sod": 0.0521635077893734, - "loss_total": 0.5724083781242371, - "step": 248499 - }, - { - "epoch": 0.017, - "grad_norm": 2.03287410736084, - "learning_rate": 2.1386460397531e-05, - "loss": 0.4355, - "step": 248500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.910955429077148, - "loss_rtd": 0.23703964054584503, - "loss_sent": 0.055078279227018356, - "loss_sod": 0.07126298546791077, - "loss_total": 0.36338090896606445, - "step": 248599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.778095245361328, - "loss_rtd": 0.2506910562515259, - "loss_sent": 0.18475283682346344, - "loss_sod": 0.09531427919864655, - "loss_total": 0.5307581424713135, - "step": 248599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.9766395688056946, - "learning_rate": 2.1360442835867926e-05, - "loss": 0.4514, - "step": 248600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.89865255355835, - "loss_rtd": 0.22510012984275818, - "loss_sent": 0.13692350685596466, - "loss_sod": 0.041749171912670135, - "loss_total": 0.40377283096313477, - "step": 248699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.703371047973633, - "loss_rtd": 0.21377354860305786, - "loss_sent": 0.09150873869657516, - "loss_sod": 0.08654826879501343, - "loss_total": 0.39183056354522705, - "step": 248699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.8470425605773926, - "learning_rate": 2.133443681021506e-05, - "loss": 0.4387, - "step": 248700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.854787349700928, - "loss_rtd": 0.22065089643001556, - "loss_sent": 0.14864091575145721, - "loss_sod": 0.011109406128525734, - "loss_total": 0.38040122389793396, - "step": 248799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.786114692687988, - "loss_rtd": 0.2307480275630951, - "loss_sent": 0.050676070153713226, - "loss_sod": 0.027572056278586388, - "loss_total": 0.30899617075920105, - "step": 248799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.424483060836792, - "learning_rate": 2.1308442331047634e-05, - "loss": 0.439, - "step": 248800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.779053688049316, - "loss_rtd": 0.2416631579399109, - "loss_sent": 0.14630766212940216, - "loss_sod": 0.06027863174676895, - "loss_total": 0.4482494592666626, - "step": 248899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.042469024658203, - "loss_rtd": 0.19791507720947266, - "loss_sent": 0.006845710799098015, - "loss_sod": 0.05875186622142792, - "loss_total": 0.26351267099380493, - "step": 248899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.9350420236587524, - "learning_rate": 2.1282459408836186e-05, - "loss": 0.4493, - "step": 248900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.95554256439209, - "loss_rtd": 0.2389046549797058, - "loss_sent": 0.6801117062568665, - "loss_sod": 0.030125457793474197, - "loss_total": 0.9491418600082397, - "step": 248999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.599197864532471, - "loss_rtd": 0.225342258810997, - "loss_sent": 0.22508233785629272, - "loss_sod": 0.06279405206441879, - "loss_total": 0.5132186412811279, - "step": 248999 - }, - { - "epoch": 0.018, - "grad_norm": 2.1206676959991455, - "learning_rate": 2.1256488054046658e-05, - "loss": 0.4438, - "step": 249000 - }, - { - "epoch": 0.018, - "eval_loss": 0.42814549803733826, - "eval_runtime": 153.1422, - "eval_samples_per_second": 100.841, - "eval_steps_per_second": 0.79, - "step": 249000 - }, - { - "epoch": 0.018198, - "loss_gen": 6.016199588775635, - "loss_rtd": 0.22620250284671783, - "loss_sent": 0.17632102966308594, - "loss_sod": 0.04826517403125763, - "loss_total": 0.4507887065410614, - "step": 249099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.909693241119385, - "loss_rtd": 0.2267966866493225, - "loss_sent": 0.2964528501033783, - "loss_sod": 0.11600951105356216, - "loss_total": 0.6392590403556824, - "step": 249099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.9242974519729614, - "learning_rate": 2.12305282771403e-05, - "loss": 0.4525, - "step": 249100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.695147514343262, - "loss_rtd": 0.2238437980413437, - "loss_sent": 0.24166767299175262, - "loss_sod": 0.010288280434906483, - "loss_total": 0.4757997393608093, - "step": 249199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.540353775024414, - "loss_rtd": 0.22267025709152222, - "loss_sent": 0.07612654566764832, - "loss_sod": 0.020782165229320526, - "loss_total": 0.31957897543907166, - "step": 249199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.4858189821243286, - "learning_rate": 2.1204580088573733e-05, - "loss": 0.4557, - "step": 249200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.780377388000488, - "loss_rtd": 0.24228976666927338, - "loss_sent": 0.0438273586332798, - "loss_sod": 0.09033120423555374, - "loss_total": 0.3764483332633972, - "step": 249299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.685077667236328, - "loss_rtd": 0.24057133495807648, - "loss_sent": 0.23263029754161835, - "loss_sod": 0.09057365357875824, - "loss_total": 0.5637753009796143, - "step": 249299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.1972358226776123, - "learning_rate": 2.117864349879884e-05, - "loss": 0.4493, - "step": 249300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.7665815353393555, - "loss_rtd": 0.2430795282125473, - "loss_sent": 0.07919485867023468, - "loss_sod": 0.05886111408472061, - "loss_total": 0.3811355233192444, - "step": 249399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.909588813781738, - "loss_rtd": 0.2451922595500946, - "loss_sent": 0.29324105381965637, - "loss_sod": 0.1357181817293167, - "loss_total": 0.6741515398025513, - "step": 249399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.2323169708251953, - "learning_rate": 2.1152718518262903e-05, - "loss": 0.4347, - "step": 249400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.551820755004883, - "loss_rtd": 0.24110427498817444, - "loss_sent": 0.1510792225599289, - "loss_sod": 0.01303178258240223, - "loss_total": 0.4052152633666992, - "step": 249499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.698292255401611, - "loss_rtd": 0.2367483377456665, - "loss_sent": 0.37549740076065063, - "loss_sod": 0.06762387603521347, - "loss_total": 0.6798696517944336, - "step": 249499 - }, - { - "epoch": 0.019, - "grad_norm": 1.4118274450302124, - "learning_rate": 2.1126805157408496e-05, - "loss": 0.4365, - "step": 249500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.511271953582764, - "loss_rtd": 0.20768186450004578, - "loss_sent": 0.14879639446735382, - "loss_sod": 0.07677138596773148, - "loss_total": 0.43324965238571167, - "step": 249599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.441946983337402, - "loss_rtd": 0.24837626516819, - "loss_sent": 0.08175481110811234, - "loss_sod": 0.005567528773099184, - "loss_total": 0.33569860458374023, - "step": 249599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.5600844025611877, - "learning_rate": 2.1100903426673536e-05, - "loss": 0.444, - "step": 249600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.156698703765869, - "loss_rtd": 0.2006874829530716, - "loss_sent": 2.6125118893105537e-05, - "loss_sod": 0.14785784482955933, - "loss_total": 0.34857144951820374, - "step": 249699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.049666404724121, - "loss_rtd": 0.18938666582107544, - "loss_sent": 2.5378220016136765e-05, - "loss_sod": 0.12293696403503418, - "loss_total": 0.31234902143478394, - "step": 249699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.7393021583557129, - "learning_rate": 2.10750133364912e-05, - "loss": 0.4475, - "step": 249700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.47639274597168, - "loss_rtd": 0.21608366072177887, - "loss_sent": 0.03439813107252121, - "loss_sod": 0.12057603895664215, - "loss_total": 0.3710578382015228, - "step": 249799 - }, - { - "epoch": 0.019598, - "loss_gen": 4.983134746551514, - "loss_rtd": 0.1847182959318161, - "loss_sent": 0.03477868810296059, - "loss_sod": 0.050732582807540894, - "loss_total": 0.2702295780181885, - "step": 249799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.823193371295929, - "learning_rate": 2.1049134897290036e-05, - "loss": 0.4292, - "step": 249800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.758688926696777, - "loss_rtd": 0.2276776134967804, - "loss_sent": 0.181236132979393, - "loss_sod": 0.041420139372348785, - "loss_total": 0.4503338932991028, - "step": 249899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.971752166748047, - "loss_rtd": 0.2346062958240509, - "loss_sent": 0.17096562683582306, - "loss_sod": 0.06036635488271713, - "loss_total": 0.4659382700920105, - "step": 249899 - }, - { - "epoch": 0.0198, - "grad_norm": 2.3099324703216553, - "learning_rate": 2.102326811949387e-05, - "loss": 0.4802, - "step": 249900 - }, - { - "epoch": 0.019998, - "loss_gen": 6.1688151359558105, - "loss_rtd": 0.22333301603794098, - "loss_sent": 0.18693652749061584, - "loss_sod": 0.18587328493595123, - "loss_total": 0.5961428284645081, - "step": 249999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.6526384353637695, - "loss_rtd": 0.20717786252498627, - "loss_sent": 0.05291612073779106, - "loss_sod": 0.14070719480514526, - "loss_total": 0.4008011817932129, - "step": 249999 - }, - { - "epoch": 0.02, - "grad_norm": 1.6782922744750977, - "learning_rate": 2.0997413013521867e-05, - "loss": 0.4637, - "step": 250000 - }, - { - "epoch": 0.02, - "eval_loss": 0.4257298409938812, - "eval_runtime": 151.7832, - "eval_samples_per_second": 101.744, - "eval_steps_per_second": 0.797, - "step": 250000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.887287139892578, - "loss_rtd": 0.22037333250045776, - "loss_sent": 0.0781002938747406, - "loss_sod": 0.02560676634311676, - "loss_total": 0.3240804076194763, - "step": 250099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.702293395996094, - "loss_rtd": 0.2283933460712433, - "loss_sent": 0.3841685354709625, - "loss_sod": 0.044053301215171814, - "loss_total": 0.656615138053894, - "step": 250099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.5517466068267822, - "learning_rate": 2.097156958978841e-05, - "loss": 0.455, - "step": 250100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.590933799743652, - "loss_rtd": 0.23131448030471802, - "loss_sent": 0.10019551217556, - "loss_sod": 0.0016662365524098277, - "loss_total": 0.33317622542381287, - "step": 250199 - }, - { - "epoch": 0.020398, - "loss_gen": 6.149798393249512, - "loss_rtd": 0.23659397661685944, - "loss_sent": 0.09647634625434875, - "loss_sod": 0.10330408811569214, - "loss_total": 0.43637439608573914, - "step": 250199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.1645917892456055, - "learning_rate": 2.0945737858703245e-05, - "loss": 0.4274, - "step": 250200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.983415603637695, - "loss_rtd": 0.22915597259998322, - "loss_sent": 0.20124390721321106, - "loss_sod": 0.11508305370807648, - "loss_total": 0.5454829335212708, - "step": 250299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.548913955688477, - "loss_rtd": 0.23363099992275238, - "loss_sent": 0.17175786197185516, - "loss_sod": 0.018295908346772194, - "loss_total": 0.4236847758293152, - "step": 250299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.2545076608657837, - "learning_rate": 2.091991783067142e-05, - "loss": 0.4347, - "step": 250300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.855342864990234, - "loss_rtd": 0.22722971439361572, - "loss_sent": 0.14149326086044312, - "loss_sod": 0.0870337188243866, - "loss_total": 0.45575669407844543, - "step": 250399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.924689292907715, - "loss_rtd": 0.224134162068367, - "loss_sent": 0.02492489479482174, - "loss_sod": 0.03921440616250038, - "loss_total": 0.28827348351478577, - "step": 250399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8212274312973022, - "learning_rate": 2.0894109516093195e-05, - "loss": 0.4465, - "step": 250400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.6750359535217285, - "loss_rtd": 0.2115013748407364, - "loss_sent": 0.0660208910703659, - "loss_sod": 0.07971183955669403, - "loss_total": 0.3572341203689575, - "step": 250499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.898503303527832, - "loss_rtd": 0.22151196002960205, - "loss_sent": 0.5142325758934021, - "loss_sod": 0.05417371541261673, - "loss_total": 0.7899182438850403, - "step": 250499 - }, - { - "epoch": 0.021, - "grad_norm": 2.3125884532928467, - "learning_rate": 2.086831292536418e-05, - "loss": 0.4684, - "step": 250500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.5948405265808105, - "loss_rtd": 0.21999555826187134, - "loss_sent": 0.06595755368471146, - "loss_sod": 0.006651579402387142, - "loss_total": 0.2926046848297119, - "step": 250599 - }, - { - "epoch": 0.021198, - "loss_gen": 6.026972770690918, - "loss_rtd": 0.23303301632404327, - "loss_sent": 0.19365358352661133, - "loss_sod": 0.09179709106683731, - "loss_total": 0.5184836983680725, - "step": 250599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.8277121186256409, - "learning_rate": 2.0842528068875233e-05, - "loss": 0.4451, - "step": 250600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.363009452819824, - "loss_rtd": 0.20621538162231445, - "loss_sent": 0.0007631806074641645, - "loss_sod": 0.21446320414543152, - "loss_total": 0.4214417636394501, - "step": 250699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.159182071685791, - "loss_rtd": 0.1883380264043808, - "loss_sent": 0.00517320167273283, - "loss_sod": 0.0313698910176754, - "loss_total": 0.2248811274766922, - "step": 250699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.0292924642562866, - "learning_rate": 2.0816754957012506e-05, - "loss": 0.4328, - "step": 250700 - }, - { - "epoch": 0.021598, - "loss_gen": 6.009695529937744, - "loss_rtd": 0.21848805248737335, - "loss_sent": 0.14705587923526764, - "loss_sod": 0.1392502337694168, - "loss_total": 0.504794180393219, - "step": 250799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.875698089599609, - "loss_rtd": 0.24496762454509735, - "loss_sent": 0.055516812950372696, - "loss_sod": 0.05094952508807182, - "loss_total": 0.35143396258354187, - "step": 250799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.8969025015830994, - "learning_rate": 2.0790993600157384e-05, - "loss": 0.4484, - "step": 250800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.118767738342285, - "loss_rtd": 0.18534214794635773, - "loss_sent": 0.008448443375527859, - "loss_sod": 0.19007673859596252, - "loss_total": 0.3838673532009125, - "step": 250899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.62872838973999, - "loss_rtd": 0.2451535165309906, - "loss_sent": 0.10492289066314697, - "loss_sod": 0.03780888020992279, - "loss_total": 0.38788527250289917, - "step": 250899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.2935609817504883, - "learning_rate": 2.076524400868654e-05, - "loss": 0.4448, - "step": 250900 - }, - { - "epoch": 0.021998, - "loss_gen": 7.03744649887085, - "loss_rtd": 0.24140574038028717, - "loss_sent": 0.04671387001872063, - "loss_sod": 0.10378556698560715, - "loss_total": 0.39190515875816345, - "step": 250999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.519412517547607, - "loss_rtd": 0.20484577119350433, - "loss_sent": 0.20768705010414124, - "loss_sod": 0.05683238059282303, - "loss_total": 0.4693652093410492, - "step": 250999 - }, - { - "epoch": 0.022, - "grad_norm": 1.482284665107727, - "learning_rate": 2.0739506192971913e-05, - "loss": 0.4452, - "step": 251000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4228754937648773, - "eval_runtime": 151.7485, - "eval_samples_per_second": 101.767, - "eval_steps_per_second": 0.797, - "step": 251000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.65935754776001, - "loss_rtd": 0.1884467601776123, - "loss_sent": 0.1918184906244278, - "loss_sod": 0.10814428329467773, - "loss_total": 0.48840951919555664, - "step": 251099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.581549167633057, - "loss_rtd": 0.23618453741073608, - "loss_sent": 0.17425058782100677, - "loss_sod": 0.05884721875190735, - "loss_total": 0.469282329082489, - "step": 251099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.955230712890625, - "learning_rate": 2.0713780163380712e-05, - "loss": 0.4599, - "step": 251100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.499749660491943, - "loss_rtd": 0.22472791373729706, - "loss_sent": 0.1220015436410904, - "loss_sod": 0.06044648587703705, - "loss_total": 0.4071759581565857, - "step": 251199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.156343460083008, - "loss_rtd": 0.19477254152297974, - "loss_sent": 0.035995736718177795, - "loss_sod": 0.03143073618412018, - "loss_total": 0.2621990144252777, - "step": 251199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.7173367738723755, - "learning_rate": 2.068806593027534e-05, - "loss": 0.4417, - "step": 251200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.773978233337402, - "loss_rtd": 0.260987788438797, - "loss_sent": 0.26252150535583496, - "loss_sod": 0.02137020230293274, - "loss_total": 0.5448794960975647, - "step": 251299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.492867946624756, - "loss_rtd": 0.22154872119426727, - "loss_sent": 0.09729105979204178, - "loss_sod": 0.03462834656238556, - "loss_total": 0.353468120098114, - "step": 251299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.8783969283103943, - "learning_rate": 2.066236350401351e-05, - "loss": 0.4547, - "step": 251300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.690173149108887, - "loss_rtd": 0.2423451989889145, - "loss_sent": 0.5937155485153198, - "loss_sod": 0.03639678657054901, - "loss_total": 0.8724575042724609, - "step": 251399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.853487968444824, - "loss_rtd": 0.21431252360343933, - "loss_sent": 0.2413485199213028, - "loss_sod": 0.21916517615318298, - "loss_total": 0.6748262047767639, - "step": 251399 - }, - { - "epoch": 0.0228, - "grad_norm": 2.0817222595214844, - "learning_rate": 2.063667289494815e-05, - "loss": 0.4588, - "step": 251400 - }, - { - "epoch": 0.022998, - "loss_gen": 6.574832439422607, - "loss_rtd": 0.2396440953016281, - "loss_sent": 0.10621442645788193, - "loss_sod": 0.11426478624343872, - "loss_total": 0.46012333035469055, - "step": 251499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.385630130767822, - "loss_rtd": 0.2209346890449524, - "loss_sent": 0.20252737402915955, - "loss_sod": 0.08124950528144836, - "loss_total": 0.5047115683555603, - "step": 251499 - }, - { - "epoch": 0.023, - "grad_norm": 1.0906034708023071, - "learning_rate": 2.0610994113427452e-05, - "loss": 0.4311, - "step": 251500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.799736022949219, - "loss_rtd": 0.22718507051467896, - "loss_sent": 0.0844174399971962, - "loss_sod": 0.0614926777780056, - "loss_total": 0.37309518456459045, - "step": 251599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.91364860534668, - "loss_rtd": 0.24203763902187347, - "loss_sent": 0.1268770843744278, - "loss_sod": 0.04052715748548508, - "loss_total": 0.40944188833236694, - "step": 251599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.8985000848770142, - "learning_rate": 2.0585327169794793e-05, - "loss": 0.4283, - "step": 251600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.676454544067383, - "loss_rtd": 0.24494819343090057, - "loss_sent": 0.24845416843891144, - "loss_sod": 0.10490782558917999, - "loss_total": 0.5983101725578308, - "step": 251699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.693519115447998, - "loss_rtd": 0.22302430868148804, - "loss_sent": 0.18203288316726685, - "loss_sod": 0.02259897254407406, - "loss_total": 0.4276561737060547, - "step": 251699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.0740926265716553, - "learning_rate": 2.0559672074388835e-05, - "loss": 0.452, - "step": 251700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.71986722946167, - "loss_rtd": 0.21114739775657654, - "loss_sent": 0.38429152965545654, - "loss_sod": 0.03487692400813103, - "loss_total": 0.6303158402442932, - "step": 251799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.775424003601074, - "loss_rtd": 0.24753491580486298, - "loss_sent": 0.1396598368883133, - "loss_sod": 0.023601790890097618, - "loss_total": 0.41079652309417725, - "step": 251799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.0082423686981201, - "learning_rate": 2.053402883754346e-05, - "loss": 0.4547, - "step": 251800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.8386945724487305, - "loss_rtd": 0.23833312094211578, - "loss_sent": 0.39287030696868896, - "loss_sod": 0.045166682451963425, - "loss_total": 0.6763701438903809, - "step": 251899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.735254287719727, - "loss_rtd": 0.21878089010715485, - "loss_sent": 0.24877341091632843, - "loss_sod": 0.04096533730626106, - "loss_total": 0.5085196495056152, - "step": 251899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.951898992061615, - "learning_rate": 2.050839746958773e-05, - "loss": 0.4387, - "step": 251900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.8343682289123535, - "loss_rtd": 0.23846673965454102, - "loss_sent": 0.30951541662216187, - "loss_sod": 0.012829523533582687, - "loss_total": 0.5608116388320923, - "step": 251999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.8856072425842285, - "loss_rtd": 0.21703128516674042, - "loss_sent": 0.37415531277656555, - "loss_sod": 0.09952997416257858, - "loss_total": 0.690716564655304, - "step": 251999 - }, - { - "epoch": 0.024, - "grad_norm": 1.9577677249908447, - "learning_rate": 2.0482777980845972e-05, - "loss": 0.4187, - "step": 252000 - }, - { - "epoch": 0.024, - "eval_loss": 0.42289337515830994, - "eval_runtime": 151.7939, - "eval_samples_per_second": 101.737, - "eval_steps_per_second": 0.797, - "step": 252000 - }, - { - "epoch": 0.024198, - "loss_gen": 4.916977405548096, - "loss_rtd": 0.1900949478149414, - "loss_sent": 2.561290420999285e-05, - "loss_sod": 0.044411174952983856, - "loss_total": 0.23453174531459808, - "step": 252099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.1336259841918945, - "loss_rtd": 0.18254713714122772, - "loss_sent": 0.09441950917243958, - "loss_sod": 0.05666785687208176, - "loss_total": 0.33363449573516846, - "step": 252099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.7168747186660767, - "learning_rate": 2.0457170381637714e-05, - "loss": 0.4347, - "step": 252100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.757220268249512, - "loss_rtd": 0.23494116961956024, - "loss_sent": 0.12362934648990631, - "loss_sod": 0.057746171951293945, - "loss_total": 0.4163166880607605, - "step": 252199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.632613658905029, - "loss_rtd": 0.2236497849225998, - "loss_sent": 0.15610367059707642, - "loss_sod": 0.02144138514995575, - "loss_total": 0.40119484066963196, - "step": 252199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.8283018469810486, - "learning_rate": 2.043157468227771e-05, - "loss": 0.4548, - "step": 252200 - }, - { - "epoch": 0.024598, - "loss_gen": 6.04722261428833, - "loss_rtd": 0.19264057278633118, - "loss_sent": 0.20455722510814667, - "loss_sod": 0.04163656011223793, - "loss_total": 0.43883436918258667, - "step": 252299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.652451515197754, - "loss_rtd": 0.22907041013240814, - "loss_sent": 0.13255144655704498, - "loss_sod": 0.027676325291395187, - "loss_total": 0.3892982006072998, - "step": 252299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.3295665979385376, - "learning_rate": 2.0405990893075866e-05, - "loss": 0.4442, - "step": 252300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.768548965454102, - "loss_rtd": 0.23619456589221954, - "loss_sent": 0.07642235606908798, - "loss_sod": 0.06291034072637558, - "loss_total": 0.3755272626876831, - "step": 252399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.375941276550293, - "loss_rtd": 0.19022639095783234, - "loss_sent": 0.054528672248125076, - "loss_sod": 0.14079278707504272, - "loss_total": 0.38554784655570984, - "step": 252399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.0614683628082275, - "learning_rate": 2.0380419024337355e-05, - "loss": 0.4582, - "step": 252400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.842750072479248, - "loss_rtd": 0.231532484292984, - "loss_sent": 0.11322799324989319, - "loss_sod": 0.006990238558501005, - "loss_total": 0.3517507314682007, - "step": 252499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.830024242401123, - "loss_rtd": 0.21234385669231415, - "loss_sent": 0.1968112587928772, - "loss_sod": 0.031806040555238724, - "loss_total": 0.44096115231513977, - "step": 252499 - }, - { - "epoch": 0.025, - "grad_norm": 0.9424448609352112, - "learning_rate": 2.035485908636254e-05, - "loss": 0.4404, - "step": 252500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.690739154815674, - "loss_rtd": 0.20290978252887726, - "loss_sent": 0.15227241814136505, - "loss_sod": 0.12506070733070374, - "loss_total": 0.48024290800094604, - "step": 252599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.893576622009277, - "loss_rtd": 0.22877617180347443, - "loss_sent": 0.13091930747032166, - "loss_sod": 0.03059357777237892, - "loss_total": 0.3902890682220459, - "step": 252599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.1129803657531738, - "learning_rate": 2.032931108944692e-05, - "loss": 0.4487, - "step": 252600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.083102703094482, - "loss_rtd": 0.17864498496055603, - "loss_sent": 2.9816606911481358e-05, - "loss_sod": 0.037687476724386215, - "loss_total": 0.21636228263378143, - "step": 252699 - }, - { - "epoch": 0.025398, - "loss_gen": 4.865263938903809, - "loss_rtd": 0.16908150911331177, - "loss_sent": 0.011248045600950718, - "loss_sod": 0.03134654462337494, - "loss_total": 0.21167610585689545, - "step": 252699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.7066148519515991, - "learning_rate": 2.0303775043881255e-05, - "loss": 0.4485, - "step": 252700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.265369892120361, - "loss_rtd": 0.1809772104024887, - "loss_sent": 0.05907053127884865, - "loss_sod": 0.04577813670039177, - "loss_total": 0.2858258783817291, - "step": 252799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.592155933380127, - "loss_rtd": 0.2288004457950592, - "loss_sent": 0.06423830986022949, - "loss_sod": 0.024152319878339767, - "loss_total": 0.31719106435775757, - "step": 252799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.5810658931732178, - "learning_rate": 2.0278250959951443e-05, - "loss": 0.4433, - "step": 252800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.966113090515137, - "loss_rtd": 0.21657642722129822, - "loss_sent": 0.11611036211252213, - "loss_sod": 0.046724990010261536, - "loss_total": 0.3794117867946625, - "step": 252899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.758605003356934, - "loss_rtd": 0.21248145401477814, - "loss_sent": 0.24654428660869598, - "loss_sod": 0.09136464446783066, - "loss_total": 0.550390362739563, - "step": 252899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.7527257204055786, - "learning_rate": 2.0252738847938585e-05, - "loss": 0.457, - "step": 252900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.563623905181885, - "loss_rtd": 0.2416469007730484, - "loss_sent": 0.3352299630641937, - "loss_sod": 0.01914307475090027, - "loss_total": 0.596019983291626, - "step": 252999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.356666564941406, - "loss_rtd": 0.2195223718881607, - "loss_sent": 0.06701061874628067, - "loss_sod": 0.10076329112052917, - "loss_total": 0.38729628920555115, - "step": 252999 - }, - { - "epoch": 0.026, - "grad_norm": 1.4748008251190186, - "learning_rate": 2.0227238718118963e-05, - "loss": 0.4332, - "step": 253000 - }, - { - "epoch": 0.026, - "eval_loss": 0.4322918951511383, - "eval_runtime": 151.7236, - "eval_samples_per_second": 101.784, - "eval_steps_per_second": 0.798, - "step": 253000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.863963603973389, - "loss_rtd": 0.2454257756471634, - "loss_sent": 0.1348862200975418, - "loss_sod": 0.04228346049785614, - "loss_total": 0.42259544134140015, - "step": 253099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.711928367614746, - "loss_rtd": 0.22430557012557983, - "loss_sent": 0.3639461398124695, - "loss_sod": 0.04680056497454643, - "loss_total": 0.6350522637367249, - "step": 253099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.656769871711731, - "learning_rate": 2.0201750580764044e-05, - "loss": 0.4645, - "step": 253100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.750680923461914, - "loss_rtd": 0.23668131232261658, - "loss_sent": 0.24485036730766296, - "loss_sod": 0.0677449107170105, - "loss_total": 0.54927659034729, - "step": 253199 - }, - { - "epoch": 0.026398, - "loss_gen": 6.049872398376465, - "loss_rtd": 0.2207106202840805, - "loss_sent": 0.10593386739492416, - "loss_sod": 0.053735025227069855, - "loss_total": 0.3803795278072357, - "step": 253199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.709084153175354, - "learning_rate": 2.017627444614041e-05, - "loss": 0.444, - "step": 253200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.8751959800720215, - "loss_rtd": 0.2263498157262802, - "loss_sent": 0.18755370378494263, - "loss_sod": 0.0030357346404343843, - "loss_total": 0.41693925857543945, - "step": 253299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.824941635131836, - "loss_rtd": 0.2340388298034668, - "loss_sent": 0.10973584651947021, - "loss_sod": 0.06521715223789215, - "loss_total": 0.40899181365966797, - "step": 253299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.080283284187317, - "learning_rate": 2.015081032450986e-05, - "loss": 0.4371, - "step": 253300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.92127799987793, - "loss_rtd": 0.22395184636116028, - "loss_sent": 0.18231117725372314, - "loss_sod": 0.10265080630779266, - "loss_total": 0.5089138150215149, - "step": 253399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.888629913330078, - "loss_rtd": 0.23281361162662506, - "loss_sent": 0.08794432878494263, - "loss_sod": 0.13615782558918, - "loss_total": 0.4569157660007477, - "step": 253399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.3039835691452026, - "learning_rate": 2.012535822612936e-05, - "loss": 0.4643, - "step": 253400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.305265426635742, - "loss_rtd": 0.20122742652893066, - "loss_sent": 0.027874859049916267, - "loss_sod": 0.18004044890403748, - "loss_total": 0.40914273262023926, - "step": 253499 - }, - { - "epoch": 0.026998, - "loss_gen": 6.316204071044922, - "loss_rtd": 0.2375180572271347, - "loss_sent": 0.14653708040714264, - "loss_sod": 0.13144049048423767, - "loss_total": 0.5154955983161926, - "step": 253499 - }, - { - "epoch": 0.027, - "grad_norm": 1.2195602655410767, - "learning_rate": 2.009991816125098e-05, - "loss": 0.4438, - "step": 253500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.774242401123047, - "loss_rtd": 0.22510267794132233, - "loss_sent": 0.13734416663646698, - "loss_sod": 0.01843191497027874, - "loss_total": 0.380878746509552, - "step": 253599 - }, - { - "epoch": 0.027198, - "loss_gen": 6.295194149017334, - "loss_rtd": 0.24192148447036743, - "loss_sent": 0.13870932161808014, - "loss_sod": 0.06025253236293793, - "loss_total": 0.4408833384513855, - "step": 253599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.4461230039596558, - "learning_rate": 2.0074490140121982e-05, - "loss": 0.463, - "step": 253600 - }, - { - "epoch": 0.027398, - "loss_gen": 6.187361717224121, - "loss_rtd": 0.22875021398067474, - "loss_sent": 0.10544523596763611, - "loss_sod": 0.017582543194293976, - "loss_total": 0.3517780005931854, - "step": 253699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.74911642074585, - "loss_rtd": 0.23467105627059937, - "loss_sent": 0.18539391458034515, - "loss_sod": 0.0055173709988594055, - "loss_total": 0.4255823493003845, - "step": 253699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.4206382036209106, - "learning_rate": 2.004907417298478e-05, - "loss": 0.4209, - "step": 253700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.139222145080566, - "loss_rtd": 0.2642526626586914, - "loss_sent": 0.0941305011510849, - "loss_sod": 0.1769954264163971, - "loss_total": 0.5353785753250122, - "step": 253799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.594113826751709, - "loss_rtd": 0.1976829469203949, - "loss_sent": 0.057075586169958115, - "loss_sod": 0.023945963010191917, - "loss_total": 0.2787044942378998, - "step": 253799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.4231631755828857, - "learning_rate": 2.0023670270076937e-05, - "loss": 0.4389, - "step": 253800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.666767597198486, - "loss_rtd": 0.23870056867599487, - "loss_sent": 0.48226192593574524, - "loss_sod": 0.013401873409748077, - "loss_total": 0.73436439037323, - "step": 253899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.971577167510986, - "loss_rtd": 0.20716333389282227, - "loss_sent": 0.24025537073612213, - "loss_sod": 0.07430551946163177, - "loss_total": 0.5217242240905762, - "step": 253899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.488202452659607, - "learning_rate": 1.9998278441631108e-05, - "loss": 0.4418, - "step": 253900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.235413551330566, - "loss_rtd": 0.1910182386636734, - "loss_sent": 3.3657182939350605e-05, - "loss_sod": 0.11094896495342255, - "loss_total": 0.30200085043907166, - "step": 253999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.100149631500244, - "loss_rtd": 0.19616694748401642, - "loss_sent": 0.0688726156949997, - "loss_sod": 0.05267348885536194, - "loss_total": 0.31771305203437805, - "step": 253999 - }, - { - "epoch": 0.028, - "grad_norm": 0.7923054695129395, - "learning_rate": 1.9972898697875135e-05, - "loss": 0.4481, - "step": 254000 - }, - { - "epoch": 0.028, - "eval_loss": 0.41873565316200256, - "eval_runtime": 151.8254, - "eval_samples_per_second": 101.716, - "eval_steps_per_second": 0.797, - "step": 254000 - }, - { - "epoch": 0.028198, - "loss_gen": 4.996367454528809, - "loss_rtd": 0.1675935685634613, - "loss_sent": 3.037181159015745e-05, - "loss_sod": 0.1063089668750763, - "loss_total": 0.27393290400505066, - "step": 254099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.810243129730225, - "loss_rtd": 0.210595965385437, - "loss_sent": 0.3463844954967499, - "loss_sod": 0.040098853409290314, - "loss_total": 0.5970792770385742, - "step": 254099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.048243522644043, - "learning_rate": 1.9947531049031976e-05, - "loss": 0.4489, - "step": 254100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.795935153961182, - "loss_rtd": 0.22435301542282104, - "loss_sent": 0.14384007453918457, - "loss_sod": 0.03588526323437691, - "loss_total": 0.4040783643722534, - "step": 254199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.829339981079102, - "loss_rtd": 0.22517512738704681, - "loss_sent": 0.16799873113632202, - "loss_sod": 0.05237983912229538, - "loss_total": 0.4455536901950836, - "step": 254199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.508608341217041, - "learning_rate": 1.9922175505319733e-05, - "loss": 0.4457, - "step": 254200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.382936000823975, - "loss_rtd": 0.21463479101657867, - "loss_sent": 0.05007823184132576, - "loss_sod": 0.01350468024611473, - "loss_total": 0.27821770310401917, - "step": 254299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.707792282104492, - "loss_rtd": 0.23562106490135193, - "loss_sent": 0.1486457735300064, - "loss_sod": 0.0814094990491867, - "loss_total": 0.46567630767822266, - "step": 254299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.9088292121887207, - "learning_rate": 1.989683207695158e-05, - "loss": 0.4587, - "step": 254300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.139110565185547, - "loss_rtd": 0.1864052563905716, - "loss_sent": 3.037551141460426e-05, - "loss_sod": 0.06559682637453079, - "loss_total": 0.25203245878219604, - "step": 254399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.914858818054199, - "loss_rtd": 0.23055309057235718, - "loss_sent": 0.23952604830265045, - "loss_sod": 0.017169222235679626, - "loss_total": 0.48724836111068726, - "step": 254399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.832278311252594, - "learning_rate": 1.987150077413587e-05, - "loss": 0.4368, - "step": 254400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.868295669555664, - "loss_rtd": 0.21565981209278107, - "loss_sent": 0.13931824266910553, - "loss_sod": 0.09656614810228348, - "loss_total": 0.45154422521591187, - "step": 254499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.699411392211914, - "loss_rtd": 0.21974077820777893, - "loss_sent": 0.2226129174232483, - "loss_sod": 0.04936359450221062, - "loss_total": 0.49171727895736694, - "step": 254499 - }, - { - "epoch": 0.029, - "grad_norm": 2.1245124340057373, - "learning_rate": 1.9846181607076043e-05, - "loss": 0.4423, - "step": 254500 - }, - { - "epoch": 0.029198, - "loss_gen": 6.034182071685791, - "loss_rtd": 0.22618551552295685, - "loss_sent": 0.15084312856197357, - "loss_sod": 0.04063883423805237, - "loss_total": 0.4176675081253052, - "step": 254599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.342142105102539, - "loss_rtd": 0.2164769172668457, - "loss_sent": 0.0017474597552791238, - "loss_sod": 0.11026746779680252, - "loss_total": 0.32849183678627014, - "step": 254599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.2419036626815796, - "learning_rate": 1.982087458597068e-05, - "loss": 0.4458, - "step": 254600 - }, - { - "epoch": 0.029398, - "loss_gen": 4.955076694488525, - "loss_rtd": 0.18739967048168182, - "loss_sent": 0.021308621391654015, - "loss_sod": 0.06322623789310455, - "loss_total": 0.27193453907966614, - "step": 254699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.2264604568481445, - "loss_rtd": 0.19921955466270447, - "loss_sent": 0.07856092602014542, - "loss_sod": 0.011125471442937851, - "loss_total": 0.28890594840049744, - "step": 254699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.5246447920799255, - "learning_rate": 1.97955797210134e-05, - "loss": 0.463, - "step": 254700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.651065826416016, - "loss_rtd": 0.21787527203559875, - "loss_sent": 0.097113236784935, - "loss_sod": 0.048450201749801636, - "loss_total": 0.3634387254714966, - "step": 254799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.7161664962768555, - "loss_rtd": 0.22050975263118744, - "loss_sent": 0.05476810038089752, - "loss_sod": 0.08707212656736374, - "loss_total": 0.3623499870300293, - "step": 254799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.0568890571594238, - "learning_rate": 1.9770297022393004e-05, - "loss": 0.4364, - "step": 254800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.602153778076172, - "loss_rtd": 0.23433373868465424, - "loss_sent": 0.18747679889202118, - "loss_sod": 0.006510594859719276, - "loss_total": 0.42832112312316895, - "step": 254899 - }, - { - "epoch": 0.029798, - "loss_gen": 6.1688151359558105, - "loss_rtd": 0.21581362187862396, - "loss_sent": 0.1440548151731491, - "loss_sod": 0.05405785143375397, - "loss_total": 0.41392630338668823, - "step": 254899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.7897472977638245, - "learning_rate": 1.9745026500293362e-05, - "loss": 0.4545, - "step": 254900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.436579704284668, - "loss_rtd": 0.2115718275308609, - "loss_sent": 0.0863354280591011, - "loss_sod": 0.12983065843582153, - "loss_total": 0.42773789167404175, - "step": 254999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.243215560913086, - "loss_rtd": 0.19306324422359467, - "loss_sent": 0.015153673477470875, - "loss_sod": 0.08352413773536682, - "loss_total": 0.2917410433292389, - "step": 254999 - }, - { - "epoch": 0.03, - "grad_norm": 0.9842661023139954, - "learning_rate": 1.9719768164893415e-05, - "loss": 0.4232, - "step": 255000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4207229018211365, - "eval_runtime": 151.5625, - "eval_samples_per_second": 101.892, - "eval_steps_per_second": 0.798, - "step": 255000 - }, - { - "epoch": 0.030198, - "loss_gen": 6.143173694610596, - "loss_rtd": 0.2157437652349472, - "loss_sent": 0.27789658308029175, - "loss_sod": 0.06362170726060867, - "loss_total": 0.5572620630264282, - "step": 255099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.865470886230469, - "loss_rtd": 0.2301105409860611, - "loss_sent": 0.34119436144828796, - "loss_sod": 0.1609756201505661, - "loss_total": 0.7322804927825928, - "step": 255099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.4151833057403564, - "learning_rate": 1.969452202636723e-05, - "loss": 0.4399, - "step": 255100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.303222179412842, - "loss_rtd": 0.18868422508239746, - "loss_sent": 0.0045908973552286625, - "loss_sod": 0.03284907341003418, - "loss_total": 0.22612419724464417, - "step": 255199 - }, - { - "epoch": 0.030398, - "loss_gen": 6.221683502197266, - "loss_rtd": 0.23939965665340424, - "loss_sent": 0.06910251080989838, - "loss_sod": 0.12874291837215424, - "loss_total": 0.43724507093429565, - "step": 255199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.8739762902259827, - "learning_rate": 1.966928809488395e-05, - "loss": 0.4363, - "step": 255200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.833194255828857, - "loss_rtd": 0.23057061433792114, - "loss_sent": 0.3710896968841553, - "loss_sod": 0.11740000545978546, - "loss_total": 0.7190603017807007, - "step": 255299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.44817590713501, - "loss_rtd": 0.234102264046669, - "loss_sent": 0.0429513044655323, - "loss_sod": 0.004230175167322159, - "loss_total": 0.28128373622894287, - "step": 255299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.7727651596069336, - "learning_rate": 1.964406638060781e-05, - "loss": 0.4217, - "step": 255300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.4819183349609375, - "loss_rtd": 0.22465725243091583, - "loss_sent": 0.1288895606994629, - "loss_sod": 0.059488095343112946, - "loss_total": 0.41303491592407227, - "step": 255399 - }, - { - "epoch": 0.030798, - "loss_gen": 6.006296634674072, - "loss_rtd": 0.22454136610031128, - "loss_sent": 0.07526542991399765, - "loss_sod": 0.05416024476289749, - "loss_total": 0.3539670407772064, - "step": 255399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.1668645143508911, - "learning_rate": 1.961885689369809e-05, - "loss": 0.4417, - "step": 255400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.520474433898926, - "loss_rtd": 0.21842928230762482, - "loss_sent": 0.0738108828663826, - "loss_sod": 0.04817997291684151, - "loss_total": 0.340420126914978, - "step": 255499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.870036602020264, - "loss_rtd": 0.21882574260234833, - "loss_sent": 0.25362205505371094, - "loss_sod": 0.12334321439266205, - "loss_total": 0.5957909822463989, - "step": 255499 - }, - { - "epoch": 0.031, - "grad_norm": 1.005359172821045, - "learning_rate": 1.9593659644309177e-05, - "loss": 0.4443, - "step": 255500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.490281581878662, - "loss_rtd": 0.22614344954490662, - "loss_sent": 0.09881580621004105, - "loss_sod": 0.011842755600810051, - "loss_total": 0.33680200576782227, - "step": 255599 - }, - { - "epoch": 0.031198, - "loss_gen": 6.003447532653809, - "loss_rtd": 0.23837314546108246, - "loss_sent": 0.1303938329219818, - "loss_sod": 0.032098740339279175, - "loss_total": 0.40086570382118225, - "step": 255599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.5261101722717285, - "learning_rate": 1.9568474642590523e-05, - "loss": 0.4544, - "step": 255600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.562786102294922, - "loss_rtd": 0.23503659665584564, - "loss_sent": 0.1858574002981186, - "loss_sod": 0.1714448630809784, - "loss_total": 0.5923388600349426, - "step": 255699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.179942607879639, - "loss_rtd": 0.18859492242336273, - "loss_sent": 0.05462285876274109, - "loss_sod": 0.15915504097938538, - "loss_total": 0.402372807264328, - "step": 255699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.346409559249878, - "learning_rate": 1.9543301898686662e-05, - "loss": 0.4607, - "step": 255700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.731593132019043, - "loss_rtd": 0.2347627878189087, - "loss_sent": 0.4166901409626007, - "loss_sod": 0.03454044088721275, - "loss_total": 0.6859933733940125, - "step": 255799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.529477119445801, - "loss_rtd": 0.2211005538702011, - "loss_sent": 0.07924675196409225, - "loss_sod": 0.05242312699556351, - "loss_total": 0.35277044773101807, - "step": 255799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.2263169288635254, - "learning_rate": 1.9518141422737136e-05, - "loss": 0.4349, - "step": 255800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.775720119476318, - "loss_rtd": 0.2190198451280594, - "loss_sent": 0.21844342350959778, - "loss_sod": 0.016671624034643173, - "loss_total": 0.45413488149642944, - "step": 255899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.653126239776611, - "loss_rtd": 0.2120610624551773, - "loss_sent": 0.1320323646068573, - "loss_sod": 0.046516623347997665, - "loss_total": 0.3906100392341614, - "step": 255899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.464066505432129, - "learning_rate": 1.9492993224876597e-05, - "loss": 0.4434, - "step": 255900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.5872626304626465, - "loss_rtd": 0.22028420865535736, - "loss_sent": 0.14101046323776245, - "loss_sod": 0.11387832462787628, - "loss_total": 0.4751729965209961, - "step": 255999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.998569011688232, - "loss_rtd": 0.21319493651390076, - "loss_sent": 0.19112010300159454, - "loss_sod": 0.07989027351140976, - "loss_total": 0.48420530557632446, - "step": 255999 - }, - { - "epoch": 0.032, - "grad_norm": 1.0494569540023804, - "learning_rate": 1.9467857315234746e-05, - "loss": 0.4404, - "step": 256000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4182792007923126, - "eval_runtime": 151.8291, - "eval_samples_per_second": 101.713, - "eval_steps_per_second": 0.797, - "step": 256000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.719137191772461, - "loss_rtd": 0.2249094843864441, - "loss_sent": 0.11407400667667389, - "loss_sod": 0.015464075841009617, - "loss_total": 0.3544475734233856, - "step": 256099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.7993974685668945, - "loss_rtd": 0.23655447363853455, - "loss_sent": 0.1094830185174942, - "loss_sod": 0.0687658041715622, - "loss_total": 0.41480326652526855, - "step": 256099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.841873824596405, - "learning_rate": 1.944273370393633e-05, - "loss": 0.4443, - "step": 256100 - }, - { - "epoch": 0.000398, - "loss_gen": 6.1038689613342285, - "loss_rtd": 0.23958365619182587, - "loss_sent": 0.35503125190734863, - "loss_sod": 0.028593841940164566, - "loss_total": 0.62320876121521, - "step": 256199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.77200984954834, - "loss_rtd": 0.2335517406463623, - "loss_sent": 0.3759221136569977, - "loss_sod": 0.15240080654621124, - "loss_total": 0.7618746757507324, - "step": 256199 - }, - { - "epoch": 0.0004, - "grad_norm": 2.4883439540863037, - "learning_rate": 1.9417622401101104e-05, - "loss": 0.4547, - "step": 256200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.6341776847839355, - "loss_rtd": 0.20687498152256012, - "loss_sent": 0.10849983990192413, - "loss_sod": 0.0372319258749485, - "loss_total": 0.35260674357414246, - "step": 256299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.907930374145508, - "loss_rtd": 0.20953242480754852, - "loss_sent": 0.14768798649311066, - "loss_sod": 0.03266579285264015, - "loss_total": 0.38988620042800903, - "step": 256299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.8542525768280029, - "learning_rate": 1.939252341684392e-05, - "loss": 0.4305, - "step": 256300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.550402641296387, - "loss_rtd": 0.21594884991645813, - "loss_sent": 0.01939331367611885, - "loss_sod": 0.010465777479112148, - "loss_total": 0.2458079308271408, - "step": 256399 - }, - { - "epoch": 0.000798, - "loss_gen": 4.908761978149414, - "loss_rtd": 0.18330790102481842, - "loss_sent": 2.562065128586255e-05, - "loss_sod": 0.07614215463399887, - "loss_total": 0.2594756782054901, - "step": 256399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.6345007419586182, - "learning_rate": 1.936743676127466e-05, - "loss": 0.442, - "step": 256400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.735369682312012, - "loss_rtd": 0.23683825135231018, - "loss_sent": 0.15582871437072754, - "loss_sod": 0.08239433169364929, - "loss_total": 0.475061297416687, - "step": 256499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.574130058288574, - "loss_rtd": 0.24696214497089386, - "loss_sent": 0.13589608669281006, - "loss_sod": 0.016467146575450897, - "loss_total": 0.3993253707885742, - "step": 256499 - }, - { - "epoch": 0.001, - "grad_norm": 1.1938645839691162, - "learning_rate": 1.9342362444498197e-05, - "loss": 0.4357, - "step": 256500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.991654872894287, - "loss_rtd": 0.2191253900527954, - "loss_sent": 0.3768891394138336, - "loss_sod": 0.06542088091373444, - "loss_total": 0.6614353656768799, - "step": 256599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.852071762084961, - "loss_rtd": 0.21143855154514313, - "loss_sent": 0.11855126172304153, - "loss_sod": 0.06887871026992798, - "loss_total": 0.39886850118637085, - "step": 256599 - }, - { - "epoch": 0.0012, - "grad_norm": 2.1177027225494385, - "learning_rate": 1.931730047661447e-05, - "loss": 0.437, - "step": 256600 - }, - { - "epoch": 0.001398, - "loss_gen": 6.183917999267578, - "loss_rtd": 0.22021916508674622, - "loss_sent": 0.08833926171064377, - "loss_sod": 0.024696988984942436, - "loss_total": 0.333255410194397, - "step": 256699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.445835590362549, - "loss_rtd": 0.22125393152236938, - "loss_sent": 0.08685436099767685, - "loss_sod": 0.099408358335495, - "loss_total": 0.4075166583061218, - "step": 256699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.3161251544952393, - "learning_rate": 1.9292250867718442e-05, - "loss": 0.4439, - "step": 256700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.84780740737915, - "loss_rtd": 0.21963860094547272, - "loss_sent": 0.2052403688430786, - "loss_sod": 0.008837012574076653, - "loss_total": 0.4337159991264343, - "step": 256799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.553376197814941, - "loss_rtd": 0.23662590980529785, - "loss_sent": 0.10431526601314545, - "loss_sod": 0.005601783748716116, - "loss_total": 0.34654295444488525, - "step": 256799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.6843005418777466, - "learning_rate": 1.926721362790011e-05, - "loss": 0.4529, - "step": 256800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.042369842529297, - "loss_rtd": 0.19062404334545135, - "loss_sent": 0.009145115502178669, - "loss_sod": 0.04888478294014931, - "loss_total": 0.24865393340587616, - "step": 256899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.888213157653809, - "loss_rtd": 0.20920048654079437, - "loss_sent": 0.3349314332008362, - "loss_sod": 0.05534731596708298, - "loss_total": 0.5994791984558105, - "step": 256899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.5948286056518555, - "learning_rate": 1.9242188767244433e-05, - "loss": 0.436, - "step": 256900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.962906360626221, - "loss_rtd": 0.22339452803134918, - "loss_sent": 0.2463454008102417, - "loss_sod": 0.014831135049462318, - "loss_total": 0.48457106947898865, - "step": 256999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.909237861633301, - "loss_rtd": 0.2261003702878952, - "loss_sent": 0.14804089069366455, - "loss_sod": 0.010999368503689766, - "loss_total": 0.38514062762260437, - "step": 256999 - }, - { - "epoch": 0.002, - "grad_norm": 1.479538083076477, - "learning_rate": 1.921717629583145e-05, - "loss": 0.4491, - "step": 257000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4171997904777527, - "eval_runtime": 153.5453, - "eval_samples_per_second": 100.576, - "eval_steps_per_second": 0.788, - "step": 257000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.606085300445557, - "loss_rtd": 0.2263501137495041, - "loss_sent": 0.2513013780117035, - "loss_sod": 0.006674719508737326, - "loss_total": 0.48432621359825134, - "step": 257099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.469945907592773, - "loss_rtd": 0.21970972418785095, - "loss_sent": 0.29935726523399353, - "loss_sod": 0.06727207452058792, - "loss_total": 0.5863390564918518, - "step": 257099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.4244379997253418, - "learning_rate": 1.919217622373617e-05, - "loss": 0.4308, - "step": 257100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.54227876663208, - "loss_rtd": 0.24072355031967163, - "loss_sent": 0.22241312265396118, - "loss_sod": 0.11213670670986176, - "loss_total": 0.5752733945846558, - "step": 257199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.939525127410889, - "loss_rtd": 0.22330863773822784, - "loss_sent": 0.1421503871679306, - "loss_sod": 0.12618714570999146, - "loss_total": 0.4916461706161499, - "step": 257199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.5518525838851929, - "learning_rate": 1.9167188561028636e-05, - "loss": 0.4608, - "step": 257200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.404356956481934, - "loss_rtd": 0.2190902829170227, - "loss_sent": 0.191134974360466, - "loss_sod": 0.015676403418183327, - "loss_total": 0.4259016513824463, - "step": 257299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.670596122741699, - "loss_rtd": 0.2514934241771698, - "loss_sent": 0.323307603597641, - "loss_sod": 0.0061821406707167625, - "loss_total": 0.5809831619262695, - "step": 257299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.0254656076431274, - "learning_rate": 1.914221331777385e-05, - "loss": 0.4324, - "step": 257300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.744279384613037, - "loss_rtd": 0.22491784393787384, - "loss_sent": 0.234115868806839, - "loss_sod": 0.039530716836452484, - "loss_total": 0.4985644221305847, - "step": 257399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.65138578414917, - "loss_rtd": 0.22519871592521667, - "loss_sent": 0.03546035289764404, - "loss_sod": 0.03364551067352295, - "loss_total": 0.29430457949638367, - "step": 257399 - }, - { - "epoch": 0.0028, - "grad_norm": 0.7370484471321106, - "learning_rate": 1.911725050403185e-05, - "loss": 0.4411, - "step": 257400 - }, - { - "epoch": 0.002998, - "loss_gen": 6.109411716461182, - "loss_rtd": 0.23645177483558655, - "loss_sent": 0.3965546786785126, - "loss_sod": 0.12229511141777039, - "loss_total": 0.7553015947341919, - "step": 257499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.601687908172607, - "loss_rtd": 0.21877025067806244, - "loss_sent": 0.21668651700019836, - "loss_sod": 0.03357876092195511, - "loss_total": 0.4690355360507965, - "step": 257499 - }, - { - "epoch": 0.003, - "grad_norm": 1.3516509532928467, - "learning_rate": 1.909230012985765e-05, - "loss": 0.4495, - "step": 257500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.896432399749756, - "loss_rtd": 0.2194606363773346, - "loss_sent": 0.11421237885951996, - "loss_sod": 0.1204255074262619, - "loss_total": 0.45409852266311646, - "step": 257599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.730504035949707, - "loss_rtd": 0.23530344665050507, - "loss_sent": 0.08894983679056168, - "loss_sod": 0.12989532947540283, - "loss_total": 0.45414862036705017, - "step": 257599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9213800430297852, - "learning_rate": 1.906736220530128e-05, - "loss": 0.4471, - "step": 257600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.863643646240234, - "loss_rtd": 0.24199290573596954, - "loss_sent": 0.2199094593524933, - "loss_sod": 0.017921190708875656, - "loss_total": 0.4798235595226288, - "step": 257699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.4639058113098145, - "loss_rtd": 0.20808997750282288, - "loss_sent": 0.03998017683625221, - "loss_sod": 0.014196610078215599, - "loss_total": 0.26226675510406494, - "step": 257699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.0237022638320923, - "learning_rate": 1.90424367404077e-05, - "loss": 0.4435, - "step": 257700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.857865810394287, - "loss_rtd": 0.22634819149971008, - "loss_sent": 0.15773418545722961, - "loss_sod": 0.05826546251773834, - "loss_total": 0.44234785437583923, - "step": 257799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.956275939941406, - "loss_rtd": 0.23653164505958557, - "loss_sent": 0.15997876226902008, - "loss_sod": 0.03316502645611763, - "loss_total": 0.42967545986175537, - "step": 257799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.0987085103988647, - "learning_rate": 1.90175237452169e-05, - "loss": 0.4323, - "step": 257800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.447580814361572, - "loss_rtd": 0.24280719459056854, - "loss_sent": 0.26339027285575867, - "loss_sod": 0.05107992887496948, - "loss_total": 0.5572774410247803, - "step": 257899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.788817405700684, - "loss_rtd": 0.23417778313159943, - "loss_sent": 0.058433547616004944, - "loss_sod": 0.06375335156917572, - "loss_total": 0.3563646674156189, - "step": 257899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.2282878160476685, - "learning_rate": 1.899262322976384e-05, - "loss": 0.4527, - "step": 257900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.897584915161133, - "loss_rtd": 0.2256196290254593, - "loss_sent": 0.1502881795167923, - "loss_sod": 0.09983941167593002, - "loss_total": 0.4757472276687622, - "step": 257999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.642777919769287, - "loss_rtd": 0.21140892803668976, - "loss_sent": 0.17053905129432678, - "loss_sod": 0.05413663387298584, - "loss_total": 0.4360845983028412, - "step": 257999 - }, - { - "epoch": 0.004, - "grad_norm": 1.1078600883483887, - "learning_rate": 1.8967735204078423e-05, - "loss": 0.4454, - "step": 258000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4140926003456116, - "eval_runtime": 150.2666, - "eval_samples_per_second": 102.771, - "eval_steps_per_second": 0.805, - "step": 258000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.197231769561768, - "loss_rtd": 0.18180225789546967, - "loss_sent": 2.8777447369066067e-05, - "loss_sod": 0.09882558882236481, - "loss_total": 0.280656635761261, - "step": 258099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.277012348175049, - "loss_rtd": 0.21173299849033356, - "loss_sent": 4.875852391705848e-05, - "loss_sod": 0.07263106107711792, - "loss_total": 0.28441280126571655, - "step": 258099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.8212816715240479, - "learning_rate": 1.8942859678185554e-05, - "loss": 0.423, - "step": 258100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.801881313323975, - "loss_rtd": 0.2259974628686905, - "loss_sent": 0.21458883583545685, - "loss_sod": 0.05621238425374031, - "loss_total": 0.49679869413375854, - "step": 258199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.9818267822265625, - "loss_rtd": 0.22572585940361023, - "loss_sent": 0.0223389845341444, - "loss_sod": 0.05908418819308281, - "loss_total": 0.3071490228176117, - "step": 258199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.1999270915985107, - "learning_rate": 1.8917996662105092e-05, - "loss": 0.4313, - "step": 258200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.425445079803467, - "loss_rtd": 0.2140994518995285, - "loss_sent": 0.15417629480361938, - "loss_sod": 0.004347951151430607, - "loss_total": 0.3726236820220947, - "step": 258299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.947686672210693, - "loss_rtd": 0.23279711604118347, - "loss_sent": 0.29078197479248047, - "loss_sod": 0.013146903365850449, - "loss_total": 0.5367259979248047, - "step": 258299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.7846020460128784, - "learning_rate": 1.8893146165851876e-05, - "loss": 0.4564, - "step": 258300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.848681926727295, - "loss_rtd": 0.2188025861978531, - "loss_sent": 0.2246469408273697, - "loss_sod": 0.08644305914640427, - "loss_total": 0.5298925638198853, - "step": 258399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.731697082519531, - "loss_rtd": 0.23489528894424438, - "loss_sent": 0.17494313418865204, - "loss_sod": 0.0680108591914177, - "loss_total": 0.4778493046760559, - "step": 258399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.1933016777038574, - "learning_rate": 1.8868308199435648e-05, - "loss": 0.4424, - "step": 258400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.807486534118652, - "loss_rtd": 0.23709937930107117, - "loss_sent": 0.09492206573486328, - "loss_sod": 0.04649343714118004, - "loss_total": 0.3785148859024048, - "step": 258499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.812468528747559, - "loss_rtd": 0.22758077085018158, - "loss_sent": 0.24587169289588928, - "loss_sod": 0.012292643077671528, - "loss_total": 0.4857451021671295, - "step": 258499 - }, - { - "epoch": 0.005, - "grad_norm": 1.2961477041244507, - "learning_rate": 1.884348277286115e-05, - "loss": 0.455, - "step": 258500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.979040622711182, - "loss_rtd": 0.257358193397522, - "loss_sent": 0.09835020452737808, - "loss_sod": 0.06101030483841896, - "loss_total": 0.4167186915874481, - "step": 258599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.869791507720947, - "loss_rtd": 0.23474963009357452, - "loss_sent": 0.12189159542322159, - "loss_sod": 0.07525230199098587, - "loss_total": 0.431893527507782, - "step": 258599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.9264788031578064, - "learning_rate": 1.8818669896128066e-05, - "loss": 0.4374, - "step": 258600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.952756881713867, - "loss_rtd": 0.2469499260187149, - "loss_sent": 0.131473109126091, - "loss_sod": 0.06519979238510132, - "loss_total": 0.4436228275299072, - "step": 258699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.444943428039551, - "loss_rtd": 0.2175222784280777, - "loss_sent": 0.023357335478067398, - "loss_sod": 0.1995525360107422, - "loss_total": 0.4404321312904358, - "step": 258699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.4726243019104004, - "learning_rate": 1.8793869579231038e-05, - "loss": 0.4478, - "step": 258700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.217036247253418, - "loss_rtd": 0.20182210206985474, - "loss_sent": 0.0020134393125772476, - "loss_sod": 0.07372411340475082, - "loss_total": 0.27755966782569885, - "step": 258799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.066666603088379, - "loss_rtd": 0.18927043676376343, - "loss_sent": 0.00011247151996940374, - "loss_sod": 0.091739222407341, - "loss_total": 0.2811221480369568, - "step": 258799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.811972439289093, - "learning_rate": 1.8769081832159595e-05, - "loss": 0.4523, - "step": 258800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.779342174530029, - "loss_rtd": 0.20872755348682404, - "loss_sent": 0.4420732855796814, - "loss_sod": 0.017633313313126564, - "loss_total": 0.6684341430664062, - "step": 258899 - }, - { - "epoch": 0.005798, - "loss_gen": 6.020589351654053, - "loss_rtd": 0.2380535751581192, - "loss_sent": 0.21202102303504944, - "loss_sod": 0.010953258723020554, - "loss_total": 0.4610278606414795, - "step": 258899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.0986590385437012, - "learning_rate": 1.8744306664898254e-05, - "loss": 0.4355, - "step": 258900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.813717365264893, - "loss_rtd": 0.23481868207454681, - "loss_sent": 0.3383063077926636, - "loss_sod": 0.0625721737742424, - "loss_total": 0.6356971263885498, - "step": 258999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.8133697509765625, - "loss_rtd": 0.23310688138008118, - "loss_sent": 0.17369568347930908, - "loss_sod": 0.06796613335609436, - "loss_total": 0.4747686982154846, - "step": 258999 - }, - { - "epoch": 0.006, - "grad_norm": 1.0945936441421509, - "learning_rate": 1.871954408742645e-05, - "loss": 0.4318, - "step": 259000 - }, - { - "epoch": 0.006, - "eval_loss": 0.41480275988578796, - "eval_runtime": 149.8703, - "eval_samples_per_second": 103.042, - "eval_steps_per_second": 0.807, - "step": 259000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.685275554656982, - "loss_rtd": 0.23449456691741943, - "loss_sent": 0.20681414008140564, - "loss_sod": 0.06243611499667168, - "loss_total": 0.5037448406219482, - "step": 259099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.759958267211914, - "loss_rtd": 0.21148507297039032, - "loss_sent": 0.09391295164823532, - "loss_sod": 0.032898806035518646, - "loss_total": 0.3382968306541443, - "step": 259099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.5001825094223022, - "learning_rate": 1.8694794109718566e-05, - "loss": 0.4559, - "step": 259100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.817416191101074, - "loss_rtd": 0.22993461787700653, - "loss_sent": 0.5977669358253479, - "loss_sod": 0.05000462383031845, - "loss_total": 0.8777061700820923, - "step": 259199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.833678722381592, - "loss_rtd": 0.24896228313446045, - "loss_sent": 0.12470057606697083, - "loss_sod": 0.02391231432557106, - "loss_total": 0.39757516980171204, - "step": 259199 - }, - { - "epoch": 0.0064, - "grad_norm": 2.281923770904541, - "learning_rate": 1.867005674174385e-05, - "loss": 0.4536, - "step": 259200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.6843647956848145, - "loss_rtd": 0.2394198775291443, - "loss_sent": 0.2764662504196167, - "loss_sod": 0.015383703634142876, - "loss_total": 0.5312697887420654, - "step": 259299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.640019416809082, - "loss_rtd": 0.19201642274856567, - "loss_sent": 0.14057159423828125, - "loss_sod": 0.011013489216566086, - "loss_total": 0.3436015248298645, - "step": 259299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.6997259259223938, - "learning_rate": 1.8645331993466537e-05, - "loss": 0.4352, - "step": 259300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.74385404586792, - "loss_rtd": 0.22940178215503693, - "loss_sent": 0.20560473203659058, - "loss_sod": 0.006837142165750265, - "loss_total": 0.4418436586856842, - "step": 259399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.917254447937012, - "loss_rtd": 0.2073005735874176, - "loss_sent": 0.24624089896678925, - "loss_sod": 0.023274298757314682, - "loss_total": 0.47681576013565063, - "step": 259399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.60682213306427, - "learning_rate": 1.8620619874845746e-05, - "loss": 0.4441, - "step": 259400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.354649543762207, - "loss_rtd": 0.2093132585287094, - "loss_sent": 0.0009695728658698499, - "loss_sod": 0.2127683162689209, - "loss_total": 0.4230511486530304, - "step": 259499 - }, - { - "epoch": 0.006998, - "loss_gen": 4.940433502197266, - "loss_rtd": 0.16785122454166412, - "loss_sent": 0.0041999006643891335, - "loss_sod": 0.016859542578458786, - "loss_total": 0.18891067802906036, - "step": 259499 - }, - { - "epoch": 0.007, - "grad_norm": 0.8403108716011047, - "learning_rate": 1.8595920395835532e-05, - "loss": 0.4378, - "step": 259500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.662302017211914, - "loss_rtd": 0.21706236898899078, - "loss_sent": 0.13679741322994232, - "loss_sod": 0.0185824166983366, - "loss_total": 0.37244218587875366, - "step": 259599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.968424320220947, - "loss_rtd": 0.2268364429473877, - "loss_sent": 0.19327765703201294, - "loss_sod": 0.07941228896379471, - "loss_total": 0.49952638149261475, - "step": 259599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.5976347327232361, - "learning_rate": 1.857123356638481e-05, - "loss": 0.4344, - "step": 259600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.7492146492004395, - "loss_rtd": 0.22425615787506104, - "loss_sent": 0.10312268882989883, - "loss_sod": 0.16404588520526886, - "loss_total": 0.49142470955848694, - "step": 259699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.878417015075684, - "loss_rtd": 0.23800617456436157, - "loss_sent": 0.09379882365465164, - "loss_sod": 0.10820753872394562, - "loss_total": 0.44001251459121704, - "step": 259699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.623661994934082, - "learning_rate": 1.854655939643745e-05, - "loss": 0.4282, - "step": 259700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.168807029724121, - "loss_rtd": 0.18829461932182312, - "loss_sent": 2.6924166377284564e-05, - "loss_sod": 0.05271182209253311, - "loss_total": 0.24103336036205292, - "step": 259799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.36625337600708, - "loss_rtd": 0.20180922746658325, - "loss_sent": 0.054681196808815, - "loss_sod": 0.030089624226093292, - "loss_total": 0.28658002614974976, - "step": 259799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.7371716499328613, - "learning_rate": 1.8521897895932222e-05, - "loss": 0.4344, - "step": 259800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.196185111999512, - "loss_rtd": 0.1803692877292633, - "loss_sent": 0.0004966436536051333, - "loss_sod": 0.10203516483306885, - "loss_total": 0.2829011082649231, - "step": 259899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.8036580085754395, - "loss_rtd": 0.22227637469768524, - "loss_sent": 0.12897543609142303, - "loss_sod": 0.07313300669193268, - "loss_total": 0.42438483238220215, - "step": 259899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.0138169527053833, - "learning_rate": 1.8497249074802737e-05, - "loss": 0.4485, - "step": 259900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.907511234283447, - "loss_rtd": 0.22554604709148407, - "loss_sent": 0.1830756664276123, - "loss_sod": 0.1009528711438179, - "loss_total": 0.5095745921134949, - "step": 259999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.93868350982666, - "loss_rtd": 0.22718757390975952, - "loss_sent": 0.1629742980003357, - "loss_sod": 0.023497367277741432, - "loss_total": 0.4136592447757721, - "step": 259999 - }, - { - "epoch": 0.008, - "grad_norm": 0.7653558254241943, - "learning_rate": 1.8472612942977558e-05, - "loss": 0.4424, - "step": 260000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4210491478443146, - "eval_runtime": 151.5593, - "eval_samples_per_second": 101.894, - "eval_steps_per_second": 0.798, - "step": 260000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.790144920349121, - "loss_rtd": 0.21712996065616608, - "loss_sent": 0.14015312492847443, - "loss_sod": 0.12637047469615936, - "loss_total": 0.48365354537963867, - "step": 260099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.493537902832031, - "loss_rtd": 0.20442447066307068, - "loss_sent": 0.00033282683580182493, - "loss_sod": 0.08371838182210922, - "loss_total": 0.2884756922721863, - "step": 260099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.9041178822517395, - "learning_rate": 1.8447989510380116e-05, - "loss": 0.4436, - "step": 260100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.798304080963135, - "loss_rtd": 0.23409560322761536, - "loss_sent": 0.07618724554777145, - "loss_sod": 0.09535852074623108, - "loss_total": 0.4056413769721985, - "step": 260199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.546088218688965, - "loss_rtd": 0.21580104529857635, - "loss_sent": 0.07127001136541367, - "loss_sod": 0.08646036684513092, - "loss_total": 0.37353143095970154, - "step": 260199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.2357592582702637, - "learning_rate": 1.842337878692874e-05, - "loss": 0.4302, - "step": 260200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.51783561706543, - "loss_rtd": 0.19924241304397583, - "loss_sent": 0.07657813280820847, - "loss_sod": 0.17297813296318054, - "loss_total": 0.44879868626594543, - "step": 260299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.854599475860596, - "loss_rtd": 0.23725444078445435, - "loss_sent": 0.38655468821525574, - "loss_sod": 0.06673618406057358, - "loss_total": 0.6905453205108643, - "step": 260299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.7616416215896606, - "learning_rate": 1.8398780782536602e-05, - "loss": 0.4622, - "step": 260300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.7527265548706055, - "loss_rtd": 0.21186961233615875, - "loss_sent": 0.04811631515622139, - "loss_sod": 0.02911721169948578, - "loss_total": 0.2891031503677368, - "step": 260399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.9584245681762695, - "loss_rtd": 0.23307180404663086, - "loss_sent": 0.11670845746994019, - "loss_sod": 0.03522536903619766, - "loss_total": 0.3850056231021881, - "step": 260399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.0394291877746582, - "learning_rate": 1.837419550711178e-05, - "loss": 0.4443, - "step": 260400 - }, - { - "epoch": 0.008998, - "loss_gen": 6.225304126739502, - "loss_rtd": 0.2223932147026062, - "loss_sent": 0.3683706521987915, - "loss_sod": 0.08619772642850876, - "loss_total": 0.6769616007804871, - "step": 260499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.999357223510742, - "loss_rtd": 0.2185937911272049, - "loss_sent": 0.10943987220525742, - "loss_sod": 0.10983487218618393, - "loss_total": 0.43786853551864624, - "step": 260499 - }, - { - "epoch": 0.009, - "grad_norm": 1.2048826217651367, - "learning_rate": 1.8349622970557227e-05, - "loss": 0.4661, - "step": 260500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.170165061950684, - "loss_rtd": 0.19268710911273956, - "loss_sent": 0.04321198910474777, - "loss_sod": 0.07380912452936172, - "loss_total": 0.30970823764801025, - "step": 260599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.835212230682373, - "loss_rtd": 0.226507306098938, - "loss_sent": 0.09596236795186996, - "loss_sod": 0.09300627559423447, - "loss_total": 0.4154759645462036, - "step": 260599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.9575132727622986, - "learning_rate": 1.8325063182770774e-05, - "loss": 0.4337, - "step": 260600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.77083683013916, - "loss_rtd": 0.21723388135433197, - "loss_sent": 0.2323702722787857, - "loss_sod": 0.006748107261955738, - "loss_total": 0.45635226368904114, - "step": 260699 - }, - { - "epoch": 0.009398, - "loss_gen": 6.030009746551514, - "loss_rtd": 0.23521681129932404, - "loss_sent": 0.20554371178150177, - "loss_sod": 0.04541389271616936, - "loss_total": 0.48617440462112427, - "step": 260699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.8153152465820312, - "learning_rate": 1.830051615364507e-05, - "loss": 0.4336, - "step": 260700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.599565029144287, - "loss_rtd": 0.24138863384723663, - "loss_sent": 0.0987170934677124, - "loss_sod": 0.010491971857845783, - "loss_total": 0.35059770941734314, - "step": 260799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.788488864898682, - "loss_rtd": 0.23278270661830902, - "loss_sent": 0.24295656383037567, - "loss_sod": 0.060434430837631226, - "loss_total": 0.5361737012863159, - "step": 260799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.104736566543579, - "learning_rate": 1.827598189306766e-05, - "loss": 0.4353, - "step": 260800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.7677154541015625, - "loss_rtd": 0.23435553908348083, - "loss_sent": 0.2477198988199234, - "loss_sod": 0.021104900166392326, - "loss_total": 0.5031803250312805, - "step": 260899 - }, - { - "epoch": 0.009798, - "loss_gen": 6.119668960571289, - "loss_rtd": 0.23140205442905426, - "loss_sent": 0.08643033355474472, - "loss_sod": 0.08407612890005112, - "loss_total": 0.4019085168838501, - "step": 260899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.2241572141647339, - "learning_rate": 1.8251460410920955e-05, - "loss": 0.4435, - "step": 260900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.381664752960205, - "loss_rtd": 0.1852429211139679, - "loss_sent": 0.0017346754902973771, - "loss_sod": 0.039578549563884735, - "loss_total": 0.22655615210533142, - "step": 260999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.7723517417907715, - "loss_rtd": 0.2153102159500122, - "loss_sent": 0.10258069634437561, - "loss_sod": 0.12844592332839966, - "loss_total": 0.4463368356227875, - "step": 260999 - }, - { - "epoch": 0.01, - "grad_norm": 0.9164645075798035, - "learning_rate": 1.8226951717082236e-05, - "loss": 0.4296, - "step": 261000 - }, - { - "epoch": 0.01, - "eval_loss": 0.42220592498779297, - "eval_runtime": 150.1395, - "eval_samples_per_second": 102.858, - "eval_steps_per_second": 0.806, - "step": 261000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.770923614501953, - "loss_rtd": 0.22430983185768127, - "loss_sent": 0.15456733107566833, - "loss_sod": 0.08047390729188919, - "loss_total": 0.4593510627746582, - "step": 261099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.349076747894287, - "loss_rtd": 0.1937120109796524, - "loss_sent": 0.010808099992573261, - "loss_sod": 0.06667657941579819, - "loss_total": 0.2711966931819916, - "step": 261099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.9448351263999939, - "learning_rate": 1.820245582142353e-05, - "loss": 0.4284, - "step": 261100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.942781925201416, - "loss_rtd": 0.22013670206069946, - "loss_sent": 0.2352485954761505, - "loss_sod": 0.09197963774204254, - "loss_total": 0.5473649501800537, - "step": 261199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.960268497467041, - "loss_rtd": 0.23670102655887604, - "loss_sent": 0.17174559831619263, - "loss_sod": 0.07600845396518707, - "loss_total": 0.48445507884025574, - "step": 261199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.0700087547302246, - "learning_rate": 1.8177972733811816e-05, - "loss": 0.4491, - "step": 261200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.878331184387207, - "loss_rtd": 0.23307335376739502, - "loss_sent": 0.1402657926082611, - "loss_sod": 0.01706000044941902, - "loss_total": 0.39039915800094604, - "step": 261299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.737022399902344, - "loss_rtd": 0.2178761512041092, - "loss_sent": 0.3490902781486511, - "loss_sod": 0.018797334283590317, - "loss_total": 0.5857637524604797, - "step": 261299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.3782583475112915, - "learning_rate": 1.8153502464108878e-05, - "loss": 0.4297, - "step": 261300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.765710353851318, - "loss_rtd": 0.24119292199611664, - "loss_sent": 0.2397157996892929, - "loss_sod": 0.01448000967502594, - "loss_total": 0.4953887462615967, - "step": 261399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.466281890869141, - "loss_rtd": 0.25056958198547363, - "loss_sent": 0.34469687938690186, - "loss_sod": 0.007440236397087574, - "loss_total": 0.6027066707611084, - "step": 261399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.3865495920181274, - "learning_rate": 1.8129045022171354e-05, - "loss": 0.4478, - "step": 261400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.893030166625977, - "loss_rtd": 0.23232214152812958, - "loss_sent": 0.14779751002788544, - "loss_sod": 0.019544130191206932, - "loss_total": 0.3996638059616089, - "step": 261499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.448110580444336, - "loss_rtd": 0.1919664740562439, - "loss_sent": 0.013673014007508755, - "loss_sod": 0.04023461788892746, - "loss_total": 0.24587410688400269, - "step": 261499 - }, - { - "epoch": 0.011, - "grad_norm": 0.8250448703765869, - "learning_rate": 1.810460041785067e-05, - "loss": 0.4321, - "step": 261500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.925307750701904, - "loss_rtd": 0.22781497240066528, - "loss_sent": 0.3265842795372009, - "loss_sod": 0.02865752950310707, - "loss_total": 0.5830568075180054, - "step": 261599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.71012544631958, - "loss_rtd": 0.21731022000312805, - "loss_sent": 0.409697562456131, - "loss_sod": 0.04533015564084053, - "loss_total": 0.6723379492759705, - "step": 261599 - }, - { - "epoch": 0.0112, - "grad_norm": 3.7671751976013184, - "learning_rate": 1.8080168660993124e-05, - "loss": 0.4422, - "step": 261600 - }, - { - "epoch": 0.011398, - "loss_gen": 6.314149379730225, - "loss_rtd": 0.24056515097618103, - "loss_sent": 0.17447257041931152, - "loss_sod": 0.039770349860191345, - "loss_total": 0.4548080563545227, - "step": 261699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.892195701599121, - "loss_rtd": 0.21015846729278564, - "loss_sent": 0.3552035987377167, - "loss_sod": 0.05402024835348129, - "loss_total": 0.6193823218345642, - "step": 261699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.7854501008987427, - "learning_rate": 1.8055749761439822e-05, - "loss": 0.4344, - "step": 261700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.551679611206055, - "loss_rtd": 0.21133549511432648, - "loss_sent": 0.001736497855745256, - "loss_sod": 0.15424686670303345, - "loss_total": 0.3673188388347626, - "step": 261799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.148797512054443, - "loss_rtd": 0.17374002933502197, - "loss_sent": 2.777163899736479e-05, - "loss_sod": 0.13416633009910583, - "loss_total": 0.30793413519859314, - "step": 261799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.1299976110458374, - "learning_rate": 1.803134372902671e-05, - "loss": 0.4462, - "step": 261800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.854678153991699, - "loss_rtd": 0.23862631618976593, - "loss_sent": 0.1387609839439392, - "loss_sod": 0.022210635244846344, - "loss_total": 0.3995979428291321, - "step": 261899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.819527626037598, - "loss_rtd": 0.21107836067676544, - "loss_sent": 0.19799655675888062, - "loss_sod": 0.13092638552188873, - "loss_total": 0.5400012731552124, - "step": 261899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.9039456248283386, - "learning_rate": 1.8006950573584514e-05, - "loss": 0.4433, - "step": 261900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.435007572174072, - "loss_rtd": 0.19027191400527954, - "loss_sent": 6.607301475014538e-05, - "loss_sod": 0.1539205014705658, - "loss_total": 0.34425848722457886, - "step": 261999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.698180675506592, - "loss_rtd": 0.1957779973745346, - "loss_sent": 0.013719347305595875, - "loss_sod": 0.13571713864803314, - "loss_total": 0.34521448612213135, - "step": 261999 - }, - { - "epoch": 0.012, - "grad_norm": 1.1269047260284424, - "learning_rate": 1.798257030493879e-05, - "loss": 0.4432, - "step": 262000 - }, - { - "epoch": 0.012, - "eval_loss": 0.41653695702552795, - "eval_runtime": 150.2225, - "eval_samples_per_second": 102.801, - "eval_steps_per_second": 0.805, - "step": 262000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.099584102630615, - "loss_rtd": 0.22928960621356964, - "loss_sent": 0.2986092269420624, - "loss_sod": 0.04986407607793808, - "loss_total": 0.5777629017829895, - "step": 262099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.942395210266113, - "loss_rtd": 0.2586112916469574, - "loss_sent": 0.08630359172821045, - "loss_sod": 0.016102178022265434, - "loss_total": 0.36101704835891724, - "step": 262099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.3678045272827148, - "learning_rate": 1.7958202932909924e-05, - "loss": 0.4451, - "step": 262100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.783684730529785, - "loss_rtd": 0.2295273095369339, - "loss_sent": 0.4989902079105377, - "loss_sod": 0.03363502770662308, - "loss_total": 0.7621525526046753, - "step": 262199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.774890422821045, - "loss_rtd": 0.22113268077373505, - "loss_sent": 0.15401792526245117, - "loss_sod": 0.037544529885053635, - "loss_total": 0.41269513964653015, - "step": 262199 - }, - { - "epoch": 0.0124, - "grad_norm": 2.615041971206665, - "learning_rate": 1.7933848467313104e-05, - "loss": 0.4259, - "step": 262200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.877339839935303, - "loss_rtd": 0.22457431256771088, - "loss_sent": 0.28257301449775696, - "loss_sod": 0.0448489785194397, - "loss_total": 0.5519963502883911, - "step": 262299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.756425380706787, - "loss_rtd": 0.21935823559761047, - "loss_sent": 0.12605126202106476, - "loss_sod": 0.0387648269534111, - "loss_total": 0.3841743469238281, - "step": 262299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.8164938688278198, - "learning_rate": 1.7909506917958263e-05, - "loss": 0.4672, - "step": 262300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.550956726074219, - "loss_rtd": 0.21570302546024323, - "loss_sent": 0.11563875526189804, - "loss_sod": 0.07274002581834793, - "loss_total": 0.4040818214416504, - "step": 262399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.435138702392578, - "loss_rtd": 0.2403213530778885, - "loss_sent": 0.10892730951309204, - "loss_sod": 0.007292766589671373, - "loss_total": 0.3565414249897003, - "step": 262399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.2493977546691895, - "learning_rate": 1.78851782946502e-05, - "loss": 0.4354, - "step": 262400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.865581035614014, - "loss_rtd": 0.227000892162323, - "loss_sent": 0.1196775734424591, - "loss_sod": 0.03792010247707367, - "loss_total": 0.38459858298301697, - "step": 262499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.682101726531982, - "loss_rtd": 0.23281818628311157, - "loss_sent": 0.1366461217403412, - "loss_sod": 0.26875850558280945, - "loss_total": 0.6382228136062622, - "step": 262499 - }, - { - "epoch": 0.013, - "grad_norm": 1.4447888135910034, - "learning_rate": 1.78608626071885e-05, - "loss": 0.4301, - "step": 262500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.692393779754639, - "loss_rtd": 0.23628023266792297, - "loss_sent": 0.11476903408765793, - "loss_sod": 0.07787059247493744, - "loss_total": 0.42891985177993774, - "step": 262599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.288466930389404, - "loss_rtd": 0.19595885276794434, - "loss_sent": 0.040694888681173325, - "loss_sod": 0.09491953998804092, - "loss_total": 0.3315732777118683, - "step": 262599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.9164319038391113, - "learning_rate": 1.783655986536748e-05, - "loss": 0.4444, - "step": 262600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.399204730987549, - "loss_rtd": 0.19503091275691986, - "loss_sent": 0.06113690510392189, - "loss_sod": 0.06586134433746338, - "loss_total": 0.322029173374176, - "step": 262699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.644018173217773, - "loss_rtd": 0.23641744256019592, - "loss_sent": 0.29530683159828186, - "loss_sod": 0.04435921087861061, - "loss_total": 0.5760834813117981, - "step": 262699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.6195112466812134, - "learning_rate": 1.7812270078976295e-05, - "loss": 0.4474, - "step": 262700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.9916300773620605, - "loss_rtd": 0.23040005564689636, - "loss_sent": 0.12761488556861877, - "loss_sod": 0.03742263466119766, - "loss_total": 0.3954375684261322, - "step": 262799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.535221099853516, - "loss_rtd": 0.2318277657032013, - "loss_sent": 0.1480415314435959, - "loss_sod": 0.013859817758202553, - "loss_total": 0.3937291204929352, - "step": 262799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.9762578010559082, - "learning_rate": 1.778799325779888e-05, - "loss": 0.4293, - "step": 262800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.449334144592285, - "loss_rtd": 0.2052937150001526, - "loss_sent": 0.11499036848545074, - "loss_sod": 0.007230848073959351, - "loss_total": 0.3275149464607239, - "step": 262899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.600157260894775, - "loss_rtd": 0.21847011148929596, - "loss_sent": 0.2771449089050293, - "loss_sod": 0.02621576003730297, - "loss_total": 0.5218307971954346, - "step": 262899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.9009488821029663, - "learning_rate": 1.7763729411613943e-05, - "loss": 0.4548, - "step": 262900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.869690895080566, - "loss_rtd": 0.22194433212280273, - "loss_sent": 0.2831106185913086, - "loss_sod": 0.06669734418392181, - "loss_total": 0.5717523097991943, - "step": 262999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.614821910858154, - "loss_rtd": 0.2193174958229065, - "loss_sent": 0.18294291198253632, - "loss_sod": 0.017757045105099678, - "loss_total": 0.42001745104789734, - "step": 262999 - }, - { - "epoch": 0.014, - "grad_norm": 0.8729706406593323, - "learning_rate": 1.7739478550194928e-05, - "loss": 0.4354, - "step": 263000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4197606146335602, - "eval_runtime": 150.1876, - "eval_samples_per_second": 102.825, - "eval_steps_per_second": 0.806, - "step": 263000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.513040542602539, - "loss_rtd": 0.20443759858608246, - "loss_sent": 0.26258519291877747, - "loss_sod": 0.019108762964606285, - "loss_total": 0.48613154888153076, - "step": 263099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.50419807434082, - "loss_rtd": 0.18771882355213165, - "loss_sent": 0.01435436587780714, - "loss_sod": 0.06438340246677399, - "loss_total": 0.26645660400390625, - "step": 263099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.1289606094360352, - "learning_rate": 1.771524068331009e-05, - "loss": 0.4311, - "step": 263100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.496088981628418, - "loss_rtd": 0.19501489400863647, - "loss_sent": 0.07023801654577255, - "loss_sod": 0.003959077410399914, - "loss_total": 0.2692119777202606, - "step": 263199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.87968635559082, - "loss_rtd": 0.20503193140029907, - "loss_sent": 0.2680976688861847, - "loss_sod": 0.0630282461643219, - "loss_total": 0.5361578464508057, - "step": 263199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.8410677909851074, - "learning_rate": 1.7691015820722445e-05, - "loss": 0.4433, - "step": 263200 - }, - { - "epoch": 0.014598, - "loss_gen": 6.038082122802734, - "loss_rtd": 0.21350103616714478, - "loss_sent": 0.11250603944063187, - "loss_sod": 0.07663729786872864, - "loss_total": 0.4026443660259247, - "step": 263299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.946733474731445, - "loss_rtd": 0.24036170542240143, - "loss_sent": 0.0938752144575119, - "loss_sod": 0.025490881875157356, - "loss_total": 0.35972779989242554, - "step": 263299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.75941401720047, - "learning_rate": 1.7666803972189787e-05, - "loss": 0.4478, - "step": 263300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.910594463348389, - "loss_rtd": 0.22480866312980652, - "loss_sent": 0.1361338347196579, - "loss_sod": 0.10118836164474487, - "loss_total": 0.4621308445930481, - "step": 263399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.547285079956055, - "loss_rtd": 0.2045281082391739, - "loss_sent": 0.0008704860811121762, - "loss_sod": 0.07897263020277023, - "loss_total": 0.2843712270259857, - "step": 263399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.7293432950973511, - "learning_rate": 1.7642605147464604e-05, - "loss": 0.4582, - "step": 263400 - }, - { - "epoch": 0.014998, - "loss_gen": 6.056944847106934, - "loss_rtd": 0.2077402025461197, - "loss_sent": 0.2911964952945709, - "loss_sod": 0.058053940534591675, - "loss_total": 0.5569906234741211, - "step": 263499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.862424850463867, - "loss_rtd": 0.21458794176578522, - "loss_sent": 0.18894952535629272, - "loss_sod": 0.012409215793013573, - "loss_total": 0.4159466624259949, - "step": 263499 - }, - { - "epoch": 0.015, - "grad_norm": 1.3289551734924316, - "learning_rate": 1.761841935629419e-05, - "loss": 0.4299, - "step": 263500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.703284740447998, - "loss_rtd": 0.22918656468391418, - "loss_sent": 0.12732811272144318, - "loss_sod": 0.0786266028881073, - "loss_total": 0.43514126539230347, - "step": 263599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.550841331481934, - "loss_rtd": 0.21863055229187012, - "loss_sent": 0.08928092569112778, - "loss_sod": 0.05062025040388107, - "loss_total": 0.35853174328804016, - "step": 263599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.434622883796692, - "learning_rate": 1.7594246608420596e-05, - "loss": 0.4486, - "step": 263600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.5538787841796875, - "loss_rtd": 0.2212289571762085, - "loss_sent": 0.009823180735111237, - "loss_sod": 0.06804470717906952, - "loss_total": 0.29909685254096985, - "step": 263699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.048125743865967, - "loss_rtd": 0.18726715445518494, - "loss_sent": 2.449906969559379e-05, - "loss_sod": 0.14628250896930695, - "loss_total": 0.33357417583465576, - "step": 263699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.281462550163269, - "learning_rate": 1.7570086913580604e-05, - "loss": 0.4288, - "step": 263700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.608348846435547, - "loss_rtd": 0.20947839319705963, - "loss_sent": 0.24628858268260956, - "loss_sod": 0.00575850298628211, - "loss_total": 0.46152549982070923, - "step": 263799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.382534980773926, - "loss_rtd": 0.20266138017177582, - "loss_sent": 0.016107751056551933, - "loss_sod": 0.14555230736732483, - "loss_total": 0.36432141065597534, - "step": 263799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.0276693105697632, - "learning_rate": 1.7545940281505708e-05, - "loss": 0.4304, - "step": 263800 - }, - { - "epoch": 0.015798, - "loss_gen": 6.2425856590271, - "loss_rtd": 0.2131475806236267, - "loss_sent": 0.10438407212495804, - "loss_sod": 0.15258373320102692, - "loss_total": 0.4701153635978699, - "step": 263899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.785247802734375, - "loss_rtd": 0.23983649909496307, - "loss_sent": 0.13914045691490173, - "loss_sod": 0.005039280280470848, - "loss_total": 0.384016215801239, - "step": 263899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.2071220874786377, - "learning_rate": 1.752180672192219e-05, - "loss": 0.4325, - "step": 263900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.54254150390625, - "loss_rtd": 0.23202620446681976, - "loss_sent": 0.18760034441947937, - "loss_sod": 0.03395552933216095, - "loss_total": 0.4535820782184601, - "step": 263999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.472883701324463, - "loss_rtd": 0.19441907107830048, - "loss_sent": 0.02146313339471817, - "loss_sod": 0.10616981238126755, - "loss_total": 0.322052001953125, - "step": 263999 - }, - { - "epoch": 0.016, - "grad_norm": 0.976776659488678, - "learning_rate": 1.7497686244551038e-05, - "loss": 0.4453, - "step": 264000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4193997383117676, - "eval_runtime": 150.0928, - "eval_samples_per_second": 102.89, - "eval_steps_per_second": 0.806, - "step": 264000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.838901519775391, - "loss_rtd": 0.2291000485420227, - "loss_sent": 0.211017906665802, - "loss_sod": 0.03950728476047516, - "loss_total": 0.47962522506713867, - "step": 264099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.698509216308594, - "loss_rtd": 0.23480352759361267, - "loss_sent": 0.3536294400691986, - "loss_sod": 0.010975979268550873, - "loss_total": 0.5994089841842651, - "step": 264099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.448573350906372, - "learning_rate": 1.7473578859107996e-05, - "loss": 0.4548, - "step": 264100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.902383804321289, - "loss_rtd": 0.2170427292585373, - "loss_sent": 0.14012722671031952, - "loss_sod": 0.004921960178762674, - "loss_total": 0.36209189891815186, - "step": 264199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.668992042541504, - "loss_rtd": 0.2066890150308609, - "loss_sent": 0.04021590203046799, - "loss_sod": 0.16928046941757202, - "loss_total": 0.4161853790283203, - "step": 264199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.0368317365646362, - "learning_rate": 1.7449484575303483e-05, - "loss": 0.4307, - "step": 264200 - }, - { - "epoch": 0.016598, - "loss_gen": 6.014760971069336, - "loss_rtd": 0.23033563792705536, - "loss_sent": 0.161784365773201, - "loss_sod": 0.0634281262755394, - "loss_total": 0.45554810762405396, - "step": 264299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.611240386962891, - "loss_rtd": 0.21924929320812225, - "loss_sent": 0.08862759917974472, - "loss_sod": 0.0011986112222075462, - "loss_total": 0.3090755045413971, - "step": 264299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8183719515800476, - "learning_rate": 1.742540340284269e-05, - "loss": 0.4435, - "step": 264300 - }, - { - "epoch": 0.016798, - "loss_gen": 6.161042213439941, - "loss_rtd": 0.21823842823505402, - "loss_sent": 0.1290140599012375, - "loss_sod": 0.054388489574193954, - "loss_total": 0.40164095163345337, - "step": 264399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.768616676330566, - "loss_rtd": 0.2210206836462021, - "loss_sent": 0.1565665900707245, - "loss_sod": 0.04180103540420532, - "loss_total": 0.4193883240222931, - "step": 264399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.7145875692367554, - "learning_rate": 1.7401335351425528e-05, - "loss": 0.4419, - "step": 264400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.702146530151367, - "loss_rtd": 0.23462967574596405, - "loss_sent": 0.13261710107326508, - "loss_sod": 0.050790537148714066, - "loss_total": 0.4180372953414917, - "step": 264499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.901893615722656, - "loss_rtd": 0.23588888347148895, - "loss_sent": 0.16363589465618134, - "loss_sod": 0.0845356285572052, - "loss_total": 0.4840604066848755, - "step": 264499 - }, - { - "epoch": 0.017, - "grad_norm": 0.9297024011611938, - "learning_rate": 1.7377280430746573e-05, - "loss": 0.44, - "step": 264500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.765101909637451, - "loss_rtd": 0.23689806461334229, - "loss_sent": 0.15025775134563446, - "loss_sod": 0.013489529490470886, - "loss_total": 0.40064537525177, - "step": 264599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.952276706695557, - "loss_rtd": 0.2074059545993805, - "loss_sent": 0.017983173951506615, - "loss_sod": 0.09613238275051117, - "loss_total": 0.32152149081230164, - "step": 264599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.932601273059845, - "learning_rate": 1.7353238650495156e-05, - "loss": 0.4379, - "step": 264600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.697495460510254, - "loss_rtd": 0.19966207444667816, - "loss_sent": 0.09372258186340332, - "loss_sod": 0.036938801407814026, - "loss_total": 0.3303234577178955, - "step": 264699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.791696548461914, - "loss_rtd": 0.20207498967647552, - "loss_sent": 0.4867168068885803, - "loss_sod": 0.06019856780767441, - "loss_total": 0.7489903569221497, - "step": 264699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.1397545337677002, - "learning_rate": 1.7329210020355307e-05, - "loss": 0.4252, - "step": 264700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.878633975982666, - "loss_rtd": 0.19591793417930603, - "loss_sent": 0.08669566363096237, - "loss_sod": 0.025733646005392075, - "loss_total": 0.30834725499153137, - "step": 264799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.755843162536621, - "loss_rtd": 0.24229732155799866, - "loss_sent": 0.18416090309619904, - "loss_sod": 0.018144188448786736, - "loss_total": 0.44460242986679077, - "step": 264799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.7541510462760925, - "learning_rate": 1.7305194550005776e-05, - "loss": 0.45, - "step": 264800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.819304943084717, - "loss_rtd": 0.20940464735031128, - "loss_sent": 0.14331218600273132, - "loss_sod": 0.046574972569942474, - "loss_total": 0.3992918133735657, - "step": 264899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.564984321594238, - "loss_rtd": 0.22418752312660217, - "loss_sent": 0.3626002371311188, - "loss_sod": 0.04436088353395462, - "loss_total": 0.631148636341095, - "step": 264899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.0071419477462769, - "learning_rate": 1.728119224911995e-05, - "loss": 0.4449, - "step": 264900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.87677526473999, - "loss_rtd": 0.19869399070739746, - "loss_sent": 0.5174181461334229, - "loss_sod": 0.03321876749396324, - "loss_total": 0.7493308782577515, - "step": 264999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.963561058044434, - "loss_rtd": 0.2295539677143097, - "loss_sent": 0.06396544724702835, - "loss_sod": 0.03682500496506691, - "loss_total": 0.33034440875053406, - "step": 264999 - }, - { - "epoch": 0.018, - "grad_norm": 1.95595383644104, - "learning_rate": 1.7257203127365972e-05, - "loss": 0.4391, - "step": 265000 - }, - { - "epoch": 0.018, - "eval_loss": 0.41752806305885315, - "eval_runtime": 150.3532, - "eval_samples_per_second": 102.711, - "eval_steps_per_second": 0.805, - "step": 265000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.199895858764648, - "loss_rtd": 0.18209359049797058, - "loss_sent": 0.012325072661042213, - "loss_sod": 0.2070392221212387, - "loss_total": 0.40145787596702576, - "step": 265099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.887097358703613, - "loss_rtd": 0.2314467579126358, - "loss_sent": 0.17575231194496155, - "loss_sod": 0.007308542262762785, - "loss_total": 0.4145076274871826, - "step": 265099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.245650291442871, - "learning_rate": 1.7233227194406665e-05, - "loss": 0.4219, - "step": 265100 - }, - { - "epoch": 0.018398, - "loss_gen": 6.223198413848877, - "loss_rtd": 0.24890851974487305, - "loss_sent": 0.09819793701171875, - "loss_sod": 0.07153752446174622, - "loss_total": 0.418643981218338, - "step": 265199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.632750034332275, - "loss_rtd": 0.22274403274059296, - "loss_sent": 0.11046051979064941, - "loss_sod": 0.06312073767185211, - "loss_total": 0.3963252902030945, - "step": 265199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.1298741102218628, - "learning_rate": 1.7209264459899537e-05, - "loss": 0.438, - "step": 265200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.852601051330566, - "loss_rtd": 0.206861674785614, - "loss_sent": 0.06961745023727417, - "loss_sod": 0.013976778835058212, - "loss_total": 0.2904559075832367, - "step": 265299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.838532447814941, - "loss_rtd": 0.230842724442482, - "loss_sent": 0.293140172958374, - "loss_sod": 0.025453124195337296, - "loss_total": 0.5494360327720642, - "step": 265299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.7868773341178894, - "learning_rate": 1.7185314933496744e-05, - "loss": 0.4466, - "step": 265300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.1168532371521, - "loss_rtd": 0.1774856299161911, - "loss_sent": 0.0019256524974480271, - "loss_sod": 0.016026396304368973, - "loss_total": 0.19543766975402832, - "step": 265399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.6769561767578125, - "loss_rtd": 0.22008772194385529, - "loss_sent": 0.148380845785141, - "loss_sod": 0.05477447807788849, - "loss_total": 0.42324304580688477, - "step": 265399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.9292224049568176, - "learning_rate": 1.7161378624845175e-05, - "loss": 0.4324, - "step": 265400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.493320941925049, - "loss_rtd": 0.20276357233524323, - "loss_sent": 0.22482146322727203, - "loss_sod": 0.002522670431062579, - "loss_total": 0.4301077127456665, - "step": 265499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.925997734069824, - "loss_rtd": 0.2251824587583542, - "loss_sent": 0.11820624768733978, - "loss_sod": 0.028906188905239105, - "loss_total": 0.37229490280151367, - "step": 265499 - }, - { - "epoch": 0.019, - "grad_norm": 1.5733423233032227, - "learning_rate": 1.7137455543586372e-05, - "loss": 0.4297, - "step": 265500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.135681629180908, - "loss_rtd": 0.17314018309116364, - "loss_sent": 0.00036163756158202887, - "loss_sod": 0.09965495020151138, - "loss_total": 0.2731567919254303, - "step": 265599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.846146106719971, - "loss_rtd": 0.2163507491350174, - "loss_sent": 0.14655791223049164, - "loss_sod": 0.09526507556438446, - "loss_total": 0.4581737518310547, - "step": 265599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.3445922136306763, - "learning_rate": 1.711354569935656e-05, - "loss": 0.4375, - "step": 265600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.286101818084717, - "loss_rtd": 0.1882425844669342, - "loss_sent": 5.8458357671042904e-05, - "loss_sod": 0.04162990301847458, - "loss_total": 0.22993095219135284, - "step": 265699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.513396263122559, - "loss_rtd": 0.19958806037902832, - "loss_sent": 0.06855539232492447, - "loss_sod": 0.1291506141424179, - "loss_total": 0.3972940742969513, - "step": 265699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.7645038366317749, - "learning_rate": 1.7089649101786588e-05, - "loss": 0.4433, - "step": 265700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.727639675140381, - "loss_rtd": 0.23569880425930023, - "loss_sent": 0.39408543705940247, - "loss_sod": 0.05898343026638031, - "loss_total": 0.688767671585083, - "step": 265799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.962072372436523, - "loss_rtd": 0.22397878766059875, - "loss_sent": 0.05560656636953354, - "loss_sod": 0.05383361876010895, - "loss_total": 0.33341899514198303, - "step": 265799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.8413041830062866, - "learning_rate": 1.7065765760502022e-05, - "loss": 0.429, - "step": 265800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.179636478424072, - "loss_rtd": 0.1974020004272461, - "loss_sent": 0.03730133920907974, - "loss_sod": 0.11626625061035156, - "loss_total": 0.3509695827960968, - "step": 265899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.240450859069824, - "loss_rtd": 0.18879199028015137, - "loss_sent": 0.07717913389205933, - "loss_sod": 0.0672200545668602, - "loss_total": 0.3331911861896515, - "step": 265899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.9509782195091248, - "learning_rate": 1.7041895685123087e-05, - "loss": 0.4245, - "step": 265900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.625738620758057, - "loss_rtd": 0.22091302275657654, - "loss_sent": 0.27037566900253296, - "loss_sod": 0.006796187721192837, - "loss_total": 0.4980848729610443, - "step": 265999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.942508697509766, - "loss_rtd": 0.2323511242866516, - "loss_sent": 0.1803351491689682, - "loss_sod": 0.11847847700119019, - "loss_total": 0.5311647653579712, - "step": 265999 - }, - { - "epoch": 0.02, - "grad_norm": 1.4394222497940063, - "learning_rate": 1.7018038885264615e-05, - "loss": 0.4177, - "step": 266000 - }, - { - "epoch": 0.02, - "eval_loss": 0.42416712641716003, - "eval_runtime": 151.8511, - "eval_samples_per_second": 101.698, - "eval_steps_per_second": 0.797, - "step": 266000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.478032112121582, - "loss_rtd": 0.2207171618938446, - "loss_sent": 0.11589272320270538, - "loss_sod": 0.030967382714152336, - "loss_total": 0.3675772547721863, - "step": 266099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.847688674926758, - "loss_rtd": 0.20218083262443542, - "loss_sent": 0.16010279953479767, - "loss_sod": 0.04950456693768501, - "loss_total": 0.4117882251739502, - "step": 266099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.408447265625, - "learning_rate": 1.6994195370536135e-05, - "loss": 0.4339, - "step": 266100 - }, - { - "epoch": 0.020398, - "loss_gen": 6.003078937530518, - "loss_rtd": 0.23656605184078217, - "loss_sent": 0.19200684130191803, - "loss_sod": 0.02124079503118992, - "loss_total": 0.4498136639595032, - "step": 266199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.59977388381958, - "loss_rtd": 0.2051219791173935, - "loss_sent": 0.11943111568689346, - "loss_sod": 0.018068883568048477, - "loss_total": 0.34262198209762573, - "step": 266199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.6684094667434692, - "learning_rate": 1.697036515054181e-05, - "loss": 0.4203, - "step": 266200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.6577277183532715, - "loss_rtd": 0.2120039314031601, - "loss_sent": 0.12950928509235382, - "loss_sod": 0.029606901109218597, - "loss_total": 0.3711200952529907, - "step": 266299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.689751148223877, - "loss_rtd": 0.22746476531028748, - "loss_sent": 0.2197207361459732, - "loss_sod": 0.08924826979637146, - "loss_total": 0.536433756351471, - "step": 266299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.1296987533569336, - "learning_rate": 1.694654823488047e-05, - "loss": 0.4351, - "step": 266300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.731989860534668, - "loss_rtd": 0.21588566899299622, - "loss_sent": 0.10215010493993759, - "loss_sod": 0.06963024288415909, - "loss_total": 0.3876660168170929, - "step": 266399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.85683536529541, - "loss_rtd": 0.21849097311496735, - "loss_sent": 0.20090869069099426, - "loss_sod": 0.0825258195400238, - "loss_total": 0.5019254684448242, - "step": 266399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.3961708545684814, - "learning_rate": 1.692274463314553e-05, - "loss": 0.4575, - "step": 266400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.548666954040527, - "loss_rtd": 0.22105520963668823, - "loss_sent": 0.1915527731180191, - "loss_sod": 0.03421821817755699, - "loss_total": 0.4468262195587158, - "step": 266499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.296370506286621, - "loss_rtd": 0.1955137401819229, - "loss_sent": 0.031068984419107437, - "loss_sod": 0.10109543800354004, - "loss_total": 0.3276781439781189, - "step": 266499 - }, - { - "epoch": 0.021, - "grad_norm": 0.9188606142997742, - "learning_rate": 1.68989543549251e-05, - "loss": 0.4355, - "step": 266500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.99031400680542, - "loss_rtd": 0.2285676747560501, - "loss_sent": 0.18636761605739594, - "loss_sod": 0.03560897707939148, - "loss_total": 0.4505442976951599, - "step": 266599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.960809230804443, - "loss_rtd": 0.20881226658821106, - "loss_sent": 0.0602426715195179, - "loss_sod": 0.1764306277036667, - "loss_total": 0.44548556208610535, - "step": 266599 - }, - { - "epoch": 0.0212, - "grad_norm": 2.0185892581939697, - "learning_rate": 1.6875177409801897e-05, - "loss": 0.4338, - "step": 266600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.464375972747803, - "loss_rtd": 0.19333864748477936, - "loss_sent": 0.023246033117175102, - "loss_sod": 0.08310497552156448, - "loss_total": 0.2996896505355835, - "step": 266699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.380824089050293, - "loss_rtd": 0.1889808624982834, - "loss_sent": 0.08529441058635712, - "loss_sod": 0.0023418976925313473, - "loss_total": 0.276617169380188, - "step": 266699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.6752312183380127, - "learning_rate": 1.685141380735329e-05, - "loss": 0.4415, - "step": 266700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.404900074005127, - "loss_rtd": 0.22304946184158325, - "loss_sent": 0.00990715716034174, - "loss_sod": 0.0994158387184143, - "loss_total": 0.3323724567890167, - "step": 266799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.072579383850098, - "loss_rtd": 0.18661810457706451, - "loss_sent": 2.7130905436933972e-05, - "loss_sod": 0.1396275758743286, - "loss_total": 0.32627278566360474, - "step": 266799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.9894542098045349, - "learning_rate": 1.682766355715122e-05, - "loss": 0.45, - "step": 266800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.838479995727539, - "loss_rtd": 0.21540221571922302, - "loss_sent": 0.3221489191055298, - "loss_sod": 0.012889444828033447, - "loss_total": 0.5504405498504639, - "step": 266899 - }, - { - "epoch": 0.021798, - "loss_gen": 6.1297125816345215, - "loss_rtd": 0.22166520357131958, - "loss_sent": 0.15729407966136932, - "loss_sod": 0.019165389239788055, - "loss_total": 0.39812469482421875, - "step": 266899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.8149746656417847, - "learning_rate": 1.6803926668762298e-05, - "loss": 0.4442, - "step": 266900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.621804714202881, - "loss_rtd": 0.23044511675834656, - "loss_sent": 0.23261255025863647, - "loss_sod": 0.02542426437139511, - "loss_total": 0.48848193883895874, - "step": 266999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.649903297424316, - "loss_rtd": 0.21168476343154907, - "loss_sent": 0.059327382594347, - "loss_sod": 0.11903805285692215, - "loss_total": 0.3900502026081085, - "step": 266999 - }, - { - "epoch": 0.022, - "grad_norm": 1.2215574979782104, - "learning_rate": 1.6780203151747742e-05, - "loss": 0.4099, - "step": 267000 - }, - { - "epoch": 0.022, - "eval_loss": 0.4148196280002594, - "eval_runtime": 150.5299, - "eval_samples_per_second": 102.591, - "eval_steps_per_second": 0.804, - "step": 267000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.30557918548584, - "loss_rtd": 0.1764380931854248, - "loss_sent": 2.36851137742633e-05, - "loss_sod": 0.050875790417194366, - "loss_total": 0.2273375540971756, - "step": 267099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.727357387542725, - "loss_rtd": 0.19363459944725037, - "loss_sent": 0.5238352417945862, - "loss_sod": 0.13281087577342987, - "loss_total": 0.8502807021141052, - "step": 267099 - }, - { - "epoch": 0.0222, - "grad_norm": 2.4402108192443848, - "learning_rate": 1.6756493015663403e-05, - "loss": 0.4301, - "step": 267100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.480380535125732, - "loss_rtd": 0.23758484423160553, - "loss_sent": 0.12472793459892273, - "loss_sod": 0.014917224645614624, - "loss_total": 0.3772300183773041, - "step": 267199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.757543563842773, - "loss_rtd": 0.22850534319877625, - "loss_sent": 0.16525429487228394, - "loss_sod": 0.03858501464128494, - "loss_total": 0.4323446452617645, - "step": 267199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.9831203818321228, - "learning_rate": 1.6732796270059693e-05, - "loss": 0.4452, - "step": 267200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.9338250160217285, - "loss_rtd": 0.22473864257335663, - "loss_sent": 0.10696947574615479, - "loss_sod": 0.07062780857086182, - "loss_total": 0.4023359417915344, - "step": 267299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.790135860443115, - "loss_rtd": 0.23071084916591644, - "loss_sent": 0.15764625370502472, - "loss_sod": 0.14289075136184692, - "loss_total": 0.5312478542327881, - "step": 267299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.2271019220352173, - "learning_rate": 1.6709112924481657e-05, - "loss": 0.4342, - "step": 267300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.565337657928467, - "loss_rtd": 0.20565111935138702, - "loss_sent": 0.041761934757232666, - "loss_sod": 0.15724752843379974, - "loss_total": 0.40466058254241943, - "step": 267399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.711415767669678, - "loss_rtd": 0.20858651399612427, - "loss_sent": 0.15100525319576263, - "loss_sod": 0.027561167255043983, - "loss_total": 0.38715294003486633, - "step": 267399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.9476865530014038, - "learning_rate": 1.6685442988468973e-05, - "loss": 0.436, - "step": 267400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.75596284866333, - "loss_rtd": 0.2451847940683365, - "loss_sent": 0.17701643705368042, - "loss_sod": 0.04414738714694977, - "loss_total": 0.4663486182689667, - "step": 267499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.96491813659668, - "loss_rtd": 0.22007696330547333, - "loss_sent": 0.22976228594779968, - "loss_sod": 0.014164028689265251, - "loss_total": 0.4640032649040222, - "step": 267499 - }, - { - "epoch": 0.023, - "grad_norm": 1.0047032833099365, - "learning_rate": 1.6661786471555858e-05, - "loss": 0.4352, - "step": 267500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.881296634674072, - "loss_rtd": 0.2165652960538864, - "loss_sent": 0.03391829505562782, - "loss_sod": 0.07588096708059311, - "loss_total": 0.32636454701423645, - "step": 267599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.825904846191406, - "loss_rtd": 0.2303563356399536, - "loss_sent": 0.32303234934806824, - "loss_sod": 0.03734510391950607, - "loss_total": 0.5907337665557861, - "step": 267599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.319616436958313, - "learning_rate": 1.663814338327116e-05, - "loss": 0.4351, - "step": 267600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.730740547180176, - "loss_rtd": 0.2254457324743271, - "loss_sent": 0.2892443835735321, - "loss_sod": 0.08354109525680542, - "loss_total": 0.5982311964035034, - "step": 267699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.685034275054932, - "loss_rtd": 0.21458850800991058, - "loss_sent": 0.09530942142009735, - "loss_sod": 0.061370886862277985, - "loss_total": 0.3712688088417053, - "step": 267699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.315625548362732, - "learning_rate": 1.661451373313832e-05, - "loss": 0.4301, - "step": 267700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.6471662521362305, - "loss_rtd": 0.2073613703250885, - "loss_sent": 0.1655789017677307, - "loss_sod": 0.026781873777508736, - "loss_total": 0.399722158908844, - "step": 267799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.882993221282959, - "loss_rtd": 0.23457126319408417, - "loss_sent": 0.07836773246526718, - "loss_sod": 0.01736392267048359, - "loss_total": 0.3303029239177704, - "step": 267799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.6371013522148132, - "learning_rate": 1.659089753067537e-05, - "loss": 0.4307, - "step": 267800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.863724708557129, - "loss_rtd": 0.22295472025871277, - "loss_sent": 0.23114438354969025, - "loss_sod": 0.013383528217673302, - "loss_total": 0.46748262643814087, - "step": 267899 - }, - { - "epoch": 0.023798, - "loss_gen": 6.320718288421631, - "loss_rtd": 0.23582635819911957, - "loss_sent": 0.10020633786916733, - "loss_sod": 0.21261908113956451, - "loss_total": 0.5486517548561096, - "step": 267899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.1309590339660645, - "learning_rate": 1.656729478539488e-05, - "loss": 0.434, - "step": 267900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.900601387023926, - "loss_rtd": 0.20932823419570923, - "loss_sent": 0.2272641807794571, - "loss_sod": 0.12037929892539978, - "loss_total": 0.5569717288017273, - "step": 267999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.909946441650391, - "loss_rtd": 0.21609345078468323, - "loss_sent": 0.16744546592235565, - "loss_sod": 0.02023504301905632, - "loss_total": 0.4037739634513855, - "step": 267999 - }, - { - "epoch": 0.024, - "grad_norm": 1.2057437896728516, - "learning_rate": 1.6543705506804057e-05, - "loss": 0.4413, - "step": 268000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4166734218597412, - "eval_runtime": 150.2464, - "eval_samples_per_second": 102.784, - "eval_steps_per_second": 0.805, - "step": 268000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.234063625335693, - "loss_rtd": 0.18069347739219666, - "loss_sent": 0.0002493959618732333, - "loss_sod": 0.12024861574172974, - "loss_total": 0.3011914789676666, - "step": 268099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.2875213623046875, - "loss_rtd": 0.1950591504573822, - "loss_sent": 0.01434609480202198, - "loss_sod": 0.0864410549402237, - "loss_total": 0.2958463132381439, - "step": 268099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.6516631245613098, - "learning_rate": 1.6520129704404658e-05, - "loss": 0.4361, - "step": 268100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.475121974945068, - "loss_rtd": 0.2370995432138443, - "loss_sent": 0.21873275935649872, - "loss_sod": 0.010058843530714512, - "loss_total": 0.46589115262031555, - "step": 268199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.579318523406982, - "loss_rtd": 0.23786893486976624, - "loss_sent": 0.09398513287305832, - "loss_sod": 0.014935585670173168, - "loss_total": 0.3467896580696106, - "step": 268199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.305841326713562, - "learning_rate": 1.6496567387693018e-05, - "loss": 0.4131, - "step": 268200 - }, - { - "epoch": 0.024598, - "loss_gen": 6.101864337921143, - "loss_rtd": 0.19255444407463074, - "loss_sent": 0.35791492462158203, - "loss_sod": 0.17142772674560547, - "loss_total": 0.7218971252441406, - "step": 268299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.433445453643799, - "loss_rtd": 0.19646601378917694, - "loss_sent": 0.0854458138346672, - "loss_sod": 0.03310718014836311, - "loss_total": 0.31501901149749756, - "step": 268299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.7531074285507202, - "learning_rate": 1.647301856616002e-05, - "loss": 0.4563, - "step": 268300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.8197102546691895, - "loss_rtd": 0.21163439750671387, - "loss_sent": 0.13387049734592438, - "loss_sod": 0.035824965685606, - "loss_total": 0.38132986426353455, - "step": 268399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.995186805725098, - "loss_rtd": 0.21819134056568146, - "loss_sent": 0.26021432876586914, - "loss_sod": 0.05272424966096878, - "loss_total": 0.5311299562454224, - "step": 268399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.9215118885040283, - "learning_rate": 1.644948324929113e-05, - "loss": 0.4256, - "step": 268400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.807307243347168, - "loss_rtd": 0.21854138374328613, - "loss_sent": 0.10618340969085693, - "loss_sod": 0.035427503287792206, - "loss_total": 0.36015230417251587, - "step": 268499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.153491497039795, - "loss_rtd": 0.20173749327659607, - "loss_sent": 2.4461814973619767e-05, - "loss_sod": 0.125558003783226, - "loss_total": 0.3273199498653412, - "step": 268499 - }, - { - "epoch": 0.025, - "grad_norm": 0.9242141842842102, - "learning_rate": 1.6425961446566373e-05, - "loss": 0.4461, - "step": 268500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.697330951690674, - "loss_rtd": 0.2073066085577011, - "loss_sent": 0.007205627392977476, - "loss_sod": 0.06607302278280258, - "loss_total": 0.28058525919914246, - "step": 268599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.58998441696167, - "loss_rtd": 0.23148037493228912, - "loss_sent": 0.18772272765636444, - "loss_sod": 0.006988976616412401, - "loss_total": 0.4261920750141144, - "step": 268599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.9894757270812988, - "learning_rate": 1.640245316746035e-05, - "loss": 0.4383, - "step": 268600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.730067729949951, - "loss_rtd": 0.2177078127861023, - "loss_sent": 0.2573537528514862, - "loss_sod": 0.038831986486911774, - "loss_total": 0.5138935446739197, - "step": 268699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.883787631988525, - "loss_rtd": 0.21808990836143494, - "loss_sent": 0.4537474811077118, - "loss_sod": 0.010962520726025105, - "loss_total": 0.6827999353408813, - "step": 268699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8552432656288147, - "learning_rate": 1.6378958421442153e-05, - "loss": 0.4247, - "step": 268700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.448304176330566, - "loss_rtd": 0.21249639987945557, - "loss_sent": 0.0010201549157500267, - "loss_sod": 0.07972157001495361, - "loss_total": 0.2932381331920624, - "step": 268799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.408642768859863, - "loss_rtd": 0.18969331681728363, - "loss_sent": 0.008647771552205086, - "loss_sod": 0.052934639155864716, - "loss_total": 0.2512757182121277, - "step": 268799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.7549141645431519, - "learning_rate": 1.635547721797549e-05, - "loss": 0.4484, - "step": 268800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.7269816398620605, - "loss_rtd": 0.19920344650745392, - "loss_sent": 0.2688048779964447, - "loss_sod": 0.013905920088291168, - "loss_total": 0.4819142520427704, - "step": 268899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.738521575927734, - "loss_rtd": 0.20524026453495026, - "loss_sent": 0.046123404055833817, - "loss_sod": 0.009331931360065937, - "loss_total": 0.26069560647010803, - "step": 268899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.1248387098312378, - "learning_rate": 1.633200956651859e-05, - "loss": 0.4308, - "step": 268900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.629986763000488, - "loss_rtd": 0.20220881700515747, - "loss_sent": 0.13901971280574799, - "loss_sod": 0.012678693048655987, - "loss_total": 0.3539072275161743, - "step": 268999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.598387241363525, - "loss_rtd": 0.20648714900016785, - "loss_sent": 0.08330265432596207, - "loss_sod": 0.017148887738585472, - "loss_total": 0.30693867802619934, - "step": 268999 - }, - { - "epoch": 0.026, - "grad_norm": 0.48872238397598267, - "learning_rate": 1.6308555476524194e-05, - "loss": 0.4399, - "step": 269000 - }, - { - "epoch": 0.026, - "eval_loss": 0.41795945167541504, - "eval_runtime": 150.0528, - "eval_samples_per_second": 102.917, - "eval_steps_per_second": 0.806, - "step": 269000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.742762088775635, - "loss_rtd": 0.2292107194662094, - "loss_sent": 0.17488723993301392, - "loss_sod": 0.027320269495248795, - "loss_total": 0.431418240070343, - "step": 269099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.613757133483887, - "loss_rtd": 0.24387536942958832, - "loss_sent": 0.39201292395591736, - "loss_sod": 0.11247213184833527, - "loss_total": 0.7483603954315186, - "step": 269099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.2894322872161865, - "learning_rate": 1.628511495743963e-05, - "loss": 0.4444, - "step": 269100 - }, - { - "epoch": 0.026398, - "loss_gen": 6.1032843589782715, - "loss_rtd": 0.23158025741577148, - "loss_sent": 0.21154244244098663, - "loss_sod": 0.018028700724244118, - "loss_total": 0.4611514210700989, - "step": 269199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.8828535079956055, - "loss_rtd": 0.20052839815616608, - "loss_sent": 0.2413274645805359, - "loss_sod": 0.01449059322476387, - "loss_total": 0.45634645223617554, - "step": 269199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.0119686126708984, - "learning_rate": 1.6261688018706724e-05, - "loss": 0.4329, - "step": 269200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.8638834953308105, - "loss_rtd": 0.22481068968772888, - "loss_sent": 0.2950937747955322, - "loss_sod": 0.007474738638848066, - "loss_total": 0.5273792147636414, - "step": 269299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.876376152038574, - "loss_rtd": 0.22316375374794006, - "loss_sent": 0.04835449531674385, - "loss_sod": 0.009289991110563278, - "loss_total": 0.2808082401752472, - "step": 269299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.6763526201248169, - "learning_rate": 1.6238274669761866e-05, - "loss": 0.448, - "step": 269300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.312898635864258, - "loss_rtd": 0.18635858595371246, - "loss_sent": 6.018896237947047e-05, - "loss_sod": 0.09727039188146591, - "loss_total": 0.2836891710758209, - "step": 269399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.71761417388916, - "loss_rtd": 0.21085000038146973, - "loss_sent": 0.14621832966804504, - "loss_sod": 0.06339356303215027, - "loss_total": 0.42046189308166504, - "step": 269399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.9217313528060913, - "learning_rate": 1.6214874920035917e-05, - "loss": 0.4303, - "step": 269400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.7887749671936035, - "loss_rtd": 0.21253244578838348, - "loss_sent": 0.07061567157506943, - "loss_sod": 0.06723160296678543, - "loss_total": 0.35037973523139954, - "step": 269499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.745434761047363, - "loss_rtd": 0.2194966971874237, - "loss_sent": 0.0950508713722229, - "loss_sod": 0.01831759139895439, - "loss_total": 0.3328651785850525, - "step": 269499 - }, - { - "epoch": 0.027, - "grad_norm": 0.7986164093017578, - "learning_rate": 1.619148877895431e-05, - "loss": 0.4529, - "step": 269500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.825835704803467, - "loss_rtd": 0.2061392366886139, - "loss_sent": 0.021851060912013054, - "loss_sod": 0.013518210500478745, - "loss_total": 0.24150851368904114, - "step": 269599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.209312438964844, - "loss_rtd": 0.1861337423324585, - "loss_sent": 0.003053348045796156, - "loss_sod": 0.1544070541858673, - "loss_total": 0.34359413385391235, - "step": 269599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.8490173816680908, - "learning_rate": 1.6168116255936994e-05, - "loss": 0.4473, - "step": 269600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.419999599456787, - "loss_rtd": 0.17292170226573944, - "loss_sent": 0.0003229019930586219, - "loss_sod": 0.1488613784313202, - "loss_total": 0.3221059739589691, - "step": 269699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.54251766204834, - "loss_rtd": 0.19121377170085907, - "loss_sent": 0.07150860875844955, - "loss_sod": 0.10690651088953018, - "loss_total": 0.36962890625, - "step": 269699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.1455020904541016, - "learning_rate": 1.6144757360398395e-05, - "loss": 0.4556, - "step": 269700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.758680820465088, - "loss_rtd": 0.20524001121520996, - "loss_sent": 0.4888244867324829, - "loss_sod": 0.007920067757368088, - "loss_total": 0.7019845843315125, - "step": 269799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.787389755249023, - "loss_rtd": 0.22080378234386444, - "loss_sent": 0.10038325935602188, - "loss_sod": 0.1250607669353485, - "loss_total": 0.44624778628349304, - "step": 269799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.2480300664901733, - "learning_rate": 1.6121412101747492e-05, - "loss": 0.4397, - "step": 269800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.59853982925415, - "loss_rtd": 0.20155419409275055, - "loss_sent": 0.05726367607712746, - "loss_sod": 0.02039262093603611, - "loss_total": 0.27921050786972046, - "step": 269899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.393312931060791, - "loss_rtd": 0.169960156083107, - "loss_sent": 0.020561737939715385, - "loss_sod": 0.04915458336472511, - "loss_total": 0.23967647552490234, - "step": 269899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.6546497344970703, - "learning_rate": 1.609808048938773e-05, - "loss": 0.4419, - "step": 269900 - }, - { - "epoch": 0.027998, - "loss_gen": 6.180161952972412, - "loss_rtd": 0.22595210373401642, - "loss_sent": 0.15271803736686707, - "loss_sod": 0.09914088249206543, - "loss_total": 0.4778110086917877, - "step": 269999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.168148517608643, - "loss_rtd": 0.20173440873622894, - "loss_sent": 0.005457804538309574, - "loss_sod": 0.11283750087022781, - "loss_total": 0.32002973556518555, - "step": 269999 - }, - { - "epoch": 0.028, - "grad_norm": 1.3075617551803589, - "learning_rate": 1.6074762532717093e-05, - "loss": 0.4638, - "step": 270000 - }, - { - "epoch": 0.028, - "eval_loss": 0.4133795201778412, - "eval_runtime": 150.3009, - "eval_samples_per_second": 102.747, - "eval_steps_per_second": 0.805, - "step": 270000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.982174873352051, - "loss_rtd": 0.22061192989349365, - "loss_sent": 0.1923549324274063, - "loss_sod": 0.04667946323752403, - "loss_total": 0.4596463441848755, - "step": 270099 - }, - { - "epoch": 0.028198, - "loss_gen": 6.057941436767578, - "loss_rtd": 0.21350853145122528, - "loss_sent": 0.08760643750429153, - "loss_sod": 0.08947840332984924, - "loss_total": 0.39059334993362427, - "step": 270099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.9949202537536621, - "learning_rate": 1.605145824112805e-05, - "loss": 0.412, - "step": 270100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.641507625579834, - "loss_rtd": 0.1965770423412323, - "loss_sent": 0.08672265708446503, - "loss_sod": 0.05908845737576485, - "loss_total": 0.3423881530761719, - "step": 270199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.62833833694458, - "loss_rtd": 0.2123500108718872, - "loss_sent": 0.06860460340976715, - "loss_sod": 0.16088838875293732, - "loss_total": 0.4418429732322693, - "step": 270199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.074608564376831, - "learning_rate": 1.602816762400758e-05, - "loss": 0.4417, - "step": 270200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.634223461151123, - "loss_rtd": 0.2132130265235901, - "loss_sent": 0.058612026274204254, - "loss_sod": 0.09744498133659363, - "loss_total": 0.3692700266838074, - "step": 270299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.164697647094727, - "loss_rtd": 0.18826591968536377, - "loss_sent": 0.004804539028555155, - "loss_sod": 0.13274680078029633, - "loss_total": 0.3258172869682312, - "step": 270299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.1823171377182007, - "learning_rate": 1.6004890690737112e-05, - "loss": 0.457, - "step": 270300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.896481990814209, - "loss_rtd": 0.22735892236232758, - "loss_sent": 0.08205220848321915, - "loss_sod": 0.1463623344898224, - "loss_total": 0.4557734429836273, - "step": 270399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.64373779296875, - "loss_rtd": 0.22357432544231415, - "loss_sent": 0.2212490439414978, - "loss_sod": 0.017883561551570892, - "loss_total": 0.46270692348480225, - "step": 270399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.1900618076324463, - "learning_rate": 1.5981627450692614e-05, - "loss": 0.4442, - "step": 270400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.767705917358398, - "loss_rtd": 0.22999972105026245, - "loss_sent": 0.15789847075939178, - "loss_sod": 0.14105084538459778, - "loss_total": 0.5289490222930908, - "step": 270499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.778688907623291, - "loss_rtd": 0.2190682291984558, - "loss_sent": 0.40757206082344055, - "loss_sod": 0.02697756141424179, - "loss_total": 0.6536178588867188, - "step": 270499 - }, - { - "epoch": 0.029, - "grad_norm": 1.4509148597717285, - "learning_rate": 1.5958377913244527e-05, - "loss": 0.4339, - "step": 270500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.53188943862915, - "loss_rtd": 0.19379432499408722, - "loss_sent": 0.09786945581436157, - "loss_sod": 0.08292634785175323, - "loss_total": 0.374590128660202, - "step": 270599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.495702266693115, - "loss_rtd": 0.20815308392047882, - "loss_sent": 0.01570098102092743, - "loss_sod": 0.1423245370388031, - "loss_total": 0.36617863178253174, - "step": 270599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.8742807507514954, - "learning_rate": 1.5935142087757727e-05, - "loss": 0.4349, - "step": 270600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.954782962799072, - "loss_rtd": 0.20175915956497192, - "loss_sent": 0.12556029856204987, - "loss_sod": 0.018054986372590065, - "loss_total": 0.3453744649887085, - "step": 270699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.508944511413574, - "loss_rtd": 0.19804103672504425, - "loss_sent": 0.00024759970256127417, - "loss_sod": 0.03680426999926567, - "loss_total": 0.2350929081439972, - "step": 270699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.7660701870918274, - "learning_rate": 1.5911919983591617e-05, - "loss": 0.4444, - "step": 270700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.674312591552734, - "loss_rtd": 0.22166262567043304, - "loss_sent": 0.0702732726931572, - "loss_sod": 0.008209237828850746, - "loss_total": 0.30014514923095703, - "step": 270799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.849605560302734, - "loss_rtd": 0.22538764774799347, - "loss_sent": 0.2957608103752136, - "loss_sod": 0.013990513049066067, - "loss_total": 0.5351389646530151, - "step": 270799 - }, - { - "epoch": 0.0296, - "grad_norm": 0.6954144239425659, - "learning_rate": 1.5888711610100064e-05, - "loss": 0.4263, - "step": 270800 - }, - { - "epoch": 0.029798, - "loss_gen": 6.219542503356934, - "loss_rtd": 0.2291915863752365, - "loss_sent": 0.15675795078277588, - "loss_sod": 0.08937126398086548, - "loss_total": 0.47532081604003906, - "step": 270899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.773324966430664, - "loss_rtd": 0.21983198821544647, - "loss_sent": 0.11543253064155579, - "loss_sod": 0.0073675187304615974, - "loss_total": 0.3426320552825928, - "step": 270899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.270186185836792, - "learning_rate": 1.586551697663141e-05, - "loss": 0.4308, - "step": 270900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.716264247894287, - "loss_rtd": 0.21419492363929749, - "loss_sent": 0.38679981231689453, - "loss_sod": 0.019500968977808952, - "loss_total": 0.6204956769943237, - "step": 270999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.190142631530762, - "loss_rtd": 0.18350224196910858, - "loss_sent": 0.007931055501103401, - "loss_sod": 0.023862555623054504, - "loss_total": 0.21529585123062134, - "step": 270999 - }, - { - "epoch": 0.03, - "grad_norm": 1.137611985206604, - "learning_rate": 1.5842336092528427e-05, - "loss": 0.4465, - "step": 271000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4173237383365631, - "eval_runtime": 150.629, - "eval_samples_per_second": 102.523, - "eval_steps_per_second": 0.803, - "step": 271000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.4995574951171875, - "loss_rtd": 0.20884856581687927, - "loss_sent": 0.08446690440177917, - "loss_sod": 0.06681458652019501, - "loss_total": 0.36013004183769226, - "step": 271099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.772039890289307, - "loss_rtd": 0.2209097146987915, - "loss_sent": 0.1933259665966034, - "loss_sod": 0.04324163496494293, - "loss_total": 0.45747730135917664, - "step": 271099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.000438928604126, - "learning_rate": 1.5819168967128374e-05, - "loss": 0.4593, - "step": 271100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.74977445602417, - "loss_rtd": 0.21794189512729645, - "loss_sent": 0.21439877152442932, - "loss_sod": 0.029228707775473595, - "loss_total": 0.4615693688392639, - "step": 271199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.9667439460754395, - "loss_rtd": 0.23573808372020721, - "loss_sent": 0.11964467912912369, - "loss_sod": 0.04007008671760559, - "loss_total": 0.3954528272151947, - "step": 271199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.8552267551422119, - "learning_rate": 1.579601560976297e-05, - "loss": 0.4342, - "step": 271200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.5353007316589355, - "loss_rtd": 0.21769912540912628, - "loss_sent": 0.3156224489212036, - "loss_sod": 0.018426664173603058, - "loss_total": 0.5517482757568359, - "step": 271299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.710964679718018, - "loss_rtd": 0.22503423690795898, - "loss_sent": 0.09214138984680176, - "loss_sod": 0.07522615045309067, - "loss_total": 0.392401784658432, - "step": 271299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.2099968194961548, - "learning_rate": 1.577287602975841e-05, - "loss": 0.4298, - "step": 271300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.57111120223999, - "loss_rtd": 0.23344328999519348, - "loss_sent": 0.09366744011640549, - "loss_sod": 0.03955375403165817, - "loss_total": 0.36666449904441833, - "step": 271399 - }, - { - "epoch": 0.030798, - "loss_gen": 6.4007673263549805, - "loss_rtd": 0.24450141191482544, - "loss_sent": 0.08490917831659317, - "loss_sod": 0.10845999419689178, - "loss_total": 0.437870591878891, - "step": 271399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.1461840867996216, - "learning_rate": 1.5749750236435277e-05, - "loss": 0.4505, - "step": 271400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.678263187408447, - "loss_rtd": 0.20214150846004486, - "loss_sent": 0.12077659368515015, - "loss_sod": 0.03934522718191147, - "loss_total": 0.3622633218765259, - "step": 271499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.953507423400879, - "loss_rtd": 0.23510979115962982, - "loss_sent": 0.19212287664413452, - "loss_sod": 0.012832150794565678, - "loss_total": 0.44006481766700745, - "step": 271499 - }, - { - "epoch": 0.031, - "grad_norm": 0.7390113472938538, - "learning_rate": 1.572663823910865e-05, - "loss": 0.4259, - "step": 271500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.757784366607666, - "loss_rtd": 0.21268510818481445, - "loss_sent": 0.28529173135757446, - "loss_sod": 0.005190334282815456, - "loss_total": 0.5031671524047852, - "step": 271599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.3992133140563965, - "loss_rtd": 0.1795322597026825, - "loss_sent": 0.006780500989407301, - "loss_sod": 0.031529609113931656, - "loss_total": 0.21784237027168274, - "step": 271599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.2566139698028564, - "learning_rate": 1.5703540047088045e-05, - "loss": 0.4158, - "step": 271600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.956812381744385, - "loss_rtd": 0.22287671267986298, - "loss_sent": 0.5607706904411316, - "loss_sod": 0.15211720764636993, - "loss_total": 0.9357646107673645, - "step": 271699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.977387428283691, - "loss_rtd": 0.2216854840517044, - "loss_sent": 0.08405521512031555, - "loss_sod": 0.011146768927574158, - "loss_total": 0.3168874680995941, - "step": 271699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.8207440376281738, - "learning_rate": 1.5680455669677418e-05, - "loss": 0.4327, - "step": 271700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.83808708190918, - "loss_rtd": 0.2178107053041458, - "loss_sent": 0.20109620690345764, - "loss_sod": 0.006562143564224243, - "loss_total": 0.4254690408706665, - "step": 271799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.626862525939941, - "loss_rtd": 0.23068249225616455, - "loss_sent": 0.7458118200302124, - "loss_sod": 0.006968685891479254, - "loss_total": 0.9834629893302917, - "step": 271799 - }, - { - "epoch": 0.0316, - "grad_norm": 2.781471014022827, - "learning_rate": 1.5657385116175132e-05, - "loss": 0.425, - "step": 271800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.72520637512207, - "loss_rtd": 0.23127830028533936, - "loss_sent": 0.08770978450775146, - "loss_sod": 0.056782398372888565, - "loss_total": 0.3757704794406891, - "step": 271899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.877103805541992, - "loss_rtd": 0.22334948182106018, - "loss_sent": 0.20821848511695862, - "loss_sod": 0.028325015679001808, - "loss_total": 0.45989298820495605, - "step": 271899 - }, - { - "epoch": 0.0318, - "grad_norm": 0.7804876565933228, - "learning_rate": 1.563432839587401e-05, - "loss": 0.4408, - "step": 271900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.407350540161133, - "loss_rtd": 0.22078903019428253, - "loss_sent": 0.20672528445720673, - "loss_sod": 0.012127239257097244, - "loss_total": 0.439641535282135, - "step": 271999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.709309101104736, - "loss_rtd": 0.24471458792686462, - "loss_sent": 0.08863084763288498, - "loss_sod": 0.019763953983783722, - "loss_total": 0.3531093895435333, - "step": 271999 - }, - { - "epoch": 0.032, - "grad_norm": 0.6426042318344116, - "learning_rate": 1.561128551806132e-05, - "loss": 0.4359, - "step": 272000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4193708598613739, - "eval_runtime": 150.3918, - "eval_samples_per_second": 102.685, - "eval_steps_per_second": 0.805, - "step": 272000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.854343414306641, - "loss_rtd": 0.22708803415298462, - "loss_sent": 0.13898998498916626, - "loss_sod": 0.09644470363855362, - "loss_total": 0.4625227153301239, - "step": 272099 - }, - { - "epoch": 0.000198, - "loss_gen": 6.205217361450195, - "loss_rtd": 0.22106121480464935, - "loss_sent": 0.09671042859554291, - "loss_sod": 0.047797903418540955, - "loss_total": 0.365569531917572, - "step": 272099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.7376329898834229, - "learning_rate": 1.5588256492018692e-05, - "loss": 0.4259, - "step": 272100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.971658229827881, - "loss_rtd": 0.2248469591140747, - "loss_sent": 0.1618236005306244, - "loss_sod": 0.04234761744737625, - "loss_total": 0.42901816964149475, - "step": 272199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.041867733001709, - "loss_rtd": 0.18634530901908875, - "loss_sent": 0.0001128903022618033, - "loss_sod": 0.08167225122451782, - "loss_total": 0.26813045144081116, - "step": 272199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.1817041635513306, - "learning_rate": 1.5565241327022233e-05, - "loss": 0.4243, - "step": 272200 - }, - { - "epoch": 0.000598, - "loss_gen": 6.258779048919678, - "loss_rtd": 0.22976508736610413, - "loss_sent": 0.305381178855896, - "loss_sod": 0.03196942061185837, - "loss_total": 0.5671156644821167, - "step": 272299 - }, - { - "epoch": 0.000598, - "loss_gen": 6.000399589538574, - "loss_rtd": 0.21835242211818695, - "loss_sent": 0.4397623538970947, - "loss_sod": 0.08194266259670258, - "loss_total": 0.7400574684143066, - "step": 272299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.7773146629333496, - "learning_rate": 1.5542240032342453e-05, - "loss": 0.4324, - "step": 272300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.836642742156982, - "loss_rtd": 0.208672896027565, - "loss_sent": 0.19265063107013702, - "loss_sod": 0.060387954115867615, - "loss_total": 0.46171146631240845, - "step": 272399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.5153679847717285, - "loss_rtd": 0.22192445397377014, - "loss_sent": 0.26429370045661926, - "loss_sod": 0.07314467430114746, - "loss_total": 0.5593628287315369, - "step": 272399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.0331426858901978, - "learning_rate": 1.5519252617244284e-05, - "loss": 0.4375, - "step": 272400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.724026679992676, - "loss_rtd": 0.2140340656042099, - "loss_sent": 0.31759142875671387, - "loss_sod": 0.11842348426580429, - "loss_total": 0.6500489711761475, - "step": 272499 - }, - { - "epoch": 0.000998, - "loss_gen": 6.262879848480225, - "loss_rtd": 0.22788311541080475, - "loss_sent": 0.30835044384002686, - "loss_sod": 0.06990567594766617, - "loss_total": 0.6061392426490784, - "step": 272499 - }, - { - "epoch": 0.001, - "grad_norm": 1.3533614873886108, - "learning_rate": 1.549627909098702e-05, - "loss": 0.4512, - "step": 272500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.785185813903809, - "loss_rtd": 0.21991954743862152, - "loss_sent": 0.2482890635728836, - "loss_sod": 0.06520332396030426, - "loss_total": 0.5334119200706482, - "step": 272599 - }, - { - "epoch": 0.001198, - "loss_gen": 6.257391929626465, - "loss_rtd": 0.22569477558135986, - "loss_sent": 0.2106040120124817, - "loss_sod": 0.1530519276857376, - "loss_total": 0.589350700378418, - "step": 272599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.1151965856552124, - "learning_rate": 1.5473319462824408e-05, - "loss": 0.4373, - "step": 272600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.766992568969727, - "loss_rtd": 0.22347459197044373, - "loss_sent": 0.19389308989048004, - "loss_sod": 0.03797778859734535, - "loss_total": 0.4553454518318176, - "step": 272699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.842998027801514, - "loss_rtd": 0.23596161603927612, - "loss_sent": 0.07384367287158966, - "loss_sod": 0.04780445992946625, - "loss_total": 0.35760974884033203, - "step": 272699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.8956001400947571, - "learning_rate": 1.5450373742004592e-05, - "loss": 0.4411, - "step": 272700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.876858234405518, - "loss_rtd": 0.21792659163475037, - "loss_sent": 0.347202330827713, - "loss_sod": 0.08309298753738403, - "loss_total": 0.6482219099998474, - "step": 272799 - }, - { - "epoch": 0.001598, - "loss_gen": 6.156166076660156, - "loss_rtd": 0.23301275074481964, - "loss_sent": 0.1636011302471161, - "loss_sod": 0.11997898668050766, - "loss_total": 0.5165928602218628, - "step": 272799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.5213719606399536, - "learning_rate": 1.5427441937770115e-05, - "loss": 0.4334, - "step": 272800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.906069755554199, - "loss_rtd": 0.23371683061122894, - "loss_sent": 0.26300743222236633, - "loss_sod": 0.0921693667769432, - "loss_total": 0.5888936519622803, - "step": 272899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.64545202255249, - "loss_rtd": 0.20052975416183472, - "loss_sent": 0.10251563042402267, - "loss_sod": 0.06725096702575684, - "loss_total": 0.3702963590621948, - "step": 272899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.0158360004425049, - "learning_rate": 1.5404524059357877e-05, - "loss": 0.4327, - "step": 272900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.925593376159668, - "loss_rtd": 0.21296720206737518, - "loss_sent": 0.2139560729265213, - "loss_sod": 0.030827436596155167, - "loss_total": 0.45775070786476135, - "step": 272999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.651845932006836, - "loss_rtd": 0.21196268498897552, - "loss_sent": 0.13291458785533905, - "loss_sod": 0.020824376493692398, - "loss_total": 0.3657016456127167, - "step": 272999 - }, - { - "epoch": 0.002, - "grad_norm": 0.6587467789649963, - "learning_rate": 1.5381620115999214e-05, - "loss": 0.4234, - "step": 273000 - }, - { - "epoch": 0.002, - "eval_loss": 0.41479891538619995, - "eval_runtime": 154.9089, - "eval_samples_per_second": 99.691, - "eval_steps_per_second": 0.781, - "step": 273000 - }, - { - "epoch": 0.002198, - "loss_gen": 6.200292110443115, - "loss_rtd": 0.21195562183856964, - "loss_sent": 0.11321356892585754, - "loss_sod": 0.07670631259679794, - "loss_total": 0.40187549591064453, - "step": 273099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.7697649002075195, - "loss_rtd": 0.220800518989563, - "loss_sent": 0.04641459882259369, - "loss_sod": 0.04055223986506462, - "loss_total": 0.3077673316001892, - "step": 273099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.0034691095352173, - "learning_rate": 1.535873011691982e-05, - "loss": 0.4446, - "step": 273100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.887719631195068, - "loss_rtd": 0.21751265227794647, - "loss_sent": 0.12734873592853546, - "loss_sod": 0.013623987324535847, - "loss_total": 0.3584853708744049, - "step": 273199 - }, - { - "epoch": 0.002398, - "loss_gen": 6.000583171844482, - "loss_rtd": 0.20690982043743134, - "loss_sent": 0.1919780969619751, - "loss_sod": 0.07696212828159332, - "loss_total": 0.47585004568099976, - "step": 273199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.7924743294715881, - "learning_rate": 1.5335854071339813e-05, - "loss": 0.423, - "step": 273200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.654041767120361, - "loss_rtd": 0.22770124673843384, - "loss_sent": 0.11472953855991364, - "loss_sod": 0.038625702261924744, - "loss_total": 0.3810564875602722, - "step": 273299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.648713111877441, - "loss_rtd": 0.19792591035366058, - "loss_sent": 0.3324960768222809, - "loss_sod": 0.0631243959069252, - "loss_total": 0.5935463905334473, - "step": 273299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.7783716917037964, - "learning_rate": 1.5312991988473625e-05, - "loss": 0.4557, - "step": 273300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.617676258087158, - "loss_rtd": 0.23671136796474457, - "loss_sent": 0.3875289261341095, - "loss_sod": 0.07125213742256165, - "loss_total": 0.6954923868179321, - "step": 273399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.685251235961914, - "loss_rtd": 0.21025420725345612, - "loss_sent": 0.10254145413637161, - "loss_sod": 0.10889772325754166, - "loss_total": 0.4216933846473694, - "step": 273399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.5747517347335815, - "learning_rate": 1.529014387753011e-05, - "loss": 0.4236, - "step": 273400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.638803958892822, - "loss_rtd": 0.21310441195964813, - "loss_sent": 0.18038244545459747, - "loss_sod": 0.1423824429512024, - "loss_total": 0.535869300365448, - "step": 273499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.264876365661621, - "loss_rtd": 0.17995695769786835, - "loss_sent": 0.10798300057649612, - "loss_sod": 0.08907205611467361, - "loss_total": 0.3770120143890381, - "step": 273499 - }, - { - "epoch": 0.003, - "grad_norm": 1.2970952987670898, - "learning_rate": 1.5267309747712517e-05, - "loss": 0.423, - "step": 273500 - }, - { - "epoch": 0.003198, - "loss_gen": 6.159759044647217, - "loss_rtd": 0.22437120974063873, - "loss_sent": 0.16736772656440735, - "loss_sod": 0.05562593415379524, - "loss_total": 0.447364866733551, - "step": 273599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.799188613891602, - "loss_rtd": 0.22752384841442108, - "loss_sent": 0.238138347864151, - "loss_sod": 0.008632916025817394, - "loss_total": 0.47429510951042175, - "step": 273599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.5340250730514526, - "learning_rate": 1.5244489608218377e-05, - "loss": 0.4355, - "step": 273600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.6213836669921875, - "loss_rtd": 0.2576653063297272, - "loss_sent": 0.1624908745288849, - "loss_sod": 0.016954997554421425, - "loss_total": 0.43711116909980774, - "step": 273699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.219740390777588, - "loss_rtd": 0.21375170350074768, - "loss_sent": 0.0008478844538331032, - "loss_sod": 0.14455559849739075, - "loss_total": 0.35915517807006836, - "step": 273699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.051249623298645, - "learning_rate": 1.5221683468239673e-05, - "loss": 0.4182, - "step": 273700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.7904839515686035, - "loss_rtd": 0.17814978957176208, - "loss_sent": 0.10398238897323608, - "loss_sod": 0.08456475287675858, - "loss_total": 0.36669692397117615, - "step": 273799 - }, - { - "epoch": 0.003598, - "loss_gen": 6.159872531890869, - "loss_rtd": 0.2229016125202179, - "loss_sent": 0.3075779676437378, - "loss_sod": 0.04954897612333298, - "loss_total": 0.5800285339355469, - "step": 273799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.7286344766616821, - "learning_rate": 1.5198891336962707e-05, - "loss": 0.4353, - "step": 273800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.964946746826172, - "loss_rtd": 0.20447297394275665, - "loss_sent": 0.1900361180305481, - "loss_sod": 0.07651027292013168, - "loss_total": 0.47101935744285583, - "step": 273899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.49271297454834, - "loss_rtd": 0.22054538130760193, - "loss_sent": 0.2098027765750885, - "loss_sod": 0.0044085439294576645, - "loss_total": 0.43475669622421265, - "step": 273899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.0789908170700073, - "learning_rate": 1.5176113223568167e-05, - "loss": 0.4351, - "step": 273900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.062466144561768, - "loss_rtd": 0.1730503886938095, - "loss_sent": 2.5076464226003736e-05, - "loss_sod": 0.14788149297237396, - "loss_total": 0.3209569454193115, - "step": 273999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.458028316497803, - "loss_rtd": 0.19143301248550415, - "loss_sent": 0.17083045840263367, - "loss_sod": 0.0875568687915802, - "loss_total": 0.449820339679718, - "step": 273999 - }, - { - "epoch": 0.004, - "grad_norm": 1.5684807300567627, - "learning_rate": 1.5153349137231038e-05, - "loss": 0.4205, - "step": 274000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4165240526199341, - "eval_runtime": 151.1454, - "eval_samples_per_second": 102.173, - "eval_steps_per_second": 0.801, - "step": 274000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.204282283782959, - "loss_rtd": 0.1609993278980255, - "loss_sent": 0.0012794769136235118, - "loss_sod": 0.05992255359888077, - "loss_total": 0.22220134735107422, - "step": 274099 - }, - { - "epoch": 0.004198, - "loss_gen": 6.008677005767822, - "loss_rtd": 0.23444169759750366, - "loss_sent": 0.4248729348182678, - "loss_sod": 0.09115256369113922, - "loss_total": 0.7504671812057495, - "step": 274099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2817180156707764, - "learning_rate": 1.5130599087120706e-05, - "loss": 0.428, - "step": 274100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.449244499206543, - "loss_rtd": 0.20360541343688965, - "loss_sent": 0.02436850033700466, - "loss_sod": 0.04385147988796234, - "loss_total": 0.2718254029750824, - "step": 274199 - }, - { - "epoch": 0.004398, - "loss_gen": 4.987967014312744, - "loss_rtd": 0.18337763845920563, - "loss_sent": 2.4480446882080287e-05, - "loss_sod": 0.043488502502441406, - "loss_total": 0.22689062356948853, - "step": 274199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.5992895364761353, - "learning_rate": 1.5107863082400897e-05, - "loss": 0.4311, - "step": 274200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.187564373016357, - "loss_rtd": 0.1814659982919693, - "loss_sent": 3.216753248125315e-05, - "loss_sod": 0.14015816152095795, - "loss_total": 0.32165631651878357, - "step": 274299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.265283584594727, - "loss_rtd": 0.19243568181991577, - "loss_sent": 0.04902365058660507, - "loss_sod": 0.11203384399414062, - "loss_total": 0.35349318385124207, - "step": 274299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.2498400211334229, - "learning_rate": 1.508514113222968e-05, - "loss": 0.4359, - "step": 274300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.494047164916992, - "loss_rtd": 0.19427287578582764, - "loss_sent": 2.600963125587441e-05, - "loss_sod": 0.05721534416079521, - "loss_total": 0.2515142261981964, - "step": 274399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.231760501861572, - "loss_rtd": 0.18359731137752533, - "loss_sent": 2.7589037927100435e-05, - "loss_sod": 0.291170597076416, - "loss_total": 0.47479552030563354, - "step": 274399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.336585283279419, - "learning_rate": 1.5062433245759422e-05, - "loss": 0.4411, - "step": 274400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.843842506408691, - "loss_rtd": 0.22189895808696747, - "loss_sent": 0.10693559050559998, - "loss_sod": 0.06513269990682602, - "loss_total": 0.39396724104881287, - "step": 274499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.820666313171387, - "loss_rtd": 0.19190526008605957, - "loss_sent": 0.2163529247045517, - "loss_sod": 0.027400104328989983, - "loss_total": 0.4356582760810852, - "step": 274499 - }, - { - "epoch": 0.005, - "grad_norm": 1.718505859375, - "learning_rate": 1.5039739432136873e-05, - "loss": 0.4297, - "step": 274500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.920298099517822, - "loss_rtd": 0.23496927320957184, - "loss_sent": 0.08924492448568344, - "loss_sod": 0.02064770646393299, - "loss_total": 0.3448619246482849, - "step": 274599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.958324909210205, - "loss_rtd": 0.19699518382549286, - "loss_sent": 0.24578341841697693, - "loss_sod": 0.05115849897265434, - "loss_total": 0.49393710494041443, - "step": 274599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.8538525700569153, - "learning_rate": 1.5017059700503105e-05, - "loss": 0.438, - "step": 274600 - }, - { - "epoch": 0.005398, - "loss_gen": 6.252610206604004, - "loss_rtd": 0.23672576248645782, - "loss_sent": 0.21985003352165222, - "loss_sod": 0.07795245945453644, - "loss_total": 0.5345282554626465, - "step": 274699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.9698486328125, - "loss_rtd": 0.22775180637836456, - "loss_sent": 0.22323723137378693, - "loss_sod": 0.06044522300362587, - "loss_total": 0.5114342570304871, - "step": 274699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.2575621604919434, - "learning_rate": 1.4994394059993521e-05, - "loss": 0.436, - "step": 274700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.722237586975098, - "loss_rtd": 0.2087721824645996, - "loss_sent": 0.02655068412423134, - "loss_sod": 0.06442528963088989, - "loss_total": 0.29974815249443054, - "step": 274799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.214300632476807, - "loss_rtd": 0.1811797022819519, - "loss_sent": 0.057300493121147156, - "loss_sod": 0.04453400522470474, - "loss_total": 0.283014178276062, - "step": 274799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.7612568736076355, - "learning_rate": 1.4971742519737803e-05, - "loss": 0.421, - "step": 274800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.314690589904785, - "loss_rtd": 0.22435688972473145, - "loss_sent": 0.21978983283042908, - "loss_sod": 0.08311988413333893, - "loss_total": 0.5272666215896606, - "step": 274899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.76157283782959, - "loss_rtd": 0.23554137349128723, - "loss_sent": 0.12696672976016998, - "loss_sod": 0.0958535447716713, - "loss_total": 0.4583616256713867, - "step": 274899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.2189191579818726, - "learning_rate": 1.4949105088860017e-05, - "loss": 0.4249, - "step": 274900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.916494846343994, - "loss_rtd": 0.2172781527042389, - "loss_sent": 0.14282725751399994, - "loss_sod": 0.039861761033535004, - "loss_total": 0.3999671936035156, - "step": 274999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.9629364013671875, - "loss_rtd": 0.21175292134284973, - "loss_sent": 0.1134389191865921, - "loss_sod": 0.001974719576537609, - "loss_total": 0.3271665573120117, - "step": 274999 - }, - { - "epoch": 0.006, - "grad_norm": 1.2094670534133911, - "learning_rate": 1.4926481776478501e-05, - "loss": 0.454, - "step": 275000 - }, - { - "epoch": 0.006, - "eval_loss": 0.41286784410476685, - "eval_runtime": 150.9879, - "eval_samples_per_second": 102.28, - "eval_steps_per_second": 0.801, - "step": 275000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.998263359069824, - "loss_rtd": 0.21866413950920105, - "loss_sent": 0.14763079583644867, - "loss_sod": 0.01852121762931347, - "loss_total": 0.38481616973876953, - "step": 275099 - }, - { - "epoch": 0.006198, - "loss_gen": 6.226556777954102, - "loss_rtd": 0.22671213746070862, - "loss_sent": 0.07838278263807297, - "loss_sod": 0.0725444108247757, - "loss_total": 0.3776393532752991, - "step": 275099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.2292507886886597, - "learning_rate": 1.4903872591705953e-05, - "loss": 0.4469, - "step": 275100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.403130054473877, - "loss_rtd": 0.17612871527671814, - "loss_sent": 0.0024083247408270836, - "loss_sod": 0.14639602601528168, - "loss_total": 0.3249330520629883, - "step": 275199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.9787278175354, - "loss_rtd": 0.21918079257011414, - "loss_sent": 0.14101746678352356, - "loss_sod": 0.002642581705003977, - "loss_total": 0.36284083127975464, - "step": 275199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.9513075947761536, - "learning_rate": 1.4881277543649308e-05, - "loss": 0.4199, - "step": 275200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.5881028175354, - "loss_rtd": 0.20095397531986237, - "loss_sent": 0.11840689182281494, - "loss_sod": 0.06324057281017303, - "loss_total": 0.38260143995285034, - "step": 275299 - }, - { - "epoch": 0.006598, - "loss_gen": 6.146471977233887, - "loss_rtd": 0.22917453944683075, - "loss_sent": 0.0370137095451355, - "loss_sod": 0.1151127964258194, - "loss_total": 0.38130104541778564, - "step": 275299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.9370741844177246, - "learning_rate": 1.485869664140987e-05, - "loss": 0.4374, - "step": 275300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.571595191955566, - "loss_rtd": 0.2085263431072235, - "loss_sent": 0.031222401186823845, - "loss_sod": 0.08115590363740921, - "loss_total": 0.3209046423435211, - "step": 275399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.117570400238037, - "loss_rtd": 0.1735120266675949, - "loss_sent": 2.8771723009413108e-05, - "loss_sod": 0.0636037290096283, - "loss_total": 0.23714452981948853, - "step": 275399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.7947896122932434, - "learning_rate": 1.483612989408324e-05, - "loss": 0.421, - "step": 275400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.947719097137451, - "loss_rtd": 0.2378571629524231, - "loss_sent": 0.1077478751540184, - "loss_sod": 0.06863857805728912, - "loss_total": 0.41424360871315, - "step": 275499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.633233070373535, - "loss_rtd": 0.20518076419830322, - "loss_sent": 0.0270475372672081, - "loss_sod": 0.025233253836631775, - "loss_total": 0.2574615478515625, - "step": 275499 - }, - { - "epoch": 0.007, - "grad_norm": 1.0830098390579224, - "learning_rate": 1.4813577310759268e-05, - "loss": 0.4115, - "step": 275500 - }, - { - "epoch": 0.007198, - "loss_gen": 6.004197597503662, - "loss_rtd": 0.23788532614707947, - "loss_sent": 0.043408095836639404, - "loss_sod": 0.06540364027023315, - "loss_total": 0.346697062253952, - "step": 275599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.529959678649902, - "loss_rtd": 0.23637913167476654, - "loss_sent": 0.25661712884902954, - "loss_sod": 0.030556680634617805, - "loss_total": 0.5235529541969299, - "step": 275599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.00693941116333, - "learning_rate": 1.4791038900522148e-05, - "loss": 0.4383, - "step": 275600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.628551483154297, - "loss_rtd": 0.21775345504283905, - "loss_sent": 0.13816148042678833, - "loss_sod": 0.0368569940328598, - "loss_total": 0.3927719295024872, - "step": 275699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.497834205627441, - "loss_rtd": 0.1999260038137436, - "loss_sent": 0.07742981612682343, - "loss_sod": 0.01064966432750225, - "loss_total": 0.2880054712295532, - "step": 275699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.2652186155319214, - "learning_rate": 1.4768514672450345e-05, - "loss": 0.4254, - "step": 275700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.588418483734131, - "loss_rtd": 0.2098814994096756, - "loss_sent": 0.0544310137629509, - "loss_sod": 0.01398796122521162, - "loss_total": 0.2783004641532898, - "step": 275799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.634303092956543, - "loss_rtd": 0.22205425798892975, - "loss_sent": 0.11263878643512726, - "loss_sod": 0.026592668145895004, - "loss_total": 0.3612857162952423, - "step": 275799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.7957473397254944, - "learning_rate": 1.4746004635616634e-05, - "loss": 0.4164, - "step": 275800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.97379207611084, - "loss_rtd": 0.19933107495307922, - "loss_sent": 0.07186050713062286, - "loss_sod": 0.037373773753643036, - "loss_total": 0.3085653781890869, - "step": 275899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.917050361633301, - "loss_rtd": 0.21200326085090637, - "loss_sent": 0.08479931950569153, - "loss_sod": 0.02335244044661522, - "loss_total": 0.3201550245285034, - "step": 275899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.6725935935974121, - "learning_rate": 1.4723508799088025e-05, - "loss": 0.4428, - "step": 275900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.615390300750732, - "loss_rtd": 0.19833429157733917, - "loss_sent": 0.11061300337314606, - "loss_sod": 0.02802497148513794, - "loss_total": 0.33697226643562317, - "step": 275999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.542318344116211, - "loss_rtd": 0.1746564507484436, - "loss_sent": 0.11866465955972672, - "loss_sod": 0.07623709738254547, - "loss_total": 0.369558185338974, - "step": 275999 - }, - { - "epoch": 0.008, - "grad_norm": 0.777366042137146, - "learning_rate": 1.4701027171925853e-05, - "loss": 0.4344, - "step": 276000 - }, - { - "epoch": 0.008, - "eval_loss": 0.41162198781967163, - "eval_runtime": 152.5004, - "eval_samples_per_second": 101.265, - "eval_steps_per_second": 0.793, - "step": 276000 - }, - { - "epoch": 0.008198, - "loss_gen": 6.17227029800415, - "loss_rtd": 0.22520144283771515, - "loss_sent": 0.05229737237095833, - "loss_sod": 0.04423283040523529, - "loss_total": 0.32173165678977966, - "step": 276099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.640833854675293, - "loss_rtd": 0.22593384981155396, - "loss_sent": 0.17062826454639435, - "loss_sod": 0.12412357330322266, - "loss_total": 0.5206856727600098, - "step": 276099 - }, - { - "epoch": 0.0082, - "grad_norm": 0.7446812391281128, - "learning_rate": 1.4678559763185718e-05, - "loss": 0.4297, - "step": 276100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.91195821762085, - "loss_rtd": 0.21936455368995667, - "loss_sent": 0.34628406167030334, - "loss_sod": 0.015731997787952423, - "loss_total": 0.5813806056976318, - "step": 276199 - }, - { - "epoch": 0.008398, - "loss_gen": 6.316697120666504, - "loss_rtd": 0.2094716876745224, - "loss_sent": 0.1173069104552269, - "loss_sod": 0.06010034680366516, - "loss_total": 0.38687893748283386, - "step": 276199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.9478293657302856, - "learning_rate": 1.46561065819175e-05, - "loss": 0.4225, - "step": 276200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.881045818328857, - "loss_rtd": 0.22816449403762817, - "loss_sent": 0.2730463445186615, - "loss_sod": 0.02581781893968582, - "loss_total": 0.5270286798477173, - "step": 276299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.697487831115723, - "loss_rtd": 0.2018587589263916, - "loss_sent": 0.39347535371780396, - "loss_sod": 0.02456580102443695, - "loss_total": 0.6198999285697937, - "step": 276299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.8602070808410645, - "learning_rate": 1.4633667637165305e-05, - "loss": 0.4302, - "step": 276300 - }, - { - "epoch": 0.008798, - "loss_gen": 6.158581256866455, - "loss_rtd": 0.23338966071605682, - "loss_sent": 0.13927677273750305, - "loss_sod": 0.03054956905543804, - "loss_total": 0.40321600437164307, - "step": 276399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.349296569824219, - "loss_rtd": 0.18747206032276154, - "loss_sent": 0.0666453018784523, - "loss_sod": 0.034182045608758926, - "loss_total": 0.28829941153526306, - "step": 276399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.7663335800170898, - "learning_rate": 1.4611242937967562e-05, - "loss": 0.451, - "step": 276400 - }, - { - "epoch": 0.008998, - "loss_gen": 6.009261131286621, - "loss_rtd": 0.21562351286411285, - "loss_sent": 0.20994015038013458, - "loss_sod": 0.04199356585741043, - "loss_total": 0.46755725145339966, - "step": 276499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.512537002563477, - "loss_rtd": 0.22194944322109222, - "loss_sent": 0.32049813866615295, - "loss_sod": 0.0023547913879156113, - "loss_total": 0.5448023676872253, - "step": 276499 - }, - { - "epoch": 0.009, - "grad_norm": 0.7253670692443848, - "learning_rate": 1.4588832493356924e-05, - "loss": 0.4405, - "step": 276500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.093565940856934, - "loss_rtd": 0.1910579800605774, - "loss_sent": 2.6950241590384394e-05, - "loss_sod": 0.03554120287299156, - "loss_total": 0.22662614285945892, - "step": 276599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.972431659698486, - "loss_rtd": 0.21581493318080902, - "loss_sent": 0.24811503291130066, - "loss_sod": 0.03777027875185013, - "loss_total": 0.5017002820968628, - "step": 276599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.0715969800949097, - "learning_rate": 1.4566436312360349e-05, - "loss": 0.4206, - "step": 276600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.644054889678955, - "loss_rtd": 0.23983755707740784, - "loss_sent": 0.3277575969696045, - "loss_sod": 0.009140715934336185, - "loss_total": 0.5767358541488647, - "step": 276699 - }, - { - "epoch": 0.009398, - "loss_gen": 6.069643020629883, - "loss_rtd": 0.2163091003894806, - "loss_sent": 0.353619784116745, - "loss_sod": 0.09094171971082687, - "loss_total": 0.6608706116676331, - "step": 276699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.3661854267120361, - "learning_rate": 1.4544054403998969e-05, - "loss": 0.4322, - "step": 276700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.873619556427002, - "loss_rtd": 0.19589076936244965, - "loss_sent": 0.27185410261154175, - "loss_sod": 0.01664174534380436, - "loss_total": 0.4843866229057312, - "step": 276799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.717758655548096, - "loss_rtd": 0.1972099393606186, - "loss_sent": 0.16506752371788025, - "loss_sod": 0.11253196001052856, - "loss_total": 0.4748094379901886, - "step": 276799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.8117824196815491, - "learning_rate": 1.4521686777288234e-05, - "loss": 0.4388, - "step": 276800 - }, - { - "epoch": 0.009798, - "loss_gen": 4.978543281555176, - "loss_rtd": 0.18296152353286743, - "loss_sent": 2.4838065655785613e-05, - "loss_sod": 0.1248551458120346, - "loss_total": 0.3078415095806122, - "step": 276899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.24105978012085, - "loss_rtd": 0.18175606429576874, - "loss_sent": 0.02356233075261116, - "loss_sod": 0.011996938847005367, - "loss_total": 0.21731533110141754, - "step": 276899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.7824288606643677, - "learning_rate": 1.4499333441237838e-05, - "loss": 0.4274, - "step": 276900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.631489276885986, - "loss_rtd": 0.22237901389598846, - "loss_sent": 0.20925535261631012, - "loss_sod": 0.15647080540657043, - "loss_total": 0.5881051421165466, - "step": 276999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.795790195465088, - "loss_rtd": 0.2195892632007599, - "loss_sent": 0.1186850517988205, - "loss_sod": 0.015782378613948822, - "loss_total": 0.354056715965271, - "step": 276999 - }, - { - "epoch": 0.01, - "grad_norm": 1.1823580265045166, - "learning_rate": 1.4476994404851668e-05, - "loss": 0.4314, - "step": 277000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4196847379207611, - "eval_runtime": 150.9205, - "eval_samples_per_second": 102.325, - "eval_steps_per_second": 0.802, - "step": 277000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.772769927978516, - "loss_rtd": 0.21531303226947784, - "loss_sent": 0.2676294147968292, - "loss_sod": 0.008648330345749855, - "loss_total": 0.49159079790115356, - "step": 277099 - }, - { - "epoch": 0.010198, - "loss_gen": 6.091549396514893, - "loss_rtd": 0.2283223420381546, - "loss_sent": 0.22223016619682312, - "loss_sod": 0.09155251085758209, - "loss_total": 0.5421050190925598, - "step": 277099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.085469365119934, - "learning_rate": 1.4454669677127907e-05, - "loss": 0.4322, - "step": 277100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.694558620452881, - "loss_rtd": 0.22899405658245087, - "loss_sent": 0.021348869428038597, - "loss_sod": 0.07458329945802689, - "loss_total": 0.3249262273311615, - "step": 277199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.591134071350098, - "loss_rtd": 0.21290600299835205, - "loss_sent": 0.1980232149362564, - "loss_sod": 0.05598120763897896, - "loss_total": 0.4669104218482971, - "step": 277199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.8149976134300232, - "learning_rate": 1.4432359267058953e-05, - "loss": 0.4212, - "step": 277200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.793703079223633, - "loss_rtd": 0.23179537057876587, - "loss_sent": 0.07164697349071503, - "loss_sod": 0.02043539099395275, - "loss_total": 0.32387775182724, - "step": 277299 - }, - { - "epoch": 0.010598, - "loss_gen": 6.0543317794799805, - "loss_rtd": 0.22306634485721588, - "loss_sent": 0.32701098918914795, - "loss_sod": 0.01589788869023323, - "loss_total": 0.5659751892089844, - "step": 277299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9578184485435486, - "learning_rate": 1.4410063183631446e-05, - "loss": 0.4235, - "step": 277300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.882953643798828, - "loss_rtd": 0.2202696055173874, - "loss_sent": 0.40532752871513367, - "loss_sod": 0.09417147934436798, - "loss_total": 0.7197686433792114, - "step": 277399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.9690117835998535, - "loss_rtd": 0.23239389061927795, - "loss_sent": 0.300907164812088, - "loss_sod": 0.08439088612794876, - "loss_total": 0.6176919341087341, - "step": 277399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.7343910932540894, - "learning_rate": 1.4387781435826215e-05, - "loss": 0.412, - "step": 277400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.794314384460449, - "loss_rtd": 0.21898530423641205, - "loss_sent": 0.11131469160318375, - "loss_sod": 0.05166494846343994, - "loss_total": 0.38196495175361633, - "step": 277499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.808643341064453, - "loss_rtd": 0.22665444016456604, - "loss_sent": 0.30303165316581726, - "loss_sod": 0.0033834788482636213, - "loss_total": 0.5330695509910583, - "step": 277499 - }, - { - "epoch": 0.011, - "grad_norm": 0.6830807328224182, - "learning_rate": 1.436551403261836e-05, - "loss": 0.4503, - "step": 277500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.957104206085205, - "loss_rtd": 0.2045014649629593, - "loss_sent": 0.14599353075027466, - "loss_sod": 0.03382723033428192, - "loss_total": 0.38432222604751587, - "step": 277599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.789407253265381, - "loss_rtd": 0.20713423192501068, - "loss_sent": 0.09987723082304001, - "loss_sod": 0.05185386538505554, - "loss_total": 0.35886532068252563, - "step": 277599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.0491242408752441, - "learning_rate": 1.4343260982977196e-05, - "loss": 0.4421, - "step": 277600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.814051151275635, - "loss_rtd": 0.21798686683177948, - "loss_sent": 0.1862952709197998, - "loss_sod": 0.038201622664928436, - "loss_total": 0.4424837529659271, - "step": 277699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.72694730758667, - "loss_rtd": 0.24146729707717896, - "loss_sent": 0.31808140873908997, - "loss_sod": 0.010205268859863281, - "loss_total": 0.5697540044784546, - "step": 277699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.9509449601173401, - "learning_rate": 1.432102229586626e-05, - "loss": 0.4455, - "step": 277700 - }, - { - "epoch": 0.011598, - "loss_gen": 6.004022598266602, - "loss_rtd": 0.22351793944835663, - "loss_sent": 0.1464354395866394, - "loss_sod": 0.0887962132692337, - "loss_total": 0.45874959230422974, - "step": 277799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.679751396179199, - "loss_rtd": 0.22603632509708405, - "loss_sent": 0.12933529913425446, - "loss_sod": 0.02899974212050438, - "loss_total": 0.3843713402748108, - "step": 277799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.9555836915969849, - "learning_rate": 1.4298797980243255e-05, - "loss": 0.4205, - "step": 277800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.585626125335693, - "loss_rtd": 0.21574117243289948, - "loss_sent": 0.5474934577941895, - "loss_sod": 0.007428249344229698, - "loss_total": 0.7706629037857056, - "step": 277899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.836236953735352, - "loss_rtd": 0.22890616953372955, - "loss_sent": 0.14271174371242523, - "loss_sod": 0.01348471362143755, - "loss_total": 0.38510262966156006, - "step": 277899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.4604941606521606, - "learning_rate": 1.4276588045060163e-05, - "loss": 0.4354, - "step": 277900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.417580604553223, - "loss_rtd": 0.1938585638999939, - "loss_sent": 0.0372602641582489, - "loss_sod": 0.0591491237282753, - "loss_total": 0.2902679443359375, - "step": 277999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.544027328491211, - "loss_rtd": 0.19217461347579956, - "loss_sent": 0.09470445662736893, - "loss_sod": 0.03213660791516304, - "loss_total": 0.3190156817436218, - "step": 277999 - }, - { - "epoch": 0.012, - "grad_norm": 0.6893901824951172, - "learning_rate": 1.425439249926313e-05, - "loss": 0.4261, - "step": 278000 - }, - { - "epoch": 0.012, - "eval_loss": 0.4183647930622101, - "eval_runtime": 151.2535, - "eval_samples_per_second": 102.1, - "eval_steps_per_second": 0.8, - "step": 278000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.114015102386475, - "loss_rtd": 0.22248844802379608, - "loss_sent": 0.32810482382774353, - "loss_sod": 0.04194863140583038, - "loss_total": 0.5925419330596924, - "step": 278099 - }, - { - "epoch": 0.012198, - "loss_gen": 6.096914768218994, - "loss_rtd": 0.22503113746643066, - "loss_sent": 0.1498892605304718, - "loss_sod": 0.02221049927175045, - "loss_total": 0.39713090658187866, - "step": 278099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.7431953549385071, - "learning_rate": 1.4232211351792552e-05, - "loss": 0.4332, - "step": 278100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.778744220733643, - "loss_rtd": 0.20733730494976044, - "loss_sent": 0.12015029788017273, - "loss_sod": 0.06986133754253387, - "loss_total": 0.39734894037246704, - "step": 278199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.296205043792725, - "loss_rtd": 0.18247418105602264, - "loss_sent": 3.088546509388834e-05, - "loss_sod": 0.31866455078125, - "loss_total": 0.5011696219444275, - "step": 278199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.4823925495147705, - "learning_rate": 1.4210044611582934e-05, - "loss": 0.4189, - "step": 278200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.900089263916016, - "loss_rtd": 0.19683802127838135, - "loss_sent": 0.2003399282693863, - "loss_sod": 0.06887827813625336, - "loss_total": 0.466056227684021, - "step": 278299 - }, - { - "epoch": 0.012598, - "loss_gen": 6.01000452041626, - "loss_rtd": 0.20670291781425476, - "loss_sent": 0.03745774179697037, - "loss_sod": 0.016352390870451927, - "loss_total": 0.260513037443161, - "step": 278299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.8375272154808044, - "learning_rate": 1.4187892287563071e-05, - "loss": 0.4273, - "step": 278300 - }, - { - "epoch": 0.012798, - "loss_gen": 6.473291397094727, - "loss_rtd": 0.2656841576099396, - "loss_sent": 0.0679529532790184, - "loss_sod": 0.11885888129472733, - "loss_total": 0.4524959921836853, - "step": 278399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.9420061111450195, - "loss_rtd": 0.2288360446691513, - "loss_sent": 0.09559198468923569, - "loss_sod": 0.09812648594379425, - "loss_total": 0.42255452275276184, - "step": 278399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.2853294610977173, - "learning_rate": 1.4165754388655906e-05, - "loss": 0.4395, - "step": 278400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.745471477508545, - "loss_rtd": 0.2117670774459839, - "loss_sent": 0.18949981033802032, - "loss_sod": 0.0478760227560997, - "loss_total": 0.4491429328918457, - "step": 278499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.499617099761963, - "loss_rtd": 0.2230619192123413, - "loss_sent": 0.097218818962574, - "loss_sod": 0.06726241111755371, - "loss_total": 0.3875431418418884, - "step": 278499 - }, - { - "epoch": 0.013, - "grad_norm": 0.8567191958427429, - "learning_rate": 1.4143630923778606e-05, - "loss": 0.4512, - "step": 278500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.759278774261475, - "loss_rtd": 0.2041715532541275, - "loss_sent": 0.14937297999858856, - "loss_sod": 0.07851532101631165, - "loss_total": 0.4320598840713501, - "step": 278599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.783989906311035, - "loss_rtd": 0.2008306086063385, - "loss_sent": 0.2086365818977356, - "loss_sod": 0.07383053004741669, - "loss_total": 0.4832977056503296, - "step": 278599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.1480988264083862, - "learning_rate": 1.4121521901842467e-05, - "loss": 0.4392, - "step": 278600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.692169666290283, - "loss_rtd": 0.2233029156923294, - "loss_sent": 0.362612247467041, - "loss_sod": 0.03540550917387009, - "loss_total": 0.6213206648826599, - "step": 278699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.673735618591309, - "loss_rtd": 0.20805056393146515, - "loss_sent": 0.03982659429311752, - "loss_sod": 0.12513360381126404, - "loss_total": 0.3730107843875885, - "step": 278699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.594168782234192, - "learning_rate": 1.4099427331753018e-05, - "loss": 0.4476, - "step": 278700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.833976745605469, - "loss_rtd": 0.22022613883018494, - "loss_sent": 0.1856670379638672, - "loss_sod": 0.07268480956554413, - "loss_total": 0.47857797145843506, - "step": 278799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.80198860168457, - "loss_rtd": 0.21828624606132507, - "loss_sent": 0.19001781940460205, - "loss_sod": 0.030553320422768593, - "loss_total": 0.43885737657546997, - "step": 278799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.7171003222465515, - "learning_rate": 1.4077347222409942e-05, - "loss": 0.4406, - "step": 278800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.872027397155762, - "loss_rtd": 0.21578609943389893, - "loss_sent": 0.21043899655342102, - "loss_sod": 0.06564974784851074, - "loss_total": 0.4918748438358307, - "step": 278899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.9985032081604, - "loss_rtd": 0.23049300909042358, - "loss_sent": 0.25634822249412537, - "loss_sod": 0.013149198144674301, - "loss_total": 0.49999043345451355, - "step": 278899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.0039849281311035, - "learning_rate": 1.4055281582707125e-05, - "loss": 0.4331, - "step": 278900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.637964725494385, - "loss_rtd": 0.21313661336898804, - "loss_sent": 0.0539582297205925, - "loss_sod": 0.15895986557006836, - "loss_total": 0.4260547161102295, - "step": 278999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.225949764251709, - "loss_rtd": 0.1816854029893875, - "loss_sent": 5.1824903493979946e-05, - "loss_sod": 0.05483756214380264, - "loss_total": 0.23657479882240295, - "step": 278999 - }, - { - "epoch": 0.014, - "grad_norm": 0.9494348168373108, - "learning_rate": 1.4033230421532574e-05, - "loss": 0.4261, - "step": 279000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4114840030670166, - "eval_runtime": 151.0764, - "eval_samples_per_second": 102.22, - "eval_steps_per_second": 0.801, - "step": 279000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.948075771331787, - "loss_rtd": 0.21769101917743683, - "loss_sent": 0.18114681541919708, - "loss_sod": 0.03720657899975777, - "loss_total": 0.4360443949699402, - "step": 279099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.593774795532227, - "loss_rtd": 0.22905591130256653, - "loss_sent": 0.5004178285598755, - "loss_sod": 0.018901998177170753, - "loss_total": 0.7483757734298706, - "step": 279099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.857199788093567, - "learning_rate": 1.4011193747768509e-05, - "loss": 0.4385, - "step": 279100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.325761795043945, - "loss_rtd": 0.1875769942998886, - "loss_sent": 0.015059086494147778, - "loss_sod": 0.02798033133149147, - "loss_total": 0.23061640560626984, - "step": 279199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.665338516235352, - "loss_rtd": 0.20474572479724884, - "loss_sent": 0.03706784546375275, - "loss_sod": 0.042797110974788666, - "loss_total": 0.28461068868637085, - "step": 279199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.6072078347206116, - "learning_rate": 1.3989171570291294e-05, - "loss": 0.434, - "step": 279200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.405586242675781, - "loss_rtd": 0.19190989434719086, - "loss_sent": 0.003731678007170558, - "loss_sod": 0.07705147564411163, - "loss_total": 0.27269303798675537, - "step": 279299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.41531229019165, - "loss_rtd": 0.21093900501728058, - "loss_sent": 0.014339824207127094, - "loss_sod": 0.09241083264350891, - "loss_total": 0.3176896870136261, - "step": 279299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.7842441201210022, - "learning_rate": 1.396716389797148e-05, - "loss": 0.4327, - "step": 279300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.80268669128418, - "loss_rtd": 0.21594296395778656, - "loss_sent": 0.2316673994064331, - "loss_sod": 0.03718440607190132, - "loss_total": 0.4847947657108307, - "step": 279399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.298433303833008, - "loss_rtd": 0.1938018649816513, - "loss_sent": 0.018638234585523605, - "loss_sod": 0.0383220873773098, - "loss_total": 0.2507621943950653, - "step": 279399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.7067802548408508, - "learning_rate": 1.394517073967373e-05, - "loss": 0.4529, - "step": 279400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.532692909240723, - "loss_rtd": 0.24964453279972076, - "loss_sent": 0.2670353949069977, - "loss_sod": 0.04572862759232521, - "loss_total": 0.5624085664749146, - "step": 279499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.870636463165283, - "loss_rtd": 0.234962597489357, - "loss_sent": 0.17811015248298645, - "loss_sod": 0.041591327637434006, - "loss_total": 0.45466408133506775, - "step": 279499 - }, - { - "epoch": 0.015, - "grad_norm": 1.5451864004135132, - "learning_rate": 1.3923192104256888e-05, - "loss": 0.4283, - "step": 279500 - }, - { - "epoch": 0.015198, - "loss_gen": 6.10763692855835, - "loss_rtd": 0.20867428183555603, - "loss_sent": 0.31970512866973877, - "loss_sod": 0.05931424722075462, - "loss_total": 0.5876936912536621, - "step": 279599 - }, - { - "epoch": 0.015198, - "loss_gen": 6.079599857330322, - "loss_rtd": 0.2157566249370575, - "loss_sent": 0.24810534715652466, - "loss_sod": 0.15353038907051086, - "loss_total": 0.617392361164093, - "step": 279599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.3794596195220947, - "learning_rate": 1.3901228000573951e-05, - "loss": 0.429, - "step": 279600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.296116828918457, - "loss_rtd": 0.1936623603105545, - "loss_sent": 2.509882688173093e-05, - "loss_sod": 0.09577268362045288, - "loss_total": 0.28946012258529663, - "step": 279699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.075648307800293, - "loss_rtd": 0.16984108090400696, - "loss_sent": 2.540243076509796e-05, - "loss_sod": 0.16885748505592346, - "loss_total": 0.33872395753860474, - "step": 279699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.2368967533111572, - "learning_rate": 1.3879278437472083e-05, - "loss": 0.4295, - "step": 279700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.920673370361328, - "loss_rtd": 0.2070206254720688, - "loss_sent": 0.1506125032901764, - "loss_sod": 0.09789955615997314, - "loss_total": 0.4555326998233795, - "step": 279799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.842594146728516, - "loss_rtd": 0.22099550068378448, - "loss_sent": 0.16854983568191528, - "loss_sod": 0.028708353638648987, - "loss_total": 0.41825369000434875, - "step": 279799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.3597524166107178, - "learning_rate": 1.3857343423792518e-05, - "loss": 0.404, - "step": 279800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.681039333343506, - "loss_rtd": 0.20897695422172546, - "loss_sent": 0.016555357724428177, - "loss_sod": 0.05064110457897186, - "loss_total": 0.2761734127998352, - "step": 279899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.172247409820557, - "loss_rtd": 0.16623836755752563, - "loss_sent": 0.000807274307589978, - "loss_sod": 0.1987113505601883, - "loss_total": 0.3657569885253906, - "step": 279899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.2343541383743286, - "learning_rate": 1.3835422968370698e-05, - "loss": 0.4351, - "step": 279900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.726408958435059, - "loss_rtd": 0.2137012481689453, - "loss_sent": 0.2577979564666748, - "loss_sod": 0.01765153743326664, - "loss_total": 0.4891507625579834, - "step": 279999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.963380813598633, - "loss_rtd": 0.23053455352783203, - "loss_sent": 0.3462984263896942, - "loss_sod": 0.11793604493141174, - "loss_total": 0.694769024848938, - "step": 279999 - }, - { - "epoch": 0.016, - "grad_norm": 0.9562790989875793, - "learning_rate": 1.3813517080036198e-05, - "loss": 0.4177, - "step": 280000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4070643186569214, - "eval_runtime": 151.3227, - "eval_samples_per_second": 102.053, - "eval_steps_per_second": 0.8, - "step": 280000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.96182918548584, - "loss_rtd": 0.2096618264913559, - "loss_sent": 0.10885757207870483, - "loss_sod": 0.038305506110191345, - "loss_total": 0.3568249046802521, - "step": 280099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.419727802276611, - "loss_rtd": 0.19863568246364594, - "loss_sent": 0.031493622809648514, - "loss_sod": 0.08233529329299927, - "loss_total": 0.3124646246433258, - "step": 280099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.8464761972427368, - "learning_rate": 1.3791625767612682e-05, - "loss": 0.4365, - "step": 280100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.853739261627197, - "loss_rtd": 0.24195794761180878, - "loss_sent": 0.0944938138127327, - "loss_sod": 0.010652352124452591, - "loss_total": 0.34710410237312317, - "step": 280199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.659969806671143, - "loss_rtd": 0.21010221540927887, - "loss_sent": 0.24196834862232208, - "loss_sod": 0.007421444170176983, - "loss_total": 0.459492027759552, - "step": 280199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.7923970222473145, - "learning_rate": 1.3769749039917968e-05, - "loss": 0.4328, - "step": 280200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.695321083068848, - "loss_rtd": 0.2275717407464981, - "loss_sent": 0.2553408443927765, - "loss_sod": 0.06647907942533493, - "loss_total": 0.5493916273117065, - "step": 280299 - }, - { - "epoch": 0.016598, - "loss_gen": 6.023808002471924, - "loss_rtd": 0.22004935145378113, - "loss_sent": 0.12588292360305786, - "loss_sod": 0.03191075846552849, - "loss_total": 0.3778430223464966, - "step": 280299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.882409930229187, - "learning_rate": 1.3747886905764012e-05, - "loss": 0.4379, - "step": 280300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.480722427368164, - "loss_rtd": 0.19871079921722412, - "loss_sent": 0.09207594394683838, - "loss_sod": 0.009698489680886269, - "loss_total": 0.3004852533340454, - "step": 280399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.713308811187744, - "loss_rtd": 0.22099217772483826, - "loss_sent": 0.2679978311061859, - "loss_sod": 0.006327535957098007, - "loss_total": 0.4953175485134125, - "step": 280399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.1832282543182373, - "learning_rate": 1.372603937395689e-05, - "loss": 0.4526, - "step": 280400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.249807834625244, - "loss_rtd": 0.1779339760541916, - "loss_sent": 0.006705451291054487, - "loss_sod": 0.17092213034629822, - "loss_total": 0.3555615544319153, - "step": 280499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.696319103240967, - "loss_rtd": 0.20037059485912323, - "loss_sent": 0.13759541511535645, - "loss_sod": 0.05141723155975342, - "loss_total": 0.3893832564353943, - "step": 280499 - }, - { - "epoch": 0.017, - "grad_norm": 1.077949047088623, - "learning_rate": 1.370420645329676e-05, - "loss": 0.4188, - "step": 280500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.335850238800049, - "loss_rtd": 0.18321716785430908, - "loss_sent": 0.0345289409160614, - "loss_sod": 0.04553137719631195, - "loss_total": 0.26327747106552124, - "step": 280599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.804455280303955, - "loss_rtd": 0.22036710381507874, - "loss_sent": 0.6789878010749817, - "loss_sod": 0.03309963271021843, - "loss_total": 0.932454526424408, - "step": 280599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.4698665142059326, - "learning_rate": 1.3682388152577924e-05, - "loss": 0.4288, - "step": 280600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.792542934417725, - "loss_rtd": 0.2209930270910263, - "loss_sent": 0.2628214955329895, - "loss_sod": 0.145041823387146, - "loss_total": 0.628856360912323, - "step": 280699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.430458068847656, - "loss_rtd": 0.22566501796245575, - "loss_sent": 0.09811260551214218, - "loss_sod": 0.008618427440524101, - "loss_total": 0.3323960602283478, - "step": 280699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.1568013429641724, - "learning_rate": 1.3660584480588795e-05, - "loss": 0.4272, - "step": 280700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.936997413635254, - "loss_rtd": 0.21900537610054016, - "loss_sent": 0.11840333044528961, - "loss_sod": 0.04137301817536354, - "loss_total": 0.3787817358970642, - "step": 280799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.852680206298828, - "loss_rtd": 0.19177745282649994, - "loss_sent": 0.10055841505527496, - "loss_sod": 0.09903798252344131, - "loss_total": 0.391373872756958, - "step": 280799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.954412579536438, - "learning_rate": 1.3638795446111913e-05, - "loss": 0.4445, - "step": 280800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.223379135131836, - "loss_rtd": 0.18811871111392975, - "loss_sent": 2.899908577091992e-05, - "loss_sod": 0.05685259774327278, - "loss_total": 0.24500030279159546, - "step": 280899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.524837970733643, - "loss_rtd": 0.19736768305301666, - "loss_sent": 0.14851026237010956, - "loss_sod": 0.08419989049434662, - "loss_total": 0.43007785081863403, - "step": 280899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.0047430992126465, - "learning_rate": 1.3617021057923856e-05, - "loss": 0.4326, - "step": 280900 - }, - { - "epoch": 0.017998, - "loss_gen": 6.013608455657959, - "loss_rtd": 0.1934678703546524, - "loss_sent": 0.205018550157547, - "loss_sod": 0.03125808387994766, - "loss_total": 0.42974451184272766, - "step": 280999 - }, - { - "epoch": 0.017998, - "loss_gen": 6.036141395568848, - "loss_rtd": 0.22736336290836334, - "loss_sent": 0.20304203033447266, - "loss_sod": 0.0939517617225647, - "loss_total": 0.5243571400642395, - "step": 280999 - }, - { - "epoch": 0.018, - "grad_norm": 1.1123294830322266, - "learning_rate": 1.3595261324795366e-05, - "loss": 0.4394, - "step": 281000 - }, - { - "epoch": 0.018, - "eval_loss": 0.4104762673377991, - "eval_runtime": 151.2003, - "eval_samples_per_second": 102.136, - "eval_steps_per_second": 0.8, - "step": 281000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.797642230987549, - "loss_rtd": 0.21336081624031067, - "loss_sent": 0.1785113662481308, - "loss_sod": 0.023090077564120293, - "loss_total": 0.4149622619152069, - "step": 281099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.4549336433410645, - "loss_rtd": 0.20158612728118896, - "loss_sent": 0.027963347733020782, - "loss_sod": 0.11027807742357254, - "loss_total": 0.3398275375366211, - "step": 281099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.2796478271484375, - "learning_rate": 1.3573516255491265e-05, - "loss": 0.4314, - "step": 281100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.885648250579834, - "loss_rtd": 0.2175597995519638, - "loss_sent": 0.4733242988586426, - "loss_sod": 0.014459663070738316, - "loss_total": 0.7053437829017639, - "step": 281199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.595763206481934, - "loss_rtd": 0.2076541930437088, - "loss_sent": 0.0002502041752450168, - "loss_sod": 0.15390491485595703, - "loss_total": 0.36180928349494934, - "step": 281199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.9548704624176025, - "learning_rate": 1.3551785858770478e-05, - "loss": 0.4098, - "step": 281200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.988670349121094, - "loss_rtd": 0.21267130970954895, - "loss_sent": 0.3423349857330322, - "loss_sod": 0.018251899629831314, - "loss_total": 0.5732581615447998, - "step": 281299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.636219501495361, - "loss_rtd": 0.2078903615474701, - "loss_sent": 0.13064752519130707, - "loss_sod": 0.0047988081350922585, - "loss_total": 0.34333670139312744, - "step": 281299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.9335140585899353, - "learning_rate": 1.3530070143385965e-05, - "loss": 0.43, - "step": 281300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.231248378753662, - "loss_rtd": 0.17432983219623566, - "loss_sent": 0.05204940587282181, - "loss_sod": 0.02497088722884655, - "loss_total": 0.25135013461112976, - "step": 281399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.542165279388428, - "loss_rtd": 0.2278299331665039, - "loss_sent": 0.13975077867507935, - "loss_sod": 0.02129683829843998, - "loss_total": 0.3888775706291199, - "step": 281399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.9037200808525085, - "learning_rate": 1.350836911808484e-05, - "loss": 0.4472, - "step": 281400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.556024551391602, - "loss_rtd": 0.20617425441741943, - "loss_sent": 0.04082641005516052, - "loss_sod": 0.06385300308465958, - "loss_total": 0.31085366010665894, - "step": 281499 - }, - { - "epoch": 0.018998, - "loss_gen": 6.012040615081787, - "loss_rtd": 0.21958351135253906, - "loss_sent": 0.09644078463315964, - "loss_sod": 0.11470630764961243, - "loss_total": 0.4307306110858917, - "step": 281499 - }, - { - "epoch": 0.019, - "grad_norm": 1.2389469146728516, - "learning_rate": 1.3486682791608285e-05, - "loss": 0.4261, - "step": 281500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.968993663787842, - "loss_rtd": 0.2420225739479065, - "loss_sent": 0.18818706274032593, - "loss_sod": 0.15060660243034363, - "loss_total": 0.5808162689208984, - "step": 281599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.804429531097412, - "loss_rtd": 0.2363513559103012, - "loss_sent": 0.6853828430175781, - "loss_sod": 0.01946105621755123, - "loss_total": 0.9411952495574951, - "step": 281599 - }, - { - "epoch": 0.0192, - "grad_norm": 2.3807199001312256, - "learning_rate": 1.3465011172691521e-05, - "loss": 0.4191, - "step": 281600 - }, - { - "epoch": 0.019398, - "loss_gen": 6.050154209136963, - "loss_rtd": 0.21635742485523224, - "loss_sent": 0.05861207842826843, - "loss_sod": 0.02883468195796013, - "loss_total": 0.3038041889667511, - "step": 281699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.7222676277160645, - "loss_rtd": 0.21847565472126007, - "loss_sent": 0.1909605860710144, - "loss_sod": 0.11452765762805939, - "loss_total": 0.5239639282226562, - "step": 281699 - }, - { - "epoch": 0.0194, - "grad_norm": 2.022014856338501, - "learning_rate": 1.344335427006389e-05, - "loss": 0.4236, - "step": 281700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.700893878936768, - "loss_rtd": 0.20058487355709076, - "loss_sent": 0.23017999529838562, - "loss_sod": 0.017526116222143173, - "loss_total": 0.44829100370407104, - "step": 281799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.10507345199585, - "loss_rtd": 0.16948948800563812, - "loss_sent": 2.6156780222663656e-05, - "loss_sod": 0.03725551813840866, - "loss_total": 0.2067711502313614, - "step": 281799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.7906410694122314, - "learning_rate": 1.3421712092448784e-05, - "loss": 0.42, - "step": 281800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.543769359588623, - "loss_rtd": 0.2028585523366928, - "loss_sent": 0.011168386787176132, - "loss_sod": 0.0761219710111618, - "loss_total": 0.29014891386032104, - "step": 281899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.394286155700684, - "loss_rtd": 0.17520064115524292, - "loss_sent": 6.80653247400187e-05, - "loss_sod": 0.19853290915489197, - "loss_total": 0.3738016188144684, - "step": 281899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.062142014503479, - "learning_rate": 1.3400084648563687e-05, - "loss": 0.423, - "step": 281900 - }, - { - "epoch": 0.019998, - "loss_gen": 6.219426155090332, - "loss_rtd": 0.21608854830265045, - "loss_sent": 0.15127773582935333, - "loss_sod": 0.039266083389520645, - "loss_total": 0.40663236379623413, - "step": 281999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.831309795379639, - "loss_rtd": 0.2534753382205963, - "loss_sent": 0.3156713843345642, - "loss_sod": 0.053194474428892136, - "loss_total": 0.6223411560058594, - "step": 281999 - }, - { - "epoch": 0.02, - "grad_norm": 1.422544002532959, - "learning_rate": 1.3378471947120108e-05, - "loss": 0.4254, - "step": 282000 - }, - { - "epoch": 0.02, - "eval_loss": 0.4120514988899231, - "eval_runtime": 153.1231, - "eval_samples_per_second": 100.853, - "eval_steps_per_second": 0.79, - "step": 282000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.8682756423950195, - "loss_rtd": 0.19730664789676666, - "loss_sent": 0.2298789918422699, - "loss_sod": 0.051396775990724564, - "loss_total": 0.4785824120044708, - "step": 282099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.661902904510498, - "loss_rtd": 0.22755858302116394, - "loss_sent": 0.5371243953704834, - "loss_sod": 0.0011561757419258356, - "loss_total": 0.765839159488678, - "step": 282099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.3973290920257568, - "learning_rate": 1.3356873996823643e-05, - "loss": 0.4355, - "step": 282100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.888813018798828, - "loss_rtd": 0.20488464832305908, - "loss_sent": 0.2436603158712387, - "loss_sod": 0.023993542417883873, - "loss_total": 0.4725385010242462, - "step": 282199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.726215362548828, - "loss_rtd": 0.22751547396183014, - "loss_sent": 0.15657109022140503, - "loss_sod": 0.08309026807546616, - "loss_total": 0.4671768248081207, - "step": 282199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.1480568647384644, - "learning_rate": 1.3335290806373951e-05, - "loss": 0.4332, - "step": 282200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.7480974197387695, - "loss_rtd": 0.22389169037342072, - "loss_sent": 0.22032272815704346, - "loss_sod": 0.024300508201122284, - "loss_total": 0.46851491928100586, - "step": 282299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.5183515548706055, - "loss_rtd": 0.19824880361557007, - "loss_sent": 0.13213348388671875, - "loss_sod": 0.021483005955815315, - "loss_total": 0.351865291595459, - "step": 282299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.1079515218734741, - "learning_rate": 1.3313722384464756e-05, - "loss": 0.436, - "step": 282300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.875498294830322, - "loss_rtd": 0.20670932531356812, - "loss_sent": 0.09952390938997269, - "loss_sod": 0.05448728799819946, - "loss_total": 0.36072051525115967, - "step": 282399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.664567470550537, - "loss_rtd": 0.18414762616157532, - "loss_sent": 0.04319232329726219, - "loss_sod": 0.030659429728984833, - "loss_total": 0.25799939036369324, - "step": 282399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8064749240875244, - "learning_rate": 1.3292168739783777e-05, - "loss": 0.4235, - "step": 282400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.922492980957031, - "loss_rtd": 0.22151030600070953, - "loss_sent": 0.24791277945041656, - "loss_sod": 0.06692000478506088, - "loss_total": 0.5363430976867676, - "step": 282499 - }, - { - "epoch": 0.020998, - "loss_gen": 6.015212059020996, - "loss_rtd": 0.21163247525691986, - "loss_sent": 0.32908403873443604, - "loss_sod": 0.0717523992061615, - "loss_total": 0.6124688982963562, - "step": 282499 - }, - { - "epoch": 0.021, - "grad_norm": 1.5593205690383911, - "learning_rate": 1.3270629881012842e-05, - "loss": 0.4279, - "step": 282500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.767723083496094, - "loss_rtd": 0.22423988580703735, - "loss_sent": 0.12806497514247894, - "loss_sod": 0.05011466145515442, - "loss_total": 0.4024195075035095, - "step": 282599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.565870761871338, - "loss_rtd": 0.19891837239265442, - "loss_sent": 0.07426561415195465, - "loss_sod": 0.018944283947348595, - "loss_total": 0.2921282649040222, - "step": 282599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.7791507840156555, - "learning_rate": 1.32491058168278e-05, - "loss": 0.4299, - "step": 282600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.895869731903076, - "loss_rtd": 0.21902136504650116, - "loss_sent": 0.31859588623046875, - "loss_sod": 0.022601434960961342, - "loss_total": 0.5602186918258667, - "step": 282699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.891465187072754, - "loss_rtd": 0.22689184546470642, - "loss_sent": 0.12094026058912277, - "loss_sod": 0.09026947617530823, - "loss_total": 0.438101589679718, - "step": 282699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.9102500081062317, - "learning_rate": 1.3227596555898553e-05, - "loss": 0.4355, - "step": 282700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.7174973487854, - "loss_rtd": 0.18339064717292786, - "loss_sent": 0.15521188080310822, - "loss_sod": 0.12047228217124939, - "loss_total": 0.45907479524612427, - "step": 282799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.60105037689209, - "loss_rtd": 0.2015862911939621, - "loss_sent": 0.2438632994890213, - "loss_sod": 0.00624456163495779, - "loss_total": 0.451694130897522, - "step": 282799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.2852686643600464, - "learning_rate": 1.3206102106889001e-05, - "loss": 0.4376, - "step": 282800 - }, - { - "epoch": 0.021798, - "loss_gen": 6.158794403076172, - "loss_rtd": 0.2291603535413742, - "loss_sent": 0.16960297524929047, - "loss_sod": 0.0897158682346344, - "loss_total": 0.4884791970252991, - "step": 282899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.54933500289917, - "loss_rtd": 0.19277606904506683, - "loss_sent": 0.00838877446949482, - "loss_sod": 0.15760350227355957, - "loss_total": 0.35876837372779846, - "step": 282899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.240827202796936, - "learning_rate": 1.318462247845712e-05, - "loss": 0.4283, - "step": 282900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.909274578094482, - "loss_rtd": 0.23066742718219757, - "loss_sent": 0.340031236410141, - "loss_sod": 0.09343505650758743, - "loss_total": 0.6641337275505066, - "step": 282999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.683666229248047, - "loss_rtd": 0.23398225009441376, - "loss_sent": 0.11925114691257477, - "loss_sod": 0.015400709584355354, - "loss_total": 0.36863410472869873, - "step": 282999 - }, - { - "epoch": 0.022, - "grad_norm": 1.2445647716522217, - "learning_rate": 1.3163157679254918e-05, - "loss": 0.4139, - "step": 283000 - }, - { - "epoch": 0.022, - "eval_loss": 0.40600138902664185, - "eval_runtime": 151.0534, - "eval_samples_per_second": 102.235, - "eval_steps_per_second": 0.801, - "step": 283000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.523969650268555, - "loss_rtd": 0.21689656376838684, - "loss_sent": 0.07073554396629333, - "loss_sod": 0.06892915815114975, - "loss_total": 0.3565612733364105, - "step": 283099 - }, - { - "epoch": 0.022198, - "loss_gen": 4.999656677246094, - "loss_rtd": 0.1698964238166809, - "loss_sent": 2.757607217063196e-05, - "loss_sod": 0.07586808502674103, - "loss_total": 0.24579209089279175, - "step": 283099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.0766270160675049, - "learning_rate": 1.3141707717928381e-05, - "loss": 0.4368, - "step": 283100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.874646186828613, - "loss_rtd": 0.20832401514053345, - "loss_sent": 0.3349180817604065, - "loss_sod": 0.15273280441761017, - "loss_total": 0.6959748864173889, - "step": 283199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.822833061218262, - "loss_rtd": 0.21687422692775726, - "loss_sent": 0.06077207997441292, - "loss_sod": 0.007514716126024723, - "loss_total": 0.28516101837158203, - "step": 283199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.3074407577514648, - "learning_rate": 1.3120272603117573e-05, - "loss": 0.4209, - "step": 283200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.798384666442871, - "loss_rtd": 0.231093168258667, - "loss_sent": 0.30197083950042725, - "loss_sod": 0.08319886028766632, - "loss_total": 0.6162628531455994, - "step": 283299 - }, - { - "epoch": 0.022598, - "loss_gen": 6.18732213973999, - "loss_rtd": 0.21987217664718628, - "loss_sent": 0.1981782764196396, - "loss_sod": 0.04336914047598839, - "loss_total": 0.46141958236694336, - "step": 283299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.1965978145599365, - "learning_rate": 1.3098852343456542e-05, - "loss": 0.4283, - "step": 283300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.856659412384033, - "loss_rtd": 0.22240784764289856, - "loss_sent": 0.026708999648690224, - "loss_sod": 0.11427552998065948, - "loss_total": 0.3633923828601837, - "step": 283399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.88224458694458, - "loss_rtd": 0.20745408535003662, - "loss_sent": 0.43349555134773254, - "loss_sod": 0.012127671390771866, - "loss_total": 0.6530773043632507, - "step": 283399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.0618562698364258, - "learning_rate": 1.3077446947573397e-05, - "loss": 0.4178, - "step": 283400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.896244049072266, - "loss_rtd": 0.2344936728477478, - "loss_sent": 0.11081881076097488, - "loss_sod": 0.07712383568286896, - "loss_total": 0.42243629693984985, - "step": 283499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.9980645179748535, - "loss_rtd": 0.21711502969264984, - "loss_sent": 0.2896641492843628, - "loss_sod": 0.011189509183168411, - "loss_total": 0.5179686546325684, - "step": 283499 - }, - { - "epoch": 0.023, - "grad_norm": 0.949335515499115, - "learning_rate": 1.3056056424090186e-05, - "loss": 0.4196, - "step": 283500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.704510688781738, - "loss_rtd": 0.2115384191274643, - "loss_sent": 0.0277202520519495, - "loss_sod": 0.009425907395780087, - "loss_total": 0.2486845850944519, - "step": 283599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.615524768829346, - "loss_rtd": 0.20658348500728607, - "loss_sent": 0.059819743037223816, - "loss_sod": 0.021676931530237198, - "loss_total": 0.2880801558494568, - "step": 283599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.48402082920074463, - "learning_rate": 1.3034680781623026e-05, - "loss": 0.4319, - "step": 283600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.913975715637207, - "loss_rtd": 0.2053077667951584, - "loss_sent": 0.48148226737976074, - "loss_sod": 0.09669242799282074, - "loss_total": 0.7834824323654175, - "step": 283699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.48197603225708, - "loss_rtd": 0.21156112849712372, - "loss_sent": 0.41472411155700684, - "loss_sod": 0.013013198040425777, - "loss_total": 0.6392984390258789, - "step": 283699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.6488102674484253, - "learning_rate": 1.3013320028782033e-05, - "loss": 0.4235, - "step": 283700 - }, - { - "epoch": 0.023598, - "loss_gen": 6.132323265075684, - "loss_rtd": 0.23241229355335236, - "loss_sent": 0.3086828887462616, - "loss_sod": 0.03744089603424072, - "loss_total": 0.5785360336303711, - "step": 283799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.932138919830322, - "loss_rtd": 0.2254108488559723, - "loss_sent": 0.12026329338550568, - "loss_sod": 0.026655053719878197, - "loss_total": 0.3723291754722595, - "step": 283799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.9617313146591187, - "learning_rate": 1.2991974174171323e-05, - "loss": 0.4376, - "step": 283800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.448973655700684, - "loss_rtd": 0.1710488349199295, - "loss_sent": 0.021012771874666214, - "loss_sod": 0.0922928899526596, - "loss_total": 0.2843545079231262, - "step": 283899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.906702995300293, - "loss_rtd": 0.2369176596403122, - "loss_sent": 0.11955936253070831, - "loss_sod": 0.05116061121225357, - "loss_total": 0.40763765573501587, - "step": 283899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.7681860327720642, - "learning_rate": 1.2970643226388973e-05, - "loss": 0.4387, - "step": 283900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.948052883148193, - "loss_rtd": 0.2227478325366974, - "loss_sent": 0.17543916404247284, - "loss_sod": 0.05872737243771553, - "loss_total": 0.45691436529159546, - "step": 283999 - }, - { - "epoch": 0.023998, - "loss_gen": 6.008965969085693, - "loss_rtd": 0.2151934653520584, - "loss_sent": 0.13839246332645416, - "loss_sod": 0.03402182459831238, - "loss_total": 0.38760775327682495, - "step": 283999 - }, - { - "epoch": 0.024, - "grad_norm": 0.9981189370155334, - "learning_rate": 1.2949327194027105e-05, - "loss": 0.4168, - "step": 284000 - }, - { - "epoch": 0.024, - "eval_loss": 0.41382986307144165, - "eval_runtime": 151.2755, - "eval_samples_per_second": 102.085, - "eval_steps_per_second": 0.8, - "step": 284000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.6349101066589355, - "loss_rtd": 0.2067456692457199, - "loss_sent": 0.3376573920249939, - "loss_sod": 0.0167418010532856, - "loss_total": 0.5611448287963867, - "step": 284099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.6613640785217285, - "loss_rtd": 0.22606918215751648, - "loss_sent": 0.10882196575403214, - "loss_sod": 0.004473487846553326, - "loss_total": 0.3393646478652954, - "step": 284099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.751107394695282, - "learning_rate": 1.2928026085671813e-05, - "loss": 0.4178, - "step": 284100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.774574279785156, - "loss_rtd": 0.20110966265201569, - "loss_sent": 0.18786457180976868, - "loss_sod": 0.09336121380329132, - "loss_total": 0.4823354482650757, - "step": 284199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.376277923583984, - "loss_rtd": 0.1898571401834488, - "loss_sent": 2.540801688155625e-05, - "loss_sod": 0.12208402156829834, - "loss_total": 0.31196656823158264, - "step": 284199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.0591825246810913, - "learning_rate": 1.2906739909903193e-05, - "loss": 0.4433, - "step": 284200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.828518390655518, - "loss_rtd": 0.23971131443977356, - "loss_sent": 0.12309505045413971, - "loss_sod": 0.1512659788131714, - "loss_total": 0.5140723586082458, - "step": 284299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.744897365570068, - "loss_rtd": 0.22822363674640656, - "loss_sent": 0.08411303162574768, - "loss_sod": 0.033215202391147614, - "loss_total": 0.34555187821388245, - "step": 284299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.157599687576294, - "learning_rate": 1.2885468675295287e-05, - "loss": 0.4317, - "step": 284300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.711983680725098, - "loss_rtd": 0.21978799998760223, - "loss_sent": 0.45889976620674133, - "loss_sod": 0.12963168323040009, - "loss_total": 0.8083194494247437, - "step": 284399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.7123517990112305, - "loss_rtd": 0.2189454883337021, - "loss_sent": 0.1288938671350479, - "loss_sod": 0.0427108071744442, - "loss_total": 0.3905501365661621, - "step": 284399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.4148083925247192, - "learning_rate": 1.2864212390416158e-05, - "loss": 0.4569, - "step": 284400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.427609920501709, - "loss_rtd": 0.18476331233978271, - "loss_sent": 0.24138493835926056, - "loss_sod": 0.016914937645196915, - "loss_total": 0.4430631995201111, - "step": 284499 - }, - { - "epoch": 0.024998, - "loss_gen": 6.197299003601074, - "loss_rtd": 0.21423634886741638, - "loss_sent": 0.07877682894468307, - "loss_sod": 0.05729764699935913, - "loss_total": 0.3503108322620392, - "step": 284499 - }, - { - "epoch": 0.025, - "grad_norm": 0.9697398543357849, - "learning_rate": 1.2842971063827857e-05, - "loss": 0.4331, - "step": 284500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.556057453155518, - "loss_rtd": 0.19708921015262604, - "loss_sent": 2.9624856324517168e-05, - "loss_sod": 0.08423587679862976, - "loss_total": 0.28135472536087036, - "step": 284599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.281551837921143, - "loss_rtd": 0.1667519062757492, - "loss_sent": 3.996157829533331e-05, - "loss_sod": 0.07685260474681854, - "loss_total": 0.24364447593688965, - "step": 284599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.7385295033454895, - "learning_rate": 1.2821744704086352e-05, - "loss": 0.4358, - "step": 284600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.991865634918213, - "loss_rtd": 0.21228674054145813, - "loss_sent": 0.26478254795074463, - "loss_sod": 0.030033813789486885, - "loss_total": 0.5071030855178833, - "step": 284699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.828866004943848, - "loss_rtd": 0.2195064127445221, - "loss_sent": 0.19662582874298096, - "loss_sod": 0.03633663430809975, - "loss_total": 0.4524688720703125, - "step": 284699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.1441248655319214, - "learning_rate": 1.2800533319741631e-05, - "loss": 0.419, - "step": 284700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.646084785461426, - "loss_rtd": 0.24002711474895477, - "loss_sent": 0.3304119408130646, - "loss_sod": 0.015011422336101532, - "loss_total": 0.5854504704475403, - "step": 284799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.964494705200195, - "loss_rtd": 0.20331549644470215, - "loss_sent": 0.3138743042945862, - "loss_sod": 0.1335727572441101, - "loss_total": 0.6507625579833984, - "step": 284799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.8249038457870483, - "learning_rate": 1.2779336919337643e-05, - "loss": 0.4332, - "step": 284800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.492527961730957, - "loss_rtd": 0.20996277034282684, - "loss_sent": 0.12192250788211823, - "loss_sod": 0.010068539530038834, - "loss_total": 0.3419538140296936, - "step": 284899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.226845741271973, - "loss_rtd": 0.1744282841682434, - "loss_sent": 0.0313674733042717, - "loss_sod": 0.033174075186252594, - "loss_total": 0.2389698326587677, - "step": 284899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.7089688777923584, - "learning_rate": 1.2758155511412306e-05, - "loss": 0.4292, - "step": 284900 - }, - { - "epoch": 0.025998, - "loss_gen": 6.070541858673096, - "loss_rtd": 0.2007075399160385, - "loss_sent": 0.09279533475637436, - "loss_sod": 0.11625343561172485, - "loss_total": 0.4097563326358795, - "step": 284999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.112553119659424, - "loss_rtd": 0.18466633558273315, - "loss_sent": 2.7075042453361675e-05, - "loss_sod": 0.08028829097747803, - "loss_total": 0.2649816870689392, - "step": 284999 - }, - { - "epoch": 0.026, - "grad_norm": 0.9484739303588867, - "learning_rate": 1.273698910449746e-05, - "loss": 0.4421, - "step": 285000 - }, - { - "epoch": 0.026, - "eval_loss": 0.41398826241493225, - "eval_runtime": 151.3999, - "eval_samples_per_second": 102.001, - "eval_steps_per_second": 0.799, - "step": 285000 - }, - { - "epoch": 0.026198, - "loss_gen": 6.07914924621582, - "loss_rtd": 0.21907606720924377, - "loss_sent": 0.08856616169214249, - "loss_sod": 0.037902090698480606, - "loss_total": 0.34554430842399597, - "step": 285099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.945745468139648, - "loss_rtd": 0.24509187042713165, - "loss_sent": 0.12188887596130371, - "loss_sod": 0.02607342228293419, - "loss_total": 0.39305415749549866, - "step": 285099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.6241201162338257, - "learning_rate": 1.271583770711895e-05, - "loss": 0.4237, - "step": 285100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.871770858764648, - "loss_rtd": 0.1924058347940445, - "loss_sent": 0.07700707763433456, - "loss_sod": 0.03474319353699684, - "loss_total": 0.304156094789505, - "step": 285199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.897307395935059, - "loss_rtd": 0.20265106856822968, - "loss_sent": 0.07986465096473694, - "loss_sod": 0.05640251934528351, - "loss_total": 0.3389182388782501, - "step": 285199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.8830915093421936, - "learning_rate": 1.2694701327796548e-05, - "loss": 0.4342, - "step": 285200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.877799034118652, - "loss_rtd": 0.2351219356060028, - "loss_sent": 0.3728555142879486, - "loss_sod": 0.017333796247839928, - "loss_total": 0.6253112554550171, - "step": 285299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.8381547927856445, - "loss_rtd": 0.21782098710536957, - "loss_sent": 0.2870821952819824, - "loss_sod": 0.07433715462684631, - "loss_total": 0.5792403221130371, - "step": 285299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.999060034751892, - "learning_rate": 1.267357997504401e-05, - "loss": 0.4218, - "step": 285300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.939278602600098, - "loss_rtd": 0.22044366598129272, - "loss_sent": 0.3877010941505432, - "loss_sod": 0.028766902163624763, - "loss_total": 0.6369116306304932, - "step": 285399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.718808174133301, - "loss_rtd": 0.20779336988925934, - "loss_sent": 0.22688840329647064, - "loss_sod": 0.0855991393327713, - "loss_total": 0.5202808976173401, - "step": 285399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.714625597000122, - "learning_rate": 1.2652473657368974e-05, - "loss": 0.4393, - "step": 285400 - }, - { - "epoch": 0.026998, - "loss_gen": 6.065155506134033, - "loss_rtd": 0.1971694380044937, - "loss_sent": 0.5773887038230896, - "loss_sod": 0.02404419332742691, - "loss_total": 0.7986023426055908, - "step": 285499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.852124214172363, - "loss_rtd": 0.21547146141529083, - "loss_sent": 0.13859428465366364, - "loss_sod": 0.05556311458349228, - "loss_total": 0.40962886810302734, - "step": 285499 - }, - { - "epoch": 0.027, - "grad_norm": 1.6814595460891724, - "learning_rate": 1.2631382383273088e-05, - "loss": 0.4486, - "step": 285500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.898143291473389, - "loss_rtd": 0.21077917516231537, - "loss_sent": 0.3432920277118683, - "loss_sod": 0.042259760200977325, - "loss_total": 0.596331000328064, - "step": 285599 - }, - { - "epoch": 0.027198, - "loss_gen": 6.2751288414001465, - "loss_rtd": 0.21900509297847748, - "loss_sent": 0.08064062893390656, - "loss_sod": 0.055173758417367935, - "loss_total": 0.35481947660446167, - "step": 285599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.4933332204818726, - "learning_rate": 1.2610306161251905e-05, - "loss": 0.4231, - "step": 285600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.716874599456787, - "loss_rtd": 0.2126436084508896, - "loss_sent": 0.1443919539451599, - "loss_sod": 0.07392412424087524, - "loss_total": 0.43095970153808594, - "step": 285699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.25924015045166, - "loss_rtd": 0.2138407975435257, - "loss_sent": 0.011563577689230442, - "loss_sod": 0.1020401269197464, - "loss_total": 0.32744449377059937, - "step": 285699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.8555403351783752, - "learning_rate": 1.2589244999794946e-05, - "loss": 0.4172, - "step": 285700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.041465759277344, - "loss_rtd": 0.2162010371685028, - "loss_sent": 0.23427049815654755, - "loss_sod": 0.08856480568647385, - "loss_total": 0.5390363335609436, - "step": 285799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.668476581573486, - "loss_rtd": 0.20259647071361542, - "loss_sent": 0.341512531042099, - "loss_sod": 0.00797621626406908, - "loss_total": 0.5520852208137512, - "step": 285799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.9753674268722534, - "learning_rate": 1.2568198907385609e-05, - "loss": 0.4292, - "step": 285800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.720056533813477, - "loss_rtd": 0.2221757173538208, - "loss_sent": 0.281146377325058, - "loss_sod": 0.03888082504272461, - "loss_total": 0.5422029495239258, - "step": 285899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.756960868835449, - "loss_rtd": 0.2197091430425644, - "loss_sent": 0.3437296152114868, - "loss_sod": 0.013600092381238937, - "loss_total": 0.5770388841629028, - "step": 285899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.7216100692749023, - "learning_rate": 1.2547167892501277e-05, - "loss": 0.423, - "step": 285900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.471036434173584, - "loss_rtd": 0.1858777403831482, - "loss_sent": 0.10051614046096802, - "loss_sod": 0.07392618805170059, - "loss_total": 0.3603200614452362, - "step": 285999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.082937717437744, - "loss_rtd": 0.16976216435432434, - "loss_sent": 0.026819400489330292, - "loss_sod": 0.054379597306251526, - "loss_total": 0.25096115469932556, - "step": 285999 - }, - { - "epoch": 0.028, - "grad_norm": 0.9310762286186218, - "learning_rate": 1.2526151963613242e-05, - "loss": 0.4371, - "step": 286000 - }, - { - "epoch": 0.028, - "eval_loss": 0.40129756927490234, - "eval_runtime": 151.672, - "eval_samples_per_second": 101.818, - "eval_steps_per_second": 0.798, - "step": 286000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.8664326667785645, - "loss_rtd": 0.20758090913295746, - "loss_sent": 0.2003534883260727, - "loss_sod": 0.07129999995231628, - "loss_total": 0.47923439741134644, - "step": 286099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.703149318695068, - "loss_rtd": 0.2316005825996399, - "loss_sent": 0.1504419445991516, - "loss_sod": 0.05148168280720711, - "loss_total": 0.4335242211818695, - "step": 286099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.8415143489837646, - "learning_rate": 1.2505151129186727e-05, - "loss": 0.4379, - "step": 286100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.7814459800720215, - "loss_rtd": 0.18596313893795013, - "loss_sent": 0.218510240316391, - "loss_sod": 0.048116009682416916, - "loss_total": 0.45258939266204834, - "step": 286199 - }, - { - "epoch": 0.028398, - "loss_gen": 6.188547134399414, - "loss_rtd": 0.20028914511203766, - "loss_sent": 0.2166014164686203, - "loss_sod": 0.03200073167681694, - "loss_total": 0.448891282081604, - "step": 286199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.1353873014450073, - "learning_rate": 1.2484165397680841e-05, - "loss": 0.4263, - "step": 286200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.512418270111084, - "loss_rtd": 0.22223679721355438, - "loss_sent": 0.15049083530902863, - "loss_sod": 0.009644124656915665, - "loss_total": 0.3823717534542084, - "step": 286299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.767345905303955, - "loss_rtd": 0.19095493853092194, - "loss_sent": 0.03672843053936958, - "loss_sod": 0.12461046129465103, - "loss_total": 0.35229384899139404, - "step": 286299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.9279076457023621, - "learning_rate": 1.2463194777548642e-05, - "loss": 0.44, - "step": 286300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.638607025146484, - "loss_rtd": 0.21502289175987244, - "loss_sent": 0.09224697202444077, - "loss_sod": 0.036453887820243835, - "loss_total": 0.34372374415397644, - "step": 286399 - }, - { - "epoch": 0.028798, - "loss_gen": 6.230381965637207, - "loss_rtd": 0.21746058762073517, - "loss_sent": 0.10541404783725739, - "loss_sod": 0.031930990517139435, - "loss_total": 0.3548056483268738, - "step": 286399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.8662870526313782, - "learning_rate": 1.2442239277237117e-05, - "loss": 0.4413, - "step": 286400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.631784439086914, - "loss_rtd": 0.22725287079811096, - "loss_sent": 0.07502210140228271, - "loss_sod": 0.0033889992628246546, - "loss_total": 0.30566397309303284, - "step": 286499 - }, - { - "epoch": 0.028998, - "loss_gen": 6.191287517547607, - "loss_rtd": 0.22293871641159058, - "loss_sent": 0.1790841966867447, - "loss_sod": 0.06443575024604797, - "loss_total": 0.46645867824554443, - "step": 286499 - }, - { - "epoch": 0.029, - "grad_norm": 0.5956138968467712, - "learning_rate": 1.24212989051871e-05, - "loss": 0.402, - "step": 286500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.489704132080078, - "loss_rtd": 0.18943743407726288, - "loss_sent": 0.0016923850635066628, - "loss_sod": 0.13636058568954468, - "loss_total": 0.3274904191493988, - "step": 286599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.920446872711182, - "loss_rtd": 0.19623598456382751, - "loss_sent": 0.15697337687015533, - "loss_sod": 0.01677049696445465, - "loss_total": 0.3699798583984375, - "step": 286599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.7704411745071411, - "learning_rate": 1.240037366983341e-05, - "loss": 0.4328, - "step": 286600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.745086193084717, - "loss_rtd": 0.22206702828407288, - "loss_sent": 0.08591610193252563, - "loss_sod": 0.012878085486590862, - "loss_total": 0.32086122035980225, - "step": 286699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.579181671142578, - "loss_rtd": 0.20150712132453918, - "loss_sent": 0.20237770676612854, - "loss_sod": 0.04131292179226875, - "loss_total": 0.4451977610588074, - "step": 286699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.2384002208709717, - "learning_rate": 1.2379463579604689e-05, - "loss": 0.4294, - "step": 286700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.212658405303955, - "loss_rtd": 0.1719597429037094, - "loss_sent": 0.017256371676921844, - "loss_sod": 0.26873350143432617, - "loss_total": 0.4579496383666992, - "step": 286799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.894923686981201, - "loss_rtd": 0.22619961202144623, - "loss_sent": 0.18095217645168304, - "loss_sod": 0.09205351024866104, - "loss_total": 0.4992052912712097, - "step": 286799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.484777569770813, - "learning_rate": 1.2358568642923546e-05, - "loss": 0.4396, - "step": 286800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.847315788269043, - "loss_rtd": 0.2166406661272049, - "loss_sent": 0.16564525663852692, - "loss_sod": 0.011585107073187828, - "loss_total": 0.393871009349823, - "step": 286899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.832507133483887, - "loss_rtd": 0.21148845553398132, - "loss_sent": 0.21389459073543549, - "loss_sod": 0.05752115696668625, - "loss_total": 0.48290419578552246, - "step": 286899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.6614314317703247, - "learning_rate": 1.233768886820646e-05, - "loss": 0.4519, - "step": 286900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.466715335845947, - "loss_rtd": 0.18587522208690643, - "loss_sent": 0.2249709814786911, - "loss_sod": 0.05599728226661682, - "loss_total": 0.46684348583221436, - "step": 286999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.908249378204346, - "loss_rtd": 0.20992301404476166, - "loss_sent": 0.20955248177051544, - "loss_sod": 0.03031115047633648, - "loss_total": 0.4497866630554199, - "step": 286999 - }, - { - "epoch": 0.03, - "grad_norm": 1.157019853591919, - "learning_rate": 1.2316824263863785e-05, - "loss": 0.4248, - "step": 287000 - }, - { - "epoch": 0.03, - "eval_loss": 0.40862521529197693, - "eval_runtime": 151.418, - "eval_samples_per_second": 101.989, - "eval_steps_per_second": 0.799, - "step": 287000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.93383264541626, - "loss_rtd": 0.218502476811409, - "loss_sent": 0.08899467438459396, - "loss_sod": 0.09092241525650024, - "loss_total": 0.3984195590019226, - "step": 287099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.598023414611816, - "loss_rtd": 0.22513043880462646, - "loss_sent": 0.07191810756921768, - "loss_sod": 0.0750078409910202, - "loss_total": 0.37205639481544495, - "step": 287099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.1030205488204956, - "learning_rate": 1.2295974838299785e-05, - "loss": 0.4316, - "step": 287100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.7092180252075195, - "loss_rtd": 0.22398166358470917, - "loss_sent": 0.34127724170684814, - "loss_sod": 0.01627691090106964, - "loss_total": 0.581535816192627, - "step": 287199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.87132453918457, - "loss_rtd": 0.20336180925369263, - "loss_sent": 0.1266353875398636, - "loss_sod": 0.03997209668159485, - "loss_total": 0.36996930837631226, - "step": 287199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.5085489749908447, - "learning_rate": 1.2275140599912616e-05, - "loss": 0.4211, - "step": 287200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.954097270965576, - "loss_rtd": 0.19193975627422333, - "loss_sent": 0.03704122081398964, - "loss_sod": 0.06875681132078171, - "loss_total": 0.2977377772331238, - "step": 287299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.827497482299805, - "loss_rtd": 0.21817778050899506, - "loss_sent": 0.21135996282100677, - "loss_sod": 0.017506320029497147, - "loss_total": 0.4470440745353699, - "step": 287299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.918419361114502, - "learning_rate": 1.2254321557094311e-05, - "loss": 0.4358, - "step": 287300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.492374897003174, - "loss_rtd": 0.20044521987438202, - "loss_sent": 0.07455956190824509, - "loss_sod": 0.017204325646162033, - "loss_total": 0.29220911860466003, - "step": 287399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.504309177398682, - "loss_rtd": 0.18586887419223785, - "loss_sent": 0.013198519125580788, - "loss_sod": 0.055731356143951416, - "loss_total": 0.2547987401485443, - "step": 287399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.9885357022285461, - "learning_rate": 1.2233517718230758e-05, - "loss": 0.4177, - "step": 287400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.919321060180664, - "loss_rtd": 0.21748416125774384, - "loss_sent": 0.10268405824899673, - "loss_sod": 0.018541250377893448, - "loss_total": 0.3387094736099243, - "step": 287499 - }, - { - "epoch": 0.030998, - "loss_gen": 6.114922523498535, - "loss_rtd": 0.22717665135860443, - "loss_sent": 0.17845787107944489, - "loss_sod": 0.011602483689785004, - "loss_total": 0.41723698377609253, - "step": 287499 - }, - { - "epoch": 0.031, - "grad_norm": 0.6173911690711975, - "learning_rate": 1.2212729091701752e-05, - "loss": 0.3988, - "step": 287500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.648708343505859, - "loss_rtd": 0.17013563215732574, - "loss_sent": 0.12543965876102448, - "loss_sod": 0.09033001959323883, - "loss_total": 0.38590532541275024, - "step": 287599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.628352165222168, - "loss_rtd": 0.21228154003620148, - "loss_sent": 0.10157328099012375, - "loss_sod": 0.06106088310480118, - "loss_total": 0.3749157190322876, - "step": 287599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.354960322380066, - "learning_rate": 1.2191955685880963e-05, - "loss": 0.4403, - "step": 287600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.752691268920898, - "loss_rtd": 0.20500053465366364, - "loss_sent": 0.2797483801841736, - "loss_sod": 0.0019102394580841064, - "loss_total": 0.4866591691970825, - "step": 287699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.715508460998535, - "loss_rtd": 0.22658459842205048, - "loss_sent": 0.1587134152650833, - "loss_sod": 0.017449066042900085, - "loss_total": 0.40274709463119507, - "step": 287699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.9837059378623962, - "learning_rate": 1.217119750913589e-05, - "loss": 0.4479, - "step": 287700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.924016952514648, - "loss_rtd": 0.21890516579151154, - "loss_sent": 0.1304733008146286, - "loss_sod": 0.03267096355557442, - "loss_total": 0.38204944133758545, - "step": 287799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.6897969245910645, - "loss_rtd": 0.21855436265468597, - "loss_sent": 0.1375998556613922, - "loss_sod": 0.02110433019697666, - "loss_total": 0.3772585391998291, - "step": 287799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.2844737768173218, - "learning_rate": 1.2150454569827935e-05, - "loss": 0.4113, - "step": 287800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.7530012130737305, - "loss_rtd": 0.22878804802894592, - "loss_sent": 0.4068334698677063, - "loss_sod": 0.023075032979249954, - "loss_total": 0.6586965322494507, - "step": 287899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.890971660614014, - "loss_rtd": 0.22797581553459167, - "loss_sent": 0.24472099542617798, - "loss_sod": 0.05026178061962128, - "loss_total": 0.5229585766792297, - "step": 287899 - }, - { - "epoch": 0.0318, - "grad_norm": 2.550966739654541, - "learning_rate": 1.2129726876312347e-05, - "loss": 0.406, - "step": 287900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.977214336395264, - "loss_rtd": 0.223429337143898, - "loss_sent": 0.10118972510099411, - "loss_sod": 0.022638369351625443, - "loss_total": 0.34725743532180786, - "step": 287999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.822166919708252, - "loss_rtd": 0.22542782127857208, - "loss_sent": 0.20130446553230286, - "loss_sod": 0.03689045459032059, - "loss_total": 0.4636227488517761, - "step": 287999 - }, - { - "epoch": 0.032, - "grad_norm": 1.104121446609497, - "learning_rate": 1.2109014436938265e-05, - "loss": 0.4343, - "step": 288000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4042772054672241, - "eval_runtime": 151.4653, - "eval_samples_per_second": 101.957, - "eval_steps_per_second": 0.799, - "step": 288000 - }, - { - "epoch": 0.000198, - "loss_gen": 6.583367347717285, - "loss_rtd": 0.21817566454410553, - "loss_sent": 0.13085076212882996, - "loss_sod": 0.12323612719774246, - "loss_total": 0.47226256132125854, - "step": 288099 - }, - { - "epoch": 0.000198, - "loss_gen": 6.112459182739258, - "loss_rtd": 0.20037896931171417, - "loss_sent": 0.36410635709762573, - "loss_sod": 0.06308729201555252, - "loss_total": 0.6275726556777954, - "step": 288099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.806197166442871, - "learning_rate": 1.208831726004862e-05, - "loss": 0.4501, - "step": 288100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.959825038909912, - "loss_rtd": 0.2052944004535675, - "loss_sent": 0.2453039139509201, - "loss_sod": 0.05934235453605652, - "loss_total": 0.5099406838417053, - "step": 288199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.293071746826172, - "loss_rtd": 0.1985822468996048, - "loss_sent": 0.04508104547858238, - "loss_sod": 0.06112917512655258, - "loss_total": 0.30479246377944946, - "step": 288199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.0070668458938599, - "learning_rate": 1.206763535398025e-05, - "loss": 0.4345, - "step": 288200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.489973545074463, - "loss_rtd": 0.22991295158863068, - "loss_sent": 0.15148195624351501, - "loss_sod": 0.005384674295783043, - "loss_total": 0.3867795765399933, - "step": 288299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.503274917602539, - "loss_rtd": 0.20294919610023499, - "loss_sent": 0.010002459399402142, - "loss_sod": 0.08979085087776184, - "loss_total": 0.30274251103401184, - "step": 288299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.8200414180755615, - "learning_rate": 1.2046968727063823e-05, - "loss": 0.4195, - "step": 288300 - }, - { - "epoch": 0.000798, - "loss_gen": 6.101378917694092, - "loss_rtd": 0.2206754982471466, - "loss_sent": 0.16157306730747223, - "loss_sod": 0.1463429480791092, - "loss_total": 0.528591513633728, - "step": 288399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.862943172454834, - "loss_rtd": 0.21129311621189117, - "loss_sent": 0.2719140648841858, - "loss_sod": 0.055987320840358734, - "loss_total": 0.5391944646835327, - "step": 288399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.0483568906784058, - "learning_rate": 1.202631738762387e-05, - "loss": 0.4424, - "step": 288400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.596846580505371, - "loss_rtd": 0.19718573987483978, - "loss_sent": 0.12381140142679214, - "loss_sod": 0.07885465025901794, - "loss_total": 0.3998517692089081, - "step": 288499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.635765075683594, - "loss_rtd": 0.1893777996301651, - "loss_sent": 0.21595261991024017, - "loss_sod": 0.014623412862420082, - "loss_total": 0.4199538230895996, - "step": 288499 - }, - { - "epoch": 0.001, - "grad_norm": 1.088371753692627, - "learning_rate": 1.2005681343978713e-05, - "loss": 0.4237, - "step": 288500 - }, - { - "epoch": 0.001198, - "loss_gen": 6.086782455444336, - "loss_rtd": 0.22220873832702637, - "loss_sent": 0.19847138226032257, - "loss_sod": 0.13065265119075775, - "loss_total": 0.5513327717781067, - "step": 288599 - }, - { - "epoch": 0.001198, - "loss_gen": 6.182405471801758, - "loss_rtd": 0.22912736237049103, - "loss_sent": 0.26886099576950073, - "loss_sod": 0.03880622982978821, - "loss_total": 0.5367946028709412, - "step": 288599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.9216969013214111, - "learning_rate": 1.1985060604440574e-05, - "loss": 0.4193, - "step": 288600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.336289405822754, - "loss_rtd": 0.19004136323928833, - "loss_sent": 0.0002211226528743282, - "loss_sod": 0.06926990300416946, - "loss_total": 0.2595323920249939, - "step": 288699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.84647274017334, - "loss_rtd": 0.21220256388187408, - "loss_sent": 0.07044178992509842, - "loss_sod": 0.03766937553882599, - "loss_total": 0.3203137218952179, - "step": 288699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.7235668897628784, - "learning_rate": 1.196445517731547e-05, - "loss": 0.4107, - "step": 288700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.91360330581665, - "loss_rtd": 0.2236490547657013, - "loss_sent": 0.12645292282104492, - "loss_sod": 0.11816971004009247, - "loss_total": 0.4682716727256775, - "step": 288799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.722621917724609, - "loss_rtd": 0.21771691739559174, - "loss_sent": 0.09559899568557739, - "loss_sod": 0.05412505567073822, - "loss_total": 0.36744096875190735, - "step": 288799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.1119811534881592, - "learning_rate": 1.1943865070903294e-05, - "loss": 0.4279, - "step": 288800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.7283244132995605, - "loss_rtd": 0.21435987949371338, - "loss_sent": 0.17511047422885895, - "loss_sod": 0.030356254428625107, - "loss_total": 0.4198266267776489, - "step": 288899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.526970863342285, - "loss_rtd": 0.19748038053512573, - "loss_sent": 0.14994210004806519, - "loss_sod": 0.009142027236521244, - "loss_total": 0.3565645217895508, - "step": 288899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.6695646047592163, - "learning_rate": 1.1923290293497696e-05, - "loss": 0.4262, - "step": 288900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.682656764984131, - "loss_rtd": 0.21027801930904388, - "loss_sent": 0.2659796178340912, - "loss_sod": 0.04973718151450157, - "loss_total": 0.5259947776794434, - "step": 288999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.95244836807251, - "loss_rtd": 0.2203441858291626, - "loss_sent": 0.5933377146720886, - "loss_sod": 0.021773140877485275, - "loss_total": 0.835455060005188, - "step": 288999 - }, - { - "epoch": 0.002, - "grad_norm": 1.7816059589385986, - "learning_rate": 1.1902730853386219e-05, - "loss": 0.4357, - "step": 289000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4045261740684509, - "eval_runtime": 154.5568, - "eval_samples_per_second": 99.918, - "eval_steps_per_second": 0.783, - "step": 289000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.636537075042725, - "loss_rtd": 0.19163213670253754, - "loss_sent": 0.0680651143193245, - "loss_sod": 0.01924016699194908, - "loss_total": 0.278937429189682, - "step": 289099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.6999993324279785, - "loss_rtd": 0.18549351394176483, - "loss_sent": 0.421495258808136, - "loss_sod": 0.05939367786049843, - "loss_total": 0.6663824319839478, - "step": 289099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.0450456142425537, - "learning_rate": 1.1882186758850205e-05, - "loss": 0.419, - "step": 289100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.386354923248291, - "loss_rtd": 0.16911444067955017, - "loss_sent": 0.0005919820396229625, - "loss_sod": 0.0733618512749672, - "loss_total": 0.24306826293468475, - "step": 289199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.968642234802246, - "loss_rtd": 0.20339106023311615, - "loss_sent": 0.1694738119840622, - "loss_sod": 0.05585000291466713, - "loss_total": 0.4287148714065552, - "step": 289199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.9566167593002319, - "learning_rate": 1.1861658018164802e-05, - "loss": 0.4182, - "step": 289200 - }, - { - "epoch": 0.002598, - "loss_gen": 6.1110100746154785, - "loss_rtd": 0.2216099351644516, - "loss_sent": 0.14492638409137726, - "loss_sod": 0.07394503057003021, - "loss_total": 0.44048136472702026, - "step": 289299 - }, - { - "epoch": 0.002598, - "loss_gen": 6.032350540161133, - "loss_rtd": 0.23270705342292786, - "loss_sent": 0.0479719340801239, - "loss_sod": 0.012125834822654724, - "loss_total": 0.2928048372268677, - "step": 289299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.0500189065933228, - "learning_rate": 1.1841144639598977e-05, - "loss": 0.4158, - "step": 289300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.233558654785156, - "loss_rtd": 0.1882786601781845, - "loss_sent": 0.008049456402659416, - "loss_sod": 0.16217921674251556, - "loss_total": 0.35850733518600464, - "step": 289399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.24332332611084, - "loss_rtd": 0.17740464210510254, - "loss_sent": 0.016357656568288803, - "loss_sod": 0.13456860184669495, - "loss_total": 0.3283309042453766, - "step": 289399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.1031545400619507, - "learning_rate": 1.1820646631415538e-05, - "loss": 0.4112, - "step": 289400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.92902946472168, - "loss_rtd": 0.220808744430542, - "loss_sent": 0.12423915416002274, - "loss_sod": 0.07556038349866867, - "loss_total": 0.4206082820892334, - "step": 289499 - }, - { - "epoch": 0.002998, - "loss_gen": 6.194652080535889, - "loss_rtd": 0.21770694851875305, - "loss_sent": 0.6755025386810303, - "loss_sod": 0.060136828571558, - "loss_total": 0.953346312046051, - "step": 289499 - }, - { - "epoch": 0.003, - "grad_norm": 2.728039503097534, - "learning_rate": 1.1800164001871078e-05, - "loss": 0.4284, - "step": 289500 - }, - { - "epoch": 0.003198, - "loss_gen": 6.139127731323242, - "loss_rtd": 0.21810507774353027, - "loss_sent": 0.0803564265370369, - "loss_sod": 0.03863891586661339, - "loss_total": 0.33710041642189026, - "step": 289599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.752684116363525, - "loss_rtd": 0.22759109735488892, - "loss_sent": 0.30329668521881104, - "loss_sod": 0.027985205873847008, - "loss_total": 0.5588729977607727, - "step": 289599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.119472861289978, - "learning_rate": 1.177969675921598e-05, - "loss": 0.4457, - "step": 289600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.589775562286377, - "loss_rtd": 0.22320495545864105, - "loss_sent": 0.23578467965126038, - "loss_sod": 0.007745911367237568, - "loss_total": 0.4667355418205261, - "step": 289699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.557400703430176, - "loss_rtd": 0.2014605849981308, - "loss_sent": 0.140837162733078, - "loss_sod": 0.08160769939422607, - "loss_total": 0.42390546202659607, - "step": 289699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.7268755435943604, - "learning_rate": 1.1759244911694451e-05, - "loss": 0.4394, - "step": 289700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.619295120239258, - "loss_rtd": 0.17604520916938782, - "loss_sent": 0.039584919810295105, - "loss_sod": 0.09962044656276703, - "loss_total": 0.31525057554244995, - "step": 289799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.715934753417969, - "loss_rtd": 0.2389906346797943, - "loss_sent": 0.11277256906032562, - "loss_sod": 0.030528396368026733, - "loss_total": 0.38229161500930786, - "step": 289799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.0768576860427856, - "learning_rate": 1.1738808467544505e-05, - "loss": 0.4088, - "step": 289800 - }, - { - "epoch": 0.003798, - "loss_gen": 6.084167003631592, - "loss_rtd": 0.23010893166065216, - "loss_sent": 0.09768994152545929, - "loss_sod": 0.04006550461053848, - "loss_total": 0.36786437034606934, - "step": 289899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.800467491149902, - "loss_rtd": 0.216391459107399, - "loss_sent": 0.2136368453502655, - "loss_sod": 0.07278364151716232, - "loss_total": 0.5028119087219238, - "step": 289899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.9832937717437744, - "learning_rate": 1.171838743499794e-05, - "loss": 0.4067, - "step": 289900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.556502819061279, - "loss_rtd": 0.21895749866962433, - "loss_sent": 0.06765895336866379, - "loss_sod": 0.038971636444330215, - "loss_total": 0.3255881071090698, - "step": 289999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.713016986846924, - "loss_rtd": 0.21213513612747192, - "loss_sent": 0.14003337919712067, - "loss_sod": 0.018597260117530823, - "loss_total": 0.3707657754421234, - "step": 289999 - }, - { - "epoch": 0.004, - "grad_norm": 0.8992867469787598, - "learning_rate": 1.1697981822280329e-05, - "loss": 0.4336, - "step": 290000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4080997407436371, - "eval_runtime": 151.4259, - "eval_samples_per_second": 101.984, - "eval_steps_per_second": 0.799, - "step": 290000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.927196979522705, - "loss_rtd": 0.21384547650814056, - "loss_sent": 0.28930529952049255, - "loss_sod": 0.053418584167957306, - "loss_total": 0.5565693378448486, - "step": 290099 - }, - { - "epoch": 0.004198, - "loss_gen": 6.024545669555664, - "loss_rtd": 0.2033890038728714, - "loss_sent": 0.049007099121809006, - "loss_sod": 0.024014577269554138, - "loss_total": 0.27641066908836365, - "step": 290099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.8494968414306641, - "learning_rate": 1.1677591637611058e-05, - "loss": 0.4102, - "step": 290100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.861697673797607, - "loss_rtd": 0.1946389377117157, - "loss_sent": 0.2799685597419739, - "loss_sod": 0.01940545253455639, - "loss_total": 0.4940129518508911, - "step": 290199 - }, - { - "epoch": 0.004398, - "loss_gen": 6.297935962677002, - "loss_rtd": 0.22417907416820526, - "loss_sent": 0.13598723709583282, - "loss_sod": 0.07087448984384537, - "loss_total": 0.43104082345962524, - "step": 290199 - }, - { - "epoch": 0.0044, - "grad_norm": 0.7087154388427734, - "learning_rate": 1.1657216889203294e-05, - "loss": 0.4239, - "step": 290200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.808155059814453, - "loss_rtd": 0.21386592090129852, - "loss_sent": 0.13774049282073975, - "loss_sod": 0.07614940404891968, - "loss_total": 0.42775583267211914, - "step": 290299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.876097679138184, - "loss_rtd": 0.21150068938732147, - "loss_sent": 0.08654508739709854, - "loss_sod": 0.030276494100689888, - "loss_total": 0.32832226157188416, - "step": 290299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.7103103995323181, - "learning_rate": 1.1636857585263994e-05, - "loss": 0.4103, - "step": 290300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.536046028137207, - "loss_rtd": 0.18324218690395355, - "loss_sent": 0.18905538320541382, - "loss_sod": 0.018242817372083664, - "loss_total": 0.39054039120674133, - "step": 290399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.71199369430542, - "loss_rtd": 0.23030778765678406, - "loss_sent": 0.19299103319644928, - "loss_sod": 0.15578626096248627, - "loss_total": 0.5790850520133972, - "step": 290399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.9059568047523499, - "learning_rate": 1.1616513733993856e-05, - "loss": 0.4336, - "step": 290400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.776209831237793, - "loss_rtd": 0.20986218750476837, - "loss_sent": 0.1940474957227707, - "loss_sod": 0.03195120394229889, - "loss_total": 0.43586087226867676, - "step": 290499 - }, - { - "epoch": 0.004998, - "loss_gen": 6.1553473472595215, - "loss_rtd": 0.22172221541404724, - "loss_sent": 0.0935099795460701, - "loss_sod": 0.019701147451996803, - "loss_total": 0.334933340549469, - "step": 290499 - }, - { - "epoch": 0.005, - "grad_norm": 1.9684752225875854, - "learning_rate": 1.1596185343587395e-05, - "loss": 0.4315, - "step": 290500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.282568454742432, - "loss_rtd": 0.17847198247909546, - "loss_sent": 0.01683359593153, - "loss_sod": 0.05404090881347656, - "loss_total": 0.24934649467468262, - "step": 290599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.774869918823242, - "loss_rtd": 0.21058449149131775, - "loss_sent": 0.2569732367992401, - "loss_sod": 0.05100230127573013, - "loss_total": 0.5185600519180298, - "step": 290599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.8482804298400879, - "learning_rate": 1.1575872422232892e-05, - "loss": 0.4345, - "step": 290600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.804244518280029, - "loss_rtd": 0.19818954169750214, - "loss_sent": 0.0005043614073656499, - "loss_sod": 0.1376422792673111, - "loss_total": 0.3363361954689026, - "step": 290699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.581037998199463, - "loss_rtd": 0.16705118119716644, - "loss_sent": 6.964366912143305e-05, - "loss_sod": 0.10265404731035233, - "loss_total": 0.2697748839855194, - "step": 290699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.1441339254379272, - "learning_rate": 1.1555574978112387e-05, - "loss": 0.4184, - "step": 290700 - }, - { - "epoch": 0.005598, - "loss_gen": 6.0515217781066895, - "loss_rtd": 0.23304536938667297, - "loss_sent": 0.2999255955219269, - "loss_sod": 0.002595345489680767, - "loss_total": 0.5355663299560547, - "step": 290799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.992300987243652, - "loss_rtd": 0.23110775649547577, - "loss_sent": 0.11044023931026459, - "loss_sod": 0.1251615285873413, - "loss_total": 0.46670955419540405, - "step": 290799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.3418282270431519, - "learning_rate": 1.1535293019401678e-05, - "loss": 0.431, - "step": 290800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.040250301361084, - "loss_rtd": 0.19023177027702332, - "loss_sent": 0.36948129534721375, - "loss_sod": 0.01797431707382202, - "loss_total": 0.5776873826980591, - "step": 290899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.865544319152832, - "loss_rtd": 0.2297186553478241, - "loss_sent": 0.19893589615821838, - "loss_sod": 0.0579451285302639, - "loss_total": 0.4865996837615967, - "step": 290899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.5304007530212402, - "learning_rate": 1.1515026554270336e-05, - "loss": 0.4336, - "step": 290900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.982074737548828, - "loss_rtd": 0.20522068440914154, - "loss_sent": 0.06905671209096909, - "loss_sod": 0.1271514892578125, - "loss_total": 0.4014289081096649, - "step": 290999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.474308013916016, - "loss_rtd": 0.19547529518604279, - "loss_sent": 3.6256878956919536e-05, - "loss_sod": 0.05424252897500992, - "loss_total": 0.24975408613681793, - "step": 290999 - }, - { - "epoch": 0.006, - "grad_norm": 0.7216436862945557, - "learning_rate": 1.1494775590881707e-05, - "loss": 0.439, - "step": 291000 - }, - { - "epoch": 0.006, - "eval_loss": 0.41002848744392395, - "eval_runtime": 152.8536, - "eval_samples_per_second": 101.031, - "eval_steps_per_second": 0.792, - "step": 291000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.9718499183654785, - "loss_rtd": 0.22338102757930756, - "loss_sent": 0.41834065318107605, - "loss_sod": 0.014543645083904266, - "loss_total": 0.6562653183937073, - "step": 291099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.756255149841309, - "loss_rtd": 0.2223668396472931, - "loss_sent": 0.22848108410835266, - "loss_sod": 0.018167486414313316, - "loss_total": 0.4690154194831848, - "step": 291099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.14185631275177, - "learning_rate": 1.1474540137392853e-05, - "loss": 0.4335, - "step": 291100 - }, - { - "epoch": 0.006398, - "loss_gen": 6.031336784362793, - "loss_rtd": 0.20401334762573242, - "loss_sent": 0.31035447120666504, - "loss_sod": 0.011721178889274597, - "loss_total": 0.5260890126228333, - "step": 291199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.556453227996826, - "loss_rtd": 0.19337640702724457, - "loss_sent": 0.06550833582878113, - "loss_sod": 0.022421490401029587, - "loss_total": 0.2813062369823456, - "step": 291199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.1999459266662598, - "learning_rate": 1.1454320201954626e-05, - "loss": 0.4427, - "step": 291200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.517667770385742, - "loss_rtd": 0.2339557409286499, - "loss_sent": 0.07689621299505234, - "loss_sod": 0.017893413081765175, - "loss_total": 0.32874536514282227, - "step": 291299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.999050140380859, - "loss_rtd": 0.23434123396873474, - "loss_sent": 0.09787822514772415, - "loss_sod": 0.0470002181828022, - "loss_total": 0.3792196810245514, - "step": 291299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.653445839881897, - "learning_rate": 1.1434115792711614e-05, - "loss": 0.415, - "step": 291300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.648245334625244, - "loss_rtd": 0.1952313631772995, - "loss_sent": 3.299563832115382e-05, - "loss_sod": 0.12789836525917053, - "loss_total": 0.323162704706192, - "step": 291399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.682374477386475, - "loss_rtd": 0.1928941160440445, - "loss_sent": 0.0007378265727311373, - "loss_sod": 0.09649474918842316, - "loss_total": 0.2901266813278198, - "step": 291399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.8920658826828003, - "learning_rate": 1.1413926917802159e-05, - "loss": 0.4209, - "step": 291400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.852756977081299, - "loss_rtd": 0.212942436337471, - "loss_sent": 0.18852655589580536, - "loss_sod": 0.1215762048959732, - "loss_total": 0.5230451822280884, - "step": 291499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.678630352020264, - "loss_rtd": 0.21963368356227875, - "loss_sent": 0.1962205171585083, - "loss_sod": 0.013026234693825245, - "loss_total": 0.4288804531097412, - "step": 291499 - }, - { - "epoch": 0.007, - "grad_norm": 1.5081746578216553, - "learning_rate": 1.139375358535832e-05, - "loss": 0.4392, - "step": 291500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.75437593460083, - "loss_rtd": 0.2078815996646881, - "loss_sent": 0.3053898513317108, - "loss_sod": 0.08104534447193146, - "loss_total": 0.594316840171814, - "step": 291599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.293683052062988, - "loss_rtd": 0.19586960971355438, - "loss_sent": 3.034544897673186e-05, - "loss_sod": 0.07798950374126434, - "loss_total": 0.2738894522190094, - "step": 291599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.0767451524734497, - "learning_rate": 1.137359580350591e-05, - "loss": 0.4231, - "step": 291600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.748904705047607, - "loss_rtd": 0.2279605269432068, - "loss_sent": 0.13035079836845398, - "loss_sod": 0.05731924623250961, - "loss_total": 0.415630578994751, - "step": 291699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.693149089813232, - "loss_rtd": 0.17812733352184296, - "loss_sent": 0.09372135996818542, - "loss_sod": 0.1162639930844307, - "loss_total": 0.3881126940250397, - "step": 291699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.0027326345443726, - "learning_rate": 1.1353453580364497e-05, - "loss": 0.4276, - "step": 291700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.866568088531494, - "loss_rtd": 0.22758306562900543, - "loss_sent": 0.19470615684986115, - "loss_sod": 0.025758441537618637, - "loss_total": 0.4480476379394531, - "step": 291799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.84188175201416, - "loss_rtd": 0.21974079310894012, - "loss_sent": 0.13410304486751556, - "loss_sod": 0.05712611600756645, - "loss_total": 0.41096997261047363, - "step": 291799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.9332654476165771, - "learning_rate": 1.1333326924047371e-05, - "loss": 0.4407, - "step": 291800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.9282097816467285, - "loss_rtd": 0.2190975695848465, - "loss_sent": 0.23697197437286377, - "loss_sod": 0.008148223161697388, - "loss_total": 0.46421778202056885, - "step": 291899 - }, - { - "epoch": 0.007798, - "loss_gen": 6.181413173675537, - "loss_rtd": 0.21828410029411316, - "loss_sent": 0.27279528975486755, - "loss_sod": 0.02741367369890213, - "loss_total": 0.5184930562973022, - "step": 291899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.0647717714309692, - "learning_rate": 1.1313215842661523e-05, - "loss": 0.4405, - "step": 291900 - }, - { - "epoch": 0.007998, - "loss_gen": 6.085374355316162, - "loss_rtd": 0.22489675879478455, - "loss_sent": 0.20845045149326324, - "loss_sod": 0.06859986484050751, - "loss_total": 0.5019470453262329, - "step": 291999 - }, - { - "epoch": 0.007998, - "loss_gen": 6.197389125823975, - "loss_rtd": 0.21831458806991577, - "loss_sent": 0.21181783080101013, - "loss_sod": 0.015346596948802471, - "loss_total": 0.44547903537750244, - "step": 291999 - }, - { - "epoch": 0.008, - "grad_norm": 0.9735605120658875, - "learning_rate": 1.1293120344307712e-05, - "loss": 0.3987, - "step": 292000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4108790457248688, - "eval_runtime": 151.4209, - "eval_samples_per_second": 101.987, - "eval_steps_per_second": 0.799, - "step": 292000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.955929279327393, - "loss_rtd": 0.23072001338005066, - "loss_sent": 0.2771455943584442, - "loss_sod": 0.020346995443105698, - "loss_total": 0.5282126069068909, - "step": 292099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.931318283081055, - "loss_rtd": 0.22061000764369965, - "loss_sent": 0.3473089635372162, - "loss_sod": 0.04022546112537384, - "loss_total": 0.6081444025039673, - "step": 292099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.074404001235962, - "learning_rate": 1.127304043708039e-05, - "loss": 0.4526, - "step": 292100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.771393299102783, - "loss_rtd": 0.19248847663402557, - "loss_sent": 0.2669306695461273, - "loss_sod": 0.03017154335975647, - "loss_total": 0.48959070444107056, - "step": 292199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.533403396606445, - "loss_rtd": 0.1992393136024475, - "loss_sent": 0.00835365243256092, - "loss_sod": 0.06661911308765411, - "loss_total": 0.2742120623588562, - "step": 292199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.0609321594238281, - "learning_rate": 1.1252976129067767e-05, - "loss": 0.4196, - "step": 292200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.930607318878174, - "loss_rtd": 0.23103652894496918, - "loss_sent": 0.1808062195777893, - "loss_sod": 0.05816710740327835, - "loss_total": 0.47000986337661743, - "step": 292299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.779129981994629, - "loss_rtd": 0.2152262181043625, - "loss_sent": 0.15281803905963898, - "loss_sod": 0.035360969603061676, - "loss_total": 0.40340524911880493, - "step": 292299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.7079684734344482, - "learning_rate": 1.1232927428351713e-05, - "loss": 0.4077, - "step": 292300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.861176013946533, - "loss_rtd": 0.23282544314861298, - "loss_sent": 0.3248097598552704, - "loss_sod": 0.04428011178970337, - "loss_total": 0.6019153594970703, - "step": 292399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.710519790649414, - "loss_rtd": 0.21236442029476166, - "loss_sent": 0.3869825005531311, - "loss_sod": 0.015781860798597336, - "loss_total": 0.615128755569458, - "step": 292399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.3579967021942139, - "learning_rate": 1.1212894343007851e-05, - "loss": 0.4411, - "step": 292400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.598791122436523, - "loss_rtd": 0.2247728407382965, - "loss_sent": 0.10374794155359268, - "loss_sod": 0.006241069175302982, - "loss_total": 0.3347618579864502, - "step": 292499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.96766996383667, - "loss_rtd": 0.2124291956424713, - "loss_sent": 0.04815928265452385, - "loss_sod": 0.011035613715648651, - "loss_total": 0.2716240882873535, - "step": 292499 - }, - { - "epoch": 0.009, - "grad_norm": 0.6185656785964966, - "learning_rate": 1.1192876881105524e-05, - "loss": 0.435, - "step": 292500 - }, - { - "epoch": 0.009198, - "loss_gen": 6.012024402618408, - "loss_rtd": 0.2138938456773758, - "loss_sent": 0.12079918384552002, - "loss_sod": 0.04975533485412598, - "loss_total": 0.3844483494758606, - "step": 292599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.75975227355957, - "loss_rtd": 0.208217591047287, - "loss_sent": 0.13656321167945862, - "loss_sod": 0.05625367909669876, - "loss_total": 0.40103447437286377, - "step": 292599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.2162506580352783, - "learning_rate": 1.1172875050707737e-05, - "loss": 0.4321, - "step": 292600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.854612350463867, - "loss_rtd": 0.22599859535694122, - "loss_sent": 0.21165831387043, - "loss_sod": 0.03499480336904526, - "loss_total": 0.47265172004699707, - "step": 292699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.9117584228515625, - "loss_rtd": 0.2201584130525589, - "loss_sent": 0.27716943621635437, - "loss_sod": 0.010912577621638775, - "loss_total": 0.5082404613494873, - "step": 292699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.9428503513336182, - "learning_rate": 1.115288885987123e-05, - "loss": 0.4168, - "step": 292700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.924159049987793, - "loss_rtd": 0.2209009975194931, - "loss_sent": 0.15670207142829895, - "loss_sod": 0.012711119838058949, - "loss_total": 0.3903141915798187, - "step": 292799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.813932418823242, - "loss_rtd": 0.2028120458126068, - "loss_sent": 0.12386512756347656, - "loss_sod": 0.030841028317809105, - "loss_total": 0.35751819610595703, - "step": 292799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.7272285223007202, - "learning_rate": 1.1132918316646451e-05, - "loss": 0.4278, - "step": 292800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.8489789962768555, - "loss_rtd": 0.2129559963941574, - "loss_sent": 0.3505731225013733, - "loss_sod": 0.10345625877380371, - "loss_total": 0.6669853925704956, - "step": 292899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.843230724334717, - "loss_rtd": 0.19831852614879608, - "loss_sent": 0.372302770614624, - "loss_sod": 0.0443035289645195, - "loss_total": 0.6149247884750366, - "step": 292899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.7858167886734009, - "learning_rate": 1.1112963429077539e-05, - "loss": 0.4316, - "step": 292900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.7075605392456055, - "loss_rtd": 0.18111105263233185, - "loss_sent": 0.009544480592012405, - "loss_sod": 0.06162203848361969, - "loss_total": 0.25227758288383484, - "step": 292999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.608867645263672, - "loss_rtd": 0.1733575314283371, - "loss_sent": 0.06498207896947861, - "loss_sod": 0.019020648673176765, - "loss_total": 0.2573602497577667, - "step": 292999 - }, - { - "epoch": 0.01, - "grad_norm": 0.7675855755805969, - "learning_rate": 1.1093024205202291e-05, - "loss": 0.42, - "step": 293000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4080387055873871, - "eval_runtime": 151.421, - "eval_samples_per_second": 101.987, - "eval_steps_per_second": 0.799, - "step": 293000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.766430854797363, - "loss_rtd": 0.2050667256116867, - "loss_sent": 0.0007737329578958452, - "loss_sod": 0.17520329356193542, - "loss_total": 0.3810437321662903, - "step": 293099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.688209533691406, - "loss_rtd": 0.20277422666549683, - "loss_sent": 0.28075194358825684, - "loss_sod": 0.03886014595627785, - "loss_total": 0.5223863124847412, - "step": 293099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.5843936204910278, - "learning_rate": 1.1073100653052244e-05, - "loss": 0.4179, - "step": 293100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.868280410766602, - "loss_rtd": 0.2234855592250824, - "loss_sent": 0.2485310286283493, - "loss_sod": 0.022049788385629654, - "loss_total": 0.49406635761260986, - "step": 293199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.338741302490234, - "loss_rtd": 0.18713144958019257, - "loss_sent": 0.022464105859398842, - "loss_sod": 0.04646528512239456, - "loss_total": 0.2560608386993408, - "step": 293199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.688319981098175, - "learning_rate": 1.1053192780652594e-05, - "loss": 0.4308, - "step": 293200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.75809907913208, - "loss_rtd": 0.21313640475273132, - "loss_sent": 0.35639873147010803, - "loss_sod": 0.11235233396291733, - "loss_total": 0.6818875074386597, - "step": 293299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.880958080291748, - "loss_rtd": 0.22107014060020447, - "loss_sent": 0.3080473840236664, - "loss_sod": 0.05575653538107872, - "loss_total": 0.5848740339279175, - "step": 293299 - }, - { - "epoch": 0.0106, - "grad_norm": 3.041757583618164, - "learning_rate": 1.103330059602225e-05, - "loss": 0.4356, - "step": 293300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.958160400390625, - "loss_rtd": 0.21889819204807281, - "loss_sent": 0.14059273898601532, - "loss_sod": 0.039761994034051895, - "loss_total": 0.3992529511451721, - "step": 293399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.773192882537842, - "loss_rtd": 0.21891982853412628, - "loss_sent": 0.33050525188446045, - "loss_sod": 0.009372915141284466, - "loss_total": 0.5587980151176453, - "step": 293399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.1098265647888184, - "learning_rate": 1.1013424107173753e-05, - "loss": 0.4228, - "step": 293400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.468547821044922, - "loss_rtd": 0.18723736703395844, - "loss_sent": 0.0818619504570961, - "loss_sod": 0.1300342082977295, - "loss_total": 0.3991335332393646, - "step": 293499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.687875747680664, - "loss_rtd": 0.20722660422325134, - "loss_sent": 0.2695978879928589, - "loss_sod": 0.012449869886040688, - "loss_total": 0.48927438259124756, - "step": 293499 - }, - { - "epoch": 0.011, - "grad_norm": 1.1542943716049194, - "learning_rate": 1.0993563322113365e-05, - "loss": 0.4211, - "step": 293500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.802901268005371, - "loss_rtd": 0.2055222988128662, - "loss_sent": 0.278396338224411, - "loss_sod": 0.040736123919487, - "loss_total": 0.524654746055603, - "step": 293599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.8944783210754395, - "loss_rtd": 0.19163289666175842, - "loss_sent": 0.12466850876808167, - "loss_sod": 0.056496649980545044, - "loss_total": 0.37279805541038513, - "step": 293599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.147995114326477, - "learning_rate": 1.0973718248841003e-05, - "loss": 0.4383, - "step": 293600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.991019248962402, - "loss_rtd": 0.22885239124298096, - "loss_sent": 0.2256946861743927, - "loss_sod": 0.02889326959848404, - "loss_total": 0.4834403395652771, - "step": 293699 - }, - { - "epoch": 0.011398, - "loss_gen": 6.015378475189209, - "loss_rtd": 0.21590931713581085, - "loss_sent": 0.11068647354841232, - "loss_sod": 0.05200549215078354, - "loss_total": 0.3786012828350067, - "step": 293699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.2632439136505127, - "learning_rate": 1.0953888895350279e-05, - "loss": 0.4527, - "step": 293700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.888670921325684, - "loss_rtd": 0.2036859095096588, - "loss_sent": 0.17866621911525726, - "loss_sod": 0.18804116547107697, - "loss_total": 0.5703933238983154, - "step": 293799 - }, - { - "epoch": 0.011598, - "loss_gen": 6.096378326416016, - "loss_rtd": 0.21143315732479095, - "loss_sent": 0.111661896109581, - "loss_sod": 0.028706058859825134, - "loss_total": 0.3518010973930359, - "step": 293799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.6043952703475952, - "learning_rate": 1.0934075269628425e-05, - "loss": 0.4373, - "step": 293800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.471293926239014, - "loss_rtd": 0.20524010062217712, - "loss_sent": 0.3265687823295593, - "loss_sod": 0.06880029290914536, - "loss_total": 0.6006091833114624, - "step": 293899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.927410125732422, - "loss_rtd": 0.1994786560535431, - "loss_sent": 0.2063358873128891, - "loss_sod": 0.08651162683963776, - "loss_total": 0.49232620000839233, - "step": 293899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.4650521278381348, - "learning_rate": 1.091427737965638e-05, - "loss": 0.4264, - "step": 293900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.625615119934082, - "loss_rtd": 0.20205235481262207, - "loss_sent": 0.1508893072605133, - "loss_sod": 0.01654103957116604, - "loss_total": 0.36948269605636597, - "step": 293999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.612279415130615, - "loss_rtd": 0.226791113615036, - "loss_sent": 0.16933803260326385, - "loss_sod": 0.003286023624241352, - "loss_total": 0.39941516518592834, - "step": 293999 - }, - { - "epoch": 0.012, - "grad_norm": 0.9215983152389526, - "learning_rate": 1.0894495233408746e-05, - "loss": 0.4227, - "step": 294000 - }, - { - "epoch": 0.012, - "eval_loss": 0.41339993476867676, - "eval_runtime": 151.3193, - "eval_samples_per_second": 102.056, - "eval_steps_per_second": 0.8, - "step": 294000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.763054847717285, - "loss_rtd": 0.20253047347068787, - "loss_sent": 0.29640471935272217, - "loss_sod": 0.05300295352935791, - "loss_total": 0.5519381761550903, - "step": 294099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.384382247924805, - "loss_rtd": 0.17322170734405518, - "loss_sent": 0.07608072459697723, - "loss_sod": 0.0016368563519790769, - "loss_total": 0.2509393095970154, - "step": 294099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.4361717700958252, - "learning_rate": 1.0874728838853742e-05, - "loss": 0.4371, - "step": 294100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.522751808166504, - "loss_rtd": 0.2118579000234604, - "loss_sent": 0.09907300770282745, - "loss_sod": 0.001598638598807156, - "loss_total": 0.3125295639038086, - "step": 294199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.788977146148682, - "loss_rtd": 0.18781417608261108, - "loss_sent": 0.006302570924162865, - "loss_sod": 0.04807844012975693, - "loss_total": 0.24219518899917603, - "step": 294199 - }, - { - "epoch": 0.0124, - "grad_norm": 0.5275652408599854, - "learning_rate": 1.085497820395328e-05, - "loss": 0.4405, - "step": 294200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.767052173614502, - "loss_rtd": 0.21944154798984528, - "loss_sent": 0.3759983777999878, - "loss_sod": 0.07488761842250824, - "loss_total": 0.6703275442123413, - "step": 294299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.955311298370361, - "loss_rtd": 0.215692937374115, - "loss_sent": 0.29599979519844055, - "loss_sod": 0.051369279623031616, - "loss_total": 0.5630620121955872, - "step": 294299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.6063812971115112, - "learning_rate": 1.083524333666292e-05, - "loss": 0.4139, - "step": 294300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.296679496765137, - "loss_rtd": 0.19681201875209808, - "loss_sent": 0.00028336889226920903, - "loss_sod": 0.1252090483903885, - "loss_total": 0.3223044276237488, - "step": 294399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.031749725341797, - "loss_rtd": 0.17776720225811005, - "loss_sent": 2.7434438379714265e-05, - "loss_sod": 0.12903621792793274, - "loss_total": 0.30683085322380066, - "step": 294399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.2570996284484863, - "learning_rate": 1.0815524244931875e-05, - "loss": 0.4282, - "step": 294400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.5852532386779785, - "loss_rtd": 0.1918531209230423, - "loss_sent": 0.08934935927391052, - "loss_sod": 0.045001521706581116, - "loss_total": 0.32620400190353394, - "step": 294499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.534328937530518, - "loss_rtd": 0.1863044649362564, - "loss_sent": 0.02965555712580681, - "loss_sod": 0.01043771207332611, - "loss_total": 0.22639773786067963, - "step": 294499 - }, - { - "epoch": 0.013, - "grad_norm": 0.5376600027084351, - "learning_rate": 1.0795820936702961e-05, - "loss": 0.4217, - "step": 294500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.873933792114258, - "loss_rtd": 0.21789704263210297, - "loss_sent": 0.07002614438533783, - "loss_sod": 0.013727325946092606, - "loss_total": 0.3016505241394043, - "step": 294599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.904489040374756, - "loss_rtd": 0.21422289311885834, - "loss_sent": 0.1969592124223709, - "loss_sod": 0.07365565001964569, - "loss_total": 0.48483777046203613, - "step": 294599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0407181978225708, - "learning_rate": 1.0776133419912682e-05, - "loss": 0.4435, - "step": 294600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.62467098236084, - "loss_rtd": 0.18759846687316895, - "loss_sent": 0.017625365406274796, - "loss_sod": 0.03514987975358963, - "loss_total": 0.24037370085716248, - "step": 294699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.553157806396484, - "loss_rtd": 0.1785619556903839, - "loss_sent": 0.01877358928322792, - "loss_sod": 0.03354465216398239, - "loss_total": 0.23088020086288452, - "step": 294699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.5728604793548584, - "learning_rate": 1.0756461702491177e-05, - "loss": 0.4177, - "step": 294700 - }, - { - "epoch": 0.013598, - "loss_gen": 6.215653419494629, - "loss_rtd": 0.21273796260356903, - "loss_sent": 0.17294226586818695, - "loss_sod": 0.01092920545488596, - "loss_total": 0.39660942554473877, - "step": 294799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.096684455871582, - "loss_rtd": 0.17959147691726685, - "loss_sent": 0.021339621394872665, - "loss_sod": 0.11821482330560684, - "loss_total": 0.31914591789245605, - "step": 294799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.9141101837158203, - "learning_rate": 1.0736805792362214e-05, - "loss": 0.4432, - "step": 294800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.342113494873047, - "loss_rtd": 0.19361698627471924, - "loss_sent": 0.09061580151319504, - "loss_sod": 0.0437910333275795, - "loss_total": 0.3280238211154938, - "step": 294899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.73457145690918, - "loss_rtd": 0.20573337376117706, - "loss_sent": 0.11693263053894043, - "loss_sod": 0.01017211563885212, - "loss_total": 0.33283811807632446, - "step": 294899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.8815062046051025, - "learning_rate": 1.0717165697443177e-05, - "loss": 0.4115, - "step": 294900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.697635173797607, - "loss_rtd": 0.18362516164779663, - "loss_sent": 0.0441841259598732, - "loss_sod": 0.0444292277097702, - "loss_total": 0.27223852276802063, - "step": 294999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.7185492515563965, - "loss_rtd": 0.1831914335489273, - "loss_sent": 0.00016801382298581302, - "loss_sod": 0.10571719706058502, - "loss_total": 0.28907665610313416, - "step": 294999 - }, - { - "epoch": 0.014, - "grad_norm": 1.007917881011963, - "learning_rate": 1.069754142564509e-05, - "loss": 0.43, - "step": 295000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4112323522567749, - "eval_runtime": 151.4167, - "eval_samples_per_second": 101.99, - "eval_steps_per_second": 0.799, - "step": 295000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.640534400939941, - "loss_rtd": 0.20620249211788177, - "loss_sent": 0.3413327932357788, - "loss_sod": 0.0013544512912631035, - "loss_total": 0.5488897562026978, - "step": 295099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.677936553955078, - "loss_rtd": 0.2097548544406891, - "loss_sent": 0.1667233407497406, - "loss_sod": 0.07956099510192871, - "loss_total": 0.4560391902923584, - "step": 295099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.303545594215393, - "learning_rate": 1.0677932984872624e-05, - "loss": 0.4405, - "step": 295100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.546933174133301, - "loss_rtd": 0.19512756168842316, - "loss_sent": 0.003282061545178294, - "loss_sod": 0.07110337913036346, - "loss_total": 0.26951301097869873, - "step": 295199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.376771450042725, - "loss_rtd": 0.1766503006219864, - "loss_sent": 0.04354753717780113, - "loss_sod": 0.10782700777053833, - "loss_total": 0.32802483439445496, - "step": 295199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.8968387246131897, - "learning_rate": 1.0658340383024057e-05, - "loss": 0.4263, - "step": 295200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.709486484527588, - "loss_rtd": 0.2513912320137024, - "loss_sent": 0.2460688054561615, - "loss_sod": 0.016093676909804344, - "loss_total": 0.5135537385940552, - "step": 295299 - }, - { - "epoch": 0.014598, - "loss_gen": 6.1804399490356445, - "loss_rtd": 0.22530977427959442, - "loss_sent": 0.14730171859264374, - "loss_sod": 0.03301909938454628, - "loss_total": 0.40563058853149414, - "step": 295299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.877086341381073, - "learning_rate": 1.0638763627991283e-05, - "loss": 0.4127, - "step": 295300 - }, - { - "epoch": 0.014798, - "loss_gen": 6.099516868591309, - "loss_rtd": 0.22501027584075928, - "loss_sent": 0.08149467408657074, - "loss_sod": 0.14560794830322266, - "loss_total": 0.45211291313171387, - "step": 295399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.662152290344238, - "loss_rtd": 0.222167506814003, - "loss_sent": 0.15366816520690918, - "loss_sod": 0.12854395806789398, - "loss_total": 0.5043796300888062, - "step": 295399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.238622784614563, - "learning_rate": 1.06192027276598e-05, - "loss": 0.4047, - "step": 295400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.903112411499023, - "loss_rtd": 0.20635390281677246, - "loss_sent": 0.13178260624408722, - "loss_sod": 0.15884466469287872, - "loss_total": 0.496981143951416, - "step": 295499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.323427677154541, - "loss_rtd": 0.1728198230266571, - "loss_sent": 0.045676738023757935, - "loss_sod": 0.08595232665538788, - "loss_total": 0.3044488728046417, - "step": 295499 - }, - { - "epoch": 0.015, - "grad_norm": 1.3525426387786865, - "learning_rate": 1.0599657689908742e-05, - "loss": 0.4227, - "step": 295500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.918006420135498, - "loss_rtd": 0.21017996966838837, - "loss_sent": 0.12502259016036987, - "loss_sod": 0.0637504905462265, - "loss_total": 0.39895305037498474, - "step": 295599 - }, - { - "epoch": 0.015198, - "loss_gen": 6.010242938995361, - "loss_rtd": 0.20556005835533142, - "loss_sent": 0.19859637320041656, - "loss_sod": 0.034558624029159546, - "loss_total": 0.43871504068374634, - "step": 295599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.5819967985153198, - "learning_rate": 1.0580128522610872e-05, - "loss": 0.4275, - "step": 295600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.248749732971191, - "loss_rtd": 0.18831850588321686, - "loss_sent": 0.030619287863373756, - "loss_sod": 0.05449339747428894, - "loss_total": 0.2734311819076538, - "step": 295699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.176632881164551, - "loss_rtd": 0.16702422499656677, - "loss_sent": 0.03597523272037506, - "loss_sod": 0.0655825212597847, - "loss_total": 0.26858198642730713, - "step": 295699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.7380995750427246, - "learning_rate": 1.056061523363251e-05, - "loss": 0.4084, - "step": 295700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.933668613433838, - "loss_rtd": 0.23030397295951843, - "loss_sent": 0.11890958249568939, - "loss_sod": 0.1161596029996872, - "loss_total": 0.465373158454895, - "step": 295799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.771134376525879, - "loss_rtd": 0.20498719811439514, - "loss_sent": 0.06305620819330215, - "loss_sod": 0.024693351238965988, - "loss_total": 0.2927367687225342, - "step": 295799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.2381393909454346, - "learning_rate": 1.0541117830833608e-05, - "loss": 0.435, - "step": 295800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.445761680603027, - "loss_rtd": 0.19832631945610046, - "loss_sent": 0.09245370328426361, - "loss_sod": 0.06520716845989227, - "loss_total": 0.35598719120025635, - "step": 295899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.892358779907227, - "loss_rtd": 0.22854578495025635, - "loss_sent": 0.16346819698810577, - "loss_sod": 0.04468546062707901, - "loss_total": 0.43669945001602173, - "step": 295899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.5533887147903442, - "learning_rate": 1.052163632206773e-05, - "loss": 0.4148, - "step": 295900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.874584197998047, - "loss_rtd": 0.2053760290145874, - "loss_sent": 0.12736639380455017, - "loss_sod": 0.12023165822029114, - "loss_total": 0.4529740810394287, - "step": 295999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.839260101318359, - "loss_rtd": 0.2121221274137497, - "loss_sent": 0.20069658756256104, - "loss_sod": 0.012942355126142502, - "loss_total": 0.42576107382774353, - "step": 295999 - }, - { - "epoch": 0.016, - "grad_norm": 0.8821128606796265, - "learning_rate": 1.050217071518203e-05, - "loss": 0.4363, - "step": 296000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4061391353607178, - "eval_runtime": 151.6422, - "eval_samples_per_second": 101.838, - "eval_steps_per_second": 0.798, - "step": 296000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.9743757247924805, - "loss_rtd": 0.21355779469013214, - "loss_sent": 0.2628011405467987, - "loss_sod": 0.13888202607631683, - "loss_total": 0.6152409315109253, - "step": 296099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.897843360900879, - "loss_rtd": 0.2111058086156845, - "loss_sent": 0.10183496028184891, - "loss_sod": 0.02770070731639862, - "loss_total": 0.34064146876335144, - "step": 296099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.251771092414856, - "learning_rate": 1.0482721018017232e-05, - "loss": 0.4263, - "step": 296100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.334744453430176, - "loss_rtd": 0.1818494349718094, - "loss_sent": 0.03574901446700096, - "loss_sod": 0.03192409500479698, - "loss_total": 0.24952255189418793, - "step": 296199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.954919338226318, - "loss_rtd": 0.20946823060512543, - "loss_sent": 0.2702799439430237, - "loss_sod": 0.12556950747966766, - "loss_total": 0.6053177118301392, - "step": 296199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.9505832195281982, - "learning_rate": 1.0463287238407682e-05, - "loss": 0.4314, - "step": 296200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.765938758850098, - "loss_rtd": 0.21922503411769867, - "loss_sent": 0.3533184826374054, - "loss_sod": 0.038111329078674316, - "loss_total": 0.6106548309326172, - "step": 296299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.936850547790527, - "loss_rtd": 0.21708908677101135, - "loss_sent": 0.09488590806722641, - "loss_sod": 0.02059009112417698, - "loss_total": 0.3325650990009308, - "step": 296299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.1279613971710205, - "learning_rate": 1.0443869384181304e-05, - "loss": 0.4186, - "step": 296300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.8069682121276855, - "loss_rtd": 0.2173311859369278, - "loss_sent": 0.06469067931175232, - "loss_sod": 0.09630684554576874, - "loss_total": 0.37832871079444885, - "step": 296399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.9797797203063965, - "loss_rtd": 0.2254602611064911, - "loss_sent": 0.3228491246700287, - "loss_sod": 0.08778952062129974, - "loss_total": 0.6360988616943359, - "step": 296399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.264456868171692, - "learning_rate": 1.0424467463159621e-05, - "loss": 0.4301, - "step": 296400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.872979164123535, - "loss_rtd": 0.22613875567913055, - "loss_sent": 0.10173720121383667, - "loss_sod": 0.03349519520998001, - "loss_total": 0.36137115955352783, - "step": 296499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.38588285446167, - "loss_rtd": 0.1842663586139679, - "loss_sent": 0.03717409819364548, - "loss_sod": 0.08778329193592072, - "loss_total": 0.3092237710952759, - "step": 296499 - }, - { - "epoch": 0.017, - "grad_norm": 0.9810932874679565, - "learning_rate": 1.0405081483157698e-05, - "loss": 0.448, - "step": 296500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.761422634124756, - "loss_rtd": 0.19922517240047455, - "loss_sent": 0.15324638783931732, - "loss_sod": 0.028836267068982124, - "loss_total": 0.38130784034729004, - "step": 296599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.834839820861816, - "loss_rtd": 0.18709823489189148, - "loss_sent": 0.20373891294002533, - "loss_sod": 0.014057589694857597, - "loss_total": 0.40489473938941956, - "step": 296599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.6301513314247131, - "learning_rate": 1.0385711451984216e-05, - "loss": 0.3983, - "step": 296600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.727804183959961, - "loss_rtd": 0.20559702813625336, - "loss_sent": 0.25096338987350464, - "loss_sod": 0.012648189440369606, - "loss_total": 0.46920859813690186, - "step": 296699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.767256736755371, - "loss_rtd": 0.22392408549785614, - "loss_sent": 0.2691446542739868, - "loss_sod": 0.016966650262475014, - "loss_total": 0.5100353956222534, - "step": 296699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.9540920257568359, - "learning_rate": 1.0366357377441427e-05, - "loss": 0.4354, - "step": 296700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.80673885345459, - "loss_rtd": 0.21131189167499542, - "loss_sent": 0.20243430137634277, - "loss_sod": 0.08631715923547745, - "loss_total": 0.5000633597373962, - "step": 296799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.06290340423584, - "loss_rtd": 0.1622084677219391, - "loss_sent": 0.00015526461356785148, - "loss_sod": 0.10462068021297455, - "loss_total": 0.26698440313339233, - "step": 296799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.232321858406067, - "learning_rate": 1.0347019267325158e-05, - "loss": 0.423, - "step": 296800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.783798694610596, - "loss_rtd": 0.21064801514148712, - "loss_sent": 0.0831662192940712, - "loss_sod": 0.06500860303640366, - "loss_total": 0.3588228225708008, - "step": 296899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.779657363891602, - "loss_rtd": 0.2202780544757843, - "loss_sent": 0.06787479668855667, - "loss_sod": 0.10200324654579163, - "loss_total": 0.390156090259552, - "step": 296899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.2608566284179688, - "learning_rate": 1.0327697129424774e-05, - "loss": 0.4132, - "step": 296900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.684676647186279, - "loss_rtd": 0.2112094908952713, - "loss_sent": 0.12982314825057983, - "loss_sod": 0.025935960933566093, - "loss_total": 0.3669686019420624, - "step": 296999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.565051078796387, - "loss_rtd": 0.1902083456516266, - "loss_sent": 0.08210460096597672, - "loss_sod": 0.08566206693649292, - "loss_total": 0.3579750061035156, - "step": 296999 - }, - { - "epoch": 0.018, - "grad_norm": 0.8872039318084717, - "learning_rate": 1.030839097152324e-05, - "loss": 0.4287, - "step": 297000 - }, - { - "epoch": 0.018, - "eval_loss": 0.4070330858230591, - "eval_runtime": 151.3494, - "eval_samples_per_second": 102.035, - "eval_steps_per_second": 0.799, - "step": 297000 - }, - { - "epoch": 0.018198, - "loss_gen": 6.026285171508789, - "loss_rtd": 0.24354510009288788, - "loss_sent": 0.14378716051578522, - "loss_sod": 0.07584456354379654, - "loss_total": 0.4631768465042114, - "step": 297099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.670589923858643, - "loss_rtd": 0.21470296382904053, - "loss_sent": 0.07253681868314743, - "loss_sod": 0.14160436391830444, - "loss_total": 0.428844153881073, - "step": 297099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.7557255029678345, - "learning_rate": 1.0289100801397088e-05, - "loss": 0.4385, - "step": 297100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.80548620223999, - "loss_rtd": 0.2085079401731491, - "loss_sent": 0.12764374911785126, - "loss_sod": 0.03700674697756767, - "loss_total": 0.37315845489501953, - "step": 297199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.622036933898926, - "loss_rtd": 0.1856563836336136, - "loss_sent": 0.015428837388753891, - "loss_sod": 0.03319863975048065, - "loss_total": 0.23428386449813843, - "step": 297199 - }, - { - "epoch": 0.0184, - "grad_norm": 0.8043609261512756, - "learning_rate": 1.0269826626816376e-05, - "loss": 0.4314, - "step": 297200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.673922061920166, - "loss_rtd": 0.1946595311164856, - "loss_sent": 0.07288999110460281, - "loss_sod": 0.04901793226599693, - "loss_total": 0.31656745076179504, - "step": 297299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.985547065734863, - "loss_rtd": 0.21650931239128113, - "loss_sent": 0.32444992661476135, - "loss_sod": 0.01558828353881836, - "loss_total": 0.5565475225448608, - "step": 297299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.7502100467681885, - "learning_rate": 1.0250568455544745e-05, - "loss": 0.4293, - "step": 297300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.949221134185791, - "loss_rtd": 0.21529193222522736, - "loss_sent": 0.25843536853790283, - "loss_sod": 0.042320068925619125, - "loss_total": 0.5160473585128784, - "step": 297399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.934944152832031, - "loss_rtd": 0.212518572807312, - "loss_sent": 0.16465464234352112, - "loss_sod": 0.011793924495577812, - "loss_total": 0.3889671564102173, - "step": 297399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.8618829250335693, - "learning_rate": 1.0231326295339388e-05, - "loss": 0.4303, - "step": 297400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.899423122406006, - "loss_rtd": 0.23357856273651123, - "loss_sent": 0.2989017069339752, - "loss_sod": 0.01683850958943367, - "loss_total": 0.549318790435791, - "step": 297499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.954813003540039, - "loss_rtd": 0.22031544148921967, - "loss_sent": 0.10967754572629929, - "loss_sod": 0.02306656539440155, - "loss_total": 0.3530595600605011, - "step": 297499 - }, - { - "epoch": 0.019, - "grad_norm": 0.791570782661438, - "learning_rate": 1.0212100153951054e-05, - "loss": 0.4191, - "step": 297500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.731978416442871, - "loss_rtd": 0.23210537433624268, - "loss_sent": 0.10853546857833862, - "loss_sod": 0.0026697558350861073, - "loss_total": 0.3433105945587158, - "step": 297599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.460188865661621, - "loss_rtd": 0.1825115829706192, - "loss_sent": 0.007262526545673609, - "loss_sod": 0.10843467712402344, - "loss_total": 0.2982087731361389, - "step": 297599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.9947394728660583, - "learning_rate": 1.019289003912401e-05, - "loss": 0.4318, - "step": 297600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.637037754058838, - "loss_rtd": 0.22613970935344696, - "loss_sent": 0.24242305755615234, - "loss_sod": 0.02330154925584793, - "loss_total": 0.49186432361602783, - "step": 297699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.62609338760376, - "loss_rtd": 0.22445453703403473, - "loss_sent": 0.19351904094219208, - "loss_sod": 0.006364143453538418, - "loss_total": 0.4243377149105072, - "step": 297699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.9231829047203064, - "learning_rate": 1.017369595859609e-05, - "loss": 0.4344, - "step": 297700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.795924663543701, - "loss_rtd": 0.2381930649280548, - "loss_sent": 0.13380736112594604, - "loss_sod": 0.0353718027472496, - "loss_total": 0.40737223625183105, - "step": 297799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.828614234924316, - "loss_rtd": 0.216282457113266, - "loss_sent": 0.08772077411413193, - "loss_sod": 0.012895278632640839, - "loss_total": 0.31689852476119995, - "step": 297799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.7130690813064575, - "learning_rate": 1.0154517920098682e-05, - "loss": 0.4276, - "step": 297800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.616603851318359, - "loss_rtd": 0.20068252086639404, - "loss_sent": 0.13653664290905, - "loss_sod": 0.06905515491962433, - "loss_total": 0.40627431869506836, - "step": 297899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.861021995544434, - "loss_rtd": 0.22706495225429535, - "loss_sent": 0.32415828108787537, - "loss_sod": 0.0038043325766921043, - "loss_total": 0.5550275444984436, - "step": 297899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.12308669090271, - "learning_rate": 1.0135355931356705e-05, - "loss": 0.4493, - "step": 297900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.923847675323486, - "loss_rtd": 0.20875504612922668, - "loss_sent": 0.5159587264060974, - "loss_sod": 0.09680992364883423, - "loss_total": 0.8215236663818359, - "step": 297999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.8114190101623535, - "loss_rtd": 0.22599723935127258, - "loss_sent": 0.16716141998767853, - "loss_sod": 0.040851183235645294, - "loss_total": 0.434009850025177, - "step": 297999 - }, - { - "epoch": 0.02, - "grad_norm": 2.635293483734131, - "learning_rate": 1.0116210000088578e-05, - "loss": 0.443, - "step": 298000 - }, - { - "epoch": 0.02, - "eval_loss": 0.40928593277931213, - "eval_runtime": 151.8378, - "eval_samples_per_second": 101.707, - "eval_steps_per_second": 0.797, - "step": 298000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.813126087188721, - "loss_rtd": 0.22400996088981628, - "loss_sent": 0.12310399860143661, - "loss_sod": 0.020084943622350693, - "loss_total": 0.3671989142894745, - "step": 298099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.907942295074463, - "loss_rtd": 0.21236549317836761, - "loss_sent": 0.10100822895765305, - "loss_sod": 0.07614253461360931, - "loss_total": 0.38951626420021057, - "step": 298099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.754381000995636, - "learning_rate": 1.0097080134006286e-05, - "loss": 0.4397, - "step": 298100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.606247425079346, - "loss_rtd": 0.18676139414310455, - "loss_sent": 0.040077440440654755, - "loss_sod": 0.08575140684843063, - "loss_total": 0.31259024143218994, - "step": 298199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.761253356933594, - "loss_rtd": 0.20684626698493958, - "loss_sent": 0.32071366906166077, - "loss_sod": 0.023238133639097214, - "loss_total": 0.5507980585098267, - "step": 298199 - }, - { - "epoch": 0.0204, - "grad_norm": 2.0746564865112305, - "learning_rate": 1.0077966340815354e-05, - "loss": 0.4155, - "step": 298200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.899225234985352, - "loss_rtd": 0.204883873462677, - "loss_sent": 0.31172147393226624, - "loss_sod": 0.05679526552557945, - "loss_total": 0.573400616645813, - "step": 298299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.9609904289245605, - "loss_rtd": 0.21264024078845978, - "loss_sent": 0.1893542855978012, - "loss_sod": 0.043941430747509, - "loss_total": 0.4459359645843506, - "step": 298299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.960498332977295, - "learning_rate": 1.0058868628214813e-05, - "loss": 0.4244, - "step": 298300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.936389446258545, - "loss_rtd": 0.21245436370372772, - "loss_sent": 0.42014551162719727, - "loss_sod": 0.042525287717580795, - "loss_total": 0.6751251220703125, - "step": 298399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.671829700469971, - "loss_rtd": 0.2047274261713028, - "loss_sent": 0.2431950569152832, - "loss_sod": 0.0022957162000238895, - "loss_total": 0.45021820068359375, - "step": 298399 - }, - { - "epoch": 0.0208, - "grad_norm": 2.06866717338562, - "learning_rate": 1.00397870038972e-05, - "loss": 0.4285, - "step": 298400 - }, - { - "epoch": 0.020998, - "loss_gen": 6.019495964050293, - "loss_rtd": 0.22130316495895386, - "loss_sent": 0.10773345828056335, - "loss_sod": 0.06705247610807419, - "loss_total": 0.396089106798172, - "step": 298499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.783188819885254, - "loss_rtd": 0.21490241587162018, - "loss_sent": 0.13539086282253265, - "loss_sod": 0.0357794389128685, - "loss_total": 0.38607269525527954, - "step": 298499 - }, - { - "epoch": 0.021, - "grad_norm": 0.8840310573577881, - "learning_rate": 1.0020721475548606e-05, - "loss": 0.4287, - "step": 298500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.865187644958496, - "loss_rtd": 0.19026094675064087, - "loss_sent": 0.2522822618484497, - "loss_sod": 0.035999853163957596, - "loss_total": 0.4785430431365967, - "step": 298599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.421957492828369, - "loss_rtd": 0.21228951215744019, - "loss_sent": 0.1677059680223465, - "loss_sod": 0.02884584479033947, - "loss_total": 0.4088413119316101, - "step": 298599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.372063398361206, - "learning_rate": 1.0001672050848632e-05, - "loss": 0.432, - "step": 298600 - }, - { - "epoch": 0.021398, - "loss_gen": 6.029733180999756, - "loss_rtd": 0.20245236158370972, - "loss_sent": 0.23813922703266144, - "loss_sod": 0.04080799221992493, - "loss_total": 0.4813995957374573, - "step": 298699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.817180633544922, - "loss_rtd": 0.21810311079025269, - "loss_sent": 0.11740967631340027, - "loss_sod": 0.06908391416072845, - "loss_total": 0.4045967161655426, - "step": 298699 - }, - { - "epoch": 0.0214, - "grad_norm": 0.9485461711883545, - "learning_rate": 9.982638737470358e-06, - "loss": 0.42, - "step": 298700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.792064666748047, - "loss_rtd": 0.2184455245733261, - "loss_sent": 0.5901139378547668, - "loss_sod": 0.0015406090533360839, - "loss_total": 0.8101000785827637, - "step": 298799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.668981552124023, - "loss_rtd": 0.19227340817451477, - "loss_sent": 0.2015993446111679, - "loss_sod": 0.07817303389310837, - "loss_total": 0.47204577922821045, - "step": 298799 - }, - { - "epoch": 0.0216, - "grad_norm": 2.764828681945801, - "learning_rate": 9.963621543080415e-06, - "loss": 0.4254, - "step": 298800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.603916645050049, - "loss_rtd": 0.21229764819145203, - "loss_sent": 0.3499624729156494, - "loss_sod": 0.013875177130103111, - "loss_total": 0.5761352777481079, - "step": 298899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.999195098876953, - "loss_rtd": 0.21483048796653748, - "loss_sent": 0.21669946610927582, - "loss_sod": 0.03287728875875473, - "loss_total": 0.4644072651863098, - "step": 298899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.6463184356689453, - "learning_rate": 9.944620475338928e-06, - "loss": 0.4267, - "step": 298900 - }, - { - "epoch": 0.021998, - "loss_gen": 6.022584438323975, - "loss_rtd": 0.2096121609210968, - "loss_sent": 0.20677828788757324, - "loss_sod": 0.04668641835451126, - "loss_total": 0.4630768597126007, - "step": 298999 - }, - { - "epoch": 0.021998, - "loss_gen": 6.090778827667236, - "loss_rtd": 0.21469448506832123, - "loss_sent": 0.15987078845500946, - "loss_sod": 0.07775652408599854, - "loss_total": 0.4523218274116516, - "step": 298999 - }, - { - "epoch": 0.022, - "grad_norm": 1.2275803089141846, - "learning_rate": 9.925635541899536e-06, - "loss": 0.419, - "step": 299000 - }, - { - "epoch": 0.022, - "eval_loss": 0.40254780650138855, - "eval_runtime": 151.5688, - "eval_samples_per_second": 101.888, - "eval_steps_per_second": 0.798, - "step": 299000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.889149188995361, - "loss_rtd": 0.21863630414009094, - "loss_sent": 0.08945917338132858, - "loss_sod": 0.07114804536104202, - "loss_total": 0.37924352288246155, - "step": 299099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.488976001739502, - "loss_rtd": 0.2114456444978714, - "loss_sent": 0.20754298567771912, - "loss_sod": 0.00454333983361721, - "loss_total": 0.4235319495201111, - "step": 299099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.01533043384552, - "learning_rate": 9.906666750409337e-06, - "loss": 0.4243, - "step": 299100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.907752990722656, - "loss_rtd": 0.21214336156845093, - "loss_sent": 0.24359628558158875, - "loss_sod": 0.043090175837278366, - "loss_total": 0.49882981181144714, - "step": 299199 - }, - { - "epoch": 0.022398, - "loss_gen": 6.102293014526367, - "loss_rtd": 0.2234363704919815, - "loss_sent": 0.3608255386352539, - "loss_sod": 0.062283582985401154, - "loss_total": 0.6465455293655396, - "step": 299199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.8902572393417358, - "learning_rate": 9.887714108508983e-06, - "loss": 0.4465, - "step": 299200 - }, - { - "epoch": 0.022598, - "loss_gen": 6.135397434234619, - "loss_rtd": 0.21637411415576935, - "loss_sent": 0.28540757298469543, - "loss_sod": 0.005812958814203739, - "loss_total": 0.507594645023346, - "step": 299299 - }, - { - "epoch": 0.022598, - "loss_gen": 6.284907341003418, - "loss_rtd": 0.219236820936203, - "loss_sent": 0.11850326508283615, - "loss_sod": 0.01861223578453064, - "loss_total": 0.3563523292541504, - "step": 299299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.237573266029358, - "learning_rate": 9.868777623832586e-06, - "loss": 0.4217, - "step": 299300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.801009178161621, - "loss_rtd": 0.2080560028553009, - "loss_sent": 0.18533608317375183, - "loss_sod": 0.014758003875613213, - "loss_total": 0.4081500768661499, - "step": 299399 - }, - { - "epoch": 0.022798, - "loss_gen": 6.101986408233643, - "loss_rtd": 0.2245248407125473, - "loss_sent": 0.15532957017421722, - "loss_sod": 0.06217414140701294, - "loss_total": 0.44202858209609985, - "step": 299399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.7558884024620056, - "learning_rate": 9.849857304007781e-06, - "loss": 0.4222, - "step": 299400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.513538837432861, - "loss_rtd": 0.21030937135219574, - "loss_sent": 0.20333507657051086, - "loss_sod": 0.029832664877176285, - "loss_total": 0.4434770941734314, - "step": 299499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.658996105194092, - "loss_rtd": 0.20771342515945435, - "loss_sent": 0.07299751043319702, - "loss_sod": 0.12441278994083405, - "loss_total": 0.4051237106323242, - "step": 299499 - }, - { - "epoch": 0.023, - "grad_norm": 0.9803382158279419, - "learning_rate": 9.830953156655636e-06, - "loss": 0.4322, - "step": 299500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.8414483070373535, - "loss_rtd": 0.21803100407123566, - "loss_sent": 0.14960582554340363, - "loss_sod": 0.03182917833328247, - "loss_total": 0.39946600794792175, - "step": 299599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.983527183532715, - "loss_rtd": 0.21602879464626312, - "loss_sent": 0.2488606870174408, - "loss_sod": 0.06502971053123474, - "loss_total": 0.5299191474914551, - "step": 299599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.2604292631149292, - "learning_rate": 9.812065189390756e-06, - "loss": 0.4158, - "step": 299600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.726539134979248, - "loss_rtd": 0.1950404793024063, - "loss_sent": 0.12808853387832642, - "loss_sod": 0.031436990946531296, - "loss_total": 0.3545660078525543, - "step": 299699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.22651481628418, - "loss_rtd": 0.1948186159133911, - "loss_sent": 2.413586298644077e-05, - "loss_sod": 0.15451985597610474, - "loss_total": 0.3493626117706299, - "step": 299699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8676385283470154, - "learning_rate": 9.79319340982121e-06, - "loss": 0.4277, - "step": 299700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.286085605621338, - "loss_rtd": 0.1636226773262024, - "loss_sent": 0.06757794320583344, - "loss_sod": 0.019085204228758812, - "loss_total": 0.250285804271698, - "step": 299799 - }, - { - "epoch": 0.023598, - "loss_gen": 6.023898601531982, - "loss_rtd": 0.2052106410264969, - "loss_sent": 0.28852471709251404, - "loss_sod": 0.011790897697210312, - "loss_total": 0.5055262446403503, - "step": 299799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.8971786499023438, - "learning_rate": 9.774337825548563e-06, - "loss": 0.4175, - "step": 299800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.704233646392822, - "loss_rtd": 0.22727744281291962, - "loss_sent": 0.12884476780891418, - "loss_sod": 0.04108048230409622, - "loss_total": 0.3972027003765106, - "step": 299899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.376214981079102, - "loss_rtd": 0.17768554389476776, - "loss_sent": 0.033320698887109756, - "loss_sod": 0.008416893891990185, - "loss_total": 0.21942313015460968, - "step": 299899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.7222482562065125, - "learning_rate": 9.75549844416782e-06, - "loss": 0.4323, - "step": 299900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.739912986755371, - "loss_rtd": 0.21299508213996887, - "loss_sent": 0.3322557210922241, - "loss_sod": 0.013067997992038727, - "loss_total": 0.5583187937736511, - "step": 299999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.824176788330078, - "loss_rtd": 0.2032489776611328, - "loss_sent": 0.3767026364803314, - "loss_sod": 0.02217734232544899, - "loss_total": 0.6021289825439453, - "step": 299999 - }, - { - "epoch": 0.024, - "grad_norm": 2.0592379570007324, - "learning_rate": 9.736675273267487e-06, - "loss": 0.4224, - "step": 300000 - }, - { - "epoch": 0.024, - "eval_loss": 0.40290775895118713, - "eval_runtime": 151.5169, - "eval_samples_per_second": 101.923, - "eval_steps_per_second": 0.799, - "step": 300000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.842031478881836, - "loss_rtd": 0.20937180519104004, - "loss_sent": 0.2385185956954956, - "loss_sod": 0.021150756627321243, - "loss_total": 0.4690411686897278, - "step": 300099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.577695369720459, - "loss_rtd": 0.22668272256851196, - "loss_sent": 0.14812883734703064, - "loss_sod": 0.004138198681175709, - "loss_total": 0.37894976139068604, - "step": 300099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.4563050270080566, - "learning_rate": 9.717868320429541e-06, - "loss": 0.4364, - "step": 300100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.9344964027404785, - "loss_rtd": 0.19561925530433655, - "loss_sent": 0.045883551239967346, - "loss_sod": 0.09183825552463531, - "loss_total": 0.3333410620689392, - "step": 300199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.7659478187561035, - "loss_rtd": 0.21479427814483643, - "loss_sent": 0.2246863692998886, - "loss_sod": 0.028010647743940353, - "loss_total": 0.4674912989139557, - "step": 300199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9620458483695984, - "learning_rate": 9.699077593229434e-06, - "loss": 0.4158, - "step": 300200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.711204528808594, - "loss_rtd": 0.2187155783176422, - "loss_sent": 0.34310054779052734, - "loss_sod": 0.04240068420767784, - "loss_total": 0.6042168140411377, - "step": 300299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.713837623596191, - "loss_rtd": 0.2238655835390091, - "loss_sent": 0.13969789445400238, - "loss_sod": 0.05623272806406021, - "loss_total": 0.4197962284088135, - "step": 300299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.2001155614852905, - "learning_rate": 9.680303099236031e-06, - "loss": 0.4134, - "step": 300300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.767083644866943, - "loss_rtd": 0.20476669073104858, - "loss_sent": 0.1843688040971756, - "loss_sod": 0.05684830993413925, - "loss_total": 0.4459838271141052, - "step": 300399 - }, - { - "epoch": 0.024798, - "loss_gen": 6.067483901977539, - "loss_rtd": 0.20688337087631226, - "loss_sent": 0.2843725383281708, - "loss_sod": 0.05607297271490097, - "loss_total": 0.5473288893699646, - "step": 300399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.3059539794921875, - "learning_rate": 9.661544846011728e-06, - "loss": 0.4266, - "step": 300400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.937251091003418, - "loss_rtd": 0.2108709067106247, - "loss_sent": 0.14311131834983826, - "loss_sod": 0.12280671298503876, - "loss_total": 0.4767889380455017, - "step": 300499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.702668190002441, - "loss_rtd": 0.21346786618232727, - "loss_sent": 0.07040661573410034, - "loss_sod": 0.06024638190865517, - "loss_total": 0.3441208600997925, - "step": 300499 - }, - { - "epoch": 0.025, - "grad_norm": 1.0034197568893433, - "learning_rate": 9.642802841112347e-06, - "loss": 0.4303, - "step": 300500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.861401081085205, - "loss_rtd": 0.18989452719688416, - "loss_sent": 0.19715692102909088, - "loss_sod": 0.03241632133722305, - "loss_total": 0.4194677472114563, - "step": 300599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.596365928649902, - "loss_rtd": 0.21305887401103973, - "loss_sent": 0.06802504509687424, - "loss_sod": 0.02314567193388939, - "loss_total": 0.30422958731651306, - "step": 300599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.781015157699585, - "learning_rate": 9.624077092087142e-06, - "loss": 0.4143, - "step": 300600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.217522144317627, - "loss_rtd": 0.15774831175804138, - "loss_sent": 2.5797296984819695e-05, - "loss_sod": 0.02641209587454796, - "loss_total": 0.1841862052679062, - "step": 300699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.510445594787598, - "loss_rtd": 0.18725579977035522, - "loss_sent": 0.07301981002092361, - "loss_sod": 0.1081763505935669, - "loss_total": 0.36845195293426514, - "step": 300699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.9929467439651489, - "learning_rate": 9.605367606478854e-06, - "loss": 0.419, - "step": 300700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.955038070678711, - "loss_rtd": 0.21208456158638, - "loss_sent": 0.22477084398269653, - "loss_sod": 0.036805808544158936, - "loss_total": 0.4736612141132355, - "step": 300799 - }, - { - "epoch": 0.025598, - "loss_gen": 6.133488178253174, - "loss_rtd": 0.21041129529476166, - "loss_sent": 0.4313626289367676, - "loss_sod": 0.024549473077058792, - "loss_total": 0.6663234233856201, - "step": 300799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.38124680519104, - "learning_rate": 9.586674391823663e-06, - "loss": 0.4271, - "step": 300800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.827110290527344, - "loss_rtd": 0.22941620647907257, - "loss_sent": 0.1237158551812172, - "loss_sod": 0.05527558550238609, - "loss_total": 0.40840762853622437, - "step": 300899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.892513751983643, - "loss_rtd": 0.2249341607093811, - "loss_sent": 0.1240287646651268, - "loss_sod": 0.036415159702301025, - "loss_total": 0.3853780925273895, - "step": 300899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.31509268283844, - "learning_rate": 9.567997455651212e-06, - "loss": 0.4322, - "step": 300900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.883686542510986, - "loss_rtd": 0.21076661348342896, - "loss_sent": 0.20811495184898376, - "loss_sod": 0.03484820947051048, - "loss_total": 0.4537297785282135, - "step": 300999 - }, - { - "epoch": 0.025998, - "loss_gen": 6.016758918762207, - "loss_rtd": 0.24303993582725525, - "loss_sent": 0.12013135850429535, - "loss_sod": 0.006829577032476664, - "loss_total": 0.3700008690357208, - "step": 300999 - }, - { - "epoch": 0.026, - "grad_norm": 1.149437665939331, - "learning_rate": 9.549336805484531e-06, - "loss": 0.4247, - "step": 301000 - }, - { - "epoch": 0.026, - "eval_loss": 0.3994949460029602, - "eval_runtime": 151.57, - "eval_samples_per_second": 101.887, - "eval_steps_per_second": 0.798, - "step": 301000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.778402328491211, - "loss_rtd": 0.20204311609268188, - "loss_sent": 0.19165849685668945, - "loss_sod": 0.01693936064839363, - "loss_total": 0.4106409549713135, - "step": 301099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.913434982299805, - "loss_rtd": 0.21450239419937134, - "loss_sent": 0.33072736859321594, - "loss_sod": 0.04181693121790886, - "loss_total": 0.5870466828346252, - "step": 301099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.1853196620941162, - "learning_rate": 9.53069244884015e-06, - "loss": 0.413, - "step": 301100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.8812713623046875, - "loss_rtd": 0.2307126373052597, - "loss_sent": 0.2065943330526352, - "loss_sod": 0.022692713886499405, - "loss_total": 0.459999680519104, - "step": 301199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.574290752410889, - "loss_rtd": 0.23668262362480164, - "loss_sent": 0.11356791108846664, - "loss_sod": 0.028580449521541595, - "loss_total": 0.37883099913597107, - "step": 301199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.6476990580558777, - "learning_rate": 9.512064393228015e-06, - "loss": 0.4067, - "step": 301200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.341989517211914, - "loss_rtd": 0.16177833080291748, - "loss_sent": 2.711415072553791e-05, - "loss_sod": 0.08934150636196136, - "loss_total": 0.25114697217941284, - "step": 301299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.641251087188721, - "loss_rtd": 0.1875661462545395, - "loss_sent": 0.11462666094303131, - "loss_sod": 0.1264946311712265, - "loss_total": 0.4286874532699585, - "step": 301299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.1820822954177856, - "learning_rate": 9.493452646151506e-06, - "loss": 0.4319, - "step": 301300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.6729512214660645, - "loss_rtd": 0.20079460740089417, - "loss_sent": 0.0888160839676857, - "loss_sod": 0.04933574050664902, - "loss_total": 0.3389464318752289, - "step": 301399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.719766139984131, - "loss_rtd": 0.21247261762619019, - "loss_sent": 0.14630696177482605, - "loss_sod": 0.009837578982114792, - "loss_total": 0.3686171770095825, - "step": 301399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.0473250150680542, - "learning_rate": 9.474857215107419e-06, - "loss": 0.4369, - "step": 301400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.690413475036621, - "loss_rtd": 0.19615985453128815, - "loss_sent": 0.07599996030330658, - "loss_sod": 0.1089341789484024, - "loss_total": 0.38109397888183594, - "step": 301499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.081170082092285, - "loss_rtd": 0.16104573011398315, - "loss_sent": 0.0011152346851304173, - "loss_sod": 0.044768575578927994, - "loss_total": 0.2069295346736908, - "step": 301499 - }, - { - "epoch": 0.027, - "grad_norm": 1.1119778156280518, - "learning_rate": 9.456278107585998e-06, - "loss": 0.4403, - "step": 301500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.589986801147461, - "loss_rtd": 0.1943880319595337, - "loss_sent": 0.004554521758109331, - "loss_sod": 0.1489199846982956, - "loss_total": 0.3478625416755676, - "step": 301599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.281999111175537, - "loss_rtd": 0.17661824822425842, - "loss_sent": 0.005106969270855188, - "loss_sod": 0.039718326181173325, - "loss_total": 0.2214435487985611, - "step": 301599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.9654894471168518, - "learning_rate": 9.437715331070907e-06, - "loss": 0.4415, - "step": 301600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.765803813934326, - "loss_rtd": 0.22063113749027252, - "loss_sent": 0.11184588819742203, - "loss_sod": 0.004072606097906828, - "loss_total": 0.33654963970184326, - "step": 301699 - }, - { - "epoch": 0.027398, - "loss_gen": 6.429196357727051, - "loss_rtd": 0.22575436532497406, - "loss_sent": 0.0653010904788971, - "loss_sod": 0.07813893258571625, - "loss_total": 0.3691943883895874, - "step": 301699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.9896982908248901, - "learning_rate": 9.419168893039242e-06, - "loss": 0.4175, - "step": 301700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.694515228271484, - "loss_rtd": 0.2080574482679367, - "loss_sent": 0.06674934178590775, - "loss_sod": 0.08304727077484131, - "loss_total": 0.35785406827926636, - "step": 301799 - }, - { - "epoch": 0.027598, - "loss_gen": 6.014454364776611, - "loss_rtd": 0.19791358709335327, - "loss_sent": 0.18025633692741394, - "loss_sod": 0.06847569346427917, - "loss_total": 0.4466456174850464, - "step": 301799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8307927846908569, - "learning_rate": 9.400638800961487e-06, - "loss": 0.4191, - "step": 301800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.827275276184082, - "loss_rtd": 0.2270171046257019, - "loss_sent": 0.39101526141166687, - "loss_sod": 0.03082764521241188, - "loss_total": 0.648859977722168, - "step": 301899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.811234474182129, - "loss_rtd": 0.21190690994262695, - "loss_sent": 0.14939238131046295, - "loss_sod": 0.0018225417006760836, - "loss_total": 0.3631218373775482, - "step": 301899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.2130917310714722, - "learning_rate": 9.382125062301562e-06, - "loss": 0.4287, - "step": 301900 - }, - { - "epoch": 0.027998, - "loss_gen": 6.002170562744141, - "loss_rtd": 0.22525110840797424, - "loss_sent": 0.256336510181427, - "loss_sod": 0.07571940869092941, - "loss_total": 0.5573070049285889, - "step": 301999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.7604594230651855, - "loss_rtd": 0.21847416460514069, - "loss_sent": 0.1460372358560562, - "loss_sod": 0.06556612998247147, - "loss_total": 0.43007755279541016, - "step": 301999 - }, - { - "epoch": 0.028, - "grad_norm": 1.1580942869186401, - "learning_rate": 9.363627684516818e-06, - "loss": 0.4306, - "step": 302000 - }, - { - "epoch": 0.028, - "eval_loss": 0.4082627594470978, - "eval_runtime": 151.8601, - "eval_samples_per_second": 101.692, - "eval_steps_per_second": 0.797, - "step": 302000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.524214744567871, - "loss_rtd": 0.24167396128177643, - "loss_sent": 0.16886498034000397, - "loss_sod": 0.024528495967388153, - "loss_total": 0.43506741523742676, - "step": 302099 - }, - { - "epoch": 0.028198, - "loss_gen": 6.628436088562012, - "loss_rtd": 0.206573948264122, - "loss_sent": 0.064475879073143, - "loss_sod": 0.3051353693008423, - "loss_total": 0.5761852264404297, - "step": 302099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.5883597135543823, - "learning_rate": 9.34514667505797e-06, - "loss": 0.435, - "step": 302100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.717016220092773, - "loss_rtd": 0.21950861811637878, - "loss_sent": 0.4334104359149933, - "loss_sod": 0.01246470957994461, - "loss_total": 0.6653837561607361, - "step": 302199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.838071346282959, - "loss_rtd": 0.19945593178272247, - "loss_sent": 0.22899119555950165, - "loss_sod": 0.014160841703414917, - "loss_total": 0.44260796904563904, - "step": 302199 - }, - { - "epoch": 0.0284, - "grad_norm": 0.9990032315254211, - "learning_rate": 9.326682041369178e-06, - "loss": 0.4218, - "step": 302200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.844183921813965, - "loss_rtd": 0.2198197990655899, - "loss_sent": 0.4132113754749298, - "loss_sod": 0.10050918161869049, - "loss_total": 0.7335403561592102, - "step": 302299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.786034107208252, - "loss_rtd": 0.2089647352695465, - "loss_sent": 0.3188110589981079, - "loss_sod": 0.04004322737455368, - "loss_total": 0.5678189992904663, - "step": 302299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.4748289585113525, - "learning_rate": 9.308233790887999e-06, - "loss": 0.4207, - "step": 302300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.850669860839844, - "loss_rtd": 0.21573230624198914, - "loss_sent": 0.11724942922592163, - "loss_sod": 0.046550020575523376, - "loss_total": 0.37953174114227295, - "step": 302399 - }, - { - "epoch": 0.028798, - "loss_gen": 6.027736663818359, - "loss_rtd": 0.21444594860076904, - "loss_sent": 0.09546088427305222, - "loss_sod": 0.05568915605545044, - "loss_total": 0.3655959963798523, - "step": 302399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.1638667583465576, - "learning_rate": 9.289801931045395e-06, - "loss": 0.4279, - "step": 302400 - }, - { - "epoch": 0.028998, - "loss_gen": 6.195638179779053, - "loss_rtd": 0.21306723356246948, - "loss_sent": 0.37989112734794617, - "loss_sod": 0.10237512737512589, - "loss_total": 0.6953334808349609, - "step": 302499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.978787899017334, - "loss_rtd": 0.20936015248298645, - "loss_sent": 0.049700818955898285, - "loss_sod": 0.08687801659107208, - "loss_total": 0.3459390103816986, - "step": 302499 - }, - { - "epoch": 0.029, - "grad_norm": 1.337684154510498, - "learning_rate": 9.271386469265691e-06, - "loss": 0.4428, - "step": 302500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.901731967926025, - "loss_rtd": 0.21487298607826233, - "loss_sent": 0.11970613151788712, - "loss_sod": 0.005755370482802391, - "loss_total": 0.3403344750404358, - "step": 302599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.754985809326172, - "loss_rtd": 0.19078297913074493, - "loss_sent": 0.12921775877475739, - "loss_sod": 0.07635924220085144, - "loss_total": 0.39635998010635376, - "step": 302599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.9090731143951416, - "learning_rate": 9.252987412966647e-06, - "loss": 0.4289, - "step": 302600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.673803806304932, - "loss_rtd": 0.23954981565475464, - "loss_sent": 0.1688096523284912, - "loss_sod": 0.010437065735459328, - "loss_total": 0.4187965393066406, - "step": 302699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.75961446762085, - "loss_rtd": 0.20999027788639069, - "loss_sent": 0.2323492169380188, - "loss_sod": 0.019814923405647278, - "loss_total": 0.46215441823005676, - "step": 302699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.8130232691764832, - "learning_rate": 9.234604769559401e-06, - "loss": 0.4079, - "step": 302700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.370748043060303, - "loss_rtd": 0.19760359823703766, - "loss_sent": 0.02775844745337963, - "loss_sod": 0.04738260433077812, - "loss_total": 0.27274465560913086, - "step": 302799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.298427581787109, - "loss_rtd": 0.19612470269203186, - "loss_sent": 0.0008724250365048647, - "loss_sod": 0.12282057851552963, - "loss_total": 0.3198177218437195, - "step": 302799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.1167999505996704, - "learning_rate": 9.216238546448492e-06, - "loss": 0.433, - "step": 302800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.976594924926758, - "loss_rtd": 0.21692176163196564, - "loss_sent": 0.15063509345054626, - "loss_sod": 0.015588534064590931, - "loss_total": 0.38314539194107056, - "step": 302899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.5621418952941895, - "loss_rtd": 0.2005169689655304, - "loss_sent": 0.3393249213695526, - "loss_sod": 0.02127825655043125, - "loss_total": 0.5611201524734497, - "step": 302899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.424239993095398, - "learning_rate": 9.197888751031803e-06, - "loss": 0.4257, - "step": 302900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.912893295288086, - "loss_rtd": 0.21181289851665497, - "loss_sent": 0.0844845324754715, - "loss_sod": 0.0034042359329760075, - "loss_total": 0.29970166087150574, - "step": 302999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.859111309051514, - "loss_rtd": 0.20496448874473572, - "loss_sent": 0.03563971444964409, - "loss_sod": 0.17539413273334503, - "loss_total": 0.41599833965301514, - "step": 302999 - }, - { - "epoch": 0.03, - "grad_norm": 0.7764007449150085, - "learning_rate": 9.17955539070065e-06, - "loss": 0.4259, - "step": 303000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4112508296966553, - "eval_runtime": 151.5863, - "eval_samples_per_second": 101.876, - "eval_steps_per_second": 0.798, - "step": 303000 - }, - { - "epoch": 0.000198, - "loss_gen": 6.050841808319092, - "loss_rtd": 0.2241448163986206, - "loss_sent": 0.12831345200538635, - "loss_sod": 0.07911839336156845, - "loss_total": 0.431576669216156, - "step": 303099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.845432758331299, - "loss_rtd": 0.22832554578781128, - "loss_sent": 0.2352820485830307, - "loss_sod": 0.0187496617436409, - "loss_total": 0.4823572635650635, - "step": 303099 - }, - { - "epoch": 0.0002, - "grad_norm": 0.9500147700309753, - "learning_rate": 9.1612384728397e-06, - "loss": 0.4208, - "step": 303100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.8878021240234375, - "loss_rtd": 0.20205999910831451, - "loss_sent": 0.2727389633655548, - "loss_sod": 0.012358536943793297, - "loss_total": 0.4871574938297272, - "step": 303199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.56168794631958, - "loss_rtd": 0.19289152324199677, - "loss_sent": 0.13245266675949097, - "loss_sod": 0.01741240732371807, - "loss_total": 0.34275659918785095, - "step": 303199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.6196224689483643, - "learning_rate": 9.142938004827023e-06, - "loss": 0.4093, - "step": 303200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.765680313110352, - "loss_rtd": 0.2056213766336441, - "loss_sent": 0.2856610119342804, - "loss_sod": 0.002769284648820758, - "loss_total": 0.4940516948699951, - "step": 303299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.84200382232666, - "loss_rtd": 0.21612456440925598, - "loss_sent": 0.1716960370540619, - "loss_sod": 0.04607880860567093, - "loss_total": 0.4338994026184082, - "step": 303299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.8032750487327576, - "learning_rate": 9.124653994034022e-06, - "loss": 0.4204, - "step": 303300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.454245567321777, - "loss_rtd": 0.17996229231357574, - "loss_sent": 0.023502692580223083, - "loss_sod": 0.10512920469045639, - "loss_total": 0.3085941672325134, - "step": 303399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.909790992736816, - "loss_rtd": 0.2325899749994278, - "loss_sent": 0.40775030851364136, - "loss_sod": 0.013750200159847736, - "loss_total": 0.6540904641151428, - "step": 303399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.775709629058838, - "learning_rate": 9.106386447825499e-06, - "loss": 0.4226, - "step": 303400 - }, - { - "epoch": 0.000998, - "loss_gen": 6.396899223327637, - "loss_rtd": 0.22315895557403564, - "loss_sent": 0.3802189826965332, - "loss_sod": 0.08616264164447784, - "loss_total": 0.6895405650138855, - "step": 303499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.647336959838867, - "loss_rtd": 0.1995159238576889, - "loss_sent": 0.07161133736371994, - "loss_sod": 0.047437883913517, - "loss_total": 0.31856516003608704, - "step": 303499 - }, - { - "epoch": 0.001, - "grad_norm": 0.9866959452629089, - "learning_rate": 9.088135373559642e-06, - "loss": 0.4158, - "step": 303500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.747816562652588, - "loss_rtd": 0.2257116734981537, - "loss_sent": 0.09745519608259201, - "loss_sod": 0.050352804362773895, - "loss_total": 0.3735196888446808, - "step": 303599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.646799087524414, - "loss_rtd": 0.21798551082611084, - "loss_sent": 0.35812464356422424, - "loss_sod": 0.008219663053750992, - "loss_total": 0.5843298435211182, - "step": 303599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.3834220170974731, - "learning_rate": 9.069900778587948e-06, - "loss": 0.432, - "step": 303600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.22725248336792, - "loss_rtd": 0.18125180900096893, - "loss_sent": 0.0008502436685375869, - "loss_sod": 0.12379438430070877, - "loss_total": 0.30589643120765686, - "step": 303699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.671199321746826, - "loss_rtd": 0.1780216246843338, - "loss_sent": 0.11826789379119873, - "loss_sod": 0.10215851664543152, - "loss_total": 0.39844805002212524, - "step": 303699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.1688143014907837, - "learning_rate": 9.05168267025534e-06, - "loss": 0.4289, - "step": 303700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.7385573387146, - "loss_rtd": 0.194263756275177, - "loss_sent": 0.13795211911201477, - "loss_sod": 0.008502896875143051, - "loss_total": 0.3407187759876251, - "step": 303799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.806217670440674, - "loss_rtd": 0.21256524324417114, - "loss_sent": 0.2162608504295349, - "loss_sod": 0.007465574890375137, - "loss_total": 0.4362916648387909, - "step": 303799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.5026214122772217, - "learning_rate": 9.03348105590004e-06, - "loss": 0.4326, - "step": 303800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.081581115722656, - "loss_rtd": 0.17530463635921478, - "loss_sent": 2.538939588703215e-05, - "loss_sod": 0.09611823409795761, - "loss_total": 0.2714482545852661, - "step": 303899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.63226318359375, - "loss_rtd": 0.19327083230018616, - "loss_sent": 0.1276666522026062, - "loss_sod": 0.018620138987898827, - "loss_total": 0.33955761790275574, - "step": 303899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.214566707611084, - "learning_rate": 9.015295942853674e-06, - "loss": 0.4243, - "step": 303900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.632124423980713, - "loss_rtd": 0.17851640284061432, - "loss_sent": 0.05572657287120819, - "loss_sod": 0.06375811249017715, - "loss_total": 0.29800111055374146, - "step": 303999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.911339282989502, - "loss_rtd": 0.20451124012470245, - "loss_sent": 0.14055180549621582, - "loss_sod": 0.03734595701098442, - "loss_total": 0.382409006357193, - "step": 303999 - }, - { - "epoch": 0.002, - "grad_norm": 0.7258864045143127, - "learning_rate": 8.997127338441214e-06, - "loss": 0.4089, - "step": 304000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4066995084285736, - "eval_runtime": 154.1938, - "eval_samples_per_second": 100.153, - "eval_steps_per_second": 0.785, - "step": 304000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.176668643951416, - "loss_rtd": 0.17963097989559174, - "loss_sent": 0.1344403773546219, - "loss_sod": 0.015325573273003101, - "loss_total": 0.32939693331718445, - "step": 304099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.764191627502441, - "loss_rtd": 0.21740901470184326, - "loss_sent": 0.2713092565536499, - "loss_sod": 0.02650246024131775, - "loss_total": 0.5152207612991333, - "step": 304099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.444466233253479, - "learning_rate": 8.978975249980947e-06, - "loss": 0.4087, - "step": 304100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.423428058624268, - "loss_rtd": 0.16987082362174988, - "loss_sent": 0.03858550265431404, - "loss_sod": 0.011892799288034439, - "loss_total": 0.22034911811351776, - "step": 304199 - }, - { - "epoch": 0.002398, - "loss_gen": 6.461963653564453, - "loss_rtd": 0.2220069319009781, - "loss_sent": 0.12931038439273834, - "loss_sod": 0.06686516851186752, - "loss_total": 0.41818249225616455, - "step": 304199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.2428261041641235, - "learning_rate": 8.960839684784539e-06, - "loss": 0.4055, - "step": 304200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.889702796936035, - "loss_rtd": 0.21018104255199432, - "loss_sent": 0.27102744579315186, - "loss_sod": 0.05837943032383919, - "loss_total": 0.5395879149436951, - "step": 304299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.725460529327393, - "loss_rtd": 0.21940413117408752, - "loss_sent": 0.21741972863674164, - "loss_sod": 0.014049429446458817, - "loss_total": 0.4508732855319977, - "step": 304299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.1784645318984985, - "learning_rate": 8.942720650157004e-06, - "loss": 0.4199, - "step": 304300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.978394508361816, - "loss_rtd": 0.2223317176103592, - "loss_sent": 0.10638202726840973, - "loss_sod": 0.0260927714407444, - "loss_total": 0.354806512594223, - "step": 304399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.75954008102417, - "loss_rtd": 0.206836998462677, - "loss_sent": 0.1952584683895111, - "loss_sod": 0.01332041248679161, - "loss_total": 0.41541588306427, - "step": 304399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.024192214012146, - "learning_rate": 8.924618153396691e-06, - "loss": 0.4291, - "step": 304400 - }, - { - "epoch": 0.002998, - "loss_gen": 6.017472267150879, - "loss_rtd": 0.20987379550933838, - "loss_sent": 0.33753156661987305, - "loss_sod": 0.08601886034011841, - "loss_total": 0.6334242224693298, - "step": 304499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.853302001953125, - "loss_rtd": 0.19905000925064087, - "loss_sent": 0.12959928810596466, - "loss_sod": 0.04448812082409859, - "loss_total": 0.3731374144554138, - "step": 304499 - }, - { - "epoch": 0.003, - "grad_norm": 1.1366268396377563, - "learning_rate": 8.906532201795258e-06, - "loss": 0.427, - "step": 304500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.765444755554199, - "loss_rtd": 0.21887369453907013, - "loss_sent": 0.10184040665626526, - "loss_sod": 0.0014352030120790005, - "loss_total": 0.3221493065357208, - "step": 304599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.891476631164551, - "loss_rtd": 0.22070364654064178, - "loss_sent": 0.1429951786994934, - "loss_sod": 0.012814337387681007, - "loss_total": 0.37651318311691284, - "step": 304599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.5514282584190369, - "learning_rate": 8.888462802637747e-06, - "loss": 0.4148, - "step": 304600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.945662975311279, - "loss_rtd": 0.1951703280210495, - "loss_sent": 0.18956737220287323, - "loss_sod": 0.006553894840180874, - "loss_total": 0.3912915885448456, - "step": 304699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.197839736938477, - "loss_rtd": 0.15526467561721802, - "loss_sent": 2.5514187655062415e-05, - "loss_sod": 0.14672408998012543, - "loss_total": 0.3020142912864685, - "step": 304699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.3620028495788574, - "learning_rate": 8.870409963202498e-06, - "loss": 0.4273, - "step": 304700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.761125564575195, - "loss_rtd": 0.21085788309574127, - "loss_sent": 0.16229256987571716, - "loss_sod": 0.012492502108216286, - "loss_total": 0.385642945766449, - "step": 304799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.987504959106445, - "loss_rtd": 0.21323375403881073, - "loss_sent": 0.22197028994560242, - "loss_sod": 0.056242480874061584, - "loss_total": 0.49144652485847473, - "step": 304799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.070345401763916, - "learning_rate": 8.852373690761213e-06, - "loss": 0.4143, - "step": 304800 - }, - { - "epoch": 0.003798, - "loss_gen": 6.215188980102539, - "loss_rtd": 0.22704657912254333, - "loss_sent": 0.7372082471847534, - "loss_sod": 0.037779513746500015, - "loss_total": 1.002034306526184, - "step": 304899 - }, - { - "epoch": 0.003798, - "loss_gen": 6.315957069396973, - "loss_rtd": 0.2315492331981659, - "loss_sent": 0.12474516779184341, - "loss_sod": 0.039301835000514984, - "loss_total": 0.3955962359905243, - "step": 304899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.9579159021377563, - "learning_rate": 8.834353992578864e-06, - "loss": 0.4151, - "step": 304900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.721354961395264, - "loss_rtd": 0.20183220505714417, - "loss_sent": 0.026712244376540184, - "loss_sod": 0.019418183714151382, - "loss_total": 0.24796262383460999, - "step": 304999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.389822006225586, - "loss_rtd": 0.1734730303287506, - "loss_sent": 0.03372808173298836, - "loss_sod": 0.02425766922533512, - "loss_total": 0.23145878314971924, - "step": 304999 - }, - { - "epoch": 0.004, - "grad_norm": 0.6036795973777771, - "learning_rate": 8.816350875913809e-06, - "loss": 0.4372, - "step": 305000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4023120403289795, - "eval_runtime": 150.9107, - "eval_samples_per_second": 102.332, - "eval_steps_per_second": 0.802, - "step": 305000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.973085403442383, - "loss_rtd": 0.21758605539798737, - "loss_sent": 0.09473367780447006, - "loss_sod": 0.017782317474484444, - "loss_total": 0.3301020562648773, - "step": 305099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.739782333374023, - "loss_rtd": 0.1868346631526947, - "loss_sent": 0.06093979254364967, - "loss_sod": 0.11810218542814255, - "loss_total": 0.3658766448497772, - "step": 305099 - }, - { - "epoch": 0.0042, - "grad_norm": 0.9413608908653259, - "learning_rate": 8.798364348017712e-06, - "loss": 0.4327, - "step": 305100 - }, - { - "epoch": 0.004398, - "loss_gen": 6.132180690765381, - "loss_rtd": 0.2076473832130432, - "loss_sent": 0.10466967523097992, - "loss_sod": 0.04797498136758804, - "loss_total": 0.3602920174598694, - "step": 305199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.889447212219238, - "loss_rtd": 0.21890375018119812, - "loss_sent": 0.40674641728401184, - "loss_sod": 0.020622648298740387, - "loss_total": 0.6462727785110474, - "step": 305199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.126071572303772, - "learning_rate": 8.780394416135512e-06, - "loss": 0.4266, - "step": 305200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.813969612121582, - "loss_rtd": 0.2175520807504654, - "loss_sent": 0.3686645030975342, - "loss_sod": 0.06446811556816101, - "loss_total": 0.6506847143173218, - "step": 305299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.83366584777832, - "loss_rtd": 0.20576824247837067, - "loss_sent": 0.15161006152629852, - "loss_sod": 0.07579024136066437, - "loss_total": 0.43316853046417236, - "step": 305299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.6219701766967773, - "learning_rate": 8.762441087505513e-06, - "loss": 0.4226, - "step": 305300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.857290744781494, - "loss_rtd": 0.22079241275787354, - "loss_sent": 0.2261514961719513, - "loss_sod": 0.05775216221809387, - "loss_total": 0.5046960711479187, - "step": 305399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.9695587158203125, - "loss_rtd": 0.22768771648406982, - "loss_sent": 0.1626943200826645, - "loss_sod": 0.03034215047955513, - "loss_total": 0.42072421312332153, - "step": 305399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.2125617265701294, - "learning_rate": 8.744504369359313e-06, - "loss": 0.4207, - "step": 305400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.4371018409729, - "loss_rtd": 0.1744883507490158, - "loss_sent": 0.03116326406598091, - "loss_sod": 0.0531020350754261, - "loss_total": 0.2587536573410034, - "step": 305499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.606621265411377, - "loss_rtd": 0.1650807112455368, - "loss_sent": 0.04597359150648117, - "loss_sod": 0.17810678482055664, - "loss_total": 0.3891611099243164, - "step": 305499 - }, - { - "epoch": 0.005, - "grad_norm": 1.2446157932281494, - "learning_rate": 8.726584268921827e-06, - "loss": 0.4172, - "step": 305500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.588064670562744, - "loss_rtd": 0.19660793244838715, - "loss_sent": 0.007900391705334187, - "loss_sod": 0.07882022112607956, - "loss_total": 0.2833285331726074, - "step": 305599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.797532558441162, - "loss_rtd": 0.19245637953281403, - "loss_sent": 0.3430267572402954, - "loss_sod": 0.0847749263048172, - "loss_total": 0.620258092880249, - "step": 305599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.202588438987732, - "learning_rate": 8.708680793411256e-06, - "loss": 0.428, - "step": 305600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.728855133056641, - "loss_rtd": 0.21642325818538666, - "loss_sent": 0.18111597001552582, - "loss_sod": 0.03321612998843193, - "loss_total": 0.4307553768157959, - "step": 305699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.846833229064941, - "loss_rtd": 0.2047511637210846, - "loss_sent": 0.27941465377807617, - "loss_sod": 0.06933329254388809, - "loss_total": 0.5534991025924683, - "step": 305699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.4724195003509521, - "learning_rate": 8.690793950039122e-06, - "loss": 0.4311, - "step": 305700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.72415828704834, - "loss_rtd": 0.2253650426864624, - "loss_sent": 0.2744627892971039, - "loss_sod": 0.016727153211832047, - "loss_total": 0.5165549516677856, - "step": 305799 - }, - { - "epoch": 0.005598, - "loss_gen": 6.1052350997924805, - "loss_rtd": 0.21438582241535187, - "loss_sent": 0.3632456064224243, - "loss_sod": 0.02788006141781807, - "loss_total": 0.605511486530304, - "step": 305799 - }, - { - "epoch": 0.0056, - "grad_norm": 2.0527873039245605, - "learning_rate": 8.672923746010242e-06, - "loss": 0.4223, - "step": 305800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.081293106079102, - "loss_rtd": 0.2098085582256317, - "loss_sent": 0.45756104588508606, - "loss_sod": 0.06941455602645874, - "loss_total": 0.7367841601371765, - "step": 305899 - }, - { - "epoch": 0.005798, - "loss_gen": 6.042714595794678, - "loss_rtd": 0.20942792296409607, - "loss_sent": 0.322293758392334, - "loss_sod": 0.14053134620189667, - "loss_total": 0.6722530126571655, - "step": 305899 - }, - { - "epoch": 0.0058, - "grad_norm": 2.0109331607818604, - "learning_rate": 8.655070188522752e-06, - "loss": 0.4232, - "step": 305900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.819046974182129, - "loss_rtd": 0.1984071135520935, - "loss_sent": 0.19649410247802734, - "loss_sod": 0.10485399514436722, - "loss_total": 0.49975520372390747, - "step": 305999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.798454761505127, - "loss_rtd": 0.21941687166690826, - "loss_sent": 0.30555278062820435, - "loss_sod": 0.06277532875537872, - "loss_total": 0.587744951248169, - "step": 305999 - }, - { - "epoch": 0.006, - "grad_norm": 1.1517330408096313, - "learning_rate": 8.637233284768026e-06, - "loss": 0.42, - "step": 306000 - }, - { - "epoch": 0.006, - "eval_loss": 0.4086720943450928, - "eval_runtime": 150.8522, - "eval_samples_per_second": 102.372, - "eval_steps_per_second": 0.802, - "step": 306000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.253348350524902, - "loss_rtd": 0.1574322134256363, - "loss_sent": 0.010820952244102955, - "loss_sod": 0.09293248504400253, - "loss_total": 0.2611856460571289, - "step": 306099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.309996604919434, - "loss_rtd": 0.1717710942029953, - "loss_sent": 0.0022359779104590416, - "loss_sod": 0.049111489206552505, - "loss_total": 0.22311855852603912, - "step": 306099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.753007709980011, - "learning_rate": 8.61941304193079e-06, - "loss": 0.4214, - "step": 306100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.302216529846191, - "loss_rtd": 0.1714804768562317, - "loss_sent": 0.00319567765109241, - "loss_sod": 0.10349278897047043, - "loss_total": 0.2781689465045929, - "step": 306199 - }, - { - "epoch": 0.006398, - "loss_gen": 6.137814521789551, - "loss_rtd": 0.20129691064357758, - "loss_sent": 0.11549151688814163, - "loss_sod": 0.021362487226724625, - "loss_total": 0.33815091848373413, - "step": 306199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.9512887597084045, - "learning_rate": 8.601609467189037e-06, - "loss": 0.4173, - "step": 306200 - }, - { - "epoch": 0.006598, - "loss_gen": 6.000729560852051, - "loss_rtd": 0.2171856015920639, - "loss_sent": 0.09850183129310608, - "loss_sod": 0.05850699171423912, - "loss_total": 0.3741944134235382, - "step": 306299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.706486701965332, - "loss_rtd": 0.22972096502780914, - "loss_sent": 0.1362563967704773, - "loss_sod": 0.0803494080901146, - "loss_total": 0.44632676243782043, - "step": 306299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.6421685218811035, - "learning_rate": 8.583822567714045e-06, - "loss": 0.4413, - "step": 306300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.928966522216797, - "loss_rtd": 0.20692603290081024, - "loss_sent": 0.3748835623264313, - "loss_sod": 0.013958632946014404, - "loss_total": 0.5957682132720947, - "step": 306399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.421674728393555, - "loss_rtd": 0.1743885576725006, - "loss_sent": 0.006461540702730417, - "loss_sod": 0.03082374669611454, - "loss_total": 0.21167385578155518, - "step": 306399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.5194551944732666, - "learning_rate": 8.566052350670362e-06, - "loss": 0.427, - "step": 306400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.7855939865112305, - "loss_rtd": 0.20919832587242126, - "loss_sent": 0.26255521178245544, - "loss_sod": 0.04820305109024048, - "loss_total": 0.5199565887451172, - "step": 306499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.920647144317627, - "loss_rtd": 0.19213224947452545, - "loss_sent": 0.16192471981048584, - "loss_sod": 0.0509570874273777, - "loss_total": 0.4050140380859375, - "step": 306499 - }, - { - "epoch": 0.007, - "grad_norm": 1.3673182725906372, - "learning_rate": 8.548298823215833e-06, - "loss": 0.4062, - "step": 306500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.848636150360107, - "loss_rtd": 0.22508534789085388, - "loss_sent": 0.1098591759800911, - "loss_sod": 0.07311011105775833, - "loss_total": 0.4080546200275421, - "step": 306599 - }, - { - "epoch": 0.007198, - "loss_gen": 6.084027290344238, - "loss_rtd": 0.22001692652702332, - "loss_sent": 0.05970962718129158, - "loss_sod": 0.00628052419051528, - "loss_total": 0.2860070765018463, - "step": 306599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.802567720413208, - "learning_rate": 8.530561992501595e-06, - "loss": 0.4224, - "step": 306600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.832769870758057, - "loss_rtd": 0.23815402388572693, - "loss_sent": 0.2707604169845581, - "loss_sod": 0.06610839813947678, - "loss_total": 0.57502281665802, - "step": 306699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.9994401931762695, - "loss_rtd": 0.2005050927400589, - "loss_sent": 0.06158899515867233, - "loss_sod": 0.0013490061974152923, - "loss_total": 0.26344308257102966, - "step": 306699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.000777244567871, - "learning_rate": 8.512841865672017e-06, - "loss": 0.425, - "step": 306700 - }, - { - "epoch": 0.007598, - "loss_gen": 6.238945484161377, - "loss_rtd": 0.21320757269859314, - "loss_sent": 0.16016341745853424, - "loss_sod": 0.1064838171005249, - "loss_total": 0.4798548221588135, - "step": 306799 - }, - { - "epoch": 0.007598, - "loss_gen": 6.0477423667907715, - "loss_rtd": 0.22165288031101227, - "loss_sent": 0.07312484830617905, - "loss_sod": 0.01312224194407463, - "loss_total": 0.30789998173713684, - "step": 306799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.1002517938613892, - "learning_rate": 8.495138449864775e-06, - "loss": 0.426, - "step": 306800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.6312031745910645, - "loss_rtd": 0.20619864761829376, - "loss_sent": 0.2728365361690521, - "loss_sod": 0.02045396715402603, - "loss_total": 0.4994891583919525, - "step": 306899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.870377063751221, - "loss_rtd": 0.23009894788265228, - "loss_sent": 0.32479292154312134, - "loss_sod": 0.20592084527015686, - "loss_total": 0.7608126997947693, - "step": 306899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.719900131225586, - "learning_rate": 8.477451752210803e-06, - "loss": 0.4312, - "step": 306900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.845320224761963, - "loss_rtd": 0.24552302062511444, - "loss_sent": 0.057985760271549225, - "loss_sod": 0.021562224254012108, - "loss_total": 0.3250710070133209, - "step": 306999 - }, - { - "epoch": 0.007998, - "loss_gen": 6.025938987731934, - "loss_rtd": 0.23549242317676544, - "loss_sent": 0.20588351786136627, - "loss_sod": 0.03881218284368515, - "loss_total": 0.48018813133239746, - "step": 306999 - }, - { - "epoch": 0.008, - "grad_norm": 0.9462911486625671, - "learning_rate": 8.459781779834303e-06, - "loss": 0.4277, - "step": 307000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4029849171638489, - "eval_runtime": 151.0495, - "eval_samples_per_second": 102.238, - "eval_steps_per_second": 0.801, - "step": 307000 - }, - { - "epoch": 0.008198, - "loss_gen": 6.112674713134766, - "loss_rtd": 0.2207801789045334, - "loss_sent": 0.28970956802368164, - "loss_sod": 0.05031801387667656, - "loss_total": 0.5608077645301819, - "step": 307099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.869450569152832, - "loss_rtd": 0.19692069292068481, - "loss_sent": 0.26349619030952454, - "loss_sod": 0.037413813173770905, - "loss_total": 0.49783068895339966, - "step": 307099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.637731671333313, - "learning_rate": 8.442128539852729e-06, - "loss": 0.4284, - "step": 307100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.389323711395264, - "loss_rtd": 0.20814189314842224, - "loss_sent": 0.06257633864879608, - "loss_sod": 0.02617780677974224, - "loss_total": 0.2968960404396057, - "step": 307199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.581353187561035, - "loss_rtd": 0.17588059604167938, - "loss_sent": 0.02419213205575943, - "loss_sod": 0.061465442180633545, - "loss_total": 0.26153817772865295, - "step": 307199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.6717695593833923, - "learning_rate": 8.424492039376809e-06, - "loss": 0.4137, - "step": 307200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.201351642608643, - "loss_rtd": 0.15850655734539032, - "loss_sent": 0.023376837372779846, - "loss_sod": 0.03525843098759651, - "loss_total": 0.21714182198047638, - "step": 307299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.674869060516357, - "loss_rtd": 0.21613983809947968, - "loss_sent": 0.11836820095777512, - "loss_sod": 0.012142570689320564, - "loss_total": 0.3466506004333496, - "step": 307299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.6594082713127136, - "learning_rate": 8.406872285510525e-06, - "loss": 0.4122, - "step": 307300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.550102233886719, - "loss_rtd": 0.16742391884326935, - "loss_sent": 0.043269120156764984, - "loss_sod": 0.037360548973083496, - "loss_total": 0.24805358052253723, - "step": 307399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.916943073272705, - "loss_rtd": 0.2425854653120041, - "loss_sent": 0.1675383597612381, - "loss_sod": 0.03806499019265175, - "loss_total": 0.448188841342926, - "step": 307399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.17293381690979, - "learning_rate": 8.38926928535112e-06, - "loss": 0.4251, - "step": 307400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.976449489593506, - "loss_rtd": 0.21067480742931366, - "loss_sent": 0.19202172756195068, - "loss_sod": 0.04000909626483917, - "loss_total": 0.4427056312561035, - "step": 307499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.856799125671387, - "loss_rtd": 0.19372926652431488, - "loss_sent": 0.03597626835107803, - "loss_sod": 0.016917118802666664, - "loss_total": 0.24662265181541443, - "step": 307499 - }, - { - "epoch": 0.009, - "grad_norm": 0.7623675465583801, - "learning_rate": 8.37168304598906e-06, - "loss": 0.421, - "step": 307500 - }, - { - "epoch": 0.009198, - "loss_gen": 6.130488872528076, - "loss_rtd": 0.2060985267162323, - "loss_sent": 0.07861499488353729, - "loss_sod": 0.017229467630386353, - "loss_total": 0.30194300413131714, - "step": 307599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.748977184295654, - "loss_rtd": 0.21738268435001373, - "loss_sent": 0.1314508467912674, - "loss_sod": 0.09008508175611496, - "loss_total": 0.4389185905456543, - "step": 307599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.7999606728553772, - "learning_rate": 8.354113574508088e-06, - "loss": 0.3978, - "step": 307600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.7744221687316895, - "loss_rtd": 0.20567235350608826, - "loss_sent": 0.19313567876815796, - "loss_sod": 0.006816547363996506, - "loss_total": 0.4056245684623718, - "step": 307699 - }, - { - "epoch": 0.009398, - "loss_gen": 6.0000715255737305, - "loss_rtd": 0.22208687663078308, - "loss_sent": 0.18611925840377808, - "loss_sod": 0.033019907772541046, - "loss_total": 0.4412260353565216, - "step": 307699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.6177722811698914, - "learning_rate": 8.33656087798519e-06, - "loss": 0.4489, - "step": 307700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.748345375061035, - "loss_rtd": 0.21914519369602203, - "loss_sent": 0.30765247344970703, - "loss_sod": 0.0384446457028389, - "loss_total": 0.5652422904968262, - "step": 307799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.634149074554443, - "loss_rtd": 0.19680051505565643, - "loss_sent": 0.12341060489416122, - "loss_sod": 0.014248199760913849, - "loss_total": 0.3344593346118927, - "step": 307799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.9522451162338257, - "learning_rate": 8.319024963490596e-06, - "loss": 0.4384, - "step": 307800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.977197170257568, - "loss_rtd": 0.20900194346904755, - "loss_sent": 0.17911319434642792, - "loss_sod": 0.00570686673745513, - "loss_total": 0.3938220143318176, - "step": 307899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.983529090881348, - "loss_rtd": 0.21527643501758575, - "loss_sent": 0.20467884838581085, - "loss_sod": 0.029048863798379898, - "loss_total": 0.4490041434764862, - "step": 307899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.6754030585289001, - "learning_rate": 8.301505838087753e-06, - "loss": 0.4323, - "step": 307900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.864013671875, - "loss_rtd": 0.21233950555324554, - "loss_sent": 0.45691630244255066, - "loss_sod": 0.010679269209504128, - "loss_total": 0.679935097694397, - "step": 307999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.977777004241943, - "loss_rtd": 0.2054664045572281, - "loss_sent": 0.17416636645793915, - "loss_sod": 0.06325425207614899, - "loss_total": 0.44288700819015503, - "step": 307999 - }, - { - "epoch": 0.01, - "grad_norm": 1.1362477540969849, - "learning_rate": 8.284003508833376e-06, - "loss": 0.437, - "step": 308000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4098188877105713, - "eval_runtime": 150.7216, - "eval_samples_per_second": 102.46, - "eval_steps_per_second": 0.803, - "step": 308000 - }, - { - "epoch": 0.010198, - "loss_gen": 6.214615345001221, - "loss_rtd": 0.2278728038072586, - "loss_sent": 0.1740763783454895, - "loss_sod": 0.06421086192131042, - "loss_total": 0.4661600589752197, - "step": 308099 - }, - { - "epoch": 0.010198, - "loss_gen": 6.3376569747924805, - "loss_rtd": 0.20672421157360077, - "loss_sent": 0.17966632544994354, - "loss_sod": 0.060254521667957306, - "loss_total": 0.4466450810432434, - "step": 308099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.337296962738037, - "learning_rate": 8.266517982777405e-06, - "loss": 0.4145, - "step": 308100 - }, - { - "epoch": 0.010398, - "loss_gen": 6.027499198913574, - "loss_rtd": 0.2186448723077774, - "loss_sent": 0.1646350473165512, - "loss_sod": 0.03394540026783943, - "loss_total": 0.41722530126571655, - "step": 308199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.978206634521484, - "loss_rtd": 0.21628262102603912, - "loss_sent": 0.06598977744579315, - "loss_sod": 0.00904631894081831, - "loss_total": 0.29131871461868286, - "step": 308199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.6215561628341675, - "learning_rate": 8.249049266962988e-06, - "loss": 0.4356, - "step": 308200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.798785209655762, - "loss_rtd": 0.201616570353508, - "loss_sent": 0.2186223417520523, - "loss_sod": 0.020315643399953842, - "loss_total": 0.44055455923080444, - "step": 308299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.9894700050354, - "loss_rtd": 0.20303881168365479, - "loss_sent": 0.12828856706619263, - "loss_sod": 0.06300602853298187, - "loss_total": 0.3943334221839905, - "step": 308299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.0811374187469482, - "learning_rate": 8.231597368426531e-06, - "loss": 0.4271, - "step": 308300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.905354022979736, - "loss_rtd": 0.21323953568935394, - "loss_sent": 0.25014716386795044, - "loss_sod": 0.012424895539879799, - "loss_total": 0.47581160068511963, - "step": 308399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.935938835144043, - "loss_rtd": 0.21692337095737457, - "loss_sent": 0.17644526064395905, - "loss_sod": 0.01989194005727768, - "loss_total": 0.4132605791091919, - "step": 308399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.8411424160003662, - "learning_rate": 8.214162294197664e-06, - "loss": 0.4363, - "step": 308400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.775577068328857, - "loss_rtd": 0.19676491618156433, - "loss_sent": 0.12638449668884277, - "loss_sod": 0.03428375720977783, - "loss_total": 0.35743317008018494, - "step": 308499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.870701789855957, - "loss_rtd": 0.18911170959472656, - "loss_sent": 0.3005780279636383, - "loss_sod": 0.04763563722372055, - "loss_total": 0.537325382232666, - "step": 308499 - }, - { - "epoch": 0.011, - "grad_norm": 0.9006572961807251, - "learning_rate": 8.196744051299239e-06, - "loss": 0.4176, - "step": 308500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.286965370178223, - "loss_rtd": 0.16980917751789093, - "loss_sent": 0.00037132465513423085, - "loss_sod": 0.02953839674592018, - "loss_total": 0.1997188925743103, - "step": 308599 - }, - { - "epoch": 0.011198, - "loss_gen": 6.053245544433594, - "loss_rtd": 0.21546247601509094, - "loss_sent": 0.24839432537555695, - "loss_sod": 0.01285785622894764, - "loss_total": 0.4767146706581116, - "step": 308599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.1296887397766113, - "learning_rate": 8.179342646747295e-06, - "loss": 0.4094, - "step": 308600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.91952657699585, - "loss_rtd": 0.18278734385967255, - "loss_sent": 0.0061375899240374565, - "loss_sod": 0.1125425398349762, - "loss_total": 0.3014674484729767, - "step": 308699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.580056190490723, - "loss_rtd": 0.19616815447807312, - "loss_sent": 0.025577362626791, - "loss_sod": 0.08445725589990616, - "loss_total": 0.30620276927948, - "step": 308699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.9004150629043579, - "learning_rate": 8.16195808755113e-06, - "loss": 0.422, - "step": 308700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.969113826751709, - "loss_rtd": 0.21206234395503998, - "loss_sent": 0.024632275104522705, - "loss_sod": 0.12887535989284515, - "loss_total": 0.36556997895240784, - "step": 308799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.520026683807373, - "loss_rtd": 0.23807401955127716, - "loss_sent": 0.10061891376972198, - "loss_sod": 0.0021364905405789614, - "loss_total": 0.34082943201065063, - "step": 308799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.0176562070846558, - "learning_rate": 8.144590380713252e-06, - "loss": 0.4253, - "step": 308800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.825716018676758, - "loss_rtd": 0.2084958255290985, - "loss_sent": 0.08576523512601852, - "loss_sod": 0.09504387527704239, - "loss_total": 0.3893049359321594, - "step": 308899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.504941463470459, - "loss_rtd": 0.22477424144744873, - "loss_sent": 0.41075342893600464, - "loss_sod": 0.013927502557635307, - "loss_total": 0.649455189704895, - "step": 308899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.9160724878311157, - "learning_rate": 8.127239533229369e-06, - "loss": 0.4335, - "step": 308900 - }, - { - "epoch": 0.011998, - "loss_gen": 6.3390679359436035, - "loss_rtd": 0.19545334577560425, - "loss_sent": 0.12336979061365128, - "loss_sod": 0.07010123133659363, - "loss_total": 0.38892436027526855, - "step": 308999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.7447404861450195, - "loss_rtd": 0.21294380724430084, - "loss_sent": 0.08362966030836105, - "loss_sod": 0.05657492205500603, - "loss_total": 0.3531484007835388, - "step": 308999 - }, - { - "epoch": 0.012, - "grad_norm": 1.0022826194763184, - "learning_rate": 8.109905552088388e-06, - "loss": 0.4265, - "step": 309000 - }, - { - "epoch": 0.012, - "eval_loss": 0.4004424810409546, - "eval_runtime": 151.3261, - "eval_samples_per_second": 102.051, - "eval_steps_per_second": 0.8, - "step": 309000 - }, - { - "epoch": 0.012198, - "loss_gen": 5.181689739227295, - "loss_rtd": 0.1635502576828003, - "loss_sent": 0.004259913228452206, - "loss_sod": 0.0969497412443161, - "loss_total": 0.26475992798805237, - "step": 309099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.584157943725586, - "loss_rtd": 0.21053799986839294, - "loss_sent": 0.20525944232940674, - "loss_sod": 0.008120628073811531, - "loss_total": 0.42391806840896606, - "step": 309099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.9442334771156311, - "learning_rate": 8.092588444272437e-06, - "loss": 0.4211, - "step": 309100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.62851619720459, - "loss_rtd": 0.22425146400928497, - "loss_sent": 0.23456962406635284, - "loss_sod": 0.0038013344164937735, - "loss_total": 0.46262240409851074, - "step": 309199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.6538286209106445, - "loss_rtd": 0.19378627836704254, - "loss_sent": 0.03899012878537178, - "loss_sod": 0.04087996482849121, - "loss_total": 0.27365636825561523, - "step": 309199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.3899539709091187, - "learning_rate": 8.075288216756849e-06, - "loss": 0.4391, - "step": 309200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.944112777709961, - "loss_rtd": 0.21902185678482056, - "loss_sent": 0.16321153938770294, - "loss_sod": 0.050549089908599854, - "loss_total": 0.43278247117996216, - "step": 309299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.640238285064697, - "loss_rtd": 0.18983475863933563, - "loss_sent": 0.011616911739110947, - "loss_sod": 0.08104175329208374, - "loss_total": 0.2824934422969818, - "step": 309299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.8254089951515198, - "learning_rate": 8.058004876510167e-06, - "loss": 0.4284, - "step": 309300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.712543964385986, - "loss_rtd": 0.21910497546195984, - "loss_sent": 0.2007427215576172, - "loss_sod": 0.03808535262942314, - "loss_total": 0.4579330384731293, - "step": 309399 - }, - { - "epoch": 0.012798, - "loss_gen": 6.1219329833984375, - "loss_rtd": 0.20662803947925568, - "loss_sent": 0.009931064210832119, - "loss_sod": 0.09581126272678375, - "loss_total": 0.3123703598976135, - "step": 309399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.3430635929107666, - "learning_rate": 8.040738430494094e-06, - "loss": 0.4296, - "step": 309400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.938852787017822, - "loss_rtd": 0.205555260181427, - "loss_sent": 0.39438170194625854, - "loss_sod": 0.04952486604452133, - "loss_total": 0.6494618654251099, - "step": 309499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.96164608001709, - "loss_rtd": 0.2238394021987915, - "loss_sent": 0.4905053377151489, - "loss_sod": 0.03014589473605156, - "loss_total": 0.7444906234741211, - "step": 309499 - }, - { - "epoch": 0.013, - "grad_norm": 2.7371878623962402, - "learning_rate": 8.023488885663561e-06, - "loss": 0.4333, - "step": 309500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.913938522338867, - "loss_rtd": 0.2102501541376114, - "loss_sent": 0.12971247732639313, - "loss_sod": 0.020894642919301987, - "loss_total": 0.3608572781085968, - "step": 309599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.409268856048584, - "loss_rtd": 0.17835590243339539, - "loss_sent": 0.018471039831638336, - "loss_sod": 0.02411385253071785, - "loss_total": 0.22094079852104187, - "step": 309599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.6213912963867188, - "learning_rate": 8.006256248966698e-06, - "loss": 0.423, - "step": 309600 - }, - { - "epoch": 0.013398, - "loss_gen": 6.235551357269287, - "loss_rtd": 0.22233493626117706, - "loss_sent": 0.15506519377231598, - "loss_sod": 0.07004674524068832, - "loss_total": 0.44744688272476196, - "step": 309699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.766732692718506, - "loss_rtd": 0.18688441812992096, - "loss_sent": 0.2524843215942383, - "loss_sod": 0.02328348346054554, - "loss_total": 0.46265220642089844, - "step": 309699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.9869791269302368, - "learning_rate": 7.989040527344782e-06, - "loss": 0.4294, - "step": 309700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.2954487800598145, - "loss_rtd": 0.17030614614486694, - "loss_sent": 2.866191243811045e-05, - "loss_sod": 0.04264570400118828, - "loss_total": 0.2129805088043213, - "step": 309799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.9240593910217285, - "loss_rtd": 0.18576836585998535, - "loss_sent": 0.08892825245857239, - "loss_sod": 0.019113805145025253, - "loss_total": 0.2938104271888733, - "step": 309799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.9123243093490601, - "learning_rate": 7.971841727732322e-06, - "loss": 0.4303, - "step": 309800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.842307090759277, - "loss_rtd": 0.21397273242473602, - "loss_sent": 0.13180017471313477, - "loss_sod": 0.01135589275509119, - "loss_total": 0.3571287989616394, - "step": 309899 - }, - { - "epoch": 0.013798, - "loss_gen": 6.162484645843506, - "loss_rtd": 0.20132121443748474, - "loss_sent": 0.1628807932138443, - "loss_sod": 0.09502141177654266, - "loss_total": 0.4592233896255493, - "step": 309899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.8144239783287048, - "learning_rate": 7.954659857056984e-06, - "loss": 0.4129, - "step": 309900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.821217060089111, - "loss_rtd": 0.20290368795394897, - "loss_sent": 0.1426912546157837, - "loss_sod": 0.039541371166706085, - "loss_total": 0.38513630628585815, - "step": 309999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.852529525756836, - "loss_rtd": 0.19443050026893616, - "loss_sent": 0.14577209949493408, - "loss_sod": 0.03677899017930031, - "loss_total": 0.37698158621788025, - "step": 309999 - }, - { - "epoch": 0.014, - "grad_norm": 0.8347766399383545, - "learning_rate": 7.93749492223964e-06, - "loss": 0.4071, - "step": 310000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4037657082080841, - "eval_runtime": 150.9514, - "eval_samples_per_second": 102.304, - "eval_steps_per_second": 0.802, - "step": 310000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.922464370727539, - "loss_rtd": 0.20891152322292328, - "loss_sent": 0.2245611548423767, - "loss_sod": 0.07452702522277832, - "loss_total": 0.5079997181892395, - "step": 310099 - }, - { - "epoch": 0.014198, - "loss_gen": 6.004929542541504, - "loss_rtd": 0.1983024924993515, - "loss_sent": 0.17077364027500153, - "loss_sod": 0.055036187171936035, - "loss_total": 0.42411231994628906, - "step": 310099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.4000916481018066, - "learning_rate": 7.9203469301943e-06, - "loss": 0.4217, - "step": 310100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.872138500213623, - "loss_rtd": 0.21818286180496216, - "loss_sent": 0.2573728561401367, - "loss_sod": 0.09202329814434052, - "loss_total": 0.5675790309906006, - "step": 310199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.679378032684326, - "loss_rtd": 0.2042427510023117, - "loss_sent": 0.2703692317008972, - "loss_sod": 0.08388488739728928, - "loss_total": 0.5584968328475952, - "step": 310199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.8075625896453857, - "learning_rate": 7.90321588782818e-06, - "loss": 0.4301, - "step": 310200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.760520935058594, - "loss_rtd": 0.20171736180782318, - "loss_sent": 0.19153904914855957, - "loss_sod": 0.0881851464509964, - "loss_total": 0.48144155740737915, - "step": 310299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.656811237335205, - "loss_rtd": 0.2206784337759018, - "loss_sent": 0.14364071190357208, - "loss_sod": 0.00568934204056859, - "loss_total": 0.3700084686279297, - "step": 310299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.1731743812561035, - "learning_rate": 7.886101802041672e-06, - "loss": 0.43, - "step": 310300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.315280914306641, - "loss_rtd": 0.1629236340522766, - "loss_sent": 0.020134510472416878, - "loss_sod": 0.09313705563545227, - "loss_total": 0.2761951982975006, - "step": 310399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.15852689743042, - "loss_rtd": 0.17851246893405914, - "loss_sent": 0.009823448956012726, - "loss_sod": 0.04415597766637802, - "loss_total": 0.2324918806552887, - "step": 310399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.8541032075881958, - "learning_rate": 7.86900467972833e-06, - "loss": 0.4118, - "step": 310400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.873555660247803, - "loss_rtd": 0.20283553004264832, - "loss_sent": 0.04049250856041908, - "loss_sod": 0.0539071299135685, - "loss_total": 0.2972351610660553, - "step": 310499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.465541839599609, - "loss_rtd": 0.17580774426460266, - "loss_sent": 0.06673498451709747, - "loss_sod": 0.03656630963087082, - "loss_total": 0.27910906076431274, - "step": 310499 - }, - { - "epoch": 0.015, - "grad_norm": 0.7130053043365479, - "learning_rate": 7.851924527774856e-06, - "loss": 0.4077, - "step": 310500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.883062362670898, - "loss_rtd": 0.20074811577796936, - "loss_sent": 0.2130950391292572, - "loss_sod": 0.015295304358005524, - "loss_total": 0.4291384518146515, - "step": 310599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.838763236999512, - "loss_rtd": 0.2280886322259903, - "loss_sent": 0.060331884771585464, - "loss_sod": 0.06348002701997757, - "loss_total": 0.35190054774284363, - "step": 310599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.7528257966041565, - "learning_rate": 7.834861353061146e-06, - "loss": 0.4148, - "step": 310600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.73049783706665, - "loss_rtd": 0.21847784519195557, - "loss_sent": 0.38030803203582764, - "loss_sod": 0.01773514412343502, - "loss_total": 0.6165210008621216, - "step": 310699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.263930797576904, - "loss_rtd": 0.17946848273277283, - "loss_sent": 0.05554213747382164, - "loss_sod": 0.10321007668972015, - "loss_total": 0.3382206857204437, - "step": 310699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.1871514320373535, - "learning_rate": 7.817815162460234e-06, - "loss": 0.4204, - "step": 310700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.89235258102417, - "loss_rtd": 0.22214749455451965, - "loss_sent": 0.32768210768699646, - "loss_sod": 0.03052462451159954, - "loss_total": 0.5803542137145996, - "step": 310799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.6772260665893555, - "loss_rtd": 0.22638951241970062, - "loss_sent": 0.17034272849559784, - "loss_sod": 0.0030031949281692505, - "loss_total": 0.3997354507446289, - "step": 310799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.5054242610931396, - "learning_rate": 7.800785962838342e-06, - "loss": 0.4151, - "step": 310800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.18792200088501, - "loss_rtd": 0.1806230992078781, - "loss_sent": 0.10035458207130432, - "loss_sod": 0.0033172129187732935, - "loss_total": 0.28429490327835083, - "step": 310899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.800225257873535, - "loss_rtd": 0.23230841755867004, - "loss_sent": 0.23793214559555054, - "loss_sod": 0.012393715791404247, - "loss_total": 0.4826342761516571, - "step": 310899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.760189414024353, - "learning_rate": 7.783773761054808e-06, - "loss": 0.4163, - "step": 310900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.997812747955322, - "loss_rtd": 0.20307239890098572, - "loss_sent": 0.15201528370380402, - "loss_sod": 0.019326893612742424, - "loss_total": 0.3744145631790161, - "step": 310999 - }, - { - "epoch": 0.015998, - "loss_gen": 6.301429748535156, - "loss_rtd": 0.20385730266571045, - "loss_sent": 0.3304522931575775, - "loss_sod": 0.01187446340918541, - "loss_total": 0.5461840629577637, - "step": 310999 - }, - { - "epoch": 0.016, - "grad_norm": 1.095665693283081, - "learning_rate": 7.76677856396215e-06, - "loss": 0.4285, - "step": 311000 - }, - { - "epoch": 0.016, - "eval_loss": 0.40931645035743713, - "eval_runtime": 151.065, - "eval_samples_per_second": 102.228, - "eval_steps_per_second": 0.801, - "step": 311000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.989282608032227, - "loss_rtd": 0.21287782490253448, - "loss_sent": 0.24741996824741364, - "loss_sod": 0.05294226109981537, - "loss_total": 0.5132400393486023, - "step": 311099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.814924716949463, - "loss_rtd": 0.2096560150384903, - "loss_sent": 0.08405181020498276, - "loss_sod": 0.008440888486802578, - "loss_total": 0.302148699760437, - "step": 311099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.880911111831665, - "learning_rate": 7.749800378406042e-06, - "loss": 0.4382, - "step": 311100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.812239646911621, - "loss_rtd": 0.21669985353946686, - "loss_sent": 0.3005824089050293, - "loss_sod": 0.040739044547080994, - "loss_total": 0.5580213069915771, - "step": 311199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.896325588226318, - "loss_rtd": 0.20856140553951263, - "loss_sent": 0.22192752361297607, - "loss_sod": 0.004638968035578728, - "loss_total": 0.4351279139518738, - "step": 311199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.3912092447280884, - "learning_rate": 7.732839211225295e-06, - "loss": 0.4333, - "step": 311200 - }, - { - "epoch": 0.016598, - "loss_gen": 6.134925842285156, - "loss_rtd": 0.22416922450065613, - "loss_sent": 0.21561919152736664, - "loss_sod": 0.015460368245840073, - "loss_total": 0.45524877309799194, - "step": 311299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.324389934539795, - "loss_rtd": 0.1754237413406372, - "loss_sent": 0.048315126448869705, - "loss_sod": 0.04650310054421425, - "loss_total": 0.27024197578430176, - "step": 311299 - }, - { - "epoch": 0.0166, - "grad_norm": 0.8866177797317505, - "learning_rate": 7.715895069251855e-06, - "loss": 0.4177, - "step": 311300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.278530597686768, - "loss_rtd": 0.15986032783985138, - "loss_sent": 0.024686088785529137, - "loss_sod": 0.12176243960857391, - "loss_total": 0.3063088655471802, - "step": 311399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.9626312255859375, - "loss_rtd": 0.21207858622074127, - "loss_sent": 0.15108805894851685, - "loss_sod": 0.07089990377426147, - "loss_total": 0.4340665340423584, - "step": 311399 - }, - { - "epoch": 0.0168, - "grad_norm": 1.3012689352035522, - "learning_rate": 7.698967959310815e-06, - "loss": 0.3987, - "step": 311400 - }, - { - "epoch": 0.016998, - "loss_gen": 6.174948215484619, - "loss_rtd": 0.22797290980815887, - "loss_sent": 0.15986447036266327, - "loss_sod": 0.030468493700027466, - "loss_total": 0.4183058738708496, - "step": 311499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.9885149002075195, - "loss_rtd": 0.20035885274410248, - "loss_sent": 0.2758614718914032, - "loss_sod": 0.05242743343114853, - "loss_total": 0.528647780418396, - "step": 311499 - }, - { - "epoch": 0.017, - "grad_norm": 0.9514748454093933, - "learning_rate": 7.682057888220439e-06, - "loss": 0.4372, - "step": 311500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.339448928833008, - "loss_rtd": 0.14678730070590973, - "loss_sent": 2.946843596873805e-05, - "loss_sod": 0.2171320915222168, - "loss_total": 0.36394885182380676, - "step": 311599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.157167434692383, - "loss_rtd": 0.15583649277687073, - "loss_sent": 0.130470409989357, - "loss_sod": 0.05063977092504501, - "loss_total": 0.33694666624069214, - "step": 311599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.4903481006622314, - "learning_rate": 7.665164862792074e-06, - "loss": 0.4313, - "step": 311600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.914399147033691, - "loss_rtd": 0.20276793837547302, - "loss_sent": 0.1353200525045395, - "loss_sod": 0.017816588282585144, - "loss_total": 0.35590457916259766, - "step": 311699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.839435577392578, - "loss_rtd": 0.20555274188518524, - "loss_sent": 0.08742252737283707, - "loss_sod": 0.07438893616199493, - "loss_total": 0.36736419796943665, - "step": 311699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.7307558655738831, - "learning_rate": 7.64828888983024e-06, - "loss": 0.4353, - "step": 311700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.628203392028809, - "loss_rtd": 0.21813525259494781, - "loss_sent": 0.08447877317667007, - "loss_sod": 0.03288706764578819, - "loss_total": 0.3355010747909546, - "step": 311799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.8870849609375, - "loss_rtd": 0.20519371330738068, - "loss_sent": 0.0029134685173630714, - "loss_sod": 0.18275833129882812, - "loss_total": 0.3908655047416687, - "step": 311799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.9509856104850769, - "learning_rate": 7.631429976132577e-06, - "loss": 0.4224, - "step": 311800 - }, - { - "epoch": 0.017798, - "loss_gen": 6.52073335647583, - "loss_rtd": 0.23186054825782776, - "loss_sent": 0.12316082417964935, - "loss_sod": 0.08576104044914246, - "loss_total": 0.44078242778778076, - "step": 311899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.790531635284424, - "loss_rtd": 0.21376632153987885, - "loss_sent": 0.10404068231582642, - "loss_sod": 0.020191598683595657, - "loss_total": 0.3379985988140106, - "step": 311899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.0056074857711792, - "learning_rate": 7.614588128489864e-06, - "loss": 0.4193, - "step": 311900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.688158988952637, - "loss_rtd": 0.22824282944202423, - "loss_sent": 0.2352687120437622, - "loss_sod": 0.009616414085030556, - "loss_total": 0.47312796115875244, - "step": 311999 - }, - { - "epoch": 0.017998, - "loss_gen": 6.112806797027588, - "loss_rtd": 0.20789451897144318, - "loss_sent": 0.24069654941558838, - "loss_sod": 0.04331938922405243, - "loss_total": 0.491910457611084, - "step": 311999 - }, - { - "epoch": 0.018, - "grad_norm": 1.3421379327774048, - "learning_rate": 7.597763353685966e-06, - "loss": 0.4189, - "step": 312000 - }, - { - "epoch": 0.018, - "eval_loss": 0.40631282329559326, - "eval_runtime": 151.4112, - "eval_samples_per_second": 101.994, - "eval_steps_per_second": 0.799, - "step": 312000 - }, - { - "epoch": 0.018198, - "loss_gen": 6.034616947174072, - "loss_rtd": 0.21921761333942413, - "loss_sent": 0.13182824850082397, - "loss_sod": 0.06258413195610046, - "loss_total": 0.41363000869750977, - "step": 312099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.633738040924072, - "loss_rtd": 0.19921241700649261, - "loss_sent": 0.10855448246002197, - "loss_sod": 0.04418952018022537, - "loss_total": 0.35195642709732056, - "step": 312099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.8034456372261047, - "learning_rate": 7.580955658497924e-06, - "loss": 0.427, - "step": 312100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.59451150894165, - "loss_rtd": 0.19150112569332123, - "loss_sent": 0.035920217633247375, - "loss_sod": 0.08746415376663208, - "loss_total": 0.3148854970932007, - "step": 312199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.153200626373291, - "loss_rtd": 0.1642102599143982, - "loss_sent": 0.02211596444249153, - "loss_sod": 0.04951123893260956, - "loss_total": 0.23583745956420898, - "step": 312199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.1884304285049438, - "learning_rate": 7.564165049695882e-06, - "loss": 0.4254, - "step": 312200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.964243412017822, - "loss_rtd": 0.22288161516189575, - "loss_sent": 0.3737553060054779, - "loss_sod": 0.061662301421165466, - "loss_total": 0.6582992076873779, - "step": 312299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.274056434631348, - "loss_rtd": 0.1981636881828308, - "loss_sent": 0.006828839424997568, - "loss_sod": 0.16011330485343933, - "loss_total": 0.36510583758354187, - "step": 312299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.2488231658935547, - "learning_rate": 7.547391534043069e-06, - "loss": 0.4405, - "step": 312300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.872720718383789, - "loss_rtd": 0.2019290030002594, - "loss_sent": 0.11224554479122162, - "loss_sod": 0.07444722950458527, - "loss_total": 0.38862180709838867, - "step": 312399 - }, - { - "epoch": 0.018798, - "loss_gen": 6.146448135375977, - "loss_rtd": 0.20309272408485413, - "loss_sent": 0.02162293717265129, - "loss_sod": 0.03802800923585892, - "loss_total": 0.26274365186691284, - "step": 312399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.240323781967163, - "learning_rate": 7.5306351182958865e-06, - "loss": 0.426, - "step": 312400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.935499668121338, - "loss_rtd": 0.2323479950428009, - "loss_sent": 0.08306394517421722, - "loss_sod": 0.02807151898741722, - "loss_total": 0.34348344802856445, - "step": 312499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.975403785705566, - "loss_rtd": 0.21715359389781952, - "loss_sent": 0.11344219744205475, - "loss_sod": 0.14398878812789917, - "loss_total": 0.47458457946777344, - "step": 312499 - }, - { - "epoch": 0.019, - "grad_norm": 1.4612895250320435, - "learning_rate": 7.5138958092037806e-06, - "loss": 0.428, - "step": 312500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.920339107513428, - "loss_rtd": 0.218119814991951, - "loss_sent": 0.2784358561038971, - "loss_sod": 0.008258887566626072, - "loss_total": 0.5048145651817322, - "step": 312599 - }, - { - "epoch": 0.019198, - "loss_gen": 6.144434452056885, - "loss_rtd": 0.2286521941423416, - "loss_sent": 0.08838971704244614, - "loss_sod": 0.050909243524074554, - "loss_total": 0.3679511547088623, - "step": 312599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.8755446672439575, - "learning_rate": 7.497173613509367e-06, - "loss": 0.4299, - "step": 312600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.931336879730225, - "loss_rtd": 0.19997598230838776, - "loss_sent": 0.27158093452453613, - "loss_sod": 0.014547404833137989, - "loss_total": 0.4861043095588684, - "step": 312699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.624778747558594, - "loss_rtd": 0.2028387188911438, - "loss_sent": 0.16659888625144958, - "loss_sod": 0.098826102912426, - "loss_total": 0.4682637155056, - "step": 312699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.1667513847351074, - "learning_rate": 7.4804685379483486e-06, - "loss": 0.4254, - "step": 312700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.845862865447998, - "loss_rtd": 0.21606336534023285, - "loss_sent": 0.18905384838581085, - "loss_sod": 0.018588026985526085, - "loss_total": 0.42370522022247314, - "step": 312799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.843987464904785, - "loss_rtd": 0.22016587853431702, - "loss_sent": 0.24973087012767792, - "loss_sod": 0.030912479385733604, - "loss_total": 0.5008092522621155, - "step": 312799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.0143873691558838, - "learning_rate": 7.463780589249508e-06, - "loss": 0.4393, - "step": 312800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.206735134124756, - "loss_rtd": 0.16853559017181396, - "loss_sent": 0.0024505567271262407, - "loss_sod": 0.03042198345065117, - "loss_total": 0.20140813291072845, - "step": 312899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.293466091156006, - "loss_rtd": 0.1788954883813858, - "loss_sent": 0.013938656076788902, - "loss_sod": 0.043361686170101166, - "loss_total": 0.23619581758975983, - "step": 312899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.5000883340835571, - "learning_rate": 7.447109774134758e-06, - "loss": 0.4363, - "step": 312900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.893698215484619, - "loss_rtd": 0.20332132279872894, - "loss_sent": 0.0011745416559278965, - "loss_sod": 0.18095089495182037, - "loss_total": 0.3854467570781708, - "step": 312999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.233047008514404, - "loss_rtd": 0.2026233971118927, - "loss_sent": 2.6892364985542372e-05, - "loss_sod": 0.1481531858444214, - "loss_total": 0.3508034646511078, - "step": 312999 - }, - { - "epoch": 0.02, - "grad_norm": 1.1689213514328003, - "learning_rate": 7.4304560993191e-06, - "loss": 0.4296, - "step": 313000 - }, - { - "epoch": 0.02, - "eval_loss": 0.40171995759010315, - "eval_runtime": 151.2712, - "eval_samples_per_second": 102.088, - "eval_steps_per_second": 0.8, - "step": 313000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.944424152374268, - "loss_rtd": 0.21740028262138367, - "loss_sent": 0.06304635107517242, - "loss_sod": 0.02363731525838375, - "loss_total": 0.3040839433670044, - "step": 313099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.934466361999512, - "loss_rtd": 0.19472838938236237, - "loss_sent": 0.2642924189567566, - "loss_sod": 0.009696826338768005, - "loss_total": 0.46871763467788696, - "step": 313099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.8409777879714966, - "learning_rate": 7.41381957151065e-06, - "loss": 0.4214, - "step": 313100 - }, - { - "epoch": 0.020398, - "loss_gen": 6.062003135681152, - "loss_rtd": 0.1981760859489441, - "loss_sent": 0.281217485666275, - "loss_sod": 0.0636417493224144, - "loss_total": 0.5430353283882141, - "step": 313199 - }, - { - "epoch": 0.020398, - "loss_gen": 6.133944988250732, - "loss_rtd": 0.21455906331539154, - "loss_sent": 0.11391421407461166, - "loss_sod": 0.025152625516057014, - "loss_total": 0.3536258935928345, - "step": 313199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.8949501514434814, - "learning_rate": 7.3972001974105694e-06, - "loss": 0.4187, - "step": 313200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.584046840667725, - "loss_rtd": 0.21862752735614777, - "loss_sent": 0.26217788457870483, - "loss_sod": 0.035076115280389786, - "loss_total": 0.5158815383911133, - "step": 313299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.972877502441406, - "loss_rtd": 0.2138463407754898, - "loss_sent": 0.18063798546791077, - "loss_sod": 0.0426754429936409, - "loss_total": 0.43715977668762207, - "step": 313299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.3265968561172485, - "learning_rate": 7.380597983713155e-06, - "loss": 0.4306, - "step": 313300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.408375263214111, - "loss_rtd": 0.17621396481990814, - "loss_sent": 0.027988268062472343, - "loss_sod": 0.05240663141012192, - "loss_total": 0.25660884380340576, - "step": 313399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.836403846740723, - "loss_rtd": 0.22482101619243622, - "loss_sent": 0.4502793848514557, - "loss_sod": 0.013839355669915676, - "loss_total": 0.6889397501945496, - "step": 313399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.960966944694519, - "learning_rate": 7.36401293710578e-06, - "loss": 0.4351, - "step": 313400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.090680122375488, - "loss_rtd": 0.15573930740356445, - "loss_sent": 0.1062464490532875, - "loss_sod": 0.008934445679187775, - "loss_total": 0.27092018723487854, - "step": 313499 - }, - { - "epoch": 0.020998, - "loss_gen": 6.124439716339111, - "loss_rtd": 0.20087675750255585, - "loss_sent": 0.10363459587097168, - "loss_sod": 0.09466443955898285, - "loss_total": 0.3991757929325104, - "step": 313499 - }, - { - "epoch": 0.021, - "grad_norm": 0.8966503739356995, - "learning_rate": 7.347445064268898e-06, - "loss": 0.4205, - "step": 313500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.830630779266357, - "loss_rtd": 0.21341219544410706, - "loss_sent": 0.0895337164402008, - "loss_sod": 0.034656867384910583, - "loss_total": 0.33760279417037964, - "step": 313599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.908732891082764, - "loss_rtd": 0.19999216496944427, - "loss_sent": 0.3119899332523346, - "loss_sod": 0.023746121674776077, - "loss_total": 0.5357282161712646, - "step": 313599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.1671432256698608, - "learning_rate": 7.33089437187604e-06, - "loss": 0.4306, - "step": 313600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.955630779266357, - "loss_rtd": 0.21357406675815582, - "loss_sent": 0.3937452733516693, - "loss_sod": 0.03356746584177017, - "loss_total": 0.6408867835998535, - "step": 313699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.9988532066345215, - "loss_rtd": 0.2110321819782257, - "loss_sent": 0.10222094506025314, - "loss_sod": 0.1282128244638443, - "loss_total": 0.44146597385406494, - "step": 313699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.0259250402450562, - "learning_rate": 7.3143608665938225e-06, - "loss": 0.4255, - "step": 313700 - }, - { - "epoch": 0.021598, - "loss_gen": 6.168946266174316, - "loss_rtd": 0.20611238479614258, - "loss_sent": 0.07899408787488937, - "loss_sod": 0.05453554168343544, - "loss_total": 0.3396420180797577, - "step": 313799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.612627983093262, - "loss_rtd": 0.1827203631401062, - "loss_sent": 0.0346013680100441, - "loss_sod": 0.03594528138637543, - "loss_total": 0.2532670199871063, - "step": 313799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.996342122554779, - "learning_rate": 7.297844555081945e-06, - "loss": 0.4272, - "step": 313800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.412449836730957, - "loss_rtd": 0.19608855247497559, - "loss_sent": 0.1503404676914215, - "loss_sod": 0.0775788277387619, - "loss_total": 0.4240078628063202, - "step": 313899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.896376609802246, - "loss_rtd": 0.21017661690711975, - "loss_sent": 0.2769564688205719, - "loss_sod": 0.13673001527786255, - "loss_total": 0.6238631010055542, - "step": 313899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.518585205078125, - "learning_rate": 7.2813454439931875e-06, - "loss": 0.4045, - "step": 313900 - }, - { - "epoch": 0.021998, - "loss_gen": 6.018864154815674, - "loss_rtd": 0.2055513709783554, - "loss_sent": 0.2541314363479614, - "loss_sod": 0.09997926652431488, - "loss_total": 0.5596621036529541, - "step": 313999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.715604305267334, - "loss_rtd": 0.20882795751094818, - "loss_sent": 0.023550404235720634, - "loss_sod": 0.056766293942928314, - "loss_total": 0.2891446352005005, - "step": 313999 - }, - { - "epoch": 0.022, - "grad_norm": 1.240139126777649, - "learning_rate": 7.26486353997336e-06, - "loss": 0.4328, - "step": 314000 - }, - { - "epoch": 0.022, - "eval_loss": 0.40542072057724, - "eval_runtime": 152.7304, - "eval_samples_per_second": 101.113, - "eval_steps_per_second": 0.792, - "step": 314000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.8403120040893555, - "loss_rtd": 0.21521833539009094, - "loss_sent": 0.2516106367111206, - "loss_sod": 0.13746392726898193, - "loss_total": 0.6042928695678711, - "step": 314099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.720678329467773, - "loss_rtd": 0.21709758043289185, - "loss_sent": 0.4640616476535797, - "loss_sod": 0.006305079907178879, - "loss_total": 0.6874642968177795, - "step": 314099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.88277006149292, - "learning_rate": 7.248398849661392e-06, - "loss": 0.4231, - "step": 314100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.740084648132324, - "loss_rtd": 0.1869879513978958, - "loss_sent": 0.24680136144161224, - "loss_sod": 0.047100603580474854, - "loss_total": 0.4808899164199829, - "step": 314199 - }, - { - "epoch": 0.022398, - "loss_gen": 6.137388229370117, - "loss_rtd": 0.2184019237756729, - "loss_sent": 0.5415347814559937, - "loss_sod": 0.05859958007931709, - "loss_total": 0.8185362815856934, - "step": 314199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.4765161275863647, - "learning_rate": 7.2319513796892615e-06, - "loss": 0.4046, - "step": 314200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.72477388381958, - "loss_rtd": 0.22151228785514832, - "loss_sent": 0.10201455652713776, - "loss_sod": 0.08975923806428909, - "loss_total": 0.41328608989715576, - "step": 314299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.935103416442871, - "loss_rtd": 0.2225561887025833, - "loss_sent": 0.24215862154960632, - "loss_sod": 0.0822596549987793, - "loss_total": 0.5469744205474854, - "step": 314299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.2356349229812622, - "learning_rate": 7.215521136681996e-06, - "loss": 0.402, - "step": 314300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.923129081726074, - "loss_rtd": 0.2045292854309082, - "loss_sent": 0.35342252254486084, - "loss_sod": 0.03676004707813263, - "loss_total": 0.5947118401527405, - "step": 314399 - }, - { - "epoch": 0.022798, - "loss_gen": 6.300541877746582, - "loss_rtd": 0.22245635092258453, - "loss_sent": 0.11081698536872864, - "loss_sod": 0.10975705832242966, - "loss_total": 0.44303038716316223, - "step": 314399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.436285376548767, - "learning_rate": 7.199108127257692e-06, - "loss": 0.4262, - "step": 314400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.1642913818359375, - "loss_rtd": 0.18441419303417206, - "loss_sent": 2.651439353940077e-05, - "loss_sod": 0.1234402135014534, - "loss_total": 0.3078809380531311, - "step": 314499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.329806804656982, - "loss_rtd": 0.15288867056369781, - "loss_sent": 0.17453642189502716, - "loss_sod": 0.002712737303227186, - "loss_total": 0.33013784885406494, - "step": 314499 - }, - { - "epoch": 0.023, - "grad_norm": 1.0421608686447144, - "learning_rate": 7.182712358027521e-06, - "loss": 0.4233, - "step": 314500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.7112908363342285, - "loss_rtd": 0.20314361155033112, - "loss_sent": 0.11444418877363205, - "loss_sod": 0.07358378916978836, - "loss_total": 0.3911716043949127, - "step": 314599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.245554447174072, - "loss_rtd": 0.1752987504005432, - "loss_sent": 4.759164949064143e-05, - "loss_sod": 0.06419199705123901, - "loss_total": 0.23953834176063538, - "step": 314599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.0081534385681152, - "learning_rate": 7.166333835595707e-06, - "loss": 0.4377, - "step": 314600 - }, - { - "epoch": 0.023398, - "loss_gen": 6.1736836433410645, - "loss_rtd": 0.2087811827659607, - "loss_sent": 0.13610079884529114, - "loss_sod": 0.03175272047519684, - "loss_total": 0.37663471698760986, - "step": 314699 - }, - { - "epoch": 0.023398, - "loss_gen": 6.245881080627441, - "loss_rtd": 0.20825320482254028, - "loss_sent": 0.11125075817108154, - "loss_sod": 0.05730395019054413, - "loss_total": 0.37680792808532715, - "step": 314699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.6630667448043823, - "learning_rate": 7.149972566559482e-06, - "loss": 0.4169, - "step": 314700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.723850727081299, - "loss_rtd": 0.21545104682445526, - "loss_sent": 0.05776102840900421, - "loss_sod": 0.01691570319235325, - "loss_total": 0.29012778401374817, - "step": 314799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.832726001739502, - "loss_rtd": 0.20727503299713135, - "loss_sent": 0.10226765275001526, - "loss_sod": 0.023137206211686134, - "loss_total": 0.3326798975467682, - "step": 314799 - }, - { - "epoch": 0.0236, - "grad_norm": 0.6317310333251953, - "learning_rate": 7.133628557509187e-06, - "loss": 0.4218, - "step": 314800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.593243598937988, - "loss_rtd": 0.17638765275478363, - "loss_sent": 0.010891798883676529, - "loss_sod": 0.05664808303117752, - "loss_total": 0.24392753839492798, - "step": 314899 - }, - { - "epoch": 0.023798, - "loss_gen": 6.372849464416504, - "loss_rtd": 0.2191629558801651, - "loss_sent": 0.10814128071069717, - "loss_sod": 0.0372735895216465, - "loss_total": 0.3645778298377991, - "step": 314899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.8626344799995422, - "learning_rate": 7.117301815028182e-06, - "loss": 0.4228, - "step": 314900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.699850082397461, - "loss_rtd": 0.21511022746562958, - "loss_sent": 0.09173356741666794, - "loss_sod": 0.0150165855884552, - "loss_total": 0.3218603730201721, - "step": 314999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.611486911773682, - "loss_rtd": 0.21274064481258392, - "loss_sent": 0.286617249250412, - "loss_sod": 0.01891474798321724, - "loss_total": 0.5182726383209229, - "step": 314999 - }, - { - "epoch": 0.024, - "grad_norm": 0.7114839553833008, - "learning_rate": 7.1009923456928915e-06, - "loss": 0.4359, - "step": 315000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4024796187877655, - "eval_runtime": 151.1801, - "eval_samples_per_second": 102.15, - "eval_steps_per_second": 0.8, - "step": 315000 - }, - { - "epoch": 0.024198, - "loss_gen": 6.079152584075928, - "loss_rtd": 0.22925634682178497, - "loss_sent": 0.23604364693164825, - "loss_sod": 0.01550333108752966, - "loss_total": 0.48080331087112427, - "step": 315099 - }, - { - "epoch": 0.024198, - "loss_gen": 6.009952545166016, - "loss_rtd": 0.21401935815811157, - "loss_sent": 0.11091934144496918, - "loss_sod": 0.15941095352172852, - "loss_total": 0.48434966802597046, - "step": 315099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.0999048948287964, - "learning_rate": 7.0847001560727375e-06, - "loss": 0.4386, - "step": 315100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.308565616607666, - "loss_rtd": 0.17562098801136017, - "loss_sent": 0.058068182319402695, - "loss_sod": 0.04143832251429558, - "loss_total": 0.27512750029563904, - "step": 315199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.552271366119385, - "loss_rtd": 0.21226602792739868, - "loss_sent": 0.08181682974100113, - "loss_sod": 0.0055959089659154415, - "loss_total": 0.299678772687912, - "step": 315199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.7959131002426147, - "learning_rate": 7.068425252730232e-06, - "loss": 0.4408, - "step": 315200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.913577556610107, - "loss_rtd": 0.20919254422187805, - "loss_sent": 0.2013363540172577, - "loss_sod": 0.10238775610923767, - "loss_total": 0.5129166841506958, - "step": 315299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.806041240692139, - "loss_rtd": 0.21975742280483246, - "loss_sent": 0.16951124370098114, - "loss_sod": 0.010074999183416367, - "loss_total": 0.39934366941452026, - "step": 315299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.209050178527832, - "learning_rate": 7.052167642220903e-06, - "loss": 0.4294, - "step": 315300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.845470428466797, - "loss_rtd": 0.20528078079223633, - "loss_sent": 0.4749945104122162, - "loss_sod": 0.012741737067699432, - "loss_total": 0.6930170059204102, - "step": 315399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.882688045501709, - "loss_rtd": 0.24752117693424225, - "loss_sent": 0.3105274438858032, - "loss_sod": 0.018023159354925156, - "loss_total": 0.5760717391967773, - "step": 315399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.6325089931488037, - "learning_rate": 7.035927331093317e-06, - "loss": 0.4296, - "step": 315400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.894001483917236, - "loss_rtd": 0.23202523589134216, - "loss_sent": 0.22358691692352295, - "loss_sod": 0.04244053363800049, - "loss_total": 0.4980526864528656, - "step": 315499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.756228923797607, - "loss_rtd": 0.21856294572353363, - "loss_sent": 0.25014421343803406, - "loss_sod": 0.058342427015304565, - "loss_total": 0.5270495414733887, - "step": 315499 - }, - { - "epoch": 0.025, - "grad_norm": 1.3457165956497192, - "learning_rate": 7.0197043258890596e-06, - "loss": 0.421, - "step": 315500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.60030460357666, - "loss_rtd": 0.18207070231437683, - "loss_sent": 0.0326513908803463, - "loss_sod": 0.05149613320827484, - "loss_total": 0.2662182152271271, - "step": 315599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.878728866577148, - "loss_rtd": 0.2024921476840973, - "loss_sent": 0.13212017714977264, - "loss_sod": 0.014641729183495045, - "loss_total": 0.3492540717124939, - "step": 315599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.8276498317718506, - "learning_rate": 7.003498633142752e-06, - "loss": 0.4289, - "step": 315600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.8293938636779785, - "loss_rtd": 0.22322751581668854, - "loss_sent": 0.25092437863349915, - "loss_sod": 0.009997747838497162, - "loss_total": 0.48414963483810425, - "step": 315699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.895769119262695, - "loss_rtd": 0.21014940738677979, - "loss_sent": 0.07953742891550064, - "loss_sod": 0.048508595675230026, - "loss_total": 0.33819541335105896, - "step": 315699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.4700539112091064, - "learning_rate": 6.987310259382057e-06, - "loss": 0.4307, - "step": 315700 - }, - { - "epoch": 0.025598, - "loss_gen": 6.073229789733887, - "loss_rtd": 0.2322186529636383, - "loss_sent": 0.22029481828212738, - "loss_sod": 0.025288663804531097, - "loss_total": 0.4778021574020386, - "step": 315799 - }, - { - "epoch": 0.025598, - "loss_gen": 6.111647129058838, - "loss_rtd": 0.21665161848068237, - "loss_sent": 0.2351970076560974, - "loss_sod": 0.08440050482749939, - "loss_total": 0.5362491607666016, - "step": 315799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.3426125049591064, - "learning_rate": 6.971139211127659e-06, - "loss": 0.4119, - "step": 315800 - }, - { - "epoch": 0.025798, - "loss_gen": 6.150747776031494, - "loss_rtd": 0.2225160300731659, - "loss_sent": 0.22520409524440765, - "loss_sod": 0.02255828306078911, - "loss_total": 0.47027841210365295, - "step": 315899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.702725887298584, - "loss_rtd": 0.19524094462394714, - "loss_sent": 0.3580003082752228, - "loss_sod": 0.054254043847322464, - "loss_total": 0.6074953079223633, - "step": 315899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.545710563659668, - "learning_rate": 6.954985494893229e-06, - "loss": 0.4348, - "step": 315900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.883828163146973, - "loss_rtd": 0.2226816862821579, - "loss_sent": 0.10587580502033234, - "loss_sod": 0.01885738968849182, - "loss_total": 0.34741488099098206, - "step": 315999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.713308334350586, - "loss_rtd": 0.19941580295562744, - "loss_sent": 0.03474288806319237, - "loss_sod": 0.08108684420585632, - "loss_total": 0.31524553894996643, - "step": 315999 - }, - { - "epoch": 0.026, - "grad_norm": 0.74559485912323, - "learning_rate": 6.938849117185492e-06, - "loss": 0.4246, - "step": 316000 - }, - { - "epoch": 0.026, - "eval_loss": 0.40634486079216003, - "eval_runtime": 151.2059, - "eval_samples_per_second": 102.132, - "eval_steps_per_second": 0.8, - "step": 316000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.952905178070068, - "loss_rtd": 0.232254758477211, - "loss_sent": 0.07545031607151031, - "loss_sod": 0.003857793053612113, - "loss_total": 0.3115628659725189, - "step": 316099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.720460891723633, - "loss_rtd": 0.20521163940429688, - "loss_sent": 0.2356717437505722, - "loss_sod": 0.0021521239541471004, - "loss_total": 0.4430355131626129, - "step": 316099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.8450875282287598, - "learning_rate": 6.922730084504192e-06, - "loss": 0.4247, - "step": 316100 - }, - { - "epoch": 0.026398, - "loss_gen": 6.488436222076416, - "loss_rtd": 0.20710326731204987, - "loss_sent": 0.020241430029273033, - "loss_sod": 0.11870314925909042, - "loss_total": 0.34604784846305847, - "step": 316199 - }, - { - "epoch": 0.026398, - "loss_gen": 6.002236843109131, - "loss_rtd": 0.23638859391212463, - "loss_sent": 0.09361567348241806, - "loss_sod": 0.05375950038433075, - "loss_total": 0.38376376032829285, - "step": 316199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.783872127532959, - "learning_rate": 6.906628403342052e-06, - "loss": 0.4241, - "step": 316200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.753622055053711, - "loss_rtd": 0.17708782851696014, - "loss_sent": 0.1306205838918686, - "loss_sod": 0.025192318484187126, - "loss_total": 0.332900732755661, - "step": 316299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.759315490722656, - "loss_rtd": 0.20716163516044617, - "loss_sent": 0.34905558824539185, - "loss_sod": 0.008463521488010883, - "loss_total": 0.5646807551383972, - "step": 316299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.2977489233016968, - "learning_rate": 6.8905440801848405e-06, - "loss": 0.4236, - "step": 316300 - }, - { - "epoch": 0.026798, - "loss_gen": 6.12885856628418, - "loss_rtd": 0.21159934997558594, - "loss_sent": 0.322174996137619, - "loss_sod": 0.023119572550058365, - "loss_total": 0.5568939447402954, - "step": 316399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.945991039276123, - "loss_rtd": 0.20298978686332703, - "loss_sent": 0.1152675673365593, - "loss_sod": 0.0343075655400753, - "loss_total": 0.3525649309158325, - "step": 316399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.7906752824783325, - "learning_rate": 6.8744771215113135e-06, - "loss": 0.4062, - "step": 316400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.150700092315674, - "loss_rtd": 0.16034561395645142, - "loss_sent": 0.038422808051109314, - "loss_sod": 0.05374298244714737, - "loss_total": 0.2525113821029663, - "step": 316499 - }, - { - "epoch": 0.026998, - "loss_gen": 6.009461402893066, - "loss_rtd": 0.21647918224334717, - "loss_sent": 0.1017431765794754, - "loss_sod": 0.01856192946434021, - "loss_total": 0.336784303188324, - "step": 316499 - }, - { - "epoch": 0.027, - "grad_norm": 0.8859041929244995, - "learning_rate": 6.85842753379326e-06, - "loss": 0.4262, - "step": 316500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.820038318634033, - "loss_rtd": 0.2261316031217575, - "loss_sent": 0.29477351903915405, - "loss_sod": 0.03190707787871361, - "loss_total": 0.5528122186660767, - "step": 316599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.3127641677856445, - "loss_rtd": 0.17756026983261108, - "loss_sent": 0.002590323332697153, - "loss_sod": 0.05262359604239464, - "loss_total": 0.23277419805526733, - "step": 316599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.209650993347168, - "learning_rate": 6.842395323495426e-06, - "loss": 0.4207, - "step": 316600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.927210807800293, - "loss_rtd": 0.21501344442367554, - "loss_sent": 0.24600785970687866, - "loss_sod": 0.011339105665683746, - "loss_total": 0.47236040234565735, - "step": 316699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.2444844245910645, - "loss_rtd": 0.16733349859714508, - "loss_sent": 0.005530119873583317, - "loss_sod": 0.12684503197669983, - "loss_total": 0.29970866441726685, - "step": 316699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.082572340965271, - "learning_rate": 6.82638049707559e-06, - "loss": 0.4273, - "step": 316700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.0740885734558105, - "loss_rtd": 0.2189008593559265, - "loss_sent": 0.14137573540210724, - "loss_sod": 0.06856880336999893, - "loss_total": 0.4288454055786133, - "step": 316799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.498562812805176, - "loss_rtd": 0.18266771733760834, - "loss_sent": 0.053660523146390915, - "loss_sod": 0.1443597674369812, - "loss_total": 0.38068801164627075, - "step": 316799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.530004620552063, - "learning_rate": 6.810383060984527e-06, - "loss": 0.4201, - "step": 316800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.307231903076172, - "loss_rtd": 0.17917531728744507, - "loss_sent": 2.47952248173533e-05, - "loss_sod": 0.04512486606836319, - "loss_total": 0.22432497143745422, - "step": 316899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.204262733459473, - "loss_rtd": 0.16439220309257507, - "loss_sent": 0.0013377791037783027, - "loss_sod": 0.09129650890827179, - "loss_total": 0.2570264935493469, - "step": 316899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.849273681640625, - "learning_rate": 6.794403021666018e-06, - "loss": 0.4177, - "step": 316900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.209410667419434, - "loss_rtd": 0.17891815304756165, - "loss_sent": 2.555143873905763e-05, - "loss_sod": 0.026099855080246925, - "loss_total": 0.20504356920719147, - "step": 316999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.612294673919678, - "loss_rtd": 0.19472810626029968, - "loss_sent": 0.295123428106308, - "loss_sod": 0.029760317876935005, - "loss_total": 0.5196118354797363, - "step": 316999 - }, - { - "epoch": 0.028, - "grad_norm": 1.2646737098693848, - "learning_rate": 6.778440385556789e-06, - "loss": 0.4153, - "step": 317000 - }, - { - "epoch": 0.028, - "eval_loss": 0.40046969056129456, - "eval_runtime": 151.353, - "eval_samples_per_second": 102.033, - "eval_steps_per_second": 0.799, - "step": 317000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.807673931121826, - "loss_rtd": 0.1759045273065567, - "loss_sent": 0.08461452275514603, - "loss_sod": 0.018785716965794563, - "loss_total": 0.27930477261543274, - "step": 317099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.419627666473389, - "loss_rtd": 0.1807471215724945, - "loss_sent": 0.03169324994087219, - "loss_sod": 0.060750093311071396, - "loss_total": 0.2731904685497284, - "step": 317099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.9224417209625244, - "learning_rate": 6.762495159086607e-06, - "loss": 0.4238, - "step": 317100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.9013824462890625, - "loss_rtd": 0.21823909878730774, - "loss_sent": 0.3828633725643158, - "loss_sod": 0.01689605787396431, - "loss_total": 0.6179985404014587, - "step": 317199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.776583194732666, - "loss_rtd": 0.22855596244335175, - "loss_sent": 0.18834030628204346, - "loss_sod": 0.00982157327234745, - "loss_total": 0.4267178475856781, - "step": 317199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.315647840499878, - "learning_rate": 6.746567348678201e-06, - "loss": 0.4299, - "step": 317200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.961749076843262, - "loss_rtd": 0.19732806086540222, - "loss_sent": 0.3418771028518677, - "loss_sod": 0.10058655589818954, - "loss_total": 0.63979172706604, - "step": 317299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.753267288208008, - "loss_rtd": 0.18467769026756287, - "loss_sent": 0.042127400636672974, - "loss_sod": 0.04599412903189659, - "loss_total": 0.27279922366142273, - "step": 317299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.9673850536346436, - "learning_rate": 6.730656960747311e-06, - "loss": 0.4252, - "step": 317300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.565941333770752, - "loss_rtd": 0.1751120239496231, - "loss_sent": 0.015778986737132072, - "loss_sod": 0.1042807325720787, - "loss_total": 0.29517173767089844, - "step": 317399 - }, - { - "epoch": 0.028798, - "loss_gen": 6.044151306152344, - "loss_rtd": 0.20001552999019623, - "loss_sent": 0.08831530064344406, - "loss_sod": 0.09159000962972641, - "loss_total": 0.3799208402633667, - "step": 317399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.2267037630081177, - "learning_rate": 6.714764001702606e-06, - "loss": 0.411, - "step": 317400 - }, - { - "epoch": 0.028998, - "loss_gen": 6.121268272399902, - "loss_rtd": 0.22611607611179352, - "loss_sent": 0.4467504322528839, - "loss_sod": 0.05430105701088905, - "loss_total": 0.7271676063537598, - "step": 317499 - }, - { - "epoch": 0.028998, - "loss_gen": 6.016317367553711, - "loss_rtd": 0.21046489477157593, - "loss_sent": 0.27657005190849304, - "loss_sod": 0.04403619468212128, - "loss_total": 0.5310711860656738, - "step": 317499 - }, - { - "epoch": 0.029, - "grad_norm": 0.9841665029525757, - "learning_rate": 6.698888477945786e-06, - "loss": 0.4266, - "step": 317500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.926337242126465, - "loss_rtd": 0.19186662137508392, - "loss_sent": 0.07662045955657959, - "loss_sod": 0.12023736536502838, - "loss_total": 0.3887244462966919, - "step": 317599 - }, - { - "epoch": 0.029198, - "loss_gen": 6.068818092346191, - "loss_rtd": 0.19683921337127686, - "loss_sent": 0.2143409103155136, - "loss_sod": 0.05517958477139473, - "loss_total": 0.4663597345352173, - "step": 317599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.3692232370376587, - "learning_rate": 6.683030395871525e-06, - "loss": 0.4206, - "step": 317600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.710707664489746, - "loss_rtd": 0.21629835665225983, - "loss_sent": 0.09587308019399643, - "loss_sod": 0.007617895491421223, - "loss_total": 0.319789320230484, - "step": 317699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.954129219055176, - "loss_rtd": 0.22051052749156952, - "loss_sent": 0.08118244260549545, - "loss_sod": 0.03730803355574608, - "loss_total": 0.33900099992752075, - "step": 317699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.6702138781547546, - "learning_rate": 6.667189761867426e-06, - "loss": 0.4137, - "step": 317700 - }, - { - "epoch": 0.029598, - "loss_gen": 6.153414726257324, - "loss_rtd": 0.21158844232559204, - "loss_sent": 0.2731311023235321, - "loss_sod": 0.054120589047670364, - "loss_total": 0.5388401746749878, - "step": 317799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.767001152038574, - "loss_rtd": 0.21963752806186676, - "loss_sent": 0.056638315320014954, - "loss_sod": 0.060463808476924896, - "loss_total": 0.3367396593093872, - "step": 317799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.2742230892181396, - "learning_rate": 6.651366582314106e-06, - "loss": 0.4201, - "step": 317800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.668087959289551, - "loss_rtd": 0.19701330363750458, - "loss_sent": 0.3478797674179077, - "loss_sod": 0.044805239886045456, - "loss_total": 0.589698314666748, - "step": 317899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.5290913581848145, - "loss_rtd": 0.20819711685180664, - "loss_sent": 0.05382475256919861, - "loss_sod": 0.0117452098056674, - "loss_total": 0.2737670838832855, - "step": 317899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.291848063468933, - "learning_rate": 6.635560863585144e-06, - "loss": 0.4284, - "step": 317900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.629080772399902, - "loss_rtd": 0.22029152512550354, - "loss_sent": 0.38260847330093384, - "loss_sod": 0.006366434041410685, - "loss_total": 0.6092664003372192, - "step": 317999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.788712501525879, - "loss_rtd": 0.19587451219558716, - "loss_sent": 0.021938376128673553, - "loss_sod": 0.1297975331544876, - "loss_total": 0.3476104140281677, - "step": 317999 - }, - { - "epoch": 0.03, - "grad_norm": 2.7727575302124023, - "learning_rate": 6.619772612047092e-06, - "loss": 0.4329, - "step": 318000 - }, - { - "epoch": 0.03, - "eval_loss": 0.4041973650455475, - "eval_runtime": 151.401, - "eval_samples_per_second": 102.001, - "eval_steps_per_second": 0.799, - "step": 318000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.844808101654053, - "loss_rtd": 0.23298917710781097, - "loss_sent": 0.13160644471645355, - "loss_sod": 0.06651133298873901, - "loss_total": 0.4311069846153259, - "step": 318099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.991744041442871, - "loss_rtd": 0.20404934883117676, - "loss_sent": 0.16844616830348969, - "loss_sod": 0.07261691242456436, - "loss_total": 0.445112407207489, - "step": 318099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.9993682503700256, - "learning_rate": 6.6040018340594315e-06, - "loss": 0.4178, - "step": 318100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.731882572174072, - "loss_rtd": 0.19733524322509766, - "loss_sent": 0.14549008011817932, - "loss_sod": 0.08695275336503983, - "loss_total": 0.4297780692577362, - "step": 318199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.993220329284668, - "loss_rtd": 0.20111925899982452, - "loss_sent": 0.29472586512565613, - "loss_sod": 0.016072260215878487, - "loss_total": 0.5119173526763916, - "step": 318199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.168389081954956, - "learning_rate": 6.588248535974645e-06, - "loss": 0.4434, - "step": 318200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.904257774353027, - "loss_rtd": 0.22033774852752686, - "loss_sent": 0.5427066683769226, - "loss_sod": 0.06353654712438583, - "loss_total": 0.8265810012817383, - "step": 318299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.867843151092529, - "loss_rtd": 0.18177077174186707, - "loss_sent": 0.05589713528752327, - "loss_sod": 0.11904123425483704, - "loss_total": 0.35670915246009827, - "step": 318299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.657719612121582, - "learning_rate": 6.572512724138158e-06, - "loss": 0.4178, - "step": 318300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.974632740020752, - "loss_rtd": 0.23202939331531525, - "loss_sent": 0.09834026545286179, - "loss_sod": 0.07482287287712097, - "loss_total": 0.4051925539970398, - "step": 318399 - }, - { - "epoch": 0.030798, - "loss_gen": 6.390172481536865, - "loss_rtd": 0.2133445143699646, - "loss_sent": 0.3928605914115906, - "loss_sod": 0.06184985488653183, - "loss_total": 0.6680549383163452, - "step": 318399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.7911866903305054, - "learning_rate": 6.5567944048883615e-06, - "loss": 0.4304, - "step": 318400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.871227264404297, - "loss_rtd": 0.2086278647184372, - "loss_sent": 0.3483510911464691, - "loss_sod": 0.011658594943583012, - "loss_total": 0.5686375498771667, - "step": 318499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.808342933654785, - "loss_rtd": 0.20525531470775604, - "loss_sent": 0.16486814618110657, - "loss_sod": 0.04720059782266617, - "loss_total": 0.4173240661621094, - "step": 318499 - }, - { - "epoch": 0.031, - "grad_norm": 1.1544314622879028, - "learning_rate": 6.541093584556574e-06, - "loss": 0.4256, - "step": 318500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.843674659729004, - "loss_rtd": 0.227028489112854, - "loss_sent": 0.3395182490348816, - "loss_sod": 0.03323065862059593, - "loss_total": 0.5997774004936218, - "step": 318599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.810780048370361, - "loss_rtd": 0.2051435261964798, - "loss_sent": 0.17964157462120056, - "loss_sod": 0.039741553366184235, - "loss_total": 0.4245266616344452, - "step": 318599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.5714060068130493, - "learning_rate": 6.525410269467091e-06, - "loss": 0.4142, - "step": 318600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.445639610290527, - "loss_rtd": 0.186831995844841, - "loss_sent": 0.0022034335415810347, - "loss_sod": 0.047516778111457825, - "loss_total": 0.23655220866203308, - "step": 318699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.501916885375977, - "loss_rtd": 0.18114088475704193, - "loss_sent": 0.12337516248226166, - "loss_sod": 0.0120387626811862, - "loss_total": 0.31655481457710266, - "step": 318699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.711101233959198, - "learning_rate": 6.509744465937151e-06, - "loss": 0.4182, - "step": 318700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.846083641052246, - "loss_rtd": 0.21401764452457428, - "loss_sent": 0.2561408281326294, - "loss_sod": 0.03093666210770607, - "loss_total": 0.5010951161384583, - "step": 318799 - }, - { - "epoch": 0.031598, - "loss_gen": 6.083571434020996, - "loss_rtd": 0.22676680982112885, - "loss_sent": 0.37605637311935425, - "loss_sod": 0.10717137902975082, - "loss_total": 0.7099945545196533, - "step": 318799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.3865742683410645, - "learning_rate": 6.494096180276954e-06, - "loss": 0.42, - "step": 318800 - }, - { - "epoch": 0.031798, - "loss_gen": 6.157656669616699, - "loss_rtd": 0.21140675246715546, - "loss_sent": 0.152816504240036, - "loss_sod": 0.07600642740726471, - "loss_total": 0.4402296841144562, - "step": 318899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.833266258239746, - "loss_rtd": 0.20781660079956055, - "loss_sent": 0.17311154305934906, - "loss_sod": 0.03738585114479065, - "loss_total": 0.41831398010253906, - "step": 318899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.2343511581420898, - "learning_rate": 6.478465418789598e-06, - "loss": 0.4118, - "step": 318900 - }, - { - "epoch": 0.031998, - "loss_gen": 6.131964206695557, - "loss_rtd": 0.2100180983543396, - "loss_sent": 0.2400071918964386, - "loss_sod": 0.04273661971092224, - "loss_total": 0.49276190996170044, - "step": 318999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.553122520446777, - "loss_rtd": 0.19109469652175903, - "loss_sent": 0.017574027180671692, - "loss_sod": 0.10851524770259857, - "loss_total": 0.3171839714050293, - "step": 318999 - }, - { - "epoch": 0.032, - "grad_norm": 0.9409855008125305, - "learning_rate": 6.4628521877711675e-06, - "loss": 0.4235, - "step": 319000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4110987186431885, - "eval_runtime": 151.2179, - "eval_samples_per_second": 102.124, - "eval_steps_per_second": 0.8, - "step": 319000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.976017475128174, - "loss_rtd": 0.19671468436717987, - "loss_sent": 0.5756129622459412, - "loss_sod": 0.05595851689577103, - "loss_total": 0.8282861709594727, - "step": 319099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.68550443649292, - "loss_rtd": 0.1934850513935089, - "loss_sent": 0.25003373622894287, - "loss_sod": 0.005886332131922245, - "loss_total": 0.44940513372421265, - "step": 319099 - }, - { - "epoch": 0.0002, - "grad_norm": 2.3204917907714844, - "learning_rate": 6.4472564935106835e-06, - "loss": 0.4239, - "step": 319100 - }, - { - "epoch": 0.000398, - "loss_gen": 6.178202152252197, - "loss_rtd": 0.23375770449638367, - "loss_sent": 0.39146536588668823, - "loss_sod": 0.03721356764435768, - "loss_total": 0.6624366044998169, - "step": 319199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.941952705383301, - "loss_rtd": 0.21831358969211578, - "loss_sent": 0.3978358209133148, - "loss_sod": 0.050270840525627136, - "loss_total": 0.6664202213287354, - "step": 319199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.753470778465271, - "learning_rate": 6.431678342290065e-06, - "loss": 0.4417, - "step": 319200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.796645641326904, - "loss_rtd": 0.21562138199806213, - "loss_sent": 0.1595989614725113, - "loss_sod": 0.04095842316746712, - "loss_total": 0.41617876291275024, - "step": 319299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.814482688903809, - "loss_rtd": 0.17968492209911346, - "loss_sent": 0.20665135979652405, - "loss_sod": 0.024276655167341232, - "loss_total": 0.41061294078826904, - "step": 319299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.8900070190429688, - "learning_rate": 6.416117740384198e-06, - "loss": 0.4323, - "step": 319300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.882384777069092, - "loss_rtd": 0.20576916635036469, - "loss_sent": 0.4200195074081421, - "loss_sod": 0.025177771225571632, - "loss_total": 0.650966465473175, - "step": 319399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.954499244689941, - "loss_rtd": 0.195393368601799, - "loss_sent": 0.045674994587898254, - "loss_sod": 0.035737112164497375, - "loss_total": 0.27680546045303345, - "step": 319399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.8523480892181396, - "learning_rate": 6.40057469406089e-06, - "loss": 0.4033, - "step": 319400 - }, - { - "epoch": 0.000998, - "loss_gen": 6.113741874694824, - "loss_rtd": 0.2027757465839386, - "loss_sent": 0.7217386364936829, - "loss_sod": 0.02413523755967617, - "loss_total": 0.9486496448516846, - "step": 319499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.816281795501709, - "loss_rtd": 0.21378113329410553, - "loss_sent": 0.09893307834863663, - "loss_sod": 0.006139421835541725, - "loss_total": 0.3188536465167999, - "step": 319499 - }, - { - "epoch": 0.001, - "grad_norm": 3.013335704803467, - "learning_rate": 6.385049209580896e-06, - "loss": 0.4202, - "step": 319500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.436474800109863, - "loss_rtd": 0.148299902677536, - "loss_sent": 0.00681539298966527, - "loss_sod": 0.07231418043375015, - "loss_total": 0.22742946445941925, - "step": 319599 - }, - { - "epoch": 0.001198, - "loss_gen": 6.113706111907959, - "loss_rtd": 0.22431142628192902, - "loss_sent": 0.1506250947713852, - "loss_sod": 0.011137381196022034, - "loss_total": 0.38607388734817505, - "step": 319599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.9200167059898376, - "learning_rate": 6.36954129319785e-06, - "loss": 0.4085, - "step": 319600 - }, - { - "epoch": 0.001398, - "loss_gen": 6.074091911315918, - "loss_rtd": 0.21150025725364685, - "loss_sent": 0.22251741588115692, - "loss_sod": 0.03574107587337494, - "loss_total": 0.4697587490081787, - "step": 319699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.260921001434326, - "loss_rtd": 0.20089313387870789, - "loss_sent": 0.00206866767257452, - "loss_sod": 0.16894492506980896, - "loss_total": 0.37190672755241394, - "step": 319699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.1115680932998657, - "learning_rate": 6.354050951158358e-06, - "loss": 0.4054, - "step": 319700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.841836452484131, - "loss_rtd": 0.21303756535053253, - "loss_sent": 0.1961507946252823, - "loss_sod": 0.012042567133903503, - "loss_total": 0.42123091220855713, - "step": 319799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.830868721008301, - "loss_rtd": 0.21649155020713806, - "loss_sent": 0.34402939677238464, - "loss_sod": 0.04911269620060921, - "loss_total": 0.6096336841583252, - "step": 319799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.6048423051834106, - "learning_rate": 6.338578189701921e-06, - "loss": 0.4472, - "step": 319800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.899473667144775, - "loss_rtd": 0.22005142271518707, - "loss_sent": 0.27338936924934387, - "loss_sod": 0.006652672775089741, - "loss_total": 0.5000934600830078, - "step": 319899 - }, - { - "epoch": 0.001798, - "loss_gen": 6.022148132324219, - "loss_rtd": 0.2090901881456375, - "loss_sent": 0.13876378536224365, - "loss_sod": 0.08535449951887131, - "loss_total": 0.4332084655761719, - "step": 319899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.0410821437835693, - "learning_rate": 6.323123015060978e-06, - "loss": 0.4223, - "step": 319900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.855429649353027, - "loss_rtd": 0.21403956413269043, - "loss_sent": 0.27373263239860535, - "loss_sod": 0.007027428597211838, - "loss_total": 0.4947996139526367, - "step": 319999 - }, - { - "epoch": 0.001998, - "loss_gen": 6.12824010848999, - "loss_rtd": 0.21014724671840668, - "loss_sent": 0.16254407167434692, - "loss_sod": 0.05450683832168579, - "loss_total": 0.4271981716156006, - "step": 319999 - }, - { - "epoch": 0.002, - "grad_norm": 0.716411828994751, - "learning_rate": 6.307685433460853e-06, - "loss": 0.444, - "step": 320000 - }, - { - "epoch": 0.002, - "eval_loss": 0.4017051160335541, - "eval_runtime": 154.2258, - "eval_samples_per_second": 100.132, - "eval_steps_per_second": 0.785, - "step": 320000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.743134021759033, - "loss_rtd": 0.2047204226255417, - "loss_sent": 0.19227875769138336, - "loss_sod": 0.0972495898604393, - "loss_total": 0.49424874782562256, - "step": 320099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.994846820831299, - "loss_rtd": 0.21796832978725433, - "loss_sent": 0.6728981137275696, - "loss_sod": 0.03634445369243622, - "loss_total": 0.9272109270095825, - "step": 320099 - }, - { - "epoch": 0.0022, - "grad_norm": 2.0933148860931396, - "learning_rate": 6.292265451119805e-06, - "loss": 0.4335, - "step": 320100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.971792697906494, - "loss_rtd": 0.21235860884189606, - "loss_sent": 0.22334183752536774, - "loss_sod": 0.04061507433652878, - "loss_total": 0.4763154983520508, - "step": 320199 - }, - { - "epoch": 0.002398, - "loss_gen": 6.040041923522949, - "loss_rtd": 0.2328938990831375, - "loss_sent": 0.44883209466934204, - "loss_sod": 0.09541422128677368, - "loss_total": 0.777140200138092, - "step": 320199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.631795883178711, - "learning_rate": 6.276863074249012e-06, - "loss": 0.4122, - "step": 320200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.901726722717285, - "loss_rtd": 0.22118978202342987, - "loss_sent": 0.10289447009563446, - "loss_sod": 0.03695704787969589, - "loss_total": 0.3610413074493408, - "step": 320299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.704709529876709, - "loss_rtd": 0.2171352654695511, - "loss_sent": 0.07313850522041321, - "loss_sod": 0.00678838649764657, - "loss_total": 0.2970621585845947, - "step": 320299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.0979467630386353, - "learning_rate": 6.261478309052554e-06, - "loss": 0.4334, - "step": 320300 - }, - { - "epoch": 0.002798, - "loss_gen": 6.131543159484863, - "loss_rtd": 0.21549084782600403, - "loss_sent": 0.2658088207244873, - "loss_sod": 0.013012303039431572, - "loss_total": 0.49431198835372925, - "step": 320399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.947504043579102, - "loss_rtd": 0.2135101556777954, - "loss_sent": 0.24038216471672058, - "loss_sod": 0.03134508430957794, - "loss_total": 0.4852374196052551, - "step": 320399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.5543198585510254, - "learning_rate": 6.246111161727392e-06, - "loss": 0.4344, - "step": 320400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.960519313812256, - "loss_rtd": 0.2095993161201477, - "loss_sent": 0.17482279241085052, - "loss_sod": 0.10335630178451538, - "loss_total": 0.4877784252166748, - "step": 320499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.681966781616211, - "loss_rtd": 0.18359705805778503, - "loss_sent": 0.19377291202545166, - "loss_sod": 0.05301167815923691, - "loss_total": 0.4303816556930542, - "step": 320499 - }, - { - "epoch": 0.003, - "grad_norm": 1.1342074871063232, - "learning_rate": 6.230761638463417e-06, - "loss": 0.4281, - "step": 320500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.191170692443848, - "loss_rtd": 0.18369318544864655, - "loss_sent": 0.012727886438369751, - "loss_sod": 0.052066899836063385, - "loss_total": 0.24848797917366028, - "step": 320599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.531480312347412, - "loss_rtd": 0.1919640451669693, - "loss_sent": 0.10939568281173706, - "loss_sod": 0.07849612832069397, - "loss_total": 0.3798558712005615, - "step": 320599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.7807751297950745, - "learning_rate": 6.215429745443435e-06, - "loss": 0.4268, - "step": 320600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.721339702606201, - "loss_rtd": 0.21336989104747772, - "loss_sent": 0.21239691972732544, - "loss_sod": 0.023153558373451233, - "loss_total": 0.4489203691482544, - "step": 320699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.7912211418151855, - "loss_rtd": 0.2425805926322937, - "loss_sent": 0.06593421846628189, - "loss_sod": 0.024264683946967125, - "loss_total": 0.33277949690818787, - "step": 320699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.6599763035774231, - "learning_rate": 6.200115488843106e-06, - "loss": 0.4153, - "step": 320700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.787447929382324, - "loss_rtd": 0.20720696449279785, - "loss_sent": 0.08620306849479675, - "loss_sod": 0.039434611797332764, - "loss_total": 0.33284464478492737, - "step": 320799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.430987358093262, - "loss_rtd": 0.16499969363212585, - "loss_sent": 0.07435581833124161, - "loss_sod": 0.060972969979047775, - "loss_total": 0.30032849311828613, - "step": 320799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.8227694630622864, - "learning_rate": 6.1848188748310275e-06, - "loss": 0.4091, - "step": 320800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.704413414001465, - "loss_rtd": 0.22211644053459167, - "loss_sent": 0.2629146873950958, - "loss_sod": 0.0040412116795778275, - "loss_total": 0.489072322845459, - "step": 320899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.9420084953308105, - "loss_rtd": 0.20459522306919098, - "loss_sent": 0.1701173484325409, - "loss_sod": 0.019897334277629852, - "loss_total": 0.39460989832878113, - "step": 320899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.146616816520691, - "learning_rate": 6.169539909568655e-06, - "loss": 0.4191, - "step": 320900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.657702922821045, - "loss_rtd": 0.20114222168922424, - "loss_sent": 0.1660340279340744, - "loss_sod": 0.028680017217993736, - "loss_total": 0.39585626125335693, - "step": 320999 - }, - { - "epoch": 0.003998, - "loss_gen": 6.145058631896973, - "loss_rtd": 0.21122728288173676, - "loss_sent": 0.275448203086853, - "loss_sod": 0.007465800270438194, - "loss_total": 0.49414128065109253, - "step": 320999 - }, - { - "epoch": 0.004, - "grad_norm": 1.9879456758499146, - "learning_rate": 6.154278599210367e-06, - "loss": 0.4305, - "step": 321000 - }, - { - "epoch": 0.004, - "eval_loss": 0.4108101725578308, - "eval_runtime": 150.8562, - "eval_samples_per_second": 102.369, - "eval_steps_per_second": 0.802, - "step": 321000 - }, - { - "epoch": 0.004198, - "loss_gen": 6.024953365325928, - "loss_rtd": 0.21365736424922943, - "loss_sent": 0.21364599466323853, - "loss_sod": 0.059438835829496384, - "loss_total": 0.48674219846725464, - "step": 321099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.437925338745117, - "loss_rtd": 0.2158796191215515, - "loss_sent": 0.2654319405555725, - "loss_sod": 0.040151964873075485, - "loss_total": 0.5214635133743286, - "step": 321099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.0901381969451904, - "learning_rate": 6.139034949903427e-06, - "loss": 0.4273, - "step": 321100 - }, - { - "epoch": 0.004398, - "loss_gen": 6.391094207763672, - "loss_rtd": 0.21198046207427979, - "loss_sent": 0.0628882646560669, - "loss_sod": 0.18223553895950317, - "loss_total": 0.45710426568984985, - "step": 321199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.735109806060791, - "loss_rtd": 0.2082480639219284, - "loss_sent": 0.11475709080696106, - "loss_sod": 0.02624213509261608, - "loss_total": 0.3492472767829895, - "step": 321199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.1446348428726196, - "learning_rate": 6.123808967787953e-06, - "loss": 0.415, - "step": 321200 - }, - { - "epoch": 0.004598, - "loss_gen": 6.019451141357422, - "loss_rtd": 0.21636879444122314, - "loss_sent": 0.5439419746398926, - "loss_sod": 0.027772653847932816, - "loss_total": 0.7880834341049194, - "step": 321299 - }, - { - "epoch": 0.004598, - "loss_gen": 6.030162334442139, - "loss_rtd": 0.22694973647594452, - "loss_sent": 0.13330230116844177, - "loss_sod": 0.05584421008825302, - "loss_total": 0.4160962402820587, - "step": 321299 - }, - { - "epoch": 0.0046, - "grad_norm": 2.6562745571136475, - "learning_rate": 6.108600658996977e-06, - "loss": 0.4349, - "step": 321300 - }, - { - "epoch": 0.004798, - "loss_gen": 6.245081424713135, - "loss_rtd": 0.21182465553283691, - "loss_sent": 0.13359057903289795, - "loss_sod": 0.08868192136287689, - "loss_total": 0.43409717082977295, - "step": 321399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.9391865730285645, - "loss_rtd": 0.22120217978954315, - "loss_sent": 0.06328216940164566, - "loss_sod": 0.05518423765897751, - "loss_total": 0.3396685719490051, - "step": 321399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.2882983684539795, - "learning_rate": 6.09341002965641e-06, - "loss": 0.431, - "step": 321400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.970085620880127, - "loss_rtd": 0.20211665332317352, - "loss_sent": 0.24362456798553467, - "loss_sod": 0.09598428010940552, - "loss_total": 0.5417255163192749, - "step": 321499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.794473171234131, - "loss_rtd": 0.2029036283493042, - "loss_sent": 0.12314584851264954, - "loss_sod": 0.08155947178602219, - "loss_total": 0.4076089560985565, - "step": 321499 - }, - { - "epoch": 0.005, - "grad_norm": 0.9801462292671204, - "learning_rate": 6.078237085885041e-06, - "loss": 0.4243, - "step": 321500 - }, - { - "epoch": 0.005198, - "loss_gen": 6.007789134979248, - "loss_rtd": 0.21848537027835846, - "loss_sent": 0.06751511991024017, - "loss_sod": 0.012613615021109581, - "loss_total": 0.2986140847206116, - "step": 321599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.825482368469238, - "loss_rtd": 0.2034791260957718, - "loss_sent": 0.2505021393299103, - "loss_sod": 0.0028420707676559687, - "loss_total": 0.45682334899902344, - "step": 321599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.6087115406990051, - "learning_rate": 6.063081833794515e-06, - "loss": 0.4432, - "step": 321600 - }, - { - "epoch": 0.005398, - "loss_gen": 6.068107604980469, - "loss_rtd": 0.2043725550174713, - "loss_sent": 0.17323701083660126, - "loss_sod": 0.0464274063706398, - "loss_total": 0.42403697967529297, - "step": 321699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.962821006774902, - "loss_rtd": 0.21008557081222534, - "loss_sent": 0.30374470353126526, - "loss_sod": 0.007715006824582815, - "loss_total": 0.5215452909469604, - "step": 321699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.6156496405601501, - "learning_rate": 6.04794427948937e-06, - "loss": 0.4023, - "step": 321700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.743560314178467, - "loss_rtd": 0.22464556992053986, - "loss_sent": 0.16896145045757294, - "loss_sod": 0.15009906888008118, - "loss_total": 0.5437061190605164, - "step": 321799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.551284313201904, - "loss_rtd": 0.21979455649852753, - "loss_sent": 0.11361131072044373, - "loss_sod": 0.19593751430511475, - "loss_total": 0.5293433666229248, - "step": 321799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.5520875453948975, - "learning_rate": 6.0328244290670076e-06, - "loss": 0.4123, - "step": 321800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.0887131690979, - "loss_rtd": 0.21372784674167633, - "loss_sent": 0.14582060277462006, - "loss_sod": 0.0354970283806324, - "loss_total": 0.3950454592704773, - "step": 321899 - }, - { - "epoch": 0.005798, - "loss_gen": 6.088348865509033, - "loss_rtd": 0.1735665500164032, - "loss_sent": 0.12466592341661453, - "loss_sod": 0.08897095173597336, - "loss_total": 0.3872034251689911, - "step": 321899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.028296947479248, - "learning_rate": 6.017722288617722e-06, - "loss": 0.4112, - "step": 321900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.786381721496582, - "loss_rtd": 0.22154591977596283, - "loss_sent": 0.2573598325252533, - "loss_sod": 0.1154954582452774, - "loss_total": 0.5944012403488159, - "step": 321999 - }, - { - "epoch": 0.005998, - "loss_gen": 6.044796943664551, - "loss_rtd": 0.1966133564710617, - "loss_sent": 0.09351838380098343, - "loss_sod": 0.01564120128750801, - "loss_total": 0.30577293038368225, - "step": 321999 - }, - { - "epoch": 0.006, - "grad_norm": 1.3540593385696411, - "learning_rate": 6.002637864224631e-06, - "loss": 0.4135, - "step": 322000 - }, - { - "epoch": 0.006, - "eval_loss": 0.4049164652824402, - "eval_runtime": 150.672, - "eval_samples_per_second": 102.494, - "eval_steps_per_second": 0.803, - "step": 322000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.896158218383789, - "loss_rtd": 0.1843385100364685, - "loss_sent": 0.23589207231998444, - "loss_sod": 0.007741671986877918, - "loss_total": 0.4279722571372986, - "step": 322099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.73489236831665, - "loss_rtd": 0.20232951641082764, - "loss_sent": 0.1733345240354538, - "loss_sod": 0.0740676149725914, - "loss_total": 0.44973164796829224, - "step": 322099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.8871188759803772, - "learning_rate": 5.9875711619637456e-06, - "loss": 0.4317, - "step": 322100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.838665008544922, - "loss_rtd": 0.20343336462974548, - "loss_sent": 0.11810265481472015, - "loss_sod": 0.03417399898171425, - "loss_total": 0.3557100296020508, - "step": 322199 - }, - { - "epoch": 0.006398, - "loss_gen": 6.101709365844727, - "loss_rtd": 0.1968148648738861, - "loss_sent": 0.48954933881759644, - "loss_sod": 0.017828695476055145, - "loss_total": 0.7041928768157959, - "step": 322199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.3429930210113525, - "learning_rate": 5.972522187903939e-06, - "loss": 0.4166, - "step": 322200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.993008136749268, - "loss_rtd": 0.19729797542095184, - "loss_sent": 0.11938892304897308, - "loss_sod": 0.026073159649968147, - "loss_total": 0.3427600562572479, - "step": 322299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.700710296630859, - "loss_rtd": 0.1850474774837494, - "loss_sent": 0.04739578440785408, - "loss_sod": 0.09909026324748993, - "loss_total": 0.3315335214138031, - "step": 322299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.0525033473968506, - "learning_rate": 5.957490948106925e-06, - "loss": 0.4198, - "step": 322300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.894399642944336, - "loss_rtd": 0.19894111156463623, - "loss_sent": 0.6496871113777161, - "loss_sod": 0.029115945100784302, - "loss_total": 0.877744197845459, - "step": 322399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.973742961883545, - "loss_rtd": 0.22670283913612366, - "loss_sent": 0.3474273383617401, - "loss_sod": 0.0482768714427948, - "loss_total": 0.622407078742981, - "step": 322399 - }, - { - "epoch": 0.0068, - "grad_norm": 2.0627973079681396, - "learning_rate": 5.942477448627287e-06, - "loss": 0.4355, - "step": 322400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.220848560333252, - "loss_rtd": 0.16829366981983185, - "loss_sent": 0.008023840375244617, - "loss_sod": 0.06601382046937943, - "loss_total": 0.2423313409090042, - "step": 322499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.9026336669921875, - "loss_rtd": 0.19783097505569458, - "loss_sent": 0.24712328612804413, - "loss_sod": 0.10601376742124557, - "loss_total": 0.5509680509567261, - "step": 322499 - }, - { - "epoch": 0.007, - "grad_norm": 1.1102864742279053, - "learning_rate": 5.92748169551246e-06, - "loss": 0.4332, - "step": 322500 - }, - { - "epoch": 0.007198, - "loss_gen": 6.023877143859863, - "loss_rtd": 0.19681943953037262, - "loss_sent": 0.20057518780231476, - "loss_sod": 0.038912322372198105, - "loss_total": 0.4363069534301758, - "step": 322599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.604940414428711, - "loss_rtd": 0.2346101701259613, - "loss_sent": 0.11208489537239075, - "loss_sod": 0.003951771650463343, - "loss_total": 0.35064685344696045, - "step": 322599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.5625324845314026, - "learning_rate": 5.9125036948027515e-06, - "loss": 0.4085, - "step": 322600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.948314189910889, - "loss_rtd": 0.21103662252426147, - "loss_sent": 0.05874745547771454, - "loss_sod": 0.13852140307426453, - "loss_total": 0.40830546617507935, - "step": 322699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.678983211517334, - "loss_rtd": 0.21298469603061676, - "loss_sent": 0.13734786212444305, - "loss_sod": 0.07159619778394699, - "loss_total": 0.4219287633895874, - "step": 322699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.4385826587677002, - "learning_rate": 5.897543452531273e-06, - "loss": 0.4225, - "step": 322700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.906988143920898, - "loss_rtd": 0.22667856514453888, - "loss_sent": 0.1278458535671234, - "loss_sod": 0.024468297138810158, - "loss_total": 0.3789927065372467, - "step": 322799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.956003665924072, - "loss_rtd": 0.22413918375968933, - "loss_sent": 0.27263471484184265, - "loss_sod": 0.08346781134605408, - "loss_total": 0.5802416801452637, - "step": 322799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.3001375198364258, - "learning_rate": 5.882600974724017e-06, - "loss": 0.4335, - "step": 322800 - }, - { - "epoch": 0.007798, - "loss_gen": 6.4441752433776855, - "loss_rtd": 0.21956200897693634, - "loss_sent": 0.056823354214429855, - "loss_sod": 0.19637469947338104, - "loss_total": 0.47276005148887634, - "step": 322899 - }, - { - "epoch": 0.007798, - "loss_gen": 6.187901496887207, - "loss_rtd": 0.18620547652244568, - "loss_sent": 0.08474566042423248, - "loss_sod": 0.14326469600200653, - "loss_total": 0.4142158031463623, - "step": 322899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.7970831394195557, - "learning_rate": 5.867676267399807e-06, - "loss": 0.4145, - "step": 322900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.986310005187988, - "loss_rtd": 0.20238037407398224, - "loss_sent": 0.16570836305618286, - "loss_sod": 0.012983284890651703, - "loss_total": 0.3810720145702362, - "step": 322999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.3566389083862305, - "loss_rtd": 0.18088479340076447, - "loss_sent": 2.4724446120671928e-05, - "loss_sod": 0.0914454311132431, - "loss_total": 0.27235496044158936, - "step": 322999 - }, - { - "epoch": 0.008, - "grad_norm": 0.8922746777534485, - "learning_rate": 5.852769336570335e-06, - "loss": 0.4157, - "step": 323000 - }, - { - "epoch": 0.008, - "eval_loss": 0.3941841125488281, - "eval_runtime": 150.7699, - "eval_samples_per_second": 102.428, - "eval_steps_per_second": 0.803, - "step": 323000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.788620471954346, - "loss_rtd": 0.21855397522449493, - "loss_sent": 0.5130893588066101, - "loss_sod": 0.01934962347149849, - "loss_total": 0.7509929537773132, - "step": 323099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.9072065353393555, - "loss_rtd": 0.21429945528507233, - "loss_sent": 0.4870833158493042, - "loss_sod": 0.052126333117485046, - "loss_total": 0.7535091042518616, - "step": 323099 - }, - { - "epoch": 0.0082, - "grad_norm": 2.9539873600006104, - "learning_rate": 5.83788018824008e-06, - "loss": 0.4355, - "step": 323100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.896347522735596, - "loss_rtd": 0.20507663488388062, - "loss_sent": 0.3175153136253357, - "loss_sod": 0.020972145721316338, - "loss_total": 0.5435640811920166, - "step": 323199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.557343006134033, - "loss_rtd": 0.1855645477771759, - "loss_sent": 0.0837344229221344, - "loss_sod": 0.05035916715860367, - "loss_total": 0.3196581304073334, - "step": 323199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.6112529039382935, - "learning_rate": 5.823008828406407e-06, - "loss": 0.4266, - "step": 323200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.066956043243408, - "loss_rtd": 0.1619899570941925, - "loss_sent": 0.000483002164401114, - "loss_sod": 0.02260011062026024, - "loss_total": 0.18507307767868042, - "step": 323299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.710513591766357, - "loss_rtd": 0.20172366499900818, - "loss_sent": 0.29260778427124023, - "loss_sod": 0.01542019471526146, - "loss_total": 0.5097516775131226, - "step": 323299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.088402509689331, - "learning_rate": 5.8081552630594945e-06, - "loss": 0.4283, - "step": 323300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.820318698883057, - "loss_rtd": 0.2332913875579834, - "loss_sent": 0.15792295336723328, - "loss_sod": 0.05823970586061478, - "loss_total": 0.44945403933525085, - "step": 323399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.7537150382995605, - "loss_rtd": 0.21362803876399994, - "loss_sent": 0.27700141072273254, - "loss_sod": 0.009000813588500023, - "loss_total": 0.49963027238845825, - "step": 323399 - }, - { - "epoch": 0.0088, - "grad_norm": 0.7966939210891724, - "learning_rate": 5.79331949818237e-06, - "loss": 0.4246, - "step": 323400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.81212043762207, - "loss_rtd": 0.17437124252319336, - "loss_sent": 0.0001814306597225368, - "loss_sod": 0.2614227533340454, - "loss_total": 0.43597543239593506, - "step": 323499 - }, - { - "epoch": 0.008998, - "loss_gen": 5.512197017669678, - "loss_rtd": 0.17744967341423035, - "loss_sent": 3.1737588869873434e-05, - "loss_sod": 0.10350587964057922, - "loss_total": 0.28098729252815247, - "step": 323499 - }, - { - "epoch": 0.009, - "grad_norm": 1.2898768186569214, - "learning_rate": 5.778501539750853e-06, - "loss": 0.424, - "step": 323500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.914320945739746, - "loss_rtd": 0.2515323758125305, - "loss_sent": 0.27580705285072327, - "loss_sod": 0.031762272119522095, - "loss_total": 0.5591017007827759, - "step": 323599 - }, - { - "epoch": 0.009198, - "loss_gen": 6.314299583435059, - "loss_rtd": 0.20393356680870056, - "loss_sent": 0.14942575991153717, - "loss_sod": 0.03397171571850777, - "loss_total": 0.3873310387134552, - "step": 323599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.2606782913208008, - "learning_rate": 5.763701393733628e-06, - "loss": 0.4336, - "step": 323600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.89138126373291, - "loss_rtd": 0.18766659498214722, - "loss_sent": 0.10472889244556427, - "loss_sod": 0.04665805399417877, - "loss_total": 0.33905354142189026, - "step": 323699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.872872352600098, - "loss_rtd": 0.21082136034965515, - "loss_sent": 0.14542880654335022, - "loss_sod": 0.03512772172689438, - "loss_total": 0.39137789607048035, - "step": 323699 - }, - { - "epoch": 0.0094, - "grad_norm": 0.6540446877479553, - "learning_rate": 5.748919066092196e-06, - "loss": 0.4298, - "step": 323700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.380775451660156, - "loss_rtd": 0.1875913143157959, - "loss_sent": 0.0753634124994278, - "loss_sod": 0.03475553169846535, - "loss_total": 0.29771023988723755, - "step": 323799 - }, - { - "epoch": 0.009598, - "loss_gen": 6.2815985679626465, - "loss_rtd": 0.2141750007867813, - "loss_sent": 0.3287227153778076, - "loss_sod": 0.10725986957550049, - "loss_total": 0.6501575708389282, - "step": 323799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.8032573461532593, - "learning_rate": 5.734154562780869e-06, - "loss": 0.4346, - "step": 323800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.349405765533447, - "loss_rtd": 0.15455269813537598, - "loss_sent": 0.02965625189244747, - "loss_sod": 0.12667007744312286, - "loss_total": 0.31087902188301086, - "step": 323899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.886132717132568, - "loss_rtd": 0.20519687235355377, - "loss_sent": 0.18642914295196533, - "loss_sod": 0.025915272533893585, - "loss_total": 0.4175412952899933, - "step": 323899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.018308401107788, - "learning_rate": 5.7194078897467866e-06, - "loss": 0.399, - "step": 323900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.419346809387207, - "loss_rtd": 0.17705883085727692, - "loss_sent": 0.034329622983932495, - "loss_sod": 0.0573057122528553, - "loss_total": 0.2686941623687744, - "step": 323999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.800714492797852, - "loss_rtd": 0.22096692025661469, - "loss_sent": 0.18013055622577667, - "loss_sod": 0.0074227736331522465, - "loss_total": 0.40852025151252747, - "step": 323999 - }, - { - "epoch": 0.01, - "grad_norm": 0.788143515586853, - "learning_rate": 5.704679052929912e-06, - "loss": 0.4304, - "step": 324000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4050164520740509, - "eval_runtime": 150.6406, - "eval_samples_per_second": 102.516, - "eval_steps_per_second": 0.803, - "step": 324000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.502592086791992, - "loss_rtd": 0.16796788573265076, - "loss_sent": 0.009812552481889725, - "loss_sod": 0.012626749463379383, - "loss_total": 0.1904071867465973, - "step": 324099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.706894397735596, - "loss_rtd": 0.21625037491321564, - "loss_sent": 0.2922687232494354, - "loss_sod": 0.07662703096866608, - "loss_total": 0.5851461291313171, - "step": 324099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.191178798675537, - "learning_rate": 5.689968058263029e-06, - "loss": 0.4358, - "step": 324100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.98969030380249, - "loss_rtd": 0.22385506331920624, - "loss_sent": 0.09724408388137817, - "loss_sod": 0.001695240498520434, - "loss_total": 0.3227943778038025, - "step": 324199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.861635684967041, - "loss_rtd": 0.2096160501241684, - "loss_sent": 0.14023081958293915, - "loss_sod": 0.020481225103139877, - "loss_total": 0.3703280985355377, - "step": 324199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.924454927444458, - "learning_rate": 5.675274911671702e-06, - "loss": 0.4126, - "step": 324200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.927198886871338, - "loss_rtd": 0.19980433583259583, - "loss_sent": 0.07807888090610504, - "loss_sod": 0.051596079021692276, - "loss_total": 0.32947927713394165, - "step": 324299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.897017002105713, - "loss_rtd": 0.1724708378314972, - "loss_sent": 0.15576156973838806, - "loss_sod": 0.04659992828965187, - "loss_total": 0.3748323321342468, - "step": 324299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.426881194114685, - "learning_rate": 5.660599619074342e-06, - "loss": 0.4221, - "step": 324300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.188392162322998, - "loss_rtd": 0.16919992864131927, - "loss_sent": 2.4333296096301638e-05, - "loss_sod": 0.04423704743385315, - "loss_total": 0.21346130967140198, - "step": 324399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.4636993408203125, - "loss_rtd": 0.20324519276618958, - "loss_sent": 0.06986478716135025, - "loss_sod": 0.009771407581865788, - "loss_total": 0.28288137912750244, - "step": 324399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.5278782844543457, - "learning_rate": 5.645942186382147e-06, - "loss": 0.4087, - "step": 324400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.868215084075928, - "loss_rtd": 0.1995425522327423, - "loss_sent": 0.11778850108385086, - "loss_sod": 0.021918118000030518, - "loss_total": 0.3392491638660431, - "step": 324499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.459663391113281, - "loss_rtd": 0.17289361357688904, - "loss_sent": 0.000690351938828826, - "loss_sod": 0.10180986672639847, - "loss_total": 0.2753938138484955, - "step": 324499 - }, - { - "epoch": 0.011, - "grad_norm": 0.7451019883155823, - "learning_rate": 5.6313026194991515e-06, - "loss": 0.4, - "step": 324500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.870443344116211, - "loss_rtd": 0.22134725749492645, - "loss_sent": 0.07503363490104675, - "loss_sod": 0.022276245057582855, - "loss_total": 0.31865713000297546, - "step": 324599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.7765913009643555, - "loss_rtd": 0.21152400970458984, - "loss_sent": 0.16066452860832214, - "loss_sod": 0.027201417833566666, - "loss_total": 0.39938995242118835, - "step": 324599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.8734956383705139, - "learning_rate": 5.616680924322143e-06, - "loss": 0.4356, - "step": 324600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.798498153686523, - "loss_rtd": 0.210484117269516, - "loss_sent": 0.2456606924533844, - "loss_sod": 0.017464028671383858, - "loss_total": 0.4736088514328003, - "step": 324699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.863271236419678, - "loss_rtd": 0.20461349189281464, - "loss_sent": 0.08429497480392456, - "loss_sod": 0.013856226578354836, - "loss_total": 0.3027647137641907, - "step": 324699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.3390682935714722, - "learning_rate": 5.60207710674075e-06, - "loss": 0.4167, - "step": 324700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.786705493927002, - "loss_rtd": 0.2024630606174469, - "loss_sent": 0.045959584414958954, - "loss_sod": 0.0023766355589032173, - "loss_total": 0.2507992684841156, - "step": 324799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.913586616516113, - "loss_rtd": 0.20431970059871674, - "loss_sent": 0.16872656345367432, - "loss_sod": 0.006521163508296013, - "loss_total": 0.3795674443244934, - "step": 324799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.6634280681610107, - "learning_rate": 5.587491172637388e-06, - "loss": 0.4399, - "step": 324800 - }, - { - "epoch": 0.011798, - "loss_gen": 6.0628886222839355, - "loss_rtd": 0.20214658975601196, - "loss_sent": 0.27476999163627625, - "loss_sod": 0.025656037032604218, - "loss_total": 0.5025726556777954, - "step": 324899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.674528121948242, - "loss_rtd": 0.22252005338668823, - "loss_sent": 0.06508571654558182, - "loss_sod": 0.04835352301597595, - "loss_total": 0.3359592854976654, - "step": 324899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.7498753070831299, - "learning_rate": 5.572923127887281e-06, - "loss": 0.4248, - "step": 324900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.742950916290283, - "loss_rtd": 0.22713983058929443, - "loss_sent": 0.3100818395614624, - "loss_sod": 0.019239237532019615, - "loss_total": 0.5564609169960022, - "step": 324999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.700028419494629, - "loss_rtd": 0.23019778728485107, - "loss_sent": 0.11089194566011429, - "loss_sod": 0.02070794254541397, - "loss_total": 0.36179766058921814, - "step": 324999 - }, - { - "epoch": 0.012, - "grad_norm": 0.7887712717056274, - "learning_rate": 5.558372978358417e-06, - "loss": 0.4335, - "step": 325000 - }, - { - "epoch": 0.012, - "eval_loss": 0.4047861099243164, - "eval_runtime": 150.8833, - "eval_samples_per_second": 102.351, - "eval_steps_per_second": 0.802, - "step": 325000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.0647664070129395, - "loss_rtd": 0.21331389248371124, - "loss_sent": 0.17131194472312927, - "loss_sod": 0.044183120131492615, - "loss_total": 0.42880895733833313, - "step": 325099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.845245838165283, - "loss_rtd": 0.21022851765155792, - "loss_sent": 0.25248074531555176, - "loss_sod": 0.029903419315814972, - "loss_total": 0.49261268973350525, - "step": 325099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.7494087219238281, - "learning_rate": 5.543840729911598e-06, - "loss": 0.4031, - "step": 325100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.503693103790283, - "loss_rtd": 0.19288434088230133, - "loss_sent": 0.010657139122486115, - "loss_sod": 0.11942486464977264, - "loss_total": 0.3229663372039795, - "step": 325199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.176707744598389, - "loss_rtd": 0.16236528754234314, - "loss_sent": 2.5268305762438104e-05, - "loss_sod": 0.22292031347751617, - "loss_total": 0.3853108882904053, - "step": 325199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.164449691772461, - "learning_rate": 5.52932638840043e-06, - "loss": 0.433, - "step": 325200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.929834842681885, - "loss_rtd": 0.1952754110097885, - "loss_sent": 0.18080949783325195, - "loss_sod": 0.1016479954123497, - "loss_total": 0.47773289680480957, - "step": 325299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.479615211486816, - "loss_rtd": 0.16272349655628204, - "loss_sent": 0.04267306625843048, - "loss_sod": 0.06971586495637894, - "loss_total": 0.27511245012283325, - "step": 325299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.7706376314163208, - "learning_rate": 5.5148299596712635e-06, - "loss": 0.4289, - "step": 325300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.211487293243408, - "loss_rtd": 0.1634761542081833, - "loss_sent": 0.0009794557699933648, - "loss_sod": 0.1122213751077652, - "loss_total": 0.2766769826412201, - "step": 325399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.991980075836182, - "loss_rtd": 0.2033514678478241, - "loss_sent": 0.09735716879367828, - "loss_sod": 0.11001276969909668, - "loss_total": 0.41072142124176025, - "step": 325399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.1931016445159912, - "learning_rate": 5.500351449563274e-06, - "loss": 0.4179, - "step": 325400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.370450973510742, - "loss_rtd": 0.22828343510627747, - "loss_sent": 0.5143671631813049, - "loss_sod": 0.005769146606326103, - "loss_total": 0.7484197616577148, - "step": 325499 - }, - { - "epoch": 0.012998, - "loss_gen": 6.189851760864258, - "loss_rtd": 0.20508435368537903, - "loss_sent": 0.12709660828113556, - "loss_sod": 0.054082177579402924, - "loss_total": 0.3862631320953369, - "step": 325499 - }, - { - "epoch": 0.013, - "grad_norm": 1.8359318971633911, - "learning_rate": 5.4858908639083936e-06, - "loss": 0.4183, - "step": 325500 - }, - { - "epoch": 0.013198, - "loss_gen": 4.988429069519043, - "loss_rtd": 0.18256030976772308, - "loss_sent": 2.42159567278577e-05, - "loss_sod": 0.053691912442445755, - "loss_total": 0.23627643287181854, - "step": 325599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.198243141174316, - "loss_rtd": 0.17098495364189148, - "loss_sent": 0.01786923222243786, - "loss_sod": 0.013378378935158253, - "loss_total": 0.20223256945610046, - "step": 325599 - }, - { - "epoch": 0.0132, - "grad_norm": 0.44063425064086914, - "learning_rate": 5.47144820853136e-06, - "loss": 0.4013, - "step": 325600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.267978191375732, - "loss_rtd": 0.17308084666728973, - "loss_sent": 0.03930805251002312, - "loss_sod": 0.017490502446889877, - "loss_total": 0.22987940907478333, - "step": 325699 - }, - { - "epoch": 0.013398, - "loss_gen": 6.175917148590088, - "loss_rtd": 0.20863088965415955, - "loss_sent": 0.04831375926733017, - "loss_sod": 0.10151347517967224, - "loss_total": 0.35845813155174255, - "step": 325699 - }, - { - "epoch": 0.0134, - "grad_norm": 0.7104655504226685, - "learning_rate": 5.4570234892496574e-06, - "loss": 0.4271, - "step": 325700 - }, - { - "epoch": 0.013598, - "loss_gen": 6.254371166229248, - "loss_rtd": 0.20921887457370758, - "loss_sent": 0.06468168646097183, - "loss_sod": 0.12483938038349152, - "loss_total": 0.39873993396759033, - "step": 325799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.966006755828857, - "loss_rtd": 0.2077091634273529, - "loss_sent": 0.1878802627325058, - "loss_sod": 0.042571596801280975, - "loss_total": 0.4381610155105591, - "step": 325799 - }, - { - "epoch": 0.0136, - "grad_norm": 0.8915233016014099, - "learning_rate": 5.442616711873561e-06, - "loss": 0.4033, - "step": 325800 - }, - { - "epoch": 0.013798, - "loss_gen": 6.033672332763672, - "loss_rtd": 0.21461041271686554, - "loss_sent": 0.29450756311416626, - "loss_sod": 0.04083441197872162, - "loss_total": 0.5499523878097534, - "step": 325899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.867950439453125, - "loss_rtd": 0.19465017318725586, - "loss_sent": 0.17105290293693542, - "loss_sod": 0.0341770239174366, - "loss_total": 0.3998801112174988, - "step": 325899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.8925150632858276, - "learning_rate": 5.428227882206127e-06, - "loss": 0.4315, - "step": 325900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.686586380004883, - "loss_rtd": 0.24040189385414124, - "loss_sent": 0.5801467299461365, - "loss_sod": 0.04293229803442955, - "loss_total": 0.8634809255599976, - "step": 325999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.7457194328308105, - "loss_rtd": 0.19677533209323883, - "loss_sent": 0.36162111163139343, - "loss_sod": 0.018474020063877106, - "loss_total": 0.5768704414367676, - "step": 325999 - }, - { - "epoch": 0.014, - "grad_norm": 3.1317358016967773, - "learning_rate": 5.413857006043183e-06, - "loss": 0.4196, - "step": 326000 - }, - { - "epoch": 0.014, - "eval_loss": 0.4038664698600769, - "eval_runtime": 150.9074, - "eval_samples_per_second": 102.334, - "eval_steps_per_second": 0.802, - "step": 326000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.6637749671936035, - "loss_rtd": 0.2100795954465866, - "loss_sent": 0.2706587016582489, - "loss_sod": 0.006405374966561794, - "loss_total": 0.4871436655521393, - "step": 326099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.706732749938965, - "loss_rtd": 0.21142153441905975, - "loss_sent": 0.04162096977233887, - "loss_sod": 0.06310342252254486, - "loss_total": 0.3161459267139435, - "step": 326099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.3565428256988525, - "learning_rate": 5.399504089173291e-06, - "loss": 0.413, - "step": 326100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.53071928024292, - "loss_rtd": 0.21235330402851105, - "loss_sent": 0.21853940188884735, - "loss_sod": 0.010951917618513107, - "loss_total": 0.441844642162323, - "step": 326199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.990543842315674, - "loss_rtd": 0.20678623020648956, - "loss_sent": 0.17031468451023102, - "loss_sod": 0.006323775742202997, - "loss_total": 0.38342469930648804, - "step": 326199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.7048099040985107, - "learning_rate": 5.3851691373778215e-06, - "loss": 0.411, - "step": 326200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.824671268463135, - "loss_rtd": 0.19651657342910767, - "loss_sent": 0.24372057616710663, - "loss_sod": 0.022527601569890976, - "loss_total": 0.4627647399902344, - "step": 326299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.953470230102539, - "loss_rtd": 0.2178725302219391, - "loss_sent": 0.2566494047641754, - "loss_sod": 0.00923298392444849, - "loss_total": 0.4837549328804016, - "step": 326299 - }, - { - "epoch": 0.0146, - "grad_norm": 1.2701095342636108, - "learning_rate": 5.370852156430889e-06, - "loss": 0.4048, - "step": 326300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.846068382263184, - "loss_rtd": 0.21089419722557068, - "loss_sent": 0.3753054738044739, - "loss_sod": 0.045550860464572906, - "loss_total": 0.6317505240440369, - "step": 326399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.885951995849609, - "loss_rtd": 0.2226095050573349, - "loss_sent": 0.14516200125217438, - "loss_sod": 0.0367400124669075, - "loss_total": 0.4045115113258362, - "step": 326399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.4749726057052612, - "learning_rate": 5.356553152099381e-06, - "loss": 0.4165, - "step": 326400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.990963935852051, - "loss_rtd": 0.22117309272289276, - "loss_sent": 0.11019255220890045, - "loss_sod": 0.07310804724693298, - "loss_total": 0.4044736623764038, - "step": 326499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.933068752288818, - "loss_rtd": 0.20000134408473969, - "loss_sent": 0.19873477518558502, - "loss_sod": 0.05105184391140938, - "loss_total": 0.449787974357605, - "step": 326499 - }, - { - "epoch": 0.015, - "grad_norm": 1.7105019092559814, - "learning_rate": 5.342272130142911e-06, - "loss": 0.4207, - "step": 326500 - }, - { - "epoch": 0.015198, - "loss_gen": 6.19381046295166, - "loss_rtd": 0.22458291053771973, - "loss_sent": 0.17473535239696503, - "loss_sod": 0.15026640892028809, - "loss_total": 0.549584686756134, - "step": 326599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.7951836585998535, - "loss_rtd": 0.1988408863544464, - "loss_sent": 0.12660862505435944, - "loss_sod": 0.061969250440597534, - "loss_total": 0.3874187469482422, - "step": 326599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.3819634914398193, - "learning_rate": 5.3280090963138905e-06, - "loss": 0.4097, - "step": 326600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.61575984954834, - "loss_rtd": 0.1741504967212677, - "loss_sent": 2.4094882974168286e-05, - "loss_sod": 0.10541263222694397, - "loss_total": 0.2795872092247009, - "step": 326699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.273968696594238, - "loss_rtd": 0.1708516776561737, - "loss_sent": 0.0007279975106939673, - "loss_sod": 0.061606720089912415, - "loss_total": 0.23318639397621155, - "step": 326699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.1828755140304565, - "learning_rate": 5.313764056357462e-06, - "loss": 0.4176, - "step": 326700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.504697322845459, - "loss_rtd": 0.18083103001117706, - "loss_sent": 0.10256487876176834, - "loss_sod": 0.06174900382757187, - "loss_total": 0.3451448976993561, - "step": 326799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.871052265167236, - "loss_rtd": 0.20904289186000824, - "loss_sent": 0.42971178889274597, - "loss_sod": 0.027380742132663727, - "loss_total": 0.6661354303359985, - "step": 326799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.1960879564285278, - "learning_rate": 5.299537016011535e-06, - "loss": 0.4174, - "step": 326800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.506750583648682, - "loss_rtd": 0.17384295165538788, - "loss_sent": 0.06002357602119446, - "loss_sod": 0.12112460285425186, - "loss_total": 0.3549911379814148, - "step": 326899 - }, - { - "epoch": 0.015798, - "loss_gen": 6.256747722625732, - "loss_rtd": 0.2172124683856964, - "loss_sent": 0.09165572375059128, - "loss_sod": 0.06175126135349274, - "loss_total": 0.3706194758415222, - "step": 326899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.1491069793701172, - "learning_rate": 5.285327981006744e-06, - "loss": 0.4205, - "step": 326900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.526986598968506, - "loss_rtd": 0.22238966822624207, - "loss_sent": 0.061034757643938065, - "loss_sod": 0.01487022452056408, - "loss_total": 0.29829466342926025, - "step": 326999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.909547805786133, - "loss_rtd": 0.23055598139762878, - "loss_sent": 0.0981447771191597, - "loss_sod": 0.06247810646891594, - "loss_total": 0.39117884635925293, - "step": 326999 - }, - { - "epoch": 0.016, - "grad_norm": 1.1581236124038696, - "learning_rate": 5.271136957066497e-06, - "loss": 0.4098, - "step": 327000 - }, - { - "epoch": 0.016, - "eval_loss": 0.4041591286659241, - "eval_runtime": 151.0159, - "eval_samples_per_second": 102.261, - "eval_steps_per_second": 0.801, - "step": 327000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.215590476989746, - "loss_rtd": 0.17874009907245636, - "loss_sent": 0.03347104787826538, - "loss_sod": 0.020950332283973694, - "loss_total": 0.23316147923469543, - "step": 327099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.9166669845581055, - "loss_rtd": 0.20646648108959198, - "loss_sent": 0.08283329755067825, - "loss_sod": 0.005750981159508228, - "loss_total": 0.2950507700443268, - "step": 327099 - }, - { - "epoch": 0.0162, - "grad_norm": 0.5116104483604431, - "learning_rate": 5.25696394990694e-06, - "loss": 0.4102, - "step": 327100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.8377275466918945, - "loss_rtd": 0.2196062058210373, - "loss_sent": 0.18252575397491455, - "loss_sod": 0.019245656207203865, - "loss_total": 0.42137759923934937, - "step": 327199 - }, - { - "epoch": 0.016398, - "loss_gen": 6.1156744956970215, - "loss_rtd": 0.2223893404006958, - "loss_sent": 0.2272312045097351, - "loss_sod": 0.06531637907028198, - "loss_total": 0.5149369239807129, - "step": 327199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.089553952217102, - "learning_rate": 5.242808965236951e-06, - "loss": 0.4309, - "step": 327200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.7514424324035645, - "loss_rtd": 0.22236521542072296, - "loss_sent": 0.2495274692773819, - "loss_sod": 0.020663181319832802, - "loss_total": 0.4925558567047119, - "step": 327299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.885983467102051, - "loss_rtd": 0.21689851582050323, - "loss_sent": 0.14415277540683746, - "loss_sod": 0.06567038595676422, - "loss_total": 0.4267216920852661, - "step": 327299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.581281304359436, - "learning_rate": 5.228672008758151e-06, - "loss": 0.4229, - "step": 327300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.7649922370910645, - "loss_rtd": 0.19582818448543549, - "loss_sent": 0.09548603743314743, - "loss_sod": 0.06491593271493912, - "loss_total": 0.3562301695346832, - "step": 327399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.346714973449707, - "loss_rtd": 0.11843686550855637, - "loss_sent": 0.00842170137912035, - "loss_sod": 0.04954763874411583, - "loss_total": 0.17640620470046997, - "step": 327399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.840590238571167, - "learning_rate": 5.214553086164914e-06, - "loss": 0.4262, - "step": 327400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.7721052169799805, - "loss_rtd": 0.16569367051124573, - "loss_sent": 4.4171603803988546e-05, - "loss_sod": 0.1965005099773407, - "loss_total": 0.362238347530365, - "step": 327499 - }, - { - "epoch": 0.016998, - "loss_gen": 6.104852199554443, - "loss_rtd": 0.2262401133775711, - "loss_sent": 0.24634747207164764, - "loss_sod": 0.04126156121492386, - "loss_total": 0.513849139213562, - "step": 327499 - }, - { - "epoch": 0.017, - "grad_norm": 1.8922315835952759, - "learning_rate": 5.200452203144351e-06, - "loss": 0.4037, - "step": 327500 - }, - { - "epoch": 0.017198, - "loss_gen": 6.110281944274902, - "loss_rtd": 0.22877375781536102, - "loss_sent": 0.373697966337204, - "loss_sod": 0.0164099782705307, - "loss_total": 0.6188817024230957, - "step": 327599 - }, - { - "epoch": 0.017198, - "loss_gen": 6.031182289123535, - "loss_rtd": 0.2108943611383438, - "loss_sent": 0.25661003589630127, - "loss_sod": 0.11277800053358078, - "loss_total": 0.5802823901176453, - "step": 327599 - }, - { - "epoch": 0.0172, - "grad_norm": 2.1249122619628906, - "learning_rate": 5.18636936537627e-06, - "loss": 0.3971, - "step": 327600 - }, - { - "epoch": 0.017398, - "loss_gen": 6.0957417488098145, - "loss_rtd": 0.2222178876399994, - "loss_sent": 0.041979361325502396, - "loss_sod": 0.15878616273403168, - "loss_total": 0.42298340797424316, - "step": 327699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.945942401885986, - "loss_rtd": 0.21855933964252472, - "loss_sent": 0.32858410477638245, - "loss_sod": 0.015324393287301064, - "loss_total": 0.5624678134918213, - "step": 327699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.450487732887268, - "learning_rate": 5.1723045785332495e-06, - "loss": 0.4524, - "step": 327700 - }, - { - "epoch": 0.017598, - "loss_gen": 6.043429374694824, - "loss_rtd": 0.20989467203617096, - "loss_sent": 0.2914145588874817, - "loss_sod": 0.01982644572854042, - "loss_total": 0.521135687828064, - "step": 327799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.924368858337402, - "loss_rtd": 0.21258048713207245, - "loss_sent": 0.3502015769481659, - "loss_sod": 0.02351352758705616, - "loss_total": 0.5862956047058105, - "step": 327799 - }, - { - "epoch": 0.0176, - "grad_norm": 1.5364757776260376, - "learning_rate": 5.1582578482805845e-06, - "loss": 0.424, - "step": 327800 - }, - { - "epoch": 0.017798, - "loss_gen": 6.11417293548584, - "loss_rtd": 0.20679043233394623, - "loss_sent": 0.13846267759799957, - "loss_sod": 0.028997337445616722, - "loss_total": 0.3742504417896271, - "step": 327899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.896090507507324, - "loss_rtd": 0.2007712870836258, - "loss_sent": 0.3696061074733734, - "loss_sod": 0.014266236685216427, - "loss_total": 0.5846436023712158, - "step": 327899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.9549663066864014, - "learning_rate": 5.144229180276306e-06, - "loss": 0.4127, - "step": 327900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.984939098358154, - "loss_rtd": 0.2159128189086914, - "loss_sent": 0.11442665755748749, - "loss_sod": 0.09687717258930206, - "loss_total": 0.42721664905548096, - "step": 327999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.556375503540039, - "loss_rtd": 0.1961243599653244, - "loss_sent": 0.0903892070055008, - "loss_sod": 0.04251960292458534, - "loss_total": 0.32903316617012024, - "step": 327999 - }, - { - "epoch": 0.018, - "grad_norm": 1.0247933864593506, - "learning_rate": 5.130218580171142e-06, - "loss": 0.4108, - "step": 328000 - }, - { - "epoch": 0.018, - "eval_loss": 0.40445011854171753, - "eval_runtime": 150.832, - "eval_samples_per_second": 102.385, - "eval_steps_per_second": 0.802, - "step": 328000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.796125411987305, - "loss_rtd": 0.2105560004711151, - "loss_sent": 0.11167427152395248, - "loss_sod": 0.09179627895355225, - "loss_total": 0.41402655839920044, - "step": 328099 - }, - { - "epoch": 0.018198, - "loss_gen": 6.014242649078369, - "loss_rtd": 0.2252071499824524, - "loss_sent": 0.6624555587768555, - "loss_sod": 0.03477327153086662, - "loss_total": 0.922435998916626, - "step": 328099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.8746533393859863, - "learning_rate": 5.116226053608564e-06, - "loss": 0.4303, - "step": 328100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.971130847930908, - "loss_rtd": 0.2006784975528717, - "loss_sent": 0.11580485105514526, - "loss_sod": 0.0019422958139330149, - "loss_total": 0.31842565536499023, - "step": 328199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.904659271240234, - "loss_rtd": 0.23206931352615356, - "loss_sent": 0.13666632771492004, - "loss_sod": 0.02995448186993599, - "loss_total": 0.3986901342868805, - "step": 328199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.0602086782455444, - "learning_rate": 5.102251606224767e-06, - "loss": 0.4404, - "step": 328200 - }, - { - "epoch": 0.018598, - "loss_gen": 6.061380863189697, - "loss_rtd": 0.21679025888442993, - "loss_sent": 0.10320959240198135, - "loss_sod": 0.049177706241607666, - "loss_total": 0.36917755007743835, - "step": 328299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.787722587585449, - "loss_rtd": 0.22701473534107208, - "loss_sent": 0.20476825535297394, - "loss_sod": 0.07264123857021332, - "loss_total": 0.5044242143630981, - "step": 328299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.087689995765686, - "learning_rate": 5.088295243648661e-06, - "loss": 0.4157, - "step": 328300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.234886169433594, - "loss_rtd": 0.16600844264030457, - "loss_sent": 0.025756871327757835, - "loss_sod": 0.03823602572083473, - "loss_total": 0.2300013303756714, - "step": 328399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.358644962310791, - "loss_rtd": 0.1765257567167282, - "loss_sent": 5.3043782827444375e-05, - "loss_sod": 0.06265078485012054, - "loss_total": 0.23922958970069885, - "step": 328399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.744238555431366, - "learning_rate": 5.074356971501853e-06, - "loss": 0.4128, - "step": 328400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.778722763061523, - "loss_rtd": 0.21752364933490753, - "loss_sent": 0.05778423324227333, - "loss_sod": 0.07231084257364273, - "loss_total": 0.3476187288761139, - "step": 328499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.061800956726074, - "loss_rtd": 0.17444182932376862, - "loss_sent": 0.015127585269510746, - "loss_sod": 0.03423468768596649, - "loss_total": 0.22380410134792328, - "step": 328499 - }, - { - "epoch": 0.019, - "grad_norm": 0.6600273251533508, - "learning_rate": 5.060436795398682e-06, - "loss": 0.4335, - "step": 328500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.3270487785339355, - "loss_rtd": 0.1867864727973938, - "loss_sent": 4.857310705119744e-05, - "loss_sod": 0.04032709077000618, - "loss_total": 0.22716213762760162, - "step": 328599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.446119785308838, - "loss_rtd": 0.18360395729541779, - "loss_sent": 0.053086526691913605, - "loss_sod": 0.06497945636510849, - "loss_total": 0.3016699254512787, - "step": 328599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.7255327105522156, - "learning_rate": 5.046534720946206e-06, - "loss": 0.4151, - "step": 328600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.637638092041016, - "loss_rtd": 0.22256742417812347, - "loss_sent": 0.10479529947042465, - "loss_sod": 0.05819655582308769, - "loss_total": 0.3855592906475067, - "step": 328699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.865545749664307, - "loss_rtd": 0.2192705124616623, - "loss_sent": 0.4976094961166382, - "loss_sod": 0.06025764346122742, - "loss_total": 0.7771376371383667, - "step": 328699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.254181146621704, - "learning_rate": 5.032650753744156e-06, - "loss": 0.4062, - "step": 328700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.697680950164795, - "loss_rtd": 0.1803479790687561, - "loss_sent": 0.006791319232434034, - "loss_sod": 0.06313110142946243, - "loss_total": 0.25027039647102356, - "step": 328799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.858571529388428, - "loss_rtd": 0.21300005912780762, - "loss_sent": 0.1427404135465622, - "loss_sod": 0.03782755136489868, - "loss_total": 0.3935680389404297, - "step": 328799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.8665528893470764, - "learning_rate": 5.018784899385009e-06, - "loss": 0.4164, - "step": 328800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.665689945220947, - "loss_rtd": 0.19979779422283173, - "loss_sent": 0.1384660005569458, - "loss_sod": 0.07756797969341278, - "loss_total": 0.4158317744731903, - "step": 328899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.6860857009887695, - "loss_rtd": 0.22062909603118896, - "loss_sent": 0.24843384325504303, - "loss_sod": 0.009958944283425808, - "loss_total": 0.4790218770503998, - "step": 328899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.0162591934204102, - "learning_rate": 5.004937163453926e-06, - "loss": 0.4225, - "step": 328900 - }, - { - "epoch": 0.019998, - "loss_gen": 6.146473407745361, - "loss_rtd": 0.20645081996917725, - "loss_sent": 0.26955464482307434, - "loss_sod": 0.06980663537979126, - "loss_total": 0.5458121299743652, - "step": 328999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.845710754394531, - "loss_rtd": 0.23007895052433014, - "loss_sent": 0.3061400055885315, - "loss_sod": 0.018914230167865753, - "loss_total": 0.5551332235336304, - "step": 328999 - }, - { - "epoch": 0.02, - "grad_norm": 2.0122175216674805, - "learning_rate": 4.9911075515287784e-06, - "loss": 0.4163, - "step": 329000 - }, - { - "epoch": 0.02, - "eval_loss": 0.40156105160713196, - "eval_runtime": 152.2938, - "eval_samples_per_second": 101.403, - "eval_steps_per_second": 0.795, - "step": 329000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.700454235076904, - "loss_rtd": 0.199051633477211, - "loss_sent": 0.14078226685523987, - "loss_sod": 0.01881776563823223, - "loss_total": 0.35865166783332825, - "step": 329099 - }, - { - "epoch": 0.020198, - "loss_gen": 6.165766716003418, - "loss_rtd": 0.21244393289089203, - "loss_sent": 0.2035900056362152, - "loss_sod": 0.02626965194940567, - "loss_total": 0.4423035979270935, - "step": 329099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.7645477652549744, - "learning_rate": 4.9772960691801205e-06, - "loss": 0.4157, - "step": 329100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.6507768630981445, - "loss_rtd": 0.18301333487033844, - "loss_sent": 0.06695782393217087, - "loss_sod": 0.01913287490606308, - "loss_total": 0.2691040337085724, - "step": 329199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.8309736251831055, - "loss_rtd": 0.2215300053358078, - "loss_sent": 0.11230618506669998, - "loss_sod": 0.053049247711896896, - "loss_total": 0.3868854343891144, - "step": 329199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.005005955696106, - "learning_rate": 4.963502721971219e-06, - "loss": 0.4304, - "step": 329200 - }, - { - "epoch": 0.020598, - "loss_gen": 6.053281784057617, - "loss_rtd": 0.2102501392364502, - "loss_sent": 0.28845369815826416, - "loss_sod": 0.04737616702914238, - "loss_total": 0.5460799932479858, - "step": 329299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.9263997077941895, - "loss_rtd": 0.2220664769411087, - "loss_sent": 0.570816159248352, - "loss_sod": 0.007466979790478945, - "loss_total": 0.8003495931625366, - "step": 329299 - }, - { - "epoch": 0.0206, - "grad_norm": 2.763258695602417, - "learning_rate": 4.949727515458047e-06, - "loss": 0.4051, - "step": 329300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.951574325561523, - "loss_rtd": 0.21616458892822266, - "loss_sent": 0.13302062451839447, - "loss_sod": 0.08037355542182922, - "loss_total": 0.42955875396728516, - "step": 329399 - }, - { - "epoch": 0.020798, - "loss_gen": 6.0092058181762695, - "loss_rtd": 0.22435683012008667, - "loss_sent": 0.19314207136631012, - "loss_sod": 0.012176268734037876, - "loss_total": 0.4296751618385315, - "step": 329399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8418480753898621, - "learning_rate": 4.935970455189231e-06, - "loss": 0.4428, - "step": 329400 - }, - { - "epoch": 0.020998, - "loss_gen": 6.190423488616943, - "loss_rtd": 0.22325991094112396, - "loss_sent": 0.2840415835380554, - "loss_sod": 0.133723646402359, - "loss_total": 0.6410251259803772, - "step": 329499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.801259517669678, - "loss_rtd": 0.18949387967586517, - "loss_sent": 0.1130494773387909, - "loss_sod": 0.004593970719724894, - "loss_total": 0.30713731050491333, - "step": 329499 - }, - { - "epoch": 0.021, - "grad_norm": 1.0208121538162231, - "learning_rate": 4.922231546706141e-06, - "loss": 0.3941, - "step": 329500 - }, - { - "epoch": 0.021198, - "loss_gen": 6.144215106964111, - "loss_rtd": 0.21672308444976807, - "loss_sent": 0.17375807464122772, - "loss_sod": 0.1187053769826889, - "loss_total": 0.5091865658760071, - "step": 329599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.733941078186035, - "loss_rtd": 0.21451887488365173, - "loss_sent": 0.061883535236120224, - "loss_sod": 0.05031023547053337, - "loss_total": 0.32671263813972473, - "step": 329599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.432464361190796, - "learning_rate": 4.908510795542781e-06, - "loss": 0.4102, - "step": 329600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.977442264556885, - "loss_rtd": 0.21298815310001373, - "loss_sent": 0.4723530411720276, - "loss_sod": 0.0020596610847860575, - "loss_total": 0.6874008774757385, - "step": 329699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.839695930480957, - "loss_rtd": 0.20961076021194458, - "loss_sent": 0.10564636439085007, - "loss_sod": 0.005157722160220146, - "loss_total": 0.32041484117507935, - "step": 329699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.1619139909744263, - "learning_rate": 4.894808207225882e-06, - "loss": 0.4134, - "step": 329700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.89856481552124, - "loss_rtd": 0.22122664749622345, - "loss_sent": 0.15422587096691132, - "loss_sod": 0.08974475413560867, - "loss_total": 0.46519726514816284, - "step": 329799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.273439884185791, - "loss_rtd": 0.17803272604942322, - "loss_sent": 0.04515692964196205, - "loss_sod": 0.06484217941761017, - "loss_total": 0.28803184628486633, - "step": 329799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.8945031762123108, - "learning_rate": 4.881123787274849e-06, - "loss": 0.4226, - "step": 329800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.8102569580078125, - "loss_rtd": 0.20951087772846222, - "loss_sent": 0.3647569715976715, - "loss_sod": 0.03198857977986336, - "loss_total": 0.6062564253807068, - "step": 329899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.953311443328857, - "loss_rtd": 0.21475659310817719, - "loss_sent": 0.1410730928182602, - "loss_sod": 0.08741606771945953, - "loss_total": 0.4432457685470581, - "step": 329899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.364906668663025, - "learning_rate": 4.8674575412017535e-06, - "loss": 0.4077, - "step": 329900 - }, - { - "epoch": 0.021998, - "loss_gen": 6.12127161026001, - "loss_rtd": 0.21860116720199585, - "loss_sent": 0.3105919361114502, - "loss_sod": 0.032918207347393036, - "loss_total": 0.5621113181114197, - "step": 329999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.730120658874512, - "loss_rtd": 0.21275348961353302, - "loss_sent": 0.09844788908958435, - "loss_sod": 0.004904065281152725, - "loss_total": 0.3161054253578186, - "step": 329999 - }, - { - "epoch": 0.022, - "grad_norm": 1.579740047454834, - "learning_rate": 4.853809474511362e-06, - "loss": 0.431, - "step": 330000 - }, - { - "epoch": 0.022, - "eval_loss": 0.3937540054321289, - "eval_runtime": 150.8178, - "eval_samples_per_second": 102.395, - "eval_steps_per_second": 0.802, - "step": 330000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.902409553527832, - "loss_rtd": 0.19768086075782776, - "loss_sent": 0.06069553270936012, - "loss_sod": 0.10106280446052551, - "loss_total": 0.3594391942024231, - "step": 330099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.86720085144043, - "loss_rtd": 0.2345312386751175, - "loss_sent": 0.07318239659070969, - "loss_sod": 0.002706903498619795, - "loss_total": 0.31042054295539856, - "step": 330099 - }, - { - "epoch": 0.0222, - "grad_norm": 0.8370559811592102, - "learning_rate": 4.840179592701111e-06, - "loss": 0.3974, - "step": 330100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.917291641235352, - "loss_rtd": 0.19501982629299164, - "loss_sent": 0.24797093868255615, - "loss_sod": 0.03391135483980179, - "loss_total": 0.4769021272659302, - "step": 330199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.8460259437561035, - "loss_rtd": 0.20476824045181274, - "loss_sent": 0.421603262424469, - "loss_sod": 0.022958368062973022, - "loss_total": 0.6493299007415771, - "step": 330199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.2038854360580444, - "learning_rate": 4.826567901261131e-06, - "loss": 0.4202, - "step": 330200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.807995796203613, - "loss_rtd": 0.21853376924991608, - "loss_sent": 0.4487013816833496, - "loss_sod": 0.02856394648551941, - "loss_total": 0.6957991123199463, - "step": 330299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.814579010009766, - "loss_rtd": 0.2088613361120224, - "loss_sent": 0.11348633468151093, - "loss_sod": 0.032599225640296936, - "loss_total": 0.35494691133499146, - "step": 330299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.9923590421676636, - "learning_rate": 4.812974405674192e-06, - "loss": 0.4108, - "step": 330300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.453118324279785, - "loss_rtd": 0.18154007196426392, - "loss_sent": 0.0454830676317215, - "loss_sod": 0.029125642031431198, - "loss_total": 0.2561487853527069, - "step": 330399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.678781986236572, - "loss_rtd": 0.18436993658542633, - "loss_sent": 0.09236066788434982, - "loss_sod": 0.03614688292145729, - "loss_total": 0.31287747621536255, - "step": 330399 - }, - { - "epoch": 0.0228, - "grad_norm": 0.7529614567756653, - "learning_rate": 4.7993991114157585e-06, - "loss": 0.4214, - "step": 330400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.745978355407715, - "loss_rtd": 0.21094991266727448, - "loss_sent": 0.08378388732671738, - "loss_sod": 0.014974737539887428, - "loss_total": 0.30970853567123413, - "step": 330499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.638060569763184, - "loss_rtd": 0.21033842861652374, - "loss_sent": 0.1420600712299347, - "loss_sod": 0.018450338393449783, - "loss_total": 0.3708488345146179, - "step": 330499 - }, - { - "epoch": 0.023, - "grad_norm": 0.7480770945549011, - "learning_rate": 4.785842023953951e-06, - "loss": 0.4241, - "step": 330500 - }, - { - "epoch": 0.023198, - "loss_gen": 6.05764102935791, - "loss_rtd": 0.22154130041599274, - "loss_sent": 0.09092435240745544, - "loss_sod": 0.047517575323581696, - "loss_total": 0.3599832355976105, - "step": 330599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.909764289855957, - "loss_rtd": 0.2100762575864792, - "loss_sent": 0.12694038450717926, - "loss_sod": 0.03214793652296066, - "loss_total": 0.3691645860671997, - "step": 330599 - }, - { - "epoch": 0.0232, - "grad_norm": 1.1987636089324951, - "learning_rate": 4.772303148749585e-06, - "loss": 0.413, - "step": 330600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.682451248168945, - "loss_rtd": 0.22611628472805023, - "loss_sent": 0.3129121661186218, - "loss_sod": 0.007085670251399279, - "loss_total": 0.5461140871047974, - "step": 330699 - }, - { - "epoch": 0.023398, - "loss_gen": 6.099686622619629, - "loss_rtd": 0.20334909856319427, - "loss_sent": 0.1557920277118683, - "loss_sod": 0.021201398223638535, - "loss_total": 0.3803425431251526, - "step": 330699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.9493911266326904, - "learning_rate": 4.758782491256092e-06, - "loss": 0.4123, - "step": 330700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.896134853363037, - "loss_rtd": 0.2380102276802063, - "loss_sent": 0.2869156301021576, - "loss_sod": 0.017074715346097946, - "loss_total": 0.5420005321502686, - "step": 330799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.955774784088135, - "loss_rtd": 0.21714618802070618, - "loss_sent": 0.30493423342704773, - "loss_sod": 0.03374708816409111, - "loss_total": 0.5558274984359741, - "step": 330799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.0822604894638062, - "learning_rate": 4.745280056919599e-06, - "loss": 0.3997, - "step": 330800 - }, - { - "epoch": 0.023798, - "loss_gen": 6.148456573486328, - "loss_rtd": 0.2017562836408615, - "loss_sent": 0.35946691036224365, - "loss_sod": 0.013228020630776882, - "loss_total": 0.574451208114624, - "step": 330899 - }, - { - "epoch": 0.023798, - "loss_gen": 6.044058799743652, - "loss_rtd": 0.21628901362419128, - "loss_sent": 0.20557735860347748, - "loss_sod": 0.038398340344429016, - "loss_total": 0.46026474237442017, - "step": 330899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.6446350812911987, - "learning_rate": 4.731795851178889e-06, - "loss": 0.4111, - "step": 330900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.843855857849121, - "loss_rtd": 0.20253688097000122, - "loss_sent": 0.13047783076763153, - "loss_sod": 0.008157811127603054, - "loss_total": 0.3411725163459778, - "step": 330999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.928069591522217, - "loss_rtd": 0.19949424266815186, - "loss_sent": 0.28561946749687195, - "loss_sod": 0.13547632098197937, - "loss_total": 0.6205900311470032, - "step": 330999 - }, - { - "epoch": 0.024, - "grad_norm": 1.5329244136810303, - "learning_rate": 4.7183298794654055e-06, - "loss": 0.4174, - "step": 331000 - }, - { - "epoch": 0.024, - "eval_loss": 0.4025688171386719, - "eval_runtime": 151.162, - "eval_samples_per_second": 102.162, - "eval_steps_per_second": 0.8, - "step": 331000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.378228664398193, - "loss_rtd": 0.17921258509159088, - "loss_sent": 2.3826671167626046e-05, - "loss_sod": 0.10741123557090759, - "loss_total": 0.28664764761924744, - "step": 331099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.4460296630859375, - "loss_rtd": 0.15814580023288727, - "loss_sent": 0.019092829897999763, - "loss_sod": 0.19544920325279236, - "loss_total": 0.37268784642219543, - "step": 331099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.2692033052444458, - "learning_rate": 4.704882147203221e-06, - "loss": 0.4189, - "step": 331100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.772451400756836, - "loss_rtd": 0.20426462590694427, - "loss_sent": 0.09192955493927002, - "loss_sod": 0.04017992690205574, - "loss_total": 0.33637410402297974, - "step": 331199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.893918991088867, - "loss_rtd": 0.20278365910053253, - "loss_sent": 0.18656004965305328, - "loss_sod": 0.10743507742881775, - "loss_total": 0.49677878618240356, - "step": 331199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.0564281940460205, - "learning_rate": 4.691452659809092e-06, - "loss": 0.4259, - "step": 331200 - }, - { - "epoch": 0.024598, - "loss_gen": 6.00196647644043, - "loss_rtd": 0.2053288370370865, - "loss_sent": 0.4615537226200104, - "loss_sod": 0.023048587143421173, - "loss_total": 0.6899311542510986, - "step": 331299 - }, - { - "epoch": 0.024598, - "loss_gen": 6.271486759185791, - "loss_rtd": 0.2220696359872818, - "loss_sent": 0.23215635120868683, - "loss_sod": 0.04173806309700012, - "loss_total": 0.49596405029296875, - "step": 331299 - }, - { - "epoch": 0.0246, - "grad_norm": 2.853314161300659, - "learning_rate": 4.678041422692414e-06, - "loss": 0.4395, - "step": 331300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.253432273864746, - "loss_rtd": 0.1898089200258255, - "loss_sent": 0.005767661612480879, - "loss_sod": 0.08395013958215714, - "loss_total": 0.2795267403125763, - "step": 331399 - }, - { - "epoch": 0.024798, - "loss_gen": 4.980484962463379, - "loss_rtd": 0.1327882707118988, - "loss_sent": 2.9455086405505426e-05, - "loss_sod": 0.030677590519189835, - "loss_total": 0.16349531710147858, - "step": 331399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.982572078704834, - "learning_rate": 4.664648441255237e-06, - "loss": 0.4176, - "step": 331400 - }, - { - "epoch": 0.024998, - "loss_gen": 6.060155868530273, - "loss_rtd": 0.20426997542381287, - "loss_sent": 0.23714661598205566, - "loss_sod": 0.05675915628671646, - "loss_total": 0.4981757402420044, - "step": 331499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.616395473480225, - "loss_rtd": 0.20990914106369019, - "loss_sent": 0.08643140643835068, - "loss_sod": 0.025104522705078125, - "loss_total": 0.3214450776576996, - "step": 331499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8399190306663513, - "learning_rate": 4.651273720892241e-06, - "loss": 0.433, - "step": 331500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.994178295135498, - "loss_rtd": 0.2139955461025238, - "loss_sent": 0.19764533638954163, - "loss_sod": 0.019860614091157913, - "loss_total": 0.43150150775909424, - "step": 331599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.882445335388184, - "loss_rtd": 0.2033146768808365, - "loss_sent": 0.07411482185125351, - "loss_sod": 0.01846025511622429, - "loss_total": 0.2958897650241852, - "step": 331599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.157501459121704, - "learning_rate": 4.637917266990766e-06, - "loss": 0.4205, - "step": 331600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.274556636810303, - "loss_rtd": 0.18524613976478577, - "loss_sent": 2.4407792807323858e-05, - "loss_sod": 0.19543135166168213, - "loss_total": 0.3807018995285034, - "step": 331699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.718486309051514, - "loss_rtd": 0.19666177034378052, - "loss_sent": 0.036764759570360184, - "loss_sod": 0.042713478207588196, - "loss_total": 0.2761400043964386, - "step": 331699 - }, - { - "epoch": 0.0254, - "grad_norm": 1.2056779861450195, - "learning_rate": 4.6245790849307966e-06, - "loss": 0.4246, - "step": 331700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.873589515686035, - "loss_rtd": 0.2102714329957962, - "loss_sent": 0.1378559023141861, - "loss_sod": 0.07041903585195541, - "loss_total": 0.4185463786125183, - "step": 331799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.940451145172119, - "loss_rtd": 0.19915197789669037, - "loss_sent": 0.4912972152233124, - "loss_sod": 0.039577387273311615, - "loss_total": 0.7300266027450562, - "step": 331799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.8488335609436035, - "learning_rate": 4.611259180084942e-06, - "loss": 0.4208, - "step": 331800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.767001152038574, - "loss_rtd": 0.20476117730140686, - "loss_sent": 0.19007740914821625, - "loss_sod": 0.00638324860483408, - "loss_total": 0.4012218415737152, - "step": 331899 - }, - { - "epoch": 0.025798, - "loss_gen": 6.0202765464782715, - "loss_rtd": 0.2258429080247879, - "loss_sent": 0.17241758108139038, - "loss_sod": 0.02805159240961075, - "loss_total": 0.42631208896636963, - "step": 331899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.3682467937469482, - "learning_rate": 4.597957557818456e-06, - "loss": 0.4192, - "step": 331900 - }, - { - "epoch": 0.025998, - "loss_gen": 6.004212856292725, - "loss_rtd": 0.2101108431816101, - "loss_sent": 0.310147225856781, - "loss_sod": 0.02578401193022728, - "loss_total": 0.5460420846939087, - "step": 331999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.76234769821167, - "loss_rtd": 0.21826069056987762, - "loss_sent": 0.06891893595457077, - "loss_sod": 0.008715218864381313, - "loss_total": 0.2958948314189911, - "step": 331999 - }, - { - "epoch": 0.026, - "grad_norm": 0.9495974183082581, - "learning_rate": 4.584674223489238e-06, - "loss": 0.426, - "step": 332000 - }, - { - "epoch": 0.026, - "eval_loss": 0.40221095085144043, - "eval_runtime": 150.896, - "eval_samples_per_second": 102.342, - "eval_steps_per_second": 0.802, - "step": 332000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.7623610496521, - "loss_rtd": 0.19780445098876953, - "loss_sent": 0.1770644634962082, - "loss_sod": 0.0342760868370533, - "loss_total": 0.4091449975967407, - "step": 332099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.960206508636475, - "loss_rtd": 0.211539164185524, - "loss_sent": 0.10768450796604156, - "loss_sod": 0.03814418241381645, - "loss_total": 0.3573678731918335, - "step": 332099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9639382362365723, - "learning_rate": 4.5714091824478225e-06, - "loss": 0.4232, - "step": 332100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.600955486297607, - "loss_rtd": 0.2025652825832367, - "loss_sent": 0.08323058485984802, - "loss_sod": 0.016329387202858925, - "loss_total": 0.3021252751350403, - "step": 332199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.2745041847229, - "loss_rtd": 0.16661636531352997, - "loss_sent": 0.05867577716708183, - "loss_sod": 0.039194971323013306, - "loss_total": 0.2644871175289154, - "step": 332199 - }, - { - "epoch": 0.0264, - "grad_norm": 0.7608261108398438, - "learning_rate": 4.55816244003735e-06, - "loss": 0.4154, - "step": 332200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.867252349853516, - "loss_rtd": 0.19580833613872528, - "loss_sent": 0.14206142723560333, - "loss_sod": 0.02759961597621441, - "loss_total": 0.36546939611434937, - "step": 332299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.935952186584473, - "loss_rtd": 0.20971213281154633, - "loss_sent": 0.22192716598510742, - "loss_sod": 0.01085263304412365, - "loss_total": 0.44249194860458374, - "step": 332299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.8669111132621765, - "learning_rate": 4.5449340015936035e-06, - "loss": 0.4324, - "step": 332300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.52266788482666, - "loss_rtd": 0.22774533927440643, - "loss_sent": 0.2556370198726654, - "loss_sod": 0.045659035444259644, - "loss_total": 0.5290414094924927, - "step": 332399 - }, - { - "epoch": 0.026798, - "loss_gen": 6.224093437194824, - "loss_rtd": 0.1985151618719101, - "loss_sent": 0.12970012426376343, - "loss_sod": 0.04957842454314232, - "loss_total": 0.37779372930526733, - "step": 332399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.9836230874061584, - "learning_rate": 4.531723872445015e-06, - "loss": 0.406, - "step": 332400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.808529376983643, - "loss_rtd": 0.20841383934020996, - "loss_sent": 0.18419690430164337, - "loss_sod": 0.04101523011922836, - "loss_total": 0.4336259961128235, - "step": 332499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.823904514312744, - "loss_rtd": 0.21034353971481323, - "loss_sent": 0.0916057601571083, - "loss_sod": 0.05213785171508789, - "loss_total": 0.35408714413642883, - "step": 332499 - }, - { - "epoch": 0.027, - "grad_norm": 0.9661129117012024, - "learning_rate": 4.518532057912617e-06, - "loss": 0.422, - "step": 332500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.203954219818115, - "loss_rtd": 0.15858696401119232, - "loss_sent": 2.3970091206138022e-05, - "loss_sod": 0.07966428995132446, - "loss_total": 0.23827522993087769, - "step": 332599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.551509857177734, - "loss_rtd": 0.1735328584909439, - "loss_sent": 0.059783339500427246, - "loss_sod": 0.16423340141773224, - "loss_total": 0.3975495994091034, - "step": 332599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.4101791381835938, - "learning_rate": 4.505358563310058e-06, - "loss": 0.4181, - "step": 332600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.864551067352295, - "loss_rtd": 0.2109133005142212, - "loss_sent": 0.35520753264427185, - "loss_sod": 0.09790265560150146, - "loss_total": 0.6640235185623169, - "step": 332699 - }, - { - "epoch": 0.027398, - "loss_gen": 6.5219855308532715, - "loss_rtd": 0.19909067451953888, - "loss_sent": 0.1204799935221672, - "loss_sod": 0.1021123081445694, - "loss_total": 0.4216829836368561, - "step": 332699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.3958179950714111, - "learning_rate": 4.4922033939436285e-06, - "loss": 0.4164, - "step": 332700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.039506912231445, - "loss_rtd": 0.22093138098716736, - "loss_sent": 0.4375079572200775, - "loss_sod": 0.019474415108561516, - "loss_total": 0.6779137849807739, - "step": 332799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.763535499572754, - "loss_rtd": 0.21234478056430817, - "loss_sent": 0.3248450458049774, - "loss_sod": 0.04969676584005356, - "loss_total": 0.5868865847587585, - "step": 332799 - }, - { - "epoch": 0.0276, - "grad_norm": 2.4942786693573, - "learning_rate": 4.479066555112233e-06, - "loss": 0.406, - "step": 332800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.916197776794434, - "loss_rtd": 0.2195403277873993, - "loss_sent": 0.2267300933599472, - "loss_sod": 0.0210191048681736, - "loss_total": 0.4672895073890686, - "step": 332899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.780374526977539, - "loss_rtd": 0.187737375497818, - "loss_sent": 0.11766417324542999, - "loss_sod": 0.004940190352499485, - "loss_total": 0.3103417456150055, - "step": 332899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.367370367050171, - "learning_rate": 4.465948052107388e-06, - "loss": 0.4057, - "step": 332900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.854354381561279, - "loss_rtd": 0.20612572133541107, - "loss_sent": 0.2872171103954315, - "loss_sod": 0.007354743778705597, - "loss_total": 0.5006976127624512, - "step": 332999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.293296813964844, - "loss_rtd": 0.17345906794071198, - "loss_sent": 0.002196255372837186, - "loss_sod": 0.08091104030609131, - "loss_total": 0.2565663456916809, - "step": 332999 - }, - { - "epoch": 0.028, - "grad_norm": 1.0123155117034912, - "learning_rate": 4.452847890213218e-06, - "loss": 0.4264, - "step": 333000 - }, - { - "epoch": 0.028, - "eval_loss": 0.39921894669532776, - "eval_runtime": 151.2059, - "eval_samples_per_second": 102.132, - "eval_steps_per_second": 0.8, - "step": 333000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.67648983001709, - "loss_rtd": 0.20993544161319733, - "loss_sent": 0.056589748710393906, - "loss_sod": 0.03604697436094284, - "loss_total": 0.3025721609592438, - "step": 333099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.277653694152832, - "loss_rtd": 0.19301404058933258, - "loss_sent": 2.6689376682043076e-05, - "loss_sod": 0.14895227551460266, - "loss_total": 0.3419930338859558, - "step": 333099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.9726792573928833, - "learning_rate": 4.439766074706469e-06, - "loss": 0.4038, - "step": 333100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.845165729522705, - "loss_rtd": 0.216050386428833, - "loss_sent": 0.23020198941230774, - "loss_sod": 0.024312572553753853, - "loss_total": 0.47056496143341064, - "step": 333199 - }, - { - "epoch": 0.028398, - "loss_gen": 6.083836555480957, - "loss_rtd": 0.20436830818653107, - "loss_sent": 0.1874687373638153, - "loss_sod": 0.031638503074645996, - "loss_total": 0.42347556352615356, - "step": 333199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.3433164358139038, - "learning_rate": 4.426702610856509e-06, - "loss": 0.41, - "step": 333200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.622547626495361, - "loss_rtd": 0.19420625269412994, - "loss_sent": 0.15851068496704102, - "loss_sod": 0.05337762087583542, - "loss_total": 0.4060945510864258, - "step": 333299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.85243558883667, - "loss_rtd": 0.2162763476371765, - "loss_sent": 0.17673559486865997, - "loss_sod": 0.0460338331758976, - "loss_total": 0.439045786857605, - "step": 333299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.818702220916748, - "learning_rate": 4.413657503925278e-06, - "loss": 0.4265, - "step": 333300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.828593730926514, - "loss_rtd": 0.20923027396202087, - "loss_sent": 0.08376859873533249, - "loss_sod": 0.011367118917405605, - "loss_total": 0.30436599254608154, - "step": 333399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.851904392242432, - "loss_rtd": 0.23087845742702484, - "loss_sent": 0.07225316017866135, - "loss_sod": 0.012547656893730164, - "loss_total": 0.31567928194999695, - "step": 333399 - }, - { - "epoch": 0.0288, - "grad_norm": 0.5214357972145081, - "learning_rate": 4.400630759167351e-06, - "loss": 0.4255, - "step": 333400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.92397928237915, - "loss_rtd": 0.2033231556415558, - "loss_sent": 0.2656804621219635, - "loss_sod": 0.014136162586510181, - "loss_total": 0.4831397831439972, - "step": 333499 - }, - { - "epoch": 0.028998, - "loss_gen": 6.0329203605651855, - "loss_rtd": 0.19545994699001312, - "loss_sent": 0.3682764768600464, - "loss_sod": 0.059346456080675125, - "loss_total": 0.6230828762054443, - "step": 333499 - }, - { - "epoch": 0.029, - "grad_norm": 2.159041166305542, - "learning_rate": 4.387622381829898e-06, - "loss": 0.4516, - "step": 333500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.921048164367676, - "loss_rtd": 0.21536974608898163, - "loss_sent": 0.33759239315986633, - "loss_sod": 0.009517904371023178, - "loss_total": 0.5624800324440002, - "step": 333599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.990695953369141, - "loss_rtd": 0.19041845202445984, - "loss_sent": 0.3113158345222473, - "loss_sod": 0.08361324667930603, - "loss_total": 0.5853475332260132, - "step": 333599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.6791911125183105, - "learning_rate": 4.3746323771527095e-06, - "loss": 0.4228, - "step": 333600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.6814866065979, - "loss_rtd": 0.19786830246448517, - "loss_sent": 0.0681382566690445, - "loss_sod": 0.04563521221280098, - "loss_total": 0.31164175271987915, - "step": 333699 - }, - { - "epoch": 0.029398, - "loss_gen": 4.993233680725098, - "loss_rtd": 0.14971184730529785, - "loss_sent": 0.001132254721596837, - "loss_sod": 0.06022341549396515, - "loss_total": 0.21106751263141632, - "step": 333699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.7783605456352234, - "learning_rate": 4.361660750368129e-06, - "loss": 0.4281, - "step": 333700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.527941703796387, - "loss_rtd": 0.16626811027526855, - "loss_sent": 3.38069221470505e-05, - "loss_sod": 0.06308425217866898, - "loss_total": 0.22938616573810577, - "step": 333799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.3150129318237305, - "loss_rtd": 0.14878471195697784, - "loss_sent": 0.010522390715777874, - "loss_sod": 0.09989697486162186, - "loss_total": 0.25920408964157104, - "step": 333799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.063338041305542, - "learning_rate": 4.348707506701144e-06, - "loss": 0.4405, - "step": 333800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.809854984283447, - "loss_rtd": 0.2180991917848587, - "loss_sent": 0.38718050718307495, - "loss_sod": 0.08889317512512207, - "loss_total": 0.6941728591918945, - "step": 333899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.348130702972412, - "loss_rtd": 0.17794682085514069, - "loss_sent": 0.0166650228202343, - "loss_sod": 0.07842186093330383, - "loss_total": 0.2730337083339691, - "step": 333899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.609898567199707, - "learning_rate": 4.335772651369318e-06, - "loss": 0.413, - "step": 333900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.576001167297363, - "loss_rtd": 0.1630786806344986, - "loss_sent": 2.432770634186454e-05, - "loss_sod": 0.19459104537963867, - "loss_total": 0.35769402980804443, - "step": 333999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.616293430328369, - "loss_rtd": 0.1615023910999298, - "loss_sent": 0.001869112253189087, - "loss_sod": 0.13924895226955414, - "loss_total": 0.30262044072151184, - "step": 333999 - }, - { - "epoch": 0.03, - "grad_norm": 1.396501064300537, - "learning_rate": 4.322856189582814e-06, - "loss": 0.4012, - "step": 334000 - }, - { - "epoch": 0.03, - "eval_loss": 0.40007197856903076, - "eval_runtime": 150.8858, - "eval_samples_per_second": 102.349, - "eval_steps_per_second": 0.802, - "step": 334000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.429172515869141, - "loss_rtd": 0.17763426899909973, - "loss_sent": 0.04315278306603432, - "loss_sod": 0.0121862031519413, - "loss_total": 0.23297324776649475, - "step": 334099 - }, - { - "epoch": 0.030198, - "loss_gen": 6.005903720855713, - "loss_rtd": 0.21334975957870483, - "loss_sent": 0.1491318941116333, - "loss_sod": 0.005976186133921146, - "loss_total": 0.3684578537940979, - "step": 334099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.41744357347488403, - "learning_rate": 4.309958126544361e-06, - "loss": 0.4623, - "step": 334100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.986791610717773, - "loss_rtd": 0.21256619691848755, - "loss_sent": 0.0793716236948967, - "loss_sod": 0.07309561222791672, - "loss_total": 0.36503344774246216, - "step": 334199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.930571556091309, - "loss_rtd": 0.20606614649295807, - "loss_sent": 0.2768394947052002, - "loss_sod": 0.054543472826480865, - "loss_total": 0.5374491214752197, - "step": 334199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.406572937965393, - "learning_rate": 4.297078467449317e-06, - "loss": 0.4166, - "step": 334200 - }, - { - "epoch": 0.030598, - "loss_gen": 6.10678768157959, - "loss_rtd": 0.2104867547750473, - "loss_sent": 0.17154887318611145, - "loss_sod": 0.06073742359876633, - "loss_total": 0.4427730441093445, - "step": 334299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.565155506134033, - "loss_rtd": 0.19027173519134521, - "loss_sent": 0.2150413691997528, - "loss_sod": 0.004843328148126602, - "loss_total": 0.4101564288139343, - "step": 334299 - }, - { - "epoch": 0.0306, - "grad_norm": 0.7450965642929077, - "learning_rate": 4.284217217485598e-06, - "loss": 0.4079, - "step": 334300 - }, - { - "epoch": 0.030798, - "loss_gen": 6.118762016296387, - "loss_rtd": 0.19611217081546783, - "loss_sent": 0.520443320274353, - "loss_sod": 0.020224379375576973, - "loss_total": 0.7367798686027527, - "step": 334399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.879006862640381, - "loss_rtd": 0.21311043202877045, - "loss_sent": 0.07570214569568634, - "loss_sod": 0.029709361493587494, - "loss_total": 0.3185219168663025, - "step": 334399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.4805805683135986, - "learning_rate": 4.271374381833726e-06, - "loss": 0.4249, - "step": 334400 - }, - { - "epoch": 0.030998, - "loss_gen": 6.06980562210083, - "loss_rtd": 0.20259004831314087, - "loss_sent": 0.10015519708395004, - "loss_sod": 0.028424939140677452, - "loss_total": 0.3311701714992523, - "step": 334499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.831358432769775, - "loss_rtd": 0.23395182192325592, - "loss_sent": 0.17726194858551025, - "loss_sod": 0.033757809549570084, - "loss_total": 0.44497159123420715, - "step": 334499 - }, - { - "epoch": 0.031, - "grad_norm": 0.9060441255569458, - "learning_rate": 4.258549965666775e-06, - "loss": 0.4053, - "step": 334500 - }, - { - "epoch": 0.031198, - "loss_gen": 6.059021472930908, - "loss_rtd": 0.21139657497406006, - "loss_sent": 0.3153374493122101, - "loss_sod": 0.036520786583423615, - "loss_total": 0.5632548332214355, - "step": 334599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.8541364669799805, - "loss_rtd": 0.2198180854320526, - "loss_sent": 0.07089675217866898, - "loss_sod": 0.0195726677775383, - "loss_total": 0.3102875053882599, - "step": 334599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.8097517490386963, - "learning_rate": 4.245743974150429e-06, - "loss": 0.4226, - "step": 334600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.453299522399902, - "loss_rtd": 0.18011866509914398, - "loss_sent": 2.362178565817885e-05, - "loss_sod": 0.10460500419139862, - "loss_total": 0.28474730253219604, - "step": 334699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.481233596801758, - "loss_rtd": 0.17237406969070435, - "loss_sent": 2.381735248491168e-05, - "loss_sod": 0.08869794011116028, - "loss_total": 0.2610958218574524, - "step": 334699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.827171802520752, - "learning_rate": 4.2329564124429455e-06, - "loss": 0.4162, - "step": 334700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.341057777404785, - "loss_rtd": 0.18300172686576843, - "loss_sent": 0.013174586929380894, - "loss_sod": 0.038474343717098236, - "loss_total": 0.23465067148208618, - "step": 334799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.3318939208984375, - "loss_rtd": 0.19755342602729797, - "loss_sent": 0.13849912583827972, - "loss_sod": 0.021874723955988884, - "loss_total": 0.35792726278305054, - "step": 334799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.9487466812133789, - "learning_rate": 4.220187285695137e-06, - "loss": 0.418, - "step": 334800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.989569664001465, - "loss_rtd": 0.20516981184482574, - "loss_sent": 0.2394498884677887, - "loss_sod": 0.02916102670133114, - "loss_total": 0.47378072142601013, - "step": 334899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.98516845703125, - "loss_rtd": 0.21761848032474518, - "loss_sent": 0.14362525939941406, - "loss_sod": 0.04514048993587494, - "loss_total": 0.4063842296600342, - "step": 334899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.5992496013641357, - "learning_rate": 4.207436599050418e-06, - "loss": 0.4284, - "step": 334900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.7612624168396, - "loss_rtd": 0.20631392300128937, - "loss_sent": 0.07714895159006119, - "loss_sod": 0.015602566301822662, - "loss_total": 0.2990654408931732, - "step": 334999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.76633882522583, - "loss_rtd": 0.1994708627462387, - "loss_sent": 0.17649705708026886, - "loss_sod": 0.018484674394130707, - "loss_total": 0.3944525718688965, - "step": 334999 - }, - { - "epoch": 0.032, - "grad_norm": 0.6809886693954468, - "learning_rate": 4.1947043576447575e-06, - "loss": 0.4309, - "step": 335000 - }, - { - "epoch": 0.032, - "eval_loss": 0.39839765429496765, - "eval_runtime": 152.5008, - "eval_samples_per_second": 101.265, - "eval_steps_per_second": 0.793, - "step": 335000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.944319725036621, - "loss_rtd": 0.21528884768486023, - "loss_sent": 0.22339734435081482, - "loss_sod": 0.02848324179649353, - "loss_total": 0.4671694338321686, - "step": 335099 - }, - { - "epoch": 0.000198, - "loss_gen": 6.1428656578063965, - "loss_rtd": 0.207811176776886, - "loss_sent": 0.0520741268992424, - "loss_sod": 0.10995840281248093, - "loss_total": 0.3698437213897705, - "step": 335099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.1972646713256836, - "learning_rate": 4.181990566606714e-06, - "loss": 0.4186, - "step": 335100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.5024871826171875, - "loss_rtd": 0.2226995825767517, - "loss_sent": 0.1211906224489212, - "loss_sod": 0.00852234847843647, - "loss_total": 0.35241255164146423, - "step": 335199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.963098526000977, - "loss_rtd": 0.20816507935523987, - "loss_sent": 0.17893053591251373, - "loss_sod": 0.01881728321313858, - "loss_total": 0.4059128761291504, - "step": 335199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.5845366716384888, - "learning_rate": 4.169295231057385e-06, - "loss": 0.4128, - "step": 335200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.1882853507995605, - "loss_rtd": 0.18059375882148743, - "loss_sent": 2.342807420063764e-05, - "loss_sod": 0.14032980799674988, - "loss_total": 0.32094699144363403, - "step": 335299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.190073013305664, - "loss_rtd": 0.16980881989002228, - "loss_sent": 0.09756369143724442, - "loss_sod": 0.11427263915538788, - "loss_total": 0.381645143032074, - "step": 335299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.116196632385254, - "learning_rate": 4.156618356110453e-06, - "loss": 0.4281, - "step": 335300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.813076972961426, - "loss_rtd": 0.21575145423412323, - "loss_sent": 0.4184168577194214, - "loss_sod": 0.04405057057738304, - "loss_total": 0.6782188415527344, - "step": 335399 - }, - { - "epoch": 0.000798, - "loss_gen": 6.0512213706970215, - "loss_rtd": 0.19395382702350616, - "loss_sent": 0.21937574446201324, - "loss_sod": 0.07406766712665558, - "loss_total": 0.4873972535133362, - "step": 335399 - }, - { - "epoch": 0.0008, - "grad_norm": 2.974411964416504, - "learning_rate": 4.143959946872167e-06, - "loss": 0.4205, - "step": 335400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.950162887573242, - "loss_rtd": 0.18977870047092438, - "loss_sent": 0.1720137596130371, - "loss_sod": 0.0743497759103775, - "loss_total": 0.436142235994339, - "step": 335499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.927491188049316, - "loss_rtd": 0.20648464560508728, - "loss_sent": 0.5373300909996033, - "loss_sod": 0.02205422893166542, - "loss_total": 0.7658689618110657, - "step": 335499 - }, - { - "epoch": 0.001, - "grad_norm": 1.7152159214019775, - "learning_rate": 4.131320008441336e-06, - "loss": 0.4373, - "step": 335500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.957103252410889, - "loss_rtd": 0.20949038863182068, - "loss_sent": 0.12904676795005798, - "loss_sod": 0.029970047995448112, - "loss_total": 0.3685072064399719, - "step": 335599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.924739360809326, - "loss_rtd": 0.206039160490036, - "loss_sent": 0.1924024075269699, - "loss_sod": 0.005136069841682911, - "loss_total": 0.40357762575149536, - "step": 335599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.5711073279380798, - "learning_rate": 4.118698545909311e-06, - "loss": 0.4161, - "step": 335600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.936959743499756, - "loss_rtd": 0.20973554253578186, - "loss_sent": 0.09327895194292068, - "loss_sod": 0.014217485673725605, - "loss_total": 0.3172319829463959, - "step": 335699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.935591697692871, - "loss_rtd": 0.20587770640850067, - "loss_sent": 0.04953960329294205, - "loss_sod": 0.014704114757478237, - "loss_total": 0.27012142539024353, - "step": 335699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.6653057336807251, - "learning_rate": 4.10609556436003e-06, - "loss": 0.3989, - "step": 335700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.859136581420898, - "loss_rtd": 0.22443939745426178, - "loss_sent": 0.17341157793998718, - "loss_sod": 0.005505038425326347, - "loss_total": 0.40335601568222046, - "step": 335799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.770941257476807, - "loss_rtd": 0.19478027522563934, - "loss_sent": 0.20250475406646729, - "loss_sod": 0.04144607484340668, - "loss_total": 0.4387311041355133, - "step": 335799 - }, - { - "epoch": 0.0016, - "grad_norm": 1.1641507148742676, - "learning_rate": 4.093511068869965e-06, - "loss": 0.4142, - "step": 335800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.79935359954834, - "loss_rtd": 0.22523431479930878, - "loss_sent": 0.06826870888471603, - "loss_sod": 0.03287557139992714, - "loss_total": 0.32637861371040344, - "step": 335899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.779669284820557, - "loss_rtd": 0.22280511260032654, - "loss_sent": 0.2086220234632492, - "loss_sod": 0.009479574859142303, - "loss_total": 0.44090670347213745, - "step": 335899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.1660898923873901, - "learning_rate": 4.080945064508157e-06, - "loss": 0.4219, - "step": 335900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.86229133605957, - "loss_rtd": 0.20115306973457336, - "loss_sent": 0.14512121677398682, - "loss_sod": 0.03269626945257187, - "loss_total": 0.37897056341171265, - "step": 335999 - }, - { - "epoch": 0.001998, - "loss_gen": 6.054866313934326, - "loss_rtd": 0.21286867558956146, - "loss_sent": 0.15473420917987823, - "loss_sod": 0.01898730918765068, - "loss_total": 0.3865901827812195, - "step": 335999 - }, - { - "epoch": 0.002, - "grad_norm": 0.7506734728813171, - "learning_rate": 4.068397556336179e-06, - "loss": 0.4249, - "step": 336000 - }, - { - "epoch": 0.002, - "eval_loss": 0.3963145911693573, - "eval_runtime": 153.2053, - "eval_samples_per_second": 100.799, - "eval_steps_per_second": 0.79, - "step": 336000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.863484859466553, - "loss_rtd": 0.2079601287841797, - "loss_sent": 0.14807070791721344, - "loss_sod": 0.10638213902711868, - "loss_total": 0.46241295337677, - "step": 336099 - }, - { - "epoch": 0.002198, - "loss_gen": 6.015509128570557, - "loss_rtd": 0.2064938247203827, - "loss_sent": 0.1779230684041977, - "loss_sod": 0.1254219114780426, - "loss_total": 0.5098388195037842, - "step": 336099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.2778013944625854, - "learning_rate": 4.0558685494081764e-06, - "loss": 0.4224, - "step": 336100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.445869445800781, - "loss_rtd": 0.20915165543556213, - "loss_sent": 0.0642315000295639, - "loss_sod": 0.07996993511915207, - "loss_total": 0.3533530831336975, - "step": 336199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.256261348724365, - "loss_rtd": 0.1652882844209671, - "loss_sent": 0.022786187008023262, - "loss_sod": 0.1049724817276001, - "loss_total": 0.2930469512939453, - "step": 336199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.9616134762763977, - "learning_rate": 4.043358048770834e-06, - "loss": 0.4217, - "step": 336200 - }, - { - "epoch": 0.002598, - "loss_gen": 5.7176833152771, - "loss_rtd": 0.2001343071460724, - "loss_sent": 0.10172601789236069, - "loss_sod": 0.033488817512989044, - "loss_total": 0.3353491425514221, - "step": 336299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.589804172515869, - "loss_rtd": 0.1875990331172943, - "loss_sent": 0.0018354837084189057, - "loss_sod": 0.11609485745429993, - "loss_total": 0.30552938580513, - "step": 336299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.421616792678833, - "learning_rate": 4.030866059463362e-06, - "loss": 0.4166, - "step": 336300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.7938971519470215, - "loss_rtd": 0.22643759846687317, - "loss_sent": 0.21268542110919952, - "loss_sod": 0.02447209507226944, - "loss_total": 0.46359509229660034, - "step": 336399 - }, - { - "epoch": 0.002798, - "loss_gen": 6.084261894226074, - "loss_rtd": 0.1936543732881546, - "loss_sent": 0.0703633725643158, - "loss_sod": 0.009327598847448826, - "loss_total": 0.27334535121917725, - "step": 336399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.0431867837905884, - "learning_rate": 4.018392586517544e-06, - "loss": 0.4098, - "step": 336400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.781310558319092, - "loss_rtd": 0.21961914002895355, - "loss_sent": 0.1871901899576187, - "loss_sod": 0.0016068393597379327, - "loss_total": 0.40841615200042725, - "step": 336499 - }, - { - "epoch": 0.002998, - "loss_gen": 6.274241924285889, - "loss_rtd": 0.21812716126441956, - "loss_sent": 0.3167370557785034, - "loss_sod": 0.0775049701333046, - "loss_total": 0.612369179725647, - "step": 336499 - }, - { - "epoch": 0.003, - "grad_norm": 0.9896555542945862, - "learning_rate": 4.005937634957696e-06, - "loss": 0.422, - "step": 336500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.9590559005737305, - "loss_rtd": 0.23067691922187805, - "loss_sent": 0.9917678236961365, - "loss_sod": 0.029147058725357056, - "loss_total": 1.2515918016433716, - "step": 336599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.142953872680664, - "loss_rtd": 0.16721126437187195, - "loss_sent": 0.0006171088316477835, - "loss_sod": 0.07498796284198761, - "loss_total": 0.24281632900238037, - "step": 336599 - }, - { - "epoch": 0.0032, - "grad_norm": 3.682013511657715, - "learning_rate": 3.993501209800676e-06, - "loss": 0.4179, - "step": 336600 - }, - { - "epoch": 0.003398, - "loss_gen": 6.105652332305908, - "loss_rtd": 0.2125975489616394, - "loss_sent": 0.11406980454921722, - "loss_sod": 0.0600692518055439, - "loss_total": 0.3867366313934326, - "step": 336699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.841837406158447, - "loss_rtd": 0.22694866359233856, - "loss_sent": 0.21269488334655762, - "loss_sod": 0.031707677990198135, - "loss_total": 0.4713512361049652, - "step": 336699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.438538908958435, - "learning_rate": 3.981083316055862e-06, - "loss": 0.427, - "step": 336700 - }, - { - "epoch": 0.003598, - "loss_gen": 6.0138020515441895, - "loss_rtd": 0.20830239355564117, - "loss_sent": 0.1359240710735321, - "loss_sod": 0.08318870514631271, - "loss_total": 0.4274151623249054, - "step": 336799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.254580497741699, - "loss_rtd": 0.17418579757213593, - "loss_sent": 0.00016088095435407013, - "loss_sod": 0.059335820376873016, - "loss_total": 0.23368249833583832, - "step": 336799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.0473743677139282, - "learning_rate": 3.968683958725183e-06, - "loss": 0.4281, - "step": 336800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.933165550231934, - "loss_rtd": 0.2162189483642578, - "loss_sent": 0.28007441759109497, - "loss_sod": 0.0607638955116272, - "loss_total": 0.55705726146698, - "step": 336899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.874255180358887, - "loss_rtd": 0.2069503664970398, - "loss_sent": 0.2948680520057678, - "loss_sod": 0.07497943192720413, - "loss_total": 0.5767978429794312, - "step": 336899 - }, - { - "epoch": 0.0038, - "grad_norm": 2.0698721408843994, - "learning_rate": 3.956303142803097e-06, - "loss": 0.4111, - "step": 336900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.75591516494751, - "loss_rtd": 0.2001626044511795, - "loss_sent": 0.28182515501976013, - "loss_sod": 0.03909014165401459, - "loss_total": 0.5210778713226318, - "step": 336999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.950390338897705, - "loss_rtd": 0.2040172666311264, - "loss_sent": 0.05994034186005592, - "loss_sod": 0.027685757726430893, - "loss_total": 0.2916433811187744, - "step": 336999 - }, - { - "epoch": 0.004, - "grad_norm": 0.766802191734314, - "learning_rate": 3.943940873276608e-06, - "loss": 0.4463, - "step": 337000 - }, - { - "epoch": 0.004, - "eval_loss": 0.3947581350803375, - "eval_runtime": 150.6148, - "eval_samples_per_second": 102.533, - "eval_steps_per_second": 0.803, - "step": 337000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.382259845733643, - "loss_rtd": 0.17015394568443298, - "loss_sent": 0.013352852314710617, - "loss_sod": 0.11146476864814758, - "loss_total": 0.2949715554714203, - "step": 337099 - }, - { - "epoch": 0.004198, - "loss_gen": 6.169939994812012, - "loss_rtd": 0.21506501734256744, - "loss_sent": 0.0743965283036232, - "loss_sod": 0.050989117473363876, - "loss_total": 0.340450644493103, - "step": 337099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.0970839262008667, - "learning_rate": 3.931597155125222e-06, - "loss": 0.4321, - "step": 337100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.968051433563232, - "loss_rtd": 0.192471444606781, - "loss_sent": 0.36148494482040405, - "loss_sod": 0.07960955798625946, - "loss_total": 0.6335659623146057, - "step": 337199 - }, - { - "epoch": 0.004398, - "loss_gen": 6.046945571899414, - "loss_rtd": 0.20671626925468445, - "loss_sent": 0.19385822117328644, - "loss_sod": 0.08800552040338516, - "loss_total": 0.48857998847961426, - "step": 337199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.465620994567871, - "learning_rate": 3.919271993320994e-06, - "loss": 0.4207, - "step": 337200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.441961288452148, - "loss_rtd": 0.16775529086589813, - "loss_sent": 0.07787902653217316, - "loss_sod": 0.027034278959035873, - "loss_total": 0.27266860008239746, - "step": 337299 - }, - { - "epoch": 0.004598, - "loss_gen": 6.021490097045898, - "loss_rtd": 0.21516260504722595, - "loss_sent": 0.25842270255088806, - "loss_sod": 0.16038836538791656, - "loss_total": 0.6339737176895142, - "step": 337299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.9959056973457336, - "learning_rate": 3.906965392828493e-06, - "loss": 0.4231, - "step": 337300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.752862930297852, - "loss_rtd": 0.1995813250541687, - "loss_sent": 0.06377172470092773, - "loss_sod": 0.01823241449892521, - "loss_total": 0.2815854549407959, - "step": 337399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.549044609069824, - "loss_rtd": 0.19926828145980835, - "loss_sent": 0.007355755195021629, - "loss_sod": 0.1424844115972519, - "loss_total": 0.3491084575653076, - "step": 337399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.97892826795578, - "learning_rate": 3.894677358604826e-06, - "loss": 0.4143, - "step": 337400 - }, - { - "epoch": 0.004998, - "loss_gen": 6.17965030670166, - "loss_rtd": 0.20519477128982544, - "loss_sent": 0.23570743203163147, - "loss_sod": 0.05581946298480034, - "loss_total": 0.49672165513038635, - "step": 337499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.923890113830566, - "loss_rtd": 0.2153753787279129, - "loss_sent": 0.13199764490127563, - "loss_sod": 0.029959501698613167, - "loss_total": 0.37733250856399536, - "step": 337499 - }, - { - "epoch": 0.005, - "grad_norm": 0.9899687767028809, - "learning_rate": 3.882407895599599e-06, - "loss": 0.4197, - "step": 337500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.413747787475586, - "loss_rtd": 0.158955916762352, - "loss_sent": 0.040096450597047806, - "loss_sod": 0.023038877174258232, - "loss_total": 0.22209124267101288, - "step": 337599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.880749225616455, - "loss_rtd": 0.21064843237400055, - "loss_sent": 0.09280610829591751, - "loss_sod": 0.07383677363395691, - "loss_total": 0.37729132175445557, - "step": 337599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.6896345615386963, - "learning_rate": 3.87015700875496e-06, - "loss": 0.399, - "step": 337600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.829585075378418, - "loss_rtd": 0.22672949731349945, - "loss_sent": 0.1859520971775055, - "loss_sod": 0.07305468618869781, - "loss_total": 0.48573628067970276, - "step": 337699 - }, - { - "epoch": 0.005398, - "loss_gen": 6.094359874725342, - "loss_rtd": 0.19445569813251495, - "loss_sent": 0.241752028465271, - "loss_sod": 0.05559616535902023, - "loss_total": 0.4918038845062256, - "step": 337699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.1671324968338013, - "learning_rate": 3.857924703005555e-06, - "loss": 0.4338, - "step": 337700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.765002250671387, - "loss_rtd": 0.20760849118232727, - "loss_sent": 0.14129865169525146, - "loss_sod": 0.016897909343242645, - "loss_total": 0.365805059671402, - "step": 337799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.4883832931518555, - "loss_rtd": 0.16705027222633362, - "loss_sent": 0.0009274539770558476, - "loss_sod": 0.03859792277216911, - "loss_total": 0.2065756469964981, - "step": 337799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.7741756439208984, - "learning_rate": 3.845710983278583e-06, - "loss": 0.42, - "step": 337800 - }, - { - "epoch": 0.005798, - "loss_gen": 6.211860656738281, - "loss_rtd": 0.2270527184009552, - "loss_sent": 0.07621984928846359, - "loss_sod": 0.028961829841136932, - "loss_total": 0.3322344124317169, - "step": 337899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.8315300941467285, - "loss_rtd": 0.22638453543186188, - "loss_sent": 0.47674882411956787, - "loss_sod": 0.0761602371931076, - "loss_total": 0.7792935967445374, - "step": 337899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.1903001070022583, - "learning_rate": 3.833515854493691e-06, - "loss": 0.4034, - "step": 337900 - }, - { - "epoch": 0.005998, - "loss_gen": 6.163069725036621, - "loss_rtd": 0.22339676320552826, - "loss_sent": 0.22539108991622925, - "loss_sod": 0.015134986490011215, - "loss_total": 0.4639228582382202, - "step": 337999 - }, - { - "epoch": 0.005998, - "loss_gen": 6.285745143890381, - "loss_rtd": 0.223933145403862, - "loss_sent": 0.14128655195236206, - "loss_sod": 0.015176388435065746, - "loss_total": 0.3803960680961609, - "step": 337999 - }, - { - "epoch": 0.006, - "grad_norm": 0.7822142839431763, - "learning_rate": 3.821339321563089e-06, - "loss": 0.4294, - "step": 338000 - }, - { - "epoch": 0.006, - "eval_loss": 0.4008514881134033, - "eval_runtime": 149.5304, - "eval_samples_per_second": 103.277, - "eval_steps_per_second": 0.809, - "step": 338000 - }, - { - "epoch": 0.006198, - "loss_gen": 6.204260349273682, - "loss_rtd": 0.20586571097373962, - "loss_sent": 0.40420326590538025, - "loss_sod": 0.2035951018333435, - "loss_total": 0.8136640787124634, - "step": 338099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.772861480712891, - "loss_rtd": 0.21573849022388458, - "loss_sent": 0.46023422479629517, - "loss_sod": 0.07706906646490097, - "loss_total": 0.7530417442321777, - "step": 338099 - }, - { - "epoch": 0.0062, - "grad_norm": 2.3411641120910645, - "learning_rate": 3.8091813893914864e-06, - "loss": 0.4146, - "step": 338100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.746396541595459, - "loss_rtd": 0.2155022770166397, - "loss_sent": 0.17408345639705658, - "loss_sod": 0.003342859912663698, - "loss_total": 0.3929286003112793, - "step": 338199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.945261478424072, - "loss_rtd": 0.21275067329406738, - "loss_sent": 0.10976219177246094, - "loss_sod": 0.028655165806412697, - "loss_total": 0.35116803646087646, - "step": 338199 - }, - { - "epoch": 0.0064, - "grad_norm": 0.7478144764900208, - "learning_rate": 3.7970420628761105e-06, - "loss": 0.4304, - "step": 338200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.782810688018799, - "loss_rtd": 0.22391600906848907, - "loss_sent": 0.20661456882953644, - "loss_sod": 0.010610237717628479, - "loss_total": 0.4411408305168152, - "step": 338299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.733725070953369, - "loss_rtd": 0.206951305270195, - "loss_sent": 0.13049571216106415, - "loss_sod": 0.07839620858430862, - "loss_total": 0.41584324836730957, - "step": 338299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.2371070384979248, - "learning_rate": 3.78492134690665e-06, - "loss": 0.4254, - "step": 338300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.954866409301758, - "loss_rtd": 0.23353427648544312, - "loss_sent": 0.16861510276794434, - "loss_sod": 0.05040101706981659, - "loss_total": 0.45255041122436523, - "step": 338399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.877741813659668, - "loss_rtd": 0.19605544209480286, - "loss_sent": 0.030897455289959908, - "loss_sod": 0.056787945330142975, - "loss_total": 0.2837408483028412, - "step": 338399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.0223498344421387, - "learning_rate": 3.77281924636535e-06, - "loss": 0.4103, - "step": 338400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.412992000579834, - "loss_rtd": 0.17041103541851044, - "loss_sent": 0.24925978481769562, - "loss_sod": 0.018770035356283188, - "loss_total": 0.43844085931777954, - "step": 338499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.862917900085449, - "loss_rtd": 0.19434136152267456, - "loss_sent": 0.2050703763961792, - "loss_sod": 0.03860627859830856, - "loss_total": 0.4380180239677429, - "step": 338499 - }, - { - "epoch": 0.007, - "grad_norm": 1.2954009771347046, - "learning_rate": 3.7607357661269272e-06, - "loss": 0.4013, - "step": 338500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.875136852264404, - "loss_rtd": 0.22192594408988953, - "loss_sent": 0.13295848667621613, - "loss_sod": 0.029642509296536446, - "loss_total": 0.38452696800231934, - "step": 338599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.88073205947876, - "loss_rtd": 0.2221127152442932, - "loss_sent": 0.15584711730480194, - "loss_sod": 0.06707973778247833, - "loss_total": 0.4450395703315735, - "step": 338599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.4723381996154785, - "learning_rate": 3.748670911058616e-06, - "loss": 0.4199, - "step": 338600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.959644794464111, - "loss_rtd": 0.20913678407669067, - "loss_sent": 0.08110801875591278, - "loss_sod": 0.21267099678516388, - "loss_total": 0.5029157996177673, - "step": 338699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.260806083679199, - "loss_rtd": 0.17016012966632843, - "loss_sent": 0.0023471980821341276, - "loss_sod": 0.110336072742939, - "loss_total": 0.2828434109687805, - "step": 338699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.2005341053009033, - "learning_rate": 3.7366246860201182e-06, - "loss": 0.404, - "step": 338700 - }, - { - "epoch": 0.007598, - "loss_gen": 6.073202610015869, - "loss_rtd": 0.22639216482639313, - "loss_sent": 0.08206695318222046, - "loss_sod": 0.023818880319595337, - "loss_total": 0.3322780132293701, - "step": 338799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.572092533111572, - "loss_rtd": 0.17619915306568146, - "loss_sent": 0.05038800090551376, - "loss_sod": 0.07302147895097733, - "loss_total": 0.29960864782333374, - "step": 338799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.1604137420654297, - "learning_rate": 3.7245970958636687e-06, - "loss": 0.4133, - "step": 338800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.995290756225586, - "loss_rtd": 0.21080608665943146, - "loss_sent": 0.34130969643592834, - "loss_sod": 0.01080277469009161, - "loss_total": 0.5629185438156128, - "step": 338899 - }, - { - "epoch": 0.007798, - "loss_gen": 6.070387840270996, - "loss_rtd": 0.21030038595199585, - "loss_sent": 0.27426761388778687, - "loss_sod": 0.07019522786140442, - "loss_total": 0.5547631978988647, - "step": 338899 - }, - { - "epoch": 0.0078, - "grad_norm": 2.2233266830444336, - "learning_rate": 3.712588145433976e-06, - "loss": 0.4151, - "step": 338900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.772355079650879, - "loss_rtd": 0.2053069919347763, - "loss_sent": 0.064731165766716, - "loss_sod": 0.037257201969623566, - "loss_total": 0.30729538202285767, - "step": 338999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.197329044342041, - "loss_rtd": 0.15753231942653656, - "loss_sent": 2.3899308871477842e-05, - "loss_sod": 0.056196585297584534, - "loss_total": 0.21375280618667603, - "step": 338999 - }, - { - "epoch": 0.008, - "grad_norm": 0.8829795122146606, - "learning_rate": 3.7005978395682482e-06, - "loss": 0.4161, - "step": 339000 - }, - { - "epoch": 0.008, - "eval_loss": 0.4010652005672455, - "eval_runtime": 149.9038, - "eval_samples_per_second": 103.019, - "eval_steps_per_second": 0.807, - "step": 339000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.983335971832275, - "loss_rtd": 0.19274388253688812, - "loss_sent": 0.09417889267206192, - "loss_sod": 0.05159136280417442, - "loss_total": 0.33851414918899536, - "step": 339099 - }, - { - "epoch": 0.008198, - "loss_gen": 6.370143413543701, - "loss_rtd": 0.22788284718990326, - "loss_sent": 0.1849139928817749, - "loss_sod": 0.05349213257431984, - "loss_total": 0.4662889838218689, - "step": 339099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.2802579402923584, - "learning_rate": 3.6886261830961665e-06, - "loss": 0.4087, - "step": 339100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.889089107513428, - "loss_rtd": 0.20249640941619873, - "loss_sent": 0.136704221367836, - "loss_sod": 0.0257358830422163, - "loss_total": 0.3649365305900574, - "step": 339199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.897090435028076, - "loss_rtd": 0.22126959264278412, - "loss_sent": 0.19033564627170563, - "loss_sod": 0.19496454298496246, - "loss_total": 0.606569766998291, - "step": 339199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.105751395225525, - "learning_rate": 3.6766731808399234e-06, - "loss": 0.4247, - "step": 339200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.897000789642334, - "loss_rtd": 0.2111540138721466, - "loss_sent": 0.19834111630916595, - "loss_sod": 0.047086890786886215, - "loss_total": 0.4565820097923279, - "step": 339299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.844721794128418, - "loss_rtd": 0.20830696821212769, - "loss_sent": 0.07999664545059204, - "loss_sod": 0.038247983902692795, - "loss_total": 0.326551616191864, - "step": 339299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.65416020154953, - "learning_rate": 3.66473883761419e-06, - "loss": 0.4281, - "step": 339300 - }, - { - "epoch": 0.008798, - "loss_gen": 6.223285675048828, - "loss_rtd": 0.2105582356452942, - "loss_sent": 0.10665461421012878, - "loss_sod": 0.013735967688262463, - "loss_total": 0.3309488296508789, - "step": 339399 - }, - { - "epoch": 0.008798, - "loss_gen": 6.001707077026367, - "loss_rtd": 0.2266661375761032, - "loss_sent": 0.16380570828914642, - "loss_sod": 0.07627823948860168, - "loss_total": 0.4667500853538513, - "step": 339399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.0086357593536377, - "learning_rate": 3.6528231582260984e-06, - "loss": 0.4255, - "step": 339400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.79616641998291, - "loss_rtd": 0.2053869664669037, - "loss_sent": 0.2299124300479889, - "loss_sod": 0.040310800075531006, - "loss_total": 0.4756101965904236, - "step": 339499 - }, - { - "epoch": 0.008998, - "loss_gen": 6.108221054077148, - "loss_rtd": 0.20687364041805267, - "loss_sent": 0.15326544642448425, - "loss_sod": 0.05609961599111557, - "loss_total": 0.4162386953830719, - "step": 339499 - }, - { - "epoch": 0.009, - "grad_norm": 1.5119096040725708, - "learning_rate": 3.6409261474753043e-06, - "loss": 0.4161, - "step": 339500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.806908130645752, - "loss_rtd": 0.20581737160682678, - "loss_sent": 0.07188493758440018, - "loss_sod": 0.026977792382240295, - "loss_total": 0.30468010902404785, - "step": 339599 - }, - { - "epoch": 0.009198, - "loss_gen": 6.116364002227783, - "loss_rtd": 0.22399155795574188, - "loss_sent": 0.14591443538665771, - "loss_sod": 0.028032680973410606, - "loss_total": 0.39793866872787476, - "step": 339599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.8714219331741333, - "learning_rate": 3.629047810153907e-06, - "loss": 0.437, - "step": 339600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.894096851348877, - "loss_rtd": 0.21251995861530304, - "loss_sent": 0.23561184108257294, - "loss_sod": 0.06450232863426208, - "loss_total": 0.5126341581344604, - "step": 339699 - }, - { - "epoch": 0.009398, - "loss_gen": 6.02771520614624, - "loss_rtd": 0.18507251143455505, - "loss_sent": 0.13063956797122955, - "loss_sod": 0.00920623168349266, - "loss_total": 0.32491832971572876, - "step": 339699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.1654026508331299, - "learning_rate": 3.617188151046519e-06, - "loss": 0.4049, - "step": 339700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.9308695793151855, - "loss_rtd": 0.21963249146938324, - "loss_sent": 0.3654840886592865, - "loss_sod": 0.050618160516023636, - "loss_total": 0.6357347369194031, - "step": 339799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.643835067749023, - "loss_rtd": 0.1976223737001419, - "loss_sent": 0.12658996880054474, - "loss_sod": 0.020686248317360878, - "loss_total": 0.3448985815048218, - "step": 339799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.81893390417099, - "learning_rate": 3.6053471749301847e-06, - "loss": 0.4131, - "step": 339800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.648099899291992, - "loss_rtd": 0.2215256541967392, - "loss_sent": 0.06031196191906929, - "loss_sod": 0.03526972606778145, - "loss_total": 0.31710734963417053, - "step": 339899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.75663948059082, - "loss_rtd": 0.2025076448917389, - "loss_sent": 0.269387811422348, - "loss_sod": 0.016846321523189545, - "loss_total": 0.48874178528785706, - "step": 339899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.0655364990234375, - "learning_rate": 3.5935248865744673e-06, - "loss": 0.4056, - "step": 339900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.908276557922363, - "loss_rtd": 0.19214953482151031, - "loss_sent": 0.23334498703479767, - "loss_sod": 0.04038998484611511, - "loss_total": 0.4658845067024231, - "step": 339999 - }, - { - "epoch": 0.009998, - "loss_gen": 6.1002516746521, - "loss_rtd": 0.20336997509002686, - "loss_sent": 0.22725780308246613, - "loss_sod": 0.0748271718621254, - "loss_total": 0.505454957485199, - "step": 339999 - }, - { - "epoch": 0.01, - "grad_norm": 1.4487290382385254, - "learning_rate": 3.581721290741369e-06, - "loss": 0.4154, - "step": 340000 - }, - { - "epoch": 0.01, - "eval_loss": 0.4006238281726837, - "eval_runtime": 150.0884, - "eval_samples_per_second": 102.893, - "eval_steps_per_second": 0.806, - "step": 340000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.326809406280518, - "loss_rtd": 0.17155982553958893, - "loss_sent": 0.055715400725603104, - "loss_sod": 0.03777296096086502, - "loss_total": 0.26504820585250854, - "step": 340099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.804373741149902, - "loss_rtd": 0.19311803579330444, - "loss_sent": 0.23289738595485687, - "loss_sod": 0.019639087840914726, - "loss_total": 0.4456545114517212, - "step": 340099 - }, - { - "epoch": 0.0102, - "grad_norm": 0.8826153874397278, - "learning_rate": 3.569936392185391e-06, - "loss": 0.4208, - "step": 340100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.741005897521973, - "loss_rtd": 0.2289055734872818, - "loss_sent": 0.10577091574668884, - "loss_sod": 0.004277893807739019, - "loss_total": 0.3389543890953064, - "step": 340199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.89778470993042, - "loss_rtd": 0.21025080978870392, - "loss_sent": 0.15272365510463715, - "loss_sod": 0.028359398245811462, - "loss_total": 0.3913338780403137, - "step": 340199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.8030951619148254, - "learning_rate": 3.5581701956534818e-06, - "loss": 0.4252, - "step": 340200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.805662155151367, - "loss_rtd": 0.21620453894138336, - "loss_sent": 0.2286563366651535, - "loss_sod": 0.0036057299003005028, - "loss_total": 0.44846659898757935, - "step": 340299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.9886956214904785, - "loss_rtd": 0.21013662219047546, - "loss_sent": 0.1385829746723175, - "loss_sod": 0.09061326086521149, - "loss_total": 0.43933287262916565, - "step": 340299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.0946193933486938, - "learning_rate": 3.546422705885055e-06, - "loss": 0.4024, - "step": 340300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.908155918121338, - "loss_rtd": 0.22734303772449493, - "loss_sent": 0.4903893768787384, - "loss_sod": 0.11314050853252411, - "loss_total": 0.8308728933334351, - "step": 340399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.893324375152588, - "loss_rtd": 0.21723562479019165, - "loss_sent": 0.0734143853187561, - "loss_sod": 0.05252843722701073, - "loss_total": 0.3431784510612488, - "step": 340399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.9139138460159302, - "learning_rate": 3.5346939276120096e-06, - "loss": 0.4203, - "step": 340400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.145823955535889, - "loss_rtd": 0.17083795368671417, - "loss_sent": 2.4692773877177387e-05, - "loss_sod": 0.033463336527347565, - "loss_total": 0.20432598888874054, - "step": 340499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.2899370193481445, - "loss_rtd": 0.15717390179634094, - "loss_sent": 0.038508735597133636, - "loss_sod": 0.0543653629720211, - "loss_total": 0.2500480115413666, - "step": 340499 - }, - { - "epoch": 0.011, - "grad_norm": 0.7320871949195862, - "learning_rate": 3.5229838655587044e-06, - "loss": 0.4009, - "step": 340500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.847060203552246, - "loss_rtd": 0.21480822563171387, - "loss_sent": 0.2133861929178238, - "loss_sod": 0.004829126875847578, - "loss_total": 0.4330235421657562, - "step": 340599 - }, - { - "epoch": 0.011198, - "loss_gen": 6.185930252075195, - "loss_rtd": 0.2196062058210373, - "loss_sent": 0.1698533594608307, - "loss_sod": 0.03258263319730759, - "loss_total": 0.42204219102859497, - "step": 340599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.8248491883277893, - "learning_rate": 3.5112925244419337e-06, - "loss": 0.4295, - "step": 340600 - }, - { - "epoch": 0.011398, - "loss_gen": 6.373525619506836, - "loss_rtd": 0.19577983021736145, - "loss_sent": 0.06667690724134445, - "loss_sod": 0.16156956553459167, - "loss_total": 0.4240263104438782, - "step": 340699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.640323162078857, - "loss_rtd": 0.17199872434139252, - "loss_sent": 9.931313979905099e-05, - "loss_sod": 0.06525272130966187, - "loss_total": 0.23735076189041138, - "step": 340699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.078114628791809, - "learning_rate": 3.4996199089709692e-06, - "loss": 0.4074, - "step": 340700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.662497043609619, - "loss_rtd": 0.20580726861953735, - "loss_sent": 0.2913249433040619, - "loss_sod": 0.011537499725818634, - "loss_total": 0.5086697340011597, - "step": 340799 - }, - { - "epoch": 0.011598, - "loss_gen": 6.062875747680664, - "loss_rtd": 0.23240281641483307, - "loss_sent": 0.36906126141548157, - "loss_sod": 0.014366144314408302, - "loss_total": 0.6158302426338196, - "step": 340799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.3367756605148315, - "learning_rate": 3.487966023847555e-06, - "loss": 0.4049, - "step": 340800 - }, - { - "epoch": 0.011798, - "loss_gen": 6.10473108291626, - "loss_rtd": 0.20512263476848602, - "loss_sent": 0.4059183895587921, - "loss_sod": 0.15950946509838104, - "loss_total": 0.7705504894256592, - "step": 340899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.811824321746826, - "loss_rtd": 0.21852651238441467, - "loss_sent": 0.1622115671634674, - "loss_sod": 0.02196568250656128, - "loss_total": 0.40270376205444336, - "step": 340899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.750742793083191, - "learning_rate": 3.476330873765854e-06, - "loss": 0.4038, - "step": 340900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.998531818389893, - "loss_rtd": 0.21462537348270416, - "loss_sent": 0.14007796347141266, - "loss_sod": 0.05634921044111252, - "loss_total": 0.41105252504348755, - "step": 340999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.929116725921631, - "loss_rtd": 0.2102845311164856, - "loss_sent": 0.16890662908554077, - "loss_sod": 0.001856447197496891, - "loss_total": 0.3810476064682007, - "step": 340999 - }, - { - "epoch": 0.012, - "grad_norm": 0.7612988352775574, - "learning_rate": 3.464714463412516e-06, - "loss": 0.4116, - "step": 341000 - }, - { - "epoch": 0.012, - "eval_loss": 0.3963385820388794, - "eval_runtime": 150.8414, - "eval_samples_per_second": 102.379, - "eval_steps_per_second": 0.802, - "step": 341000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.03617000579834, - "loss_rtd": 0.1962297409772873, - "loss_sent": 0.22792571783065796, - "loss_sod": 0.11228495091199875, - "loss_total": 0.536440372467041, - "step": 341099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.751300811767578, - "loss_rtd": 0.18305706977844238, - "loss_sent": 0.17126314342021942, - "loss_sod": 0.01719723455607891, - "loss_total": 0.37151744961738586, - "step": 341099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.9330779314041138, - "learning_rate": 3.453116797466627e-06, - "loss": 0.416, - "step": 341100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.9853196144104, - "loss_rtd": 0.21844954788684845, - "loss_sent": 0.10476718842983246, - "loss_sod": 0.017409641295671463, - "loss_total": 0.3406263589859009, - "step": 341199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.535221576690674, - "loss_rtd": 0.17223674058914185, - "loss_sent": 0.004667786881327629, - "loss_sod": 0.0542733408510685, - "loss_total": 0.23117786645889282, - "step": 341199 - }, - { - "epoch": 0.0124, - "grad_norm": 3.3137733936309814, - "learning_rate": 3.441537880599732e-06, - "loss": 0.3993, - "step": 341200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.888561725616455, - "loss_rtd": 0.20931757986545563, - "loss_sent": 0.2222614884376526, - "loss_sod": 0.08970314264297485, - "loss_total": 0.5212821960449219, - "step": 341299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.249072074890137, - "loss_rtd": 0.16802287101745605, - "loss_sent": 0.001813581446185708, - "loss_sod": 0.03134084492921829, - "loss_total": 0.20117728412151337, - "step": 341299 - }, - { - "epoch": 0.0126, - "grad_norm": 0.8807783722877502, - "learning_rate": 3.429977717475802e-06, - "loss": 0.4124, - "step": 341300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.882512092590332, - "loss_rtd": 0.21080563962459564, - "loss_sent": 0.30358272790908813, - "loss_sod": 0.06010904908180237, - "loss_total": 0.574497401714325, - "step": 341399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.923336982727051, - "loss_rtd": 0.20899485051631927, - "loss_sent": 0.11464784294366837, - "loss_sod": 0.14935755729675293, - "loss_total": 0.47300025820732117, - "step": 341399 - }, - { - "epoch": 0.0128, - "grad_norm": 1.6107983589172363, - "learning_rate": 3.4184363127512833e-06, - "loss": 0.4165, - "step": 341400 - }, - { - "epoch": 0.012998, - "loss_gen": 6.030972957611084, - "loss_rtd": 0.2066817283630371, - "loss_sent": 0.34057924151420593, - "loss_sod": 0.021736331284046173, - "loss_total": 0.5689972639083862, - "step": 341499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.77199649810791, - "loss_rtd": 0.20100796222686768, - "loss_sent": 0.20542021095752716, - "loss_sod": 0.08238121122121811, - "loss_total": 0.48880940675735474, - "step": 341499 - }, - { - "epoch": 0.013, - "grad_norm": 1.3586835861206055, - "learning_rate": 3.4069136710750404e-06, - "loss": 0.422, - "step": 341500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.997402667999268, - "loss_rtd": 0.20390596985816956, - "loss_sent": 0.18963722884655, - "loss_sod": 0.02654527686536312, - "loss_total": 0.4200884699821472, - "step": 341599 - }, - { - "epoch": 0.013198, - "loss_gen": 6.158804416656494, - "loss_rtd": 0.20410539209842682, - "loss_sent": 0.1750335544347763, - "loss_sod": 0.03373246267437935, - "loss_total": 0.41287142038345337, - "step": 341599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.1250416040420532, - "learning_rate": 3.395409797088411e-06, - "loss": 0.419, - "step": 341600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.943315029144287, - "loss_rtd": 0.20542287826538086, - "loss_sent": 0.0901661068201065, - "loss_sod": 0.08766307681798935, - "loss_total": 0.3832520842552185, - "step": 341699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.370689868927002, - "loss_rtd": 0.15447942912578583, - "loss_sent": 0.0003166712122038007, - "loss_sod": 0.09208841621875763, - "loss_total": 0.2468845099210739, - "step": 341699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.168798804283142, - "learning_rate": 3.3839246954251337e-06, - "loss": 0.4119, - "step": 341700 - }, - { - "epoch": 0.013598, - "loss_gen": 6.064821243286133, - "loss_rtd": 0.2190457284450531, - "loss_sent": 0.09213320165872574, - "loss_sod": 0.048973917961120605, - "loss_total": 0.36015284061431885, - "step": 341799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.890889644622803, - "loss_rtd": 0.19885052740573883, - "loss_sent": 0.21808795630931854, - "loss_sod": 0.036045633256435394, - "loss_total": 0.452984094619751, - "step": 341799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.1047885417938232, - "learning_rate": 3.3724583707114123e-06, - "loss": 0.4099, - "step": 341800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.762095928192139, - "loss_rtd": 0.1914149671792984, - "loss_sent": 0.09680893272161484, - "loss_sod": 0.030672771856188774, - "loss_total": 0.31889668107032776, - "step": 341899 - }, - { - "epoch": 0.013798, - "loss_gen": 6.0736589431762695, - "loss_rtd": 0.2036304771900177, - "loss_sent": 0.19782862067222595, - "loss_sod": 0.03012201562523842, - "loss_total": 0.4315811097621918, - "step": 341899 - }, - { - "epoch": 0.0138, - "grad_norm": 0.9368470907211304, - "learning_rate": 3.361010827565886e-06, - "loss": 0.4135, - "step": 341900 - }, - { - "epoch": 0.013998, - "loss_gen": 6.138782501220703, - "loss_rtd": 0.22057823836803436, - "loss_sent": 0.07051631808280945, - "loss_sod": 0.036086756736040115, - "loss_total": 0.32718130946159363, - "step": 341999 - }, - { - "epoch": 0.013998, - "loss_gen": 6.156554222106934, - "loss_rtd": 0.22142986953258514, - "loss_sent": 0.187363862991333, - "loss_sod": 0.06705398857593536, - "loss_total": 0.4758477210998535, - "step": 341999 - }, - { - "epoch": 0.014, - "grad_norm": 1.0070147514343262, - "learning_rate": 3.3495820705996274e-06, - "loss": 0.4201, - "step": 342000 - }, - { - "epoch": 0.014, - "eval_loss": 0.39424291253089905, - "eval_runtime": 150.2061, - "eval_samples_per_second": 102.812, - "eval_steps_per_second": 0.806, - "step": 342000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.940567970275879, - "loss_rtd": 0.23230868577957153, - "loss_sent": 0.35433995723724365, - "loss_sod": 0.049607060849666595, - "loss_total": 0.6362557411193848, - "step": 342099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.838633060455322, - "loss_rtd": 0.19210414588451385, - "loss_sent": 0.04289254918694496, - "loss_sod": 0.09127867966890335, - "loss_total": 0.32627537846565247, - "step": 342099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.1639574766159058, - "learning_rate": 3.3381721044161262e-06, - "loss": 0.4324, - "step": 342100 - }, - { - "epoch": 0.014398, - "loss_gen": 6.07776403427124, - "loss_rtd": 0.1801523119211197, - "loss_sent": 0.12497272342443466, - "loss_sod": 0.054323021322488785, - "loss_total": 0.35944804549217224, - "step": 342199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.801862716674805, - "loss_rtd": 0.21373224258422852, - "loss_sent": 0.1702360063791275, - "loss_sod": 0.026983605697751045, - "loss_total": 0.4109518527984619, - "step": 342199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.3709266185760498, - "learning_rate": 3.3267809336113175e-06, - "loss": 0.407, - "step": 342200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.709898471832275, - "loss_rtd": 0.2265806794166565, - "loss_sent": 0.3270280063152313, - "loss_sod": 0.03656139224767685, - "loss_total": 0.5901700854301453, - "step": 342299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.581545829772949, - "loss_rtd": 0.2105221003293991, - "loss_sent": 0.09505219757556915, - "loss_sod": 0.03128906339406967, - "loss_total": 0.33686333894729614, - "step": 342299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.9331885576248169, - "learning_rate": 3.3154085627735698e-06, - "loss": 0.4363, - "step": 342300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.754115104675293, - "loss_rtd": 0.19943825900554657, - "loss_sent": 0.3611813485622406, - "loss_sod": 0.013223467394709587, - "loss_total": 0.5738430619239807, - "step": 342399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.950212001800537, - "loss_rtd": 0.2199244648218155, - "loss_sent": 0.3878166675567627, - "loss_sod": 0.169979065656662, - "loss_total": 0.7777202129364014, - "step": 342399 - }, - { - "epoch": 0.0148, - "grad_norm": 1.8014847040176392, - "learning_rate": 3.304054996483674e-06, - "loss": 0.4227, - "step": 342400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.824573516845703, - "loss_rtd": 0.2237839698791504, - "loss_sent": 0.09823964536190033, - "loss_sod": 0.017051920294761658, - "loss_total": 0.3390755355358124, - "step": 342499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.877294063568115, - "loss_rtd": 0.21096977591514587, - "loss_sent": 0.1755797415971756, - "loss_sod": 0.002020066836848855, - "loss_total": 0.38856959342956543, - "step": 342499 - }, - { - "epoch": 0.015, - "grad_norm": 0.5809124708175659, - "learning_rate": 3.2927202393148393e-06, - "loss": 0.4327, - "step": 342500 - }, - { - "epoch": 0.015198, - "loss_gen": 6.27562141418457, - "loss_rtd": 0.21944203972816467, - "loss_sent": 0.15664042532444, - "loss_sod": 0.05238547921180725, - "loss_total": 0.42846792936325073, - "step": 342599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.808011531829834, - "loss_rtd": 0.19500215351581573, - "loss_sent": 0.03753923252224922, - "loss_sod": 0.06146138906478882, - "loss_total": 0.2940027713775635, - "step": 342599 - }, - { - "epoch": 0.0152, - "grad_norm": 1.0454044342041016, - "learning_rate": 3.2814042958327016e-06, - "loss": 0.4289, - "step": 342600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.459552764892578, - "loss_rtd": 0.16028933227062225, - "loss_sent": 0.017681865021586418, - "loss_sod": 0.0945415049791336, - "loss_total": 0.2725127041339874, - "step": 342699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.953822612762451, - "loss_rtd": 0.21610717475414276, - "loss_sent": 0.2768329083919525, - "loss_sod": 0.006910734809935093, - "loss_total": 0.4998508095741272, - "step": 342699 - }, - { - "epoch": 0.0154, - "grad_norm": 1.4511522054672241, - "learning_rate": 3.2701071705953377e-06, - "loss": 0.4183, - "step": 342700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.7881598472595215, - "loss_rtd": 0.21523109078407288, - "loss_sent": 0.1633302867412567, - "loss_sod": 0.0501314215362072, - "loss_total": 0.4286927878856659, - "step": 342799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.951810836791992, - "loss_rtd": 0.2228037714958191, - "loss_sent": 0.15994679927825928, - "loss_sod": 0.041540808975696564, - "loss_total": 0.42429137229919434, - "step": 342799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.3096485137939453, - "learning_rate": 3.2588288681532077e-06, - "loss": 0.4134, - "step": 342800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.827404499053955, - "loss_rtd": 0.21465258300304413, - "loss_sent": 0.32007789611816406, - "loss_sod": 0.04397441819310188, - "loss_total": 0.5787048935890198, - "step": 342899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.772224426269531, - "loss_rtd": 0.21917395293712616, - "loss_sent": 0.33243751525878906, - "loss_sod": 0.07959354668855667, - "loss_total": 0.6312050223350525, - "step": 342899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.5774588584899902, - "learning_rate": 3.2475693930492214e-06, - "loss": 0.4261, - "step": 342900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.823678016662598, - "loss_rtd": 0.20381247997283936, - "loss_sent": 0.19986392557621002, - "loss_sod": 0.004302198067307472, - "loss_total": 0.4079785943031311, - "step": 342999 - }, - { - "epoch": 0.015998, - "loss_gen": 6.02730655670166, - "loss_rtd": 0.2274365872144699, - "loss_sent": 0.08434657007455826, - "loss_sod": 0.07830630987882614, - "loss_total": 0.3900894522666931, - "step": 342999 - }, - { - "epoch": 0.016, - "grad_norm": 0.8446077704429626, - "learning_rate": 3.23632874981869e-06, - "loss": 0.4326, - "step": 343000 - }, - { - "epoch": 0.016, - "eval_loss": 0.38928958773612976, - "eval_runtime": 150.3513, - "eval_samples_per_second": 102.713, - "eval_steps_per_second": 0.805, - "step": 343000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.625696182250977, - "loss_rtd": 0.20675015449523926, - "loss_sent": 0.4063093066215515, - "loss_sod": 0.041951365768909454, - "loss_total": 0.6550108194351196, - "step": 343099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.676318168640137, - "loss_rtd": 0.20205141603946686, - "loss_sent": 0.18781642615795135, - "loss_sod": 0.01967936009168625, - "loss_total": 0.40954720973968506, - "step": 343099 - }, - { - "epoch": 0.0162, - "grad_norm": 4.332789897918701, - "learning_rate": 3.225106942989359e-06, - "loss": 0.4034, - "step": 343100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.8456220626831055, - "loss_rtd": 0.2000020444393158, - "loss_sent": 0.24244247376918793, - "loss_sod": 0.03900426998734474, - "loss_total": 0.481448769569397, - "step": 343199 - }, - { - "epoch": 0.016398, - "loss_gen": 6.019463539123535, - "loss_rtd": 0.20181627571582794, - "loss_sent": 0.14744023978710175, - "loss_sod": 0.041491057723760605, - "loss_total": 0.3907475769519806, - "step": 343199 - }, - { - "epoch": 0.0164, - "grad_norm": 1.7115260362625122, - "learning_rate": 3.213903977081345e-06, - "loss": 0.4255, - "step": 343200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.8853349685668945, - "loss_rtd": 0.18609805405139923, - "loss_sent": 0.12787283957004547, - "loss_sod": 0.03629517927765846, - "loss_total": 0.35026606917381287, - "step": 343299 - }, - { - "epoch": 0.016598, - "loss_gen": 6.13047981262207, - "loss_rtd": 0.1903739869594574, - "loss_sent": 0.21343553066253662, - "loss_sod": 0.0346207469701767, - "loss_total": 0.4384302496910095, - "step": 343299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.5341449975967407, - "learning_rate": 3.2027198566072115e-06, - "loss": 0.4192, - "step": 343300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.255621433258057, - "loss_rtd": 0.1636127382516861, - "loss_sent": 0.0005237420555204153, - "loss_sod": 0.02681725099682808, - "loss_total": 0.19095373153686523, - "step": 343399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.464029788970947, - "loss_rtd": 0.1909935027360916, - "loss_sent": 2.4588471205788665e-05, - "loss_sod": 0.13042181730270386, - "loss_total": 0.3214398920536041, - "step": 343399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.9985593557357788, - "learning_rate": 3.1915545860719265e-06, - "loss": 0.4172, - "step": 343400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.821273326873779, - "loss_rtd": 0.2242823988199234, - "loss_sent": 0.08130759000778198, - "loss_sod": 0.007630887441337109, - "loss_total": 0.3132208585739136, - "step": 343499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.664610862731934, - "loss_rtd": 0.197879821062088, - "loss_sent": 0.032269980758428574, - "loss_sod": 0.040124744176864624, - "loss_total": 0.2702745497226715, - "step": 343499 - }, - { - "epoch": 0.017, - "grad_norm": 0.8526556491851807, - "learning_rate": 3.180408169972865e-06, - "loss": 0.4159, - "step": 343500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.924649715423584, - "loss_rtd": 0.21138909459114075, - "loss_sent": 0.10157202929258347, - "loss_sod": 0.04640458524227142, - "loss_total": 0.35936570167541504, - "step": 343599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.885820388793945, - "loss_rtd": 0.21065209805965424, - "loss_sent": 0.4478943347930908, - "loss_sod": 0.0231836698949337, - "loss_total": 0.6817300915718079, - "step": 343599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.8805345296859741, - "learning_rate": 3.1692806127997853e-06, - "loss": 0.4183, - "step": 343600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.767274379730225, - "loss_rtd": 0.19500966370105743, - "loss_sent": 0.18620166182518005, - "loss_sod": 0.033180274069309235, - "loss_total": 0.4143916070461273, - "step": 343699 - }, - { - "epoch": 0.017398, - "loss_gen": 6.134350299835205, - "loss_rtd": 0.17665618658065796, - "loss_sent": 0.1611063927412033, - "loss_sod": 0.07501335442066193, - "loss_total": 0.4127759337425232, - "step": 343699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.3646003007888794, - "learning_rate": 3.1581719190348745e-06, - "loss": 0.4211, - "step": 343700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.61315393447876, - "loss_rtd": 0.20061041414737701, - "loss_sent": 0.34774741530418396, - "loss_sod": 0.0024513767566531897, - "loss_total": 0.5508092045783997, - "step": 343799 - }, - { - "epoch": 0.017598, - "loss_gen": 6.274115562438965, - "loss_rtd": 0.2139493077993393, - "loss_sent": 0.423194944858551, - "loss_sod": 0.06745512783527374, - "loss_total": 0.7045993804931641, - "step": 343799 - }, - { - "epoch": 0.0176, - "grad_norm": 3.2399985790252686, - "learning_rate": 3.1470820931527146e-06, - "loss": 0.4171, - "step": 343800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.903570652008057, - "loss_rtd": 0.19082964956760406, - "loss_sent": 0.27847811579704285, - "loss_sod": 0.0468842014670372, - "loss_total": 0.5161919593811035, - "step": 343899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.536373138427734, - "loss_rtd": 0.18130025267601013, - "loss_sent": 0.025892140343785286, - "loss_sod": 0.14663097262382507, - "loss_total": 0.35382336378097534, - "step": 343899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.5265181064605713, - "learning_rate": 3.136011139620293e-06, - "loss": 0.4298, - "step": 343900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.869435787200928, - "loss_rtd": 0.23094557225704193, - "loss_sent": 0.19800937175750732, - "loss_sod": 0.03712037205696106, - "loss_total": 0.4660753011703491, - "step": 343999 - }, - { - "epoch": 0.017998, - "loss_gen": 6.170574188232422, - "loss_rtd": 0.21233905851840973, - "loss_sent": 0.16728414595127106, - "loss_sod": 0.05359148979187012, - "loss_total": 0.4332146644592285, - "step": 343999 - }, - { - "epoch": 0.018, - "grad_norm": 1.372086524963379, - "learning_rate": 3.1249590628969703e-06, - "loss": 0.4355, - "step": 344000 - }, - { - "epoch": 0.018, - "eval_loss": 0.3993147313594818, - "eval_runtime": 150.0789, - "eval_samples_per_second": 102.899, - "eval_steps_per_second": 0.806, - "step": 344000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.801217079162598, - "loss_rtd": 0.2368241250514984, - "loss_sent": 0.20728328824043274, - "loss_sod": 0.08091040700674057, - "loss_total": 0.5250178575515747, - "step": 344099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.689062595367432, - "loss_rtd": 0.23044942319393158, - "loss_sent": 0.11517254263162613, - "loss_sod": 0.029528971761465073, - "loss_total": 0.3751509189605713, - "step": 344099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.1511733531951904, - "learning_rate": 3.1139258674345307e-06, - "loss": 0.423, - "step": 344100 - }, - { - "epoch": 0.018398, - "loss_gen": 5.80804443359375, - "loss_rtd": 0.23635703325271606, - "loss_sent": 0.3243516981601715, - "loss_sod": 0.04271318018436432, - "loss_total": 0.6034219264984131, - "step": 344199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.5195794105529785, - "loss_rtd": 0.21610403060913086, - "loss_sent": 0.19665342569351196, - "loss_sod": 0.008008423261344433, - "loss_total": 0.42076587677001953, - "step": 344199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.282293677330017, - "learning_rate": 3.102911557677152e-06, - "loss": 0.42, - "step": 344200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.815941333770752, - "loss_rtd": 0.19316354393959045, - "loss_sent": 0.20119412243366241, - "loss_sod": 0.10021115094423294, - "loss_total": 0.4945688247680664, - "step": 344299 - }, - { - "epoch": 0.018598, - "loss_gen": 6.013330459594727, - "loss_rtd": 0.21986351907253265, - "loss_sent": 0.32029974460601807, - "loss_sod": 0.01828307844698429, - "loss_total": 0.5584463477134705, - "step": 344299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.2562536001205444, - "learning_rate": 3.0919161380613793e-06, - "loss": 0.4158, - "step": 344300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.973540782928467, - "loss_rtd": 0.21433651447296143, - "loss_sent": 0.32896122336387634, - "loss_sod": 0.04544921591877937, - "loss_total": 0.588746964931488, - "step": 344399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.3708815574646, - "loss_rtd": 0.17824068665504456, - "loss_sent": 0.03908013179898262, - "loss_sod": 0.0200839601457119, - "loss_total": 0.23740477859973907, - "step": 344399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.985539197921753, - "learning_rate": 3.0809396130161817e-06, - "loss": 0.427, - "step": 344400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.236983299255371, - "loss_rtd": 0.16330642998218536, - "loss_sent": 0.017720935866236687, - "loss_sod": 0.04442419111728668, - "loss_total": 0.22545155882835388, - "step": 344499 - }, - { - "epoch": 0.018998, - "loss_gen": 6.019647121429443, - "loss_rtd": 0.2066533863544464, - "loss_sent": 0.3200598657131195, - "loss_sod": 0.07714848965406418, - "loss_total": 0.6038617491722107, - "step": 344499 - }, - { - "epoch": 0.019, - "grad_norm": 1.3927303552627563, - "learning_rate": 3.0699819869628943e-06, - "loss": 0.4076, - "step": 344500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.909665584564209, - "loss_rtd": 0.2125091552734375, - "loss_sent": 0.10321272164583206, - "loss_sod": 0.07435595244169235, - "loss_total": 0.3900778293609619, - "step": 344599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.874019622802734, - "loss_rtd": 0.21441978216171265, - "loss_sent": 0.2573813199996948, - "loss_sod": 0.016887767240405083, - "loss_total": 0.4886888861656189, - "step": 344599 - }, - { - "epoch": 0.0192, - "grad_norm": 1.1322945356369019, - "learning_rate": 3.059043264315259e-06, - "loss": 0.4258, - "step": 344600 - }, - { - "epoch": 0.019398, - "loss_gen": 6.0817437171936035, - "loss_rtd": 0.2001039981842041, - "loss_sent": 0.14977571368217468, - "loss_sod": 0.03723203018307686, - "loss_total": 0.38711172342300415, - "step": 344699 - }, - { - "epoch": 0.019398, - "loss_gen": 6.110212802886963, - "loss_rtd": 0.21464529633522034, - "loss_sent": 0.11174482852220535, - "loss_sod": 0.05072154104709625, - "loss_total": 0.37711167335510254, - "step": 344699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.9789242744445801, - "learning_rate": 3.0481234494793786e-06, - "loss": 0.3965, - "step": 344700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.1890058517456055, - "loss_rtd": 0.17944465577602386, - "loss_sent": 2.522169961594045e-05, - "loss_sod": 0.069935142993927, - "loss_total": 0.24940502643585205, - "step": 344799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.619328498840332, - "loss_rtd": 0.18111827969551086, - "loss_sent": 0.0649733766913414, - "loss_sod": 0.006838769651949406, - "loss_total": 0.2529304325580597, - "step": 344799 - }, - { - "epoch": 0.0196, - "grad_norm": 0.6171516180038452, - "learning_rate": 3.0372225468537518e-06, - "loss": 0.389, - "step": 344800 - }, - { - "epoch": 0.019798, - "loss_gen": 6.050124168395996, - "loss_rtd": 0.22084671258926392, - "loss_sent": 0.23997093737125397, - "loss_sod": 0.016547055914998055, - "loss_total": 0.477364718914032, - "step": 344899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.950810432434082, - "loss_rtd": 0.20542003214359283, - "loss_sent": 0.1829788237810135, - "loss_sod": 0.017055897042155266, - "loss_total": 0.40545475482940674, - "step": 344899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.7295876741409302, - "learning_rate": 3.026340560829272e-06, - "loss": 0.4319, - "step": 344900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.952529430389404, - "loss_rtd": 0.22140717506408691, - "loss_sent": 0.1345573514699936, - "loss_sod": 0.05161639675498009, - "loss_total": 0.4075809121131897, - "step": 344999 - }, - { - "epoch": 0.019998, - "loss_gen": 6.1175336837768555, - "loss_rtd": 0.20115554332733154, - "loss_sent": 0.14179235696792603, - "loss_sod": 0.06289678812026978, - "loss_total": 0.40584468841552734, - "step": 344999 - }, - { - "epoch": 0.02, - "grad_norm": 1.2358646392822266, - "learning_rate": 3.015477495789204e-06, - "loss": 0.4123, - "step": 345000 - }, - { - "epoch": 0.02, - "eval_loss": 0.3962290287017822, - "eval_runtime": 151.7704, - "eval_samples_per_second": 101.752, - "eval_steps_per_second": 0.797, - "step": 345000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.8342156410217285, - "loss_rtd": 0.2033233940601349, - "loss_sent": 0.17222349345684052, - "loss_sod": 0.08103340864181519, - "loss_total": 0.4565802812576294, - "step": 345099 - }, - { - "epoch": 0.020198, - "loss_gen": 6.0856852531433105, - "loss_rtd": 0.2134939283132553, - "loss_sent": 0.10274343192577362, - "loss_sod": 0.14162708818912506, - "loss_total": 0.4578644633293152, - "step": 345099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.2765001058578491, - "learning_rate": 3.004633356109171e-06, - "loss": 0.4355, - "step": 345100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.485940456390381, - "loss_rtd": 0.17827960848808289, - "loss_sent": 0.012701097875833511, - "loss_sod": 0.04595581442117691, - "loss_total": 0.2369365096092224, - "step": 345199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.916727066040039, - "loss_rtd": 0.21812865138053894, - "loss_sent": 0.366161584854126, - "loss_sod": 0.05725693702697754, - "loss_total": 0.6415472030639648, - "step": 345199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.2278943061828613, - "learning_rate": 2.993808146157201e-06, - "loss": 0.4141, - "step": 345200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.699069976806641, - "loss_rtd": 0.1663743555545807, - "loss_sent": 0.06079315394163132, - "loss_sod": 0.039484769105911255, - "loss_total": 0.26665228605270386, - "step": 345299 - }, - { - "epoch": 0.020598, - "loss_gen": 6.088677883148193, - "loss_rtd": 0.19248007237911224, - "loss_sent": 0.3041754961013794, - "loss_sod": 0.08131252974271774, - "loss_total": 0.5779681205749512, - "step": 345299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.2986756563186646, - "learning_rate": 2.9830018702936946e-06, - "loss": 0.4206, - "step": 345300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.754996299743652, - "loss_rtd": 0.22188951075077057, - "loss_sent": 0.13857239484786987, - "loss_sod": 0.02905525080859661, - "loss_total": 0.3895171582698822, - "step": 345399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.5441813468933105, - "loss_rtd": 0.22319650650024414, - "loss_sent": 0.10199989378452301, - "loss_sod": 0.017508793622255325, - "loss_total": 0.3427051901817322, - "step": 345399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.043805480003357, - "learning_rate": 2.9722145328714147e-06, - "loss": 0.3906, - "step": 345400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.86095666885376, - "loss_rtd": 0.20533639192581177, - "loss_sent": 0.3244718611240387, - "loss_sod": 0.0008062995038926601, - "loss_total": 0.5306145548820496, - "step": 345499 - }, - { - "epoch": 0.020998, - "loss_gen": 6.024995803833008, - "loss_rtd": 0.19126074016094208, - "loss_sent": 0.1506563425064087, - "loss_sod": 0.04864851012825966, - "loss_total": 0.39056557416915894, - "step": 345499 - }, - { - "epoch": 0.021, - "grad_norm": 1.12796151638031, - "learning_rate": 2.961446138235491e-06, - "loss": 0.4078, - "step": 345500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.854345321655273, - "loss_rtd": 0.1963692456483841, - "loss_sent": 0.35575902462005615, - "loss_sod": 0.013423793949186802, - "loss_total": 0.5655520558357239, - "step": 345599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.773720741271973, - "loss_rtd": 0.2145538032054901, - "loss_sent": 0.12099415808916092, - "loss_sod": 0.03502482548356056, - "loss_total": 0.3705727756023407, - "step": 345599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.850409209728241, - "learning_rate": 2.950696690723437e-06, - "loss": 0.4429, - "step": 345600 - }, - { - "epoch": 0.021398, - "loss_gen": 6.223632335662842, - "loss_rtd": 0.21802760660648346, - "loss_sent": 0.28415265679359436, - "loss_sod": 0.02522401139140129, - "loss_total": 0.5274043083190918, - "step": 345699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.781085014343262, - "loss_rtd": 0.2077065408229828, - "loss_sent": 0.15649068355560303, - "loss_sod": 0.05073114484548569, - "loss_total": 0.4149283766746521, - "step": 345699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.5991970300674438, - "learning_rate": 2.939966194665139e-06, - "loss": 0.4119, - "step": 345700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.400541305541992, - "loss_rtd": 0.15435712039470673, - "loss_sent": 0.0008989697089418769, - "loss_sod": 0.17497660219669342, - "loss_total": 0.3302326798439026, - "step": 345799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.4629316329956055, - "loss_rtd": 0.17461073398590088, - "loss_sent": 0.00023651798255741596, - "loss_sod": 0.053559303283691406, - "loss_total": 0.22840654850006104, - "step": 345799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.1368045806884766, - "learning_rate": 2.929254654382818e-06, - "loss": 0.3953, - "step": 345800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.579807281494141, - "loss_rtd": 0.1817014068365097, - "loss_sent": 0.03756831958889961, - "loss_sod": 0.028328150510787964, - "loss_total": 0.24759787321090698, - "step": 345899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.854094505310059, - "loss_rtd": 0.20325829088687897, - "loss_sent": 0.22642068564891815, - "loss_sod": 0.11384735256433487, - "loss_total": 0.5435263514518738, - "step": 345899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.1834964752197266, - "learning_rate": 2.9185620741911e-06, - "loss": 0.4, - "step": 345900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.6838178634643555, - "loss_rtd": 0.2041275054216385, - "loss_sent": 0.45005539059638977, - "loss_sod": 0.013449644669890404, - "loss_total": 0.667632520198822, - "step": 345999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.546184539794922, - "loss_rtd": 0.20533013343811035, - "loss_sent": 0.13110969960689545, - "loss_sod": 0.013098989613354206, - "loss_total": 0.34953880310058594, - "step": 345999 - }, - { - "epoch": 0.022, - "grad_norm": 2.9552416801452637, - "learning_rate": 2.907888458396946e-06, - "loss": 0.4146, - "step": 346000 - }, - { - "epoch": 0.022, - "eval_loss": 0.394890695810318, - "eval_runtime": 150.3967, - "eval_samples_per_second": 102.682, - "eval_steps_per_second": 0.805, - "step": 346000 - }, - { - "epoch": 0.022198, - "loss_gen": 6.177165508270264, - "loss_rtd": 0.20960292220115662, - "loss_sent": 0.22950461506843567, - "loss_sod": 0.015612190589308739, - "loss_total": 0.4547197222709656, - "step": 346099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.956109046936035, - "loss_rtd": 0.18064485490322113, - "loss_sent": 0.06233496963977814, - "loss_sod": 0.015554279088973999, - "loss_total": 0.25853410363197327, - "step": 346099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.1283988952636719, - "learning_rate": 2.8972338112996933e-06, - "loss": 0.4164, - "step": 346100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.784541130065918, - "loss_rtd": 0.19813288748264313, - "loss_sent": 0.2804802358150482, - "loss_sod": 0.10672664642333984, - "loss_total": 0.5853397846221924, - "step": 346199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.828537940979004, - "loss_rtd": 0.1991063356399536, - "loss_sent": 0.13255983591079712, - "loss_sod": 0.02511822059750557, - "loss_total": 0.3567844033241272, - "step": 346199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.2302149534225464, - "learning_rate": 2.886598137191021e-06, - "loss": 0.4232, - "step": 346200 - }, - { - "epoch": 0.022598, - "loss_gen": 6.1805195808410645, - "loss_rtd": 0.1977512389421463, - "loss_sent": 0.053855400532484055, - "loss_sod": 0.10048055648803711, - "loss_total": 0.3520871698856354, - "step": 346299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.240399360656738, - "loss_rtd": 0.1770283430814743, - "loss_sent": 0.00022501042985823005, - "loss_sod": 0.07942786812782288, - "loss_total": 0.2566812038421631, - "step": 346299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.9154486656188965, - "learning_rate": 2.8759814403549857e-06, - "loss": 0.418, - "step": 346300 - }, - { - "epoch": 0.022798, - "loss_gen": 6.037413120269775, - "loss_rtd": 0.2178463488817215, - "loss_sent": 0.08771871775388718, - "loss_sod": 0.10883640497922897, - "loss_total": 0.41440147161483765, - "step": 346399 - }, - { - "epoch": 0.022798, - "loss_gen": 6.080913066864014, - "loss_rtd": 0.20475168526172638, - "loss_sent": 0.10265511274337769, - "loss_sod": 0.05653534084558487, - "loss_total": 0.36394214630126953, - "step": 346399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.0961288213729858, - "learning_rate": 2.8653837250679992e-06, - "loss": 0.4216, - "step": 346400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.868289470672607, - "loss_rtd": 0.18100692331790924, - "loss_sent": 0.22061115503311157, - "loss_sod": 0.008930054493248463, - "loss_total": 0.4105481505393982, - "step": 346499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.729867458343506, - "loss_rtd": 0.22023260593414307, - "loss_sent": 0.17562322318553925, - "loss_sod": 0.0014352818252518773, - "loss_total": 0.3972911238670349, - "step": 346499 - }, - { - "epoch": 0.023, - "grad_norm": 0.720500648021698, - "learning_rate": 2.8548049955988034e-06, - "loss": 0.4191, - "step": 346500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.30037260055542, - "loss_rtd": 0.1821168065071106, - "loss_sent": 0.018517278134822845, - "loss_sod": 0.06972560286521912, - "loss_total": 0.27035969495773315, - "step": 346599 - }, - { - "epoch": 0.023198, - "loss_gen": 6.110710144042969, - "loss_rtd": 0.21628817915916443, - "loss_sent": 0.0536821223795414, - "loss_sod": 0.012368336319923401, - "loss_total": 0.2823386490345001, - "step": 346599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.7004936933517456, - "learning_rate": 2.8442452562085277e-06, - "loss": 0.4147, - "step": 346600 - }, - { - "epoch": 0.023398, - "loss_gen": 6.185731887817383, - "loss_rtd": 0.2094772458076477, - "loss_sent": 0.33429858088493347, - "loss_sod": 0.0627126544713974, - "loss_total": 0.6064884662628174, - "step": 346699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.375607967376709, - "loss_rtd": 0.17778867483139038, - "loss_sent": 2.4312806999660097e-05, - "loss_sod": 0.14853551983833313, - "loss_total": 0.32634851336479187, - "step": 346699 - }, - { - "epoch": 0.0234, - "grad_norm": 1.469561219215393, - "learning_rate": 2.8337045111506143e-06, - "loss": 0.4143, - "step": 346700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.915907859802246, - "loss_rtd": 0.213593989610672, - "loss_sent": 0.1473378986120224, - "loss_sod": 0.004312505479902029, - "loss_total": 0.36524438858032227, - "step": 346799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.875881671905518, - "loss_rtd": 0.19377167522907257, - "loss_sent": 0.03378697484731674, - "loss_sod": 0.0711243748664856, - "loss_total": 0.2986830174922943, - "step": 346799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.2263453006744385, - "learning_rate": 2.823182764670884e-06, - "loss": 0.415, - "step": 346800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.8458099365234375, - "loss_rtd": 0.21698597073554993, - "loss_sent": 0.12918789684772491, - "loss_sod": 0.046139199286699295, - "loss_total": 0.39231306314468384, - "step": 346899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.901025772094727, - "loss_rtd": 0.22223611176013947, - "loss_sent": 0.43904101848602295, - "loss_sod": 0.07018986344337463, - "loss_total": 0.7314670085906982, - "step": 346899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.2484575510025024, - "learning_rate": 2.812680021007491e-06, - "loss": 0.4213, - "step": 346900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.7027058601379395, - "loss_rtd": 0.17953623831272125, - "loss_sent": 0.10065238922834396, - "loss_sod": 0.06401006132364273, - "loss_total": 0.34419870376586914, - "step": 346999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.867369174957275, - "loss_rtd": 0.19031651318073273, - "loss_sent": 0.13639628887176514, - "loss_sod": 0.061066266149282455, - "loss_total": 0.3877790570259094, - "step": 346999 - }, - { - "epoch": 0.024, - "grad_norm": 1.2584656476974487, - "learning_rate": 2.802196284390951e-06, - "loss": 0.3971, - "step": 347000 - }, - { - "epoch": 0.024, - "eval_loss": 0.40038397908210754, - "eval_runtime": 150.6489, - "eval_samples_per_second": 102.51, - "eval_steps_per_second": 0.803, - "step": 347000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.800816059112549, - "loss_rtd": 0.19879066944122314, - "loss_sent": 0.19041036069393158, - "loss_sod": 0.038729384541511536, - "loss_total": 0.42793041467666626, - "step": 347099 - }, - { - "epoch": 0.024198, - "loss_gen": 6.2220778465271, - "loss_rtd": 0.21360954642295837, - "loss_sent": 0.12894247472286224, - "loss_sod": 0.03946185111999512, - "loss_total": 0.38201385736465454, - "step": 347099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.9547300934791565, - "learning_rate": 2.7917315590440975e-06, - "loss": 0.4114, - "step": 347100 - }, - { - "epoch": 0.024398, - "loss_gen": 6.054231643676758, - "loss_rtd": 0.22664403915405273, - "loss_sent": 0.33643460273742676, - "loss_sod": 0.013595287688076496, - "loss_total": 0.5766739249229431, - "step": 347199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.922176837921143, - "loss_rtd": 0.19353726506233215, - "loss_sent": 0.14798879623413086, - "loss_sod": 0.011312786489725113, - "loss_total": 0.3528388440608978, - "step": 347199 - }, - { - "epoch": 0.0244, - "grad_norm": 1.0335471630096436, - "learning_rate": 2.7812858491821305e-06, - "loss": 0.4203, - "step": 347200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.435997009277344, - "loss_rtd": 0.16638603806495667, - "loss_sent": 0.041973795741796494, - "loss_sod": 0.014071225188672543, - "loss_total": 0.22243106365203857, - "step": 347299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.869622230529785, - "loss_rtd": 0.2162979245185852, - "loss_sent": 0.17184807360172272, - "loss_sod": 0.0443805567920208, - "loss_total": 0.43252652883529663, - "step": 347299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.9300585985183716, - "learning_rate": 2.7708591590125786e-06, - "loss": 0.4075, - "step": 347300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.8696818351745605, - "loss_rtd": 0.19919665157794952, - "loss_sent": 0.11572212725877762, - "loss_sod": 0.037589769810438156, - "loss_total": 0.352508544921875, - "step": 347399 - }, - { - "epoch": 0.024798, - "loss_gen": 6.15183162689209, - "loss_rtd": 0.2178945243358612, - "loss_sent": 0.16867373883724213, - "loss_sod": 0.0836891382932663, - "loss_total": 0.47025740146636963, - "step": 347399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.7121327519416809, - "learning_rate": 2.7604514927353e-06, - "loss": 0.391, - "step": 347400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.5167412757873535, - "loss_rtd": 0.21066704392433167, - "loss_sent": 0.05076691880822182, - "loss_sod": 0.02963225170969963, - "loss_total": 0.2910662293434143, - "step": 347499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.308770656585693, - "loss_rtd": 0.1771867871284485, - "loss_sent": 2.2852533220429905e-05, - "loss_sod": 0.1340395212173462, - "loss_total": 0.31124916672706604, - "step": 347499 - }, - { - "epoch": 0.025, - "grad_norm": 1.2258532047271729, - "learning_rate": 2.7500628545425177e-06, - "loss": 0.4178, - "step": 347500 - }, - { - "epoch": 0.025198, - "loss_gen": 6.061978816986084, - "loss_rtd": 0.21013672649860382, - "loss_sent": 0.22456708550453186, - "loss_sod": 0.07185360789299011, - "loss_total": 0.5065574645996094, - "step": 347599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.73763370513916, - "loss_rtd": 0.19905613362789154, - "loss_sent": 2.395332558080554e-05, - "loss_sod": 0.0841599628329277, - "loss_total": 0.28324005007743835, - "step": 347599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.3880962133407593, - "learning_rate": 2.7396932486187634e-06, - "loss": 0.4199, - "step": 347600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.509828567504883, - "loss_rtd": 0.1791846752166748, - "loss_sent": 2.3359158149105497e-05, - "loss_sod": 0.05962657183408737, - "loss_total": 0.23883461952209473, - "step": 347699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.709102630615234, - "loss_rtd": 0.18945716321468353, - "loss_sent": 0.09810947626829147, - "loss_sod": 0.01001054234802723, - "loss_total": 0.2975771725177765, - "step": 347699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.7576766014099121, - "learning_rate": 2.7293426791409228e-06, - "loss": 0.4236, - "step": 347700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.762884616851807, - "loss_rtd": 0.20979386568069458, - "loss_sent": 0.08806712180376053, - "loss_sod": 0.05111350864171982, - "loss_total": 0.3489744961261749, - "step": 347799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.711957931518555, - "loss_rtd": 0.19799841940402985, - "loss_sent": 0.07584696263074875, - "loss_sod": 0.07980622351169586, - "loss_total": 0.35365161299705505, - "step": 347799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.0089797973632812, - "learning_rate": 2.719011150278189e-06, - "loss": 0.4078, - "step": 347800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.955827713012695, - "loss_rtd": 0.20267730951309204, - "loss_sent": 0.20192524790763855, - "loss_sod": 0.06518060714006424, - "loss_total": 0.46978315711021423, - "step": 347899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.790525913238525, - "loss_rtd": 0.18522995710372925, - "loss_sent": 0.4387192726135254, - "loss_sod": 0.07361598312854767, - "loss_total": 0.6975651979446411, - "step": 347899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.8381214141845703, - "learning_rate": 2.7086986661921164e-06, - "loss": 0.4022, - "step": 347900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.7063164710998535, - "loss_rtd": 0.20931647717952728, - "loss_sent": 0.5352754592895508, - "loss_sod": 0.0051032924093306065, - "loss_total": 0.7496952414512634, - "step": 347999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.9625701904296875, - "loss_rtd": 0.2113124579191208, - "loss_sent": 0.09579739719629288, - "loss_sod": 0.0391378290951252, - "loss_total": 0.34624767303466797, - "step": 347999 - }, - { - "epoch": 0.026, - "grad_norm": 1.46800696849823, - "learning_rate": 2.6984052310365604e-06, - "loss": 0.4039, - "step": 348000 - }, - { - "epoch": 0.026, - "eval_loss": 0.3929156959056854, - "eval_runtime": 150.1285, - "eval_samples_per_second": 102.865, - "eval_steps_per_second": 0.806, - "step": 348000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.764568328857422, - "loss_rtd": 0.21054331958293915, - "loss_sent": 0.15424807369709015, - "loss_sod": 0.05876941233873367, - "loss_total": 0.4235607981681824, - "step": 348099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.832766532897949, - "loss_rtd": 0.22070473432540894, - "loss_sent": 0.26257625222206116, - "loss_sod": 0.001788466819562018, - "loss_total": 0.4850694537162781, - "step": 348099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.9740235805511475, - "learning_rate": 2.688130848957726e-06, - "loss": 0.4014, - "step": 348100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.914451599121094, - "loss_rtd": 0.22258107364177704, - "loss_sent": 0.547576367855072, - "loss_sod": 0.03981813043355942, - "loss_total": 0.8099755644798279, - "step": 348199 - }, - { - "epoch": 0.026398, - "loss_gen": 6.155699253082275, - "loss_rtd": 0.23639720678329468, - "loss_sent": 0.14518557488918304, - "loss_sod": 0.11732011288404465, - "loss_total": 0.49890291690826416, - "step": 348199 - }, - { - "epoch": 0.0264, - "grad_norm": 2.6284737586975098, - "learning_rate": 2.6778755240941256e-06, - "loss": 0.4104, - "step": 348200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.842240333557129, - "loss_rtd": 0.20628581941127777, - "loss_sent": 0.024945596233010292, - "loss_sod": 0.07663668692111969, - "loss_total": 0.307868093252182, - "step": 348299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.494091033935547, - "loss_rtd": 0.16447770595550537, - "loss_sent": 2.5635152269387618e-05, - "loss_sod": 0.15456324815750122, - "loss_total": 0.31906658411026, - "step": 348299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.1853891611099243, - "learning_rate": 2.6676392605766043e-06, - "loss": 0.4373, - "step": 348300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.845241069793701, - "loss_rtd": 0.20244090259075165, - "loss_sent": 0.3507749140262604, - "loss_sod": 0.01659446582198143, - "loss_total": 0.5698102712631226, - "step": 348399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.945862293243408, - "loss_rtd": 0.20262795686721802, - "loss_sent": 0.23626303672790527, - "loss_sod": 0.03078293427824974, - "loss_total": 0.46967393159866333, - "step": 348399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.5612776279449463, - "learning_rate": 2.6574220625283253e-06, - "loss": 0.3915, - "step": 348400 - }, - { - "epoch": 0.026998, - "loss_gen": 6.285218715667725, - "loss_rtd": 0.22945177555084229, - "loss_sent": 0.058136165142059326, - "loss_sod": 0.09110814332962036, - "loss_total": 0.378696084022522, - "step": 348499 - }, - { - "epoch": 0.026998, - "loss_gen": 6.327558994293213, - "loss_rtd": 0.20909154415130615, - "loss_sent": 0.12776273488998413, - "loss_sod": 0.0658029168844223, - "loss_total": 0.4026572108268738, - "step": 348499 - }, - { - "epoch": 0.027, - "grad_norm": 1.0374902486801147, - "learning_rate": 2.647223934064791e-06, - "loss": 0.4079, - "step": 348500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.985374450683594, - "loss_rtd": 0.224440336227417, - "loss_sent": 0.20375190675258636, - "loss_sod": 0.13233348727226257, - "loss_total": 0.5605257153511047, - "step": 348599 - }, - { - "epoch": 0.027198, - "loss_gen": 6.076707363128662, - "loss_rtd": 0.2122603803873062, - "loss_sent": 0.17057929933071136, - "loss_sod": 0.1456124633550644, - "loss_total": 0.5284521579742432, - "step": 348599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.492030143737793, - "learning_rate": 2.6370448792937817e-06, - "loss": 0.429, - "step": 348600 - }, - { - "epoch": 0.027398, - "loss_gen": 6.192418098449707, - "loss_rtd": 0.2122466266155243, - "loss_sent": 0.03144949674606323, - "loss_sod": 0.05586479976773262, - "loss_total": 0.29956093430519104, - "step": 348699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.737111568450928, - "loss_rtd": 0.22504131495952606, - "loss_sent": 0.16083656251430511, - "loss_sod": 0.0524526983499527, - "loss_total": 0.43833059072494507, - "step": 348699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.9526489973068237, - "learning_rate": 2.6268849023154294e-06, - "loss": 0.4264, - "step": 348700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.714515686035156, - "loss_rtd": 0.215691477060318, - "loss_sent": 0.21480238437652588, - "loss_sod": 0.017601560801267624, - "loss_total": 0.448095440864563, - "step": 348799 - }, - { - "epoch": 0.027598, - "loss_gen": 6.153249263763428, - "loss_rtd": 0.20012231171131134, - "loss_sent": 0.21374326944351196, - "loss_sod": 0.12404444068670273, - "loss_total": 0.537909984588623, - "step": 348799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.3342827558517456, - "learning_rate": 2.6167440072221826e-06, - "loss": 0.4333, - "step": 348800 - }, - { - "epoch": 0.027798, - "loss_gen": 6.138761043548584, - "loss_rtd": 0.20648187398910522, - "loss_sent": 0.20477847754955292, - "loss_sod": 0.019451338797807693, - "loss_total": 0.43071168661117554, - "step": 348899 - }, - { - "epoch": 0.027798, - "loss_gen": 6.130977630615234, - "loss_rtd": 0.2113247811794281, - "loss_sent": 0.20160768926143646, - "loss_sod": 0.04805793613195419, - "loss_total": 0.46099042892456055, - "step": 348899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.7857123613357544, - "learning_rate": 2.606622198098774e-06, - "loss": 0.3957, - "step": 348900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.957788467407227, - "loss_rtd": 0.21118159592151642, - "loss_sent": 0.10581184178590775, - "loss_sod": 0.11430491507053375, - "loss_total": 0.4312983453273773, - "step": 348999 - }, - { - "epoch": 0.027998, - "loss_gen": 6.248266696929932, - "loss_rtd": 0.21391551196575165, - "loss_sent": 0.1562371701002121, - "loss_sod": 0.04114203900098801, - "loss_total": 0.41129469871520996, - "step": 348999 - }, - { - "epoch": 0.028, - "grad_norm": 0.9734150171279907, - "learning_rate": 2.59651947902228e-06, - "loss": 0.4003, - "step": 349000 - }, - { - "epoch": 0.028, - "eval_loss": 0.3954238295555115, - "eval_runtime": 150.5644, - "eval_samples_per_second": 102.567, - "eval_steps_per_second": 0.804, - "step": 349000 - }, - { - "epoch": 0.028198, - "loss_gen": 6.046821594238281, - "loss_rtd": 0.21712510287761688, - "loss_sent": 0.1850036084651947, - "loss_sod": 0.05440608412027359, - "loss_total": 0.45653480291366577, - "step": 349099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.506927013397217, - "loss_rtd": 0.1919369101524353, - "loss_sent": 0.006589873693883419, - "loss_sod": 0.13269713521003723, - "loss_total": 0.33122390508651733, - "step": 349099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.9546361565589905, - "learning_rate": 2.586435854062069e-06, - "loss": 0.4144, - "step": 349100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.589245796203613, - "loss_rtd": 0.17651744186878204, - "loss_sent": 2.693514215934556e-05, - "loss_sod": 0.2369680106639862, - "loss_total": 0.4135124087333679, - "step": 349199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.500578880310059, - "loss_rtd": 0.15838541090488434, - "loss_sent": 0.015942055732011795, - "loss_sod": 0.0537986196577549, - "loss_total": 0.22812607884407043, - "step": 349199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.1543065309524536, - "learning_rate": 2.5763713272798363e-06, - "loss": 0.4299, - "step": 349200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.8040385246276855, - "loss_rtd": 0.20990204811096191, - "loss_sent": 0.22593101859092712, - "loss_sod": 0.02127135917544365, - "loss_total": 0.4571044445037842, - "step": 349299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.805685520172119, - "loss_rtd": 0.2091977447271347, - "loss_sent": 0.13367465138435364, - "loss_sod": 0.19740842282772064, - "loss_total": 0.540280818939209, - "step": 349299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.1336307525634766, - "learning_rate": 2.5663259027295495e-06, - "loss": 0.426, - "step": 349300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.842103004455566, - "loss_rtd": 0.21626141667366028, - "loss_sent": 0.2969825565814972, - "loss_sod": 0.09903450310230255, - "loss_total": 0.6122784614562988, - "step": 349399 - }, - { - "epoch": 0.028798, - "loss_gen": 6.108442783355713, - "loss_rtd": 0.21364839375019073, - "loss_sent": 0.380411833524704, - "loss_sod": 0.10154962539672852, - "loss_total": 0.6956098079681396, - "step": 349399 - }, - { - "epoch": 0.0288, - "grad_norm": 2.2528610229492188, - "learning_rate": 2.5562995844575165e-06, - "loss": 0.4325, - "step": 349400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.953249931335449, - "loss_rtd": 0.22787602245807648, - "loss_sent": 0.47932255268096924, - "loss_sod": 0.010822047479450703, - "loss_total": 0.7180206179618835, - "step": 349499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.906355381011963, - "loss_rtd": 0.24051526188850403, - "loss_sent": 0.18612606823444366, - "loss_sod": 0.02720388025045395, - "loss_total": 0.45384520292282104, - "step": 349499 - }, - { - "epoch": 0.029, - "grad_norm": 1.124613881111145, - "learning_rate": 2.5462923765023405e-06, - "loss": 0.4311, - "step": 349500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.871358871459961, - "loss_rtd": 0.21468381583690643, - "loss_sent": 0.23703357577323914, - "loss_sod": 0.01827329769730568, - "loss_total": 0.46999067068099976, - "step": 349599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.956061840057373, - "loss_rtd": 0.2228805273771286, - "loss_sent": 0.22619858384132385, - "loss_sod": 0.03803536295890808, - "loss_total": 0.48711445927619934, - "step": 349599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.0738983154296875, - "learning_rate": 2.5363042828949244e-06, - "loss": 0.4307, - "step": 349600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.824796676635742, - "loss_rtd": 0.19950264692306519, - "loss_sent": 0.1863393485546112, - "loss_sod": 0.030530648306012154, - "loss_total": 0.4163726568222046, - "step": 349699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.915545463562012, - "loss_rtd": 0.19600270688533783, - "loss_sent": 0.21444423496723175, - "loss_sod": 0.049449022859334946, - "loss_total": 0.4598959684371948, - "step": 349699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.9910133481025696, - "learning_rate": 2.5263353076584675e-06, - "loss": 0.4212, - "step": 349700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.945225715637207, - "loss_rtd": 0.20018666982650757, - "loss_sent": 0.30984848737716675, - "loss_sod": 0.01725754328072071, - "loss_total": 0.5272927284240723, - "step": 349799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.607385635375977, - "loss_rtd": 0.19031044840812683, - "loss_sent": 0.00022725979215465486, - "loss_sod": 0.09896920621395111, - "loss_total": 0.2895069122314453, - "step": 349799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.5268876552581787, - "learning_rate": 2.516385454808462e-06, - "loss": 0.4151, - "step": 349800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.659405708312988, - "loss_rtd": 0.22153975069522858, - "loss_sent": 0.23582425713539124, - "loss_sod": 0.020174628123641014, - "loss_total": 0.4775386452674866, - "step": 349899 - }, - { - "epoch": 0.029798, - "loss_gen": 6.096982002258301, - "loss_rtd": 0.22046737372875214, - "loss_sent": 0.10626024752855301, - "loss_sod": 0.023365899920463562, - "loss_total": 0.3500935137271881, - "step": 349899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.1230626106262207, - "learning_rate": 2.5064547283527195e-06, - "loss": 0.4278, - "step": 349900 - }, - { - "epoch": 0.029998, - "loss_gen": 6.255684852600098, - "loss_rtd": 0.21020130813121796, - "loss_sent": 0.101527139544487, - "loss_sod": 0.10524416714906693, - "loss_total": 0.41697263717651367, - "step": 349999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.902994632720947, - "loss_rtd": 0.21985577046871185, - "loss_sent": 0.16969870030879974, - "loss_sod": 0.01671951822936535, - "loss_total": 0.4062739908695221, - "step": 349999 - }, - { - "epoch": 0.03, - "grad_norm": 0.980015218257904, - "learning_rate": 2.4965431322913403e-06, - "loss": 0.4131, - "step": 350000 - }, - { - "epoch": 0.03, - "eval_loss": 0.3928295075893402, - "eval_runtime": 150.3192, - "eval_samples_per_second": 102.735, - "eval_steps_per_second": 0.805, - "step": 350000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.98183536529541, - "loss_rtd": 0.2173718810081482, - "loss_sent": 0.25578585267066956, - "loss_sod": 0.032859232276678085, - "loss_total": 0.5060169696807861, - "step": 350099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.7258782386779785, - "loss_rtd": 0.2088528573513031, - "loss_sent": 0.21010349690914154, - "loss_sod": 0.026808001101017, - "loss_total": 0.44576436281204224, - "step": 350099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.282133936882019, - "learning_rate": 2.4866506706167025e-06, - "loss": 0.4112, - "step": 350100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.903136730194092, - "loss_rtd": 0.20793114602565765, - "loss_sent": 0.3298494219779968, - "loss_sod": 0.019741952419281006, - "loss_total": 0.5575225353240967, - "step": 350199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.937349796295166, - "loss_rtd": 0.21580328047275543, - "loss_sent": 0.04803667217493057, - "loss_sod": 0.032336391508579254, - "loss_total": 0.29617634415626526, - "step": 350199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.0936003923416138, - "learning_rate": 2.4767773473134914e-06, - "loss": 0.4243, - "step": 350200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.933935642242432, - "loss_rtd": 0.23456254601478577, - "loss_sent": 0.2062845677137375, - "loss_sod": 0.09549696743488312, - "loss_total": 0.5363441109657288, - "step": 350299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.8825812339782715, - "loss_rtd": 0.21979853510856628, - "loss_sent": 0.15967939794063568, - "loss_sod": 0.03874390572309494, - "loss_total": 0.4182218313217163, - "step": 350299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.3687313795089722, - "learning_rate": 2.4669231663586867e-06, - "loss": 0.4157, - "step": 350300 - }, - { - "epoch": 0.030798, - "loss_gen": 6.029076099395752, - "loss_rtd": 0.22488151490688324, - "loss_sent": 0.26323971152305603, - "loss_sod": 0.010428966954350471, - "loss_total": 0.4985501766204834, - "step": 350399 - }, - { - "epoch": 0.030798, - "loss_gen": 6.057694911956787, - "loss_rtd": 0.20339101552963257, - "loss_sent": 0.28822627663612366, - "loss_sod": 0.10470068454742432, - "loss_total": 0.5963180065155029, - "step": 350399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.9787907600402832, - "learning_rate": 2.457088131721541e-06, - "loss": 0.435, - "step": 350400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.268889427185059, - "loss_rtd": 0.1550845503807068, - "loss_sent": 0.07245110720396042, - "loss_sod": 0.0402236171066761, - "loss_total": 0.2677592635154724, - "step": 350499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.932065486907959, - "loss_rtd": 0.20996759831905365, - "loss_sent": 0.5353861451148987, - "loss_sod": 0.04811495542526245, - "loss_total": 0.793468713760376, - "step": 350499 - }, - { - "epoch": 0.031, - "grad_norm": 1.6151803731918335, - "learning_rate": 2.447272247363608e-06, - "loss": 0.4297, - "step": 350500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.960935115814209, - "loss_rtd": 0.19832560420036316, - "loss_sent": 0.32032203674316406, - "loss_sod": 0.003909667953848839, - "loss_total": 0.5225573182106018, - "step": 350599 - }, - { - "epoch": 0.031198, - "loss_gen": 6.08972692489624, - "loss_rtd": 0.20870810747146606, - "loss_sent": 0.3987029194831848, - "loss_sod": 0.06763564050197601, - "loss_total": 0.6750466823577881, - "step": 350599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.2335033416748047, - "learning_rate": 2.43747551723873e-06, - "loss": 0.4207, - "step": 350600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.954829692840576, - "loss_rtd": 0.21568158268928528, - "loss_sent": 0.1402682065963745, - "loss_sod": 0.005869862157851458, - "loss_total": 0.36181965470314026, - "step": 350699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.8494367599487305, - "loss_rtd": 0.2147720456123352, - "loss_sent": 0.1354246884584427, - "loss_sod": 0.018866462633013725, - "loss_total": 0.36906319856643677, - "step": 350699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.8288174867630005, - "learning_rate": 2.4276979452930282e-06, - "loss": 0.4016, - "step": 350700 - }, - { - "epoch": 0.031598, - "loss_gen": 6.417673587799072, - "loss_rtd": 0.2109319418668747, - "loss_sent": 0.12982873618602753, - "loss_sod": 0.03536287695169449, - "loss_total": 0.3761235475540161, - "step": 350799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.627594947814941, - "loss_rtd": 0.1990581750869751, - "loss_sent": 0.17239323258399963, - "loss_sod": 0.016169602051377296, - "loss_total": 0.3876210153102875, - "step": 350799 - }, - { - "epoch": 0.0316, - "grad_norm": 0.8868272304534912, - "learning_rate": 2.4179395354649026e-06, - "loss": 0.4166, - "step": 350800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.561457633972168, - "loss_rtd": 0.1908586621284485, - "loss_sent": 0.030903350561857224, - "loss_sod": 0.08093588054180145, - "loss_total": 0.30269789695739746, - "step": 350899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.4618988037109375, - "loss_rtd": 0.1912572979927063, - "loss_sent": 0.01910628378391266, - "loss_sod": 0.11057379841804504, - "loss_total": 0.3209373950958252, - "step": 350899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.0488332509994507, - "learning_rate": 2.4082002916850366e-06, - "loss": 0.4223, - "step": 350900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.624969005584717, - "loss_rtd": 0.19831901788711548, - "loss_sent": 0.04244771599769592, - "loss_sod": 0.028687791898846626, - "loss_total": 0.2694545388221741, - "step": 350999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.983346462249756, - "loss_rtd": 0.21950097382068634, - "loss_sent": 0.23065710067749023, - "loss_sod": 0.03074844554066658, - "loss_total": 0.48090651631355286, - "step": 350999 - }, - { - "epoch": 0.032, - "grad_norm": 1.4386192560195923, - "learning_rate": 2.3984802178764086e-06, - "loss": 0.4238, - "step": 351000 - }, - { - "epoch": 0.032, - "eval_loss": 0.3988931179046631, - "eval_runtime": 150.6248, - "eval_samples_per_second": 102.526, - "eval_steps_per_second": 0.803, - "step": 351000 - }, - { - "epoch": 0.032198, - "loss_gen": 6.070059776306152, - "loss_rtd": 0.2153177559375763, - "loss_sent": 0.143410786986351, - "loss_sod": 0.019158367067575455, - "loss_total": 0.37788689136505127, - "step": 351099 - }, - { - "epoch": 0.032198, - "loss_gen": 5.941234111785889, - "loss_rtd": 0.19865795969963074, - "loss_sent": 0.07141967117786407, - "loss_sod": 0.02514990046620369, - "loss_total": 0.2952275276184082, - "step": 351099 - }, - { - "epoch": 0.0322, - "grad_norm": 0.7352594137191772, - "learning_rate": 2.3887793179542594e-06, - "loss": 0.4142, - "step": 351100 - }, - { - "epoch": 0.032398, - "loss_gen": 5.969297409057617, - "loss_rtd": 0.1960950493812561, - "loss_sent": 0.21834954619407654, - "loss_sod": 0.05671348795294762, - "loss_total": 0.47115808725357056, - "step": 351199 - }, - { - "epoch": 0.032398, - "loss_gen": 5.7469282150268555, - "loss_rtd": 0.20001927018165588, - "loss_sent": 0.27628093957901, - "loss_sod": 0.14189350605010986, - "loss_total": 0.6181937456130981, - "step": 351199 - }, - { - "epoch": 0.0324, - "grad_norm": 1.7035173177719116, - "learning_rate": 2.379097595826102e-06, - "loss": 0.404, - "step": 351200 - }, - { - "epoch": 0.032598, - "loss_gen": 5.7358880043029785, - "loss_rtd": 0.19336995482444763, - "loss_sent": 0.2013741284608841, - "loss_sod": 0.00435943529009819, - "loss_total": 0.3991035223007202, - "step": 351299 - }, - { - "epoch": 0.032598, - "loss_gen": 6.023784637451172, - "loss_rtd": 0.19719797372817993, - "loss_sent": 0.1325165033340454, - "loss_sod": 0.024149995297193527, - "loss_total": 0.35386449098587036, - "step": 351299 - }, - { - "epoch": 0.0326, - "grad_norm": 0.83186274766922, - "learning_rate": 2.369435055391733e-06, - "loss": 0.413, - "step": 351300 - }, - { - "epoch": 0.032798, - "loss_gen": 6.057023525238037, - "loss_rtd": 0.20856040716171265, - "loss_sent": 0.13905474543571472, - "loss_sod": 0.04135048761963844, - "loss_total": 0.3889656364917755, - "step": 351399 - }, - { - "epoch": 0.032798, - "loss_gen": 5.9625325202941895, - "loss_rtd": 0.22160308063030243, - "loss_sent": 0.286081999540329, - "loss_sod": 0.05469052866101265, - "loss_total": 0.5623756051063538, - "step": 351399 - }, - { - "epoch": 0.0328, - "grad_norm": 0.7798752188682556, - "learning_rate": 2.3597917005432346e-06, - "loss": 0.416, - "step": 351400 - }, - { - "epoch": 0.032998, - "loss_gen": 5.706137657165527, - "loss_rtd": 0.19809125363826752, - "loss_sent": 0.12273921072483063, - "loss_sod": 0.012218467891216278, - "loss_total": 0.333048939704895, - "step": 351499 - }, - { - "epoch": 0.032998, - "loss_gen": 5.9923224449157715, - "loss_rtd": 0.1937067210674286, - "loss_sent": 0.11819963902235031, - "loss_sod": 0.14596877992153168, - "loss_total": 0.45787513256073, - "step": 351499 - }, - { - "epoch": 0.033, - "grad_norm": 1.1767587661743164, - "learning_rate": 2.350167535164943e-06, - "loss": 0.4342, - "step": 351500 - }, - { - "epoch": 0.033198, - "loss_gen": 6.103427886962891, - "loss_rtd": 0.19066065549850464, - "loss_sent": 0.24041242897510529, - "loss_sod": 0.025873901322484016, - "loss_total": 0.4569469690322876, - "step": 351599 - }, - { - "epoch": 0.033198, - "loss_gen": 5.711111545562744, - "loss_rtd": 0.2296997308731079, - "loss_sent": 0.1315288245677948, - "loss_sod": 0.02059810236096382, - "loss_total": 0.3818266689777374, - "step": 351599 - }, - { - "epoch": 0.0332, - "grad_norm": 1.1959350109100342, - "learning_rate": 2.3405625631334694e-06, - "loss": 0.4119, - "step": 351600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.726678371429443, - "loss_rtd": 0.20519202947616577, - "loss_sent": 0.22255714237689972, - "loss_sod": 0.010626724921166897, - "loss_total": 0.43837589025497437, - "step": 351699 - }, - { - "epoch": 0.033398, - "loss_gen": 6.037045478820801, - "loss_rtd": 0.20537537336349487, - "loss_sent": 0.030526742339134216, - "loss_sod": 0.22692060470581055, - "loss_total": 0.46282273530960083, - "step": 351699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.103611946105957, - "learning_rate": 2.33097678831769e-06, - "loss": 0.409, - "step": 351700 - }, - { - "epoch": 0.033598, - "loss_gen": 5.16274356842041, - "loss_rtd": 0.16753192245960236, - "loss_sent": 2.2867430743644945e-05, - "loss_sod": 0.07413460314273834, - "loss_total": 0.24168939888477325, - "step": 351799 - }, - { - "epoch": 0.033598, - "loss_gen": 5.506666660308838, - "loss_rtd": 0.16758745908737183, - "loss_sent": 0.10821570456027985, - "loss_sod": 0.10490790009498596, - "loss_total": 0.38071107864379883, - "step": 351799 - }, - { - "epoch": 0.0336, - "grad_norm": 1.0368785858154297, - "learning_rate": 2.321410214578762e-06, - "loss": 0.3977, - "step": 351800 - }, - { - "epoch": 0.033798, - "loss_gen": 5.81887674331665, - "loss_rtd": 0.2052488923072815, - "loss_sent": 0.14953112602233887, - "loss_sod": 0.0201532281935215, - "loss_total": 0.37493324279785156, - "step": 351899 - }, - { - "epoch": 0.033798, - "loss_gen": 5.939299583435059, - "loss_rtd": 0.20820355415344238, - "loss_sent": 0.13707712292671204, - "loss_sod": 0.040300384163856506, - "loss_total": 0.3855810761451721, - "step": 351899 - }, - { - "epoch": 0.0338, - "grad_norm": 0.7562331557273865, - "learning_rate": 2.3118628457701076e-06, - "loss": 0.4186, - "step": 351900 - }, - { - "epoch": 0.033998, - "loss_gen": 6.217770576477051, - "loss_rtd": 0.20336152613162994, - "loss_sent": 0.22984600067138672, - "loss_sod": 0.12778106331825256, - "loss_total": 0.5609886050224304, - "step": 351999 - }, - { - "epoch": 0.033998, - "loss_gen": 5.971132755279541, - "loss_rtd": 0.21956539154052734, - "loss_sent": 0.16360871493816376, - "loss_sod": 0.009821237064898014, - "loss_total": 0.39299535751342773, - "step": 351999 - }, - { - "epoch": 0.034, - "grad_norm": 1.0129319429397583, - "learning_rate": 2.302334685737384e-06, - "loss": 0.4287, - "step": 352000 - }, - { - "epoch": 0.034, - "eval_loss": 0.3950361907482147, - "eval_runtime": 150.3199, - "eval_samples_per_second": 102.734, - "eval_steps_per_second": 0.805, - "step": 352000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.844827175140381, - "loss_rtd": 0.21237419545650482, - "loss_sent": 0.1966703236103058, - "loss_sod": 0.11328340321779251, - "loss_total": 0.5223278999328613, - "step": 352099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.695902347564697, - "loss_rtd": 0.21383731067180634, - "loss_sent": 0.34264758229255676, - "loss_sod": 0.005707689095288515, - "loss_total": 0.5621925592422485, - "step": 352099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.374636173248291, - "learning_rate": 2.292825738318549e-06, - "loss": 0.4182, - "step": 352100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.194711208343506, - "loss_rtd": 0.1682964414358139, - "loss_sent": 2.9674323741346598e-05, - "loss_sod": 0.13813243806362152, - "loss_total": 0.30645856261253357, - "step": 352199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.3243889808654785, - "loss_rtd": 0.18843649327754974, - "loss_sent": 0.056837670505046844, - "loss_sod": 0.015786737203598022, - "loss_total": 0.261060893535614, - "step": 352199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.9507030248641968, - "learning_rate": 2.283336007343806e-06, - "loss": 0.4197, - "step": 352200 - }, - { - "epoch": 0.000598, - "loss_gen": 6.032334804534912, - "loss_rtd": 0.208065003156662, - "loss_sent": 0.1603982001543045, - "loss_sod": 0.021933242678642273, - "loss_total": 0.39039644598960876, - "step": 352299 - }, - { - "epoch": 0.000598, - "loss_gen": 6.044798851013184, - "loss_rtd": 0.21557746827602386, - "loss_sent": 0.21242542564868927, - "loss_sod": 0.018241213634610176, - "loss_total": 0.44624412059783936, - "step": 352299 - }, - { - "epoch": 0.0006, - "grad_norm": 0.9371817708015442, - "learning_rate": 2.2738654966356022e-06, - "loss": 0.4366, - "step": 352300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.862653732299805, - "loss_rtd": 0.19908036291599274, - "loss_sent": 0.1968402862548828, - "loss_sod": 0.013897551223635674, - "loss_total": 0.40981820225715637, - "step": 352399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.86594820022583, - "loss_rtd": 0.2057826966047287, - "loss_sent": 0.1858564168214798, - "loss_sod": 0.03528302535414696, - "loss_total": 0.42692214250564575, - "step": 352399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.9980801939964294, - "learning_rate": 2.2644142100086753e-06, - "loss": 0.3959, - "step": 352400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.303768634796143, - "loss_rtd": 0.13790901005268097, - "loss_sent": 0.00020305546058807522, - "loss_sod": 0.07721947878599167, - "loss_total": 0.2153315395116806, - "step": 352499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.938932418823242, - "loss_rtd": 0.2166273444890976, - "loss_sent": 0.17042265832424164, - "loss_sod": 0.03659671172499657, - "loss_total": 0.4236466884613037, - "step": 352499 - }, - { - "epoch": 0.001, - "grad_norm": 1.0985387563705444, - "learning_rate": 2.2549821512699966e-06, - "loss": 0.413, - "step": 352500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.79124641418457, - "loss_rtd": 0.2195327877998352, - "loss_sent": 0.28755638003349304, - "loss_sod": 0.011121327057480812, - "loss_total": 0.5182105302810669, - "step": 352599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.831072807312012, - "loss_rtd": 0.21045763790607452, - "loss_sent": 0.03898901119828224, - "loss_sod": 0.01899530552327633, - "loss_total": 0.26844194531440735, - "step": 352599 - }, - { - "epoch": 0.0012, - "grad_norm": 1.124880075454712, - "learning_rate": 2.245569324218799e-06, - "loss": 0.4126, - "step": 352600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.486479759216309, - "loss_rtd": 0.17362302541732788, - "loss_sent": 0.0638706311583519, - "loss_sod": 0.05291808769106865, - "loss_total": 0.29041174054145813, - "step": 352699 - }, - { - "epoch": 0.001398, - "loss_gen": 6.121653079986572, - "loss_rtd": 0.19986870884895325, - "loss_sent": 0.10900358855724335, - "loss_sod": 0.07488373667001724, - "loss_total": 0.38375604152679443, - "step": 352699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.9247352480888367, - "learning_rate": 2.2361757326465614e-06, - "loss": 0.4079, - "step": 352700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.895353317260742, - "loss_rtd": 0.19956815242767334, - "loss_sent": 0.12468837201595306, - "loss_sod": 0.08447334170341492, - "loss_total": 0.4087298512458801, - "step": 352799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.469888687133789, - "loss_rtd": 0.17113961279392242, - "loss_sent": 0.043581195175647736, - "loss_sod": 0.04969732090830803, - "loss_total": 0.2644181251525879, - "step": 352799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.890491247177124, - "learning_rate": 2.2268013803370293e-06, - "loss": 0.4425, - "step": 352800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.819416522979736, - "loss_rtd": 0.2038039118051529, - "loss_sent": 0.3093571364879608, - "loss_sod": 0.0046736132353544235, - "loss_total": 0.5178346633911133, - "step": 352899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.836423873901367, - "loss_rtd": 0.19253765046596527, - "loss_sent": 0.132777601480484, - "loss_sod": 0.046197764575481415, - "loss_total": 0.3715130090713501, - "step": 352899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.1710330247879028, - "learning_rate": 2.217446271066187e-06, - "loss": 0.4359, - "step": 352900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.7525739669799805, - "loss_rtd": 0.18090245127677917, - "loss_sent": 0.101189523935318, - "loss_sod": 0.05926916375756264, - "loss_total": 0.3413611352443695, - "step": 352999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.707718849182129, - "loss_rtd": 0.21078839898109436, - "loss_sent": 0.12415298819541931, - "loss_sod": 0.047776564955711365, - "loss_total": 0.38271793723106384, - "step": 352999 - }, - { - "epoch": 0.002, - "grad_norm": 0.9399814009666443, - "learning_rate": 2.208110408602276e-06, - "loss": 0.417, - "step": 353000 - }, - { - "epoch": 0.002, - "eval_loss": 0.3918011486530304, - "eval_runtime": 152.6188, - "eval_samples_per_second": 101.187, - "eval_steps_per_second": 0.793, - "step": 353000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.581538200378418, - "loss_rtd": 0.1928052455186844, - "loss_sent": 0.0830656960606575, - "loss_sod": 0.030780553817749023, - "loss_total": 0.3066515028476715, - "step": 353099 - }, - { - "epoch": 0.002198, - "loss_gen": 6.365875720977783, - "loss_rtd": 0.21668121218681335, - "loss_sent": 0.12727312743663788, - "loss_sod": 0.06451734900474548, - "loss_total": 0.4084717035293579, - "step": 353099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.1897996664047241, - "learning_rate": 2.1987937967057783e-06, - "loss": 0.4132, - "step": 353100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.393984317779541, - "loss_rtd": 0.18165439367294312, - "loss_sent": 0.03973681107163429, - "loss_sod": 0.03042837604880333, - "loss_total": 0.25181958079338074, - "step": 353199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.648141384124756, - "loss_rtd": 0.18211771547794342, - "loss_sent": 0.004699456971138716, - "loss_sod": 0.1369915008544922, - "loss_total": 0.3238086700439453, - "step": 353199 - }, - { - "epoch": 0.0024, - "grad_norm": 0.7428009510040283, - "learning_rate": 2.1894964391294125e-06, - "loss": 0.4422, - "step": 353200 - }, - { - "epoch": 0.002598, - "loss_gen": 6.0232696533203125, - "loss_rtd": 0.19451023638248444, - "loss_sent": 0.27461326122283936, - "loss_sod": 0.012914705090224743, - "loss_total": 0.4820381999015808, - "step": 353299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.886040687561035, - "loss_rtd": 0.2097945511341095, - "loss_sent": 0.21921497583389282, - "loss_sod": 0.015865452587604523, - "loss_total": 0.44487497210502625, - "step": 353299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.9458757638931274, - "learning_rate": 2.180218339618173e-06, - "loss": 0.4166, - "step": 353300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.676329135894775, - "loss_rtd": 0.21489547193050385, - "loss_sent": 0.17359676957130432, - "loss_sod": 0.06085289642214775, - "loss_total": 0.4493451416492462, - "step": 353399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.956292629241943, - "loss_rtd": 0.2165389060974121, - "loss_sent": 0.17427848279476166, - "loss_sod": 0.04603196308016777, - "loss_total": 0.43684935569763184, - "step": 353399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.1209734678268433, - "learning_rate": 2.17095950190927e-06, - "loss": 0.4167, - "step": 353400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.919392108917236, - "loss_rtd": 0.21150611340999603, - "loss_sent": 0.1576291024684906, - "loss_sod": 0.007452788762748241, - "loss_total": 0.37658798694610596, - "step": 353499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.6520209312438965, - "loss_rtd": 0.21992997825145721, - "loss_sent": 0.23487496376037598, - "loss_sod": 0.03435094654560089, - "loss_total": 0.4891558885574341, - "step": 353499 - }, - { - "epoch": 0.003, - "grad_norm": 1.1233352422714233, - "learning_rate": 2.1617199297321534e-06, - "loss": 0.4284, - "step": 353500 - }, - { - "epoch": 0.003198, - "loss_gen": 6.06951904296875, - "loss_rtd": 0.1953224241733551, - "loss_sent": 0.20304681360721588, - "loss_sod": 0.07646000385284424, - "loss_total": 0.4748292565345764, - "step": 353599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.541001796722412, - "loss_rtd": 0.19102561473846436, - "loss_sent": 0.005712227895855904, - "loss_sod": 0.10757511854171753, - "loss_total": 0.30431297421455383, - "step": 353599 - }, - { - "epoch": 0.0032, - "grad_norm": 1.9571014642715454, - "learning_rate": 2.1524996268085296e-06, - "loss": 0.4052, - "step": 353600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.754105567932129, - "loss_rtd": 0.2006927877664566, - "loss_sent": 0.06379576772451401, - "loss_sod": 0.030287204310297966, - "loss_total": 0.29477575421333313, - "step": 353699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.204795837402344, - "loss_rtd": 0.15813377499580383, - "loss_sent": 2.4847346139722504e-05, - "loss_sod": 0.08588747680187225, - "loss_total": 0.24404609203338623, - "step": 353699 - }, - { - "epoch": 0.0034, - "grad_norm": 0.899935781955719, - "learning_rate": 2.143298596852339e-06, - "loss": 0.4236, - "step": 353700 - }, - { - "epoch": 0.003598, - "loss_gen": 5.738497734069824, - "loss_rtd": 0.21620681881904602, - "loss_sent": 0.279610812664032, - "loss_sod": 0.022876763716340065, - "loss_total": 0.5186944007873535, - "step": 353799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.833151817321777, - "loss_rtd": 0.20951002836227417, - "loss_sent": 0.1569215953350067, - "loss_sod": 0.0022610409650951624, - "loss_total": 0.36869266629219055, - "step": 353799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.8144100904464722, - "learning_rate": 2.1341168435697447e-06, - "loss": 0.422, - "step": 353800 - }, - { - "epoch": 0.003798, - "loss_gen": 6.018966197967529, - "loss_rtd": 0.2131778597831726, - "loss_sent": 0.16325236856937408, - "loss_sod": 0.0442400798201561, - "loss_total": 0.4206703305244446, - "step": 353899 - }, - { - "epoch": 0.003798, - "loss_gen": 6.1745476722717285, - "loss_rtd": 0.23178789019584656, - "loss_sent": 0.0814250186085701, - "loss_sod": 0.07034356147050858, - "loss_total": 0.38355645537376404, - "step": 353899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.9145764112472534, - "learning_rate": 2.12495437065916e-06, - "loss": 0.4134, - "step": 353900 - }, - { - "epoch": 0.003998, - "loss_gen": 6.161501407623291, - "loss_rtd": 0.21247737109661102, - "loss_sent": 0.1705043762922287, - "loss_sod": 0.10521357506513596, - "loss_total": 0.4881953001022339, - "step": 353999 - }, - { - "epoch": 0.003998, - "loss_gen": 6.057038307189941, - "loss_rtd": 0.20198795199394226, - "loss_sent": 0.045900702476501465, - "loss_sod": 0.1148776113986969, - "loss_total": 0.3627662658691406, - "step": 353999 - }, - { - "epoch": 0.004, - "grad_norm": 1.0753530263900757, - "learning_rate": 2.115811181811228e-06, - "loss": 0.4343, - "step": 354000 - }, - { - "epoch": 0.004, - "eval_loss": 0.40010935068130493, - "eval_runtime": 150.0678, - "eval_samples_per_second": 102.907, - "eval_steps_per_second": 0.806, - "step": 354000 - }, - { - "epoch": 0.004198, - "loss_gen": 6.502257823944092, - "loss_rtd": 0.2139170914888382, - "loss_sent": 0.0895005464553833, - "loss_sod": 0.1457839459180832, - "loss_total": 0.4492015838623047, - "step": 354099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.863682746887207, - "loss_rtd": 0.19926874339580536, - "loss_sent": 0.1459185779094696, - "loss_sod": 0.10132648795843124, - "loss_total": 0.4465138018131256, - "step": 354099 - }, - { - "epoch": 0.0042, - "grad_norm": 2.152190685272217, - "learning_rate": 2.1066872807088354e-06, - "loss": 0.4372, - "step": 354100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.831955909729004, - "loss_rtd": 0.21212802827358246, - "loss_sent": 0.11381430178880692, - "loss_sod": 0.08838170766830444, - "loss_total": 0.4143240451812744, - "step": 354199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.714260101318359, - "loss_rtd": 0.22318173944950104, - "loss_sent": 0.19266586005687714, - "loss_sod": 0.03717808425426483, - "loss_total": 0.4530256986618042, - "step": 354199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.3364078998565674, - "learning_rate": 2.0975826710270707e-06, - "loss": 0.444, - "step": 354200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.322637557983398, - "loss_rtd": 0.19468343257904053, - "loss_sent": 2.3605018213856965e-05, - "loss_sod": 0.11811772733926773, - "loss_total": 0.3128247559070587, - "step": 354299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.0764617919921875, - "loss_rtd": 0.15593793988227844, - "loss_sent": 2.572458834038116e-05, - "loss_sod": 0.18683885037899017, - "loss_total": 0.342802494764328, - "step": 354299 - }, - { - "epoch": 0.0046, - "grad_norm": 1.3551969528198242, - "learning_rate": 2.088497356433278e-06, - "loss": 0.4177, - "step": 354300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.867374897003174, - "loss_rtd": 0.18638940155506134, - "loss_sent": 0.10143978148698807, - "loss_sod": 0.04324105754494667, - "loss_total": 0.3310702443122864, - "step": 354399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.99941873550415, - "loss_rtd": 0.2125442624092102, - "loss_sent": 0.1141626313328743, - "loss_sod": 0.06408238410949707, - "loss_total": 0.390789270401001, - "step": 354399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.1286380290985107, - "learning_rate": 2.0794313405870236e-06, - "loss": 0.4195, - "step": 354400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.459729194641113, - "loss_rtd": 0.15806931257247925, - "loss_sent": 0.00018017186084762216, - "loss_sod": 0.023364700376987457, - "loss_total": 0.18161417543888092, - "step": 354499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.466588973999023, - "loss_rtd": 0.17173349857330322, - "loss_sent": 0.12588773667812347, - "loss_sod": 0.04384646192193031, - "loss_total": 0.3414676785469055, - "step": 354499 - }, - { - "epoch": 0.005, - "grad_norm": 0.7194051742553711, - "learning_rate": 2.0703846271400983e-06, - "loss": 0.4024, - "step": 354500 - }, - { - "epoch": 0.005198, - "loss_gen": 6.066932201385498, - "loss_rtd": 0.22332562506198883, - "loss_sent": 0.262665331363678, - "loss_sod": 0.04054757580161095, - "loss_total": 0.5265384912490845, - "step": 354599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.763453960418701, - "loss_rtd": 0.2057800590991974, - "loss_sent": 0.1919267326593399, - "loss_sod": 0.043964192271232605, - "loss_total": 0.4416709840297699, - "step": 354599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.1999951601028442, - "learning_rate": 2.061357219736504e-06, - "loss": 0.408, - "step": 354600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.837345123291016, - "loss_rtd": 0.19844655692577362, - "loss_sent": 0.1703146994113922, - "loss_sod": 0.17566174268722534, - "loss_total": 0.54442298412323, - "step": 354699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.693556308746338, - "loss_rtd": 0.18148230016231537, - "loss_sent": 2.3122607672121376e-05, - "loss_sod": 0.1757783591747284, - "loss_total": 0.3572837710380554, - "step": 354699 - }, - { - "epoch": 0.0054, - "grad_norm": 1.7587082386016846, - "learning_rate": 2.0523491220124924e-06, - "loss": 0.4198, - "step": 354700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.87233304977417, - "loss_rtd": 0.1956353634595871, - "loss_sent": 0.16608545184135437, - "loss_sod": 0.02105873078107834, - "loss_total": 0.3827795386314392, - "step": 354799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.842890739440918, - "loss_rtd": 0.22205030918121338, - "loss_sent": 0.1724475771188736, - "loss_sod": 0.004117500968277454, - "loss_total": 0.39861539006233215, - "step": 354799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.5454592704772949, - "learning_rate": 2.0433603375965227e-06, - "loss": 0.4191, - "step": 354800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.969061374664307, - "loss_rtd": 0.22664503753185272, - "loss_sent": 0.39275848865509033, - "loss_sod": 0.24920883774757385, - "loss_total": 0.8686123490333557, - "step": 354899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.787162780761719, - "loss_rtd": 0.19412535429000854, - "loss_sent": 0.30275753140449524, - "loss_sod": 0.026805846020579338, - "loss_total": 0.5236887335777283, - "step": 354899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.8073854446411133, - "learning_rate": 2.0343908701092817e-06, - "loss": 0.4289, - "step": 354900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.882112979888916, - "loss_rtd": 0.20660750567913055, - "loss_sent": 0.17035874724388123, - "loss_sod": 0.023847075179219246, - "loss_total": 0.40081334114074707, - "step": 354999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.957112789154053, - "loss_rtd": 0.1990405172109604, - "loss_sent": 0.24796076118946075, - "loss_sod": 0.05260968953371048, - "loss_total": 0.499610960483551, - "step": 354999 - }, - { - "epoch": 0.006, - "grad_norm": 1.0234116315841675, - "learning_rate": 2.025440723163652e-06, - "loss": 0.4226, - "step": 355000 - }, - { - "epoch": 0.006, - "eval_loss": 0.3955202102661133, - "eval_runtime": 149.8632, - "eval_samples_per_second": 103.047, - "eval_steps_per_second": 0.807, - "step": 355000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.702818870544434, - "loss_rtd": 0.2071809321641922, - "loss_sent": 0.15628786385059357, - "loss_sod": 0.0020802603103220463, - "loss_total": 0.3655490577220917, - "step": 355099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.622329235076904, - "loss_rtd": 0.19860802590847015, - "loss_sent": 0.0619230642914772, - "loss_sod": 0.007985853590071201, - "loss_total": 0.2685169577598572, - "step": 355099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.6136819124221802, - "learning_rate": 2.0165099003647603e-06, - "loss": 0.4151, - "step": 355100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.951848983764648, - "loss_rtd": 0.21448597311973572, - "loss_sent": 0.1498323678970337, - "loss_sod": 0.02414526417851448, - "loss_total": 0.3884636163711548, - "step": 355199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.998314380645752, - "loss_rtd": 0.2019582837820053, - "loss_sent": 0.352652370929718, - "loss_sod": 0.1898314654827118, - "loss_total": 0.7444421052932739, - "step": 355199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.685928225517273, - "learning_rate": 2.007598405309946e-06, - "loss": 0.4235, - "step": 355200 - }, - { - "epoch": 0.006598, - "loss_gen": 6.01767110824585, - "loss_rtd": 0.22767655551433563, - "loss_sent": 0.1614169031381607, - "loss_sod": 0.12196917831897736, - "loss_total": 0.5110626220703125, - "step": 355299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.759350776672363, - "loss_rtd": 0.22138266265392303, - "loss_sent": 0.10136804729700089, - "loss_sod": 0.01644078828394413, - "loss_total": 0.3391914963722229, - "step": 355299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.9406350255012512, - "learning_rate": 1.9987062415887604e-06, - "loss": 0.4222, - "step": 355300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.697607517242432, - "loss_rtd": 0.20214276015758514, - "loss_sent": 0.0004224515869282186, - "loss_sod": 0.23241263628005981, - "loss_total": 0.43497785925865173, - "step": 355399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.344110488891602, - "loss_rtd": 0.1906622052192688, - "loss_sent": 2.2936350433155894e-05, - "loss_sod": 0.07854408025741577, - "loss_total": 0.2692292332649231, - "step": 355399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.0965168476104736, - "learning_rate": 1.9898334127829486e-06, - "loss": 0.4194, - "step": 355400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.239232540130615, - "loss_rtd": 0.16593962907791138, - "loss_sent": 0.022577477619051933, - "loss_sod": 0.1156080812215805, - "loss_total": 0.30412518978118896, - "step": 355499 - }, - { - "epoch": 0.006998, - "loss_gen": 6.062136173248291, - "loss_rtd": 0.20743438601493835, - "loss_sent": 0.210471048951149, - "loss_sod": 0.03318113461136818, - "loss_total": 0.4510865807533264, - "step": 355499 - }, - { - "epoch": 0.007, - "grad_norm": 0.9438206553459167, - "learning_rate": 1.9809799224665025e-06, - "loss": 0.418, - "step": 355500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.930011749267578, - "loss_rtd": 0.1955890953540802, - "loss_sent": 0.43107500672340393, - "loss_sod": 0.03834614157676697, - "loss_total": 0.6650102138519287, - "step": 355599 - }, - { - "epoch": 0.007198, - "loss_gen": 6.390570163726807, - "loss_rtd": 0.1871335357427597, - "loss_sent": 0.2828793525695801, - "loss_sod": 0.018466539680957794, - "loss_total": 0.4884794354438782, - "step": 355599 - }, - { - "epoch": 0.0072, - "grad_norm": 1.360158920288086, - "learning_rate": 1.9721457742055973e-06, - "loss": 0.4128, - "step": 355600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.43441104888916, - "loss_rtd": 0.2008632868528366, - "loss_sent": 0.1046755313873291, - "loss_sod": 0.11621154844760895, - "loss_total": 0.42175036668777466, - "step": 355699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.410533905029297, - "loss_rtd": 0.17058065533638, - "loss_sent": 0.03604581207036972, - "loss_sod": 0.10525237023830414, - "loss_total": 0.31187883019447327, - "step": 355699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.3569351434707642, - "learning_rate": 1.9633309715586412e-06, - "loss": 0.4279, - "step": 355700 - }, - { - "epoch": 0.007598, - "loss_gen": 6.2586236000061035, - "loss_rtd": 0.2144034504890442, - "loss_sent": 0.1542971134185791, - "loss_sod": 0.03798966482281685, - "loss_total": 0.40669023990631104, - "step": 355799 - }, - { - "epoch": 0.007598, - "loss_gen": 6.216439723968506, - "loss_rtd": 0.18448476493358612, - "loss_sent": 0.07719166576862335, - "loss_sod": 0.09614969789981842, - "loss_total": 0.3578261137008667, - "step": 355799 - }, - { - "epoch": 0.0076, - "grad_norm": 1.2310986518859863, - "learning_rate": 1.954535518076217e-06, - "loss": 0.433, - "step": 355800 - }, - { - "epoch": 0.007798, - "loss_gen": 6.04836368560791, - "loss_rtd": 0.2088451087474823, - "loss_sent": 0.10443481057882309, - "loss_sod": 0.11040560901165009, - "loss_total": 0.4236855208873749, - "step": 355899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.211187362670898, - "loss_rtd": 0.17163363099098206, - "loss_sent": 2.266068258904852e-05, - "loss_sod": 0.03339499980211258, - "loss_total": 0.20505128800868988, - "step": 355899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.9474918842315674, - "learning_rate": 1.945759417301135e-06, - "loss": 0.405, - "step": 355900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.996926307678223, - "loss_rtd": 0.22321800887584686, - "loss_sent": 0.38234007358551025, - "loss_sod": 0.09975333511829376, - "loss_total": 0.7053114175796509, - "step": 355999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.949470520019531, - "loss_rtd": 0.20287185907363892, - "loss_sent": 0.2822295129299164, - "loss_sod": 0.03472301736474037, - "loss_total": 0.5198243856430054, - "step": 355999 - }, - { - "epoch": 0.008, - "grad_norm": 1.4557656049728394, - "learning_rate": 1.9370026727684175e-06, - "loss": 0.4011, - "step": 356000 - }, - { - "epoch": 0.008, - "eval_loss": 0.40251606702804565, - "eval_runtime": 150.0712, - "eval_samples_per_second": 102.904, - "eval_steps_per_second": 0.806, - "step": 356000 - }, - { - "epoch": 0.008198, - "loss_gen": 6.220467567443848, - "loss_rtd": 0.20813670754432678, - "loss_sent": 0.41210559010505676, - "loss_sod": 0.014181757345795631, - "loss_total": 0.634424090385437, - "step": 356099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.828224182128906, - "loss_rtd": 0.21587900817394257, - "loss_sent": 0.25055649876594543, - "loss_sod": 0.037630707025527954, - "loss_total": 0.5040662288665771, - "step": 356099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.6622540950775146, - "learning_rate": 1.928265288005282e-06, - "loss": 0.4319, - "step": 356100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.877004623413086, - "loss_rtd": 0.21264329552650452, - "loss_sent": 0.16616591811180115, - "loss_sod": 0.01253450009971857, - "loss_total": 0.39134371280670166, - "step": 356199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.99909782409668, - "loss_rtd": 0.2093016356229782, - "loss_sent": 0.20995502173900604, - "loss_sod": 0.1282491534948349, - "loss_total": 0.547505795955658, - "step": 356199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.3165335655212402, - "learning_rate": 1.9195472665311355e-06, - "loss": 0.4186, - "step": 356200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.983824253082275, - "loss_rtd": 0.18738792836666107, - "loss_sent": 0.22271102666854858, - "loss_sod": 0.06770418584346771, - "loss_total": 0.47780314087867737, - "step": 356299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.583632469177246, - "loss_rtd": 0.18682065606117249, - "loss_sent": 0.005454404279589653, - "loss_sod": 0.15563565492630005, - "loss_total": 0.34791070222854614, - "step": 356299 - }, - { - "epoch": 0.0086, - "grad_norm": 1.2503660917282104, - "learning_rate": 1.910848611857602e-06, - "loss": 0.4109, - "step": 356300 - }, - { - "epoch": 0.008798, - "loss_gen": 6.008808612823486, - "loss_rtd": 0.199904665350914, - "loss_sent": 0.0830218642950058, - "loss_sod": 0.07319788634777069, - "loss_total": 0.3561244010925293, - "step": 356399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.961312294006348, - "loss_rtd": 0.22446058690547943, - "loss_sent": 0.16952523589134216, - "loss_sod": 0.025866538286209106, - "loss_total": 0.4198523461818695, - "step": 356399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.1060283184051514, - "learning_rate": 1.90216932748849e-06, - "loss": 0.4202, - "step": 356400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.801023006439209, - "loss_rtd": 0.20940759778022766, - "loss_sent": 0.28753530979156494, - "loss_sod": 0.0116899898275733, - "loss_total": 0.5086328983306885, - "step": 356499 - }, - { - "epoch": 0.008998, - "loss_gen": 6.066738605499268, - "loss_rtd": 0.2130727916955948, - "loss_sent": 0.16303817927837372, - "loss_sod": 0.19328413903713226, - "loss_total": 0.569395124912262, - "step": 356499 - }, - { - "epoch": 0.009, - "grad_norm": 1.4080960750579834, - "learning_rate": 1.8935094169198308e-06, - "loss": 0.4117, - "step": 356500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.32443904876709, - "loss_rtd": 0.18271073698997498, - "loss_sent": 0.016854075714945793, - "loss_sod": 0.0061331442557275295, - "loss_total": 0.2056979537010193, - "step": 356599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.517611980438232, - "loss_rtd": 0.18196389079093933, - "loss_sent": 0.016464676707983017, - "loss_sod": 0.03613041341304779, - "loss_total": 0.23455898463726044, - "step": 356599 - }, - { - "epoch": 0.0092, - "grad_norm": 0.4810616374015808, - "learning_rate": 1.8848688836398176e-06, - "loss": 0.3864, - "step": 356600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.816564559936523, - "loss_rtd": 0.1941842883825302, - "loss_sent": 0.3590242266654968, - "loss_sod": 0.0036043724976480007, - "loss_total": 0.5568128824234009, - "step": 356699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.704836845397949, - "loss_rtd": 0.17948199808597565, - "loss_sent": 0.014717033132910728, - "loss_sod": 0.11702261865139008, - "loss_total": 0.3112216591835022, - "step": 356699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.7406604290008545, - "learning_rate": 1.8762477311288663e-06, - "loss": 0.4343, - "step": 356700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.646180152893066, - "loss_rtd": 0.21884070336818695, - "loss_sent": 0.2832874357700348, - "loss_sod": 0.04909444972872734, - "loss_total": 0.551222562789917, - "step": 356799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.81979513168335, - "loss_rtd": 0.1980317085981369, - "loss_sent": 0.11066953837871552, - "loss_sod": 0.10543151944875717, - "loss_total": 0.4141327738761902, - "step": 356799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.0447427034378052, - "learning_rate": 1.8676459628595766e-06, - "loss": 0.4012, - "step": 356800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.995449542999268, - "loss_rtd": 0.20971901714801788, - "loss_sent": 0.24040107429027557, - "loss_sod": 0.03846020624041557, - "loss_total": 0.48858028650283813, - "step": 356899 - }, - { - "epoch": 0.009798, - "loss_gen": 5.749032497406006, - "loss_rtd": 0.20903363823890686, - "loss_sent": 0.14285731315612793, - "loss_sod": 0.004941024351865053, - "loss_total": 0.35683196783065796, - "step": 356899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.6844404339790344, - "learning_rate": 1.8590635822967385e-06, - "loss": 0.4199, - "step": 356900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.497154712677002, - "loss_rtd": 0.187617689371109, - "loss_sent": 0.021793534979224205, - "loss_sod": 0.040550097823143005, - "loss_total": 0.24996131658554077, - "step": 356999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.992887020111084, - "loss_rtd": 0.20962125062942505, - "loss_sent": 0.37087512016296387, - "loss_sod": 0.029054788872599602, - "loss_total": 0.609551191329956, - "step": 356999 - }, - { - "epoch": 0.01, - "grad_norm": 0.9969701766967773, - "learning_rate": 1.850500592897325e-06, - "loss": 0.4095, - "step": 357000 - }, - { - "epoch": 0.01, - "eval_loss": 0.3956516981124878, - "eval_runtime": 149.9188, - "eval_samples_per_second": 103.009, - "eval_steps_per_second": 0.807, - "step": 357000 - }, - { - "epoch": 0.010198, - "loss_gen": 6.80160665512085, - "loss_rtd": 0.2306348979473114, - "loss_sent": 0.08139466494321823, - "loss_sod": 0.17715565860271454, - "loss_total": 0.4891852140426636, - "step": 357099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.674367904663086, - "loss_rtd": 0.23430436849594116, - "loss_sent": 0.25197505950927734, - "loss_sod": 0.01835767924785614, - "loss_total": 0.5046371221542358, - "step": 357099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.1275088787078857, - "learning_rate": 1.8419569981105166e-06, - "loss": 0.4216, - "step": 357100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.982246398925781, - "loss_rtd": 0.20664797723293304, - "loss_sent": 0.21538527309894562, - "loss_sod": 0.006636639591306448, - "loss_total": 0.42866986989974976, - "step": 357199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.894622802734375, - "loss_rtd": 0.22928206622600555, - "loss_sent": 0.16588445007801056, - "loss_sod": 0.03347032517194748, - "loss_total": 0.4286368489265442, - "step": 357199 - }, - { - "epoch": 0.0104, - "grad_norm": 0.6683774590492249, - "learning_rate": 1.833432801377677e-06, - "loss": 0.4083, - "step": 357200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.725360870361328, - "loss_rtd": 0.21421876549720764, - "loss_sent": 0.24396134912967682, - "loss_sod": 0.009419852867722511, - "loss_total": 0.4675999879837036, - "step": 357299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.806037425994873, - "loss_rtd": 0.2170538306236267, - "loss_sent": 0.18101391196250916, - "loss_sod": 0.03645111620426178, - "loss_total": 0.43451884388923645, - "step": 357299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9233309626579285, - "learning_rate": 1.824928006132337e-06, - "loss": 0.4233, - "step": 357300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.391997814178467, - "loss_rtd": 0.19370044767856598, - "loss_sent": 0.004411658737808466, - "loss_sod": 0.13107523322105408, - "loss_total": 0.3291873633861542, - "step": 357399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.142665863037109, - "loss_rtd": 0.15653546154499054, - "loss_sent": 0.05312584340572357, - "loss_sod": 0.05472799763083458, - "loss_total": 0.264389306306839, - "step": 357399 - }, - { - "epoch": 0.0108, - "grad_norm": 0.9104020595550537, - "learning_rate": 1.81644261580024e-06, - "loss": 0.4303, - "step": 357400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.425527095794678, - "loss_rtd": 0.17077837884426117, - "loss_sent": 0.0359957255423069, - "loss_sod": 0.04859377443790436, - "loss_total": 0.25536787509918213, - "step": 357499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.666432857513428, - "loss_rtd": 0.20479275286197662, - "loss_sent": 0.38120099902153015, - "loss_sod": 0.04730219766497612, - "loss_total": 0.6332959532737732, - "step": 357499 - }, - { - "epoch": 0.011, - "grad_norm": 1.7625724077224731, - "learning_rate": 1.807976633799291e-06, - "loss": 0.4121, - "step": 357500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.845230579376221, - "loss_rtd": 0.19764827191829681, - "loss_sent": 0.2432090938091278, - "loss_sod": 0.024489443749189377, - "loss_total": 0.4653468132019043, - "step": 357599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.8189544677734375, - "loss_rtd": 0.2043876349925995, - "loss_sent": 0.09671732783317566, - "loss_sod": 0.02181229554116726, - "loss_total": 0.32291725277900696, - "step": 357599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.6875277757644653, - "learning_rate": 1.7995300635395951e-06, - "loss": 0.4275, - "step": 357600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.883091926574707, - "loss_rtd": 0.19942772388458252, - "loss_sent": 0.05011919140815735, - "loss_sod": 0.012135586701333523, - "loss_total": 0.26168251037597656, - "step": 357699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.89393424987793, - "loss_rtd": 0.21682314574718475, - "loss_sent": 0.06651324033737183, - "loss_sod": 0.01784605160355568, - "loss_total": 0.30118244886398315, - "step": 357699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.4985584318637848, - "learning_rate": 1.79110290842342e-06, - "loss": 0.3998, - "step": 357700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.273667335510254, - "loss_rtd": 0.18570521473884583, - "loss_sent": 0.013371221721172333, - "loss_sod": 0.08951470255851746, - "loss_total": 0.2885911464691162, - "step": 357799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.142648220062256, - "loss_rtd": 0.16314288973808289, - "loss_sent": 2.5110792194027454e-05, - "loss_sod": 0.069739930331707, - "loss_total": 0.23290793597698212, - "step": 357799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.7510794401168823, - "learning_rate": 1.7826951718452335e-06, - "loss": 0.4145, - "step": 357800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.882158279418945, - "loss_rtd": 0.2175660878419876, - "loss_sent": 0.23877964913845062, - "loss_sod": 0.10670842230319977, - "loss_total": 0.5630541443824768, - "step": 357899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.799062728881836, - "loss_rtd": 0.19137810170650482, - "loss_sent": 0.13315854966640472, - "loss_sod": 0.04499879479408264, - "loss_total": 0.3695354461669922, - "step": 357899 - }, - { - "epoch": 0.0118, - "grad_norm": 1.2190783023834229, - "learning_rate": 1.77430685719166e-06, - "loss": 0.4076, - "step": 357900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.983433246612549, - "loss_rtd": 0.20193849503993988, - "loss_sent": 0.2824704945087433, - "loss_sod": 0.020014457404613495, - "loss_total": 0.5044234395027161, - "step": 357999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.964925765991211, - "loss_rtd": 0.1909264177083969, - "loss_sent": 0.12705761194229126, - "loss_sod": 0.023693429306149483, - "loss_total": 0.3416774570941925, - "step": 357999 - }, - { - "epoch": 0.012, - "grad_norm": 0.8381167054176331, - "learning_rate": 1.7659379678415244e-06, - "loss": 0.4253, - "step": 358000 - }, - { - "epoch": 0.012, - "eval_loss": 0.40045633912086487, - "eval_runtime": 150.2548, - "eval_samples_per_second": 102.779, - "eval_steps_per_second": 0.805, - "step": 358000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.01365327835083, - "loss_rtd": 0.19572857022285461, - "loss_sent": 0.19667977094650269, - "loss_sod": 0.09414590150117874, - "loss_total": 0.48655423521995544, - "step": 358099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.961184501647949, - "loss_rtd": 0.237196147441864, - "loss_sent": 0.11936698853969574, - "loss_sod": 0.011582116596400738, - "loss_total": 0.36814525723457336, - "step": 358099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.356001377105713, - "learning_rate": 1.7575885071658027e-06, - "loss": 0.3948, - "step": 358100 - }, - { - "epoch": 0.012398, - "loss_gen": 6.13053035736084, - "loss_rtd": 0.21780376136302948, - "loss_sent": 0.14030757546424866, - "loss_sod": 0.11369664967060089, - "loss_total": 0.47180798649787903, - "step": 358199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.940971374511719, - "loss_rtd": 0.21676567196846008, - "loss_sent": 0.20150306820869446, - "loss_sod": 0.008833327330648899, - "loss_total": 0.42710208892822266, - "step": 358199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.0728989839553833, - "learning_rate": 1.74925847852766e-06, - "loss": 0.4119, - "step": 358200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.732760906219482, - "loss_rtd": 0.2261783331632614, - "loss_sent": 0.49197709560394287, - "loss_sod": 0.0071923090144991875, - "loss_total": 0.7253477573394775, - "step": 358299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.803589820861816, - "loss_rtd": 0.19533567130565643, - "loss_sent": 0.14458568394184113, - "loss_sod": 0.011589504778385162, - "loss_total": 0.3515108823776245, - "step": 358299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.2767122983932495, - "learning_rate": 1.74094788528244e-06, - "loss": 0.4075, - "step": 358300 - }, - { - "epoch": 0.012798, - "loss_gen": 5.919802188873291, - "loss_rtd": 0.1859709918498993, - "loss_sent": 0.7436004281044006, - "loss_sod": 0.027039239183068275, - "loss_total": 0.9566106796264648, - "step": 358399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.749619007110596, - "loss_rtd": 0.21578435599803925, - "loss_sent": 0.12787169218063354, - "loss_sod": 0.00744420662522316, - "loss_total": 0.35110026597976685, - "step": 358399 - }, - { - "epoch": 0.0128, - "grad_norm": 3.856870174407959, - "learning_rate": 1.732656730777632e-06, - "loss": 0.4288, - "step": 358400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.978856086730957, - "loss_rtd": 0.21464388072490692, - "loss_sent": 0.32323721051216125, - "loss_sod": 0.04082895815372467, - "loss_total": 0.5787100791931152, - "step": 358499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.8420491218566895, - "loss_rtd": 0.18880939483642578, - "loss_sent": 0.14689841866493225, - "loss_sod": 0.0094448896124959, - "loss_total": 0.34515270590782166, - "step": 358499 - }, - { - "epoch": 0.013, - "grad_norm": 1.0212700366973877, - "learning_rate": 1.7243850183529197e-06, - "loss": 0.4125, - "step": 358500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.8610615730285645, - "loss_rtd": 0.20242686569690704, - "loss_sent": 0.3094406723976135, - "loss_sod": 0.0030163044575601816, - "loss_total": 0.5148838758468628, - "step": 358599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.5904717445373535, - "loss_rtd": 0.2228073626756668, - "loss_sent": 0.11718317866325378, - "loss_sod": 0.08247916400432587, - "loss_total": 0.42246970534324646, - "step": 358599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.3363889455795288, - "learning_rate": 1.7161327513401492e-06, - "loss": 0.4297, - "step": 358600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.875903129577637, - "loss_rtd": 0.21359845995903015, - "loss_sent": 0.08436977863311768, - "loss_sod": 0.002072614151984453, - "loss_total": 0.3000408411026001, - "step": 358699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.7648820877075195, - "loss_rtd": 0.2127772718667984, - "loss_sent": 0.3052695095539093, - "loss_sod": 0.030976422131061554, - "loss_total": 0.5490232110023499, - "step": 358699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.5202645063400269, - "learning_rate": 1.7078999330633395e-06, - "loss": 0.413, - "step": 358700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.926563739776611, - "loss_rtd": 0.21490149199962616, - "loss_sent": 0.09910505264997482, - "loss_sod": 0.0029799845069646835, - "loss_total": 0.3169865310192108, - "step": 358799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.8224568367004395, - "loss_rtd": 0.2163776457309723, - "loss_sent": 0.052175674587488174, - "loss_sod": 0.046546582132577896, - "loss_total": 0.31509989500045776, - "step": 358799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.0362154245376587, - "learning_rate": 1.6996865668386596e-06, - "loss": 0.4233, - "step": 358800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.657197952270508, - "loss_rtd": 0.20873859524726868, - "loss_sent": 0.15544243156909943, - "loss_sod": 0.010381044819951057, - "loss_total": 0.3745620846748352, - "step": 358899 - }, - { - "epoch": 0.013798, - "loss_gen": 6.037142753601074, - "loss_rtd": 0.22123707830905914, - "loss_sent": 0.21781538426876068, - "loss_sod": 0.031087543815374374, - "loss_total": 0.4701399803161621, - "step": 358899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.4298346042633057, - "learning_rate": 1.691492655974447e-06, - "loss": 0.4162, - "step": 358900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.96540641784668, - "loss_rtd": 0.19525554776191711, - "loss_sent": 0.15494407713413239, - "loss_sod": 0.03438544645905495, - "loss_total": 0.38458508253097534, - "step": 358999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.9260029792785645, - "loss_rtd": 0.19141554832458496, - "loss_sent": 0.05381951481103897, - "loss_sod": 0.023254919797182083, - "loss_total": 0.2684899866580963, - "step": 358999 - }, - { - "epoch": 0.014, - "grad_norm": 0.7269117832183838, - "learning_rate": 1.6833182037712226e-06, - "loss": 0.3892, - "step": 359000 - }, - { - "epoch": 0.014, - "eval_loss": 0.39771997928619385, - "eval_runtime": 149.8968, - "eval_samples_per_second": 103.024, - "eval_steps_per_second": 0.807, - "step": 359000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.809222221374512, - "loss_rtd": 0.20183490216732025, - "loss_sent": 0.13142982125282288, - "loss_sod": 0.06742454320192337, - "loss_total": 0.4006892740726471, - "step": 359099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.224217414855957, - "loss_rtd": 0.16696400940418243, - "loss_sent": 0.010838507674634457, - "loss_sod": 0.04039539396762848, - "loss_total": 0.21819791197776794, - "step": 359099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.8199211955070496, - "learning_rate": 1.6751632135216467e-06, - "loss": 0.4309, - "step": 359100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.704248428344727, - "loss_rtd": 0.19448193907737732, - "loss_sent": 0.10888109356164932, - "loss_sod": 0.01757044903934002, - "loss_total": 0.3209334909915924, - "step": 359199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.809918403625488, - "loss_rtd": 0.20986898243427277, - "loss_sent": 0.08062642812728882, - "loss_sod": 0.041022978723049164, - "loss_total": 0.33151838183403015, - "step": 359199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.7899925112724304, - "learning_rate": 1.6670276885105474e-06, - "loss": 0.4032, - "step": 359200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.747605323791504, - "loss_rtd": 0.2124408334493637, - "loss_sent": 0.13505761325359344, - "loss_sod": 0.009342052973806858, - "loss_total": 0.35684049129486084, - "step": 359299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.84201717376709, - "loss_rtd": 0.21779082715511322, - "loss_sent": 0.16671185195446014, - "loss_sod": 0.009043235331773758, - "loss_total": 0.393545925617218, - "step": 359299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.7241244912147522, - "learning_rate": 1.6589116320149145e-06, - "loss": 0.4184, - "step": 359300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.692610263824463, - "loss_rtd": 0.17546941339969635, - "loss_sent": 4.074591561220586e-05, - "loss_sod": 0.0981811136007309, - "loss_total": 0.2736912667751312, - "step": 359399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.110828399658203, - "loss_rtd": 0.15602681040763855, - "loss_sent": 0.0010624686256051064, - "loss_sod": 0.028543993830680847, - "loss_total": 0.18563327193260193, - "step": 359399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.8788437843322754, - "learning_rate": 1.6508150473038942e-06, - "loss": 0.4298, - "step": 359400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.625291347503662, - "loss_rtd": 0.19761300086975098, - "loss_sent": 0.08058664947748184, - "loss_sod": 0.0576014406979084, - "loss_total": 0.3358010947704315, - "step": 359499 - }, - { - "epoch": 0.014998, - "loss_gen": 5.9618916511535645, - "loss_rtd": 0.21147429943084717, - "loss_sent": 0.14091724157333374, - "loss_sod": 0.059383898973464966, - "loss_total": 0.4117754399776459, - "step": 359499 - }, - { - "epoch": 0.015, - "grad_norm": 1.4707019329071045, - "learning_rate": 1.6427379376387997e-06, - "loss": 0.4107, - "step": 359500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.667448043823242, - "loss_rtd": 0.19172178208827972, - "loss_sent": 0.06701655685901642, - "loss_sod": 0.026283372193574905, - "loss_total": 0.28502172231674194, - "step": 359599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.998225688934326, - "loss_rtd": 0.22887977957725525, - "loss_sent": 0.2037605345249176, - "loss_sod": 0.02702626958489418, - "loss_total": 0.45966657996177673, - "step": 359599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.8633623123168945, - "learning_rate": 1.6346803062730732e-06, - "loss": 0.4138, - "step": 359600 - }, - { - "epoch": 0.015398, - "loss_gen": 6.027843952178955, - "loss_rtd": 0.2001073658466339, - "loss_sent": 0.255400687456131, - "loss_sod": 0.009331222623586655, - "loss_total": 0.46483927965164185, - "step": 359699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.828859806060791, - "loss_rtd": 0.21302835643291473, - "loss_sent": 0.12264768779277802, - "loss_sod": 0.042284101247787476, - "loss_total": 0.3779601454734802, - "step": 359699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.9584111571311951, - "learning_rate": 1.626642156452335e-06, - "loss": 0.4123, - "step": 359700 - }, - { - "epoch": 0.015598, - "loss_gen": 6.0631103515625, - "loss_rtd": 0.20375189185142517, - "loss_sent": 0.4830540716648102, - "loss_sod": 0.03947862237691879, - "loss_total": 0.7262846231460571, - "step": 359799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.555032253265381, - "loss_rtd": 0.17465907335281372, - "loss_sent": 0.010923276655375957, - "loss_sod": 0.014672109857201576, - "loss_total": 0.20025447010993958, - "step": 359799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.4968209266662598, - "learning_rate": 1.618623491414356e-06, - "loss": 0.4073, - "step": 359800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.842617988586426, - "loss_rtd": 0.22465088963508606, - "loss_sent": 0.2357582002878189, - "loss_sod": 0.0856863260269165, - "loss_total": 0.5460954308509827, - "step": 359899 - }, - { - "epoch": 0.015798, - "loss_gen": 6.2033867835998535, - "loss_rtd": 0.21900822222232819, - "loss_sent": 0.0709872618317604, - "loss_sod": 0.08439719676971436, - "loss_total": 0.37439265847206116, - "step": 359899 - }, - { - "epoch": 0.0158, - "grad_norm": 1.3617589473724365, - "learning_rate": 1.6106243143890475e-06, - "loss": 0.4551, - "step": 359900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.951446533203125, - "loss_rtd": 0.19969972968101501, - "loss_sent": 0.2698728144168854, - "loss_sod": 0.02050439827144146, - "loss_total": 0.4900769591331482, - "step": 359999 - }, - { - "epoch": 0.015998, - "loss_gen": 6.22728157043457, - "loss_rtd": 0.21384070813655853, - "loss_sent": 0.1385621875524521, - "loss_sod": 0.11862621456384659, - "loss_total": 0.4710291028022766, - "step": 359999 - }, - { - "epoch": 0.016, - "grad_norm": 0.7700254917144775, - "learning_rate": 1.6026446285984764e-06, - "loss": 0.4238, - "step": 360000 - }, - { - "epoch": 0.016, - "eval_loss": 0.3966046869754791, - "eval_runtime": 150.2741, - "eval_samples_per_second": 102.766, - "eval_steps_per_second": 0.805, - "step": 360000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.609473705291748, - "loss_rtd": 0.2155919075012207, - "loss_sent": 0.3692477345466614, - "loss_sod": 0.021549124270677567, - "loss_total": 0.6063887476921082, - "step": 360099 - }, - { - "epoch": 0.016198, - "loss_gen": 6.194726943969727, - "loss_rtd": 0.20113371312618256, - "loss_sent": 0.15237769484519958, - "loss_sod": 0.09046486765146255, - "loss_total": 0.4439762830734253, - "step": 360099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.8825088739395142, - "learning_rate": 1.5946844372568603e-06, - "loss": 0.4204, - "step": 360100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.893786907196045, - "loss_rtd": 0.19623367488384247, - "loss_sent": 0.17366598546504974, - "loss_sod": 0.025182297453284264, - "loss_total": 0.39508193731307983, - "step": 360199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.7727766036987305, - "loss_rtd": 0.20183885097503662, - "loss_sent": 0.4144172966480255, - "loss_sod": 0.04856124892830849, - "loss_total": 0.6648173928260803, - "step": 360199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.9529916048049927, - "learning_rate": 1.586743743570568e-06, - "loss": 0.4187, - "step": 360200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.666574954986572, - "loss_rtd": 0.1906665414571762, - "loss_sent": 0.051899347454309464, - "loss_sod": 0.06233369559049606, - "loss_total": 0.30489957332611084, - "step": 360299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.374049663543701, - "loss_rtd": 0.16987192630767822, - "loss_sent": 2.2513539079227485e-05, - "loss_sod": 0.09455791860818863, - "loss_total": 0.26445233821868896, - "step": 360299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.1828080415725708, - "learning_rate": 1.5788225507381016e-06, - "loss": 0.4202, - "step": 360300 - }, - { - "epoch": 0.016798, - "loss_gen": 5.958693027496338, - "loss_rtd": 0.21530373394489288, - "loss_sent": 0.49312102794647217, - "loss_sod": 0.018909523263573647, - "loss_total": 0.7273342609405518, - "step": 360399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.716071605682373, - "loss_rtd": 0.1904827058315277, - "loss_sent": 2.9666818591067567e-05, - "loss_sod": 0.09116768836975098, - "loss_total": 0.28168004751205444, - "step": 360399 - }, - { - "epoch": 0.0168, - "grad_norm": 2.5394370555877686, - "learning_rate": 1.5709208619501258e-06, - "loss": 0.4325, - "step": 360400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.553415775299072, - "loss_rtd": 0.17159251868724823, - "loss_sent": 0.014099686406552792, - "loss_sod": 0.08110490441322327, - "loss_total": 0.26679709553718567, - "step": 360499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.853082180023193, - "loss_rtd": 0.20450982451438904, - "loss_sent": 0.15783369541168213, - "loss_sod": 0.02496618591248989, - "loss_total": 0.3873097002506256, - "step": 360499 - }, - { - "epoch": 0.017, - "grad_norm": 0.9959359765052795, - "learning_rate": 1.563038680389428e-06, - "loss": 0.4166, - "step": 360500 - }, - { - "epoch": 0.017198, - "loss_gen": 6.147935390472412, - "loss_rtd": 0.22501952946186066, - "loss_sent": 0.21500326693058014, - "loss_sod": 0.08628611266613007, - "loss_total": 0.5263088941574097, - "step": 360599 - }, - { - "epoch": 0.017198, - "loss_gen": 6.3497843742370605, - "loss_rtd": 0.1911085546016693, - "loss_sent": 0.3059438467025757, - "loss_sod": 0.021273598074913025, - "loss_total": 0.5183259844779968, - "step": 360599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.5327043533325195, - "learning_rate": 1.5551760092309686e-06, - "loss": 0.417, - "step": 360600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.660865783691406, - "loss_rtd": 0.2337588220834732, - "loss_sent": 0.3001292049884796, - "loss_sod": 0.0288230050355196, - "loss_total": 0.5627110004425049, - "step": 360699 - }, - { - "epoch": 0.017398, - "loss_gen": 6.022895812988281, - "loss_rtd": 0.20605115592479706, - "loss_sent": 0.10438123345375061, - "loss_sod": 0.05512363463640213, - "loss_total": 0.3655560314655304, - "step": 360699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.8538192510604858, - "learning_rate": 1.5473328516418083e-06, - "loss": 0.4171, - "step": 360700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.789525985717773, - "loss_rtd": 0.20868565142154694, - "loss_sent": 0.251506507396698, - "loss_sod": 0.0150322075933218, - "loss_total": 0.4752243757247925, - "step": 360799 - }, - { - "epoch": 0.017598, - "loss_gen": 6.079143524169922, - "loss_rtd": 0.20754873752593994, - "loss_sent": 0.13171996176242828, - "loss_sod": 0.016614915803074837, - "loss_total": 0.3558835983276367, - "step": 360799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.6854639053344727, - "learning_rate": 1.5395092107811871e-06, - "loss": 0.4262, - "step": 360800 - }, - { - "epoch": 0.017798, - "loss_gen": 6.033287525177002, - "loss_rtd": 0.21023119986057281, - "loss_sent": 0.12515436112880707, - "loss_sod": 0.022964123636484146, - "loss_total": 0.35834968090057373, - "step": 360899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.790826320648193, - "loss_rtd": 0.20912012457847595, - "loss_sent": 0.3771190047264099, - "loss_sod": 0.05757752060890198, - "loss_total": 0.6438166499137878, - "step": 360899 - }, - { - "epoch": 0.0178, - "grad_norm": 2.1769378185272217, - "learning_rate": 1.531705089800456e-06, - "loss": 0.4316, - "step": 360900 - }, - { - "epoch": 0.017998, - "loss_gen": 6.075564861297607, - "loss_rtd": 0.21308259665966034, - "loss_sent": 0.18532679975032806, - "loss_sod": 0.06486063450574875, - "loss_total": 0.46327000856399536, - "step": 360999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.79880428314209, - "loss_rtd": 0.23234602808952332, - "loss_sent": 0.16001008450984955, - "loss_sod": 0.007651767693459988, - "loss_total": 0.40000787377357483, - "step": 360999 - }, - { - "epoch": 0.018, - "grad_norm": 0.8836267590522766, - "learning_rate": 1.5239204918431282e-06, - "loss": 0.4161, - "step": 361000 - }, - { - "epoch": 0.018, - "eval_loss": 0.3971779942512512, - "eval_runtime": 149.9999, - "eval_samples_per_second": 102.953, - "eval_steps_per_second": 0.807, - "step": 361000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.867193222045898, - "loss_rtd": 0.20037880539894104, - "loss_sent": 0.07236301898956299, - "loss_sod": 0.008503071032464504, - "loss_total": 0.2812448740005493, - "step": 361099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.917838096618652, - "loss_rtd": 0.21738804876804352, - "loss_sent": 0.20074941217899323, - "loss_sod": 0.010854817926883698, - "loss_total": 0.42899227142333984, - "step": 361099 - }, - { - "epoch": 0.0182, - "grad_norm": 0.7468519806861877, - "learning_rate": 1.516155420044818e-06, - "loss": 0.4156, - "step": 361100 - }, - { - "epoch": 0.018398, - "loss_gen": 6.057809352874756, - "loss_rtd": 0.20180083811283112, - "loss_sent": 0.16114170849323273, - "loss_sod": 0.18150779604911804, - "loss_total": 0.5444503426551819, - "step": 361199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.785623550415039, - "loss_rtd": 0.2219465672969818, - "loss_sent": 0.25474539399147034, - "loss_sod": 0.010815219953656197, - "loss_total": 0.487507164478302, - "step": 361199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.78762948513031, - "learning_rate": 1.5084098775333122e-06, - "loss": 0.4116, - "step": 361200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.623623371124268, - "loss_rtd": 0.20676663517951965, - "loss_sent": 0.48878878355026245, - "loss_sod": 0.0915762186050415, - "loss_total": 0.787131667137146, - "step": 361299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.426612854003906, - "loss_rtd": 0.1787305772304535, - "loss_sent": 0.0002620848536025733, - "loss_sod": 0.07906591892242432, - "loss_total": 0.2580585777759552, - "step": 361299 - }, - { - "epoch": 0.0186, - "grad_norm": 1.9698206186294556, - "learning_rate": 1.5006838674285094e-06, - "loss": 0.418, - "step": 361300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.861983776092529, - "loss_rtd": 0.2160624861717224, - "loss_sent": 0.43743348121643066, - "loss_sod": 0.018248524516820908, - "loss_total": 0.6717444658279419, - "step": 361399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.972048282623291, - "loss_rtd": 0.20444737374782562, - "loss_sent": 0.1747852861881256, - "loss_sod": 0.06728965044021606, - "loss_total": 0.4465223252773285, - "step": 361399 - }, - { - "epoch": 0.0188, - "grad_norm": 1.4579147100448608, - "learning_rate": 1.492977392842443e-06, - "loss": 0.4198, - "step": 361400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.92585563659668, - "loss_rtd": 0.22486479580402374, - "loss_sent": 0.23057816922664642, - "loss_sod": 0.004533334169536829, - "loss_total": 0.45997631549835205, - "step": 361499 - }, - { - "epoch": 0.018998, - "loss_gen": 6.12306022644043, - "loss_rtd": 0.21385575830936432, - "loss_sent": 0.05615532025694847, - "loss_sod": 0.03984547033905983, - "loss_total": 0.3098565638065338, - "step": 361499 - }, - { - "epoch": 0.019, - "grad_norm": 0.5748463273048401, - "learning_rate": 1.4852904568792792e-06, - "loss": 0.4259, - "step": 361500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.845739364624023, - "loss_rtd": 0.2064388394355774, - "loss_sent": 0.11149514466524124, - "loss_sod": 0.046577051281929016, - "loss_total": 0.36451101303100586, - "step": 361599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.389118671417236, - "loss_rtd": 0.1845724731683731, - "loss_sent": 0.026308724656701088, - "loss_sod": 0.07386022061109543, - "loss_total": 0.2847414016723633, - "step": 361599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.7511069178581238, - "learning_rate": 1.4776230626353195e-06, - "loss": 0.4182, - "step": 361600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.899303913116455, - "loss_rtd": 0.2296900749206543, - "loss_sent": 0.07064184546470642, - "loss_sod": 0.03610319271683693, - "loss_total": 0.33643510937690735, - "step": 361699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.9688873291015625, - "loss_rtd": 0.2145996242761612, - "loss_sent": 0.17921024560928345, - "loss_sod": 0.07614393532276154, - "loss_total": 0.4699538052082062, - "step": 361699 - }, - { - "epoch": 0.0194, - "grad_norm": 0.9803617596626282, - "learning_rate": 1.469975213198993e-06, - "loss": 0.4239, - "step": 361700 - }, - { - "epoch": 0.019598, - "loss_gen": 5.839181423187256, - "loss_rtd": 0.21481937170028687, - "loss_sent": 0.09946290403604507, - "loss_sod": 0.03148527070879936, - "loss_total": 0.3457675576210022, - "step": 361799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.744566440582275, - "loss_rtd": 0.2183230072259903, - "loss_sent": 0.21551957726478577, - "loss_sod": 0.10369285941123962, - "loss_total": 0.5375354290008545, - "step": 361799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.0184943675994873, - "learning_rate": 1.4623469116508415e-06, - "loss": 0.421, - "step": 361800 - }, - { - "epoch": 0.019798, - "loss_gen": 6.41466760635376, - "loss_rtd": 0.205556258559227, - "loss_sent": 0.22103486955165863, - "loss_sod": 0.07430548965930939, - "loss_total": 0.5008966326713562, - "step": 361899 - }, - { - "epoch": 0.019798, - "loss_gen": 6.195828914642334, - "loss_rtd": 0.19055189192295074, - "loss_sent": 0.16132284700870514, - "loss_sod": 0.02069966122508049, - "loss_total": 0.3725743889808655, - "step": 361899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.7664250731468201, - "learning_rate": 1.4547381610635457e-06, - "loss": 0.4236, - "step": 361900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.322070598602295, - "loss_rtd": 0.1838359236717224, - "loss_sent": 2.66446058958536e-05, - "loss_sod": 0.025309057906270027, - "loss_total": 0.2091716229915619, - "step": 361999 - }, - { - "epoch": 0.019998, - "loss_gen": 5.830729007720947, - "loss_rtd": 0.2038728892803192, - "loss_sent": 0.09125927835702896, - "loss_sod": 0.013036874122917652, - "loss_total": 0.30816903710365295, - "step": 361999 - }, - { - "epoch": 0.02, - "grad_norm": 0.7681975960731506, - "learning_rate": 1.4471489645019153e-06, - "loss": 0.4175, - "step": 362000 - }, - { - "epoch": 0.02, - "eval_loss": 0.39502063393592834, - "eval_runtime": 151.6702, - "eval_samples_per_second": 101.82, - "eval_steps_per_second": 0.798, - "step": 362000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.536657333374023, - "loss_rtd": 0.2285952866077423, - "loss_sent": 0.26336389780044556, - "loss_sod": 0.017407717183232307, - "loss_total": 0.5093668699264526, - "step": 362099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.961015701293945, - "loss_rtd": 0.22314313054084778, - "loss_sent": 0.3466446101665497, - "loss_sod": 0.07911237329244614, - "loss_total": 0.6489001512527466, - "step": 362099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.4666422605514526, - "learning_rate": 1.4395793250228828e-06, - "loss": 0.414, - "step": 362100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.490997791290283, - "loss_rtd": 0.16417410969734192, - "loss_sent": 0.009751019068062305, - "loss_sod": 0.06044970825314522, - "loss_total": 0.23437483608722687, - "step": 362199 - }, - { - "epoch": 0.020398, - "loss_gen": 5.978743076324463, - "loss_rtd": 0.1945478767156601, - "loss_sent": 0.943418562412262, - "loss_sod": 0.05019240826368332, - "loss_total": 1.1881588697433472, - "step": 362199 - }, - { - "epoch": 0.0204, - "grad_norm": 3.0135691165924072, - "learning_rate": 1.4320292456754869e-06, - "loss": 0.4167, - "step": 362200 - }, - { - "epoch": 0.020598, - "loss_gen": 6.460975170135498, - "loss_rtd": 0.23090098798274994, - "loss_sent": 0.29713425040245056, - "loss_sod": 0.09923360496759415, - "loss_total": 0.6272688508033752, - "step": 362299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.66818380355835, - "loss_rtd": 0.18967388570308685, - "loss_sent": 0.13752974569797516, - "loss_sod": 0.041123323142528534, - "loss_total": 0.36832696199417114, - "step": 362299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.4002727270126343, - "learning_rate": 1.4244987295009004e-06, - "loss": 0.4088, - "step": 362300 - }, - { - "epoch": 0.020798, - "loss_gen": 6.059211730957031, - "loss_rtd": 0.2148807942867279, - "loss_sent": 0.2364805042743683, - "loss_sod": 0.09082937240600586, - "loss_total": 0.542190670967102, - "step": 362399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.923233985900879, - "loss_rtd": 0.2028600573539734, - "loss_sent": 0.2622675895690918, - "loss_sod": 0.02612539380788803, - "loss_total": 0.4912530481815338, - "step": 362399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.725727915763855, - "learning_rate": 1.4169877795324193e-06, - "loss": 0.4177, - "step": 362400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.69184684753418, - "loss_rtd": 0.2002236545085907, - "loss_sent": 0.1865220069885254, - "loss_sod": 0.028119299560785294, - "loss_total": 0.4148649573326111, - "step": 362499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.90725040435791, - "loss_rtd": 0.19531695544719696, - "loss_sent": 0.29432928562164307, - "loss_sod": 0.013063258491456509, - "loss_total": 0.5027095079421997, - "step": 362499 - }, - { - "epoch": 0.021, - "grad_norm": 1.4516067504882812, - "learning_rate": 1.4094963987954513e-06, - "loss": 0.4099, - "step": 362500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.707841873168945, - "loss_rtd": 0.19868823885917664, - "loss_sent": 0.1642095446586609, - "loss_sod": 0.027306100353598595, - "loss_total": 0.39020389318466187, - "step": 362599 - }, - { - "epoch": 0.021198, - "loss_gen": 6.0773844718933105, - "loss_rtd": 0.21753957867622375, - "loss_sent": 0.06987687200307846, - "loss_sod": 0.04470841586589813, - "loss_total": 0.33212485909461975, - "step": 362599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.6767948269844055, - "learning_rate": 1.4020245903075214e-06, - "loss": 0.4023, - "step": 362600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.555398464202881, - "loss_rtd": 0.20148345828056335, - "loss_sent": 0.22582140564918518, - "loss_sod": 0.030306756496429443, - "loss_total": 0.457611620426178, - "step": 362699 - }, - { - "epoch": 0.021398, - "loss_gen": 6.039510726928711, - "loss_rtd": 0.2075674682855606, - "loss_sent": 0.29000580310821533, - "loss_sod": 0.04601665958762169, - "loss_total": 0.5435899496078491, - "step": 362699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.5605874061584473, - "learning_rate": 1.3945723570782721e-06, - "loss": 0.4047, - "step": 362700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.936351299285889, - "loss_rtd": 0.20335261523723602, - "loss_sent": 0.1253519058227539, - "loss_sod": 0.08839675784111023, - "loss_total": 0.41710126399993896, - "step": 362799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.9179253578186035, - "loss_rtd": 0.1987505406141281, - "loss_sent": 0.3874909579753876, - "loss_sod": 0.026826800778508186, - "loss_total": 0.6130682826042175, - "step": 362799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.2404730319976807, - "learning_rate": 1.3871397021094634e-06, - "loss": 0.4224, - "step": 362800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.974052906036377, - "loss_rtd": 0.21229790151119232, - "loss_sent": 0.18950486183166504, - "loss_sod": 0.04930327832698822, - "loss_total": 0.4511060416698456, - "step": 362899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.780531883239746, - "loss_rtd": 0.20626120269298553, - "loss_sent": 0.3620285987854004, - "loss_sod": 0.0024015717208385468, - "loss_total": 0.5706913471221924, - "step": 362899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.1324671506881714, - "learning_rate": 1.3797266283949784e-06, - "loss": 0.4301, - "step": 362900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.887303352355957, - "loss_rtd": 0.21104301512241364, - "loss_sent": 0.11129486560821533, - "loss_sod": 0.08403178304433823, - "loss_total": 0.4063696563243866, - "step": 362999 - }, - { - "epoch": 0.021998, - "loss_gen": 6.0441107749938965, - "loss_rtd": 0.20486198365688324, - "loss_sent": 0.2891045808792114, - "loss_sod": 0.04153968393802643, - "loss_total": 0.5355062484741211, - "step": 362999 - }, - { - "epoch": 0.022, - "grad_norm": 1.1031920909881592, - "learning_rate": 1.3723331389207893e-06, - "loss": 0.4165, - "step": 363000 - }, - { - "epoch": 0.022, - "eval_loss": 0.3953239321708679, - "eval_runtime": 150.2575, - "eval_samples_per_second": 102.777, - "eval_steps_per_second": 0.805, - "step": 363000 - }, - { - "epoch": 0.022198, - "loss_gen": 6.003880977630615, - "loss_rtd": 0.2102266550064087, - "loss_sent": 0.4249902367591858, - "loss_sod": 0.037185702472925186, - "loss_total": 0.6724026203155518, - "step": 363099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.912100791931152, - "loss_rtd": 0.2274281084537506, - "loss_sent": 0.14505121111869812, - "loss_sod": 0.024947090074419975, - "loss_total": 0.39742639660835266, - "step": 363099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.2895781993865967, - "learning_rate": 1.3649592366649922e-06, - "loss": 0.4153, - "step": 363100 - }, - { - "epoch": 0.022398, - "loss_gen": 6.098489761352539, - "loss_rtd": 0.21353188157081604, - "loss_sent": 0.11935798078775406, - "loss_sod": 0.044251710176467896, - "loss_total": 0.3771415650844574, - "step": 363199 - }, - { - "epoch": 0.022398, - "loss_gen": 6.05353307723999, - "loss_rtd": 0.19804789125919342, - "loss_sent": 0.16045266389846802, - "loss_sod": 0.026974501088261604, - "loss_total": 0.3854750394821167, - "step": 363199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.8909897804260254, - "learning_rate": 1.3576049245978052e-06, - "loss": 0.4166, - "step": 363200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.051226615905762, - "loss_rtd": 0.15107493102550507, - "loss_sent": 0.005753755569458008, - "loss_sod": 0.012761048041284084, - "loss_total": 0.16958972811698914, - "step": 363299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.795234680175781, - "loss_rtd": 0.2226068526506424, - "loss_sent": 0.14512288570404053, - "loss_sod": 0.012116256169974804, - "loss_total": 0.3798459768295288, - "step": 363299 - }, - { - "epoch": 0.0226, - "grad_norm": 0.6773077249526978, - "learning_rate": 1.3502702056815308e-06, - "loss": 0.4272, - "step": 363300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.938736438751221, - "loss_rtd": 0.20495794713497162, - "loss_sent": 0.18485291302204132, - "loss_sod": 0.04449421167373657, - "loss_total": 0.4343050718307495, - "step": 363399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.523612976074219, - "loss_rtd": 0.1764361560344696, - "loss_sent": 0.051566604524850845, - "loss_sod": 0.1222393810749054, - "loss_total": 0.35024213790893555, - "step": 363399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.1190216541290283, - "learning_rate": 1.342955082870606e-06, - "loss": 0.4081, - "step": 363400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.842376232147217, - "loss_rtd": 0.20203332602977753, - "loss_sent": 0.2586800754070282, - "loss_sod": 0.025043871253728867, - "loss_total": 0.4857572913169861, - "step": 363499 - }, - { - "epoch": 0.022998, - "loss_gen": 6.056408882141113, - "loss_rtd": 0.19791297614574432, - "loss_sent": 0.15465180575847626, - "loss_sod": 0.053231462836265564, - "loss_total": 0.40579622983932495, - "step": 363499 - }, - { - "epoch": 0.023, - "grad_norm": 1.9620479345321655, - "learning_rate": 1.3356595591115516e-06, - "loss": 0.4011, - "step": 363500 - }, - { - "epoch": 0.023198, - "loss_gen": 6.134500980377197, - "loss_rtd": 0.2163434773683548, - "loss_sent": 0.22586670517921448, - "loss_sod": 0.06513367593288422, - "loss_total": 0.5073438882827759, - "step": 363599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.782366752624512, - "loss_rtd": 0.2267577201128006, - "loss_sent": 0.0642230436205864, - "loss_sod": 0.003363637952134013, - "loss_total": 0.29434439539909363, - "step": 363599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.8331063389778137, - "learning_rate": 1.3283836373430059e-06, - "loss": 0.4111, - "step": 363600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.831057071685791, - "loss_rtd": 0.18285562098026276, - "loss_sent": 6.96662173140794e-05, - "loss_sod": 0.0824233740568161, - "loss_total": 0.2653486728668213, - "step": 363699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.61504602432251, - "loss_rtd": 0.1799226701259613, - "loss_sent": 3.860507786157541e-05, - "loss_sod": 0.09754588454961777, - "loss_total": 0.27750715613365173, - "step": 363699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8533154726028442, - "learning_rate": 1.3211273204957186e-06, - "loss": 0.4163, - "step": 363700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.171628475189209, - "loss_rtd": 0.17534704506397247, - "loss_sent": 0.002893816912546754, - "loss_sod": 0.13911409676074982, - "loss_total": 0.3173549473285675, - "step": 363799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.8331499099731445, - "loss_rtd": 0.21556037664413452, - "loss_sent": 0.10961310565471649, - "loss_sod": 0.06676691025495529, - "loss_total": 0.3919404149055481, - "step": 363799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.1043574810028076, - "learning_rate": 1.3138906114925132e-06, - "loss": 0.4018, - "step": 363800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.4935126304626465, - "loss_rtd": 0.1978653371334076, - "loss_sent": 2.524955925764516e-05, - "loss_sod": 0.13067789375782013, - "loss_total": 0.3285684883594513, - "step": 363899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.425945281982422, - "loss_rtd": 0.16025815904140472, - "loss_sent": 5.956891618552618e-05, - "loss_sod": 0.08635027706623077, - "loss_total": 0.2466680109500885, - "step": 363899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.9258561134338379, - "learning_rate": 1.306673513248352e-06, - "loss": 0.3935, - "step": 363900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.766495227813721, - "loss_rtd": 0.16648903489112854, - "loss_sent": 2.2863701815367676e-05, - "loss_sod": 0.06929951906204224, - "loss_total": 0.23581141233444214, - "step": 363999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.72847318649292, - "loss_rtd": 0.17708592116832733, - "loss_sent": 0.01743916980922222, - "loss_sod": 0.09215869009494781, - "loss_total": 0.2866837680339813, - "step": 363999 - }, - { - "epoch": 0.024, - "grad_norm": 0.9077220559120178, - "learning_rate": 1.2994760286702767e-06, - "loss": 0.4285, - "step": 364000 - }, - { - "epoch": 0.024, - "eval_loss": 0.39305803179740906, - "eval_runtime": 150.4121, - "eval_samples_per_second": 102.671, - "eval_steps_per_second": 0.804, - "step": 364000 - }, - { - "epoch": 0.024198, - "loss_gen": 6.001964092254639, - "loss_rtd": 0.21313053369522095, - "loss_sent": 0.3803410232067108, - "loss_sod": 0.010521373711526394, - "loss_total": 0.6039929389953613, - "step": 364099 - }, - { - "epoch": 0.024198, - "loss_gen": 6.142898082733154, - "loss_rtd": 0.22206230461597443, - "loss_sent": 0.13670524954795837, - "loss_sod": 0.012702586129307747, - "loss_total": 0.3714701533317566, - "step": 364099 - }, - { - "epoch": 0.0242, - "grad_norm": 0.8884294629096985, - "learning_rate": 1.2922981606574348e-06, - "loss": 0.4052, - "step": 364100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.53360652923584, - "loss_rtd": 0.17999228835105896, - "loss_sent": 2.7344931368133985e-05, - "loss_sod": 0.03845468908548355, - "loss_total": 0.21847431361675262, - "step": 364199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.515370845794678, - "loss_rtd": 0.1846403181552887, - "loss_sent": 0.11708176881074905, - "loss_sod": 0.04682455211877823, - "loss_total": 0.3485466241836548, - "step": 364199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9045760035514832, - "learning_rate": 1.2851399121010687e-06, - "loss": 0.4004, - "step": 364200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.977632999420166, - "loss_rtd": 0.2112598568201065, - "loss_sent": 0.5074205994606018, - "loss_sod": 0.03248698264360428, - "loss_total": 0.7511674165725708, - "step": 364299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.592776775360107, - "loss_rtd": 0.1873634159564972, - "loss_sent": 0.02264423482120037, - "loss_sod": 0.05196958780288696, - "loss_total": 0.2619772255420685, - "step": 364299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.5159434080123901, - "learning_rate": 1.2780012858845169e-06, - "loss": 0.4099, - "step": 364300 - }, - { - "epoch": 0.024798, - "loss_gen": 5.8185529708862305, - "loss_rtd": 0.19173812866210938, - "loss_sent": 0.1363159418106079, - "loss_sod": 0.09146563708782196, - "loss_total": 0.41951972246170044, - "step": 364399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.35556697845459, - "loss_rtd": 0.19410699605941772, - "loss_sent": 0.09060783684253693, - "loss_sod": 0.029730046167969704, - "loss_total": 0.314444899559021, - "step": 364399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.2566590309143066, - "learning_rate": 1.270882284883229e-06, - "loss": 0.427, - "step": 364400 - }, - { - "epoch": 0.024998, - "loss_gen": 6.060112476348877, - "loss_rtd": 0.2194378525018692, - "loss_sent": 0.25543156266212463, - "loss_sod": 0.08118052780628204, - "loss_total": 0.5560499429702759, - "step": 364499 - }, - { - "epoch": 0.024998, - "loss_gen": 6.098330020904541, - "loss_rtd": 0.20278386771678925, - "loss_sent": 0.1597369760274887, - "loss_sod": 0.07793529331684113, - "loss_total": 0.4404561519622803, - "step": 364499 - }, - { - "epoch": 0.025, - "grad_norm": 1.1031123399734497, - "learning_rate": 1.2637829119647172e-06, - "loss": 0.4143, - "step": 364500 - }, - { - "epoch": 0.025198, - "loss_gen": 6.049230098724365, - "loss_rtd": 0.23129546642303467, - "loss_sent": 0.1357702761888504, - "loss_sod": 0.025197722017765045, - "loss_total": 0.3922634720802307, - "step": 364599 - }, - { - "epoch": 0.025198, - "loss_gen": 6.046126842498779, - "loss_rtd": 0.2011864334344864, - "loss_sent": 0.14850084483623505, - "loss_sod": 0.016730941832065582, - "loss_total": 0.3664182424545288, - "step": 364599 - }, - { - "epoch": 0.0252, - "grad_norm": 0.8324997425079346, - "learning_rate": 1.2567031699886267e-06, - "loss": 0.4184, - "step": 364600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.379150867462158, - "loss_rtd": 0.17376330494880676, - "loss_sent": 0.038890544325113297, - "loss_sod": 0.07320712506771088, - "loss_total": 0.28586098551750183, - "step": 364699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.987701892852783, - "loss_rtd": 0.21855410933494568, - "loss_sent": 0.11376553028821945, - "loss_sod": 0.011206366121768951, - "loss_total": 0.3435260057449341, - "step": 364699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.7531861662864685, - "learning_rate": 1.2496430618066656e-06, - "loss": 0.4313, - "step": 364700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.782328128814697, - "loss_rtd": 0.18509866297245026, - "loss_sent": 0.289043664932251, - "loss_sod": 0.012807246297597885, - "loss_total": 0.4869495630264282, - "step": 364799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.947630405426025, - "loss_rtd": 0.21062570810317993, - "loss_sent": 0.24626106023788452, - "loss_sod": 0.03454527258872986, - "loss_total": 0.4914320409297943, - "step": 364799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.2206841707229614, - "learning_rate": 1.2426025902626592e-06, - "loss": 0.4143, - "step": 364800 - }, - { - "epoch": 0.025798, - "loss_gen": 6.341766834259033, - "loss_rtd": 0.23094379901885986, - "loss_sent": 0.15503183007240295, - "loss_sod": 0.020966900512576103, - "loss_total": 0.4069425165653229, - "step": 364899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.520390510559082, - "loss_rtd": 0.1978568583726883, - "loss_sent": 0.02651343122124672, - "loss_sod": 0.062034301459789276, - "loss_total": 0.2864045798778534, - "step": 364899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.8073655366897583, - "learning_rate": 1.2355817581924944e-06, - "loss": 0.3938, - "step": 364900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.278810501098633, - "loss_rtd": 0.1681014448404312, - "loss_sent": 0.009314697235822678, - "loss_sod": 0.07258976250886917, - "loss_total": 0.25000590085983276, - "step": 364999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.8487677574157715, - "loss_rtd": 0.21409372985363007, - "loss_sent": 0.11997484415769577, - "loss_sod": 0.00814978126436472, - "loss_total": 0.3422183692455292, - "step": 364999 - }, - { - "epoch": 0.026, - "grad_norm": 0.7892587780952454, - "learning_rate": 1.2285805684241592e-06, - "loss": 0.3964, - "step": 365000 - }, - { - "epoch": 0.026, - "eval_loss": 0.39621347188949585, - "eval_runtime": 150.439, - "eval_samples_per_second": 102.653, - "eval_steps_per_second": 0.804, - "step": 365000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.922516345977783, - "loss_rtd": 0.19338731467723846, - "loss_sent": 0.09866566210985184, - "loss_sod": 0.04476850479841232, - "loss_total": 0.33682146668434143, - "step": 365099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.713855266571045, - "loss_rtd": 0.2088758498430252, - "loss_sent": 0.4114687442779541, - "loss_sod": 0.08549314737319946, - "loss_total": 0.7058377265930176, - "step": 365099 - }, - { - "epoch": 0.0262, - "grad_norm": 1.5730397701263428, - "learning_rate": 1.2215990237777419e-06, - "loss": 0.4098, - "step": 365100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.531763553619385, - "loss_rtd": 0.1814747154712677, - "loss_sent": 0.075872503221035, - "loss_sod": 0.06202581524848938, - "loss_total": 0.3193730413913727, - "step": 365199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.825388431549072, - "loss_rtd": 0.2028234899044037, - "loss_sent": 0.21024520695209503, - "loss_sod": 0.02161809802055359, - "loss_total": 0.4346867799758911, - "step": 365199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.2231762409210205, - "learning_rate": 1.21463712706541e-06, - "loss": 0.4154, - "step": 365200 - }, - { - "epoch": 0.026598, - "loss_gen": 6.497543811798096, - "loss_rtd": 0.19838689267635345, - "loss_sent": 0.1384795606136322, - "loss_sod": 0.04936708137392998, - "loss_total": 0.38623353838920593, - "step": 365299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.94274377822876, - "loss_rtd": 0.22551003098487854, - "loss_sent": 0.21004793047904968, - "loss_sod": 0.021115781739354134, - "loss_total": 0.4566737413406372, - "step": 365299 - }, - { - "epoch": 0.0266, - "grad_norm": 0.6815785765647888, - "learning_rate": 1.2076948810914036e-06, - "loss": 0.421, - "step": 365300 - }, - { - "epoch": 0.026798, - "loss_gen": 6.2182111740112305, - "loss_rtd": 0.22094619274139404, - "loss_sent": 0.13484445214271545, - "loss_sod": 0.028378788381814957, - "loss_total": 0.38416942954063416, - "step": 365399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.6115193367004395, - "loss_rtd": 0.1851220428943634, - "loss_sent": 0.12120498716831207, - "loss_sod": 0.009966659359633923, - "loss_total": 0.3162936866283417, - "step": 365399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.6521903276443481, - "learning_rate": 1.2007722886520634e-06, - "loss": 0.4183, - "step": 365400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.724940299987793, - "loss_rtd": 0.19978727400302887, - "loss_sent": 0.2348742038011551, - "loss_sod": 0.024614332243800163, - "loss_total": 0.4592758119106293, - "step": 365499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.600672721862793, - "loss_rtd": 0.21980349719524384, - "loss_sent": 0.3051648736000061, - "loss_sod": 0.022819124162197113, - "loss_total": 0.5477874875068665, - "step": 365499 - }, - { - "epoch": 0.027, - "grad_norm": 2.144178867340088, - "learning_rate": 1.1938693525358147e-06, - "loss": 0.4189, - "step": 365500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.77287483215332, - "loss_rtd": 0.20586730539798737, - "loss_sent": 0.12783952057361603, - "loss_sod": 0.002401602454483509, - "loss_total": 0.3361084461212158, - "step": 365599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.98655891418457, - "loss_rtd": 0.23912228643894196, - "loss_sent": 0.18128874897956848, - "loss_sod": 0.031496576964855194, - "loss_total": 0.45190760493278503, - "step": 365599 - }, - { - "epoch": 0.0272, - "grad_norm": 0.8339471220970154, - "learning_rate": 1.1869860755231555e-06, - "loss": 0.4093, - "step": 365600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.848016738891602, - "loss_rtd": 0.21581026911735535, - "loss_sent": 0.5503897070884705, - "loss_sod": 0.05699167400598526, - "loss_total": 0.8231916427612305, - "step": 365699 - }, - { - "epoch": 0.027398, - "loss_gen": 6.081898212432861, - "loss_rtd": 0.2190917730331421, - "loss_sent": 0.11327815055847168, - "loss_sod": 0.030282845720648766, - "loss_total": 0.3626527786254883, - "step": 365699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.380051612854004, - "learning_rate": 1.1801224603866624e-06, - "loss": 0.4239, - "step": 365700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.3949713706970215, - "loss_rtd": 0.17945027351379395, - "loss_sent": 0.07280128449201584, - "loss_sod": 0.039197977632284164, - "loss_total": 0.29144954681396484, - "step": 365799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.641369819641113, - "loss_rtd": 0.1944916695356369, - "loss_sent": 0.28476682305336, - "loss_sod": 0.052630335092544556, - "loss_total": 0.5318888425827026, - "step": 365799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.8354421854019165, - "learning_rate": 1.1732785098910015e-06, - "loss": 0.4149, - "step": 365800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.409779071807861, - "loss_rtd": 0.15218254923820496, - "loss_sent": 0.0001465770765207708, - "loss_sod": 0.05663083493709564, - "loss_total": 0.20895996689796448, - "step": 365899 - }, - { - "epoch": 0.027798, - "loss_gen": 6.117166996002197, - "loss_rtd": 0.2116507589817047, - "loss_sent": 0.08239760249853134, - "loss_sod": 0.08382310718297958, - "loss_total": 0.37787148356437683, - "step": 365899 - }, - { - "epoch": 0.0278, - "grad_norm": 1.1110810041427612, - "learning_rate": 1.1664542267929236e-06, - "loss": 0.4229, - "step": 365900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.705819606781006, - "loss_rtd": 0.18144482374191284, - "loss_sent": 0.07757905125617981, - "loss_sod": 0.03736221790313721, - "loss_total": 0.29638609290122986, - "step": 365999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.793848037719727, - "loss_rtd": 0.1927725076675415, - "loss_sent": 0.10372845828533173, - "loss_sod": 0.020766176283359528, - "loss_total": 0.31726711988449097, - "step": 365999 - }, - { - "epoch": 0.028, - "grad_norm": 1.0994117259979248, - "learning_rate": 1.1596496138412405e-06, - "loss": 0.4248, - "step": 366000 - }, - { - "epoch": 0.028, - "eval_loss": 0.3949771225452423, - "eval_runtime": 150.2526, - "eval_samples_per_second": 102.78, - "eval_steps_per_second": 0.805, - "step": 366000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.71099328994751, - "loss_rtd": 0.2157634049654007, - "loss_sent": 0.10976104438304901, - "loss_sod": 0.0013143944088369608, - "loss_total": 0.3268388509750366, - "step": 366099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.858689308166504, - "loss_rtd": 0.2047213315963745, - "loss_sent": 0.035277750343084335, - "loss_sod": 0.042113568633794785, - "loss_total": 0.28211265802383423, - "step": 366099 - }, - { - "epoch": 0.0282, - "grad_norm": 0.6429213285446167, - "learning_rate": 1.1528646737768544e-06, - "loss": 0.4217, - "step": 366100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.90069055557251, - "loss_rtd": 0.20099638402462006, - "loss_sent": 0.12928296625614166, - "loss_sod": 0.05825787037611008, - "loss_total": 0.3885372281074524, - "step": 366199 - }, - { - "epoch": 0.028398, - "loss_gen": 5.547062873840332, - "loss_rtd": 0.17707639932632446, - "loss_sent": 0.010011816397309303, - "loss_sod": 0.07997766882181168, - "loss_total": 0.2670658826828003, - "step": 366199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.237799882888794, - "learning_rate": 1.1460994093327294e-06, - "loss": 0.4174, - "step": 366200 - }, - { - "epoch": 0.028598, - "loss_gen": 5.897192001342773, - "loss_rtd": 0.21719658374786377, - "loss_sent": 0.11083323508501053, - "loss_sod": 0.02024351805448532, - "loss_total": 0.3482733368873596, - "step": 366299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.976822376251221, - "loss_rtd": 0.20563849806785583, - "loss_sent": 0.2585853934288025, - "loss_sod": 0.0752931609749794, - "loss_total": 0.5395170450210571, - "step": 366299 - }, - { - "epoch": 0.0286, - "grad_norm": 1.2281782627105713, - "learning_rate": 1.1393538232339297e-06, - "loss": 0.428, - "step": 366300 - }, - { - "epoch": 0.028798, - "loss_gen": 6.089418888092041, - "loss_rtd": 0.19139453768730164, - "loss_sent": 0.23716497421264648, - "loss_sod": 0.030864574015140533, - "loss_total": 0.45942407846450806, - "step": 366399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.777108669281006, - "loss_rtd": 0.23330779373645782, - "loss_sent": 0.21502019464969635, - "loss_sod": 0.035990871489048004, - "loss_total": 0.4843188524246216, - "step": 366399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.0611236095428467, - "learning_rate": 1.1326279181975597e-06, - "loss": 0.4153, - "step": 366400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.207485198974609, - "loss_rtd": 0.1862115114927292, - "loss_sent": 2.4232707801274955e-05, - "loss_sod": 0.14000684022903442, - "loss_total": 0.326242595911026, - "step": 366499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.205263614654541, - "loss_rtd": 0.16241781413555145, - "loss_sent": 2.5706000087666325e-05, - "loss_sod": 0.39901670813560486, - "loss_total": 0.5614601969718933, - "step": 366499 - }, - { - "epoch": 0.029, - "grad_norm": 1.6571792364120483, - "learning_rate": 1.1259216969328245e-06, - "loss": 0.4161, - "step": 366500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.826868534088135, - "loss_rtd": 0.20336231589317322, - "loss_sent": 0.44961991906166077, - "loss_sod": 0.005061282776296139, - "loss_total": 0.6580435037612915, - "step": 366599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.7266411781311035, - "loss_rtd": 0.21946556866168976, - "loss_sent": 0.2193601280450821, - "loss_sod": 0.04416005313396454, - "loss_total": 0.4829857349395752, - "step": 366599 - }, - { - "epoch": 0.0292, - "grad_norm": 1.7573833465576172, - "learning_rate": 1.1192351621409803e-06, - "loss": 0.4112, - "step": 366600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.756178855895996, - "loss_rtd": 0.22352372109889984, - "loss_sent": 0.04831600934267044, - "loss_sod": 0.08884535729885101, - "loss_total": 0.3606850802898407, - "step": 366699 - }, - { - "epoch": 0.029398, - "loss_gen": 6.121270656585693, - "loss_rtd": 0.20888565480709076, - "loss_sent": 0.11543800681829453, - "loss_sod": 0.08961872011423111, - "loss_total": 0.4139423966407776, - "step": 366699 - }, - { - "epoch": 0.0294, - "grad_norm": 1.986507534980774, - "learning_rate": 1.1125683165153778e-06, - "loss": 0.4145, - "step": 366700 - }, - { - "epoch": 0.029598, - "loss_gen": 6.292279243469238, - "loss_rtd": 0.21221573650836945, - "loss_sent": 0.37807655334472656, - "loss_sod": 0.03985176235437393, - "loss_total": 0.6301440596580505, - "step": 366799 - }, - { - "epoch": 0.029598, - "loss_gen": 5.803103923797607, - "loss_rtd": 0.22886456549167633, - "loss_sent": 0.4535507559776306, - "loss_sod": 0.009185624308884144, - "loss_total": 0.6916009187698364, - "step": 366799 - }, - { - "epoch": 0.0296, - "grad_norm": 2.8848581314086914, - "learning_rate": 1.1059211627414024e-06, - "loss": 0.4084, - "step": 366800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.5437517166137695, - "loss_rtd": 0.20916791260242462, - "loss_sent": 0.0702207088470459, - "loss_sod": 0.006545985583215952, - "loss_total": 0.2859346270561218, - "step": 366899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.918610572814941, - "loss_rtd": 0.20172882080078125, - "loss_sent": 0.2698323726654053, - "loss_sod": 0.023541470989584923, - "loss_total": 0.4951026439666748, - "step": 366899 - }, - { - "epoch": 0.0298, - "grad_norm": 0.6408466100692749, - "learning_rate": 1.0992937034965345e-06, - "loss": 0.4197, - "step": 366900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.614994049072266, - "loss_rtd": 0.19955208897590637, - "loss_sent": 0.23481421172618866, - "loss_sod": 0.018059633672237396, - "loss_total": 0.4524259567260742, - "step": 366999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.810892105102539, - "loss_rtd": 0.22652031481266022, - "loss_sent": 0.41318392753601074, - "loss_sod": 0.013888641260564327, - "loss_total": 0.6535928845405579, - "step": 366999 - }, - { - "epoch": 0.03, - "grad_norm": 0.8815780282020569, - "learning_rate": 1.0926859414503165e-06, - "loss": 0.4225, - "step": 367000 - }, - { - "epoch": 0.03, - "eval_loss": 0.3982074558734894, - "eval_runtime": 150.1153, - "eval_samples_per_second": 102.874, - "eval_steps_per_second": 0.806, - "step": 367000 - }, - { - "epoch": 0.030198, - "loss_gen": 6.105481147766113, - "loss_rtd": 0.2054414600133896, - "loss_sent": 0.2658814489841461, - "loss_sod": 0.012263797223567963, - "loss_total": 0.48358669877052307, - "step": 367099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.74627161026001, - "loss_rtd": 0.1987999677658081, - "loss_sent": 0.3739352226257324, - "loss_sod": 0.03399643301963806, - "loss_total": 0.606731653213501, - "step": 367099 - }, - { - "epoch": 0.0302, - "grad_norm": 1.4633259773254395, - "learning_rate": 1.0860978792643527e-06, - "loss": 0.4243, - "step": 367100 - }, - { - "epoch": 0.030398, - "loss_gen": 5.5245490074157715, - "loss_rtd": 0.19503413140773773, - "loss_sent": 0.06523527204990387, - "loss_sod": 0.034987159073352814, - "loss_total": 0.2952565550804138, - "step": 367199 - }, - { - "epoch": 0.030398, - "loss_gen": 5.764045715332031, - "loss_rtd": 0.20709584653377533, - "loss_sent": 0.519471287727356, - "loss_sod": 0.04452098533511162, - "loss_total": 0.7710881233215332, - "step": 367199 - }, - { - "epoch": 0.0304, - "grad_norm": 1.2977943420410156, - "learning_rate": 1.079529519592315e-06, - "loss": 0.428, - "step": 367200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.746772289276123, - "loss_rtd": 0.17845812439918518, - "loss_sent": 0.022705502808094025, - "loss_sod": 0.14325875043869019, - "loss_total": 0.3444223701953888, - "step": 367299 - }, - { - "epoch": 0.030598, - "loss_gen": 5.987921714782715, - "loss_rtd": 0.22216100990772247, - "loss_sent": 0.17972496151924133, - "loss_sod": 0.02743348479270935, - "loss_total": 0.42931944131851196, - "step": 367299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.1215088367462158, - "learning_rate": 1.0729808650799367e-06, - "loss": 0.4266, - "step": 367300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.921072483062744, - "loss_rtd": 0.22161927819252014, - "loss_sent": 0.17021888494491577, - "loss_sod": 0.026013188064098358, - "loss_total": 0.41785135865211487, - "step": 367399 - }, - { - "epoch": 0.030798, - "loss_gen": 5.684786319732666, - "loss_rtd": 0.20871436595916748, - "loss_sent": 0.4201463460922241, - "loss_sod": 0.020044436678290367, - "loss_total": 0.6489051580429077, - "step": 367399 - }, - { - "epoch": 0.0308, - "grad_norm": 1.6168192625045776, - "learning_rate": 1.0664519183650078e-06, - "loss": 0.403, - "step": 367400 - }, - { - "epoch": 0.030998, - "loss_gen": 5.3276543617248535, - "loss_rtd": 0.17410051822662354, - "loss_sent": 0.0006031687371432781, - "loss_sod": 0.07032284140586853, - "loss_total": 0.24502652883529663, - "step": 367499 - }, - { - "epoch": 0.030998, - "loss_gen": 5.103432655334473, - "loss_rtd": 0.15331293642520905, - "loss_sent": 0.0462593249976635, - "loss_sod": 0.021818481385707855, - "loss_total": 0.2213907390832901, - "step": 367499 - }, - { - "epoch": 0.031, - "grad_norm": 0.648463785648346, - "learning_rate": 1.0599426820774083e-06, - "loss": 0.4031, - "step": 367500 - }, - { - "epoch": 0.031198, - "loss_gen": 6.198997974395752, - "loss_rtd": 0.20361800491809845, - "loss_sent": 0.07401644438505173, - "loss_sod": 0.04422963783144951, - "loss_total": 0.3218640983104706, - "step": 367599 - }, - { - "epoch": 0.031198, - "loss_gen": 6.041176795959473, - "loss_rtd": 0.19243599474430084, - "loss_sent": 0.060878489166498184, - "loss_sod": 0.04446679353713989, - "loss_total": 0.2977812886238098, - "step": 367599 - }, - { - "epoch": 0.0312, - "grad_norm": 0.5834943056106567, - "learning_rate": 1.0534531588390351e-06, - "loss": 0.4232, - "step": 367600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.941068172454834, - "loss_rtd": 0.20858453214168549, - "loss_sent": 0.2920042872428894, - "loss_sod": 0.003702358575537801, - "loss_total": 0.5042911767959595, - "step": 367699 - }, - { - "epoch": 0.031398, - "loss_gen": 6.093193054199219, - "loss_rtd": 0.21786224842071533, - "loss_sent": 0.06440935283899307, - "loss_sod": 0.036137934774160385, - "loss_total": 0.3184095323085785, - "step": 367699 - }, - { - "epoch": 0.0314, - "grad_norm": 1.2168036699295044, - "learning_rate": 1.0469833512638749e-06, - "loss": 0.4244, - "step": 367700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.4780402183532715, - "loss_rtd": 0.16045156121253967, - "loss_sent": 0.05479176342487335, - "loss_sod": 0.08664466440677643, - "loss_total": 0.30188798904418945, - "step": 367799 - }, - { - "epoch": 0.031598, - "loss_gen": 6.051278114318848, - "loss_rtd": 0.21020716428756714, - "loss_sent": 0.29729753732681274, - "loss_sod": 0.01826038956642151, - "loss_total": 0.525765061378479, - "step": 367799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.131919264793396, - "learning_rate": 1.040533261957971e-06, - "loss": 0.4232, - "step": 367800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.865645408630371, - "loss_rtd": 0.2175774872303009, - "loss_sent": 0.3327351212501526, - "loss_sod": 0.054611686617136, - "loss_total": 0.6049243211746216, - "step": 367899 - }, - { - "epoch": 0.031798, - "loss_gen": 5.993206977844238, - "loss_rtd": 0.2082110494375229, - "loss_sent": 0.503560483455658, - "loss_sod": 0.03816059231758118, - "loss_total": 0.7499321103096008, - "step": 367899 - }, - { - "epoch": 0.0318, - "grad_norm": 2.405414581298828, - "learning_rate": 1.0341028935194118e-06, - "loss": 0.4136, - "step": 367900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.362062931060791, - "loss_rtd": 0.16042421758174896, - "loss_sent": 0.036109164357185364, - "loss_sod": 0.03005451150238514, - "loss_total": 0.22658789157867432, - "step": 367999 - }, - { - "epoch": 0.031998, - "loss_gen": 5.485085487365723, - "loss_rtd": 0.21209366619586945, - "loss_sent": 0.14857341349124908, - "loss_sod": 0.0034150639548897743, - "loss_total": 0.3640821576118469, - "step": 367999 - }, - { - "epoch": 0.032, - "grad_norm": 0.7494519352912903, - "learning_rate": 1.0276922485383478e-06, - "loss": 0.4176, - "step": 368000 - }, - { - "epoch": 0.032, - "eval_loss": 0.3909725546836853, - "eval_runtime": 151.9422, - "eval_samples_per_second": 101.637, - "eval_steps_per_second": 0.796, - "step": 368000 - }, - { - "epoch": 0.032198, - "loss_gen": 5.760014533996582, - "loss_rtd": 0.22472217679023743, - "loss_sent": 0.3562431335449219, - "loss_sod": 0.008840315043926239, - "loss_total": 0.5898056030273438, - "step": 368099 - }, - { - "epoch": 0.032198, - "loss_gen": 5.884953022003174, - "loss_rtd": 0.1917876899242401, - "loss_sent": 0.18902307748794556, - "loss_sod": 0.10972850024700165, - "loss_total": 0.4905392527580261, - "step": 368099 - }, - { - "epoch": 0.0322, - "grad_norm": 1.0253633260726929, - "learning_rate": 1.0213013295969909e-06, - "loss": 0.4224, - "step": 368100 - }, - { - "epoch": 0.032398, - "loss_gen": 6.02172327041626, - "loss_rtd": 0.20438213646411896, - "loss_sent": 0.33307620882987976, - "loss_sod": 0.05011449754238129, - "loss_total": 0.5875728130340576, - "step": 368199 - }, - { - "epoch": 0.032398, - "loss_gen": 5.7057085037231445, - "loss_rtd": 0.19295965135097504, - "loss_sent": 0.3768034279346466, - "loss_sod": 0.06536341458559036, - "loss_total": 0.6351264715194702, - "step": 368199 - }, - { - "epoch": 0.0324, - "grad_norm": 2.3299827575683594, - "learning_rate": 1.0149301392696097e-06, - "loss": 0.4343, - "step": 368200 - }, - { - "epoch": 0.032598, - "loss_gen": 5.719908237457275, - "loss_rtd": 0.21271894872188568, - "loss_sent": 0.7014883160591125, - "loss_sod": 0.07076235115528107, - "loss_total": 0.9849696159362793, - "step": 368299 - }, - { - "epoch": 0.032598, - "loss_gen": 5.860711097717285, - "loss_rtd": 0.20450043678283691, - "loss_sent": 0.10516748577356339, - "loss_sod": 0.016955647617578506, - "loss_total": 0.3266235589981079, - "step": 368299 - }, - { - "epoch": 0.0326, - "grad_norm": 2.1229238510131836, - "learning_rate": 1.0085786801225016e-06, - "loss": 0.4465, - "step": 368300 - }, - { - "epoch": 0.032798, - "loss_gen": 5.92908239364624, - "loss_rtd": 0.21102026104927063, - "loss_sent": 0.14362558722496033, - "loss_sod": 0.059954918920993805, - "loss_total": 0.41460075974464417, - "step": 368399 - }, - { - "epoch": 0.032798, - "loss_gen": 5.716544151306152, - "loss_rtd": 0.22120824456214905, - "loss_sent": 0.16445207595825195, - "loss_sod": 0.028329530730843544, - "loss_total": 0.4139898419380188, - "step": 368399 - }, - { - "epoch": 0.0328, - "grad_norm": 1.179724931716919, - "learning_rate": 1.0022469547140422e-06, - "loss": 0.4182, - "step": 368400 - }, - { - "epoch": 0.032998, - "loss_gen": 6.229667663574219, - "loss_rtd": 0.23950572311878204, - "loss_sent": 0.09101808816194534, - "loss_sod": 0.017648961395025253, - "loss_total": 0.34817275404930115, - "step": 368499 - }, - { - "epoch": 0.032998, - "loss_gen": 5.549963474273682, - "loss_rtd": 0.18107330799102783, - "loss_sent": 0.014808050356805325, - "loss_sod": 0.0342152938246727, - "loss_total": 0.23009665310382843, - "step": 368499 - }, - { - "epoch": 0.033, - "grad_norm": 0.6222190260887146, - "learning_rate": 9.959349655946527e-07, - "loss": 0.4016, - "step": 368500 - }, - { - "epoch": 0.033198, - "loss_gen": 6.249142169952393, - "loss_rtd": 0.22375130653381348, - "loss_sent": 0.11936704069375992, - "loss_sod": 0.059540338814258575, - "loss_total": 0.4026586711406708, - "step": 368599 - }, - { - "epoch": 0.033198, - "loss_gen": 5.763450622558594, - "loss_rtd": 0.20569217205047607, - "loss_sent": 0.18210792541503906, - "loss_sod": 0.035910092294216156, - "loss_total": 0.4237101972103119, - "step": 368599 - }, - { - "epoch": 0.0332, - "grad_norm": 0.7430477738380432, - "learning_rate": 9.896427153068045e-07, - "loss": 0.4256, - "step": 368600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.651984691619873, - "loss_rtd": 0.23930306732654572, - "loss_sent": 0.06848891824483871, - "loss_sod": 0.030633607879281044, - "loss_total": 0.3384256064891815, - "step": 368699 - }, - { - "epoch": 0.033398, - "loss_gen": 6.198563098907471, - "loss_rtd": 0.1870102882385254, - "loss_sent": 0.3745875656604767, - "loss_sod": 0.02009415253996849, - "loss_total": 0.5816919803619385, - "step": 368699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.0569384098052979, - "learning_rate": 9.833702063850037e-07, - "loss": 0.4194, - "step": 368700 - }, - { - "epoch": 0.033598, - "loss_gen": 5.391874313354492, - "loss_rtd": 0.16648748517036438, - "loss_sent": 0.0024607256054878235, - "loss_sod": 0.042681336402893066, - "loss_total": 0.21162953972816467, - "step": 368799 - }, - { - "epoch": 0.033598, - "loss_gen": 5.786269187927246, - "loss_rtd": 0.18479004502296448, - "loss_sent": 0.1572154462337494, - "loss_sod": 0.004926848225295544, - "loss_total": 0.3469323515892029, - "step": 368799 - }, - { - "epoch": 0.0336, - "grad_norm": 0.5583371520042419, - "learning_rate": 9.771174413558182e-07, - "loss": 0.4187, - "step": 368800 - }, - { - "epoch": 0.033798, - "loss_gen": 5.921062469482422, - "loss_rtd": 0.2207319587469101, - "loss_sent": 0.04883941635489464, - "loss_sod": 0.06598001718521118, - "loss_total": 0.3355514109134674, - "step": 368899 - }, - { - "epoch": 0.033798, - "loss_gen": 5.296463966369629, - "loss_rtd": 0.18052725493907928, - "loss_sent": 2.27016607823316e-05, - "loss_sod": 0.057295117527246475, - "loss_total": 0.2378450632095337, - "step": 368899 - }, - { - "epoch": 0.0338, - "grad_norm": 0.76303631067276, - "learning_rate": 9.708844227378666e-07, - "loss": 0.4373, - "step": 368900 - }, - { - "epoch": 0.033998, - "loss_gen": 5.713212966918945, - "loss_rtd": 0.20468173921108246, - "loss_sent": 0.21971869468688965, - "loss_sod": 0.050458066165447235, - "loss_total": 0.47485849261283875, - "step": 368999 - }, - { - "epoch": 0.033998, - "loss_gen": 5.200697422027588, - "loss_rtd": 0.15441444516181946, - "loss_sent": 0.010356126353144646, - "loss_sod": 0.09508315473794937, - "loss_total": 0.259853720664978, - "step": 368999 - }, - { - "epoch": 0.034, - "grad_norm": 1.424156665802002, - "learning_rate": 9.646711530418129e-07, - "loss": 0.4213, - "step": 369000 - }, - { - "epoch": 0.034, - "eval_loss": 0.39443323016166687, - "eval_runtime": 150.452, - "eval_samples_per_second": 102.644, - "eval_steps_per_second": 0.804, - "step": 369000 - }, - { - "epoch": 0.000198, - "loss_gen": 5.88716983795166, - "loss_rtd": 0.2215983122587204, - "loss_sent": 0.4346535801887512, - "loss_sod": 0.0489102266728878, - "loss_total": 0.7051621079444885, - "step": 369099 - }, - { - "epoch": 0.000198, - "loss_gen": 6.08715295791626, - "loss_rtd": 0.19842500984668732, - "loss_sent": 0.047167420387268066, - "loss_sod": 0.2069673240184784, - "loss_total": 0.45255976915359497, - "step": 369099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.6441986560821533, - "learning_rate": 9.584776347703496e-07, - "loss": 0.4048, - "step": 369100 - }, - { - "epoch": 0.000398, - "loss_gen": 5.984760284423828, - "loss_rtd": 0.19478578865528107, - "loss_sent": 0.0894625335931778, - "loss_sod": 0.04968101903796196, - "loss_total": 0.3339293599128723, - "step": 369199 - }, - { - "epoch": 0.000398, - "loss_gen": 6.103747844696045, - "loss_rtd": 0.20013120770454407, - "loss_sent": 0.1266428381204605, - "loss_sod": 0.07164319604635239, - "loss_total": 0.39841723442077637, - "step": 369199 - }, - { - "epoch": 0.0004, - "grad_norm": 0.8013847470283508, - "learning_rate": 9.523038704182308e-07, - "loss": 0.4229, - "step": 369200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.663417816162109, - "loss_rtd": 0.18251462280750275, - "loss_sent": 0.17311504483222961, - "loss_sod": 0.016081828624010086, - "loss_total": 0.37171149253845215, - "step": 369299 - }, - { - "epoch": 0.000598, - "loss_gen": 6.117463111877441, - "loss_rtd": 0.20723600685596466, - "loss_sent": 0.2576310634613037, - "loss_sod": 0.044517409056425095, - "loss_total": 0.5093845129013062, - "step": 369299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.0062775611877441, - "learning_rate": 9.461498624722509e-07, - "loss": 0.4167, - "step": 369300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.934455871582031, - "loss_rtd": 0.21126939356327057, - "loss_sent": 0.16447478532791138, - "loss_sod": 0.009031460620462894, - "loss_total": 0.38477563858032227, - "step": 369399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.673975467681885, - "loss_rtd": 0.18478532135486603, - "loss_sent": 0.13288885354995728, - "loss_sod": 0.021034542471170425, - "loss_total": 0.33870869874954224, - "step": 369399 - }, - { - "epoch": 0.0008, - "grad_norm": 0.9572023153305054, - "learning_rate": 9.400156134112326e-07, - "loss": 0.4103, - "step": 369400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.870675563812256, - "loss_rtd": 0.20630477368831635, - "loss_sent": 0.1055145338177681, - "loss_sod": 0.024161716923117638, - "loss_total": 0.33598101139068604, - "step": 369499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.971395492553711, - "loss_rtd": 0.22829517722129822, - "loss_sent": 0.15899527072906494, - "loss_sod": 0.013825427740812302, - "loss_total": 0.40111589431762695, - "step": 369499 - }, - { - "epoch": 0.001, - "grad_norm": 0.7026100754737854, - "learning_rate": 9.339011257060603e-07, - "loss": 0.425, - "step": 369500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.483034610748291, - "loss_rtd": 0.2005041539669037, - "loss_sent": 0.024152904748916626, - "loss_sod": 0.008470947854220867, - "loss_total": 0.23312801122665405, - "step": 369599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.93954610824585, - "loss_rtd": 0.1927112489938736, - "loss_sent": 0.08015337586402893, - "loss_sod": 0.037822175770998, - "loss_total": 0.31068679690361023, - "step": 369599 - }, - { - "epoch": 0.0012, - "grad_norm": 0.759127676486969, - "learning_rate": 9.278064018196475e-07, - "loss": 0.4097, - "step": 369600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.919787406921387, - "loss_rtd": 0.2028111219406128, - "loss_sent": 0.14415960013866425, - "loss_sod": 0.004172762390226126, - "loss_total": 0.351143479347229, - "step": 369699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.837738037109375, - "loss_rtd": 0.2000478059053421, - "loss_sent": 0.2426430881023407, - "loss_sod": 0.046445779502391815, - "loss_total": 0.489136666059494, - "step": 369699 - }, - { - "epoch": 0.0014, - "grad_norm": 1.3490941524505615, - "learning_rate": 9.217314442069524e-07, - "loss": 0.4247, - "step": 369700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.895453453063965, - "loss_rtd": 0.17332077026367188, - "loss_sent": 0.03327735885977745, - "loss_sod": 0.11678000539541245, - "loss_total": 0.32337814569473267, - "step": 369799 - }, - { - "epoch": 0.001598, - "loss_gen": 5.440080165863037, - "loss_rtd": 0.2166971117258072, - "loss_sent": 0.033422715961933136, - "loss_sod": 0.023754417896270752, - "loss_total": 0.2738742530345917, - "step": 369799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.8282711505889893, - "learning_rate": 9.156762553149567e-07, - "loss": 0.4147, - "step": 369800 - }, - { - "epoch": 0.001798, - "loss_gen": 6.211438179016113, - "loss_rtd": 0.19635449349880219, - "loss_sent": 0.2437390685081482, - "loss_sod": 0.06331045925617218, - "loss_total": 0.5034040212631226, - "step": 369899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.734776973724365, - "loss_rtd": 0.23625712096691132, - "loss_sent": 0.2243865430355072, - "loss_sod": 0.09715672582387924, - "loss_total": 0.5578004121780396, - "step": 369899 - }, - { - "epoch": 0.0018, - "grad_norm": 1.4983323812484741, - "learning_rate": 9.096408375826982e-07, - "loss": 0.4345, - "step": 369900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.560866832733154, - "loss_rtd": 0.1789192110300064, - "loss_sent": 0.02297811023890972, - "loss_sod": 0.07768049836158752, - "loss_total": 0.2795778214931488, - "step": 369999 - }, - { - "epoch": 0.001998, - "loss_gen": 6.039785861968994, - "loss_rtd": 0.22173763811588287, - "loss_sent": 0.22057120501995087, - "loss_sod": 0.010319402441382408, - "loss_total": 0.4526282548904419, - "step": 369999 - }, - { - "epoch": 0.002, - "grad_norm": 0.9138365983963013, - "learning_rate": 9.036251934412376e-07, - "loss": 0.4373, - "step": 370000 - }, - { - "epoch": 0.002, - "eval_loss": 0.3974721431732178, - "eval_runtime": 152.2824, - "eval_samples_per_second": 101.41, - "eval_steps_per_second": 0.795, - "step": 370000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.77457332611084, - "loss_rtd": 0.1922367513179779, - "loss_sent": 0.04758775234222412, - "loss_sod": 0.07165735960006714, - "loss_total": 0.31148186326026917, - "step": 370099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.326714515686035, - "loss_rtd": 0.1894858479499817, - "loss_sent": 2.274636244692374e-05, - "loss_sod": 0.08740726113319397, - "loss_total": 0.27691584825515747, - "step": 370099 - }, - { - "epoch": 0.0022, - "grad_norm": 1.0516712665557861, - "learning_rate": 8.97629325313687e-07, - "loss": 0.4203, - "step": 370100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.750786781311035, - "loss_rtd": 0.1920003443956375, - "loss_sent": 0.16336694359779358, - "loss_sod": 0.049515023827552795, - "loss_total": 0.4048823118209839, - "step": 370199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.883330345153809, - "loss_rtd": 0.19946648180484772, - "loss_sent": 0.4097523093223572, - "loss_sod": 0.006896537728607655, - "loss_total": 0.6161153316497803, - "step": 370199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.3667125701904297, - "learning_rate": 8.916532356151586e-07, - "loss": 0.4088, - "step": 370200 - }, - { - "epoch": 0.002598, - "loss_gen": 6.184383869171143, - "loss_rtd": 0.19715061783790588, - "loss_sent": 0.2926498055458069, - "loss_sod": 0.026086285710334778, - "loss_total": 0.5158867239952087, - "step": 370299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.670344829559326, - "loss_rtd": 0.16458524763584137, - "loss_sent": 0.04784877970814705, - "loss_sod": 0.0956805944442749, - "loss_total": 0.308114618062973, - "step": 370299 - }, - { - "epoch": 0.0026, - "grad_norm": 0.749548614025116, - "learning_rate": 8.856969267528436e-07, - "loss": 0.4216, - "step": 370300 - }, - { - "epoch": 0.002798, - "loss_gen": 5.911998271942139, - "loss_rtd": 0.23035354912281036, - "loss_sent": 0.1706063449382782, - "loss_sod": 0.11654205620288849, - "loss_total": 0.517501950263977, - "step": 370399 - }, - { - "epoch": 0.002798, - "loss_gen": 5.9849724769592285, - "loss_rtd": 0.22994734346866608, - "loss_sent": 0.14057643711566925, - "loss_sod": 0.014609819278120995, - "loss_total": 0.3851335942745209, - "step": 370399 - }, - { - "epoch": 0.0028, - "grad_norm": 1.3703480958938599, - "learning_rate": 8.797604011259287e-07, - "loss": 0.423, - "step": 370400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.987913131713867, - "loss_rtd": 0.21612316370010376, - "loss_sent": 0.3199959397315979, - "loss_sod": 0.03054291009902954, - "loss_total": 0.5666620135307312, - "step": 370499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.7482171058654785, - "loss_rtd": 0.21483327448368073, - "loss_sent": 0.31899651885032654, - "loss_sod": 0.01689985767006874, - "loss_total": 0.5507296323776245, - "step": 370499 - }, - { - "epoch": 0.003, - "grad_norm": 1.6967475414276123, - "learning_rate": 8.738436611256507e-07, - "loss": 0.4164, - "step": 370500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.9801201820373535, - "loss_rtd": 0.21836118400096893, - "loss_sent": 0.19397112727165222, - "loss_sod": 0.05779365077614784, - "loss_total": 0.4701259732246399, - "step": 370599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.831068992614746, - "loss_rtd": 0.20626583695411682, - "loss_sent": 0.0638139620423317, - "loss_sod": 0.0216029305011034, - "loss_total": 0.29168272018432617, - "step": 370599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9187771081924438, - "learning_rate": 8.6794670913527e-07, - "loss": 0.4056, - "step": 370600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.984137535095215, - "loss_rtd": 0.20997627079486847, - "loss_sent": 0.15081514418125153, - "loss_sod": 0.04709908366203308, - "loss_total": 0.4078904986381531, - "step": 370699 - }, - { - "epoch": 0.003398, - "loss_gen": 6.200585842132568, - "loss_rtd": 0.22599759697914124, - "loss_sent": 0.1308494210243225, - "loss_sod": 0.17154893279075623, - "loss_total": 0.52839595079422, - "step": 370699 - }, - { - "epoch": 0.0034, - "grad_norm": 1.126378059387207, - "learning_rate": 8.620695475300811e-07, - "loss": 0.4141, - "step": 370700 - }, - { - "epoch": 0.003598, - "loss_gen": 6.237166404724121, - "loss_rtd": 0.20681335031986237, - "loss_sent": 0.1380896270275116, - "loss_sod": 0.033758148550987244, - "loss_total": 0.3786611258983612, - "step": 370799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.950350761413574, - "loss_rtd": 0.1995602548122406, - "loss_sent": 0.12774446606636047, - "loss_sod": 0.02221403457224369, - "loss_total": 0.3495187759399414, - "step": 370799 - }, - { - "epoch": 0.0036, - "grad_norm": 0.5069965720176697, - "learning_rate": 8.562121786774013e-07, - "loss": 0.4106, - "step": 370800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.380465984344482, - "loss_rtd": 0.18398678302764893, - "loss_sent": 0.022619973868131638, - "loss_sod": 0.06271077692508698, - "loss_total": 0.26931753754615784, - "step": 370899 - }, - { - "epoch": 0.003798, - "loss_gen": 6.374415397644043, - "loss_rtd": 0.22577594220638275, - "loss_sent": 0.08491288870573044, - "loss_sod": 0.16143465042114258, - "loss_total": 0.47212350368499756, - "step": 370899 - }, - { - "epoch": 0.0038, - "grad_norm": 1.1972861289978027, - "learning_rate": 8.503746049365824e-07, - "loss": 0.4142, - "step": 370900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.563924312591553, - "loss_rtd": 0.19831405580043793, - "loss_sent": 0.08573966473340988, - "loss_sod": 0.01143189799040556, - "loss_total": 0.29548561573028564, - "step": 370999 - }, - { - "epoch": 0.003998, - "loss_gen": 6.10603141784668, - "loss_rtd": 0.2115442156791687, - "loss_sent": 0.25086724758148193, - "loss_sod": 0.04794318601489067, - "loss_total": 0.5103546380996704, - "step": 370999 - }, - { - "epoch": 0.004, - "grad_norm": 0.7185167074203491, - "learning_rate": 8.445568286589877e-07, - "loss": 0.4364, - "step": 371000 - }, - { - "epoch": 0.004, - "eval_loss": 0.398637056350708, - "eval_runtime": 150.0122, - "eval_samples_per_second": 102.945, - "eval_steps_per_second": 0.807, - "step": 371000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.839559555053711, - "loss_rtd": 0.2218700647354126, - "loss_sent": 0.359031081199646, - "loss_sod": 0.010356377810239792, - "loss_total": 0.5912575125694275, - "step": 371099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.853979110717773, - "loss_rtd": 0.19204586744308472, - "loss_sent": 0.2216770350933075, - "loss_sod": 0.0032190692145377398, - "loss_total": 0.41694197058677673, - "step": 371099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.2039587497711182, - "learning_rate": 8.387588521880263e-07, - "loss": 0.423, - "step": 371100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.858482837677002, - "loss_rtd": 0.20762097835540771, - "loss_sent": 0.07991106808185577, - "loss_sod": 0.031615227460861206, - "loss_total": 0.3191472887992859, - "step": 371199 - }, - { - "epoch": 0.004398, - "loss_gen": 5.80542516708374, - "loss_rtd": 0.21335211396217346, - "loss_sent": 0.20138762891292572, - "loss_sod": 0.00400786055251956, - "loss_total": 0.41874760389328003, - "step": 371199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.05449640750885, - "learning_rate": 8.329806778591299e-07, - "loss": 0.4141, - "step": 371200 - }, - { - "epoch": 0.004598, - "loss_gen": 6.135004043579102, - "loss_rtd": 0.22044090926647186, - "loss_sent": 0.8127871751785278, - "loss_sod": 0.02085859142243862, - "loss_total": 1.054086685180664, - "step": 371299 - }, - { - "epoch": 0.004598, - "loss_gen": 6.0553388595581055, - "loss_rtd": 0.20307990908622742, - "loss_sent": 0.2669093906879425, - "loss_sod": 0.016738833859562874, - "loss_total": 0.48672813177108765, - "step": 371299 - }, - { - "epoch": 0.0046, - "grad_norm": 2.1555721759796143, - "learning_rate": 8.272223079997255e-07, - "loss": 0.4306, - "step": 371300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.8579535484313965, - "loss_rtd": 0.21771731972694397, - "loss_sent": 0.04809056967496872, - "loss_sod": 0.09572092443704605, - "loss_total": 0.36152881383895874, - "step": 371399 - }, - { - "epoch": 0.004798, - "loss_gen": 5.682850360870361, - "loss_rtd": 0.17702673375606537, - "loss_sent": 0.06961818784475327, - "loss_sod": 0.02396053448319435, - "loss_total": 0.2706054449081421, - "step": 371399 - }, - { - "epoch": 0.0048, - "grad_norm": 0.9025712609291077, - "learning_rate": 8.214837449292967e-07, - "loss": 0.41, - "step": 371400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.280576705932617, - "loss_rtd": 0.14410962164402008, - "loss_sent": 0.003438427811488509, - "loss_sod": 0.034187521785497665, - "loss_total": 0.1817355751991272, - "step": 371499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.555894374847412, - "loss_rtd": 0.17416097223758698, - "loss_sent": 0.02043677493929863, - "loss_sod": 0.01033407635986805, - "loss_total": 0.2049318253993988, - "step": 371499 - }, - { - "epoch": 0.005, - "grad_norm": 0.6194052696228027, - "learning_rate": 8.157649909593335e-07, - "loss": 0.3958, - "step": 371500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.490361213684082, - "loss_rtd": 0.18433135747909546, - "loss_sent": 0.017026018351316452, - "loss_sod": 0.021255729719996452, - "loss_total": 0.2226131111383438, - "step": 371599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.757503032684326, - "loss_rtd": 0.19877558946609497, - "loss_sent": 0.2536308467388153, - "loss_sod": 0.006711670663207769, - "loss_total": 0.459118127822876, - "step": 371599 - }, - { - "epoch": 0.0052, - "grad_norm": 0.7384194731712341, - "learning_rate": 8.100660483933542e-07, - "loss": 0.4202, - "step": 371600 - }, - { - "epoch": 0.005398, - "loss_gen": 6.322305202484131, - "loss_rtd": 0.2076469212770462, - "loss_sent": 0.07007652521133423, - "loss_sod": 0.04098549485206604, - "loss_total": 0.31870895624160767, - "step": 371699 - }, - { - "epoch": 0.005398, - "loss_gen": 5.4590959548950195, - "loss_rtd": 0.1987762302160263, - "loss_sent": 2.4607086743344553e-05, - "loss_sod": 0.19191618263721466, - "loss_total": 0.3907170295715332, - "step": 371699 - }, - { - "epoch": 0.0054, - "grad_norm": 0.9139410257339478, - "learning_rate": 8.043869195268894e-07, - "loss": 0.4048, - "step": 371700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.7678728103637695, - "loss_rtd": 0.2182355672121048, - "loss_sent": 0.16884374618530273, - "loss_sod": 0.005624018609523773, - "loss_total": 0.3927033245563507, - "step": 371799 - }, - { - "epoch": 0.005598, - "loss_gen": 6.226258277893066, - "loss_rtd": 0.2252960205078125, - "loss_sent": 0.19111286103725433, - "loss_sod": 0.038502730429172516, - "loss_total": 0.45491158962249756, - "step": 371799 - }, - { - "epoch": 0.0056, - "grad_norm": 0.6736849546432495, - "learning_rate": 7.987276066474869e-07, - "loss": 0.4287, - "step": 371800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.072695255279541, - "loss_rtd": 0.15994885563850403, - "loss_sent": 2.2642059775535017e-05, - "loss_sod": 0.13835397362709045, - "loss_total": 0.29832547903060913, - "step": 371899 - }, - { - "epoch": 0.005798, - "loss_gen": 5.742897033691406, - "loss_rtd": 0.19277235865592957, - "loss_sent": 0.07018637657165527, - "loss_sod": 0.018538443371653557, - "loss_total": 0.28149718046188354, - "step": 371899 - }, - { - "epoch": 0.0058, - "grad_norm": 1.0694468021392822, - "learning_rate": 7.930881120347178e-07, - "loss": 0.4143, - "step": 371900 - }, - { - "epoch": 0.005998, - "loss_gen": 5.143819808959961, - "loss_rtd": 0.14488475024700165, - "loss_sent": 0.06838599592447281, - "loss_sod": 0.03661201149225235, - "loss_total": 0.2498827576637268, - "step": 371999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.919182777404785, - "loss_rtd": 0.23770809173583984, - "loss_sent": 0.13715243339538574, - "loss_sod": 0.03412795066833496, - "loss_total": 0.40898847579956055, - "step": 371999 - }, - { - "epoch": 0.006, - "grad_norm": 0.8738687038421631, - "learning_rate": 7.874684379601759e-07, - "loss": 0.4084, - "step": 372000 - }, - { - "epoch": 0.006, - "eval_loss": 0.39766815304756165, - "eval_runtime": 150.0059, - "eval_samples_per_second": 102.949, - "eval_steps_per_second": 0.807, - "step": 372000 - }, - { - "epoch": 0.006198, - "loss_gen": 6.289210796356201, - "loss_rtd": 0.23419596254825592, - "loss_sent": 0.1906905472278595, - "loss_sod": 0.11558223515748978, - "loss_total": 0.5404687523841858, - "step": 372099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.580062389373779, - "loss_rtd": 0.2032138556241989, - "loss_sent": 0.09807055443525314, - "loss_sod": 0.04224370792508125, - "loss_total": 0.3435281217098236, - "step": 372099 - }, - { - "epoch": 0.0062, - "grad_norm": 1.2346962690353394, - "learning_rate": 7.818685866874676e-07, - "loss": 0.4101, - "step": 372100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.849940776824951, - "loss_rtd": 0.20512159168720245, - "loss_sent": 0.20169490575790405, - "loss_sod": 0.04118049889802933, - "loss_total": 0.44799700379371643, - "step": 372199 - }, - { - "epoch": 0.006398, - "loss_gen": 5.919855117797852, - "loss_rtd": 0.2348971962928772, - "loss_sent": 0.0944678857922554, - "loss_sod": 0.06624012440443039, - "loss_total": 0.395605206489563, - "step": 372199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.6020902395248413, - "learning_rate": 7.762885604721993e-07, - "loss": 0.4346, - "step": 372200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.477977275848389, - "loss_rtd": 0.17749889194965363, - "loss_sent": 0.09746012836694717, - "loss_sod": 0.017157068476080894, - "loss_total": 0.29211607575416565, - "step": 372299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.660252094268799, - "loss_rtd": 0.20052680373191833, - "loss_sent": 0.0274093858897686, - "loss_sod": 0.051001738756895065, - "loss_total": 0.2789379060268402, - "step": 372299 - }, - { - "epoch": 0.0066, - "grad_norm": 0.9118999242782593, - "learning_rate": 7.70728361562012e-07, - "loss": 0.4052, - "step": 372300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.358914375305176, - "loss_rtd": 0.16730189323425293, - "loss_sent": 0.020632697269320488, - "loss_sod": 0.11295046657323837, - "loss_total": 0.30088505148887634, - "step": 372399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.22660493850708, - "loss_rtd": 0.17281118035316467, - "loss_sent": 2.5519548216834664e-05, - "loss_sod": 0.16093236207962036, - "loss_total": 0.3337690532207489, - "step": 372399 - }, - { - "epoch": 0.0068, - "grad_norm": 1.1899315118789673, - "learning_rate": 7.651879921965588e-07, - "loss": 0.4135, - "step": 372400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.4811506271362305, - "loss_rtd": 0.17571194469928741, - "loss_sent": 0.09175330400466919, - "loss_sod": 0.021479532122612, - "loss_total": 0.2889447808265686, - "step": 372499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.855925559997559, - "loss_rtd": 0.20085661113262177, - "loss_sent": 0.22879676520824432, - "loss_sod": 0.027214793488383293, - "loss_total": 0.45686817169189453, - "step": 372499 - }, - { - "epoch": 0.007, - "grad_norm": 1.073201060295105, - "learning_rate": 7.596674546074878e-07, - "loss": 0.4049, - "step": 372500 - }, - { - "epoch": 0.007198, - "loss_gen": 5.647804260253906, - "loss_rtd": 0.17650999128818512, - "loss_sent": 0.03875567764043808, - "loss_sod": 0.05519890785217285, - "loss_total": 0.27046456933021545, - "step": 372599 - }, - { - "epoch": 0.007198, - "loss_gen": 6.038423538208008, - "loss_rtd": 0.19966991245746613, - "loss_sent": 0.13861516118049622, - "loss_sod": 0.014357440173625946, - "loss_total": 0.3526425063610077, - "step": 372599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.722663402557373, - "learning_rate": 7.541667510184813e-07, - "loss": 0.4187, - "step": 372600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.560591697692871, - "loss_rtd": 0.19094672799110413, - "loss_sent": 0.0033574746921658516, - "loss_sod": 0.11088939011096954, - "loss_total": 0.30519360303878784, - "step": 372699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.180716514587402, - "loss_rtd": 0.1749231368303299, - "loss_sent": 2.4763548935879953e-05, - "loss_sod": 0.2965400516986847, - "loss_total": 0.47148796916007996, - "step": 372699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.639691948890686, - "learning_rate": 7.48685883645217e-07, - "loss": 0.4133, - "step": 372700 - }, - { - "epoch": 0.007598, - "loss_gen": 5.789942264556885, - "loss_rtd": 0.199631467461586, - "loss_sent": 0.2081708163022995, - "loss_sod": 0.04268084093928337, - "loss_total": 0.45048314332962036, - "step": 372799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.569168567657471, - "loss_rtd": 0.1726132482290268, - "loss_sent": 0.018431365489959717, - "loss_sod": 0.08531937748193741, - "loss_total": 0.2763639986515045, - "step": 372799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.9298040270805359, - "learning_rate": 7.432248546953902e-07, - "loss": 0.41, - "step": 372800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.711914539337158, - "loss_rtd": 0.2136341780424118, - "loss_sent": 0.10143914073705673, - "loss_sod": 0.061825983226299286, - "loss_total": 0.3768993020057678, - "step": 372899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.920680046081543, - "loss_rtd": 0.21260219812393188, - "loss_sent": 0.07651954144239426, - "loss_sod": 0.027535876259207726, - "loss_total": 0.3166576027870178, - "step": 372899 - }, - { - "epoch": 0.0078, - "grad_norm": 0.8316558599472046, - "learning_rate": 7.377836663687021e-07, - "loss": 0.4021, - "step": 372900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.0966668128967285, - "loss_rtd": 0.16073346138000488, - "loss_sent": 2.7389023671275936e-05, - "loss_sod": 0.02465459704399109, - "loss_total": 0.18541544675827026, - "step": 372999 - }, - { - "epoch": 0.007998, - "loss_gen": 5.805216312408447, - "loss_rtd": 0.2233431339263916, - "loss_sent": 0.22811299562454224, - "loss_sod": 0.05292152613401413, - "loss_total": 0.5043776631355286, - "step": 372999 - }, - { - "epoch": 0.008, - "grad_norm": 0.739676296710968, - "learning_rate": 7.323623208568608e-07, - "loss": 0.403, - "step": 373000 - }, - { - "epoch": 0.008, - "eval_loss": 0.3985455334186554, - "eval_runtime": 150.3411, - "eval_samples_per_second": 102.72, - "eval_steps_per_second": 0.805, - "step": 373000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.874575138092041, - "loss_rtd": 0.21611829102039337, - "loss_sent": 0.298623263835907, - "loss_sod": 0.04416034370660782, - "loss_total": 0.5589019060134888, - "step": 373099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.559823989868164, - "loss_rtd": 0.2269548773765564, - "loss_sent": 0.4483194649219513, - "loss_sod": 0.03525351360440254, - "loss_total": 0.7105278968811035, - "step": 373099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.5289615392684937, - "learning_rate": 7.269608203435807e-07, - "loss": 0.4188, - "step": 373100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.941773414611816, - "loss_rtd": 0.19337964057922363, - "loss_sent": 0.031218715012073517, - "loss_sod": 0.1129196435213089, - "loss_total": 0.33751797676086426, - "step": 373199 - }, - { - "epoch": 0.008398, - "loss_gen": 6.265268802642822, - "loss_rtd": 0.2057289332151413, - "loss_sent": 0.10668730735778809, - "loss_sod": 0.028574064373970032, - "loss_total": 0.3409903049468994, - "step": 373199 - }, - { - "epoch": 0.0084, - "grad_norm": 0.8747782707214355, - "learning_rate": 7.21579167004599e-07, - "loss": 0.4255, - "step": 373200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.486286640167236, - "loss_rtd": 0.17256313562393188, - "loss_sent": 0.029097484424710274, - "loss_sod": 0.08437571674585342, - "loss_total": 0.28603634238243103, - "step": 373299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.839051723480225, - "loss_rtd": 0.19659079611301422, - "loss_sent": 0.060338344424963, - "loss_sod": 0.053871020674705505, - "loss_total": 0.310800164937973, - "step": 373299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.8155134916305542, - "learning_rate": 7.162173630076263e-07, - "loss": 0.405, - "step": 373300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.77953577041626, - "loss_rtd": 0.21442870795726776, - "loss_sent": 0.28438112139701843, - "loss_sod": 0.023305360227823257, - "loss_total": 0.522115170955658, - "step": 373399 - }, - { - "epoch": 0.008798, - "loss_gen": 5.832570552825928, - "loss_rtd": 0.19995197653770447, - "loss_sent": 0.3022240698337555, - "loss_sod": 0.014634872786700726, - "loss_total": 0.5168108940124512, - "step": 373399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.0309109687805176, - "learning_rate": 7.108754105124127e-07, - "loss": 0.3979, - "step": 373400 - }, - { - "epoch": 0.008998, - "loss_gen": 5.288453102111816, - "loss_rtd": 0.16407644748687744, - "loss_sent": 5.9396268625278026e-05, - "loss_sod": 0.02640312723815441, - "loss_total": 0.19053897261619568, - "step": 373499 - }, - { - "epoch": 0.008998, - "loss_gen": 6.096435546875, - "loss_rtd": 0.1909773051738739, - "loss_sent": 0.23970675468444824, - "loss_sod": 0.008967695757746696, - "loss_total": 0.439651757478714, - "step": 373499 - }, - { - "epoch": 0.009, - "grad_norm": 0.7173610329627991, - "learning_rate": 7.055533116706869e-07, - "loss": 0.414, - "step": 373500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.57069206237793, - "loss_rtd": 0.17872034013271332, - "loss_sent": 0.11318963766098022, - "loss_sod": 0.0892697349190712, - "loss_total": 0.38117972016334534, - "step": 373599 - }, - { - "epoch": 0.009198, - "loss_gen": 6.252004146575928, - "loss_rtd": 0.19231130182743073, - "loss_sent": 0.12317709624767303, - "loss_sod": 0.09741857647895813, - "loss_total": 0.4129070043563843, - "step": 373599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.621515154838562, - "learning_rate": 7.002510686261898e-07, - "loss": 0.4194, - "step": 373600 - }, - { - "epoch": 0.009398, - "loss_gen": 6.404895305633545, - "loss_rtd": 0.2336234748363495, - "loss_sent": 0.0452134944498539, - "loss_sod": 0.04235793277621269, - "loss_total": 0.3211948871612549, - "step": 373699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.9691290855407715, - "loss_rtd": 0.2142149955034256, - "loss_sent": 0.3562409579753876, - "loss_sod": 0.016476528719067574, - "loss_total": 0.5869324803352356, - "step": 373699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.2180225849151611, - "learning_rate": 6.949686835146685e-07, - "loss": 0.4128, - "step": 373700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.945064067840576, - "loss_rtd": 0.20639783143997192, - "loss_sent": 0.33897995948791504, - "loss_sod": 0.1643020063638687, - "loss_total": 0.7096797823905945, - "step": 373799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.601099491119385, - "loss_rtd": 0.20600178837776184, - "loss_sent": 0.1476443111896515, - "loss_sod": 0.05563286691904068, - "loss_total": 0.4092789590358734, - "step": 373799 - }, - { - "epoch": 0.0096, - "grad_norm": 1.3141870498657227, - "learning_rate": 6.897061584638654e-07, - "loss": 0.4031, - "step": 373800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.614050388336182, - "loss_rtd": 0.17569977045059204, - "loss_sent": 0.05042315647006035, - "loss_sod": 0.03975456953048706, - "loss_total": 0.26587748527526855, - "step": 373899 - }, - { - "epoch": 0.009798, - "loss_gen": 6.103392601013184, - "loss_rtd": 0.2152901589870453, - "loss_sent": 0.15932482481002808, - "loss_sod": 0.014758851379156113, - "loss_total": 0.3893738389015198, - "step": 373899 - }, - { - "epoch": 0.0098, - "grad_norm": 0.6234253644943237, - "learning_rate": 6.844634955935292e-07, - "loss": 0.4201, - "step": 373900 - }, - { - "epoch": 0.009998, - "loss_gen": 5.901730060577393, - "loss_rtd": 0.19243325293064117, - "loss_sent": 0.26281651854515076, - "loss_sod": 0.07989338785409927, - "loss_total": 0.5351431369781494, - "step": 373999 - }, - { - "epoch": 0.009998, - "loss_gen": 6.170499324798584, - "loss_rtd": 0.20321491360664368, - "loss_sent": 0.08104973286390305, - "loss_sod": 0.07737652212381363, - "loss_total": 0.36164116859436035, - "step": 373999 - }, - { - "epoch": 0.01, - "grad_norm": 1.4586005210876465, - "learning_rate": 6.79240697015393e-07, - "loss": 0.4122, - "step": 374000 - }, - { - "epoch": 0.01, - "eval_loss": 0.39439496397972107, - "eval_runtime": 149.9884, - "eval_samples_per_second": 102.961, - "eval_steps_per_second": 0.807, - "step": 374000 - }, - { - "epoch": 0.010198, - "loss_gen": 5.722848892211914, - "loss_rtd": 0.21253347396850586, - "loss_sent": 0.41837382316589355, - "loss_sod": 0.0029890353325754404, - "loss_total": 0.6338963508605957, - "step": 374099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.7291340827941895, - "loss_rtd": 0.20271934568881989, - "loss_sent": 0.08784540742635727, - "loss_sod": 0.011821900494396687, - "loss_total": 0.30238664150238037, - "step": 374099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.226920247077942, - "learning_rate": 6.740377648332075e-07, - "loss": 0.395, - "step": 374100 - }, - { - "epoch": 0.010398, - "loss_gen": 6.0751566886901855, - "loss_rtd": 0.2085796594619751, - "loss_sent": 0.1787007749080658, - "loss_sod": 0.021359167993068695, - "loss_total": 0.4086396098136902, - "step": 374199 - }, - { - "epoch": 0.010398, - "loss_gen": 5.632849216461182, - "loss_rtd": 0.22320057451725006, - "loss_sent": 0.3246040940284729, - "loss_sod": 0.0007886893581598997, - "loss_total": 0.5485933423042297, - "step": 374199 - }, - { - "epoch": 0.0104, - "grad_norm": 1.323475956916809, - "learning_rate": 6.688547011427126e-07, - "loss": 0.4088, - "step": 374200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.7162184715271, - "loss_rtd": 0.21202796697616577, - "loss_sent": 0.22131867706775665, - "loss_sod": 0.03446045145392418, - "loss_total": 0.4678071141242981, - "step": 374299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.85204553604126, - "loss_rtd": 0.2098681926727295, - "loss_sent": 0.3486562669277191, - "loss_sod": 0.011006612330675125, - "loss_total": 0.5695310831069946, - "step": 374299 - }, - { - "epoch": 0.0106, - "grad_norm": 1.8341209888458252, - "learning_rate": 6.636915080316442e-07, - "loss": 0.4208, - "step": 374300 - }, - { - "epoch": 0.010798, - "loss_gen": 5.68907356262207, - "loss_rtd": 0.20671889185905457, - "loss_sent": 0.2256435751914978, - "loss_sod": 0.0048705353401601315, - "loss_total": 0.43723300099372864, - "step": 374399 - }, - { - "epoch": 0.010798, - "loss_gen": 6.107625961303711, - "loss_rtd": 0.20792579650878906, - "loss_sent": 0.6907059550285339, - "loss_sod": 0.028999656438827515, - "loss_total": 0.9276313781738281, - "step": 374399 - }, - { - "epoch": 0.0108, - "grad_norm": 4.132088661193848, - "learning_rate": 6.585481875797384e-07, - "loss": 0.419, - "step": 374400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.786088943481445, - "loss_rtd": 0.21682004630565643, - "loss_sent": 0.2618052065372467, - "loss_sod": 0.007072009611874819, - "loss_total": 0.48569726943969727, - "step": 374499 - }, - { - "epoch": 0.010998, - "loss_gen": 5.653449535369873, - "loss_rtd": 0.20159128308296204, - "loss_sent": 0.18401367962360382, - "loss_sod": 0.09294983744621277, - "loss_total": 0.47855478525161743, - "step": 374499 - }, - { - "epoch": 0.011, - "grad_norm": 1.2994540929794312, - "learning_rate": 6.534247418587158e-07, - "loss": 0.3971, - "step": 374500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.844245433807373, - "loss_rtd": 0.21887655556201935, - "loss_sent": 0.1843392550945282, - "loss_sod": 0.056441012769937515, - "loss_total": 0.45965683460235596, - "step": 374599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.890125274658203, - "loss_rtd": 0.18784134089946747, - "loss_sent": 0.2701111137866974, - "loss_sod": 0.04189835861325264, - "loss_total": 0.4998508095741272, - "step": 374599 - }, - { - "epoch": 0.0112, - "grad_norm": 1.931838870048523, - "learning_rate": 6.483211729323091e-07, - "loss": 0.4132, - "step": 374600 - }, - { - "epoch": 0.011398, - "loss_gen": 5.651385307312012, - "loss_rtd": 0.17697350680828094, - "loss_sent": 0.004192035179585218, - "loss_sod": 0.06596556305885315, - "loss_total": 0.2471311092376709, - "step": 374699 - }, - { - "epoch": 0.011398, - "loss_gen": 6.000596046447754, - "loss_rtd": 0.2265426516532898, - "loss_sent": 0.17024017870426178, - "loss_sod": 0.06462591886520386, - "loss_total": 0.46140873432159424, - "step": 374699 - }, - { - "epoch": 0.0114, - "grad_norm": 0.8797689080238342, - "learning_rate": 6.432374828562405e-07, - "loss": 0.3966, - "step": 374700 - }, - { - "epoch": 0.011598, - "loss_gen": 5.8981475830078125, - "loss_rtd": 0.21112960577011108, - "loss_sent": 0.09434548765420914, - "loss_sod": 0.056651681661605835, - "loss_total": 0.36212676763534546, - "step": 374799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.834134578704834, - "loss_rtd": 0.20715786516666412, - "loss_sent": 0.2500268816947937, - "loss_sod": 0.012542951852083206, - "loss_total": 0.46972769498825073, - "step": 374799 - }, - { - "epoch": 0.0116, - "grad_norm": 0.8593697547912598, - "learning_rate": 6.381736736781996e-07, - "loss": 0.4295, - "step": 374800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.788888454437256, - "loss_rtd": 0.19422529637813568, - "loss_sent": 0.10145033150911331, - "loss_sod": 0.031490758061409, - "loss_total": 0.3271663784980774, - "step": 374899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.987154960632324, - "loss_rtd": 0.210292786359787, - "loss_sent": 0.19489161670207977, - "loss_sod": 0.0206521637737751, - "loss_total": 0.42583656311035156, - "step": 374899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.7297804951667786, - "learning_rate": 6.33129747437905e-07, - "loss": 0.4093, - "step": 374900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.484149932861328, - "loss_rtd": 0.18677547574043274, - "loss_sent": 0.0006474746041931212, - "loss_sod": 0.1446777582168579, - "loss_total": 0.3321007192134857, - "step": 374999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.319724082946777, - "loss_rtd": 0.16887767612934113, - "loss_sent": 0.06040981039404869, - "loss_sod": 0.04936861991882324, - "loss_total": 0.27865609526634216, - "step": 374999 - }, - { - "epoch": 0.012, - "grad_norm": 1.015921950340271, - "learning_rate": 6.281057061670425e-07, - "loss": 0.4053, - "step": 375000 - }, - { - "epoch": 0.012, - "eval_loss": 0.39766398072242737, - "eval_runtime": 150.2669, - "eval_samples_per_second": 102.77, - "eval_steps_per_second": 0.805, - "step": 375000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.15659236907959, - "loss_rtd": 0.21070319414138794, - "loss_sent": 0.0696774274110794, - "loss_sod": 0.05396242439746857, - "loss_total": 0.3343430161476135, - "step": 375099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.826860427856445, - "loss_rtd": 0.20759032666683197, - "loss_sent": 0.06363599002361298, - "loss_sod": 0.07944272458553314, - "loss_total": 0.3506690263748169, - "step": 375099 - }, - { - "epoch": 0.0122, - "grad_norm": 1.0957943201065063, - "learning_rate": 6.231015518892991e-07, - "loss": 0.4127, - "step": 375100 - }, - { - "epoch": 0.012398, - "loss_gen": 6.238338470458984, - "loss_rtd": 0.22129666805267334, - "loss_sent": 0.17585736513137817, - "loss_sod": 0.02833162620663643, - "loss_total": 0.42548567056655884, - "step": 375199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.958853721618652, - "loss_rtd": 0.2143506556749344, - "loss_sent": 0.3217431306838989, - "loss_sod": 0.018301580101251602, - "loss_total": 0.5543953776359558, - "step": 375199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.0528647899627686, - "learning_rate": 6.181172866203455e-07, - "loss": 0.4109, - "step": 375200 - }, - { - "epoch": 0.012598, - "loss_gen": 6.195026397705078, - "loss_rtd": 0.2206852287054062, - "loss_sent": 0.06826501339673996, - "loss_sod": 0.019596481695771217, - "loss_total": 0.3085467219352722, - "step": 375299 - }, - { - "epoch": 0.012598, - "loss_gen": 5.812566757202148, - "loss_rtd": 0.2109343707561493, - "loss_sent": 0.12104281038045883, - "loss_sod": 0.012759190052747726, - "loss_total": 0.34473636746406555, - "step": 375299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.4340285062789917, - "learning_rate": 6.131529123678426e-07, - "loss": 0.4193, - "step": 375300 - }, - { - "epoch": 0.012798, - "loss_gen": 6.311695575714111, - "loss_rtd": 0.18870778381824493, - "loss_sent": 0.10390974581241608, - "loss_sod": 0.006855587009340525, - "loss_total": 0.2994731068611145, - "step": 375399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.540896892547607, - "loss_rtd": 0.18843285739421844, - "loss_sent": 0.01570253260433674, - "loss_sod": 0.03287762776017189, - "loss_total": 0.23701301217079163, - "step": 375399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.8500491976737976, - "learning_rate": 6.082084311314407e-07, - "loss": 0.432, - "step": 375400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.719391822814941, - "loss_rtd": 0.19244983792304993, - "loss_sent": 0.06878488510847092, - "loss_sod": 0.05827927961945534, - "loss_total": 0.3195140063762665, - "step": 375499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.634114742279053, - "loss_rtd": 0.23059429228305817, - "loss_sent": 0.0931749939918518, - "loss_sod": 0.020263686776161194, - "loss_total": 0.34403297305107117, - "step": 375499 - }, - { - "epoch": 0.013, - "grad_norm": 0.8408635258674622, - "learning_rate": 6.0328384490278e-07, - "loss": 0.4256, - "step": 375500 - }, - { - "epoch": 0.013198, - "loss_gen": 5.912682056427002, - "loss_rtd": 0.1841646283864975, - "loss_sent": 0.35598501563072205, - "loss_sod": 0.08835349977016449, - "loss_total": 0.628503143787384, - "step": 375599 - }, - { - "epoch": 0.013198, - "loss_gen": 5.978342533111572, - "loss_rtd": 0.20006583631038666, - "loss_sent": 0.34124019742012024, - "loss_sod": 0.011476047337055206, - "loss_total": 0.5527820587158203, - "step": 375599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.6812154054641724, - "learning_rate": 5.983791556654739e-07, - "loss": 0.4248, - "step": 375600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.824143886566162, - "loss_rtd": 0.21374043822288513, - "loss_sent": 0.2805253565311432, - "loss_sod": 0.013898611068725586, - "loss_total": 0.5081644058227539, - "step": 375699 - }, - { - "epoch": 0.013398, - "loss_gen": 6.045247554779053, - "loss_rtd": 0.21502593159675598, - "loss_sent": 0.3159756660461426, - "loss_sod": 0.014830069616436958, - "loss_total": 0.5458316802978516, - "step": 375699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.2717933654785156, - "learning_rate": 5.934943653951363e-07, - "loss": 0.4235, - "step": 375700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.889509677886963, - "loss_rtd": 0.2240026444196701, - "loss_sent": 0.05575236305594444, - "loss_sod": 0.05731187015771866, - "loss_total": 0.3370668888092041, - "step": 375799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.970902919769287, - "loss_rtd": 0.20444510877132416, - "loss_sent": 0.059766512364149094, - "loss_sod": 0.05850087106227875, - "loss_total": 0.3227124810218811, - "step": 375799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.0538915395736694, - "learning_rate": 5.886294760593602e-07, - "loss": 0.4027, - "step": 375800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.686825275421143, - "loss_rtd": 0.21245726943016052, - "loss_sent": 0.415727436542511, - "loss_sod": 0.0053630247712135315, - "loss_total": 0.6335477232933044, - "step": 375899 - }, - { - "epoch": 0.013798, - "loss_gen": 6.145212173461914, - "loss_rtd": 0.20726776123046875, - "loss_sent": 0.14269645512104034, - "loss_sod": 0.0076035140082240105, - "loss_total": 0.3575677275657654, - "step": 375899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.8053621053695679, - "learning_rate": 5.837844896177225e-07, - "loss": 0.4129, - "step": 375900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.996018409729004, - "loss_rtd": 0.19898325204849243, - "loss_sent": 0.10224363952875137, - "loss_sod": 0.1120859682559967, - "loss_total": 0.4133128523826599, - "step": 375999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.66213321685791, - "loss_rtd": 0.21605584025382996, - "loss_sent": 0.1493690013885498, - "loss_sod": 0.03288176283240318, - "loss_total": 0.39830660820007324, - "step": 375999 - }, - { - "epoch": 0.014, - "grad_norm": 1.5464924573898315, - "learning_rate": 5.789594080217842e-07, - "loss": 0.4238, - "step": 376000 - }, - { - "epoch": 0.014, - "eval_loss": 0.39667361974716187, - "eval_runtime": 150.1327, - "eval_samples_per_second": 102.862, - "eval_steps_per_second": 0.806, - "step": 376000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.963353633880615, - "loss_rtd": 0.2136533409357071, - "loss_sent": 0.11979737132787704, - "loss_sod": 0.022669518366456032, - "loss_total": 0.356120228767395, - "step": 376099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.6367411613464355, - "loss_rtd": 0.20582905411720276, - "loss_sent": 0.11848904937505722, - "loss_sod": 0.057092249393463135, - "loss_total": 0.3814103603363037, - "step": 376099 - }, - { - "epoch": 0.0142, - "grad_norm": 0.599355161190033, - "learning_rate": 5.741542332150851e-07, - "loss": 0.4058, - "step": 376100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.652035236358643, - "loss_rtd": 0.21074751019477844, - "loss_sent": 0.2718053460121155, - "loss_sod": 0.014466974884271622, - "loss_total": 0.49701982736587524, - "step": 376199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.715115070343018, - "loss_rtd": 0.21487551927566528, - "loss_sent": 0.03479180857539177, - "loss_sod": 0.003185899928212166, - "loss_total": 0.2528532147407532, - "step": 376199 - }, - { - "epoch": 0.0144, - "grad_norm": 1.0825930833816528, - "learning_rate": 5.693689671331548e-07, - "loss": 0.4034, - "step": 376200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.825916290283203, - "loss_rtd": 0.19891230762004852, - "loss_sent": 0.12521834671497345, - "loss_sod": 0.0035258494317531586, - "loss_total": 0.32765650749206543, - "step": 376299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.987072944641113, - "loss_rtd": 0.20104490220546722, - "loss_sent": 0.05299517884850502, - "loss_sod": 0.08359536528587341, - "loss_total": 0.33763542771339417, - "step": 376299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.9456308484077454, - "learning_rate": 5.646036117034847e-07, - "loss": 0.4399, - "step": 376300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.766575813293457, - "loss_rtd": 0.1912553310394287, - "loss_sent": 0.2122780978679657, - "loss_sod": 0.03363974392414093, - "loss_total": 0.43717318773269653, - "step": 376399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.739385604858398, - "loss_rtd": 0.2020392119884491, - "loss_sent": 0.057260312139987946, - "loss_sod": 0.04135715588927269, - "loss_total": 0.30065667629241943, - "step": 376399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.8377069234848022, - "learning_rate": 5.598581688455729e-07, - "loss": 0.4127, - "step": 376400 - }, - { - "epoch": 0.014998, - "loss_gen": 6.154478073120117, - "loss_rtd": 0.2175951898097992, - "loss_sent": 0.12526096403598785, - "loss_sod": 0.12082574516534805, - "loss_total": 0.4636818766593933, - "step": 376499 - }, - { - "epoch": 0.014998, - "loss_gen": 6.048160552978516, - "loss_rtd": 0.2201756089925766, - "loss_sent": 0.16244281828403473, - "loss_sod": 0.017377931624650955, - "loss_total": 0.3999963402748108, - "step": 376499 - }, - { - "epoch": 0.015, - "grad_norm": 1.3159281015396118, - "learning_rate": 5.55132640470879e-07, - "loss": 0.4078, - "step": 376500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.822164058685303, - "loss_rtd": 0.21577589213848114, - "loss_sent": 0.23769521713256836, - "loss_sod": 0.011649082414805889, - "loss_total": 0.46512019634246826, - "step": 376599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.9925079345703125, - "loss_rtd": 0.18235640227794647, - "loss_sent": 0.1412941962480545, - "loss_sod": 0.0333288311958313, - "loss_total": 0.3569794297218323, - "step": 376599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.7923346161842346, - "learning_rate": 5.504270284828417e-07, - "loss": 0.4225, - "step": 376600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.866918563842773, - "loss_rtd": 0.2069401741027832, - "loss_sent": 0.19980648159980774, - "loss_sod": 0.013730566948652267, - "loss_total": 0.4204772114753723, - "step": 376699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.525289535522461, - "loss_rtd": 0.1726590096950531, - "loss_sent": 0.07154635339975357, - "loss_sod": 0.053593751043081284, - "loss_total": 0.29779911041259766, - "step": 376699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.9670600891113281, - "learning_rate": 5.457413347768891e-07, - "loss": 0.4209, - "step": 376700 - }, - { - "epoch": 0.015598, - "loss_gen": 6.064262866973877, - "loss_rtd": 0.21177524328231812, - "loss_sent": 0.10027604550123215, - "loss_sod": 0.12483374774456024, - "loss_total": 0.4368850588798523, - "step": 376799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.948376655578613, - "loss_rtd": 0.2219124734401703, - "loss_sent": 0.08038464933633804, - "loss_sod": 0.19572797417640686, - "loss_total": 0.4980250895023346, - "step": 376799 - }, - { - "epoch": 0.0156, - "grad_norm": 1.2898521423339844, - "learning_rate": 5.410755612404061e-07, - "loss": 0.4191, - "step": 376800 - }, - { - "epoch": 0.015798, - "loss_gen": 5.998179912567139, - "loss_rtd": 0.19214703142642975, - "loss_sent": 0.09091558307409286, - "loss_sod": 0.0075148604810237885, - "loss_total": 0.2905774712562561, - "step": 376899 - }, - { - "epoch": 0.015798, - "loss_gen": 6.530083656311035, - "loss_rtd": 0.22096644341945648, - "loss_sent": 0.040187615901231766, - "loss_sod": 0.08871316909790039, - "loss_total": 0.3498672544956207, - "step": 376899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.7704149484634399, - "learning_rate": 5.364297097527781e-07, - "loss": 0.3964, - "step": 376900 - }, - { - "epoch": 0.015998, - "loss_gen": 5.968879222869873, - "loss_rtd": 0.21126744151115417, - "loss_sent": 0.23530399799346924, - "loss_sod": 0.03537895902991295, - "loss_total": 0.48195040225982666, - "step": 376999 - }, - { - "epoch": 0.015998, - "loss_gen": 6.198812484741211, - "loss_rtd": 0.22787810862064362, - "loss_sent": 0.30264151096343994, - "loss_sod": 0.14800915122032166, - "loss_total": 0.6785287857055664, - "step": 376999 - }, - { - "epoch": 0.016, - "grad_norm": 1.2362818717956543, - "learning_rate": 5.318037821853417e-07, - "loss": 0.4005, - "step": 377000 - }, - { - "epoch": 0.016, - "eval_loss": 0.3953685462474823, - "eval_runtime": 150.2873, - "eval_samples_per_second": 102.756, - "eval_steps_per_second": 0.805, - "step": 377000 - }, - { - "epoch": 0.016198, - "loss_gen": 6.084448337554932, - "loss_rtd": 0.1892758458852768, - "loss_sent": 0.09442143887281418, - "loss_sod": 0.022943057119846344, - "loss_total": 0.3066403567790985, - "step": 377099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.924466609954834, - "loss_rtd": 0.2213367074728012, - "loss_sent": 0.26644495129585266, - "loss_sod": 0.09163976460695267, - "loss_total": 0.5794214010238647, - "step": 377099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.101434588432312, - "learning_rate": 5.271977804014283e-07, - "loss": 0.419, - "step": 377100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.712164878845215, - "loss_rtd": 0.20631778240203857, - "loss_sent": 0.07045809179544449, - "loss_sod": 0.0227108895778656, - "loss_total": 0.29948675632476807, - "step": 377199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.66616153717041, - "loss_rtd": 0.22503411769866943, - "loss_sent": 0.13139791786670685, - "loss_sod": 0.03343576192855835, - "loss_total": 0.38986778259277344, - "step": 377199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.961425244808197, - "learning_rate": 5.226117062563319e-07, - "loss": 0.4223, - "step": 377200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.948493480682373, - "loss_rtd": 0.19645100831985474, - "loss_sent": 0.21695654094219208, - "loss_sod": 0.06329571455717087, - "loss_total": 0.4767032861709595, - "step": 377299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.683925628662109, - "loss_rtd": 0.21708688139915466, - "loss_sent": 0.14205588400363922, - "loss_sod": 0.01813621260225773, - "loss_total": 0.37727898359298706, - "step": 377299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.0263874530792236, - "learning_rate": 5.180455615973301e-07, - "loss": 0.4139, - "step": 377300 - }, - { - "epoch": 0.016798, - "loss_gen": 6.031938076019287, - "loss_rtd": 0.1937197744846344, - "loss_sent": 0.13819313049316406, - "loss_sod": 0.027516640722751617, - "loss_total": 0.3594295382499695, - "step": 377399 - }, - { - "epoch": 0.016798, - "loss_gen": 5.546912670135498, - "loss_rtd": 0.179046168923378, - "loss_sent": 0.0023346352390944958, - "loss_sod": 0.024476638063788414, - "loss_total": 0.20585744082927704, - "step": 377399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.8463380336761475, - "learning_rate": 5.134993482636518e-07, - "loss": 0.3982, - "step": 377400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.622273921966553, - "loss_rtd": 0.20638105273246765, - "loss_sent": 0.10678320378065109, - "loss_sod": 0.026414811611175537, - "loss_total": 0.3395790755748749, - "step": 377499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.296456336975098, - "loss_rtd": 0.18110458552837372, - "loss_sent": 2.4245722670457326e-05, - "loss_sod": 0.11737100034952164, - "loss_total": 0.29849985241889954, - "step": 377499 - }, - { - "epoch": 0.017, - "grad_norm": 0.8881797194480896, - "learning_rate": 5.089730680865212e-07, - "loss": 0.4057, - "step": 377500 - }, - { - "epoch": 0.017198, - "loss_gen": 6.0927934646606445, - "loss_rtd": 0.21190163493156433, - "loss_sent": 0.3824966847896576, - "loss_sod": 0.09746366739273071, - "loss_total": 0.6918619871139526, - "step": 377599 - }, - { - "epoch": 0.017198, - "loss_gen": 5.840753078460693, - "loss_rtd": 0.20463943481445312, - "loss_sent": 0.27296513319015503, - "loss_sod": 0.07115224003791809, - "loss_total": 0.5487568378448486, - "step": 377599 - }, - { - "epoch": 0.0172, - "grad_norm": 1.2717090845108032, - "learning_rate": 5.044667228891131e-07, - "loss": 0.4043, - "step": 377600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.747354984283447, - "loss_rtd": 0.2005842626094818, - "loss_sent": 0.2876623570919037, - "loss_sod": 0.011541787534952164, - "loss_total": 0.49978840351104736, - "step": 377699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.748420715332031, - "loss_rtd": 0.2131267488002777, - "loss_sent": 0.0777624249458313, - "loss_sod": 0.005437182728201151, - "loss_total": 0.29632633924484253, - "step": 377699 - }, - { - "epoch": 0.0174, - "grad_norm": 0.786728024482727, - "learning_rate": 4.999803144865978e-07, - "loss": 0.4283, - "step": 377700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.955152988433838, - "loss_rtd": 0.2116282433271408, - "loss_sent": 0.14277306199073792, - "loss_sod": 0.01791638508439064, - "loss_total": 0.37231767177581787, - "step": 377799 - }, - { - "epoch": 0.017598, - "loss_gen": 5.786294937133789, - "loss_rtd": 0.20854634046554565, - "loss_sent": 0.27219003438949585, - "loss_sod": 0.03270246088504791, - "loss_total": 0.5134388208389282, - "step": 377799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.8396150469779968, - "learning_rate": 4.955138446860907e-07, - "loss": 0.3925, - "step": 377800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.196096897125244, - "loss_rtd": 0.15838152170181274, - "loss_sent": 2.2329139028443024e-05, - "loss_sod": 0.10838563740253448, - "loss_total": 0.2667894661426544, - "step": 377899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.3700127601623535, - "loss_rtd": 0.1617504209280014, - "loss_sent": 0.020374365150928497, - "loss_sod": 0.066576287150383, - "loss_total": 0.2487010657787323, - "step": 377899 - }, - { - "epoch": 0.0178, - "grad_norm": 1.0081185102462769, - "learning_rate": 4.910673152866862e-07, - "loss": 0.4188, - "step": 377900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.937511444091797, - "loss_rtd": 0.2209366112947464, - "loss_sent": 0.24928177893161774, - "loss_sod": 0.004064135253429413, - "loss_total": 0.47428250312805176, - "step": 377999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.362171173095703, - "loss_rtd": 0.1926054209470749, - "loss_sent": 0.13002458214759827, - "loss_sod": 0.0025852625258266926, - "loss_total": 0.32521528005599976, - "step": 377999 - }, - { - "epoch": 0.018, - "grad_norm": 0.7147202491760254, - "learning_rate": 4.86640728079446e-07, - "loss": 0.4346, - "step": 378000 - }, - { - "epoch": 0.018, - "eval_loss": 0.3972267806529999, - "eval_runtime": 150.3853, - "eval_samples_per_second": 102.69, - "eval_steps_per_second": 0.805, - "step": 378000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.6956658363342285, - "loss_rtd": 0.17405839264392853, - "loss_sent": 0.2557704746723175, - "loss_sod": 0.08298088610172272, - "loss_total": 0.5128097534179688, - "step": 378099 - }, - { - "epoch": 0.018198, - "loss_gen": 5.304107666015625, - "loss_rtd": 0.1735163927078247, - "loss_sent": 2.3417098418576643e-05, - "loss_sod": 0.17949749529361725, - "loss_total": 0.3530372977256775, - "step": 378099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.085662841796875, - "learning_rate": 4.822340848473994e-07, - "loss": 0.4277, - "step": 378100 - }, - { - "epoch": 0.018398, - "loss_gen": 6.019848346710205, - "loss_rtd": 0.21076439321041107, - "loss_sent": 0.18268175423145294, - "loss_sod": 0.056887537240982056, - "loss_total": 0.4503336548805237, - "step": 378199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.67705774307251, - "loss_rtd": 0.1733749955892563, - "loss_sent": 0.06824298948049545, - "loss_sod": 0.0922779068350792, - "loss_total": 0.33389589190483093, - "step": 378199 - }, - { - "epoch": 0.0184, - "grad_norm": 1.7420406341552734, - "learning_rate": 4.778473873655432e-07, - "loss": 0.418, - "step": 378200 - }, - { - "epoch": 0.018598, - "loss_gen": 6.0037007331848145, - "loss_rtd": 0.21802647411823273, - "loss_sent": 0.298694908618927, - "loss_sod": 0.007177173625677824, - "loss_total": 0.5238985419273376, - "step": 378299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.92147970199585, - "loss_rtd": 0.20518158376216888, - "loss_sent": 0.44562461972236633, - "loss_sod": 0.023967813700437546, - "loss_total": 0.6747740507125854, - "step": 378299 - }, - { - "epoch": 0.0186, - "grad_norm": 2.0879807472229004, - "learning_rate": 4.73480637400836e-07, - "loss": 0.406, - "step": 378300 - }, - { - "epoch": 0.018798, - "loss_gen": 6.265625953674316, - "loss_rtd": 0.20076626539230347, - "loss_sent": 0.18996816873550415, - "loss_sod": 0.019429976120591164, - "loss_total": 0.41016441583633423, - "step": 378399 - }, - { - "epoch": 0.018798, - "loss_gen": 5.924288272857666, - "loss_rtd": 0.21152663230895996, - "loss_sent": 0.19503511488437653, - "loss_sod": 0.012357569299638271, - "loss_total": 0.41891932487487793, - "step": 378399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.8700541853904724, - "learning_rate": 4.691338367122045e-07, - "loss": 0.416, - "step": 378400 - }, - { - "epoch": 0.018998, - "loss_gen": 5.599389553070068, - "loss_rtd": 0.20247696340084076, - "loss_sent": 0.010313127189874649, - "loss_sod": 0.040898703038692474, - "loss_total": 0.253688782453537, - "step": 378499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.468404769897461, - "loss_rtd": 0.17046506702899933, - "loss_sent": 0.05398714914917946, - "loss_sod": 0.18214984238147736, - "loss_total": 0.40660205483436584, - "step": 378499 - }, - { - "epoch": 0.019, - "grad_norm": 1.0383541584014893, - "learning_rate": 4.6480698705054226e-07, - "loss": 0.4245, - "step": 378500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.799564361572266, - "loss_rtd": 0.21832984685897827, - "loss_sent": 0.07897751778364182, - "loss_sod": 0.01685876026749611, - "loss_total": 0.3141661286354065, - "step": 378599 - }, - { - "epoch": 0.019198, - "loss_gen": 6.005757808685303, - "loss_rtd": 0.21833860874176025, - "loss_sent": 0.19936542212963104, - "loss_sod": 0.04156842827796936, - "loss_total": 0.45927244424819946, - "step": 378599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.8350887894630432, - "learning_rate": 4.6050009015869976e-07, - "loss": 0.4325, - "step": 378600 - }, - { - "epoch": 0.019398, - "loss_gen": 5.9876389503479, - "loss_rtd": 0.20312464237213135, - "loss_sent": 0.2526932954788208, - "loss_sod": 0.1680360585451126, - "loss_total": 0.6238539814949036, - "step": 378699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.834028720855713, - "loss_rtd": 0.218141108751297, - "loss_sent": 0.13190466165542603, - "loss_sod": 0.05220230668783188, - "loss_total": 0.4022480845451355, - "step": 378699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.1730320453643799, - "learning_rate": 4.5621314777149483e-07, - "loss": 0.4131, - "step": 378700 - }, - { - "epoch": 0.019598, - "loss_gen": 6.032174587249756, - "loss_rtd": 0.1992754489183426, - "loss_sent": 0.4519427418708801, - "loss_sod": 0.030463453382253647, - "loss_total": 0.6816816329956055, - "step": 378799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.951891899108887, - "loss_rtd": 0.1831546276807785, - "loss_sent": 0.1545332670211792, - "loss_sod": 0.0045846295543015, - "loss_total": 0.3422725200653076, - "step": 378799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.8087645769119263, - "learning_rate": 4.519461616157183e-07, - "loss": 0.4072, - "step": 378800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.514514923095703, - "loss_rtd": 0.17730656266212463, - "loss_sent": 0.020894506946206093, - "loss_sod": 0.04431368410587311, - "loss_total": 0.24251475930213928, - "step": 378899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.795938491821289, - "loss_rtd": 0.19509421288967133, - "loss_sent": 0.139750137925148, - "loss_sod": 0.22240638732910156, - "loss_total": 0.5572507381439209, - "step": 378899 - }, - { - "epoch": 0.0198, - "grad_norm": 1.5947258472442627, - "learning_rate": 4.476991334100955e-07, - "loss": 0.4291, - "step": 378900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.886747360229492, - "loss_rtd": 0.21091145277023315, - "loss_sent": 0.16238117218017578, - "loss_sod": 0.025292634963989258, - "loss_total": 0.3985852599143982, - "step": 378999 - }, - { - "epoch": 0.019998, - "loss_gen": 6.108876705169678, - "loss_rtd": 0.21461129188537598, - "loss_sent": 0.19172941148281097, - "loss_sod": 0.0646105706691742, - "loss_total": 0.47095125913619995, - "step": 378999 - }, - { - "epoch": 0.02, - "grad_norm": 1.4274873733520508, - "learning_rate": 4.4347206486533564e-07, - "loss": 0.4227, - "step": 379000 - }, - { - "epoch": 0.02, - "eval_loss": 0.3934597074985504, - "eval_runtime": 151.6554, - "eval_samples_per_second": 101.83, - "eval_steps_per_second": 0.798, - "step": 379000 - }, - { - "epoch": 0.020198, - "loss_gen": 5.500655174255371, - "loss_rtd": 0.1995168924331665, - "loss_sent": 0.18026581406593323, - "loss_sod": 0.006543800700455904, - "loss_total": 0.38632649183273315, - "step": 379099 - }, - { - "epoch": 0.020198, - "loss_gen": 5.8818039894104, - "loss_rtd": 0.203139528632164, - "loss_sent": 0.037558067589998245, - "loss_sod": 0.04296644777059555, - "loss_total": 0.2836640477180481, - "step": 379099 - }, - { - "epoch": 0.0202, - "grad_norm": 0.7500585317611694, - "learning_rate": 4.39264957684099e-07, - "loss": 0.4148, - "step": 379100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.910189151763916, - "loss_rtd": 0.21938611567020416, - "loss_sent": 0.09466508030891418, - "loss_sod": 0.018406489863991737, - "loss_total": 0.33245769143104553, - "step": 379199 - }, - { - "epoch": 0.020398, - "loss_gen": 6.212303638458252, - "loss_rtd": 0.21138016879558563, - "loss_sent": 0.12816506624221802, - "loss_sod": 0.08652271330356598, - "loss_total": 0.42606794834136963, - "step": 379199 - }, - { - "epoch": 0.0204, - "grad_norm": 0.9723511338233948, - "learning_rate": 4.350778135610134e-07, - "loss": 0.4195, - "step": 379200 - }, - { - "epoch": 0.020598, - "loss_gen": 5.28507661819458, - "loss_rtd": 0.16205933690071106, - "loss_sent": 2.245020732516423e-05, - "loss_sod": 0.048897143453359604, - "loss_total": 0.2109789401292801, - "step": 379299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.454601287841797, - "loss_rtd": 0.1901465803384781, - "loss_sent": 0.03092203289270401, - "loss_sod": 0.10005979984998703, - "loss_total": 0.32112839818000793, - "step": 379299 - }, - { - "epoch": 0.0206, - "grad_norm": 1.2063854932785034, - "learning_rate": 4.309106341826574e-07, - "loss": 0.4099, - "step": 379300 - }, - { - "epoch": 0.020798, - "loss_gen": 6.110975742340088, - "loss_rtd": 0.20995761454105377, - "loss_sent": 0.15996411442756653, - "loss_sod": 0.061814285814762115, - "loss_total": 0.431736022233963, - "step": 379399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.985228061676025, - "loss_rtd": 0.20598271489143372, - "loss_sent": 0.2394324690103531, - "loss_sod": 0.018116464838385582, - "loss_total": 0.46353164315223694, - "step": 379399 - }, - { - "epoch": 0.0208, - "grad_norm": 1.0743968486785889, - "learning_rate": 4.267634212275717e-07, - "loss": 0.4161, - "step": 379400 - }, - { - "epoch": 0.020998, - "loss_gen": 6.155957221984863, - "loss_rtd": 0.2147091031074524, - "loss_sent": 0.09639734774827957, - "loss_sod": 0.03152427077293396, - "loss_total": 0.3426307141780853, - "step": 379499 - }, - { - "epoch": 0.020998, - "loss_gen": 6.12700891494751, - "loss_rtd": 0.19344329833984375, - "loss_sent": 0.3789941370487213, - "loss_sod": 0.03244452923536301, - "loss_total": 0.604882001876831, - "step": 379499 - }, - { - "epoch": 0.021, - "grad_norm": 1.2646256685256958, - "learning_rate": 4.2263617636624786e-07, - "loss": 0.4155, - "step": 379500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.411799430847168, - "loss_rtd": 0.173185795545578, - "loss_sent": 0.042459771037101746, - "loss_sod": 0.07946296036243439, - "loss_total": 0.2951085567474365, - "step": 379599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.8188629150390625, - "loss_rtd": 0.20450222492218018, - "loss_sent": 0.13934794068336487, - "loss_sod": 0.07375186681747437, - "loss_total": 0.4176020324230194, - "step": 379599 - }, - { - "epoch": 0.0212, - "grad_norm": 0.7739236950874329, - "learning_rate": 4.185289012611504e-07, - "loss": 0.4182, - "step": 379600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.5912885665893555, - "loss_rtd": 0.18380384147167206, - "loss_sent": 0.14199651777744293, - "loss_sod": 0.09766144305467606, - "loss_total": 0.42346179485321045, - "step": 379699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.986878395080566, - "loss_rtd": 0.20203180611133575, - "loss_sent": 0.06627637147903442, - "loss_sod": 0.09104157984256744, - "loss_total": 0.3593497574329376, - "step": 379699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.4827905893325806, - "learning_rate": 4.1444159756667824e-07, - "loss": 0.4281, - "step": 379700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.994523048400879, - "loss_rtd": 0.19935733079910278, - "loss_sent": 0.24832437932491302, - "loss_sod": 0.04916829243302345, - "loss_total": 0.49685001373291016, - "step": 379799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.899327754974365, - "loss_rtd": 0.21462617814540863, - "loss_sent": 0.12212895601987839, - "loss_sod": 0.032889872789382935, - "loss_total": 0.36964499950408936, - "step": 379799 - }, - { - "epoch": 0.0216, - "grad_norm": 1.5767993927001953, - "learning_rate": 4.103742669291977e-07, - "loss": 0.4254, - "step": 379800 - }, - { - "epoch": 0.021798, - "loss_gen": 6.336661338806152, - "loss_rtd": 0.20013760030269623, - "loss_sent": 0.10599343478679657, - "loss_sod": 0.10513609647750854, - "loss_total": 0.41126716136932373, - "step": 379899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.859251976013184, - "loss_rtd": 0.21320293843746185, - "loss_sent": 0.07086401432752609, - "loss_sod": 0.05484158918261528, - "loss_total": 0.3389085531234741, - "step": 379899 - }, - { - "epoch": 0.0218, - "grad_norm": 1.0110816955566406, - "learning_rate": 4.063269109870316e-07, - "loss": 0.3923, - "step": 379900 - }, - { - "epoch": 0.021998, - "loss_gen": 6.067895412445068, - "loss_rtd": 0.21180304884910583, - "loss_sent": 0.08946903049945831, - "loss_sod": 0.03902129828929901, - "loss_total": 0.34029334783554077, - "step": 379999 - }, - { - "epoch": 0.021998, - "loss_gen": 5.960249900817871, - "loss_rtd": 0.22714562714099884, - "loss_sent": 0.16184094548225403, - "loss_sod": 0.07150998711585999, - "loss_total": 0.46049657464027405, - "step": 379999 - }, - { - "epoch": 0.022, - "grad_norm": 1.3660411834716797, - "learning_rate": 4.0229953137045917e-07, - "loss": 0.4086, - "step": 380000 - }, - { - "epoch": 0.022, - "eval_loss": 0.3902009129524231, - "eval_runtime": 150.1671, - "eval_samples_per_second": 102.839, - "eval_steps_per_second": 0.806, - "step": 380000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.954092025756836, - "loss_rtd": 0.18810109794139862, - "loss_sent": 0.24659323692321777, - "loss_sod": 0.047537416219711304, - "loss_total": 0.4822317361831665, - "step": 380099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.394299507141113, - "loss_rtd": 0.1872742474079132, - "loss_sent": 2.289350777573418e-05, - "loss_sod": 0.08977370709180832, - "loss_total": 0.2770708501338959, - "step": 380099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.5520741939544678, - "learning_rate": 3.9829212970170506e-07, - "loss": 0.4212, - "step": 380100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.50739049911499, - "loss_rtd": 0.2191905975341797, - "loss_sent": 0.21501494944095612, - "loss_sod": 0.0014510282780975103, - "loss_total": 0.4356565773487091, - "step": 380199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.622569561004639, - "loss_rtd": 0.21803200244903564, - "loss_sent": 0.16014115512371063, - "loss_sod": 0.04715009406208992, - "loss_total": 0.4253232479095459, - "step": 380199 - }, - { - "epoch": 0.0224, - "grad_norm": 0.610840916633606, - "learning_rate": 3.943047075949446e-07, - "loss": 0.4071, - "step": 380200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.882879257202148, - "loss_rtd": 0.18428292870521545, - "loss_sent": 0.17902645468711853, - "loss_sod": 0.09229131788015366, - "loss_total": 0.45560070872306824, - "step": 380299 - }, - { - "epoch": 0.022598, - "loss_gen": 5.4265313148498535, - "loss_rtd": 0.18168246746063232, - "loss_sent": 4.284538226784207e-05, - "loss_sod": 0.11683396995067596, - "loss_total": 0.298559308052063, - "step": 380299 - }, - { - "epoch": 0.0226, - "grad_norm": 1.1512634754180908, - "learning_rate": 3.9033726665632096e-07, - "loss": 0.4145, - "step": 380300 - }, - { - "epoch": 0.022798, - "loss_gen": 5.272458553314209, - "loss_rtd": 0.17100802063941956, - "loss_sent": 0.015773722901940346, - "loss_sod": 0.04977009817957878, - "loss_total": 0.23655185103416443, - "step": 380399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.599095344543457, - "loss_rtd": 0.18492868542671204, - "loss_sent": 0.16344574093818665, - "loss_sod": 0.10426199436187744, - "loss_total": 0.4526364207267761, - "step": 380399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.2339563369750977, - "learning_rate": 3.8638980848391125e-07, - "loss": 0.4231, - "step": 380400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.604328155517578, - "loss_rtd": 0.2052101194858551, - "loss_sent": 0.2013697326183319, - "loss_sod": 0.07968216389417648, - "loss_total": 0.4862620234489441, - "step": 380499 - }, - { - "epoch": 0.022998, - "loss_gen": 5.289646148681641, - "loss_rtd": 0.17533379793167114, - "loss_sent": 2.7983234758721665e-05, - "loss_sod": 0.11863875389099121, - "loss_total": 0.2940005362033844, - "step": 380499 - }, - { - "epoch": 0.023, - "grad_norm": 1.6092185974121094, - "learning_rate": 3.824623346677547e-07, - "loss": 0.4211, - "step": 380500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.850788116455078, - "loss_rtd": 0.20604585111141205, - "loss_sent": 0.275841623544693, - "loss_sod": 0.033807314932346344, - "loss_total": 0.515694797039032, - "step": 380599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.522738456726074, - "loss_rtd": 0.1687469333410263, - "loss_sent": 0.005756030790507793, - "loss_sod": 0.032747939229011536, - "loss_total": 0.2072509080171585, - "step": 380599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.8034012913703918, - "learning_rate": 3.785548467898359e-07, - "loss": 0.4376, - "step": 380600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.938969135284424, - "loss_rtd": 0.20497600734233856, - "loss_sent": 0.1025799810886383, - "loss_sod": 0.008984355255961418, - "loss_total": 0.31654036045074463, - "step": 380699 - }, - { - "epoch": 0.023398, - "loss_gen": 6.065103054046631, - "loss_rtd": 0.2054506540298462, - "loss_sent": 0.05428088828921318, - "loss_sod": 0.05435691773891449, - "loss_total": 0.31408846378326416, - "step": 380699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8988030552864075, - "learning_rate": 3.7466734642408463e-07, - "loss": 0.4211, - "step": 380700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.444797992706299, - "loss_rtd": 0.1727626621723175, - "loss_sent": 0.056168101727962494, - "loss_sod": 0.14595083892345428, - "loss_total": 0.3748815953731537, - "step": 380799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.7508955001831055, - "loss_rtd": 0.19710072875022888, - "loss_sent": 0.3005600571632385, - "loss_sod": 0.01652163825929165, - "loss_total": 0.514182448387146, - "step": 380799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.959169626235962, - "learning_rate": 3.707998351363984e-07, - "loss": 0.4136, - "step": 380800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.873311996459961, - "loss_rtd": 0.20481862127780914, - "loss_sent": 0.3269082307815552, - "loss_sod": 0.03254479914903641, - "loss_total": 0.5642716884613037, - "step": 380899 - }, - { - "epoch": 0.023798, - "loss_gen": 6.063734531402588, - "loss_rtd": 0.20686492323875427, - "loss_sent": 0.38673967123031616, - "loss_sod": 0.021104365587234497, - "loss_total": 0.6147089600563049, - "step": 380899 - }, - { - "epoch": 0.0238, - "grad_norm": 1.3335402011871338, - "learning_rate": 3.6695231448460324e-07, - "loss": 0.4066, - "step": 380900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.842053413391113, - "loss_rtd": 0.20197023451328278, - "loss_sent": 0.11227338016033173, - "loss_sod": 0.027149679139256477, - "loss_total": 0.34139329195022583, - "step": 380999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.8046183586120605, - "loss_rtd": 0.23079979419708252, - "loss_sent": 0.17582298815250397, - "loss_sod": 0.009079264476895332, - "loss_total": 0.41570204496383667, - "step": 380999 - }, - { - "epoch": 0.024, - "grad_norm": 1.4506800174713135, - "learning_rate": 3.631247860184761e-07, - "loss": 0.4051, - "step": 381000 - }, - { - "epoch": 0.024, - "eval_loss": 0.3930273652076721, - "eval_runtime": 150.3698, - "eval_samples_per_second": 102.7, - "eval_steps_per_second": 0.805, - "step": 381000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.845580101013184, - "loss_rtd": 0.2230759710073471, - "loss_sent": 0.3735257089138031, - "loss_sod": 0.024172678589820862, - "loss_total": 0.6207743883132935, - "step": 381099 - }, - { - "epoch": 0.024198, - "loss_gen": 6.022526264190674, - "loss_rtd": 0.21440497040748596, - "loss_sent": 0.16394156217575073, - "loss_sod": 0.05334143340587616, - "loss_total": 0.43168795108795166, - "step": 381099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.4220038652420044, - "learning_rate": 3.5931725127975047e-07, - "loss": 0.4175, - "step": 381100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.436118125915527, - "loss_rtd": 0.16183926165103912, - "loss_sent": 0.08216781169176102, - "loss_sod": 0.01098170317709446, - "loss_total": 0.25498878955841064, - "step": 381199 - }, - { - "epoch": 0.024398, - "loss_gen": 6.093311309814453, - "loss_rtd": 0.19413651525974274, - "loss_sent": 0.12365799397230148, - "loss_sod": 0.02665119245648384, - "loss_total": 0.34444570541381836, - "step": 381199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.6130800843238831, - "learning_rate": 3.55529711802105e-07, - "loss": 0.4144, - "step": 381200 - }, - { - "epoch": 0.024598, - "loss_gen": 6.526711940765381, - "loss_rtd": 0.2141195684671402, - "loss_sent": 0.0823674201965332, - "loss_sod": 0.12698936462402344, - "loss_total": 0.42347633838653564, - "step": 381299 - }, - { - "epoch": 0.024598, - "loss_gen": 6.169975280761719, - "loss_rtd": 0.20990502834320068, - "loss_sent": 0.12368293106555939, - "loss_sod": 0.045053135603666306, - "loss_total": 0.3786410689353943, - "step": 381299 - }, - { - "epoch": 0.0246, - "grad_norm": 1.2590303421020508, - "learning_rate": 3.5176216911114724e-07, - "loss": 0.4058, - "step": 381300 - }, - { - "epoch": 0.024798, - "loss_gen": 6.049837112426758, - "loss_rtd": 0.2047654390335083, - "loss_sent": 0.21812596917152405, - "loss_sod": 0.0736774355173111, - "loss_total": 0.49656882882118225, - "step": 381399 - }, - { - "epoch": 0.024798, - "loss_gen": 6.16081428527832, - "loss_rtd": 0.22861410677433014, - "loss_sent": 0.11025246977806091, - "loss_sod": 0.09058435261249542, - "loss_total": 0.4294509291648865, - "step": 381399 - }, - { - "epoch": 0.0248, - "grad_norm": 0.8518393635749817, - "learning_rate": 3.4801462472445203e-07, - "loss": 0.4219, - "step": 381400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.403074264526367, - "loss_rtd": 0.19154304265975952, - "loss_sent": 0.03534320741891861, - "loss_sod": 0.0910448357462883, - "loss_total": 0.31793108582496643, - "step": 381499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.185169219970703, - "loss_rtd": 0.1610122174024582, - "loss_sent": 0.06993773579597473, - "loss_sod": 0.0635794997215271, - "loss_total": 0.29452943801879883, - "step": 381499 - }, - { - "epoch": 0.025, - "grad_norm": 0.8139126300811768, - "learning_rate": 3.442870801515341e-07, - "loss": 0.4193, - "step": 381500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.932486057281494, - "loss_rtd": 0.19373764097690582, - "loss_sent": 0.29014596343040466, - "loss_sod": 0.02605004794895649, - "loss_total": 0.5099336504936218, - "step": 381599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.304928302764893, - "loss_rtd": 0.1672595739364624, - "loss_sent": 0.026334472000598907, - "loss_sod": 0.15049013495445251, - "loss_total": 0.3440841734409332, - "step": 381599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.2460983991622925, - "learning_rate": 3.405795368938369e-07, - "loss": 0.4269, - "step": 381600 - }, - { - "epoch": 0.025398, - "loss_gen": 6.1368560791015625, - "loss_rtd": 0.209011048078537, - "loss_sent": 0.1695796251296997, - "loss_sod": 0.009269974194467068, - "loss_total": 0.38786065578460693, - "step": 381699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.6605682373046875, - "loss_rtd": 0.21456101536750793, - "loss_sent": 0.10210420936346054, - "loss_sod": 0.03545018285512924, - "loss_total": 0.3521154224872589, - "step": 381699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8272795677185059, - "learning_rate": 3.3689199644476587e-07, - "loss": 0.4083, - "step": 381700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.899969100952148, - "loss_rtd": 0.20056556165218353, - "loss_sent": 0.16467887163162231, - "loss_sod": 0.0619795024394989, - "loss_total": 0.42722392082214355, - "step": 381799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.529124736785889, - "loss_rtd": 0.20809099078178406, - "loss_sent": 0.06432030349969864, - "loss_sod": 0.037849728018045425, - "loss_total": 0.3102610111236572, - "step": 381799 - }, - { - "epoch": 0.0256, - "grad_norm": 1.134244441986084, - "learning_rate": 3.3322446028965503e-07, - "loss": 0.4233, - "step": 381800 - }, - { - "epoch": 0.025798, - "loss_gen": 5.27346134185791, - "loss_rtd": 0.19142065942287445, - "loss_sent": 0.007555554620921612, - "loss_sod": 0.03351636976003647, - "loss_total": 0.2324925810098648, - "step": 381899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.270022869110107, - "loss_rtd": 0.17058920860290527, - "loss_sent": 2.4998216758831404e-05, - "loss_sod": 0.10906675457954407, - "loss_total": 0.2796809673309326, - "step": 381899 - }, - { - "epoch": 0.0258, - "grad_norm": 0.9053205847740173, - "learning_rate": 3.2957692990580046e-07, - "loss": 0.429, - "step": 381900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.275055885314941, - "loss_rtd": 0.16804182529449463, - "loss_sent": 2.5137811462627724e-05, - "loss_sod": 0.07545459270477295, - "loss_total": 0.2435215562582016, - "step": 381999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.577271461486816, - "loss_rtd": 0.209976926445961, - "loss_sent": 0.15293532609939575, - "loss_sod": 0.01812799833714962, - "loss_total": 0.3810402452945709, - "step": 381999 - }, - { - "epoch": 0.026, - "grad_norm": 0.7928724884986877, - "learning_rate": 3.2594940676241027e-07, - "loss": 0.4248, - "step": 382000 - }, - { - "epoch": 0.026, - "eval_loss": 0.39272138476371765, - "eval_runtime": 150.1275, - "eval_samples_per_second": 102.866, - "eval_steps_per_second": 0.806, - "step": 382000 - }, - { - "epoch": 0.026198, - "loss_gen": 5.781370639801025, - "loss_rtd": 0.19625608623027802, - "loss_sent": 0.05940936878323555, - "loss_sod": 0.07032468914985657, - "loss_total": 0.32599014043807983, - "step": 382099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.562563419342041, - "loss_rtd": 0.170019268989563, - "loss_sent": 0.04797758534550667, - "loss_sod": 0.05059007182717323, - "loss_total": 0.2685869336128235, - "step": 382099 - }, - { - "epoch": 0.0262, - "grad_norm": 0.752170741558075, - "learning_rate": 3.2234189232066004e-07, - "loss": 0.4143, - "step": 382100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.564212322235107, - "loss_rtd": 0.18514154851436615, - "loss_sent": 0.055654071271419525, - "loss_sod": 0.15288811922073364, - "loss_total": 0.3936837613582611, - "step": 382199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.553371906280518, - "loss_rtd": 0.1830948144197464, - "loss_sent": 0.16740046441555023, - "loss_sod": 0.10825850069522858, - "loss_total": 0.458753764629364, - "step": 382199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.5331205129623413, - "learning_rate": 3.187543880336541e-07, - "loss": 0.3994, - "step": 382200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.898194313049316, - "loss_rtd": 0.1988452970981598, - "loss_sent": 0.22878481447696686, - "loss_sod": 0.04501491039991379, - "loss_total": 0.4726450443267822, - "step": 382299 - }, - { - "epoch": 0.026598, - "loss_gen": 5.903650283813477, - "loss_rtd": 0.19803264737129211, - "loss_sent": 0.5170067548751831, - "loss_sod": 0.03003372997045517, - "loss_total": 0.745073139667511, - "step": 382299 - }, - { - "epoch": 0.0266, - "grad_norm": 2.603910446166992, - "learning_rate": 3.151868953464476e-07, - "loss": 0.4201, - "step": 382300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.6080217361450195, - "loss_rtd": 0.2083578109741211, - "loss_sent": 0.1562943160533905, - "loss_sod": 0.011052027344703674, - "loss_total": 0.37570416927337646, - "step": 382399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.847711086273193, - "loss_rtd": 0.20529872179031372, - "loss_sent": 0.04937053471803665, - "loss_sod": 0.017431385815143585, - "loss_total": 0.27210062742233276, - "step": 382399 - }, - { - "epoch": 0.0268, - "grad_norm": 0.7713347673416138, - "learning_rate": 3.1163941569600787e-07, - "loss": 0.4201, - "step": 382400 - }, - { - "epoch": 0.026998, - "loss_gen": 6.18224573135376, - "loss_rtd": 0.23028218746185303, - "loss_sent": 0.10545142740011215, - "loss_sod": 0.022825714200735092, - "loss_total": 0.35855934023857117, - "step": 382499 - }, - { - "epoch": 0.026998, - "loss_gen": 5.670419692993164, - "loss_rtd": 0.21611681580543518, - "loss_sent": 0.14540377259254456, - "loss_sod": 0.012308724224567413, - "loss_total": 0.37382930517196655, - "step": 382499 - }, - { - "epoch": 0.027, - "grad_norm": 1.094574213027954, - "learning_rate": 3.0811195051127507e-07, - "loss": 0.4108, - "step": 382500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.5457892417907715, - "loss_rtd": 0.1806672364473343, - "loss_sent": 0.00011805249232565984, - "loss_sod": 0.08111029863357544, - "loss_total": 0.2618955671787262, - "step": 382599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.893887996673584, - "loss_rtd": 0.19793257117271423, - "loss_sent": 0.04744026064872742, - "loss_sod": 0.119545117020607, - "loss_total": 0.36491796374320984, - "step": 382599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.2319138050079346, - "learning_rate": 3.0460450121310156e-07, - "loss": 0.434, - "step": 382600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.232121467590332, - "loss_rtd": 0.15601125359535217, - "loss_sent": 0.02890804223716259, - "loss_sod": 0.14162380993366241, - "loss_total": 0.3265431225299835, - "step": 382699 - }, - { - "epoch": 0.027398, - "loss_gen": 6.055953502655029, - "loss_rtd": 0.20038843154907227, - "loss_sent": 0.22261404991149902, - "loss_sod": 0.006330553907901049, - "loss_total": 0.42933303117752075, - "step": 382699 - }, - { - "epoch": 0.0274, - "grad_norm": 0.8192993402481079, - "learning_rate": 3.0111706921429615e-07, - "loss": 0.4135, - "step": 382700 - }, - { - "epoch": 0.027598, - "loss_gen": 6.151395797729492, - "loss_rtd": 0.23215961456298828, - "loss_sent": 0.045967016369104385, - "loss_sod": 0.025773657485842705, - "loss_total": 0.30390027165412903, - "step": 382799 - }, - { - "epoch": 0.027598, - "loss_gen": 6.031484127044678, - "loss_rtd": 0.21376293897628784, - "loss_sent": 0.30357852578163147, - "loss_sod": 0.046195387840270996, - "loss_total": 0.5635368824005127, - "step": 382799 - }, - { - "epoch": 0.0276, - "grad_norm": 0.8617048859596252, - "learning_rate": 2.9764965591959073e-07, - "loss": 0.4175, - "step": 382800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.6411614418029785, - "loss_rtd": 0.21144753694534302, - "loss_sent": 0.16217614710330963, - "loss_sod": 0.013701547868549824, - "loss_total": 0.3873252272605896, - "step": 382899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.872683048248291, - "loss_rtd": 0.2178838551044464, - "loss_sent": 0.16557738184928894, - "loss_sod": 0.03549986705183983, - "loss_total": 0.4189611077308655, - "step": 382899 - }, - { - "epoch": 0.0278, - "grad_norm": 0.8648281097412109, - "learning_rate": 2.942022627256624e-07, - "loss": 0.4013, - "step": 382900 - }, - { - "epoch": 0.027998, - "loss_gen": 6.025871753692627, - "loss_rtd": 0.20410758256912231, - "loss_sent": 0.1275903433561325, - "loss_sod": 0.0383104532957077, - "loss_total": 0.3700083792209625, - "step": 382999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.962867736816406, - "loss_rtd": 0.19217722117900848, - "loss_sent": 0.18039610981941223, - "loss_sod": 0.017892785370349884, - "loss_total": 0.3904661238193512, - "step": 382999 - }, - { - "epoch": 0.028, - "grad_norm": 1.1089924573898315, - "learning_rate": 2.9077489102111167e-07, - "loss": 0.4406, - "step": 383000 - }, - { - "epoch": 0.028, - "eval_loss": 0.3974106013774872, - "eval_runtime": 150.3955, - "eval_samples_per_second": 102.683, - "eval_steps_per_second": 0.805, - "step": 383000 - }, - { - "epoch": 0.028198, - "loss_gen": 5.829176902770996, - "loss_rtd": 0.19727171957492828, - "loss_sent": 0.3101958632469177, - "loss_sod": 0.013695419766008854, - "loss_total": 0.5211629867553711, - "step": 383099 - }, - { - "epoch": 0.028198, - "loss_gen": 5.7120747566223145, - "loss_rtd": 0.20222920179367065, - "loss_sent": 0.08801617473363876, - "loss_sod": 0.0060181827284395695, - "loss_total": 0.29626354575157166, - "step": 383099 - }, - { - "epoch": 0.0282, - "grad_norm": 1.1163129806518555, - "learning_rate": 2.8736754218650076e-07, - "loss": 0.4124, - "step": 383100 - }, - { - "epoch": 0.028398, - "loss_gen": 5.193973541259766, - "loss_rtd": 0.16248661279678345, - "loss_sent": 2.4179706088034436e-05, - "loss_sod": 0.08134716749191284, - "loss_total": 0.2438579648733139, - "step": 383199 - }, - { - "epoch": 0.028398, - "loss_gen": 6.03629207611084, - "loss_rtd": 0.198272705078125, - "loss_sent": 0.16059327125549316, - "loss_sod": 0.004946468397974968, - "loss_total": 0.3638124465942383, - "step": 383199 - }, - { - "epoch": 0.0284, - "grad_norm": 1.1488338708877563, - "learning_rate": 2.8398021759429295e-07, - "loss": 0.4275, - "step": 383200 - }, - { - "epoch": 0.028598, - "loss_gen": 6.227768898010254, - "loss_rtd": 0.22333618998527527, - "loss_sent": 0.035455722361803055, - "loss_sod": 0.04420716315507889, - "loss_total": 0.3029990792274475, - "step": 383299 - }, - { - "epoch": 0.028598, - "loss_gen": 5.957921981811523, - "loss_rtd": 0.21033763885498047, - "loss_sent": 0.12784285843372345, - "loss_sod": 0.024178283289074898, - "loss_total": 0.36235877871513367, - "step": 383299 - }, - { - "epoch": 0.0286, - "grad_norm": 0.8141974806785583, - "learning_rate": 2.8061291860890236e-07, - "loss": 0.4191, - "step": 383300 - }, - { - "epoch": 0.028798, - "loss_gen": 5.763555526733398, - "loss_rtd": 0.19557757675647736, - "loss_sent": 0.25977006554603577, - "loss_sod": 0.08676838129758835, - "loss_total": 0.5421160459518433, - "step": 383399 - }, - { - "epoch": 0.028798, - "loss_gen": 5.887138843536377, - "loss_rtd": 0.20468755066394806, - "loss_sent": 0.275336354970932, - "loss_sod": 0.049571797251701355, - "loss_total": 0.5295957326889038, - "step": 383399 - }, - { - "epoch": 0.0288, - "grad_norm": 1.3290231227874756, - "learning_rate": 2.7726564658669406e-07, - "loss": 0.3955, - "step": 383400 - }, - { - "epoch": 0.028998, - "loss_gen": 5.662152290344238, - "loss_rtd": 0.2109338939189911, - "loss_sent": 0.13339297473430634, - "loss_sod": 0.04413021728396416, - "loss_total": 0.3884570598602295, - "step": 383499 - }, - { - "epoch": 0.028998, - "loss_gen": 5.84342098236084, - "loss_rtd": 0.197341650724411, - "loss_sent": 0.1957792043685913, - "loss_sod": 0.05706150457262993, - "loss_total": 0.45018234848976135, - "step": 383499 - }, - { - "epoch": 0.029, - "grad_norm": 1.300395131111145, - "learning_rate": 2.739384028759284e-07, - "loss": 0.426, - "step": 383500 - }, - { - "epoch": 0.029198, - "loss_gen": 5.621011257171631, - "loss_rtd": 0.1888379603624344, - "loss_sent": 0.07751519232988358, - "loss_sod": 0.08898812532424927, - "loss_total": 0.35534125566482544, - "step": 383599 - }, - { - "epoch": 0.029198, - "loss_gen": 5.290693283081055, - "loss_rtd": 0.16856513917446136, - "loss_sent": 0.014348041266202927, - "loss_sod": 0.0435963049530983, - "loss_total": 0.22650949656963348, - "step": 383599 - }, - { - "epoch": 0.0292, - "grad_norm": 0.8381374478340149, - "learning_rate": 2.706311888168278e-07, - "loss": 0.4217, - "step": 383600 - }, - { - "epoch": 0.029398, - "loss_gen": 5.772955894470215, - "loss_rtd": 0.20471149682998657, - "loss_sent": 0.10987544804811478, - "loss_sod": 0.0125756049528718, - "loss_total": 0.32716256380081177, - "step": 383699 - }, - { - "epoch": 0.029398, - "loss_gen": 5.772219181060791, - "loss_rtd": 0.18905729055404663, - "loss_sent": 0.05473000183701515, - "loss_sod": 0.0527057982981205, - "loss_total": 0.2964930832386017, - "step": 383699 - }, - { - "epoch": 0.0294, - "grad_norm": 0.7054979205131531, - "learning_rate": 2.6734400574153773e-07, - "loss": 0.4373, - "step": 383700 - }, - { - "epoch": 0.029598, - "loss_gen": 5.712389945983887, - "loss_rtd": 0.21115665137767792, - "loss_sent": 0.27840518951416016, - "loss_sod": 0.01093914732336998, - "loss_total": 0.5005009770393372, - "step": 383799 - }, - { - "epoch": 0.029598, - "loss_gen": 6.105762004852295, - "loss_rtd": 0.22365495562553406, - "loss_sent": 0.1008310467004776, - "loss_sod": 0.11533018946647644, - "loss_total": 0.4398161768913269, - "step": 383799 - }, - { - "epoch": 0.0296, - "grad_norm": 1.273370623588562, - "learning_rate": 2.640768549741379e-07, - "loss": 0.4268, - "step": 383800 - }, - { - "epoch": 0.029798, - "loss_gen": 5.544104099273682, - "loss_rtd": 0.1853761076927185, - "loss_sent": 0.28038954734802246, - "loss_sod": 0.017326954752206802, - "loss_total": 0.48309260606765747, - "step": 383899 - }, - { - "epoch": 0.029798, - "loss_gen": 5.926936149597168, - "loss_rtd": 0.2125987857580185, - "loss_sent": 0.3669087588787079, - "loss_sod": 0.05336569994688034, - "loss_total": 0.6328732371330261, - "step": 383899 - }, - { - "epoch": 0.0298, - "grad_norm": 1.3195164203643799, - "learning_rate": 2.6082973783063125e-07, - "loss": 0.4064, - "step": 383900 - }, - { - "epoch": 0.029998, - "loss_gen": 5.669084072113037, - "loss_rtd": 0.17921671271324158, - "loss_sent": 0.05930311232805252, - "loss_sod": 0.017765387892723083, - "loss_total": 0.2562852203845978, - "step": 383999 - }, - { - "epoch": 0.029998, - "loss_gen": 5.860673904418945, - "loss_rtd": 0.18688058853149414, - "loss_sent": 0.11732591688632965, - "loss_sod": 0.027635326609015465, - "loss_total": 0.3318418264389038, - "step": 383999 - }, - { - "epoch": 0.03, - "grad_norm": 1.0545306205749512, - "learning_rate": 2.5760265561896035e-07, - "loss": 0.4014, - "step": 384000 - }, - { - "epoch": 0.03, - "eval_loss": 0.3930528163909912, - "eval_runtime": 150.1463, - "eval_samples_per_second": 102.853, - "eval_steps_per_second": 0.806, - "step": 384000 - }, - { - "epoch": 0.030198, - "loss_gen": 5.7394208908081055, - "loss_rtd": 0.18927958607673645, - "loss_sent": 0.05443378537893295, - "loss_sod": 0.05979667976498604, - "loss_total": 0.30351004004478455, - "step": 384099 - }, - { - "epoch": 0.030198, - "loss_gen": 5.6442341804504395, - "loss_rtd": 0.18970787525177002, - "loss_sent": 0.08145482838153839, - "loss_sod": 0.026608798652887344, - "loss_total": 0.29777151346206665, - "step": 384099 - }, - { - "epoch": 0.0302, - "grad_norm": 0.8515053987503052, - "learning_rate": 2.5439560963898546e-07, - "loss": 0.4056, - "step": 384100 - }, - { - "epoch": 0.030398, - "loss_gen": 6.171351909637451, - "loss_rtd": 0.20819953083992004, - "loss_sent": 0.21172025799751282, - "loss_sod": 0.026197826489806175, - "loss_total": 0.4461176097393036, - "step": 384199 - }, - { - "epoch": 0.030398, - "loss_gen": 6.090738773345947, - "loss_rtd": 0.20947818458080292, - "loss_sent": 0.36782175302505493, - "loss_sod": 0.09364922344684601, - "loss_total": 0.6709491610527039, - "step": 384199 - }, - { - "epoch": 0.0304, - "grad_norm": 0.9973644018173218, - "learning_rate": 2.5120860118251765e-07, - "loss": 0.4199, - "step": 384200 - }, - { - "epoch": 0.030598, - "loss_gen": 5.825852394104004, - "loss_rtd": 0.17805254459381104, - "loss_sent": 0.006554849445819855, - "loss_sod": 0.14529208838939667, - "loss_total": 0.32989948987960815, - "step": 384299 - }, - { - "epoch": 0.030598, - "loss_gen": 6.071846008300781, - "loss_rtd": 0.2020629197359085, - "loss_sent": 0.37729617953300476, - "loss_sod": 0.14929164946079254, - "loss_total": 0.7286507487297058, - "step": 384299 - }, - { - "epoch": 0.0306, - "grad_norm": 1.2775895595550537, - "learning_rate": 2.48041631533269e-07, - "loss": 0.4257, - "step": 384300 - }, - { - "epoch": 0.030798, - "loss_gen": 5.82039737701416, - "loss_rtd": 0.20472243428230286, - "loss_sent": 0.3711397647857666, - "loss_sod": 0.07700426876544952, - "loss_total": 0.6528664827346802, - "step": 384399 - }, - { - "epoch": 0.030798, - "loss_gen": 6.16239070892334, - "loss_rtd": 0.23037785291671753, - "loss_sent": 0.13526126742362976, - "loss_sod": 0.07043778151273727, - "loss_total": 0.43607690930366516, - "step": 384399 - }, - { - "epoch": 0.0308, - "grad_norm": 0.9397040605545044, - "learning_rate": 2.448947019669079e-07, - "loss": 0.4166, - "step": 384400 - }, - { - "epoch": 0.030998, - "loss_gen": 6.035325050354004, - "loss_rtd": 0.23700235784053802, - "loss_sent": 0.07824476063251495, - "loss_sod": 0.018092438578605652, - "loss_total": 0.3333395719528198, - "step": 384499 - }, - { - "epoch": 0.030998, - "loss_gen": 6.173375129699707, - "loss_rtd": 0.20125679671764374, - "loss_sent": 0.253811776638031, - "loss_sod": 0.050551868975162506, - "loss_total": 0.5056204795837402, - "step": 384499 - }, - { - "epoch": 0.031, - "grad_norm": 0.9964558482170105, - "learning_rate": 2.4176781375100374e-07, - "loss": 0.3826, - "step": 384500 - }, - { - "epoch": 0.031198, - "loss_gen": 5.509469509124756, - "loss_rtd": 0.22962869703769684, - "loss_sent": 0.20768941938877106, - "loss_sod": 0.05147034674882889, - "loss_total": 0.4887884855270386, - "step": 384599 - }, - { - "epoch": 0.031198, - "loss_gen": 5.993651390075684, - "loss_rtd": 0.19513122737407684, - "loss_sent": 0.17074358463287354, - "loss_sod": 0.04677882045507431, - "loss_total": 0.4126536250114441, - "step": 384599 - }, - { - "epoch": 0.0312, - "grad_norm": 1.2593488693237305, - "learning_rate": 2.386609681450824e-07, - "loss": 0.4198, - "step": 384600 - }, - { - "epoch": 0.031398, - "loss_gen": 5.636050701141357, - "loss_rtd": 0.19941259920597076, - "loss_sent": 0.08777549862861633, - "loss_sod": 0.0025134964380413294, - "loss_total": 0.28970158100128174, - "step": 384699 - }, - { - "epoch": 0.031398, - "loss_gen": 5.861685752868652, - "loss_rtd": 0.22229737043380737, - "loss_sent": 0.22134365141391754, - "loss_sod": 0.010315588675439358, - "loss_total": 0.45395660400390625, - "step": 384699 - }, - { - "epoch": 0.0314, - "grad_norm": 0.7069348692893982, - "learning_rate": 2.3557416640056507e-07, - "loss": 0.4189, - "step": 384700 - }, - { - "epoch": 0.031598, - "loss_gen": 5.751885414123535, - "loss_rtd": 0.2034197449684143, - "loss_sent": 0.139282688498497, - "loss_sod": 0.12326791137456894, - "loss_total": 0.46597033739089966, - "step": 384799 - }, - { - "epoch": 0.031598, - "loss_gen": 5.881619930267334, - "loss_rtd": 0.21095749735832214, - "loss_sent": 0.15282794833183289, - "loss_sod": 0.01254718005657196, - "loss_total": 0.3763326406478882, - "step": 384799 - }, - { - "epoch": 0.0316, - "grad_norm": 1.5512088537216187, - "learning_rate": 2.3250740976082374e-07, - "loss": 0.4155, - "step": 384800 - }, - { - "epoch": 0.031798, - "loss_gen": 5.850015163421631, - "loss_rtd": 0.17747046053409576, - "loss_sent": 0.1268550455570221, - "loss_sod": 0.1188102662563324, - "loss_total": 0.42313578724861145, - "step": 384899 - }, - { - "epoch": 0.031798, - "loss_gen": 6.000199794769287, - "loss_rtd": 0.20449306070804596, - "loss_sent": 0.10725359618663788, - "loss_sod": 0.049531131982803345, - "loss_total": 0.36127781867980957, - "step": 384899 - }, - { - "epoch": 0.0318, - "grad_norm": 1.0175875425338745, - "learning_rate": 2.2946069946114812e-07, - "loss": 0.4079, - "step": 384900 - }, - { - "epoch": 0.031998, - "loss_gen": 5.862027168273926, - "loss_rtd": 0.1923486888408661, - "loss_sent": 0.027933618053793907, - "loss_sod": 0.016524532809853554, - "loss_total": 0.23680683970451355, - "step": 384999 - }, - { - "epoch": 0.031998, - "loss_gen": 6.198087692260742, - "loss_rtd": 0.19978289306163788, - "loss_sent": 0.11878103762865067, - "loss_sod": 0.03205890953540802, - "loss_total": 0.35062283277511597, - "step": 384999 - }, - { - "epoch": 0.032, - "grad_norm": 0.5346891283988953, - "learning_rate": 2.2643403672875096e-07, - "loss": 0.4111, - "step": 385000 - }, - { - "epoch": 0.032, - "eval_loss": 0.4042034149169922, - "eval_runtime": 151.5404, - "eval_samples_per_second": 101.907, - "eval_steps_per_second": 0.798, - "step": 385000 - }, - { - "epoch": 0.032198, - "loss_gen": 6.003377914428711, - "loss_rtd": 0.21727389097213745, - "loss_sent": 0.16895000636577606, - "loss_sod": 0.05419757217168808, - "loss_total": 0.440421462059021, - "step": 385099 - }, - { - "epoch": 0.032198, - "loss_gen": 5.99362850189209, - "loss_rtd": 0.21535775065422058, - "loss_sent": 0.1301010698080063, - "loss_sod": 0.09239351004362106, - "loss_total": 0.43785232305526733, - "step": 385099 - }, - { - "epoch": 0.0322, - "grad_norm": 1.2825134992599487, - "learning_rate": 2.234274227827682e-07, - "loss": 0.4274, - "step": 385100 - }, - { - "epoch": 0.032398, - "loss_gen": 5.866090774536133, - "loss_rtd": 0.2175987809896469, - "loss_sent": 0.18335312604904175, - "loss_sod": 0.10989538580179214, - "loss_total": 0.5108473300933838, - "step": 385199 - }, - { - "epoch": 0.032398, - "loss_gen": 6.124553203582764, - "loss_rtd": 0.19101719558238983, - "loss_sent": 0.06919978559017181, - "loss_sod": 0.0287528894841671, - "loss_total": 0.28896987438201904, - "step": 385199 - }, - { - "epoch": 0.0324, - "grad_norm": 0.9552285671234131, - "learning_rate": 2.2044085883426435e-07, - "loss": 0.4007, - "step": 385200 - }, - { - "epoch": 0.032598, - "loss_gen": 5.905416488647461, - "loss_rtd": 0.21314513683319092, - "loss_sent": 0.4079139530658722, - "loss_sod": 0.03686285391449928, - "loss_total": 0.6579219102859497, - "step": 385299 - }, - { - "epoch": 0.032598, - "loss_gen": 5.727965831756592, - "loss_rtd": 0.1941075623035431, - "loss_sent": 0.1891579031944275, - "loss_sod": 0.01784624718129635, - "loss_total": 0.4011117219924927, - "step": 385299 - }, - { - "epoch": 0.0326, - "grad_norm": 1.9801541566848755, - "learning_rate": 2.174743460862383e-07, - "loss": 0.4309, - "step": 385300 - }, - { - "epoch": 0.032798, - "loss_gen": 5.6684980392456055, - "loss_rtd": 0.2137640118598938, - "loss_sent": 0.22157108783721924, - "loss_sod": 0.03500162810087204, - "loss_total": 0.4703367352485657, - "step": 385399 - }, - { - "epoch": 0.032798, - "loss_gen": 6.408374309539795, - "loss_rtd": 0.22371450066566467, - "loss_sent": 0.09138733893632889, - "loss_sod": 0.020972158759832382, - "loss_total": 0.33607399463653564, - "step": 385399 - }, - { - "epoch": 0.0328, - "grad_norm": 0.9286766052246094, - "learning_rate": 2.1452788573358417e-07, - "loss": 0.4045, - "step": 385400 - }, - { - "epoch": 0.032998, - "loss_gen": 5.861077785491943, - "loss_rtd": 0.2003045678138733, - "loss_sent": 0.18459151685237885, - "loss_sod": 0.026224972680211067, - "loss_total": 0.41112107038497925, - "step": 385499 - }, - { - "epoch": 0.032998, - "loss_gen": 6.228074073791504, - "loss_rtd": 0.21457162499427795, - "loss_sent": 0.19774803519248962, - "loss_sod": 0.07375230640172958, - "loss_total": 0.48607197403907776, - "step": 385499 - }, - { - "epoch": 0.033, - "grad_norm": 0.6838713884353638, - "learning_rate": 2.1160147896314709e-07, - "loss": 0.4385, - "step": 385500 - }, - { - "epoch": 0.033198, - "loss_gen": 5.819832801818848, - "loss_rtd": 0.23806336522102356, - "loss_sent": 0.15349172055721283, - "loss_sod": 0.08721265941858292, - "loss_total": 0.4787677526473999, - "step": 385599 - }, - { - "epoch": 0.033198, - "loss_gen": 6.045207977294922, - "loss_rtd": 0.2031872421503067, - "loss_sent": 0.16494791209697723, - "loss_sod": 0.007363510318100452, - "loss_total": 0.3754986524581909, - "step": 385599 - }, - { - "epoch": 0.0332, - "grad_norm": 0.6510876417160034, - "learning_rate": 2.086951269536841e-07, - "loss": 0.4093, - "step": 385600 - }, - { - "epoch": 0.033398, - "loss_gen": 5.804920673370361, - "loss_rtd": 0.2171359807252884, - "loss_sent": 0.40684863924980164, - "loss_sod": 0.036228228360414505, - "loss_total": 0.6602128744125366, - "step": 385699 - }, - { - "epoch": 0.033398, - "loss_gen": 5.90587854385376, - "loss_rtd": 0.20843030512332916, - "loss_sent": 0.33964505791664124, - "loss_sod": 0.02735271118581295, - "loss_total": 0.5754280686378479, - "step": 385699 - }, - { - "epoch": 0.0334, - "grad_norm": 1.999657154083252, - "learning_rate": 2.0580883087586434e-07, - "loss": 0.4108, - "step": 385700 - }, - { - "epoch": 0.033598, - "loss_gen": 5.4843058586120605, - "loss_rtd": 0.18094661831855774, - "loss_sent": 0.04104515165090561, - "loss_sod": 0.06437574326992035, - "loss_total": 0.2863675057888031, - "step": 385799 - }, - { - "epoch": 0.033598, - "loss_gen": 5.223611831665039, - "loss_rtd": 0.14001010358333588, - "loss_sent": 3.558278694981709e-05, - "loss_sod": 0.06727577745914459, - "loss_total": 0.20732146501541138, - "step": 385799 - }, - { - "epoch": 0.0336, - "grad_norm": 0.6759509444236755, - "learning_rate": 2.0294259189229669e-07, - "loss": 0.4256, - "step": 385800 - }, - { - "epoch": 0.033798, - "loss_gen": 5.40152645111084, - "loss_rtd": 0.15343868732452393, - "loss_sent": 0.027872242033481598, - "loss_sod": 0.17344465851783752, - "loss_total": 0.35475558042526245, - "step": 385899 - }, - { - "epoch": 0.033798, - "loss_gen": 5.650938510894775, - "loss_rtd": 0.17356042563915253, - "loss_sent": 0.04674892500042915, - "loss_sod": 0.2110554277896881, - "loss_total": 0.4313647747039795, - "step": 385899 - }, - { - "epoch": 0.0338, - "grad_norm": 1.336422324180603, - "learning_rate": 2.0009641115749657e-07, - "loss": 0.4086, - "step": 385900 - }, - { - "epoch": 0.033998, - "loss_gen": 5.942296981811523, - "loss_rtd": 0.20877361297607422, - "loss_sent": 0.2556697726249695, - "loss_sod": 0.040888089686632156, - "loss_total": 0.5053314566612244, - "step": 385999 - }, - { - "epoch": 0.033998, - "loss_gen": 5.3268656730651855, - "loss_rtd": 0.2024465650320053, - "loss_sent": 0.01012949924916029, - "loss_sod": 0.11086844652891159, - "loss_total": 0.32344451546669006, - "step": 385999 - }, - { - "epoch": 0.034, - "grad_norm": 0.8952791690826416, - "learning_rate": 1.972702898179024e-07, - "loss": 0.4115, - "step": 386000 - }, - { - "epoch": 0.034, - "eval_loss": 0.4023250937461853, - "eval_runtime": 150.0033, - "eval_samples_per_second": 102.951, - "eval_steps_per_second": 0.807, - "step": 386000 - }, - { - "epoch": 0.000198, - "loss_gen": 6.070064067840576, - "loss_rtd": 0.22699445486068726, - "loss_sent": 0.23134654760360718, - "loss_sod": 0.002963309409096837, - "loss_total": 0.46130430698394775, - "step": 386099 - }, - { - "epoch": 0.000198, - "loss_gen": 5.610538482666016, - "loss_rtd": 0.2264588475227356, - "loss_sent": 0.16599753499031067, - "loss_sod": 0.05153141915798187, - "loss_total": 0.44398781657218933, - "step": 386099 - }, - { - "epoch": 0.0002, - "grad_norm": 1.428983449935913, - "learning_rate": 1.94464229011887e-07, - "loss": 0.4189, - "step": 386100 - }, - { - "epoch": 0.000398, - "loss_gen": 6.106116771697998, - "loss_rtd": 0.21767903864383698, - "loss_sent": 0.11991839110851288, - "loss_sod": 0.05915965884923935, - "loss_total": 0.3967570662498474, - "step": 386199 - }, - { - "epoch": 0.000398, - "loss_gen": 5.915314674377441, - "loss_rtd": 0.20738781988620758, - "loss_sent": 0.1467045545578003, - "loss_sod": 0.01918071135878563, - "loss_total": 0.3732730746269226, - "step": 386199 - }, - { - "epoch": 0.0004, - "grad_norm": 1.726487398147583, - "learning_rate": 1.9167822986971838e-07, - "loss": 0.418, - "step": 386200 - }, - { - "epoch": 0.000598, - "loss_gen": 5.798270225524902, - "loss_rtd": 0.2067991942167282, - "loss_sent": 0.05629365146160126, - "loss_sod": 0.005625860765576363, - "loss_total": 0.2687187194824219, - "step": 386299 - }, - { - "epoch": 0.000598, - "loss_gen": 5.905458927154541, - "loss_rtd": 0.22061729431152344, - "loss_sent": 0.4025327265262604, - "loss_sod": 0.11184671521186829, - "loss_total": 0.7349967360496521, - "step": 386299 - }, - { - "epoch": 0.0006, - "grad_norm": 1.7462387084960938, - "learning_rate": 1.889122935136045e-07, - "loss": 0.4205, - "step": 386300 - }, - { - "epoch": 0.000798, - "loss_gen": 5.854974746704102, - "loss_rtd": 0.2143116444349289, - "loss_sent": 0.21600039303302765, - "loss_sod": 0.030118010938167572, - "loss_total": 0.4604300260543823, - "step": 386399 - }, - { - "epoch": 0.000798, - "loss_gen": 5.925917148590088, - "loss_rtd": 0.2109098732471466, - "loss_sent": 0.24010999500751495, - "loss_sod": 0.03709014505147934, - "loss_total": 0.4881100058555603, - "step": 386399 - }, - { - "epoch": 0.0008, - "grad_norm": 1.3110747337341309, - "learning_rate": 1.861664210576597e-07, - "loss": 0.4123, - "step": 386400 - }, - { - "epoch": 0.000998, - "loss_gen": 5.94522762298584, - "loss_rtd": 0.2248603105545044, - "loss_sent": 0.15762391686439514, - "loss_sod": 0.026143062859773636, - "loss_total": 0.40862730145454407, - "step": 386499 - }, - { - "epoch": 0.000998, - "loss_gen": 5.8175578117370605, - "loss_rtd": 0.20158784091472626, - "loss_sent": 0.11587843298912048, - "loss_sod": 0.08866304159164429, - "loss_total": 0.4061293303966522, - "step": 386499 - }, - { - "epoch": 0.001, - "grad_norm": 1.1315805912017822, - "learning_rate": 1.8344061360792696e-07, - "loss": 0.4193, - "step": 386500 - }, - { - "epoch": 0.001198, - "loss_gen": 5.619412422180176, - "loss_rtd": 0.20225612819194794, - "loss_sent": 0.432159423828125, - "loss_sod": 0.10655496269464493, - "loss_total": 0.7409704923629761, - "step": 386599 - }, - { - "epoch": 0.001198, - "loss_gen": 5.699313640594482, - "loss_rtd": 0.1926557570695877, - "loss_sent": 0.09600414335727692, - "loss_sod": 0.01603185385465622, - "loss_total": 0.30469173192977905, - "step": 386599 - }, - { - "epoch": 0.0012, - "grad_norm": 2.7344155311584473, - "learning_rate": 1.8073487226235585e-07, - "loss": 0.4037, - "step": 386600 - }, - { - "epoch": 0.001398, - "loss_gen": 5.58875036239624, - "loss_rtd": 0.21671243011951447, - "loss_sent": 0.05101096257567406, - "loss_sod": 0.03717564418911934, - "loss_total": 0.30489903688430786, - "step": 386699 - }, - { - "epoch": 0.001398, - "loss_gen": 5.969257354736328, - "loss_rtd": 0.19849984347820282, - "loss_sent": 0.24645492434501648, - "loss_sod": 0.03874385356903076, - "loss_total": 0.48369860649108887, - "step": 386699 - }, - { - "epoch": 0.0014, - "grad_norm": 0.7487657070159912, - "learning_rate": 1.78049198110819e-07, - "loss": 0.4166, - "step": 386700 - }, - { - "epoch": 0.001598, - "loss_gen": 5.676285266876221, - "loss_rtd": 0.18517841398715973, - "loss_sent": 0.10563670098781586, - "loss_sod": 0.0461493618786335, - "loss_total": 0.33696448802948, - "step": 386799 - }, - { - "epoch": 0.001598, - "loss_gen": 6.1383209228515625, - "loss_rtd": 0.21085192263126373, - "loss_sent": 0.15274006128311157, - "loss_sod": 0.036854542791843414, - "loss_total": 0.4004465341567993, - "step": 386799 - }, - { - "epoch": 0.0016, - "grad_norm": 0.7479475736618042, - "learning_rate": 1.7538359223510657e-07, - "loss": 0.4032, - "step": 386800 - }, - { - "epoch": 0.001798, - "loss_gen": 5.616851806640625, - "loss_rtd": 0.18981054425239563, - "loss_sent": 0.0009357736562378705, - "loss_sod": 0.11195822805166245, - "loss_total": 0.3027045428752899, - "step": 386899 - }, - { - "epoch": 0.001798, - "loss_gen": 5.281965255737305, - "loss_rtd": 0.15668818354606628, - "loss_sent": 0.14297324419021606, - "loss_sod": 0.019447026774287224, - "loss_total": 0.3191084563732147, - "step": 386899 - }, - { - "epoch": 0.0018, - "grad_norm": 0.8839295506477356, - "learning_rate": 1.7273805570892643e-07, - "loss": 0.4094, - "step": 386900 - }, - { - "epoch": 0.001998, - "loss_gen": 5.872824668884277, - "loss_rtd": 0.20653888583183289, - "loss_sent": 0.06045421585440636, - "loss_sod": 0.060713499784469604, - "loss_total": 0.32770660519599915, - "step": 386999 - }, - { - "epoch": 0.001998, - "loss_gen": 5.871523380279541, - "loss_rtd": 0.21357491612434387, - "loss_sent": 0.16598744690418243, - "loss_sod": 0.0067842514254152775, - "loss_total": 0.38634660840034485, - "step": 386999 - }, - { - "epoch": 0.002, - "grad_norm": 0.70506352186203, - "learning_rate": 1.7011258959789832e-07, - "loss": 0.4138, - "step": 387000 - }, - { - "epoch": 0.002, - "eval_loss": 0.38906314969062805, - "eval_runtime": 152.8094, - "eval_samples_per_second": 101.061, - "eval_steps_per_second": 0.792, - "step": 387000 - }, - { - "epoch": 0.002198, - "loss_gen": 5.941445827484131, - "loss_rtd": 0.2205839902162552, - "loss_sent": 0.24621763825416565, - "loss_sod": 0.03623174503445625, - "loss_total": 0.5030333995819092, - "step": 387099 - }, - { - "epoch": 0.002198, - "loss_gen": 5.913673400878906, - "loss_rtd": 0.1947707235813141, - "loss_sent": 0.28452837467193604, - "loss_sod": 0.04408812150359154, - "loss_total": 0.5233871936798096, - "step": 387099 - }, - { - "epoch": 0.0022, - "grad_norm": 0.8668487071990967, - "learning_rate": 1.675071949595597e-07, - "loss": 0.4297, - "step": 387100 - }, - { - "epoch": 0.002398, - "loss_gen": 5.786038398742676, - "loss_rtd": 0.23068787157535553, - "loss_sent": 0.16383348405361176, - "loss_sod": 0.0026258546859025955, - "loss_total": 0.39714720845222473, - "step": 387199 - }, - { - "epoch": 0.002398, - "loss_gen": 5.983335971832275, - "loss_rtd": 0.21985170245170593, - "loss_sent": 0.2635365426540375, - "loss_sod": 0.07929453253746033, - "loss_total": 0.5626827478408813, - "step": 387199 - }, - { - "epoch": 0.0024, - "grad_norm": 1.0871856212615967, - "learning_rate": 1.6492187284336546e-07, - "loss": 0.4306, - "step": 387200 - }, - { - "epoch": 0.002598, - "loss_gen": 6.006869316101074, - "loss_rtd": 0.19651633501052856, - "loss_sent": 0.11407383531332016, - "loss_sod": 0.06965118646621704, - "loss_total": 0.38024136424064636, - "step": 387299 - }, - { - "epoch": 0.002598, - "loss_gen": 5.749596118927002, - "loss_rtd": 0.2103724628686905, - "loss_sent": 0.0785311684012413, - "loss_sod": 0.02853006310760975, - "loss_total": 0.3174336850643158, - "step": 387299 - }, - { - "epoch": 0.0026, - "grad_norm": 1.018624186515808, - "learning_rate": 1.6235662429068266e-07, - "loss": 0.435, - "step": 387300 - }, - { - "epoch": 0.002798, - "loss_gen": 6.01922082901001, - "loss_rtd": 0.1941412389278412, - "loss_sent": 0.15151628851890564, - "loss_sod": 0.04446466267108917, - "loss_total": 0.3901221752166748, - "step": 387399 - }, - { - "epoch": 0.002798, - "loss_gen": 6.114126682281494, - "loss_rtd": 0.2113575041294098, - "loss_sent": 0.1491912603378296, - "loss_sod": 0.02346138283610344, - "loss_total": 0.3840101361274719, - "step": 387399 - }, - { - "epoch": 0.0028, - "grad_norm": 0.7707430124282837, - "learning_rate": 1.5981145033479027e-07, - "loss": 0.4003, - "step": 387400 - }, - { - "epoch": 0.002998, - "loss_gen": 5.946076393127441, - "loss_rtd": 0.184996098279953, - "loss_sent": 0.534684419631958, - "loss_sod": 0.013645555824041367, - "loss_total": 0.7333260774612427, - "step": 387499 - }, - { - "epoch": 0.002998, - "loss_gen": 5.926239490509033, - "loss_rtd": 0.2150571346282959, - "loss_sent": 0.20656444132328033, - "loss_sod": 0.12248778343200684, - "loss_total": 0.5441093444824219, - "step": 387499 - }, - { - "epoch": 0.003, - "grad_norm": 1.3904284238815308, - "learning_rate": 1.5728635200087938e-07, - "loss": 0.4064, - "step": 387500 - }, - { - "epoch": 0.003198, - "loss_gen": 5.78752326965332, - "loss_rtd": 0.1943551003932953, - "loss_sent": 0.07164817303419113, - "loss_sod": 0.03743426129221916, - "loss_total": 0.3034375309944153, - "step": 387599 - }, - { - "epoch": 0.003198, - "loss_gen": 5.449794292449951, - "loss_rtd": 0.1654985547065735, - "loss_sent": 0.016390370205044746, - "loss_sod": 0.1290266364812851, - "loss_total": 0.31091558933258057, - "step": 387599 - }, - { - "epoch": 0.0032, - "grad_norm": 0.9882694482803345, - "learning_rate": 1.5478133030607522e-07, - "loss": 0.4017, - "step": 387600 - }, - { - "epoch": 0.003398, - "loss_gen": 5.94898796081543, - "loss_rtd": 0.21364334225654602, - "loss_sent": 0.15191717445850372, - "loss_sod": 0.05954572185873985, - "loss_total": 0.4251062273979187, - "step": 387699 - }, - { - "epoch": 0.003398, - "loss_gen": 5.825323104858398, - "loss_rtd": 0.20545744895935059, - "loss_sent": 0.642839789390564, - "loss_sod": 0.12043068557977676, - "loss_total": 0.9687279462814331, - "step": 387699 - }, - { - "epoch": 0.0034, - "grad_norm": 2.926527500152588, - "learning_rate": 1.5229638625938182e-07, - "loss": 0.4243, - "step": 387700 - }, - { - "epoch": 0.003598, - "loss_gen": 6.00454044342041, - "loss_rtd": 0.21406090259552002, - "loss_sent": 0.28503379225730896, - "loss_sod": 0.08366947621107101, - "loss_total": 0.5827641487121582, - "step": 387799 - }, - { - "epoch": 0.003598, - "loss_gen": 5.878026962280273, - "loss_rtd": 0.20210398733615875, - "loss_sent": 0.3536505401134491, - "loss_sod": 0.005776241421699524, - "loss_total": 0.5615307688713074, - "step": 387799 - }, - { - "epoch": 0.0036, - "grad_norm": 1.3980095386505127, - "learning_rate": 1.4983152086174844e-07, - "loss": 0.4275, - "step": 387800 - }, - { - "epoch": 0.003798, - "loss_gen": 5.576259613037109, - "loss_rtd": 0.1753866970539093, - "loss_sent": 0.0556168295443058, - "loss_sod": 0.012317564338445663, - "loss_total": 0.24332109093666077, - "step": 387899 - }, - { - "epoch": 0.003798, - "loss_gen": 5.643679618835449, - "loss_rtd": 0.17319001257419586, - "loss_sent": 0.030518053099513054, - "loss_sod": 0.022975772619247437, - "loss_total": 0.2266838401556015, - "step": 387899 - }, - { - "epoch": 0.0038, - "grad_norm": 0.5258404016494751, - "learning_rate": 1.473867351060143e-07, - "loss": 0.4209, - "step": 387900 - }, - { - "epoch": 0.003998, - "loss_gen": 5.597390651702881, - "loss_rtd": 0.20969554781913757, - "loss_sent": 0.24825482070446014, - "loss_sod": 0.05265036225318909, - "loss_total": 0.510600745677948, - "step": 387999 - }, - { - "epoch": 0.003998, - "loss_gen": 5.971643447875977, - "loss_rtd": 0.20426103472709656, - "loss_sent": 0.09622426331043243, - "loss_sod": 0.05200056731700897, - "loss_total": 0.3524858355522156, - "step": 387999 - }, - { - "epoch": 0.004, - "grad_norm": 1.4008424282073975, - "learning_rate": 1.4496202997694165e-07, - "loss": 0.4163, - "step": 388000 - }, - { - "epoch": 0.004, - "eval_loss": 0.40085962414741516, - "eval_runtime": 149.9182, - "eval_samples_per_second": 103.009, - "eval_steps_per_second": 0.807, - "step": 388000 - }, - { - "epoch": 0.004198, - "loss_gen": 5.842921733856201, - "loss_rtd": 0.1869976669549942, - "loss_sent": 0.1804313212633133, - "loss_sod": 0.013124816119670868, - "loss_total": 0.3805537819862366, - "step": 388099 - }, - { - "epoch": 0.004198, - "loss_gen": 5.944189548492432, - "loss_rtd": 0.21024784445762634, - "loss_sent": 0.3657797873020172, - "loss_sod": 0.13275648653507233, - "loss_total": 0.7087841033935547, - "step": 388099 - }, - { - "epoch": 0.0042, - "grad_norm": 1.804450511932373, - "learning_rate": 1.4255740645120475e-07, - "loss": 0.4081, - "step": 388100 - }, - { - "epoch": 0.004398, - "loss_gen": 5.947266101837158, - "loss_rtd": 0.200907364487648, - "loss_sent": 0.24104642868041992, - "loss_sod": 0.0234132781624794, - "loss_total": 0.46536707878112793, - "step": 388199 - }, - { - "epoch": 0.004398, - "loss_gen": 6.066715717315674, - "loss_rtd": 0.22250112891197205, - "loss_sent": 0.476871520280838, - "loss_sod": 0.01108524575829506, - "loss_total": 0.7104579210281372, - "step": 388199 - }, - { - "epoch": 0.0044, - "grad_norm": 1.4031575918197632, - "learning_rate": 1.4017286549737884e-07, - "loss": 0.4126, - "step": 388200 - }, - { - "epoch": 0.004598, - "loss_gen": 5.766085147857666, - "loss_rtd": 0.22764943540096283, - "loss_sent": 0.2466544657945633, - "loss_sod": 0.04013913869857788, - "loss_total": 0.514443039894104, - "step": 388299 - }, - { - "epoch": 0.004598, - "loss_gen": 5.9625654220581055, - "loss_rtd": 0.1829644739627838, - "loss_sent": 0.2364800125360489, - "loss_sod": 0.0007151039317250252, - "loss_total": 0.42015957832336426, - "step": 388299 - }, - { - "epoch": 0.0046, - "grad_norm": 0.9459004402160645, - "learning_rate": 1.378084080759623e-07, - "loss": 0.4243, - "step": 388300 - }, - { - "epoch": 0.004798, - "loss_gen": 5.781798362731934, - "loss_rtd": 0.20330537855625153, - "loss_sent": 0.18447181582450867, - "loss_sod": 0.06919562816619873, - "loss_total": 0.45697280764579773, - "step": 388399 - }, - { - "epoch": 0.004798, - "loss_gen": 6.023786544799805, - "loss_rtd": 0.21240007877349854, - "loss_sent": 0.14837747812271118, - "loss_sod": 0.09386526793241501, - "loss_total": 0.4546428322792053, - "step": 388399 - }, - { - "epoch": 0.0048, - "grad_norm": 1.1525017023086548, - "learning_rate": 1.354640351393488e-07, - "loss": 0.4147, - "step": 388400 - }, - { - "epoch": 0.004998, - "loss_gen": 5.607063293457031, - "loss_rtd": 0.1774669736623764, - "loss_sent": 0.19217942655086517, - "loss_sod": 0.03440697491168976, - "loss_total": 0.40405339002609253, - "step": 388499 - }, - { - "epoch": 0.004998, - "loss_gen": 5.909639835357666, - "loss_rtd": 0.20952454209327698, - "loss_sent": 0.10948611050844193, - "loss_sod": 0.04632039740681648, - "loss_total": 0.3653310537338257, - "step": 388499 - }, - { - "epoch": 0.005, - "grad_norm": 0.8776269555091858, - "learning_rate": 1.3313974763186632e-07, - "loss": 0.4087, - "step": 388500 - }, - { - "epoch": 0.005198, - "loss_gen": 5.467298984527588, - "loss_rtd": 0.17283503711223602, - "loss_sent": 0.006112792529165745, - "loss_sod": 0.07768760621547699, - "loss_total": 0.2566354274749756, - "step": 388599 - }, - { - "epoch": 0.005198, - "loss_gen": 5.354569911956787, - "loss_rtd": 0.16272178292274475, - "loss_sent": 0.07872873544692993, - "loss_sod": 0.08613304793834686, - "loss_total": 0.32758355140686035, - "step": 388599 - }, - { - "epoch": 0.0052, - "grad_norm": 1.0944674015045166, - "learning_rate": 1.3083554648972707e-07, - "loss": 0.432, - "step": 388600 - }, - { - "epoch": 0.005398, - "loss_gen": 5.879866600036621, - "loss_rtd": 0.22599825263023376, - "loss_sent": 0.4684654772281647, - "loss_sod": 0.02286195382475853, - "loss_total": 0.7173256874084473, - "step": 388699 - }, - { - "epoch": 0.005398, - "loss_gen": 6.004671573638916, - "loss_rtd": 0.20385044813156128, - "loss_sent": 0.20219197869300842, - "loss_sod": 0.02373768761754036, - "loss_total": 0.42978012561798096, - "step": 388699 - }, - { - "epoch": 0.0054, - "grad_norm": 2.1442813873291016, - "learning_rate": 1.285514326410664e-07, - "loss": 0.4185, - "step": 388700 - }, - { - "epoch": 0.005598, - "loss_gen": 5.789896488189697, - "loss_rtd": 0.2033519148826599, - "loss_sent": 0.24579353630542755, - "loss_sod": 0.014285017736256123, - "loss_total": 0.4634304642677307, - "step": 388799 - }, - { - "epoch": 0.005598, - "loss_gen": 5.791206359863281, - "loss_rtd": 0.2188941091299057, - "loss_sent": 0.37389636039733887, - "loss_sod": 0.018224112689495087, - "loss_total": 0.6110146045684814, - "step": 388799 - }, - { - "epoch": 0.0056, - "grad_norm": 1.1618871688842773, - "learning_rate": 1.2628740700592613e-07, - "loss": 0.429, - "step": 388800 - }, - { - "epoch": 0.005798, - "loss_gen": 5.514381408691406, - "loss_rtd": 0.22766588628292084, - "loss_sent": 0.10298759490251541, - "loss_sod": 0.004655986092984676, - "loss_total": 0.3353094756603241, - "step": 388899 - }, - { - "epoch": 0.005798, - "loss_gen": 6.139984130859375, - "loss_rtd": 0.19265694916248322, - "loss_sent": 0.3504784405231476, - "loss_sod": 0.017564527690410614, - "loss_total": 0.5606999397277832, - "step": 388899 - }, - { - "epoch": 0.0058, - "grad_norm": 0.9985604882240295, - "learning_rate": 1.2404347049625453e-07, - "loss": 0.4122, - "step": 388900 - }, - { - "epoch": 0.005998, - "loss_gen": 6.484412670135498, - "loss_rtd": 0.2049032300710678, - "loss_sent": 0.12536132335662842, - "loss_sod": 0.11522291600704193, - "loss_total": 0.44548746943473816, - "step": 388999 - }, - { - "epoch": 0.005998, - "loss_gen": 5.710115909576416, - "loss_rtd": 0.21021217107772827, - "loss_sent": 0.11833181977272034, - "loss_sod": 0.06307626515626907, - "loss_total": 0.3916202485561371, - "step": 388999 - }, - { - "epoch": 0.006, - "grad_norm": 0.8251280784606934, - "learning_rate": 1.218196240159064e-07, - "loss": 0.4118, - "step": 389000 - }, - { - "epoch": 0.006, - "eval_loss": 0.3989259898662567, - "eval_runtime": 151.4889, - "eval_samples_per_second": 101.941, - "eval_steps_per_second": 0.799, - "step": 389000 - }, - { - "epoch": 0.006198, - "loss_gen": 5.922330856323242, - "loss_rtd": 0.20675304532051086, - "loss_sent": 0.2864395081996918, - "loss_sod": 0.04010416567325592, - "loss_total": 0.5332967042922974, - "step": 389099 - }, - { - "epoch": 0.006198, - "loss_gen": 5.8388824462890625, - "loss_rtd": 0.22652123868465424, - "loss_sent": 0.16067945957183838, - "loss_sod": 0.005014869384467602, - "loss_total": 0.3922155499458313, - "step": 389099 - }, - { - "epoch": 0.0062, - "grad_norm": 0.7238194942474365, - "learning_rate": 1.1961586846064855e-07, - "loss": 0.4005, - "step": 389100 - }, - { - "epoch": 0.006398, - "loss_gen": 5.659124851226807, - "loss_rtd": 0.18338139355182648, - "loss_sent": 0.030780743807554245, - "loss_sod": 0.12981653213500977, - "loss_total": 0.3439786434173584, - "step": 389199 - }, - { - "epoch": 0.006398, - "loss_gen": 6.135606288909912, - "loss_rtd": 0.2033698856830597, - "loss_sent": 0.1476442962884903, - "loss_sod": 0.08842173963785172, - "loss_total": 0.4394358992576599, - "step": 389199 - }, - { - "epoch": 0.0064, - "grad_norm": 1.0673482418060303, - "learning_rate": 1.174322047181542e-07, - "loss": 0.3953, - "step": 389200 - }, - { - "epoch": 0.006598, - "loss_gen": 5.762836933135986, - "loss_rtd": 0.18234577775001526, - "loss_sent": 0.1359056979417801, - "loss_sod": 0.07956341654062271, - "loss_total": 0.39781486988067627, - "step": 389299 - }, - { - "epoch": 0.006598, - "loss_gen": 5.6690497398376465, - "loss_rtd": 0.21876993775367737, - "loss_sent": 0.15573304891586304, - "loss_sod": 0.03257669508457184, - "loss_total": 0.40707969665527344, - "step": 389299 - }, - { - "epoch": 0.0066, - "grad_norm": 1.1443850994110107, - "learning_rate": 1.1526863366800311e-07, - "loss": 0.4184, - "step": 389300 - }, - { - "epoch": 0.006798, - "loss_gen": 5.593442916870117, - "loss_rtd": 0.16900567710399628, - "loss_sent": 2.23645238293102e-05, - "loss_sod": 0.13190466165542603, - "loss_total": 0.30093270540237427, - "step": 389399 - }, - { - "epoch": 0.006798, - "loss_gen": 5.429333209991455, - "loss_rtd": 0.16373611986637115, - "loss_sent": 2.240178218926303e-05, - "loss_sod": 0.07734975218772888, - "loss_total": 0.2411082684993744, - "step": 389399 - }, - { - "epoch": 0.0068, - "grad_norm": 0.9363405108451843, - "learning_rate": 1.1312515618168151e-07, - "loss": 0.4081, - "step": 389400 - }, - { - "epoch": 0.006998, - "loss_gen": 5.979067802429199, - "loss_rtd": 0.21712931990623474, - "loss_sent": 0.09526192396879196, - "loss_sod": 0.10427667945623398, - "loss_total": 0.4166679382324219, - "step": 389499 - }, - { - "epoch": 0.006998, - "loss_gen": 5.794955730438232, - "loss_rtd": 0.19414635002613068, - "loss_sent": 0.4644427001476288, - "loss_sod": 0.1854114681482315, - "loss_total": 0.844000518321991, - "step": 389499 - }, - { - "epoch": 0.007, - "grad_norm": 1.9044196605682373, - "learning_rate": 1.1100177312258209e-07, - "loss": 0.4137, - "step": 389500 - }, - { - "epoch": 0.007198, - "loss_gen": 6.104918479919434, - "loss_rtd": 0.20259903371334076, - "loss_sent": 0.13526050746440887, - "loss_sod": 0.012019608169794083, - "loss_total": 0.3498791456222534, - "step": 389599 - }, - { - "epoch": 0.007198, - "loss_gen": 5.950196743011475, - "loss_rtd": 0.22066274285316467, - "loss_sent": 0.17763370275497437, - "loss_sod": 0.008551598526537418, - "loss_total": 0.4068480432033539, - "step": 389599 - }, - { - "epoch": 0.0072, - "grad_norm": 0.5089385509490967, - "learning_rate": 1.0889848534599845e-07, - "loss": 0.4315, - "step": 389600 - }, - { - "epoch": 0.007398, - "loss_gen": 5.721614360809326, - "loss_rtd": 0.21245476603507996, - "loss_sent": 0.2959735095500946, - "loss_sod": 0.014893703162670135, - "loss_total": 0.5233219861984253, - "step": 389699 - }, - { - "epoch": 0.007398, - "loss_gen": 5.780533790588379, - "loss_rtd": 0.20026029646396637, - "loss_sent": 0.20466534793376923, - "loss_sod": 0.010142795741558075, - "loss_total": 0.41506844758987427, - "step": 389699 - }, - { - "epoch": 0.0074, - "grad_norm": 1.5230454206466675, - "learning_rate": 1.0681529369914179e-07, - "loss": 0.4087, - "step": 389700 - }, - { - "epoch": 0.007598, - "loss_gen": 6.072732448577881, - "loss_rtd": 0.21685031056404114, - "loss_sent": 0.16334985196590424, - "loss_sod": 0.018847428262233734, - "loss_total": 0.3990476131439209, - "step": 389799 - }, - { - "epoch": 0.007598, - "loss_gen": 5.786645889282227, - "loss_rtd": 0.18284183740615845, - "loss_sent": 0.20970797538757324, - "loss_sod": 0.011413590982556343, - "loss_total": 0.4039633870124817, - "step": 389799 - }, - { - "epoch": 0.0076, - "grad_norm": 0.8005570769309998, - "learning_rate": 1.0475219902111311e-07, - "loss": 0.4078, - "step": 389800 - }, - { - "epoch": 0.007798, - "loss_gen": 5.708725929260254, - "loss_rtd": 0.19107985496520996, - "loss_sent": 0.46121594309806824, - "loss_sod": 0.007444228045642376, - "loss_total": 0.6597400307655334, - "step": 389899 - }, - { - "epoch": 0.007798, - "loss_gen": 5.856007099151611, - "loss_rtd": 0.20543049275875092, - "loss_sent": 0.16584698855876923, - "loss_sod": 0.008434868417680264, - "loss_total": 0.3797123432159424, - "step": 389899 - }, - { - "epoch": 0.0078, - "grad_norm": 1.8368874788284302, - "learning_rate": 1.0270920214293656e-07, - "loss": 0.3981, - "step": 389900 - }, - { - "epoch": 0.007998, - "loss_gen": 5.760108947753906, - "loss_rtd": 0.2410728484392166, - "loss_sent": 0.060165468603372574, - "loss_sod": 0.015030169859528542, - "loss_total": 0.3162684738636017, - "step": 389999 - }, - { - "epoch": 0.007998, - "loss_gen": 6.092087268829346, - "loss_rtd": 0.18792934715747833, - "loss_sent": 0.1581990271806717, - "loss_sod": 0.020702190697193146, - "loss_total": 0.36683058738708496, - "step": 389999 - }, - { - "epoch": 0.008, - "grad_norm": 0.7414451837539673, - "learning_rate": 1.0068630388752609e-07, - "loss": 0.4129, - "step": 390000 - }, - { - "epoch": 0.008, - "eval_loss": 0.39498767256736755, - "eval_runtime": 149.8397, - "eval_samples_per_second": 103.063, - "eval_steps_per_second": 0.808, - "step": 390000 - }, - { - "epoch": 0.008198, - "loss_gen": 5.884185791015625, - "loss_rtd": 0.20744697749614716, - "loss_sent": 0.19164077937602997, - "loss_sod": 0.01973988302052021, - "loss_total": 0.4188276529312134, - "step": 390099 - }, - { - "epoch": 0.008198, - "loss_gen": 5.681702613830566, - "loss_rtd": 0.22216004133224487, - "loss_sent": 0.12108003348112106, - "loss_sod": 0.06146732717752457, - "loss_total": 0.4047074019908905, - "step": 390099 - }, - { - "epoch": 0.0082, - "grad_norm": 1.1543318033218384, - "learning_rate": 9.868350506970215e-08, - "loss": 0.4175, - "step": 390100 - }, - { - "epoch": 0.008398, - "loss_gen": 5.980822563171387, - "loss_rtd": 0.2021224945783615, - "loss_sent": 0.3701927363872528, - "loss_sod": 0.01370286662131548, - "loss_total": 0.5860180854797363, - "step": 390199 - }, - { - "epoch": 0.008398, - "loss_gen": 5.838639736175537, - "loss_rtd": 0.21277514100074768, - "loss_sent": 0.3194020688533783, - "loss_sod": 0.022717181593179703, - "loss_total": 0.5548943877220154, - "step": 390199 - }, - { - "epoch": 0.0084, - "grad_norm": 1.0459808111190796, - "learning_rate": 9.670080649619717e-08, - "loss": 0.4047, - "step": 390200 - }, - { - "epoch": 0.008598, - "loss_gen": 5.3288044929504395, - "loss_rtd": 0.17446598410606384, - "loss_sent": 2.305740963493008e-05, - "loss_sod": 0.059867922216653824, - "loss_total": 0.23435695469379425, - "step": 390299 - }, - { - "epoch": 0.008598, - "loss_gen": 5.603953838348389, - "loss_rtd": 0.1641068011522293, - "loss_sent": 0.09302457422018051, - "loss_sod": 0.07938657701015472, - "loss_total": 0.33651795983314514, - "step": 390299 - }, - { - "epoch": 0.0086, - "grad_norm": 0.8998137712478638, - "learning_rate": 9.473820896564456e-08, - "loss": 0.427, - "step": 390300 - }, - { - "epoch": 0.008798, - "loss_gen": 5.830483913421631, - "loss_rtd": 0.2241693139076233, - "loss_sent": 0.3471631407737732, - "loss_sod": 0.005685629788786173, - "loss_total": 0.5770180821418762, - "step": 390399 - }, - { - "epoch": 0.008798, - "loss_gen": 6.0032854080200195, - "loss_rtd": 0.22199895977973938, - "loss_sent": 0.20922960340976715, - "loss_sod": 0.0637601763010025, - "loss_total": 0.49498873949050903, - "step": 390399 - }, - { - "epoch": 0.0088, - "grad_norm": 1.4937852621078491, - "learning_rate": 9.27957132685675e-08, - "loss": 0.3951, - "step": 390400 - }, - { - "epoch": 0.008998, - "loss_gen": 6.0157470703125, - "loss_rtd": 0.2011171281337738, - "loss_sent": 0.2135574072599411, - "loss_sod": 0.003952609375119209, - "loss_total": 0.41862714290618896, - "step": 390499 - }, - { - "epoch": 0.008998, - "loss_gen": 6.025473117828369, - "loss_rtd": 0.2084123194217682, - "loss_sent": 0.30119219422340393, - "loss_sod": 0.012695659883320332, - "loss_total": 0.5223001837730408, - "step": 390499 - }, - { - "epoch": 0.009, - "grad_norm": 0.8088017702102661, - "learning_rate": 9.08733201874068e-08, - "loss": 0.4175, - "step": 390500 - }, - { - "epoch": 0.009198, - "loss_gen": 5.977158546447754, - "loss_rtd": 0.20262813568115234, - "loss_sent": 0.2567712962627411, - "loss_sod": 0.06708398461341858, - "loss_total": 0.526483416557312, - "step": 390599 - }, - { - "epoch": 0.009198, - "loss_gen": 5.823272705078125, - "loss_rtd": 0.21129167079925537, - "loss_sent": 0.08319632709026337, - "loss_sod": 0.1491374969482422, - "loss_total": 0.4436255097389221, - "step": 390599 - }, - { - "epoch": 0.0092, - "grad_norm": 1.7465977668762207, - "learning_rate": 8.897103049650412e-08, - "loss": 0.3949, - "step": 390600 - }, - { - "epoch": 0.009398, - "loss_gen": 5.868819236755371, - "loss_rtd": 0.214304119348526, - "loss_sent": 0.14899778366088867, - "loss_sod": 0.02877265214920044, - "loss_total": 0.3920745551586151, - "step": 390699 - }, - { - "epoch": 0.009398, - "loss_gen": 5.775680065155029, - "loss_rtd": 0.23000310361385345, - "loss_sent": 0.06425362825393677, - "loss_sod": 0.10222698003053665, - "loss_total": 0.39648371934890747, - "step": 390699 - }, - { - "epoch": 0.0094, - "grad_norm": 1.050985336303711, - "learning_rate": 8.708884496210768e-08, - "loss": 0.4117, - "step": 390700 - }, - { - "epoch": 0.009598, - "loss_gen": 5.170206069946289, - "loss_rtd": 0.17056401073932648, - "loss_sent": 2.2412956241169013e-05, - "loss_sod": 0.08258362114429474, - "loss_total": 0.25317004323005676, - "step": 390799 - }, - { - "epoch": 0.009598, - "loss_gen": 5.413459300994873, - "loss_rtd": 0.16320635378360748, - "loss_sent": 0.06688027083873749, - "loss_sod": 0.09646856784820557, - "loss_total": 0.32655519247055054, - "step": 390799 - }, - { - "epoch": 0.0096, - "grad_norm": 0.9087501168251038, - "learning_rate": 8.522676434234989e-08, - "loss": 0.4019, - "step": 390800 - }, - { - "epoch": 0.009798, - "loss_gen": 5.793584823608398, - "loss_rtd": 0.22636756300926208, - "loss_sent": 0.12923869490623474, - "loss_sod": 0.022730231285095215, - "loss_total": 0.37833648920059204, - "step": 390899 - }, - { - "epoch": 0.009798, - "loss_gen": 6.046942234039307, - "loss_rtd": 0.21500037610530853, - "loss_sent": 0.42974740266799927, - "loss_sod": 0.08335862308740616, - "loss_total": 0.7281063795089722, - "step": 390899 - }, - { - "epoch": 0.0098, - "grad_norm": 1.06930673122406, - "learning_rate": 8.338478938728634e-08, - "loss": 0.4044, - "step": 390900 - }, - { - "epoch": 0.009998, - "loss_gen": 6.144214153289795, - "loss_rtd": 0.20340508222579956, - "loss_sent": 0.08943892270326614, - "loss_sod": 0.03819578140974045, - "loss_total": 0.33103978633880615, - "step": 390999 - }, - { - "epoch": 0.009998, - "loss_gen": 5.959928512573242, - "loss_rtd": 0.2147907316684723, - "loss_sent": 0.08194345980882645, - "loss_sod": 0.08400901407003403, - "loss_total": 0.38074320554733276, - "step": 390999 - }, - { - "epoch": 0.01, - "grad_norm": 0.8810135722160339, - "learning_rate": 8.156292083885686e-08, - "loss": 0.4213, - "step": 391000 - }, - { - "epoch": 0.01, - "eval_loss": 0.389736145734787, - "eval_runtime": 150.3148, - "eval_samples_per_second": 102.738, - "eval_steps_per_second": 0.805, - "step": 391000 - }, - { - "epoch": 0.010198, - "loss_gen": 6.116897106170654, - "loss_rtd": 0.19888576865196228, - "loss_sent": 0.4463501274585724, - "loss_sod": 0.0318634957075119, - "loss_total": 0.6770994067192078, - "step": 391099 - }, - { - "epoch": 0.010198, - "loss_gen": 5.774707794189453, - "loss_rtd": 0.2125256359577179, - "loss_sent": 0.07201242446899414, - "loss_sod": 0.03535296395421028, - "loss_total": 0.3198910355567932, - "step": 391099 - }, - { - "epoch": 0.0102, - "grad_norm": 1.494367003440857, - "learning_rate": 7.976115943091334e-08, - "loss": 0.4085, - "step": 391100 - }, - { - "epoch": 0.010398, - "loss_gen": 5.748145580291748, - "loss_rtd": 0.2154005765914917, - "loss_sent": 0.6336947679519653, - "loss_sod": 0.028976723551750183, - "loss_total": 0.8780720829963684, - "step": 391199 - }, - { - "epoch": 0.010398, - "loss_gen": 6.1587748527526855, - "loss_rtd": 0.21844086050987244, - "loss_sent": 0.5092006921768188, - "loss_sod": 0.09236706048250198, - "loss_total": 0.8200086355209351, - "step": 391199 - }, - { - "epoch": 0.0104, - "grad_norm": 2.631178617477417, - "learning_rate": 7.797950588920855e-08, - "loss": 0.4198, - "step": 391200 - }, - { - "epoch": 0.010598, - "loss_gen": 5.9071364402771, - "loss_rtd": 0.21475382149219513, - "loss_sent": 0.10161326825618744, - "loss_sod": 0.10331739485263824, - "loss_total": 0.4196844696998596, - "step": 391299 - }, - { - "epoch": 0.010598, - "loss_gen": 5.329215049743652, - "loss_rtd": 0.17051318287849426, - "loss_sent": 0.00010216770169790834, - "loss_sod": 0.024984223768115044, - "loss_total": 0.19559957087039948, - "step": 391299 - }, - { - "epoch": 0.0106, - "grad_norm": 0.9734504222869873, - "learning_rate": 7.621796093138511e-08, - "loss": 0.4274, - "step": 391300 - }, - { - "epoch": 0.010798, - "loss_gen": 6.007766246795654, - "loss_rtd": 0.20674045383930206, - "loss_sent": 0.043503355234861374, - "loss_sod": 0.0852990448474884, - "loss_total": 0.33554285764694214, - "step": 391399 - }, - { - "epoch": 0.010798, - "loss_gen": 5.994940757751465, - "loss_rtd": 0.20298358798027039, - "loss_sent": 0.19987483322620392, - "loss_sod": 0.015618769451975822, - "loss_total": 0.4184771776199341, - "step": 391399 - }, - { - "epoch": 0.0108, - "grad_norm": 1.005513310432434, - "learning_rate": 7.447652526699766e-08, - "loss": 0.4158, - "step": 391400 - }, - { - "epoch": 0.010998, - "loss_gen": 5.739851951599121, - "loss_rtd": 0.20979925990104675, - "loss_sent": 0.16687257587909698, - "loss_sod": 0.02306627482175827, - "loss_total": 0.3997381329536438, - "step": 391499 - }, - { - "epoch": 0.010998, - "loss_gen": 6.118026256561279, - "loss_rtd": 0.22426855564117432, - "loss_sent": 0.2071017622947693, - "loss_sod": 0.040315523743629456, - "loss_total": 0.47168582677841187, - "step": 391499 - }, - { - "epoch": 0.011, - "grad_norm": 0.8095478415489197, - "learning_rate": 7.275519959749622e-08, - "loss": 0.4192, - "step": 391500 - }, - { - "epoch": 0.011198, - "loss_gen": 5.163768291473389, - "loss_rtd": 0.15256644785404205, - "loss_sent": 2.355658943997696e-05, - "loss_sod": 0.03270483762025833, - "loss_total": 0.18529485166072845, - "step": 391599 - }, - { - "epoch": 0.011198, - "loss_gen": 5.399328708648682, - "loss_rtd": 0.17275498807430267, - "loss_sent": 0.003234855132177472, - "loss_sod": 0.04913503676652908, - "loss_total": 0.22512488067150116, - "step": 391599 - }, - { - "epoch": 0.0112, - "grad_norm": 0.6801968812942505, - "learning_rate": 7.105398461623169e-08, - "loss": 0.4044, - "step": 391600 - }, - { - "epoch": 0.011398, - "loss_gen": 6.149866580963135, - "loss_rtd": 0.2096904069185257, - "loss_sent": 0.21248237788677216, - "loss_sod": 0.052042677998542786, - "loss_total": 0.47421544790267944, - "step": 391699 - }, - { - "epoch": 0.011398, - "loss_gen": 5.65095853805542, - "loss_rtd": 0.18483102321624756, - "loss_sent": 0.049149829894304276, - "loss_sod": 0.01666828989982605, - "loss_total": 0.2506491541862488, - "step": 391699 - }, - { - "epoch": 0.0114, - "grad_norm": 1.081852674484253, - "learning_rate": 6.937288100844485e-08, - "loss": 0.4074, - "step": 391700 - }, - { - "epoch": 0.011598, - "loss_gen": 6.197673797607422, - "loss_rtd": 0.2072892040014267, - "loss_sent": 0.010362375527620316, - "loss_sod": 0.15921708941459656, - "loss_total": 0.37686866521835327, - "step": 391799 - }, - { - "epoch": 0.011598, - "loss_gen": 5.257237434387207, - "loss_rtd": 0.16337601840496063, - "loss_sent": 2.4078102796920575e-05, - "loss_sod": 0.06838693469762802, - "loss_total": 0.23178702592849731, - "step": 391799 - }, - { - "epoch": 0.0116, - "grad_norm": 1.020723819732666, - "learning_rate": 6.771188945129958e-08, - "loss": 0.4117, - "step": 391800 - }, - { - "epoch": 0.011798, - "loss_gen": 5.401880741119385, - "loss_rtd": 0.15832042694091797, - "loss_sent": 0.002661922248080373, - "loss_sod": 0.049096040427684784, - "loss_total": 0.2100784033536911, - "step": 391899 - }, - { - "epoch": 0.011798, - "loss_gen": 5.75095796585083, - "loss_rtd": 0.2094363421201706, - "loss_sent": 0.16649065911769867, - "loss_sod": 0.07074245810508728, - "loss_total": 0.44666945934295654, - "step": 391899 - }, - { - "epoch": 0.0118, - "grad_norm": 0.9183186888694763, - "learning_rate": 6.607101061382737e-08, - "loss": 0.4106, - "step": 391900 - }, - { - "epoch": 0.011998, - "loss_gen": 5.547502040863037, - "loss_rtd": 0.18443872034549713, - "loss_sent": 0.08254744112491608, - "loss_sod": 0.013162685558199883, - "loss_total": 0.28014886379241943, - "step": 391999 - }, - { - "epoch": 0.011998, - "loss_gen": 5.738937854766846, - "loss_rtd": 0.20439235866069794, - "loss_sent": 0.30122336745262146, - "loss_sod": 0.04113311320543289, - "loss_total": 0.5467488765716553, - "step": 391999 - }, - { - "epoch": 0.012, - "grad_norm": 0.9484466910362244, - "learning_rate": 6.445024515698284e-08, - "loss": 0.4061, - "step": 392000 - }, - { - "epoch": 0.012, - "eval_loss": 0.40118321776390076, - "eval_runtime": 149.9632, - "eval_samples_per_second": 102.979, - "eval_steps_per_second": 0.807, - "step": 392000 - }, - { - "epoch": 0.012198, - "loss_gen": 6.28818941116333, - "loss_rtd": 0.20023567974567413, - "loss_sent": 0.2177213877439499, - "loss_sod": 0.07984787225723267, - "loss_total": 0.4978049397468567, - "step": 392099 - }, - { - "epoch": 0.012198, - "loss_gen": 5.625298023223877, - "loss_rtd": 0.18772730231285095, - "loss_sent": 2.5435860152356327e-05, - "loss_sod": 0.07460042834281921, - "loss_total": 0.26235315203666687, - "step": 392099 - }, - { - "epoch": 0.0122, - "grad_norm": 0.8974661231040955, - "learning_rate": 6.284959373360489e-08, - "loss": 0.4146, - "step": 392100 - }, - { - "epoch": 0.012398, - "loss_gen": 5.831862449645996, - "loss_rtd": 0.24395421147346497, - "loss_sent": 0.17839743196964264, - "loss_sod": 0.07743652909994125, - "loss_total": 0.49978816509246826, - "step": 392199 - }, - { - "epoch": 0.012398, - "loss_gen": 5.410398006439209, - "loss_rtd": 0.18935169279575348, - "loss_sent": 0.0018181510968133807, - "loss_sod": 0.024343883618712425, - "loss_total": 0.21551372110843658, - "step": 392199 - }, - { - "epoch": 0.0124, - "grad_norm": 1.2036210298538208, - "learning_rate": 6.126905698843887e-08, - "loss": 0.4086, - "step": 392200 - }, - { - "epoch": 0.012598, - "loss_gen": 5.893172264099121, - "loss_rtd": 0.2119544893503189, - "loss_sent": 0.14370910823345184, - "loss_sod": 0.043216902762651443, - "loss_total": 0.3988804817199707, - "step": 392299 - }, - { - "epoch": 0.012598, - "loss_gen": 6.108611106872559, - "loss_rtd": 0.21778523921966553, - "loss_sent": 0.09171459078788757, - "loss_sod": 0.12346639484167099, - "loss_total": 0.4329662322998047, - "step": 392299 - }, - { - "epoch": 0.0126, - "grad_norm": 1.3098056316375732, - "learning_rate": 5.970863555812555e-08, - "loss": 0.4117, - "step": 392300 - }, - { - "epoch": 0.012798, - "loss_gen": 6.1655192375183105, - "loss_rtd": 0.21565361320972443, - "loss_sent": 0.19704580307006836, - "loss_sod": 0.05005672574043274, - "loss_total": 0.4627561569213867, - "step": 392399 - }, - { - "epoch": 0.012798, - "loss_gen": 5.855915069580078, - "loss_rtd": 0.18138495087623596, - "loss_sent": 0.2532788813114166, - "loss_sod": 0.031894732266664505, - "loss_total": 0.466558575630188, - "step": 392399 - }, - { - "epoch": 0.0128, - "grad_norm": 0.983752965927124, - "learning_rate": 5.816833007120659e-08, - "loss": 0.4136, - "step": 392400 - }, - { - "epoch": 0.012998, - "loss_gen": 5.982431411743164, - "loss_rtd": 0.19362428784370422, - "loss_sent": 0.1276284009218216, - "loss_sod": 0.018778566271066666, - "loss_total": 0.3400312662124634, - "step": 392499 - }, - { - "epoch": 0.012998, - "loss_gen": 5.719311237335205, - "loss_rtd": 0.20908941328525543, - "loss_sent": 0.2693195939064026, - "loss_sod": 0.017360523343086243, - "loss_total": 0.49576953053474426, - "step": 392499 - }, - { - "epoch": 0.013, - "grad_norm": 1.3150999546051025, - "learning_rate": 5.66481411481079e-08, - "loss": 0.4242, - "step": 392500 - }, - { - "epoch": 0.013198, - "loss_gen": 6.014215469360352, - "loss_rtd": 0.20960800349712372, - "loss_sent": 0.2963671088218689, - "loss_sod": 0.06170869618654251, - "loss_total": 0.5676838159561157, - "step": 392599 - }, - { - "epoch": 0.013198, - "loss_gen": 6.473970890045166, - "loss_rtd": 0.19976429641246796, - "loss_sent": 0.1299380362033844, - "loss_sod": 0.07771517336368561, - "loss_total": 0.40741750597953796, - "step": 392599 - }, - { - "epoch": 0.0132, - "grad_norm": 1.0788829326629639, - "learning_rate": 5.514806940116191e-08, - "loss": 0.4149, - "step": 392600 - }, - { - "epoch": 0.013398, - "loss_gen": 5.213524341583252, - "loss_rtd": 0.15696687996387482, - "loss_sent": 2.4247579858638346e-05, - "loss_sod": 0.0948181077837944, - "loss_total": 0.2518092393875122, - "step": 392699 - }, - { - "epoch": 0.013398, - "loss_gen": 5.6687397956848145, - "loss_rtd": 0.19349080324172974, - "loss_sent": 0.07878529280424118, - "loss_sod": 0.01941695623099804, - "loss_total": 0.2916930615901947, - "step": 392699 - }, - { - "epoch": 0.0134, - "grad_norm": 1.054790735244751, - "learning_rate": 5.366811543460748e-08, - "loss": 0.4295, - "step": 392700 - }, - { - "epoch": 0.013598, - "loss_gen": 5.832163333892822, - "loss_rtd": 0.20374974608421326, - "loss_sent": 0.10759253799915314, - "loss_sod": 0.04071095958352089, - "loss_total": 0.3520532250404358, - "step": 392799 - }, - { - "epoch": 0.013598, - "loss_gen": 5.9442877769470215, - "loss_rtd": 0.18590156733989716, - "loss_sent": 0.08245570957660675, - "loss_sod": 0.021068645641207695, - "loss_total": 0.28942590951919556, - "step": 392799 - }, - { - "epoch": 0.0136, - "grad_norm": 1.20208740234375, - "learning_rate": 5.220827984455667e-08, - "loss": 0.3783, - "step": 392800 - }, - { - "epoch": 0.013798, - "loss_gen": 5.723763942718506, - "loss_rtd": 0.23243838548660278, - "loss_sent": 0.045534372329711914, - "loss_sod": 0.03477635234594345, - "loss_total": 0.31274911761283875, - "step": 392899 - }, - { - "epoch": 0.013798, - "loss_gen": 5.849414348602295, - "loss_rtd": 0.211965411901474, - "loss_sent": 0.16320006549358368, - "loss_sod": 0.058151278644800186, - "loss_total": 0.43331676721572876, - "step": 392899 - }, - { - "epoch": 0.0138, - "grad_norm": 1.233581781387329, - "learning_rate": 5.0768563219044665e-08, - "loss": 0.4183, - "step": 392900 - }, - { - "epoch": 0.013998, - "loss_gen": 5.444845676422119, - "loss_rtd": 0.18386173248291016, - "loss_sent": 0.00011939453543163836, - "loss_sod": 0.11187899112701416, - "loss_total": 0.2958601117134094, - "step": 392999 - }, - { - "epoch": 0.013998, - "loss_gen": 5.19231653213501, - "loss_rtd": 0.1738746613264084, - "loss_sent": 2.268303433083929e-05, - "loss_sod": 0.14518167078495026, - "loss_total": 0.3190790116786957, - "step": 392999 - }, - { - "epoch": 0.014, - "grad_norm": 1.4038021564483643, - "learning_rate": 4.934896613797424e-08, - "loss": 0.4145, - "step": 393000 - }, - { - "epoch": 0.014, - "eval_loss": 0.39626842737197876, - "eval_runtime": 150.164, - "eval_samples_per_second": 102.841, - "eval_steps_per_second": 0.806, - "step": 393000 - }, - { - "epoch": 0.014198, - "loss_gen": 5.95033597946167, - "loss_rtd": 0.1983281970024109, - "loss_sent": 0.647224485874176, - "loss_sod": 0.04138145223259926, - "loss_total": 0.8869341611862183, - "step": 393099 - }, - { - "epoch": 0.014198, - "loss_gen": 5.889521598815918, - "loss_rtd": 0.1870291531085968, - "loss_sent": 0.07497472316026688, - "loss_sod": 0.06532853096723557, - "loss_total": 0.32733240723609924, - "step": 393099 - }, - { - "epoch": 0.0142, - "grad_norm": 1.5091347694396973, - "learning_rate": 4.794948917317132e-08, - "loss": 0.4209, - "step": 393100 - }, - { - "epoch": 0.014398, - "loss_gen": 5.345975399017334, - "loss_rtd": 0.17086759209632874, - "loss_sent": 0.05822031572461128, - "loss_sod": 0.04950503259897232, - "loss_total": 0.27859294414520264, - "step": 393199 - }, - { - "epoch": 0.014398, - "loss_gen": 5.704697608947754, - "loss_rtd": 0.2205292135477066, - "loss_sent": 0.036288071423769, - "loss_sod": 0.06426063925027847, - "loss_total": 0.32107794284820557, - "step": 393199 - }, - { - "epoch": 0.0144, - "grad_norm": 0.9023429155349731, - "learning_rate": 4.6570132888340556e-08, - "loss": 0.4216, - "step": 393200 - }, - { - "epoch": 0.014598, - "loss_gen": 5.873831748962402, - "loss_rtd": 0.20705078542232513, - "loss_sent": 0.20871666073799133, - "loss_sod": 0.04147268086671829, - "loss_total": 0.45724013447761536, - "step": 393299 - }, - { - "epoch": 0.014598, - "loss_gen": 5.4875288009643555, - "loss_rtd": 0.1822550743818283, - "loss_sent": 0.007055058144032955, - "loss_sod": 0.01906927488744259, - "loss_total": 0.20837940275669098, - "step": 393299 - }, - { - "epoch": 0.0146, - "grad_norm": 0.661662757396698, - "learning_rate": 4.52108978390875e-08, - "loss": 0.402, - "step": 393300 - }, - { - "epoch": 0.014798, - "loss_gen": 5.859102725982666, - "loss_rtd": 0.22546504437923431, - "loss_sent": 0.0710543766617775, - "loss_sod": 0.029986320063471794, - "loss_total": 0.32650572061538696, - "step": 393399 - }, - { - "epoch": 0.014798, - "loss_gen": 5.5915303230285645, - "loss_rtd": 0.16614660620689392, - "loss_sent": 0.028622141107916832, - "loss_sod": 0.11509465426206589, - "loss_total": 0.309863418340683, - "step": 393399 - }, - { - "epoch": 0.0148, - "grad_norm": 0.8462416529655457, - "learning_rate": 4.3871784572907524e-08, - "loss": 0.4071, - "step": 393400 - }, - { - "epoch": 0.014998, - "loss_gen": 5.232621669769287, - "loss_rtd": 0.17480048537254333, - "loss_sent": 0.032950110733509064, - "loss_sod": 0.013565173372626305, - "loss_total": 0.22131577134132385, - "step": 393499 - }, - { - "epoch": 0.014998, - "loss_gen": 6.006125450134277, - "loss_rtd": 0.20174795389175415, - "loss_sent": 0.4658416509628296, - "loss_sod": 0.004407984670251608, - "loss_total": 0.671997606754303, - "step": 393499 - }, - { - "epoch": 0.015, - "grad_norm": 1.8555643558502197, - "learning_rate": 4.2552793629202504e-08, - "loss": 0.4105, - "step": 393500 - }, - { - "epoch": 0.015198, - "loss_gen": 5.7444353103637695, - "loss_rtd": 0.21366789937019348, - "loss_sent": 0.3370654881000519, - "loss_sod": 0.021276114508509636, - "loss_total": 0.5720095038414001, - "step": 393599 - }, - { - "epoch": 0.015198, - "loss_gen": 5.987033367156982, - "loss_rtd": 0.22995273768901825, - "loss_sent": 0.11565449088811874, - "loss_sod": 0.02097945846617222, - "loss_total": 0.36658668518066406, - "step": 393599 - }, - { - "epoch": 0.0152, - "grad_norm": 0.9959455728530884, - "learning_rate": 4.1253925539253003e-08, - "loss": 0.4147, - "step": 393600 - }, - { - "epoch": 0.015398, - "loss_gen": 5.640103340148926, - "loss_rtd": 0.20078890025615692, - "loss_sent": 0.006867082789540291, - "loss_sod": 0.09368519484996796, - "loss_total": 0.30134117603302, - "step": 393699 - }, - { - "epoch": 0.015398, - "loss_gen": 5.250030994415283, - "loss_rtd": 0.16638965904712677, - "loss_sent": 0.00990522000938654, - "loss_sod": 0.05199884995818138, - "loss_total": 0.22829373180866241, - "step": 393699 - }, - { - "epoch": 0.0154, - "grad_norm": 0.8872623443603516, - "learning_rate": 3.9975180826251626e-08, - "loss": 0.4305, - "step": 393700 - }, - { - "epoch": 0.015598, - "loss_gen": 5.638567924499512, - "loss_rtd": 0.19004599750041962, - "loss_sent": 0.28857457637786865, - "loss_sod": 0.04916493967175484, - "loss_total": 0.5277855396270752, - "step": 393799 - }, - { - "epoch": 0.015598, - "loss_gen": 5.700003147125244, - "loss_rtd": 0.22980214655399323, - "loss_sent": 0.15928246080875397, - "loss_sod": 0.01416728924959898, - "loss_total": 0.40325188636779785, - "step": 393799 - }, - { - "epoch": 0.0156, - "grad_norm": 2.3052096366882324, - "learning_rate": 3.8716560005269684e-08, - "loss": 0.4061, - "step": 393800 - }, - { - "epoch": 0.015798, - "loss_gen": 6.030062198638916, - "loss_rtd": 0.2230379283428192, - "loss_sent": 0.20075556635856628, - "loss_sod": 0.05416977405548096, - "loss_total": 0.47796326875686646, - "step": 393899 - }, - { - "epoch": 0.015798, - "loss_gen": 5.665853977203369, - "loss_rtd": 0.22915604710578918, - "loss_sent": 0.2098565250635147, - "loss_sod": 0.008518276736140251, - "loss_total": 0.4475308656692505, - "step": 393899 - }, - { - "epoch": 0.0158, - "grad_norm": 0.8969384431838989, - "learning_rate": 3.747806358328498e-08, - "loss": 0.4395, - "step": 393900 - }, - { - "epoch": 0.015998, - "loss_gen": 6.154298305511475, - "loss_rtd": 0.2080710530281067, - "loss_sent": 0.13580961525440216, - "loss_sod": 0.12290502339601517, - "loss_total": 0.4667856693267822, - "step": 393999 - }, - { - "epoch": 0.015998, - "loss_gen": 5.941771507263184, - "loss_rtd": 0.22374530136585236, - "loss_sent": 0.1344057023525238, - "loss_sod": 0.06482969224452972, - "loss_total": 0.4229806959629059, - "step": 393999 - }, - { - "epoch": 0.016, - "grad_norm": 0.9252235293388367, - "learning_rate": 3.6259692059159576e-08, - "loss": 0.401, - "step": 394000 - }, - { - "epoch": 0.016, - "eval_loss": 0.39670079946517944, - "eval_runtime": 150.1918, - "eval_samples_per_second": 102.822, - "eval_steps_per_second": 0.806, - "step": 394000 - }, - { - "epoch": 0.016198, - "loss_gen": 5.955575942993164, - "loss_rtd": 0.21677181124687195, - "loss_sent": 0.352148175239563, - "loss_sod": 0.018472759053111076, - "loss_total": 0.5873927474021912, - "step": 394099 - }, - { - "epoch": 0.016198, - "loss_gen": 5.223123550415039, - "loss_rtd": 0.17382578551769257, - "loss_sent": 2.585828406154178e-05, - "loss_sod": 0.13978484272956848, - "loss_total": 0.3136364817619324, - "step": 394099 - }, - { - "epoch": 0.0162, - "grad_norm": 1.3951376676559448, - "learning_rate": 3.506144592365645e-08, - "loss": 0.4154, - "step": 394100 - }, - { - "epoch": 0.016398, - "loss_gen": 5.602262020111084, - "loss_rtd": 0.22612063586711884, - "loss_sent": 0.2637924551963806, - "loss_sod": 0.003283230122178793, - "loss_total": 0.4931963086128235, - "step": 394199 - }, - { - "epoch": 0.016398, - "loss_gen": 5.5274152755737305, - "loss_rtd": 0.19322897493839264, - "loss_sent": 0.024348091334104538, - "loss_sod": 0.04663927108049393, - "loss_total": 0.2642163336277008, - "step": 394199 - }, - { - "epoch": 0.0164, - "grad_norm": 0.7509187459945679, - "learning_rate": 3.3883325659428425e-08, - "loss": 0.3897, - "step": 394200 - }, - { - "epoch": 0.016598, - "loss_gen": 5.995658874511719, - "loss_rtd": 0.2020183950662613, - "loss_sent": 0.04633113741874695, - "loss_sod": 0.07576620578765869, - "loss_total": 0.32411572337150574, - "step": 394299 - }, - { - "epoch": 0.016598, - "loss_gen": 5.280943870544434, - "loss_rtd": 0.18137438595294952, - "loss_sent": 2.3182210497907363e-05, - "loss_sod": 0.126439169049263, - "loss_total": 0.30783674120903015, - "step": 394299 - }, - { - "epoch": 0.0166, - "grad_norm": 1.3717312812805176, - "learning_rate": 3.272533174102366e-08, - "loss": 0.4239, - "step": 394300 - }, - { - "epoch": 0.016798, - "loss_gen": 6.185173511505127, - "loss_rtd": 0.21150723099708557, - "loss_sent": 0.03277455270290375, - "loss_sod": 0.04609215632081032, - "loss_total": 0.29037392139434814, - "step": 394399 - }, - { - "epoch": 0.016798, - "loss_gen": 6.213319778442383, - "loss_rtd": 0.2253490686416626, - "loss_sent": 0.1608581840991974, - "loss_sod": 0.02025735192000866, - "loss_total": 0.4064646065235138, - "step": 394399 - }, - { - "epoch": 0.0168, - "grad_norm": 0.6647672057151794, - "learning_rate": 3.1587464634874606e-08, - "loss": 0.4042, - "step": 394400 - }, - { - "epoch": 0.016998, - "loss_gen": 5.9767537117004395, - "loss_rtd": 0.19369633495807648, - "loss_sent": 0.1807086169719696, - "loss_sod": 0.020512813702225685, - "loss_total": 0.3949177861213684, - "step": 394499 - }, - { - "epoch": 0.016998, - "loss_gen": 5.709785461425781, - "loss_rtd": 0.1785837709903717, - "loss_sent": 0.06159829720854759, - "loss_sod": 0.01736762747168541, - "loss_total": 0.2575497031211853, - "step": 394499 - }, - { - "epoch": 0.017, - "grad_norm": 0.7903895974159241, - "learning_rate": 3.0469724799320196e-08, - "loss": 0.4243, - "step": 394500 - }, - { - "epoch": 0.017198, - "loss_gen": 5.914492130279541, - "loss_rtd": 0.19854649901390076, - "loss_sent": 0.08682210743427277, - "loss_sod": 0.042485035955905914, - "loss_total": 0.32785362005233765, - "step": 394599 - }, - { - "epoch": 0.017198, - "loss_gen": 6.035208702087402, - "loss_rtd": 0.21304945647716522, - "loss_sent": 0.20649196207523346, - "loss_sod": 0.017722059041261673, - "loss_total": 0.43726348876953125, - "step": 394599 - }, - { - "epoch": 0.0172, - "grad_norm": 0.6683346033096313, - "learning_rate": 2.937211268458917e-08, - "loss": 0.4165, - "step": 394600 - }, - { - "epoch": 0.017398, - "loss_gen": 5.919938087463379, - "loss_rtd": 0.20923687517642975, - "loss_sent": 0.27039802074432373, - "loss_sod": 0.06865854561328888, - "loss_total": 0.5482934713363647, - "step": 394699 - }, - { - "epoch": 0.017398, - "loss_gen": 5.799861907958984, - "loss_rtd": 0.21269604563713074, - "loss_sent": 0.044693950563669205, - "loss_sod": 0.019913161173462868, - "loss_total": 0.27730315923690796, - "step": 394699 - }, - { - "epoch": 0.0174, - "grad_norm": 1.3340352773666382, - "learning_rate": 2.8294628732788985e-08, - "loss": 0.4298, - "step": 394700 - }, - { - "epoch": 0.017598, - "loss_gen": 5.700779914855957, - "loss_rtd": 0.1747732162475586, - "loss_sent": 0.048937201499938965, - "loss_sod": 0.04359501227736473, - "loss_total": 0.2673054337501526, - "step": 394799 - }, - { - "epoch": 0.017598, - "loss_gen": 6.062201499938965, - "loss_rtd": 0.22227883338928223, - "loss_sent": 0.3737484812736511, - "loss_sod": 0.011067884042859077, - "loss_total": 0.6070951819419861, - "step": 394799 - }, - { - "epoch": 0.0176, - "grad_norm": 0.9948114156723022, - "learning_rate": 2.7237273377944684e-08, - "loss": 0.4083, - "step": 394800 - }, - { - "epoch": 0.017798, - "loss_gen": 5.300630569458008, - "loss_rtd": 0.1731356531381607, - "loss_sent": 0.00145197962410748, - "loss_sod": 0.07960737496614456, - "loss_total": 0.2541950047016144, - "step": 394899 - }, - { - "epoch": 0.017798, - "loss_gen": 5.130007266998291, - "loss_rtd": 0.17352192103862762, - "loss_sent": 2.2366391931427643e-05, - "loss_sod": 0.09852245450019836, - "loss_total": 0.27206671237945557, - "step": 394899 - }, - { - "epoch": 0.0178, - "grad_norm": 0.9013128876686096, - "learning_rate": 2.6200047045943366e-08, - "loss": 0.406, - "step": 394900 - }, - { - "epoch": 0.017998, - "loss_gen": 5.662559509277344, - "loss_rtd": 0.22165432572364807, - "loss_sent": 0.13525241613388062, - "loss_sod": 0.027606867253780365, - "loss_total": 0.38451361656188965, - "step": 394999 - }, - { - "epoch": 0.017998, - "loss_gen": 5.6816792488098145, - "loss_rtd": 0.2266261875629425, - "loss_sent": 0.4238015115261078, - "loss_sod": 0.004062837455421686, - "loss_total": 0.6544905304908752, - "step": 394999 - }, - { - "epoch": 0.018, - "grad_norm": 1.2733234167099, - "learning_rate": 2.5182950154589712e-08, - "loss": 0.411, - "step": 395000 - }, - { - "epoch": 0.018, - "eval_loss": 0.3975522518157959, - "eval_runtime": 150.0529, - "eval_samples_per_second": 102.917, - "eval_steps_per_second": 0.806, - "step": 395000 - }, - { - "epoch": 0.018198, - "loss_gen": 5.792178630828857, - "loss_rtd": 0.21859611570835114, - "loss_sent": 0.13951151072978973, - "loss_sod": 0.05840788781642914, - "loss_total": 0.4165155291557312, - "step": 395099 - }, - { - "epoch": 0.018198, - "loss_gen": 6.0479888916015625, - "loss_rtd": 0.23042891919612885, - "loss_sent": 0.20273616909980774, - "loss_sod": 0.0860833078622818, - "loss_total": 0.519248366355896, - "step": 395099 - }, - { - "epoch": 0.0182, - "grad_norm": 1.0764565467834473, - "learning_rate": 2.4185983113567124e-08, - "loss": 0.428, - "step": 395100 - }, - { - "epoch": 0.018398, - "loss_gen": 6.170469760894775, - "loss_rtd": 0.24229945242404938, - "loss_sent": 0.7802306413650513, - "loss_sod": 0.023285791277885437, - "loss_total": 1.0458159446716309, - "step": 395199 - }, - { - "epoch": 0.018398, - "loss_gen": 5.718085765838623, - "loss_rtd": 0.184598907828331, - "loss_sent": 0.26514795422554016, - "loss_sod": 0.03541194647550583, - "loss_total": 0.4851588010787964, - "step": 395199 - }, - { - "epoch": 0.0184, - "grad_norm": 2.733649492263794, - "learning_rate": 2.320914632445437e-08, - "loss": 0.4093, - "step": 395200 - }, - { - "epoch": 0.018598, - "loss_gen": 5.969252109527588, - "loss_rtd": 0.20024250447750092, - "loss_sent": 0.32519903779029846, - "loss_sod": 0.0019377648131921887, - "loss_total": 0.5273792743682861, - "step": 395299 - }, - { - "epoch": 0.018598, - "loss_gen": 5.480463981628418, - "loss_rtd": 0.18705987930297852, - "loss_sent": 0.034322500228881836, - "loss_sod": 0.06096341088414192, - "loss_total": 0.2823457717895508, - "step": 395299 - }, - { - "epoch": 0.0186, - "grad_norm": 0.9037191271781921, - "learning_rate": 2.2252440180720036e-08, - "loss": 0.4011, - "step": 395300 - }, - { - "epoch": 0.018798, - "loss_gen": 5.389925956726074, - "loss_rtd": 0.17318344116210938, - "loss_sent": 0.0014120283303782344, - "loss_sod": 0.11518971621990204, - "loss_total": 0.2897852063179016, - "step": 395399 - }, - { - "epoch": 0.018798, - "loss_gen": 6.047664642333984, - "loss_rtd": 0.2042461335659027, - "loss_sent": 0.1126847043633461, - "loss_sod": 0.06417940557003021, - "loss_total": 0.38111022114753723, - "step": 395399 - }, - { - "epoch": 0.0188, - "grad_norm": 0.989812970161438, - "learning_rate": 2.131586506772254e-08, - "loss": 0.4059, - "step": 395400 - }, - { - "epoch": 0.018998, - "loss_gen": 6.207770347595215, - "loss_rtd": 0.21371395885944366, - "loss_sent": 0.3242473304271698, - "loss_sod": 0.06220316141843796, - "loss_total": 0.6001644134521484, - "step": 395499 - }, - { - "epoch": 0.018998, - "loss_gen": 5.763589382171631, - "loss_rtd": 0.1895914524793625, - "loss_sent": 0.1785668432712555, - "loss_sod": 0.03489559143781662, - "loss_total": 0.403053879737854, - "step": 395499 - }, - { - "epoch": 0.019, - "grad_norm": 0.7213029265403748, - "learning_rate": 2.0399421362721215e-08, - "loss": 0.4095, - "step": 395500 - }, - { - "epoch": 0.019198, - "loss_gen": 5.1523661613464355, - "loss_rtd": 0.1546034961938858, - "loss_sent": 0.018785694614052773, - "loss_sod": 0.037261370569467545, - "loss_total": 0.21065056324005127, - "step": 395599 - }, - { - "epoch": 0.019198, - "loss_gen": 5.888722896575928, - "loss_rtd": 0.22429607808589935, - "loss_sent": 0.05058183893561363, - "loss_sod": 0.0337141752243042, - "loss_total": 0.3085921108722687, - "step": 395599 - }, - { - "epoch": 0.0192, - "grad_norm": 0.6930906176567078, - "learning_rate": 1.950310943485967e-08, - "loss": 0.404, - "step": 395600 - }, - { - "epoch": 0.019398, - "loss_gen": 6.158482074737549, - "loss_rtd": 0.21151158213615417, - "loss_sent": 0.10480982065200806, - "loss_sod": 0.10025284439325333, - "loss_total": 0.41657423973083496, - "step": 395699 - }, - { - "epoch": 0.019398, - "loss_gen": 5.929201602935791, - "loss_rtd": 0.19908545911312103, - "loss_sent": 0.08760825544595718, - "loss_sod": 0.04331972822546959, - "loss_total": 0.3300134539604187, - "step": 395699 - }, - { - "epoch": 0.0194, - "grad_norm": 1.2213667631149292, - "learning_rate": 1.862692964516022e-08, - "loss": 0.4134, - "step": 395700 - }, - { - "epoch": 0.019598, - "loss_gen": 6.002203464508057, - "loss_rtd": 0.2268359512090683, - "loss_sent": 0.09985633939504623, - "loss_sod": 0.02582608349621296, - "loss_total": 0.35251837968826294, - "step": 395799 - }, - { - "epoch": 0.019598, - "loss_gen": 5.99774694442749, - "loss_rtd": 0.19766496121883392, - "loss_sent": 0.3084019124507904, - "loss_sod": 0.010111101903021336, - "loss_total": 0.5161780118942261, - "step": 395799 - }, - { - "epoch": 0.0196, - "grad_norm": 1.337019681930542, - "learning_rate": 1.7770882346562766e-08, - "loss": 0.4131, - "step": 395800 - }, - { - "epoch": 0.019798, - "loss_gen": 5.453426361083984, - "loss_rtd": 0.19900771975517273, - "loss_sent": 0.03479054197669029, - "loss_sod": 0.039392851293087006, - "loss_total": 0.2731911242008209, - "step": 395899 - }, - { - "epoch": 0.019798, - "loss_gen": 5.1930317878723145, - "loss_rtd": 0.17708735167980194, - "loss_sent": 2.3433634851244278e-05, - "loss_sod": 0.07683393359184265, - "loss_total": 0.2539446949958801, - "step": 395899 - }, - { - "epoch": 0.0198, - "grad_norm": 0.7898704409599304, - "learning_rate": 1.693496788387483e-08, - "loss": 0.4209, - "step": 395900 - }, - { - "epoch": 0.019998, - "loss_gen": 5.888876914978027, - "loss_rtd": 0.21547532081604004, - "loss_sent": 0.22610247135162354, - "loss_sod": 0.06966503709554672, - "loss_total": 0.5112428665161133, - "step": 395999 - }, - { - "epoch": 0.019998, - "loss_gen": 6.246582508087158, - "loss_rtd": 0.22070348262786865, - "loss_sent": 0.20763853192329407, - "loss_sod": 0.015469067730009556, - "loss_total": 0.4438110888004303, - "step": 395999 - }, - { - "epoch": 0.02, - "grad_norm": 0.9909964203834534, - "learning_rate": 1.6119186593804848e-08, - "loss": 0.4382, - "step": 396000 - }, - { - "epoch": 0.02, - "eval_loss": 0.3933391571044922, - "eval_runtime": 151.6017, - "eval_samples_per_second": 101.866, - "eval_steps_per_second": 0.798, - "step": 396000 - }, - { - "epoch": 0.020198, - "loss_gen": 6.113070487976074, - "loss_rtd": 0.2115258425474167, - "loss_sent": 0.20682215690612793, - "loss_sod": 0.042044926434755325, - "loss_total": 0.46039292216300964, - "step": 396099 - }, - { - "epoch": 0.020198, - "loss_gen": 6.27998161315918, - "loss_rtd": 0.20464813709259033, - "loss_sent": 0.31439003348350525, - "loss_sod": 0.11591437458992004, - "loss_total": 0.6349525451660156, - "step": 396099 - }, - { - "epoch": 0.0202, - "grad_norm": 1.7895152568817139, - "learning_rate": 1.5323538804951077e-08, - "loss": 0.4144, - "step": 396100 - }, - { - "epoch": 0.020398, - "loss_gen": 5.5880937576293945, - "loss_rtd": 0.21506370604038239, - "loss_sent": 0.1927960067987442, - "loss_sod": 0.034131310880184174, - "loss_total": 0.44199103116989136, - "step": 396199 - }, - { - "epoch": 0.020398, - "loss_gen": 6.354598522186279, - "loss_rtd": 0.21413680911064148, - "loss_sent": 0.1955765038728714, - "loss_sod": 0.10964728891849518, - "loss_total": 0.5193606019020081, - "step": 396199 - }, - { - "epoch": 0.0204, - "grad_norm": 1.1391600370407104, - "learning_rate": 1.4548024837796048e-08, - "loss": 0.4092, - "step": 396200 - }, - { - "epoch": 0.020598, - "loss_gen": 6.109943866729736, - "loss_rtd": 0.18975844979286194, - "loss_sent": 0.198400616645813, - "loss_sod": 0.09451808780431747, - "loss_total": 0.482677161693573, - "step": 396299 - }, - { - "epoch": 0.020598, - "loss_gen": 5.9123215675354, - "loss_rtd": 0.21667487919330597, - "loss_sent": 0.10699949413537979, - "loss_sod": 0.03720562532544136, - "loss_total": 0.3608799874782562, - "step": 396299 - }, - { - "epoch": 0.0206, - "grad_norm": 0.8118402361869812, - "learning_rate": 1.3792645004717663e-08, - "loss": 0.3997, - "step": 396300 - }, - { - "epoch": 0.020798, - "loss_gen": 5.112740516662598, - "loss_rtd": 0.17680253088474274, - "loss_sent": 2.3321877961279824e-05, - "loss_sod": 0.08652521669864655, - "loss_total": 0.26335108280181885, - "step": 396399 - }, - { - "epoch": 0.020798, - "loss_gen": 5.7683186531066895, - "loss_rtd": 0.20547260344028473, - "loss_sent": 0.10369966924190521, - "loss_sod": 0.02828032150864601, - "loss_total": 0.33745259046554565, - "step": 396399 - }, - { - "epoch": 0.0208, - "grad_norm": 0.8942224979400635, - "learning_rate": 1.3057399609983646e-08, - "loss": 0.4087, - "step": 396400 - }, - { - "epoch": 0.020998, - "loss_gen": 5.889406681060791, - "loss_rtd": 0.18509641289710999, - "loss_sent": 0.16096730530261993, - "loss_sod": 0.08021023869514465, - "loss_total": 0.4262739419937134, - "step": 396499 - }, - { - "epoch": 0.020998, - "loss_gen": 5.772846221923828, - "loss_rtd": 0.20384618639945984, - "loss_sent": 0.2623909115791321, - "loss_sod": 0.05246247351169586, - "loss_total": 0.518699586391449, - "step": 396499 - }, - { - "epoch": 0.021, - "grad_norm": 1.794934630393982, - "learning_rate": 1.2342288949757086e-08, - "loss": 0.3897, - "step": 396500 - }, - { - "epoch": 0.021198, - "loss_gen": 5.727966785430908, - "loss_rtd": 0.22208158671855927, - "loss_sent": 0.14549203217029572, - "loss_sod": 0.12180855125188828, - "loss_total": 0.48938214778900146, - "step": 396599 - }, - { - "epoch": 0.021198, - "loss_gen": 5.7613325119018555, - "loss_rtd": 0.19115635752677917, - "loss_sent": 0.0005651103565469384, - "loss_sod": 0.11405406892299652, - "loss_total": 0.30577552318573, - "step": 396599 - }, - { - "epoch": 0.0212, - "grad_norm": 1.111713171005249, - "learning_rate": 1.1647313312074248e-08, - "loss": 0.4166, - "step": 396600 - }, - { - "epoch": 0.021398, - "loss_gen": 5.817107200622559, - "loss_rtd": 0.22824671864509583, - "loss_sent": 0.2634729743003845, - "loss_sod": 0.034896522760391235, - "loss_total": 0.5266162157058716, - "step": 396699 - }, - { - "epoch": 0.021398, - "loss_gen": 5.967624664306641, - "loss_rtd": 0.22594067454338074, - "loss_sent": 0.13916714489459991, - "loss_sod": 0.013836721889674664, - "loss_total": 0.3789445459842682, - "step": 396699 - }, - { - "epoch": 0.0214, - "grad_norm": 1.0892983675003052, - "learning_rate": 1.0972472976872317e-08, - "loss": 0.4199, - "step": 396700 - }, - { - "epoch": 0.021598, - "loss_gen": 5.413094997406006, - "loss_rtd": 0.164296954870224, - "loss_sent": 0.0005756186437793076, - "loss_sod": 0.046316053718328476, - "loss_total": 0.21118862926959991, - "step": 396799 - }, - { - "epoch": 0.021598, - "loss_gen": 5.681094646453857, - "loss_rtd": 0.18285973370075226, - "loss_sent": 0.11255313456058502, - "loss_sod": 0.027348056435585022, - "loss_total": 0.3227609395980835, - "step": 396799 - }, - { - "epoch": 0.0216, - "grad_norm": 0.5557261109352112, - "learning_rate": 1.0317768215983847e-08, - "loss": 0.4111, - "step": 396800 - }, - { - "epoch": 0.021798, - "loss_gen": 5.866828918457031, - "loss_rtd": 0.21312595903873444, - "loss_sent": 0.3726646602153778, - "loss_sod": 0.011665992438793182, - "loss_total": 0.5974565744400024, - "step": 396899 - }, - { - "epoch": 0.021798, - "loss_gen": 5.861220359802246, - "loss_rtd": 0.2063438594341278, - "loss_sent": 0.04638810083270073, - "loss_sod": 0.02496756985783577, - "loss_total": 0.2776995301246643, - "step": 396899 - }, - { - "epoch": 0.0218, - "grad_norm": 0.8123736381530762, - "learning_rate": 9.683199293120116e-09, - "loss": 0.4271, - "step": 396900 - }, - { - "epoch": 0.021998, - "loss_gen": 5.931436061859131, - "loss_rtd": 0.21014858782291412, - "loss_sent": 0.16449576616287231, - "loss_sod": 0.035002026706933975, - "loss_total": 0.4096463918685913, - "step": 396999 - }, - { - "epoch": 0.021998, - "loss_gen": 6.23885440826416, - "loss_rtd": 0.2218223214149475, - "loss_sent": 0.13119150698184967, - "loss_sod": 0.0584576278924942, - "loss_total": 0.411471426486969, - "step": 396999 - }, - { - "epoch": 0.022, - "grad_norm": 0.967769980430603, - "learning_rate": 9.068766463887768e-09, - "loss": 0.4137, - "step": 397000 - }, - { - "epoch": 0.022, - "eval_loss": 0.39533543586730957, - "eval_runtime": 149.965, - "eval_samples_per_second": 102.977, - "eval_steps_per_second": 0.807, - "step": 397000 - }, - { - "epoch": 0.022198, - "loss_gen": 5.757076740264893, - "loss_rtd": 0.21644426882266998, - "loss_sent": 0.36708390712738037, - "loss_sod": 0.004734056536108255, - "loss_total": 0.5882622003555298, - "step": 397099 - }, - { - "epoch": 0.022198, - "loss_gen": 5.524250030517578, - "loss_rtd": 0.2052869349718094, - "loss_sent": 0.29954639077186584, - "loss_sod": 0.05333589389920235, - "loss_total": 0.5581692457199097, - "step": 397099 - }, - { - "epoch": 0.0222, - "grad_norm": 1.7729299068450928, - "learning_rate": 8.47446997577217e-09, - "loss": 0.4073, - "step": 397100 - }, - { - "epoch": 0.022398, - "loss_gen": 5.344290256500244, - "loss_rtd": 0.16685928404331207, - "loss_sent": 0.0002616595884319395, - "loss_sod": 0.03148813545703888, - "loss_total": 0.19860908389091492, - "step": 397199 - }, - { - "epoch": 0.022398, - "loss_gen": 5.928763389587402, - "loss_rtd": 0.21665158867835999, - "loss_sent": 0.2575046122074127, - "loss_sod": 0.11655202507972717, - "loss_total": 0.5907082557678223, - "step": 397199 - }, - { - "epoch": 0.0224, - "grad_norm": 1.2157509326934814, - "learning_rate": 7.900310068165163e-09, - "loss": 0.4283, - "step": 397200 - }, - { - "epoch": 0.022598, - "loss_gen": 5.86064338684082, - "loss_rtd": 0.19511862099170685, - "loss_sent": 0.42830026149749756, - "loss_sod": 0.029919231310486794, - "loss_total": 0.6533381342887878, - "step": 397299 - }, - { - "epoch": 0.022598, - "loss_gen": 6.055613994598389, - "loss_rtd": 0.22604653239250183, - "loss_sent": 0.2757790982723236, - "loss_sod": 0.015424983575940132, - "loss_total": 0.5172505974769592, - "step": 397299 - }, - { - "epoch": 0.0226, - "grad_norm": 2.5486299991607666, - "learning_rate": 7.346286972337302e-09, - "loss": 0.4145, - "step": 397300 - }, - { - "epoch": 0.022798, - "loss_gen": 6.520084381103516, - "loss_rtd": 0.19186510145664215, - "loss_sent": 0.20222032070159912, - "loss_sod": 0.03658706694841385, - "loss_total": 0.4306724965572357, - "step": 397399 - }, - { - "epoch": 0.022798, - "loss_gen": 5.960272789001465, - "loss_rtd": 0.22346623241901398, - "loss_sent": 0.15942415595054626, - "loss_sod": 0.015284555032849312, - "loss_total": 0.3981749415397644, - "step": 397399 - }, - { - "epoch": 0.0228, - "grad_norm": 1.2572697401046753, - "learning_rate": 6.812400911443417e-09, - "loss": 0.4174, - "step": 397400 - }, - { - "epoch": 0.022998, - "loss_gen": 5.888513088226318, - "loss_rtd": 0.20500653982162476, - "loss_sent": 0.08508677035570145, - "loss_sod": 0.2508503496646881, - "loss_total": 0.5409436821937561, - "step": 397499 - }, - { - "epoch": 0.022998, - "loss_gen": 6.165918827056885, - "loss_rtd": 0.23296770453453064, - "loss_sent": 0.1689748466014862, - "loss_sod": 0.031435467302799225, - "loss_total": 0.4333780109882355, - "step": 397499 - }, - { - "epoch": 0.023, - "grad_norm": 1.6528115272521973, - "learning_rate": 6.2986521005392595e-09, - "loss": 0.4172, - "step": 397500 - }, - { - "epoch": 0.023198, - "loss_gen": 5.944231033325195, - "loss_rtd": 0.21620090305805206, - "loss_sent": 0.17428633570671082, - "loss_sod": 0.0337856188416481, - "loss_total": 0.4242728650569916, - "step": 397599 - }, - { - "epoch": 0.023198, - "loss_gen": 5.931299209594727, - "loss_rtd": 0.2072267383337021, - "loss_sent": 0.07298165559768677, - "loss_sod": 0.020796025171875954, - "loss_total": 0.30100440979003906, - "step": 397599 - }, - { - "epoch": 0.0232, - "grad_norm": 0.8798516392707825, - "learning_rate": 5.8050407465537475e-09, - "loss": 0.3918, - "step": 397600 - }, - { - "epoch": 0.023398, - "loss_gen": 5.75743293762207, - "loss_rtd": 0.21875669062137604, - "loss_sent": 0.14590534567832947, - "loss_sod": 0.01297023892402649, - "loss_total": 0.3776322603225708, - "step": 397699 - }, - { - "epoch": 0.023398, - "loss_gen": 5.408660411834717, - "loss_rtd": 0.18407467007637024, - "loss_sent": 0.011094531044363976, - "loss_sod": 0.08246287703514099, - "loss_total": 0.27763208746910095, - "step": 397699 - }, - { - "epoch": 0.0234, - "grad_norm": 0.8510765433311462, - "learning_rate": 5.331567048322272e-09, - "loss": 0.405, - "step": 397700 - }, - { - "epoch": 0.023598, - "loss_gen": 5.693921089172363, - "loss_rtd": 0.19635532796382904, - "loss_sent": 0.11468133330345154, - "loss_sod": 0.14380469918251038, - "loss_total": 0.45484134554862976, - "step": 397799 - }, - { - "epoch": 0.023598, - "loss_gen": 5.726851463317871, - "loss_rtd": 0.2062656730413437, - "loss_sent": 0.2271997481584549, - "loss_sod": 0.06671570986509323, - "loss_total": 0.5001811385154724, - "step": 397799 - }, - { - "epoch": 0.0236, - "grad_norm": 1.4799067974090576, - "learning_rate": 4.878231196558947e-09, - "loss": 0.4147, - "step": 397800 - }, - { - "epoch": 0.023798, - "loss_gen": 5.480199337005615, - "loss_rtd": 0.1762045919895172, - "loss_sent": 0.003955208696424961, - "loss_sod": 0.018526971340179443, - "loss_total": 0.19868677854537964, - "step": 397899 - }, - { - "epoch": 0.023798, - "loss_gen": 5.268898963928223, - "loss_rtd": 0.17445386946201324, - "loss_sent": 0.007511140778660774, - "loss_sod": 0.09362047910690308, - "loss_total": 0.27558547258377075, - "step": 397899 - }, - { - "epoch": 0.0238, - "grad_norm": 0.8327092528343201, - "learning_rate": 4.445033373862151e-09, - "loss": 0.4009, - "step": 397900 - }, - { - "epoch": 0.023998, - "loss_gen": 5.45214319229126, - "loss_rtd": 0.16575685143470764, - "loss_sent": 2.3105838408810087e-05, - "loss_sod": 0.10151588916778564, - "loss_total": 0.26729583740234375, - "step": 397999 - }, - { - "epoch": 0.023998, - "loss_gen": 5.442799091339111, - "loss_rtd": 0.16667398810386658, - "loss_sent": 0.009982021525502205, - "loss_sod": 0.03796064108610153, - "loss_total": 0.21461665630340576, - "step": 397999 - }, - { - "epoch": 0.024, - "grad_norm": 0.8787603378295898, - "learning_rate": 4.031973754725637e-09, - "loss": 0.4256, - "step": 398000 - }, - { - "epoch": 0.024, - "eval_loss": 0.3966180086135864, - "eval_runtime": 150.4248, - "eval_samples_per_second": 102.663, - "eval_steps_per_second": 0.804, - "step": 398000 - }, - { - "epoch": 0.024198, - "loss_gen": 5.452498435974121, - "loss_rtd": 0.18792618811130524, - "loss_sent": 0.09348011016845703, - "loss_sod": 0.16174200177192688, - "loss_total": 0.44314831495285034, - "step": 398099 - }, - { - "epoch": 0.024198, - "loss_gen": 5.271835803985596, - "loss_rtd": 0.17350687086582184, - "loss_sent": 2.3092798073776066e-05, - "loss_sod": 0.09141494333744049, - "loss_total": 0.2649449110031128, - "step": 398099 - }, - { - "epoch": 0.0242, - "grad_norm": 1.3214391469955444, - "learning_rate": 3.6390525055329806e-09, - "loss": 0.4086, - "step": 398100 - }, - { - "epoch": 0.024398, - "loss_gen": 5.939271450042725, - "loss_rtd": 0.2222088724374771, - "loss_sent": 0.08363074064254761, - "loss_sod": 0.03482465073466301, - "loss_total": 0.340664267539978, - "step": 398199 - }, - { - "epoch": 0.024398, - "loss_gen": 5.742465496063232, - "loss_rtd": 0.1840306967496872, - "loss_sent": 0.12969596683979034, - "loss_sod": 0.008474668487906456, - "loss_total": 0.32220131158828735, - "step": 398199 - }, - { - "epoch": 0.0244, - "grad_norm": 0.9859786629676819, - "learning_rate": 3.266269784552023e-09, - "loss": 0.414, - "step": 398200 - }, - { - "epoch": 0.024598, - "loss_gen": 5.351289749145508, - "loss_rtd": 0.17522194981575012, - "loss_sent": 0.006022478919476271, - "loss_sod": 0.06655101478099823, - "loss_total": 0.2477954477071762, - "step": 398299 - }, - { - "epoch": 0.024598, - "loss_gen": 5.743028163909912, - "loss_rtd": 0.19148989021778107, - "loss_sent": 0.1353217512369156, - "loss_sod": 0.016871271654963493, - "loss_total": 0.3436829149723053, - "step": 398299 - }, - { - "epoch": 0.0246, - "grad_norm": 0.7481858134269714, - "learning_rate": 2.913625741940429e-09, - "loss": 0.4291, - "step": 398300 - }, - { - "epoch": 0.024798, - "loss_gen": 6.19744348526001, - "loss_rtd": 0.205043762922287, - "loss_sent": 0.2743740975856781, - "loss_sod": 0.006529063917696476, - "loss_total": 0.485946923494339, - "step": 398399 - }, - { - "epoch": 0.024798, - "loss_gen": 5.926233291625977, - "loss_rtd": 0.20683732628822327, - "loss_sent": 0.16888761520385742, - "loss_sod": 0.020709645003080368, - "loss_total": 0.39643460512161255, - "step": 398399 - }, - { - "epoch": 0.0248, - "grad_norm": 1.181447982788086, - "learning_rate": 2.5811205197401322e-09, - "loss": 0.4194, - "step": 398400 - }, - { - "epoch": 0.024998, - "loss_gen": 5.8736042976379395, - "loss_rtd": 0.20080658793449402, - "loss_sent": 0.3153776526451111, - "loss_sod": 0.04780576378107071, - "loss_total": 0.5639899969100952, - "step": 398499 - }, - { - "epoch": 0.024998, - "loss_gen": 5.90634822845459, - "loss_rtd": 0.21827960014343262, - "loss_sent": 0.10579497367143631, - "loss_sod": 0.005877520423382521, - "loss_total": 0.32995209097862244, - "step": 398499 - }, - { - "epoch": 0.025, - "grad_norm": 1.1717947721481323, - "learning_rate": 2.2687542518828874e-09, - "loss": 0.4299, - "step": 398500 - }, - { - "epoch": 0.025198, - "loss_gen": 5.878081798553467, - "loss_rtd": 0.19720390439033508, - "loss_sent": 0.19777674973011017, - "loss_sod": 0.051944032311439514, - "loss_total": 0.44692468643188477, - "step": 398599 - }, - { - "epoch": 0.025198, - "loss_gen": 5.84797477722168, - "loss_rtd": 0.21240682899951935, - "loss_sent": 0.46165311336517334, - "loss_sod": 0.01230304129421711, - "loss_total": 0.6863629817962646, - "step": 398599 - }, - { - "epoch": 0.0252, - "grad_norm": 1.7682371139526367, - "learning_rate": 1.9765270641958213e-09, - "loss": 0.4085, - "step": 398600 - }, - { - "epoch": 0.025398, - "loss_gen": 5.527498245239258, - "loss_rtd": 0.17584048211574554, - "loss_sent": 0.0007072031730785966, - "loss_sod": 0.027694545686244965, - "loss_total": 0.20424222946166992, - "step": 398699 - }, - { - "epoch": 0.025398, - "loss_gen": 5.6394782066345215, - "loss_rtd": 0.19597937166690826, - "loss_sent": 0.027343623340129852, - "loss_sod": 0.07703717052936554, - "loss_total": 0.30036017298698425, - "step": 398699 - }, - { - "epoch": 0.0254, - "grad_norm": 0.8241334557533264, - "learning_rate": 1.704439074379227e-09, - "loss": 0.412, - "step": 398700 - }, - { - "epoch": 0.025598, - "loss_gen": 5.8821635246276855, - "loss_rtd": 0.2143273949623108, - "loss_sent": 0.1178353875875473, - "loss_sod": 0.017243245616555214, - "loss_total": 0.34940603375434875, - "step": 398799 - }, - { - "epoch": 0.025598, - "loss_gen": 5.804738521575928, - "loss_rtd": 0.19504877924919128, - "loss_sent": 0.025929627940058708, - "loss_sod": 0.018858319148421288, - "loss_total": 0.23983672261238098, - "step": 398799 - }, - { - "epoch": 0.0256, - "grad_norm": 0.6110307574272156, - "learning_rate": 1.4524903920398736e-09, - "loss": 0.4038, - "step": 398800 - }, - { - "epoch": 0.025798, - "loss_gen": 6.070090293884277, - "loss_rtd": 0.21025177836418152, - "loss_sent": 0.2085135132074356, - "loss_sod": 0.0638621523976326, - "loss_total": 0.4826274514198303, - "step": 398899 - }, - { - "epoch": 0.025798, - "loss_gen": 5.646976947784424, - "loss_rtd": 0.22182682156562805, - "loss_sent": 0.12924838066101074, - "loss_sod": 0.05625419318675995, - "loss_total": 0.40732938051223755, - "step": 398899 - }, - { - "epoch": 0.0258, - "grad_norm": 1.20877206325531, - "learning_rate": 1.2206811186576962e-09, - "loss": 0.4023, - "step": 398900 - }, - { - "epoch": 0.025998, - "loss_gen": 5.952920436859131, - "loss_rtd": 0.2104673832654953, - "loss_sent": 0.19287212193012238, - "loss_sod": 0.07483374327421188, - "loss_total": 0.47817325592041016, - "step": 398999 - }, - { - "epoch": 0.025998, - "loss_gen": 5.882963180541992, - "loss_rtd": 0.1719771921634674, - "loss_sent": 0.07575849443674088, - "loss_sod": 0.1464938074350357, - "loss_total": 0.3942295014858246, - "step": 398999 - }, - { - "epoch": 0.026, - "grad_norm": 1.297067403793335, - "learning_rate": 1.009011347602451e-09, - "loss": 0.4091, - "step": 399000 - }, - { - "epoch": 0.026, - "eval_loss": 0.4002399444580078, - "eval_runtime": 149.9812, - "eval_samples_per_second": 102.966, - "eval_steps_per_second": 0.807, - "step": 399000 - }, - { - "epoch": 0.026198, - "loss_gen": 6.132881164550781, - "loss_rtd": 0.20493392646312714, - "loss_sent": 0.5589383244514465, - "loss_sod": 0.023373253643512726, - "loss_total": 0.787245512008667, - "step": 399099 - }, - { - "epoch": 0.026198, - "loss_gen": 5.744200229644775, - "loss_rtd": 0.19487015902996063, - "loss_sent": 0.027530426159501076, - "loss_sod": 0.04802277684211731, - "loss_total": 0.27042335271835327, - "step": 399099 - }, - { - "epoch": 0.0262, - "grad_norm": 2.5343658924102783, - "learning_rate": 8.174811641392665e-10, - "loss": 0.4239, - "step": 399100 - }, - { - "epoch": 0.026398, - "loss_gen": 5.711893081665039, - "loss_rtd": 0.2208995521068573, - "loss_sent": 0.0921013280749321, - "loss_sod": 0.06058129295706749, - "loss_total": 0.3735821843147278, - "step": 399199 - }, - { - "epoch": 0.026398, - "loss_gen": 5.812560081481934, - "loss_rtd": 0.19347859919071198, - "loss_sent": 0.37179550528526306, - "loss_sod": 0.038559816777706146, - "loss_total": 0.6038339138031006, - "step": 399199 - }, - { - "epoch": 0.0264, - "grad_norm": 1.0608220100402832, - "learning_rate": 6.460906454175408e-10, - "loss": 0.4158, - "step": 399200 - }, - { - "epoch": 0.026598, - "loss_gen": 5.791370868682861, - "loss_rtd": 0.2209886759519577, - "loss_sent": 0.11350365728139877, - "loss_sod": 0.04800304025411606, - "loss_total": 0.38249537348747253, - "step": 399299 - }, - { - "epoch": 0.026598, - "loss_gen": 6.132655143737793, - "loss_rtd": 0.20953939855098724, - "loss_sent": 0.12483220547437668, - "loss_sod": 0.024725060909986496, - "loss_total": 0.3590966761112213, - "step": 399299 - }, - { - "epoch": 0.0266, - "grad_norm": 1.3723679780960083, - "learning_rate": 4.948398604709414e-10, - "loss": 0.4149, - "step": 399300 - }, - { - "epoch": 0.026798, - "loss_gen": 5.434885501861572, - "loss_rtd": 0.17824910581111908, - "loss_sent": 0.02135716937482357, - "loss_sod": 0.12772992253303528, - "loss_total": 0.3273361921310425, - "step": 399399 - }, - { - "epoch": 0.026798, - "loss_gen": 5.362635612487793, - "loss_rtd": 0.17097197473049164, - "loss_sent": 0.22683192789554596, - "loss_sod": 0.01930174231529236, - "loss_total": 0.41710564494132996, - "step": 399399 - }, - { - "epoch": 0.0268, - "grad_norm": 1.625372290611267, - "learning_rate": 3.637288702229569e-10, - "loss": 0.4104, - "step": 399400 - }, - { - "epoch": 0.026998, - "loss_gen": 5.829373836517334, - "loss_rtd": 0.2223738580942154, - "loss_sent": 0.2690161168575287, - "loss_sod": 0.02829951047897339, - "loss_total": 0.5196894407272339, - "step": 399499 - }, - { - "epoch": 0.026998, - "loss_gen": 6.015805244445801, - "loss_rtd": 0.20060022175312042, - "loss_sent": 0.08454405516386032, - "loss_sod": 0.05157846584916115, - "loss_total": 0.3367227613925934, - "step": 399499 - }, - { - "epoch": 0.027, - "grad_norm": 0.8972489833831787, - "learning_rate": 2.527577274868964e-10, - "loss": 0.4207, - "step": 399500 - }, - { - "epoch": 0.027198, - "loss_gen": 5.29140043258667, - "loss_rtd": 0.18922896683216095, - "loss_sent": 0.045968472957611084, - "loss_sod": 0.09831858426332474, - "loss_total": 0.33351603150367737, - "step": 399599 - }, - { - "epoch": 0.027198, - "loss_gen": 5.128223896026611, - "loss_rtd": 0.1511480212211609, - "loss_sent": 2.2403644834412262e-05, - "loss_sod": 0.08321768790483475, - "loss_total": 0.2343880981206894, - "step": 399599 - }, - { - "epoch": 0.0272, - "grad_norm": 1.0857374668121338, - "learning_rate": 1.6192647696033902e-10, - "loss": 0.4028, - "step": 399600 - }, - { - "epoch": 0.027398, - "loss_gen": 5.591905117034912, - "loss_rtd": 0.20010913908481598, - "loss_sent": 0.07157837599515915, - "loss_sod": 0.01919025555253029, - "loss_total": 0.2908777594566345, - "step": 399699 - }, - { - "epoch": 0.027398, - "loss_gen": 5.796738624572754, - "loss_rtd": 0.23129458725452423, - "loss_sent": 0.1561926305294037, - "loss_sod": 0.02767675742506981, - "loss_total": 0.41516396403312683, - "step": 399699 - }, - { - "epoch": 0.0274, - "grad_norm": 1.0839260816574097, - "learning_rate": 9.123515523068449e-11, - "loss": 0.4279, - "step": 399700 - }, - { - "epoch": 0.027598, - "loss_gen": 5.8244242668151855, - "loss_rtd": 0.219486802816391, - "loss_sent": 0.1759643703699112, - "loss_sod": 0.02567342296242714, - "loss_total": 0.42112457752227783, - "step": 399799 - }, - { - "epoch": 0.027598, - "loss_gen": 5.842320919036865, - "loss_rtd": 0.2068764567375183, - "loss_sent": 0.0915793776512146, - "loss_sod": 0.004401105921715498, - "loss_total": 0.3028569221496582, - "step": 399799 - }, - { - "epoch": 0.0276, - "grad_norm": 1.001084327697754, - "learning_rate": 4.068379076960227e-11, - "loss": 0.4144, - "step": 399800 - }, - { - "epoch": 0.027798, - "loss_gen": 5.913634300231934, - "loss_rtd": 0.1969427615404129, - "loss_sent": 0.3520510494709015, - "loss_sod": 0.01477159932255745, - "loss_total": 0.5637654066085815, - "step": 399899 - }, - { - "epoch": 0.027798, - "loss_gen": 5.979221820831299, - "loss_rtd": 0.22889800369739532, - "loss_sent": 0.4575820863246918, - "loss_sod": 0.05908600240945816, - "loss_total": 0.7455661296844482, - "step": 399899 - }, - { - "epoch": 0.0278, - "grad_norm": 2.4349780082702637, - "learning_rate": 1.0272403944133757e-11, - "loss": 0.4252, - "step": 399900 - }, - { - "epoch": 0.027998, - "loss_gen": 5.880891799926758, - "loss_rtd": 0.24432171881198883, - "loss_sent": 0.054657068103551865, - "loss_sod": 0.054939813911914825, - "loss_total": 0.3539186120033264, - "step": 399999 - }, - { - "epoch": 0.027998, - "loss_gen": 5.968878269195557, - "loss_rtd": 0.21166859567165375, - "loss_sent": 0.12592780590057373, - "loss_sod": 0.03586617857217789, - "loss_total": 0.37346258759498596, - "step": 399999 - }, - { - "epoch": 0.028, - "grad_norm": 1.1277533769607544, - "learning_rate": 1.0070000389106326e-15, - "loss": 0.4122, - "step": 400000 - }, - { - "epoch": 0.028, - "eval_loss": 0.3968818187713623, - "eval_runtime": 150.0239, - "eval_samples_per_second": 102.937, - "eval_steps_per_second": 0.807, - "step": 400000 } ], "logging_steps": 100, @@ -103234,7 +1360,7 @@ "attributes": {} } }, - "total_flos": 2.79402966417408e+19, + "total_flos": 7.41815485464576e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null