{
  "best_global_step": 4000,
  "best_metric": 31.3442,
  "best_model_checkpoint": "indictrans2-en-indic-dist-200M-en-indic-iitb-finetuned-eng_Latn-to-mar_Deva/checkpoint-4000",
  "epoch": 3.878910532085336,
  "eval_steps": 4000,
  "global_step": 92000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004216207100092757,
      "grad_norm": 10.629733085632324,
      "learning_rate": 1.999165190994182e-05,
      "loss": 6.2178,
      "step": 100
    },
    {
      "epoch": 0.008432414200185513,
      "grad_norm": 10.794089317321777,
      "learning_rate": 1.9983219495741632e-05,
      "loss": 4.5911,
      "step": 200
    },
    {
      "epoch": 0.01264862130027827,
      "grad_norm": 11.978194236755371,
      "learning_rate": 1.9974787081541448e-05,
      "loss": 3.3837,
      "step": 300
    },
    {
      "epoch": 0.016864828400371026,
      "grad_norm": 10.057585716247559,
      "learning_rate": 1.996635466734126e-05,
      "loss": 2.3077,
      "step": 400
    },
    {
      "epoch": 0.021081035500463783,
      "grad_norm": 6.098118782043457,
      "learning_rate": 1.9957922253141076e-05,
      "loss": 1.4151,
      "step": 500
    },
    {
      "epoch": 0.02529724260055654,
      "grad_norm": 2.8122458457946777,
      "learning_rate": 1.994948983894089e-05,
      "loss": 0.8033,
      "step": 600
    },
    {
      "epoch": 0.029513449700649296,
      "grad_norm": 1.5206619501113892,
      "learning_rate": 1.9941057424740704e-05,
      "loss": 0.5806,
      "step": 700
    },
    {
      "epoch": 0.03372965680074205,
      "grad_norm": 2.194688558578491,
      "learning_rate": 1.993262501054052e-05,
      "loss": 0.5271,
      "step": 800
    },
    {
      "epoch": 0.037945863900834806,
      "grad_norm": 1.3595080375671387,
      "learning_rate": 1.9924192596340336e-05,
      "loss": 0.5115,
      "step": 900
    },
    {
      "epoch": 0.042162071000927566,
      "grad_norm": 1.391483187675476,
      "learning_rate": 1.9915760182140148e-05,
      "loss": 0.529,
      "step": 1000
    },
    {
      "epoch": 0.04637827810102032,
      "grad_norm": 1.4901494979858398,
      "learning_rate": 1.9907327767939964e-05,
      "loss": 0.5027,
      "step": 1100
    },
    {
      "epoch": 0.05059448520111308,
      "grad_norm": 1.3334758281707764,
      "learning_rate": 1.9898895353739776e-05,
      "loss": 0.4803,
      "step": 1200
    },
    {
      "epoch": 0.05481069230120583,
      "grad_norm": 1.407790184020996,
      "learning_rate": 1.9890462939539592e-05,
      "loss": 0.4897,
      "step": 1300
    },
    {
      "epoch": 0.05902689940129859,
      "grad_norm": 0.8968947529792786,
      "learning_rate": 1.9882030525339408e-05,
      "loss": 0.4822,
      "step": 1400
    },
    {
      "epoch": 0.06324310650139135,
      "grad_norm": 1.3116236925125122,
      "learning_rate": 1.987359811113922e-05,
      "loss": 0.4815,
      "step": 1500
    },
    {
      "epoch": 0.0674593136014841,
      "grad_norm": 1.0337741374969482,
      "learning_rate": 1.9865165696939036e-05,
      "loss": 0.4704,
      "step": 1600
    },
    {
      "epoch": 0.07167552070157686,
      "grad_norm": 1.1927516460418701,
      "learning_rate": 1.985673328273885e-05,
      "loss": 0.4721,
      "step": 1700
    },
    {
      "epoch": 0.07589172780166961,
      "grad_norm": 0.9303850531578064,
      "learning_rate": 1.9848300868538664e-05,
      "loss": 0.4703,
      "step": 1800
    },
    {
      "epoch": 0.08010793490176238,
      "grad_norm": 1.325838565826416,
      "learning_rate": 1.9839868454338477e-05,
      "loss": 0.4648,
      "step": 1900
    },
    {
      "epoch": 0.08432414200185513,
      "grad_norm": 0.8775719404220581,
      "learning_rate": 1.9831436040138292e-05,
      "loss": 0.4916,
      "step": 2000
    },
    {
      "epoch": 0.08854034910194788,
      "grad_norm": 1.0944805145263672,
      "learning_rate": 1.9823003625938108e-05,
      "loss": 0.4771,
      "step": 2100
    },
    {
      "epoch": 0.09275655620204064,
      "grad_norm": 1.6143155097961426,
      "learning_rate": 1.9814571211737924e-05,
      "loss": 0.466,
      "step": 2200
    },
    {
      "epoch": 0.0969727633021334,
      "grad_norm": 1.1526069641113281,
      "learning_rate": 1.9806138797537736e-05,
      "loss": 0.4543,
      "step": 2300
    },
    {
      "epoch": 0.10118897040222616,
      "grad_norm": 1.3782904148101807,
      "learning_rate": 1.9797706383337552e-05,
      "loss": 0.4671,
      "step": 2400
    },
    {
      "epoch": 0.10540517750231891,
      "grad_norm": 1.2834385633468628,
      "learning_rate": 1.9789273969137365e-05,
      "loss": 0.4682,
      "step": 2500
    },
    {
      "epoch": 0.10962138460241166,
      "grad_norm": 1.5170279741287231,
      "learning_rate": 1.978084155493718e-05,
      "loss": 0.4645,
      "step": 2600
    },
    {
      "epoch": 0.11383759170250443,
      "grad_norm": 1.4424182176589966,
      "learning_rate": 1.9772409140736996e-05,
      "loss": 0.4746,
      "step": 2700
    },
    {
      "epoch": 0.11805379880259718,
      "grad_norm": 1.9791672229766846,
      "learning_rate": 1.976397672653681e-05,
      "loss": 0.4495,
      "step": 2800
    },
    {
      "epoch": 0.12227000590268994,
      "grad_norm": 1.112899899482727,
      "learning_rate": 1.9755544312336624e-05,
      "loss": 0.452,
      "step": 2900
    },
    {
      "epoch": 0.1264862130027827,
      "grad_norm": 1.2428970336914062,
      "learning_rate": 1.9747111898136437e-05,
      "loss": 0.4559,
      "step": 3000
    },
    {
      "epoch": 0.13070242010287544,
      "grad_norm": 0.8663131594657898,
      "learning_rate": 1.9738679483936252e-05,
      "loss": 0.4377,
      "step": 3100
    },
    {
      "epoch": 0.1349186272029682,
      "grad_norm": 1.1876728534698486,
      "learning_rate": 1.9730247069736065e-05,
      "loss": 0.4525,
      "step": 3200
    },
    {
      "epoch": 0.13913483430306098,
      "grad_norm": 0.9078199863433838,
      "learning_rate": 1.9721814655535884e-05,
      "loss": 0.4382,
      "step": 3300
    },
    {
      "epoch": 0.14335104140315372,
      "grad_norm": 0.9842768907546997,
      "learning_rate": 1.9713382241335696e-05,
      "loss": 0.4292,
      "step": 3400
    },
    {
      "epoch": 0.14756724850324648,
      "grad_norm": 1.2574409246444702,
      "learning_rate": 1.9704949827135512e-05,
      "loss": 0.4372,
      "step": 3500
    },
    {
      "epoch": 0.15178345560333922,
      "grad_norm": 1.0580419301986694,
      "learning_rate": 1.9696517412935325e-05,
      "loss": 0.4531,
      "step": 3600
    },
    {
      "epoch": 0.155999662703432,
      "grad_norm": 1.4315508604049683,
      "learning_rate": 1.968808499873514e-05,
      "loss": 0.4503,
      "step": 3700
    },
    {
      "epoch": 0.16021586980352476,
      "grad_norm": 1.4435293674468994,
      "learning_rate": 1.9679652584534953e-05,
      "loss": 0.4472,
      "step": 3800
    },
    {
      "epoch": 0.1644320769036175,
      "grad_norm": 1.1712230443954468,
      "learning_rate": 1.967122017033477e-05,
      "loss": 0.4495,
      "step": 3900
    },
    {
      "epoch": 0.16864828400371026,
      "grad_norm": 0.952250599861145,
      "learning_rate": 1.966278775613458e-05,
      "loss": 0.4274,
      "step": 4000
    },
    {
      "epoch": 0.16864828400371026,
      "eval_bleu": 9.1559,
      "eval_bleurt": null,
      "eval_chrfpp": 31.3442,
      "eval_comet": 0.5302,
      "eval_gen_len": 20.8691,
      "eval_loss": 0.4290911853313446,
      "eval_runtime": 1006.2399,
      "eval_samples_per_second": 47.141,
      "eval_steps_per_second": 2.947,
      "step": 4000
    },
    {
      "epoch": 0.17286449110380303,
      "grad_norm": 0.8649118542671204,
      "learning_rate": 1.9654355341934397e-05,
      "loss": 0.4538,
      "step": 4100
    },
    {
      "epoch": 0.17708069820389577,
      "grad_norm": 1.0827510356903076,
      "learning_rate": 1.9645922927734212e-05,
      "loss": 0.4334,
      "step": 4200
    },
    {
      "epoch": 0.18129690530398854,
      "grad_norm": 1.2431646585464478,
      "learning_rate": 1.9637490513534025e-05,
      "loss": 0.4319,
      "step": 4300
    },
    {
      "epoch": 0.18551311240408128,
      "grad_norm": 1.222221851348877,
      "learning_rate": 1.962905809933384e-05,
      "loss": 0.4401,
      "step": 4400
    },
    {
      "epoch": 0.18972931950417404,
      "grad_norm": 0.8401798605918884,
      "learning_rate": 1.9620625685133656e-05,
      "loss": 0.4312,
      "step": 4500
    },
    {
      "epoch": 0.1939455266042668,
      "grad_norm": 1.088202714920044,
      "learning_rate": 1.9612193270933472e-05,
      "loss": 0.4476,
      "step": 4600
    },
    {
      "epoch": 0.19816173370435955,
      "grad_norm": 1.2317404747009277,
      "learning_rate": 1.9603760856733285e-05,
      "loss": 0.4456,
      "step": 4700
    },
    {
      "epoch": 0.20237794080445232,
      "grad_norm": 2.003664255142212,
      "learning_rate": 1.95953284425331e-05,
      "loss": 0.4337,
      "step": 4800
    },
    {
      "epoch": 0.20659414790454508,
      "grad_norm": 0.987022340297699,
      "learning_rate": 1.9586896028332913e-05,
      "loss": 0.4368,
      "step": 4900
    },
    {
      "epoch": 0.21081035500463782,
      "grad_norm": 1.0784544944763184,
      "learning_rate": 1.957846361413273e-05,
      "loss": 0.4447,
      "step": 5000
    },
    {
      "epoch": 0.2150265621047306,
      "grad_norm": 0.9378799796104431,
      "learning_rate": 1.957003119993254e-05,
      "loss": 0.4217,
      "step": 5100
    },
    {
      "epoch": 0.21924276920482333,
      "grad_norm": 1.2435382604599,
      "learning_rate": 1.9561598785732357e-05,
      "loss": 0.4357,
      "step": 5200
    },
    {
      "epoch": 0.2234589763049161,
      "grad_norm": 1.362450361251831,
      "learning_rate": 1.955316637153217e-05,
      "loss": 0.4507,
      "step": 5300
    },
    {
      "epoch": 0.22767518340500886,
      "grad_norm": 1.155408263206482,
      "learning_rate": 1.9544733957331985e-05,
      "loss": 0.4283,
      "step": 5400
    },
    {
      "epoch": 0.2318913905051016,
      "grad_norm": 1.3443132638931274,
      "learning_rate": 1.95363015431318e-05,
      "loss": 0.4333,
      "step": 5500
    },
    {
      "epoch": 0.23610759760519437,
      "grad_norm": 1.2103915214538574,
      "learning_rate": 1.9527869128931616e-05,
      "loss": 0.4337,
      "step": 5600
    },
    {
      "epoch": 0.24032380470528714,
      "grad_norm": 1.0087685585021973,
      "learning_rate": 1.951943671473143e-05,
      "loss": 0.416,
      "step": 5700
    },
    {
      "epoch": 0.24454001180537988,
      "grad_norm": 1.2199865579605103,
      "learning_rate": 1.9511004300531245e-05,
      "loss": 0.4331,
      "step": 5800
    },
    {
      "epoch": 0.24875621890547264,
      "grad_norm": 0.9081279039382935,
      "learning_rate": 1.9502571886331057e-05,
      "loss": 0.4227,
      "step": 5900
    },
    {
      "epoch": 0.2529724260055654,
      "grad_norm": 1.2454545497894287,
      "learning_rate": 1.9494139472130873e-05,
      "loss": 0.4295,
      "step": 6000
    },
    {
      "epoch": 0.25718863310565815,
      "grad_norm": 1.2220704555511475,
      "learning_rate": 1.948570705793069e-05,
      "loss": 0.4344,
      "step": 6100
    },
    {
      "epoch": 0.2614048402057509,
      "grad_norm": 0.9363239407539368,
      "learning_rate": 1.94772746437305e-05,
      "loss": 0.4311,
      "step": 6200
    },
    {
      "epoch": 0.2656210473058437,
      "grad_norm": 1.3526592254638672,
      "learning_rate": 1.9468842229530317e-05,
      "loss": 0.4196,
      "step": 6300
    },
    {
      "epoch": 0.2698372544059364,
      "grad_norm": 1.1111302375793457,
      "learning_rate": 1.946040981533013e-05,
      "loss": 0.4196,
      "step": 6400
    },
    {
      "epoch": 0.27405346150602916,
      "grad_norm": 1.1077983379364014,
      "learning_rate": 1.9451977401129945e-05,
      "loss": 0.43,
      "step": 6500
    },
    {
      "epoch": 0.27826966860612196,
      "grad_norm": 1.3143802881240845,
      "learning_rate": 1.9443544986929757e-05,
      "loss": 0.4085,
      "step": 6600
    },
    {
      "epoch": 0.2824858757062147,
      "grad_norm": 0.971645176410675,
      "learning_rate": 1.9435112572729573e-05,
      "loss": 0.431,
      "step": 6700
    },
    {
      "epoch": 0.28670208280630743,
      "grad_norm": 1.212292194366455,
      "learning_rate": 1.942668015852939e-05,
      "loss": 0.4243,
      "step": 6800
    },
    {
      "epoch": 0.29091828990640023,
      "grad_norm": 1.331641435623169,
      "learning_rate": 1.9418247744329205e-05,
      "loss": 0.4142,
      "step": 6900
    },
    {
      "epoch": 0.29513449700649297,
      "grad_norm": 1.3871821165084839,
      "learning_rate": 1.9409815330129017e-05,
      "loss": 0.416,
      "step": 7000
    },
    {
      "epoch": 0.2993507041065857,
      "grad_norm": 1.1271859407424927,
      "learning_rate": 1.9401382915928833e-05,
      "loss": 0.4228,
      "step": 7100
    },
    {
      "epoch": 0.30356691120667845,
      "grad_norm": 0.9559811353683472,
      "learning_rate": 1.9392950501728645e-05,
      "loss": 0.4135,
      "step": 7200
    },
    {
      "epoch": 0.30778311830677124,
      "grad_norm": 0.8394259810447693,
      "learning_rate": 1.938451808752846e-05,
      "loss": 0.4333,
      "step": 7300
    },
    {
      "epoch": 0.311999325406864,
      "grad_norm": 0.82978755235672,
      "learning_rate": 1.9376085673328277e-05,
      "loss": 0.4198,
      "step": 7400
    },
    {
      "epoch": 0.3162155325069567,
      "grad_norm": 1.0237234830856323,
      "learning_rate": 1.936765325912809e-05,
      "loss": 0.4333,
      "step": 7500
    },
    {
      "epoch": 0.3204317396070495,
      "grad_norm": 1.2100563049316406,
      "learning_rate": 1.9359220844927905e-05,
      "loss": 0.4286,
      "step": 7600
    },
    {
      "epoch": 0.32464794670714225,
      "grad_norm": 1.3460373878479004,
      "learning_rate": 1.9350788430727717e-05,
      "loss": 0.4275,
      "step": 7700
    },
    {
      "epoch": 0.328864153807235,
      "grad_norm": 1.0469090938568115,
      "learning_rate": 1.9342356016527533e-05,
      "loss": 0.4285,
      "step": 7800
    },
    {
      "epoch": 0.3330803609073278,
      "grad_norm": 1.6244388818740845,
      "learning_rate": 1.9333923602327346e-05,
      "loss": 0.4311,
      "step": 7900
    },
    {
      "epoch": 0.3372965680074205,
      "grad_norm": 1.1776193380355835,
      "learning_rate": 1.9325491188127165e-05,
      "loss": 0.414,
      "step": 8000
    },
    {
      "epoch": 0.3372965680074205,
      "eval_bleu": 9.4762,
      "eval_bleurt": null,
      "eval_chrfpp": 31.7112,
      "eval_comet": 0.5327,
      "eval_gen_len": 20.8762,
      "eval_loss": 0.40556150674819946,
      "eval_runtime": 1022.9164,
      "eval_samples_per_second": 46.372,
      "eval_steps_per_second": 2.899,
      "step": 8000
    },
    {
      "epoch": 0.34151277510751327,
      "grad_norm": 1.427694320678711,
      "learning_rate": 1.9317058773926977e-05,
      "loss": 0.4135,
      "step": 8100
    },
    {
      "epoch": 0.34572898220760606,
      "grad_norm": 1.3969519138336182,
      "learning_rate": 1.9308626359726793e-05,
      "loss": 0.424,
      "step": 8200
    },
    {
      "epoch": 0.3499451893076988,
      "grad_norm": 1.012691617012024,
      "learning_rate": 1.9300193945526605e-05,
      "loss": 0.4239,
      "step": 8300
    },
    {
      "epoch": 0.35416139640779154,
      "grad_norm": 1.0593016147613525,
      "learning_rate": 1.929176153132642e-05,
      "loss": 0.4066,
      "step": 8400
    },
    {
      "epoch": 0.35837760350788433,
      "grad_norm": 0.7688089609146118,
      "learning_rate": 1.9283329117126233e-05,
      "loss": 0.4097,
      "step": 8500
    },
    {
      "epoch": 0.3625938106079771,
      "grad_norm": 1.1880069971084595,
      "learning_rate": 1.927489670292605e-05,
      "loss": 0.4231,
      "step": 8600
    },
    {
      "epoch": 0.3668100177080698,
      "grad_norm": 1.010106086730957,
      "learning_rate": 1.9266464288725865e-05,
      "loss": 0.4211,
      "step": 8700
    },
    {
      "epoch": 0.37102622480816255,
      "grad_norm": 1.1799863576889038,
      "learning_rate": 1.9258031874525677e-05,
      "loss": 0.414,
      "step": 8800
    },
    {
      "epoch": 0.37524243190825535,
      "grad_norm": 1.1016535758972168,
      "learning_rate": 1.9249599460325493e-05,
      "loss": 0.4167,
      "step": 8900
    },
    {
      "epoch": 0.3794586390083481,
      "grad_norm": 0.9111543297767639,
      "learning_rate": 1.9241167046125306e-05,
      "loss": 0.4116,
      "step": 9000
    },
    {
      "epoch": 0.3836748461084408,
      "grad_norm": 1.0465009212493896,
      "learning_rate": 1.923273463192512e-05,
      "loss": 0.4029,
      "step": 9100
    },
    {
      "epoch": 0.3878910532085336,
      "grad_norm": 1.1918885707855225,
      "learning_rate": 1.9224302217724937e-05,
      "loss": 0.4129,
      "step": 9200
    },
    {
      "epoch": 0.39210726030862636,
      "grad_norm": 0.9597665667533875,
      "learning_rate": 1.9215869803524753e-05,
      "loss": 0.4158,
      "step": 9300
    },
    {
      "epoch": 0.3963234674087191,
      "grad_norm": 1.3496443033218384,
      "learning_rate": 1.9207437389324565e-05,
      "loss": 0.4103,
      "step": 9400
    },
    {
      "epoch": 0.4005396745088119,
      "grad_norm": 0.9334352016448975,
      "learning_rate": 1.919900497512438e-05,
      "loss": 0.4165,
      "step": 9500
    },
    {
      "epoch": 0.40475588160890463,
      "grad_norm": 0.7650086879730225,
      "learning_rate": 1.9190572560924194e-05,
      "loss": 0.4126,
      "step": 9600
    },
    {
      "epoch": 0.40897208870899737,
      "grad_norm": 0.8084037899971008,
      "learning_rate": 1.918214014672401e-05,
      "loss": 0.409,
      "step": 9700
    },
    {
      "epoch": 0.41318829580909017,
      "grad_norm": 2.67522931098938,
      "learning_rate": 1.9173707732523822e-05,
      "loss": 0.4211,
      "step": 9800
    },
    {
      "epoch": 0.4174045029091829,
      "grad_norm": 1.3551160097122192,
      "learning_rate": 1.9165275318323637e-05,
      "loss": 0.4273,
      "step": 9900
    },
    {
      "epoch": 0.42162071000927565,
      "grad_norm": 1.408456802368164,
      "learning_rate": 1.9156842904123453e-05,
      "loss": 0.4069,
      "step": 10000
    },
    {
      "epoch": 0.4258369171093684,
      "grad_norm": 0.8736149668693542,
      "learning_rate": 1.9148410489923266e-05,
      "loss": 0.4086,
      "step": 10100
    },
    {
      "epoch": 0.4300531242094612,
      "grad_norm": 0.9605196118354797,
      "learning_rate": 1.913997807572308e-05,
      "loss": 0.4032,
      "step": 10200
    },
    {
      "epoch": 0.4342693313095539,
      "grad_norm": 0.9522096514701843,
      "learning_rate": 1.9131545661522894e-05,
      "loss": 0.4225,
      "step": 10300
    },
    {
      "epoch": 0.43848553840964666,
      "grad_norm": 1.1162022352218628,
      "learning_rate": 1.912311324732271e-05,
      "loss": 0.4092,
      "step": 10400
    },
    {
      "epoch": 0.44270174550973945,
      "grad_norm": 1.0420705080032349,
      "learning_rate": 1.9114680833122525e-05,
      "loss": 0.4015,
      "step": 10500
    },
    {
      "epoch": 0.4469179526098322,
      "grad_norm": 1.5382574796676636,
      "learning_rate": 1.910624841892234e-05,
      "loss": 0.4084,
      "step": 10600
    },
    {
      "epoch": 0.45113415970992493,
      "grad_norm": 1.0530604124069214,
      "learning_rate": 1.9097816004722154e-05,
      "loss": 0.4182,
      "step": 10700
    },
    {
      "epoch": 0.4553503668100177,
      "grad_norm": 1.0158157348632812,
      "learning_rate": 1.908938359052197e-05,
      "loss": 0.3976,
      "step": 10800
    },
    {
      "epoch": 0.45956657391011047,
      "grad_norm": 1.01460862159729,
      "learning_rate": 1.9080951176321782e-05,
      "loss": 0.4104,
      "step": 10900
    },
    {
      "epoch": 0.4637827810102032,
      "grad_norm": 0.9880945682525635,
      "learning_rate": 1.9072518762121598e-05,
      "loss": 0.4077,
      "step": 11000
    },
    {
      "epoch": 0.467998988110296,
      "grad_norm": 0.9603067636489868,
      "learning_rate": 1.906408634792141e-05,
      "loss": 0.4121,
      "step": 11100
    },
    {
      "epoch": 0.47221519521038874,
      "grad_norm": 1.2611957788467407,
      "learning_rate": 1.9055653933721226e-05,
      "loss": 0.4074,
      "step": 11200
    },
    {
      "epoch": 0.4764314023104815,
      "grad_norm": 1.094708800315857,
      "learning_rate": 1.9047221519521038e-05,
      "loss": 0.3905,
      "step": 11300
    },
    {
      "epoch": 0.4806476094105743,
      "grad_norm": 0.9691109657287598,
      "learning_rate": 1.9038789105320854e-05,
      "loss": 0.4066,
      "step": 11400
    },
    {
      "epoch": 0.484863816510667,
      "grad_norm": 1.3236321210861206,
      "learning_rate": 1.903035669112067e-05,
      "loss": 0.4216,
      "step": 11500
    },
    {
      "epoch": 0.48908002361075975,
      "grad_norm": 0.8272280693054199,
      "learning_rate": 1.9021924276920485e-05,
      "loss": 0.4048,
      "step": 11600
    },
    {
      "epoch": 0.4932962307108525,
      "grad_norm": 1.0969959497451782,
      "learning_rate": 1.9013491862720298e-05,
      "loss": 0.409,
      "step": 11700
    },
    {
      "epoch": 0.4975124378109453,
      "grad_norm": 1.1777257919311523,
      "learning_rate": 1.9005059448520114e-05,
      "loss": 0.3956,
      "step": 11800
    },
    {
      "epoch": 0.5017286449110381,
      "grad_norm": 1.3242672681808472,
      "learning_rate": 1.899662703431993e-05,
      "loss": 0.4,
      "step": 11900
    },
    {
      "epoch": 0.5059448520111308,
      "grad_norm": 0.9798252582550049,
      "learning_rate": 1.8988194620119742e-05,
      "loss": 0.4048,
      "step": 12000
    },
    {
      "epoch": 0.5059448520111308,
      "eval_bleu": 9.7085,
      "eval_bleurt": null,
      "eval_chrfpp": 32.0437,
      "eval_comet": 0.534,
      "eval_gen_len": 20.8716,
      "eval_loss": 0.3886358141899109,
      "eval_runtime": 1131.9138,
      "eval_samples_per_second": 41.907,
      "eval_steps_per_second": 2.619,
      "step": 12000
    },
    {
      "epoch": 0.5101610591112236,
      "grad_norm": 1.1658809185028076,
      "learning_rate": 1.8979762205919558e-05,
      "loss": 0.4064,
      "step": 12100
    },
    {
      "epoch": 0.5143772662113163,
      "grad_norm": 1.077453374862671,
      "learning_rate": 1.897132979171937e-05,
      "loss": 0.3909,
      "step": 12200
    },
    {
      "epoch": 0.518593473311409,
      "grad_norm": 0.7280858159065247,
      "learning_rate": 1.8962897377519186e-05,
      "loss": 0.3804,
      "step": 12300
    },
    {
      "epoch": 0.5228096804115018,
      "grad_norm": 0.9924391508102417,
      "learning_rate": 1.8954464963318998e-05,
      "loss": 0.3938,
      "step": 12400
    },
    {
      "epoch": 0.5270258875115945,
      "grad_norm": 1.1247611045837402,
      "learning_rate": 1.8946032549118814e-05,
      "loss": 0.4073,
      "step": 12500
    },
    {
      "epoch": 0.5312420946116874,
      "grad_norm": 1.0452404022216797,
      "learning_rate": 1.8937600134918626e-05,
      "loss": 0.4064,
      "step": 12600
    },
    {
      "epoch": 0.5354583017117801,
      "grad_norm": 1.024165153503418,
      "learning_rate": 1.8929167720718445e-05,
      "loss": 0.4067,
      "step": 12700
    },
    {
      "epoch": 0.5396745088118728,
      "grad_norm": 1.2145025730133057,
      "learning_rate": 1.8920735306518258e-05,
      "loss": 0.4021,
      "step": 12800
    },
    {
      "epoch": 0.5438907159119656,
      "grad_norm": 1.086727499961853,
      "learning_rate": 1.8912302892318074e-05,
      "loss": 0.3882,
      "step": 12900
    },
    {
      "epoch": 0.5481069230120583,
      "grad_norm": 0.8512001633644104,
      "learning_rate": 1.8903870478117886e-05,
      "loss": 0.4171,
      "step": 13000
    },
    {
      "epoch": 0.5523231301121511,
      "grad_norm": 1.3099777698516846,
      "learning_rate": 1.8895438063917702e-05,
      "loss": 0.4128,
      "step": 13100
    },
    {
      "epoch": 0.5565393372122439,
      "grad_norm": 1.1675434112548828,
      "learning_rate": 1.8887005649717514e-05,
      "loss": 0.4096,
      "step": 13200
    },
    {
      "epoch": 0.5607555443123367,
      "grad_norm": 0.8974719047546387,
      "learning_rate": 1.887857323551733e-05,
      "loss": 0.4226,
      "step": 13300
    },
    {
      "epoch": 0.5649717514124294,
      "grad_norm": 0.967807948589325,
      "learning_rate": 1.8870140821317146e-05,
      "loss": 0.3983,
      "step": 13400
    },
    {
      "epoch": 0.5691879585125221,
      "grad_norm": 1.1763675212860107,
      "learning_rate": 1.8861708407116958e-05,
      "loss": 0.4071,
      "step": 13500
    },
    {
      "epoch": 0.5734041656126149,
      "grad_norm": 1.1269315481185913,
      "learning_rate": 1.8853275992916774e-05,
      "loss": 0.395,
      "step": 13600
    },
    {
      "epoch": 0.5776203727127076,
      "grad_norm": 1.2609223127365112,
      "learning_rate": 1.8844843578716586e-05,
      "loss": 0.387,
      "step": 13700
    },
    {
      "epoch": 0.5818365798128005,
      "grad_norm": 1.0252714157104492,
      "learning_rate": 1.8836411164516402e-05,
      "loss": 0.4011,
      "step": 13800
    },
    {
      "epoch": 0.5860527869128932,
      "grad_norm": 0.8436282277107239,
      "learning_rate": 1.8827978750316218e-05,
      "loss": 0.3984,
      "step": 13900
    },
    {
      "epoch": 0.5902689940129859,
      "grad_norm": 0.9598125219345093,
      "learning_rate": 1.8819546336116034e-05,
      "loss": 0.3923,
      "step": 14000
    },
    {
      "epoch": 0.5944852011130787,
      "grad_norm": 0.8917134404182434,
      "learning_rate": 1.8811113921915846e-05,
      "loss": 0.4149,
      "step": 14100
    },
    {
      "epoch": 0.5987014082131714,
      "grad_norm": 0.9456690549850464,
      "learning_rate": 1.8802681507715662e-05,
      "loss": 0.3969,
      "step": 14200
    },
    {
      "epoch": 0.6029176153132642,
      "grad_norm": 1.3810299634933472,
      "learning_rate": 1.8794249093515474e-05,
      "loss": 0.4047,
      "step": 14300
    },
    {
      "epoch": 0.6071338224133569,
      "grad_norm": 0.9798800945281982,
      "learning_rate": 1.878581667931529e-05,
      "loss": 0.3943,
      "step": 14400
    },
    {
      "epoch": 0.6113500295134497,
      "grad_norm": 1.0832455158233643,
      "learning_rate": 1.8777384265115102e-05,
      "loss": 0.4021,
      "step": 14500
    },
    {
      "epoch": 0.6155662366135425,
      "grad_norm": 1.0834710597991943,
      "learning_rate": 1.8768951850914918e-05,
      "loss": 0.4116,
      "step": 14600
    },
    {
      "epoch": 0.6197824437136352,
      "grad_norm": 1.3946242332458496,
      "learning_rate": 1.8760519436714734e-05,
      "loss": 0.386,
      "step": 14700
    },
    {
      "epoch": 0.623998650813728,
      "grad_norm": 1.0280137062072754,
      "learning_rate": 1.8752087022514546e-05,
      "loss": 0.3944,
      "step": 14800
    },
    {
      "epoch": 0.6282148579138207,
      "grad_norm": 0.9602075815200806,
      "learning_rate": 1.8743654608314362e-05,
      "loss": 0.3982,
      "step": 14900
    },
    {
      "epoch": 0.6324310650139134,
      "grad_norm": 0.9291537404060364,
      "learning_rate": 1.8735222194114175e-05,
      "loss": 0.3926,
      "step": 15000
    },
    {
      "epoch": 0.6366472721140063,
      "grad_norm": 0.811850905418396,
      "learning_rate": 1.872678977991399e-05,
      "loss": 0.4024,
      "step": 15100
    },
    {
      "epoch": 0.640863479214099,
      "grad_norm": 0.8130801916122437,
      "learning_rate": 1.8718357365713806e-05,
      "loss": 0.3984,
      "step": 15200
    },
    {
      "epoch": 0.6450796863141918,
      "grad_norm": 0.8128789067268372,
      "learning_rate": 1.8709924951513622e-05,
      "loss": 0.3832,
      "step": 15300
    },
    {
      "epoch": 0.6492958934142845,
      "grad_norm": 1.1947672367095947,
      "learning_rate": 1.8701492537313434e-05,
      "loss": 0.3912,
      "step": 15400
    },
    {
      "epoch": 0.6535121005143772,
      "grad_norm": 1.2158654928207397,
      "learning_rate": 1.869306012311325e-05,
      "loss": 0.3876,
      "step": 15500
    },
    {
      "epoch": 0.65772830761447,
      "grad_norm": 0.8703183531761169,
      "learning_rate": 1.8684627708913062e-05,
      "loss": 0.3862,
      "step": 15600
    },
    {
      "epoch": 0.6619445147145627,
      "grad_norm": 0.9874376058578491,
      "learning_rate": 1.8676195294712878e-05,
      "loss": 0.3886,
      "step": 15700
    },
    {
      "epoch": 0.6661607218146556,
      "grad_norm": 1.0630080699920654,
      "learning_rate": 1.866776288051269e-05,
      "loss": 0.3941,
      "step": 15800
    },
    {
      "epoch": 0.6703769289147483,
      "grad_norm": 1.0598597526550293,
      "learning_rate": 1.8659330466312506e-05,
      "loss": 0.3898,
      "step": 15900
    },
    {
      "epoch": 0.674593136014841,
      "grad_norm": 0.6748641729354858,
      "learning_rate": 1.8650898052112322e-05,
      "loss": 0.3972,
      "step": 16000
    },
    {
      "epoch": 0.674593136014841,
      "eval_bleu": 10.0416,
      "eval_bleurt": null,
      "eval_chrfpp": 32.3205,
      "eval_comet": 0.5353,
      "eval_gen_len": 20.8769,
      "eval_loss": 0.3783491253852844,
      "eval_runtime": 1363.5698,
      "eval_samples_per_second": 34.787,
      "eval_steps_per_second": 2.174,
      "step": 16000
    },
    {
      "epoch": 0.6788093431149338,
      "grad_norm": 1.0870927572250366,
      "learning_rate": 1.8642465637912135e-05,
      "loss": 0.4016,
      "step": 16100
    },
    {
      "epoch": 0.6830255502150265,
      "grad_norm": 1.2668064832687378,
      "learning_rate": 1.863403322371195e-05,
      "loss": 0.3733,
      "step": 16200
    },
    {
      "epoch": 0.6872417573151193,
      "grad_norm": 1.145337700843811,
      "learning_rate": 1.8625600809511766e-05,
      "loss": 0.3888,
      "step": 16300
    },
    {
      "epoch": 0.6914579644152121,
      "grad_norm": 1.0644266605377197,
      "learning_rate": 1.861716839531158e-05,
      "loss": 0.4011,
      "step": 16400
    },
    {
      "epoch": 0.6956741715153049,
      "grad_norm": 1.0268157720565796,
      "learning_rate": 1.8608735981111394e-05,
      "loss": 0.3988,
      "step": 16500
    },
    {
      "epoch": 0.6998903786153976,
      "grad_norm": 1.1735461950302124,
      "learning_rate": 1.860030356691121e-05,
      "loss": 0.3953,
      "step": 16600
    },
    {
      "epoch": 0.7041065857154903,
      "grad_norm": 1.0253026485443115,
      "learning_rate": 1.8591871152711023e-05,
      "loss": 0.392,
      "step": 16700
    },
    {
      "epoch": 0.7083227928155831,
      "grad_norm": 1.061868667602539,
      "learning_rate": 1.858343873851084e-05,
      "loss": 0.3851,
      "step": 16800
    },
    {
      "epoch": 0.7125389999156758,
      "grad_norm": 1.8942055702209473,
      "learning_rate": 1.857500632431065e-05,
      "loss": 0.4071,
      "step": 16900
    },
    {
      "epoch": 0.7167552070157687,
      "grad_norm": 1.0703763961791992,
      "learning_rate": 1.8566573910110466e-05,
      "loss": 0.3964,
      "step": 17000
    },
    {
      "epoch": 0.7209714141158614,
      "grad_norm": 1.1151158809661865,
      "learning_rate": 1.855814149591028e-05,
      "loss": 0.3837,
      "step": 17100
    },
    {
      "epoch": 0.7251876212159541,
      "grad_norm": 0.9483737349510193,
      "learning_rate": 1.8549709081710095e-05,
      "loss": 0.3994,
      "step": 17200
    },
    {
      "epoch": 0.7294038283160469,
      "grad_norm": 1.2941887378692627,
      "learning_rate": 1.8541276667509907e-05,
      "loss": 0.3924,
      "step": 17300
    },
    {
      "epoch": 0.7336200354161396,
      "grad_norm": 0.8903588652610779,
      "learning_rate": 1.8532844253309723e-05,
      "loss": 0.3915,
      "step": 17400
    },
    {
      "epoch": 0.7378362425162324,
      "grad_norm": 1.2245477437973022,
      "learning_rate": 1.852441183910954e-05,
      "loss": 0.38,
      "step": 17500
    },
    {
      "epoch": 0.7420524496163251,
      "grad_norm": 1.2684203386306763,
      "learning_rate": 1.8515979424909354e-05,
      "loss": 0.4012,
      "step": 17600
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 0.9190846681594849,
      "learning_rate": 1.8507547010709167e-05,
      "loss": 0.3874,
      "step": 17700
    },
    {
      "epoch": 0.7504848638165107,
      "grad_norm": 1.3668287992477417,
      "learning_rate": 1.8499114596508983e-05,
      "loss": 0.3803,
      "step": 17800
    },
    {
      "epoch": 0.7547010709166034,
      "grad_norm": 1.227296233177185,
      "learning_rate": 1.84906821823088e-05,
      "loss": 0.3736,
      "step": 17900
    },
    {
      "epoch": 0.7589172780166962,
      "grad_norm": 0.9625111222267151,
      "learning_rate": 1.848224976810861e-05,
      "loss": 0.4079,
      "step": 18000
    },
    {
      "epoch": 0.7631334851167889,
      "grad_norm": 1.1573898792266846,
      "learning_rate": 1.8473817353908427e-05,
      "loss": 0.3766,
      "step": 18100
    },
    {
      "epoch": 0.7673496922168817,
      "grad_norm": 1.0590816736221313,
      "learning_rate": 1.846538493970824e-05,
      "loss": 0.3768,
      "step": 18200
    },
    {
      "epoch": 0.7715658993169745,
      "grad_norm": 1.1647247076034546,
      "learning_rate": 1.8456952525508055e-05,
      "loss": 0.4032,
      "step": 18300
    },
    {
      "epoch": 0.7757821064170672,
      "grad_norm": 1.1958105564117432,
      "learning_rate": 1.8448520111307867e-05,
      "loss": 0.3776,
      "step": 18400
    },
    {
      "epoch": 0.77999831351716,
      "grad_norm": 0.9784579277038574,
      "learning_rate": 1.8440087697107683e-05,
      "loss": 0.3828,
      "step": 18500
    },
    {
      "epoch": 0.7842145206172527,
      "grad_norm": 0.7342677712440491,
      "learning_rate": 1.8431655282907495e-05,
      "loss": 0.3854,
      "step": 18600
    },
    {
      "epoch": 0.7884307277173455,
      "grad_norm": 1.1513690948486328,
      "learning_rate": 1.8423222868707314e-05,
      "loss": 0.379,
      "step": 18700
    },
    {
      "epoch": 0.7926469348174382,
      "grad_norm": 1.2128503322601318,
      "learning_rate": 1.8414790454507127e-05,
      "loss": 0.3873,
      "step": 18800
    },
    {
      "epoch": 0.7968631419175309,
      "grad_norm": 0.9662160277366638,
      "learning_rate": 1.8406358040306943e-05,
      "loss": 0.3875,
      "step": 18900
    },
    {
      "epoch": 0.8010793490176238,
      "grad_norm": 1.006768822669983,
      "learning_rate": 1.8397925626106755e-05,
      "loss": 0.3791,
      "step": 19000
    },
    {
      "epoch": 0.8052955561177165,
      "grad_norm": 1.0220060348510742,
      "learning_rate": 1.838949321190657e-05,
      "loss": 0.3772,
      "step": 19100
    },
    {
      "epoch": 0.8095117632178093,
      "grad_norm": 1.3875762224197388,
      "learning_rate": 1.8381060797706383e-05,
      "loss": 0.3827,
      "step": 19200
    },
    {
      "epoch": 0.813727970317902,
      "grad_norm": 1.1512092351913452,
      "learning_rate": 1.83726283835062e-05,
      "loss": 0.3765,
      "step": 19300
    },
    {
      "epoch": 0.8179441774179947,
      "grad_norm": 1.107202172279358,
      "learning_rate": 1.8364195969306015e-05,
      "loss": 0.3951,
      "step": 19400
    },
    {
      "epoch": 0.8221603845180875,
      "grad_norm": 1.2689570188522339,
      "learning_rate": 1.8355763555105827e-05,
      "loss": 0.396,
      "step": 19500
    },
    {
      "epoch": 0.8263765916181803,
      "grad_norm": 0.8329848051071167,
      "learning_rate": 1.8347331140905643e-05,
      "loss": 0.3845,
      "step": 19600
    },
    {
      "epoch": 0.8305927987182731,
      "grad_norm": 1.0717341899871826,
      "learning_rate": 1.8338898726705455e-05,
      "loss": 0.3804,
      "step": 19700
    },
    {
      "epoch": 0.8348090058183658,
      "grad_norm": 0.8131351470947266,
      "learning_rate": 1.8330466312505274e-05,
      "loss": 0.3799,
      "step": 19800
    },
    {
      "epoch": 0.8390252129184586,
      "grad_norm": 0.992375373840332,
      "learning_rate": 1.8322033898305087e-05,
      "loss": 0.3841,
      "step": 19900
    },
    {
      "epoch": 0.8432414200185513,
      "grad_norm": 1.202879548072815,
      "learning_rate": 1.8313601484104903e-05,
      "loss": 0.3791,
      "step": 20000
    },
    {
      "epoch": 0.8432414200185513,
      "eval_bleu": 10.1124,
      "eval_bleurt": null,
      "eval_chrfpp": 32.5597,
      "eval_comet": 0.5374,
      "eval_gen_len": 20.8772,
      "eval_loss": 0.36685481667518616,
      "eval_runtime": 1369.9335,
      "eval_samples_per_second": 34.626,
      "eval_steps_per_second": 2.164,
      "step": 20000
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 0.9284172058105469,
      "learning_rate": 1.8305169069904715e-05,
      "loss": 0.3832,
      "step": 20100
    },
    {
      "epoch": 0.8516738342187368,
      "grad_norm": 1.2193272113800049,
      "learning_rate": 1.829673665570453e-05,
      "loss": 0.3777,
      "step": 20200
    },
    {
      "epoch": 0.8558900413188296,
      "grad_norm": 0.7489703297615051,
      "learning_rate": 1.8288304241504343e-05,
      "loss": 0.383,
      "step": 20300
    },
    {
      "epoch": 0.8601062484189224,
      "grad_norm": 0.9435054063796997,
      "learning_rate": 1.827987182730416e-05,
      "loss": 0.3691,
      "step": 20400
    },
    {
      "epoch": 0.8643224555190151,
      "grad_norm": 1.2876486778259277,
      "learning_rate": 1.827143941310397e-05,
      "loss": 0.3679,
      "step": 20500
    },
    {
      "epoch": 0.8685386626191078,
      "grad_norm": 1.0788872241973877,
      "learning_rate": 1.8263006998903787e-05,
      "loss": 0.388,
      "step": 20600
    },
    {
      "epoch": 0.8727548697192006,
      "grad_norm": 1.425753116607666,
      "learning_rate": 1.8254574584703603e-05,
      "loss": 0.3845,
      "step": 20700
    },
    {
      "epoch": 0.8769710768192933,
      "grad_norm": 1.7826398611068726,
      "learning_rate": 1.8246142170503415e-05,
      "loss": 0.3874,
      "step": 20800
    },
    {
      "epoch": 0.8811872839193862,
      "grad_norm": 0.7879995107650757,
      "learning_rate": 1.823770975630323e-05,
      "loss": 0.3894,
      "step": 20900
    },
    {
      "epoch": 0.8854034910194789,
      "grad_norm": 1.137299656867981,
      "learning_rate": 1.8229277342103047e-05,
      "loss": 0.3782,
      "step": 21000
    },
    {
      "epoch": 0.8896196981195716,
      "grad_norm": 1.3012561798095703,
      "learning_rate": 1.822084492790286e-05,
      "loss": 0.3841,
      "step": 21100
    },
    {
      "epoch": 0.8938359052196644,
      "grad_norm": 1.351110577583313,
      "learning_rate": 1.8212412513702675e-05,
      "loss": 0.3868,
      "step": 21200
    },
    {
      "epoch": 0.8980521123197571,
      "grad_norm": 1.3879759311676025,
      "learning_rate": 1.820398009950249e-05,
      "loss": 0.3907,
      "step": 21300
    },
    {
      "epoch": 0.9022683194198499,
      "grad_norm": 1.047498345375061,
      "learning_rate": 1.8195547685302303e-05,
      "loss": 0.3961,
      "step": 21400
    },
    {
      "epoch": 0.9064845265199427,
      "grad_norm": 0.7418652176856995,
      "learning_rate": 1.818711527110212e-05,
      "loss": 0.3814,
      "step": 21500
    },
    {
      "epoch": 0.9107007336200355,
      "grad_norm": 0.9947733879089355,
      "learning_rate": 1.817868285690193e-05,
      "loss": 0.3724,
      "step": 21600
    },
    {
      "epoch": 0.9149169407201282,
      "grad_norm": 1.223792314529419,
      "learning_rate": 1.8170250442701747e-05,
      "loss": 0.377,
      "step": 21700
    },
    {
      "epoch": 0.9191331478202209,
      "grad_norm": 0.9043552875518799,
      "learning_rate": 1.816181802850156e-05,
      "loss": 0.3775,
      "step": 21800
    },
    {
      "epoch": 0.9233493549203137,
      "grad_norm": 0.8440760374069214,
      "learning_rate": 1.8153385614301375e-05,
      "loss": 0.3818,
      "step": 21900
    },
    {
      "epoch": 0.9275655620204064,
      "grad_norm": 3.621572732925415,
      "learning_rate": 1.814495320010119e-05,
      "loss": 0.3895,
      "step": 22000
    },
    {
      "epoch": 0.9317817691204991,
      "grad_norm": 1.0998283624649048,
      "learning_rate": 1.8136520785901004e-05,
      "loss": 0.3612,
      "step": 22100
    },
    {
      "epoch": 0.935997976220592,
      "grad_norm": 1.0755438804626465,
      "learning_rate": 1.812808837170082e-05,
      "loss": 0.3841,
      "step": 22200
    },
    {
      "epoch": 0.9402141833206847,
      "grad_norm": 1.0700905323028564,
      "learning_rate": 1.8119655957500635e-05,
      "loss": 0.3858,
      "step": 22300
    },
    {
      "epoch": 0.9444303904207775,
      "grad_norm": 0.925391674041748,
      "learning_rate": 1.8111223543300448e-05,
      "loss": 0.3716,
      "step": 22400
    },
    {
      "epoch": 0.9486465975208702,
      "grad_norm": 1.4689534902572632,
      "learning_rate": 1.8102791129100263e-05,
      "loss": 0.3806,
      "step": 22500
    },
    {
      "epoch": 0.952862804620963,
      "grad_norm": 1.0605189800262451,
      "learning_rate": 1.809435871490008e-05,
      "loss": 0.3706,
      "step": 22600
    },
    {
      "epoch": 0.9570790117210557,
      "grad_norm": 1.186305046081543,
      "learning_rate": 1.808592630069989e-05,
      "loss": 0.3748,
      "step": 22700
    },
    {
      "epoch": 0.9612952188211485,
      "grad_norm": 1.3206912279129028,
      "learning_rate": 1.8077493886499707e-05,
      "loss": 0.3814,
      "step": 22800
    },
    {
      "epoch": 0.9655114259212413,
      "grad_norm": 1.2885257005691528,
      "learning_rate": 1.806906147229952e-05,
      "loss": 0.3823,
      "step": 22900
    },
    {
      "epoch": 0.969727633021334,
      "grad_norm": 1.059088945388794,
      "learning_rate": 1.8060629058099335e-05,
      "loss": 0.3886,
      "step": 23000
    },
    {
      "epoch": 0.9739438401214268,
      "grad_norm": 1.139894962310791,
      "learning_rate": 1.8052196643899148e-05,
      "loss": 0.3679,
      "step": 23100
    },
    {
      "epoch": 0.9781600472215195,
      "grad_norm": 1.169776201248169,
      "learning_rate": 1.8043764229698964e-05,
      "loss": 0.3844,
      "step": 23200
    },
    {
      "epoch": 0.9823762543216122,
      "grad_norm": 1.1594703197479248,
      "learning_rate": 1.803533181549878e-05,
      "loss": 0.3791,
      "step": 23300
    },
    {
      "epoch": 0.986592461421705,
      "grad_norm": 1.0355985164642334,
      "learning_rate": 1.8026899401298595e-05,
      "loss": 0.378,
      "step": 23400
    },
    {
      "epoch": 0.9908086685217978,
      "grad_norm": 1.0133675336837769,
      "learning_rate": 1.8018466987098408e-05,
      "loss": 0.3666,
      "step": 23500
    },
    {
      "epoch": 0.9950248756218906,
      "grad_norm": 0.8024215698242188,
      "learning_rate": 1.8010034572898223e-05,
      "loss": 0.3523,
      "step": 23600
    },
    {
      "epoch": 0.9992410827219833,
      "grad_norm": 0.9694296717643738,
      "learning_rate": 1.8001602158698036e-05,
      "loss": 0.3694,
      "step": 23700
    },
    {
      "epoch": 1.0034572898220762,
      "grad_norm": 0.8517723083496094,
      "learning_rate": 1.799316974449785e-05,
      "loss": 0.3637,
      "step": 23800
    },
    {
      "epoch": 1.0076734969221688,
      "grad_norm": 0.951740562915802,
      "learning_rate": 1.7984737330297667e-05,
      "loss": 0.3541,
      "step": 23900
    },
    {
      "epoch": 1.0118897040222616,
      "grad_norm": 1.074562907218933,
      "learning_rate": 1.797630491609748e-05,
      "loss": 0.3388,
      "step": 24000
    },
    {
      "epoch": 1.0118897040222616,
      "eval_bleu": 10.3014,
      "eval_bleurt": null,
      "eval_chrfpp": 32.7152,
      "eval_comet": 0.5376,
      "eval_gen_len": 20.8763,
      "eval_loss": 0.3606056571006775,
      "eval_runtime": 1366.2367,
      "eval_samples_per_second": 34.719,
      "eval_steps_per_second": 2.17,
      "step": 24000
    },
    {
      "epoch": 1.0161059111223543,
      "grad_norm": 0.845503568649292,
      "learning_rate": 1.7967872501897296e-05,
      "loss": 0.337,
      "step": 24100
    },
    {
      "epoch": 1.0203221182224471,
      "grad_norm": 0.886055588722229,
      "learning_rate": 1.7959440087697108e-05,
      "loss": 0.3557,
      "step": 24200
    },
    {
      "epoch": 1.0245383253225397,
      "grad_norm": 0.8072330355644226,
      "learning_rate": 1.7951007673496924e-05,
      "loss": 0.3492,
      "step": 24300
    },
    {
      "epoch": 1.0287545324226326,
      "grad_norm": 1.0184261798858643,
      "learning_rate": 1.7942575259296736e-05,
      "loss": 0.3394,
      "step": 24400
    },
    {
      "epoch": 1.0329707395227254,
      "grad_norm": 0.9989670515060425,
      "learning_rate": 1.7934142845096552e-05,
      "loss": 0.3368,
      "step": 24500
    },
    {
      "epoch": 1.037186946622818,
      "grad_norm": 1.0039615631103516,
      "learning_rate": 1.7925710430896368e-05,
      "loss": 0.3566,
      "step": 24600
    },
    {
      "epoch": 1.041403153722911,
      "grad_norm": 0.9721380472183228,
      "learning_rate": 1.7917278016696183e-05,
      "loss": 0.3468,
      "step": 24700
    },
    {
      "epoch": 1.0456193608230036,
      "grad_norm": 1.2254228591918945,
      "learning_rate": 1.7908845602495996e-05,
      "loss": 0.3574,
      "step": 24800
    },
    {
      "epoch": 1.0498355679230964,
      "grad_norm": 0.8590681552886963,
      "learning_rate": 1.790041318829581e-05,
      "loss": 0.3629,
      "step": 24900
    },
    {
      "epoch": 1.054051775023189,
      "grad_norm": 0.9355018734931946,
      "learning_rate": 1.7891980774095624e-05,
      "loss": 0.3321,
      "step": 25000
    },
    {
      "epoch": 1.0582679821232819,
      "grad_norm": 0.9381804466247559,
      "learning_rate": 1.788354835989544e-05,
      "loss": 0.3349,
      "step": 25100
    },
    {
      "epoch": 1.0624841892233747,
      "grad_norm": 1.0869252681732178,
      "learning_rate": 1.7875115945695256e-05,
      "loss": 0.3464,
      "step": 25200
    },
    {
      "epoch": 1.0667003963234674,
      "grad_norm": 1.0041707754135132,
      "learning_rate": 1.7866683531495068e-05,
      "loss": 0.3503,
      "step": 25300
    },
    {
      "epoch": 1.0709166034235602,
      "grad_norm": 0.8508927226066589,
      "learning_rate": 1.7858251117294884e-05,
      "loss": 0.3552,
      "step": 25400
    },
    {
      "epoch": 1.0751328105236528,
      "grad_norm": 1.2893351316452026,
      "learning_rate": 1.7849818703094696e-05,
      "loss": 0.3491,
      "step": 25500
    },
    {
      "epoch": 1.0793490176237457,
      "grad_norm": 0.8087054491043091,
      "learning_rate": 1.7841386288894512e-05,
      "loss": 0.3256,
      "step": 25600
    },
    {
      "epoch": 1.0835652247238385,
      "grad_norm": 1.098964810371399,
      "learning_rate": 1.7832953874694324e-05,
      "loss": 0.3378,
      "step": 25700
    },
    {
      "epoch": 1.0877814318239312,
      "grad_norm": 0.9316391944885254,
      "learning_rate": 1.7824521460494143e-05,
      "loss": 0.341,
      "step": 25800
    },
    {
      "epoch": 1.091997638924024,
      "grad_norm": 1.127323865890503,
      "learning_rate": 1.7816089046293956e-05,
      "loss": 0.3574,
      "step": 25900
    },
    {
      "epoch": 1.0962138460241166,
      "grad_norm": 1.1783215999603271,
      "learning_rate": 1.780765663209377e-05,
      "loss": 0.3501,
      "step": 26000
    },
    {
      "epoch": 1.1004300531242095,
      "grad_norm": 0.8408191800117493,
      "learning_rate": 1.7799224217893584e-05,
      "loss": 0.3501,
      "step": 26100
    },
    {
      "epoch": 1.1046462602243021,
      "grad_norm": 1.200312614440918,
      "learning_rate": 1.77907918036934e-05,
      "loss": 0.3377,
      "step": 26200
    },
    {
      "epoch": 1.108862467324395,
      "grad_norm": 1.207794189453125,
      "learning_rate": 1.7782359389493212e-05,
      "loss": 0.3426,
      "step": 26300
    },
    {
      "epoch": 1.1130786744244878,
      "grad_norm": 1.1393693685531616,
      "learning_rate": 1.7773926975293028e-05,
      "loss": 0.3393,
      "step": 26400
    },
    {
      "epoch": 1.1172948815245805,
      "grad_norm": 1.1110385656356812,
      "learning_rate": 1.776549456109284e-05,
      "loss": 0.3428,
      "step": 26500
    },
    {
      "epoch": 1.1215110886246733,
      "grad_norm": 0.830635130405426,
      "learning_rate": 1.7757062146892656e-05,
      "loss": 0.3451,
      "step": 26600
    },
    {
      "epoch": 1.125727295724766,
      "grad_norm": 0.9378274083137512,
      "learning_rate": 1.7748629732692472e-05,
      "loss": 0.3408,
      "step": 26700
    },
    {
      "epoch": 1.1299435028248588,
      "grad_norm": 1.1354353427886963,
      "learning_rate": 1.7740197318492284e-05,
      "loss": 0.3526,
      "step": 26800
    },
    {
      "epoch": 1.1341597099249516,
      "grad_norm": 1.0900987386703491,
      "learning_rate": 1.77317649042921e-05,
      "loss": 0.3471,
      "step": 26900
    },
    {
      "epoch": 1.1383759170250443,
      "grad_norm": 0.799541175365448,
      "learning_rate": 1.7723332490091916e-05,
      "loss": 0.3434,
      "step": 27000
    },
    {
      "epoch": 1.142592124125137,
      "grad_norm": 0.8897498250007629,
      "learning_rate": 1.771490007589173e-05,
      "loss": 0.3309,
      "step": 27100
    },
    {
      "epoch": 1.1468083312252297,
      "grad_norm": 1.2157037258148193,
      "learning_rate": 1.7706467661691544e-05,
      "loss": 0.3478,
      "step": 27200
    },
    {
      "epoch": 1.1510245383253226,
      "grad_norm": 1.3008877038955688,
      "learning_rate": 1.769803524749136e-05,
      "loss": 0.3476,
      "step": 27300
    },
    {
      "epoch": 1.1552407454254152,
      "grad_norm": 0.851649820804596,
      "learning_rate": 1.7689602833291172e-05,
      "loss": 0.3475,
      "step": 27400
    },
    {
      "epoch": 1.159456952525508,
      "grad_norm": 1.1734685897827148,
      "learning_rate": 1.7681170419090988e-05,
      "loss": 0.3388,
      "step": 27500
    },
    {
      "epoch": 1.163673159625601,
      "grad_norm": 0.779128909111023,
      "learning_rate": 1.76727380048908e-05,
      "loss": 0.3432,
      "step": 27600
    },
    {
      "epoch": 1.1678893667256935,
      "grad_norm": 0.845840334892273,
      "learning_rate": 1.7664305590690616e-05,
      "loss": 0.3395,
      "step": 27700
    },
    {
      "epoch": 1.1721055738257864,
      "grad_norm": 0.9524010419845581,
      "learning_rate": 1.765587317649043e-05,
      "loss": 0.3224,
      "step": 27800
    },
    {
      "epoch": 1.176321780925879,
      "grad_norm": 0.8855528235435486,
      "learning_rate": 1.7647440762290244e-05,
      "loss": 0.3383,
      "step": 27900
    },
    {
      "epoch": 1.1805379880259719,
      "grad_norm": 1.00590980052948,
      "learning_rate": 1.763900834809006e-05,
      "loss": 0.3404,
      "step": 28000
    },
    {
      "epoch": 1.1805379880259719,
      "eval_bleu": 10.3175,
      "eval_bleurt": null,
      "eval_chrfpp": 32.8418,
      "eval_comet": 0.5385,
      "eval_gen_len": 20.8769,
      "eval_loss": 0.35359427332878113,
      "eval_runtime": 1378.0895,
      "eval_samples_per_second": 34.421,
      "eval_steps_per_second": 2.152,
      "step": 28000
    },
    {
      "epoch": 1.1847541951260645,
      "grad_norm": 0.936487078666687,
      "learning_rate": 1.7630575933889876e-05,
      "loss": 0.3491,
      "step": 28100
    },
    {
      "epoch": 1.1889704022261574,
      "grad_norm": 1.366697907447815,
      "learning_rate": 1.762214351968969e-05,
      "loss": 0.3393,
      "step": 28200
    },
    {
      "epoch": 1.1931866093262502,
      "grad_norm": 1.3890807628631592,
      "learning_rate": 1.7613711105489504e-05,
      "loss": 0.3445,
      "step": 28300
    },
    {
      "epoch": 1.1974028164263428,
      "grad_norm": 1.003692388534546,
      "learning_rate": 1.7605278691289317e-05,
      "loss": 0.3438,
      "step": 28400
    },
    {
      "epoch": 1.2016190235264357,
      "grad_norm": 1.1394814252853394,
      "learning_rate": 1.7596846277089132e-05,
      "loss": 0.353,
      "step": 28500
    },
    {
      "epoch": 1.2058352306265283,
      "grad_norm": 1.1011195182800293,
      "learning_rate": 1.7588413862888948e-05,
      "loss": 0.33,
      "step": 28600
    },
    {
      "epoch": 1.2100514377266212,
      "grad_norm": 1.3941056728363037,
      "learning_rate": 1.757998144868876e-05,
      "loss": 0.3491,
      "step": 28700
    },
    {
      "epoch": 1.2142676448267138,
      "grad_norm": 1.0769147872924805,
      "learning_rate": 1.7571549034488576e-05,
      "loss": 0.3349,
      "step": 28800
    },
    {
      "epoch": 1.2184838519268066,
      "grad_norm": 1.0283029079437256,
      "learning_rate": 1.756311662028839e-05,
      "loss": 0.3331,
      "step": 28900
    },
    {
      "epoch": 1.2227000590268995,
      "grad_norm": 1.1583529710769653,
      "learning_rate": 1.7554684206088204e-05,
      "loss": 0.3415,
      "step": 29000
    },
    {
      "epoch": 1.2269162661269921,
      "grad_norm": 0.9916228652000427,
      "learning_rate": 1.7546251791888017e-05,
      "loss": 0.3423,
      "step": 29100
    },
    {
      "epoch": 1.231132473227085,
      "grad_norm": 1.2261525392532349,
      "learning_rate": 1.7537819377687833e-05,
      "loss": 0.3474,
      "step": 29200
    },
    {
      "epoch": 1.2353486803271776,
      "grad_norm": 1.1459957361221313,
      "learning_rate": 1.752938696348765e-05,
      "loss": 0.3494,
      "step": 29300
    },
    {
      "epoch": 1.2395648874272704,
      "grad_norm": 1.4597856998443604,
      "learning_rate": 1.7520954549287464e-05,
      "loss": 0.3506,
      "step": 29400
    },
    {
      "epoch": 1.243781094527363,
      "grad_norm": 1.1697700023651123,
      "learning_rate": 1.7512522135087277e-05,
      "loss": 0.3234,
      "step": 29500
    },
    {
      "epoch": 1.247997301627456,
      "grad_norm": 1.378232717514038,
      "learning_rate": 1.7504089720887092e-05,
      "loss": 0.3384,
      "step": 29600
    },
    {
      "epoch": 1.2522135087275488,
      "grad_norm": 1.1748912334442139,
      "learning_rate": 1.7495657306686905e-05,
      "loss": 0.3359,
      "step": 29700
    },
    {
      "epoch": 1.2564297158276414,
      "grad_norm": 0.7615249752998352,
      "learning_rate": 1.748722489248672e-05,
      "loss": 0.3419,
      "step": 29800
    },
    {
      "epoch": 1.2606459229277343,
      "grad_norm": 0.9890500903129578,
      "learning_rate": 1.7478792478286536e-05,
      "loss": 0.3424,
      "step": 29900
    },
    {
      "epoch": 1.264862130027827,
      "grad_norm": 1.0541248321533203,
      "learning_rate": 1.747036006408635e-05,
      "loss": 0.3325,
      "step": 30000
    },
    {
      "epoch": 1.2690783371279197,
      "grad_norm": 1.171378493309021,
      "learning_rate": 1.7461927649886164e-05,
      "loss": 0.3435,
      "step": 30100
    },
    {
      "epoch": 1.2732945442280124,
      "grad_norm": 1.087592363357544,
      "learning_rate": 1.7453495235685977e-05,
      "loss": 0.3263,
      "step": 30200
    },
    {
      "epoch": 1.2775107513281052,
      "grad_norm": 1.0550174713134766,
      "learning_rate": 1.7445062821485793e-05,
      "loss": 0.3434,
      "step": 30300
    },
    {
      "epoch": 1.281726958428198,
      "grad_norm": 0.9705281257629395,
      "learning_rate": 1.7436630407285605e-05,
      "loss": 0.345,
      "step": 30400
    },
    {
      "epoch": 1.2859431655282907,
      "grad_norm": 1.1588115692138672,
      "learning_rate": 1.7428197993085424e-05,
      "loss": 0.334,
      "step": 30500
    },
    {
      "epoch": 1.2901593726283835,
      "grad_norm": 0.9370762705802917,
      "learning_rate": 1.7419765578885237e-05,
      "loss": 0.333,
      "step": 30600
    },
    {
      "epoch": 1.2943755797284764,
      "grad_norm": 1.0201505422592163,
      "learning_rate": 1.7411333164685052e-05,
      "loss": 0.339,
      "step": 30700
    },
    {
      "epoch": 1.298591786828569,
      "grad_norm": 0.8545118570327759,
      "learning_rate": 1.7402900750484865e-05,
      "loss": 0.337,
      "step": 30800
    },
    {
      "epoch": 1.3028079939286616,
      "grad_norm": 1.1026926040649414,
      "learning_rate": 1.739446833628468e-05,
      "loss": 0.3363,
      "step": 30900
    },
    {
      "epoch": 1.3070242010287545,
      "grad_norm": 0.9570561051368713,
      "learning_rate": 1.7386035922084493e-05,
      "loss": 0.3424,
      "step": 31000
    },
    {
      "epoch": 1.3112404081288473,
      "grad_norm": 1.092779278755188,
      "learning_rate": 1.737760350788431e-05,
      "loss": 0.3331,
      "step": 31100
    },
    {
      "epoch": 1.31545661522894,
      "grad_norm": 0.7841922044754028,
      "learning_rate": 1.7369171093684125e-05,
      "loss": 0.3357,
      "step": 31200
    },
    {
      "epoch": 1.3196728223290328,
      "grad_norm": 0.9633954167366028,
      "learning_rate": 1.7360738679483937e-05,
      "loss": 0.3341,
      "step": 31300
    },
    {
      "epoch": 1.3238890294291257,
      "grad_norm": 1.0769535303115845,
      "learning_rate": 1.7352306265283753e-05,
      "loss": 0.3403,
      "step": 31400
    },
    {
      "epoch": 1.3281052365292183,
      "grad_norm": 0.9702937602996826,
      "learning_rate": 1.7343873851083565e-05,
      "loss": 0.3379,
      "step": 31500
    },
    {
      "epoch": 1.3323214436293112,
      "grad_norm": 0.8990470767021179,
      "learning_rate": 1.733544143688338e-05,
      "loss": 0.3299,
      "step": 31600
    },
    {
      "epoch": 1.3365376507294038,
      "grad_norm": 1.2237523794174194,
      "learning_rate": 1.7327009022683197e-05,
      "loss": 0.3422,
      "step": 31700
    },
    {
      "epoch": 1.3407538578294966,
      "grad_norm": 1.086236596107483,
      "learning_rate": 1.7318576608483012e-05,
      "loss": 0.3395,
      "step": 31800
    },
    {
      "epoch": 1.3449700649295893,
      "grad_norm": 1.2538822889328003,
      "learning_rate": 1.7310144194282825e-05,
      "loss": 0.3279,
      "step": 31900
    },
    {
      "epoch": 1.349186272029682,
      "grad_norm": 1.317533016204834,
      "learning_rate": 1.730171178008264e-05,
      "loss": 0.3322,
      "step": 32000
    },
    {
      "epoch": 1.349186272029682,
      "eval_bleu": 10.4799,
      "eval_bleurt": null,
      "eval_chrfpp": 33.0168,
      "eval_comet": 0.5397,
      "eval_gen_len": 20.874,
      "eval_loss": 0.346453994512558,
      "eval_runtime": 1388.5027,
      "eval_samples_per_second": 34.163,
      "eval_steps_per_second": 2.135,
      "step": 32000
    },
    {
      "epoch": 1.353402479129775,
      "grad_norm": 0.9494001865386963,
      "learning_rate": 1.7293279365882453e-05,
      "loss": 0.3409,
      "step": 32100
    },
    {
      "epoch": 1.3576186862298676,
      "grad_norm": 0.9309782981872559,
      "learning_rate": 1.728484695168227e-05,
      "loss": 0.3305,
      "step": 32200
    },
    {
      "epoch": 1.3618348933299604,
      "grad_norm": 1.0548337697982788,
      "learning_rate": 1.727641453748208e-05,
      "loss": 0.3471,
      "step": 32300
    },
    {
      "epoch": 1.366051100430053,
      "grad_norm": 0.9669992923736572,
      "learning_rate": 1.7267982123281897e-05,
      "loss": 0.3416,
      "step": 32400
    },
    {
      "epoch": 1.370267307530146,
      "grad_norm": 1.1200644969940186,
      "learning_rate": 1.725954970908171e-05,
      "loss": 0.337,
      "step": 32500
    },
    {
      "epoch": 1.3744835146302385,
      "grad_norm": 1.1911766529083252,
      "learning_rate": 1.7251117294881525e-05,
      "loss": 0.3256,
      "step": 32600
    },
    {
      "epoch": 1.3786997217303314,
      "grad_norm": 0.8328487873077393,
      "learning_rate": 1.724268488068134e-05,
      "loss": 0.3432,
      "step": 32700
    },
    {
      "epoch": 1.3829159288304242,
      "grad_norm": 1.1673336029052734,
      "learning_rate": 1.7234252466481153e-05,
      "loss": 0.3405,
      "step": 32800
    },
    {
      "epoch": 1.3871321359305169,
      "grad_norm": 0.9292609691619873,
      "learning_rate": 1.722582005228097e-05,
      "loss": 0.339,
      "step": 32900
    },
    {
      "epoch": 1.3913483430306097,
      "grad_norm": 0.9854961037635803,
      "learning_rate": 1.7217387638080785e-05,
      "loss": 0.3358,
      "step": 33000
    },
    {
      "epoch": 1.3955645501307024,
      "grad_norm": 0.8137360215187073,
      "learning_rate": 1.72089552238806e-05,
      "loss": 0.3453,
      "step": 33100
    },
    {
      "epoch": 1.3997807572307952,
      "grad_norm": 1.2690805196762085,
      "learning_rate": 1.7200522809680413e-05,
      "loss": 0.3361,
      "step": 33200
    },
    {
      "epoch": 1.4039969643308878,
      "grad_norm": 1.0749177932739258,
      "learning_rate": 1.719209039548023e-05,
      "loss": 0.3495,
      "step": 33300
    },
    {
      "epoch": 1.4082131714309807,
      "grad_norm": 1.118641972541809,
      "learning_rate": 1.718365798128004e-05,
      "loss": 0.3387,
      "step": 33400
    },
    {
      "epoch": 1.4124293785310735,
      "grad_norm": 1.184773564338684,
      "learning_rate": 1.7175225567079857e-05,
      "loss": 0.3272,
      "step": 33500
    },
    {
      "epoch": 1.4166455856311662,
      "grad_norm": 0.9476341009140015,
      "learning_rate": 1.716679315287967e-05,
      "loss": 0.3544,
      "step": 33600
    },
    {
      "epoch": 1.420861792731259,
      "grad_norm": 0.9392078518867493,
      "learning_rate": 1.7158360738679485e-05,
      "loss": 0.3405,
      "step": 33700
    },
    {
      "epoch": 1.4250779998313516,
      "grad_norm": 1.2548301219940186,
      "learning_rate": 1.7149928324479298e-05,
      "loss": 0.3167,
      "step": 33800
    },
    {
      "epoch": 1.4292942069314445,
      "grad_norm": 1.0808441638946533,
      "learning_rate": 1.7141495910279113e-05,
      "loss": 0.3395,
      "step": 33900
    },
    {
      "epoch": 1.4335104140315371,
      "grad_norm": 0.8488920331001282,
      "learning_rate": 1.713306349607893e-05,
      "loss": 0.3252,
      "step": 34000
    },
    {
      "epoch": 1.43772662113163,
      "grad_norm": 1.0139002799987793,
      "learning_rate": 1.7124631081878745e-05,
      "loss": 0.3359,
      "step": 34100
    },
    {
      "epoch": 1.4419428282317228,
      "grad_norm": 1.288271427154541,
      "learning_rate": 1.7116198667678557e-05,
      "loss": 0.3312,
      "step": 34200
    },
    {
      "epoch": 1.4461590353318154,
      "grad_norm": 0.9306642413139343,
      "learning_rate": 1.7107766253478373e-05,
      "loss": 0.336,
      "step": 34300
    },
    {
      "epoch": 1.4503752424319083,
      "grad_norm": 1.1435469388961792,
      "learning_rate": 1.7099333839278185e-05,
      "loss": 0.3313,
      "step": 34400
    },
    {
      "epoch": 1.4545914495320011,
      "grad_norm": 0.9129034280776978,
      "learning_rate": 1.7090901425078e-05,
      "loss": 0.3351,
      "step": 34500
    },
    {
      "epoch": 1.4588076566320938,
      "grad_norm": 1.0107824802398682,
      "learning_rate": 1.7082469010877817e-05,
      "loss": 0.3357,
      "step": 34600
    },
    {
      "epoch": 1.4630238637321864,
      "grad_norm": 1.1336674690246582,
      "learning_rate": 1.707403659667763e-05,
      "loss": 0.3274,
      "step": 34700
    },
    {
      "epoch": 1.4672400708322793,
      "grad_norm": 1.1610007286071777,
| "learning_rate": 1.7065604182477445e-05, |
| "loss": 0.3438, |
| "step": 34800 |
| }, |
| { |
| "epoch": 1.471456277932372, |
| "grad_norm": 1.0345039367675781, |
| "learning_rate": 1.7057171768277258e-05, |
| "loss": 0.3356, |
| "step": 34900 |
| }, |
| { |
| "epoch": 1.4756724850324647, |
| "grad_norm": 0.9744789600372314, |
| "learning_rate": 1.7048739354077073e-05, |
| "loss": 0.3346, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.4798886921325576, |
| "grad_norm": 1.4097639322280884, |
| "learning_rate": 1.7040306939876886e-05, |
| "loss": 0.3279, |
| "step": 35100 |
| }, |
| { |
| "epoch": 1.4841048992326504, |
| "grad_norm": 1.1328394412994385, |
| "learning_rate": 1.7031874525676705e-05, |
| "loss": 0.3351, |
| "step": 35200 |
| }, |
| { |
| "epoch": 1.488321106332743, |
| "grad_norm": 1.0193605422973633, |
| "learning_rate": 1.7023442111476517e-05, |
| "loss": 0.3286, |
| "step": 35300 |
| }, |
| { |
| "epoch": 1.4925373134328357, |
| "grad_norm": 0.8527234792709351, |
| "learning_rate": 1.7015009697276333e-05, |
| "loss": 0.326, |
| "step": 35400 |
| }, |
| { |
| "epoch": 1.4967535205329285, |
| "grad_norm": 0.8829551339149475, |
| "learning_rate": 1.7006577283076146e-05, |
| "loss": 0.3322, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.5009697276330214, |
| "grad_norm": 1.0889208316802979, |
| "learning_rate": 1.699814486887596e-05, |
| "loss": 0.3413, |
| "step": 35600 |
| }, |
| { |
| "epoch": 1.505185934733114, |
| "grad_norm": 1.0842567682266235, |
| "learning_rate": 1.6989712454675774e-05, |
| "loss": 0.337, |
| "step": 35700 |
| }, |
| { |
| "epoch": 1.5094021418332069, |
| "grad_norm": 1.0290625095367432, |
| "learning_rate": 1.698128004047559e-05, |
| "loss": 0.3292, |
| "step": 35800 |
| }, |
| { |
| "epoch": 1.5136183489332997, |
| "grad_norm": 0.9727330803871155, |
| "learning_rate": 1.6972847626275405e-05, |
| "loss": 0.3388, |
| "step": 35900 |
| }, |
| { |
| "epoch": 1.5178345560333923, |
| "grad_norm": 0.9701403975486755, |
| "learning_rate": 1.6964415212075218e-05, |
| "loss": 0.3211, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.5178345560333923, |
| "eval_bleu": 10.7275, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.2537, |
| "eval_comet": 0.5414, |
| "eval_gen_len": 20.8726, |
| "eval_loss": 0.34036117792129517, |
| "eval_runtime": 1386.4377, |
| "eval_samples_per_second": 34.214, |
| "eval_steps_per_second": 2.139, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.522050763133485, |
| "grad_norm": 1.1016792058944702, |
| "learning_rate": 1.6955982797875033e-05, |
| "loss": 0.3267, |
| "step": 36100 |
| }, |
| { |
| "epoch": 1.5262669702335778, |
| "grad_norm": 0.9762911796569824, |
| "learning_rate": 1.6947550383674846e-05, |
| "loss": 0.3191, |
| "step": 36200 |
| }, |
| { |
| "epoch": 1.5304831773336707, |
| "grad_norm": 0.9911622405052185, |
| "learning_rate": 1.693911796947466e-05, |
| "loss": 0.3428, |
| "step": 36300 |
| }, |
| { |
| "epoch": 1.5346993844337633, |
| "grad_norm": 1.3638495206832886, |
| "learning_rate": 1.6930685555274477e-05, |
| "loss": 0.3353, |
| "step": 36400 |
| }, |
| { |
| "epoch": 1.5389155915338562, |
| "grad_norm": 0.9522203803062439, |
| "learning_rate": 1.6922253141074293e-05, |
| "loss": 0.3368, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.543131798633949, |
| "grad_norm": 0.9717823266983032, |
| "learning_rate": 1.6913820726874106e-05, |
| "loss": 0.3382, |
| "step": 36600 |
| }, |
| { |
| "epoch": 1.5473480057340416, |
| "grad_norm": 1.055655837059021, |
| "learning_rate": 1.690538831267392e-05, |
| "loss": 0.3312, |
| "step": 36700 |
| }, |
| { |
| "epoch": 1.5515642128341343, |
| "grad_norm": 1.0646960735321045, |
| "learning_rate": 1.6896955898473734e-05, |
| "loss": 0.3249, |
| "step": 36800 |
| }, |
| { |
| "epoch": 1.5557804199342273, |
| "grad_norm": 1.2053914070129395, |
| "learning_rate": 1.688852348427355e-05, |
| "loss": 0.3305, |
| "step": 36900 |
| }, |
| { |
| "epoch": 1.55999662703432, |
| "grad_norm": 1.0148818492889404, |
| "learning_rate": 1.6880091070073362e-05, |
| "loss": 0.3328, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.5642128341344126, |
| "grad_norm": 1.2227891683578491, |
| "learning_rate": 1.6871658655873178e-05, |
| "loss": 0.3327, |
| "step": 37100 |
| }, |
| { |
| "epoch": 1.5684290412345054, |
| "grad_norm": 1.0082377195358276, |
| "learning_rate": 1.6863226241672993e-05, |
| "loss": 0.328, |
| "step": 37200 |
| }, |
| { |
| "epoch": 1.5726452483345983, |
| "grad_norm": 0.9407429695129395, |
| "learning_rate": 1.6854793827472806e-05, |
| "loss": 0.3401, |
| "step": 37300 |
| }, |
| { |
| "epoch": 1.576861455434691, |
| "grad_norm": 1.115344762802124, |
| "learning_rate": 1.684636141327262e-05, |
| "loss": 0.3401, |
| "step": 37400 |
| }, |
| { |
| "epoch": 1.5810776625347835, |
| "grad_norm": 1.064095377922058, |
| "learning_rate": 1.6837928999072434e-05, |
| "loss": 0.3166, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.5852938696348766, |
| "grad_norm": 1.0824617147445679, |
| "learning_rate": 1.682949658487225e-05, |
| "loss": 0.312, |
| "step": 37600 |
| }, |
| { |
| "epoch": 1.5895100767349692, |
| "grad_norm": 1.1635481119155884, |
| "learning_rate": 1.6821064170672066e-05, |
| "loss": 0.3342, |
| "step": 37700 |
| }, |
| { |
| "epoch": 1.5937262838350619, |
| "grad_norm": 1.2488656044006348, |
| "learning_rate": 1.681263175647188e-05, |
| "loss": 0.3358, |
| "step": 37800 |
| }, |
| { |
| "epoch": 1.5979424909351547, |
| "grad_norm": 1.297699213027954, |
| "learning_rate": 1.6804199342271694e-05, |
| "loss": 0.3383, |
| "step": 37900 |
| }, |
| { |
| "epoch": 1.6021586980352476, |
| "grad_norm": 1.4961750507354736, |
| "learning_rate": 1.679576692807151e-05, |
| "loss": 0.3302, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.6063749051353402, |
| "grad_norm": 1.1194815635681152, |
| "learning_rate": 1.6787334513871322e-05, |
| "loss": 0.3234, |
| "step": 38100 |
| }, |
| { |
| "epoch": 1.610591112235433, |
| "grad_norm": 0.9607496857643127, |
| "learning_rate": 1.6778902099671138e-05, |
| "loss": 0.3364, |
| "step": 38200 |
| }, |
| { |
| "epoch": 1.614807319335526, |
| "grad_norm": 1.6336873769760132, |
| "learning_rate": 1.677046968547095e-05, |
| "loss": 0.3416, |
| "step": 38300 |
| }, |
| { |
| "epoch": 1.6190235264356185, |
| "grad_norm": 1.0101227760314941, |
| "learning_rate": 1.6762037271270766e-05, |
| "loss": 0.3269, |
| "step": 38400 |
| }, |
| { |
| "epoch": 1.6232397335357112, |
| "grad_norm": 0.7373623251914978, |
| "learning_rate": 1.6753604857070582e-05, |
| "loss": 0.3485, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.627455940635804, |
| "grad_norm": 0.9564256072044373, |
| "learning_rate": 1.6745172442870394e-05, |
| "loss": 0.3302, |
| "step": 38600 |
| }, |
| { |
| "epoch": 1.6316721477358969, |
| "grad_norm": 1.0523947477340698, |
| "learning_rate": 1.673674002867021e-05, |
| "loss": 0.3379, |
| "step": 38700 |
| }, |
| { |
| "epoch": 1.6358883548359895, |
| "grad_norm": 1.6321437358856201, |
| "learning_rate": 1.6728307614470026e-05, |
| "loss": 0.3287, |
| "step": 38800 |
| }, |
| { |
| "epoch": 1.6401045619360823, |
| "grad_norm": 0.7536235451698303, |
| "learning_rate": 1.6719875200269838e-05, |
| "loss": 0.3215, |
| "step": 38900 |
| }, |
| { |
| "epoch": 1.6443207690361752, |
| "grad_norm": 1.126569390296936, |
| "learning_rate": 1.6711442786069654e-05, |
| "loss": 0.3361, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.6485369761362678, |
| "grad_norm": 1.2291463613510132, |
| "learning_rate": 1.670301037186947e-05, |
| "loss": 0.338, |
| "step": 39100 |
| }, |
| { |
| "epoch": 1.6527531832363604, |
| "grad_norm": 1.1694891452789307, |
| "learning_rate": 1.6694577957669282e-05, |
| "loss": 0.3318, |
| "step": 39200 |
| }, |
| { |
| "epoch": 1.6569693903364533, |
| "grad_norm": 1.023356318473816, |
| "learning_rate": 1.6686145543469098e-05, |
| "loss": 0.3269, |
| "step": 39300 |
| }, |
| { |
| "epoch": 1.6611855974365461, |
| "grad_norm": 1.048325777053833, |
| "learning_rate": 1.667771312926891e-05, |
| "loss": 0.3208, |
| "step": 39400 |
| }, |
| { |
| "epoch": 1.6654018045366388, |
| "grad_norm": 0.9685364961624146, |
| "learning_rate": 1.6669280715068726e-05, |
| "loss": 0.3311, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.6696180116367316, |
| "grad_norm": 1.1764518022537231, |
| "learning_rate": 1.666084830086854e-05, |
| "loss": 0.3387, |
| "step": 39600 |
| }, |
| { |
| "epoch": 1.6738342187368245, |
| "grad_norm": 0.9446860551834106, |
| "learning_rate": 1.6652415886668354e-05, |
| "loss": 0.3387, |
| "step": 39700 |
| }, |
| { |
| "epoch": 1.678050425836917, |
| "grad_norm": 0.9704703092575073, |
| "learning_rate": 1.6643983472468167e-05, |
| "loss": 0.333, |
| "step": 39800 |
| }, |
| { |
| "epoch": 1.6822666329370097, |
| "grad_norm": 1.2208021879196167, |
| "learning_rate": 1.6635551058267982e-05, |
| "loss": 0.3335, |
| "step": 39900 |
| }, |
| { |
| "epoch": 1.6864828400371026, |
| "grad_norm": 0.8652202486991882, |
| "learning_rate": 1.6627118644067798e-05, |
| "loss": 0.3161, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.6864828400371026, |
| "eval_bleu": 10.8027, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.334, |
| "eval_comet": 0.5405, |
| "eval_gen_len": 20.8782, |
| "eval_loss": 0.3363133370876312, |
| "eval_runtime": 1388.0432, |
| "eval_samples_per_second": 34.174, |
| "eval_steps_per_second": 2.136, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.6906990471371954, |
| "grad_norm": 1.0811352729797363, |
| "learning_rate": 1.6618686229867614e-05, |
| "loss": 0.3193, |
| "step": 40100 |
| }, |
| { |
| "epoch": 1.694915254237288, |
| "grad_norm": 1.07560396194458, |
| "learning_rate": 1.6610253815667426e-05, |
| "loss": 0.3316, |
| "step": 40200 |
| }, |
| { |
| "epoch": 1.699131461337381, |
| "grad_norm": 1.0102812051773071, |
| "learning_rate": 1.6601821401467242e-05, |
| "loss": 0.3391, |
| "step": 40300 |
| }, |
| { |
| "epoch": 1.7033476684374738, |
| "grad_norm": 1.0425939559936523, |
| "learning_rate": 1.6593388987267058e-05, |
| "loss": 0.3322, |
| "step": 40400 |
| }, |
| { |
| "epoch": 1.7075638755375664, |
| "grad_norm": 1.0230315923690796, |
| "learning_rate": 1.658495657306687e-05, |
| "loss": 0.3282, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.711780082637659, |
| "grad_norm": 1.18186616897583, |
| "learning_rate": 1.6576524158866686e-05, |
| "loss": 0.3468, |
| "step": 40600 |
| }, |
| { |
| "epoch": 1.7159962897377519, |
| "grad_norm": 0.9138390421867371, |
| "learning_rate": 1.65680917446665e-05, |
| "loss": 0.3321, |
| "step": 40700 |
| }, |
| { |
| "epoch": 1.7202124968378447, |
| "grad_norm": 0.9740304350852966, |
| "learning_rate": 1.6559659330466314e-05, |
| "loss": 0.3218, |
| "step": 40800 |
| }, |
| { |
| "epoch": 1.7244287039379373, |
| "grad_norm": 1.2450716495513916, |
| "learning_rate": 1.6551226916266127e-05, |
| "loss": 0.3257, |
| "step": 40900 |
| }, |
| { |
| "epoch": 1.7286449110380302, |
| "grad_norm": 1.093040943145752, |
| "learning_rate": 1.6542794502065942e-05, |
| "loss": 0.3245, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.732861118138123, |
| "grad_norm": 0.9208073616027832, |
| "learning_rate": 1.6534362087865755e-05, |
| "loss": 0.3252, |
| "step": 41100 |
| }, |
| { |
| "epoch": 1.7370773252382157, |
| "grad_norm": 0.7854002714157104, |
| "learning_rate": 1.6525929673665574e-05, |
| "loss": 0.3304, |
| "step": 41200 |
| }, |
| { |
| "epoch": 1.7412935323383083, |
| "grad_norm": 0.9421478509902954, |
| "learning_rate": 1.6517497259465386e-05, |
| "loss": 0.3218, |
| "step": 41300 |
| }, |
| { |
| "epoch": 1.7455097394384014, |
| "grad_norm": 1.282019853591919, |
| "learning_rate": 1.6509064845265202e-05, |
| "loss": 0.3224, |
| "step": 41400 |
| }, |
| { |
| "epoch": 1.749725946538494, |
| "grad_norm": 1.2438828945159912, |
| "learning_rate": 1.6500632431065014e-05, |
| "loss": 0.3343, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.7539421536385866, |
| "grad_norm": 1.0174343585968018, |
| "learning_rate": 1.649220001686483e-05, |
| "loss": 0.3212, |
| "step": 41600 |
| }, |
| { |
| "epoch": 1.7581583607386795, |
| "grad_norm": 1.1391730308532715, |
| "learning_rate": 1.6483767602664643e-05, |
| "loss": 0.3145, |
| "step": 41700 |
| }, |
| { |
| "epoch": 1.7623745678387723, |
| "grad_norm": 1.1963273286819458, |
| "learning_rate": 1.647533518846446e-05, |
| "loss": 0.3153, |
| "step": 41800 |
| }, |
| { |
| "epoch": 1.766590774938865, |
| "grad_norm": 1.0125856399536133, |
| "learning_rate": 1.6466902774264274e-05, |
| "loss": 0.3251, |
| "step": 41900 |
| }, |
| { |
| "epoch": 1.7708069820389576, |
| "grad_norm": 1.2673448324203491, |
| "learning_rate": 1.6458470360064087e-05, |
| "loss": 0.3231, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.7750231891390507, |
| "grad_norm": 0.9198032021522522, |
| "learning_rate": 1.6450037945863902e-05, |
| "loss": 0.3332, |
| "step": 42100 |
| }, |
| { |
| "epoch": 1.7792393962391433, |
| "grad_norm": 1.0789839029312134, |
| "learning_rate": 1.6441605531663715e-05, |
| "loss": 0.3289, |
| "step": 42200 |
| }, |
| { |
| "epoch": 1.783455603339236, |
| "grad_norm": 1.2579984664916992, |
| "learning_rate": 1.643317311746353e-05, |
| "loss": 0.3166, |
| "step": 42300 |
| }, |
| { |
| "epoch": 1.7876718104393288, |
| "grad_norm": 1.1121423244476318, |
| "learning_rate": 1.6424740703263346e-05, |
| "loss": 0.3417, |
| "step": 42400 |
| }, |
| { |
| "epoch": 1.7918880175394216, |
| "grad_norm": 0.8245619535446167, |
| "learning_rate": 1.6416308289063162e-05, |
| "loss": 0.3274, |
| "step": 42500 |
| }, |
| { |
| "epoch": 1.7961042246395142, |
| "grad_norm": 1.3833199739456177, |
| "learning_rate": 1.6407875874862975e-05, |
| "loss": 0.3228, |
| "step": 42600 |
| }, |
| { |
| "epoch": 1.800320431739607, |
| "grad_norm": 1.387864112854004, |
| "learning_rate": 1.639944346066279e-05, |
| "loss": 0.3373, |
| "step": 42700 |
| }, |
| { |
| "epoch": 1.8045366388397, |
| "grad_norm": 1.053475260734558, |
| "learning_rate": 1.6391011046462603e-05, |
| "loss": 0.3292, |
| "step": 42800 |
| }, |
| { |
| "epoch": 1.8087528459397926, |
| "grad_norm": 1.1653027534484863, |
| "learning_rate": 1.638257863226242e-05, |
| "loss": 0.3243, |
| "step": 42900 |
| }, |
| { |
| "epoch": 1.8129690530398852, |
| "grad_norm": 0.7447928786277771, |
| "learning_rate": 1.637414621806223e-05, |
| "loss": 0.3211, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.817185260139978, |
| "grad_norm": 1.282525897026062, |
| "learning_rate": 1.6365713803862047e-05, |
| "loss": 0.3356, |
| "step": 43100 |
| }, |
| { |
| "epoch": 1.821401467240071, |
| "grad_norm": 0.9279470443725586, |
| "learning_rate": 1.6357281389661862e-05, |
| "loss": 0.3316, |
| "step": 43200 |
| }, |
| { |
| "epoch": 1.8256176743401635, |
| "grad_norm": 1.2694237232208252, |
| "learning_rate": 1.6348848975461675e-05, |
| "loss": 0.3219, |
| "step": 43300 |
| }, |
| { |
| "epoch": 1.8298338814402564, |
| "grad_norm": 0.9450383186340332, |
| "learning_rate": 1.634041656126149e-05, |
| "loss": 0.3309, |
| "step": 43400 |
| }, |
| { |
| "epoch": 1.8340500885403492, |
| "grad_norm": 0.7561673521995544, |
| "learning_rate": 1.6331984147061306e-05, |
| "loss": 0.3241, |
| "step": 43500 |
| }, |
| { |
| "epoch": 1.8382662956404419, |
| "grad_norm": 1.1571000814437866, |
| "learning_rate": 1.632355173286112e-05, |
| "loss": 0.3125, |
| "step": 43600 |
| }, |
| { |
| "epoch": 1.8424825027405345, |
| "grad_norm": 0.931656002998352, |
| "learning_rate": 1.6315119318660935e-05, |
| "loss": 0.3318, |
| "step": 43700 |
| }, |
| { |
| "epoch": 1.8466987098406273, |
| "grad_norm": 0.8409183025360107, |
| "learning_rate": 1.630668690446075e-05, |
| "loss": 0.3288, |
| "step": 43800 |
| }, |
| { |
| "epoch": 1.8509149169407202, |
| "grad_norm": 0.9442520141601562, |
| "learning_rate": 1.6298254490260563e-05, |
| "loss": 0.3215, |
| "step": 43900 |
| }, |
| { |
| "epoch": 1.8551311240408128, |
| "grad_norm": 0.9926084876060486, |
| "learning_rate": 1.628982207606038e-05, |
| "loss": 0.3248, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.8551311240408128, |
| "eval_bleu": 10.8661, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.5077, |
| "eval_comet": 0.5417, |
| "eval_gen_len": 20.8761, |
| "eval_loss": 0.33092769980430603, |
| "eval_runtime": 1383.7975, |
| "eval_samples_per_second": 34.279, |
| "eval_steps_per_second": 2.143, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.8593473311409057, |
| "grad_norm": 1.0377424955368042, |
| "learning_rate": 1.628138966186019e-05, |
| "loss": 0.345, |
| "step": 44100 |
| }, |
| { |
| "epoch": 1.8635635382409985, |
| "grad_norm": 0.848667562007904, |
| "learning_rate": 1.6272957247660007e-05, |
| "loss": 0.3422, |
| "step": 44200 |
| }, |
| { |
| "epoch": 1.8677797453410911, |
| "grad_norm": 0.9515209197998047, |
| "learning_rate": 1.626452483345982e-05, |
| "loss": 0.3259, |
| "step": 44300 |
| }, |
| { |
| "epoch": 1.8719959524411838, |
| "grad_norm": 1.2481857538223267, |
| "learning_rate": 1.6256092419259635e-05, |
| "loss": 0.3302, |
| "step": 44400 |
| }, |
| { |
| "epoch": 1.8762121595412766, |
| "grad_norm": 0.7382723093032837, |
| "learning_rate": 1.624766000505945e-05, |
| "loss": 0.3171, |
| "step": 44500 |
| }, |
| { |
| "epoch": 1.8804283666413695, |
| "grad_norm": 1.148926854133606, |
| "learning_rate": 1.6239227590859263e-05, |
| "loss": 0.3227, |
| "step": 44600 |
| }, |
| { |
| "epoch": 1.884644573741462, |
| "grad_norm": 0.9098696112632751, |
| "learning_rate": 1.623079517665908e-05, |
| "loss": 0.3117, |
| "step": 44700 |
| }, |
| { |
| "epoch": 1.888860780841555, |
| "grad_norm": 1.4447691440582275, |
| "learning_rate": 1.6222362762458895e-05, |
| "loss": 0.3316, |
| "step": 44800 |
| }, |
| { |
| "epoch": 1.8930769879416478, |
| "grad_norm": 0.9378564953804016, |
| "learning_rate": 1.6213930348258707e-05, |
| "loss": 0.3345, |
| "step": 44900 |
| }, |
| { |
| "epoch": 1.8972931950417404, |
| "grad_norm": 1.1684097051620483, |
| "learning_rate": 1.6205497934058523e-05, |
| "loss": 0.3177, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.901509402141833, |
| "grad_norm": 1.1021177768707275, |
| "learning_rate": 1.619706551985834e-05, |
| "loss": 0.3355, |
| "step": 45100 |
| }, |
| { |
| "epoch": 1.905725609241926, |
| "grad_norm": 1.5381673574447632, |
| "learning_rate": 1.618863310565815e-05, |
| "loss": 0.3279, |
| "step": 45200 |
| }, |
| { |
| "epoch": 1.9099418163420188, |
| "grad_norm": 1.1123120784759521, |
| "learning_rate": 1.6180200691457967e-05, |
| "loss": 0.3231, |
| "step": 45300 |
| }, |
| { |
| "epoch": 1.9141580234421114, |
| "grad_norm": 0.9993001818656921, |
| "learning_rate": 1.617176827725778e-05, |
| "loss": 0.3179, |
| "step": 45400 |
| }, |
| { |
| "epoch": 1.9183742305422042, |
| "grad_norm": 1.2264147996902466, |
| "learning_rate": 1.6163335863057595e-05, |
| "loss": 0.3189, |
| "step": 45500 |
| }, |
| { |
| "epoch": 1.922590437642297, |
| "grad_norm": 0.9310553669929504, |
| "learning_rate": 1.6154903448857407e-05, |
| "loss": 0.3325, |
| "step": 45600 |
| }, |
| { |
| "epoch": 1.9268066447423897, |
| "grad_norm": 0.8175243139266968, |
| "learning_rate": 1.6146471034657223e-05, |
| "loss": 0.3304, |
| "step": 45700 |
| }, |
| { |
| "epoch": 1.9310228518424823, |
| "grad_norm": 1.0943084955215454, |
| "learning_rate": 1.6138038620457035e-05, |
| "loss": 0.3283, |
| "step": 45800 |
| }, |
| { |
| "epoch": 1.9352390589425754, |
| "grad_norm": 1.1721843481063843, |
| "learning_rate": 1.6129606206256855e-05, |
| "loss": 0.3369, |
| "step": 45900 |
| }, |
| { |
| "epoch": 1.939455266042668, |
| "grad_norm": 1.3300516605377197, |
| "learning_rate": 1.6121173792056667e-05, |
| "loss": 0.325, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1.9436714731427607, |
| "grad_norm": 0.9788861274719238, |
| "learning_rate": 1.6112741377856483e-05, |
| "loss": 0.3213, |
| "step": 46100 |
| }, |
| { |
| "epoch": 1.9478876802428535, |
| "grad_norm": 0.9827736616134644, |
| "learning_rate": 1.6104308963656295e-05, |
| "loss": 0.3232, |
| "step": 46200 |
| }, |
| { |
| "epoch": 1.9521038873429464, |
| "grad_norm": 1.8439685106277466, |
| "learning_rate": 1.609587654945611e-05, |
| "loss": 0.3343, |
| "step": 46300 |
| }, |
| { |
| "epoch": 1.956320094443039, |
| "grad_norm": 1.3670806884765625, |
| "learning_rate": 1.6087444135255927e-05, |
| "loss": 0.3207, |
| "step": 46400 |
| }, |
| { |
| "epoch": 1.9605363015431319, |
| "grad_norm": 1.2202095985412598, |
| "learning_rate": 1.607901172105574e-05, |
| "loss": 0.3179, |
| "step": 46500 |
| }, |
| { |
| "epoch": 1.9647525086432247, |
| "grad_norm": 0.9660270810127258, |
| "learning_rate": 1.6070579306855555e-05, |
| "loss": 0.3305, |
| "step": 46600 |
| }, |
| { |
| "epoch": 1.9689687157433173, |
| "grad_norm": 0.8134570121765137, |
| "learning_rate": 1.6062146892655367e-05, |
| "loss": 0.3276, |
| "step": 46700 |
| }, |
| { |
| "epoch": 1.97318492284341, |
| "grad_norm": 0.957103431224823, |
| "learning_rate": 1.6053714478455183e-05, |
| "loss": 0.3262, |
| "step": 46800 |
| }, |
| { |
| "epoch": 1.9774011299435028, |
| "grad_norm": 0.9770568609237671, |
| "learning_rate": 1.6045282064254996e-05, |
| "loss": 0.334, |
| "step": 46900 |
| }, |
| { |
| "epoch": 1.9816173370435957, |
| "grad_norm": 0.9361381530761719, |
| "learning_rate": 1.603684965005481e-05, |
| "loss": 0.3256, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1.9858335441436883, |
| "grad_norm": 0.9023398756980896, |
| "learning_rate": 1.6028417235854627e-05, |
| "loss": 0.3176, |
| "step": 47100 |
| }, |
| { |
| "epoch": 1.9900497512437811, |
| "grad_norm": 1.149931788444519, |
| "learning_rate": 1.6019984821654443e-05, |
| "loss": 0.3158, |
| "step": 47200 |
| }, |
| { |
| "epoch": 1.994265958343874, |
| "grad_norm": 1.1268340349197388, |
| "learning_rate": 1.6011552407454255e-05, |
| "loss": 0.3266, |
| "step": 47300 |
| }, |
| { |
| "epoch": 1.9984821654439666, |
| "grad_norm": 1.1518200635910034, |
| "learning_rate": 1.600311999325407e-05, |
| "loss": 0.3279, |
| "step": 47400 |
| }, |
| { |
| "epoch": 2.0026983725440592, |
| "grad_norm": 0.8826459646224976, |
| "learning_rate": 1.5994687579053883e-05, |
| "loss": 0.3068, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.0069145796441523, |
| "grad_norm": 3.155869722366333, |
| "learning_rate": 1.59862551648537e-05, |
| "loss": 0.2818, |
| "step": 47600 |
| }, |
| { |
| "epoch": 2.011130786744245, |
| "grad_norm": 1.0178985595703125, |
| "learning_rate": 1.597782275065351e-05, |
| "loss": 0.3089, |
| "step": 47700 |
| }, |
| { |
| "epoch": 2.0153469938443376, |
| "grad_norm": 0.9122896790504456, |
| "learning_rate": 1.5969390336453327e-05, |
| "loss": 0.3009, |
| "step": 47800 |
| }, |
| { |
| "epoch": 2.01956320094443, |
| "grad_norm": 0.8784326910972595, |
| "learning_rate": 1.5960957922253143e-05, |
| "loss": 0.2948, |
| "step": 47900 |
| }, |
| { |
| "epoch": 2.0237794080445233, |
| "grad_norm": 1.5919113159179688, |
| "learning_rate": 1.5952525508052956e-05, |
| "loss": 0.2849, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.0237794080445233, |
| "eval_bleu": 10.9104, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.5186, |
| "eval_comet": 0.5415, |
| "eval_gen_len": 20.8749, |
| "eval_loss": 0.3277857005596161, |
| "eval_runtime": 1380.1121, |
| "eval_samples_per_second": 34.37, |
| "eval_steps_per_second": 2.148, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.027995615144616, |
| "grad_norm": 1.1097805500030518, |
| "learning_rate": 1.594409309385277e-05, |
| "loss": 0.2929, |
| "step": 48100 |
| }, |
| { |
| "epoch": 2.0322118222447085, |
| "grad_norm": 1.0113049745559692, |
| "learning_rate": 1.5935660679652584e-05, |
| "loss": 0.2906, |
| "step": 48200 |
| }, |
| { |
| "epoch": 2.0364280293448016, |
| "grad_norm": 0.9952514171600342, |
| "learning_rate": 1.5927228265452403e-05, |
| "loss": 0.2948, |
| "step": 48300 |
| }, |
| { |
| "epoch": 2.0406442364448942, |
| "grad_norm": 0.9028350114822388, |
| "learning_rate": 1.5918795851252215e-05, |
| "loss": 0.3016, |
| "step": 48400 |
| }, |
| { |
| "epoch": 2.044860443544987, |
| "grad_norm": 1.228007197380066, |
| "learning_rate": 1.591036343705203e-05, |
| "loss": 0.2829, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.0490766506450795, |
| "grad_norm": 0.9562857747077942, |
| "learning_rate": 1.5901931022851844e-05, |
| "loss": 0.2995, |
| "step": 48600 |
| }, |
| { |
| "epoch": 2.0532928577451726, |
| "grad_norm": 0.7891142964363098, |
| "learning_rate": 1.589349860865166e-05, |
| "loss": 0.3013, |
| "step": 48700 |
| }, |
| { |
| "epoch": 2.057509064845265, |
| "grad_norm": 0.8914188742637634, |
| "learning_rate": 1.588506619445147e-05, |
| "loss": 0.3034, |
| "step": 48800 |
| }, |
| { |
| "epoch": 2.061725271945358, |
| "grad_norm": 1.180168867111206, |
| "learning_rate": 1.5876633780251287e-05, |
| "loss": 0.3043, |
| "step": 48900 |
| }, |
| { |
| "epoch": 2.065941479045451, |
| "grad_norm": 1.1622966527938843, |
| "learning_rate": 1.58682013660511e-05, |
| "loss": 0.2952, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.0701576861455435, |
| "grad_norm": 1.1493375301361084, |
| "learning_rate": 1.5859768951850916e-05, |
| "loss": 0.3084, |
| "step": 49100 |
| }, |
| { |
| "epoch": 2.074373893245636, |
| "grad_norm": 1.2714732885360718, |
| "learning_rate": 1.585133653765073e-05, |
| "loss": 0.294, |
| "step": 49200 |
| }, |
| { |
| "epoch": 2.0785901003457288, |
| "grad_norm": 0.6899680495262146, |
| "learning_rate": 1.5842904123450544e-05, |
| "loss": 0.2936, |
| "step": 49300 |
| }, |
| { |
| "epoch": 2.082806307445822, |
| "grad_norm": 0.9679650068283081, |
| "learning_rate": 1.583447170925036e-05, |
| "loss": 0.296, |
| "step": 49400 |
| }, |
| { |
| "epoch": 2.0870225145459145, |
| "grad_norm": 0.8275384902954102, |
| "learning_rate": 1.5826039295050175e-05, |
| "loss": 0.3055, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.091238721646007, |
| "grad_norm": 1.0451972484588623, |
| "learning_rate": 1.5817606880849988e-05, |
| "loss": 0.2983, |
| "step": 49600 |
| }, |
| { |
| "epoch": 2.0954549287461, |
| "grad_norm": 0.8411651253700256, |
| "learning_rate": 1.5809174466649804e-05, |
| "loss": 0.3067, |
| "step": 49700 |
| }, |
| { |
| "epoch": 2.099671135846193, |
| "grad_norm": 1.1985809803009033, |
| "learning_rate": 1.580074205244962e-05, |
| "loss": 0.2959, |
| "step": 49800 |
| }, |
| { |
| "epoch": 2.1038873429462854, |
| "grad_norm": 1.143189787864685, |
| "learning_rate": 1.5792309638249432e-05, |
| "loss": 0.3003, |
| "step": 49900 |
| }, |
| { |
| "epoch": 2.108103550046378, |
| "grad_norm": 1.1173498630523682, |
| "learning_rate": 1.5783877224049248e-05, |
| "loss": 0.2941, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.112319757146471, |
| "grad_norm": 1.442973017692566, |
| "learning_rate": 1.577544480984906e-05, |
| "loss": 0.2909, |
| "step": 50100 |
| }, |
| { |
| "epoch": 2.1165359642465638, |
| "grad_norm": 2.1009395122528076, |
| "learning_rate": 1.5767012395648876e-05, |
| "loss": 0.2989, |
| "step": 50200 |
| }, |
| { |
| "epoch": 2.1207521713466564, |
| "grad_norm": 0.8531181216239929, |
| "learning_rate": 1.5758579981448688e-05, |
| "loss": 0.2897, |
| "step": 50300 |
| }, |
| { |
| "epoch": 2.1249683784467495, |
| "grad_norm": 0.9189686179161072, |
| "learning_rate": 1.5750147567248504e-05, |
| "loss": 0.3031, |
| "step": 50400 |
| }, |
| { |
| "epoch": 2.129184585546842, |
| "grad_norm": 1.1998023986816406, |
| "learning_rate": 1.574171515304832e-05, |
| "loss": 0.3012, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.1334007926469347, |
| "grad_norm": 1.015147089958191, |
| "learning_rate": 1.5733282738848132e-05, |
| "loss": 0.3005, |
| "step": 50600 |
| }, |
| { |
| "epoch": 2.137616999747028, |
| "grad_norm": 0.907986581325531, |
| "learning_rate": 1.5724850324647948e-05, |
| "loss": 0.293, |
| "step": 50700 |
| }, |
| { |
| "epoch": 2.1418332068471204, |
| "grad_norm": 0.7289599776268005, |
| "learning_rate": 1.5716417910447764e-05, |
| "loss": 0.2911, |
| "step": 50800 |
| }, |
| { |
| "epoch": 2.146049413947213, |
| "grad_norm": 0.9426230788230896, |
| "learning_rate": 1.5707985496247576e-05, |
| "loss": 0.2916, |
| "step": 50900 |
| }, |
| { |
| "epoch": 2.1502656210473057, |
| "grad_norm": 0.7667168378829956, |
| "learning_rate": 1.5699553082047392e-05, |
| "loss": 0.2903, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.1544818281473987, |
| "grad_norm": 1.0113255977630615, |
| "learning_rate": 1.5691120667847208e-05, |
| "loss": 0.2982, |
| "step": 51100 |
| }, |
| { |
| "epoch": 2.1586980352474914, |
| "grad_norm": 1.3701939582824707, |
| "learning_rate": 1.568268825364702e-05, |
| "loss": 0.3025, |
| "step": 51200 |
| }, |
| { |
| "epoch": 2.162914242347584, |
| "grad_norm": 0.9606438875198364, |
| "learning_rate": 1.5674255839446836e-05, |
| "loss": 0.2999, |
| "step": 51300 |
| }, |
| { |
| "epoch": 2.167130449447677, |
| "grad_norm": 1.252716064453125, |
| "learning_rate": 1.5665823425246648e-05, |
| "loss": 0.2951, |
| "step": 51400 |
| }, |
| { |
| "epoch": 2.1713466565477697, |
| "grad_norm": 1.0813300609588623, |
| "learning_rate": 1.5657391011046464e-05, |
| "loss": 0.3044, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.1755628636478623, |
| "grad_norm": 1.3432576656341553, |
| "learning_rate": 1.5648958596846276e-05, |
| "loss": 0.2975, |
| "step": 51600 |
| }, |
| { |
| "epoch": 2.179779070747955, |
| "grad_norm": 1.1878820657730103, |
| "learning_rate": 1.5640526182646092e-05, |
| "loss": 0.2983, |
| "step": 51700 |
| }, |
| { |
| "epoch": 2.183995277848048, |
| "grad_norm": 0.9856054186820984, |
| "learning_rate": 1.5632093768445908e-05, |
| "loss": 0.2989, |
| "step": 51800 |
| }, |
| { |
| "epoch": 2.1882114849481407, |
| "grad_norm": 1.0320172309875488, |
| "learning_rate": 1.5623661354245724e-05, |
| "loss": 0.3075, |
| "step": 51900 |
| }, |
| { |
| "epoch": 2.1924276920482333, |
| "grad_norm": 1.0002996921539307, |
| "learning_rate": 1.5615228940045536e-05, |
| "loss": 0.2842, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.1924276920482333, |
| "eval_bleu": 11.0893, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.7186, |
| "eval_comet": 0.5425, |
| "eval_gen_len": 20.875, |
| "eval_loss": 0.3242399990558624, |
| "eval_runtime": 1379.6714, |
| "eval_samples_per_second": 34.381, |
| "eval_steps_per_second": 2.149, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.1966438991483264, |
| "grad_norm": 1.0548425912857056, |
| "learning_rate": 1.5606796525845352e-05, |
| "loss": 0.2999, |
| "step": 52100 |
| }, |
| { |
| "epoch": 2.200860106248419, |
| "grad_norm": 0.9152701497077942, |
| "learning_rate": 1.5598364111645164e-05, |
| "loss": 0.295, |
| "step": 52200 |
| }, |
| { |
| "epoch": 2.2050763133485116, |
| "grad_norm": 0.6967754364013672, |
| "learning_rate": 1.558993169744498e-05, |
| "loss": 0.3083, |
| "step": 52300 |
| }, |
| { |
| "epoch": 2.2092925204486042, |
| "grad_norm": 1.1519029140472412, |
| "learning_rate": 1.5581499283244796e-05, |
| "loss": 0.2902, |
| "step": 52400 |
| }, |
| { |
| "epoch": 2.2135087275486973, |
| "grad_norm": 1.1648145914077759, |
| "learning_rate": 1.5573066869044608e-05, |
| "loss": 0.3046, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.21772493464879, |
| "grad_norm": 1.3708479404449463, |
| "learning_rate": 1.5564634454844424e-05, |
| "loss": 0.2916, |
| "step": 52600 |
| }, |
| { |
| "epoch": 2.2219411417488826, |
| "grad_norm": 1.298677682876587, |
| "learning_rate": 1.5556202040644236e-05, |
| "loss": 0.2973, |
| "step": 52700 |
| }, |
| { |
| "epoch": 2.2261573488489756, |
| "grad_norm": 0.917349100112915, |
| "learning_rate": 1.5547769626444052e-05, |
| "loss": 0.296, |
| "step": 52800 |
| }, |
| { |
| "epoch": 2.2303735559490683, |
| "grad_norm": 0.9578775763511658, |
| "learning_rate": 1.5539337212243865e-05, |
| "loss": 0.2904, |
| "step": 52900 |
| }, |
| { |
| "epoch": 2.234589763049161, |
| "grad_norm": 1.1845663785934448, |
| "learning_rate": 1.5530904798043684e-05, |
| "loss": 0.2993, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.2388059701492535, |
| "grad_norm": 0.9572575688362122, |
| "learning_rate": 1.5522472383843496e-05, |
| "loss": 0.3096, |
| "step": 53100 |
| }, |
| { |
| "epoch": 2.2430221772493466, |
| "grad_norm": 1.1121599674224854, |
| "learning_rate": 1.5514039969643312e-05, |
| "loss": 0.2969, |
| "step": 53200 |
| }, |
| { |
| "epoch": 2.2472383843494392, |
| "grad_norm": 1.1499770879745483, |
| "learning_rate": 1.5505607555443124e-05, |
| "loss": 0.3047, |
| "step": 53300 |
| }, |
| { |
| "epoch": 2.251454591449532, |
| "grad_norm": 0.9810878038406372, |
| "learning_rate": 1.549717514124294e-05, |
| "loss": 0.3008, |
| "step": 53400 |
| }, |
| { |
| "epoch": 2.255670798549625, |
| "grad_norm": 0.9504501819610596, |
| "learning_rate": 1.5488742727042752e-05, |
| "loss": 0.303, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.2598870056497176, |
| "grad_norm": 0.8722612857818604, |
| "learning_rate": 1.5480310312842568e-05, |
| "loss": 0.3017, |
| "step": 53600 |
| }, |
| { |
| "epoch": 2.26410321274981, |
| "grad_norm": 1.1492185592651367, |
| "learning_rate": 1.5471877898642384e-05, |
| "loss": 0.2932, |
| "step": 53700 |
| }, |
| { |
| "epoch": 2.2683194198499033, |
| "grad_norm": 1.248023509979248, |
| "learning_rate": 1.5463445484442196e-05, |
| "loss": 0.3085, |
| "step": 53800 |
| }, |
| { |
| "epoch": 2.272535626949996, |
| "grad_norm": 0.9625715017318726, |
| "learning_rate": 1.5455013070242012e-05, |
| "loss": 0.2991, |
| "step": 53900 |
| }, |
| { |
| "epoch": 2.2767518340500885, |
| "grad_norm": 1.1480776071548462, |
| "learning_rate": 1.5446580656041825e-05, |
| "loss": 0.2919, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.280968041150181, |
| "grad_norm": 1.2776678800582886, |
| "learning_rate": 1.543814824184164e-05, |
| "loss": 0.2985, |
| "step": 54100 |
| }, |
| { |
| "epoch": 2.285184248250274, |
| "grad_norm": 1.0501973628997803, |
| "learning_rate": 1.5429715827641456e-05, |
| "loss": 0.3052, |
| "step": 54200 |
| }, |
| { |
| "epoch": 2.289400455350367, |
| "grad_norm": 1.2569295167922974, |
| "learning_rate": 1.5421283413441272e-05, |
| "loss": 0.2963, |
| "step": 54300 |
| }, |
| { |
| "epoch": 2.2936166624504595, |
| "grad_norm": 1.1297552585601807, |
| "learning_rate": 1.5412850999241084e-05, |
| "loss": 0.2987, |
| "step": 54400 |
| }, |
| { |
| "epoch": 2.297832869550552, |
| "grad_norm": 1.2715651988983154, |
| "learning_rate": 1.54044185850409e-05, |
| "loss": 0.2974, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.302049076650645, |
| "grad_norm": 1.2650036811828613, |
| "learning_rate": 1.5395986170840712e-05, |
| "loss": 0.3003, |
| "step": 54600 |
| }, |
| { |
| "epoch": 2.306265283750738, |
| "grad_norm": 1.1509555578231812, |
| "learning_rate": 1.5387553756640528e-05, |
| "loss": 0.3007, |
| "step": 54700 |
| }, |
| { |
| "epoch": 2.3104814908508304, |
| "grad_norm": 0.7887945771217346, |
| "learning_rate": 1.537912134244034e-05, |
| "loss": 0.2914, |
| "step": 54800 |
| }, |
| { |
| "epoch": 2.3146976979509235, |
| "grad_norm": 1.1291335821151733, |
| "learning_rate": 1.5370688928240156e-05, |
| "loss": 0.3031, |
| "step": 54900 |
| }, |
| { |
| "epoch": 2.318913905051016, |
| "grad_norm": 1.0857185125350952, |
| "learning_rate": 1.536225651403997e-05, |
| "loss": 0.2953, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.3231301121511088, |
| "grad_norm": 0.8543188571929932, |
| "learning_rate": 1.5353824099839785e-05, |
| "loss": 0.2902, |
| "step": 55100 |
| }, |
| { |
| "epoch": 2.327346319251202, |
| "grad_norm": 0.8242142796516418, |
| "learning_rate": 1.53453916856396e-05, |
| "loss": 0.2837, |
| "step": 55200 |
| }, |
| { |
| "epoch": 2.3315625263512945, |
| "grad_norm": 0.9572939872741699, |
| "learning_rate": 1.5336959271439413e-05, |
| "loss": 0.3028, |
| "step": 55300 |
| }, |
| { |
| "epoch": 2.335778733451387, |
| "grad_norm": 0.9514021277427673, |
| "learning_rate": 1.532852685723923e-05, |
| "loss": 0.2904, |
| "step": 55400 |
| }, |
| { |
| "epoch": 2.3399949405514797, |
| "grad_norm": 0.9875904321670532, |
| "learning_rate": 1.5320094443039044e-05, |
| "loss": 0.3006, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.344211147651573, |
| "grad_norm": 1.1220569610595703, |
| "learning_rate": 1.531166202883886e-05, |
| "loss": 0.2915, |
| "step": 55600 |
| }, |
| { |
| "epoch": 2.3484273547516654, |
| "grad_norm": 1.2072755098342896, |
| "learning_rate": 1.5303229614638673e-05, |
| "loss": 0.2922, |
| "step": 55700 |
| }, |
| { |
| "epoch": 2.352643561851758, |
| "grad_norm": 0.9680849313735962, |
| "learning_rate": 1.5294797200438488e-05, |
| "loss": 0.2919, |
| "step": 55800 |
| }, |
| { |
| "epoch": 2.3568597689518507, |
| "grad_norm": 1.0141241550445557, |
| "learning_rate": 1.52863647862383e-05, |
| "loss": 0.2953, |
| "step": 55900 |
| }, |
| { |
| "epoch": 2.3610759760519437, |
| "grad_norm": 1.1104940176010132, |
| "learning_rate": 1.5277932372038116e-05, |
| "loss": 0.3066, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.3610759760519437, |
| "eval_bleu": 11.0717, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.766, |
| "eval_comet": 0.5431, |
| "eval_gen_len": 20.8744, |
| "eval_loss": 0.31978920102119446, |
| "eval_runtime": 1367.3903, |
| "eval_samples_per_second": 34.69, |
| "eval_steps_per_second": 2.168, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.3652921831520364, |
| "grad_norm": 0.8834062218666077, |
| "learning_rate": 1.526949995783793e-05, |
| "loss": 0.2964, |
| "step": 56100 |
| }, |
| { |
| "epoch": 2.369508390252129, |
| "grad_norm": 1.1473215818405151, |
| "learning_rate": 1.5261067543637745e-05, |
| "loss": 0.3116, |
| "step": 56200 |
| }, |
| { |
| "epoch": 2.373724597352222, |
| "grad_norm": 1.0120465755462646, |
| "learning_rate": 1.5252635129437559e-05, |
| "loss": 0.29, |
| "step": 56300 |
| }, |
| { |
| "epoch": 2.3779408044523147, |
| "grad_norm": 1.1583025455474854, |
| "learning_rate": 1.5244202715237373e-05, |
| "loss": 0.2926, |
| "step": 56400 |
| }, |
| { |
| "epoch": 2.3821570115524073, |
| "grad_norm": 1.3375540971755981, |
| "learning_rate": 1.5235770301037187e-05, |
| "loss": 0.2999, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.3863732186525004, |
| "grad_norm": 0.9002558588981628, |
| "learning_rate": 1.5227337886837004e-05, |
| "loss": 0.297, |
| "step": 56600 |
| }, |
| { |
| "epoch": 2.390589425752593, |
| "grad_norm": 0.9687677025794983, |
| "learning_rate": 1.5218905472636818e-05, |
| "loss": 0.2872, |
| "step": 56700 |
| }, |
| { |
| "epoch": 2.3948056328526857, |
| "grad_norm": 0.8272023797035217, |
| "learning_rate": 1.5210473058436633e-05, |
| "loss": 0.2959, |
| "step": 56800 |
| }, |
| { |
| "epoch": 2.3990218399527783, |
| "grad_norm": 0.8559250235557556, |
| "learning_rate": 1.5202040644236447e-05, |
| "loss": 0.2851, |
| "step": 56900 |
| }, |
| { |
| "epoch": 2.4032380470528714, |
| "grad_norm": 0.9264187812805176, |
| "learning_rate": 1.519360823003626e-05, |
| "loss": 0.3003, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.407454254152964, |
| "grad_norm": 0.9628717303276062, |
| "learning_rate": 1.5185175815836075e-05, |
| "loss": 0.2871, |
| "step": 57100 |
| }, |
| { |
| "epoch": 2.4116704612530566, |
| "grad_norm": 1.0523897409439087, |
| "learning_rate": 1.5176743401635889e-05, |
| "loss": 0.3034, |
| "step": 57200 |
| }, |
| { |
| "epoch": 2.4158866683531497, |
| "grad_norm": 1.1983698606491089, |
| "learning_rate": 1.5168310987435703e-05, |
| "loss": 0.2959, |
| "step": 57300 |
| }, |
| { |
| "epoch": 2.4201028754532423, |
| "grad_norm": 0.9500690698623657, |
| "learning_rate": 1.5159878573235519e-05, |
| "loss": 0.2952, |
| "step": 57400 |
| }, |
| { |
| "epoch": 2.424319082553335, |
| "grad_norm": 1.007147192955017, |
| "learning_rate": 1.5151446159035333e-05, |
| "loss": 0.2875, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.4285352896534276, |
| "grad_norm": 1.1029393672943115, |
| "learning_rate": 1.5143013744835147e-05, |
| "loss": 0.2912, |
| "step": 57600 |
| }, |
| { |
| "epoch": 2.4327514967535206, |
| "grad_norm": 0.9687654376029968, |
| "learning_rate": 1.5134581330634961e-05, |
| "loss": 0.2944, |
| "step": 57700 |
| }, |
| { |
| "epoch": 2.4369677038536133, |
| "grad_norm": 1.0068873167037964, |
| "learning_rate": 1.5126148916434777e-05, |
| "loss": 0.2941, |
| "step": 57800 |
| }, |
| { |
| "epoch": 2.441183910953706, |
| "grad_norm": 1.0818562507629395, |
| "learning_rate": 1.5117716502234591e-05, |
| "loss": 0.2997, |
| "step": 57900 |
| }, |
| { |
| "epoch": 2.445400118053799, |
| "grad_norm": 0.8829095363616943, |
| "learning_rate": 1.5109284088034407e-05, |
| "loss": 0.2951, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.4496163251538916, |
| "grad_norm": 0.9035953283309937, |
| "learning_rate": 1.510085167383422e-05, |
| "loss": 0.3027, |
| "step": 58100 |
| }, |
| { |
| "epoch": 2.4538325322539842, |
| "grad_norm": 0.9314539432525635, |
| "learning_rate": 1.5092419259634035e-05, |
| "loss": 0.287, |
| "step": 58200 |
| }, |
| { |
| "epoch": 2.4580487393540773, |
| "grad_norm": 1.2419513463974, |
| "learning_rate": 1.5083986845433849e-05, |
| "loss": 0.2901, |
| "step": 58300 |
| }, |
| { |
| "epoch": 2.46226494645417, |
| "grad_norm": 1.06702721118927, |
| "learning_rate": 1.5075554431233663e-05, |
| "loss": 0.2941, |
| "step": 58400 |
| }, |
| { |
| "epoch": 2.4664811535542626, |
| "grad_norm": 1.1835907697677612, |
| "learning_rate": 1.5067122017033477e-05, |
| "loss": 0.296, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.470697360654355, |
| "grad_norm": 0.8179803490638733, |
| "learning_rate": 1.5058689602833291e-05, |
| "loss": 0.2888, |
| "step": 58600 |
| }, |
| { |
| "epoch": 2.4749135677544483, |
| "grad_norm": 0.777482807636261, |
| "learning_rate": 1.5050257188633105e-05, |
| "loss": 0.2923, |
| "step": 58700 |
| }, |
| { |
| "epoch": 2.479129774854541, |
| "grad_norm": 1.0873775482177734, |
| "learning_rate": 1.5041824774432921e-05, |
| "loss": 0.2942, |
| "step": 58800 |
| }, |
| { |
| "epoch": 2.4833459819546335, |
| "grad_norm": 0.9708207249641418, |
| "learning_rate": 1.5033392360232737e-05, |
| "loss": 0.2917, |
| "step": 58900 |
| }, |
| { |
| "epoch": 2.487562189054726, |
| "grad_norm": 1.1274676322937012, |
| "learning_rate": 1.5024959946032551e-05, |
| "loss": 0.2895, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.491778396154819, |
| "grad_norm": 1.5154836177825928, |
| "learning_rate": 1.5016527531832365e-05, |
| "loss": 0.2952, |
| "step": 59100 |
| }, |
| { |
| "epoch": 2.495994603254912, |
| "grad_norm": 1.0997233390808105, |
| "learning_rate": 1.5008095117632179e-05, |
| "loss": 0.3037, |
| "step": 59200 |
| }, |
| { |
| "epoch": 2.5002108103550045, |
| "grad_norm": 0.8567424416542053, |
| "learning_rate": 1.4999662703431995e-05, |
| "loss": 0.2986, |
| "step": 59300 |
| }, |
| { |
| "epoch": 2.5044270174550975, |
| "grad_norm": 1.1187572479248047, |
| "learning_rate": 1.4991230289231809e-05, |
| "loss": 0.2912, |
| "step": 59400 |
| }, |
| { |
| "epoch": 2.50864322455519, |
| "grad_norm": 1.0140553712844849, |
| "learning_rate": 1.4982797875031623e-05, |
| "loss": 0.2853, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.512859431655283, |
| "grad_norm": 1.2574357986450195, |
| "learning_rate": 1.4974365460831437e-05, |
| "loss": 0.2785, |
| "step": 59600 |
| }, |
| { |
| "epoch": 2.517075638755376, |
| "grad_norm": 1.3628848791122437, |
| "learning_rate": 1.4965933046631251e-05, |
| "loss": 0.295, |
| "step": 59700 |
| }, |
| { |
| "epoch": 2.5212918458554685, |
| "grad_norm": 1.3058345317840576, |
| "learning_rate": 1.4957500632431065e-05, |
| "loss": 0.2837, |
| "step": 59800 |
| }, |
| { |
| "epoch": 2.525508052955561, |
| "grad_norm": 1.1940069198608398, |
| "learning_rate": 1.494906821823088e-05, |
| "loss": 0.2934, |
| "step": 59900 |
| }, |
| { |
| "epoch": 2.529724260055654, |
| "grad_norm": 0.9008951187133789, |
| "learning_rate": 1.4940635804030694e-05, |
| "loss": 0.2827, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.529724260055654, |
| "eval_bleu": 11.1804, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.8664, |
| "eval_comet": 0.5434, |
| "eval_gen_len": 20.874, |
| "eval_loss": 0.3158092796802521, |
| "eval_runtime": 1392.2104, |
| "eval_samples_per_second": 34.072, |
| "eval_steps_per_second": 2.13, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.533940467155747, |
| "grad_norm": 1.0412942171096802, |
| "learning_rate": 1.4932203389830511e-05, |
| "loss": 0.3027, |
| "step": 60100 |
| }, |
| { |
| "epoch": 2.5381566742558395, |
| "grad_norm": 1.2109469175338745, |
| "learning_rate": 1.4923770975630325e-05, |
| "loss": 0.2911, |
| "step": 60200 |
| }, |
| { |
| "epoch": 2.542372881355932, |
| "grad_norm": 1.0820285081863403, |
| "learning_rate": 1.491533856143014e-05, |
| "loss": 0.2946, |
| "step": 60300 |
| }, |
| { |
| "epoch": 2.5465890884560247, |
| "grad_norm": 1.6041064262390137, |
| "learning_rate": 1.4906906147229953e-05, |
| "loss": 0.2969, |
| "step": 60400 |
| }, |
| { |
| "epoch": 2.550805295556118, |
| "grad_norm": 1.1145155429840088, |
| "learning_rate": 1.4898473733029767e-05, |
| "loss": 0.285, |
| "step": 60500 |
| }, |
| { |
| "epoch": 2.5550215026562104, |
| "grad_norm": 1.105671763420105, |
| "learning_rate": 1.4890041318829581e-05, |
| "loss": 0.2979, |
| "step": 60600 |
| }, |
| { |
| "epoch": 2.559237709756303, |
| "grad_norm": 1.5386360883712769, |
| "learning_rate": 1.4881608904629397e-05, |
| "loss": 0.296, |
| "step": 60700 |
| }, |
| { |
| "epoch": 2.563453916856396, |
| "grad_norm": 0.9032478928565979, |
| "learning_rate": 1.4873176490429211e-05, |
| "loss": 0.3074, |
| "step": 60800 |
| }, |
| { |
| "epoch": 2.5676701239564887, |
| "grad_norm": 0.9565399885177612, |
| "learning_rate": 1.4864744076229025e-05, |
| "loss": 0.2831, |
| "step": 60900 |
| }, |
| { |
| "epoch": 2.5718863310565814, |
| "grad_norm": 0.8517098426818848, |
| "learning_rate": 1.485631166202884e-05, |
| "loss": 0.2882, |
| "step": 61000 |
| }, |
| { |
| "epoch": 2.5761025381566744, |
| "grad_norm": 0.9721641540527344, |
| "learning_rate": 1.4847879247828654e-05, |
| "loss": 0.29, |
| "step": 61100 |
| }, |
| { |
| "epoch": 2.580318745256767, |
| "grad_norm": 1.0403635501861572, |
| "learning_rate": 1.4839446833628468e-05, |
| "loss": 0.291, |
| "step": 61200 |
| }, |
| { |
| "epoch": 2.5845349523568597, |
| "grad_norm": 0.9895289540290833, |
| "learning_rate": 1.4831014419428285e-05, |
| "loss": 0.2879, |
| "step": 61300 |
| }, |
| { |
| "epoch": 2.5887511594569528, |
| "grad_norm": 0.7700739502906799, |
| "learning_rate": 1.48225820052281e-05, |
| "loss": 0.2902, |
| "step": 61400 |
| }, |
| { |
| "epoch": 2.5929673665570454, |
| "grad_norm": 1.0654332637786865, |
| "learning_rate": 1.4814149591027913e-05, |
| "loss": 0.2909, |
| "step": 61500 |
| }, |
| { |
| "epoch": 2.597183573657138, |
| "grad_norm": 1.0778576135635376, |
| "learning_rate": 1.4805717176827727e-05, |
| "loss": 0.2935, |
| "step": 61600 |
| }, |
| { |
| "epoch": 2.6013997807572307, |
| "grad_norm": 0.9637481570243835, |
| "learning_rate": 1.4797284762627541e-05, |
| "loss": 0.2909, |
| "step": 61700 |
| }, |
| { |
| "epoch": 2.6056159878573233, |
| "grad_norm": 0.9111833572387695, |
| "learning_rate": 1.4788852348427356e-05, |
| "loss": 0.2945, |
| "step": 61800 |
| }, |
| { |
| "epoch": 2.6098321949574164, |
| "grad_norm": 1.1596795320510864, |
| "learning_rate": 1.478041993422717e-05, |
| "loss": 0.2805, |
| "step": 61900 |
| }, |
| { |
| "epoch": 2.614048402057509, |
| "grad_norm": 1.007734775543213, |
| "learning_rate": 1.4771987520026985e-05, |
| "loss": 0.2975, |
| "step": 62000 |
| }, |
| { |
| "epoch": 2.6182646091576016, |
| "grad_norm": 1.0803258419036865, |
| "learning_rate": 1.47635551058268e-05, |
| "loss": 0.285, |
| "step": 62100 |
| }, |
| { |
| "epoch": 2.6224808162576947, |
| "grad_norm": 1.2612115144729614, |
| "learning_rate": 1.4755122691626614e-05, |
| "loss": 0.2805, |
| "step": 62200 |
| }, |
| { |
| "epoch": 2.6266970233577873, |
| "grad_norm": 1.2345725297927856, |
| "learning_rate": 1.4746690277426428e-05, |
| "loss": 0.3011, |
| "step": 62300 |
| }, |
| { |
| "epoch": 2.63091323045788, |
| "grad_norm": 0.9898720383644104, |
| "learning_rate": 1.4738257863226242e-05, |
| "loss": 0.2929, |
| "step": 62400 |
| }, |
| { |
| "epoch": 2.635129437557973, |
| "grad_norm": 1.1107537746429443, |
| "learning_rate": 1.4729825449026058e-05, |
| "loss": 0.2869, |
| "step": 62500 |
| }, |
| { |
| "epoch": 2.6393456446580656, |
| "grad_norm": 1.1848292350769043, |
| "learning_rate": 1.4721393034825873e-05, |
| "loss": 0.2916, |
| "step": 62600 |
| }, |
| { |
| "epoch": 2.6435618517581583, |
| "grad_norm": 1.2696866989135742, |
| "learning_rate": 1.4712960620625687e-05, |
| "loss": 0.2927, |
| "step": 62700 |
| }, |
| { |
| "epoch": 2.6477780588582513, |
| "grad_norm": 0.9077075123786926, |
| "learning_rate": 1.4704528206425502e-05, |
| "loss": 0.3002, |
| "step": 62800 |
| }, |
| { |
| "epoch": 2.651994265958344, |
| "grad_norm": 1.1843475103378296, |
| "learning_rate": 1.4696095792225316e-05, |
| "loss": 0.2846, |
| "step": 62900 |
| }, |
| { |
| "epoch": 2.6562104730584366, |
| "grad_norm": 0.8227006196975708, |
| "learning_rate": 1.468766337802513e-05, |
| "loss": 0.2951, |
| "step": 63000 |
| }, |
| { |
| "epoch": 2.6604266801585292, |
| "grad_norm": 1.1044954061508179, |
| "learning_rate": 1.4679230963824944e-05, |
| "loss": 0.2914, |
| "step": 63100 |
| }, |
| { |
| "epoch": 2.6646428872586223, |
| "grad_norm": 0.7992355227470398, |
| "learning_rate": 1.4670798549624758e-05, |
| "loss": 0.291, |
| "step": 63200 |
| }, |
| { |
| "epoch": 2.668859094358715, |
| "grad_norm": 1.0405184030532837, |
| "learning_rate": 1.4662366135424572e-05, |
| "loss": 0.3053, |
| "step": 63300 |
| }, |
| { |
| "epoch": 2.6730753014588076, |
| "grad_norm": 0.9635149240493774, |
| "learning_rate": 1.4653933721224388e-05, |
| "loss": 0.2981, |
| "step": 63400 |
| }, |
| { |
| "epoch": 2.6772915085589, |
| "grad_norm": 0.9436842799186707, |
| "learning_rate": 1.4645501307024202e-05, |
| "loss": 0.2995, |
| "step": 63500 |
| }, |
| { |
| "epoch": 2.6815077156589933, |
| "grad_norm": 1.4944032430648804, |
| "learning_rate": 1.4637068892824016e-05, |
| "loss": 0.2766, |
| "step": 63600 |
| }, |
| { |
| "epoch": 2.685723922759086, |
| "grad_norm": 1.2989920377731323, |
| "learning_rate": 1.4628636478623832e-05, |
| "loss": 0.284, |
| "step": 63700 |
| }, |
| { |
| "epoch": 2.6899401298591785, |
| "grad_norm": 1.1201857328414917, |
| "learning_rate": 1.4620204064423646e-05, |
| "loss": 0.2878, |
| "step": 63800 |
| }, |
| { |
| "epoch": 2.6941563369592716, |
| "grad_norm": 0.7587829232215881, |
| "learning_rate": 1.4611771650223462e-05, |
| "loss": 0.2815, |
| "step": 63900 |
| }, |
| { |
| "epoch": 2.698372544059364, |
| "grad_norm": 1.278507113456726, |
| "learning_rate": 1.4603339236023276e-05, |
| "loss": 0.2879, |
| "step": 64000 |
| }, |
| { |
| "epoch": 2.698372544059364, |
| "eval_bleu": 11.3398, |
| "eval_bleurt": null, |
| "eval_chrfpp": 33.9415, |
| "eval_comet": 0.5433, |
| "eval_gen_len": 20.8771, |
| "eval_loss": 0.3120929002761841, |
| "eval_runtime": 1359.9271, |
| "eval_samples_per_second": 34.881, |
| "eval_steps_per_second": 2.18, |
| "step": 64000 |
| }, |
| { |
| "epoch": 2.702588751159457, |
| "grad_norm": 1.3177260160446167, |
| "learning_rate": 1.459490682182309e-05, |
| "loss": 0.2893, |
| "step": 64100 |
| }, |
| { |
| "epoch": 2.70680495825955, |
| "grad_norm": 1.136610507965088, |
| "learning_rate": 1.4586474407622904e-05, |
| "loss": 0.3055, |
| "step": 64200 |
| }, |
| { |
| "epoch": 2.7110211653596425, |
| "grad_norm": 1.152239203453064, |
| "learning_rate": 1.4578041993422718e-05, |
| "loss": 0.2845, |
| "step": 64300 |
| }, |
| { |
| "epoch": 2.715237372459735, |
| "grad_norm": 1.1815099716186523, |
| "learning_rate": 1.4569609579222532e-05, |
| "loss": 0.2855, |
| "step": 64400 |
| }, |
| { |
| "epoch": 2.7194535795598282, |
| "grad_norm": 0.8875076174736023, |
| "learning_rate": 1.4561177165022346e-05, |
| "loss": 0.2902, |
| "step": 64500 |
| }, |
| { |
| "epoch": 2.723669786659921, |
| "grad_norm": 0.9175387024879456, |
| "learning_rate": 1.455274475082216e-05, |
| "loss": 0.2843, |
| "step": 64600 |
| }, |
| { |
| "epoch": 2.7278859937600135, |
| "grad_norm": 0.9587578773498535, |
| "learning_rate": 1.4544312336621976e-05, |
| "loss": 0.2877, |
| "step": 64700 |
| }, |
| { |
| "epoch": 2.732102200860106, |
| "grad_norm": 0.8387385606765747, |
| "learning_rate": 1.453587992242179e-05, |
| "loss": 0.2882, |
| "step": 64800 |
| }, |
| { |
| "epoch": 2.7363184079601988, |
| "grad_norm": 1.0625752210617065, |
| "learning_rate": 1.4527447508221606e-05, |
| "loss": 0.3063, |
| "step": 64900 |
| }, |
| { |
| "epoch": 2.740534615060292, |
| "grad_norm": 1.0522043704986572, |
| "learning_rate": 1.451901509402142e-05, |
| "loss": 0.2854, |
| "step": 65000 |
| }, |
| { |
| "epoch": 2.7447508221603845, |
| "grad_norm": 1.0325335264205933, |
| "learning_rate": 1.4510582679821234e-05, |
| "loss": 0.287, |
| "step": 65100 |
| }, |
| { |
| "epoch": 2.748967029260477, |
| "grad_norm": 0.7415598630905151, |
| "learning_rate": 1.4502150265621048e-05, |
| "loss": 0.2783, |
| "step": 65200 |
| }, |
| { |
| "epoch": 2.75318323636057, |
| "grad_norm": 1.2780919075012207, |
| "learning_rate": 1.4493717851420864e-05, |
| "loss": 0.2854, |
| "step": 65300 |
| }, |
| { |
| "epoch": 2.757399443460663, |
| "grad_norm": 1.0465195178985596, |
| "learning_rate": 1.4485285437220678e-05, |
| "loss": 0.2942, |
| "step": 65400 |
| }, |
| { |
| "epoch": 2.7616156505607554, |
| "grad_norm": 0.8310420513153076, |
| "learning_rate": 1.4476853023020492e-05, |
| "loss": 0.2896, |
| "step": 65500 |
| }, |
| { |
| "epoch": 2.7658318576608485, |
| "grad_norm": 0.9426363706588745, |
| "learning_rate": 1.4468420608820306e-05, |
| "loss": 0.2839, |
| "step": 65600 |
| }, |
| { |
| "epoch": 2.770048064760941, |
| "grad_norm": 1.195570945739746, |
| "learning_rate": 1.445998819462012e-05, |
| "loss": 0.2894, |
| "step": 65700 |
| }, |
| { |
| "epoch": 2.7742642718610337, |
| "grad_norm": 1.1623390913009644, |
| "learning_rate": 1.4451555780419934e-05, |
| "loss": 0.2897, |
| "step": 65800 |
| }, |
| { |
| "epoch": 2.778480478961127, |
| "grad_norm": 0.8429798483848572, |
| "learning_rate": 1.4443123366219748e-05, |
| "loss": 0.2877, |
| "step": 65900 |
| }, |
| { |
| "epoch": 2.7826966860612194, |
| "grad_norm": 1.0380526781082153, |
| "learning_rate": 1.4434690952019562e-05, |
| "loss": 0.2745, |
| "step": 66000 |
| }, |
| { |
| "epoch": 2.786912893161312, |
| "grad_norm": 1.0655642747879028, |
| "learning_rate": 1.442625853781938e-05, |
| "loss": 0.2906, |
| "step": 66100 |
| }, |
| { |
| "epoch": 2.7911291002614047, |
| "grad_norm": 0.9089457392692566, |
| "learning_rate": 1.4417826123619194e-05, |
| "loss": 0.2918, |
| "step": 66200 |
| }, |
| { |
| "epoch": 2.7953453073614973, |
| "grad_norm": 0.9997662305831909, |
| "learning_rate": 1.4409393709419008e-05, |
| "loss": 0.2835, |
| "step": 66300 |
| }, |
| { |
| "epoch": 2.7995615144615904, |
| "grad_norm": 1.1101837158203125, |
| "learning_rate": 1.4400961295218822e-05, |
| "loss": 0.2774, |
| "step": 66400 |
| }, |
| { |
| "epoch": 2.803777721561683, |
| "grad_norm": 0.9055351614952087, |
| "learning_rate": 1.4392528881018636e-05, |
| "loss": 0.2976, |
| "step": 66500 |
| }, |
| { |
| "epoch": 2.8079939286617757, |
| "grad_norm": 0.9779496192932129, |
| "learning_rate": 1.4384096466818452e-05, |
| "loss": 0.3006, |
| "step": 66600 |
| }, |
| { |
| "epoch": 2.8122101357618687, |
| "grad_norm": 1.180202603340149, |
| "learning_rate": 1.4375664052618266e-05, |
| "loss": 0.2886, |
| "step": 66700 |
| }, |
| { |
| "epoch": 2.8164263428619614, |
| "grad_norm": 1.1998876333236694, |
| "learning_rate": 1.436723163841808e-05, |
| "loss": 0.2888, |
| "step": 66800 |
| }, |
| { |
| "epoch": 2.820642549962054, |
| "grad_norm": 1.1736562252044678, |
| "learning_rate": 1.4358799224217894e-05, |
| "loss": 0.2894, |
| "step": 66900 |
| }, |
| { |
| "epoch": 2.824858757062147, |
| "grad_norm": 1.5462160110473633, |
| "learning_rate": 1.4350366810017708e-05, |
| "loss": 0.2958, |
| "step": 67000 |
| }, |
| { |
| "epoch": 2.8290749641622397, |
| "grad_norm": 1.0704152584075928, |
| "learning_rate": 1.4341934395817523e-05, |
| "loss": 0.2809, |
| "step": 67100 |
| }, |
| { |
| "epoch": 2.8332911712623323, |
| "grad_norm": 1.1228965520858765, |
| "learning_rate": 1.433350198161734e-05, |
| "loss": 0.2859, |
| "step": 67200 |
| }, |
| { |
| "epoch": 2.8375073783624254, |
| "grad_norm": 0.8643785715103149, |
| "learning_rate": 1.4325069567417154e-05, |
| "loss": 0.2812, |
| "step": 67300 |
| }, |
| { |
| "epoch": 2.841723585462518, |
| "grad_norm": 1.112558364868164, |
| "learning_rate": 1.4316637153216968e-05, |
| "loss": 0.2825, |
| "step": 67400 |
| }, |
| { |
| "epoch": 2.8459397925626106, |
| "grad_norm": 0.8024058938026428, |
| "learning_rate": 1.4308204739016782e-05, |
| "loss": 0.2807, |
| "step": 67500 |
| }, |
| { |
| "epoch": 2.8501559996627033, |
| "grad_norm": 1.2020084857940674, |
| "learning_rate": 1.4299772324816596e-05, |
| "loss": 0.2945, |
| "step": 67600 |
| }, |
| { |
| "epoch": 2.8543722067627963, |
| "grad_norm": 0.9170039296150208, |
| "learning_rate": 1.429133991061641e-05, |
| "loss": 0.2821, |
| "step": 67700 |
| }, |
| { |
| "epoch": 2.858588413862889, |
| "grad_norm": 1.3312523365020752, |
| "learning_rate": 1.4282907496416225e-05, |
| "loss": 0.2861, |
| "step": 67800 |
| }, |
| { |
| "epoch": 2.8628046209629816, |
| "grad_norm": 1.2471208572387695, |
| "learning_rate": 1.4274475082216039e-05, |
| "loss": 0.2832, |
| "step": 67900 |
| }, |
| { |
| "epoch": 2.8670208280630742, |
| "grad_norm": 1.0585116147994995, |
| "learning_rate": 1.4266042668015854e-05, |
| "loss": 0.2884, |
| "step": 68000 |
| }, |
| { |
| "epoch": 2.8670208280630742, |
| "eval_bleu": 11.4168, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.1465, |
| "eval_comet": 0.5446, |
| "eval_gen_len": 20.8719, |
| "eval_loss": 0.3076566457748413, |
| "eval_runtime": 1396.301, |
| "eval_samples_per_second": 33.972, |
| "eval_steps_per_second": 2.123, |
| "step": 68000 |
| }, |
| { |
| "epoch": 2.8712370351631673, |
| "grad_norm": 0.7719435095787048, |
| "learning_rate": 1.4257610253815668e-05, |
| "loss": 0.2931, |
| "step": 68100 |
| }, |
| { |
| "epoch": 2.87545324226326, |
| "grad_norm": 0.9555220007896423, |
| "learning_rate": 1.4249177839615483e-05, |
| "loss": 0.3001, |
| "step": 68200 |
| }, |
| { |
| "epoch": 2.8796694493633526, |
| "grad_norm": 1.2510745525360107, |
| "learning_rate": 1.4240745425415297e-05, |
| "loss": 0.2911, |
| "step": 68300 |
| }, |
| { |
| "epoch": 2.8838856564634456, |
| "grad_norm": 1.013611078262329, |
| "learning_rate": 1.4232313011215112e-05, |
| "loss": 0.2784, |
| "step": 68400 |
| }, |
| { |
| "epoch": 2.8881018635635383, |
| "grad_norm": 1.0333224534988403, |
| "learning_rate": 1.4223880597014928e-05, |
| "loss": 0.2862, |
| "step": 68500 |
| }, |
| { |
| "epoch": 2.892318070663631, |
| "grad_norm": 0.9320237040519714, |
| "learning_rate": 1.4215448182814742e-05, |
| "loss": 0.2913, |
| "step": 68600 |
| }, |
| { |
| "epoch": 2.896534277763724, |
| "grad_norm": 0.8303669691085815, |
| "learning_rate": 1.4207015768614556e-05, |
| "loss": 0.2892, |
| "step": 68700 |
| }, |
| { |
| "epoch": 2.9007504848638166, |
| "grad_norm": 0.9247381091117859, |
| "learning_rate": 1.419858335441437e-05, |
| "loss": 0.2853, |
| "step": 68800 |
| }, |
| { |
| "epoch": 2.904966691963909, |
| "grad_norm": 0.8872043490409851, |
| "learning_rate": 1.4190150940214185e-05, |
| "loss": 0.2772, |
| "step": 68900 |
| }, |
| { |
| "epoch": 2.9091828990640023, |
| "grad_norm": 1.039004921913147, |
| "learning_rate": 1.4181718526013999e-05, |
| "loss": 0.2749, |
| "step": 69000 |
| }, |
| { |
| "epoch": 2.913399106164095, |
| "grad_norm": 0.9193853139877319, |
| "learning_rate": 1.4173286111813813e-05, |
| "loss": 0.2809, |
| "step": 69100 |
| }, |
| { |
| "epoch": 2.9176153132641875, |
| "grad_norm": 1.53365957736969, |
| "learning_rate": 1.4164853697613627e-05, |
| "loss": 0.2907, |
| "step": 69200 |
| }, |
| { |
| "epoch": 2.92183152036428, |
| "grad_norm": 0.719420850276947, |
| "learning_rate": 1.4156421283413443e-05, |
| "loss": 0.2783, |
| "step": 69300 |
| }, |
| { |
| "epoch": 2.926047727464373, |
| "grad_norm": 0.9032190442085266, |
| "learning_rate": 1.4147988869213257e-05, |
| "loss": 0.2757, |
| "step": 69400 |
| }, |
| { |
| "epoch": 2.930263934564466, |
| "grad_norm": 0.9444390535354614, |
| "learning_rate": 1.413955645501307e-05, |
| "loss": 0.2884, |
| "step": 69500 |
| }, |
| { |
| "epoch": 2.9344801416645585, |
| "grad_norm": 1.3838810920715332, |
| "learning_rate": 1.4131124040812887e-05, |
| "loss": 0.2893, |
| "step": 69600 |
| }, |
| { |
| "epoch": 2.938696348764651, |
| "grad_norm": 1.2459306716918945, |
| "learning_rate": 1.41226916266127e-05, |
| "loss": 0.2893, |
| "step": 69700 |
| }, |
| { |
| "epoch": 2.942912555864744, |
| "grad_norm": 1.2073965072631836, |
| "learning_rate": 1.4114259212412515e-05, |
| "loss": 0.2844, |
| "step": 69800 |
| }, |
| { |
| "epoch": 2.947128762964837, |
| "grad_norm": 0.9537835717201233, |
| "learning_rate": 1.410582679821233e-05, |
| "loss": 0.2851, |
| "step": 69900 |
| }, |
| { |
| "epoch": 2.9513449700649295, |
| "grad_norm": 1.0783562660217285, |
| "learning_rate": 1.4097394384012145e-05, |
| "loss": 0.2769, |
| "step": 70000 |
| }, |
| { |
| "epoch": 2.9555611771650225, |
| "grad_norm": 1.0770165920257568, |
| "learning_rate": 1.4088961969811959e-05, |
| "loss": 0.2914, |
| "step": 70100 |
| }, |
| { |
| "epoch": 2.959777384265115, |
| "grad_norm": 1.1360630989074707, |
| "learning_rate": 1.4080529555611773e-05, |
| "loss": 0.2985, |
| "step": 70200 |
| }, |
| { |
| "epoch": 2.963993591365208, |
| "grad_norm": 0.7796394228935242, |
| "learning_rate": 1.4072097141411587e-05, |
| "loss": 0.2934, |
| "step": 70300 |
| }, |
| { |
| "epoch": 2.968209798465301, |
| "grad_norm": 1.3134346008300781, |
| "learning_rate": 1.4063664727211401e-05, |
| "loss": 0.2926, |
| "step": 70400 |
| }, |
| { |
| "epoch": 2.9724260055653935, |
| "grad_norm": 1.082274317741394, |
| "learning_rate": 1.4055232313011215e-05, |
| "loss": 0.2934, |
| "step": 70500 |
| }, |
| { |
| "epoch": 2.976642212665486, |
| "grad_norm": 1.1806004047393799, |
| "learning_rate": 1.4046799898811029e-05, |
| "loss": 0.2824, |
| "step": 70600 |
| }, |
| { |
| "epoch": 2.9808584197655787, |
| "grad_norm": 1.0039913654327393, |
| "learning_rate": 1.4038367484610845e-05, |
| "loss": 0.2887, |
| "step": 70700 |
| }, |
| { |
| "epoch": 2.9850746268656714, |
| "grad_norm": 0.9361388087272644, |
| "learning_rate": 1.402993507041066e-05, |
| "loss": 0.3021, |
| "step": 70800 |
| }, |
| { |
| "epoch": 2.9892908339657644, |
| "grad_norm": 0.9558672904968262, |
| "learning_rate": 1.4021502656210475e-05, |
| "loss": 0.2891, |
| "step": 70900 |
| }, |
| { |
| "epoch": 2.993507041065857, |
| "grad_norm": 1.2940090894699097, |
| "learning_rate": 1.4013070242010289e-05, |
| "loss": 0.2901, |
| "step": 71000 |
| }, |
| { |
| "epoch": 2.9977232481659497, |
| "grad_norm": 0.9156692028045654, |
| "learning_rate": 1.4004637827810103e-05, |
| "loss": 0.2858, |
| "step": 71100 |
| }, |
| { |
| "epoch": 3.0019394552660428, |
| "grad_norm": 1.4320809841156006, |
| "learning_rate": 1.3996205413609919e-05, |
| "loss": 0.2682, |
| "step": 71200 |
| }, |
| { |
| "epoch": 3.0061556623661354, |
| "grad_norm": 1.0174983739852905, |
| "learning_rate": 1.3987772999409733e-05, |
| "loss": 0.2601, |
| "step": 71300 |
| }, |
| { |
| "epoch": 3.010371869466228, |
| "grad_norm": 1.1944080591201782, |
| "learning_rate": 1.3979340585209547e-05, |
| "loss": 0.2587, |
| "step": 71400 |
| }, |
| { |
| "epoch": 3.014588076566321, |
| "grad_norm": 1.1349588632583618, |
| "learning_rate": 1.3970908171009361e-05, |
| "loss": 0.275, |
| "step": 71500 |
| }, |
| { |
| "epoch": 3.0188042836664137, |
| "grad_norm": 1.0986281633377075, |
| "learning_rate": 1.3962475756809175e-05, |
| "loss": 0.2624, |
| "step": 71600 |
| }, |
| { |
| "epoch": 3.0230204907665064, |
| "grad_norm": 1.050392746925354, |
| "learning_rate": 1.395404334260899e-05, |
| "loss": 0.2676, |
| "step": 71700 |
| }, |
| { |
| "epoch": 3.027236697866599, |
| "grad_norm": 2.1180052757263184, |
| "learning_rate": 1.3945610928408803e-05, |
| "loss": 0.2541, |
| "step": 71800 |
| }, |
| { |
| "epoch": 3.031452904966692, |
| "grad_norm": 1.1823991537094116, |
| "learning_rate": 1.3937178514208617e-05, |
| "loss": 0.2618, |
| "step": 71900 |
| }, |
| { |
| "epoch": 3.0356691120667847, |
| "grad_norm": 0.8114222288131714, |
| "learning_rate": 1.3928746100008435e-05, |
| "loss": 0.264, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.0356691120667847, |
| "eval_bleu": 11.3651, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.13, |
| "eval_comet": 0.5443, |
| "eval_gen_len": 20.8717, |
| "eval_loss": 0.3058512210845947, |
| "eval_runtime": 1368.271, |
| "eval_samples_per_second": 34.668, |
| "eval_steps_per_second": 2.167, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.0398853191668773, |
| "grad_norm": 0.9190216064453125, |
| "learning_rate": 1.3920313685808249e-05, |
| "loss": 0.256, |
| "step": 72100 |
| }, |
| { |
| "epoch": 3.0441015262669704, |
| "grad_norm": 1.0350401401519775, |
| "learning_rate": 1.3911881271608063e-05, |
| "loss": 0.2637, |
| "step": 72200 |
| }, |
| { |
| "epoch": 3.048317733367063, |
| "grad_norm": 1.3594918251037598, |
| "learning_rate": 1.3903448857407877e-05, |
| "loss": 0.258, |
| "step": 72300 |
| }, |
| { |
| "epoch": 3.0525339404671556, |
| "grad_norm": 0.9170486927032471, |
| "learning_rate": 1.3895016443207691e-05, |
| "loss": 0.2646, |
| "step": 72400 |
| }, |
| { |
| "epoch": 3.0567501475672487, |
| "grad_norm": 1.1841360330581665, |
| "learning_rate": 1.3886584029007505e-05, |
| "loss": 0.2685, |
| "step": 72500 |
| }, |
| { |
| "epoch": 3.0609663546673413, |
| "grad_norm": 0.9434228539466858, |
| "learning_rate": 1.3878151614807321e-05, |
| "loss": 0.2689, |
| "step": 72600 |
| }, |
| { |
| "epoch": 3.065182561767434, |
| "grad_norm": 0.9371470212936401, |
| "learning_rate": 1.3869719200607135e-05, |
| "loss": 0.2657, |
| "step": 72700 |
| }, |
| { |
| "epoch": 3.0693987688675266, |
| "grad_norm": 0.862743616104126, |
| "learning_rate": 1.386128678640695e-05, |
| "loss": 0.2554, |
| "step": 72800 |
| }, |
| { |
| "epoch": 3.0736149759676197, |
| "grad_norm": 1.0926709175109863, |
| "learning_rate": 1.3852854372206763e-05, |
| "loss": 0.2596, |
| "step": 72900 |
| }, |
| { |
| "epoch": 3.0778311830677123, |
| "grad_norm": 1.4358494281768799, |
| "learning_rate": 1.3844421958006577e-05, |
| "loss": 0.269, |
| "step": 73000 |
| }, |
| { |
| "epoch": 3.082047390167805, |
| "grad_norm": 1.0654805898666382, |
| "learning_rate": 1.3835989543806391e-05, |
| "loss": 0.2584, |
| "step": 73100 |
| }, |
| { |
| "epoch": 3.086263597267898, |
| "grad_norm": 1.1404222249984741, |
| "learning_rate": 1.3827557129606209e-05, |
| "loss": 0.2629, |
| "step": 73200 |
| }, |
| { |
| "epoch": 3.0904798043679906, |
| "grad_norm": 1.7569445371627808, |
| "learning_rate": 1.3819124715406023e-05, |
| "loss": 0.2762, |
| "step": 73300 |
| }, |
| { |
| "epoch": 3.0946960114680833, |
| "grad_norm": 1.0361456871032715, |
| "learning_rate": 1.3810692301205837e-05, |
| "loss": 0.2652, |
| "step": 73400 |
| }, |
| { |
| "epoch": 3.098912218568176, |
| "grad_norm": 1.2701947689056396, |
| "learning_rate": 1.3802259887005651e-05, |
| "loss": 0.272, |
| "step": 73500 |
| }, |
| { |
| "epoch": 3.103128425668269, |
| "grad_norm": 1.0151307582855225, |
| "learning_rate": 1.3793827472805465e-05, |
| "loss": 0.2643, |
| "step": 73600 |
| }, |
| { |
| "epoch": 3.1073446327683616, |
| "grad_norm": 1.0204205513000488, |
| "learning_rate": 1.378539505860528e-05, |
| "loss": 0.2533, |
| "step": 73700 |
| }, |
| { |
| "epoch": 3.111560839868454, |
| "grad_norm": 0.9302712678909302, |
| "learning_rate": 1.3776962644405094e-05, |
| "loss": 0.2656, |
| "step": 73800 |
| }, |
| { |
| "epoch": 3.1157770469685473, |
| "grad_norm": 0.9464835524559021, |
| "learning_rate": 1.3768530230204908e-05, |
| "loss": 0.2571, |
| "step": 73900 |
| }, |
| { |
| "epoch": 3.11999325406864, |
| "grad_norm": 0.83738774061203, |
| "learning_rate": 1.3760097816004723e-05, |
| "loss": 0.2612, |
| "step": 74000 |
| }, |
| { |
| "epoch": 3.1242094611687325, |
| "grad_norm": 1.519640326499939, |
| "learning_rate": 1.3751665401804537e-05, |
| "loss": 0.2708, |
| "step": 74100 |
| }, |
| { |
| "epoch": 3.128425668268825, |
| "grad_norm": 0.8798648715019226, |
| "learning_rate": 1.3743232987604352e-05, |
| "loss": 0.2619, |
| "step": 74200 |
| }, |
| { |
| "epoch": 3.1326418753689182, |
| "grad_norm": 1.1586161851882935, |
| "learning_rate": 1.3734800573404167e-05, |
| "loss": 0.2617, |
| "step": 74300 |
| }, |
| { |
| "epoch": 3.136858082469011, |
| "grad_norm": 1.134314775466919, |
| "learning_rate": 1.3726368159203981e-05, |
| "loss": 0.2664, |
| "step": 74400 |
| }, |
| { |
| "epoch": 3.1410742895691035, |
| "grad_norm": 0.8345361351966858, |
| "learning_rate": 1.3717935745003797e-05, |
| "loss": 0.2614, |
| "step": 74500 |
| }, |
| { |
| "epoch": 3.1452904966691966, |
| "grad_norm": 1.4608139991760254, |
| "learning_rate": 1.3709503330803611e-05, |
| "loss": 0.275, |
| "step": 74600 |
| }, |
| { |
| "epoch": 3.149506703769289, |
| "grad_norm": 1.0728867053985596, |
| "learning_rate": 1.3701070916603425e-05, |
| "loss": 0.2715, |
| "step": 74700 |
| }, |
| { |
| "epoch": 3.153722910869382, |
| "grad_norm": 0.96731036901474, |
| "learning_rate": 1.369263850240324e-05, |
| "loss": 0.2708, |
| "step": 74800 |
| }, |
| { |
| "epoch": 3.1579391179694745, |
| "grad_norm": 1.0788986682891846, |
| "learning_rate": 1.3684206088203054e-05, |
| "loss": 0.2666, |
| "step": 74900 |
| }, |
| { |
| "epoch": 3.1621553250695675, |
| "grad_norm": 1.2752233743667603, |
| "learning_rate": 1.3675773674002868e-05, |
| "loss": 0.2622, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.16637153216966, |
| "grad_norm": 1.037100076675415, |
| "learning_rate": 1.3667341259802682e-05, |
| "loss": 0.2663, |
| "step": 75100 |
| }, |
| { |
| "epoch": 3.170587739269753, |
| "grad_norm": 1.1296089887619019, |
| "learning_rate": 1.3658908845602496e-05, |
| "loss": 0.2693, |
| "step": 75200 |
| }, |
| { |
| "epoch": 3.174803946369846, |
| "grad_norm": 1.0998258590698242, |
| "learning_rate": 1.3650476431402312e-05, |
| "loss": 0.2659, |
| "step": 75300 |
| }, |
| { |
| "epoch": 3.1790201534699385, |
| "grad_norm": 0.9814489483833313, |
| "learning_rate": 1.3642044017202126e-05, |
| "loss": 0.2599, |
| "step": 75400 |
| }, |
| { |
| "epoch": 3.183236360570031, |
| "grad_norm": 1.1682645082473755, |
| "learning_rate": 1.3633611603001941e-05, |
| "loss": 0.2558, |
| "step": 75500 |
| }, |
| { |
| "epoch": 3.1874525676701237, |
| "grad_norm": 0.9016832113265991, |
| "learning_rate": 1.3625179188801756e-05, |
| "loss": 0.256, |
| "step": 75600 |
| }, |
| { |
| "epoch": 3.191668774770217, |
| "grad_norm": 1.146004319190979, |
| "learning_rate": 1.361674677460157e-05, |
| "loss": 0.261, |
| "step": 75700 |
| }, |
| { |
| "epoch": 3.1958849818703094, |
| "grad_norm": 0.9824726581573486, |
| "learning_rate": 1.3608314360401384e-05, |
| "loss": 0.2599, |
| "step": 75800 |
| }, |
| { |
| "epoch": 3.200101188970402, |
| "grad_norm": 0.9161651730537415, |
| "learning_rate": 1.35998819462012e-05, |
| "loss": 0.2707, |
| "step": 75900 |
| }, |
| { |
| "epoch": 3.204317396070495, |
| "grad_norm": 0.7958844900131226, |
| "learning_rate": 1.3591449532001014e-05, |
| "loss": 0.2653, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.204317396070495, |
| "eval_bleu": 11.5178, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.2395, |
| "eval_comet": 0.5445, |
| "eval_gen_len": 20.8749, |
| "eval_loss": 0.30260327458381653, |
| "eval_runtime": 1385.9778, |
| "eval_samples_per_second": 34.225, |
| "eval_steps_per_second": 2.139, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.2085336031705878, |
| "grad_norm": 1.1375194787979126, |
| "learning_rate": 1.3583017117800828e-05, |
| "loss": 0.28, |
| "step": 76100 |
| }, |
| { |
| "epoch": 3.2127498102706804, |
| "grad_norm": 1.3891476392745972, |
| "learning_rate": 1.3574584703600642e-05, |
| "loss": 0.2673, |
| "step": 76200 |
| }, |
| { |
| "epoch": 3.2169660173707735, |
| "grad_norm": 0.8804958462715149, |
| "learning_rate": 1.3566152289400456e-05, |
| "loss": 0.2609, |
| "step": 76300 |
| }, |
| { |
| "epoch": 3.221182224470866, |
| "grad_norm": 1.148685097694397, |
| "learning_rate": 1.355771987520027e-05, |
| "loss": 0.2584, |
| "step": 76400 |
| }, |
| { |
| "epoch": 3.2253984315709587, |
| "grad_norm": 1.2225135564804077, |
| "learning_rate": 1.3549287461000084e-05, |
| "loss": 0.2693, |
| "step": 76500 |
| }, |
| { |
| "epoch": 3.2296146386710514, |
| "grad_norm": 1.3453797101974487, |
| "learning_rate": 1.3540855046799898e-05, |
| "loss": 0.2582, |
| "step": 76600 |
| }, |
| { |
| "epoch": 3.2338308457711444, |
| "grad_norm": 1.1462249755859375, |
| "learning_rate": 1.3532422632599716e-05, |
| "loss": 0.261, |
| "step": 76700 |
| }, |
| { |
| "epoch": 3.238047052871237, |
| "grad_norm": 1.3225855827331543, |
| "learning_rate": 1.352399021839953e-05, |
| "loss": 0.2593, |
| "step": 76800 |
| }, |
| { |
| "epoch": 3.2422632599713297, |
| "grad_norm": 1.0181751251220703, |
| "learning_rate": 1.3515557804199344e-05, |
| "loss": 0.2735, |
| "step": 76900 |
| }, |
| { |
| "epoch": 3.2464794670714223, |
| "grad_norm": 1.4639792442321777, |
| "learning_rate": 1.3507125389999158e-05, |
| "loss": 0.2684, |
| "step": 77000 |
| }, |
| { |
| "epoch": 3.2506956741715154, |
| "grad_norm": 0.9273201823234558, |
| "learning_rate": 1.3498692975798972e-05, |
| "loss": 0.2691, |
| "step": 77100 |
| }, |
| { |
| "epoch": 3.254911881271608, |
| "grad_norm": 1.0048563480377197, |
| "learning_rate": 1.3490260561598788e-05, |
| "loss": 0.2694, |
| "step": 77200 |
| }, |
| { |
| "epoch": 3.2591280883717006, |
| "grad_norm": 0.7959649562835693, |
| "learning_rate": 1.3481828147398602e-05, |
| "loss": 0.2644, |
| "step": 77300 |
| }, |
| { |
| "epoch": 3.2633442954717937, |
| "grad_norm": 0.9960177540779114, |
| "learning_rate": 1.3473395733198416e-05, |
| "loss": 0.2634, |
| "step": 77400 |
| }, |
| { |
| "epoch": 3.2675605025718863, |
| "grad_norm": 0.8392547965049744, |
| "learning_rate": 1.346496331899823e-05, |
| "loss": 0.2641, |
| "step": 77500 |
| }, |
| { |
| "epoch": 3.271776709671979, |
| "grad_norm": 0.8676571846008301, |
| "learning_rate": 1.3456530904798044e-05, |
| "loss": 0.2651, |
| "step": 77600 |
| }, |
| { |
| "epoch": 3.275992916772072, |
| "grad_norm": 0.8622851371765137, |
| "learning_rate": 1.3448098490597858e-05, |
| "loss": 0.2579, |
| "step": 77700 |
| }, |
| { |
| "epoch": 3.2802091238721647, |
| "grad_norm": 1.0479894876480103, |
| "learning_rate": 1.3439666076397672e-05, |
| "loss": 0.2743, |
| "step": 77800 |
| }, |
| { |
| "epoch": 3.2844253309722573, |
| "grad_norm": 0.8865060806274414, |
| "learning_rate": 1.343123366219749e-05, |
| "loss": 0.2675, |
| "step": 77900 |
| }, |
| { |
| "epoch": 3.28864153807235, |
| "grad_norm": 0.9038619995117188, |
| "learning_rate": 1.3422801247997304e-05, |
| "loss": 0.2595, |
| "step": 78000 |
| }, |
| { |
| "epoch": 3.292857745172443, |
| "grad_norm": 1.3131364583969116, |
| "learning_rate": 1.3414368833797118e-05, |
| "loss": 0.2638, |
| "step": 78100 |
| }, |
| { |
| "epoch": 3.2970739522725356, |
| "grad_norm": 0.7389425039291382, |
| "learning_rate": 1.3405936419596932e-05, |
| "loss": 0.261, |
| "step": 78200 |
| }, |
| { |
| "epoch": 3.3012901593726283, |
| "grad_norm": 1.0259007215499878, |
| "learning_rate": 1.3397504005396746e-05, |
| "loss": 0.2699, |
| "step": 78300 |
| }, |
| { |
| "epoch": 3.305506366472721, |
| "grad_norm": 1.1851677894592285, |
| "learning_rate": 1.338907159119656e-05, |
| "loss": 0.2675, |
| "step": 78400 |
| }, |
| { |
| "epoch": 3.309722573572814, |
| "grad_norm": 1.12969172000885, |
| "learning_rate": 1.3380639176996374e-05, |
| "loss": 0.2663, |
| "step": 78500 |
| }, |
| { |
| "epoch": 3.3139387806729066, |
| "grad_norm": 0.6820844411849976, |
| "learning_rate": 1.337220676279619e-05, |
| "loss": 0.2741, |
| "step": 78600 |
| }, |
| { |
| "epoch": 3.318154987772999, |
| "grad_norm": 0.805769681930542, |
| "learning_rate": 1.3363774348596004e-05, |
| "loss": 0.2604, |
| "step": 78700 |
| }, |
| { |
| "epoch": 3.3223711948730923, |
| "grad_norm": 1.0743379592895508, |
| "learning_rate": 1.3355341934395818e-05, |
| "loss": 0.2718, |
| "step": 78800 |
| }, |
| { |
| "epoch": 3.326587401973185, |
| "grad_norm": 0.9101067185401917, |
| "learning_rate": 1.3346909520195632e-05, |
| "loss": 0.2663, |
| "step": 78900 |
| }, |
| { |
| "epoch": 3.3308036090732775, |
| "grad_norm": 1.1092708110809326, |
| "learning_rate": 1.3338477105995446e-05, |
| "loss": 0.2643, |
| "step": 79000 |
| }, |
| { |
| "epoch": 3.3350198161733706, |
| "grad_norm": 0.9594613313674927, |
| "learning_rate": 1.3330044691795264e-05, |
| "loss": 0.2634, |
| "step": 79100 |
| }, |
| { |
| "epoch": 3.3392360232734632, |
| "grad_norm": 0.7595967650413513, |
| "learning_rate": 1.3321612277595078e-05, |
| "loss": 0.2634, |
| "step": 79200 |
| }, |
| { |
| "epoch": 3.343452230373556, |
| "grad_norm": 1.062562346458435, |
| "learning_rate": 1.3313179863394892e-05, |
| "loss": 0.2641, |
| "step": 79300 |
| }, |
| { |
| "epoch": 3.347668437473649, |
| "grad_norm": 1.0648949146270752, |
| "learning_rate": 1.3304747449194706e-05, |
| "loss": 0.2768, |
| "step": 79400 |
| }, |
| { |
| "epoch": 3.3518846445737416, |
| "grad_norm": 1.2904027700424194, |
| "learning_rate": 1.329631503499452e-05, |
| "loss": 0.2666, |
| "step": 79500 |
| }, |
| { |
| "epoch": 3.356100851673834, |
| "grad_norm": 1.052890658378601, |
| "learning_rate": 1.3287882620794334e-05, |
| "loss": 0.2621, |
| "step": 79600 |
| }, |
| { |
| "epoch": 3.360317058773927, |
| "grad_norm": 0.9428207874298096, |
| "learning_rate": 1.3279450206594148e-05, |
| "loss": 0.2743, |
| "step": 79700 |
| }, |
| { |
| "epoch": 3.36453326587402, |
| "grad_norm": 1.0574067831039429, |
| "learning_rate": 1.3271017792393962e-05, |
| "loss": 0.2701, |
| "step": 79800 |
| }, |
| { |
| "epoch": 3.3687494729741125, |
| "grad_norm": 1.0170483589172363, |
| "learning_rate": 1.3262585378193778e-05, |
| "loss": 0.2719, |
| "step": 79900 |
| }, |
| { |
| "epoch": 3.372965680074205, |
| "grad_norm": 0.9573329091072083, |
| "learning_rate": 1.3254152963993592e-05, |
| "loss": 0.259, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.372965680074205, |
| "eval_bleu": 11.5551, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.3387, |
| "eval_comet": 0.5455, |
| "eval_gen_len": 20.8748, |
| "eval_loss": 0.29944270849227905, |
| "eval_runtime": 1379.6059, |
| "eval_samples_per_second": 34.383, |
| "eval_steps_per_second": 2.149, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.377181887174298, |
| "grad_norm": 1.0012997388839722, |
| "learning_rate": 1.3245720549793406e-05, |
| "loss": 0.2702, |
| "step": 80100 |
| }, |
| { |
| "epoch": 3.381398094274391, |
| "grad_norm": 1.2515530586242676, |
| "learning_rate": 1.323728813559322e-05, |
| "loss": 0.2662, |
| "step": 80200 |
| }, |
| { |
| "epoch": 3.3856143013744835, |
| "grad_norm": 1.2128268480300903, |
| "learning_rate": 1.3228855721393036e-05, |
| "loss": 0.2622, |
| "step": 80300 |
| }, |
| { |
| "epoch": 3.389830508474576, |
| "grad_norm": 0.9349983930587769, |
| "learning_rate": 1.322042330719285e-05, |
| "loss": 0.2524, |
| "step": 80400 |
| }, |
| { |
| "epoch": 3.394046715574669, |
| "grad_norm": 0.8696044087409973, |
| "learning_rate": 1.3211990892992666e-05, |
| "loss": 0.2622, |
| "step": 80500 |
| }, |
| { |
| "epoch": 3.398262922674762, |
| "grad_norm": 1.0299559831619263, |
| "learning_rate": 1.320355847879248e-05, |
| "loss": 0.2612, |
| "step": 80600 |
| }, |
| { |
| "epoch": 3.4024791297748544, |
| "grad_norm": 1.1014291048049927, |
| "learning_rate": 1.3195126064592294e-05, |
| "loss": 0.269, |
| "step": 80700 |
| }, |
| { |
| "epoch": 3.4066953368749475, |
| "grad_norm": 1.00558340549469, |
| "learning_rate": 1.3186693650392108e-05, |
| "loss": 0.2634, |
| "step": 80800 |
| }, |
| { |
| "epoch": 3.41091154397504, |
| "grad_norm": 0.9738940596580505, |
| "learning_rate": 1.3178261236191923e-05, |
| "loss": 0.2572, |
| "step": 80900 |
| }, |
| { |
| "epoch": 3.415127751075133, |
| "grad_norm": 0.9514461755752563, |
| "learning_rate": 1.3169828821991737e-05, |
| "loss": 0.2692, |
| "step": 81000 |
| }, |
| { |
| "epoch": 3.4193439581752254, |
| "grad_norm": 1.0373955965042114, |
| "learning_rate": 1.316139640779155e-05, |
| "loss": 0.252, |
| "step": 81100 |
| }, |
| { |
| "epoch": 3.4235601652753185, |
| "grad_norm": 1.02577805519104, |
| "learning_rate": 1.3152963993591365e-05, |
| "loss": 0.264, |
| "step": 81200 |
| }, |
| { |
| "epoch": 3.427776372375411, |
| "grad_norm": 1.1359772682189941, |
| "learning_rate": 1.314453157939118e-05, |
| "loss": 0.2577, |
| "step": 81300 |
| }, |
| { |
| "epoch": 3.4319925794755037, |
| "grad_norm": 1.2096680402755737, |
| "learning_rate": 1.3136099165190995e-05, |
| "loss": 0.2599, |
| "step": 81400 |
| }, |
| { |
| "epoch": 3.4362087865755964, |
| "grad_norm": 0.9178110957145691, |
| "learning_rate": 1.312766675099081e-05, |
| "loss": 0.2806, |
| "step": 81500 |
| }, |
| { |
| "epoch": 3.4404249936756894, |
| "grad_norm": 1.1373695135116577, |
| "learning_rate": 1.3119234336790625e-05, |
| "loss": 0.2569, |
| "step": 81600 |
| }, |
| { |
| "epoch": 3.444641200775782, |
| "grad_norm": 1.1340562105178833, |
| "learning_rate": 1.3110801922590439e-05, |
| "loss": 0.2647, |
| "step": 81700 |
| }, |
| { |
| "epoch": 3.4488574078758747, |
| "grad_norm": 0.8706813454627991, |
| "learning_rate": 1.3102369508390254e-05, |
| "loss": 0.2629, |
| "step": 81800 |
| }, |
| { |
| "epoch": 3.4530736149759678, |
| "grad_norm": 1.1597013473510742, |
| "learning_rate": 1.3093937094190068e-05, |
| "loss": 0.2644, |
| "step": 81900 |
| }, |
| { |
| "epoch": 3.4572898220760604, |
| "grad_norm": 0.9667551517486572, |
| "learning_rate": 1.3085504679989883e-05, |
| "loss": 0.2599, |
| "step": 82000 |
| }, |
| { |
| "epoch": 3.461506029176153, |
| "grad_norm": 1.319297194480896, |
| "learning_rate": 1.3077072265789697e-05, |
| "loss": 0.2536, |
| "step": 82100 |
| }, |
| { |
| "epoch": 3.465722236276246, |
| "grad_norm": 1.0382535457611084, |
| "learning_rate": 1.306863985158951e-05, |
| "loss": 0.2639, |
| "step": 82200 |
| }, |
| { |
| "epoch": 3.4699384433763387, |
| "grad_norm": 1.1606062650680542, |
| "learning_rate": 1.3060207437389325e-05, |
| "loss": 0.2602, |
| "step": 82300 |
| }, |
| { |
| "epoch": 3.4741546504764313, |
| "grad_norm": 0.8028622269630432, |
| "learning_rate": 1.3051775023189139e-05, |
| "loss": 0.2652, |
| "step": 82400 |
| }, |
| { |
| "epoch": 3.478370857576524, |
| "grad_norm": 1.2183704376220703, |
| "learning_rate": 1.3043342608988953e-05, |
| "loss": 0.2639, |
| "step": 82500 |
| }, |
| { |
| "epoch": 3.482587064676617, |
| "grad_norm": 1.0649685859680176, |
| "learning_rate": 1.303491019478877e-05, |
| "loss": 0.2601, |
| "step": 82600 |
| }, |
| { |
| "epoch": 3.4868032717767097, |
| "grad_norm": 0.9648706316947937, |
| "learning_rate": 1.3026477780588585e-05, |
| "loss": 0.2594, |
| "step": 82700 |
| }, |
| { |
| "epoch": 3.4910194788768023, |
| "grad_norm": 1.0118120908737183, |
| "learning_rate": 1.3018045366388399e-05, |
| "loss": 0.2598, |
| "step": 82800 |
| }, |
| { |
| "epoch": 3.4952356859768954, |
| "grad_norm": 0.8838013410568237, |
| "learning_rate": 1.3009612952188213e-05, |
| "loss": 0.2677, |
| "step": 82900 |
| }, |
| { |
| "epoch": 3.499451893076988, |
| "grad_norm": 1.0722988843917847, |
| "learning_rate": 1.3001180537988027e-05, |
| "loss": 0.2644, |
| "step": 83000 |
| }, |
| { |
| "epoch": 3.5036681001770806, |
| "grad_norm": 1.016909122467041, |
| "learning_rate": 1.2992748123787841e-05, |
| "loss": 0.2568, |
| "step": 83100 |
| }, |
| { |
| "epoch": 3.5078843072771733, |
| "grad_norm": 0.9746114611625671, |
| "learning_rate": 1.2984315709587657e-05, |
| "loss": 0.2703, |
| "step": 83200 |
| }, |
| { |
| "epoch": 3.5121005143772663, |
| "grad_norm": 1.0796164274215698, |
| "learning_rate": 1.297588329538747e-05, |
| "loss": 0.2494, |
| "step": 83300 |
| }, |
| { |
| "epoch": 3.516316721477359, |
| "grad_norm": 0.9735890030860901, |
| "learning_rate": 1.2967450881187285e-05, |
| "loss": 0.2587, |
| "step": 83400 |
| }, |
| { |
| "epoch": 3.5205329285774516, |
| "grad_norm": 0.9859138131141663, |
| "learning_rate": 1.2959018466987099e-05, |
| "loss": 0.2733, |
| "step": 83500 |
| }, |
| { |
| "epoch": 3.5247491356775447, |
| "grad_norm": 1.0381730794906616, |
| "learning_rate": 1.2950586052786913e-05, |
| "loss": 0.2641, |
| "step": 83600 |
| }, |
| { |
| "epoch": 3.5289653427776373, |
| "grad_norm": 0.8919042348861694, |
| "learning_rate": 1.2942153638586727e-05, |
| "loss": 0.2571, |
| "step": 83700 |
| }, |
| { |
| "epoch": 3.53318154987773, |
| "grad_norm": 0.9157905578613281, |
| "learning_rate": 1.2933721224386545e-05, |
| "loss": 0.2703, |
| "step": 83800 |
| }, |
| { |
| "epoch": 3.537397756977823, |
| "grad_norm": 1.2394428253173828, |
| "learning_rate": 1.2925288810186359e-05, |
| "loss": 0.2614, |
| "step": 83900 |
| }, |
| { |
| "epoch": 3.5416139640779156, |
| "grad_norm": 0.7191671133041382, |
| "learning_rate": 1.2916856395986173e-05, |
| "loss": 0.2635, |
| "step": 84000 |
| }, |
| { |
| "epoch": 3.5416139640779156, |
| "eval_bleu": 11.6956, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.4214, |
| "eval_comet": 0.5453, |
| "eval_gen_len": 20.8742, |
| "eval_loss": 0.2975287437438965, |
| "eval_runtime": 1344.7261, |
| "eval_samples_per_second": 35.275, |
| "eval_steps_per_second": 2.205, |
| "step": 84000 |
| }, |
| { |
| "epoch": 3.5458301711780082, |
| "grad_norm": 0.8943409323692322, |
| "learning_rate": 1.2908423981785987e-05, |
| "loss": 0.2593, |
| "step": 84100 |
| }, |
| { |
| "epoch": 3.550046378278101, |
| "grad_norm": 0.978228747844696, |
| "learning_rate": 1.2899991567585801e-05, |
| "loss": 0.272, |
| "step": 84200 |
| }, |
| { |
| "epoch": 3.5542625853781935, |
| "grad_norm": 0.7906679511070251, |
| "learning_rate": 1.2891559153385615e-05, |
| "loss": 0.2636, |
| "step": 84300 |
| }, |
| { |
| "epoch": 3.5584787924782866, |
| "grad_norm": 1.0755661725997925, |
| "learning_rate": 1.2883126739185429e-05, |
| "loss": 0.2673, |
| "step": 84400 |
| }, |
| { |
| "epoch": 3.562694999578379, |
| "grad_norm": 1.0431406497955322, |
| "learning_rate": 1.2874694324985245e-05, |
| "loss": 0.2591, |
| "step": 84500 |
| }, |
| { |
| "epoch": 3.566911206678472, |
| "grad_norm": 1.1435611248016357, |
| "learning_rate": 1.2866261910785059e-05, |
| "loss": 0.2745, |
| "step": 84600 |
| }, |
| { |
| "epoch": 3.571127413778565, |
| "grad_norm": 1.0531567335128784, |
| "learning_rate": 1.2857829496584873e-05, |
| "loss": 0.2641, |
| "step": 84700 |
| }, |
| { |
| "epoch": 3.5753436208786575, |
| "grad_norm": 1.1592568159103394, |
| "learning_rate": 1.2849397082384687e-05, |
| "loss": 0.2602, |
| "step": 84800 |
| }, |
| { |
| "epoch": 3.57955982797875, |
| "grad_norm": 0.975192666053772, |
| "learning_rate": 1.2840964668184501e-05, |
| "loss": 0.2643, |
| "step": 84900 |
| }, |
| { |
| "epoch": 3.5837760350788432, |
| "grad_norm": 1.0815240144729614, |
| "learning_rate": 1.2832532253984317e-05, |
| "loss": 0.2566, |
| "step": 85000 |
| }, |
| { |
| "epoch": 3.587992242178936, |
| "grad_norm": 0.9638839960098267, |
| "learning_rate": 1.2824099839784133e-05, |
| "loss": 0.256, |
| "step": 85100 |
| }, |
| { |
| "epoch": 3.5922084492790285, |
| "grad_norm": 0.9916542768478394, |
| "learning_rate": 1.2815667425583947e-05, |
| "loss": 0.266, |
| "step": 85200 |
| }, |
| { |
| "epoch": 3.5964246563791216, |
| "grad_norm": 0.7632570266723633, |
| "learning_rate": 1.2807235011383761e-05, |
| "loss": 0.255, |
| "step": 85300 |
| }, |
| { |
| "epoch": 3.600640863479214, |
| "grad_norm": 1.1871920824050903, |
| "learning_rate": 1.2798802597183575e-05, |
| "loss": 0.2631, |
| "step": 85400 |
| }, |
| { |
| "epoch": 3.604857070579307, |
| "grad_norm": 1.2854896783828735, |
| "learning_rate": 1.279037018298339e-05, |
| "loss": 0.2681, |
| "step": 85500 |
| }, |
| { |
| "epoch": 3.6090732776793994, |
| "grad_norm": 1.0291328430175781, |
| "learning_rate": 1.2781937768783203e-05, |
| "loss": 0.2668, |
| "step": 85600 |
| }, |
| { |
| "epoch": 3.6132894847794925, |
| "grad_norm": 1.1781797409057617, |
| "learning_rate": 1.2773505354583017e-05, |
| "loss": 0.2684, |
| "step": 85700 |
| }, |
| { |
| "epoch": 3.617505691879585, |
| "grad_norm": 1.0925040245056152, |
| "learning_rate": 1.2765072940382831e-05, |
| "loss": 0.2631, |
| "step": 85800 |
| }, |
| { |
| "epoch": 3.621721898979678, |
| "grad_norm": 1.1996210813522339, |
| "learning_rate": 1.2756640526182647e-05, |
| "loss": 0.2543, |
| "step": 85900 |
| }, |
| { |
| "epoch": 3.6259381060797704, |
| "grad_norm": 1.081076979637146, |
| "learning_rate": 1.2748208111982461e-05, |
| "loss": 0.2647, |
| "step": 86000 |
| }, |
| { |
| "epoch": 3.6301543131798635, |
| "grad_norm": 1.1334370374679565, |
| "learning_rate": 1.2739775697782275e-05, |
| "loss": 0.2657, |
| "step": 86100 |
| }, |
| { |
| "epoch": 3.634370520279956, |
| "grad_norm": 1.1829060316085815, |
| "learning_rate": 1.2731343283582091e-05, |
| "loss": 0.2628, |
| "step": 86200 |
| }, |
| { |
| "epoch": 3.6385867273800487, |
| "grad_norm": 0.9868782758712769, |
| "learning_rate": 1.2722910869381905e-05, |
| "loss": 0.2728, |
| "step": 86300 |
| }, |
| { |
| "epoch": 3.642802934480142, |
| "grad_norm": 1.0498135089874268, |
| "learning_rate": 1.271447845518172e-05, |
| "loss": 0.2621, |
| "step": 86400 |
| }, |
| { |
| "epoch": 3.6470191415802344, |
| "grad_norm": 1.3446903228759766, |
| "learning_rate": 1.2706046040981535e-05, |
| "loss": 0.2667, |
| "step": 86500 |
| }, |
| { |
| "epoch": 3.651235348680327, |
| "grad_norm": 1.084639072418213, |
| "learning_rate": 1.269761362678135e-05, |
| "loss": 0.27, |
| "step": 86600 |
| }, |
| { |
| "epoch": 3.65545155578042, |
| "grad_norm": 1.2545477151870728, |
| "learning_rate": 1.2689181212581163e-05, |
| "loss": 0.259, |
| "step": 86700 |
| }, |
| { |
| "epoch": 3.6596677628805128, |
| "grad_norm": 1.0665180683135986, |
| "learning_rate": 1.2680748798380977e-05, |
| "loss": 0.2604, |
| "step": 86800 |
| }, |
| { |
| "epoch": 3.6638839699806054, |
| "grad_norm": 1.826547384262085, |
| "learning_rate": 1.2672316384180791e-05, |
| "loss": 0.2746, |
| "step": 86900 |
| }, |
| { |
| "epoch": 3.6681001770806985, |
| "grad_norm": 1.2480037212371826, |
| "learning_rate": 1.2663883969980606e-05, |
| "loss": 0.2674, |
| "step": 87000 |
| }, |
| { |
| "epoch": 3.672316384180791, |
| "grad_norm": 1.1907069683074951, |
| "learning_rate": 1.265545155578042e-05, |
| "loss": 0.2825, |
| "step": 87100 |
| }, |
| { |
| "epoch": 3.6765325912808837, |
| "grad_norm": 0.9594092965126038, |
| "learning_rate": 1.2647019141580234e-05, |
| "loss": 0.2723, |
| "step": 87200 |
| }, |
| { |
| "epoch": 3.6807487983809763, |
| "grad_norm": 1.1402744054794312, |
| "learning_rate": 1.263858672738005e-05, |
| "loss": 0.2647, |
| "step": 87300 |
| }, |
| { |
| "epoch": 3.684965005481069, |
| "grad_norm": 1.1719417572021484, |
| "learning_rate": 1.2630154313179865e-05, |
| "loss": 0.2661, |
| "step": 87400 |
| }, |
| { |
| "epoch": 3.689181212581162, |
| "grad_norm": 1.0842151641845703, |
| "learning_rate": 1.262172189897968e-05, |
| "loss": 0.2643, |
| "step": 87500 |
| }, |
| { |
| "epoch": 3.6933974196812547, |
| "grad_norm": 0.8954026103019714, |
| "learning_rate": 1.2613289484779493e-05, |
| "loss": 0.264, |
| "step": 87600 |
| }, |
| { |
| "epoch": 3.6976136267813473, |
| "grad_norm": 0.8815357089042664, |
| "learning_rate": 1.2604857070579308e-05, |
| "loss": 0.2618, |
| "step": 87700 |
| }, |
| { |
| "epoch": 3.7018298338814404, |
| "grad_norm": 0.9481285214424133, |
| "learning_rate": 1.2596424656379123e-05, |
| "loss": 0.2525, |
| "step": 87800 |
| }, |
| { |
| "epoch": 3.706046040981533, |
| "grad_norm": 1.143824577331543, |
| "learning_rate": 1.2587992242178937e-05, |
| "loss": 0.2661, |
| "step": 87900 |
| }, |
| { |
| "epoch": 3.7102622480816256, |
| "grad_norm": 1.0877296924591064, |
| "learning_rate": 1.2579559827978752e-05, |
| "loss": 0.2528, |
| "step": 88000 |
| }, |
| { |
| "epoch": 3.7102622480816256, |
| "eval_bleu": 11.7437, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.4824, |
| "eval_comet": 0.5455, |
| "eval_gen_len": 20.8768, |
| "eval_loss": 0.2948751747608185, |
| "eval_runtime": 1328.1507, |
| "eval_samples_per_second": 35.715, |
| "eval_steps_per_second": 2.232, |
| "step": 88000 |
| }, |
| { |
| "epoch": 3.7144784551817187, |
| "grad_norm": 0.8538644313812256, |
| "learning_rate": 1.2571127413778566e-05, |
| "loss": 0.2588, |
| "step": 88100 |
| }, |
| { |
| "epoch": 3.7186946622818113, |
| "grad_norm": 0.7428072094917297, |
| "learning_rate": 1.256269499957838e-05, |
| "loss": 0.2604, |
| "step": 88200 |
| }, |
| { |
| "epoch": 3.722910869381904, |
| "grad_norm": 0.9546633362770081, |
| "learning_rate": 1.2554262585378194e-05, |
| "loss": 0.2636, |
| "step": 88300 |
| }, |
| { |
| "epoch": 3.727127076481997, |
| "grad_norm": 1.0499143600463867, |
| "learning_rate": 1.2545830171178008e-05, |
| "loss": 0.2483, |
| "step": 88400 |
| }, |
| { |
| "epoch": 3.7313432835820897, |
| "grad_norm": 0.892047643661499, |
| "learning_rate": 1.2537397756977822e-05, |
| "loss": 0.2567, |
| "step": 88500 |
| }, |
| { |
| "epoch": 3.7355594906821823, |
| "grad_norm": 0.9587951302528381, |
| "learning_rate": 1.252896534277764e-05, |
| "loss": 0.2694, |
| "step": 88600 |
| }, |
| { |
| "epoch": 3.739775697782275, |
| "grad_norm": 1.1876782178878784, |
| "learning_rate": 1.2520532928577454e-05, |
| "loss": 0.2626, |
| "step": 88700 |
| }, |
| { |
| "epoch": 3.7439919048823675, |
| "grad_norm": 0.9685199856758118, |
| "learning_rate": 1.2512100514377268e-05, |
| "loss": 0.2623, |
| "step": 88800 |
| }, |
| { |
| "epoch": 3.7482081119824606, |
| "grad_norm": 1.2437102794647217, |
| "learning_rate": 1.2503668100177082e-05, |
| "loss": 0.2553, |
| "step": 88900 |
| }, |
| { |
| "epoch": 3.7524243190825533, |
| "grad_norm": 0.9781164526939392, |
| "learning_rate": 1.2495235685976896e-05, |
| "loss": 0.2614, |
| "step": 89000 |
| }, |
| { |
| "epoch": 3.756640526182646, |
| "grad_norm": 1.0566153526306152, |
| "learning_rate": 1.248680327177671e-05, |
| "loss": 0.2673, |
| "step": 89100 |
| }, |
| { |
| "epoch": 3.760856733282739, |
| "grad_norm": 0.990650475025177, |
| "learning_rate": 1.2478370857576526e-05, |
| "loss": 0.2666, |
| "step": 89200 |
| }, |
| { |
| "epoch": 3.7650729403828316, |
| "grad_norm": 1.4134660959243774, |
| "learning_rate": 1.246993844337634e-05, |
| "loss": 0.2627, |
| "step": 89300 |
| }, |
| { |
| "epoch": 3.769289147482924, |
| "grad_norm": 0.844741940498352, |
| "learning_rate": 1.2461506029176154e-05, |
| "loss": 0.255, |
| "step": 89400 |
| }, |
| { |
| "epoch": 3.7735053545830173, |
| "grad_norm": 1.163191795349121, |
| "learning_rate": 1.2453073614975968e-05, |
| "loss": 0.2581, |
| "step": 89500 |
| }, |
| { |
| "epoch": 3.77772156168311, |
| "grad_norm": 1.472217082977295, |
| "learning_rate": 1.2444641200775782e-05, |
| "loss": 0.2619, |
| "step": 89600 |
| }, |
| { |
| "epoch": 3.7819377687832025, |
| "grad_norm": 0.9781330823898315, |
| "learning_rate": 1.24362087865756e-05, |
| "loss": 0.2537, |
| "step": 89700 |
| }, |
| { |
| "epoch": 3.7861539758832956, |
| "grad_norm": 1.3486849069595337, |
| "learning_rate": 1.2427776372375414e-05, |
| "loss": 0.2613, |
| "step": 89800 |
| }, |
| { |
| "epoch": 3.7903701829833882, |
| "grad_norm": 0.9725906848907471, |
| "learning_rate": 1.2419343958175228e-05, |
| "loss": 0.2559, |
| "step": 89900 |
| }, |
| { |
| "epoch": 3.794586390083481, |
| "grad_norm": 1.3243602514266968, |
| "learning_rate": 1.2410911543975042e-05, |
| "loss": 0.26, |
| "step": 90000 |
| }, |
| { |
| "epoch": 3.7988025971835735, |
| "grad_norm": 1.0398342609405518, |
| "learning_rate": 1.2402479129774856e-05, |
| "loss": 0.2559, |
| "step": 90100 |
| }, |
| { |
| "epoch": 3.8030188042836666, |
| "grad_norm": 1.1146111488342285, |
| "learning_rate": 1.239404671557467e-05, |
| "loss": 0.2634, |
| "step": 90200 |
| }, |
| { |
| "epoch": 3.807235011383759, |
| "grad_norm": 1.127182960510254, |
| "learning_rate": 1.2385614301374484e-05, |
| "loss": 0.2606, |
| "step": 90300 |
| }, |
| { |
| "epoch": 3.811451218483852, |
| "grad_norm": 1.015506386756897, |
| "learning_rate": 1.2377181887174298e-05, |
| "loss": 0.2678, |
| "step": 90400 |
| }, |
| { |
| "epoch": 3.8156674255839444, |
| "grad_norm": 1.116790533065796, |
| "learning_rate": 1.2368749472974114e-05, |
| "loss": 0.2509, |
| "step": 90500 |
| }, |
| { |
| "epoch": 3.8198836326840375, |
| "grad_norm": 0.8489646911621094, |
| "learning_rate": 1.2360317058773928e-05, |
| "loss": 0.2583, |
| "step": 90600 |
| }, |
| { |
| "epoch": 3.82409983978413, |
| "grad_norm": 1.4936515092849731, |
| "learning_rate": 1.2351884644573742e-05, |
| "loss": 0.2573, |
| "step": 90700 |
| }, |
| { |
| "epoch": 3.828316046884223, |
| "grad_norm": 1.065934419631958, |
| "learning_rate": 1.2343452230373556e-05, |
| "loss": 0.2634, |
| "step": 90800 |
| }, |
| { |
| "epoch": 3.832532253984316, |
| "grad_norm": 1.003338098526001, |
| "learning_rate": 1.2335019816173372e-05, |
| "loss": 0.268, |
| "step": 90900 |
| }, |
| { |
| "epoch": 3.8367484610844085, |
| "grad_norm": 1.0324583053588867, |
| "learning_rate": 1.2326587401973186e-05, |
| "loss": 0.2549, |
| "step": 91000 |
| }, |
| { |
| "epoch": 3.840964668184501, |
| "grad_norm": 1.2623025178909302, |
| "learning_rate": 1.2318154987773002e-05, |
| "loss": 0.2692, |
| "step": 91100 |
| }, |
| { |
| "epoch": 3.845180875284594, |
| "grad_norm": 0.9342853426933289, |
| "learning_rate": 1.2309722573572816e-05, |
| "loss": 0.254, |
| "step": 91200 |
| }, |
| { |
| "epoch": 3.849397082384687, |
| "grad_norm": 1.087953805923462, |
| "learning_rate": 1.230129015937263e-05, |
| "loss": 0.2584, |
| "step": 91300 |
| }, |
| { |
| "epoch": 3.8536132894847794, |
| "grad_norm": 0.8858796954154968, |
| "learning_rate": 1.2292857745172444e-05, |
| "loss": 0.249, |
| "step": 91400 |
| }, |
| { |
| "epoch": 3.8578294965848725, |
| "grad_norm": 0.9204941987991333, |
| "learning_rate": 1.2284425330972258e-05, |
| "loss": 0.2557, |
| "step": 91500 |
| }, |
| { |
| "epoch": 3.862045703684965, |
| "grad_norm": 1.2459721565246582, |
| "learning_rate": 1.2275992916772072e-05, |
| "loss": 0.2699, |
| "step": 91600 |
| }, |
| { |
| "epoch": 3.8662619107850578, |
| "grad_norm": 0.9872584342956543, |
| "learning_rate": 1.2267560502571886e-05, |
| "loss": 0.2583, |
| "step": 91700 |
| }, |
| { |
| "epoch": 3.8704781178851504, |
| "grad_norm": 1.076714277267456, |
| "learning_rate": 1.22591280883717e-05, |
| "loss": 0.264, |
| "step": 91800 |
| }, |
| { |
| "epoch": 3.874694324985243, |
| "grad_norm": 1.9756258726119995, |
| "learning_rate": 1.2250695674171516e-05, |
| "loss": 0.2596, |
| "step": 91900 |
| }, |
| { |
| "epoch": 3.878910532085336, |
| "grad_norm": 0.9622089266777039, |
| "learning_rate": 1.224226325997133e-05, |
| "loss": 0.2579, |
| "step": 92000 |
| }, |
| { |
| "epoch": 3.878910532085336, |
| "eval_bleu": 11.8761, |
| "eval_bleurt": null, |
| "eval_chrfpp": 34.5875, |
| "eval_comet": 0.5465, |
| "eval_gen_len": 20.8682, |
| "eval_loss": 0.29051172733306885, |
| "eval_runtime": 1094.2642, |
| "eval_samples_per_second": 43.349, |
| "eval_steps_per_second": 2.71, |
| "step": 92000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 237180, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.060418285102694e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
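The trailer above records the run configuration: logging every 100 steps, evaluating and saving every 4,000 steps, a train batch size of 16, and a schedule of 237,180 total steps over 10 epochs, so this log, ending at step 92,000 in epoch ~3.88, covers roughly the first 39% of the planned run.

Below is a minimal sketch of how a `trainer_state.json` like this can be summarized programmatically. It uses only the Python standard library; the filename `trainer_state.json` and the choice of chrF++ as the metric to rank evaluations by are illustrative assumptions, not anything prescribed by the file itself.

```python
# Minimal sketch: summarize a Hugging Face Trainer log such as the JSON above.
# Assumptions (illustrative, not prescribed by the file): the JSON is saved as
# "trainer_state.json" in the working directory, and chrF++ is the metric used
# to rank evaluation points.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_logs)} training log entries, {len(eval_logs)} evaluations")

# Print the evaluation trajectory (one row per eval_steps=4000 checkpoint).
for e in eval_logs:
    print(
        f"step {e['step']:>6}: BLEU {e['eval_bleu']:.2f}  "
        f"chrF++ {e['eval_chrfpp']:.2f}  COMET {e['eval_comet']:.4f}  "
        f"eval_loss {e['eval_loss']:.4f}"
    )

# Evaluation point with the highest chrF++ logged so far in this run.
best = max(eval_logs, key=lambda e: e["eval_chrfpp"])
print(f"best chrF++ so far: {best['eval_chrfpp']:.2f} at step {best['step']}")
```

Run against the entries shown in this section, the trajectory printout would show eval BLEU rising from 11.34 at step 64,000 to 11.88 at step 92,000, with chrF++ improving from 33.94 to 34.59, COMET from 0.5433 to 0.5465, and eval_loss falling from 0.312 to 0.291, i.e. the run is still improving steadily when the log ends.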