diff --git "a/checkpoint-2000/trainer_state.json" "b/checkpoint-2000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2000/trainer_state.json" @@ -0,0 +1,66834 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5751671579552807, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "ce_ib": 65.99971008300781, + "ce_orig": 0.8247115612030029, + "epoch": 0, + "kl_loss": 3969.01025390625, + "loss_ib": 39.756099700927734, + "step": 0 + }, + { + "ce_ib": 61.875301361083984, + "ce_orig": 0.3094598948955536, + "epoch": 0, + "kl_loss": 1816.435302734375, + "loss_ib": 18.226226806640625, + "step": 0 + }, + { + "ce_ib": 65.33805084228516, + "ce_orig": 1.0820972919464111, + "epoch": 0, + "kl_loss": 4051.13818359375, + "loss_ib": 40.576717376708984, + "step": 0 + }, + { + "ce_ib": 65.36083221435547, + "ce_orig": 0.8601827025413513, + "epoch": 0, + "kl_loss": 3727.80126953125, + "loss_ib": 37.3433723449707, + "step": 0 + }, + { + "ce_ib": 64.40461730957031, + "ce_orig": 1.3601988554000854, + "epoch": 0.00028758357897764035, + "kl_loss": 3548.660888671875, + "loss_ib": 35.5510139465332, + "step": 1 + }, + { + "ce_ib": 66.136474609375, + "ce_orig": 0.9451982975006104, + "epoch": 0.00028758357897764035, + "kl_loss": 4003.119140625, + "loss_ib": 40.097328186035156, + "step": 1 + }, + { + "ce_ib": 65.30732727050781, + "ce_orig": 1.3611608743667603, + "epoch": 0.00028758357897764035, + "kl_loss": 3076.302490234375, + "loss_ib": 30.828330993652344, + "step": 1 + }, + { + "ce_ib": 63.613216400146484, + "ce_orig": 0.5681392550468445, + "epoch": 0.00028758357897764035, + "kl_loss": 3922.22265625, + "loss_ib": 39.28583908081055, + "step": 1 + }, + { + "ce_ib": 65.20169067382812, + "ce_orig": 0.9869711399078369, + "epoch": 0.0005751671579552807, + "kl_loss": 4010.333251953125, + "loss_ib": 40.16853332519531, + "step": 2 + }, + { + "ce_ib": 64.6613540649414, + "ce_orig": 1.0124142169952393, + "epoch": 0.0005751671579552807, + "kl_loss": 3416.4658203125, + "loss_ib": 34.22931671142578, + "step": 2 + }, + { + "ce_ib": 64.3924560546875, + "ce_orig": 0.825140118598938, + "epoch": 0.0005751671579552807, + "kl_loss": 3954.5244140625, + "loss_ib": 39.60963439941406, + "step": 2 + }, + { + "ce_ib": 66.31563568115234, + "ce_orig": 1.6114795207977295, + "epoch": 0.0005751671579552807, + "kl_loss": 3360.53955078125, + "loss_ib": 33.67171096801758, + "step": 2 + }, + { + "ce_ib": 63.97846603393555, + "ce_orig": 1.0248628854751587, + "epoch": 0.0008627507369329212, + "kl_loss": 3866.74462890625, + "loss_ib": 38.73142623901367, + "step": 3 + }, + { + "ce_ib": 64.94669342041016, + "ce_orig": 0.7158174514770508, + "epoch": 0.0008627507369329212, + "kl_loss": 3586.52783203125, + "loss_ib": 35.93022537231445, + "step": 3 + }, + { + "ce_ib": 66.78568267822266, + "ce_orig": 1.1728931665420532, + "epoch": 0.0008627507369329212, + "kl_loss": 3981.269775390625, + "loss_ib": 39.87948226928711, + "step": 3 + }, + { + "ce_ib": 66.30445861816406, + "ce_orig": 0.9273799657821655, + "epoch": 0.0008627507369329212, + "kl_loss": 3999.728271484375, + "loss_ib": 40.0635871887207, + "step": 3 + }, + { + "ce_ib": 63.22294616699219, + "ce_orig": 0.6721798181533813, + "epoch": 0.0011503343159105614, + "kl_loss": 3434.2626953125, + "loss_ib": 34.40584945678711, + "step": 4 + }, + { + "ce_ib": 65.629150390625, + "ce_orig": 0.851636528968811, + "epoch": 0.0011503343159105614, + "kl_loss": 3777.80029296875, + "loss_ib": 37.843631744384766, + "step": 4 + }, + { + "ce_ib": 65.70416259765625, + "ce_orig": 0.8407150506973267, + "epoch": 0.0011503343159105614, + "kl_loss": 3663.44775390625, + "loss_ib": 36.70018005371094, + "step": 4 + }, + { + "ce_ib": 65.25149536132812, + "ce_orig": 0.8431562781333923, + "epoch": 0.0011503343159105614, + "kl_loss": 4073.102783203125, + "loss_ib": 40.79627990722656, + "step": 4 + }, + { + "epoch": 0.0014379178948882019, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 37.6651, + "step": 5 + }, + { + "ce_ib": 63.31033706665039, + "ce_orig": 0.5193647146224976, + "epoch": 0.0014379178948882019, + "kl_loss": 3829.75732421875, + "loss_ib": 38.36088180541992, + "step": 5 + }, + { + "ce_ib": 64.82113647460938, + "ce_orig": 0.9080048203468323, + "epoch": 0.0014379178948882019, + "kl_loss": 4034.60400390625, + "loss_ib": 40.41086196899414, + "step": 5 + }, + { + "ce_ib": 67.75746154785156, + "ce_orig": 1.7583141326904297, + "epoch": 0.0014379178948882019, + "kl_loss": 3362.895751953125, + "loss_ib": 33.696712493896484, + "step": 5 + }, + { + "ce_ib": 65.55052947998047, + "ce_orig": 1.0019645690917969, + "epoch": 0.0014379178948882019, + "kl_loss": 3561.7119140625, + "loss_ib": 35.68266677856445, + "step": 5 + }, + { + "ce_ib": 65.5093765258789, + "ce_orig": 1.2022827863693237, + "epoch": 0.0017255014738658423, + "kl_loss": 3854.793212890625, + "loss_ib": 38.613441467285156, + "step": 6 + }, + { + "ce_ib": 63.95633316040039, + "ce_orig": 0.5561846494674683, + "epoch": 0.0017255014738658423, + "kl_loss": 3231.163818359375, + "loss_ib": 32.37559509277344, + "step": 6 + }, + { + "ce_ib": 66.91143798828125, + "ce_orig": 1.007911205291748, + "epoch": 0.0017255014738658423, + "kl_loss": 3694.936767578125, + "loss_ib": 37.01627731323242, + "step": 6 + }, + { + "ce_ib": 65.86326599121094, + "ce_orig": 1.1325939893722534, + "epoch": 0.0017255014738658423, + "kl_loss": 3653.87255859375, + "loss_ib": 36.60458755493164, + "step": 6 + }, + { + "ce_ib": 61.932804107666016, + "ce_orig": 0.3588312268257141, + "epoch": 0.0020130850528434826, + "kl_loss": 2617.568359375, + "loss_ib": 26.23761749267578, + "step": 7 + }, + { + "ce_ib": 66.4891586303711, + "ce_orig": 0.9551964402198792, + "epoch": 0.0020130850528434826, + "kl_loss": 4009.619140625, + "loss_ib": 40.162681579589844, + "step": 7 + }, + { + "ce_ib": 64.68766021728516, + "ce_orig": 1.3480956554412842, + "epoch": 0.0020130850528434826, + "kl_loss": 3682.406494140625, + "loss_ib": 36.88875198364258, + "step": 7 + }, + { + "ce_ib": 65.71851348876953, + "ce_orig": 1.4119411706924438, + "epoch": 0.0020130850528434826, + "kl_loss": 3544.10595703125, + "loss_ib": 35.50677490234375, + "step": 7 + }, + { + "ce_ib": 64.80267333984375, + "ce_orig": 1.1264560222625732, + "epoch": 0.002300668631821123, + "kl_loss": 3803.631103515625, + "loss_ib": 38.101112365722656, + "step": 8 + }, + { + "ce_ib": 64.57341766357422, + "ce_orig": 0.8282275199890137, + "epoch": 0.002300668631821123, + "kl_loss": 4064.74267578125, + "loss_ib": 40.711997985839844, + "step": 8 + }, + { + "ce_ib": 64.71014404296875, + "ce_orig": 0.8245378732681274, + "epoch": 0.002300668631821123, + "kl_loss": 3696.43896484375, + "loss_ib": 37.02909851074219, + "step": 8 + }, + { + "ce_ib": 66.23856353759766, + "ce_orig": 0.7464695572853088, + "epoch": 0.002300668631821123, + "kl_loss": 3910.202880859375, + "loss_ib": 39.16826629638672, + "step": 8 + }, + { + "ce_ib": 66.02950286865234, + "ce_orig": 1.2234686613082886, + "epoch": 0.0025882522107987635, + "kl_loss": 3270.403076171875, + "loss_ib": 32.77006149291992, + "step": 9 + }, + { + "ce_ib": 61.797386169433594, + "ce_orig": 0.6015214920043945, + "epoch": 0.0025882522107987635, + "kl_loss": 3816.387939453125, + "loss_ib": 38.22567367553711, + "step": 9 + }, + { + "ce_ib": 61.85765075683594, + "ce_orig": 0.6827896237373352, + "epoch": 0.0025882522107987635, + "kl_loss": 3886.591064453125, + "loss_ib": 38.92776870727539, + "step": 9 + }, + { + "ce_ib": 66.08187866210938, + "ce_orig": 1.3109632730484009, + "epoch": 0.0025882522107987635, + "kl_loss": 3950.779541015625, + "loss_ib": 39.573875427246094, + "step": 9 + }, + { + "epoch": 0.0028758357897764038, + "grad_norm": 519.0091552734375, + "learning_rate": 1.2738853503184715e-07, + "loss": 37.7545, + "step": 10 + }, + { + "ce_ib": 64.04639434814453, + "ce_orig": 0.7621712684631348, + "epoch": 0.0028758357897764038, + "kl_loss": 3556.8876953125, + "loss_ib": 35.6329231262207, + "step": 10 + }, + { + "ce_ib": 68.0383071899414, + "ce_orig": 1.6497186422348022, + "epoch": 0.0028758357897764038, + "kl_loss": 3772.04345703125, + "loss_ib": 37.78847122192383, + "step": 10 + }, + { + "ce_ib": 68.69857025146484, + "ce_orig": 1.7943047285079956, + "epoch": 0.0028758357897764038, + "kl_loss": 3361.59521484375, + "loss_ib": 33.68465042114258, + "step": 10 + }, + { + "ce_ib": 66.5051040649414, + "ce_orig": 0.9888308644294739, + "epoch": 0.0028758357897764038, + "kl_loss": 3659.6396484375, + "loss_ib": 36.662899017333984, + "step": 10 + }, + { + "ce_ib": 69.01343536376953, + "ce_orig": 1.8538011312484741, + "epoch": 0.003163419368754044, + "kl_loss": 3817.212158203125, + "loss_ib": 38.24113464355469, + "step": 11 + }, + { + "ce_ib": 66.35260772705078, + "ce_orig": 1.5063494443893433, + "epoch": 0.003163419368754044, + "kl_loss": 3289.161376953125, + "loss_ib": 32.95796585083008, + "step": 11 + }, + { + "ce_ib": 63.46610641479492, + "ce_orig": 0.9150936007499695, + "epoch": 0.003163419368754044, + "kl_loss": 3871.0595703125, + "loss_ib": 38.77406311035156, + "step": 11 + }, + { + "ce_ib": 65.04010009765625, + "ce_orig": 0.8206998705863953, + "epoch": 0.003163419368754044, + "kl_loss": 1842.705322265625, + "loss_ib": 18.492094039916992, + "step": 11 + }, + { + "ce_ib": 64.97047424316406, + "ce_orig": 1.1343697309494019, + "epoch": 0.0034510029477316847, + "kl_loss": 3761.435546875, + "loss_ib": 37.679325103759766, + "step": 12 + }, + { + "ce_ib": 65.20761108398438, + "ce_orig": 0.8448718190193176, + "epoch": 0.0034510029477316847, + "kl_loss": 3675.133544921875, + "loss_ib": 36.81654357910156, + "step": 12 + }, + { + "ce_ib": 61.798465728759766, + "ce_orig": 0.8912767767906189, + "epoch": 0.0034510029477316847, + "kl_loss": 3862.031982421875, + "loss_ib": 38.6821174621582, + "step": 12 + }, + { + "ce_ib": 62.26899337768555, + "ce_orig": 0.6894794702529907, + "epoch": 0.0034510029477316847, + "kl_loss": 3852.8837890625, + "loss_ib": 38.59110641479492, + "step": 12 + }, + { + "ce_ib": 62.82128143310547, + "ce_orig": 0.8209494948387146, + "epoch": 0.003738586526709325, + "kl_loss": 4055.579345703125, + "loss_ib": 40.618614196777344, + "step": 13 + }, + { + "ce_ib": 63.56863021850586, + "ce_orig": 0.6192977428436279, + "epoch": 0.003738586526709325, + "kl_loss": 3213.953369140625, + "loss_ib": 32.203102111816406, + "step": 13 + }, + { + "ce_ib": 63.4449348449707, + "ce_orig": 0.8305644392967224, + "epoch": 0.003738586526709325, + "kl_loss": 4162.50732421875, + "loss_ib": 41.68851852416992, + "step": 13 + }, + { + "ce_ib": 63.81005859375, + "ce_orig": 0.63532555103302, + "epoch": 0.003738586526709325, + "kl_loss": 3868.43896484375, + "loss_ib": 38.748199462890625, + "step": 13 + }, + { + "ce_ib": 63.38967514038086, + "ce_orig": 1.0747102499008179, + "epoch": 0.004026170105686965, + "kl_loss": 3797.86328125, + "loss_ib": 38.04201889038086, + "step": 14 + }, + { + "ce_ib": 64.95621490478516, + "ce_orig": 0.2551676332950592, + "epoch": 0.004026170105686965, + "kl_loss": 3229.682861328125, + "loss_ib": 32.36178207397461, + "step": 14 + }, + { + "ce_ib": 63.782310485839844, + "ce_orig": 0.9092867374420166, + "epoch": 0.004026170105686965, + "kl_loss": 4164.35009765625, + "loss_ib": 41.70728302001953, + "step": 14 + }, + { + "ce_ib": 62.4797477722168, + "ce_orig": 0.47730547189712524, + "epoch": 0.004026170105686965, + "kl_loss": 3852.087158203125, + "loss_ib": 38.583351135253906, + "step": 14 + }, + { + "epoch": 0.004313753684664605, + "grad_norm": 523.5825805664062, + "learning_rate": 2.8662420382165606e-07, + "loss": 37.7441, + "step": 15 + }, + { + "ce_ib": 66.0411148071289, + "ce_orig": 1.2483270168304443, + "epoch": 0.004313753684664605, + "kl_loss": 3730.42578125, + "loss_ib": 37.370296478271484, + "step": 15 + }, + { + "ce_ib": 62.428306579589844, + "ce_orig": 0.6228333711624146, + "epoch": 0.004313753684664605, + "kl_loss": 3604.6357421875, + "loss_ib": 36.108787536621094, + "step": 15 + }, + { + "ce_ib": 63.51506042480469, + "ce_orig": 1.2859349250793457, + "epoch": 0.004313753684664605, + "kl_loss": 4202.3095703125, + "loss_ib": 42.08660888671875, + "step": 15 + }, + { + "ce_ib": 63.46177291870117, + "ce_orig": 0.7081962823867798, + "epoch": 0.004313753684664605, + "kl_loss": 3794.773193359375, + "loss_ib": 38.011192321777344, + "step": 15 + }, + { + "ce_ib": 63.84330749511719, + "ce_orig": 0.7915632724761963, + "epoch": 0.004601337263642246, + "kl_loss": 3308.423828125, + "loss_ib": 33.14807891845703, + "step": 16 + }, + { + "ce_ib": 64.69558715820312, + "ce_orig": 1.4574185609817505, + "epoch": 0.004601337263642246, + "kl_loss": 3845.239990234375, + "loss_ib": 38.517093658447266, + "step": 16 + }, + { + "ce_ib": 63.32929992675781, + "ce_orig": 0.954424262046814, + "epoch": 0.004601337263642246, + "kl_loss": 3470.448486328125, + "loss_ib": 34.76781463623047, + "step": 16 + }, + { + "ce_ib": 67.40885925292969, + "ce_orig": 0.8406963348388672, + "epoch": 0.004601337263642246, + "kl_loss": 4098.966796875, + "loss_ib": 41.05707550048828, + "step": 16 + }, + { + "ce_ib": 66.77290344238281, + "ce_orig": 1.3419686555862427, + "epoch": 0.004888920842619887, + "kl_loss": 3622.17578125, + "loss_ib": 36.28852844238281, + "step": 17 + }, + { + "ce_ib": 62.80875778198242, + "ce_orig": 0.6389923691749573, + "epoch": 0.004888920842619887, + "kl_loss": 4102.7236328125, + "loss_ib": 41.09004211425781, + "step": 17 + }, + { + "ce_ib": 64.88585662841797, + "ce_orig": 1.1766713857650757, + "epoch": 0.004888920842619887, + "kl_loss": 1908.9375, + "loss_ib": 19.154260635375977, + "step": 17 + }, + { + "ce_ib": 65.38214874267578, + "ce_orig": 1.252450942993164, + "epoch": 0.004888920842619887, + "kl_loss": 3970.094482421875, + "loss_ib": 39.766326904296875, + "step": 17 + }, + { + "ce_ib": 64.7368392944336, + "ce_orig": 1.1009352207183838, + "epoch": 0.005176504421597527, + "kl_loss": 3504.70361328125, + "loss_ib": 35.11177062988281, + "step": 18 + }, + { + "ce_ib": 62.09738540649414, + "ce_orig": 0.480591744184494, + "epoch": 0.005176504421597527, + "kl_loss": 3198.1953125, + "loss_ib": 32.04404830932617, + "step": 18 + }, + { + "ce_ib": 65.45724487304688, + "ce_orig": 0.8052865266799927, + "epoch": 0.005176504421597527, + "kl_loss": 3857.419189453125, + "loss_ib": 38.6396484375, + "step": 18 + }, + { + "ce_ib": 65.82563781738281, + "ce_orig": 1.2099261283874512, + "epoch": 0.005176504421597527, + "kl_loss": 3628.470703125, + "loss_ib": 36.35053253173828, + "step": 18 + }, + { + "ce_ib": 64.87178802490234, + "ce_orig": 1.0739271640777588, + "epoch": 0.005464088000575167, + "kl_loss": 3784.338623046875, + "loss_ib": 37.90825653076172, + "step": 19 + }, + { + "ce_ib": 66.35687255859375, + "ce_orig": 1.3444932699203491, + "epoch": 0.005464088000575167, + "kl_loss": 3339.950927734375, + "loss_ib": 33.46586608886719, + "step": 19 + }, + { + "ce_ib": 67.06342315673828, + "ce_orig": 0.8359652757644653, + "epoch": 0.005464088000575167, + "kl_loss": 4230.32666015625, + "loss_ib": 42.370330810546875, + "step": 19 + }, + { + "ce_ib": 67.96249389648438, + "ce_orig": 1.7302289009094238, + "epoch": 0.005464088000575167, + "kl_loss": 3579.07421875, + "loss_ib": 35.85870361328125, + "step": 19 + }, + { + "epoch": 0.0057516715795528075, + "grad_norm": 525.4837036132812, + "learning_rate": 4.45859872611465e-07, + "loss": 38.2435, + "step": 20 + }, + { + "ce_ib": 64.20391082763672, + "ce_orig": 0.7589595913887024, + "epoch": 0.0057516715795528075, + "kl_loss": 4128.66064453125, + "loss_ib": 41.35081100463867, + "step": 20 + }, + { + "ce_ib": 62.830806732177734, + "ce_orig": 0.6316859126091003, + "epoch": 0.0057516715795528075, + "kl_loss": 3403.72509765625, + "loss_ib": 34.10007858276367, + "step": 20 + }, + { + "ce_ib": 65.20977783203125, + "ce_orig": 0.8842067718505859, + "epoch": 0.0057516715795528075, + "kl_loss": 4221.73095703125, + "loss_ib": 42.28252029418945, + "step": 20 + }, + { + "ce_ib": 63.80289077758789, + "ce_orig": 1.1217824220657349, + "epoch": 0.0057516715795528075, + "kl_loss": 3923.58349609375, + "loss_ib": 39.29963684082031, + "step": 20 + }, + { + "ce_ib": 63.151161193847656, + "ce_orig": 0.725497305393219, + "epoch": 0.006039255158530448, + "kl_loss": 4119.48046875, + "loss_ib": 41.25795364379883, + "step": 21 + }, + { + "ce_ib": 62.518638610839844, + "ce_orig": 0.6902149319648743, + "epoch": 0.006039255158530448, + "kl_loss": 3765.32373046875, + "loss_ib": 37.715755462646484, + "step": 21 + }, + { + "ce_ib": 64.55998229980469, + "ce_orig": 1.0123672485351562, + "epoch": 0.006039255158530448, + "kl_loss": 3793.22509765625, + "loss_ib": 37.99681091308594, + "step": 21 + }, + { + "ce_ib": 61.955604553222656, + "ce_orig": 0.528033971786499, + "epoch": 0.006039255158530448, + "kl_loss": 3723.4833984375, + "loss_ib": 37.29678726196289, + "step": 21 + }, + { + "ce_ib": 62.95112609863281, + "ce_orig": 0.7799142003059387, + "epoch": 0.006326838737508088, + "kl_loss": 3707.60546875, + "loss_ib": 37.13900375366211, + "step": 22 + }, + { + "ce_ib": 63.943023681640625, + "ce_orig": 0.836663544178009, + "epoch": 0.006326838737508088, + "kl_loss": 3695.3837890625, + "loss_ib": 37.01778030395508, + "step": 22 + }, + { + "ce_ib": 61.96399688720703, + "ce_orig": 0.5541026592254639, + "epoch": 0.006326838737508088, + "kl_loss": 3717.060302734375, + "loss_ib": 37.232566833496094, + "step": 22 + }, + { + "ce_ib": 65.02377319335938, + "ce_orig": 1.0211303234100342, + "epoch": 0.006326838737508088, + "kl_loss": 3948.7412109375, + "loss_ib": 39.55243682861328, + "step": 22 + }, + { + "ce_ib": 64.2979736328125, + "ce_orig": 1.0038220882415771, + "epoch": 0.006614422316485728, + "kl_loss": 3556.190185546875, + "loss_ib": 35.626197814941406, + "step": 23 + }, + { + "ce_ib": 64.74838256835938, + "ce_orig": 1.4320262670516968, + "epoch": 0.006614422316485728, + "kl_loss": 3511.77587890625, + "loss_ib": 35.1825065612793, + "step": 23 + }, + { + "ce_ib": 66.12266540527344, + "ce_orig": 1.3454687595367432, + "epoch": 0.006614422316485728, + "kl_loss": 3485.62158203125, + "loss_ib": 34.92233657836914, + "step": 23 + }, + { + "ce_ib": 65.04991912841797, + "ce_orig": 1.1041706800460815, + "epoch": 0.006614422316485728, + "kl_loss": 3322.958984375, + "loss_ib": 33.294639587402344, + "step": 23 + }, + { + "ce_ib": 63.05337905883789, + "ce_orig": 0.8803771734237671, + "epoch": 0.006902005895463369, + "kl_loss": 4095.29345703125, + "loss_ib": 41.01598358154297, + "step": 24 + }, + { + "ce_ib": 62.59025192260742, + "ce_orig": 0.5355072021484375, + "epoch": 0.006902005895463369, + "kl_loss": 2086.240478515625, + "loss_ib": 20.92499542236328, + "step": 24 + }, + { + "ce_ib": 63.184295654296875, + "ce_orig": 0.7724276781082153, + "epoch": 0.006902005895463369, + "kl_loss": 4004.15185546875, + "loss_ib": 40.10470199584961, + "step": 24 + }, + { + "ce_ib": 65.41134643554688, + "ce_orig": 0.9222034811973572, + "epoch": 0.006902005895463369, + "kl_loss": 4123.5751953125, + "loss_ib": 41.30116271972656, + "step": 24 + }, + { + "epoch": 0.00718958947444101, + "grad_norm": 504.01654052734375, + "learning_rate": 6.05095541401274e-07, + "loss": 37.9487, + "step": 25 + }, + { + "ce_ib": 65.42027282714844, + "ce_orig": 1.304446816444397, + "epoch": 0.00718958947444101, + "kl_loss": 3540.853515625, + "loss_ib": 35.47395324707031, + "step": 25 + }, + { + "ce_ib": 61.20964050292969, + "ce_orig": 0.4327137768268585, + "epoch": 0.00718958947444101, + "kl_loss": 3690.677734375, + "loss_ib": 36.96798324584961, + "step": 25 + }, + { + "ce_ib": 64.6102066040039, + "ce_orig": 1.0307230949401855, + "epoch": 0.00718958947444101, + "kl_loss": 4015.37060546875, + "loss_ib": 40.21831512451172, + "step": 25 + }, + { + "ce_ib": 64.76322174072266, + "ce_orig": 0.9839794039726257, + "epoch": 0.00718958947444101, + "kl_loss": 4032.43212890625, + "loss_ib": 40.38908386230469, + "step": 25 + }, + { + "ce_ib": 65.33113098144531, + "ce_orig": 1.4617711305618286, + "epoch": 0.00747717305341865, + "kl_loss": 3820.6474609375, + "loss_ib": 38.27180480957031, + "step": 26 + }, + { + "ce_ib": 65.57540130615234, + "ce_orig": 0.7502631545066833, + "epoch": 0.00747717305341865, + "kl_loss": 3953.942138671875, + "loss_ib": 39.60499572753906, + "step": 26 + }, + { + "ce_ib": 66.47959899902344, + "ce_orig": 1.3454749584197998, + "epoch": 0.00747717305341865, + "kl_loss": 3569.14794921875, + "loss_ib": 35.757957458496094, + "step": 26 + }, + { + "ce_ib": 66.15406799316406, + "ce_orig": 1.0591317415237427, + "epoch": 0.00747717305341865, + "kl_loss": 3647.95068359375, + "loss_ib": 36.54566192626953, + "step": 26 + }, + { + "ce_ib": 61.56562042236328, + "ce_orig": 0.7531498074531555, + "epoch": 0.00776475663239629, + "kl_loss": 3646.81689453125, + "loss_ib": 36.52973175048828, + "step": 27 + }, + { + "ce_ib": 63.738616943359375, + "ce_orig": 0.7576659321784973, + "epoch": 0.00776475663239629, + "kl_loss": 4090.55078125, + "loss_ib": 40.96924591064453, + "step": 27 + }, + { + "ce_ib": 63.68565368652344, + "ce_orig": 0.8695321083068848, + "epoch": 0.00776475663239629, + "kl_loss": 3782.551025390625, + "loss_ib": 37.88919448852539, + "step": 27 + }, + { + "ce_ib": 62.30582046508789, + "ce_orig": 0.5045351982116699, + "epoch": 0.00776475663239629, + "kl_loss": 3620.3427734375, + "loss_ib": 36.26573181152344, + "step": 27 + }, + { + "ce_ib": 63.34856414794922, + "ce_orig": 0.741316556930542, + "epoch": 0.00805234021137393, + "kl_loss": 4024.19580078125, + "loss_ib": 40.30530548095703, + "step": 28 + }, + { + "ce_ib": 64.60435485839844, + "ce_orig": 0.7678440809249878, + "epoch": 0.00805234021137393, + "kl_loss": 3513.181884765625, + "loss_ib": 35.1964225769043, + "step": 28 + }, + { + "ce_ib": 64.85627746582031, + "ce_orig": 1.3411056995391846, + "epoch": 0.00805234021137393, + "kl_loss": 3659.3564453125, + "loss_ib": 36.65842056274414, + "step": 28 + }, + { + "ce_ib": 68.69432830810547, + "ce_orig": 1.4908052682876587, + "epoch": 0.00805234021137393, + "kl_loss": 4179.46630859375, + "loss_ib": 41.86335754394531, + "step": 28 + }, + { + "ce_ib": 64.14762115478516, + "ce_orig": 0.7301002740859985, + "epoch": 0.008339923790351571, + "kl_loss": 4014.65771484375, + "loss_ib": 40.210723876953125, + "step": 29 + }, + { + "ce_ib": 65.77961730957031, + "ce_orig": 1.078151822090149, + "epoch": 0.008339923790351571, + "kl_loss": 3336.4443359375, + "loss_ib": 33.43022155761719, + "step": 29 + }, + { + "ce_ib": 62.39012908935547, + "ce_orig": 1.5332895517349243, + "epoch": 0.008339923790351571, + "kl_loss": 4004.0390625, + "loss_ib": 40.102779388427734, + "step": 29 + }, + { + "ce_ib": 63.807186126708984, + "ce_orig": 0.9249582886695862, + "epoch": 0.008339923790351571, + "kl_loss": 3553.84716796875, + "loss_ib": 35.60227584838867, + "step": 29 + }, + { + "epoch": 0.00862750736932921, + "grad_norm": 537.8850708007812, + "learning_rate": 7.643312101910829e-07, + "loss": 38.6273, + "step": 30 + }, + { + "ce_ib": 64.0005111694336, + "ce_orig": 0.8134416937828064, + "epoch": 0.00862750736932921, + "kl_loss": 3766.99658203125, + "loss_ib": 37.73396682739258, + "step": 30 + }, + { + "ce_ib": 65.6531982421875, + "ce_orig": 1.2596931457519531, + "epoch": 0.00862750736932921, + "kl_loss": 3756.150146484375, + "loss_ib": 37.62715530395508, + "step": 30 + }, + { + "ce_ib": 66.32474517822266, + "ce_orig": 1.5833230018615723, + "epoch": 0.00862750736932921, + "kl_loss": 3616.305419921875, + "loss_ib": 36.22937774658203, + "step": 30 + }, + { + "ce_ib": 68.49303436279297, + "ce_orig": 1.2524874210357666, + "epoch": 0.00862750736932921, + "kl_loss": 3675.2001953125, + "loss_ib": 36.82049560546875, + "step": 30 + }, + { + "ce_ib": 66.44476318359375, + "ce_orig": 1.3207565546035767, + "epoch": 0.008915090948306852, + "kl_loss": 3776.052734375, + "loss_ib": 37.82697296142578, + "step": 31 + }, + { + "ce_ib": 66.1202163696289, + "ce_orig": 1.7769383192062378, + "epoch": 0.008915090948306852, + "kl_loss": 3872.0908203125, + "loss_ib": 38.78702926635742, + "step": 31 + }, + { + "ce_ib": 64.21722412109375, + "ce_orig": 1.2050706148147583, + "epoch": 0.008915090948306852, + "kl_loss": 3775.35009765625, + "loss_ib": 37.81771469116211, + "step": 31 + }, + { + "ce_ib": 64.00657653808594, + "ce_orig": 0.745306670665741, + "epoch": 0.008915090948306852, + "kl_loss": 4056.444580078125, + "loss_ib": 40.62845230102539, + "step": 31 + }, + { + "ce_ib": 64.75992584228516, + "ce_orig": 0.9638186097145081, + "epoch": 0.009202674527284491, + "kl_loss": 3975.2265625, + "loss_ib": 39.81702423095703, + "step": 32 + }, + { + "ce_ib": 63.76476287841797, + "ce_orig": 0.7001180052757263, + "epoch": 0.009202674527284491, + "kl_loss": 3708.013671875, + "loss_ib": 37.14390182495117, + "step": 32 + }, + { + "ce_ib": 62.59078598022461, + "ce_orig": 0.581017255783081, + "epoch": 0.009202674527284491, + "kl_loss": 4050.337890625, + "loss_ib": 40.56596755981445, + "step": 32 + }, + { + "ce_ib": 62.47100830078125, + "ce_orig": 0.4765642583370209, + "epoch": 0.009202674527284491, + "kl_loss": 3852.07666015625, + "loss_ib": 38.58323669433594, + "step": 32 + }, + { + "ce_ib": 66.74118041992188, + "ce_orig": 1.0750036239624023, + "epoch": 0.009490258106262132, + "kl_loss": 3840.857177734375, + "loss_ib": 38.475311279296875, + "step": 33 + }, + { + "ce_ib": 62.256229400634766, + "ce_orig": 1.111011028289795, + "epoch": 0.009490258106262132, + "kl_loss": 3756.697265625, + "loss_ib": 37.62922668457031, + "step": 33 + }, + { + "ce_ib": 60.74306869506836, + "ce_orig": 0.322427362203598, + "epoch": 0.009490258106262132, + "kl_loss": 3481.74169921875, + "loss_ib": 34.87815856933594, + "step": 33 + }, + { + "ce_ib": 63.85698318481445, + "ce_orig": 1.245665192604065, + "epoch": 0.009490258106262132, + "kl_loss": 3702.04638671875, + "loss_ib": 37.084320068359375, + "step": 33 + }, + { + "ce_ib": 64.15027618408203, + "ce_orig": 0.7340657114982605, + "epoch": 0.009777841685239774, + "kl_loss": 2706.0263671875, + "loss_ib": 27.124412536621094, + "step": 34 + }, + { + "ce_ib": 62.855018615722656, + "ce_orig": 1.0289608240127563, + "epoch": 0.009777841685239774, + "kl_loss": 3802.005615234375, + "loss_ib": 38.082908630371094, + "step": 34 + }, + { + "ce_ib": 63.055484771728516, + "ce_orig": 0.7458648681640625, + "epoch": 0.009777841685239774, + "kl_loss": 3990.32861328125, + "loss_ib": 39.96634292602539, + "step": 34 + }, + { + "ce_ib": 61.685733795166016, + "ce_orig": 0.4432576894760132, + "epoch": 0.009777841685239774, + "kl_loss": 2863.019775390625, + "loss_ib": 28.69188117980957, + "step": 34 + }, + { + "epoch": 0.010065425264217413, + "grad_norm": 523.54052734375, + "learning_rate": 9.235668789808917e-07, + "loss": 37.7138, + "step": 35 + }, + { + "ce_ib": 63.075679779052734, + "ce_orig": 0.7427234053611755, + "epoch": 0.010065425264217413, + "kl_loss": 3800.17724609375, + "loss_ib": 38.06484603881836, + "step": 35 + }, + { + "ce_ib": 66.27129364013672, + "ce_orig": 1.4802910089492798, + "epoch": 0.010065425264217413, + "kl_loss": 3349.9755859375, + "loss_ib": 33.5660285949707, + "step": 35 + }, + { + "ce_ib": 62.79461669921875, + "ce_orig": 0.8912234902381897, + "epoch": 0.010065425264217413, + "kl_loss": 4039.09765625, + "loss_ib": 40.45376968383789, + "step": 35 + }, + { + "ce_ib": 61.9300537109375, + "ce_orig": 0.6817716360092163, + "epoch": 0.010065425264217413, + "kl_loss": 3835.2861328125, + "loss_ib": 38.414791107177734, + "step": 35 + }, + { + "ce_ib": 63.25111770629883, + "ce_orig": 0.9492425918579102, + "epoch": 0.010353008843195054, + "kl_loss": 3599.0205078125, + "loss_ib": 36.0534553527832, + "step": 36 + }, + { + "ce_ib": 64.83221435546875, + "ce_orig": 1.1269358396530151, + "epoch": 0.010353008843195054, + "kl_loss": 3825.91796875, + "loss_ib": 38.32400894165039, + "step": 36 + }, + { + "ce_ib": 63.47658920288086, + "ce_orig": 0.7525137662887573, + "epoch": 0.010353008843195054, + "kl_loss": 3816.482421875, + "loss_ib": 38.22829818725586, + "step": 36 + }, + { + "ce_ib": 67.63275909423828, + "ce_orig": 1.4331247806549072, + "epoch": 0.010353008843195054, + "kl_loss": 3869.70654296875, + "loss_ib": 38.76469802856445, + "step": 36 + }, + { + "ce_ib": 62.58089065551758, + "ce_orig": 0.6857898235321045, + "epoch": 0.010640592422172693, + "kl_loss": 3316.5986328125, + "loss_ib": 33.22856521606445, + "step": 37 + }, + { + "ce_ib": 66.71737670898438, + "ce_orig": 1.6872270107269287, + "epoch": 0.010640592422172693, + "kl_loss": 3741.76953125, + "loss_ib": 37.48440933227539, + "step": 37 + }, + { + "ce_ib": 64.51302337646484, + "ce_orig": 1.0037118196487427, + "epoch": 0.010640592422172693, + "kl_loss": 4072.7705078125, + "loss_ib": 40.79221725463867, + "step": 37 + }, + { + "ce_ib": 64.44024658203125, + "ce_orig": 1.0666587352752686, + "epoch": 0.010640592422172693, + "kl_loss": 3476.2373046875, + "loss_ib": 34.826812744140625, + "step": 37 + }, + { + "ce_ib": 63.56709289550781, + "ce_orig": 0.6757309436798096, + "epoch": 0.010928176001150335, + "kl_loss": 3920.34814453125, + "loss_ib": 39.26704788208008, + "step": 38 + }, + { + "ce_ib": 66.18359375, + "ce_orig": 1.389379620552063, + "epoch": 0.010928176001150335, + "kl_loss": 3573.64013671875, + "loss_ib": 35.80258560180664, + "step": 38 + }, + { + "ce_ib": 65.27085876464844, + "ce_orig": 0.9928706884384155, + "epoch": 0.010928176001150335, + "kl_loss": 3934.83349609375, + "loss_ib": 39.413604736328125, + "step": 38 + }, + { + "ce_ib": 62.60868453979492, + "ce_orig": 0.5065615773200989, + "epoch": 0.010928176001150335, + "kl_loss": 3360.3466796875, + "loss_ib": 33.66607666015625, + "step": 38 + }, + { + "ce_ib": 63.16704177856445, + "ce_orig": 0.6447534561157227, + "epoch": 0.011215759580127974, + "kl_loss": 3981.157470703125, + "loss_ib": 39.87474060058594, + "step": 39 + }, + { + "ce_ib": 66.88977813720703, + "ce_orig": 1.1577696800231934, + "epoch": 0.011215759580127974, + "kl_loss": 3895.958984375, + "loss_ib": 39.0264778137207, + "step": 39 + }, + { + "ce_ib": 66.50093841552734, + "ce_orig": 1.4465612173080444, + "epoch": 0.011215759580127974, + "kl_loss": 3656.223388671875, + "loss_ib": 36.62873458862305, + "step": 39 + }, + { + "ce_ib": 63.415382385253906, + "ce_orig": 0.7691327929496765, + "epoch": 0.011215759580127974, + "kl_loss": 4074.531005859375, + "loss_ib": 40.80872344970703, + "step": 39 + }, + { + "epoch": 0.011503343159105615, + "grad_norm": 543.6448364257812, + "learning_rate": 1.0828025477707007e-06, + "loss": 38.3393, + "step": 40 + }, + { + "ce_ib": 68.20819091796875, + "ce_orig": 1.7859582901000977, + "epoch": 0.011503343159105615, + "kl_loss": 3490.180908203125, + "loss_ib": 34.97001647949219, + "step": 40 + }, + { + "ce_ib": 63.85101318359375, + "ce_orig": 0.798017144203186, + "epoch": 0.011503343159105615, + "kl_loss": 3664.87158203125, + "loss_ib": 36.71256637573242, + "step": 40 + }, + { + "ce_ib": 65.26078033447266, + "ce_orig": 1.3482457399368286, + "epoch": 0.011503343159105615, + "kl_loss": 3666.393310546875, + "loss_ib": 36.72919464111328, + "step": 40 + }, + { + "ce_ib": 64.7423324584961, + "ce_orig": 1.047332763671875, + "epoch": 0.011503343159105615, + "kl_loss": 3898.302734375, + "loss_ib": 39.047767639160156, + "step": 40 + }, + { + "ce_ib": 63.11514663696289, + "ce_orig": 0.707227349281311, + "epoch": 0.011790926738083256, + "kl_loss": 3996.748046875, + "loss_ib": 40.03059387207031, + "step": 41 + }, + { + "ce_ib": 62.858245849609375, + "ce_orig": 0.7572628259658813, + "epoch": 0.011790926738083256, + "kl_loss": 3845.11328125, + "loss_ib": 38.51399230957031, + "step": 41 + }, + { + "ce_ib": 64.94149780273438, + "ce_orig": 0.9001584649085999, + "epoch": 0.011790926738083256, + "kl_loss": 3669.0107421875, + "loss_ib": 36.75504684448242, + "step": 41 + }, + { + "ce_ib": 64.22615051269531, + "ce_orig": 0.9319191575050354, + "epoch": 0.011790926738083256, + "kl_loss": 3406.089599609375, + "loss_ib": 34.1251220703125, + "step": 41 + }, + { + "ce_ib": 66.28656005859375, + "ce_orig": 1.7123758792877197, + "epoch": 0.012078510317060896, + "kl_loss": 3858.2216796875, + "loss_ib": 38.648502349853516, + "step": 42 + }, + { + "ce_ib": 63.06233215332031, + "ce_orig": 0.7600352764129639, + "epoch": 0.012078510317060896, + "kl_loss": 3817.84375, + "loss_ib": 38.24149703979492, + "step": 42 + }, + { + "ce_ib": 62.57767868041992, + "ce_orig": 0.9215527772903442, + "epoch": 0.012078510317060896, + "kl_loss": 3511.0380859375, + "loss_ib": 35.17295837402344, + "step": 42 + }, + { + "ce_ib": 64.01197814941406, + "ce_orig": 0.6495408415794373, + "epoch": 0.012078510317060896, + "kl_loss": 3883.279541015625, + "loss_ib": 38.89680480957031, + "step": 42 + }, + { + "ce_ib": 63.950992584228516, + "ce_orig": 0.8470758199691772, + "epoch": 0.012366093896038537, + "kl_loss": 3836.8623046875, + "loss_ib": 38.43257141113281, + "step": 43 + }, + { + "ce_ib": 66.46541595458984, + "ce_orig": 1.070137619972229, + "epoch": 0.012366093896038537, + "kl_loss": 3574.319580078125, + "loss_ib": 35.80965805053711, + "step": 43 + }, + { + "ce_ib": 61.230316162109375, + "ce_orig": 0.6914916634559631, + "epoch": 0.012366093896038537, + "kl_loss": 4103.490234375, + "loss_ib": 41.09613037109375, + "step": 43 + }, + { + "ce_ib": 65.71780395507812, + "ce_orig": 1.2423909902572632, + "epoch": 0.012366093896038537, + "kl_loss": 3505.72607421875, + "loss_ib": 35.12297821044922, + "step": 43 + }, + { + "ce_ib": 63.5611572265625, + "ce_orig": 0.9509873986244202, + "epoch": 0.012653677475016176, + "kl_loss": 3528.08349609375, + "loss_ib": 35.34439468383789, + "step": 44 + }, + { + "ce_ib": 61.90439987182617, + "ce_orig": 1.0406547784805298, + "epoch": 0.012653677475016176, + "kl_loss": 3284.989501953125, + "loss_ib": 32.911800384521484, + "step": 44 + }, + { + "ce_ib": 62.566444396972656, + "ce_orig": 0.9737301468849182, + "epoch": 0.012653677475016176, + "kl_loss": 4007.912841796875, + "loss_ib": 40.141693115234375, + "step": 44 + }, + { + "ce_ib": 62.55556869506836, + "ce_orig": 0.9186174273490906, + "epoch": 0.012653677475016176, + "kl_loss": 3678.3505859375, + "loss_ib": 36.84606170654297, + "step": 44 + }, + { + "epoch": 0.012941261053993817, + "grad_norm": 492.6309509277344, + "learning_rate": 1.2420382165605097e-06, + "loss": 37.2694, + "step": 45 + }, + { + "ce_ib": 63.57815170288086, + "ce_orig": 1.3040772676467896, + "epoch": 0.012941261053993817, + "kl_loss": 3878.02587890625, + "loss_ib": 38.84383773803711, + "step": 45 + }, + { + "ce_ib": 61.98274230957031, + "ce_orig": 0.6795246601104736, + "epoch": 0.012941261053993817, + "kl_loss": 3593.56640625, + "loss_ib": 35.99764633178711, + "step": 45 + }, + { + "ce_ib": 63.4442138671875, + "ce_orig": 0.9516732692718506, + "epoch": 0.012941261053993817, + "kl_loss": 3920.56982421875, + "loss_ib": 39.269142150878906, + "step": 45 + }, + { + "ce_ib": 64.35120391845703, + "ce_orig": 0.7613200545310974, + "epoch": 0.012941261053993817, + "kl_loss": 3579.57763671875, + "loss_ib": 35.86012649536133, + "step": 45 + }, + { + "ce_ib": 62.913352966308594, + "ce_orig": 1.0408005714416504, + "epoch": 0.013228844632971457, + "kl_loss": 3620.77099609375, + "loss_ib": 36.27062225341797, + "step": 46 + }, + { + "ce_ib": 63.58440399169922, + "ce_orig": 0.8055190443992615, + "epoch": 0.013228844632971457, + "kl_loss": 3386.48876953125, + "loss_ib": 33.928470611572266, + "step": 46 + }, + { + "ce_ib": 63.420753479003906, + "ce_orig": 1.1024976968765259, + "epoch": 0.013228844632971457, + "kl_loss": 3643.9453125, + "loss_ib": 36.502872467041016, + "step": 46 + }, + { + "ce_ib": 62.746665954589844, + "ce_orig": 0.7064395546913147, + "epoch": 0.013228844632971457, + "kl_loss": 3931.369873046875, + "loss_ib": 39.37644577026367, + "step": 46 + }, + { + "ce_ib": 63.00592041015625, + "ce_orig": 0.8414040803909302, + "epoch": 0.013516428211949098, + "kl_loss": 4138.1728515625, + "loss_ib": 41.44473648071289, + "step": 47 + }, + { + "ce_ib": 66.07843017578125, + "ce_orig": 0.8475580811500549, + "epoch": 0.013516428211949098, + "kl_loss": 3865.19287109375, + "loss_ib": 38.7180061340332, + "step": 47 + }, + { + "ce_ib": 63.92705535888672, + "ce_orig": 0.9875443577766418, + "epoch": 0.013516428211949098, + "kl_loss": 4158.44189453125, + "loss_ib": 41.64834213256836, + "step": 47 + }, + { + "ce_ib": 68.14057922363281, + "ce_orig": 1.756430745124817, + "epoch": 0.013516428211949098, + "kl_loss": 3738.97314453125, + "loss_ib": 37.45787048339844, + "step": 47 + }, + { + "ce_ib": 63.9988899230957, + "ce_orig": 0.8397009968757629, + "epoch": 0.013804011790926739, + "kl_loss": 3744.52294921875, + "loss_ib": 37.50922775268555, + "step": 48 + }, + { + "ce_ib": 64.73321533203125, + "ce_orig": 1.5420986413955688, + "epoch": 0.013804011790926739, + "kl_loss": 3818.59228515625, + "loss_ib": 38.25065612792969, + "step": 48 + }, + { + "ce_ib": 64.00019073486328, + "ce_orig": 0.5949701070785522, + "epoch": 0.013804011790926739, + "kl_loss": 3568.99609375, + "loss_ib": 35.75395965576172, + "step": 48 + }, + { + "ce_ib": 64.06549072265625, + "ce_orig": 1.3993630409240723, + "epoch": 0.013804011790926739, + "kl_loss": 2974.96728515625, + "loss_ib": 29.813737869262695, + "step": 48 + }, + { + "ce_ib": 64.43647766113281, + "ce_orig": 1.1406134366989136, + "epoch": 0.014091595369904378, + "kl_loss": 3809.33447265625, + "loss_ib": 38.157779693603516, + "step": 49 + }, + { + "ce_ib": 61.58470153808594, + "ce_orig": 0.76979660987854, + "epoch": 0.014091595369904378, + "kl_loss": 4037.28759765625, + "loss_ib": 40.4344596862793, + "step": 49 + }, + { + "ce_ib": 62.92927551269531, + "ce_orig": 1.3724863529205322, + "epoch": 0.014091595369904378, + "kl_loss": 3676.944580078125, + "loss_ib": 36.832374572753906, + "step": 49 + }, + { + "ce_ib": 62.875492095947266, + "ce_orig": 0.6223806142807007, + "epoch": 0.014091595369904378, + "kl_loss": 3579.248046875, + "loss_ib": 35.85535430908203, + "step": 49 + }, + { + "epoch": 0.01437917894888202, + "grad_norm": 533.30029296875, + "learning_rate": 1.4012738853503185e-06, + "loss": 37.7487, + "step": 50 + }, + { + "ce_ib": 61.585453033447266, + "ce_orig": 0.8775674104690552, + "epoch": 0.01437917894888202, + "kl_loss": 3878.78076171875, + "loss_ib": 38.84939193725586, + "step": 50 + }, + { + "ce_ib": 58.95383834838867, + "ce_orig": 0.7002028822898865, + "epoch": 0.01437917894888202, + "kl_loss": 3601.2216796875, + "loss_ib": 36.0711669921875, + "step": 50 + }, + { + "ce_ib": 62.58400344848633, + "ce_orig": 0.7227221131324768, + "epoch": 0.01437917894888202, + "kl_loss": 3643.05615234375, + "loss_ib": 36.49314498901367, + "step": 50 + }, + { + "ce_ib": 66.29362487792969, + "ce_orig": 1.0485941171646118, + "epoch": 0.01437917894888202, + "kl_loss": 3717.5888671875, + "loss_ib": 37.242183685302734, + "step": 50 + }, + { + "ce_ib": 61.591148376464844, + "ce_orig": 0.6134757995605469, + "epoch": 0.014666762527859659, + "kl_loss": 4068.88330078125, + "loss_ib": 40.750423431396484, + "step": 51 + }, + { + "ce_ib": 64.17617797851562, + "ce_orig": 1.2959325313568115, + "epoch": 0.014666762527859659, + "kl_loss": 3517.951171875, + "loss_ib": 35.24368667602539, + "step": 51 + }, + { + "ce_ib": 61.61008834838867, + "ce_orig": 0.6165804862976074, + "epoch": 0.014666762527859659, + "kl_loss": 4074.12744140625, + "loss_ib": 40.80288314819336, + "step": 51 + }, + { + "ce_ib": 63.430118560791016, + "ce_orig": 1.3455349206924438, + "epoch": 0.014666762527859659, + "kl_loss": 3925.92333984375, + "loss_ib": 39.322662353515625, + "step": 51 + }, + { + "ce_ib": 63.63911819458008, + "ce_orig": 0.813752293586731, + "epoch": 0.0149543461068373, + "kl_loss": 3411.76806640625, + "loss_ib": 34.18132019042969, + "step": 52 + }, + { + "ce_ib": 62.01913833618164, + "ce_orig": 0.9041391611099243, + "epoch": 0.0149543461068373, + "kl_loss": 3552.44091796875, + "loss_ib": 35.58642578125, + "step": 52 + }, + { + "ce_ib": 63.11591339111328, + "ce_orig": 0.8099521994590759, + "epoch": 0.0149543461068373, + "kl_loss": 3363.9013671875, + "loss_ib": 33.70212936401367, + "step": 52 + }, + { + "ce_ib": 63.846641540527344, + "ce_orig": 1.0799516439437866, + "epoch": 0.0149543461068373, + "kl_loss": 3810.20947265625, + "loss_ib": 38.16594314575195, + "step": 52 + }, + { + "ce_ib": 63.17069625854492, + "ce_orig": 1.2767831087112427, + "epoch": 0.015241929685814939, + "kl_loss": 3672.79248046875, + "loss_ib": 36.79109573364258, + "step": 53 + }, + { + "ce_ib": 60.14902877807617, + "ce_orig": 0.5526849627494812, + "epoch": 0.015241929685814939, + "kl_loss": 2977.911376953125, + "loss_ib": 29.839262008666992, + "step": 53 + }, + { + "ce_ib": 63.31485366821289, + "ce_orig": 0.7787724137306213, + "epoch": 0.015241929685814939, + "kl_loss": 3514.63232421875, + "loss_ib": 35.20963668823242, + "step": 53 + }, + { + "ce_ib": 64.56353759765625, + "ce_orig": 1.6204540729522705, + "epoch": 0.015241929685814939, + "kl_loss": 3812.165771484375, + "loss_ib": 38.18621826171875, + "step": 53 + }, + { + "ce_ib": 66.47161865234375, + "ce_orig": 1.488782525062561, + "epoch": 0.01552951326479258, + "kl_loss": 3656.353759765625, + "loss_ib": 36.630008697509766, + "step": 54 + }, + { + "ce_ib": 63.27266311645508, + "ce_orig": 1.2602483034133911, + "epoch": 0.01552951326479258, + "kl_loss": 3836.474609375, + "loss_ib": 38.428016662597656, + "step": 54 + }, + { + "ce_ib": 61.19478225708008, + "ce_orig": 0.9387843608856201, + "epoch": 0.01552951326479258, + "kl_loss": 3736.385009765625, + "loss_ib": 37.425045013427734, + "step": 54 + }, + { + "ce_ib": 62.278865814208984, + "ce_orig": 0.6224288940429688, + "epoch": 0.01552951326479258, + "kl_loss": 3810.37646484375, + "loss_ib": 38.16604232788086, + "step": 54 + }, + { + "epoch": 0.01581709684377022, + "grad_norm": 496.5709533691406, + "learning_rate": 1.5605095541401275e-06, + "loss": 37.2354, + "step": 55 + }, + { + "ce_ib": 64.26878356933594, + "ce_orig": 0.8660982847213745, + "epoch": 0.01581709684377022, + "kl_loss": 3508.06201171875, + "loss_ib": 35.1448860168457, + "step": 55 + }, + { + "ce_ib": 58.63752365112305, + "ce_orig": 0.08659573644399643, + "epoch": 0.01581709684377022, + "kl_loss": 513.92724609375, + "loss_ib": 5.197909832000732, + "step": 55 + }, + { + "ce_ib": 61.785953521728516, + "ce_orig": 0.9901527762413025, + "epoch": 0.01581709684377022, + "kl_loss": 3603.44580078125, + "loss_ib": 36.09624481201172, + "step": 55 + }, + { + "ce_ib": 64.46088409423828, + "ce_orig": 0.8389644026756287, + "epoch": 0.01581709684377022, + "kl_loss": 3921.08642578125, + "loss_ib": 39.27532196044922, + "step": 55 + }, + { + "ce_ib": 60.15986251831055, + "ce_orig": 0.8044717311859131, + "epoch": 0.01610468042274786, + "kl_loss": 3653.1875, + "loss_ib": 36.59203338623047, + "step": 56 + }, + { + "ce_ib": 62.024410247802734, + "ce_orig": 0.6691257953643799, + "epoch": 0.01610468042274786, + "kl_loss": 4127.29736328125, + "loss_ib": 41.334999084472656, + "step": 56 + }, + { + "ce_ib": 62.78162384033203, + "ce_orig": 0.9230839014053345, + "epoch": 0.01610468042274786, + "kl_loss": 3697.928955078125, + "loss_ib": 37.04206848144531, + "step": 56 + }, + { + "ce_ib": 64.67729949951172, + "ce_orig": 1.3531347513198853, + "epoch": 0.01610468042274786, + "kl_loss": 3432.21728515625, + "loss_ib": 34.38684844970703, + "step": 56 + }, + { + "ce_ib": 61.56424331665039, + "ce_orig": 0.8097767233848572, + "epoch": 0.016392264001725502, + "kl_loss": 3895.91943359375, + "loss_ib": 39.02075958251953, + "step": 57 + }, + { + "ce_ib": 62.23572540283203, + "ce_orig": 0.5408704876899719, + "epoch": 0.016392264001725502, + "kl_loss": 3602.846923828125, + "loss_ib": 36.09070587158203, + "step": 57 + }, + { + "ce_ib": 63.37240982055664, + "ce_orig": 0.7751593589782715, + "epoch": 0.016392264001725502, + "kl_loss": 3264.66650390625, + "loss_ib": 32.71003723144531, + "step": 57 + }, + { + "ce_ib": 64.54995727539062, + "ce_orig": 1.2101812362670898, + "epoch": 0.016392264001725502, + "kl_loss": 3891.133544921875, + "loss_ib": 38.97588348388672, + "step": 57 + }, + { + "ce_ib": 64.34214782714844, + "ce_orig": 1.1633491516113281, + "epoch": 0.016679847580703143, + "kl_loss": 3507.682861328125, + "loss_ib": 35.141170501708984, + "step": 58 + }, + { + "ce_ib": 64.65531921386719, + "ce_orig": 1.0707935094833374, + "epoch": 0.016679847580703143, + "kl_loss": 3560.9189453125, + "loss_ib": 35.67384338378906, + "step": 58 + }, + { + "ce_ib": 62.398475646972656, + "ce_orig": 1.028975009918213, + "epoch": 0.016679847580703143, + "kl_loss": 3907.4013671875, + "loss_ib": 39.136409759521484, + "step": 58 + }, + { + "ce_ib": 62.34195327758789, + "ce_orig": 1.025146484375, + "epoch": 0.016679847580703143, + "kl_loss": 3447.82470703125, + "loss_ib": 34.54058837890625, + "step": 58 + }, + { + "ce_ib": 64.7926254272461, + "ce_orig": 1.7371116876602173, + "epoch": 0.01696743115968078, + "kl_loss": 3368.210205078125, + "loss_ib": 33.74689483642578, + "step": 59 + }, + { + "ce_ib": 63.827476501464844, + "ce_orig": 1.2639371156692505, + "epoch": 0.01696743115968078, + "kl_loss": 3764.4599609375, + "loss_ib": 37.70842742919922, + "step": 59 + }, + { + "ce_ib": 60.48318099975586, + "ce_orig": 0.5967444181442261, + "epoch": 0.01696743115968078, + "kl_loss": 3661.50927734375, + "loss_ib": 36.675575256347656, + "step": 59 + }, + { + "ce_ib": 62.90448760986328, + "ce_orig": 0.8884239792823792, + "epoch": 0.01696743115968078, + "kl_loss": 3642.8115234375, + "loss_ib": 36.49102020263672, + "step": 59 + }, + { + "epoch": 0.01725501473865842, + "grad_norm": 521.4768676757812, + "learning_rate": 1.7197452229299363e-06, + "loss": 37.203, + "step": 60 + }, + { + "ce_ib": 62.17192077636719, + "ce_orig": 0.7363674640655518, + "epoch": 0.01725501473865842, + "kl_loss": 3631.49609375, + "loss_ib": 36.377132415771484, + "step": 60 + }, + { + "ce_ib": 63.342933654785156, + "ce_orig": 1.0785236358642578, + "epoch": 0.01725501473865842, + "kl_loss": 3575.03369140625, + "loss_ib": 35.81367874145508, + "step": 60 + }, + { + "ce_ib": 61.84444046020508, + "ce_orig": 0.8591632843017578, + "epoch": 0.01725501473865842, + "kl_loss": 3692.08203125, + "loss_ib": 36.982662200927734, + "step": 60 + }, + { + "ce_ib": 63.40077209472656, + "ce_orig": 1.3737884759902954, + "epoch": 0.01725501473865842, + "kl_loss": 3226.7734375, + "loss_ib": 32.33113479614258, + "step": 60 + }, + { + "ce_ib": 60.77010726928711, + "ce_orig": 0.5792077779769897, + "epoch": 0.017542598317636063, + "kl_loss": 3807.59423828125, + "loss_ib": 38.13671112060547, + "step": 61 + }, + { + "ce_ib": 65.03406524658203, + "ce_orig": 1.6079394817352295, + "epoch": 0.017542598317636063, + "kl_loss": 3764.595703125, + "loss_ib": 37.71099090576172, + "step": 61 + }, + { + "ce_ib": 62.88136672973633, + "ce_orig": 0.9871428608894348, + "epoch": 0.017542598317636063, + "kl_loss": 3831.7412109375, + "loss_ib": 38.38029098510742, + "step": 61 + }, + { + "ce_ib": 62.55516815185547, + "ce_orig": 0.7838013172149658, + "epoch": 0.017542598317636063, + "kl_loss": 3860.99755859375, + "loss_ib": 38.67253112792969, + "step": 61 + }, + { + "ce_ib": 66.16559600830078, + "ce_orig": 1.8575478792190552, + "epoch": 0.017830181896613704, + "kl_loss": 3639.782958984375, + "loss_ib": 36.463993072509766, + "step": 62 + }, + { + "ce_ib": 59.800865173339844, + "ce_orig": 0.7499110698699951, + "epoch": 0.017830181896613704, + "kl_loss": 2505.564453125, + "loss_ib": 25.11544418334961, + "step": 62 + }, + { + "ce_ib": 64.14867401123047, + "ce_orig": 1.4294443130493164, + "epoch": 0.017830181896613704, + "kl_loss": 3695.159423828125, + "loss_ib": 37.015743255615234, + "step": 62 + }, + { + "ce_ib": 59.92710494995117, + "ce_orig": 0.7166628241539001, + "epoch": 0.017830181896613704, + "kl_loss": 3911.38427734375, + "loss_ib": 39.17376708984375, + "step": 62 + }, + { + "ce_ib": 64.37249755859375, + "ce_orig": 1.247324824333191, + "epoch": 0.018117765475591345, + "kl_loss": 3376.881103515625, + "loss_ib": 33.83318328857422, + "step": 63 + }, + { + "ce_ib": 62.949920654296875, + "ce_orig": 0.5989828705787659, + "epoch": 0.018117765475591345, + "kl_loss": 3794.5458984375, + "loss_ib": 38.00840759277344, + "step": 63 + }, + { + "ce_ib": 65.01542663574219, + "ce_orig": 1.4691771268844604, + "epoch": 0.018117765475591345, + "kl_loss": 3403.2939453125, + "loss_ib": 34.09795379638672, + "step": 63 + }, + { + "ce_ib": 61.29307556152344, + "ce_orig": 0.7313998937606812, + "epoch": 0.018117765475591345, + "kl_loss": 3450.58984375, + "loss_ib": 34.56719207763672, + "step": 63 + }, + { + "ce_ib": 60.609066009521484, + "ce_orig": 0.7843332886695862, + "epoch": 0.018405349054568983, + "kl_loss": 3731.23095703125, + "loss_ib": 37.37291717529297, + "step": 64 + }, + { + "ce_ib": 62.530723571777344, + "ce_orig": 0.6766409873962402, + "epoch": 0.018405349054568983, + "kl_loss": 3585.4892578125, + "loss_ib": 35.917423248291016, + "step": 64 + }, + { + "ce_ib": 59.86486053466797, + "ce_orig": 0.5166366696357727, + "epoch": 0.018405349054568983, + "kl_loss": 3537.461181640625, + "loss_ib": 35.43447494506836, + "step": 64 + }, + { + "ce_ib": 62.64058303833008, + "ce_orig": 0.9777031540870667, + "epoch": 0.018405349054568983, + "kl_loss": 3730.766845703125, + "loss_ib": 37.37030792236328, + "step": 64 + }, + { + "epoch": 0.018692932633546624, + "grad_norm": 518.4480590820312, + "learning_rate": 1.8789808917197455e-06, + "loss": 36.8274, + "step": 65 + }, + { + "ce_ib": 62.54412078857422, + "ce_orig": 0.7984204888343811, + "epoch": 0.018692932633546624, + "kl_loss": 3530.49755859375, + "loss_ib": 35.36751937866211, + "step": 65 + }, + { + "ce_ib": 60.793087005615234, + "ce_orig": 0.7615864276885986, + "epoch": 0.018692932633546624, + "kl_loss": 3840.51513671875, + "loss_ib": 38.4659423828125, + "step": 65 + }, + { + "ce_ib": 60.25053787231445, + "ce_orig": 0.9209924340248108, + "epoch": 0.018692932633546624, + "kl_loss": 3897.0556640625, + "loss_ib": 39.03080749511719, + "step": 65 + }, + { + "ce_ib": 63.16765594482422, + "ce_orig": 1.3678812980651855, + "epoch": 0.018692932633546624, + "kl_loss": 3938.2587890625, + "loss_ib": 39.44575500488281, + "step": 65 + }, + { + "ce_ib": 65.65672302246094, + "ce_orig": 1.016355037689209, + "epoch": 0.018980516212524265, + "kl_loss": 3797.2421875, + "loss_ib": 38.03807830810547, + "step": 66 + }, + { + "ce_ib": 61.379974365234375, + "ce_orig": 1.0548150539398193, + "epoch": 0.018980516212524265, + "kl_loss": 3564.20751953125, + "loss_ib": 35.703453063964844, + "step": 66 + }, + { + "ce_ib": 62.37001419067383, + "ce_orig": 0.9475827217102051, + "epoch": 0.018980516212524265, + "kl_loss": 3571.18115234375, + "loss_ib": 35.7741813659668, + "step": 66 + }, + { + "ce_ib": 60.9146728515625, + "ce_orig": 0.784809947013855, + "epoch": 0.018980516212524265, + "kl_loss": 3582.17822265625, + "loss_ib": 35.88269805908203, + "step": 66 + }, + { + "ce_ib": 63.815486907958984, + "ce_orig": 1.3292514085769653, + "epoch": 0.019268099791501906, + "kl_loss": 3440.73779296875, + "loss_ib": 34.47119140625, + "step": 67 + }, + { + "ce_ib": 62.98185348510742, + "ce_orig": 0.7911183834075928, + "epoch": 0.019268099791501906, + "kl_loss": 4040.31640625, + "loss_ib": 40.46614456176758, + "step": 67 + }, + { + "ce_ib": 65.0182876586914, + "ce_orig": 0.7907848358154297, + "epoch": 0.019268099791501906, + "kl_loss": 3936.2587890625, + "loss_ib": 39.42760467529297, + "step": 67 + }, + { + "ce_ib": 63.88326644897461, + "ce_orig": 1.092854380607605, + "epoch": 0.019268099791501906, + "kl_loss": 1859.8902587890625, + "loss_ib": 18.662784576416016, + "step": 67 + }, + { + "ce_ib": 61.63788604736328, + "ce_orig": 0.5948306322097778, + "epoch": 0.019555683370479547, + "kl_loss": 3881.165771484375, + "loss_ib": 38.873294830322266, + "step": 68 + }, + { + "ce_ib": 60.62575912475586, + "ce_orig": 0.5677074193954468, + "epoch": 0.019555683370479547, + "kl_loss": 3741.05078125, + "loss_ib": 37.47113037109375, + "step": 68 + }, + { + "ce_ib": 60.886016845703125, + "ce_orig": 0.5185374617576599, + "epoch": 0.019555683370479547, + "kl_loss": 3542.03857421875, + "loss_ib": 35.48126983642578, + "step": 68 + }, + { + "ce_ib": 60.4649543762207, + "ce_orig": 0.4317881166934967, + "epoch": 0.019555683370479547, + "kl_loss": 3231.02587890625, + "loss_ib": 32.370723724365234, + "step": 68 + }, + { + "ce_ib": 63.6849365234375, + "ce_orig": 1.51223886013031, + "epoch": 0.019843266949457185, + "kl_loss": 3498.36083984375, + "loss_ib": 35.04729080200195, + "step": 69 + }, + { + "ce_ib": 63.855262756347656, + "ce_orig": 2.3971757888793945, + "epoch": 0.019843266949457185, + "kl_loss": 3531.71630859375, + "loss_ib": 35.38101577758789, + "step": 69 + }, + { + "ce_ib": 61.03330993652344, + "ce_orig": 1.2093490362167358, + "epoch": 0.019843266949457185, + "kl_loss": 3718.281494140625, + "loss_ib": 37.24384689331055, + "step": 69 + }, + { + "ce_ib": 64.49278259277344, + "ce_orig": 1.474419355392456, + "epoch": 0.019843266949457185, + "kl_loss": 3354.119140625, + "loss_ib": 33.605682373046875, + "step": 69 + }, + { + "epoch": 0.020130850528434826, + "grad_norm": 502.4320068359375, + "learning_rate": 2.0382165605095544e-06, + "loss": 36.6694, + "step": 70 + }, + { + "ce_ib": 62.60323715209961, + "ce_orig": 0.8422316312789917, + "epoch": 0.020130850528434826, + "kl_loss": 3720.64892578125, + "loss_ib": 37.26909255981445, + "step": 70 + }, + { + "ce_ib": 61.0345573425293, + "ce_orig": 0.4435622990131378, + "epoch": 0.020130850528434826, + "kl_loss": 3550.4306640625, + "loss_ib": 35.56534194946289, + "step": 70 + }, + { + "ce_ib": 62.675987243652344, + "ce_orig": 1.0512957572937012, + "epoch": 0.020130850528434826, + "kl_loss": 3372.498046875, + "loss_ib": 33.787654876708984, + "step": 70 + }, + { + "ce_ib": 63.02972412109375, + "ce_orig": 0.8881096839904785, + "epoch": 0.020130850528434826, + "kl_loss": 3634.5400390625, + "loss_ib": 36.40842819213867, + "step": 70 + }, + { + "ce_ib": 61.19450759887695, + "ce_orig": 0.8308007717132568, + "epoch": 0.020418434107412467, + "kl_loss": 3668.546875, + "loss_ib": 36.74666213989258, + "step": 71 + }, + { + "ce_ib": 62.893211364746094, + "ce_orig": 0.6296738982200623, + "epoch": 0.020418434107412467, + "kl_loss": 3531.10107421875, + "loss_ib": 35.3739013671875, + "step": 71 + }, + { + "ce_ib": 61.59178924560547, + "ce_orig": 0.9227543473243713, + "epoch": 0.020418434107412467, + "kl_loss": 3549.591552734375, + "loss_ib": 35.5575065612793, + "step": 71 + }, + { + "ce_ib": 60.22856140136719, + "ce_orig": 0.6385669708251953, + "epoch": 0.020418434107412467, + "kl_loss": 3632.27978515625, + "loss_ib": 36.383026123046875, + "step": 71 + }, + { + "ce_ib": 64.18019104003906, + "ce_orig": 1.6183723211288452, + "epoch": 0.020706017686390108, + "kl_loss": 3154.05224609375, + "loss_ib": 31.60470199584961, + "step": 72 + }, + { + "ce_ib": 60.683284759521484, + "ce_orig": 0.7608581185340881, + "epoch": 0.020706017686390108, + "kl_loss": 3858.5693359375, + "loss_ib": 38.64637756347656, + "step": 72 + }, + { + "ce_ib": 60.50144577026367, + "ce_orig": 0.7999545335769653, + "epoch": 0.020706017686390108, + "kl_loss": 3660.2255859375, + "loss_ib": 36.66275405883789, + "step": 72 + }, + { + "ce_ib": 61.18289566040039, + "ce_orig": 1.1362156867980957, + "epoch": 0.020706017686390108, + "kl_loss": 3251.9755859375, + "loss_ib": 32.58094024658203, + "step": 72 + }, + { + "ce_ib": 61.33281326293945, + "ce_orig": 0.6669225096702576, + "epoch": 0.02099360126536775, + "kl_loss": 3684.797119140625, + "loss_ib": 36.9093017578125, + "step": 73 + }, + { + "ce_ib": 59.9718017578125, + "ce_orig": 0.9883142113685608, + "epoch": 0.02099360126536775, + "kl_loss": 3631.879638671875, + "loss_ib": 36.37876510620117, + "step": 73 + }, + { + "ce_ib": 63.71513748168945, + "ce_orig": 1.3611314296722412, + "epoch": 0.02099360126536775, + "kl_loss": 3757.900146484375, + "loss_ib": 37.64271545410156, + "step": 73 + }, + { + "ce_ib": 61.1561164855957, + "ce_orig": 0.8598636388778687, + "epoch": 0.02099360126536775, + "kl_loss": 3841.884033203125, + "loss_ib": 38.47999572753906, + "step": 73 + }, + { + "ce_ib": 61.85321044921875, + "ce_orig": 0.8115242719650269, + "epoch": 0.021281184844345387, + "kl_loss": 3850.6259765625, + "loss_ib": 38.568111419677734, + "step": 74 + }, + { + "ce_ib": 58.246185302734375, + "ce_orig": 0.5221734046936035, + "epoch": 0.021281184844345387, + "kl_loss": 3487.754150390625, + "loss_ib": 34.935787200927734, + "step": 74 + }, + { + "ce_ib": 61.469078063964844, + "ce_orig": 0.7319106459617615, + "epoch": 0.021281184844345387, + "kl_loss": 4135.6591796875, + "loss_ib": 41.418060302734375, + "step": 74 + }, + { + "ce_ib": 61.441585540771484, + "ce_orig": 0.9860128164291382, + "epoch": 0.021281184844345387, + "kl_loss": 3332.26318359375, + "loss_ib": 33.384071350097656, + "step": 74 + }, + { + "epoch": 0.021568768423323028, + "grad_norm": 524.3435668945312, + "learning_rate": 2.1974522292993634e-06, + "loss": 36.7901, + "step": 75 + }, + { + "ce_ib": 60.86553192138672, + "ce_orig": 0.6997863054275513, + "epoch": 0.021568768423323028, + "kl_loss": 3372.901123046875, + "loss_ib": 33.789878845214844, + "step": 75 + }, + { + "ce_ib": 62.65220260620117, + "ce_orig": 1.4506391286849976, + "epoch": 0.021568768423323028, + "kl_loss": 3574.6826171875, + "loss_ib": 35.80947494506836, + "step": 75 + }, + { + "ce_ib": 61.273311614990234, + "ce_orig": 0.7289857864379883, + "epoch": 0.021568768423323028, + "kl_loss": 3926.74658203125, + "loss_ib": 39.328739166259766, + "step": 75 + }, + { + "ce_ib": 60.932491302490234, + "ce_orig": 0.6170489192008972, + "epoch": 0.021568768423323028, + "kl_loss": 3798.67724609375, + "loss_ib": 38.04770278930664, + "step": 75 + }, + { + "ce_ib": 60.94731140136719, + "ce_orig": 1.1426844596862793, + "epoch": 0.02185635200230067, + "kl_loss": 3991.97900390625, + "loss_ib": 39.980735778808594, + "step": 76 + }, + { + "ce_ib": 59.70903396606445, + "ce_orig": 0.753753125667572, + "epoch": 0.02185635200230067, + "kl_loss": 3774.1162109375, + "loss_ib": 37.80086898803711, + "step": 76 + }, + { + "ce_ib": 60.583736419677734, + "ce_orig": 1.2148487567901611, + "epoch": 0.02185635200230067, + "kl_loss": 3596.19140625, + "loss_ib": 36.02249526977539, + "step": 76 + }, + { + "ce_ib": 61.524559020996094, + "ce_orig": 1.4590885639190674, + "epoch": 0.02185635200230067, + "kl_loss": 4056.229248046875, + "loss_ib": 40.62381362915039, + "step": 76 + }, + { + "ce_ib": 60.858001708984375, + "ce_orig": 1.2687323093414307, + "epoch": 0.02214393558127831, + "kl_loss": 3721.635009765625, + "loss_ib": 37.27720642089844, + "step": 77 + }, + { + "ce_ib": 60.991886138916016, + "ce_orig": 0.7511414289474487, + "epoch": 0.02214393558127831, + "kl_loss": 3765.05615234375, + "loss_ib": 37.711551666259766, + "step": 77 + }, + { + "ce_ib": 63.26270294189453, + "ce_orig": 1.4974795579910278, + "epoch": 0.02214393558127831, + "kl_loss": 3476.8515625, + "loss_ib": 34.8317756652832, + "step": 77 + }, + { + "ce_ib": 60.98178482055664, + "ce_orig": 0.8308293223381042, + "epoch": 0.02214393558127831, + "kl_loss": 3436.35009765625, + "loss_ib": 34.42448043823242, + "step": 77 + }, + { + "ce_ib": 60.006534576416016, + "ce_orig": 0.7794530391693115, + "epoch": 0.022431519160255948, + "kl_loss": 3976.89306640625, + "loss_ib": 39.82893753051758, + "step": 78 + }, + { + "ce_ib": 60.324668884277344, + "ce_orig": 1.0577147006988525, + "epoch": 0.022431519160255948, + "kl_loss": 3699.833984375, + "loss_ib": 37.05866241455078, + "step": 78 + }, + { + "ce_ib": 63.445526123046875, + "ce_orig": 1.2055957317352295, + "epoch": 0.022431519160255948, + "kl_loss": 3332.263671875, + "loss_ib": 33.38608169555664, + "step": 78 + }, + { + "ce_ib": 62.043052673339844, + "ce_orig": 1.2299708127975464, + "epoch": 0.022431519160255948, + "kl_loss": 3498.40966796875, + "loss_ib": 35.046138763427734, + "step": 78 + }, + { + "ce_ib": 62.630794525146484, + "ce_orig": 1.6087855100631714, + "epoch": 0.02271910273923359, + "kl_loss": 3403.677490234375, + "loss_ib": 34.09940719604492, + "step": 79 + }, + { + "ce_ib": 61.70748519897461, + "ce_orig": 1.395445466041565, + "epoch": 0.02271910273923359, + "kl_loss": 3506.6875, + "loss_ib": 35.12858200073242, + "step": 79 + }, + { + "ce_ib": 60.67852020263672, + "ce_orig": 0.8778415322303772, + "epoch": 0.02271910273923359, + "kl_loss": 3626.2841796875, + "loss_ib": 36.32352066040039, + "step": 79 + }, + { + "ce_ib": 62.01763153076172, + "ce_orig": 1.1855616569519043, + "epoch": 0.02271910273923359, + "kl_loss": 3562.23974609375, + "loss_ib": 35.68441390991211, + "step": 79 + }, + { + "epoch": 0.02300668631821123, + "grad_norm": 495.8802795410156, + "learning_rate": 2.356687898089172e-06, + "loss": 36.0833, + "step": 80 + }, + { + "ce_ib": 61.64104080200195, + "ce_orig": 1.086162805557251, + "epoch": 0.02300668631821123, + "kl_loss": 3640.07470703125, + "loss_ib": 36.46238708496094, + "step": 80 + }, + { + "ce_ib": 61.57204055786133, + "ce_orig": 0.9259805083274841, + "epoch": 0.02300668631821123, + "kl_loss": 3893.42578125, + "loss_ib": 38.99583053588867, + "step": 80 + }, + { + "ce_ib": 66.0667953491211, + "ce_orig": 1.7032816410064697, + "epoch": 0.02300668631821123, + "kl_loss": 3534.810546875, + "loss_ib": 35.4141731262207, + "step": 80 + }, + { + "ce_ib": 59.70638656616211, + "ce_orig": 0.9606295228004456, + "epoch": 0.02300668631821123, + "kl_loss": 3836.31494140625, + "loss_ib": 38.422855377197266, + "step": 80 + }, + { + "ce_ib": 62.870216369628906, + "ce_orig": 1.2989033460617065, + "epoch": 0.02329426989718887, + "kl_loss": 3637.41015625, + "loss_ib": 36.436973571777344, + "step": 81 + }, + { + "ce_ib": 62.709312438964844, + "ce_orig": 1.103994369506836, + "epoch": 0.02329426989718887, + "kl_loss": 3570.747802734375, + "loss_ib": 35.77018737792969, + "step": 81 + }, + { + "ce_ib": 61.99235916137695, + "ce_orig": 1.6946630477905273, + "epoch": 0.02329426989718887, + "kl_loss": 3532.9951171875, + "loss_ib": 35.39194107055664, + "step": 81 + }, + { + "ce_ib": 60.915489196777344, + "ce_orig": 0.9102914333343506, + "epoch": 0.02329426989718887, + "kl_loss": 3406.8291015625, + "loss_ib": 34.129207611083984, + "step": 81 + }, + { + "ce_ib": 59.30317306518555, + "ce_orig": 0.6368826031684875, + "epoch": 0.023581853476166512, + "kl_loss": 3712.42236328125, + "loss_ib": 37.183528900146484, + "step": 82 + }, + { + "ce_ib": 60.09833526611328, + "ce_orig": 0.9400615096092224, + "epoch": 0.023581853476166512, + "kl_loss": 3772.23291015625, + "loss_ib": 37.78242492675781, + "step": 82 + }, + { + "ce_ib": 59.77298355102539, + "ce_orig": 1.1513607501983643, + "epoch": 0.023581853476166512, + "kl_loss": 3608.817138671875, + "loss_ib": 36.147945404052734, + "step": 82 + }, + { + "ce_ib": 61.4395637512207, + "ce_orig": 0.8887077569961548, + "epoch": 0.023581853476166512, + "kl_loss": 3723.0693359375, + "loss_ib": 37.29213333129883, + "step": 82 + }, + { + "ce_ib": 59.11687469482422, + "ce_orig": 1.0084015130996704, + "epoch": 0.02386943705514415, + "kl_loss": 3634.8994140625, + "loss_ib": 36.40810775756836, + "step": 83 + }, + { + "ce_ib": 61.65114212036133, + "ce_orig": 1.2165615558624268, + "epoch": 0.02386943705514415, + "kl_loss": 3236.768310546875, + "loss_ib": 32.4293327331543, + "step": 83 + }, + { + "ce_ib": 61.245872497558594, + "ce_orig": 0.9978702068328857, + "epoch": 0.02386943705514415, + "kl_loss": 3855.5927734375, + "loss_ib": 38.61717224121094, + "step": 83 + }, + { + "ce_ib": 64.20453643798828, + "ce_orig": 1.7128099203109741, + "epoch": 0.02386943705514415, + "kl_loss": 3634.4951171875, + "loss_ib": 36.40915298461914, + "step": 83 + }, + { + "ce_ib": 64.26237487792969, + "ce_orig": 1.0946186780929565, + "epoch": 0.02415702063412179, + "kl_loss": 3485.233642578125, + "loss_ib": 34.91659927368164, + "step": 84 + }, + { + "ce_ib": 60.3849983215332, + "ce_orig": 0.9367128610610962, + "epoch": 0.02415702063412179, + "kl_loss": 3541.22021484375, + "loss_ib": 35.47258758544922, + "step": 84 + }, + { + "ce_ib": 58.71778869628906, + "ce_orig": 0.7141556143760681, + "epoch": 0.02415702063412179, + "kl_loss": 3839.89599609375, + "loss_ib": 38.45767593383789, + "step": 84 + }, + { + "ce_ib": 59.888084411621094, + "ce_orig": 0.76758873462677, + "epoch": 0.02415702063412179, + "kl_loss": 3747.78759765625, + "loss_ib": 37.53776168823242, + "step": 84 + }, + { + "epoch": 0.024444604213099432, + "grad_norm": 524.7237548828125, + "learning_rate": 2.515923566878981e-06, + "loss": 36.6037, + "step": 85 + }, + { + "ce_ib": 60.545921325683594, + "ce_orig": 1.2117584943771362, + "epoch": 0.024444604213099432, + "kl_loss": 3843.150634765625, + "loss_ib": 38.4920539855957, + "step": 85 + }, + { + "ce_ib": 58.764076232910156, + "ce_orig": 0.38705337047576904, + "epoch": 0.024444604213099432, + "kl_loss": 3380.5478515625, + "loss_ib": 33.86424255371094, + "step": 85 + }, + { + "ce_ib": 63.67119216918945, + "ce_orig": 1.6082842350006104, + "epoch": 0.024444604213099432, + "kl_loss": 3766.42333984375, + "loss_ib": 37.7279052734375, + "step": 85 + }, + { + "ce_ib": 59.66868591308594, + "ce_orig": 0.8970634341239929, + "epoch": 0.024444604213099432, + "kl_loss": 3629.2216796875, + "loss_ib": 36.35188293457031, + "step": 85 + }, + { + "ce_ib": 64.16704559326172, + "ce_orig": 2.276355743408203, + "epoch": 0.024732187792077073, + "kl_loss": 3044.044921875, + "loss_ib": 30.504615783691406, + "step": 86 + }, + { + "ce_ib": 60.19831848144531, + "ce_orig": 0.9945586919784546, + "epoch": 0.024732187792077073, + "kl_loss": 3570.64990234375, + "loss_ib": 35.76669692993164, + "step": 86 + }, + { + "ce_ib": 60.47395706176758, + "ce_orig": 0.7600452303886414, + "epoch": 0.024732187792077073, + "kl_loss": 3757.107666015625, + "loss_ib": 37.63154983520508, + "step": 86 + }, + { + "ce_ib": 61.33274841308594, + "ce_orig": 1.0650345087051392, + "epoch": 0.024732187792077073, + "kl_loss": 3494.167236328125, + "loss_ib": 35.00300598144531, + "step": 86 + }, + { + "ce_ib": 58.06923294067383, + "ce_orig": 1.0382181406021118, + "epoch": 0.025019771371054714, + "kl_loss": 3489.38623046875, + "loss_ib": 34.95193099975586, + "step": 87 + }, + { + "ce_ib": 59.217193603515625, + "ce_orig": 0.9920439720153809, + "epoch": 0.025019771371054714, + "kl_loss": 3379.11279296875, + "loss_ib": 33.850341796875, + "step": 87 + }, + { + "ce_ib": 59.13267517089844, + "ce_orig": 0.7496766448020935, + "epoch": 0.025019771371054714, + "kl_loss": 3705.8818359375, + "loss_ib": 37.117950439453125, + "step": 87 + }, + { + "ce_ib": 63.39867401123047, + "ce_orig": 1.7902837991714478, + "epoch": 0.025019771371054714, + "kl_loss": 3600.96484375, + "loss_ib": 36.07304763793945, + "step": 87 + }, + { + "ce_ib": 59.85429763793945, + "ce_orig": 0.6192349791526794, + "epoch": 0.025307354950032352, + "kl_loss": 3378.796630859375, + "loss_ib": 33.84782028198242, + "step": 88 + }, + { + "ce_ib": 59.664588928222656, + "ce_orig": 0.28315597772598267, + "epoch": 0.025307354950032352, + "kl_loss": 3035.71630859375, + "loss_ib": 30.416828155517578, + "step": 88 + }, + { + "ce_ib": 59.85287094116211, + "ce_orig": 0.8578697443008423, + "epoch": 0.025307354950032352, + "kl_loss": 3704.26123046875, + "loss_ib": 37.10246658325195, + "step": 88 + }, + { + "ce_ib": 60.02167510986328, + "ce_orig": 0.865718424320221, + "epoch": 0.025307354950032352, + "kl_loss": 3559.58740234375, + "loss_ib": 35.6558952331543, + "step": 88 + }, + { + "ce_ib": 61.92749786376953, + "ce_orig": 1.8417946100234985, + "epoch": 0.025594938529009993, + "kl_loss": 3461.548828125, + "loss_ib": 34.67741775512695, + "step": 89 + }, + { + "ce_ib": 60.532379150390625, + "ce_orig": 1.1578682661056519, + "epoch": 0.025594938529009993, + "kl_loss": 3567.969482421875, + "loss_ib": 35.74022674560547, + "step": 89 + }, + { + "ce_ib": 57.639652252197266, + "ce_orig": 0.9897644519805908, + "epoch": 0.025594938529009993, + "kl_loss": 3668.02587890625, + "loss_ib": 36.73789596557617, + "step": 89 + }, + { + "ce_ib": 60.55142593383789, + "ce_orig": 0.9929890632629395, + "epoch": 0.025594938529009993, + "kl_loss": 3560.878662109375, + "loss_ib": 35.66933822631836, + "step": 89 + }, + { + "epoch": 0.025882522107987634, + "grad_norm": 502.2541198730469, + "learning_rate": 2.67515923566879e-06, + "loss": 36.2964, + "step": 90 + }, + { + "ce_ib": 59.376216888427734, + "ce_orig": 0.7132657766342163, + "epoch": 0.025882522107987634, + "kl_loss": 3782.4814453125, + "loss_ib": 37.88418960571289, + "step": 90 + }, + { + "ce_ib": 61.76494216918945, + "ce_orig": 1.0529389381408691, + "epoch": 0.025882522107987634, + "kl_loss": 3165.3203125, + "loss_ib": 31.714967727661133, + "step": 90 + }, + { + "ce_ib": 63.20905303955078, + "ce_orig": 2.0391719341278076, + "epoch": 0.025882522107987634, + "kl_loss": 3383.056884765625, + "loss_ib": 33.893775939941406, + "step": 90 + }, + { + "ce_ib": 60.66413116455078, + "ce_orig": 1.1053894758224487, + "epoch": 0.025882522107987634, + "kl_loss": 3465.13671875, + "loss_ib": 34.712032318115234, + "step": 90 + }, + { + "ce_ib": 59.18904113769531, + "ce_orig": 0.6099984049797058, + "epoch": 0.026170105686965275, + "kl_loss": 3366.81494140625, + "loss_ib": 33.72733688354492, + "step": 91 + }, + { + "ce_ib": 60.47399139404297, + "ce_orig": 1.2133395671844482, + "epoch": 0.026170105686965275, + "kl_loss": 3215.339111328125, + "loss_ib": 32.213863372802734, + "step": 91 + }, + { + "ce_ib": 59.12387466430664, + "ce_orig": 1.0556620359420776, + "epoch": 0.026170105686965275, + "kl_loss": 3760.199462890625, + "loss_ib": 37.66111755371094, + "step": 91 + }, + { + "ce_ib": 58.41682052612305, + "ce_orig": 0.8695086240768433, + "epoch": 0.026170105686965275, + "kl_loss": 3594.7275390625, + "loss_ib": 36.00569152832031, + "step": 91 + }, + { + "ce_ib": 61.80458450317383, + "ce_orig": 1.1173126697540283, + "epoch": 0.026457689265942913, + "kl_loss": 2937.051513671875, + "loss_ib": 29.43231773376465, + "step": 92 + }, + { + "ce_ib": 58.19034957885742, + "ce_orig": 1.1345878839492798, + "epoch": 0.026457689265942913, + "kl_loss": 3721.2353515625, + "loss_ib": 37.27054214477539, + "step": 92 + }, + { + "ce_ib": 64.21983337402344, + "ce_orig": 2.1032416820526123, + "epoch": 0.026457689265942913, + "kl_loss": 3113.11279296875, + "loss_ib": 31.19534683227539, + "step": 92 + }, + { + "ce_ib": 56.81288146972656, + "ce_orig": 0.7186897993087769, + "epoch": 0.026457689265942913, + "kl_loss": 3551.26708984375, + "loss_ib": 35.56948471069336, + "step": 92 + }, + { + "ce_ib": 58.19551467895508, + "ce_orig": 0.6073794364929199, + "epoch": 0.026745272844920554, + "kl_loss": 3257.37744140625, + "loss_ib": 32.6319694519043, + "step": 93 + }, + { + "ce_ib": 59.8862190246582, + "ce_orig": 1.0717018842697144, + "epoch": 0.026745272844920554, + "kl_loss": 3663.1142578125, + "loss_ib": 36.6910285949707, + "step": 93 + }, + { + "ce_ib": 60.67866134643555, + "ce_orig": 0.8607167601585388, + "epoch": 0.026745272844920554, + "kl_loss": 3073.027099609375, + "loss_ib": 30.79094886779785, + "step": 93 + }, + { + "ce_ib": 57.884647369384766, + "ce_orig": 0.7569469809532166, + "epoch": 0.026745272844920554, + "kl_loss": 3273.467041015625, + "loss_ib": 32.79255676269531, + "step": 93 + }, + { + "ce_ib": 62.44021987915039, + "ce_orig": 1.394387125968933, + "epoch": 0.027032856423898195, + "kl_loss": 3389.59716796875, + "loss_ib": 33.958412170410156, + "step": 94 + }, + { + "ce_ib": 60.312530517578125, + "ce_orig": 1.1131266355514526, + "epoch": 0.027032856423898195, + "kl_loss": 3403.34423828125, + "loss_ib": 34.093753814697266, + "step": 94 + }, + { + "ce_ib": 58.25202941894531, + "ce_orig": 0.8151227831840515, + "epoch": 0.027032856423898195, + "kl_loss": 3855.5947265625, + "loss_ib": 38.61419677734375, + "step": 94 + }, + { + "ce_ib": 61.3564338684082, + "ce_orig": 1.1052205562591553, + "epoch": 0.027032856423898195, + "kl_loss": 3158.87841796875, + "loss_ib": 31.6501407623291, + "step": 94 + }, + { + "epoch": 0.027320440002875836, + "grad_norm": 506.2858581542969, + "learning_rate": 2.834394904458599e-06, + "loss": 35.9074, + "step": 95 + }, + { + "ce_ib": 65.28954315185547, + "ce_orig": 1.8119444847106934, + "epoch": 0.027320440002875836, + "kl_loss": 3606.654296875, + "loss_ib": 36.131832122802734, + "step": 95 + }, + { + "ce_ib": 59.008201599121094, + "ce_orig": 1.0341309309005737, + "epoch": 0.027320440002875836, + "kl_loss": 3603.617431640625, + "loss_ib": 36.09518051147461, + "step": 95 + }, + { + "ce_ib": 59.24610137939453, + "ce_orig": 0.8969424962997437, + "epoch": 0.027320440002875836, + "kl_loss": 3502.19482421875, + "loss_ib": 35.08119201660156, + "step": 95 + }, + { + "ce_ib": 60.19147491455078, + "ce_orig": 1.0157058238983154, + "epoch": 0.027320440002875836, + "kl_loss": 3443.8740234375, + "loss_ib": 34.49892807006836, + "step": 95 + }, + { + "ce_ib": 60.122161865234375, + "ce_orig": 0.915783703327179, + "epoch": 0.027608023581853477, + "kl_loss": 3757.819091796875, + "loss_ib": 37.63831329345703, + "step": 96 + }, + { + "ce_ib": 56.74940490722656, + "ce_orig": 0.5688998103141785, + "epoch": 0.027608023581853477, + "kl_loss": 3403.82177734375, + "loss_ib": 34.094966888427734, + "step": 96 + }, + { + "ce_ib": 59.47806167602539, + "ce_orig": 0.9031659364700317, + "epoch": 0.027608023581853477, + "kl_loss": 3398.11572265625, + "loss_ib": 34.04063415527344, + "step": 96 + }, + { + "ce_ib": 61.29694747924805, + "ce_orig": 1.6301769018173218, + "epoch": 0.027608023581853477, + "kl_loss": 3120.549560546875, + "loss_ib": 31.26679039001465, + "step": 96 + }, + { + "ce_ib": 57.831748962402344, + "ce_orig": 0.7541747689247131, + "epoch": 0.027895607160831115, + "kl_loss": 3017.34765625, + "loss_ib": 30.231307983398438, + "step": 97 + }, + { + "ce_ib": 58.9442253112793, + "ce_orig": 1.0282090902328491, + "epoch": 0.027895607160831115, + "kl_loss": 3543.724853515625, + "loss_ib": 35.496192932128906, + "step": 97 + }, + { + "ce_ib": 62.54707336425781, + "ce_orig": 1.1943050622940063, + "epoch": 0.027895607160831115, + "kl_loss": 3438.58935546875, + "loss_ib": 34.44844055175781, + "step": 97 + }, + { + "ce_ib": 59.76130676269531, + "ce_orig": 0.9242035150527954, + "epoch": 0.027895607160831115, + "kl_loss": 3405.859375, + "loss_ib": 34.11835479736328, + "step": 97 + }, + { + "ce_ib": 59.63943862915039, + "ce_orig": 1.080672025680542, + "epoch": 0.028183190739808756, + "kl_loss": 3514.46875, + "loss_ib": 35.20432662963867, + "step": 98 + }, + { + "ce_ib": 58.865867614746094, + "ce_orig": 0.8346519470214844, + "epoch": 0.028183190739808756, + "kl_loss": 3126.54638671875, + "loss_ib": 31.32432746887207, + "step": 98 + }, + { + "ce_ib": 59.8038330078125, + "ce_orig": 0.5681230425834656, + "epoch": 0.028183190739808756, + "kl_loss": 3689.09228515625, + "loss_ib": 36.95072555541992, + "step": 98 + }, + { + "ce_ib": 57.7287483215332, + "ce_orig": 1.3980305194854736, + "epoch": 0.028183190739808756, + "kl_loss": 3602.91552734375, + "loss_ib": 36.086883544921875, + "step": 98 + }, + { + "ce_ib": 59.70271682739258, + "ce_orig": 0.7515245676040649, + "epoch": 0.028470774318786397, + "kl_loss": 3556.43603515625, + "loss_ib": 35.624061584472656, + "step": 99 + }, + { + "ce_ib": 59.0880126953125, + "ce_orig": 1.349737524986267, + "epoch": 0.028470774318786397, + "kl_loss": 3817.392578125, + "loss_ib": 38.23301315307617, + "step": 99 + }, + { + "ce_ib": 58.57311248779297, + "ce_orig": 0.8737644553184509, + "epoch": 0.028470774318786397, + "kl_loss": 3711.00537109375, + "loss_ib": 37.16862869262695, + "step": 99 + }, + { + "ce_ib": 60.80643081665039, + "ce_orig": 1.3529119491577148, + "epoch": 0.028470774318786397, + "kl_loss": 3494.3671875, + "loss_ib": 35.004478454589844, + "step": 99 + }, + { + "epoch": 0.02875835789776404, + "grad_norm": 506.0978088378906, + "learning_rate": 2.993630573248408e-06, + "loss": 35.5561, + "step": 100 + }, + { + "ce_ib": 58.56661605834961, + "ce_orig": 1.1666901111602783, + "epoch": 0.02875835789776404, + "kl_loss": 3524.689453125, + "loss_ib": 35.30546188354492, + "step": 100 + }, + { + "ce_ib": 62.831783294677734, + "ce_orig": 2.2819557189941406, + "epoch": 0.02875835789776404, + "kl_loss": 3127.18798828125, + "loss_ib": 31.3347110748291, + "step": 100 + }, + { + "ce_ib": 58.19925308227539, + "ce_orig": 0.8314814567565918, + "epoch": 0.02875835789776404, + "kl_loss": 3376.9296875, + "loss_ib": 33.82749557495117, + "step": 100 + }, + { + "ce_ib": 57.46586608886719, + "ce_orig": 0.7712212800979614, + "epoch": 0.02875835789776404, + "kl_loss": 3693.83544921875, + "loss_ib": 36.995819091796875, + "step": 100 + }, + { + "ce_ib": 63.20458221435547, + "ce_orig": 2.0820841789245605, + "epoch": 0.02904594147674168, + "kl_loss": 3015.462890625, + "loss_ib": 30.21783447265625, + "step": 101 + }, + { + "ce_ib": 58.481056213378906, + "ce_orig": 0.621671736240387, + "epoch": 0.02904594147674168, + "kl_loss": 3436.01416015625, + "loss_ib": 34.41862106323242, + "step": 101 + }, + { + "ce_ib": 63.86113357543945, + "ce_orig": 1.306851863861084, + "epoch": 0.02904594147674168, + "kl_loss": 3503.22216796875, + "loss_ib": 35.09608459472656, + "step": 101 + }, + { + "ce_ib": 58.2662239074707, + "ce_orig": 0.7199594974517822, + "epoch": 0.02904594147674168, + "kl_loss": 3752.4775390625, + "loss_ib": 37.58304214477539, + "step": 101 + }, + { + "ce_ib": 57.16976547241211, + "ce_orig": 0.7961263656616211, + "epoch": 0.029333525055719317, + "kl_loss": 3487.98388671875, + "loss_ib": 34.937007904052734, + "step": 102 + }, + { + "ce_ib": 59.150516510009766, + "ce_orig": 1.1825999021530151, + "epoch": 0.029333525055719317, + "kl_loss": 3089.3515625, + "loss_ib": 30.952665328979492, + "step": 102 + }, + { + "ce_ib": 58.3362922668457, + "ce_orig": 1.7745475769042969, + "epoch": 0.029333525055719317, + "kl_loss": 3348.009033203125, + "loss_ib": 33.53842544555664, + "step": 102 + }, + { + "ce_ib": 57.58949661254883, + "ce_orig": 1.5423011779785156, + "epoch": 0.029333525055719317, + "kl_loss": 3305.6611328125, + "loss_ib": 33.114200592041016, + "step": 102 + }, + { + "ce_ib": 61.6888427734375, + "ce_orig": 1.5072656869888306, + "epoch": 0.02962110863469696, + "kl_loss": 3463.50439453125, + "loss_ib": 34.69673156738281, + "step": 103 + }, + { + "ce_ib": 60.02954864501953, + "ce_orig": 1.457014560699463, + "epoch": 0.02962110863469696, + "kl_loss": 3395.760986328125, + "loss_ib": 34.01763916015625, + "step": 103 + }, + { + "ce_ib": 59.91975021362305, + "ce_orig": 1.4898319244384766, + "epoch": 0.02962110863469696, + "kl_loss": 3560.35009765625, + "loss_ib": 35.66341781616211, + "step": 103 + }, + { + "ce_ib": 61.39513397216797, + "ce_orig": 1.2825353145599365, + "epoch": 0.02962110863469696, + "kl_loss": 3457.565673828125, + "loss_ib": 34.63705062866211, + "step": 103 + }, + { + "ce_ib": 58.27676773071289, + "ce_orig": 0.7831246256828308, + "epoch": 0.0299086922136746, + "kl_loss": 3082.635498046875, + "loss_ib": 30.88463020324707, + "step": 104 + }, + { + "ce_ib": 59.33658218383789, + "ce_orig": 1.2523558139801025, + "epoch": 0.0299086922136746, + "kl_loss": 3511.50048828125, + "loss_ib": 35.174339294433594, + "step": 104 + }, + { + "ce_ib": 56.923065185546875, + "ce_orig": 0.8949446678161621, + "epoch": 0.0299086922136746, + "kl_loss": 3366.443603515625, + "loss_ib": 33.72135925292969, + "step": 104 + }, + { + "ce_ib": 56.37105178833008, + "ce_orig": 0.2978222370147705, + "epoch": 0.0299086922136746, + "kl_loss": 2516.5166015625, + "loss_ib": 25.22153663635254, + "step": 104 + }, + { + "epoch": 0.03019627579265224, + "grad_norm": 481.3152770996094, + "learning_rate": 3.1528662420382165e-06, + "loss": 34.5437, + "step": 105 + }, + { + "ce_ib": 58.88717269897461, + "ce_orig": 1.2730207443237305, + "epoch": 0.03019627579265224, + "kl_loss": 3434.847412109375, + "loss_ib": 34.4073600769043, + "step": 105 + }, + { + "ce_ib": 61.64189147949219, + "ce_orig": 1.5729440450668335, + "epoch": 0.03019627579265224, + "kl_loss": 3480.771484375, + "loss_ib": 34.86935806274414, + "step": 105 + }, + { + "ce_ib": 58.20305633544922, + "ce_orig": 0.7971786856651306, + "epoch": 0.03019627579265224, + "kl_loss": 3075.66064453125, + "loss_ib": 30.814809799194336, + "step": 105 + }, + { + "ce_ib": 60.825565338134766, + "ce_orig": 1.2366398572921753, + "epoch": 0.03019627579265224, + "kl_loss": 3418.934326171875, + "loss_ib": 34.25016784667969, + "step": 105 + }, + { + "ce_ib": 58.60723114013672, + "ce_orig": 0.7808988690376282, + "epoch": 0.030483859371629878, + "kl_loss": 3320.3857421875, + "loss_ib": 33.26246643066406, + "step": 106 + }, + { + "ce_ib": 57.563995361328125, + "ce_orig": 0.8758856058120728, + "epoch": 0.030483859371629878, + "kl_loss": 3181.57958984375, + "loss_ib": 31.87335777282715, + "step": 106 + }, + { + "ce_ib": 55.23259353637695, + "ce_orig": 0.22707822918891907, + "epoch": 0.030483859371629878, + "kl_loss": 2120.76416015625, + "loss_ib": 21.26287269592285, + "step": 106 + }, + { + "ce_ib": 54.9997673034668, + "ce_orig": 0.2669578790664673, + "epoch": 0.030483859371629878, + "kl_loss": 2237.90966796875, + "loss_ib": 22.43409538269043, + "step": 106 + }, + { + "ce_ib": 58.98134994506836, + "ce_orig": 1.2273164987564087, + "epoch": 0.03077144295060752, + "kl_loss": 1990.204833984375, + "loss_ib": 19.961029052734375, + "step": 107 + }, + { + "ce_ib": 59.87055587768555, + "ce_orig": 1.3858299255371094, + "epoch": 0.03077144295060752, + "kl_loss": 3156.53173828125, + "loss_ib": 31.625186920166016, + "step": 107 + }, + { + "ce_ib": 55.48119354248047, + "ce_orig": 0.8153777122497559, + "epoch": 0.03077144295060752, + "kl_loss": 3736.759765625, + "loss_ib": 37.42307662963867, + "step": 107 + }, + { + "ce_ib": 58.34563446044922, + "ce_orig": 0.9158485531806946, + "epoch": 0.03077144295060752, + "kl_loss": 3471.65380859375, + "loss_ib": 34.77488327026367, + "step": 107 + }, + { + "ce_ib": 58.289031982421875, + "ce_orig": 1.0781446695327759, + "epoch": 0.03105902652958516, + "kl_loss": 3476.87841796875, + "loss_ib": 34.82707214355469, + "step": 108 + }, + { + "ce_ib": 56.98807144165039, + "ce_orig": 0.7857345938682556, + "epoch": 0.03105902652958516, + "kl_loss": 3440.1845703125, + "loss_ib": 34.458831787109375, + "step": 108 + }, + { + "ce_ib": 58.76127624511719, + "ce_orig": 0.7976669073104858, + "epoch": 0.03105902652958516, + "kl_loss": 3203.509765625, + "loss_ib": 32.0938606262207, + "step": 108 + }, + { + "ce_ib": 60.71581268310547, + "ce_orig": 1.6471302509307861, + "epoch": 0.03105902652958516, + "kl_loss": 3262.56396484375, + "loss_ib": 32.68635559082031, + "step": 108 + }, + { + "ce_ib": 58.29198455810547, + "ce_orig": 0.6282819509506226, + "epoch": 0.0313466101085628, + "kl_loss": 3269.9423828125, + "loss_ib": 32.757713317871094, + "step": 109 + }, + { + "ce_ib": 58.99745559692383, + "ce_orig": 1.346901297569275, + "epoch": 0.0313466101085628, + "kl_loss": 3163.72998046875, + "loss_ib": 31.6962947845459, + "step": 109 + }, + { + "ce_ib": 60.90428924560547, + "ce_orig": 1.9273408651351929, + "epoch": 0.0313466101085628, + "kl_loss": 3333.64697265625, + "loss_ib": 33.39737319946289, + "step": 109 + }, + { + "ce_ib": 56.191925048828125, + "ce_orig": 0.5414481163024902, + "epoch": 0.0313466101085628, + "kl_loss": 2611.040283203125, + "loss_ib": 26.166593551635742, + "step": 109 + }, + { + "epoch": 0.03163419368754044, + "grad_norm": 497.0209655761719, + "learning_rate": 3.3121019108280255e-06, + "loss": 34.4129, + "step": 110 + }, + { + "ce_ib": 59.72053909301758, + "ce_orig": 0.9706757664680481, + "epoch": 0.03163419368754044, + "kl_loss": 3381.095947265625, + "loss_ib": 33.87068176269531, + "step": 110 + }, + { + "ce_ib": 57.03586196899414, + "ce_orig": 0.7681443691253662, + "epoch": 0.03163419368754044, + "kl_loss": 3697.955322265625, + "loss_ib": 37.03658676147461, + "step": 110 + }, + { + "ce_ib": 56.56296157836914, + "ce_orig": 1.0412012338638306, + "epoch": 0.03163419368754044, + "kl_loss": 3397.827880859375, + "loss_ib": 34.03483963012695, + "step": 110 + }, + { + "ce_ib": 58.891090393066406, + "ce_orig": 1.897376537322998, + "epoch": 0.03163419368754044, + "kl_loss": 3184.9423828125, + "loss_ib": 31.908315658569336, + "step": 110 + }, + { + "ce_ib": 56.234859466552734, + "ce_orig": 0.5728248953819275, + "epoch": 0.031921777266518084, + "kl_loss": 3383.22705078125, + "loss_ib": 33.88850402832031, + "step": 111 + }, + { + "ce_ib": 57.63355255126953, + "ce_orig": 1.1094613075256348, + "epoch": 0.031921777266518084, + "kl_loss": 3412.992431640625, + "loss_ib": 34.187557220458984, + "step": 111 + }, + { + "ce_ib": 60.40391540527344, + "ce_orig": 1.840659737586975, + "epoch": 0.031921777266518084, + "kl_loss": 3138.0, + "loss_ib": 31.44040298461914, + "step": 111 + }, + { + "ce_ib": 59.63862609863281, + "ce_orig": 1.0923250913619995, + "epoch": 0.031921777266518084, + "kl_loss": 3231.64892578125, + "loss_ib": 32.37612533569336, + "step": 111 + }, + { + "ce_ib": 57.537532806396484, + "ce_orig": 1.1983774900436401, + "epoch": 0.03220936084549572, + "kl_loss": 3396.81494140625, + "loss_ib": 34.02568435668945, + "step": 112 + }, + { + "ce_ib": 58.707515716552734, + "ce_orig": 0.9369058012962341, + "epoch": 0.03220936084549572, + "kl_loss": 3233.78564453125, + "loss_ib": 32.39656448364258, + "step": 112 + }, + { + "ce_ib": 58.834808349609375, + "ce_orig": 1.592905044555664, + "epoch": 0.03220936084549572, + "kl_loss": 3009.6337890625, + "loss_ib": 30.15517234802246, + "step": 112 + }, + { + "ce_ib": 56.853721618652344, + "ce_orig": 1.277111291885376, + "epoch": 0.03220936084549572, + "kl_loss": 3313.233642578125, + "loss_ib": 33.18918991088867, + "step": 112 + }, + { + "ce_ib": 57.65592575073242, + "ce_orig": 1.234552025794983, + "epoch": 0.032496944424473366, + "kl_loss": 3572.73828125, + "loss_ib": 35.785037994384766, + "step": 113 + }, + { + "ce_ib": 57.79000473022461, + "ce_orig": 0.7371742725372314, + "epoch": 0.032496944424473366, + "kl_loss": 3456.331787109375, + "loss_ib": 34.62110900878906, + "step": 113 + }, + { + "ce_ib": 56.60825729370117, + "ce_orig": 0.4902428388595581, + "epoch": 0.032496944424473366, + "kl_loss": 3120.79296875, + "loss_ib": 31.264537811279297, + "step": 113 + }, + { + "ce_ib": 56.7935905456543, + "ce_orig": 1.1464534997940063, + "epoch": 0.032496944424473366, + "kl_loss": 3540.481201171875, + "loss_ib": 35.461605072021484, + "step": 113 + }, + { + "ce_ib": 57.10232925415039, + "ce_orig": 0.8498454093933105, + "epoch": 0.032784528003451004, + "kl_loss": 3277.51904296875, + "loss_ib": 32.83229064941406, + "step": 114 + }, + { + "ce_ib": 55.13420486450195, + "ce_orig": 0.6758685111999512, + "epoch": 0.032784528003451004, + "kl_loss": 3604.8583984375, + "loss_ib": 36.10371780395508, + "step": 114 + }, + { + "ce_ib": 56.1276741027832, + "ce_orig": 0.5766817331314087, + "epoch": 0.032784528003451004, + "kl_loss": 3085.36865234375, + "loss_ib": 30.909812927246094, + "step": 114 + }, + { + "ce_ib": 57.931053161621094, + "ce_orig": 1.4995805025100708, + "epoch": 0.032784528003451004, + "kl_loss": 3098.906005859375, + "loss_ib": 31.0469913482666, + "step": 114 + }, + { + "epoch": 0.03307211158242864, + "grad_norm": 491.3838806152344, + "learning_rate": 3.4713375796178345e-06, + "loss": 34.0102, + "step": 115 + }, + { + "ce_ib": 59.15627670288086, + "ce_orig": 1.2845157384872437, + "epoch": 0.03307211158242864, + "kl_loss": 3372.32470703125, + "loss_ib": 33.78240203857422, + "step": 115 + }, + { + "ce_ib": 53.61143112182617, + "ce_orig": 0.45459315180778503, + "epoch": 0.03307211158242864, + "kl_loss": 1565.43212890625, + "loss_ib": 15.707931518554688, + "step": 115 + }, + { + "ce_ib": 57.589969635009766, + "ce_orig": 0.9765967130661011, + "epoch": 0.03307211158242864, + "kl_loss": 3676.704345703125, + "loss_ib": 36.82463455200195, + "step": 115 + }, + { + "ce_ib": 60.20054244995117, + "ce_orig": 1.7167049646377563, + "epoch": 0.03307211158242864, + "kl_loss": 3165.83837890625, + "loss_ib": 31.718584060668945, + "step": 115 + }, + { + "ce_ib": 53.816566467285156, + "ce_orig": 0.67730712890625, + "epoch": 0.033359695161406286, + "kl_loss": 3459.393310546875, + "loss_ib": 34.64774703979492, + "step": 116 + }, + { + "ce_ib": 54.58203887939453, + "ce_orig": 0.911034107208252, + "epoch": 0.033359695161406286, + "kl_loss": 3594.5859375, + "loss_ib": 36.00044250488281, + "step": 116 + }, + { + "ce_ib": 57.87051773071289, + "ce_orig": 1.118709921836853, + "epoch": 0.033359695161406286, + "kl_loss": 3187.24072265625, + "loss_ib": 31.93027687072754, + "step": 116 + }, + { + "ce_ib": 56.325897216796875, + "ce_orig": 0.9163821935653687, + "epoch": 0.033359695161406286, + "kl_loss": 3129.631591796875, + "loss_ib": 31.35264015197754, + "step": 116 + }, + { + "ce_ib": 54.92118835449219, + "ce_orig": 0.38672956824302673, + "epoch": 0.033647278740383924, + "kl_loss": 3076.05810546875, + "loss_ib": 30.815502166748047, + "step": 117 + }, + { + "ce_ib": 54.690006256103516, + "ce_orig": 0.8246923685073853, + "epoch": 0.033647278740383924, + "kl_loss": 3273.61962890625, + "loss_ib": 32.79088592529297, + "step": 117 + }, + { + "ce_ib": 54.07289505004883, + "ce_orig": 0.7164103388786316, + "epoch": 0.033647278740383924, + "kl_loss": 3623.53173828125, + "loss_ib": 36.289390563964844, + "step": 117 + }, + { + "ce_ib": 60.675941467285156, + "ce_orig": 1.377237319946289, + "epoch": 0.033647278740383924, + "kl_loss": 3372.191650390625, + "loss_ib": 33.7825927734375, + "step": 117 + }, + { + "ce_ib": 56.7794189453125, + "ce_orig": 0.9724603295326233, + "epoch": 0.03393486231936156, + "kl_loss": 3288.50390625, + "loss_ib": 32.94181823730469, + "step": 118 + }, + { + "ce_ib": 54.519351959228516, + "ce_orig": 0.5410425662994385, + "epoch": 0.03393486231936156, + "kl_loss": 3287.3623046875, + "loss_ib": 32.92814254760742, + "step": 118 + }, + { + "ce_ib": 54.34451675415039, + "ce_orig": 0.8038122653961182, + "epoch": 0.03393486231936156, + "kl_loss": 3050.375, + "loss_ib": 30.558095932006836, + "step": 118 + }, + { + "ce_ib": 55.54384994506836, + "ce_orig": 0.8650107979774475, + "epoch": 0.03393486231936156, + "kl_loss": 3366.908447265625, + "loss_ib": 33.72462844848633, + "step": 118 + }, + { + "ce_ib": 57.188228607177734, + "ce_orig": 0.9910153746604919, + "epoch": 0.034222445898339206, + "kl_loss": 3439.970458984375, + "loss_ib": 34.45689010620117, + "step": 119 + }, + { + "ce_ib": 56.907711029052734, + "ce_orig": 0.7115373611450195, + "epoch": 0.034222445898339206, + "kl_loss": 3301.9189453125, + "loss_ib": 33.07609558105469, + "step": 119 + }, + { + "ce_ib": 57.2123908996582, + "ce_orig": 1.0718692541122437, + "epoch": 0.034222445898339206, + "kl_loss": 3499.33203125, + "loss_ib": 35.050533294677734, + "step": 119 + }, + { + "ce_ib": 53.44661331176758, + "ce_orig": 0.24829663336277008, + "epoch": 0.034222445898339206, + "kl_loss": 2903.864013671875, + "loss_ib": 29.092086791992188, + "step": 119 + }, + { + "epoch": 0.03451002947731684, + "grad_norm": 488.701416015625, + "learning_rate": 3.6305732484076435e-06, + "loss": 33.466, + "step": 120 + }, + { + "ce_ib": 58.145294189453125, + "ce_orig": 1.4251874685287476, + "epoch": 0.03451002947731684, + "kl_loss": 3136.32958984375, + "loss_ib": 31.42144012451172, + "step": 120 + }, + { + "ce_ib": 56.203006744384766, + "ce_orig": 0.8692367076873779, + "epoch": 0.03451002947731684, + "kl_loss": 3459.39892578125, + "loss_ib": 34.65019226074219, + "step": 120 + }, + { + "ce_ib": 53.20535659790039, + "ce_orig": 0.23018254339694977, + "epoch": 0.03451002947731684, + "kl_loss": 1242.847900390625, + "loss_ib": 12.481684684753418, + "step": 120 + }, + { + "ce_ib": 56.525516510009766, + "ce_orig": 1.0064990520477295, + "epoch": 0.03451002947731684, + "kl_loss": 3232.018310546875, + "loss_ib": 32.376708984375, + "step": 120 + }, + { + "ce_ib": 54.402645111083984, + "ce_orig": 0.8779659867286682, + "epoch": 0.03479761305629449, + "kl_loss": 3443.0625, + "loss_ib": 34.48502731323242, + "step": 121 + }, + { + "ce_ib": 57.10426330566406, + "ce_orig": 1.2402876615524292, + "epoch": 0.03479761305629449, + "kl_loss": 3028.55224609375, + "loss_ib": 30.342626571655273, + "step": 121 + }, + { + "ce_ib": 58.463226318359375, + "ce_orig": 1.786939263343811, + "epoch": 0.03479761305629449, + "kl_loss": 3314.563232421875, + "loss_ib": 33.204097747802734, + "step": 121 + }, + { + "ce_ib": 55.72274398803711, + "ce_orig": 0.8808313608169556, + "epoch": 0.03479761305629449, + "kl_loss": 3207.419677734375, + "loss_ib": 32.12991714477539, + "step": 121 + }, + { + "ce_ib": 54.48756408691406, + "ce_orig": 0.7947918772697449, + "epoch": 0.035085196635272126, + "kl_loss": 3365.093505859375, + "loss_ib": 33.705421447753906, + "step": 122 + }, + { + "ce_ib": 55.213951110839844, + "ce_orig": 0.8712708353996277, + "epoch": 0.035085196635272126, + "kl_loss": 3264.61181640625, + "loss_ib": 32.701332092285156, + "step": 122 + }, + { + "ce_ib": 56.54671096801758, + "ce_orig": 0.3289700150489807, + "epoch": 0.035085196635272126, + "kl_loss": 2472.1728515625, + "loss_ib": 24.778276443481445, + "step": 122 + }, + { + "ce_ib": 58.04352951049805, + "ce_orig": 1.5535142421722412, + "epoch": 0.035085196635272126, + "kl_loss": 2923.5888671875, + "loss_ib": 29.29393196105957, + "step": 122 + }, + { + "ce_ib": 55.66524124145508, + "ce_orig": 0.7867345809936523, + "epoch": 0.03537278021424976, + "kl_loss": 3238.93212890625, + "loss_ib": 32.444984436035156, + "step": 123 + }, + { + "ce_ib": 54.70219039916992, + "ce_orig": 0.9664096832275391, + "epoch": 0.03537278021424976, + "kl_loss": 3296.1142578125, + "loss_ib": 33.01584243774414, + "step": 123 + }, + { + "ce_ib": 56.01795196533203, + "ce_orig": 0.7100675702095032, + "epoch": 0.03537278021424976, + "kl_loss": 2146.758544921875, + "loss_ib": 21.523603439331055, + "step": 123 + }, + { + "ce_ib": 56.77366256713867, + "ce_orig": 1.1051965951919556, + "epoch": 0.03537278021424976, + "kl_loss": 3093.79345703125, + "loss_ib": 30.994707107543945, + "step": 123 + }, + { + "ce_ib": 57.009521484375, + "ce_orig": 0.7903485894203186, + "epoch": 0.03566036379322741, + "kl_loss": 3563.074462890625, + "loss_ib": 35.68775177001953, + "step": 124 + }, + { + "ce_ib": 55.95924758911133, + "ce_orig": 0.9127640724182129, + "epoch": 0.03566036379322741, + "kl_loss": 3396.574951171875, + "loss_ib": 34.02170944213867, + "step": 124 + }, + { + "ce_ib": 56.98552703857422, + "ce_orig": 1.0692386627197266, + "epoch": 0.03566036379322741, + "kl_loss": 3032.76025390625, + "loss_ib": 30.384586334228516, + "step": 124 + }, + { + "ce_ib": 54.63241958618164, + "ce_orig": 1.1963261365890503, + "epoch": 0.03566036379322741, + "kl_loss": 3243.490966796875, + "loss_ib": 32.489540100097656, + "step": 124 + }, + { + "epoch": 0.035947947372205045, + "grad_norm": 484.4737854003906, + "learning_rate": 3.789808917197453e-06, + "loss": 32.9421, + "step": 125 + }, + { + "ce_ib": 53.653743743896484, + "ce_orig": 0.7380173802375793, + "epoch": 0.035947947372205045, + "kl_loss": 3123.354736328125, + "loss_ib": 31.287200927734375, + "step": 125 + }, + { + "ce_ib": 59.990928649902344, + "ce_orig": 1.5150277614593506, + "epoch": 0.035947947372205045, + "kl_loss": 3297.04443359375, + "loss_ib": 33.030433654785156, + "step": 125 + }, + { + "ce_ib": 57.65206527709961, + "ce_orig": 0.8391215801239014, + "epoch": 0.035947947372205045, + "kl_loss": 3000.66748046875, + "loss_ib": 30.0643253326416, + "step": 125 + }, + { + "ce_ib": 56.54597854614258, + "ce_orig": 1.0180349349975586, + "epoch": 0.035947947372205045, + "kl_loss": 3431.23974609375, + "loss_ib": 34.36894226074219, + "step": 125 + }, + { + "ce_ib": 54.98883819580078, + "ce_orig": 0.7524531483650208, + "epoch": 0.03623553095118269, + "kl_loss": 3311.05419921875, + "loss_ib": 33.16552734375, + "step": 126 + }, + { + "ce_ib": 54.45235061645508, + "ce_orig": 1.1706984043121338, + "epoch": 0.03623553095118269, + "kl_loss": 2987.413818359375, + "loss_ib": 29.928590774536133, + "step": 126 + }, + { + "ce_ib": 56.98481369018555, + "ce_orig": 1.6676733493804932, + "epoch": 0.03623553095118269, + "kl_loss": 2663.32080078125, + "loss_ib": 26.69019317626953, + "step": 126 + }, + { + "ce_ib": 55.758174896240234, + "ce_orig": 1.054375410079956, + "epoch": 0.03623553095118269, + "kl_loss": 3309.024658203125, + "loss_ib": 33.14600372314453, + "step": 126 + }, + { + "ce_ib": 56.77848815917969, + "ce_orig": 1.3328591585159302, + "epoch": 0.03652311453016033, + "kl_loss": 3304.2041015625, + "loss_ib": 33.09881591796875, + "step": 127 + }, + { + "ce_ib": 57.968807220458984, + "ce_orig": 1.372467041015625, + "epoch": 0.03652311453016033, + "kl_loss": 3255.86376953125, + "loss_ib": 32.61660385131836, + "step": 127 + }, + { + "ce_ib": 53.87700653076172, + "ce_orig": 0.602317214012146, + "epoch": 0.03652311453016033, + "kl_loss": 2769.507080078125, + "loss_ib": 27.748947143554688, + "step": 127 + }, + { + "ce_ib": 57.150821685791016, + "ce_orig": 1.5336121320724487, + "epoch": 0.03652311453016033, + "kl_loss": 3108.03857421875, + "loss_ib": 31.137535095214844, + "step": 127 + }, + { + "ce_ib": 55.34831619262695, + "ce_orig": 1.2944082021713257, + "epoch": 0.036810698109137965, + "kl_loss": 3049.82568359375, + "loss_ib": 30.553606033325195, + "step": 128 + }, + { + "ce_ib": 55.08821105957031, + "ce_orig": 1.2756754159927368, + "epoch": 0.036810698109137965, + "kl_loss": 3294.335205078125, + "loss_ib": 32.99843978881836, + "step": 128 + }, + { + "ce_ib": 56.542110443115234, + "ce_orig": 1.3169299364089966, + "epoch": 0.036810698109137965, + "kl_loss": 3289.144287109375, + "loss_ib": 32.9479866027832, + "step": 128 + }, + { + "ce_ib": 54.76836395263672, + "ce_orig": 0.9867354035377502, + "epoch": 0.036810698109137965, + "kl_loss": 3241.2333984375, + "loss_ib": 32.46710205078125, + "step": 128 + }, + { + "ce_ib": 54.50560760498047, + "ce_orig": 0.9337356090545654, + "epoch": 0.03709828168811561, + "kl_loss": 3071.6708984375, + "loss_ib": 30.77121353149414, + "step": 129 + }, + { + "ce_ib": 56.29538345336914, + "ce_orig": 1.379828691482544, + "epoch": 0.03709828168811561, + "kl_loss": 2586.766845703125, + "loss_ib": 25.92396354675293, + "step": 129 + }, + { + "ce_ib": 54.395912170410156, + "ce_orig": 0.26942014694213867, + "epoch": 0.03709828168811561, + "kl_loss": 3214.80029296875, + "loss_ib": 32.20240020751953, + "step": 129 + }, + { + "ce_ib": 53.4327392578125, + "ce_orig": 1.03145432472229, + "epoch": 0.03709828168811561, + "kl_loss": 3447.09765625, + "loss_ib": 34.524410247802734, + "step": 129 + }, + { + "epoch": 0.03738586526709325, + "grad_norm": 485.5062255859375, + "learning_rate": 3.949044585987262e-06, + "loss": 32.6015, + "step": 130 + }, + { + "ce_ib": 57.03399658203125, + "ce_orig": 1.2322174310684204, + "epoch": 0.03738586526709325, + "kl_loss": 3191.55419921875, + "loss_ib": 31.97257423400879, + "step": 130 + }, + { + "ce_ib": 52.21621322631836, + "ce_orig": 0.743526816368103, + "epoch": 0.03738586526709325, + "kl_loss": 3611.2763671875, + "loss_ib": 36.16497802734375, + "step": 130 + }, + { + "ce_ib": 58.345272064208984, + "ce_orig": 1.358251929283142, + "epoch": 0.03738586526709325, + "kl_loss": 3283.64990234375, + "loss_ib": 32.89484405517578, + "step": 130 + }, + { + "ce_ib": 56.030879974365234, + "ce_orig": 0.6732674241065979, + "epoch": 0.03738586526709325, + "kl_loss": 3234.9384765625, + "loss_ib": 32.40541458129883, + "step": 130 + }, + { + "ce_ib": 57.00092697143555, + "ce_orig": 0.9541739225387573, + "epoch": 0.03767344884607089, + "kl_loss": 3000.126708984375, + "loss_ib": 30.05826759338379, + "step": 131 + }, + { + "ce_ib": 56.356407165527344, + "ce_orig": 1.652900218963623, + "epoch": 0.03767344884607089, + "kl_loss": 2972.48486328125, + "loss_ib": 29.781206130981445, + "step": 131 + }, + { + "ce_ib": 56.224830627441406, + "ce_orig": 1.5765421390533447, + "epoch": 0.03767344884607089, + "kl_loss": 3185.729248046875, + "loss_ib": 31.913516998291016, + "step": 131 + }, + { + "ce_ib": 54.61565017700195, + "ce_orig": 0.7351348996162415, + "epoch": 0.03767344884607089, + "kl_loss": 3193.2255859375, + "loss_ib": 31.98687171936035, + "step": 131 + }, + { + "ce_ib": 54.55472183227539, + "ce_orig": 1.086585283279419, + "epoch": 0.03796103242504853, + "kl_loss": 3093.6298828125, + "loss_ib": 30.990854263305664, + "step": 132 + }, + { + "ce_ib": 52.542877197265625, + "ce_orig": 1.0202161073684692, + "epoch": 0.03796103242504853, + "kl_loss": 3178.2353515625, + "loss_ib": 31.834896087646484, + "step": 132 + }, + { + "ce_ib": 58.19728088378906, + "ce_orig": 1.5428324937820435, + "epoch": 0.03796103242504853, + "kl_loss": 3120.078857421875, + "loss_ib": 31.25898551940918, + "step": 132 + }, + { + "ce_ib": 55.14646911621094, + "ce_orig": 1.1489073038101196, + "epoch": 0.03796103242504853, + "kl_loss": 3333.323974609375, + "loss_ib": 33.38838577270508, + "step": 132 + }, + { + "ce_ib": 58.42383575439453, + "ce_orig": 1.3307501077651978, + "epoch": 0.03824861600402617, + "kl_loss": 3119.617919921875, + "loss_ib": 31.254600524902344, + "step": 133 + }, + { + "ce_ib": 53.10658645629883, + "ce_orig": 0.47057151794433594, + "epoch": 0.03824861600402617, + "kl_loss": 2581.533203125, + "loss_ib": 25.868436813354492, + "step": 133 + }, + { + "ce_ib": 55.19010543823242, + "ce_orig": 1.461709976196289, + "epoch": 0.03824861600402617, + "kl_loss": 3218.223876953125, + "loss_ib": 32.2374267578125, + "step": 133 + }, + { + "ce_ib": 55.269439697265625, + "ce_orig": 0.9877228140830994, + "epoch": 0.03824861600402617, + "kl_loss": 3257.63916015625, + "loss_ib": 32.63166046142578, + "step": 133 + }, + { + "ce_ib": 54.13009262084961, + "ce_orig": 1.1180518865585327, + "epoch": 0.03853619958300381, + "kl_loss": 3161.342529296875, + "loss_ib": 31.66755485534668, + "step": 134 + }, + { + "ce_ib": 51.55915069580078, + "ce_orig": 0.476241797208786, + "epoch": 0.03853619958300381, + "kl_loss": 3272.551025390625, + "loss_ib": 32.777069091796875, + "step": 134 + }, + { + "ce_ib": 51.68458557128906, + "ce_orig": 0.78632652759552, + "epoch": 0.03853619958300381, + "kl_loss": 3286.03662109375, + "loss_ib": 32.91204833984375, + "step": 134 + }, + { + "ce_ib": 52.53611373901367, + "ce_orig": 0.601127564907074, + "epoch": 0.03853619958300381, + "kl_loss": 3292.48193359375, + "loss_ib": 32.97735595703125, + "step": 134 + }, + { + "epoch": 0.03882378316198145, + "grad_norm": 484.1724853515625, + "learning_rate": 4.10828025477707e-06, + "loss": 32.4331, + "step": 135 + }, + { + "ce_ib": 52.33879089355469, + "ce_orig": 0.9939437508583069, + "epoch": 0.03882378316198145, + "kl_loss": 3350.6103515625, + "loss_ib": 33.558441162109375, + "step": 135 + }, + { + "ce_ib": 57.75893783569336, + "ce_orig": 1.900199055671692, + "epoch": 0.03882378316198145, + "kl_loss": 3010.15771484375, + "loss_ib": 30.15933609008789, + "step": 135 + }, + { + "ce_ib": 52.984676361083984, + "ce_orig": 1.0698907375335693, + "epoch": 0.03882378316198145, + "kl_loss": 3073.521484375, + "loss_ib": 30.78820037841797, + "step": 135 + }, + { + "ce_ib": 52.373348236083984, + "ce_orig": 0.7963224649429321, + "epoch": 0.03882378316198145, + "kl_loss": 2985.98486328125, + "loss_ib": 29.912221908569336, + "step": 135 + }, + { + "ce_ib": 52.30683517456055, + "ce_orig": 0.7155612707138062, + "epoch": 0.039111366740959094, + "kl_loss": 3103.802978515625, + "loss_ib": 31.090335845947266, + "step": 136 + }, + { + "ce_ib": 53.33190155029297, + "ce_orig": 0.7446028590202332, + "epoch": 0.039111366740959094, + "kl_loss": 3185.09228515625, + "loss_ib": 31.904254913330078, + "step": 136 + }, + { + "ce_ib": 52.83795928955078, + "ce_orig": 0.77790367603302, + "epoch": 0.039111366740959094, + "kl_loss": 2887.193359375, + "loss_ib": 28.92477035522461, + "step": 136 + }, + { + "ce_ib": 54.76385498046875, + "ce_orig": 0.3635737895965576, + "epoch": 0.039111366740959094, + "kl_loss": 1756.336669921875, + "loss_ib": 17.618131637573242, + "step": 136 + }, + { + "ce_ib": 55.12189865112305, + "ce_orig": 1.2548986673355103, + "epoch": 0.03939895031993673, + "kl_loss": 3142.7470703125, + "loss_ib": 31.48259162902832, + "step": 137 + }, + { + "ce_ib": 52.87950897216797, + "ce_orig": 0.8208221793174744, + "epoch": 0.03939895031993673, + "kl_loss": 3317.441650390625, + "loss_ib": 33.227294921875, + "step": 137 + }, + { + "ce_ib": 53.339599609375, + "ce_orig": 0.7617712020874023, + "epoch": 0.03939895031993673, + "kl_loss": 3318.7333984375, + "loss_ib": 33.24067306518555, + "step": 137 + }, + { + "ce_ib": 52.08175277709961, + "ce_orig": 0.8632348775863647, + "epoch": 0.03939895031993673, + "kl_loss": 2449.124267578125, + "loss_ib": 24.543325424194336, + "step": 137 + }, + { + "ce_ib": 54.37158203125, + "ce_orig": 1.257277250289917, + "epoch": 0.03968653389891437, + "kl_loss": 3025.6142578125, + "loss_ib": 30.310514450073242, + "step": 138 + }, + { + "ce_ib": 54.39937210083008, + "ce_orig": 0.8799593448638916, + "epoch": 0.03968653389891437, + "kl_loss": 3261.203857421875, + "loss_ib": 32.666439056396484, + "step": 138 + }, + { + "ce_ib": 53.31037902832031, + "ce_orig": 0.5872792601585388, + "epoch": 0.03968653389891437, + "kl_loss": 2659.81982421875, + "loss_ib": 26.651508331298828, + "step": 138 + }, + { + "ce_ib": 52.60165786743164, + "ce_orig": 0.9464865922927856, + "epoch": 0.03968653389891437, + "kl_loss": 3371.40185546875, + "loss_ib": 33.76662063598633, + "step": 138 + }, + { + "ce_ib": 52.575260162353516, + "ce_orig": 0.46206337213516235, + "epoch": 0.039974117477892014, + "kl_loss": 2850.687255859375, + "loss_ib": 28.5594482421875, + "step": 139 + }, + { + "ce_ib": 53.40571975708008, + "ce_orig": 0.6606719493865967, + "epoch": 0.039974117477892014, + "kl_loss": 2885.156005859375, + "loss_ib": 28.904964447021484, + "step": 139 + }, + { + "ce_ib": 54.047019958496094, + "ce_orig": 1.0017621517181396, + "epoch": 0.039974117477892014, + "kl_loss": 2731.3134765625, + "loss_ib": 27.3671817779541, + "step": 139 + }, + { + "ce_ib": 53.62920379638672, + "ce_orig": 0.9739691615104675, + "epoch": 0.039974117477892014, + "kl_loss": 3102.27099609375, + "loss_ib": 31.076339721679688, + "step": 139 + }, + { + "epoch": 0.04026170105686965, + "grad_norm": 459.9145202636719, + "learning_rate": 4.26751592356688e-06, + "loss": 31.4281, + "step": 140 + }, + { + "ce_ib": 54.69853973388672, + "ce_orig": 1.1423823833465576, + "epoch": 0.04026170105686965, + "kl_loss": 3194.418701171875, + "loss_ib": 31.998886108398438, + "step": 140 + }, + { + "ce_ib": 50.82587432861328, + "ce_orig": 0.6784489750862122, + "epoch": 0.04026170105686965, + "kl_loss": 3204.444091796875, + "loss_ib": 32.09526443481445, + "step": 140 + }, + { + "ce_ib": 51.895416259765625, + "ce_orig": 0.7560833096504211, + "epoch": 0.04026170105686965, + "kl_loss": 3291.0400390625, + "loss_ib": 32.96229553222656, + "step": 140 + }, + { + "ce_ib": 51.76190948486328, + "ce_orig": 0.8880281448364258, + "epoch": 0.04026170105686965, + "kl_loss": 3192.113037109375, + "loss_ib": 31.97289276123047, + "step": 140 + }, + { + "ce_ib": 55.869300842285156, + "ce_orig": 1.4632692337036133, + "epoch": 0.040549284635847296, + "kl_loss": 3255.39404296875, + "loss_ib": 32.60980987548828, + "step": 141 + }, + { + "ce_ib": 51.57696533203125, + "ce_orig": 1.1582895517349243, + "epoch": 0.040549284635847296, + "kl_loss": 3429.54150390625, + "loss_ib": 34.34699249267578, + "step": 141 + }, + { + "ce_ib": 53.07311248779297, + "ce_orig": 1.187181830406189, + "epoch": 0.040549284635847296, + "kl_loss": 3198.211669921875, + "loss_ib": 32.03519058227539, + "step": 141 + }, + { + "ce_ib": 55.36647415161133, + "ce_orig": 1.443948745727539, + "epoch": 0.040549284635847296, + "kl_loss": 2707.63525390625, + "loss_ib": 27.131717681884766, + "step": 141 + }, + { + "ce_ib": 53.37755584716797, + "ce_orig": 1.1925128698349, + "epoch": 0.040836868214824934, + "kl_loss": 2913.3408203125, + "loss_ib": 29.186784744262695, + "step": 142 + }, + { + "ce_ib": 50.930152893066406, + "ce_orig": 0.631533682346344, + "epoch": 0.040836868214824934, + "kl_loss": 3136.6171875, + "loss_ib": 31.41710090637207, + "step": 142 + }, + { + "ce_ib": 54.71884536743164, + "ce_orig": 0.2883818447589874, + "epoch": 0.040836868214824934, + "kl_loss": 1113.8369140625, + "loss_ib": 11.19308853149414, + "step": 142 + }, + { + "ce_ib": 53.51261901855469, + "ce_orig": 0.9670610427856445, + "epoch": 0.040836868214824934, + "kl_loss": 3197.89208984375, + "loss_ib": 32.032432556152344, + "step": 142 + }, + { + "ce_ib": 53.469810485839844, + "ce_orig": 0.9970093965530396, + "epoch": 0.04112445179380257, + "kl_loss": 3185.8349609375, + "loss_ib": 31.91181755065918, + "step": 143 + }, + { + "ce_ib": 52.19078826904297, + "ce_orig": 0.9311832785606384, + "epoch": 0.04112445179380257, + "kl_loss": 3028.153076171875, + "loss_ib": 30.33371925354004, + "step": 143 + }, + { + "ce_ib": 54.6331901550293, + "ce_orig": 1.481334924697876, + "epoch": 0.04112445179380257, + "kl_loss": 2883.3681640625, + "loss_ib": 28.888315200805664, + "step": 143 + }, + { + "ce_ib": 54.52168273925781, + "ce_orig": 1.2398836612701416, + "epoch": 0.04112445179380257, + "kl_loss": 2991.29931640625, + "loss_ib": 29.967514038085938, + "step": 143 + }, + { + "ce_ib": 55.38058090209961, + "ce_orig": 1.4223779439926147, + "epoch": 0.041412035372780216, + "kl_loss": 2755.46337890625, + "loss_ib": 27.610013961791992, + "step": 144 + }, + { + "ce_ib": 52.242977142333984, + "ce_orig": 0.357547402381897, + "epoch": 0.041412035372780216, + "kl_loss": 2976.813232421875, + "loss_ib": 29.820375442504883, + "step": 144 + }, + { + "ce_ib": 50.533851623535156, + "ce_orig": 0.5890440940856934, + "epoch": 0.041412035372780216, + "kl_loss": 3232.82861328125, + "loss_ib": 32.37881851196289, + "step": 144 + }, + { + "ce_ib": 50.53768539428711, + "ce_orig": 0.7130187153816223, + "epoch": 0.041412035372780216, + "kl_loss": 3210.93310546875, + "loss_ib": 32.15986633300781, + "step": 144 + }, + { + "epoch": 0.041699618951757854, + "grad_norm": 441.0871887207031, + "learning_rate": 4.426751592356688e-06, + "loss": 31.0274, + "step": 145 + }, + { + "ce_ib": 54.21467208862305, + "ce_orig": 1.196142315864563, + "epoch": 0.041699618951757854, + "kl_loss": 3077.82958984375, + "loss_ib": 30.832509994506836, + "step": 145 + }, + { + "ce_ib": 52.88565444946289, + "ce_orig": 0.6000714898109436, + "epoch": 0.041699618951757854, + "kl_loss": 3106.078369140625, + "loss_ib": 31.11366844177246, + "step": 145 + }, + { + "ce_ib": 52.295101165771484, + "ce_orig": 0.775937020778656, + "epoch": 0.041699618951757854, + "kl_loss": 3244.89892578125, + "loss_ib": 32.50128173828125, + "step": 145 + }, + { + "ce_ib": 52.64463424682617, + "ce_orig": 1.2740693092346191, + "epoch": 0.041699618951757854, + "kl_loss": 2585.204833984375, + "loss_ib": 25.904691696166992, + "step": 145 + }, + { + "ce_ib": 49.40985870361328, + "ce_orig": 0.8610404133796692, + "epoch": 0.0419872025307355, + "kl_loss": 3257.540771484375, + "loss_ib": 32.62481689453125, + "step": 146 + }, + { + "ce_ib": 52.093650817871094, + "ce_orig": 0.6095184683799744, + "epoch": 0.0419872025307355, + "kl_loss": 2127.02783203125, + "loss_ib": 21.322372436523438, + "step": 146 + }, + { + "ce_ib": 50.681373596191406, + "ce_orig": 0.5939872860908508, + "epoch": 0.0419872025307355, + "kl_loss": 3274.1162109375, + "loss_ib": 32.79184341430664, + "step": 146 + }, + { + "ce_ib": 52.19261169433594, + "ce_orig": 1.2758476734161377, + "epoch": 0.0419872025307355, + "kl_loss": 3013.127685546875, + "loss_ib": 30.183467864990234, + "step": 146 + }, + { + "ce_ib": 53.93763732910156, + "ce_orig": 1.4467533826828003, + "epoch": 0.042274786109713136, + "kl_loss": 2893.376953125, + "loss_ib": 28.987707138061523, + "step": 147 + }, + { + "ce_ib": 50.615875244140625, + "ce_orig": 0.8429998159408569, + "epoch": 0.042274786109713136, + "kl_loss": 3275.34423828125, + "loss_ib": 32.80405807495117, + "step": 147 + }, + { + "ce_ib": 51.70206832885742, + "ce_orig": 0.6785926222801208, + "epoch": 0.042274786109713136, + "kl_loss": 2999.413818359375, + "loss_ib": 30.045839309692383, + "step": 147 + }, + { + "ce_ib": 52.60684585571289, + "ce_orig": 0.7887744903564453, + "epoch": 0.042274786109713136, + "kl_loss": 2922.228515625, + "loss_ib": 29.274892807006836, + "step": 147 + }, + { + "ce_ib": 53.9986457824707, + "ce_orig": 1.4007996320724487, + "epoch": 0.042562369688690774, + "kl_loss": 2788.4990234375, + "loss_ib": 27.938987731933594, + "step": 148 + }, + { + "ce_ib": 51.32645034790039, + "ce_orig": 0.51473069190979, + "epoch": 0.042562369688690774, + "kl_loss": 3179.380126953125, + "loss_ib": 31.84512710571289, + "step": 148 + }, + { + "ce_ib": 50.38296127319336, + "ce_orig": 0.7060822248458862, + "epoch": 0.042562369688690774, + "kl_loss": 3014.38427734375, + "loss_ib": 30.194225311279297, + "step": 148 + }, + { + "ce_ib": 50.93301773071289, + "ce_orig": 0.6229360699653625, + "epoch": 0.042562369688690774, + "kl_loss": 2787.4912109375, + "loss_ib": 27.925844192504883, + "step": 148 + }, + { + "ce_ib": 50.93965148925781, + "ce_orig": 0.719894528388977, + "epoch": 0.04284995326766842, + "kl_loss": 3031.69287109375, + "loss_ib": 30.367868423461914, + "step": 149 + }, + { + "ce_ib": 54.47814178466797, + "ce_orig": 2.0223007202148438, + "epoch": 0.04284995326766842, + "kl_loss": 2659.58544921875, + "loss_ib": 26.650331497192383, + "step": 149 + }, + { + "ce_ib": 49.89313888549805, + "ce_orig": 0.8729849457740784, + "epoch": 0.04284995326766842, + "kl_loss": 2983.194580078125, + "loss_ib": 29.881837844848633, + "step": 149 + }, + { + "ce_ib": 52.88545608520508, + "ce_orig": 1.3256616592407227, + "epoch": 0.04284995326766842, + "kl_loss": 3132.82666015625, + "loss_ib": 31.38115119934082, + "step": 149 + }, + { + "epoch": 0.043137536846646056, + "grad_norm": 459.27581787109375, + "learning_rate": 4.585987261146497e-06, + "loss": 30.1884, + "step": 150 + }, + { + "ce_ib": 50.605384826660156, + "ce_orig": 1.0216200351715088, + "epoch": 0.043137536846646056, + "kl_loss": 3048.3466796875, + "loss_ib": 30.53407096862793, + "step": 150 + }, + { + "ce_ib": 52.01803207397461, + "ce_orig": 1.3228956460952759, + "epoch": 0.043137536846646056, + "kl_loss": 2962.606201171875, + "loss_ib": 29.67807960510254, + "step": 150 + }, + { + "ce_ib": 53.09737014770508, + "ce_orig": 0.548643946647644, + "epoch": 0.043137536846646056, + "kl_loss": 2902.078125, + "loss_ib": 29.07387924194336, + "step": 150 + }, + { + "ce_ib": 55.06275177001953, + "ce_orig": 1.2563978433609009, + "epoch": 0.043137536846646056, + "kl_loss": 3022.199951171875, + "loss_ib": 30.277061462402344, + "step": 150 + }, + { + "ce_ib": 51.54874801635742, + "ce_orig": 0.9161649942398071, + "epoch": 0.043425120425623694, + "kl_loss": 3167.26806640625, + "loss_ib": 31.724227905273438, + "step": 151 + }, + { + "ce_ib": 55.47027587890625, + "ce_orig": 2.167449712753296, + "epoch": 0.043425120425623694, + "kl_loss": 2913.7783203125, + "loss_ib": 29.193254470825195, + "step": 151 + }, + { + "ce_ib": 52.68526077270508, + "ce_orig": 0.6615663766860962, + "epoch": 0.043425120425623694, + "kl_loss": 2900.98974609375, + "loss_ib": 29.06258201599121, + "step": 151 + }, + { + "ce_ib": 52.60275650024414, + "ce_orig": 1.29401695728302, + "epoch": 0.043425120425623694, + "kl_loss": 2888.08935546875, + "loss_ib": 28.933494567871094, + "step": 151 + }, + { + "ce_ib": 52.2644157409668, + "ce_orig": 0.8575372695922852, + "epoch": 0.04371270400460134, + "kl_loss": 2958.155029296875, + "loss_ib": 29.63381576538086, + "step": 152 + }, + { + "ce_ib": 53.492984771728516, + "ce_orig": 1.9527488946914673, + "epoch": 0.04371270400460134, + "kl_loss": 2661.2138671875, + "loss_ib": 26.665632247924805, + "step": 152 + }, + { + "ce_ib": 51.27360153198242, + "ce_orig": 0.8329764604568481, + "epoch": 0.04371270400460134, + "kl_loss": 3091.322265625, + "loss_ib": 30.964496612548828, + "step": 152 + }, + { + "ce_ib": 50.547359466552734, + "ce_orig": 0.8471105694770813, + "epoch": 0.04371270400460134, + "kl_loss": 3160.12841796875, + "loss_ib": 31.651830673217773, + "step": 152 + }, + { + "ce_ib": 54.09409713745117, + "ce_orig": 1.3073227405548096, + "epoch": 0.044000287583578976, + "kl_loss": 2638.63916015625, + "loss_ib": 26.44048500061035, + "step": 153 + }, + { + "ce_ib": 51.90098571777344, + "ce_orig": 1.1256515979766846, + "epoch": 0.044000287583578976, + "kl_loss": 2839.703857421875, + "loss_ib": 28.44894027709961, + "step": 153 + }, + { + "ce_ib": 52.50508499145508, + "ce_orig": 1.0821564197540283, + "epoch": 0.044000287583578976, + "kl_loss": 2833.94873046875, + "loss_ib": 28.391990661621094, + "step": 153 + }, + { + "ce_ib": 50.86665344238281, + "ce_orig": 0.8515676259994507, + "epoch": 0.044000287583578976, + "kl_loss": 3101.07177734375, + "loss_ib": 31.06158447265625, + "step": 153 + }, + { + "ce_ib": 50.96135711669922, + "ce_orig": 0.7307798862457275, + "epoch": 0.04428787116255662, + "kl_loss": 2950.802490234375, + "loss_ib": 29.55898666381836, + "step": 154 + }, + { + "ce_ib": 48.63287353515625, + "ce_orig": 0.43459776043891907, + "epoch": 0.04428787116255662, + "kl_loss": 2904.9091796875, + "loss_ib": 29.09772300720215, + "step": 154 + }, + { + "ce_ib": 52.380332946777344, + "ce_orig": 1.771660566329956, + "epoch": 0.04428787116255662, + "kl_loss": 2897.521240234375, + "loss_ib": 29.027591705322266, + "step": 154 + }, + { + "ce_ib": 51.10393524169922, + "ce_orig": 0.7512515783309937, + "epoch": 0.04428787116255662, + "kl_loss": 2788.21533203125, + "loss_ib": 27.933256149291992, + "step": 154 + }, + { + "epoch": 0.04457545474153426, + "grad_norm": 448.2861022949219, + "learning_rate": 4.745222929936306e-06, + "loss": 30.0583, + "step": 155 + }, + { + "ce_ib": 49.585208892822266, + "ce_orig": 1.1462219953536987, + "epoch": 0.04457545474153426, + "kl_loss": 3172.34619140625, + "loss_ib": 31.773046493530273, + "step": 155 + }, + { + "ce_ib": 52.14706802368164, + "ce_orig": 1.3412156105041504, + "epoch": 0.04457545474153426, + "kl_loss": 2451.83447265625, + "loss_ib": 24.57048988342285, + "step": 155 + }, + { + "ce_ib": 50.7850341796875, + "ce_orig": 0.6238870620727539, + "epoch": 0.04457545474153426, + "kl_loss": 2603.59716796875, + "loss_ib": 26.08675765991211, + "step": 155 + }, + { + "ce_ib": 53.027767181396484, + "ce_orig": 1.0817673206329346, + "epoch": 0.04457545474153426, + "kl_loss": 2944.25390625, + "loss_ib": 29.49556541442871, + "step": 155 + }, + { + "ce_ib": 52.55374526977539, + "ce_orig": 0.7729134559631348, + "epoch": 0.044863038320511896, + "kl_loss": 2258.596435546875, + "loss_ib": 22.638517379760742, + "step": 156 + }, + { + "ce_ib": 50.43128967285156, + "ce_orig": 0.5304668545722961, + "epoch": 0.044863038320511896, + "kl_loss": 2332.14794921875, + "loss_ib": 23.371912002563477, + "step": 156 + }, + { + "ce_ib": 48.52753448486328, + "ce_orig": 0.8267412185668945, + "epoch": 0.044863038320511896, + "kl_loss": 2993.38134765625, + "loss_ib": 29.98233985900879, + "step": 156 + }, + { + "ce_ib": 47.48508834838867, + "ce_orig": 0.8616426587104797, + "epoch": 0.044863038320511896, + "kl_loss": 2922.7626953125, + "loss_ib": 29.27511215209961, + "step": 156 + }, + { + "ce_ib": 48.53927230834961, + "ce_orig": 0.7964724898338318, + "epoch": 0.04515062189948954, + "kl_loss": 2892.180419921875, + "loss_ib": 28.97034454345703, + "step": 157 + }, + { + "ce_ib": 48.53841018676758, + "ce_orig": 0.7484610080718994, + "epoch": 0.04515062189948954, + "kl_loss": 2939.5947265625, + "loss_ib": 29.44448471069336, + "step": 157 + }, + { + "ce_ib": 47.32265090942383, + "ce_orig": 0.5679759979248047, + "epoch": 0.04515062189948954, + "kl_loss": 3089.84228515625, + "loss_ib": 30.945743560791016, + "step": 157 + }, + { + "ce_ib": 50.060791015625, + "ce_orig": 0.9825291633605957, + "epoch": 0.04515062189948954, + "kl_loss": 2617.75341796875, + "loss_ib": 26.22759437561035, + "step": 157 + }, + { + "ce_ib": 54.85032272338867, + "ce_orig": 1.4325385093688965, + "epoch": 0.04543820547846718, + "kl_loss": 2687.5576171875, + "loss_ib": 26.93042755126953, + "step": 158 + }, + { + "ce_ib": 51.16709899902344, + "ce_orig": 1.104825735092163, + "epoch": 0.04543820547846718, + "kl_loss": 2728.729736328125, + "loss_ib": 27.338462829589844, + "step": 158 + }, + { + "ce_ib": 51.86174774169922, + "ce_orig": 1.2090163230895996, + "epoch": 0.04543820547846718, + "kl_loss": 2632.31298828125, + "loss_ib": 26.37499237060547, + "step": 158 + }, + { + "ce_ib": 50.25190734863281, + "ce_orig": 0.7247406244277954, + "epoch": 0.04543820547846718, + "kl_loss": 2597.2509765625, + "loss_ib": 26.02276039123535, + "step": 158 + }, + { + "ce_ib": 50.78385543823242, + "ce_orig": 0.8480434417724609, + "epoch": 0.04572578905744482, + "kl_loss": 2646.48876953125, + "loss_ib": 26.515670776367188, + "step": 159 + }, + { + "ce_ib": 52.23230743408203, + "ce_orig": 1.2253400087356567, + "epoch": 0.04572578905744482, + "kl_loss": 2775.04931640625, + "loss_ib": 27.802724838256836, + "step": 159 + }, + { + "ce_ib": 54.1110954284668, + "ce_orig": 1.6037498712539673, + "epoch": 0.04572578905744482, + "kl_loss": 2728.93798828125, + "loss_ib": 27.343490600585938, + "step": 159 + }, + { + "ce_ib": 49.18448257446289, + "ce_orig": 0.9601776599884033, + "epoch": 0.04572578905744482, + "kl_loss": 2112.19775390625, + "loss_ib": 21.171161651611328, + "step": 159 + }, + { + "epoch": 0.04601337263642246, + "grad_norm": 445.6031799316406, + "learning_rate": 4.904458598726115e-06, + "loss": 28.9221, + "step": 160 + }, + { + "ce_ib": 50.62841033935547, + "ce_orig": 0.9100584387779236, + "epoch": 0.04601337263642246, + "kl_loss": 2742.574462890625, + "loss_ib": 27.47637176513672, + "step": 160 + }, + { + "ce_ib": 51.763214111328125, + "ce_orig": 1.2738044261932373, + "epoch": 0.04601337263642246, + "kl_loss": 2890.77099609375, + "loss_ib": 28.95947265625, + "step": 160 + }, + { + "ce_ib": 48.35606384277344, + "ce_orig": 0.8663270473480225, + "epoch": 0.04601337263642246, + "kl_loss": 2612.95361328125, + "loss_ib": 26.177892684936523, + "step": 160 + }, + { + "ce_ib": 48.03151321411133, + "ce_orig": 1.0263557434082031, + "epoch": 0.04601337263642246, + "kl_loss": 3139.1376953125, + "loss_ib": 31.439409255981445, + "step": 160 + }, + { + "ce_ib": 48.87652587890625, + "ce_orig": 0.6928001046180725, + "epoch": 0.0463009562154001, + "kl_loss": 2784.03466796875, + "loss_ib": 27.889223098754883, + "step": 161 + }, + { + "ce_ib": 48.439903259277344, + "ce_orig": 1.0835696458816528, + "epoch": 0.0463009562154001, + "kl_loss": 3102.0654296875, + "loss_ib": 31.069093704223633, + "step": 161 + }, + { + "ce_ib": 50.39353561401367, + "ce_orig": 0.7698415517807007, + "epoch": 0.0463009562154001, + "kl_loss": 1483.0360107421875, + "loss_ib": 14.880752563476562, + "step": 161 + }, + { + "ce_ib": 51.16973876953125, + "ce_orig": 1.4212281703948975, + "epoch": 0.0463009562154001, + "kl_loss": 2305.23681640625, + "loss_ib": 23.10353660583496, + "step": 161 + }, + { + "ce_ib": 49.88762283325195, + "ce_orig": 0.6684384346008301, + "epoch": 0.04658853979437774, + "kl_loss": 2829.216796875, + "loss_ib": 28.34205436706543, + "step": 162 + }, + { + "ce_ib": 47.18947219848633, + "ce_orig": 1.0846861600875854, + "epoch": 0.04658853979437774, + "kl_loss": 2960.41796875, + "loss_ib": 29.651369094848633, + "step": 162 + }, + { + "ce_ib": 48.81897735595703, + "ce_orig": 0.5415892004966736, + "epoch": 0.04658853979437774, + "kl_loss": 2178.2265625, + "loss_ib": 21.831083297729492, + "step": 162 + }, + { + "ce_ib": 48.079383850097656, + "ce_orig": 0.9551630020141602, + "epoch": 0.04658853979437774, + "kl_loss": 3026.81884765625, + "loss_ib": 30.316267013549805, + "step": 162 + }, + { + "ce_ib": 48.57440185546875, + "ce_orig": 0.8759099245071411, + "epoch": 0.04687612337335538, + "kl_loss": 2439.7685546875, + "loss_ib": 24.446258544921875, + "step": 163 + }, + { + "ce_ib": 49.86715316772461, + "ce_orig": 1.0347627401351929, + "epoch": 0.04687612337335538, + "kl_loss": 2655.91357421875, + "loss_ib": 26.6090030670166, + "step": 163 + }, + { + "ce_ib": 49.29155731201172, + "ce_orig": 0.8745110034942627, + "epoch": 0.04687612337335538, + "kl_loss": 2898.206298828125, + "loss_ib": 29.031354904174805, + "step": 163 + }, + { + "ce_ib": 50.21908950805664, + "ce_orig": 1.458777904510498, + "epoch": 0.04687612337335538, + "kl_loss": 2556.103515625, + "loss_ib": 25.61125373840332, + "step": 163 + }, + { + "ce_ib": 48.91142654418945, + "ce_orig": 0.6749159693717957, + "epoch": 0.047163706952333025, + "kl_loss": 2708.091796875, + "loss_ib": 27.12982940673828, + "step": 164 + }, + { + "ce_ib": 50.982364654541016, + "ce_orig": 0.6340957283973694, + "epoch": 0.047163706952333025, + "kl_loss": 2460.154296875, + "loss_ib": 24.652523040771484, + "step": 164 + }, + { + "ce_ib": 51.6622428894043, + "ce_orig": 0.8664228320121765, + "epoch": 0.047163706952333025, + "kl_loss": 2626.44091796875, + "loss_ib": 26.316070556640625, + "step": 164 + }, + { + "ce_ib": 48.696929931640625, + "ce_orig": 0.46712541580200195, + "epoch": 0.047163706952333025, + "kl_loss": 2673.508544921875, + "loss_ib": 26.783781051635742, + "step": 164 + }, + { + "epoch": 0.04745129053131066, + "grad_norm": 424.326904296875, + "learning_rate": 5.063694267515924e-06, + "loss": 28.6858, + "step": 165 + }, + { + "ce_ib": 47.32035827636719, + "ce_orig": 0.7065096497535706, + "epoch": 0.04745129053131066, + "kl_loss": 2612.828857421875, + "loss_ib": 26.175607681274414, + "step": 165 + }, + { + "ce_ib": 53.695343017578125, + "ce_orig": 0.9257593154907227, + "epoch": 0.04745129053131066, + "kl_loss": 2765.24658203125, + "loss_ib": 27.706161499023438, + "step": 165 + }, + { + "ce_ib": 46.061485290527344, + "ce_orig": 0.6095507740974426, + "epoch": 0.04745129053131066, + "kl_loss": 2924.33349609375, + "loss_ib": 29.289396286010742, + "step": 165 + }, + { + "ce_ib": 49.95652770996094, + "ce_orig": 0.39805570244789124, + "epoch": 0.04745129053131066, + "kl_loss": 2691.94287109375, + "loss_ib": 26.969383239746094, + "step": 165 + }, + { + "ce_ib": 48.33354187011719, + "ce_orig": 1.066157579421997, + "epoch": 0.0477388741102883, + "kl_loss": 2646.9306640625, + "loss_ib": 26.51763916015625, + "step": 166 + }, + { + "ce_ib": 44.15623474121094, + "ce_orig": 0.12060567736625671, + "epoch": 0.0477388741102883, + "kl_loss": 1492.6593017578125, + "loss_ib": 14.970748901367188, + "step": 166 + }, + { + "ce_ib": 49.3094367980957, + "ce_orig": 1.4275071620941162, + "epoch": 0.0477388741102883, + "kl_loss": 2720.3349609375, + "loss_ib": 27.25265884399414, + "step": 166 + }, + { + "ce_ib": 48.710384368896484, + "ce_orig": 0.6945171356201172, + "epoch": 0.0477388741102883, + "kl_loss": 2963.2177734375, + "loss_ib": 29.68088722229004, + "step": 166 + }, + { + "ce_ib": 50.00297927856445, + "ce_orig": 1.062921404838562, + "epoch": 0.048026457689265944, + "kl_loss": 2493.588134765625, + "loss_ib": 24.985883712768555, + "step": 167 + }, + { + "ce_ib": 48.40403366088867, + "ce_orig": 0.8593358397483826, + "epoch": 0.048026457689265944, + "kl_loss": 2742.294921875, + "loss_ib": 27.471351623535156, + "step": 167 + }, + { + "ce_ib": 46.95838165283203, + "ce_orig": 0.5543254017829895, + "epoch": 0.048026457689265944, + "kl_loss": 2647.65576171875, + "loss_ib": 26.523515701293945, + "step": 167 + }, + { + "ce_ib": 48.49465560913086, + "ce_orig": 0.9735248684883118, + "epoch": 0.048026457689265944, + "kl_loss": 2853.06201171875, + "loss_ib": 28.579113006591797, + "step": 167 + }, + { + "ce_ib": 47.27997589111328, + "ce_orig": 0.986024022102356, + "epoch": 0.04831404126824358, + "kl_loss": 2900.328369140625, + "loss_ib": 29.05056381225586, + "step": 168 + }, + { + "ce_ib": 47.31760025024414, + "ce_orig": 0.7703031897544861, + "epoch": 0.04831404126824358, + "kl_loss": 2637.726318359375, + "loss_ib": 26.424579620361328, + "step": 168 + }, + { + "ce_ib": 48.37574768066406, + "ce_orig": 0.9344309568405151, + "epoch": 0.04831404126824358, + "kl_loss": 2614.62939453125, + "loss_ib": 26.194669723510742, + "step": 168 + }, + { + "ce_ib": 46.49268341064453, + "ce_orig": 0.7439426183700562, + "epoch": 0.04831404126824358, + "kl_loss": 2807.43408203125, + "loss_ib": 28.120832443237305, + "step": 168 + }, + { + "ce_ib": 48.985836029052734, + "ce_orig": 1.2297948598861694, + "epoch": 0.04860162484722123, + "kl_loss": 2628.673828125, + "loss_ib": 26.335723876953125, + "step": 169 + }, + { + "ce_ib": 47.281558990478516, + "ce_orig": 1.0321601629257202, + "epoch": 0.04860162484722123, + "kl_loss": 2619.60400390625, + "loss_ib": 26.243322372436523, + "step": 169 + }, + { + "ce_ib": 47.78618621826172, + "ce_orig": 1.0097578763961792, + "epoch": 0.04860162484722123, + "kl_loss": 2793.48046875, + "loss_ib": 27.982589721679688, + "step": 169 + }, + { + "ce_ib": 47.44657897949219, + "ce_orig": 1.0025876760482788, + "epoch": 0.04860162484722123, + "kl_loss": 2977.064453125, + "loss_ib": 29.818090438842773, + "step": 169 + }, + { + "epoch": 0.048889208426198864, + "grad_norm": 426.09820556640625, + "learning_rate": 5.222929936305733e-06, + "loss": 27.4107, + "step": 170 + }, + { + "ce_ib": 48.28227996826172, + "ce_orig": 1.2277311086654663, + "epoch": 0.048889208426198864, + "kl_loss": 2577.318359375, + "loss_ib": 25.82146453857422, + "step": 170 + }, + { + "ce_ib": 48.456058502197266, + "ce_orig": 1.0684230327606201, + "epoch": 0.048889208426198864, + "kl_loss": 2181.768798828125, + "loss_ib": 21.86614418029785, + "step": 170 + }, + { + "ce_ib": 46.85710906982422, + "ce_orig": 0.6369960308074951, + "epoch": 0.048889208426198864, + "kl_loss": 2463.142578125, + "loss_ib": 24.67828369140625, + "step": 170 + }, + { + "ce_ib": 50.36212921142578, + "ce_orig": 1.4817943572998047, + "epoch": 0.048889208426198864, + "kl_loss": 2810.15185546875, + "loss_ib": 28.151878356933594, + "step": 170 + }, + { + "ce_ib": 52.663734436035156, + "ce_orig": 1.3452659845352173, + "epoch": 0.0491767920051765, + "kl_loss": 2626.7080078125, + "loss_ib": 26.31974220275879, + "step": 171 + }, + { + "ce_ib": 44.656044006347656, + "ce_orig": 0.6703915596008301, + "epoch": 0.0491767920051765, + "kl_loss": 2806.165771484375, + "loss_ib": 28.106313705444336, + "step": 171 + }, + { + "ce_ib": 49.89923858642578, + "ce_orig": 0.8543052673339844, + "epoch": 0.0491767920051765, + "kl_loss": 2713.99462890625, + "loss_ib": 27.18984603881836, + "step": 171 + }, + { + "ce_ib": 43.8620719909668, + "ce_orig": 0.943006157875061, + "epoch": 0.0491767920051765, + "kl_loss": 2850.892578125, + "loss_ib": 28.55278778076172, + "step": 171 + }, + { + "ce_ib": 49.70503234863281, + "ce_orig": 1.0421463251113892, + "epoch": 0.04946437558415415, + "kl_loss": 2539.814453125, + "loss_ib": 25.44784927368164, + "step": 172 + }, + { + "ce_ib": 46.116146087646484, + "ce_orig": 0.8185178637504578, + "epoch": 0.04946437558415415, + "kl_loss": 2844.80224609375, + "loss_ib": 28.494136810302734, + "step": 172 + }, + { + "ce_ib": 48.9494514465332, + "ce_orig": 1.0492463111877441, + "epoch": 0.04946437558415415, + "kl_loss": 2648.58056640625, + "loss_ib": 26.53475570678711, + "step": 172 + }, + { + "ce_ib": 45.931827545166016, + "ce_orig": 0.6465027332305908, + "epoch": 0.04946437558415415, + "kl_loss": 2837.117919921875, + "loss_ib": 28.417110443115234, + "step": 172 + }, + { + "ce_ib": 46.874114990234375, + "ce_orig": 0.8541361093521118, + "epoch": 0.049751959163131784, + "kl_loss": 2705.7646484375, + "loss_ib": 27.104520797729492, + "step": 173 + }, + { + "ce_ib": 49.18909454345703, + "ce_orig": 0.8849540948867798, + "epoch": 0.049751959163131784, + "kl_loss": 2525.669921875, + "loss_ib": 25.30588722229004, + "step": 173 + }, + { + "ce_ib": 49.24184036254883, + "ce_orig": 1.275468111038208, + "epoch": 0.049751959163131784, + "kl_loss": 2448.7724609375, + "loss_ib": 24.536964416503906, + "step": 173 + }, + { + "ce_ib": 48.2338981628418, + "ce_orig": 1.5680046081542969, + "epoch": 0.049751959163131784, + "kl_loss": 2418.929443359375, + "loss_ib": 24.23752784729004, + "step": 173 + }, + { + "ce_ib": 46.14396667480469, + "ce_orig": 0.541758120059967, + "epoch": 0.05003954274210943, + "kl_loss": 1417.190185546875, + "loss_ib": 14.218045234680176, + "step": 174 + }, + { + "ce_ib": 45.08213806152344, + "ce_orig": 1.0284433364868164, + "epoch": 0.05003954274210943, + "kl_loss": 2671.77490234375, + "loss_ib": 26.76283073425293, + "step": 174 + }, + { + "ce_ib": 47.65272903442383, + "ce_orig": 0.7605993151664734, + "epoch": 0.05003954274210943, + "kl_loss": 2763.138916015625, + "loss_ib": 27.67904281616211, + "step": 174 + }, + { + "ce_ib": 45.709381103515625, + "ce_orig": 0.6788672208786011, + "epoch": 0.05003954274210943, + "kl_loss": 2476.36376953125, + "loss_ib": 24.80934715270996, + "step": 174 + }, + { + "epoch": 0.050327126321087066, + "grad_norm": 413.482666015625, + "learning_rate": 5.3821656050955415e-06, + "loss": 27.0741, + "step": 175 + }, + { + "ce_ib": 50.374427795410156, + "ce_orig": 1.4046454429626465, + "epoch": 0.050327126321087066, + "kl_loss": 2682.432373046875, + "loss_ib": 26.874696731567383, + "step": 175 + }, + { + "ce_ib": 45.06935119628906, + "ce_orig": 0.6166799068450928, + "epoch": 0.050327126321087066, + "kl_loss": 2835.7060546875, + "loss_ib": 28.402128219604492, + "step": 175 + }, + { + "ce_ib": 48.812774658203125, + "ce_orig": 0.8136134147644043, + "epoch": 0.050327126321087066, + "kl_loss": 2307.1552734375, + "loss_ib": 23.120365142822266, + "step": 175 + }, + { + "ce_ib": 46.97080612182617, + "ce_orig": 0.8589736819267273, + "epoch": 0.050327126321087066, + "kl_loss": 2857.4111328125, + "loss_ib": 28.62108039855957, + "step": 175 + }, + { + "ce_ib": 47.42732238769531, + "ce_orig": 0.9090732336044312, + "epoch": 0.050614709900064704, + "kl_loss": 2458.442138671875, + "loss_ib": 24.631847381591797, + "step": 176 + }, + { + "ce_ib": 48.879520416259766, + "ce_orig": 1.1093182563781738, + "epoch": 0.050614709900064704, + "kl_loss": 2716.2275390625, + "loss_ib": 27.21115493774414, + "step": 176 + }, + { + "ce_ib": 45.72584915161133, + "ce_orig": 0.7976894378662109, + "epoch": 0.050614709900064704, + "kl_loss": 2734.431640625, + "loss_ib": 27.39004135131836, + "step": 176 + }, + { + "ce_ib": 48.863277435302734, + "ce_orig": 1.1800131797790527, + "epoch": 0.050614709900064704, + "kl_loss": 2549.98583984375, + "loss_ib": 25.54871940612793, + "step": 176 + }, + { + "ce_ib": 49.88660430908203, + "ce_orig": 1.3869460821151733, + "epoch": 0.05090229347904235, + "kl_loss": 2348.32080078125, + "loss_ib": 23.53309440612793, + "step": 177 + }, + { + "ce_ib": 46.74383544921875, + "ce_orig": 0.861750602722168, + "epoch": 0.05090229347904235, + "kl_loss": 2821.16943359375, + "loss_ib": 28.25843620300293, + "step": 177 + }, + { + "ce_ib": 46.5212516784668, + "ce_orig": 0.9110401272773743, + "epoch": 0.05090229347904235, + "kl_loss": 1920.51806640625, + "loss_ib": 19.25170135498047, + "step": 177 + }, + { + "ce_ib": 45.50922775268555, + "ce_orig": 0.762277364730835, + "epoch": 0.05090229347904235, + "kl_loss": 2755.8955078125, + "loss_ib": 27.60446548461914, + "step": 177 + }, + { + "ce_ib": 45.10780715942383, + "ce_orig": 0.9310659170150757, + "epoch": 0.051189877058019986, + "kl_loss": 2471.52685546875, + "loss_ib": 24.7603759765625, + "step": 178 + }, + { + "ce_ib": 46.319915771484375, + "ce_orig": 0.8169469237327576, + "epoch": 0.051189877058019986, + "kl_loss": 2594.20947265625, + "loss_ib": 25.988414764404297, + "step": 178 + }, + { + "ce_ib": 49.259620666503906, + "ce_orig": 1.4122384786605835, + "epoch": 0.051189877058019986, + "kl_loss": 2652.73583984375, + "loss_ib": 26.576618194580078, + "step": 178 + }, + { + "ce_ib": 47.23049545288086, + "ce_orig": 1.2408967018127441, + "epoch": 0.051189877058019986, + "kl_loss": 2727.913818359375, + "loss_ib": 27.32636833190918, + "step": 178 + }, + { + "ce_ib": 48.52485656738281, + "ce_orig": 0.561891496181488, + "epoch": 0.051477460636997624, + "kl_loss": 2130.169921875, + "loss_ib": 21.350223541259766, + "step": 179 + }, + { + "ce_ib": 51.518375396728516, + "ce_orig": 1.9220662117004395, + "epoch": 0.051477460636997624, + "kl_loss": 2634.577392578125, + "loss_ib": 26.39729118347168, + "step": 179 + }, + { + "ce_ib": 45.864845275878906, + "ce_orig": 0.9671883583068848, + "epoch": 0.051477460636997624, + "kl_loss": 2693.470458984375, + "loss_ib": 26.980567932128906, + "step": 179 + }, + { + "ce_ib": 45.78055191040039, + "ce_orig": 0.9218305349349976, + "epoch": 0.051477460636997624, + "kl_loss": 2529.023681640625, + "loss_ib": 25.336017608642578, + "step": 179 + }, + { + "epoch": 0.05176504421597527, + "grad_norm": 409.18316650390625, + "learning_rate": 5.541401273885351e-06, + "loss": 26.2136, + "step": 180 + }, + { + "ce_ib": 47.819671630859375, + "ce_orig": 1.0385197401046753, + "epoch": 0.05176504421597527, + "kl_loss": 2495.19580078125, + "loss_ib": 24.99977684020996, + "step": 180 + }, + { + "ce_ib": 48.391448974609375, + "ce_orig": 1.5398671627044678, + "epoch": 0.05176504421597527, + "kl_loss": 2459.814453125, + "loss_ib": 24.646535873413086, + "step": 180 + }, + { + "ce_ib": 45.70133590698242, + "ce_orig": 1.4346660375595093, + "epoch": 0.05176504421597527, + "kl_loss": 2470.22998046875, + "loss_ib": 24.74799919128418, + "step": 180 + }, + { + "ce_ib": 45.42562484741211, + "ce_orig": 1.027616262435913, + "epoch": 0.05176504421597527, + "kl_loss": 2735.41064453125, + "loss_ib": 27.3995304107666, + "step": 180 + }, + { + "ce_ib": 46.22211456298828, + "ce_orig": 1.2898683547973633, + "epoch": 0.052052627794952906, + "kl_loss": 2344.324462890625, + "loss_ib": 23.48946762084961, + "step": 181 + }, + { + "ce_ib": 45.933162689208984, + "ce_orig": 0.3807089924812317, + "epoch": 0.052052627794952906, + "kl_loss": 2405.65283203125, + "loss_ib": 24.102460861206055, + "step": 181 + }, + { + "ce_ib": 48.467613220214844, + "ce_orig": 1.0768738985061646, + "epoch": 0.052052627794952906, + "kl_loss": 2349.7158203125, + "loss_ib": 23.545623779296875, + "step": 181 + }, + { + "ce_ib": 43.66925811767578, + "ce_orig": 0.540174126625061, + "epoch": 0.052052627794952906, + "kl_loss": 2476.49609375, + "loss_ib": 24.808629989624023, + "step": 181 + }, + { + "ce_ib": 50.432796478271484, + "ce_orig": 1.4778696298599243, + "epoch": 0.05234021137393055, + "kl_loss": 2345.696533203125, + "loss_ib": 23.507396697998047, + "step": 182 + }, + { + "ce_ib": 44.97416687011719, + "ce_orig": 0.8222552537918091, + "epoch": 0.05234021137393055, + "kl_loss": 2430.04833984375, + "loss_ib": 24.345455169677734, + "step": 182 + }, + { + "ce_ib": 48.673431396484375, + "ce_orig": 1.0176633596420288, + "epoch": 0.05234021137393055, + "kl_loss": 2423.87109375, + "loss_ib": 24.287384033203125, + "step": 182 + }, + { + "ce_ib": 44.51708221435547, + "ce_orig": 0.6682273745536804, + "epoch": 0.05234021137393055, + "kl_loss": 2574.65380859375, + "loss_ib": 25.791053771972656, + "step": 182 + }, + { + "ce_ib": 46.51145935058594, + "ce_orig": 1.098725438117981, + "epoch": 0.05262779495290819, + "kl_loss": 2416.0830078125, + "loss_ib": 24.207340240478516, + "step": 183 + }, + { + "ce_ib": 48.851741790771484, + "ce_orig": 1.3513818979263306, + "epoch": 0.05262779495290819, + "kl_loss": 2428.113037109375, + "loss_ib": 24.32998275756836, + "step": 183 + }, + { + "ce_ib": 44.624210357666016, + "ce_orig": 0.6521418690681458, + "epoch": 0.05262779495290819, + "kl_loss": 2469.832275390625, + "loss_ib": 24.74294662475586, + "step": 183 + }, + { + "ce_ib": 48.94157791137695, + "ce_orig": 1.2012220621109009, + "epoch": 0.05262779495290819, + "kl_loss": 2484.021484375, + "loss_ib": 24.8891544342041, + "step": 183 + }, + { + "ce_ib": 47.77019500732422, + "ce_orig": 0.8514222502708435, + "epoch": 0.052915378531885826, + "kl_loss": 2497.58349609375, + "loss_ib": 25.023605346679688, + "step": 184 + }, + { + "ce_ib": 46.87969970703125, + "ce_orig": 1.339136004447937, + "epoch": 0.052915378531885826, + "kl_loss": 2096.02587890625, + "loss_ib": 21.007137298583984, + "step": 184 + }, + { + "ce_ib": 48.20975875854492, + "ce_orig": 1.381858229637146, + "epoch": 0.052915378531885826, + "kl_loss": 2225.2705078125, + "loss_ib": 22.300914764404297, + "step": 184 + }, + { + "ce_ib": 48.024993896484375, + "ce_orig": 0.6926367282867432, + "epoch": 0.052915378531885826, + "kl_loss": 2289.90087890625, + "loss_ib": 22.947032928466797, + "step": 184 + }, + { + "epoch": 0.05320296211086347, + "grad_norm": 394.3677062988281, + "learning_rate": 5.7006369426751594e-06, + "loss": 25.7313, + "step": 185 + }, + { + "ce_ib": 46.50983428955078, + "ce_orig": 1.1019322872161865, + "epoch": 0.05320296211086347, + "kl_loss": 2536.72998046875, + "loss_ib": 25.413808822631836, + "step": 185 + }, + { + "ce_ib": 44.686187744140625, + "ce_orig": 0.7587331533432007, + "epoch": 0.05320296211086347, + "kl_loss": 2358.890380859375, + "loss_ib": 23.633588790893555, + "step": 185 + }, + { + "ce_ib": 44.013580322265625, + "ce_orig": 0.7084860801696777, + "epoch": 0.05320296211086347, + "kl_loss": 2679.59033203125, + "loss_ib": 26.839916229248047, + "step": 185 + }, + { + "ce_ib": 43.398311614990234, + "ce_orig": 0.9784666299819946, + "epoch": 0.05320296211086347, + "kl_loss": 2370.73291015625, + "loss_ib": 23.7507266998291, + "step": 185 + }, + { + "ce_ib": 44.2143669128418, + "ce_orig": 0.786540687084198, + "epoch": 0.05349054568984111, + "kl_loss": 2228.3095703125, + "loss_ib": 22.327308654785156, + "step": 186 + }, + { + "ce_ib": 46.3162727355957, + "ce_orig": 1.3913073539733887, + "epoch": 0.05349054568984111, + "kl_loss": 2373.01953125, + "loss_ib": 23.77651023864746, + "step": 186 + }, + { + "ce_ib": 46.72264862060547, + "ce_orig": 1.0480103492736816, + "epoch": 0.05349054568984111, + "kl_loss": 2314.23193359375, + "loss_ib": 23.189043045043945, + "step": 186 + }, + { + "ce_ib": 48.647151947021484, + "ce_orig": 1.5039794445037842, + "epoch": 0.05349054568984111, + "kl_loss": 1132.71826171875, + "loss_ib": 11.375829696655273, + "step": 186 + }, + { + "ce_ib": 43.383331298828125, + "ce_orig": 0.5294433832168579, + "epoch": 0.05377812926881875, + "kl_loss": 1875.989990234375, + "loss_ib": 18.80328369140625, + "step": 187 + }, + { + "ce_ib": 46.24761962890625, + "ce_orig": 0.8533762693405151, + "epoch": 0.05377812926881875, + "kl_loss": 2202.668701171875, + "loss_ib": 22.072933197021484, + "step": 187 + }, + { + "ce_ib": 43.863502502441406, + "ce_orig": 1.4904100894927979, + "epoch": 0.05377812926881875, + "kl_loss": 2434.986083984375, + "loss_ib": 24.393722534179688, + "step": 187 + }, + { + "ce_ib": 44.60874557495117, + "ce_orig": 0.33239492774009705, + "epoch": 0.05377812926881875, + "kl_loss": 2029.0833740234375, + "loss_ib": 20.33544158935547, + "step": 187 + }, + { + "ce_ib": 43.16314697265625, + "ce_orig": 0.7957232594490051, + "epoch": 0.05406571284779639, + "kl_loss": 2502.121826171875, + "loss_ib": 25.064382553100586, + "step": 188 + }, + { + "ce_ib": 42.3214225769043, + "ce_orig": 0.8192757964134216, + "epoch": 0.05406571284779639, + "kl_loss": 2299.024169921875, + "loss_ib": 23.032562255859375, + "step": 188 + }, + { + "ce_ib": 45.48292541503906, + "ce_orig": 0.9447529911994934, + "epoch": 0.05406571284779639, + "kl_loss": 2239.673828125, + "loss_ib": 22.44222068786621, + "step": 188 + }, + { + "ce_ib": 43.36006164550781, + "ce_orig": 0.7006217837333679, + "epoch": 0.05406571284779639, + "kl_loss": 2508.6435546875, + "loss_ib": 25.12979507446289, + "step": 188 + }, + { + "ce_ib": 42.9240837097168, + "ce_orig": 0.9430150985717773, + "epoch": 0.05435329642677403, + "kl_loss": 2421.86962890625, + "loss_ib": 24.261621475219727, + "step": 189 + }, + { + "ce_ib": 44.628814697265625, + "ce_orig": 1.0881403684616089, + "epoch": 0.05435329642677403, + "kl_loss": 2387.1103515625, + "loss_ib": 23.91573143005371, + "step": 189 + }, + { + "ce_ib": 43.131500244140625, + "ce_orig": 0.6736454963684082, + "epoch": 0.05435329642677403, + "kl_loss": 2487.064453125, + "loss_ib": 24.913776397705078, + "step": 189 + }, + { + "ce_ib": 44.661094665527344, + "ce_orig": 0.9864615797996521, + "epoch": 0.05435329642677403, + "kl_loss": 2235.8447265625, + "loss_ib": 22.403106689453125, + "step": 189 + }, + { + "epoch": 0.05464088000575167, + "grad_norm": 399.3453369140625, + "learning_rate": 5.859872611464969e-06, + "loss": 24.4398, + "step": 190 + }, + { + "ce_ib": 45.041744232177734, + "ce_orig": 0.8773884177207947, + "epoch": 0.05464088000575167, + "kl_loss": 2267.437744140625, + "loss_ib": 22.719417572021484, + "step": 190 + }, + { + "ce_ib": 46.31273651123047, + "ce_orig": 1.0835819244384766, + "epoch": 0.05464088000575167, + "kl_loss": 2374.10107421875, + "loss_ib": 23.787322998046875, + "step": 190 + }, + { + "ce_ib": 42.20440673828125, + "ce_orig": 0.9557557106018066, + "epoch": 0.05464088000575167, + "kl_loss": 2271.1640625, + "loss_ib": 22.75384521484375, + "step": 190 + }, + { + "ce_ib": 45.23324203491211, + "ce_orig": 0.8508480787277222, + "epoch": 0.05464088000575167, + "kl_loss": 2288.09375, + "loss_ib": 22.926172256469727, + "step": 190 + }, + { + "ce_ib": 44.05533218383789, + "ce_orig": 1.154534101486206, + "epoch": 0.05492846358472931, + "kl_loss": 2600.14404296875, + "loss_ib": 26.045494079589844, + "step": 191 + }, + { + "ce_ib": 42.00983810424805, + "ce_orig": 0.7044571042060852, + "epoch": 0.05492846358472931, + "kl_loss": 2365.684326171875, + "loss_ib": 23.6988525390625, + "step": 191 + }, + { + "ce_ib": 45.84080123901367, + "ce_orig": 0.8432292938232422, + "epoch": 0.05492846358472931, + "kl_loss": 2435.255859375, + "loss_ib": 24.39839744567871, + "step": 191 + }, + { + "ce_ib": 41.58427047729492, + "ce_orig": 0.5829588770866394, + "epoch": 0.05492846358472931, + "kl_loss": 2457.0322265625, + "loss_ib": 24.611906051635742, + "step": 191 + }, + { + "ce_ib": 42.24211120605469, + "ce_orig": 0.9801141023635864, + "epoch": 0.055216047163706955, + "kl_loss": 2417.925537109375, + "loss_ib": 24.22149658203125, + "step": 192 + }, + { + "ce_ib": 45.56145095825195, + "ce_orig": 1.132083535194397, + "epoch": 0.055216047163706955, + "kl_loss": 2193.166259765625, + "loss_ib": 21.977224349975586, + "step": 192 + }, + { + "ce_ib": 46.364322662353516, + "ce_orig": 1.4373035430908203, + "epoch": 0.055216047163706955, + "kl_loss": 2148.3759765625, + "loss_ib": 21.53012466430664, + "step": 192 + }, + { + "ce_ib": 43.91224670410156, + "ce_orig": 1.0104138851165771, + "epoch": 0.055216047163706955, + "kl_loss": 2297.23193359375, + "loss_ib": 23.0162296295166, + "step": 192 + }, + { + "ce_ib": 43.70963668823242, + "ce_orig": 1.4233311414718628, + "epoch": 0.05550363074268459, + "kl_loss": 2309.242919921875, + "loss_ib": 23.136137008666992, + "step": 193 + }, + { + "ce_ib": 42.54071807861328, + "ce_orig": 1.2717257738113403, + "epoch": 0.05550363074268459, + "kl_loss": 2467.593994140625, + "loss_ib": 24.718481063842773, + "step": 193 + }, + { + "ce_ib": 44.76433181762695, + "ce_orig": 0.4072558581829071, + "epoch": 0.05550363074268459, + "kl_loss": 2135.32177734375, + "loss_ib": 21.397979736328125, + "step": 193 + }, + { + "ce_ib": 43.77593231201172, + "ce_orig": 0.9473220705986023, + "epoch": 0.05550363074268459, + "kl_loss": 2351.19873046875, + "loss_ib": 23.555763244628906, + "step": 193 + }, + { + "ce_ib": 41.941593170166016, + "ce_orig": 0.8737780451774597, + "epoch": 0.05579121432166223, + "kl_loss": 2363.43896484375, + "loss_ib": 23.67633056640625, + "step": 194 + }, + { + "ce_ib": 45.53238296508789, + "ce_orig": 1.0324821472167969, + "epoch": 0.05579121432166223, + "kl_loss": 1954.951416015625, + "loss_ib": 19.59504508972168, + "step": 194 + }, + { + "ce_ib": 43.686954498291016, + "ce_orig": 1.3375800848007202, + "epoch": 0.05579121432166223, + "kl_loss": 2365.90576171875, + "loss_ib": 23.702743530273438, + "step": 194 + }, + { + "ce_ib": 43.68901443481445, + "ce_orig": 0.5833651423454285, + "epoch": 0.05579121432166223, + "kl_loss": 2179.84130859375, + "loss_ib": 21.84210205078125, + "step": 194 + }, + { + "epoch": 0.056078797900639875, + "grad_norm": 384.2991943359375, + "learning_rate": 6.019108280254777e-06, + "loss": 23.4525, + "step": 195 + }, + { + "ce_ib": 43.28430938720703, + "ce_orig": 0.8238533139228821, + "epoch": 0.056078797900639875, + "kl_loss": 2549.11474609375, + "loss_ib": 25.53443145751953, + "step": 195 + }, + { + "ce_ib": 43.542911529541016, + "ce_orig": 0.9835025072097778, + "epoch": 0.056078797900639875, + "kl_loss": 2104.084228515625, + "loss_ib": 21.08438491821289, + "step": 195 + }, + { + "ce_ib": 44.020999908447266, + "ce_orig": 0.9901052713394165, + "epoch": 0.056078797900639875, + "kl_loss": 1952.4921875, + "loss_ib": 19.56894302368164, + "step": 195 + }, + { + "ce_ib": 45.76945877075195, + "ce_orig": 0.8322806358337402, + "epoch": 0.056078797900639875, + "kl_loss": 2153.4716796875, + "loss_ib": 21.580486297607422, + "step": 195 + }, + { + "ce_ib": 43.12766647338867, + "ce_orig": 0.9949517846107483, + "epoch": 0.05636638147961751, + "kl_loss": 2381.666259765625, + "loss_ib": 23.85978889465332, + "step": 196 + }, + { + "ce_ib": 42.28154754638672, + "ce_orig": 0.9624870419502258, + "epoch": 0.05636638147961751, + "kl_loss": 2345.0966796875, + "loss_ib": 23.493249893188477, + "step": 196 + }, + { + "ce_ib": 44.944583892822266, + "ce_orig": 0.47114697098731995, + "epoch": 0.05636638147961751, + "kl_loss": 1733.882080078125, + "loss_ib": 17.383766174316406, + "step": 196 + }, + { + "ce_ib": 42.78217697143555, + "ce_orig": 0.7587113976478577, + "epoch": 0.05636638147961751, + "kl_loss": 2418.23876953125, + "loss_ib": 24.225170135498047, + "step": 196 + }, + { + "ce_ib": 40.34938049316406, + "ce_orig": 0.5144612789154053, + "epoch": 0.05665396505859516, + "kl_loss": 2105.162109375, + "loss_ib": 21.091970443725586, + "step": 197 + }, + { + "ce_ib": 43.35491943359375, + "ce_orig": 0.6633918285369873, + "epoch": 0.05665396505859516, + "kl_loss": 2375.418212890625, + "loss_ib": 23.797536849975586, + "step": 197 + }, + { + "ce_ib": 45.42133331298828, + "ce_orig": 1.425979495048523, + "epoch": 0.05665396505859516, + "kl_loss": 1852.203857421875, + "loss_ib": 18.567459106445312, + "step": 197 + }, + { + "ce_ib": 42.6270751953125, + "ce_orig": 1.091071367263794, + "epoch": 0.05665396505859516, + "kl_loss": 2149.162353515625, + "loss_ib": 21.534250259399414, + "step": 197 + }, + { + "ce_ib": 48.074180603027344, + "ce_orig": 1.6172330379486084, + "epoch": 0.056941548637572795, + "kl_loss": 2109.87890625, + "loss_ib": 21.146862030029297, + "step": 198 + }, + { + "ce_ib": 42.51495361328125, + "ce_orig": 1.009562611579895, + "epoch": 0.056941548637572795, + "kl_loss": 2003.40283203125, + "loss_ib": 20.076541900634766, + "step": 198 + }, + { + "ce_ib": 44.50498580932617, + "ce_orig": 1.2147884368896484, + "epoch": 0.056941548637572795, + "kl_loss": 2296.0703125, + "loss_ib": 23.005207061767578, + "step": 198 + }, + { + "ce_ib": 45.61008834838867, + "ce_orig": 1.532022476196289, + "epoch": 0.056941548637572795, + "kl_loss": 2186.6220703125, + "loss_ib": 21.91183090209961, + "step": 198 + }, + { + "ce_ib": 42.80625534057617, + "ce_orig": 1.1610299348831177, + "epoch": 0.05722913221655043, + "kl_loss": 2305.758056640625, + "loss_ib": 23.100385665893555, + "step": 199 + }, + { + "ce_ib": 44.845333099365234, + "ce_orig": 1.0554615259170532, + "epoch": 0.05722913221655043, + "kl_loss": 2195.99072265625, + "loss_ib": 22.00475311279297, + "step": 199 + }, + { + "ce_ib": 41.97274398803711, + "ce_orig": 0.9705357551574707, + "epoch": 0.05722913221655043, + "kl_loss": 2215.55859375, + "loss_ib": 22.19755744934082, + "step": 199 + }, + { + "ce_ib": 41.66038131713867, + "ce_orig": 0.8861182928085327, + "epoch": 0.05722913221655043, + "kl_loss": 1905.2489013671875, + "loss_ib": 19.094148635864258, + "step": 199 + }, + { + "epoch": 0.05751671579552808, + "grad_norm": 379.6163024902344, + "learning_rate": 6.178343949044586e-06, + "loss": 23.0704, + "step": 200 + }, + { + "ce_ib": 41.82258987426758, + "ce_orig": 1.130007266998291, + "epoch": 0.05751671579552808, + "kl_loss": 2195.69970703125, + "loss_ib": 21.998821258544922, + "step": 200 + }, + { + "ce_ib": 46.66122817993164, + "ce_orig": 1.4467494487762451, + "epoch": 0.05751671579552808, + "kl_loss": 2178.219970703125, + "loss_ib": 21.828859329223633, + "step": 200 + }, + { + "ce_ib": 39.04912567138672, + "ce_orig": 1.1100558042526245, + "epoch": 0.05751671579552808, + "kl_loss": 2271.130859375, + "loss_ib": 22.75035858154297, + "step": 200 + }, + { + "ce_ib": 40.80558395385742, + "ce_orig": 0.830470860004425, + "epoch": 0.05751671579552808, + "kl_loss": 2165.312255859375, + "loss_ib": 21.693927764892578, + "step": 200 + }, + { + "ce_ib": 40.64300537109375, + "ce_orig": 1.0102934837341309, + "epoch": 0.057804299374505715, + "kl_loss": 2148.4072265625, + "loss_ib": 21.52471351623535, + "step": 201 + }, + { + "ce_ib": 43.90663528442383, + "ce_orig": 1.1066926717758179, + "epoch": 0.057804299374505715, + "kl_loss": 2117.95751953125, + "loss_ib": 21.223480224609375, + "step": 201 + }, + { + "ce_ib": 45.89930725097656, + "ce_orig": 1.7181086540222168, + "epoch": 0.057804299374505715, + "kl_loss": 1860.36865234375, + "loss_ib": 18.649585723876953, + "step": 201 + }, + { + "ce_ib": 39.91169357299805, + "ce_orig": 0.3805517554283142, + "epoch": 0.057804299374505715, + "kl_loss": 1964.464599609375, + "loss_ib": 19.68455696105957, + "step": 201 + }, + { + "ce_ib": 39.736671447753906, + "ce_orig": 0.5894677639007568, + "epoch": 0.05809188295348336, + "kl_loss": 2246.01611328125, + "loss_ib": 22.499897003173828, + "step": 202 + }, + { + "ce_ib": 40.524208068847656, + "ce_orig": 0.9540011882781982, + "epoch": 0.05809188295348336, + "kl_loss": 1354.6728515625, + "loss_ib": 13.587251663208008, + "step": 202 + }, + { + "ce_ib": 43.64582061767578, + "ce_orig": 1.034263253211975, + "epoch": 0.05809188295348336, + "kl_loss": 2062.5126953125, + "loss_ib": 20.668771743774414, + "step": 202 + }, + { + "ce_ib": 41.73001480102539, + "ce_orig": 0.8725116848945618, + "epoch": 0.05809188295348336, + "kl_loss": 2051.177734375, + "loss_ib": 20.553508758544922, + "step": 202 + }, + { + "ce_ib": 44.100730895996094, + "ce_orig": 1.4435735940933228, + "epoch": 0.058379466532461, + "kl_loss": 1786.4874267578125, + "loss_ib": 17.908973693847656, + "step": 203 + }, + { + "ce_ib": 39.613006591796875, + "ce_orig": 0.7148452401161194, + "epoch": 0.058379466532461, + "kl_loss": 2156.13232421875, + "loss_ib": 21.600934982299805, + "step": 203 + }, + { + "ce_ib": 40.6436653137207, + "ce_orig": 1.2090833187103271, + "epoch": 0.058379466532461, + "kl_loss": 2073.139404296875, + "loss_ib": 20.772037506103516, + "step": 203 + }, + { + "ce_ib": 38.75384521484375, + "ce_orig": 0.2740119993686676, + "epoch": 0.058379466532461, + "kl_loss": 1232.078125, + "loss_ib": 12.35953426361084, + "step": 203 + }, + { + "ce_ib": 41.146873474121094, + "ce_orig": 0.719344973564148, + "epoch": 0.058667050111438634, + "kl_loss": 1714.5777587890625, + "loss_ib": 17.18692398071289, + "step": 204 + }, + { + "ce_ib": 44.12678527832031, + "ce_orig": 1.2889289855957031, + "epoch": 0.058667050111438634, + "kl_loss": 1714.3292236328125, + "loss_ib": 17.187419891357422, + "step": 204 + }, + { + "ce_ib": 44.051700592041016, + "ce_orig": 1.077775478363037, + "epoch": 0.058667050111438634, + "kl_loss": 2116.22900390625, + "loss_ib": 21.206342697143555, + "step": 204 + }, + { + "ce_ib": 38.1263427734375, + "ce_orig": 0.6518339514732361, + "epoch": 0.058667050111438634, + "kl_loss": 2124.76513671875, + "loss_ib": 21.285778045654297, + "step": 204 + }, + { + "epoch": 0.05895463369041628, + "grad_norm": 334.65350341796875, + "learning_rate": 6.337579617834395e-06, + "loss": 21.643, + "step": 205 + }, + { + "ce_ib": 40.87744140625, + "ce_orig": 1.0821335315704346, + "epoch": 0.05895463369041628, + "kl_loss": 2118.4775390625, + "loss_ib": 21.225650787353516, + "step": 205 + }, + { + "ce_ib": 39.00108337402344, + "ce_orig": 0.5186193585395813, + "epoch": 0.05895463369041628, + "kl_loss": 2132.87939453125, + "loss_ib": 21.367794036865234, + "step": 205 + }, + { + "ce_ib": 42.660888671875, + "ce_orig": 1.201238989830017, + "epoch": 0.05895463369041628, + "kl_loss": 2008.679443359375, + "loss_ib": 20.12945556640625, + "step": 205 + }, + { + "ce_ib": 42.61842346191406, + "ce_orig": 0.9650039076805115, + "epoch": 0.05895463369041628, + "kl_loss": 2077.22314453125, + "loss_ib": 20.814849853515625, + "step": 205 + }, + { + "ce_ib": 38.07194137573242, + "ce_orig": 0.9797282814979553, + "epoch": 0.05924221726939392, + "kl_loss": 2239.80859375, + "loss_ib": 22.4361572265625, + "step": 206 + }, + { + "ce_ib": 42.637840270996094, + "ce_orig": 1.2511439323425293, + "epoch": 0.05924221726939392, + "kl_loss": 2223.17578125, + "loss_ib": 22.27439308166504, + "step": 206 + }, + { + "ce_ib": 40.19730758666992, + "ce_orig": 1.2803971767425537, + "epoch": 0.05924221726939392, + "kl_loss": 1948.2891845703125, + "loss_ib": 19.523088455200195, + "step": 206 + }, + { + "ce_ib": 40.72916030883789, + "ce_orig": 0.9539033770561218, + "epoch": 0.05924221726939392, + "kl_loss": 1906.385498046875, + "loss_ib": 19.104583740234375, + "step": 206 + }, + { + "ce_ib": 41.54541778564453, + "ce_orig": 0.9414616823196411, + "epoch": 0.05952980084837156, + "kl_loss": 1876.511474609375, + "loss_ib": 18.806659698486328, + "step": 207 + }, + { + "ce_ib": 41.48387908935547, + "ce_orig": 0.8939663171768188, + "epoch": 0.05952980084837156, + "kl_loss": 1365.2000732421875, + "loss_ib": 13.693485260009766, + "step": 207 + }, + { + "ce_ib": 39.200660705566406, + "ce_orig": 0.8465067148208618, + "epoch": 0.05952980084837156, + "kl_loss": 1455.4010009765625, + "loss_ib": 14.593210220336914, + "step": 207 + }, + { + "ce_ib": 44.42674255371094, + "ce_orig": 1.9264086484909058, + "epoch": 0.05952980084837156, + "kl_loss": 2006.107421875, + "loss_ib": 20.105499267578125, + "step": 207 + }, + { + "ce_ib": 41.2900505065918, + "ce_orig": 1.6340312957763672, + "epoch": 0.0598173844273492, + "kl_loss": 1908.3016357421875, + "loss_ib": 19.124305725097656, + "step": 208 + }, + { + "ce_ib": 41.597694396972656, + "ce_orig": 0.9764799475669861, + "epoch": 0.0598173844273492, + "kl_loss": 1880.7303466796875, + "loss_ib": 18.848901748657227, + "step": 208 + }, + { + "ce_ib": 42.240516662597656, + "ce_orig": 0.4290001690387726, + "epoch": 0.0598173844273492, + "kl_loss": 1004.71728515625, + "loss_ib": 10.0894136428833, + "step": 208 + }, + { + "ce_ib": 42.21794128417969, + "ce_orig": 1.0113506317138672, + "epoch": 0.0598173844273492, + "kl_loss": 1826.12890625, + "loss_ib": 18.30350685119629, + "step": 208 + }, + { + "ce_ib": 41.9946403503418, + "ce_orig": 1.175087332725525, + "epoch": 0.06010496800632684, + "kl_loss": 1176.065185546875, + "loss_ib": 11.80264663696289, + "step": 209 + }, + { + "ce_ib": 41.64639663696289, + "ce_orig": 1.211875081062317, + "epoch": 0.06010496800632684, + "kl_loss": 2071.13623046875, + "loss_ib": 20.753007888793945, + "step": 209 + }, + { + "ce_ib": 40.602378845214844, + "ce_orig": 1.2118444442749023, + "epoch": 0.06010496800632684, + "kl_loss": 2032.2001953125, + "loss_ib": 20.36260414123535, + "step": 209 + }, + { + "ce_ib": 39.86399459838867, + "ce_orig": 1.1752040386199951, + "epoch": 0.06010496800632684, + "kl_loss": 2107.810546875, + "loss_ib": 21.11796760559082, + "step": 209 + }, + { + "epoch": 0.06039255158530448, + "grad_norm": 353.71893310546875, + "learning_rate": 6.496815286624204e-06, + "loss": 20.5732, + "step": 210 + }, + { + "ce_ib": 42.62069320678711, + "ce_orig": 1.424117922782898, + "epoch": 0.06039255158530448, + "kl_loss": 1516.96630859375, + "loss_ib": 15.21228313446045, + "step": 210 + }, + { + "ce_ib": 39.955963134765625, + "ce_orig": 0.44305068254470825, + "epoch": 0.06039255158530448, + "kl_loss": 1933.53759765625, + "loss_ib": 19.37533187866211, + "step": 210 + }, + { + "ce_ib": 40.190860748291016, + "ce_orig": 0.597926914691925, + "epoch": 0.06039255158530448, + "kl_loss": 2069.74560546875, + "loss_ib": 20.737646102905273, + "step": 210 + }, + { + "ce_ib": 39.46810531616211, + "ce_orig": 0.713378369808197, + "epoch": 0.06039255158530448, + "kl_loss": 1792.88720703125, + "loss_ib": 17.968339920043945, + "step": 210 + }, + { + "ce_ib": 42.34745788574219, + "ce_orig": 1.0557239055633545, + "epoch": 0.06068013516428212, + "kl_loss": 1849.0069580078125, + "loss_ib": 18.53241729736328, + "step": 211 + }, + { + "ce_ib": 38.35053634643555, + "ce_orig": 0.603425145149231, + "epoch": 0.06068013516428212, + "kl_loss": 1920.453857421875, + "loss_ib": 19.242889404296875, + "step": 211 + }, + { + "ce_ib": 37.083927154541016, + "ce_orig": 1.2688902616500854, + "epoch": 0.06068013516428212, + "kl_loss": 2041.569580078125, + "loss_ib": 20.45277976989746, + "step": 211 + }, + { + "ce_ib": 38.7510871887207, + "ce_orig": 0.6306071877479553, + "epoch": 0.06068013516428212, + "kl_loss": 1976.189453125, + "loss_ib": 19.80064582824707, + "step": 211 + }, + { + "ce_ib": 36.3626594543457, + "ce_orig": 0.4798745810985565, + "epoch": 0.060967718743259756, + "kl_loss": 1731.49951171875, + "loss_ib": 17.351356506347656, + "step": 212 + }, + { + "ce_ib": 39.894752502441406, + "ce_orig": 0.991927444934845, + "epoch": 0.060967718743259756, + "kl_loss": 2037.4395751953125, + "loss_ib": 20.414289474487305, + "step": 212 + }, + { + "ce_ib": 41.493896484375, + "ce_orig": 1.472151756286621, + "epoch": 0.060967718743259756, + "kl_loss": 1865.9774169921875, + "loss_ib": 18.70126724243164, + "step": 212 + }, + { + "ce_ib": 42.53767013549805, + "ce_orig": 1.1901134252548218, + "epoch": 0.060967718743259756, + "kl_loss": 1645.129638671875, + "loss_ib": 16.493833541870117, + "step": 212 + }, + { + "ce_ib": 41.67341232299805, + "ce_orig": 2.049192428588867, + "epoch": 0.0612553023222374, + "kl_loss": 1779.3302001953125, + "loss_ib": 17.83497428894043, + "step": 213 + }, + { + "ce_ib": 42.051273345947266, + "ce_orig": 1.3447684049606323, + "epoch": 0.0612553023222374, + "kl_loss": 1777.956787109375, + "loss_ib": 17.821619033813477, + "step": 213 + }, + { + "ce_ib": 37.9370002746582, + "ce_orig": 0.4780416190624237, + "epoch": 0.0612553023222374, + "kl_loss": 1839.32177734375, + "loss_ib": 18.431154251098633, + "step": 213 + }, + { + "ce_ib": 39.32098388671875, + "ce_orig": 1.0718315839767456, + "epoch": 0.0612553023222374, + "kl_loss": 2034.295166015625, + "loss_ib": 20.382272720336914, + "step": 213 + }, + { + "ce_ib": 36.60554885864258, + "ce_orig": 0.5966328978538513, + "epoch": 0.06154288590121504, + "kl_loss": 1988.932861328125, + "loss_ib": 19.925933837890625, + "step": 214 + }, + { + "ce_ib": 41.9276123046875, + "ce_orig": 0.6651936173439026, + "epoch": 0.06154288590121504, + "kl_loss": 1789.4920654296875, + "loss_ib": 17.936847686767578, + "step": 214 + }, + { + "ce_ib": 40.556461334228516, + "ce_orig": 0.4927489459514618, + "epoch": 0.06154288590121504, + "kl_loss": 1789.6015625, + "loss_ib": 17.93657112121582, + "step": 214 + }, + { + "ce_ib": 39.251651763916016, + "ce_orig": 1.0295277833938599, + "epoch": 0.06154288590121504, + "kl_loss": 1894.323486328125, + "loss_ib": 18.982486724853516, + "step": 214 + }, + { + "epoch": 0.06183046948019268, + "grad_norm": 317.97894287109375, + "learning_rate": 6.6560509554140125e-06, + "loss": 19.6963, + "step": 215 + }, + { + "ce_ib": 38.56227111816406, + "ce_orig": 0.6546093821525574, + "epoch": 0.06183046948019268, + "kl_loss": 1737.800048828125, + "loss_ib": 17.416563034057617, + "step": 215 + }, + { + "ce_ib": 35.2888069152832, + "ce_orig": 0.7404365539550781, + "epoch": 0.06183046948019268, + "kl_loss": 1965.703857421875, + "loss_ib": 19.69232749938965, + "step": 215 + }, + { + "ce_ib": 40.4928092956543, + "ce_orig": 1.043062686920166, + "epoch": 0.06183046948019268, + "kl_loss": 1873.6024169921875, + "loss_ib": 18.77651596069336, + "step": 215 + }, + { + "ce_ib": 39.602508544921875, + "ce_orig": 1.0876483917236328, + "epoch": 0.06183046948019268, + "kl_loss": 1824.4144287109375, + "loss_ib": 18.28374671936035, + "step": 215 + }, + { + "ce_ib": 35.879852294921875, + "ce_orig": 0.8248341679573059, + "epoch": 0.06211805305917032, + "kl_loss": 1824.806640625, + "loss_ib": 18.283946990966797, + "step": 216 + }, + { + "ce_ib": 35.09049606323242, + "ce_orig": 0.5326448082923889, + "epoch": 0.06211805305917032, + "kl_loss": 1892.9739990234375, + "loss_ib": 18.96483039855957, + "step": 216 + }, + { + "ce_ib": 40.311012268066406, + "ce_orig": 1.6227895021438599, + "epoch": 0.06211805305917032, + "kl_loss": 1743.340087890625, + "loss_ib": 17.473711013793945, + "step": 216 + }, + { + "ce_ib": 36.35209274291992, + "ce_orig": 0.7696553468704224, + "epoch": 0.06211805305917032, + "kl_loss": 1989.461181640625, + "loss_ib": 19.93096351623535, + "step": 216 + }, + { + "ce_ib": 36.76679611206055, + "ce_orig": 0.7665999531745911, + "epoch": 0.06240563663814796, + "kl_loss": 1747.884033203125, + "loss_ib": 17.515605926513672, + "step": 217 + }, + { + "ce_ib": 38.807064056396484, + "ce_orig": 0.9963610172271729, + "epoch": 0.06240563663814796, + "kl_loss": 1780.5374755859375, + "loss_ib": 17.844181060791016, + "step": 217 + }, + { + "ce_ib": 39.64936828613281, + "ce_orig": 0.7059118151664734, + "epoch": 0.06240563663814796, + "kl_loss": 1668.8529052734375, + "loss_ib": 16.728178024291992, + "step": 217 + }, + { + "ce_ib": 37.85905838012695, + "ce_orig": 1.064191460609436, + "epoch": 0.06240563663814796, + "kl_loss": 1903.99462890625, + "loss_ib": 19.077804565429688, + "step": 217 + }, + { + "ce_ib": 36.819175720214844, + "ce_orig": 0.5811072587966919, + "epoch": 0.0626932202171256, + "kl_loss": 1824.97802734375, + "loss_ib": 18.286598205566406, + "step": 218 + }, + { + "ce_ib": 37.54027557373047, + "ce_orig": 0.9560117125511169, + "epoch": 0.0626932202171256, + "kl_loss": 1839.77392578125, + "loss_ib": 18.435279846191406, + "step": 218 + }, + { + "ce_ib": 39.93457794189453, + "ce_orig": 1.0178155899047852, + "epoch": 0.0626932202171256, + "kl_loss": 1711.14697265625, + "loss_ib": 17.151403427124023, + "step": 218 + }, + { + "ce_ib": 40.65945816040039, + "ce_orig": 0.742775022983551, + "epoch": 0.0626932202171256, + "kl_loss": 1604.2640380859375, + "loss_ib": 16.08329963684082, + "step": 218 + }, + { + "ce_ib": 41.853858947753906, + "ce_orig": 1.375777244567871, + "epoch": 0.06298080379610324, + "kl_loss": 1801.32958984375, + "loss_ib": 18.05514907836914, + "step": 219 + }, + { + "ce_ib": 36.28791046142578, + "ce_orig": 0.9161471724510193, + "epoch": 0.06298080379610324, + "kl_loss": 1644.6923828125, + "loss_ib": 16.483211517333984, + "step": 219 + }, + { + "ce_ib": 37.08815383911133, + "ce_orig": 0.728233814239502, + "epoch": 0.06298080379610324, + "kl_loss": 1778.4482421875, + "loss_ib": 17.821571350097656, + "step": 219 + }, + { + "ce_ib": 37.880104064941406, + "ce_orig": 1.3340355157852173, + "epoch": 0.06298080379610324, + "kl_loss": 1593.228759765625, + "loss_ib": 15.97016716003418, + "step": 219 + }, + { + "epoch": 0.06326838737508088, + "grad_norm": 318.8504943847656, + "learning_rate": 6.815286624203822e-06, + "loss": 18.6164, + "step": 220 + }, + { + "ce_ib": 39.489933013916016, + "ce_orig": 1.1903218030929565, + "epoch": 0.06326838737508088, + "kl_loss": 1687.38232421875, + "loss_ib": 16.913312911987305, + "step": 220 + }, + { + "ce_ib": 37.481624603271484, + "ce_orig": 0.7840384244918823, + "epoch": 0.06326838737508088, + "kl_loss": 1726.1568603515625, + "loss_ib": 17.299049377441406, + "step": 220 + }, + { + "ce_ib": 35.45810317993164, + "ce_orig": 0.7813513278961182, + "epoch": 0.06326838737508088, + "kl_loss": 1733.7164306640625, + "loss_ib": 17.372621536254883, + "step": 220 + }, + { + "ce_ib": 40.4595832824707, + "ce_orig": 1.3600130081176758, + "epoch": 0.06326838737508088, + "kl_loss": 1568.1533203125, + "loss_ib": 15.721991539001465, + "step": 220 + }, + { + "ce_ib": 35.940345764160156, + "ce_orig": 1.0896117687225342, + "epoch": 0.06355597095405853, + "kl_loss": 1728.0986328125, + "loss_ib": 17.316925048828125, + "step": 221 + }, + { + "ce_ib": 35.44572830200195, + "ce_orig": 0.2852933406829834, + "epoch": 0.06355597095405853, + "kl_loss": 963.3050537109375, + "loss_ib": 9.668496131896973, + "step": 221 + }, + { + "ce_ib": 33.80705261230469, + "ce_orig": 0.791994571685791, + "epoch": 0.06355597095405853, + "kl_loss": 1822.135009765625, + "loss_ib": 18.255157470703125, + "step": 221 + }, + { + "ce_ib": 36.91697692871094, + "ce_orig": 0.6960796117782593, + "epoch": 0.06355597095405853, + "kl_loss": 1718.594970703125, + "loss_ib": 17.22286605834961, + "step": 221 + }, + { + "ce_ib": 39.449485778808594, + "ce_orig": 0.9346453547477722, + "epoch": 0.06384355453303617, + "kl_loss": 1414.82958984375, + "loss_ib": 14.187745094299316, + "step": 222 + }, + { + "ce_ib": 38.30500793457031, + "ce_orig": 0.8375841975212097, + "epoch": 0.06384355453303617, + "kl_loss": 1562.5009765625, + "loss_ib": 15.663313865661621, + "step": 222 + }, + { + "ce_ib": 41.28805923461914, + "ce_orig": 0.7960017919540405, + "epoch": 0.06384355453303617, + "kl_loss": 1512.22802734375, + "loss_ib": 15.163567543029785, + "step": 222 + }, + { + "ce_ib": 32.99767303466797, + "ce_orig": 0.6580086946487427, + "epoch": 0.06384355453303617, + "kl_loss": 1726.3076171875, + "loss_ib": 17.29607391357422, + "step": 222 + }, + { + "ce_ib": 33.17844009399414, + "ce_orig": 0.7080318927764893, + "epoch": 0.0641311381120138, + "kl_loss": 1755.3681640625, + "loss_ib": 17.58686065673828, + "step": 223 + }, + { + "ce_ib": 37.70219421386719, + "ce_orig": 0.6540882587432861, + "epoch": 0.0641311381120138, + "kl_loss": 1503.2210693359375, + "loss_ib": 15.069912910461426, + "step": 223 + }, + { + "ce_ib": 40.99760818481445, + "ce_orig": 0.7546887993812561, + "epoch": 0.0641311381120138, + "kl_loss": 1461.0548095703125, + "loss_ib": 14.651545524597168, + "step": 223 + }, + { + "ce_ib": 36.96660614013672, + "ce_orig": 0.6669592261314392, + "epoch": 0.0641311381120138, + "kl_loss": 1589.726318359375, + "loss_ib": 15.934229850769043, + "step": 223 + }, + { + "ce_ib": 41.123348236083984, + "ce_orig": 1.2109692096710205, + "epoch": 0.06441872169099144, + "kl_loss": 1519.9962158203125, + "loss_ib": 15.241085052490234, + "step": 224 + }, + { + "ce_ib": 36.78544998168945, + "ce_orig": 0.9228610992431641, + "epoch": 0.06441872169099144, + "kl_loss": 1535.970703125, + "loss_ib": 15.396492958068848, + "step": 224 + }, + { + "ce_ib": 36.03901672363281, + "ce_orig": 1.0696367025375366, + "epoch": 0.06441872169099144, + "kl_loss": 1711.971923828125, + "loss_ib": 17.155757904052734, + "step": 224 + }, + { + "ce_ib": 38.314327239990234, + "ce_orig": 1.826366662979126, + "epoch": 0.06441872169099144, + "kl_loss": 1629.136962890625, + "loss_ib": 16.329683303833008, + "step": 224 + }, + { + "epoch": 0.06470630526996908, + "grad_norm": 308.50250244140625, + "learning_rate": 6.9745222929936305e-06, + "loss": 16.8856, + "step": 225 + }, + { + "ce_ib": 36.43165969848633, + "ce_orig": 0.878455638885498, + "epoch": 0.06470630526996908, + "kl_loss": 1289.9693603515625, + "loss_ib": 12.936124801635742, + "step": 225 + }, + { + "ce_ib": 40.33540344238281, + "ce_orig": 1.3886396884918213, + "epoch": 0.06470630526996908, + "kl_loss": 1656.87060546875, + "loss_ib": 16.609041213989258, + "step": 225 + }, + { + "ce_ib": 36.43739700317383, + "ce_orig": 0.4045904874801636, + "epoch": 0.06470630526996908, + "kl_loss": 1671.9920654296875, + "loss_ib": 16.756359100341797, + "step": 225 + }, + { + "ce_ib": 36.36072540283203, + "ce_orig": 1.0076552629470825, + "epoch": 0.06470630526996908, + "kl_loss": 1584.193115234375, + "loss_ib": 15.878292083740234, + "step": 225 + }, + { + "ce_ib": 38.3975830078125, + "ce_orig": 1.267250895500183, + "epoch": 0.06499388884894673, + "kl_loss": 1569.5213623046875, + "loss_ib": 15.733610153198242, + "step": 226 + }, + { + "ce_ib": 38.38508224487305, + "ce_orig": 1.265257477760315, + "epoch": 0.06499388884894673, + "kl_loss": 1453.87646484375, + "loss_ib": 14.577149391174316, + "step": 226 + }, + { + "ce_ib": 39.651161193847656, + "ce_orig": 0.6877183318138123, + "epoch": 0.06499388884894673, + "kl_loss": 1393.7093505859375, + "loss_ib": 13.976743698120117, + "step": 226 + }, + { + "ce_ib": 36.53251647949219, + "ce_orig": 1.5096549987792969, + "epoch": 0.06499388884894673, + "kl_loss": 1307.943115234375, + "loss_ib": 13.115962982177734, + "step": 226 + }, + { + "ce_ib": 37.703006744384766, + "ce_orig": 0.9586830139160156, + "epoch": 0.06528147242792437, + "kl_loss": 1452.981201171875, + "loss_ib": 14.567514419555664, + "step": 227 + }, + { + "ce_ib": 34.66203308105469, + "ce_orig": 0.658699095249176, + "epoch": 0.06528147242792437, + "kl_loss": 1551.1995849609375, + "loss_ib": 15.54665756225586, + "step": 227 + }, + { + "ce_ib": 35.329044342041016, + "ce_orig": 0.6904061436653137, + "epoch": 0.06528147242792437, + "kl_loss": 1509.483154296875, + "loss_ib": 15.130160331726074, + "step": 227 + }, + { + "ce_ib": 35.24424743652344, + "ce_orig": 0.5379785895347595, + "epoch": 0.06528147242792437, + "kl_loss": 1509.2982177734375, + "loss_ib": 15.128226280212402, + "step": 227 + }, + { + "ce_ib": 35.123985290527344, + "ce_orig": 0.7466920614242554, + "epoch": 0.06556905600690201, + "kl_loss": 1561.8984375, + "loss_ib": 15.654109001159668, + "step": 228 + }, + { + "ce_ib": 34.78830337524414, + "ce_orig": 0.7827273607254028, + "epoch": 0.06556905600690201, + "kl_loss": 1582.02099609375, + "loss_ib": 15.854998588562012, + "step": 228 + }, + { + "ce_ib": 31.581981658935547, + "ce_orig": 0.2600187063217163, + "epoch": 0.06556905600690201, + "kl_loss": 1321.60205078125, + "loss_ib": 13.247602462768555, + "step": 228 + }, + { + "ce_ib": 34.20478820800781, + "ce_orig": 1.0527675151824951, + "epoch": 0.06556905600690201, + "kl_loss": 1532.9888916015625, + "loss_ib": 15.364093780517578, + "step": 228 + }, + { + "ce_ib": 39.04445266723633, + "ce_orig": 1.652494192123413, + "epoch": 0.06585663958587964, + "kl_loss": 1284.142578125, + "loss_ib": 12.88046932220459, + "step": 229 + }, + { + "ce_ib": 37.834381103515625, + "ce_orig": 1.3008118867874146, + "epoch": 0.06585663958587964, + "kl_loss": 1602.8289794921875, + "loss_ib": 16.066123962402344, + "step": 229 + }, + { + "ce_ib": 34.8093147277832, + "ce_orig": 0.9119290113449097, + "epoch": 0.06585663958587964, + "kl_loss": 1195.5177001953125, + "loss_ib": 11.989986419677734, + "step": 229 + }, + { + "ce_ib": 37.39421081542969, + "ce_orig": 1.299423336982727, + "epoch": 0.06585663958587964, + "kl_loss": 1378.4208984375, + "loss_ib": 13.821602821350098, + "step": 229 + }, + { + "epoch": 0.06614422316485728, + "grad_norm": 279.78515625, + "learning_rate": 7.13375796178344e-06, + "loss": 15.8318, + "step": 230 + }, + { + "ce_ib": 39.00707244873047, + "ce_orig": 1.9128481149673462, + "epoch": 0.06614422316485728, + "kl_loss": 1414.39697265625, + "loss_ib": 14.182976722717285, + "step": 230 + }, + { + "ce_ib": 36.59072494506836, + "ce_orig": 1.3344916105270386, + "epoch": 0.06614422316485728, + "kl_loss": 1368.765869140625, + "loss_ib": 13.724248886108398, + "step": 230 + }, + { + "ce_ib": 36.91270065307617, + "ce_orig": 1.1911953687667847, + "epoch": 0.06614422316485728, + "kl_loss": 1506.248291015625, + "loss_ib": 15.099395751953125, + "step": 230 + }, + { + "ce_ib": 35.45751953125, + "ce_orig": 0.9839146733283997, + "epoch": 0.06614422316485728, + "kl_loss": 1557.294189453125, + "loss_ib": 15.608399391174316, + "step": 230 + }, + { + "ce_ib": 40.05620193481445, + "ce_orig": 0.5404795408248901, + "epoch": 0.06643180674383492, + "kl_loss": 1331.465576171875, + "loss_ib": 13.354711532592773, + "step": 231 + }, + { + "ce_ib": 35.91750717163086, + "ce_orig": 1.0286931991577148, + "epoch": 0.06643180674383492, + "kl_loss": 1538.906982421875, + "loss_ib": 15.42498779296875, + "step": 231 + }, + { + "ce_ib": 37.91292953491211, + "ce_orig": 0.6502935886383057, + "epoch": 0.06643180674383492, + "kl_loss": 1359.6444091796875, + "loss_ib": 13.634356498718262, + "step": 231 + }, + { + "ce_ib": 36.25615310668945, + "ce_orig": 1.0073096752166748, + "epoch": 0.06643180674383492, + "kl_loss": 1373.6658935546875, + "loss_ib": 13.77291488647461, + "step": 231 + }, + { + "ce_ib": 33.69683837890625, + "ce_orig": 0.46744322776794434, + "epoch": 0.06671939032281257, + "kl_loss": 1326.230712890625, + "loss_ib": 13.296003341674805, + "step": 232 + }, + { + "ce_ib": 33.371883392333984, + "ce_orig": 1.2536524534225464, + "epoch": 0.06671939032281257, + "kl_loss": 1449.79150390625, + "loss_ib": 14.531286239624023, + "step": 232 + }, + { + "ce_ib": 35.073936462402344, + "ce_orig": 1.0432262420654297, + "epoch": 0.06671939032281257, + "kl_loss": 1357.572021484375, + "loss_ib": 13.610794067382812, + "step": 232 + }, + { + "ce_ib": 35.290687561035156, + "ce_orig": 0.8194282650947571, + "epoch": 0.06671939032281257, + "kl_loss": 1452.7779541015625, + "loss_ib": 14.563069343566895, + "step": 232 + }, + { + "ce_ib": 39.44172668457031, + "ce_orig": 1.5786598920822144, + "epoch": 0.06700697390179021, + "kl_loss": 1254.966796875, + "loss_ib": 12.589109420776367, + "step": 233 + }, + { + "ce_ib": 34.740867614746094, + "ce_orig": 0.563507616519928, + "epoch": 0.06700697390179021, + "kl_loss": 1330.1361083984375, + "loss_ib": 13.336101531982422, + "step": 233 + }, + { + "ce_ib": 36.04484176635742, + "ce_orig": 1.1875897645950317, + "epoch": 0.06700697390179021, + "kl_loss": 1417.7935791015625, + "loss_ib": 14.213980674743652, + "step": 233 + }, + { + "ce_ib": 31.511131286621094, + "ce_orig": 0.6714182496070862, + "epoch": 0.06700697390179021, + "kl_loss": 1382.14794921875, + "loss_ib": 13.85299015045166, + "step": 233 + }, + { + "ce_ib": 33.84688186645508, + "ce_orig": 1.096521258354187, + "epoch": 0.06729455748076785, + "kl_loss": 1357.806884765625, + "loss_ib": 13.61191463470459, + "step": 234 + }, + { + "ce_ib": 34.99058532714844, + "ce_orig": 1.0461159944534302, + "epoch": 0.06729455748076785, + "kl_loss": 1225.052734375, + "loss_ib": 12.285517692565918, + "step": 234 + }, + { + "ce_ib": 34.9071044921875, + "ce_orig": 0.9976585507392883, + "epoch": 0.06729455748076785, + "kl_loss": 1408.259521484375, + "loss_ib": 14.117502212524414, + "step": 234 + }, + { + "ce_ib": 37.175872802734375, + "ce_orig": 1.5398781299591064, + "epoch": 0.06729455748076785, + "kl_loss": 1296.612548828125, + "loss_ib": 13.003300666809082, + "step": 234 + }, + { + "epoch": 0.06758214105974548, + "grad_norm": 268.0568542480469, + "learning_rate": 7.2929936305732485e-06, + "loss": 14.6834, + "step": 235 + }, + { + "ce_ib": 35.610557556152344, + "ce_orig": 0.7642791867256165, + "epoch": 0.06758214105974548, + "kl_loss": 1381.1488037109375, + "loss_ib": 13.847098350524902, + "step": 235 + }, + { + "ce_ib": 38.893550872802734, + "ce_orig": 1.8394078016281128, + "epoch": 0.06758214105974548, + "kl_loss": 1152.2271728515625, + "loss_ib": 11.561165809631348, + "step": 235 + }, + { + "ce_ib": 32.011322021484375, + "ce_orig": 0.9249970316886902, + "epoch": 0.06758214105974548, + "kl_loss": 1395.470458984375, + "loss_ib": 13.986716270446777, + "step": 235 + }, + { + "ce_ib": 35.36570739746094, + "ce_orig": 1.026782751083374, + "epoch": 0.06758214105974548, + "kl_loss": 1339.9072265625, + "loss_ib": 13.43443775177002, + "step": 235 + }, + { + "ce_ib": 33.16312789916992, + "ce_orig": 1.0328998565673828, + "epoch": 0.06786972463872312, + "kl_loss": 1300.6361083984375, + "loss_ib": 13.03952407836914, + "step": 236 + }, + { + "ce_ib": 35.08463668823242, + "ce_orig": 1.3721755743026733, + "epoch": 0.06786972463872312, + "kl_loss": 1194.1552734375, + "loss_ib": 11.97663688659668, + "step": 236 + }, + { + "ce_ib": 31.49561882019043, + "ce_orig": 0.3084181249141693, + "epoch": 0.06786972463872312, + "kl_loss": 934.6522216796875, + "loss_ib": 9.37801742553711, + "step": 236 + }, + { + "ce_ib": 39.625789642333984, + "ce_orig": 1.2989716529846191, + "epoch": 0.06786972463872312, + "kl_loss": 1081.615234375, + "loss_ib": 10.855777740478516, + "step": 236 + }, + { + "ce_ib": 33.67836380004883, + "ce_orig": 1.3483405113220215, + "epoch": 0.06815730821770077, + "kl_loss": 1198.904541015625, + "loss_ib": 12.022723197937012, + "step": 237 + }, + { + "ce_ib": 35.59366989135742, + "ce_orig": 0.9075685143470764, + "epoch": 0.06815730821770077, + "kl_loss": 1204.507568359375, + "loss_ib": 12.080668449401855, + "step": 237 + }, + { + "ce_ib": 36.645938873291016, + "ce_orig": 0.6160690188407898, + "epoch": 0.06815730821770077, + "kl_loss": 1352.1148681640625, + "loss_ib": 13.557793617248535, + "step": 237 + }, + { + "ce_ib": 34.851688385009766, + "ce_orig": 0.7488659024238586, + "epoch": 0.06815730821770077, + "kl_loss": 1261.7066650390625, + "loss_ib": 12.651918411254883, + "step": 237 + }, + { + "ce_ib": 31.677663803100586, + "ce_orig": 0.6202912330627441, + "epoch": 0.06844489179667841, + "kl_loss": 1198.86669921875, + "loss_ib": 12.020343780517578, + "step": 238 + }, + { + "ce_ib": 33.36151885986328, + "ce_orig": 0.7369568347930908, + "epoch": 0.06844489179667841, + "kl_loss": 1171.602294921875, + "loss_ib": 11.749384880065918, + "step": 238 + }, + { + "ce_ib": 37.02521896362305, + "ce_orig": 0.6275981664657593, + "epoch": 0.06844489179667841, + "kl_loss": 1234.6282958984375, + "loss_ib": 12.383307456970215, + "step": 238 + }, + { + "ce_ib": 33.56972885131836, + "ce_orig": 0.8399911522865295, + "epoch": 0.06844489179667841, + "kl_loss": 1177.68603515625, + "loss_ib": 11.810429573059082, + "step": 238 + }, + { + "ce_ib": 36.48527526855469, + "ce_orig": 1.2248564958572388, + "epoch": 0.06873247537565605, + "kl_loss": 1168.8984375, + "loss_ib": 11.725469589233398, + "step": 239 + }, + { + "ce_ib": 32.57621765136719, + "ce_orig": 0.8083109259605408, + "epoch": 0.06873247537565605, + "kl_loss": 1238.533935546875, + "loss_ib": 12.417914390563965, + "step": 239 + }, + { + "ce_ib": 36.354488372802734, + "ce_orig": 1.729040503501892, + "epoch": 0.06873247537565605, + "kl_loss": 1160.8841552734375, + "loss_ib": 11.645195960998535, + "step": 239 + }, + { + "ce_ib": 33.25252151489258, + "ce_orig": 0.8631963729858398, + "epoch": 0.06873247537565605, + "kl_loss": 1161.73974609375, + "loss_ib": 11.650649070739746, + "step": 239 + }, + { + "epoch": 0.06902005895463369, + "grad_norm": 242.57241821289062, + "learning_rate": 7.452229299363057e-06, + "loss": 13.2382, + "step": 240 + }, + { + "ce_ib": 32.97649002075195, + "ce_orig": 0.5754613876342773, + "epoch": 0.06902005895463369, + "kl_loss": 1180.76708984375, + "loss_ib": 11.840646743774414, + "step": 240 + }, + { + "ce_ib": 35.5557861328125, + "ce_orig": 1.3153690099716187, + "epoch": 0.06902005895463369, + "kl_loss": 1166.3897705078125, + "loss_ib": 11.69945240020752, + "step": 240 + }, + { + "ce_ib": 30.03131675720215, + "ce_orig": 0.5445340275764465, + "epoch": 0.06902005895463369, + "kl_loss": 1206.5845947265625, + "loss_ib": 12.095877647399902, + "step": 240 + }, + { + "ce_ib": 34.68654251098633, + "ce_orig": 1.0324212312698364, + "epoch": 0.06902005895463369, + "kl_loss": 1035.31201171875, + "loss_ib": 10.387805938720703, + "step": 240 + }, + { + "ce_ib": 34.29194259643555, + "ce_orig": 0.9263237714767456, + "epoch": 0.06930764253361132, + "kl_loss": 1131.122314453125, + "loss_ib": 11.345515251159668, + "step": 241 + }, + { + "ce_ib": 35.83911895751953, + "ce_orig": 0.6829422116279602, + "epoch": 0.06930764253361132, + "kl_loss": 1171.150634765625, + "loss_ib": 11.747344970703125, + "step": 241 + }, + { + "ce_ib": 34.61550521850586, + "ce_orig": 0.7391694188117981, + "epoch": 0.06930764253361132, + "kl_loss": 1050.002685546875, + "loss_ib": 10.534642219543457, + "step": 241 + }, + { + "ce_ib": 31.543256759643555, + "ce_orig": 0.8060687780380249, + "epoch": 0.06930764253361132, + "kl_loss": 1197.1962890625, + "loss_ib": 12.00350570678711, + "step": 241 + }, + { + "ce_ib": 32.99800109863281, + "ce_orig": 0.6455181837081909, + "epoch": 0.06959522611258898, + "kl_loss": 1041.767333984375, + "loss_ib": 10.450671195983887, + "step": 242 + }, + { + "ce_ib": 32.91671371459961, + "ce_orig": 0.7244043350219727, + "epoch": 0.06959522611258898, + "kl_loss": 1103.9139404296875, + "loss_ib": 11.07205581665039, + "step": 242 + }, + { + "ce_ib": 35.45330047607422, + "ce_orig": 0.9272658228874207, + "epoch": 0.06959522611258898, + "kl_loss": 1027.905517578125, + "loss_ib": 10.314507484436035, + "step": 242 + }, + { + "ce_ib": 34.885498046875, + "ce_orig": 0.8863522410392761, + "epoch": 0.06959522611258898, + "kl_loss": 1155.1387939453125, + "loss_ib": 11.586273193359375, + "step": 242 + }, + { + "ce_ib": 34.44084930419922, + "ce_orig": 1.152998924255371, + "epoch": 0.06988280969156661, + "kl_loss": 1133.0599365234375, + "loss_ib": 11.365039825439453, + "step": 243 + }, + { + "ce_ib": 35.273677825927734, + "ce_orig": 1.2428306341171265, + "epoch": 0.06988280969156661, + "kl_loss": 1112.822021484375, + "loss_ib": 11.163493156433105, + "step": 243 + }, + { + "ce_ib": 32.52173614501953, + "ce_orig": 1.016830325126648, + "epoch": 0.06988280969156661, + "kl_loss": 1070.12255859375, + "loss_ib": 10.733747482299805, + "step": 243 + }, + { + "ce_ib": 34.803653717041016, + "ce_orig": 0.615959107875824, + "epoch": 0.06988280969156661, + "kl_loss": 1086.107421875, + "loss_ib": 10.895877838134766, + "step": 243 + }, + { + "ce_ib": 32.82182693481445, + "ce_orig": 0.8602744936943054, + "epoch": 0.07017039327054425, + "kl_loss": 1035.995849609375, + "loss_ib": 10.392780303955078, + "step": 244 + }, + { + "ce_ib": 31.894535064697266, + "ce_orig": 0.6907263398170471, + "epoch": 0.07017039327054425, + "kl_loss": 979.4188232421875, + "loss_ib": 9.826082229614258, + "step": 244 + }, + { + "ce_ib": 35.21843719482422, + "ce_orig": 1.3801195621490479, + "epoch": 0.07017039327054425, + "kl_loss": 1049.561279296875, + "loss_ib": 10.530831336975098, + "step": 244 + }, + { + "ce_ib": 34.30471420288086, + "ce_orig": 0.7986380457878113, + "epoch": 0.07017039327054425, + "kl_loss": 997.249755859375, + "loss_ib": 10.006802558898926, + "step": 244 + }, + { + "epoch": 0.07045797684952189, + "grad_norm": 220.81076049804688, + "learning_rate": 7.611464968152867e-06, + "loss": 11.7147, + "step": 245 + }, + { + "ce_ib": 33.57966613769531, + "ce_orig": 0.7934867143630981, + "epoch": 0.07045797684952189, + "kl_loss": 1044.489501953125, + "loss_ib": 10.478473663330078, + "step": 245 + }, + { + "ce_ib": 30.60529327392578, + "ce_orig": 0.5140112638473511, + "epoch": 0.07045797684952189, + "kl_loss": 979.260498046875, + "loss_ib": 9.823209762573242, + "step": 245 + }, + { + "ce_ib": 36.327213287353516, + "ce_orig": 1.3819940090179443, + "epoch": 0.07045797684952189, + "kl_loss": 994.7232666015625, + "loss_ib": 9.983559608459473, + "step": 245 + }, + { + "ce_ib": 34.17820739746094, + "ce_orig": 0.8814669847488403, + "epoch": 0.07045797684952189, + "kl_loss": 885.5364990234375, + "loss_ib": 8.889543533325195, + "step": 245 + }, + { + "ce_ib": 34.66371154785156, + "ce_orig": 1.3104512691497803, + "epoch": 0.07074556042849953, + "kl_loss": 962.946533203125, + "loss_ib": 9.664129257202148, + "step": 246 + }, + { + "ce_ib": 32.48523712158203, + "ce_orig": 0.19654367864131927, + "epoch": 0.07074556042849953, + "kl_loss": 613.4555053710938, + "loss_ib": 6.1670403480529785, + "step": 246 + }, + { + "ce_ib": 37.2482795715332, + "ce_orig": 1.528881549835205, + "epoch": 0.07074556042849953, + "kl_loss": 901.1966552734375, + "loss_ib": 9.049215316772461, + "step": 246 + }, + { + "ce_ib": 32.179996490478516, + "ce_orig": 0.784110963344574, + "epoch": 0.07074556042849953, + "kl_loss": 928.5962524414062, + "loss_ib": 9.318142890930176, + "step": 246 + }, + { + "ce_ib": 29.451961517333984, + "ce_orig": 0.9916685819625854, + "epoch": 0.07103314400747718, + "kl_loss": 983.1722412109375, + "loss_ib": 9.861173629760742, + "step": 247 + }, + { + "ce_ib": 37.86883544921875, + "ce_orig": 1.3681284189224243, + "epoch": 0.07103314400747718, + "kl_loss": 783.9342041015625, + "loss_ib": 7.87721061706543, + "step": 247 + }, + { + "ce_ib": 35.45719528198242, + "ce_orig": 1.3227193355560303, + "epoch": 0.07103314400747718, + "kl_loss": 848.042724609375, + "loss_ib": 8.515884399414062, + "step": 247 + }, + { + "ce_ib": 32.25755310058594, + "ce_orig": 0.7888100147247314, + "epoch": 0.07103314400747718, + "kl_loss": 908.6535034179688, + "loss_ib": 9.118792533874512, + "step": 247 + }, + { + "ce_ib": 34.29731369018555, + "ce_orig": 1.3129619359970093, + "epoch": 0.07132072758645482, + "kl_loss": 884.159912109375, + "loss_ib": 8.875896453857422, + "step": 248 + }, + { + "ce_ib": 34.386695861816406, + "ce_orig": 0.8120501041412354, + "epoch": 0.07132072758645482, + "kl_loss": 871.235107421875, + "loss_ib": 8.746737480163574, + "step": 248 + }, + { + "ce_ib": 30.895753860473633, + "ce_orig": 0.7307835817337036, + "epoch": 0.07132072758645482, + "kl_loss": 892.11767578125, + "loss_ib": 8.952072143554688, + "step": 248 + }, + { + "ce_ib": 33.708763122558594, + "ce_orig": 0.9609125852584839, + "epoch": 0.07132072758645482, + "kl_loss": 908.0333251953125, + "loss_ib": 9.114041328430176, + "step": 248 + }, + { + "ce_ib": 33.838768005371094, + "ce_orig": 1.0209710597991943, + "epoch": 0.07160831116543245, + "kl_loss": 893.3701171875, + "loss_ib": 8.96753978729248, + "step": 249 + }, + { + "ce_ib": 32.03993606567383, + "ce_orig": 1.0714709758758545, + "epoch": 0.07160831116543245, + "kl_loss": 845.7508544921875, + "loss_ib": 8.489547729492188, + "step": 249 + }, + { + "ce_ib": 29.649864196777344, + "ce_orig": 1.0681530237197876, + "epoch": 0.07160831116543245, + "kl_loss": 1036.672119140625, + "loss_ib": 10.396370887756348, + "step": 249 + }, + { + "ce_ib": 32.387245178222656, + "ce_orig": 1.1253947019577026, + "epoch": 0.07160831116543245, + "kl_loss": 863.2434692382812, + "loss_ib": 8.66482162475586, + "step": 249 + }, + { + "epoch": 0.07189589474441009, + "grad_norm": 205.96604919433594, + "learning_rate": 7.770700636942676e-06, + "loss": 10.0614, + "step": 250 + }, + { + "ce_ib": 34.68352127075195, + "ce_orig": 1.0909476280212402, + "epoch": 0.07189589474441009, + "kl_loss": 867.9714965820312, + "loss_ib": 8.714398384094238, + "step": 250 + }, + { + "ce_ib": 31.70074462890625, + "ce_orig": 0.9184356331825256, + "epoch": 0.07189589474441009, + "kl_loss": 894.34326171875, + "loss_ib": 8.975132942199707, + "step": 250 + }, + { + "ce_ib": 36.209068298339844, + "ce_orig": 1.1706846952438354, + "epoch": 0.07189589474441009, + "kl_loss": 795.684814453125, + "loss_ib": 7.9930572509765625, + "step": 250 + }, + { + "ce_ib": 35.70771789550781, + "ce_orig": 0.712752640247345, + "epoch": 0.07189589474441009, + "kl_loss": 757.6834106445312, + "loss_ib": 7.612542152404785, + "step": 250 + }, + { + "ce_ib": 30.09554100036621, + "ce_orig": 0.7422066330909729, + "epoch": 0.07218347832338773, + "kl_loss": 993.2034301757812, + "loss_ib": 9.962129592895508, + "step": 251 + }, + { + "ce_ib": 30.356294631958008, + "ce_orig": 0.8336584568023682, + "epoch": 0.07218347832338773, + "kl_loss": 753.03955078125, + "loss_ib": 7.560751914978027, + "step": 251 + }, + { + "ce_ib": 34.888736724853516, + "ce_orig": 0.7718689441680908, + "epoch": 0.07218347832338773, + "kl_loss": 869.8005981445312, + "loss_ib": 8.732894897460938, + "step": 251 + }, + { + "ce_ib": 34.146759033203125, + "ce_orig": 1.3637772798538208, + "epoch": 0.07218347832338773, + "kl_loss": 775.9141235351562, + "loss_ib": 7.793287754058838, + "step": 251 + }, + { + "ce_ib": 36.634437561035156, + "ce_orig": 1.066004991531372, + "epoch": 0.07247106190236538, + "kl_loss": 765.8868408203125, + "loss_ib": 7.695502758026123, + "step": 252 + }, + { + "ce_ib": 34.45478439331055, + "ce_orig": 1.2148858308792114, + "epoch": 0.07247106190236538, + "kl_loss": 777.1328735351562, + "loss_ib": 7.805783748626709, + "step": 252 + }, + { + "ce_ib": 32.530982971191406, + "ce_orig": 1.1445003747940063, + "epoch": 0.07247106190236538, + "kl_loss": 722.8099975585938, + "loss_ib": 7.260631084442139, + "step": 252 + }, + { + "ce_ib": 33.494747161865234, + "ce_orig": 1.2305960655212402, + "epoch": 0.07247106190236538, + "kl_loss": 792.6470947265625, + "loss_ib": 7.959965705871582, + "step": 252 + }, + { + "ce_ib": 31.90201187133789, + "ce_orig": 1.0651308298110962, + "epoch": 0.07275864548134302, + "kl_loss": 772.0308227539062, + "loss_ib": 7.7522101402282715, + "step": 253 + }, + { + "ce_ib": 36.638572692871094, + "ce_orig": 1.0030566453933716, + "epoch": 0.07275864548134302, + "kl_loss": 877.4444580078125, + "loss_ib": 8.81108283996582, + "step": 253 + }, + { + "ce_ib": 33.78273010253906, + "ce_orig": 1.0988649129867554, + "epoch": 0.07275864548134302, + "kl_loss": 711.0679931640625, + "loss_ib": 7.144462585449219, + "step": 253 + }, + { + "ce_ib": 33.45583724975586, + "ce_orig": 0.7203549146652222, + "epoch": 0.07275864548134302, + "kl_loss": 781.1646728515625, + "loss_ib": 7.845102310180664, + "step": 253 + }, + { + "ce_ib": 33.97596740722656, + "ce_orig": 1.545408010482788, + "epoch": 0.07304622906032066, + "kl_loss": 652.7290649414062, + "loss_ib": 6.5612664222717285, + "step": 254 + }, + { + "ce_ib": 34.555152893066406, + "ce_orig": 0.9807875752449036, + "epoch": 0.07304622906032066, + "kl_loss": 776.7444458007812, + "loss_ib": 7.801999568939209, + "step": 254 + }, + { + "ce_ib": 31.55140495300293, + "ce_orig": 0.7501665949821472, + "epoch": 0.07304622906032066, + "kl_loss": 769.890869140625, + "loss_ib": 7.730460166931152, + "step": 254 + }, + { + "ce_ib": 35.186798095703125, + "ce_orig": 0.7437403202056885, + "epoch": 0.07304622906032066, + "kl_loss": 796.2337646484375, + "loss_ib": 7.997524261474609, + "step": 254 + }, + { + "epoch": 0.0733338126392983, + "grad_norm": 180.0628204345703, + "learning_rate": 7.929936305732485e-06, + "loss": 8.6844, + "step": 255 + }, + { + "ce_ib": 34.1368408203125, + "ce_orig": 0.9626627564430237, + "epoch": 0.0733338126392983, + "kl_loss": 747.2984008789062, + "loss_ib": 7.507120609283447, + "step": 255 + }, + { + "ce_ib": 31.835412979125977, + "ce_orig": 0.8661757707595825, + "epoch": 0.0733338126392983, + "kl_loss": 723.6868896484375, + "loss_ib": 7.268703937530518, + "step": 255 + }, + { + "ce_ib": 40.257713317871094, + "ce_orig": 2.202180862426758, + "epoch": 0.0733338126392983, + "kl_loss": 661.350341796875, + "loss_ib": 6.65376091003418, + "step": 255 + }, + { + "ce_ib": 32.313350677490234, + "ce_orig": 0.7933053374290466, + "epoch": 0.0733338126392983, + "kl_loss": 702.9135131835938, + "loss_ib": 7.061448097229004, + "step": 255 + }, + { + "ce_ib": 32.77560806274414, + "ce_orig": 0.7840645909309387, + "epoch": 0.07362139621827593, + "kl_loss": 670.623291015625, + "loss_ib": 6.73900842666626, + "step": 256 + }, + { + "ce_ib": 34.22809982299805, + "ce_orig": 0.9132450819015503, + "epoch": 0.07362139621827593, + "kl_loss": 655.1952514648438, + "loss_ib": 6.586180210113525, + "step": 256 + }, + { + "ce_ib": 36.01190948486328, + "ce_orig": 1.1352635622024536, + "epoch": 0.07362139621827593, + "kl_loss": 622.5662841796875, + "loss_ib": 6.261674880981445, + "step": 256 + }, + { + "ce_ib": 31.15489959716797, + "ce_orig": 0.7360689043998718, + "epoch": 0.07362139621827593, + "kl_loss": 706.380126953125, + "loss_ib": 7.094955921173096, + "step": 256 + }, + { + "ce_ib": 37.11478042602539, + "ce_orig": 0.8814412355422974, + "epoch": 0.07390897979725358, + "kl_loss": 654.782958984375, + "loss_ib": 6.584944248199463, + "step": 257 + }, + { + "ce_ib": 36.152000427246094, + "ce_orig": 1.3912250995635986, + "epoch": 0.07390897979725358, + "kl_loss": 541.4805297851562, + "loss_ib": 5.45095682144165, + "step": 257 + }, + { + "ce_ib": 35.5803108215332, + "ce_orig": 0.9307012557983398, + "epoch": 0.07390897979725358, + "kl_loss": 653.8729248046875, + "loss_ib": 6.574309349060059, + "step": 257 + }, + { + "ce_ib": 32.77763366699219, + "ce_orig": 1.1101514101028442, + "epoch": 0.07390897979725358, + "kl_loss": 638.899658203125, + "loss_ib": 6.421773910522461, + "step": 257 + }, + { + "ce_ib": 30.221391677856445, + "ce_orig": 0.6179237365722656, + "epoch": 0.07419656337623122, + "kl_loss": 560.8075561523438, + "loss_ib": 5.638297080993652, + "step": 258 + }, + { + "ce_ib": 33.3847770690918, + "ce_orig": 0.734990656375885, + "epoch": 0.07419656337623122, + "kl_loss": 551.2991333007812, + "loss_ib": 5.5463762283325195, + "step": 258 + }, + { + "ce_ib": 37.109310150146484, + "ce_orig": 1.5755938291549683, + "epoch": 0.07419656337623122, + "kl_loss": 494.017578125, + "loss_ib": 4.977284908294678, + "step": 258 + }, + { + "ce_ib": 35.911502838134766, + "ce_orig": 1.535288691520691, + "epoch": 0.07419656337623122, + "kl_loss": 573.640869140625, + "loss_ib": 5.772319793701172, + "step": 258 + }, + { + "ce_ib": 37.536102294921875, + "ce_orig": 0.8849014043807983, + "epoch": 0.07448414695520886, + "kl_loss": 512.6968994140625, + "loss_ib": 5.1645050048828125, + "step": 259 + }, + { + "ce_ib": 33.12932586669922, + "ce_orig": 0.5551506876945496, + "epoch": 0.07448414695520886, + "kl_loss": 503.11016845703125, + "loss_ib": 5.064230918884277, + "step": 259 + }, + { + "ce_ib": 33.1467170715332, + "ce_orig": 1.3075788021087646, + "epoch": 0.07448414695520886, + "kl_loss": 618.4505615234375, + "loss_ib": 6.217652320861816, + "step": 259 + }, + { + "ce_ib": 34.920448303222656, + "ce_orig": 0.9793207049369812, + "epoch": 0.07448414695520886, + "kl_loss": 603.3938598632812, + "loss_ib": 6.068859100341797, + "step": 259 + }, + { + "epoch": 0.0747717305341865, + "grad_norm": 157.27696228027344, + "learning_rate": 8.089171974522295e-06, + "loss": 7.372, + "step": 260 + }, + { + "ce_ib": 35.68424987792969, + "ce_orig": 1.031170129776001, + "epoch": 0.0747717305341865, + "kl_loss": 524.8594970703125, + "loss_ib": 5.284278869628906, + "step": 260 + }, + { + "ce_ib": 35.50361633300781, + "ce_orig": 0.5432813167572021, + "epoch": 0.0747717305341865, + "kl_loss": 531.8944091796875, + "loss_ib": 5.354447364807129, + "step": 260 + }, + { + "ce_ib": 34.80183792114258, + "ce_orig": 0.8772653937339783, + "epoch": 0.0747717305341865, + "kl_loss": 527.3760986328125, + "loss_ib": 5.308562278747559, + "step": 260 + }, + { + "ce_ib": 34.62561798095703, + "ce_orig": 0.8580355048179626, + "epoch": 0.0747717305341865, + "kl_loss": 524.876708984375, + "loss_ib": 5.283392906188965, + "step": 260 + }, + { + "ce_ib": 33.79844665527344, + "ce_orig": 0.6697705984115601, + "epoch": 0.07505931411316413, + "kl_loss": 518.130126953125, + "loss_ib": 5.215099334716797, + "step": 261 + }, + { + "ce_ib": 35.4140739440918, + "ce_orig": 1.116640567779541, + "epoch": 0.07505931411316413, + "kl_loss": 495.97052001953125, + "loss_ib": 4.995119094848633, + "step": 261 + }, + { + "ce_ib": 40.22637176513672, + "ce_orig": 1.194669485092163, + "epoch": 0.07505931411316413, + "kl_loss": 483.3502197265625, + "loss_ib": 4.8737287521362305, + "step": 261 + }, + { + "ce_ib": 37.521358489990234, + "ce_orig": 1.1145161390304565, + "epoch": 0.07505931411316413, + "kl_loss": 467.95684814453125, + "loss_ib": 4.717089653015137, + "step": 261 + }, + { + "ce_ib": 37.80555725097656, + "ce_orig": 0.8788209557533264, + "epoch": 0.07534689769214178, + "kl_loss": 423.5547180175781, + "loss_ib": 4.27335262298584, + "step": 262 + }, + { + "ce_ib": 36.85504150390625, + "ce_orig": 0.5465120077133179, + "epoch": 0.07534689769214178, + "kl_loss": 476.8722229003906, + "loss_ib": 4.805577278137207, + "step": 262 + }, + { + "ce_ib": 37.499755859375, + "ce_orig": 1.1639437675476074, + "epoch": 0.07534689769214178, + "kl_loss": 467.90582275390625, + "loss_ib": 4.71655797958374, + "step": 262 + }, + { + "ce_ib": 36.78924560546875, + "ce_orig": 1.2826528549194336, + "epoch": 0.07534689769214178, + "kl_loss": 477.6324768066406, + "loss_ib": 4.813113689422607, + "step": 262 + }, + { + "ce_ib": 38.421451568603516, + "ce_orig": 0.8689420819282532, + "epoch": 0.07563448127111942, + "kl_loss": 477.5055847167969, + "loss_ib": 4.813477039337158, + "step": 263 + }, + { + "ce_ib": 35.91413879394531, + "ce_orig": 0.8632240891456604, + "epoch": 0.07563448127111942, + "kl_loss": 485.8994445800781, + "loss_ib": 4.894908428192139, + "step": 263 + }, + { + "ce_ib": 39.28192901611328, + "ce_orig": 0.877941370010376, + "epoch": 0.07563448127111942, + "kl_loss": 447.78070068359375, + "loss_ib": 4.517088890075684, + "step": 263 + }, + { + "ce_ib": 40.37826156616211, + "ce_orig": 0.8957875370979309, + "epoch": 0.07563448127111942, + "kl_loss": 403.5020446777344, + "loss_ib": 4.0753984451293945, + "step": 263 + }, + { + "ce_ib": 42.27157974243164, + "ce_orig": 1.3913816213607788, + "epoch": 0.07592206485009706, + "kl_loss": 423.022216796875, + "loss_ib": 4.272493839263916, + "step": 264 + }, + { + "ce_ib": 36.27720260620117, + "ce_orig": 1.0942326784133911, + "epoch": 0.07592206485009706, + "kl_loss": 400.34161376953125, + "loss_ib": 4.039693355560303, + "step": 264 + }, + { + "ce_ib": 33.6429328918457, + "ce_orig": 0.8256592154502869, + "epoch": 0.07592206485009706, + "kl_loss": 424.09423828125, + "loss_ib": 4.274585247039795, + "step": 264 + }, + { + "ce_ib": 38.3378791809082, + "ce_orig": 1.0243308544158936, + "epoch": 0.07592206485009706, + "kl_loss": 472.14508056640625, + "loss_ib": 4.759788513183594, + "step": 264 + }, + { + "epoch": 0.0762096484290747, + "grad_norm": 123.24594116210938, + "learning_rate": 8.248407643312102e-06, + "loss": 5.7351, + "step": 265 + }, + { + "ce_ib": 38.452537536621094, + "ce_orig": 1.0122849941253662, + "epoch": 0.0762096484290747, + "kl_loss": 435.57763671875, + "loss_ib": 4.394228935241699, + "step": 265 + }, + { + "ce_ib": 39.90632629394531, + "ce_orig": 1.178553819656372, + "epoch": 0.0762096484290747, + "kl_loss": 329.4811096191406, + "loss_ib": 3.334717273712158, + "step": 265 + }, + { + "ce_ib": 34.292686462402344, + "ce_orig": 1.4103134870529175, + "epoch": 0.0762096484290747, + "kl_loss": 385.14447021484375, + "loss_ib": 3.885737180709839, + "step": 265 + }, + { + "ce_ib": 42.36109161376953, + "ce_orig": 1.1843057870864868, + "epoch": 0.0762096484290747, + "kl_loss": 398.69232177734375, + "loss_ib": 4.029284477233887, + "step": 265 + }, + { + "ce_ib": 38.15491485595703, + "ce_orig": 1.1772407293319702, + "epoch": 0.07649723200805233, + "kl_loss": 360.84259033203125, + "loss_ib": 3.646580696105957, + "step": 266 + }, + { + "ce_ib": 43.11347961425781, + "ce_orig": 1.3642669916152954, + "epoch": 0.07649723200805233, + "kl_loss": 326.4147033691406, + "loss_ib": 3.307260274887085, + "step": 266 + }, + { + "ce_ib": 46.28087615966797, + "ce_orig": 1.7264760732650757, + "epoch": 0.07649723200805233, + "kl_loss": 390.9441223144531, + "loss_ib": 3.955721855163574, + "step": 266 + }, + { + "ce_ib": 44.236488342285156, + "ce_orig": 2.0390701293945312, + "epoch": 0.07649723200805233, + "kl_loss": 340.2770690917969, + "loss_ib": 3.447007179260254, + "step": 266 + }, + { + "ce_ib": 48.65879821777344, + "ce_orig": 1.3260499238967896, + "epoch": 0.07678481558702999, + "kl_loss": 225.09542846679688, + "loss_ib": 2.2996129989624023, + "step": 267 + }, + { + "ce_ib": 44.989524841308594, + "ce_orig": 0.7958585619926453, + "epoch": 0.07678481558702999, + "kl_loss": 330.1822509765625, + "loss_ib": 3.3468120098114014, + "step": 267 + }, + { + "ce_ib": 44.695777893066406, + "ce_orig": 1.6154531240463257, + "epoch": 0.07678481558702999, + "kl_loss": 283.551513671875, + "loss_ib": 2.8802108764648438, + "step": 267 + }, + { + "ce_ib": 50.11431884765625, + "ce_orig": 2.1371569633483887, + "epoch": 0.07678481558702999, + "kl_loss": 288.13592529296875, + "loss_ib": 2.931473731994629, + "step": 267 + }, + { + "ce_ib": 49.849369049072266, + "ce_orig": 1.6994304656982422, + "epoch": 0.07707239916600762, + "kl_loss": 307.9149169921875, + "loss_ib": 3.128998279571533, + "step": 268 + }, + { + "ce_ib": 46.516693115234375, + "ce_orig": 2.531648635864258, + "epoch": 0.07707239916600762, + "kl_loss": 257.6400451660156, + "loss_ib": 2.6229171752929688, + "step": 268 + }, + { + "ce_ib": 49.18770980834961, + "ce_orig": 0.8902948498725891, + "epoch": 0.07707239916600762, + "kl_loss": 287.6979064941406, + "loss_ib": 2.926166534423828, + "step": 268 + }, + { + "ce_ib": 43.51984786987305, + "ce_orig": 0.8550523519515991, + "epoch": 0.07707239916600762, + "kl_loss": 319.45574951171875, + "loss_ib": 3.238077402114868, + "step": 268 + }, + { + "ce_ib": 57.45269012451172, + "ce_orig": 1.6990851163864136, + "epoch": 0.07735998274498526, + "kl_loss": 247.26651000976562, + "loss_ib": 2.5301177501678467, + "step": 269 + }, + { + "ce_ib": 53.520240783691406, + "ce_orig": 1.453221082687378, + "epoch": 0.07735998274498526, + "kl_loss": 240.28468322753906, + "loss_ib": 2.456367015838623, + "step": 269 + }, + { + "ce_ib": 45.94785690307617, + "ce_orig": 1.0849944353103638, + "epoch": 0.07735998274498526, + "kl_loss": 244.33505249023438, + "loss_ib": 2.4892983436584473, + "step": 269 + }, + { + "ce_ib": 43.29793930053711, + "ce_orig": 1.103232741355896, + "epoch": 0.07735998274498526, + "kl_loss": 253.640625, + "loss_ib": 2.5797042846679688, + "step": 269 + }, + { + "epoch": 0.0776475663239629, + "grad_norm": 89.99483489990234, + "learning_rate": 8.407643312101912e-06, + "loss": 4.4374, + "step": 270 + }, + { + "ce_ib": 39.90031814575195, + "ce_orig": 0.8725808262825012, + "epoch": 0.0776475663239629, + "kl_loss": 259.3697509765625, + "loss_ib": 2.6335976123809814, + "step": 270 + }, + { + "ce_ib": 42.03105926513672, + "ce_orig": 0.8716458678245544, + "epoch": 0.0776475663239629, + "kl_loss": 230.16326904296875, + "loss_ib": 2.3436636924743652, + "step": 270 + }, + { + "ce_ib": 42.68729782104492, + "ce_orig": 0.665064811706543, + "epoch": 0.0776475663239629, + "kl_loss": 249.3673095703125, + "loss_ib": 2.536360263824463, + "step": 270 + }, + { + "ce_ib": 39.38070297241211, + "ce_orig": 1.2106064558029175, + "epoch": 0.0776475663239629, + "kl_loss": 242.24522399902344, + "loss_ib": 2.4618327617645264, + "step": 270 + }, + { + "ce_ib": 45.427120208740234, + "ce_orig": 0.9336494207382202, + "epoch": 0.07793514990294054, + "kl_loss": 236.5987548828125, + "loss_ib": 2.411414623260498, + "step": 271 + }, + { + "ce_ib": 40.51905059814453, + "ce_orig": 0.82356858253479, + "epoch": 0.07793514990294054, + "kl_loss": 320.950927734375, + "loss_ib": 3.250028371810913, + "step": 271 + }, + { + "ce_ib": 45.87284469604492, + "ce_orig": 2.147392988204956, + "epoch": 0.07793514990294054, + "kl_loss": 183.42807006835938, + "loss_ib": 1.8801534175872803, + "step": 271 + }, + { + "ce_ib": 39.21931838989258, + "ce_orig": 0.6727441549301147, + "epoch": 0.07793514990294054, + "kl_loss": 259.3567199707031, + "loss_ib": 2.632786512374878, + "step": 271 + }, + { + "ce_ib": 47.522220611572266, + "ce_orig": 2.0349419116973877, + "epoch": 0.07822273348191819, + "kl_loss": 171.4339141845703, + "loss_ib": 1.7618613243103027, + "step": 272 + }, + { + "ce_ib": 52.649227142333984, + "ce_orig": 1.1070398092269897, + "epoch": 0.07822273348191819, + "kl_loss": 232.63453674316406, + "loss_ib": 2.3789944648742676, + "step": 272 + }, + { + "ce_ib": 46.19776916503906, + "ce_orig": 1.476123332977295, + "epoch": 0.07822273348191819, + "kl_loss": 210.17898559570312, + "loss_ib": 2.1479876041412354, + "step": 272 + }, + { + "ce_ib": 48.105499267578125, + "ce_orig": 1.1524395942687988, + "epoch": 0.07822273348191819, + "kl_loss": 196.00885009765625, + "loss_ib": 2.0081939697265625, + "step": 272 + }, + { + "ce_ib": 46.68330383300781, + "ce_orig": 0.884026050567627, + "epoch": 0.07851031706089583, + "kl_loss": 164.7951202392578, + "loss_ib": 1.6946345567703247, + "step": 273 + }, + { + "ce_ib": 46.4140739440918, + "ce_orig": 1.2737939357757568, + "epoch": 0.07851031706089583, + "kl_loss": 168.782470703125, + "loss_ib": 1.7342387437820435, + "step": 273 + }, + { + "ce_ib": 44.1322135925293, + "ce_orig": 0.8222432136535645, + "epoch": 0.07851031706089583, + "kl_loss": 183.51260375976562, + "loss_ib": 1.8792582750320435, + "step": 273 + }, + { + "ce_ib": 47.30166244506836, + "ce_orig": 1.5957039594650269, + "epoch": 0.07851031706089583, + "kl_loss": 154.28269958496094, + "loss_ib": 1.5901285409927368, + "step": 273 + }, + { + "ce_ib": 49.634849548339844, + "ce_orig": 1.259581208229065, + "epoch": 0.07879790063987346, + "kl_loss": 156.60406494140625, + "loss_ib": 1.615675449371338, + "step": 274 + }, + { + "ce_ib": 45.53166198730469, + "ce_orig": 1.591690182685852, + "epoch": 0.07879790063987346, + "kl_loss": 165.76138305664062, + "loss_ib": 1.7031453847885132, + "step": 274 + }, + { + "ce_ib": 48.556114196777344, + "ce_orig": 1.4561307430267334, + "epoch": 0.07879790063987346, + "kl_loss": 145.72293090820312, + "loss_ib": 1.505785346031189, + "step": 274 + }, + { + "ce_ib": 51.11494064331055, + "ce_orig": 1.5235646963119507, + "epoch": 0.07879790063987346, + "kl_loss": 142.93948364257812, + "loss_ib": 1.4805097579956055, + "step": 274 + }, + { + "epoch": 0.0790854842188511, + "grad_norm": 66.16566467285156, + "learning_rate": 8.566878980891721e-06, + "loss": 3.4286, + "step": 275 + }, + { + "ce_ib": 39.09020233154297, + "ce_orig": 0.8260847330093384, + "epoch": 0.0790854842188511, + "kl_loss": 139.89077758789062, + "loss_ib": 1.4379980564117432, + "step": 275 + }, + { + "ce_ib": 48.023948669433594, + "ce_orig": 1.396461009979248, + "epoch": 0.0790854842188511, + "kl_loss": 146.98944091796875, + "loss_ib": 1.517918348312378, + "step": 275 + }, + { + "ce_ib": 42.226966857910156, + "ce_orig": 1.0770304203033447, + "epoch": 0.0790854842188511, + "kl_loss": 156.64419555664062, + "loss_ib": 1.6086689233779907, + "step": 275 + }, + { + "ce_ib": 36.58985900878906, + "ce_orig": 1.2845979928970337, + "epoch": 0.0790854842188511, + "kl_loss": 211.11338806152344, + "loss_ib": 2.14772367477417, + "step": 275 + }, + { + "ce_ib": 44.49997329711914, + "ce_orig": 1.6609095335006714, + "epoch": 0.07937306779782874, + "kl_loss": 128.6660614013672, + "loss_ib": 1.331160545349121, + "step": 276 + }, + { + "ce_ib": 40.899295806884766, + "ce_orig": 1.6309945583343506, + "epoch": 0.07937306779782874, + "kl_loss": 139.94818115234375, + "loss_ib": 1.4403811693191528, + "step": 276 + }, + { + "ce_ib": 40.4159049987793, + "ce_orig": 0.8224959373474121, + "epoch": 0.07937306779782874, + "kl_loss": 169.31053161621094, + "loss_ib": 1.7335212230682373, + "step": 276 + }, + { + "ce_ib": 40.17991638183594, + "ce_orig": 1.1940970420837402, + "epoch": 0.07937306779782874, + "kl_loss": 130.6182403564453, + "loss_ib": 1.3463622331619263, + "step": 276 + }, + { + "ce_ib": 48.19374465942383, + "ce_orig": 1.0767353773117065, + "epoch": 0.07966065137680639, + "kl_loss": 152.88658142089844, + "loss_ib": 1.5770596265792847, + "step": 277 + }, + { + "ce_ib": 48.65080261230469, + "ce_orig": 1.0481511354446411, + "epoch": 0.07966065137680639, + "kl_loss": 114.82660675048828, + "loss_ib": 1.1969168186187744, + "step": 277 + }, + { + "ce_ib": 39.97406768798828, + "ce_orig": 1.4612183570861816, + "epoch": 0.07966065137680639, + "kl_loss": 131.66680908203125, + "loss_ib": 1.3566421270370483, + "step": 277 + }, + { + "ce_ib": 42.1298828125, + "ce_orig": 1.1980208158493042, + "epoch": 0.07966065137680639, + "kl_loss": 127.7425308227539, + "loss_ib": 1.3195551633834839, + "step": 277 + }, + { + "ce_ib": 40.26860427856445, + "ce_orig": 1.4410648345947266, + "epoch": 0.07994823495578403, + "kl_loss": 95.930908203125, + "loss_ib": 0.9995777010917664, + "step": 278 + }, + { + "ce_ib": 40.977745056152344, + "ce_orig": 1.2464489936828613, + "epoch": 0.07994823495578403, + "kl_loss": 109.65506744384766, + "loss_ib": 1.1375283002853394, + "step": 278 + }, + { + "ce_ib": 42.28449249267578, + "ce_orig": 0.9634944796562195, + "epoch": 0.07994823495578403, + "kl_loss": 126.33438110351562, + "loss_ib": 1.3056282997131348, + "step": 278 + }, + { + "ce_ib": 35.779815673828125, + "ce_orig": 1.0111249685287476, + "epoch": 0.07994823495578403, + "kl_loss": 108.74060821533203, + "loss_ib": 1.1231858730316162, + "step": 278 + }, + { + "ce_ib": 41.58162307739258, + "ce_orig": 0.858116626739502, + "epoch": 0.08023581853476167, + "kl_loss": 94.94490051269531, + "loss_ib": 0.9910306334495544, + "step": 279 + }, + { + "ce_ib": 40.43954086303711, + "ce_orig": 1.5376956462860107, + "epoch": 0.08023581853476167, + "kl_loss": 114.16512298583984, + "loss_ib": 1.1820907592773438, + "step": 279 + }, + { + "ce_ib": 39.77493667602539, + "ce_orig": 1.4295803308486938, + "epoch": 0.08023581853476167, + "kl_loss": 97.94513702392578, + "loss_ib": 1.019226312637329, + "step": 279 + }, + { + "ce_ib": 37.93669509887695, + "ce_orig": 0.9915726780891418, + "epoch": 0.08023581853476167, + "kl_loss": 100.8277359008789, + "loss_ib": 1.046213984489441, + "step": 279 + }, + { + "epoch": 0.0805234021137393, + "grad_norm": 46.49250793457031, + "learning_rate": 8.726114649681529e-06, + "loss": 2.5267, + "step": 280 + }, + { + "ce_ib": 40.85035705566406, + "ce_orig": 0.9079654216766357, + "epoch": 0.0805234021137393, + "kl_loss": 103.09587860107422, + "loss_ib": 1.07180917263031, + "step": 280 + }, + { + "ce_ib": 36.37861633300781, + "ce_orig": 0.7120267152786255, + "epoch": 0.0805234021137393, + "kl_loss": 100.90724182128906, + "loss_ib": 1.045451045036316, + "step": 280 + }, + { + "ce_ib": 41.00496292114258, + "ce_orig": 0.710496187210083, + "epoch": 0.0805234021137393, + "kl_loss": 125.49755096435547, + "loss_ib": 1.295980453491211, + "step": 280 + }, + { + "ce_ib": 41.43547058105469, + "ce_orig": 1.6307661533355713, + "epoch": 0.0805234021137393, + "kl_loss": 75.49640655517578, + "loss_ib": 0.7963995337486267, + "step": 280 + }, + { + "ce_ib": 35.050662994384766, + "ce_orig": 0.9704921245574951, + "epoch": 0.08081098569271694, + "kl_loss": 99.29730224609375, + "loss_ib": 1.028023600578308, + "step": 281 + }, + { + "ce_ib": 37.16071701049805, + "ce_orig": 0.9510385394096375, + "epoch": 0.08081098569271694, + "kl_loss": 84.18745422363281, + "loss_ib": 0.879035234451294, + "step": 281 + }, + { + "ce_ib": 40.97324752807617, + "ce_orig": 0.6980030536651611, + "epoch": 0.08081098569271694, + "kl_loss": 85.10052490234375, + "loss_ib": 0.8919785022735596, + "step": 281 + }, + { + "ce_ib": 37.02272033691406, + "ce_orig": 1.8392256498336792, + "epoch": 0.08081098569271694, + "kl_loss": 110.20343017578125, + "loss_ib": 1.1390570402145386, + "step": 281 + }, + { + "ce_ib": 37.121299743652344, + "ce_orig": 1.2205618619918823, + "epoch": 0.08109856927169459, + "kl_loss": 70.73722839355469, + "loss_ib": 0.7444935441017151, + "step": 282 + }, + { + "ce_ib": 37.17251968383789, + "ce_orig": 1.2131117582321167, + "epoch": 0.08109856927169459, + "kl_loss": 72.53062438964844, + "loss_ib": 0.762478768825531, + "step": 282 + }, + { + "ce_ib": 38.68415832519531, + "ce_orig": 1.2195942401885986, + "epoch": 0.08109856927169459, + "kl_loss": 97.90969848632812, + "loss_ib": 1.017781138420105, + "step": 282 + }, + { + "ce_ib": 44.60282897949219, + "ce_orig": 2.035943031311035, + "epoch": 0.08109856927169459, + "kl_loss": 74.025634765625, + "loss_ib": 0.7848591208457947, + "step": 282 + }, + { + "ce_ib": 32.7514533996582, + "ce_orig": 1.1092407703399658, + "epoch": 0.08138615285067223, + "kl_loss": 64.70214080810547, + "loss_ib": 0.6797728538513184, + "step": 283 + }, + { + "ce_ib": 34.242916107177734, + "ce_orig": 1.563724160194397, + "epoch": 0.08138615285067223, + "kl_loss": 74.12162780761719, + "loss_ib": 0.7754591703414917, + "step": 283 + }, + { + "ce_ib": 39.811283111572266, + "ce_orig": 1.3022565841674805, + "epoch": 0.08138615285067223, + "kl_loss": 79.62696075439453, + "loss_ib": 0.8360808491706848, + "step": 283 + }, + { + "ce_ib": 38.87461853027344, + "ce_orig": 0.7815361618995667, + "epoch": 0.08138615285067223, + "kl_loss": 73.88887023925781, + "loss_ib": 0.7777632474899292, + "step": 283 + }, + { + "ce_ib": 31.66046905517578, + "ce_orig": 1.1096495389938354, + "epoch": 0.08167373642964987, + "kl_loss": 92.97333526611328, + "loss_ib": 0.9613937735557556, + "step": 284 + }, + { + "ce_ib": 36.92252731323242, + "ce_orig": 0.7650741338729858, + "epoch": 0.08167373642964987, + "kl_loss": 67.56904602050781, + "loss_ib": 0.7126129865646362, + "step": 284 + }, + { + "ce_ib": 40.48139953613281, + "ce_orig": 1.4183719158172607, + "epoch": 0.08167373642964987, + "kl_loss": 63.496063232421875, + "loss_ib": 0.6754420399665833, + "step": 284 + }, + { + "ce_ib": 31.517629623413086, + "ce_orig": 0.8108515739440918, + "epoch": 0.08167373642964987, + "kl_loss": 62.563720703125, + "loss_ib": 0.6571548581123352, + "step": 284 + }, + { + "epoch": 0.0819613200086275, + "grad_norm": 30.389598846435547, + "learning_rate": 8.885350318471338e-06, + "loss": 2.0103, + "step": 285 + }, + { + "ce_ib": 32.3922119140625, + "ce_orig": 0.8194482326507568, + "epoch": 0.0819613200086275, + "kl_loss": 72.32354736328125, + "loss_ib": 0.7556276321411133, + "step": 285 + }, + { + "ce_ib": 38.804710388183594, + "ce_orig": 0.9490994811058044, + "epoch": 0.0819613200086275, + "kl_loss": 65.6147689819336, + "loss_ib": 0.6949523687362671, + "step": 285 + }, + { + "ce_ib": 34.912132263183594, + "ce_orig": 1.0493797063827515, + "epoch": 0.0819613200086275, + "kl_loss": 53.544273376464844, + "loss_ib": 0.5703548789024353, + "step": 285 + }, + { + "ce_ib": 32.628414154052734, + "ce_orig": 0.8485954403877258, + "epoch": 0.0819613200086275, + "kl_loss": 102.72671508789062, + "loss_ib": 1.0598955154418945, + "step": 285 + }, + { + "ce_ib": 37.96284866333008, + "ce_orig": 1.6377449035644531, + "epoch": 0.08224890358760514, + "kl_loss": 51.751041412353516, + "loss_ib": 0.555473268032074, + "step": 286 + }, + { + "ce_ib": 37.74256134033203, + "ce_orig": 1.1003116369247437, + "epoch": 0.08224890358760514, + "kl_loss": 77.45193481445312, + "loss_ib": 0.8122618794441223, + "step": 286 + }, + { + "ce_ib": 36.4458122253418, + "ce_orig": 1.4265295267105103, + "epoch": 0.08224890358760514, + "kl_loss": 75.32568359375, + "loss_ib": 0.7897026538848877, + "step": 286 + }, + { + "ce_ib": 38.405860900878906, + "ce_orig": 1.6222838163375854, + "epoch": 0.08224890358760514, + "kl_loss": 59.407928466796875, + "loss_ib": 0.6324851512908936, + "step": 286 + }, + { + "ce_ib": 32.6006965637207, + "ce_orig": 0.6943832635879517, + "epoch": 0.0825364871665828, + "kl_loss": 55.28700256347656, + "loss_ib": 0.5854707360267639, + "step": 287 + }, + { + "ce_ib": 36.58828353881836, + "ce_orig": 1.536011815071106, + "epoch": 0.0825364871665828, + "kl_loss": 66.80679321289062, + "loss_ib": 0.704656183719635, + "step": 287 + }, + { + "ce_ib": 28.62580680847168, + "ce_orig": 0.7646328210830688, + "epoch": 0.0825364871665828, + "kl_loss": 55.685218811035156, + "loss_ib": 0.5854779481887817, + "step": 287 + }, + { + "ce_ib": 33.84457778930664, + "ce_orig": 0.9994617700576782, + "epoch": 0.0825364871665828, + "kl_loss": 58.716033935546875, + "loss_ib": 0.6210048794746399, + "step": 287 + }, + { + "ce_ib": 30.92181968688965, + "ce_orig": 1.1378878355026245, + "epoch": 0.08282407074556043, + "kl_loss": 56.736080169677734, + "loss_ib": 0.5982826352119446, + "step": 288 + }, + { + "ce_ib": 33.0194091796875, + "ce_orig": 0.7808621525764465, + "epoch": 0.08282407074556043, + "kl_loss": 66.98883056640625, + "loss_ib": 0.7029076814651489, + "step": 288 + }, + { + "ce_ib": 34.579010009765625, + "ce_orig": 0.7823261022567749, + "epoch": 0.08282407074556043, + "kl_loss": 51.778160095214844, + "loss_ib": 0.5523605942726135, + "step": 288 + }, + { + "ce_ib": 31.606508255004883, + "ce_orig": 0.74196857213974, + "epoch": 0.08282407074556043, + "kl_loss": 49.30632019042969, + "loss_ib": 0.5246697068214417, + "step": 288 + }, + { + "ce_ib": 34.344478607177734, + "ce_orig": 0.6873656511306763, + "epoch": 0.08311165432453807, + "kl_loss": 53.74017333984375, + "loss_ib": 0.5717462301254272, + "step": 289 + }, + { + "ce_ib": 33.72829818725586, + "ce_orig": 1.6017377376556396, + "epoch": 0.08311165432453807, + "kl_loss": 46.42070770263672, + "loss_ib": 0.49793535470962524, + "step": 289 + }, + { + "ce_ib": 31.348825454711914, + "ce_orig": 1.2451566457748413, + "epoch": 0.08311165432453807, + "kl_loss": 46.207916259765625, + "loss_ib": 0.49342799186706543, + "step": 289 + }, + { + "ce_ib": 31.967754364013672, + "ce_orig": 0.8155576586723328, + "epoch": 0.08311165432453807, + "kl_loss": 50.1954460144043, + "loss_ib": 0.5339221954345703, + "step": 289 + }, + { + "epoch": 0.08339923790351571, + "grad_norm": 17.25715446472168, + "learning_rate": 9.044585987261148e-06, + "loss": 1.716, + "step": 290 + }, + { + "ce_ib": 29.394428253173828, + "ce_orig": 0.6837283968925476, + "epoch": 0.08339923790351571, + "kl_loss": 50.952415466308594, + "loss_ib": 0.5389185547828674, + "step": 290 + }, + { + "ce_ib": 29.528079986572266, + "ce_orig": 0.9319428205490112, + "epoch": 0.08339923790351571, + "kl_loss": 49.36161804199219, + "loss_ib": 0.5231442451477051, + "step": 290 + }, + { + "ce_ib": 31.811763763427734, + "ce_orig": 1.0387821197509766, + "epoch": 0.08339923790351571, + "kl_loss": 46.64958572387695, + "loss_ib": 0.49830758571624756, + "step": 290 + }, + { + "ce_ib": 26.820646286010742, + "ce_orig": 1.016945719718933, + "epoch": 0.08339923790351571, + "kl_loss": 50.506019592285156, + "loss_ib": 0.531880795955658, + "step": 290 + }, + { + "ce_ib": 31.8367919921875, + "ce_orig": 1.025216817855835, + "epoch": 0.08368682148249335, + "kl_loss": 35.0272216796875, + "loss_ib": 0.38210898637771606, + "step": 291 + }, + { + "ce_ib": 33.226322174072266, + "ce_orig": 1.2911221981048584, + "epoch": 0.08368682148249335, + "kl_loss": 58.08740997314453, + "loss_ib": 0.6141003966331482, + "step": 291 + }, + { + "ce_ib": 32.05693435668945, + "ce_orig": 1.126991629600525, + "epoch": 0.08368682148249335, + "kl_loss": 47.8099479675293, + "loss_ib": 0.5101563930511475, + "step": 291 + }, + { + "ce_ib": 31.257158279418945, + "ce_orig": 0.95924311876297, + "epoch": 0.08368682148249335, + "kl_loss": 45.37004089355469, + "loss_ib": 0.48495757579803467, + "step": 291 + }, + { + "ce_ib": 30.047136306762695, + "ce_orig": 1.108436942100525, + "epoch": 0.083974405061471, + "kl_loss": 44.887725830078125, + "loss_ib": 0.47892439365386963, + "step": 292 + }, + { + "ce_ib": 29.945829391479492, + "ce_orig": 1.0467983484268188, + "epoch": 0.083974405061471, + "kl_loss": 35.674888610839844, + "loss_ib": 0.38669469952583313, + "step": 292 + }, + { + "ce_ib": 31.76511001586914, + "ce_orig": 1.2328693866729736, + "epoch": 0.083974405061471, + "kl_loss": 43.979034423828125, + "loss_ib": 0.4715554416179657, + "step": 292 + }, + { + "ce_ib": 32.23439407348633, + "ce_orig": 1.2534009218215942, + "epoch": 0.083974405061471, + "kl_loss": 38.1639518737793, + "loss_ib": 0.4138738811016083, + "step": 292 + }, + { + "ce_ib": 30.920150756835938, + "ce_orig": 0.8429147005081177, + "epoch": 0.08426198864044863, + "kl_loss": 42.72565460205078, + "loss_ib": 0.4581766724586487, + "step": 293 + }, + { + "ce_ib": 29.249128341674805, + "ce_orig": 1.381933331489563, + "epoch": 0.08426198864044863, + "kl_loss": 38.22590255737305, + "loss_ib": 0.41150814294815063, + "step": 293 + }, + { + "ce_ib": 27.40785026550293, + "ce_orig": 0.7817553877830505, + "epoch": 0.08426198864044863, + "kl_loss": 34.802711486816406, + "loss_ib": 0.375434935092926, + "step": 293 + }, + { + "ce_ib": 31.316415786743164, + "ce_orig": 0.8332533836364746, + "epoch": 0.08426198864044863, + "kl_loss": 44.91577911376953, + "loss_ib": 0.48047420382499695, + "step": 293 + }, + { + "ce_ib": 25.795753479003906, + "ce_orig": 1.2377901077270508, + "epoch": 0.08454957221942627, + "kl_loss": 40.89204406738281, + "loss_ib": 0.4347161650657654, + "step": 294 + }, + { + "ce_ib": 32.04143142700195, + "ce_orig": 0.7857619524002075, + "epoch": 0.08454957221942627, + "kl_loss": 36.315128326416016, + "loss_ib": 0.3951927125453949, + "step": 294 + }, + { + "ce_ib": 29.322782516479492, + "ce_orig": 0.8479982614517212, + "epoch": 0.08454957221942627, + "kl_loss": 43.97806167602539, + "loss_ib": 0.4691033959388733, + "step": 294 + }, + { + "ce_ib": 27.92338752746582, + "ce_orig": 0.9372240900993347, + "epoch": 0.08454957221942627, + "kl_loss": 34.369789123535156, + "loss_ib": 0.3716212809085846, + "step": 294 + }, + { + "epoch": 0.08483715579840391, + "grad_norm": 11.152618408203125, + "learning_rate": 9.203821656050957e-06, + "loss": 1.4786, + "step": 295 + }, + { + "ce_ib": 31.50551986694336, + "ce_orig": 0.7309855818748474, + "epoch": 0.08483715579840391, + "kl_loss": 36.99781799316406, + "loss_ib": 0.4014836847782135, + "step": 295 + }, + { + "ce_ib": 28.562597274780273, + "ce_orig": 0.9782903790473938, + "epoch": 0.08483715579840391, + "kl_loss": 40.10491180419922, + "loss_ib": 0.4296116828918457, + "step": 295 + }, + { + "ce_ib": 24.42827796936035, + "ce_orig": 0.8424835205078125, + "epoch": 0.08483715579840391, + "kl_loss": 33.25676727294922, + "loss_ib": 0.35699597001075745, + "step": 295 + }, + { + "ce_ib": 29.361215591430664, + "ce_orig": 0.6136335730552673, + "epoch": 0.08483715579840391, + "kl_loss": 38.58903121948242, + "loss_ib": 0.4152515232563019, + "step": 295 + }, + { + "ce_ib": 26.14788818359375, + "ce_orig": 0.85167396068573, + "epoch": 0.08512473937738155, + "kl_loss": 38.04214096069336, + "loss_ib": 0.4065692722797394, + "step": 296 + }, + { + "ce_ib": 29.764019012451172, + "ce_orig": 1.7308716773986816, + "epoch": 0.08512473937738155, + "kl_loss": 32.98516845703125, + "loss_ib": 0.3596157133579254, + "step": 296 + }, + { + "ce_ib": 30.012575149536133, + "ce_orig": 1.0343323945999146, + "epoch": 0.08512473937738155, + "kl_loss": 38.74031066894531, + "loss_ib": 0.41741567850112915, + "step": 296 + }, + { + "ce_ib": 30.881479263305664, + "ce_orig": 1.7702387571334839, + "epoch": 0.08512473937738155, + "kl_loss": 21.21739959716797, + "loss_ib": 0.24305547773838043, + "step": 296 + }, + { + "ce_ib": 24.717079162597656, + "ce_orig": 0.9549234509468079, + "epoch": 0.08541232295635919, + "kl_loss": 37.45452117919922, + "loss_ib": 0.39926227927207947, + "step": 297 + }, + { + "ce_ib": 26.27513885498047, + "ce_orig": 0.7507061958312988, + "epoch": 0.08541232295635919, + "kl_loss": 34.658172607421875, + "loss_ib": 0.37285685539245605, + "step": 297 + }, + { + "ce_ib": 26.04485321044922, + "ce_orig": 0.6326652765274048, + "epoch": 0.08541232295635919, + "kl_loss": 41.222965240478516, + "loss_ib": 0.4382745027542114, + "step": 297 + }, + { + "ce_ib": 22.960899353027344, + "ce_orig": 0.855808675289154, + "epoch": 0.08541232295635919, + "kl_loss": 32.55647277832031, + "loss_ib": 0.34852561354637146, + "step": 297 + }, + { + "ce_ib": 27.634048461914062, + "ce_orig": 0.8469107747077942, + "epoch": 0.08569990653533684, + "kl_loss": 52.79608917236328, + "loss_ib": 0.5555949211120605, + "step": 298 + }, + { + "ce_ib": 25.107303619384766, + "ce_orig": 1.0334007740020752, + "epoch": 0.08569990653533684, + "kl_loss": 23.502300262451172, + "loss_ib": 0.2601303160190582, + "step": 298 + }, + { + "ce_ib": 26.35793113708496, + "ce_orig": 0.8023892641067505, + "epoch": 0.08569990653533684, + "kl_loss": 33.53641128540039, + "loss_ib": 0.36172202229499817, + "step": 298 + }, + { + "ce_ib": 28.72397804260254, + "ce_orig": 0.8318001627922058, + "epoch": 0.08569990653533684, + "kl_loss": 28.708393096923828, + "loss_ib": 0.31580790877342224, + "step": 298 + }, + { + "ce_ib": 25.58355140686035, + "ce_orig": 0.6520025134086609, + "epoch": 0.08598749011431447, + "kl_loss": 27.046260833740234, + "loss_ib": 0.2960461676120758, + "step": 299 + }, + { + "ce_ib": 27.802156448364258, + "ce_orig": 0.7854000926017761, + "epoch": 0.08598749011431447, + "kl_loss": 28.843101501464844, + "loss_ib": 0.31623315811157227, + "step": 299 + }, + { + "ce_ib": 23.300045013427734, + "ce_orig": 0.8532059192657471, + "epoch": 0.08598749011431447, + "kl_loss": 27.97170639038086, + "loss_ib": 0.3030170798301697, + "step": 299 + }, + { + "ce_ib": 27.139148712158203, + "ce_orig": 0.8574588894844055, + "epoch": 0.08598749011431447, + "kl_loss": 65.27938842773438, + "loss_ib": 0.6799330115318298, + "step": 299 + }, + { + "epoch": 0.08627507369329211, + "grad_norm": 8.516314506530762, + "learning_rate": 9.363057324840765e-06, + "loss": 1.3038, + "step": 300 + }, + { + "ce_ib": 27.859683990478516, + "ce_orig": 0.7980900406837463, + "epoch": 0.08627507369329211, + "kl_loss": 31.5460262298584, + "loss_ib": 0.34331992268562317, + "step": 300 + }, + { + "ce_ib": 26.832685470581055, + "ce_orig": 1.1817421913146973, + "epoch": 0.08627507369329211, + "kl_loss": 36.501564025878906, + "loss_ib": 0.3918483257293701, + "step": 300 + }, + { + "ce_ib": 24.322065353393555, + "ce_orig": 1.163743495941162, + "epoch": 0.08627507369329211, + "kl_loss": 25.613513946533203, + "loss_ib": 0.28045719861984253, + "step": 300 + }, + { + "ce_ib": 23.191028594970703, + "ce_orig": 0.7588955760002136, + "epoch": 0.08627507369329211, + "kl_loss": 25.832067489624023, + "loss_ib": 0.28151169419288635, + "step": 300 + }, + { + "ce_ib": 28.026681900024414, + "ce_orig": 1.1685611009597778, + "epoch": 0.08656265727226975, + "kl_loss": 27.28811264038086, + "loss_ib": 0.30090779066085815, + "step": 301 + }, + { + "ce_ib": 28.903520584106445, + "ce_orig": 0.8756263852119446, + "epoch": 0.08656265727226975, + "kl_loss": 40.801658630371094, + "loss_ib": 0.43692007660865784, + "step": 301 + }, + { + "ce_ib": 32.01344299316406, + "ce_orig": 1.7789306640625, + "epoch": 0.08656265727226975, + "kl_loss": 28.048770904541016, + "loss_ib": 0.31250113248825073, + "step": 301 + }, + { + "ce_ib": 24.778104782104492, + "ce_orig": 1.0734585523605347, + "epoch": 0.08656265727226975, + "kl_loss": 26.787107467651367, + "loss_ib": 0.29264917969703674, + "step": 301 + }, + { + "ce_ib": 26.043542861938477, + "ce_orig": 0.6056478023529053, + "epoch": 0.08685024085124739, + "kl_loss": 22.7061767578125, + "loss_ib": 0.2531053125858307, + "step": 302 + }, + { + "ce_ib": 28.747360229492188, + "ce_orig": 0.8331350684165955, + "epoch": 0.08685024085124739, + "kl_loss": 35.54327392578125, + "loss_ib": 0.38418009877204895, + "step": 302 + }, + { + "ce_ib": 25.376556396484375, + "ce_orig": 0.9154051542282104, + "epoch": 0.08685024085124739, + "kl_loss": 17.989727020263672, + "loss_ib": 0.2052738070487976, + "step": 302 + }, + { + "ce_ib": 28.94438362121582, + "ce_orig": 1.2490043640136719, + "epoch": 0.08685024085124739, + "kl_loss": 38.229942321777344, + "loss_ib": 0.4112437963485718, + "step": 302 + }, + { + "ce_ib": 24.730915069580078, + "ce_orig": 1.1931933164596558, + "epoch": 0.08713782443022504, + "kl_loss": 29.647769927978516, + "loss_ib": 0.3212085962295532, + "step": 303 + }, + { + "ce_ib": 22.95311737060547, + "ce_orig": 1.0858471393585205, + "epoch": 0.08713782443022504, + "kl_loss": 30.164508819580078, + "loss_ib": 0.32459819316864014, + "step": 303 + }, + { + "ce_ib": 25.692777633666992, + "ce_orig": 1.2659823894500732, + "epoch": 0.08713782443022504, + "kl_loss": 45.62352752685547, + "loss_ib": 0.4819280505180359, + "step": 303 + }, + { + "ce_ib": 23.456066131591797, + "ce_orig": 0.6252316236495972, + "epoch": 0.08713782443022504, + "kl_loss": 23.17284393310547, + "loss_ib": 0.25518450140953064, + "step": 303 + }, + { + "ce_ib": 26.05988883972168, + "ce_orig": 1.3680285215377808, + "epoch": 0.08742540800920268, + "kl_loss": 24.49565315246582, + "loss_ib": 0.2710164189338684, + "step": 304 + }, + { + "ce_ib": 25.749126434326172, + "ce_orig": 1.3819550275802612, + "epoch": 0.08742540800920268, + "kl_loss": 28.92790412902832, + "loss_ib": 0.3150281608104706, + "step": 304 + }, + { + "ce_ib": 22.42483139038086, + "ce_orig": 0.5715821981430054, + "epoch": 0.08742540800920268, + "kl_loss": 22.742015838623047, + "loss_ib": 0.2498449832201004, + "step": 304 + }, + { + "ce_ib": 25.59157371520996, + "ce_orig": 1.4033931493759155, + "epoch": 0.08742540800920268, + "kl_loss": 30.259891510009766, + "loss_ib": 0.32819050550460815, + "step": 304 + }, + { + "epoch": 0.08771299158818031, + "grad_norm": 3.7502119541168213, + "learning_rate": 9.522292993630574e-06, + "loss": 1.2559, + "step": 305 + }, + { + "ce_ib": 29.969907760620117, + "ce_orig": 2.0007097721099854, + "epoch": 0.08771299158818031, + "kl_loss": 22.26645278930664, + "loss_ib": 0.2526344358921051, + "step": 305 + }, + { + "ce_ib": 19.34192657470703, + "ce_orig": 0.8707708716392517, + "epoch": 0.08771299158818031, + "kl_loss": 26.55549430847168, + "loss_ib": 0.2848968505859375, + "step": 305 + }, + { + "ce_ib": 21.973365783691406, + "ce_orig": 1.0064218044281006, + "epoch": 0.08771299158818031, + "kl_loss": 22.693504333496094, + "loss_ib": 0.2489084005355835, + "step": 305 + }, + { + "ce_ib": 26.216506958007812, + "ce_orig": 0.8900886178016663, + "epoch": 0.08771299158818031, + "kl_loss": 19.825634002685547, + "loss_ib": 0.22447283565998077, + "step": 305 + }, + { + "ce_ib": 23.15169906616211, + "ce_orig": 1.2219324111938477, + "epoch": 0.08800057516715795, + "kl_loss": 24.46666717529297, + "loss_ib": 0.2678183615207672, + "step": 306 + }, + { + "ce_ib": 22.739940643310547, + "ce_orig": 1.0926791429519653, + "epoch": 0.08800057516715795, + "kl_loss": 27.521900177001953, + "loss_ib": 0.29795894026756287, + "step": 306 + }, + { + "ce_ib": 25.519643783569336, + "ce_orig": 0.8576159477233887, + "epoch": 0.08800057516715795, + "kl_loss": 20.15566062927246, + "loss_ib": 0.2270762324333191, + "step": 306 + }, + { + "ce_ib": 21.95849609375, + "ce_orig": 0.7875012755393982, + "epoch": 0.08800057516715795, + "kl_loss": 23.06599998474121, + "loss_ib": 0.2526184916496277, + "step": 306 + }, + { + "ce_ib": 29.18073272705078, + "ce_orig": 2.031694173812866, + "epoch": 0.08828815874613559, + "kl_loss": 23.33743667602539, + "loss_ib": 0.2625550925731659, + "step": 307 + }, + { + "ce_ib": 21.970809936523438, + "ce_orig": 0.589923620223999, + "epoch": 0.08828815874613559, + "kl_loss": 26.482616424560547, + "loss_ib": 0.2867969572544098, + "step": 307 + }, + { + "ce_ib": 21.679189682006836, + "ce_orig": 0.5775496363639832, + "epoch": 0.08828815874613559, + "kl_loss": 48.13336181640625, + "loss_ib": 0.5030127763748169, + "step": 307 + }, + { + "ce_ib": 25.202064514160156, + "ce_orig": 0.4014778137207031, + "epoch": 0.08828815874613559, + "kl_loss": 27.67257308959961, + "loss_ib": 0.301927775144577, + "step": 307 + }, + { + "ce_ib": 27.295534133911133, + "ce_orig": 1.3986626863479614, + "epoch": 0.08857574232511324, + "kl_loss": 19.44972801208496, + "loss_ib": 0.2217928022146225, + "step": 308 + }, + { + "ce_ib": 26.677940368652344, + "ce_orig": 1.4669982194900513, + "epoch": 0.08857574232511324, + "kl_loss": 22.110187530517578, + "loss_ib": 0.24777980148792267, + "step": 308 + }, + { + "ce_ib": 25.397268295288086, + "ce_orig": 0.7194269299507141, + "epoch": 0.08857574232511324, + "kl_loss": 21.656837463378906, + "loss_ib": 0.2419656366109848, + "step": 308 + }, + { + "ce_ib": 23.615497589111328, + "ce_orig": 0.7230740189552307, + "epoch": 0.08857574232511324, + "kl_loss": 20.54231071472168, + "loss_ib": 0.22903859615325928, + "step": 308 + }, + { + "ce_ib": 25.946504592895508, + "ce_orig": 0.7649667263031006, + "epoch": 0.08886332590409088, + "kl_loss": 19.743709564208984, + "loss_ib": 0.2233835905790329, + "step": 309 + }, + { + "ce_ib": 25.53705596923828, + "ce_orig": 1.3177523612976074, + "epoch": 0.08886332590409088, + "kl_loss": 17.188051223754883, + "loss_ib": 0.19741755723953247, + "step": 309 + }, + { + "ce_ib": 23.944272994995117, + "ce_orig": 1.095251202583313, + "epoch": 0.08886332590409088, + "kl_loss": 32.30261993408203, + "loss_ib": 0.34697046875953674, + "step": 309 + }, + { + "ce_ib": 24.48792266845703, + "ce_orig": 1.1920284032821655, + "epoch": 0.08886332590409088, + "kl_loss": 22.399471282958984, + "loss_ib": 0.2484826296567917, + "step": 309 + }, + { + "epoch": 0.08915090948306852, + "grad_norm": 4.221822738647461, + "learning_rate": 9.681528662420384e-06, + "loss": 1.2441, + "step": 310 + }, + { + "ce_ib": 21.77839469909668, + "ce_orig": 0.7371479272842407, + "epoch": 0.08915090948306852, + "kl_loss": 20.585721969604492, + "loss_ib": 0.22763560712337494, + "step": 310 + }, + { + "ce_ib": 19.997831344604492, + "ce_orig": 0.6613921523094177, + "epoch": 0.08915090948306852, + "kl_loss": 19.700130462646484, + "loss_ib": 0.2169991284608841, + "step": 310 + }, + { + "ce_ib": 26.353721618652344, + "ce_orig": 1.7854292392730713, + "epoch": 0.08915090948306852, + "kl_loss": 17.48261260986328, + "loss_ib": 0.2011798471212387, + "step": 310 + }, + { + "ce_ib": 19.99268913269043, + "ce_orig": 0.6877007484436035, + "epoch": 0.08915090948306852, + "kl_loss": 21.342994689941406, + "loss_ib": 0.2334226369857788, + "step": 310 + }, + { + "ce_ib": 18.89698028564453, + "ce_orig": 0.30386409163475037, + "epoch": 0.08943849306204615, + "kl_loss": 36.01085662841797, + "loss_ib": 0.3790055513381958, + "step": 311 + }, + { + "ce_ib": 22.5959415435791, + "ce_orig": 0.44536206126213074, + "epoch": 0.08943849306204615, + "kl_loss": 23.401079177856445, + "loss_ib": 0.25660672783851624, + "step": 311 + }, + { + "ce_ib": 26.67799186706543, + "ce_orig": 1.3645676374435425, + "epoch": 0.08943849306204615, + "kl_loss": 21.983440399169922, + "loss_ib": 0.24651238322257996, + "step": 311 + }, + { + "ce_ib": 27.179054260253906, + "ce_orig": 1.9106354713439941, + "epoch": 0.08943849306204615, + "kl_loss": 18.935150146484375, + "loss_ib": 0.21653054654598236, + "step": 311 + }, + { + "ce_ib": 21.86924934387207, + "ce_orig": 0.520060658454895, + "epoch": 0.08972607664102379, + "kl_loss": 16.354835510253906, + "loss_ib": 0.18541759252548218, + "step": 312 + }, + { + "ce_ib": 21.63800621032715, + "ce_orig": 1.0606663227081299, + "epoch": 0.08972607664102379, + "kl_loss": 43.62150573730469, + "loss_ib": 0.4578530490398407, + "step": 312 + }, + { + "ce_ib": 23.62603187561035, + "ce_orig": 1.3403759002685547, + "epoch": 0.08972607664102379, + "kl_loss": 19.3665771484375, + "loss_ib": 0.21729178726673126, + "step": 312 + }, + { + "ce_ib": 19.89155387878418, + "ce_orig": 0.7725546956062317, + "epoch": 0.08972607664102379, + "kl_loss": 18.36844253540039, + "loss_ib": 0.2035759687423706, + "step": 312 + }, + { + "ce_ib": 22.370769500732422, + "ce_orig": 0.6480394601821899, + "epoch": 0.09001366022000144, + "kl_loss": 17.688186645507812, + "loss_ib": 0.1992526352405548, + "step": 313 + }, + { + "ce_ib": 24.45700454711914, + "ce_orig": 0.5826147198677063, + "epoch": 0.09001366022000144, + "kl_loss": 18.841575622558594, + "loss_ib": 0.21287274360656738, + "step": 313 + }, + { + "ce_ib": 25.685205459594727, + "ce_orig": 1.5794010162353516, + "epoch": 0.09001366022000144, + "kl_loss": 16.19678497314453, + "loss_ib": 0.18765303492546082, + "step": 313 + }, + { + "ce_ib": 25.032880783081055, + "ce_orig": 1.691644549369812, + "epoch": 0.09001366022000144, + "kl_loss": 16.912708282470703, + "loss_ib": 0.19415995478630066, + "step": 313 + }, + { + "ce_ib": 19.637710571289062, + "ce_orig": 0.5982837080955505, + "epoch": 0.09030124379897908, + "kl_loss": 16.402633666992188, + "loss_ib": 0.18366405367851257, + "step": 314 + }, + { + "ce_ib": 22.416698455810547, + "ce_orig": 0.7960580587387085, + "epoch": 0.09030124379897908, + "kl_loss": 21.684656143188477, + "loss_ib": 0.23926326632499695, + "step": 314 + }, + { + "ce_ib": 24.144296646118164, + "ce_orig": 1.1491901874542236, + "epoch": 0.09030124379897908, + "kl_loss": 16.15469741821289, + "loss_ib": 0.1856912523508072, + "step": 314 + }, + { + "ce_ib": 22.414451599121094, + "ce_orig": 0.6498157382011414, + "epoch": 0.09030124379897908, + "kl_loss": 15.109755516052246, + "loss_ib": 0.17351199686527252, + "step": 314 + }, + { + "epoch": 0.09058882737795672, + "grad_norm": 1.5902462005615234, + "learning_rate": 9.840764331210191e-06, + "loss": 1.2124, + "step": 315 + }, + { + "ce_ib": 26.398815155029297, + "ce_orig": 1.2464390993118286, + "epoch": 0.09058882737795672, + "kl_loss": 15.754015922546387, + "loss_ib": 0.18393898010253906, + "step": 315 + }, + { + "ce_ib": 19.097566604614258, + "ce_orig": 0.7237119674682617, + "epoch": 0.09058882737795672, + "kl_loss": 21.74990463256836, + "loss_ib": 0.23659659922122955, + "step": 315 + }, + { + "ce_ib": 15.259860038757324, + "ce_orig": 0.2898668348789215, + "epoch": 0.09058882737795672, + "kl_loss": 29.279403686523438, + "loss_ib": 0.3080538809299469, + "step": 315 + }, + { + "ce_ib": 19.407550811767578, + "ce_orig": 0.8700235486030579, + "epoch": 0.09058882737795672, + "kl_loss": 19.4202938079834, + "loss_ib": 0.2136104851961136, + "step": 315 + }, + { + "ce_ib": 18.41707992553711, + "ce_orig": 0.8072125911712646, + "epoch": 0.09087641095693436, + "kl_loss": 20.386289596557617, + "loss_ib": 0.22227996587753296, + "step": 316 + }, + { + "ce_ib": 23.172889709472656, + "ce_orig": 0.4192945659160614, + "epoch": 0.09087641095693436, + "kl_loss": 16.535175323486328, + "loss_ib": 0.18852464854717255, + "step": 316 + }, + { + "ce_ib": 25.58465003967285, + "ce_orig": 0.7333693504333496, + "epoch": 0.09087641095693436, + "kl_loss": 16.984933853149414, + "loss_ib": 0.19543398916721344, + "step": 316 + }, + { + "ce_ib": 23.119829177856445, + "ce_orig": 1.2351598739624023, + "epoch": 0.09087641095693436, + "kl_loss": 17.573955535888672, + "loss_ib": 0.19885937869548798, + "step": 316 + }, + { + "ce_ib": 20.77093505859375, + "ce_orig": 0.638796865940094, + "epoch": 0.091163994535912, + "kl_loss": 13.646297454833984, + "loss_ib": 0.15723390877246857, + "step": 317 + }, + { + "ce_ib": 22.370588302612305, + "ce_orig": 0.7030758857727051, + "epoch": 0.091163994535912, + "kl_loss": 19.98332977294922, + "loss_ib": 0.22220388054847717, + "step": 317 + }, + { + "ce_ib": 23.507246017456055, + "ce_orig": 0.9605042934417725, + "epoch": 0.091163994535912, + "kl_loss": 17.364158630371094, + "loss_ib": 0.19714882969856262, + "step": 317 + }, + { + "ce_ib": 25.993309020996094, + "ce_orig": 1.3534818887710571, + "epoch": 0.091163994535912, + "kl_loss": 15.692447662353516, + "loss_ib": 0.1829177886247635, + "step": 317 + }, + { + "ce_ib": 26.447153091430664, + "ce_orig": 0.6662286520004272, + "epoch": 0.09145157811488965, + "kl_loss": 19.282215118408203, + "loss_ib": 0.2192692905664444, + "step": 318 + }, + { + "ce_ib": 21.66655158996582, + "ce_orig": 0.5830262899398804, + "epoch": 0.09145157811488965, + "kl_loss": 16.306236267089844, + "loss_ib": 0.18472890555858612, + "step": 318 + }, + { + "ce_ib": 25.510692596435547, + "ce_orig": 1.2415066957473755, + "epoch": 0.09145157811488965, + "kl_loss": 15.806875228881836, + "loss_ib": 0.1835794448852539, + "step": 318 + }, + { + "ce_ib": 19.910005569458008, + "ce_orig": 0.7447091341018677, + "epoch": 0.09145157811488965, + "kl_loss": 17.13539695739746, + "loss_ib": 0.19126397371292114, + "step": 318 + }, + { + "ce_ib": 24.191679000854492, + "ce_orig": 1.3691377639770508, + "epoch": 0.09173916169386728, + "kl_loss": 14.619853973388672, + "loss_ib": 0.17039021849632263, + "step": 319 + }, + { + "ce_ib": 24.796480178833008, + "ce_orig": 1.0005704164505005, + "epoch": 0.09173916169386728, + "kl_loss": 23.08795738220215, + "loss_ib": 0.2556760609149933, + "step": 319 + }, + { + "ce_ib": 18.99901580810547, + "ce_orig": 0.8472815155982971, + "epoch": 0.09173916169386728, + "kl_loss": 16.455188751220703, + "loss_ib": 0.1835509091615677, + "step": 319 + }, + { + "ce_ib": 23.486665725708008, + "ce_orig": 0.9926466345787048, + "epoch": 0.09173916169386728, + "kl_loss": 13.656463623046875, + "loss_ib": 0.16005130112171173, + "step": 319 + }, + { + "epoch": 0.09202674527284492, + "grad_norm": 1.6402511596679688, + "learning_rate": 1e-05, + "loss": 1.138, + "step": 320 + }, + { + "ce_ib": 17.978233337402344, + "ce_orig": 0.7672592401504517, + "epoch": 0.09202674527284492, + "kl_loss": 16.61567497253418, + "loss_ib": 0.18413497507572174, + "step": 320 + }, + { + "ce_ib": 22.24078369140625, + "ce_orig": 1.269389271736145, + "epoch": 0.09202674527284492, + "kl_loss": 13.635717391967773, + "loss_ib": 0.1585979461669922, + "step": 320 + }, + { + "ce_ib": 24.34579086303711, + "ce_orig": 0.9150453209877014, + "epoch": 0.09202674527284492, + "kl_loss": 17.23645782470703, + "loss_ib": 0.19671037793159485, + "step": 320 + }, + { + "ce_ib": 20.447195053100586, + "ce_orig": 0.7540422081947327, + "epoch": 0.09202674527284492, + "kl_loss": 16.120819091796875, + "loss_ib": 0.1816553771495819, + "step": 320 + }, + { + "ce_ib": 20.8408145904541, + "ce_orig": 0.6497412919998169, + "epoch": 0.09231432885182256, + "kl_loss": 13.179418563842773, + "loss_ib": 0.15263499319553375, + "step": 321 + }, + { + "ce_ib": 18.48600196838379, + "ce_orig": 0.9051377773284912, + "epoch": 0.09231432885182256, + "kl_loss": 13.9927339553833, + "loss_ib": 0.1584133356809616, + "step": 321 + }, + { + "ce_ib": 22.45148277282715, + "ce_orig": 1.3368862867355347, + "epoch": 0.09231432885182256, + "kl_loss": 13.805723190307617, + "loss_ib": 0.16050870716571808, + "step": 321 + }, + { + "ce_ib": 22.361835479736328, + "ce_orig": 0.4947150647640228, + "epoch": 0.09231432885182256, + "kl_loss": 13.823465347290039, + "loss_ib": 0.16059647500514984, + "step": 321 + }, + { + "ce_ib": 27.675626754760742, + "ce_orig": 1.5604900121688843, + "epoch": 0.0926019124308002, + "kl_loss": 12.6363525390625, + "loss_ib": 0.1540391445159912, + "step": 322 + }, + { + "ce_ib": 21.165691375732422, + "ce_orig": 0.9618255496025085, + "epoch": 0.0926019124308002, + "kl_loss": 18.356464385986328, + "loss_ib": 0.2047303318977356, + "step": 322 + }, + { + "ce_ib": 17.5034122467041, + "ce_orig": 0.6583025455474854, + "epoch": 0.0926019124308002, + "kl_loss": 14.965330123901367, + "loss_ib": 0.16715671122074127, + "step": 322 + }, + { + "ce_ib": 19.85868263244629, + "ce_orig": 0.8282234072685242, + "epoch": 0.0926019124308002, + "kl_loss": 14.633644104003906, + "loss_ib": 0.1661951243877411, + "step": 322 + }, + { + "ce_ib": 18.45901870727539, + "ce_orig": 0.7398732304573059, + "epoch": 0.09288949600977785, + "kl_loss": 15.228702545166016, + "loss_ib": 0.17074604332447052, + "step": 323 + }, + { + "ce_ib": 21.03128433227539, + "ce_orig": 0.8697280883789062, + "epoch": 0.09288949600977785, + "kl_loss": 14.359245300292969, + "loss_ib": 0.16462373733520508, + "step": 323 + }, + { + "ce_ib": 18.499732971191406, + "ce_orig": 0.5559062361717224, + "epoch": 0.09288949600977785, + "kl_loss": 14.474294662475586, + "loss_ib": 0.16324268281459808, + "step": 323 + }, + { + "ce_ib": 17.314205169677734, + "ce_orig": 0.5949759483337402, + "epoch": 0.09288949600977785, + "kl_loss": 12.18869400024414, + "loss_ib": 0.13920114934444427, + "step": 323 + }, + { + "ce_ib": 22.12590789794922, + "ce_orig": 1.2088953256607056, + "epoch": 0.09317707958875548, + "kl_loss": 12.109560012817383, + "loss_ib": 0.14322151243686676, + "step": 324 + }, + { + "ce_ib": 19.701847076416016, + "ce_orig": 0.6696236729621887, + "epoch": 0.09317707958875548, + "kl_loss": 14.687576293945312, + "loss_ib": 0.16657760739326477, + "step": 324 + }, + { + "ce_ib": 22.29142951965332, + "ce_orig": 1.1997344493865967, + "epoch": 0.09317707958875548, + "kl_loss": 12.63182258605957, + "loss_ib": 0.14860965311527252, + "step": 324 + }, + { + "ce_ib": 20.73260498046875, + "ce_orig": 0.9823715090751648, + "epoch": 0.09317707958875548, + "kl_loss": 12.120622634887695, + "loss_ib": 0.14193882048130035, + "step": 324 + }, + { + "epoch": 0.09346466316773312, + "grad_norm": 1.5406866073608398, + "learning_rate": 9.999993976919739e-06, + "loss": 1.1078, + "step": 325 + }, + { + "ce_ib": 17.106014251708984, + "ce_orig": 0.9145632982254028, + "epoch": 0.09346466316773312, + "kl_loss": 14.363018989562988, + "loss_ib": 0.16073618829250336, + "step": 325 + }, + { + "ce_ib": 19.175251007080078, + "ce_orig": 0.7700109481811523, + "epoch": 0.09346466316773312, + "kl_loss": 14.140527725219727, + "loss_ib": 0.16058051586151123, + "step": 325 + }, + { + "ce_ib": 15.837416648864746, + "ce_orig": 0.772528886795044, + "epoch": 0.09346466316773312, + "kl_loss": 12.842889785766602, + "loss_ib": 0.14426632225513458, + "step": 325 + }, + { + "ce_ib": 15.72890853881836, + "ce_orig": 0.7171897888183594, + "epoch": 0.09346466316773312, + "kl_loss": 11.653585433959961, + "loss_ib": 0.13226476311683655, + "step": 325 + }, + { + "ce_ib": 26.823095321655273, + "ce_orig": 1.9181299209594727, + "epoch": 0.09375224674671076, + "kl_loss": 11.220107078552246, + "loss_ib": 0.13902415335178375, + "step": 326 + }, + { + "ce_ib": 19.747053146362305, + "ce_orig": 0.9309285283088684, + "epoch": 0.09375224674671076, + "kl_loss": 10.759939193725586, + "loss_ib": 0.1273464411497116, + "step": 326 + }, + { + "ce_ib": 21.19155502319336, + "ce_orig": 1.635532259941101, + "epoch": 0.09375224674671076, + "kl_loss": 12.692992210388184, + "loss_ib": 0.14812147617340088, + "step": 326 + }, + { + "ce_ib": 21.15056037902832, + "ce_orig": 0.7623363137245178, + "epoch": 0.09375224674671076, + "kl_loss": 13.625322341918945, + "loss_ib": 0.15740378201007843, + "step": 326 + }, + { + "ce_ib": 16.852680206298828, + "ce_orig": 0.7469913363456726, + "epoch": 0.0940398303256884, + "kl_loss": 10.560868263244629, + "loss_ib": 0.12246136367321014, + "step": 327 + }, + { + "ce_ib": 16.11965560913086, + "ce_orig": 0.752680242061615, + "epoch": 0.0940398303256884, + "kl_loss": 12.737372398376465, + "loss_ib": 0.14349336922168732, + "step": 327 + }, + { + "ce_ib": 18.678325653076172, + "ce_orig": 0.8419510722160339, + "epoch": 0.0940398303256884, + "kl_loss": 12.982912063598633, + "loss_ib": 0.14850744605064392, + "step": 327 + }, + { + "ce_ib": 20.507835388183594, + "ce_orig": 0.933698296546936, + "epoch": 0.0940398303256884, + "kl_loss": 11.535351753234863, + "loss_ib": 0.1358613520860672, + "step": 327 + }, + { + "ce_ib": 16.491111755371094, + "ce_orig": 0.4157365560531616, + "epoch": 0.09432741390466605, + "kl_loss": 11.347173690795898, + "loss_ib": 0.12996284663677216, + "step": 328 + }, + { + "ce_ib": 20.290693283081055, + "ce_orig": 0.6378398537635803, + "epoch": 0.09432741390466605, + "kl_loss": 10.955358505249023, + "loss_ib": 0.1298442780971527, + "step": 328 + }, + { + "ce_ib": 20.74325942993164, + "ce_orig": 0.9873928427696228, + "epoch": 0.09432741390466605, + "kl_loss": 11.168992042541504, + "loss_ib": 0.13243317604064941, + "step": 328 + }, + { + "ce_ib": 22.277246475219727, + "ce_orig": 1.0927495956420898, + "epoch": 0.09432741390466605, + "kl_loss": 14.169788360595703, + "loss_ib": 0.16397511959075928, + "step": 328 + }, + { + "ce_ib": 23.106748580932617, + "ce_orig": 1.1934113502502441, + "epoch": 0.09461499748364369, + "kl_loss": 11.351577758789062, + "loss_ib": 0.13662251830101013, + "step": 329 + }, + { + "ce_ib": 19.171072006225586, + "ce_orig": 0.9011801481246948, + "epoch": 0.09461499748364369, + "kl_loss": 11.967233657836914, + "loss_ib": 0.13884340226650238, + "step": 329 + }, + { + "ce_ib": 19.95760726928711, + "ce_orig": 0.6395582556724548, + "epoch": 0.09461499748364369, + "kl_loss": 11.200337409973145, + "loss_ib": 0.13196097314357758, + "step": 329 + }, + { + "ce_ib": 20.071157455444336, + "ce_orig": 0.9473389983177185, + "epoch": 0.09461499748364369, + "kl_loss": 14.839773178100586, + "loss_ib": 0.1684688925743103, + "step": 329 + }, + { + "epoch": 0.09490258106262132, + "grad_norm": 0.9824780821800232, + "learning_rate": 9.999975907693462e-06, + "loss": 1.1087, + "step": 330 + }, + { + "ce_ib": 20.931066513061523, + "ce_orig": 1.4924328327178955, + "epoch": 0.09490258106262132, + "kl_loss": 10.959052085876465, + "loss_ib": 0.13052158057689667, + "step": 330 + }, + { + "ce_ib": 23.309009552001953, + "ce_orig": 0.8217906951904297, + "epoch": 0.09490258106262132, + "kl_loss": 13.162786483764648, + "loss_ib": 0.15493687987327576, + "step": 330 + }, + { + "ce_ib": 19.14462661743164, + "ce_orig": 0.7224079966545105, + "epoch": 0.09490258106262132, + "kl_loss": 15.73659610748291, + "loss_ib": 0.17651057243347168, + "step": 330 + }, + { + "ce_ib": 16.93087387084961, + "ce_orig": 0.3897332549095154, + "epoch": 0.09490258106262132, + "kl_loss": 9.802057266235352, + "loss_ib": 0.11495144665241241, + "step": 330 + }, + { + "ce_ib": 20.574167251586914, + "ce_orig": 0.7844420075416565, + "epoch": 0.09519016464159896, + "kl_loss": 12.472640991210938, + "loss_ib": 0.14530058205127716, + "step": 331 + }, + { + "ce_ib": 21.064104080200195, + "ce_orig": 0.8571724891662598, + "epoch": 0.09519016464159896, + "kl_loss": 12.077247619628906, + "loss_ib": 0.14183658361434937, + "step": 331 + }, + { + "ce_ib": 19.205732345581055, + "ce_orig": 0.591139554977417, + "epoch": 0.09519016464159896, + "kl_loss": 10.037530899047852, + "loss_ib": 0.11958103626966476, + "step": 331 + }, + { + "ce_ib": 19.454252243041992, + "ce_orig": 0.9017695188522339, + "epoch": 0.09519016464159896, + "kl_loss": 10.572440147399902, + "loss_ib": 0.12517865002155304, + "step": 331 + }, + { + "ce_ib": 15.203242301940918, + "ce_orig": 0.5361948013305664, + "epoch": 0.0954777482205766, + "kl_loss": 10.661357879638672, + "loss_ib": 0.12181682139635086, + "step": 332 + }, + { + "ce_ib": 21.122093200683594, + "ce_orig": 1.4699267148971558, + "epoch": 0.0954777482205766, + "kl_loss": 9.819560050964355, + "loss_ib": 0.1193176880478859, + "step": 332 + }, + { + "ce_ib": 19.769392013549805, + "ce_orig": 0.5775290131568909, + "epoch": 0.0954777482205766, + "kl_loss": 9.717262268066406, + "loss_ib": 0.11694201081991196, + "step": 332 + }, + { + "ce_ib": 18.72998046875, + "ce_orig": 0.5287953615188599, + "epoch": 0.0954777482205766, + "kl_loss": 13.474811553955078, + "loss_ib": 0.15347810089588165, + "step": 332 + }, + { + "ce_ib": 14.733593940734863, + "ce_orig": 0.4213125705718994, + "epoch": 0.09576533179955425, + "kl_loss": 11.17019271850586, + "loss_ib": 0.1264355182647705, + "step": 333 + }, + { + "ce_ib": 21.969772338867188, + "ce_orig": 1.0359489917755127, + "epoch": 0.09576533179955425, + "kl_loss": 12.137819290161133, + "loss_ib": 0.14334796369075775, + "step": 333 + }, + { + "ce_ib": 19.76876449584961, + "ce_orig": 0.8385518193244934, + "epoch": 0.09576533179955425, + "kl_loss": 12.675048828125, + "loss_ib": 0.1465192437171936, + "step": 333 + }, + { + "ce_ib": 16.6859188079834, + "ce_orig": 0.7033459544181824, + "epoch": 0.09576533179955425, + "kl_loss": 11.24110221862793, + "loss_ib": 0.12909694015979767, + "step": 333 + }, + { + "ce_ib": 18.942955017089844, + "ce_orig": 0.6511563062667847, + "epoch": 0.09605291537853189, + "kl_loss": 12.197677612304688, + "loss_ib": 0.1409197300672531, + "step": 334 + }, + { + "ce_ib": 12.525162696838379, + "ce_orig": 0.2835647463798523, + "epoch": 0.09605291537853189, + "kl_loss": 8.74501895904541, + "loss_ib": 0.0999753549695015, + "step": 334 + }, + { + "ce_ib": 19.1585693359375, + "ce_orig": 0.5772603750228882, + "epoch": 0.09605291537853189, + "kl_loss": 11.537700653076172, + "loss_ib": 0.13453558087348938, + "step": 334 + }, + { + "ce_ib": 18.65268898010254, + "ce_orig": 0.6586172580718994, + "epoch": 0.09605291537853189, + "kl_loss": 9.829732894897461, + "loss_ib": 0.11695001274347305, + "step": 334 + }, + { + "epoch": 0.09634049895750953, + "grad_norm": 1.5527466535568237, + "learning_rate": 9.999945792364704e-06, + "loss": 1.0047, + "step": 335 + }, + { + "ce_ib": 18.45952796936035, + "ce_orig": 0.9835706949234009, + "epoch": 0.09634049895750953, + "kl_loss": 11.043167114257812, + "loss_ib": 0.1288911998271942, + "step": 335 + }, + { + "ce_ib": 19.220260620117188, + "ce_orig": 1.0070465803146362, + "epoch": 0.09634049895750953, + "kl_loss": 9.694038391113281, + "loss_ib": 0.11616063863039017, + "step": 335 + }, + { + "ce_ib": 20.90534019470215, + "ce_orig": 0.7721905708312988, + "epoch": 0.09634049895750953, + "kl_loss": 10.956405639648438, + "loss_ib": 0.1304693967103958, + "step": 335 + }, + { + "ce_ib": 22.549360275268555, + "ce_orig": 0.7169628143310547, + "epoch": 0.09634049895750953, + "kl_loss": 10.45715618133545, + "loss_ib": 0.12712092697620392, + "step": 335 + }, + { + "ce_ib": 19.706451416015625, + "ce_orig": 1.2544941902160645, + "epoch": 0.09662808253648716, + "kl_loss": 5.737251281738281, + "loss_ib": 0.07707896083593369, + "step": 336 + }, + { + "ce_ib": 24.045684814453125, + "ce_orig": 1.628864049911499, + "epoch": 0.09662808253648716, + "kl_loss": 9.666478157043457, + "loss_ib": 0.12071046233177185, + "step": 336 + }, + { + "ce_ib": 22.39566993713379, + "ce_orig": 1.3797554969787598, + "epoch": 0.09662808253648716, + "kl_loss": 9.557376861572266, + "loss_ib": 0.11796943098306656, + "step": 336 + }, + { + "ce_ib": 16.502885818481445, + "ce_orig": 0.678697407245636, + "epoch": 0.09662808253648716, + "kl_loss": 12.349996566772461, + "loss_ib": 0.14000284671783447, + "step": 336 + }, + { + "ce_ib": 21.189701080322266, + "ce_orig": 1.7581653594970703, + "epoch": 0.0969156661154648, + "kl_loss": 11.109577178955078, + "loss_ib": 0.1322854608297348, + "step": 337 + }, + { + "ce_ib": 20.682483673095703, + "ce_orig": 1.0360445976257324, + "epoch": 0.0969156661154648, + "kl_loss": 9.650278091430664, + "loss_ib": 0.11718526482582092, + "step": 337 + }, + { + "ce_ib": 23.174293518066406, + "ce_orig": 0.9172191619873047, + "epoch": 0.0969156661154648, + "kl_loss": 10.747881889343262, + "loss_ib": 0.13065311312675476, + "step": 337 + }, + { + "ce_ib": 20.76695442199707, + "ce_orig": 0.5460869073867798, + "epoch": 0.0969156661154648, + "kl_loss": 11.630158424377441, + "loss_ib": 0.13706853985786438, + "step": 337 + }, + { + "ce_ib": 23.635868072509766, + "ce_orig": 1.7053964138031006, + "epoch": 0.09720324969444245, + "kl_loss": 9.801689147949219, + "loss_ib": 0.121652752161026, + "step": 338 + }, + { + "ce_ib": 19.619415283203125, + "ce_orig": 0.8643050193786621, + "epoch": 0.09720324969444245, + "kl_loss": 9.933218002319336, + "loss_ib": 0.11895159631967545, + "step": 338 + }, + { + "ce_ib": 21.83019256591797, + "ce_orig": 0.8322968482971191, + "epoch": 0.09720324969444245, + "kl_loss": 10.368824005126953, + "loss_ib": 0.12551842629909515, + "step": 338 + }, + { + "ce_ib": 18.191864013671875, + "ce_orig": 0.4908624589443207, + "epoch": 0.09720324969444245, + "kl_loss": 11.974782943725586, + "loss_ib": 0.1379396915435791, + "step": 338 + }, + { + "ce_ib": 20.153644561767578, + "ce_orig": 0.5820345282554626, + "epoch": 0.09749083327342009, + "kl_loss": 9.271571159362793, + "loss_ib": 0.11286935210227966, + "step": 339 + }, + { + "ce_ib": 16.755735397338867, + "ce_orig": 0.8004245758056641, + "epoch": 0.09749083327342009, + "kl_loss": 9.602378845214844, + "loss_ib": 0.11277952045202255, + "step": 339 + }, + { + "ce_ib": 21.61349868774414, + "ce_orig": 1.3253728151321411, + "epoch": 0.09749083327342009, + "kl_loss": 9.877376556396484, + "loss_ib": 0.12038726359605789, + "step": 339 + }, + { + "ce_ib": 23.785110473632812, + "ce_orig": 1.1449768543243408, + "epoch": 0.09749083327342009, + "kl_loss": 9.455681800842285, + "loss_ib": 0.11834193021059036, + "step": 339 + }, + { + "epoch": 0.09777841685239773, + "grad_norm": 0.7407243251800537, + "learning_rate": 9.999903631006022e-06, + "loss": 1.0521, + "step": 340 + }, + { + "ce_ib": 10.92531967163086, + "ce_orig": 0.2659711241722107, + "epoch": 0.09777841685239773, + "kl_loss": 7.5069732666015625, + "loss_ib": 0.08599505573511124, + "step": 340 + }, + { + "ce_ib": 19.90782356262207, + "ce_orig": 0.5355432033538818, + "epoch": 0.09777841685239773, + "kl_loss": 11.578774452209473, + "loss_ib": 0.13569556176662445, + "step": 340 + }, + { + "ce_ib": 15.142422676086426, + "ce_orig": 0.7282365560531616, + "epoch": 0.09777841685239773, + "kl_loss": 10.324485778808594, + "loss_ib": 0.11838727444410324, + "step": 340 + }, + { + "ce_ib": 20.629169464111328, + "ce_orig": 0.9958592057228088, + "epoch": 0.09777841685239773, + "kl_loss": 9.883302688598633, + "loss_ib": 0.11946219205856323, + "step": 340 + }, + { + "ce_ib": 18.038537979125977, + "ce_orig": 0.692686140537262, + "epoch": 0.09806600043137537, + "kl_loss": 9.38126277923584, + "loss_ib": 0.11185116320848465, + "step": 341 + }, + { + "ce_ib": 20.98015022277832, + "ce_orig": 0.5099575519561768, + "epoch": 0.09806600043137537, + "kl_loss": 9.678869247436523, + "loss_ib": 0.11776883900165558, + "step": 341 + }, + { + "ce_ib": 17.243499755859375, + "ce_orig": 0.692179799079895, + "epoch": 0.09806600043137537, + "kl_loss": 9.476663589477539, + "loss_ib": 0.11201013624668121, + "step": 341 + }, + { + "ce_ib": 25.067062377929688, + "ce_orig": 0.792171835899353, + "epoch": 0.09806600043137537, + "kl_loss": 10.635600090026855, + "loss_ib": 0.13142305612564087, + "step": 341 + }, + { + "ce_ib": 15.628839492797852, + "ce_orig": 0.7891074419021606, + "epoch": 0.098353584010353, + "kl_loss": 10.068859100341797, + "loss_ib": 0.11631742864847183, + "step": 342 + }, + { + "ce_ib": 18.483537673950195, + "ce_orig": 0.9051138758659363, + "epoch": 0.098353584010353, + "kl_loss": 10.133465766906738, + "loss_ib": 0.11981818825006485, + "step": 342 + }, + { + "ce_ib": 16.36567497253418, + "ce_orig": 0.6817749738693237, + "epoch": 0.098353584010353, + "kl_loss": 9.654040336608887, + "loss_ib": 0.1129060685634613, + "step": 342 + }, + { + "ce_ib": 20.89409828186035, + "ce_orig": 1.1035248041152954, + "epoch": 0.098353584010353, + "kl_loss": 10.291540145874023, + "loss_ib": 0.12380949407815933, + "step": 342 + }, + { + "ce_ib": 22.037933349609375, + "ce_orig": 1.694153904914856, + "epoch": 0.09864116758933066, + "kl_loss": 9.139327049255371, + "loss_ib": 0.11343120038509369, + "step": 343 + }, + { + "ce_ib": 21.519075393676758, + "ce_orig": 1.4431602954864502, + "epoch": 0.09864116758933066, + "kl_loss": 9.308493614196777, + "loss_ib": 0.11460401117801666, + "step": 343 + }, + { + "ce_ib": 15.5767240524292, + "ce_orig": 0.7400184273719788, + "epoch": 0.09864116758933066, + "kl_loss": 9.34182357788086, + "loss_ib": 0.10899496078491211, + "step": 343 + }, + { + "ce_ib": 14.478399276733398, + "ce_orig": 0.4603125751018524, + "epoch": 0.09864116758933066, + "kl_loss": 9.755362510681152, + "loss_ib": 0.11203201860189438, + "step": 343 + }, + { + "ce_ib": 17.020166397094727, + "ce_orig": 0.5673547387123108, + "epoch": 0.0989287511683083, + "kl_loss": 11.05903148651123, + "loss_ib": 0.1276104748249054, + "step": 344 + }, + { + "ce_ib": 17.05959701538086, + "ce_orig": 0.7991743683815002, + "epoch": 0.0989287511683083, + "kl_loss": 9.682877540588379, + "loss_ib": 0.11388836801052094, + "step": 344 + }, + { + "ce_ib": 23.936145782470703, + "ce_orig": 1.328629732131958, + "epoch": 0.0989287511683083, + "kl_loss": 10.604333877563477, + "loss_ib": 0.1299794763326645, + "step": 344 + }, + { + "ce_ib": 22.645984649658203, + "ce_orig": 1.3614290952682495, + "epoch": 0.0989287511683083, + "kl_loss": 9.37100601196289, + "loss_ib": 0.11635604500770569, + "step": 344 + }, + { + "epoch": 0.09921633474728593, + "grad_norm": 1.2907381057739258, + "learning_rate": 9.99984942371899e-06, + "loss": 1.0005, + "step": 345 + }, + { + "ce_ib": 19.495277404785156, + "ce_orig": 1.2335152626037598, + "epoch": 0.09921633474728593, + "kl_loss": 10.051846504211426, + "loss_ib": 0.12001373618841171, + "step": 345 + }, + { + "ce_ib": 16.080678939819336, + "ce_orig": 0.7765376567840576, + "epoch": 0.09921633474728593, + "kl_loss": 9.520978927612305, + "loss_ib": 0.11129046231508255, + "step": 345 + }, + { + "ce_ib": 17.06264305114746, + "ce_orig": 0.6252941489219666, + "epoch": 0.09921633474728593, + "kl_loss": 9.93945026397705, + "loss_ib": 0.11645714193582535, + "step": 345 + }, + { + "ce_ib": 18.10491371154785, + "ce_orig": 0.9270275831222534, + "epoch": 0.09921633474728593, + "kl_loss": 9.839916229248047, + "loss_ib": 0.11650407314300537, + "step": 345 + }, + { + "ce_ib": 21.288270950317383, + "ce_orig": 1.8509690761566162, + "epoch": 0.09950391832626357, + "kl_loss": 9.440530776977539, + "loss_ib": 0.1156935766339302, + "step": 346 + }, + { + "ce_ib": 16.040321350097656, + "ce_orig": 0.9570891261100769, + "epoch": 0.09950391832626357, + "kl_loss": 10.13039493560791, + "loss_ib": 0.11734427511692047, + "step": 346 + }, + { + "ce_ib": 20.773448944091797, + "ce_orig": 1.5323985815048218, + "epoch": 0.09950391832626357, + "kl_loss": 10.627010345458984, + "loss_ib": 0.12704354524612427, + "step": 346 + }, + { + "ce_ib": 13.598661422729492, + "ce_orig": 0.7118152379989624, + "epoch": 0.09950391832626357, + "kl_loss": 9.598701477050781, + "loss_ib": 0.10958567261695862, + "step": 346 + }, + { + "ce_ib": 13.722025871276855, + "ce_orig": 0.8525584936141968, + "epoch": 0.0997915019052412, + "kl_loss": 10.663893699645996, + "loss_ib": 0.12036096304655075, + "step": 347 + }, + { + "ce_ib": 17.72942352294922, + "ce_orig": 0.879482626914978, + "epoch": 0.0997915019052412, + "kl_loss": 10.045938491821289, + "loss_ib": 0.11818880587816238, + "step": 347 + }, + { + "ce_ib": 17.762847900390625, + "ce_orig": 1.0413190126419067, + "epoch": 0.0997915019052412, + "kl_loss": 10.000950813293457, + "loss_ib": 0.11777235567569733, + "step": 347 + }, + { + "ce_ib": 22.781930923461914, + "ce_orig": 1.1397019624710083, + "epoch": 0.0997915019052412, + "kl_loss": 9.38424301147461, + "loss_ib": 0.1166243627667427, + "step": 347 + }, + { + "ce_ib": 21.994335174560547, + "ce_orig": 0.43941164016723633, + "epoch": 0.10007908548421886, + "kl_loss": 9.569962501525879, + "loss_ib": 0.1176939532160759, + "step": 348 + }, + { + "ce_ib": 16.95915985107422, + "ce_orig": 0.5146183371543884, + "epoch": 0.10007908548421886, + "kl_loss": 9.597618103027344, + "loss_ib": 0.11293534189462662, + "step": 348 + }, + { + "ce_ib": 13.567898750305176, + "ce_orig": 0.6933156847953796, + "epoch": 0.10007908548421886, + "kl_loss": 9.457947731018066, + "loss_ib": 0.10814736783504486, + "step": 348 + }, + { + "ce_ib": 16.038314819335938, + "ce_orig": 0.9144206643104553, + "epoch": 0.10007908548421886, + "kl_loss": 9.451361656188965, + "loss_ib": 0.11055192351341248, + "step": 348 + }, + { + "ce_ib": 15.3102445602417, + "ce_orig": 0.6907638311386108, + "epoch": 0.1003666690631965, + "kl_loss": 8.99312973022461, + "loss_ib": 0.10524154454469681, + "step": 349 + }, + { + "ce_ib": 16.133337020874023, + "ce_orig": 0.885292649269104, + "epoch": 0.1003666690631965, + "kl_loss": 9.09972095489502, + "loss_ib": 0.10713054239749908, + "step": 349 + }, + { + "ce_ib": 21.13068962097168, + "ce_orig": 1.0056211948394775, + "epoch": 0.1003666690631965, + "kl_loss": 11.5963134765625, + "loss_ib": 0.13709382712841034, + "step": 349 + }, + { + "ce_ib": 20.446731567382812, + "ce_orig": 1.2884644269943237, + "epoch": 0.1003666690631965, + "kl_loss": 9.364080429077148, + "loss_ib": 0.1140875294804573, + "step": 349 + }, + { + "epoch": 0.10065425264217413, + "grad_norm": 0.45241594314575195, + "learning_rate": 9.999783170634207e-06, + "loss": 1.0049, + "step": 350 + }, + { + "ce_ib": 20.207548141479492, + "ce_orig": 0.9007079005241394, + "epoch": 0.10065425264217413, + "kl_loss": 8.826637268066406, + "loss_ib": 0.10847391933202744, + "step": 350 + }, + { + "ce_ib": 14.89907169342041, + "ce_orig": 0.848534882068634, + "epoch": 0.10065425264217413, + "kl_loss": 9.761069297790527, + "loss_ib": 0.11250976473093033, + "step": 350 + }, + { + "ce_ib": 20.37824058532715, + "ce_orig": 1.4016444683074951, + "epoch": 0.10065425264217413, + "kl_loss": 9.232507705688477, + "loss_ib": 0.11270332336425781, + "step": 350 + }, + { + "ce_ib": 16.76759910583496, + "ce_orig": 1.1012144088745117, + "epoch": 0.10065425264217413, + "kl_loss": 10.15268325805664, + "loss_ib": 0.11829442530870438, + "step": 350 + }, + { + "ce_ib": 15.976619720458984, + "ce_orig": 0.8754503726959229, + "epoch": 0.10094183622115177, + "kl_loss": 9.422735214233398, + "loss_ib": 0.11020396649837494, + "step": 351 + }, + { + "ce_ib": 19.49113655090332, + "ce_orig": 1.116249680519104, + "epoch": 0.10094183622115177, + "kl_loss": 9.386714935302734, + "loss_ib": 0.1133582815527916, + "step": 351 + }, + { + "ce_ib": 17.88022232055664, + "ce_orig": 0.9254348278045654, + "epoch": 0.10094183622115177, + "kl_loss": 9.275611877441406, + "loss_ib": 0.11063633859157562, + "step": 351 + }, + { + "ce_ib": 9.351910591125488, + "ce_orig": 0.16225206851959229, + "epoch": 0.10094183622115177, + "kl_loss": 9.214266777038574, + "loss_ib": 0.10149458050727844, + "step": 351 + }, + { + "ce_ib": 16.736501693725586, + "ce_orig": 0.8068060278892517, + "epoch": 0.10122941980012941, + "kl_loss": 9.62176513671875, + "loss_ib": 0.11295414716005325, + "step": 352 + }, + { + "ce_ib": 18.84697723388672, + "ce_orig": 0.6195511221885681, + "epoch": 0.10122941980012941, + "kl_loss": 9.747774124145508, + "loss_ib": 0.11632471531629562, + "step": 352 + }, + { + "ce_ib": 18.591981887817383, + "ce_orig": 0.8959230184555054, + "epoch": 0.10122941980012941, + "kl_loss": 8.57149887084961, + "loss_ib": 0.10430697351694107, + "step": 352 + }, + { + "ce_ib": 16.255172729492188, + "ce_orig": 0.7202159762382507, + "epoch": 0.10122941980012941, + "kl_loss": 12.734394073486328, + "loss_ib": 0.14359910786151886, + "step": 352 + }, + { + "ce_ib": 14.430315971374512, + "ce_orig": 0.9846038818359375, + "epoch": 0.10151700337910706, + "kl_loss": 9.04564094543457, + "loss_ib": 0.10488671809434891, + "step": 353 + }, + { + "ce_ib": 19.057903289794922, + "ce_orig": 0.708838701248169, + "epoch": 0.10151700337910706, + "kl_loss": 8.713069915771484, + "loss_ib": 0.10618860274553299, + "step": 353 + }, + { + "ce_ib": 20.327953338623047, + "ce_orig": 1.7989298105239868, + "epoch": 0.10151700337910706, + "kl_loss": 9.945318222045898, + "loss_ib": 0.11978112906217575, + "step": 353 + }, + { + "ce_ib": 14.250055313110352, + "ce_orig": 0.6734949350357056, + "epoch": 0.10151700337910706, + "kl_loss": 9.17054557800293, + "loss_ib": 0.10595551133155823, + "step": 353 + }, + { + "ce_ib": 15.82697868347168, + "ce_orig": 0.9444003701210022, + "epoch": 0.1018045869580847, + "kl_loss": 9.11765193939209, + "loss_ib": 0.10700349509716034, + "step": 354 + }, + { + "ce_ib": 15.842026710510254, + "ce_orig": 0.6050642132759094, + "epoch": 0.1018045869580847, + "kl_loss": 8.850725173950195, + "loss_ib": 0.1043492779135704, + "step": 354 + }, + { + "ce_ib": 18.677658081054688, + "ce_orig": 0.8625993728637695, + "epoch": 0.1018045869580847, + "kl_loss": 9.405292510986328, + "loss_ib": 0.11273057758808136, + "step": 354 + }, + { + "ce_ib": 13.065309524536133, + "ce_orig": 0.39125949144363403, + "epoch": 0.1018045869580847, + "kl_loss": 6.584395408630371, + "loss_ib": 0.0789092630147934, + "step": 354 + }, + { + "epoch": 0.10209217053706234, + "grad_norm": 0.49203693866729736, + "learning_rate": 9.999704871911289e-06, + "loss": 0.9968, + "step": 355 + }, + { + "ce_ib": 19.234111785888672, + "ce_orig": 0.9246622920036316, + "epoch": 0.10209217053706234, + "kl_loss": 8.509796142578125, + "loss_ib": 0.1043320745229721, + "step": 355 + }, + { + "ce_ib": 17.678333282470703, + "ce_orig": 0.8307465314865112, + "epoch": 0.10209217053706234, + "kl_loss": 8.600641250610352, + "loss_ib": 0.10368474572896957, + "step": 355 + }, + { + "ce_ib": 11.18205738067627, + "ce_orig": 0.1704210788011551, + "epoch": 0.10209217053706234, + "kl_loss": 10.59097957611084, + "loss_ib": 0.11709185689687729, + "step": 355 + }, + { + "ce_ib": 14.95598316192627, + "ce_orig": 0.8469982147216797, + "epoch": 0.10209217053706234, + "kl_loss": 9.722857475280762, + "loss_ib": 0.1121845617890358, + "step": 355 + }, + { + "ce_ib": 16.683542251586914, + "ce_orig": 1.2110188007354736, + "epoch": 0.10237975411603997, + "kl_loss": 8.89454460144043, + "loss_ib": 0.10562898218631744, + "step": 356 + }, + { + "ce_ib": 14.379767417907715, + "ce_orig": 0.6198569536209106, + "epoch": 0.10237975411603997, + "kl_loss": 10.061267852783203, + "loss_ib": 0.11499244719743729, + "step": 356 + }, + { + "ce_ib": 16.58348846435547, + "ce_orig": 1.1141889095306396, + "epoch": 0.10237975411603997, + "kl_loss": 9.346358299255371, + "loss_ib": 0.11004707217216492, + "step": 356 + }, + { + "ce_ib": 16.944780349731445, + "ce_orig": 0.8266158699989319, + "epoch": 0.10237975411603997, + "kl_loss": 9.04732894897461, + "loss_ib": 0.10741806775331497, + "step": 356 + }, + { + "ce_ib": 16.06398582458496, + "ce_orig": 0.904808759689331, + "epoch": 0.10266733769501761, + "kl_loss": 9.288269996643066, + "loss_ib": 0.10894668102264404, + "step": 357 + }, + { + "ce_ib": 18.518054962158203, + "ce_orig": 0.6518286466598511, + "epoch": 0.10266733769501761, + "kl_loss": 9.153616905212402, + "loss_ib": 0.11005422472953796, + "step": 357 + }, + { + "ce_ib": 13.961670875549316, + "ce_orig": 0.7661263942718506, + "epoch": 0.10266733769501761, + "kl_loss": 9.662349700927734, + "loss_ib": 0.11058516055345535, + "step": 357 + }, + { + "ce_ib": 16.693498611450195, + "ce_orig": 0.7039583325386047, + "epoch": 0.10266733769501761, + "kl_loss": 9.203071594238281, + "loss_ib": 0.1087242066860199, + "step": 357 + }, + { + "ce_ib": 14.941229820251465, + "ce_orig": 0.6703804135322571, + "epoch": 0.10295492127399525, + "kl_loss": 7.32505464553833, + "loss_ib": 0.08819177746772766, + "step": 358 + }, + { + "ce_ib": 10.472589492797852, + "ce_orig": 0.28409555554389954, + "epoch": 0.10295492127399525, + "kl_loss": 10.202686309814453, + "loss_ib": 0.11249945312738419, + "step": 358 + }, + { + "ce_ib": 18.69029998779297, + "ce_orig": 0.9642467498779297, + "epoch": 0.10295492127399525, + "kl_loss": 9.411130905151367, + "loss_ib": 0.11280160397291183, + "step": 358 + }, + { + "ce_ib": 17.212318420410156, + "ce_orig": 0.8138781785964966, + "epoch": 0.10295492127399525, + "kl_loss": 8.968740463256836, + "loss_ib": 0.10689971596002579, + "step": 358 + }, + { + "ce_ib": 17.45269203186035, + "ce_orig": 0.377750039100647, + "epoch": 0.1032425048529729, + "kl_loss": 6.718733787536621, + "loss_ib": 0.0846400260925293, + "step": 359 + }, + { + "ce_ib": 20.13898468017578, + "ce_orig": 1.5750316381454468, + "epoch": 0.1032425048529729, + "kl_loss": 9.422719955444336, + "loss_ib": 0.11436618864536285, + "step": 359 + }, + { + "ce_ib": 17.9411678314209, + "ce_orig": 0.8798750638961792, + "epoch": 0.1032425048529729, + "kl_loss": 9.073663711547852, + "loss_ib": 0.10867780447006226, + "step": 359 + }, + { + "ce_ib": 17.482316970825195, + "ce_orig": 1.371580958366394, + "epoch": 0.1032425048529729, + "kl_loss": 9.437213897705078, + "loss_ib": 0.11185445636510849, + "step": 359 + }, + { + "epoch": 0.10353008843195054, + "grad_norm": 0.40732964873313904, + "learning_rate": 9.999614527738882e-06, + "loss": 1.0384, + "step": 360 + }, + { + "ce_ib": 20.317060470581055, + "ce_orig": 0.6699705123901367, + "epoch": 0.10353008843195054, + "kl_loss": 8.502317428588867, + "loss_ib": 0.10534022748470306, + "step": 360 + }, + { + "ce_ib": 14.938705444335938, + "ce_orig": 0.696804404258728, + "epoch": 0.10353008843195054, + "kl_loss": 9.356005668640137, + "loss_ib": 0.10849875956773758, + "step": 360 + }, + { + "ce_ib": 15.835675239562988, + "ce_orig": 0.8596332669258118, + "epoch": 0.10353008843195054, + "kl_loss": 9.73591423034668, + "loss_ib": 0.11319481581449509, + "step": 360 + }, + { + "ce_ib": 15.533881187438965, + "ce_orig": 0.4826924502849579, + "epoch": 0.10353008843195054, + "kl_loss": 10.024049758911133, + "loss_ib": 0.11577437818050385, + "step": 360 + }, + { + "ce_ib": 12.633554458618164, + "ce_orig": 0.5069536566734314, + "epoch": 0.10381767201092817, + "kl_loss": 9.803174018859863, + "loss_ib": 0.11066529154777527, + "step": 361 + }, + { + "ce_ib": 13.828798294067383, + "ce_orig": 0.896979033946991, + "epoch": 0.10381767201092817, + "kl_loss": 8.719182968139648, + "loss_ib": 0.10102062672376633, + "step": 361 + }, + { + "ce_ib": 21.034914016723633, + "ce_orig": 1.8323390483856201, + "epoch": 0.10381767201092817, + "kl_loss": 9.037761688232422, + "loss_ib": 0.11141253262758255, + "step": 361 + }, + { + "ce_ib": 16.179244995117188, + "ce_orig": 0.7684395909309387, + "epoch": 0.10381767201092817, + "kl_loss": 8.469200134277344, + "loss_ib": 0.100871242582798, + "step": 361 + }, + { + "ce_ib": 13.840865135192871, + "ce_orig": 1.0341031551361084, + "epoch": 0.10410525558990581, + "kl_loss": 7.858333110809326, + "loss_ib": 0.0924241915345192, + "step": 362 + }, + { + "ce_ib": 22.43819236755371, + "ce_orig": 1.788472294807434, + "epoch": 0.10410525558990581, + "kl_loss": 9.436954498291016, + "loss_ib": 0.1168077364563942, + "step": 362 + }, + { + "ce_ib": 19.555612564086914, + "ce_orig": 1.5130561590194702, + "epoch": 0.10410525558990581, + "kl_loss": 9.508270263671875, + "loss_ib": 0.1146383136510849, + "step": 362 + }, + { + "ce_ib": 13.82888412475586, + "ce_orig": 0.5086873769760132, + "epoch": 0.10410525558990581, + "kl_loss": 9.382222175598145, + "loss_ib": 0.10765110701322556, + "step": 362 + }, + { + "ce_ib": 16.548250198364258, + "ce_orig": 1.2047643661499023, + "epoch": 0.10439283916888345, + "kl_loss": 8.958712577819824, + "loss_ib": 0.10613537579774857, + "step": 363 + }, + { + "ce_ib": 15.741909980773926, + "ce_orig": 0.8534471392631531, + "epoch": 0.10439283916888345, + "kl_loss": 9.691909790039062, + "loss_ib": 0.11266100406646729, + "step": 363 + }, + { + "ce_ib": 21.730342864990234, + "ce_orig": 1.475590467453003, + "epoch": 0.10439283916888345, + "kl_loss": 9.107532501220703, + "loss_ib": 0.11280567198991776, + "step": 363 + }, + { + "ce_ib": 13.439830780029297, + "ce_orig": 0.6186426877975464, + "epoch": 0.10439283916888345, + "kl_loss": 9.766258239746094, + "loss_ib": 0.11110240966081619, + "step": 363 + }, + { + "ce_ib": 16.56508445739746, + "ce_orig": 0.9517434239387512, + "epoch": 0.1046804227478611, + "kl_loss": 9.201667785644531, + "loss_ib": 0.10858175903558731, + "step": 364 + }, + { + "ce_ib": 15.66641902923584, + "ce_orig": 0.1410691887140274, + "epoch": 0.1046804227478611, + "kl_loss": 12.184783935546875, + "loss_ib": 0.13751424849033356, + "step": 364 + }, + { + "ce_ib": 16.9328670501709, + "ce_orig": 0.7690316438674927, + "epoch": 0.1046804227478611, + "kl_loss": 7.339565753936768, + "loss_ib": 0.09032852202653885, + "step": 364 + }, + { + "ce_ib": 19.301227569580078, + "ce_orig": 1.541357159614563, + "epoch": 0.1046804227478611, + "kl_loss": 8.6048583984375, + "loss_ib": 0.10534980893135071, + "step": 364 + }, + { + "epoch": 0.10496800632683874, + "grad_norm": 0.35407018661499023, + "learning_rate": 9.99951213833464e-06, + "loss": 1.0547, + "step": 365 + }, + { + "ce_ib": 16.928804397583008, + "ce_orig": 0.898991048336029, + "epoch": 0.10496800632683874, + "kl_loss": 8.901844024658203, + "loss_ib": 0.10594724118709564, + "step": 365 + }, + { + "ce_ib": 12.694430351257324, + "ce_orig": 0.6792229413986206, + "epoch": 0.10496800632683874, + "kl_loss": 9.286140441894531, + "loss_ib": 0.10555583983659744, + "step": 365 + }, + { + "ce_ib": 19.038597106933594, + "ce_orig": 1.1029527187347412, + "epoch": 0.10496800632683874, + "kl_loss": 8.959847450256348, + "loss_ib": 0.10863707214593887, + "step": 365 + }, + { + "ce_ib": 14.017401695251465, + "ce_orig": 0.8020762205123901, + "epoch": 0.10496800632683874, + "kl_loss": 10.084455490112305, + "loss_ib": 0.11486195027828217, + "step": 365 + }, + { + "ce_ib": 17.723241806030273, + "ce_orig": 1.3404425382614136, + "epoch": 0.10525558990581638, + "kl_loss": 8.840962409973145, + "loss_ib": 0.1061328649520874, + "step": 366 + }, + { + "ce_ib": 13.039340019226074, + "ce_orig": 0.7993932962417603, + "epoch": 0.10525558990581638, + "kl_loss": 8.735919952392578, + "loss_ib": 0.10039854049682617, + "step": 366 + }, + { + "ce_ib": 15.478903770446777, + "ce_orig": 0.7874028086662292, + "epoch": 0.10525558990581638, + "kl_loss": 8.431257247924805, + "loss_ib": 0.09979147464036942, + "step": 366 + }, + { + "ce_ib": 14.608510971069336, + "ce_orig": 0.8104147911071777, + "epoch": 0.10525558990581638, + "kl_loss": 8.885534286499023, + "loss_ib": 0.10346385091543198, + "step": 366 + }, + { + "ce_ib": 15.228021621704102, + "ce_orig": 0.8766224384307861, + "epoch": 0.10554317348479401, + "kl_loss": 9.194426536560059, + "loss_ib": 0.10717228800058365, + "step": 367 + }, + { + "ce_ib": 15.027702331542969, + "ce_orig": 0.7485743165016174, + "epoch": 0.10554317348479401, + "kl_loss": 8.38540267944336, + "loss_ib": 0.09888172894716263, + "step": 367 + }, + { + "ce_ib": 17.5020694732666, + "ce_orig": 1.4059276580810547, + "epoch": 0.10554317348479401, + "kl_loss": 9.084081649780273, + "loss_ib": 0.10834288597106934, + "step": 367 + }, + { + "ce_ib": 17.624956130981445, + "ce_orig": 1.1633917093276978, + "epoch": 0.10554317348479401, + "kl_loss": 9.179400444030762, + "loss_ib": 0.10941895842552185, + "step": 367 + }, + { + "ce_ib": 17.59357452392578, + "ce_orig": 1.2003300189971924, + "epoch": 0.10583075706377165, + "kl_loss": 8.543558120727539, + "loss_ib": 0.10302915424108505, + "step": 368 + }, + { + "ce_ib": 18.518356323242188, + "ce_orig": 1.2260750532150269, + "epoch": 0.10583075706377165, + "kl_loss": 8.891761779785156, + "loss_ib": 0.10743597149848938, + "step": 368 + }, + { + "ce_ib": 17.378921508789062, + "ce_orig": 1.023598551750183, + "epoch": 0.10583075706377165, + "kl_loss": 8.479592323303223, + "loss_ib": 0.10217484086751938, + "step": 368 + }, + { + "ce_ib": 19.736833572387695, + "ce_orig": 1.5643643140792847, + "epoch": 0.10583075706377165, + "kl_loss": 8.811531066894531, + "loss_ib": 0.10785213857889175, + "step": 368 + }, + { + "ce_ib": 13.935962677001953, + "ce_orig": 0.5430191159248352, + "epoch": 0.1061183406427493, + "kl_loss": 9.201333999633789, + "loss_ib": 0.10594930499792099, + "step": 369 + }, + { + "ce_ib": 17.181163787841797, + "ce_orig": 1.3840625286102295, + "epoch": 0.1061183406427493, + "kl_loss": 8.802513122558594, + "loss_ib": 0.10520629584789276, + "step": 369 + }, + { + "ce_ib": 14.592924118041992, + "ce_orig": 0.8257763385772705, + "epoch": 0.1061183406427493, + "kl_loss": 8.735795974731445, + "loss_ib": 0.10195088386535645, + "step": 369 + }, + { + "ce_ib": 15.771759986877441, + "ce_orig": 0.7309710383415222, + "epoch": 0.1061183406427493, + "kl_loss": 9.097509384155273, + "loss_ib": 0.1067468523979187, + "step": 369 + }, + { + "epoch": 0.10640592422172694, + "grad_norm": 0.4832640290260315, + "learning_rate": 9.999397703945243e-06, + "loss": 1.0498, + "step": 370 + }, + { + "ce_ib": 15.806496620178223, + "ce_orig": 0.879632294178009, + "epoch": 0.10640592422172694, + "kl_loss": 10.071746826171875, + "loss_ib": 0.11652395874261856, + "step": 370 + }, + { + "ce_ib": 15.620360374450684, + "ce_orig": 0.6177505254745483, + "epoch": 0.10640592422172694, + "kl_loss": 8.700934410095215, + "loss_ib": 0.10262969881296158, + "step": 370 + }, + { + "ce_ib": 14.942098617553711, + "ce_orig": 0.6238611936569214, + "epoch": 0.10640592422172694, + "kl_loss": 9.041037559509277, + "loss_ib": 0.1053524762392044, + "step": 370 + }, + { + "ce_ib": 13.575098991394043, + "ce_orig": 0.7554785013198853, + "epoch": 0.10640592422172694, + "kl_loss": 8.968904495239258, + "loss_ib": 0.10326413810253143, + "step": 370 + }, + { + "ce_ib": 10.327584266662598, + "ce_orig": 0.23832768201828003, + "epoch": 0.10669350780070458, + "kl_loss": 7.03424072265625, + "loss_ib": 0.08066999167203903, + "step": 371 + }, + { + "ce_ib": 19.388700485229492, + "ce_orig": 1.3649802207946777, + "epoch": 0.10669350780070458, + "kl_loss": 9.210512161254883, + "loss_ib": 0.11149382591247559, + "step": 371 + }, + { + "ce_ib": 11.52276611328125, + "ce_orig": 0.7070875763893127, + "epoch": 0.10669350780070458, + "kl_loss": 8.641892433166504, + "loss_ib": 0.09794168919324875, + "step": 371 + }, + { + "ce_ib": 19.951656341552734, + "ce_orig": 1.3112847805023193, + "epoch": 0.10669350780070458, + "kl_loss": 9.238996505737305, + "loss_ib": 0.11234162002801895, + "step": 371 + }, + { + "ce_ib": 19.08662986755371, + "ce_orig": 1.517033338546753, + "epoch": 0.10698109137968222, + "kl_loss": 9.000858306884766, + "loss_ib": 0.10909520834684372, + "step": 372 + }, + { + "ce_ib": 13.207756996154785, + "ce_orig": 0.596856951713562, + "epoch": 0.10698109137968222, + "kl_loss": 8.860549926757812, + "loss_ib": 0.10181325674057007, + "step": 372 + }, + { + "ce_ib": 19.195131301879883, + "ce_orig": 0.9714540839195251, + "epoch": 0.10698109137968222, + "kl_loss": 9.710214614868164, + "loss_ib": 0.11629726737737656, + "step": 372 + }, + { + "ce_ib": 17.687284469604492, + "ce_orig": 1.5548173189163208, + "epoch": 0.10698109137968222, + "kl_loss": 8.532265663146973, + "loss_ib": 0.10300993919372559, + "step": 372 + }, + { + "ce_ib": 18.050241470336914, + "ce_orig": 0.8549476265907288, + "epoch": 0.10726867495865985, + "kl_loss": 8.890786170959473, + "loss_ib": 0.10695809870958328, + "step": 373 + }, + { + "ce_ib": 15.486068725585938, + "ce_orig": 0.7641202807426453, + "epoch": 0.10726867495865985, + "kl_loss": 8.97690486907959, + "loss_ib": 0.1052551120519638, + "step": 373 + }, + { + "ce_ib": 20.0921573638916, + "ce_orig": 1.5870574712753296, + "epoch": 0.10726867495865985, + "kl_loss": 8.584416389465332, + "loss_ib": 0.10593631863594055, + "step": 373 + }, + { + "ce_ib": 14.03450870513916, + "ce_orig": 0.8633349537849426, + "epoch": 0.10726867495865985, + "kl_loss": 9.580089569091797, + "loss_ib": 0.10983540117740631, + "step": 373 + }, + { + "ce_ib": 18.23748779296875, + "ce_orig": 0.9740087985992432, + "epoch": 0.1075562585376375, + "kl_loss": 8.163890838623047, + "loss_ib": 0.09987638890743256, + "step": 374 + }, + { + "ce_ib": 16.242849349975586, + "ce_orig": 0.9537095427513123, + "epoch": 0.1075562585376375, + "kl_loss": 8.648405075073242, + "loss_ib": 0.10272689908742905, + "step": 374 + }, + { + "ce_ib": 15.156339645385742, + "ce_orig": 1.0340036153793335, + "epoch": 0.1075562585376375, + "kl_loss": 8.221203804016113, + "loss_ib": 0.09736837446689606, + "step": 374 + }, + { + "ce_ib": 16.337060928344727, + "ce_orig": 1.1412116289138794, + "epoch": 0.1075562585376375, + "kl_loss": 8.982345581054688, + "loss_ib": 0.10616051405668259, + "step": 374 + }, + { + "epoch": 0.10784384211661514, + "grad_norm": 0.609579861164093, + "learning_rate": 9.999271224846397e-06, + "loss": 1.0013, + "step": 375 + }, + { + "ce_ib": 9.566058158874512, + "ce_orig": 0.2985042929649353, + "epoch": 0.10784384211661514, + "kl_loss": 7.494280815124512, + "loss_ib": 0.08450886607170105, + "step": 375 + }, + { + "ce_ib": 12.867706298828125, + "ce_orig": 0.5363652110099792, + "epoch": 0.10784384211661514, + "kl_loss": 8.874717712402344, + "loss_ib": 0.10161488503217697, + "step": 375 + }, + { + "ce_ib": 14.6071195602417, + "ce_orig": 1.2598626613616943, + "epoch": 0.10784384211661514, + "kl_loss": 8.637162208557129, + "loss_ib": 0.10097873210906982, + "step": 375 + }, + { + "ce_ib": 17.33331871032715, + "ce_orig": 0.6892062425613403, + "epoch": 0.10784384211661514, + "kl_loss": 8.592702865600586, + "loss_ib": 0.1032603457570076, + "step": 375 + }, + { + "ce_ib": 15.182628631591797, + "ce_orig": 0.8526699542999268, + "epoch": 0.10813142569559278, + "kl_loss": 8.426027297973633, + "loss_ib": 0.09944289922714233, + "step": 376 + }, + { + "ce_ib": 18.341575622558594, + "ce_orig": 0.621722400188446, + "epoch": 0.10813142569559278, + "kl_loss": 7.857001304626465, + "loss_ib": 0.09691158682107925, + "step": 376 + }, + { + "ce_ib": 14.127799987792969, + "ce_orig": 0.7635291218757629, + "epoch": 0.10813142569559278, + "kl_loss": 8.97607421875, + "loss_ib": 0.10388854146003723, + "step": 376 + }, + { + "ce_ib": 16.845251083374023, + "ce_orig": 1.1074169874191284, + "epoch": 0.10813142569559278, + "kl_loss": 8.656087875366211, + "loss_ib": 0.10340613126754761, + "step": 376 + }, + { + "ce_ib": 16.19705581665039, + "ce_orig": 0.8272833824157715, + "epoch": 0.10841900927457042, + "kl_loss": 8.441411972045898, + "loss_ib": 0.10061117261648178, + "step": 377 + }, + { + "ce_ib": 11.034353256225586, + "ce_orig": 0.6063670516014099, + "epoch": 0.10841900927457042, + "kl_loss": 9.254929542541504, + "loss_ib": 0.10358364880084991, + "step": 377 + }, + { + "ce_ib": 16.136695861816406, + "ce_orig": 0.9638150930404663, + "epoch": 0.10841900927457042, + "kl_loss": 8.382518768310547, + "loss_ib": 0.09996187686920166, + "step": 377 + }, + { + "ce_ib": 14.821840286254883, + "ce_orig": 1.11579430103302, + "epoch": 0.10841900927457042, + "kl_loss": 8.752357482910156, + "loss_ib": 0.10234541445970535, + "step": 377 + }, + { + "ce_ib": 14.535453796386719, + "ce_orig": 0.5030508041381836, + "epoch": 0.10870659285354806, + "kl_loss": 8.088106155395508, + "loss_ib": 0.09541651606559753, + "step": 378 + }, + { + "ce_ib": 13.514139175415039, + "ce_orig": 0.43041279911994934, + "epoch": 0.10870659285354806, + "kl_loss": 8.524757385253906, + "loss_ib": 0.09876170754432678, + "step": 378 + }, + { + "ce_ib": 13.1725435256958, + "ce_orig": 0.5961971282958984, + "epoch": 0.10870659285354806, + "kl_loss": 8.799945831298828, + "loss_ib": 0.10117200016975403, + "step": 378 + }, + { + "ce_ib": 19.455663681030273, + "ce_orig": 1.371256947517395, + "epoch": 0.10870659285354806, + "kl_loss": 7.995181083679199, + "loss_ib": 0.09940747171640396, + "step": 378 + }, + { + "ce_ib": 13.492290496826172, + "ce_orig": 0.9726055264472961, + "epoch": 0.10899417643252571, + "kl_loss": 8.796764373779297, + "loss_ib": 0.10145992785692215, + "step": 379 + }, + { + "ce_ib": 11.911052703857422, + "ce_orig": 0.7123900651931763, + "epoch": 0.10899417643252571, + "kl_loss": 8.622007369995117, + "loss_ib": 0.09813112020492554, + "step": 379 + }, + { + "ce_ib": 15.950429916381836, + "ce_orig": 0.5447075963020325, + "epoch": 0.10899417643252571, + "kl_loss": 8.17884635925293, + "loss_ib": 0.09773889183998108, + "step": 379 + }, + { + "ce_ib": 17.951889038085938, + "ce_orig": 0.9959045052528381, + "epoch": 0.10899417643252571, + "kl_loss": 8.3568696975708, + "loss_ib": 0.1015205830335617, + "step": 379 + }, + { + "epoch": 0.10928176001150335, + "grad_norm": 0.6501461863517761, + "learning_rate": 9.99913270134281e-06, + "loss": 0.9988, + "step": 380 + }, + { + "ce_ib": 12.876981735229492, + "ce_orig": 0.5673431754112244, + "epoch": 0.10928176001150335, + "kl_loss": 7.431344032287598, + "loss_ib": 0.0871904194355011, + "step": 380 + }, + { + "ce_ib": 15.092470169067383, + "ce_orig": 0.2880413830280304, + "epoch": 0.10928176001150335, + "kl_loss": 8.873856544494629, + "loss_ib": 0.10383103042840958, + "step": 380 + }, + { + "ce_ib": 12.99868392944336, + "ce_orig": 0.5703913569450378, + "epoch": 0.10928176001150335, + "kl_loss": 8.380903244018555, + "loss_ib": 0.09680771827697754, + "step": 380 + }, + { + "ce_ib": 13.363186836242676, + "ce_orig": 0.5209031701087952, + "epoch": 0.10928176001150335, + "kl_loss": 8.865699768066406, + "loss_ib": 0.10202018171548843, + "step": 380 + }, + { + "ce_ib": 20.89650535583496, + "ce_orig": 1.6800005435943604, + "epoch": 0.10956934359048098, + "kl_loss": 8.10032844543457, + "loss_ib": 0.10189979523420334, + "step": 381 + }, + { + "ce_ib": 17.024986267089844, + "ce_orig": 0.6650580167770386, + "epoch": 0.10956934359048098, + "kl_loss": 7.477260112762451, + "loss_ib": 0.0917975902557373, + "step": 381 + }, + { + "ce_ib": 21.227222442626953, + "ce_orig": 1.5764412879943848, + "epoch": 0.10956934359048098, + "kl_loss": 8.041464805603027, + "loss_ib": 0.10164187103509903, + "step": 381 + }, + { + "ce_ib": 18.747699737548828, + "ce_orig": 1.2042864561080933, + "epoch": 0.10956934359048098, + "kl_loss": 7.809324264526367, + "loss_ib": 0.09684094041585922, + "step": 381 + }, + { + "ce_ib": 13.682280540466309, + "ce_orig": 0.604314386844635, + "epoch": 0.10985692716945862, + "kl_loss": 8.266580581665039, + "loss_ib": 0.09634808450937271, + "step": 382 + }, + { + "ce_ib": 11.598891258239746, + "ce_orig": 0.7977884411811829, + "epoch": 0.10985692716945862, + "kl_loss": 8.074966430664062, + "loss_ib": 0.09234855324029922, + "step": 382 + }, + { + "ce_ib": 10.722358703613281, + "ce_orig": 0.488436758518219, + "epoch": 0.10985692716945862, + "kl_loss": 8.471232414245605, + "loss_ib": 0.09543468058109283, + "step": 382 + }, + { + "ce_ib": 20.917461395263672, + "ce_orig": 1.5944008827209473, + "epoch": 0.10985692716945862, + "kl_loss": 8.631521224975586, + "loss_ib": 0.10723267495632172, + "step": 382 + }, + { + "ce_ib": 14.213534355163574, + "ce_orig": 0.9382426142692566, + "epoch": 0.11014451074843626, + "kl_loss": 8.12414836883545, + "loss_ib": 0.09545501321554184, + "step": 383 + }, + { + "ce_ib": 10.728821754455566, + "ce_orig": 0.7022181749343872, + "epoch": 0.11014451074843626, + "kl_loss": 8.53184700012207, + "loss_ib": 0.0960472822189331, + "step": 383 + }, + { + "ce_ib": 10.197715759277344, + "ce_orig": 0.512974202632904, + "epoch": 0.11014451074843626, + "kl_loss": 8.536653518676758, + "loss_ib": 0.09556424617767334, + "step": 383 + }, + { + "ce_ib": 12.831421852111816, + "ce_orig": 0.6488183736801147, + "epoch": 0.11014451074843626, + "kl_loss": 8.425491333007812, + "loss_ib": 0.0970863327383995, + "step": 383 + }, + { + "ce_ib": 13.790513038635254, + "ce_orig": 0.48860305547714233, + "epoch": 0.11043209432741391, + "kl_loss": 8.638005256652832, + "loss_ib": 0.1001705601811409, + "step": 384 + }, + { + "ce_ib": 18.993335723876953, + "ce_orig": 1.300746202468872, + "epoch": 0.11043209432741391, + "kl_loss": 7.856682777404785, + "loss_ib": 0.09756016731262207, + "step": 384 + }, + { + "ce_ib": 14.618206024169922, + "ce_orig": 0.6457257270812988, + "epoch": 0.11043209432741391, + "kl_loss": 5.0488386154174805, + "loss_ib": 0.0651065930724144, + "step": 384 + }, + { + "ce_ib": 13.924184799194336, + "ce_orig": 0.7180906534194946, + "epoch": 0.11043209432741391, + "kl_loss": 8.496256828308105, + "loss_ib": 0.09888675808906555, + "step": 384 + }, + { + "epoch": 0.11071967790639155, + "grad_norm": 0.7011797428131104, + "learning_rate": 9.998982133768226e-06, + "loss": 0.9557, + "step": 385 + }, + { + "ce_ib": 15.821268081665039, + "ce_orig": 1.4919018745422363, + "epoch": 0.11071967790639155, + "kl_loss": 8.213622093200684, + "loss_ib": 0.09795748442411423, + "step": 385 + }, + { + "ce_ib": 14.86267375946045, + "ce_orig": 1.0427626371383667, + "epoch": 0.11071967790639155, + "kl_loss": 7.624295234680176, + "loss_ib": 0.091105617582798, + "step": 385 + }, + { + "ce_ib": 12.792627334594727, + "ce_orig": 0.644935131072998, + "epoch": 0.11071967790639155, + "kl_loss": 8.272308349609375, + "loss_ib": 0.09551570564508438, + "step": 385 + }, + { + "ce_ib": 13.713454246520996, + "ce_orig": 0.8965685963630676, + "epoch": 0.11071967790639155, + "kl_loss": 9.92483901977539, + "loss_ib": 0.11296184360980988, + "step": 385 + }, + { + "ce_ib": 16.306076049804688, + "ce_orig": 0.7789519429206848, + "epoch": 0.11100726148536919, + "kl_loss": 7.479434967041016, + "loss_ib": 0.09110042452812195, + "step": 386 + }, + { + "ce_ib": 15.762940406799316, + "ce_orig": 0.8707707524299622, + "epoch": 0.11100726148536919, + "kl_loss": 7.34848165512085, + "loss_ib": 0.08924775570631027, + "step": 386 + }, + { + "ce_ib": 6.3066205978393555, + "ce_orig": 0.19392843544483185, + "epoch": 0.11100726148536919, + "kl_loss": 5.0305914878845215, + "loss_ib": 0.0566125325858593, + "step": 386 + }, + { + "ce_ib": 14.033432006835938, + "ce_orig": 0.7708988189697266, + "epoch": 0.11100726148536919, + "kl_loss": 8.070176124572754, + "loss_ib": 0.09473519027233124, + "step": 386 + }, + { + "ce_ib": 16.794458389282227, + "ce_orig": 0.8729531168937683, + "epoch": 0.11129484506434682, + "kl_loss": 8.056872367858887, + "loss_ib": 0.09736318141222, + "step": 387 + }, + { + "ce_ib": 14.828986167907715, + "ce_orig": 1.121248483657837, + "epoch": 0.11129484506434682, + "kl_loss": 7.610101699829102, + "loss_ib": 0.09092999994754791, + "step": 387 + }, + { + "ce_ib": 13.89840030670166, + "ce_orig": 0.740386426448822, + "epoch": 0.11129484506434682, + "kl_loss": 7.463097095489502, + "loss_ib": 0.08852936327457428, + "step": 387 + }, + { + "ce_ib": 12.567804336547852, + "ce_orig": 0.956537663936615, + "epoch": 0.11129484506434682, + "kl_loss": 7.956699848175049, + "loss_ib": 0.09213479608297348, + "step": 387 + }, + { + "ce_ib": 11.099259376525879, + "ce_orig": 0.6955797076225281, + "epoch": 0.11158242864332446, + "kl_loss": 8.742734909057617, + "loss_ib": 0.09852661192417145, + "step": 388 + }, + { + "ce_ib": 15.241199493408203, + "ce_orig": 0.7828308939933777, + "epoch": 0.11158242864332446, + "kl_loss": 8.080060005187988, + "loss_ib": 0.09604179859161377, + "step": 388 + }, + { + "ce_ib": 12.71835994720459, + "ce_orig": 0.5617780685424805, + "epoch": 0.11158242864332446, + "kl_loss": 7.975733280181885, + "loss_ib": 0.09247568994760513, + "step": 388 + }, + { + "ce_ib": 15.852190971374512, + "ce_orig": 0.9822865128517151, + "epoch": 0.11158242864332446, + "kl_loss": 7.865725040435791, + "loss_ib": 0.09450943768024445, + "step": 388 + }, + { + "ce_ib": 15.686773300170898, + "ce_orig": 0.8103384971618652, + "epoch": 0.11187001222230211, + "kl_loss": 7.693680763244629, + "loss_ib": 0.09262357652187347, + "step": 389 + }, + { + "ce_ib": 15.016551971435547, + "ce_orig": 0.9884656071662903, + "epoch": 0.11187001222230211, + "kl_loss": 7.716882228851318, + "loss_ib": 0.0921853706240654, + "step": 389 + }, + { + "ce_ib": 18.931434631347656, + "ce_orig": 1.5234384536743164, + "epoch": 0.11187001222230211, + "kl_loss": 7.528309345245361, + "loss_ib": 0.09421452134847641, + "step": 389 + }, + { + "ce_ib": 17.58110237121582, + "ce_orig": 1.5781915187835693, + "epoch": 0.11187001222230211, + "kl_loss": 7.889880180358887, + "loss_ib": 0.09647990018129349, + "step": 389 + }, + { + "epoch": 0.11215759580127975, + "grad_norm": 0.6288163661956787, + "learning_rate": 9.998819522485392e-06, + "loss": 1.0119, + "step": 390 + }, + { + "ce_ib": 15.403480529785156, + "ce_orig": 0.7215459942817688, + "epoch": 0.11215759580127975, + "kl_loss": 8.92459774017334, + "loss_ib": 0.10464945435523987, + "step": 390 + }, + { + "ce_ib": 12.422520637512207, + "ce_orig": 0.38442984223365784, + "epoch": 0.11215759580127975, + "kl_loss": 7.306635856628418, + "loss_ib": 0.08548887819051743, + "step": 390 + }, + { + "ce_ib": 16.295055389404297, + "ce_orig": 0.8220521211624146, + "epoch": 0.11215759580127975, + "kl_loss": 7.3963141441345215, + "loss_ib": 0.09025819599628448, + "step": 390 + }, + { + "ce_ib": 13.246119499206543, + "ce_orig": 0.6906241774559021, + "epoch": 0.11215759580127975, + "kl_loss": 7.956465721130371, + "loss_ib": 0.09281077235937119, + "step": 390 + }, + { + "ce_ib": 13.792742729187012, + "ce_orig": 0.844266414642334, + "epoch": 0.11244517938025739, + "kl_loss": 8.29489517211914, + "loss_ib": 0.0967416912317276, + "step": 391 + }, + { + "ce_ib": 16.884506225585938, + "ce_orig": 0.9576941132545471, + "epoch": 0.11244517938025739, + "kl_loss": 7.322904586791992, + "loss_ib": 0.0901135504245758, + "step": 391 + }, + { + "ce_ib": 12.04941463470459, + "ce_orig": 0.6010465025901794, + "epoch": 0.11244517938025739, + "kl_loss": 7.9065093994140625, + "loss_ib": 0.09111450612545013, + "step": 391 + }, + { + "ce_ib": 13.96172046661377, + "ce_orig": 0.6447573900222778, + "epoch": 0.11244517938025739, + "kl_loss": 7.576847076416016, + "loss_ib": 0.08973018079996109, + "step": 391 + }, + { + "ce_ib": 15.197918891906738, + "ce_orig": 0.7180730700492859, + "epoch": 0.11273276295923502, + "kl_loss": 7.893502235412598, + "loss_ib": 0.0941329374909401, + "step": 392 + }, + { + "ce_ib": 13.946671485900879, + "ce_orig": 0.7366077303886414, + "epoch": 0.11273276295923502, + "kl_loss": 7.63916015625, + "loss_ib": 0.09033826738595963, + "step": 392 + }, + { + "ce_ib": 14.26876163482666, + "ce_orig": 0.8613617420196533, + "epoch": 0.11273276295923502, + "kl_loss": 7.369016647338867, + "loss_ib": 0.087958924472332, + "step": 392 + }, + { + "ce_ib": 12.909405708312988, + "ce_orig": 0.7305828332901001, + "epoch": 0.11273276295923502, + "kl_loss": 7.314949035644531, + "loss_ib": 0.08605889230966568, + "step": 392 + }, + { + "ce_ib": 18.242956161499023, + "ce_orig": 1.3529711961746216, + "epoch": 0.11302034653821266, + "kl_loss": 7.389057636260986, + "loss_ib": 0.092133529484272, + "step": 393 + }, + { + "ce_ib": 9.846675872802734, + "ce_orig": 0.7093670964241028, + "epoch": 0.11302034653821266, + "kl_loss": 7.952066898345947, + "loss_ib": 0.0893673375248909, + "step": 393 + }, + { + "ce_ib": 12.509729385375977, + "ce_orig": 1.0103166103363037, + "epoch": 0.11302034653821266, + "kl_loss": 8.187524795532227, + "loss_ib": 0.09438497573137283, + "step": 393 + }, + { + "ce_ib": 6.737217426300049, + "ce_orig": 0.21716581284999847, + "epoch": 0.11302034653821266, + "kl_loss": 5.771838188171387, + "loss_ib": 0.06445559859275818, + "step": 393 + }, + { + "ce_ib": 12.970864295959473, + "ce_orig": 0.689153790473938, + "epoch": 0.11330793011719031, + "kl_loss": 7.806953430175781, + "loss_ib": 0.09104040265083313, + "step": 394 + }, + { + "ce_ib": 14.298376083374023, + "ce_orig": 0.6159449815750122, + "epoch": 0.11330793011719031, + "kl_loss": 7.474273204803467, + "loss_ib": 0.08904110640287399, + "step": 394 + }, + { + "ce_ib": 16.35755729675293, + "ce_orig": 1.128537893295288, + "epoch": 0.11330793011719031, + "kl_loss": 7.542243003845215, + "loss_ib": 0.09177998453378677, + "step": 394 + }, + { + "ce_ib": 16.245141983032227, + "ce_orig": 0.5840805768966675, + "epoch": 0.11330793011719031, + "kl_loss": 7.685763835906982, + "loss_ib": 0.09310277551412582, + "step": 394 + }, + { + "epoch": 0.11359551369616795, + "grad_norm": 0.43641915917396545, + "learning_rate": 9.998644867886077e-06, + "loss": 0.9567, + "step": 395 + }, + { + "ce_ib": 14.849902153015137, + "ce_orig": 0.9369897842407227, + "epoch": 0.11359551369616795, + "kl_loss": 7.421789646148682, + "loss_ib": 0.08906780183315277, + "step": 395 + }, + { + "ce_ib": 12.70493221282959, + "ce_orig": 0.7189629673957825, + "epoch": 0.11359551369616795, + "kl_loss": 7.841180801391602, + "loss_ib": 0.09111674129962921, + "step": 395 + }, + { + "ce_ib": 16.844697952270508, + "ce_orig": 0.6727548241615295, + "epoch": 0.11359551369616795, + "kl_loss": 6.990694522857666, + "loss_ib": 0.08675163984298706, + "step": 395 + }, + { + "ce_ib": 14.839481353759766, + "ce_orig": 1.255350947380066, + "epoch": 0.11359551369616795, + "kl_loss": 7.494076728820801, + "loss_ib": 0.08978024125099182, + "step": 395 + }, + { + "ce_ib": 16.765884399414062, + "ce_orig": 1.331714153289795, + "epoch": 0.11388309727514559, + "kl_loss": 7.407535552978516, + "loss_ib": 0.09084123373031616, + "step": 396 + }, + { + "ce_ib": 10.355143547058105, + "ce_orig": 0.6202336549758911, + "epoch": 0.11388309727514559, + "kl_loss": 7.605844497680664, + "loss_ib": 0.08641359210014343, + "step": 396 + }, + { + "ce_ib": 14.112444877624512, + "ce_orig": 0.8373818397521973, + "epoch": 0.11388309727514559, + "kl_loss": 7.473760604858398, + "loss_ib": 0.08885005116462708, + "step": 396 + }, + { + "ce_ib": 13.782599449157715, + "ce_orig": 0.816245973110199, + "epoch": 0.11388309727514559, + "kl_loss": 7.807076454162598, + "loss_ib": 0.09185335785150528, + "step": 396 + }, + { + "ce_ib": 15.40560245513916, + "ce_orig": 1.211715579032898, + "epoch": 0.11417068085412323, + "kl_loss": 7.456460475921631, + "loss_ib": 0.08997020870447159, + "step": 397 + }, + { + "ce_ib": 9.293402671813965, + "ce_orig": 0.4598255753517151, + "epoch": 0.11417068085412323, + "kl_loss": 8.316404342651367, + "loss_ib": 0.09245744347572327, + "step": 397 + }, + { + "ce_ib": 14.194599151611328, + "ce_orig": 0.8155964612960815, + "epoch": 0.11417068085412323, + "kl_loss": 7.829108715057373, + "loss_ib": 0.09248568117618561, + "step": 397 + }, + { + "ce_ib": 12.846905708312988, + "ce_orig": 0.8388014435768127, + "epoch": 0.11417068085412323, + "kl_loss": 7.558581829071045, + "loss_ib": 0.08843272179365158, + "step": 397 + }, + { + "ce_ib": 10.11628532409668, + "ce_orig": 0.6828235983848572, + "epoch": 0.11445826443310086, + "kl_loss": 7.854046821594238, + "loss_ib": 0.08865674585103989, + "step": 398 + }, + { + "ce_ib": 9.932538032531738, + "ce_orig": 0.49403539299964905, + "epoch": 0.11445826443310086, + "kl_loss": 8.248268127441406, + "loss_ib": 0.0924152135848999, + "step": 398 + }, + { + "ce_ib": 13.556487083435059, + "ce_orig": 0.49110883474349976, + "epoch": 0.11445826443310086, + "kl_loss": 7.1056108474731445, + "loss_ib": 0.08461259305477142, + "step": 398 + }, + { + "ce_ib": 17.799386978149414, + "ce_orig": 1.120202660560608, + "epoch": 0.11445826443310086, + "kl_loss": 7.619751930236816, + "loss_ib": 0.09399690479040146, + "step": 398 + }, + { + "ce_ib": 18.385801315307617, + "ce_orig": 1.4876097440719604, + "epoch": 0.11474584801207852, + "kl_loss": 7.5165114402771, + "loss_ib": 0.093550905585289, + "step": 399 + }, + { + "ce_ib": 14.88972282409668, + "ce_orig": 1.251899003982544, + "epoch": 0.11474584801207852, + "kl_loss": 7.511725902557373, + "loss_ib": 0.09000697731971741, + "step": 399 + }, + { + "ce_ib": 11.505959510803223, + "ce_orig": 0.5990752577781677, + "epoch": 0.11474584801207852, + "kl_loss": 7.720244884490967, + "loss_ib": 0.08870840817689896, + "step": 399 + }, + { + "ce_ib": 14.54392147064209, + "ce_orig": 0.8790196180343628, + "epoch": 0.11474584801207852, + "kl_loss": 7.9987359046936035, + "loss_ib": 0.09453127533197403, + "step": 399 + }, + { + "epoch": 0.11503343159105615, + "grad_norm": 0.5361573100090027, + "learning_rate": 9.998458170391065e-06, + "loss": 0.9792, + "step": 400 + }, + { + "ce_ib": 18.511137008666992, + "ce_orig": 0.8711093664169312, + "epoch": 0.11503343159105615, + "kl_loss": 7.370296478271484, + "loss_ib": 0.09221409261226654, + "step": 400 + }, + { + "ce_ib": 12.954771995544434, + "ce_orig": 0.9472190141677856, + "epoch": 0.11503343159105615, + "kl_loss": 7.668916702270508, + "loss_ib": 0.0896439328789711, + "step": 400 + }, + { + "ce_ib": 9.006753921508789, + "ce_orig": 0.6201379299163818, + "epoch": 0.11503343159105615, + "kl_loss": 7.630331516265869, + "loss_ib": 0.08531006425619125, + "step": 400 + }, + { + "ce_ib": 15.421821594238281, + "ce_orig": 1.425597906112671, + "epoch": 0.11503343159105615, + "kl_loss": 7.540129661560059, + "loss_ib": 0.09082311391830444, + "step": 400 + }, + { + "ce_ib": 15.149130821228027, + "ce_orig": 1.0793287754058838, + "epoch": 0.11532101517003379, + "kl_loss": 7.484195232391357, + "loss_ib": 0.08999107778072357, + "step": 401 + }, + { + "ce_ib": 10.156062126159668, + "ce_orig": 0.5667037963867188, + "epoch": 0.11532101517003379, + "kl_loss": 7.341686248779297, + "loss_ib": 0.08357291668653488, + "step": 401 + }, + { + "ce_ib": 11.375419616699219, + "ce_orig": 1.1060158014297485, + "epoch": 0.11532101517003379, + "kl_loss": 7.683910846710205, + "loss_ib": 0.08821453154087067, + "step": 401 + }, + { + "ce_ib": 17.671218872070312, + "ce_orig": 1.273245096206665, + "epoch": 0.11532101517003379, + "kl_loss": 7.472596645355225, + "loss_ib": 0.09239718317985535, + "step": 401 + }, + { + "ce_ib": 16.562593460083008, + "ce_orig": 1.0524271726608276, + "epoch": 0.11560859874901143, + "kl_loss": 7.498256683349609, + "loss_ib": 0.09154515713453293, + "step": 402 + }, + { + "ce_ib": 13.353780746459961, + "ce_orig": 0.8203778862953186, + "epoch": 0.11560859874901143, + "kl_loss": 7.896320343017578, + "loss_ib": 0.09231697767972946, + "step": 402 + }, + { + "ce_ib": 15.274881362915039, + "ce_orig": 1.007253885269165, + "epoch": 0.11560859874901143, + "kl_loss": 7.653887748718262, + "loss_ib": 0.09181375056505203, + "step": 402 + }, + { + "ce_ib": 16.868385314941406, + "ce_orig": 1.3746187686920166, + "epoch": 0.11560859874901143, + "kl_loss": 6.850212097167969, + "loss_ib": 0.0853705033659935, + "step": 402 + }, + { + "ce_ib": 14.26913833618164, + "ce_orig": 1.1488964557647705, + "epoch": 0.11589618232798907, + "kl_loss": 7.498937606811523, + "loss_ib": 0.08925851434469223, + "step": 403 + }, + { + "ce_ib": 13.987759590148926, + "ce_orig": 0.4179192781448364, + "epoch": 0.11589618232798907, + "kl_loss": 7.400528430938721, + "loss_ib": 0.08799304068088531, + "step": 403 + }, + { + "ce_ib": 13.340829849243164, + "ce_orig": 0.7595700025558472, + "epoch": 0.11589618232798907, + "kl_loss": 7.298229217529297, + "loss_ib": 0.08632311969995499, + "step": 403 + }, + { + "ce_ib": 12.933422088623047, + "ce_orig": 0.6614567041397095, + "epoch": 0.11589618232798907, + "kl_loss": 7.262064456939697, + "loss_ib": 0.08555406332015991, + "step": 403 + }, + { + "ce_ib": 12.648677825927734, + "ce_orig": 1.055274248123169, + "epoch": 0.11618376590696672, + "kl_loss": 7.3485426902771, + "loss_ib": 0.08613410592079163, + "step": 404 + }, + { + "ce_ib": 15.602824211120605, + "ce_orig": 1.1300698518753052, + "epoch": 0.11618376590696672, + "kl_loss": 7.553566932678223, + "loss_ib": 0.09113849699497223, + "step": 404 + }, + { + "ce_ib": 11.030848503112793, + "ce_orig": 0.6406192779541016, + "epoch": 0.11618376590696672, + "kl_loss": 7.551169395446777, + "loss_ib": 0.0865425392985344, + "step": 404 + }, + { + "ce_ib": 18.89850616455078, + "ce_orig": 1.4245729446411133, + "epoch": 0.11618376590696672, + "kl_loss": 7.921879768371582, + "loss_ib": 0.09811729937791824, + "step": 404 + }, + { + "epoch": 0.11647134948594436, + "grad_norm": 0.5651848316192627, + "learning_rate": 9.998259430450155e-06, + "loss": 1.0022, + "step": 405 + }, + { + "ce_ib": 14.010991096496582, + "ce_orig": 0.7675843834877014, + "epoch": 0.11647134948594436, + "kl_loss": 7.355520248413086, + "loss_ib": 0.08756618946790695, + "step": 405 + }, + { + "ce_ib": 16.283018112182617, + "ce_orig": 1.158023715019226, + "epoch": 0.11647134948594436, + "kl_loss": 7.45557975769043, + "loss_ib": 0.09083881229162216, + "step": 405 + }, + { + "ce_ib": 15.902323722839355, + "ce_orig": 1.0440561771392822, + "epoch": 0.11647134948594436, + "kl_loss": 7.513373851776123, + "loss_ib": 0.09103605896234512, + "step": 405 + }, + { + "ce_ib": 17.28444480895996, + "ce_orig": 1.34712815284729, + "epoch": 0.11647134948594436, + "kl_loss": 7.375174045562744, + "loss_ib": 0.09103618562221527, + "step": 405 + }, + { + "ce_ib": 16.60024070739746, + "ce_orig": 1.3468883037567139, + "epoch": 0.116758933064922, + "kl_loss": 7.249956130981445, + "loss_ib": 0.08909979462623596, + "step": 406 + }, + { + "ce_ib": 10.413902282714844, + "ce_orig": 0.792377233505249, + "epoch": 0.116758933064922, + "kl_loss": 7.449808120727539, + "loss_ib": 0.08491198718547821, + "step": 406 + }, + { + "ce_ib": 14.015169143676758, + "ce_orig": 0.9668265581130981, + "epoch": 0.116758933064922, + "kl_loss": 7.551003456115723, + "loss_ib": 0.08952520042657852, + "step": 406 + }, + { + "ce_ib": 10.880705833435059, + "ce_orig": 0.5008480548858643, + "epoch": 0.116758933064922, + "kl_loss": 7.180622100830078, + "loss_ib": 0.08268693089485168, + "step": 406 + }, + { + "ce_ib": 14.242043495178223, + "ce_orig": 0.5770112872123718, + "epoch": 0.11704651664389963, + "kl_loss": 6.760585308074951, + "loss_ib": 0.0818478912115097, + "step": 407 + }, + { + "ce_ib": 17.079055786132812, + "ce_orig": 1.7003521919250488, + "epoch": 0.11704651664389963, + "kl_loss": 7.319855690002441, + "loss_ib": 0.09027761220932007, + "step": 407 + }, + { + "ce_ib": 13.891350746154785, + "ce_orig": 0.849856972694397, + "epoch": 0.11704651664389963, + "kl_loss": 6.7790093421936035, + "loss_ib": 0.08168143779039383, + "step": 407 + }, + { + "ce_ib": 16.732454299926758, + "ce_orig": 1.2152447700500488, + "epoch": 0.11704651664389963, + "kl_loss": 7.023953437805176, + "loss_ib": 0.08697198331356049, + "step": 407 + }, + { + "ce_ib": 11.8071870803833, + "ce_orig": 0.5867161154747009, + "epoch": 0.11733410022287727, + "kl_loss": 7.046442985534668, + "loss_ib": 0.08227161318063736, + "step": 408 + }, + { + "ce_ib": 12.511359214782715, + "ce_orig": 0.7658900022506714, + "epoch": 0.11733410022287727, + "kl_loss": 7.0474042892456055, + "loss_ib": 0.08298540115356445, + "step": 408 + }, + { + "ce_ib": 14.617786407470703, + "ce_orig": 0.7642480731010437, + "epoch": 0.11733410022287727, + "kl_loss": 6.15494966506958, + "loss_ib": 0.0761672779917717, + "step": 408 + }, + { + "ce_ib": 13.998462677001953, + "ce_orig": 0.3132195472717285, + "epoch": 0.11733410022287727, + "kl_loss": 6.85751485824585, + "loss_ib": 0.08257361501455307, + "step": 408 + }, + { + "ce_ib": 11.64556884765625, + "ce_orig": 0.5106444954872131, + "epoch": 0.11762168380185492, + "kl_loss": 7.280755996704102, + "loss_ib": 0.08445312082767487, + "step": 409 + }, + { + "ce_ib": 15.677848815917969, + "ce_orig": 1.337202548980713, + "epoch": 0.11762168380185492, + "kl_loss": 7.441099166870117, + "loss_ib": 0.0900888442993164, + "step": 409 + }, + { + "ce_ib": 16.44597625732422, + "ce_orig": 0.7064342498779297, + "epoch": 0.11762168380185492, + "kl_loss": 7.3761820793151855, + "loss_ib": 0.0902077928185463, + "step": 409 + }, + { + "ce_ib": 15.110173225402832, + "ce_orig": 1.0219260454177856, + "epoch": 0.11762168380185492, + "kl_loss": 7.731924533843994, + "loss_ib": 0.09242941439151764, + "step": 409 + }, + { + "epoch": 0.11790926738083256, + "grad_norm": 0.3323611617088318, + "learning_rate": 9.998048648542153e-06, + "loss": 0.9117, + "step": 410 + }, + { + "ce_ib": 16.602235794067383, + "ce_orig": 1.2994755506515503, + "epoch": 0.11790926738083256, + "kl_loss": 7.015318870544434, + "loss_ib": 0.0867554247379303, + "step": 410 + }, + { + "ce_ib": 15.049646377563477, + "ce_orig": 0.3460133969783783, + "epoch": 0.11790926738083256, + "kl_loss": 6.893805503845215, + "loss_ib": 0.08398769795894623, + "step": 410 + }, + { + "ce_ib": 15.774944305419922, + "ce_orig": 0.9854010343551636, + "epoch": 0.11790926738083256, + "kl_loss": 7.28230094909668, + "loss_ib": 0.08859795331954956, + "step": 410 + }, + { + "ce_ib": 15.553691864013672, + "ce_orig": 1.1951489448547363, + "epoch": 0.11790926738083256, + "kl_loss": 7.345269680023193, + "loss_ib": 0.08900638669729233, + "step": 410 + }, + { + "ce_ib": 13.911700248718262, + "ce_orig": 0.7811279296875, + "epoch": 0.1181968509598102, + "kl_loss": 7.179073333740234, + "loss_ib": 0.08570243418216705, + "step": 411 + }, + { + "ce_ib": 15.475322723388672, + "ce_orig": 1.222588300704956, + "epoch": 0.1181968509598102, + "kl_loss": 6.665042877197266, + "loss_ib": 0.08212574571371078, + "step": 411 + }, + { + "ce_ib": 17.793344497680664, + "ce_orig": 1.2325204610824585, + "epoch": 0.1181968509598102, + "kl_loss": 7.175121307373047, + "loss_ib": 0.08954454958438873, + "step": 411 + }, + { + "ce_ib": 15.057259559631348, + "ce_orig": 1.0142443180084229, + "epoch": 0.1181968509598102, + "kl_loss": 7.372516632080078, + "loss_ib": 0.0887824222445488, + "step": 411 + }, + { + "ce_ib": 12.571479797363281, + "ce_orig": 0.9224892854690552, + "epoch": 0.11848443453878783, + "kl_loss": 7.373008728027344, + "loss_ib": 0.08630156517028809, + "step": 412 + }, + { + "ce_ib": 10.39489459991455, + "ce_orig": 0.5030142068862915, + "epoch": 0.11848443453878783, + "kl_loss": 8.151752471923828, + "loss_ib": 0.0919124186038971, + "step": 412 + }, + { + "ce_ib": 14.446382522583008, + "ce_orig": 0.6720181703567505, + "epoch": 0.11848443453878783, + "kl_loss": 6.821091651916504, + "loss_ib": 0.08265729993581772, + "step": 412 + }, + { + "ce_ib": 13.9717378616333, + "ce_orig": 0.8369989395141602, + "epoch": 0.11848443453878783, + "kl_loss": 7.489789962768555, + "loss_ib": 0.08886963874101639, + "step": 412 + }, + { + "ce_ib": 13.278703689575195, + "ce_orig": 0.5604954361915588, + "epoch": 0.11877201811776547, + "kl_loss": 6.503649711608887, + "loss_ib": 0.07831519842147827, + "step": 413 + }, + { + "ce_ib": 14.608017921447754, + "ce_orig": 1.0261048078536987, + "epoch": 0.11877201811776547, + "kl_loss": 5.455432891845703, + "loss_ib": 0.06916234642267227, + "step": 413 + }, + { + "ce_ib": 13.513481140136719, + "ce_orig": 0.8638569116592407, + "epoch": 0.11877201811776547, + "kl_loss": 7.192094802856445, + "loss_ib": 0.08543442189693451, + "step": 413 + }, + { + "ce_ib": 14.755450248718262, + "ce_orig": 1.0192320346832275, + "epoch": 0.11877201811776547, + "kl_loss": 6.588578224182129, + "loss_ib": 0.0806412324309349, + "step": 413 + }, + { + "ce_ib": 13.155234336853027, + "ce_orig": 1.021310806274414, + "epoch": 0.11905960169674312, + "kl_loss": 6.788153648376465, + "loss_ib": 0.0810367688536644, + "step": 414 + }, + { + "ce_ib": 16.875110626220703, + "ce_orig": 1.1028145551681519, + "epoch": 0.11905960169674312, + "kl_loss": 7.1112470626831055, + "loss_ib": 0.08798757940530777, + "step": 414 + }, + { + "ce_ib": 15.40821647644043, + "ce_orig": 1.1819595098495483, + "epoch": 0.11905960169674312, + "kl_loss": 6.240026473999023, + "loss_ib": 0.07780847698450089, + "step": 414 + }, + { + "ce_ib": 13.668084144592285, + "ce_orig": 0.795190155506134, + "epoch": 0.11905960169674312, + "kl_loss": 7.253881931304932, + "loss_ib": 0.08620689809322357, + "step": 414 + }, + { + "epoch": 0.11934718527572076, + "grad_norm": 0.39023032784461975, + "learning_rate": 9.997825825174889e-06, + "loss": 0.9566, + "step": 415 + }, + { + "ce_ib": 12.504598617553711, + "ce_orig": 1.0579363107681274, + "epoch": 0.11934718527572076, + "kl_loss": 7.502658843994141, + "loss_ib": 0.0875311866402626, + "step": 415 + }, + { + "ce_ib": 17.01618766784668, + "ce_orig": 1.1351312398910522, + "epoch": 0.11934718527572076, + "kl_loss": 7.204804420471191, + "loss_ib": 0.08906423300504684, + "step": 415 + }, + { + "ce_ib": 12.350006103515625, + "ce_orig": 0.7742664217948914, + "epoch": 0.11934718527572076, + "kl_loss": 6.772958278656006, + "loss_ib": 0.0800795927643776, + "step": 415 + }, + { + "ce_ib": 9.901521682739258, + "ce_orig": 0.73922199010849, + "epoch": 0.11934718527572076, + "kl_loss": 6.764502048492432, + "loss_ib": 0.07754654437303543, + "step": 415 + }, + { + "ce_ib": 14.0098237991333, + "ce_orig": 1.1979413032531738, + "epoch": 0.1196347688546984, + "kl_loss": 6.7593770027160645, + "loss_ib": 0.08160359412431717, + "step": 416 + }, + { + "ce_ib": 15.960185050964355, + "ce_orig": 1.1578559875488281, + "epoch": 0.1196347688546984, + "kl_loss": 6.498690128326416, + "loss_ib": 0.08094708621501923, + "step": 416 + }, + { + "ce_ib": 17.35699462890625, + "ce_orig": 1.3880764245986938, + "epoch": 0.1196347688546984, + "kl_loss": 6.918699264526367, + "loss_ib": 0.0865439847111702, + "step": 416 + }, + { + "ce_ib": 10.74183177947998, + "ce_orig": 0.8996087908744812, + "epoch": 0.1196347688546984, + "kl_loss": 6.894655227661133, + "loss_ib": 0.07968838512897491, + "step": 416 + }, + { + "ce_ib": 12.287108421325684, + "ce_orig": 0.556649386882782, + "epoch": 0.11992235243367604, + "kl_loss": 6.701784610748291, + "loss_ib": 0.07930494844913483, + "step": 417 + }, + { + "ce_ib": 16.17057991027832, + "ce_orig": 1.4443365335464478, + "epoch": 0.11992235243367604, + "kl_loss": 7.298962116241455, + "loss_ib": 0.08916020393371582, + "step": 417 + }, + { + "ce_ib": 14.572080612182617, + "ce_orig": 1.5205786228179932, + "epoch": 0.11992235243367604, + "kl_loss": 7.550329208374023, + "loss_ib": 0.09007536619901657, + "step": 417 + }, + { + "ce_ib": 16.126951217651367, + "ce_orig": 1.6121647357940674, + "epoch": 0.11992235243367604, + "kl_loss": 6.497934341430664, + "loss_ib": 0.0811062902212143, + "step": 417 + }, + { + "ce_ib": 11.263349533081055, + "ce_orig": 0.5618718266487122, + "epoch": 0.12020993601265367, + "kl_loss": 7.025567054748535, + "loss_ib": 0.08151902258396149, + "step": 418 + }, + { + "ce_ib": 17.31966781616211, + "ce_orig": 0.9057826399803162, + "epoch": 0.12020993601265367, + "kl_loss": 6.166810989379883, + "loss_ib": 0.07898777723312378, + "step": 418 + }, + { + "ce_ib": 14.19983959197998, + "ce_orig": 0.42357009649276733, + "epoch": 0.12020993601265367, + "kl_loss": 7.017156600952148, + "loss_ib": 0.0843714028596878, + "step": 418 + }, + { + "ce_ib": 12.986461639404297, + "ce_orig": 0.9464898705482483, + "epoch": 0.12020993601265367, + "kl_loss": 7.11696195602417, + "loss_ib": 0.0841560810804367, + "step": 418 + }, + { + "ce_ib": 5.582565784454346, + "ce_orig": 0.16826413571834564, + "epoch": 0.12049751959163132, + "kl_loss": 4.485371112823486, + "loss_ib": 0.05043627694249153, + "step": 419 + }, + { + "ce_ib": 10.89441204071045, + "ce_orig": 0.6798584461212158, + "epoch": 0.12049751959163132, + "kl_loss": 7.0565643310546875, + "loss_ib": 0.08146005123853683, + "step": 419 + }, + { + "ce_ib": 12.65652847290039, + "ce_orig": 0.6649844646453857, + "epoch": 0.12049751959163132, + "kl_loss": 6.781154632568359, + "loss_ib": 0.0804680734872818, + "step": 419 + }, + { + "ce_ib": 14.701452255249023, + "ce_orig": 1.5013635158538818, + "epoch": 0.12049751959163132, + "kl_loss": 6.757889747619629, + "loss_ib": 0.08228034526109695, + "step": 419 + }, + { + "epoch": 0.12078510317060896, + "grad_norm": 0.3697260916233063, + "learning_rate": 9.99759096088519e-06, + "loss": 1.025, + "step": 420 + }, + { + "ce_ib": 15.822488784790039, + "ce_orig": 1.4374024868011475, + "epoch": 0.12078510317060896, + "kl_loss": 6.555868148803711, + "loss_ib": 0.0813811644911766, + "step": 420 + }, + { + "ce_ib": 16.061962127685547, + "ce_orig": 1.07735013961792, + "epoch": 0.12078510317060896, + "kl_loss": 7.0854363441467285, + "loss_ib": 0.08691632002592087, + "step": 420 + }, + { + "ce_ib": 11.24513053894043, + "ce_orig": 0.944068968296051, + "epoch": 0.12078510317060896, + "kl_loss": 7.07620096206665, + "loss_ib": 0.08200713992118835, + "step": 420 + }, + { + "ce_ib": 9.319937705993652, + "ce_orig": 0.4624161422252655, + "epoch": 0.12078510317060896, + "kl_loss": 6.860795974731445, + "loss_ib": 0.07792789489030838, + "step": 420 + }, + { + "ce_ib": 14.560148239135742, + "ce_orig": 0.9780954718589783, + "epoch": 0.1210726867495866, + "kl_loss": 7.158355712890625, + "loss_ib": 0.08614370226860046, + "step": 421 + }, + { + "ce_ib": 11.736210823059082, + "ce_orig": 0.8180287480354309, + "epoch": 0.1210726867495866, + "kl_loss": 6.502464771270752, + "loss_ib": 0.07676085829734802, + "step": 421 + }, + { + "ce_ib": 11.860121726989746, + "ce_orig": 0.7424320578575134, + "epoch": 0.1210726867495866, + "kl_loss": 6.852118968963623, + "loss_ib": 0.08038130402565002, + "step": 421 + }, + { + "ce_ib": 8.209848403930664, + "ce_orig": 0.38280463218688965, + "epoch": 0.1210726867495866, + "kl_loss": 5.902113437652588, + "loss_ib": 0.06723098456859589, + "step": 421 + }, + { + "ce_ib": 11.078797340393066, + "ce_orig": 1.1233412027359009, + "epoch": 0.12136027032856424, + "kl_loss": 7.3105268478393555, + "loss_ib": 0.08418406546115875, + "step": 422 + }, + { + "ce_ib": 11.66649055480957, + "ce_orig": 0.6671274900436401, + "epoch": 0.12136027032856424, + "kl_loss": 7.258861064910889, + "loss_ib": 0.08425509929656982, + "step": 422 + }, + { + "ce_ib": 12.51806354522705, + "ce_orig": 0.8585453033447266, + "epoch": 0.12136027032856424, + "kl_loss": 4.624774932861328, + "loss_ib": 0.05876580998301506, + "step": 422 + }, + { + "ce_ib": 8.197091102600098, + "ce_orig": 0.7487708330154419, + "epoch": 0.12136027032856424, + "kl_loss": 7.244273662567139, + "loss_ib": 0.08063982427120209, + "step": 422 + }, + { + "ce_ib": 13.192282676696777, + "ce_orig": 0.7279910445213318, + "epoch": 0.12164785390754188, + "kl_loss": 6.454935073852539, + "loss_ib": 0.07774163037538528, + "step": 423 + }, + { + "ce_ib": 14.898761749267578, + "ce_orig": 0.8458772301673889, + "epoch": 0.12164785390754188, + "kl_loss": 6.546128273010254, + "loss_ib": 0.0803600400686264, + "step": 423 + }, + { + "ce_ib": 9.523457527160645, + "ce_orig": 0.5388420224189758, + "epoch": 0.12164785390754188, + "kl_loss": 6.858234405517578, + "loss_ib": 0.07810579985380173, + "step": 423 + }, + { + "ce_ib": 15.374958992004395, + "ce_orig": 0.9829406142234802, + "epoch": 0.12164785390754188, + "kl_loss": 7.067386627197266, + "loss_ib": 0.08604881912469864, + "step": 423 + }, + { + "ce_ib": 14.404019355773926, + "ce_orig": 0.7524064779281616, + "epoch": 0.12193543748651951, + "kl_loss": 6.316936492919922, + "loss_ib": 0.07757338136434555, + "step": 424 + }, + { + "ce_ib": 12.131924629211426, + "ce_orig": 0.5756824612617493, + "epoch": 0.12193543748651951, + "kl_loss": 6.499780654907227, + "loss_ib": 0.07712972909212112, + "step": 424 + }, + { + "ce_ib": 14.279351234436035, + "ce_orig": 1.1739262342453003, + "epoch": 0.12193543748651951, + "kl_loss": 6.669313430786133, + "loss_ib": 0.08097247779369354, + "step": 424 + }, + { + "ce_ib": 11.006293296813965, + "ce_orig": 0.9451432228088379, + "epoch": 0.12193543748651951, + "kl_loss": 6.912232875823975, + "loss_ib": 0.08012861758470535, + "step": 424 + }, + { + "epoch": 0.12222302106549716, + "grad_norm": 0.3377140164375305, + "learning_rate": 9.9973440562389e-06, + "loss": 0.9795, + "step": 425 + }, + { + "ce_ib": 9.749797821044922, + "ce_orig": 0.4160507023334503, + "epoch": 0.12222302106549716, + "kl_loss": 6.430499076843262, + "loss_ib": 0.0740547850728035, + "step": 425 + }, + { + "ce_ib": 13.227080345153809, + "ce_orig": 0.7790858745574951, + "epoch": 0.12222302106549716, + "kl_loss": 6.962891578674316, + "loss_ib": 0.08285599201917648, + "step": 425 + }, + { + "ce_ib": 11.88380241394043, + "ce_orig": 0.8634589314460754, + "epoch": 0.12222302106549716, + "kl_loss": 6.578843593597412, + "loss_ib": 0.07767223566770554, + "step": 425 + }, + { + "ce_ib": 11.239786148071289, + "ce_orig": 0.6265932321548462, + "epoch": 0.12222302106549716, + "kl_loss": 6.705416202545166, + "loss_ib": 0.07829394936561584, + "step": 425 + }, + { + "ce_ib": 13.436622619628906, + "ce_orig": 0.9564598202705383, + "epoch": 0.1225106046444748, + "kl_loss": 6.993147850036621, + "loss_ib": 0.08336810022592545, + "step": 426 + }, + { + "ce_ib": 10.594719886779785, + "ce_orig": 0.2862907946109772, + "epoch": 0.1225106046444748, + "kl_loss": 6.47970724105835, + "loss_ib": 0.07539179176092148, + "step": 426 + }, + { + "ce_ib": 11.986897468566895, + "ce_orig": 0.8417707681655884, + "epoch": 0.1225106046444748, + "kl_loss": 6.577683448791504, + "loss_ib": 0.07776372879743576, + "step": 426 + }, + { + "ce_ib": 12.561016082763672, + "ce_orig": 0.631502091884613, + "epoch": 0.1225106046444748, + "kl_loss": 6.709961891174316, + "loss_ib": 0.07966063171625137, + "step": 426 + }, + { + "ce_ib": 11.712442398071289, + "ce_orig": 0.6886617541313171, + "epoch": 0.12279818822345244, + "kl_loss": 5.167166709899902, + "loss_ib": 0.06338410824537277, + "step": 427 + }, + { + "ce_ib": 16.66871452331543, + "ce_orig": 1.3375333547592163, + "epoch": 0.12279818822345244, + "kl_loss": 6.451887130737305, + "loss_ib": 0.08118758350610733, + "step": 427 + }, + { + "ce_ib": 16.5571346282959, + "ce_orig": 0.8830122351646423, + "epoch": 0.12279818822345244, + "kl_loss": 6.616879463195801, + "loss_ib": 0.08272592723369598, + "step": 427 + }, + { + "ce_ib": 14.535783767700195, + "ce_orig": 1.0712122917175293, + "epoch": 0.12279818822345244, + "kl_loss": 7.098201274871826, + "loss_ib": 0.08551779389381409, + "step": 427 + }, + { + "ce_ib": 13.506948471069336, + "ce_orig": 0.8588005304336548, + "epoch": 0.12308577180243008, + "kl_loss": 6.7285261154174805, + "loss_ib": 0.08079220354557037, + "step": 428 + }, + { + "ce_ib": 17.66691780090332, + "ce_orig": 1.5079838037490845, + "epoch": 0.12308577180243008, + "kl_loss": 5.905591011047363, + "loss_ib": 0.07672282308340073, + "step": 428 + }, + { + "ce_ib": 13.11380672454834, + "ce_orig": 0.8807310461997986, + "epoch": 0.12308577180243008, + "kl_loss": 6.711277008056641, + "loss_ib": 0.08022657036781311, + "step": 428 + }, + { + "ce_ib": 16.873750686645508, + "ce_orig": 1.461756944656372, + "epoch": 0.12308577180243008, + "kl_loss": 6.9212870597839355, + "loss_ib": 0.08608661592006683, + "step": 428 + }, + { + "ce_ib": 8.633293151855469, + "ce_orig": 0.3702143728733063, + "epoch": 0.12337335538140771, + "kl_loss": 6.651648998260498, + "loss_ib": 0.0751497820019722, + "step": 429 + }, + { + "ce_ib": 12.873100280761719, + "ce_orig": 0.8258522152900696, + "epoch": 0.12337335538140771, + "kl_loss": 6.824582099914551, + "loss_ib": 0.08111891895532608, + "step": 429 + }, + { + "ce_ib": 13.301980972290039, + "ce_orig": 1.0934252738952637, + "epoch": 0.12337335538140771, + "kl_loss": 6.730596542358398, + "loss_ib": 0.08060794323682785, + "step": 429 + }, + { + "ce_ib": 9.3226318359375, + "ce_orig": 0.567753255367279, + "epoch": 0.12337335538140771, + "kl_loss": 6.865760326385498, + "loss_ib": 0.07798023521900177, + "step": 429 + }, + { + "epoch": 0.12366093896038537, + "grad_norm": 0.2692417800426483, + "learning_rate": 9.99708511183087e-06, + "loss": 1.0201, + "step": 430 + }, + { + "ce_ib": 13.1838960647583, + "ce_orig": 0.8950438499450684, + "epoch": 0.12366093896038537, + "kl_loss": 6.7184343338012695, + "loss_ib": 0.08036824315786362, + "step": 430 + }, + { + "ce_ib": 14.154587745666504, + "ce_orig": 1.36903977394104, + "epoch": 0.12366093896038537, + "kl_loss": 7.069368839263916, + "loss_ib": 0.08484827727079391, + "step": 430 + }, + { + "ce_ib": 19.739904403686523, + "ce_orig": 2.018660068511963, + "epoch": 0.12366093896038537, + "kl_loss": 6.694252967834473, + "loss_ib": 0.08668243139982224, + "step": 430 + }, + { + "ce_ib": 12.907319068908691, + "ce_orig": 1.3039312362670898, + "epoch": 0.12366093896038537, + "kl_loss": 6.895936012268066, + "loss_ib": 0.08186668157577515, + "step": 430 + }, + { + "ce_ib": 12.085116386413574, + "ce_orig": 0.8512650728225708, + "epoch": 0.123948522539363, + "kl_loss": 6.91609001159668, + "loss_ib": 0.08124601095914841, + "step": 431 + }, + { + "ce_ib": 13.673727989196777, + "ce_orig": 1.095442295074463, + "epoch": 0.123948522539363, + "kl_loss": 6.370013236999512, + "loss_ib": 0.07737386226654053, + "step": 431 + }, + { + "ce_ib": 12.386370658874512, + "ce_orig": 0.9683983325958252, + "epoch": 0.123948522539363, + "kl_loss": 6.190813064575195, + "loss_ib": 0.07429450005292892, + "step": 431 + }, + { + "ce_ib": 12.50558090209961, + "ce_orig": 0.49803832173347473, + "epoch": 0.123948522539363, + "kl_loss": 6.456753730773926, + "loss_ib": 0.0770731121301651, + "step": 431 + }, + { + "ce_ib": 17.67017936706543, + "ce_orig": 1.4665132761001587, + "epoch": 0.12423610611834064, + "kl_loss": 6.8333563804626465, + "loss_ib": 0.08600374311208725, + "step": 432 + }, + { + "ce_ib": 15.355440139770508, + "ce_orig": 1.0783402919769287, + "epoch": 0.12423610611834064, + "kl_loss": 6.24898624420166, + "loss_ib": 0.07784529775381088, + "step": 432 + }, + { + "ce_ib": 12.970996856689453, + "ce_orig": 0.908065676689148, + "epoch": 0.12423610611834064, + "kl_loss": 6.597336769104004, + "loss_ib": 0.0789443626999855, + "step": 432 + }, + { + "ce_ib": 10.632279396057129, + "ce_orig": 0.8153582215309143, + "epoch": 0.12423610611834064, + "kl_loss": 6.526000022888184, + "loss_ib": 0.07589227706193924, + "step": 432 + }, + { + "ce_ib": 15.43746566772461, + "ce_orig": 0.9074482917785645, + "epoch": 0.12452368969731828, + "kl_loss": 6.491483688354492, + "loss_ib": 0.0803523063659668, + "step": 433 + }, + { + "ce_ib": 8.455195426940918, + "ce_orig": 0.6100387573242188, + "epoch": 0.12452368969731828, + "kl_loss": 6.833198070526123, + "loss_ib": 0.07678717374801636, + "step": 433 + }, + { + "ce_ib": 12.123833656311035, + "ce_orig": 0.36186474561691284, + "epoch": 0.12452368969731828, + "kl_loss": 6.947832107543945, + "loss_ib": 0.08160214871168137, + "step": 433 + }, + { + "ce_ib": 10.345142364501953, + "ce_orig": 0.6522800326347351, + "epoch": 0.12452368969731828, + "kl_loss": 6.990741729736328, + "loss_ib": 0.08025255799293518, + "step": 433 + }, + { + "ce_ib": 16.296939849853516, + "ce_orig": 0.889448344707489, + "epoch": 0.12481127327629592, + "kl_loss": 6.41514778137207, + "loss_ib": 0.08044841885566711, + "step": 434 + }, + { + "ce_ib": 11.671658515930176, + "ce_orig": 0.5737780332565308, + "epoch": 0.12481127327629592, + "kl_loss": 6.855953693389893, + "loss_ib": 0.0802311971783638, + "step": 434 + }, + { + "ce_ib": 14.455811500549316, + "ce_orig": 0.9031121134757996, + "epoch": 0.12481127327629592, + "kl_loss": 6.662840843200684, + "loss_ib": 0.0810842216014862, + "step": 434 + }, + { + "ce_ib": 11.030969619750977, + "ce_orig": 0.5072352290153503, + "epoch": 0.12481127327629592, + "kl_loss": 6.787086486816406, + "loss_ib": 0.07890183478593826, + "step": 434 + }, + { + "epoch": 0.12509885685527355, + "grad_norm": 0.3564906418323517, + "learning_rate": 9.99681412828496e-06, + "loss": 0.9509, + "step": 435 + }, + { + "ce_ib": 13.964662551879883, + "ce_orig": 0.7778974175453186, + "epoch": 0.12509885685527355, + "kl_loss": 6.725362777709961, + "loss_ib": 0.08121828734874725, + "step": 435 + }, + { + "ce_ib": 11.405627250671387, + "ce_orig": 0.7385088801383972, + "epoch": 0.12509885685527355, + "kl_loss": 6.814925193786621, + "loss_ib": 0.07955487817525864, + "step": 435 + }, + { + "ce_ib": 9.46563720703125, + "ce_orig": 0.48979493975639343, + "epoch": 0.12509885685527355, + "kl_loss": 6.513537883758545, + "loss_ib": 0.07460101693868637, + "step": 435 + }, + { + "ce_ib": 11.945999145507812, + "ce_orig": 1.1584625244140625, + "epoch": 0.12509885685527355, + "kl_loss": 6.719861030578613, + "loss_ib": 0.07914461195468903, + "step": 435 + }, + { + "ce_ib": 10.591259002685547, + "ce_orig": 1.0169495344161987, + "epoch": 0.1253864404342512, + "kl_loss": 6.246793746948242, + "loss_ib": 0.07305919378995895, + "step": 436 + }, + { + "ce_ib": 11.989269256591797, + "ce_orig": 0.7858111262321472, + "epoch": 0.1253864404342512, + "kl_loss": 6.57621431350708, + "loss_ib": 0.07775141298770905, + "step": 436 + }, + { + "ce_ib": 19.393007278442383, + "ce_orig": 1.7850255966186523, + "epoch": 0.1253864404342512, + "kl_loss": 6.306278228759766, + "loss_ib": 0.08245578408241272, + "step": 436 + }, + { + "ce_ib": 12.036297798156738, + "ce_orig": 0.7531123161315918, + "epoch": 0.1253864404342512, + "kl_loss": 6.1202850341796875, + "loss_ib": 0.07323914766311646, + "step": 436 + }, + { + "ce_ib": 10.144059181213379, + "ce_orig": 0.3747836649417877, + "epoch": 0.12567402401322886, + "kl_loss": 4.424054145812988, + "loss_ib": 0.05438460409641266, + "step": 437 + }, + { + "ce_ib": 14.241146087646484, + "ce_orig": 1.154929280281067, + "epoch": 0.12567402401322886, + "kl_loss": 6.789527893066406, + "loss_ib": 0.08213642239570618, + "step": 437 + }, + { + "ce_ib": 10.732177734375, + "ce_orig": 0.37166017293930054, + "epoch": 0.12567402401322886, + "kl_loss": 6.624038219451904, + "loss_ib": 0.07697255909442902, + "step": 437 + }, + { + "ce_ib": 11.875864028930664, + "ce_orig": 0.5969848036766052, + "epoch": 0.12567402401322886, + "kl_loss": 6.518521308898926, + "loss_ib": 0.07706107199192047, + "step": 437 + }, + { + "ce_ib": 10.759546279907227, + "ce_orig": 0.5922636985778809, + "epoch": 0.12596160759220648, + "kl_loss": 6.725497245788574, + "loss_ib": 0.07801451534032822, + "step": 438 + }, + { + "ce_ib": 15.553537368774414, + "ce_orig": 0.734350323677063, + "epoch": 0.12596160759220648, + "kl_loss": 6.394842147827148, + "loss_ib": 0.07950195670127869, + "step": 438 + }, + { + "ce_ib": 6.804005146026611, + "ce_orig": 0.3962157666683197, + "epoch": 0.12596160759220648, + "kl_loss": 5.0119147300720215, + "loss_ib": 0.05692315101623535, + "step": 438 + }, + { + "ce_ib": 12.179630279541016, + "ce_orig": 0.42266571521759033, + "epoch": 0.12596160759220648, + "kl_loss": 6.785035133361816, + "loss_ib": 0.08002997934818268, + "step": 438 + }, + { + "ce_ib": 11.9126558303833, + "ce_orig": 0.6911407709121704, + "epoch": 0.12624919117118413, + "kl_loss": 5.950160026550293, + "loss_ib": 0.07141425460577011, + "step": 439 + }, + { + "ce_ib": 13.094669342041016, + "ce_orig": 0.827157199382782, + "epoch": 0.12624919117118413, + "kl_loss": 6.5986528396606445, + "loss_ib": 0.07908119261264801, + "step": 439 + }, + { + "ce_ib": 9.707082748413086, + "ce_orig": 0.6359635591506958, + "epoch": 0.12624919117118413, + "kl_loss": 5.76313591003418, + "loss_ib": 0.06733844429254532, + "step": 439 + }, + { + "ce_ib": 14.719921112060547, + "ce_orig": 0.9809554815292358, + "epoch": 0.12624919117118413, + "kl_loss": 6.400526523590088, + "loss_ib": 0.0787251815199852, + "step": 439 + }, + { + "epoch": 0.12653677475016176, + "grad_norm": 0.3794838488101959, + "learning_rate": 9.996531106254027e-06, + "loss": 0.9376, + "step": 440 + }, + { + "ce_ib": 12.931950569152832, + "ce_orig": 0.9358404278755188, + "epoch": 0.12653677475016176, + "kl_loss": 6.613272666931152, + "loss_ib": 0.07906467467546463, + "step": 440 + }, + { + "ce_ib": 13.42741584777832, + "ce_orig": 0.7416799068450928, + "epoch": 0.12653677475016176, + "kl_loss": 6.373296737670898, + "loss_ib": 0.07716038078069687, + "step": 440 + }, + { + "ce_ib": 12.632997512817383, + "ce_orig": 1.1645599603652954, + "epoch": 0.12653677475016176, + "kl_loss": 6.401862144470215, + "loss_ib": 0.07665161788463593, + "step": 440 + }, + { + "ce_ib": 16.439931869506836, + "ce_orig": 1.5734604597091675, + "epoch": 0.12653677475016176, + "kl_loss": 6.544001579284668, + "loss_ib": 0.08187995105981827, + "step": 440 + }, + { + "ce_ib": 13.676290512084961, + "ce_orig": 1.0207133293151855, + "epoch": 0.1268243583291394, + "kl_loss": 6.34605073928833, + "loss_ib": 0.0771367996931076, + "step": 441 + }, + { + "ce_ib": 10.394545555114746, + "ce_orig": 0.721448540687561, + "epoch": 0.1268243583291394, + "kl_loss": 6.206020355224609, + "loss_ib": 0.07245474308729172, + "step": 441 + }, + { + "ce_ib": 11.452078819274902, + "ce_orig": 1.0113741159439087, + "epoch": 0.1268243583291394, + "kl_loss": 6.649386405944824, + "loss_ib": 0.07794594019651413, + "step": 441 + }, + { + "ce_ib": 13.160637855529785, + "ce_orig": 0.8914715647697449, + "epoch": 0.1268243583291394, + "kl_loss": 6.297116279602051, + "loss_ib": 0.07613179832696915, + "step": 441 + }, + { + "ce_ib": 13.181857109069824, + "ce_orig": 1.0282145738601685, + "epoch": 0.12711194190811706, + "kl_loss": 6.352758884429932, + "loss_ib": 0.07670944184064865, + "step": 442 + }, + { + "ce_ib": 10.155783653259277, + "ce_orig": 0.7708104252815247, + "epoch": 0.12711194190811706, + "kl_loss": 6.537982940673828, + "loss_ib": 0.0755356103181839, + "step": 442 + }, + { + "ce_ib": 11.089113235473633, + "ce_orig": 0.761037290096283, + "epoch": 0.12711194190811706, + "kl_loss": 6.812819004058838, + "loss_ib": 0.07921729981899261, + "step": 442 + }, + { + "ce_ib": 13.044452667236328, + "ce_orig": 0.8526286482810974, + "epoch": 0.12711194190811706, + "kl_loss": 6.086942672729492, + "loss_ib": 0.07391387969255447, + "step": 442 + }, + { + "ce_ib": 14.216473579406738, + "ce_orig": 1.2142722606658936, + "epoch": 0.12739952548709468, + "kl_loss": 6.530941486358643, + "loss_ib": 0.079525887966156, + "step": 443 + }, + { + "ce_ib": 16.07403564453125, + "ce_orig": 1.820540428161621, + "epoch": 0.12739952548709468, + "kl_loss": 6.171257495880127, + "loss_ib": 0.07778660953044891, + "step": 443 + }, + { + "ce_ib": 15.684346199035645, + "ce_orig": 1.0308736562728882, + "epoch": 0.12739952548709468, + "kl_loss": 6.518500328063965, + "loss_ib": 0.08086934685707092, + "step": 443 + }, + { + "ce_ib": 10.408639907836914, + "ce_orig": 0.4763404130935669, + "epoch": 0.12739952548709468, + "kl_loss": 5.605975151062012, + "loss_ib": 0.06646838784217834, + "step": 443 + }, + { + "ce_ib": 12.710926055908203, + "ce_orig": 0.828603208065033, + "epoch": 0.12768710906607234, + "kl_loss": 6.303215026855469, + "loss_ib": 0.07574307173490524, + "step": 444 + }, + { + "ce_ib": 10.51518440246582, + "ce_orig": 0.46696940064430237, + "epoch": 0.12768710906607234, + "kl_loss": 6.153438568115234, + "loss_ib": 0.0720495656132698, + "step": 444 + }, + { + "ce_ib": 8.491880416870117, + "ce_orig": 0.7530315518379211, + "epoch": 0.12768710906607234, + "kl_loss": 6.541855812072754, + "loss_ib": 0.0739104375243187, + "step": 444 + }, + { + "ce_ib": 16.409770965576172, + "ce_orig": 1.3153358697891235, + "epoch": 0.12768710906607234, + "kl_loss": 6.441512107849121, + "loss_ib": 0.08082488924264908, + "step": 444 + }, + { + "epoch": 0.12797469264504996, + "grad_norm": 0.27752232551574707, + "learning_rate": 9.996236046419941e-06, + "loss": 0.993, + "step": 445 + }, + { + "ce_ib": 13.984725952148438, + "ce_orig": 0.8543322086334229, + "epoch": 0.12797469264504996, + "kl_loss": 5.812187194824219, + "loss_ib": 0.07210659980773926, + "step": 445 + }, + { + "ce_ib": 13.997330665588379, + "ce_orig": 0.5448931455612183, + "epoch": 0.12797469264504996, + "kl_loss": 6.332775592803955, + "loss_ib": 0.07732508331537247, + "step": 445 + }, + { + "ce_ib": 12.42682933807373, + "ce_orig": 0.7178173065185547, + "epoch": 0.12797469264504996, + "kl_loss": 6.424311637878418, + "loss_ib": 0.07666994631290436, + "step": 445 + }, + { + "ce_ib": 9.565461158752441, + "ce_orig": 0.8134915232658386, + "epoch": 0.12797469264504996, + "kl_loss": 6.6445112228393555, + "loss_ib": 0.0760105699300766, + "step": 445 + }, + { + "ce_ib": 8.924689292907715, + "ce_orig": 0.5386297106742859, + "epoch": 0.1282622762240276, + "kl_loss": 6.35334587097168, + "loss_ib": 0.07245814800262451, + "step": 446 + }, + { + "ce_ib": 16.734355926513672, + "ce_orig": 1.0333242416381836, + "epoch": 0.1282622762240276, + "kl_loss": 6.456616401672363, + "loss_ib": 0.0813005119562149, + "step": 446 + }, + { + "ce_ib": 11.987763404846191, + "ce_orig": 1.0803635120391846, + "epoch": 0.1282622762240276, + "kl_loss": 6.508333206176758, + "loss_ib": 0.07707109302282333, + "step": 446 + }, + { + "ce_ib": 10.953421592712402, + "ce_orig": 0.6203237175941467, + "epoch": 0.1282622762240276, + "kl_loss": 6.250512599945068, + "loss_ib": 0.07345854490995407, + "step": 446 + }, + { + "ce_ib": 9.890134811401367, + "ce_orig": 0.7359150648117065, + "epoch": 0.12854985980300526, + "kl_loss": 6.235504627227783, + "loss_ib": 0.07224518060684204, + "step": 447 + }, + { + "ce_ib": 14.127108573913574, + "ce_orig": 0.6828675866127014, + "epoch": 0.12854985980300526, + "kl_loss": 6.255981922149658, + "loss_ib": 0.07668692618608475, + "step": 447 + }, + { + "ce_ib": 7.48207426071167, + "ce_orig": 0.6292188167572021, + "epoch": 0.12854985980300526, + "kl_loss": 6.336426734924316, + "loss_ib": 0.07084634155035019, + "step": 447 + }, + { + "ce_ib": 14.48934268951416, + "ce_orig": 0.9720964431762695, + "epoch": 0.12854985980300526, + "kl_loss": 6.416128158569336, + "loss_ib": 0.07865062355995178, + "step": 447 + }, + { + "ce_ib": 9.954630851745605, + "ce_orig": 0.7827691435813904, + "epoch": 0.12883744338198289, + "kl_loss": 6.488411903381348, + "loss_ib": 0.07483874261379242, + "step": 448 + }, + { + "ce_ib": 9.191707611083984, + "ce_orig": 0.7463720440864563, + "epoch": 0.12883744338198289, + "kl_loss": 6.294938087463379, + "loss_ib": 0.07214108854532242, + "step": 448 + }, + { + "ce_ib": 11.25207805633545, + "ce_orig": 0.8659082651138306, + "epoch": 0.12883744338198289, + "kl_loss": 6.527953147888184, + "loss_ib": 0.07653161138296127, + "step": 448 + }, + { + "ce_ib": 13.492358207702637, + "ce_orig": 0.8569541573524475, + "epoch": 0.12883744338198289, + "kl_loss": 6.035775661468506, + "loss_ib": 0.073850117623806, + "step": 448 + }, + { + "ce_ib": 7.855426788330078, + "ce_orig": 0.2511903643608093, + "epoch": 0.12912502696096054, + "kl_loss": 4.307304859161377, + "loss_ib": 0.050928473472595215, + "step": 449 + }, + { + "ce_ib": 15.168793678283691, + "ce_orig": 0.9671982526779175, + "epoch": 0.12912502696096054, + "kl_loss": 6.6048150062561035, + "loss_ib": 0.08121694624423981, + "step": 449 + }, + { + "ce_ib": 12.226943969726562, + "ce_orig": 0.47483983635902405, + "epoch": 0.12912502696096054, + "kl_loss": 6.506505489349365, + "loss_ib": 0.07729199528694153, + "step": 449 + }, + { + "ce_ib": 10.032635688781738, + "ce_orig": 0.9576376676559448, + "epoch": 0.12912502696096054, + "kl_loss": 6.404877662658691, + "loss_ib": 0.0740814059972763, + "step": 449 + }, + { + "epoch": 0.12941261053993816, + "grad_norm": 0.2948267161846161, + "learning_rate": 9.995928949493568e-06, + "loss": 0.9556, + "step": 450 + }, + { + "ce_ib": 13.594502449035645, + "ce_orig": 0.7634487748146057, + "epoch": 0.12941261053993816, + "kl_loss": 6.214456558227539, + "loss_ib": 0.0757390707731247, + "step": 450 + }, + { + "ce_ib": 11.100218772888184, + "ce_orig": 0.5212127566337585, + "epoch": 0.12941261053993816, + "kl_loss": 6.206829071044922, + "loss_ib": 0.07316850870847702, + "step": 450 + }, + { + "ce_ib": 10.686315536499023, + "ce_orig": 0.7214837670326233, + "epoch": 0.12941261053993816, + "kl_loss": 6.353018760681152, + "loss_ib": 0.07421649992465973, + "step": 450 + }, + { + "ce_ib": 10.190530776977539, + "ce_orig": 0.8089180588722229, + "epoch": 0.12941261053993816, + "kl_loss": 6.573402404785156, + "loss_ib": 0.07592455297708511, + "step": 450 + }, + { + "ce_ib": 9.33697509765625, + "ce_orig": 0.5962152481079102, + "epoch": 0.1297001941189158, + "kl_loss": 6.153956413269043, + "loss_ib": 0.07087653875350952, + "step": 451 + }, + { + "ce_ib": 11.186933517456055, + "ce_orig": 0.6141796708106995, + "epoch": 0.1297001941189158, + "kl_loss": 6.5120697021484375, + "loss_ib": 0.07630763202905655, + "step": 451 + }, + { + "ce_ib": 14.915994644165039, + "ce_orig": 0.9922129511833191, + "epoch": 0.1297001941189158, + "kl_loss": 5.893243312835693, + "loss_ib": 0.0738484263420105, + "step": 451 + }, + { + "ce_ib": 11.000643730163574, + "ce_orig": 0.7535352110862732, + "epoch": 0.1297001941189158, + "kl_loss": 6.5356645584106445, + "loss_ib": 0.07635729014873505, + "step": 451 + }, + { + "ce_ib": 10.349251747131348, + "ce_orig": 0.5634931921958923, + "epoch": 0.12998777769789346, + "kl_loss": 6.1491804122924805, + "loss_ib": 0.0718410536646843, + "step": 452 + }, + { + "ce_ib": 14.159017562866211, + "ce_orig": 1.0384783744812012, + "epoch": 0.12998777769789346, + "kl_loss": 6.039100646972656, + "loss_ib": 0.07455001771450043, + "step": 452 + }, + { + "ce_ib": 9.425822257995605, + "ce_orig": 0.7196267247200012, + "epoch": 0.12998777769789346, + "kl_loss": 6.463262557983398, + "loss_ib": 0.07405844330787659, + "step": 452 + }, + { + "ce_ib": 13.532403945922852, + "ce_orig": 0.748589277267456, + "epoch": 0.12998777769789346, + "kl_loss": 6.038760662078857, + "loss_ib": 0.07392001152038574, + "step": 452 + }, + { + "ce_ib": 10.410351753234863, + "ce_orig": 0.5488285422325134, + "epoch": 0.1302753612768711, + "kl_loss": 6.22846794128418, + "loss_ib": 0.07269503176212311, + "step": 453 + }, + { + "ce_ib": 12.851318359375, + "ce_orig": 0.9907112717628479, + "epoch": 0.1302753612768711, + "kl_loss": 6.350039482116699, + "loss_ib": 0.07635170966386795, + "step": 453 + }, + { + "ce_ib": 13.529624938964844, + "ce_orig": 0.9963613152503967, + "epoch": 0.1302753612768711, + "kl_loss": 6.477352142333984, + "loss_ib": 0.07830314338207245, + "step": 453 + }, + { + "ce_ib": 15.717424392700195, + "ce_orig": 0.9456937313079834, + "epoch": 0.1302753612768711, + "kl_loss": 5.184221267700195, + "loss_ib": 0.0675596371293068, + "step": 453 + }, + { + "ce_ib": 11.294244766235352, + "ce_orig": 0.7741903066635132, + "epoch": 0.13056294485584874, + "kl_loss": 6.089672088623047, + "loss_ib": 0.07219096273183823, + "step": 454 + }, + { + "ce_ib": 18.31461524963379, + "ce_orig": 1.1526209115982056, + "epoch": 0.13056294485584874, + "kl_loss": 5.830255508422852, + "loss_ib": 0.07661716639995575, + "step": 454 + }, + { + "ce_ib": 10.833757400512695, + "ce_orig": 0.6415656805038452, + "epoch": 0.13056294485584874, + "kl_loss": 6.350003242492676, + "loss_ib": 0.0743337869644165, + "step": 454 + }, + { + "ce_ib": 13.974409103393555, + "ce_orig": 1.0648647546768188, + "epoch": 0.13056294485584874, + "kl_loss": 5.880853652954102, + "loss_ib": 0.07278294116258621, + "step": 454 + }, + { + "epoch": 0.13085052843482636, + "grad_norm": 0.3984464406967163, + "learning_rate": 9.995609816214774e-06, + "loss": 0.9742, + "step": 455 + }, + { + "ce_ib": 10.744162559509277, + "ce_orig": 0.9050172567367554, + "epoch": 0.13085052843482636, + "kl_loss": 6.5762224197387695, + "loss_ib": 0.07650638371706009, + "step": 455 + }, + { + "ce_ib": 8.354386329650879, + "ce_orig": 0.7557839155197144, + "epoch": 0.13085052843482636, + "kl_loss": 5.9736809730529785, + "loss_ib": 0.06809119880199432, + "step": 455 + }, + { + "ce_ib": 10.220728874206543, + "ce_orig": 0.536035418510437, + "epoch": 0.13085052843482636, + "kl_loss": 6.033191680908203, + "loss_ib": 0.07055263966321945, + "step": 455 + }, + { + "ce_ib": 18.092269897460938, + "ce_orig": 1.7775410413742065, + "epoch": 0.13085052843482636, + "kl_loss": 6.227479457855225, + "loss_ib": 0.0803670659661293, + "step": 455 + }, + { + "ce_ib": 11.44895076751709, + "ce_orig": 0.6709085702896118, + "epoch": 0.13113811201380401, + "kl_loss": 5.662714004516602, + "loss_ib": 0.06807608902454376, + "step": 456 + }, + { + "ce_ib": 14.361438751220703, + "ce_orig": 1.2514116764068604, + "epoch": 0.13113811201380401, + "kl_loss": 6.082196235656738, + "loss_ib": 0.07518339902162552, + "step": 456 + }, + { + "ce_ib": 15.36643123626709, + "ce_orig": 1.2210443019866943, + "epoch": 0.13113811201380401, + "kl_loss": 6.145846366882324, + "loss_ib": 0.07682488858699799, + "step": 456 + }, + { + "ce_ib": 9.23930835723877, + "ce_orig": 0.4679323732852936, + "epoch": 0.13113811201380401, + "kl_loss": 5.857122421264648, + "loss_ib": 0.0678105279803276, + "step": 456 + }, + { + "ce_ib": 18.075246810913086, + "ce_orig": 1.4876693487167358, + "epoch": 0.13142569559278164, + "kl_loss": 6.473189353942871, + "loss_ib": 0.08280713856220245, + "step": 457 + }, + { + "ce_ib": 10.967455863952637, + "ce_orig": 0.7587569355964661, + "epoch": 0.13142569559278164, + "kl_loss": 6.15573787689209, + "loss_ib": 0.07252483069896698, + "step": 457 + }, + { + "ce_ib": 15.197246551513672, + "ce_orig": 0.9926710724830627, + "epoch": 0.13142569559278164, + "kl_loss": 6.070707321166992, + "loss_ib": 0.07590431720018387, + "step": 457 + }, + { + "ce_ib": 13.815834999084473, + "ce_orig": 0.7014583349227905, + "epoch": 0.13142569559278164, + "kl_loss": 5.886041641235352, + "loss_ib": 0.07267624884843826, + "step": 457 + }, + { + "ce_ib": 11.019710540771484, + "ce_orig": 0.645072340965271, + "epoch": 0.1317132791717593, + "kl_loss": 6.059167861938477, + "loss_ib": 0.07161138951778412, + "step": 458 + }, + { + "ce_ib": 14.778169631958008, + "ce_orig": 1.0757112503051758, + "epoch": 0.1317132791717593, + "kl_loss": 5.94471549987793, + "loss_ib": 0.07422532141208649, + "step": 458 + }, + { + "ce_ib": 14.401138305664062, + "ce_orig": 0.5452439188957214, + "epoch": 0.1317132791717593, + "kl_loss": 6.017438888549805, + "loss_ib": 0.0745755285024643, + "step": 458 + }, + { + "ce_ib": 12.480062484741211, + "ce_orig": 0.863021731376648, + "epoch": 0.1317132791717593, + "kl_loss": 6.301546096801758, + "loss_ib": 0.07549552619457245, + "step": 458 + }, + { + "ce_ib": 6.62205696105957, + "ce_orig": 0.3466249108314514, + "epoch": 0.13200086275073694, + "kl_loss": 5.449361801147461, + "loss_ib": 0.061115674674510956, + "step": 459 + }, + { + "ce_ib": 17.263084411621094, + "ce_orig": 1.387851595878601, + "epoch": 0.13200086275073694, + "kl_loss": 5.80389404296875, + "loss_ib": 0.07530201971530914, + "step": 459 + }, + { + "ce_ib": 12.40281867980957, + "ce_orig": 0.5882412195205688, + "epoch": 0.13200086275073694, + "kl_loss": 5.933984756469727, + "loss_ib": 0.07174266874790192, + "step": 459 + }, + { + "ce_ib": 10.269407272338867, + "ce_orig": 0.6979908347129822, + "epoch": 0.13200086275073694, + "kl_loss": 6.144120216369629, + "loss_ib": 0.07171060889959335, + "step": 459 + }, + { + "epoch": 0.13228844632971457, + "grad_norm": 0.392301082611084, + "learning_rate": 9.995278647352428e-06, + "loss": 0.8929, + "step": 460 + }, + { + "ce_ib": 12.763174057006836, + "ce_orig": 0.5633679032325745, + "epoch": 0.13228844632971457, + "kl_loss": 5.510239601135254, + "loss_ib": 0.06786557286977768, + "step": 460 + }, + { + "ce_ib": 12.732988357543945, + "ce_orig": 0.23703357577323914, + "epoch": 0.13228844632971457, + "kl_loss": 5.470260143280029, + "loss_ib": 0.06743558496236801, + "step": 460 + }, + { + "ce_ib": 10.131989479064941, + "ce_orig": 0.6852651834487915, + "epoch": 0.13228844632971457, + "kl_loss": 6.123454570770264, + "loss_ib": 0.07136653363704681, + "step": 460 + }, + { + "ce_ib": 12.067431449890137, + "ce_orig": 0.7499302625656128, + "epoch": 0.13228844632971457, + "kl_loss": 5.742093086242676, + "loss_ib": 0.06948836147785187, + "step": 460 + }, + { + "ce_ib": 7.949161052703857, + "ce_orig": 0.5661373734474182, + "epoch": 0.13257602990869222, + "kl_loss": 5.77607536315918, + "loss_ib": 0.0657099112868309, + "step": 461 + }, + { + "ce_ib": 11.964921951293945, + "ce_orig": 0.7726590633392334, + "epoch": 0.13257602990869222, + "kl_loss": 5.976222991943359, + "loss_ib": 0.07172714918851852, + "step": 461 + }, + { + "ce_ib": 10.301544189453125, + "ce_orig": 0.6989135146141052, + "epoch": 0.13257602990869222, + "kl_loss": 5.811915397644043, + "loss_ib": 0.06842069327831268, + "step": 461 + }, + { + "ce_ib": 12.018680572509766, + "ce_orig": 0.7360786199569702, + "epoch": 0.13257602990869222, + "kl_loss": 5.752782821655273, + "loss_ib": 0.06954650580883026, + "step": 461 + }, + { + "ce_ib": 9.352438926696777, + "ce_orig": 0.6635019779205322, + "epoch": 0.13286361348766984, + "kl_loss": 5.842447280883789, + "loss_ib": 0.06777691096067429, + "step": 462 + }, + { + "ce_ib": 13.350435256958008, + "ce_orig": 1.5100382566452026, + "epoch": 0.13286361348766984, + "kl_loss": 5.89756965637207, + "loss_ib": 0.07232613116502762, + "step": 462 + }, + { + "ce_ib": 12.962512969970703, + "ce_orig": 0.5430381894111633, + "epoch": 0.13286361348766984, + "kl_loss": 5.671024322509766, + "loss_ib": 0.06967275589704514, + "step": 462 + }, + { + "ce_ib": 15.555392265319824, + "ce_orig": 1.549025297164917, + "epoch": 0.13286361348766984, + "kl_loss": 5.619405269622803, + "loss_ib": 0.07174944132566452, + "step": 462 + }, + { + "ce_ib": 12.196456909179688, + "ce_orig": 0.8404883742332458, + "epoch": 0.1331511970666475, + "kl_loss": 5.807744026184082, + "loss_ib": 0.07027389109134674, + "step": 463 + }, + { + "ce_ib": 10.140958786010742, + "ce_orig": 0.9204779267311096, + "epoch": 0.1331511970666475, + "kl_loss": 5.694162368774414, + "loss_ib": 0.06708257645368576, + "step": 463 + }, + { + "ce_ib": 14.248191833496094, + "ce_orig": 0.7399206161499023, + "epoch": 0.1331511970666475, + "kl_loss": 5.870853900909424, + "loss_ib": 0.07295673340559006, + "step": 463 + }, + { + "ce_ib": 12.88770580291748, + "ce_orig": 0.6557974219322205, + "epoch": 0.1331511970666475, + "kl_loss": 5.735309600830078, + "loss_ib": 0.0702408030629158, + "step": 463 + }, + { + "ce_ib": 11.3536376953125, + "ce_orig": 0.9138345718383789, + "epoch": 0.13343878064562514, + "kl_loss": 5.644110202789307, + "loss_ib": 0.06779474020004272, + "step": 464 + }, + { + "ce_ib": 12.081389427185059, + "ce_orig": 0.947689950466156, + "epoch": 0.13343878064562514, + "kl_loss": 5.870572090148926, + "loss_ib": 0.07078710943460464, + "step": 464 + }, + { + "ce_ib": 8.881162643432617, + "ce_orig": 0.952629029750824, + "epoch": 0.13343878064562514, + "kl_loss": 5.588096618652344, + "loss_ib": 0.06476213037967682, + "step": 464 + }, + { + "ce_ib": 9.299169540405273, + "ce_orig": 0.60807204246521, + "epoch": 0.13343878064562514, + "kl_loss": 5.195706844329834, + "loss_ib": 0.06125623732805252, + "step": 464 + }, + { + "epoch": 0.13372636422460277, + "grad_norm": 0.45935723185539246, + "learning_rate": 9.994935443704391e-06, + "loss": 0.9342, + "step": 465 + }, + { + "ce_ib": 15.007346153259277, + "ce_orig": 1.2609058618545532, + "epoch": 0.13372636422460277, + "kl_loss": 5.37989616394043, + "loss_ib": 0.06880630552768707, + "step": 465 + }, + { + "ce_ib": 7.51100492477417, + "ce_orig": 0.5963767170906067, + "epoch": 0.13372636422460277, + "kl_loss": 5.823338508605957, + "loss_ib": 0.06574439257383347, + "step": 465 + }, + { + "ce_ib": 11.900202751159668, + "ce_orig": 0.4563358724117279, + "epoch": 0.13372636422460277, + "kl_loss": 5.743979454040527, + "loss_ib": 0.06933999806642532, + "step": 465 + }, + { + "ce_ib": 14.910943984985352, + "ce_orig": 0.8666954636573792, + "epoch": 0.13372636422460277, + "kl_loss": 5.86253023147583, + "loss_ib": 0.07353624701499939, + "step": 465 + }, + { + "ce_ib": 11.311989784240723, + "ce_orig": 0.803551197052002, + "epoch": 0.13401394780358042, + "kl_loss": 5.7215681076049805, + "loss_ib": 0.06852766871452332, + "step": 466 + }, + { + "ce_ib": 12.853880882263184, + "ce_orig": 0.5360819697380066, + "epoch": 0.13401394780358042, + "kl_loss": 5.336982727050781, + "loss_ib": 0.06622370332479477, + "step": 466 + }, + { + "ce_ib": 12.081340789794922, + "ce_orig": 0.9268986582756042, + "epoch": 0.13401394780358042, + "kl_loss": 5.40451717376709, + "loss_ib": 0.0661265105009079, + "step": 466 + }, + { + "ce_ib": 10.360613822937012, + "ce_orig": 0.7845146059989929, + "epoch": 0.13401394780358042, + "kl_loss": 5.394956588745117, + "loss_ib": 0.06431017816066742, + "step": 466 + }, + { + "ce_ib": 13.594584465026855, + "ce_orig": 0.7611533999443054, + "epoch": 0.13430153138255804, + "kl_loss": 5.510991096496582, + "loss_ib": 0.06870449334383011, + "step": 467 + }, + { + "ce_ib": 11.39364242553711, + "ce_orig": 0.6709606647491455, + "epoch": 0.13430153138255804, + "kl_loss": 5.329100608825684, + "loss_ib": 0.06468464434146881, + "step": 467 + }, + { + "ce_ib": 15.204527854919434, + "ce_orig": 1.456774115562439, + "epoch": 0.13430153138255804, + "kl_loss": 5.282338619232178, + "loss_ib": 0.06802791357040405, + "step": 467 + }, + { + "ce_ib": 9.643840789794922, + "ce_orig": 0.6612191200256348, + "epoch": 0.13430153138255804, + "kl_loss": 5.333034992218018, + "loss_ib": 0.06297419220209122, + "step": 467 + }, + { + "ce_ib": 11.347843170166016, + "ce_orig": 1.0858099460601807, + "epoch": 0.1345891149615357, + "kl_loss": 5.0229034423828125, + "loss_ib": 0.061576876789331436, + "step": 468 + }, + { + "ce_ib": 12.166611671447754, + "ce_orig": 0.7169655561447144, + "epoch": 0.1345891149615357, + "kl_loss": 5.225884437561035, + "loss_ib": 0.06442546099424362, + "step": 468 + }, + { + "ce_ib": 13.179616928100586, + "ce_orig": 0.9957132339477539, + "epoch": 0.1345891149615357, + "kl_loss": 5.586675643920898, + "loss_ib": 0.06904637068510056, + "step": 468 + }, + { + "ce_ib": 10.90320110321045, + "ce_orig": 0.694044828414917, + "epoch": 0.1345891149615357, + "kl_loss": 4.852416038513184, + "loss_ib": 0.05942736193537712, + "step": 468 + }, + { + "ce_ib": 10.619782447814941, + "ce_orig": 0.4662715196609497, + "epoch": 0.13487669854051335, + "kl_loss": 5.184469223022461, + "loss_ib": 0.06246447563171387, + "step": 469 + }, + { + "ce_ib": 9.323065757751465, + "ce_orig": 0.6369857788085938, + "epoch": 0.13487669854051335, + "kl_loss": 5.136096000671387, + "loss_ib": 0.060684025287628174, + "step": 469 + }, + { + "ce_ib": 10.448844909667969, + "ce_orig": 0.4876580238342285, + "epoch": 0.13487669854051335, + "kl_loss": 5.2007832527160645, + "loss_ib": 0.062456678599119186, + "step": 469 + }, + { + "ce_ib": 11.4501371383667, + "ce_orig": 0.6889066100120544, + "epoch": 0.13487669854051335, + "kl_loss": 5.519144058227539, + "loss_ib": 0.06664157658815384, + "step": 469 + }, + { + "epoch": 0.13516428211949097, + "grad_norm": 0.4563974142074585, + "learning_rate": 9.994580206097524e-06, + "loss": 0.9271, + "step": 470 + }, + { + "ce_ib": 10.97485637664795, + "ce_orig": 0.6626176834106445, + "epoch": 0.13516428211949097, + "kl_loss": 5.277484893798828, + "loss_ib": 0.06374970078468323, + "step": 470 + }, + { + "ce_ib": 8.894420623779297, + "ce_orig": 0.4637753367424011, + "epoch": 0.13516428211949097, + "kl_loss": 5.26820182800293, + "loss_ib": 0.06157643720507622, + "step": 470 + }, + { + "ce_ib": 13.850231170654297, + "ce_orig": 0.8222572803497314, + "epoch": 0.13516428211949097, + "kl_loss": 3.3914904594421387, + "loss_ib": 0.047765135765075684, + "step": 470 + }, + { + "ce_ib": 9.599710464477539, + "ce_orig": 0.8736235499382019, + "epoch": 0.13516428211949097, + "kl_loss": 5.436291694641113, + "loss_ib": 0.06396262347698212, + "step": 470 + }, + { + "ce_ib": 9.535674095153809, + "ce_orig": 0.7246021032333374, + "epoch": 0.13545186569846862, + "kl_loss": 5.152594089508057, + "loss_ib": 0.061061613261699677, + "step": 471 + }, + { + "ce_ib": 11.557367324829102, + "ce_orig": 0.7219054102897644, + "epoch": 0.13545186569846862, + "kl_loss": 4.933101654052734, + "loss_ib": 0.0608883835375309, + "step": 471 + }, + { + "ce_ib": 11.222688674926758, + "ce_orig": 0.7581503987312317, + "epoch": 0.13545186569846862, + "kl_loss": 5.2493391036987305, + "loss_ib": 0.06371607631444931, + "step": 471 + }, + { + "ce_ib": 7.992416858673096, + "ce_orig": 0.7171717286109924, + "epoch": 0.13545186569846862, + "kl_loss": 5.1756591796875, + "loss_ib": 0.05974900722503662, + "step": 471 + }, + { + "ce_ib": 5.774598121643066, + "ce_orig": 0.2617477774620056, + "epoch": 0.13573944927744624, + "kl_loss": 3.714776039123535, + "loss_ib": 0.042922358959913254, + "step": 472 + }, + { + "ce_ib": 15.86950969696045, + "ce_orig": 1.3682712316513062, + "epoch": 0.13573944927744624, + "kl_loss": 5.002852439880371, + "loss_ib": 0.06589803844690323, + "step": 472 + }, + { + "ce_ib": 15.778973579406738, + "ce_orig": 1.778786063194275, + "epoch": 0.13573944927744624, + "kl_loss": 5.145055294036865, + "loss_ib": 0.06722952425479889, + "step": 472 + }, + { + "ce_ib": 9.892607688903809, + "ce_orig": 0.6026872992515564, + "epoch": 0.13573944927744624, + "kl_loss": 4.9220356941223145, + "loss_ib": 0.05911296233534813, + "step": 472 + }, + { + "ce_ib": 14.954146385192871, + "ce_orig": 1.6441103219985962, + "epoch": 0.1360270328564239, + "kl_loss": 5.022004127502441, + "loss_ib": 0.06517418473958969, + "step": 473 + }, + { + "ce_ib": 14.230586051940918, + "ce_orig": 1.1324756145477295, + "epoch": 0.1360270328564239, + "kl_loss": 5.008617401123047, + "loss_ib": 0.0643167570233345, + "step": 473 + }, + { + "ce_ib": 9.870515823364258, + "ce_orig": 0.50955730676651, + "epoch": 0.1360270328564239, + "kl_loss": 5.198309898376465, + "loss_ib": 0.06185361370444298, + "step": 473 + }, + { + "ce_ib": 7.58746862411499, + "ce_orig": 0.576608419418335, + "epoch": 0.1360270328564239, + "kl_loss": 5.101164817810059, + "loss_ib": 0.058599118143320084, + "step": 473 + }, + { + "ce_ib": 11.188053131103516, + "ce_orig": 1.0111771821975708, + "epoch": 0.13631461643540155, + "kl_loss": 4.980704307556152, + "loss_ib": 0.06099509447813034, + "step": 474 + }, + { + "ce_ib": 11.85481071472168, + "ce_orig": 0.8680632710456848, + "epoch": 0.13631461643540155, + "kl_loss": 4.838929653167725, + "loss_ib": 0.060244105756282806, + "step": 474 + }, + { + "ce_ib": 11.664010047912598, + "ce_orig": 0.6513270735740662, + "epoch": 0.13631461643540155, + "kl_loss": 4.969212532043457, + "loss_ib": 0.061356134712696075, + "step": 474 + }, + { + "ce_ib": 12.634693145751953, + "ce_orig": 0.6730305552482605, + "epoch": 0.13631461643540155, + "kl_loss": 4.7532453536987305, + "loss_ib": 0.06016714498400688, + "step": 474 + }, + { + "epoch": 0.13660220001437917, + "grad_norm": 0.41322335600852966, + "learning_rate": 9.99421293538767e-06, + "loss": 0.952, + "step": 475 + }, + { + "ce_ib": 9.952411651611328, + "ce_orig": 0.6906881928443909, + "epoch": 0.13660220001437917, + "kl_loss": 4.742203712463379, + "loss_ib": 0.05737444758415222, + "step": 475 + }, + { + "ce_ib": 11.219844818115234, + "ce_orig": 0.7193230986595154, + "epoch": 0.13660220001437917, + "kl_loss": 4.660732269287109, + "loss_ib": 0.057827167212963104, + "step": 475 + }, + { + "ce_ib": 10.129925727844238, + "ce_orig": 0.7533198595046997, + "epoch": 0.13660220001437917, + "kl_loss": 4.963289260864258, + "loss_ib": 0.05976282060146332, + "step": 475 + }, + { + "ce_ib": 8.743351936340332, + "ce_orig": 0.5432742238044739, + "epoch": 0.13660220001437917, + "kl_loss": 4.890501022338867, + "loss_ib": 0.05764836072921753, + "step": 475 + }, + { + "ce_ib": 16.304983139038086, + "ce_orig": 1.4945815801620483, + "epoch": 0.13688978359335682, + "kl_loss": 4.846185207366943, + "loss_ib": 0.06476683169603348, + "step": 476 + }, + { + "ce_ib": 15.700722694396973, + "ce_orig": 1.2550569772720337, + "epoch": 0.13688978359335682, + "kl_loss": 4.873666763305664, + "loss_ib": 0.0644373893737793, + "step": 476 + }, + { + "ce_ib": 11.274219512939453, + "ce_orig": 0.7325409054756165, + "epoch": 0.13688978359335682, + "kl_loss": 4.502358436584473, + "loss_ib": 0.056297801434993744, + "step": 476 + }, + { + "ce_ib": 12.488386154174805, + "ce_orig": 0.7823653221130371, + "epoch": 0.13688978359335682, + "kl_loss": 4.8517374992370605, + "loss_ib": 0.061005763709545135, + "step": 476 + }, + { + "ce_ib": 9.597527503967285, + "ce_orig": 0.8816280961036682, + "epoch": 0.13717736717233445, + "kl_loss": 4.802122592926025, + "loss_ib": 0.057618748396635056, + "step": 477 + }, + { + "ce_ib": 7.878790855407715, + "ce_orig": 0.669119119644165, + "epoch": 0.13717736717233445, + "kl_loss": 4.979962348937988, + "loss_ib": 0.05767841264605522, + "step": 477 + }, + { + "ce_ib": 10.391016960144043, + "ce_orig": 0.7251664400100708, + "epoch": 0.13717736717233445, + "kl_loss": 4.9956560134887695, + "loss_ib": 0.060347575694322586, + "step": 477 + }, + { + "ce_ib": 11.260332107543945, + "ce_orig": 0.7019518613815308, + "epoch": 0.13717736717233445, + "kl_loss": 4.841489791870117, + "loss_ib": 0.05967522785067558, + "step": 477 + }, + { + "ce_ib": 9.614903450012207, + "ce_orig": 0.8139093518257141, + "epoch": 0.1374649507513121, + "kl_loss": 4.899896621704102, + "loss_ib": 0.058613866567611694, + "step": 478 + }, + { + "ce_ib": 11.576051712036133, + "ce_orig": 0.5408310294151306, + "epoch": 0.1374649507513121, + "kl_loss": 4.424466133117676, + "loss_ib": 0.055820710957050323, + "step": 478 + }, + { + "ce_ib": 12.201531410217285, + "ce_orig": 1.1268466711044312, + "epoch": 0.1374649507513121, + "kl_loss": 4.69233512878418, + "loss_ib": 0.05912488326430321, + "step": 478 + }, + { + "ce_ib": 9.991912841796875, + "ce_orig": 0.9469978213310242, + "epoch": 0.1374649507513121, + "kl_loss": 4.918972969055176, + "loss_ib": 0.05918164178729057, + "step": 478 + }, + { + "ce_ib": 10.741558074951172, + "ce_orig": 0.4736107587814331, + "epoch": 0.13775253433028975, + "kl_loss": 4.647714138031006, + "loss_ib": 0.05721869692206383, + "step": 479 + }, + { + "ce_ib": 13.863398551940918, + "ce_orig": 0.7937755584716797, + "epoch": 0.13775253433028975, + "kl_loss": 4.548620700836182, + "loss_ib": 0.059349603950977325, + "step": 479 + }, + { + "ce_ib": 12.365586280822754, + "ce_orig": 0.7733124494552612, + "epoch": 0.13775253433028975, + "kl_loss": 4.751549243927002, + "loss_ib": 0.05988107621669769, + "step": 479 + }, + { + "ce_ib": 9.24503231048584, + "ce_orig": 0.6917009949684143, + "epoch": 0.13775253433028975, + "kl_loss": 4.599456787109375, + "loss_ib": 0.05523959919810295, + "step": 479 + }, + { + "epoch": 0.13804011790926737, + "grad_norm": 0.28194499015808105, + "learning_rate": 9.993833632459675e-06, + "loss": 0.9569, + "step": 480 + }, + { + "ce_ib": 12.575913429260254, + "ce_orig": 0.8253871202468872, + "epoch": 0.13804011790926737, + "kl_loss": 4.399908542633057, + "loss_ib": 0.056574996560811996, + "step": 480 + }, + { + "ce_ib": 12.079404830932617, + "ce_orig": 0.7573724985122681, + "epoch": 0.13804011790926737, + "kl_loss": 4.628897666931152, + "loss_ib": 0.05836838111281395, + "step": 480 + }, + { + "ce_ib": 14.135128021240234, + "ce_orig": 1.195788860321045, + "epoch": 0.13804011790926737, + "kl_loss": 4.788009166717529, + "loss_ib": 0.062015216797590256, + "step": 480 + }, + { + "ce_ib": 13.080334663391113, + "ce_orig": 1.0536785125732422, + "epoch": 0.13804011790926737, + "kl_loss": 4.861077308654785, + "loss_ib": 0.061691105365753174, + "step": 480 + }, + { + "ce_ib": 15.20880126953125, + "ce_orig": 1.4970366954803467, + "epoch": 0.13832770148824503, + "kl_loss": 4.581524848937988, + "loss_ib": 0.061024051159620285, + "step": 481 + }, + { + "ce_ib": 9.704122543334961, + "ce_orig": 0.7620049118995667, + "epoch": 0.13832770148824503, + "kl_loss": 4.4944915771484375, + "loss_ib": 0.05464903637766838, + "step": 481 + }, + { + "ce_ib": 14.204291343688965, + "ce_orig": 1.2470651865005493, + "epoch": 0.13832770148824503, + "kl_loss": 4.557282447814941, + "loss_ib": 0.059777114540338516, + "step": 481 + }, + { + "ce_ib": 10.805310249328613, + "ce_orig": 0.8926163911819458, + "epoch": 0.13832770148824503, + "kl_loss": 4.54813289642334, + "loss_ib": 0.05628664046525955, + "step": 481 + }, + { + "ce_ib": 8.615751266479492, + "ce_orig": 0.7180139422416687, + "epoch": 0.13861528506722265, + "kl_loss": 4.497965335845947, + "loss_ib": 0.0535954050719738, + "step": 482 + }, + { + "ce_ib": 13.28380012512207, + "ce_orig": 0.8565104007720947, + "epoch": 0.13861528506722265, + "kl_loss": 4.163414001464844, + "loss_ib": 0.05491793900728226, + "step": 482 + }, + { + "ce_ib": 16.117412567138672, + "ce_orig": 1.5288479328155518, + "epoch": 0.13861528506722265, + "kl_loss": 4.652551651000977, + "loss_ib": 0.06264292448759079, + "step": 482 + }, + { + "ce_ib": 15.705317497253418, + "ce_orig": 1.3943670988082886, + "epoch": 0.13861528506722265, + "kl_loss": 4.385931015014648, + "loss_ib": 0.05956462770700455, + "step": 482 + }, + { + "ce_ib": 12.427248001098633, + "ce_orig": 0.8222273588180542, + "epoch": 0.1389028686462003, + "kl_loss": 4.483578681945801, + "loss_ib": 0.05726303532719612, + "step": 483 + }, + { + "ce_ib": 9.109222412109375, + "ce_orig": 0.6663987636566162, + "epoch": 0.1389028686462003, + "kl_loss": 4.623089790344238, + "loss_ib": 0.05534011870622635, + "step": 483 + }, + { + "ce_ib": 16.840740203857422, + "ce_orig": 1.5569089651107788, + "epoch": 0.1389028686462003, + "kl_loss": 4.513226509094238, + "loss_ib": 0.061973001807928085, + "step": 483 + }, + { + "ce_ib": 13.14278507232666, + "ce_orig": 0.7401519417762756, + "epoch": 0.1389028686462003, + "kl_loss": 4.656795978546143, + "loss_ib": 0.05971074476838112, + "step": 483 + }, + { + "ce_ib": 14.228574752807617, + "ce_orig": 0.5264460444450378, + "epoch": 0.13919045222517795, + "kl_loss": 4.304325580596924, + "loss_ib": 0.05727183073759079, + "step": 484 + }, + { + "ce_ib": 16.023216247558594, + "ce_orig": 1.285567283630371, + "epoch": 0.13919045222517795, + "kl_loss": 4.187580585479736, + "loss_ib": 0.05789902061223984, + "step": 484 + }, + { + "ce_ib": 15.015509605407715, + "ce_orig": 1.6406548023223877, + "epoch": 0.13919045222517795, + "kl_loss": 4.492351531982422, + "loss_ib": 0.059939026832580566, + "step": 484 + }, + { + "ce_ib": 13.410754203796387, + "ce_orig": 1.4283090829849243, + "epoch": 0.13919045222517795, + "kl_loss": 4.561341762542725, + "loss_ib": 0.05902417004108429, + "step": 484 + }, + { + "epoch": 0.13947803580415558, + "grad_norm": 0.42398667335510254, + "learning_rate": 9.993442298227365e-06, + "loss": 1.0074, + "step": 485 + }, + { + "ce_ib": 16.616222381591797, + "ce_orig": 1.3357499837875366, + "epoch": 0.13947803580415558, + "kl_loss": 4.212893486022949, + "loss_ib": 0.05874515324831009, + "step": 485 + }, + { + "ce_ib": 11.187970161437988, + "ce_orig": 0.9521239995956421, + "epoch": 0.13947803580415558, + "kl_loss": 4.148205757141113, + "loss_ib": 0.05267002806067467, + "step": 485 + }, + { + "ce_ib": 11.176460266113281, + "ce_orig": 0.48416224122047424, + "epoch": 0.13947803580415558, + "kl_loss": 4.578839302062988, + "loss_ib": 0.05696485564112663, + "step": 485 + }, + { + "ce_ib": 14.064797401428223, + "ce_orig": 1.0060439109802246, + "epoch": 0.13947803580415558, + "kl_loss": 4.404331207275391, + "loss_ib": 0.05810810998082161, + "step": 485 + }, + { + "ce_ib": 15.115540504455566, + "ce_orig": 0.9532531499862671, + "epoch": 0.13976561938313323, + "kl_loss": 4.2361860275268555, + "loss_ib": 0.057477399706840515, + "step": 486 + }, + { + "ce_ib": 9.784770965576172, + "ce_orig": 0.7604672312736511, + "epoch": 0.13976561938313323, + "kl_loss": 3.9188036918640137, + "loss_ib": 0.048972804099321365, + "step": 486 + }, + { + "ce_ib": 10.663135528564453, + "ce_orig": 1.0786744356155396, + "epoch": 0.13976561938313323, + "kl_loss": 4.246434211730957, + "loss_ib": 0.0531274788081646, + "step": 486 + }, + { + "ce_ib": 8.10494613647461, + "ce_orig": 0.5798110961914062, + "epoch": 0.13976561938313323, + "kl_loss": 4.231927871704102, + "loss_ib": 0.05042422190308571, + "step": 486 + }, + { + "ce_ib": 12.337913513183594, + "ce_orig": 0.7753936648368835, + "epoch": 0.14005320296211085, + "kl_loss": 4.142770290374756, + "loss_ib": 0.053765613585710526, + "step": 487 + }, + { + "ce_ib": 15.07596206665039, + "ce_orig": 0.8154249787330627, + "epoch": 0.14005320296211085, + "kl_loss": 4.023566246032715, + "loss_ib": 0.055311620235443115, + "step": 487 + }, + { + "ce_ib": 13.26456069946289, + "ce_orig": 1.3517224788665771, + "epoch": 0.14005320296211085, + "kl_loss": 4.021327018737793, + "loss_ib": 0.053477831184864044, + "step": 487 + }, + { + "ce_ib": 11.321921348571777, + "ce_orig": 0.6585462093353271, + "epoch": 0.14005320296211085, + "kl_loss": 3.9664478302001953, + "loss_ib": 0.050986398011446, + "step": 487 + }, + { + "ce_ib": 11.75551700592041, + "ce_orig": 0.7766084671020508, + "epoch": 0.1403407865410885, + "kl_loss": 3.7924036979675293, + "loss_ib": 0.049679554998874664, + "step": 488 + }, + { + "ce_ib": 15.715422630310059, + "ce_orig": 1.3684426546096802, + "epoch": 0.1403407865410885, + "kl_loss": 3.701831817626953, + "loss_ib": 0.05273373797535896, + "step": 488 + }, + { + "ce_ib": 8.13493824005127, + "ce_orig": 0.45366278290748596, + "epoch": 0.1403407865410885, + "kl_loss": 4.0575456619262695, + "loss_ib": 0.04871039465069771, + "step": 488 + }, + { + "ce_ib": 13.73061752319336, + "ce_orig": 1.1564494371414185, + "epoch": 0.1403407865410885, + "kl_loss": 4.102625846862793, + "loss_ib": 0.05475687235593796, + "step": 488 + }, + { + "ce_ib": 13.535247802734375, + "ce_orig": 0.9498729109764099, + "epoch": 0.14062837012006615, + "kl_loss": 2.995370864868164, + "loss_ib": 0.04348895326256752, + "step": 489 + }, + { + "ce_ib": 7.940598487854004, + "ce_orig": 0.3024381101131439, + "epoch": 0.14062837012006615, + "kl_loss": 3.7741479873657227, + "loss_ib": 0.04568207636475563, + "step": 489 + }, + { + "ce_ib": 11.454663276672363, + "ce_orig": 0.6730047464370728, + "epoch": 0.14062837012006615, + "kl_loss": 3.8181753158569336, + "loss_ib": 0.049636416137218475, + "step": 489 + }, + { + "ce_ib": 8.796786308288574, + "ce_orig": 0.7032504677772522, + "epoch": 0.14062837012006615, + "kl_loss": 3.953084945678711, + "loss_ib": 0.04832763597369194, + "step": 489 + }, + { + "epoch": 0.14091595369904378, + "grad_norm": 0.40750885009765625, + "learning_rate": 9.993038933633556e-06, + "loss": 0.9795, + "step": 490 + }, + { + "ce_ib": 9.949647903442383, + "ce_orig": 0.5322861671447754, + "epoch": 0.14091595369904378, + "kl_loss": 3.8681774139404297, + "loss_ib": 0.048631418496370316, + "step": 490 + }, + { + "ce_ib": 15.904637336730957, + "ce_orig": 1.6171191930770874, + "epoch": 0.14091595369904378, + "kl_loss": 3.6979196071624756, + "loss_ib": 0.05288383364677429, + "step": 490 + }, + { + "ce_ib": 12.01391315460205, + "ce_orig": 1.2611167430877686, + "epoch": 0.14091595369904378, + "kl_loss": 3.5611257553100586, + "loss_ib": 0.04762516915798187, + "step": 490 + }, + { + "ce_ib": 9.39164924621582, + "ce_orig": 0.9851351380348206, + "epoch": 0.14091595369904378, + "kl_loss": 3.2981302738189697, + "loss_ib": 0.04237294942140579, + "step": 490 + }, + { + "ce_ib": 12.360732078552246, + "ce_orig": 1.4508610963821411, + "epoch": 0.14120353727802143, + "kl_loss": 3.5956835746765137, + "loss_ib": 0.0483175665140152, + "step": 491 + }, + { + "ce_ib": 12.937994003295898, + "ce_orig": 1.1618151664733887, + "epoch": 0.14120353727802143, + "kl_loss": 3.563871383666992, + "loss_ib": 0.048576705157756805, + "step": 491 + }, + { + "ce_ib": 17.118064880371094, + "ce_orig": 1.445876121520996, + "epoch": 0.14120353727802143, + "kl_loss": 3.657188892364502, + "loss_ib": 0.05368995666503906, + "step": 491 + }, + { + "ce_ib": 11.58482837677002, + "ce_orig": 0.9607007503509521, + "epoch": 0.14120353727802143, + "kl_loss": 3.736398458480835, + "loss_ib": 0.04894881322979927, + "step": 491 + }, + { + "ce_ib": 8.748994827270508, + "ce_orig": 0.9032168984413147, + "epoch": 0.14149112085699905, + "kl_loss": 3.5443851947784424, + "loss_ib": 0.0441928468644619, + "step": 492 + }, + { + "ce_ib": 10.45909309387207, + "ce_orig": 0.7486007809638977, + "epoch": 0.14149112085699905, + "kl_loss": 3.231372594833374, + "loss_ib": 0.042772818356752396, + "step": 492 + }, + { + "ce_ib": 17.54129409790039, + "ce_orig": 1.9069491624832153, + "epoch": 0.14149112085699905, + "kl_loss": 3.558845043182373, + "loss_ib": 0.05312974378466606, + "step": 492 + }, + { + "ce_ib": 15.910091400146484, + "ce_orig": 1.024116039276123, + "epoch": 0.14149112085699905, + "kl_loss": 3.7413394451141357, + "loss_ib": 0.05332348868250847, + "step": 492 + }, + { + "ce_ib": 8.667006492614746, + "ce_orig": 0.30886101722717285, + "epoch": 0.1417787044359767, + "kl_loss": 3.7348480224609375, + "loss_ib": 0.04601548612117767, + "step": 493 + }, + { + "ce_ib": 11.867700576782227, + "ce_orig": 1.1173264980316162, + "epoch": 0.1417787044359767, + "kl_loss": 3.64851713180542, + "loss_ib": 0.04835287109017372, + "step": 493 + }, + { + "ce_ib": 9.549649238586426, + "ce_orig": 0.6670407056808472, + "epoch": 0.1417787044359767, + "kl_loss": 3.220088481903076, + "loss_ib": 0.04175053536891937, + "step": 493 + }, + { + "ce_ib": 12.184220314025879, + "ce_orig": 0.7223286032676697, + "epoch": 0.1417787044359767, + "kl_loss": 2.9731616973876953, + "loss_ib": 0.041915833950042725, + "step": 493 + }, + { + "ce_ib": 8.040367126464844, + "ce_orig": 0.49942535161972046, + "epoch": 0.14206628801495436, + "kl_loss": 3.2337100505828857, + "loss_ib": 0.04037746787071228, + "step": 494 + }, + { + "ce_ib": 9.84688949584961, + "ce_orig": 0.3545916974544525, + "epoch": 0.14206628801495436, + "kl_loss": 3.2256717681884766, + "loss_ib": 0.0421036034822464, + "step": 494 + }, + { + "ce_ib": 13.763298034667969, + "ce_orig": 1.4035097360610962, + "epoch": 0.14206628801495436, + "kl_loss": 3.3014750480651855, + "loss_ib": 0.04677804931998253, + "step": 494 + }, + { + "ce_ib": 10.143733978271484, + "ce_orig": 0.5595765709877014, + "epoch": 0.14206628801495436, + "kl_loss": 3.2254323959350586, + "loss_ib": 0.042398057878017426, + "step": 494 + }, + { + "epoch": 0.14235387159393198, + "grad_norm": 0.3967653214931488, + "learning_rate": 9.992623539650048e-06, + "loss": 0.947, + "step": 495 + }, + { + "ce_ib": 8.59465217590332, + "ce_orig": 0.6234766840934753, + "epoch": 0.14235387159393198, + "kl_loss": 2.992295026779175, + "loss_ib": 0.038517601788043976, + "step": 495 + }, + { + "ce_ib": 11.884413719177246, + "ce_orig": 1.2884939908981323, + "epoch": 0.14235387159393198, + "kl_loss": 3.2238070964813232, + "loss_ib": 0.04412248358130455, + "step": 495 + }, + { + "ce_ib": 12.295158386230469, + "ce_orig": 0.9079825282096863, + "epoch": 0.14235387159393198, + "kl_loss": 3.261247158050537, + "loss_ib": 0.04490762948989868, + "step": 495 + }, + { + "ce_ib": 11.556446075439453, + "ce_orig": 0.9584062695503235, + "epoch": 0.14235387159393198, + "kl_loss": 2.946765661239624, + "loss_ib": 0.0410241037607193, + "step": 495 + }, + { + "ce_ib": 13.715659141540527, + "ce_orig": 1.301735520362854, + "epoch": 0.14264145517290963, + "kl_loss": 3.156116485595703, + "loss_ib": 0.04527682065963745, + "step": 496 + }, + { + "ce_ib": 12.423558235168457, + "ce_orig": 0.9063977599143982, + "epoch": 0.14264145517290963, + "kl_loss": 3.1704659461975098, + "loss_ib": 0.04412821680307388, + "step": 496 + }, + { + "ce_ib": 15.041820526123047, + "ce_orig": 0.9676111340522766, + "epoch": 0.14264145517290963, + "kl_loss": 3.3395094871520996, + "loss_ib": 0.04843691736459732, + "step": 496 + }, + { + "ce_ib": 17.061294555664062, + "ce_orig": 2.1067917346954346, + "epoch": 0.14264145517290963, + "kl_loss": 3.0767550468444824, + "loss_ib": 0.04782884567975998, + "step": 496 + }, + { + "ce_ib": 11.296546936035156, + "ce_orig": 0.9768355488777161, + "epoch": 0.14292903875188726, + "kl_loss": 2.777761936187744, + "loss_ib": 0.03907416760921478, + "step": 497 + }, + { + "ce_ib": 11.038111686706543, + "ce_orig": 0.8759608268737793, + "epoch": 0.14292903875188726, + "kl_loss": 3.004284381866455, + "loss_ib": 0.04108095541596413, + "step": 497 + }, + { + "ce_ib": 11.033641815185547, + "ce_orig": 0.5562906265258789, + "epoch": 0.14292903875188726, + "kl_loss": 2.9936814308166504, + "loss_ib": 0.04097045958042145, + "step": 497 + }, + { + "ce_ib": 11.028107643127441, + "ce_orig": 1.197322130203247, + "epoch": 0.14292903875188726, + "kl_loss": 2.7564334869384766, + "loss_ib": 0.038592442870140076, + "step": 497 + }, + { + "ce_ib": 14.40247631072998, + "ce_orig": 1.2345876693725586, + "epoch": 0.1432166223308649, + "kl_loss": 2.6974658966064453, + "loss_ib": 0.04137713462114334, + "step": 498 + }, + { + "ce_ib": 12.58112621307373, + "ce_orig": 0.6902965903282166, + "epoch": 0.1432166223308649, + "kl_loss": 2.812513828277588, + "loss_ib": 0.04070626199245453, + "step": 498 + }, + { + "ce_ib": 10.28808879852295, + "ce_orig": 0.5265849232673645, + "epoch": 0.1432166223308649, + "kl_loss": 2.965153217315674, + "loss_ib": 0.039939623326063156, + "step": 498 + }, + { + "ce_ib": 11.784356117248535, + "ce_orig": 0.7255687117576599, + "epoch": 0.1432166223308649, + "kl_loss": 2.6487035751342773, + "loss_ib": 0.03827139362692833, + "step": 498 + }, + { + "ce_ib": 15.454878807067871, + "ce_orig": 1.4307360649108887, + "epoch": 0.14350420590984256, + "kl_loss": 2.638218641281128, + "loss_ib": 0.04183706268668175, + "step": 499 + }, + { + "ce_ib": 11.41547679901123, + "ce_orig": 1.0924228429794312, + "epoch": 0.14350420590984256, + "kl_loss": 2.683103084564209, + "loss_ib": 0.038246504962444305, + "step": 499 + }, + { + "ce_ib": 7.627654552459717, + "ce_orig": 0.6519067883491516, + "epoch": 0.14350420590984256, + "kl_loss": 2.4674363136291504, + "loss_ib": 0.032302018254995346, + "step": 499 + }, + { + "ce_ib": 9.059030532836914, + "ce_orig": 0.9335259199142456, + "epoch": 0.14350420590984256, + "kl_loss": 2.7982892990112305, + "loss_ib": 0.03704192489385605, + "step": 499 + }, + { + "epoch": 0.14379178948882018, + "grad_norm": 0.3535037636756897, + "learning_rate": 9.99219611727762e-06, + "loss": 0.9542, + "step": 500 + }, + { + "ce_ib": 12.202539443969727, + "ce_orig": 1.0921835899353027, + "epoch": 0.14379178948882018, + "kl_loss": 2.6520490646362305, + "loss_ib": 0.03872302919626236, + "step": 500 + }, + { + "ce_ib": 10.411079406738281, + "ce_orig": 1.0469058752059937, + "epoch": 0.14379178948882018, + "kl_loss": 2.550952434539795, + "loss_ib": 0.035920605063438416, + "step": 500 + }, + { + "ce_ib": 10.579100608825684, + "ce_orig": 0.7717140316963196, + "epoch": 0.14379178948882018, + "kl_loss": 2.458261013031006, + "loss_ib": 0.03516170755028725, + "step": 500 + }, + { + "ce_ib": 10.30762004852295, + "ce_orig": 0.47040054202079773, + "epoch": 0.14379178948882018, + "kl_loss": 2.5514392852783203, + "loss_ib": 0.03582201525568962, + "step": 500 + }, + { + "ce_ib": 8.242244720458984, + "ce_orig": 0.7109037041664124, + "epoch": 0.14407937306779783, + "kl_loss": 2.2276763916015625, + "loss_ib": 0.03051900863647461, + "step": 501 + }, + { + "ce_ib": 12.867658615112305, + "ce_orig": 1.3448221683502197, + "epoch": 0.14407937306779783, + "kl_loss": 2.390228271484375, + "loss_ib": 0.036769941449165344, + "step": 501 + }, + { + "ce_ib": 12.047565460205078, + "ce_orig": 1.1863359212875366, + "epoch": 0.14407937306779783, + "kl_loss": 2.2493762969970703, + "loss_ib": 0.034541331231594086, + "step": 501 + }, + { + "ce_ib": 7.998773574829102, + "ce_orig": 0.7724082469940186, + "epoch": 0.14407937306779783, + "kl_loss": 2.218076229095459, + "loss_ib": 0.03017953597009182, + "step": 501 + }, + { + "ce_ib": 8.808771133422852, + "ce_orig": 0.346529096364975, + "epoch": 0.14436695664677546, + "kl_loss": 2.290055751800537, + "loss_ib": 0.031709328293800354, + "step": 502 + }, + { + "ce_ib": 14.042349815368652, + "ce_orig": 0.8455150723457336, + "epoch": 0.14436695664677546, + "kl_loss": 2.330575466156006, + "loss_ib": 0.037348102778196335, + "step": 502 + }, + { + "ce_ib": 9.860107421875, + "ce_orig": 0.6380610466003418, + "epoch": 0.14436695664677546, + "kl_loss": 2.2274303436279297, + "loss_ib": 0.03213440999388695, + "step": 502 + }, + { + "ce_ib": 7.90905237197876, + "ce_orig": 0.7825286388397217, + "epoch": 0.14436695664677546, + "kl_loss": 2.2258763313293457, + "loss_ib": 0.03016781434416771, + "step": 502 + }, + { + "ce_ib": 11.905344009399414, + "ce_orig": 0.5903149247169495, + "epoch": 0.1446545402257531, + "kl_loss": 2.3541271686553955, + "loss_ib": 0.035446614027023315, + "step": 503 + }, + { + "ce_ib": 9.341203689575195, + "ce_orig": 0.5810147523880005, + "epoch": 0.1446545402257531, + "kl_loss": 1.9661822319030762, + "loss_ib": 0.029003025963902473, + "step": 503 + }, + { + "ce_ib": 11.79522705078125, + "ce_orig": 0.9077520966529846, + "epoch": 0.1446545402257531, + "kl_loss": 2.15496826171875, + "loss_ib": 0.03334490954875946, + "step": 503 + }, + { + "ce_ib": 12.743915557861328, + "ce_orig": 0.9383360147476196, + "epoch": 0.1446545402257531, + "kl_loss": 2.3349556922912598, + "loss_ib": 0.03609347343444824, + "step": 503 + }, + { + "ce_ib": 14.691107749938965, + "ce_orig": 1.4200998544692993, + "epoch": 0.14494212380473076, + "kl_loss": 1.9536817073822021, + "loss_ib": 0.034227922558784485, + "step": 504 + }, + { + "ce_ib": 13.637428283691406, + "ce_orig": 1.0158778429031372, + "epoch": 0.14494212380473076, + "kl_loss": 2.0383381843566895, + "loss_ib": 0.0340208075940609, + "step": 504 + }, + { + "ce_ib": 12.56800651550293, + "ce_orig": 0.9646020531654358, + "epoch": 0.14494212380473076, + "kl_loss": 2.094742774963379, + "loss_ib": 0.033515434712171555, + "step": 504 + }, + { + "ce_ib": 12.06141185760498, + "ce_orig": 1.0306986570358276, + "epoch": 0.14494212380473076, + "kl_loss": 2.175787925720215, + "loss_ib": 0.0338192917406559, + "step": 504 + }, + { + "epoch": 0.14522970738370838, + "grad_norm": 0.33350127935409546, + "learning_rate": 9.991756667546032e-06, + "loss": 0.9489, + "step": 505 + }, + { + "ce_ib": 8.817842483520508, + "ce_orig": 0.6972077488899231, + "epoch": 0.14522970738370838, + "kl_loss": 1.7852230072021484, + "loss_ib": 0.026670072227716446, + "step": 505 + }, + { + "ce_ib": 12.437209129333496, + "ce_orig": 0.6099770069122314, + "epoch": 0.14522970738370838, + "kl_loss": 2.4981532096862793, + "loss_ib": 0.03741874173283577, + "step": 505 + }, + { + "ce_ib": 11.220137596130371, + "ce_orig": 0.48771539330482483, + "epoch": 0.14522970738370838, + "kl_loss": 2.1456246376037598, + "loss_ib": 0.03267638385295868, + "step": 505 + }, + { + "ce_ib": 8.924522399902344, + "ce_orig": 0.5051496624946594, + "epoch": 0.14522970738370838, + "kl_loss": 1.9803262948989868, + "loss_ib": 0.028727782890200615, + "step": 505 + }, + { + "ce_ib": 12.348464012145996, + "ce_orig": 0.8960937857627869, + "epoch": 0.14551729096268604, + "kl_loss": 1.814281702041626, + "loss_ib": 0.030491279438138008, + "step": 506 + }, + { + "ce_ib": 7.2094316482543945, + "ce_orig": 0.47113969922065735, + "epoch": 0.14551729096268604, + "kl_loss": 1.9257069826126099, + "loss_ib": 0.026466500014066696, + "step": 506 + }, + { + "ce_ib": 11.242286682128906, + "ce_orig": 0.4751087427139282, + "epoch": 0.14551729096268604, + "kl_loss": 2.0415260791778564, + "loss_ib": 0.03165754675865173, + "step": 506 + }, + { + "ce_ib": 10.97330379486084, + "ce_orig": 0.642387866973877, + "epoch": 0.14551729096268604, + "kl_loss": 2.0187416076660156, + "loss_ib": 0.03116072155535221, + "step": 506 + }, + { + "ce_ib": 16.359783172607422, + "ce_orig": 1.645643949508667, + "epoch": 0.14580487454166366, + "kl_loss": 1.8092081546783447, + "loss_ib": 0.034451864659786224, + "step": 507 + }, + { + "ce_ib": 10.260422706604004, + "ce_orig": 0.9393115639686584, + "epoch": 0.14580487454166366, + "kl_loss": 2.0392160415649414, + "loss_ib": 0.03065258450806141, + "step": 507 + }, + { + "ce_ib": 15.242369651794434, + "ce_orig": 0.9167593717575073, + "epoch": 0.14580487454166366, + "kl_loss": 1.7671477794647217, + "loss_ib": 0.03291384503245354, + "step": 507 + }, + { + "ce_ib": 11.220046043395996, + "ce_orig": 0.7460023164749146, + "epoch": 0.14580487454166366, + "kl_loss": 1.9201838970184326, + "loss_ib": 0.03042188659310341, + "step": 507 + }, + { + "ce_ib": 10.025154113769531, + "ce_orig": 0.6676295399665833, + "epoch": 0.1460924581206413, + "kl_loss": 2.052180767059326, + "loss_ib": 0.03054695948958397, + "step": 508 + }, + { + "ce_ib": 14.332385063171387, + "ce_orig": 0.7957293391227722, + "epoch": 0.1460924581206413, + "kl_loss": 1.7676377296447754, + "loss_ib": 0.032008763402700424, + "step": 508 + }, + { + "ce_ib": 14.902412414550781, + "ce_orig": 1.4487565755844116, + "epoch": 0.1460924581206413, + "kl_loss": 1.6765596866607666, + "loss_ib": 0.031668007373809814, + "step": 508 + }, + { + "ce_ib": 9.683943748474121, + "ce_orig": 1.0197540521621704, + "epoch": 0.1460924581206413, + "kl_loss": 1.610795021057129, + "loss_ib": 0.025791892781853676, + "step": 508 + }, + { + "ce_ib": 15.52167797088623, + "ce_orig": 1.7749841213226318, + "epoch": 0.14638004169961896, + "kl_loss": 1.6767610311508179, + "loss_ib": 0.0322892889380455, + "step": 509 + }, + { + "ce_ib": 13.908830642700195, + "ce_orig": 0.9751031994819641, + "epoch": 0.14638004169961896, + "kl_loss": 1.6931664943695068, + "loss_ib": 0.030840495601296425, + "step": 509 + }, + { + "ce_ib": 9.30358600616455, + "ce_orig": 0.5810970664024353, + "epoch": 0.14638004169961896, + "kl_loss": 1.6350352764129639, + "loss_ib": 0.025653937831521034, + "step": 509 + }, + { + "ce_ib": 12.388398170471191, + "ce_orig": 1.3628201484680176, + "epoch": 0.14638004169961896, + "kl_loss": 1.6909823417663574, + "loss_ib": 0.029298221692442894, + "step": 509 + }, + { + "epoch": 0.1466676252785966, + "grad_norm": 0.18405361473560333, + "learning_rate": 9.991305191514018e-06, + "loss": 0.8849, + "step": 510 + }, + { + "ce_ib": 8.311685562133789, + "ce_orig": 0.7321364283561707, + "epoch": 0.1466676252785966, + "kl_loss": 1.6412349939346313, + "loss_ib": 0.02472403459250927, + "step": 510 + }, + { + "ce_ib": 15.0967378616333, + "ce_orig": 1.2958650588989258, + "epoch": 0.1466676252785966, + "kl_loss": 1.786908745765686, + "loss_ib": 0.032965827733278275, + "step": 510 + }, + { + "ce_ib": 17.36980438232422, + "ce_orig": 1.9791719913482666, + "epoch": 0.1466676252785966, + "kl_loss": 1.8581990003585815, + "loss_ib": 0.03595179319381714, + "step": 510 + }, + { + "ce_ib": 12.289637565612793, + "ce_orig": 0.7127004265785217, + "epoch": 0.1466676252785966, + "kl_loss": 1.685407280921936, + "loss_ib": 0.02914370968937874, + "step": 510 + }, + { + "ce_ib": 15.004842758178711, + "ce_orig": 1.406548023223877, + "epoch": 0.14695520885757424, + "kl_loss": 1.5896825790405273, + "loss_ib": 0.030901670455932617, + "step": 511 + }, + { + "ce_ib": 10.542769432067871, + "ce_orig": 0.7553672790527344, + "epoch": 0.14695520885757424, + "kl_loss": 1.7919988632202148, + "loss_ib": 0.028462758287787437, + "step": 511 + }, + { + "ce_ib": 13.622482299804688, + "ce_orig": 1.207612156867981, + "epoch": 0.14695520885757424, + "kl_loss": 1.765162706375122, + "loss_ib": 0.03127410635352135, + "step": 511 + }, + { + "ce_ib": 8.20363998413086, + "ce_orig": 0.6386350989341736, + "epoch": 0.14695520885757424, + "kl_loss": 1.5854518413543701, + "loss_ib": 0.024058157578110695, + "step": 511 + }, + { + "ce_ib": 13.55444622039795, + "ce_orig": 1.339708924293518, + "epoch": 0.14724279243655186, + "kl_loss": 1.6126033067703247, + "loss_ib": 0.029680481180548668, + "step": 512 + }, + { + "ce_ib": 8.628886222839355, + "ce_orig": 0.5619939565658569, + "epoch": 0.14724279243655186, + "kl_loss": 1.6175487041473389, + "loss_ib": 0.02480437234044075, + "step": 512 + }, + { + "ce_ib": 10.747875213623047, + "ce_orig": 0.8628310561180115, + "epoch": 0.14724279243655186, + "kl_loss": 1.5580902099609375, + "loss_ib": 0.02632877789437771, + "step": 512 + }, + { + "ce_ib": 11.197033882141113, + "ce_orig": 0.588740348815918, + "epoch": 0.14724279243655186, + "kl_loss": 1.625337839126587, + "loss_ib": 0.02745041251182556, + "step": 512 + }, + { + "ce_ib": 17.5482177734375, + "ce_orig": 1.566094160079956, + "epoch": 0.1475303760155295, + "kl_loss": 1.5798025131225586, + "loss_ib": 0.03334624320268631, + "step": 513 + }, + { + "ce_ib": 9.978529930114746, + "ce_orig": 0.9000970125198364, + "epoch": 0.1475303760155295, + "kl_loss": 1.5787134170532227, + "loss_ib": 0.025765664875507355, + "step": 513 + }, + { + "ce_ib": 7.795269012451172, + "ce_orig": 0.5428386926651001, + "epoch": 0.1475303760155295, + "kl_loss": 1.6383824348449707, + "loss_ib": 0.024179093539714813, + "step": 513 + }, + { + "ce_ib": 8.96078109741211, + "ce_orig": 0.45578211545944214, + "epoch": 0.1475303760155295, + "kl_loss": 1.5609797239303589, + "loss_ib": 0.024570578709244728, + "step": 513 + }, + { + "ce_ib": 14.166945457458496, + "ce_orig": 1.237365484237671, + "epoch": 0.14781795959450716, + "kl_loss": 1.5814361572265625, + "loss_ib": 0.029981307685375214, + "step": 514 + }, + { + "ce_ib": 15.119035720825195, + "ce_orig": 1.146514654159546, + "epoch": 0.14781795959450716, + "kl_loss": 1.787841796875, + "loss_ib": 0.032997455447912216, + "step": 514 + }, + { + "ce_ib": 11.368758201599121, + "ce_orig": 1.2245467901229858, + "epoch": 0.14781795959450716, + "kl_loss": 1.763063669204712, + "loss_ib": 0.028999393805861473, + "step": 514 + }, + { + "ce_ib": 10.331769943237305, + "ce_orig": 0.8155557513237, + "epoch": 0.14781795959450716, + "kl_loss": 1.5401368141174316, + "loss_ib": 0.025733135640621185, + "step": 514 + }, + { + "epoch": 0.1481055431734848, + "grad_norm": 0.10760627686977386, + "learning_rate": 9.990841690269293e-06, + "loss": 0.9116, + "step": 515 + }, + { + "ce_ib": 9.445401191711426, + "ce_orig": 0.5457524061203003, + "epoch": 0.1481055431734848, + "kl_loss": 1.5743253231048584, + "loss_ib": 0.02518865466117859, + "step": 515 + }, + { + "ce_ib": 12.43077278137207, + "ce_orig": 1.0758410692214966, + "epoch": 0.1481055431734848, + "kl_loss": 1.5002497434616089, + "loss_ib": 0.02743327058851719, + "step": 515 + }, + { + "ce_ib": 9.92260456085205, + "ce_orig": 0.9019168615341187, + "epoch": 0.1481055431734848, + "kl_loss": 1.5244628190994263, + "loss_ib": 0.025167230516672134, + "step": 515 + }, + { + "ce_ib": 10.745101928710938, + "ce_orig": 0.7567242980003357, + "epoch": 0.1481055431734848, + "kl_loss": 1.851075530052185, + "loss_ib": 0.029255857691168785, + "step": 515 + }, + { + "ce_ib": 11.868610382080078, + "ce_orig": 1.2269304990768433, + "epoch": 0.14839312675246244, + "kl_loss": 1.5262444019317627, + "loss_ib": 0.027131054550409317, + "step": 516 + }, + { + "ce_ib": 12.609333992004395, + "ce_orig": 1.2865411043167114, + "epoch": 0.14839312675246244, + "kl_loss": 1.5142364501953125, + "loss_ib": 0.027751697227358818, + "step": 516 + }, + { + "ce_ib": 15.365309715270996, + "ce_orig": 1.3366944789886475, + "epoch": 0.14839312675246244, + "kl_loss": 1.5448601245880127, + "loss_ib": 0.03081391006708145, + "step": 516 + }, + { + "ce_ib": 7.554969787597656, + "ce_orig": 0.7058290243148804, + "epoch": 0.14839312675246244, + "kl_loss": 1.5178661346435547, + "loss_ib": 0.02273363061249256, + "step": 516 + }, + { + "ce_ib": 8.25823974609375, + "ce_orig": 0.839113175868988, + "epoch": 0.14868071033144006, + "kl_loss": 1.557971477508545, + "loss_ib": 0.023837953805923462, + "step": 517 + }, + { + "ce_ib": 12.297826766967773, + "ce_orig": 1.190186619758606, + "epoch": 0.14868071033144006, + "kl_loss": 1.658672571182251, + "loss_ib": 0.02888455241918564, + "step": 517 + }, + { + "ce_ib": 13.659313201904297, + "ce_orig": 1.3207685947418213, + "epoch": 0.14868071033144006, + "kl_loss": 1.6122772693634033, + "loss_ib": 0.029782084748148918, + "step": 517 + }, + { + "ce_ib": 12.351545333862305, + "ce_orig": 0.750295877456665, + "epoch": 0.14868071033144006, + "kl_loss": 1.6051900386810303, + "loss_ib": 0.028403444215655327, + "step": 517 + }, + { + "ce_ib": 11.500561714172363, + "ce_orig": 0.8358568549156189, + "epoch": 0.14896829391041772, + "kl_loss": 1.5413665771484375, + "loss_ib": 0.02691422961652279, + "step": 518 + }, + { + "ce_ib": 9.108760833740234, + "ce_orig": 0.8471581339836121, + "epoch": 0.14896829391041772, + "kl_loss": 1.4837156534194946, + "loss_ib": 0.023945918306708336, + "step": 518 + }, + { + "ce_ib": 11.258501052856445, + "ce_orig": 0.804084300994873, + "epoch": 0.14896829391041772, + "kl_loss": 1.5666208267211914, + "loss_ib": 0.026924708858132362, + "step": 518 + }, + { + "ce_ib": 13.515926361083984, + "ce_orig": 0.8296307325363159, + "epoch": 0.14896829391041772, + "kl_loss": 1.5592403411865234, + "loss_ib": 0.029108328744769096, + "step": 518 + }, + { + "ce_ib": 13.376294136047363, + "ce_orig": 1.156392216682434, + "epoch": 0.14925587748939537, + "kl_loss": 1.6085054874420166, + "loss_ib": 0.029461350291967392, + "step": 519 + }, + { + "ce_ib": 9.568916320800781, + "ce_orig": 0.4749041795730591, + "epoch": 0.14925587748939537, + "kl_loss": 1.536426305770874, + "loss_ib": 0.02493317984044552, + "step": 519 + }, + { + "ce_ib": 16.30808448791504, + "ce_orig": 1.5859891176223755, + "epoch": 0.14925587748939537, + "kl_loss": 1.6146225929260254, + "loss_ib": 0.03245430812239647, + "step": 519 + }, + { + "ce_ib": 7.4144062995910645, + "ce_orig": 0.6065052151679993, + "epoch": 0.14925587748939537, + "kl_loss": 1.601442575454712, + "loss_ib": 0.02342883124947548, + "step": 519 + }, + { + "epoch": 0.149543461068373, + "grad_norm": 0.1481025665998459, + "learning_rate": 9.990366164928538e-06, + "loss": 0.8984, + "step": 520 + }, + { + "ce_ib": 15.21028995513916, + "ce_orig": 1.826915979385376, + "epoch": 0.149543461068373, + "kl_loss": 1.557509422302246, + "loss_ib": 0.03078538365662098, + "step": 520 + }, + { + "ce_ib": 10.92094612121582, + "ce_orig": 0.6996050477027893, + "epoch": 0.149543461068373, + "kl_loss": 1.522399663925171, + "loss_ib": 0.02614494226872921, + "step": 520 + }, + { + "ce_ib": 11.630789756774902, + "ce_orig": 0.5110880136489868, + "epoch": 0.149543461068373, + "kl_loss": 1.6356468200683594, + "loss_ib": 0.02798725850880146, + "step": 520 + }, + { + "ce_ib": 13.426923751831055, + "ce_orig": 0.7888133525848389, + "epoch": 0.149543461068373, + "kl_loss": 1.5482978820800781, + "loss_ib": 0.028909901157021523, + "step": 520 + }, + { + "ce_ib": 9.767127990722656, + "ce_orig": 0.9814274907112122, + "epoch": 0.14983104464735064, + "kl_loss": 1.706017255783081, + "loss_ib": 0.02682730183005333, + "step": 521 + }, + { + "ce_ib": 8.689159393310547, + "ce_orig": 0.5986825823783875, + "epoch": 0.14983104464735064, + "kl_loss": 1.5086462497711182, + "loss_ib": 0.023775622248649597, + "step": 521 + }, + { + "ce_ib": 7.830185890197754, + "ce_orig": 0.6796808242797852, + "epoch": 0.14983104464735064, + "kl_loss": 1.5338833332061768, + "loss_ib": 0.02316901832818985, + "step": 521 + }, + { + "ce_ib": 8.914192199707031, + "ce_orig": 0.5978474617004395, + "epoch": 0.14983104464735064, + "kl_loss": 1.5090875625610352, + "loss_ib": 0.024005066603422165, + "step": 521 + }, + { + "ce_ib": 11.189813613891602, + "ce_orig": 0.7912343144416809, + "epoch": 0.15011862822632827, + "kl_loss": 1.486729383468628, + "loss_ib": 0.026057107374072075, + "step": 522 + }, + { + "ce_ib": 12.931374549865723, + "ce_orig": 1.4291115999221802, + "epoch": 0.15011862822632827, + "kl_loss": 1.575240135192871, + "loss_ib": 0.028683776035904884, + "step": 522 + }, + { + "ce_ib": 6.15507173538208, + "ce_orig": 0.567773163318634, + "epoch": 0.15011862822632827, + "kl_loss": 1.5185538530349731, + "loss_ib": 0.021340610459446907, + "step": 522 + }, + { + "ce_ib": 13.948975563049316, + "ce_orig": 0.893979012966156, + "epoch": 0.15011862822632827, + "kl_loss": 1.5725568532943726, + "loss_ib": 0.02967454306781292, + "step": 522 + }, + { + "ce_ib": 14.437583923339844, + "ce_orig": 1.6611443758010864, + "epoch": 0.15040621180530592, + "kl_loss": 1.545514702796936, + "loss_ib": 0.029892729595303535, + "step": 523 + }, + { + "ce_ib": 12.710461616516113, + "ce_orig": 0.8755899667739868, + "epoch": 0.15040621180530592, + "kl_loss": 1.5674240589141846, + "loss_ib": 0.028384702280163765, + "step": 523 + }, + { + "ce_ib": 8.738030433654785, + "ce_orig": 0.8012534976005554, + "epoch": 0.15040621180530592, + "kl_loss": 1.5125902891159058, + "loss_ib": 0.023863932117819786, + "step": 523 + }, + { + "ce_ib": 11.891736030578613, + "ce_orig": 1.0337281227111816, + "epoch": 0.15040621180530592, + "kl_loss": 1.5082712173461914, + "loss_ib": 0.026974448934197426, + "step": 523 + }, + { + "ce_ib": 11.432838439941406, + "ce_orig": 0.8564993739128113, + "epoch": 0.15069379538428357, + "kl_loss": 1.4956270456314087, + "loss_ib": 0.02638910710811615, + "step": 524 + }, + { + "ce_ib": 6.101011276245117, + "ce_orig": 0.4317745864391327, + "epoch": 0.15069379538428357, + "kl_loss": 1.626281499862671, + "loss_ib": 0.022363826632499695, + "step": 524 + }, + { + "ce_ib": 11.570913314819336, + "ce_orig": 0.5999628901481628, + "epoch": 0.15069379538428357, + "kl_loss": 1.573062539100647, + "loss_ib": 0.027301540598273277, + "step": 524 + }, + { + "ce_ib": 10.142786979675293, + "ce_orig": 0.8973500728607178, + "epoch": 0.15069379538428357, + "kl_loss": 1.529207468032837, + "loss_ib": 0.025434860959649086, + "step": 524 + }, + { + "epoch": 0.1509813789632612, + "grad_norm": 0.09314551949501038, + "learning_rate": 9.989878616637401e-06, + "loss": 0.9524, + "step": 525 + }, + { + "ce_ib": 17.579530715942383, + "ce_orig": 1.8244338035583496, + "epoch": 0.1509813789632612, + "kl_loss": 1.5392405986785889, + "loss_ib": 0.032971933484077454, + "step": 525 + }, + { + "ce_ib": 11.251588821411133, + "ce_orig": 0.863845705986023, + "epoch": 0.1509813789632612, + "kl_loss": 1.5278338193893433, + "loss_ib": 0.02652992680668831, + "step": 525 + }, + { + "ce_ib": 8.327178955078125, + "ce_orig": 0.787936806678772, + "epoch": 0.1509813789632612, + "kl_loss": 1.545323371887207, + "loss_ib": 0.02378041297197342, + "step": 525 + }, + { + "ce_ib": 10.291125297546387, + "ce_orig": 0.7874522805213928, + "epoch": 0.1509813789632612, + "kl_loss": 1.4875431060791016, + "loss_ib": 0.025166556239128113, + "step": 525 + }, + { + "ce_ib": 12.645198822021484, + "ce_orig": 0.6515507698059082, + "epoch": 0.15126896254223884, + "kl_loss": 1.5123183727264404, + "loss_ib": 0.02776838280260563, + "step": 526 + }, + { + "ce_ib": 10.556811332702637, + "ce_orig": 1.057904601097107, + "epoch": 0.15126896254223884, + "kl_loss": 1.5609617233276367, + "loss_ib": 0.026166429743170738, + "step": 526 + }, + { + "ce_ib": 10.272709846496582, + "ce_orig": 0.8701647520065308, + "epoch": 0.15126896254223884, + "kl_loss": 1.559139370918274, + "loss_ib": 0.02586410380899906, + "step": 526 + }, + { + "ce_ib": 9.878427505493164, + "ce_orig": 0.654448926448822, + "epoch": 0.15126896254223884, + "kl_loss": 1.5793863534927368, + "loss_ib": 0.025672290474176407, + "step": 526 + }, + { + "ce_ib": 10.595402717590332, + "ce_orig": 0.7197730541229248, + "epoch": 0.15155654612121647, + "kl_loss": 1.4838206768035889, + "loss_ib": 0.025433609262108803, + "step": 527 + }, + { + "ce_ib": 8.086220741271973, + "ce_orig": 0.7310401797294617, + "epoch": 0.15155654612121647, + "kl_loss": 1.4655554294586182, + "loss_ib": 0.022741774097085, + "step": 527 + }, + { + "ce_ib": 7.985743522644043, + "ce_orig": 0.873805582523346, + "epoch": 0.15155654612121647, + "kl_loss": 1.5185062885284424, + "loss_ib": 0.02317080646753311, + "step": 527 + }, + { + "ce_ib": 9.111749649047852, + "ce_orig": 0.605055034160614, + "epoch": 0.15155654612121647, + "kl_loss": 1.5961281061172485, + "loss_ib": 0.025073029100894928, + "step": 527 + }, + { + "ce_ib": 13.70055103302002, + "ce_orig": 1.3269081115722656, + "epoch": 0.15184412970019412, + "kl_loss": 1.5185916423797607, + "loss_ib": 0.0288864653557539, + "step": 528 + }, + { + "ce_ib": 12.714378356933594, + "ce_orig": 0.890455424785614, + "epoch": 0.15184412970019412, + "kl_loss": 1.547227382659912, + "loss_ib": 0.028186652809381485, + "step": 528 + }, + { + "ce_ib": 13.768203735351562, + "ce_orig": 0.5043600797653198, + "epoch": 0.15184412970019412, + "kl_loss": 1.6048271656036377, + "loss_ib": 0.02981647476553917, + "step": 528 + }, + { + "ce_ib": 9.12528133392334, + "ce_orig": 0.8668175339698792, + "epoch": 0.15184412970019412, + "kl_loss": 1.5656447410583496, + "loss_ib": 0.02478172816336155, + "step": 528 + }, + { + "ce_ib": 7.842939376831055, + "ce_orig": 0.6462977528572083, + "epoch": 0.15213171327917177, + "kl_loss": 1.4568753242492676, + "loss_ib": 0.022411691024899483, + "step": 529 + }, + { + "ce_ib": 8.972084999084473, + "ce_orig": 0.5574026703834534, + "epoch": 0.15213171327917177, + "kl_loss": 1.5518778562545776, + "loss_ib": 0.02449086308479309, + "step": 529 + }, + { + "ce_ib": 13.132000923156738, + "ce_orig": 1.5121755599975586, + "epoch": 0.15213171327917177, + "kl_loss": 1.761476993560791, + "loss_ib": 0.030746769160032272, + "step": 529 + }, + { + "ce_ib": 10.317779541015625, + "ce_orig": 0.7324342131614685, + "epoch": 0.15213171327917177, + "kl_loss": 1.5073950290679932, + "loss_ib": 0.025391731411218643, + "step": 529 + }, + { + "epoch": 0.1524192968581494, + "grad_norm": 0.09296334534883499, + "learning_rate": 9.989379046570502e-06, + "loss": 0.9041, + "step": 530 + }, + { + "ce_ib": 14.723713874816895, + "ce_orig": 1.1431795358657837, + "epoch": 0.1524192968581494, + "kl_loss": 1.4883091449737549, + "loss_ib": 0.029606804251670837, + "step": 530 + }, + { + "ce_ib": 9.255940437316895, + "ce_orig": 1.317234992980957, + "epoch": 0.1524192968581494, + "kl_loss": 1.508230209350586, + "loss_ib": 0.024338241666555405, + "step": 530 + }, + { + "ce_ib": 10.745719909667969, + "ce_orig": 0.7085793614387512, + "epoch": 0.1524192968581494, + "kl_loss": 1.5349406003952026, + "loss_ib": 0.026095125824213028, + "step": 530 + }, + { + "ce_ib": 11.930793762207031, + "ce_orig": 0.7779906392097473, + "epoch": 0.1524192968581494, + "kl_loss": 1.449808955192566, + "loss_ib": 0.026428882032632828, + "step": 530 + }, + { + "ce_ib": 9.066697120666504, + "ce_orig": 0.8550069332122803, + "epoch": 0.15270688043712705, + "kl_loss": 1.448561191558838, + "loss_ib": 0.02355230785906315, + "step": 531 + }, + { + "ce_ib": 14.419188499450684, + "ce_orig": 0.8470748066902161, + "epoch": 0.15270688043712705, + "kl_loss": 1.476117730140686, + "loss_ib": 0.029180364683270454, + "step": 531 + }, + { + "ce_ib": 4.3977861404418945, + "ce_orig": 0.16003404557704926, + "epoch": 0.15270688043712705, + "kl_loss": 1.4080500602722168, + "loss_ib": 0.018478285521268845, + "step": 531 + }, + { + "ce_ib": 8.737853050231934, + "ce_orig": 0.8578985929489136, + "epoch": 0.15270688043712705, + "kl_loss": 1.5061912536621094, + "loss_ib": 0.02379976399242878, + "step": 531 + }, + { + "ce_ib": 13.226619720458984, + "ce_orig": 1.234938621520996, + "epoch": 0.15299446401610467, + "kl_loss": 1.5700280666351318, + "loss_ib": 0.028926901519298553, + "step": 532 + }, + { + "ce_ib": 13.38469123840332, + "ce_orig": 1.4244154691696167, + "epoch": 0.15299446401610467, + "kl_loss": 1.483577013015747, + "loss_ib": 0.028220461681485176, + "step": 532 + }, + { + "ce_ib": 10.670931816101074, + "ce_orig": 1.0447449684143066, + "epoch": 0.15299446401610467, + "kl_loss": 1.472721815109253, + "loss_ib": 0.025398148223757744, + "step": 532 + }, + { + "ce_ib": 9.630074501037598, + "ce_orig": 0.4802638292312622, + "epoch": 0.15299446401610467, + "kl_loss": 1.4675498008728027, + "loss_ib": 0.024305572733283043, + "step": 532 + }, + { + "ce_ib": 9.232088088989258, + "ce_orig": 0.6876621246337891, + "epoch": 0.15328204759508232, + "kl_loss": 1.5124475955963135, + "loss_ib": 0.02435656450688839, + "step": 533 + }, + { + "ce_ib": 9.646381378173828, + "ce_orig": 0.6899409890174866, + "epoch": 0.15328204759508232, + "kl_loss": 1.4822652339935303, + "loss_ib": 0.024469034746289253, + "step": 533 + }, + { + "ce_ib": 12.952717781066895, + "ce_orig": 1.2678933143615723, + "epoch": 0.15328204759508232, + "kl_loss": 1.5268654823303223, + "loss_ib": 0.02822137251496315, + "step": 533 + }, + { + "ce_ib": 12.745079040527344, + "ce_orig": 0.4462144672870636, + "epoch": 0.15328204759508232, + "kl_loss": 1.4826654195785522, + "loss_ib": 0.02757173217833042, + "step": 533 + }, + { + "ce_ib": 11.242120742797852, + "ce_orig": 0.727728009223938, + "epoch": 0.15356963117405997, + "kl_loss": 1.4587228298187256, + "loss_ib": 0.02582934871315956, + "step": 534 + }, + { + "ce_ib": 13.557268142700195, + "ce_orig": 1.029449701309204, + "epoch": 0.15356963117405997, + "kl_loss": 1.5601624250411987, + "loss_ib": 0.02915889210999012, + "step": 534 + }, + { + "ce_ib": 12.117486000061035, + "ce_orig": 1.2025965452194214, + "epoch": 0.15356963117405997, + "kl_loss": 1.4851754903793335, + "loss_ib": 0.026969240978360176, + "step": 534 + }, + { + "ce_ib": 11.584373474121094, + "ce_orig": 1.0777106285095215, + "epoch": 0.15356963117405997, + "kl_loss": 1.4760735034942627, + "loss_ib": 0.026345109567046165, + "step": 534 + }, + { + "epoch": 0.1538572147530376, + "grad_norm": 0.08552956581115723, + "learning_rate": 9.988867455931422e-06, + "loss": 0.9482, + "step": 535 + }, + { + "ce_ib": 15.782403945922852, + "ce_orig": 1.24473237991333, + "epoch": 0.1538572147530376, + "kl_loss": 1.498823881149292, + "loss_ib": 0.030770642682909966, + "step": 535 + }, + { + "ce_ib": 11.243325233459473, + "ce_orig": 0.5300117135047913, + "epoch": 0.1538572147530376, + "kl_loss": 1.634958028793335, + "loss_ib": 0.02759290672838688, + "step": 535 + }, + { + "ce_ib": 10.156462669372559, + "ce_orig": 0.8086475133895874, + "epoch": 0.1538572147530376, + "kl_loss": 1.5278410911560059, + "loss_ib": 0.02543487399816513, + "step": 535 + }, + { + "ce_ib": 8.719620704650879, + "ce_orig": 0.7736819386482239, + "epoch": 0.1538572147530376, + "kl_loss": 1.4572741985321045, + "loss_ib": 0.023292362689971924, + "step": 535 + }, + { + "ce_ib": 11.448285102844238, + "ce_orig": 1.3073540925979614, + "epoch": 0.15414479833201525, + "kl_loss": 1.468369722366333, + "loss_ib": 0.02613198198378086, + "step": 536 + }, + { + "ce_ib": 10.234390258789062, + "ce_orig": 0.6196459531784058, + "epoch": 0.15414479833201525, + "kl_loss": 1.4478919506072998, + "loss_ib": 0.024713311344385147, + "step": 536 + }, + { + "ce_ib": 8.465949058532715, + "ce_orig": 0.5101594924926758, + "epoch": 0.15414479833201525, + "kl_loss": 1.4375674724578857, + "loss_ib": 0.022841624915599823, + "step": 536 + }, + { + "ce_ib": 14.232527732849121, + "ce_orig": 0.9877519011497498, + "epoch": 0.15414479833201525, + "kl_loss": 1.5590825080871582, + "loss_ib": 0.02982335351407528, + "step": 536 + }, + { + "ce_ib": 13.802165031433105, + "ce_orig": 1.1603584289550781, + "epoch": 0.15443238191099287, + "kl_loss": 1.4649747610092163, + "loss_ib": 0.028451912105083466, + "step": 537 + }, + { + "ce_ib": 9.437994956970215, + "ce_orig": 1.0976390838623047, + "epoch": 0.15443238191099287, + "kl_loss": 1.453848123550415, + "loss_ib": 0.02397647500038147, + "step": 537 + }, + { + "ce_ib": 14.96358585357666, + "ce_orig": 1.2715431451797485, + "epoch": 0.15443238191099287, + "kl_loss": 1.4742491245269775, + "loss_ib": 0.0297060776501894, + "step": 537 + }, + { + "ce_ib": 13.275339126586914, + "ce_orig": 1.3510757684707642, + "epoch": 0.15443238191099287, + "kl_loss": 1.4458937644958496, + "loss_ib": 0.02773427590727806, + "step": 537 + }, + { + "ce_ib": 12.93941879272461, + "ce_orig": 1.3727543354034424, + "epoch": 0.15471996548997052, + "kl_loss": 1.466170310974121, + "loss_ib": 0.027601122856140137, + "step": 538 + }, + { + "ce_ib": 19.104490280151367, + "ce_orig": 2.0155956745147705, + "epoch": 0.15471996548997052, + "kl_loss": 1.4700965881347656, + "loss_ib": 0.03380545601248741, + "step": 538 + }, + { + "ce_ib": 9.300647735595703, + "ce_orig": 1.1129015684127808, + "epoch": 0.15471996548997052, + "kl_loss": 1.4137213230133057, + "loss_ib": 0.02343786135315895, + "step": 538 + }, + { + "ce_ib": 9.72518253326416, + "ce_orig": 1.0089741945266724, + "epoch": 0.15471996548997052, + "kl_loss": 1.412247657775879, + "loss_ib": 0.023847658187150955, + "step": 538 + }, + { + "ce_ib": 11.109746932983398, + "ce_orig": 0.6238597631454468, + "epoch": 0.15500754906894817, + "kl_loss": 1.424318552017212, + "loss_ib": 0.025352930650115013, + "step": 539 + }, + { + "ce_ib": 12.226134300231934, + "ce_orig": 0.7648814916610718, + "epoch": 0.15500754906894817, + "kl_loss": 1.5073215961456299, + "loss_ib": 0.02729935199022293, + "step": 539 + }, + { + "ce_ib": 10.58513069152832, + "ce_orig": 0.5339838862419128, + "epoch": 0.15500754906894817, + "kl_loss": 1.5678870677947998, + "loss_ib": 0.026263998821377754, + "step": 539 + }, + { + "ce_ib": 12.42918586730957, + "ce_orig": 0.8719852566719055, + "epoch": 0.15500754906894817, + "kl_loss": 1.4344046115875244, + "loss_ib": 0.026773232966661453, + "step": 539 + }, + { + "epoch": 0.1552951326479258, + "grad_norm": 0.08646312355995178, + "learning_rate": 9.988343845952697e-06, + "loss": 0.9388, + "step": 540 + }, + { + "ce_ib": 14.183638572692871, + "ce_orig": 0.9510587453842163, + "epoch": 0.1552951326479258, + "kl_loss": 1.4567286968231201, + "loss_ib": 0.02875092439353466, + "step": 540 + }, + { + "ce_ib": 14.285439491271973, + "ce_orig": 1.5962088108062744, + "epoch": 0.1552951326479258, + "kl_loss": 1.4375842809677124, + "loss_ib": 0.028661280870437622, + "step": 540 + }, + { + "ce_ib": 8.6818265914917, + "ce_orig": 0.9919387698173523, + "epoch": 0.1552951326479258, + "kl_loss": 1.4143463373184204, + "loss_ib": 0.02282528765499592, + "step": 540 + }, + { + "ce_ib": 11.165204048156738, + "ce_orig": 0.8994119763374329, + "epoch": 0.1552951326479258, + "kl_loss": 1.5460944175720215, + "loss_ib": 0.02662614732980728, + "step": 540 + }, + { + "ce_ib": 11.784940719604492, + "ce_orig": 0.982570469379425, + "epoch": 0.15558271622690345, + "kl_loss": 1.4393483400344849, + "loss_ib": 0.026178423315286636, + "step": 541 + }, + { + "ce_ib": 11.36942195892334, + "ce_orig": 0.8527225255966187, + "epoch": 0.15558271622690345, + "kl_loss": 1.5223984718322754, + "loss_ib": 0.02659340761601925, + "step": 541 + }, + { + "ce_ib": 12.205092430114746, + "ce_orig": 0.6624218225479126, + "epoch": 0.15558271622690345, + "kl_loss": 1.471164584159851, + "loss_ib": 0.026916736736893654, + "step": 541 + }, + { + "ce_ib": 9.685622215270996, + "ce_orig": 0.652384877204895, + "epoch": 0.15558271622690345, + "kl_loss": 1.4571822881698608, + "loss_ib": 0.024257445707917213, + "step": 541 + }, + { + "ce_ib": 8.85888385772705, + "ce_orig": 0.6606395244598389, + "epoch": 0.15587029980588107, + "kl_loss": 1.4010136127471924, + "loss_ib": 0.022869018837809563, + "step": 542 + }, + { + "ce_ib": 13.208836555480957, + "ce_orig": 1.2833889722824097, + "epoch": 0.15587029980588107, + "kl_loss": 1.411513328552246, + "loss_ib": 0.027323970571160316, + "step": 542 + }, + { + "ce_ib": 10.047677040100098, + "ce_orig": 0.7564672827720642, + "epoch": 0.15587029980588107, + "kl_loss": 1.4649560451507568, + "loss_ib": 0.024697236716747284, + "step": 542 + }, + { + "ce_ib": 8.681495666503906, + "ce_orig": 0.7242369651794434, + "epoch": 0.15587029980588107, + "kl_loss": 1.3844184875488281, + "loss_ib": 0.02252567932009697, + "step": 542 + }, + { + "ce_ib": 9.65349292755127, + "ce_orig": 0.8696082830429077, + "epoch": 0.15615788338485873, + "kl_loss": 1.4093207120895386, + "loss_ib": 0.02374669909477234, + "step": 543 + }, + { + "ce_ib": 13.341421127319336, + "ce_orig": 1.152627944946289, + "epoch": 0.15615788338485873, + "kl_loss": 1.4343910217285156, + "loss_ib": 0.02768533118069172, + "step": 543 + }, + { + "ce_ib": 10.65963363647461, + "ce_orig": 0.5063934326171875, + "epoch": 0.15615788338485873, + "kl_loss": 1.4016281366348267, + "loss_ib": 0.024675915017724037, + "step": 543 + }, + { + "ce_ib": 11.079456329345703, + "ce_orig": 1.1200248003005981, + "epoch": 0.15615788338485873, + "kl_loss": 1.400475263595581, + "loss_ib": 0.025084208697080612, + "step": 543 + }, + { + "ce_ib": 10.452417373657227, + "ce_orig": 0.4226267635822296, + "epoch": 0.15644546696383638, + "kl_loss": 1.4564342498779297, + "loss_ib": 0.02501676045358181, + "step": 544 + }, + { + "ce_ib": 9.227188110351562, + "ce_orig": 0.3644406199455261, + "epoch": 0.15644546696383638, + "kl_loss": 1.4034581184387207, + "loss_ib": 0.023261768743395805, + "step": 544 + }, + { + "ce_ib": 12.85843276977539, + "ce_orig": 1.260372519493103, + "epoch": 0.15644546696383638, + "kl_loss": 1.4097330570220947, + "loss_ib": 0.02695576101541519, + "step": 544 + }, + { + "ce_ib": 15.890623092651367, + "ce_orig": 1.9688998460769653, + "epoch": 0.15644546696383638, + "kl_loss": 1.469724178314209, + "loss_ib": 0.03058786317706108, + "step": 544 + }, + { + "epoch": 0.156733050542814, + "grad_norm": 0.09022902697324753, + "learning_rate": 9.987808217895829e-06, + "loss": 0.9285, + "step": 545 + }, + { + "ce_ib": 8.582206726074219, + "ce_orig": 0.5587666630744934, + "epoch": 0.156733050542814, + "kl_loss": 1.3718297481536865, + "loss_ib": 0.02230050601065159, + "step": 545 + }, + { + "ce_ib": 15.280888557434082, + "ce_orig": 1.1607708930969238, + "epoch": 0.156733050542814, + "kl_loss": 1.4650864601135254, + "loss_ib": 0.029931753873825073, + "step": 545 + }, + { + "ce_ib": 13.70768928527832, + "ce_orig": 1.192724585533142, + "epoch": 0.156733050542814, + "kl_loss": 1.49911367893219, + "loss_ib": 0.02869882434606552, + "step": 545 + }, + { + "ce_ib": 12.85840129852295, + "ce_orig": 1.2321618795394897, + "epoch": 0.156733050542814, + "kl_loss": 1.4133646488189697, + "loss_ib": 0.02699204906821251, + "step": 545 + }, + { + "ce_ib": 10.931328773498535, + "ce_orig": 1.0996520519256592, + "epoch": 0.15702063412179165, + "kl_loss": 1.3952105045318604, + "loss_ib": 0.024883432313799858, + "step": 546 + }, + { + "ce_ib": 11.635273933410645, + "ce_orig": 0.7298911809921265, + "epoch": 0.15702063412179165, + "kl_loss": 1.4759316444396973, + "loss_ib": 0.026394590735435486, + "step": 546 + }, + { + "ce_ib": 9.857783317565918, + "ce_orig": 0.6076138019561768, + "epoch": 0.15702063412179165, + "kl_loss": 1.5050930976867676, + "loss_ib": 0.02490871399641037, + "step": 546 + }, + { + "ce_ib": 11.730413436889648, + "ce_orig": 0.9210866093635559, + "epoch": 0.15702063412179165, + "kl_loss": 1.3834307193756104, + "loss_ib": 0.02556472085416317, + "step": 546 + }, + { + "ce_ib": 9.36272144317627, + "ce_orig": 0.7209946513175964, + "epoch": 0.15730821770076928, + "kl_loss": 1.3859405517578125, + "loss_ib": 0.023222126066684723, + "step": 547 + }, + { + "ce_ib": 10.93961238861084, + "ce_orig": 1.1104698181152344, + "epoch": 0.15730821770076928, + "kl_loss": 1.3689725399017334, + "loss_ib": 0.024629337713122368, + "step": 547 + }, + { + "ce_ib": 10.149394989013672, + "ce_orig": 0.9216436147689819, + "epoch": 0.15730821770076928, + "kl_loss": 1.3667898178100586, + "loss_ib": 0.023817293345928192, + "step": 547 + }, + { + "ce_ib": 9.941133499145508, + "ce_orig": 0.8583170771598816, + "epoch": 0.15730821770076928, + "kl_loss": 1.4201359748840332, + "loss_ib": 0.024142494425177574, + "step": 547 + }, + { + "ce_ib": 13.535919189453125, + "ce_orig": 0.8294936418533325, + "epoch": 0.15759580127974693, + "kl_loss": 1.4394149780273438, + "loss_ib": 0.027930067852139473, + "step": 548 + }, + { + "ce_ib": 6.7044782638549805, + "ce_orig": 0.655543863773346, + "epoch": 0.15759580127974693, + "kl_loss": 1.398592233657837, + "loss_ib": 0.020690400153398514, + "step": 548 + }, + { + "ce_ib": 12.02395248413086, + "ce_orig": 0.5793411731719971, + "epoch": 0.15759580127974693, + "kl_loss": 1.454443335533142, + "loss_ib": 0.02656838670372963, + "step": 548 + }, + { + "ce_ib": 9.43730354309082, + "ce_orig": 0.6028481125831604, + "epoch": 0.15759580127974693, + "kl_loss": 1.3634798526763916, + "loss_ib": 0.023072101175785065, + "step": 548 + }, + { + "ce_ib": 9.443431854248047, + "ce_orig": 0.8150414228439331, + "epoch": 0.15788338485872458, + "kl_loss": 1.3247261047363281, + "loss_ib": 0.02269069105386734, + "step": 549 + }, + { + "ce_ib": 12.465729713439941, + "ce_orig": 0.912677526473999, + "epoch": 0.15788338485872458, + "kl_loss": 1.4468390941619873, + "loss_ib": 0.026934120804071426, + "step": 549 + }, + { + "ce_ib": 11.708540916442871, + "ce_orig": 1.2497539520263672, + "epoch": 0.15788338485872458, + "kl_loss": 1.3749089241027832, + "loss_ib": 0.025457629933953285, + "step": 549 + }, + { + "ce_ib": 5.031269073486328, + "ce_orig": 0.17525199055671692, + "epoch": 0.15788338485872458, + "kl_loss": 1.4064466953277588, + "loss_ib": 0.019095735624432564, + "step": 549 + }, + { + "epoch": 0.1581709684377022, + "grad_norm": 0.09811785817146301, + "learning_rate": 9.987260573051268e-06, + "loss": 0.8876, + "step": 550 + }, + { + "ce_ib": 11.644174575805664, + "ce_orig": 1.3015292882919312, + "epoch": 0.1581709684377022, + "kl_loss": 1.3472862243652344, + "loss_ib": 0.02511703595519066, + "step": 550 + }, + { + "ce_ib": 10.139188766479492, + "ce_orig": 1.1227424144744873, + "epoch": 0.1581709684377022, + "kl_loss": 1.3244848251342773, + "loss_ib": 0.023384036496281624, + "step": 550 + }, + { + "ce_ib": 5.7533979415893555, + "ce_orig": 0.4904988706111908, + "epoch": 0.1581709684377022, + "kl_loss": 1.3460612297058105, + "loss_ib": 0.01921400986611843, + "step": 550 + }, + { + "ce_ib": 14.475028991699219, + "ce_orig": 1.008355736732483, + "epoch": 0.1581709684377022, + "kl_loss": 1.3830070495605469, + "loss_ib": 0.02830510027706623, + "step": 550 + }, + { + "ce_ib": 9.800948143005371, + "ce_orig": 0.5951581001281738, + "epoch": 0.15845855201667985, + "kl_loss": 1.4228395223617554, + "loss_ib": 0.024029342457652092, + "step": 551 + }, + { + "ce_ib": 12.266356468200684, + "ce_orig": 1.480778455734253, + "epoch": 0.15845855201667985, + "kl_loss": 1.3874316215515137, + "loss_ib": 0.026140673086047173, + "step": 551 + }, + { + "ce_ib": 11.24101734161377, + "ce_orig": 0.6378637552261353, + "epoch": 0.15845855201667985, + "kl_loss": 1.3844799995422363, + "loss_ib": 0.0250858161598444, + "step": 551 + }, + { + "ce_ib": 12.70676040649414, + "ce_orig": 1.2595171928405762, + "epoch": 0.15845855201667985, + "kl_loss": 1.3542779684066772, + "loss_ib": 0.026249539107084274, + "step": 551 + }, + { + "ce_ib": 13.323479652404785, + "ce_orig": 1.104166030883789, + "epoch": 0.15874613559565748, + "kl_loss": 1.3231797218322754, + "loss_ib": 0.026555275544524193, + "step": 552 + }, + { + "ce_ib": 8.531795501708984, + "ce_orig": 0.4913962483406067, + "epoch": 0.15874613559565748, + "kl_loss": 1.3631434440612793, + "loss_ib": 0.022163229063153267, + "step": 552 + }, + { + "ce_ib": 12.574892044067383, + "ce_orig": 0.9339185953140259, + "epoch": 0.15874613559565748, + "kl_loss": 1.434931993484497, + "loss_ib": 0.026924211531877518, + "step": 552 + }, + { + "ce_ib": 10.622230529785156, + "ce_orig": 0.9095126390457153, + "epoch": 0.15874613559565748, + "kl_loss": 1.363985538482666, + "loss_ib": 0.0242620836943388, + "step": 552 + }, + { + "ce_ib": 10.206563949584961, + "ce_orig": 0.5735985040664673, + "epoch": 0.15903371917463513, + "kl_loss": 1.4209774732589722, + "loss_ib": 0.02441633865237236, + "step": 553 + }, + { + "ce_ib": 15.614920616149902, + "ce_orig": 1.3737772703170776, + "epoch": 0.15903371917463513, + "kl_loss": 1.4201138019561768, + "loss_ib": 0.02981605939567089, + "step": 553 + }, + { + "ce_ib": 12.950101852416992, + "ce_orig": 0.9557084441184998, + "epoch": 0.15903371917463513, + "kl_loss": 1.4211621284484863, + "loss_ib": 0.027161721140146255, + "step": 553 + }, + { + "ce_ib": 10.123566627502441, + "ce_orig": 0.7820416688919067, + "epoch": 0.15903371917463513, + "kl_loss": 1.3192014694213867, + "loss_ib": 0.023315582424402237, + "step": 553 + }, + { + "ce_ib": 5.311279296875, + "ce_orig": 0.5123794078826904, + "epoch": 0.15932130275361278, + "kl_loss": 1.2463486194610596, + "loss_ib": 0.017774764448404312, + "step": 554 + }, + { + "ce_ib": 10.679170608520508, + "ce_orig": 0.7276657223701477, + "epoch": 0.15932130275361278, + "kl_loss": 1.3026518821716309, + "loss_ib": 0.02370568923652172, + "step": 554 + }, + { + "ce_ib": 13.4666109085083, + "ce_orig": 1.2032169103622437, + "epoch": 0.15932130275361278, + "kl_loss": 1.417797327041626, + "loss_ib": 0.027644583955407143, + "step": 554 + }, + { + "ce_ib": 12.01272201538086, + "ce_orig": 0.9139970541000366, + "epoch": 0.15932130275361278, + "kl_loss": 1.4650115966796875, + "loss_ib": 0.026662837713956833, + "step": 554 + }, + { + "epoch": 0.1596088863325904, + "grad_norm": 0.09588459134101868, + "learning_rate": 9.98670091273842e-06, + "loss": 0.9863, + "step": 555 + }, + { + "ce_ib": 12.853775978088379, + "ce_orig": 0.8478192090988159, + "epoch": 0.1596088863325904, + "kl_loss": 1.337024211883545, + "loss_ib": 0.02622401714324951, + "step": 555 + }, + { + "ce_ib": 9.791227340698242, + "ce_orig": 0.7623945474624634, + "epoch": 0.1596088863325904, + "kl_loss": 1.3745824098587036, + "loss_ib": 0.023537050932645798, + "step": 555 + }, + { + "ce_ib": 11.515276908874512, + "ce_orig": 0.6505551338195801, + "epoch": 0.1596088863325904, + "kl_loss": 1.350890040397644, + "loss_ib": 0.025024177506566048, + "step": 555 + }, + { + "ce_ib": 13.35179328918457, + "ce_orig": 1.0168282985687256, + "epoch": 0.1596088863325904, + "kl_loss": 1.336082100868225, + "loss_ib": 0.026712613180279732, + "step": 555 + }, + { + "ce_ib": 17.056640625, + "ce_orig": 1.6616370677947998, + "epoch": 0.15989646991156806, + "kl_loss": 1.3520833253860474, + "loss_ib": 0.03057747334241867, + "step": 556 + }, + { + "ce_ib": 6.637577056884766, + "ce_orig": 0.5979457497596741, + "epoch": 0.15989646991156806, + "kl_loss": 1.32643723487854, + "loss_ib": 0.019901949912309647, + "step": 556 + }, + { + "ce_ib": 11.517195701599121, + "ce_orig": 1.0731699466705322, + "epoch": 0.15989646991156806, + "kl_loss": 1.3701467514038086, + "loss_ib": 0.0252186618745327, + "step": 556 + }, + { + "ce_ib": 7.839071750640869, + "ce_orig": 0.57491534948349, + "epoch": 0.15989646991156806, + "kl_loss": 1.281550645828247, + "loss_ib": 0.020654577761888504, + "step": 556 + }, + { + "ce_ib": 5.376894950866699, + "ce_orig": 0.27646228671073914, + "epoch": 0.16018405349054568, + "kl_loss": 1.4170054197311401, + "loss_ib": 0.019546950235962868, + "step": 557 + }, + { + "ce_ib": 7.960681915283203, + "ce_orig": 0.8380683064460754, + "epoch": 0.16018405349054568, + "kl_loss": 1.3399286270141602, + "loss_ib": 0.021359967067837715, + "step": 557 + }, + { + "ce_ib": 12.966280937194824, + "ce_orig": 1.0689243078231812, + "epoch": 0.16018405349054568, + "kl_loss": 1.425252914428711, + "loss_ib": 0.027218809351325035, + "step": 557 + }, + { + "ce_ib": 12.531590461730957, + "ce_orig": 1.2268368005752563, + "epoch": 0.16018405349054568, + "kl_loss": 1.3414859771728516, + "loss_ib": 0.025946449488401413, + "step": 557 + }, + { + "ce_ib": 8.23051929473877, + "ce_orig": 0.6497761607170105, + "epoch": 0.16047163706952333, + "kl_loss": 1.3148771524429321, + "loss_ib": 0.021379288285970688, + "step": 558 + }, + { + "ce_ib": 11.831758499145508, + "ce_orig": 1.0878973007202148, + "epoch": 0.16047163706952333, + "kl_loss": 1.344857096672058, + "loss_ib": 0.025280330330133438, + "step": 558 + }, + { + "ce_ib": 10.697997093200684, + "ce_orig": 0.9739592671394348, + "epoch": 0.16047163706952333, + "kl_loss": 1.2617008686065674, + "loss_ib": 0.023315005004405975, + "step": 558 + }, + { + "ce_ib": 10.034689903259277, + "ce_orig": 0.7774488925933838, + "epoch": 0.16047163706952333, + "kl_loss": 1.3792924880981445, + "loss_ib": 0.02382761426270008, + "step": 558 + }, + { + "ce_ib": 8.980086326599121, + "ce_orig": 0.5476792454719543, + "epoch": 0.16075922064850098, + "kl_loss": 1.3168349266052246, + "loss_ib": 0.022148434072732925, + "step": 559 + }, + { + "ce_ib": 7.035679340362549, + "ce_orig": 0.627990186214447, + "epoch": 0.16075922064850098, + "kl_loss": 1.3569344282150269, + "loss_ib": 0.020605022087693214, + "step": 559 + }, + { + "ce_ib": 12.099848747253418, + "ce_orig": 1.201551914215088, + "epoch": 0.16075922064850098, + "kl_loss": 1.2747890949249268, + "loss_ib": 0.024847740307450294, + "step": 559 + }, + { + "ce_ib": 8.865999221801758, + "ce_orig": 0.7412122488021851, + "epoch": 0.16075922064850098, + "kl_loss": 1.4771380424499512, + "loss_ib": 0.023637380450963974, + "step": 559 + }, + { + "epoch": 0.1610468042274786, + "grad_norm": 0.0971461683511734, + "learning_rate": 9.986129238305635e-06, + "loss": 0.8747, + "step": 560 + }, + { + "ce_ib": 7.6997246742248535, + "ce_orig": 0.6025680303573608, + "epoch": 0.1610468042274786, + "kl_loss": 1.276071310043335, + "loss_ib": 0.02046043798327446, + "step": 560 + }, + { + "ce_ib": 7.523832321166992, + "ce_orig": 0.7658670544624329, + "epoch": 0.1610468042274786, + "kl_loss": 1.4311625957489014, + "loss_ib": 0.021835457533597946, + "step": 560 + }, + { + "ce_ib": 11.750297546386719, + "ce_orig": 0.4812588095664978, + "epoch": 0.1610468042274786, + "kl_loss": 1.3406429290771484, + "loss_ib": 0.02515672706067562, + "step": 560 + }, + { + "ce_ib": 10.141862869262695, + "ce_orig": 0.8624674081802368, + "epoch": 0.1610468042274786, + "kl_loss": 1.345240831375122, + "loss_ib": 0.023594269528985023, + "step": 560 + }, + { + "ce_ib": 9.809609413146973, + "ce_orig": 0.9545562863349915, + "epoch": 0.16133438780645626, + "kl_loss": 1.27205228805542, + "loss_ib": 0.02253013104200363, + "step": 561 + }, + { + "ce_ib": 12.615915298461914, + "ce_orig": 1.327091932296753, + "epoch": 0.16133438780645626, + "kl_loss": 1.282986044883728, + "loss_ib": 0.02544577606022358, + "step": 561 + }, + { + "ce_ib": 10.288837432861328, + "ce_orig": 0.5523210763931274, + "epoch": 0.16133438780645626, + "kl_loss": 1.3225059509277344, + "loss_ib": 0.023513898253440857, + "step": 561 + }, + { + "ce_ib": 10.36892032623291, + "ce_orig": 0.6983376741409302, + "epoch": 0.16133438780645626, + "kl_loss": 1.2711155414581299, + "loss_ib": 0.02308007702231407, + "step": 561 + }, + { + "ce_ib": 14.066039085388184, + "ce_orig": 0.9940349459648132, + "epoch": 0.16162197138543388, + "kl_loss": 1.3311264514923096, + "loss_ib": 0.027377303689718246, + "step": 562 + }, + { + "ce_ib": 9.398420333862305, + "ce_orig": 1.150452733039856, + "epoch": 0.16162197138543388, + "kl_loss": 1.3074944019317627, + "loss_ib": 0.02247336320579052, + "step": 562 + }, + { + "ce_ib": 9.80187702178955, + "ce_orig": 0.8328919410705566, + "epoch": 0.16162197138543388, + "kl_loss": 1.331373929977417, + "loss_ib": 0.02311561442911625, + "step": 562 + }, + { + "ce_ib": 12.888148307800293, + "ce_orig": 1.2748291492462158, + "epoch": 0.16162197138543388, + "kl_loss": 1.2818001508712769, + "loss_ib": 0.025706149637699127, + "step": 562 + }, + { + "ce_ib": 8.798264503479004, + "ce_orig": 0.66322261095047, + "epoch": 0.16190955496441153, + "kl_loss": 1.3028491735458374, + "loss_ib": 0.02182675525546074, + "step": 563 + }, + { + "ce_ib": 13.072640419006348, + "ce_orig": 1.0565416812896729, + "epoch": 0.16190955496441153, + "kl_loss": 1.2858983278274536, + "loss_ib": 0.025931624695658684, + "step": 563 + }, + { + "ce_ib": 11.12070083618164, + "ce_orig": 0.8622493743896484, + "epoch": 0.16190955496441153, + "kl_loss": 1.2600901126861572, + "loss_ib": 0.0237216018140316, + "step": 563 + }, + { + "ce_ib": 11.012995719909668, + "ce_orig": 0.7809346914291382, + "epoch": 0.16190955496441153, + "kl_loss": 1.2810771465301514, + "loss_ib": 0.02382376603782177, + "step": 563 + }, + { + "ce_ib": 10.03192138671875, + "ce_orig": 0.5545583367347717, + "epoch": 0.16219713854338919, + "kl_loss": 1.372998833656311, + "loss_ib": 0.023761911317706108, + "step": 564 + }, + { + "ce_ib": 8.590304374694824, + "ce_orig": 0.7225477695465088, + "epoch": 0.16219713854338919, + "kl_loss": 1.3027560710906982, + "loss_ib": 0.021617865189909935, + "step": 564 + }, + { + "ce_ib": 10.352544784545898, + "ce_orig": 0.8774200081825256, + "epoch": 0.16219713854338919, + "kl_loss": 1.2814412117004395, + "loss_ib": 0.02316695638000965, + "step": 564 + }, + { + "ce_ib": 8.562765121459961, + "ce_orig": 0.6415224075317383, + "epoch": 0.16219713854338919, + "kl_loss": 1.2804956436157227, + "loss_ib": 0.021367721259593964, + "step": 564 + }, + { + "epoch": 0.1624847221223668, + "grad_norm": 0.08632536977529526, + "learning_rate": 9.98554555113021e-06, + "loss": 0.8462, + "step": 565 + }, + { + "ce_ib": 12.551013946533203, + "ce_orig": 0.995196521282196, + "epoch": 0.1624847221223668, + "kl_loss": 1.3475830554962158, + "loss_ib": 0.02602684497833252, + "step": 565 + }, + { + "ce_ib": 11.848214149475098, + "ce_orig": 1.4025741815567017, + "epoch": 0.1624847221223668, + "kl_loss": 1.2776107788085938, + "loss_ib": 0.024624323472380638, + "step": 565 + }, + { + "ce_ib": 8.570831298828125, + "ce_orig": 0.6328908205032349, + "epoch": 0.1624847221223668, + "kl_loss": 1.2646872997283936, + "loss_ib": 0.021217703819274902, + "step": 565 + }, + { + "ce_ib": 9.687134742736816, + "ce_orig": 0.7903947234153748, + "epoch": 0.1624847221223668, + "kl_loss": 1.3254048824310303, + "loss_ib": 0.022941183298826218, + "step": 565 + }, + { + "ce_ib": 13.013336181640625, + "ce_orig": 1.2271647453308105, + "epoch": 0.16277230570134446, + "kl_loss": 1.2702226638793945, + "loss_ib": 0.02571556344628334, + "step": 566 + }, + { + "ce_ib": 12.480305671691895, + "ce_orig": 1.1103395223617554, + "epoch": 0.16277230570134446, + "kl_loss": 1.2821930646896362, + "loss_ib": 0.025302235037088394, + "step": 566 + }, + { + "ce_ib": 9.443026542663574, + "ce_orig": 0.5126791596412659, + "epoch": 0.16277230570134446, + "kl_loss": 1.2745044231414795, + "loss_ib": 0.02218807116150856, + "step": 566 + }, + { + "ce_ib": 9.337321281433105, + "ce_orig": 0.7954445481300354, + "epoch": 0.16277230570134446, + "kl_loss": 1.2490813732147217, + "loss_ib": 0.02182813547551632, + "step": 566 + }, + { + "ce_ib": 10.778318405151367, + "ce_orig": 0.7537108659744263, + "epoch": 0.16305988928032208, + "kl_loss": 1.2937819957733154, + "loss_ib": 0.023716138675808907, + "step": 567 + }, + { + "ce_ib": 9.771125793457031, + "ce_orig": 0.40443235635757446, + "epoch": 0.16305988928032208, + "kl_loss": 1.3131227493286133, + "loss_ib": 0.0229023527354002, + "step": 567 + }, + { + "ce_ib": 9.85836124420166, + "ce_orig": 0.5563979744911194, + "epoch": 0.16305988928032208, + "kl_loss": 1.2832622528076172, + "loss_ib": 0.02269098162651062, + "step": 567 + }, + { + "ce_ib": 13.68719482421875, + "ce_orig": 0.9920021295547485, + "epoch": 0.16305988928032208, + "kl_loss": 1.2698428630828857, + "loss_ib": 0.02638562209904194, + "step": 567 + }, + { + "ce_ib": 9.411405563354492, + "ce_orig": 0.9342118501663208, + "epoch": 0.16334747285929974, + "kl_loss": 1.2332212924957275, + "loss_ib": 0.021743619814515114, + "step": 568 + }, + { + "ce_ib": 15.157500267028809, + "ce_orig": 1.5584248304367065, + "epoch": 0.16334747285929974, + "kl_loss": 1.2862458229064941, + "loss_ib": 0.02801995724439621, + "step": 568 + }, + { + "ce_ib": 10.411499977111816, + "ce_orig": 0.8281600475311279, + "epoch": 0.16334747285929974, + "kl_loss": 1.2496166229248047, + "loss_ib": 0.022907666862010956, + "step": 568 + }, + { + "ce_ib": 10.096942901611328, + "ce_orig": 0.6468956470489502, + "epoch": 0.16334747285929974, + "kl_loss": 1.2605764865875244, + "loss_ib": 0.02270270697772503, + "step": 568 + }, + { + "ce_ib": 9.528172492980957, + "ce_orig": 0.8783382773399353, + "epoch": 0.1636350564382774, + "kl_loss": 1.2387837171554565, + "loss_ib": 0.021916009485721588, + "step": 569 + }, + { + "ce_ib": 8.954733848571777, + "ce_orig": 0.8919200897216797, + "epoch": 0.1636350564382774, + "kl_loss": 1.2251062393188477, + "loss_ib": 0.02120579592883587, + "step": 569 + }, + { + "ce_ib": 8.508342742919922, + "ce_orig": 0.6990381479263306, + "epoch": 0.1636350564382774, + "kl_loss": 1.2213623523712158, + "loss_ib": 0.02072196640074253, + "step": 569 + }, + { + "ce_ib": 11.344082832336426, + "ce_orig": 0.9525802731513977, + "epoch": 0.1636350564382774, + "kl_loss": 1.248590111732483, + "loss_ib": 0.023829983547329903, + "step": 569 + }, + { + "epoch": 0.163922640017255, + "grad_norm": 0.09219575673341751, + "learning_rate": 9.984949852618381e-06, + "loss": 0.8852, + "step": 570 + }, + { + "ce_ib": 8.961785316467285, + "ce_orig": 0.7948698997497559, + "epoch": 0.163922640017255, + "kl_loss": 1.2222837209701538, + "loss_ib": 0.02118462324142456, + "step": 570 + }, + { + "ce_ib": 12.20908260345459, + "ce_orig": 0.9208235144615173, + "epoch": 0.163922640017255, + "kl_loss": 1.2455497980117798, + "loss_ib": 0.024664580821990967, + "step": 570 + }, + { + "ce_ib": 9.334521293640137, + "ce_orig": 0.9958298206329346, + "epoch": 0.163922640017255, + "kl_loss": 1.2763538360595703, + "loss_ib": 0.022098058834671974, + "step": 570 + }, + { + "ce_ib": 12.762809753417969, + "ce_orig": 1.1121208667755127, + "epoch": 0.163922640017255, + "kl_loss": 1.2300899028778076, + "loss_ib": 0.025063710287213326, + "step": 570 + }, + { + "ce_ib": 15.606366157531738, + "ce_orig": 1.6491622924804688, + "epoch": 0.16421022359623266, + "kl_loss": 1.2227786779403687, + "loss_ib": 0.027834152802824974, + "step": 571 + }, + { + "ce_ib": 10.236468315124512, + "ce_orig": 0.8589720726013184, + "epoch": 0.16421022359623266, + "kl_loss": 1.256370186805725, + "loss_ib": 0.02280016802251339, + "step": 571 + }, + { + "ce_ib": 9.943655967712402, + "ce_orig": 0.5925063490867615, + "epoch": 0.16421022359623266, + "kl_loss": 1.2559595108032227, + "loss_ib": 0.02250325120985508, + "step": 571 + }, + { + "ce_ib": 11.613914489746094, + "ce_orig": 0.7779126763343811, + "epoch": 0.16421022359623266, + "kl_loss": 1.392745018005371, + "loss_ib": 0.025541365146636963, + "step": 571 + }, + { + "ce_ib": 12.392518043518066, + "ce_orig": 0.4280785620212555, + "epoch": 0.1644978071752103, + "kl_loss": 1.3646981716156006, + "loss_ib": 0.026039499789476395, + "step": 572 + }, + { + "ce_ib": 9.918338775634766, + "ce_orig": 0.5322098731994629, + "epoch": 0.1644978071752103, + "kl_loss": 1.2963712215423584, + "loss_ib": 0.02288205176591873, + "step": 572 + }, + { + "ce_ib": 14.274211883544922, + "ce_orig": 1.3050851821899414, + "epoch": 0.1644978071752103, + "kl_loss": 1.2759932279586792, + "loss_ib": 0.02703414298593998, + "step": 572 + }, + { + "ce_ib": 11.698960304260254, + "ce_orig": 1.155306100845337, + "epoch": 0.1644978071752103, + "kl_loss": 1.2604784965515137, + "loss_ib": 0.024303745478391647, + "step": 572 + }, + { + "ce_ib": 10.412774085998535, + "ce_orig": 1.052548885345459, + "epoch": 0.16478539075418794, + "kl_loss": 1.2234094142913818, + "loss_ib": 0.022646868601441383, + "step": 573 + }, + { + "ce_ib": 10.732352256774902, + "ce_orig": 0.9245730042457581, + "epoch": 0.16478539075418794, + "kl_loss": 1.2317912578582764, + "loss_ib": 0.02305026538670063, + "step": 573 + }, + { + "ce_ib": 9.59011173248291, + "ce_orig": 0.7064767479896545, + "epoch": 0.16478539075418794, + "kl_loss": 1.224424123764038, + "loss_ib": 0.021834352985024452, + "step": 573 + }, + { + "ce_ib": 9.361687660217285, + "ce_orig": 0.8686097264289856, + "epoch": 0.16478539075418794, + "kl_loss": 1.1842972040176392, + "loss_ib": 0.021204659715294838, + "step": 573 + }, + { + "ce_ib": 7.736264228820801, + "ce_orig": 0.6079578995704651, + "epoch": 0.1650729743331656, + "kl_loss": 1.213087558746338, + "loss_ib": 0.01986713893711567, + "step": 574 + }, + { + "ce_ib": 11.044466972351074, + "ce_orig": 0.8884755969047546, + "epoch": 0.1650729743331656, + "kl_loss": 1.2339417934417725, + "loss_ib": 0.023383883759379387, + "step": 574 + }, + { + "ce_ib": 10.914957046508789, + "ce_orig": 0.9813688397407532, + "epoch": 0.1650729743331656, + "kl_loss": 1.2557547092437744, + "loss_ib": 0.023472504690289497, + "step": 574 + }, + { + "ce_ib": 13.932374000549316, + "ce_orig": 1.102638840675354, + "epoch": 0.1650729743331656, + "kl_loss": 1.2306591272354126, + "loss_ib": 0.02623896487057209, + "step": 574 + }, + { + "epoch": 0.1653605579121432, + "grad_norm": 0.09370694309473038, + "learning_rate": 9.984342144205327e-06, + "loss": 0.9041, + "step": 575 + }, + { + "ce_ib": 9.729941368103027, + "ce_orig": 0.8137699961662292, + "epoch": 0.1653605579121432, + "kl_loss": 1.218010663986206, + "loss_ib": 0.0219100471585989, + "step": 575 + }, + { + "ce_ib": 11.345166206359863, + "ce_orig": 0.8795411586761475, + "epoch": 0.1653605579121432, + "kl_loss": 1.238341212272644, + "loss_ib": 0.02372857742011547, + "step": 575 + }, + { + "ce_ib": 12.664711952209473, + "ce_orig": 1.4619799852371216, + "epoch": 0.1653605579121432, + "kl_loss": 1.2479004859924316, + "loss_ib": 0.025143718346953392, + "step": 575 + }, + { + "ce_ib": 7.3197340965271, + "ce_orig": 0.5423354506492615, + "epoch": 0.1653605579121432, + "kl_loss": 1.4118754863739014, + "loss_ib": 0.021438488736748695, + "step": 575 + }, + { + "ce_ib": 11.586548805236816, + "ce_orig": 0.8300837874412537, + "epoch": 0.16564814149112086, + "kl_loss": 1.2648472785949707, + "loss_ib": 0.024235021322965622, + "step": 576 + }, + { + "ce_ib": 9.670539855957031, + "ce_orig": 0.8994592428207397, + "epoch": 0.16564814149112086, + "kl_loss": 1.202571988105774, + "loss_ib": 0.021696260198950768, + "step": 576 + }, + { + "ce_ib": 5.678918838500977, + "ce_orig": 0.28384220600128174, + "epoch": 0.16564814149112086, + "kl_loss": 1.3513402938842773, + "loss_ib": 0.019192321226000786, + "step": 576 + }, + { + "ce_ib": 11.467476844787598, + "ce_orig": 0.5451651215553284, + "epoch": 0.16564814149112086, + "kl_loss": 1.2790919542312622, + "loss_ib": 0.02425839565694332, + "step": 576 + }, + { + "ce_ib": 8.113414764404297, + "ce_orig": 0.7992563843727112, + "epoch": 0.1659357250700985, + "kl_loss": 1.1959567070007324, + "loss_ib": 0.02007298171520233, + "step": 577 + }, + { + "ce_ib": 6.4770002365112305, + "ce_orig": 0.4411783218383789, + "epoch": 0.1659357250700985, + "kl_loss": 1.2061271667480469, + "loss_ib": 0.018538272008299828, + "step": 577 + }, + { + "ce_ib": 9.995020866394043, + "ce_orig": 0.5499460697174072, + "epoch": 0.1659357250700985, + "kl_loss": 1.2197155952453613, + "loss_ib": 0.022192176431417465, + "step": 577 + }, + { + "ce_ib": 5.73984956741333, + "ce_orig": 0.6255266666412354, + "epoch": 0.1659357250700985, + "kl_loss": 1.1914690732955933, + "loss_ib": 0.0176545400172472, + "step": 577 + }, + { + "ce_ib": 12.113245964050293, + "ce_orig": 1.025524377822876, + "epoch": 0.16622330864907614, + "kl_loss": 1.2450977563858032, + "loss_ib": 0.024564223363995552, + "step": 578 + }, + { + "ce_ib": 5.700209617614746, + "ce_orig": 0.28980499505996704, + "epoch": 0.16622330864907614, + "kl_loss": 1.2563034296035767, + "loss_ib": 0.01826324500143528, + "step": 578 + }, + { + "ce_ib": 12.162859916687012, + "ce_orig": 0.8536310195922852, + "epoch": 0.16622330864907614, + "kl_loss": 1.220404028892517, + "loss_ib": 0.024366900324821472, + "step": 578 + }, + { + "ce_ib": 12.775392532348633, + "ce_orig": 1.3573917150497437, + "epoch": 0.16622330864907614, + "kl_loss": 1.1893036365509033, + "loss_ib": 0.024668429046869278, + "step": 578 + }, + { + "ce_ib": 13.641351699829102, + "ce_orig": 1.5364866256713867, + "epoch": 0.1665108922280538, + "kl_loss": 1.261054277420044, + "loss_ib": 0.026251891627907753, + "step": 579 + }, + { + "ce_ib": 11.468977928161621, + "ce_orig": 1.2083629369735718, + "epoch": 0.1665108922280538, + "kl_loss": 1.2402012348175049, + "loss_ib": 0.023870989680290222, + "step": 579 + }, + { + "ce_ib": 14.304160118103027, + "ce_orig": 1.1799192428588867, + "epoch": 0.1665108922280538, + "kl_loss": 1.2703068256378174, + "loss_ib": 0.027007225900888443, + "step": 579 + }, + { + "ce_ib": 13.134546279907227, + "ce_orig": 1.2673437595367432, + "epoch": 0.1665108922280538, + "kl_loss": 1.1939582824707031, + "loss_ib": 0.025074128061532974, + "step": 579 + }, + { + "epoch": 0.16679847580703142, + "grad_norm": 0.08239021897315979, + "learning_rate": 9.983722427355157e-06, + "loss": 0.9056, + "step": 580 + }, + { + "ce_ib": 13.295975685119629, + "ce_orig": 1.0868593454360962, + "epoch": 0.16679847580703142, + "kl_loss": 1.2228111028671265, + "loss_ib": 0.025524087250232697, + "step": 580 + }, + { + "ce_ib": 6.748361110687256, + "ce_orig": 0.49680906534194946, + "epoch": 0.16679847580703142, + "kl_loss": 1.2128095626831055, + "loss_ib": 0.0188764575868845, + "step": 580 + }, + { + "ce_ib": 13.180870056152344, + "ce_orig": 0.8746943473815918, + "epoch": 0.16679847580703142, + "kl_loss": 1.2134785652160645, + "loss_ib": 0.025315655395388603, + "step": 580 + }, + { + "ce_ib": 12.037747383117676, + "ce_orig": 0.9534928202629089, + "epoch": 0.16679847580703142, + "kl_loss": 1.2241135835647583, + "loss_ib": 0.024278882890939713, + "step": 580 + }, + { + "ce_ib": 8.912028312683105, + "ce_orig": 0.7467697858810425, + "epoch": 0.16708605938600907, + "kl_loss": 1.1925266981124878, + "loss_ib": 0.020837293937802315, + "step": 581 + }, + { + "ce_ib": 9.094566345214844, + "ce_orig": 0.6505690813064575, + "epoch": 0.16708605938600907, + "kl_loss": 1.1850394010543823, + "loss_ib": 0.020944960415363312, + "step": 581 + }, + { + "ce_ib": 12.56716251373291, + "ce_orig": 1.2765976190567017, + "epoch": 0.16708605938600907, + "kl_loss": 1.2733515501022339, + "loss_ib": 0.025300677865743637, + "step": 581 + }, + { + "ce_ib": 13.4862699508667, + "ce_orig": 1.0705842971801758, + "epoch": 0.16708605938600907, + "kl_loss": 1.2059423923492432, + "loss_ib": 0.025545692071318626, + "step": 581 + }, + { + "ce_ib": 8.2174711227417, + "ce_orig": 0.7286640405654907, + "epoch": 0.1673736429649867, + "kl_loss": 1.2195181846618652, + "loss_ib": 0.02041265182197094, + "step": 582 + }, + { + "ce_ib": 13.159209251403809, + "ce_orig": 1.2096375226974487, + "epoch": 0.1673736429649867, + "kl_loss": 1.2165024280548096, + "loss_ib": 0.02532423473894596, + "step": 582 + }, + { + "ce_ib": 11.165864944458008, + "ce_orig": 0.7757768630981445, + "epoch": 0.1673736429649867, + "kl_loss": 1.1681039333343506, + "loss_ib": 0.022846903651952744, + "step": 582 + }, + { + "ce_ib": 8.71281623840332, + "ce_orig": 0.7169674634933472, + "epoch": 0.1673736429649867, + "kl_loss": 1.191447377204895, + "loss_ib": 0.02062728814780712, + "step": 582 + }, + { + "ce_ib": 9.235655784606934, + "ce_orig": 0.6690336465835571, + "epoch": 0.16766122654396434, + "kl_loss": 1.2143634557724, + "loss_ib": 0.021379288285970688, + "step": 583 + }, + { + "ce_ib": 8.412071228027344, + "ce_orig": 0.843682587146759, + "epoch": 0.16766122654396434, + "kl_loss": 1.2117249965667725, + "loss_ib": 0.02052932232618332, + "step": 583 + }, + { + "ce_ib": 10.572442054748535, + "ce_orig": 0.7864252328872681, + "epoch": 0.16766122654396434, + "kl_loss": 1.2299147844314575, + "loss_ib": 0.0228715892881155, + "step": 583 + }, + { + "ce_ib": 8.599762916564941, + "ce_orig": 0.7645632028579712, + "epoch": 0.16766122654396434, + "kl_loss": 1.2004691362380981, + "loss_ib": 0.02060445211827755, + "step": 583 + }, + { + "ce_ib": 11.767807960510254, + "ce_orig": 1.0832188129425049, + "epoch": 0.167948810122942, + "kl_loss": 1.1508519649505615, + "loss_ib": 0.023276329040527344, + "step": 584 + }, + { + "ce_ib": 9.03587818145752, + "ce_orig": 0.7372077107429504, + "epoch": 0.167948810122942, + "kl_loss": 1.1607894897460938, + "loss_ib": 0.020643772557377815, + "step": 584 + }, + { + "ce_ib": 11.698123931884766, + "ce_orig": 0.8797475695610046, + "epoch": 0.167948810122942, + "kl_loss": 1.2153360843658447, + "loss_ib": 0.023851484060287476, + "step": 584 + }, + { + "ce_ib": 14.928531646728516, + "ce_orig": 1.3900351524353027, + "epoch": 0.167948810122942, + "kl_loss": 1.276613473892212, + "loss_ib": 0.027694664895534515, + "step": 584 + }, + { + "epoch": 0.16823639370191962, + "grad_norm": 0.11208527535200119, + "learning_rate": 9.983090703560911e-06, + "loss": 0.8947, + "step": 585 + }, + { + "ce_ib": 11.16375732421875, + "ce_orig": 0.9449269771575928, + "epoch": 0.16823639370191962, + "kl_loss": 1.197243094444275, + "loss_ib": 0.023136189207434654, + "step": 585 + }, + { + "ce_ib": 9.852100372314453, + "ce_orig": 1.078002691268921, + "epoch": 0.16823639370191962, + "kl_loss": 1.1607494354248047, + "loss_ib": 0.021459592506289482, + "step": 585 + }, + { + "ce_ib": 11.235018730163574, + "ce_orig": 1.1319299936294556, + "epoch": 0.16823639370191962, + "kl_loss": 1.2304571866989136, + "loss_ib": 0.0235395897179842, + "step": 585 + }, + { + "ce_ib": 7.328647613525391, + "ce_orig": 0.9203292727470398, + "epoch": 0.16823639370191962, + "kl_loss": 1.1338272094726562, + "loss_ib": 0.018666919320821762, + "step": 585 + }, + { + "ce_ib": 7.884848117828369, + "ce_orig": 0.5528172850608826, + "epoch": 0.16852397728089727, + "kl_loss": 1.2366106510162354, + "loss_ib": 0.02025095373392105, + "step": 586 + }, + { + "ce_ib": 13.856200218200684, + "ce_orig": 0.9960865378379822, + "epoch": 0.16852397728089727, + "kl_loss": 1.2090309858322144, + "loss_ib": 0.02594650909304619, + "step": 586 + }, + { + "ce_ib": 9.895005226135254, + "ce_orig": 0.5365419387817383, + "epoch": 0.16852397728089727, + "kl_loss": 1.1710314750671387, + "loss_ib": 0.021605320274829865, + "step": 586 + }, + { + "ce_ib": 11.017683029174805, + "ce_orig": 0.8266361951828003, + "epoch": 0.16852397728089727, + "kl_loss": 1.2141588926315308, + "loss_ib": 0.023159272968769073, + "step": 586 + }, + { + "ce_ib": 11.70738410949707, + "ce_orig": 0.9832698702812195, + "epoch": 0.1688115608598749, + "kl_loss": 1.1586453914642334, + "loss_ib": 0.023293837904930115, + "step": 587 + }, + { + "ce_ib": 8.602079391479492, + "ce_orig": 0.4699603319168091, + "epoch": 0.1688115608598749, + "kl_loss": 1.2227380275726318, + "loss_ib": 0.02082945965230465, + "step": 587 + }, + { + "ce_ib": 14.872537612915039, + "ce_orig": 1.5698349475860596, + "epoch": 0.1688115608598749, + "kl_loss": 1.2066720724105835, + "loss_ib": 0.026939257979393005, + "step": 587 + }, + { + "ce_ib": 9.357597351074219, + "ce_orig": 0.9672819972038269, + "epoch": 0.1688115608598749, + "kl_loss": 1.1683168411254883, + "loss_ib": 0.021040765568614006, + "step": 587 + }, + { + "ce_ib": 12.048833847045898, + "ce_orig": 1.0971179008483887, + "epoch": 0.16909914443885254, + "kl_loss": 1.2246365547180176, + "loss_ib": 0.024295201525092125, + "step": 588 + }, + { + "ce_ib": 7.782527446746826, + "ce_orig": 0.43647029995918274, + "epoch": 0.16909914443885254, + "kl_loss": 1.1583232879638672, + "loss_ib": 0.019365761429071426, + "step": 588 + }, + { + "ce_ib": 7.263584136962891, + "ce_orig": 0.7806444764137268, + "epoch": 0.16909914443885254, + "kl_loss": 1.1670253276824951, + "loss_ib": 0.018933836370706558, + "step": 588 + }, + { + "ce_ib": 5.9214959144592285, + "ce_orig": 0.5120716094970703, + "epoch": 0.16909914443885254, + "kl_loss": 1.1531264781951904, + "loss_ib": 0.01745275966823101, + "step": 588 + }, + { + "ce_ib": 11.046984672546387, + "ce_orig": 1.098747730255127, + "epoch": 0.16938672801783017, + "kl_loss": 1.141385793685913, + "loss_ib": 0.02246084250509739, + "step": 589 + }, + { + "ce_ib": 12.190630912780762, + "ce_orig": 0.7543506622314453, + "epoch": 0.16938672801783017, + "kl_loss": 1.1634085178375244, + "loss_ib": 0.023824715986847878, + "step": 589 + }, + { + "ce_ib": 11.490245819091797, + "ce_orig": 1.0767667293548584, + "epoch": 0.16938672801783017, + "kl_loss": 1.2043827772140503, + "loss_ib": 0.023534072563052177, + "step": 589 + }, + { + "ce_ib": 10.710217475891113, + "ce_orig": 0.803483247756958, + "epoch": 0.16938672801783017, + "kl_loss": 1.1858105659484863, + "loss_ib": 0.022568322718143463, + "step": 589 + }, + { + "epoch": 0.16967431159680782, + "grad_norm": 0.09646889567375183, + "learning_rate": 9.982446974344561e-06, + "loss": 0.893, + "step": 590 + }, + { + "ce_ib": 7.601869583129883, + "ce_orig": 0.4696982204914093, + "epoch": 0.16967431159680782, + "kl_loss": 1.1903643608093262, + "loss_ib": 0.019505511969327927, + "step": 590 + }, + { + "ce_ib": 9.697887420654297, + "ce_orig": 0.4679652452468872, + "epoch": 0.16967431159680782, + "kl_loss": 1.1964094638824463, + "loss_ib": 0.02166198194026947, + "step": 590 + }, + { + "ce_ib": 5.316531658172607, + "ce_orig": 0.6131843328475952, + "epoch": 0.16967431159680782, + "kl_loss": 1.1232414245605469, + "loss_ib": 0.016548944637179375, + "step": 590 + }, + { + "ce_ib": 11.569339752197266, + "ce_orig": 0.532830536365509, + "epoch": 0.16967431159680782, + "kl_loss": 1.2179789543151855, + "loss_ib": 0.023749129846692085, + "step": 590 + }, + { + "ce_ib": 14.859304428100586, + "ce_orig": 1.699377417564392, + "epoch": 0.16996189517578547, + "kl_loss": 1.1641483306884766, + "loss_ib": 0.02650078758597374, + "step": 591 + }, + { + "ce_ib": 12.174185752868652, + "ce_orig": 1.0933891534805298, + "epoch": 0.16996189517578547, + "kl_loss": 1.205374002456665, + "loss_ib": 0.024227924644947052, + "step": 591 + }, + { + "ce_ib": 12.189451217651367, + "ce_orig": 1.1713759899139404, + "epoch": 0.16996189517578547, + "kl_loss": 1.1780178546905518, + "loss_ib": 0.023969629779458046, + "step": 591 + }, + { + "ce_ib": 6.584260940551758, + "ce_orig": 0.531925618648529, + "epoch": 0.16996189517578547, + "kl_loss": 1.1629016399383545, + "loss_ib": 0.01821327582001686, + "step": 591 + }, + { + "ce_ib": 10.2810697555542, + "ce_orig": 1.2692686319351196, + "epoch": 0.1702494787547631, + "kl_loss": 1.1622142791748047, + "loss_ib": 0.02190321311354637, + "step": 592 + }, + { + "ce_ib": 8.695658683776855, + "ce_orig": 1.1439961194992065, + "epoch": 0.1702494787547631, + "kl_loss": 1.2565195560455322, + "loss_ib": 0.021260853856801987, + "step": 592 + }, + { + "ce_ib": 12.925812721252441, + "ce_orig": 1.113416314125061, + "epoch": 0.1702494787547631, + "kl_loss": 1.1489940881729126, + "loss_ib": 0.02441575564444065, + "step": 592 + }, + { + "ce_ib": 8.097929954528809, + "ce_orig": 0.3692007064819336, + "epoch": 0.1702494787547631, + "kl_loss": 1.2639890909194946, + "loss_ib": 0.020737819373607635, + "step": 592 + }, + { + "ce_ib": 12.470227241516113, + "ce_orig": 1.292494535446167, + "epoch": 0.17053706233374075, + "kl_loss": 1.2025644779205322, + "loss_ib": 0.024495873600244522, + "step": 593 + }, + { + "ce_ib": 8.987156867980957, + "ce_orig": 0.7146295309066772, + "epoch": 0.17053706233374075, + "kl_loss": 1.148221492767334, + "loss_ib": 0.02046937122941017, + "step": 593 + }, + { + "ce_ib": 8.649067878723145, + "ce_orig": 0.6290068626403809, + "epoch": 0.17053706233374075, + "kl_loss": 1.1626759767532349, + "loss_ib": 0.020275825634598732, + "step": 593 + }, + { + "ce_ib": 9.381182670593262, + "ce_orig": 0.8150485157966614, + "epoch": 0.17053706233374075, + "kl_loss": 1.1636794805526733, + "loss_ib": 0.021017977967858315, + "step": 593 + }, + { + "ce_ib": 13.075675010681152, + "ce_orig": 1.3097158670425415, + "epoch": 0.17082464591271837, + "kl_loss": 1.1714305877685547, + "loss_ib": 0.024789981544017792, + "step": 594 + }, + { + "ce_ib": 16.90681266784668, + "ce_orig": 1.63013756275177, + "epoch": 0.17082464591271837, + "kl_loss": 1.1639704704284668, + "loss_ib": 0.028546517714858055, + "step": 594 + }, + { + "ce_ib": 9.945943832397461, + "ce_orig": 0.620954692363739, + "epoch": 0.17082464591271837, + "kl_loss": 1.1276791095733643, + "loss_ib": 0.021222734823822975, + "step": 594 + }, + { + "ce_ib": 7.538568496704102, + "ce_orig": 0.7477220296859741, + "epoch": 0.17082464591271837, + "kl_loss": 1.1129953861236572, + "loss_ib": 0.01866852305829525, + "step": 594 + }, + { + "epoch": 0.17111222949169602, + "grad_norm": 0.0950080156326294, + "learning_rate": 9.981791241257001e-06, + "loss": 0.8499, + "step": 595 + }, + { + "ce_ib": 9.64914321899414, + "ce_orig": 0.8221632242202759, + "epoch": 0.17111222949169602, + "kl_loss": 1.2603493928909302, + "loss_ib": 0.022252636030316353, + "step": 595 + }, + { + "ce_ib": 10.428970336914062, + "ce_orig": 0.9476694464683533, + "epoch": 0.17111222949169602, + "kl_loss": 1.2723057270050049, + "loss_ib": 0.023152027279138565, + "step": 595 + }, + { + "ce_ib": 11.565075874328613, + "ce_orig": 0.7016614675521851, + "epoch": 0.17111222949169602, + "kl_loss": 1.214961290359497, + "loss_ib": 0.023714689537882805, + "step": 595 + }, + { + "ce_ib": 7.760899066925049, + "ce_orig": 0.655583918094635, + "epoch": 0.17111222949169602, + "kl_loss": 1.1367559432983398, + "loss_ib": 0.019128458574414253, + "step": 595 + }, + { + "ce_ib": 13.004273414611816, + "ce_orig": 1.171642541885376, + "epoch": 0.17139981307067367, + "kl_loss": 1.1165239810943604, + "loss_ib": 0.02416951209306717, + "step": 596 + }, + { + "ce_ib": 8.871601104736328, + "ce_orig": 0.6587203145027161, + "epoch": 0.17139981307067367, + "kl_loss": 1.1157963275909424, + "loss_ib": 0.020029563456773758, + "step": 596 + }, + { + "ce_ib": 8.543595314025879, + "ce_orig": 0.680071234703064, + "epoch": 0.17139981307067367, + "kl_loss": 1.2704293727874756, + "loss_ib": 0.02124788984656334, + "step": 596 + }, + { + "ce_ib": 12.145650863647461, + "ce_orig": 0.9769390821456909, + "epoch": 0.17139981307067367, + "kl_loss": 1.1289055347442627, + "loss_ib": 0.023434706032276154, + "step": 596 + }, + { + "ce_ib": 11.884757041931152, + "ce_orig": 0.7717639803886414, + "epoch": 0.1716873966496513, + "kl_loss": 1.1151626110076904, + "loss_ib": 0.02303638495504856, + "step": 597 + }, + { + "ce_ib": 13.524641036987305, + "ce_orig": 1.0390403270721436, + "epoch": 0.1716873966496513, + "kl_loss": 1.1430152654647827, + "loss_ib": 0.024954792112112045, + "step": 597 + }, + { + "ce_ib": 10.59398365020752, + "ce_orig": 1.0760669708251953, + "epoch": 0.1716873966496513, + "kl_loss": 1.267032265663147, + "loss_ib": 0.023264307528734207, + "step": 597 + }, + { + "ce_ib": 9.16975212097168, + "ce_orig": 0.7946443557739258, + "epoch": 0.1716873966496513, + "kl_loss": 1.332892894744873, + "loss_ib": 0.022498680278658867, + "step": 597 + }, + { + "ce_ib": 15.392538070678711, + "ce_orig": 1.7411152124404907, + "epoch": 0.17197498022862895, + "kl_loss": 1.1676359176635742, + "loss_ib": 0.02706889621913433, + "step": 598 + }, + { + "ce_ib": 10.269882202148438, + "ce_orig": 0.9134522676467896, + "epoch": 0.17197498022862895, + "kl_loss": 1.14839768409729, + "loss_ib": 0.021753858774900436, + "step": 598 + }, + { + "ce_ib": 8.251395225524902, + "ce_orig": 0.6777662634849548, + "epoch": 0.17197498022862895, + "kl_loss": 1.0949749946594238, + "loss_ib": 0.019201144576072693, + "step": 598 + }, + { + "ce_ib": 7.8162455558776855, + "ce_orig": 0.5873789191246033, + "epoch": 0.17197498022862895, + "kl_loss": 1.1189738512039185, + "loss_ib": 0.01900598406791687, + "step": 598 + }, + { + "ce_ib": 8.662796974182129, + "ce_orig": 0.919073760509491, + "epoch": 0.17226256380760657, + "kl_loss": 1.1068514585494995, + "loss_ib": 0.01973131112754345, + "step": 599 + }, + { + "ce_ib": 11.996581077575684, + "ce_orig": 1.1948639154434204, + "epoch": 0.17226256380760657, + "kl_loss": 1.1755080223083496, + "loss_ib": 0.02375166118144989, + "step": 599 + }, + { + "ce_ib": 12.551753044128418, + "ce_orig": 1.4086652994155884, + "epoch": 0.17226256380760657, + "kl_loss": 1.1488478183746338, + "loss_ib": 0.024040231481194496, + "step": 599 + }, + { + "ce_ib": 12.489664077758789, + "ce_orig": 1.4374010562896729, + "epoch": 0.17226256380760657, + "kl_loss": 1.163723111152649, + "loss_ib": 0.024126896634697914, + "step": 599 + }, + { + "epoch": 0.17255014738658422, + "grad_norm": 0.08578155934810638, + "learning_rate": 9.98112350587804e-06, + "loss": 0.8373, + "step": 600 + }, + { + "ce_ib": 14.228236198425293, + "ce_orig": 1.494513750076294, + "epoch": 0.17255014738658422, + "kl_loss": 1.1346684694290161, + "loss_ib": 0.025574922561645508, + "step": 600 + }, + { + "ce_ib": 10.47590160369873, + "ce_orig": 0.6758707165718079, + "epoch": 0.17255014738658422, + "kl_loss": 1.149749755859375, + "loss_ib": 0.021973399445414543, + "step": 600 + }, + { + "ce_ib": 9.56252670288086, + "ce_orig": 0.6861965656280518, + "epoch": 0.17255014738658422, + "kl_loss": 1.146235704421997, + "loss_ib": 0.021024884656071663, + "step": 600 + }, + { + "ce_ib": 8.769157409667969, + "ce_orig": 0.6426496505737305, + "epoch": 0.17255014738658422, + "kl_loss": 1.1051974296569824, + "loss_ib": 0.019821131601929665, + "step": 600 + }, + { + "ce_ib": 6.642595291137695, + "ce_orig": 0.5588423013687134, + "epoch": 0.17283773096556188, + "kl_loss": 1.1010701656341553, + "loss_ib": 0.017653297632932663, + "step": 601 + }, + { + "ce_ib": 10.748079299926758, + "ce_orig": 0.837460458278656, + "epoch": 0.17283773096556188, + "kl_loss": 1.1604368686676025, + "loss_ib": 0.022352447733283043, + "step": 601 + }, + { + "ce_ib": 8.990370750427246, + "ce_orig": 0.8383088707923889, + "epoch": 0.17283773096556188, + "kl_loss": 1.1006824970245361, + "loss_ib": 0.01999719627201557, + "step": 601 + }, + { + "ce_ib": 9.197964668273926, + "ce_orig": 0.686692476272583, + "epoch": 0.17283773096556188, + "kl_loss": 1.1443568468093872, + "loss_ib": 0.02064153179526329, + "step": 601 + }, + { + "ce_ib": 8.66501522064209, + "ce_orig": 0.525370180606842, + "epoch": 0.1731253145445395, + "kl_loss": 1.1227505207061768, + "loss_ib": 0.01989252120256424, + "step": 602 + }, + { + "ce_ib": 11.858187675476074, + "ce_orig": 0.8876397013664246, + "epoch": 0.1731253145445395, + "kl_loss": 1.1747143268585205, + "loss_ib": 0.023605331778526306, + "step": 602 + }, + { + "ce_ib": 6.1840105056762695, + "ce_orig": 0.6205452680587769, + "epoch": 0.1731253145445395, + "kl_loss": 1.0966415405273438, + "loss_ib": 0.017150426283478737, + "step": 602 + }, + { + "ce_ib": 11.21971607208252, + "ce_orig": 0.9764306545257568, + "epoch": 0.1731253145445395, + "kl_loss": 1.116091012954712, + "loss_ib": 0.02238062582910061, + "step": 602 + }, + { + "ce_ib": 6.916097164154053, + "ce_orig": 0.5436660051345825, + "epoch": 0.17341289812351715, + "kl_loss": 1.1902745962142944, + "loss_ib": 0.018818842247128487, + "step": 603 + }, + { + "ce_ib": 8.293400764465332, + "ce_orig": 0.8351038694381714, + "epoch": 0.17341289812351715, + "kl_loss": 1.1848680973052979, + "loss_ib": 0.0201420821249485, + "step": 603 + }, + { + "ce_ib": 6.613530158996582, + "ce_orig": 0.18875178694725037, + "epoch": 0.17341289812351715, + "kl_loss": 1.1903159618377686, + "loss_ib": 0.01851668953895569, + "step": 603 + }, + { + "ce_ib": 11.624032020568848, + "ce_orig": 1.285302758216858, + "epoch": 0.17341289812351715, + "kl_loss": 1.1025352478027344, + "loss_ib": 0.022649383172392845, + "step": 603 + }, + { + "ce_ib": 15.579438209533691, + "ce_orig": 1.5998573303222656, + "epoch": 0.17370048170249477, + "kl_loss": 1.2801098823547363, + "loss_ib": 0.028380535542964935, + "step": 604 + }, + { + "ce_ib": 12.44752025604248, + "ce_orig": 1.4038703441619873, + "epoch": 0.17370048170249477, + "kl_loss": 1.1902649402618408, + "loss_ib": 0.02435017004609108, + "step": 604 + }, + { + "ce_ib": 10.84005355834961, + "ce_orig": 0.6248370409011841, + "epoch": 0.17370048170249477, + "kl_loss": 1.1507587432861328, + "loss_ib": 0.022347640246152878, + "step": 604 + }, + { + "ce_ib": 8.29090404510498, + "ce_orig": 0.7321332097053528, + "epoch": 0.17370048170249477, + "kl_loss": 1.1337003707885742, + "loss_ib": 0.019627906382083893, + "step": 604 + }, + { + "epoch": 0.17398806528147243, + "grad_norm": 0.08757986128330231, + "learning_rate": 9.980443769816412e-06, + "loss": 0.8879, + "step": 605 + }, + { + "ce_ib": 16.679454803466797, + "ce_orig": 1.7097995281219482, + "epoch": 0.17398806528147243, + "kl_loss": 1.1676082611083984, + "loss_ib": 0.028355535119771957, + "step": 605 + }, + { + "ce_ib": 8.775374412536621, + "ce_orig": 1.0261955261230469, + "epoch": 0.17398806528147243, + "kl_loss": 1.0867377519607544, + "loss_ib": 0.019642751663923264, + "step": 605 + }, + { + "ce_ib": 9.250091552734375, + "ce_orig": 0.7789836525917053, + "epoch": 0.17398806528147243, + "kl_loss": 1.149935007095337, + "loss_ib": 0.020749442279338837, + "step": 605 + }, + { + "ce_ib": 9.42496395111084, + "ce_orig": 0.8384370803833008, + "epoch": 0.17398806528147243, + "kl_loss": 1.1811635494232178, + "loss_ib": 0.02123660035431385, + "step": 605 + }, + { + "ce_ib": 15.161707878112793, + "ce_orig": 2.020648717880249, + "epoch": 0.17427564886045008, + "kl_loss": 1.1757433414459229, + "loss_ib": 0.026919139549136162, + "step": 606 + }, + { + "ce_ib": 4.969954013824463, + "ce_orig": 0.5350307822227478, + "epoch": 0.17427564886045008, + "kl_loss": 1.0870544910430908, + "loss_ib": 0.015840498730540276, + "step": 606 + }, + { + "ce_ib": 10.833635330200195, + "ce_orig": 1.334272861480713, + "epoch": 0.17427564886045008, + "kl_loss": 1.116763949394226, + "loss_ib": 0.022001275792717934, + "step": 606 + }, + { + "ce_ib": 13.298807144165039, + "ce_orig": 1.2746291160583496, + "epoch": 0.17427564886045008, + "kl_loss": 1.1182126998901367, + "loss_ib": 0.02448093332350254, + "step": 606 + }, + { + "ce_ib": 8.028116226196289, + "ce_orig": 0.2780713737010956, + "epoch": 0.1745632324394277, + "kl_loss": 1.3144466876983643, + "loss_ib": 0.02117258310317993, + "step": 607 + }, + { + "ce_ib": 9.471242904663086, + "ce_orig": 0.8981790542602539, + "epoch": 0.1745632324394277, + "kl_loss": 1.1512048244476318, + "loss_ib": 0.020983289927244186, + "step": 607 + }, + { + "ce_ib": 11.815406799316406, + "ce_orig": 0.8096030950546265, + "epoch": 0.1745632324394277, + "kl_loss": 1.0907784700393677, + "loss_ib": 0.02272319234907627, + "step": 607 + }, + { + "ce_ib": 8.835672378540039, + "ce_orig": 0.8250407576560974, + "epoch": 0.1745632324394277, + "kl_loss": 1.0707385540008545, + "loss_ib": 0.019543059170246124, + "step": 607 + }, + { + "ce_ib": 14.99797248840332, + "ce_orig": 1.081514835357666, + "epoch": 0.17485081601840535, + "kl_loss": 1.3153797388076782, + "loss_ib": 0.028151769191026688, + "step": 608 + }, + { + "ce_ib": 13.794142723083496, + "ce_orig": 1.310433268547058, + "epoch": 0.17485081601840535, + "kl_loss": 1.173295497894287, + "loss_ib": 0.025527097284793854, + "step": 608 + }, + { + "ce_ib": 6.75504732131958, + "ce_orig": 0.7089630365371704, + "epoch": 0.17485081601840535, + "kl_loss": 1.1751198768615723, + "loss_ib": 0.01850624568760395, + "step": 608 + }, + { + "ce_ib": 10.636677742004395, + "ce_orig": 0.9004390239715576, + "epoch": 0.17485081601840535, + "kl_loss": 1.1462738513946533, + "loss_ib": 0.022099414840340614, + "step": 608 + }, + { + "ce_ib": 16.850482940673828, + "ce_orig": 1.1504744291305542, + "epoch": 0.17513839959738298, + "kl_loss": 1.2235054969787598, + "loss_ib": 0.029085537418723106, + "step": 609 + }, + { + "ce_ib": 13.915315628051758, + "ce_orig": 0.9233617782592773, + "epoch": 0.17513839959738298, + "kl_loss": 1.2110941410064697, + "loss_ib": 0.026026258245110512, + "step": 609 + }, + { + "ce_ib": 11.495623588562012, + "ce_orig": 0.9102531671524048, + "epoch": 0.17513839959738298, + "kl_loss": 1.1143162250518799, + "loss_ib": 0.02263878472149372, + "step": 609 + }, + { + "ce_ib": 11.01639175415039, + "ce_orig": 1.2989511489868164, + "epoch": 0.17513839959738298, + "kl_loss": 1.1308627128601074, + "loss_ib": 0.022325018420815468, + "step": 609 + }, + { + "epoch": 0.17542598317636063, + "grad_norm": 0.09487691521644592, + "learning_rate": 9.979752034709756e-06, + "loss": 0.943, + "step": 610 + }, + { + "ce_ib": 9.711048126220703, + "ce_orig": 0.9693747162818909, + "epoch": 0.17542598317636063, + "kl_loss": 1.074932336807251, + "loss_ib": 0.020460370928049088, + "step": 610 + }, + { + "ce_ib": 14.960775375366211, + "ce_orig": 1.6066879034042358, + "epoch": 0.17542598317636063, + "kl_loss": 1.1578710079193115, + "loss_ib": 0.026539484038949013, + "step": 610 + }, + { + "ce_ib": 11.009191513061523, + "ce_orig": 0.753725528717041, + "epoch": 0.17542598317636063, + "kl_loss": 1.1147217750549316, + "loss_ib": 0.022156409919261932, + "step": 610 + }, + { + "ce_ib": 7.617627143859863, + "ce_orig": 0.5374597311019897, + "epoch": 0.17542598317636063, + "kl_loss": 1.0917261838912964, + "loss_ib": 0.018534889444708824, + "step": 610 + }, + { + "ce_ib": 9.379159927368164, + "ce_orig": 0.839108943939209, + "epoch": 0.17571356675533828, + "kl_loss": 1.1200788021087646, + "loss_ib": 0.020579947158694267, + "step": 611 + }, + { + "ce_ib": 9.289336204528809, + "ce_orig": 0.5786874890327454, + "epoch": 0.17571356675533828, + "kl_loss": 1.251037836074829, + "loss_ib": 0.021799713373184204, + "step": 611 + }, + { + "ce_ib": 6.256180286407471, + "ce_orig": 0.496167927980423, + "epoch": 0.17571356675533828, + "kl_loss": 1.1585159301757812, + "loss_ib": 0.017841339111328125, + "step": 611 + }, + { + "ce_ib": 15.044721603393555, + "ce_orig": 1.532287836074829, + "epoch": 0.17571356675533828, + "kl_loss": 1.1178569793701172, + "loss_ib": 0.02622329257428646, + "step": 611 + }, + { + "ce_ib": 8.996905326843262, + "ce_orig": 0.9385986328125, + "epoch": 0.1760011503343159, + "kl_loss": 1.0947903394699097, + "loss_ib": 0.0199448075145483, + "step": 612 + }, + { + "ce_ib": 13.072503089904785, + "ce_orig": 0.988423764705658, + "epoch": 0.1760011503343159, + "kl_loss": 1.1813626289367676, + "loss_ib": 0.024886131286621094, + "step": 612 + }, + { + "ce_ib": 8.529322624206543, + "ce_orig": 0.733309805393219, + "epoch": 0.1760011503343159, + "kl_loss": 1.091768741607666, + "loss_ib": 0.01944701001048088, + "step": 612 + }, + { + "ce_ib": 12.893845558166504, + "ce_orig": 1.074021339416504, + "epoch": 0.1760011503343159, + "kl_loss": 1.106937050819397, + "loss_ib": 0.023963216692209244, + "step": 612 + }, + { + "ce_ib": 12.352333068847656, + "ce_orig": 0.9047135710716248, + "epoch": 0.17628873391329355, + "kl_loss": 1.1527860164642334, + "loss_ib": 0.023880193009972572, + "step": 613 + }, + { + "ce_ib": 11.772270202636719, + "ce_orig": 0.8379567265510559, + "epoch": 0.17628873391329355, + "kl_loss": 1.1471657752990723, + "loss_ib": 0.02324392832815647, + "step": 613 + }, + { + "ce_ib": 8.202688217163086, + "ce_orig": 0.5956844091415405, + "epoch": 0.17628873391329355, + "kl_loss": 1.133353352546692, + "loss_ib": 0.019536221399903297, + "step": 613 + }, + { + "ce_ib": 13.991854667663574, + "ce_orig": 1.4801995754241943, + "epoch": 0.17628873391329355, + "kl_loss": 1.1663241386413574, + "loss_ib": 0.025655098259449005, + "step": 613 + }, + { + "ce_ib": 9.825779914855957, + "ce_orig": 0.581002950668335, + "epoch": 0.17657631749227118, + "kl_loss": 1.0918254852294922, + "loss_ib": 0.02074403502047062, + "step": 614 + }, + { + "ce_ib": 9.005266189575195, + "ce_orig": 0.6865787506103516, + "epoch": 0.17657631749227118, + "kl_loss": 1.0758470296859741, + "loss_ib": 0.019763736054301262, + "step": 614 + }, + { + "ce_ib": 8.376243591308594, + "ce_orig": 0.7356083393096924, + "epoch": 0.17657631749227118, + "kl_loss": 1.0604243278503418, + "loss_ib": 0.018980486318469048, + "step": 614 + }, + { + "ce_ib": 10.727423667907715, + "ce_orig": 0.5603511333465576, + "epoch": 0.17657631749227118, + "kl_loss": 1.1472487449645996, + "loss_ib": 0.02219991199672222, + "step": 614 + }, + { + "epoch": 0.17686390107124883, + "grad_norm": 0.09426393359899521, + "learning_rate": 9.979048302224624e-06, + "loss": 0.8892, + "step": 615 + }, + { + "ce_ib": 13.396039962768555, + "ce_orig": 1.0348241329193115, + "epoch": 0.17686390107124883, + "kl_loss": 1.1215533018112183, + "loss_ib": 0.024611571803689003, + "step": 615 + }, + { + "ce_ib": 13.98047924041748, + "ce_orig": 0.8722472190856934, + "epoch": 0.17686390107124883, + "kl_loss": 1.12811279296875, + "loss_ib": 0.02526160702109337, + "step": 615 + }, + { + "ce_ib": 6.756248950958252, + "ce_orig": 0.654313862323761, + "epoch": 0.17686390107124883, + "kl_loss": 1.0764236450195312, + "loss_ib": 0.017520485445857048, + "step": 615 + }, + { + "ce_ib": 7.255735397338867, + "ce_orig": 0.381597638130188, + "epoch": 0.17686390107124883, + "kl_loss": 1.1145398616790771, + "loss_ib": 0.01840113289654255, + "step": 615 + }, + { + "ce_ib": 11.688261032104492, + "ce_orig": 0.495026558637619, + "epoch": 0.17715148465022648, + "kl_loss": 1.1114025115966797, + "loss_ib": 0.022802285850048065, + "step": 616 + }, + { + "ce_ib": 9.351430892944336, + "ce_orig": 0.6223934292793274, + "epoch": 0.17715148465022648, + "kl_loss": 1.0816258192062378, + "loss_ib": 0.02016768977046013, + "step": 616 + }, + { + "ce_ib": 7.122152328491211, + "ce_orig": 0.6948713660240173, + "epoch": 0.17715148465022648, + "kl_loss": 1.10568106174469, + "loss_ib": 0.018178964033722878, + "step": 616 + }, + { + "ce_ib": 6.703441619873047, + "ce_orig": 0.7388578653335571, + "epoch": 0.17715148465022648, + "kl_loss": 1.0877346992492676, + "loss_ib": 0.0175807885825634, + "step": 616 + }, + { + "ce_ib": 7.440734386444092, + "ce_orig": 0.5273018479347229, + "epoch": 0.1774390682292041, + "kl_loss": 1.0771890878677368, + "loss_ib": 0.01821262575685978, + "step": 617 + }, + { + "ce_ib": 7.850268363952637, + "ce_orig": 0.6726818680763245, + "epoch": 0.1774390682292041, + "kl_loss": 1.0832056999206543, + "loss_ib": 0.01868232525885105, + "step": 617 + }, + { + "ce_ib": 11.564708709716797, + "ce_orig": 0.9945627450942993, + "epoch": 0.1774390682292041, + "kl_loss": 1.079153299331665, + "loss_ib": 0.022356241941452026, + "step": 617 + }, + { + "ce_ib": 9.48259449005127, + "ce_orig": 1.3080958127975464, + "epoch": 0.1774390682292041, + "kl_loss": 1.0585891008377075, + "loss_ib": 0.020068485289812088, + "step": 617 + }, + { + "ce_ib": 6.543670654296875, + "ce_orig": 0.6545018553733826, + "epoch": 0.17772665180818176, + "kl_loss": 1.0263237953186035, + "loss_ib": 0.016806907951831818, + "step": 618 + }, + { + "ce_ib": 14.908156394958496, + "ce_orig": 1.3185038566589355, + "epoch": 0.17772665180818176, + "kl_loss": 1.0666757822036743, + "loss_ib": 0.02557491324841976, + "step": 618 + }, + { + "ce_ib": 7.1459832191467285, + "ce_orig": 0.6320990920066833, + "epoch": 0.17772665180818176, + "kl_loss": 1.0741521120071411, + "loss_ib": 0.017887502908706665, + "step": 618 + }, + { + "ce_ib": 13.496872901916504, + "ce_orig": 1.4044506549835205, + "epoch": 0.17772665180818176, + "kl_loss": 1.0919251441955566, + "loss_ib": 0.0244161244481802, + "step": 618 + }, + { + "ce_ib": 11.312828063964844, + "ce_orig": 0.8793609738349915, + "epoch": 0.17801423538715938, + "kl_loss": 1.1443016529083252, + "loss_ib": 0.02275584451854229, + "step": 619 + }, + { + "ce_ib": 6.740575790405273, + "ce_orig": 0.5719377398490906, + "epoch": 0.17801423538715938, + "kl_loss": 1.0880985260009766, + "loss_ib": 0.017621560022234917, + "step": 619 + }, + { + "ce_ib": 5.965404033660889, + "ce_orig": 0.7937454581260681, + "epoch": 0.17801423538715938, + "kl_loss": 1.054682970046997, + "loss_ib": 0.016512233763933182, + "step": 619 + }, + { + "ce_ib": 12.703347206115723, + "ce_orig": 0.7259251475334167, + "epoch": 0.17801423538715938, + "kl_loss": 1.1318557262420654, + "loss_ib": 0.024021903052926064, + "step": 619 + }, + { + "epoch": 0.17830181896613703, + "grad_norm": 0.10790643841028214, + "learning_rate": 9.978332574056468e-06, + "loss": 0.8558, + "step": 620 + }, + { + "ce_ib": 7.719181537628174, + "ce_orig": 0.7105019092559814, + "epoch": 0.17830181896613703, + "kl_loss": 1.0935070514678955, + "loss_ib": 0.018654251471161842, + "step": 620 + }, + { + "ce_ib": 7.3571600914001465, + "ce_orig": 0.46813029050827026, + "epoch": 0.17830181896613703, + "kl_loss": 1.1948972940444946, + "loss_ib": 0.019306132569909096, + "step": 620 + }, + { + "ce_ib": 15.235627174377441, + "ce_orig": 1.8518890142440796, + "epoch": 0.17830181896613703, + "kl_loss": 1.1301660537719727, + "loss_ib": 0.02653728984296322, + "step": 620 + }, + { + "ce_ib": 9.883748054504395, + "ce_orig": 0.7214540839195251, + "epoch": 0.17830181896613703, + "kl_loss": 1.0958552360534668, + "loss_ib": 0.0208422988653183, + "step": 620 + }, + { + "ce_ib": 9.291598320007324, + "ce_orig": 0.8704307079315186, + "epoch": 0.17858940254511468, + "kl_loss": 1.1199560165405273, + "loss_ib": 0.020491158589720726, + "step": 621 + }, + { + "ce_ib": 7.236078262329102, + "ce_orig": 0.6710290312767029, + "epoch": 0.17858940254511468, + "kl_loss": 1.0643848180770874, + "loss_ib": 0.017879927530884743, + "step": 621 + }, + { + "ce_ib": 10.731121063232422, + "ce_orig": 0.8992137908935547, + "epoch": 0.17858940254511468, + "kl_loss": 1.0887260437011719, + "loss_ib": 0.021618379279971123, + "step": 621 + }, + { + "ce_ib": 6.691609859466553, + "ce_orig": 0.23901374638080597, + "epoch": 0.17858940254511468, + "kl_loss": 1.2286667823791504, + "loss_ib": 0.01897827908396721, + "step": 621 + }, + { + "ce_ib": 10.243730545043945, + "ce_orig": 0.6851696968078613, + "epoch": 0.1788769861240923, + "kl_loss": 1.1422102451324463, + "loss_ib": 0.02166583202779293, + "step": 622 + }, + { + "ce_ib": 9.9014892578125, + "ce_orig": 0.861434280872345, + "epoch": 0.1788769861240923, + "kl_loss": 1.1246778964996338, + "loss_ib": 0.02114826813340187, + "step": 622 + }, + { + "ce_ib": 9.937746047973633, + "ce_orig": 0.7701823711395264, + "epoch": 0.1788769861240923, + "kl_loss": 1.0272612571716309, + "loss_ib": 0.02021035924553871, + "step": 622 + }, + { + "ce_ib": 11.04038143157959, + "ce_orig": 1.1329587697982788, + "epoch": 0.1788769861240923, + "kl_loss": 1.0876765251159668, + "loss_ib": 0.021917147561907768, + "step": 622 + }, + { + "ce_ib": 9.774174690246582, + "ce_orig": 0.6889547109603882, + "epoch": 0.17916456970306996, + "kl_loss": 1.1281490325927734, + "loss_ib": 0.021055664867162704, + "step": 623 + }, + { + "ce_ib": 8.301161766052246, + "ce_orig": 0.9744248390197754, + "epoch": 0.17916456970306996, + "kl_loss": 1.1025155782699585, + "loss_ib": 0.019326316192746162, + "step": 623 + }, + { + "ce_ib": 11.626615524291992, + "ce_orig": 1.0212253332138062, + "epoch": 0.17916456970306996, + "kl_loss": 1.1386442184448242, + "loss_ib": 0.023013057187199593, + "step": 623 + }, + { + "ce_ib": 9.228303909301758, + "ce_orig": 0.9091984629631042, + "epoch": 0.17916456970306996, + "kl_loss": 1.0976030826568604, + "loss_ib": 0.02020433358848095, + "step": 623 + }, + { + "ce_ib": 12.572793960571289, + "ce_orig": 1.365990161895752, + "epoch": 0.17945215328204758, + "kl_loss": 1.0628557205200195, + "loss_ib": 0.0232013501226902, + "step": 624 + }, + { + "ce_ib": 6.557443141937256, + "ce_orig": 0.38862401247024536, + "epoch": 0.17945215328204758, + "kl_loss": 1.0641001462936401, + "loss_ib": 0.01719844341278076, + "step": 624 + }, + { + "ce_ib": 11.218859672546387, + "ce_orig": 1.0366816520690918, + "epoch": 0.17945215328204758, + "kl_loss": 1.0765401124954224, + "loss_ib": 0.021984262391924858, + "step": 624 + }, + { + "ce_ib": 13.108721733093262, + "ce_orig": 1.2099130153656006, + "epoch": 0.17945215328204758, + "kl_loss": 1.0831751823425293, + "loss_ib": 0.023940471932291985, + "step": 624 + }, + { + "epoch": 0.17973973686102523, + "grad_norm": 0.0922677144408226, + "learning_rate": 9.977604851929648e-06, + "loss": 0.9102, + "step": 625 + }, + { + "ce_ib": 8.263596534729004, + "ce_orig": 0.6577824354171753, + "epoch": 0.17973973686102523, + "kl_loss": 1.1678799390792847, + "loss_ib": 0.019942395389080048, + "step": 625 + }, + { + "ce_ib": 7.4331560134887695, + "ce_orig": 0.6097143888473511, + "epoch": 0.17973973686102523, + "kl_loss": 1.0634379386901855, + "loss_ib": 0.018067536875605583, + "step": 625 + }, + { + "ce_ib": 9.178628921508789, + "ce_orig": 0.8557302355766296, + "epoch": 0.17973973686102523, + "kl_loss": 1.1199026107788086, + "loss_ib": 0.02037765458226204, + "step": 625 + }, + { + "ce_ib": 8.077595710754395, + "ce_orig": 0.7694026231765747, + "epoch": 0.17973973686102523, + "kl_loss": 0.9992647171020508, + "loss_ib": 0.018070241436362267, + "step": 625 + }, + { + "ce_ib": 14.997899055480957, + "ce_orig": 1.3085542917251587, + "epoch": 0.18002732044000289, + "kl_loss": 1.0909315347671509, + "loss_ib": 0.025907214730978012, + "step": 626 + }, + { + "ce_ib": 14.065059661865234, + "ce_orig": 1.3615686893463135, + "epoch": 0.18002732044000289, + "kl_loss": 1.1272248029708862, + "loss_ib": 0.025337306782603264, + "step": 626 + }, + { + "ce_ib": 9.213851928710938, + "ce_orig": 0.4737341105937958, + "epoch": 0.18002732044000289, + "kl_loss": 1.1103893518447876, + "loss_ib": 0.020317744463682175, + "step": 626 + }, + { + "ce_ib": 10.141777992248535, + "ce_orig": 1.0340423583984375, + "epoch": 0.18002732044000289, + "kl_loss": 1.1753777265548706, + "loss_ib": 0.021895555779337883, + "step": 626 + }, + { + "ce_ib": 10.606011390686035, + "ce_orig": 0.5892968773841858, + "epoch": 0.1803149040189805, + "kl_loss": 1.1311826705932617, + "loss_ib": 0.021917838603258133, + "step": 627 + }, + { + "ce_ib": 8.124170303344727, + "ce_orig": 0.42015740275382996, + "epoch": 0.1803149040189805, + "kl_loss": 1.108632206916809, + "loss_ib": 0.019210491329431534, + "step": 627 + }, + { + "ce_ib": 10.349467277526855, + "ce_orig": 0.7760807275772095, + "epoch": 0.1803149040189805, + "kl_loss": 1.0804953575134277, + "loss_ib": 0.02115442231297493, + "step": 627 + }, + { + "ce_ib": 8.495221138000488, + "ce_orig": 0.5720870494842529, + "epoch": 0.1803149040189805, + "kl_loss": 1.1552854776382446, + "loss_ib": 0.020048074424266815, + "step": 627 + }, + { + "ce_ib": 13.331109046936035, + "ce_orig": 1.5061442852020264, + "epoch": 0.18060248759795816, + "kl_loss": 1.1464059352874756, + "loss_ib": 0.0247951690107584, + "step": 628 + }, + { + "ce_ib": 11.41611099243164, + "ce_orig": 1.1628519296646118, + "epoch": 0.18060248759795816, + "kl_loss": 1.0600998401641846, + "loss_ib": 0.022017108276486397, + "step": 628 + }, + { + "ce_ib": 12.698983192443848, + "ce_orig": 1.187642216682434, + "epoch": 0.18060248759795816, + "kl_loss": 1.1207376718521118, + "loss_ib": 0.02390635944902897, + "step": 628 + }, + { + "ce_ib": 15.049531936645508, + "ce_orig": 1.8772828578948975, + "epoch": 0.18060248759795816, + "kl_loss": 1.087199330329895, + "loss_ib": 0.025921525433659554, + "step": 628 + }, + { + "ce_ib": 8.250436782836914, + "ce_orig": 0.7029029130935669, + "epoch": 0.18089007117693578, + "kl_loss": 1.1021440029144287, + "loss_ib": 0.01927187666296959, + "step": 629 + }, + { + "ce_ib": 13.998225212097168, + "ce_orig": 1.2588441371917725, + "epoch": 0.18089007117693578, + "kl_loss": 1.080070972442627, + "loss_ib": 0.024798937141895294, + "step": 629 + }, + { + "ce_ib": 12.915166854858398, + "ce_orig": 1.296819806098938, + "epoch": 0.18089007117693578, + "kl_loss": 1.1545573472976685, + "loss_ib": 0.024460740387439728, + "step": 629 + }, + { + "ce_ib": 9.678729057312012, + "ce_orig": 0.5961591601371765, + "epoch": 0.18089007117693578, + "kl_loss": 1.220086932182312, + "loss_ib": 0.02187959849834442, + "step": 629 + }, + { + "epoch": 0.18117765475591344, + "grad_norm": 0.0966140478849411, + "learning_rate": 9.97686513759741e-06, + "loss": 0.9272, + "step": 630 + }, + { + "ce_ib": 13.0838623046875, + "ce_orig": 1.260634422302246, + "epoch": 0.18117765475591344, + "kl_loss": 1.1091606616973877, + "loss_ib": 0.02417546696960926, + "step": 630 + }, + { + "ce_ib": 6.934885025024414, + "ce_orig": 0.4972129762172699, + "epoch": 0.18117765475591344, + "kl_loss": 1.0627329349517822, + "loss_ib": 0.01756221428513527, + "step": 630 + }, + { + "ce_ib": 12.18768310546875, + "ce_orig": 1.4976786375045776, + "epoch": 0.18117765475591344, + "kl_loss": 1.0954440832138062, + "loss_ib": 0.023142123594880104, + "step": 630 + }, + { + "ce_ib": 11.109347343444824, + "ce_orig": 1.1459025144577026, + "epoch": 0.18117765475591344, + "kl_loss": 1.054826259613037, + "loss_ib": 0.021657610312104225, + "step": 630 + }, + { + "ce_ib": 10.95541763305664, + "ce_orig": 0.6836705207824707, + "epoch": 0.1814652383348911, + "kl_loss": 1.0429816246032715, + "loss_ib": 0.021385235711932182, + "step": 631 + }, + { + "ce_ib": 10.109951972961426, + "ce_orig": 0.8095191717147827, + "epoch": 0.1814652383348911, + "kl_loss": 1.072096347808838, + "loss_ib": 0.020830916240811348, + "step": 631 + }, + { + "ce_ib": 7.40610408782959, + "ce_orig": 0.6426340341567993, + "epoch": 0.1814652383348911, + "kl_loss": 1.0597965717315674, + "loss_ib": 0.018004069104790688, + "step": 631 + }, + { + "ce_ib": 8.1773681640625, + "ce_orig": 0.9187619090080261, + "epoch": 0.1814652383348911, + "kl_loss": 1.0321749448776245, + "loss_ib": 0.018499117344617844, + "step": 631 + }, + { + "ce_ib": 7.07262659072876, + "ce_orig": 0.8202535510063171, + "epoch": 0.1817528219138687, + "kl_loss": 0.9864460229873657, + "loss_ib": 0.01693708635866642, + "step": 632 + }, + { + "ce_ib": 7.180476188659668, + "ce_orig": 0.4838648736476898, + "epoch": 0.1817528219138687, + "kl_loss": 1.0089023113250732, + "loss_ib": 0.017269499599933624, + "step": 632 + }, + { + "ce_ib": 15.649883270263672, + "ce_orig": 1.6325815916061401, + "epoch": 0.1817528219138687, + "kl_loss": 1.1197898387908936, + "loss_ib": 0.026847781613469124, + "step": 632 + }, + { + "ce_ib": 10.30772590637207, + "ce_orig": 0.8685612082481384, + "epoch": 0.1817528219138687, + "kl_loss": 1.0974705219268799, + "loss_ib": 0.02128242887556553, + "step": 632 + }, + { + "ce_ib": 8.978386878967285, + "ce_orig": 1.0552774667739868, + "epoch": 0.18204040549284636, + "kl_loss": 1.012761116027832, + "loss_ib": 0.019105996936559677, + "step": 633 + }, + { + "ce_ib": 10.887259483337402, + "ce_orig": 1.5764906406402588, + "epoch": 0.18204040549284636, + "kl_loss": 1.0496618747711182, + "loss_ib": 0.021383875980973244, + "step": 633 + }, + { + "ce_ib": 10.534551620483398, + "ce_orig": 1.1252496242523193, + "epoch": 0.18204040549284636, + "kl_loss": 1.0642296075820923, + "loss_ib": 0.02117684856057167, + "step": 633 + }, + { + "ce_ib": 9.471675872802734, + "ce_orig": 0.3606927990913391, + "epoch": 0.18204040549284636, + "kl_loss": 1.1879955530166626, + "loss_ib": 0.021351629868149757, + "step": 633 + }, + { + "ce_ib": 9.976670265197754, + "ce_orig": 1.006685495376587, + "epoch": 0.182327989071824, + "kl_loss": 1.0557842254638672, + "loss_ib": 0.020534511655569077, + "step": 634 + }, + { + "ce_ib": 8.402043342590332, + "ce_orig": 1.0922396183013916, + "epoch": 0.182327989071824, + "kl_loss": 1.023686408996582, + "loss_ib": 0.018638907000422478, + "step": 634 + }, + { + "ce_ib": 10.423868179321289, + "ce_orig": 0.9032129049301147, + "epoch": 0.182327989071824, + "kl_loss": 1.1368285417556763, + "loss_ib": 0.021792152896523476, + "step": 634 + }, + { + "ce_ib": 7.688118934631348, + "ce_orig": 0.46169134974479675, + "epoch": 0.182327989071824, + "kl_loss": 1.095708966255188, + "loss_ib": 0.018645208328962326, + "step": 634 + }, + { + "epoch": 0.18261557265080164, + "grad_norm": 0.09018189460039139, + "learning_rate": 9.976113432841903e-06, + "loss": 0.9332, + "step": 635 + }, + { + "ce_ib": 9.740583419799805, + "ce_orig": 0.7694171667098999, + "epoch": 0.18261557265080164, + "kl_loss": 1.0555506944656372, + "loss_ib": 0.020296089351177216, + "step": 635 + }, + { + "ce_ib": 8.057018280029297, + "ce_orig": 0.48739856481552124, + "epoch": 0.18261557265080164, + "kl_loss": 1.2050068378448486, + "loss_ib": 0.02010708674788475, + "step": 635 + }, + { + "ce_ib": 8.886861801147461, + "ce_orig": 0.4765666127204895, + "epoch": 0.18261557265080164, + "kl_loss": 1.071749210357666, + "loss_ib": 0.019604353234171867, + "step": 635 + }, + { + "ce_ib": 13.106306076049805, + "ce_orig": 0.9705209136009216, + "epoch": 0.18261557265080164, + "kl_loss": 1.1007217168807983, + "loss_ib": 0.024113522842526436, + "step": 635 + }, + { + "ce_ib": 10.954051971435547, + "ce_orig": 1.050213098526001, + "epoch": 0.1829031562297793, + "kl_loss": 1.066004991531372, + "loss_ib": 0.02161410264670849, + "step": 636 + }, + { + "ce_ib": 9.91700553894043, + "ce_orig": 0.680712103843689, + "epoch": 0.1829031562297793, + "kl_loss": 1.132023572921753, + "loss_ib": 0.021237241104245186, + "step": 636 + }, + { + "ce_ib": 8.767101287841797, + "ce_orig": 0.44354522228240967, + "epoch": 0.1829031562297793, + "kl_loss": 1.108346939086914, + "loss_ib": 0.01985057070851326, + "step": 636 + }, + { + "ce_ib": 9.128534317016602, + "ce_orig": 0.3988248407840729, + "epoch": 0.1829031562297793, + "kl_loss": 1.086971402168274, + "loss_ib": 0.019998246803879738, + "step": 636 + }, + { + "ce_ib": 13.582477569580078, + "ce_orig": 1.6579183340072632, + "epoch": 0.1831907398087569, + "kl_loss": 1.0730781555175781, + "loss_ib": 0.02431325800716877, + "step": 637 + }, + { + "ce_ib": 17.059223175048828, + "ce_orig": 1.6747536659240723, + "epoch": 0.1831907398087569, + "kl_loss": 1.1021394729614258, + "loss_ib": 0.028080618008971214, + "step": 637 + }, + { + "ce_ib": 13.071850776672363, + "ce_orig": 1.5738227367401123, + "epoch": 0.1831907398087569, + "kl_loss": 1.1231281757354736, + "loss_ib": 0.0243031308054924, + "step": 637 + }, + { + "ce_ib": 9.79615592956543, + "ce_orig": 1.0085848569869995, + "epoch": 0.1831907398087569, + "kl_loss": 1.0980044603347778, + "loss_ib": 0.02077619917690754, + "step": 637 + }, + { + "ce_ib": 6.036836624145508, + "ce_orig": 0.4214065670967102, + "epoch": 0.18347832338773457, + "kl_loss": 1.0429325103759766, + "loss_ib": 0.016466161236166954, + "step": 638 + }, + { + "ce_ib": 7.146793365478516, + "ce_orig": 0.6582375764846802, + "epoch": 0.18347832338773457, + "kl_loss": 1.0144261121749878, + "loss_ib": 0.017291054129600525, + "step": 638 + }, + { + "ce_ib": 13.084831237792969, + "ce_orig": 1.4547115564346313, + "epoch": 0.18347832338773457, + "kl_loss": 1.0451858043670654, + "loss_ib": 0.023536689579486847, + "step": 638 + }, + { + "ce_ib": 10.764507293701172, + "ce_orig": 1.1796584129333496, + "epoch": 0.18347832338773457, + "kl_loss": 1.077453851699829, + "loss_ib": 0.02153904363512993, + "step": 638 + }, + { + "ce_ib": 4.687036514282227, + "ce_orig": 0.18363694846630096, + "epoch": 0.1837659069667122, + "kl_loss": 1.1889199018478394, + "loss_ib": 0.016576234251260757, + "step": 639 + }, + { + "ce_ib": 10.766698837280273, + "ce_orig": 0.825289785861969, + "epoch": 0.1837659069667122, + "kl_loss": 1.0548924207687378, + "loss_ib": 0.021315621212124825, + "step": 639 + }, + { + "ce_ib": 8.863663673400879, + "ce_orig": 0.8204382061958313, + "epoch": 0.1837659069667122, + "kl_loss": 1.019961953163147, + "loss_ib": 0.019063282757997513, + "step": 639 + }, + { + "ce_ib": 13.531327247619629, + "ce_orig": 1.1665120124816895, + "epoch": 0.1837659069667122, + "kl_loss": 1.0381747484207153, + "loss_ib": 0.023913072422146797, + "step": 639 + }, + { + "epoch": 0.18405349054568984, + "grad_norm": 0.09696701914072037, + "learning_rate": 9.975349739474156e-06, + "loss": 0.8875, + "step": 640 + }, + { + "ce_ib": 7.491870403289795, + "ce_orig": 0.907927930355072, + "epoch": 0.18405349054568984, + "kl_loss": 0.9900725483894348, + "loss_ib": 0.01739259622991085, + "step": 640 + }, + { + "ce_ib": 7.663341999053955, + "ce_orig": 0.5722526907920837, + "epoch": 0.18405349054568984, + "kl_loss": 1.0283775329589844, + "loss_ib": 0.01794711872935295, + "step": 640 + }, + { + "ce_ib": 10.040138244628906, + "ce_orig": 0.7578917145729065, + "epoch": 0.18405349054568984, + "kl_loss": 1.0339435338974, + "loss_ib": 0.020379573106765747, + "step": 640 + }, + { + "ce_ib": 11.124543190002441, + "ce_orig": 1.2751201391220093, + "epoch": 0.18405349054568984, + "kl_loss": 1.0301541090011597, + "loss_ib": 0.021426083520054817, + "step": 640 + }, + { + "ce_ib": 11.990524291992188, + "ce_orig": 1.2356369495391846, + "epoch": 0.1843410741246675, + "kl_loss": 1.1035387516021729, + "loss_ib": 0.023025913164019585, + "step": 641 + }, + { + "ce_ib": 7.862361431121826, + "ce_orig": 0.964101254940033, + "epoch": 0.1843410741246675, + "kl_loss": 1.1734020709991455, + "loss_ib": 0.01959638111293316, + "step": 641 + }, + { + "ce_ib": 9.207853317260742, + "ce_orig": 0.9062885642051697, + "epoch": 0.1843410741246675, + "kl_loss": 1.020951747894287, + "loss_ib": 0.019417371600866318, + "step": 641 + }, + { + "ce_ib": 9.713889122009277, + "ce_orig": 1.302318811416626, + "epoch": 0.1843410741246675, + "kl_loss": 1.1001968383789062, + "loss_ib": 0.020715856924653053, + "step": 641 + }, + { + "ce_ib": 6.821298599243164, + "ce_orig": 0.7004828453063965, + "epoch": 0.18462865770364512, + "kl_loss": 0.9664976596832275, + "loss_ib": 0.01648627407848835, + "step": 642 + }, + { + "ce_ib": 9.508224487304688, + "ce_orig": 0.832171618938446, + "epoch": 0.18462865770364512, + "kl_loss": 1.0534615516662598, + "loss_ib": 0.020042838528752327, + "step": 642 + }, + { + "ce_ib": 7.55943489074707, + "ce_orig": 0.6746954917907715, + "epoch": 0.18462865770364512, + "kl_loss": 1.0909643173217773, + "loss_ib": 0.018469078466296196, + "step": 642 + }, + { + "ce_ib": 7.165235996246338, + "ce_orig": 0.9785995483398438, + "epoch": 0.18462865770364512, + "kl_loss": 1.0123915672302246, + "loss_ib": 0.01728915236890316, + "step": 642 + }, + { + "ce_ib": 10.234342575073242, + "ce_orig": 0.9009053707122803, + "epoch": 0.18491624128262277, + "kl_loss": 1.1004221439361572, + "loss_ib": 0.02123856544494629, + "step": 643 + }, + { + "ce_ib": 8.069269180297852, + "ce_orig": 0.7381336688995361, + "epoch": 0.18491624128262277, + "kl_loss": 1.0298247337341309, + "loss_ib": 0.01836751587688923, + "step": 643 + }, + { + "ce_ib": 10.050087928771973, + "ce_orig": 0.9887573719024658, + "epoch": 0.18491624128262277, + "kl_loss": 1.0247390270233154, + "loss_ib": 0.020297478884458542, + "step": 643 + }, + { + "ce_ib": 14.767660140991211, + "ce_orig": 1.0759152173995972, + "epoch": 0.18491624128262277, + "kl_loss": 1.0658990144729614, + "loss_ib": 0.025426648557186127, + "step": 643 + }, + { + "ce_ib": 9.213264465332031, + "ce_orig": 0.7979077696800232, + "epoch": 0.1852038248616004, + "kl_loss": 1.041956901550293, + "loss_ib": 0.01963283307850361, + "step": 644 + }, + { + "ce_ib": 11.142786979675293, + "ce_orig": 1.003310203552246, + "epoch": 0.1852038248616004, + "kl_loss": 1.024578332901001, + "loss_ib": 0.021388567984104156, + "step": 644 + }, + { + "ce_ib": 9.134848594665527, + "ce_orig": 0.9812929034233093, + "epoch": 0.1852038248616004, + "kl_loss": 1.1117109060287476, + "loss_ib": 0.020251957699656487, + "step": 644 + }, + { + "ce_ib": 12.999911308288574, + "ce_orig": 1.423545479774475, + "epoch": 0.1852038248616004, + "kl_loss": 1.049558401107788, + "loss_ib": 0.023495495319366455, + "step": 644 + }, + { + "epoch": 0.18549140844057804, + "grad_norm": 0.09764409065246582, + "learning_rate": 9.974574059334082e-06, + "loss": 0.9161, + "step": 645 + }, + { + "ce_ib": 14.430760383605957, + "ce_orig": 0.45323994755744934, + "epoch": 0.18549140844057804, + "kl_loss": 1.1187865734100342, + "loss_ib": 0.025618623942136765, + "step": 645 + }, + { + "ce_ib": 6.7074151039123535, + "ce_orig": 0.8436351418495178, + "epoch": 0.18549140844057804, + "kl_loss": 0.9737348556518555, + "loss_ib": 0.01644476316869259, + "step": 645 + }, + { + "ce_ib": 9.11927604675293, + "ce_orig": 1.0630453824996948, + "epoch": 0.18549140844057804, + "kl_loss": 0.966637134552002, + "loss_ib": 0.018785648047924042, + "step": 645 + }, + { + "ce_ib": 9.039983749389648, + "ce_orig": 0.4303601086139679, + "epoch": 0.18549140844057804, + "kl_loss": 1.0904786586761475, + "loss_ib": 0.019944770261645317, + "step": 645 + }, + { + "ce_ib": 9.382161140441895, + "ce_orig": 0.8849090933799744, + "epoch": 0.1857789920195557, + "kl_loss": 1.0450866222381592, + "loss_ib": 0.019833028316497803, + "step": 646 + }, + { + "ce_ib": 7.5546064376831055, + "ce_orig": 0.5726062655448914, + "epoch": 0.1857789920195557, + "kl_loss": 1.0881624221801758, + "loss_ib": 0.018436230719089508, + "step": 646 + }, + { + "ce_ib": 8.01627254486084, + "ce_orig": 0.65036940574646, + "epoch": 0.1857789920195557, + "kl_loss": 1.0229861736297607, + "loss_ib": 0.018246134743094444, + "step": 646 + }, + { + "ce_ib": 7.372588634490967, + "ce_orig": 0.6817443370819092, + "epoch": 0.1857789920195557, + "kl_loss": 0.9919678568840027, + "loss_ib": 0.017292266711592674, + "step": 646 + }, + { + "ce_ib": 10.641061782836914, + "ce_orig": 0.9732003211975098, + "epoch": 0.18606657559853332, + "kl_loss": 1.0023996829986572, + "loss_ib": 0.020665058866143227, + "step": 647 + }, + { + "ce_ib": 10.230724334716797, + "ce_orig": 0.749370276927948, + "epoch": 0.18606657559853332, + "kl_loss": 1.0023596286773682, + "loss_ib": 0.02025432139635086, + "step": 647 + }, + { + "ce_ib": 8.159378051757812, + "ce_orig": 0.7660282850265503, + "epoch": 0.18606657559853332, + "kl_loss": 1.0770816802978516, + "loss_ib": 0.018930193036794662, + "step": 647 + }, + { + "ce_ib": 11.307751655578613, + "ce_orig": 0.7283535003662109, + "epoch": 0.18606657559853332, + "kl_loss": 1.1148804426193237, + "loss_ib": 0.02245655469596386, + "step": 647 + }, + { + "ce_ib": 10.556684494018555, + "ce_orig": 1.098810076713562, + "epoch": 0.18635415917751097, + "kl_loss": 1.0427453517913818, + "loss_ib": 0.020984139293432236, + "step": 648 + }, + { + "ce_ib": 5.236063003540039, + "ce_orig": 0.2835554778575897, + "epoch": 0.18635415917751097, + "kl_loss": 1.169142246246338, + "loss_ib": 0.016927484422922134, + "step": 648 + }, + { + "ce_ib": 9.705862998962402, + "ce_orig": 1.00014328956604, + "epoch": 0.18635415917751097, + "kl_loss": 0.9767618179321289, + "loss_ib": 0.019473480060696602, + "step": 648 + }, + { + "ce_ib": 11.64765453338623, + "ce_orig": 1.3154453039169312, + "epoch": 0.18635415917751097, + "kl_loss": 1.0039262771606445, + "loss_ib": 0.021686915308237076, + "step": 648 + }, + { + "ce_ib": 8.285019874572754, + "ce_orig": 0.970478892326355, + "epoch": 0.1866417427564886, + "kl_loss": 0.9888217449188232, + "loss_ib": 0.018173236399888992, + "step": 649 + }, + { + "ce_ib": 10.269580841064453, + "ce_orig": 0.5915196537971497, + "epoch": 0.1866417427564886, + "kl_loss": 1.0976067781448364, + "loss_ib": 0.021245649084448814, + "step": 649 + }, + { + "ce_ib": 7.284735202789307, + "ce_orig": 0.6597338914871216, + "epoch": 0.1866417427564886, + "kl_loss": 1.0240867137908936, + "loss_ib": 0.017525602132081985, + "step": 649 + }, + { + "ce_ib": 5.271642208099365, + "ce_orig": 0.3803437352180481, + "epoch": 0.1866417427564886, + "kl_loss": 1.114564061164856, + "loss_ib": 0.016417281702160835, + "step": 649 + }, + { + "epoch": 0.18692932633546624, + "grad_norm": 0.1129700243473053, + "learning_rate": 9.973786394290475e-06, + "loss": 0.8796, + "step": 650 + }, + { + "ce_ib": 12.168571472167969, + "ce_orig": 1.0805795192718506, + "epoch": 0.18692932633546624, + "kl_loss": 1.005476713180542, + "loss_ib": 0.0222233384847641, + "step": 650 + }, + { + "ce_ib": 9.973319053649902, + "ce_orig": 1.3568997383117676, + "epoch": 0.18692932633546624, + "kl_loss": 1.1083464622497559, + "loss_ib": 0.021056782454252243, + "step": 650 + }, + { + "ce_ib": 10.53954792022705, + "ce_orig": 0.7421830892562866, + "epoch": 0.18692932633546624, + "kl_loss": 1.061907410621643, + "loss_ib": 0.021158622577786446, + "step": 650 + }, + { + "ce_ib": 4.828139781951904, + "ce_orig": 0.21636667847633362, + "epoch": 0.18692932633546624, + "kl_loss": 1.1125696897506714, + "loss_ib": 0.01595383696258068, + "step": 650 + }, + { + "ce_ib": 9.867918014526367, + "ce_orig": 0.3963659703731537, + "epoch": 0.1872169099144439, + "kl_loss": 1.1094999313354492, + "loss_ib": 0.020962918177247047, + "step": 651 + }, + { + "ce_ib": 9.336775779724121, + "ce_orig": 0.7294745445251465, + "epoch": 0.1872169099144439, + "kl_loss": 0.9877459406852722, + "loss_ib": 0.019214235246181488, + "step": 651 + }, + { + "ce_ib": 7.323286533355713, + "ce_orig": 0.42315956950187683, + "epoch": 0.1872169099144439, + "kl_loss": 0.990585207939148, + "loss_ib": 0.01722913794219494, + "step": 651 + }, + { + "ce_ib": 7.316843032836914, + "ce_orig": 0.44551074504852295, + "epoch": 0.1872169099144439, + "kl_loss": 1.039764165878296, + "loss_ib": 0.01771448366343975, + "step": 651 + }, + { + "ce_ib": 10.291936874389648, + "ce_orig": 0.7233940362930298, + "epoch": 0.18750449349342152, + "kl_loss": 1.0752284526824951, + "loss_ib": 0.02104422077536583, + "step": 652 + }, + { + "ce_ib": 7.927389621734619, + "ce_orig": 0.7603445053100586, + "epoch": 0.18750449349342152, + "kl_loss": 1.0002994537353516, + "loss_ib": 0.01793038286268711, + "step": 652 + }, + { + "ce_ib": 10.401845932006836, + "ce_orig": 1.1115306615829468, + "epoch": 0.18750449349342152, + "kl_loss": 1.0052720308303833, + "loss_ib": 0.020454566925764084, + "step": 652 + }, + { + "ce_ib": 11.643852233886719, + "ce_orig": 1.4086114168167114, + "epoch": 0.18750449349342152, + "kl_loss": 1.0603067874908447, + "loss_ib": 0.022246917709708214, + "step": 652 + }, + { + "ce_ib": 6.45538330078125, + "ce_orig": 0.8548846244812012, + "epoch": 0.18779207707239917, + "kl_loss": 0.945244312286377, + "loss_ib": 0.015907825902104378, + "step": 653 + }, + { + "ce_ib": 11.45804500579834, + "ce_orig": 0.9945077300071716, + "epoch": 0.18779207707239917, + "kl_loss": 0.9493337273597717, + "loss_ib": 0.02095138281583786, + "step": 653 + }, + { + "ce_ib": 12.099946975708008, + "ce_orig": 1.0418729782104492, + "epoch": 0.18779207707239917, + "kl_loss": 1.0050960779190063, + "loss_ib": 0.022150907665491104, + "step": 653 + }, + { + "ce_ib": 8.56289005279541, + "ce_orig": 0.7450115084648132, + "epoch": 0.18779207707239917, + "kl_loss": 0.9960210919380188, + "loss_ib": 0.01852310076355934, + "step": 653 + }, + { + "ce_ib": 12.9265775680542, + "ce_orig": 1.5550066232681274, + "epoch": 0.1880796606513768, + "kl_loss": 0.9958865642547607, + "loss_ib": 0.022885441780090332, + "step": 654 + }, + { + "ce_ib": 5.975699424743652, + "ce_orig": 0.6117834448814392, + "epoch": 0.1880796606513768, + "kl_loss": 0.9439165592193604, + "loss_ib": 0.015414864756166935, + "step": 654 + }, + { + "ce_ib": 9.573440551757812, + "ce_orig": 0.972037672996521, + "epoch": 0.1880796606513768, + "kl_loss": 0.9460088610649109, + "loss_ib": 0.019033528864383698, + "step": 654 + }, + { + "ce_ib": 10.364381790161133, + "ce_orig": 0.6434758305549622, + "epoch": 0.1880796606513768, + "kl_loss": 1.0528744459152222, + "loss_ib": 0.020893124863505363, + "step": 654 + }, + { + "epoch": 0.18836724423035445, + "grad_norm": 0.09933654963970184, + "learning_rate": 9.972986746241005e-06, + "loss": 0.9236, + "step": 655 + }, + { + "ce_ib": 11.423005104064941, + "ce_orig": 0.504085898399353, + "epoch": 0.18836724423035445, + "kl_loss": 1.0513900518417358, + "loss_ib": 0.02193690463900566, + "step": 655 + }, + { + "ce_ib": 7.901275634765625, + "ce_orig": 0.8905818462371826, + "epoch": 0.18836724423035445, + "kl_loss": 1.0549099445343018, + "loss_ib": 0.01845037378370762, + "step": 655 + }, + { + "ce_ib": 12.124505043029785, + "ce_orig": 1.5267455577850342, + "epoch": 0.18836724423035445, + "kl_loss": 1.0243613719940186, + "loss_ib": 0.022368118166923523, + "step": 655 + }, + { + "ce_ib": 8.49409008026123, + "ce_orig": 0.4249543845653534, + "epoch": 0.18836724423035445, + "kl_loss": 0.9719215631484985, + "loss_ib": 0.01821330562233925, + "step": 655 + }, + { + "ce_ib": 11.336442947387695, + "ce_orig": 1.0143924951553345, + "epoch": 0.1886548278093321, + "kl_loss": 1.0201416015625, + "loss_ib": 0.021537858992815018, + "step": 656 + }, + { + "ce_ib": 10.415478706359863, + "ce_orig": 0.9272794127464294, + "epoch": 0.1886548278093321, + "kl_loss": 1.0198733806610107, + "loss_ib": 0.02061421424150467, + "step": 656 + }, + { + "ce_ib": 6.768211841583252, + "ce_orig": 0.8758606910705566, + "epoch": 0.1886548278093321, + "kl_loss": 0.9514140486717224, + "loss_ib": 0.016282351687550545, + "step": 656 + }, + { + "ce_ib": 8.312684059143066, + "ce_orig": 0.8438398838043213, + "epoch": 0.1886548278093321, + "kl_loss": 0.9719037413597107, + "loss_ib": 0.01803172007203102, + "step": 656 + }, + { + "ce_ib": 12.224742889404297, + "ce_orig": 0.791438102722168, + "epoch": 0.18894241138830972, + "kl_loss": 1.07827889919281, + "loss_ib": 0.023007530719041824, + "step": 657 + }, + { + "ce_ib": 10.384964942932129, + "ce_orig": 0.6525826454162598, + "epoch": 0.18894241138830972, + "kl_loss": 1.042457103729248, + "loss_ib": 0.020809534937143326, + "step": 657 + }, + { + "ce_ib": 10.262805938720703, + "ce_orig": 0.6871621608734131, + "epoch": 0.18894241138830972, + "kl_loss": 1.0356314182281494, + "loss_ib": 0.020619120448827744, + "step": 657 + }, + { + "ce_ib": 10.362799644470215, + "ce_orig": 0.842133104801178, + "epoch": 0.18894241138830972, + "kl_loss": 1.0107371807098389, + "loss_ib": 0.02047017030417919, + "step": 657 + }, + { + "ce_ib": 12.086880683898926, + "ce_orig": 1.178883671760559, + "epoch": 0.18922999496728737, + "kl_loss": 0.9930935502052307, + "loss_ib": 0.022017816081643105, + "step": 658 + }, + { + "ce_ib": 9.931184768676758, + "ce_orig": 0.5481682419776917, + "epoch": 0.18922999496728737, + "kl_loss": 0.983871579170227, + "loss_ib": 0.019769899547100067, + "step": 658 + }, + { + "ce_ib": 9.0752534866333, + "ce_orig": 0.6848755478858948, + "epoch": 0.18922999496728737, + "kl_loss": 0.9044622182846069, + "loss_ib": 0.018119875341653824, + "step": 658 + }, + { + "ce_ib": 10.438591003417969, + "ce_orig": 1.0720442533493042, + "epoch": 0.18922999496728737, + "kl_loss": 1.0088304281234741, + "loss_ib": 0.02052689529955387, + "step": 658 + }, + { + "ce_ib": 9.08749008178711, + "ce_orig": 1.1288058757781982, + "epoch": 0.189517578546265, + "kl_loss": 0.9419035315513611, + "loss_ib": 0.018506525084376335, + "step": 659 + }, + { + "ce_ib": 10.958107948303223, + "ce_orig": 0.6390134692192078, + "epoch": 0.189517578546265, + "kl_loss": 1.059513807296753, + "loss_ib": 0.021553244441747665, + "step": 659 + }, + { + "ce_ib": 12.540372848510742, + "ce_orig": 1.3411237001419067, + "epoch": 0.189517578546265, + "kl_loss": 0.990556001663208, + "loss_ib": 0.022445930168032646, + "step": 659 + }, + { + "ce_ib": 14.00953483581543, + "ce_orig": 1.3561463356018066, + "epoch": 0.189517578546265, + "kl_loss": 1.1565253734588623, + "loss_ib": 0.025574788451194763, + "step": 659 + }, + { + "epoch": 0.18980516212524265, + "grad_norm": 0.10293088853359222, + "learning_rate": 9.972175117112208e-06, + "loss": 0.8983, + "step": 660 + }, + { + "ce_ib": 9.332024574279785, + "ce_orig": 0.8229407668113708, + "epoch": 0.18980516212524265, + "kl_loss": 0.9839355945587158, + "loss_ib": 0.01917138136923313, + "step": 660 + }, + { + "ce_ib": 10.171957015991211, + "ce_orig": 0.8634912967681885, + "epoch": 0.18980516212524265, + "kl_loss": 0.9303529858589172, + "loss_ib": 0.019475486129522324, + "step": 660 + }, + { + "ce_ib": 10.142843246459961, + "ce_orig": 0.9541047215461731, + "epoch": 0.18980516212524265, + "kl_loss": 0.9670487642288208, + "loss_ib": 0.019813330844044685, + "step": 660 + }, + { + "ce_ib": 8.594452857971191, + "ce_orig": 0.660327136516571, + "epoch": 0.18980516212524265, + "kl_loss": 1.0246200561523438, + "loss_ib": 0.01884065382182598, + "step": 660 + }, + { + "ce_ib": 9.446117401123047, + "ce_orig": 0.6387197375297546, + "epoch": 0.1900927457042203, + "kl_loss": 0.9925702810287476, + "loss_ib": 0.019371818751096725, + "step": 661 + }, + { + "ce_ib": 12.91454792022705, + "ce_orig": 1.6437798738479614, + "epoch": 0.1900927457042203, + "kl_loss": 1.0110113620758057, + "loss_ib": 0.0230246614664793, + "step": 661 + }, + { + "ce_ib": 9.189199447631836, + "ce_orig": 0.9704218506813049, + "epoch": 0.1900927457042203, + "kl_loss": 0.9272133111953735, + "loss_ib": 0.018461331725120544, + "step": 661 + }, + { + "ce_ib": 10.05646800994873, + "ce_orig": 0.6157249212265015, + "epoch": 0.1900927457042203, + "kl_loss": 0.9937683939933777, + "loss_ib": 0.01999415084719658, + "step": 661 + }, + { + "ce_ib": 4.904020309448242, + "ce_orig": 0.5232660174369812, + "epoch": 0.19038032928319792, + "kl_loss": 0.9572215676307678, + "loss_ib": 0.014476235955953598, + "step": 662 + }, + { + "ce_ib": 11.595885276794434, + "ce_orig": 0.5337156057357788, + "epoch": 0.19038032928319792, + "kl_loss": 1.1112439632415771, + "loss_ib": 0.02270832471549511, + "step": 662 + }, + { + "ce_ib": 9.98287296295166, + "ce_orig": 0.9497804045677185, + "epoch": 0.19038032928319792, + "kl_loss": 1.0508451461791992, + "loss_ib": 0.02049132250249386, + "step": 662 + }, + { + "ce_ib": 8.917495727539062, + "ce_orig": 1.0380656719207764, + "epoch": 0.19038032928319792, + "kl_loss": 0.9809185266494751, + "loss_ib": 0.018726680427789688, + "step": 662 + }, + { + "ce_ib": 8.603903770446777, + "ce_orig": 0.8844617605209351, + "epoch": 0.19066791286217558, + "kl_loss": 0.9898391366004944, + "loss_ib": 0.01850229501724243, + "step": 663 + }, + { + "ce_ib": 8.496954917907715, + "ce_orig": 0.889788031578064, + "epoch": 0.19066791286217558, + "kl_loss": 0.968459963798523, + "loss_ib": 0.018181554973125458, + "step": 663 + }, + { + "ce_ib": 13.463947296142578, + "ce_orig": 1.5212091207504272, + "epoch": 0.19066791286217558, + "kl_loss": 1.3541009426116943, + "loss_ib": 0.02700495719909668, + "step": 663 + }, + { + "ce_ib": 7.513195991516113, + "ce_orig": 0.6632037162780762, + "epoch": 0.19066791286217558, + "kl_loss": 0.9870805144309998, + "loss_ib": 0.01738400012254715, + "step": 663 + }, + { + "ce_ib": 7.072785377502441, + "ce_orig": 0.5436831116676331, + "epoch": 0.1909554964411532, + "kl_loss": 0.9044357538223267, + "loss_ib": 0.016117142513394356, + "step": 664 + }, + { + "ce_ib": 11.165665626525879, + "ce_orig": 1.3044129610061646, + "epoch": 0.1909554964411532, + "kl_loss": 0.9187023043632507, + "loss_ib": 0.020352687686681747, + "step": 664 + }, + { + "ce_ib": 8.00759506225586, + "ce_orig": 0.5903300046920776, + "epoch": 0.1909554964411532, + "kl_loss": 0.9148058891296387, + "loss_ib": 0.017155654728412628, + "step": 664 + }, + { + "ce_ib": 9.988378524780273, + "ce_orig": 0.9803927540779114, + "epoch": 0.1909554964411532, + "kl_loss": 0.883601188659668, + "loss_ib": 0.018824391067028046, + "step": 664 + }, + { + "epoch": 0.19124308002013085, + "grad_norm": 0.1290864795446396, + "learning_rate": 9.971351508859488e-06, + "loss": 0.9177, + "step": 665 + }, + { + "ce_ib": 8.51546859741211, + "ce_orig": 0.7638845443725586, + "epoch": 0.19124308002013085, + "kl_loss": 0.8143119812011719, + "loss_ib": 0.016658587381243706, + "step": 665 + }, + { + "ce_ib": 6.580199718475342, + "ce_orig": 0.5441961884498596, + "epoch": 0.19124308002013085, + "kl_loss": 0.898180365562439, + "loss_ib": 0.015562002547085285, + "step": 665 + }, + { + "ce_ib": 9.424781799316406, + "ce_orig": 0.8517243266105652, + "epoch": 0.19124308002013085, + "kl_loss": 0.9845488667488098, + "loss_ib": 0.019270269200205803, + "step": 665 + }, + { + "ce_ib": 11.227746963500977, + "ce_orig": 0.7697399258613586, + "epoch": 0.19124308002013085, + "kl_loss": 1.012844204902649, + "loss_ib": 0.021356189623475075, + "step": 665 + }, + { + "ce_ib": 13.270841598510742, + "ce_orig": 1.091408133506775, + "epoch": 0.1915306635991085, + "kl_loss": 0.9616622924804688, + "loss_ib": 0.022887462750077248, + "step": 666 + }, + { + "ce_ib": 5.886293411254883, + "ce_orig": 0.31356731057167053, + "epoch": 0.1915306635991085, + "kl_loss": 0.971168041229248, + "loss_ib": 0.015597973950207233, + "step": 666 + }, + { + "ce_ib": 12.855634689331055, + "ce_orig": 1.4516594409942627, + "epoch": 0.1915306635991085, + "kl_loss": 0.9783580303192139, + "loss_ib": 0.022639214992523193, + "step": 666 + }, + { + "ce_ib": 9.463467597961426, + "ce_orig": 0.9096047878265381, + "epoch": 0.1915306635991085, + "kl_loss": 0.9198427200317383, + "loss_ib": 0.018661893904209137, + "step": 666 + }, + { + "ce_ib": 10.283754348754883, + "ce_orig": 1.3314762115478516, + "epoch": 0.19181824717808613, + "kl_loss": 1.057845115661621, + "loss_ib": 0.02086220681667328, + "step": 667 + }, + { + "ce_ib": 7.73128080368042, + "ce_orig": 0.6765826940536499, + "epoch": 0.19181824717808613, + "kl_loss": 0.9598665833473206, + "loss_ib": 0.017329946160316467, + "step": 667 + }, + { + "ce_ib": 13.801294326782227, + "ce_orig": 1.7749202251434326, + "epoch": 0.19181824717808613, + "kl_loss": 0.9999049305915833, + "loss_ib": 0.02380034327507019, + "step": 667 + }, + { + "ce_ib": 14.625811576843262, + "ce_orig": 1.5362874269485474, + "epoch": 0.19181824717808613, + "kl_loss": 0.9581853747367859, + "loss_ib": 0.024207664653658867, + "step": 667 + }, + { + "ce_ib": 9.781160354614258, + "ce_orig": 0.941349983215332, + "epoch": 0.19210583075706378, + "kl_loss": 0.9020639657974243, + "loss_ib": 0.018801799044013023, + "step": 668 + }, + { + "ce_ib": 10.93012523651123, + "ce_orig": 1.1870393753051758, + "epoch": 0.19210583075706378, + "kl_loss": 0.9062073230743408, + "loss_ib": 0.019992198795080185, + "step": 668 + }, + { + "ce_ib": 10.397263526916504, + "ce_orig": 0.7395128011703491, + "epoch": 0.19210583075706378, + "kl_loss": 0.9939507246017456, + "loss_ib": 0.02033677138388157, + "step": 668 + }, + { + "ce_ib": 8.197195053100586, + "ce_orig": 0.802003800868988, + "epoch": 0.19210583075706378, + "kl_loss": 1.0717787742614746, + "loss_ib": 0.018914982676506042, + "step": 668 + }, + { + "ce_ib": 8.156253814697266, + "ce_orig": 0.6003333926200867, + "epoch": 0.1923934143360414, + "kl_loss": 0.9009373188018799, + "loss_ib": 0.01716562733054161, + "step": 669 + }, + { + "ce_ib": 9.363840103149414, + "ce_orig": 0.7796534895896912, + "epoch": 0.1923934143360414, + "kl_loss": 0.9603847861289978, + "loss_ib": 0.01896768808364868, + "step": 669 + }, + { + "ce_ib": 6.624312400817871, + "ce_orig": 0.7180654406547546, + "epoch": 0.1923934143360414, + "kl_loss": 0.8511247038841248, + "loss_ib": 0.015135559253394604, + "step": 669 + }, + { + "ce_ib": 11.258011817932129, + "ce_orig": 0.6326055526733398, + "epoch": 0.1923934143360414, + "kl_loss": 1.004683256149292, + "loss_ib": 0.021304845809936523, + "step": 669 + }, + { + "epoch": 0.19268099791501905, + "grad_norm": 0.07958894968032837, + "learning_rate": 9.970515923467106e-06, + "loss": 0.8465, + "step": 670 + }, + { + "ce_ib": 7.144465923309326, + "ce_orig": 0.5346347093582153, + "epoch": 0.19268099791501905, + "kl_loss": 1.0311341285705566, + "loss_ib": 0.017455806955695152, + "step": 670 + }, + { + "ce_ib": 9.569890975952148, + "ce_orig": 1.0249342918395996, + "epoch": 0.19268099791501905, + "kl_loss": 1.0497715473175049, + "loss_ib": 0.02006760612130165, + "step": 670 + }, + { + "ce_ib": 7.342098236083984, + "ce_orig": 0.5448954105377197, + "epoch": 0.19268099791501905, + "kl_loss": 1.0126290321350098, + "loss_ib": 0.017468387261033058, + "step": 670 + }, + { + "ce_ib": 13.511815071105957, + "ce_orig": 1.6245704889297485, + "epoch": 0.19268099791501905, + "kl_loss": 0.9676916599273682, + "loss_ib": 0.02318873070180416, + "step": 670 + }, + { + "ce_ib": 9.829526901245117, + "ce_orig": 0.7800765633583069, + "epoch": 0.1929685814939967, + "kl_loss": 0.9008135199546814, + "loss_ib": 0.018837660551071167, + "step": 671 + }, + { + "ce_ib": 7.996551990509033, + "ce_orig": 0.7976639270782471, + "epoch": 0.1929685814939967, + "kl_loss": 0.9442075490951538, + "loss_ib": 0.017438627779483795, + "step": 671 + }, + { + "ce_ib": 11.391491889953613, + "ce_orig": 1.0702950954437256, + "epoch": 0.1929685814939967, + "kl_loss": 0.8962193131446838, + "loss_ib": 0.020353684201836586, + "step": 671 + }, + { + "ce_ib": 13.608918190002441, + "ce_orig": 1.1835685968399048, + "epoch": 0.1929685814939967, + "kl_loss": 1.0202008485794067, + "loss_ib": 0.02381092496216297, + "step": 671 + }, + { + "ce_ib": 7.881733417510986, + "ce_orig": 0.9604411125183105, + "epoch": 0.19325616507297433, + "kl_loss": 0.9708698391914368, + "loss_ib": 0.01759043149650097, + "step": 672 + }, + { + "ce_ib": 10.143787384033203, + "ce_orig": 0.805814802646637, + "epoch": 0.19325616507297433, + "kl_loss": 1.020028829574585, + "loss_ib": 0.020344074815511703, + "step": 672 + }, + { + "ce_ib": 8.782960891723633, + "ce_orig": 0.6452949047088623, + "epoch": 0.19325616507297433, + "kl_loss": 0.9659644961357117, + "loss_ib": 0.018442604690790176, + "step": 672 + }, + { + "ce_ib": 10.82643985748291, + "ce_orig": 0.8774537444114685, + "epoch": 0.19325616507297433, + "kl_loss": 0.8915767669677734, + "loss_ib": 0.01974220760166645, + "step": 672 + }, + { + "ce_ib": 8.120016098022461, + "ce_orig": 0.778851330280304, + "epoch": 0.19354374865195198, + "kl_loss": 0.9042708277702332, + "loss_ib": 0.01716272346675396, + "step": 673 + }, + { + "ce_ib": 10.228824615478516, + "ce_orig": 0.9065789580345154, + "epoch": 0.19354374865195198, + "kl_loss": 0.8779407143592834, + "loss_ib": 0.019008230417966843, + "step": 673 + }, + { + "ce_ib": 11.03148078918457, + "ce_orig": 1.0241479873657227, + "epoch": 0.19354374865195198, + "kl_loss": 1.204726219177246, + "loss_ib": 0.023078741505742073, + "step": 673 + }, + { + "ce_ib": 12.928085327148438, + "ce_orig": 1.197397232055664, + "epoch": 0.19354374865195198, + "kl_loss": 0.9511390328407288, + "loss_ib": 0.02243947423994541, + "step": 673 + }, + { + "ce_ib": 9.485182762145996, + "ce_orig": 0.6011760234832764, + "epoch": 0.1938313322309296, + "kl_loss": 1.0419973134994507, + "loss_ib": 0.019905155524611473, + "step": 674 + }, + { + "ce_ib": 6.017853736877441, + "ce_orig": 0.6303385496139526, + "epoch": 0.1938313322309296, + "kl_loss": 0.9145022034645081, + "loss_ib": 0.015162874944508076, + "step": 674 + }, + { + "ce_ib": 8.599544525146484, + "ce_orig": 0.5987470149993896, + "epoch": 0.1938313322309296, + "kl_loss": 0.8968786001205444, + "loss_ib": 0.017568331211805344, + "step": 674 + }, + { + "ce_ib": 5.367640018463135, + "ce_orig": 0.26319989562034607, + "epoch": 0.1938313322309296, + "kl_loss": 0.851106584072113, + "loss_ib": 0.013878704980015755, + "step": 674 + }, + { + "epoch": 0.19411891580990726, + "grad_norm": 0.08299347013235092, + "learning_rate": 9.969668362948186e-06, + "loss": 0.8623, + "step": 675 + }, + { + "ce_ib": 8.513809204101562, + "ce_orig": 0.46651700139045715, + "epoch": 0.19411891580990726, + "kl_loss": 1.1063188314437866, + "loss_ib": 0.019576996564865112, + "step": 675 + }, + { + "ce_ib": 13.300872802734375, + "ce_orig": 1.5554956197738647, + "epoch": 0.19411891580990726, + "kl_loss": 0.911888837814331, + "loss_ib": 0.0224197618663311, + "step": 675 + }, + { + "ce_ib": 10.305272102355957, + "ce_orig": 0.7289824485778809, + "epoch": 0.19411891580990726, + "kl_loss": 0.9203410148620605, + "loss_ib": 0.019508682191371918, + "step": 675 + }, + { + "ce_ib": 8.656975746154785, + "ce_orig": 0.8286035656929016, + "epoch": 0.19411891580990726, + "kl_loss": 0.9431976079940796, + "loss_ib": 0.018088949844241142, + "step": 675 + }, + { + "ce_ib": 7.818869113922119, + "ce_orig": 0.4440051317214966, + "epoch": 0.1944064993888849, + "kl_loss": 1.0188822746276855, + "loss_ib": 0.018007691949605942, + "step": 676 + }, + { + "ce_ib": 7.344069957733154, + "ce_orig": 0.45538222789764404, + "epoch": 0.1944064993888849, + "kl_loss": 1.0300064086914062, + "loss_ib": 0.017644133418798447, + "step": 676 + }, + { + "ce_ib": 7.1561384201049805, + "ce_orig": 0.5423603653907776, + "epoch": 0.1944064993888849, + "kl_loss": 0.8832270503044128, + "loss_ib": 0.015988409519195557, + "step": 676 + }, + { + "ce_ib": 9.0109281539917, + "ce_orig": 0.8102385401725769, + "epoch": 0.1944064993888849, + "kl_loss": 0.915709376335144, + "loss_ib": 0.018168022856116295, + "step": 676 + }, + { + "ce_ib": 7.42899751663208, + "ce_orig": 0.5108758807182312, + "epoch": 0.19469408296786253, + "kl_loss": 0.8757451176643372, + "loss_ib": 0.01618644967675209, + "step": 677 + }, + { + "ce_ib": 12.158177375793457, + "ce_orig": 0.9250555634498596, + "epoch": 0.19469408296786253, + "kl_loss": 0.9929431676864624, + "loss_ib": 0.02208760939538479, + "step": 677 + }, + { + "ce_ib": 9.143503189086914, + "ce_orig": 0.7566794157028198, + "epoch": 0.19469408296786253, + "kl_loss": 1.0857152938842773, + "loss_ib": 0.020000655204057693, + "step": 677 + }, + { + "ce_ib": 11.579789161682129, + "ce_orig": 1.1167807579040527, + "epoch": 0.19469408296786253, + "kl_loss": 0.9444433450698853, + "loss_ib": 0.021024221554398537, + "step": 677 + }, + { + "ce_ib": 10.260880470275879, + "ce_orig": 1.0816290378570557, + "epoch": 0.19498166654684018, + "kl_loss": 0.9158464670181274, + "loss_ib": 0.019419346004724503, + "step": 678 + }, + { + "ce_ib": 12.340543746948242, + "ce_orig": 0.7932845950126648, + "epoch": 0.19498166654684018, + "kl_loss": 0.9657367467880249, + "loss_ib": 0.021997911855578423, + "step": 678 + }, + { + "ce_ib": 9.458576202392578, + "ce_orig": 1.1390806436538696, + "epoch": 0.19498166654684018, + "kl_loss": 0.8594547510147095, + "loss_ib": 0.018053123727440834, + "step": 678 + }, + { + "ce_ib": 7.780326843261719, + "ce_orig": 0.803799569606781, + "epoch": 0.19498166654684018, + "kl_loss": 0.9818826913833618, + "loss_ib": 0.01759915240108967, + "step": 678 + }, + { + "ce_ib": 4.1620354652404785, + "ce_orig": 0.2644416093826294, + "epoch": 0.1952692501258178, + "kl_loss": 1.0103613138198853, + "loss_ib": 0.014265649020671844, + "step": 679 + }, + { + "ce_ib": 10.257436752319336, + "ce_orig": 0.8702826499938965, + "epoch": 0.1952692501258178, + "kl_loss": 0.911620020866394, + "loss_ib": 0.019373636692762375, + "step": 679 + }, + { + "ce_ib": 9.919028282165527, + "ce_orig": 0.7849615216255188, + "epoch": 0.1952692501258178, + "kl_loss": 0.982805609703064, + "loss_ib": 0.019747084006667137, + "step": 679 + }, + { + "ce_ib": 8.560178756713867, + "ce_orig": 0.5670347809791565, + "epoch": 0.1952692501258178, + "kl_loss": 0.8926770687103271, + "loss_ib": 0.017486948519945145, + "step": 679 + }, + { + "epoch": 0.19555683370479546, + "grad_norm": 0.09906060248613358, + "learning_rate": 9.968808829344692e-06, + "loss": 0.8659, + "step": 680 + }, + { + "ce_ib": 5.249537944793701, + "ce_orig": 0.43986520171165466, + "epoch": 0.19555683370479546, + "kl_loss": 1.0424385070800781, + "loss_ib": 0.01567392237484455, + "step": 680 + }, + { + "ce_ib": 14.56251049041748, + "ce_orig": 1.4373424053192139, + "epoch": 0.19555683370479546, + "kl_loss": 0.9467419385910034, + "loss_ib": 0.0240299291908741, + "step": 680 + }, + { + "ce_ib": 7.976964473724365, + "ce_orig": 0.5848947763442993, + "epoch": 0.19555683370479546, + "kl_loss": 0.7993128299713135, + "loss_ib": 0.01597009226679802, + "step": 680 + }, + { + "ce_ib": 8.310443878173828, + "ce_orig": 0.49164944887161255, + "epoch": 0.19555683370479546, + "kl_loss": 0.8992608189582825, + "loss_ib": 0.01730305142700672, + "step": 680 + }, + { + "ce_ib": 13.469533920288086, + "ce_orig": 1.6232653856277466, + "epoch": 0.1958444172837731, + "kl_loss": 0.9035540819168091, + "loss_ib": 0.022505072876811028, + "step": 681 + }, + { + "ce_ib": 7.154322624206543, + "ce_orig": 0.5702813267707825, + "epoch": 0.1958444172837731, + "kl_loss": 0.8137340545654297, + "loss_ib": 0.015291662886738777, + "step": 681 + }, + { + "ce_ib": 7.098222255706787, + "ce_orig": 0.735628068447113, + "epoch": 0.1958444172837731, + "kl_loss": 0.8927323818206787, + "loss_ib": 0.016025545075535774, + "step": 681 + }, + { + "ce_ib": 5.824950695037842, + "ce_orig": 0.766068160533905, + "epoch": 0.1958444172837731, + "kl_loss": 0.8107069730758667, + "loss_ib": 0.013932020403444767, + "step": 681 + }, + { + "ce_ib": 10.2679443359375, + "ce_orig": 0.5897117853164673, + "epoch": 0.19613200086275073, + "kl_loss": 0.8887754678726196, + "loss_ib": 0.019155697897076607, + "step": 682 + }, + { + "ce_ib": 7.825944900512695, + "ce_orig": 1.018816351890564, + "epoch": 0.19613200086275073, + "kl_loss": 0.7450103759765625, + "loss_ib": 0.0152760474011302, + "step": 682 + }, + { + "ce_ib": 10.176102638244629, + "ce_orig": 0.9543701410293579, + "epoch": 0.19613200086275073, + "kl_loss": 0.9644123911857605, + "loss_ib": 0.01982022635638714, + "step": 682 + }, + { + "ce_ib": 11.218408584594727, + "ce_orig": 1.213329553604126, + "epoch": 0.19613200086275073, + "kl_loss": 0.8925070762634277, + "loss_ib": 0.020143479108810425, + "step": 682 + }, + { + "ce_ib": 11.08190631866455, + "ce_orig": 1.1313570737838745, + "epoch": 0.19641958444172838, + "kl_loss": 0.9318565130233765, + "loss_ib": 0.02040047198534012, + "step": 683 + }, + { + "ce_ib": 9.102522850036621, + "ce_orig": 1.1076076030731201, + "epoch": 0.19641958444172838, + "kl_loss": 0.9036017060279846, + "loss_ib": 0.018138539046049118, + "step": 683 + }, + { + "ce_ib": 8.345836639404297, + "ce_orig": 0.9177960753440857, + "epoch": 0.19641958444172838, + "kl_loss": 0.9092356562614441, + "loss_ib": 0.017438193783164024, + "step": 683 + }, + { + "ce_ib": 13.083810806274414, + "ce_orig": 1.4744526147842407, + "epoch": 0.19641958444172838, + "kl_loss": 0.8807121515274048, + "loss_ib": 0.021890930831432343, + "step": 683 + }, + { + "ce_ib": 9.102757453918457, + "ce_orig": 0.7407585978507996, + "epoch": 0.196707168020706, + "kl_loss": 0.9970856308937073, + "loss_ib": 0.019073612987995148, + "step": 684 + }, + { + "ce_ib": 8.29672908782959, + "ce_orig": 0.6851121187210083, + "epoch": 0.196707168020706, + "kl_loss": 0.9605600833892822, + "loss_ib": 0.017902329564094543, + "step": 684 + }, + { + "ce_ib": 8.740421295166016, + "ce_orig": 0.8621047735214233, + "epoch": 0.196707168020706, + "kl_loss": 0.8722232580184937, + "loss_ib": 0.017462654039263725, + "step": 684 + }, + { + "ce_ib": 7.918392658233643, + "ce_orig": 0.7503530979156494, + "epoch": 0.196707168020706, + "kl_loss": 0.7531979084014893, + "loss_ib": 0.015450372360646725, + "step": 684 + }, + { + "epoch": 0.19699475159968366, + "grad_norm": 0.08257844299077988, + "learning_rate": 9.967937324727446e-06, + "loss": 0.8724, + "step": 685 + }, + { + "ce_ib": 11.491483688354492, + "ce_orig": 1.5021545886993408, + "epoch": 0.19699475159968366, + "kl_loss": 0.8115209341049194, + "loss_ib": 0.0196066927164793, + "step": 685 + }, + { + "ce_ib": 13.129999160766602, + "ce_orig": 1.1804494857788086, + "epoch": 0.19699475159968366, + "kl_loss": 0.7936393022537231, + "loss_ib": 0.021066393703222275, + "step": 685 + }, + { + "ce_ib": 10.34216022491455, + "ce_orig": 1.2397124767303467, + "epoch": 0.19699475159968366, + "kl_loss": 0.7960874438285828, + "loss_ib": 0.01830303482711315, + "step": 685 + }, + { + "ce_ib": 11.881531715393066, + "ce_orig": 1.3413195610046387, + "epoch": 0.19699475159968366, + "kl_loss": 0.913062334060669, + "loss_ib": 0.02101215347647667, + "step": 685 + }, + { + "ce_ib": 6.886374473571777, + "ce_orig": 0.8009768128395081, + "epoch": 0.1972823351786613, + "kl_loss": 0.6990371942520142, + "loss_ib": 0.013876745477318764, + "step": 686 + }, + { + "ce_ib": 10.036053657531738, + "ce_orig": 1.1837598085403442, + "epoch": 0.1972823351786613, + "kl_loss": 0.8260252475738525, + "loss_ib": 0.01829630509018898, + "step": 686 + }, + { + "ce_ib": 9.117568016052246, + "ce_orig": 0.6687161922454834, + "epoch": 0.1972823351786613, + "kl_loss": 0.8096814155578613, + "loss_ib": 0.01721438206732273, + "step": 686 + }, + { + "ce_ib": 8.085248947143555, + "ce_orig": 1.340101718902588, + "epoch": 0.1972823351786613, + "kl_loss": 0.7643517851829529, + "loss_ib": 0.015728766098618507, + "step": 686 + }, + { + "ce_ib": 11.919937133789062, + "ce_orig": 1.2236392498016357, + "epoch": 0.19756991875763893, + "kl_loss": 0.7394053339958191, + "loss_ib": 0.0193139910697937, + "step": 687 + }, + { + "ce_ib": 8.260920524597168, + "ce_orig": 0.9186649322509766, + "epoch": 0.19756991875763893, + "kl_loss": 0.7759698629379272, + "loss_ib": 0.016020620241761208, + "step": 687 + }, + { + "ce_ib": 8.8240966796875, + "ce_orig": 1.0625789165496826, + "epoch": 0.19756991875763893, + "kl_loss": 0.9100702404975891, + "loss_ib": 0.017924798652529716, + "step": 687 + }, + { + "ce_ib": 12.304547309875488, + "ce_orig": 0.936794638633728, + "epoch": 0.19756991875763893, + "kl_loss": 0.930564820766449, + "loss_ib": 0.021610194817185402, + "step": 687 + }, + { + "ce_ib": 8.338387489318848, + "ce_orig": 0.7846511602401733, + "epoch": 0.1978575023366166, + "kl_loss": 0.8309119939804077, + "loss_ib": 0.01664750836789608, + "step": 688 + }, + { + "ce_ib": 7.630675315856934, + "ce_orig": 0.6111834645271301, + "epoch": 0.1978575023366166, + "kl_loss": 0.9697389602661133, + "loss_ib": 0.017328064888715744, + "step": 688 + }, + { + "ce_ib": 11.70467472076416, + "ce_orig": 0.7941328287124634, + "epoch": 0.1978575023366166, + "kl_loss": 0.8729178309440613, + "loss_ib": 0.020433852449059486, + "step": 688 + }, + { + "ce_ib": 15.2159423828125, + "ce_orig": 1.038719654083252, + "epoch": 0.1978575023366166, + "kl_loss": 0.9375836253166199, + "loss_ib": 0.024591779336333275, + "step": 688 + }, + { + "ce_ib": 9.57465648651123, + "ce_orig": 0.7560713887214661, + "epoch": 0.1981450859155942, + "kl_loss": 0.8299669027328491, + "loss_ib": 0.017874324694275856, + "step": 689 + }, + { + "ce_ib": 7.711019515991211, + "ce_orig": 0.8740010261535645, + "epoch": 0.1981450859155942, + "kl_loss": 0.9161753058433533, + "loss_ib": 0.016872772946953773, + "step": 689 + }, + { + "ce_ib": 8.346826553344727, + "ce_orig": 0.6488251686096191, + "epoch": 0.1981450859155942, + "kl_loss": 0.9876278042793274, + "loss_ib": 0.018223104998469353, + "step": 689 + }, + { + "ce_ib": 11.599117279052734, + "ce_orig": 1.0851843357086182, + "epoch": 0.1981450859155942, + "kl_loss": 0.8547726273536682, + "loss_ib": 0.020146843045949936, + "step": 689 + }, + { + "epoch": 0.19843266949457186, + "grad_norm": 0.09532356262207031, + "learning_rate": 9.9670538511961e-06, + "loss": 0.8666, + "step": 690 + }, + { + "ce_ib": 9.944104194641113, + "ce_orig": 0.9654097557067871, + "epoch": 0.19843266949457186, + "kl_loss": 0.996139645576477, + "loss_ib": 0.01990550011396408, + "step": 690 + }, + { + "ce_ib": 8.787851333618164, + "ce_orig": 0.6779507994651794, + "epoch": 0.19843266949457186, + "kl_loss": 0.9373599886894226, + "loss_ib": 0.018161451444029808, + "step": 690 + }, + { + "ce_ib": 7.5119452476501465, + "ce_orig": 1.0238131284713745, + "epoch": 0.19843266949457186, + "kl_loss": 0.6883217096328735, + "loss_ib": 0.014395162463188171, + "step": 690 + }, + { + "ce_ib": 12.74599552154541, + "ce_orig": 1.3570128679275513, + "epoch": 0.19843266949457186, + "kl_loss": 0.8598670363426208, + "loss_ib": 0.021344665437936783, + "step": 690 + }, + { + "ce_ib": 9.191486358642578, + "ce_orig": 0.5052329301834106, + "epoch": 0.1987202530735495, + "kl_loss": 0.8542582988739014, + "loss_ib": 0.017734069377183914, + "step": 691 + }, + { + "ce_ib": 10.381093978881836, + "ce_orig": 1.0137697458267212, + "epoch": 0.1987202530735495, + "kl_loss": 0.769364595413208, + "loss_ib": 0.01807473972439766, + "step": 691 + }, + { + "ce_ib": 8.68432331085205, + "ce_orig": 0.7807736992835999, + "epoch": 0.1987202530735495, + "kl_loss": 0.8298584222793579, + "loss_ib": 0.0169829074293375, + "step": 691 + }, + { + "ce_ib": 6.291802883148193, + "ce_orig": 0.3723978102207184, + "epoch": 0.1987202530735495, + "kl_loss": 0.8000516891479492, + "loss_ib": 0.014292319305241108, + "step": 691 + }, + { + "ce_ib": 12.264336585998535, + "ce_orig": 1.4954090118408203, + "epoch": 0.19900783665252714, + "kl_loss": 0.796958327293396, + "loss_ib": 0.020233919844031334, + "step": 692 + }, + { + "ce_ib": 10.001005172729492, + "ce_orig": 1.2661001682281494, + "epoch": 0.19900783665252714, + "kl_loss": 0.8150477409362793, + "loss_ib": 0.018151482567191124, + "step": 692 + }, + { + "ce_ib": 9.576811790466309, + "ce_orig": 1.3062323331832886, + "epoch": 0.19900783665252714, + "kl_loss": 0.7767390012741089, + "loss_ib": 0.017344200983643532, + "step": 692 + }, + { + "ce_ib": 6.84829044342041, + "ce_orig": 0.5050943493843079, + "epoch": 0.19900783665252714, + "kl_loss": 0.8827191591262817, + "loss_ib": 0.015675483271479607, + "step": 692 + }, + { + "ce_ib": 8.917977333068848, + "ce_orig": 1.199506163597107, + "epoch": 0.1992954202315048, + "kl_loss": 0.6274911165237427, + "loss_ib": 0.015192887745797634, + "step": 693 + }, + { + "ce_ib": 7.1223883628845215, + "ce_orig": 0.7409626841545105, + "epoch": 0.1992954202315048, + "kl_loss": 0.7926149368286133, + "loss_ib": 0.015048536472022533, + "step": 693 + }, + { + "ce_ib": 11.097723007202148, + "ce_orig": 1.1110416650772095, + "epoch": 0.1992954202315048, + "kl_loss": 0.8221895694732666, + "loss_ib": 0.019319618120789528, + "step": 693 + }, + { + "ce_ib": 10.933575630187988, + "ce_orig": 0.7388496994972229, + "epoch": 0.1992954202315048, + "kl_loss": 1.0315768718719482, + "loss_ib": 0.02124934457242489, + "step": 693 + }, + { + "ce_ib": 8.662981986999512, + "ce_orig": 0.5169118642807007, + "epoch": 0.1995830038104824, + "kl_loss": 0.9072721004486084, + "loss_ib": 0.01773570291697979, + "step": 694 + }, + { + "ce_ib": 11.462176322937012, + "ce_orig": 1.4820420742034912, + "epoch": 0.1995830038104824, + "kl_loss": 0.8469343185424805, + "loss_ib": 0.019931519404053688, + "step": 694 + }, + { + "ce_ib": 8.445666313171387, + "ce_orig": 0.9313681721687317, + "epoch": 0.1995830038104824, + "kl_loss": 0.6418800354003906, + "loss_ib": 0.014864466153085232, + "step": 694 + }, + { + "ce_ib": 7.5244975090026855, + "ce_orig": 0.591560423374176, + "epoch": 0.1995830038104824, + "kl_loss": 0.8560516834259033, + "loss_ib": 0.01608501374721527, + "step": 694 + }, + { + "epoch": 0.19987058738946006, + "grad_norm": 0.10591301321983337, + "learning_rate": 9.966158410879148e-06, + "loss": 0.9255, + "step": 695 + }, + { + "ce_ib": 9.375509262084961, + "ce_orig": 1.00591242313385, + "epoch": 0.19987058738946006, + "kl_loss": 0.9248544573783875, + "loss_ib": 0.01862405426800251, + "step": 695 + }, + { + "ce_ib": 11.656967163085938, + "ce_orig": 1.2776415348052979, + "epoch": 0.19987058738946006, + "kl_loss": 0.7392134666442871, + "loss_ib": 0.019049102440476418, + "step": 695 + }, + { + "ce_ib": 10.279980659484863, + "ce_orig": 0.7536091804504395, + "epoch": 0.19987058738946006, + "kl_loss": 0.7704986333847046, + "loss_ib": 0.017984967678785324, + "step": 695 + }, + { + "ce_ib": 9.143665313720703, + "ce_orig": 0.7292720079421997, + "epoch": 0.19987058738946006, + "kl_loss": 0.8170077800750732, + "loss_ib": 0.017313743010163307, + "step": 695 + }, + { + "ce_ib": 10.739947319030762, + "ce_orig": 0.5883066058158875, + "epoch": 0.20015817096843772, + "kl_loss": 0.9453576803207397, + "loss_ib": 0.020193524658679962, + "step": 696 + }, + { + "ce_ib": 8.81908130645752, + "ce_orig": 1.073760986328125, + "epoch": 0.20015817096843772, + "kl_loss": 0.6506307125091553, + "loss_ib": 0.015325388871133327, + "step": 696 + }, + { + "ce_ib": 5.927511692047119, + "ce_orig": 0.6327902674674988, + "epoch": 0.20015817096843772, + "kl_loss": 0.8255484104156494, + "loss_ib": 0.01418299600481987, + "step": 696 + }, + { + "ce_ib": 8.285160064697266, + "ce_orig": 0.7552601099014282, + "epoch": 0.20015817096843772, + "kl_loss": 1.0531952381134033, + "loss_ib": 0.01881711184978485, + "step": 696 + }, + { + "ce_ib": 15.350908279418945, + "ce_orig": 1.7327600717544556, + "epoch": 0.20044575454741534, + "kl_loss": 0.9541932940483093, + "loss_ib": 0.024892840534448624, + "step": 697 + }, + { + "ce_ib": 5.672478199005127, + "ce_orig": 0.4321776032447815, + "epoch": 0.20044575454741534, + "kl_loss": 1.0703692436218262, + "loss_ib": 0.01637617126107216, + "step": 697 + }, + { + "ce_ib": 9.48302936553955, + "ce_orig": 1.0044292211532593, + "epoch": 0.20044575454741534, + "kl_loss": 0.827777087688446, + "loss_ib": 0.017760800197720528, + "step": 697 + }, + { + "ce_ib": 12.105079650878906, + "ce_orig": 0.9517088532447815, + "epoch": 0.20044575454741534, + "kl_loss": 0.8866432905197144, + "loss_ib": 0.0209715124219656, + "step": 697 + }, + { + "ce_ib": 11.073482513427734, + "ce_orig": 1.1429903507232666, + "epoch": 0.200733338126393, + "kl_loss": 0.6459304690361023, + "loss_ib": 0.01753278635442257, + "step": 698 + }, + { + "ce_ib": 10.372661590576172, + "ce_orig": 0.9689992070198059, + "epoch": 0.200733338126393, + "kl_loss": 0.8759998679161072, + "loss_ib": 0.01913265883922577, + "step": 698 + }, + { + "ce_ib": 11.510743141174316, + "ce_orig": 0.5491771101951599, + "epoch": 0.200733338126393, + "kl_loss": 0.8505983352661133, + "loss_ib": 0.020016726106405258, + "step": 698 + }, + { + "ce_ib": 6.4225029945373535, + "ce_orig": 0.5128731727600098, + "epoch": 0.200733338126393, + "kl_loss": 0.6717511415481567, + "loss_ib": 0.013140014372766018, + "step": 698 + }, + { + "ce_ib": 13.731460571289062, + "ce_orig": 0.9959490895271301, + "epoch": 0.20102092170537061, + "kl_loss": 0.8257856369018555, + "loss_ib": 0.02198931574821472, + "step": 699 + }, + { + "ce_ib": 5.540718078613281, + "ce_orig": 0.5061084032058716, + "epoch": 0.20102092170537061, + "kl_loss": 0.750801146030426, + "loss_ib": 0.013048729859292507, + "step": 699 + }, + { + "ce_ib": 7.21422004699707, + "ce_orig": 0.7217543721199036, + "epoch": 0.20102092170537061, + "kl_loss": 0.7603013515472412, + "loss_ib": 0.014817233197391033, + "step": 699 + }, + { + "ce_ib": 9.123298645019531, + "ce_orig": 0.6771790981292725, + "epoch": 0.20102092170537061, + "kl_loss": 0.8053810596466064, + "loss_ib": 0.01717710867524147, + "step": 699 + }, + { + "epoch": 0.20130850528434827, + "grad_norm": 0.10600654780864716, + "learning_rate": 9.965251005933915e-06, + "loss": 0.8397, + "step": 700 + }, + { + "ce_ib": 6.327905654907227, + "ce_orig": 0.5939226746559143, + "epoch": 0.20130850528434827, + "kl_loss": 0.6826160550117493, + "loss_ib": 0.013154065236449242, + "step": 700 + }, + { + "ce_ib": 9.836183547973633, + "ce_orig": 0.9476636648178101, + "epoch": 0.20130850528434827, + "kl_loss": 0.7302706837654114, + "loss_ib": 0.01713889092206955, + "step": 700 + }, + { + "ce_ib": 11.0946683883667, + "ce_orig": 1.1720821857452393, + "epoch": 0.20130850528434827, + "kl_loss": 0.8669959306716919, + "loss_ib": 0.019764628261327744, + "step": 700 + }, + { + "ce_ib": 8.813426971435547, + "ce_orig": 1.1299982070922852, + "epoch": 0.20130850528434827, + "kl_loss": 0.6991374492645264, + "loss_ib": 0.015804801136255264, + "step": 700 + }, + { + "ce_ib": 8.754616737365723, + "ce_orig": 1.024243950843811, + "epoch": 0.20159608886332592, + "kl_loss": 0.6542124152183533, + "loss_ib": 0.01529674045741558, + "step": 701 + }, + { + "ce_ib": 7.708653450012207, + "ce_orig": 0.6619385480880737, + "epoch": 0.20159608886332592, + "kl_loss": 0.6982452273368835, + "loss_ib": 0.014691106043756008, + "step": 701 + }, + { + "ce_ib": 12.858272552490234, + "ce_orig": 1.6826236248016357, + "epoch": 0.20159608886332592, + "kl_loss": 0.752007007598877, + "loss_ib": 0.020378341898322105, + "step": 701 + }, + { + "ce_ib": 8.944458961486816, + "ce_orig": 1.0155476331710815, + "epoch": 0.20159608886332592, + "kl_loss": 0.7168185710906982, + "loss_ib": 0.016112644225358963, + "step": 701 + }, + { + "ce_ib": 13.010807991027832, + "ce_orig": 0.88566654920578, + "epoch": 0.20188367244230354, + "kl_loss": 0.6652591228485107, + "loss_ib": 0.01966339908540249, + "step": 702 + }, + { + "ce_ib": 10.586206436157227, + "ce_orig": 1.103887677192688, + "epoch": 0.20188367244230354, + "kl_loss": 0.8720508813858032, + "loss_ib": 0.019306715577840805, + "step": 702 + }, + { + "ce_ib": 10.615009307861328, + "ce_orig": 0.6724156141281128, + "epoch": 0.20188367244230354, + "kl_loss": 0.9320215582847595, + "loss_ib": 0.01993522420525551, + "step": 702 + }, + { + "ce_ib": 8.414154052734375, + "ce_orig": 0.5818290710449219, + "epoch": 0.20188367244230354, + "kl_loss": 0.7827242612838745, + "loss_ib": 0.016241395846009254, + "step": 702 + }, + { + "ce_ib": 12.710061073303223, + "ce_orig": 1.1648833751678467, + "epoch": 0.2021712560212812, + "kl_loss": 0.8374737501144409, + "loss_ib": 0.021084798499941826, + "step": 703 + }, + { + "ce_ib": 7.598687171936035, + "ce_orig": 0.5311350226402283, + "epoch": 0.2021712560212812, + "kl_loss": 0.6079769134521484, + "loss_ib": 0.013678456656634808, + "step": 703 + }, + { + "ce_ib": 9.277694702148438, + "ce_orig": 0.9339279532432556, + "epoch": 0.2021712560212812, + "kl_loss": 0.8527544736862183, + "loss_ib": 0.01780523918569088, + "step": 703 + }, + { + "ce_ib": 10.174999237060547, + "ce_orig": 1.0432989597320557, + "epoch": 0.2021712560212812, + "kl_loss": 0.7690742015838623, + "loss_ib": 0.0178657416254282, + "step": 703 + }, + { + "ce_ib": 14.90958309173584, + "ce_orig": 1.5392428636550903, + "epoch": 0.20245883960025882, + "kl_loss": 0.7166196703910828, + "loss_ib": 0.022075779736042023, + "step": 704 + }, + { + "ce_ib": 10.524937629699707, + "ce_orig": 0.6721472144126892, + "epoch": 0.20245883960025882, + "kl_loss": 0.9726794958114624, + "loss_ib": 0.02025173231959343, + "step": 704 + }, + { + "ce_ib": 5.51300048828125, + "ce_orig": 0.6144857406616211, + "epoch": 0.20245883960025882, + "kl_loss": 0.5522742867469788, + "loss_ib": 0.011035742238163948, + "step": 704 + }, + { + "ce_ib": 10.596753120422363, + "ce_orig": 1.102412223815918, + "epoch": 0.20245883960025882, + "kl_loss": 0.6392167806625366, + "loss_ib": 0.01698892004787922, + "step": 704 + }, + { + "epoch": 0.20274642317923647, + "grad_norm": 0.10856325924396515, + "learning_rate": 9.96433163854655e-06, + "loss": 0.8989, + "step": 705 + }, + { + "ce_ib": 8.712186813354492, + "ce_orig": 0.788031280040741, + "epoch": 0.20274642317923647, + "kl_loss": 1.01998770236969, + "loss_ib": 0.018912063911557198, + "step": 705 + }, + { + "ce_ib": 7.393013954162598, + "ce_orig": 0.6134517788887024, + "epoch": 0.20274642317923647, + "kl_loss": 0.6545846462249756, + "loss_ib": 0.013938860967755318, + "step": 705 + }, + { + "ce_ib": 8.60453987121582, + "ce_orig": 0.6714913249015808, + "epoch": 0.20274642317923647, + "kl_loss": 0.8573524951934814, + "loss_ib": 0.017178066074848175, + "step": 705 + }, + { + "ce_ib": 8.413896560668945, + "ce_orig": 0.7840592265129089, + "epoch": 0.20274642317923647, + "kl_loss": 0.671722412109375, + "loss_ib": 0.015131120570003986, + "step": 705 + }, + { + "ce_ib": 8.781254768371582, + "ce_orig": 1.1526238918304443, + "epoch": 0.20303400675821412, + "kl_loss": 0.579412579536438, + "loss_ib": 0.014575380831956863, + "step": 706 + }, + { + "ce_ib": 9.846917152404785, + "ce_orig": 0.8160407543182373, + "epoch": 0.20303400675821412, + "kl_loss": 0.7465524673461914, + "loss_ib": 0.017312441021203995, + "step": 706 + }, + { + "ce_ib": 7.303924560546875, + "ce_orig": 0.45040014386177063, + "epoch": 0.20303400675821412, + "kl_loss": 0.5369355082511902, + "loss_ib": 0.012673280201852322, + "step": 706 + }, + { + "ce_ib": 9.779562950134277, + "ce_orig": 0.7343361973762512, + "epoch": 0.20303400675821412, + "kl_loss": 0.6700093150138855, + "loss_ib": 0.016479656100273132, + "step": 706 + }, + { + "ce_ib": 13.944967269897461, + "ce_orig": 1.2236454486846924, + "epoch": 0.20332159033719174, + "kl_loss": 0.7157278060913086, + "loss_ib": 0.021102245897054672, + "step": 707 + }, + { + "ce_ib": 11.342824935913086, + "ce_orig": 1.2302511930465698, + "epoch": 0.20332159033719174, + "kl_loss": 0.6444662809371948, + "loss_ib": 0.01778748631477356, + "step": 707 + }, + { + "ce_ib": 12.286307334899902, + "ce_orig": 1.2609407901763916, + "epoch": 0.20332159033719174, + "kl_loss": 0.890746533870697, + "loss_ib": 0.021193772554397583, + "step": 707 + }, + { + "ce_ib": 8.960893630981445, + "ce_orig": 0.8889079093933105, + "epoch": 0.20332159033719174, + "kl_loss": 0.6222097873687744, + "loss_ib": 0.015182990580797195, + "step": 707 + }, + { + "ce_ib": 8.693357467651367, + "ce_orig": 0.9998373985290527, + "epoch": 0.2036091739161694, + "kl_loss": 0.615609884262085, + "loss_ib": 0.014849456027150154, + "step": 708 + }, + { + "ce_ib": 8.566075325012207, + "ce_orig": 0.7132963538169861, + "epoch": 0.2036091739161694, + "kl_loss": 0.6065419316291809, + "loss_ib": 0.014631494879722595, + "step": 708 + }, + { + "ce_ib": 10.03363037109375, + "ce_orig": 0.62996906042099, + "epoch": 0.2036091739161694, + "kl_loss": 0.7953388690948486, + "loss_ib": 0.017987018451094627, + "step": 708 + }, + { + "ce_ib": 4.836100101470947, + "ce_orig": 0.8530847430229187, + "epoch": 0.2036091739161694, + "kl_loss": 0.5656656622886658, + "loss_ib": 0.01049275603145361, + "step": 708 + }, + { + "ce_ib": 18.170930862426758, + "ce_orig": 2.1947522163391113, + "epoch": 0.20389675749514702, + "kl_loss": 0.7079334259033203, + "loss_ib": 0.0252502653747797, + "step": 709 + }, + { + "ce_ib": 7.828283309936523, + "ce_orig": 0.9819263219833374, + "epoch": 0.20389675749514702, + "kl_loss": 0.6801222562789917, + "loss_ib": 0.014629505574703217, + "step": 709 + }, + { + "ce_ib": 10.20206069946289, + "ce_orig": 1.1113237142562866, + "epoch": 0.20389675749514702, + "kl_loss": 0.7220378518104553, + "loss_ib": 0.01742243953049183, + "step": 709 + }, + { + "ce_ib": 10.351544380187988, + "ce_orig": 1.0983593463897705, + "epoch": 0.20389675749514702, + "kl_loss": 0.5909432172775269, + "loss_ib": 0.01626097597181797, + "step": 709 + }, + { + "epoch": 0.20418434107412467, + "grad_norm": 0.08941768109798431, + "learning_rate": 9.96340031093202e-06, + "loss": 0.9056, + "step": 710 + }, + { + "ce_ib": 11.825169563293457, + "ce_orig": 0.9450059533119202, + "epoch": 0.20418434107412467, + "kl_loss": 0.6044723391532898, + "loss_ib": 0.017869891598820686, + "step": 710 + }, + { + "ce_ib": 7.037207126617432, + "ce_orig": 0.7366377711296082, + "epoch": 0.20418434107412467, + "kl_loss": 0.6832977533340454, + "loss_ib": 0.013870184309780598, + "step": 710 + }, + { + "ce_ib": 5.916614055633545, + "ce_orig": 0.4967573285102844, + "epoch": 0.20418434107412467, + "kl_loss": 0.7460091710090637, + "loss_ib": 0.013376705348491669, + "step": 710 + }, + { + "ce_ib": 6.968353271484375, + "ce_orig": 0.7892439961433411, + "epoch": 0.20418434107412467, + "kl_loss": 0.6605713963508606, + "loss_ib": 0.013574067503213882, + "step": 710 + }, + { + "ce_ib": 6.03911018371582, + "ce_orig": 0.5401313304901123, + "epoch": 0.20447192465310232, + "kl_loss": 0.6317548155784607, + "loss_ib": 0.012356657534837723, + "step": 711 + }, + { + "ce_ib": 14.128073692321777, + "ce_orig": 1.5721606016159058, + "epoch": 0.20447192465310232, + "kl_loss": 0.6152846813201904, + "loss_ib": 0.02028091810643673, + "step": 711 + }, + { + "ce_ib": 13.067774772644043, + "ce_orig": 1.452383279800415, + "epoch": 0.20447192465310232, + "kl_loss": 0.6436014175415039, + "loss_ib": 0.019503789022564888, + "step": 711 + }, + { + "ce_ib": 8.091301918029785, + "ce_orig": 0.7273240685462952, + "epoch": 0.20447192465310232, + "kl_loss": 0.6026842594146729, + "loss_ib": 0.014118144288659096, + "step": 711 + }, + { + "ce_ib": 8.550193786621094, + "ce_orig": 0.7258903980255127, + "epoch": 0.20475950823207995, + "kl_loss": 0.6672005653381348, + "loss_ib": 0.015222198329865932, + "step": 712 + }, + { + "ce_ib": 6.334939002990723, + "ce_orig": 0.5934350490570068, + "epoch": 0.20475950823207995, + "kl_loss": 0.5275875329971313, + "loss_ib": 0.011610814370214939, + "step": 712 + }, + { + "ce_ib": 7.126665115356445, + "ce_orig": 0.6666408181190491, + "epoch": 0.20475950823207995, + "kl_loss": 0.5349841117858887, + "loss_ib": 0.01247650571167469, + "step": 712 + }, + { + "ce_ib": 10.216915130615234, + "ce_orig": 0.9188041090965271, + "epoch": 0.20475950823207995, + "kl_loss": 0.7570939064025879, + "loss_ib": 0.017787855118513107, + "step": 712 + }, + { + "ce_ib": 9.504396438598633, + "ce_orig": 0.8975003957748413, + "epoch": 0.2050470918110576, + "kl_loss": 0.6097875833511353, + "loss_ib": 0.01560227107256651, + "step": 713 + }, + { + "ce_ib": 9.296903610229492, + "ce_orig": 1.331569790840149, + "epoch": 0.2050470918110576, + "kl_loss": 0.6761667728424072, + "loss_ib": 0.01605857163667679, + "step": 713 + }, + { + "ce_ib": 7.583889961242676, + "ce_orig": 0.712594211101532, + "epoch": 0.2050470918110576, + "kl_loss": 0.5781969428062439, + "loss_ib": 0.013365860097110271, + "step": 713 + }, + { + "ce_ib": 11.214754104614258, + "ce_orig": 1.008966088294983, + "epoch": 0.2050470918110576, + "kl_loss": 0.5274929404258728, + "loss_ib": 0.016489684581756592, + "step": 713 + }, + { + "ce_ib": 13.248461723327637, + "ce_orig": 1.7106783390045166, + "epoch": 0.20533467539003522, + "kl_loss": 0.4616009593009949, + "loss_ib": 0.017864469438791275, + "step": 714 + }, + { + "ce_ib": 10.139266967773438, + "ce_orig": 0.7416722178459167, + "epoch": 0.20533467539003522, + "kl_loss": 0.49146589636802673, + "loss_ib": 0.015053926035761833, + "step": 714 + }, + { + "ce_ib": 6.59840202331543, + "ce_orig": 0.7029027342796326, + "epoch": 0.20533467539003522, + "kl_loss": 0.5324534177780151, + "loss_ib": 0.011922935955226421, + "step": 714 + }, + { + "ce_ib": 8.762123107910156, + "ce_orig": 0.8172003030776978, + "epoch": 0.20533467539003522, + "kl_loss": 0.5413413047790527, + "loss_ib": 0.0141755361109972, + "step": 714 + }, + { + "epoch": 0.20562225896901287, + "grad_norm": 0.09573056548833847, + "learning_rate": 9.962457025334114e-06, + "loss": 0.8855, + "step": 715 + }, + { + "ce_ib": 8.281875610351562, + "ce_orig": 0.8879901170730591, + "epoch": 0.20562225896901287, + "kl_loss": 0.47865283489227295, + "loss_ib": 0.01306840404868126, + "step": 715 + }, + { + "ce_ib": 7.326986789703369, + "ce_orig": 0.7315509915351868, + "epoch": 0.20562225896901287, + "kl_loss": 0.5715488195419312, + "loss_ib": 0.01304247509688139, + "step": 715 + }, + { + "ce_ib": 9.936646461486816, + "ce_orig": 0.5313572883605957, + "epoch": 0.20562225896901287, + "kl_loss": 0.7098532915115356, + "loss_ib": 0.01703517884016037, + "step": 715 + }, + { + "ce_ib": 8.325387954711914, + "ce_orig": 1.0558624267578125, + "epoch": 0.20562225896901287, + "kl_loss": 0.510680079460144, + "loss_ib": 0.013432187959551811, + "step": 715 + }, + { + "ce_ib": 6.32294225692749, + "ce_orig": 0.755020260810852, + "epoch": 0.2059098425479905, + "kl_loss": 0.4962891638278961, + "loss_ib": 0.011285834014415741, + "step": 716 + }, + { + "ce_ib": 6.709957122802734, + "ce_orig": 0.7449535131454468, + "epoch": 0.2059098425479905, + "kl_loss": 0.5581060647964478, + "loss_ib": 0.012291017919778824, + "step": 716 + }, + { + "ce_ib": 11.16142749786377, + "ce_orig": 1.2088764905929565, + "epoch": 0.2059098425479905, + "kl_loss": 0.5308176875114441, + "loss_ib": 0.016469605267047882, + "step": 716 + }, + { + "ce_ib": 8.455696105957031, + "ce_orig": 0.8597549200057983, + "epoch": 0.2059098425479905, + "kl_loss": 0.5310311317443848, + "loss_ib": 0.013766007497906685, + "step": 716 + }, + { + "ce_ib": 9.843843460083008, + "ce_orig": 0.9751378297805786, + "epoch": 0.20619742612696815, + "kl_loss": 0.7993011474609375, + "loss_ib": 0.017836853861808777, + "step": 717 + }, + { + "ce_ib": 8.898174285888672, + "ce_orig": 0.609527051448822, + "epoch": 0.20619742612696815, + "kl_loss": 0.9439896941184998, + "loss_ib": 0.018338071182370186, + "step": 717 + }, + { + "ce_ib": 12.661697387695312, + "ce_orig": 1.3917475938796997, + "epoch": 0.20619742612696815, + "kl_loss": 0.6134680509567261, + "loss_ib": 0.01879637874662876, + "step": 717 + }, + { + "ce_ib": 4.8259477615356445, + "ce_orig": 0.48925209045410156, + "epoch": 0.20619742612696815, + "kl_loss": 0.5553791522979736, + "loss_ib": 0.010379738174378872, + "step": 717 + }, + { + "ce_ib": 12.544927597045898, + "ce_orig": 0.7721540331840515, + "epoch": 0.2064850097059458, + "kl_loss": 0.4881455898284912, + "loss_ib": 0.01742638275027275, + "step": 718 + }, + { + "ce_ib": 10.139900207519531, + "ce_orig": 0.9012062549591064, + "epoch": 0.2064850097059458, + "kl_loss": 0.637915313243866, + "loss_ib": 0.016519052907824516, + "step": 718 + }, + { + "ce_ib": 10.471162796020508, + "ce_orig": 1.1879448890686035, + "epoch": 0.2064850097059458, + "kl_loss": 0.8380963802337646, + "loss_ib": 0.018852125853300095, + "step": 718 + }, + { + "ce_ib": 9.023296356201172, + "ce_orig": 0.45530980825424194, + "epoch": 0.2064850097059458, + "kl_loss": 0.6838054656982422, + "loss_ib": 0.015861351042985916, + "step": 718 + }, + { + "ce_ib": 13.007994651794434, + "ce_orig": 1.3200677633285522, + "epoch": 0.20677259328492342, + "kl_loss": 0.5242291688919067, + "loss_ib": 0.01825028657913208, + "step": 719 + }, + { + "ce_ib": 12.970458030700684, + "ce_orig": 1.702121376991272, + "epoch": 0.20677259328492342, + "kl_loss": 0.502326250076294, + "loss_ib": 0.01799372024834156, + "step": 719 + }, + { + "ce_ib": 5.795266151428223, + "ce_orig": 0.5755612254142761, + "epoch": 0.20677259328492342, + "kl_loss": 0.5967477560043335, + "loss_ib": 0.011762742884457111, + "step": 719 + }, + { + "ce_ib": 10.548118591308594, + "ce_orig": 0.9648749828338623, + "epoch": 0.20677259328492342, + "kl_loss": 0.5070229768753052, + "loss_ib": 0.015618347562849522, + "step": 719 + }, + { + "epoch": 0.20706017686390107, + "grad_norm": 0.09754003584384918, + "learning_rate": 9.961501784025423e-06, + "loss": 0.8849, + "step": 720 + }, + { + "ce_ib": 6.8866190910339355, + "ce_orig": 0.45583146810531616, + "epoch": 0.20706017686390107, + "kl_loss": 0.4389684200286865, + "loss_ib": 0.011276302859187126, + "step": 720 + }, + { + "ce_ib": 12.2448148727417, + "ce_orig": 1.3936138153076172, + "epoch": 0.20706017686390107, + "kl_loss": 0.42197367548942566, + "loss_ib": 0.01646455191075802, + "step": 720 + }, + { + "ce_ib": 8.876269340515137, + "ce_orig": 0.5300930738449097, + "epoch": 0.20706017686390107, + "kl_loss": 0.4920913577079773, + "loss_ib": 0.013797182589769363, + "step": 720 + }, + { + "ce_ib": 7.045018672943115, + "ce_orig": 0.7694444060325623, + "epoch": 0.20706017686390107, + "kl_loss": 0.502578854560852, + "loss_ib": 0.012070806697010994, + "step": 720 + }, + { + "ce_ib": 8.191524505615234, + "ce_orig": 0.5949411988258362, + "epoch": 0.2073477604428787, + "kl_loss": 0.6976209878921509, + "loss_ib": 0.01516773458570242, + "step": 721 + }, + { + "ce_ib": 8.799589157104492, + "ce_orig": 0.6730172634124756, + "epoch": 0.2073477604428787, + "kl_loss": 0.6129693388938904, + "loss_ib": 0.01492928247898817, + "step": 721 + }, + { + "ce_ib": 9.574918746948242, + "ce_orig": 1.5401928424835205, + "epoch": 0.2073477604428787, + "kl_loss": 0.4680205285549164, + "loss_ib": 0.014255124144256115, + "step": 721 + }, + { + "ce_ib": 11.500478744506836, + "ce_orig": 1.1516295671463013, + "epoch": 0.2073477604428787, + "kl_loss": 0.5712054967880249, + "loss_ib": 0.017212534323334694, + "step": 721 + }, + { + "ce_ib": 6.9377641677856445, + "ce_orig": 0.47552016377449036, + "epoch": 0.20763534402185635, + "kl_loss": 0.5818679332733154, + "loss_ib": 0.01275644265115261, + "step": 722 + }, + { + "ce_ib": 7.533665180206299, + "ce_orig": 0.8571917414665222, + "epoch": 0.20763534402185635, + "kl_loss": 0.4062355160713196, + "loss_ib": 0.011596020311117172, + "step": 722 + }, + { + "ce_ib": 4.523435592651367, + "ce_orig": 0.5144174098968506, + "epoch": 0.20763534402185635, + "kl_loss": 0.46129077672958374, + "loss_ib": 0.009136342443525791, + "step": 722 + }, + { + "ce_ib": 5.738825798034668, + "ce_orig": 0.3778877556324005, + "epoch": 0.20763534402185635, + "kl_loss": 0.4390355944633484, + "loss_ib": 0.010129181668162346, + "step": 722 + }, + { + "ce_ib": 6.621108055114746, + "ce_orig": 0.5974579453468323, + "epoch": 0.207922927600834, + "kl_loss": 0.444467157125473, + "loss_ib": 0.011065779253840446, + "step": 723 + }, + { + "ce_ib": 9.774152755737305, + "ce_orig": 0.6996477246284485, + "epoch": 0.207922927600834, + "kl_loss": 0.605032742023468, + "loss_ib": 0.01582447998225689, + "step": 723 + }, + { + "ce_ib": 8.50704574584961, + "ce_orig": 0.8165543079376221, + "epoch": 0.207922927600834, + "kl_loss": 0.6351085901260376, + "loss_ib": 0.014858131296932697, + "step": 723 + }, + { + "ce_ib": 5.1617231369018555, + "ce_orig": 0.5427976250648499, + "epoch": 0.207922927600834, + "kl_loss": 0.4432010054588318, + "loss_ib": 0.009593733586370945, + "step": 723 + }, + { + "ce_ib": 10.265229225158691, + "ce_orig": 0.3247712552547455, + "epoch": 0.20821051117981162, + "kl_loss": 0.9256395697593689, + "loss_ib": 0.019521623849868774, + "step": 724 + }, + { + "ce_ib": 12.566752433776855, + "ce_orig": 0.9292450547218323, + "epoch": 0.20821051117981162, + "kl_loss": 0.5198833346366882, + "loss_ib": 0.017765585333108902, + "step": 724 + }, + { + "ce_ib": 4.4857707023620605, + "ce_orig": 0.13291123509407043, + "epoch": 0.20821051117981162, + "kl_loss": 0.8171424865722656, + "loss_ib": 0.012657195329666138, + "step": 724 + }, + { + "ce_ib": 11.237386703491211, + "ce_orig": 1.4978471994400024, + "epoch": 0.20821051117981162, + "kl_loss": 0.6373554468154907, + "loss_ib": 0.01761094108223915, + "step": 724 + }, + { + "epoch": 0.20849809475878928, + "grad_norm": 0.08204614371061325, + "learning_rate": 9.960534589307342e-06, + "loss": 0.9127, + "step": 725 + }, + { + "ce_ib": 6.750695705413818, + "ce_orig": 0.7500053644180298, + "epoch": 0.20849809475878928, + "kl_loss": 0.44563400745391846, + "loss_ib": 0.0112070357427001, + "step": 725 + }, + { + "ce_ib": 7.751323223114014, + "ce_orig": 1.0083248615264893, + "epoch": 0.20849809475878928, + "kl_loss": 0.41058549284935, + "loss_ib": 0.011857178062200546, + "step": 725 + }, + { + "ce_ib": 8.529701232910156, + "ce_orig": 0.73545902967453, + "epoch": 0.20849809475878928, + "kl_loss": 0.5081138014793396, + "loss_ib": 0.01361083984375, + "step": 725 + }, + { + "ce_ib": 6.2612080574035645, + "ce_orig": 0.5796510577201843, + "epoch": 0.20849809475878928, + "kl_loss": 0.39641040563583374, + "loss_ib": 0.010225312784314156, + "step": 725 + }, + { + "ce_ib": 6.802225112915039, + "ce_orig": 0.6491565704345703, + "epoch": 0.2087856783377669, + "kl_loss": 0.40076354146003723, + "loss_ib": 0.010809860192239285, + "step": 726 + }, + { + "ce_ib": 10.529885292053223, + "ce_orig": 0.8254870772361755, + "epoch": 0.2087856783377669, + "kl_loss": 0.5448965430259705, + "loss_ib": 0.015978850424289703, + "step": 726 + }, + { + "ce_ib": 12.080382347106934, + "ce_orig": 1.3747988939285278, + "epoch": 0.2087856783377669, + "kl_loss": 0.607007622718811, + "loss_ib": 0.018150458112359047, + "step": 726 + }, + { + "ce_ib": 13.895086288452148, + "ce_orig": 1.569737195968628, + "epoch": 0.2087856783377669, + "kl_loss": 0.41842395067214966, + "loss_ib": 0.018079325556755066, + "step": 726 + }, + { + "ce_ib": 10.388771057128906, + "ce_orig": 0.8246784806251526, + "epoch": 0.20907326191674455, + "kl_loss": 0.6921413540840149, + "loss_ib": 0.017310185357928276, + "step": 727 + }, + { + "ce_ib": 10.750786781311035, + "ce_orig": 0.6747448444366455, + "epoch": 0.20907326191674455, + "kl_loss": 0.46249350905418396, + "loss_ib": 0.015375722199678421, + "step": 727 + }, + { + "ce_ib": 8.939618110656738, + "ce_orig": 0.611909806728363, + "epoch": 0.20907326191674455, + "kl_loss": 0.4692227244377136, + "loss_ib": 0.013631845824420452, + "step": 727 + }, + { + "ce_ib": 5.823955059051514, + "ce_orig": 0.5695940852165222, + "epoch": 0.20907326191674455, + "kl_loss": 0.4087258577346802, + "loss_ib": 0.00991121307015419, + "step": 727 + }, + { + "ce_ib": 7.428273677825928, + "ce_orig": 0.6111478805541992, + "epoch": 0.2093608454957222, + "kl_loss": 0.4407083988189697, + "loss_ib": 0.01183535810559988, + "step": 728 + }, + { + "ce_ib": 8.432186126708984, + "ce_orig": 0.7240220308303833, + "epoch": 0.2093608454957222, + "kl_loss": 0.5889390707015991, + "loss_ib": 0.014321576803922653, + "step": 728 + }, + { + "ce_ib": 9.506328582763672, + "ce_orig": 0.885880708694458, + "epoch": 0.2093608454957222, + "kl_loss": 0.39755725860595703, + "loss_ib": 0.013481900095939636, + "step": 728 + }, + { + "ce_ib": 10.747995376586914, + "ce_orig": 0.9302851557731628, + "epoch": 0.2093608454957222, + "kl_loss": 0.6458637118339539, + "loss_ib": 0.01720663346350193, + "step": 728 + }, + { + "ce_ib": 8.671708106994629, + "ce_orig": 0.6951517462730408, + "epoch": 0.20964842907469983, + "kl_loss": 0.547339916229248, + "loss_ib": 0.014145107008516788, + "step": 729 + }, + { + "ce_ib": 9.09277629852295, + "ce_orig": 0.6532163023948669, + "epoch": 0.20964842907469983, + "kl_loss": 0.4491899609565735, + "loss_ib": 0.013584675267338753, + "step": 729 + }, + { + "ce_ib": 8.521883964538574, + "ce_orig": 0.26346156001091003, + "epoch": 0.20964842907469983, + "kl_loss": 0.7473446130752563, + "loss_ib": 0.0159953311085701, + "step": 729 + }, + { + "ce_ib": 8.46525764465332, + "ce_orig": 0.761343777179718, + "epoch": 0.20964842907469983, + "kl_loss": 0.49551889300346375, + "loss_ib": 0.013420446775853634, + "step": 729 + }, + { + "epoch": 0.20993601265367748, + "grad_norm": 0.10517257452011108, + "learning_rate": 9.959555443510074e-06, + "loss": 0.8883, + "step": 730 + }, + { + "ce_ib": 9.539706230163574, + "ce_orig": 0.8811664581298828, + "epoch": 0.20993601265367748, + "kl_loss": 0.4699278473854065, + "loss_ib": 0.014238984324038029, + "step": 730 + }, + { + "ce_ib": 10.79475212097168, + "ce_orig": 0.9388590455055237, + "epoch": 0.20993601265367748, + "kl_loss": 1.093339443206787, + "loss_ib": 0.0217281486839056, + "step": 730 + }, + { + "ce_ib": 11.920876502990723, + "ce_orig": 1.155131220817566, + "epoch": 0.20993601265367748, + "kl_loss": 0.49514299631118774, + "loss_ib": 0.016872305423021317, + "step": 730 + }, + { + "ce_ib": 12.770062446594238, + "ce_orig": 1.2717667818069458, + "epoch": 0.20993601265367748, + "kl_loss": 0.467684268951416, + "loss_ib": 0.017446905374526978, + "step": 730 + }, + { + "ce_ib": 9.915130615234375, + "ce_orig": 0.7397444248199463, + "epoch": 0.2102235962326551, + "kl_loss": 0.5744942426681519, + "loss_ib": 0.01566007360816002, + "step": 731 + }, + { + "ce_ib": 11.337221145629883, + "ce_orig": 0.6278934478759766, + "epoch": 0.2102235962326551, + "kl_loss": 0.47471189498901367, + "loss_ib": 0.0160843413323164, + "step": 731 + }, + { + "ce_ib": 6.727563858032227, + "ce_orig": 0.5554884076118469, + "epoch": 0.2102235962326551, + "kl_loss": 0.38505086302757263, + "loss_ib": 0.01057807169854641, + "step": 731 + }, + { + "ce_ib": 4.216444492340088, + "ce_orig": 0.15175974369049072, + "epoch": 0.2102235962326551, + "kl_loss": 0.7317667007446289, + "loss_ib": 0.011534111574292183, + "step": 731 + }, + { + "ce_ib": 6.452449798583984, + "ce_orig": 0.48745986819267273, + "epoch": 0.21051117981163275, + "kl_loss": 0.4054802656173706, + "loss_ib": 0.0105072520673275, + "step": 732 + }, + { + "ce_ib": 7.071081161499023, + "ce_orig": 0.9309906363487244, + "epoch": 0.21051117981163275, + "kl_loss": 0.37749940156936646, + "loss_ib": 0.010846075601875782, + "step": 732 + }, + { + "ce_ib": 11.04909896850586, + "ce_orig": 0.874702513217926, + "epoch": 0.21051117981163275, + "kl_loss": 0.4390341639518738, + "loss_ib": 0.015439440496265888, + "step": 732 + }, + { + "ce_ib": 7.799300193786621, + "ce_orig": 0.8155895471572876, + "epoch": 0.21051117981163275, + "kl_loss": 0.4740564823150635, + "loss_ib": 0.01253986544907093, + "step": 732 + }, + { + "ce_ib": 7.702383995056152, + "ce_orig": 0.5887701511383057, + "epoch": 0.2107987633906104, + "kl_loss": 0.5023674964904785, + "loss_ib": 0.01272605825215578, + "step": 733 + }, + { + "ce_ib": 8.737090110778809, + "ce_orig": 0.8836251497268677, + "epoch": 0.2107987633906104, + "kl_loss": 0.48343145847320557, + "loss_ib": 0.013571404851973057, + "step": 733 + }, + { + "ce_ib": 9.702954292297363, + "ce_orig": 0.9348666071891785, + "epoch": 0.2107987633906104, + "kl_loss": 0.7717263698577881, + "loss_ib": 0.017420217394828796, + "step": 733 + }, + { + "ce_ib": 9.321721076965332, + "ce_orig": 0.6164664626121521, + "epoch": 0.2107987633906104, + "kl_loss": 0.5251142382621765, + "loss_ib": 0.014572863467037678, + "step": 733 + }, + { + "ce_ib": 10.808871269226074, + "ce_orig": 0.7415775656700134, + "epoch": 0.21108634696958803, + "kl_loss": 0.5616943836212158, + "loss_ib": 0.01642581634223461, + "step": 734 + }, + { + "ce_ib": 9.013411521911621, + "ce_orig": 0.6960151195526123, + "epoch": 0.21108634696958803, + "kl_loss": 0.45591843128204346, + "loss_ib": 0.013572595082223415, + "step": 734 + }, + { + "ce_ib": 10.70376205444336, + "ce_orig": 0.8505563735961914, + "epoch": 0.21108634696958803, + "kl_loss": 0.4155931770801544, + "loss_ib": 0.014859694056212902, + "step": 734 + }, + { + "ce_ib": 10.576190948486328, + "ce_orig": 1.013388752937317, + "epoch": 0.21108634696958803, + "kl_loss": 0.5600894689559937, + "loss_ib": 0.016177086159586906, + "step": 734 + }, + { + "epoch": 0.21137393054856568, + "grad_norm": 0.10216815024614334, + "learning_rate": 9.958564348992604e-06, + "loss": 0.9112, + "step": 735 + }, + { + "ce_ib": 8.351442337036133, + "ce_orig": 0.6271442174911499, + "epoch": 0.21137393054856568, + "kl_loss": 0.45761042833328247, + "loss_ib": 0.012927546165883541, + "step": 735 + }, + { + "ce_ib": 10.175559997558594, + "ce_orig": 0.9649935960769653, + "epoch": 0.21137393054856568, + "kl_loss": 0.5237610340118408, + "loss_ib": 0.01541317068040371, + "step": 735 + }, + { + "ce_ib": 11.151328086853027, + "ce_orig": 1.160567045211792, + "epoch": 0.21137393054856568, + "kl_loss": 0.44404470920562744, + "loss_ib": 0.015591775067150593, + "step": 735 + }, + { + "ce_ib": 8.23231029510498, + "ce_orig": 0.8223357200622559, + "epoch": 0.21137393054856568, + "kl_loss": 0.4122684597969055, + "loss_ib": 0.012354995124042034, + "step": 735 + }, + { + "ce_ib": 9.226214408874512, + "ce_orig": 1.0122361183166504, + "epoch": 0.2116615141275433, + "kl_loss": 0.3391454219818115, + "loss_ib": 0.012617669068276882, + "step": 736 + }, + { + "ce_ib": 12.772435188293457, + "ce_orig": 1.63818359375, + "epoch": 0.2116615141275433, + "kl_loss": 0.6720362901687622, + "loss_ib": 0.019492797553539276, + "step": 736 + }, + { + "ce_ib": 10.953327178955078, + "ce_orig": 1.301689624786377, + "epoch": 0.2116615141275433, + "kl_loss": 0.4017236828804016, + "loss_ib": 0.014970564283430576, + "step": 736 + }, + { + "ce_ib": 13.185845375061035, + "ce_orig": 1.3509107828140259, + "epoch": 0.2116615141275433, + "kl_loss": 0.3720896244049072, + "loss_ib": 0.0169067420065403, + "step": 736 + }, + { + "ce_ib": 5.992466449737549, + "ce_orig": 0.6994779706001282, + "epoch": 0.21194909770652096, + "kl_loss": 0.37846922874450684, + "loss_ib": 0.009777158498764038, + "step": 737 + }, + { + "ce_ib": 4.946014404296875, + "ce_orig": 0.448905885219574, + "epoch": 0.21194909770652096, + "kl_loss": 0.3910108208656311, + "loss_ib": 0.008856122381985188, + "step": 737 + }, + { + "ce_ib": 8.710723876953125, + "ce_orig": 0.955891489982605, + "epoch": 0.21194909770652096, + "kl_loss": 0.3540381193161011, + "loss_ib": 0.012251105159521103, + "step": 737 + }, + { + "ce_ib": 9.716163635253906, + "ce_orig": 0.777677595615387, + "epoch": 0.21194909770652096, + "kl_loss": 0.840921938419342, + "loss_ib": 0.0181253831833601, + "step": 737 + }, + { + "ce_ib": 9.742884635925293, + "ce_orig": 0.8019170761108398, + "epoch": 0.2122366812854986, + "kl_loss": 0.4170638918876648, + "loss_ib": 0.013913523405790329, + "step": 738 + }, + { + "ce_ib": 9.633951187133789, + "ce_orig": 0.9468701481819153, + "epoch": 0.2122366812854986, + "kl_loss": 0.44602394104003906, + "loss_ib": 0.014094190672039986, + "step": 738 + }, + { + "ce_ib": 9.999152183532715, + "ce_orig": 1.5994606018066406, + "epoch": 0.2122366812854986, + "kl_loss": 0.34741610288619995, + "loss_ib": 0.013473312370479107, + "step": 738 + }, + { + "ce_ib": 7.614631175994873, + "ce_orig": 0.44442856311798096, + "epoch": 0.2122366812854986, + "kl_loss": 0.42184555530548096, + "loss_ib": 0.011833085678517818, + "step": 738 + }, + { + "ce_ib": 3.805742025375366, + "ce_orig": 0.3643724024295807, + "epoch": 0.21252426486447623, + "kl_loss": 0.73167484998703, + "loss_ib": 0.011122490279376507, + "step": 739 + }, + { + "ce_ib": 7.001569747924805, + "ce_orig": 0.6076573133468628, + "epoch": 0.21252426486447623, + "kl_loss": 0.3848869204521179, + "loss_ib": 0.010850438848137856, + "step": 739 + }, + { + "ce_ib": 6.058032035827637, + "ce_orig": 0.9139571785926819, + "epoch": 0.21252426486447623, + "kl_loss": 0.43339502811431885, + "loss_ib": 0.010391981340944767, + "step": 739 + }, + { + "ce_ib": 9.221721649169922, + "ce_orig": 1.1071618795394897, + "epoch": 0.21252426486447623, + "kl_loss": 0.3810133934020996, + "loss_ib": 0.01303185522556305, + "step": 739 + }, + { + "epoch": 0.21281184844345388, + "grad_norm": 0.09019612520933151, + "learning_rate": 9.95756130814271e-06, + "loss": 0.8816, + "step": 740 + }, + { + "ce_ib": 9.799250602722168, + "ce_orig": 0.7617897987365723, + "epoch": 0.21281184844345388, + "kl_loss": 0.46645045280456543, + "loss_ib": 0.014463755302131176, + "step": 740 + }, + { + "ce_ib": 8.431238174438477, + "ce_orig": 0.7609443068504333, + "epoch": 0.21281184844345388, + "kl_loss": 0.353823721408844, + "loss_ib": 0.011969475075602531, + "step": 740 + }, + { + "ce_ib": 6.829717636108398, + "ce_orig": 0.6648247241973877, + "epoch": 0.21281184844345388, + "kl_loss": 0.3253084421157837, + "loss_ib": 0.01008280273526907, + "step": 740 + }, + { + "ce_ib": 9.891703605651855, + "ce_orig": 1.1517934799194336, + "epoch": 0.21281184844345388, + "kl_loss": 0.43470141291618347, + "loss_ib": 0.014238717034459114, + "step": 740 + }, + { + "ce_ib": 6.036440372467041, + "ce_orig": 0.5608431696891785, + "epoch": 0.2130994320224315, + "kl_loss": 0.5712899565696716, + "loss_ib": 0.01174934022128582, + "step": 741 + }, + { + "ce_ib": 7.961015224456787, + "ce_orig": 0.5696125626564026, + "epoch": 0.2130994320224315, + "kl_loss": 0.37901923060417175, + "loss_ib": 0.01175120659172535, + "step": 741 + }, + { + "ce_ib": 9.610107421875, + "ce_orig": 0.6799657344818115, + "epoch": 0.2130994320224315, + "kl_loss": 0.5099486112594604, + "loss_ib": 0.0147095937281847, + "step": 741 + }, + { + "ce_ib": 10.553266525268555, + "ce_orig": 1.0597807168960571, + "epoch": 0.2130994320224315, + "kl_loss": 0.9013509750366211, + "loss_ib": 0.019566776230931282, + "step": 741 + }, + { + "ce_ib": 11.050984382629395, + "ce_orig": 1.2893459796905518, + "epoch": 0.21338701560140916, + "kl_loss": 0.45824098587036133, + "loss_ib": 0.015633394941687584, + "step": 742 + }, + { + "ce_ib": 10.593522071838379, + "ce_orig": 1.4311033487319946, + "epoch": 0.21338701560140916, + "kl_loss": 0.33632180094718933, + "loss_ib": 0.013956740498542786, + "step": 742 + }, + { + "ce_ib": 8.308612823486328, + "ce_orig": 0.9085618257522583, + "epoch": 0.21338701560140916, + "kl_loss": 0.42611122131347656, + "loss_ib": 0.012569725513458252, + "step": 742 + }, + { + "ce_ib": 8.803531646728516, + "ce_orig": 0.5293317437171936, + "epoch": 0.21338701560140916, + "kl_loss": 0.42089781165122986, + "loss_ib": 0.013012508861720562, + "step": 742 + }, + { + "ce_ib": 10.681037902832031, + "ce_orig": 0.6758571863174438, + "epoch": 0.2136745991803868, + "kl_loss": 0.4455721974372864, + "loss_ib": 0.015136758796870708, + "step": 743 + }, + { + "ce_ib": 5.145867347717285, + "ce_orig": 0.4771097004413605, + "epoch": 0.2136745991803868, + "kl_loss": 0.3146681487560272, + "loss_ib": 0.0082925483584404, + "step": 743 + }, + { + "ce_ib": 8.998390197753906, + "ce_orig": 0.4767987132072449, + "epoch": 0.2136745991803868, + "kl_loss": 0.44158506393432617, + "loss_ib": 0.013414240442216396, + "step": 743 + }, + { + "ce_ib": 10.034675598144531, + "ce_orig": 0.8264948129653931, + "epoch": 0.2136745991803868, + "kl_loss": 0.36563044786453247, + "loss_ib": 0.013690979219973087, + "step": 743 + }, + { + "ce_ib": 8.774846076965332, + "ce_orig": 0.8711258172988892, + "epoch": 0.21396218275936443, + "kl_loss": 0.448600172996521, + "loss_ib": 0.013260847888886929, + "step": 744 + }, + { + "ce_ib": 6.589221954345703, + "ce_orig": 0.8571322560310364, + "epoch": 0.21396218275936443, + "kl_loss": 0.32901668548583984, + "loss_ib": 0.009879388846457005, + "step": 744 + }, + { + "ce_ib": 7.771622657775879, + "ce_orig": 0.9770241379737854, + "epoch": 0.21396218275936443, + "kl_loss": 0.5221589803695679, + "loss_ib": 0.01299321185797453, + "step": 744 + }, + { + "ce_ib": 12.875645637512207, + "ce_orig": 1.2932630777359009, + "epoch": 0.21396218275936443, + "kl_loss": 0.5584806799888611, + "loss_ib": 0.018460452556610107, + "step": 744 + }, + { + "epoch": 0.21424976633834208, + "grad_norm": 0.11534402519464493, + "learning_rate": 9.956546323376948e-06, + "loss": 0.8441, + "step": 745 + }, + { + "ce_ib": 12.278318405151367, + "ce_orig": 0.611358642578125, + "epoch": 0.21424976633834208, + "kl_loss": 0.5517995357513428, + "loss_ib": 0.017796313390135765, + "step": 745 + }, + { + "ce_ib": 9.37865161895752, + "ce_orig": 0.9221431612968445, + "epoch": 0.21424976633834208, + "kl_loss": 0.2928432822227478, + "loss_ib": 0.012307084165513515, + "step": 745 + }, + { + "ce_ib": 8.047618865966797, + "ce_orig": 0.9915336966514587, + "epoch": 0.21424976633834208, + "kl_loss": 0.45079469680786133, + "loss_ib": 0.012555565685033798, + "step": 745 + }, + { + "ce_ib": 9.573211669921875, + "ce_orig": 0.9948348999023438, + "epoch": 0.21424976633834208, + "kl_loss": 0.4275258481502533, + "loss_ib": 0.013848470523953438, + "step": 745 + }, + { + "ce_ib": 8.365690231323242, + "ce_orig": 0.8193045258522034, + "epoch": 0.2145373499173197, + "kl_loss": 0.44444945454597473, + "loss_ib": 0.012810184620320797, + "step": 746 + }, + { + "ce_ib": 6.967347621917725, + "ce_orig": 0.4872395098209381, + "epoch": 0.2145373499173197, + "kl_loss": 0.37023940682411194, + "loss_ib": 0.010669741779565811, + "step": 746 + }, + { + "ce_ib": 7.190746784210205, + "ce_orig": 0.6708618402481079, + "epoch": 0.2145373499173197, + "kl_loss": 0.3780478239059448, + "loss_ib": 0.010971223935484886, + "step": 746 + }, + { + "ce_ib": 11.051678657531738, + "ce_orig": 1.0682791471481323, + "epoch": 0.2145373499173197, + "kl_loss": 0.33363956212997437, + "loss_ib": 0.014388074167072773, + "step": 746 + }, + { + "ce_ib": 8.238478660583496, + "ce_orig": 0.7487781047821045, + "epoch": 0.21482493349629736, + "kl_loss": 0.37909066677093506, + "loss_ib": 0.012029385194182396, + "step": 747 + }, + { + "ce_ib": 12.03074836730957, + "ce_orig": 1.157182216644287, + "epoch": 0.21482493349629736, + "kl_loss": 0.37361055612564087, + "loss_ib": 0.015766853466629982, + "step": 747 + }, + { + "ce_ib": 8.529624938964844, + "ce_orig": 1.0179344415664673, + "epoch": 0.21482493349629736, + "kl_loss": 0.30930227041244507, + "loss_ib": 0.011622647754848003, + "step": 747 + }, + { + "ce_ib": 7.966403007507324, + "ce_orig": 0.6700258255004883, + "epoch": 0.21482493349629736, + "kl_loss": 0.37224870920181274, + "loss_ib": 0.011688889935612679, + "step": 747 + }, + { + "ce_ib": 9.755182266235352, + "ce_orig": 1.0040427446365356, + "epoch": 0.215112517075275, + "kl_loss": 0.3066710829734802, + "loss_ib": 0.012821893207728863, + "step": 748 + }, + { + "ce_ib": 10.194023132324219, + "ce_orig": 0.5760530233383179, + "epoch": 0.215112517075275, + "kl_loss": 0.5293586850166321, + "loss_ib": 0.015487611293792725, + "step": 748 + }, + { + "ce_ib": 11.507509231567383, + "ce_orig": 1.2553309202194214, + "epoch": 0.215112517075275, + "kl_loss": 0.3195509910583496, + "loss_ib": 0.01470301952213049, + "step": 748 + }, + { + "ce_ib": 10.408110618591309, + "ce_orig": 0.725406289100647, + "epoch": 0.215112517075275, + "kl_loss": 0.3801497220993042, + "loss_ib": 0.014209607616066933, + "step": 748 + }, + { + "ce_ib": 8.620140075683594, + "ce_orig": 0.7735955715179443, + "epoch": 0.21540010065425264, + "kl_loss": 0.5387185215950012, + "loss_ib": 0.014007325284183025, + "step": 749 + }, + { + "ce_ib": 5.869650363922119, + "ce_orig": 0.7176041603088379, + "epoch": 0.21540010065425264, + "kl_loss": 0.37166261672973633, + "loss_ib": 0.009586276486515999, + "step": 749 + }, + { + "ce_ib": 9.476244926452637, + "ce_orig": 1.0505683422088623, + "epoch": 0.21540010065425264, + "kl_loss": 0.3689851760864258, + "loss_ib": 0.013166096061468124, + "step": 749 + }, + { + "ce_ib": 10.051746368408203, + "ce_orig": 0.613065779209137, + "epoch": 0.21540010065425264, + "kl_loss": 0.3970192074775696, + "loss_ib": 0.014021937735378742, + "step": 749 + }, + { + "epoch": 0.2156876842332303, + "grad_norm": 0.09746310114860535, + "learning_rate": 9.955519397140656e-06, + "loss": 0.9247, + "step": 750 + }, + { + "ce_ib": 8.546123504638672, + "ce_orig": 0.8507575392723083, + "epoch": 0.2156876842332303, + "kl_loss": 0.3480690121650696, + "loss_ib": 0.012026812881231308, + "step": 750 + }, + { + "ce_ib": 12.74342155456543, + "ce_orig": 0.9122403860092163, + "epoch": 0.2156876842332303, + "kl_loss": 0.39195001125335693, + "loss_ib": 0.016662921756505966, + "step": 750 + }, + { + "ce_ib": 12.673941612243652, + "ce_orig": 1.130251407623291, + "epoch": 0.2156876842332303, + "kl_loss": 0.38955867290496826, + "loss_ib": 0.016569528728723526, + "step": 750 + }, + { + "ce_ib": 11.575626373291016, + "ce_orig": 0.889440655708313, + "epoch": 0.2156876842332303, + "kl_loss": 0.3685222864151001, + "loss_ib": 0.015260848216712475, + "step": 750 + }, + { + "ce_ib": 9.500494003295898, + "ce_orig": 0.5060651302337646, + "epoch": 0.2159752678122079, + "kl_loss": 0.2954930067062378, + "loss_ib": 0.01245542336255312, + "step": 751 + }, + { + "ce_ib": 11.144340515136719, + "ce_orig": 0.7600336670875549, + "epoch": 0.2159752678122079, + "kl_loss": 0.6074585914611816, + "loss_ib": 0.017218926921486855, + "step": 751 + }, + { + "ce_ib": 7.084355354309082, + "ce_orig": 0.8810204267501831, + "epoch": 0.2159752678122079, + "kl_loss": 0.4073539078235626, + "loss_ib": 0.01115789357572794, + "step": 751 + }, + { + "ce_ib": 8.677960395812988, + "ce_orig": 0.8457415103912354, + "epoch": 0.2159752678122079, + "kl_loss": 0.6076983213424683, + "loss_ib": 0.014754943549633026, + "step": 751 + }, + { + "ce_ib": 10.253267288208008, + "ce_orig": 1.123890995979309, + "epoch": 0.21626285139118556, + "kl_loss": 0.30214011669158936, + "loss_ib": 0.013274667784571648, + "step": 752 + }, + { + "ce_ib": 11.143954277038574, + "ce_orig": 0.9621107578277588, + "epoch": 0.21626285139118556, + "kl_loss": 0.40844857692718506, + "loss_ib": 0.015228440053761005, + "step": 752 + }, + { + "ce_ib": 7.633870601654053, + "ce_orig": 0.7040098309516907, + "epoch": 0.21626285139118556, + "kl_loss": 0.4934859871864319, + "loss_ib": 0.012568730860948563, + "step": 752 + }, + { + "ce_ib": 15.08940601348877, + "ce_orig": 1.8990564346313477, + "epoch": 0.21626285139118556, + "kl_loss": 0.3932008445262909, + "loss_ib": 0.0190214142203331, + "step": 752 + }, + { + "ce_ib": 6.168686389923096, + "ce_orig": 0.654904305934906, + "epoch": 0.2165504349701632, + "kl_loss": 0.34978413581848145, + "loss_ib": 0.00966652762144804, + "step": 753 + }, + { + "ce_ib": 14.297883987426758, + "ce_orig": 1.6642775535583496, + "epoch": 0.2165504349701632, + "kl_loss": 0.41902047395706177, + "loss_ib": 0.018488090485334396, + "step": 753 + }, + { + "ce_ib": 8.20667839050293, + "ce_orig": 1.1301182508468628, + "epoch": 0.2165504349701632, + "kl_loss": 0.36091458797454834, + "loss_ib": 0.011815824545919895, + "step": 753 + }, + { + "ce_ib": 10.069759368896484, + "ce_orig": 0.8426340818405151, + "epoch": 0.2165504349701632, + "kl_loss": 0.4236080050468445, + "loss_ib": 0.0143058393150568, + "step": 753 + }, + { + "ce_ib": 9.213713645935059, + "ce_orig": 1.3873950242996216, + "epoch": 0.21683801854914084, + "kl_loss": 0.41740885376930237, + "loss_ib": 0.013387802988290787, + "step": 754 + }, + { + "ce_ib": 8.983365058898926, + "ce_orig": 0.7299124598503113, + "epoch": 0.21683801854914084, + "kl_loss": 0.347156286239624, + "loss_ib": 0.012454927898943424, + "step": 754 + }, + { + "ce_ib": 8.433507919311523, + "ce_orig": 1.1746602058410645, + "epoch": 0.21683801854914084, + "kl_loss": 0.3870384693145752, + "loss_ib": 0.012303893454372883, + "step": 754 + }, + { + "ce_ib": 8.158003807067871, + "ce_orig": 0.9122118353843689, + "epoch": 0.21683801854914084, + "kl_loss": 0.32447659969329834, + "loss_ib": 0.011402769014239311, + "step": 754 + }, + { + "epoch": 0.2171256021281185, + "grad_norm": 0.10924255847930908, + "learning_rate": 9.954480531907935e-06, + "loss": 0.9322, + "step": 755 + }, + { + "ce_ib": 7.25479793548584, + "ce_orig": 0.7606655955314636, + "epoch": 0.2171256021281185, + "kl_loss": 0.38670188188552856, + "loss_ib": 0.011121816001832485, + "step": 755 + }, + { + "ce_ib": 6.150333881378174, + "ce_orig": 0.5132085680961609, + "epoch": 0.2171256021281185, + "kl_loss": 0.5129187107086182, + "loss_ib": 0.011279520578682423, + "step": 755 + }, + { + "ce_ib": 8.504961967468262, + "ce_orig": 0.43100351095199585, + "epoch": 0.2171256021281185, + "kl_loss": 0.36954519152641296, + "loss_ib": 0.012200413271784782, + "step": 755 + }, + { + "ce_ib": 7.746326923370361, + "ce_orig": 0.9925227165222168, + "epoch": 0.2171256021281185, + "kl_loss": 0.35602182149887085, + "loss_ib": 0.011306545697152615, + "step": 755 + }, + { + "ce_ib": 11.200096130371094, + "ce_orig": 0.693459689617157, + "epoch": 0.2174131857070961, + "kl_loss": 0.43727046251296997, + "loss_ib": 0.015572800301015377, + "step": 756 + }, + { + "ce_ib": 9.643890380859375, + "ce_orig": 0.9979680180549622, + "epoch": 0.2174131857070961, + "kl_loss": 0.4014120399951935, + "loss_ib": 0.0136580104008317, + "step": 756 + }, + { + "ce_ib": 9.413188934326172, + "ce_orig": 0.6425915360450745, + "epoch": 0.2174131857070961, + "kl_loss": 0.41752490401268005, + "loss_ib": 0.0135884378105402, + "step": 756 + }, + { + "ce_ib": 8.591798782348633, + "ce_orig": 0.6160233020782471, + "epoch": 0.2174131857070961, + "kl_loss": 0.39359089732170105, + "loss_ib": 0.0125277079641819, + "step": 756 + }, + { + "ce_ib": 9.97320556640625, + "ce_orig": 0.9202979803085327, + "epoch": 0.21770076928607376, + "kl_loss": 0.38434839248657227, + "loss_ib": 0.01381669007241726, + "step": 757 + }, + { + "ce_ib": 11.336276054382324, + "ce_orig": 1.1318460702896118, + "epoch": 0.21770076928607376, + "kl_loss": 0.426124632358551, + "loss_ib": 0.015597522258758545, + "step": 757 + }, + { + "ce_ib": 7.240839958190918, + "ce_orig": 0.6424944996833801, + "epoch": 0.21770076928607376, + "kl_loss": 0.47551506757736206, + "loss_ib": 0.011995989829301834, + "step": 757 + }, + { + "ce_ib": 8.176185607910156, + "ce_orig": 0.8402231931686401, + "epoch": 0.21770076928607376, + "kl_loss": 0.3214970827102661, + "loss_ib": 0.011391155421733856, + "step": 757 + }, + { + "ce_ib": 7.582910060882568, + "ce_orig": 0.6925665736198425, + "epoch": 0.21798835286505142, + "kl_loss": 0.2832297086715698, + "loss_ib": 0.010415206663310528, + "step": 758 + }, + { + "ce_ib": 6.832791805267334, + "ce_orig": 0.514137327671051, + "epoch": 0.21798835286505142, + "kl_loss": 0.3456891179084778, + "loss_ib": 0.010289683006703854, + "step": 758 + }, + { + "ce_ib": 10.459983825683594, + "ce_orig": 1.1698083877563477, + "epoch": 0.21798835286505142, + "kl_loss": 0.33782780170440674, + "loss_ib": 0.013838262297213078, + "step": 758 + }, + { + "ce_ib": 8.3121919631958, + "ce_orig": 0.6954447627067566, + "epoch": 0.21798835286505142, + "kl_loss": 0.38117825984954834, + "loss_ib": 0.012123974040150642, + "step": 758 + }, + { + "ce_ib": 6.0204010009765625, + "ce_orig": 0.7387468814849854, + "epoch": 0.21827593644402904, + "kl_loss": 0.42067545652389526, + "loss_ib": 0.01022715587168932, + "step": 759 + }, + { + "ce_ib": 8.076128005981445, + "ce_orig": 0.6554126143455505, + "epoch": 0.21827593644402904, + "kl_loss": 0.3814757466316223, + "loss_ib": 0.011890885420143604, + "step": 759 + }, + { + "ce_ib": 7.948196887969971, + "ce_orig": 0.7137631177902222, + "epoch": 0.21827593644402904, + "kl_loss": 0.4568563997745514, + "loss_ib": 0.012516760267317295, + "step": 759 + }, + { + "ce_ib": 7.82420015335083, + "ce_orig": 0.8517636060714722, + "epoch": 0.21827593644402904, + "kl_loss": 0.38165003061294556, + "loss_ib": 0.011640701442956924, + "step": 759 + }, + { + "epoch": 0.2185635200230067, + "grad_norm": 0.10263609886169434, + "learning_rate": 9.953429730181653e-06, + "loss": 0.897, + "step": 760 + }, + { + "ce_ib": 9.436492919921875, + "ce_orig": 0.9970396757125854, + "epoch": 0.2185635200230067, + "kl_loss": 0.350554496049881, + "loss_ib": 0.012942037545144558, + "step": 760 + }, + { + "ce_ib": 8.581624984741211, + "ce_orig": 0.445902556180954, + "epoch": 0.2185635200230067, + "kl_loss": 0.44985431432724, + "loss_ib": 0.013080167584121227, + "step": 760 + }, + { + "ce_ib": 5.427255153656006, + "ce_orig": 0.6384531855583191, + "epoch": 0.2185635200230067, + "kl_loss": 0.3149919807910919, + "loss_ib": 0.008577174507081509, + "step": 760 + }, + { + "ce_ib": 12.8211030960083, + "ce_orig": 1.4226137399673462, + "epoch": 0.2185635200230067, + "kl_loss": 0.297141969203949, + "loss_ib": 0.015792522579431534, + "step": 760 + }, + { + "ce_ib": 12.39339542388916, + "ce_orig": 1.4045170545578003, + "epoch": 0.21885110360198431, + "kl_loss": 0.3612511157989502, + "loss_ib": 0.016005907207727432, + "step": 761 + }, + { + "ce_ib": 7.98706579208374, + "ce_orig": 0.4089260399341583, + "epoch": 0.21885110360198431, + "kl_loss": 0.5325812101364136, + "loss_ib": 0.013312878087162971, + "step": 761 + }, + { + "ce_ib": 9.43854808807373, + "ce_orig": 0.47952842712402344, + "epoch": 0.21885110360198431, + "kl_loss": 0.39229825139045715, + "loss_ib": 0.013361530378460884, + "step": 761 + }, + { + "ce_ib": 7.874027729034424, + "ce_orig": 0.8605637550354004, + "epoch": 0.21885110360198431, + "kl_loss": 0.3315168619155884, + "loss_ib": 0.01118919625878334, + "step": 761 + }, + { + "ce_ib": 14.053482055664062, + "ce_orig": 1.8178443908691406, + "epoch": 0.21913868718096197, + "kl_loss": 0.4107596278190613, + "loss_ib": 0.018161077052354813, + "step": 762 + }, + { + "ce_ib": 12.965909957885742, + "ce_orig": 1.3146981000900269, + "epoch": 0.21913868718096197, + "kl_loss": 0.35652273893356323, + "loss_ib": 0.016531137749552727, + "step": 762 + }, + { + "ce_ib": 8.16763973236084, + "ce_orig": 1.06657874584198, + "epoch": 0.21913868718096197, + "kl_loss": 0.3259735703468323, + "loss_ib": 0.011427376419305801, + "step": 762 + }, + { + "ce_ib": 9.887922286987305, + "ce_orig": 0.7984805107116699, + "epoch": 0.21913868718096197, + "kl_loss": 0.4151677191257477, + "loss_ib": 0.01403959933668375, + "step": 762 + }, + { + "ce_ib": 12.173575401306152, + "ce_orig": 0.7809048891067505, + "epoch": 0.21942627075993962, + "kl_loss": 0.8220731019973755, + "loss_ib": 0.020394306629896164, + "step": 763 + }, + { + "ce_ib": 11.080092430114746, + "ce_orig": 1.3501423597335815, + "epoch": 0.21942627075993962, + "kl_loss": 0.3217220902442932, + "loss_ib": 0.014297313056886196, + "step": 763 + }, + { + "ce_ib": 5.698494911193848, + "ce_orig": 0.587028443813324, + "epoch": 0.21942627075993962, + "kl_loss": 0.4448961913585663, + "loss_ib": 0.010147457011044025, + "step": 763 + }, + { + "ce_ib": 7.876105785369873, + "ce_orig": 0.8657087087631226, + "epoch": 0.21942627075993962, + "kl_loss": 0.35243791341781616, + "loss_ib": 0.01140048447996378, + "step": 763 + }, + { + "ce_ib": 7.448467254638672, + "ce_orig": 0.6551787257194519, + "epoch": 0.21971385433891724, + "kl_loss": 0.31218671798706055, + "loss_ib": 0.010570335201919079, + "step": 764 + }, + { + "ce_ib": 6.403468608856201, + "ce_orig": 0.5633848905563354, + "epoch": 0.21971385433891724, + "kl_loss": 0.6558365821838379, + "loss_ib": 0.01296183466911316, + "step": 764 + }, + { + "ce_ib": 5.391688346862793, + "ce_orig": 0.3037964403629303, + "epoch": 0.21971385433891724, + "kl_loss": 0.3110879361629486, + "loss_ib": 0.008502568118274212, + "step": 764 + }, + { + "ce_ib": 11.914216995239258, + "ce_orig": 0.712795078754425, + "epoch": 0.21971385433891724, + "kl_loss": 0.8264556527137756, + "loss_ib": 0.020178772509098053, + "step": 764 + }, + { + "epoch": 0.2200014379178949, + "grad_norm": 0.08830351382493973, + "learning_rate": 9.952366994493438e-06, + "loss": 0.8629, + "step": 765 + }, + { + "ce_ib": 10.298672676086426, + "ce_orig": 1.1052519083023071, + "epoch": 0.2200014379178949, + "kl_loss": 0.29135391116142273, + "loss_ib": 0.013212212361395359, + "step": 765 + }, + { + "ce_ib": 5.10940408706665, + "ce_orig": 0.6949278116226196, + "epoch": 0.2200014379178949, + "kl_loss": 0.2762865126132965, + "loss_ib": 0.007872268557548523, + "step": 765 + }, + { + "ce_ib": 10.096806526184082, + "ce_orig": 1.0484293699264526, + "epoch": 0.2200014379178949, + "kl_loss": 0.4013941287994385, + "loss_ib": 0.0141107477247715, + "step": 765 + }, + { + "ce_ib": 6.776886463165283, + "ce_orig": 0.8039520382881165, + "epoch": 0.2200014379178949, + "kl_loss": 0.2949376702308655, + "loss_ib": 0.009726262651383877, + "step": 765 + }, + { + "ce_ib": 5.609190940856934, + "ce_orig": 0.5181265473365784, + "epoch": 0.22028902149687252, + "kl_loss": 0.31120482087135315, + "loss_ib": 0.008721238933503628, + "step": 766 + }, + { + "ce_ib": 7.555119037628174, + "ce_orig": 0.7217943668365479, + "epoch": 0.22028902149687252, + "kl_loss": 0.34005963802337646, + "loss_ib": 0.010955714620649815, + "step": 766 + }, + { + "ce_ib": 9.171826362609863, + "ce_orig": 1.1021441221237183, + "epoch": 0.22028902149687252, + "kl_loss": 0.39605045318603516, + "loss_ib": 0.01313233096152544, + "step": 766 + }, + { + "ce_ib": 9.409163475036621, + "ce_orig": 0.983971893787384, + "epoch": 0.22028902149687252, + "kl_loss": 0.3282513916492462, + "loss_ib": 0.012691677547991276, + "step": 766 + }, + { + "ce_ib": 10.134336471557617, + "ce_orig": 0.8422884345054626, + "epoch": 0.22057660507585017, + "kl_loss": 0.6862824559211731, + "loss_ib": 0.016997160390019417, + "step": 767 + }, + { + "ce_ib": 8.78597640991211, + "ce_orig": 0.6801238656044006, + "epoch": 0.22057660507585017, + "kl_loss": 0.383102685213089, + "loss_ib": 0.012617003172636032, + "step": 767 + }, + { + "ce_ib": 10.374561309814453, + "ce_orig": 1.0352544784545898, + "epoch": 0.22057660507585017, + "kl_loss": 0.32708740234375, + "loss_ib": 0.013645435683429241, + "step": 767 + }, + { + "ce_ib": 4.927638530731201, + "ce_orig": 0.5614314079284668, + "epoch": 0.22057660507585017, + "kl_loss": 0.37318044900894165, + "loss_ib": 0.008659442886710167, + "step": 767 + }, + { + "ce_ib": 11.366865158081055, + "ce_orig": 0.8449366688728333, + "epoch": 0.22086418865482782, + "kl_loss": 0.36128175258636475, + "loss_ib": 0.014979682862758636, + "step": 768 + }, + { + "ce_ib": 9.95175838470459, + "ce_orig": 1.076265811920166, + "epoch": 0.22086418865482782, + "kl_loss": 0.3022347688674927, + "loss_ib": 0.012974105775356293, + "step": 768 + }, + { + "ce_ib": 10.05750846862793, + "ce_orig": 0.8622808456420898, + "epoch": 0.22086418865482782, + "kl_loss": 0.3522017002105713, + "loss_ib": 0.013579525984823704, + "step": 768 + }, + { + "ce_ib": 8.465921401977539, + "ce_orig": 0.7081587910652161, + "epoch": 0.22086418865482782, + "kl_loss": 0.4725481867790222, + "loss_ib": 0.01319140288978815, + "step": 768 + }, + { + "ce_ib": 12.121952056884766, + "ce_orig": 0.9619759917259216, + "epoch": 0.22115177223380544, + "kl_loss": 0.5650833249092102, + "loss_ib": 0.01777278631925583, + "step": 769 + }, + { + "ce_ib": 7.01571798324585, + "ce_orig": 0.5827529430389404, + "epoch": 0.22115177223380544, + "kl_loss": 0.36164534091949463, + "loss_ib": 0.010632171295583248, + "step": 769 + }, + { + "ce_ib": 9.584782600402832, + "ce_orig": 0.9394201636314392, + "epoch": 0.22115177223380544, + "kl_loss": 0.3103780746459961, + "loss_ib": 0.012688562273979187, + "step": 769 + }, + { + "ce_ib": 13.475021362304688, + "ce_orig": 1.4583699703216553, + "epoch": 0.22115177223380544, + "kl_loss": 0.4726155996322632, + "loss_ib": 0.018201176077127457, + "step": 769 + }, + { + "epoch": 0.2214393558127831, + "grad_norm": 0.09313464909791946, + "learning_rate": 9.951292327403663e-06, + "loss": 0.9476, + "step": 770 + }, + { + "ce_ib": 10.434471130371094, + "ce_orig": 1.0447543859481812, + "epoch": 0.2214393558127831, + "kl_loss": 0.3816416561603546, + "loss_ib": 0.01425088755786419, + "step": 770 + }, + { + "ce_ib": 6.2150187492370605, + "ce_orig": 0.7222704291343689, + "epoch": 0.2214393558127831, + "kl_loss": 0.3173387050628662, + "loss_ib": 0.009388405829668045, + "step": 770 + }, + { + "ce_ib": 15.137702941894531, + "ce_orig": 2.1303963661193848, + "epoch": 0.2214393558127831, + "kl_loss": 0.3662612736225128, + "loss_ib": 0.018800314515829086, + "step": 770 + }, + { + "ce_ib": 9.431180953979492, + "ce_orig": 0.7595182061195374, + "epoch": 0.2214393558127831, + "kl_loss": 0.35950183868408203, + "loss_ib": 0.01302619930356741, + "step": 770 + }, + { + "ce_ib": 7.864904880523682, + "ce_orig": 0.48237812519073486, + "epoch": 0.22172693939176072, + "kl_loss": 0.35039085149765015, + "loss_ib": 0.011368812993168831, + "step": 771 + }, + { + "ce_ib": 10.381065368652344, + "ce_orig": 0.9781961441040039, + "epoch": 0.22172693939176072, + "kl_loss": 0.45010584592819214, + "loss_ib": 0.014882123097777367, + "step": 771 + }, + { + "ce_ib": 8.742616653442383, + "ce_orig": 0.7862986326217651, + "epoch": 0.22172693939176072, + "kl_loss": 0.439785897731781, + "loss_ib": 0.013140475377440453, + "step": 771 + }, + { + "ce_ib": 5.651560306549072, + "ce_orig": 0.42288464307785034, + "epoch": 0.22172693939176072, + "kl_loss": 0.660328209400177, + "loss_ib": 0.012254842557013035, + "step": 771 + }, + { + "ce_ib": 6.206586837768555, + "ce_orig": 0.48183295130729675, + "epoch": 0.22201452297073837, + "kl_loss": 0.30323436856269836, + "loss_ib": 0.00923893041908741, + "step": 772 + }, + { + "ce_ib": 9.07832145690918, + "ce_orig": 0.8749274611473083, + "epoch": 0.22201452297073837, + "kl_loss": 0.41655945777893066, + "loss_ib": 0.013243915513157845, + "step": 772 + }, + { + "ce_ib": 6.524220943450928, + "ce_orig": 0.6328911185264587, + "epoch": 0.22201452297073837, + "kl_loss": 0.4318011701107025, + "loss_ib": 0.010842232033610344, + "step": 772 + }, + { + "ce_ib": 5.257693290710449, + "ce_orig": 0.3358021080493927, + "epoch": 0.22201452297073837, + "kl_loss": 0.25356054306030273, + "loss_ib": 0.007793298922479153, + "step": 772 + }, + { + "ce_ib": 8.05919075012207, + "ce_orig": 0.5993462800979614, + "epoch": 0.22230210654971602, + "kl_loss": 0.37483856081962585, + "loss_ib": 0.011807575821876526, + "step": 773 + }, + { + "ce_ib": 7.331540584564209, + "ce_orig": 0.6803370118141174, + "epoch": 0.22230210654971602, + "kl_loss": 0.38549911975860596, + "loss_ib": 0.011186531744897366, + "step": 773 + }, + { + "ce_ib": 6.345643997192383, + "ce_orig": 0.6718232035636902, + "epoch": 0.22230210654971602, + "kl_loss": 0.38235121965408325, + "loss_ib": 0.010169154964387417, + "step": 773 + }, + { + "ce_ib": 12.004826545715332, + "ce_orig": 1.3992005586624146, + "epoch": 0.22230210654971602, + "kl_loss": 0.4480227530002594, + "loss_ib": 0.016485054045915604, + "step": 773 + }, + { + "ce_ib": 5.82366418838501, + "ce_orig": 0.5827267169952393, + "epoch": 0.22258969012869365, + "kl_loss": 0.38700664043426514, + "loss_ib": 0.009693730622529984, + "step": 774 + }, + { + "ce_ib": 12.45301342010498, + "ce_orig": 1.3923219442367554, + "epoch": 0.22258969012869365, + "kl_loss": 0.32873886823654175, + "loss_ib": 0.015740402042865753, + "step": 774 + }, + { + "ce_ib": 11.65266227722168, + "ce_orig": 1.4098445177078247, + "epoch": 0.22258969012869365, + "kl_loss": 0.34009307622909546, + "loss_ib": 0.015053593553602695, + "step": 774 + }, + { + "ce_ib": 7.026700019836426, + "ce_orig": 0.7507833242416382, + "epoch": 0.22258969012869365, + "kl_loss": 0.30786123871803284, + "loss_ib": 0.010105312801897526, + "step": 774 + }, + { + "epoch": 0.2228772737076713, + "grad_norm": 0.09665674716234207, + "learning_rate": 9.95020573150145e-06, + "loss": 0.8829, + "step": 775 + }, + { + "ce_ib": 9.703756332397461, + "ce_orig": 0.9238471388816833, + "epoch": 0.2228772737076713, + "kl_loss": 0.328873872756958, + "loss_ib": 0.012992494739592075, + "step": 775 + }, + { + "ce_ib": 13.514963150024414, + "ce_orig": 1.3463358879089355, + "epoch": 0.2228772737076713, + "kl_loss": 0.3467975854873657, + "loss_ib": 0.01698293909430504, + "step": 775 + }, + { + "ce_ib": 8.213980674743652, + "ce_orig": 0.8611528873443604, + "epoch": 0.2228772737076713, + "kl_loss": 0.7585752010345459, + "loss_ib": 0.015799731016159058, + "step": 775 + }, + { + "ce_ib": 8.563956260681152, + "ce_orig": 0.8483219146728516, + "epoch": 0.2228772737076713, + "kl_loss": 0.34036189317703247, + "loss_ib": 0.011967575177550316, + "step": 775 + }, + { + "ce_ib": 9.094470024108887, + "ce_orig": 1.0628784894943237, + "epoch": 0.22316485728664892, + "kl_loss": 0.46681421995162964, + "loss_ib": 0.013762611895799637, + "step": 776 + }, + { + "ce_ib": 7.872087478637695, + "ce_orig": 0.8701239228248596, + "epoch": 0.22316485728664892, + "kl_loss": 0.3346797227859497, + "loss_ib": 0.011218884028494358, + "step": 776 + }, + { + "ce_ib": 10.01878833770752, + "ce_orig": 0.8464388847351074, + "epoch": 0.22316485728664892, + "kl_loss": 0.47228649258613586, + "loss_ib": 0.014741652645170689, + "step": 776 + }, + { + "ce_ib": 7.2308549880981445, + "ce_orig": 0.8908596038818359, + "epoch": 0.22316485728664892, + "kl_loss": 0.3172210454940796, + "loss_ib": 0.010403065010905266, + "step": 776 + }, + { + "ce_ib": 8.690382957458496, + "ce_orig": 0.7194284200668335, + "epoch": 0.22345244086562657, + "kl_loss": 0.3316614031791687, + "loss_ib": 0.01200699619948864, + "step": 777 + }, + { + "ce_ib": 7.480838775634766, + "ce_orig": 0.7475316524505615, + "epoch": 0.22345244086562657, + "kl_loss": 0.3133096694946289, + "loss_ib": 0.010613935068249702, + "step": 777 + }, + { + "ce_ib": 16.189916610717773, + "ce_orig": 2.0802390575408936, + "epoch": 0.22345244086562657, + "kl_loss": 0.484038770198822, + "loss_ib": 0.021030303090810776, + "step": 777 + }, + { + "ce_ib": 7.3860626220703125, + "ce_orig": 0.5432776212692261, + "epoch": 0.22345244086562657, + "kl_loss": 0.39021003246307373, + "loss_ib": 0.011288163252174854, + "step": 777 + }, + { + "ce_ib": 5.361101150512695, + "ce_orig": 0.6422094106674194, + "epoch": 0.22374002444460422, + "kl_loss": 0.3227311372756958, + "loss_ib": 0.008588411845266819, + "step": 778 + }, + { + "ce_ib": 11.809074401855469, + "ce_orig": 1.1206345558166504, + "epoch": 0.22374002444460422, + "kl_loss": 0.34884482622146606, + "loss_ib": 0.015297521837055683, + "step": 778 + }, + { + "ce_ib": 8.916701316833496, + "ce_orig": 0.8444573283195496, + "epoch": 0.22374002444460422, + "kl_loss": 0.3931344151496887, + "loss_ib": 0.01284804567694664, + "step": 778 + }, + { + "ce_ib": 7.642374038696289, + "ce_orig": 1.1561375856399536, + "epoch": 0.22374002444460422, + "kl_loss": 0.35411643981933594, + "loss_ib": 0.011183538474142551, + "step": 778 + }, + { + "ce_ib": 6.7063140869140625, + "ce_orig": 0.6542351245880127, + "epoch": 0.22402760802358185, + "kl_loss": 0.34912005066871643, + "loss_ib": 0.010197513736784458, + "step": 779 + }, + { + "ce_ib": 7.014062404632568, + "ce_orig": 0.5142630934715271, + "epoch": 0.22402760802358185, + "kl_loss": 0.34287211298942566, + "loss_ib": 0.010442784056067467, + "step": 779 + }, + { + "ce_ib": 9.529996871948242, + "ce_orig": 0.7422083616256714, + "epoch": 0.22402760802358185, + "kl_loss": 0.3641512989997864, + "loss_ib": 0.013171510770916939, + "step": 779 + }, + { + "ce_ib": 6.52680778503418, + "ce_orig": 0.8021174669265747, + "epoch": 0.22402760802358185, + "kl_loss": 0.32752007246017456, + "loss_ib": 0.009802008979022503, + "step": 779 + }, + { + "epoch": 0.2243151916025595, + "grad_norm": 0.10656443983316422, + "learning_rate": 9.949107209404664e-06, + "loss": 0.8954, + "step": 780 + }, + { + "ce_ib": 9.61880874633789, + "ce_orig": 0.8786146640777588, + "epoch": 0.2243151916025595, + "kl_loss": 0.3575580418109894, + "loss_ib": 0.013194388709962368, + "step": 780 + }, + { + "ce_ib": 10.178520202636719, + "ce_orig": 0.807783305644989, + "epoch": 0.2243151916025595, + "kl_loss": 0.342892587184906, + "loss_ib": 0.013607447035610676, + "step": 780 + }, + { + "ce_ib": 9.118548393249512, + "ce_orig": 1.1404297351837158, + "epoch": 0.2243151916025595, + "kl_loss": 0.30617213249206543, + "loss_ib": 0.012180269695818424, + "step": 780 + }, + { + "ce_ib": 10.034414291381836, + "ce_orig": 0.7776552438735962, + "epoch": 0.2243151916025595, + "kl_loss": 0.4022853970527649, + "loss_ib": 0.014057268388569355, + "step": 780 + }, + { + "ce_ib": 9.740767478942871, + "ce_orig": 1.099388599395752, + "epoch": 0.22460277518153712, + "kl_loss": 0.6486046314239502, + "loss_ib": 0.016226813197135925, + "step": 781 + }, + { + "ce_ib": 6.6487016677856445, + "ce_orig": 0.6519371271133423, + "epoch": 0.22460277518153712, + "kl_loss": 0.3416748344898224, + "loss_ib": 0.010065450333058834, + "step": 781 + }, + { + "ce_ib": 4.7245869636535645, + "ce_orig": 0.6195375323295593, + "epoch": 0.22460277518153712, + "kl_loss": 0.2902180552482605, + "loss_ib": 0.007626766804605722, + "step": 781 + }, + { + "ce_ib": 12.154743194580078, + "ce_orig": 1.4246693849563599, + "epoch": 0.22460277518153712, + "kl_loss": 0.25261539220809937, + "loss_ib": 0.014680897817015648, + "step": 781 + }, + { + "ce_ib": 5.439098834991455, + "ce_orig": 0.8269286155700684, + "epoch": 0.22489035876051477, + "kl_loss": 0.34107422828674316, + "loss_ib": 0.00884984154254198, + "step": 782 + }, + { + "ce_ib": 9.174774169921875, + "ce_orig": 1.1341043710708618, + "epoch": 0.22489035876051477, + "kl_loss": 0.3221195936203003, + "loss_ib": 0.012395970523357391, + "step": 782 + }, + { + "ce_ib": 9.228188514709473, + "ce_orig": 1.234445333480835, + "epoch": 0.22489035876051477, + "kl_loss": 0.30531132221221924, + "loss_ib": 0.012281300500035286, + "step": 782 + }, + { + "ce_ib": 5.356451034545898, + "ce_orig": 0.8137478828430176, + "epoch": 0.22489035876051477, + "kl_loss": 0.37726348638534546, + "loss_ib": 0.009129085578024387, + "step": 782 + }, + { + "ce_ib": 9.502201080322266, + "ce_orig": 0.8526181578636169, + "epoch": 0.22517794233949243, + "kl_loss": 0.6065495014190674, + "loss_ib": 0.01556769572198391, + "step": 783 + }, + { + "ce_ib": 7.823635578155518, + "ce_orig": 0.6923622488975525, + "epoch": 0.22517794233949243, + "kl_loss": 0.394509494304657, + "loss_ib": 0.011768730357289314, + "step": 783 + }, + { + "ce_ib": 6.387758255004883, + "ce_orig": 0.6680426001548767, + "epoch": 0.22517794233949243, + "kl_loss": 0.30215880274772644, + "loss_ib": 0.009409346617758274, + "step": 783 + }, + { + "ce_ib": 7.9621663093566895, + "ce_orig": 0.2912781834602356, + "epoch": 0.22517794233949243, + "kl_loss": 0.6065285205841064, + "loss_ib": 0.01402745209634304, + "step": 783 + }, + { + "ce_ib": 8.740853309631348, + "ce_orig": 1.1666960716247559, + "epoch": 0.22546552591847005, + "kl_loss": 0.26575469970703125, + "loss_ib": 0.011398401111364365, + "step": 784 + }, + { + "ce_ib": 11.868843078613281, + "ce_orig": 1.1964963674545288, + "epoch": 0.22546552591847005, + "kl_loss": 0.34643134474754333, + "loss_ib": 0.01533315610140562, + "step": 784 + }, + { + "ce_ib": 11.849778175354004, + "ce_orig": 0.6975755095481873, + "epoch": 0.22546552591847005, + "kl_loss": 0.48860257863998413, + "loss_ib": 0.016735803335905075, + "step": 784 + }, + { + "ce_ib": 7.8105998039245605, + "ce_orig": 1.0470623970031738, + "epoch": 0.22546552591847005, + "kl_loss": 0.31021207571029663, + "loss_ib": 0.010912721045315266, + "step": 784 + }, + { + "epoch": 0.2257531094974477, + "grad_norm": 0.11681295186281204, + "learning_rate": 9.9479967637599e-06, + "loss": 0.9212, + "step": 785 + }, + { + "ce_ib": 11.292010307312012, + "ce_orig": 1.259683609008789, + "epoch": 0.2257531094974477, + "kl_loss": 0.2796482443809509, + "loss_ib": 0.014088491909205914, + "step": 785 + }, + { + "ce_ib": 10.634122848510742, + "ce_orig": 0.9388463497161865, + "epoch": 0.2257531094974477, + "kl_loss": 0.33059000968933105, + "loss_ib": 0.013940023258328438, + "step": 785 + }, + { + "ce_ib": 12.245208740234375, + "ce_orig": 1.6006643772125244, + "epoch": 0.2257531094974477, + "kl_loss": 0.43430644273757935, + "loss_ib": 0.016588272526860237, + "step": 785 + }, + { + "ce_ib": 7.943255424499512, + "ce_orig": 0.701815128326416, + "epoch": 0.2257531094974477, + "kl_loss": 0.42579883337020874, + "loss_ib": 0.012201243080198765, + "step": 785 + }, + { + "ce_ib": 5.053991794586182, + "ce_orig": 0.5053215622901917, + "epoch": 0.22604069307642533, + "kl_loss": 0.31988024711608887, + "loss_ib": 0.008252793923020363, + "step": 786 + }, + { + "ce_ib": 10.378332138061523, + "ce_orig": 1.0649505853652954, + "epoch": 0.22604069307642533, + "kl_loss": 0.3701839745044708, + "loss_ib": 0.014080171473324299, + "step": 786 + }, + { + "ce_ib": 6.406869888305664, + "ce_orig": 0.7461100220680237, + "epoch": 0.22604069307642533, + "kl_loss": 0.3331264853477478, + "loss_ib": 0.0097381342202425, + "step": 786 + }, + { + "ce_ib": 9.13780689239502, + "ce_orig": 0.6390551924705505, + "epoch": 0.22604069307642533, + "kl_loss": 0.5007272958755493, + "loss_ib": 0.014145080000162125, + "step": 786 + }, + { + "ce_ib": 8.147912979125977, + "ce_orig": 0.5080342292785645, + "epoch": 0.22632827665540298, + "kl_loss": 0.4450484812259674, + "loss_ib": 0.012598397210240364, + "step": 787 + }, + { + "ce_ib": 7.028100490570068, + "ce_orig": 0.593861997127533, + "epoch": 0.22632827665540298, + "kl_loss": 0.4049058258533478, + "loss_ib": 0.011077158153057098, + "step": 787 + }, + { + "ce_ib": 8.043399810791016, + "ce_orig": 0.4268825054168701, + "epoch": 0.22632827665540298, + "kl_loss": 0.35771211981773376, + "loss_ib": 0.011620521545410156, + "step": 787 + }, + { + "ce_ib": 6.9142866134643555, + "ce_orig": 0.4698316156864166, + "epoch": 0.22632827665540298, + "kl_loss": 0.8020865321159363, + "loss_ib": 0.014935152605175972, + "step": 787 + }, + { + "ce_ib": 6.95719575881958, + "ce_orig": 0.8023804426193237, + "epoch": 0.22661586023438063, + "kl_loss": 0.3693576455116272, + "loss_ib": 0.010650772601366043, + "step": 788 + }, + { + "ce_ib": 12.449686050415039, + "ce_orig": 1.3771573305130005, + "epoch": 0.22661586023438063, + "kl_loss": 0.4264632761478424, + "loss_ib": 0.016714317724108696, + "step": 788 + }, + { + "ce_ib": 11.89621353149414, + "ce_orig": 1.398105502128601, + "epoch": 0.22661586023438063, + "kl_loss": 0.30330199003219604, + "loss_ib": 0.014929232187569141, + "step": 788 + }, + { + "ce_ib": 5.163127422332764, + "ce_orig": 0.5273948907852173, + "epoch": 0.22661586023438063, + "kl_loss": 0.360205739736557, + "loss_ib": 0.008765184320509434, + "step": 788 + }, + { + "ce_ib": 10.611333847045898, + "ce_orig": 0.8973667025566101, + "epoch": 0.22690344381335825, + "kl_loss": 0.42807987332344055, + "loss_ib": 0.01489213202148676, + "step": 789 + }, + { + "ce_ib": 4.420098304748535, + "ce_orig": 0.5175418853759766, + "epoch": 0.22690344381335825, + "kl_loss": 0.30578553676605225, + "loss_ib": 0.007477953098714352, + "step": 789 + }, + { + "ce_ib": 7.4076008796691895, + "ce_orig": 0.5371741056442261, + "epoch": 0.22690344381335825, + "kl_loss": 0.3347129821777344, + "loss_ib": 0.010754730552434921, + "step": 789 + }, + { + "ce_ib": 9.960221290588379, + "ce_orig": 0.9401814937591553, + "epoch": 0.22690344381335825, + "kl_loss": 0.3342566192150116, + "loss_ib": 0.013302787207067013, + "step": 789 + }, + { + "epoch": 0.2271910273923359, + "grad_norm": 0.0974324494600296, + "learning_rate": 9.946874397242474e-06, + "loss": 0.9038, + "step": 790 + }, + { + "ce_ib": 10.756206512451172, + "ce_orig": 0.7469778060913086, + "epoch": 0.2271910273923359, + "kl_loss": 0.3871549963951111, + "loss_ib": 0.014627756550908089, + "step": 790 + }, + { + "ce_ib": 11.253997802734375, + "ce_orig": 1.2753102779388428, + "epoch": 0.2271910273923359, + "kl_loss": 0.37940990924835205, + "loss_ib": 0.015048096887767315, + "step": 790 + }, + { + "ce_ib": 7.96759557723999, + "ce_orig": 0.5226830840110779, + "epoch": 0.2271910273923359, + "kl_loss": 0.4906744360923767, + "loss_ib": 0.012874339707195759, + "step": 790 + }, + { + "ce_ib": 6.907197952270508, + "ce_orig": 0.8273372054100037, + "epoch": 0.2271910273923359, + "kl_loss": 0.29984915256500244, + "loss_ib": 0.009905689395964146, + "step": 790 + }, + { + "ce_ib": 11.577720642089844, + "ce_orig": 0.4754190146923065, + "epoch": 0.22747861097131353, + "kl_loss": 0.5800215601921082, + "loss_ib": 0.017377937212586403, + "step": 791 + }, + { + "ce_ib": 7.266067028045654, + "ce_orig": 0.7913497090339661, + "epoch": 0.22747861097131353, + "kl_loss": 0.32481786608695984, + "loss_ib": 0.010514246299862862, + "step": 791 + }, + { + "ce_ib": 10.663007736206055, + "ce_orig": 0.8187626004219055, + "epoch": 0.22747861097131353, + "kl_loss": 0.36268165707588196, + "loss_ib": 0.014289823360741138, + "step": 791 + }, + { + "ce_ib": 10.45195484161377, + "ce_orig": 1.220002293586731, + "epoch": 0.22747861097131353, + "kl_loss": 0.37439438700675964, + "loss_ib": 0.014195898547768593, + "step": 791 + }, + { + "ce_ib": 10.41888427734375, + "ce_orig": 0.6695852279663086, + "epoch": 0.22776619455029118, + "kl_loss": 0.5112478733062744, + "loss_ib": 0.01553136296570301, + "step": 792 + }, + { + "ce_ib": 10.26807689666748, + "ce_orig": 1.4850273132324219, + "epoch": 0.22776619455029118, + "kl_loss": 0.45192694664001465, + "loss_ib": 0.014787346124649048, + "step": 792 + }, + { + "ce_ib": 13.323302268981934, + "ce_orig": 1.7503565549850464, + "epoch": 0.22776619455029118, + "kl_loss": 0.3711824417114258, + "loss_ib": 0.01703512668609619, + "step": 792 + }, + { + "ce_ib": 5.721099853515625, + "ce_orig": 0.3669746220111847, + "epoch": 0.22776619455029118, + "kl_loss": 0.31216946244239807, + "loss_ib": 0.00884279515594244, + "step": 792 + }, + { + "ce_ib": 11.519103050231934, + "ce_orig": 0.9824369549751282, + "epoch": 0.22805377812926883, + "kl_loss": 0.3825758397579193, + "loss_ib": 0.015344860963523388, + "step": 793 + }, + { + "ce_ib": 6.631891250610352, + "ce_orig": 0.6841241121292114, + "epoch": 0.22805377812926883, + "kl_loss": 0.3822288513183594, + "loss_ib": 0.010454179719090462, + "step": 793 + }, + { + "ce_ib": 7.766302585601807, + "ce_orig": 0.9525435566902161, + "epoch": 0.22805377812926883, + "kl_loss": 0.3503475785255432, + "loss_ib": 0.011269778944551945, + "step": 793 + }, + { + "ce_ib": 10.979907035827637, + "ce_orig": 0.8087160587310791, + "epoch": 0.22805377812926883, + "kl_loss": 0.4700503349304199, + "loss_ib": 0.015680409967899323, + "step": 793 + }, + { + "ce_ib": 4.012631416320801, + "ce_orig": 0.49861544370651245, + "epoch": 0.22834136170824645, + "kl_loss": 0.8556938171386719, + "loss_ib": 0.012569569051265717, + "step": 794 + }, + { + "ce_ib": 8.279598236083984, + "ce_orig": 0.13107673823833466, + "epoch": 0.22834136170824645, + "kl_loss": 0.8502056002616882, + "loss_ib": 0.016781654208898544, + "step": 794 + }, + { + "ce_ib": 7.012378215789795, + "ce_orig": 0.4306791424751282, + "epoch": 0.22834136170824645, + "kl_loss": 0.317436158657074, + "loss_ib": 0.010186740197241306, + "step": 794 + }, + { + "ce_ib": 8.948698043823242, + "ce_orig": 0.9477734565734863, + "epoch": 0.22834136170824645, + "kl_loss": 0.6815387606620789, + "loss_ib": 0.015764085575938225, + "step": 794 + }, + { + "epoch": 0.2286289452872241, + "grad_norm": 0.11332917958498001, + "learning_rate": 9.945740112556433e-06, + "loss": 0.8909, + "step": 795 + }, + { + "ce_ib": 9.626729965209961, + "ce_orig": 0.807258665561676, + "epoch": 0.2286289452872241, + "kl_loss": 0.30290764570236206, + "loss_ib": 0.012655805796384811, + "step": 795 + }, + { + "ce_ib": 8.031062126159668, + "ce_orig": 0.6332983374595642, + "epoch": 0.2286289452872241, + "kl_loss": 0.4431205987930298, + "loss_ib": 0.012462267652153969, + "step": 795 + }, + { + "ce_ib": 6.183389663696289, + "ce_orig": 0.3490990400314331, + "epoch": 0.2286289452872241, + "kl_loss": 0.2828608751296997, + "loss_ib": 0.009011998772621155, + "step": 795 + }, + { + "ce_ib": 10.629033088684082, + "ce_orig": 1.2572027444839478, + "epoch": 0.2286289452872241, + "kl_loss": 0.2958335280418396, + "loss_ib": 0.013587366789579391, + "step": 795 + }, + { + "ce_ib": 12.632726669311523, + "ce_orig": 1.813147783279419, + "epoch": 0.22891652886620173, + "kl_loss": 0.34517595171928406, + "loss_ib": 0.01608448661863804, + "step": 796 + }, + { + "ce_ib": 9.120535850524902, + "ce_orig": 1.1200886964797974, + "epoch": 0.22891652886620173, + "kl_loss": 0.36460253596305847, + "loss_ib": 0.012766561470925808, + "step": 796 + }, + { + "ce_ib": 8.088251113891602, + "ce_orig": 0.8303477168083191, + "epoch": 0.22891652886620173, + "kl_loss": 0.3226301074028015, + "loss_ib": 0.01131455134600401, + "step": 796 + }, + { + "ce_ib": 10.398566246032715, + "ce_orig": 0.7473430633544922, + "epoch": 0.22891652886620173, + "kl_loss": 0.40119439363479614, + "loss_ib": 0.01441050972789526, + "step": 796 + }, + { + "ce_ib": 7.080221176147461, + "ce_orig": 0.7931245565414429, + "epoch": 0.22920411244517938, + "kl_loss": 0.29053109884262085, + "loss_ib": 0.00998553168028593, + "step": 797 + }, + { + "ce_ib": 7.256488800048828, + "ce_orig": 0.8034987449645996, + "epoch": 0.22920411244517938, + "kl_loss": 0.3614344596862793, + "loss_ib": 0.010870832949876785, + "step": 797 + }, + { + "ce_ib": 10.867740631103516, + "ce_orig": 0.8990861177444458, + "epoch": 0.22920411244517938, + "kl_loss": 0.46362748742103577, + "loss_ib": 0.015504015609622002, + "step": 797 + }, + { + "ce_ib": 9.055109977722168, + "ce_orig": 0.9181981086730957, + "epoch": 0.22920411244517938, + "kl_loss": 0.4794706106185913, + "loss_ib": 0.013849816285073757, + "step": 797 + }, + { + "ce_ib": 7.544148921966553, + "ce_orig": 0.6566915512084961, + "epoch": 0.22949169602415703, + "kl_loss": 0.30486202239990234, + "loss_ib": 0.010592768900096416, + "step": 798 + }, + { + "ce_ib": 5.6905059814453125, + "ce_orig": 0.8550714254379272, + "epoch": 0.22949169602415703, + "kl_loss": 0.45504266023635864, + "loss_ib": 0.010240932926535606, + "step": 798 + }, + { + "ce_ib": 6.176849842071533, + "ce_orig": 0.6315608024597168, + "epoch": 0.22949169602415703, + "kl_loss": 0.3964434266090393, + "loss_ib": 0.010141284205019474, + "step": 798 + }, + { + "ce_ib": 4.630974769592285, + "ce_orig": 0.44450247287750244, + "epoch": 0.22949169602415703, + "kl_loss": 0.36748573184013367, + "loss_ib": 0.00830583181232214, + "step": 798 + }, + { + "ce_ib": 11.174190521240234, + "ce_orig": 1.3261359930038452, + "epoch": 0.22977927960313466, + "kl_loss": 0.2634417414665222, + "loss_ib": 0.013808608055114746, + "step": 799 + }, + { + "ce_ib": 8.188764572143555, + "ce_orig": 0.5794906616210938, + "epoch": 0.22977927960313466, + "kl_loss": 0.40460944175720215, + "loss_ib": 0.01223485916852951, + "step": 799 + }, + { + "ce_ib": 8.544333457946777, + "ce_orig": 0.705375075340271, + "epoch": 0.22977927960313466, + "kl_loss": 0.3461841940879822, + "loss_ib": 0.012006175704300404, + "step": 799 + }, + { + "ce_ib": 10.321372985839844, + "ce_orig": 0.9484913945198059, + "epoch": 0.22977927960313466, + "kl_loss": 0.343988835811615, + "loss_ib": 0.01376126054674387, + "step": 799 + }, + { + "epoch": 0.2300668631821123, + "grad_norm": 0.08445550501346588, + "learning_rate": 9.94459391243453e-06, + "loss": 0.8778, + "step": 800 + }, + { + "ce_ib": 7.361359119415283, + "ce_orig": 0.678317129611969, + "epoch": 0.2300668631821123, + "kl_loss": 0.6545436382293701, + "loss_ib": 0.013906795531511307, + "step": 800 + }, + { + "ce_ib": 9.871505737304688, + "ce_orig": 1.0527350902557373, + "epoch": 0.2300668631821123, + "kl_loss": 0.6774921417236328, + "loss_ib": 0.016646428033709526, + "step": 800 + }, + { + "ce_ib": 10.61447525024414, + "ce_orig": 1.2886981964111328, + "epoch": 0.2300668631821123, + "kl_loss": 0.32457613945007324, + "loss_ib": 0.013860235922038555, + "step": 800 + }, + { + "ce_ib": 12.351025581359863, + "ce_orig": 1.449278712272644, + "epoch": 0.2300668631821123, + "kl_loss": 0.3870459496974945, + "loss_ib": 0.016221484169363976, + "step": 800 + }, + { + "ce_ib": 6.113245964050293, + "ce_orig": 0.49481338262557983, + "epoch": 0.23035444676108993, + "kl_loss": 0.31978094577789307, + "loss_ib": 0.009311055764555931, + "step": 801 + }, + { + "ce_ib": 9.732067108154297, + "ce_orig": 1.0109401941299438, + "epoch": 0.23035444676108993, + "kl_loss": 0.37637659907341003, + "loss_ib": 0.013495832681655884, + "step": 801 + }, + { + "ce_ib": 7.419297695159912, + "ce_orig": 0.680454671382904, + "epoch": 0.23035444676108993, + "kl_loss": 0.3423839509487152, + "loss_ib": 0.01084313727915287, + "step": 801 + }, + { + "ce_ib": 7.179284572601318, + "ce_orig": 0.8883647322654724, + "epoch": 0.23035444676108993, + "kl_loss": 0.3522745966911316, + "loss_ib": 0.01070203073322773, + "step": 801 + }, + { + "ce_ib": 12.911877632141113, + "ce_orig": 0.9866081476211548, + "epoch": 0.23064203034006758, + "kl_loss": 0.293899267911911, + "loss_ib": 0.015850869938731194, + "step": 802 + }, + { + "ce_ib": 10.869671821594238, + "ce_orig": 1.2515939474105835, + "epoch": 0.23064203034006758, + "kl_loss": 0.6851050853729248, + "loss_ib": 0.017720723524689674, + "step": 802 + }, + { + "ce_ib": 7.759640216827393, + "ce_orig": 0.6238282918930054, + "epoch": 0.23064203034006758, + "kl_loss": 0.36549922823905945, + "loss_ib": 0.011414632201194763, + "step": 802 + }, + { + "ce_ib": 3.205626964569092, + "ce_orig": 0.13584905862808228, + "epoch": 0.23064203034006758, + "kl_loss": 0.7906656265258789, + "loss_ib": 0.011112282983958721, + "step": 802 + }, + { + "ce_ib": 12.389301300048828, + "ce_orig": 1.3402420282363892, + "epoch": 0.23092961391904523, + "kl_loss": 0.35545194149017334, + "loss_ib": 0.015943819656968117, + "step": 803 + }, + { + "ce_ib": 10.47632122039795, + "ce_orig": 0.7908617854118347, + "epoch": 0.23092961391904523, + "kl_loss": 0.31981128454208374, + "loss_ib": 0.013674433343112469, + "step": 803 + }, + { + "ce_ib": 9.429079055786133, + "ce_orig": 1.371010661125183, + "epoch": 0.23092961391904523, + "kl_loss": 0.6973379850387573, + "loss_ib": 0.016402458772063255, + "step": 803 + }, + { + "ce_ib": 7.480784893035889, + "ce_orig": 0.8337413668632507, + "epoch": 0.23092961391904523, + "kl_loss": 0.3528236150741577, + "loss_ib": 0.01100902073085308, + "step": 803 + }, + { + "ce_ib": 5.842719078063965, + "ce_orig": 0.5573224425315857, + "epoch": 0.23121719749802286, + "kl_loss": 0.2670140862464905, + "loss_ib": 0.008512860164046288, + "step": 804 + }, + { + "ce_ib": 7.656364917755127, + "ce_orig": 0.7828370928764343, + "epoch": 0.23121719749802286, + "kl_loss": 0.2876054346561432, + "loss_ib": 0.010532419197261333, + "step": 804 + }, + { + "ce_ib": 4.491579532623291, + "ce_orig": 0.33045491576194763, + "epoch": 0.23121719749802286, + "kl_loss": 0.6809794902801514, + "loss_ib": 0.011301374062895775, + "step": 804 + }, + { + "ce_ib": 9.232762336730957, + "ce_orig": 0.7977848649024963, + "epoch": 0.23121719749802286, + "kl_loss": 0.2893211245536804, + "loss_ib": 0.012125973589718342, + "step": 804 + }, + { + "epoch": 0.2315047810770005, + "grad_norm": 0.09778746962547302, + "learning_rate": 9.943435799638226e-06, + "loss": 0.9126, + "step": 805 + }, + { + "ce_ib": 10.316106796264648, + "ce_orig": 1.1037230491638184, + "epoch": 0.2315047810770005, + "kl_loss": 0.38173002004623413, + "loss_ib": 0.014133407734334469, + "step": 805 + }, + { + "ce_ib": 10.278280258178711, + "ce_orig": 1.2289481163024902, + "epoch": 0.2315047810770005, + "kl_loss": 0.4344818592071533, + "loss_ib": 0.014623099006712437, + "step": 805 + }, + { + "ce_ib": 10.008515357971191, + "ce_orig": 1.114085078239441, + "epoch": 0.2315047810770005, + "kl_loss": 0.4228159785270691, + "loss_ib": 0.014236673712730408, + "step": 805 + }, + { + "ce_ib": 10.21235179901123, + "ce_orig": 1.434356451034546, + "epoch": 0.2315047810770005, + "kl_loss": 0.26860710978507996, + "loss_ib": 0.012898423708975315, + "step": 805 + }, + { + "ce_ib": 9.20308780670166, + "ce_orig": 0.6685881018638611, + "epoch": 0.23179236465597813, + "kl_loss": 0.35367828607559204, + "loss_ib": 0.012739870697259903, + "step": 806 + }, + { + "ce_ib": 12.784605026245117, + "ce_orig": 1.4579914808273315, + "epoch": 0.23179236465597813, + "kl_loss": 0.34686392545700073, + "loss_ib": 0.016253244131803513, + "step": 806 + }, + { + "ce_ib": 11.806829452514648, + "ce_orig": 1.5088831186294556, + "epoch": 0.23179236465597813, + "kl_loss": 0.28930962085723877, + "loss_ib": 0.014699925668537617, + "step": 806 + }, + { + "ce_ib": 9.593900680541992, + "ce_orig": 1.328742265701294, + "epoch": 0.23179236465597813, + "kl_loss": 0.33179858326911926, + "loss_ib": 0.012911886908113956, + "step": 806 + }, + { + "ce_ib": 9.164833068847656, + "ce_orig": 1.269349455833435, + "epoch": 0.23207994823495579, + "kl_loss": 0.3544562757015228, + "loss_ib": 0.012709395959973335, + "step": 807 + }, + { + "ce_ib": 7.470652103424072, + "ce_orig": 0.6890314817428589, + "epoch": 0.23207994823495579, + "kl_loss": 0.3289049565792084, + "loss_ib": 0.010759701952338219, + "step": 807 + }, + { + "ce_ib": 9.631521224975586, + "ce_orig": 1.2519800662994385, + "epoch": 0.23207994823495579, + "kl_loss": 0.2588420510292053, + "loss_ib": 0.01221994124352932, + "step": 807 + }, + { + "ce_ib": 8.615235328674316, + "ce_orig": 0.5310425162315369, + "epoch": 0.23207994823495579, + "kl_loss": 0.46589159965515137, + "loss_ib": 0.01327415183186531, + "step": 807 + }, + { + "ce_ib": 7.7025861740112305, + "ce_orig": 0.8470107913017273, + "epoch": 0.23236753181393344, + "kl_loss": 0.3353464603424072, + "loss_ib": 0.011056050658226013, + "step": 808 + }, + { + "ce_ib": 10.234197616577148, + "ce_orig": 1.0393426418304443, + "epoch": 0.23236753181393344, + "kl_loss": 0.49282306432724, + "loss_ib": 0.015162426978349686, + "step": 808 + }, + { + "ce_ib": 8.97604751586914, + "ce_orig": 0.8064647316932678, + "epoch": 0.23236753181393344, + "kl_loss": 0.4205772876739502, + "loss_ib": 0.013181819580495358, + "step": 808 + }, + { + "ce_ib": 10.455026626586914, + "ce_orig": 1.1835849285125732, + "epoch": 0.23236753181393344, + "kl_loss": 0.3391731381416321, + "loss_ib": 0.013846756890416145, + "step": 808 + }, + { + "ce_ib": 6.682322025299072, + "ce_orig": 0.7806753516197205, + "epoch": 0.23265511539291106, + "kl_loss": 0.2750164270401001, + "loss_ib": 0.00943248625844717, + "step": 809 + }, + { + "ce_ib": 7.129208087921143, + "ce_orig": 0.5480561256408691, + "epoch": 0.23265511539291106, + "kl_loss": 0.4469287395477295, + "loss_ib": 0.0115984957665205, + "step": 809 + }, + { + "ce_ib": 13.105687141418457, + "ce_orig": 0.685859739780426, + "epoch": 0.23265511539291106, + "kl_loss": 0.27275753021240234, + "loss_ib": 0.015833262354135513, + "step": 809 + }, + { + "ce_ib": 9.026814460754395, + "ce_orig": 1.057628870010376, + "epoch": 0.23265511539291106, + "kl_loss": 0.3957173228263855, + "loss_ib": 0.012983987107872963, + "step": 809 + }, + { + "epoch": 0.2329426989718887, + "grad_norm": 0.10287559032440186, + "learning_rate": 9.942265776957687e-06, + "loss": 0.9129, + "step": 810 + }, + { + "ce_ib": 9.951510429382324, + "ce_orig": 1.1167075634002686, + "epoch": 0.2329426989718887, + "kl_loss": 0.3010333776473999, + "loss_ib": 0.012961843982338905, + "step": 810 + }, + { + "ce_ib": 10.840296745300293, + "ce_orig": 1.0002007484436035, + "epoch": 0.2329426989718887, + "kl_loss": 0.38143882155418396, + "loss_ib": 0.01465468481183052, + "step": 810 + }, + { + "ce_ib": 7.044186592102051, + "ce_orig": 0.799129068851471, + "epoch": 0.2329426989718887, + "kl_loss": 0.3452165424823761, + "loss_ib": 0.0104963518679142, + "step": 810 + }, + { + "ce_ib": 13.050943374633789, + "ce_orig": 1.4166967868804932, + "epoch": 0.2329426989718887, + "kl_loss": 0.3720596432685852, + "loss_ib": 0.01677154004573822, + "step": 810 + }, + { + "ce_ib": 6.821801662445068, + "ce_orig": 0.7074012756347656, + "epoch": 0.23323028255086634, + "kl_loss": 0.3542909622192383, + "loss_ib": 0.010364711284637451, + "step": 811 + }, + { + "ce_ib": 8.195531845092773, + "ce_orig": 1.0204046964645386, + "epoch": 0.23323028255086634, + "kl_loss": 0.28537851572036743, + "loss_ib": 0.011049317196011543, + "step": 811 + }, + { + "ce_ib": 6.7708001136779785, + "ce_orig": 0.5408430695533752, + "epoch": 0.23323028255086634, + "kl_loss": 0.4277627468109131, + "loss_ib": 0.011048427782952785, + "step": 811 + }, + { + "ce_ib": 10.415853500366211, + "ce_orig": 0.7485983371734619, + "epoch": 0.23323028255086634, + "kl_loss": 0.27399230003356934, + "loss_ib": 0.01315577607601881, + "step": 811 + }, + { + "ce_ib": 11.759166717529297, + "ce_orig": 1.3388240337371826, + "epoch": 0.233517866129844, + "kl_loss": 0.3040698766708374, + "loss_ib": 0.014799864962697029, + "step": 812 + }, + { + "ce_ib": 7.798691272735596, + "ce_orig": 1.0542670488357544, + "epoch": 0.233517866129844, + "kl_loss": 0.3233657777309418, + "loss_ib": 0.01103234849870205, + "step": 812 + }, + { + "ce_ib": 14.092377662658691, + "ce_orig": 1.9352741241455078, + "epoch": 0.233517866129844, + "kl_loss": 0.4481472373008728, + "loss_ib": 0.01857384853065014, + "step": 812 + }, + { + "ce_ib": 6.820461750030518, + "ce_orig": 0.4188855290412903, + "epoch": 0.233517866129844, + "kl_loss": 0.2712195813655853, + "loss_ib": 0.009532657451927662, + "step": 812 + }, + { + "ce_ib": 10.21103572845459, + "ce_orig": 1.5039421319961548, + "epoch": 0.23380544970882164, + "kl_loss": 0.3258100152015686, + "loss_ib": 0.013469135388731956, + "step": 813 + }, + { + "ce_ib": 8.036877632141113, + "ce_orig": 0.608624279499054, + "epoch": 0.23380544970882164, + "kl_loss": 0.2845733165740967, + "loss_ib": 0.010882611386477947, + "step": 813 + }, + { + "ce_ib": 10.804905891418457, + "ce_orig": 1.0994369983673096, + "epoch": 0.23380544970882164, + "kl_loss": 0.2775050699710846, + "loss_ib": 0.013579956255853176, + "step": 813 + }, + { + "ce_ib": 6.5864362716674805, + "ce_orig": 0.5708433985710144, + "epoch": 0.23380544970882164, + "kl_loss": 0.2514118552207947, + "loss_ib": 0.009100555442273617, + "step": 813 + }, + { + "ce_ib": 7.656818866729736, + "ce_orig": 0.9114794731140137, + "epoch": 0.23409303328779926, + "kl_loss": 0.28084778785705566, + "loss_ib": 0.010465297847986221, + "step": 814 + }, + { + "ce_ib": 7.377896308898926, + "ce_orig": 0.9978185296058655, + "epoch": 0.23409303328779926, + "kl_loss": 0.2847989797592163, + "loss_ib": 0.010225885547697544, + "step": 814 + }, + { + "ce_ib": 6.432290077209473, + "ce_orig": 0.6785269379615784, + "epoch": 0.23409303328779926, + "kl_loss": 0.30970633029937744, + "loss_ib": 0.009529353119432926, + "step": 814 + }, + { + "ce_ib": 8.577414512634277, + "ce_orig": 0.6792881488800049, + "epoch": 0.23409303328779926, + "kl_loss": 0.3012913465499878, + "loss_ib": 0.011590328067541122, + "step": 814 + }, + { + "epoch": 0.23438061686677691, + "grad_norm": 0.10755941271781921, + "learning_rate": 9.941083847211765e-06, + "loss": 0.9294, + "step": 815 + }, + { + "ce_ib": 7.941585540771484, + "ce_orig": 0.5222452878952026, + "epoch": 0.23438061686677691, + "kl_loss": 0.3275793194770813, + "loss_ib": 0.01121737901121378, + "step": 815 + }, + { + "ce_ib": 11.759594917297363, + "ce_orig": 0.684937596321106, + "epoch": 0.23438061686677691, + "kl_loss": 0.3714814782142639, + "loss_ib": 0.01547440979629755, + "step": 815 + }, + { + "ce_ib": 8.328024864196777, + "ce_orig": 0.5521450042724609, + "epoch": 0.23438061686677691, + "kl_loss": 0.3888009488582611, + "loss_ib": 0.012216033414006233, + "step": 815 + }, + { + "ce_ib": 6.953820705413818, + "ce_orig": 0.537897527217865, + "epoch": 0.23438061686677691, + "kl_loss": 0.2841954827308655, + "loss_ib": 0.009795775637030602, + "step": 815 + }, + { + "ce_ib": 6.938723564147949, + "ce_orig": 1.0933799743652344, + "epoch": 0.23466820044575454, + "kl_loss": 0.2745826840400696, + "loss_ib": 0.009684550575911999, + "step": 816 + }, + { + "ce_ib": 7.015166759490967, + "ce_orig": 1.0554184913635254, + "epoch": 0.23466820044575454, + "kl_loss": 0.24745316803455353, + "loss_ib": 0.009489698335528374, + "step": 816 + }, + { + "ce_ib": 7.070734977722168, + "ce_orig": 0.6463883519172668, + "epoch": 0.23466820044575454, + "kl_loss": 0.3349419832229614, + "loss_ib": 0.010420155711472034, + "step": 816 + }, + { + "ce_ib": 11.36481761932373, + "ce_orig": 1.2426626682281494, + "epoch": 0.23466820044575454, + "kl_loss": 0.35446441173553467, + "loss_ib": 0.01490946114063263, + "step": 816 + }, + { + "ce_ib": 7.427221298217773, + "ce_orig": 0.8329726457595825, + "epoch": 0.2349557840247322, + "kl_loss": 0.34363722801208496, + "loss_ib": 0.0108635937795043, + "step": 817 + }, + { + "ce_ib": 5.901673316955566, + "ce_orig": 0.7341867685317993, + "epoch": 0.2349557840247322, + "kl_loss": 0.27652859687805176, + "loss_ib": 0.008666959591209888, + "step": 817 + }, + { + "ce_ib": 7.916401386260986, + "ce_orig": 0.6233932971954346, + "epoch": 0.2349557840247322, + "kl_loss": 0.38233524560928345, + "loss_ib": 0.011739754118025303, + "step": 817 + }, + { + "ce_ib": 6.952549934387207, + "ce_orig": 0.4051516354084015, + "epoch": 0.2349557840247322, + "kl_loss": 0.7161735892295837, + "loss_ib": 0.014114285819232464, + "step": 817 + }, + { + "ce_ib": 8.112340927124023, + "ce_orig": 0.41056379675865173, + "epoch": 0.23524336760370984, + "kl_loss": 0.4498043656349182, + "loss_ib": 0.012610385194420815, + "step": 818 + }, + { + "ce_ib": 7.351808547973633, + "ce_orig": 0.6276510953903198, + "epoch": 0.23524336760370984, + "kl_loss": 0.3949786424636841, + "loss_ib": 0.011301594786345959, + "step": 818 + }, + { + "ce_ib": 8.169877052307129, + "ce_orig": 0.9647888541221619, + "epoch": 0.23524336760370984, + "kl_loss": 0.32170844078063965, + "loss_ib": 0.011386961676180363, + "step": 818 + }, + { + "ce_ib": 10.368202209472656, + "ce_orig": 1.1499764919281006, + "epoch": 0.23524336760370984, + "kl_loss": 0.47000110149383545, + "loss_ib": 0.01506821345537901, + "step": 818 + }, + { + "ce_ib": 10.158099174499512, + "ce_orig": 1.1036230325698853, + "epoch": 0.23553095118268746, + "kl_loss": 0.42805948853492737, + "loss_ib": 0.014438693411648273, + "step": 819 + }, + { + "ce_ib": 9.477944374084473, + "ce_orig": 1.0577062368392944, + "epoch": 0.23553095118268746, + "kl_loss": 0.26351502537727356, + "loss_ib": 0.012113094329833984, + "step": 819 + }, + { + "ce_ib": 3.1393284797668457, + "ce_orig": 0.1595279723405838, + "epoch": 0.23553095118268746, + "kl_loss": 0.6530660390853882, + "loss_ib": 0.009669989347457886, + "step": 819 + }, + { + "ce_ib": 10.7774658203125, + "ce_orig": 1.2825448513031006, + "epoch": 0.23553095118268746, + "kl_loss": 0.6769564151763916, + "loss_ib": 0.01754703000187874, + "step": 819 + }, + { + "epoch": 0.23581853476166512, + "grad_norm": 0.10050812363624573, + "learning_rate": 9.939890013248006e-06, + "loss": 0.8356, + "step": 820 + }, + { + "ce_ib": 6.623199939727783, + "ce_orig": 0.5048424601554871, + "epoch": 0.23581853476166512, + "kl_loss": 0.32067549228668213, + "loss_ib": 0.00982995517551899, + "step": 820 + }, + { + "ce_ib": 9.443016052246094, + "ce_orig": 1.0931298732757568, + "epoch": 0.23581853476166512, + "kl_loss": 0.25568336248397827, + "loss_ib": 0.011999850161373615, + "step": 820 + }, + { + "ce_ib": 7.976568222045898, + "ce_orig": 0.669291079044342, + "epoch": 0.23581853476166512, + "kl_loss": 0.42081308364868164, + "loss_ib": 0.01218469813466072, + "step": 820 + }, + { + "ce_ib": 4.948537349700928, + "ce_orig": 0.4827899932861328, + "epoch": 0.23581853476166512, + "kl_loss": 0.25315725803375244, + "loss_ib": 0.007480109576135874, + "step": 820 + }, + { + "ce_ib": 14.804322242736816, + "ce_orig": 1.9942870140075684, + "epoch": 0.23610611834064274, + "kl_loss": 0.43627458810806274, + "loss_ib": 0.019167067483067513, + "step": 821 + }, + { + "ce_ib": 4.713957786560059, + "ce_orig": 0.42061755061149597, + "epoch": 0.23610611834064274, + "kl_loss": 0.3404502272605896, + "loss_ib": 0.008118459954857826, + "step": 821 + }, + { + "ce_ib": 9.87876033782959, + "ce_orig": 0.6623562574386597, + "epoch": 0.23610611834064274, + "kl_loss": 0.37182098627090454, + "loss_ib": 0.013596970587968826, + "step": 821 + }, + { + "ce_ib": 7.073148727416992, + "ce_orig": 0.8113523125648499, + "epoch": 0.23610611834064274, + "kl_loss": 0.3398459553718567, + "loss_ib": 0.010471608489751816, + "step": 821 + }, + { + "ce_ib": 7.666951656341553, + "ce_orig": 0.7712521553039551, + "epoch": 0.2363937019196204, + "kl_loss": 0.34804296493530273, + "loss_ib": 0.011147381737828255, + "step": 822 + }, + { + "ce_ib": 6.349161624908447, + "ce_orig": 0.7387241125106812, + "epoch": 0.2363937019196204, + "kl_loss": 0.28054261207580566, + "loss_ib": 0.009154587984085083, + "step": 822 + }, + { + "ce_ib": 5.58187198638916, + "ce_orig": 0.5994656085968018, + "epoch": 0.2363937019196204, + "kl_loss": 0.31460386514663696, + "loss_ib": 0.008727909997105598, + "step": 822 + }, + { + "ce_ib": 7.449942111968994, + "ce_orig": 0.5919069647789001, + "epoch": 0.2363937019196204, + "kl_loss": 0.2681111693382263, + "loss_ib": 0.010131053626537323, + "step": 822 + }, + { + "ce_ib": 13.98037052154541, + "ce_orig": 0.6386498808860779, + "epoch": 0.23668128549859804, + "kl_loss": 0.44783568382263184, + "loss_ib": 0.01845872774720192, + "step": 823 + }, + { + "ce_ib": 5.139134883880615, + "ce_orig": 0.7114477157592773, + "epoch": 0.23668128549859804, + "kl_loss": 0.27849745750427246, + "loss_ib": 0.007924109697341919, + "step": 823 + }, + { + "ce_ib": 6.715068340301514, + "ce_orig": 0.5276003479957581, + "epoch": 0.23668128549859804, + "kl_loss": 0.384267121553421, + "loss_ib": 0.010557739064097404, + "step": 823 + }, + { + "ce_ib": 7.8920793533325195, + "ce_orig": 0.8194569945335388, + "epoch": 0.23668128549859804, + "kl_loss": 0.2780001163482666, + "loss_ib": 0.01067208033055067, + "step": 823 + }, + { + "ce_ib": 8.298215866088867, + "ce_orig": 0.4189370572566986, + "epoch": 0.23696886907757567, + "kl_loss": 0.4898415207862854, + "loss_ib": 0.013196630403399467, + "step": 824 + }, + { + "ce_ib": 7.782186508178711, + "ce_orig": 0.4692075252532959, + "epoch": 0.23696886907757567, + "kl_loss": 0.4824924170970917, + "loss_ib": 0.012607110664248466, + "step": 824 + }, + { + "ce_ib": 9.8721923828125, + "ce_orig": 1.1827633380889893, + "epoch": 0.23696886907757567, + "kl_loss": 0.2802169919013977, + "loss_ib": 0.012674362398684025, + "step": 824 + }, + { + "ce_ib": 9.907919883728027, + "ce_orig": 1.1879762411117554, + "epoch": 0.23696886907757567, + "kl_loss": 0.41324368119239807, + "loss_ib": 0.014040356501936913, + "step": 824 + }, + { + "epoch": 0.23725645265655332, + "grad_norm": 0.10062714666128159, + "learning_rate": 9.938684277942631e-06, + "loss": 0.8766, + "step": 825 + }, + { + "ce_ib": 8.782609939575195, + "ce_orig": 1.2377029657363892, + "epoch": 0.23725645265655332, + "kl_loss": 0.3123038113117218, + "loss_ib": 0.011905648745596409, + "step": 825 + }, + { + "ce_ib": 6.110267639160156, + "ce_orig": 0.6393804550170898, + "epoch": 0.23725645265655332, + "kl_loss": 0.33634820580482483, + "loss_ib": 0.009473749436438084, + "step": 825 + }, + { + "ce_ib": 9.359816551208496, + "ce_orig": 1.2528795003890991, + "epoch": 0.23725645265655332, + "kl_loss": 0.28830617666244507, + "loss_ib": 0.012242878787219524, + "step": 825 + }, + { + "ce_ib": 9.707563400268555, + "ce_orig": 0.8692981004714966, + "epoch": 0.23725645265655332, + "kl_loss": 0.5571379661560059, + "loss_ib": 0.015278941951692104, + "step": 825 + }, + { + "ce_ib": 9.498275756835938, + "ce_orig": 1.071899652481079, + "epoch": 0.23754403623553094, + "kl_loss": 0.2672438621520996, + "loss_ib": 0.012170715257525444, + "step": 826 + }, + { + "ce_ib": 9.843942642211914, + "ce_orig": 0.9619124531745911, + "epoch": 0.23754403623553094, + "kl_loss": 0.2982301712036133, + "loss_ib": 0.012826244346797466, + "step": 826 + }, + { + "ce_ib": 10.634578704833984, + "ce_orig": 1.325036644935608, + "epoch": 0.23754403623553094, + "kl_loss": 0.32248958945274353, + "loss_ib": 0.01385947410017252, + "step": 826 + }, + { + "ce_ib": 12.585394859313965, + "ce_orig": 1.4135090112686157, + "epoch": 0.23754403623553094, + "kl_loss": 0.46502685546875, + "loss_ib": 0.017235664650797844, + "step": 826 + }, + { + "ce_ib": 10.126785278320312, + "ce_orig": 1.5693209171295166, + "epoch": 0.2378316198145086, + "kl_loss": 0.2683885097503662, + "loss_ib": 0.012810669839382172, + "step": 827 + }, + { + "ce_ib": 6.727275848388672, + "ce_orig": 0.9106936454772949, + "epoch": 0.2378316198145086, + "kl_loss": 0.2777571678161621, + "loss_ib": 0.009504847228527069, + "step": 827 + }, + { + "ce_ib": 8.126458168029785, + "ce_orig": 0.8495746850967407, + "epoch": 0.2378316198145086, + "kl_loss": 0.26818782091140747, + "loss_ib": 0.010808336548507214, + "step": 827 + }, + { + "ce_ib": 10.300277709960938, + "ce_orig": 1.3873240947723389, + "epoch": 0.2378316198145086, + "kl_loss": 0.41793563961982727, + "loss_ib": 0.014479633420705795, + "step": 827 + }, + { + "ce_ib": 12.66947078704834, + "ce_orig": 1.5648332834243774, + "epoch": 0.23811920339348625, + "kl_loss": 0.3042876422405243, + "loss_ib": 0.015712348744273186, + "step": 828 + }, + { + "ce_ib": 8.798270225524902, + "ce_orig": 0.920973539352417, + "epoch": 0.23811920339348625, + "kl_loss": 0.34468623995780945, + "loss_ib": 0.012245132587850094, + "step": 828 + }, + { + "ce_ib": 8.365591049194336, + "ce_orig": 0.5817134976387024, + "epoch": 0.23811920339348625, + "kl_loss": 0.39118778705596924, + "loss_ib": 0.012277469038963318, + "step": 828 + }, + { + "ce_ib": 11.80969524383545, + "ce_orig": 1.7373600006103516, + "epoch": 0.23811920339348625, + "kl_loss": 0.3660210371017456, + "loss_ib": 0.01546990592032671, + "step": 828 + }, + { + "ce_ib": 7.223691463470459, + "ce_orig": 0.9051380157470703, + "epoch": 0.23840678697246387, + "kl_loss": 0.2608543038368225, + "loss_ib": 0.009832234121859074, + "step": 829 + }, + { + "ce_ib": 6.39984130859375, + "ce_orig": 0.6907777190208435, + "epoch": 0.23840678697246387, + "kl_loss": 0.3057895600795746, + "loss_ib": 0.00945773720741272, + "step": 829 + }, + { + "ce_ib": 5.409855365753174, + "ce_orig": 0.7622098326683044, + "epoch": 0.23840678697246387, + "kl_loss": 0.24784672260284424, + "loss_ib": 0.007888322696089745, + "step": 829 + }, + { + "ce_ib": 8.38547134399414, + "ce_orig": 0.7260690331459045, + "epoch": 0.23840678697246387, + "kl_loss": 0.29803162813186646, + "loss_ib": 0.01136578805744648, + "step": 829 + }, + { + "epoch": 0.23869437055144152, + "grad_norm": 0.11168427765369415, + "learning_rate": 9.93746664420054e-06, + "loss": 0.9104, + "step": 830 + }, + { + "ce_ib": 5.065134048461914, + "ce_orig": 0.8026469349861145, + "epoch": 0.23869437055144152, + "kl_loss": 0.278666615486145, + "loss_ib": 0.007851799950003624, + "step": 830 + }, + { + "ce_ib": 13.80343246459961, + "ce_orig": 0.9906110167503357, + "epoch": 0.23869437055144152, + "kl_loss": 0.3099890649318695, + "loss_ib": 0.01690332405269146, + "step": 830 + }, + { + "ce_ib": 8.821106910705566, + "ce_orig": 1.093506932258606, + "epoch": 0.23869437055144152, + "kl_loss": 0.3800942599773407, + "loss_ib": 0.012622050009667873, + "step": 830 + }, + { + "ce_ib": 11.06147575378418, + "ce_orig": 1.1915507316589355, + "epoch": 0.23869437055144152, + "kl_loss": 0.3803118169307709, + "loss_ib": 0.014864594675600529, + "step": 830 + }, + { + "ce_ib": 6.217090129852295, + "ce_orig": 0.4522053599357605, + "epoch": 0.23898195413041914, + "kl_loss": 0.3387228846549988, + "loss_ib": 0.009604318998754025, + "step": 831 + }, + { + "ce_ib": 8.78792953491211, + "ce_orig": 0.818161129951477, + "epoch": 0.23898195413041914, + "kl_loss": 0.3240455389022827, + "loss_ib": 0.012028384022414684, + "step": 831 + }, + { + "ce_ib": 9.777087211608887, + "ce_orig": 0.9559274315834045, + "epoch": 0.23898195413041914, + "kl_loss": 0.305178701877594, + "loss_ib": 0.01282887440174818, + "step": 831 + }, + { + "ce_ib": 8.793158531188965, + "ce_orig": 1.7135603427886963, + "epoch": 0.23898195413041914, + "kl_loss": 0.35992008447647095, + "loss_ib": 0.012392358854413033, + "step": 831 + }, + { + "ce_ib": 14.298954963684082, + "ce_orig": 1.9766751527786255, + "epoch": 0.2392695377093968, + "kl_loss": 0.37456846237182617, + "loss_ib": 0.018044639378786087, + "step": 832 + }, + { + "ce_ib": 5.9418816566467285, + "ce_orig": 0.7225015163421631, + "epoch": 0.2392695377093968, + "kl_loss": 0.35724693536758423, + "loss_ib": 0.009514350444078445, + "step": 832 + }, + { + "ce_ib": 8.032739639282227, + "ce_orig": 0.6944239139556885, + "epoch": 0.2392695377093968, + "kl_loss": 0.35457563400268555, + "loss_ib": 0.011578495614230633, + "step": 832 + }, + { + "ce_ib": 9.528278350830078, + "ce_orig": 1.3547769784927368, + "epoch": 0.2392695377093968, + "kl_loss": 0.4733262062072754, + "loss_ib": 0.014261540956795216, + "step": 832 + }, + { + "ce_ib": 10.221992492675781, + "ce_orig": 0.9856470227241516, + "epoch": 0.23955712128837445, + "kl_loss": 0.33555009961128235, + "loss_ib": 0.013577492907643318, + "step": 833 + }, + { + "ce_ib": 8.804466247558594, + "ce_orig": 0.928854763507843, + "epoch": 0.23955712128837445, + "kl_loss": 0.36634519696235657, + "loss_ib": 0.012467917986214161, + "step": 833 + }, + { + "ce_ib": 6.364208221435547, + "ce_orig": 0.8164104223251343, + "epoch": 0.23955712128837445, + "kl_loss": 0.22879984974861145, + "loss_ib": 0.008652206510305405, + "step": 833 + }, + { + "ce_ib": 8.315366744995117, + "ce_orig": 0.9906771779060364, + "epoch": 0.23955712128837445, + "kl_loss": 0.4181078374385834, + "loss_ib": 0.012496445327997208, + "step": 833 + }, + { + "ce_ib": 8.126928329467773, + "ce_orig": 0.9921467304229736, + "epoch": 0.23984470486735207, + "kl_loss": 0.2800453305244446, + "loss_ib": 0.010927380993962288, + "step": 834 + }, + { + "ce_ib": 5.869344234466553, + "ce_orig": 0.8272134065628052, + "epoch": 0.23984470486735207, + "kl_loss": 0.23419909179210663, + "loss_ib": 0.00821133516728878, + "step": 834 + }, + { + "ce_ib": 10.878475189208984, + "ce_orig": 1.308764934539795, + "epoch": 0.23984470486735207, + "kl_loss": 0.3325369656085968, + "loss_ib": 0.014203844591975212, + "step": 834 + }, + { + "ce_ib": 5.4352545738220215, + "ce_orig": 0.45368334650993347, + "epoch": 0.23984470486735207, + "kl_loss": 0.6772407293319702, + "loss_ib": 0.012207661755383015, + "step": 834 + }, + { + "epoch": 0.24013228844632972, + "grad_norm": 0.09686867892742157, + "learning_rate": 9.93623711495529e-06, + "loss": 0.9213, + "step": 835 + }, + { + "ce_ib": 8.014131546020508, + "ce_orig": 1.0289490222930908, + "epoch": 0.24013228844632972, + "kl_loss": 0.3445562720298767, + "loss_ib": 0.011459693312644958, + "step": 835 + }, + { + "ce_ib": 6.29163122177124, + "ce_orig": 0.48442956805229187, + "epoch": 0.24013228844632972, + "kl_loss": 0.3220330774784088, + "loss_ib": 0.009511961601674557, + "step": 835 + }, + { + "ce_ib": 7.94074821472168, + "ce_orig": 0.7545216083526611, + "epoch": 0.24013228844632972, + "kl_loss": 0.45153874158859253, + "loss_ib": 0.0124561358243227, + "step": 835 + }, + { + "ce_ib": 5.553848743438721, + "ce_orig": 0.3156552314758301, + "epoch": 0.24013228844632972, + "kl_loss": 0.35743337869644165, + "loss_ib": 0.00912818219512701, + "step": 835 + }, + { + "ce_ib": 7.592702865600586, + "ce_orig": 0.8854877352714539, + "epoch": 0.24041987202530735, + "kl_loss": 0.28391388058662415, + "loss_ib": 0.010431841015815735, + "step": 836 + }, + { + "ce_ib": 8.959979057312012, + "ce_orig": 0.5111126899719238, + "epoch": 0.24041987202530735, + "kl_loss": 0.426180899143219, + "loss_ib": 0.013221788220107555, + "step": 836 + }, + { + "ce_ib": 12.740047454833984, + "ce_orig": 1.6768262386322021, + "epoch": 0.24041987202530735, + "kl_loss": 0.3991634249687195, + "loss_ib": 0.016731681302189827, + "step": 836 + }, + { + "ce_ib": 12.013802528381348, + "ce_orig": 1.272831916809082, + "epoch": 0.24041987202530735, + "kl_loss": 0.3409850001335144, + "loss_ib": 0.015423652715981007, + "step": 836 + }, + { + "ce_ib": 2.0301332473754883, + "ce_orig": 0.16432513296604156, + "epoch": 0.240707455604285, + "kl_loss": 0.6776133179664612, + "loss_ib": 0.008806266821920872, + "step": 837 + }, + { + "ce_ib": 10.865999221801758, + "ce_orig": 1.410832405090332, + "epoch": 0.240707455604285, + "kl_loss": 0.312138170003891, + "loss_ib": 0.013987381011247635, + "step": 837 + }, + { + "ce_ib": 6.166257858276367, + "ce_orig": 0.6295948624610901, + "epoch": 0.240707455604285, + "kl_loss": 0.36339548230171204, + "loss_ib": 0.00980021245777607, + "step": 837 + }, + { + "ce_ib": 5.043333053588867, + "ce_orig": 0.46830177307128906, + "epoch": 0.240707455604285, + "kl_loss": 0.3290286064147949, + "loss_ib": 0.008333618752658367, + "step": 837 + }, + { + "ce_ib": 9.060256004333496, + "ce_orig": 1.1709790229797363, + "epoch": 0.24099503918326265, + "kl_loss": 0.27760225534439087, + "loss_ib": 0.0118362782523036, + "step": 838 + }, + { + "ce_ib": 6.811290264129639, + "ce_orig": 0.793929934501648, + "epoch": 0.24099503918326265, + "kl_loss": 0.31319600343704224, + "loss_ib": 0.009943250566720963, + "step": 838 + }, + { + "ce_ib": 4.931931018829346, + "ce_orig": 0.36589503288269043, + "epoch": 0.24099503918326265, + "kl_loss": 0.40839433670043945, + "loss_ib": 0.009015874937176704, + "step": 838 + }, + { + "ce_ib": 6.180576324462891, + "ce_orig": 0.4309951066970825, + "epoch": 0.24099503918326265, + "kl_loss": 0.2373921424150467, + "loss_ib": 0.008554497733712196, + "step": 838 + }, + { + "ce_ib": 5.762244701385498, + "ce_orig": 0.544806957244873, + "epoch": 0.24128262276224027, + "kl_loss": 0.31401538848876953, + "loss_ib": 0.008902398869395256, + "step": 839 + }, + { + "ce_ib": 7.717389106750488, + "ce_orig": 1.2485359907150269, + "epoch": 0.24128262276224027, + "kl_loss": 0.30778759717941284, + "loss_ib": 0.010795265436172485, + "step": 839 + }, + { + "ce_ib": 7.063183784484863, + "ce_orig": 0.768622100353241, + "epoch": 0.24128262276224027, + "kl_loss": 0.3657524585723877, + "loss_ib": 0.010720708407461643, + "step": 839 + }, + { + "ce_ib": 6.140594482421875, + "ce_orig": 0.7728462815284729, + "epoch": 0.24128262276224027, + "kl_loss": 0.2510579824447632, + "loss_ib": 0.008651173673570156, + "step": 839 + }, + { + "epoch": 0.24157020634121792, + "grad_norm": 0.10076677054166794, + "learning_rate": 9.934995693169104e-06, + "loss": 0.8875, + "step": 840 + }, + { + "ce_ib": 9.007379531860352, + "ce_orig": 0.8373998403549194, + "epoch": 0.24157020634121792, + "kl_loss": 0.3071047067642212, + "loss_ib": 0.012078425846993923, + "step": 840 + }, + { + "ce_ib": 6.403738975524902, + "ce_orig": 0.7129970192909241, + "epoch": 0.24157020634121792, + "kl_loss": 0.28114748001098633, + "loss_ib": 0.009215213358402252, + "step": 840 + }, + { + "ce_ib": 7.572381973266602, + "ce_orig": 0.8134757280349731, + "epoch": 0.24157020634121792, + "kl_loss": 0.3291366696357727, + "loss_ib": 0.010863748379051685, + "step": 840 + }, + { + "ce_ib": 7.471776962280273, + "ce_orig": 0.45619362592697144, + "epoch": 0.24157020634121792, + "kl_loss": 0.3013712763786316, + "loss_ib": 0.010485488921403885, + "step": 840 + }, + { + "ce_ib": 8.4400053024292, + "ce_orig": 0.8053632378578186, + "epoch": 0.24185778992019555, + "kl_loss": 0.30274561047554016, + "loss_ib": 0.011467461474239826, + "step": 841 + }, + { + "ce_ib": 13.409200668334961, + "ce_orig": 1.5305240154266357, + "epoch": 0.24185778992019555, + "kl_loss": 0.27949976921081543, + "loss_ib": 0.016204198822379112, + "step": 841 + }, + { + "ce_ib": 5.851305961608887, + "ce_orig": 0.7181586027145386, + "epoch": 0.24185778992019555, + "kl_loss": 0.28577935695648193, + "loss_ib": 0.00870910007506609, + "step": 841 + }, + { + "ce_ib": 8.071784973144531, + "ce_orig": 0.9581683278083801, + "epoch": 0.24185778992019555, + "kl_loss": 0.371356725692749, + "loss_ib": 0.011785351671278477, + "step": 841 + }, + { + "ce_ib": 11.643394470214844, + "ce_orig": 1.0913020372390747, + "epoch": 0.2421453734991732, + "kl_loss": 0.3137480914592743, + "loss_ib": 0.01478087529540062, + "step": 842 + }, + { + "ce_ib": 7.9136576652526855, + "ce_orig": 0.8460515141487122, + "epoch": 0.2421453734991732, + "kl_loss": 0.28541165590286255, + "loss_ib": 0.010767774656414986, + "step": 842 + }, + { + "ce_ib": 12.15166187286377, + "ce_orig": 0.9375542402267456, + "epoch": 0.2421453734991732, + "kl_loss": 0.466509073972702, + "loss_ib": 0.016816752031445503, + "step": 842 + }, + { + "ce_ib": 6.482849597930908, + "ce_orig": 0.652571439743042, + "epoch": 0.2421453734991732, + "kl_loss": 0.3070219159126282, + "loss_ib": 0.009553068317472935, + "step": 842 + }, + { + "ce_ib": 9.26317024230957, + "ce_orig": 0.6297864317893982, + "epoch": 0.24243295707815085, + "kl_loss": 0.8582457304000854, + "loss_ib": 0.017845628783106804, + "step": 843 + }, + { + "ce_ib": 9.843435287475586, + "ce_orig": 0.9486488103866577, + "epoch": 0.24243295707815085, + "kl_loss": 0.35706013441085815, + "loss_ib": 0.013414036482572556, + "step": 843 + }, + { + "ce_ib": 8.942062377929688, + "ce_orig": 0.6295011639595032, + "epoch": 0.24243295707815085, + "kl_loss": 0.3587941527366638, + "loss_ib": 0.012530003674328327, + "step": 843 + }, + { + "ce_ib": 8.235858917236328, + "ce_orig": 1.0911704301834106, + "epoch": 0.24243295707815085, + "kl_loss": 0.4925958812236786, + "loss_ib": 0.013161817565560341, + "step": 843 + }, + { + "ce_ib": 2.106238842010498, + "ce_orig": 0.10293695330619812, + "epoch": 0.24272054065712848, + "kl_loss": 0.6076182126998901, + "loss_ib": 0.008182420395314693, + "step": 844 + }, + { + "ce_ib": 10.04066276550293, + "ce_orig": 0.7675477862358093, + "epoch": 0.24272054065712848, + "kl_loss": 0.3272428512573242, + "loss_ib": 0.013313091360032558, + "step": 844 + }, + { + "ce_ib": 7.2537922859191895, + "ce_orig": 0.6966544985771179, + "epoch": 0.24272054065712848, + "kl_loss": 0.30012214183807373, + "loss_ib": 0.010255013592541218, + "step": 844 + }, + { + "ce_ib": 9.952733039855957, + "ce_orig": 0.8222768902778625, + "epoch": 0.24272054065712848, + "kl_loss": 0.29932597279548645, + "loss_ib": 0.01294599287211895, + "step": 844 + }, + { + "epoch": 0.24300812423610613, + "grad_norm": 0.09415728598833084, + "learning_rate": 9.93374238183286e-06, + "loss": 0.8609, + "step": 845 + }, + { + "ce_ib": 9.53044605255127, + "ce_orig": 0.9650492668151855, + "epoch": 0.24300812423610613, + "kl_loss": 0.23566867411136627, + "loss_ib": 0.011887133121490479, + "step": 845 + }, + { + "ce_ib": 9.0289945602417, + "ce_orig": 1.0505539178848267, + "epoch": 0.24300812423610613, + "kl_loss": 0.3517257273197174, + "loss_ib": 0.012546251527965069, + "step": 845 + }, + { + "ce_ib": 6.667138576507568, + "ce_orig": 0.6767503023147583, + "epoch": 0.24300812423610613, + "kl_loss": 0.25494682788848877, + "loss_ib": 0.009216606616973877, + "step": 845 + }, + { + "ce_ib": 10.858115196228027, + "ce_orig": 1.128201961517334, + "epoch": 0.24300812423610613, + "kl_loss": 0.3126045763492584, + "loss_ib": 0.013984160497784615, + "step": 845 + }, + { + "ce_ib": 8.37086296081543, + "ce_orig": 1.0704699754714966, + "epoch": 0.24329570781508375, + "kl_loss": 0.3986669182777405, + "loss_ib": 0.012357532978057861, + "step": 846 + }, + { + "ce_ib": 12.114412307739258, + "ce_orig": 0.8631466031074524, + "epoch": 0.24329570781508375, + "kl_loss": 0.5870100259780884, + "loss_ib": 0.017984513193368912, + "step": 846 + }, + { + "ce_ib": 10.035650253295898, + "ce_orig": 0.7930597066879272, + "epoch": 0.24329570781508375, + "kl_loss": 0.34875866770744324, + "loss_ib": 0.013523237779736519, + "step": 846 + }, + { + "ce_ib": 9.243821144104004, + "ce_orig": 0.9381915330886841, + "epoch": 0.24329570781508375, + "kl_loss": 0.6351751089096069, + "loss_ib": 0.0155955720692873, + "step": 846 + }, + { + "ce_ib": 7.255211353302002, + "ce_orig": 0.7044399380683899, + "epoch": 0.2435832913940614, + "kl_loss": 0.31700530648231506, + "loss_ib": 0.010425264947116375, + "step": 847 + }, + { + "ce_ib": 8.711723327636719, + "ce_orig": 0.8803223967552185, + "epoch": 0.2435832913940614, + "kl_loss": 0.3968978822231293, + "loss_ib": 0.012680701911449432, + "step": 847 + }, + { + "ce_ib": 6.419612407684326, + "ce_orig": 0.5552703142166138, + "epoch": 0.2435832913940614, + "kl_loss": 0.24681052565574646, + "loss_ib": 0.008887717500329018, + "step": 847 + }, + { + "ce_ib": 7.220922470092773, + "ce_orig": 0.8049042224884033, + "epoch": 0.2435832913940614, + "kl_loss": 0.25132423639297485, + "loss_ib": 0.00973416492342949, + "step": 847 + }, + { + "ce_ib": 10.610690116882324, + "ce_orig": 1.24649178981781, + "epoch": 0.24387087497303903, + "kl_loss": 0.3552427291870117, + "loss_ib": 0.014163116924464703, + "step": 848 + }, + { + "ce_ib": 8.324702262878418, + "ce_orig": 0.7784779667854309, + "epoch": 0.24387087497303903, + "kl_loss": 0.26029253005981445, + "loss_ib": 0.010927626863121986, + "step": 848 + }, + { + "ce_ib": 5.074281692504883, + "ce_orig": 0.5773417353630066, + "epoch": 0.24387087497303903, + "kl_loss": 0.34564918279647827, + "loss_ib": 0.008530773222446442, + "step": 848 + }, + { + "ce_ib": 6.707390308380127, + "ce_orig": 0.6460347175598145, + "epoch": 0.24387087497303903, + "kl_loss": 0.33726412057876587, + "loss_ib": 0.010080032050609589, + "step": 848 + }, + { + "ce_ib": 5.850057601928711, + "ce_orig": 0.5735775232315063, + "epoch": 0.24415845855201668, + "kl_loss": 0.30734121799468994, + "loss_ib": 0.008923470042645931, + "step": 849 + }, + { + "ce_ib": 9.06562614440918, + "ce_orig": 1.0471097230911255, + "epoch": 0.24415845855201668, + "kl_loss": 0.43707603216171265, + "loss_ib": 0.013436386361718178, + "step": 849 + }, + { + "ce_ib": 6.853366374969482, + "ce_orig": 0.9200989007949829, + "epoch": 0.24415845855201668, + "kl_loss": 0.3171440362930298, + "loss_ib": 0.010024807415902615, + "step": 849 + }, + { + "ce_ib": 5.92440128326416, + "ce_orig": 0.5834106802940369, + "epoch": 0.24415845855201668, + "kl_loss": 0.2906469702720642, + "loss_ib": 0.008830870501697063, + "step": 849 + }, + { + "epoch": 0.24444604213099433, + "grad_norm": 0.11549082398414612, + "learning_rate": 9.93247718396607e-06, + "loss": 0.8256, + "step": 850 + }, + { + "ce_ib": 6.185736179351807, + "ce_orig": 0.5565721392631531, + "epoch": 0.24444604213099433, + "kl_loss": 0.3115679621696472, + "loss_ib": 0.009301415644586086, + "step": 850 + }, + { + "ce_ib": 10.200575828552246, + "ce_orig": 1.0745152235031128, + "epoch": 0.24444604213099433, + "kl_loss": 0.3611696660518646, + "loss_ib": 0.013812271878123283, + "step": 850 + }, + { + "ce_ib": 11.002309799194336, + "ce_orig": 1.2843042612075806, + "epoch": 0.24444604213099433, + "kl_loss": 0.3904564380645752, + "loss_ib": 0.014906874857842922, + "step": 850 + }, + { + "ce_ib": 8.565811157226562, + "ce_orig": 0.9313501715660095, + "epoch": 0.24444604213099433, + "kl_loss": 0.3498835265636444, + "loss_ib": 0.012064645998179913, + "step": 850 + }, + { + "ce_ib": 6.367196559906006, + "ce_orig": 0.3282487392425537, + "epoch": 0.24473362570997195, + "kl_loss": 0.3030562996864319, + "loss_ib": 0.009397759102284908, + "step": 851 + }, + { + "ce_ib": 7.566930294036865, + "ce_orig": 0.4791885316371918, + "epoch": 0.24473362570997195, + "kl_loss": 0.3446214199066162, + "loss_ib": 0.011013145558536053, + "step": 851 + }, + { + "ce_ib": 6.3060078620910645, + "ce_orig": 0.7763472199440002, + "epoch": 0.24473362570997195, + "kl_loss": 0.2825550436973572, + "loss_ib": 0.009131558239459991, + "step": 851 + }, + { + "ce_ib": 6.97025728225708, + "ce_orig": 0.5963578820228577, + "epoch": 0.24473362570997195, + "kl_loss": 0.26160991191864014, + "loss_ib": 0.009586355648934841, + "step": 851 + }, + { + "ce_ib": 12.1688814163208, + "ce_orig": 1.5043821334838867, + "epoch": 0.2450212092889496, + "kl_loss": 0.4432004690170288, + "loss_ib": 0.01660088635981083, + "step": 852 + }, + { + "ce_ib": 10.238726615905762, + "ce_orig": 0.6479013562202454, + "epoch": 0.2450212092889496, + "kl_loss": 0.3322160243988037, + "loss_ib": 0.013560887426137924, + "step": 852 + }, + { + "ce_ib": 4.64656400680542, + "ce_orig": 0.37472283840179443, + "epoch": 0.2450212092889496, + "kl_loss": 0.388122022151947, + "loss_ib": 0.0085277846083045, + "step": 852 + }, + { + "ce_ib": 8.35583209991455, + "ce_orig": 0.9440561532974243, + "epoch": 0.2450212092889496, + "kl_loss": 0.34462809562683105, + "loss_ib": 0.011802112683653831, + "step": 852 + }, + { + "ce_ib": 7.765969753265381, + "ce_orig": 1.0505164861679077, + "epoch": 0.24530879286792723, + "kl_loss": 0.31646573543548584, + "loss_ib": 0.010930625721812248, + "step": 853 + }, + { + "ce_ib": 6.262195587158203, + "ce_orig": 0.6370275616645813, + "epoch": 0.24530879286792723, + "kl_loss": 0.3554043769836426, + "loss_ib": 0.009816239587962627, + "step": 853 + }, + { + "ce_ib": 3.9668338298797607, + "ce_orig": 0.3455740511417389, + "epoch": 0.24530879286792723, + "kl_loss": 0.33059853315353394, + "loss_ib": 0.007272819057106972, + "step": 853 + }, + { + "ce_ib": 5.173341751098633, + "ce_orig": 0.5830708742141724, + "epoch": 0.24530879286792723, + "kl_loss": 0.24811115860939026, + "loss_ib": 0.007654453162103891, + "step": 853 + }, + { + "ce_ib": 7.131439685821533, + "ce_orig": 0.9161396622657776, + "epoch": 0.24559637644690488, + "kl_loss": 0.6239281892776489, + "loss_ib": 0.01337072066962719, + "step": 854 + }, + { + "ce_ib": 9.913086891174316, + "ce_orig": 0.5309944748878479, + "epoch": 0.24559637644690488, + "kl_loss": 0.2742801010608673, + "loss_ib": 0.012655887752771378, + "step": 854 + }, + { + "ce_ib": 6.634543418884277, + "ce_orig": 0.8277848362922668, + "epoch": 0.24559637644690488, + "kl_loss": 0.27079910039901733, + "loss_ib": 0.009342534467577934, + "step": 854 + }, + { + "ce_ib": 6.51190185546875, + "ce_orig": 0.7204493880271912, + "epoch": 0.24559637644690488, + "kl_loss": 0.26875340938568115, + "loss_ib": 0.009199435822665691, + "step": 854 + }, + { + "epoch": 0.24588396002588253, + "grad_norm": 0.10104996711015701, + "learning_rate": 9.931200102616892e-06, + "loss": 0.8524, + "step": 855 + }, + { + "ce_ib": 9.483591079711914, + "ce_orig": 0.5329586267471313, + "epoch": 0.24588396002588253, + "kl_loss": 0.3656144142150879, + "loss_ib": 0.013139734975993633, + "step": 855 + }, + { + "ce_ib": 7.316298961639404, + "ce_orig": 0.8602546453475952, + "epoch": 0.24588396002588253, + "kl_loss": 0.2442716807126999, + "loss_ib": 0.009759015403687954, + "step": 855 + }, + { + "ce_ib": 6.996337890625, + "ce_orig": 0.7689603567123413, + "epoch": 0.24588396002588253, + "kl_loss": 0.28071606159210205, + "loss_ib": 0.009803498163819313, + "step": 855 + }, + { + "ce_ib": 9.025272369384766, + "ce_orig": 1.0526149272918701, + "epoch": 0.24588396002588253, + "kl_loss": 0.3657127618789673, + "loss_ib": 0.012682399712502956, + "step": 855 + }, + { + "ce_ib": 5.605438709259033, + "ce_orig": 0.4452979564666748, + "epoch": 0.24617154360486015, + "kl_loss": 0.36353376507759094, + "loss_ib": 0.009240776300430298, + "step": 856 + }, + { + "ce_ib": 8.554245948791504, + "ce_orig": 0.9303341507911682, + "epoch": 0.24617154360486015, + "kl_loss": 0.40140336751937866, + "loss_ib": 0.012568279169499874, + "step": 856 + }, + { + "ce_ib": 8.914340019226074, + "ce_orig": 0.9002864360809326, + "epoch": 0.24617154360486015, + "kl_loss": 0.3354775905609131, + "loss_ib": 0.012269115075469017, + "step": 856 + }, + { + "ce_ib": 8.865965843200684, + "ce_orig": 0.9647238254547119, + "epoch": 0.24617154360486015, + "kl_loss": 0.3433181643486023, + "loss_ib": 0.012299147434532642, + "step": 856 + }, + { + "ce_ib": 10.691956520080566, + "ce_orig": 1.4019811153411865, + "epoch": 0.2464591271838378, + "kl_loss": 0.33202850818634033, + "loss_ib": 0.014012240804731846, + "step": 857 + }, + { + "ce_ib": 6.769617557525635, + "ce_orig": 0.60886549949646, + "epoch": 0.2464591271838378, + "kl_loss": 0.34499895572662354, + "loss_ib": 0.010219607502222061, + "step": 857 + }, + { + "ce_ib": 5.660098075866699, + "ce_orig": 0.6113988757133484, + "epoch": 0.2464591271838378, + "kl_loss": 0.2966403067111969, + "loss_ib": 0.008626501075923443, + "step": 857 + }, + { + "ce_ib": 13.32357406616211, + "ce_orig": 1.3362207412719727, + "epoch": 0.2464591271838378, + "kl_loss": 0.29936474561691284, + "loss_ib": 0.016317222267389297, + "step": 857 + }, + { + "ce_ib": 2.6769988536834717, + "ce_orig": 0.18080325424671173, + "epoch": 0.24674671076281543, + "kl_loss": 0.6789752840995789, + "loss_ib": 0.009466751478612423, + "step": 858 + }, + { + "ce_ib": 8.577744483947754, + "ce_orig": 0.7904664278030396, + "epoch": 0.24674671076281543, + "kl_loss": 0.35377517342567444, + "loss_ib": 0.012115496210753918, + "step": 858 + }, + { + "ce_ib": 9.277144432067871, + "ce_orig": 0.858630359172821, + "epoch": 0.24674671076281543, + "kl_loss": 0.4751337170600891, + "loss_ib": 0.014028482139110565, + "step": 858 + }, + { + "ce_ib": 4.141605377197266, + "ce_orig": 0.31388720870018005, + "epoch": 0.24674671076281543, + "kl_loss": 0.6098456382751465, + "loss_ib": 0.010240061208605766, + "step": 858 + }, + { + "ce_ib": 9.002893447875977, + "ce_orig": 0.7537875175476074, + "epoch": 0.24703429434179308, + "kl_loss": 0.300067663192749, + "loss_ib": 0.012003568932414055, + "step": 859 + }, + { + "ce_ib": 4.289847373962402, + "ce_orig": 0.40423697233200073, + "epoch": 0.24703429434179308, + "kl_loss": 0.37460029125213623, + "loss_ib": 0.008035850711166859, + "step": 859 + }, + { + "ce_ib": 6.030440330505371, + "ce_orig": 0.7502020001411438, + "epoch": 0.24703429434179308, + "kl_loss": 0.301896333694458, + "loss_ib": 0.009049403481185436, + "step": 859 + }, + { + "ce_ib": 5.722672462463379, + "ce_orig": 0.8073954582214355, + "epoch": 0.24703429434179308, + "kl_loss": 0.2869844436645508, + "loss_ib": 0.00859251618385315, + "step": 859 + }, + { + "epoch": 0.24732187792077073, + "grad_norm": 0.09929006546735764, + "learning_rate": 9.929911140862109e-06, + "loss": 0.8739, + "step": 860 + }, + { + "ce_ib": 9.267024040222168, + "ce_orig": 1.2398027181625366, + "epoch": 0.24732187792077073, + "kl_loss": 0.28411030769348145, + "loss_ib": 0.01210812758654356, + "step": 860 + }, + { + "ce_ib": 8.08333683013916, + "ce_orig": 0.8170286417007446, + "epoch": 0.24732187792077073, + "kl_loss": 0.25793078541755676, + "loss_ib": 0.010662645101547241, + "step": 860 + }, + { + "ce_ib": 10.10383129119873, + "ce_orig": 1.1762322187423706, + "epoch": 0.24732187792077073, + "kl_loss": 0.3745589256286621, + "loss_ib": 0.013849420472979546, + "step": 860 + }, + { + "ce_ib": 10.957147598266602, + "ce_orig": 1.4055129289627075, + "epoch": 0.24732187792077073, + "kl_loss": 0.47874391078948975, + "loss_ib": 0.015744587406516075, + "step": 860 + }, + { + "ce_ib": 5.491908073425293, + "ce_orig": 0.8730387091636658, + "epoch": 0.24760946149974836, + "kl_loss": 0.2792191803455353, + "loss_ib": 0.008284100331366062, + "step": 861 + }, + { + "ce_ib": 9.5418701171875, + "ce_orig": 0.7111859321594238, + "epoch": 0.24760946149974836, + "kl_loss": 0.28549331426620483, + "loss_ib": 0.012396802194416523, + "step": 861 + }, + { + "ce_ib": 9.359732627868652, + "ce_orig": 0.5781843066215515, + "epoch": 0.24760946149974836, + "kl_loss": 0.3297385573387146, + "loss_ib": 0.01265711709856987, + "step": 861 + }, + { + "ce_ib": 4.646590232849121, + "ce_orig": 0.6796752214431763, + "epoch": 0.24760946149974836, + "kl_loss": 0.5749114751815796, + "loss_ib": 0.010395705699920654, + "step": 861 + }, + { + "ce_ib": 8.25394344329834, + "ce_orig": 0.9002748131752014, + "epoch": 0.247897045078726, + "kl_loss": 0.3159523904323578, + "loss_ib": 0.01141346711665392, + "step": 862 + }, + { + "ce_ib": 8.197535514831543, + "ce_orig": 0.6030675172805786, + "epoch": 0.247897045078726, + "kl_loss": 0.46316292881965637, + "loss_ib": 0.01282916497439146, + "step": 862 + }, + { + "ce_ib": 8.135879516601562, + "ce_orig": 0.5498018264770508, + "epoch": 0.247897045078726, + "kl_loss": 0.37288355827331543, + "loss_ib": 0.011864714324474335, + "step": 862 + }, + { + "ce_ib": 8.026688575744629, + "ce_orig": 0.6925224661827087, + "epoch": 0.247897045078726, + "kl_loss": 0.3975781798362732, + "loss_ib": 0.012002469971776009, + "step": 862 + }, + { + "ce_ib": 6.015689849853516, + "ce_orig": 0.795344889163971, + "epoch": 0.24818462865770363, + "kl_loss": 0.2662222981452942, + "loss_ib": 0.008677912876009941, + "step": 863 + }, + { + "ce_ib": 6.312599182128906, + "ce_orig": 0.4928840100765228, + "epoch": 0.24818462865770363, + "kl_loss": 0.28389108180999756, + "loss_ib": 0.009151509962975979, + "step": 863 + }, + { + "ce_ib": 12.868780136108398, + "ce_orig": 1.7627007961273193, + "epoch": 0.24818462865770363, + "kl_loss": 0.2926875352859497, + "loss_ib": 0.01579565554857254, + "step": 863 + }, + { + "ce_ib": 13.420784950256348, + "ce_orig": 1.5475761890411377, + "epoch": 0.24818462865770363, + "kl_loss": 0.9178951382637024, + "loss_ib": 0.022599736228585243, + "step": 863 + }, + { + "ce_ib": 9.601221084594727, + "ce_orig": 0.6273306012153625, + "epoch": 0.24847221223668128, + "kl_loss": 0.44295597076416016, + "loss_ib": 0.014030780643224716, + "step": 864 + }, + { + "ce_ib": 10.278837203979492, + "ce_orig": 1.2835111618041992, + "epoch": 0.24847221223668128, + "kl_loss": 0.33239656686782837, + "loss_ib": 0.013602802529931068, + "step": 864 + }, + { + "ce_ib": 10.454754829406738, + "ce_orig": 0.7932427525520325, + "epoch": 0.24847221223668128, + "kl_loss": 0.39738035202026367, + "loss_ib": 0.014428557828068733, + "step": 864 + }, + { + "ce_ib": 8.318263053894043, + "ce_orig": 1.0965704917907715, + "epoch": 0.24847221223668128, + "kl_loss": 0.27918365597724915, + "loss_ib": 0.011110099032521248, + "step": 864 + }, + { + "epoch": 0.24875979581565894, + "grad_norm": 0.09105879068374634, + "learning_rate": 9.928610301807134e-06, + "loss": 0.9249, + "step": 865 + }, + { + "ce_ib": 11.053398132324219, + "ce_orig": 0.98641037940979, + "epoch": 0.24875979581565894, + "kl_loss": 0.34464025497436523, + "loss_ib": 0.014499801211059093, + "step": 865 + }, + { + "ce_ib": 10.561725616455078, + "ce_orig": 0.9344740509986877, + "epoch": 0.24875979581565894, + "kl_loss": 0.40244221687316895, + "loss_ib": 0.014586147852241993, + "step": 865 + }, + { + "ce_ib": 11.87633991241455, + "ce_orig": 1.324188470840454, + "epoch": 0.24875979581565894, + "kl_loss": 0.3682016134262085, + "loss_ib": 0.015558355487883091, + "step": 865 + }, + { + "ce_ib": 6.436470985412598, + "ce_orig": 0.5687084794044495, + "epoch": 0.24875979581565894, + "kl_loss": 0.3997005224227905, + "loss_ib": 0.010433475486934185, + "step": 865 + }, + { + "ce_ib": 7.637430191040039, + "ce_orig": 0.796751856803894, + "epoch": 0.24904737939463656, + "kl_loss": 0.40374982357025146, + "loss_ib": 0.011674928478896618, + "step": 866 + }, + { + "ce_ib": 6.677864074707031, + "ce_orig": 0.6250858902931213, + "epoch": 0.24904737939463656, + "kl_loss": 0.38111498951911926, + "loss_ib": 0.010489013977348804, + "step": 866 + }, + { + "ce_ib": 12.016997337341309, + "ce_orig": 1.5606534481048584, + "epoch": 0.24904737939463656, + "kl_loss": 0.3536309599876404, + "loss_ib": 0.0155533067882061, + "step": 866 + }, + { + "ce_ib": 8.68997573852539, + "ce_orig": 1.1279748678207397, + "epoch": 0.24904737939463656, + "kl_loss": 0.4000932574272156, + "loss_ib": 0.012690908275544643, + "step": 866 + }, + { + "ce_ib": 6.430920124053955, + "ce_orig": 0.6035857200622559, + "epoch": 0.2493349629736142, + "kl_loss": 0.3381291925907135, + "loss_ib": 0.009812211617827415, + "step": 867 + }, + { + "ce_ib": 6.76858377456665, + "ce_orig": 0.8521968126296997, + "epoch": 0.2493349629736142, + "kl_loss": 0.2716369330883026, + "loss_ib": 0.009484952315688133, + "step": 867 + }, + { + "ce_ib": 15.660572052001953, + "ce_orig": 2.287351608276367, + "epoch": 0.2493349629736142, + "kl_loss": 0.33299481868743896, + "loss_ib": 0.018990520387887955, + "step": 867 + }, + { + "ce_ib": 7.993679523468018, + "ce_orig": 0.547291100025177, + "epoch": 0.2493349629736142, + "kl_loss": 0.7522663474082947, + "loss_ib": 0.015516342595219612, + "step": 867 + }, + { + "ce_ib": 9.38365364074707, + "ce_orig": 0.9876450300216675, + "epoch": 0.24962254655259183, + "kl_loss": 0.31372547149658203, + "loss_ib": 0.012520909309387207, + "step": 868 + }, + { + "ce_ib": 13.470992088317871, + "ce_orig": 1.528100609779358, + "epoch": 0.24962254655259183, + "kl_loss": 0.36726510524749756, + "loss_ib": 0.017143642529845238, + "step": 868 + }, + { + "ce_ib": 6.063528060913086, + "ce_orig": 0.8686865568161011, + "epoch": 0.24962254655259183, + "kl_loss": 0.291568398475647, + "loss_ib": 0.008979211561381817, + "step": 868 + }, + { + "ce_ib": 9.594892501831055, + "ce_orig": 0.7062548398971558, + "epoch": 0.24962254655259183, + "kl_loss": 0.7296528816223145, + "loss_ib": 0.016891421750187874, + "step": 868 + }, + { + "ce_ib": 9.050810813903809, + "ce_orig": 0.8255693912506104, + "epoch": 0.24991013013156949, + "kl_loss": 0.4382583498954773, + "loss_ib": 0.013433394022285938, + "step": 869 + }, + { + "ce_ib": 6.191425800323486, + "ce_orig": 0.7848706841468811, + "epoch": 0.24991013013156949, + "kl_loss": 0.22416508197784424, + "loss_ib": 0.008433076553046703, + "step": 869 + }, + { + "ce_ib": 6.417364120483398, + "ce_orig": 0.7727878093719482, + "epoch": 0.24991013013156949, + "kl_loss": 0.2643115520477295, + "loss_ib": 0.009060479700565338, + "step": 869 + }, + { + "ce_ib": 7.678783893585205, + "ce_orig": 0.7737617492675781, + "epoch": 0.24991013013156949, + "kl_loss": 0.42957326769828796, + "loss_ib": 0.0119745172560215, + "step": 869 + }, + { + "epoch": 0.2501977137105471, + "grad_norm": 0.10078983008861542, + "learning_rate": 9.927297588585984e-06, + "loss": 0.8886, + "step": 870 + }, + { + "ce_ib": 10.537775039672852, + "ce_orig": 1.0423405170440674, + "epoch": 0.2501977137105471, + "kl_loss": 0.3949153423309326, + "loss_ib": 0.014486928470432758, + "step": 870 + }, + { + "ce_ib": 5.604185581207275, + "ce_orig": 0.5952542424201965, + "epoch": 0.2501977137105471, + "kl_loss": 0.32443949580192566, + "loss_ib": 0.008848579600453377, + "step": 870 + }, + { + "ce_ib": 11.089546203613281, + "ce_orig": 0.9591896533966064, + "epoch": 0.2501977137105471, + "kl_loss": 0.250461608171463, + "loss_ib": 0.013594161719083786, + "step": 870 + }, + { + "ce_ib": 6.729354381561279, + "ce_orig": 1.008842945098877, + "epoch": 0.2501977137105471, + "kl_loss": 0.25118446350097656, + "loss_ib": 0.009241199120879173, + "step": 870 + }, + { + "ce_ib": 9.652894973754883, + "ce_orig": 1.2227216958999634, + "epoch": 0.2504852972895248, + "kl_loss": 0.4000895023345947, + "loss_ib": 0.013653790578246117, + "step": 871 + }, + { + "ce_ib": 9.622594833374023, + "ce_orig": 0.9795891642570496, + "epoch": 0.2504852972895248, + "kl_loss": 0.25612586736679077, + "loss_ib": 0.012183853425085545, + "step": 871 + }, + { + "ce_ib": 7.215780735015869, + "ce_orig": 0.4193066656589508, + "epoch": 0.2504852972895248, + "kl_loss": 0.4899890422821045, + "loss_ib": 0.012115671299397945, + "step": 871 + }, + { + "ce_ib": 6.65898323059082, + "ce_orig": 0.8104040622711182, + "epoch": 0.2504852972895248, + "kl_loss": 0.26327258348464966, + "loss_ib": 0.00929170846939087, + "step": 871 + }, + { + "ce_ib": 4.725259780883789, + "ce_orig": 0.3476851284503937, + "epoch": 0.2507728808685024, + "kl_loss": 0.6398271322250366, + "loss_ib": 0.011123530566692352, + "step": 872 + }, + { + "ce_ib": 5.815470218658447, + "ce_orig": 0.7363677620887756, + "epoch": 0.2507728808685024, + "kl_loss": 0.5072571039199829, + "loss_ib": 0.010888040997087955, + "step": 872 + }, + { + "ce_ib": 7.01890230178833, + "ce_orig": 0.5536361336708069, + "epoch": 0.2507728808685024, + "kl_loss": 0.3183189928531647, + "loss_ib": 0.010202092118561268, + "step": 872 + }, + { + "ce_ib": 4.89237642288208, + "ce_orig": 0.5722980499267578, + "epoch": 0.2507728808685024, + "kl_loss": 0.30087870359420776, + "loss_ib": 0.007901162840425968, + "step": 872 + }, + { + "ce_ib": 7.8064866065979, + "ce_orig": 1.0494457483291626, + "epoch": 0.25106046444748004, + "kl_loss": 0.27215179800987244, + "loss_ib": 0.010528003796935081, + "step": 873 + }, + { + "ce_ib": 6.676516056060791, + "ce_orig": 0.6909357905387878, + "epoch": 0.25106046444748004, + "kl_loss": 0.36614060401916504, + "loss_ib": 0.010337922722101212, + "step": 873 + }, + { + "ce_ib": 8.948216438293457, + "ce_orig": 0.7401660084724426, + "epoch": 0.25106046444748004, + "kl_loss": 0.3224928677082062, + "loss_ib": 0.012173144146800041, + "step": 873 + }, + { + "ce_ib": 5.891620635986328, + "ce_orig": 0.8088377714157104, + "epoch": 0.25106046444748004, + "kl_loss": 0.3477107882499695, + "loss_ib": 0.009368727914988995, + "step": 873 + }, + { + "ce_ib": 8.354748725891113, + "ce_orig": 0.9080251455307007, + "epoch": 0.2513480480264577, + "kl_loss": 0.4312264621257782, + "loss_ib": 0.012667013332247734, + "step": 874 + }, + { + "ce_ib": 9.125079154968262, + "ce_orig": 1.2377448081970215, + "epoch": 0.2513480480264577, + "kl_loss": 0.28963541984558105, + "loss_ib": 0.012021434493362904, + "step": 874 + }, + { + "ce_ib": 6.146969795227051, + "ce_orig": 0.7261344194412231, + "epoch": 0.2513480480264577, + "kl_loss": 0.2670226991176605, + "loss_ib": 0.008817196823656559, + "step": 874 + }, + { + "ce_ib": 8.836943626403809, + "ce_orig": 0.9964169859886169, + "epoch": 0.2513480480264577, + "kl_loss": 0.29464757442474365, + "loss_ib": 0.011783418245613575, + "step": 874 + }, + { + "epoch": 0.25163563160543534, + "grad_norm": 0.09101825952529907, + "learning_rate": 9.925973004361295e-06, + "loss": 0.9106, + "step": 875 + }, + { + "ce_ib": 10.854291915893555, + "ce_orig": 1.7030887603759766, + "epoch": 0.25163563160543534, + "kl_loss": 0.2948107421398163, + "loss_ib": 0.013802398927509785, + "step": 875 + }, + { + "ce_ib": 5.0278215408325195, + "ce_orig": 0.6354923844337463, + "epoch": 0.25163563160543534, + "kl_loss": 0.27879562973976135, + "loss_ib": 0.007815778255462646, + "step": 875 + }, + { + "ce_ib": 3.9477179050445557, + "ce_orig": 0.27782636880874634, + "epoch": 0.25163563160543534, + "kl_loss": 0.5922541618347168, + "loss_ib": 0.009870259091258049, + "step": 875 + }, + { + "ce_ib": 9.96784782409668, + "ce_orig": 0.8732298016548157, + "epoch": 0.25163563160543534, + "kl_loss": 0.3482765555381775, + "loss_ib": 0.01345061231404543, + "step": 875 + }, + { + "ce_ib": 10.751762390136719, + "ce_orig": 0.7869917750358582, + "epoch": 0.25192321518441296, + "kl_loss": 0.35357779264450073, + "loss_ib": 0.014287540689110756, + "step": 876 + }, + { + "ce_ib": 11.038793563842773, + "ce_orig": 1.3073241710662842, + "epoch": 0.25192321518441296, + "kl_loss": 0.3102778494358063, + "loss_ib": 0.014141570776700974, + "step": 876 + }, + { + "ce_ib": 10.809063911437988, + "ce_orig": 1.3352001905441284, + "epoch": 0.25192321518441296, + "kl_loss": 0.266183078289032, + "loss_ib": 0.013470894657075405, + "step": 876 + }, + { + "ce_ib": 9.622008323669434, + "ce_orig": 1.2020785808563232, + "epoch": 0.25192321518441296, + "kl_loss": 0.29643064737319946, + "loss_ib": 0.012586314231157303, + "step": 876 + }, + { + "ce_ib": 4.619173526763916, + "ce_orig": 0.5329123735427856, + "epoch": 0.2522107987633906, + "kl_loss": 0.25063222646713257, + "loss_ib": 0.007125495467334986, + "step": 877 + }, + { + "ce_ib": 14.786099433898926, + "ce_orig": 1.6358728408813477, + "epoch": 0.2522107987633906, + "kl_loss": 0.3289327919483185, + "loss_ib": 0.018075427040457726, + "step": 877 + }, + { + "ce_ib": 6.436891555786133, + "ce_orig": 0.6348329782485962, + "epoch": 0.2522107987633906, + "kl_loss": 0.24020250141620636, + "loss_ib": 0.008838916197419167, + "step": 877 + }, + { + "ce_ib": 8.219223976135254, + "ce_orig": 0.4184137284755707, + "epoch": 0.2522107987633906, + "kl_loss": 0.2933961749076843, + "loss_ib": 0.011153184808790684, + "step": 877 + }, + { + "ce_ib": 11.890969276428223, + "ce_orig": 1.2066895961761475, + "epoch": 0.25249838234236827, + "kl_loss": 0.3087109327316284, + "loss_ib": 0.014978078193962574, + "step": 878 + }, + { + "ce_ib": 8.250067710876465, + "ce_orig": 0.8217212557792664, + "epoch": 0.25249838234236827, + "kl_loss": 0.30817484855651855, + "loss_ib": 0.011331815272569656, + "step": 878 + }, + { + "ce_ib": 5.804820537567139, + "ce_orig": 0.7277146577835083, + "epoch": 0.25249838234236827, + "kl_loss": 0.2948615252971649, + "loss_ib": 0.00875343568623066, + "step": 878 + }, + { + "ce_ib": 7.200780868530273, + "ce_orig": 0.7634241580963135, + "epoch": 0.25249838234236827, + "kl_loss": 0.434295117855072, + "loss_ib": 0.011543731205165386, + "step": 878 + }, + { + "ce_ib": 8.42392635345459, + "ce_orig": 0.452232301235199, + "epoch": 0.2527859659213459, + "kl_loss": 0.40239661931991577, + "loss_ib": 0.012447891756892204, + "step": 879 + }, + { + "ce_ib": 9.636970520019531, + "ce_orig": 0.909797728061676, + "epoch": 0.2527859659213459, + "kl_loss": 0.2469838708639145, + "loss_ib": 0.012106809765100479, + "step": 879 + }, + { + "ce_ib": 10.866209983825684, + "ce_orig": 0.9333135485649109, + "epoch": 0.2527859659213459, + "kl_loss": 0.2821698486804962, + "loss_ib": 0.01368790864944458, + "step": 879 + }, + { + "ce_ib": 4.037529468536377, + "ce_orig": 0.6108816266059875, + "epoch": 0.2527859659213459, + "kl_loss": 0.2413032352924347, + "loss_ib": 0.006450561806559563, + "step": 879 + }, + { + "epoch": 0.2530735495003235, + "grad_norm": 0.09534526616334915, + "learning_rate": 9.924636552324296e-06, + "loss": 0.8423, + "step": 880 + }, + { + "ce_ib": 7.282698631286621, + "ce_orig": 1.1506547927856445, + "epoch": 0.2530735495003235, + "kl_loss": 0.34004634618759155, + "loss_ib": 0.01068316213786602, + "step": 880 + }, + { + "ce_ib": 11.481069564819336, + "ce_orig": 1.6637837886810303, + "epoch": 0.2530735495003235, + "kl_loss": 0.6563593149185181, + "loss_ib": 0.018044661730527878, + "step": 880 + }, + { + "ce_ib": 10.626919746398926, + "ce_orig": 1.1782991886138916, + "epoch": 0.2530735495003235, + "kl_loss": 0.328736275434494, + "loss_ib": 0.013914283365011215, + "step": 880 + }, + { + "ce_ib": 11.945712089538574, + "ce_orig": 1.6569890975952148, + "epoch": 0.2530735495003235, + "kl_loss": 0.39438027143478394, + "loss_ib": 0.015889516100287437, + "step": 880 + }, + { + "ce_ib": 6.359279632568359, + "ce_orig": 0.8192113637924194, + "epoch": 0.2533611330793012, + "kl_loss": 0.3645118176937103, + "loss_ib": 0.010004397481679916, + "step": 881 + }, + { + "ce_ib": 10.964028358459473, + "ce_orig": 0.7998406887054443, + "epoch": 0.2533611330793012, + "kl_loss": 0.49998754262924194, + "loss_ib": 0.015963904559612274, + "step": 881 + }, + { + "ce_ib": 11.182770729064941, + "ce_orig": 1.3286393880844116, + "epoch": 0.2533611330793012, + "kl_loss": 0.4621961712837219, + "loss_ib": 0.015804732218384743, + "step": 881 + }, + { + "ce_ib": 8.53327751159668, + "ce_orig": 0.8316857218742371, + "epoch": 0.2533611330793012, + "kl_loss": 0.3155197203159332, + "loss_ib": 0.0116884745657444, + "step": 881 + }, + { + "ce_ib": 6.411721229553223, + "ce_orig": 0.7425188422203064, + "epoch": 0.2536487166582788, + "kl_loss": 0.2903468906879425, + "loss_ib": 0.00931518990546465, + "step": 882 + }, + { + "ce_ib": 6.149788856506348, + "ce_orig": 0.8823583722114563, + "epoch": 0.2536487166582788, + "kl_loss": 0.2212170958518982, + "loss_ib": 0.00836195982992649, + "step": 882 + }, + { + "ce_ib": 9.317445755004883, + "ce_orig": 0.932159423828125, + "epoch": 0.2536487166582788, + "kl_loss": 0.2839629650115967, + "loss_ib": 0.012157075107097626, + "step": 882 + }, + { + "ce_ib": 7.935296535491943, + "ce_orig": 0.9251559376716614, + "epoch": 0.2536487166582788, + "kl_loss": 0.3732267916202545, + "loss_ib": 0.011667564511299133, + "step": 882 + }, + { + "ce_ib": 5.324270725250244, + "ce_orig": 0.5246868133544922, + "epoch": 0.25393630023725644, + "kl_loss": 0.27826499938964844, + "loss_ib": 0.00810692086815834, + "step": 883 + }, + { + "ce_ib": 7.959061145782471, + "ce_orig": 1.0556241273880005, + "epoch": 0.25393630023725644, + "kl_loss": 0.2916038930416107, + "loss_ib": 0.010875099338591099, + "step": 883 + }, + { + "ce_ib": 6.867218971252441, + "ce_orig": 0.6588130593299866, + "epoch": 0.25393630023725644, + "kl_loss": 0.29453879594802856, + "loss_ib": 0.009812606498599052, + "step": 883 + }, + { + "ce_ib": 4.161896228790283, + "ce_orig": 0.4560477137565613, + "epoch": 0.25393630023725644, + "kl_loss": 0.2710115611553192, + "loss_ib": 0.006872011814266443, + "step": 883 + }, + { + "ce_ib": 6.286719799041748, + "ce_orig": 0.7286011576652527, + "epoch": 0.2542238838162341, + "kl_loss": 0.3385438621044159, + "loss_ib": 0.009672158397734165, + "step": 884 + }, + { + "ce_ib": 10.294978141784668, + "ce_orig": 0.775021493434906, + "epoch": 0.2542238838162341, + "kl_loss": 0.3102239966392517, + "loss_ib": 0.01339721865952015, + "step": 884 + }, + { + "ce_ib": 5.616055488586426, + "ce_orig": 0.5666757822036743, + "epoch": 0.2542238838162341, + "kl_loss": 0.3340657651424408, + "loss_ib": 0.00895671360194683, + "step": 884 + }, + { + "ce_ib": 6.7213826179504395, + "ce_orig": 0.6700468063354492, + "epoch": 0.2542238838162341, + "kl_loss": 0.259299099445343, + "loss_ib": 0.009314373135566711, + "step": 884 + }, + { + "epoch": 0.25451146739521174, + "grad_norm": 0.09304392337799072, + "learning_rate": 9.92328823569481e-06, + "loss": 0.9081, + "step": 885 + }, + { + "ce_ib": 10.979228973388672, + "ce_orig": 1.314060091972351, + "epoch": 0.25451146739521174, + "kl_loss": 0.4535999298095703, + "loss_ib": 0.015515227802097797, + "step": 885 + }, + { + "ce_ib": 9.044007301330566, + "ce_orig": 1.0800416469573975, + "epoch": 0.25451146739521174, + "kl_loss": 0.25342878699302673, + "loss_ib": 0.01157829537987709, + "step": 885 + }, + { + "ce_ib": 7.7309441566467285, + "ce_orig": 0.737576425075531, + "epoch": 0.25451146739521174, + "kl_loss": 0.25567033886909485, + "loss_ib": 0.010287647135555744, + "step": 885 + }, + { + "ce_ib": 5.4991774559021, + "ce_orig": 0.67442387342453, + "epoch": 0.25451146739521174, + "kl_loss": 0.22683289647102356, + "loss_ib": 0.007767506875097752, + "step": 885 + }, + { + "ce_ib": 9.953997611999512, + "ce_orig": 1.5094314813613892, + "epoch": 0.25479905097418937, + "kl_loss": 0.23526480793952942, + "loss_ib": 0.012306645512580872, + "step": 886 + }, + { + "ce_ib": 5.9002685546875, + "ce_orig": 0.8582682013511658, + "epoch": 0.25479905097418937, + "kl_loss": 0.2747255265712738, + "loss_ib": 0.008647523820400238, + "step": 886 + }, + { + "ce_ib": 3.582958698272705, + "ce_orig": 0.4530331492424011, + "epoch": 0.25479905097418937, + "kl_loss": 0.22470691800117493, + "loss_ib": 0.005830028094351292, + "step": 886 + }, + { + "ce_ib": 8.590431213378906, + "ce_orig": 0.7905410528182983, + "epoch": 0.25479905097418937, + "kl_loss": 0.32975587248802185, + "loss_ib": 0.011887989938259125, + "step": 886 + }, + { + "ce_ib": 12.112462997436523, + "ce_orig": 1.4443359375, + "epoch": 0.255086634553167, + "kl_loss": 0.39053958654403687, + "loss_ib": 0.016017857939004898, + "step": 887 + }, + { + "ce_ib": 7.377198219299316, + "ce_orig": 0.8195330500602722, + "epoch": 0.255086634553167, + "kl_loss": 0.3151588439941406, + "loss_ib": 0.010528786107897758, + "step": 887 + }, + { + "ce_ib": 5.7976861000061035, + "ce_orig": 0.6533734798431396, + "epoch": 0.255086634553167, + "kl_loss": 0.5457720756530762, + "loss_ib": 0.011255406774580479, + "step": 887 + }, + { + "ce_ib": 3.642547369003296, + "ce_orig": 0.3404500484466553, + "epoch": 0.255086634553167, + "kl_loss": 0.31931373476982117, + "loss_ib": 0.006835684645920992, + "step": 887 + }, + { + "ce_ib": 9.251564025878906, + "ce_orig": 1.1658694744110107, + "epoch": 0.25537421813214467, + "kl_loss": 0.276309609413147, + "loss_ib": 0.012014660984277725, + "step": 888 + }, + { + "ce_ib": 9.521452903747559, + "ce_orig": 1.0355658531188965, + "epoch": 0.25537421813214467, + "kl_loss": 0.3186280131340027, + "loss_ib": 0.012707732617855072, + "step": 888 + }, + { + "ce_ib": 8.611248970031738, + "ce_orig": 1.067291498184204, + "epoch": 0.25537421813214467, + "kl_loss": 0.3613908290863037, + "loss_ib": 0.012225157581269741, + "step": 888 + }, + { + "ce_ib": 6.128733158111572, + "ce_orig": 0.7295544147491455, + "epoch": 0.25537421813214467, + "kl_loss": 0.2833183705806732, + "loss_ib": 0.008961916901171207, + "step": 888 + }, + { + "ce_ib": 7.8607683181762695, + "ce_orig": 0.9411081075668335, + "epoch": 0.2556618017111223, + "kl_loss": 0.3833426833152771, + "loss_ib": 0.011694194748997688, + "step": 889 + }, + { + "ce_ib": 7.081918716430664, + "ce_orig": 0.7683687806129456, + "epoch": 0.2556618017111223, + "kl_loss": 0.3560516834259033, + "loss_ib": 0.010642435401678085, + "step": 889 + }, + { + "ce_ib": 6.406893253326416, + "ce_orig": 0.6722978353500366, + "epoch": 0.2556618017111223, + "kl_loss": 0.2877756357192993, + "loss_ib": 0.009284649044275284, + "step": 889 + }, + { + "ce_ib": 7.919890403747559, + "ce_orig": 1.0300544500350952, + "epoch": 0.2556618017111223, + "kl_loss": 0.3256221413612366, + "loss_ib": 0.011176111176609993, + "step": 889 + }, + { + "epoch": 0.2559493852900999, + "grad_norm": 0.08482884615659714, + "learning_rate": 9.921928057721242e-06, + "loss": 0.8751, + "step": 890 + }, + { + "ce_ib": 8.637929916381836, + "ce_orig": 0.9734044075012207, + "epoch": 0.2559493852900999, + "kl_loss": 0.32373255491256714, + "loss_ib": 0.011875255033373833, + "step": 890 + }, + { + "ce_ib": 8.563017845153809, + "ce_orig": 0.4407178461551666, + "epoch": 0.2559493852900999, + "kl_loss": 0.37603944540023804, + "loss_ib": 0.012323413044214249, + "step": 890 + }, + { + "ce_ib": 4.576637268066406, + "ce_orig": 0.5486153960227966, + "epoch": 0.2559493852900999, + "kl_loss": 0.2956191301345825, + "loss_ib": 0.007532828953117132, + "step": 890 + }, + { + "ce_ib": 6.462611675262451, + "ce_orig": 0.5243940353393555, + "epoch": 0.2559493852900999, + "kl_loss": 0.2781410813331604, + "loss_ib": 0.009244021959602833, + "step": 890 + }, + { + "ce_ib": 8.214609146118164, + "ce_orig": 0.5406211614608765, + "epoch": 0.2562369688690776, + "kl_loss": 0.40383273363113403, + "loss_ib": 0.012252936139702797, + "step": 891 + }, + { + "ce_ib": 9.679584503173828, + "ce_orig": 0.9921509027481079, + "epoch": 0.2562369688690776, + "kl_loss": 0.3201594352722168, + "loss_ib": 0.01288117840886116, + "step": 891 + }, + { + "ce_ib": 8.80469036102295, + "ce_orig": 1.0968488454818726, + "epoch": 0.2562369688690776, + "kl_loss": 0.683159589767456, + "loss_ib": 0.01563628576695919, + "step": 891 + }, + { + "ce_ib": 6.155488014221191, + "ce_orig": 0.5672451853752136, + "epoch": 0.2562369688690776, + "kl_loss": 0.39383214712142944, + "loss_ib": 0.010093809105455875, + "step": 891 + }, + { + "ce_ib": 6.969648838043213, + "ce_orig": 0.7994107604026794, + "epoch": 0.2565245524480552, + "kl_loss": 0.3125140070915222, + "loss_ib": 0.010094788856804371, + "step": 892 + }, + { + "ce_ib": 7.141666412353516, + "ce_orig": 0.4777945876121521, + "epoch": 0.2565245524480552, + "kl_loss": 0.3340398967266083, + "loss_ib": 0.010482065379619598, + "step": 892 + }, + { + "ce_ib": 8.195847511291504, + "ce_orig": 1.0143331289291382, + "epoch": 0.2565245524480552, + "kl_loss": 0.4175947308540344, + "loss_ib": 0.012371795251965523, + "step": 892 + }, + { + "ce_ib": 10.73903751373291, + "ce_orig": 0.8994997143745422, + "epoch": 0.2565245524480552, + "kl_loss": 0.2581188380718231, + "loss_ib": 0.013320226222276688, + "step": 892 + }, + { + "ce_ib": 3.9347095489501953, + "ce_orig": 0.3383885324001312, + "epoch": 0.25681213602703284, + "kl_loss": 0.3705168068408966, + "loss_ib": 0.007639877498149872, + "step": 893 + }, + { + "ce_ib": 9.592818260192871, + "ce_orig": 0.9891217947006226, + "epoch": 0.25681213602703284, + "kl_loss": 0.3685084581375122, + "loss_ib": 0.013277902267873287, + "step": 893 + }, + { + "ce_ib": 8.22640609741211, + "ce_orig": 0.8431357145309448, + "epoch": 0.25681213602703284, + "kl_loss": 0.28724151849746704, + "loss_ib": 0.011098820716142654, + "step": 893 + }, + { + "ce_ib": 6.6608357429504395, + "ce_orig": 0.7445570230484009, + "epoch": 0.25681213602703284, + "kl_loss": 0.23527176678180695, + "loss_ib": 0.009013553149998188, + "step": 893 + }, + { + "ce_ib": 4.6245598793029785, + "ce_orig": 0.5491394400596619, + "epoch": 0.2570997196060105, + "kl_loss": 0.2531249523162842, + "loss_ib": 0.007155809085816145, + "step": 894 + }, + { + "ce_ib": 7.3063063621521, + "ce_orig": 0.6298426985740662, + "epoch": 0.2570997196060105, + "kl_loss": 0.25430071353912354, + "loss_ib": 0.009849313646554947, + "step": 894 + }, + { + "ce_ib": 9.165714263916016, + "ce_orig": 1.1000019311904907, + "epoch": 0.2570997196060105, + "kl_loss": 0.5990852117538452, + "loss_ib": 0.015156567096710205, + "step": 894 + }, + { + "ce_ib": 8.9033842086792, + "ce_orig": 0.9108843207359314, + "epoch": 0.2570997196060105, + "kl_loss": 0.30792462825775146, + "loss_ib": 0.011982630006968975, + "step": 894 + }, + { + "epoch": 0.25738730318498815, + "grad_norm": 0.09863123297691345, + "learning_rate": 9.92055602168058e-06, + "loss": 0.8698, + "step": 895 + }, + { + "ce_ib": 7.7511515617370605, + "ce_orig": 0.7273695468902588, + "epoch": 0.25738730318498815, + "kl_loss": 0.2521267533302307, + "loss_ib": 0.010272419080138206, + "step": 895 + }, + { + "ce_ib": 8.19736385345459, + "ce_orig": 0.666002094745636, + "epoch": 0.25738730318498815, + "kl_loss": 0.45970863103866577, + "loss_ib": 0.012794449925422668, + "step": 895 + }, + { + "ce_ib": 8.727402687072754, + "ce_orig": 0.5750917196273804, + "epoch": 0.25738730318498815, + "kl_loss": 0.33787843585014343, + "loss_ib": 0.012106186710298061, + "step": 895 + }, + { + "ce_ib": 7.301302433013916, + "ce_orig": 0.49371325969696045, + "epoch": 0.25738730318498815, + "kl_loss": 0.39213109016418457, + "loss_ib": 0.011222613044083118, + "step": 895 + }, + { + "ce_ib": 8.087718963623047, + "ce_orig": 1.0477973222732544, + "epoch": 0.25767488676396577, + "kl_loss": 0.26531845331192017, + "loss_ib": 0.010740903206169605, + "step": 896 + }, + { + "ce_ib": 14.437392234802246, + "ce_orig": 1.9533960819244385, + "epoch": 0.25767488676396577, + "kl_loss": 0.3840804100036621, + "loss_ib": 0.018278196454048157, + "step": 896 + }, + { + "ce_ib": 12.922438621520996, + "ce_orig": 1.4912583827972412, + "epoch": 0.25767488676396577, + "kl_loss": 0.33412793278694153, + "loss_ib": 0.016263717785477638, + "step": 896 + }, + { + "ce_ib": 9.707921028137207, + "ce_orig": 1.1894207000732422, + "epoch": 0.25767488676396577, + "kl_loss": 0.2657119035720825, + "loss_ib": 0.012365040369331837, + "step": 896 + }, + { + "ce_ib": 9.557899475097656, + "ce_orig": 0.8531953692436218, + "epoch": 0.2579624703429434, + "kl_loss": 0.2381764054298401, + "loss_ib": 0.01193966343998909, + "step": 897 + }, + { + "ce_ib": 7.224669456481934, + "ce_orig": 1.0568636655807495, + "epoch": 0.2579624703429434, + "kl_loss": 0.2253345549106598, + "loss_ib": 0.009478014893829823, + "step": 897 + }, + { + "ce_ib": 9.596117973327637, + "ce_orig": 0.8501659035682678, + "epoch": 0.2579624703429434, + "kl_loss": 0.3259985148906708, + "loss_ib": 0.012856102548539639, + "step": 897 + }, + { + "ce_ib": 8.441751480102539, + "ce_orig": 0.8498370051383972, + "epoch": 0.2579624703429434, + "kl_loss": 0.34127742052078247, + "loss_ib": 0.011854525655508041, + "step": 897 + }, + { + "ce_ib": 9.776628494262695, + "ce_orig": 1.1606560945510864, + "epoch": 0.2582500539219211, + "kl_loss": 0.39107829332351685, + "loss_ib": 0.013687411323189735, + "step": 898 + }, + { + "ce_ib": 8.28480052947998, + "ce_orig": 0.8396362066268921, + "epoch": 0.2582500539219211, + "kl_loss": 0.5774872899055481, + "loss_ib": 0.014059673063457012, + "step": 898 + }, + { + "ce_ib": 5.942250728607178, + "ce_orig": 0.570982038974762, + "epoch": 0.2582500539219211, + "kl_loss": 0.35418200492858887, + "loss_ib": 0.009484071284532547, + "step": 898 + }, + { + "ce_ib": 13.332066535949707, + "ce_orig": 1.999030590057373, + "epoch": 0.2582500539219211, + "kl_loss": 0.32090240716934204, + "loss_ib": 0.016541089862585068, + "step": 898 + }, + { + "ce_ib": 6.9958176612854, + "ce_orig": 0.950183629989624, + "epoch": 0.2585376375008987, + "kl_loss": 0.2381882220506668, + "loss_ib": 0.00937770027667284, + "step": 899 + }, + { + "ce_ib": 10.04285717010498, + "ce_orig": 0.8781871199607849, + "epoch": 0.2585376375008987, + "kl_loss": 0.3415898382663727, + "loss_ib": 0.013458754867315292, + "step": 899 + }, + { + "ce_ib": 13.117063522338867, + "ce_orig": 1.5412489175796509, + "epoch": 0.2585376375008987, + "kl_loss": 0.28792649507522583, + "loss_ib": 0.015996329486370087, + "step": 899 + }, + { + "ce_ib": 5.770230293273926, + "ce_orig": 0.4059688448905945, + "epoch": 0.2585376375008987, + "kl_loss": 0.3441586196422577, + "loss_ib": 0.00921181682497263, + "step": 899 + }, + { + "epoch": 0.2588252210798763, + "grad_norm": 0.11080905795097351, + "learning_rate": 9.919172130878378e-06, + "loss": 0.8609, + "step": 900 + }, + { + "ce_ib": 7.81713342666626, + "ce_orig": 0.7552506923675537, + "epoch": 0.2588252210798763, + "kl_loss": 0.3844016194343567, + "loss_ib": 0.011661150492727757, + "step": 900 + }, + { + "ce_ib": 7.249913215637207, + "ce_orig": 0.968014657497406, + "epoch": 0.2588252210798763, + "kl_loss": 0.25877052545547485, + "loss_ib": 0.009837618097662926, + "step": 900 + }, + { + "ce_ib": 7.9647650718688965, + "ce_orig": 0.5272249579429626, + "epoch": 0.2588252210798763, + "kl_loss": 0.284597784280777, + "loss_ib": 0.01081074308604002, + "step": 900 + }, + { + "ce_ib": 8.811546325683594, + "ce_orig": 0.7540147304534912, + "epoch": 0.2588252210798763, + "kl_loss": 0.39285385608673096, + "loss_ib": 0.012740084901452065, + "step": 900 + }, + { + "ce_ib": 4.880631446838379, + "ce_orig": 0.5076504945755005, + "epoch": 0.259112804658854, + "kl_loss": 0.26206904649734497, + "loss_ib": 0.007501321844756603, + "step": 901 + }, + { + "ce_ib": 7.368719100952148, + "ce_orig": 0.6915988326072693, + "epoch": 0.259112804658854, + "kl_loss": 0.4998074769973755, + "loss_ib": 0.012366793118417263, + "step": 901 + }, + { + "ce_ib": 4.789905071258545, + "ce_orig": 0.756260335445404, + "epoch": 0.259112804658854, + "kl_loss": 0.2596268951892853, + "loss_ib": 0.00738617405295372, + "step": 901 + }, + { + "ce_ib": 6.979801177978516, + "ce_orig": 0.49811649322509766, + "epoch": 0.259112804658854, + "kl_loss": 0.3590124845504761, + "loss_ib": 0.010569925419986248, + "step": 901 + }, + { + "ce_ib": 5.86633825302124, + "ce_orig": 0.7878961563110352, + "epoch": 0.2594003882378316, + "kl_loss": 0.2769380211830139, + "loss_ib": 0.008635718375444412, + "step": 902 + }, + { + "ce_ib": 7.545517921447754, + "ce_orig": 0.9500758051872253, + "epoch": 0.2594003882378316, + "kl_loss": 0.21764595806598663, + "loss_ib": 0.009721977636218071, + "step": 902 + }, + { + "ce_ib": 6.535619258880615, + "ce_orig": 0.8012053966522217, + "epoch": 0.2594003882378316, + "kl_loss": 0.29037898778915405, + "loss_ib": 0.009439408779144287, + "step": 902 + }, + { + "ce_ib": 9.716675758361816, + "ce_orig": 0.9397892951965332, + "epoch": 0.2594003882378316, + "kl_loss": 0.3330523669719696, + "loss_ib": 0.01304719876497984, + "step": 902 + }, + { + "ce_ib": 4.242072105407715, + "ce_orig": 0.5148694515228271, + "epoch": 0.25968797181680925, + "kl_loss": 0.2478223443031311, + "loss_ib": 0.006720295175909996, + "step": 903 + }, + { + "ce_ib": 6.857370376586914, + "ce_orig": 0.48739153146743774, + "epoch": 0.25968797181680925, + "kl_loss": 0.3677447736263275, + "loss_ib": 0.010534818284213543, + "step": 903 + }, + { + "ce_ib": 5.933156967163086, + "ce_orig": 0.40238049626350403, + "epoch": 0.25968797181680925, + "kl_loss": 0.25679805874824524, + "loss_ib": 0.008501137606799603, + "step": 903 + }, + { + "ce_ib": 8.204896926879883, + "ce_orig": 0.7569003105163574, + "epoch": 0.25968797181680925, + "kl_loss": 0.2540472745895386, + "loss_ib": 0.010745369829237461, + "step": 903 + }, + { + "ce_ib": 6.275259494781494, + "ce_orig": 0.70445716381073, + "epoch": 0.25997555539578693, + "kl_loss": 0.3568292260169983, + "loss_ib": 0.009843551553785801, + "step": 904 + }, + { + "ce_ib": 9.74506664276123, + "ce_orig": 1.258253812789917, + "epoch": 0.25997555539578693, + "kl_loss": 0.2770775854587555, + "loss_ib": 0.012515842914581299, + "step": 904 + }, + { + "ce_ib": 10.810585021972656, + "ce_orig": 0.8011317849159241, + "epoch": 0.25997555539578693, + "kl_loss": 0.3040934205055237, + "loss_ib": 0.013851518742740154, + "step": 904 + }, + { + "ce_ib": 7.582912445068359, + "ce_orig": 0.9397713541984558, + "epoch": 0.25997555539578693, + "kl_loss": 0.22432610392570496, + "loss_ib": 0.00982617400586605, + "step": 904 + }, + { + "epoch": 0.26026313897476455, + "grad_norm": 0.09053654223680496, + "learning_rate": 9.917776388648748e-06, + "loss": 0.82, + "step": 905 + }, + { + "ce_ib": 10.179444313049316, + "ce_orig": 0.9755759835243225, + "epoch": 0.26026313897476455, + "kl_loss": 0.33106786012649536, + "loss_ib": 0.013490123674273491, + "step": 905 + }, + { + "ce_ib": 3.325618267059326, + "ce_orig": 0.16170339286327362, + "epoch": 0.26026313897476455, + "kl_loss": 0.5996187925338745, + "loss_ib": 0.009321806021034718, + "step": 905 + }, + { + "ce_ib": 7.778548240661621, + "ce_orig": 0.8278656005859375, + "epoch": 0.26026313897476455, + "kl_loss": 0.2725660502910614, + "loss_ib": 0.010504208505153656, + "step": 905 + }, + { + "ce_ib": 8.379356384277344, + "ce_orig": 0.9261234402656555, + "epoch": 0.26026313897476455, + "kl_loss": 0.378944993019104, + "loss_ib": 0.012168805114924908, + "step": 905 + }, + { + "ce_ib": 9.240448951721191, + "ce_orig": 0.6733576655387878, + "epoch": 0.2605507225537422, + "kl_loss": 0.4032820165157318, + "loss_ib": 0.013273268938064575, + "step": 906 + }, + { + "ce_ib": 12.491889953613281, + "ce_orig": 1.8018544912338257, + "epoch": 0.2605507225537422, + "kl_loss": 0.34695449471473694, + "loss_ib": 0.015961434692144394, + "step": 906 + }, + { + "ce_ib": 8.959487915039062, + "ce_orig": 1.4174104928970337, + "epoch": 0.2605507225537422, + "kl_loss": 0.30827200412750244, + "loss_ib": 0.012042207643389702, + "step": 906 + }, + { + "ce_ib": 7.630092620849609, + "ce_orig": 0.2582243084907532, + "epoch": 0.2605507225537422, + "kl_loss": 0.4620741903781891, + "loss_ib": 0.01225083414465189, + "step": 906 + }, + { + "ce_ib": 9.350717544555664, + "ce_orig": 0.8410260081291199, + "epoch": 0.2608383061327198, + "kl_loss": 0.27230745553970337, + "loss_ib": 0.012073791585862637, + "step": 907 + }, + { + "ce_ib": 5.942009925842285, + "ce_orig": 0.631314754486084, + "epoch": 0.2608383061327198, + "kl_loss": 0.23767834901809692, + "loss_ib": 0.008318793959915638, + "step": 907 + }, + { + "ce_ib": 7.381026744842529, + "ce_orig": 0.8023630380630493, + "epoch": 0.2608383061327198, + "kl_loss": 0.2556014060974121, + "loss_ib": 0.009937040507793427, + "step": 907 + }, + { + "ce_ib": 10.775535583496094, + "ce_orig": 1.5145833492279053, + "epoch": 0.2608383061327198, + "kl_loss": 0.3266568183898926, + "loss_ib": 0.014042104594409466, + "step": 907 + }, + { + "ce_ib": 6.754226207733154, + "ce_orig": 0.5556401014328003, + "epoch": 0.2611258897116975, + "kl_loss": 0.37610459327697754, + "loss_ib": 0.010515272617340088, + "step": 908 + }, + { + "ce_ib": 4.648565292358398, + "ce_orig": 0.42741912603378296, + "epoch": 0.2611258897116975, + "kl_loss": 0.2677002549171448, + "loss_ib": 0.007325568236410618, + "step": 908 + }, + { + "ce_ib": 9.743642807006836, + "ce_orig": 1.2022920846939087, + "epoch": 0.2611258897116975, + "kl_loss": 0.31233084201812744, + "loss_ib": 0.01286695059388876, + "step": 908 + }, + { + "ce_ib": 8.952857971191406, + "ce_orig": 1.0449740886688232, + "epoch": 0.2611258897116975, + "kl_loss": 0.290395051240921, + "loss_ib": 0.011856808327138424, + "step": 908 + }, + { + "ce_ib": 7.314600467681885, + "ce_orig": 0.487211138010025, + "epoch": 0.2614134732906751, + "kl_loss": 0.25061601400375366, + "loss_ib": 0.009820760227739811, + "step": 909 + }, + { + "ce_ib": 8.830986976623535, + "ce_orig": 0.7534437775611877, + "epoch": 0.2614134732906751, + "kl_loss": 0.276096910238266, + "loss_ib": 0.01159195601940155, + "step": 909 + }, + { + "ce_ib": 5.072354793548584, + "ce_orig": 0.7483262419700623, + "epoch": 0.2614134732906751, + "kl_loss": 0.2831575870513916, + "loss_ib": 0.007903930731117725, + "step": 909 + }, + { + "ce_ib": 6.34334659576416, + "ce_orig": 0.792799711227417, + "epoch": 0.2614134732906751, + "kl_loss": 0.27525418996810913, + "loss_ib": 0.009095888584852219, + "step": 909 + }, + { + "epoch": 0.2617010568696527, + "grad_norm": 0.09709301590919495, + "learning_rate": 9.916368798354356e-06, + "loss": 0.8731, + "step": 910 + }, + { + "ce_ib": 4.5776896476745605, + "ce_orig": 0.5359620451927185, + "epoch": 0.2617010568696527, + "kl_loss": 0.2551354765892029, + "loss_ib": 0.007129044272005558, + "step": 910 + }, + { + "ce_ib": 5.752841949462891, + "ce_orig": 0.4892864227294922, + "epoch": 0.2617010568696527, + "kl_loss": 0.28199148178100586, + "loss_ib": 0.008572756312787533, + "step": 910 + }, + { + "ce_ib": 7.317269325256348, + "ce_orig": 0.8823491930961609, + "epoch": 0.2617010568696527, + "kl_loss": 0.2838546335697174, + "loss_ib": 0.0101558156311512, + "step": 910 + }, + { + "ce_ib": 4.611359119415283, + "ce_orig": 0.6804866790771484, + "epoch": 0.2617010568696527, + "kl_loss": 0.24438399076461792, + "loss_ib": 0.007055198773741722, + "step": 910 + }, + { + "ce_ib": 6.782260894775391, + "ce_orig": 0.7364330887794495, + "epoch": 0.2619886404486304, + "kl_loss": 0.31710243225097656, + "loss_ib": 0.009953285567462444, + "step": 911 + }, + { + "ce_ib": 8.65565013885498, + "ce_orig": 1.165532112121582, + "epoch": 0.2619886404486304, + "kl_loss": 0.27839866280555725, + "loss_ib": 0.01143963634967804, + "step": 911 + }, + { + "ce_ib": 10.186466217041016, + "ce_orig": 0.9923035502433777, + "epoch": 0.2619886404486304, + "kl_loss": 0.34808266162872314, + "loss_ib": 0.013667291961610317, + "step": 911 + }, + { + "ce_ib": 7.47867488861084, + "ce_orig": 0.6916998028755188, + "epoch": 0.2619886404486304, + "kl_loss": 0.3453470766544342, + "loss_ib": 0.01093214564025402, + "step": 911 + }, + { + "ce_ib": 13.006325721740723, + "ce_orig": 1.4633382558822632, + "epoch": 0.26227622402760803, + "kl_loss": 0.23132070899009705, + "loss_ib": 0.015319532714784145, + "step": 912 + }, + { + "ce_ib": 6.6978654861450195, + "ce_orig": 0.7193461060523987, + "epoch": 0.26227622402760803, + "kl_loss": 0.24380119144916534, + "loss_ib": 0.009135877713561058, + "step": 912 + }, + { + "ce_ib": 7.76169490814209, + "ce_orig": 0.75163733959198, + "epoch": 0.26227622402760803, + "kl_loss": 0.27823013067245483, + "loss_ib": 0.010543995536863804, + "step": 912 + }, + { + "ce_ib": 10.430732727050781, + "ce_orig": 0.5616188049316406, + "epoch": 0.26227622402760803, + "kl_loss": 0.2974989116191864, + "loss_ib": 0.013405721634626389, + "step": 912 + }, + { + "ce_ib": 7.609768390655518, + "ce_orig": 0.4176271855831146, + "epoch": 0.26256380760658565, + "kl_loss": 0.38976022601127625, + "loss_ib": 0.011507370509207249, + "step": 913 + }, + { + "ce_ib": 11.438220977783203, + "ce_orig": 1.6095136404037476, + "epoch": 0.26256380760658565, + "kl_loss": 0.24979974329471588, + "loss_ib": 0.013936217874288559, + "step": 913 + }, + { + "ce_ib": 11.603028297424316, + "ce_orig": 1.417939305305481, + "epoch": 0.26256380760658565, + "kl_loss": 0.2228064239025116, + "loss_ib": 0.013831092976033688, + "step": 913 + }, + { + "ce_ib": 8.409954071044922, + "ce_orig": 0.827298104763031, + "epoch": 0.26256380760658565, + "kl_loss": 0.3605518341064453, + "loss_ib": 0.012015472166240215, + "step": 913 + }, + { + "ce_ib": 4.817587852478027, + "ce_orig": 0.4817865192890167, + "epoch": 0.2628513911855633, + "kl_loss": 0.25332438945770264, + "loss_ib": 0.007350832223892212, + "step": 914 + }, + { + "ce_ib": 8.813254356384277, + "ce_orig": 0.6531316041946411, + "epoch": 0.2628513911855633, + "kl_loss": 0.42108702659606934, + "loss_ib": 0.01302412524819374, + "step": 914 + }, + { + "ce_ib": 11.274821281433105, + "ce_orig": 1.3683724403381348, + "epoch": 0.2628513911855633, + "kl_loss": 0.42044275999069214, + "loss_ib": 0.015479249879717827, + "step": 914 + }, + { + "ce_ib": 4.2823686599731445, + "ce_orig": 0.432784765958786, + "epoch": 0.2628513911855633, + "kl_loss": 0.5916892290115356, + "loss_ib": 0.010199260897934437, + "step": 914 + }, + { + "epoch": 0.26313897476454096, + "grad_norm": 0.104263536632061, + "learning_rate": 9.914949363386417e-06, + "loss": 0.9239, + "step": 915 + }, + { + "ce_ib": 10.224993705749512, + "ce_orig": 0.7435204982757568, + "epoch": 0.26313897476454096, + "kl_loss": 0.48760131001472473, + "loss_ib": 0.01510100718587637, + "step": 915 + }, + { + "ce_ib": 5.030897617340088, + "ce_orig": 0.5878293514251709, + "epoch": 0.26313897476454096, + "kl_loss": 0.2752516567707062, + "loss_ib": 0.007783413864672184, + "step": 915 + }, + { + "ce_ib": 9.2077054977417, + "ce_orig": 1.0866307020187378, + "epoch": 0.26313897476454096, + "kl_loss": 0.27611714601516724, + "loss_ib": 0.011968877166509628, + "step": 915 + }, + { + "ce_ib": 7.913527965545654, + "ce_orig": 0.6080297827720642, + "epoch": 0.26313897476454096, + "kl_loss": 0.36963915824890137, + "loss_ib": 0.011609918437898159, + "step": 915 + }, + { + "ce_ib": 4.733314037322998, + "ce_orig": 0.3992489278316498, + "epoch": 0.2634265583435186, + "kl_loss": 0.2293146848678589, + "loss_ib": 0.0070264614187181, + "step": 916 + }, + { + "ce_ib": 9.097807884216309, + "ce_orig": 0.9395797848701477, + "epoch": 0.2634265583435186, + "kl_loss": 0.2749103009700775, + "loss_ib": 0.01184691023081541, + "step": 916 + }, + { + "ce_ib": 5.224862098693848, + "ce_orig": 0.49672380089759827, + "epoch": 0.2634265583435186, + "kl_loss": 0.3457143306732178, + "loss_ib": 0.008682005107402802, + "step": 916 + }, + { + "ce_ib": 3.887174367904663, + "ce_orig": 0.37023693323135376, + "epoch": 0.2634265583435186, + "kl_loss": 0.24267533421516418, + "loss_ib": 0.006313927471637726, + "step": 916 + }, + { + "ce_ib": 6.222542762756348, + "ce_orig": 0.5573530197143555, + "epoch": 0.2637141419224962, + "kl_loss": 0.30178603529930115, + "loss_ib": 0.009240402840077877, + "step": 917 + }, + { + "ce_ib": 9.022146224975586, + "ce_orig": 1.252068042755127, + "epoch": 0.2637141419224962, + "kl_loss": 0.26920855045318604, + "loss_ib": 0.01171423215419054, + "step": 917 + }, + { + "ce_ib": 7.715620994567871, + "ce_orig": 0.8185163140296936, + "epoch": 0.2637141419224962, + "kl_loss": 0.4483085572719574, + "loss_ib": 0.012198706157505512, + "step": 917 + }, + { + "ce_ib": 6.617023944854736, + "ce_orig": 1.0761107206344604, + "epoch": 0.2637141419224962, + "kl_loss": 0.3058355748653412, + "loss_ib": 0.00967537984251976, + "step": 917 + }, + { + "ce_ib": 5.220884799957275, + "ce_orig": 0.7539888620376587, + "epoch": 0.2640017255014739, + "kl_loss": 0.26427191495895386, + "loss_ib": 0.0078636035323143, + "step": 918 + }, + { + "ce_ib": 7.620891571044922, + "ce_orig": 0.7659056782722473, + "epoch": 0.2640017255014739, + "kl_loss": 0.30664142966270447, + "loss_ib": 0.010687305592000484, + "step": 918 + }, + { + "ce_ib": 8.80104923248291, + "ce_orig": 1.0766273736953735, + "epoch": 0.2640017255014739, + "kl_loss": 0.370442271232605, + "loss_ib": 0.01250547170639038, + "step": 918 + }, + { + "ce_ib": 12.601346969604492, + "ce_orig": 0.6492716073989868, + "epoch": 0.2640017255014739, + "kl_loss": 0.2531167268753052, + "loss_ib": 0.015132513828575611, + "step": 918 + }, + { + "ce_ib": 7.953497409820557, + "ce_orig": 0.7863147854804993, + "epoch": 0.2642893090804515, + "kl_loss": 0.25480780005455017, + "loss_ib": 0.010501575656235218, + "step": 919 + }, + { + "ce_ib": 9.819870948791504, + "ce_orig": 0.8340607285499573, + "epoch": 0.2642893090804515, + "kl_loss": 0.3362913727760315, + "loss_ib": 0.013182785362005234, + "step": 919 + }, + { + "ce_ib": 7.872880935668945, + "ce_orig": 1.1914820671081543, + "epoch": 0.2642893090804515, + "kl_loss": 0.2527698874473572, + "loss_ib": 0.010400580242276192, + "step": 919 + }, + { + "ce_ib": 10.222329139709473, + "ce_orig": 1.3306633234024048, + "epoch": 0.2642893090804515, + "kl_loss": 0.2606678009033203, + "loss_ib": 0.012829006649553776, + "step": 919 + }, + { + "epoch": 0.26457689265942913, + "grad_norm": 0.11017937958240509, + "learning_rate": 9.913518087164678e-06, + "loss": 0.8505, + "step": 920 + }, + { + "ce_ib": 7.44199800491333, + "ce_orig": 0.5371276140213013, + "epoch": 0.26457689265942913, + "kl_loss": 0.42192068696022034, + "loss_ib": 0.011661205440759659, + "step": 920 + }, + { + "ce_ib": 9.151060104370117, + "ce_orig": 1.068630337715149, + "epoch": 0.26457689265942913, + "kl_loss": 0.31581708788871765, + "loss_ib": 0.01230922993272543, + "step": 920 + }, + { + "ce_ib": 9.204903602600098, + "ce_orig": 0.909243643283844, + "epoch": 0.26457689265942913, + "kl_loss": 0.31501439213752747, + "loss_ib": 0.012355047278106213, + "step": 920 + }, + { + "ce_ib": 9.801901817321777, + "ce_orig": 0.9215630888938904, + "epoch": 0.26457689265942913, + "kl_loss": 0.3821442127227783, + "loss_ib": 0.013623344711959362, + "step": 920 + }, + { + "ce_ib": 9.883593559265137, + "ce_orig": 1.3681849241256714, + "epoch": 0.2648644762384068, + "kl_loss": 0.4136194586753845, + "loss_ib": 0.01401978824287653, + "step": 921 + }, + { + "ce_ib": 2.2384650707244873, + "ce_orig": 0.09530481696128845, + "epoch": 0.2648644762384068, + "kl_loss": 0.5867444276809692, + "loss_ib": 0.008105909451842308, + "step": 921 + }, + { + "ce_ib": 10.180109977722168, + "ce_orig": 0.8450271487236023, + "epoch": 0.2648644762384068, + "kl_loss": 0.2972285747528076, + "loss_ib": 0.013152395375072956, + "step": 921 + }, + { + "ce_ib": 7.981590270996094, + "ce_orig": 0.49811968207359314, + "epoch": 0.2648644762384068, + "kl_loss": 0.41247886419296265, + "loss_ib": 0.012106378562748432, + "step": 921 + }, + { + "ce_ib": 11.12768840789795, + "ce_orig": 1.3055758476257324, + "epoch": 0.26515205981738443, + "kl_loss": 0.4190482497215271, + "loss_ib": 0.015318172052502632, + "step": 922 + }, + { + "ce_ib": 6.9174723625183105, + "ce_orig": 0.5914834141731262, + "epoch": 0.26515205981738443, + "kl_loss": 0.275257408618927, + "loss_ib": 0.009670046158134937, + "step": 922 + }, + { + "ce_ib": 5.110889911651611, + "ce_orig": 0.4252597987651825, + "epoch": 0.26515205981738443, + "kl_loss": 0.25920987129211426, + "loss_ib": 0.007702989038079977, + "step": 922 + }, + { + "ce_ib": 9.542389869689941, + "ce_orig": 1.1555229425430298, + "epoch": 0.26515205981738443, + "kl_loss": 0.27457690238952637, + "loss_ib": 0.012288158759474754, + "step": 922 + }, + { + "ce_ib": 5.449411392211914, + "ce_orig": 0.7608138918876648, + "epoch": 0.26543964339636206, + "kl_loss": 0.2544456422328949, + "loss_ib": 0.007993867620825768, + "step": 923 + }, + { + "ce_ib": 5.343168258666992, + "ce_orig": 0.675761342048645, + "epoch": 0.26543964339636206, + "kl_loss": 0.2828470468521118, + "loss_ib": 0.00817163847386837, + "step": 923 + }, + { + "ce_ib": 5.600818157196045, + "ce_orig": 0.23253723978996277, + "epoch": 0.26543964339636206, + "kl_loss": 0.532296359539032, + "loss_ib": 0.010923781432211399, + "step": 923 + }, + { + "ce_ib": 7.541820526123047, + "ce_orig": 0.9155359268188477, + "epoch": 0.26543964339636206, + "kl_loss": 0.27482742071151733, + "loss_ib": 0.010290094651281834, + "step": 923 + }, + { + "ce_ib": 7.052559852600098, + "ce_orig": 0.744045078754425, + "epoch": 0.2657272269753397, + "kl_loss": 0.2872994840145111, + "loss_ib": 0.009925554506480694, + "step": 924 + }, + { + "ce_ib": 13.817998886108398, + "ce_orig": 1.8272193670272827, + "epoch": 0.2657272269753397, + "kl_loss": 0.3623213469982147, + "loss_ib": 0.017441213130950928, + "step": 924 + }, + { + "ce_ib": 10.127370834350586, + "ce_orig": 1.1621817350387573, + "epoch": 0.2657272269753397, + "kl_loss": 0.30838003754615784, + "loss_ib": 0.013211171142756939, + "step": 924 + }, + { + "ce_ib": 7.752861499786377, + "ce_orig": 0.7536821961402893, + "epoch": 0.2657272269753397, + "kl_loss": 0.3602597415447235, + "loss_ib": 0.011355457827448845, + "step": 924 + }, + { + "epoch": 0.26601481055431736, + "grad_norm": 0.11023896187543869, + "learning_rate": 9.912074973137413e-06, + "loss": 0.9011, + "step": 925 + }, + { + "ce_ib": 6.1034836769104, + "ce_orig": 0.6622889041900635, + "epoch": 0.26601481055431736, + "kl_loss": 0.2770423889160156, + "loss_ib": 0.00887390784919262, + "step": 925 + }, + { + "ce_ib": 8.66541576385498, + "ce_orig": 0.6636568903923035, + "epoch": 0.26601481055431736, + "kl_loss": 0.35214751958847046, + "loss_ib": 0.012186890468001366, + "step": 925 + }, + { + "ce_ib": 8.501686096191406, + "ce_orig": 1.0294500589370728, + "epoch": 0.26601481055431736, + "kl_loss": 0.32191595435142517, + "loss_ib": 0.011720845475792885, + "step": 925 + }, + { + "ce_ib": 8.163397789001465, + "ce_orig": 0.8285762071609497, + "epoch": 0.26601481055431736, + "kl_loss": 0.29768481850624084, + "loss_ib": 0.011140245944261551, + "step": 925 + }, + { + "ce_ib": 6.980233669281006, + "ce_orig": 0.7328478693962097, + "epoch": 0.266302394133295, + "kl_loss": 0.24676677584648132, + "loss_ib": 0.009447900578379631, + "step": 926 + }, + { + "ce_ib": 7.14872407913208, + "ce_orig": 0.8223768472671509, + "epoch": 0.266302394133295, + "kl_loss": 0.30274513363838196, + "loss_ib": 0.010176175273954868, + "step": 926 + }, + { + "ce_ib": 7.805938720703125, + "ce_orig": 0.9373571276664734, + "epoch": 0.266302394133295, + "kl_loss": 0.25375306606292725, + "loss_ib": 0.010343468748033047, + "step": 926 + }, + { + "ce_ib": 4.937993049621582, + "ce_orig": 0.6246491074562073, + "epoch": 0.266302394133295, + "kl_loss": 0.25294995307922363, + "loss_ib": 0.007467492483556271, + "step": 926 + }, + { + "ce_ib": 5.480818271636963, + "ce_orig": 0.5673008561134338, + "epoch": 0.2665899777122726, + "kl_loss": 0.2302064150571823, + "loss_ib": 0.0077828820794820786, + "step": 927 + }, + { + "ce_ib": 6.405802249908447, + "ce_orig": 0.5960977077484131, + "epoch": 0.2665899777122726, + "kl_loss": 0.2936103045940399, + "loss_ib": 0.009341904893517494, + "step": 927 + }, + { + "ce_ib": 11.765841484069824, + "ce_orig": 1.8276900053024292, + "epoch": 0.2665899777122726, + "kl_loss": 0.22832275927066803, + "loss_ib": 0.014049068093299866, + "step": 927 + }, + { + "ce_ib": 10.557626724243164, + "ce_orig": 1.3219131231307983, + "epoch": 0.2665899777122726, + "kl_loss": 0.31494155526161194, + "loss_ib": 0.01370704174041748, + "step": 927 + }, + { + "ce_ib": 7.791426181793213, + "ce_orig": 1.044633388519287, + "epoch": 0.2668775612912503, + "kl_loss": 0.22664925456047058, + "loss_ib": 0.010057918727397919, + "step": 928 + }, + { + "ce_ib": 5.639753341674805, + "ce_orig": 0.7303126454353333, + "epoch": 0.2668775612912503, + "kl_loss": 0.265280544757843, + "loss_ib": 0.00829255860298872, + "step": 928 + }, + { + "ce_ib": 9.726358413696289, + "ce_orig": 1.371625304222107, + "epoch": 0.2668775612912503, + "kl_loss": 0.2165038287639618, + "loss_ib": 0.011891396716237068, + "step": 928 + }, + { + "ce_ib": 6.140399932861328, + "ce_orig": 0.6564132571220398, + "epoch": 0.2668775612912503, + "kl_loss": 0.2733922004699707, + "loss_ib": 0.008874322287738323, + "step": 928 + }, + { + "ce_ib": 6.607884883880615, + "ce_orig": 0.5726844072341919, + "epoch": 0.2671651448702279, + "kl_loss": 0.36948060989379883, + "loss_ib": 0.010302690789103508, + "step": 929 + }, + { + "ce_ib": 8.565917015075684, + "ce_orig": 0.6998363733291626, + "epoch": 0.2671651448702279, + "kl_loss": 0.281715989112854, + "loss_ib": 0.011383076198399067, + "step": 929 + }, + { + "ce_ib": 5.31778621673584, + "ce_orig": 0.3831661343574524, + "epoch": 0.2671651448702279, + "kl_loss": 0.47300854325294495, + "loss_ib": 0.010047871619462967, + "step": 929 + }, + { + "ce_ib": 11.917003631591797, + "ce_orig": 1.680348515510559, + "epoch": 0.2671651448702279, + "kl_loss": 0.4127323031425476, + "loss_ib": 0.01604432612657547, + "step": 929 + }, + { + "epoch": 0.26745272844920553, + "grad_norm": 0.10447093099355698, + "learning_rate": 9.910620024781422e-06, + "loss": 0.9509, + "step": 930 + }, + { + "ce_ib": 11.539644241333008, + "ce_orig": 1.3957158327102661, + "epoch": 0.26745272844920553, + "kl_loss": 0.28435567021369934, + "loss_ib": 0.014383199624717236, + "step": 930 + }, + { + "ce_ib": 7.828366756439209, + "ce_orig": 1.2391057014465332, + "epoch": 0.26745272844920553, + "kl_loss": 0.2012348175048828, + "loss_ib": 0.009840714745223522, + "step": 930 + }, + { + "ce_ib": 8.055999755859375, + "ce_orig": 0.834807276725769, + "epoch": 0.26745272844920553, + "kl_loss": 0.3234785199165344, + "loss_ib": 0.011290784925222397, + "step": 930 + }, + { + "ce_ib": 2.334864854812622, + "ce_orig": 0.1836487501859665, + "epoch": 0.26745272844920553, + "kl_loss": 0.6265314817428589, + "loss_ib": 0.008600179105997086, + "step": 930 + }, + { + "ce_ib": 6.897594928741455, + "ce_orig": 0.6649829745292664, + "epoch": 0.2677403120281832, + "kl_loss": 0.2482946813106537, + "loss_ib": 0.009380541741847992, + "step": 931 + }, + { + "ce_ib": 2.828857660293579, + "ce_orig": 0.37604042887687683, + "epoch": 0.2677403120281832, + "kl_loss": 0.566237211227417, + "loss_ib": 0.008491230197250843, + "step": 931 + }, + { + "ce_ib": 10.757418632507324, + "ce_orig": 1.2388101816177368, + "epoch": 0.2677403120281832, + "kl_loss": 0.3185473084449768, + "loss_ib": 0.013942892663180828, + "step": 931 + }, + { + "ce_ib": 5.737816333770752, + "ce_orig": 0.8773601651191711, + "epoch": 0.2677403120281832, + "kl_loss": 0.28802287578582764, + "loss_ib": 0.008618045598268509, + "step": 931 + }, + { + "ce_ib": 5.9482855796813965, + "ce_orig": 0.7482618689537048, + "epoch": 0.26802789560716084, + "kl_loss": 0.36070847511291504, + "loss_ib": 0.00955536961555481, + "step": 932 + }, + { + "ce_ib": 15.711875915527344, + "ce_orig": 2.0872745513916016, + "epoch": 0.26802789560716084, + "kl_loss": 0.3053112328052521, + "loss_ib": 0.018764987587928772, + "step": 932 + }, + { + "ce_ib": 7.429306507110596, + "ce_orig": 0.7280347347259521, + "epoch": 0.26802789560716084, + "kl_loss": 0.2755368649959564, + "loss_ib": 0.010184675455093384, + "step": 932 + }, + { + "ce_ib": 6.810687065124512, + "ce_orig": 0.5476087927818298, + "epoch": 0.26802789560716084, + "kl_loss": 0.254897803068161, + "loss_ib": 0.009359664283692837, + "step": 932 + }, + { + "ce_ib": 4.790681838989258, + "ce_orig": 0.6239033341407776, + "epoch": 0.26831547918613846, + "kl_loss": 0.2333354651927948, + "loss_ib": 0.007124036550521851, + "step": 933 + }, + { + "ce_ib": 10.430938720703125, + "ce_orig": 1.3715506792068481, + "epoch": 0.26831547918613846, + "kl_loss": 0.3643788695335388, + "loss_ib": 0.014074727892875671, + "step": 933 + }, + { + "ce_ib": 8.228853225708008, + "ce_orig": 0.7698665857315063, + "epoch": 0.26831547918613846, + "kl_loss": 0.25796443223953247, + "loss_ib": 0.010808497667312622, + "step": 933 + }, + { + "ce_ib": 10.43002986907959, + "ce_orig": 0.6817733645439148, + "epoch": 0.26831547918613846, + "kl_loss": 0.40837064385414124, + "loss_ib": 0.01451373565942049, + "step": 933 + }, + { + "ce_ib": 6.217407703399658, + "ce_orig": 0.5401474237442017, + "epoch": 0.2686030627651161, + "kl_loss": 0.244707390666008, + "loss_ib": 0.008664481341838837, + "step": 934 + }, + { + "ce_ib": 6.634978294372559, + "ce_orig": 0.7534041404724121, + "epoch": 0.2686030627651161, + "kl_loss": 0.2863770127296448, + "loss_ib": 0.009498748928308487, + "step": 934 + }, + { + "ce_ib": 6.577731132507324, + "ce_orig": 0.7866048812866211, + "epoch": 0.2686030627651161, + "kl_loss": 0.3120877146720886, + "loss_ib": 0.009698607958853245, + "step": 934 + }, + { + "ce_ib": 6.873836994171143, + "ce_orig": 0.6249902248382568, + "epoch": 0.2686030627651161, + "kl_loss": 0.2403496652841568, + "loss_ib": 0.00927733350545168, + "step": 934 + }, + { + "epoch": 0.26889064634409376, + "grad_norm": 0.09980176389217377, + "learning_rate": 9.909153245602012e-06, + "loss": 0.8424, + "step": 935 + }, + { + "ce_ib": 8.975276947021484, + "ce_orig": 1.0671329498291016, + "epoch": 0.26889064634409376, + "kl_loss": 0.31780779361724854, + "loss_ib": 0.012153354473412037, + "step": 935 + }, + { + "ce_ib": 7.080759048461914, + "ce_orig": 0.6667758226394653, + "epoch": 0.26889064634409376, + "kl_loss": 0.4114856719970703, + "loss_ib": 0.01119561679661274, + "step": 935 + }, + { + "ce_ib": 8.725547790527344, + "ce_orig": 0.8014039993286133, + "epoch": 0.26889064634409376, + "kl_loss": 0.23206469416618347, + "loss_ib": 0.011046194471418858, + "step": 935 + }, + { + "ce_ib": 6.310739040374756, + "ce_orig": 0.6323633193969727, + "epoch": 0.26889064634409376, + "kl_loss": 0.3221661448478699, + "loss_ib": 0.009532400406897068, + "step": 935 + }, + { + "ce_ib": 6.315284729003906, + "ce_orig": 0.5775780081748962, + "epoch": 0.2691782299230714, + "kl_loss": 0.29390376806259155, + "loss_ib": 0.00925432238727808, + "step": 936 + }, + { + "ce_ib": 4.208686828613281, + "ce_orig": 0.4354095160961151, + "epoch": 0.2691782299230714, + "kl_loss": 0.297516405582428, + "loss_ib": 0.00718385074287653, + "step": 936 + }, + { + "ce_ib": 8.687440872192383, + "ce_orig": 0.6239301562309265, + "epoch": 0.2691782299230714, + "kl_loss": 0.30756255984306335, + "loss_ib": 0.011763066053390503, + "step": 936 + }, + { + "ce_ib": 8.584075927734375, + "ce_orig": 1.136924386024475, + "epoch": 0.2691782299230714, + "kl_loss": 0.2599954903125763, + "loss_ib": 0.011184030212461948, + "step": 936 + }, + { + "ce_ib": 5.511157035827637, + "ce_orig": 0.6932129859924316, + "epoch": 0.269465813502049, + "kl_loss": 0.27716803550720215, + "loss_ib": 0.008282837457954884, + "step": 937 + }, + { + "ce_ib": 9.595725059509277, + "ce_orig": 0.9539211988449097, + "epoch": 0.269465813502049, + "kl_loss": 0.24416208267211914, + "loss_ib": 0.012037346139550209, + "step": 937 + }, + { + "ce_ib": 6.6203083992004395, + "ce_orig": 0.9232450127601624, + "epoch": 0.269465813502049, + "kl_loss": 0.20647379755973816, + "loss_ib": 0.008685045875608921, + "step": 937 + }, + { + "ce_ib": 7.428555488586426, + "ce_orig": 0.48220083117485046, + "epoch": 0.269465813502049, + "kl_loss": 0.35726824402809143, + "loss_ib": 0.01100123766809702, + "step": 937 + }, + { + "ce_ib": 8.365070343017578, + "ce_orig": 0.43749791383743286, + "epoch": 0.2697533970810267, + "kl_loss": 0.30819666385650635, + "loss_ib": 0.01144703570753336, + "step": 938 + }, + { + "ce_ib": 8.154250144958496, + "ce_orig": 0.9003611207008362, + "epoch": 0.2697533970810267, + "kl_loss": 0.4072403311729431, + "loss_ib": 0.012226653285324574, + "step": 938 + }, + { + "ce_ib": 6.375957012176514, + "ce_orig": 0.6323530673980713, + "epoch": 0.2697533970810267, + "kl_loss": 0.32399243116378784, + "loss_ib": 0.009615881368517876, + "step": 938 + }, + { + "ce_ib": 7.025842666625977, + "ce_orig": 0.43968331813812256, + "epoch": 0.2697533970810267, + "kl_loss": 0.312505841255188, + "loss_ib": 0.010150901041924953, + "step": 938 + }, + { + "ce_ib": 7.08709192276001, + "ce_orig": 0.5549724698066711, + "epoch": 0.2700409806600043, + "kl_loss": 0.4219147861003876, + "loss_ib": 0.011306239292025566, + "step": 939 + }, + { + "ce_ib": 7.558558464050293, + "ce_orig": 1.0065475702285767, + "epoch": 0.2700409806600043, + "kl_loss": 0.2504430413246155, + "loss_ib": 0.010062988847494125, + "step": 939 + }, + { + "ce_ib": 6.018697738647461, + "ce_orig": 0.8349258899688721, + "epoch": 0.2700409806600043, + "kl_loss": 0.20029550790786743, + "loss_ib": 0.008021652698516846, + "step": 939 + }, + { + "ce_ib": 10.575093269348145, + "ce_orig": 1.4976614713668823, + "epoch": 0.2700409806600043, + "kl_loss": 0.3008938133716583, + "loss_ib": 0.013584030792117119, + "step": 939 + }, + { + "epoch": 0.27032856423898194, + "grad_norm": 0.10055164247751236, + "learning_rate": 9.907674639132995e-06, + "loss": 0.8408, + "step": 940 + }, + { + "ce_ib": 4.028119087219238, + "ce_orig": 0.4562585949897766, + "epoch": 0.27032856423898194, + "kl_loss": 0.2093207985162735, + "loss_ib": 0.006121327169239521, + "step": 940 + }, + { + "ce_ib": 6.0146613121032715, + "ce_orig": 0.6793175935745239, + "epoch": 0.27032856423898194, + "kl_loss": 0.21066246926784515, + "loss_ib": 0.00812128558754921, + "step": 940 + }, + { + "ce_ib": 8.57176399230957, + "ce_orig": 1.1877782344818115, + "epoch": 0.27032856423898194, + "kl_loss": 0.32324960827827454, + "loss_ib": 0.01180425938218832, + "step": 940 + }, + { + "ce_ib": 10.997118949890137, + "ce_orig": 1.800614595413208, + "epoch": 0.27032856423898194, + "kl_loss": 0.2794951796531677, + "loss_ib": 0.013792071491479874, + "step": 940 + }, + { + "ce_ib": 7.437707424163818, + "ce_orig": 1.0083261728286743, + "epoch": 0.2706161478179596, + "kl_loss": 0.21823345124721527, + "loss_ib": 0.009620042517781258, + "step": 941 + }, + { + "ce_ib": 6.081821441650391, + "ce_orig": 0.7146956324577332, + "epoch": 0.2706161478179596, + "kl_loss": 0.23236779868602753, + "loss_ib": 0.008405499160289764, + "step": 941 + }, + { + "ce_ib": 10.360185623168945, + "ce_orig": 1.2722724676132202, + "epoch": 0.2706161478179596, + "kl_loss": 0.4324212670326233, + "loss_ib": 0.014684397727251053, + "step": 941 + }, + { + "ce_ib": 8.173184394836426, + "ce_orig": 0.3789297640323639, + "epoch": 0.2706161478179596, + "kl_loss": 0.318384051322937, + "loss_ib": 0.011357024312019348, + "step": 941 + }, + { + "ce_ib": 7.6668782234191895, + "ce_orig": 0.6972788572311401, + "epoch": 0.27090373139693724, + "kl_loss": 0.40589210391044617, + "loss_ib": 0.01172579824924469, + "step": 942 + }, + { + "ce_ib": 8.189241409301758, + "ce_orig": 0.7313915491104126, + "epoch": 0.27090373139693724, + "kl_loss": 0.28188782930374146, + "loss_ib": 0.011008119210600853, + "step": 942 + }, + { + "ce_ib": 7.981906414031982, + "ce_orig": 0.9324852824211121, + "epoch": 0.27090373139693724, + "kl_loss": 0.3256570100784302, + "loss_ib": 0.011238477192819118, + "step": 942 + }, + { + "ce_ib": 8.018670082092285, + "ce_orig": 0.8148074150085449, + "epoch": 0.27090373139693724, + "kl_loss": 0.3132972717285156, + "loss_ib": 0.01115164253860712, + "step": 942 + }, + { + "ce_ib": 8.356046676635742, + "ce_orig": 0.5797178745269775, + "epoch": 0.27119131497591487, + "kl_loss": 0.4871473014354706, + "loss_ib": 0.013227519579231739, + "step": 943 + }, + { + "ce_ib": 10.591626167297363, + "ce_orig": 1.4526135921478271, + "epoch": 0.27119131497591487, + "kl_loss": 0.374215304851532, + "loss_ib": 0.01433377992361784, + "step": 943 + }, + { + "ce_ib": 6.816173553466797, + "ce_orig": 0.7827908396720886, + "epoch": 0.27119131497591487, + "kl_loss": 0.31245943903923035, + "loss_ib": 0.009940768592059612, + "step": 943 + }, + { + "ce_ib": 8.920738220214844, + "ce_orig": 1.1158427000045776, + "epoch": 0.27119131497591487, + "kl_loss": 0.3286566436290741, + "loss_ib": 0.012207304127514362, + "step": 943 + }, + { + "ce_ib": 9.12059211730957, + "ce_orig": 1.1127867698669434, + "epoch": 0.2714788985548925, + "kl_loss": 0.5925597548484802, + "loss_ib": 0.015046189539134502, + "step": 944 + }, + { + "ce_ib": 7.13171911239624, + "ce_orig": 0.7354855537414551, + "epoch": 0.2714788985548925, + "kl_loss": 0.3313751220703125, + "loss_ib": 0.010445470921695232, + "step": 944 + }, + { + "ce_ib": 5.612328052520752, + "ce_orig": 0.6953266859054565, + "epoch": 0.2714788985548925, + "kl_loss": 0.22593432664871216, + "loss_ib": 0.007871671579778194, + "step": 944 + }, + { + "ce_ib": 7.506075859069824, + "ce_orig": 0.7433436512947083, + "epoch": 0.2714788985548925, + "kl_loss": 0.3743892014026642, + "loss_ib": 0.011249967850744724, + "step": 944 + }, + { + "epoch": 0.27176648213387017, + "grad_norm": 0.10328007489442825, + "learning_rate": 9.906184208936675e-06, + "loss": 0.8559, + "step": 945 + }, + { + "ce_ib": 6.606997966766357, + "ce_orig": 0.9594669342041016, + "epoch": 0.27176648213387017, + "kl_loss": 0.3110538423061371, + "loss_ib": 0.00971753615885973, + "step": 945 + }, + { + "ce_ib": 7.5082621574401855, + "ce_orig": 0.7433197498321533, + "epoch": 0.27176648213387017, + "kl_loss": 0.3739091157913208, + "loss_ib": 0.011247353628277779, + "step": 945 + }, + { + "ce_ib": 6.4652605056762695, + "ce_orig": 0.44874706864356995, + "epoch": 0.27176648213387017, + "kl_loss": 0.3493693470954895, + "loss_ib": 0.009958953596651554, + "step": 945 + }, + { + "ce_ib": 11.719304084777832, + "ce_orig": 1.1819493770599365, + "epoch": 0.27176648213387017, + "kl_loss": 0.2937600314617157, + "loss_ib": 0.01465690415352583, + "step": 945 + }, + { + "ce_ib": 12.488480567932129, + "ce_orig": 1.4912511110305786, + "epoch": 0.2720540657128478, + "kl_loss": 0.3444375991821289, + "loss_ib": 0.015932856127619743, + "step": 946 + }, + { + "ce_ib": 6.640789985656738, + "ce_orig": 0.7050909399986267, + "epoch": 0.2720540657128478, + "kl_loss": 0.2555736005306244, + "loss_ib": 0.009196525439620018, + "step": 946 + }, + { + "ce_ib": 4.94317626953125, + "ce_orig": 0.5460999608039856, + "epoch": 0.2720540657128478, + "kl_loss": 0.28116828203201294, + "loss_ib": 0.007754858583211899, + "step": 946 + }, + { + "ce_ib": 12.355217933654785, + "ce_orig": 1.9291751384735107, + "epoch": 0.2720540657128478, + "kl_loss": 0.3321114778518677, + "loss_ib": 0.015676332637667656, + "step": 946 + }, + { + "ce_ib": 9.124369621276855, + "ce_orig": 1.180700659751892, + "epoch": 0.2723416492918254, + "kl_loss": 0.2571544945240021, + "loss_ib": 0.011695913970470428, + "step": 947 + }, + { + "ce_ib": 4.049078941345215, + "ce_orig": 0.46376824378967285, + "epoch": 0.2723416492918254, + "kl_loss": 0.5594756007194519, + "loss_ib": 0.00964383501559496, + "step": 947 + }, + { + "ce_ib": 6.723659515380859, + "ce_orig": 0.8255409002304077, + "epoch": 0.2723416492918254, + "kl_loss": 0.5856155157089233, + "loss_ib": 0.012579815462231636, + "step": 947 + }, + { + "ce_ib": 6.566412925720215, + "ce_orig": 0.3921998143196106, + "epoch": 0.2723416492918254, + "kl_loss": 0.8435590863227844, + "loss_ib": 0.015002003870904446, + "step": 947 + }, + { + "ce_ib": 7.8375725746154785, + "ce_orig": 1.2053213119506836, + "epoch": 0.2726292328708031, + "kl_loss": 0.3942751884460449, + "loss_ib": 0.011780323460698128, + "step": 948 + }, + { + "ce_ib": 6.784183025360107, + "ce_orig": 0.8397347331047058, + "epoch": 0.2726292328708031, + "kl_loss": 0.3014325499534607, + "loss_ib": 0.009798509068787098, + "step": 948 + }, + { + "ce_ib": 9.067359924316406, + "ce_orig": 0.6075240969657898, + "epoch": 0.2726292328708031, + "kl_loss": 0.3293374180793762, + "loss_ib": 0.01236073486506939, + "step": 948 + }, + { + "ce_ib": 8.427669525146484, + "ce_orig": 0.67606520652771, + "epoch": 0.2726292328708031, + "kl_loss": 0.411496102809906, + "loss_ib": 0.012542630545794964, + "step": 948 + }, + { + "ce_ib": 9.243931770324707, + "ce_orig": 0.8393007516860962, + "epoch": 0.2729168164497807, + "kl_loss": 0.31467002630233765, + "loss_ib": 0.012390632182359695, + "step": 949 + }, + { + "ce_ib": 10.456159591674805, + "ce_orig": 1.1939802169799805, + "epoch": 0.2729168164497807, + "kl_loss": 0.30369293689727783, + "loss_ib": 0.013493089005351067, + "step": 949 + }, + { + "ce_ib": 3.625389337539673, + "ce_orig": 0.4330550730228424, + "epoch": 0.2729168164497807, + "kl_loss": 0.21511869132518768, + "loss_ib": 0.005776576232165098, + "step": 949 + }, + { + "ce_ib": 9.729208946228027, + "ce_orig": 1.1287404298782349, + "epoch": 0.2729168164497807, + "kl_loss": 0.29766565561294556, + "loss_ib": 0.012705864384770393, + "step": 949 + }, + { + "epoch": 0.27320440002875834, + "grad_norm": 0.11360763758420944, + "learning_rate": 9.904681958603847e-06, + "loss": 0.8716, + "step": 950 + }, + { + "ce_ib": 5.15933084487915, + "ce_orig": 0.5913940668106079, + "epoch": 0.27320440002875834, + "kl_loss": 0.2843480110168457, + "loss_ib": 0.0080028111115098, + "step": 950 + }, + { + "ce_ib": 6.33256721496582, + "ce_orig": 0.7312302589416504, + "epoch": 0.27320440002875834, + "kl_loss": 0.3365132212638855, + "loss_ib": 0.00969769898802042, + "step": 950 + }, + { + "ce_ib": 7.236344814300537, + "ce_orig": 0.9631039500236511, + "epoch": 0.27320440002875834, + "kl_loss": 0.3992907404899597, + "loss_ib": 0.011229252442717552, + "step": 950 + }, + { + "ce_ib": 8.233247756958008, + "ce_orig": 0.9781149625778198, + "epoch": 0.27320440002875834, + "kl_loss": 0.32408010959625244, + "loss_ib": 0.011474048718810081, + "step": 950 + }, + { + "ce_ib": 9.425809860229492, + "ce_orig": 1.0185048580169678, + "epoch": 0.273491983607736, + "kl_loss": 0.3078628480434418, + "loss_ib": 0.012504437938332558, + "step": 951 + }, + { + "ce_ib": 5.389331340789795, + "ce_orig": 0.7841058969497681, + "epoch": 0.273491983607736, + "kl_loss": 0.28654882311820984, + "loss_ib": 0.008254819549620152, + "step": 951 + }, + { + "ce_ib": 9.956135749816895, + "ce_orig": 1.0741745233535767, + "epoch": 0.273491983607736, + "kl_loss": 0.2052108645439148, + "loss_ib": 0.0120082451030612, + "step": 951 + }, + { + "ce_ib": 10.591538429260254, + "ce_orig": 1.5130939483642578, + "epoch": 0.273491983607736, + "kl_loss": 0.3333120346069336, + "loss_ib": 0.013924659229815006, + "step": 951 + }, + { + "ce_ib": 11.254968643188477, + "ce_orig": 1.5675315856933594, + "epoch": 0.27377956718671365, + "kl_loss": 0.2911054790019989, + "loss_ib": 0.014166023582220078, + "step": 952 + }, + { + "ce_ib": 5.313507556915283, + "ce_orig": 0.3765413761138916, + "epoch": 0.27377956718671365, + "kl_loss": 0.23264440894126892, + "loss_ib": 0.007639951538294554, + "step": 952 + }, + { + "ce_ib": 8.539169311523438, + "ce_orig": 1.1101938486099243, + "epoch": 0.27377956718671365, + "kl_loss": 0.30267441272735596, + "loss_ib": 0.011565913446247578, + "step": 952 + }, + { + "ce_ib": 8.370697975158691, + "ce_orig": 1.101776123046875, + "epoch": 0.27377956718671365, + "kl_loss": 0.2343529760837555, + "loss_ib": 0.010714228264987469, + "step": 952 + }, + { + "ce_ib": 7.463455677032471, + "ce_orig": 0.7007122039794922, + "epoch": 0.27406715076569127, + "kl_loss": 0.2482740581035614, + "loss_ib": 0.009946195408701897, + "step": 953 + }, + { + "ce_ib": 9.414673805236816, + "ce_orig": 0.7169649004936218, + "epoch": 0.27406715076569127, + "kl_loss": 0.386259526014328, + "loss_ib": 0.013277268968522549, + "step": 953 + }, + { + "ce_ib": 6.160613059997559, + "ce_orig": 0.6735572814941406, + "epoch": 0.27406715076569127, + "kl_loss": 0.2884378135204315, + "loss_ib": 0.009044991806149483, + "step": 953 + }, + { + "ce_ib": 4.295875549316406, + "ce_orig": 0.3026116192340851, + "epoch": 0.27406715076569127, + "kl_loss": 0.3349815607070923, + "loss_ib": 0.007645691279321909, + "step": 953 + }, + { + "ce_ib": 11.889391899108887, + "ce_orig": 1.7062128782272339, + "epoch": 0.2743547343446689, + "kl_loss": 0.3287060856819153, + "loss_ib": 0.01517645362764597, + "step": 954 + }, + { + "ce_ib": 9.97256851196289, + "ce_orig": 0.8832455277442932, + "epoch": 0.2743547343446689, + "kl_loss": 0.3570169508457184, + "loss_ib": 0.013542737811803818, + "step": 954 + }, + { + "ce_ib": 11.863001823425293, + "ce_orig": 1.066734790802002, + "epoch": 0.2743547343446689, + "kl_loss": 0.3354340195655823, + "loss_ib": 0.015217342413961887, + "step": 954 + }, + { + "ce_ib": 6.305656909942627, + "ce_orig": 0.609102725982666, + "epoch": 0.2743547343446689, + "kl_loss": 0.33587515354156494, + "loss_ib": 0.009664407931268215, + "step": 954 + }, + { + "epoch": 0.2746423179236466, + "grad_norm": 0.09016523510217667, + "learning_rate": 9.903167891753781e-06, + "loss": 0.863, + "step": 955 + }, + { + "ce_ib": 7.7995100021362305, + "ce_orig": 0.48420944809913635, + "epoch": 0.2746423179236466, + "kl_loss": 0.4207814335823059, + "loss_ib": 0.012007324025034904, + "step": 955 + }, + { + "ce_ib": 9.223494529724121, + "ce_orig": 0.8398165702819824, + "epoch": 0.2746423179236466, + "kl_loss": 0.31191039085388184, + "loss_ib": 0.012342598289251328, + "step": 955 + }, + { + "ce_ib": 9.061304092407227, + "ce_orig": 0.5709256529808044, + "epoch": 0.2746423179236466, + "kl_loss": 0.35397371649742126, + "loss_ib": 0.012601041235029697, + "step": 955 + }, + { + "ce_ib": 6.83884859085083, + "ce_orig": 0.8219555020332336, + "epoch": 0.2746423179236466, + "kl_loss": 0.4126385450363159, + "loss_ib": 0.01096523366868496, + "step": 955 + }, + { + "ce_ib": 4.699288368225098, + "ce_orig": 0.44520479440689087, + "epoch": 0.2749299015026242, + "kl_loss": 0.325499027967453, + "loss_ib": 0.007954278029501438, + "step": 956 + }, + { + "ce_ib": 8.23465633392334, + "ce_orig": 0.8416075110435486, + "epoch": 0.2749299015026242, + "kl_loss": 0.21434545516967773, + "loss_ib": 0.010378110222518444, + "step": 956 + }, + { + "ce_ib": 6.925322532653809, + "ce_orig": 0.47291597723960876, + "epoch": 0.2749299015026242, + "kl_loss": 0.8208890557289124, + "loss_ib": 0.01513421256095171, + "step": 956 + }, + { + "ce_ib": 4.961340427398682, + "ce_orig": 0.7618390917778015, + "epoch": 0.2749299015026242, + "kl_loss": 0.2526277005672455, + "loss_ib": 0.0074876174330711365, + "step": 956 + }, + { + "ce_ib": 7.343484401702881, + "ce_orig": 0.8298138380050659, + "epoch": 0.2752174850816018, + "kl_loss": 0.36207157373428345, + "loss_ib": 0.010964199900627136, + "step": 957 + }, + { + "ce_ib": 7.905651569366455, + "ce_orig": 1.0571790933609009, + "epoch": 0.2752174850816018, + "kl_loss": 0.27009880542755127, + "loss_ib": 0.010606639087200165, + "step": 957 + }, + { + "ce_ib": 6.430732727050781, + "ce_orig": 0.7019039392471313, + "epoch": 0.2752174850816018, + "kl_loss": 0.3244302272796631, + "loss_ib": 0.009675034321844578, + "step": 957 + }, + { + "ce_ib": 9.795534133911133, + "ce_orig": 1.5741654634475708, + "epoch": 0.2752174850816018, + "kl_loss": 0.27315622568130493, + "loss_ib": 0.012527096085250378, + "step": 957 + }, + { + "ce_ib": 7.98039436340332, + "ce_orig": 0.5512452721595764, + "epoch": 0.2755050686605795, + "kl_loss": 0.3615424633026123, + "loss_ib": 0.011595819145441055, + "step": 958 + }, + { + "ce_ib": 6.763614654541016, + "ce_orig": 0.6786366105079651, + "epoch": 0.2755050686605795, + "kl_loss": 0.28236159682273865, + "loss_ib": 0.009587230160832405, + "step": 958 + }, + { + "ce_ib": 4.218715667724609, + "ce_orig": 0.6746366620063782, + "epoch": 0.2755050686605795, + "kl_loss": 0.3110579550266266, + "loss_ib": 0.007329294923692942, + "step": 958 + }, + { + "ce_ib": 11.259781837463379, + "ce_orig": 1.4071563482284546, + "epoch": 0.2755050686605795, + "kl_loss": 0.25216126441955566, + "loss_ib": 0.013781394809484482, + "step": 958 + }, + { + "ce_ib": 5.040888786315918, + "ce_orig": 0.8139171600341797, + "epoch": 0.2757926522395571, + "kl_loss": 0.1931689977645874, + "loss_ib": 0.0069725788198411465, + "step": 959 + }, + { + "ce_ib": 8.201353073120117, + "ce_orig": 0.8536245822906494, + "epoch": 0.2757926522395571, + "kl_loss": 0.2896878719329834, + "loss_ib": 0.011098232120275497, + "step": 959 + }, + { + "ce_ib": 5.141984939575195, + "ce_orig": 0.36424100399017334, + "epoch": 0.2757926522395571, + "kl_loss": 0.2921089828014374, + "loss_ib": 0.008063074201345444, + "step": 959 + }, + { + "ce_ib": 10.956453323364258, + "ce_orig": 1.2245234251022339, + "epoch": 0.2757926522395571, + "kl_loss": 0.4386565685272217, + "loss_ib": 0.015343018807470798, + "step": 959 + }, + { + "epoch": 0.27608023581853475, + "grad_norm": 0.1057107001543045, + "learning_rate": 9.901642012034214e-06, + "loss": 0.7911, + "step": 960 + }, + { + "ce_ib": 13.470467567443848, + "ce_orig": 1.7405394315719604, + "epoch": 0.27608023581853475, + "kl_loss": 0.328184574842453, + "loss_ib": 0.01675231382250786, + "step": 960 + }, + { + "ce_ib": 6.385258197784424, + "ce_orig": 0.726634681224823, + "epoch": 0.27608023581853475, + "kl_loss": 0.2683427333831787, + "loss_ib": 0.009068685583770275, + "step": 960 + }, + { + "ce_ib": 12.370391845703125, + "ce_orig": 1.2445263862609863, + "epoch": 0.27608023581853475, + "kl_loss": 0.29927101731300354, + "loss_ib": 0.015363101847469807, + "step": 960 + }, + { + "ce_ib": 8.821762084960938, + "ce_orig": 1.0471879243850708, + "epoch": 0.27608023581853475, + "kl_loss": 0.19080427289009094, + "loss_ib": 0.010729804635047913, + "step": 960 + }, + { + "ce_ib": 8.348326683044434, + "ce_orig": 1.2821494340896606, + "epoch": 0.2763678193975124, + "kl_loss": 0.4306579828262329, + "loss_ib": 0.012654906138777733, + "step": 961 + }, + { + "ce_ib": 9.511332511901855, + "ce_orig": 1.11271333694458, + "epoch": 0.2763678193975124, + "kl_loss": 0.39218825101852417, + "loss_ib": 0.013433215208351612, + "step": 961 + }, + { + "ce_ib": 10.88101577758789, + "ce_orig": 1.3116422891616821, + "epoch": 0.2763678193975124, + "kl_loss": 0.28412100672721863, + "loss_ib": 0.013722226023674011, + "step": 961 + }, + { + "ce_ib": 4.692732810974121, + "ce_orig": 0.5053314566612244, + "epoch": 0.2763678193975124, + "kl_loss": 0.20232996344566345, + "loss_ib": 0.006716032512485981, + "step": 961 + }, + { + "ce_ib": 4.9355149269104, + "ce_orig": 0.46557968854904175, + "epoch": 0.27665540297649005, + "kl_loss": 0.5611593723297119, + "loss_ib": 0.010547108016908169, + "step": 962 + }, + { + "ce_ib": 9.1857271194458, + "ce_orig": 0.8455274105072021, + "epoch": 0.27665540297649005, + "kl_loss": 0.2985427975654602, + "loss_ib": 0.012171154841780663, + "step": 962 + }, + { + "ce_ib": 10.592644691467285, + "ce_orig": 1.263620376586914, + "epoch": 0.27665540297649005, + "kl_loss": 0.4153948724269867, + "loss_ib": 0.014746594242751598, + "step": 962 + }, + { + "ce_ib": 6.028276443481445, + "ce_orig": 0.6360313892364502, + "epoch": 0.27665540297649005, + "kl_loss": 0.256117582321167, + "loss_ib": 0.008589452132582664, + "step": 962 + }, + { + "ce_ib": 7.142398357391357, + "ce_orig": 0.8674178123474121, + "epoch": 0.2769429865554677, + "kl_loss": 0.26119542121887207, + "loss_ib": 0.009754352271556854, + "step": 963 + }, + { + "ce_ib": 4.550671577453613, + "ce_orig": 0.4504320025444031, + "epoch": 0.2769429865554677, + "kl_loss": 0.27785179018974304, + "loss_ib": 0.00732918968424201, + "step": 963 + }, + { + "ce_ib": 10.917935371398926, + "ce_orig": 1.3695002794265747, + "epoch": 0.2769429865554677, + "kl_loss": 0.310863733291626, + "loss_ib": 0.014026571996510029, + "step": 963 + }, + { + "ce_ib": 5.352876663208008, + "ce_orig": 0.9681556224822998, + "epoch": 0.2769429865554677, + "kl_loss": 0.24146953225135803, + "loss_ib": 0.007767572067677975, + "step": 963 + }, + { + "ce_ib": 6.516645908355713, + "ce_orig": 0.6153663396835327, + "epoch": 0.2772305701344453, + "kl_loss": 0.2555677592754364, + "loss_ib": 0.009072324261069298, + "step": 964 + }, + { + "ce_ib": 5.9202094078063965, + "ce_orig": 0.562997043132782, + "epoch": 0.2772305701344453, + "kl_loss": 0.32697319984436035, + "loss_ib": 0.009189940989017487, + "step": 964 + }, + { + "ce_ib": 11.461648941040039, + "ce_orig": 1.4160990715026855, + "epoch": 0.2772305701344453, + "kl_loss": 0.20565414428710938, + "loss_ib": 0.013518190011382103, + "step": 964 + }, + { + "ce_ib": 4.950023174285889, + "ce_orig": 0.5089555978775024, + "epoch": 0.2772305701344453, + "kl_loss": 0.2313445806503296, + "loss_ib": 0.00726346904411912, + "step": 964 + }, + { + "epoch": 0.277518153713423, + "grad_norm": 0.10406413674354553, + "learning_rate": 9.900104323121344e-06, + "loss": 0.8932, + "step": 965 + }, + { + "ce_ib": 3.8523833751678467, + "ce_orig": 0.6625568866729736, + "epoch": 0.277518153713423, + "kl_loss": 0.206961527466774, + "loss_ib": 0.005921998526901007, + "step": 965 + }, + { + "ce_ib": 5.566319942474365, + "ce_orig": 0.4371108114719391, + "epoch": 0.277518153713423, + "kl_loss": 0.3670559823513031, + "loss_ib": 0.009236878715455532, + "step": 965 + }, + { + "ce_ib": 6.957183837890625, + "ce_orig": 0.5599235892295837, + "epoch": 0.277518153713423, + "kl_loss": 0.23749658465385437, + "loss_ib": 0.009332150220870972, + "step": 965 + }, + { + "ce_ib": 10.045352935791016, + "ce_orig": 1.2828036546707153, + "epoch": 0.277518153713423, + "kl_loss": 0.2769809663295746, + "loss_ib": 0.012815162539482117, + "step": 965 + }, + { + "ce_ib": 6.997009754180908, + "ce_orig": 0.8958306312561035, + "epoch": 0.2778057372924006, + "kl_loss": 0.26039648056030273, + "loss_ib": 0.00960097461938858, + "step": 966 + }, + { + "ce_ib": 9.824999809265137, + "ce_orig": 1.1781514883041382, + "epoch": 0.2778057372924006, + "kl_loss": 0.32364603877067566, + "loss_ib": 0.013061460107564926, + "step": 966 + }, + { + "ce_ib": 10.24802017211914, + "ce_orig": 1.2914663553237915, + "epoch": 0.2778057372924006, + "kl_loss": 0.3219491243362427, + "loss_ib": 0.013467512093484402, + "step": 966 + }, + { + "ce_ib": 5.80756950378418, + "ce_orig": 0.7262393236160278, + "epoch": 0.2778057372924006, + "kl_loss": 0.25441619753837585, + "loss_ib": 0.008351731114089489, + "step": 966 + }, + { + "ce_ib": 10.359816551208496, + "ce_orig": 1.53457510471344, + "epoch": 0.2780933208713782, + "kl_loss": 0.28124716877937317, + "loss_ib": 0.013172287493944168, + "step": 967 + }, + { + "ce_ib": 3.750755548477173, + "ce_orig": 0.25399720668792725, + "epoch": 0.2780933208713782, + "kl_loss": 0.5752956867218018, + "loss_ib": 0.009503712877631187, + "step": 967 + }, + { + "ce_ib": 8.690093994140625, + "ce_orig": 0.9079038500785828, + "epoch": 0.2780933208713782, + "kl_loss": 0.4378716051578522, + "loss_ib": 0.013068810105323792, + "step": 967 + }, + { + "ce_ib": 5.155570030212402, + "ce_orig": 0.4399418234825134, + "epoch": 0.2780933208713782, + "kl_loss": 0.19257600605487823, + "loss_ib": 0.007081329822540283, + "step": 967 + }, + { + "ce_ib": 7.317140102386475, + "ce_orig": 0.9123186469078064, + "epoch": 0.2783809044503559, + "kl_loss": 0.2773372530937195, + "loss_ib": 0.010090513154864311, + "step": 968 + }, + { + "ce_ib": 9.807036399841309, + "ce_orig": 1.104854702949524, + "epoch": 0.2783809044503559, + "kl_loss": 0.31141215562820435, + "loss_ib": 0.012921158224344254, + "step": 968 + }, + { + "ce_ib": 2.5050039291381836, + "ce_orig": 0.18219490349292755, + "epoch": 0.2783809044503559, + "kl_loss": 0.5909286141395569, + "loss_ib": 0.00841428991407156, + "step": 968 + }, + { + "ce_ib": 5.669373512268066, + "ce_orig": 0.5021364092826843, + "epoch": 0.2783809044503559, + "kl_loss": 0.3175276219844818, + "loss_ib": 0.0088446494191885, + "step": 968 + }, + { + "ce_ib": 6.658907890319824, + "ce_orig": 0.8224705457687378, + "epoch": 0.2786684880293335, + "kl_loss": 0.3462896943092346, + "loss_ib": 0.010121805593371391, + "step": 969 + }, + { + "ce_ib": 7.009324073791504, + "ce_orig": 1.261172890663147, + "epoch": 0.2786684880293335, + "kl_loss": 0.2837193012237549, + "loss_ib": 0.009846516884863377, + "step": 969 + }, + { + "ce_ib": 5.1103196144104, + "ce_orig": 0.6225570440292358, + "epoch": 0.2786684880293335, + "kl_loss": 0.25508421659469604, + "loss_ib": 0.007661161944270134, + "step": 969 + }, + { + "ce_ib": 7.586276531219482, + "ce_orig": 0.6994422078132629, + "epoch": 0.2786684880293335, + "kl_loss": 0.3505334258079529, + "loss_ib": 0.011091611348092556, + "step": 969 + }, + { + "epoch": 0.27895607160831115, + "grad_norm": 0.10747111588716507, + "learning_rate": 9.89855482871982e-06, + "loss": 0.8618, + "step": 970 + }, + { + "ce_ib": 8.283812522888184, + "ce_orig": 1.250517725944519, + "epoch": 0.27895607160831115, + "kl_loss": 0.2938983738422394, + "loss_ib": 0.011222796514630318, + "step": 970 + }, + { + "ce_ib": 7.375507354736328, + "ce_orig": 1.0513449907302856, + "epoch": 0.27895607160831115, + "kl_loss": 0.260643869638443, + "loss_ib": 0.009981945157051086, + "step": 970 + }, + { + "ce_ib": 8.069171905517578, + "ce_orig": 0.7485910654067993, + "epoch": 0.27895607160831115, + "kl_loss": 0.39277946949005127, + "loss_ib": 0.011996966786682606, + "step": 970 + }, + { + "ce_ib": 9.535097122192383, + "ce_orig": 0.617765486240387, + "epoch": 0.27895607160831115, + "kl_loss": 0.24510113894939423, + "loss_ib": 0.011986108496785164, + "step": 970 + }, + { + "ce_ib": 7.473255634307861, + "ce_orig": 0.7423644661903381, + "epoch": 0.27924365518728883, + "kl_loss": 0.27745312452316284, + "loss_ib": 0.010247787460684776, + "step": 971 + }, + { + "ce_ib": 8.25799560546875, + "ce_orig": 0.686055600643158, + "epoch": 0.27924365518728883, + "kl_loss": 0.21401247382164001, + "loss_ib": 0.010398120619356632, + "step": 971 + }, + { + "ce_ib": 7.072482109069824, + "ce_orig": 0.48852288722991943, + "epoch": 0.27924365518728883, + "kl_loss": 0.25071245431900024, + "loss_ib": 0.009579606354236603, + "step": 971 + }, + { + "ce_ib": 6.167069911956787, + "ce_orig": 0.6813110113143921, + "epoch": 0.27924365518728883, + "kl_loss": 0.25662925839424133, + "loss_ib": 0.008733362890779972, + "step": 971 + }, + { + "ce_ib": 10.110807418823242, + "ce_orig": 1.0463491678237915, + "epoch": 0.27953123876626645, + "kl_loss": 0.3023725748062134, + "loss_ib": 0.013134532608091831, + "step": 972 + }, + { + "ce_ib": 8.601117134094238, + "ce_orig": 1.0095446109771729, + "epoch": 0.27953123876626645, + "kl_loss": 0.2603193521499634, + "loss_ib": 0.011204310692846775, + "step": 972 + }, + { + "ce_ib": 8.582576751708984, + "ce_orig": 1.1173807382583618, + "epoch": 0.27953123876626645, + "kl_loss": 0.45898622274398804, + "loss_ib": 0.01317243929952383, + "step": 972 + }, + { + "ce_ib": 8.407683372497559, + "ce_orig": 0.682980477809906, + "epoch": 0.27953123876626645, + "kl_loss": 0.23422002792358398, + "loss_ib": 0.010749883018434048, + "step": 972 + }, + { + "ce_ib": 7.096373081207275, + "ce_orig": 0.8979065418243408, + "epoch": 0.2798188223452441, + "kl_loss": 0.36893928050994873, + "loss_ib": 0.010785765014588833, + "step": 973 + }, + { + "ce_ib": 6.754623889923096, + "ce_orig": 0.6018571853637695, + "epoch": 0.2798188223452441, + "kl_loss": 0.20240068435668945, + "loss_ib": 0.008778630755841732, + "step": 973 + }, + { + "ce_ib": 6.34320592880249, + "ce_orig": 0.5469645857810974, + "epoch": 0.2798188223452441, + "kl_loss": 0.3521893322467804, + "loss_ib": 0.009865098632872105, + "step": 973 + }, + { + "ce_ib": 8.217493057250977, + "ce_orig": 1.0419894456863403, + "epoch": 0.2798188223452441, + "kl_loss": 0.36468231678009033, + "loss_ib": 0.011864316649734974, + "step": 973 + }, + { + "ce_ib": 6.634602069854736, + "ce_orig": 0.6549820303916931, + "epoch": 0.2801064059242217, + "kl_loss": 0.3038894534111023, + "loss_ib": 0.009673496708273888, + "step": 974 + }, + { + "ce_ib": 9.341080665588379, + "ce_orig": 0.607479989528656, + "epoch": 0.2801064059242217, + "kl_loss": 0.26178374886512756, + "loss_ib": 0.011958918534219265, + "step": 974 + }, + { + "ce_ib": 9.761402130126953, + "ce_orig": 0.7530984878540039, + "epoch": 0.2801064059242217, + "kl_loss": 0.3960914611816406, + "loss_ib": 0.013722317293286324, + "step": 974 + }, + { + "ce_ib": 7.921651840209961, + "ce_orig": 0.7006543874740601, + "epoch": 0.2801064059242217, + "kl_loss": 0.2808942198753357, + "loss_ib": 0.010730594396591187, + "step": 974 + }, + { + "epoch": 0.2803939895031994, + "grad_norm": 0.09944093227386475, + "learning_rate": 9.896993532562736e-06, + "loss": 0.8261, + "step": 975 + }, + { + "ce_ib": 7.843717575073242, + "ce_orig": 0.8895473480224609, + "epoch": 0.2803939895031994, + "kl_loss": 0.35273388028144836, + "loss_ib": 0.01137105654925108, + "step": 975 + }, + { + "ce_ib": 9.197118759155273, + "ce_orig": 0.8774324059486389, + "epoch": 0.2803939895031994, + "kl_loss": 0.3209964632987976, + "loss_ib": 0.012407083064317703, + "step": 975 + }, + { + "ce_ib": 4.643885612487793, + "ce_orig": 0.6738633513450623, + "epoch": 0.2803939895031994, + "kl_loss": 0.23056040704250336, + "loss_ib": 0.0069494894705712795, + "step": 975 + }, + { + "ce_ib": 7.597918510437012, + "ce_orig": 0.7519400119781494, + "epoch": 0.2803939895031994, + "kl_loss": 0.3231240212917328, + "loss_ib": 0.010829159058630466, + "step": 975 + }, + { + "ce_ib": 5.716797351837158, + "ce_orig": 0.4619486927986145, + "epoch": 0.280681573082177, + "kl_loss": 0.2213851362466812, + "loss_ib": 0.007930648513138294, + "step": 976 + }, + { + "ce_ib": 4.659027099609375, + "ce_orig": 0.46190229058265686, + "epoch": 0.280681573082177, + "kl_loss": 0.21290841698646545, + "loss_ib": 0.006788111291825771, + "step": 976 + }, + { + "ce_ib": 8.67602252960205, + "ce_orig": 0.7991732358932495, + "epoch": 0.280681573082177, + "kl_loss": 0.4040037989616394, + "loss_ib": 0.012716060504317284, + "step": 976 + }, + { + "ce_ib": 11.049056053161621, + "ce_orig": 1.187608242034912, + "epoch": 0.280681573082177, + "kl_loss": 0.22732923924922943, + "loss_ib": 0.013322348706424236, + "step": 976 + }, + { + "ce_ib": 6.996013641357422, + "ce_orig": 0.3616046607494354, + "epoch": 0.28096915666115463, + "kl_loss": 0.5100580453872681, + "loss_ib": 0.012096593156456947, + "step": 977 + }, + { + "ce_ib": 5.4247727394104, + "ce_orig": 0.7208405137062073, + "epoch": 0.28096915666115463, + "kl_loss": 0.24420976638793945, + "loss_ib": 0.007866870611906052, + "step": 977 + }, + { + "ce_ib": 5.2020392417907715, + "ce_orig": 0.6198222041130066, + "epoch": 0.28096915666115463, + "kl_loss": 0.3132549524307251, + "loss_ib": 0.008334589190781116, + "step": 977 + }, + { + "ce_ib": 8.632140159606934, + "ce_orig": 1.0381910800933838, + "epoch": 0.28096915666115463, + "kl_loss": 0.25286003947257996, + "loss_ib": 0.011160740628838539, + "step": 977 + }, + { + "ce_ib": 6.451074600219727, + "ce_orig": 0.6350893378257751, + "epoch": 0.2812567402401323, + "kl_loss": 0.2584133744239807, + "loss_ib": 0.009035208262503147, + "step": 978 + }, + { + "ce_ib": 9.116836547851562, + "ce_orig": 1.3513867855072021, + "epoch": 0.2812567402401323, + "kl_loss": 0.24926617741584778, + "loss_ib": 0.011609498411417007, + "step": 978 + }, + { + "ce_ib": 9.134062767028809, + "ce_orig": 1.004563570022583, + "epoch": 0.2812567402401323, + "kl_loss": 0.49993661046028137, + "loss_ib": 0.014133429154753685, + "step": 978 + }, + { + "ce_ib": 12.412116050720215, + "ce_orig": 1.79780113697052, + "epoch": 0.2812567402401323, + "kl_loss": 0.410219669342041, + "loss_ib": 0.016514312475919724, + "step": 978 + }, + { + "ce_ib": 7.063502311706543, + "ce_orig": 0.6859074234962463, + "epoch": 0.28154432381910993, + "kl_loss": 0.3865056037902832, + "loss_ib": 0.010928559117019176, + "step": 979 + }, + { + "ce_ib": 6.0811543464660645, + "ce_orig": 0.7244781255722046, + "epoch": 0.28154432381910993, + "kl_loss": 0.23856481909751892, + "loss_ib": 0.008466802537441254, + "step": 979 + }, + { + "ce_ib": 10.540010452270508, + "ce_orig": 1.1688413619995117, + "epoch": 0.28154432381910993, + "kl_loss": 0.35274291038513184, + "loss_ib": 0.01406743936240673, + "step": 979 + }, + { + "ce_ib": 5.423574447631836, + "ce_orig": 0.8129374980926514, + "epoch": 0.28154432381910993, + "kl_loss": 0.31833183765411377, + "loss_ib": 0.008606893010437489, + "step": 979 + }, + { + "epoch": 0.28183190739808756, + "grad_norm": 0.09410839527845383, + "learning_rate": 9.895420438411616e-06, + "loss": 0.8391, + "step": 980 + }, + { + "ce_ib": 4.933524131774902, + "ce_orig": 0.5870782732963562, + "epoch": 0.28183190739808756, + "kl_loss": 0.24300794303417206, + "loss_ib": 0.007363603450357914, + "step": 980 + }, + { + "ce_ib": 8.602434158325195, + "ce_orig": 0.7935175895690918, + "epoch": 0.28183190739808756, + "kl_loss": 0.28050893545150757, + "loss_ib": 0.011407522484660149, + "step": 980 + }, + { + "ce_ib": 5.863503456115723, + "ce_orig": 0.7190868258476257, + "epoch": 0.28183190739808756, + "kl_loss": 0.2712858319282532, + "loss_ib": 0.008576362393796444, + "step": 980 + }, + { + "ce_ib": 6.571584224700928, + "ce_orig": 0.8708552718162537, + "epoch": 0.28183190739808756, + "kl_loss": 0.20640678703784943, + "loss_ib": 0.008635652251541615, + "step": 980 + }, + { + "ce_ib": 7.416860580444336, + "ce_orig": 0.6324207186698914, + "epoch": 0.28211949097706523, + "kl_loss": 0.23176950216293335, + "loss_ib": 0.009734555147588253, + "step": 981 + }, + { + "ce_ib": 5.944385051727295, + "ce_orig": 0.7745913863182068, + "epoch": 0.28211949097706523, + "kl_loss": 0.25892847776412964, + "loss_ib": 0.008533669635653496, + "step": 981 + }, + { + "ce_ib": 5.516068458557129, + "ce_orig": 0.46634528040885925, + "epoch": 0.28211949097706523, + "kl_loss": 0.33703067898750305, + "loss_ib": 0.008886375464498997, + "step": 981 + }, + { + "ce_ib": 9.37726879119873, + "ce_orig": 0.8699996471405029, + "epoch": 0.28211949097706523, + "kl_loss": 0.333581805229187, + "loss_ib": 0.012713085860013962, + "step": 981 + }, + { + "ce_ib": 7.538808822631836, + "ce_orig": 0.9253389835357666, + "epoch": 0.28240707455604286, + "kl_loss": 0.3331226408481598, + "loss_ib": 0.010870035737752914, + "step": 982 + }, + { + "ce_ib": 8.707592010498047, + "ce_orig": 1.1699987649917603, + "epoch": 0.28240707455604286, + "kl_loss": 0.3539894223213196, + "loss_ib": 0.012247486039996147, + "step": 982 + }, + { + "ce_ib": 4.077365875244141, + "ce_orig": 0.5715107917785645, + "epoch": 0.28240707455604286, + "kl_loss": 0.33435067534446716, + "loss_ib": 0.007420872338116169, + "step": 982 + }, + { + "ce_ib": 10.470292091369629, + "ce_orig": 1.234960675239563, + "epoch": 0.28240707455604286, + "kl_loss": 0.3328559994697571, + "loss_ib": 0.013798851519823074, + "step": 982 + }, + { + "ce_ib": 12.779997825622559, + "ce_orig": 1.8746068477630615, + "epoch": 0.2826946581350205, + "kl_loss": 0.3426949381828308, + "loss_ib": 0.016206946223974228, + "step": 983 + }, + { + "ce_ib": 6.349981307983398, + "ce_orig": 0.5666931867599487, + "epoch": 0.2826946581350205, + "kl_loss": 0.3899381756782532, + "loss_ib": 0.010249363258481026, + "step": 983 + }, + { + "ce_ib": 5.248485088348389, + "ce_orig": 0.6375545859336853, + "epoch": 0.2826946581350205, + "kl_loss": 0.2220623940229416, + "loss_ib": 0.007469109259545803, + "step": 983 + }, + { + "ce_ib": 8.392010688781738, + "ce_orig": 1.1137949228286743, + "epoch": 0.2826946581350205, + "kl_loss": 0.24818137288093567, + "loss_ib": 0.01087382435798645, + "step": 983 + }, + { + "ce_ib": 7.158164024353027, + "ce_orig": 0.5367669463157654, + "epoch": 0.2829822417139981, + "kl_loss": 0.27173370122909546, + "loss_ib": 0.00987550150603056, + "step": 984 + }, + { + "ce_ib": 9.511956214904785, + "ce_orig": 1.0864530801773071, + "epoch": 0.2829822417139981, + "kl_loss": 0.2743532657623291, + "loss_ib": 0.012255489826202393, + "step": 984 + }, + { + "ce_ib": 4.440701484680176, + "ce_orig": 0.66915363073349, + "epoch": 0.2829822417139981, + "kl_loss": 0.2580929696559906, + "loss_ib": 0.007021630648523569, + "step": 984 + }, + { + "ce_ib": 7.797125339508057, + "ce_orig": 0.9739370346069336, + "epoch": 0.2829822417139981, + "kl_loss": 0.24688200652599335, + "loss_ib": 0.01026594452559948, + "step": 984 + }, + { + "epoch": 0.2832698252929758, + "grad_norm": 0.09766387939453125, + "learning_rate": 9.893835550056407e-06, + "loss": 0.8618, + "step": 985 + }, + { + "ce_ib": 7.013763904571533, + "ce_orig": 0.8495591282844543, + "epoch": 0.2832698252929758, + "kl_loss": 0.23701989650726318, + "loss_ib": 0.009383962489664555, + "step": 985 + }, + { + "ce_ib": 5.373435020446777, + "ce_orig": 0.6739266514778137, + "epoch": 0.2832698252929758, + "kl_loss": 0.2958618402481079, + "loss_ib": 0.008332053199410439, + "step": 985 + }, + { + "ce_ib": 8.126791000366211, + "ce_orig": 0.8309746384620667, + "epoch": 0.2832698252929758, + "kl_loss": 0.38277897238731384, + "loss_ib": 0.011954580433666706, + "step": 985 + }, + { + "ce_ib": 8.946346282958984, + "ce_orig": 1.2075648307800293, + "epoch": 0.2832698252929758, + "kl_loss": 0.3251242935657501, + "loss_ib": 0.012197589501738548, + "step": 985 + }, + { + "ce_ib": 9.519036293029785, + "ce_orig": 0.8708526492118835, + "epoch": 0.2835574088719534, + "kl_loss": 0.31902384757995605, + "loss_ib": 0.01270927395671606, + "step": 986 + }, + { + "ce_ib": 11.005833625793457, + "ce_orig": 1.3966689109802246, + "epoch": 0.2835574088719534, + "kl_loss": 0.39275312423706055, + "loss_ib": 0.014933365397155285, + "step": 986 + }, + { + "ce_ib": 8.268836975097656, + "ce_orig": 1.026828408241272, + "epoch": 0.2835574088719534, + "kl_loss": 0.25195300579071045, + "loss_ib": 0.010788366198539734, + "step": 986 + }, + { + "ce_ib": 10.45020580291748, + "ce_orig": 1.0152733325958252, + "epoch": 0.2835574088719534, + "kl_loss": 0.2754089832305908, + "loss_ib": 0.013204295188188553, + "step": 986 + }, + { + "ce_ib": 8.130716323852539, + "ce_orig": 0.5325186848640442, + "epoch": 0.28384499245093103, + "kl_loss": 0.4100872576236725, + "loss_ib": 0.012231589294970036, + "step": 987 + }, + { + "ce_ib": 6.4655256271362305, + "ce_orig": 0.6601911783218384, + "epoch": 0.28384499245093103, + "kl_loss": 0.2558635473251343, + "loss_ib": 0.009024160914123058, + "step": 987 + }, + { + "ce_ib": 6.571667194366455, + "ce_orig": 0.621316134929657, + "epoch": 0.28384499245093103, + "kl_loss": 0.23040370643138885, + "loss_ib": 0.008875704370439053, + "step": 987 + }, + { + "ce_ib": 6.913454532623291, + "ce_orig": 0.48008617758750916, + "epoch": 0.28384499245093103, + "kl_loss": 0.4269219636917114, + "loss_ib": 0.011182674206793308, + "step": 987 + }, + { + "ce_ib": 6.073386192321777, + "ce_orig": 0.7689921855926514, + "epoch": 0.2841325760299087, + "kl_loss": 0.33685237169265747, + "loss_ib": 0.00944190938025713, + "step": 988 + }, + { + "ce_ib": 5.702093601226807, + "ce_orig": 0.7048614025115967, + "epoch": 0.2841325760299087, + "kl_loss": 0.2855239510536194, + "loss_ib": 0.0085573336109519, + "step": 988 + }, + { + "ce_ib": 6.3464884757995605, + "ce_orig": 0.6249024868011475, + "epoch": 0.2841325760299087, + "kl_loss": 0.37955427169799805, + "loss_ib": 0.010142030194401741, + "step": 988 + }, + { + "ce_ib": 7.4441657066345215, + "ce_orig": 0.9428173303604126, + "epoch": 0.2841325760299087, + "kl_loss": 0.41461101174354553, + "loss_ib": 0.011590275913476944, + "step": 988 + }, + { + "ce_ib": 8.65250015258789, + "ce_orig": 1.1751656532287598, + "epoch": 0.28442015960888634, + "kl_loss": 0.34314173460006714, + "loss_ib": 0.01208391785621643, + "step": 989 + }, + { + "ce_ib": 10.522750854492188, + "ce_orig": 1.5159534215927124, + "epoch": 0.28442015960888634, + "kl_loss": 0.23152892291545868, + "loss_ib": 0.012838039547204971, + "step": 989 + }, + { + "ce_ib": 9.248775482177734, + "ce_orig": 1.2440454959869385, + "epoch": 0.28442015960888634, + "kl_loss": 0.3038747012615204, + "loss_ib": 0.012287522666156292, + "step": 989 + }, + { + "ce_ib": 9.64876651763916, + "ce_orig": 1.3754632472991943, + "epoch": 0.28442015960888634, + "kl_loss": 0.2952038645744324, + "loss_ib": 0.01260080561041832, + "step": 989 + }, + { + "epoch": 0.28470774318786396, + "grad_norm": 0.11360838264226913, + "learning_rate": 9.892238871315477e-06, + "loss": 0.9178, + "step": 990 + }, + { + "ce_ib": 6.687885761260986, + "ce_orig": 0.7147387862205505, + "epoch": 0.28470774318786396, + "kl_loss": 0.2835395932197571, + "loss_ib": 0.009523281827569008, + "step": 990 + }, + { + "ce_ib": 7.797086715698242, + "ce_orig": 0.8477333784103394, + "epoch": 0.28470774318786396, + "kl_loss": 0.2771601676940918, + "loss_ib": 0.010568687692284584, + "step": 990 + }, + { + "ce_ib": 7.964920520782471, + "ce_orig": 1.1867727041244507, + "epoch": 0.28470774318786396, + "kl_loss": 0.21424566209316254, + "loss_ib": 0.010107376612722874, + "step": 990 + }, + { + "ce_ib": 7.639345169067383, + "ce_orig": 0.9419063329696655, + "epoch": 0.28470774318786396, + "kl_loss": 0.3332253694534302, + "loss_ib": 0.010971598327159882, + "step": 990 + }, + { + "ce_ib": 9.816707611083984, + "ce_orig": 0.7999016046524048, + "epoch": 0.28499532676684164, + "kl_loss": 0.33083677291870117, + "loss_ib": 0.013125075958669186, + "step": 991 + }, + { + "ce_ib": 10.84311580657959, + "ce_orig": 1.1912862062454224, + "epoch": 0.28499532676684164, + "kl_loss": 0.28931328654289246, + "loss_ib": 0.013736248947679996, + "step": 991 + }, + { + "ce_ib": 7.672483921051025, + "ce_orig": 0.9036149978637695, + "epoch": 0.28499532676684164, + "kl_loss": 0.3275108337402344, + "loss_ib": 0.010947591625154018, + "step": 991 + }, + { + "ce_ib": 7.526676177978516, + "ce_orig": 0.8651123046875, + "epoch": 0.28499532676684164, + "kl_loss": 0.2458728551864624, + "loss_ib": 0.009985404089093208, + "step": 991 + }, + { + "ce_ib": 8.313544273376465, + "ce_orig": 1.2926881313323975, + "epoch": 0.28528291034581926, + "kl_loss": 0.32590439915657043, + "loss_ib": 0.011572588235139847, + "step": 992 + }, + { + "ce_ib": 9.292975425720215, + "ce_orig": 1.3791533708572388, + "epoch": 0.28528291034581926, + "kl_loss": 0.2774926722049713, + "loss_ib": 0.012067901901900768, + "step": 992 + }, + { + "ce_ib": 6.573873996734619, + "ce_orig": 0.4999292194843292, + "epoch": 0.28528291034581926, + "kl_loss": 0.25200730562210083, + "loss_ib": 0.009093946777284145, + "step": 992 + }, + { + "ce_ib": 6.635220050811768, + "ce_orig": 0.6851158142089844, + "epoch": 0.28528291034581926, + "kl_loss": 0.2963348627090454, + "loss_ib": 0.009598568081855774, + "step": 992 + }, + { + "ce_ib": 7.602895736694336, + "ce_orig": 0.9763492941856384, + "epoch": 0.2855704939247969, + "kl_loss": 0.24165207147598267, + "loss_ib": 0.01001941692084074, + "step": 993 + }, + { + "ce_ib": 4.053561210632324, + "ce_orig": 0.7590509057044983, + "epoch": 0.2855704939247969, + "kl_loss": 0.22461174428462982, + "loss_ib": 0.006299678701907396, + "step": 993 + }, + { + "ce_ib": 11.96536922454834, + "ce_orig": 1.5654411315917969, + "epoch": 0.2855704939247969, + "kl_loss": 0.34373822808265686, + "loss_ib": 0.015402751043438911, + "step": 993 + }, + { + "ce_ib": 5.362922668457031, + "ce_orig": 0.8094271421432495, + "epoch": 0.2855704939247969, + "kl_loss": 0.280230849981308, + "loss_ib": 0.008165230974555016, + "step": 993 + }, + { + "ce_ib": 5.077024936676025, + "ce_orig": 0.6099755167961121, + "epoch": 0.2858580775037745, + "kl_loss": 0.2580031752586365, + "loss_ib": 0.007657056674361229, + "step": 994 + }, + { + "ce_ib": 7.4664812088012695, + "ce_orig": 0.9805220365524292, + "epoch": 0.2858580775037745, + "kl_loss": 0.2251621037721634, + "loss_ib": 0.009718102402985096, + "step": 994 + }, + { + "ce_ib": 7.463787078857422, + "ce_orig": 0.7944656014442444, + "epoch": 0.2858580775037745, + "kl_loss": 0.20221929252147675, + "loss_ib": 0.009485980495810509, + "step": 994 + }, + { + "ce_ib": 9.706088066101074, + "ce_orig": 1.0263926982879639, + "epoch": 0.2858580775037745, + "kl_loss": 0.32880985736846924, + "loss_ib": 0.012994186952710152, + "step": 994 + }, + { + "epoch": 0.2861456610827522, + "grad_norm": 0.10201858729124069, + "learning_rate": 9.89063040603559e-06, + "loss": 0.9227, + "step": 995 + }, + { + "ce_ib": 9.292113304138184, + "ce_orig": 1.1094127893447876, + "epoch": 0.2861456610827522, + "kl_loss": 0.3149503469467163, + "loss_ib": 0.01244161557406187, + "step": 995 + }, + { + "ce_ib": 11.187474250793457, + "ce_orig": 0.847707986831665, + "epoch": 0.2861456610827522, + "kl_loss": 0.28196096420288086, + "loss_ib": 0.0140070840716362, + "step": 995 + }, + { + "ce_ib": 5.9683427810668945, + "ce_orig": 0.7678855657577515, + "epoch": 0.2861456610827522, + "kl_loss": 0.24694621562957764, + "loss_ib": 0.008437804877758026, + "step": 995 + }, + { + "ce_ib": 4.204360008239746, + "ce_orig": 0.5550284385681152, + "epoch": 0.2861456610827522, + "kl_loss": 0.33598631620407104, + "loss_ib": 0.007564222440123558, + "step": 995 + }, + { + "ce_ib": 4.783699035644531, + "ce_orig": 0.5354413390159607, + "epoch": 0.2864332446617298, + "kl_loss": 0.3188742995262146, + "loss_ib": 0.007972441613674164, + "step": 996 + }, + { + "ce_ib": 7.208334445953369, + "ce_orig": 0.8125985860824585, + "epoch": 0.2864332446617298, + "kl_loss": 0.2528786063194275, + "loss_ib": 0.009737120941281319, + "step": 996 + }, + { + "ce_ib": 8.211366653442383, + "ce_orig": 0.8897217512130737, + "epoch": 0.2864332446617298, + "kl_loss": 0.251012921333313, + "loss_ib": 0.010721495375037193, + "step": 996 + }, + { + "ce_ib": 7.538112163543701, + "ce_orig": 0.8684660196304321, + "epoch": 0.2864332446617298, + "kl_loss": 0.24459782242774963, + "loss_ib": 0.009984089992940426, + "step": 996 + }, + { + "ce_ib": 5.27274751663208, + "ce_orig": 0.6079943776130676, + "epoch": 0.28672082824070744, + "kl_loss": 0.36212387681007385, + "loss_ib": 0.008893987163901329, + "step": 997 + }, + { + "ce_ib": 3.044394016265869, + "ce_orig": 0.46399974822998047, + "epoch": 0.28672082824070744, + "kl_loss": 0.26355600357055664, + "loss_ib": 0.005679954309016466, + "step": 997 + }, + { + "ce_ib": 6.704494476318359, + "ce_orig": 0.8144213557243347, + "epoch": 0.28672082824070744, + "kl_loss": 0.23444503545761108, + "loss_ib": 0.00904894433915615, + "step": 997 + }, + { + "ce_ib": 8.70203971862793, + "ce_orig": 1.2462023496627808, + "epoch": 0.28672082824070744, + "kl_loss": 0.21362106502056122, + "loss_ib": 0.010838249698281288, + "step": 997 + }, + { + "ce_ib": 6.0318922996521, + "ce_orig": 0.5330185294151306, + "epoch": 0.2870084118196851, + "kl_loss": 0.3407291769981384, + "loss_ib": 0.00943918339908123, + "step": 998 + }, + { + "ce_ib": 8.035571098327637, + "ce_orig": 1.056720495223999, + "epoch": 0.2870084118196851, + "kl_loss": 0.23358367383480072, + "loss_ib": 0.010371407493948936, + "step": 998 + }, + { + "ce_ib": 8.166302680969238, + "ce_orig": 0.6143516898155212, + "epoch": 0.2870084118196851, + "kl_loss": 0.29871490597724915, + "loss_ib": 0.011153452098369598, + "step": 998 + }, + { + "ce_ib": 8.71125602722168, + "ce_orig": 1.1870672702789307, + "epoch": 0.2870084118196851, + "kl_loss": 0.2737518548965454, + "loss_ib": 0.011448774486780167, + "step": 998 + }, + { + "ce_ib": 5.139594078063965, + "ce_orig": 0.5000967383384705, + "epoch": 0.28729599539866274, + "kl_loss": 0.5653914213180542, + "loss_ib": 0.010793508030474186, + "step": 999 + }, + { + "ce_ib": 6.412952899932861, + "ce_orig": 0.9104073643684387, + "epoch": 0.28729599539866274, + "kl_loss": 0.28137922286987305, + "loss_ib": 0.009226744994521141, + "step": 999 + }, + { + "ce_ib": 9.592132568359375, + "ce_orig": 0.7200940251350403, + "epoch": 0.28729599539866274, + "kl_loss": 0.3655579090118408, + "loss_ib": 0.013247711583971977, + "step": 999 + }, + { + "ce_ib": 8.952437400817871, + "ce_orig": 1.0639101266860962, + "epoch": 0.28729599539866274, + "kl_loss": 0.3098136782646179, + "loss_ib": 0.012050573714077473, + "step": 999 + }, + { + "epoch": 0.28758357897764036, + "grad_norm": 0.10075593739748001, + "learning_rate": 9.889010158091917e-06, + "loss": 0.92, + "step": 1000 + }, + { + "ce_ib": 9.31643009185791, + "ce_orig": 0.6427621245384216, + "epoch": 0.28758357897764036, + "kl_loss": 0.2728811800479889, + "loss_ib": 0.012045240961015224, + "step": 1000 + }, + { + "ce_ib": 8.242283821105957, + "ce_orig": 0.8939123749732971, + "epoch": 0.28758357897764036, + "kl_loss": 0.25834226608276367, + "loss_ib": 0.010825706645846367, + "step": 1000 + }, + { + "ce_ib": 7.34848165512085, + "ce_orig": 0.541661262512207, + "epoch": 0.28758357897764036, + "kl_loss": 0.3545241951942444, + "loss_ib": 0.010893723927438259, + "step": 1000 + }, + { + "ce_ib": 7.228389263153076, + "ce_orig": 0.8468849658966064, + "epoch": 0.28758357897764036, + "kl_loss": 0.33031901717185974, + "loss_ib": 0.01053157914429903, + "step": 1000 + }, + { + "ce_ib": 8.21964168548584, + "ce_orig": 0.6685616970062256, + "epoch": 0.28787116255661804, + "kl_loss": 0.2963103950023651, + "loss_ib": 0.011182744987308979, + "step": 1001 + }, + { + "ce_ib": 7.676856517791748, + "ce_orig": 0.6330392956733704, + "epoch": 0.28787116255661804, + "kl_loss": 0.31577515602111816, + "loss_ib": 0.010834608227014542, + "step": 1001 + }, + { + "ce_ib": 5.161186695098877, + "ce_orig": 0.7620050311088562, + "epoch": 0.28787116255661804, + "kl_loss": 0.2610274851322174, + "loss_ib": 0.0077714622020721436, + "step": 1001 + }, + { + "ce_ib": 6.980307102203369, + "ce_orig": 0.4666039049625397, + "epoch": 0.28787116255661804, + "kl_loss": 0.3128125071525574, + "loss_ib": 0.010108432732522488, + "step": 1001 + }, + { + "ce_ib": 11.080206871032715, + "ce_orig": 1.3631070852279663, + "epoch": 0.28815874613559567, + "kl_loss": 0.5862671732902527, + "loss_ib": 0.01694287732243538, + "step": 1002 + }, + { + "ce_ib": 7.41237211227417, + "ce_orig": 0.8579285740852356, + "epoch": 0.28815874613559567, + "kl_loss": 0.20175296068191528, + "loss_ib": 0.009429901838302612, + "step": 1002 + }, + { + "ce_ib": 9.326122283935547, + "ce_orig": 0.9100500345230103, + "epoch": 0.28815874613559567, + "kl_loss": 0.21318525075912476, + "loss_ib": 0.011457975022494793, + "step": 1002 + }, + { + "ce_ib": 8.27169132232666, + "ce_orig": 0.7772948741912842, + "epoch": 0.28815874613559567, + "kl_loss": 0.35793256759643555, + "loss_ib": 0.011851017363369465, + "step": 1002 + }, + { + "ce_ib": 3.5838937759399414, + "ce_orig": 0.6172391176223755, + "epoch": 0.2884463297145733, + "kl_loss": 0.2099723070859909, + "loss_ib": 0.0056836167350411415, + "step": 1003 + }, + { + "ce_ib": 11.964217185974121, + "ce_orig": 1.454162359237671, + "epoch": 0.2884463297145733, + "kl_loss": 0.26618391275405884, + "loss_ib": 0.014626056887209415, + "step": 1003 + }, + { + "ce_ib": 8.715005874633789, + "ce_orig": 1.0397450923919678, + "epoch": 0.2884463297145733, + "kl_loss": 0.4153057932853699, + "loss_ib": 0.012868063524365425, + "step": 1003 + }, + { + "ce_ib": 5.671633243560791, + "ce_orig": 0.5089535117149353, + "epoch": 0.2884463297145733, + "kl_loss": 0.3270706832408905, + "loss_ib": 0.00894234050065279, + "step": 1003 + }, + { + "ce_ib": 9.240535736083984, + "ce_orig": 0.9992527365684509, + "epoch": 0.2887339132935509, + "kl_loss": 0.3158443570137024, + "loss_ib": 0.012398979626595974, + "step": 1004 + }, + { + "ce_ib": 4.2876434326171875, + "ce_orig": 0.4395374357700348, + "epoch": 0.2887339132935509, + "kl_loss": 0.25293272733688354, + "loss_ib": 0.0068169706501066685, + "step": 1004 + }, + { + "ce_ib": 8.207853317260742, + "ce_orig": 0.8152860403060913, + "epoch": 0.2887339132935509, + "kl_loss": 0.32358676195144653, + "loss_ib": 0.011443721130490303, + "step": 1004 + }, + { + "ce_ib": 3.8691201210021973, + "ce_orig": 0.36630746722221375, + "epoch": 0.2887339132935509, + "kl_loss": 0.5294014811515808, + "loss_ib": 0.00916313473135233, + "step": 1004 + }, + { + "epoch": 0.2890214968725286, + "grad_norm": 0.09347087144851685, + "learning_rate": 9.88737813138801e-06, + "loss": 0.8989, + "step": 1005 + }, + { + "ce_ib": 7.5148749351501465, + "ce_orig": 0.7807815670967102, + "epoch": 0.2890214968725286, + "kl_loss": 0.28932327032089233, + "loss_ib": 0.010408108122646809, + "step": 1005 + }, + { + "ce_ib": 11.744305610656738, + "ce_orig": 1.5088998079299927, + "epoch": 0.2890214968725286, + "kl_loss": 0.3862993121147156, + "loss_ib": 0.015607299283146858, + "step": 1005 + }, + { + "ce_ib": 8.315278053283691, + "ce_orig": 0.6296089291572571, + "epoch": 0.2890214968725286, + "kl_loss": 0.24432632327079773, + "loss_ib": 0.010758541524410248, + "step": 1005 + }, + { + "ce_ib": 5.209807395935059, + "ce_orig": 0.365771621465683, + "epoch": 0.2890214968725286, + "kl_loss": 0.3716930150985718, + "loss_ib": 0.008926738053560257, + "step": 1005 + }, + { + "ce_ib": 7.445919990539551, + "ce_orig": 0.9328464269638062, + "epoch": 0.2893090804515062, + "kl_loss": 0.26125314831733704, + "loss_ib": 0.010058451443910599, + "step": 1006 + }, + { + "ce_ib": 6.672987937927246, + "ce_orig": 0.5431604981422424, + "epoch": 0.2893090804515062, + "kl_loss": 0.6196109652519226, + "loss_ib": 0.012869098223745823, + "step": 1006 + }, + { + "ce_ib": 9.32304573059082, + "ce_orig": 0.8576613664627075, + "epoch": 0.2893090804515062, + "kl_loss": 0.1936599165201187, + "loss_ib": 0.011259645223617554, + "step": 1006 + }, + { + "ce_ib": 8.495000839233398, + "ce_orig": 0.8172574043273926, + "epoch": 0.2893090804515062, + "kl_loss": 0.33307093381881714, + "loss_ib": 0.011825710535049438, + "step": 1006 + }, + { + "ce_ib": 8.885623931884766, + "ce_orig": 1.0538289546966553, + "epoch": 0.28959666403048384, + "kl_loss": 0.2817854881286621, + "loss_ib": 0.01170347910374403, + "step": 1007 + }, + { + "ce_ib": 5.0845417976379395, + "ce_orig": 0.8662396669387817, + "epoch": 0.28959666403048384, + "kl_loss": 0.2701185345649719, + "loss_ib": 0.007785727269947529, + "step": 1007 + }, + { + "ce_ib": 4.460420608520508, + "ce_orig": 0.8098589181900024, + "epoch": 0.28959666403048384, + "kl_loss": 0.2515374422073364, + "loss_ib": 0.006975794676691294, + "step": 1007 + }, + { + "ce_ib": 9.361479759216309, + "ce_orig": 1.1995859146118164, + "epoch": 0.28959666403048384, + "kl_loss": 0.2952824831008911, + "loss_ib": 0.01231430470943451, + "step": 1007 + }, + { + "ce_ib": 8.661140441894531, + "ce_orig": 0.9951620697975159, + "epoch": 0.2898842476094615, + "kl_loss": 0.2747381627559662, + "loss_ib": 0.011408521793782711, + "step": 1008 + }, + { + "ce_ib": 7.686845779418945, + "ce_orig": 0.8603042960166931, + "epoch": 0.2898842476094615, + "kl_loss": 0.2675703763961792, + "loss_ib": 0.010362548753619194, + "step": 1008 + }, + { + "ce_ib": 10.085550308227539, + "ce_orig": 0.561826765537262, + "epoch": 0.2898842476094615, + "kl_loss": 0.30274707078933716, + "loss_ib": 0.013113021850585938, + "step": 1008 + }, + { + "ce_ib": 9.925764083862305, + "ce_orig": 1.4767504930496216, + "epoch": 0.2898842476094615, + "kl_loss": 0.22820648550987244, + "loss_ib": 0.01220782846212387, + "step": 1008 + }, + { + "ce_ib": 8.104393005371094, + "ce_orig": 0.8048654198646545, + "epoch": 0.29017183118843914, + "kl_loss": 0.3005231022834778, + "loss_ib": 0.011109624058008194, + "step": 1009 + }, + { + "ce_ib": 4.768399238586426, + "ce_orig": 0.7496779561042786, + "epoch": 0.29017183118843914, + "kl_loss": 0.23560698330402374, + "loss_ib": 0.0071244691498577595, + "step": 1009 + }, + { + "ce_ib": 7.887166500091553, + "ce_orig": 1.1598186492919922, + "epoch": 0.29017183118843914, + "kl_loss": 0.27508389949798584, + "loss_ib": 0.01063800510019064, + "step": 1009 + }, + { + "ce_ib": 7.2288594245910645, + "ce_orig": 1.204685926437378, + "epoch": 0.29017183118843914, + "kl_loss": 0.24225227534770966, + "loss_ib": 0.009651382453739643, + "step": 1009 + }, + { + "epoch": 0.29045941476741677, + "grad_norm": 0.09235741198062897, + "learning_rate": 9.885734329855798e-06, + "loss": 0.8963, + "step": 1010 + }, + { + "ce_ib": 8.130411148071289, + "ce_orig": 0.9784060120582581, + "epoch": 0.29045941476741677, + "kl_loss": 0.2725956439971924, + "loss_ib": 0.010856368578970432, + "step": 1010 + }, + { + "ce_ib": 6.9893717765808105, + "ce_orig": 0.9504007697105408, + "epoch": 0.29045941476741677, + "kl_loss": 0.2702571153640747, + "loss_ib": 0.009691942483186722, + "step": 1010 + }, + { + "ce_ib": 7.257619857788086, + "ce_orig": 0.9049093127250671, + "epoch": 0.29045941476741677, + "kl_loss": 0.26276618242263794, + "loss_ib": 0.009885281324386597, + "step": 1010 + }, + { + "ce_ib": 2.96409273147583, + "ce_orig": 0.5564637780189514, + "epoch": 0.29045941476741677, + "kl_loss": 0.24044734239578247, + "loss_ib": 0.005368566606193781, + "step": 1010 + }, + { + "ce_ib": 9.04433822631836, + "ce_orig": 1.248004674911499, + "epoch": 0.29074699834639445, + "kl_loss": 0.5226652026176453, + "loss_ib": 0.01427098922431469, + "step": 1011 + }, + { + "ce_ib": 5.734447956085205, + "ce_orig": 0.7180121541023254, + "epoch": 0.29074699834639445, + "kl_loss": 0.27341729402542114, + "loss_ib": 0.008468620479106903, + "step": 1011 + }, + { + "ce_ib": 7.165276050567627, + "ce_orig": 0.6450743675231934, + "epoch": 0.29074699834639445, + "kl_loss": 0.2959662675857544, + "loss_ib": 0.010124938562512398, + "step": 1011 + }, + { + "ce_ib": 8.000144958496094, + "ce_orig": 1.1693824529647827, + "epoch": 0.29074699834639445, + "kl_loss": 0.3289404511451721, + "loss_ib": 0.011289549060165882, + "step": 1011 + }, + { + "ce_ib": 11.336484909057617, + "ce_orig": 1.0948889255523682, + "epoch": 0.29103458192537207, + "kl_loss": 0.2643250524997711, + "loss_ib": 0.013979734852910042, + "step": 1012 + }, + { + "ce_ib": 7.049655914306641, + "ce_orig": 0.7831434011459351, + "epoch": 0.29103458192537207, + "kl_loss": 0.2921431064605713, + "loss_ib": 0.009971086867153645, + "step": 1012 + }, + { + "ce_ib": 6.0634355545043945, + "ce_orig": 0.777411162853241, + "epoch": 0.29103458192537207, + "kl_loss": 0.2579835057258606, + "loss_ib": 0.00864327047020197, + "step": 1012 + }, + { + "ce_ib": 7.513561725616455, + "ce_orig": 1.4329041242599487, + "epoch": 0.29103458192537207, + "kl_loss": 0.32468241453170776, + "loss_ib": 0.010760385543107986, + "step": 1012 + }, + { + "ce_ib": 7.622284412384033, + "ce_orig": 0.6885894536972046, + "epoch": 0.2913221655043497, + "kl_loss": 0.30582404136657715, + "loss_ib": 0.01068052463233471, + "step": 1013 + }, + { + "ce_ib": 10.078372955322266, + "ce_orig": 0.8869221806526184, + "epoch": 0.2913221655043497, + "kl_loss": 0.25840914249420166, + "loss_ib": 0.012662463821470737, + "step": 1013 + }, + { + "ce_ib": 6.638105392456055, + "ce_orig": 1.0796653032302856, + "epoch": 0.2913221655043497, + "kl_loss": 0.26853376626968384, + "loss_ib": 0.009323443286120892, + "step": 1013 + }, + { + "ce_ib": 8.890301704406738, + "ce_orig": 1.291822910308838, + "epoch": 0.2913221655043497, + "kl_loss": 0.2958293557167053, + "loss_ib": 0.01184859499335289, + "step": 1013 + }, + { + "ce_ib": 8.304509162902832, + "ce_orig": 1.1144568920135498, + "epoch": 0.2916097490833273, + "kl_loss": 0.33458924293518066, + "loss_ib": 0.01165040209889412, + "step": 1014 + }, + { + "ce_ib": 5.494061470031738, + "ce_orig": 0.8900278210639954, + "epoch": 0.2916097490833273, + "kl_loss": 0.252785861492157, + "loss_ib": 0.00802191998809576, + "step": 1014 + }, + { + "ce_ib": 6.3537516593933105, + "ce_orig": 0.8443594574928284, + "epoch": 0.2916097490833273, + "kl_loss": 0.2535431385040283, + "loss_ib": 0.008889183402061462, + "step": 1014 + }, + { + "ce_ib": 11.406895637512207, + "ce_orig": 1.344512939453125, + "epoch": 0.2916097490833273, + "kl_loss": 0.28891634941101074, + "loss_ib": 0.014296059496700764, + "step": 1014 + }, + { + "epoch": 0.291897332662305, + "grad_norm": 0.09041693806648254, + "learning_rate": 9.884078757455583e-06, + "loss": 0.9109, + "step": 1015 + }, + { + "ce_ib": 9.290079116821289, + "ce_orig": 1.5962048768997192, + "epoch": 0.291897332662305, + "kl_loss": 0.28169265389442444, + "loss_ib": 0.012107006274163723, + "step": 1015 + }, + { + "ce_ib": 7.13163948059082, + "ce_orig": 0.6506978273391724, + "epoch": 0.291897332662305, + "kl_loss": 0.32169321179389954, + "loss_ib": 0.010348571464419365, + "step": 1015 + }, + { + "ce_ib": 2.4287688732147217, + "ce_orig": 0.17141437530517578, + "epoch": 0.291897332662305, + "kl_loss": 0.5213749408721924, + "loss_ib": 0.007642518263310194, + "step": 1015 + }, + { + "ce_ib": 5.544053077697754, + "ce_orig": 0.7960252165794373, + "epoch": 0.291897332662305, + "kl_loss": 0.26576048135757446, + "loss_ib": 0.00820165779441595, + "step": 1015 + }, + { + "ce_ib": 8.132667541503906, + "ce_orig": 0.7880412936210632, + "epoch": 0.2921849162412826, + "kl_loss": 0.3609253168106079, + "loss_ib": 0.011741920374333858, + "step": 1016 + }, + { + "ce_ib": 5.45988655090332, + "ce_orig": 0.7703402042388916, + "epoch": 0.2921849162412826, + "kl_loss": 0.23446249961853027, + "loss_ib": 0.007804511580616236, + "step": 1016 + }, + { + "ce_ib": 8.336692810058594, + "ce_orig": 1.0396983623504639, + "epoch": 0.2921849162412826, + "kl_loss": 0.2709803581237793, + "loss_ib": 0.011046496219933033, + "step": 1016 + }, + { + "ce_ib": 8.177699089050293, + "ce_orig": 0.5715538859367371, + "epoch": 0.2921849162412826, + "kl_loss": 0.29589879512786865, + "loss_ib": 0.01113668642938137, + "step": 1016 + }, + { + "ce_ib": 7.4771318435668945, + "ce_orig": 0.9154139757156372, + "epoch": 0.29247249982026025, + "kl_loss": 0.25590091943740845, + "loss_ib": 0.010036141611635685, + "step": 1017 + }, + { + "ce_ib": 7.049166679382324, + "ce_orig": 0.9006765484809875, + "epoch": 0.29247249982026025, + "kl_loss": 0.20401448011398315, + "loss_ib": 0.009089311584830284, + "step": 1017 + }, + { + "ce_ib": 4.249927043914795, + "ce_orig": 0.7018132209777832, + "epoch": 0.29247249982026025, + "kl_loss": 0.7936820387840271, + "loss_ib": 0.01218674797564745, + "step": 1017 + }, + { + "ce_ib": 4.743485450744629, + "ce_orig": 0.8688510060310364, + "epoch": 0.29247249982026025, + "kl_loss": 0.21329936385154724, + "loss_ib": 0.006876479368656874, + "step": 1017 + }, + { + "ce_ib": 9.883633613586426, + "ce_orig": 0.8130282163619995, + "epoch": 0.2927600833992379, + "kl_loss": 0.4127139747142792, + "loss_ib": 0.014010773971676826, + "step": 1018 + }, + { + "ce_ib": 6.509448528289795, + "ce_orig": 0.7483327388763428, + "epoch": 0.2927600833992379, + "kl_loss": 0.2256876528263092, + "loss_ib": 0.008766325190663338, + "step": 1018 + }, + { + "ce_ib": 7.60127067565918, + "ce_orig": 0.8613872528076172, + "epoch": 0.2927600833992379, + "kl_loss": 0.2583634853363037, + "loss_ib": 0.010184905491769314, + "step": 1018 + }, + { + "ce_ib": 3.682047128677368, + "ce_orig": 0.4670972228050232, + "epoch": 0.2927600833992379, + "kl_loss": 0.2330019176006317, + "loss_ib": 0.006012066267430782, + "step": 1018 + }, + { + "ce_ib": 2.297346353530884, + "ce_orig": 0.24013860523700714, + "epoch": 0.29304766697821555, + "kl_loss": 0.6258134245872498, + "loss_ib": 0.008555480279028416, + "step": 1019 + }, + { + "ce_ib": 8.305569648742676, + "ce_orig": 0.9835488200187683, + "epoch": 0.29304766697821555, + "kl_loss": 0.36162036657333374, + "loss_ib": 0.011921772733330727, + "step": 1019 + }, + { + "ce_ib": 10.808023452758789, + "ce_orig": 1.32200026512146, + "epoch": 0.29304766697821555, + "kl_loss": 0.27893298864364624, + "loss_ib": 0.013597352430224419, + "step": 1019 + }, + { + "ce_ib": 9.159648895263672, + "ce_orig": 0.862013041973114, + "epoch": 0.29304766697821555, + "kl_loss": 0.2749783396720886, + "loss_ib": 0.011909431777894497, + "step": 1019 + }, + { + "epoch": 0.2933352505571932, + "grad_norm": 0.08970300853252411, + "learning_rate": 9.882411418176023e-06, + "loss": 0.8709, + "step": 1020 + }, + { + "ce_ib": 3.6713716983795166, + "ce_orig": 0.7519941926002502, + "epoch": 0.2933352505571932, + "kl_loss": 0.20077042281627655, + "loss_ib": 0.0056790756061673164, + "step": 1020 + }, + { + "ce_ib": 10.4853515625, + "ce_orig": 1.4266449213027954, + "epoch": 0.2933352505571932, + "kl_loss": 0.20042094588279724, + "loss_ib": 0.012489561922848225, + "step": 1020 + }, + { + "ce_ib": 5.523143768310547, + "ce_orig": 0.49153417348861694, + "epoch": 0.2933352505571932, + "kl_loss": 0.3945586085319519, + "loss_ib": 0.009468729607760906, + "step": 1020 + }, + { + "ce_ib": 8.129873275756836, + "ce_orig": 0.6584604978561401, + "epoch": 0.2933352505571932, + "kl_loss": 0.36802613735198975, + "loss_ib": 0.011810134164988995, + "step": 1020 + }, + { + "ce_ib": 10.585819244384766, + "ce_orig": 1.2593013048171997, + "epoch": 0.29362283413617085, + "kl_loss": 0.2198692411184311, + "loss_ib": 0.012784511782228947, + "step": 1021 + }, + { + "ce_ib": 8.541426658630371, + "ce_orig": 0.8910534977912903, + "epoch": 0.29362283413617085, + "kl_loss": 0.2919941246509552, + "loss_ib": 0.011461366899311543, + "step": 1021 + }, + { + "ce_ib": 6.236502647399902, + "ce_orig": 0.5739825963973999, + "epoch": 0.29362283413617085, + "kl_loss": 0.3800389766693115, + "loss_ib": 0.010036892257630825, + "step": 1021 + }, + { + "ce_ib": 5.119142055511475, + "ce_orig": 0.3912176191806793, + "epoch": 0.29362283413617085, + "kl_loss": 0.1975279450416565, + "loss_ib": 0.007094421423971653, + "step": 1021 + }, + { + "ce_ib": 6.980103492736816, + "ce_orig": 0.6286670565605164, + "epoch": 0.2939104177151485, + "kl_loss": 0.35218000411987305, + "loss_ib": 0.010501904413104057, + "step": 1022 + }, + { + "ce_ib": 9.082075119018555, + "ce_orig": 1.1112430095672607, + "epoch": 0.2939104177151485, + "kl_loss": 0.33446168899536133, + "loss_ib": 0.012426692061126232, + "step": 1022 + }, + { + "ce_ib": 8.302471160888672, + "ce_orig": 0.8119482398033142, + "epoch": 0.2939104177151485, + "kl_loss": 0.28878772258758545, + "loss_ib": 0.011190347373485565, + "step": 1022 + }, + { + "ce_ib": 7.19743013381958, + "ce_orig": 0.737561047077179, + "epoch": 0.2939104177151485, + "kl_loss": 0.35057979822158813, + "loss_ib": 0.01070322748273611, + "step": 1022 + }, + { + "ce_ib": 6.976788520812988, + "ce_orig": 1.0390961170196533, + "epoch": 0.2941980012941261, + "kl_loss": 0.30705952644348145, + "loss_ib": 0.010047382675111294, + "step": 1023 + }, + { + "ce_ib": 6.412473201751709, + "ce_orig": 0.6833570003509521, + "epoch": 0.2941980012941261, + "kl_loss": 0.25251883268356323, + "loss_ib": 0.008937661536037922, + "step": 1023 + }, + { + "ce_ib": 4.118930816650391, + "ce_orig": 0.4952293038368225, + "epoch": 0.2941980012941261, + "kl_loss": 0.30007433891296387, + "loss_ib": 0.007119674701243639, + "step": 1023 + }, + { + "ce_ib": 8.888395309448242, + "ce_orig": 0.9846009612083435, + "epoch": 0.2941980012941261, + "kl_loss": 0.2865602970123291, + "loss_ib": 0.011753997765481472, + "step": 1023 + }, + { + "ce_ib": 8.605910301208496, + "ce_orig": 0.9668225049972534, + "epoch": 0.2944855848731037, + "kl_loss": 0.22117966413497925, + "loss_ib": 0.01081770658493042, + "step": 1024 + }, + { + "ce_ib": 7.79697322845459, + "ce_orig": 0.7142451405525208, + "epoch": 0.2944855848731037, + "kl_loss": 0.276878297328949, + "loss_ib": 0.010565755888819695, + "step": 1024 + }, + { + "ce_ib": 13.723976135253906, + "ce_orig": 1.894836664199829, + "epoch": 0.2944855848731037, + "kl_loss": 0.25465625524520874, + "loss_ib": 0.016270538792014122, + "step": 1024 + }, + { + "ce_ib": 10.904229164123535, + "ce_orig": 1.352797508239746, + "epoch": 0.2944855848731037, + "kl_loss": 0.24989211559295654, + "loss_ib": 0.01340315118432045, + "step": 1024 + }, + { + "epoch": 0.2947731684520814, + "grad_norm": 0.12832897901535034, + "learning_rate": 9.880732316034124e-06, + "loss": 0.8606, + "step": 1025 + }, + { + "ce_ib": 6.572211742401123, + "ce_orig": 0.46886974573135376, + "epoch": 0.2947731684520814, + "kl_loss": 0.38375556468963623, + "loss_ib": 0.010409767739474773, + "step": 1025 + }, + { + "ce_ib": 4.577428340911865, + "ce_orig": 0.5632930994033813, + "epoch": 0.2947731684520814, + "kl_loss": 0.18317699432373047, + "loss_ib": 0.006409198045730591, + "step": 1025 + }, + { + "ce_ib": 5.605873107910156, + "ce_orig": 0.8631706833839417, + "epoch": 0.2947731684520814, + "kl_loss": 0.25881266593933105, + "loss_ib": 0.008193999528884888, + "step": 1025 + }, + { + "ce_ib": 8.974735260009766, + "ce_orig": 1.4067844152450562, + "epoch": 0.2947731684520814, + "kl_loss": 0.4378132224082947, + "loss_ib": 0.013352867215871811, + "step": 1025 + }, + { + "ce_ib": 7.069388389587402, + "ce_orig": 0.8898831009864807, + "epoch": 0.295060752031059, + "kl_loss": 0.31348714232444763, + "loss_ib": 0.010204260237514973, + "step": 1026 + }, + { + "ce_ib": 7.188381195068359, + "ce_orig": 0.4827899932861328, + "epoch": 0.295060752031059, + "kl_loss": 0.23111492395401, + "loss_ib": 0.00949953030794859, + "step": 1026 + }, + { + "ce_ib": 4.271545886993408, + "ce_orig": 0.6371316909790039, + "epoch": 0.295060752031059, + "kl_loss": 0.21791093051433563, + "loss_ib": 0.0064506554044783115, + "step": 1026 + }, + { + "ce_ib": 8.962451934814453, + "ce_orig": 1.176334023475647, + "epoch": 0.295060752031059, + "kl_loss": 0.24294477701187134, + "loss_ib": 0.011391899548470974, + "step": 1026 + }, + { + "ce_ib": 9.733299255371094, + "ce_orig": 0.9046944379806519, + "epoch": 0.29534833561003665, + "kl_loss": 0.21920621395111084, + "loss_ib": 0.01192536111921072, + "step": 1027 + }, + { + "ce_ib": 6.503138542175293, + "ce_orig": 0.8069035410881042, + "epoch": 0.29534833561003665, + "kl_loss": 0.18204201757907867, + "loss_ib": 0.00832355860620737, + "step": 1027 + }, + { + "ce_ib": 6.754278182983398, + "ce_orig": 0.4663512110710144, + "epoch": 0.29534833561003665, + "kl_loss": 0.2619379162788391, + "loss_ib": 0.009373657405376434, + "step": 1027 + }, + { + "ce_ib": 5.69559907913208, + "ce_orig": 0.8563054800033569, + "epoch": 0.29534833561003665, + "kl_loss": 0.2036648690700531, + "loss_ib": 0.007732247933745384, + "step": 1027 + }, + { + "ce_ib": 9.430176734924316, + "ce_orig": 1.1677995920181274, + "epoch": 0.29563591918901433, + "kl_loss": 0.5974254608154297, + "loss_ib": 0.015404431149363518, + "step": 1028 + }, + { + "ce_ib": 4.5675787925720215, + "ce_orig": 0.3929074704647064, + "epoch": 0.29563591918901433, + "kl_loss": 0.27183613181114197, + "loss_ib": 0.007285939995199442, + "step": 1028 + }, + { + "ce_ib": 10.300110816955566, + "ce_orig": 0.7582395076751709, + "epoch": 0.29563591918901433, + "kl_loss": 0.4548535943031311, + "loss_ib": 0.014848646707832813, + "step": 1028 + }, + { + "ce_ib": 7.837728023529053, + "ce_orig": 0.41884028911590576, + "epoch": 0.29563591918901433, + "kl_loss": 0.2513379156589508, + "loss_ib": 0.010351106524467468, + "step": 1028 + }, + { + "ce_ib": 5.368338108062744, + "ce_orig": 0.3831135928630829, + "epoch": 0.29592350276799195, + "kl_loss": 0.31224921345710754, + "loss_ib": 0.008490830659866333, + "step": 1029 + }, + { + "ce_ib": 5.802161693572998, + "ce_orig": 0.46503645181655884, + "epoch": 0.29592350276799195, + "kl_loss": 0.18800479173660278, + "loss_ib": 0.0076822093687951565, + "step": 1029 + }, + { + "ce_ib": 5.5843281745910645, + "ce_orig": 0.7334034442901611, + "epoch": 0.29592350276799195, + "kl_loss": 0.25795796513557434, + "loss_ib": 0.008163907565176487, + "step": 1029 + }, + { + "ce_ib": 7.496910572052002, + "ce_orig": 1.0607128143310547, + "epoch": 0.29592350276799195, + "kl_loss": 0.24131786823272705, + "loss_ib": 0.00991008896380663, + "step": 1029 + }, + { + "epoch": 0.2962110863469696, + "grad_norm": 0.09884575009346008, + "learning_rate": 9.879041455075236e-06, + "loss": 0.8737, + "step": 1030 + }, + { + "ce_ib": 6.056578159332275, + "ce_orig": 0.7114567756652832, + "epoch": 0.2962110863469696, + "kl_loss": 0.28722214698791504, + "loss_ib": 0.008928799070417881, + "step": 1030 + }, + { + "ce_ib": 9.305815696716309, + "ce_orig": 0.5742266178131104, + "epoch": 0.2962110863469696, + "kl_loss": 0.35437315702438354, + "loss_ib": 0.012849547900259495, + "step": 1030 + }, + { + "ce_ib": 5.0757365226745605, + "ce_orig": 0.5962457656860352, + "epoch": 0.2962110863469696, + "kl_loss": 0.253349244594574, + "loss_ib": 0.007609229069203138, + "step": 1030 + }, + { + "ce_ib": 13.085478782653809, + "ce_orig": 1.712263822555542, + "epoch": 0.2962110863469696, + "kl_loss": 0.2800062894821167, + "loss_ib": 0.01588554121553898, + "step": 1030 + }, + { + "ce_ib": 7.016113758087158, + "ce_orig": 0.7030823826789856, + "epoch": 0.29649866992594726, + "kl_loss": 0.3182916045188904, + "loss_ib": 0.010199028998613358, + "step": 1031 + }, + { + "ce_ib": 7.362382888793945, + "ce_orig": 0.5507587790489197, + "epoch": 0.29649866992594726, + "kl_loss": 0.28012615442276, + "loss_ib": 0.010163644328713417, + "step": 1031 + }, + { + "ce_ib": 5.964332103729248, + "ce_orig": 0.39900022745132446, + "epoch": 0.29649866992594726, + "kl_loss": 0.26267558336257935, + "loss_ib": 0.00859108753502369, + "step": 1031 + }, + { + "ce_ib": 3.582094430923462, + "ce_orig": 0.5880594849586487, + "epoch": 0.29649866992594726, + "kl_loss": 0.20877045392990112, + "loss_ib": 0.005669798702001572, + "step": 1031 + }, + { + "ce_ib": 6.128958225250244, + "ce_orig": 0.9819390773773193, + "epoch": 0.2967862535049249, + "kl_loss": 0.22314751148223877, + "loss_ib": 0.008360433392226696, + "step": 1032 + }, + { + "ce_ib": 5.336613655090332, + "ce_orig": 0.6773203611373901, + "epoch": 0.2967862535049249, + "kl_loss": 0.18450209498405457, + "loss_ib": 0.007181634660810232, + "step": 1032 + }, + { + "ce_ib": 8.280702590942383, + "ce_orig": 1.1966774463653564, + "epoch": 0.2967862535049249, + "kl_loss": 0.21508166193962097, + "loss_ib": 0.010431519709527493, + "step": 1032 + }, + { + "ce_ib": 7.033453464508057, + "ce_orig": 0.8263683319091797, + "epoch": 0.2967862535049249, + "kl_loss": 0.1751515120267868, + "loss_ib": 0.00878496840596199, + "step": 1032 + }, + { + "ce_ib": 5.190278053283691, + "ce_orig": 0.7666304707527161, + "epoch": 0.2970738370839025, + "kl_loss": 0.19723618030548096, + "loss_ib": 0.007162639871239662, + "step": 1033 + }, + { + "ce_ib": 6.446037292480469, + "ce_orig": 0.6451196074485779, + "epoch": 0.2970738370839025, + "kl_loss": 0.6441924571990967, + "loss_ib": 0.012887961231172085, + "step": 1033 + }, + { + "ce_ib": 9.412089347839355, + "ce_orig": 1.2795425653457642, + "epoch": 0.2970738370839025, + "kl_loss": 0.2573137879371643, + "loss_ib": 0.011985227465629578, + "step": 1033 + }, + { + "ce_ib": 8.65932846069336, + "ce_orig": 0.6759138107299805, + "epoch": 0.2970738370839025, + "kl_loss": 0.3903021216392517, + "loss_ib": 0.012562349438667297, + "step": 1033 + }, + { + "ce_ib": 12.967315673828125, + "ce_orig": 2.125581741333008, + "epoch": 0.2973614206628801, + "kl_loss": 0.4240291714668274, + "loss_ib": 0.017207607626914978, + "step": 1034 + }, + { + "ce_ib": 6.941053867340088, + "ce_orig": 0.9638831615447998, + "epoch": 0.2973614206628801, + "kl_loss": 0.2834685444831848, + "loss_ib": 0.009775739163160324, + "step": 1034 + }, + { + "ce_ib": 11.784566879272461, + "ce_orig": 1.6271092891693115, + "epoch": 0.2973614206628801, + "kl_loss": 0.2749708294868469, + "loss_ib": 0.01453427504748106, + "step": 1034 + }, + { + "ce_ib": 10.994661331176758, + "ce_orig": 1.71238374710083, + "epoch": 0.2973614206628801, + "kl_loss": 0.3320281505584717, + "loss_ib": 0.014314942993223667, + "step": 1034 + }, + { + "epoch": 0.2976490042418578, + "grad_norm": 0.10119609534740448, + "learning_rate": 9.877338839373032e-06, + "loss": 0.881, + "step": 1035 + }, + { + "ce_ib": 8.704519271850586, + "ce_orig": 0.5223831534385681, + "epoch": 0.2976490042418578, + "kl_loss": 0.312034547328949, + "loss_ib": 0.011824864894151688, + "step": 1035 + }, + { + "ce_ib": 6.133333206176758, + "ce_orig": 0.764382004737854, + "epoch": 0.2976490042418578, + "kl_loss": 0.26369595527648926, + "loss_ib": 0.0087702926248312, + "step": 1035 + }, + { + "ce_ib": 6.9865946769714355, + "ce_orig": 0.6817479729652405, + "epoch": 0.2976490042418578, + "kl_loss": 0.20456379652023315, + "loss_ib": 0.00903223268687725, + "step": 1035 + }, + { + "ce_ib": 7.445248126983643, + "ce_orig": 0.923649787902832, + "epoch": 0.2976490042418578, + "kl_loss": 0.2720944583415985, + "loss_ib": 0.01016619335860014, + "step": 1035 + }, + { + "ce_ib": 7.735437393188477, + "ce_orig": 0.8882045149803162, + "epoch": 0.29793658782083543, + "kl_loss": 0.3494291603565216, + "loss_ib": 0.011229729279875755, + "step": 1036 + }, + { + "ce_ib": 7.865479469299316, + "ce_orig": 0.8003482222557068, + "epoch": 0.29793658782083543, + "kl_loss": 0.26265841722488403, + "loss_ib": 0.010492063127458096, + "step": 1036 + }, + { + "ce_ib": 5.9282989501953125, + "ce_orig": 0.8610711097717285, + "epoch": 0.29793658782083543, + "kl_loss": 0.21928074955940247, + "loss_ib": 0.008121106773614883, + "step": 1036 + }, + { + "ce_ib": 4.373647689819336, + "ce_orig": 0.5918238759040833, + "epoch": 0.29793658782083543, + "kl_loss": 0.21359167993068695, + "loss_ib": 0.006509564351290464, + "step": 1036 + }, + { + "ce_ib": 8.592558860778809, + "ce_orig": 1.061389684677124, + "epoch": 0.29822417139981305, + "kl_loss": 0.2698967456817627, + "loss_ib": 0.01129152625799179, + "step": 1037 + }, + { + "ce_ib": 8.775501251220703, + "ce_orig": 0.823801577091217, + "epoch": 0.29822417139981305, + "kl_loss": 0.4299342930316925, + "loss_ib": 0.01307484321296215, + "step": 1037 + }, + { + "ce_ib": 7.973211765289307, + "ce_orig": 0.808665931224823, + "epoch": 0.29822417139981305, + "kl_loss": 0.3724827766418457, + "loss_ib": 0.011698039248585701, + "step": 1037 + }, + { + "ce_ib": 10.214940071105957, + "ce_orig": 1.393505573272705, + "epoch": 0.29822417139981305, + "kl_loss": 0.4218568801879883, + "loss_ib": 0.014433508738875389, + "step": 1037 + }, + { + "ce_ib": 12.167582511901855, + "ce_orig": 0.8209355473518372, + "epoch": 0.29851175497879073, + "kl_loss": 0.3849828243255615, + "loss_ib": 0.016017410904169083, + "step": 1038 + }, + { + "ce_ib": 6.071450710296631, + "ce_orig": 0.5013828277587891, + "epoch": 0.29851175497879073, + "kl_loss": 0.2566567361354828, + "loss_ib": 0.008638017810881138, + "step": 1038 + }, + { + "ce_ib": 9.4537992477417, + "ce_orig": 1.401556134223938, + "epoch": 0.29851175497879073, + "kl_loss": 0.21812914311885834, + "loss_ib": 0.011635090224444866, + "step": 1038 + }, + { + "ce_ib": 6.496313095092773, + "ce_orig": 0.8470216989517212, + "epoch": 0.29851175497879073, + "kl_loss": 0.2540469765663147, + "loss_ib": 0.009036783128976822, + "step": 1038 + }, + { + "ce_ib": 7.5606279373168945, + "ce_orig": 1.107744574546814, + "epoch": 0.29879933855776836, + "kl_loss": 0.24179263412952423, + "loss_ib": 0.009978554211556911, + "step": 1039 + }, + { + "ce_ib": 7.44442892074585, + "ce_orig": 0.7664405703544617, + "epoch": 0.29879933855776836, + "kl_loss": 0.3160368502140045, + "loss_ib": 0.010604796931147575, + "step": 1039 + }, + { + "ce_ib": 4.493764877319336, + "ce_orig": 0.8293079137802124, + "epoch": 0.29879933855776836, + "kl_loss": 0.23693546652793884, + "loss_ib": 0.006863119546324015, + "step": 1039 + }, + { + "ce_ib": 9.437397003173828, + "ce_orig": 1.3835856914520264, + "epoch": 0.29879933855776836, + "kl_loss": 0.32433897256851196, + "loss_ib": 0.012680786661803722, + "step": 1039 + }, + { + "epoch": 0.299086922136746, + "grad_norm": 0.12828873097896576, + "learning_rate": 9.875624473029508e-06, + "loss": 0.868, + "step": 1040 + }, + { + "ce_ib": 5.776780605316162, + "ce_orig": 0.6242812871932983, + "epoch": 0.299086922136746, + "kl_loss": 0.21151088178157806, + "loss_ib": 0.007891889661550522, + "step": 1040 + }, + { + "ce_ib": 6.101773262023926, + "ce_orig": 0.7682925462722778, + "epoch": 0.299086922136746, + "kl_loss": 0.16745525598526, + "loss_ib": 0.007776325568556786, + "step": 1040 + }, + { + "ce_ib": 6.381745338439941, + "ce_orig": 0.8632617592811584, + "epoch": 0.299086922136746, + "kl_loss": 0.21712660789489746, + "loss_ib": 0.00855301134288311, + "step": 1040 + }, + { + "ce_ib": 9.156272888183594, + "ce_orig": 0.7367652654647827, + "epoch": 0.299086922136746, + "kl_loss": 0.4417145252227783, + "loss_ib": 0.0135734174400568, + "step": 1040 + }, + { + "ce_ib": 6.1640424728393555, + "ce_orig": 0.5637131333351135, + "epoch": 0.2993745057157236, + "kl_loss": 0.24898210167884827, + "loss_ib": 0.008653863333165646, + "step": 1041 + }, + { + "ce_ib": 5.767309665679932, + "ce_orig": 0.6547291278839111, + "epoch": 0.2993745057157236, + "kl_loss": 0.26231563091278076, + "loss_ib": 0.008390465751290321, + "step": 1041 + }, + { + "ce_ib": 6.178884983062744, + "ce_orig": 0.8318864703178406, + "epoch": 0.2993745057157236, + "kl_loss": 0.2267499417066574, + "loss_ib": 0.008446384221315384, + "step": 1041 + }, + { + "ce_ib": 6.974244594573975, + "ce_orig": 0.7129083275794983, + "epoch": 0.2993745057157236, + "kl_loss": 0.30809468030929565, + "loss_ib": 0.01005519088357687, + "step": 1041 + }, + { + "ce_ib": 7.284916400909424, + "ce_orig": 0.9830975532531738, + "epoch": 0.2996620892947013, + "kl_loss": 0.23813575506210327, + "loss_ib": 0.009666274301707745, + "step": 1042 + }, + { + "ce_ib": 4.249719619750977, + "ce_orig": 0.7718077301979065, + "epoch": 0.2996620892947013, + "kl_loss": 0.1756143569946289, + "loss_ib": 0.006005862727761269, + "step": 1042 + }, + { + "ce_ib": 9.810495376586914, + "ce_orig": 1.5580822229385376, + "epoch": 0.2996620892947013, + "kl_loss": 0.3328104019165039, + "loss_ib": 0.013138598762452602, + "step": 1042 + }, + { + "ce_ib": 7.983740329742432, + "ce_orig": 1.1797325611114502, + "epoch": 0.2996620892947013, + "kl_loss": 0.30479660630226135, + "loss_ib": 0.011031705886125565, + "step": 1042 + }, + { + "ce_ib": 5.6402082443237305, + "ce_orig": 0.8855639696121216, + "epoch": 0.2999496728736789, + "kl_loss": 0.2435443103313446, + "loss_ib": 0.008075650781393051, + "step": 1043 + }, + { + "ce_ib": 6.360856533050537, + "ce_orig": 0.9510517716407776, + "epoch": 0.2999496728736789, + "kl_loss": 0.24474883079528809, + "loss_ib": 0.008808344602584839, + "step": 1043 + }, + { + "ce_ib": 7.274141311645508, + "ce_orig": 0.7802663445472717, + "epoch": 0.2999496728736789, + "kl_loss": 0.2819164991378784, + "loss_ib": 0.010093306191265583, + "step": 1043 + }, + { + "ce_ib": 6.382179260253906, + "ce_orig": 0.9669009447097778, + "epoch": 0.2999496728736789, + "kl_loss": 0.33987829089164734, + "loss_ib": 0.009780962020158768, + "step": 1043 + }, + { + "ce_ib": 9.163640022277832, + "ce_orig": 1.0312260389328003, + "epoch": 0.30023725645265653, + "kl_loss": 0.2656589150428772, + "loss_ib": 0.011820228770375252, + "step": 1044 + }, + { + "ce_ib": 11.79682731628418, + "ce_orig": 0.9269500374794006, + "epoch": 0.30023725645265653, + "kl_loss": 0.24998760223388672, + "loss_ib": 0.014296703040599823, + "step": 1044 + }, + { + "ce_ib": 11.359130859375, + "ce_orig": 1.4259446859359741, + "epoch": 0.30023725645265653, + "kl_loss": 0.24341662228107452, + "loss_ib": 0.013793298043310642, + "step": 1044 + }, + { + "ce_ib": 9.31595230102539, + "ce_orig": 0.473222553730011, + "epoch": 0.30023725645265653, + "kl_loss": 0.28317737579345703, + "loss_ib": 0.012147726491093636, + "step": 1044 + }, + { + "epoch": 0.3005248400316342, + "grad_norm": 0.09338007122278214, + "learning_rate": 9.873898360174972e-06, + "loss": 0.8916, + "step": 1045 + }, + { + "ce_ib": 3.8569414615631104, + "ce_orig": 0.659389853477478, + "epoch": 0.3005248400316342, + "kl_loss": 0.17949606478214264, + "loss_ib": 0.005651901941746473, + "step": 1045 + }, + { + "ce_ib": 4.244134902954102, + "ce_orig": 0.7671023607254028, + "epoch": 0.3005248400316342, + "kl_loss": 0.2857362926006317, + "loss_ib": 0.007101497612893581, + "step": 1045 + }, + { + "ce_ib": 8.260163307189941, + "ce_orig": 1.042970061302185, + "epoch": 0.3005248400316342, + "kl_loss": 0.36634838581085205, + "loss_ib": 0.011923646554350853, + "step": 1045 + }, + { + "ce_ib": 5.835524559020996, + "ce_orig": 1.086169958114624, + "epoch": 0.3005248400316342, + "kl_loss": 0.26493754982948303, + "loss_ib": 0.008484899997711182, + "step": 1045 + }, + { + "ce_ib": 5.560356140136719, + "ce_orig": 0.6846012473106384, + "epoch": 0.30081242361061183, + "kl_loss": 0.23671673238277435, + "loss_ib": 0.00792752392590046, + "step": 1046 + }, + { + "ce_ib": 8.212461471557617, + "ce_orig": 1.0910207033157349, + "epoch": 0.30081242361061183, + "kl_loss": 0.25429821014404297, + "loss_ib": 0.010755443014204502, + "step": 1046 + }, + { + "ce_ib": 8.125418663024902, + "ce_orig": 1.192816972732544, + "epoch": 0.30081242361061183, + "kl_loss": 0.3981271982192993, + "loss_ib": 0.012106690555810928, + "step": 1046 + }, + { + "ce_ib": 7.482778549194336, + "ce_orig": 1.0049912929534912, + "epoch": 0.30081242361061183, + "kl_loss": 0.3500697612762451, + "loss_ib": 0.010983476415276527, + "step": 1046 + }, + { + "ce_ib": 8.525084495544434, + "ce_orig": 0.8908082842826843, + "epoch": 0.30110000718958946, + "kl_loss": 0.31901606917381287, + "loss_ib": 0.011715245433151722, + "step": 1047 + }, + { + "ce_ib": 9.571982383728027, + "ce_orig": 1.1031574010849, + "epoch": 0.30110000718958946, + "kl_loss": 0.49426859617233276, + "loss_ib": 0.01451466791331768, + "step": 1047 + }, + { + "ce_ib": 5.504137992858887, + "ce_orig": 0.6715264916419983, + "epoch": 0.30110000718958946, + "kl_loss": 0.2991946339607239, + "loss_ib": 0.008496084250509739, + "step": 1047 + }, + { + "ce_ib": 7.694691181182861, + "ce_orig": 0.9191931486129761, + "epoch": 0.30110000718958946, + "kl_loss": 0.48156046867370605, + "loss_ib": 0.012510295957326889, + "step": 1047 + }, + { + "ce_ib": 5.405656814575195, + "ce_orig": 0.42357781529426575, + "epoch": 0.30138759076856714, + "kl_loss": 0.29605257511138916, + "loss_ib": 0.008366182446479797, + "step": 1048 + }, + { + "ce_ib": 6.5060834884643555, + "ce_orig": 0.7840076088905334, + "epoch": 0.30138759076856714, + "kl_loss": 0.22564014792442322, + "loss_ib": 0.008762484416365623, + "step": 1048 + }, + { + "ce_ib": 6.392003059387207, + "ce_orig": 0.6996757388114929, + "epoch": 0.30138759076856714, + "kl_loss": 0.23019085824489594, + "loss_ib": 0.00869391206651926, + "step": 1048 + }, + { + "ce_ib": 9.806346893310547, + "ce_orig": 1.645071029663086, + "epoch": 0.30138759076856714, + "kl_loss": 0.302403062582016, + "loss_ib": 0.01283037755638361, + "step": 1048 + }, + { + "ce_ib": 7.08400821685791, + "ce_orig": 0.5776684880256653, + "epoch": 0.30167517434754476, + "kl_loss": 0.23051565885543823, + "loss_ib": 0.009389164857566357, + "step": 1049 + }, + { + "ce_ib": 8.618279457092285, + "ce_orig": 1.1826684474945068, + "epoch": 0.30167517434754476, + "kl_loss": 0.23479107022285461, + "loss_ib": 0.01096619013696909, + "step": 1049 + }, + { + "ce_ib": 12.606616020202637, + "ce_orig": 1.786026954650879, + "epoch": 0.30167517434754476, + "kl_loss": 0.4211004972457886, + "loss_ib": 0.016817620024085045, + "step": 1049 + }, + { + "ce_ib": 9.673046112060547, + "ce_orig": 1.1197701692581177, + "epoch": 0.30167517434754476, + "kl_loss": 0.42400917410850525, + "loss_ib": 0.013913137838244438, + "step": 1049 + }, + { + "epoch": 0.3019627579265224, + "grad_norm": 0.12706100940704346, + "learning_rate": 9.872160504968032e-06, + "loss": 0.958, + "step": 1050 + }, + { + "ce_ib": 7.350996494293213, + "ce_orig": 0.9169667363166809, + "epoch": 0.3019627579265224, + "kl_loss": 0.2554692029953003, + "loss_ib": 0.009905688464641571, + "step": 1050 + }, + { + "ce_ib": 5.9857635498046875, + "ce_orig": 0.606926679611206, + "epoch": 0.3019627579265224, + "kl_loss": 0.25890299677848816, + "loss_ib": 0.008574793115258217, + "step": 1050 + }, + { + "ce_ib": 7.011756420135498, + "ce_orig": 0.5373194217681885, + "epoch": 0.3019627579265224, + "kl_loss": 0.29745566844940186, + "loss_ib": 0.009986313059926033, + "step": 1050 + }, + { + "ce_ib": 8.080672264099121, + "ce_orig": 1.0620334148406982, + "epoch": 0.3019627579265224, + "kl_loss": 0.31463325023651123, + "loss_ib": 0.01122700423002243, + "step": 1050 + }, + { + "ce_ib": 4.345581531524658, + "ce_orig": 0.7046716809272766, + "epoch": 0.3022503415055, + "kl_loss": 0.17745816707611084, + "loss_ib": 0.006120163016021252, + "step": 1051 + }, + { + "ce_ib": 6.712942600250244, + "ce_orig": 0.8896129131317139, + "epoch": 0.3022503415055, + "kl_loss": 0.25029951333999634, + "loss_ib": 0.009215937927365303, + "step": 1051 + }, + { + "ce_ib": 6.059515953063965, + "ce_orig": 0.8851151466369629, + "epoch": 0.3022503415055, + "kl_loss": 0.24742120504379272, + "loss_ib": 0.008533728308975697, + "step": 1051 + }, + { + "ce_ib": 6.032464981079102, + "ce_orig": 0.6225546002388, + "epoch": 0.3022503415055, + "kl_loss": 0.293154776096344, + "loss_ib": 0.008964012376964092, + "step": 1051 + }, + { + "ce_ib": 9.237442970275879, + "ce_orig": 1.07052743434906, + "epoch": 0.3025379250844777, + "kl_loss": 0.3022955656051636, + "loss_ib": 0.01226039882749319, + "step": 1052 + }, + { + "ce_ib": 6.975470066070557, + "ce_orig": 0.6021490097045898, + "epoch": 0.3025379250844777, + "kl_loss": 0.23589861392974854, + "loss_ib": 0.00933445617556572, + "step": 1052 + }, + { + "ce_ib": 4.114511489868164, + "ce_orig": 0.4750274419784546, + "epoch": 0.3025379250844777, + "kl_loss": 0.5271556377410889, + "loss_ib": 0.00938606821000576, + "step": 1052 + }, + { + "ce_ib": 6.8197221755981445, + "ce_orig": 1.1514414548873901, + "epoch": 0.3025379250844777, + "kl_loss": 0.24803856015205383, + "loss_ib": 0.009300108067691326, + "step": 1052 + }, + { + "ce_ib": 11.652617454528809, + "ce_orig": 1.5600248575210571, + "epoch": 0.3028255086634553, + "kl_loss": 0.24478089809417725, + "loss_ib": 0.014100425876677036, + "step": 1053 + }, + { + "ce_ib": 5.302700996398926, + "ce_orig": 0.6378918886184692, + "epoch": 0.3028255086634553, + "kl_loss": 0.2648119330406189, + "loss_ib": 0.007950820028781891, + "step": 1053 + }, + { + "ce_ib": 6.452362537384033, + "ce_orig": 1.062257170677185, + "epoch": 0.3028255086634553, + "kl_loss": 0.30296024680137634, + "loss_ib": 0.009481964632868767, + "step": 1053 + }, + { + "ce_ib": 6.938072681427002, + "ce_orig": 1.032331943511963, + "epoch": 0.3028255086634553, + "kl_loss": 0.3145177960395813, + "loss_ib": 0.01008325070142746, + "step": 1053 + }, + { + "ce_ib": 7.614224910736084, + "ce_orig": 0.9548846483230591, + "epoch": 0.30311309224243294, + "kl_loss": 0.4099164307117462, + "loss_ib": 0.011713389307260513, + "step": 1054 + }, + { + "ce_ib": 8.340744972229004, + "ce_orig": 1.0289732217788696, + "epoch": 0.30311309224243294, + "kl_loss": 0.3351021409034729, + "loss_ib": 0.011691765859723091, + "step": 1054 + }, + { + "ce_ib": 5.127655506134033, + "ce_orig": 0.7370550632476807, + "epoch": 0.30311309224243294, + "kl_loss": 0.2200349122285843, + "loss_ib": 0.007328004576265812, + "step": 1054 + }, + { + "ce_ib": 7.544470310211182, + "ce_orig": 0.5004482269287109, + "epoch": 0.30311309224243294, + "kl_loss": 0.33762672543525696, + "loss_ib": 0.010920737870037556, + "step": 1054 + }, + { + "epoch": 0.3034006758214106, + "grad_norm": 0.10111220180988312, + "learning_rate": 9.870410911595581e-06, + "loss": 0.8707, + "step": 1055 + }, + { + "ce_ib": 8.684142112731934, + "ce_orig": 0.7165521383285522, + "epoch": 0.3034006758214106, + "kl_loss": 0.3941863477230072, + "loss_ib": 0.012626005336642265, + "step": 1055 + }, + { + "ce_ib": 6.7958879470825195, + "ce_orig": 0.8354387879371643, + "epoch": 0.3034006758214106, + "kl_loss": 0.20312602818012238, + "loss_ib": 0.008827148005366325, + "step": 1055 + }, + { + "ce_ib": 4.682708263397217, + "ce_orig": 0.6456162929534912, + "epoch": 0.3034006758214106, + "kl_loss": 0.21023279428482056, + "loss_ib": 0.006785036064684391, + "step": 1055 + }, + { + "ce_ib": 3.8527894020080566, + "ce_orig": 0.7322303056716919, + "epoch": 0.3034006758214106, + "kl_loss": 0.19984376430511475, + "loss_ib": 0.005851226858794689, + "step": 1055 + }, + { + "ce_ib": 12.782026290893555, + "ce_orig": 1.3153671026229858, + "epoch": 0.30368825940038824, + "kl_loss": 0.2544756233692169, + "loss_ib": 0.015326782129704952, + "step": 1056 + }, + { + "ce_ib": 5.77595329284668, + "ce_orig": 0.6023581624031067, + "epoch": 0.30368825940038824, + "kl_loss": 0.25102975964546204, + "loss_ib": 0.00828625075519085, + "step": 1056 + }, + { + "ce_ib": 7.34418249130249, + "ce_orig": 0.7453755140304565, + "epoch": 0.30368825940038824, + "kl_loss": 0.2862794101238251, + "loss_ib": 0.010206976905465126, + "step": 1056 + }, + { + "ce_ib": 4.816280364990234, + "ce_orig": 0.5178154110908508, + "epoch": 0.30368825940038824, + "kl_loss": 0.23472407460212708, + "loss_ib": 0.007163520902395248, + "step": 1056 + }, + { + "ce_ib": 5.181397438049316, + "ce_orig": 0.48226651549339294, + "epoch": 0.30397584297936586, + "kl_loss": 0.3203040063381195, + "loss_ib": 0.008384437300264835, + "step": 1057 + }, + { + "ce_ib": 9.590699195861816, + "ce_orig": 0.9705502986907959, + "epoch": 0.30397584297936586, + "kl_loss": 0.19820456206798553, + "loss_ib": 0.011572744697332382, + "step": 1057 + }, + { + "ce_ib": 8.903388977050781, + "ce_orig": 1.0351887941360474, + "epoch": 0.30397584297936586, + "kl_loss": 0.4149536192417145, + "loss_ib": 0.013052924536168575, + "step": 1057 + }, + { + "ce_ib": 4.388928413391113, + "ce_orig": 0.3223065733909607, + "epoch": 0.30397584297936586, + "kl_loss": 0.5553755760192871, + "loss_ib": 0.009942684322595596, + "step": 1057 + }, + { + "ce_ib": 6.448616027832031, + "ce_orig": 0.42247146368026733, + "epoch": 0.30426342655834354, + "kl_loss": 0.267733633518219, + "loss_ib": 0.009125952608883381, + "step": 1058 + }, + { + "ce_ib": 4.839138507843018, + "ce_orig": 0.40205830335617065, + "epoch": 0.30426342655834354, + "kl_loss": 0.24600833654403687, + "loss_ib": 0.007299221586436033, + "step": 1058 + }, + { + "ce_ib": 4.3870015144348145, + "ce_orig": 0.6392689347267151, + "epoch": 0.30426342655834354, + "kl_loss": 0.26623180508613586, + "loss_ib": 0.007049319799989462, + "step": 1058 + }, + { + "ce_ib": 5.980587959289551, + "ce_orig": 0.5612409710884094, + "epoch": 0.30426342655834354, + "kl_loss": 0.3410719931125641, + "loss_ib": 0.009391307830810547, + "step": 1058 + }, + { + "ce_ib": 9.207480430603027, + "ce_orig": 0.8099052309989929, + "epoch": 0.30455101013732117, + "kl_loss": 0.316256046295166, + "loss_ib": 0.012370039708912373, + "step": 1059 + }, + { + "ce_ib": 7.6445631980896, + "ce_orig": 0.9055308699607849, + "epoch": 0.30455101013732117, + "kl_loss": 0.25548362731933594, + "loss_ib": 0.01019939873367548, + "step": 1059 + }, + { + "ce_ib": 2.876786231994629, + "ce_orig": 0.6097726821899414, + "epoch": 0.30455101013732117, + "kl_loss": 0.1825810670852661, + "loss_ib": 0.004702596925199032, + "step": 1059 + }, + { + "ce_ib": 11.932756423950195, + "ce_orig": 1.4808422327041626, + "epoch": 0.30455101013732117, + "kl_loss": 0.32290422916412354, + "loss_ib": 0.015161799266934395, + "step": 1059 + }, + { + "epoch": 0.3048385937162988, + "grad_norm": 0.08608844131231308, + "learning_rate": 9.8686495842728e-06, + "loss": 0.8409, + "step": 1060 + }, + { + "ce_ib": 12.914449691772461, + "ce_orig": 1.5343337059020996, + "epoch": 0.3048385937162988, + "kl_loss": 0.22788041830062866, + "loss_ib": 0.015193254686892033, + "step": 1060 + }, + { + "ce_ib": 5.87794303894043, + "ce_orig": 0.774178147315979, + "epoch": 0.3048385937162988, + "kl_loss": 0.1882862150669098, + "loss_ib": 0.007760804612189531, + "step": 1060 + }, + { + "ce_ib": 8.641247749328613, + "ce_orig": 0.8724406957626343, + "epoch": 0.3048385937162988, + "kl_loss": 0.29896390438079834, + "loss_ib": 0.011630886234343052, + "step": 1060 + }, + { + "ce_ib": 2.8925094604492188, + "ce_orig": 0.3218468129634857, + "epoch": 0.3048385937162988, + "kl_loss": 0.6263114213943481, + "loss_ib": 0.009155623614788055, + "step": 1060 + }, + { + "ce_ib": 10.991764068603516, + "ce_orig": 1.4918473958969116, + "epoch": 0.3051261772952764, + "kl_loss": 0.2560180425643921, + "loss_ib": 0.013551943935453892, + "step": 1061 + }, + { + "ce_ib": 7.894521236419678, + "ce_orig": 0.41926810145378113, + "epoch": 0.3051261772952764, + "kl_loss": 0.28496524691581726, + "loss_ib": 0.01074417307972908, + "step": 1061 + }, + { + "ce_ib": 11.378397941589355, + "ce_orig": 1.5567322969436646, + "epoch": 0.3051261772952764, + "kl_loss": 0.35284626483917236, + "loss_ib": 0.014906859956681728, + "step": 1061 + }, + { + "ce_ib": 8.901268005371094, + "ce_orig": 0.9377511143684387, + "epoch": 0.3051261772952764, + "kl_loss": 0.44969889521598816, + "loss_ib": 0.013398257084190845, + "step": 1061 + }, + { + "ce_ib": 4.749931812286377, + "ce_orig": 0.5768422484397888, + "epoch": 0.3054137608742541, + "kl_loss": 0.31295904517173767, + "loss_ib": 0.007879522629082203, + "step": 1062 + }, + { + "ce_ib": 6.151778697967529, + "ce_orig": 0.9215263724327087, + "epoch": 0.3054137608742541, + "kl_loss": 0.26666373014450073, + "loss_ib": 0.00881841592490673, + "step": 1062 + }, + { + "ce_ib": 7.430096626281738, + "ce_orig": 0.7150664925575256, + "epoch": 0.3054137608742541, + "kl_loss": 0.21466025710105896, + "loss_ib": 0.009576699696481228, + "step": 1062 + }, + { + "ce_ib": 10.958990097045898, + "ce_orig": 1.4779627323150635, + "epoch": 0.3054137608742541, + "kl_loss": 0.5556790232658386, + "loss_ib": 0.01651577837765217, + "step": 1062 + }, + { + "ce_ib": 5.780200004577637, + "ce_orig": 0.5452198386192322, + "epoch": 0.3057013444532317, + "kl_loss": 0.27294105291366577, + "loss_ib": 0.008509610779583454, + "step": 1063 + }, + { + "ce_ib": 5.6268839836120605, + "ce_orig": 0.4185033440589905, + "epoch": 0.3057013444532317, + "kl_loss": 0.22825220227241516, + "loss_ib": 0.00790940597653389, + "step": 1063 + }, + { + "ce_ib": 7.02670431137085, + "ce_orig": 0.8597890734672546, + "epoch": 0.3057013444532317, + "kl_loss": 0.19926466047763824, + "loss_ib": 0.00901935063302517, + "step": 1063 + }, + { + "ce_ib": 6.201532363891602, + "ce_orig": 0.6214030385017395, + "epoch": 0.3057013444532317, + "kl_loss": 0.17251111567020416, + "loss_ib": 0.007926642894744873, + "step": 1063 + }, + { + "ce_ib": 8.757668495178223, + "ce_orig": 0.7274843454360962, + "epoch": 0.30598892803220934, + "kl_loss": 0.2544369101524353, + "loss_ib": 0.011302037164568901, + "step": 1064 + }, + { + "ce_ib": 6.019333362579346, + "ce_orig": 0.53138267993927, + "epoch": 0.30598892803220934, + "kl_loss": 0.2464289516210556, + "loss_ib": 0.008483623154461384, + "step": 1064 + }, + { + "ce_ib": 11.079992294311523, + "ce_orig": 1.1950740814208984, + "epoch": 0.30598892803220934, + "kl_loss": 0.2509814202785492, + "loss_ib": 0.01358980592340231, + "step": 1064 + }, + { + "ce_ib": 5.924220085144043, + "ce_orig": 0.6483764052391052, + "epoch": 0.30598892803220934, + "kl_loss": 0.20502346754074097, + "loss_ib": 0.007974454201757908, + "step": 1064 + }, + { + "epoch": 0.306276511611187, + "grad_norm": 0.09530378878116608, + "learning_rate": 9.86687652724313e-06, + "loss": 0.8737, + "step": 1065 + }, + { + "ce_ib": 8.690861701965332, + "ce_orig": 0.8475526571273804, + "epoch": 0.306276511611187, + "kl_loss": 0.35495179891586304, + "loss_ib": 0.012240380048751831, + "step": 1065 + }, + { + "ce_ib": 8.537470817565918, + "ce_orig": 1.1267801523208618, + "epoch": 0.306276511611187, + "kl_loss": 0.20950856804847717, + "loss_ib": 0.010632556863129139, + "step": 1065 + }, + { + "ce_ib": 6.342406749725342, + "ce_orig": 0.5704479813575745, + "epoch": 0.306276511611187, + "kl_loss": 0.24826784431934357, + "loss_ib": 0.008825085125863552, + "step": 1065 + }, + { + "ce_ib": 9.24067211151123, + "ce_orig": 0.8120705485343933, + "epoch": 0.306276511611187, + "kl_loss": 0.287699818611145, + "loss_ib": 0.012117668986320496, + "step": 1065 + }, + { + "ce_ib": 5.934770107269287, + "ce_orig": 0.538998544216156, + "epoch": 0.30656409519016464, + "kl_loss": 0.21416616439819336, + "loss_ib": 0.008076431229710579, + "step": 1066 + }, + { + "ce_ib": 11.81325912475586, + "ce_orig": 2.007389783859253, + "epoch": 0.30656409519016464, + "kl_loss": 0.33129292726516724, + "loss_ib": 0.015126187354326248, + "step": 1066 + }, + { + "ce_ib": 6.053503513336182, + "ce_orig": 0.6393458843231201, + "epoch": 0.30656409519016464, + "kl_loss": 0.30956026911735535, + "loss_ib": 0.009149106219410896, + "step": 1066 + }, + { + "ce_ib": 6.03159236907959, + "ce_orig": 0.9210621118545532, + "epoch": 0.30656409519016464, + "kl_loss": 0.23238983750343323, + "loss_ib": 0.008355490863323212, + "step": 1066 + }, + { + "ce_ib": 8.668764114379883, + "ce_orig": 0.8384073376655579, + "epoch": 0.30685167876914227, + "kl_loss": 0.30814939737319946, + "loss_ib": 0.011750257574021816, + "step": 1067 + }, + { + "ce_ib": 6.8552374839782715, + "ce_orig": 0.7925436496734619, + "epoch": 0.30685167876914227, + "kl_loss": 0.22319209575653076, + "loss_ib": 0.009087158367037773, + "step": 1067 + }, + { + "ce_ib": 11.231310844421387, + "ce_orig": 1.3585479259490967, + "epoch": 0.30685167876914227, + "kl_loss": 0.2911950349807739, + "loss_ib": 0.014143262058496475, + "step": 1067 + }, + { + "ce_ib": 8.62575912475586, + "ce_orig": 1.0265886783599854, + "epoch": 0.30685167876914227, + "kl_loss": 0.3105778992176056, + "loss_ib": 0.01173153892159462, + "step": 1067 + }, + { + "ce_ib": 7.522292137145996, + "ce_orig": 0.8019500374794006, + "epoch": 0.30713926234811995, + "kl_loss": 0.4065636992454529, + "loss_ib": 0.011587929911911488, + "step": 1068 + }, + { + "ce_ib": 8.296854972839355, + "ce_orig": 0.5768155455589294, + "epoch": 0.30713926234811995, + "kl_loss": 0.35958027839660645, + "loss_ib": 0.011892656795680523, + "step": 1068 + }, + { + "ce_ib": 4.952488899230957, + "ce_orig": 0.6868072748184204, + "epoch": 0.30713926234811995, + "kl_loss": 0.21061554551124573, + "loss_ib": 0.007058644201606512, + "step": 1068 + }, + { + "ce_ib": 7.091208457946777, + "ce_orig": 1.1439430713653564, + "epoch": 0.30713926234811995, + "kl_loss": 0.2813361585140228, + "loss_ib": 0.009904569946229458, + "step": 1068 + }, + { + "ce_ib": 9.432252883911133, + "ce_orig": 1.3456711769104004, + "epoch": 0.30742684592709757, + "kl_loss": 0.38078808784484863, + "loss_ib": 0.013240134343504906, + "step": 1069 + }, + { + "ce_ib": 7.201338291168213, + "ce_orig": 1.095965027809143, + "epoch": 0.30742684592709757, + "kl_loss": 0.32409659028053284, + "loss_ib": 0.01044230442494154, + "step": 1069 + }, + { + "ce_ib": 7.40519905090332, + "ce_orig": 1.0274244546890259, + "epoch": 0.30742684592709757, + "kl_loss": 0.25500303506851196, + "loss_ib": 0.009955229237675667, + "step": 1069 + }, + { + "ce_ib": 8.46408462524414, + "ce_orig": 0.7477757930755615, + "epoch": 0.30742684592709757, + "kl_loss": 0.24867352843284607, + "loss_ib": 0.01095082052052021, + "step": 1069 + }, + { + "epoch": 0.3077144295060752, + "grad_norm": 0.11164247989654541, + "learning_rate": 9.865091744778281e-06, + "loss": 0.9093, + "step": 1070 + }, + { + "ce_ib": 8.997332572937012, + "ce_orig": 1.2727254629135132, + "epoch": 0.3077144295060752, + "kl_loss": 0.28486955165863037, + "loss_ib": 0.011846027337014675, + "step": 1070 + }, + { + "ce_ib": 8.73849868774414, + "ce_orig": 0.8974448442459106, + "epoch": 0.3077144295060752, + "kl_loss": 0.28219351172447205, + "loss_ib": 0.01156043354421854, + "step": 1070 + }, + { + "ce_ib": 6.766753196716309, + "ce_orig": 0.8781928420066833, + "epoch": 0.3077144295060752, + "kl_loss": 0.2647661864757538, + "loss_ib": 0.009414414875209332, + "step": 1070 + }, + { + "ce_ib": 7.04093074798584, + "ce_orig": 0.9725215435028076, + "epoch": 0.3077144295060752, + "kl_loss": 0.22904753684997559, + "loss_ib": 0.009331406094133854, + "step": 1070 + }, + { + "ce_ib": 5.76390266418457, + "ce_orig": 0.3865777850151062, + "epoch": 0.3080020130850528, + "kl_loss": 0.41168057918548584, + "loss_ib": 0.009880708530545235, + "step": 1071 + }, + { + "ce_ib": 7.124619007110596, + "ce_orig": 1.183470368385315, + "epoch": 0.3080020130850528, + "kl_loss": 0.25442296266555786, + "loss_ib": 0.009668848477303982, + "step": 1071 + }, + { + "ce_ib": 8.085482597351074, + "ce_orig": 1.3335832357406616, + "epoch": 0.3080020130850528, + "kl_loss": 0.25079238414764404, + "loss_ib": 0.010593406856060028, + "step": 1071 + }, + { + "ce_ib": 5.582581043243408, + "ce_orig": 0.6214055418968201, + "epoch": 0.3080020130850528, + "kl_loss": 0.32632315158843994, + "loss_ib": 0.008845812641084194, + "step": 1071 + }, + { + "ce_ib": 9.020593643188477, + "ce_orig": 1.0598443746566772, + "epoch": 0.3082895966640305, + "kl_loss": 0.2390134483575821, + "loss_ib": 0.011410728096961975, + "step": 1072 + }, + { + "ce_ib": 4.355950355529785, + "ce_orig": 0.5563782453536987, + "epoch": 0.3082895966640305, + "kl_loss": 0.44233155250549316, + "loss_ib": 0.00877926591783762, + "step": 1072 + }, + { + "ce_ib": 5.475048542022705, + "ce_orig": 0.8107210397720337, + "epoch": 0.3082895966640305, + "kl_loss": 0.29146504402160645, + "loss_ib": 0.008389698341488838, + "step": 1072 + }, + { + "ce_ib": 9.553082466125488, + "ce_orig": 1.393612265586853, + "epoch": 0.3082895966640305, + "kl_loss": 0.47753843665122986, + "loss_ib": 0.014328466728329659, + "step": 1072 + }, + { + "ce_ib": 4.682051658630371, + "ce_orig": 0.6672455072402954, + "epoch": 0.3085771802430081, + "kl_loss": 0.23742368817329407, + "loss_ib": 0.007056288421154022, + "step": 1073 + }, + { + "ce_ib": 9.294081687927246, + "ce_orig": 1.250988245010376, + "epoch": 0.3085771802430081, + "kl_loss": 0.24257344007492065, + "loss_ib": 0.01171981543302536, + "step": 1073 + }, + { + "ce_ib": 6.823581695556641, + "ce_orig": 0.5856737494468689, + "epoch": 0.3085771802430081, + "kl_loss": 0.4416767358779907, + "loss_ib": 0.011240348219871521, + "step": 1073 + }, + { + "ce_ib": 8.78899097442627, + "ce_orig": 1.3411270380020142, + "epoch": 0.3085771802430081, + "kl_loss": 0.1991458237171173, + "loss_ib": 0.010780449025332928, + "step": 1073 + }, + { + "ce_ib": 10.130610466003418, + "ce_orig": 1.481398105621338, + "epoch": 0.30886476382198574, + "kl_loss": 0.21280843019485474, + "loss_ib": 0.01225869357585907, + "step": 1074 + }, + { + "ce_ib": 6.745814323425293, + "ce_orig": 0.6256406903266907, + "epoch": 0.30886476382198574, + "kl_loss": 0.21133030951023102, + "loss_ib": 0.008859117515385151, + "step": 1074 + }, + { + "ce_ib": 4.382914066314697, + "ce_orig": 0.5915647149085999, + "epoch": 0.30886476382198574, + "kl_loss": 0.2159290462732315, + "loss_ib": 0.006542204413563013, + "step": 1074 + }, + { + "ce_ib": 9.640929222106934, + "ce_orig": 0.9657080769538879, + "epoch": 0.30886476382198574, + "kl_loss": 0.27724915742874146, + "loss_ib": 0.012413420714437962, + "step": 1074 + }, + { + "epoch": 0.3091523474009634, + "grad_norm": 0.09821418672800064, + "learning_rate": 9.863295241178207e-06, + "loss": 0.9336, + "step": 1075 + }, + { + "ce_ib": 10.311457633972168, + "ce_orig": 1.1121739149093628, + "epoch": 0.3091523474009634, + "kl_loss": 0.23806434869766235, + "loss_ib": 0.012692100368440151, + "step": 1075 + }, + { + "ce_ib": 5.269136428833008, + "ce_orig": 0.8820784091949463, + "epoch": 0.3091523474009634, + "kl_loss": 0.22836144268512726, + "loss_ib": 0.007552750408649445, + "step": 1075 + }, + { + "ce_ib": 10.771013259887695, + "ce_orig": 1.2149564027786255, + "epoch": 0.3091523474009634, + "kl_loss": 0.2552693784236908, + "loss_ib": 0.0133237075060606, + "step": 1075 + }, + { + "ce_ib": 4.205135822296143, + "ce_orig": 0.5866997241973877, + "epoch": 0.3091523474009634, + "kl_loss": 0.2929501533508301, + "loss_ib": 0.007134637795388699, + "step": 1075 + }, + { + "ce_ib": 4.309512615203857, + "ce_orig": 0.6440024375915527, + "epoch": 0.30943993097994105, + "kl_loss": 0.19797396659851074, + "loss_ib": 0.006289252080023289, + "step": 1076 + }, + { + "ce_ib": 9.057791709899902, + "ce_orig": 0.7737462520599365, + "epoch": 0.30943993097994105, + "kl_loss": 0.23361043632030487, + "loss_ib": 0.01139389630407095, + "step": 1076 + }, + { + "ce_ib": 3.5322015285491943, + "ce_orig": 0.4126538336277008, + "epoch": 0.30943993097994105, + "kl_loss": 0.18980346620082855, + "loss_ib": 0.005430236458778381, + "step": 1076 + }, + { + "ce_ib": 7.820254325866699, + "ce_orig": 1.1803672313690186, + "epoch": 0.30943993097994105, + "kl_loss": 0.5156011581420898, + "loss_ib": 0.012976265512406826, + "step": 1076 + }, + { + "ce_ib": 10.2060546875, + "ce_orig": 1.340649127960205, + "epoch": 0.30972751455891867, + "kl_loss": 0.6236756443977356, + "loss_ib": 0.016442811116576195, + "step": 1077 + }, + { + "ce_ib": 6.512198448181152, + "ce_orig": 0.6726751923561096, + "epoch": 0.30972751455891867, + "kl_loss": 0.22547681629657745, + "loss_ib": 0.008766965940594673, + "step": 1077 + }, + { + "ce_ib": 7.2397565841674805, + "ce_orig": 1.0542415380477905, + "epoch": 0.30972751455891867, + "kl_loss": 0.3293522000312805, + "loss_ib": 0.010533277876675129, + "step": 1077 + }, + { + "ce_ib": 4.1594157218933105, + "ce_orig": 0.7226054072380066, + "epoch": 0.30972751455891867, + "kl_loss": 0.16181252896785736, + "loss_ib": 0.0057775406166911125, + "step": 1077 + }, + { + "ce_ib": 7.226589679718018, + "ce_orig": 0.8086729049682617, + "epoch": 0.31001509813789635, + "kl_loss": 0.29907116293907166, + "loss_ib": 0.010217301547527313, + "step": 1078 + }, + { + "ce_ib": 5.652985572814941, + "ce_orig": 0.8123196959495544, + "epoch": 0.31001509813789635, + "kl_loss": 0.21832415461540222, + "loss_ib": 0.007836227305233479, + "step": 1078 + }, + { + "ce_ib": 5.190840721130371, + "ce_orig": 0.6470297574996948, + "epoch": 0.31001509813789635, + "kl_loss": 0.379181444644928, + "loss_ib": 0.008982655592262745, + "step": 1078 + }, + { + "ce_ib": 8.699675559997559, + "ce_orig": 0.8381361961364746, + "epoch": 0.31001509813789635, + "kl_loss": 0.18951405584812164, + "loss_ib": 0.010594815947115421, + "step": 1078 + }, + { + "ce_ib": 6.517348289489746, + "ce_orig": 0.3661119043827057, + "epoch": 0.310302681716874, + "kl_loss": 0.2933904826641083, + "loss_ib": 0.009451253339648247, + "step": 1079 + }, + { + "ce_ib": 7.789237022399902, + "ce_orig": 0.814723014831543, + "epoch": 0.310302681716874, + "kl_loss": 0.4003676772117615, + "loss_ib": 0.01179291307926178, + "step": 1079 + }, + { + "ce_ib": 5.38702392578125, + "ce_orig": 0.5761942267417908, + "epoch": 0.310302681716874, + "kl_loss": 0.3325355648994446, + "loss_ib": 0.008712380193173885, + "step": 1079 + }, + { + "ce_ib": 7.839290142059326, + "ce_orig": 0.7794978618621826, + "epoch": 0.310302681716874, + "kl_loss": 0.2755085229873657, + "loss_ib": 0.010594374500215054, + "step": 1079 + }, + { + "epoch": 0.3105902652958516, + "grad_norm": 0.09332282096147537, + "learning_rate": 9.861487020771103e-06, + "loss": 0.8683, + "step": 1080 + }, + { + "ce_ib": 7.493288516998291, + "ce_orig": 0.9413217902183533, + "epoch": 0.3105902652958516, + "kl_loss": 0.2304474264383316, + "loss_ib": 0.009797762148082256, + "step": 1080 + }, + { + "ce_ib": 5.598116397857666, + "ce_orig": 1.187414526939392, + "epoch": 0.3105902652958516, + "kl_loss": 0.26155588030815125, + "loss_ib": 0.008213674649596214, + "step": 1080 + }, + { + "ce_ib": 9.935769081115723, + "ce_orig": 0.690135657787323, + "epoch": 0.3105902652958516, + "kl_loss": 0.3921072781085968, + "loss_ib": 0.013856842182576656, + "step": 1080 + }, + { + "ce_ib": 7.49739408493042, + "ce_orig": 0.7895194888114929, + "epoch": 0.3105902652958516, + "kl_loss": 0.1883796751499176, + "loss_ib": 0.009381189942359924, + "step": 1080 + }, + { + "ce_ib": 2.7070467472076416, + "ce_orig": 0.19051028788089752, + "epoch": 0.3108778488748292, + "kl_loss": 0.7031688690185547, + "loss_ib": 0.009738734923303127, + "step": 1081 + }, + { + "ce_ib": 10.460716247558594, + "ce_orig": 0.8958859443664551, + "epoch": 0.3108778488748292, + "kl_loss": 0.2543383240699768, + "loss_ib": 0.013004099950194359, + "step": 1081 + }, + { + "ce_ib": 8.759541511535645, + "ce_orig": 0.8319806456565857, + "epoch": 0.3108778488748292, + "kl_loss": 0.2823876738548279, + "loss_ib": 0.01158341858536005, + "step": 1081 + }, + { + "ce_ib": 3.7187533378601074, + "ce_orig": 0.3965437114238739, + "epoch": 0.3108778488748292, + "kl_loss": 0.3348379135131836, + "loss_ib": 0.0070671322755515575, + "step": 1081 + }, + { + "ce_ib": 11.48133659362793, + "ce_orig": 0.8450151681900024, + "epoch": 0.3111654324538069, + "kl_loss": 0.2467035949230194, + "loss_ib": 0.013948372565209866, + "step": 1082 + }, + { + "ce_ib": 8.043100357055664, + "ce_orig": 1.2448742389678955, + "epoch": 0.3111654324538069, + "kl_loss": 0.23143544793128967, + "loss_ib": 0.010357454419136047, + "step": 1082 + }, + { + "ce_ib": 7.008208751678467, + "ce_orig": 0.859933614730835, + "epoch": 0.3111654324538069, + "kl_loss": 0.24393969774246216, + "loss_ib": 0.009447605349123478, + "step": 1082 + }, + { + "ce_ib": 8.620953559875488, + "ce_orig": 1.273354172706604, + "epoch": 0.3111654324538069, + "kl_loss": 0.33981454372406006, + "loss_ib": 0.012019098736345768, + "step": 1082 + }, + { + "ce_ib": 9.286676406860352, + "ce_orig": 1.2248402833938599, + "epoch": 0.3114530160327845, + "kl_loss": 0.24009215831756592, + "loss_ib": 0.011687598191201687, + "step": 1083 + }, + { + "ce_ib": 4.905697345733643, + "ce_orig": 0.5274173617362976, + "epoch": 0.3114530160327845, + "kl_loss": 0.2764824330806732, + "loss_ib": 0.0076705217361450195, + "step": 1083 + }, + { + "ce_ib": 7.331008434295654, + "ce_orig": 0.8421371579170227, + "epoch": 0.3114530160327845, + "kl_loss": 0.2732947766780853, + "loss_ib": 0.010063955560326576, + "step": 1083 + }, + { + "ce_ib": 5.719943046569824, + "ce_orig": 0.579391360282898, + "epoch": 0.3114530160327845, + "kl_loss": 0.3374432325363159, + "loss_ib": 0.009094375185668468, + "step": 1083 + }, + { + "ce_ib": 9.131670951843262, + "ce_orig": 1.3303252458572388, + "epoch": 0.31174059961176215, + "kl_loss": 0.33134493231773376, + "loss_ib": 0.012445120140910149, + "step": 1084 + }, + { + "ce_ib": 6.945981502532959, + "ce_orig": 1.103769063949585, + "epoch": 0.31174059961176215, + "kl_loss": 0.3207002580165863, + "loss_ib": 0.010152983479201794, + "step": 1084 + }, + { + "ce_ib": 6.838366508483887, + "ce_orig": 0.8066115379333496, + "epoch": 0.31174059961176215, + "kl_loss": 0.27075129747390747, + "loss_ib": 0.009545879438519478, + "step": 1084 + }, + { + "ce_ib": 12.763402938842773, + "ce_orig": 1.7998559474945068, + "epoch": 0.31174059961176215, + "kl_loss": 0.4378839135169983, + "loss_ib": 0.017142243683338165, + "step": 1084 + }, + { + "epoch": 0.3120281831907398, + "grad_norm": 0.12011191248893738, + "learning_rate": 9.85966708791339e-06, + "loss": 0.9255, + "step": 1085 + }, + { + "ce_ib": 6.058638572692871, + "ce_orig": 0.7367510199546814, + "epoch": 0.3120281831907398, + "kl_loss": 0.23104149103164673, + "loss_ib": 0.008369053713977337, + "step": 1085 + }, + { + "ce_ib": 4.6562724113464355, + "ce_orig": 0.5617725253105164, + "epoch": 0.3120281831907398, + "kl_loss": 0.203491672873497, + "loss_ib": 0.006691189482808113, + "step": 1085 + }, + { + "ce_ib": 7.416159152984619, + "ce_orig": 0.6727170348167419, + "epoch": 0.3120281831907398, + "kl_loss": 0.34073999524116516, + "loss_ib": 0.010823559947311878, + "step": 1085 + }, + { + "ce_ib": 5.643037796020508, + "ce_orig": 0.6726337671279907, + "epoch": 0.3120281831907398, + "kl_loss": 0.1980753391981125, + "loss_ib": 0.007623791694641113, + "step": 1085 + }, + { + "ce_ib": 6.168734073638916, + "ce_orig": 0.7488775849342346, + "epoch": 0.31231576676971745, + "kl_loss": 0.21899104118347168, + "loss_ib": 0.00835864432156086, + "step": 1086 + }, + { + "ce_ib": 9.138340950012207, + "ce_orig": 1.072446584701538, + "epoch": 0.31231576676971745, + "kl_loss": 0.2767692804336548, + "loss_ib": 0.011906033381819725, + "step": 1086 + }, + { + "ce_ib": 5.069515228271484, + "ce_orig": 0.5102606415748596, + "epoch": 0.31231576676971745, + "kl_loss": 0.30164480209350586, + "loss_ib": 0.008085963316261768, + "step": 1086 + }, + { + "ce_ib": 8.540528297424316, + "ce_orig": 1.3709053993225098, + "epoch": 0.31231576676971745, + "kl_loss": 0.2561246156692505, + "loss_ib": 0.011101774871349335, + "step": 1086 + }, + { + "ce_ib": 4.596883773803711, + "ce_orig": 0.5804041028022766, + "epoch": 0.3126033503486951, + "kl_loss": 0.20311373472213745, + "loss_ib": 0.006628020666539669, + "step": 1087 + }, + { + "ce_ib": 4.821422576904297, + "ce_orig": 0.7049044370651245, + "epoch": 0.3126033503486951, + "kl_loss": 0.2860710918903351, + "loss_ib": 0.007682133931666613, + "step": 1087 + }, + { + "ce_ib": 9.637624740600586, + "ce_orig": 1.2415283918380737, + "epoch": 0.3126033503486951, + "kl_loss": 0.5572246313095093, + "loss_ib": 0.015209870412945747, + "step": 1087 + }, + { + "ce_ib": 5.758790969848633, + "ce_orig": 0.865833044052124, + "epoch": 0.3126033503486951, + "kl_loss": 0.17076636850833893, + "loss_ib": 0.007466454524546862, + "step": 1087 + }, + { + "ce_ib": 5.545116901397705, + "ce_orig": 0.7929593920707703, + "epoch": 0.31289093392767275, + "kl_loss": 0.29858559370040894, + "loss_ib": 0.008530973456799984, + "step": 1088 + }, + { + "ce_ib": 7.397669315338135, + "ce_orig": 1.049553394317627, + "epoch": 0.31289093392767275, + "kl_loss": 0.3252887427806854, + "loss_ib": 0.010650557465851307, + "step": 1088 + }, + { + "ce_ib": 9.861873626708984, + "ce_orig": 1.445408821105957, + "epoch": 0.31289093392767275, + "kl_loss": 0.43143945932388306, + "loss_ib": 0.014176268130540848, + "step": 1088 + }, + { + "ce_ib": 7.751357555389404, + "ce_orig": 0.9703396558761597, + "epoch": 0.31289093392767275, + "kl_loss": 0.27120447158813477, + "loss_ib": 0.010463401675224304, + "step": 1088 + }, + { + "ce_ib": 9.846639633178711, + "ce_orig": 1.2402608394622803, + "epoch": 0.3131785175066504, + "kl_loss": 0.30426251888275146, + "loss_ib": 0.012889264151453972, + "step": 1089 + }, + { + "ce_ib": 4.732132911682129, + "ce_orig": 0.8199735879898071, + "epoch": 0.3131785175066504, + "kl_loss": 0.22151115536689758, + "loss_ib": 0.006947244051843882, + "step": 1089 + }, + { + "ce_ib": 7.251793384552002, + "ce_orig": 0.8341598510742188, + "epoch": 0.3131785175066504, + "kl_loss": 0.26620736718177795, + "loss_ib": 0.009913867339491844, + "step": 1089 + }, + { + "ce_ib": 9.6569242477417, + "ce_orig": 0.8773269057273865, + "epoch": 0.3131785175066504, + "kl_loss": 0.35142725706100464, + "loss_ib": 0.013171196915209293, + "step": 1089 + }, + { + "epoch": 0.313466101085628, + "grad_norm": 0.11186288297176361, + "learning_rate": 9.857835446989708e-06, + "loss": 0.9513, + "step": 1090 + }, + { + "ce_ib": 7.215015888214111, + "ce_orig": 0.9098507761955261, + "epoch": 0.313466101085628, + "kl_loss": 0.3162091076374054, + "loss_ib": 0.010377106256783009, + "step": 1090 + }, + { + "ce_ib": 4.774172306060791, + "ce_orig": 0.4562654495239258, + "epoch": 0.313466101085628, + "kl_loss": 0.5272875428199768, + "loss_ib": 0.010047046467661858, + "step": 1090 + }, + { + "ce_ib": 7.998179912567139, + "ce_orig": 0.6721808910369873, + "epoch": 0.313466101085628, + "kl_loss": 0.2237975001335144, + "loss_ib": 0.010236154310405254, + "step": 1090 + }, + { + "ce_ib": 4.404753684997559, + "ce_orig": 0.8174905180931091, + "epoch": 0.313466101085628, + "kl_loss": 0.17927923798561096, + "loss_ib": 0.0061975461430847645, + "step": 1090 + }, + { + "ce_ib": 5.590545654296875, + "ce_orig": 0.63374263048172, + "epoch": 0.3137536846646056, + "kl_loss": 0.22596238553524017, + "loss_ib": 0.007850169204175472, + "step": 1091 + }, + { + "ce_ib": 5.190075397491455, + "ce_orig": 0.49746814370155334, + "epoch": 0.3137536846646056, + "kl_loss": 0.5168180465698242, + "loss_ib": 0.010358256287872791, + "step": 1091 + }, + { + "ce_ib": 7.648189544677734, + "ce_orig": 0.8305013179779053, + "epoch": 0.3137536846646056, + "kl_loss": 0.2619606554508209, + "loss_ib": 0.010267795994877815, + "step": 1091 + }, + { + "ce_ib": 2.5894863605499268, + "ce_orig": 0.4961947202682495, + "epoch": 0.3137536846646056, + "kl_loss": 0.16488581895828247, + "loss_ib": 0.004238344728946686, + "step": 1091 + }, + { + "ce_ib": 6.8501973152160645, + "ce_orig": 0.9551085233688354, + "epoch": 0.3140412682435833, + "kl_loss": 0.22183682024478912, + "loss_ib": 0.00906856544315815, + "step": 1092 + }, + { + "ce_ib": 9.278849601745605, + "ce_orig": 1.0578449964523315, + "epoch": 0.3140412682435833, + "kl_loss": 0.46093541383743286, + "loss_ib": 0.013888203538954258, + "step": 1092 + }, + { + "ce_ib": 10.20980453491211, + "ce_orig": 1.130508303642273, + "epoch": 0.3140412682435833, + "kl_loss": 0.26771920919418335, + "loss_ib": 0.012886996380984783, + "step": 1092 + }, + { + "ce_ib": 7.884620666503906, + "ce_orig": 1.2247728109359741, + "epoch": 0.3140412682435833, + "kl_loss": 0.5485724210739136, + "loss_ib": 0.013370344415307045, + "step": 1092 + }, + { + "ce_ib": 4.926917552947998, + "ce_orig": 0.620137631893158, + "epoch": 0.31432885182256093, + "kl_loss": 0.23593732714653015, + "loss_ib": 0.007286291103810072, + "step": 1093 + }, + { + "ce_ib": 6.589530944824219, + "ce_orig": 0.5658002495765686, + "epoch": 0.31432885182256093, + "kl_loss": 0.36354243755340576, + "loss_ib": 0.010224955156445503, + "step": 1093 + }, + { + "ce_ib": 7.028726100921631, + "ce_orig": 0.9482094049453735, + "epoch": 0.31432885182256093, + "kl_loss": 0.250606894493103, + "loss_ib": 0.009534794837236404, + "step": 1093 + }, + { + "ce_ib": 7.295685291290283, + "ce_orig": 0.8431223630905151, + "epoch": 0.31432885182256093, + "kl_loss": 0.8136886358261108, + "loss_ib": 0.015432571992278099, + "step": 1093 + }, + { + "ce_ib": 5.327012062072754, + "ce_orig": 0.7496179342269897, + "epoch": 0.31461643540153855, + "kl_loss": 0.32254457473754883, + "loss_ib": 0.008552457205951214, + "step": 1094 + }, + { + "ce_ib": 8.12031364440918, + "ce_orig": 0.6261737942695618, + "epoch": 0.31461643540153855, + "kl_loss": 0.2707287669181824, + "loss_ib": 0.010827600955963135, + "step": 1094 + }, + { + "ce_ib": 7.939435005187988, + "ce_orig": 0.72379070520401, + "epoch": 0.31461643540153855, + "kl_loss": 0.24098311364650726, + "loss_ib": 0.010349266231060028, + "step": 1094 + }, + { + "ce_ib": 5.902799606323242, + "ce_orig": 0.5440672636032104, + "epoch": 0.31461643540153855, + "kl_loss": 0.24652087688446045, + "loss_ib": 0.008368008770048618, + "step": 1094 + }, + { + "epoch": 0.31490401898051623, + "grad_norm": 0.09559078514575958, + "learning_rate": 9.855992102412909e-06, + "loss": 0.8071, + "step": 1095 + }, + { + "ce_ib": 8.018738746643066, + "ce_orig": 0.6169609427452087, + "epoch": 0.31490401898051623, + "kl_loss": 0.3646816313266754, + "loss_ib": 0.011665554717183113, + "step": 1095 + }, + { + "ce_ib": 5.268819808959961, + "ce_orig": 0.1188381165266037, + "epoch": 0.31490401898051623, + "kl_loss": 0.5084589123725891, + "loss_ib": 0.010353408753871918, + "step": 1095 + }, + { + "ce_ib": 6.112328052520752, + "ce_orig": 0.7286979556083679, + "epoch": 0.31490401898051623, + "kl_loss": 0.22665318846702576, + "loss_ib": 0.008378859609365463, + "step": 1095 + }, + { + "ce_ib": 9.162174224853516, + "ce_orig": 0.981454074382782, + "epoch": 0.31490401898051623, + "kl_loss": 0.23977544903755188, + "loss_ib": 0.011559928767383099, + "step": 1095 + }, + { + "ce_ib": 9.974848747253418, + "ce_orig": 1.2838889360427856, + "epoch": 0.31519160255949386, + "kl_loss": 0.3775405287742615, + "loss_ib": 0.01375025324523449, + "step": 1096 + }, + { + "ce_ib": 5.4543867111206055, + "ce_orig": 0.8588042855262756, + "epoch": 0.31519160255949386, + "kl_loss": 0.23723624646663666, + "loss_ib": 0.007826749235391617, + "step": 1096 + }, + { + "ce_ib": 6.729191303253174, + "ce_orig": 0.7287084460258484, + "epoch": 0.31519160255949386, + "kl_loss": 0.2046607881784439, + "loss_ib": 0.008775799535214901, + "step": 1096 + }, + { + "ce_ib": 6.898683071136475, + "ce_orig": 0.9344533681869507, + "epoch": 0.31519160255949386, + "kl_loss": 0.31471872329711914, + "loss_ib": 0.010045870207250118, + "step": 1096 + }, + { + "ce_ib": 3.085794687271118, + "ce_orig": 0.6212059855461121, + "epoch": 0.3154791861384715, + "kl_loss": 0.1684914082288742, + "loss_ib": 0.0047707087360322475, + "step": 1097 + }, + { + "ce_ib": 7.249536037445068, + "ce_orig": 0.6098146438598633, + "epoch": 0.3154791861384715, + "kl_loss": 0.4129192531108856, + "loss_ib": 0.011378727853298187, + "step": 1097 + }, + { + "ce_ib": 10.709492683410645, + "ce_orig": 0.8577325940132141, + "epoch": 0.3154791861384715, + "kl_loss": 0.2508341670036316, + "loss_ib": 0.013217834755778313, + "step": 1097 + }, + { + "ce_ib": 3.9720232486724854, + "ce_orig": 0.6190991401672363, + "epoch": 0.3154791861384715, + "kl_loss": 0.18957431614398956, + "loss_ib": 0.005867766682058573, + "step": 1097 + }, + { + "ce_ib": 8.379387855529785, + "ce_orig": 1.019614577293396, + "epoch": 0.31576676971744916, + "kl_loss": 0.2486770749092102, + "loss_ib": 0.010866157710552216, + "step": 1098 + }, + { + "ce_ib": 7.849251747131348, + "ce_orig": 0.6944756507873535, + "epoch": 0.31576676971744916, + "kl_loss": 0.19975972175598145, + "loss_ib": 0.00984684843569994, + "step": 1098 + }, + { + "ce_ib": 6.0830583572387695, + "ce_orig": 0.9229554533958435, + "epoch": 0.31576676971744916, + "kl_loss": 0.28290021419525146, + "loss_ib": 0.008912060409784317, + "step": 1098 + }, + { + "ce_ib": 8.36382007598877, + "ce_orig": 1.2436766624450684, + "epoch": 0.31576676971744916, + "kl_loss": 0.2294701784849167, + "loss_ib": 0.010658521205186844, + "step": 1098 + }, + { + "ce_ib": 4.324337005615234, + "ce_orig": 0.6799556612968445, + "epoch": 0.3160543532964268, + "kl_loss": 0.1960705667734146, + "loss_ib": 0.006285042501986027, + "step": 1099 + }, + { + "ce_ib": 3.3304200172424316, + "ce_orig": 0.5313477516174316, + "epoch": 0.3160543532964268, + "kl_loss": 0.22542990744113922, + "loss_ib": 0.0055847191251814365, + "step": 1099 + }, + { + "ce_ib": 4.986284255981445, + "ce_orig": 0.6891649961471558, + "epoch": 0.3160543532964268, + "kl_loss": 0.25679337978363037, + "loss_ib": 0.007554218173027039, + "step": 1099 + }, + { + "ce_ib": 4.57914924621582, + "ce_orig": 0.6530379056930542, + "epoch": 0.3160543532964268, + "kl_loss": 0.20343467593193054, + "loss_ib": 0.006613495759665966, + "step": 1099 + }, + { + "epoch": 0.3163419368754044, + "grad_norm": 0.11441560834646225, + "learning_rate": 9.854137058624034e-06, + "loss": 0.8445, + "step": 1100 + }, + { + "ce_ib": 2.812058687210083, + "ce_orig": 0.530636727809906, + "epoch": 0.3163419368754044, + "kl_loss": 0.19126400351524353, + "loss_ib": 0.004724698606878519, + "step": 1100 + }, + { + "ce_ib": 8.779197692871094, + "ce_orig": 1.1562453508377075, + "epoch": 0.3163419368754044, + "kl_loss": 0.23617590963840485, + "loss_ib": 0.011140956543385983, + "step": 1100 + }, + { + "ce_ib": 10.066039085388184, + "ce_orig": 1.1768686771392822, + "epoch": 0.3163419368754044, + "kl_loss": 0.28456875681877136, + "loss_ib": 0.012911726720631123, + "step": 1100 + }, + { + "ce_ib": 5.156683921813965, + "ce_orig": 0.7831324934959412, + "epoch": 0.3163419368754044, + "kl_loss": 0.201686292886734, + "loss_ib": 0.007173546589910984, + "step": 1100 + }, + { + "ce_ib": 3.357862949371338, + "ce_orig": 0.1526433229446411, + "epoch": 0.31662952045438203, + "kl_loss": 0.7000162601470947, + "loss_ib": 0.010358026251196861, + "step": 1101 + }, + { + "ce_ib": 7.347134113311768, + "ce_orig": 0.8936167359352112, + "epoch": 0.31662952045438203, + "kl_loss": 0.18082134425640106, + "loss_ib": 0.00915534794330597, + "step": 1101 + }, + { + "ce_ib": 10.29253101348877, + "ce_orig": 1.4773753881454468, + "epoch": 0.31662952045438203, + "kl_loss": 0.5856887102127075, + "loss_ib": 0.01614941842854023, + "step": 1101 + }, + { + "ce_ib": 5.112944602966309, + "ce_orig": 0.491621732711792, + "epoch": 0.31662952045438203, + "kl_loss": 0.22965355217456818, + "loss_ib": 0.007409479934722185, + "step": 1101 + }, + { + "ce_ib": 6.190317630767822, + "ce_orig": 0.6080651879310608, + "epoch": 0.3169171040333597, + "kl_loss": 0.2565647065639496, + "loss_ib": 0.008755965158343315, + "step": 1102 + }, + { + "ce_ib": 7.12357234954834, + "ce_orig": 0.5342175364494324, + "epoch": 0.3169171040333597, + "kl_loss": 0.3891940116882324, + "loss_ib": 0.011015512980520725, + "step": 1102 + }, + { + "ce_ib": 4.89890193939209, + "ce_orig": 0.8590795397758484, + "epoch": 0.3169171040333597, + "kl_loss": 0.17115063965320587, + "loss_ib": 0.006610408425331116, + "step": 1102 + }, + { + "ce_ib": 5.244142055511475, + "ce_orig": 0.5240667462348938, + "epoch": 0.3169171040333597, + "kl_loss": 0.2943491041660309, + "loss_ib": 0.008187633939087391, + "step": 1102 + }, + { + "ce_ib": 9.725130081176758, + "ce_orig": 1.2593644857406616, + "epoch": 0.31720468761233733, + "kl_loss": 0.27966809272766113, + "loss_ib": 0.01252180989831686, + "step": 1103 + }, + { + "ce_ib": 6.8854193687438965, + "ce_orig": 1.081408143043518, + "epoch": 0.31720468761233733, + "kl_loss": 0.39314359426498413, + "loss_ib": 0.010816855356097221, + "step": 1103 + }, + { + "ce_ib": 5.625035285949707, + "ce_orig": 0.7493113279342651, + "epoch": 0.31720468761233733, + "kl_loss": 0.29417410492897034, + "loss_ib": 0.008566776290535927, + "step": 1103 + }, + { + "ce_ib": 8.13229751586914, + "ce_orig": 0.7472993731498718, + "epoch": 0.31720468761233733, + "kl_loss": 0.2875515818595886, + "loss_ib": 0.011007812805473804, + "step": 1103 + }, + { + "ce_ib": 4.085546016693115, + "ce_orig": 0.7198461890220642, + "epoch": 0.31749227119131496, + "kl_loss": 0.1820926070213318, + "loss_ib": 0.005906471982598305, + "step": 1104 + }, + { + "ce_ib": 7.235878944396973, + "ce_orig": 1.0650726556777954, + "epoch": 0.31749227119131496, + "kl_loss": 0.28940922021865845, + "loss_ib": 0.01012997142970562, + "step": 1104 + }, + { + "ce_ib": 7.105355262756348, + "ce_orig": 0.8803567290306091, + "epoch": 0.31749227119131496, + "kl_loss": 0.19339075684547424, + "loss_ib": 0.009039262309670448, + "step": 1104 + }, + { + "ce_ib": 5.405515670776367, + "ce_orig": 0.5333710312843323, + "epoch": 0.31749227119131496, + "kl_loss": 0.2288927584886551, + "loss_ib": 0.007694443222135305, + "step": 1104 + }, + { + "epoch": 0.31777985477029264, + "grad_norm": 0.13750189542770386, + "learning_rate": 9.852270320092314e-06, + "loss": 0.842, + "step": 1105 + }, + { + "ce_ib": 6.282751083374023, + "ce_orig": 0.6561731696128845, + "epoch": 0.31777985477029264, + "kl_loss": 0.3129974603652954, + "loss_ib": 0.009412725456058979, + "step": 1105 + }, + { + "ce_ib": 9.112653732299805, + "ce_orig": 1.2813491821289062, + "epoch": 0.31777985477029264, + "kl_loss": 0.2854575514793396, + "loss_ib": 0.011967229656875134, + "step": 1105 + }, + { + "ce_ib": 6.1287760734558105, + "ce_orig": 0.5941926836967468, + "epoch": 0.31777985477029264, + "kl_loss": 0.26773563027381897, + "loss_ib": 0.008806131780147552, + "step": 1105 + }, + { + "ce_ib": 4.9001922607421875, + "ce_orig": 0.613892674446106, + "epoch": 0.31777985477029264, + "kl_loss": 0.2149442881345749, + "loss_ib": 0.007049635052680969, + "step": 1105 + }, + { + "ce_ib": 6.514833450317383, + "ce_orig": 0.7088329792022705, + "epoch": 0.31806743834927026, + "kl_loss": 0.22958248853683472, + "loss_ib": 0.008810658007860184, + "step": 1106 + }, + { + "ce_ib": 4.464747905731201, + "ce_orig": 0.49244388937950134, + "epoch": 0.31806743834927026, + "kl_loss": 0.2644960582256317, + "loss_ib": 0.007109708618372679, + "step": 1106 + }, + { + "ce_ib": 5.996800422668457, + "ce_orig": 0.8356698751449585, + "epoch": 0.31806743834927026, + "kl_loss": 0.2915058135986328, + "loss_ib": 0.008911858312785625, + "step": 1106 + }, + { + "ce_ib": 10.251335144042969, + "ce_orig": 1.1799588203430176, + "epoch": 0.31806743834927026, + "kl_loss": 0.26157453656196594, + "loss_ib": 0.012867080047726631, + "step": 1106 + }, + { + "ce_ib": 7.458197593688965, + "ce_orig": 0.9880890250205994, + "epoch": 0.3183550219282479, + "kl_loss": 0.21888777613639832, + "loss_ib": 0.009647075086832047, + "step": 1107 + }, + { + "ce_ib": 7.327599048614502, + "ce_orig": 0.7127462029457092, + "epoch": 0.3183550219282479, + "kl_loss": 0.43115466833114624, + "loss_ib": 0.011639145202934742, + "step": 1107 + }, + { + "ce_ib": 11.34225845336914, + "ce_orig": 1.4238355159759521, + "epoch": 0.3183550219282479, + "kl_loss": 0.2260439693927765, + "loss_ib": 0.013602697290480137, + "step": 1107 + }, + { + "ce_ib": 4.567384719848633, + "ce_orig": 0.5381410121917725, + "epoch": 0.3183550219282479, + "kl_loss": 0.24306389689445496, + "loss_ib": 0.006998023949563503, + "step": 1107 + }, + { + "ce_ib": 2.3374364376068115, + "ce_orig": 0.1576087772846222, + "epoch": 0.31864260550722556, + "kl_loss": 0.4729865789413452, + "loss_ib": 0.007067302241921425, + "step": 1108 + }, + { + "ce_ib": 6.564968109130859, + "ce_orig": 0.8243353962898254, + "epoch": 0.31864260550722556, + "kl_loss": 0.29320281744003296, + "loss_ib": 0.00949699617922306, + "step": 1108 + }, + { + "ce_ib": 6.024631977081299, + "ce_orig": 0.8786768913269043, + "epoch": 0.31864260550722556, + "kl_loss": 0.2908465564250946, + "loss_ib": 0.008933097124099731, + "step": 1108 + }, + { + "ce_ib": 6.480597496032715, + "ce_orig": 1.0345938205718994, + "epoch": 0.31864260550722556, + "kl_loss": 0.24593515694141388, + "loss_ib": 0.008939948864281178, + "step": 1108 + }, + { + "ce_ib": 4.5928215980529785, + "ce_orig": 0.6721344590187073, + "epoch": 0.3189301890862032, + "kl_loss": 0.29296231269836426, + "loss_ib": 0.00752244470641017, + "step": 1109 + }, + { + "ce_ib": 6.1651997566223145, + "ce_orig": 0.9395220279693604, + "epoch": 0.3189301890862032, + "kl_loss": 0.2568630278110504, + "loss_ib": 0.008733830414712429, + "step": 1109 + }, + { + "ce_ib": 7.659371376037598, + "ce_orig": 0.46118995547294617, + "epoch": 0.3189301890862032, + "kl_loss": 0.38257402181625366, + "loss_ib": 0.011485111899673939, + "step": 1109 + }, + { + "ce_ib": 9.976858139038086, + "ce_orig": 1.3371727466583252, + "epoch": 0.3189301890862032, + "kl_loss": 0.1954931914806366, + "loss_ib": 0.01193179003894329, + "step": 1109 + }, + { + "epoch": 0.3192177726651808, + "grad_norm": 0.10743506252765656, + "learning_rate": 9.850391891315159e-06, + "loss": 0.8003, + "step": 1110 + }, + { + "ce_ib": 6.971478462219238, + "ce_orig": 0.638785719871521, + "epoch": 0.3192177726651808, + "kl_loss": 0.24314042925834656, + "loss_ib": 0.009402883239090443, + "step": 1110 + }, + { + "ce_ib": 7.48801326751709, + "ce_orig": 1.115414023399353, + "epoch": 0.3192177726651808, + "kl_loss": 0.34067392349243164, + "loss_ib": 0.010894752107560635, + "step": 1110 + }, + { + "ce_ib": 7.332833766937256, + "ce_orig": 1.0408788919448853, + "epoch": 0.3192177726651808, + "kl_loss": 0.2122744917869568, + "loss_ib": 0.009455578401684761, + "step": 1110 + }, + { + "ce_ib": 4.309993743896484, + "ce_orig": 0.6265511512756348, + "epoch": 0.3192177726651808, + "kl_loss": 0.27435722947120667, + "loss_ib": 0.007053565699607134, + "step": 1110 + }, + { + "ce_ib": 8.549775123596191, + "ce_orig": 1.175057053565979, + "epoch": 0.31950535624415843, + "kl_loss": 0.34124380350112915, + "loss_ib": 0.011962213553488255, + "step": 1111 + }, + { + "ce_ib": 4.665210723876953, + "ce_orig": 0.4430766999721527, + "epoch": 0.31950535624415843, + "kl_loss": 0.5751094222068787, + "loss_ib": 0.010416304692626, + "step": 1111 + }, + { + "ce_ib": 5.320335865020752, + "ce_orig": 0.8363803029060364, + "epoch": 0.31950535624415843, + "kl_loss": 0.15228267014026642, + "loss_ib": 0.006843162700533867, + "step": 1111 + }, + { + "ce_ib": 5.226586818695068, + "ce_orig": 0.6585968732833862, + "epoch": 0.31950535624415843, + "kl_loss": 0.26359066367149353, + "loss_ib": 0.007862493395805359, + "step": 1111 + }, + { + "ce_ib": 5.659263610839844, + "ce_orig": 0.9022344946861267, + "epoch": 0.3197929398231361, + "kl_loss": 0.23027633130550385, + "loss_ib": 0.007962026633322239, + "step": 1112 + }, + { + "ce_ib": 8.274489402770996, + "ce_orig": 0.9125567078590393, + "epoch": 0.3197929398231361, + "kl_loss": 0.37815025448799133, + "loss_ib": 0.012055991217494011, + "step": 1112 + }, + { + "ce_ib": 5.287826061248779, + "ce_orig": 0.6700226068496704, + "epoch": 0.3197929398231361, + "kl_loss": 0.2153065800666809, + "loss_ib": 0.007440891582518816, + "step": 1112 + }, + { + "ce_ib": 4.901998043060303, + "ce_orig": 0.5990259647369385, + "epoch": 0.3197929398231361, + "kl_loss": 0.22657959163188934, + "loss_ib": 0.007167793810367584, + "step": 1112 + }, + { + "ce_ib": 8.574362754821777, + "ce_orig": 1.1461323499679565, + "epoch": 0.32008052340211374, + "kl_loss": 0.20302176475524902, + "loss_ib": 0.010604580864310265, + "step": 1113 + }, + { + "ce_ib": 5.339359760284424, + "ce_orig": 0.6689932346343994, + "epoch": 0.32008052340211374, + "kl_loss": 0.173078715801239, + "loss_ib": 0.0070701465010643005, + "step": 1113 + }, + { + "ce_ib": 7.654620170593262, + "ce_orig": 1.0080437660217285, + "epoch": 0.32008052340211374, + "kl_loss": 0.26648497581481934, + "loss_ib": 0.01031946949660778, + "step": 1113 + }, + { + "ce_ib": 9.265800476074219, + "ce_orig": 1.3707072734832764, + "epoch": 0.32008052340211374, + "kl_loss": 0.32689064741134644, + "loss_ib": 0.01253470592200756, + "step": 1113 + }, + { + "ce_ib": 6.342159271240234, + "ce_orig": 0.67596435546875, + "epoch": 0.32036810698109136, + "kl_loss": 0.21211153268814087, + "loss_ib": 0.00846327468752861, + "step": 1114 + }, + { + "ce_ib": 8.386610984802246, + "ce_orig": 0.9587525129318237, + "epoch": 0.32036810698109136, + "kl_loss": 0.24733799695968628, + "loss_ib": 0.010859991423785686, + "step": 1114 + }, + { + "ce_ib": 6.137772560119629, + "ce_orig": 0.9148625135421753, + "epoch": 0.32036810698109136, + "kl_loss": 0.30503973364830017, + "loss_ib": 0.009188170544803143, + "step": 1114 + }, + { + "ce_ib": 5.591522216796875, + "ce_orig": 0.4235896170139313, + "epoch": 0.32036810698109136, + "kl_loss": 0.2647779881954193, + "loss_ib": 0.008239302784204483, + "step": 1114 + }, + { + "epoch": 0.32065569056006904, + "grad_norm": 0.10749529302120209, + "learning_rate": 9.848501776818138e-06, + "loss": 0.8231, + "step": 1115 + }, + { + "ce_ib": 10.10469913482666, + "ce_orig": 1.2156010866165161, + "epoch": 0.32065569056006904, + "kl_loss": 0.2942560911178589, + "loss_ib": 0.01304725930094719, + "step": 1115 + }, + { + "ce_ib": 7.5524091720581055, + "ce_orig": 1.165973424911499, + "epoch": 0.32065569056006904, + "kl_loss": 0.2608107924461365, + "loss_ib": 0.010160517878830433, + "step": 1115 + }, + { + "ce_ib": 6.978625297546387, + "ce_orig": 0.7588456869125366, + "epoch": 0.32065569056006904, + "kl_loss": 0.20529861748218536, + "loss_ib": 0.009031611494719982, + "step": 1115 + }, + { + "ce_ib": 6.989262104034424, + "ce_orig": 0.7142454981803894, + "epoch": 0.32065569056006904, + "kl_loss": 0.30295610427856445, + "loss_ib": 0.010018822737038136, + "step": 1115 + }, + { + "ce_ib": 3.487807273864746, + "ce_orig": 0.3319603204727173, + "epoch": 0.32094327413904666, + "kl_loss": 0.5826542377471924, + "loss_ib": 0.009314349852502346, + "step": 1116 + }, + { + "ce_ib": 7.845709800720215, + "ce_orig": 0.8958043456077576, + "epoch": 0.32094327413904666, + "kl_loss": 0.3864516019821167, + "loss_ib": 0.01171022653579712, + "step": 1116 + }, + { + "ce_ib": 2.6743617057800293, + "ce_orig": 0.3000844717025757, + "epoch": 0.32094327413904666, + "kl_loss": 0.5113818645477295, + "loss_ib": 0.007788179907947779, + "step": 1116 + }, + { + "ce_ib": 8.463080406188965, + "ce_orig": 1.0114843845367432, + "epoch": 0.32094327413904666, + "kl_loss": 0.3062596321105957, + "loss_ib": 0.011525675654411316, + "step": 1116 + }, + { + "ce_ib": 10.33855152130127, + "ce_orig": 1.2473762035369873, + "epoch": 0.3212308577180243, + "kl_loss": 0.2699921131134033, + "loss_ib": 0.013038473203778267, + "step": 1117 + }, + { + "ce_ib": 8.67563533782959, + "ce_orig": 0.9891132116317749, + "epoch": 0.3212308577180243, + "kl_loss": 0.24578243494033813, + "loss_ib": 0.011133459396660328, + "step": 1117 + }, + { + "ce_ib": 11.412935256958008, + "ce_orig": 1.6994497776031494, + "epoch": 0.3212308577180243, + "kl_loss": 0.3418155312538147, + "loss_ib": 0.014831089414656162, + "step": 1117 + }, + { + "ce_ib": 6.820316314697266, + "ce_orig": 0.7683108448982239, + "epoch": 0.3212308577180243, + "kl_loss": 0.2374948263168335, + "loss_ib": 0.009195264428853989, + "step": 1117 + }, + { + "ce_ib": 5.1326704025268555, + "ce_orig": 0.7094582915306091, + "epoch": 0.32151844129700197, + "kl_loss": 0.2784407436847687, + "loss_ib": 0.007917077280580997, + "step": 1118 + }, + { + "ce_ib": 7.020430564880371, + "ce_orig": 1.007041573524475, + "epoch": 0.32151844129700197, + "kl_loss": 0.3052269518375397, + "loss_ib": 0.010072699747979641, + "step": 1118 + }, + { + "ce_ib": 3.6181588172912598, + "ce_orig": 0.5216957330703735, + "epoch": 0.32151844129700197, + "kl_loss": 0.23030348122119904, + "loss_ib": 0.005921193864196539, + "step": 1118 + }, + { + "ce_ib": 6.98055362701416, + "ce_orig": 0.91546231508255, + "epoch": 0.32151844129700197, + "kl_loss": 0.2503894567489624, + "loss_ib": 0.009484448470175266, + "step": 1118 + }, + { + "ce_ib": 3.671968460083008, + "ce_orig": 0.419208824634552, + "epoch": 0.3218060248759796, + "kl_loss": 0.24047990143299103, + "loss_ib": 0.006076767109334469, + "step": 1119 + }, + { + "ce_ib": 5.895604133605957, + "ce_orig": 0.5340924263000488, + "epoch": 0.3218060248759796, + "kl_loss": 0.18886855244636536, + "loss_ib": 0.007784290239214897, + "step": 1119 + }, + { + "ce_ib": 6.686251640319824, + "ce_orig": 0.8506937623023987, + "epoch": 0.3218060248759796, + "kl_loss": 0.42726773023605347, + "loss_ib": 0.010958928614854813, + "step": 1119 + }, + { + "ce_ib": 5.3213419914245605, + "ce_orig": 0.582292377948761, + "epoch": 0.3218060248759796, + "kl_loss": 0.21478994190692902, + "loss_ib": 0.0074692415073513985, + "step": 1119 + }, + { + "epoch": 0.3220936084549572, + "grad_norm": 0.09495838731527328, + "learning_rate": 9.846599981154975e-06, + "loss": 0.8629, + "step": 1120 + }, + { + "ce_ib": 6.199802398681641, + "ce_orig": 0.8364900946617126, + "epoch": 0.3220936084549572, + "kl_loss": 0.2115131914615631, + "loss_ib": 0.008314934559166431, + "step": 1120 + }, + { + "ce_ib": 7.682260990142822, + "ce_orig": 0.6661112308502197, + "epoch": 0.3220936084549572, + "kl_loss": 0.33651497960090637, + "loss_ib": 0.011047410778701305, + "step": 1120 + }, + { + "ce_ib": 7.0127973556518555, + "ce_orig": 0.47509559988975525, + "epoch": 0.3220936084549572, + "kl_loss": 0.4513690173625946, + "loss_ib": 0.01152648776769638, + "step": 1120 + }, + { + "ce_ib": 5.214129447937012, + "ce_orig": 0.7018793225288391, + "epoch": 0.3220936084549572, + "kl_loss": 0.20314499735832214, + "loss_ib": 0.007245579734444618, + "step": 1120 + }, + { + "ce_ib": 2.6834769248962402, + "ce_orig": 0.33916646242141724, + "epoch": 0.32238119203393484, + "kl_loss": 0.20696307718753815, + "loss_ib": 0.004753108136355877, + "step": 1121 + }, + { + "ce_ib": 11.433638572692871, + "ce_orig": 1.6915712356567383, + "epoch": 0.32238119203393484, + "kl_loss": 0.30615732073783875, + "loss_ib": 0.014495211653411388, + "step": 1121 + }, + { + "ce_ib": 7.008380889892578, + "ce_orig": 0.6508950591087341, + "epoch": 0.32238119203393484, + "kl_loss": 0.2695773243904114, + "loss_ib": 0.00970415398478508, + "step": 1121 + }, + { + "ce_ib": 10.525337219238281, + "ce_orig": 1.1632081270217896, + "epoch": 0.32238119203393484, + "kl_loss": 0.3140749931335449, + "loss_ib": 0.013666086830198765, + "step": 1121 + }, + { + "ce_ib": 9.778067588806152, + "ce_orig": 1.3605836629867554, + "epoch": 0.3226687756129125, + "kl_loss": 0.2360476404428482, + "loss_ib": 0.012138543650507927, + "step": 1122 + }, + { + "ce_ib": 5.721891403198242, + "ce_orig": 0.6278418898582458, + "epoch": 0.3226687756129125, + "kl_loss": 0.18863216042518616, + "loss_ib": 0.00760821346193552, + "step": 1122 + }, + { + "ce_ib": 6.584217071533203, + "ce_orig": 0.7220848202705383, + "epoch": 0.3226687756129125, + "kl_loss": 0.27102869749069214, + "loss_ib": 0.00929450336843729, + "step": 1122 + }, + { + "ce_ib": 9.604668617248535, + "ce_orig": 1.3348008394241333, + "epoch": 0.3226687756129125, + "kl_loss": 0.32848864793777466, + "loss_ib": 0.012889553792774677, + "step": 1122 + }, + { + "ce_ib": 10.836069107055664, + "ce_orig": 1.4283229112625122, + "epoch": 0.32295635919189014, + "kl_loss": 0.3214641809463501, + "loss_ib": 0.014050710946321487, + "step": 1123 + }, + { + "ce_ib": 5.0625319480896, + "ce_orig": 0.5947402715682983, + "epoch": 0.32295635919189014, + "kl_loss": 0.2923838496208191, + "loss_ib": 0.007986370474100113, + "step": 1123 + }, + { + "ce_ib": 6.186778545379639, + "ce_orig": 1.0432881116867065, + "epoch": 0.32295635919189014, + "kl_loss": 0.2616155445575714, + "loss_ib": 0.008802933618426323, + "step": 1123 + }, + { + "ce_ib": 11.60007381439209, + "ce_orig": 1.7871153354644775, + "epoch": 0.32295635919189014, + "kl_loss": 0.3969075679779053, + "loss_ib": 0.01556914858520031, + "step": 1123 + }, + { + "ce_ib": 9.874221801757812, + "ce_orig": 1.1525869369506836, + "epoch": 0.32324394277086776, + "kl_loss": 0.2839212119579315, + "loss_ib": 0.012713433243334293, + "step": 1124 + }, + { + "ce_ib": 5.750823497772217, + "ce_orig": 0.8648343682289124, + "epoch": 0.32324394277086776, + "kl_loss": 0.18375760316848755, + "loss_ib": 0.007588399574160576, + "step": 1124 + }, + { + "ce_ib": 7.892747402191162, + "ce_orig": 0.6954336166381836, + "epoch": 0.32324394277086776, + "kl_loss": 0.3389386236667633, + "loss_ib": 0.011282133869826794, + "step": 1124 + }, + { + "ce_ib": 9.0963716506958, + "ce_orig": 0.9955480694770813, + "epoch": 0.32324394277086776, + "kl_loss": 0.28071296215057373, + "loss_ib": 0.011903500184416771, + "step": 1124 + }, + { + "epoch": 0.32353152634984544, + "grad_norm": 0.09425008296966553, + "learning_rate": 9.844686508907538e-06, + "loss": 0.8663, + "step": 1125 + }, + { + "ce_ib": 12.10210132598877, + "ce_orig": 1.6567598581314087, + "epoch": 0.32353152634984544, + "kl_loss": 0.20576657354831696, + "loss_ib": 0.01415976695716381, + "step": 1125 + }, + { + "ce_ib": 8.69025707244873, + "ce_orig": 0.7126254439353943, + "epoch": 0.32353152634984544, + "kl_loss": 0.33204948902130127, + "loss_ib": 0.01201075129210949, + "step": 1125 + }, + { + "ce_ib": 6.837287902832031, + "ce_orig": 0.9816555380821228, + "epoch": 0.32353152634984544, + "kl_loss": 0.2468874454498291, + "loss_ib": 0.009306162595748901, + "step": 1125 + }, + { + "ce_ib": 6.7292327880859375, + "ce_orig": 0.8677306175231934, + "epoch": 0.32353152634984544, + "kl_loss": 0.23569883406162262, + "loss_ib": 0.009086220525205135, + "step": 1125 + }, + { + "ce_ib": 9.347533226013184, + "ce_orig": 0.4040294289588928, + "epoch": 0.32381910992882307, + "kl_loss": 0.34666794538497925, + "loss_ib": 0.012814212590456009, + "step": 1126 + }, + { + "ce_ib": 7.4951019287109375, + "ce_orig": 1.36309814453125, + "epoch": 0.32381910992882307, + "kl_loss": 0.18759815394878387, + "loss_ib": 0.009371084161102772, + "step": 1126 + }, + { + "ce_ib": 4.983643054962158, + "ce_orig": 0.8482396602630615, + "epoch": 0.32381910992882307, + "kl_loss": 0.25121212005615234, + "loss_ib": 0.007495764177292585, + "step": 1126 + }, + { + "ce_ib": 10.230554580688477, + "ce_orig": 0.7575033903121948, + "epoch": 0.32381910992882307, + "kl_loss": 0.27114659547805786, + "loss_ib": 0.012942020781338215, + "step": 1126 + }, + { + "ce_ib": 8.284119606018066, + "ce_orig": 1.0574978590011597, + "epoch": 0.3241066935078007, + "kl_loss": 0.23789556324481964, + "loss_ib": 0.010663075372576714, + "step": 1127 + }, + { + "ce_ib": 6.288346767425537, + "ce_orig": 0.9875720143318176, + "epoch": 0.3241066935078007, + "kl_loss": 0.26568907499313354, + "loss_ib": 0.008945236913859844, + "step": 1127 + }, + { + "ce_ib": 5.09885835647583, + "ce_orig": 0.7185342311859131, + "epoch": 0.3241066935078007, + "kl_loss": 0.5336880087852478, + "loss_ib": 0.0104357386007905, + "step": 1127 + }, + { + "ce_ib": 3.9797940254211426, + "ce_orig": 0.4946213364601135, + "epoch": 0.3241066935078007, + "kl_loss": 0.5142374634742737, + "loss_ib": 0.009122168645262718, + "step": 1127 + }, + { + "ce_ib": 6.337353229522705, + "ce_orig": 0.668466329574585, + "epoch": 0.32439427708677837, + "kl_loss": 0.4345847964286804, + "loss_ib": 0.010683201253414154, + "step": 1128 + }, + { + "ce_ib": 4.80925178527832, + "ce_orig": 1.2135826349258423, + "epoch": 0.32439427708677837, + "kl_loss": 0.21446409821510315, + "loss_ib": 0.0069538927637040615, + "step": 1128 + }, + { + "ce_ib": 6.434673309326172, + "ce_orig": 1.0011742115020752, + "epoch": 0.32439427708677837, + "kl_loss": 0.23707206547260284, + "loss_ib": 0.008805394172668457, + "step": 1128 + }, + { + "ce_ib": 8.776188850402832, + "ce_orig": 1.1622161865234375, + "epoch": 0.32439427708677837, + "kl_loss": 0.21508213877677917, + "loss_ib": 0.010927010327577591, + "step": 1128 + }, + { + "ce_ib": 8.308552742004395, + "ce_orig": 1.4075417518615723, + "epoch": 0.324681860665756, + "kl_loss": 0.22292867302894592, + "loss_ib": 0.010537839494645596, + "step": 1129 + }, + { + "ce_ib": 5.862061977386475, + "ce_orig": 0.9006170034408569, + "epoch": 0.324681860665756, + "kl_loss": 0.2912452518939972, + "loss_ib": 0.008774514310061932, + "step": 1129 + }, + { + "ce_ib": 10.914876937866211, + "ce_orig": 1.1613266468048096, + "epoch": 0.324681860665756, + "kl_loss": 0.23145024478435516, + "loss_ib": 0.013229379430413246, + "step": 1129 + }, + { + "ce_ib": 8.344647407531738, + "ce_orig": 1.1508418321609497, + "epoch": 0.324681860665756, + "kl_loss": 0.25917115807533264, + "loss_ib": 0.010936358943581581, + "step": 1129 + }, + { + "epoch": 0.3249694442447336, + "grad_norm": 0.11727182567119598, + "learning_rate": 9.842761364685824e-06, + "loss": 0.898, + "step": 1130 + }, + { + "ce_ib": 7.812705039978027, + "ce_orig": 1.451427698135376, + "epoch": 0.3249694442447336, + "kl_loss": 0.19738689064979553, + "loss_ib": 0.009786573238670826, + "step": 1130 + }, + { + "ce_ib": 6.450815200805664, + "ce_orig": 0.8914653658866882, + "epoch": 0.3249694442447336, + "kl_loss": 0.2358463555574417, + "loss_ib": 0.008809278719127178, + "step": 1130 + }, + { + "ce_ib": 6.206367015838623, + "ce_orig": 0.7019004225730896, + "epoch": 0.3249694442447336, + "kl_loss": 0.3190705180168152, + "loss_ib": 0.009397071786224842, + "step": 1130 + }, + { + "ce_ib": 6.046915531158447, + "ce_orig": 0.7179989814758301, + "epoch": 0.3249694442447336, + "kl_loss": 0.29096171259880066, + "loss_ib": 0.00895653199404478, + "step": 1130 + }, + { + "ce_ib": 7.675392150878906, + "ce_orig": 0.7474125623703003, + "epoch": 0.32525702782371124, + "kl_loss": 0.2970944941043854, + "loss_ib": 0.01064633671194315, + "step": 1131 + }, + { + "ce_ib": 5.643684387207031, + "ce_orig": 0.7740488648414612, + "epoch": 0.32525702782371124, + "kl_loss": 0.2655462920665741, + "loss_ib": 0.00829914677888155, + "step": 1131 + }, + { + "ce_ib": 12.916472434997559, + "ce_orig": 2.1709859371185303, + "epoch": 0.32525702782371124, + "kl_loss": 0.23599407076835632, + "loss_ib": 0.015276413410902023, + "step": 1131 + }, + { + "ce_ib": 6.033138751983643, + "ce_orig": 0.5232858657836914, + "epoch": 0.32525702782371124, + "kl_loss": 0.413301020860672, + "loss_ib": 0.010166148655116558, + "step": 1131 + }, + { + "ce_ib": 5.810317039489746, + "ce_orig": 0.42475029826164246, + "epoch": 0.3255446114026889, + "kl_loss": 0.3846255838871002, + "loss_ib": 0.00965657364577055, + "step": 1132 + }, + { + "ce_ib": 8.389314651489258, + "ce_orig": 0.6634515523910522, + "epoch": 0.3255446114026889, + "kl_loss": 0.3044917583465576, + "loss_ib": 0.01143423281610012, + "step": 1132 + }, + { + "ce_ib": 8.147542953491211, + "ce_orig": 1.1206897497177124, + "epoch": 0.3255446114026889, + "kl_loss": 0.6051585674285889, + "loss_ib": 0.014199127443134785, + "step": 1132 + }, + { + "ce_ib": 4.850771903991699, + "ce_orig": 0.9383435845375061, + "epoch": 0.3255446114026889, + "kl_loss": 0.16845840215682983, + "loss_ib": 0.006535355933010578, + "step": 1132 + }, + { + "ce_ib": 4.84213399887085, + "ce_orig": 0.5435336232185364, + "epoch": 0.32583219498166655, + "kl_loss": 0.2434813678264618, + "loss_ib": 0.007276947144418955, + "step": 1133 + }, + { + "ce_ib": 7.962569713592529, + "ce_orig": 0.961725115776062, + "epoch": 0.32583219498166655, + "kl_loss": 0.24803465604782104, + "loss_ib": 0.010442916303873062, + "step": 1133 + }, + { + "ce_ib": 5.276309967041016, + "ce_orig": 0.6765012145042419, + "epoch": 0.32583219498166655, + "kl_loss": 0.36133527755737305, + "loss_ib": 0.008889662101864815, + "step": 1133 + }, + { + "ce_ib": 4.470157623291016, + "ce_orig": 0.5433730483055115, + "epoch": 0.32583219498166655, + "kl_loss": 0.27675801515579224, + "loss_ib": 0.007237737532705069, + "step": 1133 + }, + { + "ce_ib": 5.233908176422119, + "ce_orig": 0.6392762064933777, + "epoch": 0.32611977856064417, + "kl_loss": 0.220979705452919, + "loss_ib": 0.007443705108016729, + "step": 1134 + }, + { + "ce_ib": 5.830175876617432, + "ce_orig": 0.7680152058601379, + "epoch": 0.32611977856064417, + "kl_loss": 0.38658952713012695, + "loss_ib": 0.009696071036159992, + "step": 1134 + }, + { + "ce_ib": 8.762476921081543, + "ce_orig": 1.609471321105957, + "epoch": 0.32611977856064417, + "kl_loss": 0.21049867570400238, + "loss_ib": 0.010867463424801826, + "step": 1134 + }, + { + "ce_ib": 7.221193313598633, + "ce_orig": 0.8087792992591858, + "epoch": 0.32611977856064417, + "kl_loss": 0.2421259880065918, + "loss_ib": 0.00964245293289423, + "step": 1134 + }, + { + "epoch": 0.32640736213962185, + "grad_norm": 0.09609334170818329, + "learning_rate": 9.840824553127954e-06, + "loss": 0.8168, + "step": 1135 + }, + { + "ce_ib": 7.099168300628662, + "ce_orig": 0.954532265663147, + "epoch": 0.32640736213962185, + "kl_loss": 0.22126835584640503, + "loss_ib": 0.009311852045357227, + "step": 1135 + }, + { + "ce_ib": 4.000297546386719, + "ce_orig": 0.6663585305213928, + "epoch": 0.32640736213962185, + "kl_loss": 0.2963029146194458, + "loss_ib": 0.006963326595723629, + "step": 1135 + }, + { + "ce_ib": 5.186071395874023, + "ce_orig": 0.5578123331069946, + "epoch": 0.32640736213962185, + "kl_loss": 0.24658748507499695, + "loss_ib": 0.007651946507394314, + "step": 1135 + }, + { + "ce_ib": 10.776365280151367, + "ce_orig": 1.1968107223510742, + "epoch": 0.32640736213962185, + "kl_loss": 0.21162372827529907, + "loss_ib": 0.012892603874206543, + "step": 1135 + }, + { + "ce_ib": 6.071680068969727, + "ce_orig": 0.6322076320648193, + "epoch": 0.32669494571859947, + "kl_loss": 0.28067898750305176, + "loss_ib": 0.008878469467163086, + "step": 1136 + }, + { + "ce_ib": 9.11697769165039, + "ce_orig": 0.9247298240661621, + "epoch": 0.32669494571859947, + "kl_loss": 0.2652074694633484, + "loss_ib": 0.011769052594900131, + "step": 1136 + }, + { + "ce_ib": 4.236838340759277, + "ce_orig": 0.572127103805542, + "epoch": 0.32669494571859947, + "kl_loss": 0.4477131962776184, + "loss_ib": 0.008713969960808754, + "step": 1136 + }, + { + "ce_ib": 5.590993404388428, + "ce_orig": 0.7507918477058411, + "epoch": 0.32669494571859947, + "kl_loss": 0.30635079741477966, + "loss_ib": 0.008654501289129257, + "step": 1136 + }, + { + "ce_ib": 5.092818737030029, + "ce_orig": 0.6064829230308533, + "epoch": 0.3269825292975771, + "kl_loss": 0.20581455528736115, + "loss_ib": 0.007150963880121708, + "step": 1137 + }, + { + "ce_ib": 6.453564643859863, + "ce_orig": 0.9269260168075562, + "epoch": 0.3269825292975771, + "kl_loss": 0.19207924604415894, + "loss_ib": 0.008374356664717197, + "step": 1137 + }, + { + "ce_ib": 10.570686340332031, + "ce_orig": 1.2108427286148071, + "epoch": 0.3269825292975771, + "kl_loss": 0.32438749074935913, + "loss_ib": 0.013814561069011688, + "step": 1137 + }, + { + "ce_ib": 5.679614067077637, + "ce_orig": 0.6215935945510864, + "epoch": 0.3269825292975771, + "kl_loss": 0.5665749907493591, + "loss_ib": 0.011345363222062588, + "step": 1137 + }, + { + "ce_ib": 5.880247592926025, + "ce_orig": 0.7091310024261475, + "epoch": 0.3272701128765548, + "kl_loss": 0.2912711799144745, + "loss_ib": 0.008792959153652191, + "step": 1138 + }, + { + "ce_ib": 9.816313743591309, + "ce_orig": 1.1858292818069458, + "epoch": 0.3272701128765548, + "kl_loss": 0.24216127395629883, + "loss_ib": 0.01223792601376772, + "step": 1138 + }, + { + "ce_ib": 6.8238444328308105, + "ce_orig": 0.5911304354667664, + "epoch": 0.3272701128765548, + "kl_loss": 0.2992258071899414, + "loss_ib": 0.009816101752221584, + "step": 1138 + }, + { + "ce_ib": 4.645931720733643, + "ce_orig": 0.7781140804290771, + "epoch": 0.3272701128765548, + "kl_loss": 0.24800650775432587, + "loss_ib": 0.007125996984541416, + "step": 1138 + }, + { + "ce_ib": 3.9870479106903076, + "ce_orig": 0.5202873945236206, + "epoch": 0.3275576964555324, + "kl_loss": 0.22053340077400208, + "loss_ib": 0.006192381959408522, + "step": 1139 + }, + { + "ce_ib": 8.540157318115234, + "ce_orig": 1.3470656871795654, + "epoch": 0.3275576964555324, + "kl_loss": 0.23808708786964417, + "loss_ib": 0.010921028442680836, + "step": 1139 + }, + { + "ce_ib": 6.005643367767334, + "ce_orig": 0.8297461867332458, + "epoch": 0.3275576964555324, + "kl_loss": 0.22874270379543304, + "loss_ib": 0.008293070830404758, + "step": 1139 + }, + { + "ce_ib": 8.215617179870605, + "ce_orig": 0.706344485282898, + "epoch": 0.3275576964555324, + "kl_loss": 0.3254474997520447, + "loss_ib": 0.011470092460513115, + "step": 1139 + }, + { + "epoch": 0.32784528003451, + "grad_norm": 0.09755509346723557, + "learning_rate": 9.838876078900158e-06, + "loss": 0.8811, + "step": 1140 + }, + { + "ce_ib": 5.847168445587158, + "ce_orig": 1.0301125049591064, + "epoch": 0.32784528003451, + "kl_loss": 0.22780725359916687, + "loss_ib": 0.008125240914523602, + "step": 1140 + }, + { + "ce_ib": 8.922221183776855, + "ce_orig": 0.9144381880760193, + "epoch": 0.32784528003451, + "kl_loss": 0.3217501640319824, + "loss_ib": 0.01213972270488739, + "step": 1140 + }, + { + "ce_ib": 7.23486328125, + "ce_orig": 0.4056432843208313, + "epoch": 0.32784528003451, + "kl_loss": 0.3580802083015442, + "loss_ib": 0.010815665125846863, + "step": 1140 + }, + { + "ce_ib": 5.954606056213379, + "ce_orig": 0.589507520198822, + "epoch": 0.32784528003451, + "kl_loss": 0.2619907557964325, + "loss_ib": 0.008574512787163258, + "step": 1140 + }, + { + "ce_ib": 10.314091682434082, + "ce_orig": 1.3086189031600952, + "epoch": 0.32813286361348765, + "kl_loss": 0.3001616597175598, + "loss_ib": 0.013315708376467228, + "step": 1141 + }, + { + "ce_ib": 6.59683084487915, + "ce_orig": 0.7322800755500793, + "epoch": 0.32813286361348765, + "kl_loss": 0.2564740777015686, + "loss_ib": 0.009161571972072124, + "step": 1141 + }, + { + "ce_ib": 8.852384567260742, + "ce_orig": 0.7106950879096985, + "epoch": 0.32813286361348765, + "kl_loss": 0.3281547427177429, + "loss_ib": 0.012133931741118431, + "step": 1141 + }, + { + "ce_ib": 7.752597332000732, + "ce_orig": 0.7331222295761108, + "epoch": 0.32813286361348765, + "kl_loss": 0.330089807510376, + "loss_ib": 0.011053495109081268, + "step": 1141 + }, + { + "ce_ib": 5.990482807159424, + "ce_orig": 0.5036841630935669, + "epoch": 0.3284204471924653, + "kl_loss": 0.25676479935646057, + "loss_ib": 0.008558130823075771, + "step": 1142 + }, + { + "ce_ib": 5.725650310516357, + "ce_orig": 0.6308770775794983, + "epoch": 0.3284204471924653, + "kl_loss": 0.18920324742794037, + "loss_ib": 0.007617682684212923, + "step": 1142 + }, + { + "ce_ib": 7.363343715667725, + "ce_orig": 0.8220383524894714, + "epoch": 0.3284204471924653, + "kl_loss": 0.36467304825782776, + "loss_ib": 0.01101007405668497, + "step": 1142 + }, + { + "ce_ib": 7.561408996582031, + "ce_orig": 0.729387640953064, + "epoch": 0.3284204471924653, + "kl_loss": 0.1974533498287201, + "loss_ib": 0.00953594222664833, + "step": 1142 + }, + { + "ce_ib": 9.33537769317627, + "ce_orig": 0.7693027853965759, + "epoch": 0.32870803077144295, + "kl_loss": 0.3023679256439209, + "loss_ib": 0.012359056621789932, + "step": 1143 + }, + { + "ce_ib": 5.879181861877441, + "ce_orig": 0.778829038143158, + "epoch": 0.32870803077144295, + "kl_loss": 0.32206517457962036, + "loss_ib": 0.00909983366727829, + "step": 1143 + }, + { + "ce_ib": 5.382481575012207, + "ce_orig": 0.737525999546051, + "epoch": 0.32870803077144295, + "kl_loss": 0.22152158617973328, + "loss_ib": 0.007597697898745537, + "step": 1143 + }, + { + "ce_ib": 11.252842903137207, + "ce_orig": 0.9058976173400879, + "epoch": 0.32870803077144295, + "kl_loss": 0.26175910234451294, + "loss_ib": 0.01387043483555317, + "step": 1143 + }, + { + "ce_ib": 9.349008560180664, + "ce_orig": 0.7972526550292969, + "epoch": 0.3289956143504206, + "kl_loss": 0.32632461190223694, + "loss_ib": 0.012612254358828068, + "step": 1144 + }, + { + "ce_ib": 9.687057495117188, + "ce_orig": 1.1150705814361572, + "epoch": 0.3289956143504206, + "kl_loss": 0.25978612899780273, + "loss_ib": 0.012284918688237667, + "step": 1144 + }, + { + "ce_ib": 5.115502834320068, + "ce_orig": 0.3979432284832001, + "epoch": 0.3289956143504206, + "kl_loss": 0.24917109310626984, + "loss_ib": 0.0076072136871516705, + "step": 1144 + }, + { + "ce_ib": 10.403783798217773, + "ce_orig": 1.24606192111969, + "epoch": 0.3289956143504206, + "kl_loss": 0.2913515865802765, + "loss_ib": 0.013317300006747246, + "step": 1144 + }, + { + "epoch": 0.32928319792939825, + "grad_norm": 0.09816709160804749, + "learning_rate": 9.83691594669676e-06, + "loss": 0.873, + "step": 1145 + }, + { + "ce_ib": 5.447243690490723, + "ce_orig": 0.6441102027893066, + "epoch": 0.32928319792939825, + "kl_loss": 0.23241248726844788, + "loss_ib": 0.007771369069814682, + "step": 1145 + }, + { + "ce_ib": 7.5973429679870605, + "ce_orig": 0.9865646362304688, + "epoch": 0.32928319792939825, + "kl_loss": 0.38887763023376465, + "loss_ib": 0.011486119590699673, + "step": 1145 + }, + { + "ce_ib": 7.2056779861450195, + "ce_orig": 0.6080095767974854, + "epoch": 0.32928319792939825, + "kl_loss": 0.23443345725536346, + "loss_ib": 0.009550012648105621, + "step": 1145 + }, + { + "ce_ib": 9.07731819152832, + "ce_orig": 0.840085506439209, + "epoch": 0.32928319792939825, + "kl_loss": 0.39250028133392334, + "loss_ib": 0.013002321124076843, + "step": 1145 + }, + { + "ce_ib": 9.375168800354004, + "ce_orig": 1.1422781944274902, + "epoch": 0.3295707815083759, + "kl_loss": 0.2328919619321823, + "loss_ib": 0.011704088188707829, + "step": 1146 + }, + { + "ce_ib": 4.802104473114014, + "ce_orig": 0.7695512771606445, + "epoch": 0.3295707815083759, + "kl_loss": 0.27905359864234924, + "loss_ib": 0.0075926403515040874, + "step": 1146 + }, + { + "ce_ib": 6.649759292602539, + "ce_orig": 0.9065436720848083, + "epoch": 0.3295707815083759, + "kl_loss": 0.3191312253475189, + "loss_ib": 0.0098410714417696, + "step": 1146 + }, + { + "ce_ib": 8.3096923828125, + "ce_orig": 0.8790789842605591, + "epoch": 0.3295707815083759, + "kl_loss": 0.24609088897705078, + "loss_ib": 0.010770602151751518, + "step": 1146 + }, + { + "ce_ib": 6.451026439666748, + "ce_orig": 0.5358216166496277, + "epoch": 0.3298583650873535, + "kl_loss": 0.19455279409885406, + "loss_ib": 0.008396554738283157, + "step": 1147 + }, + { + "ce_ib": 9.036972999572754, + "ce_orig": 0.6360263824462891, + "epoch": 0.3298583650873535, + "kl_loss": 0.24949690699577332, + "loss_ib": 0.011531941592693329, + "step": 1147 + }, + { + "ce_ib": 7.984084606170654, + "ce_orig": 0.7079024910926819, + "epoch": 0.3298583650873535, + "kl_loss": 0.25150322914123535, + "loss_ib": 0.010499116964638233, + "step": 1147 + }, + { + "ce_ib": 5.445267200469971, + "ce_orig": 0.6818575263023376, + "epoch": 0.3298583650873535, + "kl_loss": 0.23439064621925354, + "loss_ib": 0.007789174094796181, + "step": 1147 + }, + { + "ce_ib": 5.773715019226074, + "ce_orig": 0.8410392999649048, + "epoch": 0.3301459486663312, + "kl_loss": 0.1873149275779724, + "loss_ib": 0.007646864280104637, + "step": 1148 + }, + { + "ce_ib": 8.979928016662598, + "ce_orig": 1.226332187652588, + "epoch": 0.3301459486663312, + "kl_loss": 0.30875229835510254, + "loss_ib": 0.012067451141774654, + "step": 1148 + }, + { + "ce_ib": 5.598503589630127, + "ce_orig": 0.7870617508888245, + "epoch": 0.3301459486663312, + "kl_loss": 0.2419789880514145, + "loss_ib": 0.008018293417990208, + "step": 1148 + }, + { + "ce_ib": 4.286440849304199, + "ce_orig": 0.6685412526130676, + "epoch": 0.3301459486663312, + "kl_loss": 0.19143131375312805, + "loss_ib": 0.0062007540836930275, + "step": 1148 + }, + { + "ce_ib": 5.1454854011535645, + "ce_orig": 0.4119313657283783, + "epoch": 0.3304335322453088, + "kl_loss": 0.3295081853866577, + "loss_ib": 0.008440567180514336, + "step": 1149 + }, + { + "ce_ib": 4.645988464355469, + "ce_orig": 0.6422159075737, + "epoch": 0.3304335322453088, + "kl_loss": 0.22805818915367126, + "loss_ib": 0.006926570553332567, + "step": 1149 + }, + { + "ce_ib": 4.646116733551025, + "ce_orig": 0.3140992224216461, + "epoch": 0.3304335322453088, + "kl_loss": 0.35052090883255005, + "loss_ib": 0.008151326328516006, + "step": 1149 + }, + { + "ce_ib": 8.022761344909668, + "ce_orig": 0.9764880537986755, + "epoch": 0.3304335322453088, + "kl_loss": 0.21903610229492188, + "loss_ib": 0.010213121771812439, + "step": 1149 + }, + { + "epoch": 0.3307211158242864, + "grad_norm": 0.10742151737213135, + "learning_rate": 9.834944161240172e-06, + "loss": 0.8247, + "step": 1150 + }, + { + "ce_ib": 10.445816040039062, + "ce_orig": 1.3861838579177856, + "epoch": 0.3307211158242864, + "kl_loss": 0.31801116466522217, + "loss_ib": 0.013625928200781345, + "step": 1150 + }, + { + "ce_ib": 4.385981559753418, + "ce_orig": 0.8889569044113159, + "epoch": 0.3307211158242864, + "kl_loss": 0.22078658640384674, + "loss_ib": 0.006593847181648016, + "step": 1150 + }, + { + "ce_ib": 9.175389289855957, + "ce_orig": 0.8606024384498596, + "epoch": 0.3307211158242864, + "kl_loss": 0.38910189270973206, + "loss_ib": 0.013066408224403858, + "step": 1150 + }, + { + "ce_ib": 7.696003437042236, + "ce_orig": 0.4989853501319885, + "epoch": 0.3307211158242864, + "kl_loss": 0.2723633348941803, + "loss_ib": 0.010419636964797974, + "step": 1150 + }, + { + "ce_ib": 4.9289350509643555, + "ce_orig": 0.4876880645751953, + "epoch": 0.33100869940326405, + "kl_loss": 0.2187470644712448, + "loss_ib": 0.007116405759006739, + "step": 1151 + }, + { + "ce_ib": 4.567633152008057, + "ce_orig": 0.8315699100494385, + "epoch": 0.33100869940326405, + "kl_loss": 0.26455092430114746, + "loss_ib": 0.007213142234832048, + "step": 1151 + }, + { + "ce_ib": 11.272817611694336, + "ce_orig": 1.7354942560195923, + "epoch": 0.33100869940326405, + "kl_loss": 0.3189008831977844, + "loss_ib": 0.014461826533079147, + "step": 1151 + }, + { + "ce_ib": 7.502978324890137, + "ce_orig": 1.1127325296401978, + "epoch": 0.33100869940326405, + "kl_loss": 0.27890706062316895, + "loss_ib": 0.010292048566043377, + "step": 1151 + }, + { + "ce_ib": 9.4304838180542, + "ce_orig": 1.2863335609436035, + "epoch": 0.33129628298224173, + "kl_loss": 0.36749768257141113, + "loss_ib": 0.013105461373925209, + "step": 1152 + }, + { + "ce_ib": 9.577445983886719, + "ce_orig": 0.9453591108322144, + "epoch": 0.33129628298224173, + "kl_loss": 0.2589572072029114, + "loss_ib": 0.012167016975581646, + "step": 1152 + }, + { + "ce_ib": 6.510651111602783, + "ce_orig": 0.3509739339351654, + "epoch": 0.33129628298224173, + "kl_loss": 0.2997548282146454, + "loss_ib": 0.00950819905847311, + "step": 1152 + }, + { + "ce_ib": 5.625336647033691, + "ce_orig": 0.7494199275970459, + "epoch": 0.33129628298224173, + "kl_loss": 0.24421089887619019, + "loss_ib": 0.008067445829510689, + "step": 1152 + }, + { + "ce_ib": 6.045725345611572, + "ce_orig": 0.6475803256034851, + "epoch": 0.33158386656121935, + "kl_loss": 0.2583104074001312, + "loss_ib": 0.008628829382359982, + "step": 1153 + }, + { + "ce_ib": 7.56576681137085, + "ce_orig": 0.9475224614143372, + "epoch": 0.33158386656121935, + "kl_loss": 0.2908802032470703, + "loss_ib": 0.010474569164216518, + "step": 1153 + }, + { + "ce_ib": 9.017477989196777, + "ce_orig": 1.088793158531189, + "epoch": 0.33158386656121935, + "kl_loss": 0.3477438688278198, + "loss_ib": 0.012494917027652264, + "step": 1153 + }, + { + "ce_ib": 7.708969593048096, + "ce_orig": 1.1444097757339478, + "epoch": 0.33158386656121935, + "kl_loss": 0.33121156692504883, + "loss_ib": 0.011021084152162075, + "step": 1153 + }, + { + "ce_ib": 5.410340309143066, + "ce_orig": 0.5340110063552856, + "epoch": 0.331871450140197, + "kl_loss": 0.23038868606090546, + "loss_ib": 0.007714227307587862, + "step": 1154 + }, + { + "ce_ib": 6.989197254180908, + "ce_orig": 0.6077629327774048, + "epoch": 0.331871450140197, + "kl_loss": 0.2350027859210968, + "loss_ib": 0.009339225478470325, + "step": 1154 + }, + { + "ce_ib": 8.7223539352417, + "ce_orig": 1.0650534629821777, + "epoch": 0.331871450140197, + "kl_loss": 0.3202980160713196, + "loss_ib": 0.01192533504217863, + "step": 1154 + }, + { + "ce_ib": 4.908301830291748, + "ce_orig": 0.9075806736946106, + "epoch": 0.331871450140197, + "kl_loss": 0.2366945594549179, + "loss_ib": 0.007275247480720282, + "step": 1154 + }, + { + "epoch": 0.33215903371917466, + "grad_norm": 0.08863980323076248, + "learning_rate": 9.832960727280887e-06, + "loss": 0.8609, + "step": 1155 + }, + { + "ce_ib": 5.900798320770264, + "ce_orig": 0.8526896834373474, + "epoch": 0.33215903371917466, + "kl_loss": 0.22288133203983307, + "loss_ib": 0.008129611611366272, + "step": 1155 + }, + { + "ce_ib": 4.731245994567871, + "ce_orig": 0.5797931551933289, + "epoch": 0.33215903371917466, + "kl_loss": 0.20637674629688263, + "loss_ib": 0.006795013323426247, + "step": 1155 + }, + { + "ce_ib": 7.965129852294922, + "ce_orig": 0.7734341025352478, + "epoch": 0.33215903371917466, + "kl_loss": 0.26391905546188354, + "loss_ib": 0.010604320093989372, + "step": 1155 + }, + { + "ce_ib": 5.641459941864014, + "ce_orig": 0.8688373565673828, + "epoch": 0.33215903371917466, + "kl_loss": 0.18195831775665283, + "loss_ib": 0.007461042609065771, + "step": 1155 + }, + { + "ce_ib": 8.416781425476074, + "ce_orig": 1.1019142866134644, + "epoch": 0.3324466172981523, + "kl_loss": 0.22895824909210205, + "loss_ib": 0.010706363245844841, + "step": 1156 + }, + { + "ce_ib": 7.812315940856934, + "ce_orig": 1.1109238862991333, + "epoch": 0.3324466172981523, + "kl_loss": 0.2360575646162033, + "loss_ib": 0.010172891430556774, + "step": 1156 + }, + { + "ce_ib": 6.696736812591553, + "ce_orig": 0.809617280960083, + "epoch": 0.3324466172981523, + "kl_loss": 0.23302681744098663, + "loss_ib": 0.00902700424194336, + "step": 1156 + }, + { + "ce_ib": 5.31292724609375, + "ce_orig": 0.46756190061569214, + "epoch": 0.3324466172981523, + "kl_loss": 0.23464633524417877, + "loss_ib": 0.007659390568733215, + "step": 1156 + }, + { + "ce_ib": 4.710688591003418, + "ce_orig": 0.6011915802955627, + "epoch": 0.3327342008771299, + "kl_loss": 0.6861224174499512, + "loss_ib": 0.011571912094950676, + "step": 1157 + }, + { + "ce_ib": 10.64146614074707, + "ce_orig": 1.7183725833892822, + "epoch": 0.3327342008771299, + "kl_loss": 0.2469731569290161, + "loss_ib": 0.013111197389662266, + "step": 1157 + }, + { + "ce_ib": 4.336862564086914, + "ce_orig": 0.9087672233581543, + "epoch": 0.3327342008771299, + "kl_loss": 0.1872231662273407, + "loss_ib": 0.006209094543009996, + "step": 1157 + }, + { + "ce_ib": 7.800237655639648, + "ce_orig": 0.9339820146560669, + "epoch": 0.3327342008771299, + "kl_loss": 0.2661847770214081, + "loss_ib": 0.010462084785103798, + "step": 1157 + }, + { + "ce_ib": 9.811734199523926, + "ce_orig": 1.4699519872665405, + "epoch": 0.3330217844561076, + "kl_loss": 0.26861608028411865, + "loss_ib": 0.01249789446592331, + "step": 1158 + }, + { + "ce_ib": 9.553086280822754, + "ce_orig": 1.3622437715530396, + "epoch": 0.3330217844561076, + "kl_loss": 0.2595096826553345, + "loss_ib": 0.012148181907832623, + "step": 1158 + }, + { + "ce_ib": 3.6624906063079834, + "ce_orig": 0.40558406710624695, + "epoch": 0.3330217844561076, + "kl_loss": 0.4334304630756378, + "loss_ib": 0.007996794767677784, + "step": 1158 + }, + { + "ce_ib": 7.823740482330322, + "ce_orig": 0.8662386536598206, + "epoch": 0.3330217844561076, + "kl_loss": 0.30475282669067383, + "loss_ib": 0.01087126974016428, + "step": 1158 + }, + { + "ce_ib": 6.864394664764404, + "ce_orig": 0.6482590436935425, + "epoch": 0.3333093680350852, + "kl_loss": 0.30486205220222473, + "loss_ib": 0.009913015179336071, + "step": 1159 + }, + { + "ce_ib": 10.348648071289062, + "ce_orig": 1.443572998046875, + "epoch": 0.3333093680350852, + "kl_loss": 0.33501648902893066, + "loss_ib": 0.013698812574148178, + "step": 1159 + }, + { + "ce_ib": 9.253283500671387, + "ce_orig": 1.5353633165359497, + "epoch": 0.3333093680350852, + "kl_loss": 0.3021102845668793, + "loss_ib": 0.01227438636124134, + "step": 1159 + }, + { + "ce_ib": 8.694429397583008, + "ce_orig": 0.9382469058036804, + "epoch": 0.3333093680350852, + "kl_loss": 0.2970583438873291, + "loss_ib": 0.011665012687444687, + "step": 1159 + }, + { + "epoch": 0.33359695161406283, + "grad_norm": 0.10275700688362122, + "learning_rate": 9.830965649597455e-06, + "loss": 0.935, + "step": 1160 + }, + { + "ce_ib": 5.686638832092285, + "ce_orig": 0.6911338567733765, + "epoch": 0.33359695161406283, + "kl_loss": 0.34932830929756165, + "loss_ib": 0.009179921820759773, + "step": 1160 + }, + { + "ce_ib": 6.213655948638916, + "ce_orig": 0.41276422142982483, + "epoch": 0.33359695161406283, + "kl_loss": 0.29866302013397217, + "loss_ib": 0.009200286120176315, + "step": 1160 + }, + { + "ce_ib": 6.622434616088867, + "ce_orig": 1.0098650455474854, + "epoch": 0.33359695161406283, + "kl_loss": 0.3152294158935547, + "loss_ib": 0.009774728678166866, + "step": 1160 + }, + { + "ce_ib": 5.391828536987305, + "ce_orig": 0.8483104705810547, + "epoch": 0.33359695161406283, + "kl_loss": 0.29343515634536743, + "loss_ib": 0.008326179347932339, + "step": 1160 + }, + { + "ce_ib": 3.05668044090271, + "ce_orig": 0.5592519640922546, + "epoch": 0.33388453519304045, + "kl_loss": 0.1691984087228775, + "loss_ib": 0.004748664330691099, + "step": 1161 + }, + { + "ce_ib": 6.491842746734619, + "ce_orig": 0.8682400584220886, + "epoch": 0.33388453519304045, + "kl_loss": 0.2605954706668854, + "loss_ib": 0.00909779779613018, + "step": 1161 + }, + { + "ce_ib": 4.910883903503418, + "ce_orig": 0.66831374168396, + "epoch": 0.33388453519304045, + "kl_loss": 0.3180694878101349, + "loss_ib": 0.008091578260064125, + "step": 1161 + }, + { + "ce_ib": 7.609592437744141, + "ce_orig": 1.0245137214660645, + "epoch": 0.33388453519304045, + "kl_loss": 0.42994049191474915, + "loss_ib": 0.011908996850252151, + "step": 1161 + }, + { + "ce_ib": 3.5741307735443115, + "ce_orig": 0.677931547164917, + "epoch": 0.33417211877201813, + "kl_loss": 0.2711496949195862, + "loss_ib": 0.006285627838224173, + "step": 1162 + }, + { + "ce_ib": 8.588187217712402, + "ce_orig": 0.8838889002799988, + "epoch": 0.33417211877201813, + "kl_loss": 0.34474146366119385, + "loss_ib": 0.01203560084104538, + "step": 1162 + }, + { + "ce_ib": 8.509382247924805, + "ce_orig": 1.1210061311721802, + "epoch": 0.33417211877201813, + "kl_loss": 0.22604356706142426, + "loss_ib": 0.010769817978143692, + "step": 1162 + }, + { + "ce_ib": 7.751454830169678, + "ce_orig": 0.4753890633583069, + "epoch": 0.33417211877201813, + "kl_loss": 0.25493529438972473, + "loss_ib": 0.010300807654857635, + "step": 1162 + }, + { + "ce_ib": 5.955982685089111, + "ce_orig": 0.7514666318893433, + "epoch": 0.33445970235099576, + "kl_loss": 0.2573981285095215, + "loss_ib": 0.0085299639031291, + "step": 1163 + }, + { + "ce_ib": 6.996506690979004, + "ce_orig": 1.0626541376113892, + "epoch": 0.33445970235099576, + "kl_loss": 0.159402996301651, + "loss_ib": 0.008590537123382092, + "step": 1163 + }, + { + "ce_ib": 9.42298412322998, + "ce_orig": 1.2421761751174927, + "epoch": 0.33445970235099576, + "kl_loss": 0.18329188227653503, + "loss_ib": 0.011255903169512749, + "step": 1163 + }, + { + "ce_ib": 6.042232513427734, + "ce_orig": 0.43465274572372437, + "epoch": 0.33445970235099576, + "kl_loss": 0.2287701964378357, + "loss_ib": 0.008329934440553188, + "step": 1163 + }, + { + "ce_ib": 6.9496750831604, + "ce_orig": 0.44478708505630493, + "epoch": 0.3347472859299734, + "kl_loss": 0.2602609097957611, + "loss_ib": 0.009552284143865108, + "step": 1164 + }, + { + "ce_ib": 5.079557418823242, + "ce_orig": 0.7076042294502258, + "epoch": 0.3347472859299734, + "kl_loss": 0.25082868337631226, + "loss_ib": 0.007587844040244818, + "step": 1164 + }, + { + "ce_ib": 9.466922760009766, + "ce_orig": 1.1378250122070312, + "epoch": 0.3347472859299734, + "kl_loss": 0.30516183376312256, + "loss_ib": 0.012518541887402534, + "step": 1164 + }, + { + "ce_ib": 8.919095993041992, + "ce_orig": 1.0271871089935303, + "epoch": 0.3347472859299734, + "kl_loss": 0.25492143630981445, + "loss_ib": 0.011468309909105301, + "step": 1164 + }, + { + "epoch": 0.33503486950895106, + "grad_norm": 0.10710824280977249, + "learning_rate": 9.828958932996483e-06, + "loss": 0.8727, + "step": 1165 + }, + { + "ce_ib": 8.745596885681152, + "ce_orig": 1.240087628364563, + "epoch": 0.33503486950895106, + "kl_loss": 0.21713513135910034, + "loss_ib": 0.010916948318481445, + "step": 1165 + }, + { + "ce_ib": 8.954095840454102, + "ce_orig": 1.4257614612579346, + "epoch": 0.33503486950895106, + "kl_loss": 0.2196533977985382, + "loss_ib": 0.011150629259645939, + "step": 1165 + }, + { + "ce_ib": 7.628279685974121, + "ce_orig": 1.0557085275650024, + "epoch": 0.33503486950895106, + "kl_loss": 0.27045494318008423, + "loss_ib": 0.01033282931894064, + "step": 1165 + }, + { + "ce_ib": 6.705820083618164, + "ce_orig": 0.17608341574668884, + "epoch": 0.33503486950895106, + "kl_loss": 0.46943503618240356, + "loss_ib": 0.01140016969293356, + "step": 1165 + }, + { + "ce_ib": 7.123849868774414, + "ce_orig": 0.4561106562614441, + "epoch": 0.3353224530879287, + "kl_loss": 0.23111434280872345, + "loss_ib": 0.009434993378818035, + "step": 1166 + }, + { + "ce_ib": 7.194637298583984, + "ce_orig": 0.5614644885063171, + "epoch": 0.3353224530879287, + "kl_loss": 0.35345301032066345, + "loss_ib": 0.010729167610406876, + "step": 1166 + }, + { + "ce_ib": 4.771577835083008, + "ce_orig": 0.8040262460708618, + "epoch": 0.3353224530879287, + "kl_loss": 0.32398122549057007, + "loss_ib": 0.008011389523744583, + "step": 1166 + }, + { + "ce_ib": 9.567709922790527, + "ce_orig": 1.383070707321167, + "epoch": 0.3353224530879287, + "kl_loss": 0.27471500635147095, + "loss_ib": 0.012314860709011555, + "step": 1166 + }, + { + "ce_ib": 6.410955429077148, + "ce_orig": 0.8717294335365295, + "epoch": 0.3356100366669063, + "kl_loss": 0.25556251406669617, + "loss_ib": 0.008966580033302307, + "step": 1167 + }, + { + "ce_ib": 3.2919697761535645, + "ce_orig": 0.35355857014656067, + "epoch": 0.3356100366669063, + "kl_loss": 0.2551717758178711, + "loss_ib": 0.00584368733689189, + "step": 1167 + }, + { + "ce_ib": 7.970048904418945, + "ce_orig": 1.257944941520691, + "epoch": 0.3356100366669063, + "kl_loss": 0.25095152854919434, + "loss_ib": 0.010479564778506756, + "step": 1167 + }, + { + "ce_ib": 8.489374160766602, + "ce_orig": 1.0214661359786987, + "epoch": 0.3356100366669063, + "kl_loss": 0.23575599491596222, + "loss_ib": 0.010846934281289577, + "step": 1167 + }, + { + "ce_ib": 7.049896240234375, + "ce_orig": 1.0765953063964844, + "epoch": 0.335897620245884, + "kl_loss": 0.5470938682556152, + "loss_ib": 0.012520834803581238, + "step": 1168 + }, + { + "ce_ib": 8.322129249572754, + "ce_orig": 1.0483063459396362, + "epoch": 0.335897620245884, + "kl_loss": 0.2858157753944397, + "loss_ib": 0.011180286295711994, + "step": 1168 + }, + { + "ce_ib": 8.06887435913086, + "ce_orig": 1.226811170578003, + "epoch": 0.335897620245884, + "kl_loss": 0.2247699499130249, + "loss_ib": 0.010316574014723301, + "step": 1168 + }, + { + "ce_ib": 4.309444904327393, + "ce_orig": 0.39025741815567017, + "epoch": 0.335897620245884, + "kl_loss": 0.24306048452854156, + "loss_ib": 0.006740049459040165, + "step": 1168 + }, + { + "ce_ib": 8.550530433654785, + "ce_orig": 0.5781838893890381, + "epoch": 0.3361852038248616, + "kl_loss": 0.4174377918243408, + "loss_ib": 0.01272490806877613, + "step": 1169 + }, + { + "ce_ib": 4.730093479156494, + "ce_orig": 0.5063918232917786, + "epoch": 0.3361852038248616, + "kl_loss": 0.2270166277885437, + "loss_ib": 0.007000259589403868, + "step": 1169 + }, + { + "ce_ib": 5.606704235076904, + "ce_orig": 0.7304767966270447, + "epoch": 0.3361852038248616, + "kl_loss": 0.2581622898578644, + "loss_ib": 0.00818832777440548, + "step": 1169 + }, + { + "ce_ib": 6.313033103942871, + "ce_orig": 0.6566088795661926, + "epoch": 0.3361852038248616, + "kl_loss": 0.22507020831108093, + "loss_ib": 0.008563735522329807, + "step": 1169 + }, + { + "epoch": 0.33647278740383924, + "grad_norm": 0.0896129310131073, + "learning_rate": 9.826940582312617e-06, + "loss": 0.868, + "step": 1170 + }, + { + "ce_ib": 8.58356761932373, + "ce_orig": 1.0399962663650513, + "epoch": 0.33647278740383924, + "kl_loss": 0.2707816958427429, + "loss_ib": 0.01129138469696045, + "step": 1170 + }, + { + "ce_ib": 5.062176704406738, + "ce_orig": 1.0545399188995361, + "epoch": 0.33647278740383924, + "kl_loss": 0.24855771660804749, + "loss_ib": 0.0075477538630366325, + "step": 1170 + }, + { + "ce_ib": 7.816201210021973, + "ce_orig": 0.6892129182815552, + "epoch": 0.33647278740383924, + "kl_loss": 0.3450187146663666, + "loss_ib": 0.011266388930380344, + "step": 1170 + }, + { + "ce_ib": 5.691289901733398, + "ce_orig": 0.7200804352760315, + "epoch": 0.33647278740383924, + "kl_loss": 0.28832268714904785, + "loss_ib": 0.008574516512453556, + "step": 1170 + }, + { + "ce_ib": 4.545563697814941, + "ce_orig": 0.3753531873226166, + "epoch": 0.33676037098281686, + "kl_loss": 0.1820743829011917, + "loss_ib": 0.006366307381540537, + "step": 1171 + }, + { + "ce_ib": 6.307059288024902, + "ce_orig": 0.7057124376296997, + "epoch": 0.33676037098281686, + "kl_loss": 0.18096861243247986, + "loss_ib": 0.00811674538999796, + "step": 1171 + }, + { + "ce_ib": 8.955676078796387, + "ce_orig": 1.7811617851257324, + "epoch": 0.33676037098281686, + "kl_loss": 0.24952708184719086, + "loss_ib": 0.01145094633102417, + "step": 1171 + }, + { + "ce_ib": 6.340480327606201, + "ce_orig": 0.8761670589447021, + "epoch": 0.33676037098281686, + "kl_loss": 0.20854952931404114, + "loss_ib": 0.008425976149737835, + "step": 1171 + }, + { + "ce_ib": 7.614955902099609, + "ce_orig": 1.0214663743972778, + "epoch": 0.33704795456179454, + "kl_loss": 0.34129971265792847, + "loss_ib": 0.011027953587472439, + "step": 1172 + }, + { + "ce_ib": 4.446689128875732, + "ce_orig": 0.5307155251502991, + "epoch": 0.33704795456179454, + "kl_loss": 0.2831967771053314, + "loss_ib": 0.0072786565870046616, + "step": 1172 + }, + { + "ce_ib": 7.496953964233398, + "ce_orig": 1.0684783458709717, + "epoch": 0.33704795456179454, + "kl_loss": 0.26136043667793274, + "loss_ib": 0.01011055801063776, + "step": 1172 + }, + { + "ce_ib": 9.910701751708984, + "ce_orig": 0.9857996702194214, + "epoch": 0.33704795456179454, + "kl_loss": 0.2773677706718445, + "loss_ib": 0.012684379704296589, + "step": 1172 + }, + { + "ce_ib": 7.447598457336426, + "ce_orig": 0.7740610837936401, + "epoch": 0.33733553814077216, + "kl_loss": 0.28991585969924927, + "loss_ib": 0.010346757248044014, + "step": 1173 + }, + { + "ce_ib": 8.320796012878418, + "ce_orig": 0.91221684217453, + "epoch": 0.33733553814077216, + "kl_loss": 0.28020790219306946, + "loss_ib": 0.011122874915599823, + "step": 1173 + }, + { + "ce_ib": 11.175503730773926, + "ce_orig": 1.6308225393295288, + "epoch": 0.33733553814077216, + "kl_loss": 0.34757018089294434, + "loss_ib": 0.014651205390691757, + "step": 1173 + }, + { + "ce_ib": 6.0726728439331055, + "ce_orig": 0.5518878698348999, + "epoch": 0.33733553814077216, + "kl_loss": 0.25517135858535767, + "loss_ib": 0.008624386973679066, + "step": 1173 + }, + { + "ce_ib": 6.273822784423828, + "ce_orig": 0.7508142590522766, + "epoch": 0.3376231217197498, + "kl_loss": 0.3808406591415405, + "loss_ib": 0.010082229040563107, + "step": 1174 + }, + { + "ce_ib": 7.740455150604248, + "ce_orig": 0.8385905623435974, + "epoch": 0.3376231217197498, + "kl_loss": 0.23284657299518585, + "loss_ib": 0.010068920440971851, + "step": 1174 + }, + { + "ce_ib": 4.734543323516846, + "ce_orig": 0.69203120470047, + "epoch": 0.3376231217197498, + "kl_loss": 0.33553797006607056, + "loss_ib": 0.008089922368526459, + "step": 1174 + }, + { + "ce_ib": 8.494558334350586, + "ce_orig": 1.1173644065856934, + "epoch": 0.3376231217197498, + "kl_loss": 0.32031548023223877, + "loss_ib": 0.011697713285684586, + "step": 1174 + }, + { + "epoch": 0.33791070529872747, + "grad_norm": 0.09744829684495926, + "learning_rate": 9.824910602408528e-06, + "loss": 0.931, + "step": 1175 + }, + { + "ce_ib": 6.686302661895752, + "ce_orig": 0.8108925819396973, + "epoch": 0.33791070529872747, + "kl_loss": 0.34382033348083496, + "loss_ib": 0.010124506428837776, + "step": 1175 + }, + { + "ce_ib": 5.286237716674805, + "ce_orig": 0.3699839115142822, + "epoch": 0.33791070529872747, + "kl_loss": 0.16003935039043427, + "loss_ib": 0.00688663125038147, + "step": 1175 + }, + { + "ce_ib": 10.233742713928223, + "ce_orig": 1.4519670009613037, + "epoch": 0.33791070529872747, + "kl_loss": 0.25252097845077515, + "loss_ib": 0.012758953496813774, + "step": 1175 + }, + { + "ce_ib": 8.468210220336914, + "ce_orig": 1.0720109939575195, + "epoch": 0.33791070529872747, + "kl_loss": 0.38942861557006836, + "loss_ib": 0.012362496927380562, + "step": 1175 + }, + { + "ce_ib": 5.240467548370361, + "ce_orig": 0.8880922198295593, + "epoch": 0.3381982888777051, + "kl_loss": 0.22541771829128265, + "loss_ib": 0.007494644727557898, + "step": 1176 + }, + { + "ce_ib": 7.298333644866943, + "ce_orig": 0.511344850063324, + "epoch": 0.3381982888777051, + "kl_loss": 0.31986504793167114, + "loss_ib": 0.010496983304619789, + "step": 1176 + }, + { + "ce_ib": 7.720901012420654, + "ce_orig": 1.3299001455307007, + "epoch": 0.3381982888777051, + "kl_loss": 0.18659347295761108, + "loss_ib": 0.009586836211383343, + "step": 1176 + }, + { + "ce_ib": 10.655141830444336, + "ce_orig": 1.165802240371704, + "epoch": 0.3381982888777051, + "kl_loss": 0.2347698211669922, + "loss_ib": 0.013002839870750904, + "step": 1176 + }, + { + "ce_ib": 4.5789265632629395, + "ce_orig": 0.5766971707344055, + "epoch": 0.3384858724566827, + "kl_loss": 0.19134168326854706, + "loss_ib": 0.00649234326556325, + "step": 1177 + }, + { + "ce_ib": 4.78309440612793, + "ce_orig": 0.6634275317192078, + "epoch": 0.3384858724566827, + "kl_loss": 0.21720531582832336, + "loss_ib": 0.006955147720873356, + "step": 1177 + }, + { + "ce_ib": 7.196147441864014, + "ce_orig": 0.7668088674545288, + "epoch": 0.3384858724566827, + "kl_loss": 0.41518035531044006, + "loss_ib": 0.01134795043617487, + "step": 1177 + }, + { + "ce_ib": 4.836219310760498, + "ce_orig": 0.33054235577583313, + "epoch": 0.3384858724566827, + "kl_loss": 0.22906270623207092, + "loss_ib": 0.0071268463507294655, + "step": 1177 + }, + { + "ce_ib": 5.0311455726623535, + "ce_orig": 0.5774480104446411, + "epoch": 0.33877345603566034, + "kl_loss": 0.24220524728298187, + "loss_ib": 0.007453198079019785, + "step": 1178 + }, + { + "ce_ib": 11.710144996643066, + "ce_orig": 2.045793056488037, + "epoch": 0.33877345603566034, + "kl_loss": 0.3105084300041199, + "loss_ib": 0.014815229922533035, + "step": 1178 + }, + { + "ce_ib": 8.100988388061523, + "ce_orig": 0.9709780812263489, + "epoch": 0.33877345603566034, + "kl_loss": 0.18915799260139465, + "loss_ib": 0.00999256782233715, + "step": 1178 + }, + { + "ce_ib": 5.740729331970215, + "ce_orig": 0.48105230927467346, + "epoch": 0.33877345603566034, + "kl_loss": 0.3077582120895386, + "loss_ib": 0.008818311616778374, + "step": 1178 + }, + { + "ce_ib": 8.696881294250488, + "ce_orig": 1.047848105430603, + "epoch": 0.339061039614638, + "kl_loss": 0.25821876525878906, + "loss_ib": 0.011279068887233734, + "step": 1179 + }, + { + "ce_ib": 8.131317138671875, + "ce_orig": 0.9292140603065491, + "epoch": 0.339061039614638, + "kl_loss": 0.2904861569404602, + "loss_ib": 0.011036179028451443, + "step": 1179 + }, + { + "ce_ib": 5.153392314910889, + "ce_orig": 0.8021326065063477, + "epoch": 0.339061039614638, + "kl_loss": 0.22328916192054749, + "loss_ib": 0.007386283483356237, + "step": 1179 + }, + { + "ce_ib": 6.696101665496826, + "ce_orig": 1.0752699375152588, + "epoch": 0.339061039614638, + "kl_loss": 0.2535207271575928, + "loss_ib": 0.009231309406459332, + "step": 1179 + }, + { + "epoch": 0.33934862319361564, + "grad_norm": 0.10896741598844528, + "learning_rate": 9.822868998174914e-06, + "loss": 0.9011, + "step": 1180 + }, + { + "ce_ib": 7.786527156829834, + "ce_orig": 0.8277803659439087, + "epoch": 0.33934862319361564, + "kl_loss": 0.32757166028022766, + "loss_ib": 0.011062243953347206, + "step": 1180 + }, + { + "ce_ib": 5.486213684082031, + "ce_orig": 0.6657846570014954, + "epoch": 0.33934862319361564, + "kl_loss": 0.27369385957717896, + "loss_ib": 0.008223151788115501, + "step": 1180 + }, + { + "ce_ib": 6.703554153442383, + "ce_orig": 0.8262245059013367, + "epoch": 0.33934862319361564, + "kl_loss": 0.28381454944610596, + "loss_ib": 0.009541699662804604, + "step": 1180 + }, + { + "ce_ib": 4.37410306930542, + "ce_orig": 0.6137135028839111, + "epoch": 0.33934862319361564, + "kl_loss": 0.304335355758667, + "loss_ib": 0.007417456712573767, + "step": 1180 + }, + { + "ce_ib": 8.273782730102539, + "ce_orig": 1.3585046529769897, + "epoch": 0.33963620677259326, + "kl_loss": 0.26714888215065, + "loss_ib": 0.010945270769298077, + "step": 1181 + }, + { + "ce_ib": 5.653596878051758, + "ce_orig": 0.46559175848960876, + "epoch": 0.33963620677259326, + "kl_loss": 0.2943084239959717, + "loss_ib": 0.00859668105840683, + "step": 1181 + }, + { + "ce_ib": 9.782648086547852, + "ce_orig": 1.2866759300231934, + "epoch": 0.33963620677259326, + "kl_loss": 0.3248477578163147, + "loss_ib": 0.01303112506866455, + "step": 1181 + }, + { + "ce_ib": 4.714155197143555, + "ce_orig": 0.6941638588905334, + "epoch": 0.33963620677259326, + "kl_loss": 0.21619562804698944, + "loss_ib": 0.006876111496239901, + "step": 1181 + }, + { + "ce_ib": 6.625691890716553, + "ce_orig": 0.5445870161056519, + "epoch": 0.33992379035157094, + "kl_loss": 0.23607990145683289, + "loss_ib": 0.008986490778625011, + "step": 1182 + }, + { + "ce_ib": 10.256919860839844, + "ce_orig": 1.1101305484771729, + "epoch": 0.33992379035157094, + "kl_loss": 0.4181860089302063, + "loss_ib": 0.014438779093325138, + "step": 1182 + }, + { + "ce_ib": 9.138778686523438, + "ce_orig": 1.4723149538040161, + "epoch": 0.33992379035157094, + "kl_loss": 0.33457493782043457, + "loss_ib": 0.012484529055655003, + "step": 1182 + }, + { + "ce_ib": 10.66258430480957, + "ce_orig": 1.4558533430099487, + "epoch": 0.33992379035157094, + "kl_loss": 0.19665685296058655, + "loss_ib": 0.012629152275621891, + "step": 1182 + }, + { + "ce_ib": 6.267303466796875, + "ce_orig": 0.7697774171829224, + "epoch": 0.34021137393054857, + "kl_loss": 0.5935349464416504, + "loss_ib": 0.012202654033899307, + "step": 1183 + }, + { + "ce_ib": 5.881093502044678, + "ce_orig": 0.4210011065006256, + "epoch": 0.34021137393054857, + "kl_loss": 0.25692081451416016, + "loss_ib": 0.008450301364064217, + "step": 1183 + }, + { + "ce_ib": 7.419436454772949, + "ce_orig": 1.293154239654541, + "epoch": 0.34021137393054857, + "kl_loss": 0.26566553115844727, + "loss_ib": 0.010076090693473816, + "step": 1183 + }, + { + "ce_ib": 8.447088241577148, + "ce_orig": 1.1318758726119995, + "epoch": 0.34021137393054857, + "kl_loss": 0.26372429728507996, + "loss_ib": 0.011084331199526787, + "step": 1183 + }, + { + "ce_ib": 6.175292491912842, + "ce_orig": 0.9468995928764343, + "epoch": 0.3404989575095262, + "kl_loss": 0.23682790994644165, + "loss_ib": 0.008543571457266808, + "step": 1184 + }, + { + "ce_ib": 7.1323466300964355, + "ce_orig": 0.8490071892738342, + "epoch": 0.3404989575095262, + "kl_loss": 0.2767108678817749, + "loss_ib": 0.00989945512264967, + "step": 1184 + }, + { + "ce_ib": 6.375570774078369, + "ce_orig": 0.5345741510391235, + "epoch": 0.3404989575095262, + "kl_loss": 0.41314756870269775, + "loss_ib": 0.010507047176361084, + "step": 1184 + }, + { + "ce_ib": 7.322238445281982, + "ce_orig": 1.1370083093643188, + "epoch": 0.3404989575095262, + "kl_loss": 0.2956608235836029, + "loss_ib": 0.010278847068548203, + "step": 1184 + }, + { + "epoch": 0.34078654108850387, + "grad_norm": 0.10617753863334656, + "learning_rate": 9.820815774530473e-06, + "loss": 0.9171, + "step": 1185 + }, + { + "ce_ib": 7.385372161865234, + "ce_orig": 0.7977047562599182, + "epoch": 0.34078654108850387, + "kl_loss": 0.2396102249622345, + "loss_ib": 0.00978147517889738, + "step": 1185 + }, + { + "ce_ib": 5.597156047821045, + "ce_orig": 0.7299818992614746, + "epoch": 0.34078654108850387, + "kl_loss": 0.198746919631958, + "loss_ib": 0.007584625389426947, + "step": 1185 + }, + { + "ce_ib": 7.188481330871582, + "ce_orig": 1.3601056337356567, + "epoch": 0.34078654108850387, + "kl_loss": 0.44576168060302734, + "loss_ib": 0.011646098457276821, + "step": 1185 + }, + { + "ce_ib": 6.3431267738342285, + "ce_orig": 0.7139648795127869, + "epoch": 0.34078654108850387, + "kl_loss": 0.3590855896472931, + "loss_ib": 0.009933982975780964, + "step": 1185 + }, + { + "ce_ib": 8.162310600280762, + "ce_orig": 0.6909653544425964, + "epoch": 0.3410741246674815, + "kl_loss": 0.2678564786911011, + "loss_ib": 0.010840876027941704, + "step": 1186 + }, + { + "ce_ib": 8.19145393371582, + "ce_orig": 0.899401843547821, + "epoch": 0.3410741246674815, + "kl_loss": 0.2756710350513458, + "loss_ib": 0.010948164388537407, + "step": 1186 + }, + { + "ce_ib": 5.634396076202393, + "ce_orig": 0.8212024569511414, + "epoch": 0.3410741246674815, + "kl_loss": 0.19679471850395203, + "loss_ib": 0.007602343335747719, + "step": 1186 + }, + { + "ce_ib": 8.251862525939941, + "ce_orig": 1.219065546989441, + "epoch": 0.3410741246674815, + "kl_loss": 0.28769683837890625, + "loss_ib": 0.011128830723464489, + "step": 1186 + }, + { + "ce_ib": 9.134533882141113, + "ce_orig": 1.1667752265930176, + "epoch": 0.3413617082464591, + "kl_loss": 0.23589232563972473, + "loss_ib": 0.011493457481265068, + "step": 1187 + }, + { + "ce_ib": 1.7729456424713135, + "ce_orig": 0.17611609399318695, + "epoch": 0.3413617082464591, + "kl_loss": 0.6091700792312622, + "loss_ib": 0.007864645682275295, + "step": 1187 + }, + { + "ce_ib": 5.996875286102295, + "ce_orig": 0.7136014103889465, + "epoch": 0.3413617082464591, + "kl_loss": 0.2414180040359497, + "loss_ib": 0.00841105543076992, + "step": 1187 + }, + { + "ce_ib": 8.649369239807129, + "ce_orig": 1.5905383825302124, + "epoch": 0.3413617082464591, + "kl_loss": 0.42170989513397217, + "loss_ib": 0.012866468168795109, + "step": 1187 + }, + { + "ce_ib": 6.070381164550781, + "ce_orig": 0.599243700504303, + "epoch": 0.34164929182543674, + "kl_loss": 0.5712750554084778, + "loss_ib": 0.011783132329583168, + "step": 1188 + }, + { + "ce_ib": 7.615642070770264, + "ce_orig": 0.8723514080047607, + "epoch": 0.34164929182543674, + "kl_loss": 0.31852707266807556, + "loss_ib": 0.010800912044942379, + "step": 1188 + }, + { + "ce_ib": 6.003516674041748, + "ce_orig": 0.6859092712402344, + "epoch": 0.34164929182543674, + "kl_loss": 0.21503300964832306, + "loss_ib": 0.008153846487402916, + "step": 1188 + }, + { + "ce_ib": 5.837275981903076, + "ce_orig": 0.5255218744277954, + "epoch": 0.34164929182543674, + "kl_loss": 0.33323538303375244, + "loss_ib": 0.009169629774987698, + "step": 1188 + }, + { + "ce_ib": 7.077301502227783, + "ce_orig": 0.5584138631820679, + "epoch": 0.3419368754044144, + "kl_loss": 0.3154032528400421, + "loss_ib": 0.010231333784759045, + "step": 1189 + }, + { + "ce_ib": 4.683353900909424, + "ce_orig": 1.0100295543670654, + "epoch": 0.3419368754044144, + "kl_loss": 0.23839840292930603, + "loss_ib": 0.0070673380978405476, + "step": 1189 + }, + { + "ce_ib": 6.981997489929199, + "ce_orig": 0.8947463035583496, + "epoch": 0.3419368754044144, + "kl_loss": 0.30623385310173035, + "loss_ib": 0.010044336318969727, + "step": 1189 + }, + { + "ce_ib": 8.45647144317627, + "ce_orig": 1.307883858680725, + "epoch": 0.3419368754044144, + "kl_loss": 0.3093627393245697, + "loss_ib": 0.011550098657608032, + "step": 1189 + }, + { + "epoch": 0.34222445898339204, + "grad_norm": 0.09876321256160736, + "learning_rate": 9.818750936421894e-06, + "loss": 0.9088, + "step": 1190 + }, + { + "ce_ib": 9.657329559326172, + "ce_orig": 1.32797110080719, + "epoch": 0.34222445898339204, + "kl_loss": 0.2832852602005005, + "loss_ib": 0.012490181252360344, + "step": 1190 + }, + { + "ce_ib": 5.555445194244385, + "ce_orig": 0.38129884004592896, + "epoch": 0.34222445898339204, + "kl_loss": 0.37391650676727295, + "loss_ib": 0.009294610470533371, + "step": 1190 + }, + { + "ce_ib": 5.386440753936768, + "ce_orig": 0.8913825750350952, + "epoch": 0.34222445898339204, + "kl_loss": 0.4977033734321594, + "loss_ib": 0.010363473556935787, + "step": 1190 + }, + { + "ce_ib": 6.759226322174072, + "ce_orig": 0.8466914296150208, + "epoch": 0.34222445898339204, + "kl_loss": 0.29842573404312134, + "loss_ib": 0.009743483737111092, + "step": 1190 + }, + { + "ce_ib": 5.570773601531982, + "ce_orig": 0.5707303285598755, + "epoch": 0.34251204256236967, + "kl_loss": 0.35615256428718567, + "loss_ib": 0.009132299572229385, + "step": 1191 + }, + { + "ce_ib": 5.576659202575684, + "ce_orig": 0.28790995478630066, + "epoch": 0.34251204256236967, + "kl_loss": 0.45576032996177673, + "loss_ib": 0.010134262964129448, + "step": 1191 + }, + { + "ce_ib": 11.361724853515625, + "ce_orig": 1.6689220666885376, + "epoch": 0.34251204256236967, + "kl_loss": 0.3620713949203491, + "loss_ib": 0.014982438646256924, + "step": 1191 + }, + { + "ce_ib": 8.733168601989746, + "ce_orig": 0.7460479736328125, + "epoch": 0.34251204256236967, + "kl_loss": 0.3041139245033264, + "loss_ib": 0.011774308048188686, + "step": 1191 + }, + { + "ce_ib": 14.115975379943848, + "ce_orig": 0.7688201069831848, + "epoch": 0.34279962614134735, + "kl_loss": 0.6082284450531006, + "loss_ib": 0.020198259502649307, + "step": 1192 + }, + { + "ce_ib": 5.99207878112793, + "ce_orig": 0.8545339107513428, + "epoch": 0.34279962614134735, + "kl_loss": 0.21773307025432587, + "loss_ib": 0.008169409818947315, + "step": 1192 + }, + { + "ce_ib": 3.9453814029693604, + "ce_orig": 0.5740529298782349, + "epoch": 0.34279962614134735, + "kl_loss": 0.2615795135498047, + "loss_ib": 0.006561176851391792, + "step": 1192 + }, + { + "ce_ib": 6.691573619842529, + "ce_orig": 0.8405234813690186, + "epoch": 0.34279962614134735, + "kl_loss": 0.3192151188850403, + "loss_ib": 0.009883725084364414, + "step": 1192 + }, + { + "ce_ib": 6.382562637329102, + "ce_orig": 0.7000412940979004, + "epoch": 0.34308720972032497, + "kl_loss": 0.3458077907562256, + "loss_ib": 0.009840640239417553, + "step": 1193 + }, + { + "ce_ib": 5.735156059265137, + "ce_orig": 0.5913882851600647, + "epoch": 0.34308720972032497, + "kl_loss": 0.21425525844097137, + "loss_ib": 0.007877708412706852, + "step": 1193 + }, + { + "ce_ib": 3.7972733974456787, + "ce_orig": 0.3964553475379944, + "epoch": 0.34308720972032497, + "kl_loss": 0.30395567417144775, + "loss_ib": 0.006836830172687769, + "step": 1193 + }, + { + "ce_ib": 8.773690223693848, + "ce_orig": 1.267960786819458, + "epoch": 0.34308720972032497, + "kl_loss": 0.28563830256462097, + "loss_ib": 0.011630073189735413, + "step": 1193 + }, + { + "ce_ib": 8.8164701461792, + "ce_orig": 1.25728440284729, + "epoch": 0.3433747932993026, + "kl_loss": 0.2996102273464203, + "loss_ib": 0.011812572367489338, + "step": 1194 + }, + { + "ce_ib": 6.719943523406982, + "ce_orig": 0.874944806098938, + "epoch": 0.3433747932993026, + "kl_loss": 0.19839473068714142, + "loss_ib": 0.008703891187906265, + "step": 1194 + }, + { + "ce_ib": 9.051475524902344, + "ce_orig": 1.5228703022003174, + "epoch": 0.3433747932993026, + "kl_loss": 0.2388281524181366, + "loss_ib": 0.01143975742161274, + "step": 1194 + }, + { + "ce_ib": 6.544286251068115, + "ce_orig": 0.8071295022964478, + "epoch": 0.3433747932993026, + "kl_loss": 0.21565112471580505, + "loss_ib": 0.008700797334313393, + "step": 1194 + }, + { + "epoch": 0.3436623768782803, + "grad_norm": 0.10329017043113708, + "learning_rate": 9.816674488823855e-06, + "loss": 0.943, + "step": 1195 + }, + { + "ce_ib": 6.421690940856934, + "ce_orig": 0.5300900340080261, + "epoch": 0.3436623768782803, + "kl_loss": 0.238215833902359, + "loss_ib": 0.00880384910851717, + "step": 1195 + }, + { + "ce_ib": 3.6498396396636963, + "ce_orig": 0.66141277551651, + "epoch": 0.3436623768782803, + "kl_loss": 0.23390421271324158, + "loss_ib": 0.005988881457597017, + "step": 1195 + }, + { + "ce_ib": 9.907021522521973, + "ce_orig": 0.8684224486351013, + "epoch": 0.3436623768782803, + "kl_loss": 0.26282399892807007, + "loss_ib": 0.012535261921584606, + "step": 1195 + }, + { + "ce_ib": 5.8363356590271, + "ce_orig": 0.5741661787033081, + "epoch": 0.3436623768782803, + "kl_loss": 0.239915132522583, + "loss_ib": 0.008235487155616283, + "step": 1195 + }, + { + "ce_ib": 5.6322784423828125, + "ce_orig": 0.797076404094696, + "epoch": 0.3439499604572579, + "kl_loss": 0.2528030276298523, + "loss_ib": 0.008160308003425598, + "step": 1196 + }, + { + "ce_ib": 7.733359336853027, + "ce_orig": 0.8631399869918823, + "epoch": 0.3439499604572579, + "kl_loss": 0.32714614272117615, + "loss_ib": 0.011004820466041565, + "step": 1196 + }, + { + "ce_ib": 7.169785499572754, + "ce_orig": 0.9283391237258911, + "epoch": 0.3439499604572579, + "kl_loss": 0.19584496319293976, + "loss_ib": 0.009128234349191189, + "step": 1196 + }, + { + "ce_ib": 4.730182647705078, + "ce_orig": 0.30926573276519775, + "epoch": 0.3439499604572579, + "kl_loss": 0.2444748878479004, + "loss_ib": 0.007174931466579437, + "step": 1196 + }, + { + "ce_ib": 4.997303485870361, + "ce_orig": 0.5115468502044678, + "epoch": 0.3442375440362355, + "kl_loss": 0.21230413019657135, + "loss_ib": 0.0071203443221747875, + "step": 1197 + }, + { + "ce_ib": 4.934088230133057, + "ce_orig": 0.5581589937210083, + "epoch": 0.3442375440362355, + "kl_loss": 0.19467350840568542, + "loss_ib": 0.00688082305714488, + "step": 1197 + }, + { + "ce_ib": 6.582386016845703, + "ce_orig": 0.543520987033844, + "epoch": 0.3442375440362355, + "kl_loss": 0.29340246319770813, + "loss_ib": 0.009516410529613495, + "step": 1197 + }, + { + "ce_ib": 6.0954508781433105, + "ce_orig": 0.7179067730903625, + "epoch": 0.3442375440362355, + "kl_loss": 0.20862382650375366, + "loss_ib": 0.00818168930709362, + "step": 1197 + }, + { + "ce_ib": 8.645586013793945, + "ce_orig": 1.1063332557678223, + "epoch": 0.34452512761521314, + "kl_loss": 0.2973189353942871, + "loss_ib": 0.011618776246905327, + "step": 1198 + }, + { + "ce_ib": 7.050906181335449, + "ce_orig": 1.1842377185821533, + "epoch": 0.34452512761521314, + "kl_loss": 0.2546880841255188, + "loss_ib": 0.009597786702215672, + "step": 1198 + }, + { + "ce_ib": 6.744875431060791, + "ce_orig": 0.6672945618629456, + "epoch": 0.34452512761521314, + "kl_loss": 0.29981529712677, + "loss_ib": 0.009743028320372105, + "step": 1198 + }, + { + "ce_ib": 9.70975112915039, + "ce_orig": 1.2213077545166016, + "epoch": 0.34452512761521314, + "kl_loss": 0.29038137197494507, + "loss_ib": 0.012613564729690552, + "step": 1198 + }, + { + "ce_ib": 6.745275497436523, + "ce_orig": 0.7089959383010864, + "epoch": 0.3448127111941908, + "kl_loss": 0.22385196387767792, + "loss_ib": 0.008983795531094074, + "step": 1199 + }, + { + "ce_ib": 6.717109680175781, + "ce_orig": 0.7282753586769104, + "epoch": 0.3448127111941908, + "kl_loss": 0.4085049331188202, + "loss_ib": 0.010802159085869789, + "step": 1199 + }, + { + "ce_ib": 8.308910369873047, + "ce_orig": 0.9311578273773193, + "epoch": 0.3448127111941908, + "kl_loss": 0.2200586348772049, + "loss_ib": 0.010509496554732323, + "step": 1199 + }, + { + "ce_ib": 6.919417858123779, + "ce_orig": 0.981727659702301, + "epoch": 0.3448127111941908, + "kl_loss": 0.3038212060928345, + "loss_ib": 0.009957630187273026, + "step": 1199 + }, + { + "epoch": 0.34510029477316845, + "grad_norm": 0.11079682409763336, + "learning_rate": 9.814586436738998e-06, + "loss": 0.8328, + "step": 1200 + }, + { + "ce_ib": 2.4315686225891113, + "ce_orig": 0.35374554991722107, + "epoch": 0.34510029477316845, + "kl_loss": 0.5890471935272217, + "loss_ib": 0.008322040550410748, + "step": 1200 + }, + { + "ce_ib": 3.5685207843780518, + "ce_orig": 0.49777722358703613, + "epoch": 0.34510029477316845, + "kl_loss": 0.19487112760543823, + "loss_ib": 0.005517232231795788, + "step": 1200 + }, + { + "ce_ib": 2.2421321868896484, + "ce_orig": 0.22677947580814362, + "epoch": 0.34510029477316845, + "kl_loss": 0.26925888657569885, + "loss_ib": 0.004934720695018768, + "step": 1200 + }, + { + "ce_ib": 3.7340574264526367, + "ce_orig": 0.6081327199935913, + "epoch": 0.34510029477316845, + "kl_loss": 0.19507867097854614, + "loss_ib": 0.005684844218194485, + "step": 1200 + }, + { + "ce_ib": 6.920347213745117, + "ce_orig": 0.8367874026298523, + "epoch": 0.34538787835214607, + "kl_loss": 0.3019488453865051, + "loss_ib": 0.009939835406839848, + "step": 1201 + }, + { + "ce_ib": 5.684332847595215, + "ce_orig": 1.077157735824585, + "epoch": 0.34538787835214607, + "kl_loss": 0.1948537677526474, + "loss_ib": 0.007632870692759752, + "step": 1201 + }, + { + "ce_ib": 9.116320610046387, + "ce_orig": 1.3145966529846191, + "epoch": 0.34538787835214607, + "kl_loss": 0.2820887565612793, + "loss_ib": 0.011937207542359829, + "step": 1201 + }, + { + "ce_ib": 8.325549125671387, + "ce_orig": 1.0065648555755615, + "epoch": 0.34538787835214607, + "kl_loss": 0.20022490620613098, + "loss_ib": 0.010327798314392567, + "step": 1201 + }, + { + "ce_ib": 9.057621955871582, + "ce_orig": 0.5740677118301392, + "epoch": 0.34567546193112375, + "kl_loss": 0.37036046385765076, + "loss_ib": 0.01276122685521841, + "step": 1202 + }, + { + "ce_ib": 7.503146171569824, + "ce_orig": 1.0310344696044922, + "epoch": 0.34567546193112375, + "kl_loss": 0.30264437198638916, + "loss_ib": 0.010529589839279652, + "step": 1202 + }, + { + "ce_ib": 6.097241401672363, + "ce_orig": 0.6190967559814453, + "epoch": 0.34567546193112375, + "kl_loss": 0.3846118450164795, + "loss_ib": 0.009943359531462193, + "step": 1202 + }, + { + "ce_ib": 6.613105297088623, + "ce_orig": 0.7227441668510437, + "epoch": 0.34567546193112375, + "kl_loss": 0.3394354581832886, + "loss_ib": 0.010007459670305252, + "step": 1202 + }, + { + "ce_ib": 3.6348624229431152, + "ce_orig": 0.8466792106628418, + "epoch": 0.3459630455101014, + "kl_loss": 0.21068529784679413, + "loss_ib": 0.005741715431213379, + "step": 1203 + }, + { + "ce_ib": 3.840749979019165, + "ce_orig": 0.7769103050231934, + "epoch": 0.3459630455101014, + "kl_loss": 0.20804926753044128, + "loss_ib": 0.005921242758631706, + "step": 1203 + }, + { + "ce_ib": 10.696869850158691, + "ce_orig": 1.6593687534332275, + "epoch": 0.3459630455101014, + "kl_loss": 0.2703685164451599, + "loss_ib": 0.013400554656982422, + "step": 1203 + }, + { + "ce_ib": 9.642045974731445, + "ce_orig": 1.204627275466919, + "epoch": 0.3459630455101014, + "kl_loss": 0.27802687883377075, + "loss_ib": 0.01242231484502554, + "step": 1203 + }, + { + "ce_ib": 7.497930526733398, + "ce_orig": 0.8816418647766113, + "epoch": 0.346250629089079, + "kl_loss": 0.2705840766429901, + "loss_ib": 0.0102037712931633, + "step": 1204 + }, + { + "ce_ib": 9.714649200439453, + "ce_orig": 1.4165947437286377, + "epoch": 0.346250629089079, + "kl_loss": 0.2666982114315033, + "loss_ib": 0.012381630949676037, + "step": 1204 + }, + { + "ce_ib": 8.192032814025879, + "ce_orig": 0.9408739805221558, + "epoch": 0.346250629089079, + "kl_loss": 0.23753347992897034, + "loss_ib": 0.010567368008196354, + "step": 1204 + }, + { + "ce_ib": 5.191983699798584, + "ce_orig": 0.2504975497722626, + "epoch": 0.346250629089079, + "kl_loss": 0.19956733286380768, + "loss_ib": 0.007187656592577696, + "step": 1204 + }, + { + "epoch": 0.3465382126680567, + "grad_norm": 0.09698857367038727, + "learning_rate": 9.812486785197924e-06, + "loss": 0.864, + "step": 1205 + }, + { + "ce_ib": 5.932236194610596, + "ce_orig": 0.5712811350822449, + "epoch": 0.3465382126680567, + "kl_loss": 0.2073478102684021, + "loss_ib": 0.008005714043974876, + "step": 1205 + }, + { + "ce_ib": 9.851508140563965, + "ce_orig": 1.2491554021835327, + "epoch": 0.3465382126680567, + "kl_loss": 0.26627326011657715, + "loss_ib": 0.012514240108430386, + "step": 1205 + }, + { + "ce_ib": 7.024086952209473, + "ce_orig": 1.2069941759109497, + "epoch": 0.3465382126680567, + "kl_loss": 0.17645485699176788, + "loss_ib": 0.008788635954260826, + "step": 1205 + }, + { + "ce_ib": 8.410697937011719, + "ce_orig": 0.8548517227172852, + "epoch": 0.3465382126680567, + "kl_loss": 0.34426677227020264, + "loss_ib": 0.01185336522758007, + "step": 1205 + }, + { + "ce_ib": 10.94921588897705, + "ce_orig": 1.5700947046279907, + "epoch": 0.3468257962470343, + "kl_loss": 0.2599565386772156, + "loss_ib": 0.013548781163990498, + "step": 1206 + }, + { + "ce_ib": 6.561511993408203, + "ce_orig": 0.7359023690223694, + "epoch": 0.3468257962470343, + "kl_loss": 0.2437046766281128, + "loss_ib": 0.008998558856546879, + "step": 1206 + }, + { + "ce_ib": 5.731392860412598, + "ce_orig": 0.6426838040351868, + "epoch": 0.3468257962470343, + "kl_loss": 0.2911027669906616, + "loss_ib": 0.008642420172691345, + "step": 1206 + }, + { + "ce_ib": 7.189348220825195, + "ce_orig": 0.7906773090362549, + "epoch": 0.3468257962470343, + "kl_loss": 0.2725953459739685, + "loss_ib": 0.009915301576256752, + "step": 1206 + }, + { + "ce_ib": 5.853690147399902, + "ce_orig": 0.6496776938438416, + "epoch": 0.3471133798260119, + "kl_loss": 0.2832804322242737, + "loss_ib": 0.008686495013535023, + "step": 1207 + }, + { + "ce_ib": 5.281362533569336, + "ce_orig": 0.4212856590747833, + "epoch": 0.3471133798260119, + "kl_loss": 0.261347234249115, + "loss_ib": 0.007894834503531456, + "step": 1207 + }, + { + "ce_ib": 8.261890411376953, + "ce_orig": 0.8318018913269043, + "epoch": 0.3471133798260119, + "kl_loss": 0.2593042850494385, + "loss_ib": 0.01085493341088295, + "step": 1207 + }, + { + "ce_ib": 6.534182548522949, + "ce_orig": 0.5106642842292786, + "epoch": 0.3471133798260119, + "kl_loss": 0.2081461399793625, + "loss_ib": 0.008615643717348576, + "step": 1207 + }, + { + "ce_ib": 9.115994453430176, + "ce_orig": 0.8802713751792908, + "epoch": 0.34740096340498955, + "kl_loss": 0.2686144709587097, + "loss_ib": 0.01180213876068592, + "step": 1208 + }, + { + "ce_ib": 7.533227920532227, + "ce_orig": 0.9319108128547668, + "epoch": 0.34740096340498955, + "kl_loss": 0.2602120339870453, + "loss_ib": 0.010135347954928875, + "step": 1208 + }, + { + "ce_ib": 8.701380729675293, + "ce_orig": 1.2851958274841309, + "epoch": 0.34740096340498955, + "kl_loss": 0.2070631980895996, + "loss_ib": 0.01077201310545206, + "step": 1208 + }, + { + "ce_ib": 5.88447904586792, + "ce_orig": 0.626255989074707, + "epoch": 0.34740096340498955, + "kl_loss": 0.20318818092346191, + "loss_ib": 0.007916361093521118, + "step": 1208 + }, + { + "ce_ib": 3.801424980163574, + "ce_orig": 0.49367231130599976, + "epoch": 0.34768854698396723, + "kl_loss": 0.27957862615585327, + "loss_ib": 0.006597211118787527, + "step": 1209 + }, + { + "ce_ib": 8.915145874023438, + "ce_orig": 1.2738125324249268, + "epoch": 0.34768854698396723, + "kl_loss": 0.25946545600891113, + "loss_ib": 0.011509799398481846, + "step": 1209 + }, + { + "ce_ib": 7.498837471008301, + "ce_orig": 0.46814650297164917, + "epoch": 0.34768854698396723, + "kl_loss": 0.30411720275878906, + "loss_ib": 0.01054000947624445, + "step": 1209 + }, + { + "ce_ib": 9.11368465423584, + "ce_orig": 1.3512606620788574, + "epoch": 0.34768854698396723, + "kl_loss": 0.29874473810195923, + "loss_ib": 0.012101132422685623, + "step": 1209 + }, + { + "epoch": 0.34797613056294485, + "grad_norm": 0.11233938485383987, + "learning_rate": 9.810375539259184e-06, + "loss": 0.8904, + "step": 1210 + }, + { + "ce_ib": 6.393960952758789, + "ce_orig": 0.6846663355827332, + "epoch": 0.34797613056294485, + "kl_loss": 0.2849634885787964, + "loss_ib": 0.009243596345186234, + "step": 1210 + }, + { + "ce_ib": 6.281631946563721, + "ce_orig": 0.8732141852378845, + "epoch": 0.34797613056294485, + "kl_loss": 0.21354413032531738, + "loss_ib": 0.008417073637247086, + "step": 1210 + }, + { + "ce_ib": 6.037140846252441, + "ce_orig": 0.4813711643218994, + "epoch": 0.34797613056294485, + "kl_loss": 0.2744561433792114, + "loss_ib": 0.008781702257692814, + "step": 1210 + }, + { + "ce_ib": 8.615631103515625, + "ce_orig": 1.3087021112442017, + "epoch": 0.34797613056294485, + "kl_loss": 0.2230015993118286, + "loss_ib": 0.010845646262168884, + "step": 1210 + }, + { + "ce_ib": 4.9515380859375, + "ce_orig": 0.7219204306602478, + "epoch": 0.3482637141419225, + "kl_loss": 0.2197096347808838, + "loss_ib": 0.007148634176701307, + "step": 1211 + }, + { + "ce_ib": 8.830263137817383, + "ce_orig": 1.387976884841919, + "epoch": 0.3482637141419225, + "kl_loss": 0.2981712818145752, + "loss_ib": 0.011811976321041584, + "step": 1211 + }, + { + "ce_ib": 6.819638252258301, + "ce_orig": 0.5993950366973877, + "epoch": 0.3482637141419225, + "kl_loss": 0.41057854890823364, + "loss_ib": 0.01092542428523302, + "step": 1211 + }, + { + "ce_ib": 7.180927276611328, + "ce_orig": 0.9277037382125854, + "epoch": 0.3482637141419225, + "kl_loss": 0.3397431969642639, + "loss_ib": 0.010578359477221966, + "step": 1211 + }, + { + "ce_ib": 4.253024578094482, + "ce_orig": 0.6946703791618347, + "epoch": 0.34855129772090016, + "kl_loss": 0.23091982305049896, + "loss_ib": 0.0065622227266430855, + "step": 1212 + }, + { + "ce_ib": 4.551302433013916, + "ce_orig": 0.7342872619628906, + "epoch": 0.34855129772090016, + "kl_loss": 0.2785085439682007, + "loss_ib": 0.007336387410759926, + "step": 1212 + }, + { + "ce_ib": 4.195248126983643, + "ce_orig": 0.5465542674064636, + "epoch": 0.34855129772090016, + "kl_loss": 0.21678900718688965, + "loss_ib": 0.006363137625157833, + "step": 1212 + }, + { + "ce_ib": 7.5936079025268555, + "ce_orig": 1.040616512298584, + "epoch": 0.34855129772090016, + "kl_loss": 0.18895815312862396, + "loss_ib": 0.009483189322054386, + "step": 1212 + }, + { + "ce_ib": 4.654483795166016, + "ce_orig": 0.3671084940433502, + "epoch": 0.3488388812998778, + "kl_loss": 0.22442926466464996, + "loss_ib": 0.006898776162415743, + "step": 1213 + }, + { + "ce_ib": 4.995607376098633, + "ce_orig": 0.8385266661643982, + "epoch": 0.3488388812998778, + "kl_loss": 0.18434521555900574, + "loss_ib": 0.006839059293270111, + "step": 1213 + }, + { + "ce_ib": 6.814243793487549, + "ce_orig": 0.710411548614502, + "epoch": 0.3488388812998778, + "kl_loss": 0.20435184240341187, + "loss_ib": 0.008857762441039085, + "step": 1213 + }, + { + "ce_ib": 8.296137809753418, + "ce_orig": 0.9488054513931274, + "epoch": 0.3488388812998778, + "kl_loss": 0.337627649307251, + "loss_ib": 0.011672413907945156, + "step": 1213 + }, + { + "ce_ib": 6.109886169433594, + "ce_orig": 0.8862010836601257, + "epoch": 0.3491264648788554, + "kl_loss": 0.17762064933776855, + "loss_ib": 0.00788609217852354, + "step": 1214 + }, + { + "ce_ib": 7.6561503410339355, + "ce_orig": 0.8912796378135681, + "epoch": 0.3491264648788554, + "kl_loss": 0.2638469636440277, + "loss_ib": 0.01029461994767189, + "step": 1214 + }, + { + "ce_ib": 7.491686820983887, + "ce_orig": 1.1298644542694092, + "epoch": 0.3491264648788554, + "kl_loss": 0.2536466717720032, + "loss_ib": 0.010028153657913208, + "step": 1214 + }, + { + "ce_ib": 8.880212783813477, + "ce_orig": 1.0798959732055664, + "epoch": 0.3491264648788554, + "kl_loss": 0.22560831904411316, + "loss_ib": 0.011136295273900032, + "step": 1214 + }, + { + "epoch": 0.3494140484578331, + "grad_norm": 0.09869109094142914, + "learning_rate": 9.808252704009258e-06, + "loss": 0.9111, + "step": 1215 + }, + { + "ce_ib": 8.043791770935059, + "ce_orig": 1.0452327728271484, + "epoch": 0.3494140484578331, + "kl_loss": 0.23494543135166168, + "loss_ib": 0.010393246077001095, + "step": 1215 + }, + { + "ce_ib": 6.372407913208008, + "ce_orig": 0.7277105450630188, + "epoch": 0.3494140484578331, + "kl_loss": 0.3500578999519348, + "loss_ib": 0.009872986935079098, + "step": 1215 + }, + { + "ce_ib": 4.63390588760376, + "ce_orig": 0.5187411308288574, + "epoch": 0.3494140484578331, + "kl_loss": 0.23105770349502563, + "loss_ib": 0.006944482680410147, + "step": 1215 + }, + { + "ce_ib": 6.37406587600708, + "ce_orig": 0.8014640808105469, + "epoch": 0.3494140484578331, + "kl_loss": 0.445901095867157, + "loss_ib": 0.010833077132701874, + "step": 1215 + }, + { + "ce_ib": 4.982112884521484, + "ce_orig": 0.5382281541824341, + "epoch": 0.3497016320368107, + "kl_loss": 0.2244800329208374, + "loss_ib": 0.007226912770420313, + "step": 1216 + }, + { + "ce_ib": 6.78171968460083, + "ce_orig": 0.89256352186203, + "epoch": 0.3497016320368107, + "kl_loss": 0.4011702835559845, + "loss_ib": 0.010793422348797321, + "step": 1216 + }, + { + "ce_ib": 6.829211711883545, + "ce_orig": 0.8494387865066528, + "epoch": 0.3497016320368107, + "kl_loss": 0.5227317810058594, + "loss_ib": 0.012056529521942139, + "step": 1216 + }, + { + "ce_ib": 6.879478931427002, + "ce_orig": 0.9619460701942444, + "epoch": 0.3497016320368107, + "kl_loss": 0.2130880057811737, + "loss_ib": 0.009010358713567257, + "step": 1216 + }, + { + "ce_ib": 3.678586483001709, + "ce_orig": 0.4464549720287323, + "epoch": 0.34998921561578833, + "kl_loss": 0.19008475542068481, + "loss_ib": 0.005579433869570494, + "step": 1217 + }, + { + "ce_ib": 5.889970302581787, + "ce_orig": 0.9165810942649841, + "epoch": 0.34998921561578833, + "kl_loss": 0.2494632750749588, + "loss_ib": 0.008384603075683117, + "step": 1217 + }, + { + "ce_ib": 4.036910533905029, + "ce_orig": 0.8464987874031067, + "epoch": 0.34998921561578833, + "kl_loss": 0.155860036611557, + "loss_ib": 0.005595511291176081, + "step": 1217 + }, + { + "ce_ib": 3.3294496536254883, + "ce_orig": 0.18816563487052917, + "epoch": 0.34998921561578833, + "kl_loss": 0.38086679577827454, + "loss_ib": 0.007138117216527462, + "step": 1217 + }, + { + "ce_ib": 5.73319673538208, + "ce_orig": 0.7483682632446289, + "epoch": 0.35027679919476595, + "kl_loss": 0.19469204545021057, + "loss_ib": 0.0076801166869699955, + "step": 1218 + }, + { + "ce_ib": 8.876380920410156, + "ce_orig": 1.0530145168304443, + "epoch": 0.35027679919476595, + "kl_loss": 0.2871856093406677, + "loss_ib": 0.011748237535357475, + "step": 1218 + }, + { + "ce_ib": 2.245468854904175, + "ce_orig": 0.210285022854805, + "epoch": 0.35027679919476595, + "kl_loss": 0.5912055373191833, + "loss_ib": 0.008157524280250072, + "step": 1218 + }, + { + "ce_ib": 6.965002059936523, + "ce_orig": 0.9538248181343079, + "epoch": 0.35027679919476595, + "kl_loss": 0.3390040993690491, + "loss_ib": 0.010355043224990368, + "step": 1218 + }, + { + "ce_ib": 5.554605960845947, + "ce_orig": 0.5747299194335938, + "epoch": 0.35056438277374363, + "kl_loss": 0.358900785446167, + "loss_ib": 0.009143614210188389, + "step": 1219 + }, + { + "ce_ib": 12.596790313720703, + "ce_orig": 1.7860560417175293, + "epoch": 0.35056438277374363, + "kl_loss": 0.2815093398094177, + "loss_ib": 0.015411884523928165, + "step": 1219 + }, + { + "ce_ib": 7.36424446105957, + "ce_orig": 1.0605376958847046, + "epoch": 0.35056438277374363, + "kl_loss": 0.22156105935573578, + "loss_ib": 0.009579855017364025, + "step": 1219 + }, + { + "ce_ib": 8.19119644165039, + "ce_orig": 1.3693232536315918, + "epoch": 0.35056438277374363, + "kl_loss": 0.2518046498298645, + "loss_ib": 0.010709242895245552, + "step": 1219 + }, + { + "epoch": 0.35085196635272126, + "grad_norm": 0.1013348326086998, + "learning_rate": 9.806118284562547e-06, + "loss": 0.9093, + "step": 1220 + }, + { + "ce_ib": 6.345913887023926, + "ce_orig": 1.0799219608306885, + "epoch": 0.35085196635272126, + "kl_loss": 0.2296457588672638, + "loss_ib": 0.008642371743917465, + "step": 1220 + }, + { + "ce_ib": 8.522051811218262, + "ce_orig": 0.7675303816795349, + "epoch": 0.35085196635272126, + "kl_loss": 0.23121927678585052, + "loss_ib": 0.010834244079887867, + "step": 1220 + }, + { + "ce_ib": 6.218955993652344, + "ce_orig": 0.7469417452812195, + "epoch": 0.35085196635272126, + "kl_loss": 0.32687658071517944, + "loss_ib": 0.00948772206902504, + "step": 1220 + }, + { + "ce_ib": 4.8152174949646, + "ce_orig": 0.5418853759765625, + "epoch": 0.35085196635272126, + "kl_loss": 0.7648394107818604, + "loss_ib": 0.012463611550629139, + "step": 1220 + }, + { + "ce_ib": 4.405568599700928, + "ce_orig": 0.4719264507293701, + "epoch": 0.3511395499316989, + "kl_loss": 0.20381106436252594, + "loss_ib": 0.006443679332733154, + "step": 1221 + }, + { + "ce_ib": 10.199350357055664, + "ce_orig": 1.4328693151474, + "epoch": 0.3511395499316989, + "kl_loss": 0.24579858779907227, + "loss_ib": 0.012657335959374905, + "step": 1221 + }, + { + "ce_ib": 7.0942912101745605, + "ce_orig": 1.0040373802185059, + "epoch": 0.3511395499316989, + "kl_loss": 0.5016403198242188, + "loss_ib": 0.0121106943115592, + "step": 1221 + }, + { + "ce_ib": 7.4397172927856445, + "ce_orig": 0.8064084649085999, + "epoch": 0.3511395499316989, + "kl_loss": 0.3063448965549469, + "loss_ib": 0.010503166355192661, + "step": 1221 + }, + { + "ce_ib": 4.31360387802124, + "ce_orig": 0.786395788192749, + "epoch": 0.35142713351067656, + "kl_loss": 0.1687285304069519, + "loss_ib": 0.006000889465212822, + "step": 1222 + }, + { + "ce_ib": 9.686395645141602, + "ce_orig": 1.5242962837219238, + "epoch": 0.35142713351067656, + "kl_loss": 0.34040552377700806, + "loss_ib": 0.013090450316667557, + "step": 1222 + }, + { + "ce_ib": 7.19453763961792, + "ce_orig": 0.7576363682746887, + "epoch": 0.35142713351067656, + "kl_loss": 0.34985777735710144, + "loss_ib": 0.010693115182220936, + "step": 1222 + }, + { + "ce_ib": 5.745079040527344, + "ce_orig": 0.5910704135894775, + "epoch": 0.35142713351067656, + "kl_loss": 0.3402925729751587, + "loss_ib": 0.009148004464805126, + "step": 1222 + }, + { + "ce_ib": 9.865362167358398, + "ce_orig": 1.643523931503296, + "epoch": 0.3517147170896542, + "kl_loss": 0.2811984717845917, + "loss_ib": 0.012677346356213093, + "step": 1223 + }, + { + "ce_ib": 8.58837604522705, + "ce_orig": 1.436140537261963, + "epoch": 0.3517147170896542, + "kl_loss": 0.2681333124637604, + "loss_ib": 0.011269708164036274, + "step": 1223 + }, + { + "ce_ib": 3.754268169403076, + "ce_orig": 0.6821795105934143, + "epoch": 0.3517147170896542, + "kl_loss": 0.22629603743553162, + "loss_ib": 0.006017228122800589, + "step": 1223 + }, + { + "ce_ib": 7.725935459136963, + "ce_orig": 0.7299022674560547, + "epoch": 0.3517147170896542, + "kl_loss": 0.27880483865737915, + "loss_ib": 0.01051398366689682, + "step": 1223 + }, + { + "ce_ib": 7.817504405975342, + "ce_orig": 1.1026769876480103, + "epoch": 0.3520023006686318, + "kl_loss": 0.25443196296691895, + "loss_ib": 0.010361824184656143, + "step": 1224 + }, + { + "ce_ib": 4.619856834411621, + "ce_orig": 0.5043254494667053, + "epoch": 0.3520023006686318, + "kl_loss": 0.1772795170545578, + "loss_ib": 0.006392651703208685, + "step": 1224 + }, + { + "ce_ib": 7.694671630859375, + "ce_orig": 0.7912726998329163, + "epoch": 0.3520023006686318, + "kl_loss": 0.28781387209892273, + "loss_ib": 0.010572809725999832, + "step": 1224 + }, + { + "ce_ib": 7.137585163116455, + "ce_orig": 0.7689734697341919, + "epoch": 0.3520023006686318, + "kl_loss": 0.31799736618995667, + "loss_ib": 0.010317559354007244, + "step": 1224 + }, + { + "epoch": 0.3522898842476095, + "grad_norm": 0.09380496293306351, + "learning_rate": 9.803972286061366e-06, + "loss": 0.9006, + "step": 1225 + }, + { + "ce_ib": 8.018561363220215, + "ce_orig": 1.227948546409607, + "epoch": 0.3522898842476095, + "kl_loss": 0.3420131504535675, + "loss_ib": 0.011438692919909954, + "step": 1225 + }, + { + "ce_ib": 8.40815258026123, + "ce_orig": 1.3984031677246094, + "epoch": 0.3522898842476095, + "kl_loss": 0.8153228759765625, + "loss_ib": 0.01656138151884079, + "step": 1225 + }, + { + "ce_ib": 8.145472526550293, + "ce_orig": 1.3120951652526855, + "epoch": 0.3522898842476095, + "kl_loss": 0.2454821616411209, + "loss_ib": 0.01060029398649931, + "step": 1225 + }, + { + "ce_ib": 9.155527114868164, + "ce_orig": 1.0914626121520996, + "epoch": 0.3522898842476095, + "kl_loss": 0.22678032517433167, + "loss_ib": 0.011423329822719097, + "step": 1225 + }, + { + "ce_ib": 4.5271806716918945, + "ce_orig": 0.5904192328453064, + "epoch": 0.3525774678265871, + "kl_loss": 0.19810084998607635, + "loss_ib": 0.006508189253509045, + "step": 1226 + }, + { + "ce_ib": 5.63466215133667, + "ce_orig": 0.8149072527885437, + "epoch": 0.3525774678265871, + "kl_loss": 0.1674325168132782, + "loss_ib": 0.007308987434953451, + "step": 1226 + }, + { + "ce_ib": 7.006682872772217, + "ce_orig": 0.8032488226890564, + "epoch": 0.3525774678265871, + "kl_loss": 0.2711438238620758, + "loss_ib": 0.009718121029436588, + "step": 1226 + }, + { + "ce_ib": 9.975687026977539, + "ce_orig": 1.0549230575561523, + "epoch": 0.3525774678265871, + "kl_loss": 0.3018321096897125, + "loss_ib": 0.012994008138775826, + "step": 1226 + }, + { + "ce_ib": 4.590726375579834, + "ce_orig": 0.5848087668418884, + "epoch": 0.35286505140556473, + "kl_loss": 0.28551799058914185, + "loss_ib": 0.007445906288921833, + "step": 1227 + }, + { + "ce_ib": 7.8249192237854, + "ce_orig": 1.0972347259521484, + "epoch": 0.35286505140556473, + "kl_loss": 0.22432781755924225, + "loss_ib": 0.010068196803331375, + "step": 1227 + }, + { + "ce_ib": 6.804174900054932, + "ce_orig": 0.5949727296829224, + "epoch": 0.35286505140556473, + "kl_loss": 0.33591410517692566, + "loss_ib": 0.010163315571844578, + "step": 1227 + }, + { + "ce_ib": 9.282870292663574, + "ce_orig": 1.5607327222824097, + "epoch": 0.35286505140556473, + "kl_loss": 0.31619787216186523, + "loss_ib": 0.012444849126040936, + "step": 1227 + }, + { + "ce_ib": 3.7365572452545166, + "ce_orig": 0.5436097979545593, + "epoch": 0.35315263498454236, + "kl_loss": 0.23581741750240326, + "loss_ib": 0.006094731390476227, + "step": 1228 + }, + { + "ce_ib": 9.241966247558594, + "ce_orig": 1.2546502351760864, + "epoch": 0.35315263498454236, + "kl_loss": 0.25352782011032104, + "loss_ib": 0.011777244508266449, + "step": 1228 + }, + { + "ce_ib": 7.271526336669922, + "ce_orig": 1.4413655996322632, + "epoch": 0.35315263498454236, + "kl_loss": 0.21419073641300201, + "loss_ib": 0.009413433261215687, + "step": 1228 + }, + { + "ce_ib": 12.323684692382812, + "ce_orig": 2.418548822402954, + "epoch": 0.35315263498454236, + "kl_loss": 0.20560504496097565, + "loss_ib": 0.014379735104739666, + "step": 1228 + }, + { + "ce_ib": 6.3870158195495605, + "ce_orig": 0.8991002440452576, + "epoch": 0.35344021856352004, + "kl_loss": 0.23421180248260498, + "loss_ib": 0.008729133754968643, + "step": 1229 + }, + { + "ce_ib": 3.585265874862671, + "ce_orig": 0.6805919408798218, + "epoch": 0.35344021856352004, + "kl_loss": 0.169908806681633, + "loss_ib": 0.005284354090690613, + "step": 1229 + }, + { + "ce_ib": 9.112138748168945, + "ce_orig": 1.2712079286575317, + "epoch": 0.35344021856352004, + "kl_loss": 0.23605284094810486, + "loss_ib": 0.011472667567431927, + "step": 1229 + }, + { + "ce_ib": 7.619685173034668, + "ce_orig": 1.315202236175537, + "epoch": 0.35344021856352004, + "kl_loss": 0.21875296533107758, + "loss_ib": 0.009807215072214603, + "step": 1229 + }, + { + "epoch": 0.35372780214249766, + "grad_norm": 0.13430581986904144, + "learning_rate": 9.801814713675922e-06, + "loss": 0.8915, + "step": 1230 + }, + { + "ce_ib": 9.14773941040039, + "ce_orig": 1.6861295700073242, + "epoch": 0.35372780214249766, + "kl_loss": 0.23906412720680237, + "loss_ib": 0.011538379825651646, + "step": 1230 + }, + { + "ce_ib": 7.55902624130249, + "ce_orig": 1.0963160991668701, + "epoch": 0.35372780214249766, + "kl_loss": 0.18921005725860596, + "loss_ib": 0.009451126679778099, + "step": 1230 + }, + { + "ce_ib": 7.272607326507568, + "ce_orig": 0.8341992497444153, + "epoch": 0.35372780214249766, + "kl_loss": 0.1842857003211975, + "loss_ib": 0.009115464054048061, + "step": 1230 + }, + { + "ce_ib": 11.349145889282227, + "ce_orig": 1.8195122480392456, + "epoch": 0.35372780214249766, + "kl_loss": 0.28681570291519165, + "loss_ib": 0.014217302203178406, + "step": 1230 + }, + { + "ce_ib": 6.5777435302734375, + "ce_orig": 1.0577532052993774, + "epoch": 0.3540153857214753, + "kl_loss": 0.2370855063199997, + "loss_ib": 0.008948598988354206, + "step": 1231 + }, + { + "ce_ib": 6.332425594329834, + "ce_orig": 0.4660177230834961, + "epoch": 0.3540153857214753, + "kl_loss": 0.4329322576522827, + "loss_ib": 0.010661747306585312, + "step": 1231 + }, + { + "ce_ib": 7.591532230377197, + "ce_orig": 0.6235303282737732, + "epoch": 0.3540153857214753, + "kl_loss": 0.20224440097808838, + "loss_ib": 0.009613975882530212, + "step": 1231 + }, + { + "ce_ib": 7.099629878997803, + "ce_orig": 1.0191956758499146, + "epoch": 0.3540153857214753, + "kl_loss": 0.18419277667999268, + "loss_ib": 0.008941558189690113, + "step": 1231 + }, + { + "ce_ib": 3.364225387573242, + "ce_orig": 0.47195231914520264, + "epoch": 0.35430296930045296, + "kl_loss": 0.20699940621852875, + "loss_ib": 0.0054342192597687244, + "step": 1232 + }, + { + "ce_ib": 9.607991218566895, + "ce_orig": 1.4189627170562744, + "epoch": 0.35430296930045296, + "kl_loss": 0.22665303945541382, + "loss_ib": 0.01187452208250761, + "step": 1232 + }, + { + "ce_ib": 7.680178165435791, + "ce_orig": 0.7752040028572083, + "epoch": 0.35430296930045296, + "kl_loss": 0.2515419125556946, + "loss_ib": 0.0101955970749259, + "step": 1232 + }, + { + "ce_ib": 10.139663696289062, + "ce_orig": 1.8179274797439575, + "epoch": 0.35430296930045296, + "kl_loss": 0.2826952040195465, + "loss_ib": 0.012966616079211235, + "step": 1232 + }, + { + "ce_ib": 4.237985610961914, + "ce_orig": 0.3648199141025543, + "epoch": 0.3545905528794306, + "kl_loss": 0.3507782220840454, + "loss_ib": 0.00774576747789979, + "step": 1233 + }, + { + "ce_ib": 6.260341644287109, + "ce_orig": 0.9368284940719604, + "epoch": 0.3545905528794306, + "kl_loss": 0.3227092921733856, + "loss_ib": 0.009487434290349483, + "step": 1233 + }, + { + "ce_ib": 5.236300945281982, + "ce_orig": 0.5902385711669922, + "epoch": 0.3545905528794306, + "kl_loss": 0.332288920879364, + "loss_ib": 0.008559189736843109, + "step": 1233 + }, + { + "ce_ib": 9.382746696472168, + "ce_orig": 0.9858824014663696, + "epoch": 0.3545905528794306, + "kl_loss": 0.24119676649570465, + "loss_ib": 0.011794714257121086, + "step": 1233 + }, + { + "ce_ib": 5.475151538848877, + "ce_orig": 0.8246574997901917, + "epoch": 0.3548781364584082, + "kl_loss": 0.2016981691122055, + "loss_ib": 0.007492133416235447, + "step": 1234 + }, + { + "ce_ib": 7.603310585021973, + "ce_orig": 0.8240784406661987, + "epoch": 0.3548781364584082, + "kl_loss": 0.2915946841239929, + "loss_ib": 0.010519257746636868, + "step": 1234 + }, + { + "ce_ib": 5.872536659240723, + "ce_orig": 0.5426589846611023, + "epoch": 0.3548781364584082, + "kl_loss": 0.26665955781936646, + "loss_ib": 0.008539131842553616, + "step": 1234 + }, + { + "ce_ib": 5.885195255279541, + "ce_orig": 0.43417975306510925, + "epoch": 0.3548781364584082, + "kl_loss": 0.4874606728553772, + "loss_ib": 0.010759801603853703, + "step": 1234 + }, + { + "epoch": 0.3551657200373859, + "grad_norm": 0.12445518374443054, + "learning_rate": 9.799645572604308e-06, + "loss": 0.9172, + "step": 1235 + }, + { + "ce_ib": 9.555274963378906, + "ce_orig": 1.4601075649261475, + "epoch": 0.3551657200373859, + "kl_loss": 0.3519800305366516, + "loss_ib": 0.01307507511228323, + "step": 1235 + }, + { + "ce_ib": 8.43733024597168, + "ce_orig": 0.7304520010948181, + "epoch": 0.3551657200373859, + "kl_loss": 0.2575218081474304, + "loss_ib": 0.01101254764944315, + "step": 1235 + }, + { + "ce_ib": 7.886836528778076, + "ce_orig": 1.0809401273727417, + "epoch": 0.3551657200373859, + "kl_loss": 0.22798356413841248, + "loss_ib": 0.010166672058403492, + "step": 1235 + }, + { + "ce_ib": 6.685604095458984, + "ce_orig": 0.8483709096908569, + "epoch": 0.3551657200373859, + "kl_loss": 0.28703203797340393, + "loss_ib": 0.00955592468380928, + "step": 1235 + }, + { + "ce_ib": 10.434022903442383, + "ce_orig": 1.7004473209381104, + "epoch": 0.3554533036163635, + "kl_loss": 0.27314144372940063, + "loss_ib": 0.013165437616407871, + "step": 1236 + }, + { + "ce_ib": 6.759583950042725, + "ce_orig": 0.9155409932136536, + "epoch": 0.3554533036163635, + "kl_loss": 0.3281250596046448, + "loss_ib": 0.010040833614766598, + "step": 1236 + }, + { + "ce_ib": 8.04914665222168, + "ce_orig": 0.8380681872367859, + "epoch": 0.3554533036163635, + "kl_loss": 0.2907578647136688, + "loss_ib": 0.010956724174320698, + "step": 1236 + }, + { + "ce_ib": 7.6711812019348145, + "ce_orig": 1.1974780559539795, + "epoch": 0.3554533036163635, + "kl_loss": 0.5388073921203613, + "loss_ib": 0.013059255667030811, + "step": 1236 + }, + { + "ce_ib": 7.075990200042725, + "ce_orig": 1.2427293062210083, + "epoch": 0.35574088719534114, + "kl_loss": 0.2628134489059448, + "loss_ib": 0.009704125113785267, + "step": 1237 + }, + { + "ce_ib": 8.428919792175293, + "ce_orig": 0.9998230338096619, + "epoch": 0.35574088719534114, + "kl_loss": 0.22011926770210266, + "loss_ib": 0.010630113072693348, + "step": 1237 + }, + { + "ce_ib": 9.050934791564941, + "ce_orig": 0.6873317360877991, + "epoch": 0.35574088719534114, + "kl_loss": 0.24966318905353546, + "loss_ib": 0.011547566391527653, + "step": 1237 + }, + { + "ce_ib": 5.283548355102539, + "ce_orig": 0.3626221716403961, + "epoch": 0.35574088719534114, + "kl_loss": 0.26341110467910767, + "loss_ib": 0.007917659357190132, + "step": 1237 + }, + { + "ce_ib": 5.644829273223877, + "ce_orig": 0.7479636073112488, + "epoch": 0.35602847077431876, + "kl_loss": 0.34714972972869873, + "loss_ib": 0.009116326458752155, + "step": 1238 + }, + { + "ce_ib": 8.070452690124512, + "ce_orig": 1.1102337837219238, + "epoch": 0.35602847077431876, + "kl_loss": 0.29915985465049744, + "loss_ib": 0.01106205116957426, + "step": 1238 + }, + { + "ce_ib": 8.20000171661377, + "ce_orig": 0.9142467975616455, + "epoch": 0.35602847077431876, + "kl_loss": 0.2920359969139099, + "loss_ib": 0.01112036220729351, + "step": 1238 + }, + { + "ce_ib": 5.415696620941162, + "ce_orig": 0.8824329376220703, + "epoch": 0.35602847077431876, + "kl_loss": 0.23210978507995605, + "loss_ib": 0.00773679418489337, + "step": 1238 + }, + { + "ce_ib": 5.435910224914551, + "ce_orig": 0.6103357076644897, + "epoch": 0.35631605435329644, + "kl_loss": 0.27932867407798767, + "loss_ib": 0.00822919700294733, + "step": 1239 + }, + { + "ce_ib": 7.409430503845215, + "ce_orig": 1.067017674446106, + "epoch": 0.35631605435329644, + "kl_loss": 0.28290998935699463, + "loss_ib": 0.010238530114293098, + "step": 1239 + }, + { + "ce_ib": 7.207986831665039, + "ce_orig": 0.7639102339744568, + "epoch": 0.35631605435329644, + "kl_loss": 0.3880687355995178, + "loss_ib": 0.011088673956692219, + "step": 1239 + }, + { + "ce_ib": 9.050562858581543, + "ce_orig": 0.777185320854187, + "epoch": 0.35631605435329644, + "kl_loss": 0.27783846855163574, + "loss_ib": 0.011828946880996227, + "step": 1239 + }, + { + "epoch": 0.35660363793227406, + "grad_norm": 0.1228327825665474, + "learning_rate": 9.797464868072489e-06, + "loss": 0.9154, + "step": 1240 + }, + { + "ce_ib": 7.292773723602295, + "ce_orig": 1.0429723262786865, + "epoch": 0.35660363793227406, + "kl_loss": 0.20807845890522003, + "loss_ib": 0.00937355775386095, + "step": 1240 + }, + { + "ce_ib": 4.26139497756958, + "ce_orig": 0.7658198475837708, + "epoch": 0.35660363793227406, + "kl_loss": 0.18837539851665497, + "loss_ib": 0.006145148538053036, + "step": 1240 + }, + { + "ce_ib": 5.861346244812012, + "ce_orig": 0.9043319821357727, + "epoch": 0.35660363793227406, + "kl_loss": 0.5151675939559937, + "loss_ib": 0.011013020761311054, + "step": 1240 + }, + { + "ce_ib": 5.455165386199951, + "ce_orig": 0.7346108555793762, + "epoch": 0.35660363793227406, + "kl_loss": 0.2766348719596863, + "loss_ib": 0.008221513591706753, + "step": 1240 + }, + { + "ce_ib": 5.686266899108887, + "ce_orig": 0.7627474665641785, + "epoch": 0.3568912215112517, + "kl_loss": 0.22055384516716003, + "loss_ib": 0.007891804911196232, + "step": 1241 + }, + { + "ce_ib": 9.36503791809082, + "ce_orig": 1.0557454824447632, + "epoch": 0.3568912215112517, + "kl_loss": 0.34796249866485596, + "loss_ib": 0.012844662182033062, + "step": 1241 + }, + { + "ce_ib": 5.484736919403076, + "ce_orig": 0.551432192325592, + "epoch": 0.3568912215112517, + "kl_loss": 0.264079749584198, + "loss_ib": 0.008125534281134605, + "step": 1241 + }, + { + "ce_ib": 4.883925914764404, + "ce_orig": 0.3223569095134735, + "epoch": 0.3568912215112517, + "kl_loss": 0.27467021346092224, + "loss_ib": 0.007630628068000078, + "step": 1241 + }, + { + "ce_ib": 3.6608123779296875, + "ce_orig": 0.4998208284378052, + "epoch": 0.35717880509022937, + "kl_loss": 0.2078043520450592, + "loss_ib": 0.005738855339586735, + "step": 1242 + }, + { + "ce_ib": 7.225930213928223, + "ce_orig": 0.7941088676452637, + "epoch": 0.35717880509022937, + "kl_loss": 0.313068687915802, + "loss_ib": 0.010356617160141468, + "step": 1242 + }, + { + "ce_ib": 7.831450939178467, + "ce_orig": 1.0795207023620605, + "epoch": 0.35717880509022937, + "kl_loss": 0.33597350120544434, + "loss_ib": 0.011191186495125294, + "step": 1242 + }, + { + "ce_ib": 3.794747829437256, + "ce_orig": 0.39786288142204285, + "epoch": 0.35717880509022937, + "kl_loss": 0.24459530413150787, + "loss_ib": 0.006240700837224722, + "step": 1242 + }, + { + "ce_ib": 8.7359037399292, + "ce_orig": 1.3474030494689941, + "epoch": 0.357466388669207, + "kl_loss": 0.30283403396606445, + "loss_ib": 0.011764245107769966, + "step": 1243 + }, + { + "ce_ib": 6.3446831703186035, + "ce_orig": 0.6941496729850769, + "epoch": 0.357466388669207, + "kl_loss": 0.2185823768377304, + "loss_ib": 0.008530506864190102, + "step": 1243 + }, + { + "ce_ib": 3.736961841583252, + "ce_orig": 0.5355826020240784, + "epoch": 0.357466388669207, + "kl_loss": 0.21260380744934082, + "loss_ib": 0.005863000173121691, + "step": 1243 + }, + { + "ce_ib": 7.129443168640137, + "ce_orig": 0.9421985149383545, + "epoch": 0.357466388669207, + "kl_loss": 0.30670252442359924, + "loss_ib": 0.01019646879285574, + "step": 1243 + }, + { + "ce_ib": 5.549740314483643, + "ce_orig": 0.5611580014228821, + "epoch": 0.3577539722481846, + "kl_loss": 0.4680359363555908, + "loss_ib": 0.01023009978234768, + "step": 1244 + }, + { + "ce_ib": 3.336719512939453, + "ce_orig": 0.4326326549053192, + "epoch": 0.3577539722481846, + "kl_loss": 0.2741917073726654, + "loss_ib": 0.006078636739403009, + "step": 1244 + }, + { + "ce_ib": 6.86518669128418, + "ce_orig": 0.7357510924339294, + "epoch": 0.3577539722481846, + "kl_loss": 0.4421781301498413, + "loss_ib": 0.011286967433989048, + "step": 1244 + }, + { + "ce_ib": 8.771323204040527, + "ce_orig": 0.7612717151641846, + "epoch": 0.3577539722481846, + "kl_loss": 0.15122197568416595, + "loss_ib": 0.010283542796969414, + "step": 1244 + }, + { + "epoch": 0.3580415558271623, + "grad_norm": 0.09404265880584717, + "learning_rate": 9.795272605334285e-06, + "loss": 0.8139, + "step": 1245 + }, + { + "ce_ib": 4.558228015899658, + "ce_orig": 0.712019681930542, + "epoch": 0.3580415558271623, + "kl_loss": 0.1576419323682785, + "loss_ib": 0.006134646944701672, + "step": 1245 + }, + { + "ce_ib": 11.132481575012207, + "ce_orig": 1.6099278926849365, + "epoch": 0.3580415558271623, + "kl_loss": 0.2066415250301361, + "loss_ib": 0.013198897242546082, + "step": 1245 + }, + { + "ce_ib": 7.148979663848877, + "ce_orig": 0.9569799304008484, + "epoch": 0.3580415558271623, + "kl_loss": 0.3003823161125183, + "loss_ib": 0.010152801871299744, + "step": 1245 + }, + { + "ce_ib": 3.507009744644165, + "ce_orig": 0.4302767217159271, + "epoch": 0.3580415558271623, + "kl_loss": 0.3271106481552124, + "loss_ib": 0.006778115872293711, + "step": 1245 + }, + { + "ce_ib": 8.464205741882324, + "ce_orig": 0.7025609612464905, + "epoch": 0.3583291394061399, + "kl_loss": 0.2535151243209839, + "loss_ib": 0.01099935732781887, + "step": 1246 + }, + { + "ce_ib": 8.343195915222168, + "ce_orig": 0.9053884148597717, + "epoch": 0.3583291394061399, + "kl_loss": 0.25387266278266907, + "loss_ib": 0.010881922207772732, + "step": 1246 + }, + { + "ce_ib": 5.672733306884766, + "ce_orig": 0.5709149241447449, + "epoch": 0.3583291394061399, + "kl_loss": 0.40478748083114624, + "loss_ib": 0.009720607660710812, + "step": 1246 + }, + { + "ce_ib": 6.032468318939209, + "ce_orig": 0.6649541854858398, + "epoch": 0.3583291394061399, + "kl_loss": 0.18674571812152863, + "loss_ib": 0.007899925112724304, + "step": 1246 + }, + { + "ce_ib": 3.471609592437744, + "ce_orig": 0.5787562131881714, + "epoch": 0.35861672298511754, + "kl_loss": 0.23248958587646484, + "loss_ib": 0.0057965051382780075, + "step": 1247 + }, + { + "ce_ib": 5.650269031524658, + "ce_orig": 0.5460963249206543, + "epoch": 0.35861672298511754, + "kl_loss": 0.3052501082420349, + "loss_ib": 0.008702769875526428, + "step": 1247 + }, + { + "ce_ib": 7.508909702301025, + "ce_orig": 0.26758936047554016, + "epoch": 0.35861672298511754, + "kl_loss": 0.2105289101600647, + "loss_ib": 0.009614198468625546, + "step": 1247 + }, + { + "ce_ib": 3.595618486404419, + "ce_orig": 0.44015583395957947, + "epoch": 0.35861672298511754, + "kl_loss": 0.15356630086898804, + "loss_ib": 0.0051312814466655254, + "step": 1247 + }, + { + "ce_ib": 5.277204513549805, + "ce_orig": 0.7604292035102844, + "epoch": 0.35890430656409517, + "kl_loss": 0.2653564512729645, + "loss_ib": 0.00793076865375042, + "step": 1248 + }, + { + "ce_ib": 3.4653306007385254, + "ce_orig": 0.5359505414962769, + "epoch": 0.35890430656409517, + "kl_loss": 0.14454194903373718, + "loss_ib": 0.004910749848932028, + "step": 1248 + }, + { + "ce_ib": 1.7260143756866455, + "ce_orig": 0.11219265311956406, + "epoch": 0.35890430656409517, + "kl_loss": 0.5338764786720276, + "loss_ib": 0.0070647792890667915, + "step": 1248 + }, + { + "ce_ib": 7.0942912101745605, + "ce_orig": 1.2052923440933228, + "epoch": 0.35890430656409517, + "kl_loss": 0.20122863352298737, + "loss_ib": 0.00910657737404108, + "step": 1248 + }, + { + "ce_ib": 6.274049282073975, + "ce_orig": 0.7101867198944092, + "epoch": 0.35919189014307285, + "kl_loss": 0.29377713799476624, + "loss_ib": 0.009211820550262928, + "step": 1249 + }, + { + "ce_ib": 5.265260696411133, + "ce_orig": 0.5141690969467163, + "epoch": 0.35919189014307285, + "kl_loss": 0.25618404150009155, + "loss_ib": 0.007827101275324821, + "step": 1249 + }, + { + "ce_ib": 7.371795177459717, + "ce_orig": 0.20159362256526947, + "epoch": 0.35919189014307285, + "kl_loss": 0.4881149232387543, + "loss_ib": 0.012252944521605968, + "step": 1249 + }, + { + "ce_ib": 10.091595649719238, + "ce_orig": 1.0113621950149536, + "epoch": 0.35919189014307285, + "kl_loss": 0.30168700218200684, + "loss_ib": 0.013108465820550919, + "step": 1249 + }, + { + "epoch": 0.35947947372205047, + "grad_norm": 0.10958760976791382, + "learning_rate": 9.79306878967137e-06, + "loss": 0.8439, + "step": 1250 + }, + { + "ce_ib": 4.476527690887451, + "ce_orig": 0.6243991255760193, + "epoch": 0.35947947372205047, + "kl_loss": 0.1909669041633606, + "loss_ib": 0.006386196240782738, + "step": 1250 + }, + { + "ce_ib": 11.409143447875977, + "ce_orig": 1.3198966979980469, + "epoch": 0.35947947372205047, + "kl_loss": 0.2266976684331894, + "loss_ib": 0.013676119968295097, + "step": 1250 + }, + { + "ce_ib": 6.0254106521606445, + "ce_orig": 0.6282336711883545, + "epoch": 0.35947947372205047, + "kl_loss": 0.2567211389541626, + "loss_ib": 0.008592622354626656, + "step": 1250 + }, + { + "ce_ib": 3.8054723739624023, + "ce_orig": 0.5951682925224304, + "epoch": 0.35947947372205047, + "kl_loss": 0.31412309408187866, + "loss_ib": 0.00694670295342803, + "step": 1250 + }, + { + "ce_ib": 7.657241344451904, + "ce_orig": 0.9810330867767334, + "epoch": 0.3597670573010281, + "kl_loss": 0.2732018828392029, + "loss_ib": 0.01038926001638174, + "step": 1251 + }, + { + "ce_ib": 6.377111434936523, + "ce_orig": 0.7457929849624634, + "epoch": 0.3597670573010281, + "kl_loss": 0.2344977855682373, + "loss_ib": 0.008722089231014252, + "step": 1251 + }, + { + "ce_ib": 7.052966594696045, + "ce_orig": 0.7349291443824768, + "epoch": 0.3597670573010281, + "kl_loss": 0.3841466009616852, + "loss_ib": 0.010894432663917542, + "step": 1251 + }, + { + "ce_ib": 4.98142671585083, + "ce_orig": 0.7807597517967224, + "epoch": 0.3597670573010281, + "kl_loss": 0.20535174012184143, + "loss_ib": 0.007034944370388985, + "step": 1251 + }, + { + "ce_ib": 5.311237812042236, + "ce_orig": 0.7335159778594971, + "epoch": 0.36005464088000577, + "kl_loss": 0.369441956281662, + "loss_ib": 0.009005657397210598, + "step": 1252 + }, + { + "ce_ib": 6.797234535217285, + "ce_orig": 0.6558939814567566, + "epoch": 0.36005464088000577, + "kl_loss": 0.3157234191894531, + "loss_ib": 0.009954468347132206, + "step": 1252 + }, + { + "ce_ib": 10.10152530670166, + "ce_orig": 1.7004576921463013, + "epoch": 0.36005464088000577, + "kl_loss": 0.2638339400291443, + "loss_ib": 0.012739865109324455, + "step": 1252 + }, + { + "ce_ib": 8.451179504394531, + "ce_orig": 1.1088011264801025, + "epoch": 0.36005464088000577, + "kl_loss": 0.2189917117357254, + "loss_ib": 0.010641096159815788, + "step": 1252 + }, + { + "ce_ib": 5.147396564483643, + "ce_orig": 0.47145599126815796, + "epoch": 0.3603422244589834, + "kl_loss": 0.38510337471961975, + "loss_ib": 0.008998430334031582, + "step": 1253 + }, + { + "ce_ib": 6.613088607788086, + "ce_orig": 0.7753716707229614, + "epoch": 0.3603422244589834, + "kl_loss": 0.2607177793979645, + "loss_ib": 0.009220265783369541, + "step": 1253 + }, + { + "ce_ib": 4.183328151702881, + "ce_orig": 0.5365942716598511, + "epoch": 0.3603422244589834, + "kl_loss": 0.14574888348579407, + "loss_ib": 0.005640816409140825, + "step": 1253 + }, + { + "ce_ib": 8.866814613342285, + "ce_orig": 1.2385174036026, + "epoch": 0.3603422244589834, + "kl_loss": 0.21413256227970123, + "loss_ib": 0.011008140631020069, + "step": 1253 + }, + { + "ce_ib": 9.283498764038086, + "ce_orig": 1.193198323249817, + "epoch": 0.360629808037961, + "kl_loss": 0.24336770176887512, + "loss_ib": 0.0117171760648489, + "step": 1254 + }, + { + "ce_ib": 5.101339340209961, + "ce_orig": 0.7213374972343445, + "epoch": 0.360629808037961, + "kl_loss": 0.23240378499031067, + "loss_ib": 0.007425377145409584, + "step": 1254 + }, + { + "ce_ib": 4.567759990692139, + "ce_orig": 0.5381884574890137, + "epoch": 0.360629808037961, + "kl_loss": 0.4335998296737671, + "loss_ib": 0.008903758600354195, + "step": 1254 + }, + { + "ce_ib": 6.097298622131348, + "ce_orig": 0.7400136590003967, + "epoch": 0.360629808037961, + "kl_loss": 0.2513897716999054, + "loss_ib": 0.008611195720732212, + "step": 1254 + }, + { + "epoch": 0.3609173916169387, + "grad_norm": 0.12052126973867416, + "learning_rate": 9.790853426393246e-06, + "loss": 0.9385, + "step": 1255 + }, + { + "ce_ib": 4.656575679779053, + "ce_orig": 0.5645290017127991, + "epoch": 0.3609173916169387, + "kl_loss": 0.193633571267128, + "loss_ib": 0.006592911202460527, + "step": 1255 + }, + { + "ce_ib": 8.54527473449707, + "ce_orig": 0.7616824507713318, + "epoch": 0.3609173916169387, + "kl_loss": 0.28272953629493713, + "loss_ib": 0.01137256994843483, + "step": 1255 + }, + { + "ce_ib": 7.215847015380859, + "ce_orig": 0.8538178205490112, + "epoch": 0.3609173916169387, + "kl_loss": 0.2556816339492798, + "loss_ib": 0.009772663936018944, + "step": 1255 + }, + { + "ce_ib": 5.195621013641357, + "ce_orig": 0.7892569899559021, + "epoch": 0.3609173916169387, + "kl_loss": 0.3346374034881592, + "loss_ib": 0.008541994728147984, + "step": 1255 + }, + { + "ce_ib": 6.393796443939209, + "ce_orig": 0.8440943360328674, + "epoch": 0.3612049751959163, + "kl_loss": 0.2655456066131592, + "loss_ib": 0.009049252606928349, + "step": 1256 + }, + { + "ce_ib": 7.520873069763184, + "ce_orig": 0.6589466333389282, + "epoch": 0.3612049751959163, + "kl_loss": 0.2204410433769226, + "loss_ib": 0.009725282900035381, + "step": 1256 + }, + { + "ce_ib": 6.403946399688721, + "ce_orig": 0.9284466505050659, + "epoch": 0.3612049751959163, + "kl_loss": 0.25736451148986816, + "loss_ib": 0.008977591060101986, + "step": 1256 + }, + { + "ce_ib": 5.164172172546387, + "ce_orig": 0.3477190136909485, + "epoch": 0.3612049751959163, + "kl_loss": 0.31088292598724365, + "loss_ib": 0.008273000828921795, + "step": 1256 + }, + { + "ce_ib": 7.093279838562012, + "ce_orig": 0.9794586300849915, + "epoch": 0.36149255877489395, + "kl_loss": 0.25934290885925293, + "loss_ib": 0.009686708450317383, + "step": 1257 + }, + { + "ce_ib": 3.6077513694763184, + "ce_orig": 0.6717298626899719, + "epoch": 0.36149255877489395, + "kl_loss": 0.2020391970872879, + "loss_ib": 0.005628143437206745, + "step": 1257 + }, + { + "ce_ib": 8.314215660095215, + "ce_orig": 1.189759373664856, + "epoch": 0.36149255877489395, + "kl_loss": 0.5190852284431458, + "loss_ib": 0.013505067676305771, + "step": 1257 + }, + { + "ce_ib": 4.868413925170898, + "ce_orig": 0.6784672141075134, + "epoch": 0.36149255877489395, + "kl_loss": 0.4079880714416504, + "loss_ib": 0.008948295377194881, + "step": 1257 + }, + { + "ce_ib": 8.850499153137207, + "ce_orig": 0.9206279516220093, + "epoch": 0.36178014235387157, + "kl_loss": 0.3540341854095459, + "loss_ib": 0.01239084079861641, + "step": 1258 + }, + { + "ce_ib": 9.655441284179688, + "ce_orig": 1.3504509925842285, + "epoch": 0.36178014235387157, + "kl_loss": 0.19344475865364075, + "loss_ib": 0.011589889414608479, + "step": 1258 + }, + { + "ce_ib": 8.310129165649414, + "ce_orig": 0.5761513710021973, + "epoch": 0.36178014235387157, + "kl_loss": 0.3058510422706604, + "loss_ib": 0.011368638835847378, + "step": 1258 + }, + { + "ce_ib": 7.006587982177734, + "ce_orig": 0.531325101852417, + "epoch": 0.36178014235387157, + "kl_loss": 0.23296204209327698, + "loss_ib": 0.009336207993328571, + "step": 1258 + }, + { + "ce_ib": 6.28865909576416, + "ce_orig": 0.7898370623588562, + "epoch": 0.36206772593284925, + "kl_loss": 0.35747230052948, + "loss_ib": 0.009863382205367088, + "step": 1259 + }, + { + "ce_ib": 5.487530708312988, + "ce_orig": 0.6396716833114624, + "epoch": 0.36206772593284925, + "kl_loss": 0.30861911177635193, + "loss_ib": 0.008573721162974834, + "step": 1259 + }, + { + "ce_ib": 5.676125526428223, + "ce_orig": 0.6367034316062927, + "epoch": 0.36206772593284925, + "kl_loss": 0.2743126451969147, + "loss_ib": 0.008419252000749111, + "step": 1259 + }, + { + "ce_ib": 9.005084991455078, + "ce_orig": 0.9271017909049988, + "epoch": 0.36206772593284925, + "kl_loss": 0.4467215836048126, + "loss_ib": 0.0134723000228405, + "step": 1259 + }, + { + "epoch": 0.3623553095118269, + "grad_norm": 0.09680938720703125, + "learning_rate": 9.788626520837235e-06, + "loss": 0.8753, + "step": 1260 + }, + { + "ce_ib": 6.723548889160156, + "ce_orig": 1.007396936416626, + "epoch": 0.3623553095118269, + "kl_loss": 0.1902208775281906, + "loss_ib": 0.0086257578805089, + "step": 1260 + }, + { + "ce_ib": 8.341665267944336, + "ce_orig": 1.1784008741378784, + "epoch": 0.3623553095118269, + "kl_loss": 0.26580482721328735, + "loss_ib": 0.010999713093042374, + "step": 1260 + }, + { + "ce_ib": 7.171521186828613, + "ce_orig": 1.0155116319656372, + "epoch": 0.3623553095118269, + "kl_loss": 0.19991645216941833, + "loss_ib": 0.009170685894787312, + "step": 1260 + }, + { + "ce_ib": 8.256757736206055, + "ce_orig": 1.5229460000991821, + "epoch": 0.3623553095118269, + "kl_loss": 0.21548259258270264, + "loss_ib": 0.010411583818495274, + "step": 1260 + }, + { + "ce_ib": 8.004878044128418, + "ce_orig": 1.2474851608276367, + "epoch": 0.3626428930908045, + "kl_loss": 0.2474817931652069, + "loss_ib": 0.010479695163667202, + "step": 1261 + }, + { + "ce_ib": 10.200814247131348, + "ce_orig": 1.5161863565444946, + "epoch": 0.3626428930908045, + "kl_loss": 0.32217681407928467, + "loss_ib": 0.013422582298517227, + "step": 1261 + }, + { + "ce_ib": 4.585286617279053, + "ce_orig": 0.772280216217041, + "epoch": 0.3626428930908045, + "kl_loss": 0.2460094690322876, + "loss_ib": 0.007045380771160126, + "step": 1261 + }, + { + "ce_ib": 5.989019870758057, + "ce_orig": 0.5098203420639038, + "epoch": 0.3626428930908045, + "kl_loss": 0.3226553797721863, + "loss_ib": 0.009215573780238628, + "step": 1261 + }, + { + "ce_ib": 2.9106924533843994, + "ce_orig": 0.2865024209022522, + "epoch": 0.3629304766697822, + "kl_loss": 0.28513258695602417, + "loss_ib": 0.005762017797678709, + "step": 1262 + }, + { + "ce_ib": 8.726869583129883, + "ce_orig": 1.2086615562438965, + "epoch": 0.3629304766697822, + "kl_loss": 0.19402892887592316, + "loss_ib": 0.010667159222066402, + "step": 1262 + }, + { + "ce_ib": 7.850878715515137, + "ce_orig": 1.1507000923156738, + "epoch": 0.3629304766697822, + "kl_loss": 0.27822476625442505, + "loss_ib": 0.010633125901222229, + "step": 1262 + }, + { + "ce_ib": 7.459512233734131, + "ce_orig": 0.743778645992279, + "epoch": 0.3629304766697822, + "kl_loss": 0.22068756818771362, + "loss_ib": 0.009666387923061848, + "step": 1262 + }, + { + "ce_ib": 4.350217819213867, + "ce_orig": 0.6065347194671631, + "epoch": 0.3632180602487598, + "kl_loss": 0.31890130043029785, + "loss_ib": 0.007539230398833752, + "step": 1263 + }, + { + "ce_ib": 4.045588493347168, + "ce_orig": 0.4070097506046295, + "epoch": 0.3632180602487598, + "kl_loss": 0.2118133008480072, + "loss_ib": 0.006163721438497305, + "step": 1263 + }, + { + "ce_ib": 6.7642621994018555, + "ce_orig": 0.9856066703796387, + "epoch": 0.3632180602487598, + "kl_loss": 0.3220054507255554, + "loss_ib": 0.009984316304326057, + "step": 1263 + }, + { + "ce_ib": 7.201728343963623, + "ce_orig": 0.5664107203483582, + "epoch": 0.3632180602487598, + "kl_loss": 0.33470451831817627, + "loss_ib": 0.010548772290349007, + "step": 1263 + }, + { + "ce_ib": 6.063399314880371, + "ce_orig": 0.5952107906341553, + "epoch": 0.3635056438277374, + "kl_loss": 0.2663578391075134, + "loss_ib": 0.008726977743208408, + "step": 1264 + }, + { + "ce_ib": 6.796082973480225, + "ce_orig": 0.9985532164573669, + "epoch": 0.3635056438277374, + "kl_loss": 0.28935104608535767, + "loss_ib": 0.009689592756330967, + "step": 1264 + }, + { + "ce_ib": 8.167562484741211, + "ce_orig": 1.0221731662750244, + "epoch": 0.3635056438277374, + "kl_loss": 0.22773785889148712, + "loss_ib": 0.010444940999150276, + "step": 1264 + }, + { + "ce_ib": 6.192745208740234, + "ce_orig": 0.8716363310813904, + "epoch": 0.3635056438277374, + "kl_loss": 0.2293071448802948, + "loss_ib": 0.008485816419124603, + "step": 1264 + }, + { + "epoch": 0.3637932274067151, + "grad_norm": 0.12041808664798737, + "learning_rate": 9.786388078368473e-06, + "loss": 0.8926, + "step": 1265 + }, + { + "ce_ib": 6.316495895385742, + "ce_orig": 0.7256811857223511, + "epoch": 0.3637932274067151, + "kl_loss": 0.396675705909729, + "loss_ib": 0.010283253155648708, + "step": 1265 + }, + { + "ce_ib": 4.922732830047607, + "ce_orig": 0.7064297795295715, + "epoch": 0.3637932274067151, + "kl_loss": 0.20546512305736542, + "loss_ib": 0.006977383978664875, + "step": 1265 + }, + { + "ce_ib": 7.470864295959473, + "ce_orig": 1.3002017736434937, + "epoch": 0.3637932274067151, + "kl_loss": 0.2607371211051941, + "loss_ib": 0.010078235529363155, + "step": 1265 + }, + { + "ce_ib": 8.990388870239258, + "ce_orig": 0.8157200813293457, + "epoch": 0.3637932274067151, + "kl_loss": 0.21014195680618286, + "loss_ib": 0.011091808788478374, + "step": 1265 + }, + { + "ce_ib": 5.218315124511719, + "ce_orig": 0.6002530455589294, + "epoch": 0.3640808109856927, + "kl_loss": 0.21902649104595184, + "loss_ib": 0.007408579811453819, + "step": 1266 + }, + { + "ce_ib": 4.633239269256592, + "ce_orig": 0.842490017414093, + "epoch": 0.3640808109856927, + "kl_loss": 0.254517138004303, + "loss_ib": 0.007178409956395626, + "step": 1266 + }, + { + "ce_ib": 6.270453929901123, + "ce_orig": 0.9670078754425049, + "epoch": 0.3640808109856927, + "kl_loss": 0.26729723811149597, + "loss_ib": 0.008943426422774792, + "step": 1266 + }, + { + "ce_ib": 8.4492769241333, + "ce_orig": 1.4273326396942139, + "epoch": 0.3640808109856927, + "kl_loss": 0.28911885619163513, + "loss_ib": 0.011340465396642685, + "step": 1266 + }, + { + "ce_ib": 2.5559194087982178, + "ce_orig": 0.11846259981393814, + "epoch": 0.36436839456467035, + "kl_loss": 0.6923233866691589, + "loss_ib": 0.009479152970016003, + "step": 1267 + }, + { + "ce_ib": 6.5786519050598145, + "ce_orig": 0.8715175986289978, + "epoch": 0.36436839456467035, + "kl_loss": 0.3284551799297333, + "loss_ib": 0.009863203391432762, + "step": 1267 + }, + { + "ce_ib": 6.378236293792725, + "ce_orig": 0.7787953019142151, + "epoch": 0.36436839456467035, + "kl_loss": 0.2639128863811493, + "loss_ib": 0.00901736505329609, + "step": 1267 + }, + { + "ce_ib": 5.599494457244873, + "ce_orig": 0.6616715788841248, + "epoch": 0.36436839456467035, + "kl_loss": 0.27571022510528564, + "loss_ib": 0.00835659634321928, + "step": 1267 + }, + { + "ce_ib": 5.361080646514893, + "ce_orig": 0.7969940304756165, + "epoch": 0.364655978143648, + "kl_loss": 0.27387315034866333, + "loss_ib": 0.0080998120829463, + "step": 1268 + }, + { + "ce_ib": 7.742045879364014, + "ce_orig": 0.970597505569458, + "epoch": 0.364655978143648, + "kl_loss": 0.17689698934555054, + "loss_ib": 0.009511015377938747, + "step": 1268 + }, + { + "ce_ib": 8.752741813659668, + "ce_orig": 1.0461597442626953, + "epoch": 0.364655978143648, + "kl_loss": 0.19503554701805115, + "loss_ib": 0.010703097097575665, + "step": 1268 + }, + { + "ce_ib": 7.340433120727539, + "ce_orig": 1.0559906959533691, + "epoch": 0.364655978143648, + "kl_loss": 0.3039883077144623, + "loss_ib": 0.010380315594375134, + "step": 1268 + }, + { + "ce_ib": 5.940576553344727, + "ce_orig": 0.4752423167228699, + "epoch": 0.36494356172262565, + "kl_loss": 0.33972451090812683, + "loss_ib": 0.00933782197535038, + "step": 1269 + }, + { + "ce_ib": 5.702699184417725, + "ce_orig": 0.9983672499656677, + "epoch": 0.36494356172262565, + "kl_loss": 0.2671028971672058, + "loss_ib": 0.008373728021979332, + "step": 1269 + }, + { + "ce_ib": 5.869582653045654, + "ce_orig": 0.6255727410316467, + "epoch": 0.36494356172262565, + "kl_loss": 0.18517814576625824, + "loss_ib": 0.007721364498138428, + "step": 1269 + }, + { + "ce_ib": 5.308149337768555, + "ce_orig": 0.9392489194869995, + "epoch": 0.36494356172262565, + "kl_loss": 0.22410941123962402, + "loss_ib": 0.00754924351349473, + "step": 1269 + }, + { + "epoch": 0.3652311453016033, + "grad_norm": 0.10248145461082458, + "learning_rate": 9.784138104379886e-06, + "loss": 0.8412, + "step": 1270 + }, + { + "ce_ib": 5.1530938148498535, + "ce_orig": 0.7408868670463562, + "epoch": 0.3652311453016033, + "kl_loss": 0.2502548098564148, + "loss_ib": 0.007655641995370388, + "step": 1270 + }, + { + "ce_ib": 5.076098918914795, + "ce_orig": 0.7266834378242493, + "epoch": 0.3652311453016033, + "kl_loss": 0.2595973610877991, + "loss_ib": 0.007672072388231754, + "step": 1270 + }, + { + "ce_ib": 7.772645950317383, + "ce_orig": 1.095561146736145, + "epoch": 0.3652311453016033, + "kl_loss": 0.23874793946743011, + "loss_ib": 0.010160124860703945, + "step": 1270 + }, + { + "ce_ib": 6.981496334075928, + "ce_orig": 1.0339139699935913, + "epoch": 0.3652311453016033, + "kl_loss": 0.25664764642715454, + "loss_ib": 0.009547972120344639, + "step": 1270 + }, + { + "ce_ib": 5.776525497436523, + "ce_orig": 0.7597066760063171, + "epoch": 0.3655187288805809, + "kl_loss": 0.19280946254730225, + "loss_ib": 0.007704620249569416, + "step": 1271 + }, + { + "ce_ib": 5.559055328369141, + "ce_orig": 0.773419201374054, + "epoch": 0.3655187288805809, + "kl_loss": 0.2716330885887146, + "loss_ib": 0.008275385946035385, + "step": 1271 + }, + { + "ce_ib": 4.46776008605957, + "ce_orig": 0.7645898461341858, + "epoch": 0.3655187288805809, + "kl_loss": 0.22453013062477112, + "loss_ib": 0.00671306112781167, + "step": 1271 + }, + { + "ce_ib": 8.849346160888672, + "ce_orig": 1.4099323749542236, + "epoch": 0.3655187288805809, + "kl_loss": 0.22820809483528137, + "loss_ib": 0.011131427250802517, + "step": 1271 + }, + { + "ce_ib": 7.46262264251709, + "ce_orig": 1.324544906616211, + "epoch": 0.3658063124595586, + "kl_loss": 0.23189082741737366, + "loss_ib": 0.009781531058251858, + "step": 1272 + }, + { + "ce_ib": 2.2801783084869385, + "ce_orig": 0.22608597576618195, + "epoch": 0.3658063124595586, + "kl_loss": 0.5748566389083862, + "loss_ib": 0.008028744719922543, + "step": 1272 + }, + { + "ce_ib": 11.055466651916504, + "ce_orig": 1.7213143110275269, + "epoch": 0.3658063124595586, + "kl_loss": 0.5324690937995911, + "loss_ib": 0.016380157321691513, + "step": 1272 + }, + { + "ce_ib": 5.690445899963379, + "ce_orig": 0.7228091955184937, + "epoch": 0.3658063124595586, + "kl_loss": 0.25707000494003296, + "loss_ib": 0.008261146023869514, + "step": 1272 + }, + { + "ce_ib": 5.308260917663574, + "ce_orig": 0.5550944209098816, + "epoch": 0.3660938960385362, + "kl_loss": 0.35145512223243713, + "loss_ib": 0.008822811767458916, + "step": 1273 + }, + { + "ce_ib": 9.893881797790527, + "ce_orig": 1.526107668876648, + "epoch": 0.3660938960385362, + "kl_loss": 0.47689947485923767, + "loss_ib": 0.014662875793874264, + "step": 1273 + }, + { + "ce_ib": 10.002148628234863, + "ce_orig": 1.4347825050354004, + "epoch": 0.3660938960385362, + "kl_loss": 0.5732181072235107, + "loss_ib": 0.01573432981967926, + "step": 1273 + }, + { + "ce_ib": 6.262985706329346, + "ce_orig": 0.7272791862487793, + "epoch": 0.3660938960385362, + "kl_loss": 0.2030579149723053, + "loss_ib": 0.00829356536269188, + "step": 1273 + }, + { + "ce_ib": 5.783365726470947, + "ce_orig": 0.6207488775253296, + "epoch": 0.3663814796175138, + "kl_loss": 0.24680054187774658, + "loss_ib": 0.00825137086212635, + "step": 1274 + }, + { + "ce_ib": 7.845116138458252, + "ce_orig": 0.9590703248977661, + "epoch": 0.3663814796175138, + "kl_loss": 0.30576378107070923, + "loss_ib": 0.01090275403112173, + "step": 1274 + }, + { + "ce_ib": 6.0168986320495605, + "ce_orig": 0.873681902885437, + "epoch": 0.3663814796175138, + "kl_loss": 0.2680283188819885, + "loss_ib": 0.008697181940078735, + "step": 1274 + }, + { + "ce_ib": 4.450382232666016, + "ce_orig": 0.5019147992134094, + "epoch": 0.3663814796175138, + "kl_loss": 0.2422555387020111, + "loss_ib": 0.006872937548905611, + "step": 1274 + }, + { + "epoch": 0.3666690631964915, + "grad_norm": 0.11415659636259079, + "learning_rate": 9.781876604292181e-06, + "loss": 0.895, + "step": 1275 + }, + { + "ce_ib": 7.331485748291016, + "ce_orig": 0.668783962726593, + "epoch": 0.3666690631964915, + "kl_loss": 0.177715003490448, + "loss_ib": 0.00910863559693098, + "step": 1275 + }, + { + "ce_ib": 7.6367902755737305, + "ce_orig": 0.4947163462638855, + "epoch": 0.3666690631964915, + "kl_loss": 0.27151551842689514, + "loss_ib": 0.010351944714784622, + "step": 1275 + }, + { + "ce_ib": 6.944085121154785, + "ce_orig": 0.817987322807312, + "epoch": 0.3666690631964915, + "kl_loss": 0.4041800796985626, + "loss_ib": 0.010985885746777058, + "step": 1275 + }, + { + "ce_ib": 5.102936267852783, + "ce_orig": 0.6391552090644836, + "epoch": 0.3666690631964915, + "kl_loss": 0.26941439509391785, + "loss_ib": 0.0077970800921320915, + "step": 1275 + }, + { + "ce_ib": 5.671210765838623, + "ce_orig": 0.9727239608764648, + "epoch": 0.36695664677546913, + "kl_loss": 0.46421921253204346, + "loss_ib": 0.01031340379267931, + "step": 1276 + }, + { + "ce_ib": 7.265056610107422, + "ce_orig": 1.0186502933502197, + "epoch": 0.36695664677546913, + "kl_loss": 0.33712470531463623, + "loss_ib": 0.010636303573846817, + "step": 1276 + }, + { + "ce_ib": 5.534669876098633, + "ce_orig": 0.6169944405555725, + "epoch": 0.36695664677546913, + "kl_loss": 0.261410653591156, + "loss_ib": 0.00814877636730671, + "step": 1276 + }, + { + "ce_ib": 10.333765983581543, + "ce_orig": 1.208878993988037, + "epoch": 0.36695664677546913, + "kl_loss": 0.31233105063438416, + "loss_ib": 0.013457076624035835, + "step": 1276 + }, + { + "ce_ib": 8.364578247070312, + "ce_orig": 1.1486130952835083, + "epoch": 0.36724423035444675, + "kl_loss": 0.31856000423431396, + "loss_ib": 0.011550177820026875, + "step": 1277 + }, + { + "ce_ib": 8.633726119995117, + "ce_orig": 1.2692739963531494, + "epoch": 0.36724423035444675, + "kl_loss": 0.178573340177536, + "loss_ib": 0.010419459082186222, + "step": 1277 + }, + { + "ce_ib": 5.818536758422852, + "ce_orig": 0.6847742795944214, + "epoch": 0.36724423035444675, + "kl_loss": 0.2574927508831024, + "loss_ib": 0.008393463678658009, + "step": 1277 + }, + { + "ce_ib": 9.451970100402832, + "ce_orig": 0.8362522721290588, + "epoch": 0.36724423035444675, + "kl_loss": 0.19935394823551178, + "loss_ib": 0.01144551020115614, + "step": 1277 + }, + { + "ce_ib": 4.879723072052002, + "ce_orig": 0.9751784205436707, + "epoch": 0.3675318139334244, + "kl_loss": 0.2573961317539215, + "loss_ib": 0.0074536846950650215, + "step": 1278 + }, + { + "ce_ib": 6.389813423156738, + "ce_orig": 0.9395421743392944, + "epoch": 0.3675318139334244, + "kl_loss": 0.25022318959236145, + "loss_ib": 0.008892044425010681, + "step": 1278 + }, + { + "ce_ib": 4.8754119873046875, + "ce_orig": 1.0186686515808105, + "epoch": 0.3675318139334244, + "kl_loss": 0.20029765367507935, + "loss_ib": 0.0068783885799348354, + "step": 1278 + }, + { + "ce_ib": 7.321893215179443, + "ce_orig": 0.7457486391067505, + "epoch": 0.3675318139334244, + "kl_loss": 0.22448067367076874, + "loss_ib": 0.009566700085997581, + "step": 1278 + }, + { + "ce_ib": 4.064176559448242, + "ce_orig": 0.6539361476898193, + "epoch": 0.36781939751240206, + "kl_loss": 0.23849976062774658, + "loss_ib": 0.006449174135923386, + "step": 1279 + }, + { + "ce_ib": 6.140800476074219, + "ce_orig": 0.642346978187561, + "epoch": 0.36781939751240206, + "kl_loss": 0.26734820008277893, + "loss_ib": 0.008814281783998013, + "step": 1279 + }, + { + "ce_ib": 2.3328685760498047, + "ce_orig": 0.2597403824329376, + "epoch": 0.36781939751240206, + "kl_loss": 0.550055205821991, + "loss_ib": 0.007833420298993587, + "step": 1279 + }, + { + "ce_ib": 5.496246814727783, + "ce_orig": 0.7764479517936707, + "epoch": 0.36781939751240206, + "kl_loss": 0.2076108306646347, + "loss_ib": 0.007572355214506388, + "step": 1279 + }, + { + "epoch": 0.3681069810913797, + "grad_norm": 0.1022362932562828, + "learning_rate": 9.779603583553842e-06, + "loss": 0.8978, + "step": 1280 + }, + { + "ce_ib": 6.203456878662109, + "ce_orig": 0.8070749640464783, + "epoch": 0.3681069810913797, + "kl_loss": 0.3705917000770569, + "loss_ib": 0.009909373708069324, + "step": 1280 + }, + { + "ce_ib": 8.270031929016113, + "ce_orig": 0.9747037291526794, + "epoch": 0.3681069810913797, + "kl_loss": 0.21375861763954163, + "loss_ib": 0.010407618246972561, + "step": 1280 + }, + { + "ce_ib": 6.111821174621582, + "ce_orig": 0.8507468700408936, + "epoch": 0.3681069810913797, + "kl_loss": 0.22477471828460693, + "loss_ib": 0.008359568193554878, + "step": 1280 + }, + { + "ce_ib": 3.5844693183898926, + "ce_orig": 0.5020504593849182, + "epoch": 0.3681069810913797, + "kl_loss": 0.3244742751121521, + "loss_ib": 0.006829211488366127, + "step": 1280 + }, + { + "ce_ib": 9.442797660827637, + "ce_orig": 1.0303080081939697, + "epoch": 0.3683945646703573, + "kl_loss": 0.34166717529296875, + "loss_ib": 0.012859469279646873, + "step": 1281 + }, + { + "ce_ib": 5.348814487457275, + "ce_orig": 1.0583198070526123, + "epoch": 0.3683945646703573, + "kl_loss": 0.20181548595428467, + "loss_ib": 0.007366969250142574, + "step": 1281 + }, + { + "ce_ib": 5.487965106964111, + "ce_orig": 0.6619595289230347, + "epoch": 0.3683945646703573, + "kl_loss": 0.20301175117492676, + "loss_ib": 0.007518082857131958, + "step": 1281 + }, + { + "ce_ib": 3.802037239074707, + "ce_orig": 0.5519328117370605, + "epoch": 0.3683945646703573, + "kl_loss": 0.19785550236701965, + "loss_ib": 0.005780591629445553, + "step": 1281 + }, + { + "ce_ib": 9.927574157714844, + "ce_orig": 1.1654108762741089, + "epoch": 0.368682148249335, + "kl_loss": 0.34230512380599976, + "loss_ib": 0.013350624591112137, + "step": 1282 + }, + { + "ce_ib": 7.524363040924072, + "ce_orig": 0.9500890970230103, + "epoch": 0.368682148249335, + "kl_loss": 0.3446368873119354, + "loss_ib": 0.010970731265842915, + "step": 1282 + }, + { + "ce_ib": 6.607281684875488, + "ce_orig": 0.6834641098976135, + "epoch": 0.368682148249335, + "kl_loss": 0.3660809099674225, + "loss_ib": 0.010268090292811394, + "step": 1282 + }, + { + "ce_ib": 8.014334678649902, + "ce_orig": 1.0792757272720337, + "epoch": 0.368682148249335, + "kl_loss": 0.3143981099128723, + "loss_ib": 0.01115831546485424, + "step": 1282 + }, + { + "ce_ib": 7.707310199737549, + "ce_orig": 0.734540581703186, + "epoch": 0.3689697318283126, + "kl_loss": 0.28208112716674805, + "loss_ib": 0.010528121143579483, + "step": 1283 + }, + { + "ce_ib": 5.258242607116699, + "ce_orig": 0.5540490746498108, + "epoch": 0.3689697318283126, + "kl_loss": 0.1847594976425171, + "loss_ib": 0.007105837110430002, + "step": 1283 + }, + { + "ce_ib": 7.624849796295166, + "ce_orig": 1.0621042251586914, + "epoch": 0.3689697318283126, + "kl_loss": 0.1952236145734787, + "loss_ib": 0.009577086195349693, + "step": 1283 + }, + { + "ce_ib": 7.061639308929443, + "ce_orig": 0.7444918751716614, + "epoch": 0.3689697318283126, + "kl_loss": 0.34235984086990356, + "loss_ib": 0.010485237464308739, + "step": 1283 + }, + { + "ce_ib": 6.315250873565674, + "ce_orig": 0.7070626616477966, + "epoch": 0.36925731540729023, + "kl_loss": 0.25096261501312256, + "loss_ib": 0.008824876509606838, + "step": 1284 + }, + { + "ce_ib": 7.8378424644470215, + "ce_orig": 0.45956188440322876, + "epoch": 0.36925731540729023, + "kl_loss": 0.3414694368839264, + "loss_ib": 0.011252536438405514, + "step": 1284 + }, + { + "ce_ib": 4.959325790405273, + "ce_orig": 0.7961344718933105, + "epoch": 0.36925731540729023, + "kl_loss": 0.25667428970336914, + "loss_ib": 0.007526068482547998, + "step": 1284 + }, + { + "ce_ib": 8.150565147399902, + "ce_orig": 1.2826651334762573, + "epoch": 0.36925731540729023, + "kl_loss": 0.2368721067905426, + "loss_ib": 0.01051928661763668, + "step": 1284 + }, + { + "epoch": 0.3695448989862679, + "grad_norm": 0.11932408809661865, + "learning_rate": 9.777319047641098e-06, + "loss": 0.843, + "step": 1285 + }, + { + "ce_ib": 9.26733112335205, + "ce_orig": 1.25326669216156, + "epoch": 0.3695448989862679, + "kl_loss": 0.2810722887516022, + "loss_ib": 0.012078053317964077, + "step": 1285 + }, + { + "ce_ib": 8.349451065063477, + "ce_orig": 0.8618485927581787, + "epoch": 0.3695448989862679, + "kl_loss": 0.2214396446943283, + "loss_ib": 0.010563847608864307, + "step": 1285 + }, + { + "ce_ib": 8.556676864624023, + "ce_orig": 0.8297768235206604, + "epoch": 0.3695448989862679, + "kl_loss": 0.24484603106975555, + "loss_ib": 0.011005137115716934, + "step": 1285 + }, + { + "ce_ib": 4.535597324371338, + "ce_orig": 0.6433321833610535, + "epoch": 0.3695448989862679, + "kl_loss": 0.18611079454421997, + "loss_ib": 0.0063967048190534115, + "step": 1285 + }, + { + "ce_ib": 7.341830253601074, + "ce_orig": 0.9603883028030396, + "epoch": 0.36983248256524553, + "kl_loss": 0.332909494638443, + "loss_ib": 0.010670925490558147, + "step": 1286 + }, + { + "ce_ib": 4.135986328125, + "ce_orig": 0.6429925560951233, + "epoch": 0.36983248256524553, + "kl_loss": 0.17457111179828644, + "loss_ib": 0.005881697405129671, + "step": 1286 + }, + { + "ce_ib": 10.247614860534668, + "ce_orig": 1.8241069316864014, + "epoch": 0.36983248256524553, + "kl_loss": 0.3723381459712982, + "loss_ib": 0.013970997184515, + "step": 1286 + }, + { + "ce_ib": 9.693305969238281, + "ce_orig": 1.5605031251907349, + "epoch": 0.36983248256524553, + "kl_loss": 0.28591495752334595, + "loss_ib": 0.012552455067634583, + "step": 1286 + }, + { + "ce_ib": 3.6252291202545166, + "ce_orig": 0.4865838885307312, + "epoch": 0.37012006614422316, + "kl_loss": 0.28489428758621216, + "loss_ib": 0.00647417176514864, + "step": 1287 + }, + { + "ce_ib": 8.392481803894043, + "ce_orig": 0.6792038083076477, + "epoch": 0.37012006614422316, + "kl_loss": 0.28094637393951416, + "loss_ib": 0.011201945133507252, + "step": 1287 + }, + { + "ce_ib": 6.859921455383301, + "ce_orig": 1.0309630632400513, + "epoch": 0.37012006614422316, + "kl_loss": 0.21855288743972778, + "loss_ib": 0.009045450948178768, + "step": 1287 + }, + { + "ce_ib": 9.25197696685791, + "ce_orig": 1.3951451778411865, + "epoch": 0.37012006614422316, + "kl_loss": 0.2725180983543396, + "loss_ib": 0.011977157555520535, + "step": 1287 + }, + { + "ce_ib": 4.974238872528076, + "ce_orig": 0.700851559638977, + "epoch": 0.3704076497232008, + "kl_loss": 0.17885011434555054, + "loss_ib": 0.006762739736586809, + "step": 1288 + }, + { + "ce_ib": 6.060398101806641, + "ce_orig": 0.7376994490623474, + "epoch": 0.3704076497232008, + "kl_loss": 0.21035978198051453, + "loss_ib": 0.0081639951094985, + "step": 1288 + }, + { + "ce_ib": 6.525364875793457, + "ce_orig": 0.7431560754776001, + "epoch": 0.3704076497232008, + "kl_loss": 0.18042346835136414, + "loss_ib": 0.008329600095748901, + "step": 1288 + }, + { + "ce_ib": 9.062355041503906, + "ce_orig": 1.4950919151306152, + "epoch": 0.3704076497232008, + "kl_loss": 0.21809548139572144, + "loss_ib": 0.011243309825658798, + "step": 1288 + }, + { + "ce_ib": 5.605106830596924, + "ce_orig": 1.000119686126709, + "epoch": 0.37069523330217846, + "kl_loss": 0.2606685161590576, + "loss_ib": 0.008211791515350342, + "step": 1289 + }, + { + "ce_ib": 5.617210388183594, + "ce_orig": 0.8429021239280701, + "epoch": 0.37069523330217846, + "kl_loss": 0.23302999138832092, + "loss_ib": 0.007947510108351707, + "step": 1289 + }, + { + "ce_ib": 7.6939697265625, + "ce_orig": 0.448341965675354, + "epoch": 0.37069523330217846, + "kl_loss": 0.33551597595214844, + "loss_ib": 0.011049130000174046, + "step": 1289 + }, + { + "ce_ib": 6.936496257781982, + "ce_orig": 0.8369705677032471, + "epoch": 0.37069523330217846, + "kl_loss": 0.35632652044296265, + "loss_ib": 0.010499760508537292, + "step": 1289 + }, + { + "epoch": 0.3709828168811561, + "grad_norm": 0.11123020946979523, + "learning_rate": 9.775023002057931e-06, + "loss": 0.9009, + "step": 1290 + }, + { + "ce_ib": 7.711465358734131, + "ce_orig": 0.6811661124229431, + "epoch": 0.3709828168811561, + "kl_loss": 0.24951444566249847, + "loss_ib": 0.010206609964370728, + "step": 1290 + }, + { + "ce_ib": 6.423352241516113, + "ce_orig": 0.6590506434440613, + "epoch": 0.3709828168811561, + "kl_loss": 0.25252413749694824, + "loss_ib": 0.008948593400418758, + "step": 1290 + }, + { + "ce_ib": 7.123904228210449, + "ce_orig": 0.9283602237701416, + "epoch": 0.3709828168811561, + "kl_loss": 0.2745411992073059, + "loss_ib": 0.009869315661489964, + "step": 1290 + }, + { + "ce_ib": 5.736572265625, + "ce_orig": 0.8704516291618347, + "epoch": 0.3709828168811561, + "kl_loss": 0.23893356323242188, + "loss_ib": 0.008125907741487026, + "step": 1290 + }, + { + "ce_ib": 6.976745128631592, + "ce_orig": 0.6642255187034607, + "epoch": 0.3712704004601337, + "kl_loss": 0.24520739912986755, + "loss_ib": 0.009428819641470909, + "step": 1291 + }, + { + "ce_ib": 3.866387367248535, + "ce_orig": 0.6691374778747559, + "epoch": 0.3712704004601337, + "kl_loss": 0.19989712536334991, + "loss_ib": 0.0058653587475419044, + "step": 1291 + }, + { + "ce_ib": 4.301640510559082, + "ce_orig": 0.4932442307472229, + "epoch": 0.3712704004601337, + "kl_loss": 0.20259158313274384, + "loss_ib": 0.006327556446194649, + "step": 1291 + }, + { + "ce_ib": 6.155208587646484, + "ce_orig": 0.8413316607475281, + "epoch": 0.3712704004601337, + "kl_loss": 0.33612358570098877, + "loss_ib": 0.009516444988548756, + "step": 1291 + }, + { + "ce_ib": 8.439119338989258, + "ce_orig": 1.0063964128494263, + "epoch": 0.3715579840391114, + "kl_loss": 0.24863265454769135, + "loss_ib": 0.010925445705652237, + "step": 1292 + }, + { + "ce_ib": 3.834338426589966, + "ce_orig": 0.5589891076087952, + "epoch": 0.3715579840391114, + "kl_loss": 0.27248573303222656, + "loss_ib": 0.006559195462614298, + "step": 1292 + }, + { + "ce_ib": 3.839154005050659, + "ce_orig": 0.4755013883113861, + "epoch": 0.3715579840391114, + "kl_loss": 0.26118844747543335, + "loss_ib": 0.0064510381780564785, + "step": 1292 + }, + { + "ce_ib": 3.814335584640503, + "ce_orig": 0.6413306593894958, + "epoch": 0.3715579840391114, + "kl_loss": 0.259212464094162, + "loss_ib": 0.006406460423022509, + "step": 1292 + }, + { + "ce_ib": 4.642645359039307, + "ce_orig": 0.5980650186538696, + "epoch": 0.371845567618089, + "kl_loss": 0.28181472420692444, + "loss_ib": 0.007460792548954487, + "step": 1293 + }, + { + "ce_ib": 9.607026100158691, + "ce_orig": 1.2752386331558228, + "epoch": 0.371845567618089, + "kl_loss": 0.2727745473384857, + "loss_ib": 0.012334770523011684, + "step": 1293 + }, + { + "ce_ib": 5.609538555145264, + "ce_orig": 0.42599013447761536, + "epoch": 0.371845567618089, + "kl_loss": 0.3356173038482666, + "loss_ib": 0.008965711109340191, + "step": 1293 + }, + { + "ce_ib": 6.105560779571533, + "ce_orig": 0.8053005933761597, + "epoch": 0.371845567618089, + "kl_loss": 0.2141549289226532, + "loss_ib": 0.00824710913002491, + "step": 1293 + }, + { + "ce_ib": 7.115893363952637, + "ce_orig": 0.969586968421936, + "epoch": 0.37213315119706664, + "kl_loss": 0.2096409797668457, + "loss_ib": 0.009212302975356579, + "step": 1294 + }, + { + "ce_ib": 5.7646965980529785, + "ce_orig": 0.823491096496582, + "epoch": 0.37213315119706664, + "kl_loss": 0.2654654383659363, + "loss_ib": 0.00841935072094202, + "step": 1294 + }, + { + "ce_ib": 2.2926371097564697, + "ce_orig": 0.2627139687538147, + "epoch": 0.37213315119706664, + "kl_loss": 0.4336986839771271, + "loss_ib": 0.006629623472690582, + "step": 1294 + }, + { + "ce_ib": 5.9607462882995605, + "ce_orig": 0.6307532787322998, + "epoch": 0.37213315119706664, + "kl_loss": 0.26752978563308716, + "loss_ib": 0.008636044338345528, + "step": 1294 + }, + { + "epoch": 0.3724207347760443, + "grad_norm": 0.12341900169849396, + "learning_rate": 9.772715452336046e-06, + "loss": 0.8441, + "step": 1295 + }, + { + "ce_ib": 4.824267864227295, + "ce_orig": 0.700189471244812, + "epoch": 0.3724207347760443, + "kl_loss": 0.22174613177776337, + "loss_ib": 0.0070417290553450584, + "step": 1295 + }, + { + "ce_ib": 3.344231128692627, + "ce_orig": 0.5434899926185608, + "epoch": 0.3724207347760443, + "kl_loss": 0.21252413094043732, + "loss_ib": 0.005469472147524357, + "step": 1295 + }, + { + "ce_ib": 6.728499889373779, + "ce_orig": 0.7563159465789795, + "epoch": 0.3724207347760443, + "kl_loss": 0.461100310087204, + "loss_ib": 0.011339503340423107, + "step": 1295 + }, + { + "ce_ib": 6.366405010223389, + "ce_orig": 1.089074969291687, + "epoch": 0.3724207347760443, + "kl_loss": 0.2879982590675354, + "loss_ib": 0.009246387518942356, + "step": 1295 + }, + { + "ce_ib": 7.0506696701049805, + "ce_orig": 1.0485398769378662, + "epoch": 0.37270831835502194, + "kl_loss": 0.23068824410438538, + "loss_ib": 0.009357552044093609, + "step": 1296 + }, + { + "ce_ib": 5.35711145401001, + "ce_orig": 0.6842909455299377, + "epoch": 0.37270831835502194, + "kl_loss": 0.26941436529159546, + "loss_ib": 0.008051254786550999, + "step": 1296 + }, + { + "ce_ib": 4.525808811187744, + "ce_orig": 0.4813930094242096, + "epoch": 0.37270831835502194, + "kl_loss": 0.3485274314880371, + "loss_ib": 0.008011083118617535, + "step": 1296 + }, + { + "ce_ib": 4.62595272064209, + "ce_orig": 0.6876837015151978, + "epoch": 0.37270831835502194, + "kl_loss": 0.2051934003829956, + "loss_ib": 0.006677886471152306, + "step": 1296 + }, + { + "ce_ib": 8.571621894836426, + "ce_orig": 1.0995779037475586, + "epoch": 0.37299590193399956, + "kl_loss": 0.21237477660179138, + "loss_ib": 0.010695368982851505, + "step": 1297 + }, + { + "ce_ib": 4.471730709075928, + "ce_orig": 0.5538232922554016, + "epoch": 0.37299590193399956, + "kl_loss": 0.5044662952423096, + "loss_ib": 0.009516393765807152, + "step": 1297 + }, + { + "ce_ib": 4.533944606781006, + "ce_orig": 0.8222848176956177, + "epoch": 0.37299590193399956, + "kl_loss": 0.315934956073761, + "loss_ib": 0.00769329397007823, + "step": 1297 + }, + { + "ce_ib": 6.893505096435547, + "ce_orig": 0.8183647394180298, + "epoch": 0.37299590193399956, + "kl_loss": 0.2846068739891052, + "loss_ib": 0.009739574044942856, + "step": 1297 + }, + { + "ce_ib": 7.192381381988525, + "ce_orig": 1.0460643768310547, + "epoch": 0.3732834855129772, + "kl_loss": 0.25278496742248535, + "loss_ib": 0.009720231406390667, + "step": 1298 + }, + { + "ce_ib": 6.201778411865234, + "ce_orig": 0.9309136271476746, + "epoch": 0.3732834855129772, + "kl_loss": 0.27258479595184326, + "loss_ib": 0.00892762653529644, + "step": 1298 + }, + { + "ce_ib": 5.355123043060303, + "ce_orig": 0.8157143592834473, + "epoch": 0.3732834855129772, + "kl_loss": 0.28976643085479736, + "loss_ib": 0.00825278740376234, + "step": 1298 + }, + { + "ce_ib": 7.66465425491333, + "ce_orig": 0.924324095249176, + "epoch": 0.3732834855129772, + "kl_loss": 0.20117942988872528, + "loss_ib": 0.009676448069512844, + "step": 1298 + }, + { + "ce_ib": 10.385946273803711, + "ce_orig": 1.7267224788665771, + "epoch": 0.37357106909195487, + "kl_loss": 0.3095071315765381, + "loss_ib": 0.0134810172021389, + "step": 1299 + }, + { + "ce_ib": 8.435067176818848, + "ce_orig": 0.8238710761070251, + "epoch": 0.37357106909195487, + "kl_loss": 1.0476688146591187, + "loss_ib": 0.018911754712462425, + "step": 1299 + }, + { + "ce_ib": 5.311509132385254, + "ce_orig": 0.33952459692955017, + "epoch": 0.37357106909195487, + "kl_loss": 0.23365667462348938, + "loss_ib": 0.007648075465112925, + "step": 1299 + }, + { + "ce_ib": 7.611974716186523, + "ce_orig": 1.0087212324142456, + "epoch": 0.37357106909195487, + "kl_loss": 0.24070414900779724, + "loss_ib": 0.010019016452133656, + "step": 1299 + }, + { + "epoch": 0.3738586526709325, + "grad_norm": 0.12493283301591873, + "learning_rate": 9.770396404034863e-06, + "loss": 0.9006, + "step": 1300 + }, + { + "ce_ib": 8.976292610168457, + "ce_orig": 1.355560064315796, + "epoch": 0.3738586526709325, + "kl_loss": 0.2191731184720993, + "loss_ib": 0.011168022640049458, + "step": 1300 + }, + { + "ce_ib": 5.216058731079102, + "ce_orig": 0.5929533839225769, + "epoch": 0.3738586526709325, + "kl_loss": 0.29089826345443726, + "loss_ib": 0.008125041611492634, + "step": 1300 + }, + { + "ce_ib": 4.076107025146484, + "ce_orig": 0.48292118310928345, + "epoch": 0.3738586526709325, + "kl_loss": 0.1804906278848648, + "loss_ib": 0.005881013348698616, + "step": 1300 + }, + { + "ce_ib": 7.759974479675293, + "ce_orig": 0.8867191672325134, + "epoch": 0.3738586526709325, + "kl_loss": 0.31273460388183594, + "loss_ib": 0.010887320153415203, + "step": 1300 + }, + { + "ce_ib": 6.218832015991211, + "ce_orig": 0.7377117276191711, + "epoch": 0.3741462362499101, + "kl_loss": 0.39184969663619995, + "loss_ib": 0.010137328878045082, + "step": 1301 + }, + { + "ce_ib": 5.884152889251709, + "ce_orig": 0.5230953693389893, + "epoch": 0.3741462362499101, + "kl_loss": 0.29413941502571106, + "loss_ib": 0.008825547061860561, + "step": 1301 + }, + { + "ce_ib": 6.26108455657959, + "ce_orig": 1.0266234874725342, + "epoch": 0.3741462362499101, + "kl_loss": 0.35874414443969727, + "loss_ib": 0.009848525747656822, + "step": 1301 + }, + { + "ce_ib": 8.435012817382812, + "ce_orig": 1.1848310232162476, + "epoch": 0.3741462362499101, + "kl_loss": 0.2637442946434021, + "loss_ib": 0.01107245497405529, + "step": 1301 + }, + { + "ce_ib": 2.2409121990203857, + "ce_orig": 0.21592168509960175, + "epoch": 0.3744338198288878, + "kl_loss": 0.5623815059661865, + "loss_ib": 0.007864727638661861, + "step": 1302 + }, + { + "ce_ib": 4.696742057800293, + "ce_orig": 0.6992368698120117, + "epoch": 0.3744338198288878, + "kl_loss": 0.17069947719573975, + "loss_ib": 0.006403736770153046, + "step": 1302 + }, + { + "ce_ib": 6.172117233276367, + "ce_orig": 0.794901430606842, + "epoch": 0.3744338198288878, + "kl_loss": 0.26270967721939087, + "loss_ib": 0.008799213916063309, + "step": 1302 + }, + { + "ce_ib": 7.035194396972656, + "ce_orig": 0.9057155847549438, + "epoch": 0.3744338198288878, + "kl_loss": 0.36150482296943665, + "loss_ib": 0.010650242678821087, + "step": 1302 + }, + { + "ce_ib": 5.013677597045898, + "ce_orig": 0.5755417346954346, + "epoch": 0.3747214034078654, + "kl_loss": 0.30179062485694885, + "loss_ib": 0.00803158339112997, + "step": 1303 + }, + { + "ce_ib": 4.116542339324951, + "ce_orig": 0.7811974287033081, + "epoch": 0.3747214034078654, + "kl_loss": 0.2888774573802948, + "loss_ib": 0.007005317136645317, + "step": 1303 + }, + { + "ce_ib": 4.250243186950684, + "ce_orig": 0.7281661033630371, + "epoch": 0.3747214034078654, + "kl_loss": 0.17032361030578613, + "loss_ib": 0.005953479092568159, + "step": 1303 + }, + { + "ce_ib": 9.735255241394043, + "ce_orig": 1.2109895944595337, + "epoch": 0.3747214034078654, + "kl_loss": 0.1804056465625763, + "loss_ib": 0.011539311148226261, + "step": 1303 + }, + { + "ce_ib": 4.496049880981445, + "ce_orig": 0.4316735863685608, + "epoch": 0.37500898698684304, + "kl_loss": 0.22079500555992126, + "loss_ib": 0.006704000290483236, + "step": 1304 + }, + { + "ce_ib": 9.108988761901855, + "ce_orig": 1.4747859239578247, + "epoch": 0.37500898698684304, + "kl_loss": 0.2784188985824585, + "loss_ib": 0.011893176473677158, + "step": 1304 + }, + { + "ce_ib": 9.340219497680664, + "ce_orig": 1.2900398969650269, + "epoch": 0.37500898698684304, + "kl_loss": 0.2142384946346283, + "loss_ib": 0.011482604779303074, + "step": 1304 + }, + { + "ce_ib": 7.92219877243042, + "ce_orig": 1.0115225315093994, + "epoch": 0.37500898698684304, + "kl_loss": 0.31117451190948486, + "loss_ib": 0.011033943854272366, + "step": 1304 + }, + { + "epoch": 0.37529657056582066, + "grad_norm": 0.09640936553478241, + "learning_rate": 9.768065862741512e-06, + "loss": 0.8762, + "step": 1305 + }, + { + "ce_ib": 8.480440139770508, + "ce_orig": 1.1000053882598877, + "epoch": 0.37529657056582066, + "kl_loss": 0.24573183059692383, + "loss_ib": 0.010937758721411228, + "step": 1305 + }, + { + "ce_ib": 6.316285610198975, + "ce_orig": 0.744388222694397, + "epoch": 0.37529657056582066, + "kl_loss": 0.2152835577726364, + "loss_ib": 0.008469121530652046, + "step": 1305 + }, + { + "ce_ib": 3.8323137760162354, + "ce_orig": 0.9252414703369141, + "epoch": 0.37529657056582066, + "kl_loss": 0.15203577280044556, + "loss_ib": 0.005352671258151531, + "step": 1305 + }, + { + "ce_ib": 7.657679557800293, + "ce_orig": 1.0006682872772217, + "epoch": 0.37529657056582066, + "kl_loss": 0.20859548449516296, + "loss_ib": 0.00974363461136818, + "step": 1305 + }, + { + "ce_ib": 9.317044258117676, + "ce_orig": 1.1992231607437134, + "epoch": 0.37558415414479834, + "kl_loss": 0.2548269033432007, + "loss_ib": 0.011865313164889812, + "step": 1306 + }, + { + "ce_ib": 7.432621955871582, + "ce_orig": 0.8354329466819763, + "epoch": 0.37558415414479834, + "kl_loss": 0.3074944317340851, + "loss_ib": 0.01050756685435772, + "step": 1306 + }, + { + "ce_ib": 8.270771026611328, + "ce_orig": 1.0792176723480225, + "epoch": 0.37558415414479834, + "kl_loss": 0.2315516471862793, + "loss_ib": 0.010586287826299667, + "step": 1306 + }, + { + "ce_ib": 9.993446350097656, + "ce_orig": 1.468353509902954, + "epoch": 0.37558415414479834, + "kl_loss": 0.3438698649406433, + "loss_ib": 0.013432145118713379, + "step": 1306 + }, + { + "ce_ib": 9.865982055664062, + "ce_orig": 1.856319785118103, + "epoch": 0.37587173772377597, + "kl_loss": 0.22962693870067596, + "loss_ib": 0.012162251397967339, + "step": 1307 + }, + { + "ce_ib": 4.654817581176758, + "ce_orig": 0.596437931060791, + "epoch": 0.37587173772377597, + "kl_loss": 0.4065954089164734, + "loss_ib": 0.00872077140957117, + "step": 1307 + }, + { + "ce_ib": 11.853404998779297, + "ce_orig": 1.680164098739624, + "epoch": 0.37587173772377597, + "kl_loss": 0.16628415882587433, + "loss_ib": 0.013516247272491455, + "step": 1307 + }, + { + "ce_ib": 3.770814895629883, + "ce_orig": 0.6643754243850708, + "epoch": 0.37587173772377597, + "kl_loss": 0.2119031697511673, + "loss_ib": 0.0058898464776575565, + "step": 1307 + }, + { + "ce_ib": 7.13842248916626, + "ce_orig": 0.6096088886260986, + "epoch": 0.3761593213027536, + "kl_loss": 0.24172694981098175, + "loss_ib": 0.009555691853165627, + "step": 1308 + }, + { + "ce_ib": 7.158660411834717, + "ce_orig": 0.7481746673583984, + "epoch": 0.3761593213027536, + "kl_loss": 0.19245290756225586, + "loss_ib": 0.009083189070224762, + "step": 1308 + }, + { + "ce_ib": 4.829501152038574, + "ce_orig": 0.6256715655326843, + "epoch": 0.3761593213027536, + "kl_loss": 0.23448200523853302, + "loss_ib": 0.007174321450293064, + "step": 1308 + }, + { + "ce_ib": 8.600601196289062, + "ce_orig": 1.2861305475234985, + "epoch": 0.3761593213027536, + "kl_loss": 0.24668508768081665, + "loss_ib": 0.011067451909184456, + "step": 1308 + }, + { + "ce_ib": 8.377589225769043, + "ce_orig": 1.4738719463348389, + "epoch": 0.37644690488173127, + "kl_loss": 0.2912333309650421, + "loss_ib": 0.011289922520518303, + "step": 1309 + }, + { + "ce_ib": 5.8922119140625, + "ce_orig": 0.9516330361366272, + "epoch": 0.37644690488173127, + "kl_loss": 0.21654640138149261, + "loss_ib": 0.008057675324380398, + "step": 1309 + }, + { + "ce_ib": 6.052369594573975, + "ce_orig": 1.0148662328720093, + "epoch": 0.37644690488173127, + "kl_loss": 0.1896468997001648, + "loss_ib": 0.007948838174343109, + "step": 1309 + }, + { + "ce_ib": 6.462357044219971, + "ce_orig": 0.9467957615852356, + "epoch": 0.37644690488173127, + "kl_loss": 0.2643457353115082, + "loss_ib": 0.00910581462085247, + "step": 1309 + }, + { + "epoch": 0.3767344884607089, + "grad_norm": 0.11843874305486679, + "learning_rate": 9.765723834070805e-06, + "loss": 0.8566, + "step": 1310 + }, + { + "ce_ib": 10.225154876708984, + "ce_orig": 1.4613780975341797, + "epoch": 0.3767344884607089, + "kl_loss": 0.41715824604034424, + "loss_ib": 0.014396737329661846, + "step": 1310 + }, + { + "ce_ib": 11.08290958404541, + "ce_orig": 1.4982622861862183, + "epoch": 0.3767344884607089, + "kl_loss": 0.2907963991165161, + "loss_ib": 0.013990874402225018, + "step": 1310 + }, + { + "ce_ib": 5.848134994506836, + "ce_orig": 1.044801950454712, + "epoch": 0.3767344884607089, + "kl_loss": 0.26418235898017883, + "loss_ib": 0.008489958941936493, + "step": 1310 + }, + { + "ce_ib": 7.654242515563965, + "ce_orig": 0.9891024231910706, + "epoch": 0.3767344884607089, + "kl_loss": 0.3541780710220337, + "loss_ib": 0.011196022853255272, + "step": 1310 + }, + { + "ce_ib": 6.14833927154541, + "ce_orig": 0.8169759511947632, + "epoch": 0.3770220720396865, + "kl_loss": 0.16873528063297272, + "loss_ib": 0.007835691794753075, + "step": 1311 + }, + { + "ce_ib": 6.844672203063965, + "ce_orig": 0.7956097722053528, + "epoch": 0.3770220720396865, + "kl_loss": 0.22771617770195007, + "loss_ib": 0.009121834300458431, + "step": 1311 + }, + { + "ce_ib": 5.502779006958008, + "ce_orig": 0.37446027994155884, + "epoch": 0.3770220720396865, + "kl_loss": 0.35141971707344055, + "loss_ib": 0.0090169757604599, + "step": 1311 + }, + { + "ce_ib": 5.839422702789307, + "ce_orig": 0.5762996673583984, + "epoch": 0.3770220720396865, + "kl_loss": 0.23966707289218903, + "loss_ib": 0.008236093446612358, + "step": 1311 + }, + { + "ce_ib": 6.2157158851623535, + "ce_orig": 0.4993678331375122, + "epoch": 0.3773096556186642, + "kl_loss": 0.2873460054397583, + "loss_ib": 0.00908917561173439, + "step": 1312 + }, + { + "ce_ib": 6.513637065887451, + "ce_orig": 0.633793830871582, + "epoch": 0.3773096556186642, + "kl_loss": 0.28910765051841736, + "loss_ib": 0.009404714219272137, + "step": 1312 + }, + { + "ce_ib": 5.872572422027588, + "ce_orig": 0.7542458772659302, + "epoch": 0.3773096556186642, + "kl_loss": 0.1863556206226349, + "loss_ib": 0.00773612828925252, + "step": 1312 + }, + { + "ce_ib": 4.847990036010742, + "ce_orig": 0.5747877359390259, + "epoch": 0.3773096556186642, + "kl_loss": 0.19151942431926727, + "loss_ib": 0.006763183977454901, + "step": 1312 + }, + { + "ce_ib": 4.407119274139404, + "ce_orig": 0.8114118576049805, + "epoch": 0.3775972391976418, + "kl_loss": 0.19292503595352173, + "loss_ib": 0.006336369551718235, + "step": 1313 + }, + { + "ce_ib": 8.981295585632324, + "ce_orig": 1.2180886268615723, + "epoch": 0.3775972391976418, + "kl_loss": 0.2586621344089508, + "loss_ib": 0.011567916721105576, + "step": 1313 + }, + { + "ce_ib": 7.540792942047119, + "ce_orig": 1.174260139465332, + "epoch": 0.3775972391976418, + "kl_loss": 0.25622671842575073, + "loss_ib": 0.010103059932589531, + "step": 1313 + }, + { + "ce_ib": 6.744704246520996, + "ce_orig": 0.8999756574630737, + "epoch": 0.3775972391976418, + "kl_loss": 0.25654101371765137, + "loss_ib": 0.009310114197432995, + "step": 1313 + }, + { + "ce_ib": 5.615959167480469, + "ce_orig": 0.8794683814048767, + "epoch": 0.37788482277661944, + "kl_loss": 0.26790547370910645, + "loss_ib": 0.008295013569295406, + "step": 1314 + }, + { + "ce_ib": 4.905920028686523, + "ce_orig": 0.7111831307411194, + "epoch": 0.37788482277661944, + "kl_loss": 0.21155670285224915, + "loss_ib": 0.007021486759185791, + "step": 1314 + }, + { + "ce_ib": 5.637347221374512, + "ce_orig": 0.5438764095306396, + "epoch": 0.37788482277661944, + "kl_loss": 0.2609032094478607, + "loss_ib": 0.008246378973126411, + "step": 1314 + }, + { + "ce_ib": 7.9561448097229, + "ce_orig": 1.2378239631652832, + "epoch": 0.37788482277661944, + "kl_loss": 0.25759926438331604, + "loss_ib": 0.010532137006521225, + "step": 1314 + }, + { + "epoch": 0.37817240635559707, + "grad_norm": 0.10208520293235779, + "learning_rate": 9.763370323665233e-06, + "loss": 0.87, + "step": 1315 + }, + { + "ce_ib": 3.863255739212036, + "ce_orig": 0.4220186173915863, + "epoch": 0.37817240635559707, + "kl_loss": 0.2792007029056549, + "loss_ib": 0.006655262783169746, + "step": 1315 + }, + { + "ce_ib": 8.859979629516602, + "ce_orig": 1.0536916255950928, + "epoch": 0.37817240635559707, + "kl_loss": 0.5747706294059753, + "loss_ib": 0.01460768561810255, + "step": 1315 + }, + { + "ce_ib": 6.9834771156311035, + "ce_orig": 1.0781289339065552, + "epoch": 0.37817240635559707, + "kl_loss": 0.28461551666259766, + "loss_ib": 0.009829632006585598, + "step": 1315 + }, + { + "ce_ib": 5.6756134033203125, + "ce_orig": 0.9484390020370483, + "epoch": 0.37817240635559707, + "kl_loss": 0.20451746881008148, + "loss_ib": 0.007720788009464741, + "step": 1315 + }, + { + "ce_ib": 6.136423110961914, + "ce_orig": 0.7416388988494873, + "epoch": 0.37845998993457475, + "kl_loss": 0.16922442615032196, + "loss_ib": 0.007828667759895325, + "step": 1316 + }, + { + "ce_ib": 3.612623453140259, + "ce_orig": 0.6604862809181213, + "epoch": 0.37845998993457475, + "kl_loss": 0.21814820170402527, + "loss_ib": 0.005794105585664511, + "step": 1316 + }, + { + "ce_ib": 6.457236289978027, + "ce_orig": 0.6525535583496094, + "epoch": 0.37845998993457475, + "kl_loss": 0.3399786353111267, + "loss_ib": 0.00985702220350504, + "step": 1316 + }, + { + "ce_ib": 4.825037002563477, + "ce_orig": 0.5446357727050781, + "epoch": 0.37845998993457475, + "kl_loss": 0.2203066349029541, + "loss_ib": 0.007028103340417147, + "step": 1316 + }, + { + "ce_ib": 5.3223772048950195, + "ce_orig": 0.48080676794052124, + "epoch": 0.37874757351355237, + "kl_loss": 0.48036542534828186, + "loss_ib": 0.010126031003892422, + "step": 1317 + }, + { + "ce_ib": 6.367911338806152, + "ce_orig": 0.6589131951332092, + "epoch": 0.37874757351355237, + "kl_loss": 0.31526780128479004, + "loss_ib": 0.009520589374005795, + "step": 1317 + }, + { + "ce_ib": 7.104604721069336, + "ce_orig": 0.8577982187271118, + "epoch": 0.37874757351355237, + "kl_loss": 0.25986504554748535, + "loss_ib": 0.009703255258500576, + "step": 1317 + }, + { + "ce_ib": 5.245115756988525, + "ce_orig": 0.6983909010887146, + "epoch": 0.37874757351355237, + "kl_loss": 0.3243550658226013, + "loss_ib": 0.008488666266202927, + "step": 1317 + }, + { + "ce_ib": 4.573901176452637, + "ce_orig": 0.602114200592041, + "epoch": 0.37903515709253, + "kl_loss": 0.17613765597343445, + "loss_ib": 0.006335277575999498, + "step": 1318 + }, + { + "ce_ib": 10.542227745056152, + "ce_orig": 1.4979974031448364, + "epoch": 0.37903515709253, + "kl_loss": 0.4047009348869324, + "loss_ib": 0.014589237980544567, + "step": 1318 + }, + { + "ce_ib": 7.663567543029785, + "ce_orig": 1.2707366943359375, + "epoch": 0.37903515709253, + "kl_loss": 0.3883098363876343, + "loss_ib": 0.011546666733920574, + "step": 1318 + }, + { + "ce_ib": 7.179001808166504, + "ce_orig": 1.0249370336532593, + "epoch": 0.37903515709253, + "kl_loss": 0.22374102473258972, + "loss_ib": 0.009416411630809307, + "step": 1318 + }, + { + "ce_ib": 5.827734470367432, + "ce_orig": 0.6614716649055481, + "epoch": 0.3793227406715077, + "kl_loss": 0.23256278038024902, + "loss_ib": 0.008153362199664116, + "step": 1319 + }, + { + "ce_ib": 5.04184627532959, + "ce_orig": 0.8984420299530029, + "epoch": 0.3793227406715077, + "kl_loss": 0.20100192725658417, + "loss_ib": 0.007051866035908461, + "step": 1319 + }, + { + "ce_ib": 3.4561145305633545, + "ce_orig": 0.6697774529457092, + "epoch": 0.3793227406715077, + "kl_loss": 0.19955700635910034, + "loss_ib": 0.0054516843520104885, + "step": 1319 + }, + { + "ce_ib": 8.484859466552734, + "ce_orig": 1.0996067523956299, + "epoch": 0.3793227406715077, + "kl_loss": 0.211076021194458, + "loss_ib": 0.010595619678497314, + "step": 1319 + }, + { + "epoch": 0.3796103242504853, + "grad_norm": 0.12847267091274261, + "learning_rate": 9.76100533719495e-06, + "loss": 0.8867, + "step": 1320 + }, + { + "ce_ib": 3.297279119491577, + "ce_orig": 0.6684339642524719, + "epoch": 0.3796103242504853, + "kl_loss": 0.20436504483222961, + "loss_ib": 0.005340929608792067, + "step": 1320 + }, + { + "ce_ib": 5.393133163452148, + "ce_orig": 0.7639580368995667, + "epoch": 0.3796103242504853, + "kl_loss": 0.3076980710029602, + "loss_ib": 0.008470113389194012, + "step": 1320 + }, + { + "ce_ib": 5.216917037963867, + "ce_orig": 0.8593165278434753, + "epoch": 0.3796103242504853, + "kl_loss": 0.2214372307062149, + "loss_ib": 0.007431289181113243, + "step": 1320 + }, + { + "ce_ib": 4.598435401916504, + "ce_orig": 0.5633476376533508, + "epoch": 0.3796103242504853, + "kl_loss": 0.20765292644500732, + "loss_ib": 0.0066749644465744495, + "step": 1320 + }, + { + "ce_ib": 5.472358226776123, + "ce_orig": 0.5398709774017334, + "epoch": 0.3798979078294629, + "kl_loss": 0.2486787736415863, + "loss_ib": 0.007959146052598953, + "step": 1321 + }, + { + "ce_ib": 7.950688362121582, + "ce_orig": 1.1804054975509644, + "epoch": 0.3798979078294629, + "kl_loss": 0.2736315429210663, + "loss_ib": 0.010687003843486309, + "step": 1321 + }, + { + "ce_ib": 6.406330585479736, + "ce_orig": 1.0859549045562744, + "epoch": 0.3798979078294629, + "kl_loss": 0.166508287191391, + "loss_ib": 0.00807141326367855, + "step": 1321 + }, + { + "ce_ib": 4.812961578369141, + "ce_orig": 0.7601636648178101, + "epoch": 0.3798979078294629, + "kl_loss": 0.30231884121894836, + "loss_ib": 0.007836150005459785, + "step": 1321 + }, + { + "ce_ib": 5.525980472564697, + "ce_orig": 0.8041296601295471, + "epoch": 0.3801854914084406, + "kl_loss": 0.2853173613548279, + "loss_ib": 0.008379153907299042, + "step": 1322 + }, + { + "ce_ib": 4.20200252532959, + "ce_orig": 0.8979824185371399, + "epoch": 0.3801854914084406, + "kl_loss": 0.4491305947303772, + "loss_ib": 0.00869330856949091, + "step": 1322 + }, + { + "ce_ib": 4.392397880554199, + "ce_orig": 0.6132926940917969, + "epoch": 0.3801854914084406, + "kl_loss": 0.15145298838615417, + "loss_ib": 0.005906927399337292, + "step": 1322 + }, + { + "ce_ib": 7.698770999908447, + "ce_orig": 1.1069796085357666, + "epoch": 0.3801854914084406, + "kl_loss": 0.26237034797668457, + "loss_ib": 0.010322474874556065, + "step": 1322 + }, + { + "ce_ib": 2.342242956161499, + "ce_orig": 0.16955788433551788, + "epoch": 0.3804730749874182, + "kl_loss": 0.5944281816482544, + "loss_ib": 0.008286524564027786, + "step": 1323 + }, + { + "ce_ib": 8.634743690490723, + "ce_orig": 1.3468730449676514, + "epoch": 0.3804730749874182, + "kl_loss": 0.2927013337612152, + "loss_ib": 0.011561756953597069, + "step": 1323 + }, + { + "ce_ib": 8.237252235412598, + "ce_orig": 0.9930875301361084, + "epoch": 0.3804730749874182, + "kl_loss": 0.24464577436447144, + "loss_ib": 0.010683710686862469, + "step": 1323 + }, + { + "ce_ib": 4.277406215667725, + "ce_orig": 0.6701924204826355, + "epoch": 0.3804730749874182, + "kl_loss": 0.24595500528812408, + "loss_ib": 0.00673695607110858, + "step": 1323 + }, + { + "ce_ib": 8.226551055908203, + "ce_orig": 1.2383060455322266, + "epoch": 0.38076065856639585, + "kl_loss": 0.19104276597499847, + "loss_ib": 0.010136978700757027, + "step": 1324 + }, + { + "ce_ib": 5.543074607849121, + "ce_orig": 0.6039655208587646, + "epoch": 0.38076065856639585, + "kl_loss": 0.24177339673042297, + "loss_ib": 0.007960808463394642, + "step": 1324 + }, + { + "ce_ib": 4.560553073883057, + "ce_orig": 0.719609260559082, + "epoch": 0.38076065856639585, + "kl_loss": 0.2100091278553009, + "loss_ib": 0.006660644430667162, + "step": 1324 + }, + { + "ce_ib": 5.934459209442139, + "ce_orig": 0.7645275592803955, + "epoch": 0.38076065856639585, + "kl_loss": 0.22334322333335876, + "loss_ib": 0.008167891763150692, + "step": 1324 + }, + { + "epoch": 0.3810482421453735, + "grad_norm": 0.11815589666366577, + "learning_rate": 9.75862888035776e-06, + "loss": 0.8853, + "step": 1325 + }, + { + "ce_ib": 7.395681381225586, + "ce_orig": 1.428235411643982, + "epoch": 0.3810482421453735, + "kl_loss": 0.2361186295747757, + "loss_ib": 0.00975686777383089, + "step": 1325 + }, + { + "ce_ib": 8.123614311218262, + "ce_orig": 0.9370039701461792, + "epoch": 0.3810482421453735, + "kl_loss": 0.27744072675704956, + "loss_ib": 0.010898021049797535, + "step": 1325 + }, + { + "ce_ib": 6.170518398284912, + "ce_orig": 0.7545908689498901, + "epoch": 0.3810482421453735, + "kl_loss": 0.26478803157806396, + "loss_ib": 0.008818398229777813, + "step": 1325 + }, + { + "ce_ib": 6.98801851272583, + "ce_orig": 0.889594554901123, + "epoch": 0.3810482421453735, + "kl_loss": 0.25020599365234375, + "loss_ib": 0.009490078315138817, + "step": 1325 + }, + { + "ce_ib": 8.423517227172852, + "ce_orig": 0.5639511346817017, + "epoch": 0.38133582572435115, + "kl_loss": 0.3541921377182007, + "loss_ib": 0.011965438723564148, + "step": 1326 + }, + { + "ce_ib": 6.9096479415893555, + "ce_orig": 0.8797593712806702, + "epoch": 0.38133582572435115, + "kl_loss": 0.2511420249938965, + "loss_ib": 0.009421068243682384, + "step": 1326 + }, + { + "ce_ib": 4.40999698638916, + "ce_orig": 0.64253830909729, + "epoch": 0.38133582572435115, + "kl_loss": 0.2337566614151001, + "loss_ib": 0.00674756383523345, + "step": 1326 + }, + { + "ce_ib": 6.412195205688477, + "ce_orig": 0.7317036390304565, + "epoch": 0.38133582572435115, + "kl_loss": 0.40119338035583496, + "loss_ib": 0.010424129664897919, + "step": 1326 + }, + { + "ce_ib": 8.463457107543945, + "ce_orig": 1.4905399084091187, + "epoch": 0.3816234093033288, + "kl_loss": 0.27826082706451416, + "loss_ib": 0.011246065609157085, + "step": 1327 + }, + { + "ce_ib": 4.925488471984863, + "ce_orig": 0.7923012375831604, + "epoch": 0.3816234093033288, + "kl_loss": 0.24017053842544556, + "loss_ib": 0.007327193859964609, + "step": 1327 + }, + { + "ce_ib": 4.493514537811279, + "ce_orig": 0.6073864698410034, + "epoch": 0.3816234093033288, + "kl_loss": 0.13801854848861694, + "loss_ib": 0.005873700138181448, + "step": 1327 + }, + { + "ce_ib": 12.996380805969238, + "ce_orig": 2.183483123779297, + "epoch": 0.3816234093033288, + "kl_loss": 0.3698446750640869, + "loss_ib": 0.016694827005267143, + "step": 1327 + }, + { + "ce_ib": 5.160539627075195, + "ce_orig": 0.37488970160484314, + "epoch": 0.3819109928823064, + "kl_loss": 0.326717734336853, + "loss_ib": 0.008427716791629791, + "step": 1328 + }, + { + "ce_ib": 5.422794342041016, + "ce_orig": 0.7671301960945129, + "epoch": 0.3819109928823064, + "kl_loss": 0.2245626300573349, + "loss_ib": 0.007668420672416687, + "step": 1328 + }, + { + "ce_ib": 6.127049922943115, + "ce_orig": 0.7963330745697021, + "epoch": 0.3819109928823064, + "kl_loss": 0.36005595326423645, + "loss_ib": 0.00972760934382677, + "step": 1328 + }, + { + "ce_ib": 6.172006130218506, + "ce_orig": 0.7193319797515869, + "epoch": 0.3819109928823064, + "kl_loss": 0.4020848274230957, + "loss_ib": 0.010192854329943657, + "step": 1328 + }, + { + "ce_ib": 6.396267414093018, + "ce_orig": 0.5827741622924805, + "epoch": 0.3821985764612841, + "kl_loss": 0.25311774015426636, + "loss_ib": 0.00892744492739439, + "step": 1329 + }, + { + "ce_ib": 5.07418155670166, + "ce_orig": 0.4467667043209076, + "epoch": 0.3821985764612841, + "kl_loss": 0.2523980736732483, + "loss_ib": 0.007598162163048983, + "step": 1329 + }, + { + "ce_ib": 7.253851413726807, + "ce_orig": 0.934846818447113, + "epoch": 0.3821985764612841, + "kl_loss": 0.19089868664741516, + "loss_ib": 0.009162838570773602, + "step": 1329 + }, + { + "ce_ib": 3.882725954055786, + "ce_orig": 0.8223580121994019, + "epoch": 0.3821985764612841, + "kl_loss": 0.1783471554517746, + "loss_ib": 0.005666197277605534, + "step": 1329 + }, + { + "epoch": 0.3824861600402617, + "grad_norm": 0.11724074929952621, + "learning_rate": 9.75624095887909e-06, + "loss": 0.8862, + "step": 1330 + }, + { + "ce_ib": 7.525057315826416, + "ce_orig": 1.2038042545318604, + "epoch": 0.3824861600402617, + "kl_loss": 0.1984117329120636, + "loss_ib": 0.009509174153208733, + "step": 1330 + }, + { + "ce_ib": 4.756983280181885, + "ce_orig": 0.5972123742103577, + "epoch": 0.3824861600402617, + "kl_loss": 0.4368649423122406, + "loss_ib": 0.009125632233917713, + "step": 1330 + }, + { + "ce_ib": 3.6005241870880127, + "ce_orig": 0.4440179467201233, + "epoch": 0.3824861600402617, + "kl_loss": 0.3235924541950226, + "loss_ib": 0.006836448796093464, + "step": 1330 + }, + { + "ce_ib": 8.435921669006348, + "ce_orig": 0.8452567458152771, + "epoch": 0.3824861600402617, + "kl_loss": 0.2886652648448944, + "loss_ib": 0.011322574689984322, + "step": 1330 + }, + { + "ce_ib": 7.909755229949951, + "ce_orig": 0.9250390529632568, + "epoch": 0.3827737436192393, + "kl_loss": 0.19716186821460724, + "loss_ib": 0.00988137349486351, + "step": 1331 + }, + { + "ce_ib": 6.099967956542969, + "ce_orig": 0.807924211025238, + "epoch": 0.3827737436192393, + "kl_loss": 0.208805650472641, + "loss_ib": 0.008188024163246155, + "step": 1331 + }, + { + "ce_ib": 3.4478707313537598, + "ce_orig": 0.5189741253852844, + "epoch": 0.3827737436192393, + "kl_loss": 0.1719244122505188, + "loss_ib": 0.005167114548385143, + "step": 1331 + }, + { + "ce_ib": 4.225029468536377, + "ce_orig": 0.5319197773933411, + "epoch": 0.3827737436192393, + "kl_loss": 0.2567492127418518, + "loss_ib": 0.006792521104216576, + "step": 1331 + }, + { + "ce_ib": 3.7936832904815674, + "ce_orig": 0.6889621615409851, + "epoch": 0.383061327198217, + "kl_loss": 0.24284930527210236, + "loss_ib": 0.006222176365554333, + "step": 1332 + }, + { + "ce_ib": 6.814220905303955, + "ce_orig": 0.8868374824523926, + "epoch": 0.383061327198217, + "kl_loss": 0.20072780549526215, + "loss_ib": 0.008821499533951283, + "step": 1332 + }, + { + "ce_ib": 7.203690052032471, + "ce_orig": 0.9520978331565857, + "epoch": 0.383061327198217, + "kl_loss": 0.28056567907333374, + "loss_ib": 0.010009346529841423, + "step": 1332 + }, + { + "ce_ib": 8.070340156555176, + "ce_orig": 1.3334993124008179, + "epoch": 0.383061327198217, + "kl_loss": 0.1696416139602661, + "loss_ib": 0.009766755625605583, + "step": 1332 + }, + { + "ce_ib": 5.705083847045898, + "ce_orig": 0.6407719850540161, + "epoch": 0.38334891077719463, + "kl_loss": 0.29713699221611023, + "loss_ib": 0.008676453493535519, + "step": 1333 + }, + { + "ce_ib": 4.981590747833252, + "ce_orig": 0.5492885708808899, + "epoch": 0.38334891077719463, + "kl_loss": 0.28598642349243164, + "loss_ib": 0.007841454818844795, + "step": 1333 + }, + { + "ce_ib": 4.273902416229248, + "ce_orig": 0.807598888874054, + "epoch": 0.38334891077719463, + "kl_loss": 0.20297232270240784, + "loss_ib": 0.006303625646978617, + "step": 1333 + }, + { + "ce_ib": 9.406611442565918, + "ce_orig": 1.0958489179611206, + "epoch": 0.38334891077719463, + "kl_loss": 0.2598443031311035, + "loss_ib": 0.012005054391920567, + "step": 1333 + }, + { + "ce_ib": 7.128815650939941, + "ce_orig": 1.1524990797042847, + "epoch": 0.38363649435617225, + "kl_loss": 0.39675629138946533, + "loss_ib": 0.011096377857029438, + "step": 1334 + }, + { + "ce_ib": 10.28831958770752, + "ce_orig": 1.841440200805664, + "epoch": 0.38363649435617225, + "kl_loss": 0.318050354719162, + "loss_ib": 0.01346882339566946, + "step": 1334 + }, + { + "ce_ib": 3.544858932495117, + "ce_orig": 0.6753540635108948, + "epoch": 0.38363649435617225, + "kl_loss": 0.14195430278778076, + "loss_ib": 0.004964401945471764, + "step": 1334 + }, + { + "ce_ib": 7.335440158843994, + "ce_orig": 1.0214455127716064, + "epoch": 0.38363649435617225, + "kl_loss": 0.30033838748931885, + "loss_ib": 0.010338823311030865, + "step": 1334 + }, + { + "epoch": 0.3839240779351499, + "grad_norm": 0.10687928646802902, + "learning_rate": 9.753841578512007e-06, + "loss": 0.8394, + "step": 1335 + }, + { + "ce_ib": 3.9797632694244385, + "ce_orig": 0.8001569509506226, + "epoch": 0.3839240779351499, + "kl_loss": 0.40325748920440674, + "loss_ib": 0.008012338541448116, + "step": 1335 + }, + { + "ce_ib": 3.7276611328125, + "ce_orig": 0.7037749886512756, + "epoch": 0.3839240779351499, + "kl_loss": 0.17334817349910736, + "loss_ib": 0.005461142398416996, + "step": 1335 + }, + { + "ce_ib": 7.768494129180908, + "ce_orig": 1.434823989868164, + "epoch": 0.3839240779351499, + "kl_loss": 0.2680274546146393, + "loss_ib": 0.010448768734931946, + "step": 1335 + }, + { + "ce_ib": 8.82320785522461, + "ce_orig": 1.1123329401016235, + "epoch": 0.3839240779351499, + "kl_loss": 0.2079583704471588, + "loss_ib": 0.01090279221534729, + "step": 1335 + }, + { + "ce_ib": 8.984968185424805, + "ce_orig": 0.9062168598175049, + "epoch": 0.38421166151412756, + "kl_loss": 0.21713285148143768, + "loss_ib": 0.011156296357512474, + "step": 1336 + }, + { + "ce_ib": 4.361588478088379, + "ce_orig": 0.7425259351730347, + "epoch": 0.38421166151412756, + "kl_loss": 0.21430090069770813, + "loss_ib": 0.00650459760800004, + "step": 1336 + }, + { + "ce_ib": 8.561041831970215, + "ce_orig": 0.8657642006874084, + "epoch": 0.38421166151412756, + "kl_loss": 0.2691475749015808, + "loss_ib": 0.011252517811954021, + "step": 1336 + }, + { + "ce_ib": 8.476003646850586, + "ce_orig": 1.0116504430770874, + "epoch": 0.38421166151412756, + "kl_loss": 0.23885221779346466, + "loss_ib": 0.01086452603340149, + "step": 1336 + }, + { + "ce_ib": 6.69258451461792, + "ce_orig": 1.416684865951538, + "epoch": 0.3844992450931052, + "kl_loss": 0.2654043436050415, + "loss_ib": 0.00934662763029337, + "step": 1337 + }, + { + "ce_ib": 5.728124618530273, + "ce_orig": 0.5610059499740601, + "epoch": 0.3844992450931052, + "kl_loss": 0.35689449310302734, + "loss_ib": 0.00929707009345293, + "step": 1337 + }, + { + "ce_ib": 4.5632500648498535, + "ce_orig": 1.0310240983963013, + "epoch": 0.3844992450931052, + "kl_loss": 0.2000049650669098, + "loss_ib": 0.006563299801200628, + "step": 1337 + }, + { + "ce_ib": 6.055202007293701, + "ce_orig": 1.0701017379760742, + "epoch": 0.3844992450931052, + "kl_loss": 0.2184426188468933, + "loss_ib": 0.008239627815783024, + "step": 1337 + }, + { + "ce_ib": 5.917305946350098, + "ce_orig": 0.9330586791038513, + "epoch": 0.3847868286720828, + "kl_loss": 0.24308359622955322, + "loss_ib": 0.008348141796886921, + "step": 1338 + }, + { + "ce_ib": 6.002380847930908, + "ce_orig": 0.4452073574066162, + "epoch": 0.3847868286720828, + "kl_loss": 0.30033767223358154, + "loss_ib": 0.009005757048726082, + "step": 1338 + }, + { + "ce_ib": 7.772523403167725, + "ce_orig": 0.9946969151496887, + "epoch": 0.3847868286720828, + "kl_loss": 0.4542730450630188, + "loss_ib": 0.012315253727138042, + "step": 1338 + }, + { + "ce_ib": 8.752180099487305, + "ce_orig": 1.3333590030670166, + "epoch": 0.3847868286720828, + "kl_loss": 0.3577191233634949, + "loss_ib": 0.012329370714724064, + "step": 1338 + }, + { + "ce_ib": 8.994691848754883, + "ce_orig": 0.5966424942016602, + "epoch": 0.3850744122510605, + "kl_loss": 0.44705814123153687, + "loss_ib": 0.013465273194015026, + "step": 1339 + }, + { + "ce_ib": 9.827507019042969, + "ce_orig": 1.0684852600097656, + "epoch": 0.3850744122510605, + "kl_loss": 0.18542367219924927, + "loss_ib": 0.011681743897497654, + "step": 1339 + }, + { + "ce_ib": 5.672684192657471, + "ce_orig": 0.5647222995758057, + "epoch": 0.3850744122510605, + "kl_loss": 0.2828938961029053, + "loss_ib": 0.008501622825860977, + "step": 1339 + }, + { + "ce_ib": 7.591995716094971, + "ce_orig": 0.8451278209686279, + "epoch": 0.3850744122510605, + "kl_loss": 0.24813343584537506, + "loss_ib": 0.010073330253362656, + "step": 1339 + }, + { + "epoch": 0.3853619958300381, + "grad_norm": 0.10725533962249756, + "learning_rate": 9.75143074503717e-06, + "loss": 0.8553, + "step": 1340 + }, + { + "ce_ib": 8.64397144317627, + "ce_orig": 0.8796740174293518, + "epoch": 0.3853619958300381, + "kl_loss": 0.32401660084724426, + "loss_ib": 0.01188413705676794, + "step": 1340 + }, + { + "ce_ib": 8.900287628173828, + "ce_orig": 1.103305697441101, + "epoch": 0.3853619958300381, + "kl_loss": 0.23756858706474304, + "loss_ib": 0.011275973170995712, + "step": 1340 + }, + { + "ce_ib": 6.611321926116943, + "ce_orig": 0.9286351799964905, + "epoch": 0.3853619958300381, + "kl_loss": 0.2873968482017517, + "loss_ib": 0.009485290385782719, + "step": 1340 + }, + { + "ce_ib": 5.8496599197387695, + "ce_orig": 0.9573297500610352, + "epoch": 0.3853619958300381, + "kl_loss": 0.24602648615837097, + "loss_ib": 0.008309924043715, + "step": 1340 + }, + { + "ce_ib": 12.561427116394043, + "ce_orig": 2.141439199447632, + "epoch": 0.38564957940901573, + "kl_loss": 0.2753842771053314, + "loss_ib": 0.01531527005136013, + "step": 1341 + }, + { + "ce_ib": 8.508872032165527, + "ce_orig": 1.3350071907043457, + "epoch": 0.38564957940901573, + "kl_loss": 0.30021315813064575, + "loss_ib": 0.011511003598570824, + "step": 1341 + }, + { + "ce_ib": 8.320743560791016, + "ce_orig": 1.0415490865707397, + "epoch": 0.38564957940901573, + "kl_loss": 0.29242002964019775, + "loss_ib": 0.0112449424341321, + "step": 1341 + }, + { + "ce_ib": 3.407970905303955, + "ce_orig": 0.5049671530723572, + "epoch": 0.38564957940901573, + "kl_loss": 0.3130846917629242, + "loss_ib": 0.006538817659020424, + "step": 1341 + }, + { + "ce_ib": 5.786276340484619, + "ce_orig": 0.7689294815063477, + "epoch": 0.3859371629879934, + "kl_loss": 0.2797991633415222, + "loss_ib": 0.00858426745980978, + "step": 1342 + }, + { + "ce_ib": 3.7406654357910156, + "ce_orig": 0.6262723803520203, + "epoch": 0.3859371629879934, + "kl_loss": 0.26983505487442017, + "loss_ib": 0.006439015734940767, + "step": 1342 + }, + { + "ce_ib": 7.9180755615234375, + "ce_orig": 0.9678489565849304, + "epoch": 0.3859371629879934, + "kl_loss": 0.3031494617462158, + "loss_ib": 0.010949570685625076, + "step": 1342 + }, + { + "ce_ib": 6.733755111694336, + "ce_orig": 0.9541864395141602, + "epoch": 0.3859371629879934, + "kl_loss": 0.30572301149368286, + "loss_ib": 0.00979098491370678, + "step": 1342 + }, + { + "ce_ib": 6.16925573348999, + "ce_orig": 0.9874963760375977, + "epoch": 0.38622474656697103, + "kl_loss": 0.2973823547363281, + "loss_ib": 0.00914307963103056, + "step": 1343 + }, + { + "ce_ib": 6.805942535400391, + "ce_orig": 0.5375406742095947, + "epoch": 0.38622474656697103, + "kl_loss": 0.2381344437599182, + "loss_ib": 0.009187286719679832, + "step": 1343 + }, + { + "ce_ib": 4.226902484893799, + "ce_orig": 0.5966165065765381, + "epoch": 0.38622474656697103, + "kl_loss": 0.16867578029632568, + "loss_ib": 0.005913660395890474, + "step": 1343 + }, + { + "ce_ib": 7.073963165283203, + "ce_orig": 1.2386435270309448, + "epoch": 0.38622474656697103, + "kl_loss": 0.3061671853065491, + "loss_ib": 0.010135634802281857, + "step": 1343 + }, + { + "ce_ib": 5.890805721282959, + "ce_orig": 0.6538254618644714, + "epoch": 0.38651233014594866, + "kl_loss": 0.5040521621704102, + "loss_ib": 0.010931327007710934, + "step": 1344 + }, + { + "ce_ib": 3.833566665649414, + "ce_orig": 0.6508686542510986, + "epoch": 0.38651233014594866, + "kl_loss": 0.20960384607315063, + "loss_ib": 0.005929604638367891, + "step": 1344 + }, + { + "ce_ib": 4.755572319030762, + "ce_orig": 0.8241642713546753, + "epoch": 0.38651233014594866, + "kl_loss": 0.25931376218795776, + "loss_ib": 0.0073487102054059505, + "step": 1344 + }, + { + "ce_ib": 6.719439506530762, + "ce_orig": 0.9579751491546631, + "epoch": 0.38651233014594866, + "kl_loss": 0.2530074119567871, + "loss_ib": 0.00924951396882534, + "step": 1344 + }, + { + "epoch": 0.3867999137249263, + "grad_norm": 0.10795299708843231, + "learning_rate": 9.749008464262837e-06, + "loss": 0.8552, + "step": 1345 + }, + { + "ce_ib": 5.197643756866455, + "ce_orig": 1.011999487876892, + "epoch": 0.3867999137249263, + "kl_loss": 0.24813109636306763, + "loss_ib": 0.007678954396396875, + "step": 1345 + }, + { + "ce_ib": 5.459554195404053, + "ce_orig": 0.7550332546234131, + "epoch": 0.3867999137249263, + "kl_loss": 0.23652155697345734, + "loss_ib": 0.007824769243597984, + "step": 1345 + }, + { + "ce_ib": 7.382880210876465, + "ce_orig": 1.0171791315078735, + "epoch": 0.3867999137249263, + "kl_loss": 0.2015581727027893, + "loss_ib": 0.009398462250828743, + "step": 1345 + }, + { + "ce_ib": 4.113571643829346, + "ce_orig": 0.6482676267623901, + "epoch": 0.3867999137249263, + "kl_loss": 0.3286363482475281, + "loss_ib": 0.007399934809654951, + "step": 1345 + }, + { + "ce_ib": 4.742993354797363, + "ce_orig": 0.8031437397003174, + "epoch": 0.38708749730390396, + "kl_loss": 0.17718759179115295, + "loss_ib": 0.006514869164675474, + "step": 1346 + }, + { + "ce_ib": 5.845980644226074, + "ce_orig": 0.6565824151039124, + "epoch": 0.38708749730390396, + "kl_loss": 0.20798616111278534, + "loss_ib": 0.007925841957330704, + "step": 1346 + }, + { + "ce_ib": 6.087835788726807, + "ce_orig": 0.7384578585624695, + "epoch": 0.38708749730390396, + "kl_loss": 0.2806638777256012, + "loss_ib": 0.008894474245607853, + "step": 1346 + }, + { + "ce_ib": 4.014012813568115, + "ce_orig": 0.737417995929718, + "epoch": 0.38708749730390396, + "kl_loss": 0.2056456208229065, + "loss_ib": 0.006070469040423632, + "step": 1346 + }, + { + "ce_ib": 4.660091400146484, + "ce_orig": 0.506597638130188, + "epoch": 0.3873750808828816, + "kl_loss": 0.2430647909641266, + "loss_ib": 0.007090738974511623, + "step": 1347 + }, + { + "ce_ib": 3.7780253887176514, + "ce_orig": 0.6846595406532288, + "epoch": 0.3873750808828816, + "kl_loss": 0.1995660960674286, + "loss_ib": 0.005773685872554779, + "step": 1347 + }, + { + "ce_ib": 4.743012428283691, + "ce_orig": 0.7443711161613464, + "epoch": 0.3873750808828816, + "kl_loss": 0.22071635723114014, + "loss_ib": 0.006950175389647484, + "step": 1347 + }, + { + "ce_ib": 7.79235315322876, + "ce_orig": 0.5954486131668091, + "epoch": 0.3873750808828816, + "kl_loss": 0.22928141057491302, + "loss_ib": 0.010085166431963444, + "step": 1347 + }, + { + "ce_ib": 6.381791114807129, + "ce_orig": 0.977142333984375, + "epoch": 0.3876626644618592, + "kl_loss": 0.5138674974441528, + "loss_ib": 0.011520466767251492, + "step": 1348 + }, + { + "ce_ib": 7.824554920196533, + "ce_orig": 0.7537108659744263, + "epoch": 0.3876626644618592, + "kl_loss": 0.3558143377304077, + "loss_ib": 0.011382699012756348, + "step": 1348 + }, + { + "ce_ib": 5.348059177398682, + "ce_orig": 0.6633920073509216, + "epoch": 0.3876626644618592, + "kl_loss": 0.19098712503910065, + "loss_ib": 0.007257930468767881, + "step": 1348 + }, + { + "ce_ib": 6.024688243865967, + "ce_orig": 0.9390560388565063, + "epoch": 0.3876626644618592, + "kl_loss": 0.23263616859912872, + "loss_ib": 0.00835104938596487, + "step": 1348 + }, + { + "ce_ib": 8.379621505737305, + "ce_orig": 1.0396898984909058, + "epoch": 0.3879502480408369, + "kl_loss": 0.28618064522743225, + "loss_ib": 0.0112414276227355, + "step": 1349 + }, + { + "ce_ib": 7.121238708496094, + "ce_orig": 0.9079939723014832, + "epoch": 0.3879502480408369, + "kl_loss": 0.25672510266304016, + "loss_ib": 0.009688489139080048, + "step": 1349 + }, + { + "ce_ib": 4.600920677185059, + "ce_orig": 0.5308390855789185, + "epoch": 0.3879502480408369, + "kl_loss": 0.24835561215877533, + "loss_ib": 0.007084476761519909, + "step": 1349 + }, + { + "ce_ib": 6.664026260375977, + "ce_orig": 0.8902830481529236, + "epoch": 0.3879502480408369, + "kl_loss": 0.15312698483467102, + "loss_ib": 0.008195295929908752, + "step": 1349 + }, + { + "epoch": 0.3882378316198145, + "grad_norm": 0.11668138206005096, + "learning_rate": 9.746574742024846e-06, + "loss": 0.8753, + "step": 1350 + }, + { + "ce_ib": 5.689170837402344, + "ce_orig": 0.9190986156463623, + "epoch": 0.3882378316198145, + "kl_loss": 0.2182837426662445, + "loss_ib": 0.00787200778722763, + "step": 1350 + }, + { + "ce_ib": 9.147358894348145, + "ce_orig": 1.1027569770812988, + "epoch": 0.3882378316198145, + "kl_loss": 0.3216399550437927, + "loss_ib": 0.012363757938146591, + "step": 1350 + }, + { + "ce_ib": 8.353636741638184, + "ce_orig": 0.9055649638175964, + "epoch": 0.3882378316198145, + "kl_loss": 0.2102494239807129, + "loss_ib": 0.010456129908561707, + "step": 1350 + }, + { + "ce_ib": 11.257818222045898, + "ce_orig": 1.819875717163086, + "epoch": 0.3882378316198145, + "kl_loss": 0.3288717567920685, + "loss_ib": 0.014546535909175873, + "step": 1350 + }, + { + "ce_ib": 9.890776634216309, + "ce_orig": 1.695237636566162, + "epoch": 0.38852541519879213, + "kl_loss": 0.2624281644821167, + "loss_ib": 0.012515057809650898, + "step": 1351 + }, + { + "ce_ib": 8.211555480957031, + "ce_orig": 0.7318482995033264, + "epoch": 0.38852541519879213, + "kl_loss": 0.3202785551548004, + "loss_ib": 0.011414340697228909, + "step": 1351 + }, + { + "ce_ib": 4.634771347045898, + "ce_orig": 0.513573944568634, + "epoch": 0.38852541519879213, + "kl_loss": 0.17353704571723938, + "loss_ib": 0.0063701411709189415, + "step": 1351 + }, + { + "ce_ib": 6.764926433563232, + "ce_orig": 1.0543662309646606, + "epoch": 0.38852541519879213, + "kl_loss": 0.2905517816543579, + "loss_ib": 0.009670443832874298, + "step": 1351 + }, + { + "ce_ib": 7.045490264892578, + "ce_orig": 0.8002457022666931, + "epoch": 0.3888129987777698, + "kl_loss": 0.29713061451911926, + "loss_ib": 0.010016796179115772, + "step": 1352 + }, + { + "ce_ib": 8.436086654663086, + "ce_orig": 0.9077552556991577, + "epoch": 0.3888129987777698, + "kl_loss": 0.3642025589942932, + "loss_ib": 0.012078111991286278, + "step": 1352 + }, + { + "ce_ib": 11.715314865112305, + "ce_orig": 2.0356435775756836, + "epoch": 0.3888129987777698, + "kl_loss": 0.28554120659828186, + "loss_ib": 0.01457072701305151, + "step": 1352 + }, + { + "ce_ib": 4.3168230056762695, + "ce_orig": 0.7764055132865906, + "epoch": 0.3888129987777698, + "kl_loss": 0.5350329875946045, + "loss_ib": 0.009667153470218182, + "step": 1352 + }, + { + "ce_ib": 8.007417678833008, + "ce_orig": 1.0445178747177124, + "epoch": 0.38910058235674744, + "kl_loss": 0.35413455963134766, + "loss_ib": 0.011548763141036034, + "step": 1353 + }, + { + "ce_ib": 6.87570333480835, + "ce_orig": 0.5900187492370605, + "epoch": 0.38910058235674744, + "kl_loss": 0.29607564210891724, + "loss_ib": 0.009836459532380104, + "step": 1353 + }, + { + "ce_ib": 8.155138969421387, + "ce_orig": 1.0756138563156128, + "epoch": 0.38910058235674744, + "kl_loss": 0.3444306254386902, + "loss_ib": 0.011599444784224033, + "step": 1353 + }, + { + "ce_ib": 7.465303421020508, + "ce_orig": 0.5089307427406311, + "epoch": 0.38910058235674744, + "kl_loss": 0.2902388274669647, + "loss_ib": 0.01036769151687622, + "step": 1353 + }, + { + "ce_ib": 4.054125785827637, + "ce_orig": 0.5274952054023743, + "epoch": 0.38938816593572506, + "kl_loss": 0.28459879755973816, + "loss_ib": 0.006900113541632891, + "step": 1354 + }, + { + "ce_ib": 5.810909748077393, + "ce_orig": 0.7706413865089417, + "epoch": 0.38938816593572506, + "kl_loss": 0.30980196595191956, + "loss_ib": 0.00890892930328846, + "step": 1354 + }, + { + "ce_ib": 6.011123180389404, + "ce_orig": 0.8576530814170837, + "epoch": 0.38938816593572506, + "kl_loss": 0.35513028502464294, + "loss_ib": 0.009562425315380096, + "step": 1354 + }, + { + "ce_ib": 4.335852146148682, + "ce_orig": 0.8391232490539551, + "epoch": 0.38938816593572506, + "kl_loss": 0.2350061982870102, + "loss_ib": 0.006685914471745491, + "step": 1354 + }, + { + "epoch": 0.3896757495147027, + "grad_norm": 0.10238523036241531, + "learning_rate": 9.744129584186599e-06, + "loss": 0.8873, + "step": 1355 + }, + { + "ce_ib": 6.874613285064697, + "ce_orig": 1.2522763013839722, + "epoch": 0.3896757495147027, + "kl_loss": 0.26102685928344727, + "loss_ib": 0.009484881535172462, + "step": 1355 + }, + { + "ce_ib": 3.02353835105896, + "ce_orig": 0.5653353929519653, + "epoch": 0.3896757495147027, + "kl_loss": 0.17425790429115295, + "loss_ib": 0.004766117315739393, + "step": 1355 + }, + { + "ce_ib": 8.993725776672363, + "ce_orig": 1.1802468299865723, + "epoch": 0.3896757495147027, + "kl_loss": 0.3000760078430176, + "loss_ib": 0.01199448574334383, + "step": 1355 + }, + { + "ce_ib": 9.069596290588379, + "ce_orig": 1.3481104373931885, + "epoch": 0.3896757495147027, + "kl_loss": 0.3342220187187195, + "loss_ib": 0.012411816045641899, + "step": 1355 + }, + { + "ce_ib": 5.213831901550293, + "ce_orig": 1.1847585439682007, + "epoch": 0.38996333309368036, + "kl_loss": 0.27714553475379944, + "loss_ib": 0.007985287345945835, + "step": 1356 + }, + { + "ce_ib": 6.282614707946777, + "ce_orig": 0.9747211337089539, + "epoch": 0.38996333309368036, + "kl_loss": 0.2517557144165039, + "loss_ib": 0.008800172246992588, + "step": 1356 + }, + { + "ce_ib": 5.210909843444824, + "ce_orig": 0.6339356899261475, + "epoch": 0.38996333309368036, + "kl_loss": 0.22786879539489746, + "loss_ib": 0.007489597424864769, + "step": 1356 + }, + { + "ce_ib": 6.147688865661621, + "ce_orig": 1.0988351106643677, + "epoch": 0.38996333309368036, + "kl_loss": 0.24896582961082458, + "loss_ib": 0.008637347258627415, + "step": 1356 + }, + { + "ce_ib": 7.1301350593566895, + "ce_orig": 1.1670876741409302, + "epoch": 0.390250916672658, + "kl_loss": 0.20031045377254486, + "loss_ib": 0.009133240208029747, + "step": 1357 + }, + { + "ce_ib": 5.422787189483643, + "ce_orig": 0.8434985280036926, + "epoch": 0.390250916672658, + "kl_loss": 0.28638529777526855, + "loss_ib": 0.008286640048027039, + "step": 1357 + }, + { + "ce_ib": 9.074339866638184, + "ce_orig": 1.444636344909668, + "epoch": 0.390250916672658, + "kl_loss": 0.3131384253501892, + "loss_ib": 0.012205724604427814, + "step": 1357 + }, + { + "ce_ib": 4.545176029205322, + "ce_orig": 0.431958943605423, + "epoch": 0.390250916672658, + "kl_loss": 0.26394376158714294, + "loss_ib": 0.007184613961726427, + "step": 1357 + }, + { + "ce_ib": 6.025633335113525, + "ce_orig": 0.7248860001564026, + "epoch": 0.3905385002516356, + "kl_loss": 0.30378299951553345, + "loss_ib": 0.009063462726771832, + "step": 1358 + }, + { + "ce_ib": 4.739029407501221, + "ce_orig": 0.6262908577919006, + "epoch": 0.3905385002516356, + "kl_loss": 0.2560563087463379, + "loss_ib": 0.00729959225282073, + "step": 1358 + }, + { + "ce_ib": 7.491399765014648, + "ce_orig": 1.0817595720291138, + "epoch": 0.3905385002516356, + "kl_loss": 0.5565602779388428, + "loss_ib": 0.013057002797722816, + "step": 1358 + }, + { + "ce_ib": 6.858555793762207, + "ce_orig": 1.060663104057312, + "epoch": 0.3905385002516356, + "kl_loss": 0.26650601625442505, + "loss_ib": 0.00952361524105072, + "step": 1358 + }, + { + "ce_ib": 8.05048656463623, + "ce_orig": 1.2159759998321533, + "epoch": 0.3908260838306133, + "kl_loss": 0.4176337420940399, + "loss_ib": 0.012226823717355728, + "step": 1359 + }, + { + "ce_ib": 8.304803848266602, + "ce_orig": 0.8402754068374634, + "epoch": 0.3908260838306133, + "kl_loss": 0.3264410197734833, + "loss_ib": 0.011569214053452015, + "step": 1359 + }, + { + "ce_ib": 8.870750427246094, + "ce_orig": 0.8822159171104431, + "epoch": 0.3908260838306133, + "kl_loss": 0.26001280546188354, + "loss_ib": 0.011470877565443516, + "step": 1359 + }, + { + "ce_ib": 9.573052406311035, + "ce_orig": 1.2161577939987183, + "epoch": 0.3908260838306133, + "kl_loss": 0.2086510956287384, + "loss_ib": 0.011659563519060612, + "step": 1359 + }, + { + "epoch": 0.3911136674095909, + "grad_norm": 0.11230572313070297, + "learning_rate": 9.741672996639046e-06, + "loss": 0.8671, + "step": 1360 + }, + { + "ce_ib": 5.597973823547363, + "ce_orig": 0.8666937351226807, + "epoch": 0.3911136674095909, + "kl_loss": 0.15866965055465698, + "loss_ib": 0.007184670306742191, + "step": 1360 + }, + { + "ce_ib": 1.7455174922943115, + "ce_orig": 0.21882081031799316, + "epoch": 0.3911136674095909, + "kl_loss": 0.5883793234825134, + "loss_ib": 0.007629310712218285, + "step": 1360 + }, + { + "ce_ib": 5.6036505699157715, + "ce_orig": 1.038603663444519, + "epoch": 0.3911136674095909, + "kl_loss": 0.21248281002044678, + "loss_ib": 0.007728478871285915, + "step": 1360 + }, + { + "ce_ib": 7.444608688354492, + "ce_orig": 1.4021137952804565, + "epoch": 0.3911136674095909, + "kl_loss": 0.2528078556060791, + "loss_ib": 0.009972686879336834, + "step": 1360 + }, + { + "ce_ib": 5.095190525054932, + "ce_orig": 0.4394410252571106, + "epoch": 0.39140125098856854, + "kl_loss": 0.2796628475189209, + "loss_ib": 0.007891818881034851, + "step": 1361 + }, + { + "ce_ib": 3.942939281463623, + "ce_orig": 0.5083321928977966, + "epoch": 0.39140125098856854, + "kl_loss": 0.1425539255142212, + "loss_ib": 0.005368478130549192, + "step": 1361 + }, + { + "ce_ib": 3.894341468811035, + "ce_orig": 0.5946666598320007, + "epoch": 0.39140125098856854, + "kl_loss": 0.18116998672485352, + "loss_ib": 0.005706041119992733, + "step": 1361 + }, + { + "ce_ib": 3.2327067852020264, + "ce_orig": 0.5612151026725769, + "epoch": 0.39140125098856854, + "kl_loss": 0.1936264932155609, + "loss_ib": 0.005168972071260214, + "step": 1361 + }, + { + "ce_ib": 8.949872970581055, + "ce_orig": 1.2524312734603882, + "epoch": 0.3916888345675462, + "kl_loss": 0.25393006205558777, + "loss_ib": 0.011489172466099262, + "step": 1362 + }, + { + "ce_ib": 5.752600193023682, + "ce_orig": 0.43837770819664, + "epoch": 0.3916888345675462, + "kl_loss": 0.23531876504421234, + "loss_ib": 0.008105788379907608, + "step": 1362 + }, + { + "ce_ib": 5.596198558807373, + "ce_orig": 0.5549354553222656, + "epoch": 0.3916888345675462, + "kl_loss": 0.40296921133995056, + "loss_ib": 0.009625890292227268, + "step": 1362 + }, + { + "ce_ib": 6.891479969024658, + "ce_orig": 1.2710909843444824, + "epoch": 0.3916888345675462, + "kl_loss": 0.2136833369731903, + "loss_ib": 0.009028312750160694, + "step": 1362 + }, + { + "ce_ib": 6.111293792724609, + "ce_orig": 1.0104577541351318, + "epoch": 0.39197641814652384, + "kl_loss": 0.21573954820632935, + "loss_ib": 0.0082686897367239, + "step": 1363 + }, + { + "ce_ib": 4.847911357879639, + "ce_orig": 0.6719464063644409, + "epoch": 0.39197641814652384, + "kl_loss": 0.18924060463905334, + "loss_ib": 0.006740317214280367, + "step": 1363 + }, + { + "ce_ib": 7.202842712402344, + "ce_orig": 0.8166525363922119, + "epoch": 0.39197641814652384, + "kl_loss": 0.2810524106025696, + "loss_ib": 0.010013367049396038, + "step": 1363 + }, + { + "ce_ib": 13.842483520507812, + "ce_orig": 2.174565076828003, + "epoch": 0.39197641814652384, + "kl_loss": 0.2552925944328308, + "loss_ib": 0.016395408660173416, + "step": 1363 + }, + { + "ce_ib": 9.695000648498535, + "ce_orig": 1.372729778289795, + "epoch": 0.39226400172550147, + "kl_loss": 0.2769021987915039, + "loss_ib": 0.012464022263884544, + "step": 1364 + }, + { + "ce_ib": 7.319131374359131, + "ce_orig": 0.9690439701080322, + "epoch": 0.39226400172550147, + "kl_loss": 0.30465811491012573, + "loss_ib": 0.010365712456405163, + "step": 1364 + }, + { + "ce_ib": 8.980354309082031, + "ce_orig": 1.4327776432037354, + "epoch": 0.39226400172550147, + "kl_loss": 0.21982143819332123, + "loss_ib": 0.011178568005561829, + "step": 1364 + }, + { + "ce_ib": 5.232222080230713, + "ce_orig": 0.9438716173171997, + "epoch": 0.39226400172550147, + "kl_loss": 0.21315476298332214, + "loss_ib": 0.007363769691437483, + "step": 1364 + }, + { + "epoch": 0.3925515853044791, + "grad_norm": 0.11538668721914291, + "learning_rate": 9.73920498530068e-06, + "loss": 0.8671, + "step": 1365 + }, + { + "ce_ib": 4.021336555480957, + "ce_orig": 0.4584941864013672, + "epoch": 0.3925515853044791, + "kl_loss": 0.3306760787963867, + "loss_ib": 0.0073280977085232735, + "step": 1365 + }, + { + "ce_ib": 3.9117350578308105, + "ce_orig": 0.3970031440258026, + "epoch": 0.3925515853044791, + "kl_loss": 0.1676924228668213, + "loss_ib": 0.005588659085333347, + "step": 1365 + }, + { + "ce_ib": 4.400966167449951, + "ce_orig": 0.5858869552612305, + "epoch": 0.3925515853044791, + "kl_loss": 0.5540776252746582, + "loss_ib": 0.009941741824150085, + "step": 1365 + }, + { + "ce_ib": 3.1652352809906006, + "ce_orig": 0.5204657912254333, + "epoch": 0.3925515853044791, + "kl_loss": 0.14290514588356018, + "loss_ib": 0.004594286438077688, + "step": 1365 + }, + { + "ce_ib": 8.941207885742188, + "ce_orig": 1.6999166011810303, + "epoch": 0.39283916888345677, + "kl_loss": 0.20357975363731384, + "loss_ib": 0.010977005586028099, + "step": 1366 + }, + { + "ce_ib": 9.166007995605469, + "ce_orig": 1.3682373762130737, + "epoch": 0.39283916888345677, + "kl_loss": 0.3395346999168396, + "loss_ib": 0.012561354786157608, + "step": 1366 + }, + { + "ce_ib": 4.455244541168213, + "ce_orig": 0.6007863283157349, + "epoch": 0.39283916888345677, + "kl_loss": 0.25369763374328613, + "loss_ib": 0.006992220878601074, + "step": 1366 + }, + { + "ce_ib": 7.158716201782227, + "ce_orig": 1.056156039237976, + "epoch": 0.39283916888345677, + "kl_loss": 0.18826474249362946, + "loss_ib": 0.00904136337339878, + "step": 1366 + }, + { + "ce_ib": 4.3503499031066895, + "ce_orig": 0.6633068323135376, + "epoch": 0.3931267524624344, + "kl_loss": 0.2977406978607178, + "loss_ib": 0.007327756844460964, + "step": 1367 + }, + { + "ce_ib": 2.9677188396453857, + "ce_orig": 0.5597977638244629, + "epoch": 0.3931267524624344, + "kl_loss": 0.18398423492908478, + "loss_ib": 0.004807560704648495, + "step": 1367 + }, + { + "ce_ib": 3.997077226638794, + "ce_orig": 0.5306004881858826, + "epoch": 0.3931267524624344, + "kl_loss": 0.3879128694534302, + "loss_ib": 0.007876206189393997, + "step": 1367 + }, + { + "ce_ib": 5.02707052230835, + "ce_orig": 0.6347300410270691, + "epoch": 0.3931267524624344, + "kl_loss": 0.3675246834754944, + "loss_ib": 0.008702317252755165, + "step": 1367 + }, + { + "ce_ib": 6.142372131347656, + "ce_orig": 0.7465280294418335, + "epoch": 0.393414336041412, + "kl_loss": 0.17125555872917175, + "loss_ib": 0.007854927331209183, + "step": 1368 + }, + { + "ce_ib": 6.2471022605896, + "ce_orig": 0.6920092701911926, + "epoch": 0.393414336041412, + "kl_loss": 0.26138269901275635, + "loss_ib": 0.008860929869115353, + "step": 1368 + }, + { + "ce_ib": 5.284491062164307, + "ce_orig": 0.9602318406105042, + "epoch": 0.393414336041412, + "kl_loss": 0.19777587056159973, + "loss_ib": 0.007262249942868948, + "step": 1368 + }, + { + "ce_ib": 3.394522190093994, + "ce_orig": 0.4060690104961395, + "epoch": 0.393414336041412, + "kl_loss": 0.3864419460296631, + "loss_ib": 0.007258941885083914, + "step": 1368 + }, + { + "ce_ib": 5.781606197357178, + "ce_orig": 0.7252171039581299, + "epoch": 0.3937019196203897, + "kl_loss": 0.29058459401130676, + "loss_ib": 0.008687451481819153, + "step": 1369 + }, + { + "ce_ib": 8.223978996276855, + "ce_orig": 1.25217604637146, + "epoch": 0.3937019196203897, + "kl_loss": 0.2124319076538086, + "loss_ib": 0.010348298586905003, + "step": 1369 + }, + { + "ce_ib": 3.267852544784546, + "ce_orig": 0.5731252431869507, + "epoch": 0.3937019196203897, + "kl_loss": 0.16048312187194824, + "loss_ib": 0.004872683901339769, + "step": 1369 + }, + { + "ce_ib": 3.981436014175415, + "ce_orig": 0.656374990940094, + "epoch": 0.3937019196203897, + "kl_loss": 0.2220917046070099, + "loss_ib": 0.006202353164553642, + "step": 1369 + }, + { + "epoch": 0.3939895031993673, + "grad_norm": 0.1153147965669632, + "learning_rate": 9.73672555611751e-06, + "loss": 0.8134, + "step": 1370 + }, + { + "ce_ib": 4.606146335601807, + "ce_orig": 0.7800334095954895, + "epoch": 0.3939895031993673, + "kl_loss": 0.2652835249900818, + "loss_ib": 0.007258981466293335, + "step": 1370 + }, + { + "ce_ib": 5.3803324699401855, + "ce_orig": 0.7244141697883606, + "epoch": 0.3939895031993673, + "kl_loss": 0.15984582901000977, + "loss_ib": 0.006978790741413832, + "step": 1370 + }, + { + "ce_ib": 8.137627601623535, + "ce_orig": 1.3305286169052124, + "epoch": 0.3939895031993673, + "kl_loss": 0.20696598291397095, + "loss_ib": 0.010207287967205048, + "step": 1370 + }, + { + "ce_ib": 10.457786560058594, + "ce_orig": 1.3038825988769531, + "epoch": 0.3939895031993673, + "kl_loss": 0.2336128205060959, + "loss_ib": 0.01279391348361969, + "step": 1370 + }, + { + "ce_ib": 7.365723609924316, + "ce_orig": 0.9686176180839539, + "epoch": 0.39427708677834494, + "kl_loss": 0.32739341259002686, + "loss_ib": 0.010639658197760582, + "step": 1371 + }, + { + "ce_ib": 6.254762172698975, + "ce_orig": 1.2625272274017334, + "epoch": 0.39427708677834494, + "kl_loss": 0.2828513979911804, + "loss_ib": 0.009083276614546776, + "step": 1371 + }, + { + "ce_ib": 5.433732986450195, + "ce_orig": 0.9750329852104187, + "epoch": 0.39427708677834494, + "kl_loss": 0.1383676826953888, + "loss_ib": 0.0068174097687006, + "step": 1371 + }, + { + "ce_ib": 3.722066640853882, + "ce_orig": 0.789971649646759, + "epoch": 0.39427708677834494, + "kl_loss": 0.13841679692268372, + "loss_ib": 0.005106234457343817, + "step": 1371 + }, + { + "ce_ib": 5.894031047821045, + "ce_orig": 1.156799554824829, + "epoch": 0.3945646703573226, + "kl_loss": 0.2652025520801544, + "loss_ib": 0.008546056225895882, + "step": 1372 + }, + { + "ce_ib": 5.060960292816162, + "ce_orig": 0.5705182552337646, + "epoch": 0.3945646703573226, + "kl_loss": 0.3146953284740448, + "loss_ib": 0.008207913488149643, + "step": 1372 + }, + { + "ce_ib": 3.138653039932251, + "ce_orig": 0.5762611627578735, + "epoch": 0.3945646703573226, + "kl_loss": 0.19025224447250366, + "loss_ib": 0.005041175521910191, + "step": 1372 + }, + { + "ce_ib": 7.081219673156738, + "ce_orig": 1.0405709743499756, + "epoch": 0.3945646703573226, + "kl_loss": 0.28342974185943604, + "loss_ib": 0.009915516711771488, + "step": 1372 + }, + { + "ce_ib": 6.494819164276123, + "ce_orig": 0.656830906867981, + "epoch": 0.39485225393630025, + "kl_loss": 0.22173810005187988, + "loss_ib": 0.008712200447916985, + "step": 1373 + }, + { + "ce_ib": 8.728991508483887, + "ce_orig": 1.0599778890609741, + "epoch": 0.39485225393630025, + "kl_loss": 0.2607494592666626, + "loss_ib": 0.011336485855281353, + "step": 1373 + }, + { + "ce_ib": 4.47887659072876, + "ce_orig": 0.6049656867980957, + "epoch": 0.39485225393630025, + "kl_loss": 0.20736724138259888, + "loss_ib": 0.0065525490790605545, + "step": 1373 + }, + { + "ce_ib": 5.101130962371826, + "ce_orig": 0.9835696816444397, + "epoch": 0.39485225393630025, + "kl_loss": 0.23616167902946472, + "loss_ib": 0.007462748326361179, + "step": 1373 + }, + { + "ce_ib": 4.498948097229004, + "ce_orig": 0.6125088334083557, + "epoch": 0.39513983751527787, + "kl_loss": 0.2392752468585968, + "loss_ib": 0.006891700439155102, + "step": 1374 + }, + { + "ce_ib": 5.00796365737915, + "ce_orig": 0.5471829771995544, + "epoch": 0.39513983751527787, + "kl_loss": 0.2096775323152542, + "loss_ib": 0.007104738615453243, + "step": 1374 + }, + { + "ce_ib": 5.918644905090332, + "ce_orig": 0.7079725861549377, + "epoch": 0.39513983751527787, + "kl_loss": 0.40169966220855713, + "loss_ib": 0.009935641661286354, + "step": 1374 + }, + { + "ce_ib": 9.236472129821777, + "ce_orig": 1.5323234796524048, + "epoch": 0.39513983751527787, + "kl_loss": 0.32300513982772827, + "loss_ib": 0.012466523796319962, + "step": 1374 + }, + { + "epoch": 0.3954274210942555, + "grad_norm": 0.08639590442180634, + "learning_rate": 9.734234715063058e-06, + "loss": 0.8309, + "step": 1375 + }, + { + "ce_ib": 9.688190460205078, + "ce_orig": 1.6802754402160645, + "epoch": 0.3954274210942555, + "kl_loss": 0.24151378870010376, + "loss_ib": 0.012103328481316566, + "step": 1375 + }, + { + "ce_ib": 7.782926082611084, + "ce_orig": 1.4242134094238281, + "epoch": 0.3954274210942555, + "kl_loss": 0.2984180152416229, + "loss_ib": 0.010767105966806412, + "step": 1375 + }, + { + "ce_ib": 7.2921905517578125, + "ce_orig": 1.0522878170013428, + "epoch": 0.3954274210942555, + "kl_loss": 0.23101499676704407, + "loss_ib": 0.009602339938282967, + "step": 1375 + }, + { + "ce_ib": 4.890142440795898, + "ce_orig": 0.7784461379051208, + "epoch": 0.3954274210942555, + "kl_loss": 0.21940796077251434, + "loss_ib": 0.007084221579134464, + "step": 1375 + }, + { + "ce_ib": 6.3335394859313965, + "ce_orig": 0.8000179529190063, + "epoch": 0.3957150046732332, + "kl_loss": 0.20593953132629395, + "loss_ib": 0.008392934687435627, + "step": 1376 + }, + { + "ce_ib": 6.561087608337402, + "ce_orig": 0.9470977783203125, + "epoch": 0.3957150046732332, + "kl_loss": 0.23629780113697052, + "loss_ib": 0.008924066089093685, + "step": 1376 + }, + { + "ce_ib": 4.079127311706543, + "ce_orig": 0.92641681432724, + "epoch": 0.3957150046732332, + "kl_loss": 0.20223310589790344, + "loss_ib": 0.006101457867771387, + "step": 1376 + }, + { + "ce_ib": 6.066758155822754, + "ce_orig": 0.9022270441055298, + "epoch": 0.3957150046732332, + "kl_loss": 0.267997145652771, + "loss_ib": 0.008746729232370853, + "step": 1376 + }, + { + "ce_ib": 7.5327324867248535, + "ce_orig": 1.4820178747177124, + "epoch": 0.3960025882522108, + "kl_loss": 0.20720908045768738, + "loss_ib": 0.009604823775589466, + "step": 1377 + }, + { + "ce_ib": 8.504740715026855, + "ce_orig": 0.7840504050254822, + "epoch": 0.3960025882522108, + "kl_loss": 0.2782425582408905, + "loss_ib": 0.011287165805697441, + "step": 1377 + }, + { + "ce_ib": 5.524410247802734, + "ce_orig": 0.5637204051017761, + "epoch": 0.3960025882522108, + "kl_loss": 0.23940207064151764, + "loss_ib": 0.007918431423604488, + "step": 1377 + }, + { + "ce_ib": 3.232489585876465, + "ce_orig": 0.451824426651001, + "epoch": 0.3960025882522108, + "kl_loss": 0.22094693779945374, + "loss_ib": 0.005441958550363779, + "step": 1377 + }, + { + "ce_ib": 6.773088455200195, + "ce_orig": 0.7511025071144104, + "epoch": 0.3962901718311884, + "kl_loss": 0.23778003454208374, + "loss_ib": 0.00915088877081871, + "step": 1378 + }, + { + "ce_ib": 4.436305999755859, + "ce_orig": 0.677458643913269, + "epoch": 0.3962901718311884, + "kl_loss": 0.18002289533615112, + "loss_ib": 0.006236535031348467, + "step": 1378 + }, + { + "ce_ib": 5.611198425292969, + "ce_orig": 0.9948437213897705, + "epoch": 0.3962901718311884, + "kl_loss": 0.20385992527008057, + "loss_ib": 0.007649797480553389, + "step": 1378 + }, + { + "ce_ib": 9.630160331726074, + "ce_orig": 1.6383625268936157, + "epoch": 0.3962901718311884, + "kl_loss": 0.2511659264564514, + "loss_ib": 0.01214181911200285, + "step": 1378 + }, + { + "ce_ib": 7.712647438049316, + "ce_orig": 1.0958294868469238, + "epoch": 0.3965777554101661, + "kl_loss": 0.27488973736763, + "loss_ib": 0.010461544618010521, + "step": 1379 + }, + { + "ce_ib": 6.212006092071533, + "ce_orig": 0.8772387504577637, + "epoch": 0.3965777554101661, + "kl_loss": 0.250034362077713, + "loss_ib": 0.008712350390851498, + "step": 1379 + }, + { + "ce_ib": 2.9093852043151855, + "ce_orig": 0.18342959880828857, + "epoch": 0.3965777554101661, + "kl_loss": 0.9934872388839722, + "loss_ib": 0.012844257056713104, + "step": 1379 + }, + { + "ce_ib": 7.467209339141846, + "ce_orig": 1.1859040260314941, + "epoch": 0.3965777554101661, + "kl_loss": 0.274760365486145, + "loss_ib": 0.010214812122285366, + "step": 1379 + }, + { + "epoch": 0.3968653389891437, + "grad_norm": 0.10431889444589615, + "learning_rate": 9.731732468138338e-06, + "loss": 0.939, + "step": 1380 + }, + { + "ce_ib": 6.341503143310547, + "ce_orig": 0.8641688823699951, + "epoch": 0.3968653389891437, + "kl_loss": 0.35600030422210693, + "loss_ib": 0.009901505894958973, + "step": 1380 + }, + { + "ce_ib": 4.540727615356445, + "ce_orig": 0.6933018565177917, + "epoch": 0.3968653389891437, + "kl_loss": 0.29483872652053833, + "loss_ib": 0.007489114999771118, + "step": 1380 + }, + { + "ce_ib": 5.404646873474121, + "ce_orig": 0.8426015973091125, + "epoch": 0.3968653389891437, + "kl_loss": 0.2104819267988205, + "loss_ib": 0.007509466260671616, + "step": 1380 + }, + { + "ce_ib": 6.046473026275635, + "ce_orig": 0.644819438457489, + "epoch": 0.3968653389891437, + "kl_loss": 0.21694740653038025, + "loss_ib": 0.008215947076678276, + "step": 1380 + }, + { + "ce_ib": 8.979574203491211, + "ce_orig": 1.3111884593963623, + "epoch": 0.39715292256812135, + "kl_loss": 0.2692817449569702, + "loss_ib": 0.011672391556203365, + "step": 1381 + }, + { + "ce_ib": 10.654874801635742, + "ce_orig": 1.7917509078979492, + "epoch": 0.39715292256812135, + "kl_loss": 0.24403555691242218, + "loss_ib": 0.013095230795443058, + "step": 1381 + }, + { + "ce_ib": 4.652118682861328, + "ce_orig": 0.449733704328537, + "epoch": 0.39715292256812135, + "kl_loss": 0.2827409505844116, + "loss_ib": 0.007479527965188026, + "step": 1381 + }, + { + "ce_ib": 4.677504539489746, + "ce_orig": 0.4694943428039551, + "epoch": 0.39715292256812135, + "kl_loss": 0.20924463868141174, + "loss_ib": 0.006769950967282057, + "step": 1381 + }, + { + "ce_ib": 4.163352966308594, + "ce_orig": 0.3782082498073578, + "epoch": 0.397440506147099, + "kl_loss": 0.295204222202301, + "loss_ib": 0.0071153948083519936, + "step": 1382 + }, + { + "ce_ib": 7.688187122344971, + "ce_orig": 0.9315841794013977, + "epoch": 0.397440506147099, + "kl_loss": 0.17919141054153442, + "loss_ib": 0.009480101056396961, + "step": 1382 + }, + { + "ce_ib": 7.311005592346191, + "ce_orig": 0.939527153968811, + "epoch": 0.397440506147099, + "kl_loss": 0.20597898960113525, + "loss_ib": 0.009370795451104641, + "step": 1382 + }, + { + "ce_ib": 6.882622718811035, + "ce_orig": 0.8821007013320923, + "epoch": 0.397440506147099, + "kl_loss": 0.324748694896698, + "loss_ib": 0.010130109265446663, + "step": 1382 + }, + { + "ce_ib": 6.135637283325195, + "ce_orig": 1.4052454233169556, + "epoch": 0.39772808972607665, + "kl_loss": 0.35044461488723755, + "loss_ib": 0.009640082716941833, + "step": 1383 + }, + { + "ce_ib": 2.134206771850586, + "ce_orig": 0.5873817801475525, + "epoch": 0.39772808972607665, + "kl_loss": 0.7168362140655518, + "loss_ib": 0.00930256862193346, + "step": 1383 + }, + { + "ce_ib": 11.386298179626465, + "ce_orig": 1.552770733833313, + "epoch": 0.39772808972607665, + "kl_loss": 0.3069136440753937, + "loss_ib": 0.014455433934926987, + "step": 1383 + }, + { + "ce_ib": 4.990416526794434, + "ce_orig": 0.5759061574935913, + "epoch": 0.39772808972607665, + "kl_loss": 0.19468580186367035, + "loss_ib": 0.006937274243682623, + "step": 1383 + }, + { + "ce_ib": 8.349421501159668, + "ce_orig": 1.4221105575561523, + "epoch": 0.3980156733050543, + "kl_loss": 0.24148491024971008, + "loss_ib": 0.010764271020889282, + "step": 1384 + }, + { + "ce_ib": 5.238234996795654, + "ce_orig": 0.8385730385780334, + "epoch": 0.3980156733050543, + "kl_loss": 0.19655439257621765, + "loss_ib": 0.007203779183328152, + "step": 1384 + }, + { + "ce_ib": 7.211249351501465, + "ce_orig": 1.0338671207427979, + "epoch": 0.3980156733050543, + "kl_loss": 0.31964731216430664, + "loss_ib": 0.010407721623778343, + "step": 1384 + }, + { + "ce_ib": 5.4048380851745605, + "ce_orig": 0.5029476881027222, + "epoch": 0.3980156733050543, + "kl_loss": 0.32509592175483704, + "loss_ib": 0.008655796758830547, + "step": 1384 + }, + { + "epoch": 0.3983032568840319, + "grad_norm": 0.10620667785406113, + "learning_rate": 9.729218821371844e-06, + "loss": 0.8809, + "step": 1385 + }, + { + "ce_ib": 5.505414009094238, + "ce_orig": 0.9368299245834351, + "epoch": 0.3983032568840319, + "kl_loss": 0.24715746939182281, + "loss_ib": 0.007976988330483437, + "step": 1385 + }, + { + "ce_ib": 8.991453170776367, + "ce_orig": 1.7614587545394897, + "epoch": 0.3983032568840319, + "kl_loss": 0.21385596692562103, + "loss_ib": 0.011130011640489101, + "step": 1385 + }, + { + "ce_ib": 11.152636528015137, + "ce_orig": 1.6417429447174072, + "epoch": 0.3983032568840319, + "kl_loss": 0.22926491498947144, + "loss_ib": 0.013445286080241203, + "step": 1385 + }, + { + "ce_ib": 5.502985000610352, + "ce_orig": 0.8666877150535583, + "epoch": 0.3983032568840319, + "kl_loss": 0.3327026665210724, + "loss_ib": 0.008830011822283268, + "step": 1385 + }, + { + "ce_ib": 7.255623817443848, + "ce_orig": 1.1066640615463257, + "epoch": 0.3985908404630096, + "kl_loss": 0.39020809531211853, + "loss_ib": 0.011157704517245293, + "step": 1386 + }, + { + "ce_ib": 5.247617721557617, + "ce_orig": 0.6558859348297119, + "epoch": 0.3985908404630096, + "kl_loss": 0.2507811188697815, + "loss_ib": 0.007755429018288851, + "step": 1386 + }, + { + "ce_ib": 6.0136590003967285, + "ce_orig": 0.9555683135986328, + "epoch": 0.3985908404630096, + "kl_loss": 0.2821842133998871, + "loss_ib": 0.008835501037538052, + "step": 1386 + }, + { + "ce_ib": 7.45864200592041, + "ce_orig": 1.1321091651916504, + "epoch": 0.3985908404630096, + "kl_loss": 0.18043175339698792, + "loss_ib": 0.009262959472835064, + "step": 1386 + }, + { + "ce_ib": 5.33476448059082, + "ce_orig": 0.3529107868671417, + "epoch": 0.3988784240419872, + "kl_loss": 0.2667803168296814, + "loss_ib": 0.008002568036317825, + "step": 1387 + }, + { + "ce_ib": 4.215025901794434, + "ce_orig": 0.7531979084014893, + "epoch": 0.3988784240419872, + "kl_loss": 0.18931743502616882, + "loss_ib": 0.006108200643211603, + "step": 1387 + }, + { + "ce_ib": 7.2497639656066895, + "ce_orig": 1.3689095973968506, + "epoch": 0.3988784240419872, + "kl_loss": 0.19657601416110992, + "loss_ib": 0.009215524420142174, + "step": 1387 + }, + { + "ce_ib": 4.428555488586426, + "ce_orig": 0.5836307406425476, + "epoch": 0.3988784240419872, + "kl_loss": 0.2728900909423828, + "loss_ib": 0.00715745659545064, + "step": 1387 + }, + { + "ce_ib": 8.63098430633545, + "ce_orig": 1.268597960472107, + "epoch": 0.3991660076209648, + "kl_loss": 0.351509153842926, + "loss_ib": 0.012146075256168842, + "step": 1388 + }, + { + "ce_ib": 6.511746406555176, + "ce_orig": 1.0989983081817627, + "epoch": 0.3991660076209648, + "kl_loss": 0.22151046991348267, + "loss_ib": 0.00872685108333826, + "step": 1388 + }, + { + "ce_ib": 6.183234214782715, + "ce_orig": 0.8914353847503662, + "epoch": 0.3991660076209648, + "kl_loss": 0.2892371416091919, + "loss_ib": 0.009075605310499668, + "step": 1388 + }, + { + "ce_ib": 5.882315635681152, + "ce_orig": 0.9376100301742554, + "epoch": 0.3991660076209648, + "kl_loss": 0.3167765736579895, + "loss_ib": 0.009050081484019756, + "step": 1388 + }, + { + "ce_ib": 4.763624668121338, + "ce_orig": 0.7746894955635071, + "epoch": 0.3994535911999425, + "kl_loss": 0.28361696004867554, + "loss_ib": 0.007599794305860996, + "step": 1389 + }, + { + "ce_ib": 6.894254684448242, + "ce_orig": 1.0107172727584839, + "epoch": 0.3994535911999425, + "kl_loss": 0.24421223998069763, + "loss_ib": 0.009336377494037151, + "step": 1389 + }, + { + "ce_ib": 8.682352066040039, + "ce_orig": 1.454932451248169, + "epoch": 0.3994535911999425, + "kl_loss": 0.19313442707061768, + "loss_ib": 0.010613696649670601, + "step": 1389 + }, + { + "ce_ib": 8.39739990234375, + "ce_orig": 1.3320800065994263, + "epoch": 0.3994535911999425, + "kl_loss": 0.19475214183330536, + "loss_ib": 0.010344920679926872, + "step": 1389 + }, + { + "epoch": 0.3997411747789201, + "grad_norm": 0.1351994425058365, + "learning_rate": 9.726693780819535e-06, + "loss": 0.9787, + "step": 1390 + }, + { + "ce_ib": 5.831480979919434, + "ce_orig": 0.9803398251533508, + "epoch": 0.3997411747789201, + "kl_loss": 0.2716244161128998, + "loss_ib": 0.008547725155949593, + "step": 1390 + }, + { + "ce_ib": 10.931524276733398, + "ce_orig": 1.3122738599777222, + "epoch": 0.3997411747789201, + "kl_loss": 0.24578051269054413, + "loss_ib": 0.013389328494668007, + "step": 1390 + }, + { + "ce_ib": 5.385982513427734, + "ce_orig": 0.6934446096420288, + "epoch": 0.3997411747789201, + "kl_loss": 0.18988949060440063, + "loss_ib": 0.007284877356141806, + "step": 1390 + }, + { + "ce_ib": 5.229064464569092, + "ce_orig": 0.5983728170394897, + "epoch": 0.3997411747789201, + "kl_loss": 0.33651402592658997, + "loss_ib": 0.008594205603003502, + "step": 1390 + }, + { + "ce_ib": 3.7700514793395996, + "ce_orig": 0.7275258898735046, + "epoch": 0.40002875835789775, + "kl_loss": 0.20360833406448364, + "loss_ib": 0.0058061350136995316, + "step": 1391 + }, + { + "ce_ib": 7.143492698669434, + "ce_orig": 1.1508095264434814, + "epoch": 0.40002875835789775, + "kl_loss": 0.18225440382957458, + "loss_ib": 0.008966037072241306, + "step": 1391 + }, + { + "ce_ib": 5.067678928375244, + "ce_orig": 0.6422498822212219, + "epoch": 0.40002875835789775, + "kl_loss": 0.26821526885032654, + "loss_ib": 0.0077498313039541245, + "step": 1391 + }, + { + "ce_ib": 4.1541595458984375, + "ce_orig": 0.5645342469215393, + "epoch": 0.40002875835789775, + "kl_loss": 0.23720993101596832, + "loss_ib": 0.0065262592397630215, + "step": 1391 + }, + { + "ce_ib": 7.886209964752197, + "ce_orig": 0.7300704121589661, + "epoch": 0.40031634193687543, + "kl_loss": 0.3853447437286377, + "loss_ib": 0.011739656329154968, + "step": 1392 + }, + { + "ce_ib": 8.745075225830078, + "ce_orig": 0.8196200132369995, + "epoch": 0.40031634193687543, + "kl_loss": 0.29888561367988586, + "loss_ib": 0.011733931489288807, + "step": 1392 + }, + { + "ce_ib": 11.331106185913086, + "ce_orig": 1.2489943504333496, + "epoch": 0.40031634193687543, + "kl_loss": 0.2049940675497055, + "loss_ib": 0.013381047174334526, + "step": 1392 + }, + { + "ce_ib": 6.58360481262207, + "ce_orig": 0.8827504515647888, + "epoch": 0.40031634193687543, + "kl_loss": 0.25717267394065857, + "loss_ib": 0.0091553321108222, + "step": 1392 + }, + { + "ce_ib": 7.464869499206543, + "ce_orig": 0.8372098207473755, + "epoch": 0.40060392551585305, + "kl_loss": 0.18981406092643738, + "loss_ib": 0.009363009594380856, + "step": 1393 + }, + { + "ce_ib": 3.8323893547058105, + "ce_orig": 0.5065505504608154, + "epoch": 0.40060392551585305, + "kl_loss": 0.40909343957901, + "loss_ib": 0.007923323661088943, + "step": 1393 + }, + { + "ce_ib": 6.201034069061279, + "ce_orig": 0.6383828520774841, + "epoch": 0.40060392551585305, + "kl_loss": 0.495597243309021, + "loss_ib": 0.011157006956636906, + "step": 1393 + }, + { + "ce_ib": 3.3212897777557373, + "ce_orig": 0.5181757211685181, + "epoch": 0.40060392551585305, + "kl_loss": 0.5330347418785095, + "loss_ib": 0.00865163654088974, + "step": 1393 + }, + { + "ce_ib": 5.628756999969482, + "ce_orig": 1.1345739364624023, + "epoch": 0.4008915090948307, + "kl_loss": 0.2799358367919922, + "loss_ib": 0.008428115397691727, + "step": 1394 + }, + { + "ce_ib": 7.621800899505615, + "ce_orig": 1.179695725440979, + "epoch": 0.4008915090948307, + "kl_loss": 0.20849332213401794, + "loss_ib": 0.009706733748316765, + "step": 1394 + }, + { + "ce_ib": 4.524997711181641, + "ce_orig": 0.5907806158065796, + "epoch": 0.4008915090948307, + "kl_loss": 0.2825338840484619, + "loss_ib": 0.007350336294621229, + "step": 1394 + }, + { + "ce_ib": 5.2815470695495605, + "ce_orig": 0.9510052800178528, + "epoch": 0.4008915090948307, + "kl_loss": 0.20430287718772888, + "loss_ib": 0.007324576377868652, + "step": 1394 + }, + { + "epoch": 0.4011790926738083, + "grad_norm": 0.10714766383171082, + "learning_rate": 9.724157352564818e-06, + "loss": 0.8557, + "step": 1395 + }, + { + "ce_ib": 9.26974868774414, + "ce_orig": 1.2472344636917114, + "epoch": 0.4011790926738083, + "kl_loss": 0.22076760232448578, + "loss_ib": 0.011477424763143063, + "step": 1395 + }, + { + "ce_ib": 6.311810493469238, + "ce_orig": 1.118715524673462, + "epoch": 0.4011790926738083, + "kl_loss": 0.18845130503177643, + "loss_ib": 0.008196323178708553, + "step": 1395 + }, + { + "ce_ib": 8.513527870178223, + "ce_orig": 1.4789761304855347, + "epoch": 0.4011790926738083, + "kl_loss": 0.2904071807861328, + "loss_ib": 0.011417599394917488, + "step": 1395 + }, + { + "ce_ib": 8.834794044494629, + "ce_orig": 1.0904123783111572, + "epoch": 0.4011790926738083, + "kl_loss": 0.24855023622512817, + "loss_ib": 0.011320296674966812, + "step": 1395 + }, + { + "ce_ib": 10.29422378540039, + "ce_orig": 1.6876254081726074, + "epoch": 0.401466676252786, + "kl_loss": 0.25927823781967163, + "loss_ib": 0.01288700569421053, + "step": 1396 + }, + { + "ce_ib": 4.860633850097656, + "ce_orig": 0.3961508870124817, + "epoch": 0.401466676252786, + "kl_loss": 0.377347856760025, + "loss_ib": 0.0086341118440032, + "step": 1396 + }, + { + "ce_ib": 5.413529872894287, + "ce_orig": 0.6334626078605652, + "epoch": 0.401466676252786, + "kl_loss": 0.2238302230834961, + "loss_ib": 0.007651831954717636, + "step": 1396 + }, + { + "ce_ib": 7.909429550170898, + "ce_orig": 0.8602885007858276, + "epoch": 0.401466676252786, + "kl_loss": 0.3015333116054535, + "loss_ib": 0.01092476211488247, + "step": 1396 + }, + { + "ce_ib": 11.0640869140625, + "ce_orig": 1.5119869709014893, + "epoch": 0.4017542598317636, + "kl_loss": 0.21881186962127686, + "loss_ib": 0.013252205215394497, + "step": 1397 + }, + { + "ce_ib": 7.311441421508789, + "ce_orig": 0.8810110688209534, + "epoch": 0.4017542598317636, + "kl_loss": 0.295499324798584, + "loss_ib": 0.010266435332596302, + "step": 1397 + }, + { + "ce_ib": 4.952889919281006, + "ce_orig": 0.6296012997627258, + "epoch": 0.4017542598317636, + "kl_loss": 0.23733410239219666, + "loss_ib": 0.007326230872422457, + "step": 1397 + }, + { + "ce_ib": 4.622181415557861, + "ce_orig": 0.7245978116989136, + "epoch": 0.4017542598317636, + "kl_loss": 0.20384420454502106, + "loss_ib": 0.006660623475909233, + "step": 1397 + }, + { + "ce_ib": 5.9267683029174805, + "ce_orig": 0.5333219766616821, + "epoch": 0.40204184341074123, + "kl_loss": 0.20834708213806152, + "loss_ib": 0.008010238409042358, + "step": 1398 + }, + { + "ce_ib": 1.2435208559036255, + "ce_orig": 0.12993678450584412, + "epoch": 0.40204184341074123, + "kl_loss": 0.43682751059532166, + "loss_ib": 0.005611795466393232, + "step": 1398 + }, + { + "ce_ib": 6.2769036293029785, + "ce_orig": 0.3595212697982788, + "epoch": 0.40204184341074123, + "kl_loss": 0.47299015522003174, + "loss_ib": 0.011006806045770645, + "step": 1398 + }, + { + "ce_ib": 6.465590000152588, + "ce_orig": 1.3574104309082031, + "epoch": 0.40204184341074123, + "kl_loss": 0.20642521977424622, + "loss_ib": 0.008529841899871826, + "step": 1398 + }, + { + "ce_ib": 6.165492534637451, + "ce_orig": 1.2102705240249634, + "epoch": 0.4023294269897189, + "kl_loss": 0.18871337175369263, + "loss_ib": 0.008052625693380833, + "step": 1399 + }, + { + "ce_ib": 8.377633094787598, + "ce_orig": 0.949652910232544, + "epoch": 0.4023294269897189, + "kl_loss": 0.31055009365081787, + "loss_ib": 0.011483133770525455, + "step": 1399 + }, + { + "ce_ib": 4.6876139640808105, + "ce_orig": 0.6695358157157898, + "epoch": 0.4023294269897189, + "kl_loss": 0.2681241035461426, + "loss_ib": 0.007368855178356171, + "step": 1399 + }, + { + "ce_ib": 9.898786544799805, + "ce_orig": 1.370671272277832, + "epoch": 0.4023294269897189, + "kl_loss": 0.21454685926437378, + "loss_ib": 0.012044255621731281, + "step": 1399 + }, + { + "epoch": 0.40261701056869653, + "grad_norm": 0.10930775105953217, + "learning_rate": 9.72160954271854e-06, + "loss": 0.9588, + "step": 1400 + }, + { + "ce_ib": 5.554301738739014, + "ce_orig": 0.6720674633979797, + "epoch": 0.40261701056869653, + "kl_loss": 0.2781994044780731, + "loss_ib": 0.008336296305060387, + "step": 1400 + }, + { + "ce_ib": 5.45673942565918, + "ce_orig": 0.3260180652141571, + "epoch": 0.40261701056869653, + "kl_loss": 0.32308900356292725, + "loss_ib": 0.008687629364430904, + "step": 1400 + }, + { + "ce_ib": 5.333756446838379, + "ce_orig": 0.7048057317733765, + "epoch": 0.40261701056869653, + "kl_loss": 0.22332128882408142, + "loss_ib": 0.007566969376057386, + "step": 1400 + }, + { + "ce_ib": 7.247291564941406, + "ce_orig": 1.5441960096359253, + "epoch": 0.40261701056869653, + "kl_loss": 0.2041657269001007, + "loss_ib": 0.009288948960602283, + "step": 1400 + }, + { + "ce_ib": 5.841701984405518, + "ce_orig": 1.0211917161941528, + "epoch": 0.40290459414767416, + "kl_loss": 0.15334239602088928, + "loss_ib": 0.007375125773251057, + "step": 1401 + }, + { + "ce_ib": 1.6962262392044067, + "ce_orig": 0.263886958360672, + "epoch": 0.40290459414767416, + "kl_loss": 0.3646056652069092, + "loss_ib": 0.00534228328615427, + "step": 1401 + }, + { + "ce_ib": 4.868833541870117, + "ce_orig": 0.9672197699546814, + "epoch": 0.40290459414767416, + "kl_loss": 0.1706363558769226, + "loss_ib": 0.006575196981430054, + "step": 1401 + }, + { + "ce_ib": 6.2680768966674805, + "ce_orig": 0.741491973400116, + "epoch": 0.40290459414767416, + "kl_loss": 0.2005263715982437, + "loss_ib": 0.008273339830338955, + "step": 1401 + }, + { + "ce_ib": 7.944718837738037, + "ce_orig": 0.9443780183792114, + "epoch": 0.40319217772665183, + "kl_loss": 0.2510814964771271, + "loss_ib": 0.010455533862113953, + "step": 1402 + }, + { + "ce_ib": 4.779119968414307, + "ce_orig": 0.5356561541557312, + "epoch": 0.40319217772665183, + "kl_loss": 0.21569356322288513, + "loss_ib": 0.006936055142432451, + "step": 1402 + }, + { + "ce_ib": 7.5224385261535645, + "ce_orig": 0.6601194739341736, + "epoch": 0.40319217772665183, + "kl_loss": 0.28233033418655396, + "loss_ib": 0.010345742106437683, + "step": 1402 + }, + { + "ce_ib": 2.8687710762023926, + "ce_orig": 0.41683757305145264, + "epoch": 0.40319217772665183, + "kl_loss": 0.32434821128845215, + "loss_ib": 0.00611225375905633, + "step": 1402 + }, + { + "ce_ib": 12.094705581665039, + "ce_orig": 2.08050537109375, + "epoch": 0.40347976130562946, + "kl_loss": 0.25543084740638733, + "loss_ib": 0.014649013988673687, + "step": 1403 + }, + { + "ce_ib": 5.257640361785889, + "ce_orig": 0.6185838580131531, + "epoch": 0.40347976130562946, + "kl_loss": 0.20503397285938263, + "loss_ib": 0.007307979743927717, + "step": 1403 + }, + { + "ce_ib": 6.66251802444458, + "ce_orig": 1.0003657341003418, + "epoch": 0.40347976130562946, + "kl_loss": 0.2243478149175644, + "loss_ib": 0.008905996568500996, + "step": 1403 + }, + { + "ce_ib": 3.802760124206543, + "ce_orig": 0.5514618158340454, + "epoch": 0.40347976130562946, + "kl_loss": 0.2789892256259918, + "loss_ib": 0.006592652760446072, + "step": 1403 + }, + { + "ce_ib": 5.127264499664307, + "ce_orig": 0.7384080290794373, + "epoch": 0.4037673448846071, + "kl_loss": 0.2943039536476135, + "loss_ib": 0.008070304058492184, + "step": 1404 + }, + { + "ce_ib": 6.885931491851807, + "ce_orig": 0.5815180540084839, + "epoch": 0.4037673448846071, + "kl_loss": 0.33004623651504517, + "loss_ib": 0.010186392813920975, + "step": 1404 + }, + { + "ce_ib": 11.163129806518555, + "ce_orig": 1.7125380039215088, + "epoch": 0.4037673448846071, + "kl_loss": 0.27400827407836914, + "loss_ib": 0.013903211802244186, + "step": 1404 + }, + { + "ce_ib": 7.123013019561768, + "ce_orig": 0.45505034923553467, + "epoch": 0.4037673448846071, + "kl_loss": 0.218210369348526, + "loss_ib": 0.009305116720497608, + "step": 1404 + }, + { + "epoch": 0.4040549284635847, + "grad_norm": 0.1016739085316658, + "learning_rate": 9.719050357418962e-06, + "loss": 0.8958, + "step": 1405 + }, + { + "ce_ib": 5.687928199768066, + "ce_orig": 0.8055709600448608, + "epoch": 0.4040549284635847, + "kl_loss": 0.24938495457172394, + "loss_ib": 0.008181777782738209, + "step": 1405 + }, + { + "ce_ib": 6.564266204833984, + "ce_orig": 0.8630247712135315, + "epoch": 0.4040549284635847, + "kl_loss": 0.2232058048248291, + "loss_ib": 0.008796324022114277, + "step": 1405 + }, + { + "ce_ib": 10.02625560760498, + "ce_orig": 1.6753994226455688, + "epoch": 0.4040549284635847, + "kl_loss": 0.4892248511314392, + "loss_ib": 0.014918503351509571, + "step": 1405 + }, + { + "ce_ib": 5.1657586097717285, + "ce_orig": 0.8059775233268738, + "epoch": 0.4040549284635847, + "kl_loss": 0.2578337490558624, + "loss_ib": 0.007744096219539642, + "step": 1405 + }, + { + "ce_ib": 8.515156745910645, + "ce_orig": 0.9876176118850708, + "epoch": 0.4043425120425624, + "kl_loss": 0.2993163466453552, + "loss_ib": 0.011508320458233356, + "step": 1406 + }, + { + "ce_ib": 4.786593437194824, + "ce_orig": 0.6819922924041748, + "epoch": 0.4043425120425624, + "kl_loss": 0.26278918981552124, + "loss_ib": 0.007414484862238169, + "step": 1406 + }, + { + "ce_ib": 9.841745376586914, + "ce_orig": 1.6463918685913086, + "epoch": 0.4043425120425624, + "kl_loss": 0.25848478078842163, + "loss_ib": 0.012426593340933323, + "step": 1406 + }, + { + "ce_ib": 5.537144660949707, + "ce_orig": 0.6347407102584839, + "epoch": 0.4043425120425624, + "kl_loss": 0.29522833228111267, + "loss_ib": 0.008489427156746387, + "step": 1406 + }, + { + "ce_ib": 2.009383201599121, + "ce_orig": 0.4568289816379547, + "epoch": 0.40463009562154, + "kl_loss": 0.14038433134555817, + "loss_ib": 0.003413226455450058, + "step": 1407 + }, + { + "ce_ib": 8.308806419372559, + "ce_orig": 0.9344093799591064, + "epoch": 0.40463009562154, + "kl_loss": 0.3734573721885681, + "loss_ib": 0.012043380178511143, + "step": 1407 + }, + { + "ce_ib": 7.919328212738037, + "ce_orig": 0.4728245437145233, + "epoch": 0.40463009562154, + "kl_loss": 0.3370034992694855, + "loss_ib": 0.011289362795650959, + "step": 1407 + }, + { + "ce_ib": 7.972267150878906, + "ce_orig": 1.1928261518478394, + "epoch": 0.40463009562154, + "kl_loss": 0.2522418797016144, + "loss_ib": 0.010494685731828213, + "step": 1407 + }, + { + "ce_ib": 6.536746978759766, + "ce_orig": 0.30310842394828796, + "epoch": 0.40491767920051763, + "kl_loss": 0.3365011215209961, + "loss_ib": 0.009901758283376694, + "step": 1408 + }, + { + "ce_ib": 8.056245803833008, + "ce_orig": 0.7353057861328125, + "epoch": 0.40491767920051763, + "kl_loss": 0.2854786217212677, + "loss_ib": 0.010911031626164913, + "step": 1408 + }, + { + "ce_ib": 7.960206985473633, + "ce_orig": 1.1164931058883667, + "epoch": 0.40491767920051763, + "kl_loss": 0.24506092071533203, + "loss_ib": 0.01041081640869379, + "step": 1408 + }, + { + "ce_ib": 7.368075847625732, + "ce_orig": 1.0007930994033813, + "epoch": 0.40491767920051763, + "kl_loss": 0.2809593677520752, + "loss_ib": 0.010177669115364552, + "step": 1408 + }, + { + "ce_ib": 6.244273662567139, + "ce_orig": 0.9306173324584961, + "epoch": 0.4052052627794953, + "kl_loss": 0.2776910662651062, + "loss_ib": 0.009021184407174587, + "step": 1409 + }, + { + "ce_ib": 8.91396713256836, + "ce_orig": 1.491280198097229, + "epoch": 0.4052052627794953, + "kl_loss": 0.2998870313167572, + "loss_ib": 0.011912836693227291, + "step": 1409 + }, + { + "ce_ib": 7.80165958404541, + "ce_orig": 1.2421525716781616, + "epoch": 0.4052052627794953, + "kl_loss": 0.2242795079946518, + "loss_ib": 0.010044453665614128, + "step": 1409 + }, + { + "ce_ib": 5.978887557983398, + "ce_orig": 0.6589029431343079, + "epoch": 0.4052052627794953, + "kl_loss": 0.17178526520729065, + "loss_ib": 0.0076967403292655945, + "step": 1409 + }, + { + "epoch": 0.40549284635847294, + "grad_norm": 0.11591479182243347, + "learning_rate": 9.71647980283176e-06, + "loss": 0.8355, + "step": 1410 + }, + { + "ce_ib": 5.507479667663574, + "ce_orig": 0.8018810153007507, + "epoch": 0.40549284635847294, + "kl_loss": 0.2922688126564026, + "loss_ib": 0.00843016803264618, + "step": 1410 + }, + { + "ce_ib": 4.450343608856201, + "ce_orig": 0.7255460023880005, + "epoch": 0.40549284635847294, + "kl_loss": 0.25514137744903564, + "loss_ib": 0.007001757621765137, + "step": 1410 + }, + { + "ce_ib": 4.221258640289307, + "ce_orig": 0.8187763094902039, + "epoch": 0.40549284635847294, + "kl_loss": 0.2624683976173401, + "loss_ib": 0.006845942698419094, + "step": 1410 + }, + { + "ce_ib": 7.064179420471191, + "ce_orig": 0.7070431113243103, + "epoch": 0.40549284635847294, + "kl_loss": 0.21688687801361084, + "loss_ib": 0.009233048185706139, + "step": 1410 + }, + { + "ce_ib": 7.324556350708008, + "ce_orig": 0.8796791434288025, + "epoch": 0.40578042993745056, + "kl_loss": 0.2597200870513916, + "loss_ib": 0.009921757504343987, + "step": 1411 + }, + { + "ce_ib": 10.051068305969238, + "ce_orig": 1.3552758693695068, + "epoch": 0.40578042993745056, + "kl_loss": 0.3554384410381317, + "loss_ib": 0.01360545214265585, + "step": 1411 + }, + { + "ce_ib": 3.1821837425231934, + "ce_orig": 0.5796427130699158, + "epoch": 0.40578042993745056, + "kl_loss": 0.23813700675964355, + "loss_ib": 0.005563553422689438, + "step": 1411 + }, + { + "ce_ib": 5.153987407684326, + "ce_orig": 0.7459867000579834, + "epoch": 0.40578042993745056, + "kl_loss": 0.2217734456062317, + "loss_ib": 0.007371721789240837, + "step": 1411 + }, + { + "ce_ib": 6.2244977951049805, + "ce_orig": 1.012880563735962, + "epoch": 0.40606801351642824, + "kl_loss": 0.19198933243751526, + "loss_ib": 0.00814439170062542, + "step": 1412 + }, + { + "ce_ib": 7.10197114944458, + "ce_orig": 0.8235251903533936, + "epoch": 0.40606801351642824, + "kl_loss": 0.23724320530891418, + "loss_ib": 0.009474403224885464, + "step": 1412 + }, + { + "ce_ib": 5.754896640777588, + "ce_orig": 0.8808709383010864, + "epoch": 0.40606801351642824, + "kl_loss": 0.24545630812644958, + "loss_ib": 0.008209459483623505, + "step": 1412 + }, + { + "ce_ib": 5.674515247344971, + "ce_orig": 1.1812490224838257, + "epoch": 0.40606801351642824, + "kl_loss": 0.2397882491350174, + "loss_ib": 0.00807239767163992, + "step": 1412 + }, + { + "ce_ib": 2.947330951690674, + "ce_orig": 0.6269690990447998, + "epoch": 0.40635559709540586, + "kl_loss": 0.18147560954093933, + "loss_ib": 0.004762087017297745, + "step": 1413 + }, + { + "ce_ib": 8.58889102935791, + "ce_orig": 1.1103471517562866, + "epoch": 0.40635559709540586, + "kl_loss": 0.2692915201187134, + "loss_ib": 0.011281806044280529, + "step": 1413 + }, + { + "ce_ib": 4.350068092346191, + "ce_orig": 0.6857120394706726, + "epoch": 0.40635559709540586, + "kl_loss": 0.18276429176330566, + "loss_ib": 0.006177711300551891, + "step": 1413 + }, + { + "ce_ib": 5.827058792114258, + "ce_orig": 0.6069142818450928, + "epoch": 0.40635559709540586, + "kl_loss": 0.3223385810852051, + "loss_ib": 0.009050444699823856, + "step": 1413 + }, + { + "ce_ib": 3.627539873123169, + "ce_orig": 0.5261756181716919, + "epoch": 0.4066431806743835, + "kl_loss": 0.27821996808052063, + "loss_ib": 0.006409739144146442, + "step": 1414 + }, + { + "ce_ib": 10.914916038513184, + "ce_orig": 1.7057969570159912, + "epoch": 0.4066431806743835, + "kl_loss": 0.18722708523273468, + "loss_ib": 0.012787186540663242, + "step": 1414 + }, + { + "ce_ib": 5.268122673034668, + "ce_orig": 0.8776735067367554, + "epoch": 0.4066431806743835, + "kl_loss": 0.1794731169939041, + "loss_ib": 0.007062853313982487, + "step": 1414 + }, + { + "ce_ib": 1.5480438470840454, + "ce_orig": 0.19495157897472382, + "epoch": 0.4066431806743835, + "kl_loss": 0.5221264958381653, + "loss_ib": 0.0067693088203668594, + "step": 1414 + }, + { + "epoch": 0.4069307642533611, + "grad_norm": 0.10485367476940155, + "learning_rate": 9.713897885149994e-06, + "loss": 0.8869, + "step": 1415 + }, + { + "ce_ib": 6.97669792175293, + "ce_orig": 0.885951042175293, + "epoch": 0.4069307642533611, + "kl_loss": 0.22319351136684418, + "loss_ib": 0.00920863263309002, + "step": 1415 + }, + { + "ce_ib": 6.193924903869629, + "ce_orig": 1.229131817817688, + "epoch": 0.4069307642533611, + "kl_loss": 0.2454812377691269, + "loss_ib": 0.008648737333714962, + "step": 1415 + }, + { + "ce_ib": 8.014636039733887, + "ce_orig": 1.4707450866699219, + "epoch": 0.4069307642533611, + "kl_loss": 0.3282904028892517, + "loss_ib": 0.011297540739178658, + "step": 1415 + }, + { + "ce_ib": 5.57000207901001, + "ce_orig": 0.5667921304702759, + "epoch": 0.4069307642533611, + "kl_loss": 0.23232273757457733, + "loss_ib": 0.007893229834735394, + "step": 1415 + }, + { + "ce_ib": 4.052793979644775, + "ce_orig": 0.6895977258682251, + "epoch": 0.4072183478323388, + "kl_loss": 0.16937918961048126, + "loss_ib": 0.0057465857826173306, + "step": 1416 + }, + { + "ce_ib": 9.834171295166016, + "ce_orig": 1.3297240734100342, + "epoch": 0.4072183478323388, + "kl_loss": 0.2615419030189514, + "loss_ib": 0.012449590489268303, + "step": 1416 + }, + { + "ce_ib": 6.8249831199646, + "ce_orig": 0.9767938256263733, + "epoch": 0.4072183478323388, + "kl_loss": 0.27759337425231934, + "loss_ib": 0.009600916877388954, + "step": 1416 + }, + { + "ce_ib": 5.38134765625, + "ce_orig": 0.7195479869842529, + "epoch": 0.4072183478323388, + "kl_loss": 0.23229524493217468, + "loss_ib": 0.007704299408942461, + "step": 1416 + }, + { + "ce_ib": 4.78963565826416, + "ce_orig": 0.3465440571308136, + "epoch": 0.4075059314113164, + "kl_loss": 0.2830664813518524, + "loss_ib": 0.007620300631970167, + "step": 1417 + }, + { + "ce_ib": 3.202763557434082, + "ce_orig": 0.664502739906311, + "epoch": 0.4075059314113164, + "kl_loss": 0.2345416098833084, + "loss_ib": 0.005548179615288973, + "step": 1417 + }, + { + "ce_ib": 4.993650913238525, + "ce_orig": 1.1927835941314697, + "epoch": 0.4075059314113164, + "kl_loss": 0.156328484416008, + "loss_ib": 0.006556935608386993, + "step": 1417 + }, + { + "ce_ib": 4.823815822601318, + "ce_orig": 0.8249080181121826, + "epoch": 0.4075059314113164, + "kl_loss": 0.29396092891693115, + "loss_ib": 0.007763424888253212, + "step": 1417 + }, + { + "ce_ib": 4.946149826049805, + "ce_orig": 0.41540834307670593, + "epoch": 0.40779351499029404, + "kl_loss": 0.2321927547454834, + "loss_ib": 0.00726807676255703, + "step": 1418 + }, + { + "ce_ib": 5.462716579437256, + "ce_orig": 0.7939573526382446, + "epoch": 0.40779351499029404, + "kl_loss": 0.19948364794254303, + "loss_ib": 0.007457552943378687, + "step": 1418 + }, + { + "ce_ib": 6.427265644073486, + "ce_orig": 0.6861687898635864, + "epoch": 0.40779351499029404, + "kl_loss": 0.31438255310058594, + "loss_ib": 0.009571091271936893, + "step": 1418 + }, + { + "ce_ib": 9.064435005187988, + "ce_orig": 1.7697478532791138, + "epoch": 0.40779351499029404, + "kl_loss": 0.23587557673454285, + "loss_ib": 0.011423190124332905, + "step": 1418 + }, + { + "ce_ib": 5.1803460121154785, + "ce_orig": 0.9039329886436462, + "epoch": 0.4080810985692717, + "kl_loss": 0.2533673644065857, + "loss_ib": 0.007714019622653723, + "step": 1419 + }, + { + "ce_ib": 6.159054756164551, + "ce_orig": 1.274484395980835, + "epoch": 0.4080810985692717, + "kl_loss": 0.1596750020980835, + "loss_ib": 0.007755804341286421, + "step": 1419 + }, + { + "ce_ib": 6.3064799308776855, + "ce_orig": 0.614425003528595, + "epoch": 0.4080810985692717, + "kl_loss": 0.213271364569664, + "loss_ib": 0.008439193479716778, + "step": 1419 + }, + { + "ce_ib": 8.887816429138184, + "ce_orig": 0.7756012082099915, + "epoch": 0.4080810985692717, + "kl_loss": 0.7952628135681152, + "loss_ib": 0.01684044487774372, + "step": 1419 + }, + { + "epoch": 0.40836868214824934, + "grad_norm": 0.1200951635837555, + "learning_rate": 9.711304610594104e-06, + "loss": 0.8206, + "step": 1420 + }, + { + "ce_ib": 7.012088775634766, + "ce_orig": 1.0536072254180908, + "epoch": 0.40836868214824934, + "kl_loss": 0.16178733110427856, + "loss_ib": 0.008629961870610714, + "step": 1420 + }, + { + "ce_ib": 6.098531723022461, + "ce_orig": 1.1920461654663086, + "epoch": 0.40836868214824934, + "kl_loss": 0.2587133049964905, + "loss_ib": 0.00868566520512104, + "step": 1420 + }, + { + "ce_ib": 7.746165752410889, + "ce_orig": 0.9715460538864136, + "epoch": 0.40836868214824934, + "kl_loss": 0.39986562728881836, + "loss_ib": 0.01174482237547636, + "step": 1420 + }, + { + "ce_ib": 3.6467649936676025, + "ce_orig": 0.47526460886001587, + "epoch": 0.40836868214824934, + "kl_loss": 0.15108205378055573, + "loss_ib": 0.005157585721462965, + "step": 1420 + }, + { + "ce_ib": 8.291778564453125, + "ce_orig": 1.4398640394210815, + "epoch": 0.40865626572722696, + "kl_loss": 0.3259751796722412, + "loss_ib": 0.011551530100405216, + "step": 1421 + }, + { + "ce_ib": 8.222766876220703, + "ce_orig": 0.7970226407051086, + "epoch": 0.40865626572722696, + "kl_loss": 0.3190440237522125, + "loss_ib": 0.011413206346333027, + "step": 1421 + }, + { + "ce_ib": 7.1990065574646, + "ce_orig": 1.1771275997161865, + "epoch": 0.40865626572722696, + "kl_loss": 0.1906033456325531, + "loss_ib": 0.00910503976047039, + "step": 1421 + }, + { + "ce_ib": 6.150810241699219, + "ce_orig": 0.8190860152244568, + "epoch": 0.40865626572722696, + "kl_loss": 0.20879146456718445, + "loss_ib": 0.008238724432885647, + "step": 1421 + }, + { + "ce_ib": 4.397854328155518, + "ce_orig": 0.5997467637062073, + "epoch": 0.40894384930620464, + "kl_loss": 0.17261874675750732, + "loss_ib": 0.006124041974544525, + "step": 1422 + }, + { + "ce_ib": 8.484132766723633, + "ce_orig": 1.3368240594863892, + "epoch": 0.40894384930620464, + "kl_loss": 0.3058059811592102, + "loss_ib": 0.011542192660272121, + "step": 1422 + }, + { + "ce_ib": 6.705843448638916, + "ce_orig": 1.415745735168457, + "epoch": 0.40894384930620464, + "kl_loss": 0.3636002540588379, + "loss_ib": 0.010341846384108067, + "step": 1422 + }, + { + "ce_ib": 4.8936944007873535, + "ce_orig": 0.6219208240509033, + "epoch": 0.40894384930620464, + "kl_loss": 0.2456442266702652, + "loss_ib": 0.007350136525928974, + "step": 1422 + }, + { + "ce_ib": 7.285373210906982, + "ce_orig": 0.9635927081108093, + "epoch": 0.40923143288518227, + "kl_loss": 0.3069985806941986, + "loss_ib": 0.010355358943343163, + "step": 1423 + }, + { + "ce_ib": 8.484498977661133, + "ce_orig": 1.178830623626709, + "epoch": 0.40923143288518227, + "kl_loss": 0.18762880563735962, + "loss_ib": 0.010360786691308022, + "step": 1423 + }, + { + "ce_ib": 6.52214241027832, + "ce_orig": 0.715835452079773, + "epoch": 0.40923143288518227, + "kl_loss": 0.35115742683410645, + "loss_ib": 0.010033717378973961, + "step": 1423 + }, + { + "ce_ib": 6.971340179443359, + "ce_orig": 1.2938631772994995, + "epoch": 0.40923143288518227, + "kl_loss": 0.16480334103107452, + "loss_ib": 0.00861937366425991, + "step": 1423 + }, + { + "ce_ib": 4.016520977020264, + "ce_orig": 0.6445021629333496, + "epoch": 0.4095190164641599, + "kl_loss": 0.21900208294391632, + "loss_ib": 0.006206541322171688, + "step": 1424 + }, + { + "ce_ib": 6.553114891052246, + "ce_orig": 1.2314685583114624, + "epoch": 0.4095190164641599, + "kl_loss": 0.2770439684391022, + "loss_ib": 0.009323555044829845, + "step": 1424 + }, + { + "ce_ib": 9.892790794372559, + "ce_orig": 1.7373547554016113, + "epoch": 0.4095190164641599, + "kl_loss": 0.23448815941810608, + "loss_ib": 0.012237672694027424, + "step": 1424 + }, + { + "ce_ib": 6.126859664916992, + "ce_orig": 0.7430605888366699, + "epoch": 0.4095190164641599, + "kl_loss": 0.20536071062088013, + "loss_ib": 0.00818046648055315, + "step": 1424 + }, + { + "epoch": 0.4098066000431375, + "grad_norm": 0.11239798367023468, + "learning_rate": 9.70869998541189e-06, + "loss": 0.905, + "step": 1425 + }, + { + "ce_ib": 6.2881574630737305, + "ce_orig": 0.5882934331893921, + "epoch": 0.4098066000431375, + "kl_loss": 0.40092191100120544, + "loss_ib": 0.010297376662492752, + "step": 1425 + }, + { + "ce_ib": 2.095649242401123, + "ce_orig": 0.3593219518661499, + "epoch": 0.4098066000431375, + "kl_loss": 0.20840157568454742, + "loss_ib": 0.004179664887487888, + "step": 1425 + }, + { + "ce_ib": 9.387741088867188, + "ce_orig": 1.2381709814071655, + "epoch": 0.4098066000431375, + "kl_loss": 0.19193483889102936, + "loss_ib": 0.011307088658213615, + "step": 1425 + }, + { + "ce_ib": 4.3381524085998535, + "ce_orig": 0.3203746974468231, + "epoch": 0.4098066000431375, + "kl_loss": 0.636804461479187, + "loss_ib": 0.010706196539103985, + "step": 1425 + }, + { + "ce_ib": 7.042891502380371, + "ce_orig": 0.8674129247665405, + "epoch": 0.4100941836221152, + "kl_loss": 0.33732450008392334, + "loss_ib": 0.01041613519191742, + "step": 1426 + }, + { + "ce_ib": 6.801926136016846, + "ce_orig": 0.8991611003875732, + "epoch": 0.4100941836221152, + "kl_loss": 0.15263846516609192, + "loss_ib": 0.008328311145305634, + "step": 1426 + }, + { + "ce_ib": 6.085090637207031, + "ce_orig": 0.9544994831085205, + "epoch": 0.4100941836221152, + "kl_loss": 0.23805543780326843, + "loss_ib": 0.008465644903481007, + "step": 1426 + }, + { + "ce_ib": 6.1746296882629395, + "ce_orig": 0.7697358131408691, + "epoch": 0.4100941836221152, + "kl_loss": 0.2907295525074005, + "loss_ib": 0.009081925265491009, + "step": 1426 + }, + { + "ce_ib": 5.99095344543457, + "ce_orig": 0.7473070025444031, + "epoch": 0.4103817672010928, + "kl_loss": 0.2807786464691162, + "loss_ib": 0.00879873987287283, + "step": 1427 + }, + { + "ce_ib": 5.068008899688721, + "ce_orig": 0.7087705731391907, + "epoch": 0.4103817672010928, + "kl_loss": 0.21263612806797028, + "loss_ib": 0.0071943700313568115, + "step": 1427 + }, + { + "ce_ib": 5.395236015319824, + "ce_orig": 0.853853702545166, + "epoch": 0.4103817672010928, + "kl_loss": 0.2351619303226471, + "loss_ib": 0.007746854797005653, + "step": 1427 + }, + { + "ce_ib": 8.231751441955566, + "ce_orig": 1.556867003440857, + "epoch": 0.4103817672010928, + "kl_loss": 0.24943526089191437, + "loss_ib": 0.01072610355913639, + "step": 1427 + }, + { + "ce_ib": 9.276421546936035, + "ce_orig": 1.3323252201080322, + "epoch": 0.41066935078007044, + "kl_loss": 0.18265675008296967, + "loss_ib": 0.011102988384664059, + "step": 1428 + }, + { + "ce_ib": 6.04024076461792, + "ce_orig": 0.7419769167900085, + "epoch": 0.41066935078007044, + "kl_loss": 0.4130827784538269, + "loss_ib": 0.010171068832278252, + "step": 1428 + }, + { + "ce_ib": 7.3178300857543945, + "ce_orig": 1.3224425315856934, + "epoch": 0.41066935078007044, + "kl_loss": 0.23849479854106903, + "loss_ib": 0.009702778421342373, + "step": 1428 + }, + { + "ce_ib": 9.07182502746582, + "ce_orig": 1.5430114269256592, + "epoch": 0.41066935078007044, + "kl_loss": 0.26186248660087585, + "loss_ib": 0.01169044990092516, + "step": 1428 + }, + { + "ce_ib": 5.606130123138428, + "ce_orig": 0.9684110879898071, + "epoch": 0.4109569343590481, + "kl_loss": 0.28772905468940735, + "loss_ib": 0.008483420126140118, + "step": 1429 + }, + { + "ce_ib": 7.555757999420166, + "ce_orig": 0.9240909814834595, + "epoch": 0.4109569343590481, + "kl_loss": 0.20850898325443268, + "loss_ib": 0.009640848264098167, + "step": 1429 + }, + { + "ce_ib": 4.836035251617432, + "ce_orig": 0.8449050188064575, + "epoch": 0.4109569343590481, + "kl_loss": 0.23588550090789795, + "loss_ib": 0.007194890175014734, + "step": 1429 + }, + { + "ce_ib": 4.731801986694336, + "ce_orig": 0.5297708511352539, + "epoch": 0.4109569343590481, + "kl_loss": 0.2536720931529999, + "loss_ib": 0.0072685228660702705, + "step": 1429 + }, + { + "epoch": 0.41124451793802574, + "grad_norm": 0.10925720632076263, + "learning_rate": 9.706084015878496e-06, + "loss": 0.8944, + "step": 1430 + }, + { + "ce_ib": 5.5966572761535645, + "ce_orig": 0.7206467390060425, + "epoch": 0.41124451793802574, + "kl_loss": 0.27587223052978516, + "loss_ib": 0.008355379104614258, + "step": 1430 + }, + { + "ce_ib": 6.693604946136475, + "ce_orig": 0.7028912305831909, + "epoch": 0.41124451793802574, + "kl_loss": 0.3054695129394531, + "loss_ib": 0.009748300537467003, + "step": 1430 + }, + { + "ce_ib": 8.012578964233398, + "ce_orig": 0.9017972350120544, + "epoch": 0.41124451793802574, + "kl_loss": 0.35887661576271057, + "loss_ib": 0.011601345613598824, + "step": 1430 + }, + { + "ce_ib": 7.473693370819092, + "ce_orig": 1.0751852989196777, + "epoch": 0.41124451793802574, + "kl_loss": 0.25154802203178406, + "loss_ib": 0.009989173151552677, + "step": 1430 + }, + { + "ce_ib": 4.834800720214844, + "ce_orig": 0.4753416180610657, + "epoch": 0.41153210151700337, + "kl_loss": 0.29349297285079956, + "loss_ib": 0.007769729942083359, + "step": 1431 + }, + { + "ce_ib": 5.113678932189941, + "ce_orig": 0.7640902996063232, + "epoch": 0.41153210151700337, + "kl_loss": 0.30045798420906067, + "loss_ib": 0.00811825878918171, + "step": 1431 + }, + { + "ce_ib": 7.861815452575684, + "ce_orig": 1.0139867067337036, + "epoch": 0.41153210151700337, + "kl_loss": 0.1912413090467453, + "loss_ib": 0.009774228557944298, + "step": 1431 + }, + { + "ce_ib": 6.809225082397461, + "ce_orig": 0.9317693710327148, + "epoch": 0.41153210151700337, + "kl_loss": 0.3157738447189331, + "loss_ib": 0.009966962970793247, + "step": 1431 + }, + { + "ce_ib": 3.798652172088623, + "ce_orig": 0.19985660910606384, + "epoch": 0.411819685095981, + "kl_loss": 0.38498273491859436, + "loss_ib": 0.007648479659110308, + "step": 1432 + }, + { + "ce_ib": 5.891729354858398, + "ce_orig": 1.026416301727295, + "epoch": 0.411819685095981, + "kl_loss": 0.18191149830818176, + "loss_ib": 0.007710844278335571, + "step": 1432 + }, + { + "ce_ib": 5.983758926391602, + "ce_orig": 0.9381806254386902, + "epoch": 0.411819685095981, + "kl_loss": 0.18528085947036743, + "loss_ib": 0.007836567237973213, + "step": 1432 + }, + { + "ce_ib": 5.368931770324707, + "ce_orig": 0.6532841324806213, + "epoch": 0.411819685095981, + "kl_loss": 0.23761454224586487, + "loss_ib": 0.007745077367872, + "step": 1432 + }, + { + "ce_ib": 4.2342753410339355, + "ce_orig": 0.6612984538078308, + "epoch": 0.41210726867495867, + "kl_loss": 0.47673696279525757, + "loss_ib": 0.00900164432823658, + "step": 1433 + }, + { + "ce_ib": 7.980175018310547, + "ce_orig": 1.3077398538589478, + "epoch": 0.41210726867495867, + "kl_loss": 0.2359972447156906, + "loss_ib": 0.010340146720409393, + "step": 1433 + }, + { + "ce_ib": 5.692867279052734, + "ce_orig": 0.4656047224998474, + "epoch": 0.41210726867495867, + "kl_loss": 0.32997220754623413, + "loss_ib": 0.008992589078843594, + "step": 1433 + }, + { + "ce_ib": 7.435876369476318, + "ce_orig": 1.2225924730300903, + "epoch": 0.41210726867495867, + "kl_loss": 0.3203752338886261, + "loss_ib": 0.010639629326760769, + "step": 1433 + }, + { + "ce_ib": 5.403990268707275, + "ce_orig": 0.548412561416626, + "epoch": 0.4123948522539363, + "kl_loss": 0.23780032992362976, + "loss_ib": 0.007781993132084608, + "step": 1434 + }, + { + "ce_ib": 5.518211841583252, + "ce_orig": 0.7836940288543701, + "epoch": 0.4123948522539363, + "kl_loss": 0.2192806899547577, + "loss_ib": 0.007711017969995737, + "step": 1434 + }, + { + "ce_ib": 6.065393447875977, + "ce_orig": 0.8244072198867798, + "epoch": 0.4123948522539363, + "kl_loss": 0.16638624668121338, + "loss_ib": 0.007729256059974432, + "step": 1434 + }, + { + "ce_ib": 4.7424116134643555, + "ce_orig": 0.6653726100921631, + "epoch": 0.4123948522539363, + "kl_loss": 0.2525525391101837, + "loss_ib": 0.007267937064170837, + "step": 1434 + }, + { + "epoch": 0.4126824358329139, + "grad_norm": 0.09591538459062576, + "learning_rate": 9.703456708296405e-06, + "loss": 0.8678, + "step": 1435 + }, + { + "ce_ib": 5.17003870010376, + "ce_orig": 0.6860577464103699, + "epoch": 0.4126824358329139, + "kl_loss": 0.239205002784729, + "loss_ib": 0.007562088780105114, + "step": 1435 + }, + { + "ce_ib": 6.399457931518555, + "ce_orig": 1.0888773202896118, + "epoch": 0.4126824358329139, + "kl_loss": 0.1467989981174469, + "loss_ib": 0.007867448031902313, + "step": 1435 + }, + { + "ce_ib": 6.637815952301025, + "ce_orig": 0.8630712628364563, + "epoch": 0.4126824358329139, + "kl_loss": 0.19071052968502045, + "loss_ib": 0.008544920943677425, + "step": 1435 + }, + { + "ce_ib": 4.753064155578613, + "ce_orig": 0.8432294726371765, + "epoch": 0.4126824358329139, + "kl_loss": 0.2774086892604828, + "loss_ib": 0.007527151145040989, + "step": 1435 + }, + { + "ce_ib": 10.428075790405273, + "ce_orig": 1.4622315168380737, + "epoch": 0.4129700194118916, + "kl_loss": 0.1714332103729248, + "loss_ib": 0.012142407707870007, + "step": 1436 + }, + { + "ce_ib": 4.453615188598633, + "ce_orig": 0.5258841514587402, + "epoch": 0.4129700194118916, + "kl_loss": 0.19536477327346802, + "loss_ib": 0.006407263223081827, + "step": 1436 + }, + { + "ce_ib": 4.699621200561523, + "ce_orig": 0.3211705982685089, + "epoch": 0.4129700194118916, + "kl_loss": 0.3277928829193115, + "loss_ib": 0.00797754991799593, + "step": 1436 + }, + { + "ce_ib": 7.0514140129089355, + "ce_orig": 1.1391209363937378, + "epoch": 0.4129700194118916, + "kl_loss": 0.24634301662445068, + "loss_ib": 0.009514844045042992, + "step": 1436 + }, + { + "ce_ib": 3.10416316986084, + "ce_orig": 0.34879812598228455, + "epoch": 0.4132576029908692, + "kl_loss": 0.5351189970970154, + "loss_ib": 0.008455352857708931, + "step": 1437 + }, + { + "ce_ib": 6.911233425140381, + "ce_orig": 0.7747100591659546, + "epoch": 0.4132576029908692, + "kl_loss": 0.3284524977207184, + "loss_ib": 0.010195758193731308, + "step": 1437 + }, + { + "ce_ib": 8.081716537475586, + "ce_orig": 1.54729163646698, + "epoch": 0.4132576029908692, + "kl_loss": 0.22005927562713623, + "loss_ib": 0.010282308794558048, + "step": 1437 + }, + { + "ce_ib": 4.381701946258545, + "ce_orig": 0.7111383080482483, + "epoch": 0.4132576029908692, + "kl_loss": 0.23173516988754272, + "loss_ib": 0.006699053570628166, + "step": 1437 + }, + { + "ce_ib": 3.102794885635376, + "ce_orig": 0.6434416770935059, + "epoch": 0.41354518656984685, + "kl_loss": 0.1948084980249405, + "loss_ib": 0.0050508794374763966, + "step": 1438 + }, + { + "ce_ib": 2.404301404953003, + "ce_orig": 0.49301013350486755, + "epoch": 0.41354518656984685, + "kl_loss": 0.1756540983915329, + "loss_ib": 0.004160842392593622, + "step": 1438 + }, + { + "ce_ib": 6.963180065155029, + "ce_orig": 1.3057996034622192, + "epoch": 0.41354518656984685, + "kl_loss": 0.2243916094303131, + "loss_ib": 0.009207095950841904, + "step": 1438 + }, + { + "ce_ib": 7.458621978759766, + "ce_orig": 1.0133202075958252, + "epoch": 0.41354518656984685, + "kl_loss": 0.2954729497432709, + "loss_ib": 0.01041335053741932, + "step": 1438 + }, + { + "ce_ib": 6.399861812591553, + "ce_orig": 0.6673619747161865, + "epoch": 0.4138327701488245, + "kl_loss": 0.2691270709037781, + "loss_ib": 0.009091132320463657, + "step": 1439 + }, + { + "ce_ib": 3.874779224395752, + "ce_orig": 0.32614997029304504, + "epoch": 0.4138327701488245, + "kl_loss": 0.21969608962535858, + "loss_ib": 0.006071740295737982, + "step": 1439 + }, + { + "ce_ib": 6.7588372230529785, + "ce_orig": 0.9650707244873047, + "epoch": 0.4138327701488245, + "kl_loss": 0.257515549659729, + "loss_ib": 0.009333992376923561, + "step": 1439 + }, + { + "ce_ib": 5.581239700317383, + "ce_orig": 0.7861056923866272, + "epoch": 0.4138327701488245, + "kl_loss": 0.17133313417434692, + "loss_ib": 0.007294571027159691, + "step": 1439 + }, + { + "epoch": 0.41412035372780215, + "grad_norm": 0.08960733562707901, + "learning_rate": 9.700818068995407e-06, + "loss": 0.8817, + "step": 1440 + }, + { + "ce_ib": 7.127894401550293, + "ce_orig": 1.099780559539795, + "epoch": 0.41412035372780215, + "kl_loss": 0.21275877952575684, + "loss_ib": 0.009255481883883476, + "step": 1440 + }, + { + "ce_ib": 5.4239821434021, + "ce_orig": 1.1731576919555664, + "epoch": 0.41412035372780215, + "kl_loss": 0.21886183321475983, + "loss_ib": 0.007612599991261959, + "step": 1440 + }, + { + "ce_ib": 5.250892639160156, + "ce_orig": 0.7625457644462585, + "epoch": 0.41412035372780215, + "kl_loss": 0.19820094108581543, + "loss_ib": 0.007232902105897665, + "step": 1440 + }, + { + "ce_ib": 6.036355972290039, + "ce_orig": 0.5982348322868347, + "epoch": 0.41412035372780215, + "kl_loss": 0.20665858685970306, + "loss_ib": 0.008102942258119583, + "step": 1440 + }, + { + "ce_ib": 3.7877354621887207, + "ce_orig": 0.6050831079483032, + "epoch": 0.4144079373067798, + "kl_loss": 0.22110241651535034, + "loss_ib": 0.005998759064823389, + "step": 1441 + }, + { + "ce_ib": 7.939857006072998, + "ce_orig": 1.3141567707061768, + "epoch": 0.4144079373067798, + "kl_loss": 0.47960013151168823, + "loss_ib": 0.012735857628285885, + "step": 1441 + }, + { + "ce_ib": 4.724193096160889, + "ce_orig": 0.5596928000450134, + "epoch": 0.4144079373067798, + "kl_loss": 0.25891292095184326, + "loss_ib": 0.007313322275876999, + "step": 1441 + }, + { + "ce_ib": 4.276533126831055, + "ce_orig": 0.42426598072052, + "epoch": 0.4144079373067798, + "kl_loss": 0.26543980836868286, + "loss_ib": 0.006930931005626917, + "step": 1441 + }, + { + "ce_ib": 3.9375219345092773, + "ce_orig": 0.5840734243392944, + "epoch": 0.4146955208857574, + "kl_loss": 0.19807593524456024, + "loss_ib": 0.005918281152844429, + "step": 1442 + }, + { + "ce_ib": 5.756697654724121, + "ce_orig": 0.6194900870323181, + "epoch": 0.4146955208857574, + "kl_loss": 0.2578275203704834, + "loss_ib": 0.008334972895681858, + "step": 1442 + }, + { + "ce_ib": 5.568426132202148, + "ce_orig": 0.6911214590072632, + "epoch": 0.4146955208857574, + "kl_loss": 0.29421865940093994, + "loss_ib": 0.00851061288267374, + "step": 1442 + }, + { + "ce_ib": 8.481583595275879, + "ce_orig": 1.2472021579742432, + "epoch": 0.4146955208857574, + "kl_loss": 0.2462369054555893, + "loss_ib": 0.010943952947854996, + "step": 1442 + }, + { + "ce_ib": 5.0001420974731445, + "ce_orig": 0.6307148337364197, + "epoch": 0.4149831044647351, + "kl_loss": 0.22077739238739014, + "loss_ib": 0.007207916118204594, + "step": 1443 + }, + { + "ce_ib": 4.8295063972473145, + "ce_orig": 0.3529702425003052, + "epoch": 0.4149831044647351, + "kl_loss": 0.2350378781557083, + "loss_ib": 0.007179885171353817, + "step": 1443 + }, + { + "ce_ib": 5.45084810256958, + "ce_orig": 0.8416758179664612, + "epoch": 0.4149831044647351, + "kl_loss": 0.503132164478302, + "loss_ib": 0.01048217061907053, + "step": 1443 + }, + { + "ce_ib": 6.197181701660156, + "ce_orig": 0.5805358290672302, + "epoch": 0.4149831044647351, + "kl_loss": 0.1916370987892151, + "loss_ib": 0.008113552816212177, + "step": 1443 + }, + { + "ce_ib": 5.213675498962402, + "ce_orig": 0.5786038041114807, + "epoch": 0.4152706880437127, + "kl_loss": 0.18455766141414642, + "loss_ib": 0.007059251889586449, + "step": 1444 + }, + { + "ce_ib": 3.5388176441192627, + "ce_orig": 0.5333930850028992, + "epoch": 0.4152706880437127, + "kl_loss": 0.14553654193878174, + "loss_ib": 0.004994182847440243, + "step": 1444 + }, + { + "ce_ib": 3.691051959991455, + "ce_orig": 0.5889726281166077, + "epoch": 0.4152706880437127, + "kl_loss": 0.17222023010253906, + "loss_ib": 0.005413254257291555, + "step": 1444 + }, + { + "ce_ib": 7.286379814147949, + "ce_orig": 0.6404244899749756, + "epoch": 0.4152706880437127, + "kl_loss": 0.3586054742336273, + "loss_ib": 0.01087243389338255, + "step": 1444 + }, + { + "epoch": 0.4155582716226903, + "grad_norm": 0.11402870714664459, + "learning_rate": 9.6981681043326e-06, + "loss": 0.8482, + "step": 1445 + }, + { + "ce_ib": 4.635682106018066, + "ce_orig": 0.7357145547866821, + "epoch": 0.4155582716226903, + "kl_loss": 0.18429890275001526, + "loss_ib": 0.00647867051884532, + "step": 1445 + }, + { + "ce_ib": 8.371834754943848, + "ce_orig": 1.3967629671096802, + "epoch": 0.4155582716226903, + "kl_loss": 0.25143635272979736, + "loss_ib": 0.010886197909712791, + "step": 1445 + }, + { + "ce_ib": 4.193347930908203, + "ce_orig": 0.815974771976471, + "epoch": 0.4155582716226903, + "kl_loss": 0.17522263526916504, + "loss_ib": 0.005945574026554823, + "step": 1445 + }, + { + "ce_ib": 4.08240270614624, + "ce_orig": 0.6947717666625977, + "epoch": 0.4155582716226903, + "kl_loss": 0.13767096400260925, + "loss_ib": 0.005459112580865622, + "step": 1445 + }, + { + "ce_ib": 4.673977375030518, + "ce_orig": 0.7940452098846436, + "epoch": 0.415845855201668, + "kl_loss": 0.19539020955562592, + "loss_ib": 0.006627879571169615, + "step": 1446 + }, + { + "ce_ib": 6.093282699584961, + "ce_orig": 0.8800034523010254, + "epoch": 0.415845855201668, + "kl_loss": 0.26846742630004883, + "loss_ib": 0.00877795647829771, + "step": 1446 + }, + { + "ce_ib": 4.820080757141113, + "ce_orig": 0.34295347332954407, + "epoch": 0.415845855201668, + "kl_loss": 0.3963184952735901, + "loss_ib": 0.008783265016973019, + "step": 1446 + }, + { + "ce_ib": 8.66891098022461, + "ce_orig": 1.160307765007019, + "epoch": 0.415845855201668, + "kl_loss": 0.18693780899047852, + "loss_ib": 0.010538289323449135, + "step": 1446 + }, + { + "ce_ib": 8.202392578125, + "ce_orig": 0.8199527263641357, + "epoch": 0.4161334387806456, + "kl_loss": 0.3237569332122803, + "loss_ib": 0.011439962312579155, + "step": 1447 + }, + { + "ce_ib": 5.400129318237305, + "ce_orig": 0.445811003446579, + "epoch": 0.4161334387806456, + "kl_loss": 0.2745596170425415, + "loss_ib": 0.008145725354552269, + "step": 1447 + }, + { + "ce_ib": 3.9494316577911377, + "ce_orig": 0.4661496877670288, + "epoch": 0.4161334387806456, + "kl_loss": 0.3082231879234314, + "loss_ib": 0.007031663320958614, + "step": 1447 + }, + { + "ce_ib": 5.456274032592773, + "ce_orig": 0.9136053323745728, + "epoch": 0.4161334387806456, + "kl_loss": 0.2540931701660156, + "loss_ib": 0.00799720548093319, + "step": 1447 + }, + { + "ce_ib": 11.496981620788574, + "ce_orig": 1.669168472290039, + "epoch": 0.41642102235962325, + "kl_loss": 0.2675935626029968, + "loss_ib": 0.014172916300594807, + "step": 1448 + }, + { + "ce_ib": 6.146816730499268, + "ce_orig": 0.32925063371658325, + "epoch": 0.41642102235962325, + "kl_loss": 0.3541129231452942, + "loss_ib": 0.009687945246696472, + "step": 1448 + }, + { + "ce_ib": 6.110250473022461, + "ce_orig": 0.9609233736991882, + "epoch": 0.41642102235962325, + "kl_loss": 0.21723921597003937, + "loss_ib": 0.008282641880214214, + "step": 1448 + }, + { + "ce_ib": 2.770850419998169, + "ce_orig": 0.3007255494594574, + "epoch": 0.41642102235962325, + "kl_loss": 0.7660866379737854, + "loss_ib": 0.010431716218590736, + "step": 1448 + }, + { + "ce_ib": 3.9824109077453613, + "ce_orig": 0.590600311756134, + "epoch": 0.41670860593860093, + "kl_loss": 0.3264272212982178, + "loss_ib": 0.00724668288603425, + "step": 1449 + }, + { + "ce_ib": 4.859111785888672, + "ce_orig": 0.7057644724845886, + "epoch": 0.41670860593860093, + "kl_loss": 0.2452726811170578, + "loss_ib": 0.0073118386790156364, + "step": 1449 + }, + { + "ce_ib": 7.2970404624938965, + "ce_orig": 0.9776987433433533, + "epoch": 0.41670860593860093, + "kl_loss": 0.266795814037323, + "loss_ib": 0.009964998811483383, + "step": 1449 + }, + { + "ce_ib": 5.001408576965332, + "ce_orig": 0.7703402638435364, + "epoch": 0.41670860593860093, + "kl_loss": 0.2569698095321655, + "loss_ib": 0.007571106310933828, + "step": 1449 + }, + { + "epoch": 0.41699618951757855, + "grad_norm": 0.10733474045991898, + "learning_rate": 9.69550682069236e-06, + "loss": 0.8422, + "step": 1450 + }, + { + "ce_ib": 6.8307037353515625, + "ce_orig": 0.8752904534339905, + "epoch": 0.41699618951757855, + "kl_loss": 0.37020280957221985, + "loss_ib": 0.010532732121646404, + "step": 1450 + }, + { + "ce_ib": 6.152828216552734, + "ce_orig": 0.8328402638435364, + "epoch": 0.41699618951757855, + "kl_loss": 0.3578481078147888, + "loss_ib": 0.009731309488415718, + "step": 1450 + }, + { + "ce_ib": 4.250329971313477, + "ce_orig": 0.46717268228530884, + "epoch": 0.41699618951757855, + "kl_loss": 0.32806381583213806, + "loss_ib": 0.0075309681706130505, + "step": 1450 + }, + { + "ce_ib": 5.095127105712891, + "ce_orig": 0.3596700131893158, + "epoch": 0.41699618951757855, + "kl_loss": 0.24298208951950073, + "loss_ib": 0.007524948101490736, + "step": 1450 + }, + { + "ce_ib": 5.839034080505371, + "ce_orig": 0.8622336387634277, + "epoch": 0.4172837730965562, + "kl_loss": 0.22366026043891907, + "loss_ib": 0.008075636811554432, + "step": 1451 + }, + { + "ce_ib": 6.032916069030762, + "ce_orig": 1.2333753108978271, + "epoch": 0.4172837730965562, + "kl_loss": 0.27827978134155273, + "loss_ib": 0.008815714158117771, + "step": 1451 + }, + { + "ce_ib": 6.857519626617432, + "ce_orig": 0.8044654130935669, + "epoch": 0.4172837730965562, + "kl_loss": 0.29137736558914185, + "loss_ib": 0.00977129302918911, + "step": 1451 + }, + { + "ce_ib": 5.73335075378418, + "ce_orig": 0.6533335447311401, + "epoch": 0.4172837730965562, + "kl_loss": 0.2547338008880615, + "loss_ib": 0.008280688896775246, + "step": 1451 + }, + { + "ce_ib": 8.236536026000977, + "ce_orig": 1.2800368070602417, + "epoch": 0.4175713566755338, + "kl_loss": 0.205164834856987, + "loss_ib": 0.010288184508681297, + "step": 1452 + }, + { + "ce_ib": 7.160531044006348, + "ce_orig": 0.6678914427757263, + "epoch": 0.4175713566755338, + "kl_loss": 0.357463538646698, + "loss_ib": 0.010735166259109974, + "step": 1452 + }, + { + "ce_ib": 5.64363431930542, + "ce_orig": 0.8172556757926941, + "epoch": 0.4175713566755338, + "kl_loss": 0.2605098485946655, + "loss_ib": 0.008248732425272465, + "step": 1452 + }, + { + "ce_ib": 6.310688495635986, + "ce_orig": 0.6108517646789551, + "epoch": 0.4175713566755338, + "kl_loss": 0.2450312376022339, + "loss_ib": 0.008761000819504261, + "step": 1452 + }, + { + "ce_ib": 4.53042459487915, + "ce_orig": 0.8491594791412354, + "epoch": 0.4178589402545115, + "kl_loss": 0.2809299826622009, + "loss_ib": 0.007339724339544773, + "step": 1453 + }, + { + "ce_ib": 6.299931526184082, + "ce_orig": 0.6813634634017944, + "epoch": 0.4178589402545115, + "kl_loss": 0.21116459369659424, + "loss_ib": 0.008411576971411705, + "step": 1453 + }, + { + "ce_ib": 7.031019687652588, + "ce_orig": 0.8715657591819763, + "epoch": 0.4178589402545115, + "kl_loss": 0.2228662371635437, + "loss_ib": 0.009259682148694992, + "step": 1453 + }, + { + "ce_ib": 6.7674736976623535, + "ce_orig": 0.8163739442825317, + "epoch": 0.4178589402545115, + "kl_loss": 0.31730207800865173, + "loss_ib": 0.0099404938519001, + "step": 1453 + }, + { + "ce_ib": 6.6158528327941895, + "ce_orig": 0.8040879964828491, + "epoch": 0.4181465238334891, + "kl_loss": 0.3747669458389282, + "loss_ib": 0.010363521054387093, + "step": 1454 + }, + { + "ce_ib": 4.000415802001953, + "ce_orig": 0.6374342441558838, + "epoch": 0.4181465238334891, + "kl_loss": 0.19590912759304047, + "loss_ib": 0.005959507077932358, + "step": 1454 + }, + { + "ce_ib": 7.182856559753418, + "ce_orig": 1.3756706714630127, + "epoch": 0.4181465238334891, + "kl_loss": 0.27559158205986023, + "loss_ib": 0.009938772767782211, + "step": 1454 + }, + { + "ce_ib": 7.4786176681518555, + "ce_orig": 1.2998679876327515, + "epoch": 0.4181465238334891, + "kl_loss": 0.2330915331840515, + "loss_ib": 0.009809533134102821, + "step": 1454 + }, + { + "epoch": 0.4184341074124667, + "grad_norm": 0.12323027849197388, + "learning_rate": 9.692834224486338e-06, + "loss": 0.8675, + "step": 1455 + }, + { + "ce_ib": 2.875929832458496, + "ce_orig": 0.5546712279319763, + "epoch": 0.4184341074124667, + "kl_loss": 0.1498483419418335, + "loss_ib": 0.004374413285404444, + "step": 1455 + }, + { + "ce_ib": 5.155608654022217, + "ce_orig": 0.6677677035331726, + "epoch": 0.4184341074124667, + "kl_loss": 0.2732641100883484, + "loss_ib": 0.00788824912160635, + "step": 1455 + }, + { + "ce_ib": 5.532578945159912, + "ce_orig": 0.8035033345222473, + "epoch": 0.4184341074124667, + "kl_loss": 0.2326735258102417, + "loss_ib": 0.007859313860535622, + "step": 1455 + }, + { + "ce_ib": 5.624016761779785, + "ce_orig": 0.5841573476791382, + "epoch": 0.4184341074124667, + "kl_loss": 0.3162783980369568, + "loss_ib": 0.008786801248788834, + "step": 1455 + }, + { + "ce_ib": 5.438791751861572, + "ce_orig": 0.7679370045661926, + "epoch": 0.4187216909914444, + "kl_loss": 0.25558215379714966, + "loss_ib": 0.007994613610208035, + "step": 1456 + }, + { + "ce_ib": 4.692433834075928, + "ce_orig": 0.50995272397995, + "epoch": 0.4187216909914444, + "kl_loss": 0.19274017214775085, + "loss_ib": 0.006619835272431374, + "step": 1456 + }, + { + "ce_ib": 8.270526885986328, + "ce_orig": 1.1970558166503906, + "epoch": 0.4187216909914444, + "kl_loss": 0.3240256905555725, + "loss_ib": 0.011510784737765789, + "step": 1456 + }, + { + "ce_ib": 4.748291969299316, + "ce_orig": 0.6559625267982483, + "epoch": 0.4187216909914444, + "kl_loss": 0.24350781738758087, + "loss_ib": 0.007183369714766741, + "step": 1456 + }, + { + "ce_ib": 3.4031496047973633, + "ce_orig": 0.439064085483551, + "epoch": 0.41900927457042203, + "kl_loss": 0.3383101224899292, + "loss_ib": 0.006786250974982977, + "step": 1457 + }, + { + "ce_ib": 3.4370038509368896, + "ce_orig": 0.5014415383338928, + "epoch": 0.41900927457042203, + "kl_loss": 0.24829784035682678, + "loss_ib": 0.005919982213526964, + "step": 1457 + }, + { + "ce_ib": 7.7961039543151855, + "ce_orig": 0.9597184658050537, + "epoch": 0.41900927457042203, + "kl_loss": 0.20575028657913208, + "loss_ib": 0.009853607043623924, + "step": 1457 + }, + { + "ce_ib": 5.096848964691162, + "ce_orig": 0.859772264957428, + "epoch": 0.41900927457042203, + "kl_loss": 0.310441255569458, + "loss_ib": 0.00820126198232174, + "step": 1457 + }, + { + "ce_ib": 6.103631019592285, + "ce_orig": 0.82071852684021, + "epoch": 0.41929685814939965, + "kl_loss": 0.25560837984085083, + "loss_ib": 0.008659714832901955, + "step": 1458 + }, + { + "ce_ib": 4.901415824890137, + "ce_orig": 0.9847708344459534, + "epoch": 0.41929685814939965, + "kl_loss": 0.2805362343788147, + "loss_ib": 0.007706777658313513, + "step": 1458 + }, + { + "ce_ib": 4.433986186981201, + "ce_orig": 0.6790226697921753, + "epoch": 0.41929685814939965, + "kl_loss": 0.2497805505990982, + "loss_ib": 0.006931791547685862, + "step": 1458 + }, + { + "ce_ib": 9.856012344360352, + "ce_orig": 1.251712679862976, + "epoch": 0.41929685814939965, + "kl_loss": 0.2390027940273285, + "loss_ib": 0.01224603969603777, + "step": 1458 + }, + { + "ce_ib": 5.431840896606445, + "ce_orig": 0.9738479256629944, + "epoch": 0.41958444172837733, + "kl_loss": 0.2178439497947693, + "loss_ib": 0.007610280532389879, + "step": 1459 + }, + { + "ce_ib": 5.6414570808410645, + "ce_orig": 0.7506046891212463, + "epoch": 0.41958444172837733, + "kl_loss": 0.19342423975467682, + "loss_ib": 0.007575699593871832, + "step": 1459 + }, + { + "ce_ib": 5.220902919769287, + "ce_orig": 0.7075722813606262, + "epoch": 0.41958444172837733, + "kl_loss": 0.24338899552822113, + "loss_ib": 0.007654793094843626, + "step": 1459 + }, + { + "ce_ib": 4.7413010597229, + "ce_orig": 0.7294593453407288, + "epoch": 0.41958444172837733, + "kl_loss": 0.2376450002193451, + "loss_ib": 0.007117751054465771, + "step": 1459 + }, + { + "epoch": 0.41987202530735496, + "grad_norm": 0.1294163465499878, + "learning_rate": 9.69015032215344e-06, + "loss": 0.8336, + "step": 1460 + }, + { + "ce_ib": 4.186010837554932, + "ce_orig": 0.7451074123382568, + "epoch": 0.41987202530735496, + "kl_loss": 0.16504120826721191, + "loss_ib": 0.005836423020809889, + "step": 1460 + }, + { + "ce_ib": 5.1512322425842285, + "ce_orig": 0.4749121367931366, + "epoch": 0.41987202530735496, + "kl_loss": 0.3345229923725128, + "loss_ib": 0.008496462367475033, + "step": 1460 + }, + { + "ce_ib": 6.151814937591553, + "ce_orig": 0.7117960453033447, + "epoch": 0.41987202530735496, + "kl_loss": 0.22646743059158325, + "loss_ib": 0.008416488766670227, + "step": 1460 + }, + { + "ce_ib": 6.976144313812256, + "ce_orig": 1.4999809265136719, + "epoch": 0.41987202530735496, + "kl_loss": 0.18899531662464142, + "loss_ib": 0.00886609684675932, + "step": 1460 + }, + { + "ce_ib": 2.479775905609131, + "ce_orig": 0.27685609459877014, + "epoch": 0.4201596088863326, + "kl_loss": 0.7138643264770508, + "loss_ib": 0.009618419222533703, + "step": 1461 + }, + { + "ce_ib": 5.140337944030762, + "ce_orig": 0.6391001343727112, + "epoch": 0.4201596088863326, + "kl_loss": 0.2612397074699402, + "loss_ib": 0.007752734702080488, + "step": 1461 + }, + { + "ce_ib": 3.4452733993530273, + "ce_orig": 0.5357054471969604, + "epoch": 0.4201596088863326, + "kl_loss": 0.24114762246608734, + "loss_ib": 0.005856749136000872, + "step": 1461 + }, + { + "ce_ib": 4.0398712158203125, + "ce_orig": 0.6190880537033081, + "epoch": 0.4201596088863326, + "kl_loss": 0.28476786613464355, + "loss_ib": 0.006887550000101328, + "step": 1461 + }, + { + "ce_ib": 6.414108753204346, + "ce_orig": 1.0519330501556396, + "epoch": 0.4204471924653102, + "kl_loss": 0.23815977573394775, + "loss_ib": 0.008795706555247307, + "step": 1462 + }, + { + "ce_ib": 4.921961784362793, + "ce_orig": 0.7752912044525146, + "epoch": 0.4204471924653102, + "kl_loss": 0.21857008337974548, + "loss_ib": 0.0071076625026762486, + "step": 1462 + }, + { + "ce_ib": 3.1480214595794678, + "ce_orig": 0.6203112602233887, + "epoch": 0.4204471924653102, + "kl_loss": 0.23576763272285461, + "loss_ib": 0.005505697801709175, + "step": 1462 + }, + { + "ce_ib": 7.7558770179748535, + "ce_orig": 1.295670986175537, + "epoch": 0.4204471924653102, + "kl_loss": 0.2446509599685669, + "loss_ib": 0.010202386416494846, + "step": 1462 + }, + { + "ce_ib": 8.119604110717773, + "ce_orig": 1.3207716941833496, + "epoch": 0.4207347760442879, + "kl_loss": 0.2782466411590576, + "loss_ib": 0.010902070440351963, + "step": 1463 + }, + { + "ce_ib": 5.560944080352783, + "ce_orig": 0.8067669868469238, + "epoch": 0.4207347760442879, + "kl_loss": 0.33775997161865234, + "loss_ib": 0.008938543498516083, + "step": 1463 + }, + { + "ce_ib": 9.663983345031738, + "ce_orig": 0.9008134603500366, + "epoch": 0.4207347760442879, + "kl_loss": 0.4337159991264343, + "loss_ib": 0.014001142233610153, + "step": 1463 + }, + { + "ce_ib": 1.857756495475769, + "ce_orig": 0.36369869112968445, + "epoch": 0.4207347760442879, + "kl_loss": 0.12271080166101456, + "loss_ib": 0.003084864467382431, + "step": 1463 + }, + { + "ce_ib": 6.856328964233398, + "ce_orig": 1.1367650032043457, + "epoch": 0.4210223596232655, + "kl_loss": 0.26799026131629944, + "loss_ib": 0.009536231867969036, + "step": 1464 + }, + { + "ce_ib": 5.147492408752441, + "ce_orig": 0.46571192145347595, + "epoch": 0.4210223596232655, + "kl_loss": 0.3239108920097351, + "loss_ib": 0.008386600762605667, + "step": 1464 + }, + { + "ce_ib": 5.288569450378418, + "ce_orig": 0.36156103014945984, + "epoch": 0.4210223596232655, + "kl_loss": 0.27096402645111084, + "loss_ib": 0.007998209446668625, + "step": 1464 + }, + { + "ce_ib": 4.390820503234863, + "ce_orig": 0.6171783804893494, + "epoch": 0.4210223596232655, + "kl_loss": 0.18023133277893066, + "loss_ib": 0.006193133536726236, + "step": 1464 + }, + { + "epoch": 0.42130994320224313, + "grad_norm": 0.10051855444908142, + "learning_rate": 9.687455120159808e-06, + "loss": 0.8689, + "step": 1465 + }, + { + "ce_ib": 7.877378463745117, + "ce_orig": 1.2540488243103027, + "epoch": 0.42130994320224313, + "kl_loss": 0.1941794902086258, + "loss_ib": 0.009819173254072666, + "step": 1465 + }, + { + "ce_ib": 9.131965637207031, + "ce_orig": 0.6505073308944702, + "epoch": 0.42130994320224313, + "kl_loss": 0.2972337007522583, + "loss_ib": 0.012104302644729614, + "step": 1465 + }, + { + "ce_ib": 7.370760917663574, + "ce_orig": 1.3000590801239014, + "epoch": 0.42130994320224313, + "kl_loss": 0.19257795810699463, + "loss_ib": 0.009296540170907974, + "step": 1465 + }, + { + "ce_ib": 5.004774570465088, + "ce_orig": 0.4962858259677887, + "epoch": 0.42130994320224313, + "kl_loss": 0.18925777077674866, + "loss_ib": 0.006897352635860443, + "step": 1465 + }, + { + "ce_ib": 2.390171527862549, + "ce_orig": 0.26865100860595703, + "epoch": 0.4215975267812208, + "kl_loss": 0.5247079133987427, + "loss_ib": 0.0076372502371668816, + "step": 1466 + }, + { + "ce_ib": 3.2409467697143555, + "ce_orig": 0.5981196165084839, + "epoch": 0.4215975267812208, + "kl_loss": 0.1973566710948944, + "loss_ib": 0.0052145132794976234, + "step": 1466 + }, + { + "ce_ib": 5.385571479797363, + "ce_orig": 1.052996039390564, + "epoch": 0.4215975267812208, + "kl_loss": 0.22852903604507446, + "loss_ib": 0.0076708621345460415, + "step": 1466 + }, + { + "ce_ib": 10.688210487365723, + "ce_orig": 1.7798447608947754, + "epoch": 0.4215975267812208, + "kl_loss": 0.45732739567756653, + "loss_ib": 0.015261484310030937, + "step": 1466 + }, + { + "ce_ib": 7.756859302520752, + "ce_orig": 0.7536134719848633, + "epoch": 0.42188511036019843, + "kl_loss": 0.35317015647888184, + "loss_ib": 0.01128856185823679, + "step": 1467 + }, + { + "ce_ib": 4.976874828338623, + "ce_orig": 0.6887327432632446, + "epoch": 0.42188511036019843, + "kl_loss": 0.3163662552833557, + "loss_ib": 0.00814053788781166, + "step": 1467 + }, + { + "ce_ib": 8.146329879760742, + "ce_orig": 0.9967268705368042, + "epoch": 0.42188511036019843, + "kl_loss": 0.21219246089458466, + "loss_ib": 0.010268254205584526, + "step": 1467 + }, + { + "ce_ib": 4.826017379760742, + "ce_orig": 0.3177625238895416, + "epoch": 0.42188511036019843, + "kl_loss": 0.3488796055316925, + "loss_ib": 0.008314813487231731, + "step": 1467 + }, + { + "ce_ib": 7.634398460388184, + "ce_orig": 1.1551445722579956, + "epoch": 0.42217269393917606, + "kl_loss": 0.2153216451406479, + "loss_ib": 0.009787614457309246, + "step": 1468 + }, + { + "ce_ib": 7.4884843826293945, + "ce_orig": 0.9426076412200928, + "epoch": 0.42217269393917606, + "kl_loss": 0.34704655408859253, + "loss_ib": 0.010958950035274029, + "step": 1468 + }, + { + "ce_ib": 4.062495231628418, + "ce_orig": 0.3596497178077698, + "epoch": 0.42217269393917606, + "kl_loss": 0.4352097511291504, + "loss_ib": 0.00841459259390831, + "step": 1468 + }, + { + "ce_ib": 2.4920849800109863, + "ce_orig": 0.40482640266418457, + "epoch": 0.42217269393917606, + "kl_loss": 0.5243197679519653, + "loss_ib": 0.007735282648354769, + "step": 1468 + }, + { + "ce_ib": 5.229053974151611, + "ce_orig": 0.8194414973258972, + "epoch": 0.42246027751815374, + "kl_loss": 0.2739192247390747, + "loss_ib": 0.007968246005475521, + "step": 1469 + }, + { + "ce_ib": 6.554534912109375, + "ce_orig": 0.7452446222305298, + "epoch": 0.42246027751815374, + "kl_loss": 0.2899870276451111, + "loss_ib": 0.009454404935240746, + "step": 1469 + }, + { + "ce_ib": 6.135105609893799, + "ce_orig": 0.7410409450531006, + "epoch": 0.42246027751815374, + "kl_loss": 0.23898279666900635, + "loss_ib": 0.008524932898581028, + "step": 1469 + }, + { + "ce_ib": 4.908745765686035, + "ce_orig": 0.4362742006778717, + "epoch": 0.42246027751815374, + "kl_loss": 0.6675567626953125, + "loss_ib": 0.011584312655031681, + "step": 1469 + }, + { + "epoch": 0.42274786109713136, + "grad_norm": 0.10823974758386612, + "learning_rate": 9.68474862499881e-06, + "loss": 0.8345, + "step": 1470 + }, + { + "ce_ib": 8.23246955871582, + "ce_orig": 1.1660966873168945, + "epoch": 0.42274786109713136, + "kl_loss": 0.2664303183555603, + "loss_ib": 0.01089677307754755, + "step": 1470 + }, + { + "ce_ib": 3.9345450401306152, + "ce_orig": 0.4807896018028259, + "epoch": 0.42274786109713136, + "kl_loss": 0.2337619513273239, + "loss_ib": 0.006272164639085531, + "step": 1470 + }, + { + "ce_ib": 9.054957389831543, + "ce_orig": 1.2980424165725708, + "epoch": 0.42274786109713136, + "kl_loss": 0.2841121256351471, + "loss_ib": 0.01189607847481966, + "step": 1470 + }, + { + "ce_ib": 5.502397537231445, + "ce_orig": 0.7632869482040405, + "epoch": 0.42274786109713136, + "kl_loss": 0.18508179485797882, + "loss_ib": 0.007353215012699366, + "step": 1470 + }, + { + "ce_ib": 7.207784652709961, + "ce_orig": 1.1374162435531616, + "epoch": 0.423035444676109, + "kl_loss": 0.27467674016952515, + "loss_ib": 0.009954552166163921, + "step": 1471 + }, + { + "ce_ib": 8.317543029785156, + "ce_orig": 1.4949835538864136, + "epoch": 0.423035444676109, + "kl_loss": 0.21355046331882477, + "loss_ib": 0.01045304723083973, + "step": 1471 + }, + { + "ce_ib": 3.975776195526123, + "ce_orig": 0.4850504994392395, + "epoch": 0.423035444676109, + "kl_loss": 0.3246009349822998, + "loss_ib": 0.007221785839647055, + "step": 1471 + }, + { + "ce_ib": 9.7860689163208, + "ce_orig": 1.5497608184814453, + "epoch": 0.423035444676109, + "kl_loss": 0.30065202713012695, + "loss_ib": 0.012792589142918587, + "step": 1471 + }, + { + "ce_ib": 4.456712245941162, + "ce_orig": 0.8147068619728088, + "epoch": 0.4233230282550866, + "kl_loss": 0.27602076530456543, + "loss_ib": 0.007216919679194689, + "step": 1472 + }, + { + "ce_ib": 7.215217113494873, + "ce_orig": 0.9568760991096497, + "epoch": 0.4233230282550866, + "kl_loss": 0.21230699121952057, + "loss_ib": 0.009338286705315113, + "step": 1472 + }, + { + "ce_ib": 6.054652690887451, + "ce_orig": 0.6687721610069275, + "epoch": 0.4233230282550866, + "kl_loss": 0.33765116333961487, + "loss_ib": 0.009431163780391216, + "step": 1472 + }, + { + "ce_ib": 6.395743370056152, + "ce_orig": 0.6408840417861938, + "epoch": 0.4233230282550866, + "kl_loss": 0.31022149324417114, + "loss_ib": 0.009497958235442638, + "step": 1472 + }, + { + "ce_ib": 4.3919901847839355, + "ce_orig": 0.7670180797576904, + "epoch": 0.4236106118340643, + "kl_loss": 0.18993571400642395, + "loss_ib": 0.0062913475558161736, + "step": 1473 + }, + { + "ce_ib": 3.2439560890197754, + "ce_orig": 0.4134061336517334, + "epoch": 0.4236106118340643, + "kl_loss": 0.23562690615653992, + "loss_ib": 0.005600225180387497, + "step": 1473 + }, + { + "ce_ib": 5.886128902435303, + "ce_orig": 0.5026991963386536, + "epoch": 0.4236106118340643, + "kl_loss": 0.39356550574302673, + "loss_ib": 0.009821784682571888, + "step": 1473 + }, + { + "ce_ib": 10.143879890441895, + "ce_orig": 1.480689525604248, + "epoch": 0.4236106118340643, + "kl_loss": 0.2073626071214676, + "loss_ib": 0.0122175058349967, + "step": 1473 + }, + { + "ce_ib": 7.02231502532959, + "ce_orig": 0.5753958225250244, + "epoch": 0.4238981954130419, + "kl_loss": 0.6799865961074829, + "loss_ib": 0.013822181150317192, + "step": 1474 + }, + { + "ce_ib": 4.180866718292236, + "ce_orig": 0.7731302380561829, + "epoch": 0.4238981954130419, + "kl_loss": 0.18818950653076172, + "loss_ib": 0.006062761414796114, + "step": 1474 + }, + { + "ce_ib": 6.477175235748291, + "ce_orig": 0.8291592597961426, + "epoch": 0.4238981954130419, + "kl_loss": 0.2998458743095398, + "loss_ib": 0.00947563350200653, + "step": 1474 + }, + { + "ce_ib": 6.95041561126709, + "ce_orig": 0.47487786412239075, + "epoch": 0.4238981954130419, + "kl_loss": 0.6084232330322266, + "loss_ib": 0.013034648261964321, + "step": 1474 + }, + { + "epoch": 0.42418577899201954, + "grad_norm": 0.11552698165178299, + "learning_rate": 9.682030843191021e-06, + "loss": 0.9039, + "step": 1475 + }, + { + "ce_ib": 7.451219081878662, + "ce_orig": 1.3754565715789795, + "epoch": 0.42418577899201954, + "kl_loss": 0.26375889778137207, + "loss_ib": 0.01008880790323019, + "step": 1475 + }, + { + "ce_ib": 5.691998481750488, + "ce_orig": 0.6472443342208862, + "epoch": 0.42418577899201954, + "kl_loss": 0.23740312457084656, + "loss_ib": 0.008066029287874699, + "step": 1475 + }, + { + "ce_ib": 6.195897102355957, + "ce_orig": 0.9898815751075745, + "epoch": 0.42418577899201954, + "kl_loss": 0.21684029698371887, + "loss_ib": 0.0083643002435565, + "step": 1475 + }, + { + "ce_ib": 5.425847053527832, + "ce_orig": 0.5178442597389221, + "epoch": 0.42418577899201954, + "kl_loss": 0.2924429178237915, + "loss_ib": 0.00835027638822794, + "step": 1475 + }, + { + "ce_ib": 7.3904805183410645, + "ce_orig": 0.770089864730835, + "epoch": 0.4244733625709972, + "kl_loss": 0.21272428333759308, + "loss_ib": 0.009517722763121128, + "step": 1476 + }, + { + "ce_ib": 5.064784049987793, + "ce_orig": 0.6370176076889038, + "epoch": 0.4244733625709972, + "kl_loss": 0.28311437368392944, + "loss_ib": 0.007895927876234055, + "step": 1476 + }, + { + "ce_ib": 9.224869728088379, + "ce_orig": 1.5204010009765625, + "epoch": 0.4244733625709972, + "kl_loss": 0.30629977583885193, + "loss_ib": 0.0122878672555089, + "step": 1476 + }, + { + "ce_ib": 9.436309814453125, + "ce_orig": 1.3387373685836792, + "epoch": 0.4244733625709972, + "kl_loss": 0.2948681116104126, + "loss_ib": 0.01238499116152525, + "step": 1476 + }, + { + "ce_ib": 3.8061373233795166, + "ce_orig": 0.6259332895278931, + "epoch": 0.42476094614997484, + "kl_loss": 0.2015649676322937, + "loss_ib": 0.005821786820888519, + "step": 1477 + }, + { + "ce_ib": 6.14137077331543, + "ce_orig": 0.7257814407348633, + "epoch": 0.42476094614997484, + "kl_loss": 0.2847587466239929, + "loss_ib": 0.008988958783447742, + "step": 1477 + }, + { + "ce_ib": 9.099712371826172, + "ce_orig": 1.2262767553329468, + "epoch": 0.42476094614997484, + "kl_loss": 0.21662941575050354, + "loss_ib": 0.011266006156802177, + "step": 1477 + }, + { + "ce_ib": 4.089813232421875, + "ce_orig": 1.0098782777786255, + "epoch": 0.42476094614997484, + "kl_loss": 0.17735642194747925, + "loss_ib": 0.00586337735876441, + "step": 1477 + }, + { + "ce_ib": 7.239267826080322, + "ce_orig": 1.438734769821167, + "epoch": 0.42504852972895246, + "kl_loss": 0.20792442560195923, + "loss_ib": 0.009318511933088303, + "step": 1478 + }, + { + "ce_ib": 6.3115386962890625, + "ce_orig": 0.6404736042022705, + "epoch": 0.42504852972895246, + "kl_loss": 0.3348001539707184, + "loss_ib": 0.009659540839493275, + "step": 1478 + }, + { + "ce_ib": 7.504519939422607, + "ce_orig": 0.7322005033493042, + "epoch": 0.42504852972895246, + "kl_loss": 0.2848302721977234, + "loss_ib": 0.01035282202064991, + "step": 1478 + }, + { + "ce_ib": 5.972691535949707, + "ce_orig": 0.8413686752319336, + "epoch": 0.42504852972895246, + "kl_loss": 0.21566344797611237, + "loss_ib": 0.008129325695335865, + "step": 1478 + }, + { + "ce_ib": 8.528627395629883, + "ce_orig": 1.0025306940078735, + "epoch": 0.42533611330793014, + "kl_loss": 0.2139550894498825, + "loss_ib": 0.010668179020285606, + "step": 1479 + }, + { + "ce_ib": 3.167024612426758, + "ce_orig": 0.45949411392211914, + "epoch": 0.42533611330793014, + "kl_loss": 0.43662020564079285, + "loss_ib": 0.007533226627856493, + "step": 1479 + }, + { + "ce_ib": 5.232333660125732, + "ce_orig": 0.21036502718925476, + "epoch": 0.42533611330793014, + "kl_loss": 0.4534093141555786, + "loss_ib": 0.009766426868736744, + "step": 1479 + }, + { + "ce_ib": 4.0448899269104, + "ce_orig": 0.6088986396789551, + "epoch": 0.42533611330793014, + "kl_loss": 0.15418598055839539, + "loss_ib": 0.005586749874055386, + "step": 1479 + }, + { + "epoch": 0.42562369688690777, + "grad_norm": 0.11320596933364868, + "learning_rate": 9.679301781284209e-06, + "loss": 0.881, + "step": 1480 + }, + { + "ce_ib": 6.253572463989258, + "ce_orig": 0.9887263178825378, + "epoch": 0.42562369688690777, + "kl_loss": 0.25370287895202637, + "loss_ib": 0.008790601044893265, + "step": 1480 + }, + { + "ce_ib": 6.013632297515869, + "ce_orig": 0.6574453115463257, + "epoch": 0.42562369688690777, + "kl_loss": 0.3245493173599243, + "loss_ib": 0.009259125217795372, + "step": 1480 + }, + { + "ce_ib": 6.525053977966309, + "ce_orig": 1.4866364002227783, + "epoch": 0.42562369688690777, + "kl_loss": 0.1928248554468155, + "loss_ib": 0.008453302085399628, + "step": 1480 + }, + { + "ce_ib": 8.524917602539062, + "ce_orig": 1.3874448537826538, + "epoch": 0.42562369688690777, + "kl_loss": 0.20940372347831726, + "loss_ib": 0.01061895489692688, + "step": 1480 + }, + { + "ce_ib": 7.312863349914551, + "ce_orig": 0.865240216255188, + "epoch": 0.4259112804658854, + "kl_loss": 0.5077807307243347, + "loss_ib": 0.012390670366585255, + "step": 1481 + }, + { + "ce_ib": 7.382387161254883, + "ce_orig": 1.1883589029312134, + "epoch": 0.4259112804658854, + "kl_loss": 0.33302438259124756, + "loss_ib": 0.010712630115449429, + "step": 1481 + }, + { + "ce_ib": 5.761624336242676, + "ce_orig": 0.8007732033729553, + "epoch": 0.4259112804658854, + "kl_loss": 0.19490881264209747, + "loss_ib": 0.007710712496191263, + "step": 1481 + }, + { + "ce_ib": 5.854297161102295, + "ce_orig": 0.90272456407547, + "epoch": 0.4259112804658854, + "kl_loss": 0.2707770764827728, + "loss_ib": 0.008562067523598671, + "step": 1481 + }, + { + "ce_ib": 9.733773231506348, + "ce_orig": 1.9829741716384888, + "epoch": 0.426198864044863, + "kl_loss": 0.2391510009765625, + "loss_ib": 0.012125283479690552, + "step": 1482 + }, + { + "ce_ib": 6.117833614349365, + "ce_orig": 0.9634227752685547, + "epoch": 0.426198864044863, + "kl_loss": 0.249215230345726, + "loss_ib": 0.008609985932707787, + "step": 1482 + }, + { + "ce_ib": 6.840470790863037, + "ce_orig": 0.9898148775100708, + "epoch": 0.426198864044863, + "kl_loss": 0.25602009892463684, + "loss_ib": 0.009400671347975731, + "step": 1482 + }, + { + "ce_ib": 2.724457025527954, + "ce_orig": 0.2802466154098511, + "epoch": 0.426198864044863, + "kl_loss": 0.453813761472702, + "loss_ib": 0.007262594532221556, + "step": 1482 + }, + { + "ce_ib": 4.271263599395752, + "ce_orig": 1.0552265644073486, + "epoch": 0.4264864476238407, + "kl_loss": 0.18592074513435364, + "loss_ib": 0.006130470894277096, + "step": 1483 + }, + { + "ce_ib": 3.728668689727783, + "ce_orig": 0.5460267663002014, + "epoch": 0.4264864476238407, + "kl_loss": 0.3072531223297119, + "loss_ib": 0.00680119963362813, + "step": 1483 + }, + { + "ce_ib": 5.636871814727783, + "ce_orig": 0.8854542374610901, + "epoch": 0.4264864476238407, + "kl_loss": 0.20121414959430695, + "loss_ib": 0.007649013306945562, + "step": 1483 + }, + { + "ce_ib": 5.380930423736572, + "ce_orig": 0.7147185802459717, + "epoch": 0.4264864476238407, + "kl_loss": 0.29215019941329956, + "loss_ib": 0.008302432484924793, + "step": 1483 + }, + { + "ce_ib": 4.051311016082764, + "ce_orig": 0.7806766033172607, + "epoch": 0.4267740312028183, + "kl_loss": 0.2124352753162384, + "loss_ib": 0.006175663322210312, + "step": 1484 + }, + { + "ce_ib": 6.189905643463135, + "ce_orig": 0.7869464159011841, + "epoch": 0.4267740312028183, + "kl_loss": 0.33432358503341675, + "loss_ib": 0.009533141739666462, + "step": 1484 + }, + { + "ce_ib": 4.387548446655273, + "ce_orig": 1.0712177753448486, + "epoch": 0.4267740312028183, + "kl_loss": 0.16804896295070648, + "loss_ib": 0.006068038288503885, + "step": 1484 + }, + { + "ce_ib": 5.980642318725586, + "ce_orig": 1.0337541103363037, + "epoch": 0.4267740312028183, + "kl_loss": 0.27346473932266235, + "loss_ib": 0.008715289644896984, + "step": 1484 + }, + { + "epoch": 0.42706161478179594, + "grad_norm": 0.1281713843345642, + "learning_rate": 9.676561445853317e-06, + "loss": 0.9568, + "step": 1485 + }, + { + "ce_ib": 9.559850692749023, + "ce_orig": 1.2330940961837769, + "epoch": 0.42706161478179594, + "kl_loss": 0.26453912258148193, + "loss_ib": 0.012205241248011589, + "step": 1485 + }, + { + "ce_ib": 10.209794998168945, + "ce_orig": 0.9033480882644653, + "epoch": 0.42706161478179594, + "kl_loss": 0.33303964138031006, + "loss_ib": 0.013540191575884819, + "step": 1485 + }, + { + "ce_ib": 6.278723239898682, + "ce_orig": 1.024091124534607, + "epoch": 0.42706161478179594, + "kl_loss": 0.2991013526916504, + "loss_ib": 0.00926973670721054, + "step": 1485 + }, + { + "ce_ib": 4.2076263427734375, + "ce_orig": 0.4415350556373596, + "epoch": 0.42706161478179594, + "kl_loss": 0.1996149867773056, + "loss_ib": 0.006203775759786367, + "step": 1485 + }, + { + "ce_ib": 3.6215503215789795, + "ce_orig": 0.695202648639679, + "epoch": 0.4273491983607736, + "kl_loss": 0.23646116256713867, + "loss_ib": 0.005986162461340427, + "step": 1486 + }, + { + "ce_ib": 3.97869873046875, + "ce_orig": 0.602679431438446, + "epoch": 0.4273491983607736, + "kl_loss": 0.19544193148612976, + "loss_ib": 0.005933117587119341, + "step": 1486 + }, + { + "ce_ib": 3.666558265686035, + "ce_orig": 0.7779607772827148, + "epoch": 0.4273491983607736, + "kl_loss": 0.31298935413360596, + "loss_ib": 0.00679645175114274, + "step": 1486 + }, + { + "ce_ib": 4.802906036376953, + "ce_orig": 0.4945107400417328, + "epoch": 0.4273491983607736, + "kl_loss": 0.3955551087856293, + "loss_ib": 0.008758457377552986, + "step": 1486 + }, + { + "ce_ib": 3.2569401264190674, + "ce_orig": 0.7755473256111145, + "epoch": 0.42763678193975124, + "kl_loss": 0.18514487147331238, + "loss_ib": 0.005108388606458902, + "step": 1487 + }, + { + "ce_ib": 3.5347471237182617, + "ce_orig": 0.6426426768302917, + "epoch": 0.42763678193975124, + "kl_loss": 0.24189895391464233, + "loss_ib": 0.005953736137598753, + "step": 1487 + }, + { + "ce_ib": 3.0833957195281982, + "ce_orig": 0.3376903831958771, + "epoch": 0.42763678193975124, + "kl_loss": 0.5483517646789551, + "loss_ib": 0.008566913194954395, + "step": 1487 + }, + { + "ce_ib": 7.679892539978027, + "ce_orig": 1.2606022357940674, + "epoch": 0.42763678193975124, + "kl_loss": 0.23624634742736816, + "loss_ib": 0.010042356327176094, + "step": 1487 + }, + { + "ce_ib": 7.427282333374023, + "ce_orig": 0.9086766242980957, + "epoch": 0.42792436551872887, + "kl_loss": 0.2795743942260742, + "loss_ib": 0.010223026387393475, + "step": 1488 + }, + { + "ce_ib": 7.853158950805664, + "ce_orig": 0.7320258617401123, + "epoch": 0.42792436551872887, + "kl_loss": 0.2966246008872986, + "loss_ib": 0.010819405317306519, + "step": 1488 + }, + { + "ce_ib": 5.303369522094727, + "ce_orig": 1.0770010948181152, + "epoch": 0.42792436551872887, + "kl_loss": 0.16358917951583862, + "loss_ib": 0.006939261220395565, + "step": 1488 + }, + { + "ce_ib": 9.29581069946289, + "ce_orig": 1.4535112380981445, + "epoch": 0.42792436551872887, + "kl_loss": 0.3460651636123657, + "loss_ib": 0.012756462208926678, + "step": 1488 + }, + { + "ce_ib": 4.586825847625732, + "ce_orig": 0.6064006686210632, + "epoch": 0.42821194909770655, + "kl_loss": 0.2779026925563812, + "loss_ib": 0.00736585259437561, + "step": 1489 + }, + { + "ce_ib": 7.388415813446045, + "ce_orig": 1.4265230894088745, + "epoch": 0.42821194909770655, + "kl_loss": 0.1808740347623825, + "loss_ib": 0.009197155945003033, + "step": 1489 + }, + { + "ce_ib": 6.708005428314209, + "ce_orig": 0.8520740270614624, + "epoch": 0.42821194909770655, + "kl_loss": 0.17089973390102386, + "loss_ib": 0.008417002856731415, + "step": 1489 + }, + { + "ce_ib": 3.947643756866455, + "ce_orig": 0.5688271522521973, + "epoch": 0.42821194909770655, + "kl_loss": 0.22878821194171906, + "loss_ib": 0.006235525943338871, + "step": 1489 + }, + { + "epoch": 0.42849953267668417, + "grad_norm": 0.1272149682044983, + "learning_rate": 9.673809843500447e-06, + "loss": 0.8415, + "step": 1490 + }, + { + "ce_ib": 5.999438762664795, + "ce_orig": 1.1109205484390259, + "epoch": 0.42849953267668417, + "kl_loss": 0.23863765597343445, + "loss_ib": 0.008385815657675266, + "step": 1490 + }, + { + "ce_ib": 5.141195297241211, + "ce_orig": 0.8208162784576416, + "epoch": 0.42849953267668417, + "kl_loss": 0.17496593296527863, + "loss_ib": 0.006890854798257351, + "step": 1490 + }, + { + "ce_ib": 3.8230690956115723, + "ce_orig": 0.33701616525650024, + "epoch": 0.42849953267668417, + "kl_loss": 0.18180124461650848, + "loss_ib": 0.005641081370413303, + "step": 1490 + }, + { + "ce_ib": 4.297886371612549, + "ce_orig": 0.5608041286468506, + "epoch": 0.42849953267668417, + "kl_loss": 0.18302598595619202, + "loss_ib": 0.0061281463131308556, + "step": 1490 + }, + { + "ce_ib": 5.28217887878418, + "ce_orig": 0.8655717968940735, + "epoch": 0.4287871162556618, + "kl_loss": 0.2272111475467682, + "loss_ib": 0.007554290350526571, + "step": 1491 + }, + { + "ce_ib": 6.0911431312561035, + "ce_orig": 1.2214902639389038, + "epoch": 0.4287871162556618, + "kl_loss": 0.24875463545322418, + "loss_ib": 0.008578689768910408, + "step": 1491 + }, + { + "ce_ib": 4.257596969604492, + "ce_orig": 0.6725324988365173, + "epoch": 0.4287871162556618, + "kl_loss": 0.19750872254371643, + "loss_ib": 0.006232684012502432, + "step": 1491 + }, + { + "ce_ib": 4.0605340003967285, + "ce_orig": 0.8823887705802917, + "epoch": 0.4287871162556618, + "kl_loss": 0.21200858056545258, + "loss_ib": 0.006180619355291128, + "step": 1491 + }, + { + "ce_ib": 4.476174831390381, + "ce_orig": 1.0363364219665527, + "epoch": 0.4290746998346394, + "kl_loss": 0.18386420607566833, + "loss_ib": 0.006314816884696484, + "step": 1492 + }, + { + "ce_ib": 5.4428510665893555, + "ce_orig": 0.6156265139579773, + "epoch": 0.4290746998346394, + "kl_loss": 0.19407202303409576, + "loss_ib": 0.00738357100635767, + "step": 1492 + }, + { + "ce_ib": 8.190139770507812, + "ce_orig": 1.3100130558013916, + "epoch": 0.4290746998346394, + "kl_loss": 0.4860832095146179, + "loss_ib": 0.013050971552729607, + "step": 1492 + }, + { + "ce_ib": 6.034806728363037, + "ce_orig": 0.3907848298549652, + "epoch": 0.4290746998346394, + "kl_loss": 0.249636709690094, + "loss_ib": 0.008531173691153526, + "step": 1492 + }, + { + "ce_ib": 7.136789798736572, + "ce_orig": 0.9948228001594543, + "epoch": 0.4293622834136171, + "kl_loss": 0.494230180978775, + "loss_ib": 0.012079091742634773, + "step": 1493 + }, + { + "ce_ib": 4.873354434967041, + "ce_orig": 0.7475965023040771, + "epoch": 0.4293622834136171, + "kl_loss": 0.19573433697223663, + "loss_ib": 0.006830697413533926, + "step": 1493 + }, + { + "ce_ib": 8.23890209197998, + "ce_orig": 0.5245663523674011, + "epoch": 0.4293622834136171, + "kl_loss": 0.23461030423641205, + "loss_ib": 0.010585005395114422, + "step": 1493 + }, + { + "ce_ib": 4.986420154571533, + "ce_orig": 0.6100597977638245, + "epoch": 0.4293622834136171, + "kl_loss": 0.25687411427497864, + "loss_ib": 0.007555161602795124, + "step": 1493 + }, + { + "ce_ib": 5.936038017272949, + "ce_orig": 1.0116311311721802, + "epoch": 0.4296498669925947, + "kl_loss": 0.18465059995651245, + "loss_ib": 0.007782543543726206, + "step": 1494 + }, + { + "ce_ib": 2.660158634185791, + "ce_orig": 0.4223203659057617, + "epoch": 0.4296498669925947, + "kl_loss": 0.2522263526916504, + "loss_ib": 0.005182422231882811, + "step": 1494 + }, + { + "ce_ib": 3.642902374267578, + "ce_orig": 0.3541110157966614, + "epoch": 0.4296498669925947, + "kl_loss": 0.18501171469688416, + "loss_ib": 0.005493019707500935, + "step": 1494 + }, + { + "ce_ib": 2.7317256927490234, + "ce_orig": 0.6133846044540405, + "epoch": 0.4296498669925947, + "kl_loss": 0.15653100609779358, + "loss_ib": 0.004297035746276379, + "step": 1494 + }, + { + "epoch": 0.42993745057157234, + "grad_norm": 0.11513698846101761, + "learning_rate": 9.67104698085485e-06, + "loss": 0.8492, + "step": 1495 + }, + { + "ce_ib": 5.127799034118652, + "ce_orig": 0.5826548933982849, + "epoch": 0.42993745057157234, + "kl_loss": 0.18143674731254578, + "loss_ib": 0.006942166015505791, + "step": 1495 + }, + { + "ce_ib": 3.7079544067382812, + "ce_orig": 0.5327990055084229, + "epoch": 0.42993745057157234, + "kl_loss": 0.15200555324554443, + "loss_ib": 0.005228010471910238, + "step": 1495 + }, + { + "ce_ib": 7.367830276489258, + "ce_orig": 0.8680596947669983, + "epoch": 0.42993745057157234, + "kl_loss": 0.2782801687717438, + "loss_ib": 0.01015063188970089, + "step": 1495 + }, + { + "ce_ib": 6.355778694152832, + "ce_orig": 1.0745817422866821, + "epoch": 0.42993745057157234, + "kl_loss": 0.1733226329088211, + "loss_ib": 0.008089005015790462, + "step": 1495 + }, + { + "ce_ib": 7.89412260055542, + "ce_orig": 1.3843724727630615, + "epoch": 0.43022503415055, + "kl_loss": 0.28065750002861023, + "loss_ib": 0.010700698010623455, + "step": 1496 + }, + { + "ce_ib": 10.201616287231445, + "ce_orig": 1.7674388885498047, + "epoch": 0.43022503415055, + "kl_loss": 0.2781248390674591, + "loss_ib": 0.012982863932847977, + "step": 1496 + }, + { + "ce_ib": 5.998897075653076, + "ce_orig": 0.6868672966957092, + "epoch": 0.43022503415055, + "kl_loss": 0.37494516372680664, + "loss_ib": 0.009748348034918308, + "step": 1496 + }, + { + "ce_ib": 6.706963062286377, + "ce_orig": 0.680381178855896, + "epoch": 0.43022503415055, + "kl_loss": 0.24509286880493164, + "loss_ib": 0.009157891385257244, + "step": 1496 + }, + { + "ce_ib": 9.054398536682129, + "ce_orig": 1.5919907093048096, + "epoch": 0.43051261772952765, + "kl_loss": 0.25301802158355713, + "loss_ib": 0.011584578081965446, + "step": 1497 + }, + { + "ce_ib": 9.301456451416016, + "ce_orig": 1.3819289207458496, + "epoch": 0.43051261772952765, + "kl_loss": 0.23771429061889648, + "loss_ib": 0.011678599752485752, + "step": 1497 + }, + { + "ce_ib": 8.360661506652832, + "ce_orig": 1.6322040557861328, + "epoch": 0.43051261772952765, + "kl_loss": 0.24911445379257202, + "loss_ib": 0.010851806029677391, + "step": 1497 + }, + { + "ce_ib": 5.899044513702393, + "ce_orig": 1.0200097560882568, + "epoch": 0.43051261772952765, + "kl_loss": 0.21863295137882233, + "loss_ib": 0.008085373789072037, + "step": 1497 + }, + { + "ce_ib": 7.849880218505859, + "ce_orig": 1.2386505603790283, + "epoch": 0.43080020130850527, + "kl_loss": 0.31184014678001404, + "loss_ib": 0.010968281887471676, + "step": 1498 + }, + { + "ce_ib": 3.9608442783355713, + "ce_orig": 0.6503081321716309, + "epoch": 0.43080020130850527, + "kl_loss": 0.22978989779949188, + "loss_ib": 0.006258743349462748, + "step": 1498 + }, + { + "ce_ib": 6.0665717124938965, + "ce_orig": 1.1576424837112427, + "epoch": 0.43080020130850527, + "kl_loss": 0.21611745655536652, + "loss_ib": 0.00822774600237608, + "step": 1498 + }, + { + "ce_ib": 6.375781059265137, + "ce_orig": 1.1019394397735596, + "epoch": 0.43080020130850527, + "kl_loss": 0.20872078835964203, + "loss_ib": 0.008462988771498203, + "step": 1498 + }, + { + "ce_ib": 9.109877586364746, + "ce_orig": 1.6101223230361938, + "epoch": 0.43108778488748295, + "kl_loss": 0.28492850065231323, + "loss_ib": 0.011959162540733814, + "step": 1499 + }, + { + "ce_ib": 7.772365093231201, + "ce_orig": 1.3896774053573608, + "epoch": 0.43108778488748295, + "kl_loss": 0.21816033124923706, + "loss_ib": 0.009953968226909637, + "step": 1499 + }, + { + "ce_ib": 7.498557090759277, + "ce_orig": 0.6977278590202332, + "epoch": 0.43108778488748295, + "kl_loss": 0.4549769163131714, + "loss_ib": 0.012048325501382351, + "step": 1499 + }, + { + "ce_ib": 8.391149520874023, + "ce_orig": 1.1614909172058105, + "epoch": 0.43108778488748295, + "kl_loss": 0.2926523685455322, + "loss_ib": 0.011317672207951546, + "step": 1499 + }, + { + "epoch": 0.4313753684664606, + "grad_norm": 0.10857830196619034, + "learning_rate": 9.668272864572904e-06, + "loss": 0.9772, + "step": 1500 + }, + { + "ce_ib": 7.412587642669678, + "ce_orig": 1.3245453834533691, + "epoch": 0.4313753684664606, + "kl_loss": 0.19331267476081848, + "loss_ib": 0.009345714934170246, + "step": 1500 + }, + { + "ce_ib": 2.4376213550567627, + "ce_orig": 0.21778453886508942, + "epoch": 0.4313753684664606, + "kl_loss": 0.30824974179267883, + "loss_ib": 0.005520118400454521, + "step": 1500 + }, + { + "ce_ib": 4.506190776824951, + "ce_orig": 0.690782368183136, + "epoch": 0.4313753684664606, + "kl_loss": 0.22411122918128967, + "loss_ib": 0.006747303064912558, + "step": 1500 + }, + { + "ce_ib": 5.372386455535889, + "ce_orig": 0.811801016330719, + "epoch": 0.4313753684664606, + "kl_loss": 0.27325868606567383, + "loss_ib": 0.00810497347265482, + "step": 1500 + }, + { + "ce_ib": 5.54148006439209, + "ce_orig": 0.8415253162384033, + "epoch": 0.4316629520454382, + "kl_loss": 0.1796332150697708, + "loss_ib": 0.0073378118686378, + "step": 1501 + }, + { + "ce_ib": 7.4257049560546875, + "ce_orig": 0.7999733686447144, + "epoch": 0.4316629520454382, + "kl_loss": 0.2060936689376831, + "loss_ib": 0.009486641734838486, + "step": 1501 + }, + { + "ce_ib": 7.355204105377197, + "ce_orig": 1.4873985052108765, + "epoch": 0.4316629520454382, + "kl_loss": 0.1954437792301178, + "loss_ib": 0.009309642016887665, + "step": 1501 + }, + { + "ce_ib": 3.211336612701416, + "ce_orig": 0.5319896340370178, + "epoch": 0.4316629520454382, + "kl_loss": 0.1957024782896042, + "loss_ib": 0.005168361589312553, + "step": 1501 + }, + { + "ce_ib": 7.809789657592773, + "ce_orig": 1.3806283473968506, + "epoch": 0.4319505356244158, + "kl_loss": 0.4769303798675537, + "loss_ib": 0.012579092755913734, + "step": 1502 + }, + { + "ce_ib": 5.052849292755127, + "ce_orig": 0.748846173286438, + "epoch": 0.4319505356244158, + "kl_loss": 0.27402952313423157, + "loss_ib": 0.007793144788593054, + "step": 1502 + }, + { + "ce_ib": 5.5736083984375, + "ce_orig": 0.5355104207992554, + "epoch": 0.4319505356244158, + "kl_loss": 0.2576088607311249, + "loss_ib": 0.00814969651401043, + "step": 1502 + }, + { + "ce_ib": 9.92026424407959, + "ce_orig": 1.241144061088562, + "epoch": 0.4319505356244158, + "kl_loss": 0.4010234475135803, + "loss_ib": 0.01393049955368042, + "step": 1502 + }, + { + "ce_ib": 5.654511451721191, + "ce_orig": 1.0896109342575073, + "epoch": 0.4322381192033935, + "kl_loss": 0.19214342534542084, + "loss_ib": 0.00757594546303153, + "step": 1503 + }, + { + "ce_ib": 4.391484260559082, + "ce_orig": 0.6887181997299194, + "epoch": 0.4322381192033935, + "kl_loss": 0.2409554123878479, + "loss_ib": 0.006801038049161434, + "step": 1503 + }, + { + "ce_ib": 6.433215141296387, + "ce_orig": 0.7667641043663025, + "epoch": 0.4322381192033935, + "kl_loss": 0.3592512607574463, + "loss_ib": 0.010025727562606335, + "step": 1503 + }, + { + "ce_ib": 6.487468242645264, + "ce_orig": 1.3075002431869507, + "epoch": 0.4322381192033935, + "kl_loss": 0.2316405326128006, + "loss_ib": 0.00880387332290411, + "step": 1503 + }, + { + "ce_ib": 5.21145486831665, + "ce_orig": 0.6229283809661865, + "epoch": 0.4325257027823711, + "kl_loss": 0.2223101705312729, + "loss_ib": 0.0074345567263662815, + "step": 1504 + }, + { + "ce_ib": 7.79116153717041, + "ce_orig": 1.0141862630844116, + "epoch": 0.4325257027823711, + "kl_loss": 0.19813181459903717, + "loss_ib": 0.00977247953414917, + "step": 1504 + }, + { + "ce_ib": 6.594308853149414, + "ce_orig": 0.9326078295707703, + "epoch": 0.4325257027823711, + "kl_loss": 0.36497896909713745, + "loss_ib": 0.010244098491966724, + "step": 1504 + }, + { + "ce_ib": 4.622511386871338, + "ce_orig": 0.4883497357368469, + "epoch": 0.4325257027823711, + "kl_loss": 0.2173852175474167, + "loss_ib": 0.006796363275498152, + "step": 1504 + }, + { + "epoch": 0.43281328636134875, + "grad_norm": 0.1054597720503807, + "learning_rate": 9.665487501338097e-06, + "loss": 0.852, + "step": 1505 + }, + { + "ce_ib": 7.86767578125, + "ce_orig": 1.2227966785430908, + "epoch": 0.43281328636134875, + "kl_loss": 0.1758153885602951, + "loss_ib": 0.009625829756259918, + "step": 1505 + }, + { + "ce_ib": 7.569886684417725, + "ce_orig": 1.2155966758728027, + "epoch": 0.43281328636134875, + "kl_loss": 0.22750471532344818, + "loss_ib": 0.009844934567809105, + "step": 1505 + }, + { + "ce_ib": 5.736252784729004, + "ce_orig": 0.9024336338043213, + "epoch": 0.43281328636134875, + "kl_loss": 0.21125327050685883, + "loss_ib": 0.007848785258829594, + "step": 1505 + }, + { + "ce_ib": 4.251120567321777, + "ce_orig": 0.6517446041107178, + "epoch": 0.43281328636134875, + "kl_loss": 0.2938510775566101, + "loss_ib": 0.007189631462097168, + "step": 1505 + }, + { + "ce_ib": 4.12835693359375, + "ce_orig": 0.6448106169700623, + "epoch": 0.4331008699403264, + "kl_loss": 0.2133052945137024, + "loss_ib": 0.006261409260332584, + "step": 1506 + }, + { + "ce_ib": 8.605734825134277, + "ce_orig": 1.6096972227096558, + "epoch": 0.4331008699403264, + "kl_loss": 0.2922551929950714, + "loss_ib": 0.011528287082910538, + "step": 1506 + }, + { + "ce_ib": 3.7822954654693604, + "ce_orig": 0.5684179663658142, + "epoch": 0.4331008699403264, + "kl_loss": 0.21701683104038239, + "loss_ib": 0.005952463485300541, + "step": 1506 + }, + { + "ce_ib": 4.113936424255371, + "ce_orig": 0.7411576509475708, + "epoch": 0.4331008699403264, + "kl_loss": 0.41098541021347046, + "loss_ib": 0.008223790675401688, + "step": 1506 + }, + { + "ce_ib": 4.773281574249268, + "ce_orig": 0.8815646171569824, + "epoch": 0.43338845351930405, + "kl_loss": 0.19395877420902252, + "loss_ib": 0.0067128692753612995, + "step": 1507 + }, + { + "ce_ib": 5.464571475982666, + "ce_orig": 0.551234245300293, + "epoch": 0.43338845351930405, + "kl_loss": 0.3368734121322632, + "loss_ib": 0.008833305910229683, + "step": 1507 + }, + { + "ce_ib": 5.097268581390381, + "ce_orig": 0.6974053978919983, + "epoch": 0.43338845351930405, + "kl_loss": 0.21404951810836792, + "loss_ib": 0.007237763609737158, + "step": 1507 + }, + { + "ce_ib": 4.549488067626953, + "ce_orig": 0.6958446502685547, + "epoch": 0.43338845351930405, + "kl_loss": 0.41875457763671875, + "loss_ib": 0.008737033233046532, + "step": 1507 + }, + { + "ce_ib": 7.839320659637451, + "ce_orig": 0.8189214468002319, + "epoch": 0.4336760370982817, + "kl_loss": 0.22917625308036804, + "loss_ib": 0.01013108342885971, + "step": 1508 + }, + { + "ce_ib": 7.603825569152832, + "ce_orig": 1.1187269687652588, + "epoch": 0.4336760370982817, + "kl_loss": 0.24749788641929626, + "loss_ib": 0.010078804567456245, + "step": 1508 + }, + { + "ce_ib": 4.917834758758545, + "ce_orig": 0.6455671787261963, + "epoch": 0.4336760370982817, + "kl_loss": 0.19830983877182007, + "loss_ib": 0.006900932639837265, + "step": 1508 + }, + { + "ce_ib": 6.577511787414551, + "ce_orig": 0.6373150944709778, + "epoch": 0.4336760370982817, + "kl_loss": 0.2596627175807953, + "loss_ib": 0.009174139238893986, + "step": 1508 + }, + { + "ce_ib": 8.961044311523438, + "ce_orig": 1.27748441696167, + "epoch": 0.43396362067725935, + "kl_loss": 0.18702471256256104, + "loss_ib": 0.010831291787326336, + "step": 1509 + }, + { + "ce_ib": 5.431795120239258, + "ce_orig": 0.9966217875480652, + "epoch": 0.43396362067725935, + "kl_loss": 0.2645154595375061, + "loss_ib": 0.00807694997638464, + "step": 1509 + }, + { + "ce_ib": 5.892299652099609, + "ce_orig": 0.9126742482185364, + "epoch": 0.43396362067725935, + "kl_loss": 0.23256003856658936, + "loss_ib": 0.008217900060117245, + "step": 1509 + }, + { + "ce_ib": 5.339905738830566, + "ce_orig": 1.0408644676208496, + "epoch": 0.43396362067725935, + "kl_loss": 0.32789915800094604, + "loss_ib": 0.008618896827101707, + "step": 1509 + }, + { + "epoch": 0.434251204256237, + "grad_norm": 0.11715665459632874, + "learning_rate": 9.662690897861018e-06, + "loss": 0.8719, + "step": 1510 + }, + { + "ce_ib": 8.179683685302734, + "ce_orig": 0.557664692401886, + "epoch": 0.434251204256237, + "kl_loss": 0.23708882927894592, + "loss_ib": 0.010550571605563164, + "step": 1510 + }, + { + "ce_ib": 4.347968101501465, + "ce_orig": 0.7091971039772034, + "epoch": 0.434251204256237, + "kl_loss": 0.16048789024353027, + "loss_ib": 0.005952846724539995, + "step": 1510 + }, + { + "ce_ib": 9.547073364257812, + "ce_orig": 1.5443179607391357, + "epoch": 0.434251204256237, + "kl_loss": 0.27068954706192017, + "loss_ib": 0.012253968045115471, + "step": 1510 + }, + { + "ce_ib": 5.236355304718018, + "ce_orig": 0.6377555131912231, + "epoch": 0.434251204256237, + "kl_loss": 0.3052249848842621, + "loss_ib": 0.008288605138659477, + "step": 1510 + }, + { + "ce_ib": 4.119098663330078, + "ce_orig": 0.2889801859855652, + "epoch": 0.4345387878352146, + "kl_loss": 0.48718804121017456, + "loss_ib": 0.008990978822112083, + "step": 1511 + }, + { + "ce_ib": 6.019423484802246, + "ce_orig": 0.8404080271720886, + "epoch": 0.4345387878352146, + "kl_loss": 0.28162747621536255, + "loss_ib": 0.00883569847792387, + "step": 1511 + }, + { + "ce_ib": 7.011948585510254, + "ce_orig": 0.7254608869552612, + "epoch": 0.4345387878352146, + "kl_loss": 0.18887865543365479, + "loss_ib": 0.008900735527276993, + "step": 1511 + }, + { + "ce_ib": 9.401799201965332, + "ce_orig": 1.7815628051757812, + "epoch": 0.4345387878352146, + "kl_loss": 0.2903178930282593, + "loss_ib": 0.01230497844517231, + "step": 1511 + }, + { + "ce_ib": 10.27991008758545, + "ce_orig": 1.76600182056427, + "epoch": 0.4348263714141922, + "kl_loss": 0.482980340719223, + "loss_ib": 0.01510971412062645, + "step": 1512 + }, + { + "ce_ib": 5.170495986938477, + "ce_orig": 0.4687999486923218, + "epoch": 0.4348263714141922, + "kl_loss": 0.21127310395240784, + "loss_ib": 0.007283227052539587, + "step": 1512 + }, + { + "ce_ib": 7.678666591644287, + "ce_orig": 0.8942811489105225, + "epoch": 0.4348263714141922, + "kl_loss": 0.28342896699905396, + "loss_ib": 0.01051295641809702, + "step": 1512 + }, + { + "ce_ib": 9.15030574798584, + "ce_orig": 1.1176377534866333, + "epoch": 0.4348263714141922, + "kl_loss": 0.18655428290367126, + "loss_ib": 0.011015849187970161, + "step": 1512 + }, + { + "ce_ib": 4.875361442565918, + "ce_orig": 0.7338507175445557, + "epoch": 0.4351139549931699, + "kl_loss": 0.19533416628837585, + "loss_ib": 0.006828702986240387, + "step": 1513 + }, + { + "ce_ib": 3.502323627471924, + "ce_orig": 0.6863707304000854, + "epoch": 0.4351139549931699, + "kl_loss": 0.1371149718761444, + "loss_ib": 0.004873473197221756, + "step": 1513 + }, + { + "ce_ib": 6.512062072753906, + "ce_orig": 1.264266848564148, + "epoch": 0.4351139549931699, + "kl_loss": 0.2282438576221466, + "loss_ib": 0.00879450049251318, + "step": 1513 + }, + { + "ce_ib": 13.425664901733398, + "ce_orig": 2.067333698272705, + "epoch": 0.4351139549931699, + "kl_loss": 0.741570234298706, + "loss_ib": 0.020841367542743683, + "step": 1513 + }, + { + "ce_ib": 8.506553649902344, + "ce_orig": 1.420841097831726, + "epoch": 0.43540153857214753, + "kl_loss": 0.230472594499588, + "loss_ib": 0.010811279527842999, + "step": 1514 + }, + { + "ce_ib": 6.533507347106934, + "ce_orig": 0.893858790397644, + "epoch": 0.43540153857214753, + "kl_loss": 0.29412978887557983, + "loss_ib": 0.009474805556237698, + "step": 1514 + }, + { + "ce_ib": 8.280712127685547, + "ce_orig": 1.3050899505615234, + "epoch": 0.43540153857214753, + "kl_loss": 0.30212193727493286, + "loss_ib": 0.01130193192511797, + "step": 1514 + }, + { + "ce_ib": 5.96417236328125, + "ce_orig": 0.8574302792549133, + "epoch": 0.43540153857214753, + "kl_loss": 0.28048038482666016, + "loss_ib": 0.008768975734710693, + "step": 1514 + }, + { + "epoch": 0.43568912215112515, + "grad_norm": 0.13548529148101807, + "learning_rate": 9.659883060879333e-06, + "loss": 0.9358, + "step": 1515 + }, + { + "ce_ib": 3.8507282733917236, + "ce_orig": 0.8658118844032288, + "epoch": 0.43568912215112515, + "kl_loss": 0.17177735269069672, + "loss_ib": 0.005568502005189657, + "step": 1515 + }, + { + "ce_ib": 3.168516159057617, + "ce_orig": 0.40062665939331055, + "epoch": 0.43568912215112515, + "kl_loss": 0.4463632106781006, + "loss_ib": 0.007632147986441851, + "step": 1515 + }, + { + "ce_ib": 7.129504680633545, + "ce_orig": 0.9099282026290894, + "epoch": 0.43568912215112515, + "kl_loss": 0.1943160444498062, + "loss_ib": 0.009072665125131607, + "step": 1515 + }, + { + "ce_ib": 3.711723804473877, + "ce_orig": 0.6062166094779968, + "epoch": 0.43568912215112515, + "kl_loss": 0.17591118812561035, + "loss_ib": 0.005470836069434881, + "step": 1515 + }, + { + "ce_ib": 4.956431865692139, + "ce_orig": 0.8364520072937012, + "epoch": 0.43597670573010283, + "kl_loss": 0.2664000689983368, + "loss_ib": 0.007620432414114475, + "step": 1516 + }, + { + "ce_ib": 10.05667495727539, + "ce_orig": 1.1949691772460938, + "epoch": 0.43597670573010283, + "kl_loss": 0.24516168236732483, + "loss_ib": 0.012508291751146317, + "step": 1516 + }, + { + "ce_ib": 6.448936939239502, + "ce_orig": 0.7740538120269775, + "epoch": 0.43597670573010283, + "kl_loss": 0.336667537689209, + "loss_ib": 0.009815611876547337, + "step": 1516 + }, + { + "ce_ib": 3.7940187454223633, + "ce_orig": 1.0476545095443726, + "epoch": 0.43597670573010283, + "kl_loss": 0.499667763710022, + "loss_ib": 0.00879069697111845, + "step": 1516 + }, + { + "ce_ib": 4.319472789764404, + "ce_orig": 0.6066058874130249, + "epoch": 0.43626428930908046, + "kl_loss": 0.23717695474624634, + "loss_ib": 0.006691242102533579, + "step": 1517 + }, + { + "ce_ib": 6.696260452270508, + "ce_orig": 0.9695017337799072, + "epoch": 0.43626428930908046, + "kl_loss": 0.25179165601730347, + "loss_ib": 0.009214176796376705, + "step": 1517 + }, + { + "ce_ib": 8.007752418518066, + "ce_orig": 1.0904877185821533, + "epoch": 0.43626428930908046, + "kl_loss": 0.3288235068321228, + "loss_ib": 0.011295987293124199, + "step": 1517 + }, + { + "ce_ib": 6.769944190979004, + "ce_orig": 1.0057461261749268, + "epoch": 0.43626428930908046, + "kl_loss": 0.2518278956413269, + "loss_ib": 0.009288223460316658, + "step": 1517 + }, + { + "ce_ib": 4.416586399078369, + "ce_orig": 0.5639540553092957, + "epoch": 0.4365518728880581, + "kl_loss": 0.31698429584503174, + "loss_ib": 0.0075864288955926895, + "step": 1518 + }, + { + "ce_ib": 5.002842426300049, + "ce_orig": 0.6733067035675049, + "epoch": 0.4365518728880581, + "kl_loss": 0.26536762714385986, + "loss_ib": 0.0076565188355743885, + "step": 1518 + }, + { + "ce_ib": 10.548088073730469, + "ce_orig": 1.861885666847229, + "epoch": 0.4365518728880581, + "kl_loss": 0.3233809471130371, + "loss_ib": 0.013781897723674774, + "step": 1518 + }, + { + "ce_ib": 3.5734047889709473, + "ce_orig": 0.466007798910141, + "epoch": 0.4365518728880581, + "kl_loss": 0.5387842655181885, + "loss_ib": 0.008961247280240059, + "step": 1518 + }, + { + "ce_ib": 11.615768432617188, + "ce_orig": 2.115899085998535, + "epoch": 0.43683945646703576, + "kl_loss": 0.3307480812072754, + "loss_ib": 0.014923249371349812, + "step": 1519 + }, + { + "ce_ib": 5.898049354553223, + "ce_orig": 0.5681475400924683, + "epoch": 0.43683945646703576, + "kl_loss": 0.20464731752872467, + "loss_ib": 0.007944522425532341, + "step": 1519 + }, + { + "ce_ib": 4.767960548400879, + "ce_orig": 0.3993023931980133, + "epoch": 0.43683945646703576, + "kl_loss": 0.21171991527080536, + "loss_ib": 0.00688515929505229, + "step": 1519 + }, + { + "ce_ib": 5.281295299530029, + "ce_orig": 0.7131248712539673, + "epoch": 0.43683945646703576, + "kl_loss": 0.35403138399124146, + "loss_ib": 0.008821608498692513, + "step": 1519 + }, + { + "epoch": 0.4371270400460134, + "grad_norm": 0.09799163043498993, + "learning_rate": 9.65706399715777e-06, + "loss": 0.8888, + "step": 1520 + }, + { + "ce_ib": 7.551825046539307, + "ce_orig": 1.2064350843429565, + "epoch": 0.4371270400460134, + "kl_loss": 0.232176274061203, + "loss_ib": 0.009873587638139725, + "step": 1520 + }, + { + "ce_ib": 9.467873573303223, + "ce_orig": 1.4917707443237305, + "epoch": 0.4371270400460134, + "kl_loss": 0.23060013353824615, + "loss_ib": 0.011773874051868916, + "step": 1520 + }, + { + "ce_ib": 4.483201026916504, + "ce_orig": 0.5693926215171814, + "epoch": 0.4371270400460134, + "kl_loss": 0.2563168406486511, + "loss_ib": 0.00704636937007308, + "step": 1520 + }, + { + "ce_ib": 6.398351669311523, + "ce_orig": 0.8779327869415283, + "epoch": 0.4371270400460134, + "kl_loss": 0.23980334401130676, + "loss_ib": 0.008796385489404202, + "step": 1520 + }, + { + "ce_ib": 5.1323018074035645, + "ce_orig": 0.7175425291061401, + "epoch": 0.437414623624991, + "kl_loss": 0.18345916271209717, + "loss_ib": 0.006966893561184406, + "step": 1521 + }, + { + "ce_ib": 7.529026985168457, + "ce_orig": 1.5794041156768799, + "epoch": 0.437414623624991, + "kl_loss": 0.17436310648918152, + "loss_ib": 0.009272657334804535, + "step": 1521 + }, + { + "ce_ib": 10.308955192565918, + "ce_orig": 0.9079710841178894, + "epoch": 0.437414623624991, + "kl_loss": 0.23323319852352142, + "loss_ib": 0.01264128740876913, + "step": 1521 + }, + { + "ce_ib": 7.019486427307129, + "ce_orig": 1.42014741897583, + "epoch": 0.437414623624991, + "kl_loss": 0.19986101984977722, + "loss_ib": 0.009018097072839737, + "step": 1521 + }, + { + "ce_ib": 8.882169723510742, + "ce_orig": 1.5748227834701538, + "epoch": 0.43770220720396863, + "kl_loss": 0.2393156886100769, + "loss_ib": 0.01127532683312893, + "step": 1522 + }, + { + "ce_ib": 6.86137580871582, + "ce_orig": 0.8214037418365479, + "epoch": 0.43770220720396863, + "kl_loss": 0.21534022688865662, + "loss_ib": 0.009014777839183807, + "step": 1522 + }, + { + "ce_ib": 4.2742085456848145, + "ce_orig": 0.781179666519165, + "epoch": 0.43770220720396863, + "kl_loss": 0.2706628441810608, + "loss_ib": 0.006980836857110262, + "step": 1522 + }, + { + "ce_ib": 4.317927837371826, + "ce_orig": 0.4462431073188782, + "epoch": 0.43770220720396863, + "kl_loss": 0.26821720600128174, + "loss_ib": 0.007000099867582321, + "step": 1522 + }, + { + "ce_ib": 9.145509719848633, + "ce_orig": 1.3221925497055054, + "epoch": 0.4379897907829463, + "kl_loss": 0.1444806456565857, + "loss_ib": 0.01059031579643488, + "step": 1523 + }, + { + "ce_ib": 6.61660099029541, + "ce_orig": 1.1076503992080688, + "epoch": 0.4379897907829463, + "kl_loss": 0.2389574646949768, + "loss_ib": 0.009006176143884659, + "step": 1523 + }, + { + "ce_ib": 9.713702201843262, + "ce_orig": 1.8915526866912842, + "epoch": 0.4379897907829463, + "kl_loss": 0.7463563680648804, + "loss_ib": 0.017177265137434006, + "step": 1523 + }, + { + "ce_ib": 5.233415603637695, + "ce_orig": 0.8730946779251099, + "epoch": 0.4379897907829463, + "kl_loss": 0.24787402153015137, + "loss_ib": 0.007712156046181917, + "step": 1523 + }, + { + "ce_ib": 7.0577850341796875, + "ce_orig": 0.5874288082122803, + "epoch": 0.43827737436192393, + "kl_loss": 0.3205152153968811, + "loss_ib": 0.010262937285006046, + "step": 1524 + }, + { + "ce_ib": 3.819340467453003, + "ce_orig": 0.6037487387657166, + "epoch": 0.43827737436192393, + "kl_loss": 0.16586077213287354, + "loss_ib": 0.005477948114275932, + "step": 1524 + }, + { + "ce_ib": 7.668882846832275, + "ce_orig": 0.8537070155143738, + "epoch": 0.43827737436192393, + "kl_loss": 0.2507067024707794, + "loss_ib": 0.010175949893891811, + "step": 1524 + }, + { + "ce_ib": 5.600090980529785, + "ce_orig": 1.1724077463150024, + "epoch": 0.43827737436192393, + "kl_loss": 0.20900413393974304, + "loss_ib": 0.007690132595598698, + "step": 1524 + }, + { + "epoch": 0.43856495794090156, + "grad_norm": 0.09972728043794632, + "learning_rate": 9.654233713488112e-06, + "loss": 0.8899, + "step": 1525 + }, + { + "ce_ib": 6.1174798011779785, + "ce_orig": 1.08661949634552, + "epoch": 0.43856495794090156, + "kl_loss": 0.22504150867462158, + "loss_ib": 0.008367895148694515, + "step": 1525 + }, + { + "ce_ib": 3.9706997871398926, + "ce_orig": 0.5374826192855835, + "epoch": 0.43856495794090156, + "kl_loss": 0.18511299788951874, + "loss_ib": 0.005821830127388239, + "step": 1525 + }, + { + "ce_ib": 6.025529384613037, + "ce_orig": 0.9799771308898926, + "epoch": 0.43856495794090156, + "kl_loss": 0.25164347887039185, + "loss_ib": 0.008541963994503021, + "step": 1525 + }, + { + "ce_ib": 7.1165313720703125, + "ce_orig": 0.6115043759346008, + "epoch": 0.43856495794090156, + "kl_loss": 0.25530850887298584, + "loss_ib": 0.00966961681842804, + "step": 1525 + }, + { + "ce_ib": 5.062288284301758, + "ce_orig": 0.6672273874282837, + "epoch": 0.43885254151987924, + "kl_loss": 0.19899994134902954, + "loss_ib": 0.007052287925034761, + "step": 1526 + }, + { + "ce_ib": 7.357656478881836, + "ce_orig": 1.3270339965820312, + "epoch": 0.43885254151987924, + "kl_loss": 0.25842565298080444, + "loss_ib": 0.00994191225618124, + "step": 1526 + }, + { + "ce_ib": 7.756661891937256, + "ce_orig": 1.2379779815673828, + "epoch": 0.43885254151987924, + "kl_loss": 0.22414611279964447, + "loss_ib": 0.009998123161494732, + "step": 1526 + }, + { + "ce_ib": 3.338984251022339, + "ce_orig": 0.5923484563827515, + "epoch": 0.43885254151987924, + "kl_loss": 0.2646840810775757, + "loss_ib": 0.005985824856907129, + "step": 1526 + }, + { + "ce_ib": 3.647657632827759, + "ce_orig": 0.34279149770736694, + "epoch": 0.43914012509885686, + "kl_loss": 0.194602370262146, + "loss_ib": 0.005593681242316961, + "step": 1527 + }, + { + "ce_ib": 10.869415283203125, + "ce_orig": 1.901925802230835, + "epoch": 0.43914012509885686, + "kl_loss": 0.326307475566864, + "loss_ib": 0.014132489450275898, + "step": 1527 + }, + { + "ce_ib": 5.827927589416504, + "ce_orig": 0.7578860521316528, + "epoch": 0.43914012509885686, + "kl_loss": 0.25722992420196533, + "loss_ib": 0.008400226943194866, + "step": 1527 + }, + { + "ce_ib": 6.778678894042969, + "ce_orig": 1.1516461372375488, + "epoch": 0.43914012509885686, + "kl_loss": 0.28379058837890625, + "loss_ib": 0.00961658451706171, + "step": 1527 + }, + { + "ce_ib": 6.970860958099365, + "ce_orig": 0.8692914843559265, + "epoch": 0.4394277086778345, + "kl_loss": 0.29207298159599304, + "loss_ib": 0.009891591034829617, + "step": 1528 + }, + { + "ce_ib": 4.707976341247559, + "ce_orig": 0.4135763645172119, + "epoch": 0.4394277086778345, + "kl_loss": 0.22886447608470917, + "loss_ib": 0.006996620912104845, + "step": 1528 + }, + { + "ce_ib": 7.193721294403076, + "ce_orig": 0.8729004859924316, + "epoch": 0.4394277086778345, + "kl_loss": 0.20061442255973816, + "loss_ib": 0.009199866093695164, + "step": 1528 + }, + { + "ce_ib": 7.849767208099365, + "ce_orig": 1.0315536260604858, + "epoch": 0.4394277086778345, + "kl_loss": 0.27790358662605286, + "loss_ib": 0.010628802701830864, + "step": 1528 + }, + { + "ce_ib": 7.09470796585083, + "ce_orig": 1.4208656549453735, + "epoch": 0.43971529225681216, + "kl_loss": 0.3060705065727234, + "loss_ib": 0.010155413299798965, + "step": 1529 + }, + { + "ce_ib": 4.538375377655029, + "ce_orig": 0.6493577361106873, + "epoch": 0.43971529225681216, + "kl_loss": 0.3036617040634155, + "loss_ib": 0.007574991788715124, + "step": 1529 + }, + { + "ce_ib": 6.309713840484619, + "ce_orig": 0.5263689160346985, + "epoch": 0.43971529225681216, + "kl_loss": 0.3667697310447693, + "loss_ib": 0.009977410547435284, + "step": 1529 + }, + { + "ce_ib": 4.808983325958252, + "ce_orig": 0.5394362211227417, + "epoch": 0.43971529225681216, + "kl_loss": 0.20775997638702393, + "loss_ib": 0.006886583287268877, + "step": 1529 + }, + { + "epoch": 0.4400028758357898, + "grad_norm": 0.09744717180728912, + "learning_rate": 9.651392216689167e-06, + "loss": 0.8832, + "step": 1530 + }, + { + "ce_ib": 5.440213680267334, + "ce_orig": 0.5668556690216064, + "epoch": 0.4400028758357898, + "kl_loss": 0.2607230246067047, + "loss_ib": 0.008047443814575672, + "step": 1530 + }, + { + "ce_ib": 7.057228088378906, + "ce_orig": 0.7402814626693726, + "epoch": 0.4400028758357898, + "kl_loss": 0.1861785650253296, + "loss_ib": 0.008919013664126396, + "step": 1530 + }, + { + "ce_ib": 3.8372819423675537, + "ce_orig": 0.6064683794975281, + "epoch": 0.4400028758357898, + "kl_loss": 0.22002679109573364, + "loss_ib": 0.006037550047039986, + "step": 1530 + }, + { + "ce_ib": 5.222181797027588, + "ce_orig": 0.6531853079795837, + "epoch": 0.4400028758357898, + "kl_loss": 0.2058596909046173, + "loss_ib": 0.007280778605490923, + "step": 1530 + }, + { + "ce_ib": 6.428709030151367, + "ce_orig": 1.0921529531478882, + "epoch": 0.4402904594147674, + "kl_loss": 0.24581462144851685, + "loss_ib": 0.008886855095624924, + "step": 1531 + }, + { + "ce_ib": 6.755316257476807, + "ce_orig": 1.2458606958389282, + "epoch": 0.4402904594147674, + "kl_loss": 0.5204867720603943, + "loss_ib": 0.011960184201598167, + "step": 1531 + }, + { + "ce_ib": 2.8809092044830322, + "ce_orig": 0.4281614422798157, + "epoch": 0.4402904594147674, + "kl_loss": 0.2708930969238281, + "loss_ib": 0.005589840468019247, + "step": 1531 + }, + { + "ce_ib": 3.588864803314209, + "ce_orig": 0.5742392539978027, + "epoch": 0.4402904594147674, + "kl_loss": 0.25775015354156494, + "loss_ib": 0.006166366394609213, + "step": 1531 + }, + { + "ce_ib": 6.447246551513672, + "ce_orig": 1.0808292627334595, + "epoch": 0.44057804299374503, + "kl_loss": 0.1584368497133255, + "loss_ib": 0.008031615056097507, + "step": 1532 + }, + { + "ce_ib": 6.346353054046631, + "ce_orig": 1.025672197341919, + "epoch": 0.44057804299374503, + "kl_loss": 0.2612716257572174, + "loss_ib": 0.008959068916738033, + "step": 1532 + }, + { + "ce_ib": 6.36782693862915, + "ce_orig": 1.040495753288269, + "epoch": 0.44057804299374503, + "kl_loss": 0.23167358338832855, + "loss_ib": 0.008684562519192696, + "step": 1532 + }, + { + "ce_ib": 4.488460063934326, + "ce_orig": 0.4259486794471741, + "epoch": 0.44057804299374503, + "kl_loss": 0.2865976095199585, + "loss_ib": 0.0073544359765946865, + "step": 1532 + }, + { + "ce_ib": 4.456282138824463, + "ce_orig": 0.7206268906593323, + "epoch": 0.4408656265727227, + "kl_loss": 0.2152857780456543, + "loss_ib": 0.0066091399639844894, + "step": 1533 + }, + { + "ce_ib": 4.3911590576171875, + "ce_orig": 0.6018010377883911, + "epoch": 0.4408656265727227, + "kl_loss": 0.46944814920425415, + "loss_ib": 0.00908564031124115, + "step": 1533 + }, + { + "ce_ib": 4.210399150848389, + "ce_orig": 0.4141979217529297, + "epoch": 0.4408656265727227, + "kl_loss": 0.16251495480537415, + "loss_ib": 0.005835548508912325, + "step": 1533 + }, + { + "ce_ib": 7.888265132904053, + "ce_orig": 0.6826446056365967, + "epoch": 0.4408656265727227, + "kl_loss": 0.23286780714988708, + "loss_ib": 0.010216942988336086, + "step": 1533 + }, + { + "ce_ib": 7.644165992736816, + "ce_orig": 1.4100502729415894, + "epoch": 0.44115321015170034, + "kl_loss": 0.23425854742527008, + "loss_ib": 0.009986751712858677, + "step": 1534 + }, + { + "ce_ib": 3.7016208171844482, + "ce_orig": 0.6204091310501099, + "epoch": 0.44115321015170034, + "kl_loss": 0.18072494864463806, + "loss_ib": 0.005508870352059603, + "step": 1534 + }, + { + "ce_ib": 5.5186052322387695, + "ce_orig": 0.5334498286247253, + "epoch": 0.44115321015170034, + "kl_loss": 0.33088797330856323, + "loss_ib": 0.008827484212815762, + "step": 1534 + }, + { + "ce_ib": 4.205150604248047, + "ce_orig": 0.9227320551872253, + "epoch": 0.44115321015170034, + "kl_loss": 0.147797092795372, + "loss_ib": 0.005683121737092733, + "step": 1534 + }, + { + "epoch": 0.44144079373067796, + "grad_norm": 0.12952423095703125, + "learning_rate": 9.64853951360676e-06, + "loss": 0.8526, + "step": 1535 + }, + { + "ce_ib": 6.74899959564209, + "ce_orig": 1.1357985734939575, + "epoch": 0.44144079373067796, + "kl_loss": 0.2118733525276184, + "loss_ib": 0.008867733180522919, + "step": 1535 + }, + { + "ce_ib": 3.9933173656463623, + "ce_orig": 0.38218119740486145, + "epoch": 0.44144079373067796, + "kl_loss": 0.27774354815483093, + "loss_ib": 0.006770752370357513, + "step": 1535 + }, + { + "ce_ib": 5.220664024353027, + "ce_orig": 0.4890460968017578, + "epoch": 0.44144079373067796, + "kl_loss": 0.23115640878677368, + "loss_ib": 0.007532228250056505, + "step": 1535 + }, + { + "ce_ib": 8.40595531463623, + "ce_orig": 1.3035410642623901, + "epoch": 0.44144079373067796, + "kl_loss": 0.2943679690361023, + "loss_ib": 0.011349635198712349, + "step": 1535 + }, + { + "ce_ib": 6.669676303863525, + "ce_orig": 0.8286333084106445, + "epoch": 0.44172837730965564, + "kl_loss": 0.2603955864906311, + "loss_ib": 0.009273631498217583, + "step": 1536 + }, + { + "ce_ib": 3.091402292251587, + "ce_orig": 0.6263300776481628, + "epoch": 0.44172837730965564, + "kl_loss": 0.21528851985931396, + "loss_ib": 0.005244287196546793, + "step": 1536 + }, + { + "ce_ib": 5.090882778167725, + "ce_orig": 0.784512460231781, + "epoch": 0.44172837730965564, + "kl_loss": 0.16377821564674377, + "loss_ib": 0.006728664506226778, + "step": 1536 + }, + { + "ce_ib": 9.245505332946777, + "ce_orig": 1.5032682418823242, + "epoch": 0.44172837730965564, + "kl_loss": 0.23776906728744507, + "loss_ib": 0.011623196303844452, + "step": 1536 + }, + { + "ce_ib": 7.090968132019043, + "ce_orig": 1.1932905912399292, + "epoch": 0.44201596088863326, + "kl_loss": 0.2389073371887207, + "loss_ib": 0.009480041451752186, + "step": 1537 + }, + { + "ce_ib": 7.811031818389893, + "ce_orig": 1.1800780296325684, + "epoch": 0.44201596088863326, + "kl_loss": 0.2227729856967926, + "loss_ib": 0.010038761422038078, + "step": 1537 + }, + { + "ce_ib": 6.59114933013916, + "ce_orig": 1.033427119255066, + "epoch": 0.44201596088863326, + "kl_loss": 0.2936612069606781, + "loss_ib": 0.009527761489152908, + "step": 1537 + }, + { + "ce_ib": 4.836669921875, + "ce_orig": 0.5264076590538025, + "epoch": 0.44201596088863326, + "kl_loss": 0.17789442837238312, + "loss_ib": 0.006615614052861929, + "step": 1537 + }, + { + "ce_ib": 10.921903610229492, + "ce_orig": 1.932668924331665, + "epoch": 0.4423035444676109, + "kl_loss": 0.23033437132835388, + "loss_ib": 0.013225247152149677, + "step": 1538 + }, + { + "ce_ib": 5.171535968780518, + "ce_orig": 0.7136121988296509, + "epoch": 0.4423035444676109, + "kl_loss": 0.1687464714050293, + "loss_ib": 0.006859000772237778, + "step": 1538 + }, + { + "ce_ib": 5.190009593963623, + "ce_orig": 0.5568004846572876, + "epoch": 0.4423035444676109, + "kl_loss": 0.26972246170043945, + "loss_ib": 0.007887233980000019, + "step": 1538 + }, + { + "ce_ib": 8.552884101867676, + "ce_orig": 0.9528330564498901, + "epoch": 0.4423035444676109, + "kl_loss": 0.3009048402309418, + "loss_ib": 0.011561932042241096, + "step": 1538 + }, + { + "ce_ib": 4.198854446411133, + "ce_orig": 0.8415638208389282, + "epoch": 0.44259112804658857, + "kl_loss": 0.22010204195976257, + "loss_ib": 0.006399874575436115, + "step": 1539 + }, + { + "ce_ib": 9.696503639221191, + "ce_orig": 1.5634467601776123, + "epoch": 0.44259112804658857, + "kl_loss": 0.19891905784606934, + "loss_ib": 0.011685694567859173, + "step": 1539 + }, + { + "ce_ib": 4.511733531951904, + "ce_orig": 0.8286433219909668, + "epoch": 0.44259112804658857, + "kl_loss": 0.16388346254825592, + "loss_ib": 0.006150567904114723, + "step": 1539 + }, + { + "ce_ib": 6.678924083709717, + "ce_orig": 0.9080381393432617, + "epoch": 0.44259112804658857, + "kl_loss": 0.16212314367294312, + "loss_ib": 0.008300155401229858, + "step": 1539 + }, + { + "epoch": 0.4428787116255662, + "grad_norm": 0.12597346305847168, + "learning_rate": 9.645675611113715e-06, + "loss": 0.8547, + "step": 1540 + }, + { + "ce_ib": 5.553781986236572, + "ce_orig": 0.9590039849281311, + "epoch": 0.4428787116255662, + "kl_loss": 0.19984352588653564, + "loss_ib": 0.007552217226475477, + "step": 1540 + }, + { + "ce_ib": 7.916083335876465, + "ce_orig": 1.2729321718215942, + "epoch": 0.4428787116255662, + "kl_loss": 0.16869348287582397, + "loss_ib": 0.009603017941117287, + "step": 1540 + }, + { + "ce_ib": 6.0101399421691895, + "ce_orig": 0.8366454243659973, + "epoch": 0.4428787116255662, + "kl_loss": 0.2121955305337906, + "loss_ib": 0.008132095448672771, + "step": 1540 + }, + { + "ce_ib": 5.466952323913574, + "ce_orig": 0.7045579552650452, + "epoch": 0.4428787116255662, + "kl_loss": 0.33795952796936035, + "loss_ib": 0.008846547454595566, + "step": 1540 + }, + { + "ce_ib": 7.9375739097595215, + "ce_orig": 1.2572914361953735, + "epoch": 0.4431662952045438, + "kl_loss": 0.25002428889274597, + "loss_ib": 0.010437816381454468, + "step": 1541 + }, + { + "ce_ib": 4.4934234619140625, + "ce_orig": 0.39058825373649597, + "epoch": 0.4431662952045438, + "kl_loss": 0.25045621395111084, + "loss_ib": 0.006997985765337944, + "step": 1541 + }, + { + "ce_ib": 6.903958320617676, + "ce_orig": 0.5183854103088379, + "epoch": 0.4431662952045438, + "kl_loss": 0.31575220823287964, + "loss_ib": 0.010061481036245823, + "step": 1541 + }, + { + "ce_ib": 3.8567583560943604, + "ce_orig": 0.8497815728187561, + "epoch": 0.4431662952045438, + "kl_loss": 0.16291774809360504, + "loss_ib": 0.005485935602337122, + "step": 1541 + }, + { + "ce_ib": 3.3252243995666504, + "ce_orig": 0.605065107345581, + "epoch": 0.44345387878352144, + "kl_loss": 0.2954948842525482, + "loss_ib": 0.00628017308190465, + "step": 1542 + }, + { + "ce_ib": 6.021589756011963, + "ce_orig": 1.0220741033554077, + "epoch": 0.44345387878352144, + "kl_loss": 0.250985711812973, + "loss_ib": 0.008531446568667889, + "step": 1542 + }, + { + "ce_ib": 5.84227180480957, + "ce_orig": 0.8054718971252441, + "epoch": 0.44345387878352144, + "kl_loss": 0.2861500382423401, + "loss_ib": 0.008703771978616714, + "step": 1542 + }, + { + "ce_ib": 8.967488288879395, + "ce_orig": 1.5930452346801758, + "epoch": 0.44345387878352144, + "kl_loss": 0.23124566674232483, + "loss_ib": 0.011279945261776447, + "step": 1542 + }, + { + "ce_ib": 6.608915328979492, + "ce_orig": 0.62285315990448, + "epoch": 0.4437414623624991, + "kl_loss": 0.29133331775665283, + "loss_ib": 0.009522248059511185, + "step": 1543 + }, + { + "ce_ib": 3.287733793258667, + "ce_orig": 0.7592197060585022, + "epoch": 0.4437414623624991, + "kl_loss": 0.17416146397590637, + "loss_ib": 0.005029348190873861, + "step": 1543 + }, + { + "ce_ib": 4.219071388244629, + "ce_orig": 0.7607125043869019, + "epoch": 0.4437414623624991, + "kl_loss": 0.20168092846870422, + "loss_ib": 0.0062358807772397995, + "step": 1543 + }, + { + "ce_ib": 2.911717414855957, + "ce_orig": 0.3957778811454773, + "epoch": 0.4437414623624991, + "kl_loss": 0.28831154108047485, + "loss_ib": 0.005794832482933998, + "step": 1543 + }, + { + "ce_ib": 5.865091800689697, + "ce_orig": 0.7722824811935425, + "epoch": 0.44402904594147674, + "kl_loss": 0.24601982533931732, + "loss_ib": 0.008325289934873581, + "step": 1544 + }, + { + "ce_ib": 5.063828945159912, + "ce_orig": 0.706646740436554, + "epoch": 0.44402904594147674, + "kl_loss": 0.22632688283920288, + "loss_ib": 0.0073270974680781364, + "step": 1544 + }, + { + "ce_ib": 7.057989597320557, + "ce_orig": 1.2739616632461548, + "epoch": 0.44402904594147674, + "kl_loss": 0.13553673028945923, + "loss_ib": 0.008413356728851795, + "step": 1544 + }, + { + "ce_ib": 6.612596035003662, + "ce_orig": 1.0941510200500488, + "epoch": 0.44402904594147674, + "kl_loss": 0.33927834033966064, + "loss_ib": 0.010005378164350986, + "step": 1544 + }, + { + "epoch": 0.44431662952045436, + "grad_norm": 0.11126173287630081, + "learning_rate": 9.642800516109842e-06, + "loss": 0.9154, + "step": 1545 + }, + { + "ce_ib": 6.68749475479126, + "ce_orig": 1.1780781745910645, + "epoch": 0.44431662952045436, + "kl_loss": 0.2303389608860016, + "loss_ib": 0.008990884758532047, + "step": 1545 + }, + { + "ce_ib": 8.691813468933105, + "ce_orig": 1.0570861101150513, + "epoch": 0.44431662952045436, + "kl_loss": 0.23569072782993317, + "loss_ib": 0.01104872114956379, + "step": 1545 + }, + { + "ce_ib": 7.5655598640441895, + "ce_orig": 0.8942456841468811, + "epoch": 0.44431662952045436, + "kl_loss": 0.19266939163208008, + "loss_ib": 0.009492253884673119, + "step": 1545 + }, + { + "ce_ib": 4.0522966384887695, + "ce_orig": 0.8112623691558838, + "epoch": 0.44431662952045436, + "kl_loss": 0.1970742642879486, + "loss_ib": 0.006023039110004902, + "step": 1545 + }, + { + "ce_ib": 9.083720207214355, + "ce_orig": 1.3299872875213623, + "epoch": 0.44460421309943204, + "kl_loss": 0.7850167751312256, + "loss_ib": 0.01693388819694519, + "step": 1546 + }, + { + "ce_ib": 6.961404323577881, + "ce_orig": 1.0265910625457764, + "epoch": 0.44460421309943204, + "kl_loss": 0.22739318013191223, + "loss_ib": 0.009235336445271969, + "step": 1546 + }, + { + "ce_ib": 6.790365219116211, + "ce_orig": 0.7861922383308411, + "epoch": 0.44460421309943204, + "kl_loss": 0.2910609245300293, + "loss_ib": 0.009700974449515343, + "step": 1546 + }, + { + "ce_ib": 9.423171043395996, + "ce_orig": 1.499626874923706, + "epoch": 0.44460421309943204, + "kl_loss": 0.2499282956123352, + "loss_ib": 0.011922454461455345, + "step": 1546 + }, + { + "ce_ib": 5.589235782623291, + "ce_orig": 0.6277043223381042, + "epoch": 0.44489179667840967, + "kl_loss": 0.275199294090271, + "loss_ib": 0.00834122858941555, + "step": 1547 + }, + { + "ce_ib": 3.979626178741455, + "ce_orig": 0.5686247944831848, + "epoch": 0.44489179667840967, + "kl_loss": 0.1741591989994049, + "loss_ib": 0.005721218418329954, + "step": 1547 + }, + { + "ce_ib": 8.743000030517578, + "ce_orig": 0.9185367226600647, + "epoch": 0.44489179667840967, + "kl_loss": 0.19029943645000458, + "loss_ib": 0.010645993985235691, + "step": 1547 + }, + { + "ce_ib": 3.9479920864105225, + "ce_orig": 0.4463140666484833, + "epoch": 0.44489179667840967, + "kl_loss": 0.1770147979259491, + "loss_ib": 0.005718139931559563, + "step": 1547 + }, + { + "ce_ib": 4.838682651519775, + "ce_orig": 0.7097772359848022, + "epoch": 0.4451793802573873, + "kl_loss": 0.13939312100410461, + "loss_ib": 0.006232613697648048, + "step": 1548 + }, + { + "ce_ib": 4.897730827331543, + "ce_orig": 0.8895747065544128, + "epoch": 0.4451793802573873, + "kl_loss": 0.22099938988685608, + "loss_ib": 0.007107724901288748, + "step": 1548 + }, + { + "ce_ib": 4.8738508224487305, + "ce_orig": 0.7790616750717163, + "epoch": 0.4451793802573873, + "kl_loss": 0.433513343334198, + "loss_ib": 0.00920898374170065, + "step": 1548 + }, + { + "ce_ib": 6.785175800323486, + "ce_orig": 1.1384638547897339, + "epoch": 0.4451793802573873, + "kl_loss": 0.2551881968975067, + "loss_ib": 0.009337058290839195, + "step": 1548 + }, + { + "ce_ib": 7.461643218994141, + "ce_orig": 1.2999147176742554, + "epoch": 0.44546696383636497, + "kl_loss": 0.18034838140010834, + "loss_ib": 0.009265126660466194, + "step": 1549 + }, + { + "ce_ib": 9.189070701599121, + "ce_orig": 1.6017738580703735, + "epoch": 0.44546696383636497, + "kl_loss": 0.2147252857685089, + "loss_ib": 0.01133632380515337, + "step": 1549 + }, + { + "ce_ib": 5.132811069488525, + "ce_orig": 0.12677517533302307, + "epoch": 0.44546696383636497, + "kl_loss": 0.632737398147583, + "loss_ib": 0.011460185050964355, + "step": 1549 + }, + { + "ce_ib": 8.620797157287598, + "ce_orig": 1.5666041374206543, + "epoch": 0.44546696383636497, + "kl_loss": 0.21782813966274261, + "loss_ib": 0.010799078270792961, + "step": 1549 + }, + { + "epoch": 0.4457545474153426, + "grad_norm": 0.13772690296173096, + "learning_rate": 9.639914235521906e-06, + "loss": 0.863, + "step": 1550 + }, + { + "ce_ib": 6.6242451667785645, + "ce_orig": 0.9287428259849548, + "epoch": 0.4457545474153426, + "kl_loss": 0.3059898614883423, + "loss_ib": 0.009684143587946892, + "step": 1550 + }, + { + "ce_ib": 4.536319732666016, + "ce_orig": 0.94605952501297, + "epoch": 0.4457545474153426, + "kl_loss": 0.16641706228256226, + "loss_ib": 0.006200490053743124, + "step": 1550 + }, + { + "ce_ib": 7.407623291015625, + "ce_orig": 1.001709222793579, + "epoch": 0.4457545474153426, + "kl_loss": 0.26862847805023193, + "loss_ib": 0.010093907825648785, + "step": 1550 + }, + { + "ce_ib": 5.6089396476745605, + "ce_orig": 0.9210013747215271, + "epoch": 0.4457545474153426, + "kl_loss": 0.294098824262619, + "loss_ib": 0.008549927733838558, + "step": 1550 + }, + { + "ce_ib": 8.03560733795166, + "ce_orig": 0.9753695130348206, + "epoch": 0.4460421309943202, + "kl_loss": 0.23451608419418335, + "loss_ib": 0.010380768217146397, + "step": 1551 + }, + { + "ce_ib": 5.776275157928467, + "ce_orig": 0.8842979073524475, + "epoch": 0.4460421309943202, + "kl_loss": 0.2639259099960327, + "loss_ib": 0.008415534161031246, + "step": 1551 + }, + { + "ce_ib": 3.086613655090332, + "ce_orig": 0.5480087995529175, + "epoch": 0.4460421309943202, + "kl_loss": 0.23094633221626282, + "loss_ib": 0.0053960769437253475, + "step": 1551 + }, + { + "ce_ib": 3.417895793914795, + "ce_orig": 0.6882546544075012, + "epoch": 0.4460421309943202, + "kl_loss": 0.2075841724872589, + "loss_ib": 0.005493737291544676, + "step": 1551 + }, + { + "ce_ib": 5.6718668937683105, + "ce_orig": 0.9152762293815613, + "epoch": 0.44632971457329784, + "kl_loss": 0.31601691246032715, + "loss_ib": 0.008832036517560482, + "step": 1552 + }, + { + "ce_ib": 6.148171424865723, + "ce_orig": 0.9496646523475647, + "epoch": 0.44632971457329784, + "kl_loss": 0.17234985530376434, + "loss_ib": 0.007871669717133045, + "step": 1552 + }, + { + "ce_ib": 3.8803837299346924, + "ce_orig": 0.5967508554458618, + "epoch": 0.44632971457329784, + "kl_loss": 0.2288057506084442, + "loss_ib": 0.006168440915644169, + "step": 1552 + }, + { + "ce_ib": 2.663280963897705, + "ce_orig": 0.2842079997062683, + "epoch": 0.44632971457329784, + "kl_loss": 0.4749584197998047, + "loss_ib": 0.007412864826619625, + "step": 1552 + }, + { + "ce_ib": 6.659852504730225, + "ce_orig": 1.2880715131759644, + "epoch": 0.4466172981522755, + "kl_loss": 0.2627614736557007, + "loss_ib": 0.00928746722638607, + "step": 1553 + }, + { + "ce_ib": 5.383847236633301, + "ce_orig": 0.7564171552658081, + "epoch": 0.4466172981522755, + "kl_loss": 0.2418477088212967, + "loss_ib": 0.007802323903888464, + "step": 1553 + }, + { + "ce_ib": 8.930891990661621, + "ce_orig": 1.424485206604004, + "epoch": 0.4466172981522755, + "kl_loss": 0.23407617211341858, + "loss_ib": 0.011271653696894646, + "step": 1553 + }, + { + "ce_ib": 6.906391143798828, + "ce_orig": 0.9523347020149231, + "epoch": 0.4466172981522755, + "kl_loss": 0.27365630865097046, + "loss_ib": 0.009642953984439373, + "step": 1553 + }, + { + "ce_ib": 10.713818550109863, + "ce_orig": 1.5967822074890137, + "epoch": 0.44690488173125315, + "kl_loss": 0.19433662295341492, + "loss_ib": 0.012657185085117817, + "step": 1554 + }, + { + "ce_ib": 3.7548916339874268, + "ce_orig": 0.8045800924301147, + "epoch": 0.44690488173125315, + "kl_loss": 0.13913817703723907, + "loss_ib": 0.005146273411810398, + "step": 1554 + }, + { + "ce_ib": 5.803292751312256, + "ce_orig": 0.6776520013809204, + "epoch": 0.44690488173125315, + "kl_loss": 0.26821577548980713, + "loss_ib": 0.00848545040935278, + "step": 1554 + }, + { + "ce_ib": 3.515928268432617, + "ce_orig": 0.46322593092918396, + "epoch": 0.44690488173125315, + "kl_loss": 0.20348799228668213, + "loss_ib": 0.005550808273255825, + "step": 1554 + }, + { + "epoch": 0.44719246531023077, + "grad_norm": 0.13196228444576263, + "learning_rate": 9.637016776303631e-06, + "loss": 0.9205, + "step": 1555 + }, + { + "ce_ib": 3.699512481689453, + "ce_orig": 0.6298815608024597, + "epoch": 0.44719246531023077, + "kl_loss": 0.18475520610809326, + "loss_ib": 0.005547064356505871, + "step": 1555 + }, + { + "ce_ib": 3.959015369415283, + "ce_orig": 0.7226744294166565, + "epoch": 0.44719246531023077, + "kl_loss": 0.2643928825855255, + "loss_ib": 0.006602943874895573, + "step": 1555 + }, + { + "ce_ib": 7.5745110511779785, + "ce_orig": 0.3859376311302185, + "epoch": 0.44719246531023077, + "kl_loss": 0.7596704959869385, + "loss_ib": 0.015171214938163757, + "step": 1555 + }, + { + "ce_ib": 6.784470558166504, + "ce_orig": 0.9845409989356995, + "epoch": 0.44719246531023077, + "kl_loss": 0.14599010348320007, + "loss_ib": 0.00824437104165554, + "step": 1555 + }, + { + "ce_ib": 5.580190658569336, + "ce_orig": 0.6907637119293213, + "epoch": 0.44748004888920845, + "kl_loss": 0.3499740958213806, + "loss_ib": 0.009079932235181332, + "step": 1556 + }, + { + "ce_ib": 8.40283203125, + "ce_orig": 1.2569537162780762, + "epoch": 0.44748004888920845, + "kl_loss": 0.19355545938014984, + "loss_ib": 0.01033838652074337, + "step": 1556 + }, + { + "ce_ib": 5.298177242279053, + "ce_orig": 0.5841009616851807, + "epoch": 0.44748004888920845, + "kl_loss": 0.31074339151382446, + "loss_ib": 0.008405610918998718, + "step": 1556 + }, + { + "ce_ib": 4.919023036956787, + "ce_orig": 0.9556659460067749, + "epoch": 0.44748004888920845, + "kl_loss": 0.23168615996837616, + "loss_ib": 0.007235884666442871, + "step": 1556 + }, + { + "ce_ib": 7.061880111694336, + "ce_orig": 0.8365444540977478, + "epoch": 0.44776763246818607, + "kl_loss": 0.38870474696159363, + "loss_ib": 0.010948927141726017, + "step": 1557 + }, + { + "ce_ib": 5.572457790374756, + "ce_orig": 0.6920515298843384, + "epoch": 0.44776763246818607, + "kl_loss": 0.22817905247211456, + "loss_ib": 0.007854248397052288, + "step": 1557 + }, + { + "ce_ib": 3.7289929389953613, + "ce_orig": 0.545609176158905, + "epoch": 0.44776763246818607, + "kl_loss": 0.2949768900871277, + "loss_ib": 0.006678761914372444, + "step": 1557 + }, + { + "ce_ib": 3.3400137424468994, + "ce_orig": 0.5780962109565735, + "epoch": 0.44776763246818607, + "kl_loss": 0.14108021557331085, + "loss_ib": 0.004750816151499748, + "step": 1557 + }, + { + "ce_ib": 6.065938472747803, + "ce_orig": 1.059144139289856, + "epoch": 0.4480552160471637, + "kl_loss": 0.26288458704948425, + "loss_ib": 0.008694784715771675, + "step": 1558 + }, + { + "ce_ib": 8.30125617980957, + "ce_orig": 1.3371621370315552, + "epoch": 0.4480552160471637, + "kl_loss": 0.36227551102638245, + "loss_ib": 0.011924011632800102, + "step": 1558 + }, + { + "ce_ib": 4.027812957763672, + "ce_orig": 0.3836546838283539, + "epoch": 0.4480552160471637, + "kl_loss": 0.5240738987922668, + "loss_ib": 0.00926855206489563, + "step": 1558 + }, + { + "ce_ib": 8.322689056396484, + "ce_orig": 1.4534698724746704, + "epoch": 0.4480552160471637, + "kl_loss": 0.21685175597667694, + "loss_ib": 0.01049120631068945, + "step": 1558 + }, + { + "ce_ib": 4.906183242797852, + "ce_orig": 0.7980424761772156, + "epoch": 0.4483427996261414, + "kl_loss": 0.1907358318567276, + "loss_ib": 0.006813541520386934, + "step": 1559 + }, + { + "ce_ib": 6.262854099273682, + "ce_orig": 0.8929293155670166, + "epoch": 0.4483427996261414, + "kl_loss": 0.21179665625095367, + "loss_ib": 0.008380820974707603, + "step": 1559 + }, + { + "ce_ib": 6.7866621017456055, + "ce_orig": 0.6150768995285034, + "epoch": 0.4483427996261414, + "kl_loss": 0.31106656789779663, + "loss_ib": 0.009897327981889248, + "step": 1559 + }, + { + "ce_ib": 4.801574230194092, + "ce_orig": 0.6632340550422668, + "epoch": 0.4483427996261414, + "kl_loss": 0.4509941637516022, + "loss_ib": 0.009311515837907791, + "step": 1559 + }, + { + "epoch": 0.448630383205119, + "grad_norm": 0.12390803545713425, + "learning_rate": 9.634108145435665e-06, + "loss": 0.8429, + "step": 1560 + }, + { + "ce_ib": 7.607968807220459, + "ce_orig": 0.994629442691803, + "epoch": 0.448630383205119, + "kl_loss": 0.21743838489055634, + "loss_ib": 0.009782352484762669, + "step": 1560 + }, + { + "ce_ib": 8.091766357421875, + "ce_orig": 1.0472888946533203, + "epoch": 0.448630383205119, + "kl_loss": 0.18147239089012146, + "loss_ib": 0.00990648940205574, + "step": 1560 + }, + { + "ce_ib": 6.3520827293396, + "ce_orig": 1.1672568321228027, + "epoch": 0.448630383205119, + "kl_loss": 0.341322124004364, + "loss_ib": 0.009765303693711758, + "step": 1560 + }, + { + "ce_ib": 6.405336380004883, + "ce_orig": 1.0733023881912231, + "epoch": 0.448630383205119, + "kl_loss": 0.21727420389652252, + "loss_ib": 0.008578077889978886, + "step": 1560 + }, + { + "ce_ib": 5.873064994812012, + "ce_orig": 0.9369548559188843, + "epoch": 0.4489179667840966, + "kl_loss": 0.2877747416496277, + "loss_ib": 0.008750812150537968, + "step": 1561 + }, + { + "ce_ib": 7.807909965515137, + "ce_orig": 1.4815807342529297, + "epoch": 0.4489179667840966, + "kl_loss": 0.18937669694423676, + "loss_ib": 0.009701676666736603, + "step": 1561 + }, + { + "ce_ib": 4.973557472229004, + "ce_orig": 0.647799015045166, + "epoch": 0.4489179667840966, + "kl_loss": 0.2897361218929291, + "loss_ib": 0.00787091813981533, + "step": 1561 + }, + { + "ce_ib": 3.7155158519744873, + "ce_orig": 0.5365419387817383, + "epoch": 0.4489179667840966, + "kl_loss": 0.235978901386261, + "loss_ib": 0.006075304467231035, + "step": 1561 + }, + { + "ce_ib": 4.31659460067749, + "ce_orig": 0.5963836312294006, + "epoch": 0.44920555036307425, + "kl_loss": 0.21188724040985107, + "loss_ib": 0.006435466930270195, + "step": 1562 + }, + { + "ce_ib": 7.9532790184021, + "ce_orig": 0.521329939365387, + "epoch": 0.44920555036307425, + "kl_loss": 0.22407691180706024, + "loss_ib": 0.01019404735416174, + "step": 1562 + }, + { + "ce_ib": 7.27313232421875, + "ce_orig": 0.788743793964386, + "epoch": 0.44920555036307425, + "kl_loss": 0.23372234404087067, + "loss_ib": 0.009610354900360107, + "step": 1562 + }, + { + "ce_ib": 6.345553398132324, + "ce_orig": 1.1143635511398315, + "epoch": 0.44920555036307425, + "kl_loss": 0.24764125049114227, + "loss_ib": 0.008821966126561165, + "step": 1562 + }, + { + "ce_ib": 7.859414577484131, + "ce_orig": 1.0159443616867065, + "epoch": 0.4494931339420519, + "kl_loss": 0.23846226930618286, + "loss_ib": 0.010244037955999374, + "step": 1563 + }, + { + "ce_ib": 7.57835054397583, + "ce_orig": 1.1461509466171265, + "epoch": 0.4494931339420519, + "kl_loss": 0.22977961599826813, + "loss_ib": 0.009876146912574768, + "step": 1563 + }, + { + "ce_ib": 7.633553504943848, + "ce_orig": 1.1035288572311401, + "epoch": 0.4494931339420519, + "kl_loss": 0.22606161236763, + "loss_ib": 0.009894168935716152, + "step": 1563 + }, + { + "ce_ib": 8.209954261779785, + "ce_orig": 1.4839401245117188, + "epoch": 0.4494931339420519, + "kl_loss": 0.2907108664512634, + "loss_ib": 0.011117062531411648, + "step": 1563 + }, + { + "ce_ib": 4.617456912994385, + "ce_orig": 0.6988782286643982, + "epoch": 0.44978071752102955, + "kl_loss": 0.1478702425956726, + "loss_ib": 0.006096159107983112, + "step": 1564 + }, + { + "ce_ib": 7.396731376647949, + "ce_orig": 1.0505073070526123, + "epoch": 0.44978071752102955, + "kl_loss": 0.26244524121284485, + "loss_ib": 0.010021183639764786, + "step": 1564 + }, + { + "ce_ib": 7.378440856933594, + "ce_orig": 0.49798375368118286, + "epoch": 0.44978071752102955, + "kl_loss": 0.23360861837863922, + "loss_ib": 0.009714527055621147, + "step": 1564 + }, + { + "ce_ib": 4.991917610168457, + "ce_orig": 0.3777581453323364, + "epoch": 0.44978071752102955, + "kl_loss": 0.28184863924980164, + "loss_ib": 0.007810404058545828, + "step": 1564 + }, + { + "epoch": 0.4500683011000072, + "grad_norm": 0.10581698268651962, + "learning_rate": 9.63118834992558e-06, + "loss": 0.889, + "step": 1565 + }, + { + "ce_ib": 8.230337142944336, + "ce_orig": 1.355285882949829, + "epoch": 0.4500683011000072, + "kl_loss": 0.2831115126609802, + "loss_ib": 0.011061452329158783, + "step": 1565 + }, + { + "ce_ib": 6.374688148498535, + "ce_orig": 0.9068766236305237, + "epoch": 0.4500683011000072, + "kl_loss": 0.1812073290348053, + "loss_ib": 0.008186761289834976, + "step": 1565 + }, + { + "ce_ib": 7.544388771057129, + "ce_orig": 0.9622206091880798, + "epoch": 0.4500683011000072, + "kl_loss": 0.2278764247894287, + "loss_ib": 0.009823152795433998, + "step": 1565 + }, + { + "ce_ib": 4.876921653747559, + "ce_orig": 0.6613551378250122, + "epoch": 0.4500683011000072, + "kl_loss": 0.2815976142883301, + "loss_ib": 0.007692897692322731, + "step": 1565 + }, + { + "ce_ib": 4.218951225280762, + "ce_orig": 0.6239345669746399, + "epoch": 0.45035588467898485, + "kl_loss": 0.17060774564743042, + "loss_ib": 0.005925028119236231, + "step": 1566 + }, + { + "ce_ib": 3.829336643218994, + "ce_orig": 0.6506801843643188, + "epoch": 0.45035588467898485, + "kl_loss": 0.1707981675863266, + "loss_ib": 0.005537318531423807, + "step": 1566 + }, + { + "ce_ib": 7.071207046508789, + "ce_orig": 1.1898292303085327, + "epoch": 0.45035588467898485, + "kl_loss": 0.27813827991485596, + "loss_ib": 0.009852590039372444, + "step": 1566 + }, + { + "ce_ib": 5.183157920837402, + "ce_orig": 0.547346293926239, + "epoch": 0.45035588467898485, + "kl_loss": 0.3790608048439026, + "loss_ib": 0.00897376611828804, + "step": 1566 + }, + { + "ce_ib": 6.814054489135742, + "ce_orig": 1.0130473375320435, + "epoch": 0.4506434682579625, + "kl_loss": 0.25653523206710815, + "loss_ib": 0.00937940739095211, + "step": 1567 + }, + { + "ce_ib": 5.125803470611572, + "ce_orig": 0.594994306564331, + "epoch": 0.4506434682579625, + "kl_loss": 0.159761443734169, + "loss_ib": 0.006723417434841394, + "step": 1567 + }, + { + "ce_ib": 10.355138778686523, + "ce_orig": 1.708457589149475, + "epoch": 0.4506434682579625, + "kl_loss": 0.22042052447795868, + "loss_ib": 0.012559343129396439, + "step": 1567 + }, + { + "ce_ib": 5.936809539794922, + "ce_orig": 0.6713258028030396, + "epoch": 0.4506434682579625, + "kl_loss": 0.21336688101291656, + "loss_ib": 0.008070478215813637, + "step": 1567 + }, + { + "ce_ib": 4.86829137802124, + "ce_orig": 0.9267745018005371, + "epoch": 0.4509310518369401, + "kl_loss": 0.3055550456047058, + "loss_ib": 0.00792384147644043, + "step": 1568 + }, + { + "ce_ib": 7.262491703033447, + "ce_orig": 0.9000452160835266, + "epoch": 0.4509310518369401, + "kl_loss": 0.21604710817337036, + "loss_ib": 0.009422962553799152, + "step": 1568 + }, + { + "ce_ib": 6.170190334320068, + "ce_orig": 0.7533259391784668, + "epoch": 0.4509310518369401, + "kl_loss": 0.43113571405410767, + "loss_ib": 0.010481548495590687, + "step": 1568 + }, + { + "ce_ib": 9.377881050109863, + "ce_orig": 0.8980697393417358, + "epoch": 0.4509310518369401, + "kl_loss": 0.21846041083335876, + "loss_ib": 0.011562485247850418, + "step": 1568 + }, + { + "ce_ib": 9.08447551727295, + "ce_orig": 1.786766529083252, + "epoch": 0.4512186354159177, + "kl_loss": 0.26060664653778076, + "loss_ib": 0.011690542101860046, + "step": 1569 + }, + { + "ce_ib": 5.312047481536865, + "ce_orig": 0.864721417427063, + "epoch": 0.4512186354159177, + "kl_loss": 0.23503050208091736, + "loss_ib": 0.0076623521745204926, + "step": 1569 + }, + { + "ce_ib": 4.463824272155762, + "ce_orig": 0.4612298011779785, + "epoch": 0.4512186354159177, + "kl_loss": 0.374639093875885, + "loss_ib": 0.008210215717554092, + "step": 1569 + }, + { + "ce_ib": 5.714632034301758, + "ce_orig": 0.946444034576416, + "epoch": 0.4512186354159177, + "kl_loss": 0.2866571545600891, + "loss_ib": 0.008581203408539295, + "step": 1569 + }, + { + "epoch": 0.4515062189948954, + "grad_norm": 0.11883459240198135, + "learning_rate": 9.628257396807837e-06, + "loss": 0.9178, + "step": 1570 + }, + { + "ce_ib": 6.601624965667725, + "ce_orig": 1.0058202743530273, + "epoch": 0.4515062189948954, + "kl_loss": 0.3906886577606201, + "loss_ib": 0.01050851121544838, + "step": 1570 + }, + { + "ce_ib": 5.223203182220459, + "ce_orig": 0.8138896822929382, + "epoch": 0.4515062189948954, + "kl_loss": 0.3764778971672058, + "loss_ib": 0.00898798182606697, + "step": 1570 + }, + { + "ce_ib": 6.802353382110596, + "ce_orig": 0.9874704480171204, + "epoch": 0.4515062189948954, + "kl_loss": 0.1776190996170044, + "loss_ib": 0.008578544482588768, + "step": 1570 + }, + { + "ce_ib": 7.111766338348389, + "ce_orig": 1.030307650566101, + "epoch": 0.4515062189948954, + "kl_loss": 0.22811469435691833, + "loss_ib": 0.009392913430929184, + "step": 1570 + }, + { + "ce_ib": 5.195444583892822, + "ce_orig": 0.5673562288284302, + "epoch": 0.451793802573873, + "kl_loss": 0.3687005043029785, + "loss_ib": 0.008882449939846992, + "step": 1571 + }, + { + "ce_ib": 7.545969009399414, + "ce_orig": 0.8662387728691101, + "epoch": 0.451793802573873, + "kl_loss": 0.21740826964378357, + "loss_ib": 0.009720050729811192, + "step": 1571 + }, + { + "ce_ib": 5.633011341094971, + "ce_orig": 0.9099085927009583, + "epoch": 0.451793802573873, + "kl_loss": 0.17175164818763733, + "loss_ib": 0.0073505281470716, + "step": 1571 + }, + { + "ce_ib": 4.424170970916748, + "ce_orig": 0.40657246112823486, + "epoch": 0.451793802573873, + "kl_loss": 0.3058871030807495, + "loss_ib": 0.007483042310923338, + "step": 1571 + }, + { + "ce_ib": 4.206382751464844, + "ce_orig": 0.3294968605041504, + "epoch": 0.45208138615285065, + "kl_loss": 0.1678428202867508, + "loss_ib": 0.005884811282157898, + "step": 1572 + }, + { + "ce_ib": 6.239657878875732, + "ce_orig": 0.9700036644935608, + "epoch": 0.45208138615285065, + "kl_loss": 0.269490122795105, + "loss_ib": 0.008934559300541878, + "step": 1572 + }, + { + "ce_ib": 4.88264274597168, + "ce_orig": 0.7675372958183289, + "epoch": 0.45208138615285065, + "kl_loss": 0.14468610286712646, + "loss_ib": 0.006329504307359457, + "step": 1572 + }, + { + "ce_ib": 6.80122184753418, + "ce_orig": 1.0441501140594482, + "epoch": 0.45208138615285065, + "kl_loss": 0.1825808882713318, + "loss_ib": 0.008627030998468399, + "step": 1572 + }, + { + "ce_ib": 3.62424373626709, + "ce_orig": 0.5604528784751892, + "epoch": 0.45236896973182833, + "kl_loss": 0.2947345972061157, + "loss_ib": 0.006571589503437281, + "step": 1573 + }, + { + "ce_ib": 7.601794719696045, + "ce_orig": 1.3156349658966064, + "epoch": 0.45236896973182833, + "kl_loss": 0.29263508319854736, + "loss_ib": 0.010528144426643848, + "step": 1573 + }, + { + "ce_ib": 9.446778297424316, + "ce_orig": 1.2505724430084229, + "epoch": 0.45236896973182833, + "kl_loss": 0.7459277510643005, + "loss_ib": 0.01690605655312538, + "step": 1573 + }, + { + "ce_ib": 5.411386966705322, + "ce_orig": 0.655278742313385, + "epoch": 0.45236896973182833, + "kl_loss": 0.311574250459671, + "loss_ib": 0.008527129888534546, + "step": 1573 + }, + { + "ce_ib": 4.58731746673584, + "ce_orig": 0.5685978531837463, + "epoch": 0.45265655331080595, + "kl_loss": 0.4015287756919861, + "loss_ib": 0.008602604269981384, + "step": 1574 + }, + { + "ce_ib": 4.962559223175049, + "ce_orig": 0.7091254591941833, + "epoch": 0.45265655331080595, + "kl_loss": 0.2449083924293518, + "loss_ib": 0.00741164293140173, + "step": 1574 + }, + { + "ce_ib": 7.383504390716553, + "ce_orig": 0.6441636681556702, + "epoch": 0.45265655331080595, + "kl_loss": 0.3001161813735962, + "loss_ib": 0.010384666733443737, + "step": 1574 + }, + { + "ce_ib": 6.909374713897705, + "ce_orig": 1.234971523284912, + "epoch": 0.45265655331080595, + "kl_loss": 0.3168690502643585, + "loss_ib": 0.010078065097332, + "step": 1574 + }, + { + "epoch": 0.4529441368897836, + "grad_norm": 0.10603757202625275, + "learning_rate": 9.625315293143782e-06, + "loss": 0.9032, + "step": 1575 + }, + { + "ce_ib": 4.061898708343506, + "ce_orig": 0.45858651399612427, + "epoch": 0.4529441368897836, + "kl_loss": 0.32226306200027466, + "loss_ib": 0.0072845290414988995, + "step": 1575 + }, + { + "ce_ib": 3.875119209289551, + "ce_orig": 0.8682102560997009, + "epoch": 0.4529441368897836, + "kl_loss": 0.19093768298625946, + "loss_ib": 0.005784495733678341, + "step": 1575 + }, + { + "ce_ib": 8.514786720275879, + "ce_orig": 1.2168067693710327, + "epoch": 0.4529441368897836, + "kl_loss": 0.1767929196357727, + "loss_ib": 0.01028271671384573, + "step": 1575 + }, + { + "ce_ib": 6.604011058807373, + "ce_orig": 0.7284548282623291, + "epoch": 0.4529441368897836, + "kl_loss": 0.260367751121521, + "loss_ib": 0.009207688271999359, + "step": 1575 + }, + { + "ce_ib": 5.733308792114258, + "ce_orig": 0.9140332341194153, + "epoch": 0.45323172046876126, + "kl_loss": 0.25994765758514404, + "loss_ib": 0.008332785218954086, + "step": 1576 + }, + { + "ce_ib": 4.547566890716553, + "ce_orig": 0.5950409770011902, + "epoch": 0.45323172046876126, + "kl_loss": 0.3094450831413269, + "loss_ib": 0.007642017211765051, + "step": 1576 + }, + { + "ce_ib": 2.6981751918792725, + "ce_orig": 0.3503713011741638, + "epoch": 0.45323172046876126, + "kl_loss": 0.4907337427139282, + "loss_ib": 0.007605512626469135, + "step": 1576 + }, + { + "ce_ib": 6.769986629486084, + "ce_orig": 0.7730137705802917, + "epoch": 0.45323172046876126, + "kl_loss": 0.20853619277477264, + "loss_ib": 0.008855348452925682, + "step": 1576 + }, + { + "ce_ib": 4.362432956695557, + "ce_orig": 0.5568724870681763, + "epoch": 0.4535193040477389, + "kl_loss": 0.28733065724372864, + "loss_ib": 0.007235738914459944, + "step": 1577 + }, + { + "ce_ib": 4.170414924621582, + "ce_orig": 0.7116380333900452, + "epoch": 0.4535193040477389, + "kl_loss": 0.17540094256401062, + "loss_ib": 0.00592442462220788, + "step": 1577 + }, + { + "ce_ib": 3.8414411544799805, + "ce_orig": 0.5860041975975037, + "epoch": 0.4535193040477389, + "kl_loss": 0.5004762411117554, + "loss_ib": 0.008846203796565533, + "step": 1577 + }, + { + "ce_ib": 4.819023609161377, + "ce_orig": 0.4309028089046478, + "epoch": 0.4535193040477389, + "kl_loss": 0.3439497947692871, + "loss_ib": 0.008258521556854248, + "step": 1577 + }, + { + "ce_ib": 4.089813232421875, + "ce_orig": 0.6745030283927917, + "epoch": 0.4538068876267165, + "kl_loss": 0.18524643778800964, + "loss_ib": 0.0059422776103019714, + "step": 1578 + }, + { + "ce_ib": 5.1730451583862305, + "ce_orig": 0.779570996761322, + "epoch": 0.4538068876267165, + "kl_loss": 0.14864078164100647, + "loss_ib": 0.0066594528034329414, + "step": 1578 + }, + { + "ce_ib": 6.661314964294434, + "ce_orig": 1.013376235961914, + "epoch": 0.4538068876267165, + "kl_loss": 0.20308473706245422, + "loss_ib": 0.008692162111401558, + "step": 1578 + }, + { + "ce_ib": 7.177057266235352, + "ce_orig": 0.7783916592597961, + "epoch": 0.4538068876267165, + "kl_loss": 0.2335265874862671, + "loss_ib": 0.009512322954833508, + "step": 1578 + }, + { + "ce_ib": 3.965541362762451, + "ce_orig": 0.7133391499519348, + "epoch": 0.45409447120569413, + "kl_loss": 0.23369936645030975, + "loss_ib": 0.006302534602582455, + "step": 1579 + }, + { + "ce_ib": 4.178727626800537, + "ce_orig": 0.6237984895706177, + "epoch": 0.45409447120569413, + "kl_loss": 0.20310792326927185, + "loss_ib": 0.00620980653911829, + "step": 1579 + }, + { + "ce_ib": 5.857765197753906, + "ce_orig": 0.7854532599449158, + "epoch": 0.45409447120569413, + "kl_loss": 0.21422770619392395, + "loss_ib": 0.008000042289495468, + "step": 1579 + }, + { + "ce_ib": 7.018479347229004, + "ce_orig": 0.9364678263664246, + "epoch": 0.45409447120569413, + "kl_loss": 0.2819264531135559, + "loss_ib": 0.009837743826210499, + "step": 1579 + }, + { + "epoch": 0.4543820547846718, + "grad_norm": 0.12139754742383957, + "learning_rate": 9.62236204602163e-06, + "loss": 0.8779, + "step": 1580 + }, + { + "ce_ib": 7.709521293640137, + "ce_orig": 0.9983739256858826, + "epoch": 0.4543820547846718, + "kl_loss": 0.2801150381565094, + "loss_ib": 0.010510671883821487, + "step": 1580 + }, + { + "ce_ib": 8.958810806274414, + "ce_orig": 1.4371901750564575, + "epoch": 0.4543820547846718, + "kl_loss": 0.3038935661315918, + "loss_ib": 0.011997747235000134, + "step": 1580 + }, + { + "ce_ib": 5.254581451416016, + "ce_orig": 0.8143784403800964, + "epoch": 0.4543820547846718, + "kl_loss": 0.28170040249824524, + "loss_ib": 0.008071585558354855, + "step": 1580 + }, + { + "ce_ib": 4.93033504486084, + "ce_orig": 0.5638120770454407, + "epoch": 0.4543820547846718, + "kl_loss": 0.28069180250167847, + "loss_ib": 0.0077372523956000805, + "step": 1580 + }, + { + "ce_ib": 6.513678550720215, + "ce_orig": 0.8502893447875977, + "epoch": 0.45466963836364943, + "kl_loss": 0.23893187940120697, + "loss_ib": 0.00890299677848816, + "step": 1581 + }, + { + "ce_ib": 4.787106037139893, + "ce_orig": 0.8497691750526428, + "epoch": 0.45466963836364943, + "kl_loss": 0.20702172815799713, + "loss_ib": 0.006857323460280895, + "step": 1581 + }, + { + "ce_ib": 7.873937606811523, + "ce_orig": 1.1831961870193481, + "epoch": 0.45466963836364943, + "kl_loss": 0.28345048427581787, + "loss_ib": 0.010708441957831383, + "step": 1581 + }, + { + "ce_ib": 4.772539138793945, + "ce_orig": 0.680529773235321, + "epoch": 0.45466963836364943, + "kl_loss": 0.20636993646621704, + "loss_ib": 0.006836238317191601, + "step": 1581 + }, + { + "ce_ib": 4.1148223876953125, + "ce_orig": 0.641548752784729, + "epoch": 0.45495722194262705, + "kl_loss": 0.19606587290763855, + "loss_ib": 0.006075480952858925, + "step": 1582 + }, + { + "ce_ib": 6.089351654052734, + "ce_orig": 0.7535414695739746, + "epoch": 0.45495722194262705, + "kl_loss": 0.1836378574371338, + "loss_ib": 0.00792573019862175, + "step": 1582 + }, + { + "ce_ib": 6.302079200744629, + "ce_orig": 1.0797772407531738, + "epoch": 0.45495722194262705, + "kl_loss": 0.7078266143798828, + "loss_ib": 0.01338034588843584, + "step": 1582 + }, + { + "ce_ib": 4.168867588043213, + "ce_orig": 0.7152249217033386, + "epoch": 0.45495722194262705, + "kl_loss": 0.1736205816268921, + "loss_ib": 0.00590507360175252, + "step": 1582 + }, + { + "ce_ib": 3.9457499980926514, + "ce_orig": 0.6125537753105164, + "epoch": 0.45524480552160473, + "kl_loss": 0.1671469509601593, + "loss_ib": 0.005617219023406506, + "step": 1583 + }, + { + "ce_ib": 3.6192708015441895, + "ce_orig": 0.608466386795044, + "epoch": 0.45524480552160473, + "kl_loss": 0.2092287540435791, + "loss_ib": 0.005711558274924755, + "step": 1583 + }, + { + "ce_ib": 6.533486843109131, + "ce_orig": 0.9287842512130737, + "epoch": 0.45524480552160473, + "kl_loss": 0.21517397463321686, + "loss_ib": 0.008685226552188396, + "step": 1583 + }, + { + "ce_ib": 6.803183078765869, + "ce_orig": 0.9185959696769714, + "epoch": 0.45524480552160473, + "kl_loss": 0.27622342109680176, + "loss_ib": 0.009565416723489761, + "step": 1583 + }, + { + "ce_ib": 4.511726379394531, + "ce_orig": 0.7662067413330078, + "epoch": 0.45553238910058236, + "kl_loss": 0.17484962940216064, + "loss_ib": 0.006260222755372524, + "step": 1584 + }, + { + "ce_ib": 7.601507663726807, + "ce_orig": 0.9514760971069336, + "epoch": 0.45553238910058236, + "kl_loss": 0.25499165058135986, + "loss_ib": 0.010151424445211887, + "step": 1584 + }, + { + "ce_ib": 3.7975528240203857, + "ce_orig": 0.6705338358879089, + "epoch": 0.45553238910058236, + "kl_loss": 0.24700944125652313, + "loss_ib": 0.0062676467932760715, + "step": 1584 + }, + { + "ce_ib": 4.939420223236084, + "ce_orig": 0.8580985069274902, + "epoch": 0.45553238910058236, + "kl_loss": 0.40102115273475647, + "loss_ib": 0.00894963089376688, + "step": 1584 + }, + { + "epoch": 0.45581997267956, + "grad_norm": 0.12361612915992737, + "learning_rate": 9.619397662556434e-06, + "loss": 0.8915, + "step": 1585 + }, + { + "ce_ib": 4.5925374031066895, + "ce_orig": 0.6823598742485046, + "epoch": 0.45581997267956, + "kl_loss": 0.24436944723129272, + "loss_ib": 0.007036231458187103, + "step": 1585 + }, + { + "ce_ib": 6.4787092208862305, + "ce_orig": 1.0821856260299683, + "epoch": 0.45581997267956, + "kl_loss": 0.22065825760364532, + "loss_ib": 0.00868529174476862, + "step": 1585 + }, + { + "ce_ib": 5.234336853027344, + "ce_orig": 0.5106269717216492, + "epoch": 0.45581997267956, + "kl_loss": 0.4164009094238281, + "loss_ib": 0.009398345835506916, + "step": 1585 + }, + { + "ce_ib": 3.9497246742248535, + "ce_orig": 0.601236879825592, + "epoch": 0.45581997267956, + "kl_loss": 0.2019076943397522, + "loss_ib": 0.00596880167722702, + "step": 1585 + }, + { + "ce_ib": 5.286367893218994, + "ce_orig": 0.8565965294837952, + "epoch": 0.45610755625853766, + "kl_loss": 0.2048177868127823, + "loss_ib": 0.007334545720368624, + "step": 1586 + }, + { + "ce_ib": 3.798523187637329, + "ce_orig": 0.5074141025543213, + "epoch": 0.45610755625853766, + "kl_loss": 0.26109427213668823, + "loss_ib": 0.00640946626663208, + "step": 1586 + }, + { + "ce_ib": 6.737989902496338, + "ce_orig": 1.062273383140564, + "epoch": 0.45610755625853766, + "kl_loss": 0.2507803738117218, + "loss_ib": 0.009245793335139751, + "step": 1586 + }, + { + "ce_ib": 7.858916282653809, + "ce_orig": 1.1395679712295532, + "epoch": 0.45610755625853766, + "kl_loss": 0.3817332983016968, + "loss_ib": 0.011676249094307423, + "step": 1586 + }, + { + "ce_ib": 5.658448219299316, + "ce_orig": 0.8056834936141968, + "epoch": 0.4563951398375153, + "kl_loss": 0.2785063683986664, + "loss_ib": 0.008443511091172695, + "step": 1587 + }, + { + "ce_ib": 4.108708381652832, + "ce_orig": 0.7508328557014465, + "epoch": 0.4563951398375153, + "kl_loss": 0.161625474691391, + "loss_ib": 0.005724962800741196, + "step": 1587 + }, + { + "ce_ib": 5.145606994628906, + "ce_orig": 0.9987155199050903, + "epoch": 0.4563951398375153, + "kl_loss": 0.214373379945755, + "loss_ib": 0.007289340253919363, + "step": 1587 + }, + { + "ce_ib": 4.941565990447998, + "ce_orig": 0.7044458389282227, + "epoch": 0.4563951398375153, + "kl_loss": 0.17570409178733826, + "loss_ib": 0.006698607001453638, + "step": 1587 + }, + { + "ce_ib": 3.0381317138671875, + "ce_orig": 0.48189476132392883, + "epoch": 0.4566827234164929, + "kl_loss": 0.18399228155612946, + "loss_ib": 0.004878054838627577, + "step": 1588 + }, + { + "ce_ib": 6.867878437042236, + "ce_orig": 0.8533276319503784, + "epoch": 0.4566827234164929, + "kl_loss": 0.23878858983516693, + "loss_ib": 0.009255764074623585, + "step": 1588 + }, + { + "ce_ib": 5.534303665161133, + "ce_orig": 0.5891181826591492, + "epoch": 0.4566827234164929, + "kl_loss": 0.1646348536014557, + "loss_ib": 0.0071806525811553, + "step": 1588 + }, + { + "ce_ib": 4.690039157867432, + "ce_orig": 0.8940519094467163, + "epoch": 0.4566827234164929, + "kl_loss": 0.18715199828147888, + "loss_ib": 0.006561559159308672, + "step": 1588 + }, + { + "ce_ib": 4.361746788024902, + "ce_orig": 0.4742627441883087, + "epoch": 0.45697030699547053, + "kl_loss": 0.3901596665382385, + "loss_ib": 0.008263343013823032, + "step": 1589 + }, + { + "ce_ib": 10.482245445251465, + "ce_orig": 1.0567958354949951, + "epoch": 0.45697030699547053, + "kl_loss": 0.1713293194770813, + "loss_ib": 0.01219553966075182, + "step": 1589 + }, + { + "ce_ib": 7.079917907714844, + "ce_orig": 0.8915940523147583, + "epoch": 0.45697030699547053, + "kl_loss": 0.30659422278404236, + "loss_ib": 0.01014585979282856, + "step": 1589 + }, + { + "ce_ib": 3.8864879608154297, + "ce_orig": 0.5591042637825012, + "epoch": 0.45697030699547053, + "kl_loss": 0.20273935794830322, + "loss_ib": 0.0059138815850019455, + "step": 1589 + }, + { + "epoch": 0.4572578905744482, + "grad_norm": 0.1403963714838028, + "learning_rate": 9.616422149890085e-06, + "loss": 0.896, + "step": 1590 + }, + { + "ce_ib": 4.583006381988525, + "ce_orig": 0.5320500731468201, + "epoch": 0.4572578905744482, + "kl_loss": 0.2249002754688263, + "loss_ib": 0.006832009181380272, + "step": 1590 + }, + { + "ce_ib": 9.813572883605957, + "ce_orig": 1.42888605594635, + "epoch": 0.4572578905744482, + "kl_loss": 0.2696181535720825, + "loss_ib": 0.012509752996265888, + "step": 1590 + }, + { + "ce_ib": 7.4518632888793945, + "ce_orig": 0.5761680603027344, + "epoch": 0.4572578905744482, + "kl_loss": 0.39669954776763916, + "loss_ib": 0.011418858543038368, + "step": 1590 + }, + { + "ce_ib": 6.660532474517822, + "ce_orig": 0.7236970067024231, + "epoch": 0.4572578905744482, + "kl_loss": 0.31353431940078735, + "loss_ib": 0.009795875288546085, + "step": 1590 + }, + { + "ce_ib": 11.518242835998535, + "ce_orig": 1.814514398574829, + "epoch": 0.45754547415342584, + "kl_loss": 0.2373267114162445, + "loss_ib": 0.013891510665416718, + "step": 1591 + }, + { + "ce_ib": 4.618646621704102, + "ce_orig": 0.6165957450866699, + "epoch": 0.45754547415342584, + "kl_loss": 0.18919825553894043, + "loss_ib": 0.006510629318654537, + "step": 1591 + }, + { + "ce_ib": 3.9572300910949707, + "ce_orig": 0.5626340508460999, + "epoch": 0.45754547415342584, + "kl_loss": 0.23723739385604858, + "loss_ib": 0.006329604424536228, + "step": 1591 + }, + { + "ce_ib": 5.463366508483887, + "ce_orig": 0.8892853260040283, + "epoch": 0.45754547415342584, + "kl_loss": 0.27879300713539124, + "loss_ib": 0.008251296356320381, + "step": 1591 + }, + { + "ce_ib": 8.190649032592773, + "ce_orig": 1.2614320516586304, + "epoch": 0.45783305773240346, + "kl_loss": 0.31339144706726074, + "loss_ib": 0.011324563063681126, + "step": 1592 + }, + { + "ce_ib": 4.4856367111206055, + "ce_orig": 0.7876867055892944, + "epoch": 0.45783305773240346, + "kl_loss": 0.18498772382736206, + "loss_ib": 0.006335513666272163, + "step": 1592 + }, + { + "ce_ib": 6.534660339355469, + "ce_orig": 0.8544452786445618, + "epoch": 0.45783305773240346, + "kl_loss": 0.16686499118804932, + "loss_ib": 0.008203309960663319, + "step": 1592 + }, + { + "ce_ib": 6.89320182800293, + "ce_orig": 1.0038596391677856, + "epoch": 0.45783305773240346, + "kl_loss": 0.22854721546173096, + "loss_ib": 0.009178673848509789, + "step": 1592 + }, + { + "ce_ib": 5.936371326446533, + "ce_orig": 0.955289363861084, + "epoch": 0.45812064131138114, + "kl_loss": 0.23201020061969757, + "loss_ib": 0.008256473578512669, + "step": 1593 + }, + { + "ce_ib": 6.370386123657227, + "ce_orig": 1.2829995155334473, + "epoch": 0.45812064131138114, + "kl_loss": 0.17797306180000305, + "loss_ib": 0.008150117471814156, + "step": 1593 + }, + { + "ce_ib": 3.247084856033325, + "ce_orig": 0.7356828451156616, + "epoch": 0.45812064131138114, + "kl_loss": 0.23136037588119507, + "loss_ib": 0.005560688674449921, + "step": 1593 + }, + { + "ce_ib": 5.93695068359375, + "ce_orig": 1.0750383138656616, + "epoch": 0.45812064131138114, + "kl_loss": 0.3686366081237793, + "loss_ib": 0.009623317047953606, + "step": 1593 + }, + { + "ce_ib": 4.900889873504639, + "ce_orig": 0.8093308210372925, + "epoch": 0.45840822489035876, + "kl_loss": 0.2369879186153412, + "loss_ib": 0.007270768750458956, + "step": 1594 + }, + { + "ce_ib": 2.814967393875122, + "ce_orig": 0.43275266885757446, + "epoch": 0.45840822489035876, + "kl_loss": 0.22987329959869385, + "loss_ib": 0.005113700404763222, + "step": 1594 + }, + { + "ce_ib": 5.747320175170898, + "ce_orig": 0.8896006345748901, + "epoch": 0.45840822489035876, + "kl_loss": 0.18795983493328094, + "loss_ib": 0.007626918610185385, + "step": 1594 + }, + { + "ce_ib": 7.953315734863281, + "ce_orig": 0.7957958579063416, + "epoch": 0.45840822489035876, + "kl_loss": 0.2804729640483856, + "loss_ib": 0.010758046060800552, + "step": 1594 + }, + { + "epoch": 0.4586958084693364, + "grad_norm": 0.11645019799470901, + "learning_rate": 9.613435515191282e-06, + "loss": 0.8937, + "step": 1595 + }, + { + "ce_ib": 5.9087347984313965, + "ce_orig": 0.8856273293495178, + "epoch": 0.4586958084693364, + "kl_loss": 0.2484017014503479, + "loss_ib": 0.008392751216888428, + "step": 1595 + }, + { + "ce_ib": 6.337006568908691, + "ce_orig": 1.040190577507019, + "epoch": 0.4586958084693364, + "kl_loss": 0.22671160101890564, + "loss_ib": 0.008604122325778008, + "step": 1595 + }, + { + "ce_ib": 3.9932260513305664, + "ce_orig": 0.5096628665924072, + "epoch": 0.4586958084693364, + "kl_loss": 0.36274319887161255, + "loss_ib": 0.007620657328516245, + "step": 1595 + }, + { + "ce_ib": 6.888410568237305, + "ce_orig": 0.9193194508552551, + "epoch": 0.4586958084693364, + "kl_loss": 0.16623620688915253, + "loss_ib": 0.008550772443413734, + "step": 1595 + }, + { + "ce_ib": 3.571662425994873, + "ce_orig": 0.5151391625404358, + "epoch": 0.45898339204831407, + "kl_loss": 0.17320913076400757, + "loss_ib": 0.005303753539919853, + "step": 1596 + }, + { + "ce_ib": 6.104183197021484, + "ce_orig": 1.0585072040557861, + "epoch": 0.45898339204831407, + "kl_loss": 0.2961203455924988, + "loss_ib": 0.009065386839210987, + "step": 1596 + }, + { + "ce_ib": 6.868567943572998, + "ce_orig": 0.9249302744865417, + "epoch": 0.45898339204831407, + "kl_loss": 0.2994818091392517, + "loss_ib": 0.009863385930657387, + "step": 1596 + }, + { + "ce_ib": 2.8859384059906006, + "ce_orig": 0.6401421427726746, + "epoch": 0.45898339204831407, + "kl_loss": 0.20423877239227295, + "loss_ib": 0.0049283262342214584, + "step": 1596 + }, + { + "ce_ib": 7.507976055145264, + "ce_orig": 1.2500346899032593, + "epoch": 0.4592709756272917, + "kl_loss": 0.18150609731674194, + "loss_ib": 0.00932303722947836, + "step": 1597 + }, + { + "ce_ib": 4.735340118408203, + "ce_orig": 0.6459792852401733, + "epoch": 0.4592709756272917, + "kl_loss": 0.16608083248138428, + "loss_ib": 0.006396147888153791, + "step": 1597 + }, + { + "ce_ib": 5.722679138183594, + "ce_orig": 0.8577017188072205, + "epoch": 0.4592709756272917, + "kl_loss": 0.17896723747253418, + "loss_ib": 0.007512351498007774, + "step": 1597 + }, + { + "ce_ib": 5.09295129776001, + "ce_orig": 0.5127891898155212, + "epoch": 0.4592709756272917, + "kl_loss": 0.22883820533752441, + "loss_ib": 0.007381333503872156, + "step": 1597 + }, + { + "ce_ib": 12.0892333984375, + "ce_orig": 1.6606770753860474, + "epoch": 0.4595585592062693, + "kl_loss": 0.17387744784355164, + "loss_ib": 0.013828007504343987, + "step": 1598 + }, + { + "ce_ib": 3.8233511447906494, + "ce_orig": 0.401006281375885, + "epoch": 0.4595585592062693, + "kl_loss": 0.32604125142097473, + "loss_ib": 0.007083763834089041, + "step": 1598 + }, + { + "ce_ib": 7.2053117752075195, + "ce_orig": 1.3019723892211914, + "epoch": 0.4595585592062693, + "kl_loss": 0.2079797089099884, + "loss_ib": 0.009285109117627144, + "step": 1598 + }, + { + "ce_ib": 6.970457553863525, + "ce_orig": 1.142815113067627, + "epoch": 0.4595585592062693, + "kl_loss": 0.2328367829322815, + "loss_ib": 0.009298824705183506, + "step": 1598 + }, + { + "ce_ib": 6.52714204788208, + "ce_orig": 1.1193588972091675, + "epoch": 0.45984614278524694, + "kl_loss": 0.2331889271736145, + "loss_ib": 0.008859030902385712, + "step": 1599 + }, + { + "ce_ib": 5.672296047210693, + "ce_orig": 0.6065714359283447, + "epoch": 0.45984614278524694, + "kl_loss": 0.2097187489271164, + "loss_ib": 0.007769483607262373, + "step": 1599 + }, + { + "ce_ib": 4.53487491607666, + "ce_orig": 0.8241526484489441, + "epoch": 0.45984614278524694, + "kl_loss": 0.270264208316803, + "loss_ib": 0.007237516343593597, + "step": 1599 + }, + { + "ce_ib": 5.069760322570801, + "ce_orig": 0.7474254965782166, + "epoch": 0.45984614278524694, + "kl_loss": 0.2103268802165985, + "loss_ib": 0.007173029240220785, + "step": 1599 + }, + { + "epoch": 0.4601337263642246, + "grad_norm": 0.10566619038581848, + "learning_rate": 9.610437765655522e-06, + "loss": 0.8697, + "step": 1600 + }, + { + "ce_ib": 10.489505767822266, + "ce_orig": 1.6401379108428955, + "epoch": 0.4601337263642246, + "kl_loss": 0.1997671276330948, + "loss_ib": 0.012487176805734634, + "step": 1600 + }, + { + "ce_ib": 3.287750720977783, + "ce_orig": 0.6354742050170898, + "epoch": 0.4601337263642246, + "kl_loss": 0.2241269201040268, + "loss_ib": 0.005529019515961409, + "step": 1600 + }, + { + "ce_ib": 4.282444000244141, + "ce_orig": 0.4484873116016388, + "epoch": 0.4601337263642246, + "kl_loss": 0.3232642710208893, + "loss_ib": 0.00751508679240942, + "step": 1600 + }, + { + "ce_ib": 7.709043502807617, + "ce_orig": 0.8646803498268127, + "epoch": 0.4601337263642246, + "kl_loss": 0.280630886554718, + "loss_ib": 0.010515352711081505, + "step": 1600 + }, + { + "ce_ib": 6.3130669593811035, + "ce_orig": 1.0754271745681763, + "epoch": 0.46042130994320224, + "kl_loss": 0.24628064036369324, + "loss_ib": 0.008775873109698296, + "step": 1601 + }, + { + "ce_ib": 7.311588287353516, + "ce_orig": 0.664889395236969, + "epoch": 0.46042130994320224, + "kl_loss": 0.2610669732093811, + "loss_ib": 0.009922257624566555, + "step": 1601 + }, + { + "ce_ib": 7.912898540496826, + "ce_orig": 1.17232084274292, + "epoch": 0.46042130994320224, + "kl_loss": 0.22218400239944458, + "loss_ib": 0.010134738869965076, + "step": 1601 + }, + { + "ce_ib": 5.320058822631836, + "ce_orig": 0.6333699226379395, + "epoch": 0.46042130994320224, + "kl_loss": 0.18659427762031555, + "loss_ib": 0.007186001166701317, + "step": 1601 + }, + { + "ce_ib": 7.077117443084717, + "ce_orig": 1.322934627532959, + "epoch": 0.46070889352217986, + "kl_loss": 0.22655072808265686, + "loss_ib": 0.009342624805867672, + "step": 1602 + }, + { + "ce_ib": 8.87541389465332, + "ce_orig": 1.5171864032745361, + "epoch": 0.46070889352217986, + "kl_loss": 0.2778520882129669, + "loss_ib": 0.011653934605419636, + "step": 1602 + }, + { + "ce_ib": 4.750857353210449, + "ce_orig": 0.9474871754646301, + "epoch": 0.46070889352217986, + "kl_loss": 0.19248110055923462, + "loss_ib": 0.0066756680607795715, + "step": 1602 + }, + { + "ce_ib": 0.7055354714393616, + "ce_orig": 0.09127622097730637, + "epoch": 0.46070889352217986, + "kl_loss": 0.49646514654159546, + "loss_ib": 0.005670186597853899, + "step": 1602 + }, + { + "ce_ib": 4.906747341156006, + "ce_orig": 0.6237359046936035, + "epoch": 0.46099647710115754, + "kl_loss": 0.3591931462287903, + "loss_ib": 0.008498678915202618, + "step": 1603 + }, + { + "ce_ib": 5.325519561767578, + "ce_orig": 0.8479997515678406, + "epoch": 0.46099647710115754, + "kl_loss": 0.21136073768138885, + "loss_ib": 0.00743912672623992, + "step": 1603 + }, + { + "ce_ib": 7.966558933258057, + "ce_orig": 0.49447599053382874, + "epoch": 0.46099647710115754, + "kl_loss": 0.23808987438678741, + "loss_ib": 0.010347457602620125, + "step": 1603 + }, + { + "ce_ib": 5.689008712768555, + "ce_orig": 0.8724644184112549, + "epoch": 0.46099647710115754, + "kl_loss": 0.32918137311935425, + "loss_ib": 0.008980822749435902, + "step": 1603 + }, + { + "ce_ib": 5.284266471862793, + "ce_orig": 0.8119617104530334, + "epoch": 0.46128406068013517, + "kl_loss": 0.22637273371219635, + "loss_ib": 0.007547993678599596, + "step": 1604 + }, + { + "ce_ib": 7.932982444763184, + "ce_orig": 1.1257801055908203, + "epoch": 0.46128406068013517, + "kl_loss": 0.33682769536972046, + "loss_ib": 0.011301259510219097, + "step": 1604 + }, + { + "ce_ib": 6.964417934417725, + "ce_orig": 0.6016362309455872, + "epoch": 0.46128406068013517, + "kl_loss": 0.7285957336425781, + "loss_ib": 0.014250376261770725, + "step": 1604 + }, + { + "ce_ib": 10.399383544921875, + "ce_orig": 1.1091629266738892, + "epoch": 0.46128406068013517, + "kl_loss": 0.3269003927707672, + "loss_ib": 0.01366838626563549, + "step": 1604 + }, + { + "epoch": 0.4615716442591128, + "grad_norm": 0.11775634437799454, + "learning_rate": 9.607428908505078e-06, + "loss": 0.871, + "step": 1605 + }, + { + "ce_ib": 7.806272983551025, + "ce_orig": 1.1525717973709106, + "epoch": 0.4615716442591128, + "kl_loss": 0.24559307098388672, + "loss_ib": 0.01026220340281725, + "step": 1605 + }, + { + "ce_ib": 9.258004188537598, + "ce_orig": 0.9235900044441223, + "epoch": 0.4615716442591128, + "kl_loss": 0.21653199195861816, + "loss_ib": 0.01142332423478365, + "step": 1605 + }, + { + "ce_ib": 3.807772397994995, + "ce_orig": 0.501733124256134, + "epoch": 0.4615716442591128, + "kl_loss": 0.21480196714401245, + "loss_ib": 0.005955792032182217, + "step": 1605 + }, + { + "ce_ib": 3.2948741912841797, + "ce_orig": 0.24705025553703308, + "epoch": 0.4615716442591128, + "kl_loss": 0.2047540843486786, + "loss_ib": 0.005342415068298578, + "step": 1605 + }, + { + "ce_ib": 8.07645034790039, + "ce_orig": 0.9673423171043396, + "epoch": 0.46185922783809047, + "kl_loss": 0.18127772212028503, + "loss_ib": 0.009889227338135242, + "step": 1606 + }, + { + "ce_ib": 2.4459357261657715, + "ce_orig": 0.5707719326019287, + "epoch": 0.46185922783809047, + "kl_loss": 0.23185060918331146, + "loss_ib": 0.00476444186642766, + "step": 1606 + }, + { + "ce_ib": 6.811293125152588, + "ce_orig": 0.8880608677864075, + "epoch": 0.46185922783809047, + "kl_loss": 0.2558295428752899, + "loss_ib": 0.00936958845704794, + "step": 1606 + }, + { + "ce_ib": 6.341538429260254, + "ce_orig": 0.83788001537323, + "epoch": 0.46185922783809047, + "kl_loss": 0.2810502052307129, + "loss_ib": 0.009152039885520935, + "step": 1606 + }, + { + "ce_ib": 6.138156890869141, + "ce_orig": 1.0111743211746216, + "epoch": 0.4621468114170681, + "kl_loss": 0.17590323090553284, + "loss_ib": 0.007897189818322659, + "step": 1607 + }, + { + "ce_ib": 5.954296112060547, + "ce_orig": 1.0918947458267212, + "epoch": 0.4621468114170681, + "kl_loss": 0.18631017208099365, + "loss_ib": 0.007817397825419903, + "step": 1607 + }, + { + "ce_ib": 2.8839921951293945, + "ce_orig": 0.4976504147052765, + "epoch": 0.4621468114170681, + "kl_loss": 0.19970375299453735, + "loss_ib": 0.004881029482930899, + "step": 1607 + }, + { + "ce_ib": 6.565066337585449, + "ce_orig": 1.105733036994934, + "epoch": 0.4621468114170681, + "kl_loss": 0.20072929561138153, + "loss_ib": 0.008572359569370747, + "step": 1607 + }, + { + "ce_ib": 4.914196968078613, + "ce_orig": 0.40476492047309875, + "epoch": 0.4624343949960457, + "kl_loss": 0.38428157567977905, + "loss_ib": 0.008757012896239758, + "step": 1608 + }, + { + "ce_ib": 5.936429500579834, + "ce_orig": 0.7984280586242676, + "epoch": 0.4624343949960457, + "kl_loss": 0.31145086884498596, + "loss_ib": 0.009050937369465828, + "step": 1608 + }, + { + "ce_ib": 4.807775497436523, + "ce_orig": 0.776930034160614, + "epoch": 0.4624343949960457, + "kl_loss": 0.18399487435817719, + "loss_ib": 0.006647724192589521, + "step": 1608 + }, + { + "ce_ib": 5.2376322746276855, + "ce_orig": 0.9014029502868652, + "epoch": 0.4624343949960457, + "kl_loss": 0.28456035256385803, + "loss_ib": 0.008083236403763294, + "step": 1608 + }, + { + "ce_ib": 5.227330684661865, + "ce_orig": 0.7392305135726929, + "epoch": 0.46272197857502334, + "kl_loss": 0.26967063546180725, + "loss_ib": 0.007924037054181099, + "step": 1609 + }, + { + "ce_ib": 2.673739194869995, + "ce_orig": 0.509719729423523, + "epoch": 0.46272197857502334, + "kl_loss": 0.16655415296554565, + "loss_ib": 0.004339280538260937, + "step": 1609 + }, + { + "ce_ib": 5.3345046043396, + "ce_orig": 0.8803150653839111, + "epoch": 0.46272197857502334, + "kl_loss": 0.2357148826122284, + "loss_ib": 0.007691653911024332, + "step": 1609 + }, + { + "ce_ib": 6.286981582641602, + "ce_orig": 0.9555469155311584, + "epoch": 0.46272197857502334, + "kl_loss": 0.19689792394638062, + "loss_ib": 0.008255960419774055, + "step": 1609 + }, + { + "epoch": 0.463009562154001, + "grad_norm": 0.10211392492055893, + "learning_rate": 9.604408950988988e-06, + "loss": 0.8694, + "step": 1610 + }, + { + "ce_ib": 6.750776767730713, + "ce_orig": 0.6942508220672607, + "epoch": 0.463009562154001, + "kl_loss": 0.315233051776886, + "loss_ib": 0.009903106838464737, + "step": 1610 + }, + { + "ce_ib": 7.861863613128662, + "ce_orig": 1.380213737487793, + "epoch": 0.463009562154001, + "kl_loss": 0.2642974257469177, + "loss_ib": 0.010504838079214096, + "step": 1610 + }, + { + "ce_ib": 6.023126125335693, + "ce_orig": 0.9096881151199341, + "epoch": 0.463009562154001, + "kl_loss": 0.2661204934120178, + "loss_ib": 0.008684330619871616, + "step": 1610 + }, + { + "ce_ib": 9.524619102478027, + "ce_orig": 1.7194697856903076, + "epoch": 0.463009562154001, + "kl_loss": 0.2726157307624817, + "loss_ib": 0.012250776402652264, + "step": 1610 + }, + { + "ce_ib": 1.833953857421875, + "ce_orig": 0.2571110129356384, + "epoch": 0.46329714573297864, + "kl_loss": 0.5162818431854248, + "loss_ib": 0.00699677225202322, + "step": 1611 + }, + { + "ce_ib": 7.046572208404541, + "ce_orig": 1.1004294157028198, + "epoch": 0.46329714573297864, + "kl_loss": 0.2600012719631195, + "loss_ib": 0.009646585211157799, + "step": 1611 + }, + { + "ce_ib": 6.907512187957764, + "ce_orig": 1.0553163290023804, + "epoch": 0.46329714573297864, + "kl_loss": 0.3212874233722687, + "loss_ib": 0.010120387189090252, + "step": 1611 + }, + { + "ce_ib": 7.378001689910889, + "ce_orig": 1.1235740184783936, + "epoch": 0.46329714573297864, + "kl_loss": 0.3305365741252899, + "loss_ib": 0.010683367028832436, + "step": 1611 + }, + { + "ce_ib": 5.189960479736328, + "ce_orig": 0.7845567464828491, + "epoch": 0.46358472931195627, + "kl_loss": 0.20006737112998962, + "loss_ib": 0.0071906340308487415, + "step": 1612 + }, + { + "ce_ib": 6.913703441619873, + "ce_orig": 0.7184546589851379, + "epoch": 0.46358472931195627, + "kl_loss": 0.3024430572986603, + "loss_ib": 0.009938133880496025, + "step": 1612 + }, + { + "ce_ib": 7.071518421173096, + "ce_orig": 1.0965911149978638, + "epoch": 0.46358472931195627, + "kl_loss": 0.2570381760597229, + "loss_ib": 0.009641899727284908, + "step": 1612 + }, + { + "ce_ib": 4.9828410148620605, + "ce_orig": 0.7130359411239624, + "epoch": 0.46358472931195627, + "kl_loss": 0.20155639946460724, + "loss_ib": 0.006998404860496521, + "step": 1612 + }, + { + "ce_ib": 3.570085287094116, + "ce_orig": 0.6303805708885193, + "epoch": 0.46387231289093395, + "kl_loss": 0.15140435099601746, + "loss_ib": 0.005084129050374031, + "step": 1613 + }, + { + "ce_ib": 5.76014518737793, + "ce_orig": 0.6615375280380249, + "epoch": 0.46387231289093395, + "kl_loss": 0.2971652150154114, + "loss_ib": 0.008731797337532043, + "step": 1613 + }, + { + "ce_ib": 7.161432266235352, + "ce_orig": 1.0736418962478638, + "epoch": 0.46387231289093395, + "kl_loss": 0.24368785321712494, + "loss_ib": 0.00959831103682518, + "step": 1613 + }, + { + "ce_ib": 4.258911609649658, + "ce_orig": 0.6454308032989502, + "epoch": 0.46387231289093395, + "kl_loss": 0.22261682152748108, + "loss_ib": 0.006485079415142536, + "step": 1613 + }, + { + "ce_ib": 4.7470855712890625, + "ce_orig": 0.6598400473594666, + "epoch": 0.46415989646991157, + "kl_loss": 0.2554096579551697, + "loss_ib": 0.007301182020455599, + "step": 1614 + }, + { + "ce_ib": 4.303165435791016, + "ce_orig": 0.23601269721984863, + "epoch": 0.46415989646991157, + "kl_loss": 0.22935321927070618, + "loss_ib": 0.006596697494387627, + "step": 1614 + }, + { + "ce_ib": 4.91155481338501, + "ce_orig": 0.5904660820960999, + "epoch": 0.46415989646991157, + "kl_loss": 0.2633596658706665, + "loss_ib": 0.007545151747763157, + "step": 1614 + }, + { + "ce_ib": 9.601012229919434, + "ce_orig": 1.771711826324463, + "epoch": 0.46415989646991157, + "kl_loss": 0.24031728506088257, + "loss_ib": 0.012004184536635876, + "step": 1614 + }, + { + "epoch": 0.4644474800488892, + "grad_norm": 0.10622972995042801, + "learning_rate": 9.601377900383029e-06, + "loss": 0.8348, + "step": 1615 + }, + { + "ce_ib": 7.197417259216309, + "ce_orig": 0.7182507514953613, + "epoch": 0.4644474800488892, + "kl_loss": 0.35040098428726196, + "loss_ib": 0.010701427236199379, + "step": 1615 + }, + { + "ce_ib": 10.0778169631958, + "ce_orig": 1.1566050052642822, + "epoch": 0.4644474800488892, + "kl_loss": 0.22660639882087708, + "loss_ib": 0.012343880720436573, + "step": 1615 + }, + { + "ce_ib": 9.881003379821777, + "ce_orig": 1.7606111764907837, + "epoch": 0.4644474800488892, + "kl_loss": 0.265298992395401, + "loss_ib": 0.01253399346023798, + "step": 1615 + }, + { + "ce_ib": 6.909655570983887, + "ce_orig": 0.8872973322868347, + "epoch": 0.4644474800488892, + "kl_loss": 0.5289936065673828, + "loss_ib": 0.012199592776596546, + "step": 1615 + }, + { + "ce_ib": 8.272558212280273, + "ce_orig": 1.5352343320846558, + "epoch": 0.4647350636278669, + "kl_loss": 0.28025469183921814, + "loss_ib": 0.011075105518102646, + "step": 1616 + }, + { + "ce_ib": 4.740227222442627, + "ce_orig": 0.7010709643363953, + "epoch": 0.4647350636278669, + "kl_loss": 0.21988792717456818, + "loss_ib": 0.006939106620848179, + "step": 1616 + }, + { + "ce_ib": 2.735738754272461, + "ce_orig": 0.2933582663536072, + "epoch": 0.4647350636278669, + "kl_loss": 0.1642613708972931, + "loss_ib": 0.00437835231423378, + "step": 1616 + }, + { + "ce_ib": 9.15244197845459, + "ce_orig": 1.5223807096481323, + "epoch": 0.4647350636278669, + "kl_loss": 0.2403932809829712, + "loss_ib": 0.011556374840438366, + "step": 1616 + }, + { + "ce_ib": 5.430967807769775, + "ce_orig": 0.7502396702766418, + "epoch": 0.4650226472068445, + "kl_loss": 0.23676195740699768, + "loss_ib": 0.007798586972057819, + "step": 1617 + }, + { + "ce_ib": 7.760725975036621, + "ce_orig": 1.305103063583374, + "epoch": 0.4650226472068445, + "kl_loss": 0.19089996814727783, + "loss_ib": 0.00966972578316927, + "step": 1617 + }, + { + "ce_ib": 5.95468282699585, + "ce_orig": 0.969433069229126, + "epoch": 0.4650226472068445, + "kl_loss": 0.2655257284641266, + "loss_ib": 0.008609939366579056, + "step": 1617 + }, + { + "ce_ib": 4.386993885040283, + "ce_orig": 0.6455299258232117, + "epoch": 0.4650226472068445, + "kl_loss": 0.13578274846076965, + "loss_ib": 0.0057448213919997215, + "step": 1617 + }, + { + "ce_ib": 6.207513809204102, + "ce_orig": 0.927709698677063, + "epoch": 0.4653102307858221, + "kl_loss": 0.23612076044082642, + "loss_ib": 0.008568720892071724, + "step": 1618 + }, + { + "ce_ib": 6.073016166687012, + "ce_orig": 0.61612868309021, + "epoch": 0.4653102307858221, + "kl_loss": 0.23253057897090912, + "loss_ib": 0.008398322388529778, + "step": 1618 + }, + { + "ce_ib": 4.8698554039001465, + "ce_orig": 0.9121537208557129, + "epoch": 0.4653102307858221, + "kl_loss": 0.19702599942684174, + "loss_ib": 0.006840114947408438, + "step": 1618 + }, + { + "ce_ib": 3.864777088165283, + "ce_orig": 0.48421528935432434, + "epoch": 0.4653102307858221, + "kl_loss": 0.20810124278068542, + "loss_ib": 0.005945789627730846, + "step": 1618 + }, + { + "ce_ib": 6.4403395652771, + "ce_orig": 1.0684853792190552, + "epoch": 0.46559781436479974, + "kl_loss": 0.2469189614057541, + "loss_ib": 0.008909529075026512, + "step": 1619 + }, + { + "ce_ib": 2.098714828491211, + "ce_orig": 0.18352091312408447, + "epoch": 0.46559781436479974, + "kl_loss": 0.27719706296920776, + "loss_ib": 0.0048706852830946445, + "step": 1619 + }, + { + "ce_ib": 3.6879613399505615, + "ce_orig": 0.6930275559425354, + "epoch": 0.46559781436479974, + "kl_loss": 0.2174752801656723, + "loss_ib": 0.0058627137914299965, + "step": 1619 + }, + { + "ce_ib": 3.930967092514038, + "ce_orig": 0.7239459156990051, + "epoch": 0.46559781436479974, + "kl_loss": 0.23733000457286835, + "loss_ib": 0.0063042668625712395, + "step": 1619 + }, + { + "epoch": 0.4658853979437774, + "grad_norm": 0.12069246172904968, + "learning_rate": 9.598335763989703e-06, + "loss": 0.8468, + "step": 1620 + }, + { + "ce_ib": 2.3631093502044678, + "ce_orig": 0.5599596500396729, + "epoch": 0.4658853979437774, + "kl_loss": 0.1640537679195404, + "loss_ib": 0.004003646783530712, + "step": 1620 + }, + { + "ce_ib": 6.95280122756958, + "ce_orig": 1.01499605178833, + "epoch": 0.4658853979437774, + "kl_loss": 0.2521277070045471, + "loss_ib": 0.009474078193306923, + "step": 1620 + }, + { + "ce_ib": 9.791449546813965, + "ce_orig": 1.662266731262207, + "epoch": 0.4658853979437774, + "kl_loss": 0.22838973999023438, + "loss_ib": 0.012075347825884819, + "step": 1620 + }, + { + "ce_ib": 3.9279820919036865, + "ce_orig": 0.4412092864513397, + "epoch": 0.4658853979437774, + "kl_loss": 0.16607823967933655, + "loss_ib": 0.005588764324784279, + "step": 1620 + }, + { + "ce_ib": 8.574518203735352, + "ce_orig": 1.3736652135849, + "epoch": 0.46617298152275505, + "kl_loss": 0.33889293670654297, + "loss_ib": 0.011963448487222195, + "step": 1621 + }, + { + "ce_ib": 3.510213851928711, + "ce_orig": 0.6036752462387085, + "epoch": 0.46617298152275505, + "kl_loss": 0.2927241921424866, + "loss_ib": 0.006437455303966999, + "step": 1621 + }, + { + "ce_ib": 5.375370502471924, + "ce_orig": 0.6719651222229004, + "epoch": 0.46617298152275505, + "kl_loss": 0.2255745232105255, + "loss_ib": 0.007631115615367889, + "step": 1621 + }, + { + "ce_ib": 3.7908592224121094, + "ce_orig": 0.47120845317840576, + "epoch": 0.46617298152275505, + "kl_loss": 0.1993405669927597, + "loss_ib": 0.005784264765679836, + "step": 1621 + }, + { + "ce_ib": 3.3675405979156494, + "ce_orig": 0.37180203199386597, + "epoch": 0.46646056510173267, + "kl_loss": 0.194191575050354, + "loss_ib": 0.005309456493705511, + "step": 1622 + }, + { + "ce_ib": 4.649647235870361, + "ce_orig": 0.5024625658988953, + "epoch": 0.46646056510173267, + "kl_loss": 0.23426902294158936, + "loss_ib": 0.0069923377595841885, + "step": 1622 + }, + { + "ce_ib": 5.274606704711914, + "ce_orig": 0.6442150473594666, + "epoch": 0.46646056510173267, + "kl_loss": 0.24061354994773865, + "loss_ib": 0.007680742535740137, + "step": 1622 + }, + { + "ce_ib": 7.837526321411133, + "ce_orig": 1.271608829498291, + "epoch": 0.46646056510173267, + "kl_loss": 0.3615526258945465, + "loss_ib": 0.011453052051365376, + "step": 1622 + }, + { + "ce_ib": 4.621031761169434, + "ce_orig": 0.7775217294692993, + "epoch": 0.46674814868071035, + "kl_loss": 0.18649159371852875, + "loss_ib": 0.006485947873443365, + "step": 1623 + }, + { + "ce_ib": 4.2601518630981445, + "ce_orig": 0.8415741920471191, + "epoch": 0.46674814868071035, + "kl_loss": 0.20816299319267273, + "loss_ib": 0.0063417814671993256, + "step": 1623 + }, + { + "ce_ib": 7.340706825256348, + "ce_orig": 1.174594521522522, + "epoch": 0.46674814868071035, + "kl_loss": 0.1925428807735443, + "loss_ib": 0.009266135282814503, + "step": 1623 + }, + { + "ce_ib": 4.316766262054443, + "ce_orig": 0.9118759632110596, + "epoch": 0.46674814868071035, + "kl_loss": 0.13632294535636902, + "loss_ib": 0.0056799957528710365, + "step": 1623 + }, + { + "ce_ib": 6.24122428894043, + "ce_orig": 1.3776178359985352, + "epoch": 0.467035732259688, + "kl_loss": 0.3462154269218445, + "loss_ib": 0.009703378193080425, + "step": 1624 + }, + { + "ce_ib": 6.74078369140625, + "ce_orig": 0.6831387877464294, + "epoch": 0.467035732259688, + "kl_loss": 0.30415505170822144, + "loss_ib": 0.009782334789633751, + "step": 1624 + }, + { + "ce_ib": 5.621586322784424, + "ce_orig": 1.0059772729873657, + "epoch": 0.467035732259688, + "kl_loss": 0.2930832505226135, + "loss_ib": 0.008552419021725655, + "step": 1624 + }, + { + "ce_ib": 3.5580859184265137, + "ce_orig": 0.3204123079776764, + "epoch": 0.467035732259688, + "kl_loss": 0.3000769019126892, + "loss_ib": 0.006558854598551989, + "step": 1624 + }, + { + "epoch": 0.4673233158386656, + "grad_norm": 0.12965475022792816, + "learning_rate": 9.595282549138228e-06, + "loss": 0.947, + "step": 1625 + }, + { + "ce_ib": 8.029479026794434, + "ce_orig": 1.0885497331619263, + "epoch": 0.4673233158386656, + "kl_loss": 0.2798893451690674, + "loss_ib": 0.010828373022377491, + "step": 1625 + }, + { + "ce_ib": 5.572749137878418, + "ce_orig": 0.9018054604530334, + "epoch": 0.4673233158386656, + "kl_loss": 0.49554967880249023, + "loss_ib": 0.010528245940804482, + "step": 1625 + }, + { + "ce_ib": 8.166579246520996, + "ce_orig": 0.8004129528999329, + "epoch": 0.4673233158386656, + "kl_loss": 0.2181631475687027, + "loss_ib": 0.010348211042582989, + "step": 1625 + }, + { + "ce_ib": 5.353193759918213, + "ce_orig": 1.0144050121307373, + "epoch": 0.4673233158386656, + "kl_loss": 0.1349445879459381, + "loss_ib": 0.00670264009386301, + "step": 1625 + }, + { + "ce_ib": 4.509777069091797, + "ce_orig": 0.5489388704299927, + "epoch": 0.4676108994176433, + "kl_loss": 0.22875724732875824, + "loss_ib": 0.006797349080443382, + "step": 1626 + }, + { + "ce_ib": 6.12491512298584, + "ce_orig": 0.7085449695587158, + "epoch": 0.4676108994176433, + "kl_loss": 0.2517673969268799, + "loss_ib": 0.008642589673399925, + "step": 1626 + }, + { + "ce_ib": 4.152883529663086, + "ce_orig": 0.6175920963287354, + "epoch": 0.4676108994176433, + "kl_loss": 0.18230682611465454, + "loss_ib": 0.005975951906293631, + "step": 1626 + }, + { + "ce_ib": 1.8417199850082397, + "ce_orig": 0.4287397265434265, + "epoch": 0.4676108994176433, + "kl_loss": 0.1650610864162445, + "loss_ib": 0.003492330899462104, + "step": 1626 + }, + { + "ce_ib": 9.31445026397705, + "ce_orig": 1.1101757287979126, + "epoch": 0.4678984829966209, + "kl_loss": 0.23538920283317566, + "loss_ib": 0.011668342165648937, + "step": 1627 + }, + { + "ce_ib": 6.0186262130737305, + "ce_orig": 0.6789668798446655, + "epoch": 0.4678984829966209, + "kl_loss": 0.31773999333381653, + "loss_ib": 0.009196026250720024, + "step": 1627 + }, + { + "ce_ib": 5.332401752471924, + "ce_orig": 0.7318709492683411, + "epoch": 0.4678984829966209, + "kl_loss": 0.21486830711364746, + "loss_ib": 0.007481084670871496, + "step": 1627 + }, + { + "ce_ib": 7.069350242614746, + "ce_orig": 1.3702043294906616, + "epoch": 0.4678984829966209, + "kl_loss": 0.18601363897323608, + "loss_ib": 0.008929486386477947, + "step": 1627 + }, + { + "ce_ib": 4.144545555114746, + "ce_orig": 0.7570605278015137, + "epoch": 0.4681860665755985, + "kl_loss": 0.22682487964630127, + "loss_ib": 0.0064127943478524685, + "step": 1628 + }, + { + "ce_ib": 4.975637912750244, + "ce_orig": 0.9772812128067017, + "epoch": 0.4681860665755985, + "kl_loss": 0.2234506607055664, + "loss_ib": 0.0072101447731256485, + "step": 1628 + }, + { + "ce_ib": 4.926833152770996, + "ce_orig": 0.8578603863716125, + "epoch": 0.4681860665755985, + "kl_loss": 0.13924984633922577, + "loss_ib": 0.006319331470876932, + "step": 1628 + }, + { + "ce_ib": 7.390744209289551, + "ce_orig": 0.7690408229827881, + "epoch": 0.4681860665755985, + "kl_loss": 0.20541280508041382, + "loss_ib": 0.009444871917366982, + "step": 1628 + }, + { + "ce_ib": 3.5227086544036865, + "ce_orig": 0.622070848941803, + "epoch": 0.46847365015457615, + "kl_loss": 0.2001567929983139, + "loss_ib": 0.005524276290088892, + "step": 1629 + }, + { + "ce_ib": 9.255240440368652, + "ce_orig": 0.9039395451545715, + "epoch": 0.46847365015457615, + "kl_loss": 0.2905845642089844, + "loss_ib": 0.01216108538210392, + "step": 1629 + }, + { + "ce_ib": 6.1646199226379395, + "ce_orig": 0.6998523473739624, + "epoch": 0.46847365015457615, + "kl_loss": 0.28400886058807373, + "loss_ib": 0.009004708379507065, + "step": 1629 + }, + { + "ce_ib": 6.819250583648682, + "ce_orig": 0.3863005042076111, + "epoch": 0.46847365015457615, + "kl_loss": 0.2553757429122925, + "loss_ib": 0.009373007342219353, + "step": 1629 + }, + { + "epoch": 0.46876123373355383, + "grad_norm": 0.10917191207408905, + "learning_rate": 9.592218263184503e-06, + "loss": 0.8765, + "step": 1630 + }, + { + "ce_ib": 4.765841484069824, + "ce_orig": 0.589921772480011, + "epoch": 0.46876123373355383, + "kl_loss": 0.24076642096042633, + "loss_ib": 0.007173506077378988, + "step": 1630 + }, + { + "ce_ib": 4.582233905792236, + "ce_orig": 0.9084943532943726, + "epoch": 0.46876123373355383, + "kl_loss": 0.3679146468639374, + "loss_ib": 0.008261379785835743, + "step": 1630 + }, + { + "ce_ib": 7.527477264404297, + "ce_orig": 1.0047513246536255, + "epoch": 0.46876123373355383, + "kl_loss": 0.3431330621242523, + "loss_ib": 0.010958807542920113, + "step": 1630 + }, + { + "ce_ib": 2.3991539478302, + "ce_orig": 0.48176831007003784, + "epoch": 0.46876123373355383, + "kl_loss": 0.15134502947330475, + "loss_ib": 0.003912604413926601, + "step": 1630 + }, + { + "ce_ib": 9.214207649230957, + "ce_orig": 1.4497013092041016, + "epoch": 0.46904881731253145, + "kl_loss": 0.24560272693634033, + "loss_ib": 0.011670233681797981, + "step": 1631 + }, + { + "ce_ib": 7.9614362716674805, + "ce_orig": 1.3710211515426636, + "epoch": 0.46904881731253145, + "kl_loss": 0.17029419541358948, + "loss_ib": 0.009664378128945827, + "step": 1631 + }, + { + "ce_ib": 9.119590759277344, + "ce_orig": 1.3142935037612915, + "epoch": 0.46904881731253145, + "kl_loss": 0.24297848343849182, + "loss_ib": 0.01154937595129013, + "step": 1631 + }, + { + "ce_ib": 5.7404866218566895, + "ce_orig": 0.7808132171630859, + "epoch": 0.46904881731253145, + "kl_loss": 0.3067057132720947, + "loss_ib": 0.00880754366517067, + "step": 1631 + }, + { + "ce_ib": 4.927828311920166, + "ce_orig": 0.8637658357620239, + "epoch": 0.4693364008915091, + "kl_loss": 0.23912517726421356, + "loss_ib": 0.007319080177694559, + "step": 1632 + }, + { + "ce_ib": 6.485845565795898, + "ce_orig": 0.46057918667793274, + "epoch": 0.4693364008915091, + "kl_loss": 0.304848313331604, + "loss_ib": 0.009534328244626522, + "step": 1632 + }, + { + "ce_ib": 4.575008869171143, + "ce_orig": 0.5714587569236755, + "epoch": 0.4693364008915091, + "kl_loss": 0.280214786529541, + "loss_ib": 0.007377156987786293, + "step": 1632 + }, + { + "ce_ib": 5.922549724578857, + "ce_orig": 0.9120684862136841, + "epoch": 0.4693364008915091, + "kl_loss": 0.3095513880252838, + "loss_ib": 0.00901806354522705, + "step": 1632 + }, + { + "ce_ib": 7.879459857940674, + "ce_orig": 1.2214351892471313, + "epoch": 0.46962398447048675, + "kl_loss": 0.278079092502594, + "loss_ib": 0.01066024973988533, + "step": 1633 + }, + { + "ce_ib": 7.259771823883057, + "ce_orig": 1.1840142011642456, + "epoch": 0.46962398447048675, + "kl_loss": 0.23207473754882812, + "loss_ib": 0.009580519050359726, + "step": 1633 + }, + { + "ce_ib": 8.346108436584473, + "ce_orig": 0.706606924533844, + "epoch": 0.46962398447048675, + "kl_loss": 0.16712577641010284, + "loss_ib": 0.010017366148531437, + "step": 1633 + }, + { + "ce_ib": 8.588577270507812, + "ce_orig": 1.0711778402328491, + "epoch": 0.46962398447048675, + "kl_loss": 0.19195330142974854, + "loss_ib": 0.010508110746741295, + "step": 1633 + }, + { + "ce_ib": 5.0715813636779785, + "ce_orig": 0.8741798996925354, + "epoch": 0.4699115680494644, + "kl_loss": 0.2602311968803406, + "loss_ib": 0.007673893589526415, + "step": 1634 + }, + { + "ce_ib": 7.203498363494873, + "ce_orig": 0.7638635635375977, + "epoch": 0.4699115680494644, + "kl_loss": 0.1899445801973343, + "loss_ib": 0.009102944284677505, + "step": 1634 + }, + { + "ce_ib": 7.463857650756836, + "ce_orig": 1.3012150526046753, + "epoch": 0.4699115680494644, + "kl_loss": 0.2831643521785736, + "loss_ib": 0.010295500978827477, + "step": 1634 + }, + { + "ce_ib": 7.1105146408081055, + "ce_orig": 0.9453640580177307, + "epoch": 0.4699115680494644, + "kl_loss": 0.23901736736297607, + "loss_ib": 0.009500687941908836, + "step": 1634 + }, + { + "epoch": 0.470199151628442, + "grad_norm": 0.1111624464392662, + "learning_rate": 9.589142913511104e-06, + "loss": 0.8375, + "step": 1635 + }, + { + "ce_ib": 10.440080642700195, + "ce_orig": 1.3846956491470337, + "epoch": 0.470199151628442, + "kl_loss": 0.2537896931171417, + "loss_ib": 0.01297797728329897, + "step": 1635 + }, + { + "ce_ib": 7.813945770263672, + "ce_orig": 1.048168659210205, + "epoch": 0.470199151628442, + "kl_loss": 0.2586318552494049, + "loss_ib": 0.010400263592600822, + "step": 1635 + }, + { + "ce_ib": 5.204543590545654, + "ce_orig": 0.8099000453948975, + "epoch": 0.470199151628442, + "kl_loss": 0.22041164338588715, + "loss_ib": 0.007408659905195236, + "step": 1635 + }, + { + "ce_ib": 5.3694610595703125, + "ce_orig": 0.9361176490783691, + "epoch": 0.470199151628442, + "kl_loss": 0.2414846271276474, + "loss_ib": 0.0077843074686825275, + "step": 1635 + }, + { + "ce_ib": 4.985073089599609, + "ce_orig": 0.5407580137252808, + "epoch": 0.4704867352074197, + "kl_loss": 0.3416972756385803, + "loss_ib": 0.00840204581618309, + "step": 1636 + }, + { + "ce_ib": 5.113473415374756, + "ce_orig": 0.6522555351257324, + "epoch": 0.4704867352074197, + "kl_loss": 0.29770779609680176, + "loss_ib": 0.008090551011264324, + "step": 1636 + }, + { + "ce_ib": 3.002034902572632, + "ce_orig": 0.5235405564308167, + "epoch": 0.4704867352074197, + "kl_loss": 0.23390766978263855, + "loss_ib": 0.005341111216694117, + "step": 1636 + }, + { + "ce_ib": 4.9306321144104, + "ce_orig": 0.8330354690551758, + "epoch": 0.4704867352074197, + "kl_loss": 0.17335164546966553, + "loss_ib": 0.006664148531854153, + "step": 1636 + }, + { + "ce_ib": 7.28361177444458, + "ce_orig": 1.006595492362976, + "epoch": 0.4707743187863973, + "kl_loss": 0.24205949902534485, + "loss_ib": 0.009704207070171833, + "step": 1637 + }, + { + "ce_ib": 3.2504138946533203, + "ce_orig": 0.44360581040382385, + "epoch": 0.4707743187863973, + "kl_loss": 0.23381365835666656, + "loss_ib": 0.005588550586253405, + "step": 1637 + }, + { + "ce_ib": 4.840280532836914, + "ce_orig": 0.8442293405532837, + "epoch": 0.4707743187863973, + "kl_loss": 0.17008595168590546, + "loss_ib": 0.006541139911860228, + "step": 1637 + }, + { + "ce_ib": 5.317061424255371, + "ce_orig": 1.048091173171997, + "epoch": 0.4707743187863973, + "kl_loss": 0.23069365322589874, + "loss_ib": 0.007623997982591391, + "step": 1637 + }, + { + "ce_ib": 3.422884702682495, + "ce_orig": 0.6781839728355408, + "epoch": 0.47106190236537493, + "kl_loss": 0.13445274531841278, + "loss_ib": 0.004767411854118109, + "step": 1638 + }, + { + "ce_ib": 7.733299732208252, + "ce_orig": 1.2221657037734985, + "epoch": 0.47106190236537493, + "kl_loss": 0.38926810026168823, + "loss_ib": 0.011625980958342552, + "step": 1638 + }, + { + "ce_ib": 6.331749439239502, + "ce_orig": 0.6211827993392944, + "epoch": 0.47106190236537493, + "kl_loss": 0.2626792788505554, + "loss_ib": 0.008958541788160801, + "step": 1638 + }, + { + "ce_ib": 5.28639554977417, + "ce_orig": 0.6637152433395386, + "epoch": 0.47106190236537493, + "kl_loss": 0.2628288269042969, + "loss_ib": 0.007914683781564236, + "step": 1638 + }, + { + "ce_ib": 4.917193412780762, + "ce_orig": 0.8369187712669373, + "epoch": 0.47134948594435255, + "kl_loss": 0.1748519241809845, + "loss_ib": 0.006665712222456932, + "step": 1639 + }, + { + "ce_ib": 6.316074371337891, + "ce_orig": 0.5653436183929443, + "epoch": 0.47134948594435255, + "kl_loss": 0.5525051951408386, + "loss_ib": 0.011841126717627048, + "step": 1639 + }, + { + "ce_ib": 5.5230841636657715, + "ce_orig": 0.85850989818573, + "epoch": 0.47134948594435255, + "kl_loss": 0.21816977858543396, + "loss_ib": 0.007704782299697399, + "step": 1639 + }, + { + "ce_ib": 4.093878746032715, + "ce_orig": 0.6998303532600403, + "epoch": 0.47134948594435255, + "kl_loss": 0.17293649911880493, + "loss_ib": 0.005823243875056505, + "step": 1639 + }, + { + "epoch": 0.47163706952333023, + "grad_norm": 0.13059952855110168, + "learning_rate": 9.586056507527266e-06, + "loss": 0.8683, + "step": 1640 + }, + { + "ce_ib": 3.3075473308563232, + "ce_orig": 0.4052724838256836, + "epoch": 0.47163706952333023, + "kl_loss": 0.3414004445075989, + "loss_ib": 0.006721551064401865, + "step": 1640 + }, + { + "ce_ib": 7.159058570861816, + "ce_orig": 1.193800926208496, + "epoch": 0.47163706952333023, + "kl_loss": 0.2893860936164856, + "loss_ib": 0.010052919387817383, + "step": 1640 + }, + { + "ce_ib": 6.255849838256836, + "ce_orig": 0.9312906265258789, + "epoch": 0.47163706952333023, + "kl_loss": 0.26795995235443115, + "loss_ib": 0.00893544964492321, + "step": 1640 + }, + { + "ce_ib": 5.748259544372559, + "ce_orig": 1.22636878490448, + "epoch": 0.47163706952333023, + "kl_loss": 0.2102559357881546, + "loss_ib": 0.00785081833600998, + "step": 1640 + }, + { + "ce_ib": 0.89692622423172, + "ce_orig": 0.09618912637233734, + "epoch": 0.47192465310230786, + "kl_loss": 0.5119258165359497, + "loss_ib": 0.006016184110194445, + "step": 1641 + }, + { + "ce_ib": 5.4927897453308105, + "ce_orig": 0.5387899279594421, + "epoch": 0.47192465310230786, + "kl_loss": 0.243363618850708, + "loss_ib": 0.007926425896584988, + "step": 1641 + }, + { + "ce_ib": 6.7933573722839355, + "ce_orig": 1.027798056602478, + "epoch": 0.47192465310230786, + "kl_loss": 0.1942419707775116, + "loss_ib": 0.008735776878893375, + "step": 1641 + }, + { + "ce_ib": 5.988297462463379, + "ce_orig": 0.8516644239425659, + "epoch": 0.47192465310230786, + "kl_loss": 0.20200949907302856, + "loss_ib": 0.00800839252769947, + "step": 1641 + }, + { + "ce_ib": 4.71184778213501, + "ce_orig": 1.0416312217712402, + "epoch": 0.4722122366812855, + "kl_loss": 0.19892562925815582, + "loss_ib": 0.006701103877276182, + "step": 1642 + }, + { + "ce_ib": 5.363261699676514, + "ce_orig": 1.0295337438583374, + "epoch": 0.4722122366812855, + "kl_loss": 0.16552218794822693, + "loss_ib": 0.007018483709543943, + "step": 1642 + }, + { + "ce_ib": 6.74136209487915, + "ce_orig": 1.0272431373596191, + "epoch": 0.4722122366812855, + "kl_loss": 0.2746507525444031, + "loss_ib": 0.009487869217991829, + "step": 1642 + }, + { + "ce_ib": 0.9795145392417908, + "ce_orig": 0.1657838374376297, + "epoch": 0.4722122366812855, + "kl_loss": 0.501509964466095, + "loss_ib": 0.005994614213705063, + "step": 1642 + }, + { + "ce_ib": 3.574262857437134, + "ce_orig": 0.35515451431274414, + "epoch": 0.47249982026026316, + "kl_loss": 0.21287068724632263, + "loss_ib": 0.0057029700838029385, + "step": 1643 + }, + { + "ce_ib": 6.737833499908447, + "ce_orig": 0.5578605532646179, + "epoch": 0.47249982026026316, + "kl_loss": 0.32588255405426025, + "loss_ib": 0.009996659122407436, + "step": 1643 + }, + { + "ce_ib": 4.701847076416016, + "ce_orig": 0.7198030352592468, + "epoch": 0.47249982026026316, + "kl_loss": 0.268189013004303, + "loss_ib": 0.007383737247437239, + "step": 1643 + }, + { + "ce_ib": 4.935227870941162, + "ce_orig": 0.9215746521949768, + "epoch": 0.47249982026026316, + "kl_loss": 0.25834494829177856, + "loss_ib": 0.00751867750659585, + "step": 1643 + }, + { + "ce_ib": 5.433006286621094, + "ce_orig": 0.967570424079895, + "epoch": 0.4727874038392408, + "kl_loss": 0.14970257878303528, + "loss_ib": 0.006930031813681126, + "step": 1644 + }, + { + "ce_ib": 5.268426418304443, + "ce_orig": 0.9726541042327881, + "epoch": 0.4727874038392408, + "kl_loss": 0.22781161963939667, + "loss_ib": 0.007546542678028345, + "step": 1644 + }, + { + "ce_ib": 4.189428806304932, + "ce_orig": 0.9564615488052368, + "epoch": 0.4727874038392408, + "kl_loss": 0.2764762341976166, + "loss_ib": 0.006954191252589226, + "step": 1644 + }, + { + "ce_ib": 8.928913116455078, + "ce_orig": 1.3035036325454712, + "epoch": 0.4727874038392408, + "kl_loss": 0.2561219334602356, + "loss_ib": 0.01149013265967369, + "step": 1644 + }, + { + "epoch": 0.4730749874182184, + "grad_norm": 0.11808949708938599, + "learning_rate": 9.582959052668855e-06, + "loss": 0.8916, + "step": 1645 + }, + { + "ce_ib": 4.722048759460449, + "ce_orig": 0.8292427659034729, + "epoch": 0.4730749874182184, + "kl_loss": 0.17961278557777405, + "loss_ib": 0.006518176756799221, + "step": 1645 + }, + { + "ce_ib": 5.358216762542725, + "ce_orig": 0.3747130334377289, + "epoch": 0.4730749874182184, + "kl_loss": 0.37739047408103943, + "loss_ib": 0.009132121689617634, + "step": 1645 + }, + { + "ce_ib": 4.914391994476318, + "ce_orig": 0.3914518356323242, + "epoch": 0.4730749874182184, + "kl_loss": 0.25701606273651123, + "loss_ib": 0.007484552916139364, + "step": 1645 + }, + { + "ce_ib": 6.924988746643066, + "ce_orig": 0.5208780765533447, + "epoch": 0.4730749874182184, + "kl_loss": 0.29026472568511963, + "loss_ib": 0.009827635250985622, + "step": 1645 + }, + { + "ce_ib": 8.15476131439209, + "ce_orig": 1.0385876893997192, + "epoch": 0.4733625709971961, + "kl_loss": 0.21632888913154602, + "loss_ib": 0.010318050161004066, + "step": 1646 + }, + { + "ce_ib": 8.506185531616211, + "ce_orig": 1.3584825992584229, + "epoch": 0.4733625709971961, + "kl_loss": 0.27990996837615967, + "loss_ib": 0.011305284686386585, + "step": 1646 + }, + { + "ce_ib": 9.8707275390625, + "ce_orig": 1.7097861766815186, + "epoch": 0.4733625709971961, + "kl_loss": 0.27105122804641724, + "loss_ib": 0.012581239454448223, + "step": 1646 + }, + { + "ce_ib": 4.385462760925293, + "ce_orig": 0.47088804841041565, + "epoch": 0.4733625709971961, + "kl_loss": 0.30151084065437317, + "loss_ib": 0.0074005709029734135, + "step": 1646 + }, + { + "ce_ib": 3.344881057739258, + "ce_orig": 0.5342389941215515, + "epoch": 0.4736501545761737, + "kl_loss": 0.18429887294769287, + "loss_ib": 0.005187870003283024, + "step": 1647 + }, + { + "ce_ib": 7.1738457679748535, + "ce_orig": 0.8699575066566467, + "epoch": 0.4736501545761737, + "kl_loss": 0.15991726517677307, + "loss_ib": 0.008773017674684525, + "step": 1647 + }, + { + "ce_ib": 8.26055908203125, + "ce_orig": 1.2396870851516724, + "epoch": 0.4736501545761737, + "kl_loss": 0.24188140034675598, + "loss_ib": 0.010679373517632484, + "step": 1647 + }, + { + "ce_ib": 6.468341827392578, + "ce_orig": 1.0458532571792603, + "epoch": 0.4736501545761737, + "kl_loss": 0.46231311559677124, + "loss_ib": 0.011091472581028938, + "step": 1647 + }, + { + "ce_ib": 7.5267863273620605, + "ce_orig": 1.2770994901657104, + "epoch": 0.47393773815515133, + "kl_loss": 0.218851700425148, + "loss_ib": 0.009715302847325802, + "step": 1648 + }, + { + "ce_ib": 4.878725051879883, + "ce_orig": 0.6984403729438782, + "epoch": 0.47393773815515133, + "kl_loss": 0.2023659348487854, + "loss_ib": 0.0069023845717310905, + "step": 1648 + }, + { + "ce_ib": 4.6335601806640625, + "ce_orig": 0.709439754486084, + "epoch": 0.47393773815515133, + "kl_loss": 0.23505522310733795, + "loss_ib": 0.0069841123186051846, + "step": 1648 + }, + { + "ce_ib": 8.300333976745605, + "ce_orig": 0.872704029083252, + "epoch": 0.47393773815515133, + "kl_loss": 0.31134361028671265, + "loss_ib": 0.01141376979649067, + "step": 1648 + }, + { + "ce_ib": 5.34572696685791, + "ce_orig": 0.7818442583084106, + "epoch": 0.47422532173412896, + "kl_loss": 0.23405304551124573, + "loss_ib": 0.007686257362365723, + "step": 1649 + }, + { + "ce_ib": 5.499821186065674, + "ce_orig": 0.7070225477218628, + "epoch": 0.47422532173412896, + "kl_loss": 0.27050989866256714, + "loss_ib": 0.008204920217394829, + "step": 1649 + }, + { + "ce_ib": 7.790607929229736, + "ce_orig": 1.1115490198135376, + "epoch": 0.47422532173412896, + "kl_loss": 0.2872862219810486, + "loss_ib": 0.01066347025334835, + "step": 1649 + }, + { + "ce_ib": 7.456298828125, + "ce_orig": 1.2676522731781006, + "epoch": 0.47422532173412896, + "kl_loss": 0.2790737748146057, + "loss_ib": 0.010247036814689636, + "step": 1649 + }, + { + "epoch": 0.47451290531310664, + "grad_norm": 0.11858896166086197, + "learning_rate": 9.579850556398356e-06, + "loss": 0.8653, + "step": 1650 + }, + { + "ce_ib": 5.404179096221924, + "ce_orig": 1.1686975955963135, + "epoch": 0.47451290531310664, + "kl_loss": 0.17138412594795227, + "loss_ib": 0.007118019741028547, + "step": 1650 + }, + { + "ce_ib": 4.425675392150879, + "ce_orig": 0.6207453012466431, + "epoch": 0.47451290531310664, + "kl_loss": 0.2140074074268341, + "loss_ib": 0.006565749645233154, + "step": 1650 + }, + { + "ce_ib": 4.123245716094971, + "ce_orig": 0.6918128132820129, + "epoch": 0.47451290531310664, + "kl_loss": 0.2605014443397522, + "loss_ib": 0.006728260312229395, + "step": 1650 + }, + { + "ce_ib": 6.735450267791748, + "ce_orig": 0.9332886934280396, + "epoch": 0.47451290531310664, + "kl_loss": 0.26818910241127014, + "loss_ib": 0.009417342022061348, + "step": 1650 + }, + { + "ce_ib": 4.729572296142578, + "ce_orig": 0.7350138425827026, + "epoch": 0.47480048889208426, + "kl_loss": 0.25111934542655945, + "loss_ib": 0.007240765728056431, + "step": 1651 + }, + { + "ce_ib": 4.569364070892334, + "ce_orig": 0.5795966982841492, + "epoch": 0.47480048889208426, + "kl_loss": 0.27044403553009033, + "loss_ib": 0.007273804396390915, + "step": 1651 + }, + { + "ce_ib": 7.339898586273193, + "ce_orig": 1.3115758895874023, + "epoch": 0.47480048889208426, + "kl_loss": 0.22244146466255188, + "loss_ib": 0.009564314037561417, + "step": 1651 + }, + { + "ce_ib": 7.483953475952148, + "ce_orig": 1.1939107179641724, + "epoch": 0.47480048889208426, + "kl_loss": 0.22645403444766998, + "loss_ib": 0.009748494252562523, + "step": 1651 + }, + { + "ce_ib": 5.908069133758545, + "ce_orig": 0.7272500395774841, + "epoch": 0.4750880724710619, + "kl_loss": 0.3213765621185303, + "loss_ib": 0.009121834300458431, + "step": 1652 + }, + { + "ce_ib": 6.445575714111328, + "ce_orig": 0.8850076198577881, + "epoch": 0.4750880724710619, + "kl_loss": 0.17783117294311523, + "loss_ib": 0.008223887532949448, + "step": 1652 + }, + { + "ce_ib": 2.8532485961914062, + "ce_orig": 0.4185572564601898, + "epoch": 0.4750880724710619, + "kl_loss": 0.15768098831176758, + "loss_ib": 0.004430058412253857, + "step": 1652 + }, + { + "ce_ib": 4.459863185882568, + "ce_orig": 0.7344067692756653, + "epoch": 0.4750880724710619, + "kl_loss": 0.2608838677406311, + "loss_ib": 0.007068702019751072, + "step": 1652 + }, + { + "ce_ib": 5.409404754638672, + "ce_orig": 1.1071850061416626, + "epoch": 0.47537565605003956, + "kl_loss": 0.2114146500825882, + "loss_ib": 0.0075235506519675255, + "step": 1653 + }, + { + "ce_ib": 3.9895527362823486, + "ce_orig": 0.7837691903114319, + "epoch": 0.47537565605003956, + "kl_loss": 0.19163039326667786, + "loss_ib": 0.005905856844037771, + "step": 1653 + }, + { + "ce_ib": 5.048002243041992, + "ce_orig": 0.7901415228843689, + "epoch": 0.47537565605003956, + "kl_loss": 0.4744173586368561, + "loss_ib": 0.009792176075279713, + "step": 1653 + }, + { + "ce_ib": 6.277562618255615, + "ce_orig": 0.8317906856536865, + "epoch": 0.47537565605003956, + "kl_loss": 0.18643495440483093, + "loss_ib": 0.008141911588609219, + "step": 1653 + }, + { + "ce_ib": 4.940022945404053, + "ce_orig": 0.8361666202545166, + "epoch": 0.4756632396290172, + "kl_loss": 0.2563755214214325, + "loss_ib": 0.0075037782080471516, + "step": 1654 + }, + { + "ce_ib": 4.22787618637085, + "ce_orig": 0.7344029545783997, + "epoch": 0.4756632396290172, + "kl_loss": 0.26797008514404297, + "loss_ib": 0.006907577160745859, + "step": 1654 + }, + { + "ce_ib": 7.910140514373779, + "ce_orig": 1.2482683658599854, + "epoch": 0.4756632396290172, + "kl_loss": 0.30194056034088135, + "loss_ib": 0.010929546318948269, + "step": 1654 + }, + { + "ce_ib": 5.600918292999268, + "ce_orig": 0.6320188045501709, + "epoch": 0.4756632396290172, + "kl_loss": 0.2211628258228302, + "loss_ib": 0.00781254656612873, + "step": 1654 + }, + { + "epoch": 0.4759508232079948, + "grad_norm": 0.112090565264225, + "learning_rate": 9.576731026204862e-06, + "loss": 0.8784, + "step": 1655 + }, + { + "ce_ib": 6.266970157623291, + "ce_orig": 1.1994843482971191, + "epoch": 0.4759508232079948, + "kl_loss": 0.254923939704895, + "loss_ib": 0.008816209621727467, + "step": 1655 + }, + { + "ce_ib": 7.322546482086182, + "ce_orig": 1.139443278312683, + "epoch": 0.4759508232079948, + "kl_loss": 0.22967414557933807, + "loss_ib": 0.009619288146495819, + "step": 1655 + }, + { + "ce_ib": 5.917881488800049, + "ce_orig": 0.758932888507843, + "epoch": 0.4759508232079948, + "kl_loss": 0.2735050618648529, + "loss_ib": 0.00865293201059103, + "step": 1655 + }, + { + "ce_ib": 5.141608715057373, + "ce_orig": 0.6212088465690613, + "epoch": 0.4759508232079948, + "kl_loss": 0.26703643798828125, + "loss_ib": 0.007811972871422768, + "step": 1655 + }, + { + "ce_ib": 3.660046339035034, + "ce_orig": 0.38183343410491943, + "epoch": 0.4762384067869725, + "kl_loss": 0.2949381172657013, + "loss_ib": 0.006609427742660046, + "step": 1656 + }, + { + "ce_ib": 6.633572578430176, + "ce_orig": 0.861060619354248, + "epoch": 0.4762384067869725, + "kl_loss": 0.2914574444293976, + "loss_ib": 0.009548146277666092, + "step": 1656 + }, + { + "ce_ib": 5.865113735198975, + "ce_orig": 0.838970959186554, + "epoch": 0.4762384067869725, + "kl_loss": 0.2378089725971222, + "loss_ib": 0.008243203163146973, + "step": 1656 + }, + { + "ce_ib": 7.63082218170166, + "ce_orig": 1.2247337102890015, + "epoch": 0.4762384067869725, + "kl_loss": 0.3240877389907837, + "loss_ib": 0.010871698148548603, + "step": 1656 + }, + { + "ce_ib": 3.8568806648254395, + "ce_orig": 0.6943607330322266, + "epoch": 0.4765259903659501, + "kl_loss": 0.17261001467704773, + "loss_ib": 0.005582980811595917, + "step": 1657 + }, + { + "ce_ib": 5.316219806671143, + "ce_orig": 0.7326712608337402, + "epoch": 0.4765259903659501, + "kl_loss": 0.15569522976875305, + "loss_ib": 0.006873172242194414, + "step": 1657 + }, + { + "ce_ib": 4.3332366943359375, + "ce_orig": 0.7443872690200806, + "epoch": 0.4765259903659501, + "kl_loss": 0.2839428186416626, + "loss_ib": 0.007172664627432823, + "step": 1657 + }, + { + "ce_ib": 4.349783420562744, + "ce_orig": 0.38230669498443604, + "epoch": 0.4765259903659501, + "kl_loss": 0.2405998259782791, + "loss_ib": 0.006755781825631857, + "step": 1657 + }, + { + "ce_ib": 4.3664116859436035, + "ce_orig": 0.80488520860672, + "epoch": 0.47681357394492774, + "kl_loss": 0.3269127607345581, + "loss_ib": 0.007635539397597313, + "step": 1658 + }, + { + "ce_ib": 3.9766013622283936, + "ce_orig": 0.6910889744758606, + "epoch": 0.47681357394492774, + "kl_loss": 0.15313193202018738, + "loss_ib": 0.005507920868694782, + "step": 1658 + }, + { + "ce_ib": 4.066466808319092, + "ce_orig": 0.6271889209747314, + "epoch": 0.47681357394492774, + "kl_loss": 0.19948875904083252, + "loss_ib": 0.006061354652047157, + "step": 1658 + }, + { + "ce_ib": 6.327476501464844, + "ce_orig": 0.9295334815979004, + "epoch": 0.47681357394492774, + "kl_loss": 0.18949063122272491, + "loss_ib": 0.008222382515668869, + "step": 1658 + }, + { + "ce_ib": 4.394959926605225, + "ce_orig": 0.3115496337413788, + "epoch": 0.47710115752390536, + "kl_loss": 0.28351879119873047, + "loss_ib": 0.007230148185044527, + "step": 1659 + }, + { + "ce_ib": 6.753210067749023, + "ce_orig": 0.9570003151893616, + "epoch": 0.47710115752390536, + "kl_loss": 0.20350044965744019, + "loss_ib": 0.0087882149964571, + "step": 1659 + }, + { + "ce_ib": 6.190893650054932, + "ce_orig": 0.856682300567627, + "epoch": 0.47710115752390536, + "kl_loss": 0.2722907066345215, + "loss_ib": 0.008913800120353699, + "step": 1659 + }, + { + "ce_ib": 5.620670795440674, + "ce_orig": 0.853740394115448, + "epoch": 0.47710115752390536, + "kl_loss": 0.3125329613685608, + "loss_ib": 0.00874600000679493, + "step": 1659 + }, + { + "epoch": 0.47738874110288304, + "grad_norm": 0.1384882926940918, + "learning_rate": 9.573600469604044e-06, + "loss": 0.8339, + "step": 1660 + }, + { + "ce_ib": 6.315702438354492, + "ce_orig": 0.9741970896720886, + "epoch": 0.47738874110288304, + "kl_loss": 0.5963153839111328, + "loss_ib": 0.012278856709599495, + "step": 1660 + }, + { + "ce_ib": 8.414582252502441, + "ce_orig": 1.3185315132141113, + "epoch": 0.47738874110288304, + "kl_loss": 0.18932093679904938, + "loss_ib": 0.010307792574167252, + "step": 1660 + }, + { + "ce_ib": 6.331785202026367, + "ce_orig": 0.7015150785446167, + "epoch": 0.47738874110288304, + "kl_loss": 0.22453047335147858, + "loss_ib": 0.008577089756727219, + "step": 1660 + }, + { + "ce_ib": 5.603737831115723, + "ce_orig": 0.6692718863487244, + "epoch": 0.47738874110288304, + "kl_loss": 0.1807200163602829, + "loss_ib": 0.007410937920212746, + "step": 1660 + }, + { + "ce_ib": 4.327080249786377, + "ce_orig": 0.6909063458442688, + "epoch": 0.47767632468186066, + "kl_loss": 0.15561500191688538, + "loss_ib": 0.005883229896426201, + "step": 1661 + }, + { + "ce_ib": 4.2858805656433105, + "ce_orig": 0.7942281365394592, + "epoch": 0.47767632468186066, + "kl_loss": 0.1694888025522232, + "loss_ib": 0.005980768706649542, + "step": 1661 + }, + { + "ce_ib": 6.364988803863525, + "ce_orig": 0.889671802520752, + "epoch": 0.47767632468186066, + "kl_loss": 0.43929800391197205, + "loss_ib": 0.010757967829704285, + "step": 1661 + }, + { + "ce_ib": 3.5083861351013184, + "ce_orig": 0.7230427861213684, + "epoch": 0.47767632468186066, + "kl_loss": 0.22810864448547363, + "loss_ib": 0.005789472721517086, + "step": 1661 + }, + { + "ce_ib": 6.556941986083984, + "ce_orig": 0.500135064125061, + "epoch": 0.4779639082608383, + "kl_loss": 0.34359556436538696, + "loss_ib": 0.009992897510528564, + "step": 1662 + }, + { + "ce_ib": 6.858614921569824, + "ce_orig": 1.1202833652496338, + "epoch": 0.4779639082608383, + "kl_loss": 0.28433266282081604, + "loss_ib": 0.009701942093670368, + "step": 1662 + }, + { + "ce_ib": 3.0755460262298584, + "ce_orig": 0.5723063349723816, + "epoch": 0.4779639082608383, + "kl_loss": 0.23363028466701508, + "loss_ib": 0.005411848891526461, + "step": 1662 + }, + { + "ce_ib": 6.072047710418701, + "ce_orig": 0.8839273452758789, + "epoch": 0.4779639082608383, + "kl_loss": 0.3012058734893799, + "loss_ib": 0.009084106422960758, + "step": 1662 + }, + { + "ce_ib": 3.6702189445495605, + "ce_orig": 0.7512197494506836, + "epoch": 0.47825149183981597, + "kl_loss": 0.1513669192790985, + "loss_ib": 0.005183888133615255, + "step": 1663 + }, + { + "ce_ib": 4.374359607696533, + "ce_orig": 0.8027209043502808, + "epoch": 0.47825149183981597, + "kl_loss": 0.18617792427539825, + "loss_ib": 0.006236138753592968, + "step": 1663 + }, + { + "ce_ib": 3.1989543437957764, + "ce_orig": 0.7183689475059509, + "epoch": 0.47825149183981597, + "kl_loss": 0.16047146916389465, + "loss_ib": 0.004803669173270464, + "step": 1663 + }, + { + "ce_ib": 6.37852144241333, + "ce_orig": 1.3415592908859253, + "epoch": 0.47825149183981597, + "kl_loss": 0.18785199522972107, + "loss_ib": 0.00825704075396061, + "step": 1663 + }, + { + "ce_ib": 6.634273529052734, + "ce_orig": 1.201911211013794, + "epoch": 0.4785390754187936, + "kl_loss": 0.20284774899482727, + "loss_ib": 0.008662750944495201, + "step": 1664 + }, + { + "ce_ib": 5.713882923126221, + "ce_orig": 0.7980459332466125, + "epoch": 0.4785390754187936, + "kl_loss": 0.23021364212036133, + "loss_ib": 0.008016019128262997, + "step": 1664 + }, + { + "ce_ib": 5.318065166473389, + "ce_orig": 0.694571316242218, + "epoch": 0.4785390754187936, + "kl_loss": 0.26769188046455383, + "loss_ib": 0.007994984276592731, + "step": 1664 + }, + { + "ce_ib": 3.1290438175201416, + "ce_orig": 0.6283248662948608, + "epoch": 0.4785390754187936, + "kl_loss": 0.17708495259284973, + "loss_ib": 0.004899893421679735, + "step": 1664 + }, + { + "epoch": 0.4788266589977712, + "grad_norm": 0.14190877974033356, + "learning_rate": 9.57045889413814e-06, + "loss": 0.8418, + "step": 1665 + }, + { + "ce_ib": 4.5773468017578125, + "ce_orig": 0.563421368598938, + "epoch": 0.4788266589977712, + "kl_loss": 0.25328588485717773, + "loss_ib": 0.007110205944627523, + "step": 1665 + }, + { + "ce_ib": 4.391364574432373, + "ce_orig": 0.7508824467658997, + "epoch": 0.4788266589977712, + "kl_loss": 0.1749100238084793, + "loss_ib": 0.0061404649168252945, + "step": 1665 + }, + { + "ce_ib": 7.601984024047852, + "ce_orig": 1.1138851642608643, + "epoch": 0.4788266589977712, + "kl_loss": 0.16526943445205688, + "loss_ib": 0.009254678152501583, + "step": 1665 + }, + { + "ce_ib": 4.717142581939697, + "ce_orig": 0.9921720027923584, + "epoch": 0.4788266589977712, + "kl_loss": 0.21343350410461426, + "loss_ib": 0.006851477548480034, + "step": 1665 + }, + { + "ce_ib": 3.0098533630371094, + "ce_orig": 0.600035548210144, + "epoch": 0.4791142425767489, + "kl_loss": 0.18211796879768372, + "loss_ib": 0.004831032827496529, + "step": 1666 + }, + { + "ce_ib": 4.640512466430664, + "ce_orig": 0.7434430122375488, + "epoch": 0.4791142425767489, + "kl_loss": 0.3951219320297241, + "loss_ib": 0.008591732010245323, + "step": 1666 + }, + { + "ce_ib": 5.423933029174805, + "ce_orig": 0.6963688731193542, + "epoch": 0.4791142425767489, + "kl_loss": 0.2843274176120758, + "loss_ib": 0.008267207071185112, + "step": 1666 + }, + { + "ce_ib": 3.382248640060425, + "ce_orig": 0.6287346482276917, + "epoch": 0.4791142425767489, + "kl_loss": 0.22552873194217682, + "loss_ib": 0.005637535825371742, + "step": 1666 + }, + { + "ce_ib": 5.888190746307373, + "ce_orig": 0.9677170515060425, + "epoch": 0.4794018261557265, + "kl_loss": 0.22070422768592834, + "loss_ib": 0.00809523370116949, + "step": 1667 + }, + { + "ce_ib": 8.648638725280762, + "ce_orig": 1.1520633697509766, + "epoch": 0.4794018261557265, + "kl_loss": 0.22846215963363647, + "loss_ib": 0.010933260433375835, + "step": 1667 + }, + { + "ce_ib": 3.865662097930908, + "ce_orig": 0.649163007736206, + "epoch": 0.4794018261557265, + "kl_loss": 0.2529069781303406, + "loss_ib": 0.006394731812179089, + "step": 1667 + }, + { + "ce_ib": 3.900329828262329, + "ce_orig": 0.6768735647201538, + "epoch": 0.4794018261557265, + "kl_loss": 0.23504945635795593, + "loss_ib": 0.006250824313610792, + "step": 1667 + }, + { + "ce_ib": 8.672497749328613, + "ce_orig": 1.6776679754257202, + "epoch": 0.47968940973470414, + "kl_loss": 0.23172059655189514, + "loss_ib": 0.010989704169332981, + "step": 1668 + }, + { + "ce_ib": 3.6088309288024902, + "ce_orig": 0.4569207429885864, + "epoch": 0.47968940973470414, + "kl_loss": 0.23361648619174957, + "loss_ib": 0.005944995675235987, + "step": 1668 + }, + { + "ce_ib": 5.450647830963135, + "ce_orig": 0.8888800740242004, + "epoch": 0.47968940973470414, + "kl_loss": 0.22026127576828003, + "loss_ib": 0.007653260603547096, + "step": 1668 + }, + { + "ce_ib": 2.2504947185516357, + "ce_orig": 0.26895272731781006, + "epoch": 0.47968940973470414, + "kl_loss": 0.4324935972690582, + "loss_ib": 0.006575430277734995, + "step": 1668 + }, + { + "ce_ib": 9.812613487243652, + "ce_orig": 0.47519996762275696, + "epoch": 0.47997699331368177, + "kl_loss": 0.2601562738418579, + "loss_ib": 0.012414176017045975, + "step": 1669 + }, + { + "ce_ib": 8.931351661682129, + "ce_orig": 1.1598052978515625, + "epoch": 0.47997699331368177, + "kl_loss": 0.27191799879074097, + "loss_ib": 0.011650530621409416, + "step": 1669 + }, + { + "ce_ib": 5.104313850402832, + "ce_orig": 0.9475460052490234, + "epoch": 0.47997699331368177, + "kl_loss": 0.303463339805603, + "loss_ib": 0.008138947188854218, + "step": 1669 + }, + { + "ce_ib": 5.356082439422607, + "ce_orig": 0.7924367189407349, + "epoch": 0.47997699331368177, + "kl_loss": 0.17794081568717957, + "loss_ib": 0.007135489955544472, + "step": 1669 + }, + { + "epoch": 0.48026457689265944, + "grad_norm": 0.12426599115133286, + "learning_rate": 9.567306307375933e-06, + "loss": 0.8209, + "step": 1670 + }, + { + "ce_ib": 7.080532550811768, + "ce_orig": 1.2319401502609253, + "epoch": 0.48026457689265944, + "kl_loss": 0.24892403185367584, + "loss_ib": 0.009569772519171238, + "step": 1670 + }, + { + "ce_ib": 5.915502071380615, + "ce_orig": 0.8437856435775757, + "epoch": 0.48026457689265944, + "kl_loss": 0.7955646514892578, + "loss_ib": 0.013871148228645325, + "step": 1670 + }, + { + "ce_ib": 5.740050792694092, + "ce_orig": 0.8433563709259033, + "epoch": 0.48026457689265944, + "kl_loss": 0.24343541264533997, + "loss_ib": 0.008174404501914978, + "step": 1670 + }, + { + "ce_ib": 9.337093353271484, + "ce_orig": 1.3409016132354736, + "epoch": 0.48026457689265944, + "kl_loss": 0.2468741089105606, + "loss_ib": 0.011805834248661995, + "step": 1670 + }, + { + "ce_ib": 3.697110414505005, + "ce_orig": 0.4739634692668915, + "epoch": 0.48055216047163707, + "kl_loss": 0.4081552028656006, + "loss_ib": 0.007778662256896496, + "step": 1671 + }, + { + "ce_ib": 4.245136737823486, + "ce_orig": 0.8474032878875732, + "epoch": 0.48055216047163707, + "kl_loss": 0.20764032006263733, + "loss_ib": 0.0063215396367013454, + "step": 1671 + }, + { + "ce_ib": 4.236147403717041, + "ce_orig": 0.8165615200996399, + "epoch": 0.48055216047163707, + "kl_loss": 0.1807256042957306, + "loss_ib": 0.006043402943760157, + "step": 1671 + }, + { + "ce_ib": 4.574710369110107, + "ce_orig": 0.7062886357307434, + "epoch": 0.48055216047163707, + "kl_loss": 0.21255922317504883, + "loss_ib": 0.006700302008539438, + "step": 1671 + }, + { + "ce_ib": 5.6235761642456055, + "ce_orig": 0.9316110014915466, + "epoch": 0.4808397440506147, + "kl_loss": 0.1461721509695053, + "loss_ib": 0.0070852977223694324, + "step": 1672 + }, + { + "ce_ib": 5.345804691314697, + "ce_orig": 0.8603891730308533, + "epoch": 0.4808397440506147, + "kl_loss": 0.29918748140335083, + "loss_ib": 0.00833767931908369, + "step": 1672 + }, + { + "ce_ib": 6.696346282958984, + "ce_orig": 1.43319571018219, + "epoch": 0.4808397440506147, + "kl_loss": 0.18958798050880432, + "loss_ib": 0.008592226542532444, + "step": 1672 + }, + { + "ce_ib": 3.778940439224243, + "ce_orig": 0.8303045630455017, + "epoch": 0.4808397440506147, + "kl_loss": 0.22567567229270935, + "loss_ib": 0.006035697180777788, + "step": 1672 + }, + { + "ce_ib": 3.8298754692077637, + "ce_orig": 0.3461815416812897, + "epoch": 0.48112732762959237, + "kl_loss": 0.3373625576496124, + "loss_ib": 0.007203501183539629, + "step": 1673 + }, + { + "ce_ib": 5.3641252517700195, + "ce_orig": 0.7774767279624939, + "epoch": 0.48112732762959237, + "kl_loss": 0.2694661617279053, + "loss_ib": 0.008058786392211914, + "step": 1673 + }, + { + "ce_ib": 4.745968818664551, + "ce_orig": 0.942043125629425, + "epoch": 0.48112732762959237, + "kl_loss": 0.15933820605278015, + "loss_ib": 0.006339350715279579, + "step": 1673 + }, + { + "ce_ib": 8.730219841003418, + "ce_orig": 1.2593905925750732, + "epoch": 0.48112732762959237, + "kl_loss": 0.30499938130378723, + "loss_ib": 0.011780214495956898, + "step": 1673 + }, + { + "ce_ib": 5.755885601043701, + "ce_orig": 0.6556824445724487, + "epoch": 0.48141491120857, + "kl_loss": 0.12161286920309067, + "loss_ib": 0.006972013972699642, + "step": 1674 + }, + { + "ce_ib": 5.917918682098389, + "ce_orig": 0.728310227394104, + "epoch": 0.48141491120857, + "kl_loss": 0.41346365213394165, + "loss_ib": 0.010052554309368134, + "step": 1674 + }, + { + "ce_ib": 5.303135395050049, + "ce_orig": 0.7300191521644592, + "epoch": 0.48141491120857, + "kl_loss": 0.2900460362434387, + "loss_ib": 0.008203595876693726, + "step": 1674 + }, + { + "ce_ib": 3.7630128860473633, + "ce_orig": 0.6488651633262634, + "epoch": 0.48141491120857, + "kl_loss": 0.180400550365448, + "loss_ib": 0.005567018408328295, + "step": 1674 + }, + { + "epoch": 0.4817024947875476, + "grad_norm": 0.12936660647392273, + "learning_rate": 9.564142716912737e-06, + "loss": 0.8743, + "step": 1675 + }, + { + "ce_ib": 5.63591194152832, + "ce_orig": 0.7025658488273621, + "epoch": 0.4817024947875476, + "kl_loss": 0.31601640582084656, + "loss_ib": 0.008796075358986855, + "step": 1675 + }, + { + "ce_ib": 8.811908721923828, + "ce_orig": 1.0708190202713013, + "epoch": 0.4817024947875476, + "kl_loss": 0.25741177797317505, + "loss_ib": 0.011386026628315449, + "step": 1675 + }, + { + "ce_ib": 3.726789712905884, + "ce_orig": 0.7018229365348816, + "epoch": 0.4817024947875476, + "kl_loss": 0.20980185270309448, + "loss_ib": 0.005824807565659285, + "step": 1675 + }, + { + "ce_ib": 4.922455787658691, + "ce_orig": 0.8278821110725403, + "epoch": 0.4817024947875476, + "kl_loss": 0.18464845418930054, + "loss_ib": 0.006768940482288599, + "step": 1675 + }, + { + "ce_ib": 6.2850518226623535, + "ce_orig": 0.7126919031143188, + "epoch": 0.4819900783665253, + "kl_loss": 0.2205125093460083, + "loss_ib": 0.008490176871418953, + "step": 1676 + }, + { + "ce_ib": 7.34765100479126, + "ce_orig": 1.3165032863616943, + "epoch": 0.4819900783665253, + "kl_loss": 0.257365345954895, + "loss_ib": 0.009921303950250149, + "step": 1676 + }, + { + "ce_ib": 7.904400825500488, + "ce_orig": 1.4748260974884033, + "epoch": 0.4819900783665253, + "kl_loss": 0.27275893092155457, + "loss_ib": 0.010631990619003773, + "step": 1676 + }, + { + "ce_ib": 3.2757465839385986, + "ce_orig": 0.7456637024879456, + "epoch": 0.4819900783665253, + "kl_loss": 0.12369295209646225, + "loss_ib": 0.004512676037847996, + "step": 1676 + }, + { + "ce_ib": 5.002022743225098, + "ce_orig": 0.9142476320266724, + "epoch": 0.4822776619455029, + "kl_loss": 0.24907097220420837, + "loss_ib": 0.007492732722312212, + "step": 1677 + }, + { + "ce_ib": 5.418490886688232, + "ce_orig": 0.6175795793533325, + "epoch": 0.4822776619455029, + "kl_loss": 0.1571856439113617, + "loss_ib": 0.006990346591919661, + "step": 1677 + }, + { + "ce_ib": 7.182295799255371, + "ce_orig": 1.1133639812469482, + "epoch": 0.4822776619455029, + "kl_loss": 0.32248324155807495, + "loss_ib": 0.010407128371298313, + "step": 1677 + }, + { + "ce_ib": 7.538179397583008, + "ce_orig": 1.0507768392562866, + "epoch": 0.4822776619455029, + "kl_loss": 0.36521878838539124, + "loss_ib": 0.011190367862582207, + "step": 1677 + }, + { + "ce_ib": 6.171362400054932, + "ce_orig": 0.7541481256484985, + "epoch": 0.48256524552448055, + "kl_loss": 0.30361032485961914, + "loss_ib": 0.009207465685904026, + "step": 1678 + }, + { + "ce_ib": 3.476198673248291, + "ce_orig": 0.4351646900177002, + "epoch": 0.48256524552448055, + "kl_loss": 0.2083813101053238, + "loss_ib": 0.005560011602938175, + "step": 1678 + }, + { + "ce_ib": 8.659995079040527, + "ce_orig": 1.454718828201294, + "epoch": 0.48256524552448055, + "kl_loss": 0.2963302433490753, + "loss_ib": 0.011623297818005085, + "step": 1678 + }, + { + "ce_ib": 2.5902063846588135, + "ce_orig": 0.5103031992912292, + "epoch": 0.48256524552448055, + "kl_loss": 0.26295000314712524, + "loss_ib": 0.005219706334173679, + "step": 1678 + }, + { + "ce_ib": 4.061892986297607, + "ce_orig": 0.9226779937744141, + "epoch": 0.48285282910345817, + "kl_loss": 0.39064711332321167, + "loss_ib": 0.007968363352119923, + "step": 1679 + }, + { + "ce_ib": 4.2745361328125, + "ce_orig": 0.7729305624961853, + "epoch": 0.48285282910345817, + "kl_loss": 0.1899227499961853, + "loss_ib": 0.006173762958496809, + "step": 1679 + }, + { + "ce_ib": 6.655904769897461, + "ce_orig": 1.1210262775421143, + "epoch": 0.48285282910345817, + "kl_loss": 0.23119421303272247, + "loss_ib": 0.008967846632003784, + "step": 1679 + }, + { + "ce_ib": 7.641833305358887, + "ce_orig": 1.2369133234024048, + "epoch": 0.48285282910345817, + "kl_loss": 0.25590628385543823, + "loss_ib": 0.010200896300375462, + "step": 1679 + }, + { + "epoch": 0.48314041268243585, + "grad_norm": 0.10780736804008484, + "learning_rate": 9.560968130370376e-06, + "loss": 0.9124, + "step": 1680 + }, + { + "ce_ib": 7.560593605041504, + "ce_orig": 1.204754114151001, + "epoch": 0.48314041268243585, + "kl_loss": 0.2123798131942749, + "loss_ib": 0.00968439131975174, + "step": 1680 + }, + { + "ce_ib": 4.961677551269531, + "ce_orig": 0.9925312995910645, + "epoch": 0.48314041268243585, + "kl_loss": 0.19958920776844025, + "loss_ib": 0.006957569625228643, + "step": 1680 + }, + { + "ce_ib": 8.905904769897461, + "ce_orig": 1.682554006576538, + "epoch": 0.48314041268243585, + "kl_loss": 0.31346291303634644, + "loss_ib": 0.012040534056723118, + "step": 1680 + }, + { + "ce_ib": 4.896394729614258, + "ce_orig": 0.658926784992218, + "epoch": 0.48314041268243585, + "kl_loss": 0.27128756046295166, + "loss_ib": 0.007609270513057709, + "step": 1680 + }, + { + "ce_ib": 11.167470932006836, + "ce_orig": 1.5560513734817505, + "epoch": 0.4834279962614135, + "kl_loss": 0.19887183606624603, + "loss_ib": 0.01315618958324194, + "step": 1681 + }, + { + "ce_ib": 3.238182306289673, + "ce_orig": 0.3320446014404297, + "epoch": 0.4834279962614135, + "kl_loss": 0.22955651581287384, + "loss_ib": 0.005533747375011444, + "step": 1681 + }, + { + "ce_ib": 7.380128383636475, + "ce_orig": 1.2976762056350708, + "epoch": 0.4834279962614135, + "kl_loss": 0.23345749080181122, + "loss_ib": 0.00971470307558775, + "step": 1681 + }, + { + "ce_ib": 4.491724014282227, + "ce_orig": 0.49089425802230835, + "epoch": 0.4834279962614135, + "kl_loss": 0.3844373822212219, + "loss_ib": 0.008336097933351994, + "step": 1681 + }, + { + "ce_ib": 8.009778022766113, + "ce_orig": 1.3929885625839233, + "epoch": 0.4837155798403911, + "kl_loss": 0.19164226949214935, + "loss_ib": 0.009926200844347477, + "step": 1682 + }, + { + "ce_ib": 3.8315823078155518, + "ce_orig": 0.37863993644714355, + "epoch": 0.4837155798403911, + "kl_loss": 0.496357798576355, + "loss_ib": 0.008795159868896008, + "step": 1682 + }, + { + "ce_ib": 5.143205165863037, + "ce_orig": 0.8195101618766785, + "epoch": 0.4837155798403911, + "kl_loss": 0.2191523164510727, + "loss_ib": 0.007334728725254536, + "step": 1682 + }, + { + "ce_ib": 6.365453720092773, + "ce_orig": 1.110253930091858, + "epoch": 0.4837155798403911, + "kl_loss": 0.158380389213562, + "loss_ib": 0.007949257269501686, + "step": 1682 + }, + { + "ce_ib": 3.3443350791931152, + "ce_orig": 0.5569667220115662, + "epoch": 0.4840031634193688, + "kl_loss": 0.3231843113899231, + "loss_ib": 0.006576178129762411, + "step": 1683 + }, + { + "ce_ib": 8.063350677490234, + "ce_orig": 1.0422242879867554, + "epoch": 0.4840031634193688, + "kl_loss": 0.23159486055374146, + "loss_ib": 0.010379299521446228, + "step": 1683 + }, + { + "ce_ib": 7.586836338043213, + "ce_orig": 1.3177027702331543, + "epoch": 0.4840031634193688, + "kl_loss": 0.2565996050834656, + "loss_ib": 0.010152831673622131, + "step": 1683 + }, + { + "ce_ib": 5.579047679901123, + "ce_orig": 0.4584742784500122, + "epoch": 0.4840031634193688, + "kl_loss": 0.3952004313468933, + "loss_ib": 0.009531051851809025, + "step": 1683 + }, + { + "ce_ib": 3.8018834590911865, + "ce_orig": 0.633375883102417, + "epoch": 0.4842907469983464, + "kl_loss": 0.21211303770542145, + "loss_ib": 0.005923013668507338, + "step": 1684 + }, + { + "ce_ib": 4.17789888381958, + "ce_orig": 0.6012802720069885, + "epoch": 0.4842907469983464, + "kl_loss": 0.3501446843147278, + "loss_ib": 0.007679345551878214, + "step": 1684 + }, + { + "ce_ib": 4.611387729644775, + "ce_orig": 1.0369935035705566, + "epoch": 0.4842907469983464, + "kl_loss": 0.18224164843559265, + "loss_ib": 0.006433804053813219, + "step": 1684 + }, + { + "ce_ib": 4.641064167022705, + "ce_orig": 0.7356343269348145, + "epoch": 0.4842907469983464, + "kl_loss": 0.18943347036838531, + "loss_ib": 0.0065353987738490105, + "step": 1684 + }, + { + "epoch": 0.484578330577324, + "grad_norm": 0.11663123965263367, + "learning_rate": 9.557782555397167e-06, + "loss": 0.8837, + "step": 1685 + }, + { + "ce_ib": 6.534969329833984, + "ce_orig": 1.1525304317474365, + "epoch": 0.484578330577324, + "kl_loss": 0.22389784455299377, + "loss_ib": 0.008773948065936565, + "step": 1685 + }, + { + "ce_ib": 8.140199661254883, + "ce_orig": 1.0339369773864746, + "epoch": 0.484578330577324, + "kl_loss": 0.15249694883823395, + "loss_ib": 0.009665168821811676, + "step": 1685 + }, + { + "ce_ib": 5.809932708740234, + "ce_orig": 0.920750081539154, + "epoch": 0.484578330577324, + "kl_loss": 0.3143186867237091, + "loss_ib": 0.00895311962813139, + "step": 1685 + }, + { + "ce_ib": 5.950395107269287, + "ce_orig": 1.0373817682266235, + "epoch": 0.484578330577324, + "kl_loss": 0.16437512636184692, + "loss_ib": 0.007594146765768528, + "step": 1685 + }, + { + "ce_ib": 7.124546527862549, + "ce_orig": 0.8699126243591309, + "epoch": 0.4848659141563017, + "kl_loss": 0.2191973328590393, + "loss_ib": 0.0093165198341012, + "step": 1686 + }, + { + "ce_ib": 6.38749361038208, + "ce_orig": 1.0310173034667969, + "epoch": 0.4848659141563017, + "kl_loss": 0.29276883602142334, + "loss_ib": 0.009315181523561478, + "step": 1686 + }, + { + "ce_ib": 7.34552001953125, + "ce_orig": 1.3071355819702148, + "epoch": 0.4848659141563017, + "kl_loss": 0.2939929962158203, + "loss_ib": 0.010285450145602226, + "step": 1686 + }, + { + "ce_ib": 2.83811354637146, + "ce_orig": 0.4443729817867279, + "epoch": 0.4848659141563017, + "kl_loss": 0.14913912117481232, + "loss_ib": 0.004329504910856485, + "step": 1686 + }, + { + "ce_ib": 4.3961286544799805, + "ce_orig": 0.6618458032608032, + "epoch": 0.4851534977352793, + "kl_loss": 0.2749791741371155, + "loss_ib": 0.007145920302718878, + "step": 1687 + }, + { + "ce_ib": 6.693999767303467, + "ce_orig": 0.6293303966522217, + "epoch": 0.4851534977352793, + "kl_loss": 0.2422647476196289, + "loss_ib": 0.009116646833717823, + "step": 1687 + }, + { + "ce_ib": 8.874431610107422, + "ce_orig": 1.4825538396835327, + "epoch": 0.4851534977352793, + "kl_loss": 0.297224223613739, + "loss_ib": 0.011846673674881458, + "step": 1687 + }, + { + "ce_ib": 6.874109745025635, + "ce_orig": 1.1482160091400146, + "epoch": 0.4851534977352793, + "kl_loss": 0.19153878092765808, + "loss_ib": 0.008789497427642345, + "step": 1687 + }, + { + "ce_ib": 3.8232715129852295, + "ce_orig": 0.6324473023414612, + "epoch": 0.48544108131425695, + "kl_loss": 0.16611912846565247, + "loss_ib": 0.005484462715685368, + "step": 1688 + }, + { + "ce_ib": 7.457026958465576, + "ce_orig": 0.8956002593040466, + "epoch": 0.48544108131425695, + "kl_loss": 0.31268706917762756, + "loss_ib": 0.010583898052573204, + "step": 1688 + }, + { + "ce_ib": 7.080108642578125, + "ce_orig": 1.149488925933838, + "epoch": 0.48544108131425695, + "kl_loss": 0.2502942979335785, + "loss_ib": 0.009583051316440105, + "step": 1688 + }, + { + "ce_ib": 5.92689323425293, + "ce_orig": 0.9622479677200317, + "epoch": 0.48544108131425695, + "kl_loss": 0.18768984079360962, + "loss_ib": 0.007803791668266058, + "step": 1688 + }, + { + "ce_ib": 6.139303207397461, + "ce_orig": 1.1526930332183838, + "epoch": 0.4857286648932346, + "kl_loss": 0.23713448643684387, + "loss_ib": 0.008510648272931576, + "step": 1689 + }, + { + "ce_ib": 4.866360664367676, + "ce_orig": 0.6933936476707458, + "epoch": 0.4857286648932346, + "kl_loss": 0.2967952489852905, + "loss_ib": 0.007834312506020069, + "step": 1689 + }, + { + "ce_ib": 6.797483444213867, + "ce_orig": 1.2159748077392578, + "epoch": 0.4857286648932346, + "kl_loss": 0.22910846769809723, + "loss_ib": 0.009088567458093166, + "step": 1689 + }, + { + "ce_ib": 6.406723499298096, + "ce_orig": 1.1615173816680908, + "epoch": 0.4857286648932346, + "kl_loss": 0.25945186614990234, + "loss_ib": 0.009001241996884346, + "step": 1689 + }, + { + "epoch": 0.48601624847221225, + "grad_norm": 0.11609376221895218, + "learning_rate": 9.554585999667897e-06, + "loss": 0.9146, + "step": 1690 + }, + { + "ce_ib": 5.247783184051514, + "ce_orig": 0.8090236186981201, + "epoch": 0.48601624847221225, + "kl_loss": 0.26968154311180115, + "loss_ib": 0.007944597862660885, + "step": 1690 + }, + { + "ce_ib": 3.077852725982666, + "ce_orig": 0.5081151127815247, + "epoch": 0.48601624847221225, + "kl_loss": 0.18126562237739563, + "loss_ib": 0.004890508949756622, + "step": 1690 + }, + { + "ce_ib": 3.773611307144165, + "ce_orig": 0.694117546081543, + "epoch": 0.48601624847221225, + "kl_loss": 0.13272148370742798, + "loss_ib": 0.005100825801491737, + "step": 1690 + }, + { + "ce_ib": 4.646406650543213, + "ce_orig": 1.1609487533569336, + "epoch": 0.48601624847221225, + "kl_loss": 0.15832726657390594, + "loss_ib": 0.006229679565876722, + "step": 1690 + }, + { + "ce_ib": 6.037997245788574, + "ce_orig": 0.8993503451347351, + "epoch": 0.4863038320511899, + "kl_loss": 0.27683573961257935, + "loss_ib": 0.008806354366242886, + "step": 1691 + }, + { + "ce_ib": 4.57020902633667, + "ce_orig": 0.7976517677307129, + "epoch": 0.4863038320511899, + "kl_loss": 0.2402155101299286, + "loss_ib": 0.006972364149987698, + "step": 1691 + }, + { + "ce_ib": 5.292080402374268, + "ce_orig": 0.5217359066009521, + "epoch": 0.4863038320511899, + "kl_loss": 0.3130798637866974, + "loss_ib": 0.008422878570854664, + "step": 1691 + }, + { + "ce_ib": 6.2566118240356445, + "ce_orig": 0.6656879782676697, + "epoch": 0.4863038320511899, + "kl_loss": 0.28265661001205444, + "loss_ib": 0.009083177894353867, + "step": 1691 + }, + { + "ce_ib": 1.7650432586669922, + "ce_orig": 0.2512088716030121, + "epoch": 0.4865914156301675, + "kl_loss": 0.533643364906311, + "loss_ib": 0.0071014766581356525, + "step": 1692 + }, + { + "ce_ib": 6.721052646636963, + "ce_orig": 0.9016904830932617, + "epoch": 0.4865914156301675, + "kl_loss": 0.2493845820426941, + "loss_ib": 0.009214898571372032, + "step": 1692 + }, + { + "ce_ib": 4.070987701416016, + "ce_orig": 0.5971701145172119, + "epoch": 0.4865914156301675, + "kl_loss": 0.2511335015296936, + "loss_ib": 0.006582322530448437, + "step": 1692 + }, + { + "ce_ib": 8.66051959991455, + "ce_orig": 1.652574062347412, + "epoch": 0.4865914156301675, + "kl_loss": 0.3365510106086731, + "loss_ib": 0.012026029638946056, + "step": 1692 + }, + { + "ce_ib": 4.093155384063721, + "ce_orig": 0.8512476682662964, + "epoch": 0.4868789992091452, + "kl_loss": 0.17010530829429626, + "loss_ib": 0.005794208496809006, + "step": 1693 + }, + { + "ce_ib": 4.806715965270996, + "ce_orig": 0.7924500703811646, + "epoch": 0.4868789992091452, + "kl_loss": 0.2046879529953003, + "loss_ib": 0.006853595841675997, + "step": 1693 + }, + { + "ce_ib": 1.9750059843063354, + "ce_orig": 0.19683994352817535, + "epoch": 0.4868789992091452, + "kl_loss": 0.41815072298049927, + "loss_ib": 0.006156513001769781, + "step": 1693 + }, + { + "ce_ib": 4.909409046173096, + "ce_orig": 0.6787269711494446, + "epoch": 0.4868789992091452, + "kl_loss": 0.2110307216644287, + "loss_ib": 0.007019716314971447, + "step": 1693 + }, + { + "ce_ib": 3.878619909286499, + "ce_orig": 0.5532967448234558, + "epoch": 0.4871665827881228, + "kl_loss": 0.18584373593330383, + "loss_ib": 0.00573705742135644, + "step": 1694 + }, + { + "ce_ib": 3.7735960483551025, + "ce_orig": 0.7870396971702576, + "epoch": 0.4871665827881228, + "kl_loss": 0.160109281539917, + "loss_ib": 0.005374689120799303, + "step": 1694 + }, + { + "ce_ib": 5.4384260177612305, + "ce_orig": 0.819983184337616, + "epoch": 0.4871665827881228, + "kl_loss": 0.28779086470603943, + "loss_ib": 0.008316334336996078, + "step": 1694 + }, + { + "ce_ib": 3.6858084201812744, + "ce_orig": 0.7565300464630127, + "epoch": 0.4871665827881228, + "kl_loss": 0.3524230420589447, + "loss_ib": 0.007210038602352142, + "step": 1694 + }, + { + "epoch": 0.4874541663671004, + "grad_norm": 0.10960856080055237, + "learning_rate": 9.551378470883813e-06, + "loss": 0.7927, + "step": 1695 + }, + { + "ce_ib": 6.876530170440674, + "ce_orig": 1.097631812095642, + "epoch": 0.4874541663671004, + "kl_loss": 0.23491376638412476, + "loss_ib": 0.009225667454302311, + "step": 1695 + }, + { + "ce_ib": 8.743326187133789, + "ce_orig": 1.2223474979400635, + "epoch": 0.4874541663671004, + "kl_loss": 0.26982706785202026, + "loss_ib": 0.011441596783697605, + "step": 1695 + }, + { + "ce_ib": 4.467686176300049, + "ce_orig": 0.7761092782020569, + "epoch": 0.4874541663671004, + "kl_loss": 0.266385555267334, + "loss_ib": 0.007131541613489389, + "step": 1695 + }, + { + "ce_ib": 3.9495129585266113, + "ce_orig": 0.6083857417106628, + "epoch": 0.4874541663671004, + "kl_loss": 0.25618743896484375, + "loss_ib": 0.0065113878808915615, + "step": 1695 + }, + { + "ce_ib": 7.466285705566406, + "ce_orig": 0.969895601272583, + "epoch": 0.48774174994607805, + "kl_loss": 0.27946212887763977, + "loss_ib": 0.010260907001793385, + "step": 1696 + }, + { + "ce_ib": 9.822277069091797, + "ce_orig": 1.6922423839569092, + "epoch": 0.48774174994607805, + "kl_loss": 0.18676882982254028, + "loss_ib": 0.01168996561318636, + "step": 1696 + }, + { + "ce_ib": 6.797215938568115, + "ce_orig": 0.7068189382553101, + "epoch": 0.48774174994607805, + "kl_loss": 0.2542659640312195, + "loss_ib": 0.009339875541627407, + "step": 1696 + }, + { + "ce_ib": 5.888164520263672, + "ce_orig": 0.7551953792572021, + "epoch": 0.48774174994607805, + "kl_loss": 0.2606009244918823, + "loss_ib": 0.008494174107909203, + "step": 1696 + }, + { + "ce_ib": 6.542970180511475, + "ce_orig": 0.7762916088104248, + "epoch": 0.48802933352505573, + "kl_loss": 0.17304621636867523, + "loss_ib": 0.008273432962596416, + "step": 1697 + }, + { + "ce_ib": 4.387442111968994, + "ce_orig": 0.9359598159790039, + "epoch": 0.48802933352505573, + "kl_loss": 0.21369890868663788, + "loss_ib": 0.006524431053549051, + "step": 1697 + }, + { + "ce_ib": 5.826173782348633, + "ce_orig": 1.0441244840621948, + "epoch": 0.48802933352505573, + "kl_loss": 0.16852568089962006, + "loss_ib": 0.007511430885642767, + "step": 1697 + }, + { + "ce_ib": 5.742081165313721, + "ce_orig": 0.3558730185031891, + "epoch": 0.48802933352505573, + "kl_loss": 0.2536010146141052, + "loss_ib": 0.008278091438114643, + "step": 1697 + }, + { + "ce_ib": 7.02601957321167, + "ce_orig": 0.9475264549255371, + "epoch": 0.48831691710403335, + "kl_loss": 0.30207914113998413, + "loss_ib": 0.01004681084305048, + "step": 1698 + }, + { + "ce_ib": 4.868920803070068, + "ce_orig": 0.8664445281028748, + "epoch": 0.48831691710403335, + "kl_loss": 0.33686017990112305, + "loss_ib": 0.008237523026764393, + "step": 1698 + }, + { + "ce_ib": 3.719809055328369, + "ce_orig": 0.790595531463623, + "epoch": 0.48831691710403335, + "kl_loss": 0.3065032958984375, + "loss_ib": 0.006784841883927584, + "step": 1698 + }, + { + "ce_ib": 3.9900901317596436, + "ce_orig": 0.6241580843925476, + "epoch": 0.48831691710403335, + "kl_loss": 0.16233143210411072, + "loss_ib": 0.005613404326140881, + "step": 1698 + }, + { + "ce_ib": 8.611964225769043, + "ce_orig": 0.6841998100280762, + "epoch": 0.488604500683011, + "kl_loss": 0.3877715468406677, + "loss_ib": 0.0124896802008152, + "step": 1699 + }, + { + "ce_ib": 2.7463784217834473, + "ce_orig": 0.5244596004486084, + "epoch": 0.488604500683011, + "kl_loss": 0.16525377333164215, + "loss_ib": 0.004398916382342577, + "step": 1699 + }, + { + "ce_ib": 1.7029069662094116, + "ce_orig": 0.17470017075538635, + "epoch": 0.488604500683011, + "kl_loss": 0.38435274362564087, + "loss_ib": 0.0055464343167841434, + "step": 1699 + }, + { + "ce_ib": 3.5958950519561768, + "ce_orig": 0.4831347167491913, + "epoch": 0.488604500683011, + "kl_loss": 0.27735060453414917, + "loss_ib": 0.0063694012351334095, + "step": 1699 + }, + { + "epoch": 0.48889208426198866, + "grad_norm": 0.12257218360900879, + "learning_rate": 9.548159976772593e-06, + "loss": 0.8168, + "step": 1700 + }, + { + "ce_ib": 6.5620198249816895, + "ce_orig": 0.6928233504295349, + "epoch": 0.48889208426198866, + "kl_loss": 0.2537040412425995, + "loss_ib": 0.009099059738218784, + "step": 1700 + }, + { + "ce_ib": 7.755258083343506, + "ce_orig": 1.2825660705566406, + "epoch": 0.48889208426198866, + "kl_loss": 0.2563188672065735, + "loss_ib": 0.010318445973098278, + "step": 1700 + }, + { + "ce_ib": 4.941026210784912, + "ce_orig": 0.6089584827423096, + "epoch": 0.48889208426198866, + "kl_loss": 0.259429931640625, + "loss_ib": 0.007535324897617102, + "step": 1700 + }, + { + "ce_ib": 5.35645055770874, + "ce_orig": 0.6481252908706665, + "epoch": 0.48889208426198866, + "kl_loss": 0.2527640461921692, + "loss_ib": 0.007884090766310692, + "step": 1700 + }, + { + "ce_ib": 2.423488140106201, + "ce_orig": 0.5764703154563904, + "epoch": 0.4891796678409663, + "kl_loss": 0.1315830647945404, + "loss_ib": 0.003739318810403347, + "step": 1701 + }, + { + "ce_ib": 5.227047920227051, + "ce_orig": 0.9303528666496277, + "epoch": 0.4891796678409663, + "kl_loss": 0.2286655604839325, + "loss_ib": 0.007513702847063541, + "step": 1701 + }, + { + "ce_ib": 5.084414482116699, + "ce_orig": 0.42619651556015015, + "epoch": 0.4891796678409663, + "kl_loss": 0.3433401584625244, + "loss_ib": 0.008517815731465816, + "step": 1701 + }, + { + "ce_ib": 3.6347930431365967, + "ce_orig": 0.6375098824501038, + "epoch": 0.4891796678409663, + "kl_loss": 0.19476991891860962, + "loss_ib": 0.005582492332905531, + "step": 1701 + }, + { + "ce_ib": 4.194334983825684, + "ce_orig": 0.6213921308517456, + "epoch": 0.4894672514199439, + "kl_loss": 0.25615689158439636, + "loss_ib": 0.0067559038288891315, + "step": 1702 + }, + { + "ce_ib": 4.74091100692749, + "ce_orig": 0.38939017057418823, + "epoch": 0.4894672514199439, + "kl_loss": 0.27375322580337524, + "loss_ib": 0.007478443440049887, + "step": 1702 + }, + { + "ce_ib": 5.847019672393799, + "ce_orig": 0.7315099239349365, + "epoch": 0.4894672514199439, + "kl_loss": 0.2402675598859787, + "loss_ib": 0.008249695412814617, + "step": 1702 + }, + { + "ce_ib": 5.618659019470215, + "ce_orig": 0.8388023972511292, + "epoch": 0.4894672514199439, + "kl_loss": 0.19676315784454346, + "loss_ib": 0.007586290594190359, + "step": 1702 + }, + { + "ce_ib": 5.9708709716796875, + "ce_orig": 0.9229080677032471, + "epoch": 0.4897548349989216, + "kl_loss": 0.19908156991004944, + "loss_ib": 0.007961686700582504, + "step": 1703 + }, + { + "ce_ib": 5.164056777954102, + "ce_orig": 1.0340594053268433, + "epoch": 0.4897548349989216, + "kl_loss": 0.18683184683322906, + "loss_ib": 0.007032375317066908, + "step": 1703 + }, + { + "ce_ib": 3.921279191970825, + "ce_orig": 0.46810030937194824, + "epoch": 0.4897548349989216, + "kl_loss": 0.24264341592788696, + "loss_ib": 0.006347713526338339, + "step": 1703 + }, + { + "ce_ib": 4.657447338104248, + "ce_orig": 0.8225224614143372, + "epoch": 0.4897548349989216, + "kl_loss": 0.18631809949874878, + "loss_ib": 0.006520627997815609, + "step": 1703 + }, + { + "ce_ib": 7.400676727294922, + "ce_orig": 0.8509280681610107, + "epoch": 0.4900424185778992, + "kl_loss": 0.2300112247467041, + "loss_ib": 0.00970078818500042, + "step": 1704 + }, + { + "ce_ib": 5.8490424156188965, + "ce_orig": 1.0918712615966797, + "epoch": 0.4900424185778992, + "kl_loss": 0.7357668876647949, + "loss_ib": 0.01320671010762453, + "step": 1704 + }, + { + "ce_ib": 7.443508625030518, + "ce_orig": 0.8640341758728027, + "epoch": 0.4900424185778992, + "kl_loss": 0.2282414734363556, + "loss_ib": 0.009725923649966717, + "step": 1704 + }, + { + "ce_ib": 9.343680381774902, + "ce_orig": 1.5816478729248047, + "epoch": 0.4900424185778992, + "kl_loss": 0.1807907372713089, + "loss_ib": 0.011151587590575218, + "step": 1704 + }, + { + "epoch": 0.49033000215687683, + "grad_norm": 0.13203872740268707, + "learning_rate": 9.544930525088339e-06, + "loss": 0.8373, + "step": 1705 + }, + { + "ce_ib": 6.998898506164551, + "ce_orig": 0.47324255108833313, + "epoch": 0.49033000215687683, + "kl_loss": 0.3926210403442383, + "loss_ib": 0.0109251094982028, + "step": 1705 + }, + { + "ce_ib": 6.503366470336914, + "ce_orig": 0.7842316031455994, + "epoch": 0.49033000215687683, + "kl_loss": 0.33447882533073425, + "loss_ib": 0.00984815414994955, + "step": 1705 + }, + { + "ce_ib": 2.677558422088623, + "ce_orig": 0.7216985821723938, + "epoch": 0.49033000215687683, + "kl_loss": 0.1252802014350891, + "loss_ib": 0.0039303600788116455, + "step": 1705 + }, + { + "ce_ib": 5.550953388214111, + "ce_orig": 0.8223257064819336, + "epoch": 0.49033000215687683, + "kl_loss": 0.17812271416187286, + "loss_ib": 0.007332180626690388, + "step": 1705 + }, + { + "ce_ib": 3.981729745864868, + "ce_orig": 0.36211925745010376, + "epoch": 0.49061758573585446, + "kl_loss": 0.3238323926925659, + "loss_ib": 0.00722005357965827, + "step": 1706 + }, + { + "ce_ib": 5.24422025680542, + "ce_orig": 0.6217085123062134, + "epoch": 0.49061758573585446, + "kl_loss": 0.21295107901096344, + "loss_ib": 0.00737373111769557, + "step": 1706 + }, + { + "ce_ib": 4.3960347175598145, + "ce_orig": 1.0565332174301147, + "epoch": 0.49061758573585446, + "kl_loss": 0.21014171838760376, + "loss_ib": 0.006497452035546303, + "step": 1706 + }, + { + "ce_ib": 9.517853736877441, + "ce_orig": 1.5981885194778442, + "epoch": 0.49061758573585446, + "kl_loss": 0.18369972705841064, + "loss_ib": 0.01135485153645277, + "step": 1706 + }, + { + "ce_ib": 6.477010250091553, + "ce_orig": 0.8719711303710938, + "epoch": 0.49090516931483213, + "kl_loss": 0.27074265480041504, + "loss_ib": 0.009184436872601509, + "step": 1707 + }, + { + "ce_ib": 6.044996738433838, + "ce_orig": 1.1001982688903809, + "epoch": 0.49090516931483213, + "kl_loss": 0.2015824168920517, + "loss_ib": 0.008060821332037449, + "step": 1707 + }, + { + "ce_ib": 3.7902770042419434, + "ce_orig": 0.6360217928886414, + "epoch": 0.49090516931483213, + "kl_loss": 0.1633738875389099, + "loss_ib": 0.00542401522397995, + "step": 1707 + }, + { + "ce_ib": 3.3657476902008057, + "ce_orig": 0.6742066144943237, + "epoch": 0.49090516931483213, + "kl_loss": 0.20251691341400146, + "loss_ib": 0.0053909169510006905, + "step": 1707 + }, + { + "ce_ib": 4.074330806732178, + "ce_orig": 0.7614818811416626, + "epoch": 0.49119275289380976, + "kl_loss": 0.2751924395561218, + "loss_ib": 0.00682625500485301, + "step": 1708 + }, + { + "ce_ib": 7.01824951171875, + "ce_orig": 0.9621512293815613, + "epoch": 0.49119275289380976, + "kl_loss": 0.17211195826530457, + "loss_ib": 0.008739368990063667, + "step": 1708 + }, + { + "ce_ib": 8.20371150970459, + "ce_orig": 1.1622143983840942, + "epoch": 0.49119275289380976, + "kl_loss": 0.2795262932777405, + "loss_ib": 0.010998973622918129, + "step": 1708 + }, + { + "ce_ib": 8.394777297973633, + "ce_orig": 1.366093635559082, + "epoch": 0.49119275289380976, + "kl_loss": 0.17209161818027496, + "loss_ib": 0.01011569332331419, + "step": 1708 + }, + { + "ce_ib": 3.742612600326538, + "ce_orig": 0.4698511064052582, + "epoch": 0.4914803364727874, + "kl_loss": 0.21373629570007324, + "loss_ib": 0.005879975389689207, + "step": 1709 + }, + { + "ce_ib": 6.780648708343506, + "ce_orig": 1.2006686925888062, + "epoch": 0.4914803364727874, + "kl_loss": 0.19385182857513428, + "loss_ib": 0.008719166740775108, + "step": 1709 + }, + { + "ce_ib": 4.651514053344727, + "ce_orig": 0.9274974465370178, + "epoch": 0.4914803364727874, + "kl_loss": 0.18124522268772125, + "loss_ib": 0.006463966332376003, + "step": 1709 + }, + { + "ce_ib": 3.968463897705078, + "ce_orig": 0.5359354019165039, + "epoch": 0.4914803364727874, + "kl_loss": 0.24947825074195862, + "loss_ib": 0.0064632464200258255, + "step": 1709 + }, + { + "epoch": 0.49176792005176506, + "grad_norm": 0.12094637006521225, + "learning_rate": 9.54169012361155e-06, + "loss": 0.885, + "step": 1710 + }, + { + "ce_ib": 6.588129997253418, + "ce_orig": 1.0155458450317383, + "epoch": 0.49176792005176506, + "kl_loss": 0.2495093196630478, + "loss_ib": 0.009083223529160023, + "step": 1710 + }, + { + "ce_ib": 3.638334274291992, + "ce_orig": 0.42263445258140564, + "epoch": 0.49176792005176506, + "kl_loss": 0.21503064036369324, + "loss_ib": 0.005788641050457954, + "step": 1710 + }, + { + "ce_ib": 6.531235218048096, + "ce_orig": 0.8453525304794312, + "epoch": 0.49176792005176506, + "kl_loss": 0.23209929466247559, + "loss_ib": 0.00885222852230072, + "step": 1710 + }, + { + "ce_ib": 7.706050872802734, + "ce_orig": 0.9590526819229126, + "epoch": 0.49176792005176506, + "kl_loss": 0.24353507161140442, + "loss_ib": 0.010141400620341301, + "step": 1710 + }, + { + "ce_ib": 3.765565872192383, + "ce_orig": 0.8244125247001648, + "epoch": 0.4920555036307427, + "kl_loss": 0.4936164319515228, + "loss_ib": 0.008701730519533157, + "step": 1711 + }, + { + "ce_ib": 7.067040920257568, + "ce_orig": 1.2625956535339355, + "epoch": 0.4920555036307427, + "kl_loss": 0.7286717891693115, + "loss_ib": 0.014353758655488491, + "step": 1711 + }, + { + "ce_ib": 8.84214973449707, + "ce_orig": 1.6408401727676392, + "epoch": 0.4920555036307427, + "kl_loss": 0.24816496670246124, + "loss_ib": 0.011323799379169941, + "step": 1711 + }, + { + "ce_ib": 8.242490768432617, + "ce_orig": 1.148856520652771, + "epoch": 0.4920555036307427, + "kl_loss": 0.2580086588859558, + "loss_ib": 0.010822577401995659, + "step": 1711 + }, + { + "ce_ib": 5.534130573272705, + "ce_orig": 0.8404598832130432, + "epoch": 0.4923430872097203, + "kl_loss": 0.3187624216079712, + "loss_ib": 0.008721754886209965, + "step": 1712 + }, + { + "ce_ib": 8.611713409423828, + "ce_orig": 1.3891855478286743, + "epoch": 0.4923430872097203, + "kl_loss": 0.2638487219810486, + "loss_ib": 0.011250199750065804, + "step": 1712 + }, + { + "ce_ib": 4.341231822967529, + "ce_orig": 0.5861788392066956, + "epoch": 0.4923430872097203, + "kl_loss": 0.28423434495925903, + "loss_ib": 0.007183575537055731, + "step": 1712 + }, + { + "ce_ib": 6.878125190734863, + "ce_orig": 1.4111419916152954, + "epoch": 0.4923430872097203, + "kl_loss": 0.2343723028898239, + "loss_ib": 0.009221848100423813, + "step": 1712 + }, + { + "ce_ib": 6.162860870361328, + "ce_orig": 0.9645307660102844, + "epoch": 0.492630670788698, + "kl_loss": 0.20777800679206848, + "loss_ib": 0.008240641094744205, + "step": 1713 + }, + { + "ce_ib": 8.0570707321167, + "ce_orig": 1.2794287204742432, + "epoch": 0.492630670788698, + "kl_loss": 0.2319069355726242, + "loss_ib": 0.010376139543950558, + "step": 1713 + }, + { + "ce_ib": 5.692114353179932, + "ce_orig": 0.6935074925422668, + "epoch": 0.492630670788698, + "kl_loss": 0.26182082295417786, + "loss_ib": 0.008310322649776936, + "step": 1713 + }, + { + "ce_ib": 3.0138778686523438, + "ce_orig": 0.468887597322464, + "epoch": 0.492630670788698, + "kl_loss": 0.23393811285495758, + "loss_ib": 0.005353258922696114, + "step": 1713 + }, + { + "ce_ib": 8.280229568481445, + "ce_orig": 1.1677502393722534, + "epoch": 0.4929182543676756, + "kl_loss": 0.2325466424226761, + "loss_ib": 0.010605696588754654, + "step": 1714 + }, + { + "ce_ib": 6.72873592376709, + "ce_orig": 0.803114652633667, + "epoch": 0.4929182543676756, + "kl_loss": 0.4147999882698059, + "loss_ib": 0.010876736603677273, + "step": 1714 + }, + { + "ce_ib": 7.0533599853515625, + "ce_orig": 0.8928973078727722, + "epoch": 0.4929182543676756, + "kl_loss": 0.26862865686416626, + "loss_ib": 0.009739646688103676, + "step": 1714 + }, + { + "ce_ib": 4.385868549346924, + "ce_orig": 0.8779287338256836, + "epoch": 0.4929182543676756, + "kl_loss": 0.21822282671928406, + "loss_ib": 0.006568096578121185, + "step": 1714 + }, + { + "epoch": 0.49320583794665324, + "grad_norm": 0.14142395555973053, + "learning_rate": 9.538438780149104e-06, + "loss": 0.8621, + "step": 1715 + }, + { + "ce_ib": 3.48097825050354, + "ce_orig": 0.6204516887664795, + "epoch": 0.49320583794665324, + "kl_loss": 0.1770043522119522, + "loss_ib": 0.005251022055745125, + "step": 1715 + }, + { + "ce_ib": 6.237753868103027, + "ce_orig": 0.5141828060150146, + "epoch": 0.49320583794665324, + "kl_loss": 0.3674449324607849, + "loss_ib": 0.009912203066051006, + "step": 1715 + }, + { + "ce_ib": 4.3492817878723145, + "ce_orig": 0.6501482129096985, + "epoch": 0.49320583794665324, + "kl_loss": 0.22237512469291687, + "loss_ib": 0.006573033053427935, + "step": 1715 + }, + { + "ce_ib": 7.455378532409668, + "ce_orig": 1.408462405204773, + "epoch": 0.49320583794665324, + "kl_loss": 0.18538546562194824, + "loss_ib": 0.009309233166277409, + "step": 1715 + }, + { + "ce_ib": 5.599409103393555, + "ce_orig": 0.5918640494346619, + "epoch": 0.49349342152563086, + "kl_loss": 0.20586490631103516, + "loss_ib": 0.007658058311790228, + "step": 1716 + }, + { + "ce_ib": 5.35037088394165, + "ce_orig": 1.1837389469146729, + "epoch": 0.49349342152563086, + "kl_loss": 0.27578169107437134, + "loss_ib": 0.008108187466859818, + "step": 1716 + }, + { + "ce_ib": 5.994269847869873, + "ce_orig": 0.7266632318496704, + "epoch": 0.49349342152563086, + "kl_loss": 0.2978155016899109, + "loss_ib": 0.008972425013780594, + "step": 1716 + }, + { + "ce_ib": 3.708055019378662, + "ce_orig": 0.8986055254936218, + "epoch": 0.49349342152563086, + "kl_loss": 0.28073328733444214, + "loss_ib": 0.006515387911349535, + "step": 1716 + }, + { + "ce_ib": 6.660898208618164, + "ce_orig": 1.04195237159729, + "epoch": 0.49378100510460854, + "kl_loss": 0.26174142956733704, + "loss_ib": 0.0092783123254776, + "step": 1717 + }, + { + "ce_ib": 6.0924787521362305, + "ce_orig": 0.7078341841697693, + "epoch": 0.49378100510460854, + "kl_loss": 0.2791142761707306, + "loss_ib": 0.008883621543645859, + "step": 1717 + }, + { + "ce_ib": 3.7908501625061035, + "ce_orig": 0.791426956653595, + "epoch": 0.49378100510460854, + "kl_loss": 0.20216532051563263, + "loss_ib": 0.005812503397464752, + "step": 1717 + }, + { + "ce_ib": 2.7622318267822266, + "ce_orig": 0.3543376922607422, + "epoch": 0.49378100510460854, + "kl_loss": 0.22950060665607452, + "loss_ib": 0.005057237576693296, + "step": 1717 + }, + { + "ce_ib": 4.37089204788208, + "ce_orig": 0.7407205700874329, + "epoch": 0.49406858868358616, + "kl_loss": 0.2812882661819458, + "loss_ib": 0.007183774374425411, + "step": 1718 + }, + { + "ce_ib": 4.498960018157959, + "ce_orig": 0.6540777683258057, + "epoch": 0.49406858868358616, + "kl_loss": 0.18491533398628235, + "loss_ib": 0.006348113063722849, + "step": 1718 + }, + { + "ce_ib": 8.588423728942871, + "ce_orig": 1.2199863195419312, + "epoch": 0.49406858868358616, + "kl_loss": 0.3112899363040924, + "loss_ib": 0.01170132216066122, + "step": 1718 + }, + { + "ce_ib": 8.444191932678223, + "ce_orig": 1.4956631660461426, + "epoch": 0.49406858868358616, + "kl_loss": 0.2000323385000229, + "loss_ib": 0.010444514453411102, + "step": 1718 + }, + { + "ce_ib": 7.196175575256348, + "ce_orig": 0.8098769187927246, + "epoch": 0.4943561722625638, + "kl_loss": 0.289365291595459, + "loss_ib": 0.010089828632771969, + "step": 1719 + }, + { + "ce_ib": 7.820986747741699, + "ce_orig": 1.6552858352661133, + "epoch": 0.4943561722625638, + "kl_loss": 0.2239806354045868, + "loss_ib": 0.010060792788863182, + "step": 1719 + }, + { + "ce_ib": 6.495529651641846, + "ce_orig": 0.9777346849441528, + "epoch": 0.4943561722625638, + "kl_loss": 0.24929951131343842, + "loss_ib": 0.008988524787127972, + "step": 1719 + }, + { + "ce_ib": 5.176101207733154, + "ce_orig": 0.6900345087051392, + "epoch": 0.4943561722625638, + "kl_loss": 0.3611376881599426, + "loss_ib": 0.008787478320300579, + "step": 1719 + }, + { + "epoch": 0.49464375584154147, + "grad_norm": 0.13287211954593658, + "learning_rate": 9.535176502534242e-06, + "loss": 0.8975, + "step": 1720 + }, + { + "ce_ib": 8.692408561706543, + "ce_orig": 1.4137736558914185, + "epoch": 0.49464375584154147, + "kl_loss": 0.2862509787082672, + "loss_ib": 0.011554918251931667, + "step": 1720 + }, + { + "ce_ib": 7.457424163818359, + "ce_orig": 1.2316714525222778, + "epoch": 0.49464375584154147, + "kl_loss": 0.21747153997421265, + "loss_ib": 0.009632139466702938, + "step": 1720 + }, + { + "ce_ib": 4.586647987365723, + "ce_orig": 0.817991316318512, + "epoch": 0.49464375584154147, + "kl_loss": 0.20044812560081482, + "loss_ib": 0.006591129116714001, + "step": 1720 + }, + { + "ce_ib": 2.7127957344055176, + "ce_orig": 0.5314325094223022, + "epoch": 0.49464375584154147, + "kl_loss": 0.2668687701225281, + "loss_ib": 0.005381483118981123, + "step": 1720 + }, + { + "ce_ib": 3.999713182449341, + "ce_orig": 0.7891056537628174, + "epoch": 0.4949313394205191, + "kl_loss": 0.18380488455295563, + "loss_ib": 0.005837762262672186, + "step": 1721 + }, + { + "ce_ib": 5.790007591247559, + "ce_orig": 0.7806348204612732, + "epoch": 0.4949313394205191, + "kl_loss": 0.21094569563865662, + "loss_ib": 0.00789946410804987, + "step": 1721 + }, + { + "ce_ib": 5.886396408081055, + "ce_orig": 0.912238359451294, + "epoch": 0.4949313394205191, + "kl_loss": 0.24456319212913513, + "loss_ib": 0.008332028053700924, + "step": 1721 + }, + { + "ce_ib": 4.271791934967041, + "ce_orig": 0.7383297085762024, + "epoch": 0.4949313394205191, + "kl_loss": 0.24236194789409637, + "loss_ib": 0.006695411168038845, + "step": 1721 + }, + { + "ce_ib": 3.6333987712860107, + "ce_orig": 0.7166858911514282, + "epoch": 0.4952189229994967, + "kl_loss": 0.2111375331878662, + "loss_ib": 0.0057447743602097034, + "step": 1722 + }, + { + "ce_ib": 5.225841999053955, + "ce_orig": 0.6549127697944641, + "epoch": 0.4952189229994967, + "kl_loss": 0.2381574958562851, + "loss_ib": 0.007607416715472937, + "step": 1722 + }, + { + "ce_ib": 4.282773971557617, + "ce_orig": 0.6731183528900146, + "epoch": 0.4952189229994967, + "kl_loss": 0.14624956250190735, + "loss_ib": 0.005745269358158112, + "step": 1722 + }, + { + "ce_ib": 9.602765083312988, + "ce_orig": 1.1258394718170166, + "epoch": 0.4952189229994967, + "kl_loss": 0.18037505447864532, + "loss_ib": 0.01140651572495699, + "step": 1722 + }, + { + "ce_ib": 3.2537600994110107, + "ce_orig": 0.40181875228881836, + "epoch": 0.4955065065784744, + "kl_loss": 0.18366406857967377, + "loss_ib": 0.005090400576591492, + "step": 1723 + }, + { + "ce_ib": 6.9169416427612305, + "ce_orig": 0.6074340343475342, + "epoch": 0.4955065065784744, + "kl_loss": 0.30570995807647705, + "loss_ib": 0.009974041022360325, + "step": 1723 + }, + { + "ce_ib": 5.034404754638672, + "ce_orig": 0.934673011302948, + "epoch": 0.4955065065784744, + "kl_loss": 0.2834550738334656, + "loss_ib": 0.007868955843150616, + "step": 1723 + }, + { + "ce_ib": 3.9033546447753906, + "ce_orig": 0.59845370054245, + "epoch": 0.4955065065784744, + "kl_loss": 0.1284828931093216, + "loss_ib": 0.005188183858990669, + "step": 1723 + }, + { + "ce_ib": 3.368408203125, + "ce_orig": 0.4007030129432678, + "epoch": 0.495794090157452, + "kl_loss": 0.3604559302330017, + "loss_ib": 0.006972967181354761, + "step": 1724 + }, + { + "ce_ib": 5.497180938720703, + "ce_orig": 0.9960641264915466, + "epoch": 0.495794090157452, + "kl_loss": 0.2774682641029358, + "loss_ib": 0.00827186368405819, + "step": 1724 + }, + { + "ce_ib": 4.814739227294922, + "ce_orig": 0.8194516897201538, + "epoch": 0.495794090157452, + "kl_loss": 0.23562568426132202, + "loss_ib": 0.007170995697379112, + "step": 1724 + }, + { + "ce_ib": 6.389047145843506, + "ce_orig": 0.9979404211044312, + "epoch": 0.495794090157452, + "kl_loss": 0.2550808787345886, + "loss_ib": 0.008939855732023716, + "step": 1724 + }, + { + "epoch": 0.49608167373642964, + "grad_norm": 0.1158408373594284, + "learning_rate": 9.53190329862655e-06, + "loss": 0.8712, + "step": 1725 + }, + { + "ce_ib": 4.169690132141113, + "ce_orig": 0.6141831874847412, + "epoch": 0.49608167373642964, + "kl_loss": 0.25308936834335327, + "loss_ib": 0.006700583733618259, + "step": 1725 + }, + { + "ce_ib": 3.0598349571228027, + "ce_orig": 0.6335474252700806, + "epoch": 0.49608167373642964, + "kl_loss": 0.6865431666374207, + "loss_ib": 0.009925266727805138, + "step": 1725 + }, + { + "ce_ib": 4.783539772033691, + "ce_orig": 0.5297357439994812, + "epoch": 0.49608167373642964, + "kl_loss": 0.27161890268325806, + "loss_ib": 0.0074997288174927235, + "step": 1725 + }, + { + "ce_ib": 4.168352127075195, + "ce_orig": 0.6996920108795166, + "epoch": 0.49608167373642964, + "kl_loss": 0.2507363557815552, + "loss_ib": 0.00667571509256959, + "step": 1725 + }, + { + "ce_ib": 4.639172077178955, + "ce_orig": 0.7657274603843689, + "epoch": 0.49636925731540726, + "kl_loss": 0.2979816198348999, + "loss_ib": 0.007618988398462534, + "step": 1726 + }, + { + "ce_ib": 3.389683246612549, + "ce_orig": 0.7435678243637085, + "epoch": 0.49636925731540726, + "kl_loss": 0.20240336656570435, + "loss_ib": 0.005413717124611139, + "step": 1726 + }, + { + "ce_ib": 8.156920433044434, + "ce_orig": 1.2631562948226929, + "epoch": 0.49636925731540726, + "kl_loss": 0.216718852519989, + "loss_ib": 0.01032410841435194, + "step": 1726 + }, + { + "ce_ib": 9.228316307067871, + "ce_orig": 1.7304272651672363, + "epoch": 0.49636925731540726, + "kl_loss": 0.21756497025489807, + "loss_ib": 0.011403965763747692, + "step": 1726 + }, + { + "ce_ib": 5.42836856842041, + "ce_orig": 0.6177011132240295, + "epoch": 0.49665684089438494, + "kl_loss": 0.25108325481414795, + "loss_ib": 0.007939200848340988, + "step": 1727 + }, + { + "ce_ib": 5.849068641662598, + "ce_orig": 1.0078530311584473, + "epoch": 0.49665684089438494, + "kl_loss": 0.2704865634441376, + "loss_ib": 0.008553934283554554, + "step": 1727 + }, + { + "ce_ib": 7.377047061920166, + "ce_orig": 1.1187243461608887, + "epoch": 0.49665684089438494, + "kl_loss": 0.2173384428024292, + "loss_ib": 0.009550430811941624, + "step": 1727 + }, + { + "ce_ib": 7.3513617515563965, + "ce_orig": 0.6316287517547607, + "epoch": 0.49665684089438494, + "kl_loss": 0.23208868503570557, + "loss_ib": 0.009672248736023903, + "step": 1727 + }, + { + "ce_ib": 7.48237419128418, + "ce_orig": 0.7925584316253662, + "epoch": 0.49694442447336257, + "kl_loss": 0.36000531911849976, + "loss_ib": 0.011082427576184273, + "step": 1728 + }, + { + "ce_ib": 4.199851989746094, + "ce_orig": 0.7553547620773315, + "epoch": 0.49694442447336257, + "kl_loss": 0.3046613931655884, + "loss_ib": 0.0072464654222130775, + "step": 1728 + }, + { + "ce_ib": 3.7874884605407715, + "ce_orig": 0.675774872303009, + "epoch": 0.49694442447336257, + "kl_loss": 0.1680212914943695, + "loss_ib": 0.0054677012376487255, + "step": 1728 + }, + { + "ce_ib": 6.877801895141602, + "ce_orig": 1.1077038049697876, + "epoch": 0.49694442447336257, + "kl_loss": 0.24500277638435364, + "loss_ib": 0.00932782981544733, + "step": 1728 + }, + { + "ce_ib": 4.3296709060668945, + "ce_orig": 0.46692103147506714, + "epoch": 0.4972320080523402, + "kl_loss": 0.336995393037796, + "loss_ib": 0.007699624635279179, + "step": 1729 + }, + { + "ce_ib": 4.633038520812988, + "ce_orig": 0.8983129262924194, + "epoch": 0.4972320080523402, + "kl_loss": 0.30017250776290894, + "loss_ib": 0.007634763605892658, + "step": 1729 + }, + { + "ce_ib": 7.470649719238281, + "ce_orig": 1.2206380367279053, + "epoch": 0.4972320080523402, + "kl_loss": 0.2591376006603241, + "loss_ib": 0.010062025859951973, + "step": 1729 + }, + { + "ce_ib": 6.334610462188721, + "ce_orig": 0.8265384435653687, + "epoch": 0.4972320080523402, + "kl_loss": 0.2139129638671875, + "loss_ib": 0.008473739959299564, + "step": 1729 + }, + { + "epoch": 0.49751959163131787, + "grad_norm": 0.1053248941898346, + "learning_rate": 9.528619176311933e-06, + "loss": 0.9408, + "step": 1730 + }, + { + "ce_ib": 6.331774711608887, + "ce_orig": 1.1401456594467163, + "epoch": 0.49751959163131787, + "kl_loss": 0.2342427670955658, + "loss_ib": 0.008674201555550098, + "step": 1730 + }, + { + "ce_ib": 8.644342422485352, + "ce_orig": 1.4284965991973877, + "epoch": 0.49751959163131787, + "kl_loss": 0.2252599000930786, + "loss_ib": 0.010896940715610981, + "step": 1730 + }, + { + "ce_ib": 4.456500053405762, + "ce_orig": 0.5329476594924927, + "epoch": 0.49751959163131787, + "kl_loss": 0.19349047541618347, + "loss_ib": 0.006391404662281275, + "step": 1730 + }, + { + "ce_ib": 4.347330093383789, + "ce_orig": 0.8360463380813599, + "epoch": 0.49751959163131787, + "kl_loss": 0.2571093440055847, + "loss_ib": 0.006918422877788544, + "step": 1730 + }, + { + "ce_ib": 4.1414971351623535, + "ce_orig": 0.5835949778556824, + "epoch": 0.4978071752102955, + "kl_loss": 0.15816175937652588, + "loss_ib": 0.005723115056753159, + "step": 1731 + }, + { + "ce_ib": 3.465496778488159, + "ce_orig": 0.6377593278884888, + "epoch": 0.4978071752102955, + "kl_loss": 0.185111865401268, + "loss_ib": 0.00531661557033658, + "step": 1731 + }, + { + "ce_ib": 6.669215202331543, + "ce_orig": 0.6736965775489807, + "epoch": 0.4978071752102955, + "kl_loss": 0.2598609924316406, + "loss_ib": 0.00926782563328743, + "step": 1731 + }, + { + "ce_ib": 3.3897271156311035, + "ce_orig": 0.5642126202583313, + "epoch": 0.4978071752102955, + "kl_loss": 0.2248254418373108, + "loss_ib": 0.005637981928884983, + "step": 1731 + }, + { + "ce_ib": 8.572836875915527, + "ce_orig": 1.7926554679870605, + "epoch": 0.4980947587892731, + "kl_loss": 0.24958643317222595, + "loss_ib": 0.01106870174407959, + "step": 1732 + }, + { + "ce_ib": 4.426803112030029, + "ce_orig": 0.6430197954177856, + "epoch": 0.4980947587892731, + "kl_loss": 0.31002277135849, + "loss_ib": 0.007527030538767576, + "step": 1732 + }, + { + "ce_ib": 4.7570929527282715, + "ce_orig": 0.9767276048660278, + "epoch": 0.4980947587892731, + "kl_loss": 0.18130776286125183, + "loss_ib": 0.006570170167833567, + "step": 1732 + }, + { + "ce_ib": 5.817841529846191, + "ce_orig": 0.6756792664527893, + "epoch": 0.4980947587892731, + "kl_loss": 0.24906647205352783, + "loss_ib": 0.008308506570756435, + "step": 1732 + }, + { + "ce_ib": 5.906297206878662, + "ce_orig": 0.8869830369949341, + "epoch": 0.4983823423682508, + "kl_loss": 0.3161448538303375, + "loss_ib": 0.009067745879292488, + "step": 1733 + }, + { + "ce_ib": 3.069126844406128, + "ce_orig": 0.5189517736434937, + "epoch": 0.4983823423682508, + "kl_loss": 0.24513983726501465, + "loss_ib": 0.005520524922758341, + "step": 1733 + }, + { + "ce_ib": 4.824000358581543, + "ce_orig": 0.6937529444694519, + "epoch": 0.4983823423682508, + "kl_loss": 0.25546473264694214, + "loss_ib": 0.007378647103905678, + "step": 1733 + }, + { + "ce_ib": 4.200857162475586, + "ce_orig": 0.6918690800666809, + "epoch": 0.4983823423682508, + "kl_loss": 0.23723284900188446, + "loss_ib": 0.006573185790330172, + "step": 1733 + }, + { + "ce_ib": 7.380935192108154, + "ce_orig": 1.0280795097351074, + "epoch": 0.4986699259472284, + "kl_loss": 0.201813206076622, + "loss_ib": 0.009399067610502243, + "step": 1734 + }, + { + "ce_ib": 6.978311538696289, + "ce_orig": 1.288365364074707, + "epoch": 0.4986699259472284, + "kl_loss": 0.22564837336540222, + "loss_ib": 0.009234795346856117, + "step": 1734 + }, + { + "ce_ib": 4.881115913391113, + "ce_orig": 0.6921979188919067, + "epoch": 0.4986699259472284, + "kl_loss": 0.25443413853645325, + "loss_ib": 0.007425457239151001, + "step": 1734 + }, + { + "ce_ib": 2.8651721477508545, + "ce_orig": 0.28916382789611816, + "epoch": 0.4986699259472284, + "kl_loss": 0.5442566871643066, + "loss_ib": 0.008307739160954952, + "step": 1734 + }, + { + "epoch": 0.49895750952620604, + "grad_norm": 0.12058666348457336, + "learning_rate": 9.525324143502607e-06, + "loss": 0.8673, + "step": 1735 + }, + { + "ce_ib": 8.609652519226074, + "ce_orig": 1.4004709720611572, + "epoch": 0.49895750952620604, + "kl_loss": 0.2570205330848694, + "loss_ib": 0.011179856956005096, + "step": 1735 + }, + { + "ce_ib": 5.465366363525391, + "ce_orig": 0.6692785620689392, + "epoch": 0.49895750952620604, + "kl_loss": 0.22908666729927063, + "loss_ib": 0.007756233215332031, + "step": 1735 + }, + { + "ce_ib": 4.082115173339844, + "ce_orig": 0.6186438798904419, + "epoch": 0.49895750952620604, + "kl_loss": 0.2082287073135376, + "loss_ib": 0.006164402235299349, + "step": 1735 + }, + { + "ce_ib": 3.701429843902588, + "ce_orig": 0.5794656276702881, + "epoch": 0.49895750952620604, + "kl_loss": 0.22643627226352692, + "loss_ib": 0.005965792573988438, + "step": 1735 + }, + { + "ce_ib": 7.719645023345947, + "ce_orig": 1.1597990989685059, + "epoch": 0.49924509310518367, + "kl_loss": 0.21026304364204407, + "loss_ib": 0.00982227548956871, + "step": 1736 + }, + { + "ce_ib": 3.7463462352752686, + "ce_orig": 0.7905706763267517, + "epoch": 0.49924509310518367, + "kl_loss": 0.18672803044319153, + "loss_ib": 0.005613625980913639, + "step": 1736 + }, + { + "ce_ib": 5.696522235870361, + "ce_orig": 1.1005171537399292, + "epoch": 0.49924509310518367, + "kl_loss": 0.248357892036438, + "loss_ib": 0.008180101402103901, + "step": 1736 + }, + { + "ce_ib": 8.115674018859863, + "ce_orig": 1.2610074281692505, + "epoch": 0.49924509310518367, + "kl_loss": 0.2915061116218567, + "loss_ib": 0.011030735448002815, + "step": 1736 + }, + { + "ce_ib": 4.881083011627197, + "ce_orig": 0.8494547009468079, + "epoch": 0.49953267668416135, + "kl_loss": 0.1594853401184082, + "loss_ib": 0.006475936155766249, + "step": 1737 + }, + { + "ce_ib": 5.13024377822876, + "ce_orig": 0.8937183022499084, + "epoch": 0.49953267668416135, + "kl_loss": 0.1997142732143402, + "loss_ib": 0.007127386052161455, + "step": 1737 + }, + { + "ce_ib": 4.386274337768555, + "ce_orig": 0.7156038880348206, + "epoch": 0.49953267668416135, + "kl_loss": 0.21608230471611023, + "loss_ib": 0.006547097582370043, + "step": 1737 + }, + { + "ce_ib": 6.867334842681885, + "ce_orig": 1.0097808837890625, + "epoch": 0.49953267668416135, + "kl_loss": 0.28443610668182373, + "loss_ib": 0.009711695834994316, + "step": 1737 + }, + { + "ce_ib": 3.524717330932617, + "ce_orig": 0.3500223755836487, + "epoch": 0.49982026026313897, + "kl_loss": 0.27112168073654175, + "loss_ib": 0.00623593432828784, + "step": 1738 + }, + { + "ce_ib": 6.105541229248047, + "ce_orig": 1.2126398086547852, + "epoch": 0.49982026026313897, + "kl_loss": 0.1625138521194458, + "loss_ib": 0.007730680052191019, + "step": 1738 + }, + { + "ce_ib": 5.947635173797607, + "ce_orig": 1.0524487495422363, + "epoch": 0.49982026026313897, + "kl_loss": 0.32878363132476807, + "loss_ib": 0.009235471487045288, + "step": 1738 + }, + { + "ce_ib": 4.0039896965026855, + "ce_orig": 0.6879218816757202, + "epoch": 0.49982026026313897, + "kl_loss": 0.2660224735736847, + "loss_ib": 0.006664214190095663, + "step": 1738 + }, + { + "ce_ib": 7.965237617492676, + "ce_orig": 1.1455343961715698, + "epoch": 0.5001078438421166, + "kl_loss": 0.2807881236076355, + "loss_ib": 0.01077311858534813, + "step": 1739 + }, + { + "ce_ib": 5.267090797424316, + "ce_orig": 0.6191811561584473, + "epoch": 0.5001078438421166, + "kl_loss": 0.23513248562812805, + "loss_ib": 0.007618415169417858, + "step": 1739 + }, + { + "ce_ib": 4.817395210266113, + "ce_orig": 0.6077452898025513, + "epoch": 0.5001078438421166, + "kl_loss": 0.33568328619003296, + "loss_ib": 0.008174227550625801, + "step": 1739 + }, + { + "ce_ib": 4.480566501617432, + "ce_orig": 0.9865591526031494, + "epoch": 0.5001078438421166, + "kl_loss": 0.23918387293815613, + "loss_ib": 0.006872405298054218, + "step": 1739 + }, + { + "epoch": 0.5003954274210942, + "grad_norm": 0.12100203335285187, + "learning_rate": 9.522018208137066e-06, + "loss": 0.8901, + "step": 1740 + }, + { + "ce_ib": 11.01678466796875, + "ce_orig": 1.4696050882339478, + "epoch": 0.5003954274210942, + "kl_loss": 0.2423844039440155, + "loss_ib": 0.013440628536045551, + "step": 1740 + }, + { + "ce_ib": 6.453488826751709, + "ce_orig": 0.8022169470787048, + "epoch": 0.5003954274210942, + "kl_loss": 0.17600062489509583, + "loss_ib": 0.008213494904339314, + "step": 1740 + }, + { + "ce_ib": 6.403378009796143, + "ce_orig": 1.1907107830047607, + "epoch": 0.5003954274210942, + "kl_loss": 0.34092897176742554, + "loss_ib": 0.009812667965888977, + "step": 1740 + }, + { + "ce_ib": 3.2982351779937744, + "ce_orig": 0.6578614711761475, + "epoch": 0.5003954274210942, + "kl_loss": 0.27500489354133606, + "loss_ib": 0.006048284005373716, + "step": 1740 + }, + { + "ce_ib": 6.678479194641113, + "ce_orig": 1.2896283864974976, + "epoch": 0.5006830110000718, + "kl_loss": 0.32643431425094604, + "loss_ib": 0.00994282215833664, + "step": 1741 + }, + { + "ce_ib": 7.29425048828125, + "ce_orig": 0.7709023356437683, + "epoch": 0.5006830110000718, + "kl_loss": 0.20930366218090057, + "loss_ib": 0.009387287311255932, + "step": 1741 + }, + { + "ce_ib": 4.5514702796936035, + "ce_orig": 0.950491189956665, + "epoch": 0.5006830110000718, + "kl_loss": 0.22086811065673828, + "loss_ib": 0.0067601511254906654, + "step": 1741 + }, + { + "ce_ib": 5.60288667678833, + "ce_orig": 0.6810688972473145, + "epoch": 0.5006830110000718, + "kl_loss": 0.3826490044593811, + "loss_ib": 0.00942937657237053, + "step": 1741 + }, + { + "ce_ib": 9.729187965393066, + "ce_orig": 1.1416085958480835, + "epoch": 0.5009705945790496, + "kl_loss": 0.2906084656715393, + "loss_ib": 0.01263527199625969, + "step": 1742 + }, + { + "ce_ib": 3.0167596340179443, + "ce_orig": 0.6279767155647278, + "epoch": 0.5009705945790496, + "kl_loss": 0.2155575305223465, + "loss_ib": 0.005172334611415863, + "step": 1742 + }, + { + "ce_ib": 3.6063148975372314, + "ce_orig": 0.5931623578071594, + "epoch": 0.5009705945790496, + "kl_loss": 0.20270030200481415, + "loss_ib": 0.005633317865431309, + "step": 1742 + }, + { + "ce_ib": 3.6025731563568115, + "ce_orig": 0.504107654094696, + "epoch": 0.5009705945790496, + "kl_loss": 0.31339216232299805, + "loss_ib": 0.006736494600772858, + "step": 1742 + }, + { + "ce_ib": 6.566645622253418, + "ce_orig": 1.1916468143463135, + "epoch": 0.5012581781580272, + "kl_loss": 0.23999133706092834, + "loss_ib": 0.008966558612883091, + "step": 1743 + }, + { + "ce_ib": 4.4429450035095215, + "ce_orig": 0.7111625671386719, + "epoch": 0.5012581781580272, + "kl_loss": 0.1585032194852829, + "loss_ib": 0.0060279774479568005, + "step": 1743 + }, + { + "ce_ib": 3.6084020137786865, + "ce_orig": 0.5784590840339661, + "epoch": 0.5012581781580272, + "kl_loss": 0.19830071926116943, + "loss_ib": 0.005591409280896187, + "step": 1743 + }, + { + "ce_ib": 5.667142391204834, + "ce_orig": 0.7029927372932434, + "epoch": 0.5012581781580272, + "kl_loss": 0.2522284984588623, + "loss_ib": 0.008189426735043526, + "step": 1743 + }, + { + "ce_ib": 7.597262382507324, + "ce_orig": 0.6795341968536377, + "epoch": 0.5015457617370048, + "kl_loss": 0.5472317337989807, + "loss_ib": 0.013069579377770424, + "step": 1744 + }, + { + "ce_ib": 4.5865702629089355, + "ce_orig": 0.8706312775611877, + "epoch": 0.5015457617370048, + "kl_loss": 0.39241304993629456, + "loss_ib": 0.008510700426995754, + "step": 1744 + }, + { + "ce_ib": 6.553436279296875, + "ce_orig": 1.0656144618988037, + "epoch": 0.5015457617370048, + "kl_loss": 0.27133724093437195, + "loss_ib": 0.00926680862903595, + "step": 1744 + }, + { + "ce_ib": 3.608102321624756, + "ce_orig": 0.8301694393157959, + "epoch": 0.5015457617370048, + "kl_loss": 0.21063321828842163, + "loss_ib": 0.005714434199035168, + "step": 1744 + }, + { + "epoch": 0.5018333453159824, + "grad_norm": 0.12239973247051239, + "learning_rate": 9.518701378180082e-06, + "loss": 0.8828, + "step": 1745 + }, + { + "ce_ib": 4.432754993438721, + "ce_orig": 0.6014177799224854, + "epoch": 0.5018333453159824, + "kl_loss": 0.17354170978069305, + "loss_ib": 0.006168172229081392, + "step": 1745 + }, + { + "ce_ib": 2.7183847427368164, + "ce_orig": 0.3308621048927307, + "epoch": 0.5018333453159824, + "kl_loss": 0.4907413721084595, + "loss_ib": 0.007625798229128122, + "step": 1745 + }, + { + "ce_ib": 6.574141979217529, + "ce_orig": 1.18874192237854, + "epoch": 0.5018333453159824, + "kl_loss": 0.24722984433174133, + "loss_ib": 0.00904644001275301, + "step": 1745 + }, + { + "ce_ib": 3.3298656940460205, + "ce_orig": 0.6092104911804199, + "epoch": 0.5018333453159824, + "kl_loss": 0.21840175986289978, + "loss_ib": 0.0055138831958174706, + "step": 1745 + }, + { + "ce_ib": 3.5292844772338867, + "ce_orig": 0.6854714155197144, + "epoch": 0.5021209288949601, + "kl_loss": 0.1925717145204544, + "loss_ib": 0.0054550012573599815, + "step": 1746 + }, + { + "ce_ib": 6.507283687591553, + "ce_orig": 1.2124181985855103, + "epoch": 0.5021209288949601, + "kl_loss": 0.3079559803009033, + "loss_ib": 0.00958684366196394, + "step": 1746 + }, + { + "ce_ib": 3.4478015899658203, + "ce_orig": 0.609721302986145, + "epoch": 0.5021209288949601, + "kl_loss": 0.15236534178256989, + "loss_ib": 0.004971455316990614, + "step": 1746 + }, + { + "ce_ib": 5.8801045417785645, + "ce_orig": 0.7739959955215454, + "epoch": 0.5021209288949601, + "kl_loss": 0.3879520297050476, + "loss_ib": 0.009759625419974327, + "step": 1746 + }, + { + "ce_ib": 2.8765039443969727, + "ce_orig": 0.444017618894577, + "epoch": 0.5024085124739377, + "kl_loss": 0.17314675450325012, + "loss_ib": 0.004607971291989088, + "step": 1747 + }, + { + "ce_ib": 4.182227611541748, + "ce_orig": 0.40417736768722534, + "epoch": 0.5024085124739377, + "kl_loss": 0.2266075611114502, + "loss_ib": 0.00644830334931612, + "step": 1747 + }, + { + "ce_ib": 4.521939277648926, + "ce_orig": 0.7604411840438843, + "epoch": 0.5024085124739377, + "kl_loss": 0.1887277364730835, + "loss_ib": 0.0064092171378433704, + "step": 1747 + }, + { + "ce_ib": 5.178815841674805, + "ce_orig": 0.853346049785614, + "epoch": 0.5024085124739377, + "kl_loss": 0.2517073452472687, + "loss_ib": 0.007695889100432396, + "step": 1747 + }, + { + "ce_ib": 6.200848579406738, + "ce_orig": 0.9567843675613403, + "epoch": 0.5026960960529154, + "kl_loss": 0.26944202184677124, + "loss_ib": 0.008895268663764, + "step": 1748 + }, + { + "ce_ib": 8.421351432800293, + "ce_orig": 0.534841001033783, + "epoch": 0.5026960960529154, + "kl_loss": 0.2718900442123413, + "loss_ib": 0.011140250600874424, + "step": 1748 + }, + { + "ce_ib": 4.257621765136719, + "ce_orig": 0.7095168232917786, + "epoch": 0.5026960960529154, + "kl_loss": 0.2819932997226715, + "loss_ib": 0.007077554240822792, + "step": 1748 + }, + { + "ce_ib": 8.776861190795898, + "ce_orig": 1.6992207765579224, + "epoch": 0.5026960960529154, + "kl_loss": 0.2835456430912018, + "loss_ib": 0.011612317524850368, + "step": 1748 + }, + { + "ce_ib": 5.450873851776123, + "ce_orig": 0.7511981725692749, + "epoch": 0.502983679631893, + "kl_loss": 0.28340595960617065, + "loss_ib": 0.008284932933747768, + "step": 1749 + }, + { + "ce_ib": 4.766958713531494, + "ce_orig": 0.6778752207756042, + "epoch": 0.502983679631893, + "kl_loss": 0.20694370567798615, + "loss_ib": 0.006836395710706711, + "step": 1749 + }, + { + "ce_ib": 3.5692238807678223, + "ce_orig": 0.5320716500282288, + "epoch": 0.502983679631893, + "kl_loss": 0.2173265516757965, + "loss_ib": 0.005742488894611597, + "step": 1749 + }, + { + "ce_ib": 5.871660232543945, + "ce_orig": 1.1351439952850342, + "epoch": 0.502983679631893, + "kl_loss": 0.24728937447071075, + "loss_ib": 0.008344553411006927, + "step": 1749 + }, + { + "epoch": 0.5032712632108707, + "grad_norm": 0.11421152949333191, + "learning_rate": 9.515373661622665e-06, + "loss": 0.8177, + "step": 1750 + }, + { + "ce_ib": 11.361602783203125, + "ce_orig": 1.9655756950378418, + "epoch": 0.5032712632108707, + "kl_loss": 0.24857398867607117, + "loss_ib": 0.013847342692315578, + "step": 1750 + }, + { + "ce_ib": 5.9394097328186035, + "ce_orig": 0.7931159734725952, + "epoch": 0.5032712632108707, + "kl_loss": 0.2079966813325882, + "loss_ib": 0.008019376546144485, + "step": 1750 + }, + { + "ce_ib": 3.0225653648376465, + "ce_orig": 0.48664718866348267, + "epoch": 0.5032712632108707, + "kl_loss": 0.25711023807525635, + "loss_ib": 0.005593668203800917, + "step": 1750 + }, + { + "ce_ib": 5.224844932556152, + "ce_orig": 0.7229938507080078, + "epoch": 0.5032712632108707, + "kl_loss": 0.21279790997505188, + "loss_ib": 0.007352823857218027, + "step": 1750 + }, + { + "ce_ib": 5.852553367614746, + "ce_orig": 0.992152750492096, + "epoch": 0.5035588467898483, + "kl_loss": 0.3124367594718933, + "loss_ib": 0.008976920507848263, + "step": 1751 + }, + { + "ce_ib": 6.506232738494873, + "ce_orig": 0.840754508972168, + "epoch": 0.5035588467898483, + "kl_loss": 0.28680092096328735, + "loss_ib": 0.009374241344630718, + "step": 1751 + }, + { + "ce_ib": 5.130998134613037, + "ce_orig": 0.5533031821250916, + "epoch": 0.5035588467898483, + "kl_loss": 0.3280577063560486, + "loss_ib": 0.008411575108766556, + "step": 1751 + }, + { + "ce_ib": 6.5981926918029785, + "ce_orig": 0.7942853569984436, + "epoch": 0.5035588467898483, + "kl_loss": 0.34047335386276245, + "loss_ib": 0.010002925992012024, + "step": 1751 + }, + { + "ce_ib": 6.03656530380249, + "ce_orig": 1.0924416780471802, + "epoch": 0.5038464303688259, + "kl_loss": 0.43093806505203247, + "loss_ib": 0.01034594513475895, + "step": 1752 + }, + { + "ce_ib": 5.8077497482299805, + "ce_orig": 0.8590795993804932, + "epoch": 0.5038464303688259, + "kl_loss": 0.19107350707054138, + "loss_ib": 0.007718484383076429, + "step": 1752 + }, + { + "ce_ib": 3.6463088989257812, + "ce_orig": 0.6247625350952148, + "epoch": 0.5038464303688259, + "kl_loss": 0.19530805945396423, + "loss_ib": 0.0055993893183767796, + "step": 1752 + }, + { + "ce_ib": 4.36478328704834, + "ce_orig": 0.7276626825332642, + "epoch": 0.5038464303688259, + "kl_loss": 0.27551114559173584, + "loss_ib": 0.0071198949590325356, + "step": 1752 + }, + { + "ce_ib": 3.8157730102539062, + "ce_orig": 0.664462685585022, + "epoch": 0.5041340139478035, + "kl_loss": 0.24338558316230774, + "loss_ib": 0.006249628961086273, + "step": 1753 + }, + { + "ce_ib": 5.476027965545654, + "ce_orig": 1.096780776977539, + "epoch": 0.5041340139478035, + "kl_loss": 0.17896342277526855, + "loss_ib": 0.007265662308782339, + "step": 1753 + }, + { + "ce_ib": 5.217350006103516, + "ce_orig": 0.7322924733161926, + "epoch": 0.5041340139478035, + "kl_loss": 0.21693328022956848, + "loss_ib": 0.0073866830207407475, + "step": 1753 + }, + { + "ce_ib": 6.2500319480896, + "ce_orig": 1.0605376958847046, + "epoch": 0.5041340139478035, + "kl_loss": 0.24295900762081146, + "loss_ib": 0.00867962185293436, + "step": 1753 + }, + { + "ce_ib": 6.467724323272705, + "ce_orig": 0.9832982420921326, + "epoch": 0.5044215975267812, + "kl_loss": 0.17008638381958008, + "loss_ib": 0.008168588392436504, + "step": 1754 + }, + { + "ce_ib": 6.879829406738281, + "ce_orig": 1.5099862813949585, + "epoch": 0.5044215975267812, + "kl_loss": 0.26725825667381287, + "loss_ib": 0.00955241173505783, + "step": 1754 + }, + { + "ce_ib": 9.748746871948242, + "ce_orig": 1.5472444295883179, + "epoch": 0.5044215975267812, + "kl_loss": 0.27403298020362854, + "loss_ib": 0.01248907670378685, + "step": 1754 + }, + { + "ce_ib": 3.8178939819335938, + "ce_orig": 0.7111484408378601, + "epoch": 0.5044215975267812, + "kl_loss": 0.2375500202178955, + "loss_ib": 0.006193394307047129, + "step": 1754 + }, + { + "epoch": 0.5047091811057589, + "grad_norm": 0.14262355864048004, + "learning_rate": 9.512035066482055e-06, + "loss": 0.8646, + "step": 1755 + }, + { + "ce_ib": 6.009156227111816, + "ce_orig": 0.7930887937545776, + "epoch": 0.5047091811057589, + "kl_loss": 0.319755494594574, + "loss_ib": 0.009206710383296013, + "step": 1755 + }, + { + "ce_ib": 6.522556781768799, + "ce_orig": 0.9451214671134949, + "epoch": 0.5047091811057589, + "kl_loss": 0.26357918977737427, + "loss_ib": 0.00915834866464138, + "step": 1755 + }, + { + "ce_ib": 7.798460960388184, + "ce_orig": 1.1518856287002563, + "epoch": 0.5047091811057589, + "kl_loss": 0.25611695647239685, + "loss_ib": 0.010359629988670349, + "step": 1755 + }, + { + "ce_ib": 5.715204238891602, + "ce_orig": 1.0006587505340576, + "epoch": 0.5047091811057589, + "kl_loss": 0.28110426664352417, + "loss_ib": 0.00852624699473381, + "step": 1755 + }, + { + "ce_ib": 3.4260988235473633, + "ce_orig": 0.858456015586853, + "epoch": 0.5049967646847365, + "kl_loss": 0.2268604338169098, + "loss_ib": 0.005694702733308077, + "step": 1756 + }, + { + "ce_ib": 4.031425476074219, + "ce_orig": 0.7041755318641663, + "epoch": 0.5049967646847365, + "kl_loss": 0.23584911227226257, + "loss_ib": 0.006389916408807039, + "step": 1756 + }, + { + "ce_ib": 4.38675594329834, + "ce_orig": 0.6774066686630249, + "epoch": 0.5049967646847365, + "kl_loss": 0.16846446692943573, + "loss_ib": 0.006071400362998247, + "step": 1756 + }, + { + "ce_ib": 4.694783687591553, + "ce_orig": 0.6566852331161499, + "epoch": 0.5049967646847365, + "kl_loss": 0.15230196714401245, + "loss_ib": 0.006217803340405226, + "step": 1756 + }, + { + "ce_ib": 2.7962417602539062, + "ce_orig": 0.4238570034503937, + "epoch": 0.5052843482637142, + "kl_loss": 0.29057538509368896, + "loss_ib": 0.005701995920389891, + "step": 1757 + }, + { + "ce_ib": 7.179047584533691, + "ce_orig": 0.871265709400177, + "epoch": 0.5052843482637142, + "kl_loss": 0.25131675601005554, + "loss_ib": 0.00969221442937851, + "step": 1757 + }, + { + "ce_ib": 4.701264381408691, + "ce_orig": 1.0522727966308594, + "epoch": 0.5052843482637142, + "kl_loss": 0.20373789966106415, + "loss_ib": 0.006738643627613783, + "step": 1757 + }, + { + "ce_ib": 4.195560455322266, + "ce_orig": 0.8334342837333679, + "epoch": 0.5052843482637142, + "kl_loss": 0.19431960582733154, + "loss_ib": 0.006138755939900875, + "step": 1757 + }, + { + "ce_ib": 7.60351037979126, + "ce_orig": 1.42118501663208, + "epoch": 0.5055719318426918, + "kl_loss": 0.46376219391822815, + "loss_ib": 0.012241131626069546, + "step": 1758 + }, + { + "ce_ib": 5.35986328125, + "ce_orig": 0.7749453186988831, + "epoch": 0.5055719318426918, + "kl_loss": 0.24413596093654633, + "loss_ib": 0.0078012230806052685, + "step": 1758 + }, + { + "ce_ib": 7.557556629180908, + "ce_orig": 0.8648902177810669, + "epoch": 0.5055719318426918, + "kl_loss": 0.1665661633014679, + "loss_ib": 0.009223218075931072, + "step": 1758 + }, + { + "ce_ib": 8.175300598144531, + "ce_orig": 0.8997637629508972, + "epoch": 0.5055719318426918, + "kl_loss": 0.37757620215415955, + "loss_ib": 0.011951062828302383, + "step": 1758 + }, + { + "ce_ib": 8.870894432067871, + "ce_orig": 1.0376429557800293, + "epoch": 0.5058595154216694, + "kl_loss": 0.2231506109237671, + "loss_ib": 0.011102399788796902, + "step": 1759 + }, + { + "ce_ib": 3.632920742034912, + "ce_orig": 0.44894668459892273, + "epoch": 0.5058595154216694, + "kl_loss": 0.3840489983558655, + "loss_ib": 0.007473410107195377, + "step": 1759 + }, + { + "ce_ib": 6.754943370819092, + "ce_orig": 1.1315734386444092, + "epoch": 0.5058595154216694, + "kl_loss": 0.19753900170326233, + "loss_ib": 0.008730334229767323, + "step": 1759 + }, + { + "ce_ib": 4.836207866668701, + "ce_orig": 0.7240886688232422, + "epoch": 0.5058595154216694, + "kl_loss": 0.17611101269721985, + "loss_ib": 0.0065973177552223206, + "step": 1759 + }, + { + "epoch": 0.506147099000647, + "grad_norm": 0.12391646206378937, + "learning_rate": 9.508685600801704e-06, + "loss": 0.8845, + "step": 1760 + }, + { + "ce_ib": 4.2273688316345215, + "ce_orig": 1.025089979171753, + "epoch": 0.506147099000647, + "kl_loss": 0.14844375848770142, + "loss_ib": 0.00571180647239089, + "step": 1760 + }, + { + "ce_ib": 4.832033157348633, + "ce_orig": 0.9036097526550293, + "epoch": 0.506147099000647, + "kl_loss": 0.31038039922714233, + "loss_ib": 0.007935836911201477, + "step": 1760 + }, + { + "ce_ib": 5.035365581512451, + "ce_orig": 0.7276528477668762, + "epoch": 0.506147099000647, + "kl_loss": 0.2854631245136261, + "loss_ib": 0.007889996282756329, + "step": 1760 + }, + { + "ce_ib": 5.9888505935668945, + "ce_orig": 0.9987836480140686, + "epoch": 0.506147099000647, + "kl_loss": 0.17566829919815063, + "loss_ib": 0.007745533250272274, + "step": 1760 + }, + { + "ce_ib": 4.102182388305664, + "ce_orig": 0.95122230052948, + "epoch": 0.5064346825796247, + "kl_loss": 0.28314220905303955, + "loss_ib": 0.006933604367077351, + "step": 1761 + }, + { + "ce_ib": 6.556976795196533, + "ce_orig": 1.0992000102996826, + "epoch": 0.5064346825796247, + "kl_loss": 0.17420658469200134, + "loss_ib": 0.008299043402075768, + "step": 1761 + }, + { + "ce_ib": 6.701450347900391, + "ce_orig": 0.8601769208908081, + "epoch": 0.5064346825796247, + "kl_loss": 0.26562756299972534, + "loss_ib": 0.009357726201415062, + "step": 1761 + }, + { + "ce_ib": 3.7292327880859375, + "ce_orig": 0.5020140409469604, + "epoch": 0.5064346825796247, + "kl_loss": 0.1775652915239334, + "loss_ib": 0.00550488568842411, + "step": 1761 + }, + { + "ce_ib": 4.09344482421875, + "ce_orig": 0.7577466368675232, + "epoch": 0.5067222661586024, + "kl_loss": 0.1945955753326416, + "loss_ib": 0.006039400584995747, + "step": 1762 + }, + { + "ce_ib": 7.949859142303467, + "ce_orig": 1.1857138872146606, + "epoch": 0.5067222661586024, + "kl_loss": 0.15808865427970886, + "loss_ib": 0.009530745446681976, + "step": 1762 + }, + { + "ce_ib": 6.878933906555176, + "ce_orig": 1.056178092956543, + "epoch": 0.5067222661586024, + "kl_loss": 0.15867654979228973, + "loss_ib": 0.008465698920190334, + "step": 1762 + }, + { + "ce_ib": 1.1259777545928955, + "ce_orig": 0.13052049279212952, + "epoch": 0.5067222661586024, + "kl_loss": 0.5092992782592773, + "loss_ib": 0.006218970287591219, + "step": 1762 + }, + { + "ce_ib": 6.466426372528076, + "ce_orig": 1.1745500564575195, + "epoch": 0.50700984973758, + "kl_loss": 0.20478039979934692, + "loss_ib": 0.008514230139553547, + "step": 1763 + }, + { + "ce_ib": 7.062382698059082, + "ce_orig": 1.2049685716629028, + "epoch": 0.50700984973758, + "kl_loss": 0.24910223484039307, + "loss_ib": 0.009553404524922371, + "step": 1763 + }, + { + "ce_ib": 7.08675479888916, + "ce_orig": 1.275549054145813, + "epoch": 0.50700984973758, + "kl_loss": 0.1990867555141449, + "loss_ib": 0.009077622555196285, + "step": 1763 + }, + { + "ce_ib": 6.7942795753479, + "ce_orig": 0.9077232480049133, + "epoch": 0.50700984973758, + "kl_loss": 0.24441751837730408, + "loss_ib": 0.009238455444574356, + "step": 1763 + }, + { + "ce_ib": 5.48286247253418, + "ce_orig": 0.6654765605926514, + "epoch": 0.5072974333165576, + "kl_loss": 0.19381588697433472, + "loss_ib": 0.0074210213497281075, + "step": 1764 + }, + { + "ce_ib": 4.639284133911133, + "ce_orig": 0.9201982021331787, + "epoch": 0.5072974333165576, + "kl_loss": 0.23711277544498444, + "loss_ib": 0.007010411936789751, + "step": 1764 + }, + { + "ce_ib": 4.738180160522461, + "ce_orig": 0.9239326119422913, + "epoch": 0.5072974333165576, + "kl_loss": 0.2706284523010254, + "loss_ib": 0.007444465067237616, + "step": 1764 + }, + { + "ce_ib": 4.196111679077148, + "ce_orig": 0.2848537266254425, + "epoch": 0.5072974333165576, + "kl_loss": 0.25172311067581177, + "loss_ib": 0.006713342387229204, + "step": 1764 + }, + { + "epoch": 0.5075850168955353, + "grad_norm": 0.1055581197142601, + "learning_rate": 9.505325272651253e-06, + "loss": 0.8398, + "step": 1765 + }, + { + "ce_ib": 4.018991470336914, + "ce_orig": 0.7165852785110474, + "epoch": 0.5075850168955353, + "kl_loss": 0.21685855090618134, + "loss_ib": 0.00618757726624608, + "step": 1765 + }, + { + "ce_ib": 7.085607051849365, + "ce_orig": 0.42605891823768616, + "epoch": 0.5075850168955353, + "kl_loss": 0.2992393672466278, + "loss_ib": 0.010078000836074352, + "step": 1765 + }, + { + "ce_ib": 4.441742897033691, + "ce_orig": 0.7470413446426392, + "epoch": 0.5075850168955353, + "kl_loss": 0.15440087020397186, + "loss_ib": 0.005985751748085022, + "step": 1765 + }, + { + "ce_ib": 5.012276649475098, + "ce_orig": 0.7102251648902893, + "epoch": 0.5075850168955353, + "kl_loss": 0.20242461562156677, + "loss_ib": 0.007036522496491671, + "step": 1765 + }, + { + "ce_ib": 5.46077299118042, + "ce_orig": 1.014110803604126, + "epoch": 0.5078726004745129, + "kl_loss": 0.17652538418769836, + "loss_ib": 0.0072260270826518536, + "step": 1766 + }, + { + "ce_ib": 6.734499454498291, + "ce_orig": 1.149742603302002, + "epoch": 0.5078726004745129, + "kl_loss": 0.24488303065299988, + "loss_ib": 0.009183329530060291, + "step": 1766 + }, + { + "ce_ib": 4.96577787399292, + "ce_orig": 0.6755092740058899, + "epoch": 0.5078726004745129, + "kl_loss": 0.20076847076416016, + "loss_ib": 0.0069734626449644566, + "step": 1766 + }, + { + "ce_ib": 9.733043670654297, + "ce_orig": 1.1865333318710327, + "epoch": 0.5078726004745129, + "kl_loss": 0.3436145484447479, + "loss_ib": 0.013169188983738422, + "step": 1766 + }, + { + "ce_ib": 3.1408607959747314, + "ce_orig": 0.24576525390148163, + "epoch": 0.5081601840534905, + "kl_loss": 0.3785693049430847, + "loss_ib": 0.006926553789526224, + "step": 1767 + }, + { + "ce_ib": 3.944741725921631, + "ce_orig": 0.6151083707809448, + "epoch": 0.5081601840534905, + "kl_loss": 0.21588996052742004, + "loss_ib": 0.006103641353547573, + "step": 1767 + }, + { + "ce_ib": 6.064630508422852, + "ce_orig": 1.414920449256897, + "epoch": 0.5081601840534905, + "kl_loss": 0.22204412519931793, + "loss_ib": 0.008285071700811386, + "step": 1767 + }, + { + "ce_ib": 5.1514506340026855, + "ce_orig": 0.8176374435424805, + "epoch": 0.5081601840534905, + "kl_loss": 0.20381109416484833, + "loss_ib": 0.007189561612904072, + "step": 1767 + }, + { + "ce_ib": 5.502082347869873, + "ce_orig": 0.9125807881355286, + "epoch": 0.5084477676324682, + "kl_loss": 0.21032029390335083, + "loss_ib": 0.007605285383760929, + "step": 1768 + }, + { + "ce_ib": 6.051918029785156, + "ce_orig": 1.0317310094833374, + "epoch": 0.5084477676324682, + "kl_loss": 0.26851925253868103, + "loss_ib": 0.008737110532820225, + "step": 1768 + }, + { + "ce_ib": 3.6897971630096436, + "ce_orig": 0.6450068950653076, + "epoch": 0.5084477676324682, + "kl_loss": 0.3440362811088562, + "loss_ib": 0.007130159996449947, + "step": 1768 + }, + { + "ce_ib": 3.3664798736572266, + "ce_orig": 0.6561928391456604, + "epoch": 0.5084477676324682, + "kl_loss": 0.1874711513519287, + "loss_ib": 0.005241191480308771, + "step": 1768 + }, + { + "ce_ib": 3.5129668712615967, + "ce_orig": 0.5466682314872742, + "epoch": 0.5087353512114459, + "kl_loss": 0.25630488991737366, + "loss_ib": 0.006076015532016754, + "step": 1769 + }, + { + "ce_ib": 3.623927354812622, + "ce_orig": 0.718904972076416, + "epoch": 0.5087353512114459, + "kl_loss": 0.2727135419845581, + "loss_ib": 0.006351063027977943, + "step": 1769 + }, + { + "ce_ib": 3.5800700187683105, + "ce_orig": 0.6674350500106812, + "epoch": 0.5087353512114459, + "kl_loss": 0.28915029764175415, + "loss_ib": 0.006471572909504175, + "step": 1769 + }, + { + "ce_ib": 5.109983921051025, + "ce_orig": 0.5016462206840515, + "epoch": 0.5087353512114459, + "kl_loss": 0.36470139026641846, + "loss_ib": 0.008756997995078564, + "step": 1769 + }, + { + "epoch": 0.5090229347904235, + "grad_norm": 0.1167183369398117, + "learning_rate": 9.501954090126514e-06, + "loss": 0.8217, + "step": 1770 + }, + { + "ce_ib": 5.424228668212891, + "ce_orig": 0.6310187578201294, + "epoch": 0.5090229347904235, + "kl_loss": 0.21915018558502197, + "loss_ib": 0.007615730632096529, + "step": 1770 + }, + { + "ce_ib": 5.505951404571533, + "ce_orig": 0.9999389052391052, + "epoch": 0.5090229347904235, + "kl_loss": 0.22435197234153748, + "loss_ib": 0.007749471347779036, + "step": 1770 + }, + { + "ce_ib": 1.7814141511917114, + "ce_orig": 0.2353065013885498, + "epoch": 0.5090229347904235, + "kl_loss": 0.24533554911613464, + "loss_ib": 0.004234769381582737, + "step": 1770 + }, + { + "ce_ib": 2.6672050952911377, + "ce_orig": 0.367904931306839, + "epoch": 0.5090229347904235, + "kl_loss": 0.21802517771720886, + "loss_ib": 0.004847456701099873, + "step": 1770 + }, + { + "ce_ib": 3.7499396800994873, + "ce_orig": 0.3734409213066101, + "epoch": 0.5093105183694011, + "kl_loss": 0.195489764213562, + "loss_ib": 0.0057048373855650425, + "step": 1771 + }, + { + "ce_ib": 9.809314727783203, + "ce_orig": 2.07853102684021, + "epoch": 0.5093105183694011, + "kl_loss": 0.27149325609207153, + "loss_ib": 0.012524247169494629, + "step": 1771 + }, + { + "ce_ib": 3.7507245540618896, + "ce_orig": 0.8744357824325562, + "epoch": 0.5093105183694011, + "kl_loss": 0.19304901361465454, + "loss_ib": 0.005681214388459921, + "step": 1771 + }, + { + "ce_ib": 3.5979347229003906, + "ce_orig": 0.5374438166618347, + "epoch": 0.5093105183694011, + "kl_loss": 0.13741683959960938, + "loss_ib": 0.004972103051841259, + "step": 1771 + }, + { + "ce_ib": 4.961030006408691, + "ce_orig": 0.8366300463676453, + "epoch": 0.5095981019483787, + "kl_loss": 0.2362416386604309, + "loss_ib": 0.0073234462179243565, + "step": 1772 + }, + { + "ce_ib": 4.925734043121338, + "ce_orig": 0.6740673780441284, + "epoch": 0.5095981019483787, + "kl_loss": 0.22513160109519958, + "loss_ib": 0.0071770497597754, + "step": 1772 + }, + { + "ce_ib": 8.155735969543457, + "ce_orig": 1.4952911138534546, + "epoch": 0.5095981019483787, + "kl_loss": 0.2642083764076233, + "loss_ib": 0.010797820053994656, + "step": 1772 + }, + { + "ce_ib": 4.71515417098999, + "ce_orig": 0.9140082597732544, + "epoch": 0.5095981019483787, + "kl_loss": 0.4507846236228943, + "loss_ib": 0.009223000146448612, + "step": 1772 + }, + { + "ce_ib": 5.51159143447876, + "ce_orig": 0.9571477770805359, + "epoch": 0.5098856855273564, + "kl_loss": 0.23798805475234985, + "loss_ib": 0.00789147149771452, + "step": 1773 + }, + { + "ce_ib": 5.530810832977295, + "ce_orig": 0.43717384338378906, + "epoch": 0.5098856855273564, + "kl_loss": 0.3572522699832916, + "loss_ib": 0.009103333577513695, + "step": 1773 + }, + { + "ce_ib": 5.345722198486328, + "ce_orig": 0.9172801375389099, + "epoch": 0.5098856855273564, + "kl_loss": 0.15426042675971985, + "loss_ib": 0.00688832625746727, + "step": 1773 + }, + { + "ce_ib": 9.65254020690918, + "ce_orig": 1.5662338733673096, + "epoch": 0.5098856855273564, + "kl_loss": 0.17158162593841553, + "loss_ib": 0.011368355713784695, + "step": 1773 + }, + { + "ce_ib": 5.850159645080566, + "ce_orig": 0.9013000726699829, + "epoch": 0.510173269106334, + "kl_loss": 0.22897228598594666, + "loss_ib": 0.008139882236719131, + "step": 1774 + }, + { + "ce_ib": 2.766835927963257, + "ce_orig": 0.6518144607543945, + "epoch": 0.510173269106334, + "kl_loss": 0.20160110294818878, + "loss_ib": 0.004782847128808498, + "step": 1774 + }, + { + "ce_ib": 4.017307758331299, + "ce_orig": 0.7030872106552124, + "epoch": 0.510173269106334, + "kl_loss": 0.19265756011009216, + "loss_ib": 0.005943883676081896, + "step": 1774 + }, + { + "ce_ib": 6.924398899078369, + "ce_orig": 1.2229417562484741, + "epoch": 0.510173269106334, + "kl_loss": 0.23021990060806274, + "loss_ib": 0.009226597845554352, + "step": 1774 + }, + { + "epoch": 0.5104608526853117, + "grad_norm": 0.11179368942975998, + "learning_rate": 9.498572061349442e-06, + "loss": 0.8365, + "step": 1775 + }, + { + "ce_ib": 7.849050998687744, + "ce_orig": 0.9646425247192383, + "epoch": 0.5104608526853117, + "kl_loss": 0.17950886487960815, + "loss_ib": 0.009644139558076859, + "step": 1775 + }, + { + "ce_ib": 5.053280830383301, + "ce_orig": 0.9874280691146851, + "epoch": 0.5104608526853117, + "kl_loss": 0.18137818574905396, + "loss_ib": 0.006867062766104937, + "step": 1775 + }, + { + "ce_ib": 5.154513359069824, + "ce_orig": 0.38847586512565613, + "epoch": 0.5104608526853117, + "kl_loss": 0.2768966257572174, + "loss_ib": 0.007923480123281479, + "step": 1775 + }, + { + "ce_ib": 7.6988630294799805, + "ce_orig": 1.6250163316726685, + "epoch": 0.5104608526853117, + "kl_loss": 0.2540596127510071, + "loss_ib": 0.01023945864289999, + "step": 1775 + }, + { + "ce_ib": 1.5759936571121216, + "ce_orig": 0.16515246033668518, + "epoch": 0.5107484362642893, + "kl_loss": 0.5132174491882324, + "loss_ib": 0.006708168424665928, + "step": 1776 + }, + { + "ce_ib": 5.543222904205322, + "ce_orig": 0.925904393196106, + "epoch": 0.5107484362642893, + "kl_loss": 0.2087169885635376, + "loss_ib": 0.007630392909049988, + "step": 1776 + }, + { + "ce_ib": 6.327423572540283, + "ce_orig": 0.9812836647033691, + "epoch": 0.5107484362642893, + "kl_loss": 0.31686538457870483, + "loss_ib": 0.009496076963841915, + "step": 1776 + }, + { + "ce_ib": 6.192408561706543, + "ce_orig": 1.1052305698394775, + "epoch": 0.5107484362642893, + "kl_loss": 0.28637051582336426, + "loss_ib": 0.00905611366033554, + "step": 1776 + }, + { + "ce_ib": 4.368666172027588, + "ce_orig": 0.8808457255363464, + "epoch": 0.511036019843267, + "kl_loss": 0.2151002436876297, + "loss_ib": 0.006519668735563755, + "step": 1777 + }, + { + "ce_ib": 3.9586246013641357, + "ce_orig": 0.8993598818778992, + "epoch": 0.511036019843267, + "kl_loss": 0.25651246309280396, + "loss_ib": 0.0065237488597631454, + "step": 1777 + }, + { + "ce_ib": 5.036628723144531, + "ce_orig": 0.716555118560791, + "epoch": 0.511036019843267, + "kl_loss": 0.5716152191162109, + "loss_ib": 0.010752780362963676, + "step": 1777 + }, + { + "ce_ib": 5.706982135772705, + "ce_orig": 0.6277963519096375, + "epoch": 0.511036019843267, + "kl_loss": 0.23554293811321259, + "loss_ib": 0.008062411099672318, + "step": 1777 + }, + { + "ce_ib": 5.136541366577148, + "ce_orig": 1.0825738906860352, + "epoch": 0.5113236034222446, + "kl_loss": 0.18041208386421204, + "loss_ib": 0.006940662860870361, + "step": 1778 + }, + { + "ce_ib": 2.9055914878845215, + "ce_orig": 0.42757514119148254, + "epoch": 0.5113236034222446, + "kl_loss": 0.31750643253326416, + "loss_ib": 0.006080655846744776, + "step": 1778 + }, + { + "ce_ib": 4.574838161468506, + "ce_orig": 0.8141748905181885, + "epoch": 0.5113236034222446, + "kl_loss": 0.22247333824634552, + "loss_ib": 0.006799571216106415, + "step": 1778 + }, + { + "ce_ib": 2.3404247760772705, + "ce_orig": 0.24534167349338531, + "epoch": 0.5113236034222446, + "kl_loss": 0.5108161568641663, + "loss_ib": 0.007448586169630289, + "step": 1778 + }, + { + "ce_ib": 6.489202976226807, + "ce_orig": 0.9206019043922424, + "epoch": 0.5116111870012222, + "kl_loss": 0.32573944330215454, + "loss_ib": 0.009746597148478031, + "step": 1779 + }, + { + "ce_ib": 4.82711124420166, + "ce_orig": 0.5427364706993103, + "epoch": 0.5116111870012222, + "kl_loss": 0.280947208404541, + "loss_ib": 0.007636583410203457, + "step": 1779 + }, + { + "ce_ib": 4.279891490936279, + "ce_orig": 0.8699036836624146, + "epoch": 0.5116111870012222, + "kl_loss": 0.26657766103744507, + "loss_ib": 0.006945668254047632, + "step": 1779 + }, + { + "ce_ib": 6.691254138946533, + "ce_orig": 1.0939675569534302, + "epoch": 0.5116111870012222, + "kl_loss": 0.22720719873905182, + "loss_ib": 0.0089633259922266, + "step": 1779 + }, + { + "epoch": 0.5118987705801998, + "grad_norm": 0.11643703281879425, + "learning_rate": 9.495179194468135e-06, + "loss": 0.8764, + "step": 1780 + }, + { + "ce_ib": 6.29385232925415, + "ce_orig": 0.8807694911956787, + "epoch": 0.5118987705801998, + "kl_loss": 0.7251090407371521, + "loss_ib": 0.013544943183660507, + "step": 1780 + }, + { + "ce_ib": 3.4683852195739746, + "ce_orig": 0.5015277862548828, + "epoch": 0.5118987705801998, + "kl_loss": 0.17699450254440308, + "loss_ib": 0.005238330457359552, + "step": 1780 + }, + { + "ce_ib": 4.6820526123046875, + "ce_orig": 0.8917681574821472, + "epoch": 0.5118987705801998, + "kl_loss": 0.20606280863285065, + "loss_ib": 0.006742680445313454, + "step": 1780 + }, + { + "ce_ib": 5.075531005859375, + "ce_orig": 1.0754148960113525, + "epoch": 0.5118987705801998, + "kl_loss": 0.15766265988349915, + "loss_ib": 0.006652157288044691, + "step": 1780 + }, + { + "ce_ib": 5.327798366546631, + "ce_orig": 0.7447091341018677, + "epoch": 0.5121863541591775, + "kl_loss": 0.21377348899841309, + "loss_ib": 0.007465533446520567, + "step": 1781 + }, + { + "ce_ib": 5.237917423248291, + "ce_orig": 0.7073243856430054, + "epoch": 0.5121863541591775, + "kl_loss": 0.2471804916858673, + "loss_ib": 0.007709722500294447, + "step": 1781 + }, + { + "ce_ib": 6.874824523925781, + "ce_orig": 1.0658276081085205, + "epoch": 0.5121863541591775, + "kl_loss": 0.24324162304401398, + "loss_ib": 0.009307241067290306, + "step": 1781 + }, + { + "ce_ib": 5.399325847625732, + "ce_orig": 0.6844679713249207, + "epoch": 0.5121863541591775, + "kl_loss": 0.20346783101558685, + "loss_ib": 0.0074340044520795345, + "step": 1781 + }, + { + "ce_ib": 8.268445014953613, + "ce_orig": 1.5828357934951782, + "epoch": 0.5124739377381552, + "kl_loss": 0.14310169219970703, + "loss_ib": 0.009699461981654167, + "step": 1782 + }, + { + "ce_ib": 3.4318509101867676, + "ce_orig": 0.64007169008255, + "epoch": 0.5124739377381552, + "kl_loss": 0.22269004583358765, + "loss_ib": 0.00565875181928277, + "step": 1782 + }, + { + "ce_ib": 4.808406352996826, + "ce_orig": 0.5978017449378967, + "epoch": 0.5124739377381552, + "kl_loss": 0.3129728436470032, + "loss_ib": 0.007938134483993053, + "step": 1782 + }, + { + "ce_ib": 7.869020462036133, + "ce_orig": 1.6359044313430786, + "epoch": 0.5124739377381552, + "kl_loss": 0.2035483568906784, + "loss_ib": 0.00990450382232666, + "step": 1782 + }, + { + "ce_ib": 2.7303977012634277, + "ce_orig": 0.5645561218261719, + "epoch": 0.5127615213171328, + "kl_loss": 0.1680532693862915, + "loss_ib": 0.004410930443555117, + "step": 1783 + }, + { + "ce_ib": 4.404380798339844, + "ce_orig": 0.6755330562591553, + "epoch": 0.5127615213171328, + "kl_loss": 0.17769068479537964, + "loss_ib": 0.006181287579238415, + "step": 1783 + }, + { + "ce_ib": 4.336306095123291, + "ce_orig": 0.7614088654518127, + "epoch": 0.5127615213171328, + "kl_loss": 0.16061021387577057, + "loss_ib": 0.005942408461123705, + "step": 1783 + }, + { + "ce_ib": 6.839239120483398, + "ce_orig": 1.1383109092712402, + "epoch": 0.5127615213171328, + "kl_loss": 0.2974740266799927, + "loss_ib": 0.009813979268074036, + "step": 1783 + }, + { + "ce_ib": 7.303009510040283, + "ce_orig": 0.9598062634468079, + "epoch": 0.5130491048961104, + "kl_loss": 0.21405655145645142, + "loss_ib": 0.009443574585020542, + "step": 1784 + }, + { + "ce_ib": 6.253368854522705, + "ce_orig": 0.7201490998268127, + "epoch": 0.5130491048961104, + "kl_loss": 0.1870848834514618, + "loss_ib": 0.008124217391014099, + "step": 1784 + }, + { + "ce_ib": 3.032378911972046, + "ce_orig": 0.34329771995544434, + "epoch": 0.5130491048961104, + "kl_loss": 0.38947010040283203, + "loss_ib": 0.006927079986780882, + "step": 1784 + }, + { + "ce_ib": 3.8072333335876465, + "ce_orig": 0.5993536710739136, + "epoch": 0.5130491048961104, + "kl_loss": 0.1714390069246292, + "loss_ib": 0.0055216234177351, + "step": 1784 + }, + { + "epoch": 0.5133366884750881, + "grad_norm": 0.11334189772605896, + "learning_rate": 9.491775497656796e-06, + "loss": 0.9333, + "step": 1785 + }, + { + "ce_ib": 4.950538635253906, + "ce_orig": 0.7863808274269104, + "epoch": 0.5133366884750881, + "kl_loss": 0.17428286373615265, + "loss_ib": 0.006693367380648851, + "step": 1785 + }, + { + "ce_ib": 5.514882564544678, + "ce_orig": 0.9535226821899414, + "epoch": 0.5133366884750881, + "kl_loss": 0.17509450018405914, + "loss_ib": 0.007265827618539333, + "step": 1785 + }, + { + "ce_ib": 2.5482590198516846, + "ce_orig": 0.5628145933151245, + "epoch": 0.5133366884750881, + "kl_loss": 0.15639597177505493, + "loss_ib": 0.004112218972295523, + "step": 1785 + }, + { + "ce_ib": 4.515284538269043, + "ce_orig": 0.6384725570678711, + "epoch": 0.5133366884750881, + "kl_loss": 0.255043089389801, + "loss_ib": 0.007065715733915567, + "step": 1785 + }, + { + "ce_ib": 3.976243019104004, + "ce_orig": 0.5672842860221863, + "epoch": 0.5136242720540657, + "kl_loss": 0.3050616979598999, + "loss_ib": 0.007026860024780035, + "step": 1786 + }, + { + "ce_ib": 5.6237640380859375, + "ce_orig": 0.9839730858802795, + "epoch": 0.5136242720540657, + "kl_loss": 0.1823396533727646, + "loss_ib": 0.00744716078042984, + "step": 1786 + }, + { + "ce_ib": 6.167069435119629, + "ce_orig": 1.0040804147720337, + "epoch": 0.5136242720540657, + "kl_loss": 0.31575673818588257, + "loss_ib": 0.009324637241661549, + "step": 1786 + }, + { + "ce_ib": 7.244035243988037, + "ce_orig": 0.9823845028877258, + "epoch": 0.5136242720540657, + "kl_loss": 0.16594602167606354, + "loss_ib": 0.008903495036065578, + "step": 1786 + }, + { + "ce_ib": 5.678101062774658, + "ce_orig": 0.7103188037872314, + "epoch": 0.5139118556330433, + "kl_loss": 0.356295108795166, + "loss_ib": 0.009241051971912384, + "step": 1787 + }, + { + "ce_ib": 5.744541645050049, + "ce_orig": 0.6343448758125305, + "epoch": 0.5139118556330433, + "kl_loss": 0.34542733430862427, + "loss_ib": 0.009198814630508423, + "step": 1787 + }, + { + "ce_ib": 7.367956638336182, + "ce_orig": 1.3359696865081787, + "epoch": 0.5139118556330433, + "kl_loss": 0.17473283410072327, + "loss_ib": 0.00911528430879116, + "step": 1787 + }, + { + "ce_ib": 3.320540189743042, + "ce_orig": 0.48704051971435547, + "epoch": 0.5139118556330433, + "kl_loss": 0.2031700611114502, + "loss_ib": 0.0053522405214607716, + "step": 1787 + }, + { + "ce_ib": 5.3151631355285645, + "ce_orig": 0.6564879417419434, + "epoch": 0.514199439212021, + "kl_loss": 0.6673084497451782, + "loss_ib": 0.011988247744739056, + "step": 1788 + }, + { + "ce_ib": 5.180042743682861, + "ce_orig": 0.680637001991272, + "epoch": 0.514199439212021, + "kl_loss": 0.264859139919281, + "loss_ib": 0.007828634232282639, + "step": 1788 + }, + { + "ce_ib": 7.5746989250183105, + "ce_orig": 0.6266165375709534, + "epoch": 0.514199439212021, + "kl_loss": 0.2970588207244873, + "loss_ib": 0.010545287281274796, + "step": 1788 + }, + { + "ce_ib": 6.376684665679932, + "ce_orig": 0.6511795520782471, + "epoch": 0.514199439212021, + "kl_loss": 0.3848002552986145, + "loss_ib": 0.010224687866866589, + "step": 1788 + }, + { + "ce_ib": 9.771164894104004, + "ce_orig": 1.8776581287384033, + "epoch": 0.5144870227909987, + "kl_loss": 0.3116125464439392, + "loss_ib": 0.012887290678918362, + "step": 1789 + }, + { + "ce_ib": 5.537979602813721, + "ce_orig": 0.7493560910224915, + "epoch": 0.5144870227909987, + "kl_loss": 0.169920414686203, + "loss_ib": 0.00723718386143446, + "step": 1789 + }, + { + "ce_ib": 3.1610782146453857, + "ce_orig": 0.5918846726417542, + "epoch": 0.5144870227909987, + "kl_loss": 0.1520177721977234, + "loss_ib": 0.0046812561340630054, + "step": 1789 + }, + { + "ce_ib": 3.679049253463745, + "ce_orig": 0.7241947650909424, + "epoch": 0.5144870227909987, + "kl_loss": 0.21339523792266846, + "loss_ib": 0.005813001189380884, + "step": 1789 + }, + { + "epoch": 0.5147746063699763, + "grad_norm": 0.12801145017147064, + "learning_rate": 9.488360979115719e-06, + "loss": 0.8703, + "step": 1790 + }, + { + "ce_ib": 5.356670379638672, + "ce_orig": 0.858161985874176, + "epoch": 0.5147746063699763, + "kl_loss": 0.17264670133590698, + "loss_ib": 0.007083137519657612, + "step": 1790 + }, + { + "ce_ib": 5.483158588409424, + "ce_orig": 0.8920816779136658, + "epoch": 0.5147746063699763, + "kl_loss": 0.17023879289627075, + "loss_ib": 0.0071855466812849045, + "step": 1790 + }, + { + "ce_ib": 5.797133445739746, + "ce_orig": 0.503385603427887, + "epoch": 0.5147746063699763, + "kl_loss": 0.2506237030029297, + "loss_ib": 0.008303370326757431, + "step": 1790 + }, + { + "ce_ib": 6.2926154136657715, + "ce_orig": 0.9780447483062744, + "epoch": 0.5147746063699763, + "kl_loss": 0.24656260013580322, + "loss_ib": 0.008758241310715675, + "step": 1790 + }, + { + "ce_ib": 8.355574607849121, + "ce_orig": 1.5691980123519897, + "epoch": 0.5150621899489539, + "kl_loss": 0.1623454988002777, + "loss_ib": 0.00997903011739254, + "step": 1791 + }, + { + "ce_ib": 3.8799028396606445, + "ce_orig": 0.3765094578266144, + "epoch": 0.5150621899489539, + "kl_loss": 0.2286275327205658, + "loss_ib": 0.006166177801787853, + "step": 1791 + }, + { + "ce_ib": 3.321380615234375, + "ce_orig": 0.5442015528678894, + "epoch": 0.5150621899489539, + "kl_loss": 0.1851947158575058, + "loss_ib": 0.0051733278669416904, + "step": 1791 + }, + { + "ce_ib": 8.461237907409668, + "ce_orig": 1.496105670928955, + "epoch": 0.5150621899489539, + "kl_loss": 0.21716958284378052, + "loss_ib": 0.010632934048771858, + "step": 1791 + }, + { + "ce_ib": 5.559602737426758, + "ce_orig": 0.7069318294525146, + "epoch": 0.5153497735279315, + "kl_loss": 0.1599215269088745, + "loss_ib": 0.0071588181890547276, + "step": 1792 + }, + { + "ce_ib": 5.715478897094727, + "ce_orig": 0.5931094884872437, + "epoch": 0.5153497735279315, + "kl_loss": 0.20943738520145416, + "loss_ib": 0.0078098527155816555, + "step": 1792 + }, + { + "ce_ib": 6.070672035217285, + "ce_orig": 0.8749914169311523, + "epoch": 0.5153497735279315, + "kl_loss": 0.2170514613389969, + "loss_ib": 0.00824118684977293, + "step": 1792 + }, + { + "ce_ib": 5.315495014190674, + "ce_orig": 0.7516446709632874, + "epoch": 0.5153497735279315, + "kl_loss": 0.236702099442482, + "loss_ib": 0.007682515773922205, + "step": 1792 + }, + { + "ce_ib": 6.013267517089844, + "ce_orig": 0.95022052526474, + "epoch": 0.5156373571069092, + "kl_loss": 0.15140356123447418, + "loss_ib": 0.007527302950620651, + "step": 1793 + }, + { + "ce_ib": 3.3293774127960205, + "ce_orig": 0.5675919055938721, + "epoch": 0.5156373571069092, + "kl_loss": 0.2904687523841858, + "loss_ib": 0.006234065163880587, + "step": 1793 + }, + { + "ce_ib": 6.802953720092773, + "ce_orig": 1.1898548603057861, + "epoch": 0.5156373571069092, + "kl_loss": 0.2126396894454956, + "loss_ib": 0.008929350413382053, + "step": 1793 + }, + { + "ce_ib": 5.496389865875244, + "ce_orig": 0.4064522683620453, + "epoch": 0.5156373571069092, + "kl_loss": 0.18568216264247894, + "loss_ib": 0.007353211287409067, + "step": 1793 + }, + { + "ce_ib": 2.6666884422302246, + "ce_orig": 0.3487064838409424, + "epoch": 0.5159249406858868, + "kl_loss": 0.363121896982193, + "loss_ib": 0.006297907792031765, + "step": 1794 + }, + { + "ce_ib": 6.851724624633789, + "ce_orig": 1.0352543592453003, + "epoch": 0.5159249406858868, + "kl_loss": 0.1926984190940857, + "loss_ib": 0.008778708986938, + "step": 1794 + }, + { + "ce_ib": 7.737794399261475, + "ce_orig": 1.1757160425186157, + "epoch": 0.5159249406858868, + "kl_loss": 0.2267036885023117, + "loss_ib": 0.010004831477999687, + "step": 1794 + }, + { + "ce_ib": 6.57763147354126, + "ce_orig": 1.18329656124115, + "epoch": 0.5159249406858868, + "kl_loss": 0.28247499465942383, + "loss_ib": 0.009402381256222725, + "step": 1794 + }, + { + "epoch": 0.5162125242648645, + "grad_norm": 0.10428358614444733, + "learning_rate": 9.484935647071273e-06, + "loss": 0.8431, + "step": 1795 + }, + { + "ce_ib": 6.214389801025391, + "ce_orig": 1.3849897384643555, + "epoch": 0.5162125242648645, + "kl_loss": 0.19358517229557037, + "loss_ib": 0.00815024133771658, + "step": 1795 + }, + { + "ce_ib": 6.025190830230713, + "ce_orig": 0.49577391147613525, + "epoch": 0.5162125242648645, + "kl_loss": 0.26496514678001404, + "loss_ib": 0.008674842305481434, + "step": 1795 + }, + { + "ce_ib": 4.6445722579956055, + "ce_orig": 0.952302098274231, + "epoch": 0.5162125242648645, + "kl_loss": 0.20629054307937622, + "loss_ib": 0.006707477383315563, + "step": 1795 + }, + { + "ce_ib": 6.648656845092773, + "ce_orig": 0.8857218027114868, + "epoch": 0.5162125242648645, + "kl_loss": 0.24975921213626862, + "loss_ib": 0.009146248921751976, + "step": 1795 + }, + { + "ce_ib": 5.934831142425537, + "ce_orig": 0.5952855944633484, + "epoch": 0.5165001078438421, + "kl_loss": 0.29829543828964233, + "loss_ib": 0.008917785249650478, + "step": 1796 + }, + { + "ce_ib": 8.936899185180664, + "ce_orig": 1.2357314825057983, + "epoch": 0.5165001078438421, + "kl_loss": 0.2907228171825409, + "loss_ib": 0.011844126507639885, + "step": 1796 + }, + { + "ce_ib": 7.4102959632873535, + "ce_orig": 1.278591275215149, + "epoch": 0.5165001078438421, + "kl_loss": 0.23426221311092377, + "loss_ib": 0.009752918034791946, + "step": 1796 + }, + { + "ce_ib": 6.351538181304932, + "ce_orig": 1.2011011838912964, + "epoch": 0.5165001078438421, + "kl_loss": 0.18798768520355225, + "loss_ib": 0.008231415413320065, + "step": 1796 + }, + { + "ce_ib": 4.647305965423584, + "ce_orig": 0.6263547539710999, + "epoch": 0.5167876914228198, + "kl_loss": 0.24468818306922913, + "loss_ib": 0.0070941876620054245, + "step": 1797 + }, + { + "ce_ib": 4.786785125732422, + "ce_orig": 0.805772602558136, + "epoch": 0.5167876914228198, + "kl_loss": 0.2500882148742676, + "loss_ib": 0.007287667132914066, + "step": 1797 + }, + { + "ce_ib": 4.60325288772583, + "ce_orig": 0.7054445743560791, + "epoch": 0.5167876914228198, + "kl_loss": 0.25732022523880005, + "loss_ib": 0.007176455110311508, + "step": 1797 + }, + { + "ce_ib": 5.693312168121338, + "ce_orig": 0.6329914927482605, + "epoch": 0.5167876914228198, + "kl_loss": 0.33628445863723755, + "loss_ib": 0.009056156501173973, + "step": 1797 + }, + { + "ce_ib": 7.216248035430908, + "ce_orig": 1.340164303779602, + "epoch": 0.5170752750017974, + "kl_loss": 0.1946697235107422, + "loss_ib": 0.009162944741547108, + "step": 1798 + }, + { + "ce_ib": 5.003932952880859, + "ce_orig": 0.7873584628105164, + "epoch": 0.5170752750017974, + "kl_loss": 0.206161230802536, + "loss_ib": 0.007065545301884413, + "step": 1798 + }, + { + "ce_ib": 3.926520586013794, + "ce_orig": 0.6281074285507202, + "epoch": 0.5170752750017974, + "kl_loss": 0.15041778981685638, + "loss_ib": 0.005430698394775391, + "step": 1798 + }, + { + "ce_ib": 6.007340908050537, + "ce_orig": 0.9729514718055725, + "epoch": 0.5170752750017974, + "kl_loss": 0.16553735733032227, + "loss_ib": 0.007662714459002018, + "step": 1798 + }, + { + "ce_ib": 7.125491142272949, + "ce_orig": 0.9569961428642273, + "epoch": 0.517362858580775, + "kl_loss": 0.18219032883644104, + "loss_ib": 0.008947394788265228, + "step": 1799 + }, + { + "ce_ib": 7.163382053375244, + "ce_orig": 0.9688937067985535, + "epoch": 0.517362858580775, + "kl_loss": 0.23835283517837524, + "loss_ib": 0.009546910412609577, + "step": 1799 + }, + { + "ce_ib": 7.227553367614746, + "ce_orig": 1.2459492683410645, + "epoch": 0.517362858580775, + "kl_loss": 0.25786083936691284, + "loss_ib": 0.009806161746382713, + "step": 1799 + }, + { + "ce_ib": 7.201501369476318, + "ce_orig": 0.6090161800384521, + "epoch": 0.517362858580775, + "kl_loss": 0.34199318289756775, + "loss_ib": 0.010621433146297932, + "step": 1799 + }, + { + "epoch": 0.5176504421597526, + "grad_norm": 0.11489441245794296, + "learning_rate": 9.481499509775878e-06, + "loss": 0.9051, + "step": 1800 + }, + { + "ce_ib": 3.390878915786743, + "ce_orig": 0.4669150412082672, + "epoch": 0.5176504421597526, + "kl_loss": 0.2900773286819458, + "loss_ib": 0.006291652098298073, + "step": 1800 + }, + { + "ce_ib": 6.904531478881836, + "ce_orig": 1.0020664930343628, + "epoch": 0.5176504421597526, + "kl_loss": 0.2667804956436157, + "loss_ib": 0.009572336450219154, + "step": 1800 + }, + { + "ce_ib": 3.6753551959991455, + "ce_orig": 0.4369771480560303, + "epoch": 0.5176504421597526, + "kl_loss": 0.27190694212913513, + "loss_ib": 0.0063944244757294655, + "step": 1800 + }, + { + "ce_ib": 5.33976411819458, + "ce_orig": 0.9397656321525574, + "epoch": 0.5176504421597526, + "kl_loss": 0.3777155578136444, + "loss_ib": 0.009116919711232185, + "step": 1800 + }, + { + "ce_ib": 2.7868943214416504, + "ce_orig": 0.45415911078453064, + "epoch": 0.5179380257387303, + "kl_loss": 0.4942619502544403, + "loss_ib": 0.007729513570666313, + "step": 1801 + }, + { + "ce_ib": 5.863338947296143, + "ce_orig": 0.6420555114746094, + "epoch": 0.5179380257387303, + "kl_loss": 0.24516044557094574, + "loss_ib": 0.008314943872392178, + "step": 1801 + }, + { + "ce_ib": 3.1034252643585205, + "ce_orig": 0.5916230082511902, + "epoch": 0.5179380257387303, + "kl_loss": 0.18923625349998474, + "loss_ib": 0.004995787516236305, + "step": 1801 + }, + { + "ce_ib": 6.073347091674805, + "ce_orig": 0.6509691476821899, + "epoch": 0.5179380257387303, + "kl_loss": 0.22603951394557953, + "loss_ib": 0.008333742618560791, + "step": 1801 + }, + { + "ce_ib": 3.9230880737304688, + "ce_orig": 0.5485641360282898, + "epoch": 0.518225609317708, + "kl_loss": 0.23585425317287445, + "loss_ib": 0.006281630136072636, + "step": 1802 + }, + { + "ce_ib": 4.007041931152344, + "ce_orig": 0.6441857218742371, + "epoch": 0.518225609317708, + "kl_loss": 0.2565135061740875, + "loss_ib": 0.006572177167981863, + "step": 1802 + }, + { + "ce_ib": 8.00465202331543, + "ce_orig": 1.16056489944458, + "epoch": 0.518225609317708, + "kl_loss": 0.36502379179000854, + "loss_ib": 0.011654889211058617, + "step": 1802 + }, + { + "ce_ib": 5.138528347015381, + "ce_orig": 0.8069429397583008, + "epoch": 0.518225609317708, + "kl_loss": 0.20522454380989075, + "loss_ib": 0.0071907732635736465, + "step": 1802 + }, + { + "ce_ib": 3.2270872592926025, + "ce_orig": 0.6316734552383423, + "epoch": 0.5185131928966856, + "kl_loss": 0.2573164701461792, + "loss_ib": 0.005800251848995686, + "step": 1803 + }, + { + "ce_ib": 6.0523858070373535, + "ce_orig": 0.6935653686523438, + "epoch": 0.5185131928966856, + "kl_loss": 0.4677497148513794, + "loss_ib": 0.01072988286614418, + "step": 1803 + }, + { + "ce_ib": 4.9070281982421875, + "ce_orig": 0.7013511061668396, + "epoch": 0.5185131928966856, + "kl_loss": 0.22718186676502228, + "loss_ib": 0.007178847212344408, + "step": 1803 + }, + { + "ce_ib": 6.728785991668701, + "ce_orig": 1.182370662689209, + "epoch": 0.5185131928966856, + "kl_loss": 0.15556176006793976, + "loss_ib": 0.008284403942525387, + "step": 1803 + }, + { + "ce_ib": 4.9489054679870605, + "ce_orig": 0.6644280552864075, + "epoch": 0.5188007764756633, + "kl_loss": 0.2218227982521057, + "loss_ib": 0.0071671330370008945, + "step": 1804 + }, + { + "ce_ib": 3.5137171745300293, + "ce_orig": 0.744360089302063, + "epoch": 0.5188007764756633, + "kl_loss": 0.14267009496688843, + "loss_ib": 0.0049404180608689785, + "step": 1804 + }, + { + "ce_ib": 6.757044792175293, + "ce_orig": 1.1124842166900635, + "epoch": 0.5188007764756633, + "kl_loss": 0.23045742511749268, + "loss_ib": 0.009061618708074093, + "step": 1804 + }, + { + "ce_ib": 5.489344596862793, + "ce_orig": 1.2817620038986206, + "epoch": 0.5188007764756633, + "kl_loss": 0.2718643844127655, + "loss_ib": 0.008207988925278187, + "step": 1804 + }, + { + "epoch": 0.5190883600546409, + "grad_norm": 0.13748838007450104, + "learning_rate": 9.478052575507983e-06, + "loss": 0.883, + "step": 1805 + }, + { + "ce_ib": 4.155758857727051, + "ce_orig": 0.7262378931045532, + "epoch": 0.5190883600546409, + "kl_loss": 0.22212238609790802, + "loss_ib": 0.006376982666552067, + "step": 1805 + }, + { + "ce_ib": 6.122762680053711, + "ce_orig": 0.7501749992370605, + "epoch": 0.5190883600546409, + "kl_loss": 0.2191631942987442, + "loss_ib": 0.008314394392073154, + "step": 1805 + }, + { + "ce_ib": 4.582356929779053, + "ce_orig": 0.6791300177574158, + "epoch": 0.5190883600546409, + "kl_loss": 0.14363408088684082, + "loss_ib": 0.006018698215484619, + "step": 1805 + }, + { + "ce_ib": 7.492832183837891, + "ce_orig": 1.3158620595932007, + "epoch": 0.5190883600546409, + "kl_loss": 0.16507276892662048, + "loss_ib": 0.009143560193479061, + "step": 1805 + }, + { + "ce_ib": 8.37580394744873, + "ce_orig": 1.5903420448303223, + "epoch": 0.5193759436336185, + "kl_loss": 0.2548186779022217, + "loss_ib": 0.010923990979790688, + "step": 1806 + }, + { + "ce_ib": 3.802596092224121, + "ce_orig": 0.7291561365127563, + "epoch": 0.5193759436336185, + "kl_loss": 0.2117210030555725, + "loss_ib": 0.0059198057278990746, + "step": 1806 + }, + { + "ce_ib": 4.115170001983643, + "ce_orig": 1.112787127494812, + "epoch": 0.5193759436336185, + "kl_loss": 0.1683754026889801, + "loss_ib": 0.005798923783004284, + "step": 1806 + }, + { + "ce_ib": 5.299164295196533, + "ce_orig": 1.3519405126571655, + "epoch": 0.5193759436336185, + "kl_loss": 0.11419677734375, + "loss_ib": 0.006441132165491581, + "step": 1806 + }, + { + "ce_ib": 4.519089698791504, + "ce_orig": 0.6978946924209595, + "epoch": 0.5196635272125961, + "kl_loss": 0.3922877311706543, + "loss_ib": 0.008441966958343983, + "step": 1807 + }, + { + "ce_ib": 7.629366874694824, + "ce_orig": 1.3517639636993408, + "epoch": 0.5196635272125961, + "kl_loss": 0.2827756702899933, + "loss_ib": 0.010457123629748821, + "step": 1807 + }, + { + "ce_ib": 6.334427356719971, + "ce_orig": 1.0139820575714111, + "epoch": 0.5196635272125961, + "kl_loss": 0.5295234322547913, + "loss_ib": 0.011629662476480007, + "step": 1807 + }, + { + "ce_ib": 4.028738021850586, + "ce_orig": 0.34927576780319214, + "epoch": 0.5196635272125961, + "kl_loss": 0.24801911413669586, + "loss_ib": 0.006508929189294577, + "step": 1807 + }, + { + "ce_ib": 5.080193996429443, + "ce_orig": 0.6764727830886841, + "epoch": 0.5199511107915739, + "kl_loss": 0.28257232904434204, + "loss_ib": 0.00790591724216938, + "step": 1808 + }, + { + "ce_ib": 4.959287643432617, + "ce_orig": 0.680107831954956, + "epoch": 0.5199511107915739, + "kl_loss": 0.37651553750038147, + "loss_ib": 0.008724442683160305, + "step": 1808 + }, + { + "ce_ib": 3.6068825721740723, + "ce_orig": 0.5630862712860107, + "epoch": 0.5199511107915739, + "kl_loss": 0.2557922601699829, + "loss_ib": 0.00616480503231287, + "step": 1808 + }, + { + "ce_ib": 2.9706921577453613, + "ce_orig": 0.49435415863990784, + "epoch": 0.5199511107915739, + "kl_loss": 0.24606207013130188, + "loss_ib": 0.0054313126020133495, + "step": 1808 + }, + { + "ce_ib": 8.037646293640137, + "ce_orig": 1.4617276191711426, + "epoch": 0.5202386943705515, + "kl_loss": 0.73418128490448, + "loss_ib": 0.015379459597170353, + "step": 1809 + }, + { + "ce_ib": 5.94744348526001, + "ce_orig": 0.6382511854171753, + "epoch": 0.5202386943705515, + "kl_loss": 0.297149658203125, + "loss_ib": 0.008918940089643002, + "step": 1809 + }, + { + "ce_ib": 3.2467129230499268, + "ce_orig": 0.7186192870140076, + "epoch": 0.5202386943705515, + "kl_loss": 0.4525447189807892, + "loss_ib": 0.007772160228341818, + "step": 1809 + }, + { + "ce_ib": 3.5433926582336426, + "ce_orig": 0.39463841915130615, + "epoch": 0.5202386943705515, + "kl_loss": 0.23873688280582428, + "loss_ib": 0.005930761341005564, + "step": 1809 + }, + { + "epoch": 0.5205262779495291, + "grad_norm": 0.1307157725095749, + "learning_rate": 9.47459485257206e-06, + "loss": 0.8351, + "step": 1810 + }, + { + "ce_ib": 6.789810657501221, + "ce_orig": 1.124668836593628, + "epoch": 0.5205262779495291, + "kl_loss": 0.2300483137369156, + "loss_ib": 0.009090293198823929, + "step": 1810 + }, + { + "ce_ib": 3.373751163482666, + "ce_orig": 0.68404620885849, + "epoch": 0.5205262779495291, + "kl_loss": 0.17840611934661865, + "loss_ib": 0.005157812498509884, + "step": 1810 + }, + { + "ce_ib": 7.042891025543213, + "ce_orig": 1.2993831634521484, + "epoch": 0.5205262779495291, + "kl_loss": 0.20561161637306213, + "loss_ib": 0.009099007584154606, + "step": 1810 + }, + { + "ce_ib": 3.786529064178467, + "ce_orig": 0.6418856978416443, + "epoch": 0.5205262779495291, + "kl_loss": 0.17593012750148773, + "loss_ib": 0.005545829888433218, + "step": 1810 + }, + { + "ce_ib": 4.698478698730469, + "ce_orig": 0.3812403082847595, + "epoch": 0.5208138615285067, + "kl_loss": 0.20140892267227173, + "loss_ib": 0.006712567526847124, + "step": 1811 + }, + { + "ce_ib": 3.34533953666687, + "ce_orig": 0.7359578013420105, + "epoch": 0.5208138615285067, + "kl_loss": 0.13502469658851624, + "loss_ib": 0.004695586394518614, + "step": 1811 + }, + { + "ce_ib": 6.590109348297119, + "ce_orig": 1.0476917028427124, + "epoch": 0.5208138615285067, + "kl_loss": 0.3302564024925232, + "loss_ib": 0.00989267323166132, + "step": 1811 + }, + { + "ce_ib": 6.3849921226501465, + "ce_orig": 1.483210802078247, + "epoch": 0.5208138615285067, + "kl_loss": 0.15260383486747742, + "loss_ib": 0.00791103020310402, + "step": 1811 + }, + { + "ce_ib": 4.3355841636657715, + "ce_orig": 0.9082209467887878, + "epoch": 0.5211014451074844, + "kl_loss": 0.18973350524902344, + "loss_ib": 0.00623291963711381, + "step": 1812 + }, + { + "ce_ib": 4.391592025756836, + "ce_orig": 0.4710513949394226, + "epoch": 0.5211014451074844, + "kl_loss": 0.3285670280456543, + "loss_ib": 0.007677262183278799, + "step": 1812 + }, + { + "ce_ib": 6.789366245269775, + "ce_orig": 1.4087302684783936, + "epoch": 0.5211014451074844, + "kl_loss": 0.23697486519813538, + "loss_ib": 0.009159115143120289, + "step": 1812 + }, + { + "ce_ib": 7.329963684082031, + "ce_orig": 1.1739439964294434, + "epoch": 0.5211014451074844, + "kl_loss": 0.23812434077262878, + "loss_ib": 0.009711206890642643, + "step": 1812 + }, + { + "ce_ib": 4.353401184082031, + "ce_orig": 0.7496168613433838, + "epoch": 0.521389028686462, + "kl_loss": 0.15959800779819489, + "loss_ib": 0.005949381273239851, + "step": 1813 + }, + { + "ce_ib": 6.93917989730835, + "ce_orig": 0.8042504191398621, + "epoch": 0.521389028686462, + "kl_loss": 0.26898664236068726, + "loss_ib": 0.00962904654443264, + "step": 1813 + }, + { + "ce_ib": 3.2111239433288574, + "ce_orig": 0.6300265192985535, + "epoch": 0.521389028686462, + "kl_loss": 0.14442920684814453, + "loss_ib": 0.004655416123569012, + "step": 1813 + }, + { + "ce_ib": 5.082729816436768, + "ce_orig": 0.8240136504173279, + "epoch": 0.521389028686462, + "kl_loss": 0.2538681626319885, + "loss_ib": 0.007621411699801683, + "step": 1813 + }, + { + "ce_ib": 7.506804466247559, + "ce_orig": 1.042869210243225, + "epoch": 0.5216766122654396, + "kl_loss": 0.20386838912963867, + "loss_ib": 0.00954548828303814, + "step": 1814 + }, + { + "ce_ib": 5.0663886070251465, + "ce_orig": 0.6606091856956482, + "epoch": 0.5216766122654396, + "kl_loss": 0.20135483145713806, + "loss_ib": 0.007079937495291233, + "step": 1814 + }, + { + "ce_ib": 3.6751060485839844, + "ce_orig": 0.6798691749572754, + "epoch": 0.5216766122654396, + "kl_loss": 0.21309301257133484, + "loss_ib": 0.005806035827845335, + "step": 1814 + }, + { + "ce_ib": 2.864553213119507, + "ce_orig": 0.3934019207954407, + "epoch": 0.5216766122654396, + "kl_loss": 0.2546788156032562, + "loss_ib": 0.005411340855062008, + "step": 1814 + }, + { + "epoch": 0.5219641958444173, + "grad_norm": 0.13427554070949554, + "learning_rate": 9.471126349298557e-06, + "loss": 0.8379, + "step": 1815 + }, + { + "ce_ib": 9.50009822845459, + "ce_orig": 1.661615014076233, + "epoch": 0.5219641958444173, + "kl_loss": 0.28572994470596313, + "loss_ib": 0.012357397004961967, + "step": 1815 + }, + { + "ce_ib": 3.747300624847412, + "ce_orig": 0.6363303065299988, + "epoch": 0.5219641958444173, + "kl_loss": 0.19487634301185608, + "loss_ib": 0.005696064326912165, + "step": 1815 + }, + { + "ce_ib": 8.828596115112305, + "ce_orig": 1.535306453704834, + "epoch": 0.5219641958444173, + "kl_loss": 0.2173335999250412, + "loss_ib": 0.011001932434737682, + "step": 1815 + }, + { + "ce_ib": 4.563418388366699, + "ce_orig": 0.7024432420730591, + "epoch": 0.5219641958444173, + "kl_loss": 0.24083112180233002, + "loss_ib": 0.006971729453653097, + "step": 1815 + }, + { + "ce_ib": 2.5764825344085693, + "ce_orig": 0.5380098223686218, + "epoch": 0.522251779423395, + "kl_loss": 0.15003418922424316, + "loss_ib": 0.004076824523508549, + "step": 1816 + }, + { + "ce_ib": 7.708797931671143, + "ce_orig": 1.3907426595687866, + "epoch": 0.522251779423395, + "kl_loss": 0.25853198766708374, + "loss_ib": 0.010294117964804173, + "step": 1816 + }, + { + "ce_ib": 4.552369594573975, + "ce_orig": 0.44630172848701477, + "epoch": 0.522251779423395, + "kl_loss": 0.30989986658096313, + "loss_ib": 0.007651368156075478, + "step": 1816 + }, + { + "ce_ib": 5.001628875732422, + "ce_orig": 1.0199403762817383, + "epoch": 0.522251779423395, + "kl_loss": 0.16034522652626038, + "loss_ib": 0.006605081260204315, + "step": 1816 + }, + { + "ce_ib": 10.416280746459961, + "ce_orig": 1.8923710584640503, + "epoch": 0.5225393630023726, + "kl_loss": 0.2715950608253479, + "loss_ib": 0.013132231310009956, + "step": 1817 + }, + { + "ce_ib": 6.229454040527344, + "ce_orig": 1.060247778892517, + "epoch": 0.5225393630023726, + "kl_loss": 0.23605555295944214, + "loss_ib": 0.00859000999480486, + "step": 1817 + }, + { + "ce_ib": 6.946117401123047, + "ce_orig": 1.0619940757751465, + "epoch": 0.5225393630023726, + "kl_loss": 0.3530215620994568, + "loss_ib": 0.01047633308917284, + "step": 1817 + }, + { + "ce_ib": 8.509015083312988, + "ce_orig": 1.2890263795852661, + "epoch": 0.5225393630023726, + "kl_loss": 0.22496478259563446, + "loss_ib": 0.010758663527667522, + "step": 1817 + }, + { + "ce_ib": 4.807441711425781, + "ce_orig": 0.8506971001625061, + "epoch": 0.5228269465813502, + "kl_loss": 0.24618715047836304, + "loss_ib": 0.007269313093274832, + "step": 1818 + }, + { + "ce_ib": 6.583683013916016, + "ce_orig": 0.9181709885597229, + "epoch": 0.5228269465813502, + "kl_loss": 0.19941741228103638, + "loss_ib": 0.008577857166528702, + "step": 1818 + }, + { + "ce_ib": 6.426215648651123, + "ce_orig": 0.9120950102806091, + "epoch": 0.5228269465813502, + "kl_loss": 0.15273059904575348, + "loss_ib": 0.00795352179557085, + "step": 1818 + }, + { + "ce_ib": 6.072673320770264, + "ce_orig": 0.8870359063148499, + "epoch": 0.5228269465813502, + "kl_loss": 0.3441610634326935, + "loss_ib": 0.009514284320175648, + "step": 1818 + }, + { + "ce_ib": 3.889371633529663, + "ce_orig": 0.6676734089851379, + "epoch": 0.5231145301603278, + "kl_loss": 0.20489048957824707, + "loss_ib": 0.005938276648521423, + "step": 1819 + }, + { + "ce_ib": 6.331286907196045, + "ce_orig": 0.7984427213668823, + "epoch": 0.5231145301603278, + "kl_loss": 0.21341674029827118, + "loss_ib": 0.00846545398235321, + "step": 1819 + }, + { + "ce_ib": 6.073073863983154, + "ce_orig": 1.0317842960357666, + "epoch": 0.5231145301603278, + "kl_loss": 0.3320351541042328, + "loss_ib": 0.009393424727022648, + "step": 1819 + }, + { + "ce_ib": 5.782386302947998, + "ce_orig": 0.8939815163612366, + "epoch": 0.5231145301603278, + "kl_loss": 0.22046232223510742, + "loss_ib": 0.0079870093613863, + "step": 1819 + }, + { + "epoch": 0.5234021137393055, + "grad_norm": 0.1295160949230194, + "learning_rate": 9.467647074043911e-06, + "loss": 0.936, + "step": 1820 + }, + { + "ce_ib": 4.161619186401367, + "ce_orig": 0.6846871376037598, + "epoch": 0.5234021137393055, + "kl_loss": 0.16246896982192993, + "loss_ib": 0.005786309018731117, + "step": 1820 + }, + { + "ce_ib": 5.756795406341553, + "ce_orig": 1.0516748428344727, + "epoch": 0.5234021137393055, + "kl_loss": 0.2070978283882141, + "loss_ib": 0.007827773690223694, + "step": 1820 + }, + { + "ce_ib": 5.790688514709473, + "ce_orig": 0.8329687118530273, + "epoch": 0.5234021137393055, + "kl_loss": 0.3111580014228821, + "loss_ib": 0.00890226848423481, + "step": 1820 + }, + { + "ce_ib": 3.9477272033691406, + "ce_orig": 0.7902374863624573, + "epoch": 0.5234021137393055, + "kl_loss": 0.18525344133377075, + "loss_ib": 0.005800261162221432, + "step": 1820 + }, + { + "ce_ib": 6.208718299865723, + "ce_orig": 0.8927279114723206, + "epoch": 0.5236896973182831, + "kl_loss": 0.3012109398841858, + "loss_ib": 0.009220827370882034, + "step": 1821 + }, + { + "ce_ib": 6.990211486816406, + "ce_orig": 0.9636775851249695, + "epoch": 0.5236896973182831, + "kl_loss": 0.20870433747768402, + "loss_ib": 0.009077254682779312, + "step": 1821 + }, + { + "ce_ib": 8.73526668548584, + "ce_orig": 1.3548780679702759, + "epoch": 0.5236896973182831, + "kl_loss": 0.44657760858535767, + "loss_ib": 0.013201043009757996, + "step": 1821 + }, + { + "ce_ib": 5.386695384979248, + "ce_orig": 0.7904844284057617, + "epoch": 0.5236896973182831, + "kl_loss": 0.18388625979423523, + "loss_ib": 0.00722555723041296, + "step": 1821 + }, + { + "ce_ib": 4.732143878936768, + "ce_orig": 0.8115664720535278, + "epoch": 0.5239772808972608, + "kl_loss": 0.188096284866333, + "loss_ib": 0.006613106466829777, + "step": 1822 + }, + { + "ce_ib": 5.429836750030518, + "ce_orig": 0.6583589911460876, + "epoch": 0.5239772808972608, + "kl_loss": 0.2904740571975708, + "loss_ib": 0.008334577083587646, + "step": 1822 + }, + { + "ce_ib": 3.356475591659546, + "ce_orig": 0.6954246759414673, + "epoch": 0.5239772808972608, + "kl_loss": 0.14421172440052032, + "loss_ib": 0.004798592999577522, + "step": 1822 + }, + { + "ce_ib": 4.951510906219482, + "ce_orig": 0.9459335207939148, + "epoch": 0.5239772808972608, + "kl_loss": 0.24713939428329468, + "loss_ib": 0.007422904949635267, + "step": 1822 + }, + { + "ce_ib": 3.451356887817383, + "ce_orig": 0.7636024355888367, + "epoch": 0.5242648644762384, + "kl_loss": 0.21749041974544525, + "loss_ib": 0.005626261234283447, + "step": 1823 + }, + { + "ce_ib": 3.5414316654205322, + "ce_orig": 0.6915144920349121, + "epoch": 0.5242648644762384, + "kl_loss": 0.20919787883758545, + "loss_ib": 0.005633410066366196, + "step": 1823 + }, + { + "ce_ib": 4.181634902954102, + "ce_orig": 0.30439430475234985, + "epoch": 0.5242648644762384, + "kl_loss": 0.5112854242324829, + "loss_ib": 0.009294489398598671, + "step": 1823 + }, + { + "ce_ib": 6.770209789276123, + "ce_orig": 0.7689716219902039, + "epoch": 0.5242648644762384, + "kl_loss": 0.46790021657943726, + "loss_ib": 0.011449211277067661, + "step": 1823 + }, + { + "ce_ib": 6.827091217041016, + "ce_orig": 0.7090415358543396, + "epoch": 0.5245524480552161, + "kl_loss": 0.3946807384490967, + "loss_ib": 0.010773899033665657, + "step": 1824 + }, + { + "ce_ib": 5.242634296417236, + "ce_orig": 0.6740880012512207, + "epoch": 0.5245524480552161, + "kl_loss": 0.17949745059013367, + "loss_ib": 0.007037608418613672, + "step": 1824 + }, + { + "ce_ib": 7.583951473236084, + "ce_orig": 1.1575921773910522, + "epoch": 0.5245524480552161, + "kl_loss": 0.22207129001617432, + "loss_ib": 0.009804664179682732, + "step": 1824 + }, + { + "ce_ib": 3.365133047103882, + "ce_orig": 0.5091253519058228, + "epoch": 0.5245524480552161, + "kl_loss": 0.20353856682777405, + "loss_ib": 0.005400518886744976, + "step": 1824 + }, + { + "epoch": 0.5248400316341937, + "grad_norm": 0.13930882513523102, + "learning_rate": 9.4641570351905e-06, + "loss": 0.8489, + "step": 1825 + }, + { + "ce_ib": 5.171679496765137, + "ce_orig": 1.0531930923461914, + "epoch": 0.5248400316341937, + "kl_loss": 0.17203733325004578, + "loss_ib": 0.006892052944749594, + "step": 1825 + }, + { + "ce_ib": 4.869714260101318, + "ce_orig": 0.5718777775764465, + "epoch": 0.5248400316341937, + "kl_loss": 0.27115121483802795, + "loss_ib": 0.0075812265276908875, + "step": 1825 + }, + { + "ce_ib": 3.5416884422302246, + "ce_orig": 0.8580121397972107, + "epoch": 0.5248400316341937, + "kl_loss": 0.1877019703388214, + "loss_ib": 0.005418708082288504, + "step": 1825 + }, + { + "ce_ib": 9.05768871307373, + "ce_orig": 1.5205368995666504, + "epoch": 0.5248400316341937, + "kl_loss": 0.18774256110191345, + "loss_ib": 0.010935114696621895, + "step": 1825 + }, + { + "ce_ib": 3.9015533924102783, + "ce_orig": 0.8431259989738464, + "epoch": 0.5251276152131713, + "kl_loss": 0.14487867057323456, + "loss_ib": 0.005350339692085981, + "step": 1826 + }, + { + "ce_ib": 4.161779403686523, + "ce_orig": 0.8969098925590515, + "epoch": 0.5251276152131713, + "kl_loss": 0.2807765305042267, + "loss_ib": 0.006969544570893049, + "step": 1826 + }, + { + "ce_ib": 10.926655769348145, + "ce_orig": 1.9176727533340454, + "epoch": 0.5251276152131713, + "kl_loss": 0.5257304906845093, + "loss_ib": 0.01618395932018757, + "step": 1826 + }, + { + "ce_ib": 5.555898189544678, + "ce_orig": 0.9816900491714478, + "epoch": 0.5251276152131713, + "kl_loss": 0.3077981472015381, + "loss_ib": 0.008633879944682121, + "step": 1826 + }, + { + "ce_ib": 4.505410671234131, + "ce_orig": 0.7756586670875549, + "epoch": 0.5254151987921489, + "kl_loss": 0.3038627803325653, + "loss_ib": 0.007544038351625204, + "step": 1827 + }, + { + "ce_ib": 4.859555244445801, + "ce_orig": 0.9365068078041077, + "epoch": 0.5254151987921489, + "kl_loss": 0.20699253678321838, + "loss_ib": 0.006929480936378241, + "step": 1827 + }, + { + "ce_ib": 5.228682994842529, + "ce_orig": 0.827009916305542, + "epoch": 0.5254151987921489, + "kl_loss": 0.1980331391096115, + "loss_ib": 0.007209014613181353, + "step": 1827 + }, + { + "ce_ib": 2.3166110515594482, + "ce_orig": 0.30793437361717224, + "epoch": 0.5254151987921489, + "kl_loss": 0.17321155965328217, + "loss_ib": 0.0040487265214324, + "step": 1827 + }, + { + "ce_ib": 4.875110149383545, + "ce_orig": 0.6053239703178406, + "epoch": 0.5257027823711266, + "kl_loss": 0.2659691274166107, + "loss_ib": 0.0075348010286688805, + "step": 1828 + }, + { + "ce_ib": 5.519563674926758, + "ce_orig": 0.8646820783615112, + "epoch": 0.5257027823711266, + "kl_loss": 0.24867025017738342, + "loss_ib": 0.008006266318261623, + "step": 1828 + }, + { + "ce_ib": 5.005699157714844, + "ce_orig": 0.8784776329994202, + "epoch": 0.5257027823711266, + "kl_loss": 0.2948490381240845, + "loss_ib": 0.00795418955385685, + "step": 1828 + }, + { + "ce_ib": 8.660489082336426, + "ce_orig": 1.0395890474319458, + "epoch": 0.5257027823711266, + "kl_loss": 0.21846416592597961, + "loss_ib": 0.010845130309462547, + "step": 1828 + }, + { + "ce_ib": 3.270155191421509, + "ce_orig": 0.6141106486320496, + "epoch": 0.5259903659501043, + "kl_loss": 0.17618581652641296, + "loss_ib": 0.005032013636082411, + "step": 1829 + }, + { + "ce_ib": 5.039238929748535, + "ce_orig": 0.9362063407897949, + "epoch": 0.5259903659501043, + "kl_loss": 0.4431926906108856, + "loss_ib": 0.0094711659476161, + "step": 1829 + }, + { + "ce_ib": 5.330448627471924, + "ce_orig": 0.8868371844291687, + "epoch": 0.5259903659501043, + "kl_loss": 0.3449958264827728, + "loss_ib": 0.008780406787991524, + "step": 1829 + }, + { + "ce_ib": 7.948352336883545, + "ce_orig": 1.10189688205719, + "epoch": 0.5259903659501043, + "kl_loss": 0.15406270325183868, + "loss_ib": 0.00948897935450077, + "step": 1829 + }, + { + "epoch": 0.5262779495290819, + "grad_norm": 0.12116040289402008, + "learning_rate": 9.460656241146637e-06, + "loss": 0.8764, + "step": 1830 + }, + { + "ce_ib": 7.966455459594727, + "ce_orig": 1.353620171546936, + "epoch": 0.5262779495290819, + "kl_loss": 0.2310958206653595, + "loss_ib": 0.01027741376310587, + "step": 1830 + }, + { + "ce_ib": 5.285830974578857, + "ce_orig": 0.6761056184768677, + "epoch": 0.5262779495290819, + "kl_loss": 0.16036930680274963, + "loss_ib": 0.006889524403959513, + "step": 1830 + }, + { + "ce_ib": 9.416250228881836, + "ce_orig": 1.3026270866394043, + "epoch": 0.5262779495290819, + "kl_loss": 0.24720579385757446, + "loss_ib": 0.011888308450579643, + "step": 1830 + }, + { + "ce_ib": 5.543875694274902, + "ce_orig": 0.46377140283584595, + "epoch": 0.5262779495290819, + "kl_loss": 0.27701789140701294, + "loss_ib": 0.00831405445933342, + "step": 1830 + }, + { + "ce_ib": 2.8311800956726074, + "ce_orig": 0.2755441963672638, + "epoch": 0.5265655331080595, + "kl_loss": 0.25843143463134766, + "loss_ib": 0.005415494553744793, + "step": 1831 + }, + { + "ce_ib": 8.62271785736084, + "ce_orig": 1.1264030933380127, + "epoch": 0.5265655331080595, + "kl_loss": 0.32971686124801636, + "loss_ib": 0.01191988680511713, + "step": 1831 + }, + { + "ce_ib": 5.154438495635986, + "ce_orig": 0.9023049473762512, + "epoch": 0.5265655331080595, + "kl_loss": 0.24387776851654053, + "loss_ib": 0.0075932159088552, + "step": 1831 + }, + { + "ce_ib": 2.5585079193115234, + "ce_orig": 0.46558678150177, + "epoch": 0.5265655331080595, + "kl_loss": 0.17119866609573364, + "loss_ib": 0.004270494449883699, + "step": 1831 + }, + { + "ce_ib": 5.680759906768799, + "ce_orig": 1.1517668962478638, + "epoch": 0.5268531166870372, + "kl_loss": 0.2441079318523407, + "loss_ib": 0.008121839724481106, + "step": 1832 + }, + { + "ce_ib": 5.976526737213135, + "ce_orig": 0.9688917994499207, + "epoch": 0.5268531166870372, + "kl_loss": 0.22500276565551758, + "loss_ib": 0.008226553909480572, + "step": 1832 + }, + { + "ce_ib": 4.417006492614746, + "ce_orig": 0.8098064064979553, + "epoch": 0.5268531166870372, + "kl_loss": 0.1642741858959198, + "loss_ib": 0.006059748586267233, + "step": 1832 + }, + { + "ce_ib": 4.212074279785156, + "ce_orig": 0.6016438007354736, + "epoch": 0.5268531166870372, + "kl_loss": 0.20138077437877655, + "loss_ib": 0.006225882098078728, + "step": 1832 + }, + { + "ce_ib": 4.229487895965576, + "ce_orig": 0.3920978009700775, + "epoch": 0.5271407002660148, + "kl_loss": 0.22139926254749298, + "loss_ib": 0.006443480495363474, + "step": 1833 + }, + { + "ce_ib": 7.130161762237549, + "ce_orig": 1.1279047727584839, + "epoch": 0.5271407002660148, + "kl_loss": 0.2132570892572403, + "loss_ib": 0.009262732230126858, + "step": 1833 + }, + { + "ce_ib": 4.03831148147583, + "ce_orig": 0.5502790808677673, + "epoch": 0.5271407002660148, + "kl_loss": 0.20839695632457733, + "loss_ib": 0.006122280843555927, + "step": 1833 + }, + { + "ce_ib": 5.961589336395264, + "ce_orig": 0.9671207666397095, + "epoch": 0.5271407002660148, + "kl_loss": 0.2966713011264801, + "loss_ib": 0.008928302675485611, + "step": 1833 + }, + { + "ce_ib": 6.421506881713867, + "ce_orig": 1.0025813579559326, + "epoch": 0.5274282838449924, + "kl_loss": 0.19049221277236938, + "loss_ib": 0.008326428942382336, + "step": 1834 + }, + { + "ce_ib": 5.470172882080078, + "ce_orig": 0.9889363050460815, + "epoch": 0.5274282838449924, + "kl_loss": 0.17300361394882202, + "loss_ib": 0.007200208958238363, + "step": 1834 + }, + { + "ce_ib": 3.251887559890747, + "ce_orig": 0.7033747434616089, + "epoch": 0.5274282838449924, + "kl_loss": 0.22987093031406403, + "loss_ib": 0.0055505963973701, + "step": 1834 + }, + { + "ce_ib": 3.2938926219940186, + "ce_orig": 0.7175408005714417, + "epoch": 0.5274282838449924, + "kl_loss": 0.22159971296787262, + "loss_ib": 0.005509889684617519, + "step": 1834 + }, + { + "epoch": 0.5277158674239701, + "grad_norm": 0.10707426071166992, + "learning_rate": 9.45714470034655e-06, + "loss": 0.824, + "step": 1835 + }, + { + "ce_ib": 5.597881317138672, + "ce_orig": 0.9906297326087952, + "epoch": 0.5277158674239701, + "kl_loss": 0.254499226808548, + "loss_ib": 0.008142873644828796, + "step": 1835 + }, + { + "ce_ib": 4.040140151977539, + "ce_orig": 0.6455329060554504, + "epoch": 0.5277158674239701, + "kl_loss": 0.26006942987442017, + "loss_ib": 0.006640834733843803, + "step": 1835 + }, + { + "ce_ib": 4.632035732269287, + "ce_orig": 0.862791121006012, + "epoch": 0.5277158674239701, + "kl_loss": 0.20942223072052002, + "loss_ib": 0.006726257503032684, + "step": 1835 + }, + { + "ce_ib": 5.974380970001221, + "ce_orig": 0.761141836643219, + "epoch": 0.5277158674239701, + "kl_loss": 0.1809392273426056, + "loss_ib": 0.007783772889524698, + "step": 1835 + }, + { + "ce_ib": 3.7566730976104736, + "ce_orig": 0.5548977851867676, + "epoch": 0.5280034510029478, + "kl_loss": 0.2547493875026703, + "loss_ib": 0.006304166745394468, + "step": 1836 + }, + { + "ce_ib": 3.8706605434417725, + "ce_orig": 0.8601248860359192, + "epoch": 0.5280034510029478, + "kl_loss": 0.23735542595386505, + "loss_ib": 0.006244214717298746, + "step": 1836 + }, + { + "ce_ib": 6.391262531280518, + "ce_orig": 0.9554364681243896, + "epoch": 0.5280034510029478, + "kl_loss": 0.2322809100151062, + "loss_ib": 0.008714071474969387, + "step": 1836 + }, + { + "ce_ib": 6.248229026794434, + "ce_orig": 0.7384769320487976, + "epoch": 0.5280034510029478, + "kl_loss": 0.30158042907714844, + "loss_ib": 0.009264033287763596, + "step": 1836 + }, + { + "ce_ib": 5.4738993644714355, + "ce_orig": 0.8020815253257751, + "epoch": 0.5282910345819254, + "kl_loss": 0.27552667260169983, + "loss_ib": 0.008229166269302368, + "step": 1837 + }, + { + "ce_ib": 8.685053825378418, + "ce_orig": 0.5568800568580627, + "epoch": 0.5282910345819254, + "kl_loss": 0.3398151397705078, + "loss_ib": 0.012083206325769424, + "step": 1837 + }, + { + "ce_ib": 3.9550063610076904, + "ce_orig": 0.6799144744873047, + "epoch": 0.5282910345819254, + "kl_loss": 0.2494896501302719, + "loss_ib": 0.006449902430176735, + "step": 1837 + }, + { + "ce_ib": 5.9247918128967285, + "ce_orig": 0.8019424080848694, + "epoch": 0.5282910345819254, + "kl_loss": 0.2974478006362915, + "loss_ib": 0.008899269625544548, + "step": 1837 + }, + { + "ce_ib": 8.21461296081543, + "ce_orig": 1.2831261157989502, + "epoch": 0.528578618160903, + "kl_loss": 0.183128222823143, + "loss_ib": 0.010045895352959633, + "step": 1838 + }, + { + "ce_ib": 7.030628681182861, + "ce_orig": 0.9009212255477905, + "epoch": 0.528578618160903, + "kl_loss": 0.1871497929096222, + "loss_ib": 0.008902125991880894, + "step": 1838 + }, + { + "ce_ib": 8.397993087768555, + "ce_orig": 1.4693775177001953, + "epoch": 0.528578618160903, + "kl_loss": 0.28764963150024414, + "loss_ib": 0.011274490505456924, + "step": 1838 + }, + { + "ce_ib": 5.634881973266602, + "ce_orig": 1.246681809425354, + "epoch": 0.528578618160903, + "kl_loss": 0.14776155352592468, + "loss_ib": 0.0071124969981610775, + "step": 1838 + }, + { + "ce_ib": 6.353714942932129, + "ce_orig": 0.8384955525398254, + "epoch": 0.5288662017398806, + "kl_loss": 0.20396575331687927, + "loss_ib": 0.008393372409045696, + "step": 1839 + }, + { + "ce_ib": 3.9521143436431885, + "ce_orig": 0.652812123298645, + "epoch": 0.5288662017398806, + "kl_loss": 0.19296777248382568, + "loss_ib": 0.005881792400032282, + "step": 1839 + }, + { + "ce_ib": 3.103644371032715, + "ce_orig": 0.5401879549026489, + "epoch": 0.5288662017398806, + "kl_loss": 0.17736856639385223, + "loss_ib": 0.004877329804003239, + "step": 1839 + }, + { + "ce_ib": 5.33885383605957, + "ce_orig": 0.7373903393745422, + "epoch": 0.5288662017398806, + "kl_loss": 0.28985947370529175, + "loss_ib": 0.008237448520958424, + "step": 1839 + }, + { + "epoch": 0.5291537853188583, + "grad_norm": 0.12048087269067764, + "learning_rate": 9.453622421250353e-06, + "loss": 0.8883, + "step": 1840 + }, + { + "ce_ib": 6.563951015472412, + "ce_orig": 1.5464122295379639, + "epoch": 0.5291537853188583, + "kl_loss": 0.19577865302562714, + "loss_ib": 0.008521737530827522, + "step": 1840 + }, + { + "ce_ib": 6.511598587036133, + "ce_orig": 1.1769744157791138, + "epoch": 0.5291537853188583, + "kl_loss": 0.18648597598075867, + "loss_ib": 0.008376458659768105, + "step": 1840 + }, + { + "ce_ib": 4.372602462768555, + "ce_orig": 0.6788270473480225, + "epoch": 0.5291537853188583, + "kl_loss": 0.15491482615470886, + "loss_ib": 0.005921750329434872, + "step": 1840 + }, + { + "ce_ib": 6.303147315979004, + "ce_orig": 0.5359430313110352, + "epoch": 0.5291537853188583, + "kl_loss": 0.3028493821620941, + "loss_ib": 0.009331640787422657, + "step": 1840 + }, + { + "ce_ib": 6.323972225189209, + "ce_orig": 1.238000512123108, + "epoch": 0.5294413688978359, + "kl_loss": 0.31458938121795654, + "loss_ib": 0.009469865821301937, + "step": 1841 + }, + { + "ce_ib": 3.9357001781463623, + "ce_orig": 0.7044163346290588, + "epoch": 0.5294413688978359, + "kl_loss": 0.24502159655094147, + "loss_ib": 0.006385916378349066, + "step": 1841 + }, + { + "ce_ib": 7.748276233673096, + "ce_orig": 1.0002756118774414, + "epoch": 0.5294413688978359, + "kl_loss": 0.24710191786289215, + "loss_ib": 0.01021929644048214, + "step": 1841 + }, + { + "ce_ib": 4.496145248413086, + "ce_orig": 0.7424629330635071, + "epoch": 0.5294413688978359, + "kl_loss": 0.21064282953739166, + "loss_ib": 0.006602573208510876, + "step": 1841 + }, + { + "ce_ib": 4.606788158416748, + "ce_orig": 0.7641493082046509, + "epoch": 0.5297289524768136, + "kl_loss": 0.2618545889854431, + "loss_ib": 0.007225333712995052, + "step": 1842 + }, + { + "ce_ib": 1.3112238645553589, + "ce_orig": 0.12241856753826141, + "epoch": 0.5297289524768136, + "kl_loss": 0.5186067819595337, + "loss_ib": 0.006497291848063469, + "step": 1842 + }, + { + "ce_ib": 3.796043872833252, + "ce_orig": 0.6563534736633301, + "epoch": 0.5297289524768136, + "kl_loss": 0.18121027946472168, + "loss_ib": 0.005608146544545889, + "step": 1842 + }, + { + "ce_ib": 3.368903636932373, + "ce_orig": 0.6486485600471497, + "epoch": 0.5297289524768136, + "kl_loss": 0.40390148758888245, + "loss_ib": 0.007407918572425842, + "step": 1842 + }, + { + "ce_ib": 3.5803630352020264, + "ce_orig": 0.9925637245178223, + "epoch": 0.5300165360557912, + "kl_loss": 0.17552965879440308, + "loss_ib": 0.005335659720003605, + "step": 1843 + }, + { + "ce_ib": 2.867419719696045, + "ce_orig": 0.604449450969696, + "epoch": 0.5300165360557912, + "kl_loss": 0.23837560415267944, + "loss_ib": 0.005251175723969936, + "step": 1843 + }, + { + "ce_ib": 5.86564826965332, + "ce_orig": 1.1684297323226929, + "epoch": 0.5300165360557912, + "kl_loss": 0.20978295803070068, + "loss_ib": 0.00796347763389349, + "step": 1843 + }, + { + "ce_ib": 3.580265760421753, + "ce_orig": 0.6890649795532227, + "epoch": 0.5300165360557912, + "kl_loss": 0.19464614987373352, + "loss_ib": 0.00552672753110528, + "step": 1843 + }, + { + "ce_ib": 7.2468085289001465, + "ce_orig": 1.008955478668213, + "epoch": 0.5303041196347689, + "kl_loss": 0.24262303113937378, + "loss_ib": 0.009673038497567177, + "step": 1844 + }, + { + "ce_ib": 4.100954055786133, + "ce_orig": 0.8103383779525757, + "epoch": 0.5303041196347689, + "kl_loss": 0.19310420751571655, + "loss_ib": 0.006031996104866266, + "step": 1844 + }, + { + "ce_ib": 6.187928676605225, + "ce_orig": 1.223270058631897, + "epoch": 0.5303041196347689, + "kl_loss": 0.2613314986228943, + "loss_ib": 0.008801243267953396, + "step": 1844 + }, + { + "ce_ib": 6.660463809967041, + "ce_orig": 0.7532047033309937, + "epoch": 0.5303041196347689, + "kl_loss": 0.19040045142173767, + "loss_ib": 0.00856446847319603, + "step": 1844 + }, + { + "epoch": 0.5305917032137465, + "grad_norm": 0.10593312233686447, + "learning_rate": 9.450089412344037e-06, + "loss": 0.8629, + "step": 1845 + }, + { + "ce_ib": 7.035682201385498, + "ce_orig": 1.2130488157272339, + "epoch": 0.5305917032137465, + "kl_loss": 0.16078200936317444, + "loss_ib": 0.008643501438200474, + "step": 1845 + }, + { + "ce_ib": 5.997169494628906, + "ce_orig": 0.7861976027488708, + "epoch": 0.5305917032137465, + "kl_loss": 0.1388709545135498, + "loss_ib": 0.007385878823697567, + "step": 1845 + }, + { + "ce_ib": 4.488471508026123, + "ce_orig": 0.9490336179733276, + "epoch": 0.5305917032137465, + "kl_loss": 0.15419700741767883, + "loss_ib": 0.006030441261827946, + "step": 1845 + }, + { + "ce_ib": 5.691738605499268, + "ce_orig": 1.1692591905593872, + "epoch": 0.5305917032137465, + "kl_loss": 0.2401736080646515, + "loss_ib": 0.008093475364148617, + "step": 1845 + }, + { + "ce_ib": 3.7242166996002197, + "ce_orig": 0.693812370300293, + "epoch": 0.5308792867927241, + "kl_loss": 0.2123832106590271, + "loss_ib": 0.005848048720508814, + "step": 1846 + }, + { + "ce_ib": 4.208561420440674, + "ce_orig": 0.7323001027107239, + "epoch": 0.5308792867927241, + "kl_loss": 0.15289628505706787, + "loss_ib": 0.005737524013966322, + "step": 1846 + }, + { + "ce_ib": 6.112689971923828, + "ce_orig": 0.7250316143035889, + "epoch": 0.5308792867927241, + "kl_loss": 0.1991797685623169, + "loss_ib": 0.00810448732227087, + "step": 1846 + }, + { + "ce_ib": 5.507354259490967, + "ce_orig": 0.6912445425987244, + "epoch": 0.5308792867927241, + "kl_loss": 0.2743017077445984, + "loss_ib": 0.008250370621681213, + "step": 1846 + }, + { + "ce_ib": 6.0127668380737305, + "ce_orig": 1.1777681112289429, + "epoch": 0.5311668703717017, + "kl_loss": 0.2775518596172333, + "loss_ib": 0.008788284845650196, + "step": 1847 + }, + { + "ce_ib": 8.435003280639648, + "ce_orig": 0.9789080619812012, + "epoch": 0.5311668703717017, + "kl_loss": 0.30630841851234436, + "loss_ib": 0.011498087085783482, + "step": 1847 + }, + { + "ce_ib": 4.990412712097168, + "ce_orig": 0.6420254707336426, + "epoch": 0.5311668703717017, + "kl_loss": 0.2548336088657379, + "loss_ib": 0.0075387489050626755, + "step": 1847 + }, + { + "ce_ib": 5.797047138214111, + "ce_orig": 1.0188409090042114, + "epoch": 0.5311668703717017, + "kl_loss": 0.3676277995109558, + "loss_ib": 0.009473324753344059, + "step": 1847 + }, + { + "ce_ib": 9.056893348693848, + "ce_orig": 0.5721787810325623, + "epoch": 0.5314544539506794, + "kl_loss": 0.20263320207595825, + "loss_ib": 0.011083225719630718, + "step": 1848 + }, + { + "ce_ib": 3.6427559852600098, + "ce_orig": 0.6390652060508728, + "epoch": 0.5314544539506794, + "kl_loss": 0.18275010585784912, + "loss_ib": 0.00547025678679347, + "step": 1848 + }, + { + "ce_ib": 4.470255374908447, + "ce_orig": 0.7787516117095947, + "epoch": 0.5314544539506794, + "kl_loss": 0.21285396814346313, + "loss_ib": 0.006598794832825661, + "step": 1848 + }, + { + "ce_ib": 5.499170780181885, + "ce_orig": 0.8165490031242371, + "epoch": 0.5314544539506794, + "kl_loss": 0.1710931807756424, + "loss_ib": 0.0072101023979485035, + "step": 1848 + }, + { + "ce_ib": 2.9710166454315186, + "ce_orig": 0.5027737617492676, + "epoch": 0.5317420375296571, + "kl_loss": 0.21074137091636658, + "loss_ib": 0.005078430287539959, + "step": 1849 + }, + { + "ce_ib": 8.172359466552734, + "ce_orig": 1.1063852310180664, + "epoch": 0.5317420375296571, + "kl_loss": 0.22217628359794617, + "loss_ib": 0.010394122451543808, + "step": 1849 + }, + { + "ce_ib": 2.164992094039917, + "ce_orig": 0.40636521577835083, + "epoch": 0.5317420375296571, + "kl_loss": 0.16406163573265076, + "loss_ib": 0.003805608255788684, + "step": 1849 + }, + { + "ce_ib": 5.142477035522461, + "ce_orig": 0.82965487241745, + "epoch": 0.5317420375296571, + "kl_loss": 0.29681843519210815, + "loss_ib": 0.008110661059617996, + "step": 1849 + }, + { + "epoch": 0.5320296211086347, + "grad_norm": 0.10571553558111191, + "learning_rate": 9.446545682139437e-06, + "loss": 0.8116, + "step": 1850 + }, + { + "ce_ib": 3.0195400714874268, + "ce_orig": 0.5976029634475708, + "epoch": 0.5320296211086347, + "kl_loss": 0.24145764112472534, + "loss_ib": 0.005434115882962942, + "step": 1850 + }, + { + "ce_ib": 7.33281135559082, + "ce_orig": 0.9514852166175842, + "epoch": 0.5320296211086347, + "kl_loss": 0.30109816789627075, + "loss_ib": 0.010343791916966438, + "step": 1850 + }, + { + "ce_ib": 6.971595764160156, + "ce_orig": 0.744994580745697, + "epoch": 0.5320296211086347, + "kl_loss": 0.2107081413269043, + "loss_ib": 0.00907867681235075, + "step": 1850 + }, + { + "ce_ib": 4.410055160522461, + "ce_orig": 0.9290229678153992, + "epoch": 0.5320296211086347, + "kl_loss": 0.27106451988220215, + "loss_ib": 0.007120700087398291, + "step": 1850 + }, + { + "ce_ib": 7.343533039093018, + "ce_orig": 0.6487832069396973, + "epoch": 0.5323172046876123, + "kl_loss": 0.30970144271850586, + "loss_ib": 0.01044054701924324, + "step": 1851 + }, + { + "ce_ib": 2.6211395263671875, + "ce_orig": 0.3374985158443451, + "epoch": 0.5323172046876123, + "kl_loss": 0.18458132445812225, + "loss_ib": 0.004466952756047249, + "step": 1851 + }, + { + "ce_ib": 3.530855417251587, + "ce_orig": 0.7399183511734009, + "epoch": 0.5323172046876123, + "kl_loss": 0.22764435410499573, + "loss_ib": 0.005807298701256514, + "step": 1851 + }, + { + "ce_ib": 6.120885372161865, + "ce_orig": 0.6404238939285278, + "epoch": 0.5323172046876123, + "kl_loss": 0.2242920696735382, + "loss_ib": 0.008363805711269379, + "step": 1851 + }, + { + "ce_ib": 4.473357677459717, + "ce_orig": 0.23305130004882812, + "epoch": 0.53260478826659, + "kl_loss": 0.20445549488067627, + "loss_ib": 0.0065179127268493176, + "step": 1852 + }, + { + "ce_ib": 10.160700798034668, + "ce_orig": 1.1857593059539795, + "epoch": 0.53260478826659, + "kl_loss": 0.23605488240718842, + "loss_ib": 0.012521250173449516, + "step": 1852 + }, + { + "ce_ib": 3.499321222305298, + "ce_orig": 0.6823559403419495, + "epoch": 0.53260478826659, + "kl_loss": 0.2032666653394699, + "loss_ib": 0.0055319881066679955, + "step": 1852 + }, + { + "ce_ib": 7.681511878967285, + "ce_orig": 1.04057776927948, + "epoch": 0.53260478826659, + "kl_loss": 0.21528491377830505, + "loss_ib": 0.009834361262619495, + "step": 1852 + }, + { + "ce_ib": 4.359706401824951, + "ce_orig": 0.7192271947860718, + "epoch": 0.5328923718455676, + "kl_loss": 0.24812665581703186, + "loss_ib": 0.006840972695499659, + "step": 1853 + }, + { + "ce_ib": 3.1339895725250244, + "ce_orig": 0.810313880443573, + "epoch": 0.5328923718455676, + "kl_loss": 0.15557479858398438, + "loss_ib": 0.004689737223088741, + "step": 1853 + }, + { + "ce_ib": 3.581479549407959, + "ce_orig": 0.8280433416366577, + "epoch": 0.5328923718455676, + "kl_loss": 0.2017989158630371, + "loss_ib": 0.005599468946456909, + "step": 1853 + }, + { + "ce_ib": 4.026859283447266, + "ce_orig": 0.32227322459220886, + "epoch": 0.5328923718455676, + "kl_loss": 0.23444557189941406, + "loss_ib": 0.006371315103024244, + "step": 1853 + }, + { + "ce_ib": 4.264099597930908, + "ce_orig": 0.7270826101303101, + "epoch": 0.5331799554245452, + "kl_loss": 0.2571170926094055, + "loss_ib": 0.006835270207375288, + "step": 1854 + }, + { + "ce_ib": 3.458313226699829, + "ce_orig": 0.503343403339386, + "epoch": 0.5331799554245452, + "kl_loss": 0.1853717863559723, + "loss_ib": 0.005312031600624323, + "step": 1854 + }, + { + "ce_ib": 4.385552883148193, + "ce_orig": 0.643458902835846, + "epoch": 0.5331799554245452, + "kl_loss": 0.19659477472305298, + "loss_ib": 0.006351500749588013, + "step": 1854 + }, + { + "ce_ib": 4.395240783691406, + "ce_orig": 0.740811824798584, + "epoch": 0.5331799554245452, + "kl_loss": 0.16096997261047363, + "loss_ib": 0.006004940252751112, + "step": 1854 + }, + { + "epoch": 0.533467539003523, + "grad_norm": 0.12044809013605118, + "learning_rate": 9.442991239174225e-06, + "loss": 0.8133, + "step": 1855 + }, + { + "ce_ib": 6.274482727050781, + "ce_orig": 1.3136651515960693, + "epoch": 0.533467539003523, + "kl_loss": 0.2152913212776184, + "loss_ib": 0.008427395485341549, + "step": 1855 + }, + { + "ce_ib": 2.4021594524383545, + "ce_orig": 0.3267451822757721, + "epoch": 0.533467539003523, + "kl_loss": 0.19621196389198303, + "loss_ib": 0.004364278633147478, + "step": 1855 + }, + { + "ce_ib": 5.595287322998047, + "ce_orig": 0.7406966090202332, + "epoch": 0.533467539003523, + "kl_loss": 0.20382627844810486, + "loss_ib": 0.007633550092577934, + "step": 1855 + }, + { + "ce_ib": 6.821021556854248, + "ce_orig": 1.3345282077789307, + "epoch": 0.533467539003523, + "kl_loss": 0.22391046583652496, + "loss_ib": 0.009060125797986984, + "step": 1855 + }, + { + "ce_ib": 4.37741756439209, + "ce_orig": 0.5070826411247253, + "epoch": 0.5337551225825006, + "kl_loss": 0.1709289848804474, + "loss_ib": 0.00608670711517334, + "step": 1856 + }, + { + "ce_ib": 5.665239334106445, + "ce_orig": 0.543302595615387, + "epoch": 0.5337551225825006, + "kl_loss": 0.4002218246459961, + "loss_ib": 0.009667458012700081, + "step": 1856 + }, + { + "ce_ib": 10.248526573181152, + "ce_orig": 1.6775959730148315, + "epoch": 0.5337551225825006, + "kl_loss": 0.2522523105144501, + "loss_ib": 0.01277104951441288, + "step": 1856 + }, + { + "ce_ib": 4.3051228523254395, + "ce_orig": 0.6298352479934692, + "epoch": 0.5337551225825006, + "kl_loss": 0.30425626039505005, + "loss_ib": 0.007347684819251299, + "step": 1856 + }, + { + "ce_ib": 3.970698356628418, + "ce_orig": 0.5867936015129089, + "epoch": 0.5340427061614782, + "kl_loss": 0.17628365755081177, + "loss_ib": 0.005733535159379244, + "step": 1857 + }, + { + "ce_ib": 4.260129928588867, + "ce_orig": 0.704187273979187, + "epoch": 0.5340427061614782, + "kl_loss": 0.2587973475456238, + "loss_ib": 0.006848103366792202, + "step": 1857 + }, + { + "ce_ib": 4.6300554275512695, + "ce_orig": 0.6545487642288208, + "epoch": 0.5340427061614782, + "kl_loss": 0.3494999408721924, + "loss_ib": 0.008125054650008678, + "step": 1857 + }, + { + "ce_ib": 4.991218090057373, + "ce_orig": 0.862577497959137, + "epoch": 0.5340427061614782, + "kl_loss": 0.23879991471767426, + "loss_ib": 0.0073792170733213425, + "step": 1857 + }, + { + "ce_ib": 7.007440090179443, + "ce_orig": 1.259676456451416, + "epoch": 0.5343302897404558, + "kl_loss": 0.23333919048309326, + "loss_ib": 0.009340832009911537, + "step": 1858 + }, + { + "ce_ib": 5.029625415802002, + "ce_orig": 0.9573800563812256, + "epoch": 0.5343302897404558, + "kl_loss": 0.24174338579177856, + "loss_ib": 0.00744705880060792, + "step": 1858 + }, + { + "ce_ib": 4.438313961029053, + "ce_orig": 0.7793439626693726, + "epoch": 0.5343302897404558, + "kl_loss": 0.1766584813594818, + "loss_ib": 0.006204898934811354, + "step": 1858 + }, + { + "ce_ib": 5.21489953994751, + "ce_orig": 0.5086827874183655, + "epoch": 0.5343302897404558, + "kl_loss": 0.3779343366622925, + "loss_ib": 0.00899424310773611, + "step": 1858 + }, + { + "ce_ib": 4.461227893829346, + "ce_orig": 0.7233249545097351, + "epoch": 0.5346178733194334, + "kl_loss": 0.14038971066474915, + "loss_ib": 0.005865124985575676, + "step": 1859 + }, + { + "ce_ib": 8.790775299072266, + "ce_orig": 1.4900492429733276, + "epoch": 0.5346178733194334, + "kl_loss": 0.3974230885505676, + "loss_ib": 0.012765007093548775, + "step": 1859 + }, + { + "ce_ib": 4.1817545890808105, + "ce_orig": 0.3852037489414215, + "epoch": 0.5346178733194334, + "kl_loss": 0.23326320946216583, + "loss_ib": 0.006514386273920536, + "step": 1859 + }, + { + "ce_ib": 5.260295391082764, + "ce_orig": 0.434694766998291, + "epoch": 0.5346178733194334, + "kl_loss": 0.19891120493412018, + "loss_ib": 0.0072494070045650005, + "step": 1859 + }, + { + "epoch": 0.5349054568984111, + "grad_norm": 0.1161816194653511, + "learning_rate": 9.439426092011877e-06, + "loss": 0.8398, + "step": 1860 + }, + { + "ce_ib": 4.339866638183594, + "ce_orig": 0.7001444101333618, + "epoch": 0.5349054568984111, + "kl_loss": 0.2064824402332306, + "loss_ib": 0.006404690444469452, + "step": 1860 + }, + { + "ce_ib": 3.5802195072174072, + "ce_orig": 0.6469123959541321, + "epoch": 0.5349054568984111, + "kl_loss": 0.2189090996980667, + "loss_ib": 0.0057693105190992355, + "step": 1860 + }, + { + "ce_ib": 6.698261737823486, + "ce_orig": 1.117401123046875, + "epoch": 0.5349054568984111, + "kl_loss": 0.20926931500434875, + "loss_ib": 0.008790954947471619, + "step": 1860 + }, + { + "ce_ib": 6.679226875305176, + "ce_orig": 0.4575742185115814, + "epoch": 0.5349054568984111, + "kl_loss": 0.4343309998512268, + "loss_ib": 0.0110225360840559, + "step": 1860 + }, + { + "ce_ib": 5.680418968200684, + "ce_orig": 1.021546483039856, + "epoch": 0.5351930404773887, + "kl_loss": 0.23155631124973297, + "loss_ib": 0.007995981723070145, + "step": 1861 + }, + { + "ce_ib": 3.0201821327209473, + "ce_orig": 0.5457724928855896, + "epoch": 0.5351930404773887, + "kl_loss": 0.21453796327114105, + "loss_ib": 0.005165562033653259, + "step": 1861 + }, + { + "ce_ib": 4.915546417236328, + "ce_orig": 0.7357434034347534, + "epoch": 0.5351930404773887, + "kl_loss": 0.7311533689498901, + "loss_ib": 0.012227079831063747, + "step": 1861 + }, + { + "ce_ib": 5.287921905517578, + "ce_orig": 0.5148226618766785, + "epoch": 0.5351930404773887, + "kl_loss": 0.20094850659370422, + "loss_ib": 0.007297407370060682, + "step": 1861 + }, + { + "ce_ib": 6.0569305419921875, + "ce_orig": 0.9353012442588806, + "epoch": 0.5354806240563664, + "kl_loss": 0.19924458861351013, + "loss_ib": 0.008049375377595425, + "step": 1862 + }, + { + "ce_ib": 6.241096019744873, + "ce_orig": 1.3277326822280884, + "epoch": 0.5354806240563664, + "kl_loss": 0.3990367352962494, + "loss_ib": 0.010231463238596916, + "step": 1862 + }, + { + "ce_ib": 4.2260613441467285, + "ce_orig": 0.5645077228546143, + "epoch": 0.5354806240563664, + "kl_loss": 0.22848635911941528, + "loss_ib": 0.006510925013571978, + "step": 1862 + }, + { + "ce_ib": 4.25538444519043, + "ce_orig": 0.8132855296134949, + "epoch": 0.5354806240563664, + "kl_loss": 0.2353193461894989, + "loss_ib": 0.006608577910810709, + "step": 1862 + }, + { + "ce_ib": 4.660745620727539, + "ce_orig": 0.8753987550735474, + "epoch": 0.535768207635344, + "kl_loss": 0.16747131943702698, + "loss_ib": 0.006335458718240261, + "step": 1863 + }, + { + "ce_ib": 3.7566816806793213, + "ce_orig": 0.5486100316047668, + "epoch": 0.535768207635344, + "kl_loss": 0.2130318135023117, + "loss_ib": 0.005886999890208244, + "step": 1863 + }, + { + "ce_ib": 4.214888572692871, + "ce_orig": 0.8790446519851685, + "epoch": 0.535768207635344, + "kl_loss": 0.19534213840961456, + "loss_ib": 0.006168310064822435, + "step": 1863 + }, + { + "ce_ib": 6.063351631164551, + "ce_orig": 0.9974119067192078, + "epoch": 0.535768207635344, + "kl_loss": 0.19461339712142944, + "loss_ib": 0.008009484969079494, + "step": 1863 + }, + { + "ce_ib": 7.536983966827393, + "ce_orig": 1.4408035278320312, + "epoch": 0.5360557912143217, + "kl_loss": 0.32511430978775024, + "loss_ib": 0.010788126848638058, + "step": 1864 + }, + { + "ce_ib": 3.6541574001312256, + "ce_orig": 0.504055917263031, + "epoch": 0.5360557912143217, + "kl_loss": 0.16534735262393951, + "loss_ib": 0.0053076306357979774, + "step": 1864 + }, + { + "ce_ib": 4.483316898345947, + "ce_orig": 0.666479229927063, + "epoch": 0.5360557912143217, + "kl_loss": 0.15484187006950378, + "loss_ib": 0.006031735334545374, + "step": 1864 + }, + { + "ce_ib": 5.627042770385742, + "ce_orig": 1.2612030506134033, + "epoch": 0.5360557912143217, + "kl_loss": 0.20702578127384186, + "loss_ib": 0.007697300054132938, + "step": 1864 + }, + { + "epoch": 0.5363433747932993, + "grad_norm": 0.14977069199085236, + "learning_rate": 9.435850249241661e-06, + "loss": 0.8601, + "step": 1865 + }, + { + "ce_ib": 3.3860867023468018, + "ce_orig": 0.5201670527458191, + "epoch": 0.5363433747932993, + "kl_loss": 0.1453399956226349, + "loss_ib": 0.004839486442506313, + "step": 1865 + }, + { + "ce_ib": 5.578195095062256, + "ce_orig": 0.6977161169052124, + "epoch": 0.5363433747932993, + "kl_loss": 0.19398577511310577, + "loss_ib": 0.00751805305480957, + "step": 1865 + }, + { + "ce_ib": 4.881834506988525, + "ce_orig": 0.8882226347923279, + "epoch": 0.5363433747932993, + "kl_loss": 0.46950674057006836, + "loss_ib": 0.00957690179347992, + "step": 1865 + }, + { + "ce_ib": 5.812175273895264, + "ce_orig": 0.8029134273529053, + "epoch": 0.5363433747932993, + "kl_loss": 0.2707087993621826, + "loss_ib": 0.008519263006746769, + "step": 1865 + }, + { + "ce_ib": 8.659444808959961, + "ce_orig": 1.537245512008667, + "epoch": 0.5366309583722769, + "kl_loss": 0.22481295466423035, + "loss_ib": 0.01090757455676794, + "step": 1866 + }, + { + "ce_ib": 6.766580104827881, + "ce_orig": 0.9183495044708252, + "epoch": 0.5366309583722769, + "kl_loss": 0.22349092364311218, + "loss_ib": 0.009001489728689194, + "step": 1866 + }, + { + "ce_ib": 8.252838134765625, + "ce_orig": 0.9894993305206299, + "epoch": 0.5366309583722769, + "kl_loss": 0.15923966467380524, + "loss_ib": 0.009845234453678131, + "step": 1866 + }, + { + "ce_ib": 9.690625190734863, + "ce_orig": 1.1797600984573364, + "epoch": 0.5366309583722769, + "kl_loss": 0.21562716364860535, + "loss_ib": 0.011846896260976791, + "step": 1866 + }, + { + "ce_ib": 6.657962322235107, + "ce_orig": 1.0763212442398071, + "epoch": 0.5369185419512545, + "kl_loss": 0.1339656114578247, + "loss_ib": 0.007997618056833744, + "step": 1867 + }, + { + "ce_ib": 9.438556671142578, + "ce_orig": 1.5125812292099, + "epoch": 0.5369185419512545, + "kl_loss": 0.17226141691207886, + "loss_ib": 0.011161170899868011, + "step": 1867 + }, + { + "ce_ib": 3.83799409866333, + "ce_orig": 0.6082793474197388, + "epoch": 0.5369185419512545, + "kl_loss": 0.17104685306549072, + "loss_ib": 0.005548462737351656, + "step": 1867 + }, + { + "ce_ib": 4.337790489196777, + "ce_orig": 0.8539470434188843, + "epoch": 0.5369185419512545, + "kl_loss": 0.15344543755054474, + "loss_ib": 0.005872244480997324, + "step": 1867 + }, + { + "ce_ib": 4.979361534118652, + "ce_orig": 1.101728081703186, + "epoch": 0.5372061255302322, + "kl_loss": 0.1728961169719696, + "loss_ib": 0.006708322558552027, + "step": 1868 + }, + { + "ce_ib": 5.25793981552124, + "ce_orig": 0.8807792663574219, + "epoch": 0.5372061255302322, + "kl_loss": 0.1700776368379593, + "loss_ib": 0.006958715617656708, + "step": 1868 + }, + { + "ce_ib": 6.643743991851807, + "ce_orig": 1.299357533454895, + "epoch": 0.5372061255302322, + "kl_loss": 0.1740136742591858, + "loss_ib": 0.008383880369365215, + "step": 1868 + }, + { + "ce_ib": 6.947753429412842, + "ce_orig": 1.0708379745483398, + "epoch": 0.5372061255302322, + "kl_loss": 0.34942391514778137, + "loss_ib": 0.010441992431879044, + "step": 1868 + }, + { + "ce_ib": 4.8975443840026855, + "ce_orig": 0.6301613450050354, + "epoch": 0.5374937091092099, + "kl_loss": 0.18357506394386292, + "loss_ib": 0.006733294576406479, + "step": 1869 + }, + { + "ce_ib": 6.845427513122559, + "ce_orig": 0.771119236946106, + "epoch": 0.5374937091092099, + "kl_loss": 0.17592459917068481, + "loss_ib": 0.00860467366874218, + "step": 1869 + }, + { + "ce_ib": 5.129888534545898, + "ce_orig": 1.204761266708374, + "epoch": 0.5374937091092099, + "kl_loss": 0.11413148045539856, + "loss_ib": 0.006271203514188528, + "step": 1869 + }, + { + "ce_ib": 8.118036270141602, + "ce_orig": 1.9563100337982178, + "epoch": 0.5374937091092099, + "kl_loss": 0.7197785973548889, + "loss_ib": 0.015315822325646877, + "step": 1869 + }, + { + "epoch": 0.5377812926881875, + "grad_norm": 0.15173690021038055, + "learning_rate": 9.432263719478611e-06, + "loss": 0.8979, + "step": 1870 + }, + { + "ce_ib": 2.8820931911468506, + "ce_orig": 0.6525115370750427, + "epoch": 0.5377812926881875, + "kl_loss": 0.17920103669166565, + "loss_ib": 0.004674103576689959, + "step": 1870 + }, + { + "ce_ib": 8.007864952087402, + "ce_orig": 0.8055094480514526, + "epoch": 0.5377812926881875, + "kl_loss": 0.31373703479766846, + "loss_ib": 0.011145235039293766, + "step": 1870 + }, + { + "ce_ib": 5.944539546966553, + "ce_orig": 0.7078796029090881, + "epoch": 0.5377812926881875, + "kl_loss": 0.24224576354026794, + "loss_ib": 0.008366997353732586, + "step": 1870 + }, + { + "ce_ib": 9.522847175598145, + "ce_orig": 1.4455835819244385, + "epoch": 0.5377812926881875, + "kl_loss": 0.19860181212425232, + "loss_ib": 0.011508865281939507, + "step": 1870 + }, + { + "ce_ib": 3.601259469985962, + "ce_orig": 0.5386037230491638, + "epoch": 0.5380688762671652, + "kl_loss": 0.14678509533405304, + "loss_ib": 0.0050691100768744946, + "step": 1871 + }, + { + "ce_ib": 3.1088805198669434, + "ce_orig": 0.7355481386184692, + "epoch": 0.5380688762671652, + "kl_loss": 0.1154508888721466, + "loss_ib": 0.004263389389961958, + "step": 1871 + }, + { + "ce_ib": 6.967272758483887, + "ce_orig": 1.1396918296813965, + "epoch": 0.5380688762671652, + "kl_loss": 0.268510639667511, + "loss_ib": 0.009652378968894482, + "step": 1871 + }, + { + "ce_ib": 3.3585612773895264, + "ce_orig": 0.6488027572631836, + "epoch": 0.5380688762671652, + "kl_loss": 0.26048383116722107, + "loss_ib": 0.005963399074971676, + "step": 1871 + }, + { + "ce_ib": 2.642223596572876, + "ce_orig": 0.4931933283805847, + "epoch": 0.5383564598461428, + "kl_loss": 0.2122310996055603, + "loss_ib": 0.004764534533023834, + "step": 1872 + }, + { + "ce_ib": 5.761163711547852, + "ce_orig": 0.9917870759963989, + "epoch": 0.5383564598461428, + "kl_loss": 0.20570719242095947, + "loss_ib": 0.007818236015737057, + "step": 1872 + }, + { + "ce_ib": 7.926612854003906, + "ce_orig": 1.3921815156936646, + "epoch": 0.5383564598461428, + "kl_loss": 0.210135817527771, + "loss_ib": 0.010027971118688583, + "step": 1872 + }, + { + "ce_ib": 1.9567288160324097, + "ce_orig": 0.2648179531097412, + "epoch": 0.5383564598461428, + "kl_loss": 0.41877481341362, + "loss_ib": 0.006144477054476738, + "step": 1872 + }, + { + "ce_ib": 5.836376667022705, + "ce_orig": 0.9933511018753052, + "epoch": 0.5386440434251204, + "kl_loss": 0.27552711963653564, + "loss_ib": 0.008591647259891033, + "step": 1873 + }, + { + "ce_ib": 7.745918273925781, + "ce_orig": 1.4708333015441895, + "epoch": 0.5386440434251204, + "kl_loss": 0.33777713775634766, + "loss_ib": 0.011123690754175186, + "step": 1873 + }, + { + "ce_ib": 5.4888081550598145, + "ce_orig": 1.2457603216171265, + "epoch": 0.5386440434251204, + "kl_loss": 0.21348831057548523, + "loss_ib": 0.007623691577464342, + "step": 1873 + }, + { + "ce_ib": 5.6587605476379395, + "ce_orig": 0.7417976260185242, + "epoch": 0.5386440434251204, + "kl_loss": 0.27924901247024536, + "loss_ib": 0.00845125038176775, + "step": 1873 + }, + { + "ce_ib": 6.356706619262695, + "ce_orig": 0.9084938168525696, + "epoch": 0.538931627004098, + "kl_loss": 0.2768899202346802, + "loss_ib": 0.00912560522556305, + "step": 1874 + }, + { + "ce_ib": 8.2174072265625, + "ce_orig": 1.7141512632369995, + "epoch": 0.538931627004098, + "kl_loss": 0.13865156471729279, + "loss_ib": 0.009603923186659813, + "step": 1874 + }, + { + "ce_ib": 5.935986042022705, + "ce_orig": 0.8549964427947998, + "epoch": 0.538931627004098, + "kl_loss": 0.13466152548789978, + "loss_ib": 0.007282601203769445, + "step": 1874 + }, + { + "ce_ib": 5.00381326675415, + "ce_orig": 0.6767867207527161, + "epoch": 0.538931627004098, + "kl_loss": 0.3218216300010681, + "loss_ib": 0.00822202954441309, + "step": 1874 + }, + { + "epoch": 0.5392192105830758, + "grad_norm": 0.13676752150058746, + "learning_rate": 9.428666511363511e-06, + "loss": 0.8618, + "step": 1875 + }, + { + "ce_ib": 3.0487828254699707, + "ce_orig": 0.5314833521842957, + "epoch": 0.5392192105830758, + "kl_loss": 0.17765596508979797, + "loss_ib": 0.004825342446565628, + "step": 1875 + }, + { + "ce_ib": 7.444401264190674, + "ce_orig": 0.8767432570457458, + "epoch": 0.5392192105830758, + "kl_loss": 0.2486288845539093, + "loss_ib": 0.009930690750479698, + "step": 1875 + }, + { + "ce_ib": 2.8306288719177246, + "ce_orig": 0.44761285185813904, + "epoch": 0.5392192105830758, + "kl_loss": 0.10526077449321747, + "loss_ib": 0.0038832365535199642, + "step": 1875 + }, + { + "ce_ib": 3.7552425861358643, + "ce_orig": 0.7788964509963989, + "epoch": 0.5392192105830758, + "kl_loss": 0.18940740823745728, + "loss_ib": 0.0056493161246180534, + "step": 1875 + }, + { + "ce_ib": 5.244472503662109, + "ce_orig": 0.7166180610656738, + "epoch": 0.5395067941620534, + "kl_loss": 0.23369517922401428, + "loss_ib": 0.007581424433737993, + "step": 1876 + }, + { + "ce_ib": 7.7227253913879395, + "ce_orig": 1.4851809740066528, + "epoch": 0.5395067941620534, + "kl_loss": 0.15310481190681458, + "loss_ib": 0.009253773838281631, + "step": 1876 + }, + { + "ce_ib": 3.0689175128936768, + "ce_orig": 0.6693606376647949, + "epoch": 0.5395067941620534, + "kl_loss": 0.1706458032131195, + "loss_ib": 0.004775375593453646, + "step": 1876 + }, + { + "ce_ib": 6.795139312744141, + "ce_orig": 1.3710235357284546, + "epoch": 0.5395067941620534, + "kl_loss": 0.19929218292236328, + "loss_ib": 0.008788061328232288, + "step": 1876 + }, + { + "ce_ib": 5.0664448738098145, + "ce_orig": 0.5143498182296753, + "epoch": 0.539794377741031, + "kl_loss": 0.42177170515060425, + "loss_ib": 0.00928416196256876, + "step": 1877 + }, + { + "ce_ib": 7.223611354827881, + "ce_orig": 1.1241228580474854, + "epoch": 0.539794377741031, + "kl_loss": 0.20792663097381592, + "loss_ib": 0.009302877821028233, + "step": 1877 + }, + { + "ce_ib": 6.8414626121521, + "ce_orig": 0.7621123194694519, + "epoch": 0.539794377741031, + "kl_loss": 0.31143611669540405, + "loss_ib": 0.009955823421478271, + "step": 1877 + }, + { + "ce_ib": 6.14713716506958, + "ce_orig": 1.0838134288787842, + "epoch": 0.539794377741031, + "kl_loss": 0.18095140159130096, + "loss_ib": 0.007956651039421558, + "step": 1877 + }, + { + "ce_ib": 3.394634485244751, + "ce_orig": 0.47664836049079895, + "epoch": 0.5400819613200086, + "kl_loss": 0.30461785197257996, + "loss_ib": 0.006440812721848488, + "step": 1878 + }, + { + "ce_ib": 3.8926734924316406, + "ce_orig": 0.5368797779083252, + "epoch": 0.5400819613200086, + "kl_loss": 0.19092890620231628, + "loss_ib": 0.005801962688565254, + "step": 1878 + }, + { + "ce_ib": 6.97349214553833, + "ce_orig": 1.6624675989151, + "epoch": 0.5400819613200086, + "kl_loss": 0.21015450358390808, + "loss_ib": 0.009075037203729153, + "step": 1878 + }, + { + "ce_ib": 5.310675621032715, + "ce_orig": 1.0094043016433716, + "epoch": 0.5400819613200086, + "kl_loss": 0.22270002961158752, + "loss_ib": 0.0075376760214567184, + "step": 1878 + }, + { + "ce_ib": 4.8102874755859375, + "ce_orig": 0.8906371593475342, + "epoch": 0.5403695448989863, + "kl_loss": 0.2329912781715393, + "loss_ib": 0.0071402001194655895, + "step": 1879 + }, + { + "ce_ib": 7.846437931060791, + "ce_orig": 1.5362975597381592, + "epoch": 0.5403695448989863, + "kl_loss": 0.2593178153038025, + "loss_ib": 0.0104396166279912, + "step": 1879 + }, + { + "ce_ib": 6.457812309265137, + "ce_orig": 1.4550244808197021, + "epoch": 0.5403695448989863, + "kl_loss": 0.2090592086315155, + "loss_ib": 0.008548404090106487, + "step": 1879 + }, + { + "ce_ib": 4.535857200622559, + "ce_orig": 0.6384449601173401, + "epoch": 0.5403695448989863, + "kl_loss": 0.25717389583587646, + "loss_ib": 0.007107596378773451, + "step": 1879 + }, + { + "epoch": 0.5406571284779639, + "grad_norm": 0.11576619744300842, + "learning_rate": 9.42505863356287e-06, + "loss": 0.8797, + "step": 1880 + }, + { + "ce_ib": 3.876596450805664, + "ce_orig": 0.6871512532234192, + "epoch": 0.5406571284779639, + "kl_loss": 0.25499510765075684, + "loss_ib": 0.006426547653973103, + "step": 1880 + }, + { + "ce_ib": 4.118466377258301, + "ce_orig": 0.6281008124351501, + "epoch": 0.5406571284779639, + "kl_loss": 0.23545455932617188, + "loss_ib": 0.006473011802881956, + "step": 1880 + }, + { + "ce_ib": 5.587640762329102, + "ce_orig": 1.1079524755477905, + "epoch": 0.5406571284779639, + "kl_loss": 0.17437410354614258, + "loss_ib": 0.007331382017582655, + "step": 1880 + }, + { + "ce_ib": 6.028123378753662, + "ce_orig": 0.6316193342208862, + "epoch": 0.5406571284779639, + "kl_loss": 0.27316561341285706, + "loss_ib": 0.00875977985560894, + "step": 1880 + }, + { + "ce_ib": 6.767507076263428, + "ce_orig": 1.2337112426757812, + "epoch": 0.5409447120569415, + "kl_loss": 0.37158188223838806, + "loss_ib": 0.010483325459063053, + "step": 1881 + }, + { + "ce_ib": 3.529797315597534, + "ce_orig": 0.7312442660331726, + "epoch": 0.5409447120569415, + "kl_loss": 0.2754474878311157, + "loss_ib": 0.00628427229821682, + "step": 1881 + }, + { + "ce_ib": 4.150570392608643, + "ce_orig": 0.6519814133644104, + "epoch": 0.5409447120569415, + "kl_loss": 0.12673813104629517, + "loss_ib": 0.005417951382696629, + "step": 1881 + }, + { + "ce_ib": 7.97968864440918, + "ce_orig": 1.4256978034973145, + "epoch": 0.5409447120569415, + "kl_loss": 0.46767693758010864, + "loss_ib": 0.012656456790864468, + "step": 1881 + }, + { + "ce_ib": 4.163929462432861, + "ce_orig": 0.6977522373199463, + "epoch": 0.5412322956359192, + "kl_loss": 0.22474998235702515, + "loss_ib": 0.0064114294946193695, + "step": 1882 + }, + { + "ce_ib": 6.889791965484619, + "ce_orig": 1.1969032287597656, + "epoch": 0.5412322956359192, + "kl_loss": 0.33730947971343994, + "loss_ib": 0.010262886993587017, + "step": 1882 + }, + { + "ce_ib": 2.566894769668579, + "ce_orig": 0.4599597454071045, + "epoch": 0.5412322956359192, + "kl_loss": 0.11533834040164948, + "loss_ib": 0.003720278153195977, + "step": 1882 + }, + { + "ce_ib": 6.130898475646973, + "ce_orig": 1.097641110420227, + "epoch": 0.5412322956359192, + "kl_loss": 0.2714581787586212, + "loss_ib": 0.008845480158925056, + "step": 1882 + }, + { + "ce_ib": 5.6272687911987305, + "ce_orig": 0.811035692691803, + "epoch": 0.5415198792148969, + "kl_loss": 0.406782329082489, + "loss_ib": 0.009695092216134071, + "step": 1883 + }, + { + "ce_ib": 4.359681129455566, + "ce_orig": 0.6566734910011292, + "epoch": 0.5415198792148969, + "kl_loss": 0.2520410418510437, + "loss_ib": 0.00688009150326252, + "step": 1883 + }, + { + "ce_ib": 5.818939685821533, + "ce_orig": 0.9224424362182617, + "epoch": 0.5415198792148969, + "kl_loss": 0.2999403476715088, + "loss_ib": 0.00881834328174591, + "step": 1883 + }, + { + "ce_ib": 4.2311506271362305, + "ce_orig": 0.3763723075389862, + "epoch": 0.5415198792148969, + "kl_loss": 0.2320677787065506, + "loss_ib": 0.006551828235387802, + "step": 1883 + }, + { + "ce_ib": 3.7210848331451416, + "ce_orig": 0.830406129360199, + "epoch": 0.5418074627938745, + "kl_loss": 0.22216445207595825, + "loss_ib": 0.005942729767411947, + "step": 1884 + }, + { + "ce_ib": 5.938092231750488, + "ce_orig": 1.2687435150146484, + "epoch": 0.5418074627938745, + "kl_loss": 0.21658417582511902, + "loss_ib": 0.008103934116661549, + "step": 1884 + }, + { + "ce_ib": 4.062071323394775, + "ce_orig": 0.8783230781555176, + "epoch": 0.5418074627938745, + "kl_loss": 0.15394330024719238, + "loss_ib": 0.005601504351943731, + "step": 1884 + }, + { + "ce_ib": 5.388051509857178, + "ce_orig": 0.7404717206954956, + "epoch": 0.5418074627938745, + "kl_loss": 0.14342042803764343, + "loss_ib": 0.006822255905717611, + "step": 1884 + }, + { + "epoch": 0.5420950463728521, + "grad_norm": 0.1313483566045761, + "learning_rate": 9.421440094768903e-06, + "loss": 0.8407, + "step": 1885 + }, + { + "ce_ib": 3.99212384223938, + "ce_orig": 0.6558020114898682, + "epoch": 0.5420950463728521, + "kl_loss": 0.20062603056430817, + "loss_ib": 0.005998384207487106, + "step": 1885 + }, + { + "ce_ib": 5.2277727127075195, + "ce_orig": 0.8348150253295898, + "epoch": 0.5420950463728521, + "kl_loss": 0.20737120509147644, + "loss_ib": 0.0073014842346310616, + "step": 1885 + }, + { + "ce_ib": 3.134458541870117, + "ce_orig": 0.5520787239074707, + "epoch": 0.5420950463728521, + "kl_loss": 0.18166503310203552, + "loss_ib": 0.00495110871270299, + "step": 1885 + }, + { + "ce_ib": 4.72963285446167, + "ce_orig": 0.7465882897377014, + "epoch": 0.5420950463728521, + "kl_loss": 0.4079294204711914, + "loss_ib": 0.008808927610516548, + "step": 1885 + }, + { + "ce_ib": 5.483861446380615, + "ce_orig": 1.0649985074996948, + "epoch": 0.5423826299518297, + "kl_loss": 0.27285709977149963, + "loss_ib": 0.008212432265281677, + "step": 1886 + }, + { + "ce_ib": 4.002931118011475, + "ce_orig": 0.6592392921447754, + "epoch": 0.5423826299518297, + "kl_loss": 0.1890222281217575, + "loss_ib": 0.005893153604120016, + "step": 1886 + }, + { + "ce_ib": 4.448903560638428, + "ce_orig": 0.8768444657325745, + "epoch": 0.5423826299518297, + "kl_loss": 0.14126378297805786, + "loss_ib": 0.005861540790647268, + "step": 1886 + }, + { + "ce_ib": 6.241933345794678, + "ce_orig": 1.0336610078811646, + "epoch": 0.5423826299518297, + "kl_loss": 0.2534750699996948, + "loss_ib": 0.008776684291660786, + "step": 1886 + }, + { + "ce_ib": 3.197049856185913, + "ce_orig": 0.8099791407585144, + "epoch": 0.5426702135308074, + "kl_loss": 0.1748722493648529, + "loss_ib": 0.004945772234350443, + "step": 1887 + }, + { + "ce_ib": 4.192432403564453, + "ce_orig": 0.7689682841300964, + "epoch": 0.5426702135308074, + "kl_loss": 0.19127227365970612, + "loss_ib": 0.0061051552183926105, + "step": 1887 + }, + { + "ce_ib": 9.011332511901855, + "ce_orig": 1.5508743524551392, + "epoch": 0.5426702135308074, + "kl_loss": 0.18881624937057495, + "loss_ib": 0.010899494402110577, + "step": 1887 + }, + { + "ce_ib": 2.7345058917999268, + "ce_orig": 0.6141093373298645, + "epoch": 0.5426702135308074, + "kl_loss": 0.22916878759860992, + "loss_ib": 0.005026193801313639, + "step": 1887 + }, + { + "ce_ib": 6.5455098152160645, + "ce_orig": 1.1694104671478271, + "epoch": 0.542957797109785, + "kl_loss": 0.1957620233297348, + "loss_ib": 0.008503129705786705, + "step": 1888 + }, + { + "ce_ib": 3.9233615398406982, + "ce_orig": 0.5173555612564087, + "epoch": 0.542957797109785, + "kl_loss": 0.17764030396938324, + "loss_ib": 0.005699764471501112, + "step": 1888 + }, + { + "ce_ib": 8.286897659301758, + "ce_orig": 1.6964346170425415, + "epoch": 0.542957797109785, + "kl_loss": 0.21384048461914062, + "loss_ib": 0.010425303131341934, + "step": 1888 + }, + { + "ce_ib": 5.003795623779297, + "ce_orig": 0.7326104044914246, + "epoch": 0.542957797109785, + "kl_loss": 0.3118665814399719, + "loss_ib": 0.008122460916638374, + "step": 1888 + }, + { + "ce_ib": 3.2082910537719727, + "ce_orig": 0.6192259192466736, + "epoch": 0.5432453806887627, + "kl_loss": 0.17962077260017395, + "loss_ib": 0.005004498641937971, + "step": 1889 + }, + { + "ce_ib": 3.2921929359436035, + "ce_orig": 0.5335933566093445, + "epoch": 0.5432453806887627, + "kl_loss": 0.27632591128349304, + "loss_ib": 0.006055451929569244, + "step": 1889 + }, + { + "ce_ib": 6.6286234855651855, + "ce_orig": 1.1099865436553955, + "epoch": 0.5432453806887627, + "kl_loss": 0.1445891112089157, + "loss_ib": 0.00807451456785202, + "step": 1889 + }, + { + "ce_ib": 4.841403007507324, + "ce_orig": 0.49701279401779175, + "epoch": 0.5432453806887627, + "kl_loss": 0.32149016857147217, + "loss_ib": 0.008056304417550564, + "step": 1889 + }, + { + "epoch": 0.5435329642677403, + "grad_norm": 0.11242397874593735, + "learning_rate": 9.417810903699508e-06, + "loss": 0.8766, + "step": 1890 + }, + { + "ce_ib": 3.463954210281372, + "ce_orig": 0.6613526344299316, + "epoch": 0.5435329642677403, + "kl_loss": 0.16436061263084412, + "loss_ib": 0.005107560195028782, + "step": 1890 + }, + { + "ce_ib": 6.53847074508667, + "ce_orig": 1.2058744430541992, + "epoch": 0.5435329642677403, + "kl_loss": 0.2621225416660309, + "loss_ib": 0.009159696288406849, + "step": 1890 + }, + { + "ce_ib": 4.156022548675537, + "ce_orig": 0.8861443996429443, + "epoch": 0.5435329642677403, + "kl_loss": 0.17761921882629395, + "loss_ib": 0.005932214669883251, + "step": 1890 + }, + { + "ce_ib": 2.1399264335632324, + "ce_orig": 0.2459627389907837, + "epoch": 0.5435329642677403, + "kl_loss": 0.4725005626678467, + "loss_ib": 0.006864931900054216, + "step": 1890 + }, + { + "ce_ib": 5.926367282867432, + "ce_orig": 1.149590015411377, + "epoch": 0.543820547846718, + "kl_loss": 0.19166606664657593, + "loss_ib": 0.00784302782267332, + "step": 1891 + }, + { + "ce_ib": 5.3010969161987305, + "ce_orig": 0.6915357708930969, + "epoch": 0.543820547846718, + "kl_loss": 0.19798554480075836, + "loss_ib": 0.007280952297151089, + "step": 1891 + }, + { + "ce_ib": 4.656065940856934, + "ce_orig": 0.6376870274543762, + "epoch": 0.543820547846718, + "kl_loss": 0.25316092371940613, + "loss_ib": 0.007187675219029188, + "step": 1891 + }, + { + "ce_ib": 5.661493301391602, + "ce_orig": 0.9320741295814514, + "epoch": 0.543820547846718, + "kl_loss": 0.2215915024280548, + "loss_ib": 0.007877408526837826, + "step": 1891 + }, + { + "ce_ib": 5.105285167694092, + "ce_orig": 0.6744505763053894, + "epoch": 0.5441081314256956, + "kl_loss": 0.3318011164665222, + "loss_ib": 0.008423295803368092, + "step": 1892 + }, + { + "ce_ib": 3.8302907943725586, + "ce_orig": 0.528817355632782, + "epoch": 0.5441081314256956, + "kl_loss": 0.19222483038902283, + "loss_ib": 0.005752538796514273, + "step": 1892 + }, + { + "ce_ib": 4.628088474273682, + "ce_orig": 0.7512523531913757, + "epoch": 0.5441081314256956, + "kl_loss": 0.2041558027267456, + "loss_ib": 0.006669646129012108, + "step": 1892 + }, + { + "ce_ib": 5.9766621589660645, + "ce_orig": 0.9840378165245056, + "epoch": 0.5441081314256956, + "kl_loss": 0.271618515253067, + "loss_ib": 0.008692847564816475, + "step": 1892 + }, + { + "ce_ib": 6.145042896270752, + "ce_orig": 0.914753794670105, + "epoch": 0.5443957150046732, + "kl_loss": 0.26932966709136963, + "loss_ib": 0.00883833970874548, + "step": 1893 + }, + { + "ce_ib": 6.065830230712891, + "ce_orig": 1.0640356540679932, + "epoch": 0.5443957150046732, + "kl_loss": 0.36412590742111206, + "loss_ib": 0.009707089513540268, + "step": 1893 + }, + { + "ce_ib": 3.033369779586792, + "ce_orig": 0.43958115577697754, + "epoch": 0.5443957150046732, + "kl_loss": 0.17401011288166046, + "loss_ib": 0.00477347057312727, + "step": 1893 + }, + { + "ce_ib": 6.30896520614624, + "ce_orig": 1.375815510749817, + "epoch": 0.5443957150046732, + "kl_loss": 0.23319408297538757, + "loss_ib": 0.00864090584218502, + "step": 1893 + }, + { + "ce_ib": 3.9230875968933105, + "ce_orig": 0.6200323104858398, + "epoch": 0.5446832985836508, + "kl_loss": 0.22491362690925598, + "loss_ib": 0.006172223947942257, + "step": 1894 + }, + { + "ce_ib": 4.911445140838623, + "ce_orig": 0.5863372087478638, + "epoch": 0.5446832985836508, + "kl_loss": 0.28006529808044434, + "loss_ib": 0.007712098304182291, + "step": 1894 + }, + { + "ce_ib": 4.6514363288879395, + "ce_orig": 0.8628247976303101, + "epoch": 0.5446832985836508, + "kl_loss": 0.203122079372406, + "loss_ib": 0.006682656705379486, + "step": 1894 + }, + { + "ce_ib": 4.065433025360107, + "ce_orig": 0.6826019883155823, + "epoch": 0.5446832985836508, + "kl_loss": 0.26031285524368286, + "loss_ib": 0.006668561603873968, + "step": 1894 + }, + { + "epoch": 0.5449708821626286, + "grad_norm": 0.12011422216892242, + "learning_rate": 9.414171069098252e-06, + "loss": 0.8858, + "step": 1895 + }, + { + "ce_ib": 6.216679573059082, + "ce_orig": 0.9155467748641968, + "epoch": 0.5449708821626286, + "kl_loss": 0.2708143889904022, + "loss_ib": 0.008924824185669422, + "step": 1895 + }, + { + "ce_ib": 4.060703277587891, + "ce_orig": 0.5009088516235352, + "epoch": 0.5449708821626286, + "kl_loss": 0.16042546927928925, + "loss_ib": 0.00566495768725872, + "step": 1895 + }, + { + "ce_ib": 6.486944198608398, + "ce_orig": 0.8409891128540039, + "epoch": 0.5449708821626286, + "kl_loss": 0.15728840231895447, + "loss_ib": 0.008059828542172909, + "step": 1895 + }, + { + "ce_ib": 5.548868656158447, + "ce_orig": 0.6430012583732605, + "epoch": 0.5449708821626286, + "kl_loss": 0.2702459692955017, + "loss_ib": 0.008251328021287918, + "step": 1895 + }, + { + "ce_ib": 3.7308897972106934, + "ce_orig": 0.551981508731842, + "epoch": 0.5452584657416062, + "kl_loss": 0.1988811194896698, + "loss_ib": 0.005719700828194618, + "step": 1896 + }, + { + "ce_ib": 6.85680627822876, + "ce_orig": 0.6434974074363708, + "epoch": 0.5452584657416062, + "kl_loss": 0.2716730237007141, + "loss_ib": 0.009573535993695259, + "step": 1896 + }, + { + "ce_ib": 8.618288040161133, + "ce_orig": 1.4570660591125488, + "epoch": 0.5452584657416062, + "kl_loss": 0.22189399600028992, + "loss_ib": 0.010837228037416935, + "step": 1896 + }, + { + "ce_ib": 3.696220636367798, + "ce_orig": 0.7216467261314392, + "epoch": 0.5452584657416062, + "kl_loss": 0.20159435272216797, + "loss_ib": 0.00571216456592083, + "step": 1896 + }, + { + "ce_ib": 8.26495361328125, + "ce_orig": 1.3415573835372925, + "epoch": 0.5455460493205838, + "kl_loss": 0.24519026279449463, + "loss_ib": 0.010716855525970459, + "step": 1897 + }, + { + "ce_ib": 3.7366104125976562, + "ce_orig": 0.37444525957107544, + "epoch": 0.5455460493205838, + "kl_loss": 0.3108067214488983, + "loss_ib": 0.006844677962362766, + "step": 1897 + }, + { + "ce_ib": 5.597085475921631, + "ce_orig": 1.131085753440857, + "epoch": 0.5455460493205838, + "kl_loss": 0.17204684019088745, + "loss_ib": 0.007317553739994764, + "step": 1897 + }, + { + "ce_ib": 5.646449565887451, + "ce_orig": 1.0100291967391968, + "epoch": 0.5455460493205838, + "kl_loss": 0.19637063145637512, + "loss_ib": 0.007610156200826168, + "step": 1897 + }, + { + "ce_ib": 4.564586639404297, + "ce_orig": 0.4993542730808258, + "epoch": 0.5458336328995614, + "kl_loss": 0.2906390428543091, + "loss_ib": 0.007470977026969194, + "step": 1898 + }, + { + "ce_ib": 4.966852188110352, + "ce_orig": 0.8930625915527344, + "epoch": 0.5458336328995614, + "kl_loss": 0.18525347113609314, + "loss_ib": 0.006819386966526508, + "step": 1898 + }, + { + "ce_ib": 2.9131898880004883, + "ce_orig": 0.398380309343338, + "epoch": 0.5458336328995614, + "kl_loss": 0.1496700942516327, + "loss_ib": 0.0044098906219005585, + "step": 1898 + }, + { + "ce_ib": 6.019862651824951, + "ce_orig": 1.2035516500473022, + "epoch": 0.5458336328995614, + "kl_loss": 0.23380295932292938, + "loss_ib": 0.008357892744243145, + "step": 1898 + }, + { + "ce_ib": 6.085721492767334, + "ce_orig": 0.4494902789592743, + "epoch": 0.5461212164785391, + "kl_loss": 0.25177237391471863, + "loss_ib": 0.008603445254266262, + "step": 1899 + }, + { + "ce_ib": 3.5063061714172363, + "ce_orig": 0.6513923406600952, + "epoch": 0.5461212164785391, + "kl_loss": 0.19170190393924713, + "loss_ib": 0.00542332511395216, + "step": 1899 + }, + { + "ce_ib": 2.6341021060943604, + "ce_orig": 0.5152660012245178, + "epoch": 0.5461212164785391, + "kl_loss": 0.16661971807479858, + "loss_ib": 0.004300299100577831, + "step": 1899 + }, + { + "ce_ib": 5.236257553100586, + "ce_orig": 0.5088071823120117, + "epoch": 0.5461212164785391, + "kl_loss": 0.24358290433883667, + "loss_ib": 0.007672086823731661, + "step": 1899 + }, + { + "epoch": 0.5464088000575167, + "grad_norm": 0.11163178831338882, + "learning_rate": 9.410520599734338e-06, + "loss": 0.8181, + "step": 1900 + }, + { + "ce_ib": 6.69284200668335, + "ce_orig": 0.766628623008728, + "epoch": 0.5464088000575167, + "kl_loss": 0.14039671421051025, + "loss_ib": 0.00809680949896574, + "step": 1900 + }, + { + "ce_ib": 5.698209762573242, + "ce_orig": 0.45784991979599, + "epoch": 0.5464088000575167, + "kl_loss": 0.4267015755176544, + "loss_ib": 0.009965225122869015, + "step": 1900 + }, + { + "ce_ib": 3.716140031814575, + "ce_orig": 0.3612969219684601, + "epoch": 0.5464088000575167, + "kl_loss": 0.2603936493396759, + "loss_ib": 0.0063200765289366245, + "step": 1900 + }, + { + "ce_ib": 5.239020347595215, + "ce_orig": 1.0331742763519287, + "epoch": 0.5464088000575167, + "kl_loss": 0.24392680823802948, + "loss_ib": 0.007678288500756025, + "step": 1900 + }, + { + "ce_ib": 8.650659561157227, + "ce_orig": 1.662894368171692, + "epoch": 0.5466963836364943, + "kl_loss": 0.26084673404693604, + "loss_ib": 0.011259126476943493, + "step": 1901 + }, + { + "ce_ib": 7.4206438064575195, + "ce_orig": 1.3804047107696533, + "epoch": 0.5466963836364943, + "kl_loss": 0.20314303040504456, + "loss_ib": 0.009452074766159058, + "step": 1901 + }, + { + "ce_ib": 4.9321208000183105, + "ce_orig": 0.7552053332328796, + "epoch": 0.5466963836364943, + "kl_loss": 0.14079751074314117, + "loss_ib": 0.0063400957733392715, + "step": 1901 + }, + { + "ce_ib": 8.351343154907227, + "ce_orig": 1.5969254970550537, + "epoch": 0.5466963836364943, + "kl_loss": 0.17940068244934082, + "loss_ib": 0.01014534942805767, + "step": 1901 + }, + { + "ce_ib": 4.7066874504089355, + "ce_orig": 0.5475854873657227, + "epoch": 0.546983967215472, + "kl_loss": 0.192865252494812, + "loss_ib": 0.006635340396314859, + "step": 1902 + }, + { + "ce_ib": 2.6522388458251953, + "ce_orig": 0.45800575613975525, + "epoch": 0.546983967215472, + "kl_loss": 0.17094935476779938, + "loss_ib": 0.004361732397228479, + "step": 1902 + }, + { + "ce_ib": 3.336050271987915, + "ce_orig": 0.6363884806632996, + "epoch": 0.546983967215472, + "kl_loss": 0.21376313269138336, + "loss_ib": 0.005473681725561619, + "step": 1902 + }, + { + "ce_ib": 5.004870891571045, + "ce_orig": 0.8490200042724609, + "epoch": 0.546983967215472, + "kl_loss": 0.20807631313800812, + "loss_ib": 0.007085633929818869, + "step": 1902 + }, + { + "ce_ib": 3.4831836223602295, + "ce_orig": 0.37373557686805725, + "epoch": 0.5472715507944497, + "kl_loss": 0.1494193971157074, + "loss_ib": 0.004977377597242594, + "step": 1903 + }, + { + "ce_ib": 5.920387268066406, + "ce_orig": 1.1255521774291992, + "epoch": 0.5472715507944497, + "kl_loss": 0.3962468206882477, + "loss_ib": 0.009882855229079723, + "step": 1903 + }, + { + "ce_ib": 4.647152423858643, + "ce_orig": 0.8957937359809875, + "epoch": 0.5472715507944497, + "kl_loss": 0.1819320023059845, + "loss_ib": 0.006466472055763006, + "step": 1903 + }, + { + "ce_ib": 5.453622817993164, + "ce_orig": 0.9864134788513184, + "epoch": 0.5472715507944497, + "kl_loss": 0.18783822655677795, + "loss_ib": 0.00733200553804636, + "step": 1903 + }, + { + "ce_ib": 6.079820156097412, + "ce_orig": 0.8921633362770081, + "epoch": 0.5475591343734273, + "kl_loss": 0.40260013937950134, + "loss_ib": 0.01010582223534584, + "step": 1904 + }, + { + "ce_ib": 7.706158638000488, + "ce_orig": 1.3141320943832397, + "epoch": 0.5475591343734273, + "kl_loss": 0.2728313207626343, + "loss_ib": 0.010434472002089024, + "step": 1904 + }, + { + "ce_ib": 5.507092475891113, + "ce_orig": 0.9962813854217529, + "epoch": 0.5475591343734273, + "kl_loss": 0.20328868925571442, + "loss_ib": 0.0075399791821837425, + "step": 1904 + }, + { + "ce_ib": 8.686485290527344, + "ce_orig": 1.5090293884277344, + "epoch": 0.5475591343734273, + "kl_loss": 0.2840725779533386, + "loss_ib": 0.011527211405336857, + "step": 1904 + }, + { + "epoch": 0.5478467179524049, + "grad_norm": 0.11758630722761154, + "learning_rate": 9.406859504402597e-06, + "loss": 0.8631, + "step": 1905 + }, + { + "ce_ib": 5.677255153656006, + "ce_orig": 1.0834262371063232, + "epoch": 0.5478467179524049, + "kl_loss": 0.17184485495090485, + "loss_ib": 0.007395703811198473, + "step": 1905 + }, + { + "ce_ib": 4.020125389099121, + "ce_orig": 0.678731381893158, + "epoch": 0.5478467179524049, + "kl_loss": 0.1683593988418579, + "loss_ib": 0.005703719798475504, + "step": 1905 + }, + { + "ce_ib": 3.427790403366089, + "ce_orig": 0.8733957409858704, + "epoch": 0.5478467179524049, + "kl_loss": 0.2050205022096634, + "loss_ib": 0.00547799514606595, + "step": 1905 + }, + { + "ce_ib": 7.873215198516846, + "ce_orig": 1.0264562368392944, + "epoch": 0.5478467179524049, + "kl_loss": 0.2520449459552765, + "loss_ib": 0.010393664240837097, + "step": 1905 + }, + { + "ce_ib": 6.0730204582214355, + "ce_orig": 1.1063258647918701, + "epoch": 0.5481343015313825, + "kl_loss": 0.14009909331798553, + "loss_ib": 0.007474011741578579, + "step": 1906 + }, + { + "ce_ib": 4.413961410522461, + "ce_orig": 0.6400641798973083, + "epoch": 0.5481343015313825, + "kl_loss": 0.23015549778938293, + "loss_ib": 0.006715516094118357, + "step": 1906 + }, + { + "ce_ib": 6.499683380126953, + "ce_orig": 0.9660086035728455, + "epoch": 0.5481343015313825, + "kl_loss": 0.13740497827529907, + "loss_ib": 0.007873732596635818, + "step": 1906 + }, + { + "ce_ib": 4.543107032775879, + "ce_orig": 0.775365948677063, + "epoch": 0.5481343015313825, + "kl_loss": 0.24304203689098358, + "loss_ib": 0.006973527371883392, + "step": 1906 + }, + { + "ce_ib": 3.398146867752075, + "ce_orig": 0.40355029702186584, + "epoch": 0.5484218851103602, + "kl_loss": 0.10796613246202469, + "loss_ib": 0.004477808251976967, + "step": 1907 + }, + { + "ce_ib": 4.06525182723999, + "ce_orig": 0.8296524286270142, + "epoch": 0.5484218851103602, + "kl_loss": 0.19505798816680908, + "loss_ib": 0.006015831604599953, + "step": 1907 + }, + { + "ce_ib": 3.425095319747925, + "ce_orig": 0.7265019416809082, + "epoch": 0.5484218851103602, + "kl_loss": 0.2750162184238434, + "loss_ib": 0.0061752572655677795, + "step": 1907 + }, + { + "ce_ib": 6.132481098175049, + "ce_orig": 1.0314496755599976, + "epoch": 0.5484218851103602, + "kl_loss": 0.21296876668930054, + "loss_ib": 0.008262168616056442, + "step": 1907 + }, + { + "ce_ib": 6.469040870666504, + "ce_orig": 0.8110198974609375, + "epoch": 0.5487094686893378, + "kl_loss": 0.28225424885749817, + "loss_ib": 0.009291582740843296, + "step": 1908 + }, + { + "ce_ib": 6.6580328941345215, + "ce_orig": 0.9502229690551758, + "epoch": 0.5487094686893378, + "kl_loss": 0.27972739934921265, + "loss_ib": 0.009455306455492973, + "step": 1908 + }, + { + "ce_ib": 3.2922401428222656, + "ce_orig": 0.764376163482666, + "epoch": 0.5487094686893378, + "kl_loss": 0.24232013523578644, + "loss_ib": 0.005715441424399614, + "step": 1908 + }, + { + "ce_ib": 9.52113151550293, + "ce_orig": 0.905972957611084, + "epoch": 0.5487094686893378, + "kl_loss": 0.17079463601112366, + "loss_ib": 0.011229077354073524, + "step": 1908 + }, + { + "ce_ib": 6.658320426940918, + "ce_orig": 0.7265307903289795, + "epoch": 0.5489970522683155, + "kl_loss": 0.17263171076774597, + "loss_ib": 0.008384637534618378, + "step": 1909 + }, + { + "ce_ib": 5.1962175369262695, + "ce_orig": 1.1013990640640259, + "epoch": 0.5489970522683155, + "kl_loss": 0.20727990567684174, + "loss_ib": 0.007269016932696104, + "step": 1909 + }, + { + "ce_ib": 5.728460311889648, + "ce_orig": 1.0274955034255981, + "epoch": 0.5489970522683155, + "kl_loss": 0.24510778486728668, + "loss_ib": 0.008179537951946259, + "step": 1909 + }, + { + "ce_ib": 2.743274450302124, + "ce_orig": 0.40781253576278687, + "epoch": 0.5489970522683155, + "kl_loss": 0.1681884527206421, + "loss_ib": 0.004425159189850092, + "step": 1909 + }, + { + "epoch": 0.5492846358472931, + "grad_norm": 0.12240489572286606, + "learning_rate": 9.403187791923455e-06, + "loss": 0.9302, + "step": 1910 + }, + { + "ce_ib": 2.886667490005493, + "ce_orig": 0.5741050243377686, + "epoch": 0.5492846358472931, + "kl_loss": 0.16820037364959717, + "loss_ib": 0.004568671341985464, + "step": 1910 + }, + { + "ce_ib": 4.348527908325195, + "ce_orig": 0.3352336585521698, + "epoch": 0.5492846358472931, + "kl_loss": 0.29646146297454834, + "loss_ib": 0.007313142996281385, + "step": 1910 + }, + { + "ce_ib": 3.690995216369629, + "ce_orig": 0.8190628886222839, + "epoch": 0.5492846358472931, + "kl_loss": 0.17163097858428955, + "loss_ib": 0.005407304503023624, + "step": 1910 + }, + { + "ce_ib": 5.406653881072998, + "ce_orig": 0.8319385647773743, + "epoch": 0.5492846358472931, + "kl_loss": 0.2621734142303467, + "loss_ib": 0.008028388023376465, + "step": 1910 + }, + { + "ce_ib": 4.8341193199157715, + "ce_orig": 0.2599828541278839, + "epoch": 0.5495722194262708, + "kl_loss": 0.232896089553833, + "loss_ib": 0.007163079921156168, + "step": 1911 + }, + { + "ce_ib": 5.156683921813965, + "ce_orig": 0.5585552453994751, + "epoch": 0.5495722194262708, + "kl_loss": 0.28248530626296997, + "loss_ib": 0.007981536909937859, + "step": 1911 + }, + { + "ce_ib": 4.102294445037842, + "ce_orig": 0.9090990424156189, + "epoch": 0.5495722194262708, + "kl_loss": 0.12182167172431946, + "loss_ib": 0.0053205108270049095, + "step": 1911 + }, + { + "ce_ib": 3.1007497310638428, + "ce_orig": 0.3543629050254822, + "epoch": 0.5495722194262708, + "kl_loss": 0.23339495062828064, + "loss_ib": 0.005434698890894651, + "step": 1911 + }, + { + "ce_ib": 3.793285369873047, + "ce_orig": 0.6255490183830261, + "epoch": 0.5498598030052484, + "kl_loss": 0.19438017904758453, + "loss_ib": 0.005737087223678827, + "step": 1912 + }, + { + "ce_ib": 3.512158155441284, + "ce_orig": 0.5986344814300537, + "epoch": 0.5498598030052484, + "kl_loss": 0.1535426378250122, + "loss_ib": 0.005047584883868694, + "step": 1912 + }, + { + "ce_ib": 7.09074592590332, + "ce_orig": 1.1093448400497437, + "epoch": 0.5498598030052484, + "kl_loss": 0.23947542905807495, + "loss_ib": 0.009485499933362007, + "step": 1912 + }, + { + "ce_ib": 4.961276054382324, + "ce_orig": 0.995897650718689, + "epoch": 0.5498598030052484, + "kl_loss": 0.15836216509342194, + "loss_ib": 0.006544897332787514, + "step": 1912 + }, + { + "ce_ib": 5.7570037841796875, + "ce_orig": 0.8582257628440857, + "epoch": 0.550147386584226, + "kl_loss": 0.20467862486839294, + "loss_ib": 0.007803790271282196, + "step": 1913 + }, + { + "ce_ib": 6.998703479766846, + "ce_orig": 1.3555225133895874, + "epoch": 0.550147386584226, + "kl_loss": 0.24651572108268738, + "loss_ib": 0.009463860653340816, + "step": 1913 + }, + { + "ce_ib": 5.055090427398682, + "ce_orig": 0.5152579545974731, + "epoch": 0.550147386584226, + "kl_loss": 0.21888798475265503, + "loss_ib": 0.007243970409035683, + "step": 1913 + }, + { + "ce_ib": 5.7093729972839355, + "ce_orig": 1.3219268321990967, + "epoch": 0.550147386584226, + "kl_loss": 0.22586293518543243, + "loss_ib": 0.007968001998960972, + "step": 1913 + }, + { + "ce_ib": 6.738046646118164, + "ce_orig": 0.8517309427261353, + "epoch": 0.5504349701632036, + "kl_loss": 0.17055313289165497, + "loss_ib": 0.008443578146398067, + "step": 1914 + }, + { + "ce_ib": 4.148743629455566, + "ce_orig": 0.5026411414146423, + "epoch": 0.5504349701632036, + "kl_loss": 0.23652209341526031, + "loss_ib": 0.006513964384794235, + "step": 1914 + }, + { + "ce_ib": 3.847912549972534, + "ce_orig": 0.6879178881645203, + "epoch": 0.5504349701632036, + "kl_loss": 0.2362547069787979, + "loss_ib": 0.006210459396243095, + "step": 1914 + }, + { + "ce_ib": 4.1432671546936035, + "ce_orig": 0.6263501644134521, + "epoch": 0.5504349701632036, + "kl_loss": 0.23196320235729218, + "loss_ib": 0.006462899502366781, + "step": 1914 + }, + { + "epoch": 0.5507225537421814, + "grad_norm": 0.1128683015704155, + "learning_rate": 9.39950547114292e-06, + "loss": 0.9156, + "step": 1915 + }, + { + "ce_ib": 2.125145196914673, + "ce_orig": 0.31030476093292236, + "epoch": 0.5507225537421814, + "kl_loss": 0.2390914410352707, + "loss_ib": 0.004516059532761574, + "step": 1915 + }, + { + "ce_ib": 3.485665798187256, + "ce_orig": 0.8444491624832153, + "epoch": 0.5507225537421814, + "kl_loss": 0.2743951082229614, + "loss_ib": 0.006229616701602936, + "step": 1915 + }, + { + "ce_ib": 5.361143112182617, + "ce_orig": 0.9358366131782532, + "epoch": 0.5507225537421814, + "kl_loss": 0.2102963626384735, + "loss_ib": 0.007464107125997543, + "step": 1915 + }, + { + "ce_ib": 2.9449386596679688, + "ce_orig": 0.622304379940033, + "epoch": 0.5507225537421814, + "kl_loss": 0.2748371958732605, + "loss_ib": 0.005693310406059027, + "step": 1915 + }, + { + "ce_ib": 3.3908071517944336, + "ce_orig": 0.7540403604507446, + "epoch": 0.551010137321159, + "kl_loss": 0.19135555624961853, + "loss_ib": 0.005304362624883652, + "step": 1916 + }, + { + "ce_ib": 4.298043727874756, + "ce_orig": 0.5426058769226074, + "epoch": 0.551010137321159, + "kl_loss": 0.21068069338798523, + "loss_ib": 0.0064048501662909985, + "step": 1916 + }, + { + "ce_ib": 6.896023273468018, + "ce_orig": 1.1848102807998657, + "epoch": 0.551010137321159, + "kl_loss": 0.17196182906627655, + "loss_ib": 0.008615641854703426, + "step": 1916 + }, + { + "ce_ib": 6.652091026306152, + "ce_orig": 1.0947281122207642, + "epoch": 0.551010137321159, + "kl_loss": 0.3030722737312317, + "loss_ib": 0.009682813659310341, + "step": 1916 + }, + { + "ce_ib": 5.312432289123535, + "ce_orig": 1.1666892766952515, + "epoch": 0.5512977209001366, + "kl_loss": 0.2234857827425003, + "loss_ib": 0.007547290064394474, + "step": 1917 + }, + { + "ce_ib": 7.760123252868652, + "ce_orig": 1.0860095024108887, + "epoch": 0.5512977209001366, + "kl_loss": 0.2641758322715759, + "loss_ib": 0.01040188129991293, + "step": 1917 + }, + { + "ce_ib": 5.78464412689209, + "ce_orig": 0.7784232497215271, + "epoch": 0.5512977209001366, + "kl_loss": 0.20703254640102386, + "loss_ib": 0.007854970172047615, + "step": 1917 + }, + { + "ce_ib": 6.338881492614746, + "ce_orig": 0.6332058906555176, + "epoch": 0.5512977209001366, + "kl_loss": 0.22034861147403717, + "loss_ib": 0.00854236725717783, + "step": 1917 + }, + { + "ce_ib": 2.3926210403442383, + "ce_orig": 0.4297766089439392, + "epoch": 0.5515853044791142, + "kl_loss": 0.18296483159065247, + "loss_ib": 0.004222269169986248, + "step": 1918 + }, + { + "ce_ib": 4.578824043273926, + "ce_orig": 0.38781973719596863, + "epoch": 0.5515853044791142, + "kl_loss": 0.32944250106811523, + "loss_ib": 0.007873249240219593, + "step": 1918 + }, + { + "ce_ib": 4.018671989440918, + "ce_orig": 0.6380063891410828, + "epoch": 0.5515853044791142, + "kl_loss": 0.24909710884094238, + "loss_ib": 0.006509643048048019, + "step": 1918 + }, + { + "ce_ib": 5.295958042144775, + "ce_orig": 0.8462589383125305, + "epoch": 0.5515853044791142, + "kl_loss": 0.20792736113071442, + "loss_ib": 0.007375231012701988, + "step": 1918 + }, + { + "ce_ib": 8.229303359985352, + "ce_orig": 1.2693380117416382, + "epoch": 0.5518728880580919, + "kl_loss": 0.19707272946834564, + "loss_ib": 0.01020003017038107, + "step": 1919 + }, + { + "ce_ib": 3.729757785797119, + "ce_orig": 0.7573217153549194, + "epoch": 0.5518728880580919, + "kl_loss": 0.20582321286201477, + "loss_ib": 0.005787990055978298, + "step": 1919 + }, + { + "ce_ib": 5.051339626312256, + "ce_orig": 0.7594917416572571, + "epoch": 0.5518728880580919, + "kl_loss": 0.21279123425483704, + "loss_ib": 0.0071792518720030785, + "step": 1919 + }, + { + "ce_ib": 5.25255823135376, + "ce_orig": 0.8441687822341919, + "epoch": 0.5518728880580919, + "kl_loss": 0.16548152267932892, + "loss_ib": 0.006907373666763306, + "step": 1919 + }, + { + "epoch": 0.5521604716370695, + "grad_norm": 0.13038235902786255, + "learning_rate": 9.395812550932559e-06, + "loss": 0.8628, + "step": 1920 + }, + { + "ce_ib": 5.125471115112305, + "ce_orig": 0.9467810392379761, + "epoch": 0.5521604716370695, + "kl_loss": 0.2430807650089264, + "loss_ib": 0.007556278724223375, + "step": 1920 + }, + { + "ce_ib": 4.794182300567627, + "ce_orig": 0.6340484023094177, + "epoch": 0.5521604716370695, + "kl_loss": 0.29665517807006836, + "loss_ib": 0.007760734297335148, + "step": 1920 + }, + { + "ce_ib": 6.495025157928467, + "ce_orig": 0.8584146499633789, + "epoch": 0.5521604716370695, + "kl_loss": 0.2170822024345398, + "loss_ib": 0.008665846660733223, + "step": 1920 + }, + { + "ce_ib": 3.3026716709136963, + "ce_orig": 0.6432685256004333, + "epoch": 0.5521604716370695, + "kl_loss": 0.15812014043331146, + "loss_ib": 0.0048838728107512, + "step": 1920 + }, + { + "ce_ib": 4.022793292999268, + "ce_orig": 0.48814812302589417, + "epoch": 0.5524480552160471, + "kl_loss": 0.2611497640609741, + "loss_ib": 0.006634291261434555, + "step": 1921 + }, + { + "ce_ib": 6.338824272155762, + "ce_orig": 1.1433099508285522, + "epoch": 0.5524480552160471, + "kl_loss": 0.2686011791229248, + "loss_ib": 0.009024836122989655, + "step": 1921 + }, + { + "ce_ib": 6.309455394744873, + "ce_orig": 1.0639891624450684, + "epoch": 0.5524480552160471, + "kl_loss": 0.28311389684677124, + "loss_ib": 0.009140594862401485, + "step": 1921 + }, + { + "ce_ib": 7.51746940612793, + "ce_orig": 1.0372909307479858, + "epoch": 0.5524480552160471, + "kl_loss": 0.2799808979034424, + "loss_ib": 0.01031727809458971, + "step": 1921 + }, + { + "ce_ib": 4.688455104827881, + "ce_orig": 0.7796941995620728, + "epoch": 0.5527356387950249, + "kl_loss": 0.25242507457733154, + "loss_ib": 0.007212705444544554, + "step": 1922 + }, + { + "ce_ib": 8.777731895446777, + "ce_orig": 1.802945613861084, + "epoch": 0.5527356387950249, + "kl_loss": 0.33165305852890015, + "loss_ib": 0.01209426298737526, + "step": 1922 + }, + { + "ce_ib": 3.469494581222534, + "ce_orig": 0.6810784339904785, + "epoch": 0.5527356387950249, + "kl_loss": 0.17616687715053558, + "loss_ib": 0.005231163464486599, + "step": 1922 + }, + { + "ce_ib": 4.473598003387451, + "ce_orig": 0.5759130716323853, + "epoch": 0.5527356387950249, + "kl_loss": 0.22008073329925537, + "loss_ib": 0.006674405187368393, + "step": 1922 + }, + { + "ce_ib": 4.555544376373291, + "ce_orig": 0.7167826890945435, + "epoch": 0.5530232223740025, + "kl_loss": 0.168533056974411, + "loss_ib": 0.006240874994546175, + "step": 1923 + }, + { + "ce_ib": 8.480325698852539, + "ce_orig": 1.7931766510009766, + "epoch": 0.5530232223740025, + "kl_loss": 0.3537088632583618, + "loss_ib": 0.012017413973808289, + "step": 1923 + }, + { + "ce_ib": 4.297208309173584, + "ce_orig": 0.6875762939453125, + "epoch": 0.5530232223740025, + "kl_loss": 0.15500590205192566, + "loss_ib": 0.0058472673408687115, + "step": 1923 + }, + { + "ce_ib": 4.13336706161499, + "ce_orig": 0.49390938878059387, + "epoch": 0.5530232223740025, + "kl_loss": 0.2773393392562866, + "loss_ib": 0.006906760856509209, + "step": 1923 + }, + { + "ce_ib": 5.192725658416748, + "ce_orig": 1.1892716884613037, + "epoch": 0.5533108059529801, + "kl_loss": 0.14960896968841553, + "loss_ib": 0.006688815075904131, + "step": 1924 + }, + { + "ce_ib": 1.0564361810684204, + "ce_orig": 0.140930637717247, + "epoch": 0.5533108059529801, + "kl_loss": 0.48497554659843445, + "loss_ib": 0.005906191188842058, + "step": 1924 + }, + { + "ce_ib": 4.778229713439941, + "ce_orig": 0.6768468618392944, + "epoch": 0.5533108059529801, + "kl_loss": 0.18857140839099884, + "loss_ib": 0.006663944106549025, + "step": 1924 + }, + { + "ce_ib": 3.5872573852539062, + "ce_orig": 0.7152208089828491, + "epoch": 0.5533108059529801, + "kl_loss": 0.1990417242050171, + "loss_ib": 0.005577675066888332, + "step": 1924 + }, + { + "epoch": 0.5535983895319577, + "grad_norm": 0.1305120289325714, + "learning_rate": 9.392109040189473e-06, + "loss": 0.8503, + "step": 1925 + }, + { + "ce_ib": 5.968764781951904, + "ce_orig": 0.8049296736717224, + "epoch": 0.5535983895319577, + "kl_loss": 0.24111254513263702, + "loss_ib": 0.008379890583455563, + "step": 1925 + }, + { + "ce_ib": 3.8194169998168945, + "ce_orig": 0.7100443243980408, + "epoch": 0.5535983895319577, + "kl_loss": 0.3223645091056824, + "loss_ib": 0.0070430622436106205, + "step": 1925 + }, + { + "ce_ib": 8.28250503540039, + "ce_orig": 1.206803560256958, + "epoch": 0.5535983895319577, + "kl_loss": 0.201694518327713, + "loss_ib": 0.010299449786543846, + "step": 1925 + }, + { + "ce_ib": 7.383141994476318, + "ce_orig": 1.472749948501587, + "epoch": 0.5535983895319577, + "kl_loss": 0.15550222992897034, + "loss_ib": 0.008938164450228214, + "step": 1925 + }, + { + "ce_ib": 2.9840638637542725, + "ce_orig": 0.4754523038864136, + "epoch": 0.5538859731109353, + "kl_loss": 0.3020710349082947, + "loss_ib": 0.00600477447733283, + "step": 1926 + }, + { + "ce_ib": 2.064957618713379, + "ce_orig": 0.5488508939743042, + "epoch": 0.5538859731109353, + "kl_loss": 0.13696321845054626, + "loss_ib": 0.003434589831158519, + "step": 1926 + }, + { + "ce_ib": 3.859769582748413, + "ce_orig": 0.6679697632789612, + "epoch": 0.5538859731109353, + "kl_loss": 0.23655346035957336, + "loss_ib": 0.006225304678082466, + "step": 1926 + }, + { + "ce_ib": 3.0847089290618896, + "ce_orig": 0.5940335988998413, + "epoch": 0.5538859731109353, + "kl_loss": 0.2199656069278717, + "loss_ib": 0.005284365266561508, + "step": 1926 + }, + { + "ce_ib": 4.7833333015441895, + "ce_orig": 0.699425220489502, + "epoch": 0.554173556689913, + "kl_loss": 0.19268561899662018, + "loss_ib": 0.0067101893946528435, + "step": 1927 + }, + { + "ce_ib": 3.15501070022583, + "ce_orig": 0.6162528395652771, + "epoch": 0.554173556689913, + "kl_loss": 0.1841224730014801, + "loss_ib": 0.004996235482394695, + "step": 1927 + }, + { + "ce_ib": 6.578360557556152, + "ce_orig": 0.9799951910972595, + "epoch": 0.554173556689913, + "kl_loss": 0.22263585031032562, + "loss_ib": 0.00880471896380186, + "step": 1927 + }, + { + "ce_ib": 7.8032402992248535, + "ce_orig": 1.353818416595459, + "epoch": 0.554173556689913, + "kl_loss": 0.30164778232574463, + "loss_ib": 0.01081971824169159, + "step": 1927 + }, + { + "ce_ib": 4.972599983215332, + "ce_orig": 0.7287405729293823, + "epoch": 0.5544611402688906, + "kl_loss": 0.19396856427192688, + "loss_ib": 0.006912285462021828, + "step": 1928 + }, + { + "ce_ib": 3.7816243171691895, + "ce_orig": 0.6331422328948975, + "epoch": 0.5544611402688906, + "kl_loss": 0.18299607932567596, + "loss_ib": 0.0056115854531526566, + "step": 1928 + }, + { + "ce_ib": 5.973362922668457, + "ce_orig": 1.0235064029693604, + "epoch": 0.5544611402688906, + "kl_loss": 0.2769926190376282, + "loss_ib": 0.008743288926780224, + "step": 1928 + }, + { + "ce_ib": 5.59320068359375, + "ce_orig": 0.9232028722763062, + "epoch": 0.5544611402688906, + "kl_loss": 0.22228997945785522, + "loss_ib": 0.007816100493073463, + "step": 1928 + }, + { + "ce_ib": 3.503995656967163, + "ce_orig": 0.5884864330291748, + "epoch": 0.5547487238478683, + "kl_loss": 0.14196009933948517, + "loss_ib": 0.004923596978187561, + "step": 1929 + }, + { + "ce_ib": 6.933825492858887, + "ce_orig": 1.301206111907959, + "epoch": 0.5547487238478683, + "kl_loss": 0.20545580983161926, + "loss_ib": 0.008988384157419205, + "step": 1929 + }, + { + "ce_ib": 5.770648956298828, + "ce_orig": 1.1211457252502441, + "epoch": 0.5547487238478683, + "kl_loss": 0.1420101821422577, + "loss_ib": 0.0071907504461705685, + "step": 1929 + }, + { + "ce_ib": 3.230302095413208, + "ce_orig": 0.5871448516845703, + "epoch": 0.5547487238478683, + "kl_loss": 0.19306373596191406, + "loss_ib": 0.005160939414054155, + "step": 1929 + }, + { + "epoch": 0.555036307426846, + "grad_norm": 0.11835107952356339, + "learning_rate": 9.388394947836278e-06, + "loss": 0.8813, + "step": 1930 + }, + { + "ce_ib": 6.595455169677734, + "ce_orig": 0.8396819829940796, + "epoch": 0.555036307426846, + "kl_loss": 0.2513989210128784, + "loss_ib": 0.009109443984925747, + "step": 1930 + }, + { + "ce_ib": 5.202250003814697, + "ce_orig": 1.0775376558303833, + "epoch": 0.555036307426846, + "kl_loss": 0.2137918770313263, + "loss_ib": 0.007340168580412865, + "step": 1930 + }, + { + "ce_ib": 3.5433919429779053, + "ce_orig": 0.5223658084869385, + "epoch": 0.555036307426846, + "kl_loss": 0.16330486536026, + "loss_ib": 0.005176440346986055, + "step": 1930 + }, + { + "ce_ib": 4.392524719238281, + "ce_orig": 0.8352428674697876, + "epoch": 0.555036307426846, + "kl_loss": 0.20079953968524933, + "loss_ib": 0.006400519981980324, + "step": 1930 + }, + { + "ce_ib": 3.605915069580078, + "ce_orig": 0.6275536417961121, + "epoch": 0.5553238910058236, + "kl_loss": 0.2333388775587082, + "loss_ib": 0.005939303431659937, + "step": 1931 + }, + { + "ce_ib": 3.149280548095703, + "ce_orig": 0.732917070388794, + "epoch": 0.5553238910058236, + "kl_loss": 0.14264488220214844, + "loss_ib": 0.0045757293701171875, + "step": 1931 + }, + { + "ce_ib": 6.063021183013916, + "ce_orig": 0.9229342937469482, + "epoch": 0.5553238910058236, + "kl_loss": 0.214775949716568, + "loss_ib": 0.00821078009903431, + "step": 1931 + }, + { + "ce_ib": 5.187615871429443, + "ce_orig": 0.6924121975898743, + "epoch": 0.5553238910058236, + "kl_loss": 0.2174490988254547, + "loss_ib": 0.007362106814980507, + "step": 1931 + }, + { + "ce_ib": 6.522134304046631, + "ce_orig": 1.2137961387634277, + "epoch": 0.5556114745848012, + "kl_loss": 0.18655726313591003, + "loss_ib": 0.00838770717382431, + "step": 1932 + }, + { + "ce_ib": 5.60007381439209, + "ce_orig": 0.766647219657898, + "epoch": 0.5556114745848012, + "kl_loss": 0.32396399974823, + "loss_ib": 0.008839713409543037, + "step": 1932 + }, + { + "ce_ib": 9.259220123291016, + "ce_orig": 1.8698841333389282, + "epoch": 0.5556114745848012, + "kl_loss": 0.2572416365146637, + "loss_ib": 0.011831636540591717, + "step": 1932 + }, + { + "ce_ib": 7.186155796051025, + "ce_orig": 1.1270413398742676, + "epoch": 0.5556114745848012, + "kl_loss": 0.2171599268913269, + "loss_ib": 0.009357755072414875, + "step": 1932 + }, + { + "ce_ib": 5.8336663246154785, + "ce_orig": 0.8805820941925049, + "epoch": 0.5558990581637788, + "kl_loss": 0.20731845498085022, + "loss_ib": 0.007906850427389145, + "step": 1933 + }, + { + "ce_ib": 2.1970837116241455, + "ce_orig": 0.4671885669231415, + "epoch": 0.5558990581637788, + "kl_loss": 0.16664046049118042, + "loss_ib": 0.0038634883239865303, + "step": 1933 + }, + { + "ce_ib": 6.38844108581543, + "ce_orig": 0.7484267950057983, + "epoch": 0.5558990581637788, + "kl_loss": 0.25336700677871704, + "loss_ib": 0.008922111243009567, + "step": 1933 + }, + { + "ce_ib": 5.786157131195068, + "ce_orig": 0.8145442008972168, + "epoch": 0.5558990581637788, + "kl_loss": 0.19787608087062836, + "loss_ib": 0.007764918264001608, + "step": 1933 + }, + { + "ce_ib": 6.9282121658325195, + "ce_orig": 1.1202467679977417, + "epoch": 0.5561866417427564, + "kl_loss": 0.2380782961845398, + "loss_ib": 0.009308994747698307, + "step": 1934 + }, + { + "ce_ib": 4.730992317199707, + "ce_orig": 0.7270490527153015, + "epoch": 0.5561866417427564, + "kl_loss": 0.27505892515182495, + "loss_ib": 0.0074815815314650536, + "step": 1934 + }, + { + "ce_ib": 5.864253520965576, + "ce_orig": 0.8641997575759888, + "epoch": 0.5561866417427564, + "kl_loss": 0.2009393870830536, + "loss_ib": 0.007873646914958954, + "step": 1934 + }, + { + "ce_ib": 7.711478233337402, + "ce_orig": 1.4662551879882812, + "epoch": 0.5561866417427564, + "kl_loss": 0.20308509469032288, + "loss_ib": 0.009742328897118568, + "step": 1934 + }, + { + "epoch": 0.5564742253217342, + "grad_norm": 0.12336688488721848, + "learning_rate": 9.384670282821087e-06, + "loss": 0.8883, + "step": 1935 + }, + { + "ce_ib": 3.3277626037597656, + "ce_orig": 0.5428022742271423, + "epoch": 0.5564742253217342, + "kl_loss": 0.23022133111953735, + "loss_ib": 0.005629975814372301, + "step": 1935 + }, + { + "ce_ib": 4.103045463562012, + "ce_orig": 0.3328402042388916, + "epoch": 0.5564742253217342, + "kl_loss": 0.26368433237075806, + "loss_ib": 0.006739888805896044, + "step": 1935 + }, + { + "ce_ib": 3.9666333198547363, + "ce_orig": 0.656386137008667, + "epoch": 0.5564742253217342, + "kl_loss": 0.17204327881336212, + "loss_ib": 0.005687066353857517, + "step": 1935 + }, + { + "ce_ib": 2.325824022293091, + "ce_orig": 0.326698899269104, + "epoch": 0.5564742253217342, + "kl_loss": 0.48896095156669617, + "loss_ib": 0.00721543375402689, + "step": 1935 + }, + { + "ce_ib": 4.6291728019714355, + "ce_orig": 0.6698210835456848, + "epoch": 0.5567618089007118, + "kl_loss": 0.21553833782672882, + "loss_ib": 0.006784556433558464, + "step": 1936 + }, + { + "ce_ib": 7.387948989868164, + "ce_orig": 1.3421707153320312, + "epoch": 0.5567618089007118, + "kl_loss": 0.24260932207107544, + "loss_ib": 0.00981404259800911, + "step": 1936 + }, + { + "ce_ib": 5.946327209472656, + "ce_orig": 1.0489966869354248, + "epoch": 0.5567618089007118, + "kl_loss": 0.19853127002716064, + "loss_ib": 0.00793164037168026, + "step": 1936 + }, + { + "ce_ib": 3.620438575744629, + "ce_orig": 0.5803955793380737, + "epoch": 0.5567618089007118, + "kl_loss": 0.18083828687667847, + "loss_ib": 0.005428821314126253, + "step": 1936 + }, + { + "ce_ib": 4.323624134063721, + "ce_orig": 1.1346577405929565, + "epoch": 0.5570493924796894, + "kl_loss": 0.2001379430294037, + "loss_ib": 0.00632500322535634, + "step": 1937 + }, + { + "ce_ib": 3.8898186683654785, + "ce_orig": 0.6070666909217834, + "epoch": 0.5570493924796894, + "kl_loss": 0.2289465367794037, + "loss_ib": 0.0061792838387191296, + "step": 1937 + }, + { + "ce_ib": 4.37337064743042, + "ce_orig": 0.7158910036087036, + "epoch": 0.5570493924796894, + "kl_loss": 0.17210406064987183, + "loss_ib": 0.006094411481171846, + "step": 1937 + }, + { + "ce_ib": 3.612762451171875, + "ce_orig": 0.7651700973510742, + "epoch": 0.5570493924796894, + "kl_loss": 0.35289445519447327, + "loss_ib": 0.007141706999391317, + "step": 1937 + }, + { + "ce_ib": 3.410884380340576, + "ce_orig": 0.8918249607086182, + "epoch": 0.557336976058667, + "kl_loss": 0.13667425513267517, + "loss_ib": 0.004777627065777779, + "step": 1938 + }, + { + "ce_ib": 7.995250701904297, + "ce_orig": 1.657909870147705, + "epoch": 0.557336976058667, + "kl_loss": 0.1475721150636673, + "loss_ib": 0.009470971301198006, + "step": 1938 + }, + { + "ce_ib": 3.5121572017669678, + "ce_orig": 0.7898868322372437, + "epoch": 0.557336976058667, + "kl_loss": 0.23313003778457642, + "loss_ib": 0.0058434573002159595, + "step": 1938 + }, + { + "ce_ib": 5.825277328491211, + "ce_orig": 1.079064965248108, + "epoch": 0.557336976058667, + "kl_loss": 0.18237462639808655, + "loss_ib": 0.007649023551493883, + "step": 1938 + }, + { + "ce_ib": 3.7814619541168213, + "ce_orig": 0.8045347929000854, + "epoch": 0.5576245596376447, + "kl_loss": 0.16451720893383026, + "loss_ib": 0.005426633637398481, + "step": 1939 + }, + { + "ce_ib": 2.8695623874664307, + "ce_orig": 0.4086380898952484, + "epoch": 0.5576245596376447, + "kl_loss": 0.17066530883312225, + "loss_ib": 0.004576215520501137, + "step": 1939 + }, + { + "ce_ib": 3.0014541149139404, + "ce_orig": 0.4150030016899109, + "epoch": 0.5576245596376447, + "kl_loss": 0.24444639682769775, + "loss_ib": 0.0054459176026284695, + "step": 1939 + }, + { + "ce_ib": 4.278745651245117, + "ce_orig": 0.5602519512176514, + "epoch": 0.5576245596376447, + "kl_loss": 0.25006240606307983, + "loss_ib": 0.006779369432479143, + "step": 1939 + }, + { + "epoch": 0.5579121432166223, + "grad_norm": 0.11354150623083115, + "learning_rate": 9.38093505411748e-06, + "loss": 0.8296, + "step": 1940 + }, + { + "ce_ib": 4.754982948303223, + "ce_orig": 0.9043082594871521, + "epoch": 0.5579121432166223, + "kl_loss": 0.22222253680229187, + "loss_ib": 0.006977207958698273, + "step": 1940 + }, + { + "ce_ib": 3.1008718013763428, + "ce_orig": 0.6609811782836914, + "epoch": 0.5579121432166223, + "kl_loss": 0.1551208347082138, + "loss_ib": 0.004652079660445452, + "step": 1940 + }, + { + "ce_ib": 5.148151874542236, + "ce_orig": 0.7034737467765808, + "epoch": 0.5579121432166223, + "kl_loss": 0.32634222507476807, + "loss_ib": 0.008411574177443981, + "step": 1940 + }, + { + "ce_ib": 10.941390037536621, + "ce_orig": 1.2599574327468872, + "epoch": 0.5579121432166223, + "kl_loss": 0.2286052703857422, + "loss_ib": 0.013227442279458046, + "step": 1940 + }, + { + "ce_ib": 7.19765567779541, + "ce_orig": 1.0609315633773804, + "epoch": 0.5581997267955999, + "kl_loss": 0.2000490128993988, + "loss_ib": 0.009198145940899849, + "step": 1941 + }, + { + "ce_ib": 3.3666741847991943, + "ce_orig": 0.7088072896003723, + "epoch": 0.5581997267955999, + "kl_loss": 0.17230737209320068, + "loss_ib": 0.005089747719466686, + "step": 1941 + }, + { + "ce_ib": 6.163748741149902, + "ce_orig": 1.036344051361084, + "epoch": 0.5581997267955999, + "kl_loss": 0.2598022222518921, + "loss_ib": 0.008761771023273468, + "step": 1941 + }, + { + "ce_ib": 4.131283760070801, + "ce_orig": 0.7375625967979431, + "epoch": 0.5581997267955999, + "kl_loss": 0.1777574121952057, + "loss_ib": 0.005908857565373182, + "step": 1941 + }, + { + "ce_ib": 4.538864612579346, + "ce_orig": 0.9160065054893494, + "epoch": 0.5584873103745777, + "kl_loss": 0.17552489042282104, + "loss_ib": 0.006294113118201494, + "step": 1942 + }, + { + "ce_ib": 2.258758783340454, + "ce_orig": 0.4297439754009247, + "epoch": 0.5584873103745777, + "kl_loss": 0.2720901668071747, + "loss_ib": 0.004979660268872976, + "step": 1942 + }, + { + "ce_ib": 6.058786392211914, + "ce_orig": 1.130829095840454, + "epoch": 0.5584873103745777, + "kl_loss": 0.22988750040531158, + "loss_ib": 0.008357660844922066, + "step": 1942 + }, + { + "ce_ib": 3.9205870628356934, + "ce_orig": 1.050699234008789, + "epoch": 0.5584873103745777, + "kl_loss": 0.16049782931804657, + "loss_ib": 0.00552556524053216, + "step": 1942 + }, + { + "ce_ib": 3.205777168273926, + "ce_orig": 0.426290363073349, + "epoch": 0.5587748939535553, + "kl_loss": 0.34285032749176025, + "loss_ib": 0.006634280551224947, + "step": 1943 + }, + { + "ce_ib": 3.6531386375427246, + "ce_orig": 0.6537418961524963, + "epoch": 0.5587748939535553, + "kl_loss": 0.2146739363670349, + "loss_ib": 0.00579987745732069, + "step": 1943 + }, + { + "ce_ib": 4.059100151062012, + "ce_orig": 0.9437558054924011, + "epoch": 0.5587748939535553, + "kl_loss": 0.15987905859947205, + "loss_ib": 0.005657890811562538, + "step": 1943 + }, + { + "ce_ib": 4.630014419555664, + "ce_orig": 0.7337749004364014, + "epoch": 0.5587748939535553, + "kl_loss": 0.31163904070854187, + "loss_ib": 0.007746404968202114, + "step": 1943 + }, + { + "ce_ib": 5.040141582489014, + "ce_orig": 0.7503706812858582, + "epoch": 0.5590624775325329, + "kl_loss": 0.2039007693529129, + "loss_ib": 0.007079149130731821, + "step": 1944 + }, + { + "ce_ib": 3.309403657913208, + "ce_orig": 0.6147379279136658, + "epoch": 0.5590624775325329, + "kl_loss": 0.20214751362800598, + "loss_ib": 0.005330878309905529, + "step": 1944 + }, + { + "ce_ib": 3.446329355239868, + "ce_orig": 0.61670982837677, + "epoch": 0.5590624775325329, + "kl_loss": 0.20748162269592285, + "loss_ib": 0.005521146114915609, + "step": 1944 + }, + { + "ce_ib": 4.511063575744629, + "ce_orig": 0.9238880276679993, + "epoch": 0.5590624775325329, + "kl_loss": 0.17485204339027405, + "loss_ib": 0.006259584333747625, + "step": 1944 + }, + { + "epoch": 0.5593500611115105, + "grad_norm": 0.12173520773649216, + "learning_rate": 9.377189270724492e-06, + "loss": 0.896, + "step": 1945 + }, + { + "ce_ib": 6.507874965667725, + "ce_orig": 0.6445823907852173, + "epoch": 0.5593500611115105, + "kl_loss": 0.29428672790527344, + "loss_ib": 0.009450742043554783, + "step": 1945 + }, + { + "ce_ib": 7.969465255737305, + "ce_orig": 1.6248737573623657, + "epoch": 0.5593500611115105, + "kl_loss": 0.1924789547920227, + "loss_ib": 0.009894254617393017, + "step": 1945 + }, + { + "ce_ib": 8.53795051574707, + "ce_orig": 1.648207426071167, + "epoch": 0.5593500611115105, + "kl_loss": 0.2450883835554123, + "loss_ib": 0.010988833382725716, + "step": 1945 + }, + { + "ce_ib": 5.957749843597412, + "ce_orig": 1.022748351097107, + "epoch": 0.5593500611115105, + "kl_loss": 0.17536038160324097, + "loss_ib": 0.007711353711783886, + "step": 1945 + }, + { + "ce_ib": 5.166660308837891, + "ce_orig": 1.0340142250061035, + "epoch": 0.5596376446904882, + "kl_loss": 0.21062946319580078, + "loss_ib": 0.007272955030202866, + "step": 1946 + }, + { + "ce_ib": 5.739360809326172, + "ce_orig": 0.8432859778404236, + "epoch": 0.5596376446904882, + "kl_loss": 0.1813448667526245, + "loss_ib": 0.007552809547632933, + "step": 1946 + }, + { + "ce_ib": 3.5078823566436768, + "ce_orig": 0.6890704035758972, + "epoch": 0.5596376446904882, + "kl_loss": 0.19745320081710815, + "loss_ib": 0.005482414271682501, + "step": 1946 + }, + { + "ce_ib": 5.156094074249268, + "ce_orig": 0.9947879910469055, + "epoch": 0.5596376446904882, + "kl_loss": 0.20295652747154236, + "loss_ib": 0.007185659371316433, + "step": 1946 + }, + { + "ce_ib": 5.824108123779297, + "ce_orig": 0.4990655481815338, + "epoch": 0.5599252282694658, + "kl_loss": 0.1985587179660797, + "loss_ib": 0.007809694856405258, + "step": 1947 + }, + { + "ce_ib": 0.6695483326911926, + "ce_orig": 0.1322779357433319, + "epoch": 0.5599252282694658, + "kl_loss": 0.4654156565666199, + "loss_ib": 0.005323704797774553, + "step": 1947 + }, + { + "ce_ib": 8.81364631652832, + "ce_orig": 1.402514934539795, + "epoch": 0.5599252282694658, + "kl_loss": 0.2827611565589905, + "loss_ib": 0.01164125744253397, + "step": 1947 + }, + { + "ce_ib": 3.307119607925415, + "ce_orig": 0.5449358224868774, + "epoch": 0.5599252282694658, + "kl_loss": 0.2450537085533142, + "loss_ib": 0.005757656414061785, + "step": 1947 + }, + { + "ce_ib": 5.520815849304199, + "ce_orig": 0.7765263319015503, + "epoch": 0.5602128118484434, + "kl_loss": 0.2116309404373169, + "loss_ib": 0.007637124974280596, + "step": 1948 + }, + { + "ce_ib": 6.154223442077637, + "ce_orig": 0.9919604659080505, + "epoch": 0.5602128118484434, + "kl_loss": 0.2171577513217926, + "loss_ib": 0.00832580029964447, + "step": 1948 + }, + { + "ce_ib": 2.002056121826172, + "ce_orig": 0.4453052580356598, + "epoch": 0.5602128118484434, + "kl_loss": 0.4559124708175659, + "loss_ib": 0.006561180576682091, + "step": 1948 + }, + { + "ce_ib": 1.4254610538482666, + "ce_orig": 0.26237672567367554, + "epoch": 0.5602128118484434, + "kl_loss": 0.4651191532611847, + "loss_ib": 0.006076652090996504, + "step": 1948 + }, + { + "ce_ib": 9.372055053710938, + "ce_orig": 1.944899320602417, + "epoch": 0.5605003954274211, + "kl_loss": 0.20772811770439148, + "loss_ib": 0.011449335142970085, + "step": 1949 + }, + { + "ce_ib": 3.2384655475616455, + "ce_orig": 0.551419734954834, + "epoch": 0.5605003954274211, + "kl_loss": 0.19436872005462646, + "loss_ib": 0.005182153079658747, + "step": 1949 + }, + { + "ce_ib": 10.017022132873535, + "ce_orig": 1.6119446754455566, + "epoch": 0.5605003954274211, + "kl_loss": 0.22372767329216003, + "loss_ib": 0.01225429866462946, + "step": 1949 + }, + { + "ce_ib": 3.6044728755950928, + "ce_orig": 0.7428186535835266, + "epoch": 0.5605003954274211, + "kl_loss": 0.17656241357326508, + "loss_ib": 0.0053700972348451614, + "step": 1949 + }, + { + "epoch": 0.5607879790063988, + "grad_norm": 0.13764934241771698, + "learning_rate": 9.373432941666582e-06, + "loss": 0.8726, + "step": 1950 + }, + { + "ce_ib": 5.5733113288879395, + "ce_orig": 1.06040620803833, + "epoch": 0.5607879790063988, + "kl_loss": 0.23042647540569305, + "loss_ib": 0.007877576164901257, + "step": 1950 + }, + { + "ce_ib": 4.640501499176025, + "ce_orig": 0.7323870658874512, + "epoch": 0.5607879790063988, + "kl_loss": 0.26961541175842285, + "loss_ib": 0.007336655631661415, + "step": 1950 + }, + { + "ce_ib": 6.980530738830566, + "ce_orig": 1.5620394945144653, + "epoch": 0.5607879790063988, + "kl_loss": 0.24542734026908875, + "loss_ib": 0.009434803389012814, + "step": 1950 + }, + { + "ce_ib": 4.435082912445068, + "ce_orig": 0.559454083442688, + "epoch": 0.5607879790063988, + "kl_loss": 0.3001989722251892, + "loss_ib": 0.007437072694301605, + "step": 1950 + }, + { + "ce_ib": 5.830046653747559, + "ce_orig": 1.1036546230316162, + "epoch": 0.5610755625853764, + "kl_loss": 0.1718643307685852, + "loss_ib": 0.007548689842224121, + "step": 1951 + }, + { + "ce_ib": 3.1599724292755127, + "ce_orig": 0.5915268659591675, + "epoch": 0.5610755625853764, + "kl_loss": 0.20919816195964813, + "loss_ib": 0.005251954309642315, + "step": 1951 + }, + { + "ce_ib": 4.4157843589782715, + "ce_orig": 1.294607400894165, + "epoch": 0.5610755625853764, + "kl_loss": 0.170917809009552, + "loss_ib": 0.0061249625869095325, + "step": 1951 + }, + { + "ce_ib": 4.775761127471924, + "ce_orig": 0.7333765625953674, + "epoch": 0.5610755625853764, + "kl_loss": 0.23736411333084106, + "loss_ib": 0.0071494015865027905, + "step": 1951 + }, + { + "ce_ib": 6.860382080078125, + "ce_orig": 0.7319337725639343, + "epoch": 0.561363146164354, + "kl_loss": 0.2919643819332123, + "loss_ib": 0.009780025109648705, + "step": 1952 + }, + { + "ce_ib": 4.966904640197754, + "ce_orig": 0.6298078298568726, + "epoch": 0.561363146164354, + "kl_loss": 0.2703355848789215, + "loss_ib": 0.00767026050016284, + "step": 1952 + }, + { + "ce_ib": 4.709525108337402, + "ce_orig": 0.5962979793548584, + "epoch": 0.561363146164354, + "kl_loss": 0.2725815773010254, + "loss_ib": 0.007435340899974108, + "step": 1952 + }, + { + "ce_ib": 7.591611385345459, + "ce_orig": 1.7110387086868286, + "epoch": 0.561363146164354, + "kl_loss": 0.7085400223731995, + "loss_ib": 0.014677011407911777, + "step": 1952 + }, + { + "ce_ib": 3.424950361251831, + "ce_orig": 0.6562435626983643, + "epoch": 0.5616507297433316, + "kl_loss": 0.28956568241119385, + "loss_ib": 0.006320607382804155, + "step": 1953 + }, + { + "ce_ib": 9.279152870178223, + "ce_orig": 0.6927914619445801, + "epoch": 0.5616507297433316, + "kl_loss": 0.20212432742118835, + "loss_ib": 0.011300395242869854, + "step": 1953 + }, + { + "ce_ib": 5.230459213256836, + "ce_orig": 0.4061637222766876, + "epoch": 0.5616507297433316, + "kl_loss": 0.26631632447242737, + "loss_ib": 0.007893622852861881, + "step": 1953 + }, + { + "ce_ib": 4.862950801849365, + "ce_orig": 0.6797504425048828, + "epoch": 0.5616507297433316, + "kl_loss": 0.31247395277023315, + "loss_ib": 0.007987690158188343, + "step": 1953 + }, + { + "ce_ib": 7.613481044769287, + "ce_orig": 1.2723121643066406, + "epoch": 0.5619383133223093, + "kl_loss": 0.22658246755599976, + "loss_ib": 0.009879305958747864, + "step": 1954 + }, + { + "ce_ib": 7.024825096130371, + "ce_orig": 1.1097939014434814, + "epoch": 0.5619383133223093, + "kl_loss": 0.17091816663742065, + "loss_ib": 0.008734006434679031, + "step": 1954 + }, + { + "ce_ib": 2.7863800525665283, + "ce_orig": 0.5768458247184753, + "epoch": 0.5619383133223093, + "kl_loss": 0.20055538415908813, + "loss_ib": 0.004791933577507734, + "step": 1954 + }, + { + "ce_ib": 3.9914278984069824, + "ce_orig": 0.5456915497779846, + "epoch": 0.5619383133223093, + "kl_loss": 0.30046433210372925, + "loss_ib": 0.006996070966124535, + "step": 1954 + }, + { + "epoch": 0.5622258969012869, + "grad_norm": 0.14414216578006744, + "learning_rate": 9.36966607599362e-06, + "loss": 0.828, + "step": 1955 + }, + { + "ce_ib": 4.35274600982666, + "ce_orig": 0.5877292156219482, + "epoch": 0.5622258969012869, + "kl_loss": 0.2220989465713501, + "loss_ib": 0.006573735270649195, + "step": 1955 + }, + { + "ce_ib": 4.19326639175415, + "ce_orig": 0.7570400834083557, + "epoch": 0.5622258969012869, + "kl_loss": 0.22635827958583832, + "loss_ib": 0.0064568486995995045, + "step": 1955 + }, + { + "ce_ib": 6.88547945022583, + "ce_orig": 0.5518658757209778, + "epoch": 0.5622258969012869, + "kl_loss": 0.7282689809799194, + "loss_ib": 0.014168169349431992, + "step": 1955 + }, + { + "ce_ib": 5.221278667449951, + "ce_orig": 1.0822886228561401, + "epoch": 0.5622258969012869, + "kl_loss": 0.24413618445396423, + "loss_ib": 0.007662640418857336, + "step": 1955 + }, + { + "ce_ib": 5.041411399841309, + "ce_orig": 0.7814905047416687, + "epoch": 0.5625134804802646, + "kl_loss": 0.3357033431529999, + "loss_ib": 0.008398444391787052, + "step": 1956 + }, + { + "ce_ib": 4.407586574554443, + "ce_orig": 0.6083753108978271, + "epoch": 0.5625134804802646, + "kl_loss": 0.2976855933666229, + "loss_ib": 0.00738444272428751, + "step": 1956 + }, + { + "ce_ib": 3.456092119216919, + "ce_orig": 0.5818278789520264, + "epoch": 0.5625134804802646, + "kl_loss": 0.21381860971450806, + "loss_ib": 0.00559427822008729, + "step": 1956 + }, + { + "ce_ib": 3.3865714073181152, + "ce_orig": 0.5109878778457642, + "epoch": 0.5625134804802646, + "kl_loss": 0.1840440034866333, + "loss_ib": 0.005227011162787676, + "step": 1956 + }, + { + "ce_ib": 5.505570411682129, + "ce_orig": 1.1070959568023682, + "epoch": 0.5628010640592422, + "kl_loss": 0.18553832173347473, + "loss_ib": 0.00736095430329442, + "step": 1957 + }, + { + "ce_ib": 4.772067070007324, + "ce_orig": 0.8959853649139404, + "epoch": 0.5628010640592422, + "kl_loss": 0.2142309695482254, + "loss_ib": 0.0069143762812018394, + "step": 1957 + }, + { + "ce_ib": 6.18085241317749, + "ce_orig": 1.1948823928833008, + "epoch": 0.5628010640592422, + "kl_loss": 0.2218266874551773, + "loss_ib": 0.008399119600653648, + "step": 1957 + }, + { + "ce_ib": 5.908140182495117, + "ce_orig": 1.0072635412216187, + "epoch": 0.5628010640592422, + "kl_loss": 0.1685742884874344, + "loss_ib": 0.007593883201479912, + "step": 1957 + }, + { + "ce_ib": 2.8653934001922607, + "ce_orig": 0.7492801547050476, + "epoch": 0.5630886476382199, + "kl_loss": 0.1760227084159851, + "loss_ib": 0.004625620320439339, + "step": 1958 + }, + { + "ce_ib": 3.984140157699585, + "ce_orig": 0.9309844970703125, + "epoch": 0.5630886476382199, + "kl_loss": 0.15883925557136536, + "loss_ib": 0.005572532303631306, + "step": 1958 + }, + { + "ce_ib": 4.55318546295166, + "ce_orig": 0.6771810054779053, + "epoch": 0.5630886476382199, + "kl_loss": 0.2006765604019165, + "loss_ib": 0.006559951230883598, + "step": 1958 + }, + { + "ce_ib": 4.834569454193115, + "ce_orig": 0.7299646735191345, + "epoch": 0.5630886476382199, + "kl_loss": 0.19424204528331757, + "loss_ib": 0.006776989437639713, + "step": 1958 + }, + { + "ce_ib": 5.048141002655029, + "ce_orig": 0.6003642082214355, + "epoch": 0.5633762312171975, + "kl_loss": 0.22421041131019592, + "loss_ib": 0.007290245033800602, + "step": 1959 + }, + { + "ce_ib": 2.5761818885803223, + "ce_orig": 0.5762946009635925, + "epoch": 0.5633762312171975, + "kl_loss": 0.11957596242427826, + "loss_ib": 0.0037719416432082653, + "step": 1959 + }, + { + "ce_ib": 2.9306375980377197, + "ce_orig": 0.4841289818286896, + "epoch": 0.5633762312171975, + "kl_loss": 0.18257150053977966, + "loss_ib": 0.00475635239854455, + "step": 1959 + }, + { + "ce_ib": 4.837977886199951, + "ce_orig": 0.9663442373275757, + "epoch": 0.5633762312171975, + "kl_loss": 0.21835489571094513, + "loss_ib": 0.007021526340395212, + "step": 1959 + }, + { + "epoch": 0.5636638147961751, + "grad_norm": 0.11777511239051819, + "learning_rate": 9.365888682780862e-06, + "loss": 0.8707, + "step": 1960 + }, + { + "ce_ib": 1.4187829494476318, + "ce_orig": 0.19470104575157166, + "epoch": 0.5636638147961751, + "kl_loss": 0.40988442301750183, + "loss_ib": 0.0055176266469061375, + "step": 1960 + }, + { + "ce_ib": 5.243957042694092, + "ce_orig": 0.5946865677833557, + "epoch": 0.5636638147961751, + "kl_loss": 0.31962987780570984, + "loss_ib": 0.008440256118774414, + "step": 1960 + }, + { + "ce_ib": 6.228485107421875, + "ce_orig": 0.5695993304252625, + "epoch": 0.5636638147961751, + "kl_loss": 0.3002013564109802, + "loss_ib": 0.009230498224496841, + "step": 1960 + }, + { + "ce_ib": 6.686850547790527, + "ce_orig": 1.0680514574050903, + "epoch": 0.5636638147961751, + "kl_loss": 0.16701674461364746, + "loss_ib": 0.008357018232345581, + "step": 1960 + }, + { + "ce_ib": 5.57006311416626, + "ce_orig": 0.7003214955329895, + "epoch": 0.5639513983751527, + "kl_loss": 0.22560706734657288, + "loss_ib": 0.007826133631169796, + "step": 1961 + }, + { + "ce_ib": 3.7655398845672607, + "ce_orig": 0.7719546556472778, + "epoch": 0.5639513983751527, + "kl_loss": 0.1541329324245453, + "loss_ib": 0.005306868813931942, + "step": 1961 + }, + { + "ce_ib": 3.8506884574890137, + "ce_orig": 0.6675884127616882, + "epoch": 0.5639513983751527, + "kl_loss": 0.1861284077167511, + "loss_ib": 0.005711972713470459, + "step": 1961 + }, + { + "ce_ib": 4.970664024353027, + "ce_orig": 0.8154793381690979, + "epoch": 0.5639513983751527, + "kl_loss": 0.26470455527305603, + "loss_ib": 0.007617709692567587, + "step": 1961 + }, + { + "ce_ib": 7.087484836578369, + "ce_orig": 0.9953211545944214, + "epoch": 0.5642389819541305, + "kl_loss": 0.23596073687076569, + "loss_ib": 0.009447092190384865, + "step": 1962 + }, + { + "ce_ib": 3.6978096961975098, + "ce_orig": 0.8677507638931274, + "epoch": 0.5642389819541305, + "kl_loss": 0.12703941762447357, + "loss_ib": 0.004968203604221344, + "step": 1962 + }, + { + "ce_ib": 5.717005252838135, + "ce_orig": 1.2173904180526733, + "epoch": 0.5642389819541305, + "kl_loss": 0.21575793623924255, + "loss_ib": 0.007874583825469017, + "step": 1962 + }, + { + "ce_ib": 3.6788642406463623, + "ce_orig": 0.8600021004676819, + "epoch": 0.5642389819541305, + "kl_loss": 0.14753463864326477, + "loss_ib": 0.005154210142791271, + "step": 1962 + }, + { + "ce_ib": 3.4018752574920654, + "ce_orig": 0.8169087767601013, + "epoch": 0.5645265655331081, + "kl_loss": 0.18081295490264893, + "loss_ib": 0.00521000474691391, + "step": 1963 + }, + { + "ce_ib": 4.7397894859313965, + "ce_orig": 0.7717341184616089, + "epoch": 0.5645265655331081, + "kl_loss": 0.199485182762146, + "loss_ib": 0.00673464173451066, + "step": 1963 + }, + { + "ce_ib": 4.653703689575195, + "ce_orig": 0.7700741291046143, + "epoch": 0.5645265655331081, + "kl_loss": 0.2130657583475113, + "loss_ib": 0.006784361321479082, + "step": 1963 + }, + { + "ce_ib": 4.431517601013184, + "ce_orig": 0.9778881669044495, + "epoch": 0.5645265655331081, + "kl_loss": 0.19082719087600708, + "loss_ib": 0.006339789368212223, + "step": 1963 + }, + { + "ce_ib": 7.125487327575684, + "ce_orig": 0.6698811054229736, + "epoch": 0.5648141491120857, + "kl_loss": 0.3037925362586975, + "loss_ib": 0.010163411498069763, + "step": 1964 + }, + { + "ce_ib": 2.7644705772399902, + "ce_orig": 0.27121105790138245, + "epoch": 0.5648141491120857, + "kl_loss": 0.17889803647994995, + "loss_ib": 0.004553450737148523, + "step": 1964 + }, + { + "ce_ib": 3.524197578430176, + "ce_orig": 0.6075092554092407, + "epoch": 0.5648141491120857, + "kl_loss": 0.3592822551727295, + "loss_ib": 0.00711701950058341, + "step": 1964 + }, + { + "ce_ib": 6.147427558898926, + "ce_orig": 0.9962998628616333, + "epoch": 0.5648141491120857, + "kl_loss": 0.2286912351846695, + "loss_ib": 0.008434339426457882, + "step": 1964 + }, + { + "epoch": 0.5651017326910633, + "grad_norm": 0.12749658524990082, + "learning_rate": 9.36210077112892e-06, + "loss": 0.802, + "step": 1965 + }, + { + "ce_ib": 2.299947738647461, + "ce_orig": 0.4007086753845215, + "epoch": 0.5651017326910633, + "kl_loss": 0.22868286073207855, + "loss_ib": 0.004586776252835989, + "step": 1965 + }, + { + "ce_ib": 6.62831974029541, + "ce_orig": 1.134644627571106, + "epoch": 0.5651017326910633, + "kl_loss": 0.24236369132995605, + "loss_ib": 0.009051956236362457, + "step": 1965 + }, + { + "ce_ib": 3.9246878623962402, + "ce_orig": 0.5810030102729797, + "epoch": 0.5651017326910633, + "kl_loss": 0.26141154766082764, + "loss_ib": 0.006538803223520517, + "step": 1965 + }, + { + "ce_ib": 4.731905937194824, + "ce_orig": 0.6843492984771729, + "epoch": 0.5651017326910633, + "kl_loss": 0.22076337039470673, + "loss_ib": 0.006939539685845375, + "step": 1965 + }, + { + "ce_ib": 5.879118919372559, + "ce_orig": 0.7771980166435242, + "epoch": 0.565389316270041, + "kl_loss": 0.21585160493850708, + "loss_ib": 0.008037635125219822, + "step": 1966 + }, + { + "ce_ib": 4.194392681121826, + "ce_orig": 0.7197152376174927, + "epoch": 0.565389316270041, + "kl_loss": 0.2818256616592407, + "loss_ib": 0.007012649439275265, + "step": 1966 + }, + { + "ce_ib": 6.469331741333008, + "ce_orig": 0.8204442262649536, + "epoch": 0.565389316270041, + "kl_loss": 0.16121214628219604, + "loss_ib": 0.008081452921032906, + "step": 1966 + }, + { + "ce_ib": 5.347432613372803, + "ce_orig": 0.8207641839981079, + "epoch": 0.565389316270041, + "kl_loss": 0.16065415740013123, + "loss_ib": 0.006953973788768053, + "step": 1966 + }, + { + "ce_ib": 5.6471076011657715, + "ce_orig": 0.8323034048080444, + "epoch": 0.5656768998490186, + "kl_loss": 0.22340193390846252, + "loss_ib": 0.007881127297878265, + "step": 1967 + }, + { + "ce_ib": 3.217761754989624, + "ce_orig": 0.6444844603538513, + "epoch": 0.5656768998490186, + "kl_loss": 0.17131659388542175, + "loss_ib": 0.0049309274181723595, + "step": 1967 + }, + { + "ce_ib": 7.174370765686035, + "ce_orig": 1.3534836769104004, + "epoch": 0.5656768998490186, + "kl_loss": 0.15459537506103516, + "loss_ib": 0.008720324374735355, + "step": 1967 + }, + { + "ce_ib": 4.484457969665527, + "ce_orig": 0.7245428562164307, + "epoch": 0.5656768998490186, + "kl_loss": 0.25370633602142334, + "loss_ib": 0.007021521218121052, + "step": 1967 + }, + { + "ce_ib": 6.128870487213135, + "ce_orig": 0.6025968194007874, + "epoch": 0.5659644834279962, + "kl_loss": 0.2562224864959717, + "loss_ib": 0.008691095747053623, + "step": 1968 + }, + { + "ce_ib": 8.38642406463623, + "ce_orig": 1.2454034090042114, + "epoch": 0.5659644834279962, + "kl_loss": 0.25671201944351196, + "loss_ib": 0.010953543707728386, + "step": 1968 + }, + { + "ce_ib": 2.751986265182495, + "ce_orig": 0.3705005645751953, + "epoch": 0.5659644834279962, + "kl_loss": 0.5028501749038696, + "loss_ib": 0.0077804881148040295, + "step": 1968 + }, + { + "ce_ib": 5.967759132385254, + "ce_orig": 0.9296004176139832, + "epoch": 0.5659644834279962, + "kl_loss": 0.16728214919567108, + "loss_ib": 0.007640581112354994, + "step": 1968 + }, + { + "ce_ib": 5.655499458312988, + "ce_orig": 0.9303698539733887, + "epoch": 0.566252067006974, + "kl_loss": 0.2030981481075287, + "loss_ib": 0.007686481345444918, + "step": 1969 + }, + { + "ce_ib": 3.7224225997924805, + "ce_orig": 0.40809500217437744, + "epoch": 0.566252067006974, + "kl_loss": 0.24028897285461426, + "loss_ib": 0.006125312298536301, + "step": 1969 + }, + { + "ce_ib": 4.018906593322754, + "ce_orig": 0.6669531464576721, + "epoch": 0.566252067006974, + "kl_loss": 0.22310064733028412, + "loss_ib": 0.006249913014471531, + "step": 1969 + }, + { + "ce_ib": 5.23858118057251, + "ce_orig": 0.5927353501319885, + "epoch": 0.566252067006974, + "kl_loss": 0.2642560601234436, + "loss_ib": 0.007881141267716885, + "step": 1969 + }, + { + "epoch": 0.5665396505859516, + "grad_norm": 0.11762264370918274, + "learning_rate": 9.358302350163758e-06, + "loss": 0.8599, + "step": 1970 + }, + { + "ce_ib": 4.985979080200195, + "ce_orig": 0.9551448225975037, + "epoch": 0.5665396505859516, + "kl_loss": 0.399181604385376, + "loss_ib": 0.008977795019745827, + "step": 1970 + }, + { + "ce_ib": 4.591373443603516, + "ce_orig": 0.8099931478500366, + "epoch": 0.5665396505859516, + "kl_loss": 0.19217705726623535, + "loss_ib": 0.006513143423944712, + "step": 1970 + }, + { + "ce_ib": 6.659412860870361, + "ce_orig": 1.127098798751831, + "epoch": 0.5665396505859516, + "kl_loss": 0.21108250319957733, + "loss_ib": 0.008770237676799297, + "step": 1970 + }, + { + "ce_ib": 5.537312030792236, + "ce_orig": 0.7628366947174072, + "epoch": 0.5665396505859516, + "kl_loss": 0.20539790391921997, + "loss_ib": 0.00759129086509347, + "step": 1970 + }, + { + "ce_ib": 3.149667978286743, + "ce_orig": 0.7977863550186157, + "epoch": 0.5668272341649292, + "kl_loss": 0.19728781282901764, + "loss_ib": 0.005122545640915632, + "step": 1971 + }, + { + "ce_ib": 3.710076332092285, + "ce_orig": 0.5578461289405823, + "epoch": 0.5668272341649292, + "kl_loss": 0.20799040794372559, + "loss_ib": 0.005789980757981539, + "step": 1971 + }, + { + "ce_ib": 6.388326644897461, + "ce_orig": 1.2302563190460205, + "epoch": 0.5668272341649292, + "kl_loss": 0.23331697285175323, + "loss_ib": 0.008721495978534222, + "step": 1971 + }, + { + "ce_ib": 5.520804405212402, + "ce_orig": 0.9213300943374634, + "epoch": 0.5668272341649292, + "kl_loss": 0.288860559463501, + "loss_ib": 0.008409409783780575, + "step": 1971 + }, + { + "ce_ib": 6.34333610534668, + "ce_orig": 0.6838300228118896, + "epoch": 0.5671148177439068, + "kl_loss": 0.2031092792749405, + "loss_ib": 0.008374428376555443, + "step": 1972 + }, + { + "ce_ib": 2.3525428771972656, + "ce_orig": 0.6577918529510498, + "epoch": 0.5671148177439068, + "kl_loss": 0.1439896523952484, + "loss_ib": 0.003792439354583621, + "step": 1972 + }, + { + "ce_ib": 5.995169162750244, + "ce_orig": 1.369937777519226, + "epoch": 0.5671148177439068, + "kl_loss": 0.25531530380249023, + "loss_ib": 0.008548322133719921, + "step": 1972 + }, + { + "ce_ib": 9.326186180114746, + "ce_orig": 1.1996033191680908, + "epoch": 0.5671148177439068, + "kl_loss": 0.2241915613412857, + "loss_ib": 0.01156810112297535, + "step": 1972 + }, + { + "ce_ib": 4.266485214233398, + "ce_orig": 0.7407187223434448, + "epoch": 0.5674024013228844, + "kl_loss": 0.15525701642036438, + "loss_ib": 0.005819055251777172, + "step": 1973 + }, + { + "ce_ib": 6.443141937255859, + "ce_orig": 1.002044916152954, + "epoch": 0.5674024013228844, + "kl_loss": 0.19105428457260132, + "loss_ib": 0.008353685028851032, + "step": 1973 + }, + { + "ce_ib": 5.837249279022217, + "ce_orig": 1.0835412740707397, + "epoch": 0.5674024013228844, + "kl_loss": 0.2187289148569107, + "loss_ib": 0.008024537935853004, + "step": 1973 + }, + { + "ce_ib": 6.973402976989746, + "ce_orig": 1.2432231903076172, + "epoch": 0.5674024013228844, + "kl_loss": 0.20823659002780914, + "loss_ib": 0.009055769070982933, + "step": 1973 + }, + { + "ce_ib": 4.132625102996826, + "ce_orig": 0.7623564004898071, + "epoch": 0.5676899849018621, + "kl_loss": 0.155003622174263, + "loss_ib": 0.005682661198079586, + "step": 1974 + }, + { + "ce_ib": 4.418184757232666, + "ce_orig": 0.865897536277771, + "epoch": 0.5676899849018621, + "kl_loss": 0.23717643320560455, + "loss_ib": 0.006789948791265488, + "step": 1974 + }, + { + "ce_ib": 8.083592414855957, + "ce_orig": 1.3602644205093384, + "epoch": 0.5676899849018621, + "kl_loss": 0.18909840285778046, + "loss_ib": 0.009974576532840729, + "step": 1974 + }, + { + "ce_ib": 4.652640342712402, + "ce_orig": 0.973773717880249, + "epoch": 0.5676899849018621, + "kl_loss": 0.19918793439865112, + "loss_ib": 0.0066445195116102695, + "step": 1974 + }, + { + "epoch": 0.5679775684808397, + "grad_norm": 0.1255088448524475, + "learning_rate": 9.35449342903665e-06, + "loss": 0.9833, + "step": 1975 + }, + { + "ce_ib": 8.040228843688965, + "ce_orig": 0.7436036467552185, + "epoch": 0.5679775684808397, + "kl_loss": 0.2832046449184418, + "loss_ib": 0.010872275568544865, + "step": 1975 + }, + { + "ce_ib": 6.236600875854492, + "ce_orig": 1.1183065176010132, + "epoch": 0.5679775684808397, + "kl_loss": 0.26579269766807556, + "loss_ib": 0.00889452826231718, + "step": 1975 + }, + { + "ce_ib": 7.973631381988525, + "ce_orig": 1.1876856088638306, + "epoch": 0.5679775684808397, + "kl_loss": 0.23983827233314514, + "loss_ib": 0.010372013784945011, + "step": 1975 + }, + { + "ce_ib": 5.137147426605225, + "ce_orig": 0.3782559335231781, + "epoch": 0.5679775684808397, + "kl_loss": 0.6367517709732056, + "loss_ib": 0.011504664085805416, + "step": 1975 + }, + { + "ce_ib": 8.592161178588867, + "ce_orig": 1.8806641101837158, + "epoch": 0.5682651520598174, + "kl_loss": 0.20329293608665466, + "loss_ib": 0.010625090450048447, + "step": 1976 + }, + { + "ce_ib": 7.720051288604736, + "ce_orig": 1.0171343088150024, + "epoch": 0.5682651520598174, + "kl_loss": 0.262137234210968, + "loss_ib": 0.010341423563659191, + "step": 1976 + }, + { + "ce_ib": 4.9487762451171875, + "ce_orig": 0.7323424816131592, + "epoch": 0.5682651520598174, + "kl_loss": 0.313734769821167, + "loss_ib": 0.008086123503744602, + "step": 1976 + }, + { + "ce_ib": 3.0523829460144043, + "ce_orig": 0.44585099816322327, + "epoch": 0.5682651520598174, + "kl_loss": 0.29260846972465515, + "loss_ib": 0.005978467408567667, + "step": 1976 + }, + { + "ce_ib": 4.261775493621826, + "ce_orig": 0.7914097905158997, + "epoch": 0.568552735638795, + "kl_loss": 0.2280750870704651, + "loss_ib": 0.006542526185512543, + "step": 1977 + }, + { + "ce_ib": 4.111820697784424, + "ce_orig": 0.8065600991249084, + "epoch": 0.568552735638795, + "kl_loss": 0.19572162628173828, + "loss_ib": 0.006069036666303873, + "step": 1977 + }, + { + "ce_ib": 3.917246103286743, + "ce_orig": 0.6122181415557861, + "epoch": 0.568552735638795, + "kl_loss": 0.27111905813217163, + "loss_ib": 0.006628436967730522, + "step": 1977 + }, + { + "ce_ib": 5.574148178100586, + "ce_orig": 0.9698813557624817, + "epoch": 0.568552735638795, + "kl_loss": 0.2713298797607422, + "loss_ib": 0.008287446573376656, + "step": 1977 + }, + { + "ce_ib": 4.16474723815918, + "ce_orig": 0.9032987952232361, + "epoch": 0.5688403192177727, + "kl_loss": 0.1838064193725586, + "loss_ib": 0.006002811249345541, + "step": 1978 + }, + { + "ce_ib": 4.287901401519775, + "ce_orig": 0.8550389409065247, + "epoch": 0.5688403192177727, + "kl_loss": 0.2831040322780609, + "loss_ib": 0.007118941284716129, + "step": 1978 + }, + { + "ce_ib": 8.180766105651855, + "ce_orig": 1.2101643085479736, + "epoch": 0.5688403192177727, + "kl_loss": 0.26428651809692383, + "loss_ib": 0.010823630727827549, + "step": 1978 + }, + { + "ce_ib": 4.760415077209473, + "ce_orig": 0.8011019825935364, + "epoch": 0.5688403192177727, + "kl_loss": 0.3007705807685852, + "loss_ib": 0.007768120616674423, + "step": 1978 + }, + { + "ce_ib": 5.779112339019775, + "ce_orig": 1.2069902420043945, + "epoch": 0.5691279027967503, + "kl_loss": 0.2669152021408081, + "loss_ib": 0.008448264561593533, + "step": 1979 + }, + { + "ce_ib": 3.393887519836426, + "ce_orig": 0.5832438468933105, + "epoch": 0.5691279027967503, + "kl_loss": 0.146831214427948, + "loss_ib": 0.004862199537456036, + "step": 1979 + }, + { + "ce_ib": 6.103104591369629, + "ce_orig": 1.0624935626983643, + "epoch": 0.5691279027967503, + "kl_loss": 0.34160977602005005, + "loss_ib": 0.009519202634692192, + "step": 1979 + }, + { + "ce_ib": 2.8118221759796143, + "ce_orig": 0.5914442539215088, + "epoch": 0.5691279027967503, + "kl_loss": 0.197896808385849, + "loss_ib": 0.004790790379047394, + "step": 1979 + }, + { + "epoch": 0.5694154863757279, + "grad_norm": 0.12583377957344055, + "learning_rate": 9.35067401692417e-06, + "loss": 0.9343, + "step": 1980 + }, + { + "ce_ib": 6.383730411529541, + "ce_orig": 1.111653208732605, + "epoch": 0.5694154863757279, + "kl_loss": 0.22049827873706818, + "loss_ib": 0.008588713593780994, + "step": 1980 + }, + { + "ce_ib": 4.164979934692383, + "ce_orig": 0.4715644121170044, + "epoch": 0.5694154863757279, + "kl_loss": 0.19932615756988525, + "loss_ib": 0.0061582415364682674, + "step": 1980 + }, + { + "ce_ib": 3.7962656021118164, + "ce_orig": 0.47578591108322144, + "epoch": 0.5694154863757279, + "kl_loss": 0.4800046980381012, + "loss_ib": 0.008596313185989857, + "step": 1980 + }, + { + "ce_ib": 4.593181610107422, + "ce_orig": 0.6127696633338928, + "epoch": 0.5694154863757279, + "kl_loss": 0.22612276673316956, + "loss_ib": 0.006854408886283636, + "step": 1980 + }, + { + "ce_ib": 5.146809101104736, + "ce_orig": 0.36611780524253845, + "epoch": 0.5697030699547055, + "kl_loss": 0.35808759927749634, + "loss_ib": 0.008727684617042542, + "step": 1981 + }, + { + "ce_ib": 6.117854595184326, + "ce_orig": 0.8459972739219666, + "epoch": 0.5697030699547055, + "kl_loss": 0.2890790104866028, + "loss_ib": 0.009008645080029964, + "step": 1981 + }, + { + "ce_ib": 4.888398170471191, + "ce_orig": 0.5127837657928467, + "epoch": 0.5697030699547055, + "kl_loss": 0.24100396037101746, + "loss_ib": 0.007298437878489494, + "step": 1981 + }, + { + "ce_ib": 4.431370735168457, + "ce_orig": 0.4488409757614136, + "epoch": 0.5697030699547055, + "kl_loss": 0.26973119378089905, + "loss_ib": 0.00712868245318532, + "step": 1981 + }, + { + "ce_ib": 2.463197946548462, + "ce_orig": 0.5498092770576477, + "epoch": 0.5699906535336833, + "kl_loss": 0.12848663330078125, + "loss_ib": 0.00374806416220963, + "step": 1982 + }, + { + "ce_ib": 4.813397407531738, + "ce_orig": 0.9176458120346069, + "epoch": 0.5699906535336833, + "kl_loss": 0.23598945140838623, + "loss_ib": 0.007173291873186827, + "step": 1982 + }, + { + "ce_ib": 6.061206340789795, + "ce_orig": 1.3012479543685913, + "epoch": 0.5699906535336833, + "kl_loss": 0.15754292905330658, + "loss_ib": 0.007636635564267635, + "step": 1982 + }, + { + "ce_ib": 3.3574044704437256, + "ce_orig": 0.5007990598678589, + "epoch": 0.5699906535336833, + "kl_loss": 0.2878541648387909, + "loss_ib": 0.00623594643548131, + "step": 1982 + }, + { + "ce_ib": 4.542166233062744, + "ce_orig": 1.2737343311309814, + "epoch": 0.5702782371126609, + "kl_loss": 0.16203643381595612, + "loss_ib": 0.006162530742585659, + "step": 1983 + }, + { + "ce_ib": 5.179211139678955, + "ce_orig": 0.9539887309074402, + "epoch": 0.5702782371126609, + "kl_loss": 0.34270644187927246, + "loss_ib": 0.008606275543570518, + "step": 1983 + }, + { + "ce_ib": 4.461246013641357, + "ce_orig": 0.7930008172988892, + "epoch": 0.5702782371126609, + "kl_loss": 0.3119294345378876, + "loss_ib": 0.007580540142953396, + "step": 1983 + }, + { + "ce_ib": 3.8203072547912598, + "ce_orig": 0.4939483106136322, + "epoch": 0.5702782371126609, + "kl_loss": 0.18022455275058746, + "loss_ib": 0.005622552707791328, + "step": 1983 + }, + { + "ce_ib": 3.5545308589935303, + "ce_orig": 0.4724838137626648, + "epoch": 0.5705658206916385, + "kl_loss": 0.17989501357078552, + "loss_ib": 0.005353481043130159, + "step": 1984 + }, + { + "ce_ib": 5.211380958557129, + "ce_orig": 0.8598366379737854, + "epoch": 0.5705658206916385, + "kl_loss": 0.20496268570423126, + "loss_ib": 0.007261008024215698, + "step": 1984 + }, + { + "ce_ib": 6.27523136138916, + "ce_orig": 1.1058155298233032, + "epoch": 0.5705658206916385, + "kl_loss": 0.23712310194969177, + "loss_ib": 0.008646462112665176, + "step": 1984 + }, + { + "ce_ib": 5.946166038513184, + "ce_orig": 0.9420840740203857, + "epoch": 0.5705658206916385, + "kl_loss": 0.23133786022663116, + "loss_ib": 0.008259544149041176, + "step": 1984 + }, + { + "epoch": 0.5708534042706161, + "grad_norm": 0.12770475447177887, + "learning_rate": 9.346844123028172e-06, + "loss": 0.854, + "step": 1985 + }, + { + "ce_ib": 5.662142276763916, + "ce_orig": 0.8796694874763489, + "epoch": 0.5708534042706161, + "kl_loss": 0.19070428609848022, + "loss_ib": 0.007569185458123684, + "step": 1985 + }, + { + "ce_ib": 4.19233512878418, + "ce_orig": 0.9024421572685242, + "epoch": 0.5708534042706161, + "kl_loss": 0.2900976836681366, + "loss_ib": 0.007093311753123999, + "step": 1985 + }, + { + "ce_ib": 7.417691707611084, + "ce_orig": 1.1089247465133667, + "epoch": 0.5708534042706161, + "kl_loss": 0.217866450548172, + "loss_ib": 0.009596356190741062, + "step": 1985 + }, + { + "ce_ib": 6.199939727783203, + "ce_orig": 1.012454867362976, + "epoch": 0.5708534042706161, + "kl_loss": 0.22154691815376282, + "loss_ib": 0.008415409363806248, + "step": 1985 + }, + { + "ce_ib": 4.7033257484436035, + "ce_orig": 0.695756733417511, + "epoch": 0.5711409878495938, + "kl_loss": 0.19042037427425385, + "loss_ib": 0.006607529241591692, + "step": 1986 + }, + { + "ce_ib": 5.706419467926025, + "ce_orig": 0.5783017873764038, + "epoch": 0.5711409878495938, + "kl_loss": 0.16660155355930328, + "loss_ib": 0.007372434716671705, + "step": 1986 + }, + { + "ce_ib": 6.473813056945801, + "ce_orig": 0.6710724830627441, + "epoch": 0.5711409878495938, + "kl_loss": 0.48526352643966675, + "loss_ib": 0.011326448991894722, + "step": 1986 + }, + { + "ce_ib": 5.012685775756836, + "ce_orig": 0.8257612586021423, + "epoch": 0.5711409878495938, + "kl_loss": 0.22710269689559937, + "loss_ib": 0.007283712271600962, + "step": 1986 + }, + { + "ce_ib": 4.048294544219971, + "ce_orig": 0.5852899551391602, + "epoch": 0.5714285714285714, + "kl_loss": 0.13435371220111847, + "loss_ib": 0.00539183197543025, + "step": 1987 + }, + { + "ce_ib": 5.982120037078857, + "ce_orig": 0.9091359376907349, + "epoch": 0.5714285714285714, + "kl_loss": 0.17164137959480286, + "loss_ib": 0.007698533590883017, + "step": 1987 + }, + { + "ce_ib": 5.078379154205322, + "ce_orig": 1.0537258386611938, + "epoch": 0.5714285714285714, + "kl_loss": 0.17836973071098328, + "loss_ib": 0.006862076465040445, + "step": 1987 + }, + { + "ce_ib": 3.4556667804718018, + "ce_orig": 0.6418643593788147, + "epoch": 0.5714285714285714, + "kl_loss": 0.23529046773910522, + "loss_ib": 0.005808571353554726, + "step": 1987 + }, + { + "ce_ib": 3.838954448699951, + "ce_orig": 0.5713245868682861, + "epoch": 0.571716155007549, + "kl_loss": 0.14425553381443024, + "loss_ib": 0.005281509831547737, + "step": 1988 + }, + { + "ce_ib": 3.5798120498657227, + "ce_orig": 0.308655709028244, + "epoch": 0.571716155007549, + "kl_loss": 0.49981340765953064, + "loss_ib": 0.008577946573495865, + "step": 1988 + }, + { + "ce_ib": 3.3572182655334473, + "ce_orig": 0.7377309203147888, + "epoch": 0.571716155007549, + "kl_loss": 0.16723394393920898, + "loss_ib": 0.00502955773845315, + "step": 1988 + }, + { + "ce_ib": 4.432819366455078, + "ce_orig": 0.8372645974159241, + "epoch": 0.571716155007549, + "kl_loss": 0.2893436551094055, + "loss_ib": 0.007326256018131971, + "step": 1988 + }, + { + "ce_ib": 3.809474229812622, + "ce_orig": 0.7397820353507996, + "epoch": 0.5720037385865268, + "kl_loss": 0.1930888444185257, + "loss_ib": 0.00574036268517375, + "step": 1989 + }, + { + "ce_ib": 3.7399144172668457, + "ce_orig": 0.5138627886772156, + "epoch": 0.5720037385865268, + "kl_loss": 0.2592537999153137, + "loss_ib": 0.006332451943308115, + "step": 1989 + }, + { + "ce_ib": 3.4589803218841553, + "ce_orig": 0.6595289707183838, + "epoch": 0.5720037385865268, + "kl_loss": 0.1255977749824524, + "loss_ib": 0.004714957904070616, + "step": 1989 + }, + { + "ce_ib": 4.204591751098633, + "ce_orig": 0.8881221413612366, + "epoch": 0.5720037385865268, + "kl_loss": 0.17572838068008423, + "loss_ib": 0.005961875896900892, + "step": 1989 + }, + { + "epoch": 0.5722913221655044, + "grad_norm": 0.15225893259048462, + "learning_rate": 9.343003756575758e-06, + "loss": 0.8669, + "step": 1990 + }, + { + "ce_ib": 3.736387014389038, + "ce_orig": 0.4147120714187622, + "epoch": 0.5722913221655044, + "kl_loss": 0.23932798206806183, + "loss_ib": 0.006129667162895203, + "step": 1990 + }, + { + "ce_ib": 4.791964530944824, + "ce_orig": 1.0321811437606812, + "epoch": 0.5722913221655044, + "kl_loss": 0.1818554401397705, + "loss_ib": 0.00661051832139492, + "step": 1990 + }, + { + "ce_ib": 6.760359287261963, + "ce_orig": 1.4710983037948608, + "epoch": 0.5722913221655044, + "kl_loss": 0.12290021777153015, + "loss_ib": 0.007989360950887203, + "step": 1990 + }, + { + "ce_ib": 4.722901344299316, + "ce_orig": 0.6649274230003357, + "epoch": 0.5722913221655044, + "kl_loss": 0.31279200315475464, + "loss_ib": 0.007850821129977703, + "step": 1990 + }, + { + "ce_ib": 4.941730976104736, + "ce_orig": 0.7840262651443481, + "epoch": 0.572578905744482, + "kl_loss": 0.22302401065826416, + "loss_ib": 0.007171971257776022, + "step": 1991 + }, + { + "ce_ib": 4.915649890899658, + "ce_orig": 1.1190143823623657, + "epoch": 0.572578905744482, + "kl_loss": 0.16858713328838348, + "loss_ib": 0.0066015212796628475, + "step": 1991 + }, + { + "ce_ib": 8.89372444152832, + "ce_orig": 1.5894157886505127, + "epoch": 0.572578905744482, + "kl_loss": 0.22975695133209229, + "loss_ib": 0.011191293597221375, + "step": 1991 + }, + { + "ce_ib": 4.597128391265869, + "ce_orig": 0.8089755177497864, + "epoch": 0.572578905744482, + "kl_loss": 0.30403149127960205, + "loss_ib": 0.007637443486601114, + "step": 1991 + }, + { + "ce_ib": 7.109074592590332, + "ce_orig": 0.5922656059265137, + "epoch": 0.5728664893234596, + "kl_loss": 0.34544605016708374, + "loss_ib": 0.010563535615801811, + "step": 1992 + }, + { + "ce_ib": 3.9104511737823486, + "ce_orig": 0.510415256023407, + "epoch": 0.5728664893234596, + "kl_loss": 0.23136720061302185, + "loss_ib": 0.006224123295396566, + "step": 1992 + }, + { + "ce_ib": 4.182595729827881, + "ce_orig": 0.814848005771637, + "epoch": 0.5728664893234596, + "kl_loss": 0.14320699870586395, + "loss_ib": 0.005614665802568197, + "step": 1992 + }, + { + "ce_ib": 3.3440380096435547, + "ce_orig": 0.6243037581443787, + "epoch": 0.5728664893234596, + "kl_loss": 0.1349714994430542, + "loss_ib": 0.004693753086030483, + "step": 1992 + }, + { + "ce_ib": 3.631105899810791, + "ce_orig": 0.6191436052322388, + "epoch": 0.5731540729024373, + "kl_loss": 0.20608216524124146, + "loss_ib": 0.005691927392035723, + "step": 1993 + }, + { + "ce_ib": 5.589832305908203, + "ce_orig": 1.1829005479812622, + "epoch": 0.5731540729024373, + "kl_loss": 0.22724345326423645, + "loss_ib": 0.007862267084419727, + "step": 1993 + }, + { + "ce_ib": 3.7915942668914795, + "ce_orig": 0.6931657791137695, + "epoch": 0.5731540729024373, + "kl_loss": 0.18231695890426636, + "loss_ib": 0.005614763591438532, + "step": 1993 + }, + { + "ce_ib": 4.375422954559326, + "ce_orig": 0.745322585105896, + "epoch": 0.5731540729024373, + "kl_loss": 0.2949674725532532, + "loss_ib": 0.007325096987187862, + "step": 1993 + }, + { + "ce_ib": 3.4465203285217285, + "ce_orig": 0.5611634850502014, + "epoch": 0.5734416564814149, + "kl_loss": 0.2395157516002655, + "loss_ib": 0.005841677542775869, + "step": 1994 + }, + { + "ce_ib": 8.060595512390137, + "ce_orig": 1.1999075412750244, + "epoch": 0.5734416564814149, + "kl_loss": 0.1809830665588379, + "loss_ib": 0.009870425797998905, + "step": 1994 + }, + { + "ce_ib": 3.0074267387390137, + "ce_orig": 0.6377060413360596, + "epoch": 0.5734416564814149, + "kl_loss": 0.21215245127677917, + "loss_ib": 0.00512895081192255, + "step": 1994 + }, + { + "ce_ib": 6.93182373046875, + "ce_orig": 0.8554105162620544, + "epoch": 0.5734416564814149, + "kl_loss": 0.2965483069419861, + "loss_ib": 0.009897306561470032, + "step": 1994 + }, + { + "epoch": 0.5737292400603925, + "grad_norm": 0.12339665740728378, + "learning_rate": 9.339152926819259e-06, + "loss": 0.84, + "step": 1995 + }, + { + "ce_ib": 6.209877967834473, + "ce_orig": 1.1827176809310913, + "epoch": 0.5737292400603925, + "kl_loss": 0.2422569990158081, + "loss_ib": 0.008632448501884937, + "step": 1995 + }, + { + "ce_ib": 4.090214252471924, + "ce_orig": 0.5631054639816284, + "epoch": 0.5737292400603925, + "kl_loss": 0.18600359559059143, + "loss_ib": 0.0059502506628632545, + "step": 1995 + }, + { + "ce_ib": 7.50163459777832, + "ce_orig": 1.0576012134552002, + "epoch": 0.5737292400603925, + "kl_loss": 0.19061043858528137, + "loss_ib": 0.009407739154994488, + "step": 1995 + }, + { + "ce_ib": 3.8708176612854004, + "ce_orig": 0.44926437735557556, + "epoch": 0.5737292400603925, + "kl_loss": 0.31955450773239136, + "loss_ib": 0.007066363003104925, + "step": 1995 + }, + { + "ce_ib": 3.8656487464904785, + "ce_orig": 0.6919336915016174, + "epoch": 0.5740168236393702, + "kl_loss": 0.20553775131702423, + "loss_ib": 0.005921026226133108, + "step": 1996 + }, + { + "ce_ib": 2.9704113006591797, + "ce_orig": 0.7503409385681152, + "epoch": 0.5740168236393702, + "kl_loss": 0.15444160997867584, + "loss_ib": 0.004514827858656645, + "step": 1996 + }, + { + "ce_ib": 2.5017213821411133, + "ce_orig": 0.47855257987976074, + "epoch": 0.5740168236393702, + "kl_loss": 0.17140816152095795, + "loss_ib": 0.004215802997350693, + "step": 1996 + }, + { + "ce_ib": 6.732548236846924, + "ce_orig": 1.4294027090072632, + "epoch": 0.5740168236393702, + "kl_loss": 0.3413958251476288, + "loss_ib": 0.010146507062017918, + "step": 1996 + }, + { + "ce_ib": 4.873874187469482, + "ce_orig": 0.8762377500534058, + "epoch": 0.5743044072183479, + "kl_loss": 0.1936822235584259, + "loss_ib": 0.006810695864260197, + "step": 1997 + }, + { + "ce_ib": 3.110563039779663, + "ce_orig": 0.5195457339286804, + "epoch": 0.5743044072183479, + "kl_loss": 0.12195809185504913, + "loss_ib": 0.004330143798142672, + "step": 1997 + }, + { + "ce_ib": 3.6337907314300537, + "ce_orig": 0.7683537602424622, + "epoch": 0.5743044072183479, + "kl_loss": 0.22961460053920746, + "loss_ib": 0.005929936654865742, + "step": 1997 + }, + { + "ce_ib": 5.1048054695129395, + "ce_orig": 0.8128162622451782, + "epoch": 0.5743044072183479, + "kl_loss": 0.20120778679847717, + "loss_ib": 0.007116883993148804, + "step": 1997 + }, + { + "ce_ib": 2.6532974243164062, + "ce_orig": 0.5304936170578003, + "epoch": 0.5745919907973255, + "kl_loss": 0.1840747892856598, + "loss_ib": 0.0044940453954041, + "step": 1998 + }, + { + "ce_ib": 4.239226341247559, + "ce_orig": 0.8676175475120544, + "epoch": 0.5745919907973255, + "kl_loss": 0.26321902871131897, + "loss_ib": 0.006871416233479977, + "step": 1998 + }, + { + "ce_ib": 7.493546009063721, + "ce_orig": 1.3227934837341309, + "epoch": 0.5745919907973255, + "kl_loss": 0.16562724113464355, + "loss_ib": 0.009149818681180477, + "step": 1998 + }, + { + "ce_ib": 3.1623857021331787, + "ce_orig": 0.6350651383399963, + "epoch": 0.5745919907973255, + "kl_loss": 0.18704582750797272, + "loss_ib": 0.0050328439101576805, + "step": 1998 + }, + { + "ce_ib": 2.9258923530578613, + "ce_orig": 0.5514384508132935, + "epoch": 0.5748795743763031, + "kl_loss": 0.16143694519996643, + "loss_ib": 0.004540261812508106, + "step": 1999 + }, + { + "ce_ib": 5.353640079498291, + "ce_orig": 0.828662633895874, + "epoch": 0.5748795743763031, + "kl_loss": 0.18023285269737244, + "loss_ib": 0.0071559688076376915, + "step": 1999 + }, + { + "ce_ib": 6.6875481605529785, + "ce_orig": 0.8465195894241333, + "epoch": 0.5748795743763031, + "kl_loss": 0.20709696412086487, + "loss_ib": 0.008758516982197762, + "step": 1999 + }, + { + "ce_ib": 5.4458537101745605, + "ce_orig": 0.7180911302566528, + "epoch": 0.5748795743763031, + "kl_loss": 0.22075165808200836, + "loss_ib": 0.007653370499610901, + "step": 1999 + }, + { + "epoch": 0.5751671579552807, + "grad_norm": 0.11972998082637787, + "learning_rate": 9.335291643036221e-06, + "loss": 0.8855, + "step": 2000 + } + ], + "logging_steps": 5, + "max_steps": 10434, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}