| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "ce_loss_13": 11.519832849502563, |
| "ce_loss_26": 11.473536491394043, |
| "ce_loss_39": 11.263565063476562, |
| "ce_loss_52": 1.3852829337120056, |
| "ce_loss_7": 11.56409740447998, |
| "epoch": 0.0001, |
| "grad_norm": 22.293954988093517, |
| "kl_loss_13": 20864.0, |
| "kl_loss_26": 20736.0, |
| "kl_loss_39": 20320.0, |
| "kl_loss_7": 20992.0, |
| "learning_rate": 1e-05, |
| "loss": 41440.0, |
| "step": 1 |
| }, |
| { |
| "ce_loss_13": 11.513921552234226, |
| "ce_loss_26": 11.469077931510078, |
| "ce_loss_39": 11.246019893222385, |
| "ce_loss_52": 1.4558950497044458, |
| "ce_loss_7": 11.55848307079739, |
| "epoch": 0.001, |
| "grad_norm": 23.161174410395507, |
| "kl_loss_13": 20800.0, |
| "kl_loss_26": 20696.88888888889, |
| "kl_loss_39": 20227.555555555555, |
| "kl_loss_7": 20881.777777777777, |
| "learning_rate": 0.0001, |
| "loss": 41384.0, |
| "step": 10 |
| }, |
| { |
| "ce_loss_13": 11.426759386062622, |
| "ce_loss_26": 11.41293363571167, |
| "ce_loss_39": 11.229010510444642, |
| "ce_loss_52": 1.4324860751628876, |
| "ce_loss_7": 11.462353825569153, |
| "epoch": 0.002, |
| "grad_norm": 38.50857397395853, |
| "kl_loss_13": 20668.8, |
| "kl_loss_26": 20640.0, |
| "kl_loss_39": 20256.0, |
| "kl_loss_7": 20745.6, |
| "learning_rate": 0.0002, |
| "loss": 41179.2, |
| "step": 20 |
| }, |
| { |
| "ce_loss_13": 10.954976797103882, |
| "ce_loss_26": 11.087984251976014, |
| "ce_loss_39": 11.10265805721283, |
| "ce_loss_52": 1.4276391446590424, |
| "ce_loss_7": 10.940680837631225, |
| "epoch": 0.003, |
| "grad_norm": 58.08626567131467, |
| "kl_loss_13": 19702.4, |
| "kl_loss_26": 19977.6, |
| "kl_loss_39": 20028.8, |
| "kl_loss_7": 19680.0, |
| "learning_rate": 0.0003, |
| "loss": 39668.0, |
| "step": 30 |
| }, |
| { |
| "ce_loss_13": 10.308717799186706, |
| "ce_loss_26": 10.375125074386597, |
| "ce_loss_39": 10.562542247772218, |
| "ce_loss_52": 1.455844309926033, |
| "ce_loss_7": 10.312761902809143, |
| "epoch": 0.004, |
| "grad_norm": 30.451988937114738, |
| "kl_loss_13": 18307.2, |
| "kl_loss_26": 18438.4, |
| "kl_loss_39": 18832.0, |
| "kl_loss_7": 18313.6, |
| "learning_rate": 0.0004, |
| "loss": 36999.2, |
| "step": 40 |
| }, |
| { |
| "ce_loss_13": 10.173566651344299, |
| "ce_loss_26": 10.188505339622498, |
| "ce_loss_39": 10.173172044754029, |
| "ce_loss_52": 1.4577810317277908, |
| "ce_loss_7": 10.182775902748109, |
| "epoch": 0.005, |
| "grad_norm": 37.851028798241174, |
| "kl_loss_13": 18006.4, |
| "kl_loss_26": 18028.8, |
| "kl_loss_39": 18012.8, |
| "kl_loss_7": 18022.4, |
| "learning_rate": 0.0005, |
| "loss": 36191.2, |
| "step": 50 |
| }, |
| { |
| "ce_loss_13": 10.071360087394714, |
| "ce_loss_26": 10.092596936225892, |
| "ce_loss_39": 10.072972583770753, |
| "ce_loss_52": 1.428243064880371, |
| "ce_loss_7": 10.105072927474975, |
| "epoch": 0.006, |
| "grad_norm": 45.98715921867029, |
| "kl_loss_13": 17872.0, |
| "kl_loss_26": 17907.2, |
| "kl_loss_39": 17878.4, |
| "kl_loss_7": 17936.0, |
| "learning_rate": 0.0006, |
| "loss": 35728.0, |
| "step": 60 |
| }, |
| { |
| "ce_loss_13": 9.994266033172607, |
| "ce_loss_26": 10.000447010993957, |
| "ce_loss_39": 9.958984637260437, |
| "ce_loss_52": 1.392430166900158, |
| "ce_loss_7": 10.041595196723938, |
| "epoch": 0.007, |
| "grad_norm": 53.56384816487026, |
| "kl_loss_13": 17750.4, |
| "kl_loss_26": 17763.2, |
| "kl_loss_39": 17667.2, |
| "kl_loss_7": 17846.4, |
| "learning_rate": 0.0007, |
| "loss": 35411.2, |
| "step": 70 |
| }, |
| { |
| "ce_loss_13": 9.870160865783692, |
| "ce_loss_26": 9.870406699180602, |
| "ce_loss_39": 9.815650677680969, |
| "ce_loss_52": 1.4188331544399262, |
| "ce_loss_7": 9.92598659992218, |
| "epoch": 0.008, |
| "grad_norm": 58.363906192597035, |
| "kl_loss_13": 17475.2, |
| "kl_loss_26": 17484.8, |
| "kl_loss_39": 17366.4, |
| "kl_loss_7": 17587.2, |
| "learning_rate": 0.0008, |
| "loss": 35010.4, |
| "step": 80 |
| }, |
| { |
| "ce_loss_13": 9.786613607406617, |
| "ce_loss_26": 9.77514407634735, |
| "ce_loss_39": 9.697125172615051, |
| "ce_loss_52": 1.4261163920164108, |
| "ce_loss_7": 9.84011538028717, |
| "epoch": 0.009, |
| "grad_norm": 57.597510936184484, |
| "kl_loss_13": 17267.2, |
| "kl_loss_26": 17232.0, |
| "kl_loss_39": 17065.6, |
| "kl_loss_7": 17376.0, |
| "learning_rate": 0.0009000000000000001, |
| "loss": 34545.6, |
| "step": 90 |
| }, |
| { |
| "ce_loss_13": 9.70496118068695, |
| "ce_loss_26": 9.680623888969421, |
| "ce_loss_39": 9.57672963142395, |
| "ce_loss_52": 1.4332450866699218, |
| "ce_loss_7": 9.75693221092224, |
| "epoch": 0.01, |
| "grad_norm": 56.92102830978135, |
| "kl_loss_13": 17075.2, |
| "kl_loss_26": 17030.4, |
| "kl_loss_39": 16814.4, |
| "kl_loss_7": 17187.2, |
| "learning_rate": 0.001, |
| "loss": 34141.6, |
| "step": 100 |
| }, |
| { |
| "ce_loss_13": 9.62608094215393, |
| "ce_loss_26": 9.584086346626282, |
| "ce_loss_39": 9.469242024421693, |
| "ce_loss_52": 1.4119121626019477, |
| "ce_loss_7": 9.681734418869018, |
| "epoch": 0.011, |
| "grad_norm": 55.54751251170693, |
| "kl_loss_13": 16956.8, |
| "kl_loss_26": 16856.0, |
| "kl_loss_39": 16632.0, |
| "kl_loss_7": 17075.2, |
| "learning_rate": 0.0009999974825027757, |
| "loss": 33673.2, |
| "step": 110 |
| }, |
| { |
| "ce_loss_13": 9.557737636566163, |
| "ce_loss_26": 9.502408647537232, |
| "ce_loss_39": 9.373179388046264, |
| "ce_loss_52": 1.420964427292347, |
| "ce_loss_7": 9.613711476325989, |
| "epoch": 0.012, |
| "grad_norm": 55.463113229450904, |
| "kl_loss_13": 16777.6, |
| "kl_loss_26": 16667.2, |
| "kl_loss_39": 16393.6, |
| "kl_loss_7": 16905.6, |
| "learning_rate": 0.0009999899300364532, |
| "loss": 33335.2, |
| "step": 120 |
| }, |
| { |
| "ce_loss_13": 9.474759387969971, |
| "ce_loss_26": 9.408789944648742, |
| "ce_loss_39": 9.266374969482422, |
| "ce_loss_52": 1.4124270409345627, |
| "ce_loss_7": 9.538655042648315, |
| "epoch": 0.013, |
| "grad_norm": 54.241543470395335, |
| "kl_loss_13": 16628.8, |
| "kl_loss_26": 16492.8, |
| "kl_loss_39": 16193.6, |
| "kl_loss_7": 16766.4, |
| "learning_rate": 0.0009999773426770863, |
| "loss": 32999.6, |
| "step": 130 |
| }, |
| { |
| "ce_loss_13": 9.420424246788025, |
| "ce_loss_26": 9.348571801185608, |
| "ce_loss_39": 9.1972074508667, |
| "ce_loss_52": 1.4392782002687454, |
| "ce_loss_7": 9.492741465568542, |
| "epoch": 0.014, |
| "grad_norm": 54.10933362933205, |
| "kl_loss_13": 16476.8, |
| "kl_loss_26": 16324.8, |
| "kl_loss_39": 16014.4, |
| "kl_loss_7": 16638.4, |
| "learning_rate": 0.0009999597205514296, |
| "loss": 32751.6, |
| "step": 140 |
| }, |
| { |
| "ce_loss_13": 9.388373732566833, |
| "ce_loss_26": 9.308008575439453, |
| "ce_loss_39": 9.153336524963379, |
| "ce_loss_52": 1.4420859813690186, |
| "ce_loss_7": 9.46445541381836, |
| "epoch": 0.015, |
| "grad_norm": 55.15236350542743, |
| "kl_loss_13": 16382.4, |
| "kl_loss_26": 16219.2, |
| "kl_loss_39": 15888.0, |
| "kl_loss_7": 16542.4, |
| "learning_rate": 0.0009999370638369377, |
| "loss": 32525.2, |
| "step": 150 |
| }, |
| { |
| "ce_loss_13": 9.301919007301331, |
| "ce_loss_26": 9.212661600112915, |
| "ce_loss_39": 9.050238633155823, |
| "ce_loss_52": 1.4233157366514206, |
| "ce_loss_7": 9.383031058311463, |
| "epoch": 0.016, |
| "grad_norm": 55.03653973388566, |
| "kl_loss_13": 16278.4, |
| "kl_loss_26": 16092.8, |
| "kl_loss_39": 15755.2, |
| "kl_loss_7": 16440.0, |
| "learning_rate": 0.000999909372761763, |
| "loss": 32209.6, |
| "step": 160 |
| }, |
| { |
| "ce_loss_13": 9.24642186164856, |
| "ce_loss_26": 9.14689018726349, |
| "ce_loss_39": 8.978599190711975, |
| "ce_loss_52": 1.429112258553505, |
| "ce_loss_7": 9.331455826759338, |
| "epoch": 0.017, |
| "grad_norm": 54.90625528920335, |
| "kl_loss_13": 16142.4, |
| "kl_loss_26": 15931.2, |
| "kl_loss_39": 15580.8, |
| "kl_loss_7": 16315.2, |
| "learning_rate": 0.0009998766476047546, |
| "loss": 31964.8, |
| "step": 170 |
| }, |
| { |
| "ce_loss_13": 9.187117385864259, |
| "ce_loss_26": 9.076116013526917, |
| "ce_loss_39": 8.902227759361267, |
| "ce_loss_52": 1.3885775536298752, |
| "ce_loss_7": 9.275272035598755, |
| "epoch": 0.018, |
| "grad_norm": 54.60426962776646, |
| "kl_loss_13": 16072.0, |
| "kl_loss_26": 15844.8, |
| "kl_loss_39": 15480.0, |
| "kl_loss_7": 16262.4, |
| "learning_rate": 0.0009998388886954545, |
| "loss": 31645.2, |
| "step": 180 |
| }, |
| { |
| "ce_loss_13": 9.131648278236389, |
| "ce_loss_26": 9.008217167854308, |
| "ce_loss_39": 8.831042790412903, |
| "ce_loss_52": 1.4482133895158769, |
| "ce_loss_7": 9.224077129364014, |
| "epoch": 0.019, |
| "grad_norm": 53.93299711922953, |
| "kl_loss_13": 15870.4, |
| "kl_loss_26": 15609.6, |
| "kl_loss_39": 15232.0, |
| "kl_loss_7": 16067.2, |
| "learning_rate": 0.0009997960964140947, |
| "loss": 31408.4, |
| "step": 190 |
| }, |
| { |
| "ce_loss_13": 9.050732731819153, |
| "ce_loss_26": 8.918284726142883, |
| "ce_loss_39": 8.738019919395446, |
| "ce_loss_52": 1.4300477087497712, |
| "ce_loss_7": 9.145042276382446, |
| "epoch": 0.02, |
| "grad_norm": 53.732589384741736, |
| "kl_loss_13": 15728.0, |
| "kl_loss_26": 15449.6, |
| "kl_loss_39": 15064.0, |
| "kl_loss_7": 15928.0, |
| "learning_rate": 0.0009997482711915926, |
| "loss": 31145.6, |
| "step": 200 |
| }, |
| { |
| "ce_loss_13": 8.988229060173035, |
| "ce_loss_26": 8.844111323356628, |
| "ce_loss_39": 8.654908394813537, |
| "ce_loss_52": 1.4580651924014092, |
| "ce_loss_7": 9.090379095077514, |
| "epoch": 0.021, |
| "grad_norm": 53.04072542826613, |
| "kl_loss_13": 15550.4, |
| "kl_loss_26": 15251.2, |
| "kl_loss_39": 14854.4, |
| "kl_loss_7": 15771.2, |
| "learning_rate": 0.0009996954135095479, |
| "loss": 30853.2, |
| "step": 210 |
| }, |
| { |
| "ce_loss_13": 8.945707607269288, |
| "ce_loss_26": 8.79644329547882, |
| "ce_loss_39": 8.601721858978271, |
| "ce_loss_52": 1.4154451981186866, |
| "ce_loss_7": 9.052277135849, |
| "epoch": 0.022, |
| "grad_norm": 53.309057416596275, |
| "kl_loss_13": 15544.0, |
| "kl_loss_26": 15219.2, |
| "kl_loss_39": 14809.6, |
| "kl_loss_7": 15761.6, |
| "learning_rate": 0.0009996375239002368, |
| "loss": 30606.8, |
| "step": 220 |
| }, |
| { |
| "ce_loss_13": 8.933341026306152, |
| "ce_loss_26": 8.773744869232178, |
| "ce_loss_39": 8.57140781879425, |
| "ce_loss_52": 1.4167777329683304, |
| "ce_loss_7": 9.04187982082367, |
| "epoch": 0.023, |
| "grad_norm": 53.383646235412414, |
| "kl_loss_13": 15486.4, |
| "kl_loss_26": 15156.8, |
| "kl_loss_39": 14726.4, |
| "kl_loss_7": 15721.6, |
| "learning_rate": 0.0009995746029466072, |
| "loss": 30406.4, |
| "step": 230 |
| }, |
| { |
| "ce_loss_13": 8.869291019439697, |
| "ce_loss_26": 8.693119740486145, |
| "ce_loss_39": 8.476832008361816, |
| "ce_loss_52": 1.4153499186038971, |
| "ce_loss_7": 8.980035424232483, |
| "epoch": 0.024, |
| "grad_norm": 52.917730504652305, |
| "kl_loss_13": 15353.6, |
| "kl_loss_26": 14985.6, |
| "kl_loss_39": 14528.0, |
| "kl_loss_7": 15588.8, |
| "learning_rate": 0.0009995066512822719, |
| "loss": 30148.4, |
| "step": 240 |
| }, |
| { |
| "ce_loss_13": 8.81713101863861, |
| "ce_loss_26": 8.636789417266845, |
| "ce_loss_39": 8.412476801872254, |
| "ce_loss_52": 1.4529948115348816, |
| "ce_loss_7": 8.929914593696594, |
| "epoch": 0.025, |
| "grad_norm": 53.98592158305785, |
| "kl_loss_13": 15196.8, |
| "kl_loss_26": 14820.8, |
| "kl_loss_39": 14340.8, |
| "kl_loss_7": 15438.4, |
| "learning_rate": 0.000999433669591504, |
| "loss": 29860.8, |
| "step": 250 |
| }, |
| { |
| "ce_loss_13": 8.748036527633667, |
| "ce_loss_26": 8.558162140846253, |
| "ce_loss_39": 8.331628108024598, |
| "ce_loss_52": 1.4311662405729293, |
| "ce_loss_7": 8.864733743667603, |
| "epoch": 0.026, |
| "grad_norm": 52.415587650337294, |
| "kl_loss_13": 15088.0, |
| "kl_loss_26": 14683.2, |
| "kl_loss_39": 14200.0, |
| "kl_loss_7": 15329.6, |
| "learning_rate": 0.000999355658609228, |
| "loss": 29636.0, |
| "step": 260 |
| }, |
| { |
| "ce_loss_13": 8.692949771881104, |
| "ce_loss_26": 8.494869589805603, |
| "ce_loss_39": 8.259693837165832, |
| "ce_loss_52": 1.4384984374046326, |
| "ce_loss_7": 8.814060854911805, |
| "epoch": 0.027, |
| "grad_norm": 53.356303831580306, |
| "kl_loss_13": 14976.0, |
| "kl_loss_26": 14555.2, |
| "kl_loss_39": 14054.4, |
| "kl_loss_7": 15230.4, |
| "learning_rate": 0.0009992726191210138, |
| "loss": 29438.0, |
| "step": 270 |
| }, |
| { |
| "ce_loss_13": 8.67082085609436, |
| "ce_loss_26": 8.463814663887025, |
| "ce_loss_39": 8.215038228034974, |
| "ce_loss_52": 1.4267250567674636, |
| "ce_loss_7": 8.794116616249084, |
| "epoch": 0.028, |
| "grad_norm": 52.94359037736481, |
| "kl_loss_13": 14902.4, |
| "kl_loss_26": 14476.8, |
| "kl_loss_39": 13944.0, |
| "kl_loss_7": 15174.4, |
| "learning_rate": 0.0009991845519630679, |
| "loss": 29276.4, |
| "step": 280 |
| }, |
| { |
| "ce_loss_13": 8.61563618183136, |
| "ce_loss_26": 8.402152299880981, |
| "ce_loss_39": 8.144541609287263, |
| "ce_loss_52": 1.4250373497605324, |
| "ce_loss_7": 8.742781138420105, |
| "epoch": 0.029, |
| "grad_norm": 51.566336997103136, |
| "kl_loss_13": 14817.6, |
| "kl_loss_26": 14369.6, |
| "kl_loss_39": 13816.0, |
| "kl_loss_7": 15089.6, |
| "learning_rate": 0.0009990914580222257, |
| "loss": 29010.0, |
| "step": 290 |
| }, |
| { |
| "ce_loss_13": 8.572561240196228, |
| "ce_loss_26": 8.35531551837921, |
| "ce_loss_39": 8.092759764194488, |
| "ce_loss_52": 1.4600662559270858, |
| "ce_loss_7": 8.698393726348877, |
| "epoch": 0.03, |
| "grad_norm": 53.1076582900563, |
| "kl_loss_13": 14704.0, |
| "kl_loss_26": 14251.2, |
| "kl_loss_39": 13689.6, |
| "kl_loss_7": 14974.4, |
| "learning_rate": 0.0009989933382359422, |
| "loss": 28776.8, |
| "step": 300 |
| }, |
| { |
| "ce_loss_13": 8.491887974739075, |
| "ce_loss_26": 8.263946199417115, |
| "ce_loss_39": 7.987871313095093, |
| "ce_loss_52": 1.4451197743415833, |
| "ce_loss_7": 8.625923323631287, |
| "epoch": 0.031, |
| "grad_norm": 52.58460107915946, |
| "kl_loss_13": 14537.6, |
| "kl_loss_26": 14054.4, |
| "kl_loss_39": 13464.0, |
| "kl_loss_7": 14825.6, |
| "learning_rate": 0.0009988901935922825, |
| "loss": 28548.4, |
| "step": 310 |
| }, |
| { |
| "ce_loss_13": 8.474869418144227, |
| "ce_loss_26": 8.245952117443085, |
| "ce_loss_39": 7.974157309532165, |
| "ce_loss_52": 1.4602727562189102, |
| "ce_loss_7": 8.610798478126526, |
| "epoch": 0.032, |
| "grad_norm": 52.331617741046635, |
| "kl_loss_13": 14480.0, |
| "kl_loss_26": 14003.2, |
| "kl_loss_39": 13427.2, |
| "kl_loss_7": 14760.0, |
| "learning_rate": 0.0009987820251299122, |
| "loss": 28364.4, |
| "step": 320 |
| }, |
| { |
| "ce_loss_13": 8.44498426914215, |
| "ce_loss_26": 8.20645843744278, |
| "ce_loss_39": 7.912872779369354, |
| "ce_loss_52": 1.4554857224225999, |
| "ce_loss_7": 8.583872628211974, |
| "epoch": 0.033, |
| "grad_norm": 50.87832937020315, |
| "kl_loss_13": 14411.2, |
| "kl_loss_26": 13900.8, |
| "kl_loss_39": 13281.6, |
| "kl_loss_7": 14700.8, |
| "learning_rate": 0.0009986688339380862, |
| "loss": 28109.2, |
| "step": 330 |
| }, |
| { |
| "ce_loss_13": 8.38349392414093, |
| "ce_loss_26": 8.133478546142578, |
| "ce_loss_39": 7.828318297863007, |
| "ce_loss_52": 1.425998830795288, |
| "ce_loss_7": 8.525882768630982, |
| "epoch": 0.034, |
| "grad_norm": 51.153122440976325, |
| "kl_loss_13": 14328.0, |
| "kl_loss_26": 13809.6, |
| "kl_loss_39": 13163.2, |
| "kl_loss_7": 14633.6, |
| "learning_rate": 0.0009985506211566387, |
| "loss": 27878.0, |
| "step": 340 |
| }, |
| { |
| "ce_loss_13": 8.349605464935303, |
| "ce_loss_26": 8.097514569759369, |
| "ce_loss_39": 7.785721278190612, |
| "ce_loss_52": 1.4315812528133391, |
| "ce_loss_7": 8.496586155891418, |
| "epoch": 0.035, |
| "grad_norm": 51.20256734387486, |
| "kl_loss_13": 14254.4, |
| "kl_loss_26": 13721.6, |
| "kl_loss_39": 13062.4, |
| "kl_loss_7": 14561.6, |
| "learning_rate": 0.0009984273879759713, |
| "loss": 27693.2, |
| "step": 350 |
| }, |
| { |
| "ce_loss_13": 8.273482608795167, |
| "ce_loss_26": 8.020016944408416, |
| "ce_loss_39": 7.711479115486145, |
| "ce_loss_52": 1.4499147981405258, |
| "ce_loss_7": 8.422512984275818, |
| "epoch": 0.036, |
| "grad_norm": 52.12411775413645, |
| "kl_loss_13": 14088.0, |
| "kl_loss_26": 13556.8, |
| "kl_loss_39": 12892.8, |
| "kl_loss_7": 14403.2, |
| "learning_rate": 0.0009982991356370402, |
| "loss": 27442.0, |
| "step": 360 |
| }, |
| { |
| "ce_loss_13": 8.215481567382813, |
| "ce_loss_26": 7.953275382518768, |
| "ce_loss_39": 7.628855121135712, |
| "ce_loss_52": 1.411750042438507, |
| "ce_loss_7": 8.368350863456726, |
| "epoch": 0.037, |
| "grad_norm": 51.0215332330724, |
| "kl_loss_13": 14024.0, |
| "kl_loss_26": 13470.4, |
| "kl_loss_39": 12779.2, |
| "kl_loss_7": 14348.8, |
| "learning_rate": 0.0009981658654313456, |
| "loss": 27348.0, |
| "step": 370 |
| }, |
| { |
| "ce_loss_13": 8.217882227897643, |
| "ce_loss_26": 7.9462348341941835, |
| "ce_loss_39": 7.613846385478974, |
| "ce_loss_52": 1.4831970453262329, |
| "ce_loss_7": 8.373853397369384, |
| "epoch": 0.038, |
| "grad_norm": 50.4571010346743, |
| "kl_loss_13": 13913.6, |
| "kl_loss_26": 13345.6, |
| "kl_loss_39": 12646.4, |
| "kl_loss_7": 14243.2, |
| "learning_rate": 0.000998027578700917, |
| "loss": 27082.8, |
| "step": 380 |
| }, |
| { |
| "ce_loss_13": 8.11286985874176, |
| "ce_loss_26": 7.831897294521331, |
| "ce_loss_39": 7.493152487277984, |
| "ce_loss_52": 1.4128454998135567, |
| "ce_loss_7": 8.275482225418092, |
| "epoch": 0.039, |
| "grad_norm": 51.75236464910614, |
| "kl_loss_13": 13798.4, |
| "kl_loss_26": 13212.8, |
| "kl_loss_39": 12491.2, |
| "kl_loss_7": 14139.2, |
| "learning_rate": 0.0009978842768382998, |
| "loss": 26835.6, |
| "step": 390 |
| }, |
| { |
| "ce_loss_13": 8.092873919010163, |
| "ce_loss_26": 7.80686913728714, |
| "ce_loss_39": 7.462458717823028, |
| "ce_loss_52": 1.449526023864746, |
| "ce_loss_7": 8.253998827934264, |
| "epoch": 0.04, |
| "grad_norm": 50.588198000151046, |
| "kl_loss_13": 13680.0, |
| "kl_loss_26": 13083.2, |
| "kl_loss_39": 12355.2, |
| "kl_loss_7": 14019.2, |
| "learning_rate": 0.0009977359612865424, |
| "loss": 26670.0, |
| "step": 400 |
| }, |
| { |
| "ce_loss_13": 8.073025333881379, |
| "ce_loss_26": 7.792924261093139, |
| "ce_loss_39": 7.448240423202515, |
| "ce_loss_52": 1.4590945556759833, |
| "ce_loss_7": 8.234287071228028, |
| "epoch": 0.041, |
| "grad_norm": 50.388842117598266, |
| "kl_loss_13": 13630.4, |
| "kl_loss_26": 13032.0, |
| "kl_loss_39": 12302.4, |
| "kl_loss_7": 13969.6, |
| "learning_rate": 0.0009975826335391806, |
| "loss": 26457.2, |
| "step": 410 |
| }, |
| { |
| "ce_loss_13": 7.959017169475556, |
| "ce_loss_26": 7.667092227935791, |
| "ce_loss_39": 7.308180010318756, |
| "ce_loss_52": 1.3903952419757843, |
| "ce_loss_7": 8.129511964321136, |
| "epoch": 0.042, |
| "grad_norm": 50.86911767207133, |
| "kl_loss_13": 13540.8, |
| "kl_loss_26": 12916.8, |
| "kl_loss_39": 12163.2, |
| "kl_loss_7": 13888.0, |
| "learning_rate": 0.0009974242951402235, |
| "loss": 26197.6, |
| "step": 420 |
| }, |
| { |
| "ce_loss_13": 7.940323996543884, |
| "ce_loss_26": 7.629641830921173, |
| "ce_loss_39": 7.2696495175361635, |
| "ce_loss_52": 1.4528310179710389, |
| "ce_loss_7": 8.112548959255218, |
| "epoch": 0.043, |
| "grad_norm": 49.24163454624527, |
| "kl_loss_13": 13374.4, |
| "kl_loss_26": 12728.0, |
| "kl_loss_39": 11958.4, |
| "kl_loss_7": 13744.0, |
| "learning_rate": 0.0009972609476841367, |
| "loss": 25992.4, |
| "step": 430 |
| }, |
| { |
| "ce_loss_13": 7.8985715508461, |
| "ce_loss_26": 7.592843997478485, |
| "ce_loss_39": 7.205817592144013, |
| "ce_loss_52": 1.4196556687355042, |
| "ce_loss_7": 8.078336155414581, |
| "epoch": 0.044, |
| "grad_norm": 49.811998634305894, |
| "kl_loss_13": 13345.6, |
| "kl_loss_26": 12699.2, |
| "kl_loss_39": 11881.6, |
| "kl_loss_7": 13718.4, |
| "learning_rate": 0.0009970925928158272, |
| "loss": 25854.4, |
| "step": 440 |
| }, |
| { |
| "ce_loss_13": 7.867163848876953, |
| "ce_loss_26": 7.55372383594513, |
| "ce_loss_39": 7.173475623130798, |
| "ce_loss_52": 1.4358820408582686, |
| "ce_loss_7": 8.0431494474411, |
| "epoch": 0.045, |
| "grad_norm": 48.311473749243106, |
| "kl_loss_13": 13248.0, |
| "kl_loss_26": 12595.2, |
| "kl_loss_39": 11792.0, |
| "kl_loss_7": 13614.4, |
| "learning_rate": 0.000996919232230627, |
| "loss": 25620.0, |
| "step": 450 |
| }, |
| { |
| "ce_loss_13": 7.794478893280029, |
| "ce_loss_26": 7.470960378646851, |
| "ce_loss_39": 7.075640022754669, |
| "ce_loss_52": 1.4313764542341232, |
| "ce_loss_7": 7.97986272573471, |
| "epoch": 0.046, |
| "grad_norm": 49.75600981611632, |
| "kl_loss_13": 13113.6, |
| "kl_loss_26": 12433.6, |
| "kl_loss_39": 11588.8, |
| "kl_loss_7": 13496.0, |
| "learning_rate": 0.0009967408676742752, |
| "loss": 25367.2, |
| "step": 460 |
| }, |
| { |
| "ce_loss_13": 7.780554842948914, |
| "ce_loss_26": 7.450528597831726, |
| "ce_loss_39": 7.06024489402771, |
| "ce_loss_52": 1.4271526962518692, |
| "ce_loss_7": 7.96198604106903, |
| "epoch": 0.047, |
| "grad_norm": 49.73198686766462, |
| "kl_loss_13": 13089.6, |
| "kl_loss_26": 12406.4, |
| "kl_loss_39": 11576.0, |
| "kl_loss_7": 13475.2, |
| "learning_rate": 0.0009965575009429006, |
| "loss": 25186.4, |
| "step": 470 |
| }, |
| { |
| "ce_loss_13": 7.771422934532166, |
| "ce_loss_26": 7.443638646602631, |
| "ce_loss_39": 7.045053339004516, |
| "ce_loss_52": 1.4691366642713546, |
| "ce_loss_7": 7.956920957565307, |
| "epoch": 0.048, |
| "grad_norm": 48.6898889709016, |
| "kl_loss_13": 12971.2, |
| "kl_loss_26": 12286.4, |
| "kl_loss_39": 11440.0, |
| "kl_loss_7": 13366.4, |
| "learning_rate": 0.0009963691338830043, |
| "loss": 25028.4, |
| "step": 480 |
| }, |
| { |
| "ce_loss_13": 7.7170240640640255, |
| "ce_loss_26": 7.3867839813232425, |
| "ce_loss_39": 6.986623299121857, |
| "ce_loss_52": 1.4700770109891892, |
| "ce_loss_7": 7.900947248935699, |
| "epoch": 0.049, |
| "grad_norm": 47.968754476102596, |
| "kl_loss_13": 12884.8, |
| "kl_loss_26": 12195.2, |
| "kl_loss_39": 11332.8, |
| "kl_loss_7": 13273.6, |
| "learning_rate": 0.0009961757683914405, |
| "loss": 24808.8, |
| "step": 490 |
| }, |
| { |
| "ce_loss_13": 7.612575709819794, |
| "ce_loss_26": 7.270983147621155, |
| "ce_loss_39": 6.851053369045258, |
| "ce_loss_52": 1.4072588831186295, |
| "ce_loss_7": 7.807804656028748, |
| "epoch": 0.05, |
| "grad_norm": 49.18975121944083, |
| "kl_loss_13": 12780.8, |
| "kl_loss_26": 12060.8, |
| "kl_loss_39": 11161.6, |
| "kl_loss_7": 13190.4, |
| "learning_rate": 0.0009959774064153978, |
| "loss": 24615.6, |
| "step": 500 |
| }, |
| { |
| "ce_loss_13": 7.6113405585289, |
| "ce_loss_26": 7.257396864891052, |
| "ce_loss_39": 6.8364926934242245, |
| "ce_loss_52": 1.405586513876915, |
| "ce_loss_7": 7.807830440998077, |
| "epoch": 0.051, |
| "grad_norm": 48.36038036613293, |
| "kl_loss_13": 12753.6, |
| "kl_loss_26": 12017.6, |
| "kl_loss_39": 11121.6, |
| "kl_loss_7": 13161.6, |
| "learning_rate": 0.0009957740499523787, |
| "loss": 24452.0, |
| "step": 510 |
| }, |
| { |
| "ce_loss_13": 7.562924301624298, |
| "ce_loss_26": 7.205751180648804, |
| "ce_loss_39": 6.7729793906211855, |
| "ce_loss_52": 1.441327565908432, |
| "ce_loss_7": 7.762964737415314, |
| "epoch": 0.052, |
| "grad_norm": 48.52091531249349, |
| "kl_loss_13": 12577.6, |
| "kl_loss_26": 11833.6, |
| "kl_loss_39": 10912.0, |
| "kl_loss_7": 12990.4, |
| "learning_rate": 0.0009955657010501807, |
| "loss": 24214.4, |
| "step": 520 |
| }, |
| { |
| "ce_loss_13": 7.5027553796768185, |
| "ce_loss_26": 7.149892139434814, |
| "ce_loss_39": 6.725725698471069, |
| "ce_loss_52": 1.4616976886987687, |
| "ce_loss_7": 7.700168478488922, |
| "epoch": 0.053, |
| "grad_norm": 47.609892122251686, |
| "kl_loss_13": 12451.2, |
| "kl_loss_26": 11710.4, |
| "kl_loss_39": 10804.8, |
| "kl_loss_7": 12872.0, |
| "learning_rate": 0.000995352361806875, |
| "loss": 24037.2, |
| "step": 530 |
| }, |
| { |
| "ce_loss_13": 7.525733006000519, |
| "ce_loss_26": 7.1605717778205875, |
| "ce_loss_39": 6.703774988651276, |
| "ce_loss_52": 1.42885320186615, |
| "ce_loss_7": 7.727836930751801, |
| "epoch": 0.054, |
| "grad_norm": 47.0111007198644, |
| "kl_loss_13": 12556.8, |
| "kl_loss_26": 11790.4, |
| "kl_loss_39": 10828.8, |
| "kl_loss_7": 12976.0, |
| "learning_rate": 0.0009951340343707852, |
| "loss": 23845.2, |
| "step": 540 |
| }, |
| { |
| "ce_loss_13": 7.423776483535766, |
| "ce_loss_26": 7.0557411193847654, |
| "ce_loss_39": 6.604493200778961, |
| "ce_loss_52": 1.4447590827941894, |
| "ce_loss_7": 7.626477897167206, |
| "epoch": 0.055, |
| "grad_norm": 49.45361296474082, |
| "kl_loss_13": 12328.0, |
| "kl_loss_26": 11540.8, |
| "kl_loss_39": 10584.0, |
| "kl_loss_7": 12747.2, |
| "learning_rate": 0.0009949107209404665, |
| "loss": 23664.0, |
| "step": 550 |
| }, |
| { |
| "ce_loss_13": 7.434036374092102, |
| "ce_loss_26": 7.059368348121643, |
| "ce_loss_39": 6.6053709268569945, |
| "ce_loss_52": 1.4645269870758058, |
| "ce_loss_7": 7.6432753801345825, |
| "epoch": 0.056, |
| "grad_norm": 47.673368470799254, |
| "kl_loss_13": 12291.2, |
| "kl_loss_26": 11512.0, |
| "kl_loss_39": 10547.2, |
| "kl_loss_7": 12726.4, |
| "learning_rate": 0.0009946824237646824, |
| "loss": 23469.6, |
| "step": 560 |
| }, |
| { |
| "ce_loss_13": 7.307004892826081, |
| "ce_loss_26": 6.927345609664917, |
| "ce_loss_39": 6.4629304051399235, |
| "ce_loss_52": 1.437305434048176, |
| "ce_loss_7": 7.522386133670807, |
| "epoch": 0.057, |
| "grad_norm": 46.80952508481597, |
| "kl_loss_13": 12094.4, |
| "kl_loss_26": 11300.8, |
| "kl_loss_39": 10308.8, |
| "kl_loss_7": 12539.2, |
| "learning_rate": 0.0009944491451423828, |
| "loss": 23249.6, |
| "step": 570 |
| }, |
| { |
| "ce_loss_13": 7.349401378631592, |
| "ce_loss_26": 6.964329659938812, |
| "ce_loss_39": 6.480949449539184, |
| "ce_loss_52": 1.4452718168497085, |
| "ce_loss_7": 7.564259791374207, |
| "epoch": 0.058, |
| "grad_norm": 46.22867294627436, |
| "kl_loss_13": 12145.6, |
| "kl_loss_26": 11340.8, |
| "kl_loss_39": 10315.2, |
| "kl_loss_7": 12592.0, |
| "learning_rate": 0.0009942108874226813, |
| "loss": 23091.2, |
| "step": 580 |
| }, |
| { |
| "ce_loss_13": 7.254886651039124, |
| "ce_loss_26": 6.858836472034454, |
| "ce_loss_39": 6.3856946468353275, |
| "ce_loss_52": 1.4449717432260514, |
| "ce_loss_7": 7.473185133934021, |
| "epoch": 0.059, |
| "grad_norm": 45.84422579202554, |
| "kl_loss_13": 11969.6, |
| "kl_loss_26": 11147.2, |
| "kl_loss_39": 10136.0, |
| "kl_loss_7": 12424.0, |
| "learning_rate": 0.00099396765300483, |
| "loss": 22886.4, |
| "step": 590 |
| }, |
| { |
| "ce_loss_13": 7.248957896232605, |
| "ce_loss_26": 6.855677163600921, |
| "ce_loss_39": 6.3774519801139835, |
| "ce_loss_52": 1.477000206708908, |
| "ce_loss_7": 7.465912497043609, |
| "epoch": 0.06, |
| "grad_norm": 46.37348014710593, |
| "kl_loss_13": 11888.0, |
| "kl_loss_26": 11064.0, |
| "kl_loss_39": 10044.8, |
| "kl_loss_7": 12347.2, |
| "learning_rate": 0.0009937194443381972, |
| "loss": 22708.0, |
| "step": 600 |
| }, |
| { |
| "ce_loss_13": 7.210493552684784, |
| "ce_loss_26": 6.8088652968406675, |
| "ce_loss_39": 6.325126445293426, |
| "ce_loss_52": 1.444644930958748, |
| "ce_loss_7": 7.429195690155029, |
| "epoch": 0.061, |
| "grad_norm": 44.92499922138711, |
| "kl_loss_13": 11859.2, |
| "kl_loss_26": 11019.2, |
| "kl_loss_39": 9995.2, |
| "kl_loss_7": 12320.0, |
| "learning_rate": 0.0009934662639222412, |
| "loss": 22544.8, |
| "step": 610 |
| }, |
| { |
| "ce_loss_13": 7.1185362339019775, |
| "ce_loss_26": 6.714106225967408, |
| "ce_loss_39": 6.223515486717224, |
| "ce_loss_52": 1.4858893424272537, |
| "ce_loss_7": 7.341545379161834, |
| "epoch": 0.062, |
| "grad_norm": 46.45143938897793, |
| "kl_loss_13": 11601.6, |
| "kl_loss_26": 10750.4, |
| "kl_loss_39": 9708.8, |
| "kl_loss_7": 12072.0, |
| "learning_rate": 0.000993208114306486, |
| "loss": 22270.0, |
| "step": 620 |
| }, |
| { |
| "ce_loss_13": 7.0913821935653685, |
| "ce_loss_26": 6.689675974845886, |
| "ce_loss_39": 6.203632855415345, |
| "ce_loss_52": 1.4506051570177079, |
| "ce_loss_7": 7.311544299125671, |
| "epoch": 0.063, |
| "grad_norm": 45.34630221197193, |
| "kl_loss_13": 11592.0, |
| "kl_loss_26": 10752.0, |
| "kl_loss_39": 9720.0, |
| "kl_loss_7": 12067.2, |
| "learning_rate": 0.0009929449980904952, |
| "loss": 22153.2, |
| "step": 630 |
| }, |
| { |
| "ce_loss_13": 7.083522534370422, |
| "ce_loss_26": 6.665987038612366, |
| "ce_loss_39": 6.162657225131989, |
| "ce_loss_52": 1.4658448547124863, |
| "ce_loss_7": 7.312107050418854, |
| "epoch": 0.064, |
| "grad_norm": 45.471744941742365, |
| "kl_loss_13": 11552.0, |
| "kl_loss_26": 10675.2, |
| "kl_loss_39": 9596.8, |
| "kl_loss_7": 12032.0, |
| "learning_rate": 0.0009926769179238466, |
| "loss": 21949.2, |
| "step": 640 |
| }, |
| { |
| "ce_loss_13": 6.994167017936706, |
| "ce_loss_26": 6.563658082485199, |
| "ce_loss_39": 6.042373907566071, |
| "ce_loss_52": 1.4207285180687905, |
| "ce_loss_7": 7.2311041235923765, |
| "epoch": 0.065, |
| "grad_norm": 43.84127734363621, |
| "kl_loss_13": 11489.6, |
| "kl_loss_26": 10593.6, |
| "kl_loss_39": 9488.0, |
| "kl_loss_7": 11980.8, |
| "learning_rate": 0.000992403876506104, |
| "loss": 21796.8, |
| "step": 650 |
| }, |
| { |
| "ce_loss_13": 6.9931820154190065, |
| "ce_loss_26": 6.566097593307495, |
| "ce_loss_39": 6.045178306102753, |
| "ce_loss_52": 1.4772068083286285, |
| "ce_loss_7": 7.2253869533538815, |
| "epoch": 0.066, |
| "grad_norm": 43.29636197313948, |
| "kl_loss_13": 11363.2, |
| "kl_loss_26": 10462.4, |
| "kl_loss_39": 9350.4, |
| "kl_loss_7": 11856.0, |
| "learning_rate": 0.0009921258765867918, |
| "loss": 21581.2, |
| "step": 660 |
| }, |
| { |
| "ce_loss_13": 6.907565414905548, |
| "ce_loss_26": 6.471543419361114, |
| "ce_loss_39": 5.933564639091491, |
| "ce_loss_52": 1.4364299774169922, |
| "ce_loss_7": 7.147727394104004, |
| "epoch": 0.067, |
| "grad_norm": 45.37835002704289, |
| "kl_loss_13": 11259.2, |
| "kl_loss_26": 10348.8, |
| "kl_loss_39": 9200.0, |
| "kl_loss_7": 11766.4, |
| "learning_rate": 0.0009918429209653662, |
| "loss": 21394.0, |
| "step": 670 |
| }, |
| { |
| "ce_loss_13": 6.9164858102798465, |
| "ce_loss_26": 6.482825660705567, |
| "ce_loss_39": 5.9588632702827455, |
| "ce_loss_52": 1.4493420034646989, |
| "ce_loss_7": 7.152165937423706, |
| "epoch": 0.068, |
| "grad_norm": 44.49853682897619, |
| "kl_loss_13": 11238.4, |
| "kl_loss_26": 10336.0, |
| "kl_loss_39": 9201.6, |
| "kl_loss_7": 11729.6, |
| "learning_rate": 0.0009915550124911866, |
| "loss": 21260.8, |
| "step": 680 |
| }, |
| { |
| "ce_loss_13": 6.871931791305542, |
| "ce_loss_26": 6.440780913829803, |
| "ce_loss_39": 5.910830950736999, |
| "ce_loss_52": 1.4238717705011368, |
| "ce_loss_7": 7.117027842998505, |
| "epoch": 0.069, |
| "grad_norm": 44.632228248662486, |
| "kl_loss_13": 11209.6, |
| "kl_loss_26": 10307.2, |
| "kl_loss_39": 9184.0, |
| "kl_loss_7": 11716.8, |
| "learning_rate": 0.0009912621540634887, |
| "loss": 21100.4, |
| "step": 690 |
| }, |
| { |
| "ce_loss_13": 6.761080467700959, |
| "ce_loss_26": 6.306544578075409, |
| "ce_loss_39": 5.754528117179871, |
| "ce_loss_52": 1.378117674589157, |
| "ce_loss_7": 7.018208122253418, |
| "epoch": 0.07, |
| "grad_norm": 45.73542626422545, |
| "kl_loss_13": 11040.0, |
| "kl_loss_26": 10088.0, |
| "kl_loss_39": 8913.6, |
| "kl_loss_7": 11569.6, |
| "learning_rate": 0.0009909643486313534, |
| "loss": 20851.6, |
| "step": 700 |
| }, |
| { |
| "ce_loss_13": 6.78661539554596, |
| "ce_loss_26": 6.327802836894989, |
| "ce_loss_39": 5.761592519283295, |
| "ce_loss_52": 1.411187869310379, |
| "ce_loss_7": 7.041204571723938, |
| "epoch": 0.071, |
| "grad_norm": 42.217908581540186, |
| "kl_loss_13": 11076.8, |
| "kl_loss_26": 10112.0, |
| "kl_loss_39": 8905.6, |
| "kl_loss_7": 11606.4, |
| "learning_rate": 0.000990661599193678, |
| "loss": 20727.2, |
| "step": 710 |
| }, |
| { |
| "ce_loss_13": 6.725618660449982, |
| "ce_loss_26": 6.256143915653229, |
| "ce_loss_39": 5.684507942199707, |
| "ce_loss_52": 1.4021562442183495, |
| "ce_loss_7": 6.98309029340744, |
| "epoch": 0.072, |
| "grad_norm": 41.87540936995742, |
| "kl_loss_13": 10950.4, |
| "kl_loss_26": 9966.4, |
| "kl_loss_39": 8756.0, |
| "kl_loss_7": 11489.6, |
| "learning_rate": 0.0009903539087991462, |
| "loss": 20494.0, |
| "step": 720 |
| }, |
| { |
| "ce_loss_13": 6.704308211803436, |
| "ce_loss_26": 6.2481373190879825, |
| "ce_loss_39": 5.691672837734222, |
| "ce_loss_52": 1.4353317350149155, |
| "ce_loss_7": 6.9648723125457765, |
| "epoch": 0.073, |
| "grad_norm": 41.50232307107669, |
| "kl_loss_13": 10825.6, |
| "kl_loss_26": 9868.8, |
| "kl_loss_39": 8681.6, |
| "kl_loss_7": 11364.8, |
| "learning_rate": 0.0009900412805461966, |
| "loss": 20435.6, |
| "step": 730 |
| }, |
| { |
| "ce_loss_13": 6.661628067493439, |
| "ce_loss_26": 6.196605837345123, |
| "ce_loss_39": 5.6325979948043825, |
| "ce_loss_52": 1.4341969668865204, |
| "ce_loss_7": 6.919101679325104, |
| "epoch": 0.074, |
| "grad_norm": 40.95865426481825, |
| "kl_loss_13": 10744.0, |
| "kl_loss_26": 9771.2, |
| "kl_loss_39": 8564.0, |
| "kl_loss_7": 11276.8, |
| "learning_rate": 0.0009897237175829927, |
| "loss": 20203.6, |
| "step": 740 |
| }, |
| { |
| "ce_loss_13": 6.600095963478088, |
| "ce_loss_26": 6.1362119793891905, |
| "ce_loss_39": 5.5736886858940125, |
| "ce_loss_52": 1.408122679591179, |
| "ce_loss_7": 6.857535183429718, |
| "epoch": 0.075, |
| "grad_norm": 39.711086876656914, |
| "kl_loss_13": 10672.0, |
| "kl_loss_26": 9696.0, |
| "kl_loss_39": 8488.0, |
| "kl_loss_7": 11211.2, |
| "learning_rate": 0.0009894012231073895, |
| "loss": 20039.6, |
| "step": 750 |
| }, |
| { |
| "ce_loss_13": 6.570180189609528, |
| "ce_loss_26": 6.098634576797485, |
| "ce_loss_39": 5.526832151412964, |
| "ce_loss_52": 1.4715266615152358, |
| "ce_loss_7": 6.833914375305175, |
| "epoch": 0.076, |
| "grad_norm": 43.2480305118225, |
| "kl_loss_13": 10513.6, |
| "kl_loss_26": 9529.6, |
| "kl_loss_39": 8309.6, |
| "kl_loss_7": 11064.0, |
| "learning_rate": 0.0009890738003669028, |
| "loss": 19880.0, |
| "step": 760 |
| }, |
| { |
| "ce_loss_13": 6.567346775531769, |
| "ce_loss_26": 6.0874533414840695, |
| "ce_loss_39": 5.5076407313346865, |
| "ce_loss_52": 1.4363629996776581, |
| "ce_loss_7": 6.83351217508316, |
| "epoch": 0.077, |
| "grad_norm": 40.54611020558553, |
| "kl_loss_13": 10534.4, |
| "kl_loss_26": 9550.4, |
| "kl_loss_39": 8325.6, |
| "kl_loss_7": 11091.2, |
| "learning_rate": 0.0009887414526586764, |
| "loss": 19717.2, |
| "step": 770 |
| }, |
| { |
| "ce_loss_13": 6.519154870510102, |
| "ce_loss_26": 6.030243515968323, |
| "ce_loss_39": 5.4369661688804625, |
| "ce_loss_52": 1.4315312415361405, |
| "ce_loss_7": 6.7882112741470335, |
| "epoch": 0.078, |
| "grad_norm": 40.32433812744511, |
| "kl_loss_13": 10425.6, |
| "kl_loss_26": 9401.6, |
| "kl_loss_39": 8148.0, |
| "kl_loss_7": 10985.6, |
| "learning_rate": 0.0009884041833294476, |
| "loss": 19528.4, |
| "step": 780 |
| }, |
| { |
| "ce_loss_13": 6.458490109443664, |
| "ce_loss_26": 5.970031499862671, |
| "ce_loss_39": 5.392513406276703, |
| "ce_loss_52": 1.4178608924150466, |
| "ce_loss_7": 6.729024958610535, |
| "epoch": 0.079, |
| "grad_norm": 41.89461078246612, |
| "kl_loss_13": 10355.2, |
| "kl_loss_26": 9337.6, |
| "kl_loss_39": 8099.2, |
| "kl_loss_7": 10920.0, |
| "learning_rate": 0.000988061995775515, |
| "loss": 19441.6, |
| "step": 790 |
| }, |
| { |
| "ce_loss_13": 6.432489657402039, |
| "ce_loss_26": 5.9446264743804935, |
| "ce_loss_39": 5.3700491189956665, |
| "ce_loss_52": 1.448669496178627, |
| "ce_loss_7": 6.70353993177414, |
| "epoch": 0.08, |
| "grad_norm": 41.284533949329585, |
| "kl_loss_13": 10246.4, |
| "kl_loss_26": 9230.4, |
| "kl_loss_39": 8004.0, |
| "kl_loss_7": 10811.2, |
| "learning_rate": 0.0009877148934427035, |
| "loss": 19206.8, |
| "step": 800 |
| }, |
| { |
| "ce_loss_13": 6.43521283864975, |
| "ce_loss_26": 5.939107716083527, |
| "ce_loss_39": 5.3414135575294495, |
| "ce_loss_52": 1.4238088309764863, |
| "ce_loss_7": 6.7135733485221865, |
| "epoch": 0.081, |
| "grad_norm": 39.49115349094681, |
| "kl_loss_13": 10304.0, |
| "kl_loss_26": 9264.0, |
| "kl_loss_39": 8010.4, |
| "kl_loss_7": 10881.6, |
| "learning_rate": 0.0009873628798263297, |
| "loss": 19058.0, |
| "step": 810 |
| }, |
| { |
| "ce_loss_13": 6.375123608112335, |
| "ce_loss_26": 5.867140221595764, |
| "ce_loss_39": 5.2596719622612, |
| "ce_loss_52": 1.4445373743772507, |
| "ce_loss_7": 6.642891383171081, |
| "epoch": 0.082, |
| "grad_norm": 39.14925963953466, |
| "kl_loss_13": 10124.8, |
| "kl_loss_26": 9064.0, |
| "kl_loss_39": 7779.2, |
| "kl_loss_7": 10689.6, |
| "learning_rate": 0.0009870059584711668, |
| "loss": 18891.2, |
| "step": 820 |
| }, |
| { |
| "ce_loss_13": 6.2935021877288815, |
| "ce_loss_26": 5.791378605365753, |
| "ce_loss_39": 5.201059639453888, |
| "ce_loss_52": 1.424940624833107, |
| "ce_loss_7": 6.564577507972717, |
| "epoch": 0.083, |
| "grad_norm": 39.604616846798116, |
| "kl_loss_13": 9998.4, |
| "kl_loss_26": 8953.6, |
| "kl_loss_39": 7687.2, |
| "kl_loss_7": 10563.2, |
| "learning_rate": 0.000986644132971409, |
| "loss": 18704.0, |
| "step": 830 |
| }, |
| { |
| "ce_loss_13": 6.274489688873291, |
| "ce_loss_26": 5.771354067325592, |
| "ce_loss_39": 5.155186474323273, |
| "ce_loss_52": 1.429549178481102, |
| "ce_loss_7": 6.554718315601349, |
| "epoch": 0.084, |
| "grad_norm": 38.382382142673, |
| "kl_loss_13": 9971.2, |
| "kl_loss_26": 8913.6, |
| "kl_loss_39": 7593.6, |
| "kl_loss_7": 10548.8, |
| "learning_rate": 0.0009862774069706345, |
| "loss": 18644.4, |
| "step": 840 |
| }, |
| { |
| "ce_loss_13": 6.2195475697517395, |
| "ce_loss_26": 5.717883968353272, |
| "ce_loss_39": 5.13158141374588, |
| "ce_loss_52": 1.4267238914966582, |
| "ce_loss_7": 6.497097527980804, |
| "epoch": 0.085, |
| "grad_norm": 39.67456709505246, |
| "kl_loss_13": 9820.8, |
| "kl_loss_26": 8772.8, |
| "kl_loss_39": 7518.4, |
| "kl_loss_7": 10401.6, |
| "learning_rate": 0.000985905784161771, |
| "loss": 18443.2, |
| "step": 850 |
| }, |
| { |
| "ce_loss_13": 6.253428983688354, |
| "ce_loss_26": 5.746143198013305, |
| "ce_loss_39": 5.127990126609802, |
| "ce_loss_52": 1.4239614009857178, |
| "ce_loss_7": 6.538207769393921, |
| "epoch": 0.086, |
| "grad_norm": 39.46700749652253, |
| "kl_loss_13": 9912.0, |
| "kl_loss_26": 8857.6, |
| "kl_loss_39": 7540.0, |
| "kl_loss_7": 10500.8, |
| "learning_rate": 0.000985529268287055, |
| "loss": 18336.4, |
| "step": 860 |
| }, |
| { |
| "ce_loss_13": 6.181728804111481, |
| "ce_loss_26": 5.661606287956237, |
| "ce_loss_39": 5.042867851257324, |
| "ce_loss_52": 1.427235585451126, |
| "ce_loss_7": 6.472940897941589, |
| "epoch": 0.087, |
| "grad_norm": 38.42645346767181, |
| "kl_loss_13": 9798.4, |
| "kl_loss_26": 8700.0, |
| "kl_loss_39": 7378.4, |
| "kl_loss_7": 10409.6, |
| "learning_rate": 0.0009851478631379982, |
| "loss": 18167.2, |
| "step": 870 |
| }, |
| { |
| "ce_loss_13": 6.113723492622375, |
| "ce_loss_26": 5.586072051525116, |
| "ce_loss_39": 4.9395282626152035, |
| "ce_loss_52": 1.3542900115251542, |
| "ce_loss_7": 6.410606110095978, |
| "epoch": 0.088, |
| "grad_norm": 37.03254074803745, |
| "kl_loss_13": 9768.0, |
| "kl_loss_26": 8668.8, |
| "kl_loss_39": 7310.4, |
| "kl_loss_7": 10384.0, |
| "learning_rate": 0.0009847615725553456, |
| "loss": 18092.4, |
| "step": 880 |
| }, |
| { |
| "ce_loss_13": 6.182212936878204, |
| "ce_loss_26": 5.651630616188049, |
| "ce_loss_39": 5.018951749801635, |
| "ce_loss_52": 1.4187958374619485, |
| "ce_loss_7": 6.474197280406952, |
| "epoch": 0.089, |
| "grad_norm": 36.66962934304384, |
| "kl_loss_13": 9784.0, |
| "kl_loss_26": 8682.4, |
| "kl_loss_39": 7329.6, |
| "kl_loss_7": 10400.0, |
| "learning_rate": 0.0009843704004290394, |
| "loss": 18005.6, |
| "step": 890 |
| }, |
| { |
| "ce_loss_13": 6.1027270436286924, |
| "ce_loss_26": 5.578388214111328, |
| "ce_loss_39": 4.954142212867737, |
| "ce_loss_52": 1.433653001487255, |
| "ce_loss_7": 6.3896349430084225, |
| "epoch": 0.09, |
| "grad_norm": 37.895271797404135, |
| "kl_loss_13": 9592.0, |
| "kl_loss_26": 8488.8, |
| "kl_loss_39": 7161.6, |
| "kl_loss_7": 10198.4, |
| "learning_rate": 0.0009839743506981783, |
| "loss": 17767.6, |
| "step": 900 |
| }, |
| { |
| "ce_loss_13": 6.134694254398346, |
| "ce_loss_26": 5.600531077384948, |
| "ce_loss_39": 4.956934368610382, |
| "ce_loss_52": 1.4616568014025688, |
| "ce_loss_7": 6.426614594459534, |
| "epoch": 0.091, |
| "grad_norm": 35.5095046076979, |
| "kl_loss_13": 9608.0, |
| "kl_loss_26": 8489.6, |
| "kl_loss_39": 7093.6, |
| "kl_loss_7": 10217.6, |
| "learning_rate": 0.0009835734273509786, |
| "loss": 17664.2, |
| "step": 910 |
| }, |
| { |
| "ce_loss_13": 6.059712076187134, |
| "ce_loss_26": 5.526252567768097, |
| "ce_loss_39": 4.893229007720947, |
| "ce_loss_52": 1.4395650416612624, |
| "ce_loss_7": 6.356565976142884, |
| "epoch": 0.092, |
| "grad_norm": 34.796923941159505, |
| "kl_loss_13": 9540.8, |
| "kl_loss_26": 8412.8, |
| "kl_loss_39": 7049.6, |
| "kl_loss_7": 10158.4, |
| "learning_rate": 0.0009831676344247342, |
| "loss": 17511.4, |
| "step": 920 |
| }, |
| { |
| "ce_loss_13": 6.012007105350494, |
| "ce_loss_26": 5.470919144153595, |
| "ce_loss_39": 4.816142636537552, |
| "ce_loss_52": 1.3806764528155326, |
| "ce_loss_7": 6.3036043524742125, |
| "epoch": 0.093, |
| "grad_norm": 34.520449300223, |
| "kl_loss_13": 9520.0, |
| "kl_loss_26": 8383.2, |
| "kl_loss_39": 6990.4, |
| "kl_loss_7": 10132.8, |
| "learning_rate": 0.0009827569760057755, |
| "loss": 17476.2, |
| "step": 930 |
| }, |
| { |
| "ce_loss_13": 6.020643877983093, |
| "ce_loss_26": 5.4863135576248165, |
| "ce_loss_39": 4.843132376670837, |
| "ce_loss_52": 1.4196506530046462, |
| "ce_loss_7": 6.318761503696441, |
| "epoch": 0.094, |
| "grad_norm": 33.89098596439575, |
| "kl_loss_13": 9425.6, |
| "kl_loss_26": 8303.2, |
| "kl_loss_39": 6938.4, |
| "kl_loss_7": 10048.0, |
| "learning_rate": 0.000982341456229428, |
| "loss": 17230.8, |
| "step": 940 |
| }, |
| { |
| "ce_loss_13": 5.974271166324615, |
| "ce_loss_26": 5.436639845371246, |
| "ce_loss_39": 4.804401755332947, |
| "ce_loss_52": 1.4617832124233245, |
| "ce_loss_7": 6.261663150787354, |
| "epoch": 0.095, |
| "grad_norm": 33.71164019490533, |
| "kl_loss_13": 9278.4, |
| "kl_loss_26": 8148.0, |
| "kl_loss_39": 6788.0, |
| "kl_loss_7": 9880.0, |
| "learning_rate": 0.000981921079279971, |
| "loss": 17111.2, |
| "step": 950 |
| }, |
| { |
| "ce_loss_13": 5.966051030158996, |
| "ce_loss_26": 5.422689366340637, |
| "ce_loss_39": 4.77484347820282, |
| "ce_loss_52": 1.4197688490152358, |
| "ce_loss_7": 6.259041047096252, |
| "epoch": 0.096, |
| "grad_norm": 33.446193055983784, |
| "kl_loss_13": 9321.6, |
| "kl_loss_26": 8191.2, |
| "kl_loss_39": 6796.8, |
| "kl_loss_7": 9932.8, |
| "learning_rate": 0.0009814958493905962, |
| "loss": 17055.2, |
| "step": 960 |
| }, |
| { |
| "ce_loss_13": 5.8990898609161375, |
| "ce_loss_26": 5.356628429889679, |
| "ce_loss_39": 4.708657902479172, |
| "ce_loss_52": 1.4259506687521935, |
| "ce_loss_7": 6.204090213775634, |
| "epoch": 0.097, |
| "grad_norm": 32.73849681683031, |
| "kl_loss_13": 9192.0, |
| "kl_loss_26": 8055.2, |
| "kl_loss_39": 6684.0, |
| "kl_loss_7": 9836.8, |
| "learning_rate": 0.0009810657708433637, |
| "loss": 16837.6, |
| "step": 970 |
| }, |
| { |
| "ce_loss_13": 5.879060399532318, |
| "ce_loss_26": 5.346331930160522, |
| "ce_loss_39": 4.690547597408295, |
| "ce_loss_52": 1.4302369862794877, |
| "ce_loss_7": 6.184376800060273, |
| "epoch": 0.098, |
| "grad_norm": 32.85977379524165, |
| "kl_loss_13": 9155.2, |
| "kl_loss_26": 8032.0, |
| "kl_loss_39": 6616.8, |
| "kl_loss_7": 9792.0, |
| "learning_rate": 0.0009806308479691594, |
| "loss": 16832.6, |
| "step": 980 |
| }, |
| { |
| "ce_loss_13": 5.816424036026001, |
| "ce_loss_26": 5.264147555828094, |
| "ce_loss_39": 4.6131664395332335, |
| "ce_loss_52": 1.4319524437189102, |
| "ce_loss_7": 6.118786966800689, |
| "epoch": 0.099, |
| "grad_norm": 33.426081767419625, |
| "kl_loss_13": 8992.0, |
| "kl_loss_26": 7842.4, |
| "kl_loss_39": 6442.4, |
| "kl_loss_7": 9635.2, |
| "learning_rate": 0.0009801910851476522, |
| "loss": 16728.4, |
| "step": 990 |
| }, |
| { |
| "ce_loss_13": 5.814940357208252, |
| "ce_loss_26": 5.2667844772338865, |
| "ce_loss_39": 4.6347626686096195, |
| "ce_loss_52": 1.4415421515703202, |
| "ce_loss_7": 6.12279201745987, |
| "epoch": 0.1, |
| "grad_norm": 33.016085914188245, |
| "kl_loss_13": 8960.0, |
| "kl_loss_26": 7812.8, |
| "kl_loss_39": 6444.0, |
| "kl_loss_7": 9588.8, |
| "learning_rate": 0.0009797464868072487, |
| "loss": 16535.6, |
| "step": 1000 |
| }, |
| { |
| "ce_loss_13": 5.8194945573806764, |
| "ce_loss_26": 5.259810090065002, |
| "ce_loss_39": 4.592129653692245, |
| "ce_loss_52": 1.4136200681328774, |
| "ce_loss_7": 6.127060306072235, |
| "epoch": 0.101, |
| "grad_norm": 31.039827427941336, |
| "kl_loss_13": 9033.6, |
| "kl_loss_26": 7855.2, |
| "kl_loss_39": 6432.0, |
| "kl_loss_7": 9683.2, |
| "learning_rate": 0.0009792970574250492, |
| "loss": 16416.6, |
| "step": 1010 |
| }, |
| { |
| "ce_loss_13": 5.752316701412201, |
| "ce_loss_26": 5.182761800289154, |
| "ce_loss_39": 4.4902693152427675, |
| "ce_loss_52": 1.37675661444664, |
| "ce_loss_7": 6.068772268295288, |
| "epoch": 0.102, |
| "grad_norm": 30.262078958434195, |
| "kl_loss_13": 8969.6, |
| "kl_loss_26": 7772.0, |
| "kl_loss_39": 6300.8, |
| "kl_loss_7": 9622.4, |
| "learning_rate": 0.0009788428015268028, |
| "loss": 16337.4, |
| "step": 1020 |
| }, |
| { |
| "ce_loss_13": 5.781488347053528, |
| "ce_loss_26": 5.2409987449646, |
| "ce_loss_39": 4.596257948875428, |
| "ce_loss_52": 1.4595280766487122, |
| "ce_loss_7": 6.078718197345734, |
| "epoch": 0.103, |
| "grad_norm": 31.79275929639243, |
| "kl_loss_13": 8864.0, |
| "kl_loss_26": 7715.2, |
| "kl_loss_39": 6324.0, |
| "kl_loss_7": 9491.2, |
| "learning_rate": 0.0009783837236868609, |
| "loss": 16174.0, |
| "step": 1030 |
| }, |
| { |
| "ce_loss_13": 5.71779420375824, |
| "ce_loss_26": 5.146243929862976, |
| "ce_loss_39": 4.4791832447052, |
| "ce_loss_52": 1.4291063606739045, |
| "ce_loss_7": 6.026824104785919, |
| "epoch": 0.104, |
| "grad_norm": 30.963349252235982, |
| "kl_loss_13": 8771.2, |
| "kl_loss_26": 7573.6, |
| "kl_loss_39": 6160.8, |
| "kl_loss_7": 9416.0, |
| "learning_rate": 0.0009779198285281327, |
| "loss": 16072.0, |
| "step": 1040 |
| }, |
| { |
| "ce_loss_13": 5.742816948890686, |
| "ce_loss_26": 5.189597749710083, |
| "ce_loss_39": 4.527956926822663, |
| "ce_loss_52": 1.4477199196815491, |
| "ce_loss_7": 6.058120143413544, |
| "epoch": 0.105, |
| "grad_norm": 32.72479335973728, |
| "kl_loss_13": 8772.8, |
| "kl_loss_26": 7620.0, |
| "kl_loss_39": 6205.6, |
| "kl_loss_7": 9438.4, |
| "learning_rate": 0.0009774511207220368, |
| "loss": 15932.8, |
| "step": 1050 |
| }, |
| { |
| "ce_loss_13": 5.728980660438538, |
| "ce_loss_26": 5.167429828643799, |
| "ce_loss_39": 4.511177510023117, |
| "ce_loss_52": 1.4759073287248612, |
| "ce_loss_7": 6.031959307193756, |
| "epoch": 0.106, |
| "grad_norm": 31.226954775299472, |
| "kl_loss_13": 8736.8, |
| "kl_loss_26": 7550.4, |
| "kl_loss_39": 6153.6, |
| "kl_loss_7": 9363.2, |
| "learning_rate": 0.0009769776049884564, |
| "loss": 15802.0, |
| "step": 1060 |
| }, |
| { |
| "ce_loss_13": 5.7382616877555845, |
| "ce_loss_26": 5.175736773014068, |
| "ce_loss_39": 4.510921847820282, |
| "ce_loss_52": 1.4512410640716553, |
| "ce_loss_7": 6.050414621829987, |
| "epoch": 0.107, |
| "grad_norm": 30.630877517383688, |
| "kl_loss_13": 8788.0, |
| "kl_loss_26": 7622.4, |
| "kl_loss_39": 6202.4, |
| "kl_loss_7": 9452.8, |
| "learning_rate": 0.0009764992860956889, |
| "loss": 15822.0, |
| "step": 1070 |
| }, |
| { |
| "ce_loss_13": 5.645794451236725, |
| "ce_loss_26": 5.088676834106446, |
| "ce_loss_39": 4.417802548408508, |
| "ce_loss_52": 1.4182049363851548, |
| "ce_loss_7": 5.959036731719971, |
| "epoch": 0.108, |
| "grad_norm": 30.457487960812127, |
| "kl_loss_13": 8681.6, |
| "kl_loss_26": 7509.6, |
| "kl_loss_39": 6084.8, |
| "kl_loss_7": 9332.8, |
| "learning_rate": 0.0009760161688604008, |
| "loss": 15627.0, |
| "step": 1080 |
| }, |
| { |
| "ce_loss_13": 5.568986439704895, |
| "ce_loss_26": 5.009959697723389, |
| "ce_loss_39": 4.367926681041718, |
| "ce_loss_52": 1.4609902381896973, |
| "ce_loss_7": 5.883221137523651, |
| "epoch": 0.109, |
| "grad_norm": 29.989545158997807, |
| "kl_loss_13": 8458.4, |
| "kl_loss_26": 7279.2, |
| "kl_loss_39": 5895.2, |
| "kl_loss_7": 9112.0, |
| "learning_rate": 0.0009755282581475768, |
| "loss": 15555.0, |
| "step": 1090 |
| }, |
| { |
| "ce_loss_13": 5.6291534304618835, |
| "ce_loss_26": 5.068492126464844, |
| "ce_loss_39": 4.403285652399063, |
| "ce_loss_52": 1.445840133726597, |
| "ce_loss_7": 5.945274484157562, |
| "epoch": 0.11, |
| "grad_norm": 30.66383917654591, |
| "kl_loss_13": 8576.8, |
| "kl_loss_26": 7396.8, |
| "kl_loss_39": 5972.8, |
| "kl_loss_7": 9244.8, |
| "learning_rate": 0.0009750355588704727, |
| "loss": 15472.0, |
| "step": 1100 |
| }, |
| { |
| "ce_loss_13": 5.522909152507782, |
| "ce_loss_26": 4.949995934963226, |
| "ce_loss_39": 4.270336884260177, |
| "ce_loss_52": 1.407256692647934, |
| "ce_loss_7": 5.840477633476257, |
| "epoch": 0.111, |
| "grad_norm": 29.782295400473814, |
| "kl_loss_13": 8460.8, |
| "kl_loss_26": 7250.4, |
| "kl_loss_39": 5788.8, |
| "kl_loss_7": 9118.4, |
| "learning_rate": 0.0009745380759905647, |
| "loss": 15294.0, |
| "step": 1110 |
| }, |
| { |
| "ce_loss_13": 5.517545366287232, |
| "ce_loss_26": 4.932116758823395, |
| "ce_loss_39": 4.250882798433304, |
| "ce_loss_52": 1.3833064809441566, |
| "ce_loss_7": 5.846422612667084, |
| "epoch": 0.112, |
| "grad_norm": 28.73414895119145, |
| "kl_loss_13": 8486.4, |
| "kl_loss_26": 7264.0, |
| "kl_loss_39": 5804.0, |
| "kl_loss_7": 9168.0, |
| "learning_rate": 0.0009740358145174998, |
| "loss": 15318.0, |
| "step": 1120 |
| }, |
| { |
| "ce_loss_13": 5.50923570394516, |
| "ce_loss_26": 4.938242793083191, |
| "ce_loss_39": 4.259438300132752, |
| "ce_loss_52": 1.430792199075222, |
| "ce_loss_7": 5.8306269407272335, |
| "epoch": 0.113, |
| "grad_norm": 28.895019773736312, |
| "kl_loss_13": 8355.2, |
| "kl_loss_26": 7152.8, |
| "kl_loss_39": 5700.8, |
| "kl_loss_7": 9024.0, |
| "learning_rate": 0.0009735287795090455, |
| "loss": 15192.0, |
| "step": 1130 |
| }, |
| { |
| "ce_loss_13": 5.401988506317139, |
| "ce_loss_26": 4.814380037784576, |
| "ce_loss_39": 4.133068162202835, |
| "ce_loss_52": 1.3917075648903847, |
| "ce_loss_7": 5.732739126682281, |
| "epoch": 0.114, |
| "grad_norm": 28.473930821013894, |
| "kl_loss_13": 8231.2, |
| "kl_loss_26": 7010.4, |
| "kl_loss_39": 5549.6, |
| "kl_loss_7": 8921.6, |
| "learning_rate": 0.0009730169760710386, |
| "loss": 15030.2, |
| "step": 1140 |
| }, |
| { |
| "ce_loss_13": 5.538087117671966, |
| "ce_loss_26": 4.94068056344986, |
| "ce_loss_39": 4.253137022256851, |
| "ce_loss_52": 1.4375049352645874, |
| "ce_loss_7": 5.861488628387451, |
| "epoch": 0.115, |
| "grad_norm": 30.345832823062537, |
| "kl_loss_13": 8408.0, |
| "kl_loss_26": 7160.0, |
| "kl_loss_39": 5690.4, |
| "kl_loss_7": 9096.0, |
| "learning_rate": 0.0009725004093573342, |
| "loss": 14951.8, |
| "step": 1150 |
| }, |
| { |
| "ce_loss_13": 5.385800528526306, |
| "ce_loss_26": 4.801717817783356, |
| "ce_loss_39": 4.136020374298096, |
| "ce_loss_52": 1.4078958943486213, |
| "ce_loss_7": 5.722076547145844, |
| "epoch": 0.116, |
| "grad_norm": 30.297186235009132, |
| "kl_loss_13": 8177.6, |
| "kl_loss_26": 6956.0, |
| "kl_loss_39": 5514.4, |
| "kl_loss_7": 8880.0, |
| "learning_rate": 0.0009719790845697534, |
| "loss": 14867.6, |
| "step": 1160 |
| }, |
| { |
| "ce_loss_13": 5.426836037635804, |
| "ce_loss_26": 4.832395279407502, |
| "ce_loss_39": 4.153381270170212, |
| "ce_loss_52": 1.426569977402687, |
| "ce_loss_7": 5.755613851547241, |
| "epoch": 0.117, |
| "grad_norm": 31.87589996223516, |
| "kl_loss_13": 8220.8, |
| "kl_loss_26": 6976.0, |
| "kl_loss_39": 5544.0, |
| "kl_loss_7": 8909.6, |
| "learning_rate": 0.0009714530069580309, |
| "loss": 14745.8, |
| "step": 1170 |
| }, |
| { |
| "ce_loss_13": 5.359652185440064, |
| "ce_loss_26": 4.760751461982727, |
| "ce_loss_39": 4.048358517885208, |
| "ce_loss_52": 1.3907365471124649, |
| "ce_loss_7": 5.693908452987671, |
| "epoch": 0.118, |
| "grad_norm": 27.539122306915463, |
| "kl_loss_13": 8126.4, |
| "kl_loss_26": 6876.0, |
| "kl_loss_39": 5376.0, |
| "kl_loss_7": 8822.4, |
| "learning_rate": 0.0009709221818197624, |
| "loss": 14704.2, |
| "step": 1180 |
| }, |
| { |
| "ce_loss_13": 5.350854313373565, |
| "ce_loss_26": 4.768227469921112, |
| "ce_loss_39": 4.099184954166413, |
| "ce_loss_52": 1.4248775228857995, |
| "ce_loss_7": 5.680926930904389, |
| "epoch": 0.119, |
| "grad_norm": 28.933475617899816, |
| "kl_loss_13": 8027.2, |
| "kl_loss_26": 6818.4, |
| "kl_loss_39": 5372.0, |
| "kl_loss_7": 8727.2, |
| "learning_rate": 0.0009703866145003512, |
| "loss": 14583.0, |
| "step": 1190 |
| }, |
| { |
| "ce_loss_13": 5.372988939285278, |
| "ce_loss_26": 4.778137028217316, |
| "ce_loss_39": 4.089074891805649, |
| "ce_loss_52": 1.4195260405540466, |
| "ce_loss_7": 5.714505088329315, |
| "epoch": 0.12, |
| "grad_norm": 26.60053169419214, |
| "kl_loss_13": 8131.2, |
| "kl_loss_26": 6885.6, |
| "kl_loss_39": 5404.8, |
| "kl_loss_7": 8840.8, |
| "learning_rate": 0.0009698463103929542, |
| "loss": 14513.0, |
| "step": 1200 |
| }, |
| { |
| "ce_loss_13": 5.392605185508728, |
| "ce_loss_26": 4.798673605918884, |
| "ce_loss_39": 4.126504504680634, |
| "ce_loss_52": 1.4746148020029068, |
| "ce_loss_7": 5.7245006442070006, |
| "epoch": 0.121, |
| "grad_norm": 26.989592045291893, |
| "kl_loss_13": 8018.4, |
| "kl_loss_26": 6775.2, |
| "kl_loss_39": 5339.2, |
| "kl_loss_7": 8718.4, |
| "learning_rate": 0.0009693012749384279, |
| "loss": 14383.2, |
| "step": 1210 |
| }, |
| { |
| "ce_loss_13": 5.319035434722901, |
| "ce_loss_26": 4.736347317695618, |
| "ce_loss_39": 4.0659150838851925, |
| "ce_loss_52": 1.4397204488515853, |
| "ce_loss_7": 5.642805421352387, |
| "epoch": 0.122, |
| "grad_norm": 29.38130003686817, |
| "kl_loss_13": 7946.4, |
| "kl_loss_26": 6723.2, |
| "kl_loss_39": 5294.4, |
| "kl_loss_7": 8631.2, |
| "learning_rate": 0.0009687515136252732, |
| "loss": 14375.6, |
| "step": 1220 |
| }, |
| { |
| "ce_loss_13": 5.3430128455162045, |
| "ce_loss_26": 4.744695138931275, |
| "ce_loss_39": 4.071437209844589, |
| "ce_loss_52": 1.4354561120271683, |
| "ce_loss_7": 5.681857228279114, |
| "epoch": 0.123, |
| "grad_norm": 25.479885446063793, |
| "kl_loss_13": 8008.0, |
| "kl_loss_26": 6754.4, |
| "kl_loss_39": 5308.8, |
| "kl_loss_7": 8709.6, |
| "learning_rate": 0.0009681970319895803, |
| "loss": 14273.4, |
| "step": 1230 |
| }, |
| { |
| "ce_loss_13": 5.331636953353882, |
| "ce_loss_26": 4.735000967979431, |
| "ce_loss_39": 4.073598688840866, |
| "ce_loss_52": 1.470110397040844, |
| "ce_loss_7": 5.655930757522583, |
| "epoch": 0.124, |
| "grad_norm": 28.379117971457468, |
| "kl_loss_13": 7929.6, |
| "kl_loss_26": 6684.0, |
| "kl_loss_39": 5260.8, |
| "kl_loss_7": 8612.8, |
| "learning_rate": 0.0009676378356149733, |
| "loss": 14150.8, |
| "step": 1240 |
| }, |
| { |
| "ce_loss_13": 5.1874682068824765, |
| "ce_loss_26": 4.582801806926727, |
| "ce_loss_39": 3.900476610660553, |
| "ce_loss_52": 1.4180996417999268, |
| "ce_loss_7": 5.519565558433532, |
| "epoch": 0.125, |
| "grad_norm": 27.465496459767188, |
| "kl_loss_13": 7764.8, |
| "kl_loss_26": 6504.0, |
| "kl_loss_39": 5041.6, |
| "kl_loss_7": 8455.2, |
| "learning_rate": 0.0009670739301325534, |
| "loss": 13985.0, |
| "step": 1250 |
| }, |
| { |
| "ce_loss_13": 5.221911752223969, |
| "ce_loss_26": 4.625354039669037, |
| "ce_loss_39": 3.9350062906742096, |
| "ce_loss_52": 1.3881619155406952, |
| "ce_loss_7": 5.562334418296814, |
| "epoch": 0.126, |
| "grad_norm": 26.021158683557974, |
| "kl_loss_13": 7847.2, |
| "kl_loss_26": 6599.2, |
| "kl_loss_39": 5120.0, |
| "kl_loss_7": 8558.4, |
| "learning_rate": 0.0009665053212208426, |
| "loss": 13978.8, |
| "step": 1260 |
| }, |
| { |
| "ce_loss_13": 5.201095879077911, |
| "ce_loss_26": 4.594125282764435, |
| "ce_loss_39": 3.8948924005031587, |
| "ce_loss_52": 1.4181114554405212, |
| "ce_loss_7": 5.552536249160767, |
| "epoch": 0.127, |
| "grad_norm": 26.300188898530276, |
| "kl_loss_13": 7783.2, |
| "kl_loss_26": 6515.2, |
| "kl_loss_39": 5012.0, |
| "kl_loss_7": 8515.2, |
| "learning_rate": 0.0009659320146057262, |
| "loss": 13927.6, |
| "step": 1270 |
| }, |
| { |
| "ce_loss_13": 5.186312806606293, |
| "ce_loss_26": 4.5913723587989805, |
| "ce_loss_39": 3.912171256542206, |
| "ce_loss_52": 1.4045341789722443, |
| "ce_loss_7": 5.535152721405029, |
| "epoch": 0.128, |
| "grad_norm": 25.74310395170922, |
| "kl_loss_13": 7757.6, |
| "kl_loss_26": 6509.6, |
| "kl_loss_39": 5065.6, |
| "kl_loss_7": 8486.4, |
| "learning_rate": 0.0009653540160603955, |
| "loss": 13929.0, |
| "step": 1280 |
| }, |
| { |
| "ce_loss_13": 5.17168892621994, |
| "ce_loss_26": 4.572988575696945, |
| "ce_loss_39": 3.902406334877014, |
| "ce_loss_52": 1.4593060314655304, |
| "ce_loss_7": 5.513335394859314, |
| "epoch": 0.129, |
| "grad_norm": 26.464934956114348, |
| "kl_loss_13": 7624.0, |
| "kl_loss_26": 6372.8, |
| "kl_loss_39": 4939.2, |
| "kl_loss_7": 8331.2, |
| "learning_rate": 0.0009647713314052896, |
| "loss": 13720.2, |
| "step": 1290 |
| }, |
| { |
| "ce_loss_13": 5.167734289169312, |
| "ce_loss_26": 4.5827870786190035, |
| "ce_loss_39": 3.9150634109973907, |
| "ce_loss_52": 1.4295778691768646, |
| "ce_loss_7": 5.517308306694031, |
| "epoch": 0.13, |
| "grad_norm": 26.41573865043221, |
| "kl_loss_13": 7627.2, |
| "kl_loss_26": 6407.2, |
| "kl_loss_39": 4972.0, |
| "kl_loss_7": 8360.0, |
| "learning_rate": 0.0009641839665080363, |
| "loss": 13644.6, |
| "step": 1300 |
| }, |
| { |
| "ce_loss_13": 5.155666828155518, |
| "ce_loss_26": 4.557369256019593, |
| "ce_loss_39": 3.8985717594623566, |
| "ce_loss_52": 1.4532029300928115, |
| "ce_loss_7": 5.49907066822052, |
| "epoch": 0.131, |
| "grad_norm": 28.09061259972242, |
| "kl_loss_13": 7600.0, |
| "kl_loss_26": 6340.8, |
| "kl_loss_39": 4909.6, |
| "kl_loss_7": 8327.2, |
| "learning_rate": 0.0009635919272833937, |
| "loss": 13575.0, |
| "step": 1310 |
| }, |
| { |
| "ce_loss_13": 5.079627573490143, |
| "ce_loss_26": 4.46733387708664, |
| "ce_loss_39": 3.7981239676475527, |
| "ce_loss_52": 1.4149045318365097, |
| "ce_loss_7": 5.424402499198914, |
| "epoch": 0.132, |
| "grad_norm": 29.96516682014439, |
| "kl_loss_13": 7483.2, |
| "kl_loss_26": 6202.4, |
| "kl_loss_39": 4773.6, |
| "kl_loss_7": 8210.4, |
| "learning_rate": 0.0009629952196931902, |
| "loss": 13547.6, |
| "step": 1320 |
| }, |
| { |
| "ce_loss_13": 5.090298974514008, |
| "ce_loss_26": 4.495900344848633, |
| "ce_loss_39": 3.821765500307083, |
| "ce_loss_52": 1.4328191310167313, |
| "ce_loss_7": 5.431738471984863, |
| "epoch": 0.133, |
| "grad_norm": 26.14827995707597, |
| "kl_loss_13": 7504.0, |
| "kl_loss_26": 6259.2, |
| "kl_loss_39": 4809.6, |
| "kl_loss_7": 8225.6, |
| "learning_rate": 0.0009623938497462645, |
| "loss": 13496.2, |
| "step": 1330 |
| }, |
| { |
| "ce_loss_13": 5.099011301994324, |
| "ce_loss_26": 4.4848466455936435, |
| "ce_loss_39": 3.794223016500473, |
| "ce_loss_52": 1.416146419942379, |
| "ce_loss_7": 5.462341606616974, |
| "epoch": 0.134, |
| "grad_norm": 24.84289392950202, |
| "kl_loss_13": 7542.4, |
| "kl_loss_26": 6259.2, |
| "kl_loss_39": 4785.2, |
| "kl_loss_7": 8304.8, |
| "learning_rate": 0.0009617878234984055, |
| "loss": 13395.2, |
| "step": 1340 |
| }, |
| { |
| "ce_loss_13": 5.097129952907562, |
| "ce_loss_26": 4.498237466812133, |
| "ce_loss_39": 3.8250812292099, |
| "ce_loss_52": 1.4416721731424331, |
| "ce_loss_7": 5.444403338432312, |
| "epoch": 0.135, |
| "grad_norm": 26.607564330476333, |
| "kl_loss_13": 7480.8, |
| "kl_loss_26": 6224.8, |
| "kl_loss_39": 4782.4, |
| "kl_loss_7": 8202.4, |
| "learning_rate": 0.0009611771470522907, |
| "loss": 13240.2, |
| "step": 1350 |
| }, |
| { |
| "ce_loss_13": 5.043468415737152, |
| "ce_loss_26": 4.443963885307312, |
| "ce_loss_39": 3.7804897725582123, |
| "ce_loss_52": 1.4098651513457299, |
| "ce_loss_7": 5.407905113697052, |
| "epoch": 0.136, |
| "grad_norm": 27.568927473511277, |
| "kl_loss_13": 7464.8, |
| "kl_loss_26": 6209.6, |
| "kl_loss_39": 4773.6, |
| "kl_loss_7": 8221.6, |
| "learning_rate": 0.0009605618265574251, |
| "loss": 13312.2, |
| "step": 1360 |
| }, |
| { |
| "ce_loss_13": 5.1134570121765135, |
| "ce_loss_26": 4.510142356157303, |
| "ce_loss_39": 3.844588041305542, |
| "ce_loss_52": 1.4824964210391045, |
| "ce_loss_7": 5.455760145187378, |
| "epoch": 0.137, |
| "grad_norm": 26.586450808382164, |
| "kl_loss_13": 7431.2, |
| "kl_loss_26": 6176.0, |
| "kl_loss_39": 4748.0, |
| "kl_loss_7": 8151.2, |
| "learning_rate": 0.0009599418682100792, |
| "loss": 13171.6, |
| "step": 1370 |
| }, |
| { |
| "ce_loss_13": 4.993066036701203, |
| "ce_loss_26": 4.390464246273041, |
| "ce_loss_39": 3.7108724772930146, |
| "ce_loss_52": 1.3996128499507905, |
| "ce_loss_7": 5.354221343994141, |
| "epoch": 0.138, |
| "grad_norm": 24.51436324855179, |
| "kl_loss_13": 7388.0, |
| "kl_loss_26": 6120.0, |
| "kl_loss_39": 4660.8, |
| "kl_loss_7": 8145.6, |
| "learning_rate": 0.0009593172782532268, |
| "loss": 13135.2, |
| "step": 1380 |
| }, |
| { |
| "ce_loss_13": 4.9749324202537535, |
| "ce_loss_26": 4.36228443980217, |
| "ce_loss_39": 3.71304127573967, |
| "ce_loss_52": 1.4259393498301507, |
| "ce_loss_7": 5.328954219818115, |
| "epoch": 0.139, |
| "grad_norm": 25.448579155888293, |
| "kl_loss_13": 7284.8, |
| "kl_loss_26": 6005.6, |
| "kl_loss_39": 4599.2, |
| "kl_loss_7": 8029.6, |
| "learning_rate": 0.0009586880629764817, |
| "loss": 13023.4, |
| "step": 1390 |
| }, |
| { |
| "ce_loss_13": 5.021213936805725, |
| "ce_loss_26": 4.392004972696304, |
| "ce_loss_39": 3.695616716146469, |
| "ce_loss_52": 1.3939141556620598, |
| "ce_loss_7": 5.386792302131653, |
| "epoch": 0.14, |
| "grad_norm": 27.169552009752685, |
| "kl_loss_13": 7436.0, |
| "kl_loss_26": 6132.8, |
| "kl_loss_39": 4655.2, |
| "kl_loss_7": 8205.6, |
| "learning_rate": 0.0009580542287160348, |
| "loss": 13043.6, |
| "step": 1400 |
| }, |
| { |
| "ce_loss_13": 5.006197059154511, |
| "ce_loss_26": 4.410893344879151, |
| "ce_loss_39": 3.74367755651474, |
| "ce_loss_52": 1.4519873589277268, |
| "ce_loss_7": 5.36526129245758, |
| "epoch": 0.141, |
| "grad_norm": 24.865151038825246, |
| "kl_loss_13": 7283.2, |
| "kl_loss_26": 6027.2, |
| "kl_loss_39": 4615.2, |
| "kl_loss_7": 8029.6, |
| "learning_rate": 0.0009574157818545901, |
| "loss": 12913.8, |
| "step": 1410 |
| }, |
| { |
| "ce_loss_13": 4.958354568481445, |
| "ce_loss_26": 4.367397904396057, |
| "ce_loss_39": 3.7062928318977355, |
| "ce_loss_52": 1.4099174112081527, |
| "ce_loss_7": 5.317552924156189, |
| "epoch": 0.142, |
| "grad_norm": 24.898155460709848, |
| "kl_loss_13": 7277.6, |
| "kl_loss_26": 6040.8, |
| "kl_loss_39": 4623.6, |
| "kl_loss_7": 8032.8, |
| "learning_rate": 0.0009567727288213005, |
| "loss": 12929.6, |
| "step": 1420 |
| }, |
| { |
| "ce_loss_13": 4.984454607963562, |
| "ce_loss_26": 4.392505377531052, |
| "ce_loss_39": 3.7512011885643006, |
| "ce_loss_52": 1.473440769314766, |
| "ce_loss_7": 5.340136766433716, |
| "epoch": 0.143, |
| "grad_norm": 24.34585690638739, |
| "kl_loss_13": 7221.6, |
| "kl_loss_26": 5972.8, |
| "kl_loss_39": 4602.0, |
| "kl_loss_7": 7973.6, |
| "learning_rate": 0.0009561250760917027, |
| "loss": 12830.2, |
| "step": 1430 |
| }, |
| { |
| "ce_loss_13": 4.917237496376037, |
| "ce_loss_26": 4.313831263780594, |
| "ce_loss_39": 3.6587266325950623, |
| "ce_loss_52": 1.4092496067285538, |
| "ce_loss_7": 5.2786689639091495, |
| "epoch": 0.144, |
| "grad_norm": 25.288024521189875, |
| "kl_loss_13": 7198.4, |
| "kl_loss_26": 5931.2, |
| "kl_loss_39": 4527.2, |
| "kl_loss_7": 7960.8, |
| "learning_rate": 0.0009554728301876525, |
| "loss": 12688.6, |
| "step": 1440 |
| }, |
| { |
| "ce_loss_13": 4.95776047706604, |
| "ce_loss_26": 4.340852671861649, |
| "ce_loss_39": 3.657728981971741, |
| "ce_loss_52": 1.4168317198753357, |
| "ce_loss_7": 5.322667574882507, |
| "epoch": 0.145, |
| "grad_norm": 26.641005752286592, |
| "kl_loss_13": 7228.8, |
| "kl_loss_26": 5940.0, |
| "kl_loss_39": 4485.2, |
| "kl_loss_7": 8003.2, |
| "learning_rate": 0.0009548159976772592, |
| "loss": 12683.8, |
| "step": 1450 |
| }, |
| { |
| "ce_loss_13": 4.831417870521546, |
| "ce_loss_26": 4.231567287445069, |
| "ce_loss_39": 3.581255227327347, |
| "ce_loss_52": 1.4485478460788728, |
| "ce_loss_7": 5.20115841627121, |
| "epoch": 0.146, |
| "grad_norm": 24.920691081516484, |
| "kl_loss_13": 6952.0, |
| "kl_loss_26": 5699.2, |
| "kl_loss_39": 4308.4, |
| "kl_loss_7": 7733.6, |
| "learning_rate": 0.0009541545851748186, |
| "loss": 12574.8, |
| "step": 1460 |
| }, |
| { |
| "ce_loss_13": 4.8803037166595455, |
| "ce_loss_26": 4.2741272211074826, |
| "ce_loss_39": 3.598990321159363, |
| "ce_loss_52": 1.4145199984312058, |
| "ce_loss_7": 5.244284570217133, |
| "epoch": 0.147, |
| "grad_norm": 25.90739775261194, |
| "kl_loss_13": 7076.8, |
| "kl_loss_26": 5806.4, |
| "kl_loss_39": 4372.0, |
| "kl_loss_7": 7841.6, |
| "learning_rate": 0.0009534885993407473, |
| "loss": 12558.0, |
| "step": 1470 |
| }, |
| { |
| "ce_loss_13": 4.854452967643738, |
| "ce_loss_26": 4.251825517416, |
| "ce_loss_39": 3.5948518395423887, |
| "ce_loss_52": 1.428754985332489, |
| "ce_loss_7": 5.219927191734314, |
| "epoch": 0.148, |
| "grad_norm": 24.48718194678669, |
| "kl_loss_13": 7008.8, |
| "kl_loss_26": 5743.2, |
| "kl_loss_39": 4354.4, |
| "kl_loss_7": 7777.6, |
| "learning_rate": 0.0009528180468815154, |
| "loss": 12484.4, |
| "step": 1480 |
| }, |
| { |
| "ce_loss_13": 4.884927380084991, |
| "ce_loss_26": 4.2919243454933165, |
| "ce_loss_39": 3.642289215326309, |
| "ce_loss_52": 1.465419703722, |
| "ce_loss_7": 5.241849565505982, |
| "epoch": 0.149, |
| "grad_norm": 24.903440253335923, |
| "kl_loss_13": 7001.6, |
| "kl_loss_26": 5763.2, |
| "kl_loss_39": 4366.0, |
| "kl_loss_7": 7752.0, |
| "learning_rate": 0.0009521429345495787, |
| "loss": 12486.6, |
| "step": 1490 |
| }, |
| { |
| "ce_loss_13": 4.82739794254303, |
| "ce_loss_26": 4.228940737247467, |
| "ce_loss_39": 3.5767277657985685, |
| "ce_loss_52": 1.4382753789424896, |
| "ce_loss_7": 5.209846138954163, |
| "epoch": 0.15, |
| "grad_norm": 25.291080092187237, |
| "kl_loss_13": 6960.8, |
| "kl_loss_26": 5698.4, |
| "kl_loss_39": 4297.6, |
| "kl_loss_7": 7764.0, |
| "learning_rate": 0.0009514632691433108, |
| "loss": 12420.2, |
| "step": 1500 |
| }, |
| { |
| "ce_loss_13": 4.828064477443695, |
| "ce_loss_26": 4.213042998313904, |
| "ce_loss_39": 3.5323724269866945, |
| "ce_loss_52": 1.3961022228002549, |
| "ce_loss_7": 5.196406292915344, |
| "epoch": 0.151, |
| "grad_norm": 25.466425081780237, |
| "kl_loss_13": 7046.4, |
| "kl_loss_26": 5772.8, |
| "kl_loss_39": 4315.2, |
| "kl_loss_7": 7825.6, |
| "learning_rate": 0.0009507790575069346, |
| "loss": 12387.6, |
| "step": 1510 |
| }, |
| { |
| "ce_loss_13": 4.786497128009796, |
| "ce_loss_26": 4.188397663831711, |
| "ce_loss_39": 3.536989223957062, |
| "ce_loss_52": 1.4404057756066322, |
| "ce_loss_7": 5.156642246246338, |
| "epoch": 0.152, |
| "grad_norm": 22.488994996506335, |
| "kl_loss_13": 6872.8, |
| "kl_loss_26": 5620.8, |
| "kl_loss_39": 4225.6, |
| "kl_loss_7": 7650.4, |
| "learning_rate": 0.0009500903065304539, |
| "loss": 12265.4, |
| "step": 1520 |
| }, |
| { |
| "ce_loss_13": 4.79404227733612, |
| "ce_loss_26": 4.1956378519535065, |
| "ce_loss_39": 3.539849889278412, |
| "ce_loss_52": 1.447507870197296, |
| "ce_loss_7": 5.170107614994049, |
| "epoch": 0.153, |
| "grad_norm": 24.979481722705945, |
| "kl_loss_13": 6864.0, |
| "kl_loss_26": 5609.6, |
| "kl_loss_39": 4213.6, |
| "kl_loss_7": 7656.8, |
| "learning_rate": 0.0009493970231495835, |
| "loss": 12182.2, |
| "step": 1530 |
| }, |
| { |
| "ce_loss_13": 4.754118239879608, |
| "ce_loss_26": 4.16424406170845, |
| "ce_loss_39": 3.5151414275169373, |
| "ce_loss_52": 1.423200336098671, |
| "ce_loss_7": 5.132595348358154, |
| "epoch": 0.154, |
| "grad_norm": 24.139218625352445, |
| "kl_loss_13": 6807.2, |
| "kl_loss_26": 5573.6, |
| "kl_loss_39": 4190.4, |
| "kl_loss_7": 7594.4, |
| "learning_rate": 0.0009486992143456792, |
| "loss": 12152.0, |
| "step": 1540 |
| }, |
| { |
| "ce_loss_13": 4.745328724384308, |
| "ce_loss_26": 4.135520172119141, |
| "ce_loss_39": 3.4818262457847595, |
| "ce_loss_52": 1.4286953419446946, |
| "ce_loss_7": 5.114579677581787, |
| "epoch": 0.155, |
| "grad_norm": 24.426109316342576, |
| "kl_loss_13": 6791.2, |
| "kl_loss_26": 5516.8, |
| "kl_loss_39": 4120.0, |
| "kl_loss_7": 7567.2, |
| "learning_rate": 0.0009479968871456679, |
| "loss": 12128.4, |
| "step": 1550 |
| }, |
| { |
| "ce_loss_13": 4.7574557065963745, |
| "ce_loss_26": 4.145674997568131, |
| "ce_loss_39": 3.476013499498367, |
| "ce_loss_52": 1.4235228240489959, |
| "ce_loss_7": 5.133380055427551, |
| "epoch": 0.156, |
| "grad_norm": 25.100926583342837, |
| "kl_loss_13": 6843.2, |
| "kl_loss_26": 5556.0, |
| "kl_loss_39": 4126.0, |
| "kl_loss_7": 7627.2, |
| "learning_rate": 0.0009472900486219768, |
| "loss": 12082.2, |
| "step": 1560 |
| }, |
| { |
| "ce_loss_13": 4.735032224655152, |
| "ce_loss_26": 4.128789341449737, |
| "ce_loss_39": 3.4694815576076508, |
| "ce_loss_52": 1.4237273722887038, |
| "ce_loss_7": 5.11258887052536, |
| "epoch": 0.157, |
| "grad_norm": 25.10370372986473, |
| "kl_loss_13": 6792.0, |
| "kl_loss_26": 5520.0, |
| "kl_loss_39": 4095.6, |
| "kl_loss_7": 7585.6, |
| "learning_rate": 0.000946578705892462, |
| "loss": 11936.2, |
| "step": 1570 |
| }, |
| { |
| "ce_loss_13": 4.741922962665558, |
| "ce_loss_26": 4.132791459560394, |
| "ce_loss_39": 3.482679557800293, |
| "ce_loss_52": 1.4294559836387635, |
| "ce_loss_7": 5.117163801193238, |
| "epoch": 0.158, |
| "grad_norm": 21.844394510796377, |
| "kl_loss_13": 6799.2, |
| "kl_loss_26": 5517.6, |
| "kl_loss_39": 4118.0, |
| "kl_loss_7": 7581.6, |
| "learning_rate": 0.0009458628661203367, |
| "loss": 11944.8, |
| "step": 1580 |
| }, |
| { |
| "ce_loss_13": 4.741668605804444, |
| "ce_loss_26": 4.1376284003257755, |
| "ce_loss_39": 3.478295695781708, |
| "ce_loss_52": 1.415444830060005, |
| "ce_loss_7": 5.117357003688812, |
| "epoch": 0.159, |
| "grad_norm": 25.4671883290825, |
| "kl_loss_13": 6812.0, |
| "kl_loss_26": 5548.0, |
| "kl_loss_39": 4136.8, |
| "kl_loss_7": 7601.6, |
| "learning_rate": 0.0009451425365140996, |
| "loss": 11952.4, |
| "step": 1590 |
| }, |
| { |
| "ce_loss_13": 4.723819291591644, |
| "ce_loss_26": 4.128834217786789, |
| "ce_loss_39": 3.47242848277092, |
| "ce_loss_52": 1.429117676615715, |
| "ce_loss_7": 5.096058523654937, |
| "epoch": 0.16, |
| "grad_norm": 25.14078013617688, |
| "kl_loss_13": 6768.0, |
| "kl_loss_26": 5519.2, |
| "kl_loss_39": 4101.6, |
| "kl_loss_7": 7547.2, |
| "learning_rate": 0.0009444177243274617, |
| "loss": 11862.0, |
| "step": 1600 |
| }, |
| { |
| "ce_loss_13": 4.648782467842102, |
| "ce_loss_26": 4.0394273698329926, |
| "ce_loss_39": 3.377221292257309, |
| "ce_loss_52": 1.4151206001639367, |
| "ce_loss_7": 5.0250336050987245, |
| "epoch": 0.161, |
| "grad_norm": 24.128253336718885, |
| "kl_loss_13": 6640.0, |
| "kl_loss_26": 5364.0, |
| "kl_loss_39": 3953.2, |
| "kl_loss_7": 7436.8, |
| "learning_rate": 0.0009436884368592739, |
| "loss": 11833.0, |
| "step": 1610 |
| }, |
| { |
| "ce_loss_13": 4.695314359664917, |
| "ce_loss_26": 4.099924111366272, |
| "ce_loss_39": 3.466512751579285, |
| "ce_loss_52": 1.4766929775476456, |
| "ce_loss_7": 5.064470827579498, |
| "epoch": 0.162, |
| "grad_norm": 23.68843577414951, |
| "kl_loss_13": 6614.4, |
| "kl_loss_26": 5368.8, |
| "kl_loss_39": 3996.8, |
| "kl_loss_7": 7387.2, |
| "learning_rate": 0.0009429546814534529, |
| "loss": 11713.8, |
| "step": 1620 |
| }, |
| { |
| "ce_loss_13": 4.7040504813194275, |
| "ce_loss_26": 4.104205197095871, |
| "ce_loss_39": 3.4451481282711027, |
| "ce_loss_52": 1.4428326219320298, |
| "ce_loss_7": 5.08098030090332, |
| "epoch": 0.163, |
| "grad_norm": 23.332187460756046, |
| "kl_loss_13": 6673.6, |
| "kl_loss_26": 5408.0, |
| "kl_loss_39": 4000.0, |
| "kl_loss_7": 7468.8, |
| "learning_rate": 0.0009422164654989072, |
| "loss": 11730.0, |
| "step": 1630 |
| }, |
| { |
| "ce_loss_13": 4.6945901870727536, |
| "ce_loss_26": 4.092492777109146, |
| "ce_loss_39": 3.4360527455806733, |
| "ce_loss_52": 1.4436773255467414, |
| "ce_loss_7": 5.079924070835114, |
| "epoch": 0.164, |
| "grad_norm": 25.877563512298988, |
| "kl_loss_13": 6666.4, |
| "kl_loss_26": 5404.8, |
| "kl_loss_39": 4011.6, |
| "kl_loss_7": 7479.2, |
| "learning_rate": 0.0009414737964294635, |
| "loss": 11645.0, |
| "step": 1640 |
| }, |
| { |
| "ce_loss_13": 4.614939618110657, |
| "ce_loss_26": 4.018665736913681, |
| "ce_loss_39": 3.3586190402507783, |
| "ce_loss_52": 1.4472161442041398, |
| "ce_loss_7": 4.990367615222931, |
| "epoch": 0.165, |
| "grad_norm": 24.534381720947415, |
| "kl_loss_13": 6511.2, |
| "kl_loss_26": 5264.0, |
| "kl_loss_39": 3849.6, |
| "kl_loss_7": 7293.6, |
| "learning_rate": 0.000940726681723791, |
| "loss": 11568.6, |
| "step": 1650 |
| }, |
| { |
| "ce_loss_13": 4.539776319265366, |
| "ce_loss_26": 3.943451428413391, |
| "ce_loss_39": 3.281195378303528, |
| "ce_loss_52": 1.4070941284298897, |
| "ce_loss_7": 4.923744630813599, |
| "epoch": 0.166, |
| "grad_norm": 23.51720209782485, |
| "kl_loss_13": 6449.6, |
| "kl_loss_26": 5196.0, |
| "kl_loss_39": 3786.0, |
| "kl_loss_7": 7249.6, |
| "learning_rate": 0.0009399751289053266, |
| "loss": 11569.4, |
| "step": 1660 |
| }, |
| { |
| "ce_loss_13": 4.590777164697647, |
| "ce_loss_26": 3.9918887853622436, |
| "ce_loss_39": 3.328452670574188, |
| "ce_loss_52": 1.4019996047019958, |
| "ce_loss_7": 4.978202056884766, |
| "epoch": 0.167, |
| "grad_norm": 22.82794096581106, |
| "kl_loss_13": 6550.4, |
| "kl_loss_26": 5291.2, |
| "kl_loss_39": 3877.2, |
| "kl_loss_7": 7354.4, |
| "learning_rate": 0.0009392191455421988, |
| "loss": 11557.4, |
| "step": 1670 |
| }, |
| { |
| "ce_loss_13": 4.534084904193878, |
| "ce_loss_26": 3.9383736848831177, |
| "ce_loss_39": 3.290838527679443, |
| "ce_loss_52": 1.3803422033786774, |
| "ce_loss_7": 4.9193053364753725, |
| "epoch": 0.168, |
| "grad_norm": 22.01316358613574, |
| "kl_loss_13": 6469.6, |
| "kl_loss_26": 5224.0, |
| "kl_loss_39": 3837.2, |
| "kl_loss_7": 7273.6, |
| "learning_rate": 0.0009384587392471515, |
| "loss": 11454.2, |
| "step": 1680 |
| }, |
| { |
| "ce_loss_13": 4.5477269172668455, |
| "ce_loss_26": 3.9558385491371153, |
| "ce_loss_39": 3.3075734674930573, |
| "ce_loss_52": 1.410713329911232, |
| "ce_loss_7": 4.9310637474060055, |
| "epoch": 0.169, |
| "grad_norm": 24.025001534080104, |
| "kl_loss_13": 6453.6, |
| "kl_loss_26": 5223.2, |
| "kl_loss_39": 3830.0, |
| "kl_loss_7": 7244.0, |
| "learning_rate": 0.0009376939176774678, |
| "loss": 11355.2, |
| "step": 1690 |
| }, |
| { |
| "ce_loss_13": 4.580456328392029, |
| "ce_loss_26": 3.996914601325989, |
| "ce_loss_39": 3.3568237483501435, |
| "ce_loss_52": 1.4514233976602555, |
| "ce_loss_7": 4.956906342506409, |
| "epoch": 0.17, |
| "grad_norm": 24.061048820242437, |
| "kl_loss_13": 6424.8, |
| "kl_loss_26": 5199.2, |
| "kl_loss_39": 3822.0, |
| "kl_loss_7": 7210.4, |
| "learning_rate": 0.0009369246885348925, |
| "loss": 11365.4, |
| "step": 1700 |
| }, |
| { |
| "ce_loss_13": 4.5829225301742555, |
| "ce_loss_26": 3.973430114984512, |
| "ce_loss_39": 3.3136274456977843, |
| "ce_loss_52": 1.4179346442222596, |
| "ce_loss_7": 4.9602553129196165, |
| "epoch": 0.171, |
| "grad_norm": 21.925882863353518, |
| "kl_loss_13": 6508.0, |
| "kl_loss_26": 5225.6, |
| "kl_loss_39": 3821.2, |
| "kl_loss_7": 7300.0, |
| "learning_rate": 0.0009361510595655545, |
| "loss": 11427.8, |
| "step": 1710 |
| }, |
| { |
| "ce_loss_13": 4.597618329524994, |
| "ce_loss_26": 4.014382421970367, |
| "ce_loss_39": 3.3793311297893522, |
| "ce_loss_52": 1.4502436846494675, |
| "ce_loss_7": 4.970238649845124, |
| "epoch": 0.172, |
| "grad_norm": 21.861723684559113, |
| "kl_loss_13": 6463.2, |
| "kl_loss_26": 5241.6, |
| "kl_loss_39": 3880.8, |
| "kl_loss_7": 7242.4, |
| "learning_rate": 0.0009353730385598887, |
| "loss": 11300.4, |
| "step": 1720 |
| }, |
| { |
| "ce_loss_13": 4.474293851852417, |
| "ce_loss_26": 3.8755543529987335, |
| "ce_loss_39": 3.212596780061722, |
| "ce_loss_52": 1.4004584550857544, |
| "ce_loss_7": 4.856262743473053, |
| "epoch": 0.173, |
| "grad_norm": 23.168666460490822, |
| "kl_loss_13": 6318.4, |
| "kl_loss_26": 5065.6, |
| "kl_loss_39": 3658.4, |
| "kl_loss_7": 7116.8, |
| "learning_rate": 0.0009345906333525581, |
| "loss": 11205.0, |
| "step": 1730 |
| }, |
| { |
| "ce_loss_13": 4.5212029337883, |
| "ce_loss_26": 3.9314939856529234, |
| "ce_loss_39": 3.301447206735611, |
| "ce_loss_52": 1.422508242726326, |
| "ce_loss_7": 4.894874656200409, |
| "epoch": 0.174, |
| "grad_norm": 25.870791070867757, |
| "kl_loss_13": 6358.4, |
| "kl_loss_26": 5133.6, |
| "kl_loss_39": 3775.2, |
| "kl_loss_7": 7142.4, |
| "learning_rate": 0.0009338038518223745, |
| "loss": 11159.2, |
| "step": 1740 |
| }, |
| { |
| "ce_loss_13": 4.551153075695038, |
| "ce_loss_26": 3.96026993393898, |
| "ce_loss_39": 3.326349085569382, |
| "ce_loss_52": 1.4542756617069243, |
| "ce_loss_7": 4.919975602626801, |
| "epoch": 0.175, |
| "grad_norm": 23.828468964880035, |
| "kl_loss_13": 6352.8, |
| "kl_loss_26": 5109.6, |
| "kl_loss_39": 3757.6, |
| "kl_loss_7": 7129.6, |
| "learning_rate": 0.0009330127018922195, |
| "loss": 11089.0, |
| "step": 1750 |
| }, |
| { |
| "ce_loss_13": 4.469128930568695, |
| "ce_loss_26": 3.8787964940071107, |
| "ce_loss_39": 3.2338991940021513, |
| "ce_loss_52": 1.4316335827112199, |
| "ce_loss_7": 4.848294925689697, |
| "epoch": 0.176, |
| "grad_norm": 24.772424094235244, |
| "kl_loss_13": 6252.8, |
| "kl_loss_26": 5015.2, |
| "kl_loss_39": 3643.2, |
| "kl_loss_7": 7041.6, |
| "learning_rate": 0.0009322171915289634, |
| "loss": 11050.6, |
| "step": 1760 |
| }, |
| { |
| "ce_loss_13": 4.515468680858612, |
| "ce_loss_26": 3.9264565110206604, |
| "ce_loss_39": 3.2920862257480623, |
| "ce_loss_52": 1.46503643989563, |
| "ce_loss_7": 4.88274484872818, |
| "epoch": 0.177, |
| "grad_norm": 24.580027558725412, |
| "kl_loss_13": 6243.2, |
| "kl_loss_26": 5015.2, |
| "kl_loss_39": 3668.0, |
| "kl_loss_7": 7024.0, |
| "learning_rate": 0.0009314173287433873, |
| "loss": 11083.0, |
| "step": 1770 |
| }, |
| { |
| "ce_loss_13": 4.563484919071198, |
| "ce_loss_26": 3.988471633195877, |
| "ce_loss_39": 3.3576016187667848, |
| "ce_loss_52": 1.4738382428884507, |
| "ce_loss_7": 4.929107296466827, |
| "epoch": 0.178, |
| "grad_norm": 23.727065102019264, |
| "kl_loss_13": 6340.0, |
| "kl_loss_26": 5134.4, |
| "kl_loss_39": 3774.0, |
| "kl_loss_7": 7108.0, |
| "learning_rate": 0.0009306131215901003, |
| "loss": 11053.2, |
| "step": 1780 |
| }, |
| { |
| "ce_loss_13": 4.485390210151673, |
| "ce_loss_26": 3.9024369359016418, |
| "ce_loss_39": 3.277720022201538, |
| "ce_loss_52": 1.4684919208288192, |
| "ce_loss_7": 4.849484694004059, |
| "epoch": 0.179, |
| "grad_norm": 24.140381804707665, |
| "kl_loss_13": 6222.4, |
| "kl_loss_26": 4996.0, |
| "kl_loss_39": 3639.6, |
| "kl_loss_7": 6991.2, |
| "learning_rate": 0.0009298045781674596, |
| "loss": 10948.8, |
| "step": 1790 |
| }, |
| { |
| "ce_loss_13": 4.485648030042649, |
| "ce_loss_26": 3.8959447860717775, |
| "ce_loss_39": 3.255040627717972, |
| "ce_loss_52": 1.41890487074852, |
| "ce_loss_7": 4.864380013942719, |
| "epoch": 0.18, |
| "grad_norm": 25.753548379396687, |
| "kl_loss_13": 6269.6, |
| "kl_loss_26": 5029.6, |
| "kl_loss_39": 3653.2, |
| "kl_loss_7": 7068.8, |
| "learning_rate": 0.0009289917066174886, |
| "loss": 10940.4, |
| "step": 1800 |
| }, |
| { |
| "ce_loss_13": 4.4491588294506075, |
| "ce_loss_26": 3.862889313697815, |
| "ce_loss_39": 3.203300213813782, |
| "ce_loss_52": 1.4129745751619338, |
| "ce_loss_7": 4.8373774766921995, |
| "epoch": 0.181, |
| "grad_norm": 23.580007870242206, |
| "kl_loss_13": 6251.2, |
| "kl_loss_26": 5015.2, |
| "kl_loss_39": 3609.2, |
| "kl_loss_7": 7063.2, |
| "learning_rate": 0.0009281745151257945, |
| "loss": 10831.6, |
| "step": 1810 |
| }, |
| { |
| "ce_loss_13": 4.4796471238136295, |
| "ce_loss_26": 3.9034676015377046, |
| "ce_loss_39": 3.27801650762558, |
| "ce_loss_52": 1.470898449420929, |
| "ce_loss_7": 4.846702206134796, |
| "epoch": 0.182, |
| "grad_norm": 21.825066910706077, |
| "kl_loss_13": 6129.6, |
| "kl_loss_26": 4921.6, |
| "kl_loss_39": 3590.4, |
| "kl_loss_7": 6907.2, |
| "learning_rate": 0.0009273530119214868, |
| "loss": 10852.6, |
| "step": 1820 |
| }, |
| { |
| "ce_loss_13": 4.397759801149368, |
| "ce_loss_26": 3.809650295972824, |
| "ce_loss_39": 3.163052296638489, |
| "ce_loss_52": 1.4123397037386893, |
| "ce_loss_7": 4.76624493598938, |
| "epoch": 0.183, |
| "grad_norm": 23.028395579089935, |
| "kl_loss_13": 6109.6, |
| "kl_loss_26": 4884.8, |
| "kl_loss_39": 3520.0, |
| "kl_loss_7": 6886.4, |
| "learning_rate": 0.0009265272052770935, |
| "loss": 10776.6, |
| "step": 1830 |
| }, |
| { |
| "ce_loss_13": 4.409473043680191, |
| "ce_loss_26": 3.825248968601227, |
| "ce_loss_39": 3.174910306930542, |
| "ce_loss_52": 1.4039017781615257, |
| "ce_loss_7": 4.799298018217087, |
| "epoch": 0.184, |
| "grad_norm": 22.60594476207274, |
| "kl_loss_13": 6165.6, |
| "kl_loss_26": 4934.4, |
| "kl_loss_39": 3543.2, |
| "kl_loss_7": 6977.6, |
| "learning_rate": 0.0009256971035084784, |
| "loss": 10733.4, |
| "step": 1840 |
| }, |
| { |
| "ce_loss_13": 4.3755183041095735, |
| "ce_loss_26": 3.797974693775177, |
| "ce_loss_39": 3.1725789427757265, |
| "ce_loss_52": 1.4232216864824294, |
| "ce_loss_7": 4.739054465293885, |
| "epoch": 0.185, |
| "grad_norm": 23.627865972104136, |
| "kl_loss_13": 6060.0, |
| "kl_loss_26": 4843.2, |
| "kl_loss_39": 3517.2, |
| "kl_loss_7": 6827.2, |
| "learning_rate": 0.0009248627149747573, |
| "loss": 10698.4, |
| "step": 1850 |
| }, |
| { |
| "ce_loss_13": 4.422569459676742, |
| "ce_loss_26": 3.822605752944946, |
| "ce_loss_39": 3.1763491451740267, |
| "ce_loss_52": 1.427757203578949, |
| "ce_loss_7": 4.793670791387558, |
| "epoch": 0.186, |
| "grad_norm": 22.345780165109367, |
| "kl_loss_13": 6140.0, |
| "kl_loss_26": 4902.0, |
| "kl_loss_39": 3525.6, |
| "kl_loss_7": 6920.0, |
| "learning_rate": 0.0009240240480782129, |
| "loss": 10688.6, |
| "step": 1860 |
| }, |
| { |
| "ce_loss_13": 4.390002739429474, |
| "ce_loss_26": 3.8117696583271026, |
| "ce_loss_39": 3.193506735563278, |
| "ce_loss_52": 1.4390262439846992, |
| "ce_loss_7": 4.754710161685944, |
| "epoch": 0.187, |
| "grad_norm": 24.270272909834983, |
| "kl_loss_13": 6056.0, |
| "kl_loss_26": 4841.6, |
| "kl_loss_39": 3523.2, |
| "kl_loss_7": 6828.0, |
| "learning_rate": 0.0009231811112642122, |
| "loss": 10605.8, |
| "step": 1870 |
| }, |
| { |
| "ce_loss_13": 4.347514522075653, |
| "ce_loss_26": 3.774775582551956, |
| "ce_loss_39": 3.1524779438972472, |
| "ce_loss_52": 1.4184574037790298, |
| "ce_loss_7": 4.711814332008362, |
| "epoch": 0.188, |
| "grad_norm": 23.060486415907942, |
| "kl_loss_13": 6006.4, |
| "kl_loss_26": 4801.6, |
| "kl_loss_39": 3474.8, |
| "kl_loss_7": 6776.0, |
| "learning_rate": 0.0009223339130211192, |
| "loss": 10599.8, |
| "step": 1880 |
| }, |
| { |
| "ce_loss_13": 4.280169582366943, |
| "ce_loss_26": 3.6960571646690368, |
| "ce_loss_39": 3.0768611639738084, |
| "ce_loss_52": 1.4011510267853737, |
| "ce_loss_7": 4.650495028495788, |
| "epoch": 0.189, |
| "grad_norm": 23.308893883500843, |
| "kl_loss_13": 5916.0, |
| "kl_loss_26": 4693.6, |
| "kl_loss_39": 3365.6, |
| "kl_loss_7": 6688.8, |
| "learning_rate": 0.0009214824618802108, |
| "loss": 10510.0, |
| "step": 1890 |
| }, |
| { |
| "ce_loss_13": 4.426742446422577, |
| "ce_loss_26": 3.835584044456482, |
| "ce_loss_39": 3.1762202858924864, |
| "ce_loss_52": 1.435165250301361, |
| "ce_loss_7": 4.8001045942306515, |
| "epoch": 0.19, |
| "grad_norm": 24.259267724718942, |
| "kl_loss_13": 6154.4, |
| "kl_loss_26": 4914.4, |
| "kl_loss_39": 3499.2, |
| "kl_loss_7": 6933.6, |
| "learning_rate": 0.0009206267664155906, |
| "loss": 10574.0, |
| "step": 1900 |
| }, |
| { |
| "ce_loss_13": 4.317660903930664, |
| "ce_loss_26": 3.736785036325455, |
| "ce_loss_39": 3.102087676525116, |
| "ce_loss_52": 1.4297346964478492, |
| "ce_loss_7": 4.69397531747818, |
| "epoch": 0.191, |
| "grad_norm": 23.3562329011761, |
| "kl_loss_13": 5937.6, |
| "kl_loss_26": 4723.2, |
| "kl_loss_39": 3373.2, |
| "kl_loss_7": 6731.2, |
| "learning_rate": 0.0009197668352441024, |
| "loss": 10503.4, |
| "step": 1910 |
| }, |
| { |
| "ce_loss_13": 4.334453409910202, |
| "ce_loss_26": 3.7587957322597503, |
| "ce_loss_39": 3.123855656385422, |
| "ce_loss_52": 1.4094826728105545, |
| "ce_loss_7": 4.709676373004913, |
| "epoch": 0.192, |
| "grad_norm": 24.22470876078119, |
| "kl_loss_13": 5996.8, |
| "kl_loss_26": 4786.4, |
| "kl_loss_39": 3430.0, |
| "kl_loss_7": 6772.8, |
| "learning_rate": 0.0009189026770252437, |
| "loss": 10471.0, |
| "step": 1920 |
| }, |
| { |
| "ce_loss_13": 4.351706159114838, |
| "ce_loss_26": 3.7773966193199158, |
| "ce_loss_39": 3.1497732281684874, |
| "ce_loss_52": 1.4320787012577056, |
| "ce_loss_7": 4.7191231608390805, |
| "epoch": 0.193, |
| "grad_norm": 23.447904782586527, |
| "kl_loss_13": 5997.6, |
| "kl_loss_26": 4794.4, |
| "kl_loss_39": 3448.4, |
| "kl_loss_7": 6763.2, |
| "learning_rate": 0.000918034300461078, |
| "loss": 10433.4, |
| "step": 1930 |
| }, |
| { |
| "ce_loss_13": 4.307104933261871, |
| "ce_loss_26": 3.7206166088581085, |
| "ce_loss_39": 3.091316682100296, |
| "ce_loss_52": 1.4110687702894211, |
| "ce_loss_7": 4.676908355951309, |
| "epoch": 0.194, |
| "grad_norm": 23.93372642527522, |
| "kl_loss_13": 5951.2, |
| "kl_loss_26": 4727.6, |
| "kl_loss_39": 3376.0, |
| "kl_loss_7": 6721.6, |
| "learning_rate": 0.0009171617142961477, |
| "loss": 10442.2, |
| "step": 1940 |
| }, |
| { |
| "ce_loss_13": 4.3363093614578245, |
| "ce_loss_26": 3.750982850790024, |
| "ce_loss_39": 3.111935979127884, |
| "ce_loss_52": 1.431942057609558, |
| "ce_loss_7": 4.707539451122284, |
| "epoch": 0.195, |
| "grad_norm": 23.910939036749266, |
| "kl_loss_13": 5967.2, |
| "kl_loss_26": 4743.2, |
| "kl_loss_39": 3382.8, |
| "kl_loss_7": 6752.0, |
| "learning_rate": 0.0009162849273173857, |
| "loss": 10366.8, |
| "step": 1950 |
| }, |
| { |
| "ce_loss_13": 4.271794074773789, |
| "ce_loss_26": 3.7012794077396394, |
| "ce_loss_39": 3.0882445216178893, |
| "ce_loss_52": 1.4407746940851212, |
| "ce_loss_7": 4.636665797233581, |
| "epoch": 0.196, |
| "grad_norm": 23.30649566244444, |
| "kl_loss_13": 5844.8, |
| "kl_loss_26": 4652.8, |
| "kl_loss_39": 3337.2, |
| "kl_loss_7": 6608.8, |
| "learning_rate": 0.0009154039483540273, |
| "loss": 10313.0, |
| "step": 1960 |
| }, |
| { |
| "ce_loss_13": 4.3892871856689455, |
| "ce_loss_26": 3.8117631673812866, |
| "ce_loss_39": 3.1672019243240355, |
| "ce_loss_52": 1.4633917301893233, |
| "ce_loss_7": 4.75022611618042, |
| "epoch": 0.197, |
| "grad_norm": 22.823988656575857, |
| "kl_loss_13": 5992.0, |
| "kl_loss_26": 4784.0, |
| "kl_loss_39": 3421.6, |
| "kl_loss_7": 6749.6, |
| "learning_rate": 0.0009145187862775209, |
| "loss": 10294.2, |
| "step": 1970 |
| }, |
| { |
| "ce_loss_13": 4.251708203554154, |
| "ce_loss_26": 3.68521209359169, |
| "ce_loss_39": 3.0638678431510926, |
| "ce_loss_52": 1.4189983233809471, |
| "ce_loss_7": 4.615437304973602, |
| "epoch": 0.198, |
| "grad_norm": 22.115400900183356, |
| "kl_loss_13": 5814.4, |
| "kl_loss_26": 4623.2, |
| "kl_loss_39": 3291.2, |
| "kl_loss_7": 6581.6, |
| "learning_rate": 0.0009136294500014386, |
| "loss": 10194.8, |
| "step": 1980 |
| }, |
| { |
| "ce_loss_13": 4.36306391954422, |
| "ce_loss_26": 3.7900101482868194, |
| "ce_loss_39": 3.1458350718021393, |
| "ce_loss_52": 1.431062677502632, |
| "ce_loss_7": 4.733654403686524, |
| "epoch": 0.199, |
| "grad_norm": 21.64648055888152, |
| "kl_loss_13": 6001.6, |
| "kl_loss_26": 4801.6, |
| "kl_loss_39": 3444.4, |
| "kl_loss_7": 6778.4, |
| "learning_rate": 0.000912735948481387, |
| "loss": 10217.4, |
| "step": 1990 |
| }, |
| { |
| "ce_loss_13": 4.2783638596534725, |
| "ce_loss_26": 3.7015498995780947, |
| "ce_loss_39": 3.079295587539673, |
| "ce_loss_52": 1.4367393761873246, |
| "ce_loss_7": 4.643443429470063, |
| "epoch": 0.2, |
| "grad_norm": 22.667053535414237, |
| "kl_loss_13": 5844.8, |
| "kl_loss_26": 4641.2, |
| "kl_loss_39": 3314.8, |
| "kl_loss_7": 6607.2, |
| "learning_rate": 0.0009118382907149164, |
| "loss": 10108.9, |
| "step": 2000 |
| }, |
| { |
| "ce_loss_13": 4.298079961538315, |
| "ce_loss_26": 3.7112639427185057, |
| "ce_loss_39": 3.0900191485881807, |
| "ce_loss_52": 1.4447624236345291, |
| "ce_loss_7": 4.659080803394318, |
| "epoch": 0.201, |
| "grad_norm": 21.421967222285037, |
| "kl_loss_13": 5860.8, |
| "kl_loss_26": 4647.6, |
| "kl_loss_39": 3308.4, |
| "kl_loss_7": 6620.8, |
| "learning_rate": 0.0009109364857414306, |
| "loss": 10210.1, |
| "step": 2010 |
| }, |
| { |
| "ce_loss_13": 4.298530715703964, |
| "ce_loss_26": 3.7281334936618804, |
| "ce_loss_39": 3.103990191221237, |
| "ce_loss_52": 1.445554968714714, |
| "ce_loss_7": 4.667031800746917, |
| "epoch": 0.202, |
| "grad_norm": 22.186513808055555, |
| "kl_loss_13": 5863.2, |
| "kl_loss_26": 4661.6, |
| "kl_loss_39": 3329.2, |
| "kl_loss_7": 6627.2, |
| "learning_rate": 0.0009100305426420956, |
| "loss": 10090.6, |
| "step": 2020 |
| }, |
| { |
| "ce_loss_13": 4.2117482125759125, |
| "ce_loss_26": 3.6511631190776823, |
| "ce_loss_39": 3.0435081899166105, |
| "ce_loss_52": 1.4112246841192246, |
| "ce_loss_7": 4.573570990562439, |
| "epoch": 0.203, |
| "grad_norm": 23.22208055275699, |
| "kl_loss_13": 5758.4, |
| "kl_loss_26": 4581.6, |
| "kl_loss_39": 3281.6, |
| "kl_loss_7": 6522.4, |
| "learning_rate": 0.0009091204705397484, |
| "loss": 10094.4, |
| "step": 2030 |
| }, |
| { |
| "ce_loss_13": 4.2797119140625, |
| "ce_loss_26": 3.7096898019313813, |
| "ce_loss_39": 3.0815310001373293, |
| "ce_loss_52": 1.4514381274580956, |
| "ce_loss_7": 4.634703290462494, |
| "epoch": 0.204, |
| "grad_norm": 22.77691157290275, |
| "kl_loss_13": 5768.8, |
| "kl_loss_26": 4568.8, |
| "kl_loss_39": 3229.6, |
| "kl_loss_7": 6520.8, |
| "learning_rate": 0.0009082062785988049, |
| "loss": 10052.8, |
| "step": 2040 |
| }, |
| { |
| "ce_loss_13": 4.228492313623429, |
| "ce_loss_26": 3.649516838788986, |
| "ce_loss_39": 3.017675918340683, |
| "ce_loss_52": 1.4015851855278014, |
| "ce_loss_7": 4.5945727050304415, |
| "epoch": 0.205, |
| "grad_norm": 24.043992652900016, |
| "kl_loss_13": 5788.0, |
| "kl_loss_26": 4580.8, |
| "kl_loss_39": 3234.0, |
| "kl_loss_7": 6555.2, |
| "learning_rate": 0.0009072879760251679, |
| "loss": 10047.6, |
| "step": 2050 |
| }, |
| { |
| "ce_loss_13": 4.153064209222793, |
| "ce_loss_26": 3.5778062403202058, |
| "ce_loss_39": 2.964171904325485, |
| "ce_loss_52": 1.4066181004047393, |
| "ce_loss_7": 4.5237502455711365, |
| "epoch": 0.206, |
| "grad_norm": 23.14612831170837, |
| "kl_loss_13": 5666.4, |
| "kl_loss_26": 4462.4, |
| "kl_loss_39": 3156.0, |
| "kl_loss_7": 6428.0, |
| "learning_rate": 0.0009063655720661341, |
| "loss": 10022.0, |
| "step": 2060 |
| }, |
| { |
| "ce_loss_13": 4.162076050043106, |
| "ce_loss_26": 3.5850139617919923, |
| "ce_loss_39": 2.9763170003890993, |
| "ce_loss_52": 1.4114871382713319, |
| "ce_loss_7": 4.525000536441803, |
| "epoch": 0.207, |
| "grad_norm": 23.335931334507656, |
| "kl_loss_13": 5684.8, |
| "kl_loss_26": 4486.4, |
| "kl_loss_39": 3172.4, |
| "kl_loss_7": 6440.8, |
| "learning_rate": 0.000905439076010301, |
| "loss": 9910.8, |
| "step": 2070 |
| }, |
| { |
| "ce_loss_13": 4.203662091493607, |
| "ce_loss_26": 3.6453104853630065, |
| "ce_loss_39": 3.031943756341934, |
| "ce_loss_52": 1.4498305425047875, |
| "ce_loss_7": 4.567969477176666, |
| "epoch": 0.208, |
| "grad_norm": 22.17297694250979, |
| "kl_loss_13": 5653.6, |
| "kl_loss_26": 4474.4, |
| "kl_loss_39": 3178.4, |
| "kl_loss_7": 6404.0, |
| "learning_rate": 0.0009045084971874737, |
| "loss": 9890.1, |
| "step": 2080 |
| }, |
| { |
| "ce_loss_13": 4.1253215074539185, |
| "ce_loss_26": 3.5510447442531587, |
| "ce_loss_39": 2.9255994498729705, |
| "ce_loss_52": 1.383307683467865, |
| "ce_loss_7": 4.503264659643174, |
| "epoch": 0.209, |
| "grad_norm": 21.83866221628796, |
| "kl_loss_13": 5675.2, |
| "kl_loss_26": 4462.8, |
| "kl_loss_39": 3118.0, |
| "kl_loss_7": 6452.0, |
| "learning_rate": 0.0009035738449685707, |
| "loss": 9916.2, |
| "step": 2090 |
| }, |
| { |
| "ce_loss_13": 4.232741326093674, |
| "ce_loss_26": 3.6592664182186128, |
| "ce_loss_39": 3.0402495503425597, |
| "ce_loss_52": 1.4619457066059112, |
| "ce_loss_7": 4.595292699337006, |
| "epoch": 0.21, |
| "grad_norm": 23.1172808852491, |
| "kl_loss_13": 5709.6, |
| "kl_loss_26": 4522.8, |
| "kl_loss_39": 3192.0, |
| "kl_loss_7": 6460.8, |
| "learning_rate": 0.0009026351287655293, |
| "loss": 9882.4, |
| "step": 2100 |
| }, |
| { |
| "ce_loss_13": 4.196552646160126, |
| "ce_loss_26": 3.6394916236400605, |
| "ce_loss_39": 3.029403269290924, |
| "ce_loss_52": 1.431785149872303, |
| "ce_loss_7": 4.555036389827729, |
| "epoch": 0.211, |
| "grad_norm": 22.472192746858727, |
| "kl_loss_13": 5678.4, |
| "kl_loss_26": 4513.2, |
| "kl_loss_39": 3201.6, |
| "kl_loss_7": 6423.2, |
| "learning_rate": 0.0009016923580312113, |
| "loss": 9778.0, |
| "step": 2110 |
| }, |
| { |
| "ce_loss_13": 4.267941182851791, |
| "ce_loss_26": 3.6975386798381806, |
| "ce_loss_39": 3.0839960873126984, |
| "ce_loss_52": 1.4794423222541808, |
| "ce_loss_7": 4.60888249874115, |
| "epoch": 0.212, |
| "grad_norm": 23.91558691594894, |
| "kl_loss_13": 5697.6, |
| "kl_loss_26": 4502.8, |
| "kl_loss_39": 3191.2, |
| "kl_loss_7": 6425.6, |
| "learning_rate": 0.0009007455422593077, |
| "loss": 9764.0, |
| "step": 2120 |
| }, |
| { |
| "ce_loss_13": 4.170402336120605, |
| "ce_loss_26": 3.5973034620285036, |
| "ce_loss_39": 2.9883872270584106, |
| "ce_loss_52": 1.4295871376991272, |
| "ce_loss_7": 4.519614219665527, |
| "epoch": 0.213, |
| "grad_norm": 22.812352077428177, |
| "kl_loss_13": 5644.0, |
| "kl_loss_26": 4456.4, |
| "kl_loss_39": 3137.2, |
| "kl_loss_7": 6387.2, |
| "learning_rate": 0.0008997946909842425, |
| "loss": 9755.6, |
| "step": 2130 |
| }, |
| { |
| "ce_loss_13": 4.10284715294838, |
| "ce_loss_26": 3.5395869314670563, |
| "ce_loss_39": 2.9268704533576964, |
| "ce_loss_52": 1.4086133271455765, |
| "ce_loss_7": 4.462130695581436, |
| "epoch": 0.214, |
| "grad_norm": 22.23702862817867, |
| "kl_loss_13": 5524.8, |
| "kl_loss_26": 4340.4, |
| "kl_loss_39": 3036.0, |
| "kl_loss_7": 6288.8, |
| "learning_rate": 0.0008988398137810777, |
| "loss": 9645.0, |
| "step": 2140 |
| }, |
| { |
| "ce_loss_13": 4.079320967197418, |
| "ce_loss_26": 3.496495473384857, |
| "ce_loss_39": 2.8816928565502167, |
| "ce_loss_52": 1.3819068521261215, |
| "ce_loss_7": 4.434896755218506, |
| "epoch": 0.215, |
| "grad_norm": 22.79915028059786, |
| "kl_loss_13": 5541.6, |
| "kl_loss_26": 4350.0, |
| "kl_loss_39": 3045.2, |
| "kl_loss_7": 6288.8, |
| "learning_rate": 0.0008978809202654162, |
| "loss": 9686.6, |
| "step": 2150 |
| }, |
| { |
| "ce_loss_13": 4.069333535432816, |
| "ce_loss_26": 3.5059276044368746, |
| "ce_loss_39": 2.8949272632598877, |
| "ce_loss_52": 1.409129326045513, |
| "ce_loss_7": 4.434026664495468, |
| "epoch": 0.216, |
| "grad_norm": 22.908702660837623, |
| "kl_loss_13": 5464.0, |
| "kl_loss_26": 4286.8, |
| "kl_loss_39": 2986.8, |
| "kl_loss_7": 6228.0, |
| "learning_rate": 0.0008969180200933046, |
| "loss": 9665.2, |
| "step": 2160 |
| }, |
| { |
| "ce_loss_13": 4.164896643161773, |
| "ce_loss_26": 3.594840294122696, |
| "ce_loss_39": 2.9824715733528135, |
| "ce_loss_52": 1.4398185968399049, |
| "ce_loss_7": 4.5187140583992, |
| "epoch": 0.217, |
| "grad_norm": 22.2992725858673, |
| "kl_loss_13": 5602.4, |
| "kl_loss_26": 4409.6, |
| "kl_loss_39": 3101.6, |
| "kl_loss_7": 6338.4, |
| "learning_rate": 0.0008959511229611376, |
| "loss": 9611.1, |
| "step": 2170 |
| }, |
| { |
| "ce_loss_13": 4.1424953758716585, |
| "ce_loss_26": 3.5846896708011626, |
| "ce_loss_39": 2.9776509165763856, |
| "ce_loss_52": 1.4638631641864777, |
| "ce_loss_7": 4.499098914861679, |
| "epoch": 0.218, |
| "grad_norm": 22.569206560566755, |
| "kl_loss_13": 5520.8, |
| "kl_loss_26": 4360.4, |
| "kl_loss_39": 3039.2, |
| "kl_loss_7": 6262.4, |
| "learning_rate": 0.0008949802386055581, |
| "loss": 9598.7, |
| "step": 2180 |
| }, |
| { |
| "ce_loss_13": 4.124321860074997, |
| "ce_loss_26": 3.559172648191452, |
| "ce_loss_39": 2.942674660682678, |
| "ce_loss_52": 1.4182981908321382, |
| "ce_loss_7": 4.483241724967956, |
| "epoch": 0.219, |
| "grad_norm": 22.517460780417444, |
| "kl_loss_13": 5559.2, |
| "kl_loss_26": 4374.8, |
| "kl_loss_39": 3066.4, |
| "kl_loss_7": 6304.0, |
| "learning_rate": 0.0008940053768033609, |
| "loss": 9610.7, |
| "step": 2190 |
| }, |
| { |
| "ce_loss_13": 4.136555308103562, |
| "ce_loss_26": 3.5674175798892973, |
| "ce_loss_39": 2.9441113233566285, |
| "ce_loss_52": 1.4381250411272049, |
| "ce_loss_7": 4.481418180465698, |
| "epoch": 0.22, |
| "grad_norm": 23.1169100672147, |
| "kl_loss_13": 5545.6, |
| "kl_loss_26": 4354.4, |
| "kl_loss_39": 3035.2, |
| "kl_loss_7": 6270.4, |
| "learning_rate": 0.0008930265473713938, |
| "loss": 9621.3, |
| "step": 2200 |
| }, |
| { |
| "ce_loss_13": 4.116154849529266, |
| "ce_loss_26": 3.5370861172676085, |
| "ce_loss_39": 2.9127448469400408, |
| "ce_loss_52": 1.3838467657566071, |
| "ce_loss_7": 4.479850220680237, |
| "epoch": 0.221, |
| "grad_norm": 23.327101471626264, |
| "kl_loss_13": 5613.6, |
| "kl_loss_26": 4407.2, |
| "kl_loss_39": 3084.0, |
| "kl_loss_7": 6376.8, |
| "learning_rate": 0.0008920437601664579, |
| "loss": 9580.3, |
| "step": 2210 |
| }, |
| { |
| "ce_loss_13": 4.081603097915649, |
| "ce_loss_26": 3.533859223127365, |
| "ce_loss_39": 2.9178696632385255, |
| "ce_loss_52": 1.4558427572250365, |
| "ce_loss_7": 4.428021937608719, |
| "epoch": 0.222, |
| "grad_norm": 24.571271492626643, |
| "kl_loss_13": 5416.8, |
| "kl_loss_26": 4265.2, |
| "kl_loss_39": 2949.2, |
| "kl_loss_7": 6150.4, |
| "learning_rate": 0.0008910570250852097, |
| "loss": 9535.0, |
| "step": 2220 |
| }, |
| { |
| "ce_loss_13": 4.016762095689773, |
| "ce_loss_26": 3.453756958246231, |
| "ce_loss_39": 2.8424510210752487, |
| "ce_loss_52": 1.382763533294201, |
| "ce_loss_7": 4.369659447669983, |
| "epoch": 0.223, |
| "grad_norm": 22.39109803193224, |
| "kl_loss_13": 5417.6, |
| "kl_loss_26": 4255.6, |
| "kl_loss_39": 2951.6, |
| "kl_loss_7": 6149.6, |
| "learning_rate": 0.0008900663520640604, |
| "loss": 9449.7, |
| "step": 2230 |
| }, |
| { |
| "ce_loss_13": 4.07697583436966, |
| "ce_loss_26": 3.5205394327640533, |
| "ce_loss_39": 2.9201291859149934, |
| "ce_loss_52": 1.4419079095125198, |
| "ce_loss_7": 4.432818019390107, |
| "epoch": 0.224, |
| "grad_norm": 29.33071925320992, |
| "kl_loss_13": 5431.2, |
| "kl_loss_26": 4260.4, |
| "kl_loss_39": 2980.0, |
| "kl_loss_7": 6176.8, |
| "learning_rate": 0.0008890717510790764, |
| "loss": 9471.4, |
| "step": 2240 |
| }, |
| { |
| "ce_loss_13": 4.099857300519943, |
| "ce_loss_26": 3.5491108179092405, |
| "ce_loss_39": 2.946030503511429, |
| "ce_loss_52": 1.4482155337929725, |
| "ce_loss_7": 4.446840679645538, |
| "epoch": 0.225, |
| "grad_norm": 24.393145108562546, |
| "kl_loss_13": 5448.0, |
| "kl_loss_26": 4290.4, |
| "kl_loss_39": 3004.4, |
| "kl_loss_7": 6176.0, |
| "learning_rate": 0.0008880732321458784, |
| "loss": 9429.4, |
| "step": 2250 |
| }, |
| { |
| "ce_loss_13": 4.008820396661759, |
| "ce_loss_26": 3.466299217939377, |
| "ce_loss_39": 2.8751066744327547, |
| "ce_loss_52": 1.4349601715803146, |
| "ce_loss_7": 4.357817393541336, |
| "epoch": 0.226, |
| "grad_norm": 23.790321486003762, |
| "kl_loss_13": 5306.4, |
| "kl_loss_26": 4167.6, |
| "kl_loss_39": 2894.0, |
| "kl_loss_7": 6034.4, |
| "learning_rate": 0.0008870708053195413, |
| "loss": 9349.3, |
| "step": 2260 |
| }, |
| { |
| "ce_loss_13": 4.0613229155540465, |
| "ce_loss_26": 3.495078670978546, |
| "ce_loss_39": 2.889867639541626, |
| "ce_loss_52": 1.417461496591568, |
| "ce_loss_7": 4.40853306055069, |
| "epoch": 0.227, |
| "grad_norm": 24.394028059861938, |
| "kl_loss_13": 5412.8, |
| "kl_loss_26": 4243.2, |
| "kl_loss_39": 2960.8, |
| "kl_loss_7": 6130.4, |
| "learning_rate": 0.0008860644806944918, |
| "loss": 9352.6, |
| "step": 2270 |
| }, |
| { |
| "ce_loss_13": 4.178533679246902, |
| "ce_loss_26": 3.622497373819351, |
| "ce_loss_39": 3.006651484966278, |
| "ce_loss_52": 1.4494876891374588, |
| "ce_loss_7": 4.527337849140167, |
| "epoch": 0.228, |
| "grad_norm": 22.806373163177923, |
| "kl_loss_13": 5580.0, |
| "kl_loss_26": 4405.2, |
| "kl_loss_39": 3104.0, |
| "kl_loss_7": 6317.6, |
| "learning_rate": 0.0008850542684044079, |
| "loss": 9441.9, |
| "step": 2280 |
| }, |
| { |
| "ce_loss_13": 4.018020331859589, |
| "ce_loss_26": 3.458746635913849, |
| "ce_loss_39": 2.8657322227954865, |
| "ce_loss_52": 1.4288572728633882, |
| "ce_loss_7": 4.375551146268845, |
| "epoch": 0.229, |
| "grad_norm": 22.45015355344987, |
| "kl_loss_13": 5303.2, |
| "kl_loss_26": 4136.0, |
| "kl_loss_39": 2856.8, |
| "kl_loss_7": 6051.2, |
| "learning_rate": 0.0008840401786221159, |
| "loss": 9343.7, |
| "step": 2290 |
| }, |
| { |
| "ce_loss_13": 4.0382424116134645, |
| "ce_loss_26": 3.4842948436737062, |
| "ce_loss_39": 2.894715803861618, |
| "ce_loss_52": 1.4393651276826858, |
| "ce_loss_7": 4.377675461769104, |
| "epoch": 0.23, |
| "grad_norm": 23.31839583792026, |
| "kl_loss_13": 5351.2, |
| "kl_loss_26": 4188.4, |
| "kl_loss_39": 2912.0, |
| "kl_loss_7": 6064.8, |
| "learning_rate": 0.000883022221559489, |
| "loss": 9246.3, |
| "step": 2300 |
| }, |
| { |
| "ce_loss_13": 4.038966596126556, |
| "ce_loss_26": 3.488220602273941, |
| "ce_loss_39": 2.8812575459480287, |
| "ce_loss_52": 1.441010195016861, |
| "ce_loss_7": 4.381568449735641, |
| "epoch": 0.231, |
| "grad_norm": 22.254622557882463, |
| "kl_loss_13": 5335.2, |
| "kl_loss_26": 4171.2, |
| "kl_loss_39": 2887.2, |
| "kl_loss_7": 6052.8, |
| "learning_rate": 0.0008820004074673434, |
| "loss": 9220.3, |
| "step": 2310 |
| }, |
| { |
| "ce_loss_13": 3.9854084312915803, |
| "ce_loss_26": 3.4409989297389982, |
| "ce_loss_39": 2.84497589468956, |
| "ce_loss_52": 1.4122451767325401, |
| "ce_loss_7": 4.340046459436417, |
| "epoch": 0.232, |
| "grad_norm": 21.074813671439337, |
| "kl_loss_13": 5296.8, |
| "kl_loss_26": 4147.6, |
| "kl_loss_39": 2864.8, |
| "kl_loss_7": 6036.8, |
| "learning_rate": 0.0008809747466353355, |
| "loss": 9279.8, |
| "step": 2320 |
| }, |
| { |
| "ce_loss_13": 4.115998637676239, |
| "ce_loss_26": 3.5584963142871855, |
| "ce_loss_39": 2.9395908057689666, |
| "ce_loss_52": 1.465473085641861, |
| "ce_loss_7": 4.470573830604553, |
| "epoch": 0.233, |
| "grad_norm": 22.26955688088229, |
| "kl_loss_13": 5459.2, |
| "kl_loss_26": 4288.0, |
| "kl_loss_39": 2960.4, |
| "kl_loss_7": 6197.6, |
| "learning_rate": 0.0008799452493918585, |
| "loss": 9213.2, |
| "step": 2330 |
| }, |
| { |
| "ce_loss_13": 3.9350290656089784, |
| "ce_loss_26": 3.3855203211307527, |
| "ce_loss_39": 2.799959135055542, |
| "ce_loss_52": 1.4289379581809043, |
| "ce_loss_7": 4.274723726511001, |
| "epoch": 0.234, |
| "grad_norm": 22.04159638849698, |
| "kl_loss_13": 5186.8, |
| "kl_loss_26": 4026.0, |
| "kl_loss_39": 2752.2, |
| "kl_loss_7": 5904.4, |
| "learning_rate": 0.0008789119261039385, |
| "loss": 9222.9, |
| "step": 2340 |
| }, |
| { |
| "ce_loss_13": 3.977653867006302, |
| "ce_loss_26": 3.4353197515010834, |
| "ce_loss_39": 2.8274969339370726, |
| "ce_loss_52": 1.400892499089241, |
| "ce_loss_7": 4.322875905036926, |
| "epoch": 0.235, |
| "grad_norm": 25.32335755706349, |
| "kl_loss_13": 5282.4, |
| "kl_loss_26": 4139.6, |
| "kl_loss_39": 2849.2, |
| "kl_loss_7": 6001.6, |
| "learning_rate": 0.0008778747871771292, |
| "loss": 9101.4, |
| "step": 2350 |
| }, |
| { |
| "ce_loss_13": 3.9699031889438627, |
| "ce_loss_26": 3.4155133664608, |
| "ce_loss_39": 2.8127492308616637, |
| "ce_loss_52": 1.4119284138083459, |
| "ce_loss_7": 4.322866821289063, |
| "epoch": 0.236, |
| "grad_norm": 24.250283920991954, |
| "kl_loss_13": 5259.2, |
| "kl_loss_26": 4092.8, |
| "kl_loss_39": 2816.4, |
| "kl_loss_7": 6000.8, |
| "learning_rate": 0.0008768338430554083, |
| "loss": 9104.0, |
| "step": 2360 |
| }, |
| { |
| "ce_loss_13": 3.928439366817474, |
| "ce_loss_26": 3.382099211215973, |
| "ce_loss_39": 2.789314305782318, |
| "ce_loss_52": 1.3916988223791122, |
| "ce_loss_7": 4.277575564384461, |
| "epoch": 0.237, |
| "grad_norm": 23.978839586298704, |
| "kl_loss_13": 5212.0, |
| "kl_loss_26": 4068.4, |
| "kl_loss_39": 2797.2, |
| "kl_loss_7": 5938.4, |
| "learning_rate": 0.0008757891042210713, |
| "loss": 9141.7, |
| "step": 2370 |
| }, |
| { |
| "ce_loss_13": 3.9462322175502775, |
| "ce_loss_26": 3.397873044013977, |
| "ce_loss_39": 2.8039229214191437, |
| "ce_loss_52": 1.4037385553121566, |
| "ce_loss_7": 4.290230017900467, |
| "epoch": 0.238, |
| "grad_norm": 23.01247346605362, |
| "kl_loss_13": 5215.2, |
| "kl_loss_26": 4079.6, |
| "kl_loss_39": 2813.2, |
| "kl_loss_7": 5942.4, |
| "learning_rate": 0.0008747405811946271, |
| "loss": 9055.8, |
| "step": 2380 |
| }, |
| { |
| "ce_loss_13": 3.98149796128273, |
| "ce_loss_26": 3.442757821083069, |
| "ce_loss_39": 2.84624342918396, |
| "ce_loss_52": 1.445407471060753, |
| "ce_loss_7": 4.318572920560837, |
| "epoch": 0.239, |
| "grad_norm": 22.74122664649998, |
| "kl_loss_13": 5226.4, |
| "kl_loss_26": 4089.6, |
| "kl_loss_39": 2827.6, |
| "kl_loss_7": 5934.4, |
| "learning_rate": 0.0008736882845346905, |
| "loss": 9110.6, |
| "step": 2390 |
| }, |
| { |
| "ce_loss_13": 3.9661067545413973, |
| "ce_loss_26": 3.4294336676597594, |
| "ce_loss_39": 2.836567336320877, |
| "ce_loss_52": 1.442602628469467, |
| "ce_loss_7": 4.3087667465209964, |
| "epoch": 0.24, |
| "grad_norm": 23.333126009298994, |
| "kl_loss_13": 5196.0, |
| "kl_loss_26": 4051.6, |
| "kl_loss_39": 2790.0, |
| "kl_loss_7": 5911.2, |
| "learning_rate": 0.0008726322248378774, |
| "loss": 9064.8, |
| "step": 2400 |
| }, |
| { |
| "ce_loss_13": 3.988937532901764, |
| "ce_loss_26": 3.4361318945884705, |
| "ce_loss_39": 2.833483111858368, |
| "ce_loss_52": 1.4274606987833978, |
| "ce_loss_7": 4.3361672222614285, |
| "epoch": 0.241, |
| "grad_norm": 21.55988300492865, |
| "kl_loss_13": 5244.8, |
| "kl_loss_26": 4091.2, |
| "kl_loss_39": 2809.2, |
| "kl_loss_7": 5974.4, |
| "learning_rate": 0.0008715724127386971, |
| "loss": 9048.5, |
| "step": 2410 |
| }, |
| { |
| "ce_loss_13": 3.93166036605835, |
| "ce_loss_26": 3.3924847066402437, |
| "ce_loss_39": 2.8114346325397492, |
| "ce_loss_52": 1.433423739671707, |
| "ce_loss_7": 4.277747517824173, |
| "epoch": 0.242, |
| "grad_norm": 21.76629675806892, |
| "kl_loss_13": 5152.8, |
| "kl_loss_26": 4024.4, |
| "kl_loss_39": 2775.6, |
| "kl_loss_7": 5876.0, |
| "learning_rate": 0.0008705088589094458, |
| "loss": 8950.6, |
| "step": 2420 |
| }, |
| { |
| "ce_loss_13": 4.0298320889472965, |
| "ce_loss_26": 3.4784019589424133, |
| "ce_loss_39": 2.8909366130828857, |
| "ce_loss_52": 1.4593130856752397, |
| "ce_loss_7": 4.372854852676392, |
| "epoch": 0.243, |
| "grad_norm": 22.782714549711034, |
| "kl_loss_13": 5275.2, |
| "kl_loss_26": 4132.0, |
| "kl_loss_39": 2868.8, |
| "kl_loss_7": 6000.0, |
| "learning_rate": 0.0008694415740600988, |
| "loss": 8979.7, |
| "step": 2430 |
| }, |
| { |
| "ce_loss_13": 3.957322496175766, |
| "ce_loss_26": 3.391196775436401, |
| "ce_loss_39": 2.7942449331283568, |
| "ce_loss_52": 1.429965654015541, |
| "ce_loss_7": 4.3002465009689335, |
| "epoch": 0.244, |
| "grad_norm": 22.108343623695664, |
| "kl_loss_13": 5175.2, |
| "kl_loss_26": 4000.0, |
| "kl_loss_39": 2735.2, |
| "kl_loss_7": 5894.4, |
| "learning_rate": 0.0008683705689382025, |
| "loss": 8983.5, |
| "step": 2440 |
| }, |
| { |
| "ce_loss_13": 3.914830905199051, |
| "ce_loss_26": 3.371804046630859, |
| "ce_loss_39": 2.789295125007629, |
| "ce_loss_52": 1.4458730816841125, |
| "ce_loss_7": 4.244754731655121, |
| "epoch": 0.245, |
| "grad_norm": 22.68476073719735, |
| "kl_loss_13": 5094.4, |
| "kl_loss_26": 3954.8, |
| "kl_loss_39": 2705.0, |
| "kl_loss_7": 5792.8, |
| "learning_rate": 0.0008672958543287666, |
| "loss": 8971.0, |
| "step": 2450 |
| }, |
| { |
| "ce_loss_13": 3.910190373659134, |
| "ce_loss_26": 3.3697587728500364, |
| "ce_loss_39": 2.7743864953517914, |
| "ce_loss_52": 1.4169176414608955, |
| "ce_loss_7": 4.245863050222397, |
| "epoch": 0.246, |
| "grad_norm": 23.78530061144511, |
| "kl_loss_13": 5117.6, |
| "kl_loss_26": 3979.2, |
| "kl_loss_39": 2714.8, |
| "kl_loss_7": 5818.4, |
| "learning_rate": 0.0008662174410541554, |
| "loss": 8871.3, |
| "step": 2460 |
| }, |
| { |
| "ce_loss_13": 3.905332827568054, |
| "ce_loss_26": 3.3677509129047394, |
| "ce_loss_39": 2.785578554868698, |
| "ce_loss_52": 1.4284577563405036, |
| "ce_loss_7": 4.243521982431412, |
| "epoch": 0.247, |
| "grad_norm": 21.62010382710404, |
| "kl_loss_13": 5076.0, |
| "kl_loss_26": 3951.2, |
| "kl_loss_39": 2706.0, |
| "kl_loss_7": 5774.4, |
| "learning_rate": 0.0008651353399739787, |
| "loss": 8827.8, |
| "step": 2470 |
| }, |
| { |
| "ce_loss_13": 3.9418592929840086, |
| "ce_loss_26": 3.399972987174988, |
| "ce_loss_39": 2.7943135529756544, |
| "ce_loss_52": 1.4213671818375588, |
| "ce_loss_7": 4.285894882678986, |
| "epoch": 0.248, |
| "grad_norm": 21.67451956689309, |
| "kl_loss_13": 5164.0, |
| "kl_loss_26": 4026.4, |
| "kl_loss_39": 2746.4, |
| "kl_loss_7": 5886.4, |
| "learning_rate": 0.0008640495619849821, |
| "loss": 8908.6, |
| "step": 2480 |
| }, |
| { |
| "ce_loss_13": 3.9583646595478057, |
| "ce_loss_26": 3.418367612361908, |
| "ce_loss_39": 2.8201009154319765, |
| "ce_loss_52": 1.4697474852204322, |
| "ce_loss_7": 4.3033524513244625, |
| "epoch": 0.249, |
| "grad_norm": 23.94241052015279, |
| "kl_loss_13": 5140.0, |
| "kl_loss_26": 4010.0, |
| "kl_loss_39": 2731.6, |
| "kl_loss_7": 5860.0, |
| "learning_rate": 0.0008629601180209381, |
| "loss": 8796.4, |
| "step": 2490 |
| }, |
| { |
| "ce_loss_13": 3.9134137570858, |
| "ce_loss_26": 3.3699698984622954, |
| "ce_loss_39": 2.781053990125656, |
| "ce_loss_52": 1.4334075331687928, |
| "ce_loss_7": 4.242314898967743, |
| "epoch": 0.25, |
| "grad_norm": 22.621772280297588, |
| "kl_loss_13": 5100.0, |
| "kl_loss_26": 3960.0, |
| "kl_loss_39": 2699.6, |
| "kl_loss_7": 5796.0, |
| "learning_rate": 0.000861867019052535, |
| "loss": 8802.5, |
| "step": 2500 |
| }, |
| { |
| "ce_loss_13": 3.975496470928192, |
| "ce_loss_26": 3.4352354168891908, |
| "ce_loss_39": 2.8324910700321198, |
| "ce_loss_52": 1.469119620323181, |
| "ce_loss_7": 4.319353139400482, |
| "epoch": 0.251, |
| "grad_norm": 24.031546669852546, |
| "kl_loss_13": 5152.4, |
| "kl_loss_26": 4019.6, |
| "kl_loss_39": 2733.2, |
| "kl_loss_7": 5876.8, |
| "learning_rate": 0.0008607702760872678, |
| "loss": 8791.0, |
| "step": 2510 |
| }, |
| { |
| "ce_loss_13": 3.970981556177139, |
| "ce_loss_26": 3.4194670915603638, |
| "ce_loss_39": 2.826087462902069, |
| "ce_loss_52": 1.457669761776924, |
| "ce_loss_7": 4.311935073137283, |
| "epoch": 0.252, |
| "grad_norm": 22.68902245721029, |
| "kl_loss_13": 5173.6, |
| "kl_loss_26": 4025.2, |
| "kl_loss_39": 2753.2, |
| "kl_loss_7": 5881.6, |
| "learning_rate": 0.0008596699001693256, |
| "loss": 8797.8, |
| "step": 2520 |
| }, |
| { |
| "ce_loss_13": 3.9163833260536194, |
| "ce_loss_26": 3.378202974796295, |
| "ce_loss_39": 2.789392131567001, |
| "ce_loss_52": 1.4278187423944473, |
| "ce_loss_7": 4.251722925901413, |
| "epoch": 0.253, |
| "grad_norm": 23.732161584585768, |
| "kl_loss_13": 5073.6, |
| "kl_loss_26": 3944.0, |
| "kl_loss_39": 2692.8, |
| "kl_loss_7": 5776.8, |
| "learning_rate": 0.0008585659023794818, |
| "loss": 8730.9, |
| "step": 2530 |
| }, |
| { |
| "ce_loss_13": 3.8952399492263794, |
| "ce_loss_26": 3.3577997207641603, |
| "ce_loss_39": 2.761990362405777, |
| "ce_loss_52": 1.421005728840828, |
| "ce_loss_7": 4.233315163850785, |
| "epoch": 0.254, |
| "grad_norm": 23.217787871644095, |
| "kl_loss_13": 5080.0, |
| "kl_loss_26": 3941.2, |
| "kl_loss_39": 2674.0, |
| "kl_loss_7": 5788.8, |
| "learning_rate": 0.0008574582938349817, |
| "loss": 8689.0, |
| "step": 2540 |
| }, |
| { |
| "ce_loss_13": 3.9610107481479644, |
| "ce_loss_26": 3.423109310865402, |
| "ce_loss_39": 2.8520358502864838, |
| "ce_loss_52": 1.4856882840394974, |
| "ce_loss_7": 4.297859001159668, |
| "epoch": 0.255, |
| "grad_norm": 24.36417114927956, |
| "kl_loss_13": 5090.4, |
| "kl_loss_26": 3959.2, |
| "kl_loss_39": 2721.2, |
| "kl_loss_7": 5797.6, |
| "learning_rate": 0.0008563470856894315, |
| "loss": 8682.7, |
| "step": 2550 |
| }, |
| { |
| "ce_loss_13": 3.9392871856689453, |
| "ce_loss_26": 3.4012055695056915, |
| "ce_loss_39": 2.808671069145203, |
| "ce_loss_52": 1.472413820028305, |
| "ce_loss_7": 4.270819437503815, |
| "epoch": 0.256, |
| "grad_norm": 22.260198047396518, |
| "kl_loss_13": 5046.4, |
| "kl_loss_26": 3928.4, |
| "kl_loss_39": 2676.0, |
| "kl_loss_7": 5743.2, |
| "learning_rate": 0.0008552322891326845, |
| "loss": 8696.9, |
| "step": 2560 |
| }, |
| { |
| "ce_loss_13": 3.920336198806763, |
| "ce_loss_26": 3.379600703716278, |
| "ce_loss_39": 2.7945084452629088, |
| "ce_loss_52": 1.4492182582616806, |
| "ce_loss_7": 4.2536624610424045, |
| "epoch": 0.257, |
| "grad_norm": 21.726891625639418, |
| "kl_loss_13": 5079.2, |
| "kl_loss_26": 3942.4, |
| "kl_loss_39": 2686.4, |
| "kl_loss_7": 5777.6, |
| "learning_rate": 0.0008541139153907296, |
| "loss": 8637.5, |
| "step": 2570 |
| }, |
| { |
| "ce_loss_13": 3.8855518221855165, |
| "ce_loss_26": 3.3411356985569, |
| "ce_loss_39": 2.7515164047479628, |
| "ce_loss_52": 1.444500783085823, |
| "ce_loss_7": 4.214444124698639, |
| "epoch": 0.258, |
| "grad_norm": 21.147965943731403, |
| "kl_loss_13": 5012.8, |
| "kl_loss_26": 3868.4, |
| "kl_loss_39": 2618.4, |
| "kl_loss_7": 5708.0, |
| "learning_rate": 0.0008529919757255782, |
| "loss": 8639.8, |
| "step": 2580 |
| }, |
| { |
| "ce_loss_13": 3.906326872110367, |
| "ce_loss_26": 3.3759279191493987, |
| "ce_loss_39": 2.794377237558365, |
| "ce_loss_52": 1.4845586121082306, |
| "ce_loss_7": 4.233397454023361, |
| "epoch": 0.259, |
| "grad_norm": 23.06645203237802, |
| "kl_loss_13": 4970.4, |
| "kl_loss_26": 3854.4, |
| "kl_loss_39": 2612.0, |
| "kl_loss_7": 5654.4, |
| "learning_rate": 0.0008518664814351503, |
| "loss": 8576.9, |
| "step": 2590 |
| }, |
| { |
| "ce_loss_13": 3.7816513538360597, |
| "ce_loss_26": 3.2462404370307922, |
| "ce_loss_39": 2.6499598264694213, |
| "ce_loss_52": 1.391408371925354, |
| "ce_loss_7": 4.128740018606186, |
| "epoch": 0.26, |
| "grad_norm": 23.574382647594796, |
| "kl_loss_13": 4927.2, |
| "kl_loss_26": 3814.8, |
| "kl_loss_39": 2554.8, |
| "kl_loss_7": 5640.8, |
| "learning_rate": 0.0008507374438531607, |
| "loss": 8563.9, |
| "step": 2600 |
| }, |
| { |
| "ce_loss_13": 3.9538560569286347, |
| "ce_loss_26": 3.4095658123493195, |
| "ce_loss_39": 2.8113979279994963, |
| "ce_loss_52": 1.4748313665390014, |
| "ce_loss_7": 4.2874442756176, |
| "epoch": 0.261, |
| "grad_norm": 22.486126104346287, |
| "kl_loss_13": 5098.4, |
| "kl_loss_26": 3961.2, |
| "kl_loss_39": 2686.8, |
| "kl_loss_7": 5806.4, |
| "learning_rate": 0.0008496048743490053, |
| "loss": 8565.1, |
| "step": 2610 |
| }, |
| { |
| "ce_loss_13": 3.806992840766907, |
| "ce_loss_26": 3.282588803768158, |
| "ce_loss_39": 2.7138577342033385, |
| "ce_loss_52": 1.4272374346852303, |
| "ce_loss_7": 4.137687039375305, |
| "epoch": 0.262, |
| "grad_norm": 23.33658890766694, |
| "kl_loss_13": 4889.6, |
| "kl_loss_26": 3786.4, |
| "kl_loss_39": 2568.2, |
| "kl_loss_7": 5575.2, |
| "learning_rate": 0.0008484687843276469, |
| "loss": 8535.4, |
| "step": 2620 |
| }, |
| { |
| "ce_loss_13": 3.8657312452793122, |
| "ce_loss_26": 3.3368504345417023, |
| "ce_loss_39": 2.7600394666194914, |
| "ce_loss_52": 1.4648621320724486, |
| "ce_loss_7": 4.185077089071274, |
| "epoch": 0.263, |
| "grad_norm": 21.52255395290807, |
| "kl_loss_13": 4951.2, |
| "kl_loss_26": 3845.2, |
| "kl_loss_39": 2613.2, |
| "kl_loss_7": 5622.4, |
| "learning_rate": 0.0008473291852294987, |
| "loss": 8580.4, |
| "step": 2630 |
| }, |
| { |
| "ce_loss_13": 3.8671354949474335, |
| "ce_loss_26": 3.334774547815323, |
| "ce_loss_39": 2.757487526535988, |
| "ce_loss_52": 1.4462923228740692, |
| "ce_loss_7": 4.198447376489639, |
| "epoch": 0.264, |
| "grad_norm": 22.57622271607983, |
| "kl_loss_13": 4962.4, |
| "kl_loss_26": 3854.4, |
| "kl_loss_39": 2616.4, |
| "kl_loss_7": 5648.8, |
| "learning_rate": 0.0008461860885303114, |
| "loss": 8492.7, |
| "step": 2640 |
| }, |
| { |
| "ce_loss_13": 3.875018262863159, |
| "ce_loss_26": 3.3343500018119814, |
| "ce_loss_39": 2.731833589076996, |
| "ce_loss_52": 1.4120988547801971, |
| "ce_loss_7": 4.215745764970779, |
| "epoch": 0.265, |
| "grad_norm": 21.329908759369555, |
| "kl_loss_13": 5048.8, |
| "kl_loss_26": 3930.4, |
| "kl_loss_39": 2648.0, |
| "kl_loss_7": 5764.8, |
| "learning_rate": 0.000845039505741056, |
| "loss": 8545.0, |
| "step": 2650 |
| }, |
| { |
| "ce_loss_13": 3.8392697393894197, |
| "ce_loss_26": 3.313974368572235, |
| "ce_loss_39": 2.743110102415085, |
| "ce_loss_52": 1.4838833779096603, |
| "ce_loss_7": 4.162828749418258, |
| "epoch": 0.266, |
| "grad_norm": 22.328558189428094, |
| "kl_loss_13": 4850.4, |
| "kl_loss_26": 3737.2, |
| "kl_loss_39": 2504.0, |
| "kl_loss_7": 5524.8, |
| "learning_rate": 0.0008438894484078086, |
| "loss": 8456.0, |
| "step": 2660 |
| }, |
| { |
| "ce_loss_13": 3.7446135103702547, |
| "ce_loss_26": 3.2187088668346404, |
| "ce_loss_39": 2.6340013802051545, |
| "ce_loss_52": 1.389008679986, |
| "ce_loss_7": 4.068695777654648, |
| "epoch": 0.267, |
| "grad_norm": 22.22147816002266, |
| "kl_loss_13": 4894.4, |
| "kl_loss_26": 3784.0, |
| "kl_loss_39": 2542.0, |
| "kl_loss_7": 5576.0, |
| "learning_rate": 0.0008427359281116334, |
| "loss": 8425.6, |
| "step": 2670 |
| }, |
| { |
| "ce_loss_13": 3.8235996186733248, |
| "ce_loss_26": 3.292762166261673, |
| "ce_loss_39": 2.7183104872703554, |
| "ce_loss_52": 1.4328299894928933, |
| "ce_loss_7": 4.153199070692063, |
| "epoch": 0.268, |
| "grad_norm": 22.48935879447384, |
| "kl_loss_13": 4909.2, |
| "kl_loss_26": 3800.0, |
| "kl_loss_39": 2568.2, |
| "kl_loss_7": 5595.2, |
| "learning_rate": 0.0008415789564684673, |
| "loss": 8422.0, |
| "step": 2680 |
| }, |
| { |
| "ce_loss_13": 3.776876950263977, |
| "ce_loss_26": 3.2493546307086945, |
| "ce_loss_39": 2.682066896557808, |
| "ce_loss_52": 1.426028846204281, |
| "ce_loss_7": 4.102496325969696, |
| "epoch": 0.269, |
| "grad_norm": 23.679133499819734, |
| "kl_loss_13": 4848.8, |
| "kl_loss_26": 3742.4, |
| "kl_loss_39": 2509.6, |
| "kl_loss_7": 5526.4, |
| "learning_rate": 0.0008404185451290017, |
| "loss": 8501.8, |
| "step": 2690 |
| }, |
| { |
| "ce_loss_13": 3.8134057581424714, |
| "ce_loss_26": 3.272122323513031, |
| "ce_loss_39": 2.686330908536911, |
| "ce_loss_52": 1.4213334619998932, |
| "ce_loss_7": 4.143577241897583, |
| "epoch": 0.27, |
| "grad_norm": 22.090911465136045, |
| "kl_loss_13": 4938.4, |
| "kl_loss_26": 3810.8, |
| "kl_loss_39": 2554.8, |
| "kl_loss_7": 5636.8, |
| "learning_rate": 0.0008392547057785661, |
| "loss": 8351.5, |
| "step": 2700 |
| }, |
| { |
| "ce_loss_13": 3.786053466796875, |
| "ce_loss_26": 3.247877132892609, |
| "ce_loss_39": 2.6689940333366393, |
| "ce_loss_52": 1.4167698860168456, |
| "ce_loss_7": 4.1162903845310215, |
| "epoch": 0.271, |
| "grad_norm": 20.5206064618152, |
| "kl_loss_13": 4872.8, |
| "kl_loss_26": 3742.0, |
| "kl_loss_39": 2504.4, |
| "kl_loss_7": 5563.2, |
| "learning_rate": 0.0008380874501370098, |
| "loss": 8427.4, |
| "step": 2710 |
| }, |
| { |
| "ce_loss_13": 3.704389762878418, |
| "ce_loss_26": 3.187553709745407, |
| "ce_loss_39": 2.624448519945145, |
| "ce_loss_52": 1.42959221303463, |
| "ce_loss_7": 4.022905468940735, |
| "epoch": 0.272, |
| "grad_norm": 24.53128855683424, |
| "kl_loss_13": 4716.8, |
| "kl_loss_26": 3630.0, |
| "kl_loss_39": 2409.2, |
| "kl_loss_7": 5390.4, |
| "learning_rate": 0.0008369167899585841, |
| "loss": 8346.0, |
| "step": 2720 |
| }, |
| { |
| "ce_loss_13": 3.7978334367275237, |
| "ce_loss_26": 3.257440310716629, |
| "ce_loss_39": 2.673193109035492, |
| "ce_loss_52": 1.4194035559892655, |
| "ce_loss_7": 4.132071840763092, |
| "epoch": 0.273, |
| "grad_norm": 22.71970164256676, |
| "kl_loss_13": 4896.8, |
| "kl_loss_26": 3759.2, |
| "kl_loss_39": 2513.4, |
| "kl_loss_7": 5595.2, |
| "learning_rate": 0.0008357427370318238, |
| "loss": 8347.6, |
| "step": 2730 |
| }, |
| { |
| "ce_loss_13": 3.7844323456287383, |
| "ce_loss_26": 3.263492447137833, |
| "ce_loss_39": 2.6871775329113006, |
| "ce_loss_52": 1.452097550034523, |
| "ce_loss_7": 4.105139708518982, |
| "epoch": 0.274, |
| "grad_norm": 22.48264422716788, |
| "kl_loss_13": 4807.2, |
| "kl_loss_26": 3710.0, |
| "kl_loss_39": 2469.2, |
| "kl_loss_7": 5490.4, |
| "learning_rate": 0.0008345653031794292, |
| "loss": 8382.9, |
| "step": 2740 |
| }, |
| { |
| "ce_loss_13": 3.8110480844974517, |
| "ce_loss_26": 3.2820364594459535, |
| "ce_loss_39": 2.7046351432800293, |
| "ce_loss_52": 1.4519646763801575, |
| "ce_loss_7": 4.134507310390473, |
| "epoch": 0.275, |
| "grad_norm": 21.943498417005777, |
| "kl_loss_13": 4812.0, |
| "kl_loss_26": 3713.6, |
| "kl_loss_39": 2499.8, |
| "kl_loss_7": 5488.0, |
| "learning_rate": 0.0008333845002581458, |
| "loss": 8287.2, |
| "step": 2750 |
| }, |
| { |
| "ce_loss_13": 3.822121250629425, |
| "ce_loss_26": 3.3007264256477358, |
| "ce_loss_39": 2.73195458650589, |
| "ce_loss_52": 1.4655994832515717, |
| "ce_loss_7": 4.141650629043579, |
| "epoch": 0.276, |
| "grad_norm": 22.59740270522763, |
| "kl_loss_13": 4832.8, |
| "kl_loss_26": 3745.6, |
| "kl_loss_39": 2538.4, |
| "kl_loss_7": 5500.0, |
| "learning_rate": 0.0008322003401586462, |
| "loss": 8283.1, |
| "step": 2760 |
| }, |
| { |
| "ce_loss_13": 3.726576977968216, |
| "ce_loss_26": 3.214203953742981, |
| "ce_loss_39": 2.669701686501503, |
| "ce_loss_52": 1.4409982591867447, |
| "ce_loss_7": 4.043469870090485, |
| "epoch": 0.277, |
| "grad_norm": 21.384417928568727, |
| "kl_loss_13": 4712.0, |
| "kl_loss_26": 3643.6, |
| "kl_loss_39": 2461.0, |
| "kl_loss_7": 5379.2, |
| "learning_rate": 0.0008310128348054094, |
| "loss": 8251.4, |
| "step": 2770 |
| }, |
| { |
| "ce_loss_13": 3.768916404247284, |
| "ce_loss_26": 3.2343318104743957, |
| "ce_loss_39": 2.6558803230524064, |
| "ce_loss_52": 1.4221897169947624, |
| "ce_loss_7": 4.097134619951248, |
| "epoch": 0.278, |
| "grad_norm": 21.8508758307847, |
| "kl_loss_13": 4846.4, |
| "kl_loss_26": 3737.6, |
| "kl_loss_39": 2492.4, |
| "kl_loss_7": 5534.4, |
| "learning_rate": 0.0008298219961566008, |
| "loss": 8264.2, |
| "step": 2780 |
| }, |
| { |
| "ce_loss_13": 3.73385471701622, |
| "ce_loss_26": 3.216676640510559, |
| "ce_loss_39": 2.634915125370026, |
| "ce_loss_52": 1.4022609382867812, |
| "ce_loss_7": 4.067033034563065, |
| "epoch": 0.279, |
| "grad_norm": 22.23449381616188, |
| "kl_loss_13": 4806.0, |
| "kl_loss_26": 3708.8, |
| "kl_loss_39": 2479.2, |
| "kl_loss_7": 5499.2, |
| "learning_rate": 0.0008286278362039527, |
| "loss": 8184.2, |
| "step": 2790 |
| }, |
| { |
| "ce_loss_13": 3.756936568021774, |
| "ce_loss_26": 3.2383838176727293, |
| "ce_loss_39": 2.672528338432312, |
| "ce_loss_52": 1.452534568309784, |
| "ce_loss_7": 4.076909917593002, |
| "epoch": 0.28, |
| "grad_norm": 21.54056853237845, |
| "kl_loss_13": 4743.2, |
| "kl_loss_26": 3661.6, |
| "kl_loss_39": 2456.0, |
| "kl_loss_7": 5414.4, |
| "learning_rate": 0.0008274303669726426, |
| "loss": 8160.7, |
| "step": 2800 |
| }, |
| { |
| "ce_loss_13": 3.8688619792461396, |
| "ce_loss_26": 3.3306061148643495, |
| "ce_loss_39": 2.7420520305633547, |
| "ce_loss_52": 1.4561516880989074, |
| "ce_loss_7": 4.191312706470489, |
| "epoch": 0.281, |
| "grad_norm": 23.01011471220724, |
| "kl_loss_13": 4962.4, |
| "kl_loss_26": 3836.4, |
| "kl_loss_39": 2581.6, |
| "kl_loss_7": 5640.8, |
| "learning_rate": 0.0008262296005211721, |
| "loss": 8239.5, |
| "step": 2810 |
| }, |
| { |
| "ce_loss_13": 3.7579640209674836, |
| "ce_loss_26": 3.2256029903888703, |
| "ce_loss_39": 2.650630474090576, |
| "ce_loss_52": 1.4400919079780579, |
| "ce_loss_7": 4.077768385410309, |
| "epoch": 0.282, |
| "grad_norm": 21.557554897738267, |
| "kl_loss_13": 4784.8, |
| "kl_loss_26": 3661.6, |
| "kl_loss_39": 2435.2, |
| "kl_loss_7": 5454.4, |
| "learning_rate": 0.0008250255489412463, |
| "loss": 8218.5, |
| "step": 2820 |
| }, |
| { |
| "ce_loss_13": 3.7878367722034456, |
| "ce_loss_26": 3.255573272705078, |
| "ce_loss_39": 2.667675232887268, |
| "ce_loss_52": 1.4289155021309852, |
| "ce_loss_7": 4.111210036277771, |
| "epoch": 0.283, |
| "grad_norm": 22.099755132556425, |
| "kl_loss_13": 4851.2, |
| "kl_loss_26": 3733.6, |
| "kl_loss_39": 2480.6, |
| "kl_loss_7": 5527.2, |
| "learning_rate": 0.0008238182243576511, |
| "loss": 8152.9, |
| "step": 2830 |
| }, |
| { |
| "ce_loss_13": 3.7699286341667175, |
| "ce_loss_26": 3.2401221811771395, |
| "ce_loss_39": 2.6614575743675233, |
| "ce_loss_52": 1.4347774118185044, |
| "ce_loss_7": 4.08802090883255, |
| "epoch": 0.284, |
| "grad_norm": 21.441617328301042, |
| "kl_loss_13": 4791.6, |
| "kl_loss_26": 3695.6, |
| "kl_loss_39": 2469.8, |
| "kl_loss_7": 5453.6, |
| "learning_rate": 0.0008226076389281315, |
| "loss": 8141.7, |
| "step": 2840 |
| }, |
| { |
| "ce_loss_13": 3.692534440755844, |
| "ce_loss_26": 3.174397534132004, |
| "ce_loss_39": 2.6222778260707855, |
| "ce_loss_52": 1.4332606226205826, |
| "ce_loss_7": 4.00234357714653, |
| "epoch": 0.285, |
| "grad_norm": 23.306650885126444, |
| "kl_loss_13": 4633.6, |
| "kl_loss_26": 3560.0, |
| "kl_loss_39": 2376.6, |
| "kl_loss_7": 5279.2, |
| "learning_rate": 0.0008213938048432696, |
| "loss": 8068.6, |
| "step": 2850 |
| }, |
| { |
| "ce_loss_13": 3.6946506440639495, |
| "ce_loss_26": 3.1700410664081575, |
| "ce_loss_39": 2.597234898805618, |
| "ce_loss_52": 1.4046493530273438, |
| "ce_loss_7": 4.0274644792079926, |
| "epoch": 0.286, |
| "grad_norm": 21.879949646782595, |
| "kl_loss_13": 4721.6, |
| "kl_loss_26": 3628.8, |
| "kl_loss_39": 2400.4, |
| "kl_loss_7": 5412.8, |
| "learning_rate": 0.0008201767343263612, |
| "loss": 8086.6, |
| "step": 2860 |
| }, |
| { |
| "ce_loss_13": 3.7227329850196837, |
| "ce_loss_26": 3.200405848026276, |
| "ce_loss_39": 2.637904042005539, |
| "ce_loss_52": 1.422918725013733, |
| "ce_loss_7": 4.043233323097229, |
| "epoch": 0.287, |
| "grad_norm": 24.428095636864317, |
| "kl_loss_13": 4732.0, |
| "kl_loss_26": 3643.6, |
| "kl_loss_39": 2433.8, |
| "kl_loss_7": 5399.2, |
| "learning_rate": 0.0008189564396332927, |
| "loss": 8066.0, |
| "step": 2870 |
| }, |
| { |
| "ce_loss_13": 3.721643441915512, |
| "ce_loss_26": 3.185835379362106, |
| "ce_loss_39": 2.6226376593112946, |
| "ce_loss_52": 1.4448419839143753, |
| "ce_loss_7": 4.039817118644715, |
| "epoch": 0.288, |
| "grad_norm": 22.93644669160459, |
| "kl_loss_13": 4683.2, |
| "kl_loss_26": 3561.2, |
| "kl_loss_39": 2345.2, |
| "kl_loss_7": 5352.8, |
| "learning_rate": 0.0008177329330524181, |
| "loss": 8090.5, |
| "step": 2880 |
| }, |
| { |
| "ce_loss_13": 3.732273721694946, |
| "ce_loss_26": 3.217743480205536, |
| "ce_loss_39": 2.656236010789871, |
| "ce_loss_52": 1.4405113011598587, |
| "ce_loss_7": 4.046578335762024, |
| "epoch": 0.289, |
| "grad_norm": 22.27500685105708, |
| "kl_loss_13": 4702.0, |
| "kl_loss_26": 3629.6, |
| "kl_loss_39": 2416.0, |
| "kl_loss_7": 5360.8, |
| "learning_rate": 0.0008165062269044352, |
| "loss": 8083.7, |
| "step": 2890 |
| }, |
| { |
| "ce_loss_13": 3.7308314204216004, |
| "ce_loss_26": 3.2038592040538787, |
| "ce_loss_39": 2.6387904793024064, |
| "ce_loss_52": 1.44214668571949, |
| "ce_loss_7": 4.04458264708519, |
| "epoch": 0.29, |
| "grad_norm": 22.45358727511497, |
| "kl_loss_13": 4700.4, |
| "kl_loss_26": 3605.2, |
| "kl_loss_39": 2398.4, |
| "kl_loss_7": 5358.4, |
| "learning_rate": 0.0008152763335422613, |
| "loss": 8063.0, |
| "step": 2900 |
| }, |
| { |
| "ce_loss_13": 3.6699211478233336, |
| "ce_loss_26": 3.153660440444946, |
| "ce_loss_39": 2.5951134085655214, |
| "ce_loss_52": 1.4221089735627175, |
| "ce_loss_7": 3.978937405347824, |
| "epoch": 0.291, |
| "grad_norm": 23.44237828375525, |
| "kl_loss_13": 4614.0, |
| "kl_loss_26": 3534.4, |
| "kl_loss_39": 2341.4, |
| "kl_loss_7": 5261.6, |
| "learning_rate": 0.0008140432653509088, |
| "loss": 8001.3, |
| "step": 2910 |
| }, |
| { |
| "ce_loss_13": 3.6406539916992187, |
| "ce_loss_26": 3.1216741025447847, |
| "ce_loss_39": 2.5570163398981096, |
| "ce_loss_52": 1.39638482183218, |
| "ce_loss_7": 3.9557625532150267, |
| "epoch": 0.292, |
| "grad_norm": 21.187460458215387, |
| "kl_loss_13": 4592.8, |
| "kl_loss_26": 3516.8, |
| "kl_loss_39": 2326.8, |
| "kl_loss_7": 5254.0, |
| "learning_rate": 0.0008128070347473608, |
| "loss": 7966.5, |
| "step": 2920 |
| }, |
| { |
| "ce_loss_13": 3.665645903348923, |
| "ce_loss_26": 3.149553042650223, |
| "ce_loss_39": 2.58870205283165, |
| "ce_loss_52": 1.4208435118198395, |
| "ce_loss_7": 3.9827320516109466, |
| "epoch": 0.293, |
| "grad_norm": 21.300592787802476, |
| "kl_loss_13": 4619.6, |
| "kl_loss_26": 3530.8, |
| "kl_loss_39": 2336.0, |
| "kl_loss_7": 5284.0, |
| "learning_rate": 0.0008115676541804455, |
| "loss": 7990.7, |
| "step": 2930 |
| }, |
| { |
| "ce_loss_13": 3.6261947989463805, |
| "ce_loss_26": 3.1126498699188234, |
| "ce_loss_39": 2.5498824626207353, |
| "ce_loss_52": 1.3916691318154335, |
| "ce_loss_7": 3.9482949018478393, |
| "epoch": 0.294, |
| "grad_norm": 21.8788417242541, |
| "kl_loss_13": 4598.0, |
| "kl_loss_26": 3513.2, |
| "kl_loss_39": 2317.4, |
| "kl_loss_7": 5269.6, |
| "learning_rate": 0.0008103251361307119, |
| "loss": 7972.2, |
| "step": 2940 |
| }, |
| { |
| "ce_loss_13": 3.6617009818553923, |
| "ce_loss_26": 3.134212648868561, |
| "ce_loss_39": 2.569432234764099, |
| "ce_loss_52": 1.4306001305580138, |
| "ce_loss_7": 3.978475254774094, |
| "epoch": 0.295, |
| "grad_norm": 21.383257731886584, |
| "kl_loss_13": 4584.8, |
| "kl_loss_26": 3486.8, |
| "kl_loss_39": 2289.6, |
| "kl_loss_7": 5252.0, |
| "learning_rate": 0.0008090794931103026, |
| "loss": 7903.9, |
| "step": 2950 |
| }, |
| { |
| "ce_loss_13": 3.674825745820999, |
| "ce_loss_26": 3.1651513874530792, |
| "ce_loss_39": 2.605163484811783, |
| "ce_loss_52": 1.434949815273285, |
| "ce_loss_7": 3.988205587863922, |
| "epoch": 0.296, |
| "grad_norm": 21.87171120261939, |
| "kl_loss_13": 4585.6, |
| "kl_loss_26": 3519.2, |
| "kl_loss_39": 2327.0, |
| "kl_loss_7": 5246.4, |
| "learning_rate": 0.0008078307376628291, |
| "loss": 7903.2, |
| "step": 2960 |
| }, |
| { |
| "ce_loss_13": 3.6504483819007874, |
| "ce_loss_26": 3.1346111416816713, |
| "ce_loss_39": 2.5827776730060577, |
| "ce_loss_52": 1.4189698368310928, |
| "ce_loss_7": 3.9625262200832365, |
| "epoch": 0.297, |
| "grad_norm": 23.048467847326563, |
| "kl_loss_13": 4571.2, |
| "kl_loss_26": 3498.4, |
| "kl_loss_39": 2319.0, |
| "kl_loss_7": 5224.0, |
| "learning_rate": 0.000806578882363245, |
| "loss": 7901.6, |
| "step": 2970 |
| }, |
| { |
| "ce_loss_13": 3.655070722103119, |
| "ce_loss_26": 3.136745995283127, |
| "ce_loss_39": 2.5604557782411574, |
| "ce_loss_52": 1.401164847612381, |
| "ce_loss_7": 3.973217171430588, |
| "epoch": 0.298, |
| "grad_norm": 21.078263907370157, |
| "kl_loss_13": 4614.4, |
| "kl_loss_26": 3538.4, |
| "kl_loss_39": 2311.4, |
| "kl_loss_7": 5287.2, |
| "learning_rate": 0.0008053239398177191, |
| "loss": 7911.8, |
| "step": 2980 |
| }, |
| { |
| "ce_loss_13": 3.6555157959461213, |
| "ce_loss_26": 3.13962464928627, |
| "ce_loss_39": 2.5709628492593763, |
| "ce_loss_52": 1.4242349237203598, |
| "ce_loss_7": 3.9756637513637543, |
| "epoch": 0.299, |
| "grad_norm": 22.608350182138345, |
| "kl_loss_13": 4603.2, |
| "kl_loss_26": 3520.4, |
| "kl_loss_39": 2304.0, |
| "kl_loss_7": 5274.4, |
| "learning_rate": 0.0008040659226635089, |
| "loss": 7892.4, |
| "step": 2990 |
| }, |
| { |
| "ce_loss_13": 3.6532657563686373, |
| "ce_loss_26": 3.124306696653366, |
| "ce_loss_39": 2.557386627793312, |
| "ce_loss_52": 1.402693158388138, |
| "ce_loss_7": 3.9695385217666628, |
| "epoch": 0.3, |
| "grad_norm": 22.376822604204033, |
| "kl_loss_13": 4616.4, |
| "kl_loss_26": 3523.6, |
| "kl_loss_39": 2321.8, |
| "kl_loss_7": 5287.2, |
| "learning_rate": 0.0008028048435688333, |
| "loss": 7820.7, |
| "step": 3000 |
| }, |
| { |
| "ce_loss_13": 3.6811940252780913, |
| "ce_loss_26": 3.1677908301353455, |
| "ce_loss_39": 2.607375094294548, |
| "ce_loss_52": 1.4560914367437363, |
| "ce_loss_7": 3.9915607273578644, |
| "epoch": 0.301, |
| "grad_norm": 21.84188714128543, |
| "kl_loss_13": 4604.8, |
| "kl_loss_26": 3538.4, |
| "kl_loss_39": 2343.2, |
| "kl_loss_7": 5256.0, |
| "learning_rate": 0.0008015407152327448, |
| "loss": 7933.0, |
| "step": 3010 |
| }, |
| { |
| "ce_loss_13": 3.737543153762817, |
| "ce_loss_26": 3.2195757627487183, |
| "ce_loss_39": 2.6466131448745727, |
| "ce_loss_52": 1.4449012607336045, |
| "ce_loss_7": 4.052252840995789, |
| "epoch": 0.302, |
| "grad_norm": 22.34664686545947, |
| "kl_loss_13": 4700.8, |
| "kl_loss_26": 3618.8, |
| "kl_loss_39": 2394.4, |
| "kl_loss_7": 5358.4, |
| "learning_rate": 0.0008002735503850016, |
| "loss": 7844.2, |
| "step": 3020 |
| }, |
| { |
| "ce_loss_13": 3.6698498368263244, |
| "ce_loss_26": 3.155323106050491, |
| "ce_loss_39": 2.579972979426384, |
| "ce_loss_52": 1.4486516952514648, |
| "ce_loss_7": 3.9814261555671693, |
| "epoch": 0.303, |
| "grad_norm": 22.316774640404955, |
| "kl_loss_13": 4563.2, |
| "kl_loss_26": 3486.8, |
| "kl_loss_39": 2285.2, |
| "kl_loss_7": 5213.6, |
| "learning_rate": 0.0007990033617859396, |
| "loss": 7844.3, |
| "step": 3030 |
| }, |
| { |
| "ce_loss_13": 3.661813771724701, |
| "ce_loss_26": 3.1450955271720886, |
| "ce_loss_39": 2.5806987404823305, |
| "ce_loss_52": 1.4304928302764892, |
| "ce_loss_7": 3.9737633407115935, |
| "epoch": 0.304, |
| "grad_norm": 22.094854051528255, |
| "kl_loss_13": 4595.2, |
| "kl_loss_26": 3519.2, |
| "kl_loss_39": 2321.2, |
| "kl_loss_7": 5248.0, |
| "learning_rate": 0.000797730162226344, |
| "loss": 7813.7, |
| "step": 3040 |
| }, |
| { |
| "ce_loss_13": 3.6036822319030763, |
| "ce_loss_26": 3.0875354915857316, |
| "ce_loss_39": 2.5262934505939483, |
| "ce_loss_52": 1.3893155947327613, |
| "ce_loss_7": 3.921951335668564, |
| "epoch": 0.305, |
| "grad_norm": 22.896126437016644, |
| "kl_loss_13": 4538.0, |
| "kl_loss_26": 3453.6, |
| "kl_loss_39": 2252.6, |
| "kl_loss_7": 5210.4, |
| "learning_rate": 0.0007964539645273203, |
| "loss": 7783.3, |
| "step": 3050 |
| }, |
| { |
| "ce_loss_13": 3.690790832042694, |
| "ce_loss_26": 3.1806884586811064, |
| "ce_loss_39": 2.6345931828022002, |
| "ce_loss_52": 1.4827970415353775, |
| "ce_loss_7": 3.996461832523346, |
| "epoch": 0.306, |
| "grad_norm": 22.12157409866164, |
| "kl_loss_13": 4558.4, |
| "kl_loss_26": 3495.6, |
| "kl_loss_39": 2322.2, |
| "kl_loss_7": 5202.4, |
| "learning_rate": 0.000795174781540165, |
| "loss": 7798.9, |
| "step": 3060 |
| }, |
| { |
| "ce_loss_13": 3.6345800876617433, |
| "ce_loss_26": 3.126866352558136, |
| "ce_loss_39": 2.5723444908857345, |
| "ce_loss_52": 1.4505648389458656, |
| "ce_loss_7": 3.938809943199158, |
| "epoch": 0.307, |
| "grad_norm": 21.67276371006888, |
| "kl_loss_13": 4502.8, |
| "kl_loss_26": 3435.6, |
| "kl_loss_39": 2259.2, |
| "kl_loss_7": 5140.8, |
| "learning_rate": 0.0007938926261462366, |
| "loss": 7786.2, |
| "step": 3070 |
| }, |
| { |
| "ce_loss_13": 3.6539651334285734, |
| "ce_loss_26": 3.136826354265213, |
| "ce_loss_39": 2.5686775982379912, |
| "ce_loss_52": 1.4312876760959625, |
| "ce_loss_7": 3.9670185923576353, |
| "epoch": 0.308, |
| "grad_norm": 23.264906723037097, |
| "kl_loss_13": 4571.6, |
| "kl_loss_26": 3495.6, |
| "kl_loss_39": 2289.8, |
| "kl_loss_7": 5231.2, |
| "learning_rate": 0.0007926075112568258, |
| "loss": 7773.0, |
| "step": 3080 |
| }, |
| { |
| "ce_loss_13": 3.6424070239067077, |
| "ce_loss_26": 3.13111692070961, |
| "ce_loss_39": 2.5661837816238404, |
| "ce_loss_52": 1.4395585834980011, |
| "ce_loss_7": 3.9482239544391633, |
| "epoch": 0.309, |
| "grad_norm": 22.051447045868983, |
| "kl_loss_13": 4540.0, |
| "kl_loss_26": 3467.2, |
| "kl_loss_39": 2265.6, |
| "kl_loss_7": 5186.4, |
| "learning_rate": 0.0007913194498130252, |
| "loss": 7730.0, |
| "step": 3090 |
| }, |
| { |
| "ce_loss_13": 3.6187573671340942, |
| "ce_loss_26": 3.110442912578583, |
| "ce_loss_39": 2.553048479557037, |
| "ce_loss_52": 1.4316339492797852, |
| "ce_loss_7": 3.92513769865036, |
| "epoch": 0.31, |
| "grad_norm": 22.041241368180156, |
| "kl_loss_13": 4504.8, |
| "kl_loss_26": 3436.0, |
| "kl_loss_39": 2242.0, |
| "kl_loss_7": 5140.0, |
| "learning_rate": 0.0007900284547855992, |
| "loss": 7742.0, |
| "step": 3100 |
| }, |
| { |
| "ce_loss_13": 3.6639523029327394, |
| "ce_loss_26": 3.1580884575843813, |
| "ce_loss_39": 2.5835469484329225, |
| "ce_loss_52": 1.4442215472459794, |
| "ce_loss_7": 3.976783311367035, |
| "epoch": 0.311, |
| "grad_norm": 20.880619592237565, |
| "kl_loss_13": 4592.0, |
| "kl_loss_26": 3526.4, |
| "kl_loss_39": 2312.4, |
| "kl_loss_7": 5244.8, |
| "learning_rate": 0.0007887345391748532, |
| "loss": 7735.3, |
| "step": 3110 |
| }, |
| { |
| "ce_loss_13": 3.6283124804496767, |
| "ce_loss_26": 3.113315612077713, |
| "ce_loss_39": 2.547743684053421, |
| "ce_loss_52": 1.42053325176239, |
| "ce_loss_7": 3.9331269919872285, |
| "epoch": 0.312, |
| "grad_norm": 22.15121587945531, |
| "kl_loss_13": 4543.2, |
| "kl_loss_26": 3463.2, |
| "kl_loss_39": 2267.8, |
| "kl_loss_7": 5184.8, |
| "learning_rate": 0.0007874377160105036, |
| "loss": 7729.4, |
| "step": 3120 |
| }, |
| { |
| "ce_loss_13": 3.6399169504642486, |
| "ce_loss_26": 3.135877913236618, |
| "ce_loss_39": 2.5725889205932617, |
| "ce_loss_52": 1.4427233994007111, |
| "ce_loss_7": 3.9606189668178557, |
| "epoch": 0.313, |
| "grad_norm": 21.87500401487531, |
| "kl_loss_13": 4563.2, |
| "kl_loss_26": 3490.0, |
| "kl_loss_39": 2276.4, |
| "kl_loss_7": 5228.0, |
| "learning_rate": 0.0007861379983515449, |
| "loss": 7710.9, |
| "step": 3130 |
| }, |
| { |
| "ce_loss_13": 3.634021121263504, |
| "ce_loss_26": 3.111995500326157, |
| "ce_loss_39": 2.5583092838525774, |
| "ce_loss_52": 1.4399698421359062, |
| "ce_loss_7": 3.94167400598526, |
| "epoch": 0.314, |
| "grad_norm": 22.854565496538875, |
| "kl_loss_13": 4504.4, |
| "kl_loss_26": 3415.2, |
| "kl_loss_39": 2230.6, |
| "kl_loss_7": 5153.6, |
| "learning_rate": 0.0007848353992861195, |
| "loss": 7710.3, |
| "step": 3140 |
| }, |
| { |
| "ce_loss_13": 3.6272457361221315, |
| "ce_loss_26": 3.116968184709549, |
| "ce_loss_39": 2.551611191034317, |
| "ce_loss_52": 1.437747061252594, |
| "ce_loss_7": 3.9388325929641725, |
| "epoch": 0.315, |
| "grad_norm": 21.84748688614269, |
| "kl_loss_13": 4498.8, |
| "kl_loss_26": 3427.6, |
| "kl_loss_39": 2231.6, |
| "kl_loss_7": 5142.4, |
| "learning_rate": 0.0007835299319313853, |
| "loss": 7607.0, |
| "step": 3150 |
| }, |
| { |
| "ce_loss_13": 3.613277268409729, |
| "ce_loss_26": 3.0916620969772337, |
| "ce_loss_39": 2.5186130821704866, |
| "ce_loss_52": 1.3888636380434036, |
| "ce_loss_7": 3.935783725976944, |
| "epoch": 0.316, |
| "grad_norm": 21.933561198317395, |
| "kl_loss_13": 4519.2, |
| "kl_loss_26": 3438.8, |
| "kl_loss_39": 2232.0, |
| "kl_loss_7": 5189.6, |
| "learning_rate": 0.0007822216094333848, |
| "loss": 7650.0, |
| "step": 3160 |
| }, |
| { |
| "ce_loss_13": 3.658072179555893, |
| "ce_loss_26": 3.1417903542518615, |
| "ce_loss_39": 2.577219474315643, |
| "ce_loss_52": 1.437073315680027, |
| "ce_loss_7": 3.970378410816193, |
| "epoch": 0.317, |
| "grad_norm": 22.034139537965903, |
| "kl_loss_13": 4566.4, |
| "kl_loss_26": 3493.2, |
| "kl_loss_39": 2301.8, |
| "kl_loss_7": 5224.0, |
| "learning_rate": 0.0007809104449669101, |
| "loss": 7644.7, |
| "step": 3170 |
| }, |
| { |
| "ce_loss_13": 3.593963289260864, |
| "ce_loss_26": 3.080548882484436, |
| "ce_loss_39": 2.5262903541326525, |
| "ce_loss_52": 1.4362893968820571, |
| "ce_loss_7": 3.8962223708629606, |
| "epoch": 0.318, |
| "grad_norm": 22.12833658126749, |
| "kl_loss_13": 4417.6, |
| "kl_loss_26": 3353.2, |
| "kl_loss_39": 2169.6, |
| "kl_loss_7": 5054.4, |
| "learning_rate": 0.0007795964517353734, |
| "loss": 7580.1, |
| "step": 3180 |
| }, |
| { |
| "ce_loss_13": 3.639219433069229, |
| "ce_loss_26": 3.126335847377777, |
| "ce_loss_39": 2.5598012149333953, |
| "ce_loss_52": 1.4479554057121278, |
| "ce_loss_7": 3.955880182981491, |
| "epoch": 0.319, |
| "grad_norm": 21.421584248628356, |
| "kl_loss_13": 4524.8, |
| "kl_loss_26": 3445.2, |
| "kl_loss_39": 2238.4, |
| "kl_loss_7": 5180.8, |
| "learning_rate": 0.000778279642970672, |
| "loss": 7577.4, |
| "step": 3190 |
| }, |
| { |
| "ce_loss_13": 3.593672776222229, |
| "ce_loss_26": 3.076059252023697, |
| "ce_loss_39": 2.5206238448619844, |
| "ce_loss_52": 1.4138888984918594, |
| "ce_loss_7": 3.898124760389328, |
| "epoch": 0.32, |
| "grad_norm": 23.27138036145762, |
| "kl_loss_13": 4477.6, |
| "kl_loss_26": 3400.0, |
| "kl_loss_39": 2214.8, |
| "kl_loss_7": 5123.2, |
| "learning_rate": 0.0007769600319330552, |
| "loss": 7595.6, |
| "step": 3200 |
| }, |
| { |
| "ce_loss_13": 3.6573951125144957, |
| "ce_loss_26": 3.166206729412079, |
| "ce_loss_39": 2.6105258047580717, |
| "ce_loss_52": 1.47857309281826, |
| "ce_loss_7": 3.9568731427192687, |
| "epoch": 0.321, |
| "grad_norm": 21.35600054948774, |
| "kl_loss_13": 4470.4, |
| "kl_loss_26": 3434.4, |
| "kl_loss_39": 2255.6, |
| "kl_loss_7": 5100.8, |
| "learning_rate": 0.0007756376319109917, |
| "loss": 7610.9, |
| "step": 3210 |
| }, |
| { |
| "ce_loss_13": 3.619207721948624, |
| "ce_loss_26": 3.112484961748123, |
| "ce_loss_39": 2.5595098197460175, |
| "ce_loss_52": 1.442267394065857, |
| "ce_loss_7": 3.9296476364135744, |
| "epoch": 0.322, |
| "grad_norm": 21.117056892906756, |
| "kl_loss_13": 4453.6, |
| "kl_loss_26": 3396.0, |
| "kl_loss_39": 2218.0, |
| "kl_loss_7": 5104.8, |
| "learning_rate": 0.0007743124562210351, |
| "loss": 7569.7, |
| "step": 3220 |
| }, |
| { |
| "ce_loss_13": 3.612431305646896, |
| "ce_loss_26": 3.104649418592453, |
| "ce_loss_39": 2.5444509416818617, |
| "ce_loss_52": 1.4610149055719375, |
| "ce_loss_7": 3.9223886907100676, |
| "epoch": 0.323, |
| "grad_norm": 22.510814919939268, |
| "kl_loss_13": 4408.0, |
| "kl_loss_26": 3338.4, |
| "kl_loss_39": 2155.4, |
| "kl_loss_7": 5054.4, |
| "learning_rate": 0.0007729845182076895, |
| "loss": 7565.6, |
| "step": 3230 |
| }, |
| { |
| "ce_loss_13": 3.5650066912174223, |
| "ce_loss_26": 3.060505121946335, |
| "ce_loss_39": 2.5127211630344393, |
| "ce_loss_52": 1.445562407374382, |
| "ce_loss_7": 3.8779995679855346, |
| "epoch": 0.324, |
| "grad_norm": 24.007681143469355, |
| "kl_loss_13": 4388.4, |
| "kl_loss_26": 3323.6, |
| "kl_loss_39": 2150.2, |
| "kl_loss_7": 5044.0, |
| "learning_rate": 0.0007716538312432765, |
| "loss": 7556.0, |
| "step": 3240 |
| }, |
| { |
| "ce_loss_13": 3.5737381398677828, |
| "ce_loss_26": 3.0725920855998994, |
| "ce_loss_39": 2.5134449005126953, |
| "ce_loss_52": 1.4138619631528855, |
| "ce_loss_7": 3.8855117499828338, |
| "epoch": 0.325, |
| "grad_norm": 22.203629206824775, |
| "kl_loss_13": 4430.8, |
| "kl_loss_26": 3379.6, |
| "kl_loss_39": 2197.4, |
| "kl_loss_7": 5081.6, |
| "learning_rate": 0.0007703204087277988, |
| "loss": 7530.7, |
| "step": 3250 |
| }, |
| { |
| "ce_loss_13": 3.5474561214447022, |
| "ce_loss_26": 3.037938302755356, |
| "ce_loss_39": 2.477022570371628, |
| "ce_loss_52": 1.3884405881166457, |
| "ce_loss_7": 3.85917187333107, |
| "epoch": 0.326, |
| "grad_norm": 21.98291246151193, |
| "kl_loss_13": 4437.6, |
| "kl_loss_26": 3371.6, |
| "kl_loss_39": 2176.2, |
| "kl_loss_7": 5089.6, |
| "learning_rate": 0.0007689842640888063, |
| "loss": 7519.3, |
| "step": 3260 |
| }, |
| { |
| "ce_loss_13": 3.6053310513496397, |
| "ce_loss_26": 3.0936122059822084, |
| "ce_loss_39": 2.5414693653583527, |
| "ce_loss_52": 1.4531731829047203, |
| "ce_loss_7": 3.913253253698349, |
| "epoch": 0.327, |
| "grad_norm": 22.418707773974628, |
| "kl_loss_13": 4430.8, |
| "kl_loss_26": 3360.0, |
| "kl_loss_39": 2184.0, |
| "kl_loss_7": 5068.4, |
| "learning_rate": 0.0007676454107812607, |
| "loss": 7473.1, |
| "step": 3270 |
| }, |
| { |
| "ce_loss_13": 3.545606768131256, |
| "ce_loss_26": 3.0480951845645903, |
| "ce_loss_39": 2.499777999520302, |
| "ce_loss_52": 1.4314876705408097, |
| "ce_loss_7": 3.849948841333389, |
| "epoch": 0.328, |
| "grad_norm": 22.389500426390892, |
| "kl_loss_13": 4402.4, |
| "kl_loss_26": 3351.6, |
| "kl_loss_39": 2160.2, |
| "kl_loss_7": 5035.2, |
| "learning_rate": 0.0007663038622873999, |
| "loss": 7510.3, |
| "step": 3280 |
| }, |
| { |
| "ce_loss_13": 3.6383297204971314, |
| "ce_loss_26": 3.127288430929184, |
| "ce_loss_39": 2.561781680583954, |
| "ce_loss_52": 1.4617935866117477, |
| "ce_loss_7": 3.9572836577892305, |
| "epoch": 0.329, |
| "grad_norm": 23.105081611165705, |
| "kl_loss_13": 4472.4, |
| "kl_loss_26": 3396.8, |
| "kl_loss_39": 2202.8, |
| "kl_loss_7": 5139.2, |
| "learning_rate": 0.0007649596321166025, |
| "loss": 7473.8, |
| "step": 3290 |
| }, |
| { |
| "ce_loss_13": 3.5131199419498444, |
| "ce_loss_26": 3.0129006803035736, |
| "ce_loss_39": 2.4699264496564863, |
| "ce_loss_52": 1.4362694859504699, |
| "ce_loss_7": 3.8114282488822937, |
| "epoch": 0.33, |
| "grad_norm": 22.77133190654901, |
| "kl_loss_13": 4268.0, |
| "kl_loss_26": 3223.6, |
| "kl_loss_39": 2069.4, |
| "kl_loss_7": 4882.4, |
| "learning_rate": 0.0007636127338052513, |
| "loss": 7443.1, |
| "step": 3300 |
| }, |
| { |
| "ce_loss_13": 3.5485077798366547, |
| "ce_loss_26": 3.0334243774414062, |
| "ce_loss_39": 2.471779704093933, |
| "ce_loss_52": 1.4008762776851653, |
| "ce_loss_7": 3.860434752702713, |
| "epoch": 0.331, |
| "grad_norm": 22.94544302407564, |
| "kl_loss_13": 4425.6, |
| "kl_loss_26": 3344.4, |
| "kl_loss_39": 2154.8, |
| "kl_loss_7": 5076.8, |
| "learning_rate": 0.0007622631809165971, |
| "loss": 7403.2, |
| "step": 3310 |
| }, |
| { |
| "ce_loss_13": 3.611973536014557, |
| "ce_loss_26": 3.1062149882316588, |
| "ce_loss_39": 2.5540910184383394, |
| "ce_loss_52": 1.4812486261129378, |
| "ce_loss_7": 3.913082367181778, |
| "epoch": 0.332, |
| "grad_norm": 21.844212510164496, |
| "kl_loss_13": 4407.2, |
| "kl_loss_26": 3357.6, |
| "kl_loss_39": 2168.2, |
| "kl_loss_7": 5035.2, |
| "learning_rate": 0.000760910987040623, |
| "loss": 7436.1, |
| "step": 3320 |
| }, |
| { |
| "ce_loss_13": 3.500666618347168, |
| "ce_loss_26": 2.992197906970978, |
| "ce_loss_39": 2.443836176395416, |
| "ce_loss_52": 1.4172912210226059, |
| "ce_loss_7": 3.809192955493927, |
| "epoch": 0.333, |
| "grad_norm": 22.341971135877618, |
| "kl_loss_13": 4287.2, |
| "kl_loss_26": 3229.2, |
| "kl_loss_39": 2050.8, |
| "kl_loss_7": 4934.4, |
| "learning_rate": 0.000759556165793906, |
| "loss": 7354.8, |
| "step": 3330 |
| }, |
| { |
| "ce_loss_13": 3.572561663389206, |
| "ce_loss_26": 3.0666925728321077, |
| "ce_loss_39": 2.5258089125156404, |
| "ce_loss_52": 1.4658059388399125, |
| "ce_loss_7": 3.8703009307384493, |
| "epoch": 0.334, |
| "grad_norm": 20.585398734523825, |
| "kl_loss_13": 4346.0, |
| "kl_loss_26": 3292.8, |
| "kl_loss_39": 2121.6, |
| "kl_loss_7": 4966.4, |
| "learning_rate": 0.000758198730819481, |
| "loss": 7376.9, |
| "step": 3340 |
| }, |
| { |
| "ce_loss_13": 3.5921706318855287, |
| "ce_loss_26": 3.0865486025810243, |
| "ce_loss_39": 2.5250521272420885, |
| "ce_loss_52": 1.4276385620236396, |
| "ce_loss_7": 3.9076746106147766, |
| "epoch": 0.335, |
| "grad_norm": 22.48300338159267, |
| "kl_loss_13": 4447.2, |
| "kl_loss_26": 3387.2, |
| "kl_loss_39": 2194.8, |
| "kl_loss_7": 5095.2, |
| "learning_rate": 0.0007568386957867032, |
| "loss": 7407.2, |
| "step": 3350 |
| }, |
| { |
| "ce_loss_13": 3.5395087361335755, |
| "ce_loss_26": 3.0476285994052885, |
| "ce_loss_39": 2.5039754688739775, |
| "ce_loss_52": 1.4519936561584472, |
| "ce_loss_7": 3.8360753774642946, |
| "epoch": 0.336, |
| "grad_norm": 22.282680621594952, |
| "kl_loss_13": 4315.6, |
| "kl_loss_26": 3284.8, |
| "kl_loss_39": 2115.8, |
| "kl_loss_7": 4935.2, |
| "learning_rate": 0.0007554760743911103, |
| "loss": 7349.9, |
| "step": 3360 |
| }, |
| { |
| "ce_loss_13": 3.5341054499149323, |
| "ce_loss_26": 3.0255552768707275, |
| "ce_loss_39": 2.4795517563819884, |
| "ce_loss_52": 1.43424501568079, |
| "ce_loss_7": 3.8406670331954955, |
| "epoch": 0.337, |
| "grad_norm": 21.82655281544531, |
| "kl_loss_13": 4317.2, |
| "kl_loss_26": 3248.8, |
| "kl_loss_39": 2081.8, |
| "kl_loss_7": 4959.2, |
| "learning_rate": 0.0007541108803542846, |
| "loss": 7352.9, |
| "step": 3370 |
| }, |
| { |
| "ce_loss_13": 3.571430027484894, |
| "ce_loss_26": 3.0674545526504517, |
| "ce_loss_39": 2.5179436981678007, |
| "ce_loss_52": 1.4572079569101333, |
| "ce_loss_7": 3.8665721654891967, |
| "epoch": 0.338, |
| "grad_norm": 20.283540782731166, |
| "kl_loss_13": 4343.6, |
| "kl_loss_26": 3291.2, |
| "kl_loss_39": 2129.4, |
| "kl_loss_7": 4966.4, |
| "learning_rate": 0.0007527431274237149, |
| "loss": 7371.7, |
| "step": 3380 |
| }, |
| { |
| "ce_loss_13": 3.53710196018219, |
| "ce_loss_26": 3.0354479968547823, |
| "ce_loss_39": 2.4885441571474076, |
| "ce_loss_52": 1.4400692582130432, |
| "ce_loss_7": 3.8393473029136658, |
| "epoch": 0.339, |
| "grad_norm": 21.387970982998613, |
| "kl_loss_13": 4311.6, |
| "kl_loss_26": 3256.4, |
| "kl_loss_39": 2099.8, |
| "kl_loss_7": 4936.0, |
| "learning_rate": 0.0007513728293726579, |
| "loss": 7294.4, |
| "step": 3390 |
| }, |
| { |
| "ce_loss_13": 3.523770880699158, |
| "ce_loss_26": 3.0170785784721375, |
| "ce_loss_39": 2.4583947211503983, |
| "ce_loss_52": 1.43256463855505, |
| "ce_loss_7": 3.822116255760193, |
| "epoch": 0.34, |
| "grad_norm": 21.22690089148789, |
| "kl_loss_13": 4311.6, |
| "kl_loss_26": 3260.0, |
| "kl_loss_39": 2066.6, |
| "kl_loss_7": 4940.0, |
| "learning_rate": 0.00075, |
| "loss": 7289.9, |
| "step": 3400 |
| }, |
| { |
| "ce_loss_13": 3.495673859119415, |
| "ce_loss_26": 2.9982276618480683, |
| "ce_loss_39": 2.444023036956787, |
| "ce_loss_52": 1.4111162751913071, |
| "ce_loss_7": 3.805855029821396, |
| "epoch": 0.341, |
| "grad_norm": 20.68401947266934, |
| "kl_loss_13": 4286.0, |
| "kl_loss_26": 3244.8, |
| "kl_loss_39": 2061.6, |
| "kl_loss_7": 4924.4, |
| "learning_rate": 0.0007486246531301177, |
| "loss": 7295.1, |
| "step": 3410 |
| }, |
| { |
| "ce_loss_13": 3.532993698120117, |
| "ce_loss_26": 3.0362183272838594, |
| "ce_loss_39": 2.485447385907173, |
| "ce_loss_52": 1.4526691198349, |
| "ce_loss_7": 3.830386519432068, |
| "epoch": 0.342, |
| "grad_norm": 22.222401081185772, |
| "kl_loss_13": 4299.6, |
| "kl_loss_26": 3249.2, |
| "kl_loss_39": 2080.4, |
| "kl_loss_7": 4928.0, |
| "learning_rate": 0.0007472468026127384, |
| "loss": 7341.7, |
| "step": 3420 |
| }, |
| { |
| "ce_loss_13": 3.463904342055321, |
| "ce_loss_26": 2.9640887469053268, |
| "ce_loss_39": 2.421750417351723, |
| "ce_loss_52": 1.4137367144227029, |
| "ce_loss_7": 3.7643026977777483, |
| "epoch": 0.343, |
| "grad_norm": 21.63494224797145, |
| "kl_loss_13": 4250.8, |
| "kl_loss_26": 3196.2, |
| "kl_loss_39": 2039.9, |
| "kl_loss_7": 4881.6, |
| "learning_rate": 0.000745866462322802, |
| "loss": 7230.95, |
| "step": 3430 |
| }, |
| { |
| "ce_loss_13": 3.5996195137500764, |
| "ce_loss_26": 3.093309980630875, |
| "ce_loss_39": 2.5379314005374907, |
| "ce_loss_52": 1.4943716078996658, |
| "ce_loss_7": 3.899878454208374, |
| "epoch": 0.344, |
| "grad_norm": 23.12022506478991, |
| "kl_loss_13": 4332.0, |
| "kl_loss_26": 3268.0, |
| "kl_loss_39": 2085.8, |
| "kl_loss_7": 4968.0, |
| "learning_rate": 0.0007444836461603195, |
| "loss": 7294.5, |
| "step": 3440 |
| }, |
| { |
| "ce_loss_13": 3.460267198085785, |
| "ce_loss_26": 2.979328769445419, |
| "ce_loss_39": 2.435830682516098, |
| "ce_loss_52": 1.409597858786583, |
| "ce_loss_7": 3.7637628614902496, |
| "epoch": 0.345, |
| "grad_norm": 22.11179077444158, |
| "kl_loss_13": 4240.0, |
| "kl_loss_26": 3220.0, |
| "kl_loss_39": 2052.4, |
| "kl_loss_7": 4872.8, |
| "learning_rate": 0.0007430983680502344, |
| "loss": 7260.3, |
| "step": 3450 |
| }, |
| { |
| "ce_loss_13": 3.484216260910034, |
| "ce_loss_26": 2.986140418052673, |
| "ce_loss_39": 2.4419540107250213, |
| "ce_loss_52": 1.423793789744377, |
| "ce_loss_7": 3.783113992214203, |
| "epoch": 0.346, |
| "grad_norm": 21.48292293909848, |
| "kl_loss_13": 4242.0, |
| "kl_loss_26": 3200.0, |
| "kl_loss_39": 2037.4, |
| "kl_loss_7": 4868.0, |
| "learning_rate": 0.0007417106419422819, |
| "loss": 7210.1, |
| "step": 3460 |
| }, |
| { |
| "ce_loss_13": 3.4797898173332213, |
| "ce_loss_26": 2.9807622492313386, |
| "ce_loss_39": 2.43132506608963, |
| "ce_loss_52": 1.4044816851615907, |
| "ce_loss_7": 3.782477653026581, |
| "epoch": 0.347, |
| "grad_norm": 21.84069780374938, |
| "kl_loss_13": 4286.0, |
| "kl_loss_26": 3248.0, |
| "kl_loss_39": 2073.6, |
| "kl_loss_7": 4917.6, |
| "learning_rate": 0.0007403204818108486, |
| "loss": 7232.3, |
| "step": 3470 |
| }, |
| { |
| "ce_loss_13": 3.461978626251221, |
| "ce_loss_26": 2.971747863292694, |
| "ce_loss_39": 2.4197792381048204, |
| "ce_loss_52": 1.411722904443741, |
| "ce_loss_7": 3.7553564965724946, |
| "epoch": 0.348, |
| "grad_norm": 20.773261789734953, |
| "kl_loss_13": 4206.0, |
| "kl_loss_26": 3177.2, |
| "kl_loss_39": 2006.4, |
| "kl_loss_7": 4823.2, |
| "learning_rate": 0.0007389279016548316, |
| "loss": 7200.0, |
| "step": 3480 |
| }, |
| { |
| "ce_loss_13": 3.412698417901993, |
| "ce_loss_26": 2.910679543018341, |
| "ce_loss_39": 2.3608890056610106, |
| "ce_loss_52": 1.3876498267054558, |
| "ce_loss_7": 3.7148698210716247, |
| "epoch": 0.349, |
| "grad_norm": 21.05607269401212, |
| "kl_loss_13": 4174.0, |
| "kl_loss_26": 3125.6, |
| "kl_loss_39": 1966.0, |
| "kl_loss_7": 4804.0, |
| "learning_rate": 0.0007375329154974975, |
| "loss": 7216.6, |
| "step": 3490 |
| }, |
| { |
| "ce_loss_13": 3.474409651756287, |
| "ce_loss_26": 2.9683692157268524, |
| "ce_loss_39": 2.418835300207138, |
| "ce_loss_52": 1.4043258875608444, |
| "ce_loss_7": 3.774983435869217, |
| "epoch": 0.35, |
| "grad_norm": 20.352131735407625, |
| "kl_loss_13": 4246.8, |
| "kl_loss_26": 3202.8, |
| "kl_loss_39": 2042.2, |
| "kl_loss_7": 4875.2, |
| "learning_rate": 0.0007361355373863414, |
| "loss": 7202.7, |
| "step": 3500 |
| }, |
| { |
| "ce_loss_13": 3.4584279537200926, |
| "ce_loss_26": 2.9674349963665008, |
| "ce_loss_39": 2.422105145454407, |
| "ce_loss_52": 1.4255526602268218, |
| "ce_loss_7": 3.767817974090576, |
| "epoch": 0.351, |
| "grad_norm": 20.416274052366226, |
| "kl_loss_13": 4216.8, |
| "kl_loss_26": 3188.8, |
| "kl_loss_39": 2018.4, |
| "kl_loss_7": 4854.4, |
| "learning_rate": 0.0007347357813929454, |
| "loss": 7180.1, |
| "step": 3510 |
| }, |
| { |
| "ce_loss_13": 3.4778851926326753, |
| "ce_loss_26": 2.979369193315506, |
| "ce_loss_39": 2.4347113519906998, |
| "ce_loss_52": 1.4163423389196397, |
| "ce_loss_7": 3.7732009410858156, |
| "epoch": 0.352, |
| "grad_norm": 24.260880475347793, |
| "kl_loss_13": 4219.6, |
| "kl_loss_26": 3191.6, |
| "kl_loss_39": 2033.6, |
| "kl_loss_7": 4844.0, |
| "learning_rate": 0.0007333336616128369, |
| "loss": 7181.8, |
| "step": 3520 |
| }, |
| { |
| "ce_loss_13": 3.479642480611801, |
| "ce_loss_26": 2.9858607232570646, |
| "ce_loss_39": 2.429011595249176, |
| "ce_loss_52": 1.4224095463752746, |
| "ce_loss_7": 3.779256856441498, |
| "epoch": 0.353, |
| "grad_norm": 20.532035835107088, |
| "kl_loss_13": 4211.6, |
| "kl_loss_26": 3185.2, |
| "kl_loss_39": 2008.4, |
| "kl_loss_7": 4841.6, |
| "learning_rate": 0.0007319291921653463, |
| "loss": 7183.4, |
| "step": 3530 |
| }, |
| { |
| "ce_loss_13": 3.4610216915607452, |
| "ce_loss_26": 2.962309718132019, |
| "ce_loss_39": 2.410178878903389, |
| "ce_loss_52": 1.4115030318498611, |
| "ce_loss_7": 3.757075273990631, |
| "epoch": 0.354, |
| "grad_norm": 23.640729954280236, |
| "kl_loss_13": 4236.8, |
| "kl_loss_26": 3190.0, |
| "kl_loss_39": 2013.0, |
| "kl_loss_7": 4864.8, |
| "learning_rate": 0.0007305223871934656, |
| "loss": 7181.4, |
| "step": 3540 |
| }, |
| { |
| "ce_loss_13": 3.5113906443119047, |
| "ce_loss_26": 3.016271597146988, |
| "ce_loss_39": 2.4817953169345857, |
| "ce_loss_52": 1.4761293560266495, |
| "ce_loss_7": 3.79822900891304, |
| "epoch": 0.355, |
| "grad_norm": 22.47350392427222, |
| "kl_loss_13": 4203.2, |
| "kl_loss_26": 3159.6, |
| "kl_loss_39": 2013.8, |
| "kl_loss_7": 4804.8, |
| "learning_rate": 0.0007291132608637052, |
| "loss": 7117.3, |
| "step": 3550 |
| }, |
| { |
| "ce_loss_13": 3.51672882437706, |
| "ce_loss_26": 3.017621088027954, |
| "ce_loss_39": 2.471283310651779, |
| "ce_loss_52": 1.4747960895299912, |
| "ce_loss_7": 3.8214517176151275, |
| "epoch": 0.356, |
| "grad_norm": 22.39492370466417, |
| "kl_loss_13": 4220.0, |
| "kl_loss_26": 3165.2, |
| "kl_loss_39": 2000.0, |
| "kl_loss_7": 4852.8, |
| "learning_rate": 0.0007277018273659516, |
| "loss": 7133.8, |
| "step": 3560 |
| }, |
| { |
| "ce_loss_13": 3.5502862453460695, |
| "ce_loss_26": 3.05435825586319, |
| "ce_loss_39": 2.5157745271921157, |
| "ce_loss_52": 1.4953802406787873, |
| "ce_loss_7": 3.8426124274730684, |
| "epoch": 0.357, |
| "grad_norm": 22.306092146539875, |
| "kl_loss_13": 4247.6, |
| "kl_loss_26": 3216.4, |
| "kl_loss_39": 2067.2, |
| "kl_loss_7": 4859.2, |
| "learning_rate": 0.0007262881009133242, |
| "loss": 7135.8, |
| "step": 3570 |
| }, |
| { |
| "ce_loss_13": 3.454521042108536, |
| "ce_loss_26": 2.9510986149311065, |
| "ce_loss_39": 2.4084193408489227, |
| "ce_loss_52": 1.4200605943799018, |
| "ce_loss_7": 3.7586602210998534, |
| "epoch": 0.358, |
| "grad_norm": 21.121962853812185, |
| "kl_loss_13": 4202.8, |
| "kl_loss_26": 3144.4, |
| "kl_loss_39": 1986.2, |
| "kl_loss_7": 4836.0, |
| "learning_rate": 0.0007248720957420329, |
| "loss": 7135.9, |
| "step": 3580 |
| }, |
| { |
| "ce_loss_13": 3.4299929022789, |
| "ce_loss_26": 2.9262389481067657, |
| "ce_loss_39": 2.3823306292295454, |
| "ce_loss_52": 1.4015884697437286, |
| "ce_loss_7": 3.7345054388046264, |
| "epoch": 0.359, |
| "grad_norm": 21.87103253394757, |
| "kl_loss_13": 4194.4, |
| "kl_loss_26": 3138.0, |
| "kl_loss_39": 1982.0, |
| "kl_loss_7": 4829.6, |
| "learning_rate": 0.0007234538261112341, |
| "loss": 7056.9, |
| "step": 3590 |
| }, |
| { |
| "ce_loss_13": 3.47941969037056, |
| "ce_loss_26": 2.9830207943916323, |
| "ce_loss_39": 2.429542663693428, |
| "ce_loss_52": 1.4434623152017594, |
| "ce_loss_7": 3.7679139375686646, |
| "epoch": 0.36, |
| "grad_norm": 21.216900982885303, |
| "kl_loss_13": 4199.2, |
| "kl_loss_26": 3151.6, |
| "kl_loss_39": 1989.4, |
| "kl_loss_7": 4814.0, |
| "learning_rate": 0.0007220333063028871, |
| "loss": 7096.6, |
| "step": 3600 |
| }, |
| { |
| "ce_loss_13": 3.3754841923713683, |
| "ce_loss_26": 2.8831639885902405, |
| "ce_loss_39": 2.3438637793064117, |
| "ce_loss_52": 1.3872641950845719, |
| "ce_loss_7": 3.681108373403549, |
| "epoch": 0.361, |
| "grad_norm": 21.80171673212867, |
| "kl_loss_13": 4118.8, |
| "kl_loss_26": 3083.2, |
| "kl_loss_39": 1941.8, |
| "kl_loss_7": 4758.0, |
| "learning_rate": 0.0007206105506216106, |
| "loss": 7029.4, |
| "step": 3610 |
| }, |
| { |
| "ce_loss_13": 3.548617047071457, |
| "ce_loss_26": 3.054288852214813, |
| "ce_loss_39": 2.504137873649597, |
| "ce_loss_52": 1.4843981340527534, |
| "ce_loss_7": 3.850842350721359, |
| "epoch": 0.362, |
| "grad_norm": 21.588123109222046, |
| "kl_loss_13": 4246.4, |
| "kl_loss_26": 3203.6, |
| "kl_loss_39": 2032.4, |
| "kl_loss_7": 4870.4, |
| "learning_rate": 0.0007191855733945387, |
| "loss": 7126.1, |
| "step": 3620 |
| }, |
| { |
| "ce_loss_13": 3.469277936220169, |
| "ce_loss_26": 2.985336202383041, |
| "ce_loss_39": 2.4588325411081313, |
| "ce_loss_52": 1.4790852904319762, |
| "ce_loss_7": 3.764431744813919, |
| "epoch": 0.363, |
| "grad_norm": 22.204733809635066, |
| "kl_loss_13": 4129.2, |
| "kl_loss_26": 3111.6, |
| "kl_loss_39": 1976.2, |
| "kl_loss_7": 4744.4, |
| "learning_rate": 0.0007177583889711762, |
| "loss": 7054.3, |
| "step": 3630 |
| }, |
| { |
| "ce_loss_13": 3.442378747463226, |
| "ce_loss_26": 2.9400178849697114, |
| "ce_loss_39": 2.394621509313583, |
| "ce_loss_52": 1.417646163702011, |
| "ce_loss_7": 3.743503212928772, |
| "epoch": 0.364, |
| "grad_norm": 21.957965313010384, |
| "kl_loss_13": 4163.6, |
| "kl_loss_26": 3123.2, |
| "kl_loss_39": 1959.6, |
| "kl_loss_7": 4796.0, |
| "learning_rate": 0.0007163290117232541, |
| "loss": 7054.5, |
| "step": 3640 |
| }, |
| { |
| "ce_loss_13": 3.4392197132110596, |
| "ce_loss_26": 2.951560914516449, |
| "ce_loss_39": 2.4180801689624785, |
| "ce_loss_52": 1.4459613859653473, |
| "ce_loss_7": 3.7318799614906313, |
| "epoch": 0.365, |
| "grad_norm": 21.451785041659093, |
| "kl_loss_13": 4123.6, |
| "kl_loss_26": 3098.8, |
| "kl_loss_39": 1968.6, |
| "kl_loss_7": 4730.8, |
| "learning_rate": 0.0007148974560445859, |
| "loss": 7029.7, |
| "step": 3650 |
| }, |
| { |
| "ce_loss_13": 3.4576940476894378, |
| "ce_loss_26": 2.964629900455475, |
| "ce_loss_39": 2.4162339717149734, |
| "ce_loss_52": 1.4277015537023545, |
| "ce_loss_7": 3.7549045085906982, |
| "epoch": 0.366, |
| "grad_norm": 22.817794972487214, |
| "kl_loss_13": 4165.6, |
| "kl_loss_26": 3135.6, |
| "kl_loss_39": 1970.0, |
| "kl_loss_7": 4792.8, |
| "learning_rate": 0.0007134637363509209, |
| "loss": 7013.0, |
| "step": 3660 |
| }, |
| { |
| "ce_loss_13": 3.5099750757217407, |
| "ce_loss_26": 3.0163159906864165, |
| "ce_loss_39": 2.4729519367218016, |
| "ce_loss_52": 1.4603912830352783, |
| "ce_loss_7": 3.8057311475276947, |
| "epoch": 0.367, |
| "grad_norm": 21.693779714382707, |
| "kl_loss_13": 4227.6, |
| "kl_loss_26": 3203.2, |
| "kl_loss_39": 2043.8, |
| "kl_loss_7": 4848.0, |
| "learning_rate": 0.0007120278670798009, |
| "loss": 7024.2, |
| "step": 3670 |
| }, |
| { |
| "ce_loss_13": 3.4791980743408204, |
| "ce_loss_26": 2.992640608549118, |
| "ce_loss_39": 2.4573631793260575, |
| "ce_loss_52": 1.461830335855484, |
| "ce_loss_7": 3.7739274382591246, |
| "epoch": 0.368, |
| "grad_norm": 22.105670609070703, |
| "kl_loss_13": 4126.8, |
| "kl_loss_26": 3112.4, |
| "kl_loss_39": 1978.0, |
| "kl_loss_7": 4751.2, |
| "learning_rate": 0.0007105898626904133, |
| "loss": 6924.7, |
| "step": 3680 |
| }, |
| { |
| "ce_loss_13": 3.4017152190208435, |
| "ce_loss_26": 2.910390090942383, |
| "ce_loss_39": 2.374891012907028, |
| "ce_loss_52": 1.41865316927433, |
| "ce_loss_7": 3.6938551664352417, |
| "epoch": 0.369, |
| "grad_norm": 20.08460323542704, |
| "kl_loss_13": 4101.6, |
| "kl_loss_26": 3071.6, |
| "kl_loss_39": 1938.0, |
| "kl_loss_7": 4706.8, |
| "learning_rate": 0.0007091497376634463, |
| "loss": 6952.1, |
| "step": 3690 |
| }, |
| { |
| "ce_loss_13": 3.4051457762718202, |
| "ce_loss_26": 2.9142957627773285, |
| "ce_loss_39": 2.3769975334405897, |
| "ce_loss_52": 1.4461660206317901, |
| "ce_loss_7": 3.7018753468990324, |
| "epoch": 0.37, |
| "grad_norm": 21.75095718081699, |
| "kl_loss_13": 4034.8, |
| "kl_loss_26": 3006.4, |
| "kl_loss_39": 1877.2, |
| "kl_loss_7": 4647.6, |
| "learning_rate": 0.0007077075065009433, |
| "loss": 6973.3, |
| "step": 3700 |
| }, |
| { |
| "ce_loss_13": 3.407693642377853, |
| "ce_loss_26": 2.9163559854030607, |
| "ce_loss_39": 2.3682307243347167, |
| "ce_loss_52": 1.3965442717075347, |
| "ce_loss_7": 3.7011303901672363, |
| "epoch": 0.371, |
| "grad_norm": 21.90346982831121, |
| "kl_loss_13": 4126.8, |
| "kl_loss_26": 3098.4, |
| "kl_loss_39": 1951.8, |
| "kl_loss_7": 4740.4, |
| "learning_rate": 0.0007062631837261557, |
| "loss": 6968.9, |
| "step": 3710 |
| }, |
| { |
| "ce_loss_13": 3.4375191271305083, |
| "ce_loss_26": 2.9387122094631195, |
| "ce_loss_39": 2.4049195408821107, |
| "ce_loss_52": 1.4491820633411407, |
| "ce_loss_7": 3.7319699347019197, |
| "epoch": 0.372, |
| "grad_norm": 22.15813884607567, |
| "kl_loss_13": 4096.8, |
| "kl_loss_26": 3062.8, |
| "kl_loss_39": 1923.8, |
| "kl_loss_7": 4716.0, |
| "learning_rate": 0.0007048167838833977, |
| "loss": 6892.9, |
| "step": 3720 |
| }, |
| { |
| "ce_loss_13": 3.443445736169815, |
| "ce_loss_26": 2.9415276020765306, |
| "ce_loss_39": 2.3946647971868513, |
| "ce_loss_52": 1.4324263527989387, |
| "ce_loss_7": 3.7482150912284853, |
| "epoch": 0.373, |
| "grad_norm": 20.533639539523726, |
| "kl_loss_13": 4160.0, |
| "kl_loss_26": 3107.6, |
| "kl_loss_39": 1932.2, |
| "kl_loss_7": 4794.8, |
| "learning_rate": 0.0007033683215379002, |
| "loss": 6994.9, |
| "step": 3730 |
| }, |
| { |
| "ce_loss_13": 3.440624713897705, |
| "ce_loss_26": 2.932874071598053, |
| "ce_loss_39": 2.384758135676384, |
| "ce_loss_52": 1.4341223761439323, |
| "ce_loss_7": 3.7404758751392366, |
| "epoch": 0.374, |
| "grad_norm": 22.169142032653717, |
| "kl_loss_13": 4178.4, |
| "kl_loss_26": 3120.4, |
| "kl_loss_39": 1940.6, |
| "kl_loss_7": 4809.6, |
| "learning_rate": 0.0007019178112756625, |
| "loss": 6960.1, |
| "step": 3740 |
| }, |
| { |
| "ce_loss_13": 3.479549217224121, |
| "ce_loss_26": 2.9821768522262575, |
| "ce_loss_39": 2.4379994481801988, |
| "ce_loss_52": 1.4455731570720673, |
| "ce_loss_7": 3.779719626903534, |
| "epoch": 0.375, |
| "grad_norm": 22.88722443184811, |
| "kl_loss_13": 4190.8, |
| "kl_loss_26": 3160.8, |
| "kl_loss_39": 2000.2, |
| "kl_loss_7": 4822.4, |
| "learning_rate": 0.0007004652677033068, |
| "loss": 6922.4, |
| "step": 3750 |
| }, |
| { |
| "ce_loss_13": 3.5048573672771455, |
| "ce_loss_26": 2.996897077560425, |
| "ce_loss_39": 2.4514291107654573, |
| "ce_loss_52": 1.4677145808935166, |
| "ce_loss_7": 3.8070162892341615, |
| "epoch": 0.376, |
| "grad_norm": 20.379791798469622, |
| "kl_loss_13": 4200.4, |
| "kl_loss_26": 3149.2, |
| "kl_loss_39": 1988.0, |
| "kl_loss_7": 4828.8, |
| "learning_rate": 0.0006990107054479312, |
| "loss": 6948.5, |
| "step": 3760 |
| }, |
| { |
| "ce_loss_13": 3.3857189416885376, |
| "ce_loss_26": 2.8971070766448976, |
| "ce_loss_39": 2.3661463767290116, |
| "ce_loss_52": 1.4227028042078018, |
| "ce_loss_7": 3.679090714454651, |
| "epoch": 0.377, |
| "grad_norm": 21.179119667844127, |
| "kl_loss_13": 4050.8, |
| "kl_loss_26": 3030.4, |
| "kl_loss_39": 1892.4, |
| "kl_loss_7": 4666.8, |
| "learning_rate": 0.000697554139156961, |
| "loss": 6941.0, |
| "step": 3770 |
| }, |
| { |
| "ce_loss_13": 3.512388813495636, |
| "ce_loss_26": 3.0138610899448395, |
| "ce_loss_39": 2.4606133818626406, |
| "ce_loss_52": 1.4959001630544662, |
| "ce_loss_7": 3.807480573654175, |
| "epoch": 0.378, |
| "grad_norm": 22.362162135534977, |
| "kl_loss_13": 4145.2, |
| "kl_loss_26": 3107.2, |
| "kl_loss_39": 1951.0, |
| "kl_loss_7": 4758.0, |
| "learning_rate": 0.0006960955834980027, |
| "loss": 6874.7, |
| "step": 3780 |
| }, |
| { |
| "ce_loss_13": 3.411291944980621, |
| "ce_loss_26": 2.907497102022171, |
| "ce_loss_39": 2.355439043045044, |
| "ce_loss_52": 1.4057017982006073, |
| "ce_loss_7": 3.7079379081726076, |
| "epoch": 0.379, |
| "grad_norm": 20.519862733845507, |
| "kl_loss_13": 4121.2, |
| "kl_loss_26": 3082.0, |
| "kl_loss_39": 1917.0, |
| "kl_loss_7": 4746.0, |
| "learning_rate": 0.0006946350531586958, |
| "loss": 6891.8, |
| "step": 3790 |
| }, |
| { |
| "ce_loss_13": 3.365473288297653, |
| "ce_loss_26": 2.8626536786556245, |
| "ce_loss_39": 2.3194735169410707, |
| "ce_loss_52": 1.3925662517547608, |
| "ce_loss_7": 3.661379265785217, |
| "epoch": 0.38, |
| "grad_norm": 21.211701089479526, |
| "kl_loss_13": 4084.0, |
| "kl_loss_26": 3048.4, |
| "kl_loss_39": 1891.2, |
| "kl_loss_7": 4702.0, |
| "learning_rate": 0.0006931725628465643, |
| "loss": 6889.0, |
| "step": 3800 |
| }, |
| { |
| "ce_loss_13": 3.375334745645523, |
| "ce_loss_26": 2.891470319032669, |
| "ce_loss_39": 2.3472714513540267, |
| "ce_loss_52": 1.4055952280759811, |
| "ce_loss_7": 3.667448806762695, |
| "epoch": 0.381, |
| "grad_norm": 22.083234786813115, |
| "kl_loss_13": 4056.4, |
| "kl_loss_26": 3038.0, |
| "kl_loss_39": 1887.0, |
| "kl_loss_7": 4669.2, |
| "learning_rate": 0.0006917081272888696, |
| "loss": 6821.1, |
| "step": 3810 |
| }, |
| { |
| "ce_loss_13": 3.413332349061966, |
| "ce_loss_26": 2.918791648745537, |
| "ce_loss_39": 2.3821532160043715, |
| "ce_loss_52": 1.426029135286808, |
| "ce_loss_7": 3.70468533039093, |
| "epoch": 0.382, |
| "grad_norm": 21.70379003852508, |
| "kl_loss_13": 4066.4, |
| "kl_loss_26": 3040.8, |
| "kl_loss_39": 1897.2, |
| "kl_loss_7": 4676.0, |
| "learning_rate": 0.0006902417612324615, |
| "loss": 6817.3, |
| "step": 3820 |
| }, |
| { |
| "ce_loss_13": 3.448424202203751, |
| "ce_loss_26": 2.9532420337200165, |
| "ce_loss_39": 2.389399054646492, |
| "ce_loss_52": 1.4127800971269608, |
| "ce_loss_7": 3.7562515437602997, |
| "epoch": 0.383, |
| "grad_norm": 22.611090782774035, |
| "kl_loss_13": 4219.6, |
| "kl_loss_26": 3187.2, |
| "kl_loss_39": 2000.4, |
| "kl_loss_7": 4858.4, |
| "learning_rate": 0.00068877347944363, |
| "loss": 6892.2, |
| "step": 3830 |
| }, |
| { |
| "ce_loss_13": 3.42295760512352, |
| "ce_loss_26": 2.926942157745361, |
| "ce_loss_39": 2.3873476177453994, |
| "ce_loss_52": 1.439600521326065, |
| "ce_loss_7": 3.719656354188919, |
| "epoch": 0.384, |
| "grad_norm": 20.756696465620674, |
| "kl_loss_13": 4109.6, |
| "kl_loss_26": 3061.2, |
| "kl_loss_39": 1912.2, |
| "kl_loss_7": 4727.6, |
| "learning_rate": 0.0006873032967079561, |
| "loss": 6876.7, |
| "step": 3840 |
| }, |
| { |
| "ce_loss_13": 3.4390079021453857, |
| "ce_loss_26": 2.9532729268074034, |
| "ce_loss_39": 2.4051371097564695, |
| "ce_loss_52": 1.446793320775032, |
| "ce_loss_7": 3.740493839979172, |
| "epoch": 0.385, |
| "grad_norm": 20.683464166323773, |
| "kl_loss_13": 4102.8, |
| "kl_loss_26": 3080.8, |
| "kl_loss_39": 1919.2, |
| "kl_loss_7": 4729.6, |
| "learning_rate": 0.0006858312278301637, |
| "loss": 6878.2, |
| "step": 3850 |
| }, |
| { |
| "ce_loss_13": 3.368044465780258, |
| "ce_loss_26": 2.8783551871776583, |
| "ce_loss_39": 2.35613272190094, |
| "ce_loss_52": 1.4377225756645202, |
| "ce_loss_7": 3.657758867740631, |
| "epoch": 0.386, |
| "grad_norm": 22.01788101919845, |
| "kl_loss_13": 3983.6, |
| "kl_loss_26": 2964.0, |
| "kl_loss_39": 1838.2, |
| "kl_loss_7": 4592.4, |
| "learning_rate": 0.0006843572876339704, |
| "loss": 6809.2, |
| "step": 3860 |
| }, |
| { |
| "ce_loss_13": 3.3198332667350767, |
| "ce_loss_26": 2.8321444630622863, |
| "ce_loss_39": 2.2942576706409454, |
| "ce_loss_52": 1.394131037592888, |
| "ce_loss_7": 3.6043868601322173, |
| "epoch": 0.387, |
| "grad_norm": 23.448962125354107, |
| "kl_loss_13": 3965.6, |
| "kl_loss_26": 2956.0, |
| "kl_loss_39": 1822.8, |
| "kl_loss_7": 4560.0, |
| "learning_rate": 0.0006828814909619373, |
| "loss": 6798.0, |
| "step": 3870 |
| }, |
| { |
| "ce_loss_13": 3.3714381575584413, |
| "ce_loss_26": 2.88772599697113, |
| "ce_loss_39": 2.3584464609622957, |
| "ce_loss_52": 1.4422439962625504, |
| "ce_loss_7": 3.662185198068619, |
| "epoch": 0.388, |
| "grad_norm": 22.031425075321017, |
| "kl_loss_13": 3995.6, |
| "kl_loss_26": 2987.6, |
| "kl_loss_39": 1858.6, |
| "kl_loss_7": 4603.2, |
| "learning_rate": 0.0006814038526753205, |
| "loss": 6790.2, |
| "step": 3880 |
| }, |
| { |
| "ce_loss_13": 3.4292624831199645, |
| "ce_loss_26": 2.9387787103652956, |
| "ce_loss_39": 2.392472392320633, |
| "ce_loss_52": 1.458945381641388, |
| "ce_loss_7": 3.722714525461197, |
| "epoch": 0.389, |
| "grad_norm": 21.623492145702706, |
| "kl_loss_13": 4048.0, |
| "kl_loss_26": 3026.8, |
| "kl_loss_39": 1877.2, |
| "kl_loss_7": 4660.8, |
| "learning_rate": 0.0006799243876539213, |
| "loss": 6774.2, |
| "step": 3890 |
| }, |
| { |
| "ce_loss_13": 3.398139035701752, |
| "ce_loss_26": 2.903027367591858, |
| "ce_loss_39": 2.351736932992935, |
| "ce_loss_52": 1.4211576133966446, |
| "ce_loss_7": 3.6922240018844605, |
| "epoch": 0.39, |
| "grad_norm": 20.862431117162615, |
| "kl_loss_13": 4048.8, |
| "kl_loss_26": 3010.0, |
| "kl_loss_39": 1853.6, |
| "kl_loss_7": 4666.4, |
| "learning_rate": 0.0006784431107959359, |
| "loss": 6774.2, |
| "step": 3900 |
| }, |
| { |
| "ce_loss_13": 3.442742919921875, |
| "ce_loss_26": 2.950921058654785, |
| "ce_loss_39": 2.407829362154007, |
| "ce_loss_52": 1.4671964168548584, |
| "ce_loss_7": 3.7354746580123903, |
| "epoch": 0.391, |
| "grad_norm": 22.203660063445458, |
| "kl_loss_13": 4065.6, |
| "kl_loss_26": 3044.4, |
| "kl_loss_39": 1893.8, |
| "kl_loss_7": 4682.8, |
| "learning_rate": 0.0006769600370178059, |
| "loss": 6751.0, |
| "step": 3910 |
| }, |
| { |
| "ce_loss_13": 3.3321305394172667, |
| "ce_loss_26": 2.8416285693645476, |
| "ce_loss_39": 2.3153179585933685, |
| "ce_loss_52": 1.3940225571393967, |
| "ce_loss_7": 3.628729373216629, |
| "epoch": 0.392, |
| "grad_norm": 20.321578181458975, |
| "kl_loss_13": 3993.6, |
| "kl_loss_26": 2972.0, |
| "kl_loss_39": 1845.6, |
| "kl_loss_7": 4610.8, |
| "learning_rate": 0.0006754751812540679, |
| "loss": 6716.4, |
| "step": 3920 |
| }, |
| { |
| "ce_loss_13": 3.382316732406616, |
| "ce_loss_26": 2.8903696179389953, |
| "ce_loss_39": 2.353957489132881, |
| "ce_loss_52": 1.433479717373848, |
| "ce_loss_7": 3.671325671672821, |
| "epoch": 0.393, |
| "grad_norm": 21.271101889195098, |
| "kl_loss_13": 4022.4, |
| "kl_loss_26": 2998.4, |
| "kl_loss_39": 1861.0, |
| "kl_loss_7": 4629.6, |
| "learning_rate": 0.0006739885584572025, |
| "loss": 6776.3, |
| "step": 3930 |
| }, |
| { |
| "ce_loss_13": 3.300927424430847, |
| "ce_loss_26": 2.820311403274536, |
| "ce_loss_39": 2.2964976727962494, |
| "ce_loss_52": 1.4115092635154725, |
| "ce_loss_7": 3.5904260516166686, |
| "epoch": 0.394, |
| "grad_norm": 20.82909920071906, |
| "kl_loss_13": 3946.8, |
| "kl_loss_26": 2937.6, |
| "kl_loss_39": 1809.0, |
| "kl_loss_7": 4550.0, |
| "learning_rate": 0.0006725001835974853, |
| "loss": 6768.3, |
| "step": 3940 |
| }, |
| { |
| "ce_loss_13": 3.3879170179367066, |
| "ce_loss_26": 2.897449654340744, |
| "ce_loss_39": 2.3513225704431533, |
| "ce_loss_52": 1.423241639137268, |
| "ce_loss_7": 3.6786913871765137, |
| "epoch": 0.395, |
| "grad_norm": 21.851496416724263, |
| "kl_loss_13": 4040.0, |
| "kl_loss_26": 3011.6, |
| "kl_loss_39": 1852.2, |
| "kl_loss_7": 4646.4, |
| "learning_rate": 0.0006710100716628344, |
| "loss": 6704.8, |
| "step": 3950 |
| }, |
| { |
| "ce_loss_13": 3.365324836969376, |
| "ce_loss_26": 2.8627363234758376, |
| "ce_loss_39": 2.30602003633976, |
| "ce_loss_52": 1.3922662898898124, |
| "ce_loss_7": 3.670176440477371, |
| "epoch": 0.396, |
| "grad_norm": 19.95099507420605, |
| "kl_loss_13": 4070.0, |
| "kl_loss_26": 3011.6, |
| "kl_loss_39": 1834.6, |
| "kl_loss_7": 4694.8, |
| "learning_rate": 0.0006695182376586602, |
| "loss": 6737.9, |
| "step": 3960 |
| }, |
| { |
| "ce_loss_13": 3.3035158634185793, |
| "ce_loss_26": 2.820575511455536, |
| "ce_loss_39": 2.2792143374681473, |
| "ce_loss_52": 1.3631332144141197, |
| "ce_loss_7": 3.5984981656074524, |
| "epoch": 0.397, |
| "grad_norm": 21.419651305345628, |
| "kl_loss_13": 4016.4, |
| "kl_loss_26": 2992.4, |
| "kl_loss_39": 1851.6, |
| "kl_loss_7": 4639.2, |
| "learning_rate": 0.000668024696607715, |
| "loss": 6659.1, |
| "step": 3970 |
| }, |
| { |
| "ce_loss_13": 3.2616395235061644, |
| "ce_loss_26": 2.784494936466217, |
| "ce_loss_39": 2.2700316429138185, |
| "ce_loss_52": 1.3974194526672363, |
| "ce_loss_7": 3.548482429981232, |
| "epoch": 0.398, |
| "grad_norm": 20.984610674219567, |
| "kl_loss_13": 3844.8, |
| "kl_loss_26": 2848.4, |
| "kl_loss_39": 1755.2, |
| "kl_loss_7": 4441.6, |
| "learning_rate": 0.0006665294635499404, |
| "loss": 6600.0, |
| "step": 3980 |
| }, |
| { |
| "ce_loss_13": 3.3228425204753878, |
| "ce_loss_26": 2.836167597770691, |
| "ce_loss_39": 2.3086318761110305, |
| "ce_loss_52": 1.431598064303398, |
| "ce_loss_7": 3.617774724960327, |
| "epoch": 0.399, |
| "grad_norm": 20.48469634280121, |
| "kl_loss_13": 3879.6, |
| "kl_loss_26": 2876.0, |
| "kl_loss_39": 1769.2, |
| "kl_loss_7": 4495.2, |
| "learning_rate": 0.0006650325535423167, |
| "loss": 6653.5, |
| "step": 3990 |
| }, |
| { |
| "ce_loss_13": 3.3134547114372253, |
| "ce_loss_26": 2.8307457506656646, |
| "ce_loss_39": 2.29368577003479, |
| "ce_loss_52": 1.3969372153282165, |
| "ce_loss_7": 3.6135133028030397, |
| "epoch": 0.4, |
| "grad_norm": 21.23260680511818, |
| "kl_loss_13": 3962.8, |
| "kl_loss_26": 2951.2, |
| "kl_loss_39": 1800.2, |
| "kl_loss_7": 4588.0, |
| "learning_rate": 0.0006635339816587109, |
| "loss": 6715.2, |
| "step": 4000 |
| }, |
| { |
| "ce_loss_13": 3.478778451681137, |
| "ce_loss_26": 2.984225571155548, |
| "ce_loss_39": 2.432309350371361, |
| "ce_loss_52": 1.4644457131624222, |
| "ce_loss_7": 3.782983124256134, |
| "epoch": 0.401, |
| "grad_norm": 21.3701964180473, |
| "kl_loss_13": 4117.6, |
| "kl_loss_26": 3090.4, |
| "kl_loss_39": 1932.4, |
| "kl_loss_7": 4754.8, |
| "learning_rate": 0.0006620337629897252, |
| "loss": 6698.2, |
| "step": 4010 |
| }, |
| { |
| "ce_loss_13": 3.3284165620803834, |
| "ce_loss_26": 2.8434600114822386, |
| "ce_loss_39": 2.311116448044777, |
| "ce_loss_52": 1.4240004986524581, |
| "ce_loss_7": 3.625540155172348, |
| "epoch": 0.402, |
| "grad_norm": 20.004792328254855, |
| "kl_loss_13": 3939.2, |
| "kl_loss_26": 2918.4, |
| "kl_loss_39": 1780.6, |
| "kl_loss_7": 4553.2, |
| "learning_rate": 0.0006605319126425454, |
| "loss": 6664.7, |
| "step": 4020 |
| }, |
| { |
| "ce_loss_13": 3.366453301906586, |
| "ce_loss_26": 2.86657951772213, |
| "ce_loss_39": 2.3278372526168822, |
| "ce_loss_52": 1.4299451738595963, |
| "ce_loss_7": 3.6654918253421784, |
| "epoch": 0.403, |
| "grad_norm": 20.588190398863947, |
| "kl_loss_13": 4014.8, |
| "kl_loss_26": 2970.0, |
| "kl_loss_39": 1823.4, |
| "kl_loss_7": 4628.8, |
| "learning_rate": 0.0006590284457407876, |
| "loss": 6644.4, |
| "step": 4030 |
| }, |
| { |
| "ce_loss_13": 3.3478448331356048, |
| "ce_loss_26": 2.870484399795532, |
| "ce_loss_39": 2.341677349805832, |
| "ce_loss_52": 1.4598551213741302, |
| "ce_loss_7": 3.644811862707138, |
| "epoch": 0.404, |
| "grad_norm": 20.8221732353937, |
| "kl_loss_13": 3908.8, |
| "kl_loss_26": 2901.6, |
| "kl_loss_39": 1782.0, |
| "kl_loss_7": 4526.4, |
| "learning_rate": 0.0006575233774243465, |
| "loss": 6645.55, |
| "step": 4040 |
| }, |
| { |
| "ce_loss_13": 3.28169704079628, |
| "ce_loss_26": 2.7861692667007447, |
| "ce_loss_39": 2.2428950667381287, |
| "ce_loss_52": 1.374313686788082, |
| "ce_loss_7": 3.5797726988792418, |
| "epoch": 0.405, |
| "grad_norm": 21.110763703238916, |
| "kl_loss_13": 3936.4, |
| "kl_loss_26": 2902.0, |
| "kl_loss_39": 1754.0, |
| "kl_loss_7": 4561.2, |
| "learning_rate": 0.0006560167228492435, |
| "loss": 6646.3, |
| "step": 4050 |
| }, |
| { |
| "ce_loss_13": 3.44179083108902, |
| "ce_loss_26": 2.9405432820320128, |
| "ce_loss_39": 2.3968080401420595, |
| "ce_loss_52": 1.4659699857234956, |
| "ce_loss_7": 3.736131912469864, |
| "epoch": 0.406, |
| "grad_norm": 20.50041516447212, |
| "kl_loss_13": 4061.2, |
| "kl_loss_26": 3015.6, |
| "kl_loss_39": 1863.6, |
| "kl_loss_7": 4672.4, |
| "learning_rate": 0.0006545084971874737, |
| "loss": 6655.9, |
| "step": 4060 |
| }, |
| { |
| "ce_loss_13": 3.360958731174469, |
| "ce_loss_26": 2.870532661676407, |
| "ce_loss_39": 2.321683007478714, |
| "ce_loss_52": 1.4091651737689972, |
| "ce_loss_7": 3.6588110864162444, |
| "epoch": 0.407, |
| "grad_norm": 20.463583112470857, |
| "kl_loss_13": 3997.2, |
| "kl_loss_26": 2979.2, |
| "kl_loss_39": 1826.4, |
| "kl_loss_7": 4618.8, |
| "learning_rate": 0.0006529987156268526, |
| "loss": 6617.7, |
| "step": 4070 |
| }, |
| { |
| "ce_loss_13": 3.2686664044857023, |
| "ce_loss_26": 2.7765056490898132, |
| "ce_loss_39": 2.238715943694115, |
| "ce_loss_52": 1.362110722064972, |
| "ce_loss_7": 3.557782357931137, |
| "epoch": 0.408, |
| "grad_norm": 21.232766427379584, |
| "kl_loss_13": 3924.8, |
| "kl_loss_26": 2908.8, |
| "kl_loss_39": 1777.2, |
| "kl_loss_7": 4538.0, |
| "learning_rate": 0.0006514873933708637, |
| "loss": 6653.7, |
| "step": 4080 |
| }, |
| { |
| "ce_loss_13": 3.275262689590454, |
| "ce_loss_26": 2.7900135934352877, |
| "ce_loss_39": 2.2641283214092254, |
| "ce_loss_52": 1.3853250756859778, |
| "ce_loss_7": 3.584109377861023, |
| "epoch": 0.409, |
| "grad_norm": 21.583109836521306, |
| "kl_loss_13": 3887.6, |
| "kl_loss_26": 2868.8, |
| "kl_loss_39": 1755.0, |
| "kl_loss_7": 4526.4, |
| "learning_rate": 0.0006499745456385053, |
| "loss": 6553.8, |
| "step": 4090 |
| }, |
| { |
| "ce_loss_13": 3.3286924988031386, |
| "ce_loss_26": 2.8467950344085695, |
| "ce_loss_39": 2.312630409002304, |
| "ce_loss_52": 1.424817180633545, |
| "ce_loss_7": 3.6197818219661713, |
| "epoch": 0.41, |
| "grad_norm": 20.90973793223202, |
| "kl_loss_13": 3933.2, |
| "kl_loss_26": 2921.2, |
| "kl_loss_39": 1789.5, |
| "kl_loss_7": 4539.2, |
| "learning_rate": 0.0006484601876641375, |
| "loss": 6620.35, |
| "step": 4100 |
| }, |
| { |
| "ce_loss_13": 3.414019340276718, |
| "ce_loss_26": 2.922485715150833, |
| "ce_loss_39": 2.3825585186481475, |
| "ce_loss_52": 1.4558052003383637, |
| "ce_loss_7": 3.709526652097702, |
| "epoch": 0.411, |
| "grad_norm": 21.026331487000416, |
| "kl_loss_13": 4013.6, |
| "kl_loss_26": 2986.4, |
| "kl_loss_39": 1846.6, |
| "kl_loss_7": 4632.0, |
| "learning_rate": 0.000646944334697328, |
| "loss": 6576.6, |
| "step": 4110 |
| }, |
| { |
| "ce_loss_13": 3.3258216440677644, |
| "ce_loss_26": 2.848787486553192, |
| "ce_loss_39": 2.3223425179719923, |
| "ce_loss_52": 1.4663115084171294, |
| "ce_loss_7": 3.604213911294937, |
| "epoch": 0.412, |
| "grad_norm": 20.97048408186008, |
| "kl_loss_13": 3820.4, |
| "kl_loss_26": 2832.4, |
| "kl_loss_39": 1718.8, |
| "kl_loss_7": 4410.0, |
| "learning_rate": 0.0006454270020026995, |
| "loss": 6611.1, |
| "step": 4120 |
| }, |
| { |
| "ce_loss_13": 3.3510149538517, |
| "ce_loss_26": 2.853396385908127, |
| "ce_loss_39": 2.3119456827640534, |
| "ce_loss_52": 1.430704912543297, |
| "ce_loss_7": 3.642722541093826, |
| "epoch": 0.413, |
| "grad_norm": 21.962054178499802, |
| "kl_loss_13": 3953.6, |
| "kl_loss_26": 2916.4, |
| "kl_loss_39": 1780.4, |
| "kl_loss_7": 4556.4, |
| "learning_rate": 0.0006439082048597755, |
| "loss": 6584.4, |
| "step": 4130 |
| }, |
| { |
| "ce_loss_13": 3.3082414746284483, |
| "ce_loss_26": 2.8318909227848055, |
| "ce_loss_39": 2.309774273633957, |
| "ce_loss_52": 1.4488343179225922, |
| "ce_loss_7": 3.6024456560611724, |
| "epoch": 0.414, |
| "grad_norm": 21.685181722314038, |
| "kl_loss_13": 3830.8, |
| "kl_loss_26": 2848.8, |
| "kl_loss_39": 1746.2, |
| "kl_loss_7": 4442.8, |
| "learning_rate": 0.0006423879585628261, |
| "loss": 6547.1, |
| "step": 4140 |
| }, |
| { |
| "ce_loss_13": 3.3523667633533476, |
| "ce_loss_26": 2.855451303720474, |
| "ce_loss_39": 2.3214808642864226, |
| "ce_loss_52": 1.434419831633568, |
| "ce_loss_7": 3.63877694606781, |
| "epoch": 0.415, |
| "grad_norm": 20.289217196894946, |
| "kl_loss_13": 3971.6, |
| "kl_loss_26": 2938.8, |
| "kl_loss_39": 1799.8, |
| "kl_loss_7": 4575.6, |
| "learning_rate": 0.0006408662784207149, |
| "loss": 6535.7, |
| "step": 4150 |
| }, |
| { |
| "ce_loss_13": 3.351851773262024, |
| "ce_loss_26": 2.864642024040222, |
| "ce_loss_39": 2.3303563445806503, |
| "ce_loss_52": 1.417774812877178, |
| "ce_loss_7": 3.6486425340175628, |
| "epoch": 0.416, |
| "grad_norm": 20.99970502270892, |
| "kl_loss_13": 4014.4, |
| "kl_loss_26": 2990.4, |
| "kl_loss_39": 1838.4, |
| "kl_loss_7": 4632.8, |
| "learning_rate": 0.0006393431797567439, |
| "loss": 6546.0, |
| "step": 4160 |
| }, |
| { |
| "ce_loss_13": 3.3471143901348115, |
| "ce_loss_26": 2.866150665283203, |
| "ce_loss_39": 2.3319087445735933, |
| "ce_loss_52": 1.4416520655155183, |
| "ce_loss_7": 3.6357293486595155, |
| "epoch": 0.417, |
| "grad_norm": 21.192503416792082, |
| "kl_loss_13": 3909.6, |
| "kl_loss_26": 2915.8, |
| "kl_loss_39": 1798.8, |
| "kl_loss_7": 4510.8, |
| "learning_rate": 0.0006378186779084996, |
| "loss": 6527.0, |
| "step": 4170 |
| }, |
| { |
| "ce_loss_13": 3.316433811187744, |
| "ce_loss_26": 2.8411558747291563, |
| "ce_loss_39": 2.3257210671901705, |
| "ce_loss_52": 1.4415073692798615, |
| "ce_loss_7": 3.604754400253296, |
| "epoch": 0.418, |
| "grad_norm": 20.807157262193087, |
| "kl_loss_13": 3869.2, |
| "kl_loss_26": 2884.4, |
| "kl_loss_39": 1787.2, |
| "kl_loss_7": 4470.0, |
| "learning_rate": 0.0006362927882276989, |
| "loss": 6561.5, |
| "step": 4180 |
| }, |
| { |
| "ce_loss_13": 3.339698684215546, |
| "ce_loss_26": 2.85506985783577, |
| "ce_loss_39": 2.311668387055397, |
| "ce_loss_52": 1.4213855370879174, |
| "ce_loss_7": 3.6274226009845734, |
| "epoch": 0.419, |
| "grad_norm": 22.02061084804507, |
| "kl_loss_13": 3937.6, |
| "kl_loss_26": 2928.0, |
| "kl_loss_39": 1790.0, |
| "kl_loss_7": 4535.2, |
| "learning_rate": 0.000634765526080034, |
| "loss": 6534.9, |
| "step": 4190 |
| }, |
| { |
| "ce_loss_13": 3.304267328977585, |
| "ce_loss_26": 2.8152280390262603, |
| "ce_loss_39": 2.277574297785759, |
| "ce_loss_52": 1.3985714688897133, |
| "ce_loss_7": 3.598735523223877, |
| "epoch": 0.42, |
| "grad_norm": 19.98512507622475, |
| "kl_loss_13": 3911.6, |
| "kl_loss_26": 2888.0, |
| "kl_loss_39": 1761.0, |
| "kl_loss_7": 4520.8, |
| "learning_rate": 0.0006332369068450174, |
| "loss": 6522.4, |
| "step": 4200 |
| }, |
| { |
| "ce_loss_13": 3.2766413748264314, |
| "ce_loss_26": 2.793798440694809, |
| "ce_loss_39": 2.2647649705410005, |
| "ce_loss_52": 1.404912966489792, |
| "ce_loss_7": 3.5614658653736115, |
| "epoch": 0.421, |
| "grad_norm": 21.98548722652707, |
| "kl_loss_13": 3862.8, |
| "kl_loss_26": 2858.0, |
| "kl_loss_39": 1744.0, |
| "kl_loss_7": 4457.6, |
| "learning_rate": 0.0006317069459158283, |
| "loss": 6461.5, |
| "step": 4210 |
| }, |
| { |
| "ce_loss_13": 3.303953742980957, |
| "ce_loss_26": 2.817542538046837, |
| "ce_loss_39": 2.2884632468223574, |
| "ce_loss_52": 1.414811021089554, |
| "ce_loss_7": 3.5892197132110595, |
| "epoch": 0.422, |
| "grad_norm": 21.22519434260025, |
| "kl_loss_13": 3878.0, |
| "kl_loss_26": 2873.4, |
| "kl_loss_39": 1750.6, |
| "kl_loss_7": 4478.4, |
| "learning_rate": 0.0006301756586991561, |
| "loss": 6510.1, |
| "step": 4220 |
| }, |
| { |
| "ce_loss_13": 3.3445753276348116, |
| "ce_loss_26": 2.8686348736286162, |
| "ce_loss_39": 2.3429405450820924, |
| "ce_loss_52": 1.4785997077822686, |
| "ce_loss_7": 3.6385815382003783, |
| "epoch": 0.423, |
| "grad_norm": 19.644468709446457, |
| "kl_loss_13": 3856.4, |
| "kl_loss_26": 2870.4, |
| "kl_loss_39": 1760.0, |
| "kl_loss_7": 4462.4, |
| "learning_rate": 0.0006286430606150459, |
| "loss": 6493.9, |
| "step": 4230 |
| }, |
| { |
| "ce_loss_13": 3.3064311265945436, |
| "ce_loss_26": 2.8393130600452423, |
| "ce_loss_39": 2.321084627509117, |
| "ce_loss_52": 1.4542193472385407, |
| "ce_loss_7": 3.591093236207962, |
| "epoch": 0.424, |
| "grad_norm": 19.629096721329073, |
| "kl_loss_13": 3821.6, |
| "kl_loss_26": 2834.0, |
| "kl_loss_39": 1732.2, |
| "kl_loss_7": 4414.0, |
| "learning_rate": 0.0006271091670967436, |
| "loss": 6458.7, |
| "step": 4240 |
| }, |
| { |
| "ce_loss_13": 3.335456043481827, |
| "ce_loss_26": 2.8595637679100037, |
| "ce_loss_39": 2.319040137529373, |
| "ce_loss_52": 1.4537455767393113, |
| "ce_loss_7": 3.6277152955532075, |
| "epoch": 0.425, |
| "grad_norm": 22.23231840847551, |
| "kl_loss_13": 3861.2, |
| "kl_loss_26": 2865.2, |
| "kl_loss_39": 1725.4, |
| "kl_loss_7": 4470.8, |
| "learning_rate": 0.0006255739935905395, |
| "loss": 6438.9, |
| "step": 4250 |
| }, |
| { |
| "ce_loss_13": 3.312756323814392, |
| "ce_loss_26": 2.828430265188217, |
| "ce_loss_39": 2.290999186038971, |
| "ce_loss_52": 1.4147280350327491, |
| "ce_loss_7": 3.606942754983902, |
| "epoch": 0.426, |
| "grad_norm": 22.096816976355754, |
| "kl_loss_13": 3909.6, |
| "kl_loss_26": 2901.6, |
| "kl_loss_39": 1772.8, |
| "kl_loss_7": 4524.4, |
| "learning_rate": 0.0006240375555556145, |
| "loss": 6443.7, |
| "step": 4260 |
| }, |
| { |
| "ce_loss_13": 3.2455935895442964, |
| "ce_loss_26": 2.764835333824158, |
| "ce_loss_39": 2.2400916039943697, |
| "ce_loss_52": 1.3983588561415672, |
| "ce_loss_7": 3.5336900293827056, |
| "epoch": 0.427, |
| "grad_norm": 21.016189257560278, |
| "kl_loss_13": 3811.6, |
| "kl_loss_26": 2807.4, |
| "kl_loss_39": 1694.2, |
| "kl_loss_7": 4414.0, |
| "learning_rate": 0.000622499868463882, |
| "loss": 6395.1, |
| "step": 4270 |
| }, |
| { |
| "ce_loss_13": 3.3086226165294645, |
| "ce_loss_26": 2.823494350910187, |
| "ce_loss_39": 2.301421931385994, |
| "ce_loss_52": 1.4462398916482926, |
| "ce_loss_7": 3.595276767015457, |
| "epoch": 0.428, |
| "grad_norm": 21.54081801528653, |
| "kl_loss_13": 3848.8, |
| "kl_loss_26": 2835.6, |
| "kl_loss_39": 1725.6, |
| "kl_loss_7": 4449.6, |
| "learning_rate": 0.0006209609477998338, |
| "loss": 6429.2, |
| "step": 4280 |
| }, |
| { |
| "ce_loss_13": 3.34853395819664, |
| "ce_loss_26": 2.8662350177764893, |
| "ce_loss_39": 2.340099334716797, |
| "ce_loss_52": 1.455578488111496, |
| "ce_loss_7": 3.6379907071590423, |
| "epoch": 0.429, |
| "grad_norm": 22.431607304032426, |
| "kl_loss_13": 3907.2, |
| "kl_loss_26": 2905.6, |
| "kl_loss_39": 1784.0, |
| "kl_loss_7": 4512.8, |
| "learning_rate": 0.0006194208090603844, |
| "loss": 6469.9, |
| "step": 4290 |
| }, |
| { |
| "ce_loss_13": 3.2378364205360413, |
| "ce_loss_26": 2.771626591682434, |
| "ce_loss_39": 2.2533502638339997, |
| "ce_loss_52": 1.4288378104567527, |
| "ce_loss_7": 3.523706406354904, |
| "epoch": 0.43, |
| "grad_norm": 19.478272699999504, |
| "kl_loss_13": 3771.6, |
| "kl_loss_26": 2794.0, |
| "kl_loss_39": 1687.8, |
| "kl_loss_7": 4368.8, |
| "learning_rate": 0.0006178794677547138, |
| "loss": 6399.1, |
| "step": 4300 |
| }, |
| { |
| "ce_loss_13": 3.3320172011852263, |
| "ce_loss_26": 2.8396951615810395, |
| "ce_loss_39": 2.305786609649658, |
| "ce_loss_52": 1.434322476387024, |
| "ce_loss_7": 3.622057580947876, |
| "epoch": 0.431, |
| "grad_norm": 21.288095261764262, |
| "kl_loss_13": 3913.6, |
| "kl_loss_26": 2898.0, |
| "kl_loss_39": 1761.0, |
| "kl_loss_7": 4522.8, |
| "learning_rate": 0.0006163369394041111, |
| "loss": 6430.5, |
| "step": 4310 |
| }, |
| { |
| "ce_loss_13": 3.2775667309761047, |
| "ce_loss_26": 2.79736185669899, |
| "ce_loss_39": 2.2730204701423644, |
| "ce_loss_52": 1.4288703322410583, |
| "ce_loss_7": 3.562648755311966, |
| "epoch": 0.432, |
| "grad_norm": 22.037654673864616, |
| "kl_loss_13": 3816.0, |
| "kl_loss_26": 2814.0, |
| "kl_loss_39": 1690.8, |
| "kl_loss_7": 4412.8, |
| "learning_rate": 0.0006147932395418205, |
| "loss": 6392.0, |
| "step": 4320 |
| }, |
| { |
| "ce_loss_13": 3.2999020636081697, |
| "ce_loss_26": 2.8127501010894775, |
| "ce_loss_39": 2.2844816505908967, |
| "ce_loss_52": 1.4203194737434388, |
| "ce_loss_7": 3.5926522493362425, |
| "epoch": 0.433, |
| "grad_norm": 23.105450049389578, |
| "kl_loss_13": 3851.2, |
| "kl_loss_26": 2850.0, |
| "kl_loss_39": 1733.2, |
| "kl_loss_7": 4468.8, |
| "learning_rate": 0.0006132483837128823, |
| "loss": 6416.4, |
| "step": 4330 |
| }, |
| { |
| "ce_loss_13": 3.314070051908493, |
| "ce_loss_26": 2.8264743953943254, |
| "ce_loss_39": 2.295166790485382, |
| "ce_loss_52": 1.4510357692837714, |
| "ce_loss_7": 3.5982041239738463, |
| "epoch": 0.434, |
| "grad_norm": 21.907818649309228, |
| "kl_loss_13": 3842.0, |
| "kl_loss_26": 2836.0, |
| "kl_loss_39": 1709.4, |
| "kl_loss_7": 4428.8, |
| "learning_rate": 0.0006117023874739772, |
| "loss": 6437.0, |
| "step": 4340 |
| }, |
| { |
| "ce_loss_13": 3.296399414539337, |
| "ce_loss_26": 2.8059658110141754, |
| "ce_loss_39": 2.271949994564056, |
| "ce_loss_52": 1.4154132261872292, |
| "ce_loss_7": 3.588996112346649, |
| "epoch": 0.435, |
| "grad_norm": 21.889212253545118, |
| "kl_loss_13": 3898.8, |
| "kl_loss_26": 2887.2, |
| "kl_loss_39": 1746.6, |
| "kl_loss_7": 4508.4, |
| "learning_rate": 0.0006101552663932703, |
| "loss": 6431.3, |
| "step": 4350 |
| }, |
| { |
| "ce_loss_13": 3.306167459487915, |
| "ce_loss_26": 2.8240922570228575, |
| "ce_loss_39": 2.2931392163038256, |
| "ce_loss_52": 1.4310514152050018, |
| "ce_loss_7": 3.596520256996155, |
| "epoch": 0.436, |
| "grad_norm": 21.05075754740656, |
| "kl_loss_13": 3860.0, |
| "kl_loss_26": 2856.4, |
| "kl_loss_39": 1737.6, |
| "kl_loss_7": 4459.2, |
| "learning_rate": 0.0006086070360502539, |
| "loss": 6370.7, |
| "step": 4360 |
| }, |
| { |
| "ce_loss_13": 3.2818971514701842, |
| "ce_loss_26": 2.8096925973892213, |
| "ce_loss_39": 2.2902305334806443, |
| "ce_loss_52": 1.457671320438385, |
| "ce_loss_7": 3.567191207408905, |
| "epoch": 0.437, |
| "grad_norm": 19.98373918443626, |
| "kl_loss_13": 3770.8, |
| "kl_loss_26": 2788.4, |
| "kl_loss_39": 1692.0, |
| "kl_loss_7": 4367.2, |
| "learning_rate": 0.0006070577120355903, |
| "loss": 6341.1, |
| "step": 4370 |
| }, |
| { |
| "ce_loss_13": 3.317246896028519, |
| "ce_loss_26": 2.84905891418457, |
| "ce_loss_39": 2.322802722454071, |
| "ce_loss_52": 1.4916655078530312, |
| "ce_loss_7": 3.593036550283432, |
| "epoch": 0.438, |
| "grad_norm": 20.010722762004242, |
| "kl_loss_13": 3781.2, |
| "kl_loss_26": 2799.2, |
| "kl_loss_39": 1686.2, |
| "kl_loss_7": 4360.0, |
| "learning_rate": 0.0006055073099509549, |
| "loss": 6355.5, |
| "step": 4380 |
| }, |
| { |
| "ce_loss_13": 3.2864172756671906, |
| "ce_loss_26": 2.8085566580295565, |
| "ce_loss_39": 2.2875818789005278, |
| "ce_loss_52": 1.4407859086990356, |
| "ce_loss_7": 3.5741762936115267, |
| "epoch": 0.439, |
| "grad_norm": 21.10222653748024, |
| "kl_loss_13": 3824.4, |
| "kl_loss_26": 2822.4, |
| "kl_loss_39": 1713.2, |
| "kl_loss_7": 4426.0, |
| "learning_rate": 0.0006039558454088796, |
| "loss": 6354.5, |
| "step": 4390 |
| }, |
| { |
| "ce_loss_13": 3.2985159277915956, |
| "ce_loss_26": 2.817130261659622, |
| "ce_loss_39": 2.2846481442451476, |
| "ce_loss_52": 1.4308176964521409, |
| "ce_loss_7": 3.5788950502872465, |
| "epoch": 0.44, |
| "grad_norm": 22.10167857860873, |
| "kl_loss_13": 3852.4, |
| "kl_loss_26": 2856.8, |
| "kl_loss_39": 1727.6, |
| "kl_loss_7": 4434.8, |
| "learning_rate": 0.0006024033340325954, |
| "loss": 6381.3, |
| "step": 4400 |
| }, |
| { |
| "ce_loss_13": 3.2772581815719604, |
| "ce_loss_26": 2.7952946066856383, |
| "ce_loss_39": 2.259748488664627, |
| "ce_loss_52": 1.4122898250818252, |
| "ce_loss_7": 3.564402920007706, |
| "epoch": 0.441, |
| "grad_norm": 21.766013784294948, |
| "kl_loss_13": 3854.4, |
| "kl_loss_26": 2847.6, |
| "kl_loss_39": 1727.0, |
| "kl_loss_7": 4457.2, |
| "learning_rate": 0.0006008497914558743, |
| "loss": 6338.3, |
| "step": 4410 |
| }, |
| { |
| "ce_loss_13": 3.310656875371933, |
| "ce_loss_26": 2.825407701730728, |
| "ce_loss_39": 2.3054873913526537, |
| "ce_loss_52": 1.4574851334095, |
| "ce_loss_7": 3.595499175786972, |
| "epoch": 0.442, |
| "grad_norm": 23.195817032303225, |
| "kl_loss_13": 3826.4, |
| "kl_loss_26": 2816.8, |
| "kl_loss_39": 1707.6, |
| "kl_loss_7": 4415.6, |
| "learning_rate": 0.0005992952333228728, |
| "loss": 6415.4, |
| "step": 4420 |
| }, |
| { |
| "ce_loss_13": 3.147319358587265, |
| "ce_loss_26": 2.6695513784885407, |
| "ce_loss_39": 2.153283026814461, |
| "ce_loss_52": 1.362196257710457, |
| "ce_loss_7": 3.4255874812602998, |
| "epoch": 0.443, |
| "grad_norm": 21.365484698145746, |
| "kl_loss_13": 3677.2, |
| "kl_loss_26": 2685.6, |
| "kl_loss_39": 1588.2, |
| "kl_loss_7": 4260.0, |
| "learning_rate": 0.0005977396752879741, |
| "loss": 6284.8, |
| "step": 4430 |
| }, |
| { |
| "ce_loss_13": 3.2730862379074095, |
| "ce_loss_26": 2.790391606092453, |
| "ce_loss_39": 2.257637658715248, |
| "ce_loss_52": 1.4242565602064132, |
| "ce_loss_7": 3.5511350512504576, |
| "epoch": 0.444, |
| "grad_norm": 20.84050157821156, |
| "kl_loss_13": 3810.8, |
| "kl_loss_26": 2811.6, |
| "kl_loss_39": 1686.6, |
| "kl_loss_7": 4399.2, |
| "learning_rate": 0.0005961831330156305, |
| "loss": 6282.4, |
| "step": 4440 |
| }, |
| { |
| "ce_loss_13": 3.303426647186279, |
| "ce_loss_26": 2.8194876074790955, |
| "ce_loss_39": 2.286717027425766, |
| "ce_loss_52": 1.4367665380239487, |
| "ce_loss_7": 3.589170789718628, |
| "epoch": 0.445, |
| "grad_norm": 22.09664086851361, |
| "kl_loss_13": 3832.4, |
| "kl_loss_26": 2828.0, |
| "kl_loss_39": 1702.2, |
| "kl_loss_7": 4435.6, |
| "learning_rate": 0.0005946256221802051, |
| "loss": 6310.7, |
| "step": 4450 |
| }, |
| { |
| "ce_loss_13": 3.2220256984233857, |
| "ce_loss_26": 2.747016179561615, |
| "ce_loss_39": 2.2203843981027602, |
| "ce_loss_52": 1.4144678741693497, |
| "ce_loss_7": 3.5019364655017853, |
| "epoch": 0.446, |
| "grad_norm": 20.80212123158213, |
| "kl_loss_13": 3738.0, |
| "kl_loss_26": 2737.6, |
| "kl_loss_39": 1635.2, |
| "kl_loss_7": 4324.0, |
| "learning_rate": 0.0005930671584658151, |
| "loss": 6275.9, |
| "step": 4460 |
| }, |
| { |
| "ce_loss_13": 3.262040966749191, |
| "ce_loss_26": 2.7848617672920226, |
| "ce_loss_39": 2.2554671108722686, |
| "ce_loss_52": 1.4137398272752761, |
| "ce_loss_7": 3.5524380266666413, |
| "epoch": 0.447, |
| "grad_norm": 21.070671360315192, |
| "kl_loss_13": 3794.4, |
| "kl_loss_26": 2805.6, |
| "kl_loss_39": 1697.0, |
| "kl_loss_7": 4396.4, |
| "learning_rate": 0.0005915077575661722, |
| "loss": 6360.7, |
| "step": 4470 |
| }, |
| { |
| "ce_loss_13": 3.2109140872955324, |
| "ce_loss_26": 2.7298059910535812, |
| "ce_loss_39": 2.2085951179265977, |
| "ce_loss_52": 1.3873766094446183, |
| "ce_loss_7": 3.4926227211952208, |
| "epoch": 0.448, |
| "grad_norm": 21.38920961497166, |
| "kl_loss_13": 3765.2, |
| "kl_loss_26": 2759.4, |
| "kl_loss_39": 1656.0, |
| "kl_loss_7": 4359.6, |
| "learning_rate": 0.000589947435184427, |
| "loss": 6255.15, |
| "step": 4480 |
| }, |
| { |
| "ce_loss_13": 3.2468604743480682, |
| "ce_loss_26": 2.7669826805591584, |
| "ce_loss_39": 2.2381924211978914, |
| "ce_loss_52": 1.4454800367355347, |
| "ce_loss_7": 3.5310903012752535, |
| "epoch": 0.449, |
| "grad_norm": 23.74435148547982, |
| "kl_loss_13": 3708.0, |
| "kl_loss_26": 2716.8, |
| "kl_loss_39": 1595.4, |
| "kl_loss_7": 4307.2, |
| "learning_rate": 0.0005883862070330078, |
| "loss": 6262.9, |
| "step": 4490 |
| }, |
| { |
| "ce_loss_13": 3.2490183234214784, |
| "ce_loss_26": 2.775378829240799, |
| "ce_loss_39": 2.259204548597336, |
| "ce_loss_52": 1.4259025424718856, |
| "ce_loss_7": 3.532491201162338, |
| "epoch": 0.45, |
| "grad_norm": 19.921742072679248, |
| "kl_loss_13": 3736.0, |
| "kl_loss_26": 2748.4, |
| "kl_loss_39": 1665.2, |
| "kl_loss_7": 4324.4, |
| "learning_rate": 0.0005868240888334653, |
| "loss": 6279.4, |
| "step": 4500 |
| }, |
| { |
| "ce_loss_13": 3.2022728264331817, |
| "ce_loss_26": 2.7251765221357345, |
| "ce_loss_39": 2.2201029896736144, |
| "ce_loss_52": 1.4243690267205238, |
| "ce_loss_7": 3.4856902956962585, |
| "epoch": 0.451, |
| "grad_norm": 22.336812688943994, |
| "kl_loss_13": 3701.6, |
| "kl_loss_26": 2716.6, |
| "kl_loss_39": 1627.0, |
| "kl_loss_7": 4291.6, |
| "learning_rate": 0.0005852610963163119, |
| "loss": 6274.9, |
| "step": 4510 |
| }, |
| { |
| "ce_loss_13": 3.2056246638298034, |
| "ce_loss_26": 2.735306566953659, |
| "ce_loss_39": 2.2278982251882553, |
| "ce_loss_52": 1.4315001338720321, |
| "ce_loss_7": 3.4921528518199922, |
| "epoch": 0.452, |
| "grad_norm": 21.324834799180188, |
| "kl_loss_13": 3671.2, |
| "kl_loss_26": 2692.4, |
| "kl_loss_39": 1610.6, |
| "kl_loss_7": 4263.2, |
| "learning_rate": 0.0005836972452208654, |
| "loss": 6241.8, |
| "step": 4520 |
| }, |
| { |
| "ce_loss_13": 3.2711530566215514, |
| "ce_loss_26": 2.794381695985794, |
| "ce_loss_39": 2.2675902634859084, |
| "ce_loss_52": 1.4365027844905853, |
| "ce_loss_7": 3.5576207876205443, |
| "epoch": 0.453, |
| "grad_norm": 21.88775011761487, |
| "kl_loss_13": 3792.8, |
| "kl_loss_26": 2804.8, |
| "kl_loss_39": 1700.8, |
| "kl_loss_7": 4388.0, |
| "learning_rate": 0.0005821325512950885, |
| "loss": 6283.8, |
| "step": 4530 |
| }, |
| { |
| "ce_loss_13": 3.2904800713062285, |
| "ce_loss_26": 2.8134379625320434, |
| "ce_loss_39": 2.2945436596870423, |
| "ce_loss_52": 1.476692470908165, |
| "ce_loss_7": 3.5703530073165894, |
| "epoch": 0.454, |
| "grad_norm": 20.942055908262446, |
| "kl_loss_13": 3751.6, |
| "kl_loss_26": 2758.0, |
| "kl_loss_39": 1655.0, |
| "kl_loss_7": 4336.0, |
| "learning_rate": 0.0005805670302954321, |
| "loss": 6268.6, |
| "step": 4540 |
| }, |
| { |
| "ce_loss_13": 3.2201909184455872, |
| "ce_loss_26": 2.737530159950256, |
| "ce_loss_39": 2.2144886016845704, |
| "ce_loss_52": 1.4106894597411155, |
| "ce_loss_7": 3.5021199345588685, |
| "epoch": 0.455, |
| "grad_norm": 21.81892564093558, |
| "kl_loss_13": 3758.8, |
| "kl_loss_26": 2750.8, |
| "kl_loss_39": 1624.2, |
| "kl_loss_7": 4358.4, |
| "learning_rate": 0.000579000697986675, |
| "loss": 6232.8, |
| "step": 4550 |
| }, |
| { |
| "ce_loss_13": 3.2489894032478333, |
| "ce_loss_26": 2.780492717027664, |
| "ce_loss_39": 2.2484638780355453, |
| "ce_loss_52": 1.4397764205932617, |
| "ce_loss_7": 3.5364687144756317, |
| "epoch": 0.456, |
| "grad_norm": 21.02937623207538, |
| "kl_loss_13": 3754.8, |
| "kl_loss_26": 2776.8, |
| "kl_loss_39": 1651.6, |
| "kl_loss_7": 4345.6, |
| "learning_rate": 0.0005774335701417662, |
| "loss": 6241.6, |
| "step": 4560 |
| }, |
| { |
| "ce_loss_13": 3.2101799607276917, |
| "ce_loss_26": 2.7431035935878754, |
| "ce_loss_39": 2.2130339086055755, |
| "ce_loss_52": 1.4163681983947753, |
| "ce_loss_7": 3.4954857528209686, |
| "epoch": 0.457, |
| "grad_norm": 19.83299768002262, |
| "kl_loss_13": 3705.2, |
| "kl_loss_26": 2723.2, |
| "kl_loss_39": 1618.4, |
| "kl_loss_7": 4301.2, |
| "learning_rate": 0.0005758656625416658, |
| "loss": 6247.2, |
| "step": 4570 |
| }, |
| { |
| "ce_loss_13": 3.2813266932964327, |
| "ce_loss_26": 2.793111354112625, |
| "ce_loss_39": 2.2625322908163072, |
| "ce_loss_52": 1.4499622374773025, |
| "ce_loss_7": 3.5644542396068575, |
| "epoch": 0.458, |
| "grad_norm": 21.53754747250698, |
| "kl_loss_13": 3797.6, |
| "kl_loss_26": 2782.8, |
| "kl_loss_39": 1647.4, |
| "kl_loss_7": 4391.2, |
| "learning_rate": 0.0005742969909751859, |
| "loss": 6266.1, |
| "step": 4580 |
| }, |
| { |
| "ce_loss_13": 3.3478006780147553, |
| "ce_loss_26": 2.8727428793907164, |
| "ce_loss_39": 2.3303479075431826, |
| "ce_loss_52": 1.4829013347625732, |
| "ce_loss_7": 3.6364180862903597, |
| "epoch": 0.459, |
| "grad_norm": 21.34326820051386, |
| "kl_loss_13": 3844.0, |
| "kl_loss_26": 2840.4, |
| "kl_loss_39": 1703.4, |
| "kl_loss_7": 4441.6, |
| "learning_rate": 0.0005727275712388318, |
| "loss": 6209.7, |
| "step": 4590 |
| }, |
| { |
| "ce_loss_13": 3.285200160741806, |
| "ce_loss_26": 2.807385641336441, |
| "ce_loss_39": 2.275821554660797, |
| "ce_loss_52": 1.4415770262479781, |
| "ce_loss_7": 3.563661777973175, |
| "epoch": 0.46, |
| "grad_norm": 20.952340509651894, |
| "kl_loss_13": 3807.6, |
| "kl_loss_26": 2814.0, |
| "kl_loss_39": 1683.4, |
| "kl_loss_7": 4390.0, |
| "learning_rate": 0.0005711574191366427, |
| "loss": 6174.2, |
| "step": 4600 |
| }, |
| { |
| "ce_loss_13": 3.2419202089309693, |
| "ce_loss_26": 2.7672870814800263, |
| "ce_loss_39": 2.2521429657936096, |
| "ce_loss_52": 1.4457757875323296, |
| "ce_loss_7": 3.5175400972366333, |
| "epoch": 0.461, |
| "grad_norm": 20.667083707625775, |
| "kl_loss_13": 3685.6, |
| "kl_loss_26": 2706.8, |
| "kl_loss_39": 1618.2, |
| "kl_loss_7": 4262.4, |
| "learning_rate": 0.0005695865504800327, |
| "loss": 6154.6, |
| "step": 4610 |
| }, |
| { |
| "ce_loss_13": 3.2078490257263184, |
| "ce_loss_26": 2.7373934209346773, |
| "ce_loss_39": 2.2357589691877364, |
| "ce_loss_52": 1.4443277925252915, |
| "ce_loss_7": 3.4876007556915285, |
| "epoch": 0.462, |
| "grad_norm": 21.07091453525893, |
| "kl_loss_13": 3645.6, |
| "kl_loss_26": 2666.8, |
| "kl_loss_39": 1598.2, |
| "kl_loss_7": 4226.0, |
| "learning_rate": 0.0005680149810876322, |
| "loss": 6178.4, |
| "step": 4620 |
| }, |
| { |
| "ce_loss_13": 3.2486107409000398, |
| "ce_loss_26": 2.7601176381111143, |
| "ce_loss_39": 2.227588337659836, |
| "ce_loss_52": 1.4004375696182252, |
| "ce_loss_7": 3.532775843143463, |
| "epoch": 0.463, |
| "grad_norm": 21.569532668695917, |
| "kl_loss_13": 3786.8, |
| "kl_loss_26": 2780.4, |
| "kl_loss_39": 1664.6, |
| "kl_loss_7": 4386.0, |
| "learning_rate": 0.0005664427267851271, |
| "loss": 6215.6, |
| "step": 4630 |
| }, |
| { |
| "ce_loss_13": 3.2331403851509095, |
| "ce_loss_26": 2.7579665184020996, |
| "ce_loss_39": 2.233389773964882, |
| "ce_loss_52": 1.4353074416518212, |
| "ce_loss_7": 3.5171454668045046, |
| "epoch": 0.464, |
| "grad_norm": 21.55234160622014, |
| "kl_loss_13": 3697.6, |
| "kl_loss_26": 2708.8, |
| "kl_loss_39": 1599.2, |
| "kl_loss_7": 4292.8, |
| "learning_rate": 0.0005648698034051009, |
| "loss": 6233.0, |
| "step": 4640 |
| }, |
| { |
| "ce_loss_13": 3.264119005203247, |
| "ce_loss_26": 2.784970927238464, |
| "ce_loss_39": 2.2625187635421753, |
| "ce_loss_52": 1.4551048219203948, |
| "ce_loss_7": 3.536862540245056, |
| "epoch": 0.465, |
| "grad_norm": 21.943732618524454, |
| "kl_loss_13": 3715.6, |
| "kl_loss_26": 2727.6, |
| "kl_loss_39": 1626.8, |
| "kl_loss_7": 4294.8, |
| "learning_rate": 0.0005632962267868747, |
| "loss": 6180.9, |
| "step": 4650 |
| }, |
| { |
| "ce_loss_13": 3.124424380064011, |
| "ce_loss_26": 2.670001748204231, |
| "ce_loss_39": 2.161810302734375, |
| "ce_loss_52": 1.3920277938246728, |
| "ce_loss_7": 3.4072051107883454, |
| "epoch": 0.466, |
| "grad_norm": 19.992372851535457, |
| "kl_loss_13": 3608.0, |
| "kl_loss_26": 2641.4, |
| "kl_loss_39": 1573.8, |
| "kl_loss_7": 4190.0, |
| "learning_rate": 0.0005617220127763474, |
| "loss": 6158.8, |
| "step": 4660 |
| }, |
| { |
| "ce_loss_13": 3.2301677465438843, |
| "ce_loss_26": 2.7488952726125717, |
| "ce_loss_39": 2.2431567162275314, |
| "ce_loss_52": 1.4423212110996246, |
| "ce_loss_7": 3.5069880545139314, |
| "epoch": 0.467, |
| "grad_norm": 20.78261479761198, |
| "kl_loss_13": 3680.4, |
| "kl_loss_26": 2686.0, |
| "kl_loss_39": 1603.8, |
| "kl_loss_7": 4262.0, |
| "learning_rate": 0.0005601471772258368, |
| "loss": 6129.5, |
| "step": 4670 |
| }, |
| { |
| "ce_loss_13": 3.2057377636432647, |
| "ce_loss_26": 2.7412378191947937, |
| "ce_loss_39": 2.2341296702623366, |
| "ce_loss_52": 1.4304189920425414, |
| "ce_loss_7": 3.4817180752754213, |
| "epoch": 0.468, |
| "grad_norm": 20.848135049030358, |
| "kl_loss_13": 3672.8, |
| "kl_loss_26": 2706.4, |
| "kl_loss_39": 1624.6, |
| "kl_loss_7": 4245.6, |
| "learning_rate": 0.0005585717359939192, |
| "loss": 6123.3, |
| "step": 4680 |
| }, |
| { |
| "ce_loss_13": 3.233567637205124, |
| "ce_loss_26": 2.768052551150322, |
| "ce_loss_39": 2.25777924656868, |
| "ce_loss_52": 1.4504828751087189, |
| "ce_loss_7": 3.5125366508960725, |
| "epoch": 0.469, |
| "grad_norm": 20.709517983153315, |
| "kl_loss_13": 3660.4, |
| "kl_loss_26": 2696.0, |
| "kl_loss_39": 1619.6, |
| "kl_loss_7": 4241.2, |
| "learning_rate": 0.0005569957049452703, |
| "loss": 6101.6, |
| "step": 4690 |
| }, |
| { |
| "ce_loss_13": 3.2725342512130737, |
| "ce_loss_26": 2.7866754591464997, |
| "ce_loss_39": 2.2480324536561964, |
| "ce_loss_52": 1.404937854409218, |
| "ce_loss_7": 3.5641084611415863, |
| "epoch": 0.47, |
| "grad_norm": 20.633433200619724, |
| "kl_loss_13": 3863.2, |
| "kl_loss_26": 2851.6, |
| "kl_loss_39": 1720.6, |
| "kl_loss_7": 4468.0, |
| "learning_rate": 0.0005554190999505056, |
| "loss": 6211.0, |
| "step": 4700 |
| }, |
| { |
| "ce_loss_13": 3.2052918612957, |
| "ce_loss_26": 2.732904624938965, |
| "ce_loss_39": 2.2116665810346605, |
| "ce_loss_52": 1.4257652133703231, |
| "ce_loss_7": 3.4880705952644346, |
| "epoch": 0.471, |
| "grad_norm": 20.706622626666558, |
| "kl_loss_13": 3660.0, |
| "kl_loss_26": 2670.8, |
| "kl_loss_39": 1571.8, |
| "kl_loss_7": 4252.4, |
| "learning_rate": 0.0005538419368860196, |
| "loss": 6097.3, |
| "step": 4710 |
| }, |
| { |
| "ce_loss_13": 3.201172482967377, |
| "ce_loss_26": 2.726384937763214, |
| "ce_loss_39": 2.198946151137352, |
| "ce_loss_52": 1.4113327443599701, |
| "ce_loss_7": 3.487533462047577, |
| "epoch": 0.472, |
| "grad_norm": 21.400203482886983, |
| "kl_loss_13": 3675.6, |
| "kl_loss_26": 2693.2, |
| "kl_loss_39": 1591.2, |
| "kl_loss_7": 4272.0, |
| "learning_rate": 0.0005522642316338268, |
| "loss": 6121.3, |
| "step": 4720 |
| }, |
| { |
| "ce_loss_13": 3.224886018037796, |
| "ce_loss_26": 2.7551407277584077, |
| "ce_loss_39": 2.2328554034233092, |
| "ce_loss_52": 1.4563202857971191, |
| "ce_loss_7": 3.5022718131542208, |
| "epoch": 0.473, |
| "grad_norm": 21.66295321363831, |
| "kl_loss_13": 3648.0, |
| "kl_loss_26": 2662.8, |
| "kl_loss_39": 1575.8, |
| "kl_loss_7": 4219.6, |
| "learning_rate": 0.0005506860000814017, |
| "loss": 6051.3, |
| "step": 4730 |
| }, |
| { |
| "ce_loss_13": 3.2025643050670625, |
| "ce_loss_26": 2.7286852061748506, |
| "ce_loss_39": 2.21729561984539, |
| "ce_loss_52": 1.453881350159645, |
| "ce_loss_7": 3.4832063794136046, |
| "epoch": 0.474, |
| "grad_norm": 20.603608171505254, |
| "kl_loss_13": 3623.6, |
| "kl_loss_26": 2642.4, |
| "kl_loss_39": 1549.2, |
| "kl_loss_7": 4212.8, |
| "learning_rate": 0.0005491072581215186, |
| "loss": 6098.5, |
| "step": 4740 |
| }, |
| { |
| "ce_loss_13": 3.212699604034424, |
| "ce_loss_26": 2.732209050655365, |
| "ce_loss_39": 2.2089115262031553, |
| "ce_loss_52": 1.4185278177261353, |
| "ce_loss_7": 3.4970239818096163, |
| "epoch": 0.475, |
| "grad_norm": 20.455172908061705, |
| "kl_loss_13": 3701.2, |
| "kl_loss_26": 2708.4, |
| "kl_loss_39": 1600.0, |
| "kl_loss_7": 4294.0, |
| "learning_rate": 0.0005475280216520913, |
| "loss": 6092.7, |
| "step": 4750 |
| }, |
| { |
| "ce_loss_13": 3.161313956975937, |
| "ce_loss_26": 2.6922851324081423, |
| "ce_loss_39": 2.185682702064514, |
| "ce_loss_52": 1.4132045745849608, |
| "ce_loss_7": 3.4361780524253844, |
| "epoch": 0.476, |
| "grad_norm": 21.048159476746886, |
| "kl_loss_13": 3632.8, |
| "kl_loss_26": 2655.2, |
| "kl_loss_39": 1574.6, |
| "kl_loss_7": 4208.4, |
| "learning_rate": 0.0005459483065760138, |
| "loss": 6159.3, |
| "step": 4760 |
| }, |
| { |
| "ce_loss_13": 3.2172334492206573, |
| "ce_loss_26": 2.739536887407303, |
| "ce_loss_39": 2.2192301630973814, |
| "ce_loss_52": 1.4282803654670715, |
| "ce_loss_7": 3.5064621806144713, |
| "epoch": 0.477, |
| "grad_norm": 20.62269454966768, |
| "kl_loss_13": 3708.4, |
| "kl_loss_26": 2710.0, |
| "kl_loss_39": 1610.2, |
| "kl_loss_7": 4301.6, |
| "learning_rate": 0.0005443681288009991, |
| "loss": 6104.1, |
| "step": 4770 |
| }, |
| { |
| "ce_loss_13": 3.217255789041519, |
| "ce_loss_26": 2.7369415044784544, |
| "ce_loss_39": 2.2001087069511414, |
| "ce_loss_52": 1.401385571062565, |
| "ce_loss_7": 3.5032376050949097, |
| "epoch": 0.478, |
| "grad_norm": 20.40853719962087, |
| "kl_loss_13": 3760.0, |
| "kl_loss_26": 2763.2, |
| "kl_loss_39": 1625.0, |
| "kl_loss_7": 4353.2, |
| "learning_rate": 0.0005427875042394199, |
| "loss": 6064.0, |
| "step": 4780 |
| }, |
| { |
| "ce_loss_13": 3.199622023105621, |
| "ce_loss_26": 2.734530872106552, |
| "ce_loss_39": 2.2225404649972917, |
| "ce_loss_52": 1.4552370458841324, |
| "ce_loss_7": 3.475612831115723, |
| "epoch": 0.479, |
| "grad_norm": 21.111949592982874, |
| "kl_loss_13": 3598.8, |
| "kl_loss_26": 2623.2, |
| "kl_loss_39": 1545.4, |
| "kl_loss_7": 4177.6, |
| "learning_rate": 0.0005412064488081482, |
| "loss": 6074.2, |
| "step": 4790 |
| }, |
| { |
| "ce_loss_13": 3.1485446810722353, |
| "ce_loss_26": 2.6845290422439576, |
| "ce_loss_39": 2.1669752955436707, |
| "ce_loss_52": 1.4116393029689789, |
| "ce_loss_7": 3.423712509870529, |
| "epoch": 0.48, |
| "grad_norm": 20.355440951189763, |
| "kl_loss_13": 3606.4, |
| "kl_loss_26": 2638.4, |
| "kl_loss_39": 1543.2, |
| "kl_loss_7": 4182.4, |
| "learning_rate": 0.0005396249784283942, |
| "loss": 6051.0, |
| "step": 4800 |
| }, |
| { |
| "ce_loss_13": 3.1923361301422117, |
| "ce_loss_26": 2.71637277007103, |
| "ce_loss_39": 2.1978514790534973, |
| "ce_loss_52": 1.4327621147036553, |
| "ce_loss_7": 3.47632372379303, |
| "epoch": 0.481, |
| "grad_norm": 22.229636039543518, |
| "kl_loss_13": 3644.0, |
| "kl_loss_26": 2653.2, |
| "kl_loss_39": 1555.8, |
| "kl_loss_7": 4232.8, |
| "learning_rate": 0.0005380431090255476, |
| "loss": 6143.3, |
| "step": 4810 |
| }, |
| { |
| "ce_loss_13": 3.232038801908493, |
| "ce_loss_26": 2.7648274183273314, |
| "ce_loss_39": 2.2573492497205736, |
| "ce_loss_52": 1.4371329843997955, |
| "ce_loss_7": 3.5092617154121397, |
| "epoch": 0.482, |
| "grad_norm": 21.36587062891148, |
| "kl_loss_13": 3704.8, |
| "kl_loss_26": 2740.8, |
| "kl_loss_39": 1651.2, |
| "kl_loss_7": 4281.6, |
| "learning_rate": 0.0005364608565290155, |
| "loss": 6031.2, |
| "step": 4820 |
| }, |
| { |
| "ce_loss_13": 3.250445681810379, |
| "ce_loss_26": 2.773394727706909, |
| "ce_loss_39": 2.243200385570526, |
| "ce_loss_52": 1.4556994497776032, |
| "ce_loss_7": 3.536140114068985, |
| "epoch": 0.483, |
| "grad_norm": 20.760727607225178, |
| "kl_loss_13": 3706.4, |
| "kl_loss_26": 2711.2, |
| "kl_loss_39": 1595.8, |
| "kl_loss_7": 4300.0, |
| "learning_rate": 0.0005348782368720626, |
| "loss": 6094.3, |
| "step": 4830 |
| }, |
| { |
| "ce_loss_13": 3.2234844088554384, |
| "ce_loss_26": 2.7573211640119553, |
| "ce_loss_39": 2.244653856754303, |
| "ce_loss_52": 1.4427421689033508, |
| "ce_loss_7": 3.497196841239929, |
| "epoch": 0.484, |
| "grad_norm": 20.726340018616938, |
| "kl_loss_13": 3685.6, |
| "kl_loss_26": 2708.0, |
| "kl_loss_39": 1610.0, |
| "kl_loss_7": 4262.4, |
| "learning_rate": 0.000533295265991652, |
| "loss": 6062.9, |
| "step": 4840 |
| }, |
| { |
| "ce_loss_13": 3.152006584405899, |
| "ce_loss_26": 2.678810328245163, |
| "ce_loss_39": 2.166279435157776, |
| "ce_loss_52": 1.3957384467124938, |
| "ce_loss_7": 3.4304138660430907, |
| "epoch": 0.485, |
| "grad_norm": 21.522248137711077, |
| "kl_loss_13": 3630.8, |
| "kl_loss_26": 2639.0, |
| "kl_loss_39": 1547.4, |
| "kl_loss_7": 4218.0, |
| "learning_rate": 0.0005317119598282822, |
| "loss": 6033.9, |
| "step": 4850 |
| }, |
| { |
| "ce_loss_13": 3.2258131086826323, |
| "ce_loss_26": 2.7538253903388976, |
| "ce_loss_39": 2.237151172757149, |
| "ce_loss_52": 1.4640001267194749, |
| "ce_loss_7": 3.493991768360138, |
| "epoch": 0.486, |
| "grad_norm": 19.71152116189248, |
| "kl_loss_13": 3656.8, |
| "kl_loss_26": 2681.2, |
| "kl_loss_39": 1581.0, |
| "kl_loss_7": 4221.6, |
| "learning_rate": 0.0005301283343258293, |
| "loss": 6062.4, |
| "step": 4860 |
| }, |
| { |
| "ce_loss_13": 3.188614493608475, |
| "ce_loss_26": 2.7119751185178758, |
| "ce_loss_39": 2.189143994450569, |
| "ce_loss_52": 1.422459150850773, |
| "ce_loss_7": 3.468545514345169, |
| "epoch": 0.487, |
| "grad_norm": 20.57479406007355, |
| "kl_loss_13": 3638.4, |
| "kl_loss_26": 2642.2, |
| "kl_loss_39": 1539.5, |
| "kl_loss_7": 4222.0, |
| "learning_rate": 0.000528544405431384, |
| "loss": 6047.1, |
| "step": 4870 |
| }, |
| { |
| "ce_loss_13": 3.1630406379699707, |
| "ce_loss_26": 2.6917948126792908, |
| "ce_loss_39": 2.187080183625221, |
| "ce_loss_52": 1.430996198952198, |
| "ce_loss_7": 3.437908464670181, |
| "epoch": 0.488, |
| "grad_norm": 20.266256252328688, |
| "kl_loss_13": 3589.2, |
| "kl_loss_26": 2606.0, |
| "kl_loss_39": 1535.4, |
| "kl_loss_7": 4160.0, |
| "learning_rate": 0.000526960189095093, |
| "loss": 6056.9, |
| "step": 4880 |
| }, |
| { |
| "ce_loss_13": 3.1363641381263734, |
| "ce_loss_26": 2.6843234658241273, |
| "ce_loss_39": 2.1881404638290407, |
| "ce_loss_52": 1.4287778049707414, |
| "ce_loss_7": 3.403191590309143, |
| "epoch": 0.489, |
| "grad_norm": 20.737227217707265, |
| "kl_loss_13": 3534.8, |
| "kl_loss_26": 2597.2, |
| "kl_loss_39": 1535.4, |
| "kl_loss_7": 4094.0, |
| "learning_rate": 0.0005253757012699972, |
| "loss": 6013.8, |
| "step": 4890 |
| }, |
| { |
| "ce_loss_13": 3.2066462457180025, |
| "ce_loss_26": 2.7318670630455015, |
| "ce_loss_39": 2.2165933042764663, |
| "ce_loss_52": 1.4400058209896087, |
| "ce_loss_7": 3.4839820206165313, |
| "epoch": 0.49, |
| "grad_norm": 20.942033187869956, |
| "kl_loss_13": 3651.6, |
| "kl_loss_26": 2660.8, |
| "kl_loss_39": 1565.0, |
| "kl_loss_7": 4233.6, |
| "learning_rate": 0.0005237909579118712, |
| "loss": 5973.0, |
| "step": 4900 |
| }, |
| { |
| "ce_loss_13": 3.2107683062553405, |
| "ce_loss_26": 2.723215198516846, |
| "ce_loss_39": 2.2134319245815277, |
| "ce_loss_52": 1.4413373351097107, |
| "ce_loss_7": 3.498019593954086, |
| "epoch": 0.491, |
| "grad_norm": 19.852386563944762, |
| "kl_loss_13": 3647.2, |
| "kl_loss_26": 2634.4, |
| "kl_loss_39": 1545.0, |
| "kl_loss_7": 4246.4, |
| "learning_rate": 0.0005222059749790631, |
| "loss": 5997.8, |
| "step": 4910 |
| }, |
| { |
| "ce_loss_13": 3.2151939988136293, |
| "ce_loss_26": 2.7481437802314757, |
| "ce_loss_39": 2.2312227368354796, |
| "ce_loss_52": 1.4501317411661148, |
| "ce_loss_7": 3.4992719650268556, |
| "epoch": 0.492, |
| "grad_norm": 21.940050434667352, |
| "kl_loss_13": 3627.6, |
| "kl_loss_26": 2655.2, |
| "kl_loss_39": 1571.4, |
| "kl_loss_7": 4213.6, |
| "learning_rate": 0.0005206207684323337, |
| "loss": 5989.6, |
| "step": 4920 |
| }, |
| { |
| "ce_loss_13": 3.14618239402771, |
| "ce_loss_26": 2.6644466161727904, |
| "ce_loss_39": 2.1522305369377137, |
| "ce_loss_52": 1.4040746569633484, |
| "ce_loss_7": 3.430801051855087, |
| "epoch": 0.493, |
| "grad_norm": 21.866819048126402, |
| "kl_loss_13": 3609.6, |
| "kl_loss_26": 2614.8, |
| "kl_loss_39": 1523.4, |
| "kl_loss_7": 4205.6, |
| "learning_rate": 0.000519035354234695, |
| "loss": 5971.3, |
| "step": 4930 |
| }, |
| { |
| "ce_loss_13": 3.2634301006793978, |
| "ce_loss_26": 2.7843244314193725, |
| "ce_loss_39": 2.264421299099922, |
| "ce_loss_52": 1.4652716666460037, |
| "ce_loss_7": 3.5409990727901457, |
| "epoch": 0.494, |
| "grad_norm": 22.02758833423459, |
| "kl_loss_13": 3711.6, |
| "kl_loss_26": 2710.6, |
| "kl_loss_39": 1607.3, |
| "kl_loss_7": 4290.4, |
| "learning_rate": 0.0005174497483512506, |
| "loss": 6017.3, |
| "step": 4940 |
| }, |
| { |
| "ce_loss_13": 3.2103551268577575, |
| "ce_loss_26": 2.7440166890621187, |
| "ce_loss_39": 2.2352791130542755, |
| "ce_loss_52": 1.4523784220218658, |
| "ce_loss_7": 3.486135560274124, |
| "epoch": 0.495, |
| "grad_norm": 23.749732875931574, |
| "kl_loss_13": 3618.0, |
| "kl_loss_26": 2656.6, |
| "kl_loss_39": 1573.0, |
| "kl_loss_7": 4201.2, |
| "learning_rate": 0.0005158639667490339, |
| "loss": 5989.9, |
| "step": 4950 |
| }, |
| { |
| "ce_loss_13": 3.118060350418091, |
| "ce_loss_26": 2.6498723566532134, |
| "ce_loss_39": 2.1354818284511565, |
| "ce_loss_52": 1.3788613289594651, |
| "ce_loss_7": 3.403479200601578, |
| "epoch": 0.496, |
| "grad_norm": 20.524190680845354, |
| "kl_loss_13": 3599.2, |
| "kl_loss_26": 2620.4, |
| "kl_loss_39": 1541.0, |
| "kl_loss_7": 4189.6, |
| "learning_rate": 0.0005142780253968481, |
| "loss": 5973.2, |
| "step": 4960 |
| }, |
| { |
| "ce_loss_13": 3.1573639094829558, |
| "ce_loss_26": 2.6952777743339538, |
| "ce_loss_39": 2.184722366929054, |
| "ce_loss_52": 1.4360924899578094, |
| "ce_loss_7": 3.4324650526046754, |
| "epoch": 0.497, |
| "grad_norm": 21.848899371522094, |
| "kl_loss_13": 3594.0, |
| "kl_loss_26": 2620.8, |
| "kl_loss_39": 1532.4, |
| "kl_loss_7": 4163.6, |
| "learning_rate": 0.0005126919402651053, |
| "loss": 5950.9, |
| "step": 4970 |
| }, |
| { |
| "ce_loss_13": 3.148969703912735, |
| "ce_loss_26": 2.6792631447315216, |
| "ce_loss_39": 2.168180876970291, |
| "ce_loss_52": 1.4135416984558105, |
| "ce_loss_7": 3.4292350709438324, |
| "epoch": 0.498, |
| "grad_norm": 21.082558392676134, |
| "kl_loss_13": 3576.4, |
| "kl_loss_26": 2608.8, |
| "kl_loss_39": 1527.0, |
| "kl_loss_7": 4166.4, |
| "learning_rate": 0.0005111057273256647, |
| "loss": 5924.7, |
| "step": 4980 |
| }, |
| { |
| "ce_loss_13": 3.1738288044929504, |
| "ce_loss_26": 2.7059105813503264, |
| "ce_loss_39": 2.194283801317215, |
| "ce_loss_52": 1.4388620942831039, |
| "ce_loss_7": 3.461875486373901, |
| "epoch": 0.499, |
| "grad_norm": 21.062049454907296, |
| "kl_loss_13": 3574.8, |
| "kl_loss_26": 2594.8, |
| "kl_loss_39": 1521.0, |
| "kl_loss_7": 4165.2, |
| "learning_rate": 0.0005095194025516733, |
| "loss": 5935.8, |
| "step": 4990 |
| }, |
| { |
| "ce_loss_13": 3.2131927073001862, |
| "ce_loss_26": 2.7427126079797746, |
| "ce_loss_39": 2.2358015894889833, |
| "ce_loss_52": 1.4697326198220253, |
| "ce_loss_7": 3.48975727558136, |
| "epoch": 0.5, |
| "grad_norm": 19.951336775236356, |
| "kl_loss_13": 3620.8, |
| "kl_loss_26": 2633.2, |
| "kl_loss_39": 1549.0, |
| "kl_loss_7": 4195.6, |
| "learning_rate": 0.000507932981917404, |
| "loss": 5955.8, |
| "step": 5000 |
| }, |
| { |
| "ce_loss_13": 3.064238077402115, |
| "ce_loss_26": 2.605297487974167, |
| "ce_loss_39": 2.106147512793541, |
| "ce_loss_52": 1.3699454009532928, |
| "ce_loss_7": 3.3354556441307066, |
| "epoch": 0.501, |
| "grad_norm": 22.434294806872032, |
| "kl_loss_13": 3513.6, |
| "kl_loss_26": 2556.0, |
| "kl_loss_39": 1493.0, |
| "kl_loss_7": 4079.2, |
| "learning_rate": 0.0005063464813980949, |
| "loss": 5921.7, |
| "step": 5010 |
| }, |
| { |
| "ce_loss_13": 3.120131802558899, |
| "ce_loss_26": 2.6457916140556335, |
| "ce_loss_39": 2.136381095647812, |
| "ce_loss_52": 1.3973354250192642, |
| "ce_loss_7": 3.3927165508270263, |
| "epoch": 0.502, |
| "grad_norm": 20.534145152408744, |
| "kl_loss_13": 3558.4, |
| "kl_loss_26": 2578.4, |
| "kl_loss_39": 1497.8, |
| "kl_loss_7": 4132.8, |
| "learning_rate": 0.0005047599169697884, |
| "loss": 5945.8, |
| "step": 5020 |
| }, |
| { |
| "ce_loss_13": 3.155858016014099, |
| "ce_loss_26": 2.683425110578537, |
| "ce_loss_39": 2.1650480359792708, |
| "ce_loss_52": 1.426735344529152, |
| "ce_loss_7": 3.4369399666786196, |
| "epoch": 0.503, |
| "grad_norm": 20.583275474881205, |
| "kl_loss_13": 3593.2, |
| "kl_loss_26": 2606.4, |
| "kl_loss_39": 1509.8, |
| "kl_loss_7": 4176.0, |
| "learning_rate": 0.000503173304609171, |
| "loss": 5949.4, |
| "step": 5030 |
| }, |
| { |
| "ce_loss_13": 3.224624240398407, |
| "ce_loss_26": 2.742176574468613, |
| "ce_loss_39": 2.2156084358692167, |
| "ce_loss_52": 1.4474807173013686, |
| "ce_loss_7": 3.508391612768173, |
| "epoch": 0.504, |
| "grad_norm": 20.860727938912092, |
| "kl_loss_13": 3678.4, |
| "kl_loss_26": 2680.8, |
| "kl_loss_39": 1568.2, |
| "kl_loss_7": 4265.2, |
| "learning_rate": 0.0005015866602934111, |
| "loss": 5957.4, |
| "step": 5040 |
| }, |
| { |
| "ce_loss_13": 3.1227844834327696, |
| "ce_loss_26": 2.661714029312134, |
| "ce_loss_39": 2.1653817743062973, |
| "ce_loss_52": 1.4367017298936844, |
| "ce_loss_7": 3.3902225315570833, |
| "epoch": 0.505, |
| "grad_norm": 19.797756106985307, |
| "kl_loss_13": 3490.8, |
| "kl_loss_26": 2530.0, |
| "kl_loss_39": 1472.6, |
| "kl_loss_7": 4055.6, |
| "learning_rate": 0.0005, |
| "loss": 5927.3, |
| "step": 5050 |
| }, |
| { |
| "ce_loss_13": 3.177449029684067, |
| "ce_loss_26": 2.716005155444145, |
| "ce_loss_39": 2.205168914794922, |
| "ce_loss_52": 1.442250807583332, |
| "ce_loss_7": 3.453594130277634, |
| "epoch": 0.506, |
| "grad_norm": 20.433104016560257, |
| "kl_loss_13": 3588.0, |
| "kl_loss_26": 2622.2, |
| "kl_loss_39": 1546.2, |
| "kl_loss_7": 4161.6, |
| "learning_rate": 0.0004984133397065889, |
| "loss": 5913.9, |
| "step": 5060 |
| }, |
| { |
| "ce_loss_13": 3.1423951983451843, |
| "ce_loss_26": 2.674048882722855, |
| "ce_loss_39": 2.1621575862169267, |
| "ce_loss_52": 1.4322402387857438, |
| "ce_loss_7": 3.416734743118286, |
| "epoch": 0.507, |
| "grad_norm": 20.444836977919294, |
| "kl_loss_13": 3545.2, |
| "kl_loss_26": 2564.8, |
| "kl_loss_39": 1486.0, |
| "kl_loss_7": 4119.6, |
| "learning_rate": 0.0004968266953908291, |
| "loss": 5880.6, |
| "step": 5070 |
| }, |
| { |
| "ce_loss_13": 3.070253336429596, |
| "ce_loss_26": 2.6069840848445893, |
| "ce_loss_39": 2.1048853427171705, |
| "ce_loss_52": 1.3886964708566665, |
| "ce_loss_7": 3.350748908519745, |
| "epoch": 0.508, |
| "grad_norm": 21.18309883625556, |
| "kl_loss_13": 3487.2, |
| "kl_loss_26": 2520.8, |
| "kl_loss_39": 1460.8, |
| "kl_loss_7": 4069.6, |
| "learning_rate": 0.0004952400830302117, |
| "loss": 5885.3, |
| "step": 5080 |
| }, |
| { |
| "ce_loss_13": 3.077636110782623, |
| "ce_loss_26": 2.6177482545375823, |
| "ce_loss_39": 2.1197337061166763, |
| "ce_loss_52": 1.3917517423629762, |
| "ce_loss_7": 3.3575133979320526, |
| "epoch": 0.509, |
| "grad_norm": 19.77626859818484, |
| "kl_loss_13": 3496.0, |
| "kl_loss_26": 2536.8, |
| "kl_loss_39": 1477.0, |
| "kl_loss_7": 4073.6, |
| "learning_rate": 0.0004936535186019053, |
| "loss": 5872.1, |
| "step": 5090 |
| }, |
| { |
| "ce_loss_13": 3.178175300359726, |
| "ce_loss_26": 2.701016789674759, |
| "ce_loss_39": 2.1893070548772813, |
| "ce_loss_52": 1.4174780696630478, |
| "ce_loss_7": 3.4626995623111725, |
| "epoch": 0.51, |
| "grad_norm": 19.62760678285289, |
| "kl_loss_13": 3640.4, |
| "kl_loss_26": 2642.0, |
| "kl_loss_39": 1551.2, |
| "kl_loss_7": 4232.4, |
| "learning_rate": 0.000492067018082596, |
| "loss": 5937.7, |
| "step": 5100 |
| }, |
| { |
| "ce_loss_13": 3.169191563129425, |
| "ce_loss_26": 2.708436530828476, |
| "ce_loss_39": 2.1886946499347686, |
| "ce_loss_52": 1.4288851469755173, |
| "ce_loss_7": 3.4514395534992217, |
| "epoch": 0.511, |
| "grad_norm": 20.552371425986603, |
| "kl_loss_13": 3588.0, |
| "kl_loss_26": 2634.8, |
| "kl_loss_39": 1546.0, |
| "kl_loss_7": 4178.4, |
| "learning_rate": 0.0004904805974483267, |
| "loss": 5867.4, |
| "step": 5110 |
| }, |
| { |
| "ce_loss_13": 3.152217388153076, |
| "ce_loss_26": 2.6922530949115755, |
| "ce_loss_39": 2.181437623500824, |
| "ce_loss_52": 1.4460914835333825, |
| "ce_loss_7": 3.429844158887863, |
| "epoch": 0.512, |
| "grad_norm": 20.36189036420726, |
| "kl_loss_13": 3504.0, |
| "kl_loss_26": 2543.2, |
| "kl_loss_39": 1481.0, |
| "kl_loss_7": 4075.2, |
| "learning_rate": 0.0004888942726743353, |
| "loss": 5848.7, |
| "step": 5120 |
| }, |
| { |
| "ce_loss_13": 3.1243839859962463, |
| "ce_loss_26": 2.6554999887943267, |
| "ce_loss_39": 2.1515370845794677, |
| "ce_loss_52": 1.4089675784111022, |
| "ce_loss_7": 3.4115704774856566, |
| "epoch": 0.513, |
| "grad_norm": 20.218379503515084, |
| "kl_loss_13": 3534.0, |
| "kl_loss_26": 2557.4, |
| "kl_loss_39": 1487.0, |
| "kl_loss_7": 4121.6, |
| "learning_rate": 0.0004873080597348947, |
| "loss": 5856.5, |
| "step": 5130 |
| }, |
| { |
| "ce_loss_13": 3.223481798171997, |
| "ce_loss_26": 2.754591333866119, |
| "ce_loss_39": 2.236158034205437, |
| "ce_loss_52": 1.4544740557670592, |
| "ce_loss_7": 3.5026029109954835, |
| "epoch": 0.514, |
| "grad_norm": 20.810099829480396, |
| "kl_loss_13": 3656.8, |
| "kl_loss_26": 2670.8, |
| "kl_loss_39": 1575.8, |
| "kl_loss_7": 4237.2, |
| "learning_rate": 0.0004857219746031519, |
| "loss": 5882.8, |
| "step": 5140 |
| }, |
| { |
| "ce_loss_13": 3.146134835481644, |
| "ce_loss_26": 2.680527698993683, |
| "ce_loss_39": 2.168422257900238, |
| "ce_loss_52": 1.4367385059595108, |
| "ce_loss_7": 3.423633599281311, |
| "epoch": 0.515, |
| "grad_norm": 20.424421047481275, |
| "kl_loss_13": 3516.8, |
| "kl_loss_26": 2539.6, |
| "kl_loss_39": 1472.0, |
| "kl_loss_7": 4094.8, |
| "learning_rate": 0.0004841360332509663, |
| "loss": 5895.45, |
| "step": 5150 |
| }, |
| { |
| "ce_loss_13": 3.1724873065948485, |
| "ce_loss_26": 2.7010417520999908, |
| "ce_loss_39": 2.188371130824089, |
| "ce_loss_52": 1.4464277178049088, |
| "ce_loss_7": 3.4471147775650026, |
| "epoch": 0.516, |
| "grad_norm": 20.221560032680266, |
| "kl_loss_13": 3555.2, |
| "kl_loss_26": 2575.2, |
| "kl_loss_39": 1495.4, |
| "kl_loss_7": 4124.8, |
| "learning_rate": 0.0004825502516487497, |
| "loss": 5883.8, |
| "step": 5160 |
| }, |
| { |
| "ce_loss_13": 3.1568028390407563, |
| "ce_loss_26": 2.695826065540314, |
| "ce_loss_39": 2.1907377928495406, |
| "ce_loss_52": 1.467595374584198, |
| "ce_loss_7": 3.4282791554927825, |
| "epoch": 0.517, |
| "grad_norm": 21.375776905692355, |
| "kl_loss_13": 3490.8, |
| "kl_loss_26": 2528.8, |
| "kl_loss_39": 1463.6, |
| "kl_loss_7": 4049.2, |
| "learning_rate": 0.00048096464576530507, |
| "loss": 5813.7, |
| "step": 5170 |
| }, |
| { |
| "ce_loss_13": 3.0959118604660034, |
| "ce_loss_26": 2.6389028072357177, |
| "ce_loss_39": 2.1422775775194167, |
| "ce_loss_52": 1.4158973768353462, |
| "ce_loss_7": 3.3716647744178774, |
| "epoch": 0.518, |
| "grad_norm": 20.796367284367612, |
| "kl_loss_13": 3490.0, |
| "kl_loss_26": 2527.0, |
| "kl_loss_39": 1467.8, |
| "kl_loss_7": 4068.4, |
| "learning_rate": 0.00047937923156766646, |
| "loss": 5832.2, |
| "step": 5180 |
| }, |
| { |
| "ce_loss_13": 3.181716579198837, |
| "ce_loss_26": 2.7091287076473236, |
| "ce_loss_39": 2.205903950333595, |
| "ce_loss_52": 1.4591009467840195, |
| "ce_loss_7": 3.4553593516349794, |
| "epoch": 0.519, |
| "grad_norm": 21.655670358447043, |
| "kl_loss_13": 3570.4, |
| "kl_loss_26": 2594.8, |
| "kl_loss_39": 1522.8, |
| "kl_loss_7": 4142.4, |
| "learning_rate": 0.00047779402502093696, |
| "loss": 5844.1, |
| "step": 5190 |
| }, |
| { |
| "ce_loss_13": 3.114813321828842, |
| "ce_loss_26": 2.6511843532323836, |
| "ce_loss_39": 2.1350393682718276, |
| "ce_loss_52": 1.4030846193432809, |
| "ce_loss_7": 3.3881891489028932, |
| "epoch": 0.52, |
| "grad_norm": 21.261317204954832, |
| "kl_loss_13": 3537.2, |
| "kl_loss_26": 2569.4, |
| "kl_loss_39": 1490.2, |
| "kl_loss_7": 4112.8, |
| "learning_rate": 0.0004762090420881289, |
| "loss": 5904.5, |
| "step": 5200 |
| }, |
| { |
| "ce_loss_13": 3.178787976503372, |
| "ce_loss_26": 2.716679725050926, |
| "ce_loss_39": 2.214344197511673, |
| "ce_loss_52": 1.4766788110136986, |
| "ce_loss_7": 3.445727747678757, |
| "epoch": 0.521, |
| "grad_norm": 21.5237930218517, |
| "kl_loss_13": 3533.2, |
| "kl_loss_26": 2579.6, |
| "kl_loss_39": 1511.4, |
| "kl_loss_7": 4104.0, |
| "learning_rate": 0.00047462429873000296, |
| "loss": 5807.7, |
| "step": 5210 |
| }, |
| { |
| "ce_loss_13": 3.1838007628917695, |
| "ce_loss_26": 2.7048393905162813, |
| "ce_loss_39": 2.19879055917263, |
| "ce_loss_52": 1.4492772698402405, |
| "ce_loss_7": 3.460488295555115, |
| "epoch": 0.522, |
| "grad_norm": 23.509628530732027, |
| "kl_loss_13": 3558.8, |
| "kl_loss_26": 2571.2, |
| "kl_loss_39": 1503.6, |
| "kl_loss_7": 4141.6, |
| "learning_rate": 0.0004730398109049071, |
| "loss": 5850.6, |
| "step": 5220 |
| }, |
| { |
| "ce_loss_13": 3.1778542578220366, |
| "ce_loss_26": 2.723250871896744, |
| "ce_loss_39": 2.2311396062374116, |
| "ce_loss_52": 1.4870843648910523, |
| "ce_loss_7": 3.4500561714172364, |
| "epoch": 0.523, |
| "grad_norm": 20.540010606623795, |
| "kl_loss_13": 3511.6, |
| "kl_loss_26": 2561.4, |
| "kl_loss_39": 1495.8, |
| "kl_loss_7": 4082.4, |
| "learning_rate": 0.000471455594568616, |
| "loss": 5864.9, |
| "step": 5230 |
| }, |
| { |
| "ce_loss_13": 3.184338331222534, |
| "ce_loss_26": 2.719081574678421, |
| "ce_loss_39": 2.1989813148975372, |
| "ce_loss_52": 1.4533298462629318, |
| "ce_loss_7": 3.465100187063217, |
| "epoch": 0.524, |
| "grad_norm": 19.66913851254436, |
| "kl_loss_13": 3588.4, |
| "kl_loss_26": 2616.0, |
| "kl_loss_39": 1518.0, |
| "kl_loss_7": 4170.0, |
| "learning_rate": 0.00046987166567417086, |
| "loss": 5881.2, |
| "step": 5240 |
| }, |
| { |
| "ce_loss_13": 3.097227877378464, |
| "ce_loss_26": 2.6349131643772123, |
| "ce_loss_39": 2.1339926183223725, |
| "ce_loss_52": 1.3901747956871986, |
| "ce_loss_7": 3.3714997708797454, |
| "epoch": 0.525, |
| "grad_norm": 21.094189724694242, |
| "kl_loss_13": 3503.6, |
| "kl_loss_26": 2546.6, |
| "kl_loss_39": 1491.4, |
| "kl_loss_7": 4079.2, |
| "learning_rate": 0.00046828804017171776, |
| "loss": 5869.8, |
| "step": 5250 |
| }, |
| { |
| "ce_loss_13": 3.1320539236068727, |
| "ce_loss_26": 2.667567166686058, |
| "ce_loss_39": 2.17643720805645, |
| "ce_loss_52": 1.450567391514778, |
| "ce_loss_7": 3.4080025017261506, |
| "epoch": 0.526, |
| "grad_norm": 20.61619717149242, |
| "kl_loss_13": 3507.6, |
| "kl_loss_26": 2536.4, |
| "kl_loss_39": 1483.6, |
| "kl_loss_7": 4082.4, |
| "learning_rate": 0.00046670473400834805, |
| "loss": 5811.0, |
| "step": 5260 |
| }, |
| { |
| "ce_loss_13": 3.123244607448578, |
| "ce_loss_26": 2.6622200667858125, |
| "ce_loss_39": 2.1446547359228134, |
| "ce_loss_52": 1.4058131739497184, |
| "ce_loss_7": 3.393282580375671, |
| "epoch": 0.527, |
| "grad_norm": 20.13067058218258, |
| "kl_loss_13": 3536.4, |
| "kl_loss_26": 2574.0, |
| "kl_loss_39": 1499.8, |
| "kl_loss_7": 4103.6, |
| "learning_rate": 0.00046512176312793734, |
| "loss": 5799.9, |
| "step": 5270 |
| }, |
| { |
| "ce_loss_13": 3.0799066185951234, |
| "ce_loss_26": 2.620718148350716, |
| "ce_loss_39": 2.1284131199121474, |
| "ce_loss_52": 1.4004437893629074, |
| "ce_loss_7": 3.357691395282745, |
| "epoch": 0.528, |
| "grad_norm": 20.729785702601426, |
| "kl_loss_13": 3458.8, |
| "kl_loss_26": 2500.6, |
| "kl_loss_39": 1456.6, |
| "kl_loss_7": 4039.2, |
| "learning_rate": 0.00046353914347098467, |
| "loss": 5784.9, |
| "step": 5280 |
| }, |
| { |
| "ce_loss_13": 3.100508135557175, |
| "ce_loss_26": 2.645443448424339, |
| "ce_loss_39": 2.140786075592041, |
| "ce_loss_52": 1.4068747192621232, |
| "ce_loss_7": 3.3814845025539397, |
| "epoch": 0.529, |
| "grad_norm": 20.87154357218591, |
| "kl_loss_13": 3506.0, |
| "kl_loss_26": 2553.6, |
| "kl_loss_39": 1489.5, |
| "kl_loss_7": 4091.2, |
| "learning_rate": 0.0004619568909744524, |
| "loss": 5772.2, |
| "step": 5290 |
| }, |
| { |
| "ce_loss_13": 3.127873086929321, |
| "ce_loss_26": 2.6634323090314864, |
| "ce_loss_39": 2.157263731956482, |
| "ce_loss_52": 1.4283392548561096, |
| "ce_loss_7": 3.3975966036319734, |
| "epoch": 0.53, |
| "grad_norm": 20.20322543048614, |
| "kl_loss_13": 3490.8, |
| "kl_loss_26": 2524.4, |
| "kl_loss_39": 1474.6, |
| "kl_loss_7": 4055.2, |
| "learning_rate": 0.00046037502157160573, |
| "loss": 5795.9, |
| "step": 5300 |
| }, |
| { |
| "ce_loss_13": 3.076627087593079, |
| "ce_loss_26": 2.616869166493416, |
| "ce_loss_39": 2.117774197459221, |
| "ce_loss_52": 1.4120985105633737, |
| "ce_loss_7": 3.3510021567344666, |
| "epoch": 0.531, |
| "grad_norm": 20.811499078870163, |
| "kl_loss_13": 3444.0, |
| "kl_loss_26": 2482.4, |
| "kl_loss_39": 1422.8, |
| "kl_loss_7": 4016.0, |
| "learning_rate": 0.00045879355119185207, |
| "loss": 5749.7, |
| "step": 5310 |
| }, |
| { |
| "ce_loss_13": 3.1226239800453186, |
| "ce_loss_26": 2.662443572282791, |
| "ce_loss_39": 2.156531369686127, |
| "ce_loss_52": 1.4430431425571442, |
| "ce_loss_7": 3.396123135089874, |
| "epoch": 0.532, |
| "grad_norm": 18.59996382548897, |
| "kl_loss_13": 3496.0, |
| "kl_loss_26": 2528.8, |
| "kl_loss_39": 1445.2, |
| "kl_loss_7": 4069.2, |
| "learning_rate": 0.0004572124957605803, |
| "loss": 5776.5, |
| "step": 5320 |
| }, |
| { |
| "ce_loss_13": 3.1100788176059724, |
| "ce_loss_26": 2.647669917345047, |
| "ce_loss_39": 2.140716627240181, |
| "ce_loss_52": 1.4189704924821853, |
| "ce_loss_7": 3.387048715353012, |
| "epoch": 0.533, |
| "grad_norm": 20.905976368739214, |
| "kl_loss_13": 3497.6, |
| "kl_loss_26": 2533.0, |
| "kl_loss_39": 1466.2, |
| "kl_loss_7": 4079.6, |
| "learning_rate": 0.00045563187119900103, |
| "loss": 5752.6, |
| "step": 5330 |
| }, |
| { |
| "ce_loss_13": 3.06461244225502, |
| "ce_loss_26": 2.6009975552558897, |
| "ce_loss_39": 2.1015468716621397, |
| "ce_loss_52": 1.3909726276993752, |
| "ce_loss_7": 3.3485201001167297, |
| "epoch": 0.534, |
| "grad_norm": 19.831333266576646, |
| "kl_loss_13": 3462.4, |
| "kl_loss_26": 2487.0, |
| "kl_loss_39": 1427.1, |
| "kl_loss_7": 4053.2, |
| "learning_rate": 0.00045405169342398633, |
| "loss": 5804.75, |
| "step": 5340 |
| }, |
| { |
| "ce_loss_13": 3.1465537667274477, |
| "ce_loss_26": 2.6862030625343323, |
| "ce_loss_39": 2.1878136694431305, |
| "ce_loss_52": 1.4572405338287353, |
| "ce_loss_7": 3.4159956216812133, |
| "epoch": 0.535, |
| "grad_norm": 20.130648070413336, |
| "kl_loss_13": 3486.8, |
| "kl_loss_26": 2529.2, |
| "kl_loss_39": 1463.2, |
| "kl_loss_7": 4050.4, |
| "learning_rate": 0.0004524719783479088, |
| "loss": 5785.8, |
| "step": 5350 |
| }, |
| { |
| "ce_loss_13": 3.1608322679996492, |
| "ce_loss_26": 2.6899396955966948, |
| "ce_loss_39": 2.192571198940277, |
| "ce_loss_52": 1.466444182395935, |
| "ce_loss_7": 3.4340961396694185, |
| "epoch": 0.536, |
| "grad_norm": 21.175578440541553, |
| "kl_loss_13": 3511.2, |
| "kl_loss_26": 2537.2, |
| "kl_loss_39": 1482.0, |
| "kl_loss_7": 4079.6, |
| "learning_rate": 0.00045089274187848144, |
| "loss": 5831.5, |
| "step": 5360 |
| }, |
| { |
| "ce_loss_13": 3.122362142801285, |
| "ce_loss_26": 2.6539461642503737, |
| "ce_loss_39": 2.1548154592514037, |
| "ce_loss_52": 1.4296748742461205, |
| "ce_loss_7": 3.403215527534485, |
| "epoch": 0.537, |
| "grad_norm": 21.426385415702033, |
| "kl_loss_13": 3491.6, |
| "kl_loss_26": 2524.6, |
| "kl_loss_39": 1464.2, |
| "kl_loss_7": 4068.8, |
| "learning_rate": 0.00044931399991859835, |
| "loss": 5775.7, |
| "step": 5370 |
| }, |
| { |
| "ce_loss_13": 3.0924407064914705, |
| "ce_loss_26": 2.6285818815231323, |
| "ce_loss_39": 2.12394041121006, |
| "ce_loss_52": 1.4268731981515885, |
| "ce_loss_7": 3.365303188562393, |
| "epoch": 0.538, |
| "grad_norm": 21.06287805296543, |
| "kl_loss_13": 3469.6, |
| "kl_loss_26": 2495.8, |
| "kl_loss_39": 1421.6, |
| "kl_loss_7": 4039.2, |
| "learning_rate": 0.00044773576836617336, |
| "loss": 5748.1, |
| "step": 5380 |
| }, |
| { |
| "ce_loss_13": 3.0936496675014498, |
| "ce_loss_26": 2.6234502464532854, |
| "ce_loss_39": 2.112909361720085, |
| "ce_loss_52": 1.3892342567443847, |
| "ce_loss_7": 3.3770604133605957, |
| "epoch": 0.539, |
| "grad_norm": 20.257845085428166, |
| "kl_loss_13": 3512.0, |
| "kl_loss_26": 2533.6, |
| "kl_loss_39": 1454.2, |
| "kl_loss_7": 4101.2, |
| "learning_rate": 0.00044615806311398056, |
| "loss": 5750.7, |
| "step": 5390 |
| }, |
| { |
| "ce_loss_13": 3.1293481528759, |
| "ce_loss_26": 2.661091110110283, |
| "ce_loss_39": 2.1563747018575667, |
| "ce_loss_52": 1.4326880395412445, |
| "ce_loss_7": 3.4015913248062133, |
| "epoch": 0.54, |
| "grad_norm": 19.884995716311128, |
| "kl_loss_13": 3510.0, |
| "kl_loss_26": 2528.6, |
| "kl_loss_39": 1461.8, |
| "kl_loss_7": 4087.2, |
| "learning_rate": 0.00044458090004949454, |
| "loss": 5775.2, |
| "step": 5400 |
| }, |
| { |
| "ce_loss_13": 3.1498380303382874, |
| "ce_loss_26": 2.6860203623771666, |
| "ce_loss_39": 2.184649482369423, |
| "ce_loss_52": 1.4619063645601273, |
| "ce_loss_7": 3.425897455215454, |
| "epoch": 0.541, |
| "grad_norm": 21.787073133904556, |
| "kl_loss_13": 3476.0, |
| "kl_loss_26": 2516.4, |
| "kl_loss_39": 1456.6, |
| "kl_loss_7": 4048.0, |
| "learning_rate": 0.0004430042950547297, |
| "loss": 5755.9, |
| "step": 5410 |
| }, |
| { |
| "ce_loss_13": 3.1863462030887604, |
| "ce_loss_26": 2.726431465148926, |
| "ce_loss_39": 2.2207835763692856, |
| "ce_loss_52": 1.5021536648273468, |
| "ce_loss_7": 3.454101949930191, |
| "epoch": 0.542, |
| "grad_norm": 20.7568922984159, |
| "kl_loss_13": 3510.0, |
| "kl_loss_26": 2532.8, |
| "kl_loss_39": 1455.8, |
| "kl_loss_7": 4068.8, |
| "learning_rate": 0.0004414282640060809, |
| "loss": 5749.1, |
| "step": 5420 |
| }, |
| { |
| "ce_loss_13": 3.1338580727577208, |
| "ce_loss_26": 2.6763171195983886, |
| "ce_loss_39": 2.181287834048271, |
| "ce_loss_52": 1.4664452508091927, |
| "ce_loss_7": 3.4055596351623536, |
| "epoch": 0.543, |
| "grad_norm": 19.620114685157933, |
| "kl_loss_13": 3421.6, |
| "kl_loss_26": 2476.8, |
| "kl_loss_39": 1437.8, |
| "kl_loss_7": 3988.8, |
| "learning_rate": 0.0004398528227741633, |
| "loss": 5704.8, |
| "step": 5430 |
| }, |
| { |
| "ce_loss_13": 3.104156017303467, |
| "ce_loss_26": 2.639963132143021, |
| "ce_loss_39": 2.127584692835808, |
| "ce_loss_52": 1.4150341883301736, |
| "ce_loss_7": 3.390649217367172, |
| "epoch": 0.544, |
| "grad_norm": 20.487750099372004, |
| "kl_loss_13": 3479.6, |
| "kl_loss_26": 2511.2, |
| "kl_loss_39": 1437.2, |
| "kl_loss_7": 4085.6, |
| "learning_rate": 0.00043827798722365264, |
| "loss": 5688.3, |
| "step": 5440 |
| }, |
| { |
| "ce_loss_13": 3.0290561497211455, |
| "ce_loss_26": 2.5664966076612474, |
| "ce_loss_39": 2.065689593553543, |
| "ce_loss_52": 1.3703163504600524, |
| "ce_loss_7": 3.3105548918247223, |
| "epoch": 0.545, |
| "grad_norm": 20.36704365885761, |
| "kl_loss_13": 3438.0, |
| "kl_loss_26": 2471.0, |
| "kl_loss_39": 1405.0, |
| "kl_loss_7": 4010.4, |
| "learning_rate": 0.00043670377321312535, |
| "loss": 5715.9, |
| "step": 5450 |
| }, |
| { |
| "ce_loss_13": 3.1363044023513793, |
| "ce_loss_26": 2.674453055858612, |
| "ce_loss_39": 2.169647827744484, |
| "ce_loss_52": 1.440093258023262, |
| "ce_loss_7": 3.409278839826584, |
| "epoch": 0.546, |
| "grad_norm": 20.32480480357714, |
| "kl_loss_13": 3496.8, |
| "kl_loss_26": 2524.8, |
| "kl_loss_39": 1466.2, |
| "kl_loss_7": 4078.0, |
| "learning_rate": 0.0004351301965948991, |
| "loss": 5722.2, |
| "step": 5460 |
| }, |
| { |
| "ce_loss_13": 3.152593141794205, |
| "ce_loss_26": 2.6999820828437806, |
| "ce_loss_39": 2.1983327239751818, |
| "ce_loss_52": 1.4628954619169234, |
| "ce_loss_7": 3.4299716293811797, |
| "epoch": 0.547, |
| "grad_norm": 19.856114793460193, |
| "kl_loss_13": 3499.2, |
| "kl_loss_26": 2548.2, |
| "kl_loss_39": 1495.2, |
| "kl_loss_7": 4066.8, |
| "learning_rate": 0.000433557273214873, |
| "loss": 5741.7, |
| "step": 5470 |
| }, |
| { |
| "ce_loss_13": 3.064018839597702, |
| "ce_loss_26": 2.6090955317020414, |
| "ce_loss_39": 2.1058148056268693, |
| "ce_loss_52": 1.4195792496204376, |
| "ce_loss_7": 3.344401216506958, |
| "epoch": 0.548, |
| "grad_norm": 20.616048950091404, |
| "kl_loss_13": 3411.2, |
| "kl_loss_26": 2459.2, |
| "kl_loss_39": 1389.6, |
| "kl_loss_7": 3992.4, |
| "learning_rate": 0.000431985018912368, |
| "loss": 5718.6, |
| "step": 5480 |
| }, |
| { |
| "ce_loss_13": 3.0607047379016876, |
| "ce_loss_26": 2.597879120707512, |
| "ce_loss_39": 2.108409595489502, |
| "ce_loss_52": 1.4123182266950607, |
| "ce_loss_7": 3.336356836557388, |
| "epoch": 0.549, |
| "grad_norm": 20.036021472042393, |
| "kl_loss_13": 3404.4, |
| "kl_loss_26": 2447.2, |
| "kl_loss_39": 1413.8, |
| "kl_loss_7": 3983.2, |
| "learning_rate": 0.0004304134495199674, |
| "loss": 5692.5, |
| "step": 5490 |
| }, |
| { |
| "ce_loss_13": 3.052510768175125, |
| "ce_loss_26": 2.6108580827713013, |
| "ce_loss_39": 2.1228879362344744, |
| "ce_loss_52": 1.4244433492422104, |
| "ce_loss_7": 3.3247893154621124, |
| "epoch": 0.55, |
| "grad_norm": 20.158536855725483, |
| "kl_loss_13": 3391.6, |
| "kl_loss_26": 2469.0, |
| "kl_loss_39": 1430.9, |
| "kl_loss_7": 3959.6, |
| "learning_rate": 0.0004288425808633575, |
| "loss": 5660.8, |
| "step": 5500 |
| }, |
| { |
| "ce_loss_13": 3.164420074224472, |
| "ce_loss_26": 2.6986677646636963, |
| "ce_loss_39": 2.192535865306854, |
| "ce_loss_52": 1.4702347338199615, |
| "ce_loss_7": 3.4305537164211275, |
| "epoch": 0.551, |
| "grad_norm": 21.34549178191646, |
| "kl_loss_13": 3508.0, |
| "kl_loss_26": 2541.6, |
| "kl_loss_39": 1463.2, |
| "kl_loss_7": 4075.6, |
| "learning_rate": 0.0004272724287611684, |
| "loss": 5697.3, |
| "step": 5510 |
| }, |
| { |
| "ce_loss_13": 3.1066312968730925, |
| "ce_loss_26": 2.6526737749576568, |
| "ce_loss_39": 2.1508010149002077, |
| "ce_loss_52": 1.4371357694268228, |
| "ce_loss_7": 3.381526863574982, |
| "epoch": 0.552, |
| "grad_norm": 20.017257048745616, |
| "kl_loss_13": 3449.6, |
| "kl_loss_26": 2497.2, |
| "kl_loss_39": 1440.4, |
| "kl_loss_7": 4027.2, |
| "learning_rate": 0.00042570300902481425, |
| "loss": 5661.1, |
| "step": 5520 |
| }, |
| { |
| "ce_loss_13": 3.0768387794494627, |
| "ce_loss_26": 2.6105665415525436, |
| "ce_loss_39": 2.1041111290454864, |
| "ce_loss_52": 1.3769602328538895, |
| "ce_loss_7": 3.3504232287406923, |
| "epoch": 0.553, |
| "grad_norm": 20.71287480099847, |
| "kl_loss_13": 3497.6, |
| "kl_loss_26": 2529.0, |
| "kl_loss_39": 1464.6, |
| "kl_loss_7": 4067.6, |
| "learning_rate": 0.00042413433745833423, |
| "loss": 5675.2, |
| "step": 5530 |
| }, |
| { |
| "ce_loss_13": 3.078362447023392, |
| "ce_loss_26": 2.6186123132705688, |
| "ce_loss_39": 2.1204461604356766, |
| "ce_loss_52": 1.42312493622303, |
| "ce_loss_7": 3.3565984547138212, |
| "epoch": 0.554, |
| "grad_norm": 20.243506516231662, |
| "kl_loss_13": 3424.0, |
| "kl_loss_26": 2463.4, |
| "kl_loss_39": 1411.4, |
| "kl_loss_7": 4003.6, |
| "learning_rate": 0.0004225664298582339, |
| "loss": 5650.4, |
| "step": 5540 |
| }, |
| { |
| "ce_loss_13": 3.104430967569351, |
| "ce_loss_26": 2.6466626435518266, |
| "ce_loss_39": 2.149568349123001, |
| "ce_loss_52": 1.4317003712058067, |
| "ce_loss_7": 3.380819743871689, |
| "epoch": 0.555, |
| "grad_norm": 20.086456724534386, |
| "kl_loss_13": 3478.8, |
| "kl_loss_26": 2528.0, |
| "kl_loss_39": 1462.6, |
| "kl_loss_7": 4051.6, |
| "learning_rate": 0.000420999302013325, |
| "loss": 5686.3, |
| "step": 5550 |
| }, |
| { |
| "ce_loss_13": 3.09715017080307, |
| "ce_loss_26": 2.6249477684497835, |
| "ce_loss_39": 2.112127733230591, |
| "ce_loss_52": 1.3998657062649726, |
| "ce_loss_7": 3.3848826706409456, |
| "epoch": 0.556, |
| "grad_norm": 19.84836283457165, |
| "kl_loss_13": 3500.0, |
| "kl_loss_26": 2520.0, |
| "kl_loss_39": 1432.4, |
| "kl_loss_7": 4098.4, |
| "learning_rate": 0.000419432969704568, |
| "loss": 5721.05, |
| "step": 5560 |
| }, |
| { |
| "ce_loss_13": 3.1611645042896273, |
| "ce_loss_26": 2.7037321001291277, |
| "ce_loss_39": 2.2008607923984527, |
| "ce_loss_52": 1.4731642618775367, |
| "ce_loss_7": 3.4352382242679598, |
| "epoch": 0.557, |
| "grad_norm": 19.486736620492533, |
| "kl_loss_13": 3494.8, |
| "kl_loss_26": 2542.2, |
| "kl_loss_39": 1471.9, |
| "kl_loss_7": 4071.6, |
| "learning_rate": 0.00041786744870491154, |
| "loss": 5664.5, |
| "step": 5570 |
| }, |
| { |
| "ce_loss_13": 3.1723786175251005, |
| "ce_loss_26": 2.712140661478043, |
| "ce_loss_39": 2.2021081149578094, |
| "ce_loss_52": 1.4713574051856995, |
| "ce_loss_7": 3.4434271275997164, |
| "epoch": 0.558, |
| "grad_norm": 21.812924852409434, |
| "kl_loss_13": 3518.4, |
| "kl_loss_26": 2552.0, |
| "kl_loss_39": 1476.8, |
| "kl_loss_7": 4082.0, |
| "learning_rate": 0.0004163027547791347, |
| "loss": 5667.5, |
| "step": 5580 |
| }, |
| { |
| "ce_loss_13": 3.0999899983406065, |
| "ce_loss_26": 2.6524887919425963, |
| "ce_loss_39": 2.159899726510048, |
| "ce_loss_52": 1.457381361722946, |
| "ce_loss_7": 3.376901388168335, |
| "epoch": 0.559, |
| "grad_norm": 20.855137229386383, |
| "kl_loss_13": 3402.8, |
| "kl_loss_26": 2469.2, |
| "kl_loss_39": 1417.0, |
| "kl_loss_7": 3983.6, |
| "learning_rate": 0.0004147389036836881, |
| "loss": 5623.2, |
| "step": 5590 |
| }, |
| { |
| "ce_loss_13": 3.048000919818878, |
| "ce_loss_26": 2.6028879463672636, |
| "ce_loss_39": 2.110449159145355, |
| "ce_loss_52": 1.4273749262094497, |
| "ce_loss_7": 3.3209989249706267, |
| "epoch": 0.56, |
| "grad_norm": 21.293221784840117, |
| "kl_loss_13": 3370.8, |
| "kl_loss_26": 2436.6, |
| "kl_loss_39": 1393.6, |
| "kl_loss_7": 3935.6, |
| "learning_rate": 0.00041317591116653486, |
| "loss": 5661.4, |
| "step": 5600 |
| }, |
| { |
| "ce_loss_13": 3.117449927330017, |
| "ce_loss_26": 2.6561968684196473, |
| "ce_loss_39": 2.1393496483564376, |
| "ce_loss_52": 1.4263568341732025, |
| "ce_loss_7": 3.3912305176258086, |
| "epoch": 0.561, |
| "grad_norm": 19.806130661521266, |
| "kl_loss_13": 3510.8, |
| "kl_loss_26": 2543.6, |
| "kl_loss_39": 1446.0, |
| "kl_loss_7": 4074.8, |
| "learning_rate": 0.0004116137929669921, |
| "loss": 5646.7, |
| "step": 5610 |
| }, |
| { |
| "ce_loss_13": 3.032761037349701, |
| "ce_loss_26": 2.5814107984304426, |
| "ce_loss_39": 2.092069110274315, |
| "ce_loss_52": 1.4175067842006683, |
| "ce_loss_7": 3.3051693975925445, |
| "epoch": 0.562, |
| "grad_norm": 21.265424868671836, |
| "kl_loss_13": 3342.0, |
| "kl_loss_26": 2397.0, |
| "kl_loss_39": 1360.0, |
| "kl_loss_7": 3902.8, |
| "learning_rate": 0.00041005256481557305, |
| "loss": 5649.3, |
| "step": 5620 |
| }, |
| { |
| "ce_loss_13": 3.1011641681194306, |
| "ce_loss_26": 2.6472670078277587, |
| "ce_loss_39": 2.1640954107046126, |
| "ce_loss_52": 1.4572645276784897, |
| "ce_loss_7": 3.3653019249439238, |
| "epoch": 0.563, |
| "grad_norm": 19.60469852329931, |
| "kl_loss_13": 3397.2, |
| "kl_loss_26": 2452.6, |
| "kl_loss_39": 1426.2, |
| "kl_loss_7": 3957.6, |
| "learning_rate": 0.00040849224243382767, |
| "loss": 5635.6, |
| "step": 5630 |
| }, |
| { |
| "ce_loss_13": 3.0702085912227632, |
| "ce_loss_26": 2.6029874324798583, |
| "ce_loss_39": 2.1070942997932436, |
| "ce_loss_52": 1.4102862730622292, |
| "ce_loss_7": 3.345056527853012, |
| "epoch": 0.564, |
| "grad_norm": 19.66351329562965, |
| "kl_loss_13": 3444.8, |
| "kl_loss_26": 2478.8, |
| "kl_loss_39": 1416.0, |
| "kl_loss_7": 4010.8, |
| "learning_rate": 0.000406932841534185, |
| "loss": 5678.4, |
| "step": 5640 |
| }, |
| { |
| "ce_loss_13": 3.1035412073135378, |
| "ce_loss_26": 2.6404273927211763, |
| "ce_loss_39": 2.148519089818001, |
| "ce_loss_52": 1.4659966766834258, |
| "ce_loss_7": 3.3797259271144866, |
| "epoch": 0.565, |
| "grad_norm": 20.61367124543832, |
| "kl_loss_13": 3418.4, |
| "kl_loss_26": 2452.6, |
| "kl_loss_39": 1399.6, |
| "kl_loss_7": 3989.2, |
| "learning_rate": 0.0004053743778197951, |
| "loss": 5619.4, |
| "step": 5650 |
| }, |
| { |
| "ce_loss_13": 3.074153536558151, |
| "ce_loss_26": 2.6227691769599915, |
| "ce_loss_39": 2.12532425224781, |
| "ce_loss_52": 1.4269753962755203, |
| "ce_loss_7": 3.3447964787483215, |
| "epoch": 0.566, |
| "grad_norm": 20.08816441246758, |
| "kl_loss_13": 3398.8, |
| "kl_loss_26": 2455.2, |
| "kl_loss_39": 1406.8, |
| "kl_loss_7": 3968.8, |
| "learning_rate": 0.0004038168669843697, |
| "loss": 5607.4, |
| "step": 5660 |
| }, |
| { |
| "ce_loss_13": 3.117138236761093, |
| "ce_loss_26": 2.6556679248809814, |
| "ce_loss_39": 2.1434233844280244, |
| "ce_loss_52": 1.4418231889605522, |
| "ce_loss_7": 3.3935614347457888, |
| "epoch": 0.567, |
| "grad_norm": 19.629663422829108, |
| "kl_loss_13": 3460.4, |
| "kl_loss_26": 2499.6, |
| "kl_loss_39": 1423.0, |
| "kl_loss_7": 4038.4, |
| "learning_rate": 0.000402260324712026, |
| "loss": 5653.85, |
| "step": 5670 |
| }, |
| { |
| "ce_loss_13": 3.0241773188114167, |
| "ce_loss_26": 2.5764558911323547, |
| "ce_loss_39": 2.0826069891452788, |
| "ce_loss_52": 1.4262234181165696, |
| "ce_loss_7": 3.292912298440933, |
| "epoch": 0.568, |
| "grad_norm": 19.863333175376273, |
| "kl_loss_13": 3317.2, |
| "kl_loss_26": 2380.6, |
| "kl_loss_39": 1341.9, |
| "kl_loss_7": 3879.6, |
| "learning_rate": 0.00040070476667712743, |
| "loss": 5602.0, |
| "step": 5680 |
| }, |
| { |
| "ce_loss_13": 3.1207240760326385, |
| "ce_loss_26": 2.667350098490715, |
| "ce_loss_39": 2.1637227922677993, |
| "ce_loss_52": 1.4507419973611833, |
| "ce_loss_7": 3.3947570443153383, |
| "epoch": 0.569, |
| "grad_norm": 20.18457161613193, |
| "kl_loss_13": 3470.8, |
| "kl_loss_26": 2511.8, |
| "kl_loss_39": 1453.2, |
| "kl_loss_7": 4038.0, |
| "learning_rate": 0.0003991502085441259, |
| "loss": 5637.8, |
| "step": 5690 |
| }, |
| { |
| "ce_loss_13": 3.052716261148453, |
| "ce_loss_26": 2.5999310851097106, |
| "ce_loss_39": 2.103394716978073, |
| "ce_loss_52": 1.4248915880918502, |
| "ce_loss_7": 3.3296144127845766, |
| "epoch": 0.57, |
| "grad_norm": 21.32933636042312, |
| "kl_loss_13": 3384.4, |
| "kl_loss_26": 2434.4, |
| "kl_loss_39": 1385.3, |
| "kl_loss_7": 3961.6, |
| "learning_rate": 0.0003975966659674047, |
| "loss": 5572.65, |
| "step": 5700 |
| }, |
| { |
| "ce_loss_13": 3.0565085709095, |
| "ce_loss_26": 2.612689271569252, |
| "ce_loss_39": 2.133056679368019, |
| "ce_loss_52": 1.456464058160782, |
| "ce_loss_7": 3.3283673584461213, |
| "epoch": 0.571, |
| "grad_norm": 20.795332274261725, |
| "kl_loss_13": 3328.4, |
| "kl_loss_26": 2402.4, |
| "kl_loss_39": 1384.6, |
| "kl_loss_7": 3893.6, |
| "learning_rate": 0.0003960441545911204, |
| "loss": 5637.0, |
| "step": 5710 |
| }, |
| { |
| "ce_loss_13": 3.100340133905411, |
| "ce_loss_26": 2.6507264733314515, |
| "ce_loss_39": 2.1515814483165743, |
| "ce_loss_52": 1.4689666152000427, |
| "ce_loss_7": 3.36507533788681, |
| "epoch": 0.572, |
| "grad_norm": 19.479562193670343, |
| "kl_loss_13": 3367.6, |
| "kl_loss_26": 2430.4, |
| "kl_loss_39": 1381.6, |
| "kl_loss_7": 3924.4, |
| "learning_rate": 0.0003944926900490452, |
| "loss": 5586.7, |
| "step": 5720 |
| }, |
| { |
| "ce_loss_13": 3.035090607404709, |
| "ce_loss_26": 2.5765923827886583, |
| "ce_loss_39": 2.0693789660930633, |
| "ce_loss_52": 1.394070391356945, |
| "ce_loss_7": 3.3126678228378297, |
| "epoch": 0.573, |
| "grad_norm": 20.769836690311337, |
| "kl_loss_13": 3418.0, |
| "kl_loss_26": 2455.8, |
| "kl_loss_39": 1385.6, |
| "kl_loss_7": 3996.8, |
| "learning_rate": 0.0003929422879644099, |
| "loss": 5607.2, |
| "step": 5730 |
| }, |
| { |
| "ce_loss_13": 3.0840226650238036, |
| "ce_loss_26": 2.6376162350177763, |
| "ce_loss_39": 2.1402535855770113, |
| "ce_loss_52": 1.4594999521970748, |
| "ce_loss_7": 3.3528446674346926, |
| "epoch": 0.574, |
| "grad_norm": 19.55271310482914, |
| "kl_loss_13": 3396.0, |
| "kl_loss_26": 2453.0, |
| "kl_loss_39": 1389.4, |
| "kl_loss_7": 3971.6, |
| "learning_rate": 0.0003913929639497462, |
| "loss": 5561.5, |
| "step": 5740 |
| }, |
| { |
| "ce_loss_13": 3.0994504928588866, |
| "ce_loss_26": 2.645833945274353, |
| "ce_loss_39": 2.151085004210472, |
| "ce_loss_52": 1.453307920694351, |
| "ce_loss_7": 3.375282955169678, |
| "epoch": 0.575, |
| "grad_norm": 20.394651449213793, |
| "kl_loss_13": 3410.8, |
| "kl_loss_26": 2459.0, |
| "kl_loss_39": 1418.8, |
| "kl_loss_7": 3977.2, |
| "learning_rate": 0.00038984473360672965, |
| "loss": 5587.6, |
| "step": 5750 |
| }, |
| { |
| "ce_loss_13": 3.063554251194, |
| "ce_loss_26": 2.602804532647133, |
| "ce_loss_39": 2.112149253487587, |
| "ce_loss_52": 1.4378221184015274, |
| "ce_loss_7": 3.3263466000556945, |
| "epoch": 0.576, |
| "grad_norm": 20.732107583485522, |
| "kl_loss_13": 3335.2, |
| "kl_loss_26": 2397.0, |
| "kl_loss_39": 1362.5, |
| "kl_loss_7": 3885.6, |
| "learning_rate": 0.0003882976125260229, |
| "loss": 5618.5, |
| "step": 5760 |
| }, |
| { |
| "ce_loss_13": 3.011310315132141, |
| "ce_loss_26": 2.5538589358329773, |
| "ce_loss_39": 2.0608473628759385, |
| "ce_loss_52": 1.3871852427721023, |
| "ce_loss_7": 3.2823293566703797, |
| "epoch": 0.577, |
| "grad_norm": 20.222489437839133, |
| "kl_loss_13": 3356.0, |
| "kl_loss_26": 2403.4, |
| "kl_loss_39": 1363.6, |
| "kl_loss_7": 3921.2, |
| "learning_rate": 0.00038675161628711776, |
| "loss": 5583.6, |
| "step": 5770 |
| }, |
| { |
| "ce_loss_13": 3.060704934597015, |
| "ce_loss_26": 2.6082344591617583, |
| "ce_loss_39": 2.1105374455451966, |
| "ce_loss_52": 1.4133148401975633, |
| "ce_loss_7": 3.337174046039581, |
| "epoch": 0.578, |
| "grad_norm": 20.5258419136392, |
| "kl_loss_13": 3407.2, |
| "kl_loss_26": 2468.6, |
| "kl_loss_39": 1416.8, |
| "kl_loss_7": 3978.0, |
| "learning_rate": 0.0003852067604581794, |
| "loss": 5550.9, |
| "step": 5780 |
| }, |
| { |
| "ce_loss_13": 3.0313555896282196, |
| "ce_loss_26": 2.5763757705688475, |
| "ce_loss_39": 2.090648338198662, |
| "ce_loss_52": 1.4237483531236648, |
| "ce_loss_7": 3.303388088941574, |
| "epoch": 0.579, |
| "grad_norm": 19.375379659731948, |
| "kl_loss_13": 3338.4, |
| "kl_loss_26": 2400.6, |
| "kl_loss_39": 1371.4, |
| "kl_loss_7": 3902.8, |
| "learning_rate": 0.0003836630605958888, |
| "loss": 5548.1, |
| "step": 5790 |
| }, |
| { |
| "ce_loss_13": 3.0871520400047303, |
| "ce_loss_26": 2.628482538461685, |
| "ce_loss_39": 2.1345098853111266, |
| "ce_loss_52": 1.439907690882683, |
| "ce_loss_7": 3.360053616762161, |
| "epoch": 0.58, |
| "grad_norm": 20.217419963891068, |
| "kl_loss_13": 3384.8, |
| "kl_loss_26": 2429.6, |
| "kl_loss_39": 1387.0, |
| "kl_loss_7": 3952.0, |
| "learning_rate": 0.0003821205322452863, |
| "loss": 5608.2, |
| "step": 5800 |
| }, |
| { |
| "ce_loss_13": 3.098143881559372, |
| "ce_loss_26": 2.648523300886154, |
| "ce_loss_39": 2.1544575184583663, |
| "ce_loss_52": 1.4684545397758484, |
| "ce_loss_7": 3.369327354431152, |
| "epoch": 0.581, |
| "grad_norm": 20.306772003501575, |
| "kl_loss_13": 3376.0, |
| "kl_loss_26": 2423.6, |
| "kl_loss_39": 1374.4, |
| "kl_loss_7": 3943.2, |
| "learning_rate": 0.0003805791909396155, |
| "loss": 5609.7, |
| "step": 5810 |
| }, |
| { |
| "ce_loss_13": 3.03250247836113, |
| "ce_loss_26": 2.5763048112392424, |
| "ce_loss_39": 2.0811164885759355, |
| "ce_loss_52": 1.4048690304160119, |
| "ce_loss_7": 3.2992306888103484, |
| "epoch": 0.582, |
| "grad_norm": 19.674023919479502, |
| "kl_loss_13": 3376.8, |
| "kl_loss_26": 2428.6, |
| "kl_loss_39": 1384.2, |
| "kl_loss_7": 3936.8, |
| "learning_rate": 0.0003790390522001662, |
| "loss": 5494.5, |
| "step": 5820 |
| }, |
| { |
| "ce_loss_13": 3.0025113344192507, |
| "ce_loss_26": 2.55022137761116, |
| "ce_loss_39": 2.0565065026283262, |
| "ce_loss_52": 1.377510306239128, |
| "ce_loss_7": 3.2718186736106873, |
| "epoch": 0.583, |
| "grad_norm": 20.728894667471106, |
| "kl_loss_13": 3354.8, |
| "kl_loss_26": 2406.2, |
| "kl_loss_39": 1367.9, |
| "kl_loss_7": 3917.6, |
| "learning_rate": 0.0003775001315361183, |
| "loss": 5559.7, |
| "step": 5830 |
| }, |
| { |
| "ce_loss_13": 3.0971179008483887, |
| "ce_loss_26": 2.6451155722141264, |
| "ce_loss_39": 2.1508567333221436, |
| "ce_loss_52": 1.4824247717857362, |
| "ce_loss_7": 3.37031666636467, |
| "epoch": 0.584, |
| "grad_norm": 20.600074401568932, |
| "kl_loss_13": 3337.2, |
| "kl_loss_26": 2403.8, |
| "kl_loss_39": 1359.2, |
| "kl_loss_7": 3907.2, |
| "learning_rate": 0.0003759624444443858, |
| "loss": 5519.4, |
| "step": 5840 |
| }, |
| { |
| "ce_loss_13": 3.0961884200572967, |
| "ce_loss_26": 2.626956915855408, |
| "ce_loss_39": 2.1310097485780717, |
| "ce_loss_52": 1.4370131075382233, |
| "ce_loss_7": 3.371414542198181, |
| "epoch": 0.585, |
| "grad_norm": 21.119848921717185, |
| "kl_loss_13": 3437.6, |
| "kl_loss_26": 2463.6, |
| "kl_loss_39": 1412.8, |
| "kl_loss_7": 4010.4, |
| "learning_rate": 0.00037442600640946044, |
| "loss": 5564.2, |
| "step": 5850 |
| }, |
| { |
| "ce_loss_13": 3.022693085670471, |
| "ce_loss_26": 2.556842041015625, |
| "ce_loss_39": 2.054610991477966, |
| "ce_loss_52": 1.3797120869159698, |
| "ce_loss_7": 3.2991441190242767, |
| "epoch": 0.586, |
| "grad_norm": 20.829952423189983, |
| "kl_loss_13": 3400.0, |
| "kl_loss_26": 2436.4, |
| "kl_loss_39": 1378.6, |
| "kl_loss_7": 3976.4, |
| "learning_rate": 0.00037289083290325663, |
| "loss": 5555.3, |
| "step": 5860 |
| }, |
| { |
| "ce_loss_13": 3.048868530988693, |
| "ce_loss_26": 2.597111147642136, |
| "ce_loss_39": 2.1075122743844985, |
| "ce_loss_52": 1.4477093726396562, |
| "ce_loss_7": 3.3170620620250704, |
| "epoch": 0.587, |
| "grad_norm": 19.743999290482414, |
| "kl_loss_13": 3295.2, |
| "kl_loss_26": 2349.8, |
| "kl_loss_39": 1314.6, |
| "kl_loss_7": 3862.0, |
| "learning_rate": 0.0003713569393849543, |
| "loss": 5582.2, |
| "step": 5870 |
| }, |
| { |
| "ce_loss_13": 3.0275582671165466, |
| "ce_loss_26": 2.5641341865062715, |
| "ce_loss_39": 2.075681546330452, |
| "ce_loss_52": 1.413079009950161, |
| "ce_loss_7": 3.30134374499321, |
| "epoch": 0.588, |
| "grad_norm": 19.60417578779171, |
| "kl_loss_13": 3342.8, |
| "kl_loss_26": 2385.2, |
| "kl_loss_39": 1348.4, |
| "kl_loss_7": 3911.6, |
| "learning_rate": 0.00036982434130084397, |
| "loss": 5547.2, |
| "step": 5880 |
| }, |
| { |
| "ce_loss_13": 3.0114518344402312, |
| "ce_loss_26": 2.5489202946424485, |
| "ce_loss_39": 2.051037350296974, |
| "ce_loss_52": 1.3947512209415436, |
| "ce_loss_7": 3.2770422041416167, |
| "epoch": 0.589, |
| "grad_norm": 19.943871445513693, |
| "kl_loss_13": 3324.8, |
| "kl_loss_26": 2358.6, |
| "kl_loss_39": 1318.4, |
| "kl_loss_7": 3886.4, |
| "learning_rate": 0.00036829305408417166, |
| "loss": 5519.0, |
| "step": 5890 |
| }, |
| { |
| "ce_loss_13": 2.9831090033054353, |
| "ce_loss_26": 2.5218098521232606, |
| "ce_loss_39": 2.0276191979646683, |
| "ce_loss_52": 1.3660520613193512, |
| "ce_loss_7": 3.262023079395294, |
| "epoch": 0.59, |
| "grad_norm": 20.136107811495098, |
| "kl_loss_13": 3337.2, |
| "kl_loss_26": 2377.0, |
| "kl_loss_39": 1333.6, |
| "kl_loss_7": 3918.4, |
| "learning_rate": 0.0003667630931549826, |
| "loss": 5547.1, |
| "step": 5900 |
| }, |
| { |
| "ce_loss_13": 3.1185491621494292, |
| "ce_loss_26": 2.6609552204608917, |
| "ce_loss_39": 2.154519048333168, |
| "ce_loss_52": 1.4638203248381614, |
| "ce_loss_7": 3.3888748466968535, |
| "epoch": 0.591, |
| "grad_norm": 21.04398596009474, |
| "kl_loss_13": 3422.0, |
| "kl_loss_26": 2461.2, |
| "kl_loss_39": 1398.6, |
| "kl_loss_7": 3996.8, |
| "learning_rate": 0.00036523447391996613, |
| "loss": 5529.25, |
| "step": 5910 |
| }, |
| { |
| "ce_loss_13": 3.01438627243042, |
| "ce_loss_26": 2.5740538477897643, |
| "ce_loss_39": 2.0915403455495833, |
| "ce_loss_52": 1.431156474351883, |
| "ce_loss_7": 3.2843648850917817, |
| "epoch": 0.592, |
| "grad_norm": 21.11836249464285, |
| "kl_loss_13": 3292.4, |
| "kl_loss_26": 2358.6, |
| "kl_loss_39": 1331.3, |
| "kl_loss_7": 3853.2, |
| "learning_rate": 0.00036370721177230114, |
| "loss": 5565.45, |
| "step": 5920 |
| }, |
| { |
| "ce_loss_13": 3.069197976589203, |
| "ce_loss_26": 2.619757717847824, |
| "ce_loss_39": 2.1231694549322127, |
| "ce_loss_52": 1.4499645620584487, |
| "ce_loss_7": 3.339762735366821, |
| "epoch": 0.593, |
| "grad_norm": 19.81588610039522, |
| "kl_loss_13": 3318.4, |
| "kl_loss_26": 2373.0, |
| "kl_loss_39": 1346.8, |
| "kl_loss_7": 3881.2, |
| "learning_rate": 0.00036218132209150044, |
| "loss": 5483.7, |
| "step": 5930 |
| }, |
| { |
| "ce_loss_13": 3.0346123337745667, |
| "ce_loss_26": 2.5827606499195097, |
| "ce_loss_39": 2.090969371795654, |
| "ce_loss_52": 1.4306629955768586, |
| "ce_loss_7": 3.3081043541431425, |
| "epoch": 0.594, |
| "grad_norm": 20.579777117155064, |
| "kl_loss_13": 3338.4, |
| "kl_loss_26": 2404.8, |
| "kl_loss_39": 1361.4, |
| "kl_loss_7": 3897.6, |
| "learning_rate": 0.0003606568202432562, |
| "loss": 5508.2, |
| "step": 5940 |
| }, |
| { |
| "ce_loss_13": 2.9628920376300814, |
| "ce_loss_26": 2.5103672593832016, |
| "ce_loss_39": 2.028717193007469, |
| "ce_loss_52": 1.3798889175057412, |
| "ce_loss_7": 3.238497519493103, |
| "epoch": 0.595, |
| "grad_norm": 19.336805098702218, |
| "kl_loss_13": 3271.2, |
| "kl_loss_26": 2333.8, |
| "kl_loss_39": 1324.3, |
| "kl_loss_7": 3845.6, |
| "learning_rate": 0.0003591337215792851, |
| "loss": 5512.3, |
| "step": 5950 |
| }, |
| { |
| "ce_loss_13": 3.0788431644439695, |
| "ce_loss_26": 2.608420321345329, |
| "ce_loss_39": 2.1091116696596144, |
| "ce_loss_52": 1.4199562400579453, |
| "ce_loss_7": 3.359624499082565, |
| "epoch": 0.596, |
| "grad_norm": 20.533566806186286, |
| "kl_loss_13": 3426.4, |
| "kl_loss_26": 2447.8, |
| "kl_loss_39": 1399.7, |
| "kl_loss_7": 4004.4, |
| "learning_rate": 0.00035761204143717383, |
| "loss": 5551.7, |
| "step": 5960 |
| }, |
| { |
| "ce_loss_13": 3.03274342417717, |
| "ce_loss_26": 2.585944026708603, |
| "ce_loss_39": 2.096950164437294, |
| "ce_loss_52": 1.416708105802536, |
| "ce_loss_7": 3.3088025748729706, |
| "epoch": 0.597, |
| "grad_norm": 19.695670321869642, |
| "kl_loss_13": 3339.2, |
| "kl_loss_26": 2400.2, |
| "kl_loss_39": 1368.0, |
| "kl_loss_7": 3908.0, |
| "learning_rate": 0.0003560917951402245, |
| "loss": 5483.9, |
| "step": 5970 |
| }, |
| { |
| "ce_loss_13": 3.01766916513443, |
| "ce_loss_26": 2.561279395222664, |
| "ce_loss_39": 2.06943539083004, |
| "ce_loss_52": 1.418130737543106, |
| "ce_loss_7": 3.2899491429328918, |
| "epoch": 0.598, |
| "grad_norm": 21.072429989583483, |
| "kl_loss_13": 3344.4, |
| "kl_loss_26": 2382.6, |
| "kl_loss_39": 1331.0, |
| "kl_loss_7": 3913.6, |
| "learning_rate": 0.00035457299799730046, |
| "loss": 5551.3, |
| "step": 5980 |
| }, |
| { |
| "ce_loss_13": 3.0659261345863342, |
| "ce_loss_26": 2.600133925676346, |
| "ce_loss_39": 2.110896447300911, |
| "ce_loss_52": 1.437103909254074, |
| "ce_loss_7": 3.3336906135082245, |
| "epoch": 0.599, |
| "grad_norm": 24.325101698795564, |
| "kl_loss_13": 3380.0, |
| "kl_loss_26": 2416.4, |
| "kl_loss_39": 1376.0, |
| "kl_loss_7": 3938.8, |
| "learning_rate": 0.0003530556653026721, |
| "loss": 5495.3, |
| "step": 5990 |
| }, |
| { |
| "ce_loss_13": 3.0716091096401215, |
| "ce_loss_26": 2.614444798231125, |
| "ce_loss_39": 2.1203446626663207, |
| "ce_loss_52": 1.4388466402888298, |
| "ce_loss_7": 3.3488605439662935, |
| "epoch": 0.6, |
| "grad_norm": 20.521363612152747, |
| "kl_loss_13": 3384.4, |
| "kl_loss_26": 2432.4, |
| "kl_loss_39": 1382.6, |
| "kl_loss_7": 3959.2, |
| "learning_rate": 0.00035153981233586274, |
| "loss": 5545.9, |
| "step": 6000 |
| }, |
| { |
| "ce_loss_13": 3.106086379289627, |
| "ce_loss_26": 2.6430875420570374, |
| "ce_loss_39": 2.1337583631277086, |
| "ce_loss_52": 1.4439469754695893, |
| "ce_loss_7": 3.3797987580299376, |
| "epoch": 0.601, |
| "grad_norm": 20.80497849503253, |
| "kl_loss_13": 3453.6, |
| "kl_loss_26": 2484.8, |
| "kl_loss_39": 1398.0, |
| "kl_loss_7": 4028.4, |
| "learning_rate": 0.00035002545436149473, |
| "loss": 5491.4, |
| "step": 6010 |
| }, |
| { |
| "ce_loss_13": 3.000385183095932, |
| "ce_loss_26": 2.5445862293243406, |
| "ce_loss_39": 2.045133265852928, |
| "ce_loss_52": 1.387436880171299, |
| "ce_loss_7": 3.271744018793106, |
| "epoch": 0.602, |
| "grad_norm": 32.49284937945306, |
| "kl_loss_13": 3335.6, |
| "kl_loss_26": 2380.8, |
| "kl_loss_39": 1330.6, |
| "kl_loss_7": 3907.2, |
| "learning_rate": 0.0003485126066291364, |
| "loss": 5483.7, |
| "step": 6020 |
| }, |
| { |
| "ce_loss_13": 3.0256562530994415, |
| "ce_loss_26": 2.577129301428795, |
| "ce_loss_39": 2.0889712274074554, |
| "ce_loss_52": 1.4449194520711899, |
| "ce_loss_7": 3.2831216752529144, |
| "epoch": 0.603, |
| "grad_norm": 20.23621815480592, |
| "kl_loss_13": 3272.0, |
| "kl_loss_26": 2339.4, |
| "kl_loss_39": 1312.8, |
| "kl_loss_7": 3817.2, |
| "learning_rate": 0.0003470012843731476, |
| "loss": 5461.5, |
| "step": 6030 |
| }, |
| { |
| "ce_loss_13": 3.0063592195510864, |
| "ce_loss_26": 2.5461477816104887, |
| "ce_loss_39": 2.047996437549591, |
| "ce_loss_52": 1.4004468455910684, |
| "ce_loss_7": 3.2747744262218474, |
| "epoch": 0.604, |
| "grad_norm": 20.161854907584466, |
| "kl_loss_13": 3331.2, |
| "kl_loss_26": 2365.2, |
| "kl_loss_39": 1318.3, |
| "kl_loss_7": 3894.4, |
| "learning_rate": 0.00034549150281252633, |
| "loss": 5446.25, |
| "step": 6040 |
| }, |
| { |
| "ce_loss_13": 3.0043693661689757, |
| "ce_loss_26": 2.5464318215847017, |
| "ce_loss_39": 2.054910770058632, |
| "ce_loss_52": 1.402955588698387, |
| "ce_loss_7": 3.277425891160965, |
| "epoch": 0.605, |
| "grad_norm": 19.39479695015959, |
| "kl_loss_13": 3303.6, |
| "kl_loss_26": 2362.4, |
| "kl_loss_39": 1315.5, |
| "kl_loss_7": 3867.2, |
| "learning_rate": 0.0003439832771507565, |
| "loss": 5513.7, |
| "step": 6050 |
| }, |
| { |
| "ce_loss_13": 3.0073243379592896, |
| "ce_loss_26": 2.552249348163605, |
| "ce_loss_39": 2.0556343287229537, |
| "ce_loss_52": 1.4056400418281556, |
| "ce_loss_7": 3.276465517282486, |
| "epoch": 0.606, |
| "grad_norm": 20.632328630604196, |
| "kl_loss_13": 3315.2, |
| "kl_loss_26": 2367.2, |
| "kl_loss_39": 1320.7, |
| "kl_loss_7": 3890.0, |
| "learning_rate": 0.0003424766225756537, |
| "loss": 5437.2, |
| "step": 6060 |
| }, |
| { |
| "ce_loss_13": 3.0342160046100615, |
| "ce_loss_26": 2.5702687412500382, |
| "ce_loss_39": 2.0748631983995436, |
| "ce_loss_52": 1.4053118824958801, |
| "ce_loss_7": 3.30224769115448, |
| "epoch": 0.607, |
| "grad_norm": 20.381462378536757, |
| "kl_loss_13": 3375.6, |
| "kl_loss_26": 2412.6, |
| "kl_loss_39": 1368.5, |
| "kl_loss_7": 3938.0, |
| "learning_rate": 0.00034097155425921255, |
| "loss": 5453.95, |
| "step": 6070 |
| }, |
| { |
| "ce_loss_13": 2.979208827018738, |
| "ce_loss_26": 2.5391657948493958, |
| "ce_loss_39": 2.0675252109766005, |
| "ce_loss_52": 1.4293861359357833, |
| "ce_loss_7": 3.243548810482025, |
| "epoch": 0.608, |
| "grad_norm": 20.059894251099692, |
| "kl_loss_13": 3201.6, |
| "kl_loss_26": 2277.2, |
| "kl_loss_39": 1276.3, |
| "kl_loss_7": 3759.2, |
| "learning_rate": 0.0003394680873574546, |
| "loss": 5463.6, |
| "step": 6080 |
| }, |
| { |
| "ce_loss_13": 3.0087065517902376, |
| "ce_loss_26": 2.5543720543384554, |
| "ce_loss_39": 2.0706711769104005, |
| "ce_loss_52": 1.4312650561332703, |
| "ce_loss_7": 3.274740469455719, |
| "epoch": 0.609, |
| "grad_norm": 19.682020137215275, |
| "kl_loss_13": 3278.0, |
| "kl_loss_26": 2334.8, |
| "kl_loss_39": 1311.6, |
| "kl_loss_7": 3829.2, |
| "learning_rate": 0.0003379662370102747, |
| "loss": 5485.1, |
| "step": 6090 |
| }, |
| { |
| "ce_loss_13": 2.9708913624286652, |
| "ce_loss_26": 2.526490569114685, |
| "ce_loss_39": 2.045279270410538, |
| "ce_loss_52": 1.406798492372036, |
| "ce_loss_7": 3.2348900735378265, |
| "epoch": 0.61, |
| "grad_norm": 20.816822332244506, |
| "kl_loss_13": 3220.0, |
| "kl_loss_26": 2302.2, |
| "kl_loss_39": 1290.7, |
| "kl_loss_7": 3776.8, |
| "learning_rate": 0.0003364660183412892, |
| "loss": 5434.0, |
| "step": 6100 |
| }, |
| { |
| "ce_loss_13": 3.034948408603668, |
| "ce_loss_26": 2.5749203741550444, |
| "ce_loss_39": 2.083053132891655, |
| "ce_loss_52": 1.4239173114299775, |
| "ce_loss_7": 3.308141976594925, |
| "epoch": 0.611, |
| "grad_norm": 19.1619951590758, |
| "kl_loss_13": 3322.8, |
| "kl_loss_26": 2375.8, |
| "kl_loss_39": 1335.6, |
| "kl_loss_7": 3891.2, |
| "learning_rate": 0.0003349674464576834, |
| "loss": 5422.4, |
| "step": 6110 |
| }, |
| { |
| "ce_loss_13": 3.004230409860611, |
| "ce_loss_26": 2.559123533964157, |
| "ce_loss_39": 2.0680998235940935, |
| "ce_loss_52": 1.416846913099289, |
| "ce_loss_7": 3.268228167295456, |
| "epoch": 0.612, |
| "grad_norm": 19.814048572070753, |
| "kl_loss_13": 3277.2, |
| "kl_loss_26": 2352.2, |
| "kl_loss_39": 1329.4, |
| "kl_loss_7": 3827.6, |
| "learning_rate": 0.00033347053645005966, |
| "loss": 5408.25, |
| "step": 6120 |
| }, |
| { |
| "ce_loss_13": 3.043248528242111, |
| "ce_loss_26": 2.5893827825784683, |
| "ce_loss_39": 2.106617513298988, |
| "ce_loss_52": 1.4531341344118118, |
| "ce_loss_7": 3.3129510939121247, |
| "epoch": 0.613, |
| "grad_norm": 20.76807127463998, |
| "kl_loss_13": 3286.4, |
| "kl_loss_26": 2356.0, |
| "kl_loss_39": 1332.4, |
| "kl_loss_7": 3850.8, |
| "learning_rate": 0.00033197530339228485, |
| "loss": 5370.55, |
| "step": 6130 |
| }, |
| { |
| "ce_loss_13": 3.00436030626297, |
| "ce_loss_26": 2.551770430803299, |
| "ce_loss_39": 2.0617963910102843, |
| "ce_loss_52": 1.4093145355582237, |
| "ce_loss_7": 3.287700629234314, |
| "epoch": 0.614, |
| "grad_norm": 20.13277338184701, |
| "kl_loss_13": 3319.2, |
| "kl_loss_26": 2374.0, |
| "kl_loss_39": 1331.1, |
| "kl_loss_7": 3899.6, |
| "learning_rate": 0.00033048176234133967, |
| "loss": 5468.4, |
| "step": 6140 |
| }, |
| { |
| "ce_loss_13": 3.0555618822574617, |
| "ce_loss_26": 2.6027767241001127, |
| "ce_loss_39": 2.1256050765514374, |
| "ce_loss_52": 1.4570606127381325, |
| "ce_loss_7": 3.3252673983573913, |
| "epoch": 0.615, |
| "grad_norm": 20.812161363431123, |
| "kl_loss_13": 3324.4, |
| "kl_loss_26": 2376.0, |
| "kl_loss_39": 1350.9, |
| "kl_loss_7": 3884.4, |
| "learning_rate": 0.0003289899283371657, |
| "loss": 5469.45, |
| "step": 6150 |
| }, |
| { |
| "ce_loss_13": 2.9699956268072127, |
| "ce_loss_26": 2.5214530378580093, |
| "ce_loss_39": 2.047743359208107, |
| "ce_loss_52": 1.404917973279953, |
| "ce_loss_7": 3.242190235853195, |
| "epoch": 0.616, |
| "grad_norm": 21.707368727671295, |
| "kl_loss_13": 3253.6, |
| "kl_loss_26": 2330.0, |
| "kl_loss_39": 1321.7, |
| "kl_loss_7": 3830.4, |
| "learning_rate": 0.0003274998164025148, |
| "loss": 5448.5, |
| "step": 6160 |
| }, |
| { |
| "ce_loss_13": 3.1136968553066255, |
| "ce_loss_26": 2.6612686932086946, |
| "ce_loss_39": 2.1661961168050765, |
| "ce_loss_52": 1.4886637568473815, |
| "ce_loss_7": 3.390312284231186, |
| "epoch": 0.617, |
| "grad_norm": 20.098473719799063, |
| "kl_loss_13": 3356.0, |
| "kl_loss_26": 2410.6, |
| "kl_loss_39": 1369.8, |
| "kl_loss_7": 3926.4, |
| "learning_rate": 0.0003260114415427975, |
| "loss": 5420.0, |
| "step": 6170 |
| }, |
| { |
| "ce_loss_13": 3.0537400960922243, |
| "ce_loss_26": 2.6002777397632597, |
| "ce_loss_39": 2.0997320264577866, |
| "ce_loss_52": 1.425080481171608, |
| "ce_loss_7": 3.3301878392696382, |
| "epoch": 0.618, |
| "grad_norm": 20.959268913691595, |
| "kl_loss_13": 3376.8, |
| "kl_loss_26": 2431.0, |
| "kl_loss_39": 1376.8, |
| "kl_loss_7": 3956.0, |
| "learning_rate": 0.0003245248187459323, |
| "loss": 5467.5, |
| "step": 6180 |
| }, |
| { |
| "ce_loss_13": 3.0195475459098815, |
| "ce_loss_26": 2.5703806400299074, |
| "ce_loss_39": 2.0828246504068373, |
| "ce_loss_52": 1.4389306217432023, |
| "ce_loss_7": 3.2870283126831055, |
| "epoch": 0.619, |
| "grad_norm": 19.77508773242922, |
| "kl_loss_13": 3278.4, |
| "kl_loss_26": 2325.6, |
| "kl_loss_39": 1301.5, |
| "kl_loss_7": 3832.0, |
| "learning_rate": 0.00032303996298219416, |
| "loss": 5436.5, |
| "step": 6190 |
| }, |
| { |
| "ce_loss_13": 3.080602079629898, |
| "ce_loss_26": 2.6279105126857756, |
| "ce_loss_39": 2.1348745226860046, |
| "ce_loss_52": 1.4668798118829727, |
| "ce_loss_7": 3.349226105213165, |
| "epoch": 0.62, |
| "grad_norm": 20.105835424543073, |
| "kl_loss_13": 3342.4, |
| "kl_loss_26": 2402.4, |
| "kl_loss_39": 1358.2, |
| "kl_loss_7": 3903.6, |
| "learning_rate": 0.00032155688920406414, |
| "loss": 5427.1, |
| "step": 6200 |
| }, |
| { |
| "ce_loss_13": 2.980887794494629, |
| "ce_loss_26": 2.5403966814279557, |
| "ce_loss_39": 2.06268994808197, |
| "ce_loss_52": 1.4154802724719047, |
| "ce_loss_7": 3.252926254272461, |
| "epoch": 0.621, |
| "grad_norm": 19.282334771133197, |
| "kl_loss_13": 3252.8, |
| "kl_loss_26": 2327.6, |
| "kl_loss_39": 1309.6, |
| "kl_loss_7": 3818.4, |
| "learning_rate": 0.0003200756123460788, |
| "loss": 5482.55, |
| "step": 6210 |
| }, |
| { |
| "ce_loss_13": 2.9776609361171724, |
| "ce_loss_26": 2.5325854122638702, |
| "ce_loss_39": 2.0461337983608248, |
| "ce_loss_52": 1.410691213607788, |
| "ce_loss_7": 3.24890678524971, |
| "epoch": 0.622, |
| "grad_norm": 20.106994551943693, |
| "kl_loss_13": 3252.0, |
| "kl_loss_26": 2326.2, |
| "kl_loss_39": 1296.8, |
| "kl_loss_7": 3812.4, |
| "learning_rate": 0.00031859614732467957, |
| "loss": 5416.4, |
| "step": 6220 |
| }, |
| { |
| "ce_loss_13": 3.032334786653519, |
| "ce_loss_26": 2.579844218492508, |
| "ce_loss_39": 2.1053399711847307, |
| "ce_loss_52": 1.4573373839259147, |
| "ce_loss_7": 3.2939690172672274, |
| "epoch": 0.623, |
| "grad_norm": 19.37466323417316, |
| "kl_loss_13": 3254.0, |
| "kl_loss_26": 2315.6, |
| "kl_loss_39": 1307.4, |
| "kl_loss_7": 3800.0, |
| "learning_rate": 0.00031711850903806275, |
| "loss": 5384.0, |
| "step": 6230 |
| }, |
| { |
| "ce_loss_13": 3.0033844828605654, |
| "ce_loss_26": 2.554124391078949, |
| "ce_loss_39": 2.072583147883415, |
| "ce_loss_52": 1.405614359676838, |
| "ce_loss_7": 3.272351396083832, |
| "epoch": 0.624, |
| "grad_norm": 19.68798944815722, |
| "kl_loss_13": 3318.8, |
| "kl_loss_26": 2383.4, |
| "kl_loss_39": 1353.1, |
| "kl_loss_7": 3878.4, |
| "learning_rate": 0.0003156427123660297, |
| "loss": 5409.1, |
| "step": 6240 |
| }, |
| { |
| "ce_loss_13": 3.044550156593323, |
| "ce_loss_26": 2.5933004200458525, |
| "ce_loss_39": 2.0951038181781767, |
| "ce_loss_52": 1.432724517583847, |
| "ce_loss_7": 3.31173922419548, |
| "epoch": 0.625, |
| "grad_norm": 19.588532833188335, |
| "kl_loss_13": 3324.0, |
| "kl_loss_26": 2390.2, |
| "kl_loss_39": 1351.9, |
| "kl_loss_7": 3884.8, |
| "learning_rate": 0.0003141687721698363, |
| "loss": 5410.2, |
| "step": 6250 |
| }, |
| { |
| "ce_loss_13": 3.0133075952529906, |
| "ce_loss_26": 2.562904554605484, |
| "ce_loss_39": 2.084453445672989, |
| "ce_loss_52": 1.4354033678770066, |
| "ce_loss_7": 3.2846658766269683, |
| "epoch": 0.626, |
| "grad_norm": 20.175418509715428, |
| "kl_loss_13": 3256.4, |
| "kl_loss_26": 2322.2, |
| "kl_loss_39": 1302.4, |
| "kl_loss_7": 3820.0, |
| "learning_rate": 0.00031269670329204396, |
| "loss": 5413.4, |
| "step": 6260 |
| }, |
| { |
| "ce_loss_13": 3.020740455389023, |
| "ce_loss_26": 2.576273998618126, |
| "ce_loss_39": 2.094499522447586, |
| "ce_loss_52": 1.46863095164299, |
| "ce_loss_7": 3.2838824689388275, |
| "epoch": 0.627, |
| "grad_norm": 19.159006125141875, |
| "kl_loss_13": 3217.6, |
| "kl_loss_26": 2295.2, |
| "kl_loss_39": 1276.9, |
| "kl_loss_7": 3769.2, |
| "learning_rate": 0.00031122652055637015, |
| "loss": 5419.65, |
| "step": 6270 |
| }, |
| { |
| "ce_loss_13": 2.9717007994651796, |
| "ce_loss_26": 2.5309597969055178, |
| "ce_loss_39": 2.045916485786438, |
| "ce_loss_52": 1.4202698469161987, |
| "ce_loss_7": 3.244756191968918, |
| "epoch": 0.628, |
| "grad_norm": 20.34275756584657, |
| "kl_loss_13": 3237.2, |
| "kl_loss_26": 2312.2, |
| "kl_loss_39": 1284.9, |
| "kl_loss_7": 3804.0, |
| "learning_rate": 0.0003097582387675385, |
| "loss": 5361.6, |
| "step": 6280 |
| }, |
| { |
| "ce_loss_13": 2.959231287240982, |
| "ce_loss_26": 2.5172866880893707, |
| "ce_loss_39": 2.043312183022499, |
| "ce_loss_52": 1.4159017190337182, |
| "ce_loss_7": 3.220345306396484, |
| "epoch": 0.629, |
| "grad_norm": 20.150529720020295, |
| "kl_loss_13": 3215.2, |
| "kl_loss_26": 2296.4, |
| "kl_loss_39": 1276.2, |
| "kl_loss_7": 3763.2, |
| "learning_rate": 0.00030829187271113034, |
| "loss": 5363.8, |
| "step": 6290 |
| }, |
| { |
| "ce_loss_13": 2.9990767776966094, |
| "ce_loss_26": 2.5505200415849685, |
| "ce_loss_39": 2.060671201348305, |
| "ce_loss_52": 1.395907147228718, |
| "ce_loss_7": 3.2725743770599367, |
| "epoch": 0.63, |
| "grad_norm": 19.518127150050482, |
| "kl_loss_13": 3309.2, |
| "kl_loss_26": 2380.2, |
| "kl_loss_39": 1347.4, |
| "kl_loss_7": 3878.4, |
| "learning_rate": 0.00030682743715343565, |
| "loss": 5435.15, |
| "step": 6300 |
| }, |
| { |
| "ce_loss_13": 3.080632323026657, |
| "ce_loss_26": 2.6302684545516968, |
| "ce_loss_39": 2.135207986831665, |
| "ce_loss_52": 1.4774031162261962, |
| "ce_loss_7": 3.3441965878009796, |
| "epoch": 0.631, |
| "grad_norm": 21.06860944259249, |
| "kl_loss_13": 3302.8, |
| "kl_loss_26": 2366.6, |
| "kl_loss_39": 1332.6, |
| "kl_loss_7": 3862.8, |
| "learning_rate": 0.0003053649468413043, |
| "loss": 5425.25, |
| "step": 6310 |
| }, |
| { |
| "ce_loss_13": 3.0259739339351652, |
| "ce_loss_26": 2.570155072212219, |
| "ce_loss_39": 2.078566926717758, |
| "ce_loss_52": 1.4384133130311967, |
| "ce_loss_7": 3.288107806444168, |
| "epoch": 0.632, |
| "grad_norm": 20.78357258068192, |
| "kl_loss_13": 3282.0, |
| "kl_loss_26": 2335.8, |
| "kl_loss_39": 1293.6, |
| "kl_loss_7": 3833.2, |
| "learning_rate": 0.00030390441650199725, |
| "loss": 5412.2, |
| "step": 6320 |
| }, |
| { |
| "ce_loss_13": 2.9379296779632567, |
| "ce_loss_26": 2.49056881070137, |
| "ce_loss_39": 2.0068995296955108, |
| "ce_loss_52": 1.3864078581333161, |
| "ce_loss_7": 3.203891623020172, |
| "epoch": 0.633, |
| "grad_norm": 20.174137035703254, |
| "kl_loss_13": 3206.8, |
| "kl_loss_26": 2268.4, |
| "kl_loss_39": 1251.1, |
| "kl_loss_7": 3762.4, |
| "learning_rate": 0.00030244586084303903, |
| "loss": 5352.9, |
| "step": 6330 |
| }, |
| { |
| "ce_loss_13": 2.9555823683738707, |
| "ce_loss_26": 2.501230263710022, |
| "ce_loss_39": 2.0173334002494814, |
| "ce_loss_52": 1.3891687452793122, |
| "ce_loss_7": 3.2268748760223387, |
| "epoch": 0.634, |
| "grad_norm": 20.047186620123167, |
| "kl_loss_13": 3264.8, |
| "kl_loss_26": 2316.6, |
| "kl_loss_39": 1274.1, |
| "kl_loss_7": 3843.2, |
| "learning_rate": 0.00030098929455206903, |
| "loss": 5365.4, |
| "step": 6340 |
| }, |
| { |
| "ce_loss_13": 2.9806570291519163, |
| "ce_loss_26": 2.533248084783554, |
| "ce_loss_39": 2.0557660490274428, |
| "ce_loss_52": 1.4183998316526414, |
| "ce_loss_7": 3.248631852865219, |
| "epoch": 0.635, |
| "grad_norm": 19.42792815463783, |
| "kl_loss_13": 3226.0, |
| "kl_loss_26": 2301.0, |
| "kl_loss_39": 1289.2, |
| "kl_loss_7": 3784.4, |
| "learning_rate": 0.00029953473229669324, |
| "loss": 5429.0, |
| "step": 6350 |
| }, |
| { |
| "ce_loss_13": 3.008418655395508, |
| "ce_loss_26": 2.5531763255596163, |
| "ce_loss_39": 2.069964846968651, |
| "ce_loss_52": 1.4419535219669342, |
| "ce_loss_7": 3.274876070022583, |
| "epoch": 0.636, |
| "grad_norm": 20.040461482265158, |
| "kl_loss_13": 3248.4, |
| "kl_loss_26": 2315.4, |
| "kl_loss_39": 1287.5, |
| "kl_loss_7": 3802.8, |
| "learning_rate": 0.00029808218872433767, |
| "loss": 5390.5, |
| "step": 6360 |
| }, |
| { |
| "ce_loss_13": 2.9624147057533263, |
| "ce_loss_26": 2.509669789671898, |
| "ce_loss_39": 2.017057329416275, |
| "ce_loss_52": 1.392769531905651, |
| "ce_loss_7": 3.2405923306941986, |
| "epoch": 0.637, |
| "grad_norm": 19.969518630784094, |
| "kl_loss_13": 3281.6, |
| "kl_loss_26": 2321.8, |
| "kl_loss_39": 1284.8, |
| "kl_loss_7": 3857.6, |
| "learning_rate": 0.0002966316784621, |
| "loss": 5344.4, |
| "step": 6370 |
| }, |
| { |
| "ce_loss_13": 2.9690242230892183, |
| "ce_loss_26": 2.5119642555713653, |
| "ce_loss_39": 2.022391200065613, |
| "ce_loss_52": 1.3881384432315826, |
| "ce_loss_7": 3.2436072409152983, |
| "epoch": 0.638, |
| "grad_norm": 19.443805471212187, |
| "kl_loss_13": 3260.4, |
| "kl_loss_26": 2314.8, |
| "kl_loss_39": 1281.4, |
| "kl_loss_7": 3835.6, |
| "learning_rate": 0.0002951832161166024, |
| "loss": 5333.0, |
| "step": 6380 |
| }, |
| { |
| "ce_loss_13": 3.019889771938324, |
| "ce_loss_26": 2.574093183875084, |
| "ce_loss_39": 2.085198149085045, |
| "ce_loss_52": 1.4531068801879883, |
| "ce_loss_7": 3.286811703443527, |
| "epoch": 0.639, |
| "grad_norm": 19.50308932074232, |
| "kl_loss_13": 3246.0, |
| "kl_loss_26": 2313.6, |
| "kl_loss_39": 1281.1, |
| "kl_loss_7": 3800.8, |
| "learning_rate": 0.0002937368162738445, |
| "loss": 5358.7, |
| "step": 6390 |
| }, |
| { |
| "ce_loss_13": 2.979416298866272, |
| "ce_loss_26": 2.5201680839061735, |
| "ce_loss_39": 2.0343465119600297, |
| "ce_loss_52": 1.4125050336122513, |
| "ce_loss_7": 3.2494026124477386, |
| "epoch": 0.64, |
| "grad_norm": 19.887826117374196, |
| "kl_loss_13": 3261.2, |
| "kl_loss_26": 2302.8, |
| "kl_loss_39": 1277.2, |
| "kl_loss_7": 3827.6, |
| "learning_rate": 0.0002922924934990568, |
| "loss": 5361.0, |
| "step": 6400 |
| }, |
| { |
| "ce_loss_13": 2.966394138336182, |
| "ce_loss_26": 2.5159747898578644, |
| "ce_loss_39": 2.0343314677476885, |
| "ce_loss_52": 1.3983295410871506, |
| "ce_loss_7": 3.2374337732791902, |
| "epoch": 0.641, |
| "grad_norm": 21.205292313379594, |
| "kl_loss_13": 3264.4, |
| "kl_loss_26": 2317.8, |
| "kl_loss_39": 1301.9, |
| "kl_loss_7": 3828.8, |
| "learning_rate": 0.0002908502623365536, |
| "loss": 5348.95, |
| "step": 6410 |
| }, |
| { |
| "ce_loss_13": 3.0186184704303742, |
| "ce_loss_26": 2.5620053589344023, |
| "ce_loss_39": 2.079856187105179, |
| "ce_loss_52": 1.437650018930435, |
| "ce_loss_7": 3.2890258550643923, |
| "epoch": 0.642, |
| "grad_norm": 20.335253629932936, |
| "kl_loss_13": 3264.4, |
| "kl_loss_26": 2325.4, |
| "kl_loss_39": 1302.1, |
| "kl_loss_7": 3830.0, |
| "learning_rate": 0.0002894101373095867, |
| "loss": 5303.0, |
| "step": 6420 |
| }, |
| { |
| "ce_loss_13": 3.0677368700504304, |
| "ce_loss_26": 2.6209602475166323, |
| "ce_loss_39": 2.1306197196245193, |
| "ce_loss_52": 1.4929826736450196, |
| "ce_loss_7": 3.3314808785915373, |
| "epoch": 0.643, |
| "grad_norm": 20.265575824381624, |
| "kl_loss_13": 3289.6, |
| "kl_loss_26": 2355.4, |
| "kl_loss_39": 1318.2, |
| "kl_loss_7": 3835.2, |
| "learning_rate": 0.00028797213292019926, |
| "loss": 5380.85, |
| "step": 6430 |
| }, |
| { |
| "ce_loss_13": 2.966922277212143, |
| "ce_loss_26": 2.51933411359787, |
| "ce_loss_39": 2.040864047408104, |
| "ce_loss_52": 1.4254619121551513, |
| "ce_loss_7": 3.232788211107254, |
| "epoch": 0.644, |
| "grad_norm": 19.660532004484757, |
| "kl_loss_13": 3209.2, |
| "kl_loss_26": 2273.0, |
| "kl_loss_39": 1260.8, |
| "kl_loss_7": 3770.0, |
| "learning_rate": 0.0002865362636490791, |
| "loss": 5309.3, |
| "step": 6440 |
| }, |
| { |
| "ce_loss_13": 3.0011440992355345, |
| "ce_loss_26": 2.5491324365139008, |
| "ce_loss_39": 2.057476672530174, |
| "ce_loss_52": 1.4156641319394112, |
| "ce_loss_7": 3.2698193073272703, |
| "epoch": 0.645, |
| "grad_norm": 20.528114278014176, |
| "kl_loss_13": 3284.8, |
| "kl_loss_26": 2344.4, |
| "kl_loss_39": 1307.4, |
| "kl_loss_7": 3853.6, |
| "learning_rate": 0.0002851025439554142, |
| "loss": 5329.3, |
| "step": 6450 |
| }, |
| { |
| "ce_loss_13": 3.0585977435112, |
| "ce_loss_26": 2.593171867728233, |
| "ce_loss_39": 2.1089387238025665, |
| "ce_loss_52": 1.4590320155024528, |
| "ce_loss_7": 3.3243721425533295, |
| "epoch": 0.646, |
| "grad_norm": 19.177895864651298, |
| "kl_loss_13": 3308.0, |
| "kl_loss_26": 2342.6, |
| "kl_loss_39": 1306.8, |
| "kl_loss_7": 3858.4, |
| "learning_rate": 0.00028367098827674573, |
| "loss": 5399.4, |
| "step": 6460 |
| }, |
| { |
| "ce_loss_13": 3.0068358182907104, |
| "ce_loss_26": 2.559490966796875, |
| "ce_loss_39": 2.0818791508674623, |
| "ce_loss_52": 1.4510639190673829, |
| "ce_loss_7": 3.2726912021636965, |
| "epoch": 0.647, |
| "grad_norm": 20.001351203231668, |
| "kl_loss_13": 3231.2, |
| "kl_loss_26": 2299.0, |
| "kl_loss_39": 1281.6, |
| "kl_loss_7": 3785.2, |
| "learning_rate": 0.00028224161102882397, |
| "loss": 5353.5, |
| "step": 6470 |
| }, |
| { |
| "ce_loss_13": 3.0042510509490965, |
| "ce_loss_26": 2.545697581768036, |
| "ce_loss_39": 2.0517130315303804, |
| "ce_loss_52": 1.4082761898636817, |
| "ce_loss_7": 3.2736884713172913, |
| "epoch": 0.648, |
| "grad_norm": 19.176592480543317, |
| "kl_loss_13": 3304.8, |
| "kl_loss_26": 2344.6, |
| "kl_loss_39": 1308.6, |
| "kl_loss_7": 3870.8, |
| "learning_rate": 0.00028081442660546124, |
| "loss": 5357.45, |
| "step": 6480 |
| }, |
| { |
| "ce_loss_13": 2.9604024648666383, |
| "ce_loss_26": 2.5067259430885316, |
| "ce_loss_39": 2.0217309921979902, |
| "ce_loss_52": 1.396033638715744, |
| "ce_loss_7": 3.229094612598419, |
| "epoch": 0.649, |
| "grad_norm": 20.132685826085943, |
| "kl_loss_13": 3230.8, |
| "kl_loss_26": 2292.8, |
| "kl_loss_39": 1272.4, |
| "kl_loss_7": 3788.8, |
| "learning_rate": 0.0002793894493783892, |
| "loss": 5337.2, |
| "step": 6490 |
| }, |
| { |
| "ce_loss_13": 3.038835954666138, |
| "ce_loss_26": 2.5828086912631987, |
| "ce_loss_39": 2.094580352306366, |
| "ce_loss_52": 1.4504825562238692, |
| "ce_loss_7": 3.3101982474327087, |
| "epoch": 0.65, |
| "grad_norm": 20.26204607317675, |
| "kl_loss_13": 3292.0, |
| "kl_loss_26": 2347.2, |
| "kl_loss_39": 1312.5, |
| "kl_loss_7": 3857.2, |
| "learning_rate": 0.0002779666936971129, |
| "loss": 5341.9, |
| "step": 6500 |
| }, |
| { |
| "ce_loss_13": 2.981122875213623, |
| "ce_loss_26": 2.5436301648616793, |
| "ce_loss_39": 2.06348480284214, |
| "ce_loss_52": 1.4388723462820052, |
| "ce_loss_7": 3.2536712110042574, |
| "epoch": 0.651, |
| "grad_norm": 19.527282619134503, |
| "kl_loss_13": 3203.2, |
| "kl_loss_26": 2279.0, |
| "kl_loss_39": 1260.5, |
| "kl_loss_7": 3763.6, |
| "learning_rate": 0.00027654617388876614, |
| "loss": 5303.55, |
| "step": 6510 |
| }, |
| { |
| "ce_loss_13": 2.985329604148865, |
| "ce_loss_26": 2.5429716140031813, |
| "ce_loss_39": 2.0698666363954543, |
| "ce_loss_52": 1.43270433396101, |
| "ce_loss_7": 3.2484419345855713, |
| "epoch": 0.652, |
| "grad_norm": 19.361430489337188, |
| "kl_loss_13": 3187.2, |
| "kl_loss_26": 2277.2, |
| "kl_loss_39": 1278.4, |
| "kl_loss_7": 3732.4, |
| "learning_rate": 0.0002751279042579672, |
| "loss": 5316.3, |
| "step": 6520 |
| }, |
| { |
| "ce_loss_13": 2.9774204194545746, |
| "ce_loss_26": 2.5208486020565033, |
| "ce_loss_39": 2.0273024052381516, |
| "ce_loss_52": 1.402597150206566, |
| "ce_loss_7": 3.249239844083786, |
| "epoch": 0.653, |
| "grad_norm": 19.527542591573294, |
| "kl_loss_13": 3262.4, |
| "kl_loss_26": 2313.8, |
| "kl_loss_39": 1281.3, |
| "kl_loss_7": 3834.0, |
| "learning_rate": 0.00027371189908667604, |
| "loss": 5336.1, |
| "step": 6530 |
| }, |
| { |
| "ce_loss_13": 3.003592276573181, |
| "ce_loss_26": 2.5556287467479706, |
| "ce_loss_39": 2.075370451807976, |
| "ce_loss_52": 1.4352567225694657, |
| "ce_loss_7": 3.271352219581604, |
| "epoch": 0.654, |
| "grad_norm": 19.924679150402795, |
| "kl_loss_13": 3252.0, |
| "kl_loss_26": 2323.8, |
| "kl_loss_39": 1299.3, |
| "kl_loss_7": 3811.2, |
| "learning_rate": 0.00027229817263404863, |
| "loss": 5288.8, |
| "step": 6540 |
| }, |
| { |
| "ce_loss_13": 2.995857471227646, |
| "ce_loss_26": 2.5282726138830185, |
| "ce_loss_39": 2.0456599622964857, |
| "ce_loss_52": 1.4150889962911606, |
| "ce_loss_7": 3.259833812713623, |
| "epoch": 0.655, |
| "grad_norm": 19.56917393810092, |
| "kl_loss_13": 3242.4, |
| "kl_loss_26": 2291.2, |
| "kl_loss_39": 1266.0, |
| "kl_loss_7": 3806.4, |
| "learning_rate": 0.0002708867391362948, |
| "loss": 5328.1, |
| "step": 6550 |
| }, |
| { |
| "ce_loss_13": 3.004334282875061, |
| "ce_loss_26": 2.553117799758911, |
| "ce_loss_39": 2.0656849920749663, |
| "ce_loss_52": 1.4380108654499053, |
| "ce_loss_7": 3.275963246822357, |
| "epoch": 0.656, |
| "grad_norm": 19.95397617147254, |
| "kl_loss_13": 3232.8, |
| "kl_loss_26": 2292.0, |
| "kl_loss_39": 1262.4, |
| "kl_loss_7": 3798.0, |
| "learning_rate": 0.0002694776128065345, |
| "loss": 5289.15, |
| "step": 6560 |
| }, |
| { |
| "ce_loss_13": 3.0339253902435304, |
| "ce_loss_26": 2.5763088524341584, |
| "ce_loss_39": 2.0788813173770904, |
| "ce_loss_52": 1.4449120432138443, |
| "ce_loss_7": 3.3040917217731476, |
| "epoch": 0.657, |
| "grad_norm": 20.212973328584127, |
| "kl_loss_13": 3268.4, |
| "kl_loss_26": 2314.6, |
| "kl_loss_39": 1279.8, |
| "kl_loss_7": 3840.8, |
| "learning_rate": 0.00026807080783465374, |
| "loss": 5293.2, |
| "step": 6570 |
| }, |
| { |
| "ce_loss_13": 3.032737511396408, |
| "ce_loss_26": 2.5693029284477236, |
| "ce_loss_39": 2.081709760427475, |
| "ce_loss_52": 1.4346143543720244, |
| "ce_loss_7": 3.308923304080963, |
| "epoch": 0.658, |
| "grad_norm": 19.880750606313658, |
| "kl_loss_13": 3330.4, |
| "kl_loss_26": 2365.8, |
| "kl_loss_39": 1316.5, |
| "kl_loss_7": 3904.0, |
| "learning_rate": 0.00026666633838716316, |
| "loss": 5330.1, |
| "step": 6580 |
| }, |
| { |
| "ce_loss_13": 3.0193063259124755, |
| "ce_loss_26": 2.576409709453583, |
| "ce_loss_39": 2.0992994725704195, |
| "ce_loss_52": 1.4646779403090477, |
| "ce_loss_7": 3.285114985704422, |
| "epoch": 0.659, |
| "grad_norm": 20.363457370030773, |
| "kl_loss_13": 3237.2, |
| "kl_loss_26": 2313.0, |
| "kl_loss_39": 1303.8, |
| "kl_loss_7": 3788.4, |
| "learning_rate": 0.00026526421860705474, |
| "loss": 5307.4, |
| "step": 6590 |
| }, |
| { |
| "ce_loss_13": 2.9948873639106752, |
| "ce_loss_26": 2.563684010505676, |
| "ce_loss_39": 2.0847960352897643, |
| "ce_loss_52": 1.4657811507582665, |
| "ce_loss_7": 3.2635042905807494, |
| "epoch": 0.66, |
| "grad_norm": 20.73724151705583, |
| "kl_loss_13": 3176.8, |
| "kl_loss_26": 2267.2, |
| "kl_loss_39": 1259.0, |
| "kl_loss_7": 3732.8, |
| "learning_rate": 0.0002638644626136587, |
| "loss": 5326.5, |
| "step": 6600 |
| }, |
| { |
| "ce_loss_13": 3.007009822130203, |
| "ce_loss_26": 2.5652425408363344, |
| "ce_loss_39": 2.085008403658867, |
| "ce_loss_52": 1.4418020695447922, |
| "ce_loss_7": 3.2768814861774445, |
| "epoch": 0.661, |
| "grad_norm": 19.59865222649701, |
| "kl_loss_13": 3192.4, |
| "kl_loss_26": 2272.6, |
| "kl_loss_39": 1265.8, |
| "kl_loss_7": 3758.8, |
| "learning_rate": 0.00026246708450250255, |
| "loss": 5252.1, |
| "step": 6610 |
| }, |
| { |
| "ce_loss_13": 3.0007415533065798, |
| "ce_loss_26": 2.57885719537735, |
| "ce_loss_39": 2.1149094998836517, |
| "ce_loss_52": 1.4889407217502595, |
| "ce_loss_7": 3.26429398059845, |
| "epoch": 0.662, |
| "grad_norm": 19.791378915341742, |
| "kl_loss_13": 3154.4, |
| "kl_loss_26": 2261.8, |
| "kl_loss_39": 1265.9, |
| "kl_loss_7": 3702.8, |
| "learning_rate": 0.00026107209834516854, |
| "loss": 5253.75, |
| "step": 6620 |
| }, |
| { |
| "ce_loss_13": 3.023489362001419, |
| "ce_loss_26": 2.565022760629654, |
| "ce_loss_39": 2.0684966832399367, |
| "ce_loss_52": 1.4181891351938247, |
| "ce_loss_7": 3.295238083600998, |
| "epoch": 0.663, |
| "grad_norm": 18.83951798457028, |
| "kl_loss_13": 3308.8, |
| "kl_loss_26": 2356.8, |
| "kl_loss_39": 1317.4, |
| "kl_loss_7": 3874.4, |
| "learning_rate": 0.0002596795181891514, |
| "loss": 5303.2, |
| "step": 6630 |
| }, |
| { |
| "ce_loss_13": 2.948693299293518, |
| "ce_loss_26": 2.511568069458008, |
| "ce_loss_39": 2.0302646070718766, |
| "ce_loss_52": 1.40511611700058, |
| "ce_loss_7": 3.215245670080185, |
| "epoch": 0.664, |
| "grad_norm": 19.938510106571005, |
| "kl_loss_13": 3190.8, |
| "kl_loss_26": 2276.8, |
| "kl_loss_39": 1269.2, |
| "kl_loss_7": 3747.6, |
| "learning_rate": 0.000258289358057718, |
| "loss": 5355.55, |
| "step": 6640 |
| }, |
| { |
| "ce_loss_13": 2.964204251766205, |
| "ce_loss_26": 2.5196115612983703, |
| "ce_loss_39": 2.032116264104843, |
| "ce_loss_52": 1.397587490081787, |
| "ce_loss_7": 3.234144788980484, |
| "epoch": 0.665, |
| "grad_norm": 19.688819745922057, |
| "kl_loss_13": 3241.6, |
| "kl_loss_26": 2310.8, |
| "kl_loss_39": 1285.8, |
| "kl_loss_7": 3812.0, |
| "learning_rate": 0.0002569016319497657, |
| "loss": 5275.7, |
| "step": 6650 |
| }, |
| { |
| "ce_loss_13": 3.0219891548156737, |
| "ce_loss_26": 2.568303269147873, |
| "ce_loss_39": 2.0779166162014007, |
| "ce_loss_52": 1.4420379608869554, |
| "ce_loss_7": 3.2859797060489653, |
| "epoch": 0.666, |
| "grad_norm": 19.909960186263373, |
| "kl_loss_13": 3259.2, |
| "kl_loss_26": 2308.2, |
| "kl_loss_39": 1277.3, |
| "kl_loss_7": 3822.8, |
| "learning_rate": 0.00025551635383968066, |
| "loss": 5336.5, |
| "step": 6660 |
| }, |
| { |
| "ce_loss_13": 2.994647592306137, |
| "ce_loss_26": 2.5398232668638228, |
| "ce_loss_39": 2.0492498099803926, |
| "ce_loss_52": 1.434773786365986, |
| "ce_loss_7": 3.264130574464798, |
| "epoch": 0.667, |
| "grad_norm": 20.004167920061033, |
| "kl_loss_13": 3214.0, |
| "kl_loss_26": 2272.2, |
| "kl_loss_39": 1256.6, |
| "kl_loss_7": 3778.0, |
| "learning_rate": 0.00025413353767719804, |
| "loss": 5257.5, |
| "step": 6670 |
| }, |
| { |
| "ce_loss_13": 2.96993693113327, |
| "ce_loss_26": 2.533370888233185, |
| "ce_loss_39": 2.055029663443565, |
| "ce_loss_52": 1.4544675678014756, |
| "ce_loss_7": 3.2342797338962557, |
| "epoch": 0.668, |
| "grad_norm": 20.025849444564383, |
| "kl_loss_13": 3142.4, |
| "kl_loss_26": 2230.8, |
| "kl_loss_39": 1222.0, |
| "kl_loss_7": 3698.4, |
| "learning_rate": 0.0002527531973872617, |
| "loss": 5248.5, |
| "step": 6680 |
| }, |
| { |
| "ce_loss_13": 2.9402814984321592, |
| "ce_loss_26": 2.4974717676639555, |
| "ce_loss_39": 2.0207353264093397, |
| "ce_loss_52": 1.4053010821342469, |
| "ce_loss_7": 3.211796945333481, |
| "epoch": 0.669, |
| "grad_norm": 20.57900906104847, |
| "kl_loss_13": 3184.0, |
| "kl_loss_26": 2262.6, |
| "kl_loss_39": 1252.7, |
| "kl_loss_7": 3753.6, |
| "learning_rate": 0.0002513753468698826, |
| "loss": 5296.7, |
| "step": 6690 |
| }, |
| { |
| "ce_loss_13": 3.049540191888809, |
| "ce_loss_26": 2.5854183793067933, |
| "ce_loss_39": 2.092196524143219, |
| "ce_loss_52": 1.4520713061094284, |
| "ce_loss_7": 3.3153574585914614, |
| "epoch": 0.67, |
| "grad_norm": 20.064255813843516, |
| "kl_loss_13": 3277.6, |
| "kl_loss_26": 2320.0, |
| "kl_loss_39": 1288.6, |
| "kl_loss_7": 3837.2, |
| "learning_rate": 0.0002500000000000001, |
| "loss": 5320.3, |
| "step": 6700 |
| }, |
| { |
| "ce_loss_13": 2.944129317998886, |
| "ce_loss_26": 2.503596860170364, |
| "ce_loss_39": 2.0286095440387726, |
| "ce_loss_52": 1.4282706409692765, |
| "ce_loss_7": 3.2095106482505797, |
| "epoch": 0.671, |
| "grad_norm": 20.12603370941262, |
| "kl_loss_13": 3178.8, |
| "kl_loss_26": 2257.6, |
| "kl_loss_39": 1240.0, |
| "kl_loss_7": 3734.8, |
| "learning_rate": 0.0002486271706273421, |
| "loss": 5232.2, |
| "step": 6710 |
| }, |
| { |
| "ce_loss_13": 2.9713922500610352, |
| "ce_loss_26": 2.5247735172510146, |
| "ce_loss_39": 2.050862190127373, |
| "ce_loss_52": 1.449659252166748, |
| "ce_loss_7": 3.2344084203243257, |
| "epoch": 0.672, |
| "grad_norm": 20.72667693004533, |
| "kl_loss_13": 3149.6, |
| "kl_loss_26": 2225.2, |
| "kl_loss_39": 1217.0, |
| "kl_loss_7": 3703.2, |
| "learning_rate": 0.0002472568725762853, |
| "loss": 5273.45, |
| "step": 6720 |
| }, |
| { |
| "ce_loss_13": 2.9712363362312315, |
| "ce_loss_26": 2.5258888751268387, |
| "ce_loss_39": 2.038512706756592, |
| "ce_loss_52": 1.4098822742700576, |
| "ce_loss_7": 3.238377648591995, |
| "epoch": 0.673, |
| "grad_norm": 19.364942978516346, |
| "kl_loss_13": 3241.6, |
| "kl_loss_26": 2313.0, |
| "kl_loss_39": 1283.6, |
| "kl_loss_7": 3804.0, |
| "learning_rate": 0.00024588911964571554, |
| "loss": 5264.25, |
| "step": 6730 |
| }, |
| { |
| "ce_loss_13": 3.0029661655426025, |
| "ce_loss_26": 2.5618400514125823, |
| "ce_loss_39": 2.0779304295778274, |
| "ce_loss_52": 1.4597969472408294, |
| "ce_loss_7": 3.2699401795864107, |
| "epoch": 0.674, |
| "grad_norm": 19.386861595432553, |
| "kl_loss_13": 3201.2, |
| "kl_loss_26": 2283.2, |
| "kl_loss_39": 1260.7, |
| "kl_loss_7": 3760.0, |
| "learning_rate": 0.00024452392560888974, |
| "loss": 5256.1, |
| "step": 6740 |
| }, |
| { |
| "ce_loss_13": 2.9799251735210417, |
| "ce_loss_26": 2.5340505450963975, |
| "ce_loss_39": 2.049144822359085, |
| "ce_loss_52": 1.4137116000056267, |
| "ce_loss_7": 3.257823657989502, |
| "epoch": 0.675, |
| "grad_norm": 19.909504320065746, |
| "kl_loss_13": 3246.4, |
| "kl_loss_26": 2320.6, |
| "kl_loss_39": 1295.2, |
| "kl_loss_7": 3816.4, |
| "learning_rate": 0.00024316130421329695, |
| "loss": 5221.1, |
| "step": 6750 |
| }, |
| { |
| "ce_loss_13": 2.9629843533039093, |
| "ce_loss_26": 2.524860253930092, |
| "ce_loss_39": 2.042747235298157, |
| "ce_loss_52": 1.4334406018257142, |
| "ce_loss_7": 3.2345532715320586, |
| "epoch": 0.676, |
| "grad_norm": 20.369497806638545, |
| "kl_loss_13": 3186.8, |
| "kl_loss_26": 2271.6, |
| "kl_loss_39": 1245.2, |
| "kl_loss_7": 3754.4, |
| "learning_rate": 0.00024180126918051909, |
| "loss": 5236.3, |
| "step": 6760 |
| }, |
| { |
| "ce_loss_13": 2.9732554376125337, |
| "ce_loss_26": 2.5324235647916793, |
| "ce_loss_39": 2.0435447841882706, |
| "ce_loss_52": 1.4181665301322937, |
| "ce_loss_7": 3.2470718741416933, |
| "epoch": 0.677, |
| "grad_norm": 20.49515152965971, |
| "kl_loss_13": 3219.2, |
| "kl_loss_26": 2299.4, |
| "kl_loss_39": 1278.7, |
| "kl_loss_7": 3786.8, |
| "learning_rate": 0.00024044383420609406, |
| "loss": 5319.65, |
| "step": 6770 |
| }, |
| { |
| "ce_loss_13": 2.9884051978588104, |
| "ce_loss_26": 2.552768051624298, |
| "ce_loss_39": 2.0788974314928055, |
| "ce_loss_52": 1.460537651181221, |
| "ce_loss_7": 3.251654601097107, |
| "epoch": 0.678, |
| "grad_norm": 19.11355169384201, |
| "kl_loss_13": 3167.6, |
| "kl_loss_26": 2248.8, |
| "kl_loss_39": 1249.3, |
| "kl_loss_7": 3718.4, |
| "learning_rate": 0.00023908901295937712, |
| "loss": 5270.2, |
| "step": 6780 |
| }, |
| { |
| "ce_loss_13": 2.974563705921173, |
| "ce_loss_26": 2.536205679178238, |
| "ce_loss_39": 2.059639421105385, |
| "ce_loss_52": 1.4519853800535203, |
| "ce_loss_7": 3.2307840466499327, |
| "epoch": 0.679, |
| "grad_norm": 19.49943649381752, |
| "kl_loss_13": 3131.2, |
| "kl_loss_26": 2224.6, |
| "kl_loss_39": 1227.3, |
| "kl_loss_7": 3672.8, |
| "learning_rate": 0.00023773681908340283, |
| "loss": 5293.35, |
| "step": 6790 |
| }, |
| { |
| "ce_loss_13": 2.961294001340866, |
| "ce_loss_26": 2.5129422783851623, |
| "ce_loss_39": 2.0338284403085707, |
| "ce_loss_52": 1.409618005156517, |
| "ce_loss_7": 3.226576966047287, |
| "epoch": 0.68, |
| "grad_norm": 19.714496760455777, |
| "kl_loss_13": 3218.4, |
| "kl_loss_26": 2291.6, |
| "kl_loss_39": 1267.1, |
| "kl_loss_7": 3768.8, |
| "learning_rate": 0.00023638726619474876, |
| "loss": 5250.5, |
| "step": 6800 |
| }, |
| { |
| "ce_loss_13": 3.0715033173561097, |
| "ce_loss_26": 2.626942425966263, |
| "ce_loss_39": 2.1481954157352448, |
| "ce_loss_52": 1.5187551528215408, |
| "ce_loss_7": 3.336813968420029, |
| "epoch": 0.681, |
| "grad_norm": 19.805927066338636, |
| "kl_loss_13": 3238.0, |
| "kl_loss_26": 2300.0, |
| "kl_loss_39": 1278.2, |
| "kl_loss_7": 3790.0, |
| "learning_rate": 0.0002350403678833976, |
| "loss": 5234.9, |
| "step": 6810 |
| }, |
| { |
| "ce_loss_13": 2.957927519083023, |
| "ce_loss_26": 2.5146523237228395, |
| "ce_loss_39": 2.0373351722955704, |
| "ce_loss_52": 1.42108353972435, |
| "ce_loss_7": 3.223799991607666, |
| "epoch": 0.682, |
| "grad_norm": 20.479900710283268, |
| "kl_loss_13": 3202.4, |
| "kl_loss_26": 2281.6, |
| "kl_loss_39": 1265.3, |
| "kl_loss_7": 3751.2, |
| "learning_rate": 0.00023369613771260007, |
| "loss": 5258.8, |
| "step": 6820 |
| }, |
| { |
| "ce_loss_13": 2.9837976515293123, |
| "ce_loss_26": 2.5472346246242523, |
| "ce_loss_39": 2.0789157301187515, |
| "ce_loss_52": 1.4713156789541244, |
| "ce_loss_7": 3.2464165806770326, |
| "epoch": 0.683, |
| "grad_norm": 19.479700971519588, |
| "kl_loss_13": 3160.0, |
| "kl_loss_26": 2251.4, |
| "kl_loss_39": 1251.2, |
| "kl_loss_7": 3708.8, |
| "learning_rate": 0.00023235458921873925, |
| "loss": 5205.3, |
| "step": 6830 |
| }, |
| { |
| "ce_loss_13": 2.9887463808059693, |
| "ce_loss_26": 2.54727523624897, |
| "ce_loss_39": 2.0671548724174498, |
| "ce_loss_52": 1.429240283370018, |
| "ce_loss_7": 3.2527148902416227, |
| "epoch": 0.684, |
| "grad_norm": 19.517242754730322, |
| "kl_loss_13": 3196.4, |
| "kl_loss_26": 2285.2, |
| "kl_loss_39": 1272.4, |
| "kl_loss_7": 3748.4, |
| "learning_rate": 0.0002310157359111938, |
| "loss": 5234.8, |
| "step": 6840 |
| }, |
| { |
| "ce_loss_13": 2.916054058074951, |
| "ce_loss_26": 2.4669763922691343, |
| "ce_loss_39": 1.992121958732605, |
| "ce_loss_52": 1.390305233001709, |
| "ce_loss_7": 3.179300290346146, |
| "epoch": 0.685, |
| "grad_norm": 20.2160173629293, |
| "kl_loss_13": 3168.4, |
| "kl_loss_26": 2234.6, |
| "kl_loss_39": 1230.6, |
| "kl_loss_7": 3719.2, |
| "learning_rate": 0.0002296795912722014, |
| "loss": 5214.55, |
| "step": 6850 |
| }, |
| { |
| "ce_loss_13": 2.9123338878154756, |
| "ce_loss_26": 2.4650843650102616, |
| "ce_loss_39": 1.9903331339359283, |
| "ce_loss_52": 1.3830609425902367, |
| "ce_loss_7": 3.174854850769043, |
| "epoch": 0.686, |
| "grad_norm": 19.3573154940235, |
| "kl_loss_13": 3154.0, |
| "kl_loss_26": 2228.4, |
| "kl_loss_39": 1226.6, |
| "kl_loss_7": 3712.0, |
| "learning_rate": 0.0002283461687567236, |
| "loss": 5186.2, |
| "step": 6860 |
| }, |
| { |
| "ce_loss_13": 2.9503078758716583, |
| "ce_loss_26": 2.5050206154584886, |
| "ce_loss_39": 2.0353992134332657, |
| "ce_loss_52": 1.4260726869106293, |
| "ce_loss_7": 3.2181301593780516, |
| "epoch": 0.687, |
| "grad_norm": 19.436405773017547, |
| "kl_loss_13": 3173.6, |
| "kl_loss_26": 2249.4, |
| "kl_loss_39": 1239.4, |
| "kl_loss_7": 3725.2, |
| "learning_rate": 0.00022701548179231045, |
| "loss": 5180.9, |
| "step": 6870 |
| }, |
| { |
| "ce_loss_13": 2.989210718870163, |
| "ce_loss_26": 2.5456956744194033, |
| "ce_loss_39": 2.07299542427063, |
| "ce_loss_52": 1.45269995033741, |
| "ce_loss_7": 3.25973704457283, |
| "epoch": 0.688, |
| "grad_norm": 19.21415326438658, |
| "kl_loss_13": 3172.0, |
| "kl_loss_26": 2245.6, |
| "kl_loss_39": 1252.7, |
| "kl_loss_7": 3735.6, |
| "learning_rate": 0.00022568754377896516, |
| "loss": 5258.6, |
| "step": 6880 |
| }, |
| { |
| "ce_loss_13": 2.9914295256137846, |
| "ce_loss_26": 2.5424347430467606, |
| "ce_loss_39": 2.0552540928125382, |
| "ce_loss_52": 1.4221117675304413, |
| "ce_loss_7": 3.2662317156791687, |
| "epoch": 0.689, |
| "grad_norm": 19.29554445155232, |
| "kl_loss_13": 3243.2, |
| "kl_loss_26": 2310.8, |
| "kl_loss_39": 1282.1, |
| "kl_loss_7": 3819.2, |
| "learning_rate": 0.00022436236808900844, |
| "loss": 5241.3, |
| "step": 6890 |
| }, |
| { |
| "ce_loss_13": 2.9910283386707306, |
| "ce_loss_26": 2.550189185142517, |
| "ce_loss_39": 2.07351476252079, |
| "ce_loss_52": 1.4624590903520585, |
| "ce_loss_7": 3.2594147861003875, |
| "epoch": 0.69, |
| "grad_norm": 19.896412241424787, |
| "kl_loss_13": 3197.2, |
| "kl_loss_26": 2269.0, |
| "kl_loss_39": 1261.5, |
| "kl_loss_7": 3754.8, |
| "learning_rate": 0.00022303996806694487, |
| "loss": 5245.0, |
| "step": 6900 |
| }, |
| { |
| "ce_loss_13": 2.9984730899333956, |
| "ce_loss_26": 2.563878893852234, |
| "ce_loss_39": 2.073988217115402, |
| "ce_loss_52": 1.4545040100812912, |
| "ce_loss_7": 3.266710376739502, |
| "epoch": 0.691, |
| "grad_norm": 18.309749884904267, |
| "kl_loss_13": 3220.8, |
| "kl_loss_26": 2301.4, |
| "kl_loss_39": 1269.6, |
| "kl_loss_7": 3776.4, |
| "learning_rate": 0.00022172035702932823, |
| "loss": 5221.5, |
| "step": 6910 |
| }, |
| { |
| "ce_loss_13": 2.9589293122291567, |
| "ce_loss_26": 2.5164781630039217, |
| "ce_loss_39": 2.045464962720871, |
| "ce_loss_52": 1.4442868947982788, |
| "ce_loss_7": 3.2192385673522947, |
| "epoch": 0.692, |
| "grad_norm": 19.310059013922874, |
| "kl_loss_13": 3138.4, |
| "kl_loss_26": 2227.4, |
| "kl_loss_39": 1231.9, |
| "kl_loss_7": 3680.8, |
| "learning_rate": 0.00022040354826462666, |
| "loss": 5190.7, |
| "step": 6920 |
| }, |
| { |
| "ce_loss_13": 2.947145390510559, |
| "ce_loss_26": 2.5085155785083773, |
| "ce_loss_39": 2.0424805164337156, |
| "ce_loss_52": 1.443164300918579, |
| "ce_loss_7": 3.206177592277527, |
| "epoch": 0.693, |
| "grad_norm": 20.439949582745886, |
| "kl_loss_13": 3135.6, |
| "kl_loss_26": 2219.4, |
| "kl_loss_39": 1222.2, |
| "kl_loss_7": 3681.2, |
| "learning_rate": 0.0002190895550330899, |
| "loss": 5252.8, |
| "step": 6930 |
| }, |
| { |
| "ce_loss_13": 2.951523560285568, |
| "ce_loss_26": 2.4968682497739794, |
| "ce_loss_39": 2.021780180931091, |
| "ce_loss_52": 1.4140155717730523, |
| "ce_loss_7": 3.2223109781742094, |
| "epoch": 0.694, |
| "grad_norm": 19.632700567273133, |
| "kl_loss_13": 3171.6, |
| "kl_loss_26": 2224.6, |
| "kl_loss_39": 1221.6, |
| "kl_loss_7": 3730.8, |
| "learning_rate": 0.00021777839056661552, |
| "loss": 5204.95, |
| "step": 6940 |
| }, |
| { |
| "ce_loss_13": 2.9996495246887207, |
| "ce_loss_26": 2.545914036035538, |
| "ce_loss_39": 2.0629312634468078, |
| "ce_loss_52": 1.4569539099931716, |
| "ce_loss_7": 3.260616344213486, |
| "epoch": 0.695, |
| "grad_norm": 19.802798519542876, |
| "kl_loss_13": 3188.0, |
| "kl_loss_26": 2252.8, |
| "kl_loss_39": 1238.7, |
| "kl_loss_7": 3738.8, |
| "learning_rate": 0.0002164700680686147, |
| "loss": 5219.0, |
| "step": 6950 |
| }, |
| { |
| "ce_loss_13": 2.965209072828293, |
| "ce_loss_26": 2.523294594883919, |
| "ce_loss_39": 2.0499251425266265, |
| "ce_loss_52": 1.450638398528099, |
| "ce_loss_7": 3.224820476770401, |
| "epoch": 0.696, |
| "grad_norm": 19.91434345309615, |
| "kl_loss_13": 3134.0, |
| "kl_loss_26": 2218.0, |
| "kl_loss_39": 1215.6, |
| "kl_loss_7": 3669.6, |
| "learning_rate": 0.0002151646007138806, |
| "loss": 5247.2, |
| "step": 6960 |
| }, |
| { |
| "ce_loss_13": 2.989179176092148, |
| "ce_loss_26": 2.5318516552448274, |
| "ce_loss_39": 2.049258217215538, |
| "ce_loss_52": 1.4338771492242812, |
| "ce_loss_7": 3.2527658343315125, |
| "epoch": 0.697, |
| "grad_norm": 19.22493467335688, |
| "kl_loss_13": 3224.0, |
| "kl_loss_26": 2281.6, |
| "kl_loss_39": 1260.3, |
| "kl_loss_7": 3768.8, |
| "learning_rate": 0.00021386200164845526, |
| "loss": 5208.2, |
| "step": 6970 |
| }, |
| { |
| "ce_loss_13": 2.9675691723823547, |
| "ce_loss_26": 2.524250292778015, |
| "ce_loss_39": 2.0449122846126557, |
| "ce_loss_52": 1.4266346216201782, |
| "ce_loss_7": 3.238176566362381, |
| "epoch": 0.698, |
| "grad_norm": 19.28582875590594, |
| "kl_loss_13": 3208.4, |
| "kl_loss_26": 2279.6, |
| "kl_loss_39": 1258.2, |
| "kl_loss_7": 3762.8, |
| "learning_rate": 0.0002125622839894964, |
| "loss": 5196.95, |
| "step": 6980 |
| }, |
| { |
| "ce_loss_13": 3.08748916387558, |
| "ce_loss_26": 2.6341689109802244, |
| "ce_loss_39": 2.131711891293526, |
| "ce_loss_52": 1.4756682693958283, |
| "ce_loss_7": 3.3573212742805483, |
| "epoch": 0.699, |
| "grad_norm": 19.570816095455605, |
| "kl_loss_13": 3319.2, |
| "kl_loss_26": 2375.0, |
| "kl_loss_39": 1332.9, |
| "kl_loss_7": 3878.0, |
| "learning_rate": 0.00021126546082514663, |
| "loss": 5264.6, |
| "step": 6990 |
| }, |
| { |
| "ce_loss_13": 2.9623800575733186, |
| "ce_loss_26": 2.528759664297104, |
| "ce_loss_39": 2.0456511676311493, |
| "ce_loss_52": 1.4381566911935806, |
| "ce_loss_7": 3.2283441185951234, |
| "epoch": 0.7, |
| "grad_norm": 20.0544191043279, |
| "kl_loss_13": 3144.4, |
| "kl_loss_26": 2245.4, |
| "kl_loss_39": 1237.7, |
| "kl_loss_7": 3701.6, |
| "learning_rate": 0.00020997154521440098, |
| "loss": 5184.75, |
| "step": 7000 |
| }, |
| { |
| "ce_loss_13": 2.9258078813552855, |
| "ce_loss_26": 2.5000339925289152, |
| "ce_loss_39": 2.030132883787155, |
| "ce_loss_52": 1.432218487560749, |
| "ce_loss_7": 3.1859234631061555, |
| "epoch": 0.701, |
| "grad_norm": 20.032367176949514, |
| "kl_loss_13": 3112.4, |
| "kl_loss_26": 2211.2, |
| "kl_loss_39": 1212.0, |
| "kl_loss_7": 3653.6, |
| "learning_rate": 0.0002086805501869749, |
| "loss": 5163.7, |
| "step": 7010 |
| }, |
| { |
| "ce_loss_13": 2.9877611219882967, |
| "ce_loss_26": 2.5467711210250856, |
| "ce_loss_39": 2.0729553580284117, |
| "ce_loss_52": 1.4694935828447342, |
| "ce_loss_7": 3.2482242822647094, |
| "epoch": 0.702, |
| "grad_norm": 19.48021495423088, |
| "kl_loss_13": 3139.2, |
| "kl_loss_26": 2221.2, |
| "kl_loss_39": 1226.6, |
| "kl_loss_7": 3688.4, |
| "learning_rate": 0.0002073924887431744, |
| "loss": 5172.1, |
| "step": 7020 |
| }, |
| { |
| "ce_loss_13": 2.908235615491867, |
| "ce_loss_26": 2.477229207754135, |
| "ce_loss_39": 2.0130053520202638, |
| "ce_loss_52": 1.4191944628953934, |
| "ce_loss_7": 3.1711674451828005, |
| "epoch": 0.703, |
| "grad_norm": 19.67889818109448, |
| "kl_loss_13": 3069.6, |
| "kl_loss_26": 2175.0, |
| "kl_loss_39": 1200.3, |
| "kl_loss_7": 3607.2, |
| "learning_rate": 0.00020610737385376348, |
| "loss": 5178.0, |
| "step": 7030 |
| }, |
| { |
| "ce_loss_13": 2.925868648290634, |
| "ce_loss_26": 2.4821185052394865, |
| "ce_loss_39": 2.013263535499573, |
| "ce_loss_52": 1.407904815673828, |
| "ce_loss_7": 3.184937173128128, |
| "epoch": 0.704, |
| "grad_norm": 19.315710978547724, |
| "kl_loss_13": 3152.4, |
| "kl_loss_26": 2238.2, |
| "kl_loss_39": 1233.7, |
| "kl_loss_7": 3694.8, |
| "learning_rate": 0.00020482521845983521, |
| "loss": 5182.5, |
| "step": 7040 |
| }, |
| { |
| "ce_loss_13": 2.978561645746231, |
| "ce_loss_26": 2.537975686788559, |
| "ce_loss_39": 2.068689134716988, |
| "ce_loss_52": 1.449235063791275, |
| "ce_loss_7": 3.239302319288254, |
| "epoch": 0.705, |
| "grad_norm": 20.022921411442997, |
| "kl_loss_13": 3163.2, |
| "kl_loss_26": 2247.8, |
| "kl_loss_39": 1249.9, |
| "kl_loss_7": 3709.6, |
| "learning_rate": 0.00020354603547267987, |
| "loss": 5191.65, |
| "step": 7050 |
| }, |
| { |
| "ce_loss_13": 2.926995551586151, |
| "ce_loss_26": 2.4728329688310624, |
| "ce_loss_39": 2.000428321957588, |
| "ce_loss_52": 1.4053510591387748, |
| "ce_loss_7": 3.1873682618141173, |
| "epoch": 0.706, |
| "grad_norm": 20.203929178538456, |
| "kl_loss_13": 3170.4, |
| "kl_loss_26": 2222.2, |
| "kl_loss_39": 1210.8, |
| "kl_loss_7": 3714.0, |
| "learning_rate": 0.00020226983777365604, |
| "loss": 5154.3, |
| "step": 7060 |
| }, |
| { |
| "ce_loss_13": 2.9693270325660706, |
| "ce_loss_26": 2.520361030101776, |
| "ce_loss_39": 2.040296342968941, |
| "ce_loss_52": 1.4353806316852569, |
| "ce_loss_7": 3.2412941575050356, |
| "epoch": 0.707, |
| "grad_norm": 19.721352799273095, |
| "kl_loss_13": 3196.4, |
| "kl_loss_26": 2259.8, |
| "kl_loss_39": 1250.8, |
| "kl_loss_7": 3758.8, |
| "learning_rate": 0.00020099663821406056, |
| "loss": 5217.7, |
| "step": 7070 |
| }, |
| { |
| "ce_loss_13": 2.9820376515388487, |
| "ce_loss_26": 2.5476091861724854, |
| "ce_loss_39": 2.072761395573616, |
| "ce_loss_52": 1.4543047964572906, |
| "ce_loss_7": 3.2506080687046053, |
| "epoch": 0.708, |
| "grad_norm": 20.324232804485565, |
| "kl_loss_13": 3159.6, |
| "kl_loss_26": 2249.8, |
| "kl_loss_39": 1247.5, |
| "kl_loss_7": 3714.4, |
| "learning_rate": 0.00019972644961499853, |
| "loss": 5197.1, |
| "step": 7080 |
| }, |
| { |
| "ce_loss_13": 2.9332118809223173, |
| "ce_loss_26": 2.488256406784058, |
| "ce_loss_39": 2.0110603511333465, |
| "ce_loss_52": 1.4073437690734862, |
| "ce_loss_7": 3.202910542488098, |
| "epoch": 0.709, |
| "grad_norm": 19.810367107777207, |
| "kl_loss_13": 3178.8, |
| "kl_loss_26": 2255.4, |
| "kl_loss_39": 1244.9, |
| "kl_loss_7": 3745.2, |
| "learning_rate": 0.00019845928476725522, |
| "loss": 5159.15, |
| "step": 7090 |
| }, |
| { |
| "ce_loss_13": 2.963654935359955, |
| "ce_loss_26": 2.530976951122284, |
| "ce_loss_39": 2.0635117918252943, |
| "ce_loss_52": 1.466596108675003, |
| "ce_loss_7": 3.219101697206497, |
| "epoch": 0.71, |
| "grad_norm": 20.1039471333501, |
| "kl_loss_13": 3130.4, |
| "kl_loss_26": 2221.8, |
| "kl_loss_39": 1222.3, |
| "kl_loss_7": 3668.4, |
| "learning_rate": 0.00019719515643116677, |
| "loss": 5138.7, |
| "step": 7100 |
| }, |
| { |
| "ce_loss_13": 2.9432359755039217, |
| "ce_loss_26": 2.497118225693703, |
| "ce_loss_39": 2.0123680919408797, |
| "ce_loss_52": 1.3938605546951295, |
| "ce_loss_7": 3.214134621620178, |
| "epoch": 0.711, |
| "grad_norm": 20.875475364068677, |
| "kl_loss_13": 3169.6, |
| "kl_loss_26": 2236.6, |
| "kl_loss_39": 1232.8, |
| "kl_loss_7": 3728.8, |
| "learning_rate": 0.0001959340773364911, |
| "loss": 5177.25, |
| "step": 7110 |
| }, |
| { |
| "ce_loss_13": 2.937902510166168, |
| "ce_loss_26": 2.5005611896514894, |
| "ce_loss_39": 2.025138959288597, |
| "ce_loss_52": 1.4121045261621474, |
| "ce_loss_7": 3.21354022026062, |
| "epoch": 0.712, |
| "grad_norm": 19.300987998871754, |
| "kl_loss_13": 3168.4, |
| "kl_loss_26": 2254.8, |
| "kl_loss_39": 1247.8, |
| "kl_loss_7": 3737.2, |
| "learning_rate": 0.0001946760601822809, |
| "loss": 5183.35, |
| "step": 7120 |
| }, |
| { |
| "ce_loss_13": 2.9532769322395325, |
| "ce_loss_26": 2.5094838380813598, |
| "ce_loss_39": 2.042011481523514, |
| "ce_loss_52": 1.4359831362962723, |
| "ce_loss_7": 3.2172477781772613, |
| "epoch": 0.713, |
| "grad_norm": 19.806306622567096, |
| "kl_loss_13": 3130.8, |
| "kl_loss_26": 2213.4, |
| "kl_loss_39": 1210.7, |
| "kl_loss_7": 3690.4, |
| "learning_rate": 0.00019342111763675512, |
| "loss": 5121.55, |
| "step": 7130 |
| }, |
| { |
| "ce_loss_13": 2.9316843450069427, |
| "ce_loss_26": 2.4771865159273148, |
| "ce_loss_39": 1.9945536375045776, |
| "ce_loss_52": 1.3953458324074746, |
| "ce_loss_7": 3.1933025121688843, |
| "epoch": 0.714, |
| "grad_norm": 19.89107625803987, |
| "kl_loss_13": 3143.2, |
| "kl_loss_26": 2214.4, |
| "kl_loss_39": 1207.5, |
| "kl_loss_7": 3687.6, |
| "learning_rate": 0.00019216926233717085, |
| "loss": 5175.1, |
| "step": 7140 |
| }, |
| { |
| "ce_loss_13": 2.9416719019412993, |
| "ce_loss_26": 2.4982976377010346, |
| "ce_loss_39": 2.018396332859993, |
| "ce_loss_52": 1.4147447228431702, |
| "ce_loss_7": 3.206580412387848, |
| "epoch": 0.715, |
| "grad_norm": 19.473553749134638, |
| "kl_loss_13": 3170.0, |
| "kl_loss_26": 2250.0, |
| "kl_loss_39": 1236.5, |
| "kl_loss_7": 3716.0, |
| "learning_rate": 0.00019092050688969737, |
| "loss": 5168.85, |
| "step": 7150 |
| }, |
| { |
| "ce_loss_13": 2.93907487988472, |
| "ce_loss_26": 2.5087509632110594, |
| "ce_loss_39": 2.036549669504166, |
| "ce_loss_52": 1.4339062184095384, |
| "ce_loss_7": 3.2007872402668, |
| "epoch": 0.716, |
| "grad_norm": 18.582597661219776, |
| "kl_loss_13": 3097.2, |
| "kl_loss_26": 2211.2, |
| "kl_loss_39": 1215.8, |
| "kl_loss_7": 3638.4, |
| "learning_rate": 0.00018967486386928817, |
| "loss": 5158.35, |
| "step": 7160 |
| }, |
| { |
| "ce_loss_13": 2.945317584276199, |
| "ce_loss_26": 2.4993871986865996, |
| "ce_loss_39": 2.0176479905843734, |
| "ce_loss_52": 1.4336241394281388, |
| "ce_loss_7": 3.20940922498703, |
| "epoch": 0.717, |
| "grad_norm": 20.943645323187056, |
| "kl_loss_13": 3133.6, |
| "kl_loss_26": 2211.4, |
| "kl_loss_39": 1198.8, |
| "kl_loss_7": 3684.0, |
| "learning_rate": 0.00018843234581955443, |
| "loss": 5165.1, |
| "step": 7170 |
| }, |
| { |
| "ce_loss_13": 2.9533539593219755, |
| "ce_loss_26": 2.5154259234666823, |
| "ce_loss_39": 2.0371447414159776, |
| "ce_loss_52": 1.4300806164741515, |
| "ce_loss_7": 3.2220456659793855, |
| "epoch": 0.718, |
| "grad_norm": 20.352991453837664, |
| "kl_loss_13": 3138.0, |
| "kl_loss_26": 2233.8, |
| "kl_loss_39": 1229.9, |
| "kl_loss_7": 3696.4, |
| "learning_rate": 0.00018719296525263924, |
| "loss": 5165.1, |
| "step": 7180 |
| }, |
| { |
| "ce_loss_13": 2.8928813517093657, |
| "ce_loss_26": 2.4584825813770292, |
| "ce_loss_39": 1.9816097348928452, |
| "ce_loss_52": 1.4100207000970841, |
| "ce_loss_7": 3.1512379109859467, |
| "epoch": 0.719, |
| "grad_norm": 19.73288927838942, |
| "kl_loss_13": 3091.2, |
| "kl_loss_26": 2168.2, |
| "kl_loss_39": 1162.9, |
| "kl_loss_7": 3634.8, |
| "learning_rate": 0.0001859567346490913, |
| "loss": 5125.45, |
| "step": 7190 |
| }, |
| { |
| "ce_loss_13": 3.013565558195114, |
| "ce_loss_26": 2.5717740774154665, |
| "ce_loss_39": 2.0986361503601074, |
| "ce_loss_52": 1.4729616045951843, |
| "ce_loss_7": 3.270446163415909, |
| "epoch": 0.72, |
| "grad_norm": 19.301343533343292, |
| "kl_loss_13": 3201.2, |
| "kl_loss_26": 2278.0, |
| "kl_loss_39": 1269.9, |
| "kl_loss_7": 3744.0, |
| "learning_rate": 0.0001847236664577389, |
| "loss": 5151.05, |
| "step": 7200 |
| }, |
| { |
| "ce_loss_13": 2.8901243984699247, |
| "ce_loss_26": 2.447618916630745, |
| "ce_loss_39": 1.9805465787649155, |
| "ce_loss_52": 1.393562839925289, |
| "ce_loss_7": 3.152049034833908, |
| "epoch": 0.721, |
| "grad_norm": 19.80481816325832, |
| "kl_loss_13": 3108.8, |
| "kl_loss_26": 2184.6, |
| "kl_loss_39": 1197.1, |
| "kl_loss_7": 3651.2, |
| "learning_rate": 0.00018349377309556487, |
| "loss": 5147.6, |
| "step": 7210 |
| }, |
| { |
| "ce_loss_13": 2.935315173864365, |
| "ce_loss_26": 2.4844827204942703, |
| "ce_loss_39": 2.014389392733574, |
| "ce_loss_52": 1.4218608409166336, |
| "ce_loss_7": 3.202117031812668, |
| "epoch": 0.722, |
| "grad_norm": 21.29811195159757, |
| "kl_loss_13": 3145.6, |
| "kl_loss_26": 2210.6, |
| "kl_loss_39": 1204.5, |
| "kl_loss_7": 3703.6, |
| "learning_rate": 0.00018226706694758193, |
| "loss": 5128.1, |
| "step": 7220 |
| }, |
| { |
| "ce_loss_13": 2.9786236941814423, |
| "ce_loss_26": 2.536505568027496, |
| "ce_loss_39": 2.0584143906831742, |
| "ce_loss_52": 1.4622384160757065, |
| "ce_loss_7": 3.243917632102966, |
| "epoch": 0.723, |
| "grad_norm": 19.6163049246679, |
| "kl_loss_13": 3157.6, |
| "kl_loss_26": 2230.8, |
| "kl_loss_39": 1218.2, |
| "kl_loss_7": 3710.4, |
| "learning_rate": 0.0001810435603667075, |
| "loss": 5135.45, |
| "step": 7230 |
| }, |
| { |
| "ce_loss_13": 2.9429832458496095, |
| "ce_loss_26": 2.4910512387752535, |
| "ce_loss_39": 2.0214726239442826, |
| "ce_loss_52": 1.4324376732110977, |
| "ce_loss_7": 3.201977092027664, |
| "epoch": 0.724, |
| "grad_norm": 19.933959617154258, |
| "kl_loss_13": 3130.8, |
| "kl_loss_26": 2197.0, |
| "kl_loss_39": 1197.6, |
| "kl_loss_7": 3676.0, |
| "learning_rate": 0.0001798232656736389, |
| "loss": 5101.6, |
| "step": 7240 |
| }, |
| { |
| "ce_loss_13": 3.017507255077362, |
| "ce_loss_26": 2.5613476634025574, |
| "ce_loss_39": 2.065951904654503, |
| "ce_loss_52": 1.4529381558299064, |
| "ce_loss_7": 3.284585565328598, |
| "epoch": 0.725, |
| "grad_norm": 19.69163026795737, |
| "kl_loss_13": 3244.0, |
| "kl_loss_26": 2294.8, |
| "kl_loss_39": 1252.0, |
| "kl_loss_7": 3804.4, |
| "learning_rate": 0.0001786061951567303, |
| "loss": 5145.8, |
| "step": 7250 |
| }, |
| { |
| "ce_loss_13": 2.8927790343761446, |
| "ce_loss_26": 2.4552118331193924, |
| "ce_loss_39": 1.9874976933002473, |
| "ce_loss_52": 1.3983306601643561, |
| "ce_loss_7": 3.160378706455231, |
| "epoch": 0.726, |
| "grad_norm": 19.959972483591027, |
| "kl_loss_13": 3097.2, |
| "kl_loss_26": 2189.6, |
| "kl_loss_39": 1199.8, |
| "kl_loss_7": 3653.2, |
| "learning_rate": 0.00017739236107186857, |
| "loss": 5152.65, |
| "step": 7260 |
| }, |
| { |
| "ce_loss_13": 2.938109403848648, |
| "ce_loss_26": 2.4923312455415725, |
| "ce_loss_39": 2.0182687640190125, |
| "ce_loss_52": 1.4262833833694457, |
| "ce_loss_7": 3.2080613017082213, |
| "epoch": 0.727, |
| "grad_norm": 19.42416009314478, |
| "kl_loss_13": 3165.6, |
| "kl_loss_26": 2229.8, |
| "kl_loss_39": 1226.4, |
| "kl_loss_7": 3724.0, |
| "learning_rate": 0.00017618177564234904, |
| "loss": 5132.0, |
| "step": 7270 |
| }, |
| { |
| "ce_loss_13": 2.932406869530678, |
| "ce_loss_26": 2.481098806858063, |
| "ce_loss_39": 1.9998224407434464, |
| "ce_loss_52": 1.4021466106176377, |
| "ce_loss_7": 3.192963147163391, |
| "epoch": 0.728, |
| "grad_norm": 19.542374311814292, |
| "kl_loss_13": 3155.2, |
| "kl_loss_26": 2223.4, |
| "kl_loss_39": 1216.9, |
| "kl_loss_7": 3704.0, |
| "learning_rate": 0.00017497445105875377, |
| "loss": 5186.8, |
| "step": 7280 |
| }, |
| { |
| "ce_loss_13": 2.9192141771316527, |
| "ce_loss_26": 2.490318274497986, |
| "ce_loss_39": 2.027893853187561, |
| "ce_loss_52": 1.43733262270689, |
| "ce_loss_7": 3.177316850423813, |
| "epoch": 0.729, |
| "grad_norm": 20.20715160517786, |
| "kl_loss_13": 3084.0, |
| "kl_loss_26": 2177.6, |
| "kl_loss_39": 1196.3, |
| "kl_loss_7": 3620.0, |
| "learning_rate": 0.000173770399478828, |
| "loss": 5076.85, |
| "step": 7290 |
| }, |
| { |
| "ce_loss_13": 2.9109710931777952, |
| "ce_loss_26": 2.4745964229106905, |
| "ce_loss_39": 2.007509797811508, |
| "ce_loss_52": 1.4228856399655343, |
| "ce_loss_7": 3.179339534044266, |
| "epoch": 0.73, |
| "grad_norm": 19.31391716230683, |
| "kl_loss_13": 3123.2, |
| "kl_loss_26": 2212.0, |
| "kl_loss_39": 1198.3, |
| "kl_loss_7": 3681.6, |
| "learning_rate": 0.0001725696330273575, |
| "loss": 5123.9, |
| "step": 7300 |
| }, |
| { |
| "ce_loss_13": 2.9543818056583406, |
| "ce_loss_26": 2.5187111288309096, |
| "ce_loss_39": 2.046798062324524, |
| "ce_loss_52": 1.4316737815737723, |
| "ce_loss_7": 3.2196085810661317, |
| "epoch": 0.731, |
| "grad_norm": 19.333490210710867, |
| "kl_loss_13": 3136.4, |
| "kl_loss_26": 2223.6, |
| "kl_loss_39": 1228.2, |
| "kl_loss_7": 3683.6, |
| "learning_rate": 0.00017137216379604724, |
| "loss": 5093.05, |
| "step": 7310 |
| }, |
| { |
| "ce_loss_13": 2.991909348964691, |
| "ce_loss_26": 2.533888804912567, |
| "ce_loss_39": 2.048043805360794, |
| "ce_loss_52": 1.426993179321289, |
| "ce_loss_7": 3.2596666753292083, |
| "epoch": 0.732, |
| "grad_norm": 18.669638559883268, |
| "kl_loss_13": 3225.6, |
| "kl_loss_26": 2287.6, |
| "kl_loss_39": 1264.8, |
| "kl_loss_7": 3784.0, |
| "learning_rate": 0.00017017800384339925, |
| "loss": 5127.2, |
| "step": 7320 |
| }, |
| { |
| "ce_loss_13": 2.919608438014984, |
| "ce_loss_26": 2.475746387243271, |
| "ce_loss_39": 2.0119441866874697, |
| "ce_loss_52": 1.416624790430069, |
| "ce_loss_7": 3.1857059836387633, |
| "epoch": 0.733, |
| "grad_norm": 19.375624695020313, |
| "kl_loss_13": 3109.2, |
| "kl_loss_26": 2190.6, |
| "kl_loss_39": 1207.2, |
| "kl_loss_7": 3667.2, |
| "learning_rate": 0.00016898716519459073, |
| "loss": 5204.9, |
| "step": 7330 |
| }, |
| { |
| "ce_loss_13": 2.978672456741333, |
| "ce_loss_26": 2.5233275532722472, |
| "ce_loss_39": 2.0352649986743927, |
| "ce_loss_52": 1.396337878704071, |
| "ce_loss_7": 3.2537453293800356, |
| "epoch": 0.734, |
| "grad_norm": 19.487833479563225, |
| "kl_loss_13": 3269.6, |
| "kl_loss_26": 2319.4, |
| "kl_loss_39": 1294.6, |
| "kl_loss_7": 3847.2, |
| "learning_rate": 0.00016779965984135375, |
| "loss": 5141.65, |
| "step": 7340 |
| }, |
| { |
| "ce_loss_13": 2.946110498905182, |
| "ce_loss_26": 2.507070618867874, |
| "ce_loss_39": 2.0425552487373353, |
| "ce_loss_52": 1.4492309480905532, |
| "ce_loss_7": 3.2022292137146, |
| "epoch": 0.735, |
| "grad_norm": 19.428920750438415, |
| "kl_loss_13": 3113.2, |
| "kl_loss_26": 2205.0, |
| "kl_loss_39": 1210.6, |
| "kl_loss_7": 3652.4, |
| "learning_rate": 0.00016661549974185424, |
| "loss": 5094.6, |
| "step": 7350 |
| }, |
| { |
| "ce_loss_13": 2.9823583602905273, |
| "ce_loss_26": 2.539780905842781, |
| "ce_loss_39": 2.0619786471128463, |
| "ce_loss_52": 1.45176909416914, |
| "ce_loss_7": 3.249054718017578, |
| "epoch": 0.736, |
| "grad_norm": 19.839368194621894, |
| "kl_loss_13": 3204.0, |
| "kl_loss_26": 2280.6, |
| "kl_loss_39": 1256.4, |
| "kl_loss_7": 3753.6, |
| "learning_rate": 0.00016543469682057105, |
| "loss": 5196.95, |
| "step": 7360 |
| }, |
| { |
| "ce_loss_13": 2.958816784620285, |
| "ce_loss_26": 2.526939642429352, |
| "ce_loss_39": 2.0634298622608185, |
| "ce_loss_52": 1.4754424065351486, |
| "ce_loss_7": 3.223200261592865, |
| "epoch": 0.737, |
| "grad_norm": 19.922895259671222, |
| "kl_loss_13": 3096.0, |
| "kl_loss_26": 2185.8, |
| "kl_loss_39": 1192.1, |
| "kl_loss_7": 3641.6, |
| "learning_rate": 0.00016425726296817632, |
| "loss": 5155.3, |
| "step": 7370 |
| }, |
| { |
| "ce_loss_13": 2.9623453855514525, |
| "ce_loss_26": 2.5124567419290544, |
| "ce_loss_39": 2.032202622294426, |
| "ce_loss_52": 1.4377738699316978, |
| "ce_loss_7": 3.22493896484375, |
| "epoch": 0.738, |
| "grad_norm": 19.913040297451975, |
| "kl_loss_13": 3150.8, |
| "kl_loss_26": 2213.6, |
| "kl_loss_39": 1202.5, |
| "kl_loss_7": 3703.2, |
| "learning_rate": 0.00016308321004141607, |
| "loss": 5152.6, |
| "step": 7380 |
| }, |
| { |
| "ce_loss_13": 2.9184991478919984, |
| "ce_loss_26": 2.4769508123397825, |
| "ce_loss_39": 2.0019295692443846, |
| "ce_loss_52": 1.423092892765999, |
| "ce_loss_7": 3.185350716114044, |
| "epoch": 0.739, |
| "grad_norm": 19.322834578437774, |
| "kl_loss_13": 3095.6, |
| "kl_loss_26": 2174.4, |
| "kl_loss_39": 1176.6, |
| "kl_loss_7": 3652.0, |
| "learning_rate": 0.00016191254986299043, |
| "loss": 5134.25, |
| "step": 7390 |
| }, |
| { |
| "ce_loss_13": 2.8498477935791016, |
| "ce_loss_26": 2.4134464621543885, |
| "ce_loss_39": 1.952101919054985, |
| "ce_loss_52": 1.3852853626012802, |
| "ce_loss_7": 3.117431342601776, |
| "epoch": 0.74, |
| "grad_norm": 20.243774871674358, |
| "kl_loss_13": 3055.2, |
| "kl_loss_26": 2147.0, |
| "kl_loss_39": 1163.3, |
| "kl_loss_7": 3606.8, |
| "learning_rate": 0.00016074529422143398, |
| "loss": 5086.95, |
| "step": 7400 |
| }, |
| { |
| "ce_loss_13": 2.999220699071884, |
| "ce_loss_26": 2.550427186489105, |
| "ce_loss_39": 2.0656515032052996, |
| "ce_loss_52": 1.4574634283781052, |
| "ce_loss_7": 3.2632993936538695, |
| "epoch": 0.741, |
| "grad_norm": 20.693157593916027, |
| "kl_loss_13": 3181.6, |
| "kl_loss_26": 2251.2, |
| "kl_loss_39": 1241.3, |
| "kl_loss_7": 3731.6, |
| "learning_rate": 0.0001595814548709983, |
| "loss": 5127.4, |
| "step": 7410 |
| }, |
| { |
| "ce_loss_13": 2.9262797057628633, |
| "ce_loss_26": 2.4955521285533906, |
| "ce_loss_39": 2.043664366006851, |
| "ce_loss_52": 1.4607123613357544, |
| "ce_loss_7": 3.178546887636185, |
| "epoch": 0.742, |
| "grad_norm": 19.251560643481486, |
| "kl_loss_13": 3060.8, |
| "kl_loss_26": 2159.0, |
| "kl_loss_39": 1190.2, |
| "kl_loss_7": 3588.8, |
| "learning_rate": 0.00015842104353153285, |
| "loss": 5092.2, |
| "step": 7420 |
| }, |
| { |
| "ce_loss_13": 3.0160707533359528, |
| "ce_loss_26": 2.5663387060165403, |
| "ce_loss_39": 2.0802172899246214, |
| "ce_loss_52": 1.4715656280517577, |
| "ce_loss_7": 3.2800197422504427, |
| "epoch": 0.743, |
| "grad_norm": 19.587217025482605, |
| "kl_loss_13": 3190.4, |
| "kl_loss_26": 2255.8, |
| "kl_loss_39": 1233.7, |
| "kl_loss_7": 3735.6, |
| "learning_rate": 0.0001572640718883667, |
| "loss": 5115.1, |
| "step": 7430 |
| }, |
| { |
| "ce_loss_13": 2.9469042241573336, |
| "ce_loss_26": 2.504935991764069, |
| "ce_loss_39": 2.0331138372421265, |
| "ce_loss_52": 1.4291576787829399, |
| "ce_loss_7": 3.2098668992519377, |
| "epoch": 0.744, |
| "grad_norm": 18.92686745034597, |
| "kl_loss_13": 3124.8, |
| "kl_loss_26": 2209.8, |
| "kl_loss_39": 1212.4, |
| "kl_loss_7": 3675.2, |
| "learning_rate": 0.0001561105515921915, |
| "loss": 5076.55, |
| "step": 7440 |
| }, |
| { |
| "ce_loss_13": 2.924536573886871, |
| "ce_loss_26": 2.487199380993843, |
| "ce_loss_39": 2.0280190229415895, |
| "ce_loss_52": 1.4308366000652313, |
| "ce_loss_7": 3.184017467498779, |
| "epoch": 0.745, |
| "grad_norm": 20.464433868383605, |
| "kl_loss_13": 3079.6, |
| "kl_loss_26": 2173.0, |
| "kl_loss_39": 1203.3, |
| "kl_loss_7": 3634.0, |
| "learning_rate": 0.0001549604942589441, |
| "loss": 5072.9, |
| "step": 7450 |
| }, |
| { |
| "ce_loss_13": 2.929040068387985, |
| "ce_loss_26": 2.4782379269599915, |
| "ce_loss_39": 1.997541171312332, |
| "ce_loss_52": 1.4025927037000656, |
| "ce_loss_7": 3.1986856281757357, |
| "epoch": 0.746, |
| "grad_norm": 19.749937853484084, |
| "kl_loss_13": 3136.4, |
| "kl_loss_26": 2195.8, |
| "kl_loss_39": 1197.5, |
| "kl_loss_7": 3701.6, |
| "learning_rate": 0.00015381391146968864, |
| "loss": 5119.05, |
| "step": 7460 |
| }, |
| { |
| "ce_loss_13": 2.9494260370731356, |
| "ce_loss_26": 2.5060392141342165, |
| "ce_loss_39": 2.0337665289640428, |
| "ce_loss_52": 1.437817743420601, |
| "ce_loss_7": 3.2058426082134246, |
| "epoch": 0.747, |
| "grad_norm": 20.32794285148468, |
| "kl_loss_13": 3134.4, |
| "kl_loss_26": 2216.0, |
| "kl_loss_39": 1213.4, |
| "kl_loss_7": 3678.0, |
| "learning_rate": 0.00015267081477050133, |
| "loss": 5102.65, |
| "step": 7470 |
| }, |
| { |
| "ce_loss_13": 2.921141803264618, |
| "ce_loss_26": 2.4805154383182524, |
| "ce_loss_39": 2.0182774633169176, |
| "ce_loss_52": 1.4364572942256928, |
| "ce_loss_7": 3.1770897448062896, |
| "epoch": 0.748, |
| "grad_norm": 19.06438842103908, |
| "kl_loss_13": 3083.4, |
| "kl_loss_26": 2171.6, |
| "kl_loss_39": 1187.2, |
| "kl_loss_7": 3614.4, |
| "learning_rate": 0.00015153121567235335, |
| "loss": 5127.55, |
| "step": 7480 |
| }, |
| { |
| "ce_loss_13": 2.913339024782181, |
| "ce_loss_26": 2.4667980909347533, |
| "ce_loss_39": 1.9971046984195708, |
| "ce_loss_52": 1.4167528375983238, |
| "ce_loss_7": 3.1835066616535186, |
| "epoch": 0.749, |
| "grad_norm": 19.868010576940023, |
| "kl_loss_13": 3104.0, |
| "kl_loss_26": 2178.6, |
| "kl_loss_39": 1188.7, |
| "kl_loss_7": 3662.4, |
| "learning_rate": 0.00015039512565099468, |
| "loss": 5094.65, |
| "step": 7490 |
| }, |
| { |
| "ce_loss_13": 2.914568355679512, |
| "ce_loss_26": 2.469626322388649, |
| "ce_loss_39": 2.0020422458648683, |
| "ce_loss_52": 1.4171572998166084, |
| "ce_loss_7": 3.1783276200294495, |
| "epoch": 0.75, |
| "grad_norm": 19.266335144116216, |
| "kl_loss_13": 3099.6, |
| "kl_loss_26": 2178.4, |
| "kl_loss_39": 1189.1, |
| "kl_loss_7": 3658.8, |
| "learning_rate": 0.00014926255614683932, |
| "loss": 5132.95, |
| "step": 7500 |
| }, |
| { |
| "ce_loss_13": 2.9255091905593873, |
| "ce_loss_26": 2.4897490620613096, |
| "ce_loss_39": 2.020438665151596, |
| "ce_loss_52": 1.43346728682518, |
| "ce_loss_7": 3.188784825801849, |
| "epoch": 0.751, |
| "grad_norm": 18.946748872518604, |
| "kl_loss_13": 3108.8, |
| "kl_loss_26": 2190.0, |
| "kl_loss_39": 1197.3, |
| "kl_loss_7": 3652.8, |
| "learning_rate": 0.0001481335185648498, |
| "loss": 5140.95, |
| "step": 7510 |
| }, |
| { |
| "ce_loss_13": 2.9898211777210237, |
| "ce_loss_26": 2.541801372170448, |
| "ce_loss_39": 2.068241673707962, |
| "ce_loss_52": 1.474491646885872, |
| "ce_loss_7": 3.2494523525238037, |
| "epoch": 0.752, |
| "grad_norm": 19.297834732191596, |
| "kl_loss_13": 3108.0, |
| "kl_loss_26": 2187.0, |
| "kl_loss_39": 1193.8, |
| "kl_loss_7": 3655.2, |
| "learning_rate": 0.0001470080242744218, |
| "loss": 5080.45, |
| "step": 7520 |
| }, |
| { |
| "ce_loss_13": 2.989736980199814, |
| "ce_loss_26": 2.5422766327857973, |
| "ce_loss_39": 2.0724924355745316, |
| "ce_loss_52": 1.4764755725860597, |
| "ce_loss_7": 3.2583333015441895, |
| "epoch": 0.753, |
| "grad_norm": 19.759302349149518, |
| "kl_loss_13": 3127.2, |
| "kl_loss_26": 2207.8, |
| "kl_loss_39": 1205.1, |
| "kl_loss_7": 3686.4, |
| "learning_rate": 0.0001458860846092705, |
| "loss": 5089.25, |
| "step": 7530 |
| }, |
| { |
| "ce_loss_13": 2.9518058955669404, |
| "ce_loss_26": 2.5123462677001953, |
| "ce_loss_39": 2.033254536986351, |
| "ce_loss_52": 1.430069674551487, |
| "ce_loss_7": 3.216610902547836, |
| "epoch": 0.754, |
| "grad_norm": 19.101743494796594, |
| "kl_loss_13": 3130.4, |
| "kl_loss_26": 2214.4, |
| "kl_loss_39": 1206.1, |
| "kl_loss_7": 3688.8, |
| "learning_rate": 0.00014476771086731566, |
| "loss": 5132.95, |
| "step": 7540 |
| }, |
| { |
| "ce_loss_13": 2.9499751746654512, |
| "ce_loss_26": 2.5148087441921234, |
| "ce_loss_39": 2.0469634413719175, |
| "ce_loss_52": 1.4645337551832198, |
| "ce_loss_7": 3.2114050924777984, |
| "epoch": 0.755, |
| "grad_norm": 18.95244057854832, |
| "kl_loss_13": 3084.4, |
| "kl_loss_26": 2166.2, |
| "kl_loss_39": 1178.4, |
| "kl_loss_7": 3625.2, |
| "learning_rate": 0.00014365291431056872, |
| "loss": 5111.9, |
| "step": 7550 |
| }, |
| { |
| "ce_loss_13": 2.915192812681198, |
| "ce_loss_26": 2.470223453640938, |
| "ce_loss_39": 1.9942311495542526, |
| "ce_loss_52": 1.4130516573786736, |
| "ce_loss_7": 3.186279386281967, |
| "epoch": 0.756, |
| "grad_norm": 19.87294750540145, |
| "kl_loss_13": 3138.4, |
| "kl_loss_26": 2214.2, |
| "kl_loss_39": 1201.4, |
| "kl_loss_7": 3700.0, |
| "learning_rate": 0.00014254170616501827, |
| "loss": 5096.4, |
| "step": 7560 |
| }, |
| { |
| "ce_loss_13": 2.954612511396408, |
| "ce_loss_26": 2.5071854114532472, |
| "ce_loss_39": 2.035946971178055, |
| "ce_loss_52": 1.439925280213356, |
| "ce_loss_7": 3.2222863495349885, |
| "epoch": 0.757, |
| "grad_norm": 20.90881340934633, |
| "kl_loss_13": 3122.0, |
| "kl_loss_26": 2194.2, |
| "kl_loss_39": 1192.5, |
| "kl_loss_7": 3677.6, |
| "learning_rate": 0.0001414340976205183, |
| "loss": 5060.45, |
| "step": 7570 |
| }, |
| { |
| "ce_loss_13": 2.9101392149925234, |
| "ce_loss_26": 2.471102824807167, |
| "ce_loss_39": 2.0064873933792113, |
| "ce_loss_52": 1.4280226349830627, |
| "ce_loss_7": 3.1714185059070585, |
| "epoch": 0.758, |
| "grad_norm": 19.49522818126467, |
| "kl_loss_13": 3076.0, |
| "kl_loss_26": 2159.8, |
| "kl_loss_39": 1176.2, |
| "kl_loss_7": 3620.4, |
| "learning_rate": 0.00014033009983067452, |
| "loss": 5108.35, |
| "step": 7580 |
| }, |
| { |
| "ce_loss_13": 2.966978985071182, |
| "ce_loss_26": 2.5087331235408783, |
| "ce_loss_39": 2.0336913764476776, |
| "ce_loss_52": 1.4274784743785858, |
| "ce_loss_7": 3.2379914104938505, |
| "epoch": 0.759, |
| "grad_norm": 18.693022063405696, |
| "kl_loss_13": 3203.2, |
| "kl_loss_26": 2255.0, |
| "kl_loss_39": 1243.9, |
| "kl_loss_7": 3762.0, |
| "learning_rate": 0.00013922972391273224, |
| "loss": 5094.65, |
| "step": 7590 |
| }, |
| { |
| "ce_loss_13": 2.9872674524784086, |
| "ce_loss_26": 2.5407335460186005, |
| "ce_loss_39": 2.061782196164131, |
| "ce_loss_52": 1.4462621062994003, |
| "ce_loss_7": 3.253601038455963, |
| "epoch": 0.76, |
| "grad_norm": 19.707863398571995, |
| "kl_loss_13": 3172.0, |
| "kl_loss_26": 2252.8, |
| "kl_loss_39": 1251.4, |
| "kl_loss_7": 3726.4, |
| "learning_rate": 0.0001381329809474649, |
| "loss": 5098.7, |
| "step": 7600 |
| }, |
| { |
| "ce_loss_13": 2.8771551668643953, |
| "ce_loss_26": 2.4450180411338804, |
| "ce_loss_39": 1.9818484753370285, |
| "ce_loss_52": 1.4145073384046554, |
| "ce_loss_7": 3.1438543021678926, |
| "epoch": 0.761, |
| "grad_norm": 18.46540807586579, |
| "kl_loss_13": 3029.2, |
| "kl_loss_26": 2126.8, |
| "kl_loss_39": 1154.6, |
| "kl_loss_7": 3583.2, |
| "learning_rate": 0.0001370398819790621, |
| "loss": 5084.25, |
| "step": 7610 |
| }, |
| { |
| "ce_loss_13": 2.960028713941574, |
| "ce_loss_26": 2.525826930999756, |
| "ce_loss_39": 2.0549875289201736, |
| "ce_loss_52": 1.4615961879491806, |
| "ce_loss_7": 3.2287797749042513, |
| "epoch": 0.762, |
| "grad_norm": 19.79273649753162, |
| "kl_loss_13": 3108.0, |
| "kl_loss_26": 2202.8, |
| "kl_loss_39": 1209.5, |
| "kl_loss_7": 3652.8, |
| "learning_rate": 0.00013595043801501794, |
| "loss": 5052.75, |
| "step": 7620 |
| }, |
| { |
| "ce_loss_13": 2.9331182718276976, |
| "ce_loss_26": 2.4871296346187592, |
| "ce_loss_39": 2.019395884871483, |
| "ce_loss_52": 1.4329772531986236, |
| "ce_loss_7": 3.19427090883255, |
| "epoch": 0.763, |
| "grad_norm": 20.265530214115834, |
| "kl_loss_13": 3124.0, |
| "kl_loss_26": 2195.4, |
| "kl_loss_39": 1199.7, |
| "kl_loss_7": 3669.6, |
| "learning_rate": 0.00013486466002602133, |
| "loss": 5092.15, |
| "step": 7630 |
| }, |
| { |
| "ce_loss_13": 2.86139075756073, |
| "ce_loss_26": 2.4176136374473574, |
| "ce_loss_39": 1.941940224170685, |
| "ce_loss_52": 1.3821519583463668, |
| "ce_loss_7": 3.1196699738502502, |
| "epoch": 0.764, |
| "grad_norm": 19.727213627271716, |
| "kl_loss_13": 3072.8, |
| "kl_loss_26": 2145.6, |
| "kl_loss_39": 1144.7, |
| "kl_loss_7": 3617.2, |
| "learning_rate": 0.00013378255894584462, |
| "loss": 5002.6, |
| "step": 7640 |
| }, |
| { |
| "ce_loss_13": 2.9353716015815734, |
| "ce_loss_26": 2.490555015206337, |
| "ce_loss_39": 2.019370597600937, |
| "ce_loss_52": 1.4347517609596252, |
| "ce_loss_7": 3.196541225910187, |
| "epoch": 0.765, |
| "grad_norm": 20.424012523901062, |
| "kl_loss_13": 3127.6, |
| "kl_loss_26": 2205.8, |
| "kl_loss_39": 1202.7, |
| "kl_loss_7": 3670.0, |
| "learning_rate": 0.0001327041456712334, |
| "loss": 5085.1, |
| "step": 7650 |
| }, |
| { |
| "ce_loss_13": 2.994156318902969, |
| "ce_loss_26": 2.56048826277256, |
| "ce_loss_39": 2.084024053812027, |
| "ce_loss_52": 1.487925711274147, |
| "ce_loss_7": 3.2512714982032778, |
| "epoch": 0.766, |
| "grad_norm": 19.650498118539073, |
| "kl_loss_13": 3125.6, |
| "kl_loss_26": 2221.6, |
| "kl_loss_39": 1220.7, |
| "kl_loss_7": 3665.2, |
| "learning_rate": 0.00013162943106179747, |
| "loss": 5105.9, |
| "step": 7660 |
| }, |
| { |
| "ce_loss_13": 2.9769886791706086, |
| "ce_loss_26": 2.5290128916501997, |
| "ce_loss_39": 2.0444375783205033, |
| "ce_loss_52": 1.4427102521061896, |
| "ce_loss_7": 3.2454589188098906, |
| "epoch": 0.767, |
| "grad_norm": 19.69057867231776, |
| "kl_loss_13": 3203.2, |
| "kl_loss_26": 2269.2, |
| "kl_loss_39": 1243.2, |
| "kl_loss_7": 3758.4, |
| "learning_rate": 0.00013055842593990132, |
| "loss": 5067.35, |
| "step": 7670 |
| }, |
| { |
| "ce_loss_13": 2.9738565921783446, |
| "ce_loss_26": 2.5335902631282807, |
| "ce_loss_39": 2.06500606238842, |
| "ce_loss_52": 1.4531759321689606, |
| "ce_loss_7": 3.2406944632530212, |
| "epoch": 0.768, |
| "grad_norm": 19.854778454902203, |
| "kl_loss_13": 3164.0, |
| "kl_loss_26": 2244.2, |
| "kl_loss_39": 1253.0, |
| "kl_loss_7": 3714.8, |
| "learning_rate": 0.00012949114109055414, |
| "loss": 5080.45, |
| "step": 7680 |
| }, |
| { |
| "ce_loss_13": 2.8718224823474885, |
| "ce_loss_26": 2.434892734885216, |
| "ce_loss_39": 1.9763070404529572, |
| "ce_loss_52": 1.4119910702109337, |
| "ce_loss_7": 3.1341715812683106, |
| "epoch": 0.769, |
| "grad_norm": 19.065166102671203, |
| "kl_loss_13": 3037.6, |
| "kl_loss_26": 2134.2, |
| "kl_loss_39": 1156.1, |
| "kl_loss_7": 3588.0, |
| "learning_rate": 0.00012842758726130281, |
| "loss": 5110.75, |
| "step": 7690 |
| }, |
| { |
| "ce_loss_13": 2.9209058582782745, |
| "ce_loss_26": 2.4878934979438783, |
| "ce_loss_39": 2.0205056190490724, |
| "ce_loss_52": 1.4421792283654213, |
| "ce_loss_7": 3.182013803720474, |
| "epoch": 0.77, |
| "grad_norm": 19.547331822021178, |
| "kl_loss_13": 3063.6, |
| "kl_loss_26": 2150.4, |
| "kl_loss_39": 1168.8, |
| "kl_loss_7": 3608.0, |
| "learning_rate": 0.00012736777516212267, |
| "loss": 5073.5, |
| "step": 7700 |
| }, |
| { |
| "ce_loss_13": 2.9334555983543398, |
| "ce_loss_26": 2.493560019135475, |
| "ce_loss_39": 2.0158845692873, |
| "ce_loss_52": 1.4151214450597762, |
| "ce_loss_7": 3.2045696437358857, |
| "epoch": 0.771, |
| "grad_norm": 18.662664559432073, |
| "kl_loss_13": 3145.2, |
| "kl_loss_26": 2230.4, |
| "kl_loss_39": 1228.4, |
| "kl_loss_7": 3706.0, |
| "learning_rate": 0.00012631171546530968, |
| "loss": 5058.75, |
| "step": 7710 |
| }, |
| { |
| "ce_loss_13": 2.944778233766556, |
| "ce_loss_26": 2.5031532883644103, |
| "ce_loss_39": 2.0205067574977873, |
| "ce_loss_52": 1.414848119020462, |
| "ce_loss_7": 3.2128884732723235, |
| "epoch": 0.772, |
| "grad_norm": 19.409023945233233, |
| "kl_loss_13": 3144.0, |
| "kl_loss_26": 2229.4, |
| "kl_loss_39": 1221.0, |
| "kl_loss_7": 3700.4, |
| "learning_rate": 0.00012525941880537307, |
| "loss": 5071.55, |
| "step": 7720 |
| }, |
| { |
| "ce_loss_13": 2.937892961502075, |
| "ce_loss_26": 2.492938667535782, |
| "ce_loss_39": 2.020972582697868, |
| "ce_loss_52": 1.4354220196604728, |
| "ce_loss_7": 3.2008834302425386, |
| "epoch": 0.773, |
| "grad_norm": 19.46905923212589, |
| "kl_loss_13": 3121.2, |
| "kl_loss_26": 2201.4, |
| "kl_loss_39": 1204.2, |
| "kl_loss_7": 3662.4, |
| "learning_rate": 0.00012421089577892869, |
| "loss": 5040.15, |
| "step": 7730 |
| }, |
| { |
| "ce_loss_13": 2.949704957008362, |
| "ce_loss_26": 2.5045041382312774, |
| "ce_loss_39": 2.016797697544098, |
| "ce_loss_52": 1.4014781221747399, |
| "ce_loss_7": 3.212642914056778, |
| "epoch": 0.774, |
| "grad_norm": 19.613908494270376, |
| "kl_loss_13": 3187.2, |
| "kl_loss_26": 2266.0, |
| "kl_loss_39": 1252.6, |
| "kl_loss_7": 3732.0, |
| "learning_rate": 0.0001231661569445919, |
| "loss": 5076.45, |
| "step": 7740 |
| }, |
| { |
| "ce_loss_13": 2.955250400304794, |
| "ce_loss_26": 2.5274777173995973, |
| "ce_loss_39": 2.067678835988045, |
| "ce_loss_52": 1.4838283985853196, |
| "ce_loss_7": 3.2114856481552123, |
| "epoch": 0.775, |
| "grad_norm": 19.844798674533834, |
| "kl_loss_13": 3084.8, |
| "kl_loss_26": 2190.6, |
| "kl_loss_39": 1199.8, |
| "kl_loss_7": 3616.4, |
| "learning_rate": 0.00012212521282287093, |
| "loss": 5060.4, |
| "step": 7750 |
| }, |
| { |
| "ce_loss_13": 2.9763452112674713, |
| "ce_loss_26": 2.5314755111932756, |
| "ce_loss_39": 2.0540303111076357, |
| "ce_loss_52": 1.450203076004982, |
| "ce_loss_7": 3.2422658264636994, |
| "epoch": 0.776, |
| "grad_norm": 20.432260851236787, |
| "kl_loss_13": 3181.2, |
| "kl_loss_26": 2258.4, |
| "kl_loss_39": 1248.7, |
| "kl_loss_7": 3726.0, |
| "learning_rate": 0.00012108807389606158, |
| "loss": 5084.95, |
| "step": 7760 |
| }, |
| { |
| "ce_loss_13": 2.920662760734558, |
| "ce_loss_26": 2.475513318181038, |
| "ce_loss_39": 2.0011812478303908, |
| "ce_loss_52": 1.4144959792494773, |
| "ce_loss_7": 3.1814299404621122, |
| "epoch": 0.777, |
| "grad_norm": 19.88207983374875, |
| "kl_loss_13": 3106.4, |
| "kl_loss_26": 2178.8, |
| "kl_loss_39": 1189.6, |
| "kl_loss_7": 3652.0, |
| "learning_rate": 0.00012005475060814159, |
| "loss": 5075.25, |
| "step": 7770 |
| }, |
| { |
| "ce_loss_13": 2.982200914621353, |
| "ce_loss_26": 2.5349232286214827, |
| "ce_loss_39": 2.069617584347725, |
| "ce_loss_52": 1.4744601517915725, |
| "ce_loss_7": 3.2433891892433167, |
| "epoch": 0.778, |
| "grad_norm": 19.290497638146196, |
| "kl_loss_13": 3128.4, |
| "kl_loss_26": 2198.0, |
| "kl_loss_39": 1199.9, |
| "kl_loss_7": 3677.2, |
| "learning_rate": 0.00011902525336466464, |
| "loss": 5053.05, |
| "step": 7780 |
| }, |
| { |
| "ce_loss_13": 2.9154593467712404, |
| "ce_loss_26": 2.477250945568085, |
| "ce_loss_39": 2.0098533272743224, |
| "ce_loss_52": 1.4167337000370026, |
| "ce_loss_7": 3.1773332476615908, |
| "epoch": 0.779, |
| "grad_norm": 19.252580297991088, |
| "kl_loss_13": 3096.4, |
| "kl_loss_26": 2190.8, |
| "kl_loss_39": 1202.1, |
| "kl_loss_7": 3643.6, |
| "learning_rate": 0.00011799959253265668, |
| "loss": 5067.65, |
| "step": 7790 |
| }, |
| { |
| "ce_loss_13": 2.9013588786125184, |
| "ce_loss_26": 2.468735784292221, |
| "ce_loss_39": 2.0137620836496355, |
| "ce_loss_52": 1.4429293110966683, |
| "ce_loss_7": 3.1603996396064757, |
| "epoch": 0.78, |
| "grad_norm": 18.773016641793355, |
| "kl_loss_13": 3040.0, |
| "kl_loss_26": 2138.8, |
| "kl_loss_39": 1157.9, |
| "kl_loss_7": 3579.6, |
| "learning_rate": 0.00011697777844051105, |
| "loss": 5056.85, |
| "step": 7800 |
| }, |
| { |
| "ce_loss_13": 2.99123472571373, |
| "ce_loss_26": 2.5481519401073456, |
| "ce_loss_39": 2.0644855082035063, |
| "ce_loss_52": 1.4605911195278167, |
| "ce_loss_7": 3.2550659775733948, |
| "epoch": 0.781, |
| "grad_norm": 19.108089078575876, |
| "kl_loss_13": 3156.0, |
| "kl_loss_26": 2232.8, |
| "kl_loss_39": 1225.5, |
| "kl_loss_7": 3705.6, |
| "learning_rate": 0.00011595982137788402, |
| "loss": 5045.05, |
| "step": 7810 |
| }, |
| { |
| "ce_loss_13": 2.979940289258957, |
| "ce_loss_26": 2.5384896367788317, |
| "ce_loss_39": 2.066192331910133, |
| "ce_loss_52": 1.4709408730268478, |
| "ce_loss_7": 3.2419922232627867, |
| "epoch": 0.782, |
| "grad_norm": 19.283597029818793, |
| "kl_loss_13": 3151.6, |
| "kl_loss_26": 2222.2, |
| "kl_loss_39": 1217.6, |
| "kl_loss_7": 3697.2, |
| "learning_rate": 0.00011494573159559212, |
| "loss": 5088.85, |
| "step": 7820 |
| }, |
| { |
| "ce_loss_13": 2.9142948031425475, |
| "ce_loss_26": 2.4835946947336196, |
| "ce_loss_39": 2.015666288137436, |
| "ce_loss_52": 1.433475723862648, |
| "ce_loss_7": 3.1793313324451447, |
| "epoch": 0.783, |
| "grad_norm": 18.805294673266925, |
| "kl_loss_13": 3065.2, |
| "kl_loss_26": 2169.8, |
| "kl_loss_39": 1195.1, |
| "kl_loss_7": 3620.0, |
| "learning_rate": 0.00011393551930550828, |
| "loss": 5021.8, |
| "step": 7830 |
| }, |
| { |
| "ce_loss_13": 2.942464643716812, |
| "ce_loss_26": 2.4966187834739686, |
| "ce_loss_39": 2.018853786587715, |
| "ce_loss_52": 1.4241285115480422, |
| "ce_loss_7": 3.209713137149811, |
| "epoch": 0.784, |
| "grad_norm": 18.802417227998035, |
| "kl_loss_13": 3153.6, |
| "kl_loss_26": 2218.8, |
| "kl_loss_39": 1214.7, |
| "kl_loss_7": 3709.6, |
| "learning_rate": 0.00011292919468045875, |
| "loss": 5056.2, |
| "step": 7840 |
| }, |
| { |
| "ce_loss_13": 2.9324662506580355, |
| "ce_loss_26": 2.490780544281006, |
| "ce_loss_39": 2.014774057269096, |
| "ce_loss_52": 1.4378075569868087, |
| "ce_loss_7": 3.2002897918224336, |
| "epoch": 0.785, |
| "grad_norm": 18.31675592106317, |
| "kl_loss_13": 3120.4, |
| "kl_loss_26": 2191.6, |
| "kl_loss_39": 1189.3, |
| "kl_loss_7": 3674.4, |
| "learning_rate": 0.00011192676785412154, |
| "loss": 5025.65, |
| "step": 7850 |
| }, |
| { |
| "ce_loss_13": 2.9004018545150756, |
| "ce_loss_26": 2.4679500609636307, |
| "ce_loss_39": 2.0106911092996596, |
| "ce_loss_52": 1.454608330130577, |
| "ce_loss_7": 3.1605159759521486, |
| "epoch": 0.786, |
| "grad_norm": 21.238240444429067, |
| "kl_loss_13": 2987.2, |
| "kl_loss_26": 2093.8, |
| "kl_loss_39": 1136.1, |
| "kl_loss_7": 3522.4, |
| "learning_rate": 0.00011092824892092374, |
| "loss": 4990.8, |
| "step": 7860 |
| }, |
| { |
| "ce_loss_13": 3.0144290030002594, |
| "ce_loss_26": 2.5742201924324037, |
| "ce_loss_39": 2.095407247543335, |
| "ce_loss_52": 1.4918664544820786, |
| "ce_loss_7": 3.2818655967712402, |
| "epoch": 0.787, |
| "grad_norm": 20.48506344270259, |
| "kl_loss_13": 3166.0, |
| "kl_loss_26": 2239.6, |
| "kl_loss_39": 1229.4, |
| "kl_loss_7": 3722.0, |
| "learning_rate": 0.0001099336479359398, |
| "loss": 5084.85, |
| "step": 7870 |
| }, |
| { |
| "ce_loss_13": 2.9180380165576936, |
| "ce_loss_26": 2.4719971120357513, |
| "ce_loss_39": 1.9984097123146056, |
| "ce_loss_52": 1.4270031958818437, |
| "ce_loss_7": 3.1842149913311006, |
| "epoch": 0.788, |
| "grad_norm": 19.617074455019072, |
| "kl_loss_13": 3109.6, |
| "kl_loss_26": 2182.8, |
| "kl_loss_39": 1178.3, |
| "kl_loss_7": 3662.0, |
| "learning_rate": 0.00010894297491479043, |
| "loss": 5092.2, |
| "step": 7880 |
| }, |
| { |
| "ce_loss_13": 2.8819404006004334, |
| "ce_loss_26": 2.449340745806694, |
| "ce_loss_39": 1.9862482339143752, |
| "ce_loss_52": 1.413575354218483, |
| "ce_loss_7": 3.143266361951828, |
| "epoch": 0.789, |
| "grad_norm": 19.73420487885875, |
| "kl_loss_13": 3037.2, |
| "kl_loss_26": 2140.0, |
| "kl_loss_39": 1164.1, |
| "kl_loss_7": 3572.0, |
| "learning_rate": 0.00010795623983354214, |
| "loss": 5012.15, |
| "step": 7890 |
| }, |
| { |
| "ce_loss_13": 2.936946928501129, |
| "ce_loss_26": 2.50938241481781, |
| "ce_loss_39": 2.0399589776992797, |
| "ce_loss_52": 1.4433409079909325, |
| "ce_loss_7": 3.1964890122413636, |
| "epoch": 0.79, |
| "grad_norm": 20.456776413214342, |
| "kl_loss_13": 3094.8, |
| "kl_loss_26": 2194.4, |
| "kl_loss_39": 1203.3, |
| "kl_loss_7": 3650.0, |
| "learning_rate": 0.00010697345262860636, |
| "loss": 5033.95, |
| "step": 7900 |
| }, |
| { |
| "ce_loss_13": 2.9234113335609435, |
| "ce_loss_26": 2.486256945133209, |
| "ce_loss_39": 2.014375075697899, |
| "ce_loss_52": 1.449743601679802, |
| "ce_loss_7": 3.18265563249588, |
| "epoch": 0.791, |
| "grad_norm": 19.900735615836812, |
| "kl_loss_13": 3074.8, |
| "kl_loss_26": 2163.0, |
| "kl_loss_39": 1161.1, |
| "kl_loss_7": 3612.8, |
| "learning_rate": 0.00010599462319663906, |
| "loss": 5038.65, |
| "step": 7910 |
| }, |
| { |
| "ce_loss_13": 2.9553000926971436, |
| "ce_loss_26": 2.5179340064525606, |
| "ce_loss_39": 2.0430867671966553, |
| "ce_loss_52": 1.444689854979515, |
| "ce_loss_7": 3.21486856341362, |
| "epoch": 0.792, |
| "grad_norm": 19.250533773319045, |
| "kl_loss_13": 3122.0, |
| "kl_loss_26": 2206.2, |
| "kl_loss_39": 1213.4, |
| "kl_loss_7": 3668.8, |
| "learning_rate": 0.00010501976139444191, |
| "loss": 5048.55, |
| "step": 7920 |
| }, |
| { |
| "ce_loss_13": 2.946609389781952, |
| "ce_loss_26": 2.5028085887432097, |
| "ce_loss_39": 2.0339547246694565, |
| "ce_loss_52": 1.4416055083274841, |
| "ce_loss_7": 3.2140405058860777, |
| "epoch": 0.793, |
| "grad_norm": 20.61899934712358, |
| "kl_loss_13": 3133.6, |
| "kl_loss_26": 2209.4, |
| "kl_loss_39": 1208.3, |
| "kl_loss_7": 3694.0, |
| "learning_rate": 0.0001040488770388625, |
| "loss": 5057.15, |
| "step": 7930 |
| }, |
| { |
| "ce_loss_13": 2.866725343465805, |
| "ce_loss_26": 2.424889090657234, |
| "ce_loss_39": 1.9532368332147598, |
| "ce_loss_52": 1.37542584836483, |
| "ce_loss_7": 3.1246279418468474, |
| "epoch": 0.794, |
| "grad_norm": 19.267832695141472, |
| "kl_loss_13": 3083.6, |
| "kl_loss_26": 2164.8, |
| "kl_loss_39": 1176.3, |
| "kl_loss_7": 3624.4, |
| "learning_rate": 0.00010308197990669538, |
| "loss": 5026.95, |
| "step": 7940 |
| }, |
| { |
| "ce_loss_13": 2.901643234491348, |
| "ce_loss_26": 2.4638597697019575, |
| "ce_loss_39": 1.9964057832956315, |
| "ce_loss_52": 1.4079250425100327, |
| "ce_loss_7": 3.1619919717311857, |
| "epoch": 0.795, |
| "grad_norm": 19.24818590589668, |
| "kl_loss_13": 3123.2, |
| "kl_loss_26": 2199.2, |
| "kl_loss_39": 1202.4, |
| "kl_loss_7": 3666.4, |
| "learning_rate": 0.0001021190797345839, |
| "loss": 5013.5, |
| "step": 7950 |
| }, |
| { |
| "ce_loss_13": 2.967918246984482, |
| "ce_loss_26": 2.533867511153221, |
| "ce_loss_39": 2.069682791829109, |
| "ce_loss_52": 1.4818589851260184, |
| "ce_loss_7": 3.222521889209747, |
| "epoch": 0.796, |
| "grad_norm": 19.75622423793478, |
| "kl_loss_13": 3058.8, |
| "kl_loss_26": 2155.2, |
| "kl_loss_39": 1175.7, |
| "kl_loss_7": 3592.0, |
| "learning_rate": 0.00010116018621892236, |
| "loss": 5009.6, |
| "step": 7960 |
| }, |
| { |
| "ce_loss_13": 2.8812991797924044, |
| "ce_loss_26": 2.4400356858968735, |
| "ce_loss_39": 1.9724171191453934, |
| "ce_loss_52": 1.4085147365927697, |
| "ce_loss_7": 3.1415765941143037, |
| "epoch": 0.797, |
| "grad_norm": 19.13497167869677, |
| "kl_loss_13": 3057.6, |
| "kl_loss_26": 2142.8, |
| "kl_loss_39": 1159.2, |
| "kl_loss_7": 3602.8, |
| "learning_rate": 0.00010020530901575753, |
| "loss": 5020.4, |
| "step": 7970 |
| }, |
| { |
| "ce_loss_13": 2.904066652059555, |
| "ce_loss_26": 2.4755689650774, |
| "ce_loss_39": 2.0152323603630067, |
| "ce_loss_52": 1.4373595044016838, |
| "ce_loss_7": 3.1696866393089294, |
| "epoch": 0.798, |
| "grad_norm": 20.01532447536976, |
| "kl_loss_13": 3044.0, |
| "kl_loss_26": 2150.0, |
| "kl_loss_39": 1170.5, |
| "kl_loss_7": 3601.6, |
| "learning_rate": 9.925445774069231e-05, |
| "loss": 5018.45, |
| "step": 7980 |
| }, |
| { |
| "ce_loss_13": 2.9288057029247283, |
| "ce_loss_26": 2.4822009325027468, |
| "ce_loss_39": 2.011521789431572, |
| "ce_loss_52": 1.4153838574886322, |
| "ce_loss_7": 3.1911131501197816, |
| "epoch": 0.799, |
| "grad_norm": 19.205851361122782, |
| "kl_loss_13": 3126.0, |
| "kl_loss_26": 2203.4, |
| "kl_loss_39": 1210.7, |
| "kl_loss_7": 3672.4, |
| "learning_rate": 9.830764196878872e-05, |
| "loss": 5069.4, |
| "step": 7990 |
| }, |
| { |
| "ce_loss_13": 2.9970316886901855, |
| "ce_loss_26": 2.5632322430610657, |
| "ce_loss_39": 2.089646649360657, |
| "ce_loss_52": 1.4764455169439317, |
| "ce_loss_7": 3.2621945440769196, |
| "epoch": 0.8, |
| "grad_norm": 18.817128068716947, |
| "kl_loss_13": 3164.4, |
| "kl_loss_26": 2259.8, |
| "kl_loss_39": 1248.2, |
| "kl_loss_7": 3716.4, |
| "learning_rate": 9.736487123447069e-05, |
| "loss": 5026.75, |
| "step": 8000 |
| }, |
| { |
| "ce_loss_13": 2.955092731118202, |
| "ce_loss_26": 2.5211679935455322, |
| "ce_loss_39": 2.061183473467827, |
| "ce_loss_52": 1.4790756076574325, |
| "ce_loss_7": 3.211963188648224, |
| "epoch": 0.801, |
| "grad_norm": 19.13731613570068, |
| "kl_loss_13": 3058.4, |
| "kl_loss_26": 2157.0, |
| "kl_loss_39": 1183.6, |
| "kl_loss_7": 3589.2, |
| "learning_rate": 9.642615503142926e-05, |
| "loss": 5013.8, |
| "step": 8010 |
| }, |
| { |
| "ce_loss_13": 2.8814174115657805, |
| "ce_loss_26": 2.451471582055092, |
| "ce_loss_39": 1.9925315648317337, |
| "ce_loss_52": 1.4264169082045555, |
| "ce_loss_7": 3.150370055437088, |
| "epoch": 0.802, |
| "grad_norm": 19.95644855086655, |
| "kl_loss_13": 3015.2, |
| "kl_loss_26": 2114.4, |
| "kl_loss_39": 1144.7, |
| "kl_loss_7": 3572.8, |
| "learning_rate": 9.549150281252633e-05, |
| "loss": 5066.0, |
| "step": 8020 |
| }, |
| { |
| "ce_loss_13": 2.9213546574115754, |
| "ce_loss_26": 2.488295114040375, |
| "ce_loss_39": 2.0217175424098968, |
| "ce_loss_52": 1.448304545879364, |
| "ce_loss_7": 3.177370023727417, |
| "epoch": 0.803, |
| "grad_norm": 19.167189329215354, |
| "kl_loss_13": 3046.0, |
| "kl_loss_26": 2140.4, |
| "kl_loss_39": 1161.6, |
| "kl_loss_7": 3582.8, |
| "learning_rate": 9.4560923989699e-05, |
| "loss": 5040.7, |
| "step": 8030 |
| }, |
| { |
| "ce_loss_13": 2.8780395865440367, |
| "ce_loss_26": 2.43271960914135, |
| "ce_loss_39": 1.966169360280037, |
| "ce_loss_52": 1.3888942331075669, |
| "ce_loss_7": 3.1494656085968016, |
| "epoch": 0.804, |
| "grad_norm": 18.853764898775175, |
| "kl_loss_13": 3103.6, |
| "kl_loss_26": 2173.0, |
| "kl_loss_39": 1171.5, |
| "kl_loss_7": 3672.8, |
| "learning_rate": 9.363442793386607e-05, |
| "loss": 5021.25, |
| "step": 8040 |
| }, |
| { |
| "ce_loss_13": 2.9130735754966737, |
| "ce_loss_26": 2.480276498198509, |
| "ce_loss_39": 2.0196522653102873, |
| "ce_loss_52": 1.4448804795742034, |
| "ce_loss_7": 3.1761886417865752, |
| "epoch": 0.805, |
| "grad_norm": 18.76619687601556, |
| "kl_loss_13": 3048.8, |
| "kl_loss_26": 2147.8, |
| "kl_loss_39": 1165.5, |
| "kl_loss_7": 3590.8, |
| "learning_rate": 9.271202397483213e-05, |
| "loss": 4983.8, |
| "step": 8050 |
| }, |
| { |
| "ce_loss_13": 2.960085618495941, |
| "ce_loss_26": 2.516904118657112, |
| "ce_loss_39": 2.0490771383047104, |
| "ce_loss_52": 1.4702347993850708, |
| "ce_loss_7": 3.2195077538490295, |
| "epoch": 0.806, |
| "grad_norm": 20.341613718883853, |
| "kl_loss_13": 3080.0, |
| "kl_loss_26": 2163.0, |
| "kl_loss_39": 1179.4, |
| "kl_loss_7": 3617.6, |
| "learning_rate": 9.179372140119524e-05, |
| "loss": 5044.25, |
| "step": 8060 |
| }, |
| { |
| "ce_loss_13": 2.895679956674576, |
| "ce_loss_26": 2.447220724821091, |
| "ce_loss_39": 1.978733304142952, |
| "ce_loss_52": 1.4091844826936721, |
| "ce_loss_7": 3.1559928357601166, |
| "epoch": 0.807, |
| "grad_norm": 19.648457937725187, |
| "kl_loss_13": 3088.0, |
| "kl_loss_26": 2160.6, |
| "kl_loss_39": 1172.1, |
| "kl_loss_7": 3631.6, |
| "learning_rate": 9.087952946025175e-05, |
| "loss": 5019.35, |
| "step": 8070 |
| }, |
| { |
| "ce_loss_13": 2.8867613554000853, |
| "ce_loss_26": 2.4542810022830963, |
| "ce_loss_39": 1.997492003440857, |
| "ce_loss_52": 1.4237906068563462, |
| "ce_loss_7": 3.1490099489688874, |
| "epoch": 0.808, |
| "grad_norm": 19.108440716050485, |
| "kl_loss_13": 3031.2, |
| "kl_loss_26": 2132.0, |
| "kl_loss_39": 1161.2, |
| "kl_loss_7": 3573.2, |
| "learning_rate": 8.996945735790446e-05, |
| "loss": 5081.55, |
| "step": 8080 |
| }, |
| { |
| "ce_loss_13": 2.903727024793625, |
| "ce_loss_26": 2.4667694687843325, |
| "ce_loss_39": 2.0024666130542754, |
| "ce_loss_52": 1.4252464413642882, |
| "ce_loss_7": 3.1627338111400602, |
| "epoch": 0.809, |
| "grad_norm": 19.45516980399187, |
| "kl_loss_13": 3065.2, |
| "kl_loss_26": 2156.8, |
| "kl_loss_39": 1173.6, |
| "kl_loss_7": 3607.6, |
| "learning_rate": 8.906351425856951e-05, |
| "loss": 5032.55, |
| "step": 8090 |
| }, |
| { |
| "ce_loss_13": 2.9971861839294434, |
| "ce_loss_26": 2.558201992511749, |
| "ce_loss_39": 2.0943466246128084, |
| "ce_loss_52": 1.5071399331092834, |
| "ce_loss_7": 3.2508726358413695, |
| "epoch": 0.81, |
| "grad_norm": 18.578032258449234, |
| "kl_loss_13": 3080.0, |
| "kl_loss_26": 2167.8, |
| "kl_loss_39": 1188.5, |
| "kl_loss_7": 3612.8, |
| "learning_rate": 8.816170928508365e-05, |
| "loss": 5060.5, |
| "step": 8100 |
| }, |
| { |
| "ce_loss_13": 2.9514600038528442, |
| "ce_loss_26": 2.511053240299225, |
| "ce_loss_39": 2.036428925395012, |
| "ce_loss_52": 1.4632433116436006, |
| "ce_loss_7": 3.2164508640766143, |
| "epoch": 0.811, |
| "grad_norm": 19.46007345669389, |
| "kl_loss_13": 3077.6, |
| "kl_loss_26": 2160.6, |
| "kl_loss_39": 1164.9, |
| "kl_loss_7": 3629.6, |
| "learning_rate": 8.7264051518613e-05, |
| "loss": 5036.75, |
| "step": 8110 |
| }, |
| { |
| "ce_loss_13": 2.8350981414318084, |
| "ce_loss_26": 2.3989670783281327, |
| "ce_loss_39": 1.9394948929548264, |
| "ce_loss_52": 1.385464173555374, |
| "ce_loss_7": 3.0943558514118195, |
| "epoch": 0.812, |
| "grad_norm": 20.429118003347835, |
| "kl_loss_13": 3019.6, |
| "kl_loss_26": 2118.2, |
| "kl_loss_39": 1139.6, |
| "kl_loss_7": 3559.6, |
| "learning_rate": 8.637054999856148e-05, |
| "loss": 5033.2, |
| "step": 8120 |
| }, |
| { |
| "ce_loss_13": 2.956312870979309, |
| "ce_loss_26": 2.510516768693924, |
| "ce_loss_39": 2.0456418454647065, |
| "ce_loss_52": 1.454368954896927, |
| "ce_loss_7": 3.2136961221694946, |
| "epoch": 0.813, |
| "grad_norm": 19.290479115918554, |
| "kl_loss_13": 3110.4, |
| "kl_loss_26": 2193.4, |
| "kl_loss_39": 1206.0, |
| "kl_loss_7": 3651.6, |
| "learning_rate": 8.548121372247918e-05, |
| "loss": 5042.35, |
| "step": 8130 |
| }, |
| { |
| "ce_loss_13": 2.8997408151626587, |
| "ce_loss_26": 2.4578535914421082, |
| "ce_loss_39": 1.9877970427274705, |
| "ce_loss_52": 1.4171391934156419, |
| "ce_loss_7": 3.1653897404670714, |
| "epoch": 0.814, |
| "grad_norm": 19.231104242907662, |
| "kl_loss_13": 3062.4, |
| "kl_loss_26": 2154.2, |
| "kl_loss_39": 1161.8, |
| "kl_loss_7": 3620.0, |
| "learning_rate": 8.459605164597267e-05, |
| "loss": 4990.3, |
| "step": 8140 |
| }, |
| { |
| "ce_loss_13": 2.8967735528945924, |
| "ce_loss_26": 2.462614360451698, |
| "ce_loss_39": 1.9984548151493073, |
| "ce_loss_52": 1.438458850979805, |
| "ce_loss_7": 3.1581345558166505, |
| "epoch": 0.815, |
| "grad_norm": 19.45006377816174, |
| "kl_loss_13": 3056.8, |
| "kl_loss_26": 2153.0, |
| "kl_loss_39": 1160.6, |
| "kl_loss_7": 3602.0, |
| "learning_rate": 8.371507268261436e-05, |
| "loss": 4980.2, |
| "step": 8150 |
| }, |
| { |
| "ce_loss_13": 2.936253345012665, |
| "ce_loss_26": 2.5057778120040894, |
| "ce_loss_39": 2.038064029812813, |
| "ce_loss_52": 1.4622955560684203, |
| "ce_loss_7": 3.1979693949222563, |
| "epoch": 0.816, |
| "grad_norm": 18.91111939228637, |
| "kl_loss_13": 3060.8, |
| "kl_loss_26": 2153.2, |
| "kl_loss_39": 1175.6, |
| "kl_loss_7": 3612.0, |
| "learning_rate": 8.283828570385238e-05, |
| "loss": 5006.35, |
| "step": 8160 |
| }, |
| { |
| "ce_loss_13": 2.9529894649982453, |
| "ce_loss_26": 2.5044034361839294, |
| "ce_loss_39": 2.042719992995262, |
| "ce_loss_52": 1.4732781440019607, |
| "ce_loss_7": 3.2085989713668823, |
| "epoch": 0.817, |
| "grad_norm": 18.881507142327827, |
| "kl_loss_13": 3068.4, |
| "kl_loss_26": 2134.6, |
| "kl_loss_39": 1154.6, |
| "kl_loss_7": 3608.0, |
| "learning_rate": 8.196569953892202e-05, |
| "loss": 5023.2, |
| "step": 8170 |
| }, |
| { |
| "ce_loss_13": 2.901773339509964, |
| "ce_loss_26": 2.46816024184227, |
| "ce_loss_39": 2.0061379730701447, |
| "ce_loss_52": 1.4472751855850219, |
| "ce_loss_7": 3.162723332643509, |
| "epoch": 0.818, |
| "grad_norm": 19.58512082927694, |
| "kl_loss_13": 3021.6, |
| "kl_loss_26": 2116.2, |
| "kl_loss_39": 1138.3, |
| "kl_loss_7": 3567.6, |
| "learning_rate": 8.109732297475635e-05, |
| "loss": 5011.1, |
| "step": 8180 |
| }, |
| { |
| "ce_loss_13": 2.9283832788467405, |
| "ce_loss_26": 2.495754861831665, |
| "ce_loss_39": 2.0315854638814925, |
| "ce_loss_52": 1.4544063314795495, |
| "ce_loss_7": 3.184633868932724, |
| "epoch": 0.819, |
| "grad_norm": 20.338540288781616, |
| "kl_loss_13": 3056.0, |
| "kl_loss_26": 2152.0, |
| "kl_loss_39": 1164.7, |
| "kl_loss_7": 3604.8, |
| "learning_rate": 8.023316475589754e-05, |
| "loss": 4985.65, |
| "step": 8190 |
| }, |
| { |
| "ce_loss_13": 2.8736020922660828, |
| "ce_loss_26": 2.4326390773057938, |
| "ce_loss_39": 1.9687805682420731, |
| "ce_loss_52": 1.4114506781101226, |
| "ce_loss_7": 3.1362832963466643, |
| "epoch": 0.82, |
| "grad_norm": 19.348124098647066, |
| "kl_loss_13": 3039.6, |
| "kl_loss_26": 2118.4, |
| "kl_loss_39": 1131.5, |
| "kl_loss_7": 3592.4, |
| "learning_rate": 7.937323358440934e-05, |
| "loss": 4999.05, |
| "step": 8200 |
| }, |
| { |
| "ce_loss_13": 2.9405028223991394, |
| "ce_loss_26": 2.5053980708122254, |
| "ce_loss_39": 2.031425711512566, |
| "ce_loss_52": 1.4602935075759889, |
| "ce_loss_7": 3.1905817687511444, |
| "epoch": 0.821, |
| "grad_norm": 19.308929525960306, |
| "kl_loss_13": 3053.6, |
| "kl_loss_26": 2142.8, |
| "kl_loss_39": 1164.5, |
| "kl_loss_7": 3580.4, |
| "learning_rate": 7.851753811978923e-05, |
| "loss": 5013.6, |
| "step": 8210 |
| }, |
| { |
| "ce_loss_13": 2.8261436820030212, |
| "ce_loss_26": 2.396569001674652, |
| "ce_loss_39": 1.9376911997795105, |
| "ce_loss_52": 1.3843101486563683, |
| "ce_loss_7": 3.08916922211647, |
| "epoch": 0.822, |
| "grad_norm": 18.760581120890944, |
| "kl_loss_13": 2979.6, |
| "kl_loss_26": 2084.0, |
| "kl_loss_39": 1118.1, |
| "kl_loss_7": 3526.4, |
| "learning_rate": 7.766608697888095e-05, |
| "loss": 4996.05, |
| "step": 8220 |
| }, |
| { |
| "ce_loss_13": 2.895614618062973, |
| "ce_loss_26": 2.4538383156061174, |
| "ce_loss_39": 1.9913074195384979, |
| "ce_loss_52": 1.4129166051745414, |
| "ce_loss_7": 3.156418579816818, |
| "epoch": 0.823, |
| "grad_norm": 19.448137234461008, |
| "kl_loss_13": 3094.8, |
| "kl_loss_26": 2171.8, |
| "kl_loss_39": 1179.4, |
| "kl_loss_7": 3644.0, |
| "learning_rate": 7.681888873578785e-05, |
| "loss": 5010.15, |
| "step": 8230 |
| }, |
| { |
| "ce_loss_13": 2.8753804206848144, |
| "ce_loss_26": 2.448589825630188, |
| "ce_loss_39": 1.9881916165351867, |
| "ce_loss_52": 1.433535772562027, |
| "ce_loss_7": 3.133152514696121, |
| "epoch": 0.824, |
| "grad_norm": 19.780732761004185, |
| "kl_loss_13": 2998.8, |
| "kl_loss_26": 2105.2, |
| "kl_loss_39": 1137.2, |
| "kl_loss_7": 3529.6, |
| "learning_rate": 7.597595192178702e-05, |
| "loss": 4951.6, |
| "step": 8240 |
| }, |
| { |
| "ce_loss_13": 2.874552935361862, |
| "ce_loss_26": 2.426975393295288, |
| "ce_loss_39": 1.969171154499054, |
| "ce_loss_52": 1.4067743465304374, |
| "ce_loss_7": 3.1344926774501802, |
| "epoch": 0.825, |
| "grad_norm": 19.081018400834456, |
| "kl_loss_13": 3050.0, |
| "kl_loss_26": 2129.2, |
| "kl_loss_39": 1153.6, |
| "kl_loss_7": 3590.8, |
| "learning_rate": 7.513728502524286e-05, |
| "loss": 4924.6, |
| "step": 8250 |
| }, |
| { |
| "ce_loss_13": 2.8894054651260377, |
| "ce_loss_26": 2.4550040304660796, |
| "ce_loss_39": 1.9873575389385223, |
| "ce_loss_52": 1.417596697807312, |
| "ce_loss_7": 3.152091747522354, |
| "epoch": 0.826, |
| "grad_norm": 19.8137268983455, |
| "kl_loss_13": 3043.6, |
| "kl_loss_26": 2138.6, |
| "kl_loss_39": 1152.9, |
| "kl_loss_7": 3588.0, |
| "learning_rate": 7.430289649152156e-05, |
| "loss": 5032.7, |
| "step": 8260 |
| }, |
| { |
| "ce_loss_13": 2.9286361813545225, |
| "ce_loss_26": 2.500110092759132, |
| "ce_loss_39": 2.0343170315027237, |
| "ce_loss_52": 1.4783238634467124, |
| "ce_loss_7": 3.186429667472839, |
| "epoch": 0.827, |
| "grad_norm": 19.162288533319998, |
| "kl_loss_13": 3004.0, |
| "kl_loss_26": 2105.8, |
| "kl_loss_39": 1135.2, |
| "kl_loss_7": 3537.2, |
| "learning_rate": 7.347279472290646e-05, |
| "loss": 4997.95, |
| "step": 8270 |
| }, |
| { |
| "ce_loss_13": 2.8661180198192597, |
| "ce_loss_26": 2.4267468631267546, |
| "ce_loss_39": 1.9584647029638291, |
| "ce_loss_52": 1.3965127140283584, |
| "ce_loss_7": 3.1264285147190094, |
| "epoch": 0.828, |
| "grad_norm": 18.88865775629702, |
| "kl_loss_13": 3042.4, |
| "kl_loss_26": 2132.2, |
| "kl_loss_39": 1143.9, |
| "kl_loss_7": 3592.0, |
| "learning_rate": 7.264698807851328e-05, |
| "loss": 4945.25, |
| "step": 8280 |
| }, |
| { |
| "ce_loss_13": 2.963280272483826, |
| "ce_loss_26": 2.5102931052446364, |
| "ce_loss_39": 2.034750634431839, |
| "ce_loss_52": 1.4600775420665741, |
| "ce_loss_7": 3.2234760582447053, |
| "epoch": 0.829, |
| "grad_norm": 18.537963317549014, |
| "kl_loss_13": 3144.4, |
| "kl_loss_26": 2206.8, |
| "kl_loss_39": 1191.9, |
| "kl_loss_7": 3687.6, |
| "learning_rate": 7.182548487420554e-05, |
| "loss": 5033.4, |
| "step": 8290 |
| }, |
| { |
| "ce_loss_13": 2.9992467045783995, |
| "ce_loss_26": 2.55620219707489, |
| "ce_loss_39": 2.0874054759740828, |
| "ce_loss_52": 1.4909266605973244, |
| "ce_loss_7": 3.263571548461914, |
| "epoch": 0.83, |
| "grad_norm": 18.891053458181357, |
| "kl_loss_13": 3132.0, |
| "kl_loss_26": 2207.0, |
| "kl_loss_39": 1207.0, |
| "kl_loss_7": 3681.2, |
| "learning_rate": 7.100829338251146e-05, |
| "loss": 5053.05, |
| "step": 8300 |
| }, |
| { |
| "ce_loss_13": 2.9434437096118926, |
| "ce_loss_26": 2.5084387719631196, |
| "ce_loss_39": 2.0438412368297576, |
| "ce_loss_52": 1.455627703666687, |
| "ce_loss_7": 3.203293949365616, |
| "epoch": 0.831, |
| "grad_norm": 18.63506341583337, |
| "kl_loss_13": 3080.0, |
| "kl_loss_26": 2173.6, |
| "kl_loss_39": 1196.0, |
| "kl_loss_7": 3632.8, |
| "learning_rate": 7.019542183254046e-05, |
| "loss": 5020.25, |
| "step": 8310 |
| }, |
| { |
| "ce_loss_13": 2.9150700867176056, |
| "ce_loss_26": 2.4821571350097655, |
| "ce_loss_39": 2.011009243130684, |
| "ce_loss_52": 1.4295171827077866, |
| "ce_loss_7": 3.1743992388248445, |
| "epoch": 0.832, |
| "grad_norm": 19.224604457055225, |
| "kl_loss_13": 3061.6, |
| "kl_loss_26": 2156.4, |
| "kl_loss_39": 1178.0, |
| "kl_loss_7": 3603.6, |
| "learning_rate": 6.938687840989971e-05, |
| "loss": 4998.8, |
| "step": 8320 |
| }, |
| { |
| "ce_loss_13": 2.9248284220695497, |
| "ce_loss_26": 2.4842200338840486, |
| "ce_loss_39": 2.016203221678734, |
| "ce_loss_52": 1.4306745767593383, |
| "ce_loss_7": 3.1828024327754973, |
| "epoch": 0.833, |
| "grad_norm": 21.39912114954264, |
| "kl_loss_13": 3084.0, |
| "kl_loss_26": 2177.0, |
| "kl_loss_39": 1194.5, |
| "kl_loss_7": 3630.0, |
| "learning_rate": 6.858267125661271e-05, |
| "loss": 5022.85, |
| "step": 8330 |
| }, |
| { |
| "ce_loss_13": 2.8756704360246657, |
| "ce_loss_26": 2.447014120221138, |
| "ce_loss_39": 1.9950514793395997, |
| "ce_loss_52": 1.4190126568078996, |
| "ce_loss_7": 3.135387209057808, |
| "epoch": 0.834, |
| "grad_norm": 19.16865484866817, |
| "kl_loss_13": 3030.8, |
| "kl_loss_26": 2137.8, |
| "kl_loss_39": 1174.0, |
| "kl_loss_7": 3584.4, |
| "learning_rate": 6.778280847103668e-05, |
| "loss": 5009.55, |
| "step": 8340 |
| }, |
| { |
| "ce_loss_13": 2.8521511554718018, |
| "ce_loss_26": 2.415205565094948, |
| "ce_loss_39": 1.9484322667121887, |
| "ce_loss_52": 1.3919154837727548, |
| "ce_loss_7": 3.11352881193161, |
| "epoch": 0.835, |
| "grad_norm": 19.565513612829772, |
| "kl_loss_13": 3035.6, |
| "kl_loss_26": 2128.8, |
| "kl_loss_39": 1129.1, |
| "kl_loss_7": 3584.4, |
| "learning_rate": 6.698729810778065e-05, |
| "loss": 4997.8, |
| "step": 8350 |
| }, |
| { |
| "ce_loss_13": 2.9550336360931397, |
| "ce_loss_26": 2.516478735208511, |
| "ce_loss_39": 2.0486765056848526, |
| "ce_loss_52": 1.4607697233557702, |
| "ce_loss_7": 3.2147393763065337, |
| "epoch": 0.836, |
| "grad_norm": 19.2117514895061, |
| "kl_loss_13": 3091.2, |
| "kl_loss_26": 2181.2, |
| "kl_loss_39": 1192.7, |
| "kl_loss_7": 3642.8, |
| "learning_rate": 6.619614817762538e-05, |
| "loss": 4980.45, |
| "step": 8360 |
| }, |
| { |
| "ce_loss_13": 2.8862642347812653, |
| "ce_loss_26": 2.453189605474472, |
| "ce_loss_39": 1.9996996372938156, |
| "ce_loss_52": 1.42118998169899, |
| "ce_loss_7": 3.1397067666053773, |
| "epoch": 0.837, |
| "grad_norm": 19.74025196121118, |
| "kl_loss_13": 3027.2, |
| "kl_loss_26": 2130.4, |
| "kl_loss_39": 1163.7, |
| "kl_loss_7": 3555.6, |
| "learning_rate": 6.540936664744196e-05, |
| "loss": 5003.55, |
| "step": 8370 |
| }, |
| { |
| "ce_loss_13": 2.8884230494499206, |
| "ce_loss_26": 2.461115485429764, |
| "ce_loss_39": 1.996484610438347, |
| "ce_loss_52": 1.431185284256935, |
| "ce_loss_7": 3.1595689237117766, |
| "epoch": 0.838, |
| "grad_norm": 18.91687230970295, |
| "kl_loss_13": 3028.0, |
| "kl_loss_26": 2134.2, |
| "kl_loss_39": 1162.1, |
| "kl_loss_7": 3585.2, |
| "learning_rate": 6.462696144011149e-05, |
| "loss": 4978.95, |
| "step": 8380 |
| }, |
| { |
| "ce_loss_13": 2.9092856884002685, |
| "ce_loss_26": 2.477367341518402, |
| "ce_loss_39": 2.0064304888248445, |
| "ce_loss_52": 1.4392234981060028, |
| "ce_loss_7": 3.166864866018295, |
| "epoch": 0.839, |
| "grad_norm": 18.987849334165656, |
| "kl_loss_13": 3037.2, |
| "kl_loss_26": 2130.0, |
| "kl_loss_39": 1148.3, |
| "kl_loss_7": 3579.6, |
| "learning_rate": 6.384894043444567e-05, |
| "loss": 4978.1, |
| "step": 8390 |
| }, |
| { |
| "ce_loss_13": 2.926213449239731, |
| "ce_loss_26": 2.4814498484134675, |
| "ce_loss_39": 2.011722648143768, |
| "ce_loss_52": 1.4253545701503754, |
| "ce_loss_7": 3.1928284585475923, |
| "epoch": 0.84, |
| "grad_norm": 18.67190019337169, |
| "kl_loss_13": 3118.4, |
| "kl_loss_26": 2194.4, |
| "kl_loss_39": 1203.2, |
| "kl_loss_7": 3673.2, |
| "learning_rate": 6.307531146510753e-05, |
| "loss": 4975.3, |
| "step": 8400 |
| }, |
| { |
| "ce_loss_13": 2.9474796772003176, |
| "ce_loss_26": 2.513693606853485, |
| "ce_loss_39": 2.039980337023735, |
| "ce_loss_52": 1.4588691473007203, |
| "ce_loss_7": 3.208155167102814, |
| "epoch": 0.841, |
| "grad_norm": 19.23898472020216, |
| "kl_loss_13": 3091.2, |
| "kl_loss_26": 2176.0, |
| "kl_loss_39": 1186.3, |
| "kl_loss_7": 3638.8, |
| "learning_rate": 6.230608232253226e-05, |
| "loss": 4972.8, |
| "step": 8410 |
| }, |
| { |
| "ce_loss_13": 2.98299777507782, |
| "ce_loss_26": 2.546931451559067, |
| "ce_loss_39": 2.066228356957436, |
| "ce_loss_52": 1.4528164565563202, |
| "ce_loss_7": 3.2408275246620177, |
| "epoch": 0.842, |
| "grad_norm": 19.590505544042042, |
| "kl_loss_13": 3177.6, |
| "kl_loss_26": 2260.2, |
| "kl_loss_39": 1246.2, |
| "kl_loss_7": 3722.8, |
| "learning_rate": 6.154126075284855e-05, |
| "loss": 5025.9, |
| "step": 8420 |
| }, |
| { |
| "ce_loss_13": 2.8128190338611603, |
| "ce_loss_26": 2.3779567658901213, |
| "ce_loss_39": 1.922488284111023, |
| "ce_loss_52": 1.3714163228869438, |
| "ce_loss_7": 3.0708466947078703, |
| "epoch": 0.843, |
| "grad_norm": 19.244313422130332, |
| "kl_loss_13": 3014.4, |
| "kl_loss_26": 2105.4, |
| "kl_loss_39": 1130.8, |
| "kl_loss_7": 3552.8, |
| "learning_rate": 6.078085445780129e-05, |
| "loss": 5004.75, |
| "step": 8430 |
| }, |
| { |
| "ce_loss_13": 2.938475805521011, |
| "ce_loss_26": 2.497309777140617, |
| "ce_loss_39": 2.027024504542351, |
| "ce_loss_52": 1.4460827559232712, |
| "ce_loss_7": 3.194866645336151, |
| "epoch": 0.844, |
| "grad_norm": 18.715084502177618, |
| "kl_loss_13": 3086.4, |
| "kl_loss_26": 2168.0, |
| "kl_loss_39": 1180.4, |
| "kl_loss_7": 3630.0, |
| "learning_rate": 6.002487109467347e-05, |
| "loss": 5005.05, |
| "step": 8440 |
| }, |
| { |
| "ce_loss_13": 2.940459841489792, |
| "ce_loss_26": 2.5169106662273406, |
| "ce_loss_39": 2.0435358375310897, |
| "ce_loss_52": 1.4713785827159882, |
| "ce_loss_7": 3.199807566404343, |
| "epoch": 0.845, |
| "grad_norm": 20.662636581778713, |
| "kl_loss_13": 3050.8, |
| "kl_loss_26": 2152.2, |
| "kl_loss_39": 1166.1, |
| "kl_loss_7": 3588.8, |
| "learning_rate": 5.927331827620902e-05, |
| "loss": 5015.45, |
| "step": 8450 |
| }, |
| { |
| "ce_loss_13": 2.8635286152362824, |
| "ce_loss_26": 2.426770511269569, |
| "ce_loss_39": 1.9639566600322724, |
| "ce_loss_52": 1.4056006461381911, |
| "ce_loss_7": 3.126782363653183, |
| "epoch": 0.846, |
| "grad_norm": 19.43955116705752, |
| "kl_loss_13": 3026.8, |
| "kl_loss_26": 2123.4, |
| "kl_loss_39": 1139.5, |
| "kl_loss_7": 3581.2, |
| "learning_rate": 5.852620357053651e-05, |
| "loss": 4930.5, |
| "step": 8460 |
| }, |
| { |
| "ce_loss_13": 2.967438644170761, |
| "ce_loss_26": 2.5302784025669096, |
| "ce_loss_39": 2.0548608988523482, |
| "ce_loss_52": 1.456875516474247, |
| "ce_loss_7": 3.232648569345474, |
| "epoch": 0.847, |
| "grad_norm": 18.917933547815785, |
| "kl_loss_13": 3141.6, |
| "kl_loss_26": 2229.0, |
| "kl_loss_39": 1221.3, |
| "kl_loss_7": 3698.0, |
| "learning_rate": 5.778353450109286e-05, |
| "loss": 5049.2, |
| "step": 8470 |
| }, |
| { |
| "ce_loss_13": 2.8490840077400206, |
| "ce_loss_26": 2.4299704492092133, |
| "ce_loss_39": 1.9720161318778993, |
| "ce_loss_52": 1.428696632385254, |
| "ce_loss_7": 3.1050261557102203, |
| "epoch": 0.848, |
| "grad_norm": 19.028513845565772, |
| "kl_loss_13": 2946.4, |
| "kl_loss_26": 2077.0, |
| "kl_loss_39": 1109.1, |
| "kl_loss_7": 3480.0, |
| "learning_rate": 5.7045318546547206e-05, |
| "loss": 4964.25, |
| "step": 8480 |
| }, |
| { |
| "ce_loss_13": 2.91025772690773, |
| "ce_loss_26": 2.470404103398323, |
| "ce_loss_39": 1.9989687472581863, |
| "ce_loss_52": 1.4330752968788147, |
| "ce_loss_7": 3.1751048266887665, |
| "epoch": 0.849, |
| "grad_norm": 18.63333546431516, |
| "kl_loss_13": 3053.6, |
| "kl_loss_26": 2137.8, |
| "kl_loss_39": 1151.7, |
| "kl_loss_7": 3603.6, |
| "learning_rate": 5.631156314072605e-05, |
| "loss": 4997.6, |
| "step": 8490 |
| }, |
| { |
| "ce_loss_13": 2.969731491804123, |
| "ce_loss_26": 2.5224834442138673, |
| "ce_loss_39": 2.043315088748932, |
| "ce_loss_52": 1.4606564939022064, |
| "ce_loss_7": 3.235535615682602, |
| "epoch": 0.85, |
| "grad_norm": 19.13874159874534, |
| "kl_loss_13": 3116.0, |
| "kl_loss_26": 2192.8, |
| "kl_loss_39": 1187.7, |
| "kl_loss_7": 3671.6, |
| "learning_rate": 5.5582275672538315e-05, |
| "loss": 4973.7, |
| "step": 8500 |
| }, |
| { |
| "ce_loss_13": 2.9457097470760347, |
| "ce_loss_26": 2.5094266653060915, |
| "ce_loss_39": 2.037208506464958, |
| "ce_loss_52": 1.4461605846881866, |
| "ce_loss_7": 3.2044992685317992, |
| "epoch": 0.851, |
| "grad_norm": 19.084085332754032, |
| "kl_loss_13": 3088.0, |
| "kl_loss_26": 2174.4, |
| "kl_loss_39": 1186.9, |
| "kl_loss_7": 3629.2, |
| "learning_rate": 5.4857463485900484e-05, |
| "loss": 4979.7, |
| "step": 8510 |
| }, |
| { |
| "ce_loss_13": 2.932442033290863, |
| "ce_loss_26": 2.4945150196552275, |
| "ce_loss_39": 2.024565789103508, |
| "ce_loss_52": 1.4535668522119523, |
| "ce_loss_7": 3.1925411999225615, |
| "epoch": 0.852, |
| "grad_norm": 18.598717511449827, |
| "kl_loss_13": 3049.2, |
| "kl_loss_26": 2138.2, |
| "kl_loss_39": 1163.5, |
| "kl_loss_7": 3596.0, |
| "learning_rate": 5.413713387966329e-05, |
| "loss": 4976.35, |
| "step": 8520 |
| }, |
| { |
| "ce_loss_13": 2.850284093618393, |
| "ce_loss_26": 2.4304546415805817, |
| "ce_loss_39": 1.9769367069005965, |
| "ce_loss_52": 1.4221994251012802, |
| "ce_loss_7": 3.106715601682663, |
| "epoch": 0.853, |
| "grad_norm": 19.800943553162387, |
| "kl_loss_13": 2971.2, |
| "kl_loss_26": 2092.8, |
| "kl_loss_39": 1135.0, |
| "kl_loss_7": 3509.2, |
| "learning_rate": 5.34212941075381e-05, |
| "loss": 4969.3, |
| "step": 8530 |
| }, |
| { |
| "ce_loss_13": 2.8934908270835877, |
| "ce_loss_26": 2.4596606254577638, |
| "ce_loss_39": 2.00773600935936, |
| "ce_loss_52": 1.4613569289445878, |
| "ce_loss_7": 3.150895756483078, |
| "epoch": 0.854, |
| "grad_norm": 18.83825612066199, |
| "kl_loss_13": 3003.6, |
| "kl_loss_26": 2097.4, |
| "kl_loss_39": 1124.7, |
| "kl_loss_7": 3544.0, |
| "learning_rate": 5.270995137802315e-05, |
| "loss": 4942.95, |
| "step": 8540 |
| }, |
| { |
| "ce_loss_13": 2.9037817120552063, |
| "ce_loss_26": 2.462430712580681, |
| "ce_loss_39": 1.9962077885866165, |
| "ce_loss_52": 1.425583516061306, |
| "ce_loss_7": 3.173860079050064, |
| "epoch": 0.855, |
| "grad_norm": 18.951683103960118, |
| "kl_loss_13": 3081.6, |
| "kl_loss_26": 2158.2, |
| "kl_loss_39": 1162.8, |
| "kl_loss_7": 3640.8, |
| "learning_rate": 5.2003112854332125e-05, |
| "loss": 4931.9, |
| "step": 8550 |
| }, |
| { |
| "ce_loss_13": 2.945174980163574, |
| "ce_loss_26": 2.504639369249344, |
| "ce_loss_39": 2.038501372933388, |
| "ce_loss_52": 1.4634816706180573, |
| "ce_loss_7": 3.206812459230423, |
| "epoch": 0.856, |
| "grad_norm": 19.71064196024924, |
| "kl_loss_13": 3095.2, |
| "kl_loss_26": 2169.8, |
| "kl_loss_39": 1182.5, |
| "kl_loss_7": 3640.4, |
| "learning_rate": 5.130078565432089e-05, |
| "loss": 5022.2, |
| "step": 8560 |
| }, |
| { |
| "ce_loss_13": 2.916484522819519, |
| "ce_loss_26": 2.4740715622901917, |
| "ce_loss_39": 2.0083792597055434, |
| "ce_loss_52": 1.4411062002182007, |
| "ce_loss_7": 3.1799224853515624, |
| "epoch": 0.857, |
| "grad_norm": 18.476735822226924, |
| "kl_loss_13": 3088.4, |
| "kl_loss_26": 2169.8, |
| "kl_loss_39": 1171.0, |
| "kl_loss_7": 3638.0, |
| "learning_rate": 5.060297685041659e-05, |
| "loss": 4959.95, |
| "step": 8570 |
| }, |
| { |
| "ce_loss_13": 2.9657407224178316, |
| "ce_loss_26": 2.5159328460693358, |
| "ce_loss_39": 2.0297839671373366, |
| "ce_loss_52": 1.4374482572078704, |
| "ce_loss_7": 3.2368306040763857, |
| "epoch": 0.858, |
| "grad_norm": 18.666614846165707, |
| "kl_loss_13": 3174.0, |
| "kl_loss_26": 2243.8, |
| "kl_loss_39": 1214.4, |
| "kl_loss_7": 3739.2, |
| "learning_rate": 4.99096934695461e-05, |
| "loss": 4973.6, |
| "step": 8580 |
| }, |
| { |
| "ce_loss_13": 2.9122063517570496, |
| "ce_loss_26": 2.4818194091320036, |
| "ce_loss_39": 2.0184349328279496, |
| "ce_loss_52": 1.4382916703820228, |
| "ce_loss_7": 3.17412588596344, |
| "epoch": 0.859, |
| "grad_norm": 19.2367820447764, |
| "kl_loss_13": 3051.6, |
| "kl_loss_26": 2150.2, |
| "kl_loss_39": 1170.3, |
| "kl_loss_7": 3600.8, |
| "learning_rate": 4.922094249306558e-05, |
| "loss": 4986.8, |
| "step": 8590 |
| }, |
| { |
| "ce_loss_13": 2.856007623672485, |
| "ce_loss_26": 2.4249674677848816, |
| "ce_loss_39": 1.9603979021310807, |
| "ce_loss_52": 1.3998382538557053, |
| "ce_loss_7": 3.120637094974518, |
| "epoch": 0.86, |
| "grad_norm": 19.45927110525327, |
| "kl_loss_13": 3034.0, |
| "kl_loss_26": 2133.8, |
| "kl_loss_39": 1154.5, |
| "kl_loss_7": 3588.0, |
| "learning_rate": 4.853673085668947e-05, |
| "loss": 5020.0, |
| "step": 8600 |
| }, |
| { |
| "ce_loss_13": 2.8736523926258086, |
| "ce_loss_26": 2.4339311927556992, |
| "ce_loss_39": 1.9728148251771926, |
| "ce_loss_52": 1.4134869635105134, |
| "ce_loss_7": 3.132260227203369, |
| "epoch": 0.861, |
| "grad_norm": 18.566757154178084, |
| "kl_loss_13": 3035.6, |
| "kl_loss_26": 2121.8, |
| "kl_loss_39": 1143.1, |
| "kl_loss_7": 3595.6, |
| "learning_rate": 4.78570654504214e-05, |
| "loss": 5002.4, |
| "step": 8610 |
| }, |
| { |
| "ce_loss_13": 2.918911075592041, |
| "ce_loss_26": 2.4760118186473847, |
| "ce_loss_39": 2.0035496681928633, |
| "ce_loss_52": 1.4298115074634552, |
| "ce_loss_7": 3.1867982387542724, |
| "epoch": 0.862, |
| "grad_norm": 19.07407083640675, |
| "kl_loss_13": 3104.8, |
| "kl_loss_26": 2178.0, |
| "kl_loss_39": 1171.7, |
| "kl_loss_7": 3661.6, |
| "learning_rate": 4.7181953118484556e-05, |
| "loss": 4962.1, |
| "step": 8620 |
| }, |
| { |
| "ce_loss_13": 2.90929571390152, |
| "ce_loss_26": 2.4703837007284166, |
| "ce_loss_39": 2.004773771762848, |
| "ce_loss_52": 1.4389754503965377, |
| "ce_loss_7": 3.1703392446041105, |
| "epoch": 0.863, |
| "grad_norm": 19.469181472497027, |
| "kl_loss_13": 3027.6, |
| "kl_loss_26": 2134.8, |
| "kl_loss_39": 1156.0, |
| "kl_loss_7": 3579.2, |
| "learning_rate": 4.651140065925269e-05, |
| "loss": 4937.2, |
| "step": 8630 |
| }, |
| { |
| "ce_loss_13": 2.9975267946720123, |
| "ce_loss_26": 2.5536694526672363, |
| "ce_loss_39": 2.07455490231514, |
| "ce_loss_52": 1.4786568373441695, |
| "ce_loss_7": 3.265315741300583, |
| "epoch": 0.864, |
| "grad_norm": 19.328625062361652, |
| "kl_loss_13": 3149.6, |
| "kl_loss_26": 2221.4, |
| "kl_loss_39": 1210.4, |
| "kl_loss_7": 3701.6, |
| "learning_rate": 4.58454148251814e-05, |
| "loss": 4974.2, |
| "step": 8640 |
| }, |
| { |
| "ce_loss_13": 2.914253044128418, |
| "ce_loss_26": 2.4742087960243224, |
| "ce_loss_39": 2.0090451925992965, |
| "ce_loss_52": 1.4428718268871308, |
| "ce_loss_7": 3.174784082174301, |
| "epoch": 0.865, |
| "grad_norm": 18.948427807011946, |
| "kl_loss_13": 3048.4, |
| "kl_loss_26": 2129.2, |
| "kl_loss_39": 1142.4, |
| "kl_loss_7": 3588.0, |
| "learning_rate": 4.518400232274078e-05, |
| "loss": 4950.8, |
| "step": 8650 |
| }, |
| { |
| "ce_loss_13": 2.895064812898636, |
| "ce_loss_26": 2.452637565135956, |
| "ce_loss_39": 1.9877680152654649, |
| "ce_loss_52": 1.419823595881462, |
| "ce_loss_7": 3.151270192861557, |
| "epoch": 0.866, |
| "grad_norm": 18.81553972523, |
| "kl_loss_13": 3058.0, |
| "kl_loss_26": 2145.0, |
| "kl_loss_39": 1169.4, |
| "kl_loss_7": 3595.6, |
| "learning_rate": 4.452716981234745e-05, |
| "loss": 5007.2, |
| "step": 8660 |
| }, |
| { |
| "ce_loss_13": 2.929095983505249, |
| "ce_loss_26": 2.480497121810913, |
| "ce_loss_39": 2.006428611278534, |
| "ce_loss_52": 1.4315154731273652, |
| "ce_loss_7": 3.2007214546203615, |
| "epoch": 0.867, |
| "grad_norm": 18.95890107682501, |
| "kl_loss_13": 3086.0, |
| "kl_loss_26": 2159.8, |
| "kl_loss_39": 1163.2, |
| "kl_loss_7": 3645.6, |
| "learning_rate": 4.3874923908297335e-05, |
| "loss": 4988.0, |
| "step": 8670 |
| }, |
| { |
| "ce_loss_13": 2.917868083715439, |
| "ce_loss_26": 2.466423386335373, |
| "ce_loss_39": 1.9825115293264388, |
| "ce_loss_52": 1.4014427214860916, |
| "ce_loss_7": 3.187401866912842, |
| "epoch": 0.868, |
| "grad_norm": 18.57272436866935, |
| "kl_loss_13": 3128.0, |
| "kl_loss_26": 2194.4, |
| "kl_loss_39": 1181.8, |
| "kl_loss_7": 3698.4, |
| "learning_rate": 4.322727117869951e-05, |
| "loss": 4966.1, |
| "step": 8680 |
| }, |
| { |
| "ce_loss_13": 2.864998000860214, |
| "ce_loss_26": 2.4295243114233016, |
| "ce_loss_39": 1.9672368943691254, |
| "ce_loss_52": 1.4193324148654938, |
| "ce_loss_7": 3.128977674245834, |
| "epoch": 0.869, |
| "grad_norm": 18.701108919653294, |
| "kl_loss_13": 3002.4, |
| "kl_loss_26": 2088.8, |
| "kl_loss_39": 1117.8, |
| "kl_loss_7": 3558.4, |
| "learning_rate": 4.2584218145409916e-05, |
| "loss": 4955.6, |
| "step": 8690 |
| }, |
| { |
| "ce_loss_13": 2.866414725780487, |
| "ce_loss_26": 2.4338418275117872, |
| "ce_loss_39": 1.961911031603813, |
| "ce_loss_52": 1.3979329317808151, |
| "ce_loss_7": 3.1249643862247467, |
| "epoch": 0.87, |
| "grad_norm": 19.695244279115233, |
| "kl_loss_13": 3026.4, |
| "kl_loss_26": 2123.4, |
| "kl_loss_39": 1140.7, |
| "kl_loss_7": 3568.4, |
| "learning_rate": 4.194577128396521e-05, |
| "loss": 4954.85, |
| "step": 8700 |
| }, |
| { |
| "ce_loss_13": 2.9441056907176972, |
| "ce_loss_26": 2.5146015286445618, |
| "ce_loss_39": 2.0488742887973785, |
| "ce_loss_52": 1.4835967749357224, |
| "ce_loss_7": 3.202489811182022, |
| "epoch": 0.871, |
| "grad_norm": 18.533833127334756, |
| "kl_loss_13": 3031.2, |
| "kl_loss_26": 2129.4, |
| "kl_loss_39": 1154.3, |
| "kl_loss_7": 3572.4, |
| "learning_rate": 4.1311937023518264e-05, |
| "loss": 4983.55, |
| "step": 8710 |
| }, |
| { |
| "ce_loss_13": 2.927008146047592, |
| "ce_loss_26": 2.4801330626010896, |
| "ce_loss_39": 2.0113667100667953, |
| "ce_loss_52": 1.4392758041620255, |
| "ce_loss_7": 3.1931580364704133, |
| "epoch": 0.872, |
| "grad_norm": 19.604731242944656, |
| "kl_loss_13": 3073.2, |
| "kl_loss_26": 2148.4, |
| "kl_loss_39": 1161.3, |
| "kl_loss_7": 3631.6, |
| "learning_rate": 4.0682721746773344e-05, |
| "loss": 4966.3, |
| "step": 8720 |
| }, |
| { |
| "ce_loss_13": 2.875244301557541, |
| "ce_loss_26": 2.4395941644906998, |
| "ce_loss_39": 1.9802403211593629, |
| "ce_loss_52": 1.425346952676773, |
| "ce_loss_7": 3.1323861300945284, |
| "epoch": 0.873, |
| "grad_norm": 19.084321030186736, |
| "kl_loss_13": 3020.0, |
| "kl_loss_26": 2114.6, |
| "kl_loss_39": 1135.6, |
| "kl_loss_7": 3558.4, |
| "learning_rate": 4.0058131789920904e-05, |
| "loss": 4966.9, |
| "step": 8730 |
| }, |
| { |
| "ce_loss_13": 2.91582133769989, |
| "ce_loss_26": 2.469817638397217, |
| "ce_loss_39": 1.9961349010467528, |
| "ce_loss_52": 1.4161934450268745, |
| "ce_loss_7": 3.1843641221523287, |
| "epoch": 0.874, |
| "grad_norm": 19.701093771188038, |
| "kl_loss_13": 3130.0, |
| "kl_loss_26": 2205.0, |
| "kl_loss_39": 1187.7, |
| "kl_loss_7": 3684.8, |
| "learning_rate": 3.9438173442575e-05, |
| "loss": 4920.3, |
| "step": 8740 |
| }, |
| { |
| "ce_loss_13": 2.931247502565384, |
| "ce_loss_26": 2.499329847097397, |
| "ce_loss_39": 2.038593566417694, |
| "ce_loss_52": 1.4660022050142287, |
| "ce_loss_7": 3.1989371538162232, |
| "epoch": 0.875, |
| "grad_norm": 19.499459693858824, |
| "kl_loss_13": 3039.6, |
| "kl_loss_26": 2133.8, |
| "kl_loss_39": 1152.2, |
| "kl_loss_7": 3588.0, |
| "learning_rate": 3.882285294770937e-05, |
| "loss": 4984.7, |
| "step": 8750 |
| }, |
| { |
| "ce_loss_13": 2.903587061166763, |
| "ce_loss_26": 2.4606124222278596, |
| "ce_loss_39": 1.9871950060129167, |
| "ce_loss_52": 1.3980468481779098, |
| "ce_loss_7": 3.17354930639267, |
| "epoch": 0.876, |
| "grad_norm": 19.253090654259744, |
| "kl_loss_13": 3078.4, |
| "kl_loss_26": 2163.8, |
| "kl_loss_39": 1181.0, |
| "kl_loss_7": 3644.8, |
| "learning_rate": 3.821217650159453e-05, |
| "loss": 4982.75, |
| "step": 8760 |
| }, |
| { |
| "ce_loss_13": 2.820340207219124, |
| "ce_loss_26": 2.3951667070388796, |
| "ce_loss_39": 1.9451639890670775, |
| "ce_loss_52": 1.4168721199035645, |
| "ce_loss_7": 3.074656307697296, |
| "epoch": 0.877, |
| "grad_norm": 19.190959579311908, |
| "kl_loss_13": 2910.8, |
| "kl_loss_26": 2031.2, |
| "kl_loss_39": 1086.2, |
| "kl_loss_7": 3448.8, |
| "learning_rate": 3.760615025373543e-05, |
| "loss": 4941.25, |
| "step": 8770 |
| }, |
| { |
| "ce_loss_13": 2.9391641199588774, |
| "ce_loss_26": 2.5044194877147676, |
| "ce_loss_39": 2.0340330809354783, |
| "ce_loss_52": 1.4550057530403138, |
| "ce_loss_7": 3.200497591495514, |
| "epoch": 0.878, |
| "grad_norm": 19.218011314135488, |
| "kl_loss_13": 3063.6, |
| "kl_loss_26": 2155.8, |
| "kl_loss_39": 1164.4, |
| "kl_loss_7": 3604.8, |
| "learning_rate": 3.700478030680987e-05, |
| "loss": 4989.0, |
| "step": 8780 |
| }, |
| { |
| "ce_loss_13": 2.9181353628635405, |
| "ce_loss_26": 2.484974616765976, |
| "ce_loss_39": 2.0213694095611574, |
| "ce_loss_52": 1.4473600834608078, |
| "ce_loss_7": 3.1764037668704987, |
| "epoch": 0.879, |
| "grad_norm": 18.881217884773644, |
| "kl_loss_13": 3048.4, |
| "kl_loss_26": 2145.6, |
| "kl_loss_39": 1162.1, |
| "kl_loss_7": 3594.8, |
| "learning_rate": 3.6408072716606344e-05, |
| "loss": 4996.95, |
| "step": 8790 |
| }, |
| { |
| "ce_loss_13": 2.878078305721283, |
| "ce_loss_26": 2.445318901538849, |
| "ce_loss_39": 1.9787369549274445, |
| "ce_loss_52": 1.4154168665409088, |
| "ce_loss_7": 3.13910374045372, |
| "epoch": 0.88, |
| "grad_norm": 19.232153237296814, |
| "kl_loss_13": 3023.6, |
| "kl_loss_26": 2120.2, |
| "kl_loss_39": 1138.8, |
| "kl_loss_7": 3570.8, |
| "learning_rate": 3.5816033491963716e-05, |
| "loss": 4957.2, |
| "step": 8800 |
| }, |
| { |
| "ce_loss_13": 2.8982558727264403, |
| "ce_loss_26": 2.4716543793678283, |
| "ce_loss_39": 2.0096321552991867, |
| "ce_loss_52": 1.4367865800857544, |
| "ce_loss_7": 3.162083399295807, |
| "epoch": 0.881, |
| "grad_norm": 19.92275213516334, |
| "kl_loss_13": 3003.6, |
| "kl_loss_26": 2113.0, |
| "kl_loss_39": 1151.0, |
| "kl_loss_7": 3555.2, |
| "learning_rate": 3.522866859471047e-05, |
| "loss": 4925.7, |
| "step": 8810 |
| }, |
| { |
| "ce_loss_13": 2.9355869591236115, |
| "ce_loss_26": 2.495421326160431, |
| "ce_loss_39": 2.0324677735567094, |
| "ce_loss_52": 1.4521033734083175, |
| "ce_loss_7": 3.20084969997406, |
| "epoch": 0.882, |
| "grad_norm": 18.63804720981334, |
| "kl_loss_13": 3093.2, |
| "kl_loss_26": 2164.2, |
| "kl_loss_39": 1179.6, |
| "kl_loss_7": 3645.6, |
| "learning_rate": 3.46459839396045e-05, |
| "loss": 5007.2, |
| "step": 8820 |
| }, |
| { |
| "ce_loss_13": 2.9325197875499724, |
| "ce_loss_26": 2.4879075407981874, |
| "ce_loss_39": 2.0197067618370057, |
| "ce_loss_52": 1.4270877152681352, |
| "ce_loss_7": 3.198145306110382, |
| "epoch": 0.883, |
| "grad_norm": 18.241829484742485, |
| "kl_loss_13": 3112.0, |
| "kl_loss_26": 2188.6, |
| "kl_loss_39": 1188.1, |
| "kl_loss_7": 3676.4, |
| "learning_rate": 3.406798539427386e-05, |
| "loss": 4970.75, |
| "step": 8830 |
| }, |
| { |
| "ce_loss_13": 2.9144038438796995, |
| "ce_loss_26": 2.490026795864105, |
| "ce_loss_39": 2.0250850170850754, |
| "ce_loss_52": 1.4695867449045181, |
| "ce_loss_7": 3.171018958091736, |
| "epoch": 0.884, |
| "grad_norm": 19.295732101275807, |
| "kl_loss_13": 3016.4, |
| "kl_loss_26": 2109.6, |
| "kl_loss_39": 1133.6, |
| "kl_loss_7": 3556.0, |
| "learning_rate": 3.349467877915746e-05, |
| "loss": 4929.15, |
| "step": 8840 |
| }, |
| { |
| "ce_loss_13": 2.9367240130901338, |
| "ce_loss_26": 2.5029721915721894, |
| "ce_loss_39": 2.038401874899864, |
| "ce_loss_52": 1.4602129399776458, |
| "ce_loss_7": 3.1973277926445007, |
| "epoch": 0.885, |
| "grad_norm": 18.483553110198095, |
| "kl_loss_13": 3071.2, |
| "kl_loss_26": 2165.0, |
| "kl_loss_39": 1179.3, |
| "kl_loss_7": 3608.0, |
| "learning_rate": 3.292606986744667e-05, |
| "loss": 4997.15, |
| "step": 8850 |
| }, |
| { |
| "ce_loss_13": 2.962205785512924, |
| "ce_loss_26": 2.5182169795036318, |
| "ce_loss_39": 2.0426361471414567, |
| "ce_loss_52": 1.4736472845077515, |
| "ce_loss_7": 3.2304830133914946, |
| "epoch": 0.886, |
| "grad_norm": 19.501438521011444, |
| "kl_loss_13": 3089.6, |
| "kl_loss_26": 2164.0, |
| "kl_loss_39": 1168.6, |
| "kl_loss_7": 3642.4, |
| "learning_rate": 3.23621643850267e-05, |
| "loss": 4957.35, |
| "step": 8860 |
| }, |
| { |
| "ce_loss_13": 2.8543384969234467, |
| "ce_loss_26": 2.4200983941555023, |
| "ce_loss_39": 1.9575607985258103, |
| "ce_loss_52": 1.3949070930480958, |
| "ce_loss_7": 3.1160971879959107, |
| "epoch": 0.887, |
| "grad_norm": 19.320586944244614, |
| "kl_loss_13": 3009.6, |
| "kl_loss_26": 2108.8, |
| "kl_loss_39": 1139.1, |
| "kl_loss_7": 3548.0, |
| "learning_rate": 3.180296801041971e-05, |
| "loss": 4940.1, |
| "step": 8870 |
| }, |
| { |
| "ce_loss_13": 2.8845052778720857, |
| "ce_loss_26": 2.456773716211319, |
| "ce_loss_39": 1.9930354177951812, |
| "ce_loss_52": 1.4307963967323303, |
| "ce_loss_7": 3.144515538215637, |
| "epoch": 0.888, |
| "grad_norm": 19.659033917356428, |
| "kl_loss_13": 2998.8, |
| "kl_loss_26": 2105.2, |
| "kl_loss_39": 1137.6, |
| "kl_loss_7": 3543.2, |
| "learning_rate": 3.124848637472688e-05, |
| "loss": 4952.5, |
| "step": 8880 |
| }, |
| { |
| "ce_loss_13": 2.8944738626480104, |
| "ce_loss_26": 2.4629203975200653, |
| "ce_loss_39": 1.9913650721311569, |
| "ce_loss_52": 1.4339269563555717, |
| "ce_loss_7": 3.1572672605514525, |
| "epoch": 0.889, |
| "grad_norm": 18.88843635803768, |
| "kl_loss_13": 3032.4, |
| "kl_loss_26": 2127.8, |
| "kl_loss_39": 1135.0, |
| "kl_loss_7": 3579.2, |
| "learning_rate": 3.069872506157212e-05, |
| "loss": 4974.35, |
| "step": 8890 |
| }, |
| { |
| "ce_loss_13": 2.8339054346084596, |
| "ce_loss_26": 2.394126781821251, |
| "ce_loss_39": 1.9339392215013504, |
| "ce_loss_52": 1.3971292108297348, |
| "ce_loss_7": 3.0929621160030365, |
| "epoch": 0.89, |
| "grad_norm": 18.978687158228045, |
| "kl_loss_13": 2982.0, |
| "kl_loss_26": 2066.8, |
| "kl_loss_39": 1100.8, |
| "kl_loss_7": 3526.4, |
| "learning_rate": 3.0153689607045842e-05, |
| "loss": 4941.5, |
| "step": 8900 |
| }, |
| { |
| "ce_loss_13": 2.8811903417110445, |
| "ce_loss_26": 2.440036287903786, |
| "ce_loss_39": 1.975409933924675, |
| "ce_loss_52": 1.416017021238804, |
| "ce_loss_7": 3.1414348661899565, |
| "epoch": 0.891, |
| "grad_norm": 19.560639422793763, |
| "kl_loss_13": 3040.4, |
| "kl_loss_26": 2123.2, |
| "kl_loss_39": 1133.6, |
| "kl_loss_7": 3586.4, |
| "learning_rate": 2.9613385499648926e-05, |
| "loss": 4965.6, |
| "step": 8910 |
| }, |
| { |
| "ce_loss_13": 2.8596278965473174, |
| "ce_loss_26": 2.4295035183429716, |
| "ce_loss_39": 1.9728092432022095, |
| "ce_loss_52": 1.4283979684114456, |
| "ce_loss_7": 3.1134236633777617, |
| "epoch": 0.892, |
| "grad_norm": 18.908746612036932, |
| "kl_loss_13": 2974.8, |
| "kl_loss_26": 2082.0, |
| "kl_loss_39": 1115.7, |
| "kl_loss_7": 3505.2, |
| "learning_rate": 2.9077818180237692e-05, |
| "loss": 5007.95, |
| "step": 8920 |
| }, |
| { |
| "ce_loss_13": 2.8744696974754333, |
| "ce_loss_26": 2.4505746215581894, |
| "ce_loss_39": 1.9992403596639634, |
| "ce_loss_52": 1.4506706580519677, |
| "ce_loss_7": 3.13488364815712, |
| "epoch": 0.893, |
| "grad_norm": 18.957478200982788, |
| "kl_loss_13": 2985.6, |
| "kl_loss_26": 2094.6, |
| "kl_loss_39": 1129.4, |
| "kl_loss_7": 3523.6, |
| "learning_rate": 2.8546993041969172e-05, |
| "loss": 4940.15, |
| "step": 8930 |
| }, |
| { |
| "ce_loss_13": 2.8975345969200133, |
| "ce_loss_26": 2.4653249740600587, |
| "ce_loss_39": 2.0007053166627884, |
| "ce_loss_52": 1.4314887911081313, |
| "ce_loss_7": 3.155570811033249, |
| "epoch": 0.894, |
| "grad_norm": 18.739265695771255, |
| "kl_loss_13": 3012.4, |
| "kl_loss_26": 2117.2, |
| "kl_loss_39": 1151.3, |
| "kl_loss_7": 3551.6, |
| "learning_rate": 2.802091543024671e-05, |
| "loss": 4940.05, |
| "step": 8940 |
| }, |
| { |
| "ce_loss_13": 2.9072438359260557, |
| "ce_loss_26": 2.471187961101532, |
| "ce_loss_39": 2.0144962787628176, |
| "ce_loss_52": 1.42624132335186, |
| "ce_loss_7": 3.1694943487644194, |
| "epoch": 0.895, |
| "grad_norm": 19.21731826372691, |
| "kl_loss_13": 3062.0, |
| "kl_loss_26": 2168.4, |
| "kl_loss_39": 1196.7, |
| "kl_loss_7": 3618.8, |
| "learning_rate": 2.7499590642665774e-05, |
| "loss": 4979.0, |
| "step": 8950 |
| }, |
| { |
| "ce_loss_13": 2.893015044927597, |
| "ce_loss_26": 2.4596860975027086, |
| "ce_loss_39": 2.0018325716257097, |
| "ce_loss_52": 1.4285719782114028, |
| "ce_loss_7": 3.154858148097992, |
| "epoch": 0.896, |
| "grad_norm": 18.801842424478984, |
| "kl_loss_13": 3027.6, |
| "kl_loss_26": 2117.0, |
| "kl_loss_39": 1153.1, |
| "kl_loss_7": 3566.0, |
| "learning_rate": 2.6983023928961405e-05, |
| "loss": 4959.6, |
| "step": 8960 |
| }, |
| { |
| "ce_loss_13": 2.8546026587486266, |
| "ce_loss_26": 2.4183767944574357, |
| "ce_loss_39": 1.956614688038826, |
| "ce_loss_52": 1.3939528629183768, |
| "ce_loss_7": 3.1231652200222015, |
| "epoch": 0.897, |
| "grad_norm": 19.808391264092982, |
| "kl_loss_13": 3029.2, |
| "kl_loss_26": 2118.8, |
| "kl_loss_39": 1139.0, |
| "kl_loss_7": 3584.4, |
| "learning_rate": 2.6471220490954628e-05, |
| "loss": 4973.5, |
| "step": 8970 |
| }, |
| { |
| "ce_loss_13": 2.9009016394615172, |
| "ce_loss_26": 2.467138040065765, |
| "ce_loss_39": 2.0069568186998366, |
| "ce_loss_52": 1.4654083251953125, |
| "ce_loss_7": 3.155901938676834, |
| "epoch": 0.898, |
| "grad_norm": 19.041913132666583, |
| "kl_loss_13": 2967.2, |
| "kl_loss_26": 2069.0, |
| "kl_loss_39": 1105.9, |
| "kl_loss_7": 3506.0, |
| "learning_rate": 2.596418548250029e-05, |
| "loss": 4886.7, |
| "step": 8980 |
| }, |
| { |
| "ce_loss_13": 2.8516535699367522, |
| "ce_loss_26": 2.426672577857971, |
| "ce_loss_39": 1.9635347902774811, |
| "ce_loss_52": 1.419031423330307, |
| "ce_loss_7": 3.112126684188843, |
| "epoch": 0.899, |
| "grad_norm": 19.026364280537617, |
| "kl_loss_13": 2990.8, |
| "kl_loss_26": 2103.8, |
| "kl_loss_39": 1123.3, |
| "kl_loss_7": 3530.0, |
| "learning_rate": 2.5461924009435368e-05, |
| "loss": 4885.15, |
| "step": 8990 |
| }, |
| { |
| "ce_loss_13": 2.870776003599167, |
| "ce_loss_26": 2.4326407968997956, |
| "ce_loss_39": 1.9669345051050187, |
| "ce_loss_52": 1.420183390378952, |
| "ce_loss_7": 3.127993369102478, |
| "epoch": 0.9, |
| "grad_norm": 18.943171008960356, |
| "kl_loss_13": 3002.4, |
| "kl_loss_26": 2090.0, |
| "kl_loss_39": 1108.9, |
| "kl_loss_7": 3537.6, |
| "learning_rate": 2.4964441129527336e-05, |
| "loss": 4949.55, |
| "step": 9000 |
| }, |
| { |
| "ce_loss_13": 2.91824157834053, |
| "ce_loss_26": 2.4817179054021836, |
| "ce_loss_39": 2.015528929233551, |
| "ce_loss_52": 1.434774386882782, |
| "ce_loss_7": 3.181524306535721, |
| "epoch": 0.901, |
| "grad_norm": 19.717113587964203, |
| "kl_loss_13": 3071.6, |
| "kl_loss_26": 2176.4, |
| "kl_loss_39": 1186.6, |
| "kl_loss_7": 3618.4, |
| "learning_rate": 2.4471741852423235e-05, |
| "loss": 4970.45, |
| "step": 9010 |
| }, |
| { |
| "ce_loss_13": 2.8203544914722443, |
| "ce_loss_26": 2.394831323623657, |
| "ce_loss_39": 1.9447649121284485, |
| "ce_loss_52": 1.3895054385066032, |
| "ce_loss_7": 3.0815266370773315, |
| "epoch": 0.902, |
| "grad_norm": 19.201709482987514, |
| "kl_loss_13": 2963.6, |
| "kl_loss_26": 2077.6, |
| "kl_loss_39": 1127.3, |
| "kl_loss_7": 3508.4, |
| "learning_rate": 2.3983831139599287e-05, |
| "loss": 4939.65, |
| "step": 9020 |
| }, |
| { |
| "ce_loss_13": 2.8914650082588196, |
| "ce_loss_26": 2.46552118062973, |
| "ce_loss_39": 1.9927147597074508, |
| "ce_loss_52": 1.4259023681282996, |
| "ce_loss_7": 3.1594585537910462, |
| "epoch": 0.903, |
| "grad_norm": 18.327777842458563, |
| "kl_loss_13": 3054.4, |
| "kl_loss_26": 2152.4, |
| "kl_loss_39": 1157.6, |
| "kl_loss_7": 3607.6, |
| "learning_rate": 2.3500713904311022e-05, |
| "loss": 4963.8, |
| "step": 9030 |
| }, |
| { |
| "ce_loss_13": 2.8807626605033874, |
| "ce_loss_26": 2.432952329516411, |
| "ce_loss_39": 1.9567799299955368, |
| "ce_loss_52": 1.4073475629091263, |
| "ce_loss_7": 3.1469713926315306, |
| "epoch": 0.904, |
| "grad_norm": 20.180916839551323, |
| "kl_loss_13": 3056.0, |
| "kl_loss_26": 2131.4, |
| "kl_loss_39": 1132.8, |
| "kl_loss_7": 3607.6, |
| "learning_rate": 2.3022395011543685e-05, |
| "loss": 4930.8, |
| "step": 9040 |
| }, |
| { |
| "ce_loss_13": 2.895359253883362, |
| "ce_loss_26": 2.4620601534843445, |
| "ce_loss_39": 1.9888764083385468, |
| "ce_loss_52": 1.4224872916936875, |
| "ce_loss_7": 3.1553323328495027, |
| "epoch": 0.905, |
| "grad_norm": 19.515330390455173, |
| "kl_loss_13": 3061.2, |
| "kl_loss_26": 2157.8, |
| "kl_loss_39": 1165.6, |
| "kl_loss_7": 3605.6, |
| "learning_rate": 2.2548879277963063e-05, |
| "loss": 4965.65, |
| "step": 9050 |
| }, |
| { |
| "ce_loss_13": 2.913999766111374, |
| "ce_loss_26": 2.4826299071311952, |
| "ce_loss_39": 2.025396314263344, |
| "ce_loss_52": 1.4647169053554534, |
| "ce_loss_7": 3.1696211397647858, |
| "epoch": 0.906, |
| "grad_norm": 19.958942164442686, |
| "kl_loss_13": 2998.4, |
| "kl_loss_26": 2104.6, |
| "kl_loss_39": 1143.0, |
| "kl_loss_7": 3532.8, |
| "learning_rate": 2.208017147186736e-05, |
| "loss": 4953.35, |
| "step": 9060 |
| }, |
| { |
| "ce_loss_13": 2.931579887866974, |
| "ce_loss_26": 2.497947371006012, |
| "ce_loss_39": 2.0416529774665833, |
| "ce_loss_52": 1.4668794304132462, |
| "ce_loss_7": 3.1927560210227965, |
| "epoch": 0.907, |
| "grad_norm": 18.915099363205265, |
| "kl_loss_13": 3050.8, |
| "kl_loss_26": 2150.2, |
| "kl_loss_39": 1170.4, |
| "kl_loss_7": 3596.4, |
| "learning_rate": 2.1616276313139227e-05, |
| "loss": 4967.35, |
| "step": 9070 |
| }, |
| { |
| "ce_loss_13": 2.8379551649093626, |
| "ce_loss_26": 2.4058648884296416, |
| "ce_loss_39": 1.9522909700870514, |
| "ce_loss_52": 1.401316450536251, |
| "ce_loss_7": 3.096262776851654, |
| "epoch": 0.908, |
| "grad_norm": 17.939476679877323, |
| "kl_loss_13": 2996.4, |
| "kl_loss_26": 2103.4, |
| "kl_loss_39": 1130.6, |
| "kl_loss_7": 3545.6, |
| "learning_rate": 2.1157198473197415e-05, |
| "loss": 4983.65, |
| "step": 9080 |
| }, |
| { |
| "ce_loss_13": 2.9229146242141724, |
| "ce_loss_26": 2.4857192397117616, |
| "ce_loss_39": 2.014718788862228, |
| "ce_loss_52": 1.4371613681316375, |
| "ce_loss_7": 3.192232495546341, |
| "epoch": 0.909, |
| "grad_norm": 19.195813577565207, |
| "kl_loss_13": 3088.8, |
| "kl_loss_26": 2172.6, |
| "kl_loss_39": 1177.9, |
| "kl_loss_7": 3649.2, |
| "learning_rate": 2.0702942574950812e-05, |
| "loss": 4961.5, |
| "step": 9090 |
| }, |
| { |
| "ce_loss_13": 2.9034866452217103, |
| "ce_loss_26": 2.4769316017627716, |
| "ce_loss_39": 2.006492680311203, |
| "ce_loss_52": 1.4311101764440537, |
| "ce_loss_7": 3.164641487598419, |
| "epoch": 0.91, |
| "grad_norm": 18.881356963654913, |
| "kl_loss_13": 3059.6, |
| "kl_loss_26": 2172.6, |
| "kl_loss_39": 1174.0, |
| "kl_loss_7": 3598.4, |
| "learning_rate": 2.025351319275137e-05, |
| "loss": 4952.9, |
| "step": 9100 |
| }, |
| { |
| "ce_loss_13": 2.915982037782669, |
| "ce_loss_26": 2.4739193379879, |
| "ce_loss_39": 2.002902591228485, |
| "ce_loss_52": 1.4319583177566528, |
| "ce_loss_7": 3.17461501955986, |
| "epoch": 0.911, |
| "grad_norm": 18.844750958897702, |
| "kl_loss_13": 3071.6, |
| "kl_loss_26": 2158.2, |
| "kl_loss_39": 1169.6, |
| "kl_loss_7": 3613.6, |
| "learning_rate": 1.9808914852347816e-05, |
| "loss": 4969.1, |
| "step": 9110 |
| }, |
| { |
| "ce_loss_13": 2.9571076393127442, |
| "ce_loss_26": 2.5076956033706663, |
| "ce_loss_39": 2.0347861379384993, |
| "ce_loss_52": 1.4524286478757857, |
| "ce_loss_7": 3.2245921969413756, |
| "epoch": 0.912, |
| "grad_norm": 18.516316183492112, |
| "kl_loss_13": 3096.4, |
| "kl_loss_26": 2170.0, |
| "kl_loss_39": 1183.2, |
| "kl_loss_7": 3656.0, |
| "learning_rate": 1.9369152030840554e-05, |
| "loss": 4969.7, |
| "step": 9120 |
| }, |
| { |
| "ce_loss_13": 2.8493692874908447, |
| "ce_loss_26": 2.4184423595666886, |
| "ce_loss_39": 1.9636689513921737, |
| "ce_loss_52": 1.4161925345659256, |
| "ce_loss_7": 3.1067621290683745, |
| "epoch": 0.913, |
| "grad_norm": 19.557541417790066, |
| "kl_loss_13": 2969.6, |
| "kl_loss_26": 2073.0, |
| "kl_loss_39": 1110.7, |
| "kl_loss_7": 3508.8, |
| "learning_rate": 1.893422915663645e-05, |
| "loss": 4967.05, |
| "step": 9130 |
| }, |
| { |
| "ce_loss_13": 2.950014758110046, |
| "ce_loss_26": 2.5281428694725037, |
| "ce_loss_39": 2.0640300661325455, |
| "ce_loss_52": 1.4911428451538087, |
| "ce_loss_7": 3.20717169046402, |
| "epoch": 0.914, |
| "grad_norm": 19.126297962103095, |
| "kl_loss_13": 3062.4, |
| "kl_loss_26": 2166.6, |
| "kl_loss_39": 1184.2, |
| "kl_loss_7": 3597.2, |
| "learning_rate": 1.850415060940386e-05, |
| "loss": 4910.65, |
| "step": 9140 |
| }, |
| { |
| "ce_loss_13": 2.9050391018390656, |
| "ce_loss_26": 2.4736091554164887, |
| "ce_loss_39": 2.017327818274498, |
| "ce_loss_52": 1.4512428998947144, |
| "ce_loss_7": 3.162110447883606, |
| "epoch": 0.915, |
| "grad_norm": 18.914613896159423, |
| "kl_loss_13": 3018.0, |
| "kl_loss_26": 2122.6, |
| "kl_loss_39": 1153.5, |
| "kl_loss_7": 3547.6, |
| "learning_rate": 1.8078920720028978e-05, |
| "loss": 4898.6, |
| "step": 9150 |
| }, |
| { |
| "ce_loss_13": 2.8679368257522584, |
| "ce_loss_26": 2.4424335032701494, |
| "ce_loss_39": 1.9891292452812195, |
| "ce_loss_52": 1.4451990023255348, |
| "ce_loss_7": 3.127324694395065, |
| "epoch": 0.916, |
| "grad_norm": 20.019541856401887, |
| "kl_loss_13": 2959.2, |
| "kl_loss_26": 2064.4, |
| "kl_loss_39": 1098.8, |
| "kl_loss_7": 3496.8, |
| "learning_rate": 1.765854377057219e-05, |
| "loss": 4940.15, |
| "step": 9160 |
| }, |
| { |
| "ce_loss_13": 2.873015010356903, |
| "ce_loss_26": 2.433028203248978, |
| "ce_loss_39": 1.9746823519468308, |
| "ce_loss_52": 1.407148177921772, |
| "ce_loss_7": 3.1320842862129212, |
| "epoch": 0.917, |
| "grad_norm": 18.323389911529343, |
| "kl_loss_13": 3046.8, |
| "kl_loss_26": 2143.6, |
| "kl_loss_39": 1161.3, |
| "kl_loss_7": 3586.0, |
| "learning_rate": 1.724302399422456e-05, |
| "loss": 4937.75, |
| "step": 9170 |
| }, |
| { |
| "ce_loss_13": 2.864003378152847, |
| "ce_loss_26": 2.4368703365325928, |
| "ce_loss_39": 1.978201287984848, |
| "ce_loss_52": 1.4284173011779786, |
| "ce_loss_7": 3.117653822898865, |
| "epoch": 0.918, |
| "grad_norm": 19.851903614612837, |
| "kl_loss_13": 2960.4, |
| "kl_loss_26": 2069.6, |
| "kl_loss_39": 1110.5, |
| "kl_loss_7": 3494.8, |
| "learning_rate": 1.683236557526574e-05, |
| "loss": 4948.85, |
| "step": 9180 |
| }, |
| { |
| "ce_loss_13": 2.841529107093811, |
| "ce_loss_26": 2.4092674642801284, |
| "ce_loss_39": 1.9591031044721603, |
| "ce_loss_52": 1.4043558463454247, |
| "ce_loss_7": 3.099020904302597, |
| "epoch": 0.919, |
| "grad_norm": 18.98858399937095, |
| "kl_loss_13": 2971.2, |
| "kl_loss_26": 2076.4, |
| "kl_loss_39": 1122.0, |
| "kl_loss_7": 3508.0, |
| "learning_rate": 1.6426572649021475e-05, |
| "loss": 4944.1, |
| "step": 9190 |
| }, |
| { |
| "ce_loss_13": 2.902883565425873, |
| "ce_loss_26": 2.4624376207590104, |
| "ce_loss_39": 2.0033867925405504, |
| "ce_loss_52": 1.4482421904802323, |
| "ce_loss_7": 3.1540717780590057, |
| "epoch": 0.92, |
| "grad_norm": 19.524164167367193, |
| "kl_loss_13": 3016.4, |
| "kl_loss_26": 2114.8, |
| "kl_loss_39": 1136.7, |
| "kl_loss_7": 3542.4, |
| "learning_rate": 1.6025649301821876e-05, |
| "loss": 4936.95, |
| "step": 9200 |
| }, |
| { |
| "ce_loss_13": 2.962231194972992, |
| "ce_loss_26": 2.5141273856163027, |
| "ce_loss_39": 2.038061347603798, |
| "ce_loss_52": 1.4573458433151245, |
| "ce_loss_7": 3.22137930393219, |
| "epoch": 0.921, |
| "grad_norm": 19.07781770056793, |
| "kl_loss_13": 3091.6, |
| "kl_loss_26": 2174.2, |
| "kl_loss_39": 1179.6, |
| "kl_loss_7": 3638.0, |
| "learning_rate": 1.5629599570960716e-05, |
| "loss": 4931.05, |
| "step": 9210 |
| }, |
| { |
| "ce_loss_13": 2.828860414028168, |
| "ce_loss_26": 2.394576147198677, |
| "ce_loss_39": 1.940834417939186, |
| "ce_loss_52": 1.396960550546646, |
| "ce_loss_7": 3.0943815410137177, |
| "epoch": 0.922, |
| "grad_norm": 18.68562598066032, |
| "kl_loss_13": 2986.4, |
| "kl_loss_26": 2085.0, |
| "kl_loss_39": 1113.6, |
| "kl_loss_7": 3535.2, |
| "learning_rate": 1.5238427444654367e-05, |
| "loss": 4919.35, |
| "step": 9220 |
| }, |
| { |
| "ce_loss_13": 2.854993385076523, |
| "ce_loss_26": 2.4067456245422365, |
| "ce_loss_39": 1.9481880724430085, |
| "ce_loss_52": 1.392129084467888, |
| "ce_loss_7": 3.119863528013229, |
| "epoch": 0.923, |
| "grad_norm": 19.56628173058375, |
| "kl_loss_13": 3048.8, |
| "kl_loss_26": 2126.2, |
| "kl_loss_39": 1141.3, |
| "kl_loss_7": 3609.6, |
| "learning_rate": 1.4852136862001764e-05, |
| "loss": 4956.25, |
| "step": 9230 |
| }, |
| { |
| "ce_loss_13": 2.8672266066074372, |
| "ce_loss_26": 2.428171756863594, |
| "ce_loss_39": 1.967655423283577, |
| "ce_loss_52": 1.4228445023298264, |
| "ce_loss_7": 3.12925271987915, |
| "epoch": 0.924, |
| "grad_norm": 18.655136558750065, |
| "kl_loss_13": 3020.8, |
| "kl_loss_26": 2114.0, |
| "kl_loss_39": 1134.6, |
| "kl_loss_7": 3562.4, |
| "learning_rate": 1.4470731712944884e-05, |
| "loss": 4914.5, |
| "step": 9240 |
| }, |
| { |
| "ce_loss_13": 2.967056131362915, |
| "ce_loss_26": 2.527100908756256, |
| "ce_loss_39": 2.0592952966690063, |
| "ce_loss_52": 1.466832235455513, |
| "ce_loss_7": 3.2317879140377044, |
| "epoch": 0.925, |
| "grad_norm": 18.755830587214348, |
| "kl_loss_13": 3074.0, |
| "kl_loss_26": 2172.6, |
| "kl_loss_39": 1194.2, |
| "kl_loss_7": 3633.2, |
| "learning_rate": 1.4094215838229174e-05, |
| "loss": 4941.0, |
| "step": 9250 |
| }, |
| { |
| "ce_loss_13": 2.8956347942352294, |
| "ce_loss_26": 2.4609649628400803, |
| "ce_loss_39": 1.998116421699524, |
| "ce_loss_52": 1.4327284812927246, |
| "ce_loss_7": 3.1544252693653108, |
| "epoch": 0.926, |
| "grad_norm": 19.440875104184542, |
| "kl_loss_13": 3037.6, |
| "kl_loss_26": 2133.0, |
| "kl_loss_39": 1149.6, |
| "kl_loss_7": 3582.4, |
| "learning_rate": 1.372259302936546e-05, |
| "loss": 4929.25, |
| "step": 9260 |
| }, |
| { |
| "ce_loss_13": 2.818482467532158, |
| "ce_loss_26": 2.3888671875, |
| "ce_loss_39": 1.9417572438716888, |
| "ce_loss_52": 1.3873827829957008, |
| "ce_loss_7": 3.0732292413711546, |
| "epoch": 0.927, |
| "grad_norm": 19.09848340283336, |
| "kl_loss_13": 2988.4, |
| "kl_loss_26": 2096.8, |
| "kl_loss_39": 1136.6, |
| "kl_loss_7": 3519.2, |
| "learning_rate": 1.3355867028591206e-05, |
| "loss": 4917.85, |
| "step": 9270 |
| }, |
| { |
| "ce_loss_13": 2.8812867999076843, |
| "ce_loss_26": 2.445907565951347, |
| "ce_loss_39": 1.9824917227029801, |
| "ce_loss_52": 1.4204061418771743, |
| "ce_loss_7": 3.1463906168937683, |
| "epoch": 0.928, |
| "grad_norm": 19.73371377973639, |
| "kl_loss_13": 3015.6, |
| "kl_loss_26": 2109.8, |
| "kl_loss_39": 1132.3, |
| "kl_loss_7": 3565.2, |
| "learning_rate": 1.2994041528833267e-05, |
| "loss": 4914.15, |
| "step": 9280 |
| }, |
| { |
| "ce_loss_13": 2.989528793096542, |
| "ce_loss_26": 2.545223152637482, |
| "ce_loss_39": 2.0668440997600555, |
| "ce_loss_52": 1.4640702456235886, |
| "ce_loss_7": 3.2542518198490145, |
| "epoch": 0.929, |
| "grad_norm": 18.497071159749588, |
| "kl_loss_13": 3146.0, |
| "kl_loss_26": 2234.4, |
| "kl_loss_39": 1221.2, |
| "kl_loss_7": 3704.8, |
| "learning_rate": 1.2637120173670358e-05, |
| "loss": 4971.25, |
| "step": 9290 |
| }, |
| { |
| "ce_loss_13": 2.9433493435382845, |
| "ce_loss_26": 2.503718575835228, |
| "ce_loss_39": 2.029115191102028, |
| "ce_loss_52": 1.4293665170669556, |
| "ce_loss_7": 3.21596360206604, |
| "epoch": 0.93, |
| "grad_norm": 19.233646690177977, |
| "kl_loss_13": 3119.2, |
| "kl_loss_26": 2210.0, |
| "kl_loss_39": 1209.9, |
| "kl_loss_7": 3688.8, |
| "learning_rate": 1.2285106557296478e-05, |
| "loss": 4970.8, |
| "step": 9300 |
| }, |
| { |
| "ce_loss_13": 2.8525869846343994, |
| "ce_loss_26": 2.4185830265283585, |
| "ce_loss_39": 1.9529170453548432, |
| "ce_loss_52": 1.4022331610321999, |
| "ce_loss_7": 3.112831687927246, |
| "epoch": 0.931, |
| "grad_norm": 19.01919083076588, |
| "kl_loss_13": 3012.8, |
| "kl_loss_26": 2100.6, |
| "kl_loss_39": 1120.5, |
| "kl_loss_7": 3542.0, |
| "learning_rate": 1.1938004224484989e-05, |
| "loss": 4934.7, |
| "step": 9310 |
| }, |
| { |
| "ce_loss_13": 2.9074361979961396, |
| "ce_loss_26": 2.477645492553711, |
| "ce_loss_39": 2.010756382346153, |
| "ce_loss_52": 1.4430534109473228, |
| "ce_loss_7": 3.1718304812908173, |
| "epoch": 0.932, |
| "grad_norm": 18.572431056907458, |
| "kl_loss_13": 3020.0, |
| "kl_loss_26": 2116.0, |
| "kl_loss_39": 1144.2, |
| "kl_loss_7": 3575.6, |
| "learning_rate": 1.1595816670552429e-05, |
| "loss": 4913.95, |
| "step": 9320 |
| }, |
| { |
| "ce_loss_13": 2.8636857986450197, |
| "ce_loss_26": 2.424144572019577, |
| "ce_loss_39": 1.9600117355585098, |
| "ce_loss_52": 1.402983972430229, |
| "ce_loss_7": 3.1251452922821046, |
| "epoch": 0.933, |
| "grad_norm": 18.288942605726792, |
| "kl_loss_13": 3044.0, |
| "kl_loss_26": 2129.6, |
| "kl_loss_39": 1139.2, |
| "kl_loss_7": 3581.6, |
| "learning_rate": 1.1258547341323699e-05, |
| "loss": 4937.25, |
| "step": 9330 |
| }, |
| { |
| "ce_loss_13": 2.893140608072281, |
| "ce_loss_26": 2.457077306509018, |
| "ce_loss_39": 2.003238731622696, |
| "ce_loss_52": 1.4437968581914902, |
| "ce_loss_7": 3.1531366109848022, |
| "epoch": 0.934, |
| "grad_norm": 18.739319955640763, |
| "kl_loss_13": 3019.6, |
| "kl_loss_26": 2115.0, |
| "kl_loss_39": 1131.3, |
| "kl_loss_7": 3558.4, |
| "learning_rate": 1.0926199633097156e-05, |
| "loss": 4899.9, |
| "step": 9340 |
| }, |
| { |
| "ce_loss_13": 2.9001421511173247, |
| "ce_loss_26": 2.4687224984169007, |
| "ce_loss_39": 2.0006180971860887, |
| "ce_loss_52": 1.4220335900783538, |
| "ce_loss_7": 3.1691121637821196, |
| "epoch": 0.935, |
| "grad_norm": 19.392869535691936, |
| "kl_loss_13": 3054.0, |
| "kl_loss_26": 2149.8, |
| "kl_loss_39": 1176.1, |
| "kl_loss_7": 3619.2, |
| "learning_rate": 1.0598776892610684e-05, |
| "loss": 4922.25, |
| "step": 9350 |
| }, |
| { |
| "ce_loss_13": 2.953709363937378, |
| "ce_loss_26": 2.5250791788101195, |
| "ce_loss_39": 2.0616777926683425, |
| "ce_loss_52": 1.5004188895225525, |
| "ce_loss_7": 3.2059156119823458, |
| "epoch": 0.936, |
| "grad_norm": 18.98607482508187, |
| "kl_loss_13": 3007.2, |
| "kl_loss_26": 2107.0, |
| "kl_loss_39": 1138.6, |
| "kl_loss_7": 3536.0, |
| "learning_rate": 1.0276282417007399e-05, |
| "loss": 4935.75, |
| "step": 9360 |
| }, |
| { |
| "ce_loss_13": 2.902603155374527, |
| "ce_loss_26": 2.464896833896637, |
| "ce_loss_39": 2.0031634330749513, |
| "ce_loss_52": 1.4478828191757203, |
| "ce_loss_7": 3.166324245929718, |
| "epoch": 0.937, |
| "grad_norm": 18.72231921789515, |
| "kl_loss_13": 3025.6, |
| "kl_loss_26": 2120.4, |
| "kl_loss_39": 1137.7, |
| "kl_loss_7": 3574.4, |
| "learning_rate": 9.958719453803277e-06, |
| "loss": 4933.2, |
| "step": 9370 |
| }, |
| { |
| "ce_loss_13": 2.878055286407471, |
| "ce_loss_26": 2.4367445170879365, |
| "ce_loss_39": 1.9698922991752625, |
| "ce_loss_52": 1.40206458568573, |
| "ce_loss_7": 3.1407361745834352, |
| "epoch": 0.938, |
| "grad_norm": 19.520797823561637, |
| "kl_loss_13": 3045.6, |
| "kl_loss_26": 2130.6, |
| "kl_loss_39": 1145.7, |
| "kl_loss_7": 3591.6, |
| "learning_rate": 9.646091200853802e-06, |
| "loss": 4932.45, |
| "step": 9380 |
| }, |
| { |
| "ce_loss_13": 2.8573631644248962, |
| "ce_loss_26": 2.429997554421425, |
| "ce_loss_39": 1.9779304087162017, |
| "ce_loss_52": 1.4321624323725701, |
| "ce_loss_7": 3.119151920080185, |
| "epoch": 0.939, |
| "grad_norm": 18.61104788500602, |
| "kl_loss_13": 2968.4, |
| "kl_loss_26": 2075.6, |
| "kl_loss_39": 1113.7, |
| "kl_loss_7": 3509.6, |
| "learning_rate": 9.338400806321978e-06, |
| "loss": 4899.9, |
| "step": 9390 |
| }, |
| { |
| "ce_loss_13": 2.8828431129455567, |
| "ce_loss_26": 2.4453956365585325, |
| "ce_loss_39": 1.986677783727646, |
| "ce_loss_52": 1.4324709355831147, |
| "ce_loss_7": 3.1462887287139893, |
| "epoch": 0.94, |
| "grad_norm": 18.660409146960177, |
| "kl_loss_13": 3006.4, |
| "kl_loss_26": 2102.2, |
| "kl_loss_39": 1130.9, |
| "kl_loss_7": 3551.2, |
| "learning_rate": 9.035651368646646e-06, |
| "loss": 4963.1, |
| "step": 9400 |
| }, |
| { |
| "ce_loss_13": 2.856483778357506, |
| "ce_loss_26": 2.426860272884369, |
| "ce_loss_39": 1.9708759590983391, |
| "ce_loss_52": 1.4115710154175758, |
| "ce_loss_7": 3.114782178401947, |
| "epoch": 0.941, |
| "grad_norm": 19.55117077640538, |
| "kl_loss_13": 2986.0, |
| "kl_loss_26": 2096.2, |
| "kl_loss_39": 1131.2, |
| "kl_loss_7": 3526.4, |
| "learning_rate": 8.737845936511335e-06, |
| "loss": 4960.75, |
| "step": 9410 |
| }, |
| { |
| "ce_loss_13": 2.894274836778641, |
| "ce_loss_26": 2.454681032896042, |
| "ce_loss_39": 1.9826824754476546, |
| "ce_loss_52": 1.4298861980438233, |
| "ce_loss_7": 3.15040722489357, |
| "epoch": 0.942, |
| "grad_norm": 19.039583654377346, |
| "kl_loss_13": 3067.6, |
| "kl_loss_26": 2152.8, |
| "kl_loss_39": 1152.4, |
| "kl_loss_7": 3608.4, |
| "learning_rate": 8.444987508813451e-06, |
| "loss": 4899.6, |
| "step": 9420 |
| }, |
| { |
| "ce_loss_13": 2.9001412212848665, |
| "ce_loss_26": 2.4617854237556456, |
| "ce_loss_39": 1.999165838956833, |
| "ce_loss_52": 1.4294554442167282, |
| "ce_loss_7": 3.165439170598984, |
| "epoch": 0.943, |
| "grad_norm": 18.564983933864266, |
| "kl_loss_13": 3046.0, |
| "kl_loss_26": 2136.6, |
| "kl_loss_39": 1159.1, |
| "kl_loss_7": 3592.8, |
| "learning_rate": 8.157079034633974e-06, |
| "loss": 4920.3, |
| "step": 9430 |
| }, |
| { |
| "ce_loss_13": 2.863955610990524, |
| "ce_loss_26": 2.435519364476204, |
| "ce_loss_39": 1.9886516004800796, |
| "ce_loss_52": 1.4344154298305511, |
| "ce_loss_7": 3.1265052914619447, |
| "epoch": 0.944, |
| "grad_norm": 17.82647647549486, |
| "kl_loss_13": 2962.4, |
| "kl_loss_26": 2073.6, |
| "kl_loss_39": 1123.1, |
| "kl_loss_7": 3508.4, |
| "learning_rate": 7.874123413208145e-06, |
| "loss": 4921.7, |
| "step": 9440 |
| }, |
| { |
| "ce_loss_13": 2.8527204990386963, |
| "ce_loss_26": 2.418528434634209, |
| "ce_loss_39": 1.960913023352623, |
| "ce_loss_52": 1.4082367643713951, |
| "ce_loss_7": 3.118280106782913, |
| "epoch": 0.945, |
| "grad_norm": 17.642678200140654, |
| "kl_loss_13": 3000.8, |
| "kl_loss_26": 2093.6, |
| "kl_loss_39": 1127.9, |
| "kl_loss_7": 3547.2, |
| "learning_rate": 7.59612349389599e-06, |
| "loss": 4941.9, |
| "step": 9450 |
| }, |
| { |
| "ce_loss_13": 2.8983235955238342, |
| "ce_loss_26": 2.4708085656166077, |
| "ce_loss_39": 2.01363542675972, |
| "ce_loss_52": 1.4459212511777877, |
| "ce_loss_7": 3.1573162257671354, |
| "epoch": 0.946, |
| "grad_norm": 18.21137845155402, |
| "kl_loss_13": 3012.8, |
| "kl_loss_26": 2129.0, |
| "kl_loss_39": 1159.6, |
| "kl_loss_7": 3550.0, |
| "learning_rate": 7.323082076153509e-06, |
| "loss": 4932.45, |
| "step": 9460 |
| }, |
| { |
| "ce_loss_13": 2.8793884813785553, |
| "ce_loss_26": 2.444310560822487, |
| "ce_loss_39": 1.9878242909908295, |
| "ce_loss_52": 1.4219153225421906, |
| "ce_loss_7": 3.1359946370124816, |
| "epoch": 0.947, |
| "grad_norm": 19.11147526952516, |
| "kl_loss_13": 3000.4, |
| "kl_loss_26": 2106.2, |
| "kl_loss_39": 1141.4, |
| "kl_loss_7": 3539.6, |
| "learning_rate": 7.055001909504755e-06, |
| "loss": 4932.95, |
| "step": 9470 |
| }, |
| { |
| "ce_loss_13": 2.8483738005161285, |
| "ce_loss_26": 2.4169380724430085, |
| "ce_loss_39": 1.9552814781665802, |
| "ce_loss_52": 1.4036450207233429, |
| "ce_loss_7": 3.1039236187934875, |
| "epoch": 0.948, |
| "grad_norm": 19.227610169601164, |
| "kl_loss_13": 3000.4, |
| "kl_loss_26": 2102.0, |
| "kl_loss_39": 1125.6, |
| "kl_loss_7": 3530.0, |
| "learning_rate": 6.791885693514133e-06, |
| "loss": 4941.55, |
| "step": 9480 |
| }, |
| { |
| "ce_loss_13": 2.8693545699119567, |
| "ce_loss_26": 2.4362709283828736, |
| "ce_loss_39": 1.9619301795959472, |
| "ce_loss_52": 1.400461108982563, |
| "ce_loss_7": 3.133023035526276, |
| "epoch": 0.949, |
| "grad_norm": 19.323995399615697, |
| "kl_loss_13": 3058.8, |
| "kl_loss_26": 2146.6, |
| "kl_loss_39": 1149.1, |
| "kl_loss_7": 3608.8, |
| "learning_rate": 6.533736077758867e-06, |
| "loss": 4986.35, |
| "step": 9490 |
| }, |
| { |
| "ce_loss_13": 2.8667274117469788, |
| "ce_loss_26": 2.4240807622671126, |
| "ce_loss_39": 1.9586560875177383, |
| "ce_loss_52": 1.3980020493268968, |
| "ce_loss_7": 3.127706527709961, |
| "epoch": 0.95, |
| "grad_norm": 18.253118734633716, |
| "kl_loss_13": 3033.6, |
| "kl_loss_26": 2126.0, |
| "kl_loss_39": 1140.7, |
| "kl_loss_7": 3581.6, |
| "learning_rate": 6.2805556618028556e-06, |
| "loss": 4971.65, |
| "step": 9500 |
| }, |
| { |
| "ce_loss_13": 2.9265355467796326, |
| "ce_loss_26": 2.4994624704122543, |
| "ce_loss_39": 2.03882916867733, |
| "ce_loss_52": 1.4773303151130677, |
| "ce_loss_7": 3.1838342785835265, |
| "epoch": 0.951, |
| "grad_norm": 19.482478782354722, |
| "kl_loss_13": 2998.4, |
| "kl_loss_26": 2103.6, |
| "kl_loss_39": 1144.8, |
| "kl_loss_7": 3540.4, |
| "learning_rate": 6.032346995169968e-06, |
| "loss": 4951.7, |
| "step": 9510 |
| }, |
| { |
| "ce_loss_13": 2.9545272469520567, |
| "ce_loss_26": 2.5311076641082764, |
| "ce_loss_39": 2.068070963025093, |
| "ce_loss_52": 1.4843237161636353, |
| "ce_loss_7": 3.2110206544399262, |
| "epoch": 0.952, |
| "grad_norm": 19.225083219290383, |
| "kl_loss_13": 3055.2, |
| "kl_loss_26": 2164.8, |
| "kl_loss_39": 1188.4, |
| "kl_loss_7": 3590.4, |
| "learning_rate": 5.789112577318789e-06, |
| "loss": 4961.65, |
| "step": 9520 |
| }, |
| { |
| "ce_loss_13": 2.852985817193985, |
| "ce_loss_26": 2.4146564304828644, |
| "ce_loss_39": 1.961009207367897, |
| "ce_loss_52": 1.3947103202342988, |
| "ce_loss_7": 3.1259153723716735, |
| "epoch": 0.953, |
| "grad_norm": 18.155555980380427, |
| "kl_loss_13": 3021.6, |
| "kl_loss_26": 2118.2, |
| "kl_loss_39": 1157.4, |
| "kl_loss_7": 3575.6, |
| "learning_rate": 5.550854857617194e-06, |
| "loss": 4909.2, |
| "step": 9530 |
| }, |
| { |
| "ce_loss_13": 2.8418005287647246, |
| "ce_loss_26": 2.4128061681985855, |
| "ce_loss_39": 1.9482584029436112, |
| "ce_loss_52": 1.3925445035099984, |
| "ce_loss_7": 3.1012724101543427, |
| "epoch": 0.954, |
| "grad_norm": 18.797933923537936, |
| "kl_loss_13": 3018.8, |
| "kl_loss_26": 2116.8, |
| "kl_loss_39": 1135.1, |
| "kl_loss_7": 3566.4, |
| "learning_rate": 5.317576235317756e-06, |
| "loss": 4951.35, |
| "step": 9540 |
| }, |
| { |
| "ce_loss_13": 2.9137533485889433, |
| "ce_loss_26": 2.479102221131325, |
| "ce_loss_39": 2.011527943611145, |
| "ce_loss_52": 1.4640387833118438, |
| "ce_loss_7": 3.16554337143898, |
| "epoch": 0.955, |
| "grad_norm": 18.308431062302134, |
| "kl_loss_13": 3004.0, |
| "kl_loss_26": 2104.0, |
| "kl_loss_39": 1124.5, |
| "kl_loss_7": 3534.4, |
| "learning_rate": 5.089279059533658e-06, |
| "loss": 4893.9, |
| "step": 9550 |
| }, |
| { |
| "ce_loss_13": 2.9561933636665345, |
| "ce_loss_26": 2.520014223456383, |
| "ce_loss_39": 2.044199249148369, |
| "ce_loss_52": 1.4634439080953598, |
| "ce_loss_7": 3.2146646201610567, |
| "epoch": 0.956, |
| "grad_norm": 19.011558845630333, |
| "kl_loss_13": 3107.6, |
| "kl_loss_26": 2205.4, |
| "kl_loss_39": 1192.1, |
| "kl_loss_7": 3652.4, |
| "learning_rate": 4.865965629214819e-06, |
| "loss": 4928.6, |
| "step": 9560 |
| }, |
| { |
| "ce_loss_13": 2.9264722049236296, |
| "ce_loss_26": 2.500628116726875, |
| "ce_loss_39": 2.0424467980861665, |
| "ce_loss_52": 1.4656882539391518, |
| "ce_loss_7": 3.1882854044437408, |
| "epoch": 0.957, |
| "grad_norm": 19.491005801371585, |
| "kl_loss_13": 3028.0, |
| "kl_loss_26": 2138.6, |
| "kl_loss_39": 1174.2, |
| "kl_loss_7": 3566.0, |
| "learning_rate": 4.6476381931251366e-06, |
| "loss": 4947.75, |
| "step": 9570 |
| }, |
| { |
| "ce_loss_13": 2.8677931249141695, |
| "ce_loss_26": 2.432820278406143, |
| "ce_loss_39": 1.970087245106697, |
| "ce_loss_52": 1.4129542678594589, |
| "ce_loss_7": 3.129103422164917, |
| "epoch": 0.958, |
| "grad_norm": 18.903925554756427, |
| "kl_loss_13": 2994.0, |
| "kl_loss_26": 2096.0, |
| "kl_loss_39": 1120.2, |
| "kl_loss_7": 3546.4, |
| "learning_rate": 4.434298949819449e-06, |
| "loss": 4918.2, |
| "step": 9580 |
| }, |
| { |
| "ce_loss_13": 2.894206315279007, |
| "ce_loss_26": 2.4608440458774568, |
| "ce_loss_39": 2.00639765560627, |
| "ce_loss_52": 1.4453970074653626, |
| "ce_loss_7": 3.151961898803711, |
| "epoch": 0.959, |
| "grad_norm": 17.742534881377313, |
| "kl_loss_13": 2993.6, |
| "kl_loss_26": 2091.6, |
| "kl_loss_39": 1135.9, |
| "kl_loss_7": 3542.0, |
| "learning_rate": 4.2259500476214406e-06, |
| "loss": 4904.1, |
| "step": 9590 |
| }, |
| { |
| "ce_loss_13": 2.907763344049454, |
| "ce_loss_26": 2.4721481442451476, |
| "ce_loss_39": 2.013452297449112, |
| "ce_loss_52": 1.4463645279407502, |
| "ce_loss_7": 3.1700760960578918, |
| "epoch": 0.96, |
| "grad_norm": 18.762064601939382, |
| "kl_loss_13": 3033.2, |
| "kl_loss_26": 2127.6, |
| "kl_loss_39": 1154.1, |
| "kl_loss_7": 3576.8, |
| "learning_rate": 4.02259358460233e-06, |
| "loss": 4944.15, |
| "step": 9600 |
| }, |
| { |
| "ce_loss_13": 2.9404530614614486, |
| "ce_loss_26": 2.5118053376674654, |
| "ce_loss_39": 2.045960560441017, |
| "ce_loss_52": 1.4736278399825096, |
| "ce_loss_7": 3.199742293357849, |
| "epoch": 0.961, |
| "grad_norm": 19.091693827270714, |
| "kl_loss_13": 3049.6, |
| "kl_loss_26": 2150.4, |
| "kl_loss_39": 1165.5, |
| "kl_loss_7": 3590.8, |
| "learning_rate": 3.8242316085594916e-06, |
| "loss": 4931.75, |
| "step": 9610 |
| }, |
| { |
| "ce_loss_13": 2.8952401757240294, |
| "ce_loss_26": 2.4520116090774535, |
| "ce_loss_39": 1.9771205306053161, |
| "ce_loss_52": 1.3961644172668457, |
| "ce_loss_7": 3.1528802454471587, |
| "epoch": 0.962, |
| "grad_norm": 18.822596918413492, |
| "kl_loss_13": 3097.6, |
| "kl_loss_26": 2171.4, |
| "kl_loss_39": 1176.6, |
| "kl_loss_7": 3631.6, |
| "learning_rate": 3.630866116995757e-06, |
| "loss": 4991.65, |
| "step": 9620 |
| }, |
| { |
| "ce_loss_13": 2.848733913898468, |
| "ce_loss_26": 2.4173508852720262, |
| "ce_loss_39": 1.9629988223314285, |
| "ce_loss_52": 1.4227147445082664, |
| "ce_loss_7": 3.105393874645233, |
| "epoch": 0.963, |
| "grad_norm": 18.772124078001035, |
| "kl_loss_13": 2951.2, |
| "kl_loss_26": 2061.0, |
| "kl_loss_39": 1101.5, |
| "kl_loss_7": 3491.2, |
| "learning_rate": 3.4424990570994797e-06, |
| "loss": 4903.15, |
| "step": 9630 |
| }, |
| { |
| "ce_loss_13": 2.9066348552703856, |
| "ce_loss_26": 2.469021773338318, |
| "ce_loss_39": 2.010333400964737, |
| "ce_loss_52": 1.4492767244577407, |
| "ce_loss_7": 3.1746467888355254, |
| "epoch": 0.964, |
| "grad_norm": 19.32775364197132, |
| "kl_loss_13": 3013.6, |
| "kl_loss_26": 2110.8, |
| "kl_loss_39": 1135.7, |
| "kl_loss_7": 3570.0, |
| "learning_rate": 3.2591323257248896e-06, |
| "loss": 4939.25, |
| "step": 9640 |
| }, |
| { |
| "ce_loss_13": 2.9041188657283783, |
| "ce_loss_26": 2.471466612815857, |
| "ce_loss_39": 2.0076118439435957, |
| "ce_loss_52": 1.4575997084379195, |
| "ce_loss_7": 3.156245505809784, |
| "epoch": 0.965, |
| "grad_norm": 18.772007331370325, |
| "kl_loss_13": 3016.0, |
| "kl_loss_26": 2111.8, |
| "kl_loss_39": 1138.9, |
| "kl_loss_7": 3551.6, |
| "learning_rate": 3.0807677693729385e-06, |
| "loss": 4953.0, |
| "step": 9650 |
| }, |
| { |
| "ce_loss_13": 2.9185379564762117, |
| "ce_loss_26": 2.4852662444114686, |
| "ce_loss_39": 2.0213693618774413, |
| "ce_loss_52": 1.4567248612642287, |
| "ce_loss_7": 3.1788457691669465, |
| "epoch": 0.966, |
| "grad_norm": 19.301754151350856, |
| "kl_loss_13": 3049.2, |
| "kl_loss_26": 2142.4, |
| "kl_loss_39": 1165.5, |
| "kl_loss_7": 3589.2, |
| "learning_rate": 2.9074071841727055e-06, |
| "loss": 4966.3, |
| "step": 9660 |
| }, |
| { |
| "ce_loss_13": 2.856596076488495, |
| "ce_loss_26": 2.4228154510259627, |
| "ce_loss_39": 1.966923463344574, |
| "ce_loss_52": 1.3987573131918907, |
| "ce_loss_7": 3.112848150730133, |
| "epoch": 0.967, |
| "grad_norm": 18.636169987835014, |
| "kl_loss_13": 3015.2, |
| "kl_loss_26": 2124.8, |
| "kl_loss_39": 1150.1, |
| "kl_loss_7": 3558.0, |
| "learning_rate": 2.739052315863355e-06, |
| "loss": 4944.85, |
| "step": 9670 |
| }, |
| { |
| "ce_loss_13": 2.946157419681549, |
| "ce_loss_26": 2.506874307990074, |
| "ce_loss_39": 2.0385408878326414, |
| "ce_loss_52": 1.4472137212753295, |
| "ce_loss_7": 3.2151435017585754, |
| "epoch": 0.968, |
| "grad_norm": 19.361220180181423, |
| "kl_loss_13": 3105.6, |
| "kl_loss_26": 2193.0, |
| "kl_loss_39": 1200.9, |
| "kl_loss_7": 3669.2, |
| "learning_rate": 2.5757048597765396e-06, |
| "loss": 4938.1, |
| "step": 9680 |
| }, |
| { |
| "ce_loss_13": 2.838688534498215, |
| "ce_loss_26": 2.408904367685318, |
| "ce_loss_39": 1.9514323592185974, |
| "ce_loss_52": 1.4050966590642928, |
| "ce_loss_7": 3.102074921131134, |
| "epoch": 0.969, |
| "grad_norm": 18.982791691406838, |
| "kl_loss_13": 2984.8, |
| "kl_loss_26": 2086.6, |
| "kl_loss_39": 1106.6, |
| "kl_loss_7": 3533.2, |
| "learning_rate": 2.417366460819359e-06, |
| "loss": 4918.15, |
| "step": 9690 |
| }, |
| { |
| "ce_loss_13": 2.880851173400879, |
| "ce_loss_26": 2.447329577803612, |
| "ce_loss_39": 1.989661106467247, |
| "ce_loss_52": 1.4333824023604393, |
| "ce_loss_7": 3.137517309188843, |
| "epoch": 0.97, |
| "grad_norm": 19.196819142959395, |
| "kl_loss_13": 2988.0, |
| "kl_loss_26": 2092.4, |
| "kl_loss_39": 1128.9, |
| "kl_loss_7": 3524.4, |
| "learning_rate": 2.2640387134577057e-06, |
| "loss": 4938.15, |
| "step": 9700 |
| }, |
| { |
| "ce_loss_13": 2.8625703275203707, |
| "ce_loss_26": 2.4340526342391966, |
| "ce_loss_39": 1.968525806069374, |
| "ce_loss_52": 1.4236899584531784, |
| "ce_loss_7": 3.112631046772003, |
| "epoch": 0.971, |
| "grad_norm": 19.493522870524444, |
| "kl_loss_13": 2981.2, |
| "kl_loss_26": 2091.6, |
| "kl_loss_39": 1109.1, |
| "kl_loss_7": 3508.4, |
| "learning_rate": 2.115723161700278e-06, |
| "loss": 4978.3, |
| "step": 9710 |
| }, |
| { |
| "ce_loss_13": 2.930490869283676, |
| "ce_loss_26": 2.495130881667137, |
| "ce_loss_39": 2.030032703280449, |
| "ce_loss_52": 1.4442616790533065, |
| "ce_loss_7": 3.190455746650696, |
| "epoch": 0.972, |
| "grad_norm": 18.231386237261873, |
| "kl_loss_13": 3083.2, |
| "kl_loss_26": 2182.4, |
| "kl_loss_39": 1200.2, |
| "kl_loss_7": 3630.4, |
| "learning_rate": 1.9724212990830937e-06, |
| "loss": 4917.25, |
| "step": 9720 |
| }, |
| { |
| "ce_loss_13": 2.8745281517505648, |
| "ce_loss_26": 2.450575265288353, |
| "ce_loss_39": 1.9936909019947051, |
| "ce_loss_52": 1.4346210777759552, |
| "ce_loss_7": 3.1411093890666963, |
| "epoch": 0.973, |
| "grad_norm": 17.9155258115958, |
| "kl_loss_13": 2990.4, |
| "kl_loss_26": 2104.0, |
| "kl_loss_39": 1135.2, |
| "kl_loss_7": 3546.0, |
| "learning_rate": 1.8341345686543331e-06, |
| "loss": 4907.2, |
| "step": 9730 |
| }, |
| { |
| "ce_loss_13": 2.950432300567627, |
| "ce_loss_26": 2.5173233568668367, |
| "ce_loss_39": 2.05553839802742, |
| "ce_loss_52": 1.5059631228446961, |
| "ce_loss_7": 3.203335565328598, |
| "epoch": 0.974, |
| "grad_norm": 18.692267522295538, |
| "kl_loss_13": 2994.0, |
| "kl_loss_26": 2099.8, |
| "kl_loss_39": 1118.3, |
| "kl_loss_7": 3526.0, |
| "learning_rate": 1.7008643629596864e-06, |
| "loss": 4975.3, |
| "step": 9740 |
| }, |
| { |
| "ce_loss_13": 2.938932454586029, |
| "ce_loss_26": 2.4943090945482256, |
| "ce_loss_39": 2.030773627758026, |
| "ce_loss_52": 1.4614870190620421, |
| "ce_loss_7": 3.203068423271179, |
| "epoch": 0.975, |
| "grad_norm": 19.21025690602488, |
| "kl_loss_13": 3068.4, |
| "kl_loss_26": 2141.2, |
| "kl_loss_39": 1151.8, |
| "kl_loss_7": 3617.6, |
| "learning_rate": 1.5726120240288633e-06, |
| "loss": 4916.8, |
| "step": 9750 |
| }, |
| { |
| "ce_loss_13": 2.9820500314235687, |
| "ce_loss_26": 2.5365146696567535, |
| "ce_loss_39": 2.0661711603403092, |
| "ce_loss_52": 1.471569898724556, |
| "ce_loss_7": 3.2498775362968444, |
| "epoch": 0.976, |
| "grad_norm": 18.6163174066225, |
| "kl_loss_13": 3112.0, |
| "kl_loss_26": 2196.6, |
| "kl_loss_39": 1202.4, |
| "kl_loss_7": 3666.4, |
| "learning_rate": 1.4493788433612708e-06, |
| "loss": 4925.3, |
| "step": 9760 |
| }, |
| { |
| "ce_loss_13": 2.8684714436531067, |
| "ce_loss_26": 2.433712217211723, |
| "ce_loss_39": 1.979764473438263, |
| "ce_loss_52": 1.4259307652711868, |
| "ce_loss_7": 3.1243775844573975, |
| "epoch": 0.977, |
| "grad_norm": 18.645711029415455, |
| "kl_loss_13": 2991.6, |
| "kl_loss_26": 2090.8, |
| "kl_loss_39": 1122.5, |
| "kl_loss_7": 3526.8, |
| "learning_rate": 1.3311660619138578e-06, |
| "loss": 4899.9, |
| "step": 9770 |
| }, |
| { |
| "ce_loss_13": 2.875075614452362, |
| "ce_loss_26": 2.4274426341056823, |
| "ce_loss_39": 1.9597632795572282, |
| "ce_loss_52": 1.3981771111488341, |
| "ce_loss_7": 3.132591074705124, |
| "epoch": 0.978, |
| "grad_norm": 19.101397556379275, |
| "kl_loss_13": 3053.6, |
| "kl_loss_26": 2133.6, |
| "kl_loss_39": 1148.0, |
| "kl_loss_7": 3594.4, |
| "learning_rate": 1.2179748700879012e-06, |
| "loss": 4922.55, |
| "step": 9780 |
| }, |
| { |
| "ce_loss_13": 2.8309387296438215, |
| "ce_loss_26": 2.4053177654743196, |
| "ce_loss_39": 1.9509627014398574, |
| "ce_loss_52": 1.397429385781288, |
| "ce_loss_7": 3.098381590843201, |
| "epoch": 0.979, |
| "grad_norm": 18.730022448398557, |
| "kl_loss_13": 2994.8, |
| "kl_loss_26": 2100.6, |
| "kl_loss_39": 1129.3, |
| "kl_loss_7": 3549.6, |
| "learning_rate": 1.1098064077174619e-06, |
| "loss": 4943.05, |
| "step": 9790 |
| }, |
| { |
| "ce_loss_13": 2.939675289392471, |
| "ce_loss_26": 2.5042629301548005, |
| "ce_loss_39": 2.036014449596405, |
| "ce_loss_52": 1.4478511959314346, |
| "ce_loss_7": 3.2001714766025544, |
| "epoch": 0.98, |
| "grad_norm": 18.76259776094823, |
| "kl_loss_13": 3078.4, |
| "kl_loss_26": 2174.8, |
| "kl_loss_39": 1199.8, |
| "kl_loss_7": 3615.6, |
| "learning_rate": 1.006661764057837e-06, |
| "loss": 4908.35, |
| "step": 9800 |
| }, |
| { |
| "ce_loss_13": 2.871473455429077, |
| "ce_loss_26": 2.4314837962388993, |
| "ce_loss_39": 1.95380699634552, |
| "ce_loss_52": 1.3844006016850472, |
| "ce_loss_7": 3.1342472076416015, |
| "epoch": 0.981, |
| "grad_norm": 19.274903724206773, |
| "kl_loss_13": 3093.6, |
| "kl_loss_26": 2172.4, |
| "kl_loss_39": 1165.3, |
| "kl_loss_7": 3637.6, |
| "learning_rate": 9.085419777743465e-07, |
| "loss": 4984.5, |
| "step": 9810 |
| }, |
| { |
| "ce_loss_13": 2.895174187421799, |
| "ce_loss_26": 2.465178096294403, |
| "ce_loss_39": 2.006316193938255, |
| "ce_loss_52": 1.441744513809681, |
| "ce_loss_7": 3.1597203612327576, |
| "epoch": 0.982, |
| "grad_norm": 18.123510539706626, |
| "kl_loss_13": 3039.2, |
| "kl_loss_26": 2136.8, |
| "kl_loss_39": 1165.1, |
| "kl_loss_7": 3592.0, |
| "learning_rate": 8.15448036932176e-07, |
| "loss": 4978.7, |
| "step": 9820 |
| }, |
| { |
| "ce_loss_13": 2.9061976075172424, |
| "ce_loss_26": 2.477484393119812, |
| "ce_loss_39": 2.0169315338134766, |
| "ce_loss_52": 1.4464313685894012, |
| "ce_loss_7": 3.1681883454322817, |
| "epoch": 0.983, |
| "grad_norm": 18.434840579065046, |
| "kl_loss_13": 3067.2, |
| "kl_loss_26": 2157.8, |
| "kl_loss_39": 1175.9, |
| "kl_loss_7": 3616.8, |
| "learning_rate": 7.273808789862724e-07, |
| "loss": 4921.0, |
| "step": 9830 |
| }, |
| { |
| "ce_loss_13": 2.91153547167778, |
| "ce_loss_26": 2.473715308308601, |
| "ce_loss_39": 2.007182112336159, |
| "ce_loss_52": 1.4433553382754325, |
| "ce_loss_7": 3.170134776830673, |
| "epoch": 0.984, |
| "grad_norm": 19.402855155704938, |
| "kl_loss_13": 3056.0, |
| "kl_loss_26": 2142.6, |
| "kl_loss_39": 1154.2, |
| "kl_loss_7": 3589.6, |
| "learning_rate": 6.443413907720186e-07, |
| "loss": 4900.3, |
| "step": 9840 |
| }, |
| { |
| "ce_loss_13": 2.812727469205856, |
| "ce_loss_26": 2.3864874839782715, |
| "ce_loss_39": 1.9436532348394393, |
| "ce_loss_52": 1.3948013991117478, |
| "ce_loss_7": 3.0747777581214906, |
| "epoch": 0.985, |
| "grad_norm": 18.7509272372939, |
| "kl_loss_13": 2956.4, |
| "kl_loss_26": 2066.6, |
| "kl_loss_39": 1115.4, |
| "kl_loss_7": 3495.2, |
| "learning_rate": 5.663304084960185e-07, |
| "loss": 4941.5, |
| "step": 9850 |
| }, |
| { |
| "ce_loss_13": 2.8474230617284775, |
| "ce_loss_26": 2.4196896702051163, |
| "ce_loss_39": 1.958586323261261, |
| "ce_loss_52": 1.4132703453302384, |
| "ce_loss_7": 3.112215679883957, |
| "epoch": 0.986, |
| "grad_norm": 19.13033176723296, |
| "kl_loss_13": 2947.6, |
| "kl_loss_26": 2054.6, |
| "kl_loss_39": 1096.6, |
| "kl_loss_7": 3496.0, |
| "learning_rate": 4.933487177280482e-07, |
| "loss": 4900.7, |
| "step": 9860 |
| }, |
| { |
| "ce_loss_13": 2.914989507198334, |
| "ce_loss_26": 2.4912798583507536, |
| "ce_loss_39": 2.020476207137108, |
| "ce_loss_52": 1.45462586581707, |
| "ce_loss_7": 3.169612795114517, |
| "epoch": 0.987, |
| "grad_norm": 18.808140949859265, |
| "kl_loss_13": 3018.8, |
| "kl_loss_26": 2134.0, |
| "kl_loss_39": 1159.4, |
| "kl_loss_7": 3551.6, |
| "learning_rate": 4.2539705339295075e-07, |
| "loss": 4908.55, |
| "step": 9870 |
| }, |
| { |
| "ce_loss_13": 2.8734777927398683, |
| "ce_loss_26": 2.437858074903488, |
| "ce_loss_39": 1.9720120638608933, |
| "ce_loss_52": 1.4267651215195656, |
| "ce_loss_7": 3.13208429813385, |
| "epoch": 0.988, |
| "grad_norm": 18.962161399510684, |
| "kl_loss_13": 2984.8, |
| "kl_loss_26": 2080.8, |
| "kl_loss_39": 1110.4, |
| "kl_loss_7": 3520.0, |
| "learning_rate": 3.6247609976319816e-07, |
| "loss": 4944.0, |
| "step": 9880 |
| }, |
| { |
| "ce_loss_13": 2.941332721710205, |
| "ce_loss_26": 2.5040529906749724, |
| "ce_loss_39": 2.0378583818674088, |
| "ce_loss_52": 1.468481183052063, |
| "ce_loss_7": 3.198000502586365, |
| "epoch": 0.989, |
| "grad_norm": 18.392580217010416, |
| "kl_loss_13": 3027.6, |
| "kl_loss_26": 2127.6, |
| "kl_loss_39": 1152.6, |
| "kl_loss_7": 3568.8, |
| "learning_rate": 3.0458649045211895e-07, |
| "loss": 4940.25, |
| "step": 9890 |
| }, |
| { |
| "ce_loss_13": 2.7879110276699066, |
| "ce_loss_26": 2.361061328649521, |
| "ce_loss_39": 1.9103228181600571, |
| "ce_loss_52": 1.3702009424567223, |
| "ce_loss_7": 3.0456897139549257, |
| "epoch": 0.99, |
| "grad_norm": 18.728359427979246, |
| "kl_loss_13": 2962.0, |
| "kl_loss_26": 2068.0, |
| "kl_loss_39": 1101.6, |
| "kl_loss_7": 3497.6, |
| "learning_rate": 2.517288084074587e-07, |
| "loss": 4930.1, |
| "step": 9900 |
| }, |
| { |
| "ce_loss_13": 2.8971258997917175, |
| "ce_loss_26": 2.5054025918245317, |
| "ce_loss_39": 2.0191519230604174, |
| "ce_loss_52": 1.4615912348031999, |
| "ce_loss_7": 3.1491506710648536, |
| "epoch": 0.991, |
| "grad_norm": 18.31923607379438, |
| "kl_loss_13": 3021.4, |
| "kl_loss_26": 2147.8, |
| "kl_loss_39": 1158.2, |
| "kl_loss_7": 3569.6, |
| "learning_rate": 2.0390358590538505e-07, |
| "loss": 4961.35, |
| "step": 9910 |
| }, |
| { |
| "ce_loss_13": 2.8903492599725724, |
| "ce_loss_26": 2.463494861125946, |
| "ce_loss_39": 1.9990645915269851, |
| "ce_loss_52": 1.4176891192793846, |
| "ce_loss_7": 3.156185895204544, |
| "epoch": 0.992, |
| "grad_norm": 18.63207057019639, |
| "kl_loss_13": 3059.2, |
| "kl_loss_26": 2160.8, |
| "kl_loss_39": 1181.7, |
| "kl_loss_7": 3612.8, |
| "learning_rate": 1.61111304545436e-07, |
| "loss": 4924.65, |
| "step": 9920 |
| }, |
| { |
| "ce_loss_13": 2.9144785940647124, |
| "ce_loss_26": 2.4774809032678604, |
| "ce_loss_39": 2.0119458585977554, |
| "ce_loss_52": 1.4417672097682952, |
| "ce_loss_7": 3.1794375479221344, |
| "epoch": 0.993, |
| "grad_norm": 19.670611142271607, |
| "kl_loss_13": 3065.6, |
| "kl_loss_26": 2158.2, |
| "kl_loss_39": 1171.1, |
| "kl_loss_7": 3620.0, |
| "learning_rate": 1.2335239524541298e-07, |
| "loss": 4934.7, |
| "step": 9930 |
| }, |
| { |
| "ce_loss_13": 2.8820480942726134, |
| "ce_loss_26": 2.4476457953453066, |
| "ce_loss_39": 1.9870627135038377, |
| "ce_loss_52": 1.418778820335865, |
| "ce_loss_7": 3.140765738487244, |
| "epoch": 0.994, |
| "grad_norm": 18.855641981755237, |
| "kl_loss_13": 3036.0, |
| "kl_loss_26": 2135.8, |
| "kl_loss_39": 1164.2, |
| "kl_loss_7": 3583.2, |
| "learning_rate": 9.06272382371065e-08, |
| "loss": 4938.8, |
| "step": 9940 |
| }, |
| { |
| "ce_loss_13": 2.8076956808567046, |
| "ce_loss_26": 2.3849500566720963, |
| "ce_loss_39": 1.944134348630905, |
| "ce_loss_52": 1.4005259275436401, |
| "ce_loss_7": 3.060946136713028, |
| "epoch": 0.995, |
| "grad_norm": 18.321398390501436, |
| "kl_loss_13": 2907.2, |
| "kl_loss_26": 2027.0, |
| "kl_loss_39": 1087.6, |
| "kl_loss_7": 3442.4, |
| "learning_rate": 6.293616306246586e-08, |
| "loss": 4950.05, |
| "step": 9950 |
| }, |
| { |
| "ce_loss_13": 2.879979431629181, |
| "ce_loss_26": 2.4471361935138702, |
| "ce_loss_39": 1.9933580070734025, |
| "ce_loss_52": 1.4384188532829285, |
| "ce_loss_7": 3.137728548049927, |
| "epoch": 0.996, |
| "grad_norm": 18.715841313903894, |
| "kl_loss_13": 3004.0, |
| "kl_loss_26": 2101.4, |
| "kl_loss_39": 1139.8, |
| "kl_loss_7": 3537.6, |
| "learning_rate": 4.027944857032395e-08, |
| "loss": 4943.4, |
| "step": 9960 |
| }, |
| { |
| "ce_loss_13": 2.873315241932869, |
| "ce_loss_26": 2.4401324480772018, |
| "ce_loss_39": 1.9843392819166183, |
| "ce_loss_52": 1.4195681273937226, |
| "ce_loss_7": 3.131341791152954, |
| "epoch": 0.997, |
| "grad_norm": 18.684401038028668, |
| "kl_loss_13": 3010.0, |
| "kl_loss_26": 2109.2, |
| "kl_loss_39": 1143.6, |
| "kl_loss_7": 3546.8, |
| "learning_rate": 2.265732291356626e-08, |
| "loss": 4916.9, |
| "step": 9970 |
| }, |
| { |
| "ce_loss_13": 2.806668055057526, |
| "ce_loss_26": 2.379783111810684, |
| "ce_loss_39": 1.9266512155532838, |
| "ce_loss_52": 1.397612212598324, |
| "ce_loss_7": 3.066510772705078, |
| "epoch": 0.998, |
| "grad_norm": 18.616508974623724, |
| "kl_loss_13": 2953.6, |
| "kl_loss_26": 2053.8, |
| "kl_loss_39": 1093.4, |
| "kl_loss_7": 3497.6, |
| "learning_rate": 1.0069963546743833e-08, |
| "loss": 4905.25, |
| "step": 9980 |
| }, |
| { |
| "ce_loss_13": 2.8469128251075744, |
| "ce_loss_26": 2.4174416065216064, |
| "ce_loss_39": 1.9600837975740433, |
| "ce_loss_52": 1.4190092101693152, |
| "ce_loss_7": 3.1044517934322355, |
| "epoch": 0.999, |
| "grad_norm": 18.871308722121288, |
| "kl_loss_13": 2960.0, |
| "kl_loss_26": 2064.8, |
| "kl_loss_39": 1104.4, |
| "kl_loss_7": 3488.8, |
| "learning_rate": 2.517497224463483e-09, |
| "loss": 4901.2, |
| "step": 9990 |
| }, |
| { |
| "ce_loss_13": 2.895157891511917, |
| "ce_loss_26": 2.452625501155853, |
| "ce_loss_39": 1.9889036536216735, |
| "ce_loss_52": 1.4127988710999488, |
| "ce_loss_7": 3.167273908853531, |
| "epoch": 1.0, |
| "grad_norm": 19.02740690165538, |
| "kl_loss_13": 3067.6, |
| "kl_loss_26": 2157.2, |
| "kl_loss_39": 1167.3, |
| "kl_loss_7": 3628.8, |
| "learning_rate": 0.0, |
| "loss": 4933.3, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0167830278176768e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|