diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,96288 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 12032, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.999999829562913e-06, + "loss": 1.5, + "regression_loss": 0.0, + "step": 1, + "text_loss": 1.4765625 + }, + { + "epoch": 0.0, + "learning_rate": 9.99999931825166e-06, + "loss": 1.6377, + "regression_loss": 0.0, + "step": 2, + "text_loss": 1.5625 + }, + { + "epoch": 0.0, + "learning_rate": 9.99999846606628e-06, + "loss": 1.3232, + "regression_loss": 0.0, + "step": 3, + "text_loss": 1.234375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999997273006828e-06, + "loss": 1.0591, + "regression_loss": 0.0, + "step": 4, + "text_loss": 1.109375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999995739073384e-06, + "loss": 0.8706, + "regression_loss": 0.0, + "step": 5, + "text_loss": 0.87109375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999993864266058e-06, + "loss": 0.7603, + "regression_loss": 0.0, + "step": 6, + "text_loss": 0.59765625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999991648584972e-06, + "loss": 0.7395, + "regression_loss": 0.0, + "step": 7, + "text_loss": 0.7890625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999989092030283e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 8, + "text_loss": 0.57421875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999986194602158e-06, + "loss": 0.6885, + "regression_loss": 0.0, + "step": 9, + "text_loss": 0.6171875 + }, + { + "epoch": 0.0, + "learning_rate": 9.9999829563008e-06, + "loss": 0.6516, + "regression_loss": 0.0, + "step": 10, + "text_loss": 0.6875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999979377126429e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 11, + "text_loss": 0.337890625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999975457079288e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 12, + "text_loss": 0.54296875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999971196159644e-06, + "loss": 0.6938, + "regression_loss": 0.0, + "step": 13, + "text_loss": 0.7890625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999966594367789e-06, + "loss": 0.6506, + "regression_loss": 0.0, + "step": 14, + "text_loss": 0.953125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999961651704035e-06, + "loss": 0.6665, + "regression_loss": 0.0, + "step": 15, + "text_loss": 0.60546875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999956368168719e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 16, + "text_loss": 0.419921875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999950743762203e-06, + "loss": 0.7051, + "regression_loss": 0.0, + "step": 17, + "text_loss": 0.828125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999944778484869e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 18, + "text_loss": 0.41796875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999938472337123e-06, + "loss": 0.6724, + "regression_loss": 0.0, + "step": 19, + "text_loss": 0.53125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999931825319397e-06, + "loss": 0.688, + "regression_loss": 0.0, + "step": 20, + "text_loss": 0.5625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999924837432142e-06, + "loss": 0.6404, + "regression_loss": 0.0, + "step": 21, + "text_loss": 0.78125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999917508675837e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 22, + "text_loss": 0.470703125 + }, + { + "epoch": 0.0, + "learning_rate": 9.99990983905098e-06, + "loss": 0.6501, + "regression_loss": 0.0, + "step": 23, + "text_loss": 0.640625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999901828558095e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 24, + "text_loss": 0.3984375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999893477197723e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 25, + "text_loss": 0.57421875 + }, + { + "epoch": 0.0, + "learning_rate": 9.99988478497044e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 26, + "text_loss": 0.51171875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999875751876838e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 27, + "text_loss": 0.4921875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999866377917528e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 28, + "text_loss": 0.4140625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999856663093153e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 29, + "text_loss": 0.56640625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999846607404373e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 30, + "text_loss": 0.5625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999836210851877e-06, + "loss": 0.6753, + "regression_loss": 0.0, + "step": 31, + "text_loss": 0.7890625 + }, + { + "epoch": 0.0, + "learning_rate": 9.99982547343637e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 32, + "text_loss": 0.357421875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999814395158587e-06, + "loss": 0.6157, + "regression_loss": 0.0, + "step": 33, + "text_loss": 0.70703125 + }, + { + "epoch": 0.0, + "learning_rate": 9.99980297601928e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 34, + "text_loss": 0.59375 + }, + { + "epoch": 0.0, + "learning_rate": 9.99979121601923e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 35, + "text_loss": 0.78515625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999779115159238e-06, + "loss": 0.71, + "regression_loss": 0.0, + "step": 36, + "text_loss": 0.87109375 + }, + { + "epoch": 0.0, + "learning_rate": 9.99976667344013e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 37, + "text_loss": 0.5390625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999753890862754e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 38, + "text_loss": 0.5859375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999740767427979e-06, + "loss": 0.7314, + "regression_loss": 0.0, + "step": 39, + "text_loss": 0.8828125 + }, + { + "epoch": 0.0, + "learning_rate": 9.9997273031367e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 40, + "text_loss": 0.58203125 + }, + { + "epoch": 0.0, + "learning_rate": 9.99971349798984e-06, + "loss": 0.6855, + "regression_loss": 0.0, + "step": 41, + "text_loss": 1.0234375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999699351988335e-06, + "loss": 0.6577, + "regression_loss": 0.0, + "step": 42, + "text_loss": 0.79296875 + }, + { + "epoch": 0.0, + "learning_rate": 9.99968486513315e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 43, + "text_loss": 0.431640625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999670037425275e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 44, + "text_loss": 0.333984375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999654868865719e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 45, + "text_loss": 0.6796875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999639359455516e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 46, + "text_loss": 0.439453125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999623509195724e-06, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 47, + "text_loss": 0.73828125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999607318087424e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 48, + "text_loss": 0.369140625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999590786131718e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 49, + "text_loss": 0.8046875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999573913329736e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 50, + "text_loss": 0.3671875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999556699682626e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 51, + "text_loss": 0.466796875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999539145191562e-06, + "loss": 0.6604, + "regression_loss": 0.0, + "step": 52, + "text_loss": 0.6796875 + }, + { + "epoch": 0.0, + "learning_rate": 9.99952124985774e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 53, + "text_loss": 0.5 + }, + { + "epoch": 0.0, + "learning_rate": 9.999503013682384e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 54, + "text_loss": 0.53515625 + }, + { + "epoch": 0.0, + "learning_rate": 9.99948443666673e-06, + "loss": 0.6919, + "regression_loss": 0.0, + "step": 55, + "text_loss": 0.8828125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999465518812053e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 56, + "text_loss": 0.5234375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999446260119636e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 57, + "text_loss": 0.5859375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999426660590795e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 58, + "text_loss": 0.3671875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999406720226864e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 59, + "text_loss": 0.796875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999386439029207e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 60, + "text_loss": 0.609375 + }, + { + "epoch": 0.01, + "learning_rate": 9.999365816999202e-06, + "loss": 0.645, + "regression_loss": 0.0, + "step": 61, + "text_loss": 0.88671875 + }, + { + "epoch": 0.01, + "learning_rate": 9.999344854138257e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 62, + "text_loss": 0.53515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.999323550447802e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 63, + "text_loss": 0.58203125 + }, + { + "epoch": 0.01, + "learning_rate": 9.999301905929286e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 64, + "text_loss": 0.703125 + }, + { + "epoch": 0.01, + "learning_rate": 9.999279920584188e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 65, + "text_loss": 0.466796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.999257594414005e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 66, + "text_loss": 0.58203125 + }, + { + "epoch": 0.01, + "learning_rate": 9.999234927420259e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 67, + "text_loss": 0.62109375 + }, + { + "epoch": 0.01, + "learning_rate": 9.999211919604497e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 68, + "text_loss": 0.296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.999188570968285e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 69, + "text_loss": 0.8515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.99916488151322e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 70, + "text_loss": 0.37890625 + }, + { + "epoch": 0.01, + "learning_rate": 9.999140851240909e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 71, + "text_loss": 0.80078125 + }, + { + "epoch": 0.01, + "learning_rate": 9.999116480152997e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 72, + "text_loss": 0.5234375 + }, + { + "epoch": 0.01, + "learning_rate": 9.999091768251144e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 73, + "text_loss": 0.90234375 + }, + { + "epoch": 0.01, + "learning_rate": 9.999066715537031e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 74, + "text_loss": 0.7265625 + }, + { + "epoch": 0.01, + "learning_rate": 9.999041322012371e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 75, + "text_loss": 0.484375 + }, + { + "epoch": 0.01, + "learning_rate": 9.999015587678894e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 76, + "text_loss": 0.6328125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99898951253835e-06, + "loss": 0.6589, + "regression_loss": 0.0, + "step": 77, + "text_loss": 0.68359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.998963096592523e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 78, + "text_loss": 0.8046875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998936339843208e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 79, + "text_loss": 0.76953125 + }, + { + "epoch": 0.01, + "learning_rate": 9.998909242292234e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 80, + "text_loss": 0.53125 + }, + { + "epoch": 0.01, + "learning_rate": 9.998881803941447e-06, + "loss": 0.6338, + "regression_loss": 0.0, + "step": 81, + "text_loss": 0.6796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998854024792716e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 82, + "text_loss": 0.72265625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998825904847938e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 83, + "text_loss": 0.5546875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998797444109027e-06, + "loss": 0.6289, + "regression_loss": 0.0, + "step": 84, + "text_loss": 0.921875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998768642577922e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 85, + "text_loss": 0.62890625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998739500256593e-06, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 86, + "text_loss": 0.68359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.99871001714702e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 87, + "text_loss": 0.279296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998680193251216e-06, + "loss": 0.6311, + "regression_loss": 0.0, + "step": 88, + "text_loss": 0.345703125 + }, + { + "epoch": 0.01, + "learning_rate": 9.998650028571214e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 89, + "text_loss": 0.62109375 + }, + { + "epoch": 0.01, + "learning_rate": 9.998619523109069e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 90, + "text_loss": 0.44921875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998588676866866e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 91, + "text_loss": 0.296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.9985574898467e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 92, + "text_loss": 0.490234375 + }, + { + "epoch": 0.01, + "learning_rate": 9.998525962050702e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 93, + "text_loss": 0.3828125 + }, + { + "epoch": 0.01, + "learning_rate": 9.998494093481022e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 94, + "text_loss": 0.65234375 + }, + { + "epoch": 0.01, + "learning_rate": 9.99846188413983e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 95, + "text_loss": 0.515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998429334029323e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 96, + "text_loss": 0.84765625 + }, + { + "epoch": 0.01, + "learning_rate": 9.99839644315172e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 97, + "text_loss": 0.38671875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998363211509266e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 98, + "text_loss": 0.625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998329639104222e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 99, + "text_loss": 1.078125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99829572593888e-06, + "loss": 0.6609, + "regression_loss": 0.0, + "step": 100, + "text_loss": 0.62890625 + }, + { + "epoch": 0.01, + "learning_rate": 9.99826147201555e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 101, + "text_loss": 0.484375 + }, + { + "epoch": 0.01, + "learning_rate": 9.998226877336567e-06, + "loss": 0.6501, + "regression_loss": 0.0, + "step": 102, + "text_loss": 0.59765625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998191941904292e-06, + "loss": 0.6238, + "regression_loss": 0.0, + "step": 103, + "text_loss": 0.6796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.998156665721106e-06, + "loss": 0.6667, + "regression_loss": 0.0, + "step": 104, + "text_loss": 0.6015625 + }, + { + "epoch": 0.01, + "learning_rate": 9.99812104878941e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 105, + "text_loss": 0.251953125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99808509111164e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 106, + "text_loss": 0.357421875 + }, + { + "epoch": 0.01, + "learning_rate": 9.99804879269024e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 107, + "text_loss": 0.625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998012153527689e-06, + "loss": 0.655, + "regression_loss": 0.0, + "step": 108, + "text_loss": 0.796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997975173626482e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 109, + "text_loss": 0.66796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997937852989143e-06, + "loss": 0.7007, + "regression_loss": 0.0, + "step": 110, + "text_loss": 0.68359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997900191618212e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 111, + "text_loss": 0.41796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997862189516264e-06, + "loss": 0.6348, + "regression_loss": 0.0, + "step": 112, + "text_loss": 0.55859375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997823846685881e-06, + "loss": 0.6057, + "regression_loss": 0.0, + "step": 113, + "text_loss": 0.8828125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997785163129682e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 114, + "text_loss": 0.271484375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997746138850306e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 115, + "text_loss": 0.318359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997706773850409e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 116, + "text_loss": 0.703125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997667068132676e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 117, + "text_loss": 0.39453125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997627021699815e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 118, + "text_loss": 0.62109375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997586634554557e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 119, + "text_loss": 0.27734375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997545906699652e-06, + "loss": 0.6497, + "regression_loss": 0.0, + "step": 120, + "text_loss": 0.99609375 + }, + { + "epoch": 0.01, + "learning_rate": 9.99750483813788e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 121, + "text_loss": 0.7734375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997463428872039e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 122, + "text_loss": 0.578125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997421678904953e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 123, + "text_loss": 0.46875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997379588239468e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 124, + "text_loss": 0.62109375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997337156878453e-06, + "loss": 0.6743, + "regression_loss": 0.0, + "step": 125, + "text_loss": 0.474609375 + }, + { + "epoch": 0.01, + "learning_rate": 9.9972943848248e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 126, + "text_loss": 0.78125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997251272081427e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 127, + "text_loss": 0.46484375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997207818651273e-06, + "loss": 0.6848, + "regression_loss": 0.0, + "step": 128, + "text_loss": 0.62109375 + }, + { + "epoch": 0.01, + "learning_rate": 9.9971640245373e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 129, + "text_loss": 0.419921875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997119889742493e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 130, + "text_loss": 0.859375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997075414269862e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 131, + "text_loss": 0.458984375 + }, + { + "epoch": 0.01, + "learning_rate": 9.997030598122437e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 132, + "text_loss": 0.515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.996985441303276e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 133, + "text_loss": 0.71875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996939943815456e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 134, + "text_loss": 0.72265625 + }, + { + "epoch": 0.01, + "learning_rate": 9.996894105662079e-06, + "loss": 0.6523, + "regression_loss": 0.0, + "step": 135, + "text_loss": 0.51171875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996847926846269e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 136, + "text_loss": 0.4296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996801407371177e-06, + "loss": 0.6367, + "regression_loss": 0.0, + "step": 137, + "text_loss": 0.52734375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996754547239972e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 138, + "text_loss": 0.578125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99670734645585e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 139, + "text_loss": 0.416015625 + }, + { + "epoch": 0.01, + "learning_rate": 9.996659805022029e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 140, + "text_loss": 0.55859375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996611922941748e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 141, + "text_loss": 0.72265625 + }, + { + "epoch": 0.01, + "learning_rate": 9.996563700218274e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 142, + "text_loss": 0.416015625 + }, + { + "epoch": 0.01, + "learning_rate": 9.996515136854892e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 143, + "text_loss": 0.482421875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996466232854916e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 144, + "text_loss": 0.486328125 + }, + { + "epoch": 0.01, + "learning_rate": 9.996416988221678e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 145, + "text_loss": 0.609375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996367402958536e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 146, + "text_loss": 0.875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996317477068867e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 147, + "text_loss": 0.396484375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996267210556081e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 148, + "text_loss": 0.41015625 + }, + { + "epoch": 0.01, + "learning_rate": 9.9962166034236e-06, + "loss": 0.6311, + "regression_loss": 0.0, + "step": 149, + "text_loss": 0.6953125 + }, + { + "epoch": 0.01, + "learning_rate": 9.996165655674876e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 150, + "text_loss": 0.33984375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996114367313382e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 151, + "text_loss": 0.60546875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996062738342616e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 152, + "text_loss": 0.296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.996010768766093e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 153, + "text_loss": 0.6015625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995958458587362e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 154, + "text_loss": 0.439453125 + }, + { + "epoch": 0.01, + "learning_rate": 9.995905807809986e-06, + "loss": 0.6426, + "regression_loss": 0.0, + "step": 155, + "text_loss": 0.7265625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995852816437557e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 156, + "text_loss": 0.400390625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995799484473683e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 157, + "text_loss": 0.515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995745811922005e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 158, + "text_loss": 0.703125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99569179878618e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 159, + "text_loss": 0.5234375 + }, + { + "epoch": 0.01, + "learning_rate": 9.995637445069889e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 160, + "text_loss": 0.455078125 + }, + { + "epoch": 0.01, + "learning_rate": 9.995582750776838e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 161, + "text_loss": 0.58203125 + }, + { + "epoch": 0.01, + "learning_rate": 9.995527715910758e-06, + "loss": 0.5981, + "regression_loss": 0.0, + "step": 162, + "text_loss": 0.40625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995472340475398e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 163, + "text_loss": 0.6328125 + }, + { + "epoch": 0.01, + "learning_rate": 9.995416624474537e-06, + "loss": 0.6506, + "regression_loss": 0.0, + "step": 164, + "text_loss": 0.58984375 + }, + { + "epoch": 0.01, + "learning_rate": 9.995360567911969e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 165, + "text_loss": 0.5703125 + }, + { + "epoch": 0.01, + "learning_rate": 9.995304170791519e-06, + "loss": 0.4299, + "regression_loss": 0.0, + "step": 166, + "text_loss": 0.55859375 + }, + { + "epoch": 0.01, + "learning_rate": 9.99524743311703e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 167, + "text_loss": 0.5546875 + }, + { + "epoch": 0.01, + "learning_rate": 9.995190354892371e-06, + "loss": 0.6455, + "regression_loss": 0.0, + "step": 168, + "text_loss": 0.43359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.995132936121433e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 169, + "text_loss": 0.625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995075176808131e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 170, + "text_loss": 0.515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.9950170769564e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 171, + "text_loss": 0.3984375 + }, + { + "epoch": 0.01, + "learning_rate": 9.994958636570207e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 172, + "text_loss": 0.8515625 + }, + { + "epoch": 0.01, + "learning_rate": 9.99489985565353e-06, + "loss": 0.6294, + "regression_loss": 0.0, + "step": 173, + "text_loss": 0.61328125 + }, + { + "epoch": 0.01, + "learning_rate": 9.99484073421038e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 174, + "text_loss": 0.66796875 + }, + { + "epoch": 0.01, + "learning_rate": 9.994781272244786e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 175, + "text_loss": 0.33984375 + }, + { + "epoch": 0.01, + "learning_rate": 9.994721469760802e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 176, + "text_loss": 0.57421875 + }, + { + "epoch": 0.01, + "learning_rate": 9.994661326762505e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 177, + "text_loss": 0.34375 + }, + { + "epoch": 0.01, + "learning_rate": 9.994600843253997e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 178, + "text_loss": 0.4296875 + }, + { + "epoch": 0.01, + "learning_rate": 9.994540019239398e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 179, + "text_loss": 0.443359375 + }, + { + "epoch": 0.01, + "learning_rate": 9.994478854722857e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 180, + "text_loss": 0.58203125 + }, + { + "epoch": 0.02, + "learning_rate": 9.994417349708544e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 181, + "text_loss": 0.5703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.99435550420065e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 182, + "text_loss": 0.5078125 + }, + { + "epoch": 0.02, + "learning_rate": 9.994293318203395e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 183, + "text_loss": 0.53125 + }, + { + "epoch": 0.02, + "learning_rate": 9.994230791721015e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 184, + "text_loss": 0.255859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.994167924757774e-06, + "loss": 0.6868, + "regression_loss": 0.0, + "step": 185, + "text_loss": 0.333984375 + }, + { + "epoch": 0.02, + "learning_rate": 9.994104717317958e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 186, + "text_loss": 0.6640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.994041169405876e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 187, + "text_loss": 0.5 + }, + { + "epoch": 0.02, + "learning_rate": 9.993977281025862e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 188, + "text_loss": 0.45703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.993913052182271e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 189, + "text_loss": 0.51171875 + }, + { + "epoch": 0.02, + "learning_rate": 9.993848482879479e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 190, + "text_loss": 0.53125 + }, + { + "epoch": 0.02, + "learning_rate": 9.99378357312189e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 191, + "text_loss": 0.384765625 + }, + { + "epoch": 0.02, + "learning_rate": 9.99371832291393e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 192, + "text_loss": 0.68359375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993652732260047e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 193, + "text_loss": 0.65625 + }, + { + "epoch": 0.02, + "learning_rate": 9.99358680116471e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 194, + "text_loss": 0.375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993520529632417e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 195, + "text_loss": 0.4375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993453917667685e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 196, + "text_loss": 0.5859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993386965275055e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 197, + "text_loss": 0.47265625 + }, + { + "epoch": 0.02, + "learning_rate": 9.993319672459094e-06, + "loss": 0.6875, + "regression_loss": 0.0, + "step": 198, + "text_loss": 0.859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993252039224387e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 199, + "text_loss": 0.66796875 + }, + { + "epoch": 0.02, + "learning_rate": 9.993184065575545e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 200, + "text_loss": 0.75 + }, + { + "epoch": 0.02, + "learning_rate": 9.993115751517201e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 201, + "text_loss": 0.3515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.993047097054015e-06, + "loss": 0.6289, + "regression_loss": 0.0, + "step": 202, + "text_loss": 0.578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.992978102190667e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 203, + "text_loss": 0.640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.99290876693186e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 204, + "text_loss": 0.55078125 + }, + { + "epoch": 0.02, + "learning_rate": 9.992839091282319e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 205, + "text_loss": 0.84375 + }, + { + "epoch": 0.02, + "learning_rate": 9.992769075246797e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 206, + "text_loss": 0.515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.992698718830067e-06, + "loss": 0.6406, + "regression_loss": 0.0, + "step": 207, + "text_loss": 0.69921875 + }, + { + "epoch": 0.02, + "learning_rate": 9.992628022036924e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 208, + "text_loss": 0.5625 + }, + { + "epoch": 0.02, + "learning_rate": 9.992556984872189e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 209, + "text_loss": 0.75 + }, + { + "epoch": 0.02, + "learning_rate": 9.992485607340704e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 210, + "text_loss": 0.65234375 + }, + { + "epoch": 0.02, + "learning_rate": 9.992413889447338e-06, + "loss": 0.6094, + "regression_loss": 0.0, + "step": 211, + "text_loss": 0.671875 + }, + { + "epoch": 0.02, + "learning_rate": 9.992341831196976e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 212, + "text_loss": 0.70703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.992269432594533e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 213, + "text_loss": 0.77734375 + }, + { + "epoch": 0.02, + "learning_rate": 9.992196693644945e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 214, + "text_loss": 0.49609375 + }, + { + "epoch": 0.02, + "learning_rate": 9.992123614353171e-06, + "loss": 0.6763, + "regression_loss": 0.0, + "step": 215, + "text_loss": 0.81640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.992050194724191e-06, + "loss": 0.4089, + "regression_loss": 0.0, + "step": 216, + "text_loss": 0.314453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991976434763012e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 217, + "text_loss": 0.69140625 + }, + { + "epoch": 0.02, + "learning_rate": 9.991902334474663e-06, + "loss": 0.6619, + "regression_loss": 0.0, + "step": 218, + "text_loss": 0.8515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.991827893864196e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 219, + "text_loss": 0.7734375 + }, + { + "epoch": 0.02, + "learning_rate": 9.991753112936686e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 220, + "text_loss": 0.453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991677991697228e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 221, + "text_loss": 0.2734375 + }, + { + "epoch": 0.02, + "learning_rate": 9.991602530150947e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 222, + "text_loss": 0.375 + }, + { + "epoch": 0.02, + "learning_rate": 9.991526728302987e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 223, + "text_loss": 0.36328125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991450586158515e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 224, + "text_loss": 0.392578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991374103722721e-06, + "loss": 0.6975, + "regression_loss": 0.0, + "step": 225, + "text_loss": 0.388671875 + }, + { + "epoch": 0.02, + "learning_rate": 9.991297281000823e-06, + "loss": 0.5928, + "regression_loss": 0.0, + "step": 226, + "text_loss": 0.78125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991220117998052e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 227, + "text_loss": 0.314453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.991142614719675e-06, + "loss": 0.624, + "regression_loss": 0.0, + "step": 228, + "text_loss": 0.54296875 + }, + { + "epoch": 0.02, + "learning_rate": 9.991064771170974e-06, + "loss": 0.6335, + "regression_loss": 0.0, + "step": 229, + "text_loss": 0.70703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.990986587357252e-06, + "loss": 0.6707, + "regression_loss": 0.0, + "step": 230, + "text_loss": 0.80078125 + }, + { + "epoch": 0.02, + "learning_rate": 9.990908063283844e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 231, + "text_loss": 0.458984375 + }, + { + "epoch": 0.02, + "learning_rate": 9.9908291989561e-06, + "loss": 0.4811, + "regression_loss": 0.0, + "step": 232, + "text_loss": 0.451171875 + }, + { + "epoch": 0.02, + "learning_rate": 9.9907499943794e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 233, + "text_loss": 0.390625 + }, + { + "epoch": 0.02, + "learning_rate": 9.99067044955914e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 234, + "text_loss": 0.45703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.990590564500745e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 235, + "text_loss": 0.56640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.990510339209664e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 236, + "text_loss": 0.74609375 + }, + { + "epoch": 0.02, + "learning_rate": 9.99042977369136e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 237, + "text_loss": 0.462890625 + }, + { + "epoch": 0.02, + "learning_rate": 9.990348867951332e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 238, + "text_loss": 0.4375 + }, + { + "epoch": 0.02, + "learning_rate": 9.99026762199509e-06, + "loss": 0.6799, + "regression_loss": 0.0, + "step": 239, + "text_loss": 0.76171875 + }, + { + "epoch": 0.02, + "learning_rate": 9.990186035828177e-06, + "loss": 0.6562, + "regression_loss": 0.0, + "step": 240, + "text_loss": 0.84765625 + }, + { + "epoch": 0.02, + "learning_rate": 9.990104109456152e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 241, + "text_loss": 0.33203125 + }, + { + "epoch": 0.02, + "learning_rate": 9.990021842884603e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 242, + "text_loss": 0.4375 + }, + { + "epoch": 0.02, + "learning_rate": 9.989939236119139e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 243, + "text_loss": 0.73828125 + }, + { + "epoch": 0.02, + "learning_rate": 9.989856289165387e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 244, + "text_loss": 0.71875 + }, + { + "epoch": 0.02, + "learning_rate": 9.989773002029008e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 245, + "text_loss": 0.5625 + }, + { + "epoch": 0.02, + "learning_rate": 9.989689374715677e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 246, + "text_loss": 0.318359375 + }, + { + "epoch": 0.02, + "learning_rate": 9.989605407231093e-06, + "loss": 0.6665, + "regression_loss": 0.0, + "step": 247, + "text_loss": 0.79296875 + }, + { + "epoch": 0.02, + "learning_rate": 9.989521099580985e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 248, + "text_loss": 0.5390625 + }, + { + "epoch": 0.02, + "learning_rate": 9.989436451771097e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 249, + "text_loss": 0.49609375 + }, + { + "epoch": 0.02, + "learning_rate": 9.989351463807202e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 250, + "text_loss": 0.640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.989266135695095e-06, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 251, + "text_loss": 0.59375 + }, + { + "epoch": 0.02, + "learning_rate": 9.989180467440591e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 252, + "text_loss": 0.4453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.98909445904953e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 253, + "text_loss": 0.65234375 + }, + { + "epoch": 0.02, + "learning_rate": 9.989008110527778e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 254, + "text_loss": 0.66796875 + }, + { + "epoch": 0.02, + "learning_rate": 9.98892142188122e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 255, + "text_loss": 0.640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.988834393115768e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 256, + "text_loss": 0.63671875 + }, + { + "epoch": 0.02, + "learning_rate": 9.988747024237351e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 257, + "text_loss": 0.466796875 + }, + { + "epoch": 0.02, + "learning_rate": 9.98865931525193e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 258, + "text_loss": 0.4140625 + }, + { + "epoch": 0.02, + "learning_rate": 9.98857126616548e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 259, + "text_loss": 0.6171875 + }, + { + "epoch": 0.02, + "learning_rate": 9.988482876984009e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 260, + "text_loss": 0.46875 + }, + { + "epoch": 0.02, + "learning_rate": 9.98839414771354e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 261, + "text_loss": 0.7421875 + }, + { + "epoch": 0.02, + "learning_rate": 9.988305078360122e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 262, + "text_loss": 0.578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.988215668929828e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 263, + "text_loss": 0.70703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.988125919428752e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 264, + "text_loss": 0.5703125 + }, + { + "epoch": 0.02, + "learning_rate": 9.988035829863014e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 265, + "text_loss": 0.64453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.987945400238756e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 266, + "text_loss": 0.54296875 + }, + { + "epoch": 0.02, + "learning_rate": 9.987854630562143e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 267, + "text_loss": 0.54296875 + }, + { + "epoch": 0.02, + "learning_rate": 9.987763520839362e-06, + "loss": 0.6431, + "regression_loss": 0.0, + "step": 268, + "text_loss": 0.81640625 + }, + { + "epoch": 0.02, + "learning_rate": 9.987672071076626e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 269, + "text_loss": 0.380859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.98758028128017e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 270, + "text_loss": 0.55859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.987488151456248e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 271, + "text_loss": 0.384765625 + }, + { + "epoch": 0.02, + "learning_rate": 9.987395681611145e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 272, + "text_loss": 0.42578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.987302871751162e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 273, + "text_loss": 0.478515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.98720972188263e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 274, + "text_loss": 0.326171875 + }, + { + "epoch": 0.02, + "learning_rate": 9.987116232011896e-06, + "loss": 0.5245, + "regression_loss": 0.0, + "step": 275, + "text_loss": 0.546875 + }, + { + "epoch": 0.02, + "learning_rate": 9.987022402145336e-06, + "loss": 0.6572, + "regression_loss": 0.0, + "step": 276, + "text_loss": 0.55859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.986928232289346e-06, + "loss": 0.6821, + "regression_loss": 0.0, + "step": 277, + "text_loss": 0.44921875 + }, + { + "epoch": 0.02, + "learning_rate": 9.986833722450345e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 278, + "text_loss": 0.8515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.986738872634777e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 279, + "text_loss": 0.37109375 + }, + { + "epoch": 0.02, + "learning_rate": 9.986643682849108e-06, + "loss": 0.6311, + "regression_loss": 0.0, + "step": 280, + "text_loss": 0.80859375 + }, + { + "epoch": 0.02, + "learning_rate": 9.98654815309983e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 281, + "text_loss": 0.51953125 + }, + { + "epoch": 0.02, + "learning_rate": 9.986452283393452e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 282, + "text_loss": 0.625 + }, + { + "epoch": 0.02, + "learning_rate": 9.986356073736511e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 283, + "text_loss": 0.3125 + }, + { + "epoch": 0.02, + "learning_rate": 9.986259524135569e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 284, + "text_loss": 0.83203125 + }, + { + "epoch": 0.02, + "learning_rate": 9.986162634597205e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 285, + "text_loss": 0.67578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.986065405128025e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 286, + "text_loss": 0.453125 + }, + { + "epoch": 0.02, + "learning_rate": 9.985967835734658e-06, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 287, + "text_loss": 0.3515625 + }, + { + "epoch": 0.02, + "learning_rate": 9.985869926423757e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 288, + "text_loss": 0.625 + }, + { + "epoch": 0.02, + "learning_rate": 9.985771677201993e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 289, + "text_loss": 0.8359375 + }, + { + "epoch": 0.02, + "learning_rate": 9.98567308807607e-06, + "loss": 0.6331, + "regression_loss": 0.0, + "step": 290, + "text_loss": 0.48828125 + }, + { + "epoch": 0.02, + "learning_rate": 9.985574159052705e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 291, + "text_loss": 0.47265625 + }, + { + "epoch": 0.02, + "learning_rate": 9.985474890138642e-06, + "loss": 0.6667, + "regression_loss": 0.0, + "step": 292, + "text_loss": 0.609375 + }, + { + "epoch": 0.02, + "learning_rate": 9.985375281340653e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 293, + "text_loss": 0.65234375 + }, + { + "epoch": 0.02, + "learning_rate": 9.985275332665525e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 294, + "text_loss": 0.369140625 + }, + { + "epoch": 0.02, + "learning_rate": 9.985175044120073e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 295, + "text_loss": 0.5 + }, + { + "epoch": 0.02, + "learning_rate": 9.985074415711134e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 296, + "text_loss": 0.46875 + }, + { + "epoch": 0.02, + "learning_rate": 9.984973447445567e-06, + "loss": 0.6782, + "regression_loss": 0.0, + "step": 297, + "text_loss": 0.49609375 + }, + { + "epoch": 0.02, + "learning_rate": 9.984872139330259e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 298, + "text_loss": 0.578125 + }, + { + "epoch": 0.02, + "learning_rate": 9.984770491372113e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 299, + "text_loss": 0.61328125 + }, + { + "epoch": 0.02, + "learning_rate": 9.984668503578064e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 300, + "text_loss": 0.70703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.984566175955057e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 301, + "text_loss": 0.30078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.984463508510077e-06, + "loss": 0.7051, + "regression_loss": 0.0, + "step": 302, + "text_loss": 0.66015625 + }, + { + "epoch": 0.03, + "learning_rate": 9.984360501250116e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 303, + "text_loss": 0.6640625 + }, + { + "epoch": 0.03, + "learning_rate": 9.9842571541822e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 304, + "text_loss": 0.71484375 + }, + { + "epoch": 0.03, + "learning_rate": 9.984153467313376e-06, + "loss": 0.616, + "regression_loss": 0.0, + "step": 305, + "text_loss": 0.4609375 + }, + { + "epoch": 0.03, + "learning_rate": 9.984049440650709e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 306, + "text_loss": 0.431640625 + }, + { + "epoch": 0.03, + "learning_rate": 9.983945074201293e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 307, + "text_loss": 0.326171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.983840367972244e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 308, + "text_loss": 0.91796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.9837353219707e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 309, + "text_loss": 0.77734375 + }, + { + "epoch": 0.03, + "learning_rate": 9.983629936203822e-06, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 310, + "text_loss": 0.88671875 + }, + { + "epoch": 0.03, + "learning_rate": 9.983524210678794e-06, + "loss": 0.6541, + "regression_loss": 0.0, + "step": 311, + "text_loss": 0.7421875 + }, + { + "epoch": 0.03, + "learning_rate": 9.983418145402825e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 312, + "text_loss": 0.55078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.983311740383144e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 313, + "text_loss": 0.54296875 + }, + { + "epoch": 0.03, + "learning_rate": 9.98320499562701e-06, + "loss": 0.6682, + "regression_loss": 0.0, + "step": 314, + "text_loss": 0.53515625 + }, + { + "epoch": 0.03, + "learning_rate": 9.983097911141694e-06, + "loss": 0.6003, + "regression_loss": 0.0, + "step": 315, + "text_loss": 0.30078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.9829904869345e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 316, + "text_loss": 0.478515625 + }, + { + "epoch": 0.03, + "learning_rate": 9.982882723012751e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 317, + "text_loss": 0.41015625 + }, + { + "epoch": 0.03, + "learning_rate": 9.982774619383796e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 318, + "text_loss": 0.48046875 + }, + { + "epoch": 0.03, + "learning_rate": 9.982666176054997e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 319, + "text_loss": 0.703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.982557393033758e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 320, + "text_loss": 0.71875 + }, + { + "epoch": 0.03, + "learning_rate": 9.98244827032749e-06, + "loss": 0.6526, + "regression_loss": 0.0, + "step": 321, + "text_loss": 0.7109375 + }, + { + "epoch": 0.03, + "learning_rate": 9.98233880794363e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 322, + "text_loss": 0.59375 + }, + { + "epoch": 0.03, + "learning_rate": 9.982229005889645e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 323, + "text_loss": 0.5625 + }, + { + "epoch": 0.03, + "learning_rate": 9.982118864173018e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 324, + "text_loss": 0.51171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.98200838280126e-06, + "loss": 0.6233, + "regression_loss": 0.0, + "step": 325, + "text_loss": 0.6015625 + }, + { + "epoch": 0.03, + "learning_rate": 9.981897561781902e-06, + "loss": 0.6694, + "regression_loss": 0.0, + "step": 326, + "text_loss": 0.76171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.981786401122498e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 327, + "text_loss": 0.625 + }, + { + "epoch": 0.03, + "learning_rate": 9.981674900830627e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 328, + "text_loss": 0.73828125 + }, + { + "epoch": 0.03, + "learning_rate": 9.98156306091389e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 329, + "text_loss": 0.58984375 + }, + { + "epoch": 0.03, + "learning_rate": 9.981450881379916e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 330, + "text_loss": 0.451171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.981338362236346e-06, + "loss": 0.5879, + "regression_loss": 0.0, + "step": 331, + "text_loss": 0.478515625 + }, + { + "epoch": 0.03, + "learning_rate": 9.981225503490855e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 332, + "text_loss": 0.51171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.981112305151137e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 333, + "text_loss": 0.326171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.98099876722491e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 334, + "text_loss": 0.5078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.980884889719912e-06, + "loss": 0.6997, + "regression_loss": 0.0, + "step": 335, + "text_loss": 1.0078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.980770672643907e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 336, + "text_loss": 0.828125 + }, + { + "epoch": 0.03, + "learning_rate": 9.980656116004685e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 337, + "text_loss": 0.296875 + }, + { + "epoch": 0.03, + "learning_rate": 9.980541219810052e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 338, + "text_loss": 0.76171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.98042598406784e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 339, + "text_loss": 0.67578125 + }, + { + "epoch": 0.03, + "learning_rate": 9.98031040878591e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 340, + "text_loss": 0.390625 + }, + { + "epoch": 0.03, + "learning_rate": 9.980194493972139e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 341, + "text_loss": 0.5703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.98007823963443e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 342, + "text_loss": 0.6796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.979961645780707e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 343, + "text_loss": 0.341796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.97984471241892e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 344, + "text_loss": 0.283203125 + }, + { + "epoch": 0.03, + "learning_rate": 9.97972743955704e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 345, + "text_loss": 0.609375 + }, + { + "epoch": 0.03, + "learning_rate": 9.979609827203062e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 346, + "text_loss": 0.671875 + }, + { + "epoch": 0.03, + "learning_rate": 9.979491875365009e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 347, + "text_loss": 0.71875 + }, + { + "epoch": 0.03, + "learning_rate": 9.979373584050914e-06, + "loss": 0.6165, + "regression_loss": 0.0, + "step": 348, + "text_loss": 0.5546875 + }, + { + "epoch": 0.03, + "learning_rate": 9.979254953268848e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 349, + "text_loss": 0.431640625 + }, + { + "epoch": 0.03, + "learning_rate": 9.979135983026895e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 350, + "text_loss": 0.49609375 + }, + { + "epoch": 0.03, + "learning_rate": 9.979016673333169e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 351, + "text_loss": 0.337890625 + }, + { + "epoch": 0.03, + "learning_rate": 9.978897024195801e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 352, + "text_loss": 0.45703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.978777035622949e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 353, + "text_loss": 0.69921875 + }, + { + "epoch": 0.03, + "learning_rate": 9.978656707622794e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 354, + "text_loss": 0.50390625 + }, + { + "epoch": 0.03, + "learning_rate": 9.978536040203537e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 355, + "text_loss": 0.50390625 + }, + { + "epoch": 0.03, + "learning_rate": 9.978415033373408e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 356, + "text_loss": 0.93359375 + }, + { + "epoch": 0.03, + "learning_rate": 9.978293687140655e-06, + "loss": 0.6562, + "regression_loss": 0.0, + "step": 357, + "text_loss": 0.6171875 + }, + { + "epoch": 0.03, + "learning_rate": 9.97817200151355e-06, + "loss": 0.6604, + "regression_loss": 0.0, + "step": 358, + "text_loss": 0.6328125 + }, + { + "epoch": 0.03, + "learning_rate": 9.97804997650039e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 359, + "text_loss": 0.66796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.977927612109493e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 360, + "text_loss": 0.45703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.977804908349202e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 361, + "text_loss": 0.447265625 + }, + { + "epoch": 0.03, + "learning_rate": 9.977681865227881e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 362, + "text_loss": 0.294921875 + }, + { + "epoch": 0.03, + "learning_rate": 9.97755848275392e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 363, + "text_loss": 0.5390625 + }, + { + "epoch": 0.03, + "learning_rate": 9.977434760935731e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 364, + "text_loss": 0.255859375 + }, + { + "epoch": 0.03, + "learning_rate": 9.977310699781745e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 365, + "text_loss": 0.5546875 + }, + { + "epoch": 0.03, + "learning_rate": 9.977186299300427e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 366, + "text_loss": 0.59375 + }, + { + "epoch": 0.03, + "learning_rate": 9.977061559500249e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 367, + "text_loss": 0.625 + }, + { + "epoch": 0.03, + "learning_rate": 9.976936480389723e-06, + "loss": 0.6533, + "regression_loss": 0.0, + "step": 368, + "text_loss": 0.8671875 + }, + { + "epoch": 0.03, + "learning_rate": 9.976811061977371e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 369, + "text_loss": 0.5234375 + }, + { + "epoch": 0.03, + "learning_rate": 9.976685304271747e-06, + "loss": 0.7114, + "regression_loss": 0.0, + "step": 370, + "text_loss": 0.57421875 + }, + { + "epoch": 0.03, + "learning_rate": 9.976559207281422e-06, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 371, + "text_loss": 0.3359375 + }, + { + "epoch": 0.03, + "learning_rate": 9.976432771014992e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 372, + "text_loss": 0.69140625 + }, + { + "epoch": 0.03, + "learning_rate": 9.97630599548108e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 373, + "text_loss": 0.69921875 + }, + { + "epoch": 0.03, + "learning_rate": 9.976178880688327e-06, + "loss": 0.6333, + "regression_loss": 0.0, + "step": 374, + "text_loss": 0.61328125 + }, + { + "epoch": 0.03, + "learning_rate": 9.976051426645398e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 375, + "text_loss": 0.66015625 + }, + { + "epoch": 0.03, + "learning_rate": 9.975923633360985e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 376, + "text_loss": 0.56640625 + }, + { + "epoch": 0.03, + "learning_rate": 9.975795500843799e-06, + "loss": 0.6316, + "regression_loss": 0.0, + "step": 377, + "text_loss": 0.59765625 + }, + { + "epoch": 0.03, + "learning_rate": 9.975667029102573e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 378, + "text_loss": 0.5234375 + }, + { + "epoch": 0.03, + "learning_rate": 9.975538218146068e-06, + "loss": 0.5006, + "regression_loss": 0.0, + "step": 379, + "text_loss": 0.55859375 + }, + { + "epoch": 0.03, + "learning_rate": 9.975409067983067e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 380, + "text_loss": 0.6484375 + }, + { + "epoch": 0.03, + "learning_rate": 9.97527957862237e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 381, + "text_loss": 0.60546875 + }, + { + "epoch": 0.03, + "learning_rate": 9.97514975007281e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 382, + "text_loss": 0.439453125 + }, + { + "epoch": 0.03, + "learning_rate": 9.975019582343234e-06, + "loss": 0.6316, + "regression_loss": 0.0, + "step": 383, + "text_loss": 0.86328125 + }, + { + "epoch": 0.03, + "learning_rate": 9.97488907544252e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 384, + "text_loss": 0.5625 + }, + { + "epoch": 0.03, + "learning_rate": 9.974758229379565e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 385, + "text_loss": 0.5703125 + }, + { + "epoch": 0.03, + "learning_rate": 9.974627044163285e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 386, + "text_loss": 0.6484375 + }, + { + "epoch": 0.03, + "learning_rate": 9.974495519802628e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 387, + "text_loss": 0.65625 + }, + { + "epoch": 0.03, + "learning_rate": 9.974363656306558e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 388, + "text_loss": 0.73046875 + }, + { + "epoch": 0.03, + "learning_rate": 9.974231453684067e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 389, + "text_loss": 0.60546875 + }, + { + "epoch": 0.03, + "learning_rate": 9.974098911944166e-06, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 390, + "text_loss": 0.380859375 + }, + { + "epoch": 0.03, + "learning_rate": 9.973966031095889e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 391, + "text_loss": 0.6953125 + }, + { + "epoch": 0.03, + "learning_rate": 9.973832811148302e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 392, + "text_loss": 0.7578125 + }, + { + "epoch": 0.03, + "learning_rate": 9.97369925211048e-06, + "loss": 0.6738, + "regression_loss": 0.0, + "step": 393, + "text_loss": 0.578125 + }, + { + "epoch": 0.03, + "learning_rate": 9.973565353991535e-06, + "loss": 0.6733, + "regression_loss": 0.0, + "step": 394, + "text_loss": 1.65625 + }, + { + "epoch": 0.03, + "learning_rate": 9.97343111680059e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 395, + "text_loss": 0.89453125 + }, + { + "epoch": 0.03, + "learning_rate": 9.973296540546797e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 396, + "text_loss": 0.458984375 + }, + { + "epoch": 0.03, + "learning_rate": 9.973161625239333e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 397, + "text_loss": 0.466796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.973026370887395e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 398, + "text_loss": 0.3828125 + }, + { + "epoch": 0.03, + "learning_rate": 9.972890777500206e-06, + "loss": 0.6831, + "regression_loss": 0.0, + "step": 399, + "text_loss": 0.79296875 + }, + { + "epoch": 0.03, + "learning_rate": 9.972754845087006e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 400, + "text_loss": 0.341796875 + }, + { + "epoch": 0.03, + "learning_rate": 9.972618573657064e-06, + "loss": 0.6484, + "regression_loss": 0.0, + "step": 401, + "text_loss": 0.421875 + }, + { + "epoch": 0.03, + "learning_rate": 9.972481963219672e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 402, + "text_loss": 0.55078125 + }, + { + "epoch": 0.03, + "learning_rate": 9.972345013784142e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 403, + "text_loss": 0.24609375 + }, + { + "epoch": 0.03, + "learning_rate": 9.972207725359809e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 404, + "text_loss": 0.419921875 + }, + { + "epoch": 0.03, + "learning_rate": 9.972070097956035e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 405, + "text_loss": 0.412109375 + }, + { + "epoch": 0.03, + "learning_rate": 9.9719321315822e-06, + "loss": 0.6233, + "regression_loss": 0.0, + "step": 406, + "text_loss": 0.65234375 + }, + { + "epoch": 0.03, + "learning_rate": 9.971793826247715e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 407, + "text_loss": 0.294921875 + }, + { + "epoch": 0.03, + "learning_rate": 9.971655181962003e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 408, + "text_loss": 0.9140625 + }, + { + "epoch": 0.03, + "learning_rate": 9.971516198734519e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 409, + "text_loss": 0.53125 + }, + { + "epoch": 0.03, + "learning_rate": 9.971376876574738e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 410, + "text_loss": 0.87109375 + }, + { + "epoch": 0.03, + "learning_rate": 9.971237215492158e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 411, + "text_loss": 0.57421875 + }, + { + "epoch": 0.03, + "learning_rate": 9.971097215496298e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 412, + "text_loss": 0.7265625 + }, + { + "epoch": 0.03, + "learning_rate": 9.970956876596709e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 413, + "text_loss": 0.494140625 + }, + { + "epoch": 0.03, + "learning_rate": 9.970816198802953e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 414, + "text_loss": 0.447265625 + }, + { + "epoch": 0.03, + "learning_rate": 9.97067518212462e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 415, + "text_loss": 0.46484375 + }, + { + "epoch": 0.03, + "learning_rate": 9.970533826571329e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 416, + "text_loss": 0.388671875 + }, + { + "epoch": 0.03, + "learning_rate": 9.970392132152712e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 417, + "text_loss": 0.75 + }, + { + "epoch": 0.03, + "learning_rate": 9.970250098878432e-06, + "loss": 0.647, + "regression_loss": 0.0, + "step": 418, + "text_loss": 0.734375 + }, + { + "epoch": 0.03, + "learning_rate": 9.970107726758169e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 419, + "text_loss": 0.6484375 + }, + { + "epoch": 0.03, + "learning_rate": 9.969965015801632e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 420, + "text_loss": 0.84765625 + }, + { + "epoch": 0.03, + "learning_rate": 9.96982196601855e-06, + "loss": 0.6523, + "regression_loss": 0.0, + "step": 421, + "text_loss": 0.63671875 + }, + { + "epoch": 0.04, + "learning_rate": 9.969678577418674e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 422, + "text_loss": 0.640625 + }, + { + "epoch": 0.04, + "learning_rate": 9.969534850011782e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 423, + "text_loss": 0.5546875 + }, + { + "epoch": 0.04, + "learning_rate": 9.969390783807668e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 424, + "text_loss": 0.2734375 + }, + { + "epoch": 0.04, + "learning_rate": 9.969246378816158e-06, + "loss": 0.5994, + "regression_loss": 0.0, + "step": 425, + "text_loss": 0.75390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.969101635047096e-06, + "loss": 0.6152, + "regression_loss": 0.0, + "step": 426, + "text_loss": 0.3515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.968956552510349e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 427, + "text_loss": 0.51171875 + }, + { + "epoch": 0.04, + "learning_rate": 9.968811131215807e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 428, + "text_loss": 0.3671875 + }, + { + "epoch": 0.04, + "learning_rate": 9.968665371173386e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 429, + "text_loss": 0.400390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.968519272393022e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 430, + "text_loss": 0.7109375 + }, + { + "epoch": 0.04, + "learning_rate": 9.968372834884676e-06, + "loss": 0.6226, + "regression_loss": 0.0, + "step": 431, + "text_loss": 0.625 + }, + { + "epoch": 0.04, + "learning_rate": 9.968226058658333e-06, + "loss": 0.6943, + "regression_loss": 0.0, + "step": 432, + "text_loss": 0.796875 + }, + { + "epoch": 0.04, + "learning_rate": 9.968078943723994e-06, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 433, + "text_loss": 0.5390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.967931490091694e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 434, + "text_loss": 0.8515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.967783697771485e-06, + "loss": 0.6399, + "regression_loss": 0.0, + "step": 435, + "text_loss": 0.74609375 + }, + { + "epoch": 0.04, + "learning_rate": 9.96763556677344e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 436, + "text_loss": 0.71875 + }, + { + "epoch": 0.04, + "learning_rate": 9.967487097107657e-06, + "loss": 0.6995, + "regression_loss": 0.0, + "step": 437, + "text_loss": 1.15625 + }, + { + "epoch": 0.04, + "learning_rate": 9.967338288784263e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 438, + "text_loss": 0.60546875 + }, + { + "epoch": 0.04, + "learning_rate": 9.967189141813398e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 439, + "text_loss": 0.369140625 + }, + { + "epoch": 0.04, + "learning_rate": 9.967039656205235e-06, + "loss": 0.6555, + "regression_loss": 0.0, + "step": 440, + "text_loss": 0.69140625 + }, + { + "epoch": 0.04, + "learning_rate": 9.96688983196996e-06, + "loss": 0.6348, + "regression_loss": 0.0, + "step": 441, + "text_loss": 0.5390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.96673966911779e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 442, + "text_loss": 0.5234375 + }, + { + "epoch": 0.04, + "learning_rate": 9.966589167658963e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 443, + "text_loss": 0.640625 + }, + { + "epoch": 0.04, + "learning_rate": 9.966438327603735e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 444, + "text_loss": 0.515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.966287148962396e-06, + "loss": 0.6353, + "regression_loss": 0.0, + "step": 445, + "text_loss": 0.67578125 + }, + { + "epoch": 0.04, + "learning_rate": 9.966135631745248e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 446, + "text_loss": 0.51953125 + }, + { + "epoch": 0.04, + "learning_rate": 9.965983775962622e-06, + "loss": 0.6301, + "regression_loss": 0.0, + "step": 447, + "text_loss": 0.5703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.965831581624872e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 448, + "text_loss": 0.62109375 + }, + { + "epoch": 0.04, + "learning_rate": 9.965679048742371e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 449, + "text_loss": 0.5234375 + }, + { + "epoch": 0.04, + "learning_rate": 9.96552617732552e-06, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 450, + "text_loss": 0.859375 + }, + { + "epoch": 0.04, + "learning_rate": 9.965372967384741e-06, + "loss": 0.6467, + "regression_loss": 0.0, + "step": 451, + "text_loss": 0.76953125 + }, + { + "epoch": 0.04, + "learning_rate": 9.965219418930479e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 452, + "text_loss": 0.416015625 + }, + { + "epoch": 0.04, + "learning_rate": 9.9650655319732e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 453, + "text_loss": 0.64453125 + }, + { + "epoch": 0.04, + "learning_rate": 9.964911306523397e-06, + "loss": 0.6458, + "regression_loss": 0.0, + "step": 454, + "text_loss": 0.99609375 + }, + { + "epoch": 0.04, + "learning_rate": 9.964756742591585e-06, + "loss": 0.6399, + "regression_loss": 0.0, + "step": 455, + "text_loss": 0.59375 + }, + { + "epoch": 0.04, + "learning_rate": 9.9646018401883e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 456, + "text_loss": 0.55078125 + }, + { + "epoch": 0.04, + "learning_rate": 9.964446599324104e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 457, + "text_loss": 0.5078125 + }, + { + "epoch": 0.04, + "learning_rate": 9.964291020009579e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 458, + "text_loss": 0.412109375 + }, + { + "epoch": 0.04, + "learning_rate": 9.964135102255333e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 459, + "text_loss": 0.453125 + }, + { + "epoch": 0.04, + "learning_rate": 9.963978846071993e-06, + "loss": 0.6648, + "regression_loss": 0.0, + "step": 460, + "text_loss": 0.5703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.963822251470214e-06, + "loss": 0.647, + "regression_loss": 0.0, + "step": 461, + "text_loss": 0.61328125 + }, + { + "epoch": 0.04, + "learning_rate": 9.963665318460671e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 462, + "text_loss": 0.470703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.963508047054065e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 463, + "text_loss": 0.5390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.963350437261115e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 464, + "text_loss": 0.7421875 + }, + { + "epoch": 0.04, + "learning_rate": 9.963192489092566e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 465, + "text_loss": 0.4375 + }, + { + "epoch": 0.04, + "learning_rate": 9.96303420255919e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 466, + "text_loss": 0.70703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.962875577671773e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 467, + "text_loss": 0.69921875 + }, + { + "epoch": 0.04, + "learning_rate": 9.962716614441133e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 468, + "text_loss": 0.494140625 + }, + { + "epoch": 0.04, + "learning_rate": 9.962557312878105e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 469, + "text_loss": 0.7265625 + }, + { + "epoch": 0.04, + "learning_rate": 9.962397672993552e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 470, + "text_loss": 0.48046875 + }, + { + "epoch": 0.04, + "learning_rate": 9.962237694798354e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 471, + "text_loss": 0.796875 + }, + { + "epoch": 0.04, + "learning_rate": 9.962077378303418e-06, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 472, + "text_loss": 0.515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.961916723519678e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 473, + "text_loss": 0.76953125 + }, + { + "epoch": 0.04, + "learning_rate": 9.961755730458082e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 474, + "text_loss": 0.5078125 + }, + { + "epoch": 0.04, + "learning_rate": 9.961594399129606e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 475, + "text_loss": 0.984375 + }, + { + "epoch": 0.04, + "learning_rate": 9.961432729545251e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 476, + "text_loss": 0.53515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.961270721716037e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 477, + "text_loss": 0.62890625 + }, + { + "epoch": 0.04, + "learning_rate": 9.96110837565301e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 478, + "text_loss": 0.5390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.960945691367238e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 479, + "text_loss": 0.546875 + }, + { + "epoch": 0.04, + "learning_rate": 9.960782668869811e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 480, + "text_loss": 0.84765625 + }, + { + "epoch": 0.04, + "learning_rate": 9.960619308171841e-06, + "loss": 0.6438, + "regression_loss": 0.0, + "step": 481, + "text_loss": 0.474609375 + }, + { + "epoch": 0.04, + "learning_rate": 9.96045560928447e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 482, + "text_loss": 0.5390625 + }, + { + "epoch": 0.04, + "learning_rate": 9.960291572218854e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 483, + "text_loss": 0.498046875 + }, + { + "epoch": 0.04, + "learning_rate": 9.96012719698618e-06, + "loss": 0.6003, + "regression_loss": 0.0, + "step": 484, + "text_loss": 0.64453125 + }, + { + "epoch": 0.04, + "learning_rate": 9.959962483597651e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 485, + "text_loss": 0.421875 + }, + { + "epoch": 0.04, + "learning_rate": 9.959797432064496e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 486, + "text_loss": 0.5078125 + }, + { + "epoch": 0.04, + "learning_rate": 9.959632042397971e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 487, + "text_loss": 0.482421875 + }, + { + "epoch": 0.04, + "learning_rate": 9.959466314609347e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 488, + "text_loss": 0.87890625 + }, + { + "epoch": 0.04, + "learning_rate": 9.959300248709927e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 489, + "text_loss": 0.453125 + }, + { + "epoch": 0.04, + "learning_rate": 9.959133844711028e-06, + "loss": 0.6479, + "regression_loss": 0.0, + "step": 490, + "text_loss": 0.828125 + }, + { + "epoch": 0.04, + "learning_rate": 9.958967102623998e-06, + "loss": 0.6699, + "regression_loss": 0.0, + "step": 491, + "text_loss": 0.6328125 + }, + { + "epoch": 0.04, + "learning_rate": 9.958800022460202e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 492, + "text_loss": 0.326171875 + }, + { + "epoch": 0.04, + "learning_rate": 9.958632604231033e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 493, + "text_loss": 0.45703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.958464847947903e-06, + "loss": 0.6951, + "regression_loss": 0.0, + "step": 494, + "text_loss": 0.283203125 + }, + { + "epoch": 0.04, + "learning_rate": 9.95829675362225e-06, + "loss": 0.6362, + "regression_loss": 0.0, + "step": 495, + "text_loss": 0.47265625 + }, + { + "epoch": 0.04, + "learning_rate": 9.958128321265531e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 496, + "text_loss": 0.40625 + }, + { + "epoch": 0.04, + "learning_rate": 9.957959550889234e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 497, + "text_loss": 0.3984375 + }, + { + "epoch": 0.04, + "learning_rate": 9.957790442504863e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 498, + "text_loss": 0.625 + }, + { + "epoch": 0.04, + "learning_rate": 9.957620996123942e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 499, + "text_loss": 0.55859375 + }, + { + "epoch": 0.04, + "learning_rate": 9.957451211758029e-06, + "loss": 0.7808, + "regression_loss": 0.0, + "step": 500, + "text_loss": 0.38671875 + }, + { + "epoch": 0.04, + "learning_rate": 9.957281089418698e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 501, + "text_loss": 0.3125 + }, + { + "epoch": 0.04, + "learning_rate": 9.957110629117544e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 502, + "text_loss": 0.796875 + }, + { + "epoch": 0.04, + "learning_rate": 9.95693983086619e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 503, + "text_loss": 0.62890625 + }, + { + "epoch": 0.04, + "learning_rate": 9.956768694676282e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 504, + "text_loss": 0.62890625 + }, + { + "epoch": 0.04, + "learning_rate": 9.956597220559484e-06, + "loss": 0.6733, + "regression_loss": 0.0, + "step": 505, + "text_loss": 0.67578125 + }, + { + "epoch": 0.04, + "learning_rate": 9.956425408527489e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 506, + "text_loss": 0.484375 + }, + { + "epoch": 0.04, + "learning_rate": 9.956253258592008e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 507, + "text_loss": 0.5625 + }, + { + "epoch": 0.04, + "learning_rate": 9.956080770764777e-06, + "loss": 0.6191, + "regression_loss": 0.0, + "step": 508, + "text_loss": 0.51171875 + }, + { + "epoch": 0.04, + "learning_rate": 9.955907945057558e-06, + "loss": 0.6575, + "regression_loss": 0.0, + "step": 509, + "text_loss": 0.86328125 + }, + { + "epoch": 0.04, + "learning_rate": 9.955734781482131e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 510, + "text_loss": 0.421875 + }, + { + "epoch": 0.04, + "learning_rate": 9.955561280050304e-06, + "loss": 0.6602, + "regression_loss": 0.0, + "step": 511, + "text_loss": 0.5703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.955387440773902e-06, + "loss": 0.7122, + "regression_loss": 0.0, + "step": 512, + "text_loss": 0.70703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.955213263664778e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 513, + "text_loss": 0.498046875 + }, + { + "epoch": 0.04, + "learning_rate": 9.955038748734807e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 514, + "text_loss": 0.62890625 + }, + { + "epoch": 0.04, + "learning_rate": 9.954863895995887e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 515, + "text_loss": 0.71875 + }, + { + "epoch": 0.04, + "learning_rate": 9.954688705459937e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 516, + "text_loss": 0.78515625 + }, + { + "epoch": 0.04, + "learning_rate": 9.9545131771389e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 517, + "text_loss": 0.859375 + }, + { + "epoch": 0.04, + "learning_rate": 9.954337311044745e-06, + "loss": 0.6057, + "regression_loss": 0.0, + "step": 518, + "text_loss": 0.412109375 + }, + { + "epoch": 0.04, + "learning_rate": 9.954161107189462e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 519, + "text_loss": 0.8984375 + }, + { + "epoch": 0.04, + "learning_rate": 9.953984565585062e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 520, + "text_loss": 0.27734375 + }, + { + "epoch": 0.04, + "learning_rate": 9.95380768624358e-06, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 521, + "text_loss": 0.3046875 + }, + { + "epoch": 0.04, + "learning_rate": 9.953630469177075e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 522, + "text_loss": 0.5234375 + }, + { + "epoch": 0.04, + "learning_rate": 9.95345291439763e-06, + "loss": 0.6938, + "regression_loss": 0.0, + "step": 523, + "text_loss": 0.859375 + }, + { + "epoch": 0.04, + "learning_rate": 9.95327502191735e-06, + "loss": 0.6448, + "regression_loss": 0.0, + "step": 524, + "text_loss": 0.89453125 + }, + { + "epoch": 0.04, + "learning_rate": 9.95309679174836e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 525, + "text_loss": 0.63671875 + }, + { + "epoch": 0.04, + "learning_rate": 9.952918223902816e-06, + "loss": 0.595, + "regression_loss": 0.0, + "step": 526, + "text_loss": 0.447265625 + }, + { + "epoch": 0.04, + "learning_rate": 9.952739318392885e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 527, + "text_loss": 0.4609375 + }, + { + "epoch": 0.04, + "learning_rate": 9.95256007523077e-06, + "loss": 0.6721, + "regression_loss": 0.0, + "step": 528, + "text_loss": 0.8046875 + }, + { + "epoch": 0.04, + "learning_rate": 9.952380494428688e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 529, + "text_loss": 0.431640625 + }, + { + "epoch": 0.04, + "learning_rate": 9.952200575998882e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 530, + "text_loss": 0.4140625 + }, + { + "epoch": 0.04, + "learning_rate": 9.952020319953616e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 531, + "text_loss": 0.59765625 + }, + { + "epoch": 0.04, + "learning_rate": 9.951839726305184e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 532, + "text_loss": 0.640625 + }, + { + "epoch": 0.04, + "learning_rate": 9.951658795065894e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 533, + "text_loss": 0.65625 + }, + { + "epoch": 0.04, + "learning_rate": 9.951477526248082e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 534, + "text_loss": 0.79296875 + }, + { + "epoch": 0.04, + "learning_rate": 9.951295919864105e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 535, + "text_loss": 0.34765625 + }, + { + "epoch": 0.04, + "learning_rate": 9.951113975926346e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 536, + "text_loss": 0.54296875 + }, + { + "epoch": 0.04, + "learning_rate": 9.950931694447209e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 537, + "text_loss": 0.6796875 + }, + { + "epoch": 0.04, + "learning_rate": 9.950749075439118e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 538, + "text_loss": 0.70703125 + }, + { + "epoch": 0.04, + "learning_rate": 9.950566118914525e-06, + "loss": 0.6719, + "regression_loss": 0.0, + "step": 539, + "text_loss": 0.6953125 + }, + { + "epoch": 0.04, + "learning_rate": 9.950382824885905e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 540, + "text_loss": 0.6171875 + }, + { + "epoch": 0.04, + "learning_rate": 9.950199193365749e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 541, + "text_loss": 0.4921875 + }, + { + "epoch": 0.05, + "learning_rate": 9.95001522436658e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 542, + "text_loss": 0.6796875 + }, + { + "epoch": 0.05, + "learning_rate": 9.94983091790094e-06, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 543, + "text_loss": 0.61328125 + }, + { + "epoch": 0.05, + "learning_rate": 9.949646273981394e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 544, + "text_loss": 0.51171875 + }, + { + "epoch": 0.05, + "learning_rate": 9.949461292620529e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 545, + "text_loss": 0.353515625 + }, + { + "epoch": 0.05, + "learning_rate": 9.949275973830953e-06, + "loss": 0.6941, + "regression_loss": 0.0, + "step": 546, + "text_loss": 0.74609375 + }, + { + "epoch": 0.05, + "learning_rate": 9.949090317625307e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 547, + "text_loss": 0.50390625 + }, + { + "epoch": 0.05, + "learning_rate": 9.948904324016245e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 548, + "text_loss": 0.376953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.948717993016443e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 549, + "text_loss": 0.4296875 + }, + { + "epoch": 0.05, + "learning_rate": 9.94853132463861e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 550, + "text_loss": 0.7421875 + }, + { + "epoch": 0.05, + "learning_rate": 9.948344318895468e-06, + "loss": 0.6426, + "regression_loss": 0.0, + "step": 551, + "text_loss": 0.83203125 + }, + { + "epoch": 0.05, + "learning_rate": 9.94815697579977e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 552, + "text_loss": 0.384765625 + }, + { + "epoch": 0.05, + "learning_rate": 9.947969295364286e-06, + "loss": 0.6313, + "regression_loss": 0.0, + "step": 553, + "text_loss": 0.396484375 + }, + { + "epoch": 0.05, + "learning_rate": 9.94778127760181e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 554, + "text_loss": 0.51953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.947592922525161e-06, + "loss": 0.7808, + "regression_loss": 0.0, + "step": 555, + "text_loss": 0.51953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.94740423014718e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 556, + "text_loss": 0.43359375 + }, + { + "epoch": 0.05, + "learning_rate": 9.947215200480731e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 557, + "text_loss": 0.55859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.947025833538703e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 558, + "text_loss": 0.66015625 + }, + { + "epoch": 0.05, + "learning_rate": 9.946836129334004e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 559, + "text_loss": 0.6953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.946646087879566e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 560, + "text_loss": 0.3515625 + }, + { + "epoch": 0.05, + "learning_rate": 9.946455709188348e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 561, + "text_loss": 0.328125 + }, + { + "epoch": 0.05, + "learning_rate": 9.946264993273324e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 562, + "text_loss": 0.408203125 + }, + { + "epoch": 0.05, + "learning_rate": 9.946073940147503e-06, + "loss": 0.6194, + "regression_loss": 0.0, + "step": 563, + "text_loss": 0.7578125 + }, + { + "epoch": 0.05, + "learning_rate": 9.945882549823906e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 564, + "text_loss": 0.63671875 + }, + { + "epoch": 0.05, + "learning_rate": 9.945690822315582e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 565, + "text_loss": 0.7734375 + }, + { + "epoch": 0.05, + "learning_rate": 9.945498757635599e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 566, + "text_loss": 0.4921875 + }, + { + "epoch": 0.05, + "learning_rate": 9.945306355797056e-06, + "loss": 0.8926, + "regression_loss": 0.0, + "step": 567, + "text_loss": 0.490234375 + }, + { + "epoch": 0.05, + "learning_rate": 9.945113616813066e-06, + "loss": 0.6318, + "regression_loss": 0.0, + "step": 568, + "text_loss": 0.73828125 + }, + { + "epoch": 0.05, + "learning_rate": 9.94492054069677e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 569, + "text_loss": 0.66015625 + }, + { + "epoch": 0.05, + "learning_rate": 9.944727127461334e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 570, + "text_loss": 0.267578125 + }, + { + "epoch": 0.05, + "learning_rate": 9.944533377119939e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 571, + "text_loss": 0.470703125 + }, + { + "epoch": 0.05, + "learning_rate": 9.944339289685795e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 572, + "text_loss": 0.58984375 + }, + { + "epoch": 0.05, + "learning_rate": 9.944144865172138e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 573, + "text_loss": 0.87109375 + }, + { + "epoch": 0.05, + "learning_rate": 9.943950103592219e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 574, + "text_loss": 0.65625 + }, + { + "epoch": 0.05, + "learning_rate": 9.943755004959317e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 575, + "text_loss": 0.84375 + }, + { + "epoch": 0.05, + "learning_rate": 9.943559569286731e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 576, + "text_loss": 0.435546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.943363796587789e-06, + "loss": 0.6799, + "regression_loss": 0.0, + "step": 577, + "text_loss": 0.51171875 + }, + { + "epoch": 0.05, + "learning_rate": 9.943167686875832e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 578, + "text_loss": 0.408203125 + }, + { + "epoch": 0.05, + "learning_rate": 9.942971240164234e-06, + "loss": 0.6423, + "regression_loss": 0.0, + "step": 579, + "text_loss": 0.609375 + }, + { + "epoch": 0.05, + "learning_rate": 9.942774456466387e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 580, + "text_loss": 0.3671875 + }, + { + "epoch": 0.05, + "learning_rate": 9.942577335795706e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 581, + "text_loss": 0.80859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.94237987816563e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 582, + "text_loss": 0.5078125 + }, + { + "epoch": 0.05, + "learning_rate": 9.942182083589621e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 583, + "text_loss": 0.41015625 + }, + { + "epoch": 0.05, + "learning_rate": 9.941983952081163e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 584, + "text_loss": 0.57421875 + }, + { + "epoch": 0.05, + "learning_rate": 9.941785483653762e-06, + "loss": 0.6692, + "regression_loss": 0.0, + "step": 585, + "text_loss": 0.5390625 + }, + { + "epoch": 0.05, + "learning_rate": 9.941586678320953e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 586, + "text_loss": 0.67578125 + }, + { + "epoch": 0.05, + "learning_rate": 9.941387536096286e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 587, + "text_loss": 0.546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.941188056993338e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 588, + "text_loss": 0.60546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.94098824102571e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 589, + "text_loss": 0.51953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.940788088207021e-06, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 590, + "text_loss": 0.7265625 + }, + { + "epoch": 0.05, + "learning_rate": 9.940587598550922e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 591, + "text_loss": 0.57421875 + }, + { + "epoch": 0.05, + "learning_rate": 9.940386772071074e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 592, + "text_loss": 0.54296875 + }, + { + "epoch": 0.05, + "learning_rate": 9.940185608781174e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 593, + "text_loss": 0.66796875 + }, + { + "epoch": 0.05, + "learning_rate": 9.939984108694936e-06, + "loss": 0.6138, + "regression_loss": 0.0, + "step": 594, + "text_loss": 0.37109375 + }, + { + "epoch": 0.05, + "learning_rate": 9.939782271826093e-06, + "loss": 0.6423, + "regression_loss": 0.0, + "step": 595, + "text_loss": 0.7890625 + }, + { + "epoch": 0.05, + "learning_rate": 9.93958009818841e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 596, + "text_loss": 0.78125 + }, + { + "epoch": 0.05, + "learning_rate": 9.939377587795668e-06, + "loss": 0.6443, + "regression_loss": 0.0, + "step": 597, + "text_loss": 0.43359375 + }, + { + "epoch": 0.05, + "learning_rate": 9.939174740661672e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 598, + "text_loss": 0.78125 + }, + { + "epoch": 0.05, + "learning_rate": 9.938971556800254e-06, + "loss": 0.6157, + "regression_loss": 0.0, + "step": 599, + "text_loss": 0.60546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.938768036225264e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 600, + "text_loss": 0.55859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.938564178950575e-06, + "loss": 0.6094, + "regression_loss": 0.0, + "step": 601, + "text_loss": 0.6484375 + }, + { + "epoch": 0.05, + "learning_rate": 9.938359984990088e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 602, + "text_loss": 0.78125 + }, + { + "epoch": 0.05, + "learning_rate": 9.938155454357725e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 603, + "text_loss": 0.51171875 + }, + { + "epoch": 0.05, + "learning_rate": 9.937950587067428e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 604, + "text_loss": 0.462890625 + }, + { + "epoch": 0.05, + "learning_rate": 9.937745383133163e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 605, + "text_loss": 0.4765625 + }, + { + "epoch": 0.05, + "learning_rate": 9.93753984256892e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 606, + "text_loss": 0.61328125 + }, + { + "epoch": 0.05, + "learning_rate": 9.937333965388713e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 607, + "text_loss": 0.71875 + }, + { + "epoch": 0.05, + "learning_rate": 9.937127751606577e-06, + "loss": 0.4587, + "regression_loss": 0.0, + "step": 608, + "text_loss": 0.22265625 + }, + { + "epoch": 0.05, + "learning_rate": 9.936921201236569e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 609, + "text_loss": 0.625 + }, + { + "epoch": 0.05, + "learning_rate": 9.936714314292774e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 610, + "text_loss": 0.4765625 + }, + { + "epoch": 0.05, + "learning_rate": 9.936507090789294e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 611, + "text_loss": 0.423828125 + }, + { + "epoch": 0.05, + "learning_rate": 9.936299530740254e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 612, + "text_loss": 0.56640625 + }, + { + "epoch": 0.05, + "learning_rate": 9.93609163415981e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 613, + "text_loss": 0.62890625 + }, + { + "epoch": 0.05, + "learning_rate": 9.935883401062133e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 614, + "text_loss": 0.482421875 + }, + { + "epoch": 0.05, + "learning_rate": 9.935674831461418e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 615, + "text_loss": 0.55859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.935465925371885e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 616, + "text_loss": 0.6171875 + }, + { + "epoch": 0.05, + "learning_rate": 9.935256682807776e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 617, + "text_loss": 0.44140625 + }, + { + "epoch": 0.05, + "learning_rate": 9.935047103783358e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 618, + "text_loss": 0.5078125 + }, + { + "epoch": 0.05, + "learning_rate": 9.934837188312915e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 619, + "text_loss": 0.6640625 + }, + { + "epoch": 0.05, + "learning_rate": 9.93462693641076e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 620, + "text_loss": 0.3125 + }, + { + "epoch": 0.05, + "learning_rate": 9.934416348091229e-06, + "loss": 0.6528, + "regression_loss": 0.0, + "step": 621, + "text_loss": 0.6875 + }, + { + "epoch": 0.05, + "learning_rate": 9.934205423368676e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 622, + "text_loss": 0.75 + }, + { + "epoch": 0.05, + "learning_rate": 9.933994162257483e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 623, + "text_loss": 0.60546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.933782564772049e-06, + "loss": 0.6152, + "regression_loss": 0.0, + "step": 624, + "text_loss": 0.5078125 + }, + { + "epoch": 0.05, + "learning_rate": 9.933570630926803e-06, + "loss": 0.5333, + "regression_loss": 0.0, + "step": 625, + "text_loss": 0.58984375 + }, + { + "epoch": 0.05, + "learning_rate": 9.933358360736193e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 626, + "text_loss": 0.77734375 + }, + { + "epoch": 0.05, + "learning_rate": 9.93314575421469e-06, + "loss": 0.9758, + "regression_loss": 0.0, + "step": 627, + "text_loss": 0.828125 + }, + { + "epoch": 0.05, + "learning_rate": 9.932932811376787e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 628, + "text_loss": 0.74609375 + }, + { + "epoch": 0.05, + "learning_rate": 9.932719532237006e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 629, + "text_loss": 0.71484375 + }, + { + "epoch": 0.05, + "learning_rate": 9.932505916809882e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 630, + "text_loss": 0.75390625 + }, + { + "epoch": 0.05, + "learning_rate": 9.932291965109979e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 631, + "text_loss": 0.6171875 + }, + { + "epoch": 0.05, + "learning_rate": 9.932077677151887e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 632, + "text_loss": 0.828125 + }, + { + "epoch": 0.05, + "learning_rate": 9.93186305295021e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 633, + "text_loss": 1.0703125 + }, + { + "epoch": 0.05, + "learning_rate": 9.931648092519584e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 634, + "text_loss": 0.443359375 + }, + { + "epoch": 0.05, + "learning_rate": 9.931432795874663e-06, + "loss": 0.4711, + "regression_loss": 0.0, + "step": 635, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.05, + "learning_rate": 9.931217163030123e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 636, + "text_loss": 0.435546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.931001194000667e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 637, + "text_loss": 0.4609375 + }, + { + "epoch": 0.05, + "learning_rate": 9.930784888801015e-06, + "loss": 0.6816, + "regression_loss": 0.0, + "step": 638, + "text_loss": 0.546875 + }, + { + "epoch": 0.05, + "learning_rate": 9.930568247445917e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 639, + "text_loss": 0.3203125 + }, + { + "epoch": 0.05, + "learning_rate": 9.930351269950144e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 640, + "text_loss": 0.5859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.930133956328484e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 641, + "text_loss": 0.51953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.929916306595756e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 642, + "text_loss": 0.80078125 + }, + { + "epoch": 0.05, + "learning_rate": 9.929698320766793e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 643, + "text_loss": 0.5859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.929479998856462e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 644, + "text_loss": 0.90234375 + }, + { + "epoch": 0.05, + "learning_rate": 9.929261340879643e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 645, + "text_loss": 0.59765625 + }, + { + "epoch": 0.05, + "learning_rate": 9.929042346851248e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 646, + "text_loss": 0.859375 + }, + { + "epoch": 0.05, + "learning_rate": 9.928823016786201e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 647, + "text_loss": 0.625 + }, + { + "epoch": 0.05, + "learning_rate": 9.928603350699458e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 648, + "text_loss": 0.41015625 + }, + { + "epoch": 0.05, + "learning_rate": 9.928383348605994e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 649, + "text_loss": 0.51953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.928163010520808e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 650, + "text_loss": 0.498046875 + }, + { + "epoch": 0.05, + "learning_rate": 9.92794233645892e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 651, + "text_loss": 0.46484375 + }, + { + "epoch": 0.05, + "learning_rate": 9.927721326435377e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 652, + "text_loss": 0.50390625 + }, + { + "epoch": 0.05, + "learning_rate": 9.927499980465244e-06, + "loss": 0.7593, + "regression_loss": 0.0, + "step": 653, + "text_loss": 0.77734375 + }, + { + "epoch": 0.05, + "learning_rate": 9.927278298563614e-06, + "loss": 0.7146, + "regression_loss": 0.0, + "step": 654, + "text_loss": 0.66796875 + }, + { + "epoch": 0.05, + "learning_rate": 9.927056280745597e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 655, + "text_loss": 0.625 + }, + { + "epoch": 0.05, + "learning_rate": 9.926833927026332e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 656, + "text_loss": 0.2578125 + }, + { + "epoch": 0.05, + "learning_rate": 9.926611237420973e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 657, + "text_loss": 0.64453125 + }, + { + "epoch": 0.05, + "learning_rate": 9.926388211944707e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 658, + "text_loss": 0.3125 + }, + { + "epoch": 0.05, + "learning_rate": 9.926164850612737e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 659, + "text_loss": 0.6953125 + }, + { + "epoch": 0.05, + "learning_rate": 9.92594115344029e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 660, + "text_loss": 0.390625 + }, + { + "epoch": 0.05, + "learning_rate": 9.925717120442615e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 661, + "text_loss": 0.72265625 + }, + { + "epoch": 0.06, + "learning_rate": 9.92549275163499e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 662, + "text_loss": 0.35546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.925268047032708e-06, + "loss": 0.5143, + "regression_loss": 0.0, + "step": 663, + "text_loss": 0.498046875 + }, + { + "epoch": 0.06, + "learning_rate": 9.925043006651089e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 664, + "text_loss": 0.65234375 + }, + { + "epoch": 0.06, + "learning_rate": 9.924817630505475e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 665, + "text_loss": 0.45703125 + }, + { + "epoch": 0.06, + "learning_rate": 9.924591918611231e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 666, + "text_loss": 0.76171875 + }, + { + "epoch": 0.06, + "learning_rate": 9.924365870983743e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 667, + "text_loss": 0.6171875 + }, + { + "epoch": 0.06, + "learning_rate": 9.924139487638427e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 668, + "text_loss": 0.703125 + }, + { + "epoch": 0.06, + "learning_rate": 9.92391276859071e-06, + "loss": 0.7488, + "regression_loss": 0.0, + "step": 669, + "text_loss": 0.51171875 + }, + { + "epoch": 0.06, + "learning_rate": 9.923685713856053e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 670, + "text_loss": 0.4609375 + }, + { + "epoch": 0.06, + "learning_rate": 9.923458323449935e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 671, + "text_loss": 0.65625 + }, + { + "epoch": 0.06, + "learning_rate": 9.923230597387856e-06, + "loss": 0.6802, + "regression_loss": 0.0, + "step": 672, + "text_loss": 0.60546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.923002535685342e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 673, + "text_loss": 0.58984375 + }, + { + "epoch": 0.06, + "learning_rate": 9.922774138357944e-06, + "loss": 0.4835, + "regression_loss": 0.0, + "step": 674, + "text_loss": 0.8046875 + }, + { + "epoch": 0.06, + "learning_rate": 9.92254540542123e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 675, + "text_loss": 0.263671875 + }, + { + "epoch": 0.06, + "learning_rate": 9.922316336890794e-06, + "loss": 0.5482, + "regression_loss": 0.0, + "step": 676, + "text_loss": 1.3203125 + }, + { + "epoch": 0.06, + "learning_rate": 9.922086932782253e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 677, + "text_loss": 0.392578125 + }, + { + "epoch": 0.06, + "learning_rate": 9.921857193111247e-06, + "loss": 0.6165, + "regression_loss": 0.0, + "step": 678, + "text_loss": 0.6640625 + }, + { + "epoch": 0.06, + "learning_rate": 9.921627117893438e-06, + "loss": 0.6562, + "regression_loss": 0.0, + "step": 679, + "text_loss": 0.78125 + }, + { + "epoch": 0.06, + "learning_rate": 9.921396707144513e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 680, + "text_loss": 0.46875 + }, + { + "epoch": 0.06, + "learning_rate": 9.921165960880178e-06, + "loss": 0.6418, + "regression_loss": 0.0, + "step": 681, + "text_loss": 0.365234375 + }, + { + "epoch": 0.06, + "learning_rate": 9.920934879116165e-06, + "loss": 0.655, + "regression_loss": 0.0, + "step": 682, + "text_loss": 0.6328125 + }, + { + "epoch": 0.06, + "learning_rate": 9.920703461868226e-06, + "loss": 0.6816, + "regression_loss": 0.0, + "step": 683, + "text_loss": 0.89453125 + }, + { + "epoch": 0.06, + "learning_rate": 9.920471709152141e-06, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 684, + "text_loss": 0.65625 + }, + { + "epoch": 0.06, + "learning_rate": 9.92023962098371e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 685, + "text_loss": 0.55859375 + }, + { + "epoch": 0.06, + "learning_rate": 9.920007197378752e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 686, + "text_loss": 0.3671875 + }, + { + "epoch": 0.06, + "learning_rate": 9.919774438353116e-06, + "loss": 0.626, + "regression_loss": 0.0, + "step": 687, + "text_loss": 0.55078125 + }, + { + "epoch": 0.06, + "learning_rate": 9.919541343922667e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 688, + "text_loss": 0.67578125 + }, + { + "epoch": 0.06, + "learning_rate": 9.919307914103297e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 689, + "text_loss": 0.6015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.919074148910925e-06, + "loss": 0.6719, + "regression_loss": 0.0, + "step": 690, + "text_loss": 0.79296875 + }, + { + "epoch": 0.06, + "learning_rate": 9.91884004836148e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 691, + "text_loss": 1.1015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.918605612470929e-06, + "loss": 0.6538, + "regression_loss": 0.0, + "step": 692, + "text_loss": 0.70703125 + }, + { + "epoch": 0.06, + "learning_rate": 9.918370841255248e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 693, + "text_loss": 0.271484375 + }, + { + "epoch": 0.06, + "learning_rate": 9.918135734730448e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 694, + "text_loss": 0.3828125 + }, + { + "epoch": 0.06, + "learning_rate": 9.917900292912554e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 695, + "text_loss": 0.65625 + }, + { + "epoch": 0.06, + "learning_rate": 9.917664515817618e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 696, + "text_loss": 0.82421875 + }, + { + "epoch": 0.06, + "learning_rate": 9.917428403461714e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 697, + "text_loss": 0.6640625 + }, + { + "epoch": 0.06, + "learning_rate": 9.91719195586094e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 698, + "text_loss": 0.58203125 + }, + { + "epoch": 0.06, + "learning_rate": 9.916955173031415e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 699, + "text_loss": 0.65234375 + }, + { + "epoch": 0.06, + "learning_rate": 9.916718054989283e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 700, + "text_loss": 0.55078125 + }, + { + "epoch": 0.06, + "learning_rate": 9.916480601750706e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 701, + "text_loss": 0.322265625 + }, + { + "epoch": 0.06, + "learning_rate": 9.916242813331876e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 702, + "text_loss": 0.65234375 + }, + { + "epoch": 0.06, + "learning_rate": 9.916004689749e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 703, + "text_loss": 0.66796875 + }, + { + "epoch": 0.06, + "learning_rate": 9.915766231018317e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 704, + "text_loss": 0.46484375 + }, + { + "epoch": 0.06, + "learning_rate": 9.915527437156083e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 705, + "text_loss": 0.41796875 + }, + { + "epoch": 0.06, + "learning_rate": 9.915288308178574e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 706, + "text_loss": 0.55078125 + }, + { + "epoch": 0.06, + "learning_rate": 9.915048844102095e-06, + "loss": 0.6416, + "regression_loss": 0.0, + "step": 707, + "text_loss": 0.953125 + }, + { + "epoch": 0.06, + "learning_rate": 9.914809044942972e-06, + "loss": 0.665, + "regression_loss": 0.0, + "step": 708, + "text_loss": 0.70703125 + }, + { + "epoch": 0.06, + "learning_rate": 9.914568910717552e-06, + "loss": 0.5109, + "regression_loss": 0.0, + "step": 709, + "text_loss": 0.4609375 + }, + { + "epoch": 0.06, + "learning_rate": 9.914328441442209e-06, + "loss": 0.5219, + "regression_loss": 0.0, + "step": 710, + "text_loss": 0.52734375 + }, + { + "epoch": 0.06, + "learning_rate": 9.914087637133331e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 711, + "text_loss": 0.6015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.913846497807342e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 712, + "text_loss": 0.59765625 + }, + { + "epoch": 0.06, + "learning_rate": 9.913605023480676e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 713, + "text_loss": 0.2890625 + }, + { + "epoch": 0.06, + "learning_rate": 9.913363214169798e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 714, + "text_loss": 0.6015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.913121069891193e-06, + "loss": 0.6365, + "regression_loss": 0.0, + "step": 715, + "text_loss": 0.498046875 + }, + { + "epoch": 0.06, + "learning_rate": 9.91287859066137e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 716, + "text_loss": 0.36328125 + }, + { + "epoch": 0.06, + "learning_rate": 9.912635776496858e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 717, + "text_loss": 0.498046875 + }, + { + "epoch": 0.06, + "learning_rate": 9.91239262741421e-06, + "loss": 0.438, + "regression_loss": 0.0, + "step": 718, + "text_loss": 0.546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.912149143430007e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 719, + "text_loss": 0.6328125 + }, + { + "epoch": 0.06, + "learning_rate": 9.911905324560844e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 720, + "text_loss": 0.55859375 + }, + { + "epoch": 0.06, + "learning_rate": 9.911661170823347e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 721, + "text_loss": 0.42578125 + }, + { + "epoch": 0.06, + "learning_rate": 9.91141668223416e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 722, + "text_loss": 0.75390625 + }, + { + "epoch": 0.06, + "learning_rate": 9.91117185880995e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 723, + "text_loss": 0.59375 + }, + { + "epoch": 0.06, + "learning_rate": 9.910926700567406e-06, + "loss": 0.6179, + "regression_loss": 0.0, + "step": 724, + "text_loss": 0.55078125 + }, + { + "epoch": 0.06, + "learning_rate": 9.910681207523244e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 725, + "text_loss": 0.6640625 + }, + { + "epoch": 0.06, + "learning_rate": 9.910435379694203e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 726, + "text_loss": 0.74609375 + }, + { + "epoch": 0.06, + "learning_rate": 9.910189217097037e-06, + "loss": 0.5507, + "regression_loss": 0.0, + "step": 727, + "text_loss": 0.6015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.909942719748532e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 728, + "text_loss": 0.8046875 + }, + { + "epoch": 0.06, + "learning_rate": 9.909695887665491e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 729, + "text_loss": 0.890625 + }, + { + "epoch": 0.06, + "learning_rate": 9.909448720864742e-06, + "loss": 0.626, + "regression_loss": 0.0, + "step": 730, + "text_loss": 0.369140625 + }, + { + "epoch": 0.06, + "learning_rate": 9.909201219363137e-06, + "loss": 0.689, + "regression_loss": 0.0, + "step": 731, + "text_loss": 0.859375 + }, + { + "epoch": 0.06, + "learning_rate": 9.908953383177547e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 732, + "text_loss": 0.5625 + }, + { + "epoch": 0.06, + "learning_rate": 9.908705212324869e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 733, + "text_loss": 0.474609375 + }, + { + "epoch": 0.06, + "learning_rate": 9.908456706822023e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 734, + "text_loss": 0.42578125 + }, + { + "epoch": 0.06, + "learning_rate": 9.90820786668595e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 735, + "text_loss": 0.890625 + }, + { + "epoch": 0.06, + "learning_rate": 9.907958691933616e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 736, + "text_loss": 0.58203125 + }, + { + "epoch": 0.06, + "learning_rate": 9.907709182582005e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 737, + "text_loss": 0.70703125 + }, + { + "epoch": 0.06, + "learning_rate": 9.907459338648132e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 738, + "text_loss": 0.384765625 + }, + { + "epoch": 0.06, + "learning_rate": 9.907209160149028e-06, + "loss": 0.6606, + "regression_loss": 0.0, + "step": 739, + "text_loss": 0.6015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.906958647101745e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 740, + "text_loss": 0.76953125 + }, + { + "epoch": 0.06, + "learning_rate": 9.906707799523368e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 741, + "text_loss": 0.65625 + }, + { + "epoch": 0.06, + "learning_rate": 9.906456617430995e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 742, + "text_loss": 0.50390625 + }, + { + "epoch": 0.06, + "learning_rate": 9.90620510084175e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 743, + "text_loss": 0.7734375 + }, + { + "epoch": 0.06, + "learning_rate": 9.905953249772782e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 744, + "text_loss": 0.76953125 + }, + { + "epoch": 0.06, + "learning_rate": 9.905701064241259e-06, + "loss": 0.6238, + "regression_loss": 0.0, + "step": 745, + "text_loss": 0.921875 + }, + { + "epoch": 0.06, + "learning_rate": 9.905448544264376e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 746, + "text_loss": 0.466796875 + }, + { + "epoch": 0.06, + "learning_rate": 9.905195689859348e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 747, + "text_loss": 0.54296875 + }, + { + "epoch": 0.06, + "learning_rate": 9.904942501043411e-06, + "loss": 0.6577, + "regression_loss": 0.0, + "step": 748, + "text_loss": 0.5625 + }, + { + "epoch": 0.06, + "learning_rate": 9.904688977833827e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 749, + "text_loss": 0.6875 + }, + { + "epoch": 0.06, + "learning_rate": 9.904435120247882e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 750, + "text_loss": 0.75 + }, + { + "epoch": 0.06, + "learning_rate": 9.90418092830288e-06, + "loss": 0.6194, + "regression_loss": 0.0, + "step": 751, + "text_loss": 0.6796875 + }, + { + "epoch": 0.06, + "learning_rate": 9.903926402016153e-06, + "loss": 0.6658, + "regression_loss": 0.0, + "step": 752, + "text_loss": 0.6953125 + }, + { + "epoch": 0.06, + "learning_rate": 9.903671541405051e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 753, + "text_loss": 0.9453125 + }, + { + "epoch": 0.06, + "learning_rate": 9.903416346486951e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 754, + "text_loss": 0.435546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.903160817279249e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 755, + "text_loss": 0.62109375 + }, + { + "epoch": 0.06, + "learning_rate": 9.902904953799367e-06, + "loss": 0.6807, + "regression_loss": 0.0, + "step": 756, + "text_loss": 0.65625 + }, + { + "epoch": 0.06, + "learning_rate": 9.902648756064748e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 757, + "text_loss": 0.55078125 + }, + { + "epoch": 0.06, + "learning_rate": 9.902392224092858e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 758, + "text_loss": 0.3828125 + }, + { + "epoch": 0.06, + "learning_rate": 9.902135357901185e-06, + "loss": 0.6729, + "regression_loss": 0.0, + "step": 759, + "text_loss": 0.435546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.901878157507246e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 760, + "text_loss": 0.9609375 + }, + { + "epoch": 0.06, + "learning_rate": 9.90162062292857e-06, + "loss": 0.6514, + "regression_loss": 0.0, + "step": 761, + "text_loss": 0.71484375 + }, + { + "epoch": 0.06, + "learning_rate": 9.901362754182714e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 762, + "text_loss": 0.6875 + }, + { + "epoch": 0.06, + "learning_rate": 9.901104551287262e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 763, + "text_loss": 0.7734375 + }, + { + "epoch": 0.06, + "learning_rate": 9.900846014259816e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 764, + "text_loss": 0.51953125 + }, + { + "epoch": 0.06, + "learning_rate": 9.900587143118e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 765, + "text_loss": 0.68359375 + }, + { + "epoch": 0.06, + "learning_rate": 9.900327937879462e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 766, + "text_loss": 0.59765625 + }, + { + "epoch": 0.06, + "learning_rate": 9.900068398561876e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 767, + "text_loss": 0.66015625 + }, + { + "epoch": 0.06, + "learning_rate": 9.899808525182935e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 768, + "text_loss": 0.53515625 + }, + { + "epoch": 0.06, + "learning_rate": 9.899548317760355e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 769, + "text_loss": 0.314453125 + }, + { + "epoch": 0.06, + "learning_rate": 9.899287776311877e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 770, + "text_loss": 0.6640625 + }, + { + "epoch": 0.06, + "learning_rate": 9.899026900855261e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 771, + "text_loss": 0.6875 + }, + { + "epoch": 0.06, + "learning_rate": 9.898765691408295e-06, + "loss": 0.6987, + "regression_loss": 0.0, + "step": 772, + "text_loss": 0.94921875 + }, + { + "epoch": 0.06, + "learning_rate": 9.898504147988785e-06, + "loss": 0.7092, + "regression_loss": 0.0, + "step": 773, + "text_loss": 0.61328125 + }, + { + "epoch": 0.06, + "learning_rate": 9.898242270614563e-06, + "loss": 0.4065, + "regression_loss": 0.0, + "step": 774, + "text_loss": 0.46484375 + }, + { + "epoch": 0.06, + "learning_rate": 9.897980059303481e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 775, + "text_loss": 0.29296875 + }, + { + "epoch": 0.06, + "learning_rate": 9.897717514073416e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 776, + "text_loss": 0.60546875 + }, + { + "epoch": 0.06, + "learning_rate": 9.897454634942267e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 777, + "text_loss": 0.31640625 + }, + { + "epoch": 0.06, + "learning_rate": 9.897191421927956e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 778, + "text_loss": 0.384765625 + }, + { + "epoch": 0.06, + "learning_rate": 9.896927875048427e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 779, + "text_loss": 0.54296875 + }, + { + "epoch": 0.06, + "learning_rate": 9.896663994321646e-06, + "loss": 0.6506, + "regression_loss": 0.0, + "step": 780, + "text_loss": 0.5390625 + }, + { + "epoch": 0.06, + "learning_rate": 9.896399779765607e-06, + "loss": 0.7866, + "regression_loss": 0.0, + "step": 781, + "text_loss": 0.7265625 + }, + { + "epoch": 0.06, + "learning_rate": 9.896135231398318e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 782, + "text_loss": 0.56640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.895870349237818e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 783, + "text_loss": 0.640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.895605133302163e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 784, + "text_loss": 0.4296875 + }, + { + "epoch": 0.07, + "learning_rate": 9.895339583609436e-06, + "loss": 0.7402, + "regression_loss": 0.0, + "step": 785, + "text_loss": 0.4296875 + }, + { + "epoch": 0.07, + "learning_rate": 9.89507370017774e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 786, + "text_loss": 0.52734375 + }, + { + "epoch": 0.07, + "learning_rate": 9.894807483025202e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 787, + "text_loss": 0.392578125 + }, + { + "epoch": 0.07, + "learning_rate": 9.89454093216997e-06, + "loss": 0.6255, + "regression_loss": 0.0, + "step": 788, + "text_loss": 0.5859375 + }, + { + "epoch": 0.07, + "learning_rate": 9.894274047630216e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 789, + "text_loss": 0.625 + }, + { + "epoch": 0.07, + "learning_rate": 9.894006829424137e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 790, + "text_loss": 0.68359375 + }, + { + "epoch": 0.07, + "learning_rate": 9.893739277569949e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 791, + "text_loss": 0.52734375 + }, + { + "epoch": 0.07, + "learning_rate": 9.893471392085891e-06, + "loss": 0.6191, + "regression_loss": 0.0, + "step": 792, + "text_loss": 0.71484375 + }, + { + "epoch": 0.07, + "learning_rate": 9.89320317299023e-06, + "loss": 0.6289, + "regression_loss": 0.0, + "step": 793, + "text_loss": 0.7265625 + }, + { + "epoch": 0.07, + "learning_rate": 9.89293462030125e-06, + "loss": 0.6545, + "regression_loss": 0.0, + "step": 794, + "text_loss": 1.046875 + }, + { + "epoch": 0.07, + "learning_rate": 9.892665734037255e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 795, + "text_loss": 0.451171875 + }, + { + "epoch": 0.07, + "learning_rate": 9.892396514216583e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 796, + "text_loss": 0.63671875 + }, + { + "epoch": 0.07, + "learning_rate": 9.892126960857585e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 797, + "text_loss": 0.51953125 + }, + { + "epoch": 0.07, + "learning_rate": 9.891857073978636e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 798, + "text_loss": 0.82421875 + }, + { + "epoch": 0.07, + "learning_rate": 9.891586853598139e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 799, + "text_loss": 0.6015625 + }, + { + "epoch": 0.07, + "learning_rate": 9.891316299734514e-06, + "loss": 0.6384, + "regression_loss": 0.0, + "step": 800, + "text_loss": 0.74609375 + }, + { + "epoch": 0.07, + "learning_rate": 9.891045412406209e-06, + "loss": 0.427, + "regression_loss": 0.0, + "step": 801, + "text_loss": 0.26953125 + }, + { + "epoch": 0.07, + "learning_rate": 9.890774191631686e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 802, + "text_loss": 0.56640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.89050263742944e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 803, + "text_loss": 0.66796875 + }, + { + "epoch": 0.07, + "learning_rate": 9.890230749817984e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 804, + "text_loss": 0.419921875 + }, + { + "epoch": 0.07, + "learning_rate": 9.88995852881585e-06, + "loss": 0.6418, + "regression_loss": 0.0, + "step": 805, + "text_loss": 0.3984375 + }, + { + "epoch": 0.07, + "learning_rate": 9.889685974441604e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 806, + "text_loss": 0.62890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.88941308671382e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 807, + "text_loss": 0.419921875 + }, + { + "epoch": 0.07, + "learning_rate": 9.889139865651104e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 808, + "text_loss": 0.390625 + }, + { + "epoch": 0.07, + "learning_rate": 9.888866311272085e-06, + "loss": 0.623, + "regression_loss": 0.0, + "step": 809, + "text_loss": 0.59375 + }, + { + "epoch": 0.07, + "learning_rate": 9.88859242359541e-06, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 810, + "text_loss": 0.49609375 + }, + { + "epoch": 0.07, + "learning_rate": 9.888318202639753e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 811, + "text_loss": 0.640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.88804364842381e-06, + "loss": 0.7932, + "regression_loss": 0.0, + "step": 812, + "text_loss": 0.71484375 + }, + { + "epoch": 0.07, + "learning_rate": 9.887768760966295e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 813, + "text_loss": 0.439453125 + }, + { + "epoch": 0.07, + "learning_rate": 9.887493540285951e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 814, + "text_loss": 0.515625 + }, + { + "epoch": 0.07, + "learning_rate": 9.887217986401543e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 815, + "text_loss": 0.37890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.88694209933185e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 816, + "text_loss": 0.3125 + }, + { + "epoch": 0.07, + "learning_rate": 9.88666587909569e-06, + "loss": 0.4567, + "regression_loss": 0.0, + "step": 817, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.07, + "learning_rate": 9.886389325711885e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 818, + "text_loss": 0.796875 + }, + { + "epoch": 0.07, + "learning_rate": 9.886112439199295e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 819, + "text_loss": 0.62890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.885835219576797e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 820, + "text_loss": 0.56640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.885557666863286e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 821, + "text_loss": 0.49609375 + }, + { + "epoch": 0.07, + "learning_rate": 9.885279781077686e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 822, + "text_loss": 0.5 + }, + { + "epoch": 0.07, + "learning_rate": 9.885001562238943e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 823, + "text_loss": 0.5390625 + }, + { + "epoch": 0.07, + "learning_rate": 9.884723010366025e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 824, + "text_loss": 0.26171875 + }, + { + "epoch": 0.07, + "learning_rate": 9.88444412547792e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 825, + "text_loss": 0.75390625 + }, + { + "epoch": 0.07, + "learning_rate": 9.88416490759364e-06, + "loss": 0.7012, + "regression_loss": 0.0, + "step": 826, + "text_loss": 0.75 + }, + { + "epoch": 0.07, + "learning_rate": 9.883885356732226e-06, + "loss": 0.6177, + "regression_loss": 0.0, + "step": 827, + "text_loss": 0.54296875 + }, + { + "epoch": 0.07, + "learning_rate": 9.883605472912731e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 828, + "text_loss": 0.703125 + }, + { + "epoch": 0.07, + "learning_rate": 9.88332525615424e-06, + "loss": 0.6177, + "regression_loss": 0.0, + "step": 829, + "text_loss": 0.48828125 + }, + { + "epoch": 0.07, + "learning_rate": 9.883044706475853e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 830, + "text_loss": 0.59375 + }, + { + "epoch": 0.07, + "learning_rate": 9.882763823896696e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 831, + "text_loss": 0.69921875 + }, + { + "epoch": 0.07, + "learning_rate": 9.882482608435924e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 832, + "text_loss": 0.67578125 + }, + { + "epoch": 0.07, + "learning_rate": 9.882201060112705e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 833, + "text_loss": 0.625 + }, + { + "epoch": 0.07, + "learning_rate": 9.881919178946232e-06, + "loss": 0.7231, + "regression_loss": 0.0, + "step": 834, + "text_loss": 0.6640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.881636964955723e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 835, + "text_loss": 0.44921875 + }, + { + "epoch": 0.07, + "learning_rate": 9.88135441816042e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 836, + "text_loss": 0.55078125 + }, + { + "epoch": 0.07, + "learning_rate": 9.881071538579586e-06, + "loss": 0.6484, + "regression_loss": 0.0, + "step": 837, + "text_loss": 0.7578125 + }, + { + "epoch": 0.07, + "learning_rate": 9.880788326232501e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 838, + "text_loss": 0.5078125 + }, + { + "epoch": 0.07, + "learning_rate": 9.880504781138477e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 839, + "text_loss": 0.5234375 + }, + { + "epoch": 0.07, + "learning_rate": 9.880220903316845e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 840, + "text_loss": 0.62890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.879936692786959e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 841, + "text_loss": 0.78125 + }, + { + "epoch": 0.07, + "learning_rate": 9.879652149568194e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 842, + "text_loss": 0.6875 + }, + { + "epoch": 0.07, + "learning_rate": 9.879367273679946e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 843, + "text_loss": 0.4765625 + }, + { + "epoch": 0.07, + "learning_rate": 9.879082065141637e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 844, + "text_loss": 0.53125 + }, + { + "epoch": 0.07, + "learning_rate": 9.878796523972714e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 845, + "text_loss": 0.55078125 + }, + { + "epoch": 0.07, + "learning_rate": 9.878510650192644e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 846, + "text_loss": 0.8984375 + }, + { + "epoch": 0.07, + "learning_rate": 9.878224443820913e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 847, + "text_loss": 0.494140625 + }, + { + "epoch": 0.07, + "learning_rate": 9.877937904877036e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 848, + "text_loss": 0.484375 + }, + { + "epoch": 0.07, + "learning_rate": 9.877651033380545e-06, + "loss": 0.6326, + "regression_loss": 0.0, + "step": 849, + "text_loss": 0.44140625 + }, + { + "epoch": 0.07, + "learning_rate": 9.877363829350999e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 850, + "text_loss": 0.890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.877076292807979e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 851, + "text_loss": 0.53125 + }, + { + "epoch": 0.07, + "learning_rate": 9.876788423771087e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 852, + "text_loss": 0.72265625 + }, + { + "epoch": 0.07, + "learning_rate": 9.876500222259945e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 853, + "text_loss": 0.388671875 + }, + { + "epoch": 0.07, + "learning_rate": 9.876211688294208e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 854, + "text_loss": 0.546875 + }, + { + "epoch": 0.07, + "learning_rate": 9.875922821893543e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 855, + "text_loss": 0.6171875 + }, + { + "epoch": 0.07, + "learning_rate": 9.87563362307764e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 856, + "text_loss": 0.375 + }, + { + "epoch": 0.07, + "learning_rate": 9.875344091866221e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 857, + "text_loss": 0.48046875 + }, + { + "epoch": 0.07, + "learning_rate": 9.875054228279024e-06, + "loss": 0.688, + "regression_loss": 0.0, + "step": 858, + "text_loss": 0.734375 + }, + { + "epoch": 0.07, + "learning_rate": 9.874764032335807e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 859, + "text_loss": 0.5625 + }, + { + "epoch": 0.07, + "learning_rate": 9.874473504056354e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 860, + "text_loss": 0.474609375 + }, + { + "epoch": 0.07, + "learning_rate": 9.874182643460474e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 861, + "text_loss": 0.51953125 + }, + { + "epoch": 0.07, + "learning_rate": 9.873891450567998e-06, + "loss": 0.4764, + "regression_loss": 0.0, + "step": 862, + "text_loss": 0.6953125 + }, + { + "epoch": 0.07, + "learning_rate": 9.873599925398774e-06, + "loss": 0.5945, + "regression_loss": 0.0, + "step": 863, + "text_loss": 0.453125 + }, + { + "epoch": 0.07, + "learning_rate": 9.873308067972679e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 864, + "text_loss": 0.625 + }, + { + "epoch": 0.07, + "learning_rate": 9.87301587830961e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 865, + "text_loss": 0.61328125 + }, + { + "epoch": 0.07, + "learning_rate": 9.872723356429486e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 866, + "text_loss": 0.486328125 + }, + { + "epoch": 0.07, + "learning_rate": 9.872430502352252e-06, + "loss": 0.6248, + "regression_loss": 0.0, + "step": 867, + "text_loss": 0.7734375 + }, + { + "epoch": 0.07, + "learning_rate": 9.872137316097872e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 868, + "text_loss": 0.625 + }, + { + "epoch": 0.07, + "learning_rate": 9.871843797686331e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 869, + "text_loss": 0.67578125 + }, + { + "epoch": 0.07, + "learning_rate": 9.871549947137645e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 870, + "text_loss": 0.71484375 + }, + { + "epoch": 0.07, + "learning_rate": 9.871255764471843e-06, + "loss": 0.6487, + "regression_loss": 0.0, + "step": 871, + "text_loss": 0.73828125 + }, + { + "epoch": 0.07, + "learning_rate": 9.870961249708983e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 872, + "text_loss": 0.455078125 + }, + { + "epoch": 0.07, + "learning_rate": 9.870666402869143e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 873, + "text_loss": 0.62109375 + }, + { + "epoch": 0.07, + "learning_rate": 9.870371223972423e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 874, + "text_loss": 0.54296875 + }, + { + "epoch": 0.07, + "learning_rate": 9.870075713038947e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 875, + "text_loss": 0.59375 + }, + { + "epoch": 0.07, + "learning_rate": 9.869779870088865e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 876, + "text_loss": 0.6640625 + }, + { + "epoch": 0.07, + "learning_rate": 9.869483695142341e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 877, + "text_loss": 0.9140625 + }, + { + "epoch": 0.07, + "learning_rate": 9.869187188219569e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 878, + "text_loss": 0.5234375 + }, + { + "epoch": 0.07, + "learning_rate": 9.868890349340764e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 879, + "text_loss": 0.65625 + }, + { + "epoch": 0.07, + "learning_rate": 9.868593178526161e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 880, + "text_loss": 0.82421875 + }, + { + "epoch": 0.07, + "learning_rate": 9.868295675796023e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 881, + "text_loss": 0.63671875 + }, + { + "epoch": 0.07, + "learning_rate": 9.867997841170627e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 882, + "text_loss": 0.89453125 + }, + { + "epoch": 0.07, + "learning_rate": 9.867699674670282e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 883, + "text_loss": 0.48828125 + }, + { + "epoch": 0.07, + "learning_rate": 9.867401176315315e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 884, + "text_loss": 0.54296875 + }, + { + "epoch": 0.07, + "learning_rate": 9.867102346126073e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 885, + "text_loss": 0.8828125 + }, + { + "epoch": 0.07, + "learning_rate": 9.866803184122933e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 886, + "text_loss": 0.3984375 + }, + { + "epoch": 0.07, + "learning_rate": 9.866503690326286e-06, + "loss": 0.6233, + "regression_loss": 0.0, + "step": 887, + "text_loss": 0.60546875 + }, + { + "epoch": 0.07, + "learning_rate": 9.866203864756555e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 888, + "text_loss": 0.48046875 + }, + { + "epoch": 0.07, + "learning_rate": 9.865903707434175e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 889, + "text_loss": 0.6484375 + }, + { + "epoch": 0.07, + "learning_rate": 9.865603218379613e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 890, + "text_loss": 0.45703125 + }, + { + "epoch": 0.07, + "learning_rate": 9.865302397613355e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 891, + "text_loss": 0.63671875 + }, + { + "epoch": 0.07, + "learning_rate": 9.865001245155908e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 892, + "text_loss": 0.59765625 + }, + { + "epoch": 0.07, + "learning_rate": 9.864699761027801e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 893, + "text_loss": 0.462890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.86439794524959e-06, + "loss": 0.6619, + "regression_loss": 0.0, + "step": 894, + "text_loss": 0.9375 + }, + { + "epoch": 0.07, + "learning_rate": 9.864095797841853e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 895, + "text_loss": 0.75 + }, + { + "epoch": 0.07, + "learning_rate": 9.863793318825186e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 896, + "text_loss": 0.57421875 + }, + { + "epoch": 0.07, + "learning_rate": 9.863490508220213e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 897, + "text_loss": 0.7890625 + }, + { + "epoch": 0.07, + "learning_rate": 9.863187366047574e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 898, + "text_loss": 0.6171875 + }, + { + "epoch": 0.07, + "learning_rate": 9.862883892327938e-06, + "loss": 0.8008, + "regression_loss": 0.0, + "step": 899, + "text_loss": 0.97265625 + }, + { + "epoch": 0.07, + "learning_rate": 9.862580087081995e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 900, + "text_loss": 0.26953125 + }, + { + "epoch": 0.07, + "learning_rate": 9.862275950330457e-06, + "loss": 0.6394, + "regression_loss": 0.0, + "step": 901, + "text_loss": 0.69921875 + }, + { + "epoch": 0.07, + "learning_rate": 9.861971482094056e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 902, + "text_loss": 0.48828125 + }, + { + "epoch": 0.08, + "learning_rate": 9.861666682393552e-06, + "loss": 0.5963, + "regression_loss": 0.0, + "step": 903, + "text_loss": 0.515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.861361551249723e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 904, + "text_loss": 0.40625 + }, + { + "epoch": 0.08, + "learning_rate": 9.861056088683371e-06, + "loss": 0.6426, + "regression_loss": 0.0, + "step": 905, + "text_loss": 0.671875 + }, + { + "epoch": 0.08, + "learning_rate": 9.860750294715323e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 906, + "text_loss": 0.671875 + }, + { + "epoch": 0.08, + "learning_rate": 9.860444169366423e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 907, + "text_loss": 0.73828125 + }, + { + "epoch": 0.08, + "learning_rate": 9.860137712657545e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 908, + "text_loss": 0.80078125 + }, + { + "epoch": 0.08, + "learning_rate": 9.85983092460958e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 909, + "text_loss": 0.36328125 + }, + { + "epoch": 0.08, + "learning_rate": 9.85952380524344e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 910, + "text_loss": 0.35546875 + }, + { + "epoch": 0.08, + "learning_rate": 9.859216354580068e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 911, + "text_loss": 0.45703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.858908572640422e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 912, + "text_loss": 0.365234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.858600459445484e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 913, + "text_loss": 0.443359375 + }, + { + "epoch": 0.08, + "learning_rate": 9.858292015016263e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 914, + "text_loss": 0.58984375 + }, + { + "epoch": 0.08, + "learning_rate": 9.857983239373782e-06, + "loss": 0.5813, + "regression_loss": 0.0, + "step": 915, + "text_loss": 0.40234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.857674132539095e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 916, + "text_loss": 0.5859375 + }, + { + "epoch": 0.08, + "learning_rate": 9.857364694533276e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 917, + "text_loss": 0.5390625 + }, + { + "epoch": 0.08, + "learning_rate": 9.85705492537742e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 918, + "text_loss": 0.30859375 + }, + { + "epoch": 0.08, + "learning_rate": 9.856744825092643e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 919, + "text_loss": 0.455078125 + }, + { + "epoch": 0.08, + "learning_rate": 9.856434393700091e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 920, + "text_loss": 0.52734375 + }, + { + "epoch": 0.08, + "learning_rate": 9.856123631220923e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 921, + "text_loss": 0.64453125 + }, + { + "epoch": 0.08, + "learning_rate": 9.855812537676328e-06, + "loss": 0.749, + "regression_loss": 0.0, + "step": 922, + "text_loss": 0.439453125 + }, + { + "epoch": 0.08, + "learning_rate": 9.855501113087513e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 923, + "text_loss": 0.84375 + }, + { + "epoch": 0.08, + "learning_rate": 9.85518935747571e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 924, + "text_loss": 0.26953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.854877270862173e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 925, + "text_loss": 0.70703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.854564853268177e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 926, + "text_loss": 0.44921875 + }, + { + "epoch": 0.08, + "learning_rate": 9.854252104715026e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 927, + "text_loss": 0.66796875 + }, + { + "epoch": 0.08, + "learning_rate": 9.853939025224037e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 928, + "text_loss": 0.7578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.853625614816552e-06, + "loss": 0.7014, + "regression_loss": 0.0, + "step": 929, + "text_loss": 0.828125 + }, + { + "epoch": 0.08, + "learning_rate": 9.853311873513944e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 930, + "text_loss": 0.376953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.852997801337599e-06, + "loss": 0.3928, + "regression_loss": 0.0, + "step": 931, + "text_loss": 0.296875 + }, + { + "epoch": 0.08, + "learning_rate": 9.852683398308929e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 932, + "text_loss": 0.58203125 + }, + { + "epoch": 0.08, + "learning_rate": 9.852368664449366e-06, + "loss": 0.6501, + "regression_loss": 0.0, + "step": 933, + "text_loss": 0.8359375 + }, + { + "epoch": 0.08, + "learning_rate": 9.85205359978037e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 934, + "text_loss": 0.609375 + }, + { + "epoch": 0.08, + "learning_rate": 9.851738204323422e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 935, + "text_loss": 0.52734375 + }, + { + "epoch": 0.08, + "learning_rate": 9.851422478100021e-06, + "loss": 0.5994, + "regression_loss": 0.0, + "step": 936, + "text_loss": 0.416015625 + }, + { + "epoch": 0.08, + "learning_rate": 9.85110642113169e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 937, + "text_loss": 0.56640625 + }, + { + "epoch": 0.08, + "learning_rate": 9.850790033439981e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 938, + "text_loss": 0.298828125 + }, + { + "epoch": 0.08, + "learning_rate": 9.85047331504646e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 939, + "text_loss": 0.5 + }, + { + "epoch": 0.08, + "learning_rate": 9.850156265972722e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 940, + "text_loss": 0.62890625 + }, + { + "epoch": 0.08, + "learning_rate": 9.849838886240377e-06, + "loss": 0.5321, + "regression_loss": 0.0, + "step": 941, + "text_loss": 0.77734375 + }, + { + "epoch": 0.08, + "learning_rate": 9.849521175871069e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 942, + "text_loss": 0.29296875 + }, + { + "epoch": 0.08, + "learning_rate": 9.849203134886452e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 943, + "text_loss": 0.51953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.84888476330821e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 944, + "text_loss": 0.45703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.848566061158052e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 945, + "text_loss": 0.3515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.848247028457698e-06, + "loss": 0.6604, + "regression_loss": 0.0, + "step": 946, + "text_loss": 0.55859375 + }, + { + "epoch": 0.08, + "learning_rate": 9.847927665228904e-06, + "loss": 0.7605, + "regression_loss": 0.0, + "step": 947, + "text_loss": 2.109375 + }, + { + "epoch": 0.08, + "learning_rate": 9.847607971493442e-06, + "loss": 0.658, + "regression_loss": 0.0, + "step": 948, + "text_loss": 0.5625 + }, + { + "epoch": 0.08, + "learning_rate": 9.847287947273104e-06, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 949, + "text_loss": 0.86328125 + }, + { + "epoch": 0.08, + "learning_rate": 9.846967592589709e-06, + "loss": 0.6819, + "regression_loss": 0.0, + "step": 950, + "text_loss": 0.70703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.846646907465098e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 951, + "text_loss": 0.369140625 + }, + { + "epoch": 0.08, + "learning_rate": 9.846325891921134e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 952, + "text_loss": 0.470703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.8460045459797e-06, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 953, + "text_loss": 0.44921875 + }, + { + "epoch": 0.08, + "learning_rate": 9.845682869662707e-06, + "loss": 0.7009, + "regression_loss": 0.0, + "step": 954, + "text_loss": 0.76171875 + }, + { + "epoch": 0.08, + "learning_rate": 9.845360862992081e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 955, + "text_loss": 0.5390625 + }, + { + "epoch": 0.08, + "learning_rate": 9.845038525989778e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 956, + "text_loss": 0.578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.844715858677772e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 957, + "text_loss": 0.51953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.84439286107806e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 958, + "text_loss": 0.458984375 + }, + { + "epoch": 0.08, + "learning_rate": 9.844069533212664e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 959, + "text_loss": 0.482421875 + }, + { + "epoch": 0.08, + "learning_rate": 9.843745875103628e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 960, + "text_loss": 0.42578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.843421886773013e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 961, + "text_loss": 0.64453125 + }, + { + "epoch": 0.08, + "learning_rate": 9.843097568242909e-06, + "loss": 0.5345, + "regression_loss": 0.0, + "step": 962, + "text_loss": 0.357421875 + }, + { + "epoch": 0.08, + "learning_rate": 9.842772919535429e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 963, + "text_loss": 0.58203125 + }, + { + "epoch": 0.08, + "learning_rate": 9.842447940672703e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 964, + "text_loss": 0.7109375 + }, + { + "epoch": 0.08, + "learning_rate": 9.842122631676888e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 965, + "text_loss": 0.71875 + }, + { + "epoch": 0.08, + "learning_rate": 9.841796992570157e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 966, + "text_loss": 0.68359375 + }, + { + "epoch": 0.08, + "learning_rate": 9.841471023374717e-06, + "loss": 0.6421, + "regression_loss": 0.0, + "step": 967, + "text_loss": 0.46875 + }, + { + "epoch": 0.08, + "learning_rate": 9.841144724112788e-06, + "loss": 0.6521, + "regression_loss": 0.0, + "step": 968, + "text_loss": 0.8984375 + }, + { + "epoch": 0.08, + "learning_rate": 9.840818094806614e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 969, + "text_loss": 0.4375 + }, + { + "epoch": 0.08, + "learning_rate": 9.840491135478467e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 970, + "text_loss": 0.462890625 + }, + { + "epoch": 0.08, + "learning_rate": 9.840163846150634e-06, + "loss": 0.4965, + "regression_loss": 0.0, + "step": 971, + "text_loss": 0.66015625 + }, + { + "epoch": 0.08, + "learning_rate": 9.839836226845427e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 972, + "text_loss": 0.625 + }, + { + "epoch": 0.08, + "learning_rate": 9.839508277585185e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 973, + "text_loss": 0.65234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.839179998392263e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 974, + "text_loss": 0.6953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.838851389289042e-06, + "loss": 0.7249, + "regression_loss": 0.0, + "step": 975, + "text_loss": 0.78515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.838522450297927e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 976, + "text_loss": 0.490234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.83819318144134e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 977, + "text_loss": 0.59375 + }, + { + "epoch": 0.08, + "learning_rate": 9.837863582741732e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 978, + "text_loss": 0.5234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.837533654221569e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 979, + "text_loss": 0.4609375 + }, + { + "epoch": 0.08, + "learning_rate": 9.83720339590335e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 980, + "text_loss": 0.478515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.836872807809585e-06, + "loss": 0.6223, + "regression_loss": 0.0, + "step": 981, + "text_loss": 0.92578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.836541889962813e-06, + "loss": 0.4253, + "regression_loss": 0.0, + "step": 982, + "text_loss": 0.365234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.836210642385598e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 983, + "text_loss": 0.640625 + }, + { + "epoch": 0.08, + "learning_rate": 9.835879065100517e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 984, + "text_loss": 0.3671875 + }, + { + "epoch": 0.08, + "learning_rate": 9.835547158130178e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 985, + "text_loss": 0.5234375 + }, + { + "epoch": 0.08, + "learning_rate": 9.83521492149721e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 986, + "text_loss": 0.369140625 + }, + { + "epoch": 0.08, + "learning_rate": 9.834882355224261e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 987, + "text_loss": 0.78515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.834549459334005e-06, + "loss": 0.6499, + "regression_loss": 0.0, + "step": 988, + "text_loss": 1.015625 + }, + { + "epoch": 0.08, + "learning_rate": 9.834216233849136e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 989, + "text_loss": 0.29296875 + }, + { + "epoch": 0.08, + "learning_rate": 9.833882678792373e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 990, + "text_loss": 0.3984375 + }, + { + "epoch": 0.08, + "learning_rate": 9.833548794186455e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 991, + "text_loss": 0.671875 + }, + { + "epoch": 0.08, + "learning_rate": 9.833214580054145e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 992, + "text_loss": 0.380859375 + }, + { + "epoch": 0.08, + "learning_rate": 9.832880036418228e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 993, + "text_loss": 0.59765625 + }, + { + "epoch": 0.08, + "learning_rate": 9.83254516330151e-06, + "loss": 0.6443, + "regression_loss": 0.0, + "step": 994, + "text_loss": 0.53515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.832209960726822e-06, + "loss": 0.6953, + "regression_loss": 0.0, + "step": 995, + "text_loss": 0.51953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.831874428717019e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 996, + "text_loss": 0.77734375 + }, + { + "epoch": 0.08, + "learning_rate": 9.831538567294971e-06, + "loss": 0.6138, + "regression_loss": 0.0, + "step": 997, + "text_loss": 0.67578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.831202376483579e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 998, + "text_loss": 0.4765625 + }, + { + "epoch": 0.08, + "learning_rate": 9.83086585630576e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 999, + "text_loss": 0.42578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.830529006784459e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 1000, + "text_loss": 0.380859375 + }, + { + "epoch": 0.08, + "learning_rate": 9.830191827942639e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 1001, + "text_loss": 0.74609375 + }, + { + "epoch": 0.08, + "learning_rate": 9.829854319803288e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 1002, + "text_loss": 0.6484375 + }, + { + "epoch": 0.08, + "learning_rate": 9.829516482389414e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 1003, + "text_loss": 0.7265625 + }, + { + "epoch": 0.08, + "learning_rate": 9.82917831572405e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 1004, + "text_loss": 0.443359375 + }, + { + "epoch": 0.08, + "learning_rate": 9.828839819830252e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 1005, + "text_loss": 0.50390625 + }, + { + "epoch": 0.08, + "learning_rate": 9.828500994731094e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 1006, + "text_loss": 0.251953125 + }, + { + "epoch": 0.08, + "learning_rate": 9.828161840449677e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 1007, + "text_loss": 0.76171875 + }, + { + "epoch": 0.08, + "learning_rate": 9.827822357009123e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 1008, + "text_loss": 0.67578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.827482544432574e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 1009, + "text_loss": 0.7578125 + }, + { + "epoch": 0.08, + "learning_rate": 9.8271424027432e-06, + "loss": 0.7164, + "regression_loss": 0.0, + "step": 1010, + "text_loss": 0.2080078125 + }, + { + "epoch": 0.08, + "learning_rate": 9.826801931964187e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 1011, + "text_loss": 0.7265625 + }, + { + "epoch": 0.08, + "learning_rate": 9.82646113211875e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 1012, + "text_loss": 0.419921875 + }, + { + "epoch": 0.08, + "learning_rate": 9.82612000323012e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 1013, + "text_loss": 0.5703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.825778545321554e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1014, + "text_loss": 0.212890625 + }, + { + "epoch": 0.08, + "learning_rate": 9.82543675841633e-06, + "loss": 0.6318, + "regression_loss": 0.0, + "step": 1015, + "text_loss": 0.45703125 + }, + { + "epoch": 0.08, + "learning_rate": 9.825094642537754e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 1016, + "text_loss": 0.6015625 + }, + { + "epoch": 0.08, + "learning_rate": 9.824752197709143e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 1017, + "text_loss": 0.5390625 + }, + { + "epoch": 0.08, + "learning_rate": 9.824409423953848e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 1018, + "text_loss": 0.427734375 + }, + { + "epoch": 0.08, + "learning_rate": 9.824066321295237e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 1019, + "text_loss": 0.82421875 + }, + { + "epoch": 0.08, + "learning_rate": 9.823722889756697e-06, + "loss": 0.6677, + "regression_loss": 0.0, + "step": 1020, + "text_loss": 0.515625 + }, + { + "epoch": 0.08, + "learning_rate": 9.823379129361646e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 1021, + "text_loss": 0.6640625 + }, + { + "epoch": 0.08, + "learning_rate": 9.823035040133517e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1022, + "text_loss": 0.4765625 + }, + { + "epoch": 0.09, + "learning_rate": 9.82269062209577e-06, + "loss": 0.6289, + "regression_loss": 0.0, + "step": 1023, + "text_loss": 0.75 + }, + { + "epoch": 0.09, + "learning_rate": 9.822345875271884e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 1024, + "text_loss": 0.53125 + }, + { + "epoch": 0.09, + "learning_rate": 9.822000799685362e-06, + "loss": 0.6855, + "regression_loss": 0.0, + "step": 1025, + "text_loss": 0.734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.821655395359733e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 1026, + "text_loss": 0.38671875 + }, + { + "epoch": 0.09, + "learning_rate": 9.821309662318541e-06, + "loss": 0.7068, + "regression_loss": 0.0, + "step": 1027, + "text_loss": 0.96875 + }, + { + "epoch": 0.09, + "learning_rate": 9.820963600585359e-06, + "loss": 0.7249, + "regression_loss": 0.0, + "step": 1028, + "text_loss": 0.625 + }, + { + "epoch": 0.09, + "learning_rate": 9.820617210183776e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 1029, + "text_loss": 0.7734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.820270491137412e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 1030, + "text_loss": 0.4296875 + }, + { + "epoch": 0.09, + "learning_rate": 9.8199234434699e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 1031, + "text_loss": 0.4609375 + }, + { + "epoch": 0.09, + "learning_rate": 9.819576067204903e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 1032, + "text_loss": 0.59765625 + }, + { + "epoch": 0.09, + "learning_rate": 9.8192283623661e-06, + "loss": 0.7327, + "regression_loss": 0.0, + "step": 1033, + "text_loss": 0.58203125 + }, + { + "epoch": 0.09, + "learning_rate": 9.8188803289772e-06, + "loss": 0.6292, + "regression_loss": 0.0, + "step": 1034, + "text_loss": 0.6484375 + }, + { + "epoch": 0.09, + "learning_rate": 9.818531967061928e-06, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 1035, + "text_loss": 0.78515625 + }, + { + "epoch": 0.09, + "learning_rate": 9.818183276644034e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 1036, + "text_loss": 0.56640625 + }, + { + "epoch": 0.09, + "learning_rate": 9.817834257747287e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 1037, + "text_loss": 0.326171875 + }, + { + "epoch": 0.09, + "learning_rate": 9.817484910395486e-06, + "loss": 0.5226, + "regression_loss": 0.0, + "step": 1038, + "text_loss": 0.734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.817135234612443e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 1039, + "text_loss": 0.609375 + }, + { + "epoch": 0.09, + "learning_rate": 9.816785230422001e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 1040, + "text_loss": 0.447265625 + }, + { + "epoch": 0.09, + "learning_rate": 9.81643489784802e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 1041, + "text_loss": 0.45703125 + }, + { + "epoch": 0.09, + "learning_rate": 9.816084236914386e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 1042, + "text_loss": 0.4140625 + }, + { + "epoch": 0.09, + "learning_rate": 9.815733247645e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 1043, + "text_loss": 0.6171875 + }, + { + "epoch": 0.09, + "learning_rate": 9.815381930063795e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 1044, + "text_loss": 0.431640625 + }, + { + "epoch": 0.09, + "learning_rate": 9.81503028419472e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 1045, + "text_loss": 0.474609375 + }, + { + "epoch": 0.09, + "learning_rate": 9.81467831006175e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 1046, + "text_loss": 0.6875 + }, + { + "epoch": 0.09, + "learning_rate": 9.814326007688879e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 1047, + "text_loss": 0.3984375 + }, + { + "epoch": 0.09, + "learning_rate": 9.813973377100126e-06, + "loss": 0.6819, + "regression_loss": 0.0, + "step": 1048, + "text_loss": 0.75390625 + }, + { + "epoch": 0.09, + "learning_rate": 9.813620418319534e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 1049, + "text_loss": 0.48828125 + }, + { + "epoch": 0.09, + "learning_rate": 9.81326713137116e-06, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 1050, + "text_loss": 0.33203125 + }, + { + "epoch": 0.09, + "learning_rate": 9.812913516279095e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1051, + "text_loss": 0.37109375 + }, + { + "epoch": 0.09, + "learning_rate": 9.812559573067445e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 1052, + "text_loss": 0.58203125 + }, + { + "epoch": 0.09, + "learning_rate": 9.812205301760339e-06, + "loss": 0.7031, + "regression_loss": 0.0, + "step": 1053, + "text_loss": 0.66796875 + }, + { + "epoch": 0.09, + "learning_rate": 9.811850702381929e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 1054, + "text_loss": 0.369140625 + }, + { + "epoch": 0.09, + "learning_rate": 9.811495774956392e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 1055, + "text_loss": 0.55859375 + }, + { + "epoch": 0.09, + "learning_rate": 9.811140519507922e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 1056, + "text_loss": 0.5625 + }, + { + "epoch": 0.09, + "learning_rate": 9.810784936060742e-06, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 1057, + "text_loss": 0.66796875 + }, + { + "epoch": 0.09, + "learning_rate": 9.81042902463909e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 1058, + "text_loss": 0.5078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.810072785267233e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 1059, + "text_loss": 0.578125 + }, + { + "epoch": 0.09, + "learning_rate": 9.809716217969459e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 1060, + "text_loss": 0.5859375 + }, + { + "epoch": 0.09, + "learning_rate": 9.809359322770074e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 1061, + "text_loss": 0.345703125 + }, + { + "epoch": 0.09, + "learning_rate": 9.809002099693407e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 1062, + "text_loss": 0.431640625 + }, + { + "epoch": 0.09, + "learning_rate": 9.808644548763817e-06, + "loss": 0.75, + "regression_loss": 0.0, + "step": 1063, + "text_loss": 0.455078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.808286670005678e-06, + "loss": 0.6738, + "regression_loss": 0.0, + "step": 1064, + "text_loss": 0.7578125 + }, + { + "epoch": 0.09, + "learning_rate": 9.807928463443387e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 1065, + "text_loss": 0.5703125 + }, + { + "epoch": 0.09, + "learning_rate": 9.807569929101366e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 1066, + "text_loss": 0.48046875 + }, + { + "epoch": 0.09, + "learning_rate": 9.807211067004058e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 1067, + "text_loss": 0.47265625 + }, + { + "epoch": 0.09, + "learning_rate": 9.806851877175928e-06, + "loss": 0.7251, + "regression_loss": 0.0, + "step": 1068, + "text_loss": 0.90234375 + }, + { + "epoch": 0.09, + "learning_rate": 9.806492359641463e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 1069, + "text_loss": 0.3359375 + }, + { + "epoch": 0.09, + "learning_rate": 9.806132514425176e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 1070, + "text_loss": 0.62890625 + }, + { + "epoch": 0.09, + "learning_rate": 9.805772341551594e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 1071, + "text_loss": 0.458984375 + }, + { + "epoch": 0.09, + "learning_rate": 9.805411841045276e-06, + "loss": 0.6914, + "regression_loss": 0.0, + "step": 1072, + "text_loss": 0.3359375 + }, + { + "epoch": 0.09, + "learning_rate": 9.805051012930798e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 1073, + "text_loss": 0.6484375 + }, + { + "epoch": 0.09, + "learning_rate": 9.804689857232758e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 1074, + "text_loss": 0.6640625 + }, + { + "epoch": 0.09, + "learning_rate": 9.804328373975782e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 1075, + "text_loss": 0.376953125 + }, + { + "epoch": 0.09, + "learning_rate": 9.803966563184509e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 1076, + "text_loss": 0.546875 + }, + { + "epoch": 0.09, + "learning_rate": 9.803604424883608e-06, + "loss": 0.7151, + "regression_loss": 0.0, + "step": 1077, + "text_loss": 0.55078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.803241959097768e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 1078, + "text_loss": 0.421875 + }, + { + "epoch": 0.09, + "learning_rate": 9.802879165851699e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 1079, + "text_loss": 0.75 + }, + { + "epoch": 0.09, + "learning_rate": 9.802516045170134e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 1080, + "text_loss": 0.462890625 + }, + { + "epoch": 0.09, + "learning_rate": 9.80215259707783e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1081, + "text_loss": 0.421875 + }, + { + "epoch": 0.09, + "learning_rate": 9.801788821599564e-06, + "loss": 0.6543, + "regression_loss": 0.0, + "step": 1082, + "text_loss": 0.58984375 + }, + { + "epoch": 0.09, + "learning_rate": 9.801424718760137e-06, + "loss": 0.6812, + "regression_loss": 0.0, + "step": 1083, + "text_loss": 0.84765625 + }, + { + "epoch": 0.09, + "learning_rate": 9.801060288584371e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 1084, + "text_loss": 0.4453125 + }, + { + "epoch": 0.09, + "learning_rate": 9.80069553109711e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 1085, + "text_loss": 0.484375 + }, + { + "epoch": 0.09, + "learning_rate": 9.800330446323226e-06, + "loss": 0.6816, + "regression_loss": 0.0, + "step": 1086, + "text_loss": 0.66796875 + }, + { + "epoch": 0.09, + "learning_rate": 9.799965034287603e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 1087, + "text_loss": 0.65234375 + }, + { + "epoch": 0.09, + "learning_rate": 9.799599295015154e-06, + "loss": 0.6328, + "regression_loss": 0.0, + "step": 1088, + "text_loss": 0.50390625 + }, + { + "epoch": 0.09, + "learning_rate": 9.799233228530815e-06, + "loss": 0.7083, + "regression_loss": 0.0, + "step": 1089, + "text_loss": 0.703125 + }, + { + "epoch": 0.09, + "learning_rate": 9.798866834859545e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 1090, + "text_loss": 0.63671875 + }, + { + "epoch": 0.09, + "learning_rate": 9.798500114026316e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 1091, + "text_loss": 0.5390625 + }, + { + "epoch": 0.09, + "learning_rate": 9.798133066056134e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 1092, + "text_loss": 0.83203125 + }, + { + "epoch": 0.09, + "learning_rate": 9.797765690974022e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 1093, + "text_loss": 0.7265625 + }, + { + "epoch": 0.09, + "learning_rate": 9.797397988805023e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 1094, + "text_loss": 0.73046875 + }, + { + "epoch": 0.09, + "learning_rate": 9.797029959574207e-06, + "loss": 0.6423, + "regression_loss": 0.0, + "step": 1095, + "text_loss": 0.78125 + }, + { + "epoch": 0.09, + "learning_rate": 9.796661603306666e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 1096, + "text_loss": 0.52734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.79629292002751e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 1097, + "text_loss": 0.53125 + }, + { + "epoch": 0.09, + "learning_rate": 9.795923909761876e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 1098, + "text_loss": 0.6015625 + }, + { + "epoch": 0.09, + "learning_rate": 9.79555457253492e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 1099, + "text_loss": 0.5703125 + }, + { + "epoch": 0.09, + "learning_rate": 9.79518490837182e-06, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 1100, + "text_loss": 0.5390625 + }, + { + "epoch": 0.09, + "learning_rate": 9.79481491729778e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 1101, + "text_loss": 0.37109375 + }, + { + "epoch": 0.09, + "learning_rate": 9.794444599338024e-06, + "loss": 0.7241, + "regression_loss": 0.0, + "step": 1102, + "text_loss": 0.37890625 + }, + { + "epoch": 0.09, + "learning_rate": 9.794073954517797e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 1103, + "text_loss": 1.25 + }, + { + "epoch": 0.09, + "learning_rate": 9.79370298286237e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 1104, + "text_loss": 0.40625 + }, + { + "epoch": 0.09, + "learning_rate": 9.793331684397032e-06, + "loss": 0.6587, + "regression_loss": 0.0, + "step": 1105, + "text_loss": 0.400390625 + }, + { + "epoch": 0.09, + "learning_rate": 9.792960059147096e-06, + "loss": 0.4969, + "regression_loss": 0.0, + "step": 1106, + "text_loss": 0.52734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.7925881071379e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 1107, + "text_loss": 0.7421875 + }, + { + "epoch": 0.09, + "learning_rate": 9.792215828394797e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 1108, + "text_loss": 0.55078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.79184322294317e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1109, + "text_loss": 0.64453125 + }, + { + "epoch": 0.09, + "learning_rate": 9.791470290808422e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 1110, + "text_loss": 0.953125 + }, + { + "epoch": 0.09, + "learning_rate": 9.791097032015979e-06, + "loss": 0.6458, + "regression_loss": 0.0, + "step": 1111, + "text_loss": 0.51171875 + }, + { + "epoch": 0.09, + "learning_rate": 9.790723446591283e-06, + "loss": 0.6067, + "regression_loss": 0.0, + "step": 1112, + "text_loss": 0.66796875 + }, + { + "epoch": 0.09, + "learning_rate": 9.790349534559807e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 1113, + "text_loss": 0.427734375 + }, + { + "epoch": 0.09, + "learning_rate": 9.789975295947041e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 1114, + "text_loss": 0.330078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.789600730778499e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 1115, + "text_loss": 1.0078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.789225839079717e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 1116, + "text_loss": 0.55078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.788850620876252e-06, + "loss": 0.6643, + "regression_loss": 0.0, + "step": 1117, + "text_loss": 0.64453125 + }, + { + "epoch": 0.09, + "learning_rate": 9.788475076193686e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 1118, + "text_loss": 0.5234375 + }, + { + "epoch": 0.09, + "learning_rate": 9.788099205057621e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 1119, + "text_loss": 0.42578125 + }, + { + "epoch": 0.09, + "learning_rate": 9.787723007493681e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 1120, + "text_loss": 0.4765625 + }, + { + "epoch": 0.09, + "learning_rate": 9.787346483527516e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 1121, + "text_loss": 0.5625 + }, + { + "epoch": 0.09, + "learning_rate": 9.786969633184794e-06, + "loss": 0.5126, + "regression_loss": 0.0, + "step": 1122, + "text_loss": 0.2021484375 + }, + { + "epoch": 0.09, + "learning_rate": 9.786592456491204e-06, + "loss": 0.7202, + "regression_loss": 0.0, + "step": 1123, + "text_loss": 0.9453125 + }, + { + "epoch": 0.09, + "learning_rate": 9.786214953472464e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 1124, + "text_loss": 0.373046875 + }, + { + "epoch": 0.09, + "learning_rate": 9.78583712415431e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 1125, + "text_loss": 0.30078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.785458968562495e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 1126, + "text_loss": 0.73828125 + }, + { + "epoch": 0.09, + "learning_rate": 9.785080486722807e-06, + "loss": 0.6597, + "regression_loss": 0.0, + "step": 1127, + "text_loss": 0.51953125 + }, + { + "epoch": 0.09, + "learning_rate": 9.784701678661045e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 1128, + "text_loss": 0.55078125 + }, + { + "epoch": 0.09, + "learning_rate": 9.784322544403036e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 1129, + "text_loss": 0.6328125 + }, + { + "epoch": 0.09, + "learning_rate": 9.783943083974623e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 1130, + "text_loss": 0.474609375 + }, + { + "epoch": 0.09, + "learning_rate": 9.783563297401682e-06, + "loss": 0.675, + "regression_loss": 0.0, + "step": 1131, + "text_loss": 0.51171875 + }, + { + "epoch": 0.09, + "learning_rate": 9.783183184710101e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 1132, + "text_loss": 0.8515625 + }, + { + "epoch": 0.09, + "learning_rate": 9.782802745925795e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 1133, + "text_loss": 0.55859375 + }, + { + "epoch": 0.09, + "learning_rate": 9.782421981074699e-06, + "loss": 0.6626, + "regression_loss": 0.0, + "step": 1134, + "text_loss": 0.8984375 + }, + { + "epoch": 0.09, + "learning_rate": 9.782040890182774e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 1135, + "text_loss": 0.251953125 + }, + { + "epoch": 0.09, + "learning_rate": 9.781659473276e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 1136, + "text_loss": 0.408203125 + }, + { + "epoch": 0.09, + "learning_rate": 9.781277730380379e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 1137, + "text_loss": 0.38671875 + }, + { + "epoch": 0.09, + "learning_rate": 9.780895661521937e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 1138, + "text_loss": 0.625 + }, + { + "epoch": 0.09, + "learning_rate": 9.78051326672672e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 1139, + "text_loss": 0.6328125 + }, + { + "epoch": 0.09, + "learning_rate": 9.7801305460208e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 1140, + "text_loss": 0.68359375 + }, + { + "epoch": 0.09, + "learning_rate": 9.77974749943027e-06, + "loss": 0.749, + "regression_loss": 0.0, + "step": 1141, + "text_loss": 0.48046875 + }, + { + "epoch": 0.09, + "learning_rate": 9.779364126981242e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 1142, + "text_loss": 0.5625 + }, + { + "epoch": 0.09, + "learning_rate": 9.77898042869985e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 1143, + "text_loss": 0.404296875 + }, + { + "epoch": 0.1, + "learning_rate": 9.778596404612257e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 1144, + "text_loss": 0.578125 + }, + { + "epoch": 0.1, + "learning_rate": 9.77821205474464e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 1145, + "text_loss": 0.287109375 + }, + { + "epoch": 0.1, + "learning_rate": 9.777827379123206e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 1146, + "text_loss": 0.458984375 + }, + { + "epoch": 0.1, + "learning_rate": 9.777442377774178e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 1147, + "text_loss": 0.5546875 + }, + { + "epoch": 0.1, + "learning_rate": 9.777057050723803e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 1148, + "text_loss": 0.9609375 + }, + { + "epoch": 0.1, + "learning_rate": 9.776671397998352e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 1149, + "text_loss": 0.95703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.776285419624114e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 1150, + "text_loss": 0.34765625 + }, + { + "epoch": 0.1, + "learning_rate": 9.775899115627405e-06, + "loss": 0.6844, + "regression_loss": 0.0, + "step": 1151, + "text_loss": 0.73828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.775512486034564e-06, + "loss": 0.6838, + "regression_loss": 0.0, + "step": 1152, + "text_loss": 0.48828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.775125530871942e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 1153, + "text_loss": 0.439453125 + }, + { + "epoch": 0.1, + "learning_rate": 9.774738250165928e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 1154, + "text_loss": 0.65234375 + }, + { + "epoch": 0.1, + "learning_rate": 9.774350643942921e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 1155, + "text_loss": 0.3046875 + }, + { + "epoch": 0.1, + "learning_rate": 9.773962712229344e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 1156, + "text_loss": 0.44140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.773574455051647e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 1157, + "text_loss": 0.306640625 + }, + { + "epoch": 0.1, + "learning_rate": 9.773185872436298e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 1158, + "text_loss": 0.51953125 + }, + { + "epoch": 0.1, + "learning_rate": 9.772796964409789e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 1159, + "text_loss": 0.640625 + }, + { + "epoch": 0.1, + "learning_rate": 9.772407730998633e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 1160, + "text_loss": 0.236328125 + }, + { + "epoch": 0.1, + "learning_rate": 9.772018172229369e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 1161, + "text_loss": 0.5859375 + }, + { + "epoch": 0.1, + "learning_rate": 9.771628288128552e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 1162, + "text_loss": 0.3984375 + }, + { + "epoch": 0.1, + "learning_rate": 9.77123807872276e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 1163, + "text_loss": 0.384765625 + }, + { + "epoch": 0.1, + "learning_rate": 9.770847544038601e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 1164, + "text_loss": 0.55078125 + }, + { + "epoch": 0.1, + "learning_rate": 9.770456684102698e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1165, + "text_loss": 0.57421875 + }, + { + "epoch": 0.1, + "learning_rate": 9.770065498941697e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 1166, + "text_loss": 0.37890625 + }, + { + "epoch": 0.1, + "learning_rate": 9.769673988582266e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 1167, + "text_loss": 0.703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.769282153051099e-06, + "loss": 0.6677, + "regression_loss": 0.0, + "step": 1168, + "text_loss": 0.373046875 + }, + { + "epoch": 0.1, + "learning_rate": 9.768889992374904e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 1169, + "text_loss": 0.25 + }, + { + "epoch": 0.1, + "learning_rate": 9.768497506580424e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 1170, + "text_loss": 0.59375 + }, + { + "epoch": 0.1, + "learning_rate": 9.768104695694409e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 1171, + "text_loss": 0.44140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.767711559743644e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 1172, + "text_loss": 0.5078125 + }, + { + "epoch": 0.1, + "learning_rate": 9.76731809875493e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 1173, + "text_loss": 0.400390625 + }, + { + "epoch": 0.1, + "learning_rate": 9.76692431275509e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 1174, + "text_loss": 0.453125 + }, + { + "epoch": 0.1, + "learning_rate": 9.766530201770969e-06, + "loss": 0.5248, + "regression_loss": 0.0, + "step": 1175, + "text_loss": 0.330078125 + }, + { + "epoch": 0.1, + "learning_rate": 9.76613576582944e-06, + "loss": 0.6379, + "regression_loss": 0.0, + "step": 1176, + "text_loss": 0.5546875 + }, + { + "epoch": 0.1, + "learning_rate": 9.76574100495739e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 1177, + "text_loss": 0.45703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.765345919181732e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 1178, + "text_loss": 0.373046875 + }, + { + "epoch": 0.1, + "learning_rate": 9.764950508529403e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 1179, + "text_loss": 0.484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.764554773027357e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 1180, + "text_loss": 0.80859375 + }, + { + "epoch": 0.1, + "learning_rate": 9.764158712702575e-06, + "loss": 0.5994, + "regression_loss": 0.0, + "step": 1181, + "text_loss": 0.9921875 + }, + { + "epoch": 0.1, + "learning_rate": 9.76376232758206e-06, + "loss": 0.6177, + "regression_loss": 0.0, + "step": 1182, + "text_loss": 0.5625 + }, + { + "epoch": 0.1, + "learning_rate": 9.763365617692832e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 1183, + "text_loss": 0.65234375 + }, + { + "epoch": 0.1, + "learning_rate": 9.762968583061938e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 1184, + "text_loss": 0.5625 + }, + { + "epoch": 0.1, + "learning_rate": 9.762571223716447e-06, + "loss": 0.6304, + "regression_loss": 0.0, + "step": 1185, + "text_loss": 0.3828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.762173539683448e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 1186, + "text_loss": 0.60546875 + }, + { + "epoch": 0.1, + "learning_rate": 9.761775530990052e-06, + "loss": 0.6997, + "regression_loss": 0.0, + "step": 1187, + "text_loss": 0.5625 + }, + { + "epoch": 0.1, + "learning_rate": 9.761377197663395e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 1188, + "text_loss": 0.298828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.760978539730632e-06, + "loss": 0.6057, + "regression_loss": 0.0, + "step": 1189, + "text_loss": 0.267578125 + }, + { + "epoch": 0.1, + "learning_rate": 9.760579557218942e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 1190, + "text_loss": 0.55078125 + }, + { + "epoch": 0.1, + "learning_rate": 9.760180250155527e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 1191, + "text_loss": 0.8515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.759780618567607e-06, + "loss": 0.6357, + "regression_loss": 0.0, + "step": 1192, + "text_loss": 0.8125 + }, + { + "epoch": 0.1, + "learning_rate": 9.759380662482427e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 1193, + "text_loss": 0.4765625 + }, + { + "epoch": 0.1, + "learning_rate": 9.758980381927257e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 1194, + "text_loss": 0.45703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.758579776929383e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 1195, + "text_loss": 0.48046875 + }, + { + "epoch": 0.1, + "learning_rate": 9.758178847516118e-06, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 1196, + "text_loss": 0.609375 + }, + { + "epoch": 0.1, + "learning_rate": 9.757777593714792e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 1197, + "text_loss": 0.6015625 + }, + { + "epoch": 0.1, + "learning_rate": 9.757376015552765e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 1198, + "text_loss": 0.515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.756974113057413e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 1199, + "text_loss": 0.478515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.756571886256134e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 1200, + "text_loss": 0.6484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.756169335176352e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 1201, + "text_loss": 0.412109375 + }, + { + "epoch": 0.1, + "learning_rate": 9.755766459845508e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 1202, + "text_loss": 0.58203125 + }, + { + "epoch": 0.1, + "learning_rate": 9.755363260291068e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 1203, + "text_loss": 0.494140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.754959736540525e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 1204, + "text_loss": 0.7578125 + }, + { + "epoch": 0.1, + "learning_rate": 9.754555888621384e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 1205, + "text_loss": 0.470703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.754151716561179e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 1206, + "text_loss": 0.625 + }, + { + "epoch": 0.1, + "learning_rate": 9.753747220387466e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 1207, + "text_loss": 0.396484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.75334240012782e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 1208, + "text_loss": 0.61328125 + }, + { + "epoch": 0.1, + "learning_rate": 9.752937255809838e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 1209, + "text_loss": 0.50390625 + }, + { + "epoch": 0.1, + "learning_rate": 9.752531787461144e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 1210, + "text_loss": 0.359375 + }, + { + "epoch": 0.1, + "learning_rate": 9.752125995109378e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1211, + "text_loss": 0.515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.751719878782204e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 1212, + "text_loss": 0.7734375 + }, + { + "epoch": 0.1, + "learning_rate": 9.751313438507313e-06, + "loss": 0.6531, + "regression_loss": 0.0, + "step": 1213, + "text_loss": 0.58203125 + }, + { + "epoch": 0.1, + "learning_rate": 9.75090667431241e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 1214, + "text_loss": 0.376953125 + }, + { + "epoch": 0.1, + "learning_rate": 9.750499586225228e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 1215, + "text_loss": 0.74609375 + }, + { + "epoch": 0.1, + "learning_rate": 9.75009217427352e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 1216, + "text_loss": 0.19140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.749684438485062e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 1217, + "text_loss": 0.48828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.74927637888765e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 1218, + "text_loss": 0.66015625 + }, + { + "epoch": 0.1, + "learning_rate": 9.748867995509104e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 1219, + "text_loss": 0.302734375 + }, + { + "epoch": 0.1, + "learning_rate": 9.748459288377264e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 1220, + "text_loss": 0.376953125 + }, + { + "epoch": 0.1, + "learning_rate": 9.748050257519997e-06, + "loss": 0.4149, + "regression_loss": 0.0, + "step": 1221, + "text_loss": 0.2021484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.747640902965185e-06, + "loss": 0.5879, + "regression_loss": 0.0, + "step": 1222, + "text_loss": 0.625 + }, + { + "epoch": 0.1, + "learning_rate": 9.747231224740736e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 1223, + "text_loss": 0.69140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.746821222874582e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 1224, + "text_loss": 0.6484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.746410897394673e-06, + "loss": 0.5057, + "regression_loss": 0.0, + "step": 1225, + "text_loss": 0.54296875 + }, + { + "epoch": 0.1, + "learning_rate": 9.746000248328985e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 1226, + "text_loss": 0.37109375 + }, + { + "epoch": 0.1, + "learning_rate": 9.745589275705513e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 1227, + "text_loss": 0.5 + }, + { + "epoch": 0.1, + "learning_rate": 9.745177979552273e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 1228, + "text_loss": 0.400390625 + }, + { + "epoch": 0.1, + "learning_rate": 9.744766359897307e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 1229, + "text_loss": 0.33984375 + }, + { + "epoch": 0.1, + "learning_rate": 9.744354416768676e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 1230, + "text_loss": 0.71875 + }, + { + "epoch": 0.1, + "learning_rate": 9.743942150194466e-06, + "loss": 0.6113, + "regression_loss": 0.0, + "step": 1231, + "text_loss": 0.6015625 + }, + { + "epoch": 0.1, + "learning_rate": 9.74352956020278e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 1232, + "text_loss": 0.515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.74311664682175e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 1233, + "text_loss": 0.41796875 + }, + { + "epoch": 0.1, + "learning_rate": 9.742703410079523e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 1234, + "text_loss": 0.63671875 + }, + { + "epoch": 0.1, + "learning_rate": 9.742289850004274e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 1235, + "text_loss": 0.578125 + }, + { + "epoch": 0.1, + "learning_rate": 9.741875966624196e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 1236, + "text_loss": 0.64453125 + }, + { + "epoch": 0.1, + "learning_rate": 9.741461759967508e-06, + "loss": 0.6741, + "regression_loss": 0.0, + "step": 1237, + "text_loss": 0.478515625 + }, + { + "epoch": 0.1, + "learning_rate": 9.741047230062443e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 1238, + "text_loss": 0.52734375 + }, + { + "epoch": 0.1, + "learning_rate": 9.740632376937265e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 1239, + "text_loss": 0.400390625 + }, + { + "epoch": 0.1, + "learning_rate": 9.740217200620257e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 1240, + "text_loss": 0.486328125 + }, + { + "epoch": 0.1, + "learning_rate": 9.739801701139724e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 1241, + "text_loss": 0.470703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.73938587852399e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 1242, + "text_loss": 0.578125 + }, + { + "epoch": 0.1, + "learning_rate": 9.738969732801404e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 1243, + "text_loss": 0.80859375 + }, + { + "epoch": 0.1, + "learning_rate": 9.738553264000339e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 1244, + "text_loss": 0.73046875 + }, + { + "epoch": 0.1, + "learning_rate": 9.738136472149186e-06, + "loss": 0.6211, + "regression_loss": 0.0, + "step": 1245, + "text_loss": 0.52734375 + }, + { + "epoch": 0.1, + "learning_rate": 9.737719357276361e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 1246, + "text_loss": 0.75390625 + }, + { + "epoch": 0.1, + "learning_rate": 9.7373019194103e-06, + "loss": 0.6804, + "regression_loss": 0.0, + "step": 1247, + "text_loss": 0.79296875 + }, + { + "epoch": 0.1, + "learning_rate": 9.73688415857946e-06, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 1248, + "text_loss": 0.66015625 + }, + { + "epoch": 0.1, + "learning_rate": 9.736466074812326e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 1249, + "text_loss": 0.69140625 + }, + { + "epoch": 0.1, + "learning_rate": 9.736047668137397e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 1250, + "text_loss": 0.396484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.735628938583198e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 1251, + "text_loss": 0.7734375 + }, + { + "epoch": 0.1, + "learning_rate": 9.735209886178276e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 1252, + "text_loss": 0.703125 + }, + { + "epoch": 0.1, + "learning_rate": 9.734790510951202e-06, + "loss": 0.6548, + "regression_loss": 0.0, + "step": 1253, + "text_loss": 0.65625 + }, + { + "epoch": 0.1, + "learning_rate": 9.734370812930566e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 1254, + "text_loss": 0.61328125 + }, + { + "epoch": 0.1, + "learning_rate": 9.733950792144979e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 1255, + "text_loss": 0.53125 + }, + { + "epoch": 0.1, + "learning_rate": 9.733530448623079e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 1256, + "text_loss": 0.7890625 + }, + { + "epoch": 0.1, + "learning_rate": 9.73310978239352e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 1257, + "text_loss": 0.80859375 + }, + { + "epoch": 0.1, + "learning_rate": 9.732688793484981e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 1258, + "text_loss": 0.79296875 + }, + { + "epoch": 0.1, + "learning_rate": 9.732267481926165e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 1259, + "text_loss": 0.3828125 + }, + { + "epoch": 0.1, + "learning_rate": 9.731845847745794e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 1260, + "text_loss": 0.486328125 + }, + { + "epoch": 0.1, + "learning_rate": 9.731423890972611e-06, + "loss": 0.7056, + "regression_loss": 0.0, + "step": 1261, + "text_loss": 0.6484375 + }, + { + "epoch": 0.1, + "learning_rate": 9.731001611635385e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 1262, + "text_loss": 0.7421875 + }, + { + "epoch": 0.1, + "learning_rate": 9.730579009762904e-06, + "loss": 0.6238, + "regression_loss": 0.0, + "step": 1263, + "text_loss": 0.52734375 + }, + { + "epoch": 0.11, + "learning_rate": 9.730156085383979e-06, + "loss": 0.6514, + "regression_loss": 0.0, + "step": 1264, + "text_loss": 0.5234375 + }, + { + "epoch": 0.11, + "learning_rate": 9.72973283852744e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 1265, + "text_loss": 0.59375 + }, + { + "epoch": 0.11, + "learning_rate": 9.72930926922215e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 1266, + "text_loss": 0.4296875 + }, + { + "epoch": 0.11, + "learning_rate": 9.728885377496975e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 1267, + "text_loss": 0.828125 + }, + { + "epoch": 0.11, + "learning_rate": 9.728461163380823e-06, + "loss": 0.6807, + "regression_loss": 0.0, + "step": 1268, + "text_loss": 0.796875 + }, + { + "epoch": 0.11, + "learning_rate": 9.728036626902607e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 1269, + "text_loss": 0.451171875 + }, + { + "epoch": 0.11, + "learning_rate": 9.727611768091276e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 1270, + "text_loss": 0.61328125 + }, + { + "epoch": 0.11, + "learning_rate": 9.727186586975791e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 1271, + "text_loss": 0.640625 + }, + { + "epoch": 0.11, + "learning_rate": 9.72676108358514e-06, + "loss": 0.7061, + "regression_loss": 0.0, + "step": 1272, + "text_loss": 0.5859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.726335257948332e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 1273, + "text_loss": 0.640625 + }, + { + "epoch": 0.11, + "learning_rate": 9.725909110094396e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 1274, + "text_loss": 0.515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.725482640052386e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 1275, + "text_loss": 0.408203125 + }, + { + "epoch": 0.11, + "learning_rate": 9.725055847851376e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 1276, + "text_loss": 0.6171875 + }, + { + "epoch": 0.11, + "learning_rate": 9.724628733520463e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 1277, + "text_loss": 0.67578125 + }, + { + "epoch": 0.11, + "learning_rate": 9.724201297088763e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 1278, + "text_loss": 0.5234375 + }, + { + "epoch": 0.11, + "learning_rate": 9.723773538585421e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 1279, + "text_loss": 0.34375 + }, + { + "epoch": 0.11, + "learning_rate": 9.723345458039595e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 1280, + "text_loss": 0.5234375 + }, + { + "epoch": 0.11, + "learning_rate": 9.722917055480471e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 1281, + "text_loss": 0.412109375 + }, + { + "epoch": 0.11, + "learning_rate": 9.722488330937256e-06, + "loss": 0.6553, + "regression_loss": 0.0, + "step": 1282, + "text_loss": 0.62890625 + }, + { + "epoch": 0.11, + "learning_rate": 9.722059284439179e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 1283, + "text_loss": 0.486328125 + }, + { + "epoch": 0.11, + "learning_rate": 9.721629916015486e-06, + "loss": 0.6821, + "regression_loss": 0.0, + "step": 1284, + "text_loss": 0.60546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.721200225695454e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 1285, + "text_loss": 0.66015625 + }, + { + "epoch": 0.11, + "learning_rate": 9.720770213508374e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 1286, + "text_loss": 0.5859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.720339879483563e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 1287, + "text_loss": 0.4375 + }, + { + "epoch": 0.11, + "learning_rate": 9.719909223650359e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 1288, + "text_loss": 0.32421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.719478246038121e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 1289, + "text_loss": 0.51953125 + }, + { + "epoch": 0.11, + "learning_rate": 9.719046946676233e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 1290, + "text_loss": 0.546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.718615325594096e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 1291, + "text_loss": 0.53515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.718183382821138e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 1292, + "text_loss": 0.39453125 + }, + { + "epoch": 0.11, + "learning_rate": 9.717751118386806e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 1293, + "text_loss": 0.4296875 + }, + { + "epoch": 0.11, + "learning_rate": 9.717318532320568e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 1294, + "text_loss": 0.482421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.716885624651919e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 1295, + "text_loss": 0.396484375 + }, + { + "epoch": 0.11, + "learning_rate": 9.716452395410367e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 1296, + "text_loss": 0.76953125 + }, + { + "epoch": 0.11, + "learning_rate": 9.716018844625453e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 1297, + "text_loss": 0.78515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.71558497232673e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 1298, + "text_loss": 0.671875 + }, + { + "epoch": 0.11, + "learning_rate": 9.71515077854378e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 1299, + "text_loss": 0.482421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.714716263306203e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 1300, + "text_loss": 0.357421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.714281426643622e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 1301, + "text_loss": 0.57421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.713846268585683e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 1302, + "text_loss": 0.34765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.71341078916205e-06, + "loss": 0.6064, + "regression_loss": 0.0, + "step": 1303, + "text_loss": 0.4765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.712974988402413e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 1304, + "text_loss": 0.296875 + }, + { + "epoch": 0.11, + "learning_rate": 9.712538866336485e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 1305, + "text_loss": 0.62109375 + }, + { + "epoch": 0.11, + "learning_rate": 9.712102422993995e-06, + "loss": 0.5813, + "regression_loss": 0.0, + "step": 1306, + "text_loss": 0.58984375 + }, + { + "epoch": 0.11, + "learning_rate": 9.711665658404702e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 1307, + "text_loss": 0.37890625 + }, + { + "epoch": 0.11, + "learning_rate": 9.711228572598376e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 1308, + "text_loss": 0.63671875 + }, + { + "epoch": 0.11, + "learning_rate": 9.71079116560482e-06, + "loss": 0.637, + "regression_loss": 0.0, + "step": 1309, + "text_loss": 0.59765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.710353437453853e-06, + "loss": 0.437, + "regression_loss": 0.0, + "step": 1310, + "text_loss": 0.4453125 + }, + { + "epoch": 0.11, + "learning_rate": 9.709915388175318e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 1311, + "text_loss": 0.7265625 + }, + { + "epoch": 0.11, + "learning_rate": 9.709477017799076e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 1312, + "text_loss": 0.3515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.709038326355015e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 1313, + "text_loss": 0.478515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.708599313873044e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 1314, + "text_loss": 0.59765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.70815998038309e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 1315, + "text_loss": 0.47265625 + }, + { + "epoch": 0.11, + "learning_rate": 9.707720325915105e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 1316, + "text_loss": 0.59765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.707280350499064e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 1317, + "text_loss": 0.640625 + }, + { + "epoch": 0.11, + "learning_rate": 9.70684005416496e-06, + "loss": 0.6729, + "regression_loss": 0.0, + "step": 1318, + "text_loss": 0.68359375 + }, + { + "epoch": 0.11, + "learning_rate": 9.706399436942811e-06, + "loss": 0.6226, + "regression_loss": 0.0, + "step": 1319, + "text_loss": 0.7421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.705958498862658e-06, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 1320, + "text_loss": 0.30859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.705517239954558e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 1321, + "text_loss": 0.515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.705075660248597e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 1322, + "text_loss": 0.400390625 + }, + { + "epoch": 0.11, + "learning_rate": 9.70463375977488e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 1323, + "text_loss": 0.515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.704191538563529e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 1324, + "text_loss": 0.443359375 + }, + { + "epoch": 0.11, + "learning_rate": 9.703748996644698e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 1325, + "text_loss": 0.734375 + }, + { + "epoch": 0.11, + "learning_rate": 9.703306134048553e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 1326, + "text_loss": 0.404296875 + }, + { + "epoch": 0.11, + "learning_rate": 9.702862950805289e-06, + "loss": 0.6475, + "regression_loss": 0.0, + "step": 1327, + "text_loss": 0.6015625 + }, + { + "epoch": 0.11, + "learning_rate": 9.702419446945118e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 1328, + "text_loss": 0.6875 + }, + { + "epoch": 0.11, + "learning_rate": 9.701975622498277e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 1329, + "text_loss": 0.396484375 + }, + { + "epoch": 0.11, + "learning_rate": 9.70153147749502e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 1330, + "text_loss": 0.5859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.701087011965633e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 1331, + "text_loss": 0.65625 + }, + { + "epoch": 0.11, + "learning_rate": 9.700642225940412e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 1332, + "text_loss": 0.7421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.700197119449685e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 1333, + "text_loss": 0.41796875 + }, + { + "epoch": 0.11, + "learning_rate": 9.699751692523793e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 1334, + "text_loss": 0.53515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.699305945193103e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 1335, + "text_loss": 0.55859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.698859877488004e-06, + "loss": 0.6433, + "regression_loss": 0.0, + "step": 1336, + "text_loss": 0.5859375 + }, + { + "epoch": 0.11, + "learning_rate": 9.69841348943891e-06, + "loss": 0.5612, + "regression_loss": 0.0, + "step": 1337, + "text_loss": 0.61328125 + }, + { + "epoch": 0.11, + "learning_rate": 9.69796678107625e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 1338, + "text_loss": 0.6328125 + }, + { + "epoch": 0.11, + "learning_rate": 9.69751975243048e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 1339, + "text_loss": 0.65625 + }, + { + "epoch": 0.11, + "learning_rate": 9.697072403532074e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 1340, + "text_loss": 0.59375 + }, + { + "epoch": 0.11, + "learning_rate": 9.696624734411532e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 1341, + "text_loss": 0.35546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.696176745099372e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 1342, + "text_loss": 0.4375 + }, + { + "epoch": 0.11, + "learning_rate": 9.695728435626137e-06, + "loss": 0.5819, + "regression_loss": 0.0, + "step": 1343, + "text_loss": 0.5390625 + }, + { + "epoch": 0.11, + "learning_rate": 9.695279806022391e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 1344, + "text_loss": 0.5390625 + }, + { + "epoch": 0.11, + "learning_rate": 9.694830856318718e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 1345, + "text_loss": 0.384765625 + }, + { + "epoch": 0.11, + "learning_rate": 9.694381586545724e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 1346, + "text_loss": 0.63671875 + }, + { + "epoch": 0.11, + "learning_rate": 9.69393199673404e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 1347, + "text_loss": 0.38671875 + }, + { + "epoch": 0.11, + "learning_rate": 9.693482086914317e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 1348, + "text_loss": 0.34375 + }, + { + "epoch": 0.11, + "learning_rate": 9.693031857117225e-06, + "loss": 0.6152, + "regression_loss": 0.0, + "step": 1349, + "text_loss": 0.9375 + }, + { + "epoch": 0.11, + "learning_rate": 9.692581307373461e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 1350, + "text_loss": 0.267578125 + }, + { + "epoch": 0.11, + "learning_rate": 9.692130437713739e-06, + "loss": 0.4338, + "regression_loss": 0.0, + "step": 1351, + "text_loss": 0.5546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.691679248168797e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 1352, + "text_loss": 0.376953125 + }, + { + "epoch": 0.11, + "learning_rate": 9.691227738769397e-06, + "loss": 0.6338, + "regression_loss": 0.0, + "step": 1353, + "text_loss": 0.33203125 + }, + { + "epoch": 0.11, + "learning_rate": 9.690775909546319e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 1354, + "text_loss": 0.46875 + }, + { + "epoch": 0.11, + "learning_rate": 9.690323760530367e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 1355, + "text_loss": 0.470703125 + }, + { + "epoch": 0.11, + "learning_rate": 9.689871291752366e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 1356, + "text_loss": 0.4140625 + }, + { + "epoch": 0.11, + "learning_rate": 9.689418503243163e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 1357, + "text_loss": 0.91796875 + }, + { + "epoch": 0.11, + "learning_rate": 9.688965395033628e-06, + "loss": 0.7375, + "regression_loss": 0.0, + "step": 1358, + "text_loss": 0.494140625 + }, + { + "epoch": 0.11, + "learning_rate": 9.688511967154647e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 1359, + "text_loss": 0.49609375 + }, + { + "epoch": 0.11, + "learning_rate": 9.688058219637137e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 1360, + "text_loss": 0.53125 + }, + { + "epoch": 0.11, + "learning_rate": 9.68760415251203e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 1361, + "text_loss": 0.75 + }, + { + "epoch": 0.11, + "learning_rate": 9.687149765810284e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 1362, + "text_loss": 0.5390625 + }, + { + "epoch": 0.11, + "learning_rate": 9.686695059562875e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 1363, + "text_loss": 0.6640625 + }, + { + "epoch": 0.11, + "learning_rate": 9.686240033800804e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 1364, + "text_loss": 0.5546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.68578468855509e-06, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 1365, + "text_loss": 0.57421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.685329023856777e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 1366, + "text_loss": 0.7421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.68487303973693e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 1367, + "text_loss": 0.60546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.684416736226637e-06, + "loss": 0.4548, + "regression_loss": 0.0, + "step": 1368, + "text_loss": 0.54296875 + }, + { + "epoch": 0.11, + "learning_rate": 9.683960113357004e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 1369, + "text_loss": 0.69140625 + }, + { + "epoch": 0.11, + "learning_rate": 9.683503171159163e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 1370, + "text_loss": 0.5546875 + }, + { + "epoch": 0.11, + "learning_rate": 9.683045909664266e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 1371, + "text_loss": 0.5 + }, + { + "epoch": 0.11, + "learning_rate": 9.682588328903486e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1372, + "text_loss": 0.6796875 + }, + { + "epoch": 0.11, + "learning_rate": 9.682130428908018e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 1373, + "text_loss": 0.52734375 + }, + { + "epoch": 0.11, + "learning_rate": 9.68167220970908e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 1374, + "text_loss": 0.46875 + }, + { + "epoch": 0.11, + "learning_rate": 9.681213671337913e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 1375, + "text_loss": 0.57421875 + }, + { + "epoch": 0.11, + "learning_rate": 9.680754813825774e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 1376, + "text_loss": 0.439453125 + }, + { + "epoch": 0.11, + "learning_rate": 9.680295637203947e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 1377, + "text_loss": 0.65234375 + }, + { + "epoch": 0.11, + "learning_rate": 9.679836141503735e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 1378, + "text_loss": 0.388671875 + }, + { + "epoch": 0.11, + "learning_rate": 9.679376326756468e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 1379, + "text_loss": 0.515625 + }, + { + "epoch": 0.11, + "learning_rate": 9.678916192993491e-06, + "loss": 0.6772, + "regression_loss": 0.0, + "step": 1380, + "text_loss": 0.87890625 + }, + { + "epoch": 0.11, + "learning_rate": 9.678455740246173e-06, + "loss": 0.5057, + "regression_loss": 0.0, + "step": 1381, + "text_loss": 0.6640625 + }, + { + "epoch": 0.11, + "learning_rate": 9.677994968545907e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 1382, + "text_loss": 0.51953125 + }, + { + "epoch": 0.11, + "learning_rate": 9.677533877924105e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 1383, + "text_loss": 0.59765625 + }, + { + "epoch": 0.12, + "learning_rate": 9.6770724684122e-06, + "loss": 0.6257, + "regression_loss": 0.0, + "step": 1384, + "text_loss": 0.65625 + }, + { + "epoch": 0.12, + "learning_rate": 9.676610740041655e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 1385, + "text_loss": 0.69140625 + }, + { + "epoch": 0.12, + "learning_rate": 9.67614869284394e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 1386, + "text_loss": 0.8203125 + }, + { + "epoch": 0.12, + "learning_rate": 9.67568632685056e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 1387, + "text_loss": 0.3515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.675223642093034e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 1388, + "text_loss": 0.33203125 + }, + { + "epoch": 0.12, + "learning_rate": 9.67476063860291e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 1389, + "text_loss": 0.330078125 + }, + { + "epoch": 0.12, + "learning_rate": 9.674297316411748e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 1390, + "text_loss": 0.6484375 + }, + { + "epoch": 0.12, + "learning_rate": 9.673833675551138e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 1391, + "text_loss": 0.486328125 + }, + { + "epoch": 0.12, + "learning_rate": 9.673369716052687e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 1392, + "text_loss": 0.6171875 + }, + { + "epoch": 0.12, + "learning_rate": 9.672905437948025e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 1393, + "text_loss": 0.65234375 + }, + { + "epoch": 0.12, + "learning_rate": 9.672440841268807e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 1394, + "text_loss": 0.30859375 + }, + { + "epoch": 0.12, + "learning_rate": 9.671975926046705e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 1395, + "text_loss": 0.46484375 + }, + { + "epoch": 0.12, + "learning_rate": 9.671510692313412e-06, + "loss": 0.5573, + "regression_loss": 0.0, + "step": 1396, + "text_loss": 0.8125 + }, + { + "epoch": 0.12, + "learning_rate": 9.67104514010065e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 1397, + "text_loss": 0.5703125 + }, + { + "epoch": 0.12, + "learning_rate": 9.670579269440155e-06, + "loss": 0.4653, + "regression_loss": 0.0, + "step": 1398, + "text_loss": 0.416015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.67011308036369e-06, + "loss": 0.6377, + "regression_loss": 0.0, + "step": 1399, + "text_loss": 0.8984375 + }, + { + "epoch": 0.12, + "learning_rate": 9.669646572903033e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 1400, + "text_loss": 0.5390625 + }, + { + "epoch": 0.12, + "learning_rate": 9.669179747089991e-06, + "loss": 0.6431, + "regression_loss": 0.0, + "step": 1401, + "text_loss": 0.73046875 + }, + { + "epoch": 0.12, + "learning_rate": 9.668712602956391e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 1402, + "text_loss": 0.60546875 + }, + { + "epoch": 0.12, + "learning_rate": 9.668245140534079e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 1403, + "text_loss": 0.478515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.667777359854922e-06, + "loss": 0.6567, + "regression_loss": 0.0, + "step": 1404, + "text_loss": 0.66015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.667309260950815e-06, + "loss": 0.6553, + "regression_loss": 0.0, + "step": 1405, + "text_loss": 0.63671875 + }, + { + "epoch": 0.12, + "learning_rate": 9.666840843853668e-06, + "loss": 0.6768, + "regression_loss": 0.0, + "step": 1406, + "text_loss": 0.796875 + }, + { + "epoch": 0.12, + "learning_rate": 9.666372108595415e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 1407, + "text_loss": 0.75 + }, + { + "epoch": 0.12, + "learning_rate": 9.665903055208013e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 1408, + "text_loss": 0.494140625 + }, + { + "epoch": 0.12, + "learning_rate": 9.66543368372344e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 1409, + "text_loss": 0.39453125 + }, + { + "epoch": 0.12, + "learning_rate": 9.664963994173695e-06, + "loss": 0.6787, + "regression_loss": 0.0, + "step": 1410, + "text_loss": 0.57421875 + }, + { + "epoch": 0.12, + "learning_rate": 9.6644939865908e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 1411, + "text_loss": 0.6640625 + }, + { + "epoch": 0.12, + "learning_rate": 9.664023661006794e-06, + "loss": 0.4436, + "regression_loss": 0.0, + "step": 1412, + "text_loss": 0.38671875 + }, + { + "epoch": 0.12, + "learning_rate": 9.663553017453744e-06, + "loss": 0.6396, + "regression_loss": 0.0, + "step": 1413, + "text_loss": 0.484375 + }, + { + "epoch": 0.12, + "learning_rate": 9.663082055963738e-06, + "loss": 0.6689, + "regression_loss": 0.0, + "step": 1414, + "text_loss": 0.546875 + }, + { + "epoch": 0.12, + "learning_rate": 9.66261077656888e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 1415, + "text_loss": 0.486328125 + }, + { + "epoch": 0.12, + "learning_rate": 9.6621391793013e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 1416, + "text_loss": 0.6640625 + }, + { + "epoch": 0.12, + "learning_rate": 9.661667264193151e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 1417, + "text_loss": 0.375 + }, + { + "epoch": 0.12, + "learning_rate": 9.661195031276605e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 1418, + "text_loss": 0.4921875 + }, + { + "epoch": 0.12, + "learning_rate": 9.660722480583855e-06, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 1419, + "text_loss": 0.3984375 + }, + { + "epoch": 0.12, + "learning_rate": 9.66024961214712e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 1420, + "text_loss": 0.59375 + }, + { + "epoch": 0.12, + "learning_rate": 9.659776425998636e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 1421, + "text_loss": 0.6640625 + }, + { + "epoch": 0.12, + "learning_rate": 9.659302922170661e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 1422, + "text_loss": 0.435546875 + }, + { + "epoch": 0.12, + "learning_rate": 9.658829100695477e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 1423, + "text_loss": 0.484375 + }, + { + "epoch": 0.12, + "learning_rate": 9.658354961605388e-06, + "loss": 0.675, + "regression_loss": 0.0, + "step": 1424, + "text_loss": 0.50390625 + }, + { + "epoch": 0.12, + "learning_rate": 9.657880504932718e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 1425, + "text_loss": 0.7578125 + }, + { + "epoch": 0.12, + "learning_rate": 9.657405730709813e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 1426, + "text_loss": 0.58984375 + }, + { + "epoch": 0.12, + "learning_rate": 9.656930638969037e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 1427, + "text_loss": 0.68359375 + }, + { + "epoch": 0.12, + "learning_rate": 9.656455229742784e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 1428, + "text_loss": 0.49609375 + }, + { + "epoch": 0.12, + "learning_rate": 9.655979503063467e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 1429, + "text_loss": 0.265625 + }, + { + "epoch": 0.12, + "learning_rate": 9.65550345896351e-06, + "loss": 0.6665, + "regression_loss": 0.0, + "step": 1430, + "text_loss": 0.609375 + }, + { + "epoch": 0.12, + "learning_rate": 9.655027097475375e-06, + "loss": 0.4437, + "regression_loss": 0.0, + "step": 1431, + "text_loss": 0.58203125 + }, + { + "epoch": 0.12, + "learning_rate": 9.654550418631534e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 1432, + "text_loss": 0.80859375 + }, + { + "epoch": 0.12, + "learning_rate": 9.654073422464485e-06, + "loss": 0.6355, + "regression_loss": 0.0, + "step": 1433, + "text_loss": 0.423828125 + }, + { + "epoch": 0.12, + "learning_rate": 9.65359610900675e-06, + "loss": 0.4652, + "regression_loss": 0.0, + "step": 1434, + "text_loss": 0.7421875 + }, + { + "epoch": 0.12, + "learning_rate": 9.653118478290864e-06, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 1435, + "text_loss": 0.3359375 + }, + { + "epoch": 0.12, + "learning_rate": 9.652640530349396e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 1436, + "text_loss": 0.71484375 + }, + { + "epoch": 0.12, + "learning_rate": 9.652162265214923e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 1437, + "text_loss": 0.50390625 + }, + { + "epoch": 0.12, + "learning_rate": 9.651683682920058e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 1438, + "text_loss": 0.76171875 + }, + { + "epoch": 0.12, + "learning_rate": 9.651204783497424e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 1439, + "text_loss": 0.462890625 + }, + { + "epoch": 0.12, + "learning_rate": 9.650725566979671e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 1440, + "text_loss": 0.4375 + }, + { + "epoch": 0.12, + "learning_rate": 9.650246033399467e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 1441, + "text_loss": 0.259765625 + }, + { + "epoch": 0.12, + "learning_rate": 9.649766182789509e-06, + "loss": 0.7078, + "regression_loss": 0.0, + "step": 1442, + "text_loss": 0.765625 + }, + { + "epoch": 0.12, + "learning_rate": 9.649286015182506e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 1443, + "text_loss": 0.671875 + }, + { + "epoch": 0.12, + "learning_rate": 9.648805530611196e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 1444, + "text_loss": 0.6171875 + }, + { + "epoch": 0.12, + "learning_rate": 9.648324729108335e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 1445, + "text_loss": 0.353515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.647843610706705e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 1446, + "text_loss": 0.8828125 + }, + { + "epoch": 0.12, + "learning_rate": 9.647362175439098e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 1447, + "text_loss": 0.72265625 + }, + { + "epoch": 0.12, + "learning_rate": 9.646880423338345e-06, + "loss": 0.4847, + "regression_loss": 0.0, + "step": 1448, + "text_loss": 0.515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.646398354437284e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 1449, + "text_loss": 0.4453125 + }, + { + "epoch": 0.12, + "learning_rate": 9.64591596876878e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 1450, + "text_loss": 0.546875 + }, + { + "epoch": 0.12, + "learning_rate": 9.645433266365722e-06, + "loss": 0.634, + "regression_loss": 0.0, + "step": 1451, + "text_loss": 0.66015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.644950247261016e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 1452, + "text_loss": 0.55078125 + }, + { + "epoch": 0.12, + "learning_rate": 9.644466911487593e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 1453, + "text_loss": 0.78515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.643983259078405e-06, + "loss": 0.6401, + "regression_loss": 0.0, + "step": 1454, + "text_loss": 0.74609375 + }, + { + "epoch": 0.12, + "learning_rate": 9.643499290066423e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 1455, + "text_loss": 0.60546875 + }, + { + "epoch": 0.12, + "learning_rate": 9.643015004484641e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 1456, + "text_loss": 0.6796875 + }, + { + "epoch": 0.12, + "learning_rate": 9.64253040236608e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 1457, + "text_loss": 0.5703125 + }, + { + "epoch": 0.12, + "learning_rate": 9.64204548374377e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 1458, + "text_loss": 0.66015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.641560248650777e-06, + "loss": 0.6445, + "regression_loss": 0.0, + "step": 1459, + "text_loss": 0.7734375 + }, + { + "epoch": 0.12, + "learning_rate": 9.641074697120178e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 1460, + "text_loss": 0.7109375 + }, + { + "epoch": 0.12, + "learning_rate": 9.640588829185078e-06, + "loss": 0.6392, + "regression_loss": 0.0, + "step": 1461, + "text_loss": 0.73046875 + }, + { + "epoch": 0.12, + "learning_rate": 9.6401026448786e-06, + "loss": 0.6675, + "regression_loss": 0.0, + "step": 1462, + "text_loss": 0.53125 + }, + { + "epoch": 0.12, + "learning_rate": 9.639616144233887e-06, + "loss": 0.75, + "regression_loss": 0.0, + "step": 1463, + "text_loss": 0.72265625 + }, + { + "epoch": 0.12, + "learning_rate": 9.639129327284108e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 1464, + "text_loss": 0.54296875 + }, + { + "epoch": 0.12, + "learning_rate": 9.638642194062456e-06, + "loss": 0.4806, + "regression_loss": 0.0, + "step": 1465, + "text_loss": 0.578125 + }, + { + "epoch": 0.12, + "learning_rate": 9.638154744602133e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 1466, + "text_loss": 0.80859375 + }, + { + "epoch": 0.12, + "learning_rate": 9.637666978936377e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 1467, + "text_loss": 0.244140625 + }, + { + "epoch": 0.12, + "learning_rate": 9.637178897098439e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 1468, + "text_loss": 0.5078125 + }, + { + "epoch": 0.12, + "learning_rate": 9.636690499121592e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 1469, + "text_loss": 0.8046875 + }, + { + "epoch": 0.12, + "learning_rate": 9.636201785039136e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 1470, + "text_loss": 0.44921875 + }, + { + "epoch": 0.12, + "learning_rate": 9.63571275488439e-06, + "loss": 0.4548, + "regression_loss": 0.0, + "step": 1471, + "text_loss": 0.298828125 + }, + { + "epoch": 0.12, + "learning_rate": 9.635223408690688e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 1472, + "text_loss": 0.5234375 + }, + { + "epoch": 0.12, + "learning_rate": 9.634733746491396e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 1473, + "text_loss": 0.66015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.634243768319894e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 1474, + "text_loss": 0.3984375 + }, + { + "epoch": 0.12, + "learning_rate": 9.633753474209587e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 1475, + "text_loss": 0.6875 + }, + { + "epoch": 0.12, + "learning_rate": 9.633262864193902e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 1476, + "text_loss": 0.40234375 + }, + { + "epoch": 0.12, + "learning_rate": 9.632771938306285e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 1477, + "text_loss": 0.384765625 + }, + { + "epoch": 0.12, + "learning_rate": 9.632280696580204e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 1478, + "text_loss": 0.2890625 + }, + { + "epoch": 0.12, + "learning_rate": 9.631789139049149e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 1479, + "text_loss": 0.87109375 + }, + { + "epoch": 0.12, + "learning_rate": 9.631297265746635e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 1480, + "text_loss": 0.64453125 + }, + { + "epoch": 0.12, + "learning_rate": 9.630805076706192e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 1481, + "text_loss": 0.73828125 + }, + { + "epoch": 0.12, + "learning_rate": 9.630312571961377e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 1482, + "text_loss": 0.3046875 + }, + { + "epoch": 0.12, + "learning_rate": 9.629819751545765e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 1483, + "text_loss": 0.439453125 + }, + { + "epoch": 0.12, + "learning_rate": 9.629326615492956e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 1484, + "text_loss": 0.328125 + }, + { + "epoch": 0.12, + "learning_rate": 9.628833163836569e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 1485, + "text_loss": 0.455078125 + }, + { + "epoch": 0.12, + "learning_rate": 9.628339396610242e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 1486, + "text_loss": 0.734375 + }, + { + "epoch": 0.12, + "learning_rate": 9.627845313847641e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 1487, + "text_loss": 0.25390625 + }, + { + "epoch": 0.12, + "learning_rate": 9.627350915582448e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 1488, + "text_loss": 0.490234375 + }, + { + "epoch": 0.12, + "learning_rate": 9.626856201848369e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 1489, + "text_loss": 0.69921875 + }, + { + "epoch": 0.12, + "learning_rate": 9.62636117267913e-06, + "loss": 0.4094, + "regression_loss": 0.0, + "step": 1490, + "text_loss": 0.287109375 + }, + { + "epoch": 0.12, + "learning_rate": 9.625865828108483e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 1491, + "text_loss": 0.6015625 + }, + { + "epoch": 0.12, + "learning_rate": 9.625370168170195e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 1492, + "text_loss": 0.263671875 + }, + { + "epoch": 0.12, + "learning_rate": 9.624874192898058e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 1493, + "text_loss": 0.6328125 + }, + { + "epoch": 0.12, + "learning_rate": 9.624377902325885e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 1494, + "text_loss": 0.54296875 + }, + { + "epoch": 0.12, + "learning_rate": 9.623881296487513e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 1495, + "text_loss": 0.498046875 + }, + { + "epoch": 0.12, + "learning_rate": 9.623384375416793e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 1496, + "text_loss": 0.318359375 + }, + { + "epoch": 0.12, + "learning_rate": 9.622887139147608e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 1497, + "text_loss": 0.80078125 + }, + { + "epoch": 0.12, + "learning_rate": 9.622389587713852e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 1498, + "text_loss": 0.478515625 + }, + { + "epoch": 0.12, + "learning_rate": 9.62189172114945e-06, + "loss": 0.6277, + "regression_loss": 0.0, + "step": 1499, + "text_loss": 0.283203125 + }, + { + "epoch": 0.12, + "learning_rate": 9.62139353948834e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 1500, + "text_loss": 0.68359375 + }, + { + "epoch": 0.12, + "learning_rate": 9.620895042764489e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 1501, + "text_loss": 0.298828125 + }, + { + "epoch": 0.12, + "learning_rate": 9.62039623101188e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 1502, + "text_loss": 0.6875 + }, + { + "epoch": 0.12, + "learning_rate": 9.619897104264519e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 1503, + "text_loss": 0.58984375 + }, + { + "epoch": 0.12, + "learning_rate": 9.619397662556434e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 1504, + "text_loss": 0.259765625 + }, + { + "epoch": 0.13, + "learning_rate": 9.618897905921675e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 1505, + "text_loss": 0.52734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.618397834394316e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 1506, + "text_loss": 0.78125 + }, + { + "epoch": 0.13, + "learning_rate": 9.617897448008441e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1507, + "text_loss": 0.6796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.617396746798173e-06, + "loss": 0.6626, + "regression_loss": 0.0, + "step": 1508, + "text_loss": 0.6953125 + }, + { + "epoch": 0.13, + "learning_rate": 9.616895730797639e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 1509, + "text_loss": 0.6796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.616394400041003e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 1510, + "text_loss": 0.404296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.615892754562439e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 1511, + "text_loss": 0.8046875 + }, + { + "epoch": 0.13, + "learning_rate": 9.615390794396147e-06, + "loss": 0.6331, + "regression_loss": 0.0, + "step": 1512, + "text_loss": 0.51953125 + }, + { + "epoch": 0.13, + "learning_rate": 9.614888519576348e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 1513, + "text_loss": 0.416015625 + }, + { + "epoch": 0.13, + "learning_rate": 9.614385930137286e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 1514, + "text_loss": 0.70703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.613883026113222e-06, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 1515, + "text_loss": 0.404296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.613379807538445e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 1516, + "text_loss": 0.4609375 + }, + { + "epoch": 0.13, + "learning_rate": 9.61287627444726e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 1517, + "text_loss": 0.625 + }, + { + "epoch": 0.13, + "learning_rate": 9.612372426873997e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 1518, + "text_loss": 0.63671875 + }, + { + "epoch": 0.13, + "learning_rate": 9.611868264853002e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 1519, + "text_loss": 0.62109375 + }, + { + "epoch": 0.13, + "learning_rate": 9.611363788418648e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 1520, + "text_loss": 0.515625 + }, + { + "epoch": 0.13, + "learning_rate": 9.610858997605329e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 1521, + "text_loss": 0.69140625 + }, + { + "epoch": 0.13, + "learning_rate": 9.610353892447457e-06, + "loss": 0.6326, + "regression_loss": 0.0, + "step": 1522, + "text_loss": 0.427734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.609848472979471e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 1523, + "text_loss": 0.47265625 + }, + { + "epoch": 0.13, + "learning_rate": 9.609342739235822e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 1524, + "text_loss": 0.6953125 + }, + { + "epoch": 0.13, + "learning_rate": 9.608836691250993e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 1525, + "text_loss": 0.5390625 + }, + { + "epoch": 0.13, + "learning_rate": 9.608330329059484e-06, + "loss": 0.6257, + "regression_loss": 0.0, + "step": 1526, + "text_loss": 0.65234375 + }, + { + "epoch": 0.13, + "learning_rate": 9.607823652695813e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 1527, + "text_loss": 0.4140625 + }, + { + "epoch": 0.13, + "learning_rate": 9.607316662194523e-06, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 1528, + "text_loss": 0.56640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.60680935759018e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 1529, + "text_loss": 0.49609375 + }, + { + "epoch": 0.13, + "learning_rate": 9.60630173891737e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 1530, + "text_loss": 0.341796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.605793806210697e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 1531, + "text_loss": 0.82421875 + }, + { + "epoch": 0.13, + "learning_rate": 9.605285559504791e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 1532, + "text_loss": 0.5703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.6047769988343e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 1533, + "text_loss": 0.455078125 + }, + { + "epoch": 0.13, + "learning_rate": 9.604268124233897e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 1534, + "text_loss": 0.484375 + }, + { + "epoch": 0.13, + "learning_rate": 9.603758935738274e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 1535, + "text_loss": 0.6328125 + }, + { + "epoch": 0.13, + "learning_rate": 9.603249433382145e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 1536, + "text_loss": 0.578125 + }, + { + "epoch": 0.13, + "learning_rate": 9.602739617200244e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 1537, + "text_loss": 0.66796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.602229487227328e-06, + "loss": 0.635, + "regression_loss": 0.0, + "step": 1538, + "text_loss": 0.396484375 + }, + { + "epoch": 0.13, + "learning_rate": 9.601719043498176e-06, + "loss": 0.6685, + "regression_loss": 0.0, + "step": 1539, + "text_loss": 0.80859375 + }, + { + "epoch": 0.13, + "learning_rate": 9.601208286047587e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 1540, + "text_loss": 0.5546875 + }, + { + "epoch": 0.13, + "learning_rate": 9.600697214910381e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 1541, + "text_loss": 0.578125 + }, + { + "epoch": 0.13, + "learning_rate": 9.600185830121401e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 1542, + "text_loss": 0.51171875 + }, + { + "epoch": 0.13, + "learning_rate": 9.59967413171551e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 1543, + "text_loss": 0.421875 + }, + { + "epoch": 0.13, + "learning_rate": 9.599162119727594e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 1544, + "text_loss": 0.76953125 + }, + { + "epoch": 0.13, + "learning_rate": 9.59864979419256e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 1545, + "text_loss": 0.78125 + }, + { + "epoch": 0.13, + "learning_rate": 9.598137155145333e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 1546, + "text_loss": 0.494140625 + }, + { + "epoch": 0.13, + "learning_rate": 9.597624202620864e-06, + "loss": 0.761, + "regression_loss": 0.0, + "step": 1547, + "text_loss": 0.75390625 + }, + { + "epoch": 0.13, + "learning_rate": 9.597110936654121e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 1548, + "text_loss": 0.56640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.5965973572801e-06, + "loss": 0.6394, + "regression_loss": 0.0, + "step": 1549, + "text_loss": 0.6171875 + }, + { + "epoch": 0.13, + "learning_rate": 9.59608346453381e-06, + "loss": 0.6375, + "regression_loss": 0.0, + "step": 1550, + "text_loss": 0.328125 + }, + { + "epoch": 0.13, + "learning_rate": 9.59556925845029e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 1551, + "text_loss": 0.373046875 + }, + { + "epoch": 0.13, + "learning_rate": 9.595054739064591e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 1552, + "text_loss": 0.421875 + }, + { + "epoch": 0.13, + "learning_rate": 9.594539906411794e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 1553, + "text_loss": 0.357421875 + }, + { + "epoch": 0.13, + "learning_rate": 9.594024760526996e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 1554, + "text_loss": 0.78125 + }, + { + "epoch": 0.13, + "learning_rate": 9.593509301445318e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 1555, + "text_loss": 0.5546875 + }, + { + "epoch": 0.13, + "learning_rate": 9.5929935292019e-06, + "loss": 0.6375, + "regression_loss": 0.0, + "step": 1556, + "text_loss": 0.640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.592477443831905e-06, + "loss": 0.5333, + "regression_loss": 0.0, + "step": 1557, + "text_loss": 0.546875 + }, + { + "epoch": 0.13, + "learning_rate": 9.591961045370517e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 1558, + "text_loss": 0.6875 + }, + { + "epoch": 0.13, + "learning_rate": 9.591444333852944e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 1559, + "text_loss": 0.63671875 + }, + { + "epoch": 0.13, + "learning_rate": 9.590927309314408e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 1560, + "text_loss": 0.47265625 + }, + { + "epoch": 0.13, + "learning_rate": 9.590409971790162e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 1561, + "text_loss": 0.8671875 + }, + { + "epoch": 0.13, + "learning_rate": 9.58989232131547e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 1562, + "text_loss": 0.4609375 + }, + { + "epoch": 0.13, + "learning_rate": 9.589374357925628e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 1563, + "text_loss": 0.361328125 + }, + { + "epoch": 0.13, + "learning_rate": 9.588856081655943e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 1564, + "text_loss": 0.56640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.588337492541755e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 1565, + "text_loss": 0.45703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.587818590618413e-06, + "loss": 0.5865, + "regression_loss": 0.0, + "step": 1566, + "text_loss": 0.8359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.587299375921294e-06, + "loss": 0.7119, + "regression_loss": 0.0, + "step": 1567, + "text_loss": 0.447265625 + }, + { + "epoch": 0.13, + "learning_rate": 9.586779848485797e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 1568, + "text_loss": 0.404296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.586260008347341e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 1569, + "text_loss": 0.68359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.585739855541363e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 1570, + "text_loss": 0.54296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.585219390103327e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 1571, + "text_loss": 0.70703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.584698612068716e-06, + "loss": 0.6768, + "regression_loss": 0.0, + "step": 1572, + "text_loss": 0.9921875 + }, + { + "epoch": 0.13, + "learning_rate": 9.584177521473033e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 1573, + "text_loss": 0.443359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.583656118351801e-06, + "loss": 0.6365, + "regression_loss": 0.0, + "step": 1574, + "text_loss": 0.640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.583134402740571e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 1575, + "text_loss": 0.85546875 + }, + { + "epoch": 0.13, + "learning_rate": 9.582612374674909e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 1576, + "text_loss": 0.484375 + }, + { + "epoch": 0.13, + "learning_rate": 9.582090034190402e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 1577, + "text_loss": 0.56640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.581567381322664e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 1578, + "text_loss": 0.54296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.581044416107323e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 1579, + "text_loss": 0.59375 + }, + { + "epoch": 0.13, + "learning_rate": 9.580521138580035e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 1580, + "text_loss": 0.515625 + }, + { + "epoch": 0.13, + "learning_rate": 9.579997548776475e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 1581, + "text_loss": 0.78125 + }, + { + "epoch": 0.13, + "learning_rate": 9.579473646732335e-06, + "loss": 0.5891, + "regression_loss": 0.0, + "step": 1582, + "text_loss": 0.671875 + }, + { + "epoch": 0.13, + "learning_rate": 9.578949432483335e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 1583, + "text_loss": 0.478515625 + }, + { + "epoch": 0.13, + "learning_rate": 9.578424906065213e-06, + "loss": 0.6714, + "regression_loss": 0.0, + "step": 1584, + "text_loss": 0.78125 + }, + { + "epoch": 0.13, + "learning_rate": 9.577900067513729e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 1585, + "text_loss": 0.68359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.577374916864659e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 1586, + "text_loss": 0.55078125 + }, + { + "epoch": 0.13, + "learning_rate": 9.57684945415381e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 1587, + "text_loss": 0.546875 + }, + { + "epoch": 0.13, + "learning_rate": 9.576323679417005e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 1588, + "text_loss": 0.439453125 + }, + { + "epoch": 0.13, + "learning_rate": 9.575797592690088e-06, + "loss": 0.6892, + "regression_loss": 0.0, + "step": 1589, + "text_loss": 0.73828125 + }, + { + "epoch": 0.13, + "learning_rate": 9.575271194008923e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 1590, + "text_loss": 0.77734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.5747444834094e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 1591, + "text_loss": 0.59765625 + }, + { + "epoch": 0.13, + "learning_rate": 9.574217460927424e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 1592, + "text_loss": 0.3984375 + }, + { + "epoch": 0.13, + "learning_rate": 9.573690126598927e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 1593, + "text_loss": 0.6796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.57316248045986e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 1594, + "text_loss": 0.640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.572634522546196e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 1595, + "text_loss": 0.5234375 + }, + { + "epoch": 0.13, + "learning_rate": 9.572106252893927e-06, + "loss": 0.614, + "regression_loss": 0.0, + "step": 1596, + "text_loss": 0.6328125 + }, + { + "epoch": 0.13, + "learning_rate": 9.571577671539068e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 1597, + "text_loss": 0.4140625 + }, + { + "epoch": 0.13, + "learning_rate": 9.571048778517655e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 1598, + "text_loss": 0.70703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.570519573865745e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 1599, + "text_loss": 0.53125 + }, + { + "epoch": 0.13, + "learning_rate": 9.569990057619414e-06, + "loss": 0.7056, + "regression_loss": 0.0, + "step": 1600, + "text_loss": 0.70703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.569460229814766e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 1601, + "text_loss": 0.462890625 + }, + { + "epoch": 0.13, + "learning_rate": 9.568930090487921e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 1602, + "text_loss": 0.83203125 + }, + { + "epoch": 0.13, + "learning_rate": 9.56839963967502e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 1603, + "text_loss": 0.43359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.567868877412227e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 1604, + "text_loss": 0.390625 + }, + { + "epoch": 0.13, + "learning_rate": 9.567337803735725e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 1605, + "text_loss": 0.5703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.566806418681722e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 1606, + "text_loss": 0.4296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.566274722286443e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 1607, + "text_loss": 0.91796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.56574271458614e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 1608, + "text_loss": 0.921875 + }, + { + "epoch": 0.13, + "learning_rate": 9.565210395617079e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 1609, + "text_loss": 0.62890625 + }, + { + "epoch": 0.13, + "learning_rate": 9.564677765415552e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 1610, + "text_loss": 0.5625 + }, + { + "epoch": 0.13, + "learning_rate": 9.56414482401787e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 1611, + "text_loss": 0.33984375 + }, + { + "epoch": 0.13, + "learning_rate": 9.563611571460367e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 1612, + "text_loss": 0.81640625 + }, + { + "epoch": 0.13, + "learning_rate": 9.563078007779398e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 1613, + "text_loss": 0.76953125 + }, + { + "epoch": 0.13, + "learning_rate": 9.56254413301134e-06, + "loss": 0.6755, + "regression_loss": 0.0, + "step": 1614, + "text_loss": 0.474609375 + }, + { + "epoch": 0.13, + "learning_rate": 9.562009947192585e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 1615, + "text_loss": 0.29296875 + }, + { + "epoch": 0.13, + "learning_rate": 9.561475450359556e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 1616, + "text_loss": 0.2734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.560940642548689e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 1617, + "text_loss": 0.7734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.560405523796446e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 1618, + "text_loss": 0.53125 + }, + { + "epoch": 0.13, + "learning_rate": 9.559870094139307e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 1619, + "text_loss": 0.419921875 + }, + { + "epoch": 0.13, + "learning_rate": 9.559334353613778e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 1620, + "text_loss": 0.52734375 + }, + { + "epoch": 0.13, + "learning_rate": 9.55879830225638e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 1621, + "text_loss": 0.466796875 + }, + { + "epoch": 0.13, + "learning_rate": 9.55826194010366e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 1622, + "text_loss": 0.45703125 + }, + { + "epoch": 0.13, + "learning_rate": 9.557725267192183e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 1623, + "text_loss": 0.359375 + }, + { + "epoch": 0.13, + "learning_rate": 9.557188283558537e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 1624, + "text_loss": 0.625 + }, + { + "epoch": 0.14, + "learning_rate": 9.556650989239331e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 1625, + "text_loss": 0.408203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.556113384271196e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 1626, + "text_loss": 0.515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.555575468690782e-06, + "loss": 0.5497, + "regression_loss": 0.0, + "step": 1627, + "text_loss": 0.412109375 + }, + { + "epoch": 0.14, + "learning_rate": 9.555037242534761e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 1628, + "text_loss": 0.4921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.554498705839827e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 1629, + "text_loss": 0.470703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.553959858642694e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 1630, + "text_loss": 0.53515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.5534207009801e-06, + "loss": 0.7148, + "regression_loss": 0.0, + "step": 1631, + "text_loss": 0.97265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.5528812328888e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 1632, + "text_loss": 0.52734375 + }, + { + "epoch": 0.14, + "learning_rate": 9.55234145440557e-06, + "loss": 0.4674, + "regression_loss": 0.0, + "step": 1633, + "text_loss": 0.71875 + }, + { + "epoch": 0.14, + "learning_rate": 9.551801365567214e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 1634, + "text_loss": 0.419921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.55126096641055e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 1635, + "text_loss": 0.703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.55072025697242e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 1636, + "text_loss": 0.67578125 + }, + { + "epoch": 0.14, + "learning_rate": 9.550179237289687e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 1637, + "text_loss": 0.70703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.549637907399234e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 1638, + "text_loss": 0.68359375 + }, + { + "epoch": 0.14, + "learning_rate": 9.549096267337968e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 1639, + "text_loss": 0.50390625 + }, + { + "epoch": 0.14, + "learning_rate": 9.548554317142811e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 1640, + "text_loss": 0.4296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.548012056850715e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 1641, + "text_loss": 0.84765625 + }, + { + "epoch": 0.14, + "learning_rate": 9.547469486498648e-06, + "loss": 0.5663, + "regression_loss": 0.0, + "step": 1642, + "text_loss": 0.44921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.546926606123597e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 1643, + "text_loss": 0.83203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.546383415762574e-06, + "loss": 0.636, + "regression_loss": 0.0, + "step": 1644, + "text_loss": 0.53125 + }, + { + "epoch": 0.14, + "learning_rate": 9.545839915452612e-06, + "loss": 0.5365, + "regression_loss": 0.0, + "step": 1645, + "text_loss": 0.515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.545296105230763e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 1646, + "text_loss": 0.640625 + }, + { + "epoch": 0.14, + "learning_rate": 9.544751985134102e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 1647, + "text_loss": 0.921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.544207555199722e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 1648, + "text_loss": 0.296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.543662815464743e-06, + "loss": 0.6355, + "regression_loss": 0.0, + "step": 1649, + "text_loss": 0.279296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.543117765966299e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 1650, + "text_loss": 0.5859375 + }, + { + "epoch": 0.14, + "learning_rate": 9.542572406741551e-06, + "loss": 0.6384, + "regression_loss": 0.0, + "step": 1651, + "text_loss": 0.71484375 + }, + { + "epoch": 0.14, + "learning_rate": 9.542026737827678e-06, + "loss": 0.6694, + "regression_loss": 0.0, + "step": 1652, + "text_loss": 1.015625 + }, + { + "epoch": 0.14, + "learning_rate": 9.54148075926188e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 1653, + "text_loss": 0.515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.540934471081382e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 1654, + "text_loss": 0.64453125 + }, + { + "epoch": 0.14, + "learning_rate": 9.540387873323424e-06, + "loss": 0.4501, + "regression_loss": 0.0, + "step": 1655, + "text_loss": 0.43359375 + }, + { + "epoch": 0.14, + "learning_rate": 9.539840966025272e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 1656, + "text_loss": 0.72265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.539293749224207e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 1657, + "text_loss": 0.81640625 + }, + { + "epoch": 0.14, + "learning_rate": 9.538746222957542e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 1658, + "text_loss": 0.81640625 + }, + { + "epoch": 0.14, + "learning_rate": 9.538198387262602e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 1659, + "text_loss": 0.3515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.537650242176735e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 1660, + "text_loss": 0.39453125 + }, + { + "epoch": 0.14, + "learning_rate": 9.53710178773731e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 1661, + "text_loss": 0.470703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.536553023981718e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 1662, + "text_loss": 0.62109375 + }, + { + "epoch": 0.14, + "learning_rate": 9.536003950947372e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 1663, + "text_loss": 0.443359375 + }, + { + "epoch": 0.14, + "learning_rate": 9.535454568671705e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 1664, + "text_loss": 0.62109375 + }, + { + "epoch": 0.14, + "learning_rate": 9.534904877192169e-06, + "loss": 0.5787, + "regression_loss": 0.0, + "step": 1665, + "text_loss": 0.9140625 + }, + { + "epoch": 0.14, + "learning_rate": 9.534354876546242e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 1666, + "text_loss": 0.828125 + }, + { + "epoch": 0.14, + "learning_rate": 9.533804566771418e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 1667, + "text_loss": 0.51953125 + }, + { + "epoch": 0.14, + "learning_rate": 9.533253947905217e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 1668, + "text_loss": 0.365234375 + }, + { + "epoch": 0.14, + "learning_rate": 9.532703019985172e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 1669, + "text_loss": 0.46875 + }, + { + "epoch": 0.14, + "learning_rate": 9.532151783048848e-06, + "loss": 0.6597, + "regression_loss": 0.0, + "step": 1670, + "text_loss": 0.69921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.531600237133825e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 1671, + "text_loss": 0.5703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.5310483822777e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 1672, + "text_loss": 0.66015625 + }, + { + "epoch": 0.14, + "learning_rate": 9.530496218518099e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 1673, + "text_loss": 0.51171875 + }, + { + "epoch": 0.14, + "learning_rate": 9.529943745892667e-06, + "loss": 0.6063, + "regression_loss": 0.0, + "step": 1674, + "text_loss": 0.7578125 + }, + { + "epoch": 0.14, + "learning_rate": 9.529390964439064e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 1675, + "text_loss": 0.58203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.528837874194982e-06, + "loss": 0.6104, + "regression_loss": 0.0, + "step": 1676, + "text_loss": 0.5546875 + }, + { + "epoch": 0.14, + "learning_rate": 9.528284475198122e-06, + "loss": 0.7227, + "regression_loss": 0.0, + "step": 1677, + "text_loss": 0.5234375 + }, + { + "epoch": 0.14, + "learning_rate": 9.527730767486217e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 1678, + "text_loss": 0.380859375 + }, + { + "epoch": 0.14, + "learning_rate": 9.52717675109701e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 1679, + "text_loss": 0.47265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.526622426068277e-06, + "loss": 0.6055, + "regression_loss": 0.0, + "step": 1680, + "text_loss": 0.8203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.526067792437806e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 1681, + "text_loss": 0.3359375 + }, + { + "epoch": 0.14, + "learning_rate": 9.525512850243409e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 1682, + "text_loss": 0.74609375 + }, + { + "epoch": 0.14, + "learning_rate": 9.524957599522917e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 1683, + "text_loss": 0.314453125 + }, + { + "epoch": 0.14, + "learning_rate": 9.52440204031419e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 1684, + "text_loss": 0.50390625 + }, + { + "epoch": 0.14, + "learning_rate": 9.523846172655097e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 1685, + "text_loss": 0.54296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.52328999658354e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 1686, + "text_loss": 0.33203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.522733512137433e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 1687, + "text_loss": 0.482421875 + }, + { + "epoch": 0.14, + "learning_rate": 9.522176719354712e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 1688, + "text_loss": 0.78515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.52161961827334e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 1689, + "text_loss": 0.7109375 + }, + { + "epoch": 0.14, + "learning_rate": 9.521062208931295e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 1690, + "text_loss": 0.291015625 + }, + { + "epoch": 0.14, + "learning_rate": 9.520504491366581e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 1691, + "text_loss": 0.6796875 + }, + { + "epoch": 0.14, + "learning_rate": 9.519946465617217e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 1692, + "text_loss": 0.5 + }, + { + "epoch": 0.14, + "learning_rate": 9.51938813172125e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 1693, + "text_loss": 0.32421875 + }, + { + "epoch": 0.14, + "learning_rate": 9.518829489716739e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 1694, + "text_loss": 0.423828125 + }, + { + "epoch": 0.14, + "learning_rate": 9.518270539641775e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 1695, + "text_loss": 0.57421875 + }, + { + "epoch": 0.14, + "learning_rate": 9.51771128153446e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 1696, + "text_loss": 0.58984375 + }, + { + "epoch": 0.14, + "learning_rate": 9.517151715432925e-06, + "loss": 0.6326, + "regression_loss": 0.0, + "step": 1697, + "text_loss": 0.9140625 + }, + { + "epoch": 0.14, + "learning_rate": 9.516591841375316e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 1698, + "text_loss": 0.447265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.516031659399803e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 1699, + "text_loss": 0.322265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.515471169544574e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 1700, + "text_loss": 0.6953125 + }, + { + "epoch": 0.14, + "learning_rate": 9.514910371847845e-06, + "loss": 0.4923, + "regression_loss": 0.0, + "step": 1701, + "text_loss": 0.5703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.514349266347844e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 1702, + "text_loss": 0.361328125 + }, + { + "epoch": 0.14, + "learning_rate": 9.513787853082827e-06, + "loss": 0.717, + "regression_loss": 0.0, + "step": 1703, + "text_loss": 0.8203125 + }, + { + "epoch": 0.14, + "learning_rate": 9.513226132091066e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 1704, + "text_loss": 0.765625 + }, + { + "epoch": 0.14, + "learning_rate": 9.512664103410859e-06, + "loss": 0.6714, + "regression_loss": 0.0, + "step": 1705, + "text_loss": 0.71875 + }, + { + "epoch": 0.14, + "learning_rate": 9.512101767080519e-06, + "loss": 0.6365, + "regression_loss": 0.0, + "step": 1706, + "text_loss": 0.921875 + }, + { + "epoch": 0.14, + "learning_rate": 9.511539123138387e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 1707, + "text_loss": 0.62109375 + }, + { + "epoch": 0.14, + "learning_rate": 9.510976171622818e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 1708, + "text_loss": 0.84375 + }, + { + "epoch": 0.14, + "learning_rate": 9.510412912572192e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 1709, + "text_loss": 0.57421875 + }, + { + "epoch": 0.14, + "learning_rate": 9.50984934602491e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 1710, + "text_loss": 0.6953125 + }, + { + "epoch": 0.14, + "learning_rate": 9.509285472019392e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 1711, + "text_loss": 0.5078125 + }, + { + "epoch": 0.14, + "learning_rate": 9.50872129059408e-06, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 1712, + "text_loss": 0.498046875 + }, + { + "epoch": 0.14, + "learning_rate": 9.508156801787437e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 1713, + "text_loss": 0.373046875 + }, + { + "epoch": 0.14, + "learning_rate": 9.507592005637946e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 1714, + "text_loss": 0.326171875 + }, + { + "epoch": 0.14, + "learning_rate": 9.507026902184116e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 1715, + "text_loss": 0.404296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.506461491464469e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 1716, + "text_loss": 0.3828125 + }, + { + "epoch": 0.14, + "learning_rate": 9.505895773517553e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 1717, + "text_loss": 0.59765625 + }, + { + "epoch": 0.14, + "learning_rate": 9.505329748381933e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 1718, + "text_loss": 0.51953125 + }, + { + "epoch": 0.14, + "learning_rate": 9.504763416096204e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 1719, + "text_loss": 0.67578125 + }, + { + "epoch": 0.14, + "learning_rate": 9.504196776698969e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 1720, + "text_loss": 0.361328125 + }, + { + "epoch": 0.14, + "learning_rate": 9.503629830228861e-06, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 1721, + "text_loss": 0.359375 + }, + { + "epoch": 0.14, + "learning_rate": 9.503062576724534e-06, + "loss": 0.7695, + "regression_loss": 0.0, + "step": 1722, + "text_loss": 0.4375 + }, + { + "epoch": 0.14, + "learning_rate": 9.502495016224657e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 1723, + "text_loss": 0.8828125 + }, + { + "epoch": 0.14, + "learning_rate": 9.501927148767924e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 1724, + "text_loss": 0.341796875 + }, + { + "epoch": 0.14, + "learning_rate": 9.501358974393052e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 1725, + "text_loss": 0.263671875 + }, + { + "epoch": 0.14, + "learning_rate": 9.500790493138772e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 1726, + "text_loss": 0.41796875 + }, + { + "epoch": 0.14, + "learning_rate": 9.500221705043842e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 1727, + "text_loss": 0.6484375 + }, + { + "epoch": 0.14, + "learning_rate": 9.49965261014704e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 1728, + "text_loss": 0.52734375 + }, + { + "epoch": 0.14, + "learning_rate": 9.499083208487164e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 1729, + "text_loss": 0.66796875 + }, + { + "epoch": 0.14, + "learning_rate": 9.498513500103029e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 1730, + "text_loss": 0.671875 + }, + { + "epoch": 0.14, + "learning_rate": 9.49794348503348e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 1731, + "text_loss": 0.470703125 + }, + { + "epoch": 0.14, + "learning_rate": 9.497373163317376e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 1732, + "text_loss": 0.5390625 + }, + { + "epoch": 0.14, + "learning_rate": 9.496802534993596e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 1733, + "text_loss": 0.416015625 + }, + { + "epoch": 0.14, + "learning_rate": 9.496231600101044e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 1734, + "text_loss": 0.53125 + }, + { + "epoch": 0.14, + "learning_rate": 9.495660358678646e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 1735, + "text_loss": 0.453125 + }, + { + "epoch": 0.14, + "learning_rate": 9.495088810765343e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 1736, + "text_loss": 0.54296875 + }, + { + "epoch": 0.14, + "learning_rate": 9.494516956400102e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 1737, + "text_loss": 0.640625 + }, + { + "epoch": 0.14, + "learning_rate": 9.49394479562191e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 1738, + "text_loss": 0.7890625 + }, + { + "epoch": 0.14, + "learning_rate": 9.49337232846977e-06, + "loss": 0.6638, + "regression_loss": 0.0, + "step": 1739, + "text_loss": 0.98828125 + }, + { + "epoch": 0.14, + "learning_rate": 9.492799554982713e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1740, + "text_loss": 0.451171875 + }, + { + "epoch": 0.14, + "learning_rate": 9.492226475199788e-06, + "loss": 0.5879, + "regression_loss": 0.0, + "step": 1741, + "text_loss": 0.322265625 + }, + { + "epoch": 0.14, + "learning_rate": 9.491653089160063e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 1742, + "text_loss": 0.41796875 + }, + { + "epoch": 0.14, + "learning_rate": 9.491079396902628e-06, + "loss": 0.6375, + "regression_loss": 0.0, + "step": 1743, + "text_loss": 0.478515625 + }, + { + "epoch": 0.14, + "learning_rate": 9.490505398466596e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 1744, + "text_loss": 0.54296875 + }, + { + "epoch": 0.15, + "learning_rate": 9.489931093891099e-06, + "loss": 0.6504, + "regression_loss": 0.0, + "step": 1745, + "text_loss": 0.73046875 + }, + { + "epoch": 0.15, + "learning_rate": 9.48935648321529e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 1746, + "text_loss": 0.4921875 + }, + { + "epoch": 0.15, + "learning_rate": 9.488781566478341e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 1747, + "text_loss": 0.6015625 + }, + { + "epoch": 0.15, + "learning_rate": 9.488206343719453e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 1748, + "text_loss": 0.56640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.487630814977833e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 1749, + "text_loss": 0.30078125 + }, + { + "epoch": 0.15, + "learning_rate": 9.487054980292724e-06, + "loss": 0.6704, + "regression_loss": 0.0, + "step": 1750, + "text_loss": 0.71875 + }, + { + "epoch": 0.15, + "learning_rate": 9.486478839703383e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 1751, + "text_loss": 0.310546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.485902393249085e-06, + "loss": 0.6523, + "regression_loss": 0.0, + "step": 1752, + "text_loss": 0.337890625 + }, + { + "epoch": 0.15, + "learning_rate": 9.485325640969129e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 1753, + "text_loss": 0.8125 + }, + { + "epoch": 0.15, + "learning_rate": 9.48474858290284e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 1754, + "text_loss": 0.58203125 + }, + { + "epoch": 0.15, + "learning_rate": 9.484171219089555e-06, + "loss": 0.5945, + "regression_loss": 0.0, + "step": 1755, + "text_loss": 0.482421875 + }, + { + "epoch": 0.15, + "learning_rate": 9.483593549568635e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 1756, + "text_loss": 0.43359375 + }, + { + "epoch": 0.15, + "learning_rate": 9.483015574379465e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 1757, + "text_loss": 0.50390625 + }, + { + "epoch": 0.15, + "learning_rate": 9.482437293561449e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 1758, + "text_loss": 0.490234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.481858707154006e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 1759, + "text_loss": 0.310546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.481279815196587e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 1760, + "text_loss": 0.421875 + }, + { + "epoch": 0.15, + "learning_rate": 9.480700617728654e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 1761, + "text_loss": 0.6640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.480121114789695e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 1762, + "text_loss": 0.41796875 + }, + { + "epoch": 0.15, + "learning_rate": 9.479541306419217e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 1763, + "text_loss": 0.5703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.478961192656751e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 1764, + "text_loss": 0.5703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.478380773541844e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 1765, + "text_loss": 0.51953125 + }, + { + "epoch": 0.15, + "learning_rate": 9.477800049114065e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 1766, + "text_loss": 0.5546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.477219019413006e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 1767, + "text_loss": 0.890625 + }, + { + "epoch": 0.15, + "learning_rate": 9.476637684478278e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 1768, + "text_loss": 0.515625 + }, + { + "epoch": 0.15, + "learning_rate": 9.476056044349514e-06, + "loss": 0.6785, + "regression_loss": 0.0, + "step": 1769, + "text_loss": 0.76171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.475474099066368e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 1770, + "text_loss": 0.4375 + }, + { + "epoch": 0.15, + "learning_rate": 9.474891848668512e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 1771, + "text_loss": 0.6875 + }, + { + "epoch": 0.15, + "learning_rate": 9.474309293195643e-06, + "loss": 0.7109, + "regression_loss": 0.0, + "step": 1772, + "text_loss": 0.6171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.473726432687475e-06, + "loss": 0.5389, + "regression_loss": 0.0, + "step": 1773, + "text_loss": 0.5859375 + }, + { + "epoch": 0.15, + "learning_rate": 9.473143267183745e-06, + "loss": 0.7117, + "regression_loss": 0.0, + "step": 1774, + "text_loss": 1.0078125 + }, + { + "epoch": 0.15, + "learning_rate": 9.47255979672421e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 1775, + "text_loss": 0.31640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.471976021348648e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 1776, + "text_loss": 0.4375 + }, + { + "epoch": 0.15, + "learning_rate": 9.47139194109686e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 1777, + "text_loss": 0.62109375 + }, + { + "epoch": 0.15, + "learning_rate": 9.47080755600866e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 1778, + "text_loss": 0.361328125 + }, + { + "epoch": 0.15, + "learning_rate": 9.470222866123896e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 1779, + "text_loss": 0.640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.46963787148242e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 1780, + "text_loss": 0.5234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.469052572124123e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 1781, + "text_loss": 0.43359375 + }, + { + "epoch": 0.15, + "learning_rate": 9.468466968088903e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 1782, + "text_loss": 0.6640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.467881059416684e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 1783, + "text_loss": 0.78125 + }, + { + "epoch": 0.15, + "learning_rate": 9.467294846147408e-06, + "loss": 0.6543, + "regression_loss": 0.0, + "step": 1784, + "text_loss": 0.76171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.466708328321045e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 1785, + "text_loss": 0.35546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.466121505977577e-06, + "loss": 0.6799, + "regression_loss": 0.0, + "step": 1786, + "text_loss": 0.734375 + }, + { + "epoch": 0.15, + "learning_rate": 9.465534379157013e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 1787, + "text_loss": 0.4921875 + }, + { + "epoch": 0.15, + "learning_rate": 9.464946947899378e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 1788, + "text_loss": 0.53515625 + }, + { + "epoch": 0.15, + "learning_rate": 9.46435921224472e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 1789, + "text_loss": 0.41015625 + }, + { + "epoch": 0.15, + "learning_rate": 9.46377117223311e-06, + "loss": 0.635, + "regression_loss": 0.0, + "step": 1790, + "text_loss": 0.419921875 + }, + { + "epoch": 0.15, + "learning_rate": 9.463182827904636e-06, + "loss": 0.7297, + "regression_loss": 0.0, + "step": 1791, + "text_loss": 0.34765625 + }, + { + "epoch": 0.15, + "learning_rate": 9.462594179299408e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 1792, + "text_loss": 0.51953125 + }, + { + "epoch": 0.15, + "learning_rate": 9.462005226457557e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 1793, + "text_loss": 0.671875 + }, + { + "epoch": 0.15, + "learning_rate": 9.461415969419235e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 1794, + "text_loss": 0.6484375 + }, + { + "epoch": 0.15, + "learning_rate": 9.460826408224616e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 1795, + "text_loss": 0.5625 + }, + { + "epoch": 0.15, + "learning_rate": 9.460236542913892e-06, + "loss": 0.6597, + "regression_loss": 0.0, + "step": 1796, + "text_loss": 1.0625 + }, + { + "epoch": 0.15, + "learning_rate": 9.459646373527275e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 1797, + "text_loss": 0.5234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.459055900105005e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 1798, + "text_loss": 0.45703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.458465122687332e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 1799, + "text_loss": 0.455078125 + }, + { + "epoch": 0.15, + "learning_rate": 9.457874041314535e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 1800, + "text_loss": 0.6953125 + }, + { + "epoch": 0.15, + "learning_rate": 9.45728265602691e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 1801, + "text_loss": 0.8203125 + }, + { + "epoch": 0.15, + "learning_rate": 9.456690966864775e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 1802, + "text_loss": 0.62890625 + }, + { + "epoch": 0.15, + "learning_rate": 9.456098973868467e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 1803, + "text_loss": 0.65234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.455506677078347e-06, + "loss": 0.6826, + "regression_loss": 0.0, + "step": 1804, + "text_loss": 1.28125 + }, + { + "epoch": 0.15, + "learning_rate": 9.454914076534793e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 1805, + "text_loss": 0.4609375 + }, + { + "epoch": 0.15, + "learning_rate": 9.454321172278207e-06, + "loss": 0.6492, + "regression_loss": 0.0, + "step": 1806, + "text_loss": 0.494140625 + }, + { + "epoch": 0.15, + "learning_rate": 9.45372796434901e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 1807, + "text_loss": 0.322265625 + }, + { + "epoch": 0.15, + "learning_rate": 9.453134452787643e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 1808, + "text_loss": 0.310546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.452540637634567e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 1809, + "text_loss": 0.47265625 + }, + { + "epoch": 0.15, + "learning_rate": 9.451946518930269e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 1810, + "text_loss": 0.52734375 + }, + { + "epoch": 0.15, + "learning_rate": 9.451352096715251e-06, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 1811, + "text_loss": 0.4140625 + }, + { + "epoch": 0.15, + "learning_rate": 9.450757371030036e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 1812, + "text_loss": 0.48828125 + }, + { + "epoch": 0.15, + "learning_rate": 9.450162341915172e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 1813, + "text_loss": 0.67578125 + }, + { + "epoch": 0.15, + "learning_rate": 9.449567009411225e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 1814, + "text_loss": 0.59765625 + }, + { + "epoch": 0.15, + "learning_rate": 9.44897137355878e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 1815, + "text_loss": 0.326171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.448375434398444e-06, + "loss": 0.7341, + "regression_loss": 0.0, + "step": 1816, + "text_loss": 1.1640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.447779191970848e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 1817, + "text_loss": 0.671875 + }, + { + "epoch": 0.15, + "learning_rate": 9.447182646316636e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 1818, + "text_loss": 0.5703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.446585797476483e-06, + "loss": 0.5236, + "regression_loss": 0.0, + "step": 1819, + "text_loss": 0.59765625 + }, + { + "epoch": 0.15, + "learning_rate": 9.445988645491074e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 1820, + "text_loss": 0.8046875 + }, + { + "epoch": 0.15, + "learning_rate": 9.445391190401122e-06, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 1821, + "text_loss": 0.51171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.444793432247361e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 1822, + "text_loss": 0.3828125 + }, + { + "epoch": 0.15, + "learning_rate": 9.44419537107054e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 1823, + "text_loss": 0.4921875 + }, + { + "epoch": 0.15, + "learning_rate": 9.443597006911432e-06, + "loss": 0.6262, + "regression_loss": 0.0, + "step": 1824, + "text_loss": 0.63671875 + }, + { + "epoch": 0.15, + "learning_rate": 9.44299833981083e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 1825, + "text_loss": 0.47265625 + }, + { + "epoch": 0.15, + "learning_rate": 9.442399369809547e-06, + "loss": 0.6609, + "regression_loss": 0.0, + "step": 1826, + "text_loss": 0.609375 + }, + { + "epoch": 0.15, + "learning_rate": 9.441800096948421e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 1827, + "text_loss": 0.796875 + }, + { + "epoch": 0.15, + "learning_rate": 9.441200521268306e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 1828, + "text_loss": 0.578125 + }, + { + "epoch": 0.15, + "learning_rate": 9.440600642810077e-06, + "loss": 0.6331, + "regression_loss": 0.0, + "step": 1829, + "text_loss": 0.76171875 + }, + { + "epoch": 0.15, + "learning_rate": 9.440000461614631e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 1830, + "text_loss": 0.484375 + }, + { + "epoch": 0.15, + "learning_rate": 9.439399977722888e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 1831, + "text_loss": 0.5234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.43879919117578e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 1832, + "text_loss": 0.65625 + }, + { + "epoch": 0.15, + "learning_rate": 9.438198102014271e-06, + "loss": 0.6365, + "regression_loss": 0.0, + "step": 1833, + "text_loss": 0.6875 + }, + { + "epoch": 0.15, + "learning_rate": 9.437596710279337e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 1834, + "text_loss": 0.435546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.43699501601198e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 1835, + "text_loss": 0.3203125 + }, + { + "epoch": 0.15, + "learning_rate": 9.436393019253219e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 1836, + "text_loss": 0.65234375 + }, + { + "epoch": 0.15, + "learning_rate": 9.435790720044094e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 1837, + "text_loss": 0.68359375 + }, + { + "epoch": 0.15, + "learning_rate": 9.435188118425668e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 1838, + "text_loss": 0.5859375 + }, + { + "epoch": 0.15, + "learning_rate": 9.434585214439023e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 1839, + "text_loss": 0.6015625 + }, + { + "epoch": 0.15, + "learning_rate": 9.433982008125264e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 1840, + "text_loss": 0.25 + }, + { + "epoch": 0.15, + "learning_rate": 9.433378499525512e-06, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 1841, + "text_loss": 0.6640625 + }, + { + "epoch": 0.15, + "learning_rate": 9.43277468868091e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 1842, + "text_loss": 0.70703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.432170575632626e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 1843, + "text_loss": 0.5703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.431566160421843e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 1844, + "text_loss": 0.55859375 + }, + { + "epoch": 0.15, + "learning_rate": 9.430961443089769e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 1845, + "text_loss": 0.5546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.430356423677627e-06, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 1846, + "text_loss": 0.48046875 + }, + { + "epoch": 0.15, + "learning_rate": 9.429751102226667e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 1847, + "text_loss": 0.5703125 + }, + { + "epoch": 0.15, + "learning_rate": 9.429145478778157e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 1848, + "text_loss": 0.63671875 + }, + { + "epoch": 0.15, + "learning_rate": 9.428539553373383e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 1849, + "text_loss": 0.53125 + }, + { + "epoch": 0.15, + "learning_rate": 9.427933326053654e-06, + "loss": 0.6689, + "regression_loss": 0.0, + "step": 1850, + "text_loss": 0.890625 + }, + { + "epoch": 0.15, + "learning_rate": 9.427326796860302e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 1851, + "text_loss": 0.5390625 + }, + { + "epoch": 0.15, + "learning_rate": 9.426719965834676e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 1852, + "text_loss": 0.392578125 + }, + { + "epoch": 0.15, + "learning_rate": 9.426112833018144e-06, + "loss": 0.688, + "regression_loss": 0.0, + "step": 1853, + "text_loss": 0.9375 + }, + { + "epoch": 0.15, + "learning_rate": 9.425505398452101e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 1854, + "text_loss": 0.8203125 + }, + { + "epoch": 0.15, + "learning_rate": 9.424897662177956e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 1855, + "text_loss": 0.59375 + }, + { + "epoch": 0.15, + "learning_rate": 9.424289624237143e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 1856, + "text_loss": 0.462890625 + }, + { + "epoch": 0.15, + "learning_rate": 9.423681284671116e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 1857, + "text_loss": 0.59375 + }, + { + "epoch": 0.15, + "learning_rate": 9.423072643521343e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 1858, + "text_loss": 0.83203125 + }, + { + "epoch": 0.15, + "learning_rate": 9.422463700829325e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 1859, + "text_loss": 0.546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.421854456636571e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 1860, + "text_loss": 0.310546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.42124491098462e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 1861, + "text_loss": 0.546875 + }, + { + "epoch": 0.15, + "learning_rate": 9.420635063915024e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 1862, + "text_loss": 0.46484375 + }, + { + "epoch": 0.15, + "learning_rate": 9.420024915469364e-06, + "loss": 0.6541, + "regression_loss": 0.0, + "step": 1863, + "text_loss": 0.4296875 + }, + { + "epoch": 0.15, + "learning_rate": 9.419414465689231e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 1864, + "text_loss": 0.357421875 + }, + { + "epoch": 0.16, + "learning_rate": 9.418803714616248e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 1865, + "text_loss": 0.46484375 + }, + { + "epoch": 0.16, + "learning_rate": 9.41819266229205e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 1866, + "text_loss": 0.462890625 + }, + { + "epoch": 0.16, + "learning_rate": 9.417581308758294e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 1867, + "text_loss": 0.515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.416969654056663e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 1868, + "text_loss": 0.408203125 + }, + { + "epoch": 0.16, + "learning_rate": 9.416357698228851e-06, + "loss": 0.5619, + "regression_loss": 0.0, + "step": 1869, + "text_loss": 0.84765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.415745441316581e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 1870, + "text_loss": 0.59375 + }, + { + "epoch": 0.16, + "learning_rate": 9.415132883361596e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 1871, + "text_loss": 0.43359375 + }, + { + "epoch": 0.16, + "learning_rate": 9.414520024405653e-06, + "loss": 0.6162, + "regression_loss": 0.0, + "step": 1872, + "text_loss": 0.53515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.413906864490536e-06, + "loss": 0.6428, + "regression_loss": 0.0, + "step": 1873, + "text_loss": 0.53515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.413293403658046e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 1874, + "text_loss": 0.271484375 + }, + { + "epoch": 0.16, + "learning_rate": 9.412679641950006e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 1875, + "text_loss": 0.447265625 + }, + { + "epoch": 0.16, + "learning_rate": 9.412065579408258e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 1876, + "text_loss": 0.3515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.411451216074668e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 1877, + "text_loss": 0.48828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.410836551991116e-06, + "loss": 0.5363, + "regression_loss": 0.0, + "step": 1878, + "text_loss": 0.486328125 + }, + { + "epoch": 0.16, + "learning_rate": 9.41022158719951e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 1879, + "text_loss": 0.5 + }, + { + "epoch": 0.16, + "learning_rate": 9.409606321741776e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 1880, + "text_loss": 0.6953125 + }, + { + "epoch": 0.16, + "learning_rate": 9.408990755659858e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 1881, + "text_loss": 0.5703125 + }, + { + "epoch": 0.16, + "learning_rate": 9.40837488899572e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 1882, + "text_loss": 0.515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.407758721791352e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 1883, + "text_loss": 0.478515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.40714225408876e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 1884, + "text_loss": 0.43359375 + }, + { + "epoch": 0.16, + "learning_rate": 9.40652548592997e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 1885, + "text_loss": 0.5859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.405908417357032e-06, + "loss": 0.6343, + "regression_loss": 0.0, + "step": 1886, + "text_loss": 0.59765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.405291048412016e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 1887, + "text_loss": 0.66796875 + }, + { + "epoch": 0.16, + "learning_rate": 9.404673379137007e-06, + "loss": 0.6758, + "regression_loss": 0.0, + "step": 1888, + "text_loss": 0.921875 + }, + { + "epoch": 0.16, + "learning_rate": 9.404055409574117e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 1889, + "text_loss": 0.51953125 + }, + { + "epoch": 0.16, + "learning_rate": 9.403437139765474e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 1890, + "text_loss": 0.421875 + }, + { + "epoch": 0.16, + "learning_rate": 9.402818569753232e-06, + "loss": 0.4058, + "regression_loss": 0.0, + "step": 1891, + "text_loss": 0.515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.40219969957956e-06, + "loss": 0.6055, + "regression_loss": 0.0, + "step": 1892, + "text_loss": 0.99609375 + }, + { + "epoch": 0.16, + "learning_rate": 9.401580529286647e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 1893, + "text_loss": 0.453125 + }, + { + "epoch": 0.16, + "learning_rate": 9.400961058916708e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 1894, + "text_loss": 0.55859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.400341288511976e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 1895, + "text_loss": 0.76171875 + }, + { + "epoch": 0.16, + "learning_rate": 9.3997212181147e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 1896, + "text_loss": 0.5859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.399100847767158e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 1897, + "text_loss": 0.69140625 + }, + { + "epoch": 0.16, + "learning_rate": 9.398480177511638e-06, + "loss": 0.6194, + "regression_loss": 0.0, + "step": 1898, + "text_loss": 0.54296875 + }, + { + "epoch": 0.16, + "learning_rate": 9.397859207390459e-06, + "loss": 0.5385, + "regression_loss": 0.0, + "step": 1899, + "text_loss": 0.859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.39723793744595e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 1900, + "text_loss": 0.87890625 + }, + { + "epoch": 0.16, + "learning_rate": 9.396616367720475e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 1901, + "text_loss": 0.734375 + }, + { + "epoch": 0.16, + "learning_rate": 9.3959944982564e-06, + "loss": 0.6428, + "regression_loss": 0.0, + "step": 1902, + "text_loss": 0.73828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.395372329096127e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 1903, + "text_loss": 0.68359375 + }, + { + "epoch": 0.16, + "learning_rate": 9.394749860282068e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 1904, + "text_loss": 0.69140625 + }, + { + "epoch": 0.16, + "learning_rate": 9.394127091856663e-06, + "loss": 0.6648, + "regression_loss": 0.0, + "step": 1905, + "text_loss": 0.60546875 + }, + { + "epoch": 0.16, + "learning_rate": 9.393504023862368e-06, + "loss": 0.6301, + "regression_loss": 0.0, + "step": 1906, + "text_loss": 0.765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.39288065634166e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 1907, + "text_loss": 0.5234375 + }, + { + "epoch": 0.16, + "learning_rate": 9.392256989337038e-06, + "loss": 0.6426, + "regression_loss": 0.0, + "step": 1908, + "text_loss": 0.7578125 + }, + { + "epoch": 0.16, + "learning_rate": 9.391633022891018e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 1909, + "text_loss": 0.7265625 + }, + { + "epoch": 0.16, + "learning_rate": 9.391008757046143e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 1910, + "text_loss": 0.57421875 + }, + { + "epoch": 0.16, + "learning_rate": 9.390384191844969e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 1911, + "text_loss": 0.609375 + }, + { + "epoch": 0.16, + "learning_rate": 9.389759327330078e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 1912, + "text_loss": 0.42578125 + }, + { + "epoch": 0.16, + "learning_rate": 9.389134163544065e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 1913, + "text_loss": 0.453125 + }, + { + "epoch": 0.16, + "learning_rate": 9.388508700529556e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 1914, + "text_loss": 0.412109375 + }, + { + "epoch": 0.16, + "learning_rate": 9.38788293832919e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 1915, + "text_loss": 0.326171875 + }, + { + "epoch": 0.16, + "learning_rate": 9.387256876985628e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 1916, + "text_loss": 0.265625 + }, + { + "epoch": 0.16, + "learning_rate": 9.386630516541552e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 1917, + "text_loss": 0.5 + }, + { + "epoch": 0.16, + "learning_rate": 9.386003857039664e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 1918, + "text_loss": 0.494140625 + }, + { + "epoch": 0.16, + "learning_rate": 9.385376898522685e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 1919, + "text_loss": 0.455078125 + }, + { + "epoch": 0.16, + "learning_rate": 9.384749641033358e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 1920, + "text_loss": 0.86328125 + }, + { + "epoch": 0.16, + "learning_rate": 9.384122084614448e-06, + "loss": 0.4419, + "regression_loss": 0.0, + "step": 1921, + "text_loss": 0.404296875 + }, + { + "epoch": 0.16, + "learning_rate": 9.383494229308739e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 1922, + "text_loss": 0.349609375 + }, + { + "epoch": 0.16, + "learning_rate": 9.382866075159032e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 1923, + "text_loss": 0.51171875 + }, + { + "epoch": 0.16, + "learning_rate": 9.382237622208154e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 1924, + "text_loss": 0.6171875 + }, + { + "epoch": 0.16, + "learning_rate": 9.381608870498948e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 1925, + "text_loss": 0.57421875 + }, + { + "epoch": 0.16, + "learning_rate": 9.380979820074279e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 1926, + "text_loss": 0.51953125 + }, + { + "epoch": 0.16, + "learning_rate": 9.380350470977033e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 1927, + "text_loss": 0.62890625 + }, + { + "epoch": 0.16, + "learning_rate": 9.379720823250116e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 1928, + "text_loss": 0.34375 + }, + { + "epoch": 0.16, + "learning_rate": 9.379090876936455e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 1929, + "text_loss": 0.419921875 + }, + { + "epoch": 0.16, + "learning_rate": 9.378460632078995e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 1930, + "text_loss": 0.46875 + }, + { + "epoch": 0.16, + "learning_rate": 9.377830088720703e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 1931, + "text_loss": 0.337890625 + }, + { + "epoch": 0.16, + "learning_rate": 9.377199246904567e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 1932, + "text_loss": 0.421875 + }, + { + "epoch": 0.16, + "learning_rate": 9.376568106673593e-06, + "loss": 0.647, + "regression_loss": 0.0, + "step": 1933, + "text_loss": 0.95703125 + }, + { + "epoch": 0.16, + "learning_rate": 9.37593666807081e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 1934, + "text_loss": 0.671875 + }, + { + "epoch": 0.16, + "learning_rate": 9.375304931139266e-06, + "loss": 0.7258, + "regression_loss": 0.0, + "step": 1935, + "text_loss": 0.83984375 + }, + { + "epoch": 0.16, + "learning_rate": 9.37467289592203e-06, + "loss": 0.6538, + "regression_loss": 0.0, + "step": 1936, + "text_loss": 0.4375 + }, + { + "epoch": 0.16, + "learning_rate": 9.37404056246219e-06, + "loss": 0.6038, + "regression_loss": 0.0, + "step": 1937, + "text_loss": 0.458984375 + }, + { + "epoch": 0.16, + "learning_rate": 9.373407930802856e-06, + "loss": 0.4653, + "regression_loss": 0.0, + "step": 1938, + "text_loss": 0.7734375 + }, + { + "epoch": 0.16, + "learning_rate": 9.372775000987157e-06, + "loss": 0.7041, + "regression_loss": 0.0, + "step": 1939, + "text_loss": 0.640625 + }, + { + "epoch": 0.16, + "learning_rate": 9.372141773058243e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 1940, + "text_loss": 0.412109375 + }, + { + "epoch": 0.16, + "learning_rate": 9.371508247059285e-06, + "loss": 0.5999, + "regression_loss": 0.0, + "step": 1941, + "text_loss": 0.3828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.370874423033474e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 1942, + "text_loss": 0.5078125 + }, + { + "epoch": 0.16, + "learning_rate": 9.370240301024019e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 1943, + "text_loss": 0.59375 + }, + { + "epoch": 0.16, + "learning_rate": 9.36960588107415e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 1944, + "text_loss": 0.384765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.368971163227124e-06, + "loss": 0.7083, + "regression_loss": 0.0, + "step": 1945, + "text_loss": 0.73828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.368336147526205e-06, + "loss": 0.6587, + "regression_loss": 0.0, + "step": 1946, + "text_loss": 0.6640625 + }, + { + "epoch": 0.16, + "learning_rate": 9.367700834014693e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 1947, + "text_loss": 0.67578125 + }, + { + "epoch": 0.16, + "learning_rate": 9.367065222735894e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 1948, + "text_loss": 0.515625 + }, + { + "epoch": 0.16, + "learning_rate": 9.366429313733145e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 1949, + "text_loss": 0.703125 + }, + { + "epoch": 0.16, + "learning_rate": 9.365793107049796e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 1950, + "text_loss": 0.423828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.365156602729222e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 1951, + "text_loss": 0.8046875 + }, + { + "epoch": 0.16, + "learning_rate": 9.364519800814818e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 1952, + "text_loss": 0.4765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.363882701349995e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 1953, + "text_loss": 0.341796875 + }, + { + "epoch": 0.16, + "learning_rate": 9.363245304378188e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 1954, + "text_loss": 0.380859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.362607609942852e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 1955, + "text_loss": 0.298828125 + }, + { + "epoch": 0.16, + "learning_rate": 9.361969618087461e-06, + "loss": 0.6787, + "regression_loss": 0.0, + "step": 1956, + "text_loss": 0.63671875 + }, + { + "epoch": 0.16, + "learning_rate": 9.361331328855512e-06, + "loss": 0.6936, + "regression_loss": 0.0, + "step": 1957, + "text_loss": 0.734375 + }, + { + "epoch": 0.16, + "learning_rate": 9.360692742290516e-06, + "loss": 0.6338, + "regression_loss": 0.0, + "step": 1958, + "text_loss": 0.55078125 + }, + { + "epoch": 0.16, + "learning_rate": 9.360053858436012e-06, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 1959, + "text_loss": 0.67578125 + }, + { + "epoch": 0.16, + "learning_rate": 9.359414677335557e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 1960, + "text_loss": 0.28125 + }, + { + "epoch": 0.16, + "learning_rate": 9.358775199032723e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 1961, + "text_loss": 0.70703125 + }, + { + "epoch": 0.16, + "learning_rate": 9.358135423571109e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 1962, + "text_loss": 0.361328125 + }, + { + "epoch": 0.16, + "learning_rate": 9.35749535099433e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 1963, + "text_loss": 0.263671875 + }, + { + "epoch": 0.16, + "learning_rate": 9.356854981346024e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 1964, + "text_loss": 0.4453125 + }, + { + "epoch": 0.16, + "learning_rate": 9.356214314669848e-06, + "loss": 0.6802, + "regression_loss": 0.0, + "step": 1965, + "text_loss": 0.46484375 + }, + { + "epoch": 0.16, + "learning_rate": 9.355573351009481e-06, + "loss": 0.6223, + "regression_loss": 0.0, + "step": 1966, + "text_loss": 0.80078125 + }, + { + "epoch": 0.16, + "learning_rate": 9.354932090408617e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 1967, + "text_loss": 0.310546875 + }, + { + "epoch": 0.16, + "learning_rate": 9.354290532910977e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 1968, + "text_loss": 0.59375 + }, + { + "epoch": 0.16, + "learning_rate": 9.353648678560296e-06, + "loss": 0.6194, + "regression_loss": 0.0, + "step": 1969, + "text_loss": 0.470703125 + }, + { + "epoch": 0.16, + "learning_rate": 9.353006527400336e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 1970, + "text_loss": 0.66796875 + }, + { + "epoch": 0.16, + "learning_rate": 9.352364079474872e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 1971, + "text_loss": 0.8359375 + }, + { + "epoch": 0.16, + "learning_rate": 9.351721334827706e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 1972, + "text_loss": 0.74609375 + }, + { + "epoch": 0.16, + "learning_rate": 9.351078293502655e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 1973, + "text_loss": 0.71484375 + }, + { + "epoch": 0.16, + "learning_rate": 9.350434955543557e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 1974, + "text_loss": 0.390625 + }, + { + "epoch": 0.16, + "learning_rate": 9.349791320994274e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 1975, + "text_loss": 0.31640625 + }, + { + "epoch": 0.16, + "learning_rate": 9.349147389898687e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 1976, + "text_loss": 0.7109375 + }, + { + "epoch": 0.16, + "learning_rate": 9.348503162300691e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 1977, + "text_loss": 0.63671875 + }, + { + "epoch": 0.16, + "learning_rate": 9.347858638244209e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 1978, + "text_loss": 0.25 + }, + { + "epoch": 0.16, + "learning_rate": 9.347213817773182e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 1979, + "text_loss": 0.2890625 + }, + { + "epoch": 0.16, + "learning_rate": 9.34656870093157e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 1980, + "text_loss": 0.62109375 + }, + { + "epoch": 0.16, + "learning_rate": 9.345923287763353e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 1981, + "text_loss": 0.59375 + }, + { + "epoch": 0.16, + "learning_rate": 9.345277578312531e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 1982, + "text_loss": 0.33984375 + }, + { + "epoch": 0.16, + "learning_rate": 9.344631572623129e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 1983, + "text_loss": 0.5859375 + }, + { + "epoch": 0.16, + "learning_rate": 9.343985270739184e-06, + "loss": 0.4923, + "regression_loss": 0.0, + "step": 1984, + "text_loss": 0.59765625 + }, + { + "epoch": 0.16, + "learning_rate": 9.343338672704757e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 1985, + "text_loss": 0.98046875 + }, + { + "epoch": 0.17, + "learning_rate": 9.342691778563937e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 1986, + "text_loss": 0.5390625 + }, + { + "epoch": 0.17, + "learning_rate": 9.342044588360818e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 1987, + "text_loss": 0.84375 + }, + { + "epoch": 0.17, + "learning_rate": 9.341397102139524e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 1988, + "text_loss": 0.6640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.340749319944201e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 1989, + "text_loss": 0.57421875 + }, + { + "epoch": 0.17, + "learning_rate": 9.340101241819007e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 1990, + "text_loss": 0.5703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.339452867808127e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 1991, + "text_loss": 0.44921875 + }, + { + "epoch": 0.17, + "learning_rate": 9.338804197955762e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 1992, + "text_loss": 0.56640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.338155232306138e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 1993, + "text_loss": 0.69140625 + }, + { + "epoch": 0.17, + "learning_rate": 9.337505970903495e-06, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 1994, + "text_loss": 0.6328125 + }, + { + "epoch": 0.17, + "learning_rate": 9.336856413792098e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 1995, + "text_loss": 0.5390625 + }, + { + "epoch": 0.17, + "learning_rate": 9.33620656101623e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 1996, + "text_loss": 0.33203125 + }, + { + "epoch": 0.17, + "learning_rate": 9.335556412620195e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 1997, + "text_loss": 0.33984375 + }, + { + "epoch": 0.17, + "learning_rate": 9.334905968648315e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 1998, + "text_loss": 0.59375 + }, + { + "epoch": 0.17, + "learning_rate": 9.334255229144935e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 1999, + "text_loss": 0.57421875 + }, + { + "epoch": 0.17, + "learning_rate": 9.333604194154421e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 2000, + "text_loss": 0.431640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.332952863721154e-06, + "loss": 0.6113, + "regression_loss": 0.0, + "step": 2001, + "text_loss": 0.5234375 + }, + { + "epoch": 0.17, + "learning_rate": 9.33230123788954e-06, + "loss": 0.6067, + "regression_loss": 0.0, + "step": 2002, + "text_loss": 0.7734375 + }, + { + "epoch": 0.17, + "learning_rate": 9.331649316704004e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 2003, + "text_loss": 0.86328125 + }, + { + "epoch": 0.17, + "learning_rate": 9.330997100208991e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 2004, + "text_loss": 0.46484375 + }, + { + "epoch": 0.17, + "learning_rate": 9.330344588448964e-06, + "loss": 0.6414, + "regression_loss": 0.0, + "step": 2005, + "text_loss": 0.90625 + }, + { + "epoch": 0.17, + "learning_rate": 9.329691781468406e-06, + "loss": 0.4432, + "regression_loss": 0.0, + "step": 2006, + "text_loss": 0.2451171875 + }, + { + "epoch": 0.17, + "learning_rate": 9.329038679311827e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 2007, + "text_loss": 0.5546875 + }, + { + "epoch": 0.17, + "learning_rate": 9.32838528202375e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 2008, + "text_loss": 0.6796875 + }, + { + "epoch": 0.17, + "learning_rate": 9.327731589648719e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 2009, + "text_loss": 0.734375 + }, + { + "epoch": 0.17, + "learning_rate": 9.327077602231301e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 2010, + "text_loss": 0.447265625 + }, + { + "epoch": 0.17, + "learning_rate": 9.326423319816079e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 2011, + "text_loss": 0.52734375 + }, + { + "epoch": 0.17, + "learning_rate": 9.325768742447662e-06, + "loss": 0.6667, + "regression_loss": 0.0, + "step": 2012, + "text_loss": 0.51171875 + }, + { + "epoch": 0.17, + "learning_rate": 9.325113870170674e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 2013, + "text_loss": 0.40234375 + }, + { + "epoch": 0.17, + "learning_rate": 9.32445870302976e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 2014, + "text_loss": 0.796875 + }, + { + "epoch": 0.17, + "learning_rate": 9.323803241069589e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 2015, + "text_loss": 0.490234375 + }, + { + "epoch": 0.17, + "learning_rate": 9.323147484334843e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 2016, + "text_loss": 0.48046875 + }, + { + "epoch": 0.17, + "learning_rate": 9.32249143287023e-06, + "loss": 0.5999, + "regression_loss": 0.0, + "step": 2017, + "text_loss": 0.38671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.321835086720477e-06, + "loss": 0.6577, + "regression_loss": 0.0, + "step": 2018, + "text_loss": 0.52734375 + }, + { + "epoch": 0.17, + "learning_rate": 9.321178445930327e-06, + "loss": 0.6064, + "regression_loss": 0.0, + "step": 2019, + "text_loss": 0.64453125 + }, + { + "epoch": 0.17, + "learning_rate": 9.320521510544553e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 2020, + "text_loss": 0.5703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.319864280607935e-06, + "loss": 0.5928, + "regression_loss": 0.0, + "step": 2021, + "text_loss": 0.6640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.319206756165282e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 2022, + "text_loss": 0.431640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.31854893726142e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 2023, + "text_loss": 0.470703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.317890823941196e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 2024, + "text_loss": 0.67578125 + }, + { + "epoch": 0.17, + "learning_rate": 9.317232416249477e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 2025, + "text_loss": 0.255859375 + }, + { + "epoch": 0.17, + "learning_rate": 9.316573714231152e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 2026, + "text_loss": 0.447265625 + }, + { + "epoch": 0.17, + "learning_rate": 9.315914717931123e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 2027, + "text_loss": 0.81640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.315255427394322e-06, + "loss": 0.6333, + "regression_loss": 0.0, + "step": 2028, + "text_loss": 0.84765625 + }, + { + "epoch": 0.17, + "learning_rate": 9.31459584266569e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 2029, + "text_loss": 0.88671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.313935963790201e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 2030, + "text_loss": 0.65625 + }, + { + "epoch": 0.17, + "learning_rate": 9.313275790812837e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 2031, + "text_loss": 0.515625 + }, + { + "epoch": 0.17, + "learning_rate": 9.312615323778609e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 2032, + "text_loss": 0.498046875 + }, + { + "epoch": 0.17, + "learning_rate": 9.31195456273254e-06, + "loss": 0.5999, + "regression_loss": 0.0, + "step": 2033, + "text_loss": 0.380859375 + }, + { + "epoch": 0.17, + "learning_rate": 9.311293507719681e-06, + "loss": 0.5321, + "regression_loss": 0.0, + "step": 2034, + "text_loss": 0.33984375 + }, + { + "epoch": 0.17, + "learning_rate": 9.310632158785097e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 2035, + "text_loss": 0.37890625 + }, + { + "epoch": 0.17, + "learning_rate": 9.309970515973876e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 2036, + "text_loss": 0.4765625 + }, + { + "epoch": 0.17, + "learning_rate": 9.309308579331126e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 2037, + "text_loss": 0.765625 + }, + { + "epoch": 0.17, + "learning_rate": 9.308646348901975e-06, + "loss": 0.7478, + "regression_loss": 0.0, + "step": 2038, + "text_loss": 1.8671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.307983824731567e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 2039, + "text_loss": 0.431640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.307321006865076e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 2040, + "text_loss": 0.63671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.306657895347682e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 2041, + "text_loss": 0.51171875 + }, + { + "epoch": 0.17, + "learning_rate": 9.305994490224596e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 2042, + "text_loss": 0.54296875 + }, + { + "epoch": 0.17, + "learning_rate": 9.305330791541047e-06, + "loss": 0.4377, + "regression_loss": 0.0, + "step": 2043, + "text_loss": 0.291015625 + }, + { + "epoch": 0.17, + "learning_rate": 9.304666799342281e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 2044, + "text_loss": 0.94921875 + }, + { + "epoch": 0.17, + "learning_rate": 9.304002513673566e-06, + "loss": 0.5565, + "regression_loss": 0.0, + "step": 2045, + "text_loss": 0.38671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.303337934580188e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 2046, + "text_loss": 0.4140625 + }, + { + "epoch": 0.17, + "learning_rate": 9.302673062107457e-06, + "loss": 0.4149, + "regression_loss": 0.0, + "step": 2047, + "text_loss": 0.55078125 + }, + { + "epoch": 0.17, + "learning_rate": 9.302007896300697e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 2048, + "text_loss": 0.5390625 + }, + { + "epoch": 0.17, + "learning_rate": 9.301342437205261e-06, + "loss": 0.6104, + "regression_loss": 0.0, + "step": 2049, + "text_loss": 0.7109375 + }, + { + "epoch": 0.17, + "learning_rate": 9.30067668486651e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 2050, + "text_loss": 0.396484375 + }, + { + "epoch": 0.17, + "learning_rate": 9.300010639329838e-06, + "loss": 0.3889, + "regression_loss": 0.0, + "step": 2051, + "text_loss": 0.30859375 + }, + { + "epoch": 0.17, + "learning_rate": 9.299344300640648e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 2052, + "text_loss": 0.486328125 + }, + { + "epoch": 0.17, + "learning_rate": 9.29867766884437e-06, + "loss": 0.7017, + "regression_loss": 0.0, + "step": 2053, + "text_loss": 0.66015625 + }, + { + "epoch": 0.17, + "learning_rate": 9.29801074398645e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 2054, + "text_loss": 0.60546875 + }, + { + "epoch": 0.17, + "learning_rate": 9.297343526112356e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 2055, + "text_loss": 0.5078125 + }, + { + "epoch": 0.17, + "learning_rate": 9.296676015267575e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 2056, + "text_loss": 0.55859375 + }, + { + "epoch": 0.17, + "learning_rate": 9.296008211497616e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 2057, + "text_loss": 0.68359375 + }, + { + "epoch": 0.17, + "learning_rate": 9.295340114848005e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 2058, + "text_loss": 0.341796875 + }, + { + "epoch": 0.17, + "learning_rate": 9.294671725364291e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 2059, + "text_loss": 0.87109375 + }, + { + "epoch": 0.17, + "learning_rate": 9.294003043092036e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 2060, + "text_loss": 0.51171875 + }, + { + "epoch": 0.17, + "learning_rate": 9.293334068076835e-06, + "loss": 0.4984, + "regression_loss": 0.0, + "step": 2061, + "text_loss": 0.5703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.292664800364293e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 2062, + "text_loss": 0.6328125 + }, + { + "epoch": 0.17, + "learning_rate": 9.291995240000033e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 2063, + "text_loss": 0.89453125 + }, + { + "epoch": 0.17, + "learning_rate": 9.291325387029706e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 2064, + "text_loss": 0.451171875 + }, + { + "epoch": 0.17, + "learning_rate": 9.290655241498977e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 2065, + "text_loss": 0.88671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.289984803453538e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 2066, + "text_loss": 0.703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.289314072939089e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 2067, + "text_loss": 0.7109375 + }, + { + "epoch": 0.17, + "learning_rate": 9.288643050001362e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 2068, + "text_loss": 0.8203125 + }, + { + "epoch": 0.17, + "learning_rate": 9.287971734686101e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 2069, + "text_loss": 0.63671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.287300127039074e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 2070, + "text_loss": 0.71484375 + }, + { + "epoch": 0.17, + "learning_rate": 9.286628227106067e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 2071, + "text_loss": 0.310546875 + }, + { + "epoch": 0.17, + "learning_rate": 9.285956034932887e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 2072, + "text_loss": 0.74609375 + }, + { + "epoch": 0.17, + "learning_rate": 9.285283550565362e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 2073, + "text_loss": 0.53125 + }, + { + "epoch": 0.17, + "learning_rate": 9.28461077404934e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 2074, + "text_loss": 0.63671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.283937705430679e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 2075, + "text_loss": 0.6328125 + }, + { + "epoch": 0.17, + "learning_rate": 9.283264344755276e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 2076, + "text_loss": 0.640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.28259069206903e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 2077, + "text_loss": 0.359375 + }, + { + "epoch": 0.17, + "learning_rate": 9.28191674741787e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 2078, + "text_loss": 0.69140625 + }, + { + "epoch": 0.17, + "learning_rate": 9.28124251084774e-06, + "loss": 0.5414, + "regression_loss": 0.0, + "step": 2079, + "text_loss": 0.671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.280567982404611e-06, + "loss": 0.5536, + "regression_loss": 0.0, + "step": 2080, + "text_loss": 0.9453125 + }, + { + "epoch": 0.17, + "learning_rate": 9.279893162134464e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 2081, + "text_loss": 0.419921875 + }, + { + "epoch": 0.17, + "learning_rate": 9.279218050083307e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 2082, + "text_loss": 0.48828125 + }, + { + "epoch": 0.17, + "learning_rate": 9.278542646297165e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 2083, + "text_loss": 0.51953125 + }, + { + "epoch": 0.17, + "learning_rate": 9.277866950822081e-06, + "loss": 0.6919, + "regression_loss": 0.0, + "step": 2084, + "text_loss": 0.671875 + }, + { + "epoch": 0.17, + "learning_rate": 9.277190963704123e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 2085, + "text_loss": 0.70703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.276514684989378e-06, + "loss": 0.6733, + "regression_loss": 0.0, + "step": 2086, + "text_loss": 0.53515625 + }, + { + "epoch": 0.17, + "learning_rate": 9.275838114723949e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 2087, + "text_loss": 0.353515625 + }, + { + "epoch": 0.17, + "learning_rate": 9.275161252953961e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 2088, + "text_loss": 0.322265625 + }, + { + "epoch": 0.17, + "learning_rate": 9.27448409972556e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 2089, + "text_loss": 0.4453125 + }, + { + "epoch": 0.17, + "learning_rate": 9.273806655084911e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 2090, + "text_loss": 0.81640625 + }, + { + "epoch": 0.17, + "learning_rate": 9.273128919078196e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 2091, + "text_loss": 0.62890625 + }, + { + "epoch": 0.17, + "learning_rate": 9.272450891751623e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 2092, + "text_loss": 0.46484375 + }, + { + "epoch": 0.17, + "learning_rate": 9.271772573151413e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 2093, + "text_loss": 0.412109375 + }, + { + "epoch": 0.17, + "learning_rate": 9.271093963323814e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 2094, + "text_loss": 0.5703125 + }, + { + "epoch": 0.17, + "learning_rate": 9.270415062315087e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 2095, + "text_loss": 0.298828125 + }, + { + "epoch": 0.17, + "learning_rate": 9.269735870171517e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 2096, + "text_loss": 0.65234375 + }, + { + "epoch": 0.17, + "learning_rate": 9.269056386939408e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 2097, + "text_loss": 0.765625 + }, + { + "epoch": 0.17, + "learning_rate": 9.268376612665084e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 2098, + "text_loss": 0.53125 + }, + { + "epoch": 0.17, + "learning_rate": 9.267696547394888e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 2099, + "text_loss": 0.78515625 + }, + { + "epoch": 0.17, + "learning_rate": 9.267016191175184e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 2100, + "text_loss": 0.55078125 + }, + { + "epoch": 0.17, + "learning_rate": 9.266335544052354e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 2101, + "text_loss": 0.353515625 + }, + { + "epoch": 0.17, + "learning_rate": 9.265654606072802e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 2102, + "text_loss": 0.28125 + }, + { + "epoch": 0.17, + "learning_rate": 9.264973377282949e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 2103, + "text_loss": 0.384765625 + }, + { + "epoch": 0.17, + "learning_rate": 9.26429185772924e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 2104, + "text_loss": 0.455078125 + }, + { + "epoch": 0.17, + "learning_rate": 9.263610047458138e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 2105, + "text_loss": 0.388671875 + }, + { + "epoch": 0.18, + "learning_rate": 9.262927946516123e-06, + "loss": 0.4363, + "regression_loss": 0.0, + "step": 2106, + "text_loss": 0.546875 + }, + { + "epoch": 0.18, + "learning_rate": 9.262245554949696e-06, + "loss": 0.6492, + "regression_loss": 0.0, + "step": 2107, + "text_loss": 0.67578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.261562872805382e-06, + "loss": 0.5834, + "regression_loss": 0.0, + "step": 2108, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.260879900129725e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 2109, + "text_loss": 0.58984375 + }, + { + "epoch": 0.18, + "learning_rate": 9.26019663696928e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 2110, + "text_loss": 0.55859375 + }, + { + "epoch": 0.18, + "learning_rate": 9.259513083370631e-06, + "loss": 0.4933, + "regression_loss": 0.0, + "step": 2111, + "text_loss": 0.58203125 + }, + { + "epoch": 0.18, + "learning_rate": 9.25882923938038e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 2112, + "text_loss": 0.46484375 + }, + { + "epoch": 0.18, + "learning_rate": 9.25814510504515e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 2113, + "text_loss": 0.458984375 + }, + { + "epoch": 0.18, + "learning_rate": 9.257460680411578e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 2114, + "text_loss": 0.66015625 + }, + { + "epoch": 0.18, + "learning_rate": 9.256775965526327e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 2115, + "text_loss": 0.373046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.256090960436075e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 2116, + "text_loss": 0.50390625 + }, + { + "epoch": 0.18, + "learning_rate": 9.255405665187525e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 2117, + "text_loss": 0.451171875 + }, + { + "epoch": 0.18, + "learning_rate": 9.254720079827393e-06, + "loss": 0.7158, + "regression_loss": 0.0, + "step": 2118, + "text_loss": 0.71875 + }, + { + "epoch": 0.18, + "learning_rate": 9.254034204402423e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 2119, + "text_loss": 0.6875 + }, + { + "epoch": 0.18, + "learning_rate": 9.253348038959372e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 2120, + "text_loss": 0.82421875 + }, + { + "epoch": 0.18, + "learning_rate": 9.252661583545018e-06, + "loss": 0.6387, + "regression_loss": 0.0, + "step": 2121, + "text_loss": 0.65234375 + }, + { + "epoch": 0.18, + "learning_rate": 9.251974838206162e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 2122, + "text_loss": 0.6015625 + }, + { + "epoch": 0.18, + "learning_rate": 9.251287802989623e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 2123, + "text_loss": 0.345703125 + }, + { + "epoch": 0.18, + "learning_rate": 9.25060047794224e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 2124, + "text_loss": 0.734375 + }, + { + "epoch": 0.18, + "learning_rate": 9.24991286311087e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 2125, + "text_loss": 0.67578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.249224958542389e-06, + "loss": 0.4326, + "regression_loss": 0.0, + "step": 2126, + "text_loss": 0.4609375 + }, + { + "epoch": 0.18, + "learning_rate": 9.248536764283698e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 2127, + "text_loss": 0.67578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.247848280381715e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 2128, + "text_loss": 0.30859375 + }, + { + "epoch": 0.18, + "learning_rate": 9.247159506883374e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 2129, + "text_loss": 0.359375 + }, + { + "epoch": 0.18, + "learning_rate": 9.246470443835635e-06, + "loss": 0.6095, + "regression_loss": 0.0, + "step": 2130, + "text_loss": 0.59765625 + }, + { + "epoch": 0.18, + "learning_rate": 9.245781091285473e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 2131, + "text_loss": 0.62109375 + }, + { + "epoch": 0.18, + "learning_rate": 9.245091449279883e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 2132, + "text_loss": 0.578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.244401517865886e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 2133, + "text_loss": 0.6640625 + }, + { + "epoch": 0.18, + "learning_rate": 9.243711297090516e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 2134, + "text_loss": 0.404296875 + }, + { + "epoch": 0.18, + "learning_rate": 9.243020787000826e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 2135, + "text_loss": 0.5390625 + }, + { + "epoch": 0.18, + "learning_rate": 9.242329987643895e-06, + "loss": 0.6428, + "regression_loss": 0.0, + "step": 2136, + "text_loss": 0.69921875 + }, + { + "epoch": 0.18, + "learning_rate": 9.241638899066813e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 2137, + "text_loss": 0.33203125 + }, + { + "epoch": 0.18, + "learning_rate": 9.2409475213167e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 2138, + "text_loss": 0.6484375 + }, + { + "epoch": 0.18, + "learning_rate": 9.240255854440691e-06, + "loss": 0.6448, + "regression_loss": 0.0, + "step": 2139, + "text_loss": 0.8828125 + }, + { + "epoch": 0.18, + "learning_rate": 9.239563898485937e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 2140, + "text_loss": 0.7890625 + }, + { + "epoch": 0.18, + "learning_rate": 9.238871653499613e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 2141, + "text_loss": 0.73046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.238179119528912e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 2142, + "text_loss": 0.388671875 + }, + { + "epoch": 0.18, + "learning_rate": 9.237486296621049e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 2143, + "text_loss": 0.8125 + }, + { + "epoch": 0.18, + "learning_rate": 9.236793184823257e-06, + "loss": 0.6038, + "regression_loss": 0.0, + "step": 2144, + "text_loss": 0.69140625 + }, + { + "epoch": 0.18, + "learning_rate": 9.236099784182786e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 2145, + "text_loss": 0.5 + }, + { + "epoch": 0.18, + "learning_rate": 9.235406094746913e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 2146, + "text_loss": 0.79296875 + }, + { + "epoch": 0.18, + "learning_rate": 9.234712116562926e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2147, + "text_loss": 0.53515625 + }, + { + "epoch": 0.18, + "learning_rate": 9.234017849678138e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 2148, + "text_loss": 0.57421875 + }, + { + "epoch": 0.18, + "learning_rate": 9.233323294139883e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 2149, + "text_loss": 0.57421875 + }, + { + "epoch": 0.18, + "learning_rate": 9.23262844999551e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 2150, + "text_loss": 0.54296875 + }, + { + "epoch": 0.18, + "learning_rate": 9.23193331729239e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 2151, + "text_loss": 0.734375 + }, + { + "epoch": 0.18, + "learning_rate": 9.231237896077912e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 2152, + "text_loss": 0.65234375 + }, + { + "epoch": 0.18, + "learning_rate": 9.230542186399489e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 2153, + "text_loss": 0.3125 + }, + { + "epoch": 0.18, + "learning_rate": 9.229846188304552e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 2154, + "text_loss": 0.66015625 + }, + { + "epoch": 0.18, + "learning_rate": 9.229149901840546e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 2155, + "text_loss": 0.42578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.228453327054943e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 2156, + "text_loss": 0.75390625 + }, + { + "epoch": 0.18, + "learning_rate": 9.227756463995232e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 2157, + "text_loss": 0.4921875 + }, + { + "epoch": 0.18, + "learning_rate": 9.22705931270892e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 2158, + "text_loss": 0.37890625 + }, + { + "epoch": 0.18, + "learning_rate": 9.226361873243538e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 2159, + "text_loss": 0.57421875 + }, + { + "epoch": 0.18, + "learning_rate": 9.225664145646633e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 2160, + "text_loss": 0.8515625 + }, + { + "epoch": 0.18, + "learning_rate": 9.224966129965768e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 2161, + "text_loss": 0.23046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.224267826248536e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 2162, + "text_loss": 0.58984375 + }, + { + "epoch": 0.18, + "learning_rate": 9.223569234542542e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 2163, + "text_loss": 0.36328125 + }, + { + "epoch": 0.18, + "learning_rate": 9.22287035489541e-06, + "loss": 0.6179, + "regression_loss": 0.0, + "step": 2164, + "text_loss": 0.765625 + }, + { + "epoch": 0.18, + "learning_rate": 9.222171187354788e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 2165, + "text_loss": 0.478515625 + }, + { + "epoch": 0.18, + "learning_rate": 9.221471731968341e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 2166, + "text_loss": 0.65234375 + }, + { + "epoch": 0.18, + "learning_rate": 9.220771988783754e-06, + "loss": 0.4445, + "regression_loss": 0.0, + "step": 2167, + "text_loss": 0.431640625 + }, + { + "epoch": 0.18, + "learning_rate": 9.220071957848735e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 2168, + "text_loss": 0.7421875 + }, + { + "epoch": 0.18, + "learning_rate": 9.219371639211004e-06, + "loss": 0.7107, + "regression_loss": 0.0, + "step": 2169, + "text_loss": 0.546875 + }, + { + "epoch": 0.18, + "learning_rate": 9.218671032918307e-06, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 2170, + "text_loss": 0.2578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.21797013901841e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 2171, + "text_loss": 0.62109375 + }, + { + "epoch": 0.18, + "learning_rate": 9.217268957559091e-06, + "loss": 0.6677, + "regression_loss": 0.0, + "step": 2172, + "text_loss": 0.61328125 + }, + { + "epoch": 0.18, + "learning_rate": 9.216567488588159e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 2173, + "text_loss": 0.79296875 + }, + { + "epoch": 0.18, + "learning_rate": 9.215865732153431e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 2174, + "text_loss": 0.369140625 + }, + { + "epoch": 0.18, + "learning_rate": 9.215163688302752e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 2175, + "text_loss": 0.67578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.214461357083986e-06, + "loss": 0.6458, + "regression_loss": 0.0, + "step": 2176, + "text_loss": 0.51953125 + }, + { + "epoch": 0.18, + "learning_rate": 9.213758738545011e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 2177, + "text_loss": 0.78515625 + }, + { + "epoch": 0.18, + "learning_rate": 9.213055832733727e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 2178, + "text_loss": 0.255859375 + }, + { + "epoch": 0.18, + "learning_rate": 9.212352639698056e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 2179, + "text_loss": 0.73046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.21164915948594e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 2180, + "text_loss": 0.6171875 + }, + { + "epoch": 0.18, + "learning_rate": 9.210945392145336e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 2181, + "text_loss": 0.42578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.210241337724224e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 2182, + "text_loss": 0.51953125 + }, + { + "epoch": 0.18, + "learning_rate": 9.209536996270604e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 2183, + "text_loss": 0.65234375 + }, + { + "epoch": 0.18, + "learning_rate": 9.208832367832492e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 2184, + "text_loss": 0.578125 + }, + { + "epoch": 0.18, + "learning_rate": 9.208127452457927e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 2185, + "text_loss": 0.423828125 + }, + { + "epoch": 0.18, + "learning_rate": 9.207422250194969e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 2186, + "text_loss": 0.5390625 + }, + { + "epoch": 0.18, + "learning_rate": 9.20671676109169e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 2187, + "text_loss": 0.74609375 + }, + { + "epoch": 0.18, + "learning_rate": 9.20601098519619e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 2188, + "text_loss": 0.5 + }, + { + "epoch": 0.18, + "learning_rate": 9.205304922556584e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 2189, + "text_loss": 0.447265625 + }, + { + "epoch": 0.18, + "learning_rate": 9.20459857322101e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 2190, + "text_loss": 0.255859375 + }, + { + "epoch": 0.18, + "learning_rate": 9.20389193723762e-06, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 2191, + "text_loss": 0.49609375 + }, + { + "epoch": 0.18, + "learning_rate": 9.203185014654589e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 2192, + "text_loss": 0.6484375 + }, + { + "epoch": 0.18, + "learning_rate": 9.202477805520116e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 2193, + "text_loss": 0.70703125 + }, + { + "epoch": 0.18, + "learning_rate": 9.20177030988241e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 2194, + "text_loss": 0.80078125 + }, + { + "epoch": 0.18, + "learning_rate": 9.201062527789704e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 2195, + "text_loss": 0.734375 + }, + { + "epoch": 0.18, + "learning_rate": 9.200354459290255e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 2196, + "text_loss": 0.37890625 + }, + { + "epoch": 0.18, + "learning_rate": 9.199646104432332e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 2197, + "text_loss": 0.6640625 + }, + { + "epoch": 0.18, + "learning_rate": 9.198937463264229e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 2198, + "text_loss": 0.65625 + }, + { + "epoch": 0.18, + "learning_rate": 9.198228535834255e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 2199, + "text_loss": 0.4765625 + }, + { + "epoch": 0.18, + "learning_rate": 9.197519322190745e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 2200, + "text_loss": 0.5078125 + }, + { + "epoch": 0.18, + "learning_rate": 9.196809822382045e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 2201, + "text_loss": 0.63671875 + }, + { + "epoch": 0.18, + "learning_rate": 9.196100036456528e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 2202, + "text_loss": 0.40625 + }, + { + "epoch": 0.18, + "learning_rate": 9.195389964462581e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 2203, + "text_loss": 0.796875 + }, + { + "epoch": 0.18, + "learning_rate": 9.194679606448618e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 2204, + "text_loss": 0.45703125 + }, + { + "epoch": 0.18, + "learning_rate": 9.193968962463065e-06, + "loss": 0.666, + "regression_loss": 0.0, + "step": 2205, + "text_loss": 0.40625 + }, + { + "epoch": 0.18, + "learning_rate": 9.193258032554367e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 2206, + "text_loss": 0.58203125 + }, + { + "epoch": 0.18, + "learning_rate": 9.192546816770992e-06, + "loss": 0.6423, + "regression_loss": 0.0, + "step": 2207, + "text_loss": 0.7109375 + }, + { + "epoch": 0.18, + "learning_rate": 9.191835315161432e-06, + "loss": 0.6465, + "regression_loss": 0.0, + "step": 2208, + "text_loss": 0.73828125 + }, + { + "epoch": 0.18, + "learning_rate": 9.19112352777419e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 2209, + "text_loss": 0.50390625 + }, + { + "epoch": 0.18, + "learning_rate": 9.190411454657793e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 2210, + "text_loss": 0.47265625 + }, + { + "epoch": 0.18, + "learning_rate": 9.189699095860785e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 2211, + "text_loss": 0.251953125 + }, + { + "epoch": 0.18, + "learning_rate": 9.188986451431732e-06, + "loss": 0.5509, + "regression_loss": 0.0, + "step": 2212, + "text_loss": 0.76953125 + }, + { + "epoch": 0.18, + "learning_rate": 9.18827352141922e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 2213, + "text_loss": 0.474609375 + }, + { + "epoch": 0.18, + "learning_rate": 9.187560305871848e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 2214, + "text_loss": 0.68359375 + }, + { + "epoch": 0.18, + "learning_rate": 9.186846804838244e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 2215, + "text_loss": 0.77734375 + }, + { + "epoch": 0.18, + "learning_rate": 9.18613301836705e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 2216, + "text_loss": 0.498046875 + }, + { + "epoch": 0.18, + "learning_rate": 9.185418946506926e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 2217, + "text_loss": 0.279296875 + }, + { + "epoch": 0.18, + "learning_rate": 9.184704589306557e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 2218, + "text_loss": 0.490234375 + }, + { + "epoch": 0.18, + "learning_rate": 9.18398994681464e-06, + "loss": 0.6392, + "regression_loss": 0.0, + "step": 2219, + "text_loss": 0.7109375 + }, + { + "epoch": 0.18, + "learning_rate": 9.1832750190799e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2220, + "text_loss": 0.74609375 + }, + { + "epoch": 0.18, + "learning_rate": 9.182559806151075e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 2221, + "text_loss": 0.546875 + }, + { + "epoch": 0.18, + "learning_rate": 9.181844308076924e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 2222, + "text_loss": 0.76171875 + }, + { + "epoch": 0.18, + "learning_rate": 9.181128524906228e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 2223, + "text_loss": 0.55078125 + }, + { + "epoch": 0.18, + "learning_rate": 9.180412456687784e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 2224, + "text_loss": 0.77734375 + }, + { + "epoch": 0.18, + "learning_rate": 9.17969610347041e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 2225, + "text_loss": 0.376953125 + }, + { + "epoch": 0.19, + "learning_rate": 9.178979465302942e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 2226, + "text_loss": 0.5546875 + }, + { + "epoch": 0.19, + "learning_rate": 9.178262542234237e-06, + "loss": 0.7261, + "regression_loss": 0.0, + "step": 2227, + "text_loss": 0.62109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.177545334313175e-06, + "loss": 0.6609, + "regression_loss": 0.0, + "step": 2228, + "text_loss": 0.310546875 + }, + { + "epoch": 0.19, + "learning_rate": 9.176827841588647e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 2229, + "text_loss": 0.212890625 + }, + { + "epoch": 0.19, + "learning_rate": 9.17611006410957e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 2230, + "text_loss": 0.416015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.175392001924877e-06, + "loss": 0.6062, + "regression_loss": 0.0, + "step": 2231, + "text_loss": 0.75390625 + }, + { + "epoch": 0.19, + "learning_rate": 9.174673655083523e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 2232, + "text_loss": 0.4296875 + }, + { + "epoch": 0.19, + "learning_rate": 9.17395502363448e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 2233, + "text_loss": 0.625 + }, + { + "epoch": 0.19, + "learning_rate": 9.173236107626741e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 2234, + "text_loss": 0.77734375 + }, + { + "epoch": 0.19, + "learning_rate": 9.172516907109321e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 2235, + "text_loss": 0.671875 + }, + { + "epoch": 0.19, + "learning_rate": 9.171797422131247e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 2236, + "text_loss": 0.65234375 + }, + { + "epoch": 0.19, + "learning_rate": 9.171077652741571e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 2237, + "text_loss": 0.66796875 + }, + { + "epoch": 0.19, + "learning_rate": 9.170357598989366e-06, + "loss": 0.4801, + "regression_loss": 0.0, + "step": 2238, + "text_loss": 0.55859375 + }, + { + "epoch": 0.19, + "learning_rate": 9.169637260923718e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 2239, + "text_loss": 0.375 + }, + { + "epoch": 0.19, + "learning_rate": 9.168916638593736e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 2240, + "text_loss": 0.49609375 + }, + { + "epoch": 0.19, + "learning_rate": 9.168195732048551e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 2241, + "text_loss": 0.357421875 + }, + { + "epoch": 0.19, + "learning_rate": 9.167474541337307e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 2242, + "text_loss": 0.59765625 + }, + { + "epoch": 0.19, + "learning_rate": 9.166753066509176e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 2243, + "text_loss": 0.44921875 + }, + { + "epoch": 0.19, + "learning_rate": 9.166031307613338e-06, + "loss": 0.7163, + "regression_loss": 0.0, + "step": 2244, + "text_loss": 0.56640625 + }, + { + "epoch": 0.19, + "learning_rate": 9.165309264699005e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 2245, + "text_loss": 0.408203125 + }, + { + "epoch": 0.19, + "learning_rate": 9.164586937815398e-06, + "loss": 0.6, + "regression_loss": 0.0, + "step": 2246, + "text_loss": 0.7578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.163864327011764e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 2247, + "text_loss": 0.494140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.163141432337367e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2248, + "text_loss": 0.58203125 + }, + { + "epoch": 0.19, + "learning_rate": 9.162418253841488e-06, + "loss": 0.4623, + "regression_loss": 0.0, + "step": 2249, + "text_loss": 0.51953125 + }, + { + "epoch": 0.19, + "learning_rate": 9.16169479157343e-06, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 2250, + "text_loss": 0.37109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.160971045582517e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 2251, + "text_loss": 0.62890625 + }, + { + "epoch": 0.19, + "learning_rate": 9.16024701591809e-06, + "loss": 0.7693, + "regression_loss": 0.0, + "step": 2252, + "text_loss": 0.59375 + }, + { + "epoch": 0.19, + "learning_rate": 9.159522702629506e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 2253, + "text_loss": 0.7578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.158798105766149e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 2254, + "text_loss": 0.53515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.158073225377416e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 2255, + "text_loss": 0.44921875 + }, + { + "epoch": 0.19, + "learning_rate": 9.157348061512728e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 2256, + "text_loss": 0.7421875 + }, + { + "epoch": 0.19, + "learning_rate": 9.15662261422152e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 2257, + "text_loss": 0.3125 + }, + { + "epoch": 0.19, + "learning_rate": 9.15589688355325e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 2258, + "text_loss": 0.33203125 + }, + { + "epoch": 0.19, + "learning_rate": 9.155170869557394e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 2259, + "text_loss": 0.353515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.154444572283452e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 2260, + "text_loss": 0.5703125 + }, + { + "epoch": 0.19, + "learning_rate": 9.153717991780935e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 2261, + "text_loss": 0.37109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.152991128099378e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 2262, + "text_loss": 0.74609375 + }, + { + "epoch": 0.19, + "learning_rate": 9.152263981288336e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 2263, + "text_loss": 0.61328125 + }, + { + "epoch": 0.19, + "learning_rate": 9.15153655139738e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 2264, + "text_loss": 0.578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.150808838476106e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 2265, + "text_loss": 0.578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.150080842574124e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 2266, + "text_loss": 0.33203125 + }, + { + "epoch": 0.19, + "learning_rate": 9.149352563741063e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 2267, + "text_loss": 0.443359375 + }, + { + "epoch": 0.19, + "learning_rate": 9.148624002026575e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 2268, + "text_loss": 0.8203125 + }, + { + "epoch": 0.19, + "learning_rate": 9.14789515748033e-06, + "loss": 0.6836, + "regression_loss": 0.0, + "step": 2269, + "text_loss": 0.67578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.14716603015202e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 2270, + "text_loss": 0.5546875 + }, + { + "epoch": 0.19, + "learning_rate": 9.146436620091346e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 2271, + "text_loss": 0.5 + }, + { + "epoch": 0.19, + "learning_rate": 9.14570692734804e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 2272, + "text_loss": 0.443359375 + }, + { + "epoch": 0.19, + "learning_rate": 9.144976951971846e-06, + "loss": 0.6104, + "regression_loss": 0.0, + "step": 2273, + "text_loss": 0.73828125 + }, + { + "epoch": 0.19, + "learning_rate": 9.144246694012533e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 2274, + "text_loss": 0.75 + }, + { + "epoch": 0.19, + "learning_rate": 9.143516153519885e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 2275, + "text_loss": 0.57421875 + }, + { + "epoch": 0.19, + "learning_rate": 9.142785330543707e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2276, + "text_loss": 0.439453125 + }, + { + "epoch": 0.19, + "learning_rate": 9.142054225133821e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 2277, + "text_loss": 0.546875 + }, + { + "epoch": 0.19, + "learning_rate": 9.14132283734007e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 2278, + "text_loss": 0.80078125 + }, + { + "epoch": 0.19, + "learning_rate": 9.140591167212319e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 2279, + "text_loss": 0.8515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.139859214800445e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 2280, + "text_loss": 0.421875 + }, + { + "epoch": 0.19, + "learning_rate": 9.139126980154356e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 2281, + "text_loss": 0.6796875 + }, + { + "epoch": 0.19, + "learning_rate": 9.138394463323965e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 2282, + "text_loss": 0.447265625 + }, + { + "epoch": 0.19, + "learning_rate": 9.137661664359214e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 2283, + "text_loss": 0.5 + }, + { + "epoch": 0.19, + "learning_rate": 9.136928583310061e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 2284, + "text_loss": 0.498046875 + }, + { + "epoch": 0.19, + "learning_rate": 9.136195220226485e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 2285, + "text_loss": 0.56640625 + }, + { + "epoch": 0.19, + "learning_rate": 9.135461575158479e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 2286, + "text_loss": 0.75390625 + }, + { + "epoch": 0.19, + "learning_rate": 9.134727648156064e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 2287, + "text_loss": 0.37109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.133993439269272e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 2288, + "text_loss": 0.494140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.13325894854816e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 2289, + "text_loss": 0.58984375 + }, + { + "epoch": 0.19, + "learning_rate": 9.132524176042801e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 2290, + "text_loss": 0.52734375 + }, + { + "epoch": 0.19, + "learning_rate": 9.131789121803286e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 2291, + "text_loss": 0.47265625 + }, + { + "epoch": 0.19, + "learning_rate": 9.13105378587973e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 2292, + "text_loss": 0.59375 + }, + { + "epoch": 0.19, + "learning_rate": 9.130318168322262e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 2293, + "text_loss": 0.62109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.129582269181034e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 2294, + "text_loss": 1.03125 + }, + { + "epoch": 0.19, + "learning_rate": 9.128846088506217e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 2295, + "text_loss": 0.71875 + }, + { + "epoch": 0.19, + "learning_rate": 9.128109626347997e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 2296, + "text_loss": 0.5859375 + }, + { + "epoch": 0.19, + "learning_rate": 9.127372882756584e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 2297, + "text_loss": 0.64453125 + }, + { + "epoch": 0.19, + "learning_rate": 9.126635857782206e-06, + "loss": 0.6426, + "regression_loss": 0.0, + "step": 2298, + "text_loss": 0.37890625 + }, + { + "epoch": 0.19, + "learning_rate": 9.125898551475108e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 2299, + "text_loss": 0.6171875 + }, + { + "epoch": 0.19, + "learning_rate": 9.125160963885558e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 2300, + "text_loss": 0.703125 + }, + { + "epoch": 0.19, + "learning_rate": 9.124423095063837e-06, + "loss": 0.6648, + "regression_loss": 0.0, + "step": 2301, + "text_loss": 0.5390625 + }, + { + "epoch": 0.19, + "learning_rate": 9.123684945060253e-06, + "loss": 0.6875, + "regression_loss": 0.0, + "step": 2302, + "text_loss": 0.71875 + }, + { + "epoch": 0.19, + "learning_rate": 9.122946513925128e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 2303, + "text_loss": 0.341796875 + }, + { + "epoch": 0.19, + "learning_rate": 9.122207801708802e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 2304, + "text_loss": 0.392578125 + }, + { + "epoch": 0.19, + "learning_rate": 9.12146880846164e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 2305, + "text_loss": 0.4296875 + }, + { + "epoch": 0.19, + "learning_rate": 9.120729534234022e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 2306, + "text_loss": 0.65625 + }, + { + "epoch": 0.19, + "learning_rate": 9.119989979076347e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 2307, + "text_loss": 0.365234375 + }, + { + "epoch": 0.19, + "learning_rate": 9.119250143039034e-06, + "loss": 0.6733, + "regression_loss": 0.0, + "step": 2308, + "text_loss": 0.60546875 + }, + { + "epoch": 0.19, + "learning_rate": 9.118510026172522e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 2309, + "text_loss": 0.59375 + }, + { + "epoch": 0.19, + "learning_rate": 9.117769628527266e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 2310, + "text_loss": 0.69140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.117028950153746e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 2311, + "text_loss": 0.55859375 + }, + { + "epoch": 0.19, + "learning_rate": 9.116287991102458e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 2312, + "text_loss": 0.625 + }, + { + "epoch": 0.19, + "learning_rate": 9.115546751423912e-06, + "loss": 0.6145, + "regression_loss": 0.0, + "step": 2313, + "text_loss": 0.490234375 + }, + { + "epoch": 0.19, + "learning_rate": 9.114805231168646e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 2314, + "text_loss": 0.62109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.11406343038721e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 2315, + "text_loss": 0.54296875 + }, + { + "epoch": 0.19, + "learning_rate": 9.11332134913018e-06, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 2316, + "text_loss": 0.369140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.112578987448145e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 2317, + "text_loss": 0.369140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.111836345391714e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 2318, + "text_loss": 0.3515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.111093423011519e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 2319, + "text_loss": 0.296875 + }, + { + "epoch": 0.19, + "learning_rate": 9.110350220358208e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 2320, + "text_loss": 0.6640625 + }, + { + "epoch": 0.19, + "learning_rate": 9.109606737482449e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 2321, + "text_loss": 0.5859375 + }, + { + "epoch": 0.19, + "learning_rate": 9.108862974434925e-06, + "loss": 0.6597, + "regression_loss": 0.0, + "step": 2322, + "text_loss": 0.515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.108118931266348e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 2323, + "text_loss": 0.66015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.10737460802744e-06, + "loss": 0.5258, + "regression_loss": 0.0, + "step": 2324, + "text_loss": 0.28515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.106630004768943e-06, + "loss": 0.5421, + "regression_loss": 0.0, + "step": 2325, + "text_loss": 0.455078125 + }, + { + "epoch": 0.19, + "learning_rate": 9.105885121541624e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 2326, + "text_loss": 0.4609375 + }, + { + "epoch": 0.19, + "learning_rate": 9.105139958396264e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2327, + "text_loss": 0.6171875 + }, + { + "epoch": 0.19, + "learning_rate": 9.104394515383664e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 2328, + "text_loss": 0.6015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.103648792554644e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 2329, + "text_loss": 0.66015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.102902789960044e-06, + "loss": 0.6616, + "regression_loss": 0.0, + "step": 2330, + "text_loss": 0.66015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.102156507650723e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 2331, + "text_loss": 0.55078125 + }, + { + "epoch": 0.19, + "learning_rate": 9.101409945677558e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 2332, + "text_loss": 0.51171875 + }, + { + "epoch": 0.19, + "learning_rate": 9.100663104091446e-06, + "loss": 0.4347, + "regression_loss": 0.0, + "step": 2333, + "text_loss": 0.314453125 + }, + { + "epoch": 0.19, + "learning_rate": 9.099915982943304e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 2334, + "text_loss": 0.62109375 + }, + { + "epoch": 0.19, + "learning_rate": 9.099168582284065e-06, + "loss": 0.4258, + "regression_loss": 0.0, + "step": 2335, + "text_loss": 0.494140625 + }, + { + "epoch": 0.19, + "learning_rate": 9.098420902164684e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 2336, + "text_loss": 0.6015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.097672942636132e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 2337, + "text_loss": 0.65625 + }, + { + "epoch": 0.19, + "learning_rate": 9.096924703749406e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 2338, + "text_loss": 0.640625 + }, + { + "epoch": 0.19, + "learning_rate": 9.09617618555551e-06, + "loss": 0.6445, + "regression_loss": 0.0, + "step": 2339, + "text_loss": 0.74609375 + }, + { + "epoch": 0.19, + "learning_rate": 9.09542738810548e-06, + "loss": 0.4294, + "regression_loss": 0.0, + "step": 2340, + "text_loss": 0.59375 + }, + { + "epoch": 0.19, + "learning_rate": 9.094678311450363e-06, + "loss": 0.6318, + "regression_loss": 0.0, + "step": 2341, + "text_loss": 0.6015625 + }, + { + "epoch": 0.19, + "learning_rate": 9.093928955641227e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 2342, + "text_loss": 0.6328125 + }, + { + "epoch": 0.19, + "learning_rate": 9.09317932072916e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 2343, + "text_loss": 0.349609375 + }, + { + "epoch": 0.19, + "learning_rate": 9.092429406765266e-06, + "loss": 0.6279, + "regression_loss": 0.0, + "step": 2344, + "text_loss": 0.28515625 + }, + { + "epoch": 0.19, + "learning_rate": 9.091679213800673e-06, + "loss": 0.4502, + "regression_loss": 0.0, + "step": 2345, + "text_loss": 0.251953125 + }, + { + "epoch": 0.19, + "learning_rate": 9.090928741886524e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 2346, + "text_loss": 0.61328125 + }, + { + "epoch": 0.2, + "learning_rate": 9.090177991073982e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 2347, + "text_loss": 0.76953125 + }, + { + "epoch": 0.2, + "learning_rate": 9.08942696141423e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 2348, + "text_loss": 0.734375 + }, + { + "epoch": 0.2, + "learning_rate": 9.08867565295847e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 2349, + "text_loss": 0.3203125 + }, + { + "epoch": 0.2, + "learning_rate": 9.08792406575792e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 2350, + "text_loss": 0.55078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.08717219986382e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 2351, + "text_loss": 0.451171875 + }, + { + "epoch": 0.2, + "learning_rate": 9.086420055327431e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 2352, + "text_loss": 0.77734375 + }, + { + "epoch": 0.2, + "learning_rate": 9.085667632200028e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2353, + "text_loss": 0.423828125 + }, + { + "epoch": 0.2, + "learning_rate": 9.084914930532906e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 2354, + "text_loss": 0.490234375 + }, + { + "epoch": 0.2, + "learning_rate": 9.084161950377384e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 2355, + "text_loss": 0.55078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.083408691784793e-06, + "loss": 0.5668, + "regression_loss": 0.0, + "step": 2356, + "text_loss": 0.6796875 + }, + { + "epoch": 0.2, + "learning_rate": 9.08265515480649e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 2357, + "text_loss": 0.46484375 + }, + { + "epoch": 0.2, + "learning_rate": 9.081901339493842e-06, + "loss": 0.5682, + "regression_loss": 0.0, + "step": 2358, + "text_loss": 0.75390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.081147245898246e-06, + "loss": 0.7012, + "regression_loss": 0.0, + "step": 2359, + "text_loss": 0.82421875 + }, + { + "epoch": 0.2, + "learning_rate": 9.080392874071106e-06, + "loss": 0.6296, + "regression_loss": 0.0, + "step": 2360, + "text_loss": 0.41796875 + }, + { + "epoch": 0.2, + "learning_rate": 9.079638224063856e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 2361, + "text_loss": 0.388671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.078883295927942e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 2362, + "text_loss": 0.3671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.078128089714833e-06, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 2363, + "text_loss": 0.478515625 + }, + { + "epoch": 0.2, + "learning_rate": 9.077372605476014e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 2364, + "text_loss": 0.2265625 + }, + { + "epoch": 0.2, + "learning_rate": 9.07661684326299e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 2365, + "text_loss": 0.6875 + }, + { + "epoch": 0.2, + "learning_rate": 9.075860803127284e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 2366, + "text_loss": 0.6875 + }, + { + "epoch": 0.2, + "learning_rate": 9.075104485120438e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 2367, + "text_loss": 0.83203125 + }, + { + "epoch": 0.2, + "learning_rate": 9.074347889294017e-06, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 2368, + "text_loss": 0.326171875 + }, + { + "epoch": 0.2, + "learning_rate": 9.073591015699601e-06, + "loss": 0.6248, + "regression_loss": 0.0, + "step": 2369, + "text_loss": 0.5078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.072833864388789e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 2370, + "text_loss": 0.890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.072076435413199e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 2371, + "text_loss": 0.5 + }, + { + "epoch": 0.2, + "learning_rate": 9.07131872882447e-06, + "loss": 0.5734, + "regression_loss": 0.0, + "step": 2372, + "text_loss": 0.462890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.070560744674257e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 2373, + "text_loss": 0.62109375 + }, + { + "epoch": 0.2, + "learning_rate": 9.069802483014236e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 2374, + "text_loss": 0.87890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.069043943896103e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 2375, + "text_loss": 0.498046875 + }, + { + "epoch": 0.2, + "learning_rate": 9.06828512737157e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 2376, + "text_loss": 0.609375 + }, + { + "epoch": 0.2, + "learning_rate": 9.067526033492369e-06, + "loss": 0.6372, + "regression_loss": 0.0, + "step": 2377, + "text_loss": 0.431640625 + }, + { + "epoch": 0.2, + "learning_rate": 9.066766662310248e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 2378, + "text_loss": 0.80078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.066007013876985e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 2379, + "text_loss": 0.62890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.065247088244363e-06, + "loss": 0.6624, + "regression_loss": 0.0, + "step": 2380, + "text_loss": 0.8671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.064486885464188e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 2381, + "text_loss": 0.5390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.063726405588292e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 2382, + "text_loss": 0.6015625 + }, + { + "epoch": 0.2, + "learning_rate": 9.062965648668518e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 2383, + "text_loss": 0.671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.062204614756732e-06, + "loss": 0.6519, + "regression_loss": 0.0, + "step": 2384, + "text_loss": 0.74609375 + }, + { + "epoch": 0.2, + "learning_rate": 9.061443303904814e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 2385, + "text_loss": 0.275390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.060681716164667e-06, + "loss": 0.5175, + "regression_loss": 0.0, + "step": 2386, + "text_loss": 0.65625 + }, + { + "epoch": 0.2, + "learning_rate": 9.059919851588217e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 2387, + "text_loss": 0.5234375 + }, + { + "epoch": 0.2, + "learning_rate": 9.059157710227398e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 2388, + "text_loss": 0.62109375 + }, + { + "epoch": 0.2, + "learning_rate": 9.058395292134172e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 2389, + "text_loss": 0.51171875 + }, + { + "epoch": 0.2, + "learning_rate": 9.057632597360517e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 2390, + "text_loss": 0.458984375 + }, + { + "epoch": 0.2, + "learning_rate": 9.056869625958427e-06, + "loss": 0.5309, + "regression_loss": 0.0, + "step": 2391, + "text_loss": 0.466796875 + }, + { + "epoch": 0.2, + "learning_rate": 9.056106377979919e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 2392, + "text_loss": 0.435546875 + }, + { + "epoch": 0.2, + "learning_rate": 9.05534285347703e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 2393, + "text_loss": 0.75390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.054579052501808e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 2394, + "text_loss": 0.63671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.053814975106327e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 2395, + "text_loss": 0.341796875 + }, + { + "epoch": 0.2, + "learning_rate": 9.05305062134268e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 2396, + "text_loss": 0.50390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.052285991262975e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 2397, + "text_loss": 0.55859375 + }, + { + "epoch": 0.2, + "learning_rate": 9.05152108491934e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 2398, + "text_loss": 0.50390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.050755902363923e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 2399, + "text_loss": 0.56640625 + }, + { + "epoch": 0.2, + "learning_rate": 9.04999044364889e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 2400, + "text_loss": 0.5546875 + }, + { + "epoch": 0.2, + "learning_rate": 9.049224708826427e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 2401, + "text_loss": 0.75 + }, + { + "epoch": 0.2, + "learning_rate": 9.048458697948736e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 2402, + "text_loss": 0.63671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.047692411068042e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 2403, + "text_loss": 0.349609375 + }, + { + "epoch": 0.2, + "learning_rate": 9.046925848236584e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 2404, + "text_loss": 0.345703125 + }, + { + "epoch": 0.2, + "learning_rate": 9.046159009506623e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 2405, + "text_loss": 0.45703125 + }, + { + "epoch": 0.2, + "learning_rate": 9.04539189493044e-06, + "loss": 0.6377, + "regression_loss": 0.0, + "step": 2406, + "text_loss": 0.82421875 + }, + { + "epoch": 0.2, + "learning_rate": 9.04462450456033e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 2407, + "text_loss": 0.357421875 + }, + { + "epoch": 0.2, + "learning_rate": 9.043856838448612e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 2408, + "text_loss": 0.56640625 + }, + { + "epoch": 0.2, + "learning_rate": 9.043088896647621e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 2409, + "text_loss": 0.546875 + }, + { + "epoch": 0.2, + "learning_rate": 9.042320679209711e-06, + "loss": 0.6572, + "regression_loss": 0.0, + "step": 2410, + "text_loss": 0.703125 + }, + { + "epoch": 0.2, + "learning_rate": 9.041552186187254e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 2411, + "text_loss": 0.57421875 + }, + { + "epoch": 0.2, + "learning_rate": 9.040783417632644e-06, + "loss": 0.5262, + "regression_loss": 0.0, + "step": 2412, + "text_loss": 0.7265625 + }, + { + "epoch": 0.2, + "learning_rate": 9.040014373598291e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 2413, + "text_loss": 0.396484375 + }, + { + "epoch": 0.2, + "learning_rate": 9.039245054136623e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 2414, + "text_loss": 0.6796875 + }, + { + "epoch": 0.2, + "learning_rate": 9.038475459300092e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 2415, + "text_loss": 0.59375 + }, + { + "epoch": 0.2, + "learning_rate": 9.03770558914116e-06, + "loss": 0.438, + "regression_loss": 0.0, + "step": 2416, + "text_loss": 0.56640625 + }, + { + "epoch": 0.2, + "learning_rate": 9.036935443712313e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 2417, + "text_loss": 0.64453125 + }, + { + "epoch": 0.2, + "learning_rate": 9.036165023066062e-06, + "loss": 0.5813, + "regression_loss": 0.0, + "step": 2418, + "text_loss": 0.478515625 + }, + { + "epoch": 0.2, + "learning_rate": 9.035394327254923e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 2419, + "text_loss": 0.87109375 + }, + { + "epoch": 0.2, + "learning_rate": 9.034623356331443e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 2420, + "text_loss": 0.40625 + }, + { + "epoch": 0.2, + "learning_rate": 9.033852110348178e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 2421, + "text_loss": 0.53515625 + }, + { + "epoch": 0.2, + "learning_rate": 9.033080589357712e-06, + "loss": 0.5114, + "regression_loss": 0.0, + "step": 2422, + "text_loss": 0.7890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.032308793412643e-06, + "loss": 0.6465, + "regression_loss": 0.0, + "step": 2423, + "text_loss": 0.625 + }, + { + "epoch": 0.2, + "learning_rate": 9.031536722565584e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 2424, + "text_loss": 0.93359375 + }, + { + "epoch": 0.2, + "learning_rate": 9.030764376869176e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 2425, + "text_loss": 0.65234375 + }, + { + "epoch": 0.2, + "learning_rate": 9.02999175637607e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 2426, + "text_loss": 0.59375 + }, + { + "epoch": 0.2, + "learning_rate": 9.02921886113894e-06, + "loss": 0.6675, + "regression_loss": 0.0, + "step": 2427, + "text_loss": 0.85546875 + }, + { + "epoch": 0.2, + "learning_rate": 9.02844569121048e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 2428, + "text_loss": 0.54296875 + }, + { + "epoch": 0.2, + "learning_rate": 9.027672246643399e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 2429, + "text_loss": 0.6171875 + }, + { + "epoch": 0.2, + "learning_rate": 9.026898527490425e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 2430, + "text_loss": 0.458984375 + }, + { + "epoch": 0.2, + "learning_rate": 9.026124533804308e-06, + "loss": 0.6257, + "regression_loss": 0.0, + "step": 2431, + "text_loss": 0.47265625 + }, + { + "epoch": 0.2, + "learning_rate": 9.025350265637816e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 2432, + "text_loss": 0.50390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.02457572304373e-06, + "loss": 0.6545, + "regression_loss": 0.0, + "step": 2433, + "text_loss": 0.439453125 + }, + { + "epoch": 0.2, + "learning_rate": 9.023800906074862e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 2434, + "text_loss": 0.63671875 + }, + { + "epoch": 0.2, + "learning_rate": 9.023025814784028e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 2435, + "text_loss": 0.408203125 + }, + { + "epoch": 0.2, + "learning_rate": 9.022250449224073e-06, + "loss": 0.6738, + "regression_loss": 0.0, + "step": 2436, + "text_loss": 0.296875 + }, + { + "epoch": 0.2, + "learning_rate": 9.021474809447856e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 2437, + "text_loss": 0.490234375 + }, + { + "epoch": 0.2, + "learning_rate": 9.020698895508258e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 2438, + "text_loss": 0.55078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.019922707458175e-06, + "loss": 0.609, + "regression_loss": 0.0, + "step": 2439, + "text_loss": 0.62890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.019146245350524e-06, + "loss": 0.4955, + "regression_loss": 0.0, + "step": 2440, + "text_loss": 0.40625 + }, + { + "epoch": 0.2, + "learning_rate": 9.018369509238241e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 2441, + "text_loss": 0.283203125 + }, + { + "epoch": 0.2, + "learning_rate": 9.017592499174278e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 2442, + "text_loss": 0.59765625 + }, + { + "epoch": 0.2, + "learning_rate": 9.01681521521161e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 2443, + "text_loss": 0.5390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.016037657403225e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 2444, + "text_loss": 0.482421875 + }, + { + "epoch": 0.2, + "learning_rate": 9.015259825802137e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 2445, + "text_loss": 0.37890625 + }, + { + "epoch": 0.2, + "learning_rate": 9.014481720461369e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 2446, + "text_loss": 0.369140625 + }, + { + "epoch": 0.2, + "learning_rate": 9.013703341433975e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 2447, + "text_loss": 0.453125 + }, + { + "epoch": 0.2, + "learning_rate": 9.012924688773017e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 2448, + "text_loss": 0.73046875 + }, + { + "epoch": 0.2, + "learning_rate": 9.01214576253158e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 2449, + "text_loss": 0.62109375 + }, + { + "epoch": 0.2, + "learning_rate": 9.011366562762764e-06, + "loss": 0.6455, + "regression_loss": 0.0, + "step": 2450, + "text_loss": 0.94140625 + }, + { + "epoch": 0.2, + "learning_rate": 9.010587089519696e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 2451, + "text_loss": 0.78125 + }, + { + "epoch": 0.2, + "learning_rate": 9.009807342855515e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 2452, + "text_loss": 0.50390625 + }, + { + "epoch": 0.2, + "learning_rate": 9.009027322823379e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 2453, + "text_loss": 0.55859375 + }, + { + "epoch": 0.2, + "learning_rate": 9.008247029476467e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 2454, + "text_loss": 0.2578125 + }, + { + "epoch": 0.2, + "learning_rate": 9.007466462867973e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 2455, + "text_loss": 0.609375 + }, + { + "epoch": 0.2, + "learning_rate": 9.006685623051114e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 2456, + "text_loss": 0.765625 + }, + { + "epoch": 0.2, + "learning_rate": 9.005904510079123e-06, + "loss": 0.5402, + "regression_loss": 0.0, + "step": 2457, + "text_loss": 0.6875 + }, + { + "epoch": 0.2, + "learning_rate": 9.005123124005253e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 2458, + "text_loss": 0.7578125 + }, + { + "epoch": 0.2, + "learning_rate": 9.004341464882773e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 2459, + "text_loss": 0.59765625 + }, + { + "epoch": 0.2, + "learning_rate": 9.003559532764976e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 2460, + "text_loss": 0.416015625 + }, + { + "epoch": 0.2, + "learning_rate": 9.002777327705165e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 2461, + "text_loss": 0.384765625 + }, + { + "epoch": 0.2, + "learning_rate": 9.001994849756673e-06, + "loss": 0.6362, + "regression_loss": 0.0, + "step": 2462, + "text_loss": 1.0078125 + }, + { + "epoch": 0.2, + "learning_rate": 9.001212098972841e-06, + "loss": 0.6482, + "regression_loss": 0.0, + "step": 2463, + "text_loss": 0.79296875 + }, + { + "epoch": 0.2, + "learning_rate": 9.00042907540703e-06, + "loss": 0.6372, + "regression_loss": 0.0, + "step": 2464, + "text_loss": 0.6328125 + }, + { + "epoch": 0.2, + "learning_rate": 8.999645779112632e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 2465, + "text_loss": 0.6484375 + }, + { + "epoch": 0.2, + "learning_rate": 8.998862210143037e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 2466, + "text_loss": 0.357421875 + }, + { + "epoch": 0.21, + "learning_rate": 8.998078368551673e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 2467, + "text_loss": 0.7578125 + }, + { + "epoch": 0.21, + "learning_rate": 8.997294254391976e-06, + "loss": 0.5157, + "regression_loss": 0.0, + "step": 2468, + "text_loss": 0.6640625 + }, + { + "epoch": 0.21, + "learning_rate": 8.9965098677174e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 2469, + "text_loss": 0.65234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.995725208581426e-06, + "loss": 0.6838, + "regression_loss": 0.0, + "step": 2470, + "text_loss": 0.53125 + }, + { + "epoch": 0.21, + "learning_rate": 8.99494027703754e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 2471, + "text_loss": 1.15625 + }, + { + "epoch": 0.21, + "learning_rate": 8.994155073139262e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 2472, + "text_loss": 0.52734375 + }, + { + "epoch": 0.21, + "learning_rate": 8.99336959694012e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 2473, + "text_loss": 0.423828125 + }, + { + "epoch": 0.21, + "learning_rate": 8.992583848493665e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 2474, + "text_loss": 0.83203125 + }, + { + "epoch": 0.21, + "learning_rate": 8.991797827853463e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 2475, + "text_loss": 0.5234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.991011535073102e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 2476, + "text_loss": 0.3203125 + }, + { + "epoch": 0.21, + "learning_rate": 8.99022497020619e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 2477, + "text_loss": 0.423828125 + }, + { + "epoch": 0.21, + "learning_rate": 8.989438133306345e-06, + "loss": 0.6699, + "regression_loss": 0.0, + "step": 2478, + "text_loss": 0.5546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.988651024427216e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 2479, + "text_loss": 0.515625 + }, + { + "epoch": 0.21, + "learning_rate": 8.987863643622457e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 2480, + "text_loss": 0.58984375 + }, + { + "epoch": 0.21, + "learning_rate": 8.987075990945755e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 2481, + "text_loss": 0.65625 + }, + { + "epoch": 0.21, + "learning_rate": 8.986288066450803e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 2482, + "text_loss": 0.6171875 + }, + { + "epoch": 0.21, + "learning_rate": 8.98549987019132e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 2483, + "text_loss": 0.5390625 + }, + { + "epoch": 0.21, + "learning_rate": 8.98471140222104e-06, + "loss": 0.7273, + "regression_loss": 0.0, + "step": 2484, + "text_loss": 1.5859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.983922662593717e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 2485, + "text_loss": 0.70703125 + }, + { + "epoch": 0.21, + "learning_rate": 8.983133651363125e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 2486, + "text_loss": 0.4453125 + }, + { + "epoch": 0.21, + "learning_rate": 8.982344368583052e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 2487, + "text_loss": 0.6171875 + }, + { + "epoch": 0.21, + "learning_rate": 8.981554814307307e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 2488, + "text_loss": 0.51953125 + }, + { + "epoch": 0.21, + "learning_rate": 8.980764988589719e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 2489, + "text_loss": 0.73828125 + }, + { + "epoch": 0.21, + "learning_rate": 8.979974891484134e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 2490, + "text_loss": 0.3046875 + }, + { + "epoch": 0.21, + "learning_rate": 8.979184523044419e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 2491, + "text_loss": 0.5859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.978393883324453e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 2492, + "text_loss": 0.439453125 + }, + { + "epoch": 0.21, + "learning_rate": 8.97760297237814e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 2493, + "text_loss": 0.291015625 + }, + { + "epoch": 0.21, + "learning_rate": 8.9768117902594e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 2494, + "text_loss": 0.703125 + }, + { + "epoch": 0.21, + "learning_rate": 8.976020337022175e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 2495, + "text_loss": 0.494140625 + }, + { + "epoch": 0.21, + "learning_rate": 8.975228612720415e-06, + "loss": 0.6279, + "regression_loss": 0.0, + "step": 2496, + "text_loss": 0.45703125 + }, + { + "epoch": 0.21, + "learning_rate": 8.974436617408102e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 2497, + "text_loss": 0.77734375 + }, + { + "epoch": 0.21, + "learning_rate": 8.973644351139228e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 2498, + "text_loss": 0.490234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.972851813967804e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 2499, + "text_loss": 0.60546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.972059005947862e-06, + "loss": 0.6755, + "regression_loss": 0.0, + "step": 2500, + "text_loss": 0.640625 + }, + { + "epoch": 0.21, + "learning_rate": 8.971265927133454e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 2501, + "text_loss": 0.37109375 + }, + { + "epoch": 0.21, + "learning_rate": 8.970472577578644e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 2502, + "text_loss": 0.4140625 + }, + { + "epoch": 0.21, + "learning_rate": 8.969678957337523e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 2503, + "text_loss": 0.484375 + }, + { + "epoch": 0.21, + "learning_rate": 8.968885066464192e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 2504, + "text_loss": 0.5078125 + }, + { + "epoch": 0.21, + "learning_rate": 8.968090905012777e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 2505, + "text_loss": 0.458984375 + }, + { + "epoch": 0.21, + "learning_rate": 8.967296473037417e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2506, + "text_loss": 0.494140625 + }, + { + "epoch": 0.21, + "learning_rate": 8.966501770592275e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 2507, + "text_loss": 0.5859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.965706797731528e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 2508, + "text_loss": 0.40234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.964911554509375e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 2509, + "text_loss": 0.80859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.96411604098003e-06, + "loss": 0.6421, + "regression_loss": 0.0, + "step": 2510, + "text_loss": 0.62109375 + }, + { + "epoch": 0.21, + "learning_rate": 8.963320257197727e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 2511, + "text_loss": 0.7890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.96252420321672e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 2512, + "text_loss": 0.52734375 + }, + { + "epoch": 0.21, + "learning_rate": 8.961727879091277e-06, + "loss": 0.7275, + "regression_loss": 0.0, + "step": 2513, + "text_loss": 0.66015625 + }, + { + "epoch": 0.21, + "learning_rate": 8.96093128487569e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 2514, + "text_loss": 0.412109375 + }, + { + "epoch": 0.21, + "learning_rate": 8.960134420624267e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 2515, + "text_loss": 0.419921875 + }, + { + "epoch": 0.21, + "learning_rate": 8.95933728639133e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 2516, + "text_loss": 0.625 + }, + { + "epoch": 0.21, + "learning_rate": 8.958539882231228e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 2517, + "text_loss": 0.53125 + }, + { + "epoch": 0.21, + "learning_rate": 8.95774220819832e-06, + "loss": 0.5858, + "regression_loss": 0.0, + "step": 2518, + "text_loss": 0.828125 + }, + { + "epoch": 0.21, + "learning_rate": 8.956944264346992e-06, + "loss": 0.6575, + "regression_loss": 0.0, + "step": 2519, + "text_loss": 0.58984375 + }, + { + "epoch": 0.21, + "learning_rate": 8.95614605073164e-06, + "loss": 0.4111, + "regression_loss": 0.0, + "step": 2520, + "text_loss": 0.28125 + }, + { + "epoch": 0.21, + "learning_rate": 8.955347567406682e-06, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 2521, + "text_loss": 0.462890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.954548814426558e-06, + "loss": 0.4633, + "regression_loss": 0.0, + "step": 2522, + "text_loss": 0.59375 + }, + { + "epoch": 0.21, + "learning_rate": 8.95374979184572e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 2523, + "text_loss": 0.451171875 + }, + { + "epoch": 0.21, + "learning_rate": 8.95295049971864e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 2524, + "text_loss": 0.66796875 + }, + { + "epoch": 0.21, + "learning_rate": 8.952150938099813e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 2525, + "text_loss": 0.4765625 + }, + { + "epoch": 0.21, + "learning_rate": 8.951351107043746e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 2526, + "text_loss": 0.60546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.950551006604971e-06, + "loss": 0.5236, + "regression_loss": 0.0, + "step": 2527, + "text_loss": 0.81640625 + }, + { + "epoch": 0.21, + "learning_rate": 8.94975063683803e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 2528, + "text_loss": 0.625 + }, + { + "epoch": 0.21, + "learning_rate": 8.94894999779749e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 2529, + "text_loss": 0.486328125 + }, + { + "epoch": 0.21, + "learning_rate": 8.948149089537938e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 2530, + "text_loss": 0.4921875 + }, + { + "epoch": 0.21, + "learning_rate": 8.94734791211397e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 2531, + "text_loss": 0.7578125 + }, + { + "epoch": 0.21, + "learning_rate": 8.94654646558021e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 2532, + "text_loss": 0.328125 + }, + { + "epoch": 0.21, + "learning_rate": 8.945744749991295e-06, + "loss": 0.6055, + "regression_loss": 0.0, + "step": 2533, + "text_loss": 0.49609375 + }, + { + "epoch": 0.21, + "learning_rate": 8.944942765401883e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 2534, + "text_loss": 0.35546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.944140511866647e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 2535, + "text_loss": 0.25390625 + }, + { + "epoch": 0.21, + "learning_rate": 8.943337989440283e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 2536, + "text_loss": 0.69921875 + }, + { + "epoch": 0.21, + "learning_rate": 8.9425351981775e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 2537, + "text_loss": 0.42578125 + }, + { + "epoch": 0.21, + "learning_rate": 8.941732138133032e-06, + "loss": 0.6294, + "regression_loss": 0.0, + "step": 2538, + "text_loss": 0.625 + }, + { + "epoch": 0.21, + "learning_rate": 8.940928809361624e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 2539, + "text_loss": 0.482421875 + }, + { + "epoch": 0.21, + "learning_rate": 8.940125211918044e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 2540, + "text_loss": 0.365234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.939321345857078e-06, + "loss": 0.6597, + "regression_loss": 0.0, + "step": 2541, + "text_loss": 0.671875 + }, + { + "epoch": 0.21, + "learning_rate": 8.938517211233528e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 2542, + "text_loss": 0.55859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.937712808102217e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 2543, + "text_loss": 0.5546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.936908136517985e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 2544, + "text_loss": 0.62890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.936103196535688e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 2545, + "text_loss": 0.52734375 + }, + { + "epoch": 0.21, + "learning_rate": 8.935297988210204e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 2546, + "text_loss": 0.62890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.934492511596432e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 2547, + "text_loss": 0.353515625 + }, + { + "epoch": 0.21, + "learning_rate": 8.93368676674928e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 2548, + "text_loss": 0.6796875 + }, + { + "epoch": 0.21, + "learning_rate": 8.93288075372368e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 2549, + "text_loss": 0.388671875 + }, + { + "epoch": 0.21, + "learning_rate": 8.932074472574583e-06, + "loss": 0.5441, + "regression_loss": 0.0, + "step": 2550, + "text_loss": 0.416015625 + }, + { + "epoch": 0.21, + "learning_rate": 8.931267923356958e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 2551, + "text_loss": 0.37890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.93046110612579e-06, + "loss": 0.6697, + "regression_loss": 0.0, + "step": 2552, + "text_loss": 1.3125 + }, + { + "epoch": 0.21, + "learning_rate": 8.929654020936086e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 2553, + "text_loss": 0.65234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.928846667842864e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 2554, + "text_loss": 0.625 + }, + { + "epoch": 0.21, + "learning_rate": 8.92803904690117e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 2555, + "text_loss": 0.62109375 + }, + { + "epoch": 0.21, + "learning_rate": 8.92723115816606e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 2556, + "text_loss": 0.44921875 + }, + { + "epoch": 0.21, + "learning_rate": 8.926423001692613e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 2557, + "text_loss": 0.60546875 + }, + { + "epoch": 0.21, + "learning_rate": 8.925614577535927e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 2558, + "text_loss": 0.4921875 + }, + { + "epoch": 0.21, + "learning_rate": 8.924805885751113e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 2559, + "text_loss": 0.31640625 + }, + { + "epoch": 0.21, + "learning_rate": 8.923996926393306e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 2560, + "text_loss": 0.50390625 + }, + { + "epoch": 0.21, + "learning_rate": 8.923187699517654e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 2561, + "text_loss": 0.55859375 + }, + { + "epoch": 0.21, + "learning_rate": 8.922378205179328e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 2562, + "text_loss": 0.7265625 + }, + { + "epoch": 0.21, + "learning_rate": 8.921568443433515e-06, + "loss": 0.6475, + "regression_loss": 0.0, + "step": 2563, + "text_loss": 0.53125 + }, + { + "epoch": 0.21, + "learning_rate": 8.92075841433542e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 2564, + "text_loss": 0.61328125 + }, + { + "epoch": 0.21, + "learning_rate": 8.919948117940265e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 2565, + "text_loss": 0.80078125 + }, + { + "epoch": 0.21, + "learning_rate": 8.919137554303294e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 2566, + "text_loss": 0.3984375 + }, + { + "epoch": 0.21, + "learning_rate": 8.918326723479767e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 2567, + "text_loss": 0.6796875 + }, + { + "epoch": 0.21, + "learning_rate": 8.917515625524961e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 2568, + "text_loss": 0.28125 + }, + { + "epoch": 0.21, + "learning_rate": 8.916704260494173e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 2569, + "text_loss": 0.69140625 + }, + { + "epoch": 0.21, + "learning_rate": 8.915892628442719e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 2570, + "text_loss": 0.3828125 + }, + { + "epoch": 0.21, + "learning_rate": 8.915080729425928e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 2571, + "text_loss": 0.75390625 + }, + { + "epoch": 0.21, + "learning_rate": 8.914268563499156e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 2572, + "text_loss": 0.396484375 + }, + { + "epoch": 0.21, + "learning_rate": 8.91345613071777e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 2573, + "text_loss": 0.75 + }, + { + "epoch": 0.21, + "learning_rate": 8.912643431137156e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 2574, + "text_loss": 0.349609375 + }, + { + "epoch": 0.21, + "learning_rate": 8.91183046481272e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 2575, + "text_loss": 0.404296875 + }, + { + "epoch": 0.21, + "learning_rate": 8.91101723179989e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 2576, + "text_loss": 0.5625 + }, + { + "epoch": 0.21, + "learning_rate": 8.910203732154103e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 2577, + "text_loss": 0.40234375 + }, + { + "epoch": 0.21, + "learning_rate": 8.909389965930823e-06, + "loss": 0.6465, + "regression_loss": 0.0, + "step": 2578, + "text_loss": 0.515625 + }, + { + "epoch": 0.21, + "learning_rate": 8.908575933185525e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 2579, + "text_loss": 0.65625 + }, + { + "epoch": 0.21, + "learning_rate": 8.907761633973708e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 2580, + "text_loss": 0.431640625 + }, + { + "epoch": 0.21, + "learning_rate": 8.906947068350886e-06, + "loss": 0.6467, + "regression_loss": 0.0, + "step": 2581, + "text_loss": 0.337890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.906132236372591e-06, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 2582, + "text_loss": 0.2890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.905317138094374e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 2583, + "text_loss": 0.416015625 + }, + { + "epoch": 0.21, + "learning_rate": 8.904501773571806e-06, + "loss": 0.699, + "regression_loss": 0.0, + "step": 2584, + "text_loss": 0.76953125 + }, + { + "epoch": 0.21, + "learning_rate": 8.903686142860473e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 2585, + "text_loss": 0.2890625 + }, + { + "epoch": 0.21, + "learning_rate": 8.902870246015982e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 2586, + "text_loss": 0.79296875 + }, + { + "epoch": 0.22, + "learning_rate": 8.902054083093954e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 2587, + "text_loss": 0.5859375 + }, + { + "epoch": 0.22, + "learning_rate": 8.901237654150032e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 2588, + "text_loss": 0.56640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.900420959239876e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 2589, + "text_loss": 0.36328125 + }, + { + "epoch": 0.22, + "learning_rate": 8.899603998419163e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 2590, + "text_loss": 0.83203125 + }, + { + "epoch": 0.22, + "learning_rate": 8.898786771743592e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 2591, + "text_loss": 0.703125 + }, + { + "epoch": 0.22, + "learning_rate": 8.897969279268877e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 2592, + "text_loss": 0.2890625 + }, + { + "epoch": 0.22, + "learning_rate": 8.897151521050747e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 2593, + "text_loss": 0.49609375 + }, + { + "epoch": 0.22, + "learning_rate": 8.896333497144955e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 2594, + "text_loss": 0.66015625 + }, + { + "epoch": 0.22, + "learning_rate": 8.89551520760727e-06, + "loss": 0.6594, + "regression_loss": 0.0, + "step": 2595, + "text_loss": 0.71484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.894696652493476e-06, + "loss": 0.6582, + "regression_loss": 0.0, + "step": 2596, + "text_loss": 0.7734375 + }, + { + "epoch": 0.22, + "learning_rate": 8.89387783185938e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 2597, + "text_loss": 0.71484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.893058745760807e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 2598, + "text_loss": 0.302734375 + }, + { + "epoch": 0.22, + "learning_rate": 8.892239394253594e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 2599, + "text_loss": 0.74609375 + }, + { + "epoch": 0.22, + "learning_rate": 8.891419777393601e-06, + "loss": 0.4674, + "regression_loss": 0.0, + "step": 2600, + "text_loss": 0.302734375 + }, + { + "epoch": 0.22, + "learning_rate": 8.89059989523671e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 2601, + "text_loss": 0.6484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.88977974783881e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 2602, + "text_loss": 0.44921875 + }, + { + "epoch": 0.22, + "learning_rate": 8.888959335255815e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 2603, + "text_loss": 0.65234375 + }, + { + "epoch": 0.22, + "learning_rate": 8.888138657543662e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 2604, + "text_loss": 0.365234375 + }, + { + "epoch": 0.22, + "learning_rate": 8.887317714758295e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 2605, + "text_loss": 0.42578125 + }, + { + "epoch": 0.22, + "learning_rate": 8.886496506955685e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 2606, + "text_loss": 0.61328125 + }, + { + "epoch": 0.22, + "learning_rate": 8.885675034191813e-06, + "loss": 0.5721, + "regression_loss": 0.0, + "step": 2607, + "text_loss": 1.203125 + }, + { + "epoch": 0.22, + "learning_rate": 8.884853296522689e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 2608, + "text_loss": 0.4453125 + }, + { + "epoch": 0.22, + "learning_rate": 8.884031294004331e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 2609, + "text_loss": 0.546875 + }, + { + "epoch": 0.22, + "learning_rate": 8.883209026692781e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 2610, + "text_loss": 0.55078125 + }, + { + "epoch": 0.22, + "learning_rate": 8.882386494644095e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 2611, + "text_loss": 0.8125 + }, + { + "epoch": 0.22, + "learning_rate": 8.88156369791435e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 2612, + "text_loss": 0.59375 + }, + { + "epoch": 0.22, + "learning_rate": 8.88074063655964e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 2613, + "text_loss": 0.625 + }, + { + "epoch": 0.22, + "learning_rate": 8.879917310636077e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 2614, + "text_loss": 0.439453125 + }, + { + "epoch": 0.22, + "learning_rate": 8.879093720199791e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 2615, + "text_loss": 0.396484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.87826986530693e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 2616, + "text_loss": 0.330078125 + }, + { + "epoch": 0.22, + "learning_rate": 8.87744574601366e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 2617, + "text_loss": 0.59765625 + }, + { + "epoch": 0.22, + "learning_rate": 8.876621362376165e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 2618, + "text_loss": 0.6640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.875796714450649e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 2619, + "text_loss": 0.80078125 + }, + { + "epoch": 0.22, + "learning_rate": 8.874971802293329e-06, + "loss": 0.6589, + "regression_loss": 0.0, + "step": 2620, + "text_loss": 0.76953125 + }, + { + "epoch": 0.22, + "learning_rate": 8.874146625960446e-06, + "loss": 0.6895, + "regression_loss": 0.0, + "step": 2621, + "text_loss": 0.6015625 + }, + { + "epoch": 0.22, + "learning_rate": 8.873321185508257e-06, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 2622, + "text_loss": 0.4921875 + }, + { + "epoch": 0.22, + "learning_rate": 8.872495480993033e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 2623, + "text_loss": 0.484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.871669512471068e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 2624, + "text_loss": 0.4296875 + }, + { + "epoch": 0.22, + "learning_rate": 8.870843279998673e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 2625, + "text_loss": 0.875 + }, + { + "epoch": 0.22, + "learning_rate": 8.870016783632176e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 2626, + "text_loss": 0.24609375 + }, + { + "epoch": 0.22, + "learning_rate": 8.86919002342792e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 2627, + "text_loss": 0.75390625 + }, + { + "epoch": 0.22, + "learning_rate": 8.868362999442275e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 2628, + "text_loss": 0.248046875 + }, + { + "epoch": 0.22, + "learning_rate": 8.86753571173162e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 2629, + "text_loss": 0.296875 + }, + { + "epoch": 0.22, + "learning_rate": 8.866708160352354e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 2630, + "text_loss": 0.74609375 + }, + { + "epoch": 0.22, + "learning_rate": 8.865880345360897e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 2631, + "text_loss": 0.412109375 + }, + { + "epoch": 0.22, + "learning_rate": 8.865052266813686e-06, + "loss": 0.4847, + "regression_loss": 0.0, + "step": 2632, + "text_loss": 0.283203125 + }, + { + "epoch": 0.22, + "learning_rate": 8.864223924767172e-06, + "loss": 0.5319, + "regression_loss": 0.0, + "step": 2633, + "text_loss": 0.55859375 + }, + { + "epoch": 0.22, + "learning_rate": 8.86339531927783e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 2634, + "text_loss": 0.328125 + }, + { + "epoch": 0.22, + "learning_rate": 8.86256645040215e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 2635, + "text_loss": 0.9140625 + }, + { + "epoch": 0.22, + "learning_rate": 8.861737318196637e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2636, + "text_loss": 0.578125 + }, + { + "epoch": 0.22, + "learning_rate": 8.86090792271782e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 2637, + "text_loss": 0.7421875 + }, + { + "epoch": 0.22, + "learning_rate": 8.860078264022243e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 2638, + "text_loss": 0.41015625 + }, + { + "epoch": 0.22, + "learning_rate": 8.859248342166465e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 2639, + "text_loss": 0.33984375 + }, + { + "epoch": 0.22, + "learning_rate": 8.858418157207068e-06, + "loss": 0.5994, + "regression_loss": 0.0, + "step": 2640, + "text_loss": 0.7109375 + }, + { + "epoch": 0.22, + "learning_rate": 8.85758770920065e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 2641, + "text_loss": 0.66015625 + }, + { + "epoch": 0.22, + "learning_rate": 8.856756998203825e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 2642, + "text_loss": 0.490234375 + }, + { + "epoch": 0.22, + "learning_rate": 8.855926024273227e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 2643, + "text_loss": 0.6640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.855094787465507e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 2644, + "text_loss": 0.3828125 + }, + { + "epoch": 0.22, + "learning_rate": 8.85426328783734e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 2645, + "text_loss": 0.71875 + }, + { + "epoch": 0.22, + "learning_rate": 8.853431525445404e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 2646, + "text_loss": 0.396484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.85259950034641e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 2647, + "text_loss": 0.73828125 + }, + { + "epoch": 0.22, + "learning_rate": 8.851767212597083e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2648, + "text_loss": 0.6796875 + }, + { + "epoch": 0.22, + "learning_rate": 8.850934662254158e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 2649, + "text_loss": 0.36328125 + }, + { + "epoch": 0.22, + "learning_rate": 8.8501018493744e-06, + "loss": 0.5813, + "regression_loss": 0.0, + "step": 2650, + "text_loss": 0.69140625 + }, + { + "epoch": 0.22, + "learning_rate": 8.849268774014581e-06, + "loss": 0.5813, + "regression_loss": 0.0, + "step": 2651, + "text_loss": 0.6328125 + }, + { + "epoch": 0.22, + "learning_rate": 8.848435436231498e-06, + "loss": 0.5682, + "regression_loss": 0.0, + "step": 2652, + "text_loss": 0.67578125 + }, + { + "epoch": 0.22, + "learning_rate": 8.847601836081964e-06, + "loss": 0.6313, + "regression_loss": 0.0, + "step": 2653, + "text_loss": 0.6875 + }, + { + "epoch": 0.22, + "learning_rate": 8.84676797362281e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 2654, + "text_loss": 0.48046875 + }, + { + "epoch": 0.22, + "learning_rate": 8.845933848910882e-06, + "loss": 0.689, + "regression_loss": 0.0, + "step": 2655, + "text_loss": 0.78125 + }, + { + "epoch": 0.22, + "learning_rate": 8.845099462003049e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 2656, + "text_loss": 0.369140625 + }, + { + "epoch": 0.22, + "learning_rate": 8.844264812956194e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 2657, + "text_loss": 0.31640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.843429901827219e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 2658, + "text_loss": 0.4765625 + }, + { + "epoch": 0.22, + "learning_rate": 8.842594728673043e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 2659, + "text_loss": 0.40625 + }, + { + "epoch": 0.22, + "learning_rate": 8.841759293550606e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 2660, + "text_loss": 0.296875 + }, + { + "epoch": 0.22, + "learning_rate": 8.840923596516863e-06, + "loss": 0.6165, + "regression_loss": 0.0, + "step": 2661, + "text_loss": 0.4453125 + }, + { + "epoch": 0.22, + "learning_rate": 8.840087637628786e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 2662, + "text_loss": 0.78515625 + }, + { + "epoch": 0.22, + "learning_rate": 8.839251416943367e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 2663, + "text_loss": 0.59375 + }, + { + "epoch": 0.22, + "learning_rate": 8.838414934517617e-06, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 2664, + "text_loss": 0.6484375 + }, + { + "epoch": 0.22, + "learning_rate": 8.837578190408561e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 2665, + "text_loss": 0.578125 + }, + { + "epoch": 0.22, + "learning_rate": 8.836741184673244e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 2666, + "text_loss": 0.59765625 + }, + { + "epoch": 0.22, + "learning_rate": 8.83590391736873e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 2667, + "text_loss": 0.765625 + }, + { + "epoch": 0.22, + "learning_rate": 8.835066388552099e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 2668, + "text_loss": 0.431640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.834228598280447e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 2669, + "text_loss": 0.5546875 + }, + { + "epoch": 0.22, + "learning_rate": 8.833390546610895e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 2670, + "text_loss": 0.50390625 + }, + { + "epoch": 0.22, + "learning_rate": 8.832552233600573e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 2671, + "text_loss": 0.60546875 + }, + { + "epoch": 0.22, + "learning_rate": 8.831713659306635e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 2672, + "text_loss": 0.51171875 + }, + { + "epoch": 0.22, + "learning_rate": 8.83087482378625e-06, + "loss": 0.4242, + "regression_loss": 0.0, + "step": 2673, + "text_loss": 0.2890625 + }, + { + "epoch": 0.22, + "learning_rate": 8.830035727096605e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 2674, + "text_loss": 0.55859375 + }, + { + "epoch": 0.22, + "learning_rate": 8.829196369294905e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 2675, + "text_loss": 0.94921875 + }, + { + "epoch": 0.22, + "learning_rate": 8.828356750438375e-06, + "loss": 0.5984, + "regression_loss": 0.0, + "step": 2676, + "text_loss": 0.67578125 + }, + { + "epoch": 0.22, + "learning_rate": 8.827516870584255e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 2677, + "text_loss": 0.859375 + }, + { + "epoch": 0.22, + "learning_rate": 8.826676729789802e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 2678, + "text_loss": 0.57421875 + }, + { + "epoch": 0.22, + "learning_rate": 8.825836328112296e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 2679, + "text_loss": 0.5703125 + }, + { + "epoch": 0.22, + "learning_rate": 8.824995665609026e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 2680, + "text_loss": 0.64453125 + }, + { + "epoch": 0.22, + "learning_rate": 8.82415474233731e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 2681, + "text_loss": 0.208984375 + }, + { + "epoch": 0.22, + "learning_rate": 8.823313558354474e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 2682, + "text_loss": 0.72265625 + }, + { + "epoch": 0.22, + "learning_rate": 8.822472113717865e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 2683, + "text_loss": 0.65234375 + }, + { + "epoch": 0.22, + "learning_rate": 8.821630408484852e-06, + "loss": 0.626, + "regression_loss": 0.0, + "step": 2684, + "text_loss": 0.70703125 + }, + { + "epoch": 0.22, + "learning_rate": 8.820788442712813e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 2685, + "text_loss": 0.40625 + }, + { + "epoch": 0.22, + "learning_rate": 8.819946216459154e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 2686, + "text_loss": 0.58984375 + }, + { + "epoch": 0.22, + "learning_rate": 8.819103729781292e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 2687, + "text_loss": 0.55078125 + }, + { + "epoch": 0.22, + "learning_rate": 8.818260982736662e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 2688, + "text_loss": 0.62890625 + }, + { + "epoch": 0.22, + "learning_rate": 8.817417975382718e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 2689, + "text_loss": 0.58203125 + }, + { + "epoch": 0.22, + "learning_rate": 8.816574707776933e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 2690, + "text_loss": 0.58984375 + }, + { + "epoch": 0.22, + "learning_rate": 8.815731179976796e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 2691, + "text_loss": 0.6171875 + }, + { + "epoch": 0.22, + "learning_rate": 8.814887392039817e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 2692, + "text_loss": 0.291015625 + }, + { + "epoch": 0.22, + "learning_rate": 8.814043344023519e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 2693, + "text_loss": 0.6171875 + }, + { + "epoch": 0.22, + "learning_rate": 8.813199035985442e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 2694, + "text_loss": 0.58984375 + }, + { + "epoch": 0.22, + "learning_rate": 8.812354467983152e-06, + "loss": 0.616, + "regression_loss": 0.0, + "step": 2695, + "text_loss": 0.62109375 + }, + { + "epoch": 0.22, + "learning_rate": 8.811509640074223e-06, + "loss": 0.5155, + "regression_loss": 0.0, + "step": 2696, + "text_loss": 0.498046875 + }, + { + "epoch": 0.22, + "learning_rate": 8.810664552316255e-06, + "loss": 0.6262, + "regression_loss": 0.0, + "step": 2697, + "text_loss": 0.56640625 + }, + { + "epoch": 0.22, + "learning_rate": 8.809819204766856e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 2698, + "text_loss": 0.7265625 + }, + { + "epoch": 0.22, + "learning_rate": 8.808973597483664e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 2699, + "text_loss": 0.380859375 + }, + { + "epoch": 0.22, + "learning_rate": 8.808127730524323e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 2700, + "text_loss": 0.62890625 + }, + { + "epoch": 0.22, + "learning_rate": 8.807281603946502e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 2701, + "text_loss": 0.451171875 + }, + { + "epoch": 0.22, + "learning_rate": 8.806435217807885e-06, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 2702, + "text_loss": 0.357421875 + }, + { + "epoch": 0.22, + "learning_rate": 8.805588572166175e-06, + "loss": 0.4476, + "regression_loss": 0.0, + "step": 2703, + "text_loss": 0.298828125 + }, + { + "epoch": 0.22, + "learning_rate": 8.804741667079091e-06, + "loss": 0.6321, + "regression_loss": 0.0, + "step": 2704, + "text_loss": 0.84765625 + }, + { + "epoch": 0.22, + "learning_rate": 8.803894502604373e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 2705, + "text_loss": 0.39453125 + }, + { + "epoch": 0.22, + "learning_rate": 8.803047078799772e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 2706, + "text_loss": 0.40625 + }, + { + "epoch": 0.22, + "learning_rate": 8.802199395723065e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 2707, + "text_loss": 0.431640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.80135145343204e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 2708, + "text_loss": 0.384765625 + }, + { + "epoch": 0.23, + "learning_rate": 8.800503251984507e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 2709, + "text_loss": 0.51171875 + }, + { + "epoch": 0.23, + "learning_rate": 8.79965479143829e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 2710, + "text_loss": 0.7265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.798806071851236e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 2711, + "text_loss": 0.71875 + }, + { + "epoch": 0.23, + "learning_rate": 8.797957093281203e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 2712, + "text_loss": 0.7734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.79710785578607e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 2713, + "text_loss": 0.734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.796258359423738e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 2714, + "text_loss": 0.59375 + }, + { + "epoch": 0.23, + "learning_rate": 8.795408604252116e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 2715, + "text_loss": 0.6640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.79455859032914e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 2716, + "text_loss": 0.384765625 + }, + { + "epoch": 0.23, + "learning_rate": 8.793708317712757e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 2717, + "text_loss": 0.369140625 + }, + { + "epoch": 0.23, + "learning_rate": 8.792857786460933e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 2718, + "text_loss": 0.5859375 + }, + { + "epoch": 0.23, + "learning_rate": 8.792006996631657e-06, + "loss": 0.7302, + "regression_loss": 0.0, + "step": 2719, + "text_loss": 0.87109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.791155948282927e-06, + "loss": 0.6396, + "regression_loss": 0.0, + "step": 2720, + "text_loss": 0.703125 + }, + { + "epoch": 0.23, + "learning_rate": 8.790304641472766e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 2721, + "text_loss": 0.79296875 + }, + { + "epoch": 0.23, + "learning_rate": 8.789453076259212e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 2722, + "text_loss": 0.47265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.78860125270032e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 2723, + "text_loss": 0.55859375 + }, + { + "epoch": 0.23, + "learning_rate": 8.78774917085416e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 2724, + "text_loss": 0.609375 + }, + { + "epoch": 0.23, + "learning_rate": 8.786896830778827e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 2725, + "text_loss": 0.76171875 + }, + { + "epoch": 0.23, + "learning_rate": 8.786044232532423e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 2726, + "text_loss": 0.36328125 + }, + { + "epoch": 0.23, + "learning_rate": 8.78519137617308e-06, + "loss": 0.5009, + "regression_loss": 0.0, + "step": 2727, + "text_loss": 0.55078125 + }, + { + "epoch": 0.23, + "learning_rate": 8.78433826175894e-06, + "loss": 0.444, + "regression_loss": 0.0, + "step": 2728, + "text_loss": 0.302734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.783484889348161e-06, + "loss": 0.71, + "regression_loss": 0.0, + "step": 2729, + "text_loss": 0.73828125 + }, + { + "epoch": 0.23, + "learning_rate": 8.782631258998924e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 2730, + "text_loss": 0.66015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.781777370769425e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 2731, + "text_loss": 0.4296875 + }, + { + "epoch": 0.23, + "learning_rate": 8.780923224717877e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 2732, + "text_loss": 0.7890625 + }, + { + "epoch": 0.23, + "learning_rate": 8.780068820902511e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 2733, + "text_loss": 0.75390625 + }, + { + "epoch": 0.23, + "learning_rate": 8.779214159381577e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 2734, + "text_loss": 0.57421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.77835924021334e-06, + "loss": 0.6311, + "regression_loss": 0.0, + "step": 2735, + "text_loss": 0.458984375 + }, + { + "epoch": 0.23, + "learning_rate": 8.777504063456084e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 2736, + "text_loss": 0.60546875 + }, + { + "epoch": 0.23, + "learning_rate": 8.776648629168114e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 2737, + "text_loss": 0.57421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.775792937407744e-06, + "loss": 0.6165, + "regression_loss": 0.0, + "step": 2738, + "text_loss": 0.5078125 + }, + { + "epoch": 0.23, + "learning_rate": 8.774936988233314e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 2739, + "text_loss": 0.625 + }, + { + "epoch": 0.23, + "learning_rate": 8.774080781703176e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 2740, + "text_loss": 0.478515625 + }, + { + "epoch": 0.23, + "learning_rate": 8.773224317875703e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 2741, + "text_loss": 0.416015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.772367596809286e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 2742, + "text_loss": 0.44921875 + }, + { + "epoch": 0.23, + "learning_rate": 8.771510618562328e-06, + "loss": 0.474, + "regression_loss": 0.0, + "step": 2743, + "text_loss": 0.369140625 + }, + { + "epoch": 0.23, + "learning_rate": 8.770653383193255e-06, + "loss": 0.665, + "regression_loss": 0.0, + "step": 2744, + "text_loss": 0.27734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.769795890760511e-06, + "loss": 0.6934, + "regression_loss": 0.0, + "step": 2745, + "text_loss": 1.0390625 + }, + { + "epoch": 0.23, + "learning_rate": 8.768938141322554e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 2746, + "text_loss": 0.7265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.768080134937858e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 2747, + "text_loss": 0.498046875 + }, + { + "epoch": 0.23, + "learning_rate": 8.767221871664922e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 2748, + "text_loss": 0.482421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.766363351562255e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 2749, + "text_loss": 1.0390625 + }, + { + "epoch": 0.23, + "learning_rate": 8.765504574688389e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 2750, + "text_loss": 0.498046875 + }, + { + "epoch": 0.23, + "learning_rate": 8.764645541101869e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 2751, + "text_loss": 0.58203125 + }, + { + "epoch": 0.23, + "learning_rate": 8.763786250861258e-06, + "loss": 0.6499, + "regression_loss": 0.0, + "step": 2752, + "text_loss": 0.6640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.76292670402514e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 2753, + "text_loss": 0.396484375 + }, + { + "epoch": 0.23, + "learning_rate": 8.762066900652116e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 2754, + "text_loss": 0.72265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.7612068408008e-06, + "loss": 0.5962, + "regression_loss": 0.0, + "step": 2755, + "text_loss": 0.76171875 + }, + { + "epoch": 0.23, + "learning_rate": 8.760346524529829e-06, + "loss": 0.6416, + "regression_loss": 0.0, + "step": 2756, + "text_loss": 0.640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.759485951897853e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 2757, + "text_loss": 0.7109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.75862512296354e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 2758, + "text_loss": 1.046875 + }, + { + "epoch": 0.23, + "learning_rate": 8.757764037785582e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 2759, + "text_loss": 0.7265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.756902696422677e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 2760, + "text_loss": 0.53125 + }, + { + "epoch": 0.23, + "learning_rate": 8.756041098933553e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 2761, + "text_loss": 0.62109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.755179245376945e-06, + "loss": 0.6741, + "regression_loss": 0.0, + "step": 2762, + "text_loss": 0.87890625 + }, + { + "epoch": 0.23, + "learning_rate": 8.754317135811611e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 2763, + "text_loss": 0.302734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.753454770296325e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 2764, + "text_loss": 0.7109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.75259214888988e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 2765, + "text_loss": 0.50390625 + }, + { + "epoch": 0.23, + "learning_rate": 8.751729271651083e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 2766, + "text_loss": 0.326171875 + }, + { + "epoch": 0.23, + "learning_rate": 8.750866138638761e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 2767, + "text_loss": 0.62109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.75000274991176e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 2768, + "text_loss": 0.6015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.749139105528939e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 2769, + "text_loss": 0.419921875 + }, + { + "epoch": 0.23, + "learning_rate": 8.748275205549178e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 2770, + "text_loss": 0.67578125 + }, + { + "epoch": 0.23, + "learning_rate": 8.747411050031369e-06, + "loss": 0.6226, + "regression_loss": 0.0, + "step": 2771, + "text_loss": 0.322265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.746546639034433e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 2772, + "text_loss": 0.40625 + }, + { + "epoch": 0.23, + "learning_rate": 8.745681972617298e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 2773, + "text_loss": 0.6015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.74481705083891e-06, + "loss": 0.5981, + "regression_loss": 0.0, + "step": 2774, + "text_loss": 0.5859375 + }, + { + "epoch": 0.23, + "learning_rate": 8.743951873758238e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 2775, + "text_loss": 0.6640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.743086441434264e-06, + "loss": 0.4569, + "regression_loss": 0.0, + "step": 2776, + "text_loss": 0.482421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.74222075392599e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 2777, + "text_loss": 0.392578125 + }, + { + "epoch": 0.23, + "learning_rate": 8.741354811292431e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 2778, + "text_loss": 0.345703125 + }, + { + "epoch": 0.23, + "learning_rate": 8.740488613592626e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 2779, + "text_loss": 0.7890625 + }, + { + "epoch": 0.23, + "learning_rate": 8.739622160885626e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 2780, + "text_loss": 0.69140625 + }, + { + "epoch": 0.23, + "learning_rate": 8.738755453230502e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 2781, + "text_loss": 0.416015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.73788849068634e-06, + "loss": 0.6064, + "regression_loss": 0.0, + "step": 2782, + "text_loss": 0.671875 + }, + { + "epoch": 0.23, + "learning_rate": 8.737021273312247e-06, + "loss": 0.6709, + "regression_loss": 0.0, + "step": 2783, + "text_loss": 0.80078125 + }, + { + "epoch": 0.23, + "learning_rate": 8.736153801167346e-06, + "loss": 0.5436, + "regression_loss": 0.0, + "step": 2784, + "text_loss": 0.5625 + }, + { + "epoch": 0.23, + "learning_rate": 8.735286074310774e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2785, + "text_loss": 0.447265625 + }, + { + "epoch": 0.23, + "learning_rate": 8.73441809280169e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 2786, + "text_loss": 0.82421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.73354985669927e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 2787, + "text_loss": 0.80859375 + }, + { + "epoch": 0.23, + "learning_rate": 8.732681366062701e-06, + "loss": 0.5233, + "regression_loss": 0.0, + "step": 2788, + "text_loss": 0.5546875 + }, + { + "epoch": 0.23, + "learning_rate": 8.731812620951196e-06, + "loss": 0.6362, + "regression_loss": 0.0, + "step": 2789, + "text_loss": 0.78125 + }, + { + "epoch": 0.23, + "learning_rate": 8.730943621423983e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 2790, + "text_loss": 0.67578125 + }, + { + "epoch": 0.23, + "learning_rate": 8.730074367540302e-06, + "loss": 0.7261, + "regression_loss": 0.0, + "step": 2791, + "text_loss": 0.671875 + }, + { + "epoch": 0.23, + "learning_rate": 8.729204859359417e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 2792, + "text_loss": 0.39453125 + }, + { + "epoch": 0.23, + "learning_rate": 8.728335096940603e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 2793, + "text_loss": 0.625 + }, + { + "epoch": 0.23, + "learning_rate": 8.72746508034316e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 2794, + "text_loss": 0.408203125 + }, + { + "epoch": 0.23, + "learning_rate": 8.726594809626401e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 2795, + "text_loss": 0.5390625 + }, + { + "epoch": 0.23, + "learning_rate": 8.725724284849654e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 2796, + "text_loss": 0.53515625 + }, + { + "epoch": 0.23, + "learning_rate": 8.724853506072269e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 2797, + "text_loss": 0.57421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.723982473353608e-06, + "loss": 0.5692, + "regression_loss": 0.0, + "step": 2798, + "text_loss": 0.31640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.723111186753056e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 2799, + "text_loss": 0.78515625 + }, + { + "epoch": 0.23, + "learning_rate": 8.722239646330014e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 2800, + "text_loss": 0.734375 + }, + { + "epoch": 0.23, + "learning_rate": 8.721367852143899e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 2801, + "text_loss": 0.609375 + }, + { + "epoch": 0.23, + "learning_rate": 8.720495804254141e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 2802, + "text_loss": 0.478515625 + }, + { + "epoch": 0.23, + "learning_rate": 8.719623502720198e-06, + "loss": 0.5106, + "regression_loss": 0.0, + "step": 2803, + "text_loss": 0.640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.718750947601533e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 2804, + "text_loss": 0.73828125 + }, + { + "epoch": 0.23, + "learning_rate": 8.717878138957639e-06, + "loss": 0.6372, + "regression_loss": 0.0, + "step": 2805, + "text_loss": 0.255859375 + }, + { + "epoch": 0.23, + "learning_rate": 8.717005076848011e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 2806, + "text_loss": 0.375 + }, + { + "epoch": 0.23, + "learning_rate": 8.716131761332179e-06, + "loss": 0.6638, + "regression_loss": 0.0, + "step": 2807, + "text_loss": 0.67578125 + }, + { + "epoch": 0.23, + "learning_rate": 8.715258192469673e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 2808, + "text_loss": 0.609375 + }, + { + "epoch": 0.23, + "learning_rate": 8.714384370320055e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 2809, + "text_loss": 0.412109375 + }, + { + "epoch": 0.23, + "learning_rate": 8.713510294942892e-06, + "loss": 0.6274, + "regression_loss": 0.0, + "step": 2810, + "text_loss": 0.60546875 + }, + { + "epoch": 0.23, + "learning_rate": 8.71263596639778e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2811, + "text_loss": 0.6796875 + }, + { + "epoch": 0.23, + "learning_rate": 8.711761384744323e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 2812, + "text_loss": 0.34375 + }, + { + "epoch": 0.23, + "learning_rate": 8.710886550042144e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 2813, + "text_loss": 0.44140625 + }, + { + "epoch": 0.23, + "learning_rate": 8.710011462350888e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 2814, + "text_loss": 0.41796875 + }, + { + "epoch": 0.23, + "learning_rate": 8.70913612173021e-06, + "loss": 0.5262, + "regression_loss": 0.0, + "step": 2815, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.23, + "learning_rate": 8.708260528239788e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 2816, + "text_loss": 0.462890625 + }, + { + "epoch": 0.23, + "learning_rate": 8.707384681939318e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 2817, + "text_loss": 0.48046875 + }, + { + "epoch": 0.23, + "learning_rate": 8.706508582888507e-06, + "loss": 0.5338, + "regression_loss": 0.0, + "step": 2818, + "text_loss": 0.32421875 + }, + { + "epoch": 0.23, + "learning_rate": 8.705632231147085e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 2819, + "text_loss": 0.271484375 + }, + { + "epoch": 0.23, + "learning_rate": 8.704755626774796e-06, + "loss": 0.666, + "regression_loss": 0.0, + "step": 2820, + "text_loss": 0.66015625 + }, + { + "epoch": 0.23, + "learning_rate": 8.703878769831405e-06, + "loss": 0.6294, + "regression_loss": 0.0, + "step": 2821, + "text_loss": 0.53125 + }, + { + "epoch": 0.23, + "learning_rate": 8.703001660376687e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 2822, + "text_loss": 0.5 + }, + { + "epoch": 0.23, + "learning_rate": 8.702124298470443e-06, + "loss": 0.4341, + "regression_loss": 0.0, + "step": 2823, + "text_loss": 0.578125 + }, + { + "epoch": 0.23, + "learning_rate": 8.701246684172482e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 2824, + "text_loss": 0.85546875 + }, + { + "epoch": 0.23, + "learning_rate": 8.700368817542641e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 2825, + "text_loss": 0.443359375 + }, + { + "epoch": 0.23, + "learning_rate": 8.699490698640767e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 2826, + "text_loss": 0.58203125 + }, + { + "epoch": 0.23, + "learning_rate": 8.698612327526724e-06, + "loss": 0.6582, + "regression_loss": 0.0, + "step": 2827, + "text_loss": 1.171875 + }, + { + "epoch": 0.24, + "learning_rate": 8.697733704260394e-06, + "loss": 0.6243, + "regression_loss": 0.0, + "step": 2828, + "text_loss": 1.421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.696854828901677e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 2829, + "text_loss": 0.76953125 + }, + { + "epoch": 0.24, + "learning_rate": 8.695975701510496e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 2830, + "text_loss": 0.6953125 + }, + { + "epoch": 0.24, + "learning_rate": 8.695096322146777e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 2831, + "text_loss": 0.5234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.694216690870476e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 2832, + "text_loss": 0.4375 + }, + { + "epoch": 0.24, + "learning_rate": 8.693336807741561e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 2833, + "text_loss": 0.259765625 + }, + { + "epoch": 0.24, + "learning_rate": 8.69245667282002e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 2834, + "text_loss": 0.361328125 + }, + { + "epoch": 0.24, + "learning_rate": 8.691576286165852e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 2835, + "text_loss": 0.6640625 + }, + { + "epoch": 0.24, + "learning_rate": 8.69069564783908e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 2836, + "text_loss": 0.54296875 + }, + { + "epoch": 0.24, + "learning_rate": 8.689814757899742e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 2837, + "text_loss": 0.44140625 + }, + { + "epoch": 0.24, + "learning_rate": 8.688933616407888e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 2838, + "text_loss": 0.578125 + }, + { + "epoch": 0.24, + "learning_rate": 8.688052223423596e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 2839, + "text_loss": 0.51171875 + }, + { + "epoch": 0.24, + "learning_rate": 8.687170579006949e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 2840, + "text_loss": 0.296875 + }, + { + "epoch": 0.24, + "learning_rate": 8.686288683218058e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 2841, + "text_loss": 0.55859375 + }, + { + "epoch": 0.24, + "learning_rate": 8.685406536117042e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 2842, + "text_loss": 0.380859375 + }, + { + "epoch": 0.24, + "learning_rate": 8.684524137764044e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 2843, + "text_loss": 0.32421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.683641488219221e-06, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 2844, + "text_loss": 0.58984375 + }, + { + "epoch": 0.24, + "learning_rate": 8.682758587542745e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 2845, + "text_loss": 0.4609375 + }, + { + "epoch": 0.24, + "learning_rate": 8.681875435794811e-06, + "loss": 0.665, + "regression_loss": 0.0, + "step": 2846, + "text_loss": 0.734375 + }, + { + "epoch": 0.24, + "learning_rate": 8.680992033035625e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 2847, + "text_loss": 0.65625 + }, + { + "epoch": 0.24, + "learning_rate": 8.680108379325413e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 2848, + "text_loss": 0.66796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.679224474724422e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 2849, + "text_loss": 0.625 + }, + { + "epoch": 0.24, + "learning_rate": 8.678340319292907e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 2850, + "text_loss": 0.392578125 + }, + { + "epoch": 0.24, + "learning_rate": 8.67745591309115e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 2851, + "text_loss": 0.259765625 + }, + { + "epoch": 0.24, + "learning_rate": 8.676571256179438e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 2852, + "text_loss": 0.54296875 + }, + { + "epoch": 0.24, + "learning_rate": 8.675686348618089e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 2853, + "text_loss": 0.34765625 + }, + { + "epoch": 0.24, + "learning_rate": 8.674801190467428e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 2854, + "text_loss": 0.66796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.673915781787803e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 2855, + "text_loss": 0.32421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.673030122639575e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 2856, + "text_loss": 0.408203125 + }, + { + "epoch": 0.24, + "learning_rate": 8.672144213083122e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 2857, + "text_loss": 0.68359375 + }, + { + "epoch": 0.24, + "learning_rate": 8.671258053178844e-06, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 2858, + "text_loss": 0.66796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.670371642987154e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 2859, + "text_loss": 0.234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.669484982568482e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 2860, + "text_loss": 0.6796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.668598071983277e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 2861, + "text_loss": 0.6953125 + }, + { + "epoch": 0.24, + "learning_rate": 8.667710911292001e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 2862, + "text_loss": 0.63671875 + }, + { + "epoch": 0.24, + "learning_rate": 8.66682350055514e-06, + "loss": 0.5648, + "regression_loss": 0.0, + "step": 2863, + "text_loss": 0.80078125 + }, + { + "epoch": 0.24, + "learning_rate": 8.665935839833191e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 2864, + "text_loss": 0.671875 + }, + { + "epoch": 0.24, + "learning_rate": 8.66504792918667e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 2865, + "text_loss": 0.392578125 + }, + { + "epoch": 0.24, + "learning_rate": 8.664159768676113e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 2866, + "text_loss": 0.4375 + }, + { + "epoch": 0.24, + "learning_rate": 8.663271358362064e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 2867, + "text_loss": 0.6484375 + }, + { + "epoch": 0.24, + "learning_rate": 8.662382698305098e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 2868, + "text_loss": 0.62890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.661493788565793e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 2869, + "text_loss": 0.349609375 + }, + { + "epoch": 0.24, + "learning_rate": 8.660604629204751e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 2870, + "text_loss": 0.8828125 + }, + { + "epoch": 0.24, + "learning_rate": 8.659715220282596e-06, + "loss": 0.4706, + "regression_loss": 0.0, + "step": 2871, + "text_loss": 0.318359375 + }, + { + "epoch": 0.24, + "learning_rate": 8.658825561859955e-06, + "loss": 0.6938, + "regression_loss": 0.0, + "step": 2872, + "text_loss": 0.74609375 + }, + { + "epoch": 0.24, + "learning_rate": 8.657935653997487e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 2873, + "text_loss": 0.6328125 + }, + { + "epoch": 0.24, + "learning_rate": 8.657045496755858e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 2874, + "text_loss": 0.5234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.656155090195757e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 2875, + "text_loss": 0.52734375 + }, + { + "epoch": 0.24, + "learning_rate": 8.655264434377884e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 2876, + "text_loss": 0.345703125 + }, + { + "epoch": 0.24, + "learning_rate": 8.65437352936296e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 2877, + "text_loss": 0.48046875 + }, + { + "epoch": 0.24, + "learning_rate": 8.653482375211723e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 2878, + "text_loss": 0.6640625 + }, + { + "epoch": 0.24, + "learning_rate": 8.652590971984929e-06, + "loss": 0.6355, + "regression_loss": 0.0, + "step": 2879, + "text_loss": 0.52734375 + }, + { + "epoch": 0.24, + "learning_rate": 8.651699319743348e-06, + "loss": 0.5782, + "regression_loss": 0.0, + "step": 2880, + "text_loss": 0.41796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.650807418547766e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 2881, + "text_loss": 0.41015625 + }, + { + "epoch": 0.24, + "learning_rate": 8.649915268458991e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 2882, + "text_loss": 0.322265625 + }, + { + "epoch": 0.24, + "learning_rate": 8.649022869537843e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 2883, + "text_loss": 0.58984375 + }, + { + "epoch": 0.24, + "learning_rate": 8.648130221845164e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 2884, + "text_loss": 0.53515625 + }, + { + "epoch": 0.24, + "learning_rate": 8.647237325441809e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 2885, + "text_loss": 0.65625 + }, + { + "epoch": 0.24, + "learning_rate": 8.646344180388649e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 2886, + "text_loss": 0.416015625 + }, + { + "epoch": 0.24, + "learning_rate": 8.645450786746577e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 2887, + "text_loss": 0.439453125 + }, + { + "epoch": 0.24, + "learning_rate": 8.644557144576497e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 2888, + "text_loss": 0.2265625 + }, + { + "epoch": 0.24, + "learning_rate": 8.643663253939336e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 2889, + "text_loss": 0.671875 + }, + { + "epoch": 0.24, + "learning_rate": 8.642769114896033e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 2890, + "text_loss": 0.609375 + }, + { + "epoch": 0.24, + "learning_rate": 8.641874727507545e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 2891, + "text_loss": 0.33984375 + }, + { + "epoch": 0.24, + "learning_rate": 8.640980091834849e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 2892, + "text_loss": 0.37890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.640085207938936e-06, + "loss": 0.4843, + "regression_loss": 0.0, + "step": 2893, + "text_loss": 0.291015625 + }, + { + "epoch": 0.24, + "learning_rate": 8.639190075880813e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 2894, + "text_loss": 0.44921875 + }, + { + "epoch": 0.24, + "learning_rate": 8.638294695721508e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 2895, + "text_loss": 0.53515625 + }, + { + "epoch": 0.24, + "learning_rate": 8.63739906752206e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 2896, + "text_loss": 0.5234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.63650319134353e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 2897, + "text_loss": 0.671875 + }, + { + "epoch": 0.24, + "learning_rate": 8.635607067246996e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 2898, + "text_loss": 0.296875 + }, + { + "epoch": 0.24, + "learning_rate": 8.634710695293549e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 2899, + "text_loss": 0.58203125 + }, + { + "epoch": 0.24, + "learning_rate": 8.6338140755443e-06, + "loss": 0.4419, + "regression_loss": 0.0, + "step": 2900, + "text_loss": 0.5 + }, + { + "epoch": 0.24, + "learning_rate": 8.632917208060374e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 2901, + "text_loss": 0.71484375 + }, + { + "epoch": 0.24, + "learning_rate": 8.632020092902917e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 2902, + "text_loss": 0.640625 + }, + { + "epoch": 0.24, + "learning_rate": 8.63112273013309e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 2903, + "text_loss": 0.65625 + }, + { + "epoch": 0.24, + "learning_rate": 8.630225119812068e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 2904, + "text_loss": 0.62890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.629327262001047e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 2905, + "text_loss": 0.78125 + }, + { + "epoch": 0.24, + "learning_rate": 8.62842915676124e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 2906, + "text_loss": 0.337890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.627530804153871e-06, + "loss": 0.4784, + "regression_loss": 0.0, + "step": 2907, + "text_loss": 0.48046875 + }, + { + "epoch": 0.24, + "learning_rate": 8.626632204240188e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 2908, + "text_loss": 0.796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.625733357081453e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 2909, + "text_loss": 0.482421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.624834262738946e-06, + "loss": 0.6633, + "regression_loss": 0.0, + "step": 2910, + "text_loss": 0.9375 + }, + { + "epoch": 0.24, + "learning_rate": 8.623934921273957e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 2911, + "text_loss": 0.515625 + }, + { + "epoch": 0.24, + "learning_rate": 8.623035332747804e-06, + "loss": 0.6624, + "regression_loss": 0.0, + "step": 2912, + "text_loss": 0.7578125 + }, + { + "epoch": 0.24, + "learning_rate": 8.622135497221815e-06, + "loss": 0.6733, + "regression_loss": 0.0, + "step": 2913, + "text_loss": 0.890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.621235414757337e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 2914, + "text_loss": 0.65625 + }, + { + "epoch": 0.24, + "learning_rate": 8.620335085415728e-06, + "loss": 0.5021, + "regression_loss": 0.0, + "step": 2915, + "text_loss": 0.4921875 + }, + { + "epoch": 0.24, + "learning_rate": 8.619434509258374e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 2916, + "text_loss": 0.796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.618533686346668e-06, + "loss": 0.5928, + "regression_loss": 0.0, + "step": 2917, + "text_loss": 0.74609375 + }, + { + "epoch": 0.24, + "learning_rate": 8.617632616742024e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 2918, + "text_loss": 0.73046875 + }, + { + "epoch": 0.24, + "learning_rate": 8.616731300505873e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 2919, + "text_loss": 0.5703125 + }, + { + "epoch": 0.24, + "learning_rate": 8.615829737699661e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 2920, + "text_loss": 0.78125 + }, + { + "epoch": 0.24, + "learning_rate": 8.614927928384854e-06, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 2921, + "text_loss": 0.5234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.614025872622932e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 2922, + "text_loss": 0.50390625 + }, + { + "epoch": 0.24, + "learning_rate": 8.61312357047539e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 2923, + "text_loss": 0.2890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.612221022003745e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 2924, + "text_loss": 0.93359375 + }, + { + "epoch": 0.24, + "learning_rate": 8.61131822726953e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 2925, + "text_loss": 0.2734375 + }, + { + "epoch": 0.24, + "learning_rate": 8.610415186334286e-06, + "loss": 0.6479, + "regression_loss": 0.0, + "step": 2926, + "text_loss": 0.828125 + }, + { + "epoch": 0.24, + "learning_rate": 8.609511899259586e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 2927, + "text_loss": 0.470703125 + }, + { + "epoch": 0.24, + "learning_rate": 8.608608366107004e-06, + "loss": 0.6587, + "regression_loss": 0.0, + "step": 2928, + "text_loss": 0.73828125 + }, + { + "epoch": 0.24, + "learning_rate": 8.607704586938143e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 2929, + "text_loss": 0.482421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.606800561814616e-06, + "loss": 0.6594, + "regression_loss": 0.0, + "step": 2930, + "text_loss": 0.490234375 + }, + { + "epoch": 0.24, + "learning_rate": 8.605896290798057e-06, + "loss": 0.637, + "regression_loss": 0.0, + "step": 2931, + "text_loss": 0.94140625 + }, + { + "epoch": 0.24, + "learning_rate": 8.604991773950111e-06, + "loss": 0.5919, + "regression_loss": 0.0, + "step": 2932, + "text_loss": 0.58203125 + }, + { + "epoch": 0.24, + "learning_rate": 8.604087011332444e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 2933, + "text_loss": 0.6796875 + }, + { + "epoch": 0.24, + "learning_rate": 8.603182003006742e-06, + "loss": 0.5789, + "regression_loss": 0.0, + "step": 2934, + "text_loss": 0.66015625 + }, + { + "epoch": 0.24, + "learning_rate": 8.6022767490347e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 2935, + "text_loss": 0.37890625 + }, + { + "epoch": 0.24, + "learning_rate": 8.601371249478033e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 2936, + "text_loss": 0.5 + }, + { + "epoch": 0.24, + "learning_rate": 8.600465504398477e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 2937, + "text_loss": 0.30859375 + }, + { + "epoch": 0.24, + "learning_rate": 8.599559513857776e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 2938, + "text_loss": 0.57421875 + }, + { + "epoch": 0.24, + "learning_rate": 8.598653277917701e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 2939, + "text_loss": 0.447265625 + }, + { + "epoch": 0.24, + "learning_rate": 8.597746796640031e-06, + "loss": 0.4456, + "regression_loss": 0.0, + "step": 2940, + "text_loss": 0.5703125 + }, + { + "epoch": 0.24, + "learning_rate": 8.596840070086567e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 2941, + "text_loss": 0.6875 + }, + { + "epoch": 0.24, + "learning_rate": 8.595933098319123e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 2942, + "text_loss": 0.5078125 + }, + { + "epoch": 0.24, + "learning_rate": 8.595025881399534e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 2943, + "text_loss": 0.486328125 + }, + { + "epoch": 0.24, + "learning_rate": 8.594118419389648e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 2944, + "text_loss": 0.59765625 + }, + { + "epoch": 0.24, + "learning_rate": 8.59321071235133e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 2945, + "text_loss": 0.703125 + }, + { + "epoch": 0.24, + "learning_rate": 8.592302760346467e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 2946, + "text_loss": 0.515625 + }, + { + "epoch": 0.24, + "learning_rate": 8.591394563436952e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 2947, + "text_loss": 0.609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.590486121684708e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 2948, + "text_loss": 0.5625 + }, + { + "epoch": 0.25, + "learning_rate": 8.58957743515166e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 2949, + "text_loss": 0.6953125 + }, + { + "epoch": 0.25, + "learning_rate": 8.588668503899767e-06, + "loss": 0.6385, + "regression_loss": 0.0, + "step": 2950, + "text_loss": 0.6875 + }, + { + "epoch": 0.25, + "learning_rate": 8.587759327990988e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 2951, + "text_loss": 0.65625 + }, + { + "epoch": 0.25, + "learning_rate": 8.586849907487308e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 2952, + "text_loss": 0.52734375 + }, + { + "epoch": 0.25, + "learning_rate": 8.585940242450728e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 2953, + "text_loss": 0.35546875 + }, + { + "epoch": 0.25, + "learning_rate": 8.585030332943263e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 2954, + "text_loss": 0.5390625 + }, + { + "epoch": 0.25, + "learning_rate": 8.584120179026945e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 2955, + "text_loss": 0.78125 + }, + { + "epoch": 0.25, + "learning_rate": 8.583209780763826e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 2956, + "text_loss": 0.38671875 + }, + { + "epoch": 0.25, + "learning_rate": 8.58229913821597e-06, + "loss": 0.6802, + "regression_loss": 0.0, + "step": 2957, + "text_loss": 0.6953125 + }, + { + "epoch": 0.25, + "learning_rate": 8.581388251445462e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 2958, + "text_loss": 0.68359375 + }, + { + "epoch": 0.25, + "learning_rate": 8.580477120514398e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 2959, + "text_loss": 0.56640625 + }, + { + "epoch": 0.25, + "learning_rate": 8.579565745484899e-06, + "loss": 0.615, + "regression_loss": 0.0, + "step": 2960, + "text_loss": 0.6953125 + }, + { + "epoch": 0.25, + "learning_rate": 8.578654126419094e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 2961, + "text_loss": 0.62890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.577742263379134e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 2962, + "text_loss": 0.6484375 + }, + { + "epoch": 0.25, + "learning_rate": 8.576830156427186e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 2963, + "text_loss": 0.71484375 + }, + { + "epoch": 0.25, + "learning_rate": 8.575917805625432e-06, + "loss": 0.6504, + "regression_loss": 0.0, + "step": 2964, + "text_loss": 0.3828125 + }, + { + "epoch": 0.25, + "learning_rate": 8.575005211036069e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 2965, + "text_loss": 0.67578125 + }, + { + "epoch": 0.25, + "learning_rate": 8.574092372721317e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 2966, + "text_loss": 0.5078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.573179290743406e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 2967, + "text_loss": 0.314453125 + }, + { + "epoch": 0.25, + "learning_rate": 8.572265965164585e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 2968, + "text_loss": 0.375 + }, + { + "epoch": 0.25, + "learning_rate": 8.571352396047122e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 2969, + "text_loss": 0.5546875 + }, + { + "epoch": 0.25, + "learning_rate": 8.570438583453298e-06, + "loss": 0.5736, + "regression_loss": 0.0, + "step": 2970, + "text_loss": 0.62890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.569524527445413e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 2971, + "text_loss": 0.431640625 + }, + { + "epoch": 0.25, + "learning_rate": 8.568610228085781e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 2972, + "text_loss": 0.5078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.567695685436734e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 2973, + "text_loss": 0.255859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.566780899560622e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 2974, + "text_loss": 0.765625 + }, + { + "epoch": 0.25, + "learning_rate": 8.565865870519812e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 2975, + "text_loss": 0.5625 + }, + { + "epoch": 0.25, + "learning_rate": 8.564950598376683e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 2976, + "text_loss": 0.7421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.564035083193633e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 2977, + "text_loss": 0.47265625 + }, + { + "epoch": 0.25, + "learning_rate": 8.563119325033082e-06, + "loss": 0.4153, + "regression_loss": 0.0, + "step": 2978, + "text_loss": 0.57421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.562203323957457e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 2979, + "text_loss": 0.5078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.561287080029208e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 2980, + "text_loss": 0.498046875 + }, + { + "epoch": 0.25, + "learning_rate": 8.560370593310799e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 2981, + "text_loss": 0.57421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.559453863864711e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 2982, + "text_loss": 0.47265625 + }, + { + "epoch": 0.25, + "learning_rate": 8.558536891753442e-06, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 2983, + "text_loss": 0.890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.557619677039509e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 2984, + "text_loss": 0.5859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.55670221978544e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 2985, + "text_loss": 0.52734375 + }, + { + "epoch": 0.25, + "learning_rate": 8.555784520053784e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 2986, + "text_loss": 0.59375 + }, + { + "epoch": 0.25, + "learning_rate": 8.554866577907106e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 2987, + "text_loss": 0.53125 + }, + { + "epoch": 0.25, + "learning_rate": 8.553948393407982e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 2988, + "text_loss": 0.80078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.553029966619014e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 2989, + "text_loss": 0.4453125 + }, + { + "epoch": 0.25, + "learning_rate": 8.552111297602814e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 2990, + "text_loss": 0.419921875 + }, + { + "epoch": 0.25, + "learning_rate": 8.551192386422011e-06, + "loss": 0.6792, + "regression_loss": 0.0, + "step": 2991, + "text_loss": 0.84375 + }, + { + "epoch": 0.25, + "learning_rate": 8.550273233139252e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 2992, + "text_loss": 0.474609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.549353837817203e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 2993, + "text_loss": 0.310546875 + }, + { + "epoch": 0.25, + "learning_rate": 8.548434200518539e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 2994, + "text_loss": 0.474609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.547514321305959e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 2995, + "text_loss": 0.4375 + }, + { + "epoch": 0.25, + "learning_rate": 8.546594200242176e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 2996, + "text_loss": 0.357421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.545673837389916e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 2997, + "text_loss": 0.40234375 + }, + { + "epoch": 0.25, + "learning_rate": 8.544753232811928e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 2998, + "text_loss": 0.63671875 + }, + { + "epoch": 0.25, + "learning_rate": 8.543832386570972e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 2999, + "text_loss": 0.6171875 + }, + { + "epoch": 0.25, + "learning_rate": 8.542911298729827e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 3000, + "text_loss": 0.83203125 + }, + { + "epoch": 0.25, + "learning_rate": 8.54198996935129e-06, + "loss": 0.5336, + "regression_loss": 0.0, + "step": 3001, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.25, + "learning_rate": 8.54106839849817e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 3002, + "text_loss": 0.70703125 + }, + { + "epoch": 0.25, + "learning_rate": 8.540146586233296e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 3003, + "text_loss": 0.58984375 + }, + { + "epoch": 0.25, + "learning_rate": 8.539224532619511e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 3004, + "text_loss": 0.458984375 + }, + { + "epoch": 0.25, + "learning_rate": 8.538302237719678e-06, + "loss": 0.5126, + "regression_loss": 0.0, + "step": 3005, + "text_loss": 0.376953125 + }, + { + "epoch": 0.25, + "learning_rate": 8.537379701596673e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 3006, + "text_loss": 0.384765625 + }, + { + "epoch": 0.25, + "learning_rate": 8.53645692431339e-06, + "loss": 0.6494, + "regression_loss": 0.0, + "step": 3007, + "text_loss": 0.474609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.535533905932739e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 3008, + "text_loss": 0.609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.534610646517647e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 3009, + "text_loss": 0.5625 + }, + { + "epoch": 0.25, + "learning_rate": 8.533687146131056e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 3010, + "text_loss": 0.59765625 + }, + { + "epoch": 0.25, + "learning_rate": 8.532763404835927e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 3011, + "text_loss": 0.455078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.531839422695236e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 3012, + "text_loss": 0.6875 + }, + { + "epoch": 0.25, + "learning_rate": 8.530915199771975e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 3013, + "text_loss": 0.54296875 + }, + { + "epoch": 0.25, + "learning_rate": 8.52999073612915e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 3014, + "text_loss": 0.78515625 + }, + { + "epoch": 0.25, + "learning_rate": 8.529066031829789e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 3015, + "text_loss": 0.62890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.528141086936934e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 3016, + "text_loss": 0.337890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.527215901513642e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 3017, + "text_loss": 0.84375 + }, + { + "epoch": 0.25, + "learning_rate": 8.526290475622989e-06, + "loss": 0.6326, + "regression_loss": 0.0, + "step": 3018, + "text_loss": 1.0 + }, + { + "epoch": 0.25, + "learning_rate": 8.525364809328061e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 3019, + "text_loss": 0.41796875 + }, + { + "epoch": 0.25, + "learning_rate": 8.52443890269197e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 3020, + "text_loss": 0.380859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.52351275577784e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 3021, + "text_loss": 0.5703125 + }, + { + "epoch": 0.25, + "learning_rate": 8.522586368648804e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 3022, + "text_loss": 0.5234375 + }, + { + "epoch": 0.25, + "learning_rate": 8.521659741368026e-06, + "loss": 0.4207, + "regression_loss": 0.0, + "step": 3023, + "text_loss": 0.490234375 + }, + { + "epoch": 0.25, + "learning_rate": 8.520732873998675e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 3024, + "text_loss": 0.455078125 + }, + { + "epoch": 0.25, + "learning_rate": 8.519805766603941e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 3025, + "text_loss": 0.490234375 + }, + { + "epoch": 0.25, + "learning_rate": 8.51887841924703e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 3026, + "text_loss": 0.5859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.51795083199116e-06, + "loss": 0.6816, + "regression_loss": 0.0, + "step": 3027, + "text_loss": 0.5859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.517023004899574e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 3028, + "text_loss": 0.56640625 + }, + { + "epoch": 0.25, + "learning_rate": 8.516094938035524e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 3029, + "text_loss": 0.482421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.515166631462283e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 3030, + "text_loss": 0.359375 + }, + { + "epoch": 0.25, + "learning_rate": 8.514238085243134e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 3031, + "text_loss": 0.416015625 + }, + { + "epoch": 0.25, + "learning_rate": 8.513309299441383e-06, + "loss": 0.4635, + "regression_loss": 0.0, + "step": 3032, + "text_loss": 0.7421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.512380274120352e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 3033, + "text_loss": 0.98828125 + }, + { + "epoch": 0.25, + "learning_rate": 8.511451009343372e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 3034, + "text_loss": 0.4375 + }, + { + "epoch": 0.25, + "learning_rate": 8.5105215051738e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 3035, + "text_loss": 0.50390625 + }, + { + "epoch": 0.25, + "learning_rate": 8.509591761675004e-06, + "loss": 0.5406, + "regression_loss": 0.0, + "step": 3036, + "text_loss": 0.6484375 + }, + { + "epoch": 0.25, + "learning_rate": 8.508661778910367e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 3037, + "text_loss": 0.26953125 + }, + { + "epoch": 0.25, + "learning_rate": 8.50773155694329e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 3038, + "text_loss": 0.55859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.506801095837195e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 3039, + "text_loss": 0.640625 + }, + { + "epoch": 0.25, + "learning_rate": 8.505870395655512e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 3040, + "text_loss": 0.451171875 + }, + { + "epoch": 0.25, + "learning_rate": 8.504939456461694e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 3041, + "text_loss": 0.51171875 + }, + { + "epoch": 0.25, + "learning_rate": 8.504008278319206e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 3042, + "text_loss": 0.53515625 + }, + { + "epoch": 0.25, + "learning_rate": 8.50307686129153e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 3043, + "text_loss": 0.267578125 + }, + { + "epoch": 0.25, + "learning_rate": 8.502145205442169e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 3044, + "text_loss": 0.9375 + }, + { + "epoch": 0.25, + "learning_rate": 8.501213310834633e-06, + "loss": 0.6414, + "regression_loss": 0.0, + "step": 3045, + "text_loss": 0.890625 + }, + { + "epoch": 0.25, + "learning_rate": 8.50028117753246e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 3046, + "text_loss": 0.7109375 + }, + { + "epoch": 0.25, + "learning_rate": 8.499348805599192e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 3047, + "text_loss": 0.291015625 + }, + { + "epoch": 0.25, + "learning_rate": 8.498416195098397e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 3048, + "text_loss": 0.328125 + }, + { + "epoch": 0.25, + "learning_rate": 8.497483346093654e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 3049, + "text_loss": 0.7421875 + }, + { + "epoch": 0.25, + "learning_rate": 8.496550258648562e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 3050, + "text_loss": 0.392578125 + }, + { + "epoch": 0.25, + "learning_rate": 8.495616932826732e-06, + "loss": 0.5385, + "regression_loss": 0.0, + "step": 3051, + "text_loss": 0.427734375 + }, + { + "epoch": 0.25, + "learning_rate": 8.494683368691792e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 3052, + "text_loss": 0.90234375 + }, + { + "epoch": 0.25, + "learning_rate": 8.493749566307391e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 3053, + "text_loss": 0.609375 + }, + { + "epoch": 0.25, + "learning_rate": 8.49281552573719e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 3054, + "text_loss": 0.58984375 + }, + { + "epoch": 0.25, + "learning_rate": 8.491881247044866e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 3055, + "text_loss": 0.52734375 + }, + { + "epoch": 0.25, + "learning_rate": 8.490946730294112e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 3056, + "text_loss": 0.73828125 + }, + { + "epoch": 0.25, + "learning_rate": 8.490011975548642e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 3057, + "text_loss": 0.6484375 + }, + { + "epoch": 0.25, + "learning_rate": 8.489076982872179e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 3058, + "text_loss": 0.68359375 + }, + { + "epoch": 0.25, + "learning_rate": 8.48814175232847e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 3059, + "text_loss": 0.44921875 + }, + { + "epoch": 0.25, + "learning_rate": 8.48720628398127e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 3060, + "text_loss": 0.5859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.48627057789436e-06, + "loss": 0.6802, + "regression_loss": 0.0, + "step": 3061, + "text_loss": 0.6015625 + }, + { + "epoch": 0.25, + "learning_rate": 8.485334634131526e-06, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 3062, + "text_loss": 0.25390625 + }, + { + "epoch": 0.25, + "learning_rate": 8.484398452756577e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 3063, + "text_loss": 0.7734375 + }, + { + "epoch": 0.25, + "learning_rate": 8.483462033833339e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 3064, + "text_loss": 0.703125 + }, + { + "epoch": 0.25, + "learning_rate": 8.48252537742565e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 3065, + "text_loss": 0.5625 + }, + { + "epoch": 0.25, + "learning_rate": 8.481588483597368e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 3066, + "text_loss": 0.5859375 + }, + { + "epoch": 0.25, + "learning_rate": 8.480651352412365e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 3067, + "text_loss": 0.671875 + }, + { + "epoch": 0.25, + "learning_rate": 8.479713983934532e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 3068, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.478776378227769e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 3069, + "text_loss": 0.373046875 + }, + { + "epoch": 0.26, + "learning_rate": 8.477838535356001e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 3070, + "text_loss": 0.625 + }, + { + "epoch": 0.26, + "learning_rate": 8.476900455383164e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 3071, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.475962138373212e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 3072, + "text_loss": 0.85546875 + }, + { + "epoch": 0.26, + "learning_rate": 8.475023584390115e-06, + "loss": 0.6501, + "regression_loss": 0.0, + "step": 3073, + "text_loss": 0.78515625 + }, + { + "epoch": 0.26, + "learning_rate": 8.474084793497856e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 3074, + "text_loss": 0.73828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.47314576576044e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 3075, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.472206501241884e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 3076, + "text_loss": 0.74609375 + }, + { + "epoch": 0.26, + "learning_rate": 8.471267000006222e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3077, + "text_loss": 0.388671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.470327262117506e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 3078, + "text_loss": 0.453125 + }, + { + "epoch": 0.26, + "learning_rate": 8.469387287639799e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 3079, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.468447076637187e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 3080, + "text_loss": 0.609375 + }, + { + "epoch": 0.26, + "learning_rate": 8.467506629173765e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 3081, + "text_loss": 0.51953125 + }, + { + "epoch": 0.26, + "learning_rate": 8.466565945313652e-06, + "loss": 0.6328, + "regression_loss": 0.0, + "step": 3082, + "text_loss": 0.53515625 + }, + { + "epoch": 0.26, + "learning_rate": 8.465625025120979e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 3083, + "text_loss": 0.4296875 + }, + { + "epoch": 0.26, + "learning_rate": 8.46468386865989e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 3084, + "text_loss": 0.259765625 + }, + { + "epoch": 0.26, + "learning_rate": 8.463742475994547e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 3085, + "text_loss": 0.90625 + }, + { + "epoch": 0.26, + "learning_rate": 8.462800847189133e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 3086, + "text_loss": 0.4375 + }, + { + "epoch": 0.26, + "learning_rate": 8.461858982307842e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 3087, + "text_loss": 0.640625 + }, + { + "epoch": 0.26, + "learning_rate": 8.460916881414886e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 3088, + "text_loss": 0.73828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.459974544574494e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 3089, + "text_loss": 0.6015625 + }, + { + "epoch": 0.26, + "learning_rate": 8.459031971850905e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 3090, + "text_loss": 0.73046875 + }, + { + "epoch": 0.26, + "learning_rate": 8.458089163308382e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 3091, + "text_loss": 0.470703125 + }, + { + "epoch": 0.26, + "learning_rate": 8.457146119011202e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 3092, + "text_loss": 0.68359375 + }, + { + "epoch": 0.26, + "learning_rate": 8.456202839023655e-06, + "loss": 0.6702, + "regression_loss": 0.0, + "step": 3093, + "text_loss": 0.66796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.455259323410047e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 3094, + "text_loss": 0.7109375 + }, + { + "epoch": 0.26, + "learning_rate": 8.454315572234708e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 3095, + "text_loss": 0.63671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.45337158556197e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 3096, + "text_loss": 0.796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.452427363456198e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 3097, + "text_loss": 0.32421875 + }, + { + "epoch": 0.26, + "learning_rate": 8.451482905981756e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 3098, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.450538213203038e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 3099, + "text_loss": 0.66015625 + }, + { + "epoch": 0.26, + "learning_rate": 8.449593285184445e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 3100, + "text_loss": 0.30078125 + }, + { + "epoch": 0.26, + "learning_rate": 8.448648121990399e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 3101, + "text_loss": 0.671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.447702723685335e-06, + "loss": 0.6436, + "regression_loss": 0.0, + "step": 3102, + "text_loss": 0.66796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.446757090333707e-06, + "loss": 0.6509, + "regression_loss": 0.0, + "step": 3103, + "text_loss": 0.453125 + }, + { + "epoch": 0.26, + "learning_rate": 8.445811221999983e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 3104, + "text_loss": 0.41796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.444865118748646e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 3105, + "text_loss": 0.384765625 + }, + { + "epoch": 0.26, + "learning_rate": 8.443918780644199e-06, + "loss": 0.6287, + "regression_loss": 0.0, + "step": 3106, + "text_loss": 0.6171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.442972207751155e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 3107, + "text_loss": 0.50390625 + }, + { + "epoch": 0.26, + "learning_rate": 8.442025400134048e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 3108, + "text_loss": 0.28515625 + }, + { + "epoch": 0.26, + "learning_rate": 8.441078357857428e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 3109, + "text_loss": 0.7734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.440131080985859e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 3110, + "text_loss": 0.609375 + }, + { + "epoch": 0.26, + "learning_rate": 8.43918356958392e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 3111, + "text_loss": 0.73046875 + }, + { + "epoch": 0.26, + "learning_rate": 8.438235823716208e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 3112, + "text_loss": 0.6875 + }, + { + "epoch": 0.26, + "learning_rate": 8.437287843447336e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 3113, + "text_loss": 0.7890625 + }, + { + "epoch": 0.26, + "learning_rate": 8.436339628841931e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 3114, + "text_loss": 1.09375 + }, + { + "epoch": 0.26, + "learning_rate": 8.435391179964638e-06, + "loss": 0.6499, + "regression_loss": 0.0, + "step": 3115, + "text_loss": 0.640625 + }, + { + "epoch": 0.26, + "learning_rate": 8.434442496880118e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 3116, + "text_loss": 0.57421875 + }, + { + "epoch": 0.26, + "learning_rate": 8.433493579653047e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 3117, + "text_loss": 0.69140625 + }, + { + "epoch": 0.26, + "learning_rate": 8.432544428348119e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 3118, + "text_loss": 0.38671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.431595043030038e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 3119, + "text_loss": 0.7265625 + }, + { + "epoch": 0.26, + "learning_rate": 8.430645423763533e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 3120, + "text_loss": 0.7578125 + }, + { + "epoch": 0.26, + "learning_rate": 8.42969557061334e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 3121, + "text_loss": 0.41015625 + }, + { + "epoch": 0.26, + "learning_rate": 8.428745483644216e-06, + "loss": 0.624, + "regression_loss": 0.0, + "step": 3122, + "text_loss": 0.52734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.427795162920937e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 3123, + "text_loss": 0.376953125 + }, + { + "epoch": 0.26, + "learning_rate": 8.426844608508285e-06, + "loss": 0.636, + "regression_loss": 0.0, + "step": 3124, + "text_loss": 0.63671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.425893820471069e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 3125, + "text_loss": 0.64453125 + }, + { + "epoch": 0.26, + "learning_rate": 8.424942798874106e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 3126, + "text_loss": 0.66015625 + }, + { + "epoch": 0.26, + "learning_rate": 8.423991543782231e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 3127, + "text_loss": 0.63671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.423040055260297e-06, + "loss": 0.6235, + "regression_loss": 0.0, + "step": 3128, + "text_loss": 0.58203125 + }, + { + "epoch": 0.26, + "learning_rate": 8.422088333373173e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 3129, + "text_loss": 0.5078125 + }, + { + "epoch": 0.26, + "learning_rate": 8.421136378185738e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 3130, + "text_loss": 0.75390625 + }, + { + "epoch": 0.26, + "learning_rate": 8.4201841897629e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 3131, + "text_loss": 0.4296875 + }, + { + "epoch": 0.26, + "learning_rate": 8.419231768169564e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 3132, + "text_loss": 0.52734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.418279113470666e-06, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 3133, + "text_loss": 0.59375 + }, + { + "epoch": 0.26, + "learning_rate": 8.417326225731154e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 3134, + "text_loss": 0.65625 + }, + { + "epoch": 0.26, + "learning_rate": 8.416373105015991e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 3135, + "text_loss": 0.4765625 + }, + { + "epoch": 0.26, + "learning_rate": 8.415419751390155e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 3136, + "text_loss": 0.46484375 + }, + { + "epoch": 0.26, + "learning_rate": 8.414466164918639e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 3137, + "text_loss": 0.52734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.413512345666456e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 3138, + "text_loss": 0.421875 + }, + { + "epoch": 0.26, + "learning_rate": 8.412558293698632e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 3139, + "text_loss": 0.40625 + }, + { + "epoch": 0.26, + "learning_rate": 8.411604009080207e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 3140, + "text_loss": 0.52734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.410649491876242e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 3141, + "text_loss": 0.30859375 + }, + { + "epoch": 0.26, + "learning_rate": 8.40969474215181e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 3142, + "text_loss": 0.53515625 + }, + { + "epoch": 0.26, + "learning_rate": 8.408739759972002e-06, + "loss": 0.656, + "regression_loss": 0.0, + "step": 3143, + "text_loss": 0.53125 + }, + { + "epoch": 0.26, + "learning_rate": 8.407784545401922e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 3144, + "text_loss": 0.62890625 + }, + { + "epoch": 0.26, + "learning_rate": 8.406829098506693e-06, + "loss": 0.5984, + "regression_loss": 0.0, + "step": 3145, + "text_loss": 0.5625 + }, + { + "epoch": 0.26, + "learning_rate": 8.405873419351451e-06, + "loss": 0.6138, + "regression_loss": 0.0, + "step": 3146, + "text_loss": 0.400390625 + }, + { + "epoch": 0.26, + "learning_rate": 8.40491750800135e-06, + "loss": 0.6897, + "regression_loss": 0.0, + "step": 3147, + "text_loss": 0.7890625 + }, + { + "epoch": 0.26, + "learning_rate": 8.403961364521562e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 3148, + "text_loss": 0.3828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.403004988977267e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 3149, + "text_loss": 0.72265625 + }, + { + "epoch": 0.26, + "learning_rate": 8.402048381433668e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 3150, + "text_loss": 0.48828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.40109154195598e-06, + "loss": 0.6304, + "regression_loss": 0.0, + "step": 3151, + "text_loss": 0.498046875 + }, + { + "epoch": 0.26, + "learning_rate": 8.400134470609438e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 3152, + "text_loss": 0.43359375 + }, + { + "epoch": 0.26, + "learning_rate": 8.399177167459289e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 3153, + "text_loss": 0.7734375 + }, + { + "epoch": 0.26, + "learning_rate": 8.398219632570797e-06, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 3154, + "text_loss": 0.55078125 + }, + { + "epoch": 0.26, + "learning_rate": 8.397261866009243e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 3155, + "text_loss": 0.3046875 + }, + { + "epoch": 0.26, + "learning_rate": 8.39630386783992e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 3156, + "text_loss": 0.51171875 + }, + { + "epoch": 0.26, + "learning_rate": 8.395345638128141e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 3157, + "text_loss": 0.515625 + }, + { + "epoch": 0.26, + "learning_rate": 8.394387176939233e-06, + "loss": 0.3481, + "regression_loss": 0.0, + "step": 3158, + "text_loss": 0.328125 + }, + { + "epoch": 0.26, + "learning_rate": 8.393428484338538e-06, + "loss": 0.626, + "regression_loss": 0.0, + "step": 3159, + "text_loss": 0.63671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.392469560391416e-06, + "loss": 0.4886, + "regression_loss": 0.0, + "step": 3160, + "text_loss": 0.453125 + }, + { + "epoch": 0.26, + "learning_rate": 8.391510405163241e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 3161, + "text_loss": 0.443359375 + }, + { + "epoch": 0.26, + "learning_rate": 8.390551018719404e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 3162, + "text_loss": 0.490234375 + }, + { + "epoch": 0.26, + "learning_rate": 8.38959140112531e-06, + "loss": 0.6482, + "regression_loss": 0.0, + "step": 3163, + "text_loss": 0.8359375 + }, + { + "epoch": 0.26, + "learning_rate": 8.38863155244638e-06, + "loss": 0.6851, + "regression_loss": 0.0, + "step": 3164, + "text_loss": 0.61328125 + }, + { + "epoch": 0.26, + "learning_rate": 8.387671472748053e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 3165, + "text_loss": 0.41015625 + }, + { + "epoch": 0.26, + "learning_rate": 8.386711162095783e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 3166, + "text_loss": 0.59375 + }, + { + "epoch": 0.26, + "learning_rate": 8.385750620555036e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 3167, + "text_loss": 0.294921875 + }, + { + "epoch": 0.26, + "learning_rate": 8.3847898481913e-06, + "loss": 0.4324, + "regression_loss": 0.0, + "step": 3168, + "text_loss": 0.578125 + }, + { + "epoch": 0.26, + "learning_rate": 8.383828845070075e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 3169, + "text_loss": 0.578125 + }, + { + "epoch": 0.26, + "learning_rate": 8.382867611256873e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 3170, + "text_loss": 0.73828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.381906146817232e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 3171, + "text_loss": 0.60546875 + }, + { + "epoch": 0.26, + "learning_rate": 8.380944451816697e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 3172, + "text_loss": 0.73828125 + }, + { + "epoch": 0.26, + "learning_rate": 8.379982526320829e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 3173, + "text_loss": 0.5234375 + }, + { + "epoch": 0.26, + "learning_rate": 8.379020370395211e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 3174, + "text_loss": 0.55078125 + }, + { + "epoch": 0.26, + "learning_rate": 8.378057984105437e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 3175, + "text_loss": 0.404296875 + }, + { + "epoch": 0.26, + "learning_rate": 8.377095367517116e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 3176, + "text_loss": 0.30859375 + }, + { + "epoch": 0.26, + "learning_rate": 8.376132520695874e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 3177, + "text_loss": 0.8203125 + }, + { + "epoch": 0.26, + "learning_rate": 8.375169443707356e-06, + "loss": 0.3901, + "regression_loss": 0.0, + "step": 3178, + "text_loss": 0.431640625 + }, + { + "epoch": 0.26, + "learning_rate": 8.374206136617217e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 3179, + "text_loss": 0.3671875 + }, + { + "epoch": 0.26, + "learning_rate": 8.37324259949113e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 3180, + "text_loss": 0.76953125 + }, + { + "epoch": 0.26, + "learning_rate": 8.372278832394787e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 3181, + "text_loss": 0.466796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.371314835393888e-06, + "loss": 0.6814, + "regression_loss": 0.0, + "step": 3182, + "text_loss": 0.83984375 + }, + { + "epoch": 0.26, + "learning_rate": 8.370350608554157e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 3183, + "text_loss": 0.66796875 + }, + { + "epoch": 0.26, + "learning_rate": 8.36938615194133e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 3184, + "text_loss": 0.4765625 + }, + { + "epoch": 0.26, + "learning_rate": 8.368421465621155e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 3185, + "text_loss": 0.57421875 + }, + { + "epoch": 0.26, + "learning_rate": 8.367456549659404e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 3186, + "text_loss": 0.59765625 + }, + { + "epoch": 0.26, + "learning_rate": 8.366491404121858e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 3187, + "text_loss": 0.5703125 + }, + { + "epoch": 0.26, + "learning_rate": 8.365526029074315e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 3188, + "text_loss": 0.77734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.364560424582588e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 3189, + "text_loss": 0.498046875 + }, + { + "epoch": 0.27, + "learning_rate": 8.363594590712511e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 3190, + "text_loss": 0.66015625 + }, + { + "epoch": 0.27, + "learning_rate": 8.362628527529928e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 3191, + "text_loss": 0.185546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.361662235100697e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 3192, + "text_loss": 0.53125 + }, + { + "epoch": 0.27, + "learning_rate": 8.3606957134907e-06, + "loss": 0.5531, + "regression_loss": 0.0, + "step": 3193, + "text_loss": 0.265625 + }, + { + "epoch": 0.27, + "learning_rate": 8.359728962765822e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 3194, + "text_loss": 0.5625 + }, + { + "epoch": 0.27, + "learning_rate": 8.358761982991981e-06, + "loss": 0.6379, + "regression_loss": 0.0, + "step": 3195, + "text_loss": 0.7578125 + }, + { + "epoch": 0.27, + "learning_rate": 8.357794774235094e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 3196, + "text_loss": 0.58984375 + }, + { + "epoch": 0.27, + "learning_rate": 8.3568273365611e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 3197, + "text_loss": 0.59375 + }, + { + "epoch": 0.27, + "learning_rate": 8.355859670035957e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 3198, + "text_loss": 0.451171875 + }, + { + "epoch": 0.27, + "learning_rate": 8.354891774725635e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 3199, + "text_loss": 0.6796875 + }, + { + "epoch": 0.27, + "learning_rate": 8.353923650696119e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 3200, + "text_loss": 0.490234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.352955298013409e-06, + "loss": 0.6082, + "regression_loss": 0.0, + "step": 3201, + "text_loss": 0.87109375 + }, + { + "epoch": 0.27, + "learning_rate": 8.351986716743528e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 3202, + "text_loss": 0.6328125 + }, + { + "epoch": 0.27, + "learning_rate": 8.351017906952502e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 3203, + "text_loss": 0.265625 + }, + { + "epoch": 0.27, + "learning_rate": 8.350048868706383e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 3204, + "text_loss": 0.5 + }, + { + "epoch": 0.27, + "learning_rate": 8.349079602071236e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 3205, + "text_loss": 0.46484375 + }, + { + "epoch": 0.27, + "learning_rate": 8.348110107113138e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 3206, + "text_loss": 0.27734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.347140383898188e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 3207, + "text_loss": 0.404296875 + }, + { + "epoch": 0.27, + "learning_rate": 8.346170432492491e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 3208, + "text_loss": 0.77734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.345200252962178e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 3209, + "text_loss": 0.6484375 + }, + { + "epoch": 0.27, + "learning_rate": 8.344229845373388e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 3210, + "text_loss": 0.330078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.34325920979228e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 3211, + "text_loss": 0.5078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.342288346285028e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 3212, + "text_loss": 0.56640625 + }, + { + "epoch": 0.27, + "learning_rate": 8.341317254917817e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 3213, + "text_loss": 0.478515625 + }, + { + "epoch": 0.27, + "learning_rate": 8.340345935756855e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 3214, + "text_loss": 0.6328125 + }, + { + "epoch": 0.27, + "learning_rate": 8.33937438886836e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 3215, + "text_loss": 0.77734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.338402614318563e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 3216, + "text_loss": 0.65234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.337430612173721e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 3217, + "text_loss": 0.73828125 + }, + { + "epoch": 0.27, + "learning_rate": 8.336458382500096e-06, + "loss": 0.729, + "regression_loss": 0.0, + "step": 3218, + "text_loss": 1.2734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.335485925363971e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 3219, + "text_loss": 0.65234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.334513240831643e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 3220, + "text_loss": 0.65625 + }, + { + "epoch": 0.27, + "learning_rate": 8.333540328969425e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 3221, + "text_loss": 0.5625 + }, + { + "epoch": 0.27, + "learning_rate": 8.332567189843643e-06, + "loss": 0.6377, + "regression_loss": 0.0, + "step": 3222, + "text_loss": 0.890625 + }, + { + "epoch": 0.27, + "learning_rate": 8.331593823520645e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 3223, + "text_loss": 0.32421875 + }, + { + "epoch": 0.27, + "learning_rate": 8.330620230066785e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 3224, + "text_loss": 0.64453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.329646409548441e-06, + "loss": 0.6, + "regression_loss": 0.0, + "step": 3225, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.328672362032002e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 3226, + "text_loss": 0.7265625 + }, + { + "epoch": 0.27, + "learning_rate": 8.327698087583874e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 3227, + "text_loss": 0.47265625 + }, + { + "epoch": 0.27, + "learning_rate": 8.326723586270475e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 3228, + "text_loss": 0.55859375 + }, + { + "epoch": 0.27, + "learning_rate": 8.325748858158246e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 3229, + "text_loss": 0.60546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.324773903313636e-06, + "loss": 0.7622, + "regression_loss": 0.0, + "step": 3230, + "text_loss": 0.6171875 + }, + { + "epoch": 0.27, + "learning_rate": 8.323798721803113e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 3231, + "text_loss": 0.515625 + }, + { + "epoch": 0.27, + "learning_rate": 8.322823313693162e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 3232, + "text_loss": 0.59765625 + }, + { + "epoch": 0.27, + "learning_rate": 8.321847679050279e-06, + "loss": 0.6277, + "regression_loss": 0.0, + "step": 3233, + "text_loss": 0.6484375 + }, + { + "epoch": 0.27, + "learning_rate": 8.320871817940976e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 3234, + "text_loss": 0.3359375 + }, + { + "epoch": 0.27, + "learning_rate": 8.319895730431785e-06, + "loss": 0.6191, + "regression_loss": 0.0, + "step": 3235, + "text_loss": 0.84375 + }, + { + "epoch": 0.27, + "learning_rate": 8.318919416589251e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 3236, + "text_loss": 0.45703125 + }, + { + "epoch": 0.27, + "learning_rate": 8.317942876479931e-06, + "loss": 0.7148, + "regression_loss": 0.0, + "step": 3237, + "text_loss": 0.87109375 + }, + { + "epoch": 0.27, + "learning_rate": 8.316966110170406e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 3238, + "text_loss": 0.4453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.31598911772726e-06, + "loss": 0.6482, + "regression_loss": 0.0, + "step": 3239, + "text_loss": 0.5703125 + }, + { + "epoch": 0.27, + "learning_rate": 8.315011899217103e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3240, + "text_loss": 0.65234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.314034454706556e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 3241, + "text_loss": 0.5234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.31305678426226e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 3242, + "text_loss": 0.66015625 + }, + { + "epoch": 0.27, + "learning_rate": 8.31207888795086e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 3243, + "text_loss": 0.46484375 + }, + { + "epoch": 0.27, + "learning_rate": 8.31110076583903e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 3244, + "text_loss": 0.796875 + }, + { + "epoch": 0.27, + "learning_rate": 8.31012241799345e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 3245, + "text_loss": 0.8671875 + }, + { + "epoch": 0.27, + "learning_rate": 8.309143844480817e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 3246, + "text_loss": 0.3828125 + }, + { + "epoch": 0.27, + "learning_rate": 8.308165045367853e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 3247, + "text_loss": 0.3828125 + }, + { + "epoch": 0.27, + "learning_rate": 8.307186020721281e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 3248, + "text_loss": 0.388671875 + }, + { + "epoch": 0.27, + "learning_rate": 8.306206770607846e-06, + "loss": 0.4297, + "regression_loss": 0.0, + "step": 3249, + "text_loss": 0.32421875 + }, + { + "epoch": 0.27, + "learning_rate": 8.305227295094309e-06, + "loss": 0.6714, + "regression_loss": 0.0, + "step": 3250, + "text_loss": 0.62890625 + }, + { + "epoch": 0.27, + "learning_rate": 8.304247594247448e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 3251, + "text_loss": 0.306640625 + }, + { + "epoch": 0.27, + "learning_rate": 8.30326766813405e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 3252, + "text_loss": 0.310546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.302287516820925e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 3253, + "text_loss": 0.34765625 + }, + { + "epoch": 0.27, + "learning_rate": 8.301307140374893e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 3254, + "text_loss": 0.67578125 + }, + { + "epoch": 0.27, + "learning_rate": 8.30032653886279e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 3255, + "text_loss": 0.5234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.299345712351468e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 3256, + "text_loss": 0.54296875 + }, + { + "epoch": 0.27, + "learning_rate": 8.298364660907798e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 3257, + "text_loss": 0.81640625 + }, + { + "epoch": 0.27, + "learning_rate": 8.29738338459866e-06, + "loss": 0.6055, + "regression_loss": 0.0, + "step": 3258, + "text_loss": 0.63671875 + }, + { + "epoch": 0.27, + "learning_rate": 8.296401883490955e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 3259, + "text_loss": 0.79296875 + }, + { + "epoch": 0.27, + "learning_rate": 8.295420157651593e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 3260, + "text_loss": 0.57421875 + }, + { + "epoch": 0.27, + "learning_rate": 8.294438207147506e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 3261, + "text_loss": 0.78125 + }, + { + "epoch": 0.27, + "learning_rate": 8.293456032045637e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 3262, + "text_loss": 0.41015625 + }, + { + "epoch": 0.27, + "learning_rate": 8.292473632412947e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 3263, + "text_loss": 0.4375 + }, + { + "epoch": 0.27, + "learning_rate": 8.291491008316409e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 3264, + "text_loss": 0.369140625 + }, + { + "epoch": 0.27, + "learning_rate": 8.290508159823015e-06, + "loss": 0.6313, + "regression_loss": 0.0, + "step": 3265, + "text_loss": 0.5 + }, + { + "epoch": 0.27, + "learning_rate": 8.289525086999768e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 3266, + "text_loss": 0.7890625 + }, + { + "epoch": 0.27, + "learning_rate": 8.288541789913691e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 3267, + "text_loss": 0.625 + }, + { + "epoch": 0.27, + "learning_rate": 8.28755826863182e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 3268, + "text_loss": 0.365234375 + }, + { + "epoch": 0.27, + "learning_rate": 8.286574523221206e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 3269, + "text_loss": 0.6328125 + }, + { + "epoch": 0.27, + "learning_rate": 8.285590553748918e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 3270, + "text_loss": 0.421875 + }, + { + "epoch": 0.27, + "learning_rate": 8.28460636028203e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 3271, + "text_loss": 0.435546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.28362194288765e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 3272, + "text_loss": 0.71484375 + }, + { + "epoch": 0.27, + "learning_rate": 8.282637301632883e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 3273, + "text_loss": 0.408203125 + }, + { + "epoch": 0.27, + "learning_rate": 8.28165243658486e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 3274, + "text_loss": 0.46875 + }, + { + "epoch": 0.27, + "learning_rate": 8.280667347810722e-06, + "loss": 0.6179, + "regression_loss": 0.0, + "step": 3275, + "text_loss": 0.58203125 + }, + { + "epoch": 0.27, + "learning_rate": 8.27968203537763e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 3276, + "text_loss": 0.419921875 + }, + { + "epoch": 0.27, + "learning_rate": 8.278696499352755e-06, + "loss": 0.5685, + "regression_loss": 0.0, + "step": 3277, + "text_loss": 0.80078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.277710739803287e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 3278, + "text_loss": 0.3046875 + }, + { + "epoch": 0.27, + "learning_rate": 8.27672475679643e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 3279, + "text_loss": 0.703125 + }, + { + "epoch": 0.27, + "learning_rate": 8.275738550399402e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 3280, + "text_loss": 0.4296875 + }, + { + "epoch": 0.27, + "learning_rate": 8.27475212067944e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 3281, + "text_loss": 0.5703125 + }, + { + "epoch": 0.27, + "learning_rate": 8.27376546770379e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 3282, + "text_loss": 0.80078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.272778591539719e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 3283, + "text_loss": 0.52734375 + }, + { + "epoch": 0.27, + "learning_rate": 8.271791492254508e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 3284, + "text_loss": 0.4453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.270804169915451e-06, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 3285, + "text_loss": 0.453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.26981662458986e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 3286, + "text_loss": 0.84765625 + }, + { + "epoch": 0.27, + "learning_rate": 8.26882885634506e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 3287, + "text_loss": 0.55859375 + }, + { + "epoch": 0.27, + "learning_rate": 8.26784086524839e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 3288, + "text_loss": 0.546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.266852651367207e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 3289, + "text_loss": 0.255859375 + }, + { + "epoch": 0.27, + "learning_rate": 8.265864214768883e-06, + "loss": 0.5253, + "regression_loss": 0.0, + "step": 3290, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.27, + "learning_rate": 8.264875555520806e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 3291, + "text_loss": 0.55078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.263886673690378e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 3292, + "text_loss": 0.30078125 + }, + { + "epoch": 0.27, + "learning_rate": 8.262897569345012e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 3293, + "text_loss": 0.64453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.261908242552141e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 3294, + "text_loss": 0.314453125 + }, + { + "epoch": 0.27, + "learning_rate": 8.260918693379216e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 3295, + "text_loss": 0.33203125 + }, + { + "epoch": 0.27, + "learning_rate": 8.259928921893694e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 3296, + "text_loss": 0.578125 + }, + { + "epoch": 0.27, + "learning_rate": 8.258938928163058e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 3297, + "text_loss": 0.37890625 + }, + { + "epoch": 0.27, + "learning_rate": 8.257948712254795e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 3298, + "text_loss": 0.69140625 + }, + { + "epoch": 0.27, + "learning_rate": 8.256958274236418e-06, + "loss": 0.6191, + "regression_loss": 0.0, + "step": 3299, + "text_loss": 0.671875 + }, + { + "epoch": 0.27, + "learning_rate": 8.255967614175447e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 3300, + "text_loss": 0.62109375 + }, + { + "epoch": 0.27, + "learning_rate": 8.254976732139419e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 3301, + "text_loss": 0.48828125 + }, + { + "epoch": 0.27, + "learning_rate": 8.25398562819589e-06, + "loss": 0.6689, + "regression_loss": 0.0, + "step": 3302, + "text_loss": 0.51171875 + }, + { + "epoch": 0.27, + "learning_rate": 8.252994302412427e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 3303, + "text_loss": 0.5625 + }, + { + "epoch": 0.27, + "learning_rate": 8.252002754856613e-06, + "loss": 0.6062, + "regression_loss": 0.0, + "step": 3304, + "text_loss": 0.51171875 + }, + { + "epoch": 0.27, + "learning_rate": 8.251010985596048e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 3305, + "text_loss": 0.474609375 + }, + { + "epoch": 0.27, + "learning_rate": 8.250018994698347e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 3306, + "text_loss": 0.44140625 + }, + { + "epoch": 0.27, + "learning_rate": 8.249026782231135e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 3307, + "text_loss": 0.85546875 + }, + { + "epoch": 0.27, + "learning_rate": 8.248034348262058e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 3308, + "text_loss": 0.404296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.247041692858775e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 3309, + "text_loss": 0.78515625 + }, + { + "epoch": 0.28, + "learning_rate": 8.246048816088958e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 3310, + "text_loss": 0.56640625 + }, + { + "epoch": 0.28, + "learning_rate": 8.2450557180203e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 3311, + "text_loss": 0.412109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.244062398720503e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 3312, + "text_loss": 0.609375 + }, + { + "epoch": 0.28, + "learning_rate": 8.243068858257287e-06, + "loss": 0.5109, + "regression_loss": 0.0, + "step": 3313, + "text_loss": 0.72265625 + }, + { + "epoch": 0.28, + "learning_rate": 8.242075096698386e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 3314, + "text_loss": 0.61328125 + }, + { + "epoch": 0.28, + "learning_rate": 8.241081114111548e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 3315, + "text_loss": 0.5 + }, + { + "epoch": 0.28, + "learning_rate": 8.240086910564543e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 3316, + "text_loss": 0.470703125 + }, + { + "epoch": 0.28, + "learning_rate": 8.239092486125145e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 3317, + "text_loss": 0.408203125 + }, + { + "epoch": 0.28, + "learning_rate": 8.23809784086115e-06, + "loss": 0.4811, + "regression_loss": 0.0, + "step": 3318, + "text_loss": 0.6796875 + }, + { + "epoch": 0.28, + "learning_rate": 8.23710297484037e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 3319, + "text_loss": 0.439453125 + }, + { + "epoch": 0.28, + "learning_rate": 8.236107888130628e-06, + "loss": 0.3922, + "regression_loss": 0.0, + "step": 3320, + "text_loss": 0.30859375 + }, + { + "epoch": 0.28, + "learning_rate": 8.235112580799765e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 3321, + "text_loss": 0.625 + }, + { + "epoch": 0.28, + "learning_rate": 8.234117052915633e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 3322, + "text_loss": 0.474609375 + }, + { + "epoch": 0.28, + "learning_rate": 8.233121304546105e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 3323, + "text_loss": 0.31640625 + }, + { + "epoch": 0.28, + "learning_rate": 8.232125335759065e-06, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 3324, + "text_loss": 0.470703125 + }, + { + "epoch": 0.28, + "learning_rate": 8.231129146622414e-06, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 3325, + "text_loss": 0.48046875 + }, + { + "epoch": 0.28, + "learning_rate": 8.230132737204065e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 3326, + "text_loss": 0.38671875 + }, + { + "epoch": 0.28, + "learning_rate": 8.229136107571948e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 3327, + "text_loss": 0.384765625 + }, + { + "epoch": 0.28, + "learning_rate": 8.228139257794012e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 3328, + "text_loss": 0.51953125 + }, + { + "epoch": 0.28, + "learning_rate": 8.227142187938215e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 3329, + "text_loss": 0.32421875 + }, + { + "epoch": 0.28, + "learning_rate": 8.226144898072528e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 3330, + "text_loss": 0.62109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.225147388264947e-06, + "loss": 0.42, + "regression_loss": 0.0, + "step": 3331, + "text_loss": 0.369140625 + }, + { + "epoch": 0.28, + "learning_rate": 8.224149658583472e-06, + "loss": 0.7202, + "regression_loss": 0.0, + "step": 3332, + "text_loss": 0.8203125 + }, + { + "epoch": 0.28, + "learning_rate": 8.223151709096128e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 3333, + "text_loss": 0.7109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.222153539870945e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 3334, + "text_loss": 0.60546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.221155150975978e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 3335, + "text_loss": 0.5859375 + }, + { + "epoch": 0.28, + "learning_rate": 8.220156542479289e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 3336, + "text_loss": 0.55859375 + }, + { + "epoch": 0.28, + "learning_rate": 8.219157714448957e-06, + "loss": 0.6653, + "regression_loss": 0.0, + "step": 3337, + "text_loss": 0.62890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.21815866695308e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 3338, + "text_loss": 0.62109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.217159400059766e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 3339, + "text_loss": 0.94140625 + }, + { + "epoch": 0.28, + "learning_rate": 8.216159913837142e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 3340, + "text_loss": 0.71484375 + }, + { + "epoch": 0.28, + "learning_rate": 8.215160208353345e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 3341, + "text_loss": 0.458984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.21416028367653e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 3342, + "text_loss": 0.5859375 + }, + { + "epoch": 0.28, + "learning_rate": 8.213160139874867e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 3343, + "text_loss": 0.54296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.212159777016543e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 3344, + "text_loss": 0.232421875 + }, + { + "epoch": 0.28, + "learning_rate": 8.211159195169755e-06, + "loss": 0.7295, + "regression_loss": 0.0, + "step": 3345, + "text_loss": 0.640625 + }, + { + "epoch": 0.28, + "learning_rate": 8.210158394402718e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 3346, + "text_loss": 0.60546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.209157374783661e-06, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 3347, + "text_loss": 0.34765625 + }, + { + "epoch": 0.28, + "learning_rate": 8.20815613638083e-06, + "loss": 0.6279, + "regression_loss": 0.0, + "step": 3348, + "text_loss": 0.80078125 + }, + { + "epoch": 0.28, + "learning_rate": 8.207154679262484e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 3349, + "text_loss": 0.46484375 + }, + { + "epoch": 0.28, + "learning_rate": 8.206153003496895e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 3350, + "text_loss": 0.43359375 + }, + { + "epoch": 0.28, + "learning_rate": 8.205151109152354e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 3351, + "text_loss": 0.5625 + }, + { + "epoch": 0.28, + "learning_rate": 8.204148996297166e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 3352, + "text_loss": 0.984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.203146664999646e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 3353, + "text_loss": 0.80078125 + }, + { + "epoch": 0.28, + "learning_rate": 8.202144115328133e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 3354, + "text_loss": 0.73828125 + }, + { + "epoch": 0.28, + "learning_rate": 8.20114134735097e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3355, + "text_loss": 0.98046875 + }, + { + "epoch": 0.28, + "learning_rate": 8.200138361136524e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 3356, + "text_loss": 0.45703125 + }, + { + "epoch": 0.28, + "learning_rate": 8.199135156753174e-06, + "loss": 0.6223, + "regression_loss": 0.0, + "step": 3357, + "text_loss": 0.83203125 + }, + { + "epoch": 0.28, + "learning_rate": 8.19813173426931e-06, + "loss": 0.6849, + "regression_loss": 0.0, + "step": 3358, + "text_loss": 0.27734375 + }, + { + "epoch": 0.28, + "learning_rate": 8.197128093753345e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 3359, + "text_loss": 0.486328125 + }, + { + "epoch": 0.28, + "learning_rate": 8.196124235273698e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 3360, + "text_loss": 0.5546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.195120158898805e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 3361, + "text_loss": 0.55078125 + }, + { + "epoch": 0.28, + "learning_rate": 8.194115864697126e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 3362, + "text_loss": 0.73046875 + }, + { + "epoch": 0.28, + "learning_rate": 8.193111352737125e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 3363, + "text_loss": 0.796875 + }, + { + "epoch": 0.28, + "learning_rate": 8.19210662308728e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 3364, + "text_loss": 0.67578125 + }, + { + "epoch": 0.28, + "learning_rate": 8.191101675816097e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 3365, + "text_loss": 0.6484375 + }, + { + "epoch": 0.28, + "learning_rate": 8.19009651099208e-06, + "loss": 0.6055, + "regression_loss": 0.0, + "step": 3366, + "text_loss": 0.56640625 + }, + { + "epoch": 0.28, + "learning_rate": 8.18909112868376e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 3367, + "text_loss": 0.4140625 + }, + { + "epoch": 0.28, + "learning_rate": 8.18808552895968e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 3368, + "text_loss": 0.609375 + }, + { + "epoch": 0.28, + "learning_rate": 8.18707971188839e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 3369, + "text_loss": 0.384765625 + }, + { + "epoch": 0.28, + "learning_rate": 8.18607367753847e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 3370, + "text_loss": 0.3515625 + }, + { + "epoch": 0.28, + "learning_rate": 8.1850674259785e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 3371, + "text_loss": 0.498046875 + }, + { + "epoch": 0.28, + "learning_rate": 8.184060957277086e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 3372, + "text_loss": 0.55859375 + }, + { + "epoch": 0.28, + "learning_rate": 8.183054271502839e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 3373, + "text_loss": 0.52734375 + }, + { + "epoch": 0.28, + "learning_rate": 8.182047368724393e-06, + "loss": 0.6687, + "regression_loss": 0.0, + "step": 3374, + "text_loss": 0.82421875 + }, + { + "epoch": 0.28, + "learning_rate": 8.181040249010393e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 3375, + "text_loss": 0.3125 + }, + { + "epoch": 0.28, + "learning_rate": 8.180032912429497e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 3376, + "text_loss": 0.7109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.17902535905038e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 3377, + "text_loss": 0.671875 + }, + { + "epoch": 0.28, + "learning_rate": 8.178017588941736e-06, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 3378, + "text_loss": 0.51953125 + }, + { + "epoch": 0.28, + "learning_rate": 8.177009602172266e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 3379, + "text_loss": 0.62890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.17600139881069e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 3380, + "text_loss": 0.5625 + }, + { + "epoch": 0.28, + "learning_rate": 8.17499297892574e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 3381, + "text_loss": 0.64453125 + }, + { + "epoch": 0.28, + "learning_rate": 8.17398434258617e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 3382, + "text_loss": 0.7578125 + }, + { + "epoch": 0.28, + "learning_rate": 8.172975489860739e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 3383, + "text_loss": 0.54296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.171966420818227e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 3384, + "text_loss": 0.37890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.170957135527429e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 3385, + "text_loss": 0.62890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.16994763405715e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 3386, + "text_loss": 0.51953125 + }, + { + "epoch": 0.28, + "learning_rate": 8.168937916476214e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 3387, + "text_loss": 0.50390625 + }, + { + "epoch": 0.28, + "learning_rate": 8.167927982853458e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 3388, + "text_loss": 0.54296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.166917833257733e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 3389, + "text_loss": 0.26171875 + }, + { + "epoch": 0.28, + "learning_rate": 8.165907467757907e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3390, + "text_loss": 0.458984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.164896886422862e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 3391, + "text_loss": 0.296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.163886089321493e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 3392, + "text_loss": 0.546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.16287507652271e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 3393, + "text_loss": 0.578125 + }, + { + "epoch": 0.28, + "learning_rate": 8.161863848095444e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 3394, + "text_loss": 0.734375 + }, + { + "epoch": 0.28, + "learning_rate": 8.16085240410863e-06, + "loss": 0.6316, + "regression_loss": 0.0, + "step": 3395, + "text_loss": 0.88671875 + }, + { + "epoch": 0.28, + "learning_rate": 8.159840744631221e-06, + "loss": 0.6614, + "regression_loss": 0.0, + "step": 3396, + "text_loss": 0.40234375 + }, + { + "epoch": 0.28, + "learning_rate": 8.158828869732194e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 3397, + "text_loss": 0.337890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.15781677948053e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 3398, + "text_loss": 0.291015625 + }, + { + "epoch": 0.28, + "learning_rate": 8.156804473945226e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 3399, + "text_loss": 0.78515625 + }, + { + "epoch": 0.28, + "learning_rate": 8.155791953195296e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 3400, + "text_loss": 0.3984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.154779217299771e-06, + "loss": 0.6172, + "regression_loss": 0.0, + "step": 3401, + "text_loss": 0.439453125 + }, + { + "epoch": 0.28, + "learning_rate": 8.153766266327694e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 3402, + "text_loss": 0.412109375 + }, + { + "epoch": 0.28, + "learning_rate": 8.152753100348121e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 3403, + "text_loss": 0.5625 + }, + { + "epoch": 0.28, + "learning_rate": 8.151739719430126e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 3404, + "text_loss": 0.60546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.150726123642794e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 3405, + "text_loss": 0.62890625 + }, + { + "epoch": 0.28, + "learning_rate": 8.149712313055228e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 3406, + "text_loss": 0.98828125 + }, + { + "epoch": 0.28, + "learning_rate": 8.148698287736543e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 3407, + "text_loss": 0.546875 + }, + { + "epoch": 0.28, + "learning_rate": 8.147684047755873e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 3408, + "text_loss": 0.388671875 + }, + { + "epoch": 0.28, + "learning_rate": 8.146669593182362e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 3409, + "text_loss": 0.5 + }, + { + "epoch": 0.28, + "learning_rate": 8.145654924085171e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 3410, + "text_loss": 0.765625 + }, + { + "epoch": 0.28, + "learning_rate": 8.144640040533472e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 3411, + "text_loss": 0.52734375 + }, + { + "epoch": 0.28, + "learning_rate": 8.143624942596458e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 3412, + "text_loss": 0.796875 + }, + { + "epoch": 0.28, + "learning_rate": 8.142609630343332e-06, + "loss": 0.6406, + "regression_loss": 0.0, + "step": 3413, + "text_loss": 0.73828125 + }, + { + "epoch": 0.28, + "learning_rate": 8.141594103843312e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 3414, + "text_loss": 0.64453125 + }, + { + "epoch": 0.28, + "learning_rate": 8.140578363165631e-06, + "loss": 0.506, + "regression_loss": 0.0, + "step": 3415, + "text_loss": 0.59375 + }, + { + "epoch": 0.28, + "learning_rate": 8.13956240837954e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 3416, + "text_loss": 0.58984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.1385462395543e-06, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 3417, + "text_loss": 0.384765625 + }, + { + "epoch": 0.28, + "learning_rate": 8.137529856759183e-06, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 3418, + "text_loss": 0.474609375 + }, + { + "epoch": 0.28, + "learning_rate": 8.136513260063489e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 3419, + "text_loss": 0.3828125 + }, + { + "epoch": 0.28, + "learning_rate": 8.13549644953652e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 3420, + "text_loss": 0.458984375 + }, + { + "epoch": 0.28, + "learning_rate": 8.134479425247596e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 3421, + "text_loss": 0.4296875 + }, + { + "epoch": 0.28, + "learning_rate": 8.133462187266056e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 3422, + "text_loss": 0.58203125 + }, + { + "epoch": 0.28, + "learning_rate": 8.132444735661246e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 3423, + "text_loss": 0.5625 + }, + { + "epoch": 0.28, + "learning_rate": 8.131427070502535e-06, + "loss": 0.6555, + "regression_loss": 0.0, + "step": 3424, + "text_loss": 0.76171875 + }, + { + "epoch": 0.28, + "learning_rate": 8.130409191859297e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 3425, + "text_loss": 0.39453125 + }, + { + "epoch": 0.28, + "learning_rate": 8.12939109980093e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 3426, + "text_loss": 0.78515625 + }, + { + "epoch": 0.28, + "learning_rate": 8.128372794396841e-06, + "loss": 0.4352, + "regression_loss": 0.0, + "step": 3427, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.28, + "learning_rate": 8.127354275716452e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 3428, + "text_loss": 0.66796875 + }, + { + "epoch": 0.28, + "learning_rate": 8.1263355438292e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 3429, + "text_loss": 0.50390625 + }, + { + "epoch": 0.29, + "learning_rate": 8.125316598804538e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 3430, + "text_loss": 0.60546875 + }, + { + "epoch": 0.29, + "learning_rate": 8.124297440711933e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 3431, + "text_loss": 0.59765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.123278069620864e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 3432, + "text_loss": 0.4453125 + }, + { + "epoch": 0.29, + "learning_rate": 8.122258485600828e-06, + "loss": 0.478, + "regression_loss": 0.0, + "step": 3433, + "text_loss": 0.306640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.121238688721334e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 3434, + "text_loss": 0.419921875 + }, + { + "epoch": 0.29, + "learning_rate": 8.120218679051907e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 3435, + "text_loss": 0.44921875 + }, + { + "epoch": 0.29, + "learning_rate": 8.119198456662087e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 3436, + "text_loss": 0.4296875 + }, + { + "epoch": 0.29, + "learning_rate": 8.118178021621425e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 3437, + "text_loss": 0.384765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.117157373999493e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 3438, + "text_loss": 0.80859375 + }, + { + "epoch": 0.29, + "learning_rate": 8.116136513865868e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 3439, + "text_loss": 0.6171875 + }, + { + "epoch": 0.29, + "learning_rate": 8.115115441290151e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 3440, + "text_loss": 0.578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.114094156341953e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 3441, + "text_loss": 0.76171875 + }, + { + "epoch": 0.29, + "learning_rate": 8.113072659090899e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 3442, + "text_loss": 0.5390625 + }, + { + "epoch": 0.29, + "learning_rate": 8.11205094960663e-06, + "loss": 0.5123, + "regression_loss": 0.0, + "step": 3443, + "text_loss": 0.283203125 + }, + { + "epoch": 0.29, + "learning_rate": 8.1110290279588e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 3444, + "text_loss": 0.67578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.110006894217077e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 3445, + "text_loss": 0.640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.10898454845115e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 3446, + "text_loss": 0.59765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.10796199073071e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 3447, + "text_loss": 0.77734375 + }, + { + "epoch": 0.29, + "learning_rate": 8.106939221125478e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 3448, + "text_loss": 0.703125 + }, + { + "epoch": 0.29, + "learning_rate": 8.105916239705174e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 3449, + "text_loss": 0.75390625 + }, + { + "epoch": 0.29, + "learning_rate": 8.104893046539543e-06, + "loss": 0.6436, + "regression_loss": 0.0, + "step": 3450, + "text_loss": 0.65625 + }, + { + "epoch": 0.29, + "learning_rate": 8.103869641698341e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 3451, + "text_loss": 0.5625 + }, + { + "epoch": 0.29, + "learning_rate": 8.102846025251337e-06, + "loss": 0.644, + "regression_loss": 0.0, + "step": 3452, + "text_loss": 0.38671875 + }, + { + "epoch": 0.29, + "learning_rate": 8.101822197268316e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 3453, + "text_loss": 0.62109375 + }, + { + "epoch": 0.29, + "learning_rate": 8.10079815781908e-06, + "loss": 0.616, + "regression_loss": 0.0, + "step": 3454, + "text_loss": 0.41015625 + }, + { + "epoch": 0.29, + "learning_rate": 8.09977390697344e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 3455, + "text_loss": 0.72265625 + }, + { + "epoch": 0.29, + "learning_rate": 8.098749444801226e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 3456, + "text_loss": 0.58984375 + }, + { + "epoch": 0.29, + "learning_rate": 8.097724771372278e-06, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 3457, + "text_loss": 0.51171875 + }, + { + "epoch": 0.29, + "learning_rate": 8.096699886756453e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 3458, + "text_loss": 0.48046875 + }, + { + "epoch": 0.29, + "learning_rate": 8.095674791023627e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 3459, + "text_loss": 0.3828125 + }, + { + "epoch": 0.29, + "learning_rate": 8.094649484243682e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 3460, + "text_loss": 0.7734375 + }, + { + "epoch": 0.29, + "learning_rate": 8.093623966486517e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 3461, + "text_loss": 0.65234375 + }, + { + "epoch": 0.29, + "learning_rate": 8.092598237822048e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 3462, + "text_loss": 0.40625 + }, + { + "epoch": 0.29, + "learning_rate": 8.091572298320205e-06, + "loss": 0.4904, + "regression_loss": 0.0, + "step": 3463, + "text_loss": 0.3046875 + }, + { + "epoch": 0.29, + "learning_rate": 8.09054614805093e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 3464, + "text_loss": 0.51953125 + }, + { + "epoch": 0.29, + "learning_rate": 8.089519787084182e-06, + "loss": 0.5962, + "regression_loss": 0.0, + "step": 3465, + "text_loss": 0.46484375 + }, + { + "epoch": 0.29, + "learning_rate": 8.08849321548993e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 3466, + "text_loss": 0.4296875 + }, + { + "epoch": 0.29, + "learning_rate": 8.087466433338163e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 3467, + "text_loss": 0.40234375 + }, + { + "epoch": 0.29, + "learning_rate": 8.086439440698879e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 3468, + "text_loss": 0.64453125 + }, + { + "epoch": 0.29, + "learning_rate": 8.085412237642097e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 3469, + "text_loss": 0.4609375 + }, + { + "epoch": 0.29, + "learning_rate": 8.084384824237845e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 3470, + "text_loss": 0.369140625 + }, + { + "epoch": 0.29, + "learning_rate": 8.083357200556164e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 3471, + "text_loss": 0.384765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.082329366667115e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 3472, + "text_loss": 0.494140625 + }, + { + "epoch": 0.29, + "learning_rate": 8.081301322640771e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 3473, + "text_loss": 0.59375 + }, + { + "epoch": 0.29, + "learning_rate": 8.080273068547216e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 3474, + "text_loss": 0.66015625 + }, + { + "epoch": 0.29, + "learning_rate": 8.079244604456553e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 3475, + "text_loss": 0.5625 + }, + { + "epoch": 0.29, + "learning_rate": 8.078215930438895e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 3476, + "text_loss": 0.7734375 + }, + { + "epoch": 0.29, + "learning_rate": 8.077187046564375e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 3477, + "text_loss": 0.53125 + }, + { + "epoch": 0.29, + "learning_rate": 8.076157952903134e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 3478, + "text_loss": 0.5859375 + }, + { + "epoch": 0.29, + "learning_rate": 8.075128649525335e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 3479, + "text_loss": 0.4609375 + }, + { + "epoch": 0.29, + "learning_rate": 8.074099136501143e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 3480, + "text_loss": 0.671875 + }, + { + "epoch": 0.29, + "learning_rate": 8.073069413900752e-06, + "loss": 0.624, + "regression_loss": 0.0, + "step": 3481, + "text_loss": 0.6953125 + }, + { + "epoch": 0.29, + "learning_rate": 8.07203948179436e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 3482, + "text_loss": 0.80078125 + }, + { + "epoch": 0.29, + "learning_rate": 8.071009340252182e-06, + "loss": 0.6328, + "regression_loss": 0.0, + "step": 3483, + "text_loss": 0.71875 + }, + { + "epoch": 0.29, + "learning_rate": 8.069978989344447e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 3484, + "text_loss": 0.6796875 + }, + { + "epoch": 0.29, + "learning_rate": 8.068948429141404e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 3485, + "text_loss": 0.44921875 + }, + { + "epoch": 0.29, + "learning_rate": 8.067917659713306e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 3486, + "text_loss": 0.578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.066886681130427e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 3487, + "text_loss": 0.337890625 + }, + { + "epoch": 0.29, + "learning_rate": 8.065855493463055e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 3488, + "text_loss": 0.81640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.06482409678149e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 3489, + "text_loss": 0.67578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.063792491156046e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 3490, + "text_loss": 0.45703125 + }, + { + "epoch": 0.29, + "learning_rate": 8.062760676657055e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 3491, + "text_loss": 0.443359375 + }, + { + "epoch": 0.29, + "learning_rate": 8.06172865335486e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 3492, + "text_loss": 0.7578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.060696421319819e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 3493, + "text_loss": 0.53125 + }, + { + "epoch": 0.29, + "learning_rate": 8.059663980622304e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 3494, + "text_loss": 0.51953125 + }, + { + "epoch": 0.29, + "learning_rate": 8.0586313313327e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 3495, + "text_loss": 0.640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.057598473521411e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 3496, + "text_loss": 0.765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.05656540725885e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 3497, + "text_loss": 0.54296875 + }, + { + "epoch": 0.29, + "learning_rate": 8.055532132615446e-06, + "loss": 0.6152, + "regression_loss": 0.0, + "step": 3498, + "text_loss": 0.359375 + }, + { + "epoch": 0.29, + "learning_rate": 8.054498649661641e-06, + "loss": 0.6396, + "regression_loss": 0.0, + "step": 3499, + "text_loss": 0.546875 + }, + { + "epoch": 0.29, + "learning_rate": 8.053464958467897e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 3500, + "text_loss": 0.625 + }, + { + "epoch": 0.29, + "learning_rate": 8.052431059104681e-06, + "loss": 0.6157, + "regression_loss": 0.0, + "step": 3501, + "text_loss": 0.5859375 + }, + { + "epoch": 0.29, + "learning_rate": 8.05139695164248e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 3502, + "text_loss": 0.578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.050362636151796e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 3503, + "text_loss": 0.47265625 + }, + { + "epoch": 0.29, + "learning_rate": 8.049328112703144e-06, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 3504, + "text_loss": 0.67578125 + }, + { + "epoch": 0.29, + "learning_rate": 8.048293381367047e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 3505, + "text_loss": 0.61328125 + }, + { + "epoch": 0.29, + "learning_rate": 8.047258442214054e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 3506, + "text_loss": 0.76953125 + }, + { + "epoch": 0.29, + "learning_rate": 8.046223295314717e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 3507, + "text_loss": 0.384765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.045187940739611e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 3508, + "text_loss": 0.5 + }, + { + "epoch": 0.29, + "learning_rate": 8.044152378559318e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 3509, + "text_loss": 0.609375 + }, + { + "epoch": 0.29, + "learning_rate": 8.04311660884444e-06, + "loss": 0.4204, + "regression_loss": 0.0, + "step": 3510, + "text_loss": 0.388671875 + }, + { + "epoch": 0.29, + "learning_rate": 8.042080631665587e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 3511, + "text_loss": 0.31640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.041044447093389e-06, + "loss": 0.6333, + "regression_loss": 0.0, + "step": 3512, + "text_loss": 0.56640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.040008055198487e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 3513, + "text_loss": 0.62890625 + }, + { + "epoch": 0.29, + "learning_rate": 8.038971456051537e-06, + "loss": 0.7153, + "regression_loss": 0.0, + "step": 3514, + "text_loss": 0.64453125 + }, + { + "epoch": 0.29, + "learning_rate": 8.03793464972321e-06, + "loss": 0.677, + "regression_loss": 0.0, + "step": 3515, + "text_loss": 0.423828125 + }, + { + "epoch": 0.29, + "learning_rate": 8.036897636284188e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 3516, + "text_loss": 0.57421875 + }, + { + "epoch": 0.29, + "learning_rate": 8.03586041580517e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 3517, + "text_loss": 0.97265625 + }, + { + "epoch": 0.29, + "learning_rate": 8.034822988356869e-06, + "loss": 0.438, + "regression_loss": 0.0, + "step": 3518, + "text_loss": 0.46875 + }, + { + "epoch": 0.29, + "learning_rate": 8.033785354010014e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 3519, + "text_loss": 0.6796875 + }, + { + "epoch": 0.29, + "learning_rate": 8.032747512835338e-06, + "loss": 0.6865, + "regression_loss": 0.0, + "step": 3520, + "text_loss": 0.5859375 + }, + { + "epoch": 0.29, + "learning_rate": 8.031709464903604e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 3521, + "text_loss": 0.484375 + }, + { + "epoch": 0.29, + "learning_rate": 8.030671210285575e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 3522, + "text_loss": 0.65234375 + }, + { + "epoch": 0.29, + "learning_rate": 8.029632749052034e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 3523, + "text_loss": 0.546875 + }, + { + "epoch": 0.29, + "learning_rate": 8.028594081273784e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 3524, + "text_loss": 0.71875 + }, + { + "epoch": 0.29, + "learning_rate": 8.02755520702163e-06, + "loss": 0.4271, + "regression_loss": 0.0, + "step": 3525, + "text_loss": 0.51953125 + }, + { + "epoch": 0.29, + "learning_rate": 8.026516126366396e-06, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 3526, + "text_loss": 0.5078125 + }, + { + "epoch": 0.29, + "learning_rate": 8.025476839378928e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 3527, + "text_loss": 0.734375 + }, + { + "epoch": 0.29, + "learning_rate": 8.024437346130073e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 3528, + "text_loss": 0.462890625 + }, + { + "epoch": 0.29, + "learning_rate": 8.023397646690702e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 3529, + "text_loss": 0.73828125 + }, + { + "epoch": 0.29, + "learning_rate": 8.022357741131695e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 3530, + "text_loss": 0.7109375 + }, + { + "epoch": 0.29, + "learning_rate": 8.021317629523947e-06, + "loss": 0.568, + "regression_loss": 0.0, + "step": 3531, + "text_loss": 0.48828125 + }, + { + "epoch": 0.29, + "learning_rate": 8.020277311938366e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 3532, + "text_loss": 0.8515625 + }, + { + "epoch": 0.29, + "learning_rate": 8.019236788445878e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 3533, + "text_loss": 0.78125 + }, + { + "epoch": 0.29, + "learning_rate": 8.01819605911742e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 3534, + "text_loss": 0.37890625 + }, + { + "epoch": 0.29, + "learning_rate": 8.017155124023943e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 3535, + "text_loss": 0.40625 + }, + { + "epoch": 0.29, + "learning_rate": 8.016113983236413e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 3536, + "text_loss": 0.6015625 + }, + { + "epoch": 0.29, + "learning_rate": 8.01507263682581e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 3537, + "text_loss": 0.64453125 + }, + { + "epoch": 0.29, + "learning_rate": 8.014031084863128e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 3538, + "text_loss": 0.58203125 + }, + { + "epoch": 0.29, + "learning_rate": 8.012989327419372e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 3539, + "text_loss": 0.59765625 + }, + { + "epoch": 0.29, + "learning_rate": 8.011947364565565e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 3540, + "text_loss": 0.6484375 + }, + { + "epoch": 0.29, + "learning_rate": 8.010905196372742e-06, + "loss": 0.6238, + "regression_loss": 0.0, + "step": 3541, + "text_loss": 0.6640625 + }, + { + "epoch": 0.29, + "learning_rate": 8.009862822911957e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 3542, + "text_loss": 0.375 + }, + { + "epoch": 0.29, + "learning_rate": 8.008820244254268e-06, + "loss": 0.6123, + "regression_loss": 0.0, + "step": 3543, + "text_loss": 0.458984375 + }, + { + "epoch": 0.29, + "learning_rate": 8.007777460470754e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 3544, + "text_loss": 0.4296875 + }, + { + "epoch": 0.29, + "learning_rate": 8.00673447163251e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 3545, + "text_loss": 0.55859375 + }, + { + "epoch": 0.29, + "learning_rate": 8.005691277810638e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 3546, + "text_loss": 0.69921875 + }, + { + "epoch": 0.29, + "learning_rate": 8.004647879076258e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 3547, + "text_loss": 0.70703125 + }, + { + "epoch": 0.29, + "learning_rate": 8.003604275500505e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 3548, + "text_loss": 0.50390625 + }, + { + "epoch": 0.29, + "learning_rate": 8.002560467154524e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 3549, + "text_loss": 0.265625 + }, + { + "epoch": 0.3, + "learning_rate": 8.00151645410948e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 3550, + "text_loss": 0.51953125 + }, + { + "epoch": 0.3, + "learning_rate": 8.000472236436546e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 3551, + "text_loss": 0.7734375 + }, + { + "epoch": 0.3, + "learning_rate": 7.999427814206911e-06, + "loss": 0.5879, + "regression_loss": 0.0, + "step": 3552, + "text_loss": 0.63671875 + }, + { + "epoch": 0.3, + "learning_rate": 7.998383187491781e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 3553, + "text_loss": 0.38671875 + }, + { + "epoch": 0.3, + "learning_rate": 7.997338356362371e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 3554, + "text_loss": 0.388671875 + }, + { + "epoch": 0.3, + "learning_rate": 7.996293320889912e-06, + "loss": 0.6292, + "regression_loss": 0.0, + "step": 3555, + "text_loss": 0.79296875 + }, + { + "epoch": 0.3, + "learning_rate": 7.99524808114565e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 3556, + "text_loss": 0.453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.994202637200844e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 3557, + "text_loss": 0.443359375 + }, + { + "epoch": 0.3, + "learning_rate": 7.993156989126767e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 3558, + "text_loss": 0.421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.992111136994705e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 3559, + "text_loss": 0.392578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.99106508087596e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 3560, + "text_loss": 0.515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.990018820841847e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 3561, + "text_loss": 0.55078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.988972356963693e-06, + "loss": 0.3843, + "regression_loss": 0.0, + "step": 3562, + "text_loss": 0.50390625 + }, + { + "epoch": 0.3, + "learning_rate": 7.987925689312841e-06, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 3563, + "text_loss": 0.412109375 + }, + { + "epoch": 0.3, + "learning_rate": 7.986878817960648e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 3564, + "text_loss": 0.482421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.985831742978486e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 3565, + "text_loss": 0.3828125 + }, + { + "epoch": 0.3, + "learning_rate": 7.984784464437735e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 3566, + "text_loss": 0.89453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.983736982409796e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 3567, + "text_loss": 0.55078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.982689296966079e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 3568, + "text_loss": 0.5546875 + }, + { + "epoch": 0.3, + "learning_rate": 7.981641408178012e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 3569, + "text_loss": 0.369140625 + }, + { + "epoch": 0.3, + "learning_rate": 7.980593316117031e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 3570, + "text_loss": 0.45703125 + }, + { + "epoch": 0.3, + "learning_rate": 7.979545020854595e-06, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 3571, + "text_loss": 0.419921875 + }, + { + "epoch": 0.3, + "learning_rate": 7.978496522462167e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 3572, + "text_loss": 0.765625 + }, + { + "epoch": 0.3, + "learning_rate": 7.97744782101123e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 3573, + "text_loss": 0.4453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.97639891657328e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 3574, + "text_loss": 0.41015625 + }, + { + "epoch": 0.3, + "learning_rate": 7.975349809219823e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 3575, + "text_loss": 0.58203125 + }, + { + "epoch": 0.3, + "learning_rate": 7.974300499022382e-06, + "loss": 0.6404, + "regression_loss": 0.0, + "step": 3576, + "text_loss": 0.66015625 + }, + { + "epoch": 0.3, + "learning_rate": 7.973250986052498e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 3577, + "text_loss": 0.734375 + }, + { + "epoch": 0.3, + "learning_rate": 7.972201270381716e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 3578, + "text_loss": 0.4921875 + }, + { + "epoch": 0.3, + "learning_rate": 7.971151352081603e-06, + "loss": 0.6611, + "regression_loss": 0.0, + "step": 3579, + "text_loss": 0.640625 + }, + { + "epoch": 0.3, + "learning_rate": 7.970101231223736e-06, + "loss": 0.4999, + "regression_loss": 0.0, + "step": 3580, + "text_loss": 0.5390625 + }, + { + "epoch": 0.3, + "learning_rate": 7.969050907879708e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 3581, + "text_loss": 0.34765625 + }, + { + "epoch": 0.3, + "learning_rate": 7.968000382121124e-06, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 3582, + "text_loss": 0.7421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.966949654019603e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 3583, + "text_loss": 0.65234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.965898723646777e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 3584, + "text_loss": 0.4453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.964847591074296e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 3585, + "text_loss": 0.7578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.963796256373818e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 3586, + "text_loss": 0.8046875 + }, + { + "epoch": 0.3, + "learning_rate": 7.96274471961702e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 3587, + "text_loss": 0.455078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.961692980875588e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3588, + "text_loss": 0.765625 + }, + { + "epoch": 0.3, + "learning_rate": 7.960641040221226e-06, + "loss": 0.3899, + "regression_loss": 0.0, + "step": 3589, + "text_loss": 0.369140625 + }, + { + "epoch": 0.3, + "learning_rate": 7.95958889772565e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 3590, + "text_loss": 0.40234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.958536553460588e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 3591, + "text_loss": 0.359375 + }, + { + "epoch": 0.3, + "learning_rate": 7.957484007497784e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 3592, + "text_loss": 0.6640625 + }, + { + "epoch": 0.3, + "learning_rate": 7.956431259908996e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 3593, + "text_loss": 0.40234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.955378310765993e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 3594, + "text_loss": 0.5234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.954325160140562e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 3595, + "text_loss": 0.66796875 + }, + { + "epoch": 0.3, + "learning_rate": 7.953271808104498e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 3596, + "text_loss": 0.357421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.952218254729618e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 3597, + "text_loss": 0.68359375 + }, + { + "epoch": 0.3, + "learning_rate": 7.951164500087745e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 3598, + "text_loss": 0.8984375 + }, + { + "epoch": 0.3, + "learning_rate": 7.950110544250718e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 3599, + "text_loss": 0.462890625 + }, + { + "epoch": 0.3, + "learning_rate": 7.949056387290392e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 3600, + "text_loss": 0.578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.948002029278632e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 3601, + "text_loss": 0.51953125 + }, + { + "epoch": 0.3, + "learning_rate": 7.946947470287321e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 3602, + "text_loss": 0.6484375 + }, + { + "epoch": 0.3, + "learning_rate": 7.94589271038835e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 3603, + "text_loss": 0.5234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.94483774965363e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 3604, + "text_loss": 0.4296875 + }, + { + "epoch": 0.3, + "learning_rate": 7.943782588155082e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 3605, + "text_loss": 1.0078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.942727225964641e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 3606, + "text_loss": 0.4296875 + }, + { + "epoch": 0.3, + "learning_rate": 7.941671663154257e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 3607, + "text_loss": 0.2734375 + }, + { + "epoch": 0.3, + "learning_rate": 7.94061589979589e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 3608, + "text_loss": 0.921875 + }, + { + "epoch": 0.3, + "learning_rate": 7.939559935961522e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 3609, + "text_loss": 0.5546875 + }, + { + "epoch": 0.3, + "learning_rate": 7.93850377172314e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 3610, + "text_loss": 0.30078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.937447407152746e-06, + "loss": 0.6306, + "regression_loss": 0.0, + "step": 3611, + "text_loss": 0.52734375 + }, + { + "epoch": 0.3, + "learning_rate": 7.936390842322358e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 3612, + "text_loss": 0.3671875 + }, + { + "epoch": 0.3, + "learning_rate": 7.935334077304013e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 3613, + "text_loss": 0.7265625 + }, + { + "epoch": 0.3, + "learning_rate": 7.934277112169747e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 3614, + "text_loss": 0.625 + }, + { + "epoch": 0.3, + "learning_rate": 7.933219946991625e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 3615, + "text_loss": 0.625 + }, + { + "epoch": 0.3, + "learning_rate": 7.932162581841715e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 3616, + "text_loss": 0.478515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.931105016792106e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 3617, + "text_loss": 0.5234375 + }, + { + "epoch": 0.3, + "learning_rate": 7.930047251914894e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 3618, + "text_loss": 0.51953125 + }, + { + "epoch": 0.3, + "learning_rate": 7.928989287282195e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 3619, + "text_loss": 0.6015625 + }, + { + "epoch": 0.3, + "learning_rate": 7.927931122966133e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 3620, + "text_loss": 0.48828125 + }, + { + "epoch": 0.3, + "learning_rate": 7.92687275903885e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 3621, + "text_loss": 0.5546875 + }, + { + "epoch": 0.3, + "learning_rate": 7.9258141955725e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 3622, + "text_loss": 0.76953125 + }, + { + "epoch": 0.3, + "learning_rate": 7.924755432639248e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 3623, + "text_loss": 0.392578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.923696470311279e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 3624, + "text_loss": 0.28515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.922637308660784e-06, + "loss": 0.6353, + "regression_loss": 0.0, + "step": 3625, + "text_loss": 0.625 + }, + { + "epoch": 0.3, + "learning_rate": 7.92157794775997e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 3626, + "text_loss": 0.5703125 + }, + { + "epoch": 0.3, + "learning_rate": 7.920518387681065e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 3627, + "text_loss": 0.73828125 + }, + { + "epoch": 0.3, + "learning_rate": 7.919458628496297e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 3628, + "text_loss": 0.4375 + }, + { + "epoch": 0.3, + "learning_rate": 7.91839867027792e-06, + "loss": 0.6277, + "regression_loss": 0.0, + "step": 3629, + "text_loss": 0.478515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.917338513098194e-06, + "loss": 0.4762, + "regression_loss": 0.0, + "step": 3630, + "text_loss": 0.53125 + }, + { + "epoch": 0.3, + "learning_rate": 7.916278157029396e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 3631, + "text_loss": 0.53515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.915217602143814e-06, + "loss": 0.6178, + "regression_loss": 0.0, + "step": 3632, + "text_loss": 0.89453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.914156848513755e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 3633, + "text_loss": 0.6484375 + }, + { + "epoch": 0.3, + "learning_rate": 7.913095896211533e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 3634, + "text_loss": 0.703125 + }, + { + "epoch": 0.3, + "learning_rate": 7.912034745309477e-06, + "loss": 0.5096, + "regression_loss": 0.0, + "step": 3635, + "text_loss": 0.57421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.910973395879932e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 3636, + "text_loss": 0.55078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.909911847995258e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 3637, + "text_loss": 0.42578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.908850101727823e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 3638, + "text_loss": 0.326171875 + }, + { + "epoch": 0.3, + "learning_rate": 7.907788157150012e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 3639, + "text_loss": 0.42578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.906726014334221e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 3640, + "text_loss": 0.54296875 + }, + { + "epoch": 0.3, + "learning_rate": 7.905663673352863e-06, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 3641, + "text_loss": 0.4140625 + }, + { + "epoch": 0.3, + "learning_rate": 7.904601134278364e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 3642, + "text_loss": 0.703125 + }, + { + "epoch": 0.3, + "learning_rate": 7.903538397183162e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 3643, + "text_loss": 0.2421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.902475462139708e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 3644, + "text_loss": 0.341796875 + }, + { + "epoch": 0.3, + "learning_rate": 7.901412329220468e-06, + "loss": 0.4218, + "regression_loss": 0.0, + "step": 3645, + "text_loss": 0.5 + }, + { + "epoch": 0.3, + "learning_rate": 7.900348998497921e-06, + "loss": 0.6313, + "regression_loss": 0.0, + "step": 3646, + "text_loss": 0.7265625 + }, + { + "epoch": 0.3, + "learning_rate": 7.899285470044559e-06, + "loss": 0.7219, + "regression_loss": 0.0, + "step": 3647, + "text_loss": 0.81640625 + }, + { + "epoch": 0.3, + "learning_rate": 7.898221743932887e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 3648, + "text_loss": 0.515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.897157820235427e-06, + "loss": 0.6333, + "regression_loss": 0.0, + "step": 3649, + "text_loss": 0.55078125 + }, + { + "epoch": 0.3, + "learning_rate": 7.89609369902471e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 3650, + "text_loss": 0.765625 + }, + { + "epoch": 0.3, + "learning_rate": 7.895029380373283e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 3651, + "text_loss": 0.462890625 + }, + { + "epoch": 0.3, + "learning_rate": 7.893964864353703e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 3652, + "text_loss": 0.73828125 + }, + { + "epoch": 0.3, + "learning_rate": 7.892900151038546e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 3653, + "text_loss": 0.32421875 + }, + { + "epoch": 0.3, + "learning_rate": 7.891835240500401e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 3654, + "text_loss": 0.515625 + }, + { + "epoch": 0.3, + "learning_rate": 7.890770132811863e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 3655, + "text_loss": 0.7890625 + }, + { + "epoch": 0.3, + "learning_rate": 7.889704828045548e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 3656, + "text_loss": 0.4921875 + }, + { + "epoch": 0.3, + "learning_rate": 7.888639326274082e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 3657, + "text_loss": 0.453125 + }, + { + "epoch": 0.3, + "learning_rate": 7.887573627570107e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 3658, + "text_loss": 0.6484375 + }, + { + "epoch": 0.3, + "learning_rate": 7.886507732006275e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 3659, + "text_loss": 0.423828125 + }, + { + "epoch": 0.3, + "learning_rate": 7.885441639655256e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 3660, + "text_loss": 0.55859375 + }, + { + "epoch": 0.3, + "learning_rate": 7.884375350589727e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 3661, + "text_loss": 0.296875 + }, + { + "epoch": 0.3, + "learning_rate": 7.883308864882386e-06, + "loss": 0.624, + "regression_loss": 0.0, + "step": 3662, + "text_loss": 0.51953125 + }, + { + "epoch": 0.3, + "learning_rate": 7.882242182605936e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 3663, + "text_loss": 0.55859375 + }, + { + "epoch": 0.3, + "learning_rate": 7.881175303833102e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 3664, + "text_loss": 0.546875 + }, + { + "epoch": 0.3, + "learning_rate": 7.880108228636616e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 3665, + "text_loss": 0.44140625 + }, + { + "epoch": 0.3, + "learning_rate": 7.879040957089229e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 3666, + "text_loss": 0.84375 + }, + { + "epoch": 0.3, + "learning_rate": 7.877973489263695e-06, + "loss": 0.6443, + "regression_loss": 0.0, + "step": 3667, + "text_loss": 0.7734375 + }, + { + "epoch": 0.3, + "learning_rate": 7.876905825232795e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 3668, + "text_loss": 0.578125 + }, + { + "epoch": 0.3, + "learning_rate": 7.875837965069316e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 3669, + "text_loss": 0.462890625 + }, + { + "epoch": 0.31, + "learning_rate": 7.874769908846056e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 3670, + "text_loss": 0.46484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.873701656635831e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 3671, + "text_loss": 0.6875 + }, + { + "epoch": 0.31, + "learning_rate": 7.87263320851147e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 3672, + "text_loss": 0.44140625 + }, + { + "epoch": 0.31, + "learning_rate": 7.871564564545814e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 3673, + "text_loss": 0.373046875 + }, + { + "epoch": 0.31, + "learning_rate": 7.870495724811717e-06, + "loss": 0.5558, + "regression_loss": 0.0, + "step": 3674, + "text_loss": 0.478515625 + }, + { + "epoch": 0.31, + "learning_rate": 7.869426689382048e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 3675, + "text_loss": 0.5859375 + }, + { + "epoch": 0.31, + "learning_rate": 7.868357458329685e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 3676, + "text_loss": 0.34765625 + }, + { + "epoch": 0.31, + "learning_rate": 7.867288031727527e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 3677, + "text_loss": 0.89453125 + }, + { + "epoch": 0.31, + "learning_rate": 7.86621840964848e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 3678, + "text_loss": 0.30078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.865148592165465e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 3679, + "text_loss": 0.322265625 + }, + { + "epoch": 0.31, + "learning_rate": 7.864078579351418e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 3680, + "text_loss": 0.490234375 + }, + { + "epoch": 0.31, + "learning_rate": 7.863008371279284e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 3681, + "text_loss": 0.58984375 + }, + { + "epoch": 0.31, + "learning_rate": 7.861937968022029e-06, + "loss": 0.6431, + "regression_loss": 0.0, + "step": 3682, + "text_loss": 0.859375 + }, + { + "epoch": 0.31, + "learning_rate": 7.860867369652623e-06, + "loss": 0.5316, + "regression_loss": 0.0, + "step": 3683, + "text_loss": 0.5625 + }, + { + "epoch": 0.31, + "learning_rate": 7.859796576244056e-06, + "loss": 0.5524, + "regression_loss": 0.0, + "step": 3684, + "text_loss": 1.2578125 + }, + { + "epoch": 0.31, + "learning_rate": 7.858725587869329e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 3685, + "text_loss": 0.46484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.857654404601456e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 3686, + "text_loss": 0.6171875 + }, + { + "epoch": 0.31, + "learning_rate": 7.856583026513465e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 3687, + "text_loss": 0.546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.855511453678397e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 3688, + "text_loss": 0.69140625 + }, + { + "epoch": 0.31, + "learning_rate": 7.854439686169309e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 3689, + "text_loss": 0.5625 + }, + { + "epoch": 0.31, + "learning_rate": 7.853367724059262e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 3690, + "text_loss": 0.42578125 + }, + { + "epoch": 0.31, + "learning_rate": 7.852295567421343e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 3691, + "text_loss": 0.46875 + }, + { + "epoch": 0.31, + "learning_rate": 7.851223216328644e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 3692, + "text_loss": 0.703125 + }, + { + "epoch": 0.31, + "learning_rate": 7.850150670854272e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 3693, + "text_loss": 0.734375 + }, + { + "epoch": 0.31, + "learning_rate": 7.849077931071348e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 3694, + "text_loss": 0.57421875 + }, + { + "epoch": 0.31, + "learning_rate": 7.848004997053005e-06, + "loss": 0.4236, + "regression_loss": 0.0, + "step": 3695, + "text_loss": 0.5 + }, + { + "epoch": 0.31, + "learning_rate": 7.84693186887239e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 3696, + "text_loss": 0.58203125 + }, + { + "epoch": 0.31, + "learning_rate": 7.845858546602667e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 3697, + "text_loss": 0.443359375 + }, + { + "epoch": 0.31, + "learning_rate": 7.844785030317005e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 3698, + "text_loss": 0.6953125 + }, + { + "epoch": 0.31, + "learning_rate": 7.843711320088592e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 3699, + "text_loss": 0.765625 + }, + { + "epoch": 0.31, + "learning_rate": 7.842637415990631e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 3700, + "text_loss": 0.55078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.84156331809633e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 3701, + "text_loss": 0.48828125 + }, + { + "epoch": 0.31, + "learning_rate": 7.84048902647892e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 3702, + "text_loss": 0.43359375 + }, + { + "epoch": 0.31, + "learning_rate": 7.839414541211637e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 3703, + "text_loss": 0.68359375 + }, + { + "epoch": 0.31, + "learning_rate": 7.838339862367738e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 3704, + "text_loss": 0.515625 + }, + { + "epoch": 0.31, + "learning_rate": 7.837264990020486e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 3705, + "text_loss": 0.73828125 + }, + { + "epoch": 0.31, + "learning_rate": 7.836189924243161e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 3706, + "text_loss": 0.49609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.835114665109055e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 3707, + "text_loss": 0.41015625 + }, + { + "epoch": 0.31, + "learning_rate": 7.834039212691474e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 3708, + "text_loss": 0.5625 + }, + { + "epoch": 0.31, + "learning_rate": 7.832963567063738e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 3709, + "text_loss": 0.30078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.831887728299177e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 3710, + "text_loss": 0.546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.830811696471136e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 3711, + "text_loss": 0.59765625 + }, + { + "epoch": 0.31, + "learning_rate": 7.829735471652978e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 3712, + "text_loss": 0.99609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.828659053918067e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 3713, + "text_loss": 0.427734375 + }, + { + "epoch": 0.31, + "learning_rate": 7.827582443339793e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 3714, + "text_loss": 0.7578125 + }, + { + "epoch": 0.31, + "learning_rate": 7.826505639991551e-06, + "loss": 0.5419, + "regression_loss": 0.0, + "step": 3715, + "text_loss": 0.271484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.825428643946754e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 3716, + "text_loss": 0.412109375 + }, + { + "epoch": 0.31, + "learning_rate": 7.824351455278824e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 3717, + "text_loss": 0.4921875 + }, + { + "epoch": 0.31, + "learning_rate": 7.8232740740612e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 3718, + "text_loss": 0.498046875 + }, + { + "epoch": 0.31, + "learning_rate": 7.822196500367331e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 3719, + "text_loss": 0.515625 + }, + { + "epoch": 0.31, + "learning_rate": 7.821118734270681e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 3720, + "text_loss": 0.451171875 + }, + { + "epoch": 0.31, + "learning_rate": 7.820040775844726e-06, + "loss": 0.6233, + "regression_loss": 0.0, + "step": 3721, + "text_loss": 0.4921875 + }, + { + "epoch": 0.31, + "learning_rate": 7.818962625162955e-06, + "loss": 0.3896, + "regression_loss": 0.0, + "step": 3722, + "text_loss": 0.46484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.817884282298874e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 3723, + "text_loss": 0.328125 + }, + { + "epoch": 0.31, + "learning_rate": 7.816805747325997e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 3724, + "text_loss": 0.412109375 + }, + { + "epoch": 0.31, + "learning_rate": 7.81572702031785e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 3725, + "text_loss": 0.625 + }, + { + "epoch": 0.31, + "learning_rate": 7.81464810134798e-06, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 3726, + "text_loss": 0.453125 + }, + { + "epoch": 0.31, + "learning_rate": 7.813568990489937e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 3727, + "text_loss": 0.53515625 + }, + { + "epoch": 0.31, + "learning_rate": 7.812489687817294e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 3728, + "text_loss": 0.60546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.811410193403631e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 3729, + "text_loss": 0.5546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.81033050732254e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 3730, + "text_loss": 0.97265625 + }, + { + "epoch": 0.31, + "learning_rate": 7.809250629647628e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 3731, + "text_loss": 0.263671875 + }, + { + "epoch": 0.31, + "learning_rate": 7.808170560452521e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 3732, + "text_loss": 0.40625 + }, + { + "epoch": 0.31, + "learning_rate": 7.807090299810848e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 3733, + "text_loss": 0.388671875 + }, + { + "epoch": 0.31, + "learning_rate": 7.806009847796255e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 3734, + "text_loss": 0.5625 + }, + { + "epoch": 0.31, + "learning_rate": 7.804929204482405e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 3735, + "text_loss": 0.71875 + }, + { + "epoch": 0.31, + "learning_rate": 7.803848369942967e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 3736, + "text_loss": 0.63671875 + }, + { + "epoch": 0.31, + "learning_rate": 7.80276734425163e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 3737, + "text_loss": 0.54296875 + }, + { + "epoch": 0.31, + "learning_rate": 7.801686127482091e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 3738, + "text_loss": 0.4296875 + }, + { + "epoch": 0.31, + "learning_rate": 7.800604719708063e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 3739, + "text_loss": 0.322265625 + }, + { + "epoch": 0.31, + "learning_rate": 7.79952312100327e-06, + "loss": 0.623, + "regression_loss": 0.0, + "step": 3740, + "text_loss": 0.63671875 + }, + { + "epoch": 0.31, + "learning_rate": 7.798441331441448e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 3741, + "text_loss": 0.453125 + }, + { + "epoch": 0.31, + "learning_rate": 7.797359351096351e-06, + "loss": 0.6892, + "regression_loss": 0.0, + "step": 3742, + "text_loss": 0.796875 + }, + { + "epoch": 0.31, + "learning_rate": 7.796277180041741e-06, + "loss": 0.6599, + "regression_loss": 0.0, + "step": 3743, + "text_loss": 0.326171875 + }, + { + "epoch": 0.31, + "learning_rate": 7.795194818351395e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 3744, + "text_loss": 0.423828125 + }, + { + "epoch": 0.31, + "learning_rate": 7.794112266099104e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 3745, + "text_loss": 0.33203125 + }, + { + "epoch": 0.31, + "learning_rate": 7.793029523358669e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 3746, + "text_loss": 0.51953125 + }, + { + "epoch": 0.31, + "learning_rate": 7.791946590203908e-06, + "loss": 0.6443, + "regression_loss": 0.0, + "step": 3747, + "text_loss": 0.6484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.790863466708648e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 3748, + "text_loss": 0.546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.789780152946728e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 3749, + "text_loss": 0.435546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.788696648992009e-06, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 3750, + "text_loss": 0.703125 + }, + { + "epoch": 0.31, + "learning_rate": 7.787612954918356e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 3751, + "text_loss": 0.28125 + }, + { + "epoch": 0.31, + "learning_rate": 7.786529070799649e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 3752, + "text_loss": 0.40234375 + }, + { + "epoch": 0.31, + "learning_rate": 7.785444996709782e-06, + "loss": 0.4799, + "regression_loss": 0.0, + "step": 3753, + "text_loss": 0.79296875 + }, + { + "epoch": 0.31, + "learning_rate": 7.78436073272266e-06, + "loss": 0.5548, + "regression_loss": 0.0, + "step": 3754, + "text_loss": 0.2158203125 + }, + { + "epoch": 0.31, + "learning_rate": 7.783276278912206e-06, + "loss": 0.6753, + "regression_loss": 0.0, + "step": 3755, + "text_loss": 0.67578125 + }, + { + "epoch": 0.31, + "learning_rate": 7.78219163535235e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 3756, + "text_loss": 0.376953125 + }, + { + "epoch": 0.31, + "learning_rate": 7.781106802117037e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 3757, + "text_loss": 0.3203125 + }, + { + "epoch": 0.31, + "learning_rate": 7.780021779280228e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 3758, + "text_loss": 0.5546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.778936566915892e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 3759, + "text_loss": 0.318359375 + }, + { + "epoch": 0.31, + "learning_rate": 7.777851165098012e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 3760, + "text_loss": 0.609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.77676557390059e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 3761, + "text_loss": 0.546875 + }, + { + "epoch": 0.31, + "learning_rate": 7.775679793397628e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 3762, + "text_loss": 0.55078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.774593823663157e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 3763, + "text_loss": 0.69921875 + }, + { + "epoch": 0.31, + "learning_rate": 7.77350766477121e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 3764, + "text_loss": 0.51953125 + }, + { + "epoch": 0.31, + "learning_rate": 7.772421316795832e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 3765, + "text_loss": 0.58203125 + }, + { + "epoch": 0.31, + "learning_rate": 7.77133477981109e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 3766, + "text_loss": 0.52734375 + }, + { + "epoch": 0.31, + "learning_rate": 7.770248053891057e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 3767, + "text_loss": 0.5078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.769161139109818e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 3768, + "text_loss": 0.71484375 + }, + { + "epoch": 0.31, + "learning_rate": 7.768074035541475e-06, + "loss": 0.4075, + "regression_loss": 0.0, + "step": 3769, + "text_loss": 0.3125 + }, + { + "epoch": 0.31, + "learning_rate": 7.766986743260143e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 3770, + "text_loss": 0.62109375 + }, + { + "epoch": 0.31, + "learning_rate": 7.765899262339945e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 3771, + "text_loss": 0.42578125 + }, + { + "epoch": 0.31, + "learning_rate": 7.76481159285502e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 3772, + "text_loss": 1.125 + }, + { + "epoch": 0.31, + "learning_rate": 7.76372373487952e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 3773, + "text_loss": 0.62109375 + }, + { + "epoch": 0.31, + "learning_rate": 7.76263568848761e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 3774, + "text_loss": 0.6875 + }, + { + "epoch": 0.31, + "learning_rate": 7.761547453753468e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 3775, + "text_loss": 0.55078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.760459030751285e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 3776, + "text_loss": 0.66015625 + }, + { + "epoch": 0.31, + "learning_rate": 7.759370419555262e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 3777, + "text_loss": 0.30078125 + }, + { + "epoch": 0.31, + "learning_rate": 7.758281620239614e-06, + "loss": 0.6619, + "regression_loss": 0.0, + "step": 3778, + "text_loss": 0.49609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.757192632878572e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 3779, + "text_loss": 0.796875 + }, + { + "epoch": 0.31, + "learning_rate": 7.756103457546376e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 3780, + "text_loss": 0.4453125 + }, + { + "epoch": 0.31, + "learning_rate": 7.755014094317284e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 3781, + "text_loss": 0.388671875 + }, + { + "epoch": 0.31, + "learning_rate": 7.753924543265558e-06, + "loss": 0.6467, + "regression_loss": 0.0, + "step": 3782, + "text_loss": 0.66015625 + }, + { + "epoch": 0.31, + "learning_rate": 7.752834804465479e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 3783, + "text_loss": 0.62890625 + }, + { + "epoch": 0.31, + "learning_rate": 7.751744877991344e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 3784, + "text_loss": 0.51171875 + }, + { + "epoch": 0.31, + "learning_rate": 7.750654763917454e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 3785, + "text_loss": 0.470703125 + }, + { + "epoch": 0.31, + "learning_rate": 7.749564462318129e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 3786, + "text_loss": 0.380859375 + }, + { + "epoch": 0.31, + "learning_rate": 7.748473973267699e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 3787, + "text_loss": 0.474609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.747383296840511e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 3788, + "text_loss": 0.6015625 + }, + { + "epoch": 0.31, + "learning_rate": 7.746292433110918e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 3789, + "text_loss": 0.49609375 + }, + { + "epoch": 0.31, + "learning_rate": 7.745201382153293e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 3790, + "text_loss": 0.49609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.744110144042013e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 3791, + "text_loss": 0.35546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.743018718851481e-06, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 3792, + "text_loss": 0.46875 + }, + { + "epoch": 0.32, + "learning_rate": 7.741927106656097e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 3793, + "text_loss": 0.48828125 + }, + { + "epoch": 0.32, + "learning_rate": 7.740835307530285e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 3794, + "text_loss": 0.57421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.739743321548478e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 3795, + "text_loss": 0.37109375 + }, + { + "epoch": 0.32, + "learning_rate": 7.738651148785122e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 3796, + "text_loss": 0.73046875 + }, + { + "epoch": 0.32, + "learning_rate": 7.737558789314676e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 3797, + "text_loss": 0.59375 + }, + { + "epoch": 0.32, + "learning_rate": 7.73646624321161e-06, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 3798, + "text_loss": 0.443359375 + }, + { + "epoch": 0.32, + "learning_rate": 7.735373510550408e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 3799, + "text_loss": 0.60546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.73428059140557e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 3800, + "text_loss": 0.546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.7331874858516e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 3801, + "text_loss": 0.5625 + }, + { + "epoch": 0.32, + "learning_rate": 7.732094193963027e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 3802, + "text_loss": 0.49609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.731000715814384e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 3803, + "text_loss": 0.71875 + }, + { + "epoch": 0.32, + "learning_rate": 7.729907051480214e-06, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 3804, + "text_loss": 0.4921875 + }, + { + "epoch": 0.32, + "learning_rate": 7.728813201035081e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 3805, + "text_loss": 0.6796875 + }, + { + "epoch": 0.32, + "learning_rate": 7.727719164553559e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 3806, + "text_loss": 0.8515625 + }, + { + "epoch": 0.32, + "learning_rate": 7.726624942110233e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 3807, + "text_loss": 0.5546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.7255305337797e-06, + "loss": 0.626, + "regression_loss": 0.0, + "step": 3808, + "text_loss": 0.625 + }, + { + "epoch": 0.32, + "learning_rate": 7.724435939636573e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 3809, + "text_loss": 0.62109375 + }, + { + "epoch": 0.32, + "learning_rate": 7.723341159755476e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 3810, + "text_loss": 0.74609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.722246194211043e-06, + "loss": 0.595, + "regression_loss": 0.0, + "step": 3811, + "text_loss": 0.490234375 + }, + { + "epoch": 0.32, + "learning_rate": 7.721151043077923e-06, + "loss": 0.5873, + "regression_loss": 0.0, + "step": 3812, + "text_loss": 0.50390625 + }, + { + "epoch": 0.32, + "learning_rate": 7.720055706430783e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 3813, + "text_loss": 0.78125 + }, + { + "epoch": 0.32, + "learning_rate": 7.71896018434429e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 3814, + "text_loss": 0.75 + }, + { + "epoch": 0.32, + "learning_rate": 7.717864476893135e-06, + "loss": 0.4652, + "regression_loss": 0.0, + "step": 3815, + "text_loss": 0.416015625 + }, + { + "epoch": 0.32, + "learning_rate": 7.71676858415202e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 3816, + "text_loss": 0.6171875 + }, + { + "epoch": 0.32, + "learning_rate": 7.715672506195652e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 3817, + "text_loss": 0.80859375 + }, + { + "epoch": 0.32, + "learning_rate": 7.71457624309876e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 3818, + "text_loss": 0.486328125 + }, + { + "epoch": 0.32, + "learning_rate": 7.71347979493608e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 3819, + "text_loss": 0.5 + }, + { + "epoch": 0.32, + "learning_rate": 7.712383161782362e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 3820, + "text_loss": 0.7421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.71128634371237e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 3821, + "text_loss": 0.59375 + }, + { + "epoch": 0.32, + "learning_rate": 7.710189340800878e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 3822, + "text_loss": 0.466796875 + }, + { + "epoch": 0.32, + "learning_rate": 7.709092153122674e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 3823, + "text_loss": 0.40234375 + }, + { + "epoch": 0.32, + "learning_rate": 7.70799478075256e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 3824, + "text_loss": 0.5703125 + }, + { + "epoch": 0.32, + "learning_rate": 7.706897223765346e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 3825, + "text_loss": 0.349609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.705799482235863e-06, + "loss": 0.6555, + "regression_loss": 0.0, + "step": 3826, + "text_loss": 0.3203125 + }, + { + "epoch": 0.32, + "learning_rate": 7.704701556238946e-06, + "loss": 0.6692, + "regression_loss": 0.0, + "step": 3827, + "text_loss": 0.62109375 + }, + { + "epoch": 0.32, + "learning_rate": 7.703603445849444e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 3828, + "text_loss": 0.67578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.702505151142223e-06, + "loss": 0.4879, + "regression_loss": 0.0, + "step": 3829, + "text_loss": 0.515625 + }, + { + "epoch": 0.32, + "learning_rate": 7.70140667219216e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 3830, + "text_loss": 0.578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.700308009074144e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 3831, + "text_loss": 0.5 + }, + { + "epoch": 0.32, + "learning_rate": 7.699209161863072e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 3832, + "text_loss": 0.5078125 + }, + { + "epoch": 0.32, + "learning_rate": 7.698110130633862e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 3833, + "text_loss": 0.75 + }, + { + "epoch": 0.32, + "learning_rate": 7.697010915461437e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 3834, + "text_loss": 0.81640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.695911516420738e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 3835, + "text_loss": 0.921875 + }, + { + "epoch": 0.32, + "learning_rate": 7.694811933586717e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 3836, + "text_loss": 0.46875 + }, + { + "epoch": 0.32, + "learning_rate": 7.693712167034337e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 3837, + "text_loss": 0.279296875 + }, + { + "epoch": 0.32, + "learning_rate": 7.692612216838574e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 3838, + "text_loss": 0.5234375 + }, + { + "epoch": 0.32, + "learning_rate": 7.691512083074413e-06, + "loss": 0.4248, + "regression_loss": 0.0, + "step": 3839, + "text_loss": 0.5390625 + }, + { + "epoch": 0.32, + "learning_rate": 7.690411765816864e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 3840, + "text_loss": 0.69140625 + }, + { + "epoch": 0.32, + "learning_rate": 7.689311265140936e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 3841, + "text_loss": 0.58203125 + }, + { + "epoch": 0.32, + "learning_rate": 7.688210581121654e-06, + "loss": 0.7095, + "regression_loss": 0.0, + "step": 3842, + "text_loss": 0.80078125 + }, + { + "epoch": 0.32, + "learning_rate": 7.68710971383406e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 3843, + "text_loss": 0.5 + }, + { + "epoch": 0.32, + "learning_rate": 7.686008663353205e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 3844, + "text_loss": 0.42578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.684907429754149e-06, + "loss": 0.493, + "regression_loss": 0.0, + "step": 3845, + "text_loss": 0.78515625 + }, + { + "epoch": 0.32, + "learning_rate": 7.683806013111974e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 3846, + "text_loss": 0.251953125 + }, + { + "epoch": 0.32, + "learning_rate": 7.682704413501765e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 3847, + "text_loss": 0.55078125 + }, + { + "epoch": 0.32, + "learning_rate": 7.681602630998629e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 3848, + "text_loss": 0.357421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.680500665677671e-06, + "loss": 0.6453, + "regression_loss": 0.0, + "step": 3849, + "text_loss": 0.5859375 + }, + { + "epoch": 0.32, + "learning_rate": 7.679398517614024e-06, + "loss": 0.5043, + "regression_loss": 0.0, + "step": 3850, + "text_loss": 0.640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.678296186882824e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 3851, + "text_loss": 0.396484375 + }, + { + "epoch": 0.32, + "learning_rate": 7.677193673559225e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 3852, + "text_loss": 0.6484375 + }, + { + "epoch": 0.32, + "learning_rate": 7.676090977718386e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 3853, + "text_loss": 0.66796875 + }, + { + "epoch": 0.32, + "learning_rate": 7.674988099435487e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 3854, + "text_loss": 0.5546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.673885038785715e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 3855, + "text_loss": 0.56640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.672781795844271e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 3856, + "text_loss": 0.78125 + }, + { + "epoch": 0.32, + "learning_rate": 7.671678370686371e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 3857, + "text_loss": 0.57421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.670574763387235e-06, + "loss": 0.5599, + "regression_loss": 0.0, + "step": 3858, + "text_loss": 0.2255859375 + }, + { + "epoch": 0.32, + "learning_rate": 7.669470974022107e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 3859, + "text_loss": 0.6796875 + }, + { + "epoch": 0.32, + "learning_rate": 7.668367002666235e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 3860, + "text_loss": 0.55078125 + }, + { + "epoch": 0.32, + "learning_rate": 7.667262849394883e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 3861, + "text_loss": 0.279296875 + }, + { + "epoch": 0.32, + "learning_rate": 7.666158514283325e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 3862, + "text_loss": 0.5234375 + }, + { + "epoch": 0.32, + "learning_rate": 7.66505399740685e-06, + "loss": 0.5219, + "regression_loss": 0.0, + "step": 3863, + "text_loss": 0.49609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.663949298840758e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 3864, + "text_loss": 0.53125 + }, + { + "epoch": 0.32, + "learning_rate": 7.662844418660364e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 3865, + "text_loss": 0.609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.661739356940987e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 3866, + "text_loss": 0.7421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.66063411375797e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 3867, + "text_loss": 0.69140625 + }, + { + "epoch": 0.32, + "learning_rate": 7.659528689186662e-06, + "loss": 0.5992, + "regression_loss": 0.0, + "step": 3868, + "text_loss": 0.63671875 + }, + { + "epoch": 0.32, + "learning_rate": 7.658423083302421e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 3869, + "text_loss": 0.765625 + }, + { + "epoch": 0.32, + "learning_rate": 7.657317296180627e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 3870, + "text_loss": 0.25390625 + }, + { + "epoch": 0.32, + "learning_rate": 7.656211327896665e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 3871, + "text_loss": 0.7734375 + }, + { + "epoch": 0.32, + "learning_rate": 7.655105178525932e-06, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 3872, + "text_loss": 0.4921875 + }, + { + "epoch": 0.32, + "learning_rate": 7.653998848143842e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 3873, + "text_loss": 0.56640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.652892336825818e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 3874, + "text_loss": 0.72265625 + }, + { + "epoch": 0.32, + "learning_rate": 7.651785644647297e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 3875, + "text_loss": 0.494140625 + }, + { + "epoch": 0.32, + "learning_rate": 7.650678771683725e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 3876, + "text_loss": 0.2421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.649571718010564e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 3877, + "text_loss": 0.4296875 + }, + { + "epoch": 0.32, + "learning_rate": 7.64846448370329e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 3878, + "text_loss": 0.7578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.647357068837386e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 3879, + "text_loss": 0.65625 + }, + { + "epoch": 0.32, + "learning_rate": 7.64624947348835e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 3880, + "text_loss": 0.6875 + }, + { + "epoch": 0.32, + "learning_rate": 7.645141697731692e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 3881, + "text_loss": 0.359375 + }, + { + "epoch": 0.32, + "learning_rate": 7.644033741642935e-06, + "loss": 0.6304, + "regression_loss": 0.0, + "step": 3882, + "text_loss": 0.578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.642925605297616e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 3883, + "text_loss": 0.5546875 + }, + { + "epoch": 0.32, + "learning_rate": 7.641817288771275e-06, + "loss": 0.6274, + "regression_loss": 0.0, + "step": 3884, + "text_loss": 0.6015625 + }, + { + "epoch": 0.32, + "learning_rate": 7.64070879213948e-06, + "loss": 0.6282, + "regression_loss": 0.0, + "step": 3885, + "text_loss": 0.74609375 + }, + { + "epoch": 0.32, + "learning_rate": 7.639600115477796e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 3886, + "text_loss": 0.6796875 + }, + { + "epoch": 0.32, + "learning_rate": 7.63849125886181e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 3887, + "text_loss": 0.32421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.637382222367118e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 3888, + "text_loss": 0.43359375 + }, + { + "epoch": 0.32, + "learning_rate": 7.636273006069328e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 3889, + "text_loss": 0.640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.63516361004406e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 3890, + "text_loss": 0.365234375 + }, + { + "epoch": 0.32, + "learning_rate": 7.634054034366947e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 3891, + "text_loss": 0.75 + }, + { + "epoch": 0.32, + "learning_rate": 7.632944279113634e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 3892, + "text_loss": 0.58984375 + }, + { + "epoch": 0.32, + "learning_rate": 7.63183434435978e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 3893, + "text_loss": 0.6484375 + }, + { + "epoch": 0.32, + "learning_rate": 7.630724230181053e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 3894, + "text_loss": 0.50390625 + }, + { + "epoch": 0.32, + "learning_rate": 7.629613936653132e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 3895, + "text_loss": 0.267578125 + }, + { + "epoch": 0.32, + "learning_rate": 7.62850346385172e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 3896, + "text_loss": 0.55078125 + }, + { + "epoch": 0.32, + "learning_rate": 7.627392811852514e-06, + "loss": 0.6467, + "regression_loss": 0.0, + "step": 3897, + "text_loss": 0.44921875 + }, + { + "epoch": 0.32, + "learning_rate": 7.626281980731238e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 3898, + "text_loss": 0.53125 + }, + { + "epoch": 0.32, + "learning_rate": 7.62517097056362e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 3899, + "text_loss": 0.61328125 + }, + { + "epoch": 0.32, + "learning_rate": 7.624059781425404e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 3900, + "text_loss": 0.640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.6229484133923445e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 3901, + "text_loss": 0.46875 + }, + { + "epoch": 0.32, + "learning_rate": 7.621836866540211e-06, + "loss": 0.4982, + "regression_loss": 0.0, + "step": 3902, + "text_loss": 0.2421875 + }, + { + "epoch": 0.32, + "learning_rate": 7.62072514094478e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 3903, + "text_loss": 0.271484375 + }, + { + "epoch": 0.32, + "learning_rate": 7.619613236681845e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 3904, + "text_loss": 0.56640625 + }, + { + "epoch": 0.32, + "learning_rate": 7.618501153827209e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 3905, + "text_loss": 0.79296875 + }, + { + "epoch": 0.32, + "learning_rate": 7.617388892456689e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 3906, + "text_loss": 0.64453125 + }, + { + "epoch": 0.32, + "learning_rate": 7.616276452646113e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 3907, + "text_loss": 0.353515625 + }, + { + "epoch": 0.32, + "learning_rate": 7.61516383447132e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 3908, + "text_loss": 0.91015625 + }, + { + "epoch": 0.32, + "learning_rate": 7.614051038008165e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 3909, + "text_loss": 0.50390625 + }, + { + "epoch": 0.32, + "learning_rate": 7.612938063332511e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 3910, + "text_loss": 0.52734375 + }, + { + "epoch": 0.33, + "learning_rate": 7.6118249105202346e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 3911, + "text_loss": 0.4453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.610711579647225e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 3912, + "text_loss": 0.72265625 + }, + { + "epoch": 0.33, + "learning_rate": 7.6095980707893855e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 3913, + "text_loss": 0.375 + }, + { + "epoch": 0.33, + "learning_rate": 7.608484384022628e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 3914, + "text_loss": 0.59375 + }, + { + "epoch": 0.33, + "learning_rate": 7.607370519422877e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 3915, + "text_loss": 0.4453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.606256477066069e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 3916, + "text_loss": 0.6953125 + }, + { + "epoch": 0.33, + "learning_rate": 7.605142257028158e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 3917, + "text_loss": 0.345703125 + }, + { + "epoch": 0.33, + "learning_rate": 7.604027859385102e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 3918, + "text_loss": 0.73046875 + }, + { + "epoch": 0.33, + "learning_rate": 7.602913284212876e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 3919, + "text_loss": 0.419921875 + }, + { + "epoch": 0.33, + "learning_rate": 7.601798531587465e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 3920, + "text_loss": 0.322265625 + }, + { + "epoch": 0.33, + "learning_rate": 7.6006836015848695e-06, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 3921, + "text_loss": 0.28515625 + }, + { + "epoch": 0.33, + "learning_rate": 7.5995684942810975e-06, + "loss": 0.6516, + "regression_loss": 0.0, + "step": 3922, + "text_loss": 0.3984375 + }, + { + "epoch": 0.33, + "learning_rate": 7.598453209752172e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 3923, + "text_loss": 0.56640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.5973377480741275e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 3924, + "text_loss": 0.328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.596222109323008e-06, + "loss": 0.356, + "regression_loss": 0.0, + "step": 3925, + "text_loss": 0.39453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.595106293574876e-06, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 3926, + "text_loss": 0.48046875 + }, + { + "epoch": 0.33, + "learning_rate": 7.593990300905802e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 3927, + "text_loss": 0.79296875 + }, + { + "epoch": 0.33, + "learning_rate": 7.5928741313918654e-06, + "loss": 0.7173, + "regression_loss": 0.0, + "step": 3928, + "text_loss": 0.73828125 + }, + { + "epoch": 0.33, + "learning_rate": 7.591757785109162e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 3929, + "text_loss": 0.53125 + }, + { + "epoch": 0.33, + "learning_rate": 7.5906412621338e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 3930, + "text_loss": 0.8984375 + }, + { + "epoch": 0.33, + "learning_rate": 7.589524562541894e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 3931, + "text_loss": 0.486328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.588407686409582e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 3932, + "text_loss": 0.26953125 + }, + { + "epoch": 0.33, + "learning_rate": 7.587290633813e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 3933, + "text_loss": 0.59375 + }, + { + "epoch": 0.33, + "learning_rate": 7.586173404828307e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 3934, + "text_loss": 0.60546875 + }, + { + "epoch": 0.33, + "learning_rate": 7.585055999531667e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 3935, + "text_loss": 0.384765625 + }, + { + "epoch": 0.33, + "learning_rate": 7.583938417999261e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 3936, + "text_loss": 0.486328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.5828206603072795e-06, + "loss": 0.6519, + "regression_loss": 0.0, + "step": 3937, + "text_loss": 0.8125 + }, + { + "epoch": 0.33, + "learning_rate": 7.581702726531926e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 3938, + "text_loss": 0.6171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.580584616749413e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 3939, + "text_loss": 0.384765625 + }, + { + "epoch": 0.33, + "learning_rate": 7.579466331035972e-06, + "loss": 0.4659, + "regression_loss": 0.0, + "step": 3940, + "text_loss": 0.609375 + }, + { + "epoch": 0.33, + "learning_rate": 7.578347869467837e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 3941, + "text_loss": 0.65234375 + }, + { + "epoch": 0.33, + "learning_rate": 7.5772292321212615e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 3942, + "text_loss": 0.326171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.576110419072507e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 3943, + "text_loss": 0.546875 + }, + { + "epoch": 0.33, + "learning_rate": 7.574991430397851e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 3944, + "text_loss": 0.6640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.573872266173578e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 3945, + "text_loss": 0.51171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.572752926475988e-06, + "loss": 0.5928, + "regression_loss": 0.0, + "step": 3946, + "text_loss": 0.625 + }, + { + "epoch": 0.33, + "learning_rate": 7.571633411381391e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 3947, + "text_loss": 0.4765625 + }, + { + "epoch": 0.33, + "learning_rate": 7.570513720966108e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 3948, + "text_loss": 0.4140625 + }, + { + "epoch": 0.33, + "learning_rate": 7.5693938553064795e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 3949, + "text_loss": 0.474609375 + }, + { + "epoch": 0.33, + "learning_rate": 7.568273814478847e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 3950, + "text_loss": 0.85546875 + }, + { + "epoch": 0.33, + "learning_rate": 7.56715359855957e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 3951, + "text_loss": 0.59375 + }, + { + "epoch": 0.33, + "learning_rate": 7.566033207625021e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 3952, + "text_loss": 0.7109375 + }, + { + "epoch": 0.33, + "learning_rate": 7.56491264175158e-06, + "loss": 0.4449, + "regression_loss": 0.0, + "step": 3953, + "text_loss": 0.44921875 + }, + { + "epoch": 0.33, + "learning_rate": 7.563791901015643e-06, + "loss": 0.5343, + "regression_loss": 0.0, + "step": 3954, + "text_loss": 0.61328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.562670985493615e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 3955, + "text_loss": 0.4296875 + }, + { + "epoch": 0.33, + "learning_rate": 7.561549895261915e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 3956, + "text_loss": 0.46484375 + }, + { + "epoch": 0.33, + "learning_rate": 7.560428630396974e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 3957, + "text_loss": 0.3203125 + }, + { + "epoch": 0.33, + "learning_rate": 7.559307190975234e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 3958, + "text_loss": 0.333984375 + }, + { + "epoch": 0.33, + "learning_rate": 7.558185577073147e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 3959, + "text_loss": 0.412109375 + }, + { + "epoch": 0.33, + "learning_rate": 7.557063788767179e-06, + "loss": 0.6169, + "regression_loss": 0.0, + "step": 3960, + "text_loss": 0.291015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.55594182613381e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 3961, + "text_loss": 0.275390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.554819689249528e-06, + "loss": 0.6968, + "regression_loss": 0.0, + "step": 3962, + "text_loss": 0.55859375 + }, + { + "epoch": 0.33, + "learning_rate": 7.553697378190836e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 3963, + "text_loss": 0.392578125 + }, + { + "epoch": 0.33, + "learning_rate": 7.552574893034245e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 3964, + "text_loss": 0.55078125 + }, + { + "epoch": 0.33, + "learning_rate": 7.551452233856282e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 3965, + "text_loss": 1.0234375 + }, + { + "epoch": 0.33, + "learning_rate": 7.550329400733485e-06, + "loss": 0.4089, + "regression_loss": 0.0, + "step": 3966, + "text_loss": 0.6015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.549206393742398e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 3967, + "text_loss": 0.498046875 + }, + { + "epoch": 0.33, + "learning_rate": 7.548083212959588e-06, + "loss": 0.6294, + "regression_loss": 0.0, + "step": 3968, + "text_loss": 0.6015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.546959858461624e-06, + "loss": 0.4353, + "regression_loss": 0.0, + "step": 3969, + "text_loss": 0.58203125 + }, + { + "epoch": 0.33, + "learning_rate": 7.545836330325092e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 3970, + "text_loss": 0.7109375 + }, + { + "epoch": 0.33, + "learning_rate": 7.544712628626588e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 3971, + "text_loss": 0.486328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.543588753442719e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 3972, + "text_loss": 0.58984375 + }, + { + "epoch": 0.33, + "learning_rate": 7.542464704850108e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 3973, + "text_loss": 0.25 + }, + { + "epoch": 0.33, + "learning_rate": 7.541340482925384e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 3974, + "text_loss": 0.5 + }, + { + "epoch": 0.33, + "learning_rate": 7.5402160877451915e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 3975, + "text_loss": 0.52734375 + }, + { + "epoch": 0.33, + "learning_rate": 7.539091519386185e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 3976, + "text_loss": 0.88671875 + }, + { + "epoch": 0.33, + "learning_rate": 7.537966777925034e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 3977, + "text_loss": 0.494140625 + }, + { + "epoch": 0.33, + "learning_rate": 7.536841863438415e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 3978, + "text_loss": 0.400390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.535716776003022e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 3979, + "text_loss": 0.314453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.534591515695555e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 3980, + "text_loss": 0.29296875 + }, + { + "epoch": 0.33, + "learning_rate": 7.533466082592729e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 3981, + "text_loss": 0.56640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.532340476771269e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 3982, + "text_loss": 0.458984375 + }, + { + "epoch": 0.33, + "learning_rate": 7.531214698307918e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 3983, + "text_loss": 0.6953125 + }, + { + "epoch": 0.33, + "learning_rate": 7.53008874727942e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 3984, + "text_loss": 0.64453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.528962623762539e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 3985, + "text_loss": 0.6640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.527836327834048e-06, + "loss": 0.6116, + "regression_loss": 0.0, + "step": 3986, + "text_loss": 0.41015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.526709859570732e-06, + "loss": 0.5853, + "regression_loss": 0.0, + "step": 3987, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.33, + "learning_rate": 7.525583219049389e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 3988, + "text_loss": 0.318359375 + }, + { + "epoch": 0.33, + "learning_rate": 7.5244564063468254e-06, + "loss": 0.6816, + "regression_loss": 0.0, + "step": 3989, + "text_loss": 0.6796875 + }, + { + "epoch": 0.33, + "learning_rate": 7.523329421539862e-06, + "loss": 0.418, + "regression_loss": 0.0, + "step": 3990, + "text_loss": 0.66796875 + }, + { + "epoch": 0.33, + "learning_rate": 7.5222022647053314e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 3991, + "text_loss": 0.4765625 + }, + { + "epoch": 0.33, + "learning_rate": 7.521074935920079e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 3992, + "text_loss": 0.6328125 + }, + { + "epoch": 0.33, + "learning_rate": 7.519947435260956e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 3993, + "text_loss": 0.50390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.518819762804834e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 3994, + "text_loss": 0.5859375 + }, + { + "epoch": 0.33, + "learning_rate": 7.517691918628589e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 3995, + "text_loss": 0.55078125 + }, + { + "epoch": 0.33, + "learning_rate": 7.516563902809112e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 3996, + "text_loss": 0.6015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.515435715423307e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 3997, + "text_loss": 0.25390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.514307356548085e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 3998, + "text_loss": 0.412109375 + }, + { + "epoch": 0.33, + "learning_rate": 7.513178826260374e-06, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 3999, + "text_loss": 0.6015625 + }, + { + "epoch": 0.33, + "learning_rate": 7.512050124637114e-06, + "loss": 0.6738, + "regression_loss": 0.0, + "step": 4000, + "text_loss": 0.890625 + }, + { + "epoch": 0.33, + "learning_rate": 7.510921251755247e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 4001, + "text_loss": 0.60546875 + }, + { + "epoch": 0.33, + "learning_rate": 7.5097922076917395e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 4002, + "text_loss": 0.67578125 + }, + { + "epoch": 0.33, + "learning_rate": 7.508662992523561e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 4003, + "text_loss": 0.39453125 + }, + { + "epoch": 0.33, + "learning_rate": 7.507533606327697e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 4004, + "text_loss": 0.59375 + }, + { + "epoch": 0.33, + "learning_rate": 7.506404049181143e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 4005, + "text_loss": 0.51171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.505274321160906e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 4006, + "text_loss": 0.2734375 + }, + { + "epoch": 0.33, + "learning_rate": 7.504144422344004e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 4007, + "text_loss": 0.6171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.503014352807472e-06, + "loss": 0.5316, + "regression_loss": 0.0, + "step": 4008, + "text_loss": 0.74609375 + }, + { + "epoch": 0.33, + "learning_rate": 7.501884112628346e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 4009, + "text_loss": 0.5078125 + }, + { + "epoch": 0.33, + "learning_rate": 7.500753701883683e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 4010, + "text_loss": 0.357421875 + }, + { + "epoch": 0.33, + "learning_rate": 7.499623120650551e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 4011, + "text_loss": 0.5625 + }, + { + "epoch": 0.33, + "learning_rate": 7.498492369006023e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 4012, + "text_loss": 0.466796875 + }, + { + "epoch": 0.33, + "learning_rate": 7.497361447027191e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 4013, + "text_loss": 0.6640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.496230354791153e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 4014, + "text_loss": 0.546875 + }, + { + "epoch": 0.33, + "learning_rate": 7.495099092375022e-06, + "loss": 0.5782, + "regression_loss": 0.0, + "step": 4015, + "text_loss": 0.81640625 + }, + { + "epoch": 0.33, + "learning_rate": 7.493967659855922e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 4016, + "text_loss": 0.6171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.4928360573109885e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 4017, + "text_loss": 0.76953125 + }, + { + "epoch": 0.33, + "learning_rate": 7.491704284817367e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 4018, + "text_loss": 0.8203125 + }, + { + "epoch": 0.33, + "learning_rate": 7.490572342452217e-06, + "loss": 0.6116, + "regression_loss": 0.0, + "step": 4019, + "text_loss": 0.52734375 + }, + { + "epoch": 0.33, + "learning_rate": 7.489440230292707e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 4020, + "text_loss": 0.76171875 + }, + { + "epoch": 0.33, + "learning_rate": 7.488307948416021e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 4021, + "text_loss": 0.515625 + }, + { + "epoch": 0.33, + "learning_rate": 7.48717549689935e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 4022, + "text_loss": 0.33203125 + }, + { + "epoch": 0.33, + "learning_rate": 7.486042875819901e-06, + "loss": 0.6404, + "regression_loss": 0.0, + "step": 4023, + "text_loss": 0.48828125 + }, + { + "epoch": 0.33, + "learning_rate": 7.4849100852548885e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 4024, + "text_loss": 0.5390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.4837771252815395e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 4025, + "text_loss": 0.419921875 + }, + { + "epoch": 0.33, + "learning_rate": 7.482643995977095e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 4026, + "text_loss": 0.392578125 + }, + { + "epoch": 0.33, + "learning_rate": 7.481510697418806e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 4027, + "text_loss": 0.5390625 + }, + { + "epoch": 0.33, + "learning_rate": 7.480377229683932e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 4028, + "text_loss": 0.7109375 + }, + { + "epoch": 0.33, + "learning_rate": 7.479243592849752e-06, + "loss": 0.4138, + "regression_loss": 0.0, + "step": 4029, + "text_loss": 0.84375 + }, + { + "epoch": 0.33, + "learning_rate": 7.478109786993548e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 4030, + "text_loss": 0.310546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.4769758121926175e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4031, + "text_loss": 0.5859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.475841668524268e-06, + "loss": 0.6699, + "regression_loss": 0.0, + "step": 4032, + "text_loss": 0.5859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.4747073560658236e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 4033, + "text_loss": 0.369140625 + }, + { + "epoch": 0.34, + "learning_rate": 7.473572874894611e-06, + "loss": 0.4502, + "regression_loss": 0.0, + "step": 4034, + "text_loss": 0.55859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.472438225087977e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 4035, + "text_loss": 0.45703125 + }, + { + "epoch": 0.34, + "learning_rate": 7.471303406723274e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 4036, + "text_loss": 0.330078125 + }, + { + "epoch": 0.34, + "learning_rate": 7.470168419877868e-06, + "loss": 0.4313, + "regression_loss": 0.0, + "step": 4037, + "text_loss": 0.3671875 + }, + { + "epoch": 0.34, + "learning_rate": 7.4690332646291375e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 4038, + "text_loss": 0.671875 + }, + { + "epoch": 0.34, + "learning_rate": 7.467897941054472e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 4039, + "text_loss": 0.9296875 + }, + { + "epoch": 0.34, + "learning_rate": 7.46676244923127e-06, + "loss": 0.614, + "regression_loss": 0.0, + "step": 4040, + "text_loss": 0.466796875 + }, + { + "epoch": 0.34, + "learning_rate": 7.465626789236946e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 4041, + "text_loss": 0.7578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.464490961148921e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 4042, + "text_loss": 0.462890625 + }, + { + "epoch": 0.34, + "learning_rate": 7.4633549650446314e-06, + "loss": 0.4587, + "regression_loss": 0.0, + "step": 4043, + "text_loss": 0.478515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.462218801001524e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 4044, + "text_loss": 0.484375 + }, + { + "epoch": 0.34, + "learning_rate": 7.461082469097054e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 4045, + "text_loss": 0.3515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.459945969408693e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 4046, + "text_loss": 0.68359375 + }, + { + "epoch": 0.34, + "learning_rate": 7.458809302013923e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 4047, + "text_loss": 0.6484375 + }, + { + "epoch": 0.34, + "learning_rate": 7.457672466990231e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 4048, + "text_loss": 0.27734375 + }, + { + "epoch": 0.34, + "learning_rate": 7.456535464415125e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 4049, + "text_loss": 0.85546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.4553982943661204e-06, + "loss": 0.6526, + "regression_loss": 0.0, + "step": 4050, + "text_loss": 0.6640625 + }, + { + "epoch": 0.34, + "learning_rate": 7.45426095692074e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 4051, + "text_loss": 0.578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.453123452156525e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 4052, + "text_loss": 0.3515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.451985780151022e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 4053, + "text_loss": 0.478515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.450847940981791e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 4054, + "text_loss": 0.37890625 + }, + { + "epoch": 0.34, + "learning_rate": 7.449709934726408e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 4055, + "text_loss": 0.51953125 + }, + { + "epoch": 0.34, + "learning_rate": 7.448571761462454e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 4056, + "text_loss": 0.8046875 + }, + { + "epoch": 0.34, + "learning_rate": 7.447433421267523e-06, + "loss": 0.4341, + "regression_loss": 0.0, + "step": 4057, + "text_loss": 0.53125 + }, + { + "epoch": 0.34, + "learning_rate": 7.446294914219223e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 4058, + "text_loss": 0.6484375 + }, + { + "epoch": 0.34, + "learning_rate": 7.44515624039517e-06, + "loss": 0.6663, + "regression_loss": 0.0, + "step": 4059, + "text_loss": 0.3984375 + }, + { + "epoch": 0.34, + "learning_rate": 7.444017399872992e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 4060, + "text_loss": 0.55859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.442878392730333e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 4061, + "text_loss": 0.609375 + }, + { + "epoch": 0.34, + "learning_rate": 7.44173921904484e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 4062, + "text_loss": 0.349609375 + }, + { + "epoch": 0.34, + "learning_rate": 7.440599878894179e-06, + "loss": 0.4331, + "regression_loss": 0.0, + "step": 4063, + "text_loss": 0.380859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.439460372356025e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 4064, + "text_loss": 0.37890625 + }, + { + "epoch": 0.34, + "learning_rate": 7.438320699508062e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 4065, + "text_loss": 0.765625 + }, + { + "epoch": 0.34, + "learning_rate": 7.4371808604279864e-06, + "loss": 0.7217, + "regression_loss": 0.0, + "step": 4066, + "text_loss": 1.2265625 + }, + { + "epoch": 0.34, + "learning_rate": 7.436040855193506e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 4067, + "text_loss": 0.427734375 + }, + { + "epoch": 0.34, + "learning_rate": 7.434900683882344e-06, + "loss": 0.3981, + "regression_loss": 0.0, + "step": 4068, + "text_loss": 0.40625 + }, + { + "epoch": 0.34, + "learning_rate": 7.433760346572228e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 4069, + "text_loss": 0.55078125 + }, + { + "epoch": 0.34, + "learning_rate": 7.432619843340903e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 4070, + "text_loss": 0.65625 + }, + { + "epoch": 0.34, + "learning_rate": 7.431479174266121e-06, + "loss": 0.6113, + "regression_loss": 0.0, + "step": 4071, + "text_loss": 0.87109375 + }, + { + "epoch": 0.34, + "learning_rate": 7.430338339425647e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 4072, + "text_loss": 0.5234375 + }, + { + "epoch": 0.34, + "learning_rate": 7.429197338897256e-06, + "loss": 0.4288, + "regression_loss": 0.0, + "step": 4073, + "text_loss": 0.40234375 + }, + { + "epoch": 0.34, + "learning_rate": 7.428056172758737e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 4074, + "text_loss": 0.6328125 + }, + { + "epoch": 0.34, + "learning_rate": 7.4269148410878915e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 4075, + "text_loss": 0.5078125 + }, + { + "epoch": 0.34, + "learning_rate": 7.425773343962525e-06, + "loss": 0.4504, + "regression_loss": 0.0, + "step": 4076, + "text_loss": 0.3828125 + }, + { + "epoch": 0.34, + "learning_rate": 7.42463168146046e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 4077, + "text_loss": 0.59375 + }, + { + "epoch": 0.34, + "learning_rate": 7.423489853659531e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 4078, + "text_loss": 0.27734375 + }, + { + "epoch": 0.34, + "learning_rate": 7.42234786063758e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 4079, + "text_loss": 0.60546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.421205702472464e-06, + "loss": 0.6182, + "regression_loss": 0.0, + "step": 4080, + "text_loss": 0.67578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.420063379242047e-06, + "loss": 0.4288, + "regression_loss": 0.0, + "step": 4081, + "text_loss": 0.53515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.418920891024208e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 4082, + "text_loss": 0.435546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.417778237896838e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 4083, + "text_loss": 0.69921875 + }, + { + "epoch": 0.34, + "learning_rate": 7.416635419937834e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 4084, + "text_loss": 0.462890625 + }, + { + "epoch": 0.34, + "learning_rate": 7.415492437225108e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 4085, + "text_loss": 0.5625 + }, + { + "epoch": 0.34, + "learning_rate": 7.414349289836585e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 4086, + "text_loss": 0.6328125 + }, + { + "epoch": 0.34, + "learning_rate": 7.413205977850195e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 4087, + "text_loss": 0.3828125 + }, + { + "epoch": 0.34, + "learning_rate": 7.412062501343886e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 4088, + "text_loss": 0.61328125 + }, + { + "epoch": 0.34, + "learning_rate": 7.410918860395615e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 4089, + "text_loss": 0.36328125 + }, + { + "epoch": 0.34, + "learning_rate": 7.409775055083347e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 4090, + "text_loss": 0.55078125 + }, + { + "epoch": 0.34, + "learning_rate": 7.408631085485061e-06, + "loss": 0.5458, + "regression_loss": 0.0, + "step": 4091, + "text_loss": 0.93359375 + }, + { + "epoch": 0.34, + "learning_rate": 7.40748695167875e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 4092, + "text_loss": 0.6015625 + }, + { + "epoch": 0.34, + "learning_rate": 7.406342653742411e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 4093, + "text_loss": 0.75 + }, + { + "epoch": 0.34, + "learning_rate": 7.40519819175406e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 4094, + "text_loss": 0.37109375 + }, + { + "epoch": 0.34, + "learning_rate": 7.4040535657917174e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 4095, + "text_loss": 0.75390625 + }, + { + "epoch": 0.34, + "learning_rate": 7.402908775933419e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 4096, + "text_loss": 0.52734375 + }, + { + "epoch": 0.34, + "learning_rate": 7.401763822257213e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 4097, + "text_loss": 0.484375 + }, + { + "epoch": 0.34, + "learning_rate": 7.400618704841153e-06, + "loss": 0.6409, + "regression_loss": 0.0, + "step": 4098, + "text_loss": 0.67578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.399473423763308e-06, + "loss": 0.4786, + "regression_loss": 0.0, + "step": 4099, + "text_loss": 0.1904296875 + }, + { + "epoch": 0.34, + "learning_rate": 7.398327979101759e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 4100, + "text_loss": 0.62109375 + }, + { + "epoch": 0.34, + "learning_rate": 7.3971823709345944e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 4101, + "text_loss": 0.8125 + }, + { + "epoch": 0.34, + "learning_rate": 7.396036599339917e-06, + "loss": 0.5789, + "regression_loss": 0.0, + "step": 4102, + "text_loss": 0.51171875 + }, + { + "epoch": 0.34, + "learning_rate": 7.394890664395841e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 4103, + "text_loss": 0.640625 + }, + { + "epoch": 0.34, + "learning_rate": 7.393744566180488e-06, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 4104, + "text_loss": 0.75 + }, + { + "epoch": 0.34, + "learning_rate": 7.392598304771992e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 4105, + "text_loss": 0.375 + }, + { + "epoch": 0.34, + "learning_rate": 7.391451880248504e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 4106, + "text_loss": 0.453125 + }, + { + "epoch": 0.34, + "learning_rate": 7.390305292688176e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 4107, + "text_loss": 0.58984375 + }, + { + "epoch": 0.34, + "learning_rate": 7.389158542169179e-06, + "loss": 0.6113, + "regression_loss": 0.0, + "step": 4108, + "text_loss": 0.55859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.388011628769693e-06, + "loss": 0.4823, + "regression_loss": 0.0, + "step": 4109, + "text_loss": 0.35546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.386864552567907e-06, + "loss": 0.3773, + "regression_loss": 0.0, + "step": 4110, + "text_loss": 0.34765625 + }, + { + "epoch": 0.34, + "learning_rate": 7.385717313642025e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 4111, + "text_loss": 0.48828125 + }, + { + "epoch": 0.34, + "learning_rate": 7.3845699120702585e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 4112, + "text_loss": 0.6328125 + }, + { + "epoch": 0.34, + "learning_rate": 7.383422347930828e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 4113, + "text_loss": 0.68359375 + }, + { + "epoch": 0.34, + "learning_rate": 7.3822746213019755e-06, + "loss": 0.6675, + "regression_loss": 0.0, + "step": 4114, + "text_loss": 0.7578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.381126732261942e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 4115, + "text_loss": 0.490234375 + }, + { + "epoch": 0.34, + "learning_rate": 7.379978680888986e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 4116, + "text_loss": 0.404296875 + }, + { + "epoch": 0.34, + "learning_rate": 7.3788304672613775e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 4117, + "text_loss": 0.515625 + }, + { + "epoch": 0.34, + "learning_rate": 7.377682091457392e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 4118, + "text_loss": 0.58203125 + }, + { + "epoch": 0.34, + "learning_rate": 7.376533553555323e-06, + "loss": 0.5245, + "regression_loss": 0.0, + "step": 4119, + "text_loss": 0.33984375 + }, + { + "epoch": 0.34, + "learning_rate": 7.37538485363347e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 4120, + "text_loss": 0.51953125 + }, + { + "epoch": 0.34, + "learning_rate": 7.374235991770146e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 4121, + "text_loss": 0.59375 + }, + { + "epoch": 0.34, + "learning_rate": 7.373086968043676e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 4122, + "text_loss": 0.380859375 + }, + { + "epoch": 0.34, + "learning_rate": 7.371937782532392e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 4123, + "text_loss": 0.91015625 + }, + { + "epoch": 0.34, + "learning_rate": 7.370788435314642e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 4124, + "text_loss": 0.40234375 + }, + { + "epoch": 0.34, + "learning_rate": 7.369638926468781e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 4125, + "text_loss": 0.625 + }, + { + "epoch": 0.34, + "learning_rate": 7.368489256073176e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 4126, + "text_loss": 0.640625 + }, + { + "epoch": 0.34, + "learning_rate": 7.3673394242062055e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 4127, + "text_loss": 0.34765625 + }, + { + "epoch": 0.34, + "learning_rate": 7.366189430946262e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 4128, + "text_loss": 0.546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.365039276371744e-06, + "loss": 0.6702, + "regression_loss": 0.0, + "step": 4129, + "text_loss": 0.89453125 + }, + { + "epoch": 0.34, + "learning_rate": 7.363888960561061e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 4130, + "text_loss": 0.671875 + }, + { + "epoch": 0.34, + "learning_rate": 7.362738483592641e-06, + "loss": 0.6431, + "regression_loss": 0.0, + "step": 4131, + "text_loss": 0.60546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.361587845544912e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 4132, + "text_loss": 0.5703125 + }, + { + "epoch": 0.34, + "learning_rate": 7.360437046496321e-06, + "loss": 0.5199, + "regression_loss": 0.0, + "step": 4133, + "text_loss": 0.5625 + }, + { + "epoch": 0.34, + "learning_rate": 7.359286086525325e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 4134, + "text_loss": 0.412109375 + }, + { + "epoch": 0.34, + "learning_rate": 7.358134965710388e-06, + "loss": 0.6235, + "regression_loss": 0.0, + "step": 4135, + "text_loss": 0.45703125 + }, + { + "epoch": 0.34, + "learning_rate": 7.3569836841299905e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 4136, + "text_loss": 0.357421875 + }, + { + "epoch": 0.34, + "learning_rate": 7.355832241862616e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 4137, + "text_loss": 0.58984375 + }, + { + "epoch": 0.34, + "learning_rate": 7.3546806389867675e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 4138, + "text_loss": 0.427734375 + }, + { + "epoch": 0.34, + "learning_rate": 7.353528875580955e-06, + "loss": 0.4369, + "regression_loss": 0.0, + "step": 4139, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.34, + "learning_rate": 7.3523769517237e-06, + "loss": 0.6697, + "regression_loss": 0.0, + "step": 4140, + "text_loss": 0.75 + }, + { + "epoch": 0.34, + "learning_rate": 7.351224867493533e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 4141, + "text_loss": 0.388671875 + }, + { + "epoch": 0.34, + "learning_rate": 7.350072622968999e-06, + "loss": 0.5311, + "regression_loss": 0.0, + "step": 4142, + "text_loss": 0.490234375 + }, + { + "epoch": 0.34, + "learning_rate": 7.348920218228651e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 4143, + "text_loss": 0.44921875 + }, + { + "epoch": 0.34, + "learning_rate": 7.347767653351054e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 4144, + "text_loss": 0.64453125 + }, + { + "epoch": 0.34, + "learning_rate": 7.346614928414786e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 4145, + "text_loss": 0.7890625 + }, + { + "epoch": 0.34, + "learning_rate": 7.3454620434984305e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 4146, + "text_loss": 0.546875 + }, + { + "epoch": 0.34, + "learning_rate": 7.344308998680587e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 4147, + "text_loss": 0.447265625 + }, + { + "epoch": 0.34, + "learning_rate": 7.3431557940398645e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 4148, + "text_loss": 0.7578125 + }, + { + "epoch": 0.34, + "learning_rate": 7.342002429654882e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 4149, + "text_loss": 0.4375 + }, + { + "epoch": 0.34, + "learning_rate": 7.340848905604269e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 4150, + "text_loss": 0.55078125 + }, + { + "epoch": 0.34, + "learning_rate": 7.339695221966669e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 4151, + "text_loss": 0.33203125 + }, + { + "epoch": 0.35, + "learning_rate": 7.338541378820732e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 4152, + "text_loss": 0.421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.337387376245123e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 4153, + "text_loss": 0.9453125 + }, + { + "epoch": 0.35, + "learning_rate": 7.336233214318515e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 4154, + "text_loss": 0.404296875 + }, + { + "epoch": 0.35, + "learning_rate": 7.335078893119592e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 4155, + "text_loss": 0.478515625 + }, + { + "epoch": 0.35, + "learning_rate": 7.33392441272705e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 4156, + "text_loss": 0.48046875 + }, + { + "epoch": 0.35, + "learning_rate": 7.3327697732195965e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 4157, + "text_loss": 0.69140625 + }, + { + "epoch": 0.35, + "learning_rate": 7.331614974675948e-06, + "loss": 0.6235, + "regression_loss": 0.0, + "step": 4158, + "text_loss": 0.78125 + }, + { + "epoch": 0.35, + "learning_rate": 7.330460017174834e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 4159, + "text_loss": 0.52734375 + }, + { + "epoch": 0.35, + "learning_rate": 7.329304900794991e-06, + "loss": 0.5074, + "regression_loss": 0.0, + "step": 4160, + "text_loss": 0.38671875 + }, + { + "epoch": 0.35, + "learning_rate": 7.328149625615172e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 4161, + "text_loss": 0.80859375 + }, + { + "epoch": 0.35, + "learning_rate": 7.3269941917141344e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 4162, + "text_loss": 0.765625 + }, + { + "epoch": 0.35, + "learning_rate": 7.325838599170652e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 4163, + "text_loss": 0.59765625 + }, + { + "epoch": 0.35, + "learning_rate": 7.324682848063508e-06, + "loss": 0.6353, + "regression_loss": 0.0, + "step": 4164, + "text_loss": 0.78515625 + }, + { + "epoch": 0.35, + "learning_rate": 7.323526938471493e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 4165, + "text_loss": 0.365234375 + }, + { + "epoch": 0.35, + "learning_rate": 7.322370870473411e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 4166, + "text_loss": 0.28515625 + }, + { + "epoch": 0.35, + "learning_rate": 7.321214644148079e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 4167, + "text_loss": 0.453125 + }, + { + "epoch": 0.35, + "learning_rate": 7.3200582595743204e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 4168, + "text_loss": 0.828125 + }, + { + "epoch": 0.35, + "learning_rate": 7.3189017168309726e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 4169, + "text_loss": 0.55078125 + }, + { + "epoch": 0.35, + "learning_rate": 7.317745015996885e-06, + "loss": 0.6694, + "regression_loss": 0.0, + "step": 4170, + "text_loss": 0.404296875 + }, + { + "epoch": 0.35, + "learning_rate": 7.3165881571509105e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 4171, + "text_loss": 0.26953125 + }, + { + "epoch": 0.35, + "learning_rate": 7.3154311403719205e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 4172, + "text_loss": 0.421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.314273965738794e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 4173, + "text_loss": 0.890625 + }, + { + "epoch": 0.35, + "learning_rate": 7.313116633330422e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 4174, + "text_loss": 0.466796875 + }, + { + "epoch": 0.35, + "learning_rate": 7.311959143225704e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 4175, + "text_loss": 0.640625 + }, + { + "epoch": 0.35, + "learning_rate": 7.310801495503555e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 4176, + "text_loss": 0.6484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.309643690242893e-06, + "loss": 0.637, + "regression_loss": 0.0, + "step": 4177, + "text_loss": 0.33984375 + }, + { + "epoch": 0.35, + "learning_rate": 7.308485727522654e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 4178, + "text_loss": 0.494140625 + }, + { + "epoch": 0.35, + "learning_rate": 7.307327607421779e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 4179, + "text_loss": 0.494140625 + }, + { + "epoch": 0.35, + "learning_rate": 7.306169330019227e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 4180, + "text_loss": 0.8984375 + }, + { + "epoch": 0.35, + "learning_rate": 7.30501089539396e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 4181, + "text_loss": 0.5078125 + }, + { + "epoch": 0.35, + "learning_rate": 7.303852303624956e-06, + "loss": 0.415, + "regression_loss": 0.0, + "step": 4182, + "text_loss": 0.33203125 + }, + { + "epoch": 0.35, + "learning_rate": 7.3026935547912004e-06, + "loss": 0.4757, + "regression_loss": 0.0, + "step": 4183, + "text_loss": 0.734375 + }, + { + "epoch": 0.35, + "learning_rate": 7.301534648971692e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 4184, + "text_loss": 0.44140625 + }, + { + "epoch": 0.35, + "learning_rate": 7.3003755862454386e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 4185, + "text_loss": 0.4296875 + }, + { + "epoch": 0.35, + "learning_rate": 7.299216366691457e-06, + "loss": 0.619, + "regression_loss": 0.0, + "step": 4186, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.35, + "learning_rate": 7.298056990388781e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 4187, + "text_loss": 0.80859375 + }, + { + "epoch": 0.35, + "learning_rate": 7.296897457416446e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 4188, + "text_loss": 0.412109375 + }, + { + "epoch": 0.35, + "learning_rate": 7.295737767853507e-06, + "loss": 0.4082, + "regression_loss": 0.0, + "step": 4189, + "text_loss": 0.298828125 + }, + { + "epoch": 0.35, + "learning_rate": 7.294577921779025e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 4190, + "text_loss": 0.53125 + }, + { + "epoch": 0.35, + "learning_rate": 7.29341791927207e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 4191, + "text_loss": 0.271484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.292257760411726e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 4192, + "text_loss": 0.60546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2910974452770885e-06, + "loss": 0.634, + "regression_loss": 0.0, + "step": 4193, + "text_loss": 0.66796875 + }, + { + "epoch": 0.35, + "learning_rate": 7.28993697394726e-06, + "loss": 0.6174, + "regression_loss": 0.0, + "step": 4194, + "text_loss": 0.51953125 + }, + { + "epoch": 0.35, + "learning_rate": 7.288776346501356e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 4195, + "text_loss": 0.34375 + }, + { + "epoch": 0.35, + "learning_rate": 7.287615563018502e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 4196, + "text_loss": 0.5625 + }, + { + "epoch": 0.35, + "learning_rate": 7.286454623577833e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 4197, + "text_loss": 0.486328125 + }, + { + "epoch": 0.35, + "learning_rate": 7.285293528258498e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 4198, + "text_loss": 0.546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.284132277139652e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 4199, + "text_loss": 0.6484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.282970870300466e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 4200, + "text_loss": 0.5546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.281809307820118e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 4201, + "text_loss": 0.287109375 + }, + { + "epoch": 0.35, + "learning_rate": 7.280647589777795e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 4202, + "text_loss": 0.71875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2794857162526985e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 4203, + "text_loss": 0.46875 + }, + { + "epoch": 0.35, + "learning_rate": 7.27832368732404e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 4204, + "text_loss": 0.36328125 + }, + { + "epoch": 0.35, + "learning_rate": 7.27716150307104e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 4205, + "text_loss": 0.482421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.275999163572929e-06, + "loss": 0.5284, + "regression_loss": 0.0, + "step": 4206, + "text_loss": 0.5078125 + }, + { + "epoch": 0.35, + "learning_rate": 7.27483666890895e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 4207, + "text_loss": 0.333984375 + }, + { + "epoch": 0.35, + "learning_rate": 7.273674019158356e-06, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 4208, + "text_loss": 0.625 + }, + { + "epoch": 0.35, + "learning_rate": 7.272511214400412e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 4209, + "text_loss": 0.68359375 + }, + { + "epoch": 0.35, + "learning_rate": 7.27134825471439e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 4210, + "text_loss": 0.57421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2701851401795745e-06, + "loss": 0.4855, + "regression_loss": 0.0, + "step": 4211, + "text_loss": 0.345703125 + }, + { + "epoch": 0.35, + "learning_rate": 7.269021870875262e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 4212, + "text_loss": 0.7734375 + }, + { + "epoch": 0.35, + "learning_rate": 7.267858446880758e-06, + "loss": 0.4852, + "regression_loss": 0.0, + "step": 4213, + "text_loss": 0.341796875 + }, + { + "epoch": 0.35, + "learning_rate": 7.266694868275377e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 4214, + "text_loss": 0.546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2655311351384485e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 4215, + "text_loss": 0.294921875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2643672475493065e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 4216, + "text_loss": 0.38671875 + }, + { + "epoch": 0.35, + "learning_rate": 7.263203205587303e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 4217, + "text_loss": 0.57421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.262039009331792e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 4218, + "text_loss": 0.40625 + }, + { + "epoch": 0.35, + "learning_rate": 7.2608746588621444e-06, + "loss": 0.6699, + "regression_loss": 0.0, + "step": 4219, + "text_loss": 0.76171875 + }, + { + "epoch": 0.35, + "learning_rate": 7.259710154257742e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 4220, + "text_loss": 0.50390625 + }, + { + "epoch": 0.35, + "learning_rate": 7.258545495597971e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 4221, + "text_loss": 0.546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.257380682962232e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 4222, + "text_loss": 0.388671875 + }, + { + "epoch": 0.35, + "learning_rate": 7.256215716429939e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4223, + "text_loss": 0.267578125 + }, + { + "epoch": 0.35, + "learning_rate": 7.25505059608051e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 4224, + "text_loss": 0.58203125 + }, + { + "epoch": 0.35, + "learning_rate": 7.253885321993379e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 4225, + "text_loss": 0.57421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.252719894247988e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 4226, + "text_loss": 0.46484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.251554312923789e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 4227, + "text_loss": 0.44921875 + }, + { + "epoch": 0.35, + "learning_rate": 7.250388578100246e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 4228, + "text_loss": 0.4140625 + }, + { + "epoch": 0.35, + "learning_rate": 7.249222689856833e-06, + "loss": 0.4377, + "regression_loss": 0.0, + "step": 4229, + "text_loss": 0.58203125 + }, + { + "epoch": 0.35, + "learning_rate": 7.248056648273034e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 4230, + "text_loss": 0.2734375 + }, + { + "epoch": 0.35, + "learning_rate": 7.246890453428342e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 4231, + "text_loss": 0.29296875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2457241054022666e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 4232, + "text_loss": 0.38671875 + }, + { + "epoch": 0.35, + "learning_rate": 7.244557604274318e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 4233, + "text_loss": 0.357421875 + }, + { + "epoch": 0.35, + "learning_rate": 7.243390950124027e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 4234, + "text_loss": 0.75390625 + }, + { + "epoch": 0.35, + "learning_rate": 7.242224143030927e-06, + "loss": 0.6406, + "regression_loss": 0.0, + "step": 4235, + "text_loss": 0.48046875 + }, + { + "epoch": 0.35, + "learning_rate": 7.2410571830745645e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 4236, + "text_loss": 0.61328125 + }, + { + "epoch": 0.35, + "learning_rate": 7.2398900703345e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 4237, + "text_loss": 0.52734375 + }, + { + "epoch": 0.35, + "learning_rate": 7.238722804890297e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 4238, + "text_loss": 0.84765625 + }, + { + "epoch": 0.35, + "learning_rate": 7.237555386821538e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 4239, + "text_loss": 0.60546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.236387816207808e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 4240, + "text_loss": 0.6171875 + }, + { + "epoch": 0.35, + "learning_rate": 7.235220093128707e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 4241, + "text_loss": 0.53125 + }, + { + "epoch": 0.35, + "learning_rate": 7.234052217663845e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 4242, + "text_loss": 0.283203125 + }, + { + "epoch": 0.35, + "learning_rate": 7.232884189892843e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 4243, + "text_loss": 0.66015625 + }, + { + "epoch": 0.35, + "learning_rate": 7.231716009895326e-06, + "loss": 0.6428, + "regression_loss": 0.0, + "step": 4244, + "text_loss": 0.39453125 + }, + { + "epoch": 0.35, + "learning_rate": 7.230547677750941e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 4245, + "text_loss": 0.83984375 + }, + { + "epoch": 0.35, + "learning_rate": 7.2293791935393355e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 4246, + "text_loss": 0.71484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.22821055734017e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 4247, + "text_loss": 0.60546875 + }, + { + "epoch": 0.35, + "learning_rate": 7.227041769233116e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 4248, + "text_loss": 0.5703125 + }, + { + "epoch": 0.35, + "learning_rate": 7.22587282929786e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 4249, + "text_loss": 0.287109375 + }, + { + "epoch": 0.35, + "learning_rate": 7.224703737614087e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 4250, + "text_loss": 0.4375 + }, + { + "epoch": 0.35, + "learning_rate": 7.223534494261506e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 4251, + "text_loss": 0.34375 + }, + { + "epoch": 0.35, + "learning_rate": 7.222365099319827e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 4252, + "text_loss": 0.2265625 + }, + { + "epoch": 0.35, + "learning_rate": 7.2211955528687724e-06, + "loss": 0.614, + "regression_loss": 0.0, + "step": 4253, + "text_loss": 0.50390625 + }, + { + "epoch": 0.35, + "learning_rate": 7.220025854988079e-06, + "loss": 0.6211, + "regression_loss": 0.0, + "step": 4254, + "text_loss": 0.7578125 + }, + { + "epoch": 0.35, + "learning_rate": 7.218856005757486e-06, + "loss": 0.5031, + "regression_loss": 0.0, + "step": 4255, + "text_loss": 0.451171875 + }, + { + "epoch": 0.35, + "learning_rate": 7.217686005256755e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 4256, + "text_loss": 0.67578125 + }, + { + "epoch": 0.35, + "learning_rate": 7.216515853565642e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 4257, + "text_loss": 0.7265625 + }, + { + "epoch": 0.35, + "learning_rate": 7.2153455507639285e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 4258, + "text_loss": 0.64453125 + }, + { + "epoch": 0.35, + "learning_rate": 7.214175096931397e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 4259, + "text_loss": 0.458984375 + }, + { + "epoch": 0.35, + "learning_rate": 7.213004492147843e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 4260, + "text_loss": 0.36328125 + }, + { + "epoch": 0.35, + "learning_rate": 7.211833736493072e-06, + "loss": 0.6616, + "regression_loss": 0.0, + "step": 4261, + "text_loss": 0.45703125 + }, + { + "epoch": 0.35, + "learning_rate": 7.210662830046901e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 4262, + "text_loss": 0.2890625 + }, + { + "epoch": 0.35, + "learning_rate": 7.209491772889156e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 4263, + "text_loss": 0.455078125 + }, + { + "epoch": 0.35, + "learning_rate": 7.208320565099675e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 4264, + "text_loss": 0.7109375 + }, + { + "epoch": 0.35, + "learning_rate": 7.207149206758303e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 4265, + "text_loss": 0.419921875 + }, + { + "epoch": 0.35, + "learning_rate": 7.205977697944896e-06, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 4266, + "text_loss": 0.66015625 + }, + { + "epoch": 0.35, + "learning_rate": 7.2048060387393235e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 4267, + "text_loss": 0.96875 + }, + { + "epoch": 0.35, + "learning_rate": 7.203634229221465e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 4268, + "text_loss": 0.71484375 + }, + { + "epoch": 0.35, + "learning_rate": 7.202462269471204e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 4269, + "text_loss": 0.349609375 + }, + { + "epoch": 0.35, + "learning_rate": 7.201290159568442e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 4270, + "text_loss": 0.55859375 + }, + { + "epoch": 0.35, + "learning_rate": 7.200117899593085e-06, + "loss": 0.4608, + "regression_loss": 0.0, + "step": 4271, + "text_loss": 0.2177734375 + }, + { + "epoch": 0.36, + "learning_rate": 7.198945489625054e-06, + "loss": 0.677, + "regression_loss": 0.0, + "step": 4272, + "text_loss": 0.609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.197772929744278e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 4273, + "text_loss": 0.6796875 + }, + { + "epoch": 0.36, + "learning_rate": 7.196600220030692e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 4274, + "text_loss": 0.5234375 + }, + { + "epoch": 0.36, + "learning_rate": 7.195427360564249e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 4275, + "text_loss": 0.69140625 + }, + { + "epoch": 0.36, + "learning_rate": 7.194254351424909e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 4276, + "text_loss": 0.455078125 + }, + { + "epoch": 0.36, + "learning_rate": 7.193081192692639e-06, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 4277, + "text_loss": 0.384765625 + }, + { + "epoch": 0.36, + "learning_rate": 7.19190788444742e-06, + "loss": 0.4474, + "regression_loss": 0.0, + "step": 4278, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.36, + "learning_rate": 7.190734426769244e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 4279, + "text_loss": 0.5078125 + }, + { + "epoch": 0.36, + "learning_rate": 7.189560819738109e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 4280, + "text_loss": 0.6171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.188387063434025e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 4281, + "text_loss": 0.546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.187213157937014e-06, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 4282, + "text_loss": 0.326171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.186039103327107e-06, + "loss": 0.6194, + "regression_loss": 0.0, + "step": 4283, + "text_loss": 0.75390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.184864899684344e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 4284, + "text_loss": 0.66015625 + }, + { + "epoch": 0.36, + "learning_rate": 7.1836905470887776e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 4285, + "text_loss": 0.60546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.182516045620468e-06, + "loss": 0.4747, + "regression_loss": 0.0, + "step": 4286, + "text_loss": 0.66015625 + }, + { + "epoch": 0.36, + "learning_rate": 7.1813413953594855e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 4287, + "text_loss": 0.408203125 + }, + { + "epoch": 0.36, + "learning_rate": 7.180166596385915e-06, + "loss": 0.4041, + "regression_loss": 0.0, + "step": 4288, + "text_loss": 0.318359375 + }, + { + "epoch": 0.36, + "learning_rate": 7.178991648779845e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 4289, + "text_loss": 0.56640625 + }, + { + "epoch": 0.36, + "learning_rate": 7.177816552621379e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 4290, + "text_loss": 0.400390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.176641307990628e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 4291, + "text_loss": 0.50390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.175465914967714e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 4292, + "text_loss": 0.58984375 + }, + { + "epoch": 0.36, + "learning_rate": 7.1742903736327715e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 4293, + "text_loss": 0.71875 + }, + { + "epoch": 0.36, + "learning_rate": 7.17311468406594e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 4294, + "text_loss": 0.640625 + }, + { + "epoch": 0.36, + "learning_rate": 7.171938846347374e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 4295, + "text_loss": 0.53515625 + }, + { + "epoch": 0.36, + "learning_rate": 7.170762860557235e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 4296, + "text_loss": 0.474609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.169586726775696e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 4297, + "text_loss": 0.404296875 + }, + { + "epoch": 0.36, + "learning_rate": 7.1684104450829384e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 4298, + "text_loss": 0.4921875 + }, + { + "epoch": 0.36, + "learning_rate": 7.167234015559158e-06, + "loss": 0.5411, + "regression_loss": 0.0, + "step": 4299, + "text_loss": 0.5234375 + }, + { + "epoch": 0.36, + "learning_rate": 7.166057438284555e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 4300, + "text_loss": 0.51171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.164880713339341e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 4301, + "text_loss": 0.515625 + }, + { + "epoch": 0.36, + "learning_rate": 7.163703840803744e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 4302, + "text_loss": 0.423828125 + }, + { + "epoch": 0.36, + "learning_rate": 7.162526820757994e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 4303, + "text_loss": 0.3125 + }, + { + "epoch": 0.36, + "learning_rate": 7.161349653282335e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 4304, + "text_loss": 0.498046875 + }, + { + "epoch": 0.36, + "learning_rate": 7.160172338457019e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 4305, + "text_loss": 0.3125 + }, + { + "epoch": 0.36, + "learning_rate": 7.158994876362308e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 4306, + "text_loss": 0.515625 + }, + { + "epoch": 0.36, + "learning_rate": 7.1578172670784805e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 4307, + "text_loss": 0.796875 + }, + { + "epoch": 0.36, + "learning_rate": 7.156639510685815e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 4308, + "text_loss": 0.61328125 + }, + { + "epoch": 0.36, + "learning_rate": 7.155461607264607e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 4309, + "text_loss": 0.39453125 + }, + { + "epoch": 0.36, + "learning_rate": 7.154283556895158e-06, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 4310, + "text_loss": 0.515625 + }, + { + "epoch": 0.36, + "learning_rate": 7.153105359657784e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 4311, + "text_loss": 0.451171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.151927015632808e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 4312, + "text_loss": 0.73828125 + }, + { + "epoch": 0.36, + "learning_rate": 7.150748524900561e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 4313, + "text_loss": 0.5390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.149569887541388e-06, + "loss": 0.719, + "regression_loss": 0.0, + "step": 4314, + "text_loss": 0.734375 + }, + { + "epoch": 0.36, + "learning_rate": 7.148391103635642e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 4315, + "text_loss": 0.474609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.147212173263686e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 4316, + "text_loss": 0.73046875 + }, + { + "epoch": 0.36, + "learning_rate": 7.146033096505895e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 4317, + "text_loss": 0.45703125 + }, + { + "epoch": 0.36, + "learning_rate": 7.144853873442652e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 4318, + "text_loss": 0.76171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.1436745041543495e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 4319, + "text_loss": 0.7109375 + }, + { + "epoch": 0.36, + "learning_rate": 7.14249498872139e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 4320, + "text_loss": 0.314453125 + }, + { + "epoch": 0.36, + "learning_rate": 7.14131532722419e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 4321, + "text_loss": 0.279296875 + }, + { + "epoch": 0.36, + "learning_rate": 7.1401355197431675e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 4322, + "text_loss": 0.515625 + }, + { + "epoch": 0.36, + "learning_rate": 7.1389555663587605e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 4323, + "text_loss": 0.51171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.137775467151411e-06, + "loss": 0.4043, + "regression_loss": 0.0, + "step": 4324, + "text_loss": 0.474609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.136595222201571e-06, + "loss": 0.6296, + "regression_loss": 0.0, + "step": 4325, + "text_loss": 0.8359375 + }, + { + "epoch": 0.36, + "learning_rate": 7.135414831589703e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 4326, + "text_loss": 0.609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.1342342953962825e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 4327, + "text_loss": 0.498046875 + }, + { + "epoch": 0.36, + "learning_rate": 7.13305361370179e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 4328, + "text_loss": 0.72265625 + }, + { + "epoch": 0.36, + "learning_rate": 7.13187278658672e-06, + "loss": 0.5248, + "regression_loss": 0.0, + "step": 4329, + "text_loss": 0.52734375 + }, + { + "epoch": 0.36, + "learning_rate": 7.130691814131572e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 4330, + "text_loss": 0.68359375 + }, + { + "epoch": 0.36, + "learning_rate": 7.129510696416861e-06, + "loss": 0.6062, + "regression_loss": 0.0, + "step": 4331, + "text_loss": 0.4921875 + }, + { + "epoch": 0.36, + "learning_rate": 7.128329433523113e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 4332, + "text_loss": 0.6484375 + }, + { + "epoch": 0.36, + "learning_rate": 7.1271480255308524e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 4333, + "text_loss": 0.42578125 + }, + { + "epoch": 0.36, + "learning_rate": 7.1259664725206275e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 4334, + "text_loss": 0.474609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.124784774572989e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 4335, + "text_loss": 0.6171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.123602931768498e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 4336, + "text_loss": 0.6953125 + }, + { + "epoch": 0.36, + "learning_rate": 7.122420944187729e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 4337, + "text_loss": 0.68359375 + }, + { + "epoch": 0.36, + "learning_rate": 7.121238811911261e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 4338, + "text_loss": 0.65625 + }, + { + "epoch": 0.36, + "learning_rate": 7.120056535019686e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 4339, + "text_loss": 0.6953125 + }, + { + "epoch": 0.36, + "learning_rate": 7.118874113593607e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 4340, + "text_loss": 0.25 + }, + { + "epoch": 0.36, + "learning_rate": 7.117691547713634e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 4341, + "text_loss": 0.4375 + }, + { + "epoch": 0.36, + "learning_rate": 7.11650883746039e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 4342, + "text_loss": 0.392578125 + }, + { + "epoch": 0.36, + "learning_rate": 7.115325982914502e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 4343, + "text_loss": 0.328125 + }, + { + "epoch": 0.36, + "learning_rate": 7.1141429841566175e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4344, + "text_loss": 0.62890625 + }, + { + "epoch": 0.36, + "learning_rate": 7.112959841267382e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 4345, + "text_loss": 0.50390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.111776554327458e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 4346, + "text_loss": 0.38671875 + }, + { + "epoch": 0.36, + "learning_rate": 7.110593123417515e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 4347, + "text_loss": 0.5546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.109409548618234e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 4348, + "text_loss": 0.306640625 + }, + { + "epoch": 0.36, + "learning_rate": 7.108225830010305e-06, + "loss": 0.6501, + "regression_loss": 0.0, + "step": 4349, + "text_loss": 0.6875 + }, + { + "epoch": 0.36, + "learning_rate": 7.107041967674427e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 4350, + "text_loss": 0.734375 + }, + { + "epoch": 0.36, + "learning_rate": 7.1058579616913105e-06, + "loss": 0.595, + "regression_loss": 0.0, + "step": 4351, + "text_loss": 0.7734375 + }, + { + "epoch": 0.36, + "learning_rate": 7.104673812141676e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 4352, + "text_loss": 0.8203125 + }, + { + "epoch": 0.36, + "learning_rate": 7.103489519106249e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 4353, + "text_loss": 0.357421875 + }, + { + "epoch": 0.36, + "learning_rate": 7.102305082665772e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 4354, + "text_loss": 0.4765625 + }, + { + "epoch": 0.36, + "learning_rate": 7.101120502900994e-06, + "loss": 0.426, + "regression_loss": 0.0, + "step": 4355, + "text_loss": 0.4296875 + }, + { + "epoch": 0.36, + "learning_rate": 7.099935779892669e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 4356, + "text_loss": 0.3203125 + }, + { + "epoch": 0.36, + "learning_rate": 7.09875091372157e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 4357, + "text_loss": 0.439453125 + }, + { + "epoch": 0.36, + "learning_rate": 7.097565904468474e-06, + "loss": 0.4054, + "regression_loss": 0.0, + "step": 4358, + "text_loss": 0.60546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.096380752214167e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 4359, + "text_loss": 0.357421875 + }, + { + "epoch": 0.36, + "learning_rate": 7.09519545703945e-06, + "loss": 0.7002, + "regression_loss": 0.0, + "step": 4360, + "text_loss": 0.234375 + }, + { + "epoch": 0.36, + "learning_rate": 7.094010019025126e-06, + "loss": 0.4618, + "regression_loss": 0.0, + "step": 4361, + "text_loss": 0.76171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.092824438252014e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 4362, + "text_loss": 0.291015625 + }, + { + "epoch": 0.36, + "learning_rate": 7.091638714800942e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 4363, + "text_loss": 0.54296875 + }, + { + "epoch": 0.36, + "learning_rate": 7.090452848752746e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 4364, + "text_loss": 0.6796875 + }, + { + "epoch": 0.36, + "learning_rate": 7.08926684018827e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 4365, + "text_loss": 0.59765625 + }, + { + "epoch": 0.36, + "learning_rate": 7.088080689188374e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 4366, + "text_loss": 0.50390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.086894395833918e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 4367, + "text_loss": 0.74609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.085707960205783e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 4368, + "text_loss": 0.58203125 + }, + { + "epoch": 0.36, + "learning_rate": 7.0845213823848515e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 4369, + "text_loss": 0.37109375 + }, + { + "epoch": 0.36, + "learning_rate": 7.083334662452017e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 4370, + "text_loss": 0.7265625 + }, + { + "epoch": 0.36, + "learning_rate": 7.0821478004881875e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 4371, + "text_loss": 0.53125 + }, + { + "epoch": 0.36, + "learning_rate": 7.080960796574273e-06, + "loss": 0.5133, + "regression_loss": 0.0, + "step": 4372, + "text_loss": 0.2236328125 + }, + { + "epoch": 0.36, + "learning_rate": 7.079773650791199e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 4373, + "text_loss": 0.451171875 + }, + { + "epoch": 0.36, + "learning_rate": 7.078586363219902e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 4374, + "text_loss": 0.5078125 + }, + { + "epoch": 0.36, + "learning_rate": 7.077398933941319e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 4375, + "text_loss": 0.8359375 + }, + { + "epoch": 0.36, + "learning_rate": 7.076211363036408e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 4376, + "text_loss": 0.54296875 + }, + { + "epoch": 0.36, + "learning_rate": 7.07502365058613e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 4377, + "text_loss": 0.703125 + }, + { + "epoch": 0.36, + "learning_rate": 7.0738357966714555e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 4378, + "text_loss": 0.546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.072647801373367e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 4379, + "text_loss": 0.275390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.0714596647728585e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 4380, + "text_loss": 0.7109375 + }, + { + "epoch": 0.36, + "learning_rate": 7.070271386950927e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 4381, + "text_loss": 0.49609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.069082967988585e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 4382, + "text_loss": 0.462890625 + }, + { + "epoch": 0.36, + "learning_rate": 7.0678944079668545e-06, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 4383, + "text_loss": 0.291015625 + }, + { + "epoch": 0.36, + "learning_rate": 7.0667057069667625e-06, + "loss": 0.6357, + "regression_loss": 0.0, + "step": 4384, + "text_loss": 0.765625 + }, + { + "epoch": 0.36, + "learning_rate": 7.065516865069349e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 4385, + "text_loss": 0.59375 + }, + { + "epoch": 0.36, + "learning_rate": 7.064327882355665e-06, + "loss": 0.6003, + "regression_loss": 0.0, + "step": 4386, + "text_loss": 0.435546875 + }, + { + "epoch": 0.36, + "learning_rate": 7.063138758906767e-06, + "loss": 0.433, + "regression_loss": 0.0, + "step": 4387, + "text_loss": 0.2099609375 + }, + { + "epoch": 0.36, + "learning_rate": 7.0619494948037244e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 4388, + "text_loss": 0.390625 + }, + { + "epoch": 0.36, + "learning_rate": 7.060760090127615e-06, + "loss": 0.6362, + "regression_loss": 0.0, + "step": 4389, + "text_loss": 0.5625 + }, + { + "epoch": 0.36, + "learning_rate": 7.0595705449595266e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 4390, + "text_loss": 0.5625 + }, + { + "epoch": 0.36, + "learning_rate": 7.058380859380556e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 4391, + "text_loss": 0.5234375 + }, + { + "epoch": 0.37, + "learning_rate": 7.057191033471808e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 4392, + "text_loss": 0.390625 + }, + { + "epoch": 0.37, + "learning_rate": 7.056001067314402e-06, + "loss": 0.6233, + "regression_loss": 0.0, + "step": 4393, + "text_loss": 0.8671875 + }, + { + "epoch": 0.37, + "learning_rate": 7.054810960989462e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 4394, + "text_loss": 0.4609375 + }, + { + "epoch": 0.37, + "learning_rate": 7.053620714578122e-06, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 4395, + "text_loss": 0.54296875 + }, + { + "epoch": 0.37, + "learning_rate": 7.05243032816153e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 4396, + "text_loss": 0.58203125 + }, + { + "epoch": 0.37, + "learning_rate": 7.051239801820839e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 4397, + "text_loss": 0.6875 + }, + { + "epoch": 0.37, + "learning_rate": 7.050049135637211e-06, + "loss": 0.5074, + "regression_loss": 0.0, + "step": 4398, + "text_loss": 0.3515625 + }, + { + "epoch": 0.37, + "learning_rate": 7.048858329691822e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 4399, + "text_loss": 0.421875 + }, + { + "epoch": 0.37, + "learning_rate": 7.047667384065855e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 4400, + "text_loss": 0.43359375 + }, + { + "epoch": 0.37, + "learning_rate": 7.0464762988405e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 4401, + "text_loss": 0.451171875 + }, + { + "epoch": 0.37, + "learning_rate": 7.045285074096962e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 4402, + "text_loss": 0.80859375 + }, + { + "epoch": 0.37, + "learning_rate": 7.044093709916451e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 4403, + "text_loss": 0.36328125 + }, + { + "epoch": 0.37, + "learning_rate": 7.042902206380188e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 4404, + "text_loss": 0.66796875 + }, + { + "epoch": 0.37, + "learning_rate": 7.041710563569404e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 4405, + "text_loss": 0.453125 + }, + { + "epoch": 0.37, + "learning_rate": 7.040518781565339e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 4406, + "text_loss": 0.57421875 + }, + { + "epoch": 0.37, + "learning_rate": 7.039326860449243e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 4407, + "text_loss": 0.294921875 + }, + { + "epoch": 0.37, + "learning_rate": 7.038134800302374e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 4408, + "text_loss": 0.55078125 + }, + { + "epoch": 0.37, + "learning_rate": 7.036942601206e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 4409, + "text_loss": 0.578125 + }, + { + "epoch": 0.37, + "learning_rate": 7.035750263241402e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 4410, + "text_loss": 0.58203125 + }, + { + "epoch": 0.37, + "learning_rate": 7.034557786489864e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 4411, + "text_loss": 0.330078125 + }, + { + "epoch": 0.37, + "learning_rate": 7.033365171032685e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 4412, + "text_loss": 0.6015625 + }, + { + "epoch": 0.37, + "learning_rate": 7.03217241695117e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 4413, + "text_loss": 0.4609375 + }, + { + "epoch": 0.37, + "learning_rate": 7.030979524326635e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 4414, + "text_loss": 0.70703125 + }, + { + "epoch": 0.37, + "learning_rate": 7.029786493240407e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 4415, + "text_loss": 0.294921875 + }, + { + "epoch": 0.37, + "learning_rate": 7.028593323773819e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 4416, + "text_loss": 0.74609375 + }, + { + "epoch": 0.37, + "learning_rate": 7.027400016008215e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 4417, + "text_loss": 0.65625 + }, + { + "epoch": 0.37, + "learning_rate": 7.026206570024949e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 4418, + "text_loss": 0.828125 + }, + { + "epoch": 0.37, + "learning_rate": 7.025012985905386e-06, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 4419, + "text_loss": 0.431640625 + }, + { + "epoch": 0.37, + "learning_rate": 7.023819263730896e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 4420, + "text_loss": 0.337890625 + }, + { + "epoch": 0.37, + "learning_rate": 7.022625403582859e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 4421, + "text_loss": 0.294921875 + }, + { + "epoch": 0.37, + "learning_rate": 7.021431405542672e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 4422, + "text_loss": 0.5859375 + }, + { + "epoch": 0.37, + "learning_rate": 7.02023726969173e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 4423, + "text_loss": 0.5546875 + }, + { + "epoch": 0.37, + "learning_rate": 7.019042996111445e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 4424, + "text_loss": 0.96484375 + }, + { + "epoch": 0.37, + "learning_rate": 7.017848584883238e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 4425, + "text_loss": 0.58984375 + }, + { + "epoch": 0.37, + "learning_rate": 7.016654036088535e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 4426, + "text_loss": 0.494140625 + }, + { + "epoch": 0.37, + "learning_rate": 7.0154593498087765e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 4427, + "text_loss": 0.91015625 + }, + { + "epoch": 0.37, + "learning_rate": 7.0142645261254085e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 4428, + "text_loss": 0.58203125 + }, + { + "epoch": 0.37, + "learning_rate": 7.013069565119888e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 4429, + "text_loss": 0.76953125 + }, + { + "epoch": 0.37, + "learning_rate": 7.011874466873684e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 4430, + "text_loss": 0.318359375 + }, + { + "epoch": 0.37, + "learning_rate": 7.010679231468267e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 4431, + "text_loss": 0.6328125 + }, + { + "epoch": 0.37, + "learning_rate": 7.0094838589851265e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 4432, + "text_loss": 0.65625 + }, + { + "epoch": 0.37, + "learning_rate": 7.008288349505755e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 4433, + "text_loss": 0.71875 + }, + { + "epoch": 0.37, + "learning_rate": 7.007092703111656e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 4434, + "text_loss": 0.7265625 + }, + { + "epoch": 0.37, + "learning_rate": 7.005896919884343e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 4435, + "text_loss": 0.76953125 + }, + { + "epoch": 0.37, + "learning_rate": 7.004700999905339e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 4436, + "text_loss": 0.6640625 + }, + { + "epoch": 0.37, + "learning_rate": 7.003504943256174e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 4437, + "text_loss": 0.87109375 + }, + { + "epoch": 0.37, + "learning_rate": 7.0023087500183896e-06, + "loss": 0.5157, + "regression_loss": 0.0, + "step": 4438, + "text_loss": 0.54296875 + }, + { + "epoch": 0.37, + "learning_rate": 7.001112420273537e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 4439, + "text_loss": 0.625 + }, + { + "epoch": 0.37, + "learning_rate": 6.999915954103174e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 4440, + "text_loss": 0.66796875 + }, + { + "epoch": 0.37, + "learning_rate": 6.998719351588871e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 4441, + "text_loss": 0.462890625 + }, + { + "epoch": 0.37, + "learning_rate": 6.997522612812205e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 4442, + "text_loss": 0.58203125 + }, + { + "epoch": 0.37, + "learning_rate": 6.996325737854765e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 4443, + "text_loss": 0.80078125 + }, + { + "epoch": 0.37, + "learning_rate": 6.9951287267981485e-06, + "loss": 0.4547, + "regression_loss": 0.0, + "step": 4444, + "text_loss": 0.39453125 + }, + { + "epoch": 0.37, + "learning_rate": 6.993931579723957e-06, + "loss": 0.6414, + "regression_loss": 0.0, + "step": 4445, + "text_loss": 0.76171875 + }, + { + "epoch": 0.37, + "learning_rate": 6.99273429671381e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 4446, + "text_loss": 0.6640625 + }, + { + "epoch": 0.37, + "learning_rate": 6.991536877849332e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 4447, + "text_loss": 0.46875 + }, + { + "epoch": 0.37, + "learning_rate": 6.990339323212154e-06, + "loss": 0.7273, + "regression_loss": 0.0, + "step": 4448, + "text_loss": 0.4609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.989141632883922e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 4449, + "text_loss": 0.49609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.987943806946287e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 4450, + "text_loss": 0.32421875 + }, + { + "epoch": 0.37, + "learning_rate": 6.986745845480909e-06, + "loss": 0.5228, + "regression_loss": 0.0, + "step": 4451, + "text_loss": 0.56640625 + }, + { + "epoch": 0.37, + "learning_rate": 6.985547748569463e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 4452, + "text_loss": 0.51953125 + }, + { + "epoch": 0.37, + "learning_rate": 6.984349516293625e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 4453, + "text_loss": 0.703125 + }, + { + "epoch": 0.37, + "learning_rate": 6.983151148735085e-06, + "loss": 0.4192, + "regression_loss": 0.0, + "step": 4454, + "text_loss": 0.28515625 + }, + { + "epoch": 0.37, + "learning_rate": 6.981952645975545e-06, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 4455, + "text_loss": 0.890625 + }, + { + "epoch": 0.37, + "learning_rate": 6.9807540080967085e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 4456, + "text_loss": 0.60546875 + }, + { + "epoch": 0.37, + "learning_rate": 6.979555235180293e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 4457, + "text_loss": 0.37890625 + }, + { + "epoch": 0.37, + "learning_rate": 6.978356327308028e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 4458, + "text_loss": 0.3515625 + }, + { + "epoch": 0.37, + "learning_rate": 6.977157284561645e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 4459, + "text_loss": 0.78515625 + }, + { + "epoch": 0.37, + "learning_rate": 6.975958107022891e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 4460, + "text_loss": 0.65234375 + }, + { + "epoch": 0.37, + "learning_rate": 6.974758794773519e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 4461, + "text_loss": 0.6328125 + }, + { + "epoch": 0.37, + "learning_rate": 6.97355934789529e-06, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 4462, + "text_loss": 0.38671875 + }, + { + "epoch": 0.37, + "learning_rate": 6.972359766469978e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 4463, + "text_loss": 0.455078125 + }, + { + "epoch": 0.37, + "learning_rate": 6.971160050579366e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 4464, + "text_loss": 0.5546875 + }, + { + "epoch": 0.37, + "learning_rate": 6.969960200305242e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 4465, + "text_loss": 0.75390625 + }, + { + "epoch": 0.37, + "learning_rate": 6.968760215729406e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 4466, + "text_loss": 0.59375 + }, + { + "epoch": 0.37, + "learning_rate": 6.967560096933665e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 4467, + "text_loss": 0.412109375 + }, + { + "epoch": 0.37, + "learning_rate": 6.96635984399984e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 4468, + "text_loss": 0.56640625 + }, + { + "epoch": 0.37, + "learning_rate": 6.965159457009758e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 4469, + "text_loss": 0.7578125 + }, + { + "epoch": 0.37, + "learning_rate": 6.963958936045252e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 4470, + "text_loss": 0.2177734375 + }, + { + "epoch": 0.37, + "learning_rate": 6.96275828118817e-06, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 4471, + "text_loss": 0.52734375 + }, + { + "epoch": 0.37, + "learning_rate": 6.961557492520367e-06, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 4472, + "text_loss": 0.5703125 + }, + { + "epoch": 0.37, + "learning_rate": 6.960356570123704e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 4473, + "text_loss": 0.74609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.959155514080055e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 4474, + "text_loss": 0.64453125 + }, + { + "epoch": 0.37, + "learning_rate": 6.9579543244713036e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 4475, + "text_loss": 0.51171875 + }, + { + "epoch": 0.37, + "learning_rate": 6.956753001379336e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 4476, + "text_loss": 0.44921875 + }, + { + "epoch": 0.37, + "learning_rate": 6.955551544886057e-06, + "loss": 0.4943, + "regression_loss": 0.0, + "step": 4477, + "text_loss": 0.62109375 + }, + { + "epoch": 0.37, + "learning_rate": 6.954349955073374e-06, + "loss": 0.679, + "regression_loss": 0.0, + "step": 4478, + "text_loss": 0.90234375 + }, + { + "epoch": 0.37, + "learning_rate": 6.953148232023204e-06, + "loss": 0.6005, + "regression_loss": 0.0, + "step": 4479, + "text_loss": 0.66796875 + }, + { + "epoch": 0.37, + "learning_rate": 6.9519463758174745e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 4480, + "text_loss": 0.71484375 + }, + { + "epoch": 0.37, + "learning_rate": 6.950744386538124e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 4481, + "text_loss": 0.3203125 + }, + { + "epoch": 0.37, + "learning_rate": 6.949542264267096e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 4482, + "text_loss": 0.37890625 + }, + { + "epoch": 0.37, + "learning_rate": 6.948340009086347e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 4483, + "text_loss": 0.5703125 + }, + { + "epoch": 0.37, + "learning_rate": 6.947137621077837e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 4484, + "text_loss": 0.54296875 + }, + { + "epoch": 0.37, + "learning_rate": 6.945935100323542e-06, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 4485, + "text_loss": 0.490234375 + }, + { + "epoch": 0.37, + "learning_rate": 6.944732446905443e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 4486, + "text_loss": 0.88671875 + }, + { + "epoch": 0.37, + "learning_rate": 6.943529660905528e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 4487, + "text_loss": 0.6015625 + }, + { + "epoch": 0.37, + "learning_rate": 6.942326742405799e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4488, + "text_loss": 0.34375 + }, + { + "epoch": 0.37, + "learning_rate": 6.941123691488265e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 4489, + "text_loss": 0.224609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.939920508234943e-06, + "loss": 0.5857, + "regression_loss": 0.0, + "step": 4490, + "text_loss": 0.59765625 + }, + { + "epoch": 0.37, + "learning_rate": 6.938717192727862e-06, + "loss": 0.5962, + "regression_loss": 0.0, + "step": 4491, + "text_loss": 0.61328125 + }, + { + "epoch": 0.37, + "learning_rate": 6.9375137450490545e-06, + "loss": 0.472, + "regression_loss": 0.0, + "step": 4492, + "text_loss": 0.75 + }, + { + "epoch": 0.37, + "learning_rate": 6.9363101652805664e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 4493, + "text_loss": 0.74609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.935106453504454e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 4494, + "text_loss": 0.62109375 + }, + { + "epoch": 0.37, + "learning_rate": 6.933902609802777e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 4495, + "text_loss": 0.396484375 + }, + { + "epoch": 0.37, + "learning_rate": 6.932698634257608e-06, + "loss": 0.6252, + "regression_loss": 0.0, + "step": 4496, + "text_loss": 0.51171875 + }, + { + "epoch": 0.37, + "learning_rate": 6.931494526951028e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 4497, + "text_loss": 0.7421875 + }, + { + "epoch": 0.37, + "learning_rate": 6.930290287965127e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 4498, + "text_loss": 0.6640625 + }, + { + "epoch": 0.37, + "learning_rate": 6.929085917382005e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 4499, + "text_loss": 0.4921875 + }, + { + "epoch": 0.37, + "learning_rate": 6.927881415283768e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 4500, + "text_loss": 0.400390625 + }, + { + "epoch": 0.37, + "learning_rate": 6.9266767817525325e-06, + "loss": 0.644, + "regression_loss": 0.0, + "step": 4501, + "text_loss": 0.65234375 + }, + { + "epoch": 0.37, + "learning_rate": 6.925472016870426e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 4502, + "text_loss": 0.73046875 + }, + { + "epoch": 0.37, + "learning_rate": 6.924267120719583e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 4503, + "text_loss": 0.81640625 + }, + { + "epoch": 0.37, + "learning_rate": 6.923062093382144e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 4504, + "text_loss": 0.828125 + }, + { + "epoch": 0.37, + "learning_rate": 6.921856934940266e-06, + "loss": 0.6765, + "regression_loss": 0.0, + "step": 4505, + "text_loss": 0.74609375 + }, + { + "epoch": 0.37, + "learning_rate": 6.920651645476107e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 4506, + "text_loss": 0.51171875 + }, + { + "epoch": 0.37, + "learning_rate": 6.91944622507184e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 4507, + "text_loss": 0.37109375 + }, + { + "epoch": 0.37, + "learning_rate": 6.918240673809642e-06, + "loss": 0.4473, + "regression_loss": 0.0, + "step": 4508, + "text_loss": 0.248046875 + }, + { + "epoch": 0.37, + "learning_rate": 6.9170349917717025e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 4509, + "text_loss": 0.26953125 + }, + { + "epoch": 0.37, + "learning_rate": 6.915829179040218e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 4510, + "text_loss": 0.73046875 + }, + { + "epoch": 0.37, + "learning_rate": 6.9146232356973965e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 4511, + "text_loss": 0.5390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.913417161825449e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 4512, + "text_loss": 0.30078125 + }, + { + "epoch": 0.38, + "learning_rate": 6.912210957506604e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 4513, + "text_loss": 0.71875 + }, + { + "epoch": 0.38, + "learning_rate": 6.911004622823092e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 4514, + "text_loss": 0.6484375 + }, + { + "epoch": 0.38, + "learning_rate": 6.909798157857155e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 4515, + "text_loss": 0.6328125 + }, + { + "epoch": 0.38, + "learning_rate": 6.9085915626910416e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 4516, + "text_loss": 0.44921875 + }, + { + "epoch": 0.38, + "learning_rate": 6.9073848374070155e-06, + "loss": 0.5311, + "regression_loss": 0.0, + "step": 4517, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.38, + "learning_rate": 6.906177982087341e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 4518, + "text_loss": 0.453125 + }, + { + "epoch": 0.38, + "learning_rate": 6.904970996814298e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 4519, + "text_loss": 0.43359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.90376388167017e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 4520, + "text_loss": 0.32421875 + }, + { + "epoch": 0.38, + "learning_rate": 6.9025566367372545e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 4521, + "text_loss": 0.78515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.901349262097854e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 4522, + "text_loss": 0.6171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.900141757834279e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 4523, + "text_loss": 0.419921875 + }, + { + "epoch": 0.38, + "learning_rate": 6.898934124028854e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 4524, + "text_loss": 0.341796875 + }, + { + "epoch": 0.38, + "learning_rate": 6.8977263607639085e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 4525, + "text_loss": 0.318359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.896518468121781e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 4526, + "text_loss": 0.373046875 + }, + { + "epoch": 0.38, + "learning_rate": 6.89531044618482e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 4527, + "text_loss": 0.37109375 + }, + { + "epoch": 0.38, + "learning_rate": 6.8941022950353816e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 4528, + "text_loss": 0.35546875 + }, + { + "epoch": 0.38, + "learning_rate": 6.8928940147558306e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 4529, + "text_loss": 0.44921875 + }, + { + "epoch": 0.38, + "learning_rate": 6.891685605428543e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 4530, + "text_loss": 0.419921875 + }, + { + "epoch": 0.38, + "learning_rate": 6.8904770671359025e-06, + "loss": 0.6575, + "regression_loss": 0.0, + "step": 4531, + "text_loss": 0.578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.889268399960297e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 4532, + "text_loss": 0.6796875 + }, + { + "epoch": 0.38, + "learning_rate": 6.888059603984131e-06, + "loss": 0.6062, + "regression_loss": 0.0, + "step": 4533, + "text_loss": 0.41796875 + }, + { + "epoch": 0.38, + "learning_rate": 6.886850679289814e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 4534, + "text_loss": 0.7265625 + }, + { + "epoch": 0.38, + "learning_rate": 6.885641625959761e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 4535, + "text_loss": 0.6953125 + }, + { + "epoch": 0.38, + "learning_rate": 6.884432444076401e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 4536, + "text_loss": 0.5390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.88322313372217e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 4537, + "text_loss": 0.62109375 + }, + { + "epoch": 0.38, + "learning_rate": 6.8820136949795125e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 4538, + "text_loss": 0.60546875 + }, + { + "epoch": 0.38, + "learning_rate": 6.880804127930882e-06, + "loss": 0.6167, + "regression_loss": 0.0, + "step": 4539, + "text_loss": 0.64453125 + }, + { + "epoch": 0.38, + "learning_rate": 6.87959443265874e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 4540, + "text_loss": 0.388671875 + }, + { + "epoch": 0.38, + "learning_rate": 6.878384609245558e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 4541, + "text_loss": 0.57421875 + }, + { + "epoch": 0.38, + "learning_rate": 6.877174657773815e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 4542, + "text_loss": 0.310546875 + }, + { + "epoch": 0.38, + "learning_rate": 6.8759645783259975e-06, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 4543, + "text_loss": 0.33203125 + }, + { + "epoch": 0.38, + "learning_rate": 6.8747543709846064e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 4544, + "text_loss": 0.359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.873544035832144e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 4545, + "text_loss": 0.4375 + }, + { + "epoch": 0.38, + "learning_rate": 6.872333572951125e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 4546, + "text_loss": 0.5078125 + }, + { + "epoch": 0.38, + "learning_rate": 6.8711229824240746e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 4547, + "text_loss": 0.66015625 + }, + { + "epoch": 0.38, + "learning_rate": 6.869912264333522e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 4548, + "text_loss": 0.29296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.86870141876201e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 4549, + "text_loss": 0.9296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.8674904457920875e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 4550, + "text_loss": 1.0 + }, + { + "epoch": 0.38, + "learning_rate": 6.86627934550631e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 4551, + "text_loss": 0.515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.8650681179872465e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 4552, + "text_loss": 0.65234375 + }, + { + "epoch": 0.38, + "learning_rate": 6.8638567633174735e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 4553, + "text_loss": 0.62890625 + }, + { + "epoch": 0.38, + "learning_rate": 6.862645281579571e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 4554, + "text_loss": 0.47265625 + }, + { + "epoch": 0.38, + "learning_rate": 6.861433672856133e-06, + "loss": 0.6138, + "regression_loss": 0.0, + "step": 4555, + "text_loss": 0.7578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.860221937229763e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 4556, + "text_loss": 0.88671875 + }, + { + "epoch": 0.38, + "learning_rate": 6.859010074783068e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 4557, + "text_loss": 0.5625 + }, + { + "epoch": 0.38, + "learning_rate": 6.8577980855986655e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 4558, + "text_loss": 0.45703125 + }, + { + "epoch": 0.38, + "learning_rate": 6.856585969759189e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 4559, + "text_loss": 0.56640625 + }, + { + "epoch": 0.38, + "learning_rate": 6.855373727347268e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 4560, + "text_loss": 0.42578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.854161358445549e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 4561, + "text_loss": 0.37890625 + }, + { + "epoch": 0.38, + "learning_rate": 6.852948863136685e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 4562, + "text_loss": 0.73828125 + }, + { + "epoch": 0.38, + "learning_rate": 6.851736241503337e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 4563, + "text_loss": 0.61328125 + }, + { + "epoch": 0.38, + "learning_rate": 6.8505234936281786e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 4564, + "text_loss": 0.5390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.849310619593883e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 4565, + "text_loss": 0.279296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.848097619483141e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 4566, + "text_loss": 0.71875 + }, + { + "epoch": 0.38, + "learning_rate": 6.846884493378652e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 4567, + "text_loss": 0.4296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.845671241363113e-06, + "loss": 0.6235, + "regression_loss": 0.0, + "step": 4568, + "text_loss": 0.4765625 + }, + { + "epoch": 0.38, + "learning_rate": 6.844457863519243e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 4569, + "text_loss": 0.78515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.843244359929764e-06, + "loss": 0.595, + "regression_loss": 0.0, + "step": 4570, + "text_loss": 0.80078125 + }, + { + "epoch": 0.38, + "learning_rate": 6.842030730677403e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 4571, + "text_loss": 0.515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.840816975844899e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 4572, + "text_loss": 0.44140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.839603095515004e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 4573, + "text_loss": 0.45703125 + }, + { + "epoch": 0.38, + "learning_rate": 6.83838908977047e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 4574, + "text_loss": 0.412109375 + }, + { + "epoch": 0.38, + "learning_rate": 6.837174958694062e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 4575, + "text_loss": 0.4765625 + }, + { + "epoch": 0.38, + "learning_rate": 6.8359607023685544e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 4576, + "text_loss": 0.9296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.834746320876729e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 4577, + "text_loss": 0.388671875 + }, + { + "epoch": 0.38, + "learning_rate": 6.833531814301377e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 4578, + "text_loss": 0.478515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.8323171827252945e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 4579, + "text_loss": 0.78515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.83110242623129e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 4580, + "text_loss": 0.609375 + }, + { + "epoch": 0.38, + "learning_rate": 6.829887544902179e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 4581, + "text_loss": 0.53515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.828672538820787e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 4582, + "text_loss": 0.380859375 + }, + { + "epoch": 0.38, + "learning_rate": 6.827457408069946e-06, + "loss": 0.6047, + "regression_loss": 0.0, + "step": 4583, + "text_loss": 1.2109375 + }, + { + "epoch": 0.38, + "learning_rate": 6.8262421527324976e-06, + "loss": 0.6582, + "regression_loss": 0.0, + "step": 4584, + "text_loss": 0.68359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.825026772891291e-06, + "loss": 0.4347, + "regression_loss": 0.0, + "step": 4585, + "text_loss": 0.65625 + }, + { + "epoch": 0.38, + "learning_rate": 6.823811268629186e-06, + "loss": 0.6121, + "regression_loss": 0.0, + "step": 4586, + "text_loss": 0.93359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.822595640029047e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 4587, + "text_loss": 0.671875 + }, + { + "epoch": 0.38, + "learning_rate": 6.821379887173751e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 4588, + "text_loss": 0.427734375 + }, + { + "epoch": 0.38, + "learning_rate": 6.820164010146184e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 4589, + "text_loss": 0.5390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.818948009029233e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 4590, + "text_loss": 0.30078125 + }, + { + "epoch": 0.38, + "learning_rate": 6.8177318839058015e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 4591, + "text_loss": 0.392578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.8165156348588e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 4592, + "text_loss": 0.6171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.815299261971143e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 4593, + "text_loss": 0.451171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.814082765325758e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 4594, + "text_loss": 0.451171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.812866145005581e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 4595, + "text_loss": 0.494140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.811649401093553e-06, + "loss": 0.6843, + "regression_loss": 0.0, + "step": 4596, + "text_loss": 0.79296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.810432533672625e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 4597, + "text_loss": 0.58203125 + }, + { + "epoch": 0.38, + "learning_rate": 6.809215542825759e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 4598, + "text_loss": 0.62109375 + }, + { + "epoch": 0.38, + "learning_rate": 6.80799842863592e-06, + "loss": 0.6367, + "regression_loss": 0.0, + "step": 4599, + "text_loss": 0.734375 + }, + { + "epoch": 0.38, + "learning_rate": 6.806781191186087e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 4600, + "text_loss": 0.57421875 + }, + { + "epoch": 0.38, + "learning_rate": 6.805563830559243e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 4601, + "text_loss": 0.267578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.804346346838384e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 4602, + "text_loss": 0.64453125 + }, + { + "epoch": 0.38, + "learning_rate": 6.803128740106511e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 4603, + "text_loss": 0.36328125 + }, + { + "epoch": 0.38, + "learning_rate": 6.801911010446631e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 4604, + "text_loss": 0.4140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.8006931579417656e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 4605, + "text_loss": 0.44140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.799475182674942e-06, + "loss": 0.5355, + "regression_loss": 0.0, + "step": 4606, + "text_loss": 0.5234375 + }, + { + "epoch": 0.38, + "learning_rate": 6.798257084729194e-06, + "loss": 0.6223, + "regression_loss": 0.0, + "step": 4607, + "text_loss": 0.73046875 + }, + { + "epoch": 0.38, + "learning_rate": 6.797038864187564e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 4608, + "text_loss": 0.490234375 + }, + { + "epoch": 0.38, + "learning_rate": 6.795820521133108e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 4609, + "text_loss": 0.56640625 + }, + { + "epoch": 0.38, + "learning_rate": 6.794602055648884e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 4610, + "text_loss": 0.76171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.793383467817959e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 4611, + "text_loss": 0.3125 + }, + { + "epoch": 0.38, + "learning_rate": 6.792164757723414e-06, + "loss": 0.6077, + "regression_loss": 0.0, + "step": 4612, + "text_loss": 0.6640625 + }, + { + "epoch": 0.38, + "learning_rate": 6.79094592544833e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 4613, + "text_loss": 0.5 + }, + { + "epoch": 0.38, + "learning_rate": 6.789726971075803e-06, + "loss": 0.4213, + "regression_loss": 0.0, + "step": 4614, + "text_loss": 0.54296875 + }, + { + "epoch": 0.38, + "learning_rate": 6.788507894688936e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 4615, + "text_loss": 0.67578125 + }, + { + "epoch": 0.38, + "learning_rate": 6.787288696370838e-06, + "loss": 0.494, + "regression_loss": 0.0, + "step": 4616, + "text_loss": 0.55859375 + }, + { + "epoch": 0.38, + "learning_rate": 6.786069376204627e-06, + "loss": 0.5255, + "regression_loss": 0.0, + "step": 4617, + "text_loss": 0.61328125 + }, + { + "epoch": 0.38, + "learning_rate": 6.784849934273431e-06, + "loss": 0.4628, + "regression_loss": 0.0, + "step": 4618, + "text_loss": 0.400390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.7836303706603846e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 4619, + "text_loss": 0.6171875 + }, + { + "epoch": 0.38, + "learning_rate": 6.782410685448633e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 4620, + "text_loss": 0.4140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.781190878721325e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 4621, + "text_loss": 0.69140625 + }, + { + "epoch": 0.38, + "learning_rate": 6.779970950561624e-06, + "loss": 0.6284, + "regression_loss": 0.0, + "step": 4622, + "text_loss": 0.4921875 + }, + { + "epoch": 0.38, + "learning_rate": 6.778750901052696e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 4623, + "text_loss": 0.70703125 + }, + { + "epoch": 0.38, + "learning_rate": 6.7775307302777194e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 4624, + "text_loss": 0.5859375 + }, + { + "epoch": 0.38, + "learning_rate": 6.7763104383198775e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 4625, + "text_loss": 0.74609375 + }, + { + "epoch": 0.38, + "learning_rate": 6.775090025262365e-06, + "loss": 0.4669, + "regression_loss": 0.0, + "step": 4626, + "text_loss": 0.75390625 + }, + { + "epoch": 0.38, + "learning_rate": 6.773869491188382e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 4627, + "text_loss": 0.34375 + }, + { + "epoch": 0.38, + "learning_rate": 6.772648836181138e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 4628, + "text_loss": 0.5703125 + }, + { + "epoch": 0.38, + "learning_rate": 6.7714280603238535e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 4629, + "text_loss": 0.5234375 + }, + { + "epoch": 0.38, + "learning_rate": 6.7702071636997515e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 4630, + "text_loss": 0.515625 + }, + { + "epoch": 0.38, + "learning_rate": 6.768986146392069e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 4631, + "text_loss": 0.443359375 + }, + { + "epoch": 0.38, + "learning_rate": 6.767765008484047e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 4632, + "text_loss": 0.57421875 + }, + { + "epoch": 0.39, + "learning_rate": 6.766543750058938e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 4633, + "text_loss": 0.306640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.765322371200001e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 4634, + "text_loss": 0.478515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.7641008719905e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 4635, + "text_loss": 0.546875 + }, + { + "epoch": 0.39, + "learning_rate": 6.7628792525137126e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 4636, + "text_loss": 0.46484375 + }, + { + "epoch": 0.39, + "learning_rate": 6.761657512852925e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 4637, + "text_loss": 0.8203125 + }, + { + "epoch": 0.39, + "learning_rate": 6.760435653091425e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 4638, + "text_loss": 0.70703125 + }, + { + "epoch": 0.39, + "learning_rate": 6.759213673312515e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 4639, + "text_loss": 0.9375 + }, + { + "epoch": 0.39, + "learning_rate": 6.757991573599504e-06, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 4640, + "text_loss": 0.3984375 + }, + { + "epoch": 0.39, + "learning_rate": 6.756769354035706e-06, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 4641, + "text_loss": 0.60546875 + }, + { + "epoch": 0.39, + "learning_rate": 6.755547014704449e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 4642, + "text_loss": 0.6484375 + }, + { + "epoch": 0.39, + "learning_rate": 6.754324555689061e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 4643, + "text_loss": 0.45703125 + }, + { + "epoch": 0.39, + "learning_rate": 6.753101977072887e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 4644, + "text_loss": 0.421875 + }, + { + "epoch": 0.39, + "learning_rate": 6.751879278939275e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 4645, + "text_loss": 0.59375 + }, + { + "epoch": 0.39, + "learning_rate": 6.750656461371581e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 4646, + "text_loss": 0.734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.7494335244531704e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 4647, + "text_loss": 0.78515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.74821046826742e-06, + "loss": 0.5353, + "regression_loss": 0.0, + "step": 4648, + "text_loss": 0.64453125 + }, + { + "epoch": 0.39, + "learning_rate": 6.746987292897708e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 4649, + "text_loss": 0.53515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.745763998427426e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 4650, + "text_loss": 0.5390625 + }, + { + "epoch": 0.39, + "learning_rate": 6.7445405849399705e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 4651, + "text_loss": 0.72265625 + }, + { + "epoch": 0.39, + "learning_rate": 6.743317052518748e-06, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 4652, + "text_loss": 0.48046875 + }, + { + "epoch": 0.39, + "learning_rate": 6.742093401247173e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 4653, + "text_loss": 0.49609375 + }, + { + "epoch": 0.39, + "learning_rate": 6.740869631208669e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 4654, + "text_loss": 0.81640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.739645742486663e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 4655, + "text_loss": 0.671875 + }, + { + "epoch": 0.39, + "learning_rate": 6.738421735164595e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 4656, + "text_loss": 0.578125 + }, + { + "epoch": 0.39, + "learning_rate": 6.737197609325914e-06, + "loss": 0.4456, + "regression_loss": 0.0, + "step": 4657, + "text_loss": 0.28515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.735973365054071e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 4658, + "text_loss": 0.52734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.734749002432531e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 4659, + "text_loss": 0.318359375 + }, + { + "epoch": 0.39, + "learning_rate": 6.733524521544763e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 4660, + "text_loss": 0.52734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.7322999224742455e-06, + "loss": 0.5719, + "regression_loss": 0.0, + "step": 4661, + "text_loss": 0.423828125 + }, + { + "epoch": 0.39, + "learning_rate": 6.731075205304468e-06, + "loss": 0.5028, + "regression_loss": 0.0, + "step": 4662, + "text_loss": 0.58203125 + }, + { + "epoch": 0.39, + "learning_rate": 6.729850370118923e-06, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 4663, + "text_loss": 0.26953125 + }, + { + "epoch": 0.39, + "learning_rate": 6.7286254170011134e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 4664, + "text_loss": 0.5859375 + }, + { + "epoch": 0.39, + "learning_rate": 6.7274003460345525e-06, + "loss": 0.4473, + "regression_loss": 0.0, + "step": 4665, + "text_loss": 0.322265625 + }, + { + "epoch": 0.39, + "learning_rate": 6.726175157302756e-06, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 4666, + "text_loss": 0.25390625 + }, + { + "epoch": 0.39, + "learning_rate": 6.724949850889255e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 4667, + "text_loss": 0.6015625 + }, + { + "epoch": 0.39, + "learning_rate": 6.723724426877581e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 4668, + "text_loss": 0.478515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.722498885351279e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 4669, + "text_loss": 0.77734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.721273226393899e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 4670, + "text_loss": 0.60546875 + }, + { + "epoch": 0.39, + "learning_rate": 6.720047450089e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 4671, + "text_loss": 0.353515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.718821556520151e-06, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 4672, + "text_loss": 0.419921875 + }, + { + "epoch": 0.39, + "learning_rate": 6.7175955457709254e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 4673, + "text_loss": 0.6171875 + }, + { + "epoch": 0.39, + "learning_rate": 6.716369417924906e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 4674, + "text_loss": 0.375 + }, + { + "epoch": 0.39, + "learning_rate": 6.715143173065683e-06, + "loss": 0.6143, + "regression_loss": 0.0, + "step": 4675, + "text_loss": 0.53125 + }, + { + "epoch": 0.39, + "learning_rate": 6.713916811276861e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 4676, + "text_loss": 0.33203125 + }, + { + "epoch": 0.39, + "learning_rate": 6.712690332642038e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 4677, + "text_loss": 0.52734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.711463737244837e-06, + "loss": 0.5651, + "regression_loss": 0.0, + "step": 4678, + "text_loss": 0.384765625 + }, + { + "epoch": 0.39, + "learning_rate": 6.710237025168878e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 4679, + "text_loss": 0.77734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.709010196497789e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 4680, + "text_loss": 0.58203125 + }, + { + "epoch": 0.39, + "learning_rate": 6.7077832513152156e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 4681, + "text_loss": 0.28515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.706556189704798e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 4682, + "text_loss": 0.5703125 + }, + { + "epoch": 0.39, + "learning_rate": 6.705329011750194e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 4683, + "text_loss": 0.3828125 + }, + { + "epoch": 0.39, + "learning_rate": 6.7041017175350666e-06, + "loss": 0.4706, + "regression_loss": 0.0, + "step": 4684, + "text_loss": 0.431640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.702874307143085e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 4685, + "text_loss": 0.251953125 + }, + { + "epoch": 0.39, + "learning_rate": 6.701646780657926e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 4686, + "text_loss": 0.74609375 + }, + { + "epoch": 0.39, + "learning_rate": 6.7004191381632826e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 4687, + "text_loss": 0.515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.699191379742842e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 4688, + "text_loss": 0.4765625 + }, + { + "epoch": 0.39, + "learning_rate": 6.697963505480309e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 4689, + "text_loss": 0.421875 + }, + { + "epoch": 0.39, + "learning_rate": 6.696735515459395e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 4690, + "text_loss": 0.6484375 + }, + { + "epoch": 0.39, + "learning_rate": 6.695507409763816e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 4691, + "text_loss": 0.6015625 + }, + { + "epoch": 0.39, + "learning_rate": 6.6942791884773e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 4692, + "text_loss": 0.359375 + }, + { + "epoch": 0.39, + "learning_rate": 6.693050851683579e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 4693, + "text_loss": 0.33203125 + }, + { + "epoch": 0.39, + "learning_rate": 6.6918223994663955e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 4694, + "text_loss": 0.9609375 + }, + { + "epoch": 0.39, + "learning_rate": 6.690593831909498e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 4695, + "text_loss": 0.62109375 + }, + { + "epoch": 0.39, + "learning_rate": 6.689365149096647e-06, + "loss": 0.4652, + "regression_loss": 0.0, + "step": 4696, + "text_loss": 0.703125 + }, + { + "epoch": 0.39, + "learning_rate": 6.688136351111602e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 4697, + "text_loss": 0.66015625 + }, + { + "epoch": 0.39, + "learning_rate": 6.686907438038144e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 4698, + "text_loss": 0.6953125 + }, + { + "epoch": 0.39, + "learning_rate": 6.685678409960046e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 4699, + "text_loss": 0.578125 + }, + { + "epoch": 0.39, + "learning_rate": 6.684449266961101e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 4700, + "text_loss": 0.49609375 + }, + { + "epoch": 0.39, + "learning_rate": 6.683220009125106e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 4701, + "text_loss": 0.57421875 + }, + { + "epoch": 0.39, + "learning_rate": 6.681990636535862e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 4702, + "text_loss": 0.73046875 + }, + { + "epoch": 0.39, + "learning_rate": 6.6807611492771865e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 4703, + "text_loss": 0.48046875 + }, + { + "epoch": 0.39, + "learning_rate": 6.679531547432896e-06, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 4704, + "text_loss": 0.369140625 + }, + { + "epoch": 0.39, + "learning_rate": 6.678301831086817e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 4705, + "text_loss": 0.6484375 + }, + { + "epoch": 0.39, + "learning_rate": 6.677072000322788e-06, + "loss": 0.4847, + "regression_loss": 0.0, + "step": 4706, + "text_loss": 0.65234375 + }, + { + "epoch": 0.39, + "learning_rate": 6.675842055224655e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 4707, + "text_loss": 0.546875 + }, + { + "epoch": 0.39, + "learning_rate": 6.674611995876263e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 4708, + "text_loss": 0.73828125 + }, + { + "epoch": 0.39, + "learning_rate": 6.673381822361475e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 4709, + "text_loss": 0.365234375 + }, + { + "epoch": 0.39, + "learning_rate": 6.6721515347641575e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 4710, + "text_loss": 0.6015625 + }, + { + "epoch": 0.39, + "learning_rate": 6.670921133168185e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 4711, + "text_loss": 0.51953125 + }, + { + "epoch": 0.39, + "learning_rate": 6.669690617657438e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 4712, + "text_loss": 0.5859375 + }, + { + "epoch": 0.39, + "learning_rate": 6.668459988315811e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 4713, + "text_loss": 0.640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.667229245227198e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 4714, + "text_loss": 0.396484375 + }, + { + "epoch": 0.39, + "learning_rate": 6.665998388475506e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 4715, + "text_loss": 0.70703125 + }, + { + "epoch": 0.39, + "learning_rate": 6.664767418144649e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 4716, + "text_loss": 0.439453125 + }, + { + "epoch": 0.39, + "learning_rate": 6.663536334318547e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 4717, + "text_loss": 0.361328125 + }, + { + "epoch": 0.39, + "learning_rate": 6.6623051370811306e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 4718, + "text_loss": 0.5 + }, + { + "epoch": 0.39, + "learning_rate": 6.6610738265163345e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 4719, + "text_loss": 0.52734375 + }, + { + "epoch": 0.39, + "learning_rate": 6.659842402708105e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 4720, + "text_loss": 0.41015625 + }, + { + "epoch": 0.39, + "learning_rate": 6.658610865740393e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 4721, + "text_loss": 0.353515625 + }, + { + "epoch": 0.39, + "learning_rate": 6.657379215697159e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 4722, + "text_loss": 0.69140625 + }, + { + "epoch": 0.39, + "learning_rate": 6.656147452662368e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 4723, + "text_loss": 0.5390625 + }, + { + "epoch": 0.39, + "learning_rate": 6.654915576719999e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 4724, + "text_loss": 0.84375 + }, + { + "epoch": 0.39, + "learning_rate": 6.653683587954034e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 4725, + "text_loss": 0.50390625 + }, + { + "epoch": 0.39, + "learning_rate": 6.652451486448463e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 4726, + "text_loss": 0.330078125 + }, + { + "epoch": 0.39, + "learning_rate": 6.6512192722872835e-06, + "loss": 0.5385, + "regression_loss": 0.0, + "step": 4727, + "text_loss": 0.87109375 + }, + { + "epoch": 0.39, + "learning_rate": 6.649986945554502e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 4728, + "text_loss": 0.365234375 + }, + { + "epoch": 0.39, + "learning_rate": 6.648754506334134e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 4729, + "text_loss": 0.6171875 + }, + { + "epoch": 0.39, + "learning_rate": 6.647521954710199e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 4730, + "text_loss": 0.373046875 + }, + { + "epoch": 0.39, + "learning_rate": 6.646289290766725e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 4731, + "text_loss": 0.5234375 + }, + { + "epoch": 0.39, + "learning_rate": 6.645056514587751e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 4732, + "text_loss": 0.5546875 + }, + { + "epoch": 0.39, + "learning_rate": 6.643823626257321e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 4733, + "text_loss": 0.625 + }, + { + "epoch": 0.39, + "learning_rate": 6.642590625859484e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 4734, + "text_loss": 0.42578125 + }, + { + "epoch": 0.39, + "learning_rate": 6.641357513478305e-06, + "loss": 0.6201, + "regression_loss": 0.0, + "step": 4735, + "text_loss": 0.7265625 + }, + { + "epoch": 0.39, + "learning_rate": 6.640124289197845e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 4736, + "text_loss": 0.75390625 + }, + { + "epoch": 0.39, + "learning_rate": 6.638890953102185e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 4737, + "text_loss": 0.40625 + }, + { + "epoch": 0.39, + "learning_rate": 6.637657505275402e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 4738, + "text_loss": 0.640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.63642394580159e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 4739, + "text_loss": 0.44140625 + }, + { + "epoch": 0.39, + "learning_rate": 6.635190274764846e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 4740, + "text_loss": 0.6640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.633956492249271e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 4741, + "text_loss": 0.62890625 + }, + { + "epoch": 0.39, + "learning_rate": 6.632722598338985e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 4742, + "text_loss": 0.49609375 + }, + { + "epoch": 0.39, + "learning_rate": 6.631488593118104e-06, + "loss": 0.4296, + "regression_loss": 0.0, + "step": 4743, + "text_loss": 0.54296875 + }, + { + "epoch": 0.39, + "learning_rate": 6.630254476670756e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 4744, + "text_loss": 0.63671875 + }, + { + "epoch": 0.39, + "learning_rate": 6.629020249081079e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 4745, + "text_loss": 0.7265625 + }, + { + "epoch": 0.39, + "learning_rate": 6.627785910433215e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 4746, + "text_loss": 0.431640625 + }, + { + "epoch": 0.39, + "learning_rate": 6.626551460811316e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 4747, + "text_loss": 0.59375 + }, + { + "epoch": 0.39, + "learning_rate": 6.625316900299537e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 4748, + "text_loss": 0.65625 + }, + { + "epoch": 0.39, + "learning_rate": 6.6240822289820485e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 4749, + "text_loss": 0.6875 + }, + { + "epoch": 0.39, + "learning_rate": 6.62284744694302e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 4750, + "text_loss": 0.44921875 + }, + { + "epoch": 0.39, + "learning_rate": 6.621612554266634e-06, + "loss": 0.5516, + "regression_loss": 0.0, + "step": 4751, + "text_loss": 0.57421875 + }, + { + "epoch": 0.39, + "learning_rate": 6.620377551037082e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 4752, + "text_loss": 0.73046875 + }, + { + "epoch": 0.4, + "learning_rate": 6.619142437338555e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 4753, + "text_loss": 0.423828125 + }, + { + "epoch": 0.4, + "learning_rate": 6.6179072132552614e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 4754, + "text_loss": 0.64453125 + }, + { + "epoch": 0.4, + "learning_rate": 6.616671878871409e-06, + "loss": 0.657, + "regression_loss": 0.0, + "step": 4755, + "text_loss": 0.494140625 + }, + { + "epoch": 0.4, + "learning_rate": 6.6154364342712174e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 4756, + "text_loss": 0.625 + }, + { + "epoch": 0.4, + "learning_rate": 6.614200879538914e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 4757, + "text_loss": 0.49609375 + }, + { + "epoch": 0.4, + "learning_rate": 6.612965214758731e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 4758, + "text_loss": 0.25390625 + }, + { + "epoch": 0.4, + "learning_rate": 6.611729440014911e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 4759, + "text_loss": 0.400390625 + }, + { + "epoch": 0.4, + "learning_rate": 6.610493555391703e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 4760, + "text_loss": 0.43359375 + }, + { + "epoch": 0.4, + "learning_rate": 6.609257560973361e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 4761, + "text_loss": 0.53125 + }, + { + "epoch": 0.4, + "learning_rate": 6.6080214568441505e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 4762, + "text_loss": 0.41015625 + }, + { + "epoch": 0.4, + "learning_rate": 6.606785243088343e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 4763, + "text_loss": 0.337890625 + }, + { + "epoch": 0.4, + "learning_rate": 6.605548919790215e-06, + "loss": 0.6416, + "regression_loss": 0.0, + "step": 4764, + "text_loss": 0.48046875 + }, + { + "epoch": 0.4, + "learning_rate": 6.604312487034055e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 4765, + "text_loss": 0.5703125 + }, + { + "epoch": 0.4, + "learning_rate": 6.603075944904155e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 4766, + "text_loss": 0.458984375 + }, + { + "epoch": 0.4, + "learning_rate": 6.601839293484817e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 4767, + "text_loss": 0.625 + }, + { + "epoch": 0.4, + "learning_rate": 6.600602532860349e-06, + "loss": 0.5677, + "regression_loss": 0.0, + "step": 4768, + "text_loss": 0.890625 + }, + { + "epoch": 0.4, + "learning_rate": 6.599365663115068e-06, + "loss": 0.652, + "regression_loss": 0.0, + "step": 4769, + "text_loss": 0.70703125 + }, + { + "epoch": 0.4, + "learning_rate": 6.598128684333296e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 4770, + "text_loss": 0.6015625 + }, + { + "epoch": 0.4, + "learning_rate": 6.596891596599364e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 4771, + "text_loss": 0.255859375 + }, + { + "epoch": 0.4, + "learning_rate": 6.5956543999976105e-06, + "loss": 0.4138, + "regression_loss": 0.0, + "step": 4772, + "text_loss": 0.56640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.594417094612382e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 4773, + "text_loss": 0.427734375 + }, + { + "epoch": 0.4, + "learning_rate": 6.5931796805280325e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 4774, + "text_loss": 0.52734375 + }, + { + "epoch": 0.4, + "learning_rate": 6.591942157828919e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 4775, + "text_loss": 0.4140625 + }, + { + "epoch": 0.4, + "learning_rate": 6.590704526599412e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 4776, + "text_loss": 0.50390625 + }, + { + "epoch": 0.4, + "learning_rate": 6.589466786923886e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 4777, + "text_loss": 0.369140625 + }, + { + "epoch": 0.4, + "learning_rate": 6.588228938886723e-06, + "loss": 0.5182, + "regression_loss": 0.0, + "step": 4778, + "text_loss": 0.51171875 + }, + { + "epoch": 0.4, + "learning_rate": 6.586990982572316e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 4779, + "text_loss": 0.5078125 + }, + { + "epoch": 0.4, + "learning_rate": 6.585752918065058e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 4780, + "text_loss": 0.53125 + }, + { + "epoch": 0.4, + "learning_rate": 6.584514745449358e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 4781, + "text_loss": 0.64453125 + }, + { + "epoch": 0.4, + "learning_rate": 6.583276464809626e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 4782, + "text_loss": 0.373046875 + }, + { + "epoch": 0.4, + "learning_rate": 6.5820380762302815e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 4783, + "text_loss": 0.447265625 + }, + { + "epoch": 0.4, + "learning_rate": 6.580799579795751e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 4784, + "text_loss": 0.453125 + }, + { + "epoch": 0.4, + "learning_rate": 6.579560975590472e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 4785, + "text_loss": 0.6640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.578322263698882e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 4786, + "text_loss": 0.298828125 + }, + { + "epoch": 0.4, + "learning_rate": 6.577083444205433e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 4787, + "text_loss": 0.765625 + }, + { + "epoch": 0.4, + "learning_rate": 6.575844517194581e-06, + "loss": 0.5514, + "regression_loss": 0.0, + "step": 4788, + "text_loss": 0.37890625 + }, + { + "epoch": 0.4, + "learning_rate": 6.574605482750786e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 4789, + "text_loss": 0.61328125 + }, + { + "epoch": 0.4, + "learning_rate": 6.573366340958524e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 4790, + "text_loss": 0.65234375 + }, + { + "epoch": 0.4, + "learning_rate": 6.572127091902272e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 4791, + "text_loss": 0.4609375 + }, + { + "epoch": 0.4, + "learning_rate": 6.570887735666513e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 4792, + "text_loss": 0.45703125 + }, + { + "epoch": 0.4, + "learning_rate": 6.569648272335742e-06, + "loss": 0.4906, + "regression_loss": 0.0, + "step": 4793, + "text_loss": 0.58203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.568408701994459e-06, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 4794, + "text_loss": 0.3515625 + }, + { + "epoch": 0.4, + "learning_rate": 6.56716902472717e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 4795, + "text_loss": 0.31640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.565929240618393e-06, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 4796, + "text_loss": 0.43359375 + }, + { + "epoch": 0.4, + "learning_rate": 6.564689349752647e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 4797, + "text_loss": 0.59375 + }, + { + "epoch": 0.4, + "learning_rate": 6.563449352214462e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 4798, + "text_loss": 0.58984375 + }, + { + "epoch": 0.4, + "learning_rate": 6.562209248088377e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 4799, + "text_loss": 0.6171875 + }, + { + "epoch": 0.4, + "learning_rate": 6.560969037458933e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 4800, + "text_loss": 0.55078125 + }, + { + "epoch": 0.4, + "learning_rate": 6.559728720410681e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 4801, + "text_loss": 0.474609375 + }, + { + "epoch": 0.4, + "learning_rate": 6.558488297028183e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 4802, + "text_loss": 0.36328125 + }, + { + "epoch": 0.4, + "learning_rate": 6.557247767396001e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 4803, + "text_loss": 0.482421875 + }, + { + "epoch": 0.4, + "learning_rate": 6.5560071315987084e-06, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 4804, + "text_loss": 0.5859375 + }, + { + "epoch": 0.4, + "learning_rate": 6.554766389720888e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 4805, + "text_loss": 0.37109375 + }, + { + "epoch": 0.4, + "learning_rate": 6.553525541847123e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 4806, + "text_loss": 0.482421875 + }, + { + "epoch": 0.4, + "learning_rate": 6.5522845880620125e-06, + "loss": 0.4479, + "regression_loss": 0.0, + "step": 4807, + "text_loss": 0.283203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.5510435284501565e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 4808, + "text_loss": 0.58203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.549802363096162e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 4809, + "text_loss": 0.490234375 + }, + { + "epoch": 0.4, + "learning_rate": 6.548561092084647e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 4810, + "text_loss": 0.62890625 + }, + { + "epoch": 0.4, + "learning_rate": 6.547319715500235e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 4811, + "text_loss": 0.73828125 + }, + { + "epoch": 0.4, + "learning_rate": 6.546078233427556e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 4812, + "text_loss": 0.5234375 + }, + { + "epoch": 0.4, + "learning_rate": 6.544836645951249e-06, + "loss": 0.4314, + "regression_loss": 0.0, + "step": 4813, + "text_loss": 0.57421875 + }, + { + "epoch": 0.4, + "learning_rate": 6.543594953155958e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 4814, + "text_loss": 0.58203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.5423531551263365e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 4815, + "text_loss": 0.40234375 + }, + { + "epoch": 0.4, + "learning_rate": 6.541111251947043e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 4816, + "text_loss": 0.4296875 + }, + { + "epoch": 0.4, + "learning_rate": 6.539869243702742e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 4817, + "text_loss": 0.48828125 + }, + { + "epoch": 0.4, + "learning_rate": 6.53862713047811e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 4818, + "text_loss": 0.69921875 + }, + { + "epoch": 0.4, + "learning_rate": 6.537384912357827e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 4819, + "text_loss": 0.30078125 + }, + { + "epoch": 0.4, + "learning_rate": 6.536142589426582e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 4820, + "text_loss": 0.55859375 + }, + { + "epoch": 0.4, + "learning_rate": 6.534900161769069e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 4821, + "text_loss": 0.4375 + }, + { + "epoch": 0.4, + "learning_rate": 6.533657629469989e-06, + "loss": 0.6675, + "regression_loss": 0.0, + "step": 4822, + "text_loss": 0.58203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.5324149926140545e-06, + "loss": 0.4762, + "regression_loss": 0.0, + "step": 4823, + "text_loss": 0.25390625 + }, + { + "epoch": 0.4, + "learning_rate": 6.531172251285981e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 4824, + "text_loss": 0.65625 + }, + { + "epoch": 0.4, + "learning_rate": 6.52992940557049e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 4825, + "text_loss": 0.71484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.528686455552314e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 4826, + "text_loss": 0.640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.527443401316192e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 4827, + "text_loss": 0.625 + }, + { + "epoch": 0.4, + "learning_rate": 6.526200242946869e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 4828, + "text_loss": 0.4609375 + }, + { + "epoch": 0.4, + "learning_rate": 6.524956980529094e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 4829, + "text_loss": 0.322265625 + }, + { + "epoch": 0.4, + "learning_rate": 6.523713614147629e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 4830, + "text_loss": 0.66796875 + }, + { + "epoch": 0.4, + "learning_rate": 6.5224701438872395e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 4831, + "text_loss": 0.6484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.521226569832699e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 4832, + "text_loss": 0.52734375 + }, + { + "epoch": 0.4, + "learning_rate": 6.519982892068788e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 4833, + "text_loss": 0.88671875 + }, + { + "epoch": 0.4, + "learning_rate": 6.518739110680293e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 4834, + "text_loss": 0.57421875 + }, + { + "epoch": 0.4, + "learning_rate": 6.517495225752011e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4835, + "text_loss": 0.58203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.51625123736874e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 4836, + "text_loss": 0.5859375 + }, + { + "epoch": 0.4, + "learning_rate": 6.515007145615293e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 4837, + "text_loss": 0.46484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.513762950576483e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 4838, + "text_loss": 0.5703125 + }, + { + "epoch": 0.4, + "learning_rate": 6.512518652337133e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 4839, + "text_loss": 0.4921875 + }, + { + "epoch": 0.4, + "learning_rate": 6.511274250982074e-06, + "loss": 0.615, + "regression_loss": 0.0, + "step": 4840, + "text_loss": 0.63671875 + }, + { + "epoch": 0.4, + "learning_rate": 6.510029746596141e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 4841, + "text_loss": 0.62109375 + }, + { + "epoch": 0.4, + "learning_rate": 6.50878513926418e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 4842, + "text_loss": 0.53515625 + }, + { + "epoch": 0.4, + "learning_rate": 6.50754042907104e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 4843, + "text_loss": 0.6328125 + }, + { + "epoch": 0.4, + "learning_rate": 6.50629561610158e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 4844, + "text_loss": 0.52734375 + }, + { + "epoch": 0.4, + "learning_rate": 6.505050700440665e-06, + "loss": 0.6157, + "regression_loss": 0.0, + "step": 4845, + "text_loss": 0.6640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.503805682173165e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 4846, + "text_loss": 0.61328125 + }, + { + "epoch": 0.4, + "learning_rate": 6.502560561383964e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 4847, + "text_loss": 0.54296875 + }, + { + "epoch": 0.4, + "learning_rate": 6.501315338157942e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 4848, + "text_loss": 0.408203125 + }, + { + "epoch": 0.4, + "learning_rate": 6.5000700125799955e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 4849, + "text_loss": 0.41796875 + }, + { + "epoch": 0.4, + "learning_rate": 6.498824584735022e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 4850, + "text_loss": 0.388671875 + }, + { + "epoch": 0.4, + "learning_rate": 6.49757905470793e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 4851, + "text_loss": 0.484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.496333422583633e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 4852, + "text_loss": 0.43359375 + }, + { + "epoch": 0.4, + "learning_rate": 6.4950876884470505e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 4853, + "text_loss": 0.271484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.4938418523831115e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 4854, + "text_loss": 0.36328125 + }, + { + "epoch": 0.4, + "learning_rate": 6.492595914476752e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 4855, + "text_loss": 0.578125 + }, + { + "epoch": 0.4, + "learning_rate": 6.491349874812911e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 4856, + "text_loss": 0.46484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.490103733476538e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 4857, + "text_loss": 0.41015625 + }, + { + "epoch": 0.4, + "learning_rate": 6.48885749055259e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 4858, + "text_loss": 0.59765625 + }, + { + "epoch": 0.4, + "learning_rate": 6.487611146126027e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 4859, + "text_loss": 0.291015625 + }, + { + "epoch": 0.4, + "learning_rate": 6.4863647002818186e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 4860, + "text_loss": 0.56640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.485118153104943e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 4861, + "text_loss": 0.46875 + }, + { + "epoch": 0.4, + "learning_rate": 6.4838715046803824e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 4862, + "text_loss": 0.431640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.482624755093125e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 4863, + "text_loss": 0.51953125 + }, + { + "epoch": 0.4, + "learning_rate": 6.481377904428171e-06, + "loss": 0.463, + "regression_loss": 0.0, + "step": 4864, + "text_loss": 0.396484375 + }, + { + "epoch": 0.4, + "learning_rate": 6.48013095277052e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 4865, + "text_loss": 0.65625 + }, + { + "epoch": 0.4, + "learning_rate": 6.478883900205188e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 4866, + "text_loss": 0.244140625 + }, + { + "epoch": 0.4, + "learning_rate": 6.47763674681719e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 4867, + "text_loss": 0.70703125 + }, + { + "epoch": 0.4, + "learning_rate": 6.476389492691548e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 4868, + "text_loss": 0.6640625 + }, + { + "epoch": 0.4, + "learning_rate": 6.475142137913299e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 4869, + "text_loss": 0.43359375 + }, + { + "epoch": 0.4, + "learning_rate": 6.473894682567476e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 4870, + "text_loss": 0.4375 + }, + { + "epoch": 0.4, + "learning_rate": 6.472647126739124e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 4871, + "text_loss": 0.51953125 + }, + { + "epoch": 0.4, + "learning_rate": 6.471399470513301e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 4872, + "text_loss": 0.76953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.470151713975059e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 4873, + "text_loss": 0.6875 + }, + { + "epoch": 0.41, + "learning_rate": 6.468903857209468e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 4874, + "text_loss": 0.609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.467655900301597e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 4875, + "text_loss": 0.6171875 + }, + { + "epoch": 0.41, + "learning_rate": 6.466407843336527e-06, + "loss": 0.5485, + "regression_loss": 0.0, + "step": 4876, + "text_loss": 0.85546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.465159686399346e-06, + "loss": 0.6787, + "regression_loss": 0.0, + "step": 4877, + "text_loss": 0.7578125 + }, + { + "epoch": 0.41, + "learning_rate": 6.463911429575143e-06, + "loss": 0.6504, + "regression_loss": 0.0, + "step": 4878, + "text_loss": 0.5625 + }, + { + "epoch": 0.41, + "learning_rate": 6.462663072949021e-06, + "loss": 0.6318, + "regression_loss": 0.0, + "step": 4879, + "text_loss": 1.0234375 + }, + { + "epoch": 0.41, + "learning_rate": 6.461414616606083e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 4880, + "text_loss": 0.875 + }, + { + "epoch": 0.41, + "learning_rate": 6.460166060631447e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 4881, + "text_loss": 0.5703125 + }, + { + "epoch": 0.41, + "learning_rate": 6.458917405110227e-06, + "loss": 0.427, + "regression_loss": 0.0, + "step": 4882, + "text_loss": 0.3359375 + }, + { + "epoch": 0.41, + "learning_rate": 6.457668650127558e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 4883, + "text_loss": 0.62890625 + }, + { + "epoch": 0.41, + "learning_rate": 6.456419795768566e-06, + "loss": 0.4019, + "regression_loss": 0.0, + "step": 4884, + "text_loss": 0.2578125 + }, + { + "epoch": 0.41, + "learning_rate": 6.455170842118394e-06, + "loss": 0.465, + "regression_loss": 0.0, + "step": 4885, + "text_loss": 0.349609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.4539217892621934e-06, + "loss": 0.6401, + "regression_loss": 0.0, + "step": 4886, + "text_loss": 0.52734375 + }, + { + "epoch": 0.41, + "learning_rate": 6.452672637285111e-06, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 4887, + "text_loss": 0.625 + }, + { + "epoch": 0.41, + "learning_rate": 6.451423386272312e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 4888, + "text_loss": 0.328125 + }, + { + "epoch": 0.41, + "learning_rate": 6.450174036308964e-06, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 4889, + "text_loss": 0.263671875 + }, + { + "epoch": 0.41, + "learning_rate": 6.448924587480239e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 4890, + "text_loss": 0.97265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.44767503987132e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 4891, + "text_loss": 0.427734375 + }, + { + "epoch": 0.41, + "learning_rate": 6.4464253935673926e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 4892, + "text_loss": 0.50390625 + }, + { + "epoch": 0.41, + "learning_rate": 6.445175648653653e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 4893, + "text_loss": 0.337890625 + }, + { + "epoch": 0.41, + "learning_rate": 6.443925805215303e-06, + "loss": 0.6887, + "regression_loss": 0.0, + "step": 4894, + "text_loss": 0.8125 + }, + { + "epoch": 0.41, + "learning_rate": 6.442675863337547e-06, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 4895, + "text_loss": 0.2578125 + }, + { + "epoch": 0.41, + "learning_rate": 6.441425823105603e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 4896, + "text_loss": 0.68359375 + }, + { + "epoch": 0.41, + "learning_rate": 6.440175684604692e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 4897, + "text_loss": 0.48046875 + }, + { + "epoch": 0.41, + "learning_rate": 6.4389254479200395e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 4898, + "text_loss": 0.48046875 + }, + { + "epoch": 0.41, + "learning_rate": 6.437675113136882e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 4899, + "text_loss": 0.51953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.436424680340462e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 4900, + "text_loss": 0.39453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.435174149616026e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 4901, + "text_loss": 0.48828125 + }, + { + "epoch": 0.41, + "learning_rate": 6.433923521048827e-06, + "loss": 0.5994, + "regression_loss": 0.0, + "step": 4902, + "text_loss": 0.58984375 + }, + { + "epoch": 0.41, + "learning_rate": 6.432672794724131e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 4903, + "text_loss": 0.84765625 + }, + { + "epoch": 0.41, + "learning_rate": 6.4314219707272005e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 4904, + "text_loss": 0.26953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.4301710491433165e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 4905, + "text_loss": 0.453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.4289200300577545e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 4906, + "text_loss": 0.279296875 + }, + { + "epoch": 0.41, + "learning_rate": 6.427668913555806e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 4907, + "text_loss": 0.6015625 + }, + { + "epoch": 0.41, + "learning_rate": 6.426417699722765e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 4908, + "text_loss": 0.6953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.425166388643931e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 4909, + "text_loss": 0.6640625 + }, + { + "epoch": 0.41, + "learning_rate": 6.423914980404614e-06, + "loss": 0.646, + "regression_loss": 0.0, + "step": 4910, + "text_loss": 0.72265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.4226634750901295e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 4911, + "text_loss": 0.71875 + }, + { + "epoch": 0.41, + "learning_rate": 6.421411872785794e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 4912, + "text_loss": 0.451171875 + }, + { + "epoch": 0.41, + "learning_rate": 6.4201601735769415e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 4913, + "text_loss": 0.384765625 + }, + { + "epoch": 0.41, + "learning_rate": 6.418908377548902e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 4914, + "text_loss": 0.85546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.417656484787017e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 4915, + "text_loss": 0.30078125 + }, + { + "epoch": 0.41, + "learning_rate": 6.416404495376637e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 4916, + "text_loss": 0.67578125 + }, + { + "epoch": 0.41, + "learning_rate": 6.415152409403111e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 4917, + "text_loss": 1.0234375 + }, + { + "epoch": 0.41, + "learning_rate": 6.413900226951806e-06, + "loss": 0.3597, + "regression_loss": 0.0, + "step": 4918, + "text_loss": 0.2060546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.412647948108085e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 4919, + "text_loss": 0.66796875 + }, + { + "epoch": 0.41, + "learning_rate": 6.4113955729573215e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 4920, + "text_loss": 0.4453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.410143101584899e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 4921, + "text_loss": 0.5625 + }, + { + "epoch": 0.41, + "learning_rate": 6.408890534076203e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 4922, + "text_loss": 0.69921875 + }, + { + "epoch": 0.41, + "learning_rate": 6.407637870516626e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 4923, + "text_loss": 0.4609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.406385110991571e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 4924, + "text_loss": 0.609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.405132255586441e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 4925, + "text_loss": 0.66796875 + }, + { + "epoch": 0.41, + "learning_rate": 6.403879304386653e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 4926, + "text_loss": 0.4765625 + }, + { + "epoch": 0.41, + "learning_rate": 6.402626257477623e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 4927, + "text_loss": 0.65234375 + }, + { + "epoch": 0.41, + "learning_rate": 6.401373114944781e-06, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 4928, + "text_loss": 0.3671875 + }, + { + "epoch": 0.41, + "learning_rate": 6.4001198768735565e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 4929, + "text_loss": 0.302734375 + }, + { + "epoch": 0.41, + "learning_rate": 6.398866543349391e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 4930, + "text_loss": 0.6484375 + }, + { + "epoch": 0.41, + "learning_rate": 6.39761311445773e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 4931, + "text_loss": 0.376953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.396359590284023e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 4932, + "text_loss": 0.83203125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3951059709137345e-06, + "loss": 0.6174, + "regression_loss": 0.0, + "step": 4933, + "text_loss": 0.703125 + }, + { + "epoch": 0.41, + "learning_rate": 6.393852256432324e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 4934, + "text_loss": 0.5625 + }, + { + "epoch": 0.41, + "learning_rate": 6.392598446925266e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 4935, + "text_loss": 0.58203125 + }, + { + "epoch": 0.41, + "learning_rate": 6.391344542478039e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 4936, + "text_loss": 0.5 + }, + { + "epoch": 0.41, + "learning_rate": 6.390090543176127e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 4937, + "text_loss": 0.3359375 + }, + { + "epoch": 0.41, + "learning_rate": 6.388836449105021e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 4938, + "text_loss": 0.51953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.38758226035022e-06, + "loss": 0.4476, + "regression_loss": 0.0, + "step": 4939, + "text_loss": 0.72265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.386327976997226e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 4940, + "text_loss": 0.66015625 + }, + { + "epoch": 0.41, + "learning_rate": 6.3850735991315515e-06, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 4941, + "text_loss": 0.296875 + }, + { + "epoch": 0.41, + "learning_rate": 6.3838191268387124e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 4942, + "text_loss": 0.3203125 + }, + { + "epoch": 0.41, + "learning_rate": 6.382564560204233e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 4943, + "text_loss": 0.380859375 + }, + { + "epoch": 0.41, + "learning_rate": 6.381309899313642e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 4944, + "text_loss": 0.72265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.380055144252477e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 4945, + "text_loss": 0.7734375 + }, + { + "epoch": 0.41, + "learning_rate": 6.378800295106279e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 4946, + "text_loss": 0.546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.377545351960599e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 4947, + "text_loss": 0.287109375 + }, + { + "epoch": 0.41, + "learning_rate": 6.376290314900991e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 4948, + "text_loss": 0.78125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3750351840130186e-06, + "loss": 0.7019, + "regression_loss": 0.0, + "step": 4949, + "text_loss": 0.6875 + }, + { + "epoch": 0.41, + "learning_rate": 6.373779959382249e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 4950, + "text_loss": 0.7578125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3725246410942564e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 4951, + "text_loss": 0.2109375 + }, + { + "epoch": 0.41, + "learning_rate": 6.3712692292346234e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 4952, + "text_loss": 0.6796875 + }, + { + "epoch": 0.41, + "learning_rate": 6.370013723888937e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 4953, + "text_loss": 0.8125 + }, + { + "epoch": 0.41, + "learning_rate": 6.36875812514279e-06, + "loss": 0.4139, + "regression_loss": 0.0, + "step": 4954, + "text_loss": 0.5625 + }, + { + "epoch": 0.41, + "learning_rate": 6.367502433081784e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 4955, + "text_loss": 0.546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.366246647791526e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 4956, + "text_loss": 0.486328125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3649907693576265e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 4957, + "text_loss": 0.36328125 + }, + { + "epoch": 0.41, + "learning_rate": 6.363734797865706e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 4958, + "text_loss": 0.55078125 + }, + { + "epoch": 0.41, + "learning_rate": 6.362478733401391e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 4959, + "text_loss": 0.2734375 + }, + { + "epoch": 0.41, + "learning_rate": 6.361222576050312e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 4960, + "text_loss": 0.345703125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3599663258981105e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 4961, + "text_loss": 0.765625 + }, + { + "epoch": 0.41, + "learning_rate": 6.358709983030428e-06, + "loss": 0.6953, + "regression_loss": 0.0, + "step": 4962, + "text_loss": 0.89453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.357453547532915e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 4963, + "text_loss": 0.6328125 + }, + { + "epoch": 0.41, + "learning_rate": 6.356197019491232e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 4964, + "text_loss": 0.357421875 + }, + { + "epoch": 0.41, + "learning_rate": 6.3549403989910405e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 4965, + "text_loss": 0.375 + }, + { + "epoch": 0.41, + "learning_rate": 6.3536836861180105e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 4966, + "text_loss": 0.5546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.3524268809578185e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 4967, + "text_loss": 0.59765625 + }, + { + "epoch": 0.41, + "learning_rate": 6.351169983596146e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 4968, + "text_loss": 0.8203125 + }, + { + "epoch": 0.41, + "learning_rate": 6.349912994118684e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 4969, + "text_loss": 0.703125 + }, + { + "epoch": 0.41, + "learning_rate": 6.348655912611126e-06, + "loss": 0.428, + "regression_loss": 0.0, + "step": 4970, + "text_loss": 0.3984375 + }, + { + "epoch": 0.41, + "learning_rate": 6.347398739159172e-06, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 4971, + "text_loss": 0.3515625 + }, + { + "epoch": 0.41, + "learning_rate": 6.346141473848533e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 4972, + "text_loss": 0.5234375 + }, + { + "epoch": 0.41, + "learning_rate": 6.344884116764919e-06, + "loss": 0.6399, + "regression_loss": 0.0, + "step": 4973, + "text_loss": 0.546875 + }, + { + "epoch": 0.41, + "learning_rate": 6.3436266679940524e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 4974, + "text_loss": 0.357421875 + }, + { + "epoch": 0.41, + "learning_rate": 6.342369127621661e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 4975, + "text_loss": 0.72265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.341111495733474e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 4976, + "text_loss": 0.453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.339853772415232e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 4977, + "text_loss": 0.78515625 + }, + { + "epoch": 0.41, + "learning_rate": 6.338595957752681e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 4978, + "text_loss": 0.4609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.33733805183157e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 4979, + "text_loss": 0.55859375 + }, + { + "epoch": 0.41, + "learning_rate": 6.336080054737657e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 4980, + "text_loss": 0.494140625 + }, + { + "epoch": 0.41, + "learning_rate": 6.33482196655671e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 4981, + "text_loss": 0.447265625 + }, + { + "epoch": 0.41, + "learning_rate": 6.333563787374493e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 4982, + "text_loss": 0.57421875 + }, + { + "epoch": 0.41, + "learning_rate": 6.332305517276784e-06, + "loss": 0.6145, + "regression_loss": 0.0, + "step": 4983, + "text_loss": 0.51953125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3310471563493685e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 4984, + "text_loss": 0.74609375 + }, + { + "epoch": 0.41, + "learning_rate": 6.32978870467803e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 4985, + "text_loss": 0.53515625 + }, + { + "epoch": 0.41, + "learning_rate": 6.328530162348567e-06, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 4986, + "text_loss": 0.453125 + }, + { + "epoch": 0.41, + "learning_rate": 6.327271529446779e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 4987, + "text_loss": 0.70703125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3260128060584725e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 4988, + "text_loss": 0.423828125 + }, + { + "epoch": 0.41, + "learning_rate": 6.324753992269463e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 4989, + "text_loss": 0.361328125 + }, + { + "epoch": 0.41, + "learning_rate": 6.3234950881655675e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 4990, + "text_loss": 0.76171875 + }, + { + "epoch": 0.41, + "learning_rate": 6.322236093832614e-06, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 4991, + "text_loss": 0.81640625 + }, + { + "epoch": 0.41, + "learning_rate": 6.3209770093564315e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 4992, + "text_loss": 0.640625 + }, + { + "epoch": 0.41, + "learning_rate": 6.3197178348228586e-06, + "loss": 0.4833, + "regression_loss": 0.0, + "step": 4993, + "text_loss": 0.5625 + }, + { + "epoch": 0.42, + "learning_rate": 6.318458570317741e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 4994, + "text_loss": 0.55078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.317199215926928e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 4995, + "text_loss": 0.4765625 + }, + { + "epoch": 0.42, + "learning_rate": 6.3159397717362736e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 4996, + "text_loss": 0.384765625 + }, + { + "epoch": 0.42, + "learning_rate": 6.314680237831643e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 4997, + "text_loss": 0.478515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.313420614298906e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 4998, + "text_loss": 0.58203125 + }, + { + "epoch": 0.42, + "learning_rate": 6.3121609012239325e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 4999, + "text_loss": 0.302734375 + }, + { + "epoch": 0.42, + "learning_rate": 6.3109010986926065e-06, + "loss": 0.3955, + "regression_loss": 0.0, + "step": 5000, + "text_loss": 0.56640625 + }, + { + "epoch": 0.42, + "learning_rate": 6.309641206790816e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 5001, + "text_loss": 0.44140625 + }, + { + "epoch": 0.42, + "learning_rate": 6.30838122560445e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 5002, + "text_loss": 0.58203125 + }, + { + "epoch": 0.42, + "learning_rate": 6.307121155219411e-06, + "loss": 0.6331, + "regression_loss": 0.0, + "step": 5003, + "text_loss": 0.439453125 + }, + { + "epoch": 0.42, + "learning_rate": 6.305860995721602e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 5004, + "text_loss": 0.380859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.304600747196935e-06, + "loss": 0.4248, + "regression_loss": 0.0, + "step": 5005, + "text_loss": 0.427734375 + }, + { + "epoch": 0.42, + "learning_rate": 6.303340409731326e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 5006, + "text_loss": 0.302734375 + }, + { + "epoch": 0.42, + "learning_rate": 6.3020799834107e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 5007, + "text_loss": 0.6015625 + }, + { + "epoch": 0.42, + "learning_rate": 6.300819468320988e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 5008, + "text_loss": 0.6796875 + }, + { + "epoch": 0.42, + "learning_rate": 6.299558864548121e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 5009, + "text_loss": 0.49609375 + }, + { + "epoch": 0.42, + "learning_rate": 6.298298172178043e-06, + "loss": 0.4872, + "regression_loss": 0.0, + "step": 5010, + "text_loss": 0.5078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.297037391296702e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 5011, + "text_loss": 0.5078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.29577652199005e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 5012, + "text_loss": 0.5390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.294515564344047e-06, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 5013, + "text_loss": 0.400390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.29325451844466e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 5014, + "text_loss": 0.490234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.291993384377858e-06, + "loss": 0.5275, + "regression_loss": 0.0, + "step": 5015, + "text_loss": 0.74609375 + }, + { + "epoch": 0.42, + "learning_rate": 6.290732162229621e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 5016, + "text_loss": 0.6328125 + }, + { + "epoch": 0.42, + "learning_rate": 6.28947085208593e-06, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 5017, + "text_loss": 0.404296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.2882094540327785e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 5018, + "text_loss": 0.46875 + }, + { + "epoch": 0.42, + "learning_rate": 6.286947968156158e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 5019, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.285686394542073e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 5020, + "text_loss": 0.609375 + }, + { + "epoch": 0.42, + "learning_rate": 6.284424733276529e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 5021, + "text_loss": 0.640625 + }, + { + "epoch": 0.42, + "learning_rate": 6.283162984445541e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 5022, + "text_loss": 0.80859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.28190114813513e-06, + "loss": 0.5187, + "regression_loss": 0.0, + "step": 5023, + "text_loss": 0.7421875 + }, + { + "epoch": 0.42, + "learning_rate": 6.280639224431317e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 5024, + "text_loss": 0.6328125 + }, + { + "epoch": 0.42, + "learning_rate": 6.279377213420137e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 5025, + "text_loss": 0.61328125 + }, + { + "epoch": 0.42, + "learning_rate": 6.278115115187626e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 5026, + "text_loss": 0.6015625 + }, + { + "epoch": 0.42, + "learning_rate": 6.276852929819828e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 5027, + "text_loss": 0.30078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.275590657402792e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 5028, + "text_loss": 0.48046875 + }, + { + "epoch": 0.42, + "learning_rate": 6.274328298022574e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 5029, + "text_loss": 0.42578125 + }, + { + "epoch": 0.42, + "learning_rate": 6.273065851765233e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 5030, + "text_loss": 0.25390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.271803318716838e-06, + "loss": 0.5153, + "regression_loss": 0.0, + "step": 5031, + "text_loss": 0.326171875 + }, + { + "epoch": 0.42, + "learning_rate": 6.270540698963462e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 5032, + "text_loss": 0.55078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.269277992591182e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 5033, + "text_loss": 0.36328125 + }, + { + "epoch": 0.42, + "learning_rate": 6.268015199686086e-06, + "loss": 0.4015, + "regression_loss": 0.0, + "step": 5034, + "text_loss": 0.365234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.266752320334261e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 5035, + "text_loss": 0.46875 + }, + { + "epoch": 0.42, + "learning_rate": 6.265489354621806e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 5036, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.264226302634824e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 5037, + "text_loss": 0.5390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.26296316445942e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 5038, + "text_loss": 0.6796875 + }, + { + "epoch": 0.42, + "learning_rate": 6.261699940181711e-06, + "loss": 0.5686, + "regression_loss": 0.0, + "step": 5039, + "text_loss": 0.5703125 + }, + { + "epoch": 0.42, + "learning_rate": 6.2604366298878184e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 5040, + "text_loss": 0.48828125 + }, + { + "epoch": 0.42, + "learning_rate": 6.2591732336638645e-06, + "loss": 0.4598, + "regression_loss": 0.0, + "step": 5041, + "text_loss": 0.291015625 + }, + { + "epoch": 0.42, + "learning_rate": 6.257909751595984e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 5042, + "text_loss": 0.5078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.256646183770313e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 5043, + "text_loss": 0.44140625 + }, + { + "epoch": 0.42, + "learning_rate": 6.2553825302729956e-06, + "loss": 0.6072, + "regression_loss": 0.0, + "step": 5044, + "text_loss": 0.26171875 + }, + { + "epoch": 0.42, + "learning_rate": 6.254118791190183e-06, + "loss": 0.4198, + "regression_loss": 0.0, + "step": 5045, + "text_loss": 0.373046875 + }, + { + "epoch": 0.42, + "learning_rate": 6.252854966608025e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 5046, + "text_loss": 0.67578125 + }, + { + "epoch": 0.42, + "learning_rate": 6.251591056612688e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 5047, + "text_loss": 0.392578125 + }, + { + "epoch": 0.42, + "learning_rate": 6.250327061290337e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 5048, + "text_loss": 0.73046875 + }, + { + "epoch": 0.42, + "learning_rate": 6.249062980727146e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 5049, + "text_loss": 0.33984375 + }, + { + "epoch": 0.42, + "learning_rate": 6.24779881500929e-06, + "loss": 0.5074, + "regression_loss": 0.0, + "step": 5050, + "text_loss": 0.59375 + }, + { + "epoch": 0.42, + "learning_rate": 6.246534564222957e-06, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 5051, + "text_loss": 0.306640625 + }, + { + "epoch": 0.42, + "learning_rate": 6.245270228454335e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 5052, + "text_loss": 0.62109375 + }, + { + "epoch": 0.42, + "learning_rate": 6.2440058077896205e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 5053, + "text_loss": 0.5859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.242741302315014e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 5054, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.241476712116725e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 5055, + "text_loss": 0.462890625 + }, + { + "epoch": 0.42, + "learning_rate": 6.240212037280967e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 5056, + "text_loss": 0.54296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.238947277893958e-06, + "loss": 0.6296, + "regression_loss": 0.0, + "step": 5057, + "text_loss": 0.439453125 + }, + { + "epoch": 0.42, + "learning_rate": 6.2376824340419205e-06, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 5058, + "text_loss": 0.8984375 + }, + { + "epoch": 0.42, + "learning_rate": 6.236417505811089e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 5059, + "text_loss": 0.37109375 + }, + { + "epoch": 0.42, + "learning_rate": 6.235152493287696e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 5060, + "text_loss": 0.330078125 + }, + { + "epoch": 0.42, + "learning_rate": 6.233887396557986e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 5061, + "text_loss": 0.490234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.232622215708207e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 5062, + "text_loss": 1.0234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.231356950824612e-06, + "loss": 0.4862, + "regression_loss": 0.0, + "step": 5063, + "text_loss": 0.54296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.230091601993459e-06, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 5064, + "text_loss": 0.390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.228826169301013e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 5065, + "text_loss": 0.6953125 + }, + { + "epoch": 0.42, + "learning_rate": 6.227560652833547e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 5066, + "text_loss": 0.73046875 + }, + { + "epoch": 0.42, + "learning_rate": 6.226295052677336e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 5067, + "text_loss": 0.31640625 + }, + { + "epoch": 0.42, + "learning_rate": 6.22502936891866e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 5068, + "text_loss": 0.380859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.2237636016438094e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 5069, + "text_loss": 0.53125 + }, + { + "epoch": 0.42, + "learning_rate": 6.222497750939078e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 5070, + "text_loss": 0.453125 + }, + { + "epoch": 0.42, + "learning_rate": 6.221231816890764e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 5071, + "text_loss": 0.447265625 + }, + { + "epoch": 0.42, + "learning_rate": 6.219965799585172e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 5072, + "text_loss": 0.333984375 + }, + { + "epoch": 0.42, + "learning_rate": 6.218699699108613e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 5073, + "text_loss": 0.3515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.217433515547402e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 5074, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.216167248987861e-06, + "loss": 0.5502, + "regression_loss": 0.0, + "step": 5075, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.21490089951632e-06, + "loss": 0.417, + "regression_loss": 0.0, + "step": 5076, + "text_loss": 0.69140625 + }, + { + "epoch": 0.42, + "learning_rate": 6.2136344672191095e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 5077, + "text_loss": 0.404296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.21236795218257e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 5078, + "text_loss": 0.287109375 + }, + { + "epoch": 0.42, + "learning_rate": 6.211101354493045e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 5079, + "text_loss": 0.67578125 + }, + { + "epoch": 0.42, + "learning_rate": 6.209834674236885e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 5080, + "text_loss": 0.65234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.208567911500446e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 5081, + "text_loss": 0.5546875 + }, + { + "epoch": 0.42, + "learning_rate": 6.2073010663700874e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 5082, + "text_loss": 0.4375 + }, + { + "epoch": 0.42, + "learning_rate": 6.206034138932178e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 5083, + "text_loss": 0.75390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.204767129273092e-06, + "loss": 0.4908, + "regression_loss": 0.0, + "step": 5084, + "text_loss": 0.5234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.203500037479203e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 5085, + "text_loss": 0.6796875 + }, + { + "epoch": 0.42, + "learning_rate": 6.202232863636898e-06, + "loss": 0.6624, + "regression_loss": 0.0, + "step": 5086, + "text_loss": 0.703125 + }, + { + "epoch": 0.42, + "learning_rate": 6.200965607832566e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 5087, + "text_loss": 1.109375 + }, + { + "epoch": 0.42, + "learning_rate": 6.199698270152602e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 5088, + "text_loss": 0.55859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.198430850683405e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 5089, + "text_loss": 0.65625 + }, + { + "epoch": 0.42, + "learning_rate": 6.197163349511384e-06, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 5090, + "text_loss": 0.47265625 + }, + { + "epoch": 0.42, + "learning_rate": 6.195895766722948e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 5091, + "text_loss": 0.58984375 + }, + { + "epoch": 0.42, + "learning_rate": 6.1946281024045154e-06, + "loss": 0.615, + "regression_loss": 0.0, + "step": 5092, + "text_loss": 0.75390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.19336035664251e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 5093, + "text_loss": 0.58984375 + }, + { + "epoch": 0.42, + "learning_rate": 6.192092529523357e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 5094, + "text_loss": 0.75390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.190824621133494e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 5095, + "text_loss": 0.365234375 + }, + { + "epoch": 0.42, + "learning_rate": 6.1895566315593584e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 5096, + "text_loss": 0.384765625 + }, + { + "epoch": 0.42, + "learning_rate": 6.188288560887395e-06, + "loss": 0.6509, + "regression_loss": 0.0, + "step": 5097, + "text_loss": 0.9296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.187020409204055e-06, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 5098, + "text_loss": 0.34375 + }, + { + "epoch": 0.42, + "learning_rate": 6.185752176595794e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 5099, + "text_loss": 0.53515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.184483863149072e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 5100, + "text_loss": 0.6640625 + }, + { + "epoch": 0.42, + "learning_rate": 6.183215468950361e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 5101, + "text_loss": 0.4296875 + }, + { + "epoch": 0.42, + "learning_rate": 6.18194699408613e-06, + "loss": 0.4139, + "regression_loss": 0.0, + "step": 5102, + "text_loss": 0.353515625 + }, + { + "epoch": 0.42, + "learning_rate": 6.180678438642856e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 5103, + "text_loss": 0.6484375 + }, + { + "epoch": 0.42, + "learning_rate": 6.179409802707026e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 5104, + "text_loss": 0.55859375 + }, + { + "epoch": 0.42, + "learning_rate": 6.178141086365126e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 5105, + "text_loss": 0.298828125 + }, + { + "epoch": 0.42, + "learning_rate": 6.176872289703653e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 5106, + "text_loss": 0.50390625 + }, + { + "epoch": 0.42, + "learning_rate": 6.1756034128091035e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 5107, + "text_loss": 0.484375 + }, + { + "epoch": 0.42, + "learning_rate": 6.174334455767987e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 5108, + "text_loss": 0.287109375 + }, + { + "epoch": 0.42, + "learning_rate": 6.173065418666812e-06, + "loss": 0.6248, + "regression_loss": 0.0, + "step": 5109, + "text_loss": 0.76953125 + }, + { + "epoch": 0.42, + "learning_rate": 6.171796301592095e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 5110, + "text_loss": 0.384765625 + }, + { + "epoch": 0.42, + "learning_rate": 6.170527104630358e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 5111, + "text_loss": 0.68359375 + }, + { + "epoch": 0.42, + "learning_rate": 6.16925782786813e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 5112, + "text_loss": 0.470703125 + }, + { + "epoch": 0.42, + "learning_rate": 6.167988471391941e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 5113, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.166719035288331e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 5114, + "text_loss": 0.62890625 + }, + { + "epoch": 0.43, + "learning_rate": 6.165449519643845e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 5115, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.164179924545027e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 5116, + "text_loss": 0.404296875 + }, + { + "epoch": 0.43, + "learning_rate": 6.162910250078436e-06, + "loss": 0.5055, + "regression_loss": 0.0, + "step": 5117, + "text_loss": 0.625 + }, + { + "epoch": 0.43, + "learning_rate": 6.161640496330631e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 5118, + "text_loss": 0.75390625 + }, + { + "epoch": 0.43, + "learning_rate": 6.160370663388175e-06, + "loss": 0.428, + "regression_loss": 0.0, + "step": 5119, + "text_loss": 0.61328125 + }, + { + "epoch": 0.43, + "learning_rate": 6.1591007513376425e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 5120, + "text_loss": 0.2734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.157830760265606e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 5121, + "text_loss": 0.609375 + }, + { + "epoch": 0.43, + "learning_rate": 6.156560690258648e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5122, + "text_loss": 0.478515625 + }, + { + "epoch": 0.43, + "learning_rate": 6.155290541403357e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 5123, + "text_loss": 0.482421875 + }, + { + "epoch": 0.43, + "learning_rate": 6.154020313786322e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 5124, + "text_loss": 0.55859375 + }, + { + "epoch": 0.43, + "learning_rate": 6.152750007494144e-06, + "loss": 0.4589, + "regression_loss": 0.0, + "step": 5125, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.151479622613423e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 5126, + "text_loss": 0.41796875 + }, + { + "epoch": 0.43, + "learning_rate": 6.150209159230769e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 5127, + "text_loss": 0.392578125 + }, + { + "epoch": 0.43, + "learning_rate": 6.148938617432794e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 5128, + "text_loss": 0.75390625 + }, + { + "epoch": 0.43, + "learning_rate": 6.147667997306121e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 5129, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.14639729893737e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 5130, + "text_loss": 0.44140625 + }, + { + "epoch": 0.43, + "learning_rate": 6.145126522413172e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 5131, + "text_loss": 0.61328125 + }, + { + "epoch": 0.43, + "learning_rate": 6.1438556678201625e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 5132, + "text_loss": 0.76953125 + }, + { + "epoch": 0.43, + "learning_rate": 6.142584735244981e-06, + "loss": 0.4548, + "regression_loss": 0.0, + "step": 5133, + "text_loss": 0.51171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.141313724774273e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 5134, + "text_loss": 0.349609375 + }, + { + "epoch": 0.43, + "learning_rate": 6.140042636494691e-06, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 5135, + "text_loss": 0.6484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.138771470492889e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 5136, + "text_loss": 0.474609375 + }, + { + "epoch": 0.43, + "learning_rate": 6.137500226855532e-06, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 5137, + "text_loss": 0.3671875 + }, + { + "epoch": 0.43, + "learning_rate": 6.136228905669282e-06, + "loss": 0.5964, + "regression_loss": 0.0, + "step": 5138, + "text_loss": 0.6171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.134957507020815e-06, + "loss": 0.5016, + "regression_loss": 0.0, + "step": 5139, + "text_loss": 0.56640625 + }, + { + "epoch": 0.43, + "learning_rate": 6.133686030996807e-06, + "loss": 0.6831, + "regression_loss": 0.0, + "step": 5140, + "text_loss": 0.73046875 + }, + { + "epoch": 0.43, + "learning_rate": 6.132414477683941e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 5141, + "text_loss": 0.53125 + }, + { + "epoch": 0.43, + "learning_rate": 6.131142847168904e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 5142, + "text_loss": 0.267578125 + }, + { + "epoch": 0.43, + "learning_rate": 6.12987113953839e-06, + "loss": 0.6228, + "regression_loss": 0.0, + "step": 5143, + "text_loss": 0.60546875 + }, + { + "epoch": 0.43, + "learning_rate": 6.1285993548790974e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 5144, + "text_loss": 0.71484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.12732749327773e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 5145, + "text_loss": 0.41796875 + }, + { + "epoch": 0.43, + "learning_rate": 6.126055554820997e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 5146, + "text_loss": 0.45703125 + }, + { + "epoch": 0.43, + "learning_rate": 6.12478353959561e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 5147, + "text_loss": 0.60546875 + }, + { + "epoch": 0.43, + "learning_rate": 6.123511447688293e-06, + "loss": 0.6396, + "regression_loss": 0.0, + "step": 5148, + "text_loss": 0.6640625 + }, + { + "epoch": 0.43, + "learning_rate": 6.1222392791857665e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 5149, + "text_loss": 0.294921875 + }, + { + "epoch": 0.43, + "learning_rate": 6.120967034174763e-06, + "loss": 0.6245, + "regression_loss": 0.0, + "step": 5150, + "text_loss": 0.8046875 + }, + { + "epoch": 0.43, + "learning_rate": 6.119694712742018e-06, + "loss": 0.5382, + "regression_loss": 0.0, + "step": 5151, + "text_loss": 0.6953125 + }, + { + "epoch": 0.43, + "learning_rate": 6.118422314974269e-06, + "loss": 0.5399, + "regression_loss": 0.0, + "step": 5152, + "text_loss": 0.671875 + }, + { + "epoch": 0.43, + "learning_rate": 6.117149840958263e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 5153, + "text_loss": 0.8046875 + }, + { + "epoch": 0.43, + "learning_rate": 6.1158772907807515e-06, + "loss": 0.4894, + "regression_loss": 0.0, + "step": 5154, + "text_loss": 0.7734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.11460466452849e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 5155, + "text_loss": 0.51953125 + }, + { + "epoch": 0.43, + "learning_rate": 6.113331962288238e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 5156, + "text_loss": 0.5390625 + }, + { + "epoch": 0.43, + "learning_rate": 6.112059184146765e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 5157, + "text_loss": 0.6015625 + }, + { + "epoch": 0.43, + "learning_rate": 6.110786330190838e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 5158, + "text_loss": 0.314453125 + }, + { + "epoch": 0.43, + "learning_rate": 6.109513400507237e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 5159, + "text_loss": 0.63671875 + }, + { + "epoch": 0.43, + "learning_rate": 6.108240395182744e-06, + "loss": 0.5265, + "regression_loss": 0.0, + "step": 5160, + "text_loss": 0.6171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.106967314304144e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 5161, + "text_loss": 0.32421875 + }, + { + "epoch": 0.43, + "learning_rate": 6.1056941579582294e-06, + "loss": 0.4677, + "regression_loss": 0.0, + "step": 5162, + "text_loss": 0.330078125 + }, + { + "epoch": 0.43, + "learning_rate": 6.1044209262317984e-06, + "loss": 0.6768, + "regression_loss": 0.0, + "step": 5163, + "text_loss": 0.6484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.1031476192116525e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 5164, + "text_loss": 0.240234375 + }, + { + "epoch": 0.43, + "learning_rate": 6.1018742369846e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 5165, + "text_loss": 0.4921875 + }, + { + "epoch": 0.43, + "learning_rate": 6.1006007796374525e-06, + "loss": 0.6643, + "regression_loss": 0.0, + "step": 5166, + "text_loss": 0.4765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.0993272472570275e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 5167, + "text_loss": 0.59765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.098053639930151e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 5168, + "text_loss": 0.55078125 + }, + { + "epoch": 0.43, + "learning_rate": 6.096779957743646e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 5169, + "text_loss": 0.365234375 + }, + { + "epoch": 0.43, + "learning_rate": 6.095506200784349e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 5170, + "text_loss": 0.451171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.094232369139099e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 5171, + "text_loss": 0.4140625 + }, + { + "epoch": 0.43, + "learning_rate": 6.0929584628947355e-06, + "loss": 0.667, + "regression_loss": 0.0, + "step": 5172, + "text_loss": 0.76171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.0916844821381094e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 5173, + "text_loss": 0.51953125 + }, + { + "epoch": 0.43, + "learning_rate": 6.0904104269560745e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 5174, + "text_loss": 0.46875 + }, + { + "epoch": 0.43, + "learning_rate": 6.089136297435488e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 5175, + "text_loss": 0.5234375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0878620936632135e-06, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 5176, + "text_loss": 0.296875 + }, + { + "epoch": 0.43, + "learning_rate": 6.086587815726121e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 5177, + "text_loss": 0.7734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0853134637110835e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 5178, + "text_loss": 0.4765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.084039037704979e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 5179, + "text_loss": 0.640625 + }, + { + "epoch": 0.43, + "learning_rate": 6.082764537794692e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 5180, + "text_loss": 0.53515625 + }, + { + "epoch": 0.43, + "learning_rate": 6.081489964067111e-06, + "loss": 0.5962, + "regression_loss": 0.0, + "step": 5181, + "text_loss": 0.416015625 + }, + { + "epoch": 0.43, + "learning_rate": 6.08021531660913e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 5182, + "text_loss": 0.515625 + }, + { + "epoch": 0.43, + "learning_rate": 6.078940595507648e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 5183, + "text_loss": 0.49609375 + }, + { + "epoch": 0.43, + "learning_rate": 6.077665800849568e-06, + "loss": 0.6565, + "regression_loss": 0.0, + "step": 5184, + "text_loss": 0.7734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.076390932721802e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 5185, + "text_loss": 0.447265625 + }, + { + "epoch": 0.43, + "learning_rate": 6.075115991211259e-06, + "loss": 0.4368, + "regression_loss": 0.0, + "step": 5186, + "text_loss": 0.7265625 + }, + { + "epoch": 0.43, + "learning_rate": 6.073840976404861e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 5187, + "text_loss": 0.59765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.0725658883895335e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 5188, + "text_loss": 0.5234375 + }, + { + "epoch": 0.43, + "learning_rate": 6.071290727252201e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 5189, + "text_loss": 0.59765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.0700154930798e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 5190, + "text_loss": 0.4609375 + }, + { + "epoch": 0.43, + "learning_rate": 6.068740185959271e-06, + "loss": 0.6406, + "regression_loss": 0.0, + "step": 5191, + "text_loss": 0.5 + }, + { + "epoch": 0.43, + "learning_rate": 6.067464805977554e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 5192, + "text_loss": 0.490234375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0661893532216e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 5193, + "text_loss": 0.330078125 + }, + { + "epoch": 0.43, + "learning_rate": 6.064913827778363e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 5194, + "text_loss": 0.6484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0636382297348014e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 5195, + "text_loss": 0.6171875 + }, + { + "epoch": 0.43, + "learning_rate": 6.06236255917788e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 5196, + "text_loss": 0.59765625 + }, + { + "epoch": 0.43, + "learning_rate": 6.061086816194564e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 5197, + "text_loss": 0.5546875 + }, + { + "epoch": 0.43, + "learning_rate": 6.05981100087183e-06, + "loss": 0.5052, + "regression_loss": 0.0, + "step": 5198, + "text_loss": 0.69921875 + }, + { + "epoch": 0.43, + "learning_rate": 6.0585351132966575e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 5199, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.057259153556026e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 5200, + "text_loss": 0.263671875 + }, + { + "epoch": 0.43, + "learning_rate": 6.055983121736928e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 5201, + "text_loss": 0.51953125 + }, + { + "epoch": 0.43, + "learning_rate": 6.0547070179263535e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 5202, + "text_loss": 0.578125 + }, + { + "epoch": 0.43, + "learning_rate": 6.053430842211302e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 5203, + "text_loss": 0.3203125 + }, + { + "epoch": 0.43, + "learning_rate": 6.052154594678777e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 5204, + "text_loss": 0.546875 + }, + { + "epoch": 0.43, + "learning_rate": 6.050878275415786e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 5205, + "text_loss": 0.53125 + }, + { + "epoch": 0.43, + "learning_rate": 6.04960188450934e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 5206, + "text_loss": 0.67578125 + }, + { + "epoch": 0.43, + "learning_rate": 6.04832542204646e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 5207, + "text_loss": 0.25390625 + }, + { + "epoch": 0.43, + "learning_rate": 6.0470488881141675e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 5208, + "text_loss": 0.462890625 + }, + { + "epoch": 0.43, + "learning_rate": 6.045772282799489e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 5209, + "text_loss": 0.56640625 + }, + { + "epoch": 0.43, + "learning_rate": 6.044495606189459e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 5210, + "text_loss": 0.2578125 + }, + { + "epoch": 0.43, + "learning_rate": 6.043218858371112e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 5211, + "text_loss": 0.427734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.04194203943149e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 5212, + "text_loss": 0.64453125 + }, + { + "epoch": 0.43, + "learning_rate": 6.040665149457643e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 5213, + "text_loss": 0.91796875 + }, + { + "epoch": 0.43, + "learning_rate": 6.03938818853662e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 5214, + "text_loss": 0.6875 + }, + { + "epoch": 0.43, + "learning_rate": 6.03811115675548e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 5215, + "text_loss": 0.64453125 + }, + { + "epoch": 0.43, + "learning_rate": 6.036834054201283e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 5216, + "text_loss": 0.53125 + }, + { + "epoch": 0.43, + "learning_rate": 6.035556880961093e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 5217, + "text_loss": 0.6875 + }, + { + "epoch": 0.43, + "learning_rate": 6.034279637121986e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 5218, + "text_loss": 0.6484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.033002322771033e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 5219, + "text_loss": 0.48828125 + }, + { + "epoch": 0.43, + "learning_rate": 6.031724937995318e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5220, + "text_loss": 0.54296875 + }, + { + "epoch": 0.43, + "learning_rate": 6.030447482881926e-06, + "loss": 0.6641, + "regression_loss": 0.0, + "step": 5221, + "text_loss": 0.79296875 + }, + { + "epoch": 0.43, + "learning_rate": 6.029169957517946e-06, + "loss": 0.6292, + "regression_loss": 0.0, + "step": 5222, + "text_loss": 0.9296875 + }, + { + "epoch": 0.43, + "learning_rate": 6.027892361990474e-06, + "loss": 0.4838, + "regression_loss": 0.0, + "step": 5223, + "text_loss": 0.4921875 + }, + { + "epoch": 0.43, + "learning_rate": 6.02661469638661e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 5224, + "text_loss": 0.875 + }, + { + "epoch": 0.43, + "learning_rate": 6.025336960793457e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 5225, + "text_loss": 0.271484375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0240591552981275e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 5226, + "text_loss": 0.408203125 + }, + { + "epoch": 0.43, + "learning_rate": 6.022781279987734e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 5227, + "text_loss": 0.55859375 + }, + { + "epoch": 0.43, + "learning_rate": 6.021503334949394e-06, + "loss": 0.3961, + "regression_loss": 0.0, + "step": 5228, + "text_loss": 0.5625 + }, + { + "epoch": 0.43, + "learning_rate": 6.020225320270232e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 5229, + "text_loss": 0.52734375 + }, + { + "epoch": 0.43, + "learning_rate": 6.0189472360373795e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 5230, + "text_loss": 0.43359375 + }, + { + "epoch": 0.43, + "learning_rate": 6.017669082337965e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 5231, + "text_loss": 0.4375 + }, + { + "epoch": 0.43, + "learning_rate": 6.016390859259129e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 5232, + "text_loss": 0.66796875 + }, + { + "epoch": 0.43, + "learning_rate": 6.015112566888015e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 5233, + "text_loss": 0.4296875 + }, + { + "epoch": 0.44, + "learning_rate": 6.013834205311767e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 5234, + "text_loss": 0.265625 + }, + { + "epoch": 0.44, + "learning_rate": 6.012555774617541e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 5235, + "text_loss": 0.6171875 + }, + { + "epoch": 0.44, + "learning_rate": 6.0112772748924905e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 5236, + "text_loss": 0.376953125 + }, + { + "epoch": 0.44, + "learning_rate": 6.0099987062237796e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 5237, + "text_loss": 0.310546875 + }, + { + "epoch": 0.44, + "learning_rate": 6.008720068698574e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 5238, + "text_loss": 0.734375 + }, + { + "epoch": 0.44, + "learning_rate": 6.0074413624040425e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 5239, + "text_loss": 0.515625 + }, + { + "epoch": 0.44, + "learning_rate": 6.006162587427362e-06, + "loss": 0.6189, + "regression_loss": 0.0, + "step": 5240, + "text_loss": 0.451171875 + }, + { + "epoch": 0.44, + "learning_rate": 6.004883743855714e-06, + "loss": 0.4404, + "regression_loss": 0.0, + "step": 5241, + "text_loss": 0.41796875 + }, + { + "epoch": 0.44, + "learning_rate": 6.003604831776282e-06, + "loss": 0.3762, + "regression_loss": 0.0, + "step": 5242, + "text_loss": 0.421875 + }, + { + "epoch": 0.44, + "learning_rate": 6.002325851276256e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 5243, + "text_loss": 0.33984375 + }, + { + "epoch": 0.44, + "learning_rate": 6.0010468024428305e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 5244, + "text_loss": 0.416015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.9997676853632045e-06, + "loss": 0.6125, + "regression_loss": 0.0, + "step": 5245, + "text_loss": 0.67578125 + }, + { + "epoch": 0.44, + "learning_rate": 5.998488500124582e-06, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 5246, + "text_loss": 0.376953125 + }, + { + "epoch": 0.44, + "learning_rate": 5.9972092468141696e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 5247, + "text_loss": 0.53125 + }, + { + "epoch": 0.44, + "learning_rate": 5.995929925519181e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5248, + "text_loss": 0.57421875 + }, + { + "epoch": 0.44, + "learning_rate": 5.994650536326835e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 5249, + "text_loss": 0.427734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.993371079324352e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 5250, + "text_loss": 0.337890625 + }, + { + "epoch": 0.44, + "learning_rate": 5.992091554598958e-06, + "loss": 0.62, + "regression_loss": 0.0, + "step": 5251, + "text_loss": 0.60546875 + }, + { + "epoch": 0.44, + "learning_rate": 5.990811962237888e-06, + "loss": 0.6394, + "regression_loss": 0.0, + "step": 5252, + "text_loss": 1.1171875 + }, + { + "epoch": 0.44, + "learning_rate": 5.989532302328375e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 5253, + "text_loss": 0.671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9882525749576606e-06, + "loss": 0.4576, + "regression_loss": 0.0, + "step": 5254, + "text_loss": 0.51953125 + }, + { + "epoch": 0.44, + "learning_rate": 5.986972780212989e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 5255, + "text_loss": 0.416015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.9856929181816116e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 5256, + "text_loss": 0.73046875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9844129889507826e-06, + "loss": 0.6489, + "regression_loss": 0.0, + "step": 5257, + "text_loss": 0.671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9831329926077595e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 5258, + "text_loss": 0.421875 + }, + { + "epoch": 0.44, + "learning_rate": 5.981852929239806e-06, + "loss": 0.6061, + "regression_loss": 0.0, + "step": 5259, + "text_loss": 0.40234375 + }, + { + "epoch": 0.44, + "learning_rate": 5.980572798934193e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 5260, + "text_loss": 0.55078125 + }, + { + "epoch": 0.44, + "learning_rate": 5.97929260177819e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 5261, + "text_loss": 0.55078125 + }, + { + "epoch": 0.44, + "learning_rate": 5.978012337859075e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 5262, + "text_loss": 0.5390625 + }, + { + "epoch": 0.44, + "learning_rate": 5.976732007264131e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 5263, + "text_loss": 0.61328125 + }, + { + "epoch": 0.44, + "learning_rate": 5.975451610080643e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 5264, + "text_loss": 0.44921875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9741711463959e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 5265, + "text_loss": 0.279296875 + }, + { + "epoch": 0.44, + "learning_rate": 5.972890616297203e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 5266, + "text_loss": 0.50390625 + }, + { + "epoch": 0.44, + "learning_rate": 5.971610019871847e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 5267, + "text_loss": 0.62109375 + }, + { + "epoch": 0.44, + "learning_rate": 5.9703293572071384e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 5268, + "text_loss": 0.734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.969048628390386e-06, + "loss": 0.595, + "regression_loss": 0.0, + "step": 5269, + "text_loss": 0.65234375 + }, + { + "epoch": 0.44, + "learning_rate": 5.967767833508903e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 5270, + "text_loss": 0.6015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.966486972650007e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 5271, + "text_loss": 0.7109375 + }, + { + "epoch": 0.44, + "learning_rate": 5.965206045901022e-06, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 5272, + "text_loss": 0.328125 + }, + { + "epoch": 0.44, + "learning_rate": 5.963925053349273e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 5273, + "text_loss": 0.6328125 + }, + { + "epoch": 0.44, + "learning_rate": 5.9626439950820935e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 5274, + "text_loss": 0.388671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.961362871186818e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 5275, + "text_loss": 0.365234375 + }, + { + "epoch": 0.44, + "learning_rate": 5.960081681750785e-06, + "loss": 0.4528, + "regression_loss": 0.0, + "step": 5276, + "text_loss": 0.2734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.958800426861345e-06, + "loss": 0.3882, + "regression_loss": 0.0, + "step": 5277, + "text_loss": 0.29296875 + }, + { + "epoch": 0.44, + "learning_rate": 5.957519106605843e-06, + "loss": 0.5837, + "regression_loss": 0.0, + "step": 5278, + "text_loss": 0.65625 + }, + { + "epoch": 0.44, + "learning_rate": 5.956237721071632e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 5279, + "text_loss": 0.421875 + }, + { + "epoch": 0.44, + "learning_rate": 5.954956270346074e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 5280, + "text_loss": 0.375 + }, + { + "epoch": 0.44, + "learning_rate": 5.953674754516528e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 5281, + "text_loss": 0.609375 + }, + { + "epoch": 0.44, + "learning_rate": 5.9523931736703636e-06, + "loss": 0.5074, + "regression_loss": 0.0, + "step": 5282, + "text_loss": 0.337890625 + }, + { + "epoch": 0.44, + "learning_rate": 5.951111527894953e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 5283, + "text_loss": 0.62890625 + }, + { + "epoch": 0.44, + "learning_rate": 5.9498298172776695e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 5284, + "text_loss": 0.35546875 + }, + { + "epoch": 0.44, + "learning_rate": 5.948548041905894e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 5285, + "text_loss": 0.396484375 + }, + { + "epoch": 0.44, + "learning_rate": 5.947266201867014e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 5286, + "text_loss": 0.58984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.945984297248415e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 5287, + "text_loss": 0.458984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.944702328137493e-06, + "loss": 0.668, + "regression_loss": 0.0, + "step": 5288, + "text_loss": 0.60546875 + }, + { + "epoch": 0.44, + "learning_rate": 5.943420294621645e-06, + "loss": 0.4114, + "regression_loss": 0.0, + "step": 5289, + "text_loss": 0.45703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.942138196788274e-06, + "loss": 0.6179, + "regression_loss": 0.0, + "step": 5290, + "text_loss": 0.8828125 + }, + { + "epoch": 0.44, + "learning_rate": 5.940856034724789e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 5291, + "text_loss": 0.314453125 + }, + { + "epoch": 0.44, + "learning_rate": 5.939573808518597e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 5292, + "text_loss": 0.578125 + }, + { + "epoch": 0.44, + "learning_rate": 5.9382915182571155e-06, + "loss": 0.4326, + "regression_loss": 0.0, + "step": 5293, + "text_loss": 0.41015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.937009164027765e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 5294, + "text_loss": 0.6171875 + }, + { + "epoch": 0.44, + "learning_rate": 5.935726745917969e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 5295, + "text_loss": 0.28125 + }, + { + "epoch": 0.44, + "learning_rate": 5.934444264015157e-06, + "loss": 0.614, + "regression_loss": 0.0, + "step": 5296, + "text_loss": 0.78515625 + }, + { + "epoch": 0.44, + "learning_rate": 5.93316171840676e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 5297, + "text_loss": 0.6015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.9318791091802175e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 5298, + "text_loss": 0.427734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.930596436422971e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 5299, + "text_loss": 0.44921875 + }, + { + "epoch": 0.44, + "learning_rate": 5.929313700222466e-06, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 5300, + "text_loss": 0.58984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.9280309006661504e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 5301, + "text_loss": 0.859375 + }, + { + "epoch": 0.44, + "learning_rate": 5.926748037841483e-06, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 5302, + "text_loss": 0.59765625 + }, + { + "epoch": 0.44, + "learning_rate": 5.925465111835921e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 5303, + "text_loss": 0.546875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9241821227369256e-06, + "loss": 0.6418, + "regression_loss": 0.0, + "step": 5304, + "text_loss": 0.7734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.922899070631969e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 5305, + "text_loss": 0.353515625 + }, + { + "epoch": 0.44, + "learning_rate": 5.921615955608519e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 5306, + "text_loss": 0.26953125 + }, + { + "epoch": 0.44, + "learning_rate": 5.920332777754053e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 5307, + "text_loss": 0.2734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.919049537156053e-06, + "loss": 0.6709, + "regression_loss": 0.0, + "step": 5308, + "text_loss": 0.70703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.9177662339020004e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 5309, + "text_loss": 0.54296875 + }, + { + "epoch": 0.44, + "learning_rate": 5.9164828680793874e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 5310, + "text_loss": 0.4375 + }, + { + "epoch": 0.44, + "learning_rate": 5.915199439775706e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 5311, + "text_loss": 0.458984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.913915949078453e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 5312, + "text_loss": 0.9765625 + }, + { + "epoch": 0.44, + "learning_rate": 5.912632396075131e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 5313, + "text_loss": 0.51171875 + }, + { + "epoch": 0.44, + "learning_rate": 5.911348780853246e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 5314, + "text_loss": 0.59375 + }, + { + "epoch": 0.44, + "learning_rate": 5.910065103500307e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 5315, + "text_loss": 0.72265625 + }, + { + "epoch": 0.44, + "learning_rate": 5.908781364103832e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 5316, + "text_loss": 0.46484375 + }, + { + "epoch": 0.44, + "learning_rate": 5.907497562751335e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 5317, + "text_loss": 0.27734375 + }, + { + "epoch": 0.44, + "learning_rate": 5.906213699530342e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 5318, + "text_loss": 0.62890625 + }, + { + "epoch": 0.44, + "learning_rate": 5.904929774528381e-06, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 5319, + "text_loss": 0.25390625 + }, + { + "epoch": 0.44, + "learning_rate": 5.90364578783298e-06, + "loss": 0.6206, + "regression_loss": 0.0, + "step": 5320, + "text_loss": 0.578125 + }, + { + "epoch": 0.44, + "learning_rate": 5.9023617395316765e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 5321, + "text_loss": 0.396484375 + }, + { + "epoch": 0.44, + "learning_rate": 5.901077629712011e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 5322, + "text_loss": 0.453125 + }, + { + "epoch": 0.44, + "learning_rate": 5.899793458461526e-06, + "loss": 0.6653, + "regression_loss": 0.0, + "step": 5323, + "text_loss": 0.71875 + }, + { + "epoch": 0.44, + "learning_rate": 5.89850922586777e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 5324, + "text_loss": 0.55078125 + }, + { + "epoch": 0.44, + "learning_rate": 5.897224932018297e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 5325, + "text_loss": 0.4921875 + }, + { + "epoch": 0.44, + "learning_rate": 5.8959405770006606e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 5326, + "text_loss": 0.67578125 + }, + { + "epoch": 0.44, + "learning_rate": 5.894656160902424e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 5327, + "text_loss": 0.458984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.893371683811151e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 5328, + "text_loss": 0.26171875 + }, + { + "epoch": 0.44, + "learning_rate": 5.8920871458144105e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 5329, + "text_loss": 0.58984375 + }, + { + "epoch": 0.44, + "learning_rate": 5.890802546999777e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 5330, + "text_loss": 0.3671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.889517887454826e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 5331, + "text_loss": 0.58203125 + }, + { + "epoch": 0.44, + "learning_rate": 5.888233167267138e-06, + "loss": 0.5789, + "regression_loss": 0.0, + "step": 5332, + "text_loss": 0.4453125 + }, + { + "epoch": 0.44, + "learning_rate": 5.886948386524303e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 5333, + "text_loss": 0.3046875 + }, + { + "epoch": 0.44, + "learning_rate": 5.885663545313906e-06, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 5334, + "text_loss": 0.4140625 + }, + { + "epoch": 0.44, + "learning_rate": 5.884378643723543e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 5335, + "text_loss": 0.6640625 + }, + { + "epoch": 0.44, + "learning_rate": 5.883093681840811e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 5336, + "text_loss": 0.345703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.881808659753314e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 5337, + "text_loss": 0.69140625 + }, + { + "epoch": 0.44, + "learning_rate": 5.880523577548658e-06, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 5338, + "text_loss": 0.703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.87923843531445e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 5339, + "text_loss": 0.37890625 + }, + { + "epoch": 0.44, + "learning_rate": 5.877953233138308e-06, + "loss": 0.5214, + "regression_loss": 0.0, + "step": 5340, + "text_loss": 0.53515625 + }, + { + "epoch": 0.44, + "learning_rate": 5.87666797110785e-06, + "loss": 0.4567, + "regression_loss": 0.0, + "step": 5341, + "text_loss": 0.46484375 + }, + { + "epoch": 0.44, + "learning_rate": 5.875382649310696e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 5342, + "text_loss": 0.5234375 + }, + { + "epoch": 0.44, + "learning_rate": 5.8740972678344746e-06, + "loss": 0.6418, + "regression_loss": 0.0, + "step": 5343, + "text_loss": 0.75390625 + }, + { + "epoch": 0.44, + "learning_rate": 5.872811826766817e-06, + "loss": 0.6494, + "regression_loss": 0.0, + "step": 5344, + "text_loss": 0.703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.871526326195355e-06, + "loss": 0.578, + "regression_loss": 0.0, + "step": 5345, + "text_loss": 0.2060546875 + }, + { + "epoch": 0.44, + "learning_rate": 5.870240766207731e-06, + "loss": 0.6592, + "regression_loss": 0.0, + "step": 5346, + "text_loss": 0.41015625 + }, + { + "epoch": 0.44, + "learning_rate": 5.868955146891586e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 5347, + "text_loss": 0.38671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.867669468334567e-06, + "loss": 0.5911, + "regression_loss": 0.0, + "step": 5348, + "text_loss": 0.314453125 + }, + { + "epoch": 0.44, + "learning_rate": 5.866383730624325e-06, + "loss": 0.434, + "regression_loss": 0.0, + "step": 5349, + "text_loss": 0.57421875 + }, + { + "epoch": 0.44, + "learning_rate": 5.865097933848514e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 5350, + "text_loss": 0.38671875 + }, + { + "epoch": 0.44, + "learning_rate": 5.863812078094795e-06, + "loss": 0.4103, + "regression_loss": 0.0, + "step": 5351, + "text_loss": 0.22265625 + }, + { + "epoch": 0.44, + "learning_rate": 5.862526163450831e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 5352, + "text_loss": 0.345703125 + }, + { + "epoch": 0.44, + "learning_rate": 5.861240190004287e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 5353, + "text_loss": 0.361328125 + }, + { + "epoch": 0.44, + "learning_rate": 5.8599541578428345e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 5354, + "text_loss": 0.58984375 + }, + { + "epoch": 0.45, + "learning_rate": 5.85866806705415e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 5355, + "text_loss": 0.3828125 + }, + { + "epoch": 0.45, + "learning_rate": 5.85738191772591e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 5356, + "text_loss": 0.36328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.8560957099458005e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 5357, + "text_loss": 0.37109375 + }, + { + "epoch": 0.45, + "learning_rate": 5.8548094438015065e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 5358, + "text_loss": 0.36328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.853523119380721e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 5359, + "text_loss": 0.267578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.852236736771135e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 5360, + "text_loss": 0.470703125 + }, + { + "epoch": 0.45, + "learning_rate": 5.850950296060452e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 5361, + "text_loss": 0.341796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.849663797336372e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 5362, + "text_loss": 0.67578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.848377240686603e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 5363, + "text_loss": 0.33984375 + }, + { + "epoch": 0.45, + "learning_rate": 5.847090626198856e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 5364, + "text_loss": 0.384765625 + }, + { + "epoch": 0.45, + "learning_rate": 5.845803953960845e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 5365, + "text_loss": 0.60546875 + }, + { + "epoch": 0.45, + "learning_rate": 5.844517224060288e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 5366, + "text_loss": 0.54296875 + }, + { + "epoch": 0.45, + "learning_rate": 5.843230436584909e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 5367, + "text_loss": 0.72265625 + }, + { + "epoch": 0.45, + "learning_rate": 5.841943591622435e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 5368, + "text_loss": 0.73828125 + }, + { + "epoch": 0.45, + "learning_rate": 5.8406566892605945e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 5369, + "text_loss": 0.5078125 + }, + { + "epoch": 0.45, + "learning_rate": 5.839369729587122e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 5370, + "text_loss": 0.5078125 + }, + { + "epoch": 0.45, + "learning_rate": 5.838082712689757e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 5371, + "text_loss": 0.484375 + }, + { + "epoch": 0.45, + "learning_rate": 5.8367956386562415e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 5372, + "text_loss": 0.59765625 + }, + { + "epoch": 0.45, + "learning_rate": 5.835508507574321e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 5373, + "text_loss": 0.302734375 + }, + { + "epoch": 0.45, + "learning_rate": 5.834221319531745e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 5374, + "text_loss": 0.515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.832934074616271e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 5375, + "text_loss": 0.57421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.831646772915651e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 5376, + "text_loss": 0.67578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.83035941451765e-06, + "loss": 0.6445, + "regression_loss": 0.0, + "step": 5377, + "text_loss": 0.34375 + }, + { + "epoch": 0.45, + "learning_rate": 5.8290719995100345e-06, + "loss": 0.6675, + "regression_loss": 0.0, + "step": 5378, + "text_loss": 0.7421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.82778452798057e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 5379, + "text_loss": 0.7421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.8264970000170315e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 5380, + "text_loss": 0.28515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.8252094157071985e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 5381, + "text_loss": 0.65625 + }, + { + "epoch": 0.45, + "learning_rate": 5.823921775138847e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 5382, + "text_loss": 0.62890625 + }, + { + "epoch": 0.45, + "learning_rate": 5.822634078399766e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 5383, + "text_loss": 0.546875 + }, + { + "epoch": 0.45, + "learning_rate": 5.8213463255777416e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 5384, + "text_loss": 0.50390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.820058516760567e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 5385, + "text_loss": 0.54296875 + }, + { + "epoch": 0.45, + "learning_rate": 5.818770652036039e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 5386, + "text_loss": 0.41015625 + }, + { + "epoch": 0.45, + "learning_rate": 5.817482731491956e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 5387, + "text_loss": 0.36328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.816194755216122e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 5388, + "text_loss": 0.357421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.814906723296346e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 5389, + "text_loss": 0.66796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.813618635820438e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 5390, + "text_loss": 0.7421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.812330492876214e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 5391, + "text_loss": 0.43359375 + }, + { + "epoch": 0.45, + "learning_rate": 5.811042294551493e-06, + "loss": 0.6191, + "regression_loss": 0.0, + "step": 5392, + "text_loss": 0.73828125 + }, + { + "epoch": 0.45, + "learning_rate": 5.809754040934097e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 5393, + "text_loss": 0.400390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.808465732111853e-06, + "loss": 0.4163, + "regression_loss": 0.0, + "step": 5394, + "text_loss": 0.3515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.807177368172591e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 5395, + "text_loss": 0.43359375 + }, + { + "epoch": 0.45, + "learning_rate": 5.805888949204144e-06, + "loss": 0.3789, + "regression_loss": 0.0, + "step": 5396, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.804600475294352e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 5397, + "text_loss": 0.494140625 + }, + { + "epoch": 0.45, + "learning_rate": 5.803311946531054e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 5398, + "text_loss": 0.50390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.802023363002096e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 5399, + "text_loss": 0.578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.80073472479533e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 5400, + "text_loss": 0.69140625 + }, + { + "epoch": 0.45, + "learning_rate": 5.799446031998603e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 5401, + "text_loss": 0.53125 + }, + { + "epoch": 0.45, + "learning_rate": 5.7981572846997766e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 5402, + "text_loss": 0.60546875 + }, + { + "epoch": 0.45, + "learning_rate": 5.79686848298671e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 5403, + "text_loss": 0.8828125 + }, + { + "epoch": 0.45, + "learning_rate": 5.795579626947263e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 5404, + "text_loss": 0.515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.794290716669307e-06, + "loss": 0.505, + "regression_loss": 0.0, + "step": 5405, + "text_loss": 0.55859375 + }, + { + "epoch": 0.45, + "learning_rate": 5.793001752240715e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 5406, + "text_loss": 0.80078125 + }, + { + "epoch": 0.45, + "learning_rate": 5.791712733749356e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 5407, + "text_loss": 0.66796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.790423661283112e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 5408, + "text_loss": 0.462890625 + }, + { + "epoch": 0.45, + "learning_rate": 5.789134534929868e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 5409, + "text_loss": 0.41796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.787845354777504e-06, + "loss": 0.6462, + "regression_loss": 0.0, + "step": 5410, + "text_loss": 0.69921875 + }, + { + "epoch": 0.45, + "learning_rate": 5.786556120913915e-06, + "loss": 0.5077, + "regression_loss": 0.0, + "step": 5411, + "text_loss": 0.416015625 + }, + { + "epoch": 0.45, + "learning_rate": 5.785266833426992e-06, + "loss": 0.6575, + "regression_loss": 0.0, + "step": 5412, + "text_loss": 0.44921875 + }, + { + "epoch": 0.45, + "learning_rate": 5.78397749240463e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 5413, + "text_loss": 0.69921875 + }, + { + "epoch": 0.45, + "learning_rate": 5.782688097934735e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 5414, + "text_loss": 0.50390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.781398650105204e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 5415, + "text_loss": 0.7265625 + }, + { + "epoch": 0.45, + "learning_rate": 5.780109149003951e-06, + "loss": 0.6775, + "regression_loss": 0.0, + "step": 5416, + "text_loss": 0.7109375 + }, + { + "epoch": 0.45, + "learning_rate": 5.778819594718886e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 5417, + "text_loss": 0.486328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.777529987337923e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 5418, + "text_loss": 0.78515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.776240326948981e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 5419, + "text_loss": 0.63671875 + }, + { + "epoch": 0.45, + "learning_rate": 5.7749506136399834e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 5420, + "text_loss": 0.62890625 + }, + { + "epoch": 0.45, + "learning_rate": 5.773660847498854e-06, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 5421, + "text_loss": 0.392578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.772371028613525e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 5422, + "text_loss": 0.431640625 + }, + { + "epoch": 0.45, + "learning_rate": 5.771081157071928e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 5423, + "text_loss": 0.38671875 + }, + { + "epoch": 0.45, + "learning_rate": 5.769791232962001e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 5424, + "text_loss": 0.55078125 + }, + { + "epoch": 0.45, + "learning_rate": 5.768501256371683e-06, + "loss": 0.7068, + "regression_loss": 0.0, + "step": 5425, + "text_loss": 1.0859375 + }, + { + "epoch": 0.45, + "learning_rate": 5.767211227388918e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 5426, + "text_loss": 0.5625 + }, + { + "epoch": 0.45, + "learning_rate": 5.765921146101655e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 5427, + "text_loss": 0.390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.764631012597844e-06, + "loss": 0.6069, + "regression_loss": 0.0, + "step": 5428, + "text_loss": 0.546875 + }, + { + "epoch": 0.45, + "learning_rate": 5.763340826965438e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 5429, + "text_loss": 0.50390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.762050589292398e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 5430, + "text_loss": 0.515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.760760299666687e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 5431, + "text_loss": 0.52734375 + }, + { + "epoch": 0.45, + "learning_rate": 5.759469958176264e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 5432, + "text_loss": 0.78515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.758179564909105e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 5433, + "text_loss": 0.9375 + }, + { + "epoch": 0.45, + "learning_rate": 5.756889119953179e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 5434, + "text_loss": 0.51953125 + }, + { + "epoch": 0.45, + "learning_rate": 5.75559862339646e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 5435, + "text_loss": 0.74609375 + }, + { + "epoch": 0.45, + "learning_rate": 5.754308075326932e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 5436, + "text_loss": 0.4765625 + }, + { + "epoch": 0.45, + "learning_rate": 5.753017475832573e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 5437, + "text_loss": 0.4140625 + }, + { + "epoch": 0.45, + "learning_rate": 5.751726825001373e-06, + "loss": 0.3699, + "regression_loss": 0.0, + "step": 5438, + "text_loss": 0.255859375 + }, + { + "epoch": 0.45, + "learning_rate": 5.75043612292132e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 5439, + "text_loss": 0.65625 + }, + { + "epoch": 0.45, + "learning_rate": 5.7491453696804075e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 5440, + "text_loss": 0.30859375 + }, + { + "epoch": 0.45, + "learning_rate": 5.747854565366634e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 5441, + "text_loss": 0.578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.746563710067999e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 5442, + "text_loss": 0.70703125 + }, + { + "epoch": 0.45, + "learning_rate": 5.745272803872506e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 5443, + "text_loss": 0.5234375 + }, + { + "epoch": 0.45, + "learning_rate": 5.7439818468681605e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 5444, + "text_loss": 0.33203125 + }, + { + "epoch": 0.45, + "learning_rate": 5.742690839142978e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 5445, + "text_loss": 0.396484375 + }, + { + "epoch": 0.45, + "learning_rate": 5.7413997807849686e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 5446, + "text_loss": 0.38671875 + }, + { + "epoch": 0.45, + "learning_rate": 5.740108671882152e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 5447, + "text_loss": 0.63671875 + }, + { + "epoch": 0.45, + "learning_rate": 5.738817512522549e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 5448, + "text_loss": 0.6328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.737526302794182e-06, + "loss": 0.3999, + "regression_loss": 0.0, + "step": 5449, + "text_loss": 0.48046875 + }, + { + "epoch": 0.45, + "learning_rate": 5.736235042785084e-06, + "loss": 0.4833, + "regression_loss": 0.0, + "step": 5450, + "text_loss": 0.466796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.73494373258328e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 5451, + "text_loss": 0.7109375 + }, + { + "epoch": 0.45, + "learning_rate": 5.733652372276809e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 5452, + "text_loss": 0.275390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.7323609619537105e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 5453, + "text_loss": 0.62109375 + }, + { + "epoch": 0.45, + "learning_rate": 5.731069501702022e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 5454, + "text_loss": 0.76171875 + }, + { + "epoch": 0.45, + "learning_rate": 5.729777991609791e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 5455, + "text_loss": 0.6328125 + }, + { + "epoch": 0.45, + "learning_rate": 5.728486431765067e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 5456, + "text_loss": 0.357421875 + }, + { + "epoch": 0.45, + "learning_rate": 5.727194822255899e-06, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 5457, + "text_loss": 0.3359375 + }, + { + "epoch": 0.45, + "learning_rate": 5.7259031631703445e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 5458, + "text_loss": 0.294921875 + }, + { + "epoch": 0.45, + "learning_rate": 5.7246114545964624e-06, + "loss": 0.6389, + "regression_loss": 0.0, + "step": 5459, + "text_loss": 0.41796875 + }, + { + "epoch": 0.45, + "learning_rate": 5.723319696622313e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 5460, + "text_loss": 0.48046875 + }, + { + "epoch": 0.45, + "learning_rate": 5.722027889335962e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 5461, + "text_loss": 0.78125 + }, + { + "epoch": 0.45, + "learning_rate": 5.720736032825481e-06, + "loss": 0.524, + "regression_loss": 0.0, + "step": 5462, + "text_loss": 0.7265625 + }, + { + "epoch": 0.45, + "learning_rate": 5.719444127178937e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 5463, + "text_loss": 0.6171875 + }, + { + "epoch": 0.45, + "learning_rate": 5.718152172484409e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 5464, + "text_loss": 0.375 + }, + { + "epoch": 0.45, + "learning_rate": 5.716860168829976e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 5465, + "text_loss": 0.76171875 + }, + { + "epoch": 0.45, + "learning_rate": 5.715568116303718e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 5466, + "text_loss": 0.3203125 + }, + { + "epoch": 0.45, + "learning_rate": 5.714276014993723e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 5467, + "text_loss": 0.443359375 + }, + { + "epoch": 0.45, + "learning_rate": 5.712983864988078e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 5468, + "text_loss": 0.44140625 + }, + { + "epoch": 0.45, + "learning_rate": 5.711691666374875e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 5469, + "text_loss": 0.515625 + }, + { + "epoch": 0.45, + "learning_rate": 5.71039941924221e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 5470, + "text_loss": 0.67578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.709107123678182e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 5471, + "text_loss": 0.5390625 + }, + { + "epoch": 0.45, + "learning_rate": 5.707814779770892e-06, + "loss": 0.6672, + "regression_loss": 0.0, + "step": 5472, + "text_loss": 0.87109375 + }, + { + "epoch": 0.45, + "learning_rate": 5.706522387608448e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 5473, + "text_loss": 0.42578125 + }, + { + "epoch": 0.45, + "learning_rate": 5.705229947278954e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 5474, + "text_loss": 0.55078125 + }, + { + "epoch": 0.46, + "learning_rate": 5.703937458870525e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 5475, + "text_loss": 0.3828125 + }, + { + "epoch": 0.46, + "learning_rate": 5.702644922471278e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 5476, + "text_loss": 0.671875 + }, + { + "epoch": 0.46, + "learning_rate": 5.701352338169327e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 5477, + "text_loss": 0.51171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.700059706052797e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 5478, + "text_loss": 0.41796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.698767026209813e-06, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 5479, + "text_loss": 0.5234375 + }, + { + "epoch": 0.46, + "learning_rate": 5.697474298728501e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 5480, + "text_loss": 0.63671875 + }, + { + "epoch": 0.46, + "learning_rate": 5.696181523696993e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 5481, + "text_loss": 0.421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.694888701203425e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 5482, + "text_loss": 0.6640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.693595831335934e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 5483, + "text_loss": 0.47265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.692302914182663e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 5484, + "text_loss": 0.45703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.691009949831754e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 5485, + "text_loss": 0.4296875 + }, + { + "epoch": 0.46, + "learning_rate": 5.689716938371355e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 5486, + "text_loss": 0.65234375 + }, + { + "epoch": 0.46, + "learning_rate": 5.688423879889619e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 5487, + "text_loss": 0.48046875 + }, + { + "epoch": 0.46, + "learning_rate": 5.687130774474697e-06, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 5488, + "text_loss": 0.453125 + }, + { + "epoch": 0.46, + "learning_rate": 5.685837622214749e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 5489, + "text_loss": 0.6171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.684544423197933e-06, + "loss": 0.6404, + "regression_loss": 0.0, + "step": 5490, + "text_loss": 0.76171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.683251177512415e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 5491, + "text_loss": 0.322265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.68195788524636e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 5492, + "text_loss": 0.47265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.680664546487937e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 5493, + "text_loss": 0.498046875 + }, + { + "epoch": 0.46, + "learning_rate": 5.679371161325323e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 5494, + "text_loss": 0.52734375 + }, + { + "epoch": 0.46, + "learning_rate": 5.6780777298466915e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 5495, + "text_loss": 0.72265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.676784252140222e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 5496, + "text_loss": 0.46484375 + }, + { + "epoch": 0.46, + "learning_rate": 5.675490728294098e-06, + "loss": 0.4148, + "regression_loss": 0.0, + "step": 5497, + "text_loss": 0.4296875 + }, + { + "epoch": 0.46, + "learning_rate": 5.674197158396505e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 5498, + "text_loss": 0.5703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.672903542535631e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 5499, + "text_loss": 0.73828125 + }, + { + "epoch": 0.46, + "learning_rate": 5.67160988079967e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 5500, + "text_loss": 0.41796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.670316173276816e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 5501, + "text_loss": 0.48828125 + }, + { + "epoch": 0.46, + "learning_rate": 5.669022420055268e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 5502, + "text_loss": 0.78515625 + }, + { + "epoch": 0.46, + "learning_rate": 5.667728621223225e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 5503, + "text_loss": 0.6171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.666434776868895e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 5504, + "text_loss": 0.51171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.665140887080483e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 5505, + "text_loss": 0.412109375 + }, + { + "epoch": 0.46, + "learning_rate": 5.6638469519462015e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 5506, + "text_loss": 0.48046875 + }, + { + "epoch": 0.46, + "learning_rate": 5.662552971554262e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 5507, + "text_loss": 0.66015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.661258945992884e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 5508, + "text_loss": 0.703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.659964875350288e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 5509, + "text_loss": 0.7421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.6586707597146926e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 5510, + "text_loss": 0.310546875 + }, + { + "epoch": 0.46, + "learning_rate": 5.657376599174328e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 5511, + "text_loss": 0.4609375 + }, + { + "epoch": 0.46, + "learning_rate": 5.656082393817422e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 5512, + "text_loss": 0.75 + }, + { + "epoch": 0.46, + "learning_rate": 5.654788143732207e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 5513, + "text_loss": 0.421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.653493849006918e-06, + "loss": 0.5869, + "regression_loss": 0.0, + "step": 5514, + "text_loss": 0.6796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.652199509729795e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 5515, + "text_loss": 0.90625 + }, + { + "epoch": 0.46, + "learning_rate": 5.6509051259890755e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 5516, + "text_loss": 0.3515625 + }, + { + "epoch": 0.46, + "learning_rate": 5.649610697873007e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 5517, + "text_loss": 0.6640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.648316225469838e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 5518, + "text_loss": 0.392578125 + }, + { + "epoch": 0.46, + "learning_rate": 5.647021708867815e-06, + "loss": 0.6038, + "regression_loss": 0.0, + "step": 5519, + "text_loss": 0.60546875 + }, + { + "epoch": 0.46, + "learning_rate": 5.645727148155195e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 5520, + "text_loss": 0.404296875 + }, + { + "epoch": 0.46, + "learning_rate": 5.644432543420233e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 5521, + "text_loss": 0.63671875 + }, + { + "epoch": 0.46, + "learning_rate": 5.6431378947511885e-06, + "loss": 0.5995, + "regression_loss": 0.0, + "step": 5522, + "text_loss": 0.89453125 + }, + { + "epoch": 0.46, + "learning_rate": 5.641843202236324e-06, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 5523, + "text_loss": 0.38671875 + }, + { + "epoch": 0.46, + "learning_rate": 5.640548465963904e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 5524, + "text_loss": 0.40234375 + }, + { + "epoch": 0.46, + "learning_rate": 5.639253686022198e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 5525, + "text_loss": 0.72265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.637958862499479e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 5526, + "text_loss": 0.392578125 + }, + { + "epoch": 0.46, + "learning_rate": 5.636663995484019e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 5527, + "text_loss": 0.40234375 + }, + { + "epoch": 0.46, + "learning_rate": 5.6353690850640965e-06, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 5528, + "text_loss": 0.4765625 + }, + { + "epoch": 0.46, + "learning_rate": 5.63407413132799e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 5529, + "text_loss": 0.546875 + }, + { + "epoch": 0.46, + "learning_rate": 5.632779134363985e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 5530, + "text_loss": 0.7578125 + }, + { + "epoch": 0.46, + "learning_rate": 5.631484094260368e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 5531, + "text_loss": 0.66015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.630189011105425e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 5532, + "text_loss": 0.37109375 + }, + { + "epoch": 0.46, + "learning_rate": 5.6288938849874505e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 5533, + "text_loss": 0.51171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.62759871599474e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 5534, + "text_loss": 0.4453125 + }, + { + "epoch": 0.46, + "learning_rate": 5.62630350421559e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 5535, + "text_loss": 0.55078125 + }, + { + "epoch": 0.46, + "learning_rate": 5.625008249738301e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 5536, + "text_loss": 0.421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.623712952651179e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 5537, + "text_loss": 0.5 + }, + { + "epoch": 0.46, + "learning_rate": 5.622417613042529e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 5538, + "text_loss": 0.35546875 + }, + { + "epoch": 0.46, + "learning_rate": 5.62112223100066e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 5539, + "text_loss": 0.498046875 + }, + { + "epoch": 0.46, + "learning_rate": 5.619826806613886e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 5540, + "text_loss": 0.57421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.618531339970521e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 5541, + "text_loss": 0.431640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.617235831158885e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 5542, + "text_loss": 0.8125 + }, + { + "epoch": 0.46, + "learning_rate": 5.615940280267297e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 5543, + "text_loss": 0.33984375 + }, + { + "epoch": 0.46, + "learning_rate": 5.6146446873840824e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 5544, + "text_loss": 0.318359375 + }, + { + "epoch": 0.46, + "learning_rate": 5.613349052597568e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 5545, + "text_loss": 0.7109375 + }, + { + "epoch": 0.46, + "learning_rate": 5.612053375996082e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 5546, + "text_loss": 0.70703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.610757657667958e-06, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 5547, + "text_loss": 0.466796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.609461897701533e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 5548, + "text_loss": 0.640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.608166096185142e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 5549, + "text_loss": 0.47265625 + }, + { + "epoch": 0.46, + "learning_rate": 5.6068702532071275e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 5550, + "text_loss": 0.65625 + }, + { + "epoch": 0.46, + "learning_rate": 5.605574368855835e-06, + "loss": 0.623, + "regression_loss": 0.0, + "step": 5551, + "text_loss": 0.59765625 + }, + { + "epoch": 0.46, + "learning_rate": 5.604278443219608e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 5552, + "text_loss": 0.330078125 + }, + { + "epoch": 0.46, + "learning_rate": 5.6029824763867975e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 5553, + "text_loss": 0.4140625 + }, + { + "epoch": 0.46, + "learning_rate": 5.601686468445758e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 5554, + "text_loss": 0.55859375 + }, + { + "epoch": 0.46, + "learning_rate": 5.600390419484842e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 5555, + "text_loss": 0.53515625 + }, + { + "epoch": 0.46, + "learning_rate": 5.599094329592407e-06, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 5556, + "text_loss": 0.3046875 + }, + { + "epoch": 0.46, + "learning_rate": 5.5977981988568175e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 5557, + "text_loss": 0.4609375 + }, + { + "epoch": 0.46, + "learning_rate": 5.596502027366432e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 5558, + "text_loss": 0.54296875 + }, + { + "epoch": 0.46, + "learning_rate": 5.595205815209619e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 5559, + "text_loss": 0.353515625 + }, + { + "epoch": 0.46, + "learning_rate": 5.59390956247475e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 5560, + "text_loss": 0.30078125 + }, + { + "epoch": 0.46, + "learning_rate": 5.5926132692501926e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 5561, + "text_loss": 0.51171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.591316935624324e-06, + "loss": 0.5094, + "regression_loss": 0.0, + "step": 5562, + "text_loss": 0.6640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.59002056168552e-06, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 5563, + "text_loss": 0.6015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.588724147522162e-06, + "loss": 0.6323, + "regression_loss": 0.0, + "step": 5564, + "text_loss": 0.671875 + }, + { + "epoch": 0.46, + "learning_rate": 5.587427693222632e-06, + "loss": 0.5311, + "regression_loss": 0.0, + "step": 5565, + "text_loss": 1.28125 + }, + { + "epoch": 0.46, + "learning_rate": 5.586131198875317e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 5566, + "text_loss": 0.625 + }, + { + "epoch": 0.46, + "learning_rate": 5.584834664568602e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 5567, + "text_loss": 0.91015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.583538090390882e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 5568, + "text_loss": 0.796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.5822414764305475e-06, + "loss": 0.5336, + "regression_loss": 0.0, + "step": 5569, + "text_loss": 0.484375 + }, + { + "epoch": 0.46, + "learning_rate": 5.580944822775998e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 5570, + "text_loss": 0.361328125 + }, + { + "epoch": 0.46, + "learning_rate": 5.579648129515629e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 5571, + "text_loss": 0.48828125 + }, + { + "epoch": 0.46, + "learning_rate": 5.578351396737845e-06, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 5572, + "text_loss": 0.4765625 + }, + { + "epoch": 0.46, + "learning_rate": 5.577054624531051e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 5573, + "text_loss": 0.46484375 + }, + { + "epoch": 0.46, + "learning_rate": 5.575757812983652e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 5574, + "text_loss": 0.451171875 + }, + { + "epoch": 0.46, + "learning_rate": 5.574460962184059e-06, + "loss": 0.4432, + "regression_loss": 0.0, + "step": 5575, + "text_loss": 0.66015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.5731640722206855e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 5576, + "text_loss": 0.578125 + }, + { + "epoch": 0.46, + "learning_rate": 5.571867143181945e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 5577, + "text_loss": 0.5703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.570570175156257e-06, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 5578, + "text_loss": 0.57421875 + }, + { + "epoch": 0.46, + "learning_rate": 5.569273168232041e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 5579, + "text_loss": 0.51953125 + }, + { + "epoch": 0.46, + "learning_rate": 5.56797612249772e-06, + "loss": 0.4635, + "regression_loss": 0.0, + "step": 5580, + "text_loss": 0.349609375 + }, + { + "epoch": 0.46, + "learning_rate": 5.5666790380417214e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 5581, + "text_loss": 0.6015625 + }, + { + "epoch": 0.46, + "learning_rate": 5.565381914952472e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 5582, + "text_loss": 0.59375 + }, + { + "epoch": 0.46, + "learning_rate": 5.564084753318405e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 5583, + "text_loss": 0.7734375 + }, + { + "epoch": 0.46, + "learning_rate": 5.562787553227953e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 5584, + "text_loss": 0.2890625 + }, + { + "epoch": 0.46, + "learning_rate": 5.561490314769551e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 5585, + "text_loss": 0.427734375 + }, + { + "epoch": 0.46, + "learning_rate": 5.560193038031639e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 5586, + "text_loss": 0.6640625 + }, + { + "epoch": 0.46, + "learning_rate": 5.5588957231026605e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 5587, + "text_loss": 0.4140625 + }, + { + "epoch": 0.46, + "learning_rate": 5.557598370071058e-06, + "loss": 0.6375, + "regression_loss": 0.0, + "step": 5588, + "text_loss": 0.5703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.556300979025278e-06, + "loss": 0.5228, + "regression_loss": 0.0, + "step": 5589, + "text_loss": 0.70703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.555003550053771e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 5590, + "text_loss": 0.66796875 + }, + { + "epoch": 0.46, + "learning_rate": 5.553706083244987e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 5591, + "text_loss": 0.703125 + }, + { + "epoch": 0.46, + "learning_rate": 5.552408578687384e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 5592, + "text_loss": 0.609375 + }, + { + "epoch": 0.46, + "learning_rate": 5.551111036469416e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 5593, + "text_loss": 0.462890625 + }, + { + "epoch": 0.46, + "learning_rate": 5.549813456679544e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 5594, + "text_loss": 0.376953125 + }, + { + "epoch": 0.47, + "learning_rate": 5.54851583940623e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5595, + "text_loss": 0.55078125 + }, + { + "epoch": 0.47, + "learning_rate": 5.54721818473794e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 5596, + "text_loss": 0.59375 + }, + { + "epoch": 0.47, + "learning_rate": 5.54592049276314e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 5597, + "text_loss": 0.83984375 + }, + { + "epoch": 0.47, + "learning_rate": 5.544622763570301e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 5598, + "text_loss": 0.4375 + }, + { + "epoch": 0.47, + "learning_rate": 5.543324997247894e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 5599, + "text_loss": 0.345703125 + }, + { + "epoch": 0.47, + "learning_rate": 5.542027193884395e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 5600, + "text_loss": 0.74609375 + }, + { + "epoch": 0.47, + "learning_rate": 5.540729353568282e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 5601, + "text_loss": 0.67578125 + }, + { + "epoch": 0.47, + "learning_rate": 5.539431476388034e-06, + "loss": 0.4825, + "regression_loss": 0.0, + "step": 5602, + "text_loss": 0.2275390625 + }, + { + "epoch": 0.47, + "learning_rate": 5.538133562432134e-06, + "loss": 0.4784, + "regression_loss": 0.0, + "step": 5603, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.47, + "learning_rate": 5.5368356117890686e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 5604, + "text_loss": 0.55859375 + }, + { + "epoch": 0.47, + "learning_rate": 5.535537624547321e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 5605, + "text_loss": 0.515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.534239600795386e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 5606, + "text_loss": 0.228515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.532941540621755e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 5607, + "text_loss": 0.5859375 + }, + { + "epoch": 0.47, + "learning_rate": 5.5316434441149205e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 5608, + "text_loss": 0.55078125 + }, + { + "epoch": 0.47, + "learning_rate": 5.530345311363382e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 5609, + "text_loss": 0.48828125 + }, + { + "epoch": 0.47, + "learning_rate": 5.529047142455641e-06, + "loss": 0.4473, + "regression_loss": 0.0, + "step": 5610, + "text_loss": 0.435546875 + }, + { + "epoch": 0.47, + "learning_rate": 5.527748937480197e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 5611, + "text_loss": 0.33984375 + }, + { + "epoch": 0.47, + "learning_rate": 5.526450696525556e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 5612, + "text_loss": 0.5546875 + }, + { + "epoch": 0.47, + "learning_rate": 5.525152419680226e-06, + "loss": 0.5857, + "regression_loss": 0.0, + "step": 5613, + "text_loss": 0.4921875 + }, + { + "epoch": 0.47, + "learning_rate": 5.523854107032717e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 5614, + "text_loss": 0.55078125 + }, + { + "epoch": 0.47, + "learning_rate": 5.52255575867154e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 5615, + "text_loss": 0.53515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.5212573746852096e-06, + "loss": 0.5433, + "regression_loss": 0.0, + "step": 5616, + "text_loss": 0.57421875 + }, + { + "epoch": 0.47, + "learning_rate": 5.519958955162243e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 5617, + "text_loss": 0.296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.518660500191162e-06, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 5618, + "text_loss": 0.484375 + }, + { + "epoch": 0.47, + "learning_rate": 5.517362009860486e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 5619, + "text_loss": 0.6640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.516063484258741e-06, + "loss": 0.4456, + "regression_loss": 0.0, + "step": 5620, + "text_loss": 0.474609375 + }, + { + "epoch": 0.47, + "learning_rate": 5.514764923474454e-06, + "loss": 0.4659, + "regression_loss": 0.0, + "step": 5621, + "text_loss": 0.53515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.513466327596151e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 5622, + "text_loss": 0.421875 + }, + { + "epoch": 0.47, + "learning_rate": 5.512167696712366e-06, + "loss": 0.4752, + "regression_loss": 0.0, + "step": 5623, + "text_loss": 0.58984375 + }, + { + "epoch": 0.47, + "learning_rate": 5.510869030911634e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 5624, + "text_loss": 0.671875 + }, + { + "epoch": 0.47, + "learning_rate": 5.50957033028249e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 5625, + "text_loss": 0.462890625 + }, + { + "epoch": 0.47, + "learning_rate": 5.508271594913472e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 5626, + "text_loss": 0.57421875 + }, + { + "epoch": 0.47, + "learning_rate": 5.506972824893122e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 5627, + "text_loss": 0.56640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.505674020309983e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 5628, + "text_loss": 0.228515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.504375181252603e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 5629, + "text_loss": 0.5 + }, + { + "epoch": 0.47, + "learning_rate": 5.503076307809525e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 5630, + "text_loss": 0.443359375 + }, + { + "epoch": 0.47, + "learning_rate": 5.501777400069305e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 5631, + "text_loss": 0.51171875 + }, + { + "epoch": 0.47, + "learning_rate": 5.500478458120493e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 5632, + "text_loss": 0.609375 + }, + { + "epoch": 0.47, + "learning_rate": 5.499179482051643e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 5633, + "text_loss": 0.6875 + }, + { + "epoch": 0.47, + "learning_rate": 5.497880471951316e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 5634, + "text_loss": 0.5390625 + }, + { + "epoch": 0.47, + "learning_rate": 5.49658142790807e-06, + "loss": 0.427, + "regression_loss": 0.0, + "step": 5635, + "text_loss": 0.279296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.495282350010465e-06, + "loss": 0.45, + "regression_loss": 0.0, + "step": 5636, + "text_loss": 0.4375 + }, + { + "epoch": 0.47, + "learning_rate": 5.493983238347069e-06, + "loss": 0.4491, + "regression_loss": 0.0, + "step": 5637, + "text_loss": 0.5234375 + }, + { + "epoch": 0.47, + "learning_rate": 5.492684093006447e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 5638, + "text_loss": 0.42578125 + }, + { + "epoch": 0.47, + "learning_rate": 5.491384914077168e-06, + "loss": 0.6499, + "regression_loss": 0.0, + "step": 5639, + "text_loss": 0.56640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.490085701647805e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 5640, + "text_loss": 0.302734375 + }, + { + "epoch": 0.47, + "learning_rate": 5.488786455806927e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 5641, + "text_loss": 0.62890625 + }, + { + "epoch": 0.47, + "learning_rate": 5.4874871766431145e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 5642, + "text_loss": 0.54296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.486187864244945e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 5643, + "text_loss": 0.484375 + }, + { + "epoch": 0.47, + "learning_rate": 5.484888518700997e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 5644, + "text_loss": 0.56640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.4835891400998545e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 5645, + "text_loss": 0.294921875 + }, + { + "epoch": 0.47, + "learning_rate": 5.482289728530102e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 5646, + "text_loss": 0.546875 + }, + { + "epoch": 0.47, + "learning_rate": 5.480990284080326e-06, + "loss": 0.4493, + "regression_loss": 0.0, + "step": 5647, + "text_loss": 0.404296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4796908068391176e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 5648, + "text_loss": 0.49609375 + }, + { + "epoch": 0.47, + "learning_rate": 5.478391296895068e-06, + "loss": 0.6138, + "regression_loss": 0.0, + "step": 5649, + "text_loss": 0.74609375 + }, + { + "epoch": 0.47, + "learning_rate": 5.477091754336769e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 5650, + "text_loss": 0.33203125 + }, + { + "epoch": 0.47, + "learning_rate": 5.475792179252819e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 5651, + "text_loss": 0.64453125 + }, + { + "epoch": 0.47, + "learning_rate": 5.474492571731818e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 5652, + "text_loss": 0.2578125 + }, + { + "epoch": 0.47, + "learning_rate": 5.473192931862361e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 5653, + "text_loss": 0.68359375 + }, + { + "epoch": 0.47, + "learning_rate": 5.471893259733054e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 5654, + "text_loss": 0.41796875 + }, + { + "epoch": 0.47, + "learning_rate": 5.470593555432504e-06, + "loss": 0.4005, + "regression_loss": 0.0, + "step": 5655, + "text_loss": 0.43359375 + }, + { + "epoch": 0.47, + "learning_rate": 5.469293819049314e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 5656, + "text_loss": 0.33984375 + }, + { + "epoch": 0.47, + "learning_rate": 5.467994050672096e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 5657, + "text_loss": 0.53515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.4666942503894605e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 5658, + "text_loss": 0.75 + }, + { + "epoch": 0.47, + "learning_rate": 5.465394418290021e-06, + "loss": 0.6304, + "regression_loss": 0.0, + "step": 5659, + "text_loss": 0.2734375 + }, + { + "epoch": 0.47, + "learning_rate": 5.4640945544623956e-06, + "loss": 0.4962, + "regression_loss": 0.0, + "step": 5660, + "text_loss": 0.431640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.462794658995199e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 5661, + "text_loss": 0.6796875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4614947319770514e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 5662, + "text_loss": 0.44921875 + }, + { + "epoch": 0.47, + "learning_rate": 5.46019477349658e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 5663, + "text_loss": 0.58984375 + }, + { + "epoch": 0.47, + "learning_rate": 5.458894783642402e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 5664, + "text_loss": 0.515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.45759476250315e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 5665, + "text_loss": 0.40625 + }, + { + "epoch": 0.47, + "learning_rate": 5.45629471016745e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 5666, + "text_loss": 0.62109375 + }, + { + "epoch": 0.47, + "learning_rate": 5.454994626723933e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 5667, + "text_loss": 0.490234375 + }, + { + "epoch": 0.47, + "learning_rate": 5.453694512261234e-06, + "loss": 0.478, + "regression_loss": 0.0, + "step": 5668, + "text_loss": 0.51953125 + }, + { + "epoch": 0.47, + "learning_rate": 5.452394366867985e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 5669, + "text_loss": 0.69140625 + }, + { + "epoch": 0.47, + "learning_rate": 5.451094190632824e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 5670, + "text_loss": 0.7890625 + }, + { + "epoch": 0.47, + "learning_rate": 5.449793983644393e-06, + "loss": 0.4569, + "regression_loss": 0.0, + "step": 5671, + "text_loss": 0.59375 + }, + { + "epoch": 0.47, + "learning_rate": 5.44849374599133e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 5672, + "text_loss": 0.6875 + }, + { + "epoch": 0.47, + "learning_rate": 5.447193477762279e-06, + "loss": 0.6184, + "regression_loss": 0.0, + "step": 5673, + "text_loss": 0.7109375 + }, + { + "epoch": 0.47, + "learning_rate": 5.445893179045888e-06, + "loss": 0.6743, + "regression_loss": 0.0, + "step": 5674, + "text_loss": 0.59375 + }, + { + "epoch": 0.47, + "learning_rate": 5.444592849930802e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 5675, + "text_loss": 0.5625 + }, + { + "epoch": 0.47, + "learning_rate": 5.443292490505672e-06, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 5676, + "text_loss": 0.828125 + }, + { + "epoch": 0.47, + "learning_rate": 5.44199210085915e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 5677, + "text_loss": 0.296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.44069168107989e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 5678, + "text_loss": 0.44140625 + }, + { + "epoch": 0.47, + "learning_rate": 5.439391231256544e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 5679, + "text_loss": 0.28515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.438090751477777e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 5680, + "text_loss": 0.66796875 + }, + { + "epoch": 0.47, + "learning_rate": 5.436790241832244e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 5681, + "text_loss": 0.81640625 + }, + { + "epoch": 0.47, + "learning_rate": 5.435489702408609e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 5682, + "text_loss": 0.416015625 + }, + { + "epoch": 0.47, + "learning_rate": 5.434189133295533e-06, + "loss": 0.504, + "regression_loss": 0.0, + "step": 5683, + "text_loss": 0.4375 + }, + { + "epoch": 0.47, + "learning_rate": 5.4328885345816866e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 5684, + "text_loss": 0.466796875 + }, + { + "epoch": 0.47, + "learning_rate": 5.431587906355735e-06, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 5685, + "text_loss": 0.47265625 + }, + { + "epoch": 0.47, + "learning_rate": 5.430287248706348e-06, + "loss": 0.5957, + "regression_loss": 0.0, + "step": 5686, + "text_loss": 0.48046875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4289865617222005e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 5687, + "text_loss": 0.6484375 + }, + { + "epoch": 0.47, + "learning_rate": 5.427685845491964e-06, + "loss": 0.6211, + "regression_loss": 0.0, + "step": 5688, + "text_loss": 0.58203125 + }, + { + "epoch": 0.47, + "learning_rate": 5.4263851001043155e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 5689, + "text_loss": 0.515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.4250843256479325e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 5690, + "text_loss": 0.5625 + }, + { + "epoch": 0.47, + "learning_rate": 5.423783522211498e-06, + "loss": 0.6221, + "regression_loss": 0.0, + "step": 5691, + "text_loss": 0.65625 + }, + { + "epoch": 0.47, + "learning_rate": 5.422482689883689e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 5692, + "text_loss": 0.53515625 + }, + { + "epoch": 0.47, + "learning_rate": 5.421181828753192e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 5693, + "text_loss": 0.34765625 + }, + { + "epoch": 0.47, + "learning_rate": 5.419880938908696e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 5694, + "text_loss": 0.314453125 + }, + { + "epoch": 0.47, + "learning_rate": 5.418580020438885e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 5695, + "text_loss": 0.5390625 + }, + { + "epoch": 0.47, + "learning_rate": 5.41727907343245e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 5696, + "text_loss": 0.62890625 + }, + { + "epoch": 0.47, + "learning_rate": 5.415978097978083e-06, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 5697, + "text_loss": 0.55859375 + }, + { + "epoch": 0.47, + "learning_rate": 5.414677094164478e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 5698, + "text_loss": 0.5859375 + }, + { + "epoch": 0.47, + "learning_rate": 5.4133760620803295e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 5699, + "text_loss": 0.53125 + }, + { + "epoch": 0.47, + "learning_rate": 5.412075001814337e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 5700, + "text_loss": 0.546875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4107739134551985e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 5701, + "text_loss": 0.5703125 + }, + { + "epoch": 0.47, + "learning_rate": 5.4094727970916176e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 5702, + "text_loss": 0.62109375 + }, + { + "epoch": 0.47, + "learning_rate": 5.4081716528122955e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 5703, + "text_loss": 0.66015625 + }, + { + "epoch": 0.47, + "learning_rate": 5.406870480705937e-06, + "loss": 0.5209, + "regression_loss": 0.0, + "step": 5704, + "text_loss": 0.2041015625 + }, + { + "epoch": 0.47, + "learning_rate": 5.405569280861253e-06, + "loss": 0.394, + "regression_loss": 0.0, + "step": 5705, + "text_loss": 0.326171875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4042680533669475e-06, + "loss": 0.426, + "regression_loss": 0.0, + "step": 5706, + "text_loss": 0.392578125 + }, + { + "epoch": 0.47, + "learning_rate": 5.402966798311735e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 5707, + "text_loss": 0.3046875 + }, + { + "epoch": 0.47, + "learning_rate": 5.401665515784329e-06, + "loss": 0.4879, + "regression_loss": 0.0, + "step": 5708, + "text_loss": 0.54296875 + }, + { + "epoch": 0.47, + "learning_rate": 5.4003642058734405e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 5709, + "text_loss": 0.5390625 + }, + { + "epoch": 0.47, + "learning_rate": 5.399062868667789e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 5710, + "text_loss": 0.67578125 + }, + { + "epoch": 0.47, + "learning_rate": 5.3977615042560914e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 5711, + "text_loss": 0.83203125 + }, + { + "epoch": 0.47, + "learning_rate": 5.39646011272707e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 5712, + "text_loss": 0.55859375 + }, + { + "epoch": 0.47, + "learning_rate": 5.395158694169446e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 5713, + "text_loss": 0.357421875 + }, + { + "epoch": 0.47, + "learning_rate": 5.393857248671943e-06, + "loss": 0.4672, + "regression_loss": 0.0, + "step": 5714, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.47, + "learning_rate": 5.392555776323286e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 5715, + "text_loss": 0.48046875 + }, + { + "epoch": 0.48, + "learning_rate": 5.391254277212206e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 5716, + "text_loss": 0.72265625 + }, + { + "epoch": 0.48, + "learning_rate": 5.3899527514274275e-06, + "loss": 0.6689, + "regression_loss": 0.0, + "step": 5717, + "text_loss": 0.5390625 + }, + { + "epoch": 0.48, + "learning_rate": 5.388651199057685e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 5718, + "text_loss": 0.34375 + }, + { + "epoch": 0.48, + "learning_rate": 5.387349620191713e-06, + "loss": 0.5643, + "regression_loss": 0.0, + "step": 5719, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.386048014918243e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 5720, + "text_loss": 0.62109375 + }, + { + "epoch": 0.48, + "learning_rate": 5.384746383326014e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 5721, + "text_loss": 0.3203125 + }, + { + "epoch": 0.48, + "learning_rate": 5.383444725503762e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 5722, + "text_loss": 0.6953125 + }, + { + "epoch": 0.48, + "learning_rate": 5.382143041540229e-06, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 5723, + "text_loss": 0.6328125 + }, + { + "epoch": 0.48, + "learning_rate": 5.38084133152416e-06, + "loss": 0.635, + "regression_loss": 0.0, + "step": 5724, + "text_loss": 0.5234375 + }, + { + "epoch": 0.48, + "learning_rate": 5.3795395955442946e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 5725, + "text_loss": 0.3828125 + }, + { + "epoch": 0.48, + "learning_rate": 5.378237833689379e-06, + "loss": 0.452, + "regression_loss": 0.0, + "step": 5726, + "text_loss": 0.419921875 + }, + { + "epoch": 0.48, + "learning_rate": 5.376936046048162e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 5727, + "text_loss": 0.494140625 + }, + { + "epoch": 0.48, + "learning_rate": 5.375634232709392e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 5728, + "text_loss": 0.5078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.374332393761821e-06, + "loss": 0.4369, + "regression_loss": 0.0, + "step": 5729, + "text_loss": 0.380859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.3730305292942e-06, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 5730, + "text_loss": 0.53515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.371728639395284e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 5731, + "text_loss": 0.703125 + }, + { + "epoch": 0.48, + "learning_rate": 5.370426724153829e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 5732, + "text_loss": 0.65625 + }, + { + "epoch": 0.48, + "learning_rate": 5.369124783658595e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 5733, + "text_loss": 0.349609375 + }, + { + "epoch": 0.48, + "learning_rate": 5.367822817998338e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 5734, + "text_loss": 0.5 + }, + { + "epoch": 0.48, + "learning_rate": 5.366520827261821e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 5735, + "text_loss": 0.640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.365218811537808e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 5736, + "text_loss": 0.53515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.363916770915062e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 5737, + "text_loss": 0.51953125 + }, + { + "epoch": 0.48, + "learning_rate": 5.362614705482351e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 5738, + "text_loss": 0.380859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.361312615328441e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 5739, + "text_loss": 0.455078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.3600105005421035e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 5740, + "text_loss": 0.65625 + }, + { + "epoch": 0.48, + "learning_rate": 5.358708361212111e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 5741, + "text_loss": 0.71484375 + }, + { + "epoch": 0.48, + "learning_rate": 5.3574061974272325e-06, + "loss": 0.5945, + "regression_loss": 0.0, + "step": 5742, + "text_loss": 0.4609375 + }, + { + "epoch": 0.48, + "learning_rate": 5.356104009276246e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 5743, + "text_loss": 0.474609375 + }, + { + "epoch": 0.48, + "learning_rate": 5.354801796847928e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 5744, + "text_loss": 0.6953125 + }, + { + "epoch": 0.48, + "learning_rate": 5.353499560231057e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 5745, + "text_loss": 0.443359375 + }, + { + "epoch": 0.48, + "learning_rate": 5.35219729951441e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 5746, + "text_loss": 0.5625 + }, + { + "epoch": 0.48, + "learning_rate": 5.3508950147867714e-06, + "loss": 0.6643, + "regression_loss": 0.0, + "step": 5747, + "text_loss": 0.828125 + }, + { + "epoch": 0.48, + "learning_rate": 5.349592706136922e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 5748, + "text_loss": 0.5859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.3482903736536475e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 5749, + "text_loss": 0.482421875 + }, + { + "epoch": 0.48, + "learning_rate": 5.346988017425736e-06, + "loss": 0.5989, + "regression_loss": 0.0, + "step": 5750, + "text_loss": 0.5625 + }, + { + "epoch": 0.48, + "learning_rate": 5.345685637541972e-06, + "loss": 0.634, + "regression_loss": 0.0, + "step": 5751, + "text_loss": 0.87109375 + }, + { + "epoch": 0.48, + "learning_rate": 5.344383234091146e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 5752, + "text_loss": 0.34375 + }, + { + "epoch": 0.48, + "learning_rate": 5.343080807162051e-06, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 5753, + "text_loss": 0.24609375 + }, + { + "epoch": 0.48, + "learning_rate": 5.341778356843479e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 5754, + "text_loss": 0.46875 + }, + { + "epoch": 0.48, + "learning_rate": 5.340475883224223e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 5755, + "text_loss": 0.291015625 + }, + { + "epoch": 0.48, + "learning_rate": 5.3391733863930795e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 5756, + "text_loss": 0.38671875 + }, + { + "epoch": 0.48, + "learning_rate": 5.337870866438847e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 5757, + "text_loss": 0.57421875 + }, + { + "epoch": 0.48, + "learning_rate": 5.336568323450323e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 5758, + "text_loss": 0.51953125 + }, + { + "epoch": 0.48, + "learning_rate": 5.3352657575163095e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 5759, + "text_loss": 0.55078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.3339631687256085e-06, + "loss": 0.5023, + "regression_loss": 0.0, + "step": 5760, + "text_loss": 0.265625 + }, + { + "epoch": 0.48, + "learning_rate": 5.332660557167023e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 5761, + "text_loss": 0.30859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.3313579229293586e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 5762, + "text_loss": 0.404296875 + }, + { + "epoch": 0.48, + "learning_rate": 5.330055266101424e-06, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 5763, + "text_loss": 0.76171875 + }, + { + "epoch": 0.48, + "learning_rate": 5.328752586772025e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 5764, + "text_loss": 0.515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.327449885029973e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 5765, + "text_loss": 0.78125 + }, + { + "epoch": 0.48, + "learning_rate": 5.326147160964079e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 5766, + "text_loss": 0.69140625 + }, + { + "epoch": 0.48, + "learning_rate": 5.3248444146631554e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 5767, + "text_loss": 0.6796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.3235416462160176e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 5768, + "text_loss": 0.30078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.322238855711483e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 5769, + "text_loss": 0.486328125 + }, + { + "epoch": 0.48, + "learning_rate": 5.320936043238365e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 5770, + "text_loss": 0.359375 + }, + { + "epoch": 0.48, + "learning_rate": 5.319633208885487e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 5771, + "text_loss": 0.51171875 + }, + { + "epoch": 0.48, + "learning_rate": 5.3183303527416675e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 5772, + "text_loss": 0.53515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.317027474895728e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 5773, + "text_loss": 0.25 + }, + { + "epoch": 0.48, + "learning_rate": 5.315724575436492e-06, + "loss": 0.4368, + "regression_loss": 0.0, + "step": 5774, + "text_loss": 0.26171875 + }, + { + "epoch": 0.48, + "learning_rate": 5.314421654452786e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 5775, + "text_loss": 0.65234375 + }, + { + "epoch": 0.48, + "learning_rate": 5.313118712033436e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 5776, + "text_loss": 0.4140625 + }, + { + "epoch": 0.48, + "learning_rate": 5.311815748267269e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 5777, + "text_loss": 0.3828125 + }, + { + "epoch": 0.48, + "learning_rate": 5.3105127632431154e-06, + "loss": 0.473, + "regression_loss": 0.0, + "step": 5778, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.48, + "learning_rate": 5.3092097570498035e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 5779, + "text_loss": 0.6875 + }, + { + "epoch": 0.48, + "learning_rate": 5.30790672977617e-06, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 5780, + "text_loss": 0.60546875 + }, + { + "epoch": 0.48, + "learning_rate": 5.306603681511043e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 5781, + "text_loss": 0.66796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.305300612343263e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 5782, + "text_loss": 0.671875 + }, + { + "epoch": 0.48, + "learning_rate": 5.303997522361663e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 5783, + "text_loss": 0.82421875 + }, + { + "epoch": 0.48, + "learning_rate": 5.302694411655083e-06, + "loss": 0.6572, + "regression_loss": 0.0, + "step": 5784, + "text_loss": 0.609375 + }, + { + "epoch": 0.48, + "learning_rate": 5.301391280312359e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 5785, + "text_loss": 0.3203125 + }, + { + "epoch": 0.48, + "learning_rate": 5.300088128422337e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 5786, + "text_loss": 0.71875 + }, + { + "epoch": 0.48, + "learning_rate": 5.2987849560738545e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 5787, + "text_loss": 0.69140625 + }, + { + "epoch": 0.48, + "learning_rate": 5.297481763355758e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 5788, + "text_loss": 0.6796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.296178550356891e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 5789, + "text_loss": 0.486328125 + }, + { + "epoch": 0.48, + "learning_rate": 5.294875317166099e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 5790, + "text_loss": 0.55078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.293572063872232e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 5791, + "text_loss": 0.453125 + }, + { + "epoch": 0.48, + "learning_rate": 5.292268790564138e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 5792, + "text_loss": 0.4921875 + }, + { + "epoch": 0.48, + "learning_rate": 5.290965497330667e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 5793, + "text_loss": 0.4296875 + }, + { + "epoch": 0.48, + "learning_rate": 5.289662184260672e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 5794, + "text_loss": 0.65625 + }, + { + "epoch": 0.48, + "learning_rate": 5.288358851443004e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 5795, + "text_loss": 0.578125 + }, + { + "epoch": 0.48, + "learning_rate": 5.287055498966519e-06, + "loss": 0.5936, + "regression_loss": 0.0, + "step": 5796, + "text_loss": 0.47265625 + }, + { + "epoch": 0.48, + "learning_rate": 5.285752126920074e-06, + "loss": 0.5184, + "regression_loss": 0.0, + "step": 5797, + "text_loss": 0.43359375 + }, + { + "epoch": 0.48, + "learning_rate": 5.2844487353925236e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 5798, + "text_loss": 0.69140625 + }, + { + "epoch": 0.48, + "learning_rate": 5.283145324472728e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 5799, + "text_loss": 0.7265625 + }, + { + "epoch": 0.48, + "learning_rate": 5.2818418942495475e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 5800, + "text_loss": 0.640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.28053844481184e-06, + "loss": 0.4309, + "regression_loss": 0.0, + "step": 5801, + "text_loss": 0.337890625 + }, + { + "epoch": 0.48, + "learning_rate": 5.279234976248472e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 5802, + "text_loss": 0.98046875 + }, + { + "epoch": 0.48, + "learning_rate": 5.277931488648306e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 5803, + "text_loss": 0.58984375 + }, + { + "epoch": 0.48, + "learning_rate": 5.276627982100205e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 5804, + "text_loss": 0.412109375 + }, + { + "epoch": 0.48, + "learning_rate": 5.275324456693037e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 5805, + "text_loss": 0.5859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.2740209125156706e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 5806, + "text_loss": 0.419921875 + }, + { + "epoch": 0.48, + "learning_rate": 5.272717349656973e-06, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 5807, + "text_loss": 0.435546875 + }, + { + "epoch": 0.48, + "learning_rate": 5.271413768205816e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 5808, + "text_loss": 0.478515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.270110168251069e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 5809, + "text_loss": 0.451171875 + }, + { + "epoch": 0.48, + "learning_rate": 5.268806549881606e-06, + "loss": 0.4662, + "regression_loss": 0.0, + "step": 5810, + "text_loss": 0.6640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.267502913186302e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 5811, + "text_loss": 0.466796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.266199258254029e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 5812, + "text_loss": 0.357421875 + }, + { + "epoch": 0.48, + "learning_rate": 5.264895585173666e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 5813, + "text_loss": 0.578125 + }, + { + "epoch": 0.48, + "learning_rate": 5.263591894034092e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 5814, + "text_loss": 0.31640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.262288184924182e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 5815, + "text_loss": 0.625 + }, + { + "epoch": 0.48, + "learning_rate": 5.2609844579328195e-06, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 5816, + "text_loss": 0.56640625 + }, + { + "epoch": 0.48, + "learning_rate": 5.2596807131488834e-06, + "loss": 0.7207, + "regression_loss": 0.0, + "step": 5817, + "text_loss": 0.796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.258376950661258e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 5818, + "text_loss": 0.55078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.257073170558827e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 5819, + "text_loss": 0.65625 + }, + { + "epoch": 0.48, + "learning_rate": 5.255769372930475e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 5820, + "text_loss": 0.70703125 + }, + { + "epoch": 0.48, + "learning_rate": 5.254465557865087e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 5821, + "text_loss": 0.6796875 + }, + { + "epoch": 0.48, + "learning_rate": 5.253161725451554e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 5822, + "text_loss": 0.60546875 + }, + { + "epoch": 0.48, + "learning_rate": 5.25185787577876e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 5823, + "text_loss": 0.625 + }, + { + "epoch": 0.48, + "learning_rate": 5.250554008935596e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 5824, + "text_loss": 0.55859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.249250125010957e-06, + "loss": 0.3782, + "regression_loss": 0.0, + "step": 5825, + "text_loss": 0.52734375 + }, + { + "epoch": 0.48, + "learning_rate": 5.2479462240937294e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 5826, + "text_loss": 0.80859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.246642306272809e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 5827, + "text_loss": 0.55859375 + }, + { + "epoch": 0.48, + "learning_rate": 5.245338371637091e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 5828, + "text_loss": 0.5078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.244034420275469e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 5829, + "text_loss": 0.53515625 + }, + { + "epoch": 0.48, + "learning_rate": 5.242730452276841e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 5830, + "text_loss": 0.59765625 + }, + { + "epoch": 0.48, + "learning_rate": 5.241426467730105e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 5831, + "text_loss": 0.3125 + }, + { + "epoch": 0.48, + "learning_rate": 5.240122466724158e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 5832, + "text_loss": 0.58203125 + }, + { + "epoch": 0.48, + "learning_rate": 5.238818449347904e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 5833, + "text_loss": 0.322265625 + }, + { + "epoch": 0.48, + "learning_rate": 5.237514415690239e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 5834, + "text_loss": 0.5078125 + }, + { + "epoch": 0.48, + "learning_rate": 5.236210365840068e-06, + "loss": 0.4242, + "regression_loss": 0.0, + "step": 5835, + "text_loss": 0.259765625 + }, + { + "epoch": 0.49, + "learning_rate": 5.234906299886295e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 5836, + "text_loss": 0.54296875 + }, + { + "epoch": 0.49, + "learning_rate": 5.233602217917824e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 5837, + "text_loss": 0.384765625 + }, + { + "epoch": 0.49, + "learning_rate": 5.232298120023559e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 5838, + "text_loss": 0.515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.23099400629241e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 5839, + "text_loss": 0.5234375 + }, + { + "epoch": 0.49, + "learning_rate": 5.229689876813281e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 5840, + "text_loss": 0.322265625 + }, + { + "epoch": 0.49, + "learning_rate": 5.228385731675083e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 5841, + "text_loss": 0.640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.2270815709667265e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5842, + "text_loss": 0.50390625 + }, + { + "epoch": 0.49, + "learning_rate": 5.22577739477712e-06, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 5843, + "text_loss": 0.349609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.224473203195176e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 5844, + "text_loss": 0.4453125 + }, + { + "epoch": 0.49, + "learning_rate": 5.223168996309811e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 5845, + "text_loss": 0.51171875 + }, + { + "epoch": 0.49, + "learning_rate": 5.221864774209935e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 5846, + "text_loss": 0.3984375 + }, + { + "epoch": 0.49, + "learning_rate": 5.220560536984464e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 5847, + "text_loss": 0.578125 + }, + { + "epoch": 0.49, + "learning_rate": 5.219256284722316e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 5848, + "text_loss": 0.3828125 + }, + { + "epoch": 0.49, + "learning_rate": 5.2179520175124056e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 5849, + "text_loss": 0.46484375 + }, + { + "epoch": 0.49, + "learning_rate": 5.216647735443654e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 5850, + "text_loss": 0.400390625 + }, + { + "epoch": 0.49, + "learning_rate": 5.215343438604977e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 5851, + "text_loss": 0.4140625 + }, + { + "epoch": 0.49, + "learning_rate": 5.214039127085297e-06, + "loss": 0.4514, + "regression_loss": 0.0, + "step": 5852, + "text_loss": 0.53515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.212734800973536e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 5853, + "text_loss": 0.83203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.211430460358613e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 5854, + "text_loss": 0.4921875 + }, + { + "epoch": 0.49, + "learning_rate": 5.210126105329454e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 5855, + "text_loss": 0.380859375 + }, + { + "epoch": 0.49, + "learning_rate": 5.208821735974984e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 5856, + "text_loss": 0.65234375 + }, + { + "epoch": 0.49, + "learning_rate": 5.207517352384125e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 5857, + "text_loss": 0.53125 + }, + { + "epoch": 0.49, + "learning_rate": 5.2062129546458054e-06, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 5858, + "text_loss": 0.470703125 + }, + { + "epoch": 0.49, + "learning_rate": 5.204908542848951e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 5859, + "text_loss": 0.62109375 + }, + { + "epoch": 0.49, + "learning_rate": 5.20360411708249e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 5860, + "text_loss": 0.57421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.202299677435354e-06, + "loss": 0.5267, + "regression_loss": 0.0, + "step": 5861, + "text_loss": 0.57421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.200995223996469e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 5862, + "text_loss": 0.31640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.1996907568547675e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 5863, + "text_loss": 0.49609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.198386276099183e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 5864, + "text_loss": 0.58203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.197081781818646e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 5865, + "text_loss": 0.26953125 + }, + { + "epoch": 0.49, + "learning_rate": 5.195777274102091e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 5866, + "text_loss": 0.4140625 + }, + { + "epoch": 0.49, + "learning_rate": 5.194472753038453e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 5867, + "text_loss": 0.498046875 + }, + { + "epoch": 0.49, + "learning_rate": 5.193168218716666e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 5868, + "text_loss": 0.53125 + }, + { + "epoch": 0.49, + "learning_rate": 5.1918636712256685e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 5869, + "text_loss": 0.67578125 + }, + { + "epoch": 0.49, + "learning_rate": 5.190559110654398e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 5870, + "text_loss": 0.671875 + }, + { + "epoch": 0.49, + "learning_rate": 5.189254537091791e-06, + "loss": 0.425, + "regression_loss": 0.0, + "step": 5871, + "text_loss": 0.37890625 + }, + { + "epoch": 0.49, + "learning_rate": 5.187949950626786e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 5872, + "text_loss": 0.474609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.186645351348324e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 5873, + "text_loss": 0.376953125 + }, + { + "epoch": 0.49, + "learning_rate": 5.185340739345347e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 5874, + "text_loss": 0.421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.184036114706795e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 5875, + "text_loss": 0.34765625 + }, + { + "epoch": 0.49, + "learning_rate": 5.182731477521611e-06, + "loss": 0.718, + "regression_loss": 0.0, + "step": 5876, + "text_loss": 0.375 + }, + { + "epoch": 0.49, + "learning_rate": 5.181426827878739e-06, + "loss": 0.4089, + "regression_loss": 0.0, + "step": 5877, + "text_loss": 0.2421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.180122165867124e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 5878, + "text_loss": 0.54296875 + }, + { + "epoch": 0.49, + "learning_rate": 5.178817491575708e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 5879, + "text_loss": 0.7421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.17751280509344e-06, + "loss": 0.4453, + "regression_loss": 0.0, + "step": 5880, + "text_loss": 0.41796875 + }, + { + "epoch": 0.49, + "learning_rate": 5.176208106509266e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 5881, + "text_loss": 0.392578125 + }, + { + "epoch": 0.49, + "learning_rate": 5.174903395912133e-06, + "loss": 0.446, + "regression_loss": 0.0, + "step": 5882, + "text_loss": 0.388671875 + }, + { + "epoch": 0.49, + "learning_rate": 5.173598673390989e-06, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 5883, + "text_loss": 0.37109375 + }, + { + "epoch": 0.49, + "learning_rate": 5.172293939034786e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 5884, + "text_loss": 0.44921875 + }, + { + "epoch": 0.49, + "learning_rate": 5.1709891929324705e-06, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 5885, + "text_loss": 0.48046875 + }, + { + "epoch": 0.49, + "learning_rate": 5.169684435172996e-06, + "loss": 0.4309, + "regression_loss": 0.0, + "step": 5886, + "text_loss": 0.349609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.168379665845313e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 5887, + "text_loss": 0.4296875 + }, + { + "epoch": 0.49, + "learning_rate": 5.1670748850383734e-06, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 5888, + "text_loss": 0.78515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.165770092841133e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 5889, + "text_loss": 0.52734375 + }, + { + "epoch": 0.49, + "learning_rate": 5.164465289342542e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 5890, + "text_loss": 0.31640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.163160474631556e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 5891, + "text_loss": 0.33984375 + }, + { + "epoch": 0.49, + "learning_rate": 5.161855648797134e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 5892, + "text_loss": 0.5625 + }, + { + "epoch": 0.49, + "learning_rate": 5.1605508119282276e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 5893, + "text_loss": 0.3125 + }, + { + "epoch": 0.49, + "learning_rate": 5.159245964113797e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 5894, + "text_loss": 0.69921875 + }, + { + "epoch": 0.49, + "learning_rate": 5.1579411054428e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 5895, + "text_loss": 0.53125 + }, + { + "epoch": 0.49, + "learning_rate": 5.156636236004193e-06, + "loss": 0.6294, + "regression_loss": 0.0, + "step": 5896, + "text_loss": 0.69140625 + }, + { + "epoch": 0.49, + "learning_rate": 5.155331355886936e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 5897, + "text_loss": 0.361328125 + }, + { + "epoch": 0.49, + "learning_rate": 5.154026465179991e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 5898, + "text_loss": 0.65234375 + }, + { + "epoch": 0.49, + "learning_rate": 5.152721563972315e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 5899, + "text_loss": 0.6640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.151416652352873e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 5900, + "text_loss": 0.53515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.1501117304106255e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 5901, + "text_loss": 0.65625 + }, + { + "epoch": 0.49, + "learning_rate": 5.148806798234535e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 5902, + "text_loss": 0.451171875 + }, + { + "epoch": 0.49, + "learning_rate": 5.147501855913567e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 5903, + "text_loss": 0.515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.146196903536683e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 5904, + "text_loss": 0.6015625 + }, + { + "epoch": 0.49, + "learning_rate": 5.144891941192849e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 5905, + "text_loss": 0.58203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.143586968971033e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 5906, + "text_loss": 0.60546875 + }, + { + "epoch": 0.49, + "learning_rate": 5.142281986960196e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 5907, + "text_loss": 0.8203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.140976995249309e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 5908, + "text_loss": 0.66015625 + }, + { + "epoch": 0.49, + "learning_rate": 5.13967199392734e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 5909, + "text_loss": 0.55078125 + }, + { + "epoch": 0.49, + "learning_rate": 5.138366983083255e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 5910, + "text_loss": 0.400390625 + }, + { + "epoch": 0.49, + "learning_rate": 5.137061962806022e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 5911, + "text_loss": 0.64453125 + }, + { + "epoch": 0.49, + "learning_rate": 5.1357569331846155e-06, + "loss": 0.4767, + "regression_loss": 0.0, + "step": 5912, + "text_loss": 0.470703125 + }, + { + "epoch": 0.49, + "learning_rate": 5.1344518943080006e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 5913, + "text_loss": 0.78515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.133146846265151e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 5914, + "text_loss": 0.56640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.131841789145035e-06, + "loss": 0.5138, + "regression_loss": 0.0, + "step": 5915, + "text_loss": 0.49609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.130536723036629e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 5916, + "text_loss": 0.455078125 + }, + { + "epoch": 0.49, + "learning_rate": 5.129231648028903e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 5917, + "text_loss": 0.52734375 + }, + { + "epoch": 0.49, + "learning_rate": 5.12792656421083e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 5918, + "text_loss": 0.5390625 + }, + { + "epoch": 0.49, + "learning_rate": 5.126621471671384e-06, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 5919, + "text_loss": 0.51953125 + }, + { + "epoch": 0.49, + "learning_rate": 5.1253163704995425e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 5920, + "text_loss": 0.69140625 + }, + { + "epoch": 0.49, + "learning_rate": 5.124011260784275e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 5921, + "text_loss": 0.2890625 + }, + { + "epoch": 0.49, + "learning_rate": 5.122706142614562e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 5922, + "text_loss": 0.59765625 + }, + { + "epoch": 0.49, + "learning_rate": 5.121401016079378e-06, + "loss": 0.5182, + "regression_loss": 0.0, + "step": 5923, + "text_loss": 0.8515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.120095881267699e-06, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 5924, + "text_loss": 0.62109375 + }, + { + "epoch": 0.49, + "learning_rate": 5.118790738268503e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 5925, + "text_loss": 0.66015625 + }, + { + "epoch": 0.49, + "learning_rate": 5.1174855871707695e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 5926, + "text_loss": 0.5546875 + }, + { + "epoch": 0.49, + "learning_rate": 5.116180428063474e-06, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 5927, + "text_loss": 0.546875 + }, + { + "epoch": 0.49, + "learning_rate": 5.1148752610355964e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 5928, + "text_loss": 0.408203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.11357008617612e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 5929, + "text_loss": 0.474609375 + }, + { + "epoch": 0.49, + "learning_rate": 5.112264903574018e-06, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 5930, + "text_loss": 0.2265625 + }, + { + "epoch": 0.49, + "learning_rate": 5.1109597133182774e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 5931, + "text_loss": 0.345703125 + }, + { + "epoch": 0.49, + "learning_rate": 5.109654515497875e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 5932, + "text_loss": 0.69140625 + }, + { + "epoch": 0.49, + "learning_rate": 5.108349310201795e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 5933, + "text_loss": 0.734375 + }, + { + "epoch": 0.49, + "learning_rate": 5.107044097519018e-06, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 5934, + "text_loss": 0.478515625 + }, + { + "epoch": 0.49, + "learning_rate": 5.105738877538528e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 5935, + "text_loss": 0.7578125 + }, + { + "epoch": 0.49, + "learning_rate": 5.104433650349307e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 5936, + "text_loss": 0.5078125 + }, + { + "epoch": 0.49, + "learning_rate": 5.103128416040338e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 5937, + "text_loss": 0.6796875 + }, + { + "epoch": 0.49, + "learning_rate": 5.101823174700608e-06, + "loss": 0.6267, + "regression_loss": 0.0, + "step": 5938, + "text_loss": 0.6640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.100517926419098e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 5939, + "text_loss": 0.482421875 + }, + { + "epoch": 0.49, + "learning_rate": 5.099212671284796e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 5940, + "text_loss": 0.58984375 + }, + { + "epoch": 0.49, + "learning_rate": 5.0979074093866875e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 5941, + "text_loss": 0.41015625 + }, + { + "epoch": 0.49, + "learning_rate": 5.096602140813756e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 5942, + "text_loss": 0.486328125 + }, + { + "epoch": 0.49, + "learning_rate": 5.09529686565499e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 5943, + "text_loss": 0.58203125 + }, + { + "epoch": 0.49, + "learning_rate": 5.0939915839993755e-06, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 5944, + "text_loss": 0.5625 + }, + { + "epoch": 0.49, + "learning_rate": 5.0926862959359014e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 5945, + "text_loss": 0.427734375 + }, + { + "epoch": 0.49, + "learning_rate": 5.091381001553552e-06, + "loss": 0.6548, + "regression_loss": 0.0, + "step": 5946, + "text_loss": 0.4375 + }, + { + "epoch": 0.49, + "learning_rate": 5.0900757009413206e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 5947, + "text_loss": 0.81640625 + }, + { + "epoch": 0.49, + "learning_rate": 5.088770394188192e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5948, + "text_loss": 0.51171875 + }, + { + "epoch": 0.49, + "learning_rate": 5.087465081383156e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 5949, + "text_loss": 0.55859375 + }, + { + "epoch": 0.49, + "learning_rate": 5.0861597626152035e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 5950, + "text_loss": 0.7578125 + }, + { + "epoch": 0.49, + "learning_rate": 5.084854437973323e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 5951, + "text_loss": 0.625 + }, + { + "epoch": 0.49, + "learning_rate": 5.083549107546505e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 5952, + "text_loss": 0.5625 + }, + { + "epoch": 0.49, + "learning_rate": 5.082243771423742e-06, + "loss": 0.4535, + "regression_loss": 0.0, + "step": 5953, + "text_loss": 0.64453125 + }, + { + "epoch": 0.49, + "learning_rate": 5.08093842969402e-06, + "loss": 0.5082, + "regression_loss": 0.0, + "step": 5954, + "text_loss": 0.5 + }, + { + "epoch": 0.49, + "learning_rate": 5.0796330824463355e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 5955, + "text_loss": 0.84375 + }, + { + "epoch": 0.5, + "learning_rate": 5.07832772976968e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 5956, + "text_loss": 0.466796875 + }, + { + "epoch": 0.5, + "learning_rate": 5.077022371753043e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 5957, + "text_loss": 0.4453125 + }, + { + "epoch": 0.5, + "learning_rate": 5.075717008485419e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 5958, + "text_loss": 0.455078125 + }, + { + "epoch": 0.5, + "learning_rate": 5.074411640055801e-06, + "loss": 0.6753, + "regression_loss": 0.0, + "step": 5959, + "text_loss": 0.53125 + }, + { + "epoch": 0.5, + "learning_rate": 5.0731062665531805e-06, + "loss": 0.6355, + "regression_loss": 0.0, + "step": 5960, + "text_loss": 0.703125 + }, + { + "epoch": 0.5, + "learning_rate": 5.071800888066552e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 5961, + "text_loss": 0.326171875 + }, + { + "epoch": 0.5, + "learning_rate": 5.070495504684912e-06, + "loss": 0.3965, + "regression_loss": 0.0, + "step": 5962, + "text_loss": 0.412109375 + }, + { + "epoch": 0.5, + "learning_rate": 5.069190116497251e-06, + "loss": 0.6265, + "regression_loss": 0.0, + "step": 5963, + "text_loss": 0.80859375 + }, + { + "epoch": 0.5, + "learning_rate": 5.067884723592566e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 5964, + "text_loss": 0.59375 + }, + { + "epoch": 0.5, + "learning_rate": 5.066579326059851e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 5965, + "text_loss": 0.625 + }, + { + "epoch": 0.5, + "learning_rate": 5.0652739239881e-06, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 5966, + "text_loss": 0.369140625 + }, + { + "epoch": 0.5, + "learning_rate": 5.063968517466311e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 5967, + "text_loss": 0.734375 + }, + { + "epoch": 0.5, + "learning_rate": 5.06266310658348e-06, + "loss": 0.4077, + "regression_loss": 0.0, + "step": 5968, + "text_loss": 0.25390625 + }, + { + "epoch": 0.5, + "learning_rate": 5.0613576914286e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 5969, + "text_loss": 0.421875 + }, + { + "epoch": 0.5, + "learning_rate": 5.060052272090671e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 5970, + "text_loss": 0.59765625 + }, + { + "epoch": 0.5, + "learning_rate": 5.058746848658688e-06, + "loss": 0.4291, + "regression_loss": 0.0, + "step": 5971, + "text_loss": 0.50390625 + }, + { + "epoch": 0.5, + "learning_rate": 5.057441421221647e-06, + "loss": 0.3973, + "regression_loss": 0.0, + "step": 5972, + "text_loss": 0.330078125 + }, + { + "epoch": 0.5, + "learning_rate": 5.056135989868549e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 5973, + "text_loss": 0.7265625 + }, + { + "epoch": 0.5, + "learning_rate": 5.054830554688387e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 5974, + "text_loss": 0.57421875 + }, + { + "epoch": 0.5, + "learning_rate": 5.053525115770162e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 5975, + "text_loss": 0.333984375 + }, + { + "epoch": 0.5, + "learning_rate": 5.0522196732028705e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 5976, + "text_loss": 0.478515625 + }, + { + "epoch": 0.5, + "learning_rate": 5.0509142270755115e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 5977, + "text_loss": 0.267578125 + }, + { + "epoch": 0.5, + "learning_rate": 5.049608777477083e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 5978, + "text_loss": 0.59375 + }, + { + "epoch": 0.5, + "learning_rate": 5.048303324496586e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 5979, + "text_loss": 0.66796875 + }, + { + "epoch": 0.5, + "learning_rate": 5.046997868223016e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 5980, + "text_loss": 0.56640625 + }, + { + "epoch": 0.5, + "learning_rate": 5.045692408745375e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 5981, + "text_loss": 0.609375 + }, + { + "epoch": 0.5, + "learning_rate": 5.044386946152662e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 5982, + "text_loss": 0.3828125 + }, + { + "epoch": 0.5, + "learning_rate": 5.0430814805338745e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 5983, + "text_loss": 0.66015625 + }, + { + "epoch": 0.5, + "learning_rate": 5.041776011978016e-06, + "loss": 0.5192, + "regression_loss": 0.0, + "step": 5984, + "text_loss": 0.31640625 + }, + { + "epoch": 0.5, + "learning_rate": 5.040470540574084e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 5985, + "text_loss": 0.72265625 + }, + { + "epoch": 0.5, + "learning_rate": 5.0391650664110796e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 5986, + "text_loss": 0.400390625 + }, + { + "epoch": 0.5, + "learning_rate": 5.037859589578004e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 5987, + "text_loss": 0.279296875 + }, + { + "epoch": 0.5, + "learning_rate": 5.036554110163856e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 5988, + "text_loss": 0.54296875 + }, + { + "epoch": 0.5, + "learning_rate": 5.035248628257637e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 5989, + "text_loss": 0.5234375 + }, + { + "epoch": 0.5, + "learning_rate": 5.033943143948351e-06, + "loss": 0.5959, + "regression_loss": 0.0, + "step": 5990, + "text_loss": 0.67578125 + }, + { + "epoch": 0.5, + "learning_rate": 5.032637657324994e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 5991, + "text_loss": 0.298828125 + }, + { + "epoch": 0.5, + "learning_rate": 5.0313321684765715e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 5992, + "text_loss": 0.5859375 + }, + { + "epoch": 0.5, + "learning_rate": 5.030026677492084e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 5993, + "text_loss": 0.4609375 + }, + { + "epoch": 0.5, + "learning_rate": 5.028721184460531e-06, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 5994, + "text_loss": 0.5390625 + }, + { + "epoch": 0.5, + "learning_rate": 5.027415689470918e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 5995, + "text_loss": 0.6875 + }, + { + "epoch": 0.5, + "learning_rate": 5.026110192612244e-06, + "loss": 0.415, + "regression_loss": 0.0, + "step": 5996, + "text_loss": 0.515625 + }, + { + "epoch": 0.5, + "learning_rate": 5.024804693973511e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 5997, + "text_loss": 0.6015625 + }, + { + "epoch": 0.5, + "learning_rate": 5.023499193643724e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 5998, + "text_loss": 0.302734375 + }, + { + "epoch": 0.5, + "learning_rate": 5.022193691711882e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 5999, + "text_loss": 0.81640625 + }, + { + "epoch": 0.5, + "learning_rate": 5.0208881882669894e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 6000, + "text_loss": 0.3828125 + }, + { + "epoch": 0.5, + "learning_rate": 5.019582683398048e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 6001, + "text_loss": 0.74609375 + }, + { + "epoch": 0.5, + "learning_rate": 5.01827717719406e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 6002, + "text_loss": 0.66015625 + }, + { + "epoch": 0.5, + "learning_rate": 5.016971669744027e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 6003, + "text_loss": 0.47265625 + }, + { + "epoch": 0.5, + "learning_rate": 5.015666161136956e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 6004, + "text_loss": 0.6796875 + }, + { + "epoch": 0.5, + "learning_rate": 5.014360651461846e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 6005, + "text_loss": 0.439453125 + }, + { + "epoch": 0.5, + "learning_rate": 5.0130551408077e-06, + "loss": 0.4718, + "regression_loss": 0.0, + "step": 6006, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.5, + "learning_rate": 5.011749629263524e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 6007, + "text_loss": 0.5078125 + }, + { + "epoch": 0.5, + "learning_rate": 5.010444116918317e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 6008, + "text_loss": 0.5390625 + }, + { + "epoch": 0.5, + "learning_rate": 5.009138603861086e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 6009, + "text_loss": 0.6171875 + }, + { + "epoch": 0.5, + "learning_rate": 5.007833090180831e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 6010, + "text_loss": 0.4765625 + }, + { + "epoch": 0.5, + "learning_rate": 5.006527575966554e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 6011, + "text_loss": 0.427734375 + }, + { + "epoch": 0.5, + "learning_rate": 5.005222061307264e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 6012, + "text_loss": 0.318359375 + }, + { + "epoch": 0.5, + "learning_rate": 5.0039165462919606e-06, + "loss": 0.6826, + "regression_loss": 0.0, + "step": 6013, + "text_loss": 0.59375 + }, + { + "epoch": 0.5, + "learning_rate": 5.002611031009645e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 6014, + "text_loss": 0.75 + }, + { + "epoch": 0.5, + "learning_rate": 5.001305515549325e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 6015, + "text_loss": 0.57421875 + }, + { + "epoch": 0.5, + "learning_rate": 5e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 6016, + "text_loss": 0.61328125 + }, + { + "epoch": 0.5, + "learning_rate": 4.998694484450677e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 6017, + "text_loss": 0.48828125 + }, + { + "epoch": 0.5, + "learning_rate": 4.997388968990356e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 6018, + "text_loss": 0.5234375 + }, + { + "epoch": 0.5, + "learning_rate": 4.99608345370804e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 6019, + "text_loss": 0.32421875 + }, + { + "epoch": 0.5, + "learning_rate": 4.994777938692737e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 6020, + "text_loss": 0.474609375 + }, + { + "epoch": 0.5, + "learning_rate": 4.9934724240334456e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 6021, + "text_loss": 0.51171875 + }, + { + "epoch": 0.5, + "learning_rate": 4.992166909819172e-06, + "loss": 0.4559, + "regression_loss": 0.0, + "step": 6022, + "text_loss": 0.44140625 + }, + { + "epoch": 0.5, + "learning_rate": 4.990861396138916e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 6023, + "text_loss": 0.486328125 + }, + { + "epoch": 0.5, + "learning_rate": 4.989555883081684e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 6024, + "text_loss": 0.53515625 + }, + { + "epoch": 0.5, + "learning_rate": 4.988250370736477e-06, + "loss": 0.3337, + "regression_loss": 0.0, + "step": 6025, + "text_loss": 0.32421875 + }, + { + "epoch": 0.5, + "learning_rate": 4.986944859192302e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 6026, + "text_loss": 0.240234375 + }, + { + "epoch": 0.5, + "learning_rate": 4.985639348538154e-06, + "loss": 0.5706, + "regression_loss": 0.0, + "step": 6027, + "text_loss": 0.57421875 + }, + { + "epoch": 0.5, + "learning_rate": 4.984333838863045e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 6028, + "text_loss": 0.52734375 + }, + { + "epoch": 0.5, + "learning_rate": 4.983028330255972e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 6029, + "text_loss": 0.52734375 + }, + { + "epoch": 0.5, + "learning_rate": 4.981722822805942e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 6030, + "text_loss": 0.400390625 + }, + { + "epoch": 0.5, + "learning_rate": 4.980417316601954e-06, + "loss": 0.614, + "regression_loss": 0.0, + "step": 6031, + "text_loss": 0.259765625 + }, + { + "epoch": 0.5, + "learning_rate": 4.979111811733013e-06, + "loss": 0.6251, + "regression_loss": 0.0, + "step": 6032, + "text_loss": 0.73046875 + }, + { + "epoch": 0.5, + "learning_rate": 4.977806308288119e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 6033, + "text_loss": 0.6875 + }, + { + "epoch": 0.5, + "learning_rate": 4.976500806356278e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 6034, + "text_loss": 0.51171875 + }, + { + "epoch": 0.5, + "learning_rate": 4.975195306026489e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 6035, + "text_loss": 0.63671875 + }, + { + "epoch": 0.5, + "learning_rate": 4.973889807387758e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 6036, + "text_loss": 0.66015625 + }, + { + "epoch": 0.5, + "learning_rate": 4.9725843105290836e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 6037, + "text_loss": 0.82421875 + }, + { + "epoch": 0.5, + "learning_rate": 4.97127881553947e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 6038, + "text_loss": 0.80078125 + }, + { + "epoch": 0.5, + "learning_rate": 4.969973322507917e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 6039, + "text_loss": 0.609375 + }, + { + "epoch": 0.5, + "learning_rate": 4.96866783152343e-06, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 6040, + "text_loss": 0.236328125 + }, + { + "epoch": 0.5, + "learning_rate": 4.9673623426750065e-06, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 6041, + "text_loss": 0.46484375 + }, + { + "epoch": 0.5, + "learning_rate": 4.966056856051652e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 6042, + "text_loss": 0.369140625 + }, + { + "epoch": 0.5, + "learning_rate": 4.964751371742364e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 6043, + "text_loss": 0.75390625 + }, + { + "epoch": 0.5, + "learning_rate": 4.9634458898361466e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 6044, + "text_loss": 0.5546875 + }, + { + "epoch": 0.5, + "learning_rate": 4.962140410421998e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 6045, + "text_loss": 0.6875 + }, + { + "epoch": 0.5, + "learning_rate": 4.960834933588923e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 6046, + "text_loss": 0.337890625 + }, + { + "epoch": 0.5, + "learning_rate": 4.959529459425918e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 6047, + "text_loss": 0.5703125 + }, + { + "epoch": 0.5, + "learning_rate": 4.958223988021986e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 6048, + "text_loss": 0.53125 + }, + { + "epoch": 0.5, + "learning_rate": 4.956918519466126e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 6049, + "text_loss": 0.447265625 + }, + { + "epoch": 0.5, + "learning_rate": 4.955613053847341e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 6050, + "text_loss": 0.439453125 + }, + { + "epoch": 0.5, + "learning_rate": 4.954307591254627e-06, + "loss": 0.4309, + "regression_loss": 0.0, + "step": 6051, + "text_loss": 0.421875 + }, + { + "epoch": 0.5, + "learning_rate": 4.953002131776986e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 6052, + "text_loss": 0.314453125 + }, + { + "epoch": 0.5, + "learning_rate": 4.951696675503416e-06, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 6053, + "text_loss": 0.578125 + }, + { + "epoch": 0.5, + "learning_rate": 4.950391222522918e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 6054, + "text_loss": 0.68359375 + }, + { + "epoch": 0.5, + "learning_rate": 4.94908577292449e-06, + "loss": 0.4933, + "regression_loss": 0.0, + "step": 6055, + "text_loss": 0.6015625 + }, + { + "epoch": 0.5, + "learning_rate": 4.947780326797132e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 6056, + "text_loss": 0.7578125 + }, + { + "epoch": 0.5, + "learning_rate": 4.94647488422984e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 6057, + "text_loss": 0.83984375 + }, + { + "epoch": 0.5, + "learning_rate": 4.9451694453116155e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 6058, + "text_loss": 0.45703125 + }, + { + "epoch": 0.5, + "learning_rate": 4.943864010131453e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 6059, + "text_loss": 0.66796875 + }, + { + "epoch": 0.5, + "learning_rate": 4.942558578778356e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 6060, + "text_loss": 0.392578125 + }, + { + "epoch": 0.5, + "learning_rate": 4.941253151341314e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 6061, + "text_loss": 0.734375 + }, + { + "epoch": 0.5, + "learning_rate": 4.939947727909331e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 6062, + "text_loss": 0.48046875 + }, + { + "epoch": 0.5, + "learning_rate": 4.938642308571401e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 6063, + "text_loss": 0.4765625 + }, + { + "epoch": 0.5, + "learning_rate": 4.937336893416521e-06, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 6064, + "text_loss": 0.5625 + }, + { + "epoch": 0.5, + "learning_rate": 4.936031482533691e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 6065, + "text_loss": 0.46875 + }, + { + "epoch": 0.5, + "learning_rate": 4.9347260760119005e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 6066, + "text_loss": 0.66796875 + }, + { + "epoch": 0.5, + "learning_rate": 4.933420673940151e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 6067, + "text_loss": 0.62109375 + }, + { + "epoch": 0.5, + "learning_rate": 4.932115276407435e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 6068, + "text_loss": 0.478515625 + }, + { + "epoch": 0.5, + "learning_rate": 4.93080988350275e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 6069, + "text_loss": 0.50390625 + }, + { + "epoch": 0.5, + "learning_rate": 4.9295044953150885e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 6070, + "text_loss": 0.5078125 + }, + { + "epoch": 0.5, + "learning_rate": 4.9281991119334485e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 6071, + "text_loss": 0.44140625 + }, + { + "epoch": 0.5, + "learning_rate": 4.9268937334468194e-06, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 6072, + "text_loss": 0.75 + }, + { + "epoch": 0.5, + "learning_rate": 4.925588359944201e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 6073, + "text_loss": 0.52734375 + }, + { + "epoch": 0.5, + "learning_rate": 4.924282991514581e-06, + "loss": 0.5419, + "regression_loss": 0.0, + "step": 6074, + "text_loss": 0.474609375 + }, + { + "epoch": 0.5, + "learning_rate": 4.922977628246958e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 6075, + "text_loss": 0.5625 + }, + { + "epoch": 0.5, + "learning_rate": 4.921672270230321e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 6076, + "text_loss": 0.484375 + }, + { + "epoch": 0.51, + "learning_rate": 4.920366917553665e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 6077, + "text_loss": 0.53515625 + }, + { + "epoch": 0.51, + "learning_rate": 4.91906157030598e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 6078, + "text_loss": 0.640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.91775622857626e-06, + "loss": 0.627, + "regression_loss": 0.0, + "step": 6079, + "text_loss": 0.58203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.916450892453495e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 6080, + "text_loss": 0.439453125 + }, + { + "epoch": 0.51, + "learning_rate": 4.915145562026678e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 6081, + "text_loss": 0.5234375 + }, + { + "epoch": 0.51, + "learning_rate": 4.913840237384797e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 6082, + "text_loss": 0.4453125 + }, + { + "epoch": 0.51, + "learning_rate": 4.912534918616845e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 6083, + "text_loss": 0.3515625 + }, + { + "epoch": 0.51, + "learning_rate": 4.9112296058118085e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 6084, + "text_loss": 0.5390625 + }, + { + "epoch": 0.51, + "learning_rate": 4.909924299058681e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 6085, + "text_loss": 0.59765625 + }, + { + "epoch": 0.51, + "learning_rate": 4.908618998446447e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 6086, + "text_loss": 0.640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.9073137040641e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 6087, + "text_loss": 0.36328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.906008416000625e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 6088, + "text_loss": 0.609375 + }, + { + "epoch": 0.51, + "learning_rate": 4.904703134345012e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 6089, + "text_loss": 0.7109375 + }, + { + "epoch": 0.51, + "learning_rate": 4.903397859186245e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 6090, + "text_loss": 0.58984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.902092590613316e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 6091, + "text_loss": 0.6875 + }, + { + "epoch": 0.51, + "learning_rate": 4.9007873287152045e-06, + "loss": 0.4628, + "regression_loss": 0.0, + "step": 6092, + "text_loss": 0.71875 + }, + { + "epoch": 0.51, + "learning_rate": 4.899482073580903e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 6093, + "text_loss": 0.69921875 + }, + { + "epoch": 0.51, + "learning_rate": 4.898176825299394e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 6094, + "text_loss": 0.5625 + }, + { + "epoch": 0.51, + "learning_rate": 4.896871583959663e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 6095, + "text_loss": 0.62890625 + }, + { + "epoch": 0.51, + "learning_rate": 4.895566349650696e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 6096, + "text_loss": 0.451171875 + }, + { + "epoch": 0.51, + "learning_rate": 4.894261122461475e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 6097, + "text_loss": 0.51171875 + }, + { + "epoch": 0.51, + "learning_rate": 4.892955902480983e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 6098, + "text_loss": 0.51953125 + }, + { + "epoch": 0.51, + "learning_rate": 4.8916506897982085e-06, + "loss": 0.4945, + "regression_loss": 0.0, + "step": 6099, + "text_loss": 0.5546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.890345484502126e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 6100, + "text_loss": 0.59375 + }, + { + "epoch": 0.51, + "learning_rate": 4.889040286681725e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 6101, + "text_loss": 0.435546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.887735096425983e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 6102, + "text_loss": 0.6171875 + }, + { + "epoch": 0.51, + "learning_rate": 4.8864299138238834e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 6103, + "text_loss": 0.44921875 + }, + { + "epoch": 0.51, + "learning_rate": 4.885124738964404e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 6104, + "text_loss": 0.78515625 + }, + { + "epoch": 0.51, + "learning_rate": 4.8838195719365284e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 6105, + "text_loss": 0.58203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.882514412829232e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 6106, + "text_loss": 0.625 + }, + { + "epoch": 0.51, + "learning_rate": 4.881209261731498e-06, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 6107, + "text_loss": 0.302734375 + }, + { + "epoch": 0.51, + "learning_rate": 4.879904118732302e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 6108, + "text_loss": 0.6015625 + }, + { + "epoch": 0.51, + "learning_rate": 4.8785989839206244e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 6109, + "text_loss": 0.279296875 + }, + { + "epoch": 0.51, + "learning_rate": 4.87729385738544e-06, + "loss": 0.4241, + "regression_loss": 0.0, + "step": 6110, + "text_loss": 0.6328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.875988739215725e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 6111, + "text_loss": 0.5546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.87468362950046e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 6112, + "text_loss": 0.5546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.8733785283286164e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 6113, + "text_loss": 0.60546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.872073435789171e-06, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 6114, + "text_loss": 0.5859375 + }, + { + "epoch": 0.51, + "learning_rate": 4.870768351971099e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 6115, + "text_loss": 0.80859375 + }, + { + "epoch": 0.51, + "learning_rate": 4.869463276963373e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 6116, + "text_loss": 0.48828125 + }, + { + "epoch": 0.51, + "learning_rate": 4.868158210854964e-06, + "loss": 0.4303, + "regression_loss": 0.0, + "step": 6117, + "text_loss": 0.443359375 + }, + { + "epoch": 0.51, + "learning_rate": 4.86685315373485e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 6118, + "text_loss": 0.470703125 + }, + { + "epoch": 0.51, + "learning_rate": 4.865548105691999e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 6119, + "text_loss": 0.31640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.864243066815385e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 6120, + "text_loss": 0.8203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.862938037193977e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 6121, + "text_loss": 0.43359375 + }, + { + "epoch": 0.51, + "learning_rate": 4.861633016916746e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 6122, + "text_loss": 0.2578125 + }, + { + "epoch": 0.51, + "learning_rate": 4.86032800607266e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 6123, + "text_loss": 0.38671875 + }, + { + "epoch": 0.51, + "learning_rate": 4.859023004750692e-06, + "loss": 0.465, + "regression_loss": 0.0, + "step": 6124, + "text_loss": 0.68359375 + }, + { + "epoch": 0.51, + "learning_rate": 4.857718013039804e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 6125, + "text_loss": 0.7109375 + }, + { + "epoch": 0.51, + "learning_rate": 4.8564130310289695e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 6126, + "text_loss": 0.2734375 + }, + { + "epoch": 0.51, + "learning_rate": 4.855108058807151e-06, + "loss": 0.689, + "regression_loss": 0.0, + "step": 6127, + "text_loss": 0.8203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.853803096463319e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 6128, + "text_loss": 0.416015625 + }, + { + "epoch": 0.51, + "learning_rate": 4.852498144086434e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 6129, + "text_loss": 0.423828125 + }, + { + "epoch": 0.51, + "learning_rate": 4.851193201765466e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 6130, + "text_loss": 0.46484375 + }, + { + "epoch": 0.51, + "learning_rate": 4.849888269589375e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 6131, + "text_loss": 0.640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.848583347647128e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 6132, + "text_loss": 0.6640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.8472784360276856e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 6133, + "text_loss": 0.83203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.8459735348200115e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 6134, + "text_loss": 0.361328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.844668644113065e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 6135, + "text_loss": 0.390625 + }, + { + "epoch": 0.51, + "learning_rate": 4.84336376399581e-06, + "loss": 0.6353, + "regression_loss": 0.0, + "step": 6136, + "text_loss": 0.64453125 + }, + { + "epoch": 0.51, + "learning_rate": 4.842058894557202e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 6137, + "text_loss": 0.76171875 + }, + { + "epoch": 0.51, + "learning_rate": 4.840754035886205e-06, + "loss": 0.6479, + "regression_loss": 0.0, + "step": 6138, + "text_loss": 0.55078125 + }, + { + "epoch": 0.51, + "learning_rate": 4.839449188071773e-06, + "loss": 0.4358, + "regression_loss": 0.0, + "step": 6139, + "text_loss": 0.38671875 + }, + { + "epoch": 0.51, + "learning_rate": 4.838144351202869e-06, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 6140, + "text_loss": 0.4609375 + }, + { + "epoch": 0.51, + "learning_rate": 4.836839525368445e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 6141, + "text_loss": 0.494140625 + }, + { + "epoch": 0.51, + "learning_rate": 4.8355347106574615e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 6142, + "text_loss": 0.25 + }, + { + "epoch": 0.51, + "learning_rate": 4.834229907158869e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 6143, + "text_loss": 0.3984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.832925114961629e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 6144, + "text_loss": 0.609375 + }, + { + "epoch": 0.51, + "learning_rate": 4.831620334154688e-06, + "loss": 0.6638, + "regression_loss": 0.0, + "step": 6145, + "text_loss": 0.84765625 + }, + { + "epoch": 0.51, + "learning_rate": 4.830315564827006e-06, + "loss": 0.4377, + "regression_loss": 0.0, + "step": 6146, + "text_loss": 0.2265625 + }, + { + "epoch": 0.51, + "learning_rate": 4.82901080706753e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 6147, + "text_loss": 0.69140625 + }, + { + "epoch": 0.51, + "learning_rate": 4.827706060965216e-06, + "loss": 0.5077, + "regression_loss": 0.0, + "step": 6148, + "text_loss": 0.53125 + }, + { + "epoch": 0.51, + "learning_rate": 4.826401326609012e-06, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 6149, + "text_loss": 0.5546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.825096604087869e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 6150, + "text_loss": 0.7890625 + }, + { + "epoch": 0.51, + "learning_rate": 4.823791893490735e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 6151, + "text_loss": 0.251953125 + }, + { + "epoch": 0.51, + "learning_rate": 4.8224871949065615e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 6152, + "text_loss": 0.71484375 + }, + { + "epoch": 0.51, + "learning_rate": 4.821182508424293e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 6153, + "text_loss": 0.419921875 + }, + { + "epoch": 0.51, + "learning_rate": 4.819877834132879e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 6154, + "text_loss": 0.427734375 + }, + { + "epoch": 0.51, + "learning_rate": 4.8185731721212625e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 6155, + "text_loss": 0.66796875 + }, + { + "epoch": 0.51, + "learning_rate": 4.817268522478391e-06, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 6156, + "text_loss": 0.6953125 + }, + { + "epoch": 0.51, + "learning_rate": 4.815963885293206e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 6157, + "text_loss": 0.5625 + }, + { + "epoch": 0.51, + "learning_rate": 4.814659260654654e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 6158, + "text_loss": 0.6953125 + }, + { + "epoch": 0.51, + "learning_rate": 4.813354648651677e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 6159, + "text_loss": 0.4921875 + }, + { + "epoch": 0.51, + "learning_rate": 4.812050049373215e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 6160, + "text_loss": 0.8203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.810745462908212e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 6161, + "text_loss": 0.54296875 + }, + { + "epoch": 0.51, + "learning_rate": 4.809440889345603e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 6162, + "text_loss": 0.58984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.808136328774332e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 6163, + "text_loss": 0.486328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.806831781283333e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 6164, + "text_loss": 0.296875 + }, + { + "epoch": 0.51, + "learning_rate": 4.805527246961548e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 6165, + "text_loss": 0.703125 + }, + { + "epoch": 0.51, + "learning_rate": 4.80422272589791e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 6166, + "text_loss": 0.7734375 + }, + { + "epoch": 0.51, + "learning_rate": 4.802918218181356e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 6167, + "text_loss": 0.5390625 + }, + { + "epoch": 0.51, + "learning_rate": 4.8016137239008185e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 6168, + "text_loss": 0.51953125 + }, + { + "epoch": 0.51, + "learning_rate": 4.800309243145233e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 6169, + "text_loss": 0.58203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.799004776003531e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 6170, + "text_loss": 0.59765625 + }, + { + "epoch": 0.51, + "learning_rate": 4.797700322564648e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 6171, + "text_loss": 0.73046875 + }, + { + "epoch": 0.51, + "learning_rate": 4.79639588291751e-06, + "loss": 0.4204, + "regression_loss": 0.0, + "step": 6172, + "text_loss": 0.306640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.79509145715105e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 6173, + "text_loss": 0.69921875 + }, + { + "epoch": 0.51, + "learning_rate": 4.793787045354196e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 6174, + "text_loss": 0.56640625 + }, + { + "epoch": 0.51, + "learning_rate": 4.792482647615877e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 6175, + "text_loss": 0.310546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.791178264025017e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 6176, + "text_loss": 0.50390625 + }, + { + "epoch": 0.51, + "learning_rate": 4.789873894670548e-06, + "loss": 0.3818, + "regression_loss": 0.0, + "step": 6177, + "text_loss": 0.36328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.788569539641388e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 6178, + "text_loss": 0.5625 + }, + { + "epoch": 0.51, + "learning_rate": 4.787265199026466e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 6179, + "text_loss": 0.396484375 + }, + { + "epoch": 0.51, + "learning_rate": 4.785960872914704e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 6180, + "text_loss": 0.58203125 + }, + { + "epoch": 0.51, + "learning_rate": 4.784656561395025e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 6181, + "text_loss": 0.30859375 + }, + { + "epoch": 0.51, + "learning_rate": 4.783352264556348e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 6182, + "text_loss": 0.578125 + }, + { + "epoch": 0.51, + "learning_rate": 4.782047982487597e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 6183, + "text_loss": 0.33984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.780743715277686e-06, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 6184, + "text_loss": 0.4140625 + }, + { + "epoch": 0.51, + "learning_rate": 4.779439463015538e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 6185, + "text_loss": 0.66796875 + }, + { + "epoch": 0.51, + "learning_rate": 4.778135225790067e-06, + "loss": 0.4603, + "regression_loss": 0.0, + "step": 6186, + "text_loss": 0.3984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.776831003690192e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 6187, + "text_loss": 0.4453125 + }, + { + "epoch": 0.51, + "learning_rate": 4.775526796804825e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 6188, + "text_loss": 0.458984375 + }, + { + "epoch": 0.51, + "learning_rate": 4.774222605222883e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 6189, + "text_loss": 0.53125 + }, + { + "epoch": 0.51, + "learning_rate": 4.772918429033275e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 6190, + "text_loss": 0.5546875 + }, + { + "epoch": 0.51, + "learning_rate": 4.771614268324918e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 6191, + "text_loss": 0.328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.77031012318672e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 6192, + "text_loss": 0.27734375 + }, + { + "epoch": 0.51, + "learning_rate": 4.769005993707593e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 6193, + "text_loss": 0.73046875 + }, + { + "epoch": 0.51, + "learning_rate": 4.767701879976442e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 6194, + "text_loss": 0.328125 + }, + { + "epoch": 0.51, + "learning_rate": 4.766397782082179e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 6195, + "text_loss": 0.5234375 + }, + { + "epoch": 0.51, + "learning_rate": 4.765093700113706e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 6196, + "text_loss": 0.6875 + }, + { + "epoch": 0.52, + "learning_rate": 4.7637896341599345e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 6197, + "text_loss": 0.384765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.7624855843097624e-06, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 6198, + "text_loss": 0.86328125 + }, + { + "epoch": 0.52, + "learning_rate": 4.761181550652099e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 6199, + "text_loss": 0.427734375 + }, + { + "epoch": 0.52, + "learning_rate": 4.759877533275843e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 6200, + "text_loss": 0.515625 + }, + { + "epoch": 0.52, + "learning_rate": 4.7585735322698975e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 6201, + "text_loss": 0.59765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.75726954772316e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 6202, + "text_loss": 0.5625 + }, + { + "epoch": 0.52, + "learning_rate": 4.755965579724534e-06, + "loss": 0.6038, + "regression_loss": 0.0, + "step": 6203, + "text_loss": 0.64453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.75466162836291e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 6204, + "text_loss": 0.6796875 + }, + { + "epoch": 0.52, + "learning_rate": 4.753357693727191e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 6205, + "text_loss": 0.275390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.752053775906272e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 6206, + "text_loss": 0.435546875 + }, + { + "epoch": 0.52, + "learning_rate": 4.750749874989044e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 6207, + "text_loss": 0.77734375 + }, + { + "epoch": 0.52, + "learning_rate": 4.7494459910644044e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 6208, + "text_loss": 0.66796875 + }, + { + "epoch": 0.52, + "learning_rate": 4.74814212422124e-06, + "loss": 0.5782, + "regression_loss": 0.0, + "step": 6209, + "text_loss": 0.625 + }, + { + "epoch": 0.52, + "learning_rate": 4.746838274548448e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 6210, + "text_loss": 0.484375 + }, + { + "epoch": 0.52, + "learning_rate": 4.745534442134913e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 6211, + "text_loss": 0.72265625 + }, + { + "epoch": 0.52, + "learning_rate": 4.744230627069526e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 6212, + "text_loss": 0.6171875 + }, + { + "epoch": 0.52, + "learning_rate": 4.7429268294411735e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 6213, + "text_loss": 0.70703125 + }, + { + "epoch": 0.52, + "learning_rate": 4.7416230493387434e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 6214, + "text_loss": 0.58984375 + }, + { + "epoch": 0.52, + "learning_rate": 4.740319286851116e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 6215, + "text_loss": 0.37109375 + }, + { + "epoch": 0.52, + "learning_rate": 4.739015542067182e-06, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 6216, + "text_loss": 0.474609375 + }, + { + "epoch": 0.52, + "learning_rate": 4.7377118150758185e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 6217, + "text_loss": 0.7421875 + }, + { + "epoch": 0.52, + "learning_rate": 4.736408105965909e-06, + "loss": 0.4199, + "regression_loss": 0.0, + "step": 6218, + "text_loss": 0.400390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.735104414826334e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 6219, + "text_loss": 0.5390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.733800741745972e-06, + "loss": 0.4055, + "regression_loss": 0.0, + "step": 6220, + "text_loss": 0.298828125 + }, + { + "epoch": 0.52, + "learning_rate": 4.732497086813699e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 6221, + "text_loss": 0.55859375 + }, + { + "epoch": 0.52, + "learning_rate": 4.731193450118395e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 6222, + "text_loss": 0.3828125 + }, + { + "epoch": 0.52, + "learning_rate": 4.729889831748931e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6223, + "text_loss": 0.314453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.728586231794185e-06, + "loss": 0.504, + "regression_loss": 0.0, + "step": 6224, + "text_loss": 0.30078125 + }, + { + "epoch": 0.52, + "learning_rate": 4.727282650343027e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 6225, + "text_loss": 0.578125 + }, + { + "epoch": 0.52, + "learning_rate": 4.725979087484331e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 6226, + "text_loss": 0.4140625 + }, + { + "epoch": 0.52, + "learning_rate": 4.7246755433069645e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 6227, + "text_loss": 0.33984375 + }, + { + "epoch": 0.52, + "learning_rate": 4.723372017899797e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 6228, + "text_loss": 0.5859375 + }, + { + "epoch": 0.52, + "learning_rate": 4.722068511351696e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 6229, + "text_loss": 0.5078125 + }, + { + "epoch": 0.52, + "learning_rate": 4.72076502375153e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 6230, + "text_loss": 0.279296875 + }, + { + "epoch": 0.52, + "learning_rate": 4.719461555188161e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 6231, + "text_loss": 0.27734375 + }, + { + "epoch": 0.52, + "learning_rate": 4.718158105750456e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 6232, + "text_loss": 0.3671875 + }, + { + "epoch": 0.52, + "learning_rate": 4.7168546755272735e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 6233, + "text_loss": 0.44921875 + }, + { + "epoch": 0.52, + "learning_rate": 4.715551264607479e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 6234, + "text_loss": 0.326171875 + }, + { + "epoch": 0.52, + "learning_rate": 4.714247873079928e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 6235, + "text_loss": 0.6484375 + }, + { + "epoch": 0.52, + "learning_rate": 4.712944501033484e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 6236, + "text_loss": 0.451171875 + }, + { + "epoch": 0.52, + "learning_rate": 4.7116411485569975e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 6237, + "text_loss": 0.67578125 + }, + { + "epoch": 0.52, + "learning_rate": 4.710337815739331e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 6238, + "text_loss": 0.50390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.709034502669335e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 6239, + "text_loss": 0.66015625 + }, + { + "epoch": 0.52, + "learning_rate": 4.707731209435864e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 6240, + "text_loss": 0.71875 + }, + { + "epoch": 0.52, + "learning_rate": 4.70642793612777e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 6241, + "text_loss": 0.59375 + }, + { + "epoch": 0.52, + "learning_rate": 4.705124682833903e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 6242, + "text_loss": 0.25390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.7038214496431106e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 6243, + "text_loss": 0.53125 + }, + { + "epoch": 0.52, + "learning_rate": 4.702518236644245e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 6244, + "text_loss": 0.462890625 + }, + { + "epoch": 0.52, + "learning_rate": 4.701215043926146e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 6245, + "text_loss": 0.69140625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6999118715776655e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 6246, + "text_loss": 0.58984375 + }, + { + "epoch": 0.52, + "learning_rate": 4.698608719687642e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 6247, + "text_loss": 0.40625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6973055883449206e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 6248, + "text_loss": 0.369140625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6960024776383384e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 6249, + "text_loss": 0.63671875 + }, + { + "epoch": 0.52, + "learning_rate": 4.694699387656739e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 6250, + "text_loss": 0.59375 + }, + { + "epoch": 0.52, + "learning_rate": 4.693396318488958e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 6251, + "text_loss": 0.63671875 + }, + { + "epoch": 0.52, + "learning_rate": 4.692093270223832e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 6252, + "text_loss": 0.53125 + }, + { + "epoch": 0.52, + "learning_rate": 4.690790242950198e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 6253, + "text_loss": 0.625 + }, + { + "epoch": 0.52, + "learning_rate": 4.689487236756885e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 6254, + "text_loss": 0.453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.688184251732732e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 6255, + "text_loss": 0.50390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.686881287966565e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 6256, + "text_loss": 0.314453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.685578345547215e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 6257, + "text_loss": 0.671875 + }, + { + "epoch": 0.52, + "learning_rate": 4.684275424563509e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 6258, + "text_loss": 0.392578125 + }, + { + "epoch": 0.52, + "learning_rate": 4.682972525104274e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 6259, + "text_loss": 0.400390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.681669647258333e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 6260, + "text_loss": 0.3984375 + }, + { + "epoch": 0.52, + "learning_rate": 4.680366791114515e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 6261, + "text_loss": 0.40625 + }, + { + "epoch": 0.52, + "learning_rate": 4.679063956761635e-06, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 6262, + "text_loss": 0.51953125 + }, + { + "epoch": 0.52, + "learning_rate": 4.677761144288519e-06, + "loss": 0.4674, + "regression_loss": 0.0, + "step": 6263, + "text_loss": 0.1943359375 + }, + { + "epoch": 0.52, + "learning_rate": 4.676458353783982e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 6264, + "text_loss": 0.31640625 + }, + { + "epoch": 0.52, + "learning_rate": 4.675155585336845e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 6265, + "text_loss": 0.39453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.673852839035923e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 6266, + "text_loss": 0.5546875 + }, + { + "epoch": 0.52, + "learning_rate": 4.672550114970029e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 6267, + "text_loss": 0.33203125 + }, + { + "epoch": 0.52, + "learning_rate": 4.671247413227975e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 6268, + "text_loss": 0.455078125 + }, + { + "epoch": 0.52, + "learning_rate": 4.669944733898577e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 6269, + "text_loss": 0.59375 + }, + { + "epoch": 0.52, + "learning_rate": 4.668642077070641e-06, + "loss": 0.6064, + "regression_loss": 0.0, + "step": 6270, + "text_loss": 0.59375 + }, + { + "epoch": 0.52, + "learning_rate": 4.667339442832978e-06, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 6271, + "text_loss": 0.83203125 + }, + { + "epoch": 0.52, + "learning_rate": 4.666036831274392e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 6272, + "text_loss": 0.54296875 + }, + { + "epoch": 0.52, + "learning_rate": 4.664734242483692e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 6273, + "text_loss": 0.73046875 + }, + { + "epoch": 0.52, + "learning_rate": 4.663431676549677e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 6274, + "text_loss": 0.5625 + }, + { + "epoch": 0.52, + "learning_rate": 4.662129133561156e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 6275, + "text_loss": 0.4765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.660826613606922e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 6276, + "text_loss": 0.4765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6595241167757795e-06, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 6277, + "text_loss": 0.55859375 + }, + { + "epoch": 0.52, + "learning_rate": 4.658221643156523e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 6278, + "text_loss": 0.6328125 + }, + { + "epoch": 0.52, + "learning_rate": 4.656919192837951e-06, + "loss": 0.4733, + "regression_loss": 0.0, + "step": 6279, + "text_loss": 0.451171875 + }, + { + "epoch": 0.52, + "learning_rate": 4.655616765908855e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 6280, + "text_loss": 0.455078125 + }, + { + "epoch": 0.52, + "learning_rate": 4.654314362458031e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 6281, + "text_loss": 0.4921875 + }, + { + "epoch": 0.52, + "learning_rate": 4.653011982574266e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 6282, + "text_loss": 0.6328125 + }, + { + "epoch": 0.52, + "learning_rate": 4.651709626346354e-06, + "loss": 0.6262, + "regression_loss": 0.0, + "step": 6283, + "text_loss": 0.65625 + }, + { + "epoch": 0.52, + "learning_rate": 4.65040729386308e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 6284, + "text_loss": 0.734375 + }, + { + "epoch": 0.52, + "learning_rate": 4.649104985213231e-06, + "loss": 0.4309, + "regression_loss": 0.0, + "step": 6285, + "text_loss": 0.29296875 + }, + { + "epoch": 0.52, + "learning_rate": 4.647802700485592e-06, + "loss": 0.6394, + "regression_loss": 0.0, + "step": 6286, + "text_loss": 0.373046875 + }, + { + "epoch": 0.52, + "learning_rate": 4.6465004397689466e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 6287, + "text_loss": 0.46484375 + }, + { + "epoch": 0.52, + "learning_rate": 4.6451982031520724e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 6288, + "text_loss": 0.50390625 + }, + { + "epoch": 0.52, + "learning_rate": 4.6438959907237555e-06, + "loss": 0.4199, + "regression_loss": 0.0, + "step": 6289, + "text_loss": 0.3984375 + }, + { + "epoch": 0.52, + "learning_rate": 4.642593802572769e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 6290, + "text_loss": 0.357421875 + }, + { + "epoch": 0.52, + "learning_rate": 4.6412916387878926e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 6291, + "text_loss": 0.3515625 + }, + { + "epoch": 0.52, + "learning_rate": 4.639989499457898e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 6292, + "text_loss": 0.478515625 + }, + { + "epoch": 0.52, + "learning_rate": 4.638687384671562e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 6293, + "text_loss": 0.58203125 + }, + { + "epoch": 0.52, + "learning_rate": 4.637385294517651e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6294, + "text_loss": 0.44140625 + }, + { + "epoch": 0.52, + "learning_rate": 4.636083229084941e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 6295, + "text_loss": 0.328125 + }, + { + "epoch": 0.52, + "learning_rate": 4.634781188462194e-06, + "loss": 0.535, + "regression_loss": 0.0, + "step": 6296, + "text_loss": 0.59765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.633479172738181e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 6297, + "text_loss": 0.255859375 + }, + { + "epoch": 0.52, + "learning_rate": 4.6321771820016635e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 6298, + "text_loss": 0.59765625 + }, + { + "epoch": 0.52, + "learning_rate": 4.630875216341406e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 6299, + "text_loss": 0.76953125 + }, + { + "epoch": 0.52, + "learning_rate": 4.629573275846172e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 6300, + "text_loss": 0.359375 + }, + { + "epoch": 0.52, + "learning_rate": 4.628271360604716e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 6301, + "text_loss": 0.8046875 + }, + { + "epoch": 0.52, + "learning_rate": 4.626969470705801e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 6302, + "text_loss": 0.64453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.62566760623818e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 6303, + "text_loss": 0.33203125 + }, + { + "epoch": 0.52, + "learning_rate": 4.624365767290609e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 6304, + "text_loss": 0.51171875 + }, + { + "epoch": 0.52, + "learning_rate": 4.623063953951839e-06, + "loss": 0.6282, + "regression_loss": 0.0, + "step": 6305, + "text_loss": 0.55078125 + }, + { + "epoch": 0.52, + "learning_rate": 4.621762166310623e-06, + "loss": 0.3962, + "regression_loss": 0.0, + "step": 6306, + "text_loss": 0.48828125 + }, + { + "epoch": 0.52, + "learning_rate": 4.620460404455706e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 6307, + "text_loss": 0.546875 + }, + { + "epoch": 0.52, + "learning_rate": 4.619158668475841e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 6308, + "text_loss": 0.439453125 + }, + { + "epoch": 0.52, + "learning_rate": 4.61785695845977e-06, + "loss": 0.6514, + "regression_loss": 0.0, + "step": 6309, + "text_loss": 0.625 + }, + { + "epoch": 0.52, + "learning_rate": 4.616555274496239e-06, + "loss": 0.6309, + "regression_loss": 0.0, + "step": 6310, + "text_loss": 0.5703125 + }, + { + "epoch": 0.52, + "learning_rate": 4.6152536166739875e-06, + "loss": 0.5857, + "regression_loss": 0.0, + "step": 6311, + "text_loss": 0.23828125 + }, + { + "epoch": 0.52, + "learning_rate": 4.613951985081759e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 6312, + "text_loss": 0.361328125 + }, + { + "epoch": 0.52, + "learning_rate": 4.6126503798082875e-06, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 6313, + "text_loss": 0.42578125 + }, + { + "epoch": 0.52, + "learning_rate": 4.611348800942316e-06, + "loss": 0.3958, + "regression_loss": 0.0, + "step": 6314, + "text_loss": 0.224609375 + }, + { + "epoch": 0.52, + "learning_rate": 4.6100472485725724e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 6315, + "text_loss": 0.54296875 + }, + { + "epoch": 0.52, + "learning_rate": 4.608745722787796e-06, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 6316, + "text_loss": 0.33203125 + }, + { + "epoch": 0.53, + "learning_rate": 4.607444223676714e-06, + "loss": 0.582, + "regression_loss": 0.0, + "step": 6317, + "text_loss": 0.53515625 + }, + { + "epoch": 0.53, + "learning_rate": 4.606142751328059e-06, + "loss": 0.4686, + "regression_loss": 0.0, + "step": 6318, + "text_loss": 0.33984375 + }, + { + "epoch": 0.53, + "learning_rate": 4.604841305830555e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 6319, + "text_loss": 0.357421875 + }, + { + "epoch": 0.53, + "learning_rate": 4.603539887272931e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 6320, + "text_loss": 0.51953125 + }, + { + "epoch": 0.53, + "learning_rate": 4.6022384957439085e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 6321, + "text_loss": 0.58984375 + }, + { + "epoch": 0.53, + "learning_rate": 4.600937131332213e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 6322, + "text_loss": 0.65234375 + }, + { + "epoch": 0.53, + "learning_rate": 4.59963579412656e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 6323, + "text_loss": 0.298828125 + }, + { + "epoch": 0.53, + "learning_rate": 4.598334484215674e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 6324, + "text_loss": 0.314453125 + }, + { + "epoch": 0.53, + "learning_rate": 4.597033201688267e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 6325, + "text_loss": 0.384765625 + }, + { + "epoch": 0.53, + "learning_rate": 4.595731946633055e-06, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 6326, + "text_loss": 0.51171875 + }, + { + "epoch": 0.53, + "learning_rate": 4.59443071913875e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 6327, + "text_loss": 0.435546875 + }, + { + "epoch": 0.53, + "learning_rate": 4.593129519294065e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 6328, + "text_loss": 0.66015625 + }, + { + "epoch": 0.53, + "learning_rate": 4.591828347187707e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 6329, + "text_loss": 0.5625 + }, + { + "epoch": 0.53, + "learning_rate": 4.590527202908385e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 6330, + "text_loss": 0.41796875 + }, + { + "epoch": 0.53, + "learning_rate": 4.589226086544803e-06, + "loss": 0.6118, + "regression_loss": 0.0, + "step": 6331, + "text_loss": 0.80078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.587924998185665e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 6332, + "text_loss": 0.8828125 + }, + { + "epoch": 0.53, + "learning_rate": 4.586623937919671e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 6333, + "text_loss": 0.341796875 + }, + { + "epoch": 0.53, + "learning_rate": 4.5853229058355255e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 6334, + "text_loss": 0.40234375 + }, + { + "epoch": 0.53, + "learning_rate": 4.584021902021918e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 6335, + "text_loss": 0.51171875 + }, + { + "epoch": 0.53, + "learning_rate": 4.582720926567552e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 6336, + "text_loss": 0.73046875 + }, + { + "epoch": 0.53, + "learning_rate": 4.581419979561117e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 6337, + "text_loss": 0.5234375 + }, + { + "epoch": 0.53, + "learning_rate": 4.580119061091306e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 6338, + "text_loss": 0.455078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.5788181712468085e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 6339, + "text_loss": 0.546875 + }, + { + "epoch": 0.53, + "learning_rate": 4.577517310116313e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 6340, + "text_loss": 0.474609375 + }, + { + "epoch": 0.53, + "learning_rate": 4.5762164777885045e-06, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 6341, + "text_loss": 0.703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.574915674352069e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 6342, + "text_loss": 0.6171875 + }, + { + "epoch": 0.53, + "learning_rate": 4.573614899895686e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 6343, + "text_loss": 0.4921875 + }, + { + "epoch": 0.53, + "learning_rate": 4.572314154508038e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 6344, + "text_loss": 0.5703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.571013438277801e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 6345, + "text_loss": 0.5078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.569712751293652e-06, + "loss": 0.4672, + "regression_loss": 0.0, + "step": 6346, + "text_loss": 0.5625 + }, + { + "epoch": 0.53, + "learning_rate": 4.568412093644267e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 6347, + "text_loss": 0.61328125 + }, + { + "epoch": 0.53, + "learning_rate": 4.567111465418314e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 6348, + "text_loss": 0.765625 + }, + { + "epoch": 0.53, + "learning_rate": 4.565810866704468e-06, + "loss": 0.3943, + "regression_loss": 0.0, + "step": 6349, + "text_loss": 0.51953125 + }, + { + "epoch": 0.53, + "learning_rate": 4.564510297591393e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 6350, + "text_loss": 0.71875 + }, + { + "epoch": 0.53, + "learning_rate": 4.563209758167758e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 6351, + "text_loss": 0.44921875 + }, + { + "epoch": 0.53, + "learning_rate": 4.561909248522223e-06, + "loss": 0.6702, + "regression_loss": 0.0, + "step": 6352, + "text_loss": 0.78125 + }, + { + "epoch": 0.53, + "learning_rate": 4.5606087687434565e-06, + "loss": 0.4711, + "regression_loss": 0.0, + "step": 6353, + "text_loss": 0.5859375 + }, + { + "epoch": 0.53, + "learning_rate": 4.559308318920111e-06, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 6354, + "text_loss": 0.373046875 + }, + { + "epoch": 0.53, + "learning_rate": 4.558007899140851e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 6355, + "text_loss": 0.5859375 + }, + { + "epoch": 0.53, + "learning_rate": 4.556707509494328e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 6356, + "text_loss": 0.416015625 + }, + { + "epoch": 0.53, + "learning_rate": 4.5554071500691996e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 6357, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.554106820954112e-06, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 6358, + "text_loss": 0.302734375 + }, + { + "epoch": 0.53, + "learning_rate": 4.552806522237722e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 6359, + "text_loss": 0.80078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.551506254008671e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 6360, + "text_loss": 0.44140625 + }, + { + "epoch": 0.53, + "learning_rate": 4.550206016355608e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 6361, + "text_loss": 0.52734375 + }, + { + "epoch": 0.53, + "learning_rate": 4.548905809367176e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 6362, + "text_loss": 0.6484375 + }, + { + "epoch": 0.53, + "learning_rate": 4.547605633132017e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 6363, + "text_loss": 0.5625 + }, + { + "epoch": 0.53, + "learning_rate": 4.546305487738768e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 6364, + "text_loss": 0.310546875 + }, + { + "epoch": 0.53, + "learning_rate": 4.545005373276068e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 6365, + "text_loss": 0.443359375 + }, + { + "epoch": 0.53, + "learning_rate": 4.54370528983255e-06, + "loss": 0.606, + "regression_loss": 0.0, + "step": 6366, + "text_loss": 0.828125 + }, + { + "epoch": 0.53, + "learning_rate": 4.542405237496851e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 6367, + "text_loss": 0.259765625 + }, + { + "epoch": 0.53, + "learning_rate": 4.5411052163575986e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 6368, + "text_loss": 0.49609375 + }, + { + "epoch": 0.53, + "learning_rate": 4.539805226503423e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 6369, + "text_loss": 0.4609375 + }, + { + "epoch": 0.53, + "learning_rate": 4.538505268022949e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 6370, + "text_loss": 0.455078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.537205341004804e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 6371, + "text_loss": 0.578125 + }, + { + "epoch": 0.53, + "learning_rate": 4.535905445537606e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 6372, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.534605581709981e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 6373, + "text_loss": 0.451171875 + }, + { + "epoch": 0.53, + "learning_rate": 4.533305749610541e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 6374, + "text_loss": 0.30078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.532005949327906e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 6375, + "text_loss": 0.76953125 + }, + { + "epoch": 0.53, + "learning_rate": 4.530706180950687e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 6376, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.529406444567499e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 6377, + "text_loss": 0.431640625 + }, + { + "epoch": 0.53, + "learning_rate": 4.528106740266947e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 6378, + "text_loss": 0.4921875 + }, + { + "epoch": 0.53, + "learning_rate": 4.526807068137642e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 6379, + "text_loss": 0.7734375 + }, + { + "epoch": 0.53, + "learning_rate": 4.5255074282681846e-06, + "loss": 0.6272, + "regression_loss": 0.0, + "step": 6380, + "text_loss": 0.36328125 + }, + { + "epoch": 0.53, + "learning_rate": 4.524207820747182e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 6381, + "text_loss": 0.44140625 + }, + { + "epoch": 0.53, + "learning_rate": 4.5229082456632315e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 6382, + "text_loss": 0.4453125 + }, + { + "epoch": 0.53, + "learning_rate": 4.521608703104935e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 6383, + "text_loss": 0.4375 + }, + { + "epoch": 0.53, + "learning_rate": 4.520309193160884e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 6384, + "text_loss": 0.4921875 + }, + { + "epoch": 0.53, + "learning_rate": 4.519009715919676e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 6385, + "text_loss": 0.5234375 + }, + { + "epoch": 0.53, + "learning_rate": 4.5177102714699e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 6386, + "text_loss": 0.61328125 + }, + { + "epoch": 0.53, + "learning_rate": 4.516410859900149e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 6387, + "text_loss": 0.453125 + }, + { + "epoch": 0.53, + "learning_rate": 4.515111481299004e-06, + "loss": 0.4825, + "regression_loss": 0.0, + "step": 6388, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.513812135755057e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 6389, + "text_loss": 0.431640625 + }, + { + "epoch": 0.53, + "learning_rate": 4.512512823356886e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 6390, + "text_loss": 0.50390625 + }, + { + "epoch": 0.53, + "learning_rate": 4.5112135441930745e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 6391, + "text_loss": 0.57421875 + }, + { + "epoch": 0.53, + "learning_rate": 4.509914298352197e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 6392, + "text_loss": 0.57421875 + }, + { + "epoch": 0.53, + "learning_rate": 4.508615085922832e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 6393, + "text_loss": 0.4296875 + }, + { + "epoch": 0.53, + "learning_rate": 4.507315906993554e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 6394, + "text_loss": 0.56640625 + }, + { + "epoch": 0.53, + "learning_rate": 4.506016761652932e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 6395, + "text_loss": 0.58203125 + }, + { + "epoch": 0.53, + "learning_rate": 4.504717649989536e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 6396, + "text_loss": 0.478515625 + }, + { + "epoch": 0.53, + "learning_rate": 4.503418572091931e-06, + "loss": 0.428, + "regression_loss": 0.0, + "step": 6397, + "text_loss": 0.36328125 + }, + { + "epoch": 0.53, + "learning_rate": 4.502119528048686e-06, + "loss": 0.5319, + "regression_loss": 0.0, + "step": 6398, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.500820517948357e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 6399, + "text_loss": 0.22265625 + }, + { + "epoch": 0.53, + "learning_rate": 4.499521541879508e-06, + "loss": 0.6216, + "regression_loss": 0.0, + "step": 6400, + "text_loss": 0.5703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.498222599930696e-06, + "loss": 0.5984, + "regression_loss": 0.0, + "step": 6401, + "text_loss": 0.69140625 + }, + { + "epoch": 0.53, + "learning_rate": 4.496923692190476e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 6402, + "text_loss": 0.8359375 + }, + { + "epoch": 0.53, + "learning_rate": 4.495624818747399e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 6403, + "text_loss": 0.455078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.494325979690018e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 6404, + "text_loss": 0.404296875 + }, + { + "epoch": 0.53, + "learning_rate": 4.493027175106878e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 6405, + "text_loss": 0.63671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.491728405086529e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 6406, + "text_loss": 0.29296875 + }, + { + "epoch": 0.53, + "learning_rate": 4.490429669717511e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 6407, + "text_loss": 0.76953125 + }, + { + "epoch": 0.53, + "learning_rate": 4.489130969088367e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 6408, + "text_loss": 0.80078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.487832303287633e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 6409, + "text_loss": 0.478515625 + }, + { + "epoch": 0.53, + "learning_rate": 4.48653367240385e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 6410, + "text_loss": 0.45703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.485235076525547e-06, + "loss": 0.438, + "regression_loss": 0.0, + "step": 6411, + "text_loss": 0.5390625 + }, + { + "epoch": 0.53, + "learning_rate": 4.4839365157412605e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 6412, + "text_loss": 0.380859375 + }, + { + "epoch": 0.53, + "learning_rate": 4.482637990139514e-06, + "loss": 0.6368, + "regression_loss": 0.0, + "step": 6413, + "text_loss": 0.9453125 + }, + { + "epoch": 0.53, + "learning_rate": 4.4813394998088385e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 6414, + "text_loss": 0.5703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.480041044837758e-06, + "loss": 0.442, + "regression_loss": 0.0, + "step": 6415, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.53, + "learning_rate": 4.478742625314793e-06, + "loss": 0.4948, + "regression_loss": 0.0, + "step": 6416, + "text_loss": 0.83984375 + }, + { + "epoch": 0.53, + "learning_rate": 4.477444241328463e-06, + "loss": 0.5753, + "regression_loss": 0.0, + "step": 6417, + "text_loss": 0.578125 + }, + { + "epoch": 0.53, + "learning_rate": 4.4761458929672865e-06, + "loss": 0.5245, + "regression_loss": 0.0, + "step": 6418, + "text_loss": 0.220703125 + }, + { + "epoch": 0.53, + "learning_rate": 4.4748475803197746e-06, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 6419, + "text_loss": 0.28125 + }, + { + "epoch": 0.53, + "learning_rate": 4.4735493034744454e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 6420, + "text_loss": 0.53125 + }, + { + "epoch": 0.53, + "learning_rate": 4.472251062519805e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 6421, + "text_loss": 0.416015625 + }, + { + "epoch": 0.53, + "learning_rate": 4.4709528575443614e-06, + "loss": 0.4458, + "regression_loss": 0.0, + "step": 6422, + "text_loss": 0.326171875 + }, + { + "epoch": 0.53, + "learning_rate": 4.4696546886366195e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 6423, + "text_loss": 0.4453125 + }, + { + "epoch": 0.53, + "learning_rate": 4.468356555885082e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 6424, + "text_loss": 0.353515625 + }, + { + "epoch": 0.53, + "learning_rate": 4.467058459378247e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 6425, + "text_loss": 0.5546875 + }, + { + "epoch": 0.53, + "learning_rate": 4.465760399204617e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 6426, + "text_loss": 0.71875 + }, + { + "epoch": 0.53, + "learning_rate": 4.46446237545268e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 6427, + "text_loss": 0.62890625 + }, + { + "epoch": 0.53, + "learning_rate": 4.463164388210934e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 6428, + "text_loss": 0.62890625 + }, + { + "epoch": 0.53, + "learning_rate": 4.4618664375678675e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 6429, + "text_loss": 0.55078125 + }, + { + "epoch": 0.53, + "learning_rate": 4.460568523611968e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 6430, + "text_loss": 0.310546875 + }, + { + "epoch": 0.53, + "learning_rate": 4.4592706464317186e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6431, + "text_loss": 0.380859375 + }, + { + "epoch": 0.53, + "learning_rate": 4.457972806115607e-06, + "loss": 0.4862, + "regression_loss": 0.0, + "step": 6432, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.53, + "learning_rate": 4.456675002752107e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 6433, + "text_loss": 0.8203125 + }, + { + "epoch": 0.53, + "learning_rate": 4.455377236429701e-06, + "loss": 0.535, + "regression_loss": 0.0, + "step": 6434, + "text_loss": 0.6015625 + }, + { + "epoch": 0.53, + "learning_rate": 4.454079507236861e-06, + "loss": 0.4357, + "regression_loss": 0.0, + "step": 6435, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.53, + "learning_rate": 4.4527818152620616e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 6436, + "text_loss": 0.5859375 + }, + { + "epoch": 0.53, + "learning_rate": 4.451484160593771e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 6437, + "text_loss": 0.349609375 + }, + { + "epoch": 0.54, + "learning_rate": 4.450186543320458e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 6438, + "text_loss": 0.33984375 + }, + { + "epoch": 0.54, + "learning_rate": 4.448888963530585e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 6439, + "text_loss": 0.275390625 + }, + { + "epoch": 0.54, + "learning_rate": 4.447591421312617e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 6440, + "text_loss": 0.65625 + }, + { + "epoch": 0.54, + "learning_rate": 4.446293916755014e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 6441, + "text_loss": 0.2265625 + }, + { + "epoch": 0.54, + "learning_rate": 4.444996449946231e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 6442, + "text_loss": 0.4296875 + }, + { + "epoch": 0.54, + "learning_rate": 4.443699020974724e-06, + "loss": 0.4341, + "regression_loss": 0.0, + "step": 6443, + "text_loss": 0.68359375 + }, + { + "epoch": 0.54, + "learning_rate": 4.442401629928943e-06, + "loss": 0.5341, + "regression_loss": 0.0, + "step": 6444, + "text_loss": 0.7734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.44110427689734e-06, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 6445, + "text_loss": 0.416015625 + }, + { + "epoch": 0.54, + "learning_rate": 4.439806961968361e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 6446, + "text_loss": 0.60546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.438509685230451e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 6447, + "text_loss": 0.8828125 + }, + { + "epoch": 0.54, + "learning_rate": 4.437212446772049e-06, + "loss": 0.4689, + "regression_loss": 0.0, + "step": 6448, + "text_loss": 0.349609375 + }, + { + "epoch": 0.54, + "learning_rate": 4.435915246681597e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 6449, + "text_loss": 0.71484375 + }, + { + "epoch": 0.54, + "learning_rate": 4.434618085047527e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 6450, + "text_loss": 0.55078125 + }, + { + "epoch": 0.54, + "learning_rate": 4.43332096195828e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 6451, + "text_loss": 0.326171875 + }, + { + "epoch": 0.54, + "learning_rate": 4.43202387750228e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 6452, + "text_loss": 0.341796875 + }, + { + "epoch": 0.54, + "learning_rate": 4.43072683176796e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 6453, + "text_loss": 0.70703125 + }, + { + "epoch": 0.54, + "learning_rate": 4.429429824843744e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 6454, + "text_loss": 0.35546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.4281328568180566e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 6455, + "text_loss": 0.4453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.426835927779316e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 6456, + "text_loss": 0.78125 + }, + { + "epoch": 0.54, + "learning_rate": 4.425539037815942e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 6457, + "text_loss": 0.494140625 + }, + { + "epoch": 0.54, + "learning_rate": 4.424242187016348e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 6458, + "text_loss": 0.498046875 + }, + { + "epoch": 0.54, + "learning_rate": 4.422945375468951e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 6459, + "text_loss": 0.71875 + }, + { + "epoch": 0.54, + "learning_rate": 4.421648603262155e-06, + "loss": 0.4838, + "regression_loss": 0.0, + "step": 6460, + "text_loss": 0.357421875 + }, + { + "epoch": 0.54, + "learning_rate": 4.4203518704843726e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 6461, + "text_loss": 0.470703125 + }, + { + "epoch": 0.54, + "learning_rate": 4.419055177224004e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 6462, + "text_loss": 0.828125 + }, + { + "epoch": 0.54, + "learning_rate": 4.417758523569454e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 6463, + "text_loss": 0.435546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.416461909609119e-06, + "loss": 0.5874, + "regression_loss": 0.0, + "step": 6464, + "text_loss": 0.65234375 + }, + { + "epoch": 0.54, + "learning_rate": 4.4151653354314e-06, + "loss": 0.593, + "regression_loss": 0.0, + "step": 6465, + "text_loss": 0.734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.413868801124685e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 6466, + "text_loss": 0.72265625 + }, + { + "epoch": 0.54, + "learning_rate": 4.4125723067773704e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 6467, + "text_loss": 0.474609375 + }, + { + "epoch": 0.54, + "learning_rate": 4.41127585247784e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 6468, + "text_loss": 0.3515625 + }, + { + "epoch": 0.54, + "learning_rate": 4.4099794383144824e-06, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 6469, + "text_loss": 0.259765625 + }, + { + "epoch": 0.54, + "learning_rate": 4.408683064375677e-06, + "loss": 0.3916, + "regression_loss": 0.0, + "step": 6470, + "text_loss": 0.578125 + }, + { + "epoch": 0.54, + "learning_rate": 4.40738673074981e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 6471, + "text_loss": 0.65234375 + }, + { + "epoch": 0.54, + "learning_rate": 4.406090437525252e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 6472, + "text_loss": 0.494140625 + }, + { + "epoch": 0.54, + "learning_rate": 4.4047941847903815e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 6473, + "text_loss": 0.34765625 + }, + { + "epoch": 0.54, + "learning_rate": 4.403497972633569e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 6474, + "text_loss": 0.462890625 + }, + { + "epoch": 0.54, + "learning_rate": 4.402201801143186e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 6475, + "text_loss": 0.6484375 + }, + { + "epoch": 0.54, + "learning_rate": 4.400905670407594e-06, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 6476, + "text_loss": 0.53125 + }, + { + "epoch": 0.54, + "learning_rate": 4.399609580515161e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 6477, + "text_loss": 0.5390625 + }, + { + "epoch": 0.54, + "learning_rate": 4.398313531554243e-06, + "loss": 0.4747, + "regression_loss": 0.0, + "step": 6478, + "text_loss": 0.388671875 + }, + { + "epoch": 0.54, + "learning_rate": 4.397017523613203e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 6479, + "text_loss": 0.443359375 + }, + { + "epoch": 0.54, + "learning_rate": 4.3957215567803934e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 6480, + "text_loss": 0.6953125 + }, + { + "epoch": 0.54, + "learning_rate": 4.394425631144168e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 6481, + "text_loss": 0.56640625 + }, + { + "epoch": 0.54, + "learning_rate": 4.393129746792874e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 6482, + "text_loss": 0.671875 + }, + { + "epoch": 0.54, + "learning_rate": 4.391833903814861e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 6483, + "text_loss": 0.404296875 + }, + { + "epoch": 0.54, + "learning_rate": 4.390538102298469e-06, + "loss": 0.5099, + "regression_loss": 0.0, + "step": 6484, + "text_loss": 0.79296875 + }, + { + "epoch": 0.54, + "learning_rate": 4.3892423423320444e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 6485, + "text_loss": 0.232421875 + }, + { + "epoch": 0.54, + "learning_rate": 4.38794662400392e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 6486, + "text_loss": 0.5859375 + }, + { + "epoch": 0.54, + "learning_rate": 4.386650947402434e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 6487, + "text_loss": 0.76953125 + }, + { + "epoch": 0.54, + "learning_rate": 4.385355312615919e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 6488, + "text_loss": 0.3203125 + }, + { + "epoch": 0.54, + "learning_rate": 4.384059719732703e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 6489, + "text_loss": 0.53125 + }, + { + "epoch": 0.54, + "learning_rate": 4.3827641688411175e-06, + "loss": 0.493, + "regression_loss": 0.0, + "step": 6490, + "text_loss": 0.64453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.381468660029479e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 6491, + "text_loss": 0.314453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.380173193386115e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 6492, + "text_loss": 0.39453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.378877768999341e-06, + "loss": 0.4324, + "regression_loss": 0.0, + "step": 6493, + "text_loss": 0.5234375 + }, + { + "epoch": 0.54, + "learning_rate": 4.377582386957473e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 6494, + "text_loss": 0.8359375 + }, + { + "epoch": 0.54, + "learning_rate": 4.376287047348823e-06, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 6495, + "text_loss": 0.470703125 + }, + { + "epoch": 0.54, + "learning_rate": 4.3749917502617e-06, + "loss": 0.5913, + "regression_loss": 0.0, + "step": 6496, + "text_loss": 0.67578125 + }, + { + "epoch": 0.54, + "learning_rate": 4.373696495784411e-06, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 6497, + "text_loss": 0.81640625 + }, + { + "epoch": 0.54, + "learning_rate": 4.372401284005261e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 6498, + "text_loss": 0.439453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.37110611501255e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 6499, + "text_loss": 0.55859375 + }, + { + "epoch": 0.54, + "learning_rate": 4.369810988894576e-06, + "loss": 0.4504, + "regression_loss": 0.0, + "step": 6500, + "text_loss": 0.458984375 + }, + { + "epoch": 0.54, + "learning_rate": 4.368515905739634e-06, + "loss": 0.5784, + "regression_loss": 0.0, + "step": 6501, + "text_loss": 0.62109375 + }, + { + "epoch": 0.54, + "learning_rate": 4.367220865636017e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 6502, + "text_loss": 0.58203125 + }, + { + "epoch": 0.54, + "learning_rate": 4.36592586867201e-06, + "loss": 0.5111, + "regression_loss": 0.0, + "step": 6503, + "text_loss": 0.5703125 + }, + { + "epoch": 0.54, + "learning_rate": 4.364630914935905e-06, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 6504, + "text_loss": 0.69140625 + }, + { + "epoch": 0.54, + "learning_rate": 4.363336004515982e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 6505, + "text_loss": 0.78125 + }, + { + "epoch": 0.54, + "learning_rate": 4.362041137500522e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 6506, + "text_loss": 0.4140625 + }, + { + "epoch": 0.54, + "learning_rate": 4.360746313977802e-06, + "loss": 0.6082, + "regression_loss": 0.0, + "step": 6507, + "text_loss": 0.478515625 + }, + { + "epoch": 0.54, + "learning_rate": 4.359451534036097e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 6508, + "text_loss": 0.431640625 + }, + { + "epoch": 0.54, + "learning_rate": 4.358156797763677e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 6509, + "text_loss": 0.4375 + }, + { + "epoch": 0.54, + "learning_rate": 4.356862105248815e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 6510, + "text_loss": 0.416015625 + }, + { + "epoch": 0.54, + "learning_rate": 4.355567456579769e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 6511, + "text_loss": 0.400390625 + }, + { + "epoch": 0.54, + "learning_rate": 4.354272851844807e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 6512, + "text_loss": 0.375 + }, + { + "epoch": 0.54, + "learning_rate": 4.352978291132186e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 6513, + "text_loss": 0.275390625 + }, + { + "epoch": 0.54, + "learning_rate": 4.351683774530165e-06, + "loss": 0.4659, + "regression_loss": 0.0, + "step": 6514, + "text_loss": 0.51953125 + }, + { + "epoch": 0.54, + "learning_rate": 4.350389302126994e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 6515, + "text_loss": 0.4921875 + }, + { + "epoch": 0.54, + "learning_rate": 4.349094874010927e-06, + "loss": 0.5309, + "regression_loss": 0.0, + "step": 6516, + "text_loss": 0.353515625 + }, + { + "epoch": 0.54, + "learning_rate": 4.347800490270207e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 6517, + "text_loss": 0.49609375 + }, + { + "epoch": 0.54, + "learning_rate": 4.346506150993084e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 6518, + "text_loss": 0.52734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.345211856267795e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 6519, + "text_loss": 0.236328125 + }, + { + "epoch": 0.54, + "learning_rate": 4.34391760618258e-06, + "loss": 0.4231, + "regression_loss": 0.0, + "step": 6520, + "text_loss": 0.515625 + }, + { + "epoch": 0.54, + "learning_rate": 4.342623400825674e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 6521, + "text_loss": 0.50390625 + }, + { + "epoch": 0.54, + "learning_rate": 4.34132924028531e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 6522, + "text_loss": 0.578125 + }, + { + "epoch": 0.54, + "learning_rate": 4.340035124649714e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 6523, + "text_loss": 0.42578125 + }, + { + "epoch": 0.54, + "learning_rate": 4.338741054007118e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 6524, + "text_loss": 0.76953125 + }, + { + "epoch": 0.54, + "learning_rate": 4.3374470284457385e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 6525, + "text_loss": 0.4140625 + }, + { + "epoch": 0.54, + "learning_rate": 4.3361530480538e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 6526, + "text_loss": 0.515625 + }, + { + "epoch": 0.54, + "learning_rate": 4.334859112919518e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 6527, + "text_loss": 0.69921875 + }, + { + "epoch": 0.54, + "learning_rate": 4.333565223131107e-06, + "loss": 0.4539, + "regression_loss": 0.0, + "step": 6528, + "text_loss": 0.439453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.332271378776776e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 6529, + "text_loss": 0.69921875 + }, + { + "epoch": 0.54, + "learning_rate": 4.3309775799447354e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 6530, + "text_loss": 0.255859375 + }, + { + "epoch": 0.54, + "learning_rate": 4.329683826723185e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 6531, + "text_loss": 0.59765625 + }, + { + "epoch": 0.54, + "learning_rate": 4.328390119200331e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 6532, + "text_loss": 0.7734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.3270964574643695e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 6533, + "text_loss": 0.71875 + }, + { + "epoch": 0.54, + "learning_rate": 4.325802841603496e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 6534, + "text_loss": 0.5234375 + }, + { + "epoch": 0.54, + "learning_rate": 4.324509271705904e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 6535, + "text_loss": 0.5546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.323215747859778e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 6536, + "text_loss": 0.765625 + }, + { + "epoch": 0.54, + "learning_rate": 4.32192227015331e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 6537, + "text_loss": 0.44921875 + }, + { + "epoch": 0.54, + "learning_rate": 4.320628838674678e-06, + "loss": 0.4535, + "regression_loss": 0.0, + "step": 6538, + "text_loss": 0.4921875 + }, + { + "epoch": 0.54, + "learning_rate": 4.319335453512064e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 6539, + "text_loss": 0.59765625 + }, + { + "epoch": 0.54, + "learning_rate": 4.3180421147536426e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 6540, + "text_loss": 0.54296875 + }, + { + "epoch": 0.54, + "learning_rate": 4.316748822487588e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 6541, + "text_loss": 0.74609375 + }, + { + "epoch": 0.54, + "learning_rate": 4.315455576802068e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 6542, + "text_loss": 0.77734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.314162377785252e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 6543, + "text_loss": 0.302734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.312869225525304e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 6544, + "text_loss": 0.453125 + }, + { + "epoch": 0.54, + "learning_rate": 4.311576120110382e-06, + "loss": 0.4657, + "regression_loss": 0.0, + "step": 6545, + "text_loss": 0.33203125 + }, + { + "epoch": 0.54, + "learning_rate": 4.310283061628645e-06, + "loss": 0.3512, + "regression_loss": 0.0, + "step": 6546, + "text_loss": 0.310546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.308990050168248e-06, + "loss": 0.4229, + "regression_loss": 0.0, + "step": 6547, + "text_loss": 0.2890625 + }, + { + "epoch": 0.54, + "learning_rate": 4.3076970858173374e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 6548, + "text_loss": 0.7890625 + }, + { + "epoch": 0.54, + "learning_rate": 4.306404168664067e-06, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 6549, + "text_loss": 0.51953125 + }, + { + "epoch": 0.54, + "learning_rate": 4.305111298796575e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 6550, + "text_loss": 0.73046875 + }, + { + "epoch": 0.54, + "learning_rate": 4.303818476303008e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 6551, + "text_loss": 0.546875 + }, + { + "epoch": 0.54, + "learning_rate": 4.3025257012715e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 6552, + "text_loss": 0.52734375 + }, + { + "epoch": 0.54, + "learning_rate": 4.301232973790189e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 6553, + "text_loss": 0.96484375 + }, + { + "epoch": 0.54, + "learning_rate": 4.299940293947204e-06, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 6554, + "text_loss": 0.46875 + }, + { + "epoch": 0.54, + "learning_rate": 4.298647661830674e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 6555, + "text_loss": 0.443359375 + }, + { + "epoch": 0.54, + "learning_rate": 4.297355077528723e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 6556, + "text_loss": 0.458984375 + }, + { + "epoch": 0.54, + "learning_rate": 4.2960625411294756e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 6557, + "text_loss": 0.275390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.294770052721047e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 6558, + "text_loss": 0.294921875 + }, + { + "epoch": 0.55, + "learning_rate": 4.2934776123915555e-06, + "loss": 0.5367, + "regression_loss": 0.0, + "step": 6559, + "text_loss": 0.40234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.29218522022911e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 6560, + "text_loss": 0.35546875 + }, + { + "epoch": 0.55, + "learning_rate": 4.290892876321821e-06, + "loss": 0.5896, + "regression_loss": 0.0, + "step": 6561, + "text_loss": 0.921875 + }, + { + "epoch": 0.55, + "learning_rate": 4.289600580757791e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 6562, + "text_loss": 0.427734375 + }, + { + "epoch": 0.55, + "learning_rate": 4.288308333625129e-06, + "loss": 0.6411, + "regression_loss": 0.0, + "step": 6563, + "text_loss": 0.4765625 + }, + { + "epoch": 0.55, + "learning_rate": 4.287016135011924e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 6564, + "text_loss": 0.5390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.2857239850062795e-06, + "loss": 0.506, + "regression_loss": 0.0, + "step": 6565, + "text_loss": 0.703125 + }, + { + "epoch": 0.55, + "learning_rate": 4.284431883696283e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 6566, + "text_loss": 0.43359375 + }, + { + "epoch": 0.55, + "learning_rate": 4.2831398311700265e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 6567, + "text_loss": 0.5390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.2818478275155925e-06, + "loss": 0.3997, + "regression_loss": 0.0, + "step": 6568, + "text_loss": 0.55078125 + }, + { + "epoch": 0.55, + "learning_rate": 4.280555872821066e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 6569, + "text_loss": 0.4140625 + }, + { + "epoch": 0.55, + "learning_rate": 4.279263967174521e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 6570, + "text_loss": 0.578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.277972110664039e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 6571, + "text_loss": 0.466796875 + }, + { + "epoch": 0.55, + "learning_rate": 4.276680303377688e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 6572, + "text_loss": 0.484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.27538854540354e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 6573, + "text_loss": 0.23046875 + }, + { + "epoch": 0.55, + "learning_rate": 4.274096836829657e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 6574, + "text_loss": 0.53515625 + }, + { + "epoch": 0.55, + "learning_rate": 4.272805177744103e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 6575, + "text_loss": 0.337890625 + }, + { + "epoch": 0.55, + "learning_rate": 4.2715135682349345e-06, + "loss": 0.6504, + "regression_loss": 0.0, + "step": 6576, + "text_loss": 0.28515625 + }, + { + "epoch": 0.55, + "learning_rate": 4.27022200839021e-06, + "loss": 0.4053, + "regression_loss": 0.0, + "step": 6577, + "text_loss": 0.43359375 + }, + { + "epoch": 0.55, + "learning_rate": 4.26893049829798e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 6578, + "text_loss": 0.5 + }, + { + "epoch": 0.55, + "learning_rate": 4.267639038046292e-06, + "loss": 0.4436, + "regression_loss": 0.0, + "step": 6579, + "text_loss": 0.326171875 + }, + { + "epoch": 0.55, + "learning_rate": 4.266347627723192e-06, + "loss": 0.4398, + "regression_loss": 0.0, + "step": 6580, + "text_loss": 0.59765625 + }, + { + "epoch": 0.55, + "learning_rate": 4.265056267416721e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 6581, + "text_loss": 0.490234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.263764957214919e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 6582, + "text_loss": 0.53125 + }, + { + "epoch": 0.55, + "learning_rate": 4.262473697205818e-06, + "loss": 0.4979, + "regression_loss": 0.0, + "step": 6583, + "text_loss": 0.515625 + }, + { + "epoch": 0.55, + "learning_rate": 4.2611824874774535e-06, + "loss": 0.524, + "regression_loss": 0.0, + "step": 6584, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.55, + "learning_rate": 4.259891328117849e-06, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 6585, + "text_loss": 0.40234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.258600219215033e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 6586, + "text_loss": 0.462890625 + }, + { + "epoch": 0.55, + "learning_rate": 4.257309160857022e-06, + "loss": 0.5111, + "regression_loss": 0.0, + "step": 6587, + "text_loss": 0.380859375 + }, + { + "epoch": 0.55, + "learning_rate": 4.25601815313184e-06, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 6588, + "text_loss": 0.6796875 + }, + { + "epoch": 0.55, + "learning_rate": 4.254727196127495e-06, + "loss": 0.448, + "regression_loss": 0.0, + "step": 6589, + "text_loss": 0.470703125 + }, + { + "epoch": 0.55, + "learning_rate": 4.253436289932003e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 6590, + "text_loss": 0.59375 + }, + { + "epoch": 0.55, + "learning_rate": 4.252145434633366e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 6591, + "text_loss": 0.52734375 + }, + { + "epoch": 0.55, + "learning_rate": 4.250854630319593e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 6592, + "text_loss": 0.5625 + }, + { + "epoch": 0.55, + "learning_rate": 4.249563877078681e-06, + "loss": 0.656, + "regression_loss": 0.0, + "step": 6593, + "text_loss": 0.578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.24827317499863e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 6594, + "text_loss": 0.337890625 + }, + { + "epoch": 0.55, + "learning_rate": 4.246982524167427e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 6595, + "text_loss": 0.62109375 + }, + { + "epoch": 0.55, + "learning_rate": 4.24569192467307e-06, + "loss": 0.4458, + "regression_loss": 0.0, + "step": 6596, + "text_loss": 0.36328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.24440137660354e-06, + "loss": 0.5568, + "regression_loss": 0.0, + "step": 6597, + "text_loss": 0.64453125 + }, + { + "epoch": 0.55, + "learning_rate": 4.243110880046823e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6598, + "text_loss": 0.7421875 + }, + { + "epoch": 0.55, + "learning_rate": 4.241820435090895e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 6599, + "text_loss": 0.68359375 + }, + { + "epoch": 0.55, + "learning_rate": 4.2405300418237365e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 6600, + "text_loss": 0.55078125 + }, + { + "epoch": 0.55, + "learning_rate": 4.239239700333314e-06, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 6601, + "text_loss": 0.6171875 + }, + { + "epoch": 0.55, + "learning_rate": 4.237949410707603e-06, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 6602, + "text_loss": 0.53125 + }, + { + "epoch": 0.55, + "learning_rate": 4.236659173034562e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 6603, + "text_loss": 0.80859375 + }, + { + "epoch": 0.55, + "learning_rate": 4.235368987402159e-06, + "loss": 0.4987, + "regression_loss": 0.0, + "step": 6604, + "text_loss": 0.671875 + }, + { + "epoch": 0.55, + "learning_rate": 4.234078853898347e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 6605, + "text_loss": 0.71484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.232788772611084e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 6606, + "text_loss": 0.66015625 + }, + { + "epoch": 0.55, + "learning_rate": 4.231498743628318e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 6607, + "text_loss": 0.53125 + }, + { + "epoch": 0.55, + "learning_rate": 4.230208767038002e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 6608, + "text_loss": 0.36328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.2289188429280735e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 6609, + "text_loss": 0.6328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.227628971386477e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 6610, + "text_loss": 0.58203125 + }, + { + "epoch": 0.55, + "learning_rate": 4.226339152501147e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 6611, + "text_loss": 0.51953125 + }, + { + "epoch": 0.55, + "learning_rate": 4.225049386360019e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 6612, + "text_loss": 0.244140625 + }, + { + "epoch": 0.55, + "learning_rate": 4.223759673051021e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 6613, + "text_loss": 0.52734375 + }, + { + "epoch": 0.55, + "learning_rate": 4.22247001266208e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 6614, + "text_loss": 0.1611328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.221180405281115e-06, + "loss": 0.5277, + "regression_loss": 0.0, + "step": 6615, + "text_loss": 0.7578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.21989085099605e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 6616, + "text_loss": 0.453125 + }, + { + "epoch": 0.55, + "learning_rate": 4.2186013498947966e-06, + "loss": 0.4045, + "regression_loss": 0.0, + "step": 6617, + "text_loss": 0.609375 + }, + { + "epoch": 0.55, + "learning_rate": 4.2173119020652686e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 6618, + "text_loss": 0.6328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.2160225075953705e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 6619, + "text_loss": 0.52734375 + }, + { + "epoch": 0.55, + "learning_rate": 4.214733166573011e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 6620, + "text_loss": 0.71484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.213443879086086e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 6621, + "text_loss": 0.6015625 + }, + { + "epoch": 0.55, + "learning_rate": 4.212154645222498e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 6622, + "text_loss": 0.55859375 + }, + { + "epoch": 0.55, + "learning_rate": 4.2108654650701345e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 6623, + "text_loss": 0.78125 + }, + { + "epoch": 0.55, + "learning_rate": 4.2095763387168895e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 6624, + "text_loss": 0.40625 + }, + { + "epoch": 0.55, + "learning_rate": 4.208287266250646e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 6625, + "text_loss": 0.40234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.206998247759289e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 6626, + "text_loss": 0.494140625 + }, + { + "epoch": 0.55, + "learning_rate": 4.205709283330694e-06, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 6627, + "text_loss": 0.38671875 + }, + { + "epoch": 0.55, + "learning_rate": 4.204420373052736e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 6628, + "text_loss": 0.4453125 + }, + { + "epoch": 0.55, + "learning_rate": 4.203131517013293e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 6629, + "text_loss": 0.54296875 + }, + { + "epoch": 0.55, + "learning_rate": 4.201842715300224e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 6630, + "text_loss": 0.71484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.2005539680013976e-06, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 6631, + "text_loss": 0.50390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.199265275204672e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 6632, + "text_loss": 0.65234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.197976636997905e-06, + "loss": 0.4948, + "regression_loss": 0.0, + "step": 6633, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.196688053468946e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 6634, + "text_loss": 0.423828125 + }, + { + "epoch": 0.55, + "learning_rate": 4.19539952470565e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 6635, + "text_loss": 0.65625 + }, + { + "epoch": 0.55, + "learning_rate": 4.194111050795856e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 6636, + "text_loss": 0.6484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.192822631827411e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 6637, + "text_loss": 0.703125 + }, + { + "epoch": 0.55, + "learning_rate": 4.191534267888148e-06, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 6638, + "text_loss": 0.466796875 + }, + { + "epoch": 0.55, + "learning_rate": 4.190245959065905e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 6639, + "text_loss": 0.447265625 + }, + { + "epoch": 0.55, + "learning_rate": 4.188957705448507e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 6640, + "text_loss": 0.640625 + }, + { + "epoch": 0.55, + "learning_rate": 4.187669507123787e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 6641, + "text_loss": 0.48046875 + }, + { + "epoch": 0.55, + "learning_rate": 4.186381364179562e-06, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 6642, + "text_loss": 0.48046875 + }, + { + "epoch": 0.55, + "learning_rate": 4.1850932767036545e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 6643, + "text_loss": 0.35546875 + }, + { + "epoch": 0.55, + "learning_rate": 4.183805244783879e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 6644, + "text_loss": 0.296875 + }, + { + "epoch": 0.55, + "learning_rate": 4.182517268508046e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 6645, + "text_loss": 0.46875 + }, + { + "epoch": 0.55, + "learning_rate": 4.181229347963961e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 6646, + "text_loss": 0.298828125 + }, + { + "epoch": 0.55, + "learning_rate": 4.179941483239434e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 6647, + "text_loss": 0.39453125 + }, + { + "epoch": 0.55, + "learning_rate": 4.178653674422259e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 6648, + "text_loss": 0.6796875 + }, + { + "epoch": 0.55, + "learning_rate": 4.177365921600236e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 6649, + "text_loss": 0.6171875 + }, + { + "epoch": 0.55, + "learning_rate": 4.176078224861153e-06, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 6650, + "text_loss": 0.81640625 + }, + { + "epoch": 0.55, + "learning_rate": 4.174790584292804e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 6651, + "text_loss": 0.7578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.173502999982969e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 6652, + "text_loss": 0.40234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.172215472019433e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 6653, + "text_loss": 0.341796875 + }, + { + "epoch": 0.55, + "learning_rate": 4.170928000489968e-06, + "loss": 0.4055, + "regression_loss": 0.0, + "step": 6654, + "text_loss": 0.45703125 + }, + { + "epoch": 0.55, + "learning_rate": 4.169640585482351e-06, + "loss": 0.4767, + "regression_loss": 0.0, + "step": 6655, + "text_loss": 0.490234375 + }, + { + "epoch": 0.55, + "learning_rate": 4.1683532270843505e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 6656, + "text_loss": 0.5546875 + }, + { + "epoch": 0.55, + "learning_rate": 4.167065925383732e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 6657, + "text_loss": 0.3359375 + }, + { + "epoch": 0.55, + "learning_rate": 4.165778680468256e-06, + "loss": 0.4473, + "regression_loss": 0.0, + "step": 6658, + "text_loss": 0.515625 + }, + { + "epoch": 0.55, + "learning_rate": 4.164491492425681e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 6659, + "text_loss": 0.484375 + }, + { + "epoch": 0.55, + "learning_rate": 4.163204361343759e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 6660, + "text_loss": 0.431640625 + }, + { + "epoch": 0.55, + "learning_rate": 4.161917287310246e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 6661, + "text_loss": 0.5546875 + }, + { + "epoch": 0.55, + "learning_rate": 4.16063027041288e-06, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 6662, + "text_loss": 0.61328125 + }, + { + "epoch": 0.55, + "learning_rate": 4.159343310739409e-06, + "loss": 0.6721, + "regression_loss": 0.0, + "step": 6663, + "text_loss": 0.625 + }, + { + "epoch": 0.55, + "learning_rate": 4.158056408377568e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 6664, + "text_loss": 0.390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.156769563415093e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 6665, + "text_loss": 0.50390625 + }, + { + "epoch": 0.55, + "learning_rate": 4.155482775939715e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 6666, + "text_loss": 0.3828125 + }, + { + "epoch": 0.55, + "learning_rate": 4.1541960460391586e-06, + "loss": 0.4084, + "regression_loss": 0.0, + "step": 6667, + "text_loss": 0.35546875 + }, + { + "epoch": 0.55, + "learning_rate": 4.152909373801145e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 6668, + "text_loss": 0.427734375 + }, + { + "epoch": 0.55, + "learning_rate": 4.151622759313398e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 6669, + "text_loss": 0.58984375 + }, + { + "epoch": 0.55, + "learning_rate": 4.150336202663629e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 6670, + "text_loss": 0.515625 + }, + { + "epoch": 0.55, + "learning_rate": 4.14904970393955e-06, + "loss": 0.4272, + "regression_loss": 0.0, + "step": 6671, + "text_loss": 0.255859375 + }, + { + "epoch": 0.55, + "learning_rate": 4.147763263228866e-06, + "loss": 0.4141, + "regression_loss": 0.0, + "step": 6672, + "text_loss": 0.57421875 + }, + { + "epoch": 0.55, + "learning_rate": 4.146476880619283e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 6673, + "text_loss": 0.306640625 + }, + { + "epoch": 0.55, + "learning_rate": 4.145190556198494e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 6674, + "text_loss": 0.72265625 + }, + { + "epoch": 0.55, + "learning_rate": 4.1439042900542e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 6675, + "text_loss": 0.498046875 + }, + { + "epoch": 0.55, + "learning_rate": 4.142618082274091e-06, + "loss": 0.4735, + "regression_loss": 0.0, + "step": 6676, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.55, + "learning_rate": 4.141331932945852e-06, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 6677, + "text_loss": 0.478515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.140045842157168e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 6678, + "text_loss": 0.51171875 + }, + { + "epoch": 0.56, + "learning_rate": 4.138759809995714e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 6679, + "text_loss": 0.609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.137473836549171e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 6680, + "text_loss": 0.6484375 + }, + { + "epoch": 0.56, + "learning_rate": 4.136187921905205e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 6681, + "text_loss": 0.66015625 + }, + { + "epoch": 0.56, + "learning_rate": 4.134902066151486e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 6682, + "text_loss": 0.546875 + }, + { + "epoch": 0.56, + "learning_rate": 4.133616269375676e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 6683, + "text_loss": 0.349609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.132330531665435e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 6684, + "text_loss": 0.23046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.131044853108415e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 6685, + "text_loss": 0.240234375 + }, + { + "epoch": 0.56, + "learning_rate": 4.129759233792271e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 6686, + "text_loss": 0.443359375 + }, + { + "epoch": 0.56, + "learning_rate": 4.128473673804645e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 6687, + "text_loss": 0.66015625 + }, + { + "epoch": 0.56, + "learning_rate": 4.127188173233185e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 6688, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.125902732165526e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 6689, + "text_loss": 0.435546875 + }, + { + "epoch": 0.56, + "learning_rate": 4.124617350689305e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 6690, + "text_loss": 0.59765625 + }, + { + "epoch": 0.56, + "learning_rate": 4.1233320288921515e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 6691, + "text_loss": 0.392578125 + }, + { + "epoch": 0.56, + "learning_rate": 4.1220467668616935e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 6692, + "text_loss": 0.62890625 + }, + { + "epoch": 0.56, + "learning_rate": 4.12076156468555e-06, + "loss": 0.5891, + "regression_loss": 0.0, + "step": 6693, + "text_loss": 0.76953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.1194764224513435e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 6694, + "text_loss": 0.23046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.118191340246687e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 6695, + "text_loss": 0.734375 + }, + { + "epoch": 0.56, + "learning_rate": 4.11690631815919e-06, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 6696, + "text_loss": 0.486328125 + }, + { + "epoch": 0.56, + "learning_rate": 4.115621356276459e-06, + "loss": 0.3585, + "regression_loss": 0.0, + "step": 6697, + "text_loss": 0.3203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.114336454686097e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 6698, + "text_loss": 0.67578125 + }, + { + "epoch": 0.56, + "learning_rate": 4.113051613475699e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 6699, + "text_loss": 0.68359375 + }, + { + "epoch": 0.56, + "learning_rate": 4.111766832732865e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 6700, + "text_loss": 0.49609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.110482112545177e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 6701, + "text_loss": 0.462890625 + }, + { + "epoch": 0.56, + "learning_rate": 4.109197453000226e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 6702, + "text_loss": 0.26171875 + }, + { + "epoch": 0.56, + "learning_rate": 4.107912854185591e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 6703, + "text_loss": 0.455078125 + }, + { + "epoch": 0.56, + "learning_rate": 4.1066283161888515e-06, + "loss": 0.4216, + "regression_loss": 0.0, + "step": 6704, + "text_loss": 0.515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.105343839097578e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 6705, + "text_loss": 0.52734375 + }, + { + "epoch": 0.56, + "learning_rate": 4.104059422999342e-06, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 6706, + "text_loss": 0.625 + }, + { + "epoch": 0.56, + "learning_rate": 4.102775067981705e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 6707, + "text_loss": 0.6015625 + }, + { + "epoch": 0.56, + "learning_rate": 4.101490774132232e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 6708, + "text_loss": 0.53515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.100206541538476e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 6709, + "text_loss": 0.625 + }, + { + "epoch": 0.56, + "learning_rate": 4.0989223702879915e-06, + "loss": 0.3962, + "regression_loss": 0.0, + "step": 6710, + "text_loss": 0.3203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.097638260468325e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 6711, + "text_loss": 0.59375 + }, + { + "epoch": 0.56, + "learning_rate": 4.096354212167023e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 6712, + "text_loss": 0.5234375 + }, + { + "epoch": 0.56, + "learning_rate": 4.095070225471621e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 6713, + "text_loss": 0.609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.09378630046966e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 6714, + "text_loss": 0.6953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.092502437248666e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 6715, + "text_loss": 0.53125 + }, + { + "epoch": 0.56, + "learning_rate": 4.09121863589617e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 6716, + "text_loss": 0.75 + }, + { + "epoch": 0.56, + "learning_rate": 4.0899348964996935e-06, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 6717, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.088651219146757e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 6718, + "text_loss": 0.58984375 + }, + { + "epoch": 0.56, + "learning_rate": 4.08736760392487e-06, + "loss": 0.4493, + "regression_loss": 0.0, + "step": 6719, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.08608405092155e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 6720, + "text_loss": 0.3984375 + }, + { + "epoch": 0.56, + "learning_rate": 4.084800560224296e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 6721, + "text_loss": 0.39453125 + }, + { + "epoch": 0.56, + "learning_rate": 4.083517131920613e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 6722, + "text_loss": 0.333984375 + }, + { + "epoch": 0.56, + "learning_rate": 4.082233766098e-06, + "loss": 0.4254, + "regression_loss": 0.0, + "step": 6723, + "text_loss": 0.46875 + }, + { + "epoch": 0.56, + "learning_rate": 4.080950462843948e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 6724, + "text_loss": 0.265625 + }, + { + "epoch": 0.56, + "learning_rate": 4.079667222245948e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 6725, + "text_loss": 0.71875 + }, + { + "epoch": 0.56, + "learning_rate": 4.078384044391482e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 6726, + "text_loss": 0.28125 + }, + { + "epoch": 0.56, + "learning_rate": 4.077100929368032e-06, + "loss": 0.5521, + "regression_loss": 0.0, + "step": 6727, + "text_loss": 0.54296875 + }, + { + "epoch": 0.56, + "learning_rate": 4.075817877263074e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 6728, + "text_loss": 0.62890625 + }, + { + "epoch": 0.56, + "learning_rate": 4.074534888164081e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 6729, + "text_loss": 0.263671875 + }, + { + "epoch": 0.56, + "learning_rate": 4.0732519621585185e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 6730, + "text_loss": 0.53125 + }, + { + "epoch": 0.56, + "learning_rate": 4.071969099333851e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 6731, + "text_loss": 0.353515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.070686299777535e-06, + "loss": 0.4403, + "regression_loss": 0.0, + "step": 6732, + "text_loss": 0.3984375 + }, + { + "epoch": 0.56, + "learning_rate": 4.06940356357703e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 6733, + "text_loss": 0.75390625 + }, + { + "epoch": 0.56, + "learning_rate": 4.0681208908197824e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 6734, + "text_loss": 0.69921875 + }, + { + "epoch": 0.56, + "learning_rate": 4.066838281593241e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 6735, + "text_loss": 0.478515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.065555735984844e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 6736, + "text_loss": 0.5 + }, + { + "epoch": 0.56, + "learning_rate": 4.0642732540820326e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 6737, + "text_loss": 0.75390625 + }, + { + "epoch": 0.56, + "learning_rate": 4.062990835972234e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 6738, + "text_loss": 0.404296875 + }, + { + "epoch": 0.56, + "learning_rate": 4.061708481742886e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 6739, + "text_loss": 0.26953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.0604261914814035e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 6740, + "text_loss": 0.66796875 + }, + { + "epoch": 0.56, + "learning_rate": 4.059143965275213e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 6741, + "text_loss": 0.59375 + }, + { + "epoch": 0.56, + "learning_rate": 4.057861803211725e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 6742, + "text_loss": 0.46484375 + }, + { + "epoch": 0.56, + "learning_rate": 4.056579705378356e-06, + "loss": 0.387, + "regression_loss": 0.0, + "step": 6743, + "text_loss": 0.404296875 + }, + { + "epoch": 0.56, + "learning_rate": 4.055297671862509e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 6744, + "text_loss": 0.458984375 + }, + { + "epoch": 0.56, + "learning_rate": 4.054015702751588e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 6745, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.052733798132988e-06, + "loss": 0.4884, + "regression_loss": 0.0, + "step": 6746, + "text_loss": 0.453125 + }, + { + "epoch": 0.56, + "learning_rate": 4.0514519580941076e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 6747, + "text_loss": 0.6171875 + }, + { + "epoch": 0.56, + "learning_rate": 4.050170182722333e-06, + "loss": 0.5739, + "regression_loss": 0.0, + "step": 6748, + "text_loss": 0.431640625 + }, + { + "epoch": 0.56, + "learning_rate": 4.04888847210505e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 6749, + "text_loss": 0.330078125 + }, + { + "epoch": 0.56, + "learning_rate": 4.047606826329637e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 6750, + "text_loss": 0.474609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.046325245483474e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 6751, + "text_loss": 0.361328125 + }, + { + "epoch": 0.56, + "learning_rate": 4.045043729653927e-06, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 6752, + "text_loss": 0.62890625 + }, + { + "epoch": 0.56, + "learning_rate": 4.043762278928369e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 6753, + "text_loss": 0.423828125 + }, + { + "epoch": 0.56, + "learning_rate": 4.04248089339416e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 6754, + "text_loss": 0.494140625 + }, + { + "epoch": 0.56, + "learning_rate": 4.041199573138658e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 6755, + "text_loss": 0.48046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.0399183182492155e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 6756, + "text_loss": 0.78125 + }, + { + "epoch": 0.56, + "learning_rate": 4.038637128813185e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 6757, + "text_loss": 0.62109375 + }, + { + "epoch": 0.56, + "learning_rate": 4.037356004917908e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 6758, + "text_loss": 0.57421875 + }, + { + "epoch": 0.56, + "learning_rate": 4.036074946650729e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 6759, + "text_loss": 0.2578125 + }, + { + "epoch": 0.56, + "learning_rate": 4.034793954098979e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 6760, + "text_loss": 0.498046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.033513027349994e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 6761, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.032232166491099e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 6762, + "text_loss": 0.390625 + }, + { + "epoch": 0.56, + "learning_rate": 4.030951371609616e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 6763, + "text_loss": 0.54296875 + }, + { + "epoch": 0.56, + "learning_rate": 4.029670642792863e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 6764, + "text_loss": 0.396484375 + }, + { + "epoch": 0.56, + "learning_rate": 4.028389980128156e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 6765, + "text_loss": 0.40234375 + }, + { + "epoch": 0.56, + "learning_rate": 4.027109383702798e-06, + "loss": 0.4698, + "regression_loss": 0.0, + "step": 6766, + "text_loss": 0.490234375 + }, + { + "epoch": 0.56, + "learning_rate": 4.0258288536041005e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 6767, + "text_loss": 0.640625 + }, + { + "epoch": 0.56, + "learning_rate": 4.02454838991936e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 6768, + "text_loss": 0.38671875 + }, + { + "epoch": 0.56, + "learning_rate": 4.023267992735871e-06, + "loss": 0.6382, + "regression_loss": 0.0, + "step": 6769, + "text_loss": 0.78515625 + }, + { + "epoch": 0.56, + "learning_rate": 4.021987662140927e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 6770, + "text_loss": 0.279296875 + }, + { + "epoch": 0.56, + "learning_rate": 4.02070739822181e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 6771, + "text_loss": 0.5 + }, + { + "epoch": 0.56, + "learning_rate": 4.019427201065808e-06, + "loss": 0.4197, + "regression_loss": 0.0, + "step": 6772, + "text_loss": 0.3203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.018147070760193e-06, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 6773, + "text_loss": 0.390625 + }, + { + "epoch": 0.56, + "learning_rate": 4.016867007392241e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 6774, + "text_loss": 0.77734375 + }, + { + "epoch": 0.56, + "learning_rate": 4.015587011049218e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 6775, + "text_loss": 0.73046875 + }, + { + "epoch": 0.56, + "learning_rate": 4.014307081818389e-06, + "loss": 0.5757, + "regression_loss": 0.0, + "step": 6776, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.01302721978701e-06, + "loss": 0.6367, + "regression_loss": 0.0, + "step": 6777, + "text_loss": 0.40625 + }, + { + "epoch": 0.56, + "learning_rate": 4.011747425042342e-06, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 6778, + "text_loss": 0.6953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.010467697671626e-06, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 6779, + "text_loss": 0.58203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.009188037762113e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 6780, + "text_loss": 0.57421875 + }, + { + "epoch": 0.56, + "learning_rate": 4.0079084454010415e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 6781, + "text_loss": 0.51953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.00662892067565e-06, + "loss": 0.4303, + "regression_loss": 0.0, + "step": 6782, + "text_loss": 0.6015625 + }, + { + "epoch": 0.56, + "learning_rate": 4.0053494636731675e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 6783, + "text_loss": 0.408203125 + }, + { + "epoch": 0.56, + "learning_rate": 4.004070074480821e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 6784, + "text_loss": 0.49609375 + }, + { + "epoch": 0.56, + "learning_rate": 4.002790753185831e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 6785, + "text_loss": 0.26953125 + }, + { + "epoch": 0.56, + "learning_rate": 4.0015114998754204e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 6786, + "text_loss": 0.27734375 + }, + { + "epoch": 0.56, + "learning_rate": 4.000232314636796e-06, + "loss": 0.4835, + "regression_loss": 0.0, + "step": 6787, + "text_loss": 0.609375 + }, + { + "epoch": 0.56, + "learning_rate": 3.99895319755717e-06, + "loss": 0.448, + "regression_loss": 0.0, + "step": 6788, + "text_loss": 0.79296875 + }, + { + "epoch": 0.56, + "learning_rate": 3.9976741487237444e-06, + "loss": 0.5284, + "regression_loss": 0.0, + "step": 6789, + "text_loss": 0.26171875 + }, + { + "epoch": 0.56, + "learning_rate": 3.99639516822372e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 6790, + "text_loss": 0.6015625 + }, + { + "epoch": 0.56, + "learning_rate": 3.995116256144287e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 6791, + "text_loss": 0.6484375 + }, + { + "epoch": 0.56, + "learning_rate": 3.993837412572639e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 6792, + "text_loss": 0.640625 + }, + { + "epoch": 0.56, + "learning_rate": 3.992558637595959e-06, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 6793, + "text_loss": 0.29296875 + }, + { + "epoch": 0.56, + "learning_rate": 3.991279931301429e-06, + "loss": 0.4291, + "regression_loss": 0.0, + "step": 6794, + "text_loss": 0.41015625 + }, + { + "epoch": 0.56, + "learning_rate": 3.990001293776222e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 6795, + "text_loss": 0.41015625 + }, + { + "epoch": 0.56, + "learning_rate": 3.988722725107511e-06, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 6796, + "text_loss": 0.29296875 + }, + { + "epoch": 0.56, + "learning_rate": 3.98744422538246e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 6797, + "text_loss": 0.4765625 + }, + { + "epoch": 0.56, + "learning_rate": 3.986165794688236e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 6798, + "text_loss": 0.326171875 + }, + { + "epoch": 0.57, + "learning_rate": 3.984887433111987e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 6799, + "text_loss": 0.3359375 + }, + { + "epoch": 0.57, + "learning_rate": 3.983609140740873e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 6800, + "text_loss": 0.40234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.982330917662036e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 6801, + "text_loss": 0.66015625 + }, + { + "epoch": 0.57, + "learning_rate": 3.981052763962623e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 6802, + "text_loss": 0.6640625 + }, + { + "epoch": 0.57, + "learning_rate": 3.979774679729769e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 6803, + "text_loss": 0.298828125 + }, + { + "epoch": 0.57, + "learning_rate": 3.978496665050609e-06, + "loss": 0.4509, + "regression_loss": 0.0, + "step": 6804, + "text_loss": 0.5859375 + }, + { + "epoch": 0.57, + "learning_rate": 3.977218720012268e-06, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 6805, + "text_loss": 0.8984375 + }, + { + "epoch": 0.57, + "learning_rate": 3.975940844701874e-06, + "loss": 0.6345, + "regression_loss": 0.0, + "step": 6806, + "text_loss": 0.7421875 + }, + { + "epoch": 0.57, + "learning_rate": 3.974663039206544e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 6807, + "text_loss": 0.58203125 + }, + { + "epoch": 0.57, + "learning_rate": 3.973385303613392e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 6808, + "text_loss": 0.734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.972107638009528e-06, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 6809, + "text_loss": 0.53515625 + }, + { + "epoch": 0.57, + "learning_rate": 3.970830042482056e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 6810, + "text_loss": 0.4140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.969552517118075e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 6811, + "text_loss": 0.416015625 + }, + { + "epoch": 0.57, + "learning_rate": 3.968275062004684e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 6812, + "text_loss": 0.33203125 + }, + { + "epoch": 0.57, + "learning_rate": 3.966997677228968e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 6813, + "text_loss": 0.376953125 + }, + { + "epoch": 0.57, + "learning_rate": 3.9657203628780165e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 6814, + "text_loss": 0.609375 + }, + { + "epoch": 0.57, + "learning_rate": 3.964443119038908e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 6815, + "text_loss": 0.494140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.963165945798718e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 6816, + "text_loss": 0.345703125 + }, + { + "epoch": 0.57, + "learning_rate": 3.961888843244523e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 6817, + "text_loss": 0.71484375 + }, + { + "epoch": 0.57, + "learning_rate": 3.96061181146338e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 6818, + "text_loss": 0.5546875 + }, + { + "epoch": 0.57, + "learning_rate": 3.959334850542358e-06, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 6819, + "text_loss": 0.69921875 + }, + { + "epoch": 0.57, + "learning_rate": 3.95805796056851e-06, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 6820, + "text_loss": 0.390625 + }, + { + "epoch": 0.57, + "learning_rate": 3.956781141628891e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 6821, + "text_loss": 0.60546875 + }, + { + "epoch": 0.57, + "learning_rate": 3.955504393810542e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 6822, + "text_loss": 0.7265625 + }, + { + "epoch": 0.57, + "learning_rate": 3.954227717200513e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 6823, + "text_loss": 0.7890625 + }, + { + "epoch": 0.57, + "learning_rate": 3.9529511118858325e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 6824, + "text_loss": 0.5625 + }, + { + "epoch": 0.57, + "learning_rate": 3.951674577953541e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 6825, + "text_loss": 0.474609375 + }, + { + "epoch": 0.57, + "learning_rate": 3.95039811549066e-06, + "loss": 0.5101, + "regression_loss": 0.0, + "step": 6826, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.57, + "learning_rate": 3.949121724584216e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 6827, + "text_loss": 0.458984375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9478454053212245e-06, + "loss": 0.4341, + "regression_loss": 0.0, + "step": 6828, + "text_loss": 0.375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9465691577887e-06, + "loss": 0.4301, + "regression_loss": 0.0, + "step": 6829, + "text_loss": 0.625 + }, + { + "epoch": 0.57, + "learning_rate": 3.9452929820736464e-06, + "loss": 0.3801, + "regression_loss": 0.0, + "step": 6830, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.57, + "learning_rate": 3.9440168782630735e-06, + "loss": 0.4287, + "regression_loss": 0.0, + "step": 6831, + "text_loss": 0.466796875 + }, + { + "epoch": 0.57, + "learning_rate": 3.942740846443974e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 6832, + "text_loss": 0.32421875 + }, + { + "epoch": 0.57, + "learning_rate": 3.941464886703344e-06, + "loss": 0.4038, + "regression_loss": 0.0, + "step": 6833, + "text_loss": 0.640625 + }, + { + "epoch": 0.57, + "learning_rate": 3.940188999128169e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 6834, + "text_loss": 0.22265625 + }, + { + "epoch": 0.57, + "learning_rate": 3.938913183805437e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 6835, + "text_loss": 0.65625 + }, + { + "epoch": 0.57, + "learning_rate": 3.9376374408221205e-06, + "loss": 0.4142, + "regression_loss": 0.0, + "step": 6836, + "text_loss": 0.427734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9363617702652e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 6837, + "text_loss": 0.5703125 + }, + { + "epoch": 0.57, + "learning_rate": 3.935086172221637e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 6838, + "text_loss": 0.9296875 + }, + { + "epoch": 0.57, + "learning_rate": 3.933810646778401e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 6839, + "text_loss": 0.578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.932535194022448e-06, + "loss": 0.698, + "regression_loss": 0.0, + "step": 6840, + "text_loss": 0.5078125 + }, + { + "epoch": 0.57, + "learning_rate": 3.931259814040732e-06, + "loss": 0.484, + "regression_loss": 0.0, + "step": 6841, + "text_loss": 0.64453125 + }, + { + "epoch": 0.57, + "learning_rate": 3.929984506920201e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 6842, + "text_loss": 0.451171875 + }, + { + "epoch": 0.57, + "learning_rate": 3.928709272747801e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 6843, + "text_loss": 0.765625 + }, + { + "epoch": 0.57, + "learning_rate": 3.927434111610469e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 6844, + "text_loss": 0.5859375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9261590235951405e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 6845, + "text_loss": 0.5234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.924884008788742e-06, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 6846, + "text_loss": 0.5078125 + }, + { + "epoch": 0.57, + "learning_rate": 3.9236090672782e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 6847, + "text_loss": 0.734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.922334199150433e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 6848, + "text_loss": 0.328125 + }, + { + "epoch": 0.57, + "learning_rate": 3.921059404492354e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 6849, + "text_loss": 0.828125 + }, + { + "epoch": 0.57, + "learning_rate": 3.919784683390871e-06, + "loss": 0.4614, + "regression_loss": 0.0, + "step": 6850, + "text_loss": 0.56640625 + }, + { + "epoch": 0.57, + "learning_rate": 3.9185100359328915e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 6851, + "text_loss": 0.65234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9172354622053086e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 6852, + "text_loss": 0.59765625 + }, + { + "epoch": 0.57, + "learning_rate": 3.915960962295022e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 6853, + "text_loss": 0.6171875 + }, + { + "epoch": 0.57, + "learning_rate": 3.914686536288917e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 6854, + "text_loss": 0.67578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.91341218427388e-06, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 6855, + "text_loss": 0.68359375 + }, + { + "epoch": 0.57, + "learning_rate": 3.912137906336787e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 6856, + "text_loss": 0.423828125 + }, + { + "epoch": 0.57, + "learning_rate": 3.910863702564514e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 6857, + "text_loss": 0.419921875 + }, + { + "epoch": 0.57, + "learning_rate": 3.909589573043926e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 6858, + "text_loss": 0.6640625 + }, + { + "epoch": 0.57, + "learning_rate": 3.908315517861891e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 6859, + "text_loss": 0.44140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.907041537105265e-06, + "loss": 0.4436, + "regression_loss": 0.0, + "step": 6860, + "text_loss": 0.283203125 + }, + { + "epoch": 0.57, + "learning_rate": 3.905767630860904e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 6861, + "text_loss": 0.3984375 + }, + { + "epoch": 0.57, + "learning_rate": 3.904493799215652e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 6862, + "text_loss": 0.40234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9032200422563535e-06, + "loss": 0.6326, + "regression_loss": 0.0, + "step": 6863, + "text_loss": 0.90234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.901946360069851e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 6864, + "text_loss": 0.7734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.9006727527429724e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 6865, + "text_loss": 0.380859375 + }, + { + "epoch": 0.57, + "learning_rate": 3.899399220362549e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 6866, + "text_loss": 0.52734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.898125763015401e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 6867, + "text_loss": 0.76171875 + }, + { + "epoch": 0.57, + "learning_rate": 3.896852380788349e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 6868, + "text_loss": 0.55078125 + }, + { + "epoch": 0.57, + "learning_rate": 3.895579073768202e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 6869, + "text_loss": 0.55859375 + }, + { + "epoch": 0.57, + "learning_rate": 3.894305842041771e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 6870, + "text_loss": 0.5546875 + }, + { + "epoch": 0.57, + "learning_rate": 3.893032685695857e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 6871, + "text_loss": 0.466796875 + }, + { + "epoch": 0.57, + "learning_rate": 3.891759604817257e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 6872, + "text_loss": 0.451171875 + }, + { + "epoch": 0.57, + "learning_rate": 3.890486599492764e-06, + "loss": 0.6057, + "regression_loss": 0.0, + "step": 6873, + "text_loss": 0.625 + }, + { + "epoch": 0.57, + "learning_rate": 3.889213669809163e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 6874, + "text_loss": 0.51953125 + }, + { + "epoch": 0.57, + "learning_rate": 3.887940815853236e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 6875, + "text_loss": 0.466796875 + }, + { + "epoch": 0.57, + "learning_rate": 3.886668037711763e-06, + "loss": 0.4247, + "regression_loss": 0.0, + "step": 6876, + "text_loss": 0.42578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.88539533547151e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 6877, + "text_loss": 0.3984375 + }, + { + "epoch": 0.57, + "learning_rate": 3.884122709219249e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 6878, + "text_loss": 0.494140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8828501590417366e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 6879, + "text_loss": 0.4765625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8815776850257325e-06, + "loss": 0.6274, + "regression_loss": 0.0, + "step": 6880, + "text_loss": 0.8125 + }, + { + "epoch": 0.57, + "learning_rate": 3.8803052872579835e-06, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 6881, + "text_loss": 0.68359375 + }, + { + "epoch": 0.57, + "learning_rate": 3.879032965825238e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 6882, + "text_loss": 0.578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.8777607208142335e-06, + "loss": 0.4152, + "regression_loss": 0.0, + "step": 6883, + "text_loss": 0.515625 + }, + { + "epoch": 0.57, + "learning_rate": 3.876488552311708e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 6884, + "text_loss": 0.404296875 + }, + { + "epoch": 0.57, + "learning_rate": 3.875216460404391e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 6885, + "text_loss": 0.40234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.873944445179006e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 6886, + "text_loss": 0.291015625 + }, + { + "epoch": 0.57, + "learning_rate": 3.872672506722272e-06, + "loss": 0.446, + "regression_loss": 0.0, + "step": 6887, + "text_loss": 0.484375 + }, + { + "epoch": 0.57, + "learning_rate": 3.871400645120905e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 6888, + "text_loss": 0.494140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8701288604616115e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 6889, + "text_loss": 0.625 + }, + { + "epoch": 0.57, + "learning_rate": 3.868857152831099e-06, + "loss": 0.4496, + "regression_loss": 0.0, + "step": 6890, + "text_loss": 0.412109375 + }, + { + "epoch": 0.57, + "learning_rate": 3.867585522316061e-06, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 6891, + "text_loss": 0.87890625 + }, + { + "epoch": 0.57, + "learning_rate": 3.866313969003195e-06, + "loss": 0.4458, + "regression_loss": 0.0, + "step": 6892, + "text_loss": 0.427734375 + }, + { + "epoch": 0.57, + "learning_rate": 3.865042492979186e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 6893, + "text_loss": 0.5234375 + }, + { + "epoch": 0.57, + "learning_rate": 3.86377109433072e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 6894, + "text_loss": 0.61328125 + }, + { + "epoch": 0.57, + "learning_rate": 3.86249977314447e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 6895, + "text_loss": 0.73046875 + }, + { + "epoch": 0.57, + "learning_rate": 3.861228529507113e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 6896, + "text_loss": 0.67578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.85995736350531e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 6897, + "text_loss": 0.46875 + }, + { + "epoch": 0.57, + "learning_rate": 3.858686275225729e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 6898, + "text_loss": 0.388671875 + }, + { + "epoch": 0.57, + "learning_rate": 3.8574152647550215e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 6899, + "text_loss": 0.65625 + }, + { + "epoch": 0.57, + "learning_rate": 3.85614433217984e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 6900, + "text_loss": 0.5 + }, + { + "epoch": 0.57, + "learning_rate": 3.85487347758683e-06, + "loss": 0.545, + "regression_loss": 0.0, + "step": 6901, + "text_loss": 0.69140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.853602701062632e-06, + "loss": 0.5977, + "regression_loss": 0.0, + "step": 6902, + "text_loss": 0.50390625 + }, + { + "epoch": 0.57, + "learning_rate": 3.85233200269388e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 6903, + "text_loss": 0.62109375 + }, + { + "epoch": 0.57, + "learning_rate": 3.8510613825672065e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 6904, + "text_loss": 0.392578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.849790840769232e-06, + "loss": 0.4376, + "regression_loss": 0.0, + "step": 6905, + "text_loss": 0.578125 + }, + { + "epoch": 0.57, + "learning_rate": 3.848520377386578e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 6906, + "text_loss": 0.478515625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8472499925058585e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 6907, + "text_loss": 0.380859375 + }, + { + "epoch": 0.57, + "learning_rate": 3.84597968621368e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 6908, + "text_loss": 0.4140625 + }, + { + "epoch": 0.57, + "learning_rate": 3.844709458596645e-06, + "loss": 0.4183, + "regression_loss": 0.0, + "step": 6909, + "text_loss": 0.50390625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8434393097413526e-06, + "loss": 0.4576, + "regression_loss": 0.0, + "step": 6910, + "text_loss": 0.515625 + }, + { + "epoch": 0.57, + "learning_rate": 3.8421692397343955e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 6911, + "text_loss": 0.51953125 + }, + { + "epoch": 0.57, + "learning_rate": 3.840899248662358e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 6912, + "text_loss": 0.5546875 + }, + { + "epoch": 0.57, + "learning_rate": 3.839629336611826e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 6913, + "text_loss": 0.78125 + }, + { + "epoch": 0.57, + "learning_rate": 3.838359503669369e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 6914, + "text_loss": 0.54296875 + }, + { + "epoch": 0.57, + "learning_rate": 3.837089749921565e-06, + "loss": 0.5204, + "regression_loss": 0.0, + "step": 6915, + "text_loss": 0.375 + }, + { + "epoch": 0.57, + "learning_rate": 3.835820075454973e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 6916, + "text_loss": 0.53125 + }, + { + "epoch": 0.57, + "learning_rate": 3.8345504803561575e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 6917, + "text_loss": 0.41796875 + }, + { + "epoch": 0.57, + "learning_rate": 3.833280964711669e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 6918, + "text_loss": 0.330078125 + }, + { + "epoch": 0.58, + "learning_rate": 3.83201152860806e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 6919, + "text_loss": 0.5 + }, + { + "epoch": 0.58, + "learning_rate": 3.830742172131871e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 6920, + "text_loss": 0.2578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.8294728953696434e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 6921, + "text_loss": 0.609375 + }, + { + "epoch": 0.58, + "learning_rate": 3.828203698407905e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 6922, + "text_loss": 0.6015625 + }, + { + "epoch": 0.58, + "learning_rate": 3.8269345813331894e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6923, + "text_loss": 0.75390625 + }, + { + "epoch": 0.58, + "learning_rate": 3.825665544232013e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 6924, + "text_loss": 0.4375 + }, + { + "epoch": 0.58, + "learning_rate": 3.824396587190897e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 6925, + "text_loss": 0.466796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.823127710296348e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 6926, + "text_loss": 0.392578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.821858913634875e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 6927, + "text_loss": 0.431640625 + }, + { + "epoch": 0.58, + "learning_rate": 3.820590197292974e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 6928, + "text_loss": 0.47265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.819321561357145e-06, + "loss": 0.415, + "regression_loss": 0.0, + "step": 6929, + "text_loss": 0.609375 + }, + { + "epoch": 0.58, + "learning_rate": 3.818053005913871e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 6930, + "text_loss": 0.578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.81678453104964e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 6931, + "text_loss": 0.55859375 + }, + { + "epoch": 0.58, + "learning_rate": 3.815516136850928e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 6932, + "text_loss": 0.451171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.8142478234042086e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 6933, + "text_loss": 0.5 + }, + { + "epoch": 0.58, + "learning_rate": 3.8129795907959467e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 6934, + "text_loss": 0.494140625 + }, + { + "epoch": 0.58, + "learning_rate": 3.8117114391126088e-06, + "loss": 0.411, + "regression_loss": 0.0, + "step": 6935, + "text_loss": 0.212890625 + }, + { + "epoch": 0.58, + "learning_rate": 3.810443368440644e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 6936, + "text_loss": 0.65625 + }, + { + "epoch": 0.58, + "learning_rate": 3.8091753788665085e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 6937, + "text_loss": 0.59765625 + }, + { + "epoch": 0.58, + "learning_rate": 3.8079074704766443e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 6938, + "text_loss": 0.609375 + }, + { + "epoch": 0.58, + "learning_rate": 3.8066396433574932e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 6939, + "text_loss": 0.330078125 + }, + { + "epoch": 0.58, + "learning_rate": 3.8053718975954862e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 6940, + "text_loss": 0.39453125 + }, + { + "epoch": 0.58, + "learning_rate": 3.8041042332770545e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 6941, + "text_loss": 0.66796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.8028366504886173e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 6942, + "text_loss": 0.52734375 + }, + { + "epoch": 0.58, + "learning_rate": 3.8015691493165963e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 6943, + "text_loss": 0.66796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.8003017298474e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 6944, + "text_loss": 0.81640625 + }, + { + "epoch": 0.58, + "learning_rate": 3.799034392167436e-06, + "loss": 0.6155, + "regression_loss": 0.0, + "step": 6945, + "text_loss": 0.7890625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7977671363631036e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 6946, + "text_loss": 0.55859375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7964999625208e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 6947, + "text_loss": 0.59375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7952328707269102e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 6948, + "text_loss": 0.71875 + }, + { + "epoch": 0.58, + "learning_rate": 3.793965861067824e-06, + "loss": 0.4962, + "regression_loss": 0.0, + "step": 6949, + "text_loss": 0.55859375 + }, + { + "epoch": 0.58, + "learning_rate": 3.792698933629914e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 6950, + "text_loss": 0.65625 + }, + { + "epoch": 0.58, + "learning_rate": 3.791432088499557e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 6951, + "text_loss": 0.396484375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7901653257631166e-06, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 6952, + "text_loss": 0.212890625 + }, + { + "epoch": 0.58, + "learning_rate": 3.788898645506957e-06, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 6953, + "text_loss": 0.43359375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7876320478174315e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 6954, + "text_loss": 0.5390625 + }, + { + "epoch": 0.58, + "learning_rate": 3.786365532780892e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 6955, + "text_loss": 0.466796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7850991004836813e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 6956, + "text_loss": 0.353515625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7838327510121392e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 6957, + "text_loss": 0.69140625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7825664844526e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 6958, + "text_loss": 0.283203125 + }, + { + "epoch": 0.58, + "learning_rate": 3.781300300891389e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 6959, + "text_loss": 0.25 + }, + { + "epoch": 0.58, + "learning_rate": 3.78003420041483e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 6960, + "text_loss": 0.61328125 + }, + { + "epoch": 0.58, + "learning_rate": 3.778768183109236e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 6961, + "text_loss": 0.51171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7775022490609227e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 6962, + "text_loss": 0.2578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.77623639835619e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 6963, + "text_loss": 0.6640625 + }, + { + "epoch": 0.58, + "learning_rate": 3.774970631081341e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 6964, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7737049473226656e-06, + "loss": 0.4532, + "regression_loss": 0.0, + "step": 6965, + "text_loss": 0.421875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7724393471664545e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 6966, + "text_loss": 0.23828125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7711738306989866e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 6967, + "text_loss": 0.408203125 + }, + { + "epoch": 0.58, + "learning_rate": 3.769908398006542e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 6968, + "text_loss": 0.58203125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7686430491753888e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 6969, + "text_loss": 0.47265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.767377784291794e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 6970, + "text_loss": 0.66796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7661126034420143e-06, + "loss": 0.408, + "regression_loss": 0.0, + "step": 6971, + "text_loss": 0.314453125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7648475067123053e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 6972, + "text_loss": 0.8515625 + }, + { + "epoch": 0.58, + "learning_rate": 3.763582494188912e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 6973, + "text_loss": 0.515625 + }, + { + "epoch": 0.58, + "learning_rate": 3.762317565958081e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 6974, + "text_loss": 0.4296875 + }, + { + "epoch": 0.58, + "learning_rate": 3.761052722106043e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 6975, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7597879627190337e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 6976, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7585232878832745e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 6977, + "text_loss": 0.392578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7572586976849867e-06, + "loss": 0.5801, + "regression_loss": 0.0, + "step": 6978, + "text_loss": 0.578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7559941922103808e-06, + "loss": 0.4506, + "regression_loss": 0.0, + "step": 6979, + "text_loss": 0.53125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7547297715456674e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 6980, + "text_loss": 0.55859375 + }, + { + "epoch": 0.58, + "learning_rate": 3.753465435777044e-06, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 6981, + "text_loss": 0.375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7522011849907112e-06, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 6982, + "text_loss": 0.365234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.750937019272856e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 6983, + "text_loss": 0.72265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7496729387096643e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 6984, + "text_loss": 0.63671875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7484089433873133e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 6985, + "text_loss": 0.5390625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7471450333919766e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 6986, + "text_loss": 0.5703125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7458812088098194e-06, + "loss": 0.4991, + "regression_loss": 0.0, + "step": 6987, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.744617469727007e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 6988, + "text_loss": 0.6328125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7433538162296884e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 6989, + "text_loss": 0.43359375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7420902484040185e-06, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 6990, + "text_loss": 0.546875 + }, + { + "epoch": 0.58, + "learning_rate": 3.740826766336137e-06, + "loss": 0.625, + "regression_loss": 0.0, + "step": 6991, + "text_loss": 0.7265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.739563370112185e-06, + "loss": 0.4075, + "regression_loss": 0.0, + "step": 6992, + "text_loss": 0.58203125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7383000598182907e-06, + "loss": 0.7012, + "regression_loss": 0.0, + "step": 6993, + "text_loss": 0.44140625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7370368355405826e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 6994, + "text_loss": 0.470703125 + }, + { + "epoch": 0.58, + "learning_rate": 3.735773697365178e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 6995, + "text_loss": 0.373046875 + }, + { + "epoch": 0.58, + "learning_rate": 3.734510645378196e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 6996, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7332476796657403e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 6997, + "text_loss": 0.71484375 + }, + { + "epoch": 0.58, + "learning_rate": 3.731984800313917e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 6998, + "text_loss": 0.3125 + }, + { + "epoch": 0.58, + "learning_rate": 3.730722007408819e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 6999, + "text_loss": 0.6171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.729459301036541e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 7000, + "text_loss": 0.6015625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7281966812831626e-06, + "loss": 0.4982, + "regression_loss": 0.0, + "step": 7001, + "text_loss": 0.384765625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7269341482347688e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 7002, + "text_loss": 0.58203125 + }, + { + "epoch": 0.58, + "learning_rate": 3.725671701977428e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 7003, + "text_loss": 0.62890625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7244093425972083e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 7004, + "text_loss": 0.6640625 + }, + { + "epoch": 0.58, + "learning_rate": 3.723147070180173e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 7005, + "text_loss": 0.71875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7218848848123733e-06, + "loss": 0.4434, + "regression_loss": 0.0, + "step": 7006, + "text_loss": 0.578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.720622786579864e-06, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 7007, + "text_loss": 0.85546875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7193607755686836e-06, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 7008, + "text_loss": 0.349609375 + }, + { + "epoch": 0.58, + "learning_rate": 3.718098851864872e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7009, + "text_loss": 0.50390625 + }, + { + "epoch": 0.58, + "learning_rate": 3.7168370155544587e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 7010, + "text_loss": 0.34765625 + }, + { + "epoch": 0.58, + "learning_rate": 3.715575266723472e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 7011, + "text_loss": 0.275390625 + }, + { + "epoch": 0.58, + "learning_rate": 3.714313605457927e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 7012, + "text_loss": 0.37109375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7130520318438433e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 7013, + "text_loss": 0.546875 + }, + { + "epoch": 0.58, + "learning_rate": 3.711790545967222e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 7014, + "text_loss": 0.73046875 + }, + { + "epoch": 0.58, + "learning_rate": 3.7105291479140704e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 7015, + "text_loss": 0.5234375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7092678377703806e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 7016, + "text_loss": 0.361328125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7080066156221434e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 7017, + "text_loss": 0.486328125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7067454815553416e-06, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 7018, + "text_loss": 0.439453125 + }, + { + "epoch": 0.58, + "learning_rate": 3.7054844356559545e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 7019, + "text_loss": 0.47265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.704223478009951e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 7020, + "text_loss": 0.466796875 + }, + { + "epoch": 0.58, + "learning_rate": 3.702962608703299e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 7021, + "text_loss": 0.609375 + }, + { + "epoch": 0.58, + "learning_rate": 3.7017018278219574e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 7022, + "text_loss": 0.5625 + }, + { + "epoch": 0.58, + "learning_rate": 3.70044113545188e-06, + "loss": 0.4718, + "regression_loss": 0.0, + "step": 7023, + "text_loss": 0.48046875 + }, + { + "epoch": 0.58, + "learning_rate": 3.699180531679013e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 7024, + "text_loss": 0.51171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.6979200165893003e-06, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 7025, + "text_loss": 0.392578125 + }, + { + "epoch": 0.58, + "learning_rate": 3.6966595902686736e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 7026, + "text_loss": 0.734375 + }, + { + "epoch": 0.58, + "learning_rate": 3.695399252803068e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 7027, + "text_loss": 0.64453125 + }, + { + "epoch": 0.58, + "learning_rate": 3.6941390042783994e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 7028, + "text_loss": 0.6171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.6928788447805906e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 7029, + "text_loss": 0.68359375 + }, + { + "epoch": 0.58, + "learning_rate": 3.6916187743955513e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 7030, + "text_loss": 0.37109375 + }, + { + "epoch": 0.58, + "learning_rate": 3.690358793209187e-06, + "loss": 0.425, + "regression_loss": 0.0, + "step": 7031, + "text_loss": 0.251953125 + }, + { + "epoch": 0.58, + "learning_rate": 3.6890989013073943e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 7032, + "text_loss": 0.7265625 + }, + { + "epoch": 0.58, + "learning_rate": 3.687839098776069e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 7033, + "text_loss": 0.61328125 + }, + { + "epoch": 0.58, + "learning_rate": 3.6865793857010956e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 7034, + "text_loss": 0.53125 + }, + { + "epoch": 0.58, + "learning_rate": 3.685319762168358e-06, + "loss": 0.4801, + "regression_loss": 0.0, + "step": 7035, + "text_loss": 0.51171875 + }, + { + "epoch": 0.58, + "learning_rate": 3.6840602282637273e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 7036, + "text_loss": 0.462890625 + }, + { + "epoch": 0.58, + "learning_rate": 3.6828007840730752e-06, + "loss": 0.4047, + "regression_loss": 0.0, + "step": 7037, + "text_loss": 0.333984375 + }, + { + "epoch": 0.58, + "learning_rate": 3.681541429682261e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 7038, + "text_loss": 0.494140625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6802821651771435e-06, + "loss": 0.574, + "regression_loss": 0.0, + "step": 7039, + "text_loss": 0.54296875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6790229906435706e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 7040, + "text_loss": 0.6484375 + }, + { + "epoch": 0.59, + "learning_rate": 3.677763906167389e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 7041, + "text_loss": 0.361328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6765049118344333e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 7042, + "text_loss": 0.60546875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6752460077305385e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 7043, + "text_loss": 0.40625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6739871939415283e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 7044, + "text_loss": 0.57421875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6727284705532236e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 7045, + "text_loss": 0.51171875 + }, + { + "epoch": 0.59, + "learning_rate": 3.671469837651434e-06, + "loss": 0.4554, + "regression_loss": 0.0, + "step": 7046, + "text_loss": 0.5234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6702112953219725e-06, + "loss": 0.4574, + "regression_loss": 0.0, + "step": 7047, + "text_loss": 0.59375 + }, + { + "epoch": 0.59, + "learning_rate": 3.668952843650634e-06, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 7048, + "text_loss": 0.26171875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6676944827232176e-06, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 7049, + "text_loss": 0.41796875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6664362126255087e-06, + "loss": 0.3893, + "regression_loss": 0.0, + "step": 7050, + "text_loss": 0.52734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6651780334432908e-06, + "loss": 0.3969, + "regression_loss": 0.0, + "step": 7051, + "text_loss": 0.3984375 + }, + { + "epoch": 0.59, + "learning_rate": 3.663919945262343e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 7052, + "text_loss": 0.326171875 + }, + { + "epoch": 0.59, + "learning_rate": 3.66266194816843e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 7053, + "text_loss": 0.490234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6614040422473208e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 7054, + "text_loss": 0.455078125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6601462275847682e-06, + "loss": 0.4309, + "regression_loss": 0.0, + "step": 7055, + "text_loss": 0.291015625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6588885042665274e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 7056, + "text_loss": 0.90625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6576308723783405e-06, + "loss": 0.5592, + "regression_loss": 0.0, + "step": 7057, + "text_loss": 0.43359375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6563733320059484e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 7058, + "text_loss": 0.427734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.655115883235081e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 7059, + "text_loss": 0.63671875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6538585261514694e-06, + "loss": 0.5779, + "regression_loss": 0.0, + "step": 7060, + "text_loss": 0.6015625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6526012608408283e-06, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 7061, + "text_loss": 0.330078125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6513440873888762e-06, + "loss": 0.4131, + "regression_loss": 0.0, + "step": 7062, + "text_loss": 0.58203125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6500870058813172e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 7063, + "text_loss": 0.6484375 + }, + { + "epoch": 0.59, + "learning_rate": 3.648830016403855e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 7064, + "text_loss": 0.546875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6475731190421814e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 7065, + "text_loss": 0.58203125 + }, + { + "epoch": 0.59, + "learning_rate": 3.646316313881991e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 7066, + "text_loss": 0.302734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.64505960100896e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 7067, + "text_loss": 0.6484375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6438029805087686e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 7068, + "text_loss": 0.216796875 + }, + { + "epoch": 0.59, + "learning_rate": 3.642546452467085e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 7069, + "text_loss": 0.5390625 + }, + { + "epoch": 0.59, + "learning_rate": 3.641290016969574e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 7070, + "text_loss": 0.46484375 + }, + { + "epoch": 0.59, + "learning_rate": 3.640033674101891e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 7071, + "text_loss": 0.5234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6387774239496893e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 7072, + "text_loss": 0.55859375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6375212665986102e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 7073, + "text_loss": 0.455078125 + }, + { + "epoch": 0.59, + "learning_rate": 3.636265202134296e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 7074, + "text_loss": 0.353515625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6350092306423755e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 7075, + "text_loss": 0.5625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6337533522084777e-06, + "loss": 0.5979, + "regression_loss": 0.0, + "step": 7076, + "text_loss": 0.6328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6324975669182182e-06, + "loss": 0.4314, + "regression_loss": 0.0, + "step": 7077, + "text_loss": 0.333984375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6312418748572127e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 7078, + "text_loss": 0.65234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6299862761110644e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 7079, + "text_loss": 0.41796875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6287307707653787e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 7080, + "text_loss": 0.4140625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6274753589057444e-06, + "loss": 0.4377, + "regression_loss": 0.0, + "step": 7081, + "text_loss": 0.392578125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6262200406177534e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 7082, + "text_loss": 0.388671875 + }, + { + "epoch": 0.59, + "learning_rate": 3.624964815986983e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 7083, + "text_loss": 0.4921875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6237096850990105e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 7084, + "text_loss": 0.2734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6224546480394017e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 7085, + "text_loss": 0.2578125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6211997048937235e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 7086, + "text_loss": 0.462890625 + }, + { + "epoch": 0.59, + "learning_rate": 3.619944855747525e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 7087, + "text_loss": 0.61328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6186901006863595e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 7088, + "text_loss": 0.625 + }, + { + "epoch": 0.59, + "learning_rate": 3.617435439795769e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 7089, + "text_loss": 0.474609375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6161808731612896e-06, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 7090, + "text_loss": 0.59375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6149264008684505e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7091, + "text_loss": 0.3828125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6136720230027762e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 7092, + "text_loss": 0.5390625 + }, + { + "epoch": 0.59, + "learning_rate": 3.612417739649781e-06, + "loss": 0.4216, + "regression_loss": 0.0, + "step": 7093, + "text_loss": 0.357421875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6111635508949805e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 7094, + "text_loss": 0.357421875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6099094568238746e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 7095, + "text_loss": 0.55078125 + }, + { + "epoch": 0.59, + "learning_rate": 3.608655457521963e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 7096, + "text_loss": 0.6953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.6074015530747354e-06, + "loss": 0.3909, + "regression_loss": 0.0, + "step": 7097, + "text_loss": 0.35546875 + }, + { + "epoch": 0.59, + "learning_rate": 3.606147743567676e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 7098, + "text_loss": 0.4765625 + }, + { + "epoch": 0.59, + "learning_rate": 3.604894029086267e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 7099, + "text_loss": 0.640625 + }, + { + "epoch": 0.59, + "learning_rate": 3.6036404097159767e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 7100, + "text_loss": 0.40234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.6023868855422715e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 7101, + "text_loss": 0.66796875 + }, + { + "epoch": 0.59, + "learning_rate": 3.6011334566506093e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 7102, + "text_loss": 0.61328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5998801231264447e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 7103, + "text_loss": 0.51953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.598626885055219e-06, + "loss": 0.4476, + "regression_loss": 0.0, + "step": 7104, + "text_loss": 0.55859375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5973737425223774e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 7105, + "text_loss": 0.45703125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5961206956133475e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 7106, + "text_loss": 0.44921875 + }, + { + "epoch": 0.59, + "learning_rate": 3.594867744413559e-06, + "loss": 0.5397, + "regression_loss": 0.0, + "step": 7107, + "text_loss": 0.546875 + }, + { + "epoch": 0.59, + "learning_rate": 3.59361488900843e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 7108, + "text_loss": 0.40625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5923621294833744e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 7109, + "text_loss": 0.26953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.591109465923797e-06, + "loss": 0.5952, + "regression_loss": 0.0, + "step": 7110, + "text_loss": 0.5859375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5898568984151027e-06, + "loss": 0.6162, + "regression_loss": 0.0, + "step": 7111, + "text_loss": 0.76953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5886044270426785e-06, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 7112, + "text_loss": 0.52734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5873520518919168e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 7113, + "text_loss": 0.61328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.586099773048195e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 7114, + "text_loss": 0.52734375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5848475905968893e-06, + "loss": 0.6304, + "regression_loss": 0.0, + "step": 7115, + "text_loss": 0.62109375 + }, + { + "epoch": 0.59, + "learning_rate": 3.583595504623365e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 7116, + "text_loss": 0.54296875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5823435152129843e-06, + "loss": 0.436, + "regression_loss": 0.0, + "step": 7117, + "text_loss": 0.5859375 + }, + { + "epoch": 0.59, + "learning_rate": 3.581091622451098e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 7118, + "text_loss": 0.3515625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5798398264230598e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 7119, + "text_loss": 0.423828125 + }, + { + "epoch": 0.59, + "learning_rate": 3.578588127214206e-06, + "loss": 0.444, + "regression_loss": 0.0, + "step": 7120, + "text_loss": 0.388671875 + }, + { + "epoch": 0.59, + "learning_rate": 3.577336524909873e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 7121, + "text_loss": 0.474609375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5760850195953866e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7122, + "text_loss": 0.34375 + }, + { + "epoch": 0.59, + "learning_rate": 3.574833611356071e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 7123, + "text_loss": 0.6328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.573582300277236e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 7124, + "text_loss": 0.5625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5723310864441968e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 7125, + "text_loss": 0.451171875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5710799699422467e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 7126, + "text_loss": 0.57421875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5698289508566865e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 7127, + "text_loss": 0.43359375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5685780292728e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 7128, + "text_loss": 0.478515625 + }, + { + "epoch": 0.59, + "learning_rate": 3.567327205275872e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 7129, + "text_loss": 0.75 + }, + { + "epoch": 0.59, + "learning_rate": 3.5660764789511744e-06, + "loss": 0.4548, + "regression_loss": 0.0, + "step": 7130, + "text_loss": 0.322265625 + }, + { + "epoch": 0.59, + "learning_rate": 3.564825850383977e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 7131, + "text_loss": 0.265625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5635753196595392e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 7132, + "text_loss": 0.3828125 + }, + { + "epoch": 0.59, + "learning_rate": 3.562324886863119e-06, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 7133, + "text_loss": 0.498046875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5610745520799617e-06, + "loss": 0.4318, + "regression_loss": 0.0, + "step": 7134, + "text_loss": 0.58984375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5598243153953103e-06, + "loss": 0.3358, + "regression_loss": 0.0, + "step": 7135, + "text_loss": 0.251953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5585741768943982e-06, + "loss": 0.5789, + "regression_loss": 0.0, + "step": 7136, + "text_loss": 0.515625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5573241366624557e-06, + "loss": 0.4813, + "regression_loss": 0.0, + "step": 7137, + "text_loss": 0.62890625 + }, + { + "epoch": 0.59, + "learning_rate": 3.556074194784699e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 7138, + "text_loss": 0.53125 + }, + { + "epoch": 0.59, + "learning_rate": 3.55482435134635e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 7139, + "text_loss": 0.404296875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5535746064326087e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 7140, + "text_loss": 0.37890625 + }, + { + "epoch": 0.59, + "learning_rate": 3.5523249601286823e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 7141, + "text_loss": 0.458984375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5510754125197622e-06, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 7142, + "text_loss": 0.31640625 + }, + { + "epoch": 0.59, + "learning_rate": 3.549825963691038e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 7143, + "text_loss": 0.466796875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5485766137276894e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 7144, + "text_loss": 0.8125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5473273627148893e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 7145, + "text_loss": 0.6171875 + }, + { + "epoch": 0.59, + "learning_rate": 3.546078210737809e-06, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 7146, + "text_loss": 0.357421875 + }, + { + "epoch": 0.59, + "learning_rate": 3.544829157881605e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 7147, + "text_loss": 0.361328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5435802042314353e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 7148, + "text_loss": 0.396484375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5423313498724427e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 7149, + "text_loss": 0.376953125 + }, + { + "epoch": 0.59, + "learning_rate": 3.541082594889773e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 7150, + "text_loss": 0.447265625 + }, + { + "epoch": 0.59, + "learning_rate": 3.539833939368554e-06, + "loss": 0.504, + "regression_loss": 0.0, + "step": 7151, + "text_loss": 0.55078125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5385853833939177e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 7152, + "text_loss": 0.48828125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5373369270509806e-06, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 7153, + "text_loss": 0.435546875 + }, + { + "epoch": 0.59, + "learning_rate": 3.5360885704248583e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 7154, + "text_loss": 0.263671875 + }, + { + "epoch": 0.59, + "learning_rate": 3.534840313600656e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 7155, + "text_loss": 0.486328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.533592156663474e-06, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 7156, + "text_loss": 0.486328125 + }, + { + "epoch": 0.59, + "learning_rate": 3.5323440996984033e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 7157, + "text_loss": 0.65234375 + }, + { + "epoch": 0.59, + "learning_rate": 3.5310961427905335e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 7158, + "text_loss": 0.4375 + }, + { + "epoch": 0.59, + "learning_rate": 3.529848286024942e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 7159, + "text_loss": 0.4453125 + }, + { + "epoch": 0.6, + "learning_rate": 3.528600529486701e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7160, + "text_loss": 0.35546875 + }, + { + "epoch": 0.6, + "learning_rate": 3.527352873260875e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 7161, + "text_loss": 0.69921875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5261053174325265e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 7162, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.6, + "learning_rate": 3.524857862086702e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 7163, + "text_loss": 0.4921875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5236105073084527e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 7164, + "text_loss": 0.68359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.5223632531828112e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 7165, + "text_loss": 0.31640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.521116099794812e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 7166, + "text_loss": 0.546875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5198690472294795e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 7167, + "text_loss": 0.92578125 + }, + { + "epoch": 0.6, + "learning_rate": 3.518622095571831e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 7168, + "text_loss": 0.482421875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5173752449068766e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 7169, + "text_loss": 0.76171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5161284953196205e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 7170, + "text_loss": 0.5546875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5148818468950576e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 7171, + "text_loss": 0.62890625 + }, + { + "epoch": 0.6, + "learning_rate": 3.5136352997181822e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 7172, + "text_loss": 0.87109375 + }, + { + "epoch": 0.6, + "learning_rate": 3.5123888538739747e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 7173, + "text_loss": 0.59765625 + }, + { + "epoch": 0.6, + "learning_rate": 3.511142509447412e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 7174, + "text_loss": 0.3203125 + }, + { + "epoch": 0.6, + "learning_rate": 3.5098962665234624e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 7175, + "text_loss": 0.36328125 + }, + { + "epoch": 0.6, + "learning_rate": 3.5086501251870907e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 7176, + "text_loss": 0.453125 + }, + { + "epoch": 0.6, + "learning_rate": 3.5074040855232486e-06, + "loss": 0.4833, + "regression_loss": 0.0, + "step": 7177, + "text_loss": 0.6328125 + }, + { + "epoch": 0.6, + "learning_rate": 3.5061581476168906e-06, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 7178, + "text_loss": 0.51171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.5049123115529503e-06, + "loss": 0.5538, + "regression_loss": 0.0, + "step": 7179, + "text_loss": 0.5 + }, + { + "epoch": 0.6, + "learning_rate": 3.50366657741637e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 7180, + "text_loss": 0.5078125 + }, + { + "epoch": 0.6, + "learning_rate": 3.502420945292072e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 7181, + "text_loss": 0.42578125 + }, + { + "epoch": 0.6, + "learning_rate": 3.5011754152649806e-06, + "loss": 0.4569, + "regression_loss": 0.0, + "step": 7182, + "text_loss": 0.58203125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4999299874200066e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 7183, + "text_loss": 0.52734375 + }, + { + "epoch": 0.6, + "learning_rate": 3.498684661842061e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 7184, + "text_loss": 0.51171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.497439438616038e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 7185, + "text_loss": 0.486328125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4961943178268356e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 7186, + "text_loss": 0.5390625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4949492995593365e-06, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 7187, + "text_loss": 0.400390625 + }, + { + "epoch": 0.6, + "learning_rate": 3.493704383898422e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 7188, + "text_loss": 0.54296875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4924595709289623e-06, + "loss": 0.6279, + "regression_loss": 0.0, + "step": 7189, + "text_loss": 0.6953125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4912148607358233e-06, + "loss": 0.406, + "regression_loss": 0.0, + "step": 7190, + "text_loss": 0.236328125 + }, + { + "epoch": 0.6, + "learning_rate": 3.48997025340386e-06, + "loss": 0.5297, + "regression_loss": 0.0, + "step": 7191, + "text_loss": 0.54296875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4887257490179273e-06, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 7192, + "text_loss": 0.326171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4874813476628684e-06, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 7193, + "text_loss": 0.640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.486237049423518e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 7194, + "text_loss": 0.443359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.484992854384709e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 7195, + "text_loss": 0.390625 + }, + { + "epoch": 0.6, + "learning_rate": 3.483748762631259e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 7196, + "text_loss": 0.6015625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4825047742479908e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 7197, + "text_loss": 0.5859375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4812608893197074e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 7198, + "text_loss": 0.5078125 + }, + { + "epoch": 0.6, + "learning_rate": 3.480017107931214e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 7199, + "text_loss": 0.2421875 + }, + { + "epoch": 0.6, + "learning_rate": 3.478773430167302e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 7200, + "text_loss": 0.62890625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4775298561127626e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 7201, + "text_loss": 0.5625 + }, + { + "epoch": 0.6, + "learning_rate": 3.476286385852371e-06, + "loss": 0.613, + "regression_loss": 0.0, + "step": 7202, + "text_loss": 0.6796875 + }, + { + "epoch": 0.6, + "learning_rate": 3.475043019470908e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 7203, + "text_loss": 0.55859375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4737997570531322e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 7204, + "text_loss": 0.54296875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4725565986838083e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7205, + "text_loss": 0.6640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.471313544447686e-06, + "loss": 0.4503, + "regression_loss": 0.0, + "step": 7206, + "text_loss": 0.41796875 + }, + { + "epoch": 0.6, + "learning_rate": 3.470070594429511e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7207, + "text_loss": 0.52734375 + }, + { + "epoch": 0.6, + "learning_rate": 3.468827748714021e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 7208, + "text_loss": 0.703125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4675850073859476e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 7209, + "text_loss": 0.76171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.466342370530011e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 7210, + "text_loss": 0.26171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4650998382309326e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 7211, + "text_loss": 0.337890625 + }, + { + "epoch": 0.6, + "learning_rate": 3.463857410573419e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 7212, + "text_loss": 0.47265625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4626150876421737e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 7213, + "text_loss": 0.38671875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4613728695218906e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 7214, + "text_loss": 0.29296875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4601307562972597e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 7215, + "text_loss": 0.44140625 + }, + { + "epoch": 0.6, + "learning_rate": 3.458888748052959e-06, + "loss": 0.5275, + "regression_loss": 0.0, + "step": 7216, + "text_loss": 0.56640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4576468448736656e-06, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 7217, + "text_loss": 0.462890625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4564050468440424e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 7218, + "text_loss": 0.546875 + }, + { + "epoch": 0.6, + "learning_rate": 3.455163354048753e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 7219, + "text_loss": 0.38671875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4539217665724457e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 7220, + "text_loss": 0.4375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4526802844997677e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 7221, + "text_loss": 0.25 + }, + { + "epoch": 0.6, + "learning_rate": 3.451438907915354e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 7222, + "text_loss": 0.369140625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4501976369038417e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 7223, + "text_loss": 0.357421875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4489564715498456e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 7224, + "text_loss": 0.55078125 + }, + { + "epoch": 0.6, + "learning_rate": 3.447715411937989e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 7225, + "text_loss": 0.5625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4464744581528774e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 7226, + "text_loss": 0.49609375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4452336102791145e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 7227, + "text_loss": 0.4765625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4439928684012924e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 7228, + "text_loss": 0.478515625 + }, + { + "epoch": 0.6, + "learning_rate": 3.442752232604002e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 7229, + "text_loss": 0.50390625 + }, + { + "epoch": 0.6, + "learning_rate": 3.441511702971818e-06, + "loss": 0.4248, + "regression_loss": 0.0, + "step": 7230, + "text_loss": 0.26171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4402712795893202e-06, + "loss": 0.4948, + "regression_loss": 0.0, + "step": 7231, + "text_loss": 0.66796875 + }, + { + "epoch": 0.6, + "learning_rate": 3.439030962541069e-06, + "loss": 0.6162, + "regression_loss": 0.0, + "step": 7232, + "text_loss": 0.72265625 + }, + { + "epoch": 0.6, + "learning_rate": 3.437790751911626e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 7233, + "text_loss": 0.6484375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4365506477855393e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 7234, + "text_loss": 0.443359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4353106502473555e-06, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 7235, + "text_loss": 0.421875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4340707593816085e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 7236, + "text_loss": 0.5703125 + }, + { + "epoch": 0.6, + "learning_rate": 3.432830975272833e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 7237, + "text_loss": 0.443359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4315912980055433e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 7238, + "text_loss": 0.796875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4303517276642595e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 7239, + "text_loss": 0.6484375 + }, + { + "epoch": 0.6, + "learning_rate": 3.429112264333489e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 7240, + "text_loss": 0.4609375 + }, + { + "epoch": 0.6, + "learning_rate": 3.427872908097729e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 7241, + "text_loss": 0.73828125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4266336590414773e-06, + "loss": 0.3926, + "regression_loss": 0.0, + "step": 7242, + "text_loss": 0.287109375 + }, + { + "epoch": 0.6, + "learning_rate": 3.425394517249213e-06, + "loss": 0.4679, + "regression_loss": 0.0, + "step": 7243, + "text_loss": 0.83984375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4241554828054214e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 7244, + "text_loss": 0.404296875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4229165557945676e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 7245, + "text_loss": 0.55078125 + }, + { + "epoch": 0.6, + "learning_rate": 3.421677736301119e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 7246, + "text_loss": 0.31640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.42043902440953e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 7247, + "text_loss": 0.380859375 + }, + { + "epoch": 0.6, + "learning_rate": 3.41920042020425e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 7248, + "text_loss": 0.5390625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4179619237697194e-06, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 7249, + "text_loss": 0.5625 + }, + { + "epoch": 0.6, + "learning_rate": 3.416723535190376e-06, + "loss": 0.4637, + "regression_loss": 0.0, + "step": 7250, + "text_loss": 0.38671875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4154852545506432e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 7251, + "text_loss": 0.341796875 + }, + { + "epoch": 0.6, + "learning_rate": 3.4142470819349434e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 7252, + "text_loss": 0.46484375 + }, + { + "epoch": 0.6, + "learning_rate": 3.4130090174276857e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 7253, + "text_loss": 0.62109375 + }, + { + "epoch": 0.6, + "learning_rate": 3.411771061113278e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 7254, + "text_loss": 0.267578125 + }, + { + "epoch": 0.6, + "learning_rate": 3.4105332130761147e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 7255, + "text_loss": 0.2578125 + }, + { + "epoch": 0.6, + "learning_rate": 3.409295473400589e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 7256, + "text_loss": 0.58203125 + }, + { + "epoch": 0.6, + "learning_rate": 3.408057842171082e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 7257, + "text_loss": 0.447265625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4068203194719696e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 7258, + "text_loss": 0.56640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4055829053876177e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 7259, + "text_loss": 0.349609375 + }, + { + "epoch": 0.6, + "learning_rate": 3.40434560000239e-06, + "loss": 0.5167, + "regression_loss": 0.0, + "step": 7260, + "text_loss": 0.56640625 + }, + { + "epoch": 0.6, + "learning_rate": 3.4031084034006357e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 7261, + "text_loss": 0.51953125 + }, + { + "epoch": 0.6, + "learning_rate": 3.401871315666706e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 7262, + "text_loss": 0.58984375 + }, + { + "epoch": 0.6, + "learning_rate": 3.400634336884933e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 7263, + "text_loss": 0.373046875 + }, + { + "epoch": 0.6, + "learning_rate": 3.3993974671396523e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 7264, + "text_loss": 0.451171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.3981607065151844e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 7265, + "text_loss": 0.349609375 + }, + { + "epoch": 0.6, + "learning_rate": 3.396924055095847e-06, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 7266, + "text_loss": 0.51171875 + }, + { + "epoch": 0.6, + "learning_rate": 3.3956875129659474e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 7267, + "text_loss": 1.015625 + }, + { + "epoch": 0.6, + "learning_rate": 3.3944510802097875e-06, + "loss": 0.4818, + "regression_loss": 0.0, + "step": 7268, + "text_loss": 0.43359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.393214756911659e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 7269, + "text_loss": 0.484375 + }, + { + "epoch": 0.6, + "learning_rate": 3.3919785431558516e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 7270, + "text_loss": 0.255859375 + }, + { + "epoch": 0.6, + "learning_rate": 3.3907424390266408e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 7271, + "text_loss": 0.46875 + }, + { + "epoch": 0.6, + "learning_rate": 3.3895064446082992e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 7272, + "text_loss": 0.373046875 + }, + { + "epoch": 0.6, + "learning_rate": 3.3882705599850907e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7273, + "text_loss": 0.396484375 + }, + { + "epoch": 0.6, + "learning_rate": 3.387034785241271e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 7274, + "text_loss": 0.494140625 + }, + { + "epoch": 0.6, + "learning_rate": 3.3857991204610875e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 7275, + "text_loss": 0.43359375 + }, + { + "epoch": 0.6, + "learning_rate": 3.384563565728785e-06, + "loss": 0.45, + "regression_loss": 0.0, + "step": 7276, + "text_loss": 0.474609375 + }, + { + "epoch": 0.6, + "learning_rate": 3.383328121128593e-06, + "loss": 0.594, + "regression_loss": 0.0, + "step": 7277, + "text_loss": 0.51953125 + }, + { + "epoch": 0.6, + "learning_rate": 3.3820927867447407e-06, + "loss": 0.5526, + "regression_loss": 0.0, + "step": 7278, + "text_loss": 0.5859375 + }, + { + "epoch": 0.6, + "learning_rate": 3.380857562661446e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 7279, + "text_loss": 0.703125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3796224489629205e-06, + "loss": 0.6694, + "regression_loss": 0.0, + "step": 7280, + "text_loss": 0.625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3783874457333664e-06, + "loss": 0.5016, + "regression_loss": 0.0, + "step": 7281, + "text_loss": 0.72265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3771525530569826e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 7282, + "text_loss": 0.4453125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3759177710179536e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 7283, + "text_loss": 0.66015625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3746830997004643e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 7284, + "text_loss": 0.51171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.373448539188686e-06, + "loss": 0.4572, + "regression_loss": 0.0, + "step": 7285, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3722140895667856e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 7286, + "text_loss": 0.6015625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3709797509189223e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 7287, + "text_loss": 0.6015625 + }, + { + "epoch": 0.61, + "learning_rate": 3.369745523329243e-06, + "loss": 0.3923, + "regression_loss": 0.0, + "step": 7288, + "text_loss": 0.345703125 + }, + { + "epoch": 0.61, + "learning_rate": 3.368511406881897e-06, + "loss": 0.6855, + "regression_loss": 0.0, + "step": 7289, + "text_loss": 0.58203125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3672774016610156e-06, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 7290, + "text_loss": 0.439453125 + }, + { + "epoch": 0.61, + "learning_rate": 3.366043507750729e-06, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 7291, + "text_loss": 0.67578125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3648097252351562e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 7292, + "text_loss": 0.41015625 + }, + { + "epoch": 0.61, + "learning_rate": 3.363576054198412e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 7293, + "text_loss": 0.625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3623424947245975e-06, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 7294, + "text_loss": 0.72265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3611090468978163e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 7295, + "text_loss": 0.37890625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3598757108021546e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 7296, + "text_loss": 0.53125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3586424865216966e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 7297, + "text_loss": 0.41796875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3574093741405157e-06, + "loss": 0.45, + "regression_loss": 0.0, + "step": 7298, + "text_loss": 0.248046875 + }, + { + "epoch": 0.61, + "learning_rate": 3.356176373742681e-06, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 7299, + "text_loss": 0.5625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3549434854122488e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 7300, + "text_loss": 0.482421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3537107092332766e-06, + "loss": 0.5928, + "regression_loss": 0.0, + "step": 7301, + "text_loss": 0.5390625 + }, + { + "epoch": 0.61, + "learning_rate": 3.352478045289802e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 7302, + "text_loss": 0.51171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3512454936658674e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7303, + "text_loss": 0.70703125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3500130544454976e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 7304, + "text_loss": 0.64453125 + }, + { + "epoch": 0.61, + "learning_rate": 3.348780727712718e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 7305, + "text_loss": 0.546875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3475485135515386e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 7306, + "text_loss": 0.478515625 + }, + { + "epoch": 0.61, + "learning_rate": 3.346316412045968e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 7307, + "text_loss": 0.734375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3450844232800006e-06, + "loss": 0.4275, + "regression_loss": 0.0, + "step": 7308, + "text_loss": 0.357421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3438525473376326e-06, + "loss": 0.439, + "regression_loss": 0.0, + "step": 7309, + "text_loss": 0.458984375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3426207843028435e-06, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 7310, + "text_loss": 0.60546875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3413891342596093e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 7311, + "text_loss": 0.50390625 + }, + { + "epoch": 0.61, + "learning_rate": 3.340157597291897e-06, + "loss": 0.6108, + "regression_loss": 0.0, + "step": 7312, + "text_loss": 0.6875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3389261734836676e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 7313, + "text_loss": 0.30078125 + }, + { + "epoch": 0.61, + "learning_rate": 3.337694862918871e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 7314, + "text_loss": 0.6875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3364636656814553e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 7315, + "text_loss": 0.44921875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3352325818553523e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 7316, + "text_loss": 0.640625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3340016115244957e-06, + "loss": 0.6187, + "regression_loss": 0.0, + "step": 7317, + "text_loss": 0.796875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3327707547728037e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 7318, + "text_loss": 0.396484375 + }, + { + "epoch": 0.61, + "learning_rate": 3.331540011684191e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 7319, + "text_loss": 0.421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3303093823425626e-06, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 7320, + "text_loss": 0.474609375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3290788668318175e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 7321, + "text_loss": 0.376953125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3278484652358433e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 7322, + "text_loss": 0.46484375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3266181776385263e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 7323, + "text_loss": 0.357421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3253880041237385e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 7324, + "text_loss": 0.390625 + }, + { + "epoch": 0.61, + "learning_rate": 3.324157944775348e-06, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 7325, + "text_loss": 0.275390625 + }, + { + "epoch": 0.61, + "learning_rate": 3.322927999677212e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 7326, + "text_loss": 0.466796875 + }, + { + "epoch": 0.61, + "learning_rate": 3.321698168913185e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 7327, + "text_loss": 0.419921875 + }, + { + "epoch": 0.61, + "learning_rate": 3.320468452567106e-06, + "loss": 0.6423, + "regression_loss": 0.0, + "step": 7328, + "text_loss": 0.47265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3192388507228156e-06, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 7329, + "text_loss": 0.5625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3180093634641385e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 7330, + "text_loss": 0.49609375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3167799908748967e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 7331, + "text_loss": 0.546875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3155507330389004e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 7332, + "text_loss": 0.416015625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3143215900399545e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 7333, + "text_loss": 0.365234375 + }, + { + "epoch": 0.61, + "learning_rate": 3.313092561961858e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 7334, + "text_loss": 0.57421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3118636488883972e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 7335, + "text_loss": 0.59765625 + }, + { + "epoch": 0.61, + "learning_rate": 3.310634850903355e-06, + "loss": 0.6255, + "regression_loss": 0.0, + "step": 7336, + "text_loss": 0.44921875 + }, + { + "epoch": 0.61, + "learning_rate": 3.309406168090502e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 7337, + "text_loss": 0.3359375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3081776005336058e-06, + "loss": 0.6106, + "regression_loss": 0.0, + "step": 7338, + "text_loss": 0.66796875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3069491483164206e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 7339, + "text_loss": 0.6171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.3057208115227017e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 7340, + "text_loss": 0.47265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3044925902361836e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 7341, + "text_loss": 0.3515625 + }, + { + "epoch": 0.61, + "learning_rate": 3.3032644845406058e-06, + "loss": 0.6331, + "regression_loss": 0.0, + "step": 7342, + "text_loss": 0.65234375 + }, + { + "epoch": 0.61, + "learning_rate": 3.3020364945196915e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 7343, + "text_loss": 0.39453125 + }, + { + "epoch": 0.61, + "learning_rate": 3.3008086202571598e-06, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 7344, + "text_loss": 0.36328125 + }, + { + "epoch": 0.61, + "learning_rate": 3.29958086183672e-06, + "loss": 0.4823, + "regression_loss": 0.0, + "step": 7345, + "text_loss": 0.6171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.298353219342074e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 7346, + "text_loss": 0.58984375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2971256928569163e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 7347, + "text_loss": 0.66796875 + }, + { + "epoch": 0.61, + "learning_rate": 3.295898282464935e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 7348, + "text_loss": 0.578125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2946709882498063e-06, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 7349, + "text_loss": 0.5859375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2934438102952037e-06, + "loss": 0.623, + "regression_loss": 0.0, + "step": 7350, + "text_loss": 0.3125 + }, + { + "epoch": 0.61, + "learning_rate": 3.292216748684786e-06, + "loss": 0.562, + "regression_loss": 0.0, + "step": 7351, + "text_loss": 0.357421875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2909898035022112e-06, + "loss": 0.571, + "regression_loss": 0.0, + "step": 7352, + "text_loss": 0.58203125 + }, + { + "epoch": 0.61, + "learning_rate": 3.289762974831123e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 7353, + "text_loss": 0.515625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2885362627551654e-06, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 7354, + "text_loss": 0.62109375 + }, + { + "epoch": 0.61, + "learning_rate": 3.287309667357962e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 7355, + "text_loss": 0.419921875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2860831887231427e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 7356, + "text_loss": 0.55078125 + }, + { + "epoch": 0.61, + "learning_rate": 3.284856826934317e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 7357, + "text_loss": 0.5703125 + }, + { + "epoch": 0.61, + "learning_rate": 3.283630582075097e-06, + "loss": 0.5126, + "regression_loss": 0.0, + "step": 7358, + "text_loss": 0.53515625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2824044542290762e-06, + "loss": 0.4713, + "regression_loss": 0.0, + "step": 7359, + "text_loss": 0.59375 + }, + { + "epoch": 0.61, + "learning_rate": 3.281178443479852e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 7360, + "text_loss": 0.5078125 + }, + { + "epoch": 0.61, + "learning_rate": 3.279952549911001e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 7361, + "text_loss": 0.5078125 + }, + { + "epoch": 0.61, + "learning_rate": 3.278726773606103e-06, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 7362, + "text_loss": 0.310546875 + }, + { + "epoch": 0.61, + "learning_rate": 3.277501114648722e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 7363, + "text_loss": 0.7890625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2762755731224205e-06, + "loss": 0.5769, + "regression_loss": 0.0, + "step": 7364, + "text_loss": 0.828125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2750501491107467e-06, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 7365, + "text_loss": 0.26171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2738248426972453e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 7366, + "text_loss": 0.58203125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2725996539654483e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 7367, + "text_loss": 0.59765625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2713745829988874e-06, + "loss": 0.407, + "regression_loss": 0.0, + "step": 7368, + "text_loss": 0.322265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2701496298810788e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 7369, + "text_loss": 0.443359375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2689247946955344e-06, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 7370, + "text_loss": 0.369140625 + }, + { + "epoch": 0.61, + "learning_rate": 3.267700077525756e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 7371, + "text_loss": 0.2890625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2664754784552397e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 7372, + "text_loss": 0.396484375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2652509975674705e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 7373, + "text_loss": 0.470703125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2640266349459315e-06, + "loss": 0.4514, + "regression_loss": 0.0, + "step": 7374, + "text_loss": 0.265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.262802390674087e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 7375, + "text_loss": 0.48046875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2615782648354055e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 7376, + "text_loss": 0.373046875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2603542575133385e-06, + "loss": 0.4126, + "regression_loss": 0.0, + "step": 7377, + "text_loss": 0.4296875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2591303687913332e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 7378, + "text_loss": 0.4765625 + }, + { + "epoch": 0.61, + "learning_rate": 3.257906598752828e-06, + "loss": 0.4231, + "regression_loss": 0.0, + "step": 7379, + "text_loss": 0.375 + }, + { + "epoch": 0.61, + "learning_rate": 3.256682947481252e-06, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 7380, + "text_loss": 0.5546875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2554594150600303e-06, + "loss": 0.5774, + "regression_loss": 0.0, + "step": 7381, + "text_loss": 0.51171875 + }, + { + "epoch": 0.61, + "learning_rate": 3.254236001572575e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 7382, + "text_loss": 0.388671875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2530127071022933e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 7383, + "text_loss": 0.31640625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2517895317325808e-06, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 7384, + "text_loss": 0.263671875 + }, + { + "epoch": 0.61, + "learning_rate": 3.2505664755468304e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 7385, + "text_loss": 0.74609375 + }, + { + "epoch": 0.61, + "learning_rate": 3.24934353862842e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 7386, + "text_loss": 0.51953125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2481207210607267e-06, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 7387, + "text_loss": 0.69140625 + }, + { + "epoch": 0.61, + "learning_rate": 3.246898022927114e-06, + "loss": 0.5094, + "regression_loss": 0.0, + "step": 7388, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2456754443109407e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 7389, + "text_loss": 0.43359375 + }, + { + "epoch": 0.61, + "learning_rate": 3.244452985295553e-06, + "loss": 0.5503, + "regression_loss": 0.0, + "step": 7390, + "text_loss": 0.62109375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2432306459642947e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 7391, + "text_loss": 0.53125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2420084264004966e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7392, + "text_loss": 0.455078125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2407863266874864e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 7393, + "text_loss": 0.458984375 + }, + { + "epoch": 0.61, + "learning_rate": 3.2395643469085753e-06, + "loss": 0.4272, + "regression_loss": 0.0, + "step": 7394, + "text_loss": 0.65234375 + }, + { + "epoch": 0.61, + "learning_rate": 3.238342487147077e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 7395, + "text_loss": 0.36328125 + }, + { + "epoch": 0.61, + "learning_rate": 3.2371207474862874e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 7396, + "text_loss": 0.56640625 + }, + { + "epoch": 0.61, + "learning_rate": 3.235899128009502e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 7397, + "text_loss": 0.47265625 + }, + { + "epoch": 0.61, + "learning_rate": 3.2346776288000005e-06, + "loss": 0.4315, + "regression_loss": 0.0, + "step": 7398, + "text_loss": 0.431640625 + }, + { + "epoch": 0.61, + "learning_rate": 3.233456249941064e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 7399, + "text_loss": 0.65625 + }, + { + "epoch": 0.62, + "learning_rate": 3.232234991515953e-06, + "loss": 0.6147, + "regression_loss": 0.0, + "step": 7400, + "text_loss": 1.1875 + }, + { + "epoch": 0.62, + "learning_rate": 3.2310138536079315e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 7401, + "text_loss": 0.56640625 + }, + { + "epoch": 0.62, + "learning_rate": 3.229792836300249e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 7402, + "text_loss": 0.5703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.2285719396761486e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 7403, + "text_loss": 0.62890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.227351163818863e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 7404, + "text_loss": 0.294921875 + }, + { + "epoch": 0.62, + "learning_rate": 3.226130508811621e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 7405, + "text_loss": 0.408203125 + }, + { + "epoch": 0.62, + "learning_rate": 3.2249099747376367e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 7406, + "text_loss": 0.68359375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2236895616801246e-06, + "loss": 0.6086, + "regression_loss": 0.0, + "step": 7407, + "text_loss": 0.640625 + }, + { + "epoch": 0.62, + "learning_rate": 3.2224692697222826e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 7408, + "text_loss": 0.6484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2212490989473053e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 7409, + "text_loss": 0.5859375 + }, + { + "epoch": 0.62, + "learning_rate": 3.220029049438378e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 7410, + "text_loss": 0.796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.2188091212786763e-06, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 7411, + "text_loss": 0.33984375 + }, + { + "epoch": 0.62, + "learning_rate": 3.217589314551368e-06, + "loss": 0.6057, + "regression_loss": 0.0, + "step": 7412, + "text_loss": 0.609375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2163696293396175e-06, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 7413, + "text_loss": 0.6953125 + }, + { + "epoch": 0.62, + "learning_rate": 3.2151500657265698e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 7414, + "text_loss": 0.359375 + }, + { + "epoch": 0.62, + "learning_rate": 3.213930623795375e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 7415, + "text_loss": 0.26171875 + }, + { + "epoch": 0.62, + "learning_rate": 3.2127113036291636e-06, + "loss": 0.4576, + "regression_loss": 0.0, + "step": 7416, + "text_loss": 0.4609375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2114921053110658e-06, + "loss": 0.3785, + "regression_loss": 0.0, + "step": 7417, + "text_loss": 0.345703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.210273028924198e-06, + "loss": 0.389, + "regression_loss": 0.0, + "step": 7418, + "text_loss": 0.34765625 + }, + { + "epoch": 0.62, + "learning_rate": 3.2090540745516725e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 7419, + "text_loss": 0.65234375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2078352422765878e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 7420, + "text_loss": 0.73828125 + }, + { + "epoch": 0.62, + "learning_rate": 3.206616532182042e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 7421, + "text_loss": 0.5078125 + }, + { + "epoch": 0.62, + "learning_rate": 3.205397944351118e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 7422, + "text_loss": 0.625 + }, + { + "epoch": 0.62, + "learning_rate": 3.204179478866894e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 7423, + "text_loss": 0.35546875 + }, + { + "epoch": 0.62, + "learning_rate": 3.202961135812437e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 7424, + "text_loss": 0.462890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.2017429152708093e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 7425, + "text_loss": 0.609375 + }, + { + "epoch": 0.62, + "learning_rate": 3.2005248173250593e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 7426, + "text_loss": 0.275390625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1993068420582353e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 7427, + "text_loss": 0.62109375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1980889895533705e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 7428, + "text_loss": 0.42578125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1968712598934913e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 7429, + "text_loss": 0.3203125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1956536531616178e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 7430, + "text_loss": 0.71484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1944361694407566e-06, + "loss": 0.4393, + "regression_loss": 0.0, + "step": 7431, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.62, + "learning_rate": 3.193218808813914e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 7432, + "text_loss": 0.703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1920015713640806e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 7433, + "text_loss": 0.53515625 + }, + { + "epoch": 0.62, + "learning_rate": 3.190784457174243e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 7434, + "text_loss": 0.78125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1895674663273755e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 7435, + "text_loss": 0.6875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1883505989064485e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 7436, + "text_loss": 0.62890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1871338549944185e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 7437, + "text_loss": 0.52734375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1859172346742428e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 7438, + "text_loss": 0.69921875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1847007380288577e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 7439, + "text_loss": 0.341796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1834843651412017e-06, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 7440, + "text_loss": 0.57421875 + }, + { + "epoch": 0.62, + "learning_rate": 3.182268116094199e-06, + "loss": 0.4141, + "regression_loss": 0.0, + "step": 7441, + "text_loss": 0.283203125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1810519909707684e-06, + "loss": 0.4192, + "regression_loss": 0.0, + "step": 7442, + "text_loss": 0.314453125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1798359898538177e-06, + "loss": 0.4119, + "regression_loss": 0.0, + "step": 7443, + "text_loss": 0.396484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1786201128262494e-06, + "loss": 0.5782, + "regression_loss": 0.0, + "step": 7444, + "text_loss": 0.48828125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1774043599709527e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 7445, + "text_loss": 0.59375 + }, + { + "epoch": 0.62, + "learning_rate": 3.176188731370815e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 7446, + "text_loss": 0.53515625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1749732271087095e-06, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 7447, + "text_loss": 0.259765625 + }, + { + "epoch": 0.62, + "learning_rate": 3.173757847267504e-06, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 7448, + "text_loss": 0.5 + }, + { + "epoch": 0.62, + "learning_rate": 3.1725425919300554e-06, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 7449, + "text_loss": 0.44140625 + }, + { + "epoch": 0.62, + "learning_rate": 3.171327461179215e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 7450, + "text_loss": 0.73046875 + }, + { + "epoch": 0.62, + "learning_rate": 3.170112455097821e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 7451, + "text_loss": 0.56640625 + }, + { + "epoch": 0.62, + "learning_rate": 3.168897573768713e-06, + "loss": 0.496, + "regression_loss": 0.0, + "step": 7452, + "text_loss": 0.3828125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1676828172747076e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 7453, + "text_loss": 0.6171875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1664681856986258e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 7454, + "text_loss": 0.4921875 + }, + { + "epoch": 0.62, + "learning_rate": 3.165253679123272e-06, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 7455, + "text_loss": 0.57421875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1640392976314472e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 7456, + "text_loss": 0.43359375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1628250413059395e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 7457, + "text_loss": 0.40625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1616109102295336e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 7458, + "text_loss": 0.50390625 + }, + { + "epoch": 0.62, + "learning_rate": 3.160396904484998e-06, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 7459, + "text_loss": 0.396484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1591830241551023e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 7460, + "text_loss": 0.703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1579692693225996e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 7461, + "text_loss": 0.71875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1567556400702394e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 7462, + "text_loss": 0.490234375 + }, + { + "epoch": 0.62, + "learning_rate": 3.155542136480758e-06, + "loss": 0.5898, + "regression_loss": 0.0, + "step": 7463, + "text_loss": 0.58203125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1543287586368887e-06, + "loss": 0.4226, + "regression_loss": 0.0, + "step": 7464, + "text_loss": 0.609375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1531155066213504e-06, + "loss": 0.4979, + "regression_loss": 0.0, + "step": 7465, + "text_loss": 0.6171875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1519023805168604e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 7466, + "text_loss": 0.443359375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1506893804061178e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 7467, + "text_loss": 0.65234375 + }, + { + "epoch": 0.62, + "learning_rate": 3.149476506371825e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 7468, + "text_loss": 0.6328125 + }, + { + "epoch": 0.62, + "learning_rate": 3.148263758496664e-06, + "loss": 0.5585, + "regression_loss": 0.0, + "step": 7469, + "text_loss": 0.72265625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1470511368633173e-06, + "loss": 0.5009, + "regression_loss": 0.0, + "step": 7470, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1458386415544524e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 7471, + "text_loss": 0.62890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1446262726527354e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 7472, + "text_loss": 0.52734375 + }, + { + "epoch": 0.62, + "learning_rate": 3.143414030240813e-06, + "loss": 0.5284, + "regression_loss": 0.0, + "step": 7473, + "text_loss": 0.62890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1422019144013337e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 7474, + "text_loss": 0.36328125 + }, + { + "epoch": 0.62, + "learning_rate": 3.140989925216934e-06, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 7475, + "text_loss": 0.283203125 + }, + { + "epoch": 0.62, + "learning_rate": 3.139778062770238e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 7476, + "text_loss": 0.60546875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1385663271438684e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 7477, + "text_loss": 0.72265625 + }, + { + "epoch": 0.62, + "learning_rate": 3.13735471842043e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 7478, + "text_loss": 0.5390625 + }, + { + "epoch": 0.62, + "learning_rate": 3.136143236682528e-06, + "loss": 0.527, + "regression_loss": 0.0, + "step": 7479, + "text_loss": 0.69921875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1349318820127526e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 7480, + "text_loss": 0.341796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1337206544936905e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 7481, + "text_loss": 0.46484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1325095542079138e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 7482, + "text_loss": 0.4296875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1312985812379915e-06, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 7483, + "text_loss": 0.5703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1300877356664777e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 7484, + "text_loss": 0.7265625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1288770175759263e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 7485, + "text_loss": 0.54296875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1276664270488755e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 7486, + "text_loss": 0.63671875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1264559641678578e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 7487, + "text_loss": 0.96875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1252456290153952e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 7488, + "text_loss": 0.65625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1240354216740033e-06, + "loss": 0.4476, + "regression_loss": 0.0, + "step": 7489, + "text_loss": 0.6015625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1228253422261855e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 7490, + "text_loss": 0.59765625 + }, + { + "epoch": 0.62, + "learning_rate": 3.121615390754444e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 7491, + "text_loss": 0.63671875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1204055673412596e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 7492, + "text_loss": 0.6953125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1191958720691183e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 7493, + "text_loss": 0.451171875 + }, + { + "epoch": 0.62, + "learning_rate": 3.117986305020488e-06, + "loss": 0.5901, + "regression_loss": 0.0, + "step": 7494, + "text_loss": 0.41796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1167768662778307e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 7495, + "text_loss": 0.63671875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1155675559236005e-06, + "loss": 0.4916, + "regression_loss": 0.0, + "step": 7496, + "text_loss": 0.609375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1143583740402415e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 7497, + "text_loss": 0.65625 + }, + { + "epoch": 0.62, + "learning_rate": 3.113149320710188e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 7498, + "text_loss": 0.46484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.1119403960158707e-06, + "loss": 0.4053, + "regression_loss": 0.0, + "step": 7499, + "text_loss": 0.53515625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1107316000397042e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 7500, + "text_loss": 0.341796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.109522932864101e-06, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 7501, + "text_loss": 0.482421875 + }, + { + "epoch": 0.62, + "learning_rate": 3.108314394571458e-06, + "loss": 0.4363, + "regression_loss": 0.0, + "step": 7502, + "text_loss": 0.310546875 + }, + { + "epoch": 0.62, + "learning_rate": 3.107105985244171e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 7503, + "text_loss": 0.53515625 + }, + { + "epoch": 0.62, + "learning_rate": 3.1058977049646192e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7504, + "text_loss": 0.5 + }, + { + "epoch": 0.62, + "learning_rate": 3.1046895538151818e-06, + "loss": 0.515, + "regression_loss": 0.0, + "step": 7505, + "text_loss": 0.41796875 + }, + { + "epoch": 0.62, + "learning_rate": 3.1034815318782195e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 7506, + "text_loss": 0.51953125 + }, + { + "epoch": 0.62, + "learning_rate": 3.102273639236093e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 7507, + "text_loss": 0.61328125 + }, + { + "epoch": 0.62, + "learning_rate": 3.1010658759711477e-06, + "loss": 0.4559, + "regression_loss": 0.0, + "step": 7508, + "text_loss": 0.52734375 + }, + { + "epoch": 0.62, + "learning_rate": 3.099858242165723e-06, + "loss": 0.4192, + "regression_loss": 0.0, + "step": 7509, + "text_loss": 0.375 + }, + { + "epoch": 0.62, + "learning_rate": 3.098650737902148e-06, + "loss": 0.4158, + "regression_loss": 0.0, + "step": 7510, + "text_loss": 0.3828125 + }, + { + "epoch": 0.62, + "learning_rate": 3.097443363262749e-06, + "loss": 0.5006, + "regression_loss": 0.0, + "step": 7511, + "text_loss": 0.462890625 + }, + { + "epoch": 0.62, + "learning_rate": 3.0962361183298313e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 7512, + "text_loss": 0.5078125 + }, + { + "epoch": 0.62, + "learning_rate": 3.0950290031857042e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 7513, + "text_loss": 0.451171875 + }, + { + "epoch": 0.62, + "learning_rate": 3.0938220179126603e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 7514, + "text_loss": 0.46484375 + }, + { + "epoch": 0.62, + "learning_rate": 3.0926151625929866e-06, + "loss": 0.5416, + "regression_loss": 0.0, + "step": 7515, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.62, + "learning_rate": 3.0914084373089593e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 7516, + "text_loss": 0.63671875 + }, + { + "epoch": 0.62, + "learning_rate": 3.090201842142848e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 7517, + "text_loss": 0.470703125 + }, + { + "epoch": 0.62, + "learning_rate": 3.0889953771769092e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 7518, + "text_loss": 0.486328125 + }, + { + "epoch": 0.62, + "learning_rate": 3.0877890424933977e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 7519, + "text_loss": 0.6875 + }, + { + "epoch": 0.62, + "learning_rate": 3.0865828381745515e-06, + "loss": 0.3867, + "regression_loss": 0.0, + "step": 7520, + "text_loss": 0.357421875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0853767643026056e-06, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 7521, + "text_loss": 0.69140625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0841708209597833e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 7522, + "text_loss": 0.7578125 + }, + { + "epoch": 0.63, + "learning_rate": 3.082965008228298e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 7523, + "text_loss": 0.416015625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0817593261903587e-06, + "loss": 0.4969, + "regression_loss": 0.0, + "step": 7524, + "text_loss": 0.4609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.080553774928161e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 7525, + "text_loss": 0.59765625 + }, + { + "epoch": 0.63, + "learning_rate": 3.079348354523894e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 7526, + "text_loss": 0.41015625 + }, + { + "epoch": 0.63, + "learning_rate": 3.078143065059735e-06, + "loss": 0.4219, + "regression_loss": 0.0, + "step": 7527, + "text_loss": 0.53125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0769379066178565e-06, + "loss": 0.7563, + "regression_loss": 0.0, + "step": 7528, + "text_loss": 0.71484375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0757328792804174e-06, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 7529, + "text_loss": 0.3984375 + }, + { + "epoch": 0.63, + "learning_rate": 3.074527983129575e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 7530, + "text_loss": 0.416015625 + }, + { + "epoch": 0.63, + "learning_rate": 3.073323218247467e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 7531, + "text_loss": 0.54296875 + }, + { + "epoch": 0.63, + "learning_rate": 3.072118584716233e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 7532, + "text_loss": 0.70703125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0709140826179962e-06, + "loss": 0.4559, + "regression_loss": 0.0, + "step": 7533, + "text_loss": 0.609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.069709712034874e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 7534, + "text_loss": 0.609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0685054730489727e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 7535, + "text_loss": 0.45703125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0673013657423943e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 7536, + "text_loss": 0.3203125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0660973901972242e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 7537, + "text_loss": 0.5234375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0648935464955475e-06, + "loss": 0.527, + "regression_loss": 0.0, + "step": 7538, + "text_loss": 0.2119140625 + }, + { + "epoch": 0.63, + "learning_rate": 3.063689834719433e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 7539, + "text_loss": 0.796875 + }, + { + "epoch": 0.63, + "learning_rate": 3.062486254950947e-06, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 7540, + "text_loss": 0.7890625 + }, + { + "epoch": 0.63, + "learning_rate": 3.061282807272139e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 7541, + "text_loss": 0.3046875 + }, + { + "epoch": 0.63, + "learning_rate": 3.060079491765058e-06, + "loss": 0.6213, + "regression_loss": 0.0, + "step": 7542, + "text_loss": 0.60546875 + }, + { + "epoch": 0.63, + "learning_rate": 3.058876308511735e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 7543, + "text_loss": 0.58203125 + }, + { + "epoch": 0.63, + "learning_rate": 3.057673257594202e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 7544, + "text_loss": 0.306640625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0564703390944734e-06, + "loss": 0.5033, + "regression_loss": 0.0, + "step": 7545, + "text_loss": 0.404296875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0552675530945595e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 7546, + "text_loss": 0.478515625 + }, + { + "epoch": 0.63, + "learning_rate": 3.05406489967646e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 7547, + "text_loss": 0.318359375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0528623789221645e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 7548, + "text_loss": 0.60546875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0516599909136545e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 7549, + "text_loss": 0.64453125 + }, + { + "epoch": 0.63, + "learning_rate": 3.050457735732906e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 7550, + "text_loss": 0.33984375 + }, + { + "epoch": 0.63, + "learning_rate": 3.049255613461877e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 7551, + "text_loss": 0.455078125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0480536241825263e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 7552, + "text_loss": 0.4375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0468517679767984e-06, + "loss": 0.4628, + "regression_loss": 0.0, + "step": 7553, + "text_loss": 0.4609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.045650044926629e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 7554, + "text_loss": 0.54296875 + }, + { + "epoch": 0.63, + "learning_rate": 3.044448455113945e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 7555, + "text_loss": 0.28125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0432469986206663e-06, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 7556, + "text_loss": 0.87890625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0420456755286985e-06, + "loss": 0.5853, + "regression_loss": 0.0, + "step": 7557, + "text_loss": 1.203125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0408444859199465e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 7558, + "text_loss": 0.44921875 + }, + { + "epoch": 0.63, + "learning_rate": 3.039643429876298e-06, + "loss": 0.4413, + "regression_loss": 0.0, + "step": 7559, + "text_loss": 0.1962890625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0384425074796355e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 7560, + "text_loss": 0.30859375 + }, + { + "epoch": 0.63, + "learning_rate": 3.037241718811831e-06, + "loss": 0.5731, + "regression_loss": 0.0, + "step": 7561, + "text_loss": 0.80859375 + }, + { + "epoch": 0.63, + "learning_rate": 3.03604106395475e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 7562, + "text_loss": 0.546875 + }, + { + "epoch": 0.63, + "learning_rate": 3.034840542990244e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 7563, + "text_loss": 0.703125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0336401560001622e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 7564, + "text_loss": 0.609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.032439903066336e-06, + "loss": 0.4207, + "regression_loss": 0.0, + "step": 7565, + "text_loss": 0.23046875 + }, + { + "epoch": 0.63, + "learning_rate": 3.031239784270597e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 7566, + "text_loss": 0.5859375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0300397996947604e-06, + "loss": 0.5104, + "regression_loss": 0.0, + "step": 7567, + "text_loss": 0.45703125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0288399494206345e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 7568, + "text_loss": 0.578125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0276402335300226e-06, + "loss": 0.4067, + "regression_loss": 0.0, + "step": 7569, + "text_loss": 0.408203125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0264406521047107e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 7570, + "text_loss": 0.78515625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0252412052264828e-06, + "loss": 0.5776, + "regression_loss": 0.0, + "step": 7571, + "text_loss": 0.5390625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0240418929771098e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 7572, + "text_loss": 0.51171875 + }, + { + "epoch": 0.63, + "learning_rate": 3.022842715438356e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 7573, + "text_loss": 0.4375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0216436726919717e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 7574, + "text_loss": 0.51171875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0204447648197078e-06, + "loss": 0.6248, + "regression_loss": 0.0, + "step": 7575, + "text_loss": 0.76953125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0192459919032923e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 7576, + "text_loss": 0.38671875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0180473540244566e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 7577, + "text_loss": 0.75390625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0168488512649146e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 7578, + "text_loss": 0.46484375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0156504837063766e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 7579, + "text_loss": 0.50390625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0144522514305386e-06, + "loss": 0.4464, + "regression_loss": 0.0, + "step": 7580, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.63, + "learning_rate": 3.013254154519092e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 7581, + "text_loss": 0.314453125 + }, + { + "epoch": 0.63, + "learning_rate": 3.012056193053714e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 7582, + "text_loss": 0.59375 + }, + { + "epoch": 0.63, + "learning_rate": 3.010858367116079e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 7583, + "text_loss": 0.4453125 + }, + { + "epoch": 0.63, + "learning_rate": 3.009660676787846e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 7584, + "text_loss": 0.3515625 + }, + { + "epoch": 0.63, + "learning_rate": 3.0084631221506693e-06, + "loss": 0.4207, + "regression_loss": 0.0, + "step": 7585, + "text_loss": 0.50390625 + }, + { + "epoch": 0.63, + "learning_rate": 3.00726570328619e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 7586, + "text_loss": 0.265625 + }, + { + "epoch": 0.63, + "learning_rate": 3.006068420276044e-06, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 7587, + "text_loss": 0.361328125 + }, + { + "epoch": 0.63, + "learning_rate": 3.0048712732018527e-06, + "loss": 0.4214, + "regression_loss": 0.0, + "step": 7588, + "text_loss": 0.375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0036742621452362e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 7589, + "text_loss": 0.73046875 + }, + { + "epoch": 0.63, + "learning_rate": 3.0024773871877943e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 7590, + "text_loss": 0.396484375 + }, + { + "epoch": 0.63, + "learning_rate": 3.00128064841113e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 7591, + "text_loss": 0.609375 + }, + { + "epoch": 0.63, + "learning_rate": 3.0000840458968274e-06, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 7592, + "text_loss": 0.51953125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9988875797264647e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 7593, + "text_loss": 0.431640625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9976912499816117e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 7594, + "text_loss": 0.90234375 + }, + { + "epoch": 0.63, + "learning_rate": 2.996495056743829e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 7595, + "text_loss": 0.33984375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9952990000946624e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 7596, + "text_loss": 0.408203125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9941030801156577e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 7597, + "text_loss": 0.396484375 + }, + { + "epoch": 0.63, + "learning_rate": 2.992907296888345e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 7598, + "text_loss": 0.44921875 + }, + { + "epoch": 0.63, + "learning_rate": 2.991711650494247e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 7599, + "text_loss": 0.55859375 + }, + { + "epoch": 0.63, + "learning_rate": 2.990516141014875e-06, + "loss": 0.4591, + "regression_loss": 0.0, + "step": 7600, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.63, + "learning_rate": 2.989320768531735e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 7601, + "text_loss": 0.482421875 + }, + { + "epoch": 0.63, + "learning_rate": 2.988125533126318e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 7602, + "text_loss": 0.48828125 + }, + { + "epoch": 0.63, + "learning_rate": 2.986930434880114e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 7603, + "text_loss": 0.248046875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9857354738745923e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 7604, + "text_loss": 0.37890625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9845406501912256e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 7605, + "text_loss": 0.345703125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9833459639114666e-06, + "loss": 0.4323, + "regression_loss": 0.0, + "step": 7606, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9821514151167652e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 7607, + "text_loss": 0.6484375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9809570038885565e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 7608, + "text_loss": 0.546875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9797627303082734e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 7609, + "text_loss": 0.236328125 + }, + { + "epoch": 0.63, + "learning_rate": 2.97856859445733e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 7610, + "text_loss": 0.431640625 + }, + { + "epoch": 0.63, + "learning_rate": 2.977374596417142e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 7611, + "text_loss": 0.466796875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9761807362691066e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 7612, + "text_loss": 0.470703125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9749870140946157e-06, + "loss": 0.4015, + "regression_loss": 0.0, + "step": 7613, + "text_loss": 0.447265625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9737934299750514e-06, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 7614, + "text_loss": 0.5859375 + }, + { + "epoch": 0.63, + "learning_rate": 2.972599983991785e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 7615, + "text_loss": 0.59375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9714066762261825e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 7616, + "text_loss": 0.2265625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9702135067595938e-06, + "loss": 0.4033, + "regression_loss": 0.0, + "step": 7617, + "text_loss": 0.34765625 + }, + { + "epoch": 0.63, + "learning_rate": 2.969020475673366e-06, + "loss": 0.4618, + "regression_loss": 0.0, + "step": 7618, + "text_loss": 0.54296875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9678275830488314e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 7619, + "text_loss": 0.279296875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9666348289673174e-06, + "loss": 0.431, + "regression_loss": 0.0, + "step": 7620, + "text_loss": 0.44140625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9654422135101368e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 7621, + "text_loss": 0.55078125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9642497367585997e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 7622, + "text_loss": 0.447265625 + }, + { + "epoch": 0.63, + "learning_rate": 2.9630573987940005e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 7623, + "text_loss": 0.46875 + }, + { + "epoch": 0.63, + "learning_rate": 2.961865199697628e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 7624, + "text_loss": 0.259765625 + }, + { + "epoch": 0.63, + "learning_rate": 2.960673139550758e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 7625, + "text_loss": 0.205078125 + }, + { + "epoch": 0.63, + "learning_rate": 2.959481218434662e-06, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 7626, + "text_loss": 0.69921875 + }, + { + "epoch": 0.63, + "learning_rate": 2.958289436430596e-06, + "loss": 0.395, + "regression_loss": 0.0, + "step": 7627, + "text_loss": 0.357421875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9570977936198138e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 7628, + "text_loss": 0.490234375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9559062900835493e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 7629, + "text_loss": 0.6796875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9547149259030393e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 7630, + "text_loss": 0.52734375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9535237011595e-06, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 7631, + "text_loss": 0.546875 + }, + { + "epoch": 0.63, + "learning_rate": 2.952332615934147e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 7632, + "text_loss": 0.255859375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9511416703081787e-06, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 7633, + "text_loss": 0.27734375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9499508643627908e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 7634, + "text_loss": 0.51953125 + }, + { + "epoch": 0.63, + "learning_rate": 2.9487601981791626e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 7635, + "text_loss": 0.451171875 + }, + { + "epoch": 0.63, + "learning_rate": 2.9475696718384713e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 7636, + "text_loss": 0.490234375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9463792854218787e-06, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 7637, + "text_loss": 0.2021484375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9451890390105404e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 7638, + "text_loss": 0.4375 + }, + { + "epoch": 0.63, + "learning_rate": 2.9439989326856e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 7639, + "text_loss": 0.60546875 + }, + { + "epoch": 0.63, + "learning_rate": 2.942808966528194e-06, + "loss": 0.463, + "regression_loss": 0.0, + "step": 7640, + "text_loss": 0.578125 + }, + { + "epoch": 0.64, + "learning_rate": 2.941619140619446e-06, + "loss": 0.605, + "regression_loss": 0.0, + "step": 7641, + "text_loss": 0.6953125 + }, + { + "epoch": 0.64, + "learning_rate": 2.940429455040477e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 7642, + "text_loss": 0.71484375 + }, + { + "epoch": 0.64, + "learning_rate": 2.9392399098723863e-06, + "loss": 0.3978, + "regression_loss": 0.0, + "step": 7643, + "text_loss": 0.462890625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9380505051962772e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 7644, + "text_loss": 0.484375 + }, + { + "epoch": 0.64, + "learning_rate": 2.9368612410932347e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 7645, + "text_loss": 0.44921875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9356721176443376e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 7646, + "text_loss": 0.65625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9344831349306512e-06, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 7647, + "text_loss": 0.59765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9332942930332404e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 7648, + "text_loss": 0.478515625 + }, + { + "epoch": 0.64, + "learning_rate": 2.932105592033147e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 7649, + "text_loss": 0.625 + }, + { + "epoch": 0.64, + "learning_rate": 2.930917032011416e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 7650, + "text_loss": 0.47265625 + }, + { + "epoch": 0.64, + "learning_rate": 2.929728613049074e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 7651, + "text_loss": 0.24609375 + }, + { + "epoch": 0.64, + "learning_rate": 2.928540335227144e-06, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 7652, + "text_loss": 0.640625 + }, + { + "epoch": 0.64, + "learning_rate": 2.927352198626634e-06, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 7653, + "text_loss": 0.66015625 + }, + { + "epoch": 0.64, + "learning_rate": 2.926164203328547e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 7654, + "text_loss": 0.71484375 + }, + { + "epoch": 0.64, + "learning_rate": 2.9249763494138718e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 7655, + "text_loss": 0.625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9237886369635937e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 7656, + "text_loss": 0.416015625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9226010660586816e-06, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 7657, + "text_loss": 0.58203125 + }, + { + "epoch": 0.64, + "learning_rate": 2.921413636780101e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 7658, + "text_loss": 0.66796875 + }, + { + "epoch": 0.64, + "learning_rate": 2.920226349208802e-06, + "loss": 0.6938, + "regression_loss": 0.0, + "step": 7659, + "text_loss": 0.8046875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9190392034257297e-06, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 7660, + "text_loss": 0.69921875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9178521995118154e-06, + "loss": 0.6204, + "regression_loss": 0.0, + "step": 7661, + "text_loss": 0.625 + }, + { + "epoch": 0.64, + "learning_rate": 2.916665337547984e-06, + "loss": 0.585, + "regression_loss": 0.0, + "step": 7662, + "text_loss": 0.6640625 + }, + { + "epoch": 0.64, + "learning_rate": 2.91547861761515e-06, + "loss": 0.5538, + "regression_loss": 0.0, + "step": 7663, + "text_loss": 0.2294921875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9142920397942177e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 7664, + "text_loss": 0.703125 + }, + { + "epoch": 0.64, + "learning_rate": 2.913105604166082e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 7665, + "text_loss": 0.53125 + }, + { + "epoch": 0.64, + "learning_rate": 2.9119193108116274e-06, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 7666, + "text_loss": 0.56640625 + }, + { + "epoch": 0.64, + "learning_rate": 2.910733159811731e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 7667, + "text_loss": 0.53125 + }, + { + "epoch": 0.64, + "learning_rate": 2.9095471512472556e-06, + "loss": 0.4777, + "regression_loss": 0.0, + "step": 7668, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9083612851990583e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 7669, + "text_loss": 0.55859375 + }, + { + "epoch": 0.64, + "learning_rate": 2.9071755617479856e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 7670, + "text_loss": 0.322265625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9059899809748746e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 7671, + "text_loss": 0.6171875 + }, + { + "epoch": 0.64, + "learning_rate": 2.904804542960551e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 7672, + "text_loss": 0.5390625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9036192477858336e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 7673, + "text_loss": 0.22265625 + }, + { + "epoch": 0.64, + "learning_rate": 2.9024340955315272e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 7674, + "text_loss": 0.48046875 + }, + { + "epoch": 0.64, + "learning_rate": 2.9012490862784306e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 7675, + "text_loss": 0.53125 + }, + { + "epoch": 0.64, + "learning_rate": 2.9000642201073316e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 7676, + "text_loss": 0.48046875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8988794970990096e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 7677, + "text_loss": 0.298828125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8976949173342272e-06, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 7678, + "text_loss": 0.33984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8965104808937524e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 7679, + "text_loss": 0.458984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8953261878583263e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 7680, + "text_loss": 1.03125 + }, + { + "epoch": 0.64, + "learning_rate": 2.89414203830869e-06, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 7681, + "text_loss": 0.412109375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8929580323255735e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 7682, + "text_loss": 0.458984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8917741699896972e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 7683, + "text_loss": 0.53515625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8905904513817663e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 7684, + "text_loss": 0.427734375 + }, + { + "epoch": 0.64, + "learning_rate": 2.889406876582487e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 7685, + "text_loss": 0.416015625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8882234456725433e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 7686, + "text_loss": 0.64453125 + }, + { + "epoch": 0.64, + "learning_rate": 2.887040158732619e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 7687, + "text_loss": 0.49609375 + }, + { + "epoch": 0.64, + "learning_rate": 2.885857015843383e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 7688, + "text_loss": 0.53125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8846740170854993e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 7689, + "text_loss": 0.58984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.883491162539611e-06, + "loss": 0.5128, + "regression_loss": 0.0, + "step": 7690, + "text_loss": 0.70703125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8823084522863675e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 7691, + "text_loss": 0.357421875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8811258864063953e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 7692, + "text_loss": 0.4609375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8799434649803156e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 7693, + "text_loss": 0.625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8787611880887408e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 7694, + "text_loss": 0.4453125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8775790558122742e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 7695, + "text_loss": 0.546875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8763970682315035e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 7696, + "text_loss": 0.60546875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8752152254270127e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 7697, + "text_loss": 0.458984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.874033527479374e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 7698, + "text_loss": 0.5234375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8728519744691484e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 7699, + "text_loss": 0.384765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8716705664768894e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 7700, + "text_loss": 0.482421875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8704893035831404e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 7701, + "text_loss": 0.64453125 + }, + { + "epoch": 0.64, + "learning_rate": 2.86930818586843e-06, + "loss": 0.615, + "regression_loss": 0.0, + "step": 7702, + "text_loss": 0.26171875 + }, + { + "epoch": 0.64, + "learning_rate": 2.868127213413283e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 7703, + "text_loss": 0.357421875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8669463862982114e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 7704, + "text_loss": 0.28125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8657657046037183e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 7705, + "text_loss": 0.4296875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8645851684102965e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 7706, + "text_loss": 0.6640625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8634047777984313e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 7707, + "text_loss": 0.384765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.862224532848591e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 7708, + "text_loss": 0.455078125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8610444336412386e-06, + "loss": 0.4351, + "regression_loss": 0.0, + "step": 7709, + "text_loss": 0.306640625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8598644802568333e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 7710, + "text_loss": 0.50390625 + }, + { + "epoch": 0.64, + "learning_rate": 2.858684672775812e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 7711, + "text_loss": 0.251953125 + }, + { + "epoch": 0.64, + "learning_rate": 2.85750501127861e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 7712, + "text_loss": 0.4921875 + }, + { + "epoch": 0.64, + "learning_rate": 2.856325495845651e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 7713, + "text_loss": 0.51171875 + }, + { + "epoch": 0.64, + "learning_rate": 2.85514612655735e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 7714, + "text_loss": 0.62109375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8539669034941044e-06, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 7715, + "text_loss": 0.515625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8527878267363153e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 7716, + "text_loss": 0.65625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8516088963643597e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 7717, + "text_loss": 0.84375 + }, + { + "epoch": 0.64, + "learning_rate": 2.850430112458613e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 7718, + "text_loss": 0.380859375 + }, + { + "epoch": 0.64, + "learning_rate": 2.84925147509944e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 7719, + "text_loss": 0.470703125 + }, + { + "epoch": 0.64, + "learning_rate": 2.848072984367195e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 7720, + "text_loss": 0.48828125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8468946403422172e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 7721, + "text_loss": 0.58203125 + }, + { + "epoch": 0.64, + "learning_rate": 2.845716443104842e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 7722, + "text_loss": 0.482421875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8445383927353936e-06, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 7723, + "text_loss": 0.353515625 + }, + { + "epoch": 0.64, + "learning_rate": 2.843360489314185e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 7724, + "text_loss": 0.84765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.842182732921519e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 7725, + "text_loss": 0.466796875 + }, + { + "epoch": 0.64, + "learning_rate": 2.841005123637692e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 7726, + "text_loss": 0.58984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.839827661542983e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 7727, + "text_loss": 0.51953125 + }, + { + "epoch": 0.64, + "learning_rate": 2.838650346717666e-06, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 7728, + "text_loss": 0.33203125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8374731792420064e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 7729, + "text_loss": 0.265625 + }, + { + "epoch": 0.64, + "learning_rate": 2.836296159196258e-06, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 7730, + "text_loss": 0.578125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8351192866606582e-06, + "loss": 0.5759, + "regression_loss": 0.0, + "step": 7731, + "text_loss": 0.291015625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8339425617154483e-06, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 7732, + "text_loss": 0.365234375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8327659844408443e-06, + "loss": 0.3826, + "regression_loss": 0.0, + "step": 7733, + "text_loss": 0.369140625 + }, + { + "epoch": 0.64, + "learning_rate": 2.8315895549170624e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 7734, + "text_loss": 0.6875 + }, + { + "epoch": 0.64, + "learning_rate": 2.830413273224306e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 7735, + "text_loss": 0.59765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.829237139442768e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 7736, + "text_loss": 0.396484375 + }, + { + "epoch": 0.64, + "learning_rate": 2.828061153652627e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 7737, + "text_loss": 0.271484375 + }, + { + "epoch": 0.64, + "learning_rate": 2.826885315934062e-06, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 7738, + "text_loss": 0.412109375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8257096263672306e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 7739, + "text_loss": 0.39453125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8245340850322873e-06, + "loss": 0.4106, + "regression_loss": 0.0, + "step": 7740, + "text_loss": 0.5078125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8233586920093737e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 7741, + "text_loss": 0.5 + }, + { + "epoch": 0.64, + "learning_rate": 2.8221834473786246e-06, + "loss": 0.4938, + "regression_loss": 0.0, + "step": 7742, + "text_loss": 0.310546875 + }, + { + "epoch": 0.64, + "learning_rate": 2.821008351220156e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 7743, + "text_loss": 0.44921875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8198334036140873e-06, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 7744, + "text_loss": 0.35546875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8186586046405157e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 7745, + "text_loss": 0.33984375 + }, + { + "epoch": 0.64, + "learning_rate": 2.817483954379534e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 7746, + "text_loss": 0.62109375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8163094529112232e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 7747, + "text_loss": 0.34765625 + }, + { + "epoch": 0.64, + "learning_rate": 2.815135100315658e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 7748, + "text_loss": 0.365234375 + }, + { + "epoch": 0.64, + "learning_rate": 2.8139608966728932e-06, + "loss": 0.4113, + "regression_loss": 0.0, + "step": 7749, + "text_loss": 0.322265625 + }, + { + "epoch": 0.64, + "learning_rate": 2.812786842062988e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 7750, + "text_loss": 0.578125 + }, + { + "epoch": 0.64, + "learning_rate": 2.811612936565977e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 7751, + "text_loss": 0.423828125 + }, + { + "epoch": 0.64, + "learning_rate": 2.8104391802618937e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 7752, + "text_loss": 0.43359375 + }, + { + "epoch": 0.64, + "learning_rate": 2.809265573230757e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 7753, + "text_loss": 0.69140625 + }, + { + "epoch": 0.64, + "learning_rate": 2.808092115552582e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 7754, + "text_loss": 0.69140625 + }, + { + "epoch": 0.64, + "learning_rate": 2.806918807307363e-06, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 7755, + "text_loss": 0.5703125 + }, + { + "epoch": 0.64, + "learning_rate": 2.805745648575091e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 7756, + "text_loss": 0.4609375 + }, + { + "epoch": 0.64, + "learning_rate": 2.804572639435752e-06, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 7757, + "text_loss": 0.703125 + }, + { + "epoch": 0.64, + "learning_rate": 2.803399779969309e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 7758, + "text_loss": 0.5625 + }, + { + "epoch": 0.64, + "learning_rate": 2.802227070255724e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 7759, + "text_loss": 0.63671875 + }, + { + "epoch": 0.64, + "learning_rate": 2.8010545103749464e-06, + "loss": 0.4167, + "regression_loss": 0.0, + "step": 7760, + "text_loss": 0.447265625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7998821004069167e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 7761, + "text_loss": 0.40625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7987098404315583e-06, + "loss": 0.4128, + "regression_loss": 0.0, + "step": 7762, + "text_loss": 0.51171875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7975377305287978e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 7763, + "text_loss": 0.30078125 + }, + { + "epoch": 0.65, + "learning_rate": 2.796365770778537e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 7764, + "text_loss": 0.515625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7951939612606765e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 7765, + "text_loss": 0.4375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7940223020551042e-06, + "loss": 0.5767, + "regression_loss": 0.0, + "step": 7766, + "text_loss": 0.33203125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7928507932417005e-06, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 7767, + "text_loss": 0.8125 + }, + { + "epoch": 0.65, + "learning_rate": 2.791679434900325e-06, + "loss": 0.5786, + "regression_loss": 0.0, + "step": 7768, + "text_loss": 0.62109375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7905082271108445e-06, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 7769, + "text_loss": 0.341796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.789337169953099e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 7770, + "text_loss": 0.439453125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7881662635069284e-06, + "loss": 0.592, + "regression_loss": 0.0, + "step": 7771, + "text_loss": 0.64453125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7869955078521576e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 7772, + "text_loss": 0.45703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7858249030686053e-06, + "loss": 0.4459, + "regression_loss": 0.0, + "step": 7773, + "text_loss": 0.6640625 + }, + { + "epoch": 0.65, + "learning_rate": 2.784654449236071e-06, + "loss": 0.4415, + "regression_loss": 0.0, + "step": 7774, + "text_loss": 0.765625 + }, + { + "epoch": 0.65, + "learning_rate": 2.783484146434359e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 7775, + "text_loss": 0.6953125 + }, + { + "epoch": 0.65, + "learning_rate": 2.782313994743247e-06, + "loss": 0.4109, + "regression_loss": 0.0, + "step": 7776, + "text_loss": 0.48828125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7811439942425132e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 7777, + "text_loss": 0.416015625 + }, + { + "epoch": 0.65, + "learning_rate": 2.779974145011922e-06, + "loss": 0.4681, + "regression_loss": 0.0, + "step": 7778, + "text_loss": 0.2294921875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7788044471312292e-06, + "loss": 0.4813, + "regression_loss": 0.0, + "step": 7779, + "text_loss": 0.43359375 + }, + { + "epoch": 0.65, + "learning_rate": 2.777634900680175e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 7780, + "text_loss": 0.4140625 + }, + { + "epoch": 0.65, + "learning_rate": 2.776465505738496e-06, + "loss": 0.4257, + "regression_loss": 0.0, + "step": 7781, + "text_loss": 0.546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.775296262385914e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 7782, + "text_loss": 0.65625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7741271707021424e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 7783, + "text_loss": 0.59375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7729582307668836e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 7784, + "text_loss": 0.55859375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7717894426598322e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 7785, + "text_loss": 0.65625 + }, + { + "epoch": 0.65, + "learning_rate": 2.770620806460667e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 7786, + "text_loss": 0.78125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7694523222490603e-06, + "loss": 0.448, + "regression_loss": 0.0, + "step": 7787, + "text_loss": 0.341796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.768283990104674e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 7788, + "text_loss": 0.40625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7671158101071612e-06, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 7789, + "text_loss": 0.482421875 + }, + { + "epoch": 0.65, + "learning_rate": 2.765947782336155e-06, + "loss": 0.4972, + "regression_loss": 0.0, + "step": 7790, + "text_loss": 0.357421875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7647799068712953e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 7791, + "text_loss": 0.474609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7636121837921947e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 7792, + "text_loss": 0.58984375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7624446131784643e-06, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 7793, + "text_loss": 0.6015625 + }, + { + "epoch": 0.65, + "learning_rate": 2.761277195109704e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 7794, + "text_loss": 0.69140625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7601099296655036e-06, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 7795, + "text_loss": 0.77734375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7589428169254363e-06, + "loss": 0.4943, + "regression_loss": 0.0, + "step": 7796, + "text_loss": 0.365234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.757775856969076e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 7797, + "text_loss": 0.375 + }, + { + "epoch": 0.65, + "learning_rate": 2.756609049875975e-06, + "loss": 0.4725, + "regression_loss": 0.0, + "step": 7798, + "text_loss": 0.2255859375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7554423957256824e-06, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 7799, + "text_loss": 0.76171875 + }, + { + "epoch": 0.65, + "learning_rate": 2.754275894597735e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 7800, + "text_loss": 0.466796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7531095465716596e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 7801, + "text_loss": 0.345703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7519433517269665e-06, + "loss": 0.4991, + "regression_loss": 0.0, + "step": 7802, + "text_loss": 0.341796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7507773101431673e-06, + "loss": 0.408, + "regression_loss": 0.0, + "step": 7803, + "text_loss": 0.5859375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7496114218997562e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 7804, + "text_loss": 0.546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7484456870762123e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 7805, + "text_loss": 0.6328125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7472801057520137e-06, + "loss": 0.5355, + "regression_loss": 0.0, + "step": 7806, + "text_loss": 0.73046875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7461146780066216e-06, + "loss": 0.3824, + "regression_loss": 0.0, + "step": 7807, + "text_loss": 0.3046875 + }, + { + "epoch": 0.65, + "learning_rate": 2.74494940391949e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 7808, + "text_loss": 0.65234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.743784283570062e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 7809, + "text_loss": 0.5859375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7426193170377695e-06, + "loss": 0.4277, + "regression_loss": 0.0, + "step": 7810, + "text_loss": 0.470703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.741454504402031e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 7811, + "text_loss": 0.65234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.74028984574226e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 7812, + "text_loss": 0.34765625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7391253411378555e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 7813, + "text_loss": 0.671875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7379609906682103e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 7814, + "text_loss": 0.63671875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7367967944126975e-06, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 7815, + "text_loss": 0.205078125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7356327524506943e-06, + "loss": 0.3638, + "regression_loss": 0.0, + "step": 7816, + "text_loss": 0.41015625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7344688648615536e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 7817, + "text_loss": 0.6796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.733305131724624e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 7818, + "text_loss": 0.58984375 + }, + { + "epoch": 0.65, + "learning_rate": 2.732141553119243e-06, + "loss": 0.634, + "regression_loss": 0.0, + "step": 7819, + "text_loss": 0.474609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.73097812912474e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 7820, + "text_loss": 0.6015625 + }, + { + "epoch": 0.65, + "learning_rate": 2.729814859820425e-06, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 7821, + "text_loss": 0.609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.728651745285612e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 7822, + "text_loss": 0.49609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7274887855995892e-06, + "loss": 0.4183, + "regression_loss": 0.0, + "step": 7823, + "text_loss": 0.431640625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7263259808416444e-06, + "loss": 0.4093, + "regression_loss": 0.0, + "step": 7824, + "text_loss": 0.5234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.725163331091051e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 7825, + "text_loss": 0.72265625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7240008364270737e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 7826, + "text_loss": 0.421875 + }, + { + "epoch": 0.65, + "learning_rate": 2.722838496928961e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 7827, + "text_loss": 0.423828125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7216763126759617e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 7828, + "text_loss": 0.439453125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7205142837473027e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 7829, + "text_loss": 0.4375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7193524102222065e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 7830, + "text_loss": 0.28515625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7181906921798833e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 7831, + "text_loss": 0.5078125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7170291296995362e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 7832, + "text_loss": 0.6171875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7158677228603493e-06, + "loss": 0.5265, + "regression_loss": 0.0, + "step": 7833, + "text_loss": 0.6328125 + }, + { + "epoch": 0.65, + "learning_rate": 2.714706471741504e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 7834, + "text_loss": 0.40234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7135453764221686e-06, + "loss": 0.4486, + "regression_loss": 0.0, + "step": 7835, + "text_loss": 0.302734375 + }, + { + "epoch": 0.65, + "learning_rate": 2.7123844369815002e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 7836, + "text_loss": 0.39453125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7112236534986446e-06, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 7837, + "text_loss": 0.41796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7100630260527422e-06, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 7838, + "text_loss": 0.431640625 + }, + { + "epoch": 0.65, + "learning_rate": 2.708902554722913e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 7839, + "text_loss": 0.640625 + }, + { + "epoch": 0.65, + "learning_rate": 2.7077422395882745e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 7840, + "text_loss": 0.703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7065820807279315e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 7841, + "text_loss": 0.703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.7054220782209784e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 7842, + "text_loss": 0.466796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.704262232146493e-06, + "loss": 0.3948, + "regression_loss": 0.0, + "step": 7843, + "text_loss": 0.388671875 + }, + { + "epoch": 0.65, + "learning_rate": 2.7031025425835554e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 7844, + "text_loss": 0.53515625 + }, + { + "epoch": 0.65, + "learning_rate": 2.701943009611222e-06, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 7845, + "text_loss": 0.66796875 + }, + { + "epoch": 0.65, + "learning_rate": 2.700783633308544e-06, + "loss": 0.5104, + "regression_loss": 0.0, + "step": 7846, + "text_loss": 0.5859375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6996244137545635e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 7847, + "text_loss": 0.546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.698465351028311e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 7848, + "text_loss": 0.48828125 + }, + { + "epoch": 0.65, + "learning_rate": 2.6973064452088e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 7849, + "text_loss": 0.35546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.6961476963750443e-06, + "loss": 0.5615, + "regression_loss": 0.0, + "step": 7850, + "text_loss": 0.25390625 + }, + { + "epoch": 0.65, + "learning_rate": 2.694989104606042e-06, + "loss": 0.5182, + "regression_loss": 0.0, + "step": 7851, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6938306699807724e-06, + "loss": 0.4053, + "regression_loss": 0.0, + "step": 7852, + "text_loss": 0.494140625 + }, + { + "epoch": 0.65, + "learning_rate": 2.692672392578222e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 7853, + "text_loss": 0.369140625 + }, + { + "epoch": 0.65, + "learning_rate": 2.691514272477348e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 7854, + "text_loss": 0.50390625 + }, + { + "epoch": 0.65, + "learning_rate": 2.6903563097571085e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 7855, + "text_loss": 0.5546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.6891985044964465e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 7856, + "text_loss": 0.609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6880408567742964e-06, + "loss": 0.5411, + "regression_loss": 0.0, + "step": 7857, + "text_loss": 0.4296875 + }, + { + "epoch": 0.65, + "learning_rate": 2.686883366669579e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 7858, + "text_loss": 0.578125 + }, + { + "epoch": 0.65, + "learning_rate": 2.685726034261207e-06, + "loss": 0.3506, + "regression_loss": 0.0, + "step": 7859, + "text_loss": 0.236328125 + }, + { + "epoch": 0.65, + "learning_rate": 2.6845688596280808e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 7860, + "text_loss": 0.5703125 + }, + { + "epoch": 0.65, + "learning_rate": 2.683411842849091e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 7861, + "text_loss": 0.75390625 + }, + { + "epoch": 0.65, + "learning_rate": 2.6822549840031166e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 7862, + "text_loss": 0.39453125 + }, + { + "epoch": 0.65, + "learning_rate": 2.681098283169028e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 7863, + "text_loss": 0.578125 + }, + { + "epoch": 0.65, + "learning_rate": 2.6799417404256804e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 7864, + "text_loss": 0.625 + }, + { + "epoch": 0.65, + "learning_rate": 2.678785355851922e-06, + "loss": 0.4194, + "regression_loss": 0.0, + "step": 7865, + "text_loss": 0.57421875 + }, + { + "epoch": 0.65, + "learning_rate": 2.67762912952659e-06, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 7866, + "text_loss": 0.51953125 + }, + { + "epoch": 0.65, + "learning_rate": 2.67647306152851e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 7867, + "text_loss": 0.7578125 + }, + { + "epoch": 0.65, + "learning_rate": 2.675317151936493e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 7868, + "text_loss": 0.58984375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6741614008293493e-06, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 7869, + "text_loss": 0.74609375 + }, + { + "epoch": 0.65, + "learning_rate": 2.673005808285867e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 7870, + "text_loss": 0.515625 + }, + { + "epoch": 0.65, + "learning_rate": 2.67185037438483e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 7871, + "text_loss": 0.73828125 + }, + { + "epoch": 0.65, + "learning_rate": 2.6706950992050097e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 7872, + "text_loss": 0.54296875 + }, + { + "epoch": 0.65, + "learning_rate": 2.669539982825169e-06, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 7873, + "text_loss": 0.85546875 + }, + { + "epoch": 0.65, + "learning_rate": 2.668385025324052e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 7874, + "text_loss": 0.65234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6672302267804056e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 7875, + "text_loss": 0.5234375 + }, + { + "epoch": 0.65, + "learning_rate": 2.6660755872729515e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 7876, + "text_loss": 0.482421875 + }, + { + "epoch": 0.65, + "learning_rate": 2.6649211068804105e-06, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 7877, + "text_loss": 0.38671875 + }, + { + "epoch": 0.65, + "learning_rate": 2.663766785681487e-06, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 7878, + "text_loss": 0.51171875 + }, + { + "epoch": 0.65, + "learning_rate": 2.66261262375488e-06, + "loss": 0.5018, + "regression_loss": 0.0, + "step": 7879, + "text_loss": 0.498046875 + }, + { + "epoch": 0.65, + "learning_rate": 2.661458621179268e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 7880, + "text_loss": 0.578125 + }, + { + "epoch": 0.66, + "learning_rate": 2.6603047780333336e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 7881, + "text_loss": 0.52734375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6591510943957322e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 7882, + "text_loss": 0.6015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.65799757034512e-06, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 7883, + "text_loss": 0.37109375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6568442059601367e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 7884, + "text_loss": 0.416015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.655691001319416e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 7885, + "text_loss": 0.416015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.65453795650157e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 7886, + "text_loss": 0.404296875 + }, + { + "epoch": 0.66, + "learning_rate": 2.653385071585217e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 7887, + "text_loss": 0.6640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6522323466489475e-06, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 7888, + "text_loss": 0.6796875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6510797817713503e-06, + "loss": 0.455, + "regression_loss": 0.0, + "step": 7889, + "text_loss": 0.6796875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6499273770310018e-06, + "loss": 0.6116, + "regression_loss": 0.0, + "step": 7890, + "text_loss": 0.54296875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6487751325064693e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 7891, + "text_loss": 0.306640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6476230482763026e-06, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 7892, + "text_loss": 0.50390625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6464711244190465e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 7893, + "text_loss": 0.271484375 + }, + { + "epoch": 0.66, + "learning_rate": 2.645319361013234e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 7894, + "text_loss": 0.52734375 + }, + { + "epoch": 0.66, + "learning_rate": 2.644167758137386e-06, + "loss": 0.4042, + "regression_loss": 0.0, + "step": 7895, + "text_loss": 0.37109375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6430163158700116e-06, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 7896, + "text_loss": 0.34765625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6418650342896125e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 7897, + "text_loss": 0.396484375 + }, + { + "epoch": 0.66, + "learning_rate": 2.640713913474677e-06, + "loss": 0.4855, + "regression_loss": 0.0, + "step": 7898, + "text_loss": 0.5078125 + }, + { + "epoch": 0.66, + "learning_rate": 2.6395629535036783e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 7899, + "text_loss": 0.44921875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6384121544550895e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 7900, + "text_loss": 0.2890625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6372615164073613e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 7901, + "text_loss": 0.54296875 + }, + { + "epoch": 0.66, + "learning_rate": 2.636111039438939e-06, + "loss": 0.4242, + "regression_loss": 0.0, + "step": 7902, + "text_loss": 0.5625 + }, + { + "epoch": 0.66, + "learning_rate": 2.634960723628257e-06, + "loss": 0.5153, + "regression_loss": 0.0, + "step": 7903, + "text_loss": 0.458984375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6338105690537402e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 7904, + "text_loss": 0.490234375 + }, + { + "epoch": 0.66, + "learning_rate": 2.632660575793794e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 7905, + "text_loss": 0.82421875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6315107439268262e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 7906, + "text_loss": 0.52734375 + }, + { + "epoch": 0.66, + "learning_rate": 2.630361073531221e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 7907, + "text_loss": 0.64453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.6292115646853587e-06, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 7908, + "text_loss": 0.29296875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6280622174676074e-06, + "loss": 0.5728, + "regression_loss": 0.0, + "step": 7909, + "text_loss": 0.74609375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6269130319563258e-06, + "loss": 0.4923, + "regression_loss": 0.0, + "step": 7910, + "text_loss": 0.283203125 + }, + { + "epoch": 0.66, + "learning_rate": 2.6257640082298545e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 7911, + "text_loss": 0.515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6246151463665314e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 7912, + "text_loss": 0.64453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.623466446444678e-06, + "loss": 0.5576, + "regression_loss": 0.0, + "step": 7913, + "text_loss": 0.5234375 + }, + { + "epoch": 0.66, + "learning_rate": 2.622317908542609e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 7914, + "text_loss": 0.361328125 + }, + { + "epoch": 0.66, + "learning_rate": 2.6211695327386233e-06, + "loss": 0.4121, + "regression_loss": 0.0, + "step": 7915, + "text_loss": 0.291015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6200213191110154e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 7916, + "text_loss": 0.63671875 + }, + { + "epoch": 0.66, + "learning_rate": 2.618873267738059e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 7917, + "text_loss": 0.953125 + }, + { + "epoch": 0.66, + "learning_rate": 2.617725378698026e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 7918, + "text_loss": 0.322265625 + }, + { + "epoch": 0.66, + "learning_rate": 2.616577652069172e-06, + "loss": 0.6011, + "regression_loss": 0.0, + "step": 7919, + "text_loss": 0.73046875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6154300879297436e-06, + "loss": 0.6199, + "regression_loss": 0.0, + "step": 7920, + "text_loss": 1.171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6142826863579762e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 7921, + "text_loss": 0.31640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6131354474320948e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 7922, + "text_loss": 0.56640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.611988371230309e-06, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 7923, + "text_loss": 0.478515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.610841457830823e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 7924, + "text_loss": 0.515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6096947073118256e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 7925, + "text_loss": 0.451171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.6085481197515e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 7926, + "text_loss": 0.5078125 + }, + { + "epoch": 0.66, + "learning_rate": 2.607401695228008e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 7927, + "text_loss": 0.447265625 + }, + { + "epoch": 0.66, + "learning_rate": 2.606255433819515e-06, + "loss": 0.4745, + "regression_loss": 0.0, + "step": 7928, + "text_loss": 0.4609375 + }, + { + "epoch": 0.66, + "learning_rate": 2.6051093356041614e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 7929, + "text_loss": 0.462890625 + }, + { + "epoch": 0.66, + "learning_rate": 2.603963400660084e-06, + "loss": 0.4742, + "regression_loss": 0.0, + "step": 7930, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.602817629065407e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 7931, + "text_loss": 0.419921875 + }, + { + "epoch": 0.66, + "learning_rate": 2.601672020898244e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 7932, + "text_loss": 0.53515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.6005265762366925e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 7933, + "text_loss": 0.396484375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5993812951588492e-06, + "loss": 0.4143, + "regression_loss": 0.0, + "step": 7934, + "text_loss": 0.349609375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5982361777427897e-06, + "loss": 0.4075, + "regression_loss": 0.0, + "step": 7935, + "text_loss": 0.5546875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5970912240665815e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 7936, + "text_loss": 0.68359375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5959464342082842e-06, + "loss": 0.4172, + "regression_loss": 0.0, + "step": 7937, + "text_loss": 0.421875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5948018082459435e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 7938, + "text_loss": 0.6875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5936573462575885e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 7939, + "text_loss": 0.70703125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5925130483212526e-06, + "loss": 0.6239, + "regression_loss": 0.0, + "step": 7940, + "text_loss": 0.984375 + }, + { + "epoch": 0.66, + "learning_rate": 2.59136891451494e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 7941, + "text_loss": 0.7421875 + }, + { + "epoch": 0.66, + "learning_rate": 2.590224944916655e-06, + "loss": 0.4662, + "regression_loss": 0.0, + "step": 7942, + "text_loss": 0.46484375 + }, + { + "epoch": 0.66, + "learning_rate": 2.589081139604387e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 7943, + "text_loss": 0.453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5879374986561144e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 7944, + "text_loss": 0.30859375 + }, + { + "epoch": 0.66, + "learning_rate": 2.586794022149807e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 7945, + "text_loss": 0.51171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.585650710163416e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 7946, + "text_loss": 0.61328125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5845075627748934e-06, + "loss": 0.4796, + "regression_loss": 0.0, + "step": 7947, + "text_loss": 0.2451171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5833645800621675e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 7948, + "text_loss": 0.6015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.582221762103163e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 7949, + "text_loss": 0.390625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5810791089757915e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 7950, + "text_loss": 0.5859375 + }, + { + "epoch": 0.66, + "learning_rate": 2.579936620757955e-06, + "loss": 0.4504, + "regression_loss": 0.0, + "step": 7951, + "text_loss": 0.357421875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5787942975275365e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 7952, + "text_loss": 0.37109375 + }, + { + "epoch": 0.66, + "learning_rate": 2.577652139362421e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 7953, + "text_loss": 0.498046875 + }, + { + "epoch": 0.66, + "learning_rate": 2.57651014634047e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 7954, + "text_loss": 0.251953125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5753683185395405e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 7955, + "text_loss": 0.58203125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5742266560374757e-06, + "loss": 0.4965, + "regression_loss": 0.0, + "step": 7956, + "text_loss": 0.8515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.573085158912111e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 7957, + "text_loss": 0.5390625 + }, + { + "epoch": 0.66, + "learning_rate": 2.571943827241261e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 7958, + "text_loss": 0.4296875 + }, + { + "epoch": 0.66, + "learning_rate": 2.570802661102745e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 7959, + "text_loss": 0.439453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.569661660574355e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 7960, + "text_loss": 0.76953125 + }, + { + "epoch": 0.66, + "learning_rate": 2.56852082573388e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 7961, + "text_loss": 0.734375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5673801566590973e-06, + "loss": 0.4066, + "regression_loss": 0.0, + "step": 7962, + "text_loss": 0.6171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5662396534277735e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 7963, + "text_loss": 0.6796875 + }, + { + "epoch": 0.66, + "learning_rate": 2.565099316117656e-06, + "loss": 0.4401, + "regression_loss": 0.0, + "step": 7964, + "text_loss": 0.36328125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5639591448064948e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 7965, + "text_loss": 0.447265625 + }, + { + "epoch": 0.66, + "learning_rate": 2.562819139572016e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 7966, + "text_loss": 0.58203125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5616793004919405e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 7967, + "text_loss": 0.337890625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5605396276439764e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 7968, + "text_loss": 0.7421875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5594001211058227e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 7969, + "text_loss": 0.24609375 + }, + { + "epoch": 0.66, + "learning_rate": 2.558260780955162e-06, + "loss": 0.5815, + "regression_loss": 0.0, + "step": 7970, + "text_loss": 0.65234375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5571216072696696e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 7971, + "text_loss": 1.0078125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5559826001270094e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 7972, + "text_loss": 0.51171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.554843759604832e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 7973, + "text_loss": 0.42578125 + }, + { + "epoch": 0.66, + "learning_rate": 2.553705085780779e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 7974, + "text_loss": 0.5390625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5525665787324794e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 7975, + "text_loss": 0.578125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5514282385375483e-06, + "loss": 0.396, + "regression_loss": 0.0, + "step": 7976, + "text_loss": 0.546875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5502900652735927e-06, + "loss": 0.4097, + "regression_loss": 0.0, + "step": 7977, + "text_loss": 0.50390625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5491520590182094e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 7978, + "text_loss": 0.73046875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5480142198489816e-06, + "loss": 0.5031, + "regression_loss": 0.0, + "step": 7979, + "text_loss": 0.51171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5468765478434765e-06, + "loss": 0.6328, + "regression_loss": 0.0, + "step": 7980, + "text_loss": 0.310546875 + }, + { + "epoch": 0.66, + "learning_rate": 2.545739043079262e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 7981, + "text_loss": 0.23828125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5446017056338817e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 7982, + "text_loss": 0.359375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5434645355848755e-06, + "loss": 0.6909, + "regression_loss": 0.0, + "step": 7983, + "text_loss": 0.859375 + }, + { + "epoch": 0.66, + "learning_rate": 2.5423275330097697e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 7984, + "text_loss": 0.4453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5411906979860814e-06, + "loss": 0.437, + "regression_loss": 0.0, + "step": 7985, + "text_loss": 0.384765625 + }, + { + "epoch": 0.66, + "learning_rate": 2.540054030591307e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 7986, + "text_loss": 0.51953125 + }, + { + "epoch": 0.66, + "learning_rate": 2.538917530902948e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 7987, + "text_loss": 0.64453125 + }, + { + "epoch": 0.66, + "learning_rate": 2.5377811989984785e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 7988, + "text_loss": 0.291015625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5366450349553702e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 7989, + "text_loss": 0.48046875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5355090388510806e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 7990, + "text_loss": 0.34765625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5343732107630554e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 7991, + "text_loss": 0.59375 + }, + { + "epoch": 0.66, + "learning_rate": 2.53323755076873e-06, + "loss": 0.4246, + "regression_loss": 0.0, + "step": 7992, + "text_loss": 0.326171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.532102058945529e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 7993, + "text_loss": 0.51171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5309667353708646e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 7994, + "text_loss": 0.31640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5298315801221334e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 7995, + "text_loss": 0.28515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5286965932767276e-06, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 7996, + "text_loss": 0.56640625 + }, + { + "epoch": 0.66, + "learning_rate": 2.527561774912024e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 7997, + "text_loss": 0.26171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5264271251053896e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 7998, + "text_loss": 0.5625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5252926439341773e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 7999, + "text_loss": 0.78515625 + }, + { + "epoch": 0.66, + "learning_rate": 2.5241583314757327e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 8000, + "text_loss": 0.6171875 + }, + { + "epoch": 0.66, + "learning_rate": 2.5230241878073846e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 8001, + "text_loss": 0.494140625 + }, + { + "epoch": 0.67, + "learning_rate": 2.521890213006454e-06, + "loss": 0.604, + "regression_loss": 0.0, + "step": 8002, + "text_loss": 0.546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.520756407150249e-06, + "loss": 0.4167, + "regression_loss": 0.0, + "step": 8003, + "text_loss": 0.6328125 + }, + { + "epoch": 0.67, + "learning_rate": 2.5196227703160693e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 8004, + "text_loss": 0.4375 + }, + { + "epoch": 0.67, + "learning_rate": 2.5184893025811947e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 8005, + "text_loss": 0.19921875 + }, + { + "epoch": 0.67, + "learning_rate": 2.5173560040229066e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 8006, + "text_loss": 0.6015625 + }, + { + "epoch": 0.67, + "learning_rate": 2.5162228747184613e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 8007, + "text_loss": 0.28125 + }, + { + "epoch": 0.67, + "learning_rate": 2.515089914745113e-06, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 8008, + "text_loss": 0.3515625 + }, + { + "epoch": 0.67, + "learning_rate": 2.5139571241801e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 8009, + "text_loss": 0.5390625 + }, + { + "epoch": 0.67, + "learning_rate": 2.5128245031006516e-06, + "loss": 0.6025, + "regression_loss": 0.0, + "step": 8010, + "text_loss": 1.21875 + }, + { + "epoch": 0.67, + "learning_rate": 2.511692051583978e-06, + "loss": 0.5449, + "regression_loss": 0.0, + "step": 8011, + "text_loss": 0.478515625 + }, + { + "epoch": 0.67, + "learning_rate": 2.5105597697072935e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 8012, + "text_loss": 0.498046875 + }, + { + "epoch": 0.67, + "learning_rate": 2.509427657547785e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 8013, + "text_loss": 0.421875 + }, + { + "epoch": 0.67, + "learning_rate": 2.5082957151826343e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 8014, + "text_loss": 0.4609375 + }, + { + "epoch": 0.67, + "learning_rate": 2.5071639426890128e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 8015, + "text_loss": 0.474609375 + }, + { + "epoch": 0.67, + "learning_rate": 2.50603234014408e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 8016, + "text_loss": 0.345703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.5049009076249785e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 8017, + "text_loss": 0.41015625 + }, + { + "epoch": 0.67, + "learning_rate": 2.503769645208849e-06, + "loss": 0.603, + "regression_loss": 0.0, + "step": 8018, + "text_loss": 0.5078125 + }, + { + "epoch": 0.67, + "learning_rate": 2.5026385529728115e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 8019, + "text_loss": 0.328125 + }, + { + "epoch": 0.67, + "learning_rate": 2.501507630993978e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 8020, + "text_loss": 0.466796875 + }, + { + "epoch": 0.67, + "learning_rate": 2.5003768793494505e-06, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 8021, + "text_loss": 0.53125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4992462981163185e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 8022, + "text_loss": 0.53125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4981158873716543e-06, + "loss": 0.3954, + "regression_loss": 0.0, + "step": 8023, + "text_loss": 0.734375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4969856471925315e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 8024, + "text_loss": 0.47265625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4958555776559967e-06, + "loss": 0.4662, + "regression_loss": 0.0, + "step": 8025, + "text_loss": 0.51953125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4947256788390956e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 8026, + "text_loss": 0.310546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4935959508188577e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 8027, + "text_loss": 0.427734375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4924663936723055e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 8028, + "text_loss": 0.2890625 + }, + { + "epoch": 0.67, + "learning_rate": 2.491337007476441e-06, + "loss": 0.6082, + "regression_loss": 0.0, + "step": 8029, + "text_loss": 0.64453125 + }, + { + "epoch": 0.67, + "learning_rate": 2.490207792308263e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 8030, + "text_loss": 0.5703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4890787482447544e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 8031, + "text_loss": 0.39453125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4879498753628885e-06, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 8032, + "text_loss": 0.2578125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4868211737396255e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 8033, + "text_loss": 0.455078125 + }, + { + "epoch": 0.67, + "learning_rate": 2.485692643451917e-06, + "loss": 0.533, + "regression_loss": 0.0, + "step": 8034, + "text_loss": 0.3515625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4845642845766955e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 8035, + "text_loss": 0.6796875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4834360971908892e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 8036, + "text_loss": 0.5546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.482308081371413e-06, + "loss": 0.4331, + "regression_loss": 0.0, + "step": 8037, + "text_loss": 0.57421875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4811802371951677e-06, + "loss": 0.527, + "regression_loss": 0.0, + "step": 8038, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4800525647390444e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 8039, + "text_loss": 0.59765625 + }, + { + "epoch": 0.67, + "learning_rate": 2.478925064079922e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 8040, + "text_loss": 0.59375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4777977352946702e-06, + "loss": 0.4501, + "regression_loss": 0.0, + "step": 8041, + "text_loss": 0.55078125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4766705784601377e-06, + "loss": 0.3997, + "regression_loss": 0.0, + "step": 8042, + "text_loss": 0.6015625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4755435936531762e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 8043, + "text_loss": 0.41796875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4744167809506127e-06, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 8044, + "text_loss": 0.42578125 + }, + { + "epoch": 0.67, + "learning_rate": 2.473290140429268e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 8045, + "text_loss": 0.34765625 + }, + { + "epoch": 0.67, + "learning_rate": 2.472163672165952e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 8046, + "text_loss": 0.6484375 + }, + { + "epoch": 0.67, + "learning_rate": 2.471037376237463e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 8047, + "text_loss": 0.2578125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4699112527205815e-06, + "loss": 0.4353, + "regression_loss": 0.0, + "step": 8048, + "text_loss": 0.59765625 + }, + { + "epoch": 0.67, + "learning_rate": 2.468785301692084e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 8049, + "text_loss": 0.671875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4676595232287306e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 8050, + "text_loss": 0.30078125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4665339174072718e-06, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 8051, + "text_loss": 0.23828125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4654084843044457e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 8052, + "text_loss": 0.609375 + }, + { + "epoch": 0.67, + "learning_rate": 2.46428322399698e-06, + "loss": 0.4696, + "regression_loss": 0.0, + "step": 8053, + "text_loss": 0.625 + }, + { + "epoch": 0.67, + "learning_rate": 2.463158136561586e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 8054, + "text_loss": 0.8203125 + }, + { + "epoch": 0.67, + "learning_rate": 2.462033222074968e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 8055, + "text_loss": 0.52734375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4609084806138154e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 8056, + "text_loss": 0.328125 + }, + { + "epoch": 0.67, + "learning_rate": 2.459783912254812e-06, + "loss": 0.5052, + "regression_loss": 0.0, + "step": 8057, + "text_loss": 0.7109375 + }, + { + "epoch": 0.67, + "learning_rate": 2.458659517074617e-06, + "loss": 0.4087, + "regression_loss": 0.0, + "step": 8058, + "text_loss": 0.55859375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4575352951498943e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 8059, + "text_loss": 0.5546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.456411246557282e-06, + "loss": 0.6091, + "regression_loss": 0.0, + "step": 8060, + "text_loss": 0.5703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4552873713734133e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 8061, + "text_loss": 0.5234375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4541636696749086e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 8062, + "text_loss": 0.34765625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4530401415383783e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 8063, + "text_loss": 0.5078125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4519167870404126e-06, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 8064, + "text_loss": 0.26953125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4507936062576033e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 8065, + "text_loss": 0.341796875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4496705992665183e-06, + "loss": 0.6177, + "regression_loss": 0.0, + "step": 8066, + "text_loss": 1.0546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.448547766143719e-06, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 8067, + "text_loss": 0.298828125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4474251069657555e-06, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 8068, + "text_loss": 0.412109375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4463026218091666e-06, + "loss": 0.584, + "regression_loss": 0.0, + "step": 8069, + "text_loss": 0.671875 + }, + { + "epoch": 0.67, + "learning_rate": 2.445180310750472e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 8070, + "text_loss": 0.51171875 + }, + { + "epoch": 0.67, + "learning_rate": 2.444058173866192e-06, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 8071, + "text_loss": 0.490234375 + }, + { + "epoch": 0.67, + "learning_rate": 2.442936211232822e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 8072, + "text_loss": 0.439453125 + }, + { + "epoch": 0.67, + "learning_rate": 2.441814422926855e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 8073, + "text_loss": 0.69921875 + }, + { + "epoch": 0.67, + "learning_rate": 2.440692809024768e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 8074, + "text_loss": 0.412109375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4395713696030283e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 8075, + "text_loss": 0.65234375 + }, + { + "epoch": 0.67, + "learning_rate": 2.438450104738085e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 8076, + "text_loss": 0.49609375 + }, + { + "epoch": 0.67, + "learning_rate": 2.437329014506387e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 8077, + "text_loss": 0.69921875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4362080989843594e-06, + "loss": 0.5583, + "regression_loss": 0.0, + "step": 8078, + "text_loss": 0.5703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.435087358248422e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 8079, + "text_loss": 0.46484375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4339667923749804e-06, + "loss": 0.422, + "regression_loss": 0.0, + "step": 8080, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4328464014404325e-06, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 8081, + "text_loss": 0.353515625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4317261855211556e-06, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 8082, + "text_loss": 0.396484375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4306061446935225e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 8083, + "text_loss": 0.47265625 + }, + { + "epoch": 0.67, + "learning_rate": 2.429486279033892e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 8084, + "text_loss": 0.36328125 + }, + { + "epoch": 0.67, + "learning_rate": 2.428366588618611e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 8085, + "text_loss": 0.76953125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4272470735240133e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 8086, + "text_loss": 0.384765625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4261277338264226e-06, + "loss": 0.4364, + "regression_loss": 0.0, + "step": 8087, + "text_loss": 0.396484375 + }, + { + "epoch": 0.67, + "learning_rate": 2.425008569602151e-06, + "loss": 0.4141, + "regression_loss": 0.0, + "step": 8088, + "text_loss": 0.208984375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4238895809274927e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 8089, + "text_loss": 0.703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4227707678787406e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8090, + "text_loss": 0.51171875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4216521305321644e-06, + "loss": 0.4982, + "regression_loss": 0.0, + "step": 8091, + "text_loss": 0.4921875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4205336689640295e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 8092, + "text_loss": 0.33203125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4194153832505863e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 8093, + "text_loss": 0.76171875 + }, + { + "epoch": 0.67, + "learning_rate": 2.418297273468076e-06, + "loss": 0.437, + "regression_loss": 0.0, + "step": 8094, + "text_loss": 0.58984375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4171793396927197e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 8095, + "text_loss": 0.6171875 + }, + { + "epoch": 0.67, + "learning_rate": 2.41606158200074e-06, + "loss": 0.4569, + "regression_loss": 0.0, + "step": 8096, + "text_loss": 0.5703125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4149440004683337e-06, + "loss": 0.4606, + "regression_loss": 0.0, + "step": 8097, + "text_loss": 0.392578125 + }, + { + "epoch": 0.67, + "learning_rate": 2.4138265951716945e-06, + "loss": 0.444, + "regression_loss": 0.0, + "step": 8098, + "text_loss": 0.302734375 + }, + { + "epoch": 0.67, + "learning_rate": 2.412709366187e-06, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 8099, + "text_loss": 0.53125 + }, + { + "epoch": 0.67, + "learning_rate": 2.411592313590421e-06, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 8100, + "text_loss": 0.5859375 + }, + { + "epoch": 0.67, + "learning_rate": 2.410475437458104e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 8101, + "text_loss": 0.55859375 + }, + { + "epoch": 0.67, + "learning_rate": 2.409358737866202e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 8102, + "text_loss": 0.490234375 + }, + { + "epoch": 0.67, + "learning_rate": 2.408242214890839e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 8103, + "text_loss": 0.32421875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4071258686081354e-06, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 8104, + "text_loss": 0.60546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.406009699094198e-06, + "loss": 0.5341, + "regression_loss": 0.0, + "step": 8105, + "text_loss": 0.53125 + }, + { + "epoch": 0.67, + "learning_rate": 2.404893706425125e-06, + "loss": 0.386, + "regression_loss": 0.0, + "step": 8106, + "text_loss": 0.431640625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4037778906769926e-06, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 8107, + "text_loss": 0.609375 + }, + { + "epoch": 0.67, + "learning_rate": 2.4026622519258746e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 8108, + "text_loss": 0.515625 + }, + { + "epoch": 0.67, + "learning_rate": 2.4015467902478295e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 8109, + "text_loss": 0.6796875 + }, + { + "epoch": 0.67, + "learning_rate": 2.4004315057189037e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 8110, + "text_loss": 0.462890625 + }, + { + "epoch": 0.67, + "learning_rate": 2.3993163984151317e-06, + "loss": 0.4377, + "regression_loss": 0.0, + "step": 8111, + "text_loss": 0.390625 + }, + { + "epoch": 0.67, + "learning_rate": 2.3982014684125372e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 8112, + "text_loss": 0.625 + }, + { + "epoch": 0.67, + "learning_rate": 2.397086715787126e-06, + "loss": 0.4009, + "regression_loss": 0.0, + "step": 8113, + "text_loss": 0.5546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.3959721406149e-06, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 8114, + "text_loss": 0.57421875 + }, + { + "epoch": 0.67, + "learning_rate": 2.394857742971844e-06, + "loss": 0.4231, + "regression_loss": 0.0, + "step": 8115, + "text_loss": 0.3125 + }, + { + "epoch": 0.67, + "learning_rate": 2.3937435229339328e-06, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 8116, + "text_loss": 0.52734375 + }, + { + "epoch": 0.67, + "learning_rate": 2.3926294805771246e-06, + "loss": 0.427, + "regression_loss": 0.0, + "step": 8117, + "text_loss": 0.6328125 + }, + { + "epoch": 0.67, + "learning_rate": 2.3915156159773743e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 8118, + "text_loss": 0.65625 + }, + { + "epoch": 0.67, + "learning_rate": 2.390401929210616e-06, + "loss": 0.5908, + "regression_loss": 0.0, + "step": 8119, + "text_loss": 0.435546875 + }, + { + "epoch": 0.67, + "learning_rate": 2.3892884203527757e-06, + "loss": 0.4143, + "regression_loss": 0.0, + "step": 8120, + "text_loss": 0.578125 + }, + { + "epoch": 0.67, + "learning_rate": 2.3881750894797667e-06, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 8121, + "text_loss": 0.291015625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3870619366674924e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 8122, + "text_loss": 0.494140625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3859489619918357e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 8123, + "text_loss": 0.251953125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3848361655286818e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 8124, + "text_loss": 0.66015625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3837235473538896e-06, + "loss": 0.5472, + "regression_loss": 0.0, + "step": 8125, + "text_loss": 0.63671875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3826111075433128e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 8126, + "text_loss": 0.6796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3814988461727924e-06, + "loss": 0.5096, + "regression_loss": 0.0, + "step": 8127, + "text_loss": 0.326171875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3803867633181575e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 8128, + "text_loss": 0.6484375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3792748590552204e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 8129, + "text_loss": 0.6328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3781631334597914e-06, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 8130, + "text_loss": 0.734375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3770515866076564e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 8131, + "text_loss": 0.578125 + }, + { + "epoch": 0.68, + "learning_rate": 2.375940218574597e-06, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 8132, + "text_loss": 0.3671875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3748290294363806e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 8133, + "text_loss": 0.53515625 + }, + { + "epoch": 0.68, + "learning_rate": 2.373718019268763e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 8134, + "text_loss": 0.361328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.372607188147486e-06, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 8135, + "text_loss": 0.322265625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3714965361482805e-06, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 8136, + "text_loss": 0.6328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3703860633468678e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 8137, + "text_loss": 0.25390625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3692757698189496e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 8138, + "text_loss": 0.328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3681656556402215e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 8139, + "text_loss": 0.6796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.367055720886367e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 8140, + "text_loss": 0.423828125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3659459656330558e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 8141, + "text_loss": 0.41796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3648363899559406e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 8142, + "text_loss": 0.4609375 + }, + { + "epoch": 0.68, + "learning_rate": 2.363726993930674e-06, + "loss": 0.6257, + "regression_loss": 0.0, + "step": 8143, + "text_loss": 0.80859375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3626177776328827e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 8144, + "text_loss": 0.2890625 + }, + { + "epoch": 0.68, + "learning_rate": 2.36150874113819e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 8145, + "text_loss": 0.70703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3603998845222037e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 8146, + "text_loss": 0.490234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3592912078605224e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 8147, + "text_loss": 0.65234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3581827112287237e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 8148, + "text_loss": 0.5625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3570743947023867e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 8149, + "text_loss": 0.37109375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3559662583570652e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 8150, + "text_loss": 0.2578125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3548583022683085e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 8151, + "text_loss": 0.5234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.353750526511651e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 8152, + "text_loss": 0.376953125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3526429311626165e-06, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 8153, + "text_loss": 0.71875 + }, + { + "epoch": 0.68, + "learning_rate": 2.35153551629671e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 8154, + "text_loss": 0.421875 + }, + { + "epoch": 0.68, + "learning_rate": 2.350428281989437e-06, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 8155, + "text_loss": 0.34765625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3493212283162775e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 8156, + "text_loss": 0.35546875 + }, + { + "epoch": 0.68, + "learning_rate": 2.348214355352706e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 8157, + "text_loss": 0.244140625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3471076631741833e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 8158, + "text_loss": 0.6328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.346001151856161e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 8159, + "text_loss": 0.50390625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3448948214740703e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 8160, + "text_loss": 0.48046875 + }, + { + "epoch": 0.68, + "learning_rate": 2.343788672103337e-06, + "loss": 0.3904, + "regression_loss": 0.0, + "step": 8161, + "text_loss": 0.490234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.342682703819374e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 8162, + "text_loss": 0.6953125 + }, + { + "epoch": 0.68, + "learning_rate": 2.341576916697579e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 8163, + "text_loss": 0.56640625 + }, + { + "epoch": 0.68, + "learning_rate": 2.34047131081334e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 8164, + "text_loss": 0.6796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.339365886242032e-06, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 8165, + "text_loss": 0.390625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3382606430590142e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 8166, + "text_loss": 0.48828125 + }, + { + "epoch": 0.68, + "learning_rate": 2.337155581339639e-06, + "loss": 0.4614, + "regression_loss": 0.0, + "step": 8167, + "text_loss": 0.6640625 + }, + { + "epoch": 0.68, + "learning_rate": 2.336050701159243e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 8168, + "text_loss": 0.359375 + }, + { + "epoch": 0.68, + "learning_rate": 2.334946002593151e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 8169, + "text_loss": 0.6953125 + }, + { + "epoch": 0.68, + "learning_rate": 2.333841485716676e-06, + "loss": 0.3994, + "regression_loss": 0.0, + "step": 8170, + "text_loss": 0.35546875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3327371506051195e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 8171, + "text_loss": 0.5078125 + }, + { + "epoch": 0.68, + "learning_rate": 2.331632997333767e-06, + "loss": 0.4979, + "regression_loss": 0.0, + "step": 8172, + "text_loss": 0.50390625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3305290259778946e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 8173, + "text_loss": 0.3203125 + }, + { + "epoch": 0.68, + "learning_rate": 2.329425236612766e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 8174, + "text_loss": 0.20703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.328321629313633e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 8175, + "text_loss": 0.265625 + }, + { + "epoch": 0.68, + "learning_rate": 2.327218204155729e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8176, + "text_loss": 0.1953125 + }, + { + "epoch": 0.68, + "learning_rate": 2.326114961214287e-06, + "loss": 0.6165, + "regression_loss": 0.0, + "step": 8177, + "text_loss": 0.49609375 + }, + { + "epoch": 0.68, + "learning_rate": 2.325011900564515e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 8178, + "text_loss": 0.6875 + }, + { + "epoch": 0.68, + "learning_rate": 2.323909022281614e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 8179, + "text_loss": 0.65234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3228063264407776e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 8180, + "text_loss": 0.66796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3217038131171763e-06, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 8181, + "text_loss": 0.65234375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3206014823859768e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 8182, + "text_loss": 0.39453125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3194993343223294e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 8183, + "text_loss": 0.4375 + }, + { + "epoch": 0.68, + "learning_rate": 2.3183973690013745e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 8184, + "text_loss": 0.4921875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3172955864982355e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 8185, + "text_loss": 0.6328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.316193986888027e-06, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 8186, + "text_loss": 0.421875 + }, + { + "epoch": 0.68, + "learning_rate": 2.3150925702458518e-06, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 8187, + "text_loss": 0.6015625 + }, + { + "epoch": 0.68, + "learning_rate": 2.313991336646797e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 8188, + "text_loss": 0.39453125 + }, + { + "epoch": 0.68, + "learning_rate": 2.312890286165941e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 8189, + "text_loss": 0.7890625 + }, + { + "epoch": 0.68, + "learning_rate": 2.311789418878348e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 8190, + "text_loss": 0.48828125 + }, + { + "epoch": 0.68, + "learning_rate": 2.310688734859066e-06, + "loss": 0.3887, + "regression_loss": 0.0, + "step": 8191, + "text_loss": 0.3359375 + }, + { + "epoch": 0.68, + "learning_rate": 2.309588234183137e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 8192, + "text_loss": 0.515625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3084879169255866e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 8193, + "text_loss": 0.58203125 + }, + { + "epoch": 0.68, + "learning_rate": 2.30738778316143e-06, + "loss": 0.4346, + "regression_loss": 0.0, + "step": 8194, + "text_loss": 0.5 + }, + { + "epoch": 0.68, + "learning_rate": 2.306287832965664e-06, + "loss": 0.3766, + "regression_loss": 0.0, + "step": 8195, + "text_loss": 0.49609375 + }, + { + "epoch": 0.68, + "learning_rate": 2.305188066413284e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 8196, + "text_loss": 0.65625 + }, + { + "epoch": 0.68, + "learning_rate": 2.3040884835792625e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 8197, + "text_loss": 0.53515625 + }, + { + "epoch": 0.68, + "learning_rate": 2.302989084538564e-06, + "loss": 0.6111, + "regression_loss": 0.0, + "step": 8198, + "text_loss": 0.5 + }, + { + "epoch": 0.68, + "learning_rate": 2.3018898693661397e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 8199, + "text_loss": 1.0703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.3007908381369306e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 8200, + "text_loss": 0.5078125 + }, + { + "epoch": 0.68, + "learning_rate": 2.299691990925857e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 8201, + "text_loss": 0.73046875 + }, + { + "epoch": 0.68, + "learning_rate": 2.298593327807841e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 8202, + "text_loss": 0.3046875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2974948488577775e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 8203, + "text_loss": 0.423828125 + }, + { + "epoch": 0.68, + "learning_rate": 2.2963965541505573e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 8204, + "text_loss": 0.59375 + }, + { + "epoch": 0.68, + "learning_rate": 2.2952984437610564e-06, + "loss": 0.5442, + "regression_loss": 0.0, + "step": 8205, + "text_loss": 0.6640625 + }, + { + "epoch": 0.68, + "learning_rate": 2.2942005177641403e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 8206, + "text_loss": 0.75 + }, + { + "epoch": 0.68, + "learning_rate": 2.2931027762346536e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 8207, + "text_loss": 0.44140625 + }, + { + "epoch": 0.68, + "learning_rate": 2.2920052192474423e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 8208, + "text_loss": 0.54296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2909078468773278e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 8209, + "text_loss": 0.63671875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2898106591991238e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 8210, + "text_loss": 0.53125 + }, + { + "epoch": 0.68, + "learning_rate": 2.288713656287631e-06, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 8211, + "text_loss": 0.45703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.28761683821764e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 8212, + "text_loss": 0.439453125 + }, + { + "epoch": 0.68, + "learning_rate": 2.28652020506392e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 8213, + "text_loss": 0.61328125 + }, + { + "epoch": 0.68, + "learning_rate": 2.285423756901241e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 8214, + "text_loss": 0.373046875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2843274938043484e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 8215, + "text_loss": 0.69921875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2832314158479818e-06, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 8216, + "text_loss": 0.375 + }, + { + "epoch": 0.68, + "learning_rate": 2.282135523106865e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8217, + "text_loss": 0.46875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2810398156557123e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 8218, + "text_loss": 0.54296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2799442935692206e-06, + "loss": 0.3953, + "regression_loss": 0.0, + "step": 8219, + "text_loss": 0.52734375 + }, + { + "epoch": 0.68, + "learning_rate": 2.2788489569220778e-06, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 8220, + "text_loss": 0.6875 + }, + { + "epoch": 0.68, + "learning_rate": 2.277753805788959e-06, + "loss": 0.5933, + "regression_loss": 0.0, + "step": 8221, + "text_loss": 0.70703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.276658840244526e-06, + "loss": 0.4027, + "regression_loss": 0.0, + "step": 8222, + "text_loss": 0.32421875 + }, + { + "epoch": 0.68, + "learning_rate": 2.275564060363428e-06, + "loss": 0.522, + "regression_loss": 0.0, + "step": 8223, + "text_loss": 0.470703125 + }, + { + "epoch": 0.68, + "learning_rate": 2.2744694662203022e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 8224, + "text_loss": 0.55078125 + }, + { + "epoch": 0.68, + "learning_rate": 2.273375057889769e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 8225, + "text_loss": 0.79296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2722808354464404e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 8226, + "text_loss": 0.41796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.27118679896492e-06, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 8227, + "text_loss": 0.484375 + }, + { + "epoch": 0.68, + "learning_rate": 2.2700929485197876e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 8228, + "text_loss": 0.55078125 + }, + { + "epoch": 0.68, + "learning_rate": 2.2689992841856185e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 8229, + "text_loss": 0.65625 + }, + { + "epoch": 0.68, + "learning_rate": 2.267905806036973e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 8230, + "text_loss": 0.404296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2668125141484005e-06, + "loss": 0.6218, + "regression_loss": 0.0, + "step": 8231, + "text_loss": 0.6484375 + }, + { + "epoch": 0.68, + "learning_rate": 2.2657194085944306e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 8232, + "text_loss": 0.796875 + }, + { + "epoch": 0.68, + "learning_rate": 2.264626489449593e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 8233, + "text_loss": 0.3359375 + }, + { + "epoch": 0.68, + "learning_rate": 2.2635337567883912e-06, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 8234, + "text_loss": 0.5625 + }, + { + "epoch": 0.68, + "learning_rate": 2.262441210685325e-06, + "loss": 0.4362, + "regression_loss": 0.0, + "step": 8235, + "text_loss": 0.62890625 + }, + { + "epoch": 0.68, + "learning_rate": 2.261348851214878e-06, + "loss": 0.4153, + "regression_loss": 0.0, + "step": 8236, + "text_loss": 0.296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.2602566784515236e-06, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 8237, + "text_loss": 0.267578125 + }, + { + "epoch": 0.68, + "learning_rate": 2.2591646924697142e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 8238, + "text_loss": 0.4140625 + }, + { + "epoch": 0.68, + "learning_rate": 2.2580728933439037e-06, + "loss": 0.4453, + "regression_loss": 0.0, + "step": 8239, + "text_loss": 0.439453125 + }, + { + "epoch": 0.68, + "learning_rate": 2.2569812811485204e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 8240, + "text_loss": 0.54296875 + }, + { + "epoch": 0.68, + "learning_rate": 2.255889855957986e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 8241, + "text_loss": 0.4921875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2547986178467072e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 8242, + "text_loss": 0.369140625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2537075668890833e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 8243, + "text_loss": 0.51953125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2526167031594908e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 8244, + "text_loss": 0.5234375 + }, + { + "epoch": 0.69, + "learning_rate": 2.251526026732302e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 8245, + "text_loss": 0.65625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2504355376818727e-06, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 8246, + "text_loss": 0.64453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2493452360825476e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 8247, + "text_loss": 0.6328125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2482551220086573e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 8248, + "text_loss": 0.5078125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2471651955345226e-06, + "loss": 0.4442, + "regression_loss": 0.0, + "step": 8249, + "text_loss": 0.62890625 + }, + { + "epoch": 0.69, + "learning_rate": 2.246075456734445e-06, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 8250, + "text_loss": 0.34765625 + }, + { + "epoch": 0.69, + "learning_rate": 2.244985905682719e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 8251, + "text_loss": 0.30859375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2438965424536244e-06, + "loss": 0.4158, + "regression_loss": 0.0, + "step": 8252, + "text_loss": 0.388671875 + }, + { + "epoch": 0.69, + "learning_rate": 2.242807367121431e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 8253, + "text_loss": 0.78515625 + }, + { + "epoch": 0.69, + "learning_rate": 2.241718379760387e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 8254, + "text_loss": 0.46875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2406295804447414e-06, + "loss": 0.5962, + "regression_loss": 0.0, + "step": 8255, + "text_loss": 0.51953125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2395409692487174e-06, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 8256, + "text_loss": 0.498046875 + }, + { + "epoch": 0.69, + "learning_rate": 2.238452546246533e-06, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 8257, + "text_loss": 0.48046875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2373643115123903e-06, + "loss": 0.3876, + "regression_loss": 0.0, + "step": 8258, + "text_loss": 0.423828125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2362762651204827e-06, + "loss": 0.4672, + "regression_loss": 0.0, + "step": 8259, + "text_loss": 0.5625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2351884071449813e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 8260, + "text_loss": 0.365234375 + }, + { + "epoch": 0.69, + "learning_rate": 2.234100737660058e-06, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 8261, + "text_loss": 0.341796875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2330132567398594e-06, + "loss": 0.4056, + "regression_loss": 0.0, + "step": 8262, + "text_loss": 0.279296875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2319259644585255e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 8263, + "text_loss": 0.6015625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2308388608901827e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 8264, + "text_loss": 0.427734375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2297519461089456e-06, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 8265, + "text_loss": 0.66015625 + }, + { + "epoch": 0.69, + "learning_rate": 2.22866522018891e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 8266, + "text_loss": 0.5390625 + }, + { + "epoch": 0.69, + "learning_rate": 2.227578683204169e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 8267, + "text_loss": 0.515625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2264923352287934e-06, + "loss": 0.4048, + "regression_loss": 0.0, + "step": 8268, + "text_loss": 0.47265625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2254061763368445e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 8269, + "text_loss": 0.49609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2243202066023724e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 8270, + "text_loss": 0.59375 + }, + { + "epoch": 0.69, + "learning_rate": 2.223234426099415e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 8271, + "text_loss": 0.435546875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2221488349019903e-06, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 8272, + "text_loss": 0.6328125 + }, + { + "epoch": 0.69, + "learning_rate": 2.221063433084109e-06, + "loss": 0.5747, + "regression_loss": 0.0, + "step": 8273, + "text_loss": 0.77734375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2199782207197744e-06, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 8274, + "text_loss": 0.58984375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2188931978829643e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 8275, + "text_loss": 0.52734375 + }, + { + "epoch": 0.69, + "learning_rate": 2.217808364647651e-06, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 8276, + "text_loss": 0.396484375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2167237210877952e-06, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 8277, + "text_loss": 0.47265625 + }, + { + "epoch": 0.69, + "learning_rate": 2.2156392672773417e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 8278, + "text_loss": 0.419921875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2145550032902183e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 8279, + "text_loss": 0.578125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2134709292003533e-06, + "loss": 0.469, + "regression_loss": 0.0, + "step": 8280, + "text_loss": 0.453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.212387045081645e-06, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 8281, + "text_loss": 0.34375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2113033510079912e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 8282, + "text_loss": 0.66796875 + }, + { + "epoch": 0.69, + "learning_rate": 2.2102198470532717e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 8283, + "text_loss": 0.3828125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2091365332913556e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 8284, + "text_loss": 0.37109375 + }, + { + "epoch": 0.69, + "learning_rate": 2.208053409796093e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 8285, + "text_loss": 0.5234375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2069704766413325e-06, + "loss": 0.473, + "regression_loss": 0.0, + "step": 8286, + "text_loss": 0.62109375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2058877339008973e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 8287, + "text_loss": 0.49609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2048051816486054e-06, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 8288, + "text_loss": 0.89453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.2037228199582594e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 8289, + "text_loss": 0.384765625 + }, + { + "epoch": 0.69, + "learning_rate": 2.202640648903651e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 8290, + "text_loss": 0.59375 + }, + { + "epoch": 0.69, + "learning_rate": 2.2015586685585515e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 8291, + "text_loss": 0.75 + }, + { + "epoch": 0.69, + "learning_rate": 2.2004768789967318e-06, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 8292, + "text_loss": 0.69140625 + }, + { + "epoch": 0.69, + "learning_rate": 2.199395280291938e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 8293, + "text_loss": 0.41015625 + }, + { + "epoch": 0.69, + "learning_rate": 2.198313872517909e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 8294, + "text_loss": 0.734375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1972326557483696e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 8295, + "text_loss": 0.390625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1961516300570345e-06, + "loss": 0.4156, + "regression_loss": 0.0, + "step": 8296, + "text_loss": 0.435546875 + }, + { + "epoch": 0.69, + "learning_rate": 2.195070795517597e-06, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 8297, + "text_loss": 0.318359375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1939901522037465e-06, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 8298, + "text_loss": 0.484375 + }, + { + "epoch": 0.69, + "learning_rate": 2.192909700189154e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 8299, + "text_loss": 0.796875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1918294395474803e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 8300, + "text_loss": 0.29296875 + }, + { + "epoch": 0.69, + "learning_rate": 2.190749370352372e-06, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 8301, + "text_loss": 0.5390625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1896694926774636e-06, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 8302, + "text_loss": 0.65234375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1885898065963724e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 8303, + "text_loss": 0.29296875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1875103121827075e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 8304, + "text_loss": 0.48828125 + }, + { + "epoch": 0.69, + "learning_rate": 2.186431009510064e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 8305, + "text_loss": 0.3515625 + }, + { + "epoch": 0.69, + "learning_rate": 2.185351898652024e-06, + "loss": 0.3761, + "regression_loss": 0.0, + "step": 8306, + "text_loss": 0.314453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1842729796821503e-06, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 8307, + "text_loss": 0.62890625 + }, + { + "epoch": 0.69, + "learning_rate": 2.183194252674006e-06, + "loss": 0.6296, + "regression_loss": 0.0, + "step": 8308, + "text_loss": 0.78125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1821157177011277e-06, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 8309, + "text_loss": 0.42578125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1810373748370454e-06, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 8310, + "text_loss": 0.9140625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1799592241552753e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 8311, + "text_loss": 0.51171875 + }, + { + "epoch": 0.69, + "learning_rate": 2.178881265729322e-06, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 8312, + "text_loss": 0.392578125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1778034996326703e-06, + "loss": 0.3734, + "regression_loss": 0.0, + "step": 8313, + "text_loss": 0.337890625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1767259259388024e-06, + "loss": 0.4965, + "regression_loss": 0.0, + "step": 8314, + "text_loss": 0.66796875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1756485447211777e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 8315, + "text_loss": 0.6171875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1745713560532485e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 8316, + "text_loss": 0.443359375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1734943600084503e-06, + "loss": 0.5531, + "regression_loss": 0.0, + "step": 8317, + "text_loss": 0.703125 + }, + { + "epoch": 0.69, + "learning_rate": 2.172417556660211e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 8318, + "text_loss": 0.41796875 + }, + { + "epoch": 0.69, + "learning_rate": 2.171340946081934e-06, + "loss": 0.425, + "regression_loss": 0.0, + "step": 8319, + "text_loss": 0.50390625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1702645283470238e-06, + "loss": 0.6089, + "regression_loss": 0.0, + "step": 8320, + "text_loss": 0.51953125 + }, + { + "epoch": 0.69, + "learning_rate": 2.169188303528865e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 8321, + "text_loss": 0.76171875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1681122717008247e-06, + "loss": 0.4194, + "regression_loss": 0.0, + "step": 8322, + "text_loss": 0.275390625 + }, + { + "epoch": 0.69, + "learning_rate": 2.167036432936263e-06, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 8323, + "text_loss": 0.5078125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1659607873085265e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 8324, + "text_loss": 0.84375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1648853348909454e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 8325, + "text_loss": 0.5859375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1638100757568398e-06, + "loss": 0.4447, + "regression_loss": 0.0, + "step": 8326, + "text_loss": 0.388671875 + }, + { + "epoch": 0.69, + "learning_rate": 2.162735009979516e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 8327, + "text_loss": 0.609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1616601376322636e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 8328, + "text_loss": 0.69140625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1605854587883633e-06, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 8329, + "text_loss": 0.33203125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1595109735210813e-06, + "loss": 0.4398, + "regression_loss": 0.0, + "step": 8330, + "text_loss": 0.486328125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1584366819036723e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 8331, + "text_loss": 0.72265625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1573625840093704e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 8332, + "text_loss": 0.609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1562886799114093e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 8333, + "text_loss": 0.455078125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1552149696829967e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 8334, + "text_loss": 0.40234375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1541414533973344e-06, + "loss": 0.6455, + "regression_loss": 0.0, + "step": 8335, + "text_loss": 1.015625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1530681311276096e-06, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 8336, + "text_loss": 0.80859375 + }, + { + "epoch": 0.69, + "learning_rate": 2.151995002946998e-06, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 8337, + "text_loss": 0.59765625 + }, + { + "epoch": 0.69, + "learning_rate": 2.150922068928653e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 8338, + "text_loss": 0.439453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.14984932914573e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 8339, + "text_loss": 0.5859375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1487767836713573e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 8340, + "text_loss": 0.61328125 + }, + { + "epoch": 0.69, + "learning_rate": 2.147704432578658e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 8341, + "text_loss": 0.62890625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1466322759407383e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 8342, + "text_loss": 0.49609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1455603138306947e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 8343, + "text_loss": 0.71875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1444885463216024e-06, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 8344, + "text_loss": 0.61328125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1434169734865367e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 8345, + "text_loss": 0.435546875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1423455953985456e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 8346, + "text_loss": 0.453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1412744121306727e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 8347, + "text_loss": 0.3359375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1402034237559457e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 8348, + "text_loss": 0.44140625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1391326303473805e-06, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 8349, + "text_loss": 0.63671875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1380620319779723e-06, + "loss": 0.4525, + "regression_loss": 0.0, + "step": 8350, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.69, + "learning_rate": 2.1369916287207176e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 8351, + "text_loss": 0.431640625 + }, + { + "epoch": 0.69, + "learning_rate": 2.1359214206485845e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 8352, + "text_loss": 0.6171875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1348514078345368e-06, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 8353, + "text_loss": 0.609375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1337815903515214e-06, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 8354, + "text_loss": 0.48046875 + }, + { + "epoch": 0.69, + "learning_rate": 2.132711968272476e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 8355, + "text_loss": 0.59375 + }, + { + "epoch": 0.69, + "learning_rate": 2.131642541670317e-06, + "loss": 0.4065, + "regression_loss": 0.0, + "step": 8356, + "text_loss": 0.5546875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1305733106179554e-06, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 8357, + "text_loss": 0.5078125 + }, + { + "epoch": 0.69, + "learning_rate": 2.129504275188285e-06, + "loss": 0.6628, + "regression_loss": 0.0, + "step": 8358, + "text_loss": 0.69921875 + }, + { + "epoch": 0.69, + "learning_rate": 2.1284354354541877e-06, + "loss": 0.428, + "regression_loss": 0.0, + "step": 8359, + "text_loss": 0.51171875 + }, + { + "epoch": 0.69, + "learning_rate": 2.127366791488531e-06, + "loss": 0.4194, + "regression_loss": 0.0, + "step": 8360, + "text_loss": 0.37109375 + }, + { + "epoch": 0.69, + "learning_rate": 2.1262983433641715e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 8361, + "text_loss": 0.64453125 + }, + { + "epoch": 0.69, + "learning_rate": 2.125230091153947e-06, + "loss": 0.4321, + "regression_loss": 0.0, + "step": 8362, + "text_loss": 0.34375 + }, + { + "epoch": 0.7, + "learning_rate": 2.124162034930687e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 8363, + "text_loss": 0.54296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.1230941747672063e-06, + "loss": 0.3898, + "regression_loss": 0.0, + "step": 8364, + "text_loss": 0.404296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.1220265107363075e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 8365, + "text_loss": 0.453125 + }, + { + "epoch": 0.7, + "learning_rate": 2.1209590429107734e-06, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 8366, + "text_loss": 0.80078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.1198917713633836e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 8367, + "text_loss": 0.34375 + }, + { + "epoch": 0.7, + "learning_rate": 2.1188246961668995e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 8368, + "text_loss": 0.4375 + }, + { + "epoch": 0.7, + "learning_rate": 2.117757817394063e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 8369, + "text_loss": 0.48046875 + }, + { + "epoch": 0.7, + "learning_rate": 2.116691135117616e-06, + "loss": 0.4614, + "regression_loss": 0.0, + "step": 8370, + "text_loss": 0.52734375 + }, + { + "epoch": 0.7, + "learning_rate": 2.115624649410274e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 8371, + "text_loss": 0.52734375 + }, + { + "epoch": 0.7, + "learning_rate": 2.114558360344745e-06, + "loss": 0.4111, + "regression_loss": 0.0, + "step": 8372, + "text_loss": 0.396484375 + }, + { + "epoch": 0.7, + "learning_rate": 2.113492267993725e-06, + "loss": 0.4803, + "regression_loss": 0.0, + "step": 8373, + "text_loss": 0.369140625 + }, + { + "epoch": 0.7, + "learning_rate": 2.1124263724298953e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 8374, + "text_loss": 0.255859375 + }, + { + "epoch": 0.7, + "learning_rate": 2.1113606737259197e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 8375, + "text_loss": 0.26171875 + }, + { + "epoch": 0.7, + "learning_rate": 2.110295171954454e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 8376, + "text_loss": 0.302734375 + }, + { + "epoch": 0.7, + "learning_rate": 2.109229867188139e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 8377, + "text_loss": 0.4609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.108164759499601e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 8378, + "text_loss": 0.6796875 + }, + { + "epoch": 0.7, + "learning_rate": 2.107099848961453e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 8379, + "text_loss": 0.42578125 + }, + { + "epoch": 0.7, + "learning_rate": 2.1060351356462983e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8380, + "text_loss": 0.5234375 + }, + { + "epoch": 0.7, + "learning_rate": 2.1049706196267193e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 8381, + "text_loss": 0.494140625 + }, + { + "epoch": 0.7, + "learning_rate": 2.1039063009752914e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 8382, + "text_loss": 0.33984375 + }, + { + "epoch": 0.7, + "learning_rate": 2.1028421797645737e-06, + "loss": 0.4222, + "regression_loss": 0.0, + "step": 8383, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.7, + "learning_rate": 2.1017782560671124e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 8384, + "text_loss": 0.419921875 + }, + { + "epoch": 0.7, + "learning_rate": 2.1007145299554415e-06, + "loss": 0.4508, + "regression_loss": 0.0, + "step": 8385, + "text_loss": 0.80078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0996510015020805e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 8386, + "text_loss": 0.404296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0985876707795334e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 8387, + "text_loss": 0.4375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0975245378602926e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 8388, + "text_loss": 0.65625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0964616028168388e-06, + "loss": 0.6045, + "regression_loss": 0.0, + "step": 8389, + "text_loss": 0.5859375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0953988657216377e-06, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 8390, + "text_loss": 0.5078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.094336326647137e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 8391, + "text_loss": 0.50390625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0932739856657813e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 8392, + "text_loss": 0.43359375 + }, + { + "epoch": 0.7, + "learning_rate": 2.092211842849991e-06, + "loss": 0.4767, + "regression_loss": 0.0, + "step": 8393, + "text_loss": 0.46484375 + }, + { + "epoch": 0.7, + "learning_rate": 2.091149898272179e-06, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 8394, + "text_loss": 0.58203125 + }, + { + "epoch": 0.7, + "learning_rate": 2.090088152004743e-06, + "loss": 0.4059, + "regression_loss": 0.0, + "step": 8395, + "text_loss": 0.2294921875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0890266041200692e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 8396, + "text_loss": 0.44140625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0879652546905234e-06, + "loss": 0.4229, + "regression_loss": 0.0, + "step": 8397, + "text_loss": 0.369140625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0869041037884696e-06, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 8398, + "text_loss": 0.29296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0858431514862464e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 8399, + "text_loss": 0.416015625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0847823978561863e-06, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 8400, + "text_loss": 0.55078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0837218429706056e-06, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 8401, + "text_loss": 0.5703125 + }, + { + "epoch": 0.7, + "learning_rate": 2.082661486901809e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 8402, + "text_loss": 0.37890625 + }, + { + "epoch": 0.7, + "learning_rate": 2.081601329722081e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 8403, + "text_loss": 0.625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0805413715037056e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 8404, + "text_loss": 0.5703125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0794816123189384e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 8405, + "text_loss": 0.66015625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0784220522400307e-06, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 8406, + "text_loss": 0.439453125 + }, + { + "epoch": 0.7, + "learning_rate": 2.077362691339218e-06, + "loss": 0.6399, + "regression_loss": 0.0, + "step": 8407, + "text_loss": 0.62890625 + }, + { + "epoch": 0.7, + "learning_rate": 2.076303529688724e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 8408, + "text_loss": 0.62109375 + }, + { + "epoch": 0.7, + "learning_rate": 2.075244567360753e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 8409, + "text_loss": 0.4296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0741858044275015e-06, + "loss": 0.415, + "regression_loss": 0.0, + "step": 8410, + "text_loss": 0.609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.073127240961151e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 8411, + "text_loss": 0.57421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0720688770338674e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 8412, + "text_loss": 0.380859375 + }, + { + "epoch": 0.7, + "learning_rate": 2.071010712717806e-06, + "loss": 0.4453, + "regression_loss": 0.0, + "step": 8413, + "text_loss": 0.3828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0699527480851063e-06, + "loss": 0.5974, + "regression_loss": 0.0, + "step": 8414, + "text_loss": 0.72265625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0688949832078965e-06, + "loss": 0.4979, + "regression_loss": 0.0, + "step": 8415, + "text_loss": 0.75 + }, + { + "epoch": 0.7, + "learning_rate": 2.0678374181582845e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 8416, + "text_loss": 0.474609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0667800530083765e-06, + "loss": 0.418, + "regression_loss": 0.0, + "step": 8417, + "text_loss": 0.2421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0657228878302533e-06, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 8418, + "text_loss": 0.419921875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0646659226959885e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 8419, + "text_loss": 0.373046875 + }, + { + "epoch": 0.7, + "learning_rate": 2.063609157677641e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 8420, + "text_loss": 0.4765625 + }, + { + "epoch": 0.7, + "learning_rate": 2.062552592847256e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 8421, + "text_loss": 0.330078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0614962282768607e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 8422, + "text_loss": 0.53515625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0604400640384784e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8423, + "text_loss": 0.28515625 + }, + { + "epoch": 0.7, + "learning_rate": 2.059384100204109e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 8424, + "text_loss": 0.56640625 + }, + { + "epoch": 0.7, + "learning_rate": 2.058328336845744e-06, + "loss": 0.4313, + "regression_loss": 0.0, + "step": 8425, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0572727740353597e-06, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 8426, + "text_loss": 0.6328125 + }, + { + "epoch": 0.7, + "learning_rate": 2.05621741184492e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 8427, + "text_loss": 0.447265625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0551622503463703e-06, + "loss": 0.3975, + "regression_loss": 0.0, + "step": 8428, + "text_loss": 0.333984375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0541072896116514e-06, + "loss": 0.5879, + "regression_loss": 0.0, + "step": 8429, + "text_loss": 0.80078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.053052529712681e-06, + "loss": 0.412, + "regression_loss": 0.0, + "step": 8430, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0519979707213685e-06, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 8431, + "text_loss": 0.376953125 + }, + { + "epoch": 0.7, + "learning_rate": 2.050943612709608e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 8432, + "text_loss": 0.66015625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0498894557492833e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 8433, + "text_loss": 0.396484375 + }, + { + "epoch": 0.7, + "learning_rate": 2.048835499912256e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 8434, + "text_loss": 0.546875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0477817452703825e-06, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 8435, + "text_loss": 0.423828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.046728191895501e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 8436, + "text_loss": 0.447265625 + }, + { + "epoch": 0.7, + "learning_rate": 2.045674839859439e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 8437, + "text_loss": 0.466796875 + }, + { + "epoch": 0.7, + "learning_rate": 2.044621689234007e-06, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 8438, + "text_loss": 0.91015625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0435687400910064e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8439, + "text_loss": 0.490234375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0425159925022176e-06, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 8440, + "text_loss": 0.48828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0414634465394135e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 8441, + "text_loss": 0.50390625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0404111022743512e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 8442, + "text_loss": 0.44921875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0393589597787763e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 8443, + "text_loss": 0.55859375 + }, + { + "epoch": 0.7, + "learning_rate": 2.038307019124412e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 8444, + "text_loss": 0.228515625 + }, + { + "epoch": 0.7, + "learning_rate": 2.037255280382982e-06, + "loss": 0.5138, + "regression_loss": 0.0, + "step": 8445, + "text_loss": 0.640625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0362037436261833e-06, + "loss": 0.5209, + "regression_loss": 0.0, + "step": 8446, + "text_loss": 0.1982421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0351524089257064e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 8447, + "text_loss": 0.49609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0341012763532243e-06, + "loss": 0.4708, + "regression_loss": 0.0, + "step": 8448, + "text_loss": 0.3671875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0330503459804007e-06, + "loss": 0.418, + "regression_loss": 0.0, + "step": 8449, + "text_loss": 0.41796875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0319996178788775e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 8450, + "text_loss": 0.447265625 + }, + { + "epoch": 0.7, + "learning_rate": 2.030949092120294e-06, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 8451, + "text_loss": 0.5078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0298987687762655e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 8452, + "text_loss": 0.359375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0288486479183984e-06, + "loss": 0.6299, + "regression_loss": 0.0, + "step": 8453, + "text_loss": 0.64453125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0277987296182847e-06, + "loss": 0.4684, + "regression_loss": 0.0, + "step": 8454, + "text_loss": 0.609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.026749013947505e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 8455, + "text_loss": 0.5078125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0256995009776177e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 8456, + "text_loss": 0.388671875 + }, + { + "epoch": 0.7, + "learning_rate": 2.024650190780179e-06, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 8457, + "text_loss": 0.50390625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0236010834267223e-06, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 8458, + "text_loss": 0.48828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.022552178988771e-06, + "loss": 0.3928, + "regression_loss": 0.0, + "step": 8459, + "text_loss": 0.57421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0215034775378336e-06, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 8460, + "text_loss": 0.36328125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0204549791454057e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 8461, + "text_loss": 0.4609375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0194066838829686e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 8462, + "text_loss": 0.7109375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0183585918219894e-06, + "loss": 0.4359, + "regression_loss": 0.0, + "step": 8463, + "text_loss": 0.423828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0173107030339234e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 8464, + "text_loss": 0.48046875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0162630175902063e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 8465, + "text_loss": 0.515625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0152155355622667e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 8466, + "text_loss": 0.279296875 + }, + { + "epoch": 0.7, + "learning_rate": 2.014168257021516e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 8467, + "text_loss": 0.412109375 + }, + { + "epoch": 0.7, + "learning_rate": 2.013121182039353e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 8468, + "text_loss": 0.53515625 + }, + { + "epoch": 0.7, + "learning_rate": 2.0120743106871584e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 8469, + "text_loss": 0.310546875 + }, + { + "epoch": 0.7, + "learning_rate": 2.0110276430363084e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 8470, + "text_loss": 0.482421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.009981179158154e-06, + "loss": 0.3524, + "regression_loss": 0.0, + "step": 8471, + "text_loss": 0.3828125 + }, + { + "epoch": 0.7, + "learning_rate": 2.00893491912404e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 8472, + "text_loss": 0.421875 + }, + { + "epoch": 0.7, + "learning_rate": 2.007888863005295e-06, + "loss": 0.3713, + "regression_loss": 0.0, + "step": 8473, + "text_loss": 0.416015625 + }, + { + "epoch": 0.7, + "learning_rate": 2.006843010873235e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 8474, + "text_loss": 0.318359375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0057973627991556e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 8475, + "text_loss": 0.46484375 + }, + { + "epoch": 0.7, + "learning_rate": 2.0047519188543514e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 8476, + "text_loss": 0.46875 + }, + { + "epoch": 0.7, + "learning_rate": 2.003706679110089e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 8477, + "text_loss": 0.61328125 + }, + { + "epoch": 0.7, + "learning_rate": 2.00266164363763e-06, + "loss": 0.3809, + "regression_loss": 0.0, + "step": 8478, + "text_loss": 0.25 + }, + { + "epoch": 0.7, + "learning_rate": 2.00161681250822e-06, + "loss": 0.4032, + "regression_loss": 0.0, + "step": 8479, + "text_loss": 0.26953125 + }, + { + "epoch": 0.7, + "learning_rate": 2.0005721857930902e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 8480, + "text_loss": 0.4609375 + }, + { + "epoch": 0.7, + "learning_rate": 1.999527763563454e-06, + "loss": 0.41, + "regression_loss": 0.0, + "step": 8481, + "text_loss": 0.65234375 + }, + { + "epoch": 0.7, + "learning_rate": 1.9984835458905217e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 8482, + "text_loss": 0.46875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9974395328454765e-06, + "loss": 0.4835, + "regression_loss": 0.0, + "step": 8483, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.996395724499497e-06, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 8484, + "text_loss": 0.66015625 + }, + { + "epoch": 0.71, + "learning_rate": 1.995352120923743e-06, + "loss": 0.5986, + "regression_loss": 0.0, + "step": 8485, + "text_loss": 0.7578125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9943087221893646e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 8486, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9932655283674907e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 8487, + "text_loss": 0.70703125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9922225395292472e-06, + "loss": 0.475, + "regression_loss": 0.0, + "step": 8488, + "text_loss": 0.466796875 + }, + { + "epoch": 0.71, + "learning_rate": 1.991179755745734e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 8489, + "text_loss": 0.51171875 + }, + { + "epoch": 0.71, + "learning_rate": 1.990137177088045e-06, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 8490, + "text_loss": 0.5703125 + }, + { + "epoch": 0.71, + "learning_rate": 1.989094803627258e-06, + "loss": 0.6328, + "regression_loss": 0.0, + "step": 8491, + "text_loss": 0.66796875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9880526354344376e-06, + "loss": 0.4188, + "regression_loss": 0.0, + "step": 8492, + "text_loss": 0.263671875 + }, + { + "epoch": 0.71, + "learning_rate": 1.987010672580631e-06, + "loss": 0.4828, + "regression_loss": 0.0, + "step": 8493, + "text_loss": 0.62109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.985968915136875e-06, + "loss": 0.6631, + "regression_loss": 0.0, + "step": 8494, + "text_loss": 0.703125 + }, + { + "epoch": 0.71, + "learning_rate": 1.984927363174191e-06, + "loss": 0.4393, + "regression_loss": 0.0, + "step": 8495, + "text_loss": 0.5703125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9838860167635878e-06, + "loss": 0.342, + "regression_loss": 0.0, + "step": 8496, + "text_loss": 0.314453125 + }, + { + "epoch": 0.71, + "learning_rate": 1.982844875976057e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 8497, + "text_loss": 0.287109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.981803940882582e-06, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 8498, + "text_loss": 0.349609375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9807632115541237e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 8499, + "text_loss": 0.58203125 + }, + { + "epoch": 0.71, + "learning_rate": 1.979722688061636e-06, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 8500, + "text_loss": 0.240234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9786823704760562e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 8501, + "text_loss": 0.69921875 + }, + { + "epoch": 0.71, + "learning_rate": 1.977642258868309e-06, + "loss": 0.6006, + "regression_loss": 0.0, + "step": 8502, + "text_loss": 0.75390625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9766023533092985e-06, + "loss": 0.446, + "regression_loss": 0.0, + "step": 8503, + "text_loss": 0.546875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9755626538699284e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 8504, + "text_loss": 0.64453125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9745231606210743e-06, + "loss": 0.4774, + "regression_loss": 0.0, + "step": 8505, + "text_loss": 0.80078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.973483873633605e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 8506, + "text_loss": 0.244140625 + }, + { + "epoch": 0.71, + "learning_rate": 1.972444792978373e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 8507, + "text_loss": 0.4765625 + }, + { + "epoch": 0.71, + "learning_rate": 1.971405918726218e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 8508, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.970367250947966e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 8509, + "text_loss": 0.96875 + }, + { + "epoch": 0.71, + "learning_rate": 1.969328789714427e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 8510, + "text_loss": 0.412109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9682905350963995e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 8511, + "text_loss": 0.50390625 + }, + { + "epoch": 0.71, + "learning_rate": 1.967252487164663e-06, + "loss": 0.478, + "regression_loss": 0.0, + "step": 8512, + "text_loss": 0.26171875 + }, + { + "epoch": 0.71, + "learning_rate": 1.966214645989989e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 8513, + "text_loss": 0.396484375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9651770116431313e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 8514, + "text_loss": 0.455078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9641395841948303e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 8515, + "text_loss": 0.5 + }, + { + "epoch": 0.71, + "learning_rate": 1.963102363715812e-06, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 8516, + "text_loss": 0.494140625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9620653502767923e-06, + "loss": 0.6135, + "regression_loss": 0.0, + "step": 8517, + "text_loss": 0.64453125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9610285439484637e-06, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 8518, + "text_loss": 0.357421875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9599919448015137e-06, + "loss": 0.5353, + "regression_loss": 0.0, + "step": 8519, + "text_loss": 0.61328125 + }, + { + "epoch": 0.71, + "learning_rate": 1.958955552906612e-06, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 8520, + "text_loss": 0.62109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9579193683344157e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 8521, + "text_loss": 0.455078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.956883391155561e-06, + "loss": 0.458, + "regression_loss": 0.0, + "step": 8522, + "text_loss": 0.42578125 + }, + { + "epoch": 0.71, + "learning_rate": 1.955847621440683e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 8523, + "text_loss": 0.330078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9548120592603903e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 8524, + "text_loss": 0.58984375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9537767046852827e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 8525, + "text_loss": 0.5546875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9527415577859465e-06, + "loss": 0.4685, + "regression_loss": 0.0, + "step": 8526, + "text_loss": 0.734375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9517066186329542e-06, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 8527, + "text_loss": 0.439453125 + }, + { + "epoch": 0.71, + "learning_rate": 1.950671887296857e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 8528, + "text_loss": 0.58984375 + }, + { + "epoch": 0.71, + "learning_rate": 1.949637363848205e-06, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 8529, + "text_loss": 0.4296875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9486030483575202e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 8530, + "text_loss": 0.263671875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9475689408953204e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 8531, + "text_loss": 0.71484375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9465350415321043e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 8532, + "text_loss": 0.6953125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9455013503383603e-06, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 8533, + "text_loss": 0.73046875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9444678673845544e-06, + "loss": 0.387, + "regression_loss": 0.0, + "step": 8534, + "text_loss": 0.58203125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9434345927411517e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 8535, + "text_loss": 0.65234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9424015264785906e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 8536, + "text_loss": 0.80859375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9413686686673006e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 8537, + "text_loss": 0.6015625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9403360193776977e-06, + "loss": 0.4453, + "regression_loss": 0.0, + "step": 8538, + "text_loss": 0.275390625 + }, + { + "epoch": 0.71, + "learning_rate": 1.939303578680184e-06, + "loss": 0.4684, + "regression_loss": 0.0, + "step": 8539, + "text_loss": 0.2275390625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9382713466451403e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 8540, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9372393233429466e-06, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 8541, + "text_loss": 0.56640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.936207508843955e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 8542, + "text_loss": 0.4609375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9351759032185125e-06, + "loss": 0.4264, + "regression_loss": 0.0, + "step": 8543, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.71, + "learning_rate": 1.934144506536946e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 8544, + "text_loss": 0.78515625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9331133188695755e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 8545, + "text_loss": 0.62109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.932082340286697e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 8546, + "text_loss": 0.734375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9310515708585982e-06, + "loss": 0.3948, + "regression_loss": 0.0, + "step": 8547, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9300210106555535e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 8548, + "text_loss": 0.80078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9289906597478197e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 8549, + "text_loss": 0.55078125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9279605182056415e-06, + "loss": 0.4165, + "regression_loss": 0.0, + "step": 8550, + "text_loss": 0.51953125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9269305860992506e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 8551, + "text_loss": 0.431640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9259008634988587e-06, + "loss": 0.4347, + "regression_loss": 0.0, + "step": 8552, + "text_loss": 0.251953125 + }, + { + "epoch": 0.71, + "learning_rate": 1.924871350474668e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 8553, + "text_loss": 0.6953125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9238420470968665e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 8554, + "text_loss": 0.68359375 + }, + { + "epoch": 0.71, + "learning_rate": 1.922812953435626e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 8555, + "text_loss": 0.8203125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9217840695611056e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 8556, + "text_loss": 0.5234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9207553955434483e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 8557, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.919726931452786e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 8558, + "text_loss": 0.349609375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9186986773592293e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 8559, + "text_loss": 0.51171875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9176706333328856e-06, + "loss": 0.478, + "regression_loss": 0.0, + "step": 8560, + "text_loss": 0.31640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9166427994438363e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 8561, + "text_loss": 0.65625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9156151757621565e-06, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 8562, + "text_loss": 0.80859375 + }, + { + "epoch": 0.71, + "learning_rate": 1.914587762357903e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 8563, + "text_loss": 0.51953125 + }, + { + "epoch": 0.71, + "learning_rate": 1.913560559301122e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 8564, + "text_loss": 0.6171875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9125335666618376e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 8565, + "text_loss": 0.357421875 + }, + { + "epoch": 0.71, + "learning_rate": 1.911506784510072e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 8566, + "text_loss": 0.62890625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9104802129158205e-06, + "loss": 0.6157, + "regression_loss": 0.0, + "step": 8567, + "text_loss": 0.59375 + }, + { + "epoch": 0.71, + "learning_rate": 1.909453851949071e-06, + "loss": 0.4471, + "regression_loss": 0.0, + "step": 8568, + "text_loss": 0.546875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9084277016797954e-06, + "loss": 0.467, + "regression_loss": 0.0, + "step": 8569, + "text_loss": 0.2734375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9074017621779536e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 8570, + "text_loss": 0.56640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9063760335134852e-06, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 8571, + "text_loss": 0.373046875 + }, + { + "epoch": 0.71, + "learning_rate": 1.9053505157563201e-06, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 8572, + "text_loss": 0.5703125 + }, + { + "epoch": 0.71, + "learning_rate": 1.9043252089763741e-06, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 8573, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.9033001132435463e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 8574, + "text_loss": 0.451171875 + }, + { + "epoch": 0.71, + "learning_rate": 1.902275228627723e-06, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 8575, + "text_loss": 0.30859375 + }, + { + "epoch": 0.71, + "learning_rate": 1.9012505551987764e-06, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 8576, + "text_loss": 0.5 + }, + { + "epoch": 0.71, + "learning_rate": 1.9002260930265614e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 8577, + "text_loss": 0.71484375 + }, + { + "epoch": 0.71, + "learning_rate": 1.899201842180921e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 8578, + "text_loss": 0.490234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8981778027316845e-06, + "loss": 0.634, + "regression_loss": 0.0, + "step": 8579, + "text_loss": 0.8671875 + }, + { + "epoch": 0.71, + "learning_rate": 1.8971539747486657e-06, + "loss": 0.5199, + "regression_loss": 0.0, + "step": 8580, + "text_loss": 0.54296875 + }, + { + "epoch": 0.71, + "learning_rate": 1.8961303583016606e-06, + "loss": 0.4186, + "regression_loss": 0.0, + "step": 8581, + "text_loss": 0.412109375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8951069534604594e-06, + "loss": 0.4294, + "regression_loss": 0.0, + "step": 8582, + "text_loss": 0.322265625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8940837602948282e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 8583, + "text_loss": 0.4375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8930607788745248e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 8584, + "text_loss": 0.77734375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8920380092692902e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 8585, + "text_loss": 0.453125 + }, + { + "epoch": 0.71, + "learning_rate": 1.8910154515488543e-06, + "loss": 0.395, + "regression_loss": 0.0, + "step": 8586, + "text_loss": 0.296875 + }, + { + "epoch": 0.71, + "learning_rate": 1.8899931057829236e-06, + "loss": 0.3914, + "regression_loss": 0.0, + "step": 8587, + "text_loss": 0.4140625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8889709720412037e-06, + "loss": 0.4293, + "regression_loss": 0.0, + "step": 8588, + "text_loss": 0.447265625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8879490503933733e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 8589, + "text_loss": 0.431640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8869273409091033e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 8590, + "text_loss": 0.5625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8859058436580486e-06, + "loss": 0.3794, + "regression_loss": 0.0, + "step": 8591, + "text_loss": 0.408203125 + }, + { + "epoch": 0.71, + "learning_rate": 1.8848845587098513e-06, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 8592, + "text_loss": 0.53515625 + }, + { + "epoch": 0.71, + "learning_rate": 1.883863486134132e-06, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 8593, + "text_loss": 0.53125 + }, + { + "epoch": 0.71, + "learning_rate": 1.88284262600051e-06, + "loss": 0.541, + "regression_loss": 0.0, + "step": 8594, + "text_loss": 0.81640625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8818219783785757e-06, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 8595, + "text_loss": 0.46875 + }, + { + "epoch": 0.71, + "learning_rate": 1.8808015433379139e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 8596, + "text_loss": 0.74609375 + }, + { + "epoch": 0.71, + "learning_rate": 1.879781320948093e-06, + "loss": 0.554, + "regression_loss": 0.0, + "step": 8597, + "text_loss": 0.65234375 + }, + { + "epoch": 0.71, + "learning_rate": 1.878761311278668e-06, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 8598, + "text_loss": 0.359375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8777415143991724e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 8599, + "text_loss": 0.494140625 + }, + { + "epoch": 0.71, + "learning_rate": 1.8767219303791378e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 8600, + "text_loss": 0.5546875 + }, + { + "epoch": 0.71, + "learning_rate": 1.875702559288069e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 8601, + "text_loss": 0.46484375 + }, + { + "epoch": 0.71, + "learning_rate": 1.8746834011954629e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 8602, + "text_loss": 0.375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8736644561708012e-06, + "loss": 0.5052, + "regression_loss": 0.0, + "step": 8603, + "text_loss": 0.54296875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8726457242835495e-06, + "loss": 0.3357, + "regression_loss": 0.0, + "step": 8604, + "text_loss": 0.5625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8716272056031615e-06, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 8605, + "text_loss": 0.5703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8706089001990695e-06, + "loss": 0.4276, + "regression_loss": 0.0, + "step": 8606, + "text_loss": 0.47265625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8695908081407038e-06, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 8607, + "text_loss": 0.474609375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8685729294974668e-06, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 8608, + "text_loss": 0.51171875 + }, + { + "epoch": 0.72, + "learning_rate": 1.867555264338754e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 8609, + "text_loss": 0.68359375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8665378127339445e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 8610, + "text_loss": 0.59375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8655205747524052e-06, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 8611, + "text_loss": 0.474609375 + }, + { + "epoch": 0.72, + "learning_rate": 1.86450355046348e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 8612, + "text_loss": 0.48828125 + }, + { + "epoch": 0.72, + "learning_rate": 1.863486739936512e-06, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 8613, + "text_loss": 0.671875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8624701432408171e-06, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 8614, + "text_loss": 0.56640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8614537604457022e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 8615, + "text_loss": 0.248046875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8604375916204604e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 8616, + "text_loss": 0.357421875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8594216368343699e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 8617, + "text_loss": 0.640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8584058961566876e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 8618, + "text_loss": 0.47265625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8573903696566692e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 8619, + "text_loss": 0.396484375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8563750574035428e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 8620, + "text_loss": 0.6171875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8553599594665284e-06, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 8621, + "text_loss": 0.60546875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8543450759148302e-06, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 8622, + "text_loss": 0.6484375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8533304068176395e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 8623, + "text_loss": 0.51953125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8523159522441281e-06, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 8624, + "text_loss": 0.70703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8513017122634573e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 8625, + "text_loss": 0.72265625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8502876869447739e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 8626, + "text_loss": 0.65625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8492738763572077e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 8627, + "text_loss": 0.7890625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8482602805698758e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 8628, + "text_loss": 0.61328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.847246899651881e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 8629, + "text_loss": 0.482421875 + }, + { + "epoch": 0.72, + "learning_rate": 1.846233733672308e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 8630, + "text_loss": 0.5625 + }, + { + "epoch": 0.72, + "learning_rate": 1.84522078270023e-06, + "loss": 0.4004, + "regression_loss": 0.0, + "step": 8631, + "text_loss": 0.5625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8442080468047046e-06, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 8632, + "text_loss": 0.236328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8431955260547758e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8633, + "text_loss": 0.40625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8421832205194718e-06, + "loss": 0.4209, + "regression_loss": 0.0, + "step": 8634, + "text_loss": 0.5078125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8411711302678076e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8635, + "text_loss": 0.3828125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8401592553687785e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 8636, + "text_loss": 0.3359375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8391475958913724e-06, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 8637, + "text_loss": 0.33203125 + }, + { + "epoch": 0.72, + "learning_rate": 1.838136151904557e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 8638, + "text_loss": 0.5078125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8371249234772904e-06, + "loss": 0.3921, + "regression_loss": 0.0, + "step": 8639, + "text_loss": 0.44140625 + }, + { + "epoch": 0.72, + "learning_rate": 1.836113910678507e-06, + "loss": 0.437, + "regression_loss": 0.0, + "step": 8640, + "text_loss": 0.6171875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8351031135771396e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 8641, + "text_loss": 0.392578125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8340925322420945e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 8642, + "text_loss": 0.482421875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8330821667422683e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 8643, + "text_loss": 0.71875 + }, + { + "epoch": 0.72, + "learning_rate": 1.832072017146544e-06, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 8644, + "text_loss": 0.5234375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8310620835237886e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 8645, + "text_loss": 0.62890625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8300523659428503e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 8646, + "text_loss": 0.46484375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8290428644725726e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 8647, + "text_loss": 0.44140625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8280335791817733e-06, + "loss": 0.479, + "regression_loss": 0.0, + "step": 8648, + "text_loss": 0.435546875 + }, + { + "epoch": 0.72, + "learning_rate": 1.827024510139262e-06, + "loss": 0.4906, + "regression_loss": 0.0, + "step": 8649, + "text_loss": 0.56640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8260156574138316e-06, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 8650, + "text_loss": 0.61328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8250070210742599e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 8651, + "text_loss": 0.478515625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8239986011893118e-06, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 8652, + "text_loss": 0.369140625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8229903978277352e-06, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 8653, + "text_loss": 0.53515625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8219824110582663e-06, + "loss": 0.572, + "regression_loss": 0.0, + "step": 8654, + "text_loss": 0.8125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8209746409496204e-06, + "loss": 0.4259, + "regression_loss": 0.0, + "step": 8655, + "text_loss": 0.373046875 + }, + { + "epoch": 0.72, + "learning_rate": 1.819967087570505e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 8656, + "text_loss": 0.345703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8189597509896089e-06, + "loss": 0.4028, + "regression_loss": 0.0, + "step": 8657, + "text_loss": 0.29296875 + }, + { + "epoch": 0.72, + "learning_rate": 1.817952631275609e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 8658, + "text_loss": 0.2890625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8169457284971603e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 8659, + "text_loss": 0.53515625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8159390427229157e-06, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 8660, + "text_loss": 0.54296875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8149325740214996e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 8661, + "text_loss": 0.23828125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8139263224615306e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 8662, + "text_loss": 0.21875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8129202881116092e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 8663, + "text_loss": 0.51171875 + }, + { + "epoch": 0.72, + "learning_rate": 1.8119144710403237e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 8664, + "text_loss": 0.359375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8109088713162397e-06, + "loss": 0.4342, + "regression_loss": 0.0, + "step": 8665, + "text_loss": 0.2021484375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8099034890079215e-06, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 8666, + "text_loss": 0.45703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8088983241839048e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 8667, + "text_loss": 0.470703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8078933769127193e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 8668, + "text_loss": 0.59765625 + }, + { + "epoch": 0.72, + "learning_rate": 1.806888647262876e-06, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 8669, + "text_loss": 0.56640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.805884135302875e-06, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 8670, + "text_loss": 0.390625 + }, + { + "epoch": 0.72, + "learning_rate": 1.804879841101193e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 8671, + "text_loss": 0.62109375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8038757647263045e-06, + "loss": 0.5569, + "regression_loss": 0.0, + "step": 8672, + "text_loss": 0.55078125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8028719062466571e-06, + "loss": 0.4943, + "regression_loss": 0.0, + "step": 8673, + "text_loss": 0.328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.8018682657306903e-06, + "loss": 0.4742, + "regression_loss": 0.0, + "step": 8674, + "text_loss": 0.5625 + }, + { + "epoch": 0.72, + "learning_rate": 1.8008648432468273e-06, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 8675, + "text_loss": 0.5390625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7998616388634782e-06, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 8676, + "text_loss": 0.50390625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7988586526490303e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 8677, + "text_loss": 0.64453125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7978558846718702e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 8678, + "text_loss": 0.451171875 + }, + { + "epoch": 0.72, + "learning_rate": 1.7968533350003553e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 8679, + "text_loss": 0.625 + }, + { + "epoch": 0.72, + "learning_rate": 1.795851003702836e-06, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 8680, + "text_loss": 0.41015625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7948488908476463e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 8681, + "text_loss": 0.466796875 + }, + { + "epoch": 0.72, + "learning_rate": 1.793846996503107e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 8682, + "text_loss": 0.5 + }, + { + "epoch": 0.72, + "learning_rate": 1.792845320737518e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 8683, + "text_loss": 0.30078125 + }, + { + "epoch": 0.72, + "learning_rate": 1.791843863619171e-06, + "loss": 0.4043, + "regression_loss": 0.0, + "step": 8684, + "text_loss": 0.40234375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7908426252163396e-06, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 8685, + "text_loss": 0.33203125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7898416055972834e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 8686, + "text_loss": 0.59765625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7888408048302457e-06, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 8687, + "text_loss": 0.58203125 + }, + { + "epoch": 0.72, + "learning_rate": 1.787840222983459e-06, + "loss": 0.4384, + "regression_loss": 0.0, + "step": 8688, + "text_loss": 0.35546875 + }, + { + "epoch": 0.72, + "learning_rate": 1.786839860125134e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 8689, + "text_loss": 0.6328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.785839716323472e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 8690, + "text_loss": 0.353515625 + }, + { + "epoch": 0.72, + "learning_rate": 1.784839791646657e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 8691, + "text_loss": 0.640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7838400861628608e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 8692, + "text_loss": 0.60546875 + }, + { + "epoch": 0.72, + "learning_rate": 1.7828405999402338e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 8693, + "text_loss": 0.455078125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7818413330469213e-06, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 8694, + "text_loss": 0.462890625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7808422855510438e-06, + "loss": 0.475, + "regression_loss": 0.0, + "step": 8695, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.72, + "learning_rate": 1.7798434575207113e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 8696, + "text_loss": 0.486328125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7788448490240235e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 8697, + "text_loss": 0.322265625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7778464601290551e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 8698, + "text_loss": 0.71484375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7768482909038732e-06, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 8699, + "text_loss": 0.74609375 + }, + { + "epoch": 0.72, + "learning_rate": 1.775850341416528e-06, + "loss": 0.6003, + "regression_loss": 0.0, + "step": 8700, + "text_loss": 0.4453125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748526117350557e-06, + "loss": 0.4062, + "regression_loss": 0.0, + "step": 8701, + "text_loss": 0.23828125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738551019274714e-06, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 8702, + "text_loss": 0.453125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728578120617873e-06, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 8703, + "text_loss": 0.490234375 + }, + { + "epoch": 0.72, + "learning_rate": 1.771860742205988e-06, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 8704, + "text_loss": 0.384765625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7708638924280508e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 8705, + "text_loss": 0.39453125 + }, + { + "epoch": 0.72, + "learning_rate": 1.769867262795935e-06, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 8706, + "text_loss": 0.52734375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7688708533775878e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 8707, + "text_loss": 0.333984375 + }, + { + "epoch": 0.72, + "learning_rate": 1.767874664240936e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 8708, + "text_loss": 0.3203125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7668786954538958e-06, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 8709, + "text_loss": 0.34375 + }, + { + "epoch": 0.72, + "learning_rate": 1.765882947084368e-06, + "loss": 0.3857, + "regression_loss": 0.0, + "step": 8710, + "text_loss": 0.416015625 + }, + { + "epoch": 0.72, + "learning_rate": 1.764887419200237e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 8711, + "text_loss": 0.4140625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7638921118693724e-06, + "loss": 0.5331, + "regression_loss": 0.0, + "step": 8712, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7628970251596316e-06, + "loss": 0.4999, + "regression_loss": 0.0, + "step": 8713, + "text_loss": 0.31640625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7619021591388508e-06, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 8714, + "text_loss": 0.2421875 + }, + { + "epoch": 0.72, + "learning_rate": 1.760907513874856e-06, + "loss": 0.6096, + "regression_loss": 0.0, + "step": 8715, + "text_loss": 0.9765625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7599130894354582e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 8716, + "text_loss": 0.470703125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7589188858884527e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 8717, + "text_loss": 0.546875 + }, + { + "epoch": 0.72, + "learning_rate": 1.7579249033016144e-06, + "loss": 0.429, + "regression_loss": 0.0, + "step": 8718, + "text_loss": 0.298828125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7569311417427148e-06, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 8719, + "text_loss": 0.50390625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7559376012794981e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 8720, + "text_loss": 0.478515625 + }, + { + "epoch": 0.72, + "learning_rate": 1.7549442819797013e-06, + "loss": 0.4435, + "regression_loss": 0.0, + "step": 8721, + "text_loss": 0.341796875 + }, + { + "epoch": 0.72, + "learning_rate": 1.7539511839110423e-06, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 8722, + "text_loss": 0.51953125 + }, + { + "epoch": 0.72, + "learning_rate": 1.7529583071412282e-06, + "loss": 0.6074, + "regression_loss": 0.0, + "step": 8723, + "text_loss": 0.71875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7519656517379429e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 8724, + "text_loss": 0.6875 + }, + { + "epoch": 0.73, + "learning_rate": 1.750973217768867e-06, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 8725, + "text_loss": 0.4375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7499810053016553e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 8726, + "text_loss": 0.671875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7489890144039522e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 8727, + "text_loss": 0.404296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.747997245143387e-06, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 8728, + "text_loss": 0.416015625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7470056975875755e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 8729, + "text_loss": 0.63671875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7460143718041106e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 8730, + "text_loss": 0.55078125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7450232678605834e-06, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 8731, + "text_loss": 0.484375 + }, + { + "epoch": 0.73, + "learning_rate": 1.744032385824556e-06, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 8732, + "text_loss": 0.390625 + }, + { + "epoch": 0.73, + "learning_rate": 1.743041725763584e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 8733, + "text_loss": 0.609375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7420512877452056e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 8734, + "text_loss": 0.68359375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7410610718369453e-06, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 8735, + "text_loss": 0.416015625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7400710781063073e-06, + "loss": 0.4033, + "regression_loss": 0.0, + "step": 8736, + "text_loss": 0.419921875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7390813066207862e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 8737, + "text_loss": 0.4296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7380917574478596e-06, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 8738, + "text_loss": 0.412109375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7371024306549894e-06, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 8739, + "text_loss": 0.53515625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7361133263096236e-06, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 8740, + "text_loss": 0.71875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7351244444791948e-06, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 8741, + "text_loss": 0.40234375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7341357852311175e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 8742, + "text_loss": 0.55859375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7331473486327927e-06, + "loss": 0.553, + "regression_loss": 0.0, + "step": 8743, + "text_loss": 0.5234375 + }, + { + "epoch": 0.73, + "learning_rate": 1.732159134751612e-06, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 8744, + "text_loss": 0.21484375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7311711436549423e-06, + "loss": 0.4121, + "regression_loss": 0.0, + "step": 8745, + "text_loss": 0.76171875 + }, + { + "epoch": 0.73, + "learning_rate": 1.730183375410141e-06, + "loss": 0.3848, + "regression_loss": 0.0, + "step": 8746, + "text_loss": 0.95703125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7291958300845491e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 8747, + "text_loss": 0.47265625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7282085077454935e-06, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 8748, + "text_loss": 0.453125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7272214084602801e-06, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 8749, + "text_loss": 0.58984375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7262345322962114e-06, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 8750, + "text_loss": 0.60546875 + }, + { + "epoch": 0.73, + "learning_rate": 1.725247879320562e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 8751, + "text_loss": 0.58203125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7242614496005988e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8752, + "text_loss": 0.31640625 + }, + { + "epoch": 0.73, + "learning_rate": 1.723275243203571e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 8753, + "text_loss": 0.5703125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7222892601967145e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 8754, + "text_loss": 0.5625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7213035006472445e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 8755, + "text_loss": 0.384765625 + }, + { + "epoch": 0.73, + "learning_rate": 1.720317964622371e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 8756, + "text_loss": 0.46875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7193326521892783e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 8757, + "text_loss": 0.494140625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7183475634151408e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 8758, + "text_loss": 0.328125 + }, + { + "epoch": 0.73, + "learning_rate": 1.717362698367117e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 8759, + "text_loss": 0.404296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7163780571123523e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 8760, + "text_loss": 0.40234375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7153936397179706e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 8761, + "text_loss": 0.57421875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7144094462510851e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 8762, + "text_loss": 0.578125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7134254767787946e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 8763, + "text_loss": 0.546875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7124417313681806e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 8764, + "text_loss": 0.6875 + }, + { + "epoch": 0.73, + "learning_rate": 1.7114582100863091e-06, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 8765, + "text_loss": 0.259765625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7104749130002341e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 8766, + "text_loss": 0.287109375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7094918401769877e-06, + "loss": 0.4166, + "regression_loss": 0.0, + "step": 8767, + "text_loss": 0.2236328125 + }, + { + "epoch": 0.73, + "learning_rate": 1.7085089916835924e-06, + "loss": 0.5708, + "regression_loss": 0.0, + "step": 8768, + "text_loss": 0.6640625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7075263675870546e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 8769, + "text_loss": 0.197265625 + }, + { + "epoch": 0.73, + "learning_rate": 1.706543967954365e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 8770, + "text_loss": 0.4609375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7055617928524943e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 8771, + "text_loss": 0.3359375 + }, + { + "epoch": 0.73, + "learning_rate": 1.7045798423484083e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 8772, + "text_loss": 0.291015625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7035981165090471e-06, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 8773, + "text_loss": 0.53515625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7026166154013408e-06, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 8774, + "text_loss": 0.58203125 + }, + { + "epoch": 0.73, + "learning_rate": 1.701635339092203e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 8775, + "text_loss": 0.4765625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7006542876485337e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 8776, + "text_loss": 0.404296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6996734611372113e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 8777, + "text_loss": 0.59375 + }, + { + "epoch": 0.73, + "learning_rate": 1.69869285962511e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 8778, + "text_loss": 0.609375 + }, + { + "epoch": 0.73, + "learning_rate": 1.697712483179077e-06, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 8779, + "text_loss": 0.7265625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6967323318659513e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 8780, + "text_loss": 0.310546875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6957524057525538e-06, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 8781, + "text_loss": 0.53125 + }, + { + "epoch": 0.73, + "learning_rate": 1.6947727049056933e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 8782, + "text_loss": 0.337890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6937932293921556e-06, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 8783, + "text_loss": 0.5234375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6928139792787224e-06, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 8784, + "text_loss": 0.53515625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6918349546321494e-06, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 8785, + "text_loss": 0.5390625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6908561555191828e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 8786, + "text_loss": 0.38671875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6898775820065521e-06, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 8787, + "text_loss": 0.373046875 + }, + { + "epoch": 0.73, + "learning_rate": 1.688899234160974e-06, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 8788, + "text_loss": 0.287109375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6879211120491407e-06, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 8789, + "text_loss": 0.62890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6869432157377418e-06, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 8790, + "text_loss": 0.7109375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6859655452934443e-06, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 8791, + "text_loss": 0.482421875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6849881007828977e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 8792, + "text_loss": 0.51953125 + }, + { + "epoch": 0.73, + "learning_rate": 1.6840108822727408e-06, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 8793, + "text_loss": 0.54296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6830338898295955e-06, + "loss": 0.4584, + "regression_loss": 0.0, + "step": 8794, + "text_loss": 0.29296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.68205712352007e-06, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 8795, + "text_loss": 0.294921875 + }, + { + "epoch": 0.73, + "learning_rate": 1.681080583410749e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 8796, + "text_loss": 0.482421875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6801042695682163e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 8797, + "text_loss": 0.48046875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6791281820590256e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 8798, + "text_loss": 0.6484375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6781523209497231e-06, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 8799, + "text_loss": 0.5859375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6771766863068389e-06, + "loss": 0.416, + "regression_loss": 0.0, + "step": 8800, + "text_loss": 0.46484375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6762012781968885e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 8801, + "text_loss": 0.55078125 + }, + { + "epoch": 0.73, + "learning_rate": 1.6752260966863638e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 8802, + "text_loss": 0.369140625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6742511418417556e-06, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 8803, + "text_loss": 0.380859375 + }, + { + "epoch": 0.73, + "learning_rate": 1.673276413729526e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 8804, + "text_loss": 0.3359375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6723019124161282e-06, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 8805, + "text_loss": 0.462890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6713276379679988e-06, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 8806, + "text_loss": 0.400390625 + }, + { + "epoch": 0.73, + "learning_rate": 1.670353590451561e-06, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 8807, + "text_loss": 0.62890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6693797699332148e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 8808, + "text_loss": 0.43359375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6684061764793569e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 8809, + "text_loss": 0.48828125 + }, + { + "epoch": 0.73, + "learning_rate": 1.667432810156357e-06, + "loss": 0.532, + "regression_loss": 0.0, + "step": 8810, + "text_loss": 0.62109375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6664596710305764e-06, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 8811, + "text_loss": 0.625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6654867591683577e-06, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 8812, + "text_loss": 0.796875 + }, + { + "epoch": 0.73, + "learning_rate": 1.664514074636031e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 8813, + "text_loss": 0.404296875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6635416174999041e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 8814, + "text_loss": 0.349609375 + }, + { + "epoch": 0.73, + "learning_rate": 1.662569387826281e-06, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 8815, + "text_loss": 0.62890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6615973856814376e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 8816, + "text_loss": 0.357421875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6606256111316428e-06, + "loss": 0.4114, + "regression_loss": 0.0, + "step": 8817, + "text_loss": 0.76171875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6596540642431459e-06, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 8818, + "text_loss": 0.86328125 + }, + { + "epoch": 0.73, + "learning_rate": 1.658682745082184e-06, + "loss": 0.6128, + "regression_loss": 0.0, + "step": 8819, + "text_loss": 0.734375 + }, + { + "epoch": 0.73, + "learning_rate": 1.657711653714974e-06, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 8820, + "text_loss": 0.3671875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6567407902077203e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 8821, + "text_loss": 0.51953125 + }, + { + "epoch": 0.73, + "learning_rate": 1.655770154626613e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 8822, + "text_loss": 0.66015625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6547997470378235e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8823, + "text_loss": 0.5078125 + }, + { + "epoch": 0.73, + "learning_rate": 1.6538295675075095e-06, + "loss": 0.6101, + "regression_loss": 0.0, + "step": 8824, + "text_loss": 0.6328125 + }, + { + "epoch": 0.73, + "learning_rate": 1.6528596161018152e-06, + "loss": 0.4453, + "regression_loss": 0.0, + "step": 8825, + "text_loss": 0.435546875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6518898928868632e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 8826, + "text_loss": 0.47265625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6509203979287652e-06, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 8827, + "text_loss": 0.1728515625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6499511312936178e-06, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 8828, + "text_loss": 0.50390625 + }, + { + "epoch": 0.73, + "learning_rate": 1.648982093047501e-06, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 8829, + "text_loss": 0.59765625 + }, + { + "epoch": 0.73, + "learning_rate": 1.648013283256474e-06, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 8830, + "text_loss": 0.318359375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6470447019865925e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 8831, + "text_loss": 0.59375 + }, + { + "epoch": 0.73, + "learning_rate": 1.646076349303884e-06, + "loss": 0.4241, + "regression_loss": 0.0, + "step": 8832, + "text_loss": 0.326171875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6451082252743671e-06, + "loss": 0.4178, + "regression_loss": 0.0, + "step": 8833, + "text_loss": 0.388671875 + }, + { + "epoch": 0.73, + "learning_rate": 1.6441403299640441e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 8834, + "text_loss": 0.59765625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6431726634389022e-06, + "loss": 0.3896, + "regression_loss": 0.0, + "step": 8835, + "text_loss": 0.5 + }, + { + "epoch": 0.73, + "learning_rate": 1.642205225764908e-06, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 8836, + "text_loss": 0.5625 + }, + { + "epoch": 0.73, + "learning_rate": 1.64123801700802e-06, + "loss": 0.6084, + "regression_loss": 0.0, + "step": 8837, + "text_loss": 0.50390625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6402710372341784e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 8838, + "text_loss": 0.41015625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6393042865093033e-06, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 8839, + "text_loss": 0.56640625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6383377648993042e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 8840, + "text_loss": 0.5703125 + }, + { + "epoch": 0.73, + "learning_rate": 1.637371472470074e-06, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 8841, + "text_loss": 0.462890625 + }, + { + "epoch": 0.73, + "learning_rate": 1.6364054092874893e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 8842, + "text_loss": 0.224609375 + }, + { + "epoch": 0.73, + "learning_rate": 1.6354395754174112e-06, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 8843, + "text_loss": 0.380859375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6344739709256874e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 8844, + "text_loss": 0.470703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6335085958781439e-06, + "loss": 0.4528, + "regression_loss": 0.0, + "step": 8845, + "text_loss": 0.1923828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6325434503405967e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 8846, + "text_loss": 0.8125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6315785343788448e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 8847, + "text_loss": 0.443359375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6306138480586707e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 8848, + "text_loss": 0.54296875 + }, + { + "epoch": 0.74, + "learning_rate": 1.6296493914458427e-06, + "loss": 0.5713, + "regression_loss": 0.0, + "step": 8849, + "text_loss": 0.515625 + }, + { + "epoch": 0.74, + "learning_rate": 1.628685164606113e-06, + "loss": 0.4591, + "regression_loss": 0.0, + "step": 8850, + "text_loss": 0.490234375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6277211676052151e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 8851, + "text_loss": 0.66015625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6267574005088704e-06, + "loss": 0.4219, + "regression_loss": 0.0, + "step": 8852, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6257938633827835e-06, + "loss": 0.3916, + "regression_loss": 0.0, + "step": 8853, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6248305562926458e-06, + "loss": 0.4114, + "regression_loss": 0.0, + "step": 8854, + "text_loss": 0.283203125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6238674793041254e-06, + "loss": 0.5177, + "regression_loss": 0.0, + "step": 8855, + "text_loss": 0.423828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6229046324828857e-06, + "loss": 0.4286, + "regression_loss": 0.0, + "step": 8856, + "text_loss": 0.470703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6219420158945648e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 8857, + "text_loss": 0.62890625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6209796296047897e-06, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 8858, + "text_loss": 0.302734375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6200174736791713e-06, + "loss": 0.4414, + "regression_loss": 0.0, + "step": 8859, + "text_loss": 0.2734375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6190555481833064e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 8860, + "text_loss": 0.68359375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6180938531827688e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 8861, + "text_loss": 0.640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6171323887431284e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 8862, + "text_loss": 0.349609375 + }, + { + "epoch": 0.74, + "learning_rate": 1.616171154929928e-06, + "loss": 0.4862, + "regression_loss": 0.0, + "step": 8863, + "text_loss": 0.73828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.615210151808701e-06, + "loss": 0.4185, + "regression_loss": 0.0, + "step": 8864, + "text_loss": 0.3828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6142493794449642e-06, + "loss": 0.3672, + "regression_loss": 0.0, + "step": 8865, + "text_loss": 0.263671875 + }, + { + "epoch": 0.74, + "learning_rate": 1.6132888379042199e-06, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 8866, + "text_loss": 0.5 + }, + { + "epoch": 0.74, + "learning_rate": 1.6123285272519468e-06, + "loss": 0.5752, + "regression_loss": 0.0, + "step": 8867, + "text_loss": 0.69921875 + }, + { + "epoch": 0.74, + "learning_rate": 1.6113684475536213e-06, + "loss": 0.5023, + "regression_loss": 0.0, + "step": 8868, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.74, + "learning_rate": 1.610408598874692e-06, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 8869, + "text_loss": 0.251953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.609448981280597e-06, + "loss": 0.4584, + "regression_loss": 0.0, + "step": 8870, + "text_loss": 0.45703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.608489594836759e-06, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 8871, + "text_loss": 0.45703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.6075304396085856e-06, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 8872, + "text_loss": 0.81640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6065715156614636e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 8873, + "text_loss": 0.50390625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6056128230607693e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 8874, + "text_loss": 0.4921875 + }, + { + "epoch": 0.74, + "learning_rate": 1.6046543618718608e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 8875, + "text_loss": 0.546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.6036961321600813e-06, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 8876, + "text_loss": 0.62109375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6027381339907582e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 8877, + "text_loss": 0.484375 + }, + { + "epoch": 0.74, + "learning_rate": 1.6017803674292042e-06, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 8878, + "text_loss": 0.5390625 + }, + { + "epoch": 0.74, + "learning_rate": 1.6008228325407122e-06, + "loss": 0.4294, + "regression_loss": 0.0, + "step": 8879, + "text_loss": 0.478515625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5998655293905634e-06, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 8880, + "text_loss": 0.5390625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5989084580440212e-06, + "loss": 0.395, + "regression_loss": 0.0, + "step": 8881, + "text_loss": 0.60546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.597951618566334e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 8882, + "text_loss": 0.55078125 + }, + { + "epoch": 0.74, + "learning_rate": 1.596995011022735e-06, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 8883, + "text_loss": 0.376953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5960386354784397e-06, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 8884, + "text_loss": 0.70703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5950824919986507e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 8885, + "text_loss": 0.6640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5941265806485483e-06, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 8886, + "text_loss": 0.5 + }, + { + "epoch": 0.74, + "learning_rate": 1.5931709014933082e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 8887, + "text_loss": 0.474609375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5922154545980789e-06, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 8888, + "text_loss": 0.59375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5912602400279986e-06, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 8889, + "text_loss": 0.345703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5903052578481897e-06, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 8890, + "text_loss": 0.341796875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5893505081237598e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 8891, + "text_loss": 0.470703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5883959909197926e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 8892, + "text_loss": 0.44921875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5874417063013702e-06, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 8893, + "text_loss": 0.478515625 + }, + { + "epoch": 0.74, + "learning_rate": 1.586487654333545e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 8894, + "text_loss": 0.78125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5855338350813615e-06, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 8895, + "text_loss": 0.421875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5845802486098461e-06, + "loss": 0.6682, + "regression_loss": 0.0, + "step": 8896, + "text_loss": 0.97265625 + }, + { + "epoch": 0.74, + "learning_rate": 1.58362689498401e-06, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 8897, + "text_loss": 0.322265625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5826737742688464e-06, + "loss": 0.3804, + "regression_loss": 0.0, + "step": 8898, + "text_loss": 0.2255859375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5817208865293344e-06, + "loss": 0.4333, + "regression_loss": 0.0, + "step": 8899, + "text_loss": 0.470703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5807682318304373e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 8900, + "text_loss": 0.408203125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5798158102371026e-06, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 8901, + "text_loss": 0.7109375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5788636218142611e-06, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 8902, + "text_loss": 0.4296875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5779116666268295e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 8903, + "text_loss": 0.42578125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5769599447397044e-06, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 8904, + "text_loss": 0.3203125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5760084562177707e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 8905, + "text_loss": 0.306640625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5750572011258962e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 8906, + "text_loss": 0.5625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5741061795289341e-06, + "loss": 0.4808, + "regression_loss": 0.0, + "step": 8907, + "text_loss": 0.388671875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5731553914917152e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 8908, + "text_loss": 0.52734375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5722048370790661e-06, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 8909, + "text_loss": 0.39453125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5712545163557846e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 8910, + "text_loss": 0.76953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.570304429386662e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 8911, + "text_loss": 0.546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5693545762364693e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 8912, + "text_loss": 0.3671875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5684049569699644e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 8913, + "text_loss": 0.546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5674555716518824e-06, + "loss": 0.4952, + "regression_loss": 0.0, + "step": 8914, + "text_loss": 0.63671875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5665064203469544e-06, + "loss": 0.5092, + "regression_loss": 0.0, + "step": 8915, + "text_loss": 0.416015625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5655575031198833e-06, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 8916, + "text_loss": 0.33984375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5646088200353632e-06, + "loss": 0.4185, + "regression_loss": 0.0, + "step": 8917, + "text_loss": 0.44140625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5636603711580706e-06, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 8918, + "text_loss": 0.314453125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5627121565526675e-06, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 8919, + "text_loss": 0.357421875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5617641762837932e-06, + "loss": 0.4561, + "regression_loss": 0.0, + "step": 8920, + "text_loss": 0.30859375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5608164304160823e-06, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 8921, + "text_loss": 0.546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5598689190141426e-06, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 8922, + "text_loss": 0.298828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5589216421425723e-06, + "loss": 0.4589, + "regression_loss": 0.0, + "step": 8923, + "text_loss": 0.53515625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5579745998659523e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 8924, + "text_loss": 0.48046875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5570277922488476e-06, + "loss": 0.5345, + "regression_loss": 0.0, + "step": 8925, + "text_loss": 0.54296875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5560812193558023e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 8926, + "text_loss": 0.75390625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5551348812513555e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 8927, + "text_loss": 0.451171875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5541887780000187e-06, + "loss": 0.5376, + "regression_loss": 0.0, + "step": 8928, + "text_loss": 0.486328125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5532429096662938e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 8929, + "text_loss": 0.333984375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5522972763146653e-06, + "loss": 0.4087, + "regression_loss": 0.0, + "step": 8930, + "text_loss": 0.421875 + }, + { + "epoch": 0.74, + "learning_rate": 1.551351878009602e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 8931, + "text_loss": 0.298828125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5504067148155572e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 8932, + "text_loss": 0.353515625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5494617867969625e-06, + "loss": 0.416, + "regression_loss": 0.0, + "step": 8933, + "text_loss": 0.4765625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5485170940182447e-06, + "loss": 0.3518, + "regression_loss": 0.0, + "step": 8934, + "text_loss": 0.373046875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5475726365438043e-06, + "loss": 0.4341, + "regression_loss": 0.0, + "step": 8935, + "text_loss": 0.279296875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5466284144380294e-06, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 8936, + "text_loss": 0.49609375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5456844277652932e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 8937, + "text_loss": 0.259765625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5447406765899535e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 8938, + "text_loss": 0.55078125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5437971609763452e-06, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 8939, + "text_loss": 0.52734375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5428538809887988e-06, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 8940, + "text_loss": 0.40234375 + }, + { + "epoch": 0.74, + "learning_rate": 1.541910836691618e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 8941, + "text_loss": 0.51953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5409680281490952e-06, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 8942, + "text_loss": 0.1953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5400254554255072e-06, + "loss": 0.475, + "regression_loss": 0.0, + "step": 8943, + "text_loss": 0.396484375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5390831185851147e-06, + "loss": 0.5811, + "regression_loss": 0.0, + "step": 8944, + "text_loss": 0.4609375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5381410176921574e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 8945, + "text_loss": 0.35546875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5371991528108682e-06, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 8946, + "text_loss": 0.328125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5362575240054546e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 8947, + "text_loss": 0.4609375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5353161313401126e-06, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 8948, + "text_loss": 0.703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5343749748790226e-06, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 8949, + "text_loss": 0.2734375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5334340546863492e-06, + "loss": 0.5632, + "regression_loss": 0.0, + "step": 8950, + "text_loss": 0.44140625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5324933708262358e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 8951, + "text_loss": 0.92578125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5315529233628152e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 8952, + "text_loss": 0.369140625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5306127123602022e-06, + "loss": 0.488, + "regression_loss": 0.0, + "step": 8953, + "text_loss": 0.51953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5296727378824955e-06, + "loss": 0.6414, + "regression_loss": 0.0, + "step": 8954, + "text_loss": 0.4375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5287329999937783e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 8955, + "text_loss": 0.37890625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5277934987581177e-06, + "loss": 0.5746, + "regression_loss": 0.0, + "step": 8956, + "text_loss": 0.55859375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5268542342395614e-06, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 8957, + "text_loss": 0.470703125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5259152065021455e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 8958, + "text_loss": 0.5234375 + }, + { + "epoch": 0.74, + "learning_rate": 1.5249764156098867e-06, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 8959, + "text_loss": 0.51953125 + }, + { + "epoch": 0.74, + "learning_rate": 1.5240378616267887e-06, + "loss": 0.4457, + "regression_loss": 0.0, + "step": 8960, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5230995446168362e-06, + "loss": 0.4097, + "regression_loss": 0.0, + "step": 8961, + "text_loss": 0.26171875 + }, + { + "epoch": 0.74, + "learning_rate": 1.5221614646440008e-06, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 8962, + "text_loss": 0.37890625 + }, + { + "epoch": 0.74, + "learning_rate": 1.5212236217722325e-06, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 8963, + "text_loss": 0.5859375 + }, + { + "epoch": 0.75, + "learning_rate": 1.5202860160654703e-06, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 8964, + "text_loss": 0.8125 + }, + { + "epoch": 0.75, + "learning_rate": 1.5193486475876352e-06, + "loss": 0.4237, + "regression_loss": 0.0, + "step": 8965, + "text_loss": 0.50390625 + }, + { + "epoch": 0.75, + "learning_rate": 1.5184115164026341e-06, + "loss": 0.4904, + "regression_loss": 0.0, + "step": 8966, + "text_loss": 0.37109375 + }, + { + "epoch": 0.75, + "learning_rate": 1.5174746225743503e-06, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 8967, + "text_loss": 0.4296875 + }, + { + "epoch": 0.75, + "learning_rate": 1.5165379661666636e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 8968, + "text_loss": 0.5234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.515601547243425e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 8969, + "text_loss": 0.625 + }, + { + "epoch": 0.75, + "learning_rate": 1.5146653658684761e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 8970, + "text_loss": 0.53125 + }, + { + "epoch": 0.75, + "learning_rate": 1.513729422105642e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 8971, + "text_loss": 0.451171875 + }, + { + "epoch": 0.75, + "learning_rate": 1.512793716018731e-06, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 8972, + "text_loss": 0.455078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.5118582476715304e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 8973, + "text_loss": 0.68359375 + }, + { + "epoch": 0.75, + "learning_rate": 1.5109230171278221e-06, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 8974, + "text_loss": 0.291015625 + }, + { + "epoch": 0.75, + "learning_rate": 1.5099880244513598e-06, + "loss": 0.4777, + "regression_loss": 0.0, + "step": 8975, + "text_loss": 0.734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.509053269705889e-06, + "loss": 0.475, + "regression_loss": 0.0, + "step": 8976, + "text_loss": 0.4921875 + }, + { + "epoch": 0.75, + "learning_rate": 1.508118752955136e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 8977, + "text_loss": 0.302734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.5071844742628112e-06, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 8978, + "text_loss": 0.55078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.5062504336926093e-06, + "loss": 0.489, + "regression_loss": 0.0, + "step": 8979, + "text_loss": 0.6796875 + }, + { + "epoch": 0.75, + "learning_rate": 1.5053166313082079e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 8980, + "text_loss": 0.349609375 + }, + { + "epoch": 0.75, + "learning_rate": 1.5043830671732706e-06, + "loss": 0.3875, + "regression_loss": 0.0, + "step": 8981, + "text_loss": 0.400390625 + }, + { + "epoch": 0.75, + "learning_rate": 1.5034497413514399e-06, + "loss": 0.5771, + "regression_loss": 0.0, + "step": 8982, + "text_loss": 0.67578125 + }, + { + "epoch": 0.75, + "learning_rate": 1.5025166539063468e-06, + "loss": 0.6238, + "regression_loss": 0.0, + "step": 8983, + "text_loss": 0.53125 + }, + { + "epoch": 0.75, + "learning_rate": 1.501583804901604e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 8984, + "text_loss": 0.40625 + }, + { + "epoch": 0.75, + "learning_rate": 1.5006511944008106e-06, + "loss": 0.6626, + "regression_loss": 0.0, + "step": 8985, + "text_loss": 0.7734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4997188224675413e-06, + "loss": 0.4224, + "regression_loss": 0.0, + "step": 8986, + "text_loss": 0.44921875 + }, + { + "epoch": 0.75, + "learning_rate": 1.498786689165368e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 8987, + "text_loss": 0.3671875 + }, + { + "epoch": 0.75, + "learning_rate": 1.497854794557833e-06, + "loss": 0.552, + "regression_loss": 0.0, + "step": 8988, + "text_loss": 0.6796875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4969231387084703e-06, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 8989, + "text_loss": 0.482421875 + }, + { + "epoch": 0.75, + "learning_rate": 1.495991721680795e-06, + "loss": 0.491, + "regression_loss": 0.0, + "step": 8990, + "text_loss": 0.62890625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4950605435383074e-06, + "loss": 0.4346, + "regression_loss": 0.0, + "step": 8991, + "text_loss": 0.322265625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4941296043444869e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 8992, + "text_loss": 0.55859375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4931989041628059e-06, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 8993, + "text_loss": 0.37109375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4922684430567098e-06, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 8994, + "text_loss": 0.2265625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4913382210896344e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 8995, + "text_loss": 0.470703125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4904082383249974e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 8996, + "text_loss": 0.52734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4894784948262015e-06, + "loss": 0.5854, + "regression_loss": 0.0, + "step": 8997, + "text_loss": 0.494140625 + }, + { + "epoch": 0.75, + "learning_rate": 1.488548990656627e-06, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 8998, + "text_loss": 0.60546875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4876197258796499e-06, + "loss": 0.5157, + "regression_loss": 0.0, + "step": 8999, + "text_loss": 0.5234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4866907005586172e-06, + "loss": 0.478, + "regression_loss": 0.0, + "step": 9000, + "text_loss": 0.396484375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4857619147568669e-06, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 9001, + "text_loss": 0.859375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4848333685377186e-06, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 9002, + "text_loss": 0.50390625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4839050619644774e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 9003, + "text_loss": 0.55078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4829769951004258e-06, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 9004, + "text_loss": 0.3515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4820491680088411e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 9005, + "text_loss": 0.46484375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4811215807529727e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 9006, + "text_loss": 0.7890625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4801942333960605e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 9007, + "text_loss": 0.546875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4792671260013258e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 9008, + "text_loss": 0.6640625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4783402586319762e-06, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 9009, + "text_loss": 0.359375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4774136313511977e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 9010, + "text_loss": 0.490234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4764872442221639e-06, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 9011, + "text_loss": 0.5625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4755610973080309e-06, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 9012, + "text_loss": 0.52734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4746351906719398e-06, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 9013, + "text_loss": 0.490234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4737095243770127e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 9014, + "text_loss": 0.3671875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4727840984863596e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 9015, + "text_loss": 0.478515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4718589130630673e-06, + "loss": 0.5341, + "regression_loss": 0.0, + "step": 9016, + "text_loss": 0.498046875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4709339681702116e-06, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 9017, + "text_loss": 0.59375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4700092638708512e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 9018, + "text_loss": 0.3515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4690848002280289e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 9019, + "text_loss": 0.494140625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4681605773047652e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 9020, + "text_loss": 0.3984375 + }, + { + "epoch": 0.75, + "learning_rate": 1.467236595164075e-06, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 9021, + "text_loss": 0.46484375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4663128538689459e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 9022, + "text_loss": 0.734375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4653893534823554e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 9023, + "text_loss": 0.5625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.4326, + "regression_loss": 0.0, + "step": 9024, + "text_loss": 0.5625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4635430756866114e-06, + "loss": 0.4294, + "regression_loss": 0.0, + "step": 9025, + "text_loss": 0.37890625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4626202984033282e-06, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 9026, + "text_loss": 0.5078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.461697762280323e-06, + "loss": 0.4187, + "regression_loss": 0.0, + "step": 9027, + "text_loss": 0.423828125 + }, + { + "epoch": 0.75, + "learning_rate": 1.460775467380491e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 9028, + "text_loss": 0.498046875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4598534137667042e-06, + "loss": 0.5234, + "regression_loss": 0.0, + "step": 9029, + "text_loss": 0.419921875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4589316015018311e-06, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 9030, + "text_loss": 0.58203125 + }, + { + "epoch": 0.75, + "learning_rate": 1.458010030648711e-06, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 9031, + "text_loss": 0.208984375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4570887012701724e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 9032, + "text_loss": 0.287109375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4561676134290282e-06, + "loss": 0.4473, + "regression_loss": 0.0, + "step": 9033, + "text_loss": 0.34765625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4552467671880737e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 9034, + "text_loss": 0.65234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4543261626100852e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 9035, + "text_loss": 0.44140625 + }, + { + "epoch": 0.75, + "learning_rate": 1.453405799757826e-06, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 9036, + "text_loss": 0.384765625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4524856786940423e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 9037, + "text_loss": 0.50390625 + }, + { + "epoch": 0.75, + "learning_rate": 1.451565799481462e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 9038, + "text_loss": 0.65625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4506461621827983e-06, + "loss": 0.4135, + "regression_loss": 0.0, + "step": 9039, + "text_loss": 0.470703125 + }, + { + "epoch": 0.75, + "learning_rate": 1.449726766860749e-06, + "loss": 0.51, + "regression_loss": 0.0, + "step": 9040, + "text_loss": 0.41015625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4488076135779905e-06, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 9041, + "text_loss": 0.55078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4478887023971878e-06, + "loss": 0.55, + "regression_loss": 0.0, + "step": 9042, + "text_loss": 0.5390625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4469700333809866e-06, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 9043, + "text_loss": 0.3125 + }, + { + "epoch": 0.75, + "learning_rate": 1.44605160659202e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 9044, + "text_loss": 0.515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4451334220928952e-06, + "loss": 0.3865, + "regression_loss": 0.0, + "step": 9045, + "text_loss": 0.421875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4442154799462171e-06, + "loss": 0.439, + "regression_loss": 0.0, + "step": 9046, + "text_loss": 0.478515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4432977802145609e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 9047, + "text_loss": 0.625 + }, + { + "epoch": 0.75, + "learning_rate": 1.442380322960492e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 9048, + "text_loss": 0.55078125 + }, + { + "epoch": 0.75, + "learning_rate": 1.441463108246558e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 9049, + "text_loss": 0.69921875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4405461361352918e-06, + "loss": 0.4127, + "regression_loss": 0.0, + "step": 9050, + "text_loss": 0.4140625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4396294066892024e-06, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 9051, + "text_loss": 0.51171875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4387129199707945e-06, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 9052, + "text_loss": 0.337890625 + }, + { + "epoch": 0.75, + "learning_rate": 1.437796676042545e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 9053, + "text_loss": 0.28125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4368806749669196e-06, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 9054, + "text_loss": 0.6796875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4359649168063666e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 9055, + "text_loss": 0.5 + }, + { + "epoch": 0.75, + "learning_rate": 1.4350494016233197e-06, + "loss": 0.382, + "regression_loss": 0.0, + "step": 9056, + "text_loss": 0.65625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4341341294801887e-06, + "loss": 0.4508, + "regression_loss": 0.0, + "step": 9057, + "text_loss": 0.421875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4332191004393792e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 9058, + "text_loss": 0.515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4323043145632676e-06, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 9059, + "text_loss": 0.353515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4313897719142211e-06, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 9060, + "text_loss": 0.490234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4304754725545883e-06, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 9061, + "text_loss": 0.41015625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4295614165467037e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 9062, + "text_loss": 0.31640625 + }, + { + "epoch": 0.75, + "learning_rate": 1.428647603952878e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 9063, + "text_loss": 0.353515625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4277340348354158e-06, + "loss": 0.5084, + "regression_loss": 0.0, + "step": 9064, + "text_loss": 0.263671875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4268207092565956e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 9065, + "text_loss": 0.318359375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4259076272786842e-06, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 9066, + "text_loss": 0.447265625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4249947889639316e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 9067, + "text_loss": 0.44140625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4240821943745714e-06, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 9068, + "text_loss": 0.431640625 + }, + { + "epoch": 0.75, + "learning_rate": 1.423169843572816e-06, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 9069, + "text_loss": 0.5859375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4222577366208672e-06, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 9070, + "text_loss": 0.65234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4213458735809072e-06, + "loss": 0.5496, + "regression_loss": 0.0, + "step": 9071, + "text_loss": 0.6640625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4204342545151023e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 9072, + "text_loss": 0.40234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4195228794856025e-06, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 9073, + "text_loss": 0.59765625 + }, + { + "epoch": 0.75, + "learning_rate": 1.4186117485545397e-06, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 9074, + "text_loss": 0.58203125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4177008617840322e-06, + "loss": 0.4899, + "regression_loss": 0.0, + "step": 9075, + "text_loss": 0.5 + }, + { + "epoch": 0.75, + "learning_rate": 1.4167902192361743e-06, + "loss": 0.4491, + "regression_loss": 0.0, + "step": 9076, + "text_loss": 0.435546875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4158798209730561e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 9077, + "text_loss": 0.39453125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4149696670567387e-06, + "loss": 0.4067, + "regression_loss": 0.0, + "step": 9078, + "text_loss": 0.423828125 + }, + { + "epoch": 0.75, + "learning_rate": 1.414059757549273e-06, + "loss": 0.5863, + "regression_loss": 0.0, + "step": 9079, + "text_loss": 0.5703125 + }, + { + "epoch": 0.75, + "learning_rate": 1.4131500925126924e-06, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 9080, + "text_loss": 0.63671875 + }, + { + "epoch": 0.75, + "learning_rate": 1.4122406720090143e-06, + "loss": 0.636, + "regression_loss": 0.0, + "step": 9081, + "text_loss": 0.69921875 + }, + { + "epoch": 0.75, + "learning_rate": 1.411331496100234e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 9082, + "text_loss": 0.62109375 + }, + { + "epoch": 0.75, + "learning_rate": 1.41042256484834e-06, + "loss": 0.5106, + "regression_loss": 0.0, + "step": 9083, + "text_loss": 0.490234375 + }, + { + "epoch": 0.75, + "learning_rate": 1.4095138783152944e-06, + "loss": 0.4116, + "regression_loss": 0.0, + "step": 9084, + "text_loss": 0.470703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.4086054365630486e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 9085, + "text_loss": 0.65625 + }, + { + "epoch": 0.76, + "learning_rate": 1.407697239653535e-06, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 9086, + "text_loss": 0.69140625 + }, + { + "epoch": 0.76, + "learning_rate": 1.4067892876486716e-06, + "loss": 0.4701, + "regression_loss": 0.0, + "step": 9087, + "text_loss": 0.388671875 + }, + { + "epoch": 0.76, + "learning_rate": 1.4058815806103542e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 9088, + "text_loss": 0.52734375 + }, + { + "epoch": 0.76, + "learning_rate": 1.4049741186004678e-06, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 9089, + "text_loss": 0.546875 + }, + { + "epoch": 0.76, + "learning_rate": 1.4040669016808778e-06, + "loss": 0.6372, + "regression_loss": 0.0, + "step": 9090, + "text_loss": 0.54296875 + }, + { + "epoch": 0.76, + "learning_rate": 1.4031599299134342e-06, + "loss": 0.4158, + "regression_loss": 0.0, + "step": 9091, + "text_loss": 0.61328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.4022532033599696e-06, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 9092, + "text_loss": 0.32421875 + }, + { + "epoch": 0.76, + "learning_rate": 1.4013467220823012e-06, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 9093, + "text_loss": 0.34765625 + }, + { + "epoch": 0.76, + "learning_rate": 1.400440486142225e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 9094, + "text_loss": 0.298828125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3995344956015255e-06, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 9095, + "text_loss": 0.50390625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3986287505219681e-06, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 9096, + "text_loss": 0.4296875 + }, + { + "epoch": 0.76, + "learning_rate": 1.397723250965302e-06, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 9097, + "text_loss": 0.53515625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3968179969932594e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 9098, + "text_loss": 0.66796875 + }, + { + "epoch": 0.76, + "learning_rate": 1.395912988667557e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 9099, + "text_loss": 0.68359375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3950082260498915e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 9100, + "text_loss": 0.353515625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3941037092019455e-06, + "loss": 0.6653, + "regression_loss": 0.0, + "step": 9101, + "text_loss": 0.5 + }, + { + "epoch": 0.76, + "learning_rate": 1.3931994381853847e-06, + "loss": 0.4449, + "regression_loss": 0.0, + "step": 9102, + "text_loss": 0.69140625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3922954130618594e-06, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 9103, + "text_loss": 0.66796875 + }, + { + "epoch": 0.76, + "learning_rate": 1.391391633892996e-06, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 9104, + "text_loss": 0.6328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3904881007404164e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 9105, + "text_loss": 0.65234375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3895848136657142e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9106, + "text_loss": 0.625 + }, + { + "epoch": 0.76, + "learning_rate": 1.388681772730472e-06, + "loss": 0.4067, + "regression_loss": 0.0, + "step": 9107, + "text_loss": 0.40625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3877789779962543e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 9108, + "text_loss": 0.412109375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3868764295246113e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 9109, + "text_loss": 0.7421875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3859741273770684e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 9110, + "text_loss": 0.5703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3850720716151472e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 9111, + "text_loss": 0.30078125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3841702623003394e-06, + "loss": 0.4393, + "regression_loss": 0.0, + "step": 9112, + "text_loss": 0.62109375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3832686994941285e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 9113, + "text_loss": 0.70703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3823673832579777e-06, + "loss": 0.5734, + "regression_loss": 0.0, + "step": 9114, + "text_loss": 0.703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3814663136533353e-06, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 9115, + "text_loss": 0.5546875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3805654907416277e-06, + "loss": 0.4657, + "regression_loss": 0.0, + "step": 9116, + "text_loss": 0.42578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.379664914584274e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 9117, + "text_loss": 0.31640625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3787645852426663e-06, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 9118, + "text_loss": 0.56640625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3778645027781863e-06, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 9119, + "text_loss": 0.41796875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3769646672521964e-06, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 9120, + "text_loss": 0.404296875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3760650787260428e-06, + "loss": 0.4347, + "regression_loss": 0.0, + "step": 9121, + "text_loss": 0.4375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3751657372610567e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 9122, + "text_loss": 0.58984375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3742666429185465e-06, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 9123, + "text_loss": 0.5859375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3733677957598123e-06, + "loss": 0.4502, + "regression_loss": 0.0, + "step": 9124, + "text_loss": 0.6328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.37246919584613e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 9125, + "text_loss": 0.384765625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3715708432387614e-06, + "loss": 0.3987, + "regression_loss": 0.0, + "step": 9126, + "text_loss": 0.349609375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3706727379989531e-06, + "loss": 0.5624, + "regression_loss": 0.0, + "step": 9127, + "text_loss": 0.41796875 + }, + { + "epoch": 0.76, + "learning_rate": 1.369774880187934e-06, + "loss": 0.4686, + "regression_loss": 0.0, + "step": 9128, + "text_loss": 0.435546875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3688772698669106e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 9129, + "text_loss": 0.578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3679799070970839e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 9130, + "text_loss": 0.302734375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3670827919396274e-06, + "loss": 0.4967, + "regression_loss": 0.0, + "step": 9131, + "text_loss": 0.34375 + }, + { + "epoch": 0.76, + "learning_rate": 1.366185924455702e-06, + "loss": 0.4614, + "regression_loss": 0.0, + "step": 9132, + "text_loss": 0.412109375 + }, + { + "epoch": 0.76, + "learning_rate": 1.365289304706452e-06, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 9133, + "text_loss": 0.43359375 + }, + { + "epoch": 0.76, + "learning_rate": 1.364392932753006e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 9134, + "text_loss": 0.49609375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3634968086564698e-06, + "loss": 0.563, + "regression_loss": 0.0, + "step": 9135, + "text_loss": 0.390625 + }, + { + "epoch": 0.76, + "learning_rate": 1.362600932477942e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 9136, + "text_loss": 0.408203125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3617053042784945e-06, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 9137, + "text_loss": 0.42578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.360809924119188e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 9138, + "text_loss": 0.578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3599147920610651e-06, + "loss": 0.575, + "regression_loss": 0.0, + "step": 9139, + "text_loss": 0.6171875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3590199081651522e-06, + "loss": 0.5857, + "regression_loss": 0.0, + "step": 9140, + "text_loss": 0.427734375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3581252724924543e-06, + "loss": 0.45, + "regression_loss": 0.0, + "step": 9141, + "text_loss": 0.64453125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3572308851039684e-06, + "loss": 0.511, + "regression_loss": 0.0, + "step": 9142, + "text_loss": 0.375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3563367460606653e-06, + "loss": 0.5876, + "regression_loss": 0.0, + "step": 9143, + "text_loss": 0.37890625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3554428554235032e-06, + "loss": 0.4215, + "regression_loss": 0.0, + "step": 9144, + "text_loss": 0.369140625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3545492132534238e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 9145, + "text_loss": 0.33203125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3536558196113526e-06, + "loss": 0.5862, + "regression_loss": 0.0, + "step": 9146, + "text_loss": 0.7578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.352762674558193e-06, + "loss": 0.54, + "regression_loss": 0.0, + "step": 9147, + "text_loss": 0.51953125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3518697781548368e-06, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 9148, + "text_loss": 0.69140625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3509771304621572e-06, + "loss": 0.464, + "regression_loss": 0.0, + "step": 9149, + "text_loss": 0.263671875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3500847315410099e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 9150, + "text_loss": 0.470703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3491925814522345e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 9151, + "text_loss": 0.5234375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3483006802566546e-06, + "loss": 0.573, + "regression_loss": 0.0, + "step": 9152, + "text_loss": 0.388671875 + }, + { + "epoch": 0.76, + "learning_rate": 1.347409028015072e-06, + "loss": 0.5038, + "regression_loss": 0.0, + "step": 9153, + "text_loss": 0.53515625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3465176247882772e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 9154, + "text_loss": 0.376953125 + }, + { + "epoch": 0.76, + "learning_rate": 1.345626470637041e-06, + "loss": 0.4143, + "regression_loss": 0.0, + "step": 9155, + "text_loss": 0.19140625 + }, + { + "epoch": 0.76, + "learning_rate": 1.344735565622119e-06, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 9156, + "text_loss": 0.578125 + }, + { + "epoch": 0.76, + "learning_rate": 1.343844909804244e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 9157, + "text_loss": 0.36328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3429545032441428e-06, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 9158, + "text_loss": 0.3125 + }, + { + "epoch": 0.76, + "learning_rate": 1.342064346002514e-06, + "loss": 0.4087, + "regression_loss": 0.0, + "step": 9159, + "text_loss": 0.482421875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3411744381400455e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 9160, + "text_loss": 0.38671875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3402847797174056e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 9161, + "text_loss": 0.45703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.33939537079525e-06, + "loss": 0.53, + "regression_loss": 0.0, + "step": 9162, + "text_loss": 0.34765625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3385062114342085e-06, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 9163, + "text_loss": 0.53515625 + }, + { + "epoch": 0.76, + "learning_rate": 1.337617301694905e-06, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 9164, + "text_loss": 0.70703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3367286416379366e-06, + "loss": 0.3845, + "regression_loss": 0.0, + "step": 9165, + "text_loss": 0.3046875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3358402313238894e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 9166, + "text_loss": 0.62890625 + }, + { + "epoch": 0.76, + "learning_rate": 1.33495207081333e-06, + "loss": 0.3962, + "regression_loss": 0.0, + "step": 9167, + "text_loss": 0.35546875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3340641601668097e-06, + "loss": 0.4226, + "regression_loss": 0.0, + "step": 9168, + "text_loss": 0.50390625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3331764994448604e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 9169, + "text_loss": 0.333984375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3322890887079986e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 9170, + "text_loss": 0.72265625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3314019280167256e-06, + "loss": 0.4657, + "regression_loss": 0.0, + "step": 9171, + "text_loss": 0.6328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3305150174315196e-06, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 9172, + "text_loss": 0.69921875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3296283570128472e-06, + "loss": 0.5148, + "regression_loss": 0.0, + "step": 9173, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3287419468211565e-06, + "loss": 0.4353, + "regression_loss": 0.0, + "step": 9174, + "text_loss": 0.36328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3278557869168784e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 9175, + "text_loss": 0.70703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3269698773604268e-06, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 9176, + "text_loss": 0.41015625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3260842182121998e-06, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 9177, + "text_loss": 0.55859375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3251988095325735e-06, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 9178, + "text_loss": 0.291015625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3243136513819132e-06, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 9179, + "text_loss": 0.484375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3234287438205635e-06, + "loss": 0.4404, + "regression_loss": 0.0, + "step": 9180, + "text_loss": 0.7890625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3225440869088546e-06, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 9181, + "text_loss": 0.59375 + }, + { + "epoch": 0.76, + "learning_rate": 1.321659680707093e-06, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 9182, + "text_loss": 0.5 + }, + { + "epoch": 0.76, + "learning_rate": 1.32077552527558e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 9183, + "text_loss": 0.39453125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3198916206745871e-06, + "loss": 0.449, + "regression_loss": 0.0, + "step": 9184, + "text_loss": 0.470703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3190079669643763e-06, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 9185, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3181245642051904e-06, + "loss": 0.3792, + "regression_loss": 0.0, + "step": 9186, + "text_loss": 0.6328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.317241412457257e-06, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 9187, + "text_loss": 0.447265625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3163585117807797e-06, + "loss": 0.5205, + "regression_loss": 0.0, + "step": 9188, + "text_loss": 0.3828125 + }, + { + "epoch": 0.76, + "learning_rate": 1.315475862235957e-06, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 9189, + "text_loss": 0.6875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3145934638829588e-06, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 9190, + "text_loss": 0.419921875 + }, + { + "epoch": 0.76, + "learning_rate": 1.3137113167819431e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 9191, + "text_loss": 0.4765625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3128294209930508e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 9192, + "text_loss": 0.271484375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3119477765764065e-06, + "loss": 0.5806, + "regression_loss": 0.0, + "step": 9193, + "text_loss": 0.7109375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3110663835921118e-06, + "loss": 0.6831, + "regression_loss": 0.0, + "step": 9194, + "text_loss": 0.61328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3101852421002603e-06, + "loss": 0.4272, + "regression_loss": 0.0, + "step": 9195, + "text_loss": 0.306640625 + }, + { + "epoch": 0.76, + "learning_rate": 1.309304352160921e-06, + "loss": 0.5275, + "regression_loss": 0.0, + "step": 9196, + "text_loss": 0.48828125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3084237138341487e-06, + "loss": 0.4434, + "regression_loss": 0.0, + "step": 9197, + "text_loss": 0.6328125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3075433271799815e-06, + "loss": 0.6013, + "regression_loss": 0.0, + "step": 9198, + "text_loss": 0.58203125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3066631922584406e-06, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 9199, + "text_loss": 0.47265625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3057833091295257e-06, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 9200, + "text_loss": 0.51953125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3049036778532253e-06, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 9201, + "text_loss": 0.5703125 + }, + { + "epoch": 0.76, + "learning_rate": 1.3040242984895069e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 9202, + "text_loss": 0.33984375 + }, + { + "epoch": 0.76, + "learning_rate": 1.3031451710983233e-06, + "loss": 0.5352, + "regression_loss": 0.0, + "step": 9203, + "text_loss": 0.50390625 + }, + { + "epoch": 0.76, + "learning_rate": 1.3022662957396076e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 9204, + "text_loss": 0.4140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.3013876724732793e-06, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 9205, + "text_loss": 0.451171875 + }, + { + "epoch": 0.77, + "learning_rate": 1.3005093013592356e-06, + "loss": 0.481, + "regression_loss": 0.0, + "step": 9206, + "text_loss": 0.65625 + }, + { + "epoch": 0.77, + "learning_rate": 1.29963118245736e-06, + "loss": 0.5969, + "regression_loss": 0.0, + "step": 9207, + "text_loss": 0.39453125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2987533158275185e-06, + "loss": 0.6021, + "regression_loss": 0.0, + "step": 9208, + "text_loss": 0.54296875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2978757015295596e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 9209, + "text_loss": 0.365234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2969983396233144e-06, + "loss": 0.5341, + "regression_loss": 0.0, + "step": 9210, + "text_loss": 0.2099609375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2961212301685982e-06, + "loss": 0.3853, + "regression_loss": 0.0, + "step": 9211, + "text_loss": 0.203125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2952443732252058e-06, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 9212, + "text_loss": 0.51171875 + }, + { + "epoch": 0.77, + "learning_rate": 1.294367768852915e-06, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 9213, + "text_loss": 0.51953125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2934914171114942e-06, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 9214, + "text_loss": 0.47265625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2926153180606838e-06, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 9215, + "text_loss": 0.494140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2917394717602123e-06, + "loss": 0.3994, + "regression_loss": 0.0, + "step": 9216, + "text_loss": 0.4140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2908638782697913e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 9217, + "text_loss": 0.337890625 + }, + { + "epoch": 0.77, + "learning_rate": 1.289988537649115e-06, + "loss": 0.5323, + "regression_loss": 0.0, + "step": 9218, + "text_loss": 0.66015625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2891134499578562e-06, + "loss": 0.4229, + "regression_loss": 0.0, + "step": 9219, + "text_loss": 0.23046875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2882386152556786e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 9220, + "text_loss": 0.49609375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2873640336022203e-06, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 9221, + "text_loss": 0.58984375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2864897050571069e-06, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 9222, + "text_loss": 0.361328125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2856156296799454e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 9223, + "text_loss": 0.3359375 + }, + { + "epoch": 0.77, + "learning_rate": 1.284741807530328e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 9224, + "text_loss": 0.5625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2838682386678235e-06, + "loss": 0.4728, + "regression_loss": 0.0, + "step": 9225, + "text_loss": 0.2119140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2829949231519894e-06, + "loss": 0.5859, + "regression_loss": 0.0, + "step": 9226, + "text_loss": 0.6015625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2821218610423635e-06, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 9227, + "text_loss": 0.388671875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2812490523984671e-06, + "loss": 0.531, + "regression_loss": 0.0, + "step": 9228, + "text_loss": 0.6640625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2803764972798034e-06, + "loss": 0.509, + "regression_loss": 0.0, + "step": 9229, + "text_loss": 0.435546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2795041957458598e-06, + "loss": 0.3716, + "regression_loss": 0.0, + "step": 9230, + "text_loss": 0.203125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2786321478561031e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 9231, + "text_loss": 0.490234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2777603536699867e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 9232, + "text_loss": 0.33984375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2768888132469442e-06, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 9233, + "text_loss": 0.326171875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2760175266463941e-06, + "loss": 0.4266, + "regression_loss": 0.0, + "step": 9234, + "text_loss": 0.7421875 + }, + { + "epoch": 0.77, + "learning_rate": 1.275146493927733e-06, + "loss": 0.4611, + "regression_loss": 0.0, + "step": 9235, + "text_loss": 0.455078125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2742757151503483e-06, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 9236, + "text_loss": 0.5390625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2734051903736005e-06, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 9237, + "text_loss": 0.494140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2725349196568398e-06, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 9238, + "text_loss": 0.455078125 + }, + { + "epoch": 0.77, + "learning_rate": 1.271664903059397e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9239, + "text_loss": 0.73828125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2707951406405855e-06, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 9240, + "text_loss": 0.6953125 + }, + { + "epoch": 0.77, + "learning_rate": 1.269925632459698e-06, + "loss": 0.4913, + "regression_loss": 0.0, + "step": 9241, + "text_loss": 0.2158203125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2690563785760184e-06, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 9242, + "text_loss": 0.451171875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2681873790488035e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 9243, + "text_loss": 0.28515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2673186339372994e-06, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 9244, + "text_loss": 0.58203125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2664501433007314e-06, + "loss": 0.4191, + "regression_loss": 0.0, + "step": 9245, + "text_loss": 0.5078125 + }, + { + "epoch": 0.77, + "learning_rate": 1.265581907198311e-06, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 9246, + "text_loss": 0.56640625 + }, + { + "epoch": 0.77, + "learning_rate": 1.264713925689226e-06, + "loss": 0.4908, + "regression_loss": 0.0, + "step": 9247, + "text_loss": 0.70703125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2638461988326556e-06, + "loss": 0.426, + "regression_loss": 0.0, + "step": 9248, + "text_loss": 0.27734375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2629787266877536e-06, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 9249, + "text_loss": 0.41015625 + }, + { + "epoch": 0.77, + "learning_rate": 1.262111509313661e-06, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 9250, + "text_loss": 0.384765625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2612445467694995e-06, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 9251, + "text_loss": 0.515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2603778391143761e-06, + "loss": 0.4314, + "regression_loss": 0.0, + "step": 9252, + "text_loss": 0.392578125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2595113864073743e-06, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 9253, + "text_loss": 0.447265625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2586451887075706e-06, + "loss": 0.3972, + "regression_loss": 0.0, + "step": 9254, + "text_loss": 0.240234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2577792460740124e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 9255, + "text_loss": 0.58984375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2569135585657372e-06, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 9256, + "text_loss": 0.4453125 + }, + { + "epoch": 0.77, + "learning_rate": 1.256048126241763e-06, + "loss": 0.4669, + "regression_loss": 0.0, + "step": 9257, + "text_loss": 0.59765625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2551829491610916e-06, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 9258, + "text_loss": 0.578125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2543180273827044e-06, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 9259, + "text_loss": 0.302734375 + }, + { + "epoch": 0.77, + "learning_rate": 1.253453360965567e-06, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 9260, + "text_loss": 0.7109375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2525889499686312e-06, + "loss": 0.3911, + "regression_loss": 0.0, + "step": 9261, + "text_loss": 0.28515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2517247944508248e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 9262, + "text_loss": 0.3515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2508608944710625e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 9263, + "text_loss": 0.39453125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2499972500882412e-06, + "loss": 0.4491, + "regression_loss": 0.0, + "step": 9264, + "text_loss": 0.275390625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2491338613612402e-06, + "loss": 0.46, + "regression_loss": 0.0, + "step": 9265, + "text_loss": 0.490234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.248270728348917e-06, + "loss": 0.4447, + "regression_loss": 0.0, + "step": 9266, + "text_loss": 0.35546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2474078511101212e-06, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 9267, + "text_loss": 0.5703125 + }, + { + "epoch": 0.77, + "learning_rate": 1.246545229703675e-06, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 9268, + "text_loss": 0.58984375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2456828641883895e-06, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 9269, + "text_loss": 0.65234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.244820754623055e-06, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 9270, + "text_loss": 0.35546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2439589010664488e-06, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 9271, + "text_loss": 0.53125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2430973035773218e-06, + "loss": 0.4043, + "regression_loss": 0.0, + "step": 9272, + "text_loss": 0.25390625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2422359622144198e-06, + "loss": 0.443, + "regression_loss": 0.0, + "step": 9273, + "text_loss": 0.46875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2413748770364602e-06, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 9274, + "text_loss": 0.490234375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2405140481021487e-06, + "loss": 0.47, + "regression_loss": 0.0, + "step": 9275, + "text_loss": 0.4296875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2396534754701722e-06, + "loss": 0.5, + "regression_loss": 0.0, + "step": 9276, + "text_loss": 0.44921875 + }, + { + "epoch": 0.77, + "learning_rate": 1.238793159199202e-06, + "loss": 0.4464, + "regression_loss": 0.0, + "step": 9277, + "text_loss": 0.6015625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2379330993478844e-06, + "loss": 0.443, + "regression_loss": 0.0, + "step": 9278, + "text_loss": 0.54296875 + }, + { + "epoch": 0.77, + "learning_rate": 1.237073295974861e-06, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 9279, + "text_loss": 0.546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2362137491387433e-06, + "loss": 0.3932, + "regression_loss": 0.0, + "step": 9280, + "text_loss": 0.341796875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2353544588981332e-06, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 9281, + "text_loss": 0.42578125 + }, + { + "epoch": 0.77, + "learning_rate": 1.234495425311612e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 9282, + "text_loss": 0.357421875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2336366484377465e-06, + "loss": 0.4297, + "regression_loss": 0.0, + "step": 9283, + "text_loss": 0.287109375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2327781283350792e-06, + "loss": 0.4344, + "regression_loss": 0.0, + "step": 9284, + "text_loss": 0.2177734375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2319198650621428e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 9285, + "text_loss": 0.353515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2310618586774481e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 9286, + "text_loss": 0.6796875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2302041092394895e-06, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 9287, + "text_loss": 0.4296875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2293466168067448e-06, + "loss": 0.49, + "regression_loss": 0.0, + "step": 9288, + "text_loss": 0.64453125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2284893814376737e-06, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 9289, + "text_loss": 0.59765625 + }, + { + "epoch": 0.77, + "learning_rate": 1.227632403190716e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 9290, + "text_loss": 0.546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2267756821242976e-06, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 9291, + "text_loss": 0.6875 + }, + { + "epoch": 0.77, + "learning_rate": 1.225919218296825e-06, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 9292, + "text_loss": 0.57421875 + }, + { + "epoch": 0.77, + "learning_rate": 1.225063011766689e-06, + "loss": 0.4437, + "regression_loss": 0.0, + "step": 9293, + "text_loss": 0.53125 + }, + { + "epoch": 0.77, + "learning_rate": 1.224207062592257e-06, + "loss": 0.4349, + "regression_loss": 0.0, + "step": 9294, + "text_loss": 0.484375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2233513708318884e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 9295, + "text_loss": 0.31640625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2224959365439165e-06, + "loss": 0.474, + "regression_loss": 0.0, + "step": 9296, + "text_loss": 0.53515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2216407597866613e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 9297, + "text_loss": 0.60546875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2207858406184242e-06, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 9298, + "text_loss": 0.578125 + }, + { + "epoch": 0.77, + "learning_rate": 1.219931179097491e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 9299, + "text_loss": 0.27734375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2190767752821236e-06, + "loss": 0.6196, + "regression_loss": 0.0, + "step": 9300, + "text_loss": 0.466796875 + }, + { + "epoch": 0.77, + "learning_rate": 1.218222629230577e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 9301, + "text_loss": 0.48046875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2173687410010776e-06, + "loss": 0.4569, + "regression_loss": 0.0, + "step": 9302, + "text_loss": 0.4921875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2165151106518402e-06, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 9303, + "text_loss": 0.53515625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2156617382410617e-06, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 9304, + "text_loss": 0.369140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2148086238269219e-06, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 9305, + "text_loss": 0.376953125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2139557674675773e-06, + "loss": 0.4321, + "regression_loss": 0.0, + "step": 9306, + "text_loss": 0.3828125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2131031692211754e-06, + "loss": 0.468, + "regression_loss": 0.0, + "step": 9307, + "text_loss": 0.62109375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2122508291458418e-06, + "loss": 0.5494, + "regression_loss": 0.0, + "step": 9308, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.77, + "learning_rate": 1.2113987472996823e-06, + "loss": 0.5405, + "regression_loss": 0.0, + "step": 9309, + "text_loss": 0.68359375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2105469237407885e-06, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 9310, + "text_loss": 0.734375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2096953585272337e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 9311, + "text_loss": 0.74609375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2088440517170729e-06, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 9312, + "text_loss": 0.388671875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2079930033683435e-06, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 9313, + "text_loss": 0.447265625 + }, + { + "epoch": 0.77, + "learning_rate": 1.207142213539068e-06, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 9314, + "text_loss": 0.5625 + }, + { + "epoch": 0.77, + "learning_rate": 1.206291682287245e-06, + "loss": 0.4653, + "regression_loss": 0.0, + "step": 9315, + "text_loss": 0.376953125 + }, + { + "epoch": 0.77, + "learning_rate": 1.205441409670861e-06, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 9316, + "text_loss": 0.6875 + }, + { + "epoch": 0.77, + "learning_rate": 1.2045913957478838e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 9317, + "text_loss": 0.359375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2037416405762637e-06, + "loss": 0.5052, + "regression_loss": 0.0, + "step": 9318, + "text_loss": 0.5859375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2028921442139286e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 9319, + "text_loss": 0.62109375 + }, + { + "epoch": 0.77, + "learning_rate": 1.2020429067187984e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 9320, + "text_loss": 0.390625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2011939281487655e-06, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 9321, + "text_loss": 0.625 + }, + { + "epoch": 0.77, + "learning_rate": 1.2003452085617102e-06, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 9322, + "text_loss": 0.69140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.199496748015494e-06, + "loss": 0.4352, + "regression_loss": 0.0, + "step": 9323, + "text_loss": 0.369140625 + }, + { + "epoch": 0.77, + "learning_rate": 1.198648546567962e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 9324, + "text_loss": 0.57421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1978006042769358e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 9325, + "text_loss": 0.39453125 + }, + { + "epoch": 0.78, + "learning_rate": 1.196952921200229e-06, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 9326, + "text_loss": 0.734375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1961054973956288e-06, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 9327, + "text_loss": 0.41015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1952583329209095e-06, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 9328, + "text_loss": 0.203125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1944114278338254e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 9329, + "text_loss": 0.51171875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1935647821921164e-06, + "loss": 0.6516, + "regression_loss": 0.0, + "step": 9330, + "text_loss": 0.51171875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1927183960534982e-06, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 9331, + "text_loss": 0.265625 + }, + { + "epoch": 0.78, + "learning_rate": 1.191872269475679e-06, + "loss": 0.4287, + "regression_loss": 0.0, + "step": 9332, + "text_loss": 0.34375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1910264025163381e-06, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 9333, + "text_loss": 0.435546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1901807952331445e-06, + "loss": 0.4749, + "regression_loss": 0.0, + "step": 9334, + "text_loss": 0.53515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1893354476837465e-06, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 9335, + "text_loss": 0.30078125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1884903599257786e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9336, + "text_loss": 0.326171875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1876455320168495e-06, + "loss": 0.5457, + "regression_loss": 0.0, + "step": 9337, + "text_loss": 0.5703125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1868009640145583e-06, + "loss": 0.4241, + "regression_loss": 0.0, + "step": 9338, + "text_loss": 0.48828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1859566559764828e-06, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 9339, + "text_loss": 0.498046875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1851126079601838e-06, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 9340, + "text_loss": 0.6640625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1842688200232034e-06, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 9341, + "text_loss": 0.53125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1834252922230687e-06, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 9342, + "text_loss": 0.49609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1825820246172843e-06, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 9343, + "text_loss": 0.6328125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1817390172633402e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 9344, + "text_loss": 0.373046875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1808962702187099e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 9345, + "text_loss": 0.734375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1800537835408482e-06, + "loss": 0.516, + "regression_loss": 0.0, + "step": 9346, + "text_loss": 0.5234375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1792115572871871e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 9347, + "text_loss": 0.310546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.178369591515151e-06, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 9348, + "text_loss": 0.6953125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1775278862821366e-06, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 9349, + "text_loss": 0.52734375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1766864416455282e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 9350, + "text_loss": 0.55078125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1758452576626917e-06, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 9351, + "text_loss": 0.443359375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1750043343909756e-06, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 9352, + "text_loss": 0.5390625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1741636718877053e-06, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 9353, + "text_loss": 0.396484375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1733232702101976e-06, + "loss": 0.551, + "regression_loss": 0.0, + "step": 9354, + "text_loss": 0.5546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1724831294157463e-06, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 9355, + "text_loss": 0.515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1716432495616241e-06, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 9356, + "text_loss": 0.6328125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1708036307050951e-06, + "loss": 0.564, + "regression_loss": 0.0, + "step": 9357, + "text_loss": 0.76171875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1699642729033956e-06, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 9358, + "text_loss": 0.796875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1691251762137507e-06, + "loss": 0.3936, + "regression_loss": 0.0, + "step": 9359, + "text_loss": 0.353515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1682863406933654e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 9360, + "text_loss": 0.55859375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1674477663994282e-06, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 9361, + "text_loss": 0.63671875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1666094533891065e-06, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 9362, + "text_loss": 0.404296875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1657714017195533e-06, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 9363, + "text_loss": 0.400390625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1649336114479027e-06, + "loss": 0.3992, + "regression_loss": 0.0, + "step": 9364, + "text_loss": 0.53125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1640960826312708e-06, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 9365, + "text_loss": 0.5390625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1632588153267565e-06, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 9366, + "text_loss": 0.5 + }, + { + "epoch": 0.78, + "learning_rate": 1.1624218095914409e-06, + "loss": 0.4275, + "regression_loss": 0.0, + "step": 9367, + "text_loss": 0.263671875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1615850654823846e-06, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 9368, + "text_loss": 0.48828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1607485830566334e-06, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 9369, + "text_loss": 0.2890625 + }, + { + "epoch": 0.78, + "learning_rate": 1.159912362371215e-06, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 9370, + "text_loss": 0.49609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1590764034831399e-06, + "loss": 0.4313, + "regression_loss": 0.0, + "step": 9371, + "text_loss": 0.3984375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1582407064493938e-06, + "loss": 0.4696, + "regression_loss": 0.0, + "step": 9372, + "text_loss": 0.4609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.157405271326958e-06, + "loss": 0.4202, + "regression_loss": 0.0, + "step": 9373, + "text_loss": 0.37890625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1565700981727829e-06, + "loss": 0.4916, + "regression_loss": 0.0, + "step": 9374, + "text_loss": 0.68359375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1557351870438078e-06, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 9375, + "text_loss": 0.423828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.154900537996952e-06, + "loss": 0.561, + "regression_loss": 0.0, + "step": 9376, + "text_loss": 0.55078125 + }, + { + "epoch": 0.78, + "learning_rate": 1.15406615108912e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9377, + "text_loss": 0.337890625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1532320263771911e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 9378, + "text_loss": 0.271484375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1523981639180376e-06, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 9379, + "text_loss": 0.470703125 + }, + { + "epoch": 0.78, + "learning_rate": 1.151564563768503e-06, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 9380, + "text_loss": 0.74609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1507312259854203e-06, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 9381, + "text_loss": 0.62109375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1498981506256019e-06, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 9382, + "text_loss": 0.376953125 + }, + { + "epoch": 0.78, + "learning_rate": 1.149065337745844e-06, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 9383, + "text_loss": 0.32421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.148232787402918e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 9384, + "text_loss": 0.41015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1474004996535903e-06, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 9385, + "text_loss": 0.318359375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1465684745545969e-06, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 9386, + "text_loss": 0.58984375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1457367121626622e-06, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 9387, + "text_loss": 0.337890625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1449052125344923e-06, + "loss": 0.3995, + "regression_loss": 0.0, + "step": 9388, + "text_loss": 0.4609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.144073975726775e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 9389, + "text_loss": 0.65234375 + }, + { + "epoch": 0.78, + "learning_rate": 1.143243001796176e-06, + "loss": 0.3918, + "regression_loss": 0.0, + "step": 9390, + "text_loss": 0.361328125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1424122907993524e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 9391, + "text_loss": 0.349609375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1415818427929332e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 9392, + "text_loss": 0.384765625 + }, + { + "epoch": 0.78, + "learning_rate": 1.140751657833536e-06, + "loss": 0.543, + "regression_loss": 0.0, + "step": 9393, + "text_loss": 0.3828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1399217359777581e-06, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 9394, + "text_loss": 0.255859375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1390920772821812e-06, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 9395, + "text_loss": 0.55859375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1382626818033644e-06, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 9396, + "text_loss": 0.369140625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1374335495978517e-06, + "loss": 0.499, + "regression_loss": 0.0, + "step": 9397, + "text_loss": 0.357421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1366046807221709e-06, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 9398, + "text_loss": 0.255859375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1357760752328283e-06, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 9399, + "text_loss": 0.48828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.134947733186315e-06, + "loss": 0.4701, + "regression_loss": 0.0, + "step": 9400, + "text_loss": 0.59765625 + }, + { + "epoch": 0.78, + "learning_rate": 1.134119654639103e-06, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 9401, + "text_loss": 0.5546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1332918396476477e-06, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 9402, + "text_loss": 0.5859375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1324642882683807e-06, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 9403, + "text_loss": 0.482421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.131637000557726e-06, + "loss": 0.4821, + "regression_loss": 0.0, + "step": 9404, + "text_loss": 0.64453125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1308099765720797e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 9405, + "text_loss": 0.6796875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1299832163678258e-06, + "loss": 0.4982, + "regression_loss": 0.0, + "step": 9406, + "text_loss": 0.48828125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1291567200013276e-06, + "loss": 0.3918, + "regression_loss": 0.0, + "step": 9407, + "text_loss": 0.32421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1283304875289335e-06, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 9408, + "text_loss": 0.421875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1275045190069677e-06, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 9409, + "text_loss": 0.322265625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1266788144917451e-06, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 9410, + "text_loss": 0.60546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.125853374039555e-06, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 9411, + "text_loss": 0.279296875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1250281977066719e-06, + "loss": 0.4391, + "regression_loss": 0.0, + "step": 9412, + "text_loss": 0.2041015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1242032855493529e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 9413, + "text_loss": 0.7109375 + }, + { + "epoch": 0.78, + "learning_rate": 1.123378637623837e-06, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 9414, + "text_loss": 0.5390625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1225542539863422e-06, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 9415, + "text_loss": 0.37890625 + }, + { + "epoch": 0.78, + "learning_rate": 1.121730134693072e-06, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 9416, + "text_loss": 0.53515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1209062798002108e-06, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 9417, + "text_loss": 0.578125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1200826893639238e-06, + "loss": 0.519, + "regression_loss": 0.0, + "step": 9418, + "text_loss": 0.322265625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1192593634403603e-06, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 9419, + "text_loss": 0.259765625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1184363020856508e-06, + "loss": 0.5057, + "regression_loss": 0.0, + "step": 9420, + "text_loss": 0.376953125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1176135053559055e-06, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 9421, + "text_loss": 0.419921875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1167909733072197e-06, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 9422, + "text_loss": 0.392578125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1159687059956687e-06, + "loss": 0.6042, + "regression_loss": 0.0, + "step": 9423, + "text_loss": 0.6015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1151467034773111e-06, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 9424, + "text_loss": 0.29296875 + }, + { + "epoch": 0.78, + "learning_rate": 1.1143249658081862e-06, + "loss": 0.434, + "regression_loss": 0.0, + "step": 9425, + "text_loss": 0.5390625 + }, + { + "epoch": 0.78, + "learning_rate": 1.113503493044318e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 9426, + "text_loss": 0.578125 + }, + { + "epoch": 0.78, + "learning_rate": 1.112682285241707e-06, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 9427, + "text_loss": 0.66015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1118613424563401e-06, + "loss": 0.4419, + "regression_loss": 0.0, + "step": 9428, + "text_loss": 0.41015625 + }, + { + "epoch": 0.78, + "learning_rate": 1.111040664744185e-06, + "loss": 0.4354, + "regression_loss": 0.0, + "step": 9429, + "text_loss": 0.47265625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1102202521611933e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 9430, + "text_loss": 0.55078125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1094001047632918e-06, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 9431, + "text_loss": 0.51953125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1085802226063997e-06, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 9432, + "text_loss": 0.56640625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1077606057464075e-06, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 9433, + "text_loss": 0.359375 + }, + { + "epoch": 0.78, + "learning_rate": 1.106941254239195e-06, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 9434, + "text_loss": 0.52734375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1061221681406198e-06, + "loss": 0.446, + "regression_loss": 0.0, + "step": 9435, + "text_loss": 0.58984375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1053033475065261e-06, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 9436, + "text_loss": 0.34765625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1044847923927316e-06, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 9437, + "text_loss": 0.515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.1036665028550465e-06, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 9438, + "text_loss": 0.68359375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1028484789492543e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 9439, + "text_loss": 0.470703125 + }, + { + "epoch": 0.78, + "learning_rate": 1.1020307207311244e-06, + "loss": 0.5596, + "regression_loss": 0.0, + "step": 9440, + "text_loss": 0.52734375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1012132282564075e-06, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 9441, + "text_loss": 0.458984375 + }, + { + "epoch": 0.78, + "learning_rate": 1.1003960015808379e-06, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 9442, + "text_loss": 0.515625 + }, + { + "epoch": 0.78, + "learning_rate": 1.0995790407601248e-06, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 9443, + "text_loss": 0.35546875 + }, + { + "epoch": 0.78, + "learning_rate": 1.0987623458499703e-06, + "loss": 0.5955, + "regression_loss": 0.0, + "step": 9444, + "text_loss": 0.6953125 + }, + { + "epoch": 0.78, + "learning_rate": 1.097945916906048e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 9445, + "text_loss": 0.55859375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0971297539840203e-06, + "loss": 0.4379, + "regression_loss": 0.0, + "step": 9446, + "text_loss": 0.427734375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0963138571395277e-06, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 9447, + "text_loss": 0.30078125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0954982264281943e-06, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 9448, + "text_loss": 0.359375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0946828619056276e-06, + "loss": 0.521, + "regression_loss": 0.0, + "step": 9449, + "text_loss": 0.455078125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0938677636274097e-06, + "loss": 0.4358, + "regression_loss": 0.0, + "step": 9450, + "text_loss": 0.61328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0930529316491163e-06, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 9451, + "text_loss": 0.3828125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0922383660262931e-06, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 9452, + "text_loss": 0.267578125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0914240668144754e-06, + "loss": 0.4045, + "regression_loss": 0.0, + "step": 9453, + "text_loss": 0.408203125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0906100340691778e-06, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 9454, + "text_loss": 0.466796875 + }, + { + "epoch": 0.79, + "learning_rate": 1.089796267845898e-06, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 9455, + "text_loss": 0.53515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0889827682001097e-06, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 9456, + "text_loss": 0.58984375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0881695351872796e-06, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 9457, + "text_loss": 0.5 + }, + { + "epoch": 0.79, + "learning_rate": 1.087356568862845e-06, + "loss": 0.457, + "regression_loss": 0.0, + "step": 9458, + "text_loss": 0.67578125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0865438692822317e-06, + "loss": 0.3962, + "regression_loss": 0.0, + "step": 9459, + "text_loss": 0.3203125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0857314365008448e-06, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 9460, + "text_loss": 0.5234375 + }, + { + "epoch": 0.79, + "learning_rate": 1.084919270574073e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 9461, + "text_loss": 0.279296875 + }, + { + "epoch": 0.79, + "learning_rate": 1.084107371557282e-06, + "loss": 0.4219, + "regression_loss": 0.0, + "step": 9462, + "text_loss": 0.310546875 + }, + { + "epoch": 0.79, + "learning_rate": 1.083295739505828e-06, + "loss": 0.5321, + "regression_loss": 0.0, + "step": 9463, + "text_loss": 0.60546875 + }, + { + "epoch": 0.79, + "learning_rate": 1.08248437447504e-06, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 9464, + "text_loss": 0.7265625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0816732765202337e-06, + "loss": 0.443, + "regression_loss": 0.0, + "step": 9465, + "text_loss": 0.6328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.080862445696706e-06, + "loss": 0.542, + "regression_loss": 0.0, + "step": 9466, + "text_loss": 0.72265625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0800518820597366e-06, + "loss": 0.4368, + "regression_loss": 0.0, + "step": 9467, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.79, + "learning_rate": 1.079241585664581e-06, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 9468, + "text_loss": 0.52734375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0784315565664866e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 9469, + "text_loss": 0.76171875 + }, + { + "epoch": 0.79, + "learning_rate": 1.077621794820673e-06, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 9470, + "text_loss": 0.38671875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0768123004823467e-06, + "loss": 0.3749, + "regression_loss": 0.0, + "step": 9471, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0760030736066952e-06, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 9472, + "text_loss": 0.5 + }, + { + "epoch": 0.79, + "learning_rate": 1.0751941142488887e-06, + "loss": 0.495, + "regression_loss": 0.0, + "step": 9473, + "text_loss": 0.2294921875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0743854224640748e-06, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 9474, + "text_loss": 0.51953125 + }, + { + "epoch": 0.79, + "learning_rate": 1.073576998307388e-06, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 9475, + "text_loss": 0.447265625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0727688418339415e-06, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 9476, + "text_loss": 0.490234375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0719609530988322e-06, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 9477, + "text_loss": 0.306640625 + }, + { + "epoch": 0.79, + "learning_rate": 1.071153332157137e-06, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 9478, + "text_loss": 0.3984375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0703459790639175e-06, + "loss": 0.557, + "regression_loss": 0.0, + "step": 9479, + "text_loss": 0.2197265625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0695388938742113e-06, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 9480, + "text_loss": 0.37890625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0687320766430432e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 9481, + "text_loss": 0.72265625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0679255274254174e-06, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 9482, + "text_loss": 0.5390625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0671192462763225e-06, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 9483, + "text_loss": 0.37890625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0663132332507214e-06, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 9484, + "text_loss": 0.388671875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0655074884035704e-06, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 9485, + "text_loss": 0.53515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0647020117897961e-06, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 9486, + "text_loss": 0.60546875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0638968034643132e-06, + "loss": 0.498, + "regression_loss": 0.0, + "step": 9487, + "text_loss": 0.7421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.063091863482017e-06, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 9488, + "text_loss": 0.296875 + }, + { + "epoch": 0.79, + "learning_rate": 1.062287191897785e-06, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 9489, + "text_loss": 0.45703125 + }, + { + "epoch": 0.79, + "learning_rate": 1.061482788766472e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 9490, + "text_loss": 0.359375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0606786541429236e-06, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 9491, + "text_loss": 0.423828125 + }, + { + "epoch": 0.79, + "learning_rate": 1.059874788081957e-06, + "loss": 0.5276, + "regression_loss": 0.0, + "step": 9492, + "text_loss": 0.66015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0590711906383772e-06, + "loss": 0.5013, + "regression_loss": 0.0, + "step": 9493, + "text_loss": 0.33203125 + }, + { + "epoch": 0.79, + "learning_rate": 1.058267861866969e-06, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 9494, + "text_loss": 0.6328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0574648018224998e-06, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 9495, + "text_loss": 0.314453125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0566620105597175e-06, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9496, + "text_loss": 0.6796875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0558594881333528e-06, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 9497, + "text_loss": 0.462890625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0550572345981185e-06, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 9498, + "text_loss": 0.41015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0542552500087055e-06, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 9499, + "text_loss": 0.53515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0534535344197905e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 9500, + "text_loss": 0.41015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0526520878860302e-06, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 9501, + "text_loss": 0.73828125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0518509104620629e-06, + "loss": 0.5894, + "regression_loss": 0.0, + "step": 9502, + "text_loss": 0.62109375 + }, + { + "epoch": 0.79, + "learning_rate": 1.051050002202509e-06, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 9503, + "text_loss": 0.73046875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0502493631619715e-06, + "loss": 0.3894, + "regression_loss": 0.0, + "step": 9504, + "text_loss": 0.384765625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0494489933950314e-06, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 9505, + "text_loss": 0.57421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0486488929562544e-06, + "loss": 0.528, + "regression_loss": 0.0, + "step": 9506, + "text_loss": 0.73046875 + }, + { + "epoch": 0.79, + "learning_rate": 1.047849061900188e-06, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 9507, + "text_loss": 0.4609375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0470495002813618e-06, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 9508, + "text_loss": 0.283203125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0462502081542814e-06, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 9509, + "text_loss": 0.482421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0454511855734439e-06, + "loss": 0.502, + "regression_loss": 0.0, + "step": 9510, + "text_loss": 0.3671875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0446524325933184e-06, + "loss": 0.3571, + "regression_loss": 0.0, + "step": 9511, + "text_loss": 0.380859375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0438539492683614e-06, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 9512, + "text_loss": 0.6015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.043055735653009e-06, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 9513, + "text_loss": 0.75390625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0422577918016814e-06, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 9514, + "text_loss": 0.53125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0414601177687734e-06, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 9515, + "text_loss": 0.44140625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0406627136086717e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 9516, + "text_loss": 0.5859375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0398655793757357e-06, + "loss": 0.426, + "regression_loss": 0.0, + "step": 9517, + "text_loss": 0.5234375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0390687151243112e-06, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 9518, + "text_loss": 0.431640625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0382721209087238e-06, + "loss": 0.4352, + "regression_loss": 0.0, + "step": 9519, + "text_loss": 0.640625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0374757967832826e-06, + "loss": 0.427, + "regression_loss": 0.0, + "step": 9520, + "text_loss": 0.486328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.036679742802273e-06, + "loss": 0.5818, + "regression_loss": 0.0, + "step": 9521, + "text_loss": 0.365234375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0358839590199716e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 9522, + "text_loss": 0.4609375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0350884454906262e-06, + "loss": 0.5605, + "regression_loss": 0.0, + "step": 9523, + "text_loss": 0.64453125 + }, + { + "epoch": 0.79, + "learning_rate": 1.034293202268472e-06, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 9524, + "text_loss": 0.46484375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0334982294077256e-06, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 9525, + "text_loss": 0.453125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0327035269625845e-06, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 9526, + "text_loss": 0.328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.031909094987224e-06, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 9527, + "text_loss": 0.474609375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0311149335358096e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 9528, + "text_loss": 0.384765625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0303210426624782e-06, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 9529, + "text_loss": 0.40234375 + }, + { + "epoch": 0.79, + "learning_rate": 1.029527422421356e-06, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 9530, + "text_loss": 0.259765625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0287340728665475e-06, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 9531, + "text_loss": 0.5 + }, + { + "epoch": 0.79, + "learning_rate": 1.0279409940521395e-06, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 9532, + "text_loss": 0.498046875 + }, + { + "epoch": 0.79, + "learning_rate": 1.027148186032198e-06, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 9533, + "text_loss": 0.494140625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0263556488607744e-06, + "loss": 0.501, + "regression_loss": 0.0, + "step": 9534, + "text_loss": 0.6015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0255633825918993e-06, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 9535, + "text_loss": 0.34765625 + }, + { + "epoch": 0.79, + "learning_rate": 1.024771387279585e-06, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 9536, + "text_loss": 0.55078125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0239796629778265e-06, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 9537, + "text_loss": 0.57421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0231882097406004e-06, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 9538, + "text_loss": 0.59765625 + }, + { + "epoch": 0.79, + "learning_rate": 1.022397027621861e-06, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 9539, + "text_loss": 0.48828125 + }, + { + "epoch": 0.79, + "learning_rate": 1.021606116675548e-06, + "loss": 0.4596, + "regression_loss": 0.0, + "step": 9540, + "text_loss": 0.2060546875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0208154769555828e-06, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 9541, + "text_loss": 0.6015625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0200251085158664e-06, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 9542, + "text_loss": 0.62109375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0192350114102818e-06, + "loss": 0.4109, + "regression_loss": 0.0, + "step": 9543, + "text_loss": 0.28515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0184451856926942e-06, + "loss": 0.583, + "regression_loss": 0.0, + "step": 9544, + "text_loss": 0.703125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0176556314169506e-06, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 9545, + "text_loss": 0.609375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0168663486368757e-06, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 9546, + "text_loss": 0.52734375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0160773374062833e-06, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9547, + "text_loss": 0.3515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0152885977789606e-06, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 9548, + "text_loss": 0.4375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0145001298086805e-06, + "loss": 0.512, + "regression_loss": 0.0, + "step": 9549, + "text_loss": 0.298828125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0137119335491968e-06, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 9550, + "text_loss": 0.51953125 + }, + { + "epoch": 0.79, + "learning_rate": 1.012924009054247e-06, + "loss": 0.52, + "regression_loss": 0.0, + "step": 9551, + "text_loss": 0.46484375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0121363563775433e-06, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 9552, + "text_loss": 0.53515625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0113489755727868e-06, + "loss": 0.4843, + "regression_loss": 0.0, + "step": 9553, + "text_loss": 0.53125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0105618666936557e-06, + "loss": 0.4199, + "regression_loss": 0.0, + "step": 9554, + "text_loss": 0.54296875 + }, + { + "epoch": 0.79, + "learning_rate": 1.009775029793812e-06, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 9555, + "text_loss": 0.57421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0089884649268976e-06, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 9556, + "text_loss": 0.396484375 + }, + { + "epoch": 0.79, + "learning_rate": 1.0082021721465386e-06, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 9557, + "text_loss": 0.458984375 + }, + { + "epoch": 0.79, + "learning_rate": 1.007416151506337e-06, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 9558, + "text_loss": 0.546875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0066304030598807e-06, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 9559, + "text_loss": 0.36328125 + }, + { + "epoch": 0.79, + "learning_rate": 1.0058449268607385e-06, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 9560, + "text_loss": 0.57421875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0050597229624608e-06, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 9561, + "text_loss": 0.5 + }, + { + "epoch": 0.79, + "learning_rate": 1.0042747914185757e-06, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 9562, + "text_loss": 0.5625 + }, + { + "epoch": 0.79, + "learning_rate": 1.0034901322826007e-06, + "loss": 0.459, + "regression_loss": 0.0, + "step": 9563, + "text_loss": 0.63671875 + }, + { + "epoch": 0.79, + "learning_rate": 1.0027057456080258e-06, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 9564, + "text_loss": 0.37890625 + }, + { + "epoch": 0.79, + "learning_rate": 1.001921631448327e-06, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 9565, + "text_loss": 0.44140625 + }, + { + "epoch": 0.8, + "learning_rate": 1.0011377898569625e-06, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 9566, + "text_loss": 0.79296875 + }, + { + "epoch": 0.8, + "learning_rate": 1.0003542208873711e-06, + "loss": 0.5947, + "regression_loss": 0.0, + "step": 9567, + "text_loss": 0.6484375 + }, + { + "epoch": 0.8, + "learning_rate": 9.995709245929691e-07, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 9568, + "text_loss": 0.45703125 + }, + { + "epoch": 0.8, + "learning_rate": 9.987879010271617e-07, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 9569, + "text_loss": 0.73828125 + }, + { + "epoch": 0.8, + "learning_rate": 9.980051502433285e-07, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 9570, + "text_loss": 0.453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.972226722948346e-07, + "loss": 0.3947, + "regression_loss": 0.0, + "step": 9571, + "text_loss": 0.56640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.964404672350252e-07, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 9572, + "text_loss": 0.3046875 + }, + { + "epoch": 0.8, + "learning_rate": 9.956585351172282e-07, + "loss": 0.5564, + "regression_loss": 0.0, + "step": 9573, + "text_loss": 0.73828125 + }, + { + "epoch": 0.8, + "learning_rate": 9.948768759947475e-07, + "loss": 0.5664, + "regression_loss": 0.0, + "step": 9574, + "text_loss": 0.38671875 + }, + { + "epoch": 0.8, + "learning_rate": 9.940954899208783e-07, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 9575, + "text_loss": 0.35546875 + }, + { + "epoch": 0.8, + "learning_rate": 9.933143769488873e-07, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 9576, + "text_loss": 0.37109375 + }, + { + "epoch": 0.8, + "learning_rate": 9.925335371320283e-07, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 9577, + "text_loss": 0.3515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.917529705235346e-07, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 9578, + "text_loss": 0.53515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.909726771766231e-07, + "loss": 0.5649, + "regression_loss": 0.0, + "step": 9579, + "text_loss": 0.373046875 + }, + { + "epoch": 0.8, + "learning_rate": 9.901926571444854e-07, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 9580, + "text_loss": 0.294921875 + }, + { + "epoch": 0.8, + "learning_rate": 9.894129104803046e-07, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 9581, + "text_loss": 0.44140625 + }, + { + "epoch": 0.8, + "learning_rate": 9.886334372372364e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 9582, + "text_loss": 0.625 + }, + { + "epoch": 0.8, + "learning_rate": 9.878542374684224e-07, + "loss": 0.6145, + "regression_loss": 0.0, + "step": 9583, + "text_loss": 0.62890625 + }, + { + "epoch": 0.8, + "learning_rate": 9.870753112269842e-07, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 9584, + "text_loss": 0.49609375 + }, + { + "epoch": 0.8, + "learning_rate": 9.862966585660267e-07, + "loss": 0.3362, + "regression_loss": 0.0, + "step": 9585, + "text_loss": 0.2412109375 + }, + { + "epoch": 0.8, + "learning_rate": 9.855182795386315e-07, + "loss": 0.486, + "regression_loss": 0.0, + "step": 9586, + "text_loss": 0.44921875 + }, + { + "epoch": 0.8, + "learning_rate": 9.847401741978651e-07, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 9587, + "text_loss": 0.32421875 + }, + { + "epoch": 0.8, + "learning_rate": 9.83962342596776e-07, + "loss": 0.4601, + "regression_loss": 0.0, + "step": 9588, + "text_loss": 0.2255859375 + }, + { + "epoch": 0.8, + "learning_rate": 9.831847847883918e-07, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 9589, + "text_loss": 0.74609375 + }, + { + "epoch": 0.8, + "learning_rate": 9.824075008257229e-07, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 9590, + "text_loss": 0.328125 + }, + { + "epoch": 0.8, + "learning_rate": 9.816304907617601e-07, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 9591, + "text_loss": 0.39453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.80853754649478e-07, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 9592, + "text_loss": 0.205078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.800772925418255e-07, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 9593, + "text_loss": 0.23046875 + }, + { + "epoch": 0.8, + "learning_rate": 9.793011044917434e-07, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 9594, + "text_loss": 0.2265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.785251905521447e-07, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 9595, + "text_loss": 0.6328125 + }, + { + "epoch": 0.8, + "learning_rate": 9.777495507759276e-07, + "loss": 0.5386, + "regression_loss": 0.0, + "step": 9596, + "text_loss": 0.421875 + }, + { + "epoch": 0.8, + "learning_rate": 9.769741852159726e-07, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 9597, + "text_loss": 0.376953125 + }, + { + "epoch": 0.8, + "learning_rate": 9.761990939251402e-07, + "loss": 0.4321, + "regression_loss": 0.0, + "step": 9598, + "text_loss": 0.435546875 + }, + { + "epoch": 0.8, + "learning_rate": 9.754242769562682e-07, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 9599, + "text_loss": 0.55859375 + }, + { + "epoch": 0.8, + "learning_rate": 9.746497343621857e-07, + "loss": 0.5378, + "regression_loss": 0.0, + "step": 9600, + "text_loss": 0.671875 + }, + { + "epoch": 0.8, + "learning_rate": 9.738754661956928e-07, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 9601, + "text_loss": 0.56640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.731014725095762e-07, + "loss": 0.502, + "regression_loss": 0.0, + "step": 9602, + "text_loss": 0.7578125 + }, + { + "epoch": 0.8, + "learning_rate": 9.723277533566023e-07, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 9603, + "text_loss": 0.443359375 + }, + { + "epoch": 0.8, + "learning_rate": 9.715543087895217e-07, + "loss": 0.4509, + "regression_loss": 0.0, + "step": 9604, + "text_loss": 0.37890625 + }, + { + "epoch": 0.8, + "learning_rate": 9.707811388610595e-07, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 9605, + "text_loss": 0.396484375 + }, + { + "epoch": 0.8, + "learning_rate": 9.70008243623931e-07, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 9606, + "text_loss": 0.451171875 + }, + { + "epoch": 0.8, + "learning_rate": 9.692356231308248e-07, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 9607, + "text_loss": 0.431640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.68463277434416e-07, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 9608, + "text_loss": 0.81640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.676912065873584e-07, + "loss": 0.5654, + "regression_loss": 0.0, + "step": 9609, + "text_loss": 0.5703125 + }, + { + "epoch": 0.8, + "learning_rate": 9.66919410642289e-07, + "loss": 0.5735, + "regression_loss": 0.0, + "step": 9610, + "text_loss": 0.578125 + }, + { + "epoch": 0.8, + "learning_rate": 9.66147889651823e-07, + "loss": 0.4509, + "regression_loss": 0.0, + "step": 9611, + "text_loss": 0.37109375 + }, + { + "epoch": 0.8, + "learning_rate": 9.653766436685597e-07, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 9612, + "text_loss": 0.6015625 + }, + { + "epoch": 0.8, + "learning_rate": 9.64605672745078e-07, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 9613, + "text_loss": 0.53515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.6383497693394e-07, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 9614, + "text_loss": 0.423828125 + }, + { + "epoch": 0.8, + "learning_rate": 9.630645562876866e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 9615, + "text_loss": 0.55078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.622944108588428e-07, + "loss": 0.4557, + "regression_loss": 0.0, + "step": 9616, + "text_loss": 0.3671875 + }, + { + "epoch": 0.8, + "learning_rate": 9.615245406999108e-07, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 9617, + "text_loss": 0.451171875 + }, + { + "epoch": 0.8, + "learning_rate": 9.607549458633774e-07, + "loss": 0.5217, + "regression_loss": 0.0, + "step": 9618, + "text_loss": 0.330078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.5998562640171e-07, + "loss": 0.6711, + "regression_loss": 0.0, + "step": 9619, + "text_loss": 0.5625 + }, + { + "epoch": 0.8, + "learning_rate": 9.592165823673576e-07, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 9620, + "text_loss": 0.55078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.584478138127456e-07, + "loss": 0.4351, + "regression_loss": 0.0, + "step": 9621, + "text_loss": 0.41015625 + }, + { + "epoch": 0.8, + "learning_rate": 9.576793207902907e-07, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 9622, + "text_loss": 0.51953125 + }, + { + "epoch": 0.8, + "learning_rate": 9.569111033523804e-07, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 9623, + "text_loss": 0.455078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.561431615513888e-07, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 9624, + "text_loss": 0.69140625 + }, + { + "epoch": 0.8, + "learning_rate": 9.553754954396704e-07, + "loss": 0.408, + "regression_loss": 0.0, + "step": 9625, + "text_loss": 0.33984375 + }, + { + "epoch": 0.8, + "learning_rate": 9.546081050695622e-07, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 9626, + "text_loss": 0.37890625 + }, + { + "epoch": 0.8, + "learning_rate": 9.538409904933771e-07, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 9627, + "text_loss": 0.56640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.530741517634178e-07, + "loss": 0.4167, + "regression_loss": 0.0, + "step": 9628, + "text_loss": 0.244140625 + }, + { + "epoch": 0.8, + "learning_rate": 9.523075889319599e-07, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 9629, + "text_loss": 0.3125 + }, + { + "epoch": 0.8, + "learning_rate": 9.515413020512648e-07, + "loss": 0.4119, + "regression_loss": 0.0, + "step": 9630, + "text_loss": 0.546875 + }, + { + "epoch": 0.8, + "learning_rate": 9.50775291173574e-07, + "loss": 0.6033, + "regression_loss": 0.0, + "step": 9631, + "text_loss": 0.384765625 + }, + { + "epoch": 0.8, + "learning_rate": 9.500095563511119e-07, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 9632, + "text_loss": 0.69921875 + }, + { + "epoch": 0.8, + "learning_rate": 9.492440976360773e-07, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 9633, + "text_loss": 0.2890625 + }, + { + "epoch": 0.8, + "learning_rate": 9.484789150806617e-07, + "loss": 0.4204, + "regression_loss": 0.0, + "step": 9634, + "text_loss": 0.50390625 + }, + { + "epoch": 0.8, + "learning_rate": 9.477140087370268e-07, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 9635, + "text_loss": 0.59375 + }, + { + "epoch": 0.8, + "learning_rate": 9.469493786573208e-07, + "loss": 0.5881, + "regression_loss": 0.0, + "step": 9636, + "text_loss": 0.5 + }, + { + "epoch": 0.8, + "learning_rate": 9.461850248936733e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9637, + "text_loss": 0.197265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.454209474981935e-07, + "loss": 0.4945, + "regression_loss": 0.0, + "step": 9638, + "text_loss": 0.7265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.446571465229715e-07, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 9639, + "text_loss": 0.6640625 + }, + { + "epoch": 0.8, + "learning_rate": 9.438936220200806e-07, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 9640, + "text_loss": 0.326171875 + }, + { + "epoch": 0.8, + "learning_rate": 9.431303740415743e-07, + "loss": 0.491, + "regression_loss": 0.0, + "step": 9641, + "text_loss": 0.453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.423674026394847e-07, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 9642, + "text_loss": 0.236328125 + }, + { + "epoch": 0.8, + "learning_rate": 9.416047078658285e-07, + "loss": 0.5039, + "regression_loss": 0.0, + "step": 9643, + "text_loss": 0.48046875 + }, + { + "epoch": 0.8, + "learning_rate": 9.408422897726027e-07, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 9644, + "text_loss": 0.5234375 + }, + { + "epoch": 0.8, + "learning_rate": 9.400801484117855e-07, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 9645, + "text_loss": 0.60546875 + }, + { + "epoch": 0.8, + "learning_rate": 9.393182838353321e-07, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9646, + "text_loss": 0.71484375 + }, + { + "epoch": 0.8, + "learning_rate": 9.385566960951881e-07, + "loss": 0.4781, + "regression_loss": 0.0, + "step": 9647, + "text_loss": 0.40234375 + }, + { + "epoch": 0.8, + "learning_rate": 9.377953852432709e-07, + "loss": 0.46, + "regression_loss": 0.0, + "step": 9648, + "text_loss": 0.5390625 + }, + { + "epoch": 0.8, + "learning_rate": 9.370343513314828e-07, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 9649, + "text_loss": 0.5546875 + }, + { + "epoch": 0.8, + "learning_rate": 9.362735944117085e-07, + "loss": 0.385, + "regression_loss": 0.0, + "step": 9650, + "text_loss": 0.470703125 + }, + { + "epoch": 0.8, + "learning_rate": 9.355131145358131e-07, + "loss": 0.4266, + "regression_loss": 0.0, + "step": 9651, + "text_loss": 0.49609375 + }, + { + "epoch": 0.8, + "learning_rate": 9.347529117556386e-07, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 9652, + "text_loss": 0.51171875 + }, + { + "epoch": 0.8, + "learning_rate": 9.339929861230168e-07, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 9653, + "text_loss": 0.55078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.332333376897518e-07, + "loss": 0.4425, + "regression_loss": 0.0, + "step": 9654, + "text_loss": 0.515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.324739665076332e-07, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 9655, + "text_loss": 0.53125 + }, + { + "epoch": 0.8, + "learning_rate": 9.317148726284314e-07, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 9656, + "text_loss": 0.5703125 + }, + { + "epoch": 0.8, + "learning_rate": 9.309560561038982e-07, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 9657, + "text_loss": 0.376953125 + }, + { + "epoch": 0.8, + "learning_rate": 9.301975169857636e-07, + "loss": 0.4579, + "regression_loss": 0.0, + "step": 9658, + "text_loss": 0.5 + }, + { + "epoch": 0.8, + "learning_rate": 9.294392553257442e-07, + "loss": 0.4487, + "regression_loss": 0.0, + "step": 9659, + "text_loss": 0.373046875 + }, + { + "epoch": 0.8, + "learning_rate": 9.286812711755311e-07, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 9660, + "text_loss": 0.400390625 + }, + { + "epoch": 0.8, + "learning_rate": 9.279235645868017e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 9661, + "text_loss": 0.59375 + }, + { + "epoch": 0.8, + "learning_rate": 9.271661356112116e-07, + "loss": 0.457, + "regression_loss": 0.0, + "step": 9662, + "text_loss": 0.47265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.264089843004004e-07, + "loss": 0.4454, + "regression_loss": 0.0, + "step": 9663, + "text_loss": 0.5078125 + }, + { + "epoch": 0.8, + "learning_rate": 9.256521107059834e-07, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 9664, + "text_loss": 0.314453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.248955148795625e-07, + "loss": 0.417, + "regression_loss": 0.0, + "step": 9665, + "text_loss": 0.345703125 + }, + { + "epoch": 0.8, + "learning_rate": 9.241391968727176e-07, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 9666, + "text_loss": 0.53515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.233831567370116e-07, + "loss": 0.4783, + "regression_loss": 0.0, + "step": 9667, + "text_loss": 0.2265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.22627394523986e-07, + "loss": 0.4747, + "regression_loss": 0.0, + "step": 9668, + "text_loss": 0.58203125 + }, + { + "epoch": 0.8, + "learning_rate": 9.218719102851675e-07, + "loss": 0.5938, + "regression_loss": 0.0, + "step": 9669, + "text_loss": 0.4921875 + }, + { + "epoch": 0.8, + "learning_rate": 9.211167040720581e-07, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 9670, + "text_loss": 0.72265625 + }, + { + "epoch": 0.8, + "learning_rate": 9.203617759361444e-07, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 9671, + "text_loss": 0.66796875 + }, + { + "epoch": 0.8, + "learning_rate": 9.196071259288947e-07, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 9672, + "text_loss": 0.6484375 + }, + { + "epoch": 0.8, + "learning_rate": 9.188527541017561e-07, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 9673, + "text_loss": 0.7578125 + }, + { + "epoch": 0.8, + "learning_rate": 9.180986605061581e-07, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 9674, + "text_loss": 0.34375 + }, + { + "epoch": 0.8, + "learning_rate": 9.173448451935124e-07, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 9675, + "text_loss": 0.400390625 + }, + { + "epoch": 0.8, + "learning_rate": 9.165913082152078e-07, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 9676, + "text_loss": 0.25390625 + }, + { + "epoch": 0.8, + "learning_rate": 9.158380496226171e-07, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9677, + "text_loss": 0.53515625 + }, + { + "epoch": 0.8, + "learning_rate": 9.150850694670943e-07, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 9678, + "text_loss": 0.451171875 + }, + { + "epoch": 0.8, + "learning_rate": 9.143323677999749e-07, + "loss": 0.4879, + "regression_loss": 0.0, + "step": 9679, + "text_loss": 0.2041015625 + }, + { + "epoch": 0.8, + "learning_rate": 9.135799446725701e-07, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 9680, + "text_loss": 0.29296875 + }, + { + "epoch": 0.8, + "learning_rate": 9.128278001361812e-07, + "loss": 0.474, + "regression_loss": 0.0, + "step": 9681, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.120759342420821e-07, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 9682, + "text_loss": 0.65625 + }, + { + "epoch": 0.8, + "learning_rate": 9.113243470415312e-07, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 9683, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.8, + "learning_rate": 9.105730385857714e-07, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 9684, + "text_loss": 0.482421875 + }, + { + "epoch": 0.8, + "learning_rate": 9.09822008926019e-07, + "loss": 0.5359, + "regression_loss": 0.0, + "step": 9685, + "text_loss": 0.625 + }, + { + "epoch": 0.81, + "learning_rate": 9.090712581134769e-07, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 9686, + "text_loss": 0.625 + }, + { + "epoch": 0.81, + "learning_rate": 9.083207861993276e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 9687, + "text_loss": 0.326171875 + }, + { + "epoch": 0.81, + "learning_rate": 9.07570593234735e-07, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 9688, + "text_loss": 0.6171875 + }, + { + "epoch": 0.81, + "learning_rate": 9.068206792708417e-07, + "loss": 0.4403, + "regression_loss": 0.0, + "step": 9689, + "text_loss": 0.54296875 + }, + { + "epoch": 0.81, + "learning_rate": 9.060710443587739e-07, + "loss": 0.4323, + "regression_loss": 0.0, + "step": 9690, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.81, + "learning_rate": 9.053216885496374e-07, + "loss": 0.5848, + "regression_loss": 0.0, + "step": 9691, + "text_loss": 0.5 + }, + { + "epoch": 0.81, + "learning_rate": 9.045726118945197e-07, + "loss": 0.5695, + "regression_loss": 0.0, + "step": 9692, + "text_loss": 0.578125 + }, + { + "epoch": 0.81, + "learning_rate": 9.038238144444895e-07, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 9693, + "text_loss": 0.427734375 + }, + { + "epoch": 0.81, + "learning_rate": 9.030752962505962e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 9694, + "text_loss": 0.44140625 + }, + { + "epoch": 0.81, + "learning_rate": 9.023270573638681e-07, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 9695, + "text_loss": 0.322265625 + }, + { + "epoch": 0.81, + "learning_rate": 9.015790978353173e-07, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 9696, + "text_loss": 0.404296875 + }, + { + "epoch": 0.81, + "learning_rate": 9.00831417715936e-07, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 9697, + "text_loss": 0.451171875 + }, + { + "epoch": 0.81, + "learning_rate": 9.000840170566983e-07, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 9698, + "text_loss": 0.345703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.993368959085541e-07, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 9699, + "text_loss": 0.458984375 + }, + { + "epoch": 0.81, + "learning_rate": 8.985900543224435e-07, + "loss": 0.472, + "regression_loss": 0.0, + "step": 9700, + "text_loss": 0.2353515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.978434923492785e-07, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 9701, + "text_loss": 0.361328125 + }, + { + "epoch": 0.81, + "learning_rate": 8.970972100399572e-07, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 9702, + "text_loss": 0.318359375 + }, + { + "epoch": 0.81, + "learning_rate": 8.963512074453573e-07, + "loss": 0.335, + "regression_loss": 0.0, + "step": 9703, + "text_loss": 0.28515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.956054846163387e-07, + "loss": 0.4352, + "regression_loss": 0.0, + "step": 9704, + "text_loss": 0.1943359375 + }, + { + "epoch": 0.81, + "learning_rate": 8.948600416037367e-07, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 9705, + "text_loss": 0.220703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.941148784583775e-07, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 9706, + "text_loss": 0.255859375 + }, + { + "epoch": 0.81, + "learning_rate": 8.933699952310582e-07, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 9707, + "text_loss": 0.60546875 + }, + { + "epoch": 0.81, + "learning_rate": 8.926253919725625e-07, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 9708, + "text_loss": 0.3125 + }, + { + "epoch": 0.81, + "learning_rate": 8.918810687336532e-07, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 9709, + "text_loss": 0.37109375 + }, + { + "epoch": 0.81, + "learning_rate": 8.911370255650764e-07, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 9710, + "text_loss": 0.54296875 + }, + { + "epoch": 0.81, + "learning_rate": 8.903932625175532e-07, + "loss": 0.4379, + "regression_loss": 0.0, + "step": 9711, + "text_loss": 0.44921875 + }, + { + "epoch": 0.81, + "learning_rate": 8.896497796417941e-07, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 9712, + "text_loss": 0.46484375 + }, + { + "epoch": 0.81, + "learning_rate": 8.889065769884825e-07, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 9713, + "text_loss": 0.58203125 + }, + { + "epoch": 0.81, + "learning_rate": 8.881636546082867e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 9714, + "text_loss": 0.609375 + }, + { + "epoch": 0.81, + "learning_rate": 8.874210125518567e-07, + "loss": 0.4229, + "regression_loss": 0.0, + "step": 9715, + "text_loss": 0.353515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.866786508698216e-07, + "loss": 0.5918, + "regression_loss": 0.0, + "step": 9716, + "text_loss": 0.73046875 + }, + { + "epoch": 0.81, + "learning_rate": 8.859365696127897e-07, + "loss": 0.637, + "regression_loss": 0.0, + "step": 9717, + "text_loss": 0.79296875 + }, + { + "epoch": 0.81, + "learning_rate": 8.851947688313562e-07, + "loss": 0.3948, + "regression_loss": 0.0, + "step": 9718, + "text_loss": 0.486328125 + }, + { + "epoch": 0.81, + "learning_rate": 8.844532485760892e-07, + "loss": 0.4362, + "regression_loss": 0.0, + "step": 9719, + "text_loss": 0.7109375 + }, + { + "epoch": 0.81, + "learning_rate": 8.837120088975443e-07, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 9720, + "text_loss": 0.310546875 + }, + { + "epoch": 0.81, + "learning_rate": 8.829710498462541e-07, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 9721, + "text_loss": 0.400390625 + }, + { + "epoch": 0.81, + "learning_rate": 8.822303714727349e-07, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 9722, + "text_loss": 0.40625 + }, + { + "epoch": 0.81, + "learning_rate": 8.814899738274801e-07, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 9723, + "text_loss": 0.515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.807498569609679e-07, + "loss": 0.395, + "regression_loss": 0.0, + "step": 9724, + "text_loss": 0.27734375 + }, + { + "epoch": 0.81, + "learning_rate": 8.800100209236551e-07, + "loss": 0.4259, + "regression_loss": 0.0, + "step": 9725, + "text_loss": 0.296875 + }, + { + "epoch": 0.81, + "learning_rate": 8.792704657659795e-07, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 9726, + "text_loss": 0.2421875 + }, + { + "epoch": 0.81, + "learning_rate": 8.785311915383604e-07, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 9727, + "text_loss": 0.470703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.777921982911996e-07, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 9728, + "text_loss": 0.45703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.770534860748747e-07, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 9729, + "text_loss": 0.458984375 + }, + { + "epoch": 0.81, + "learning_rate": 8.76315054939747e-07, + "loss": 0.4241, + "regression_loss": 0.0, + "step": 9730, + "text_loss": 0.5390625 + }, + { + "epoch": 0.81, + "learning_rate": 8.75576904936164e-07, + "loss": 0.428, + "regression_loss": 0.0, + "step": 9731, + "text_loss": 0.44921875 + }, + { + "epoch": 0.81, + "learning_rate": 8.748390361144438e-07, + "loss": 0.4218, + "regression_loss": 0.0, + "step": 9732, + "text_loss": 0.609375 + }, + { + "epoch": 0.81, + "learning_rate": 8.741014485248922e-07, + "loss": 0.4946, + "regression_loss": 0.0, + "step": 9733, + "text_loss": 0.34375 + }, + { + "epoch": 0.81, + "learning_rate": 8.733641422177941e-07, + "loss": 0.4825, + "regression_loss": 0.0, + "step": 9734, + "text_loss": 0.71875 + }, + { + "epoch": 0.81, + "learning_rate": 8.726271172434164e-07, + "loss": 0.3909, + "regression_loss": 0.0, + "step": 9735, + "text_loss": 0.5 + }, + { + "epoch": 0.81, + "learning_rate": 8.718903736520023e-07, + "loss": 0.4337, + "regression_loss": 0.0, + "step": 9736, + "text_loss": 0.3359375 + }, + { + "epoch": 0.81, + "learning_rate": 8.711539114937844e-07, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 9737, + "text_loss": 0.423828125 + }, + { + "epoch": 0.81, + "learning_rate": 8.70417730818966e-07, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 9738, + "text_loss": 0.546875 + }, + { + "epoch": 0.81, + "learning_rate": 8.696818316777383e-07, + "loss": 0.522, + "regression_loss": 0.0, + "step": 9739, + "text_loss": 0.51171875 + }, + { + "epoch": 0.81, + "learning_rate": 8.689462141202709e-07, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 9740, + "text_loss": 0.49609375 + }, + { + "epoch": 0.81, + "learning_rate": 8.682108781967152e-07, + "loss": 0.4456, + "regression_loss": 0.0, + "step": 9741, + "text_loss": 0.494140625 + }, + { + "epoch": 0.81, + "learning_rate": 8.674758239571996e-07, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 9742, + "text_loss": 0.416015625 + }, + { + "epoch": 0.81, + "learning_rate": 8.66741051451841e-07, + "loss": 0.4862, + "regression_loss": 0.0, + "step": 9743, + "text_loss": 0.46875 + }, + { + "epoch": 0.81, + "learning_rate": 8.660065607307283e-07, + "loss": 0.4589, + "regression_loss": 0.0, + "step": 9744, + "text_loss": 0.578125 + }, + { + "epoch": 0.81, + "learning_rate": 8.652723518439366e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 9745, + "text_loss": 0.5078125 + }, + { + "epoch": 0.81, + "learning_rate": 8.645384248415212e-07, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 9746, + "text_loss": 0.240234375 + }, + { + "epoch": 0.81, + "learning_rate": 8.638047797735178e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 9747, + "text_loss": 0.56640625 + }, + { + "epoch": 0.81, + "learning_rate": 8.630714166899406e-07, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 9748, + "text_loss": 0.69921875 + }, + { + "epoch": 0.81, + "learning_rate": 8.623383356407871e-07, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 9749, + "text_loss": 0.6015625 + }, + { + "epoch": 0.81, + "learning_rate": 8.616055366760362e-07, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 9750, + "text_loss": 0.5234375 + }, + { + "epoch": 0.81, + "learning_rate": 8.608730198456449e-07, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 9751, + "text_loss": 0.3515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.601407851995536e-07, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 9752, + "text_loss": 0.5703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.594088327876827e-07, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 9753, + "text_loss": 0.56640625 + }, + { + "epoch": 0.81, + "learning_rate": 8.586771626599311e-07, + "loss": 0.5077, + "regression_loss": 0.0, + "step": 9754, + "text_loss": 0.5703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.579457748661813e-07, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 9755, + "text_loss": 0.373046875 + }, + { + "epoch": 0.81, + "learning_rate": 8.572146694562949e-07, + "loss": 0.5618, + "regression_loss": 0.0, + "step": 9756, + "text_loss": 0.59375 + }, + { + "epoch": 0.81, + "learning_rate": 8.564838464801167e-07, + "loss": 0.3521, + "regression_loss": 0.0, + "step": 9757, + "text_loss": 0.3125 + }, + { + "epoch": 0.81, + "learning_rate": 8.557533059874674e-07, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 9758, + "text_loss": 0.4453125 + }, + { + "epoch": 0.81, + "learning_rate": 8.550230480281552e-07, + "loss": 0.5742, + "regression_loss": 0.0, + "step": 9759, + "text_loss": 0.51953125 + }, + { + "epoch": 0.81, + "learning_rate": 8.542930726519622e-07, + "loss": 0.5656, + "regression_loss": 0.0, + "step": 9760, + "text_loss": 0.6171875 + }, + { + "epoch": 0.81, + "learning_rate": 8.535633799086557e-07, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 9761, + "text_loss": 0.5625 + }, + { + "epoch": 0.81, + "learning_rate": 8.52833969847982e-07, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 9762, + "text_loss": 0.4453125 + }, + { + "epoch": 0.81, + "learning_rate": 8.521048425196698e-07, + "loss": 0.4131, + "regression_loss": 0.0, + "step": 9763, + "text_loss": 0.7421875 + }, + { + "epoch": 0.81, + "learning_rate": 8.513759979734243e-07, + "loss": 0.3875, + "regression_loss": 0.0, + "step": 9764, + "text_loss": 0.443359375 + }, + { + "epoch": 0.81, + "learning_rate": 8.506474362589378e-07, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 9765, + "text_loss": 0.4453125 + }, + { + "epoch": 0.81, + "learning_rate": 8.49919157425878e-07, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 9766, + "text_loss": 0.6796875 + }, + { + "epoch": 0.81, + "learning_rate": 8.491911615238946e-07, + "loss": 0.4041, + "regression_loss": 0.0, + "step": 9767, + "text_loss": 0.37109375 + }, + { + "epoch": 0.81, + "learning_rate": 8.4846344860262e-07, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 9768, + "text_loss": 0.427734375 + }, + { + "epoch": 0.81, + "learning_rate": 8.477360187116668e-07, + "loss": 0.4016, + "regression_loss": 0.0, + "step": 9769, + "text_loss": 0.59765625 + }, + { + "epoch": 0.81, + "learning_rate": 8.470088719006231e-07, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 9770, + "text_loss": 0.53515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.462820082190676e-07, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 9771, + "text_loss": 0.435546875 + }, + { + "epoch": 0.81, + "learning_rate": 8.455554277165501e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 9772, + "text_loss": 0.5703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.448291304426065e-07, + "loss": 0.479, + "regression_loss": 0.0, + "step": 9773, + "text_loss": 0.271484375 + }, + { + "epoch": 0.81, + "learning_rate": 8.441031164467517e-07, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 9774, + "text_loss": 0.6484375 + }, + { + "epoch": 0.81, + "learning_rate": 8.433773857784833e-07, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 9775, + "text_loss": 0.478515625 + }, + { + "epoch": 0.81, + "learning_rate": 8.426519384872733e-07, + "loss": 0.4186, + "regression_loss": 0.0, + "step": 9776, + "text_loss": 0.345703125 + }, + { + "epoch": 0.81, + "learning_rate": 8.41926774622584e-07, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 9777, + "text_loss": 0.51953125 + }, + { + "epoch": 0.81, + "learning_rate": 8.412018942338523e-07, + "loss": 0.4045, + "regression_loss": 0.0, + "step": 9778, + "text_loss": 0.578125 + }, + { + "epoch": 0.81, + "learning_rate": 8.404772973704944e-07, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 9779, + "text_loss": 0.36328125 + }, + { + "epoch": 0.81, + "learning_rate": 8.397529840819113e-07, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 9780, + "text_loss": 0.390625 + }, + { + "epoch": 0.81, + "learning_rate": 8.390289544174829e-07, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 9781, + "text_loss": 0.7109375 + }, + { + "epoch": 0.81, + "learning_rate": 8.383052084265708e-07, + "loss": 0.4891, + "regression_loss": 0.0, + "step": 9782, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.81, + "learning_rate": 8.375817461585123e-07, + "loss": 0.4818, + "regression_loss": 0.0, + "step": 9783, + "text_loss": 0.48046875 + }, + { + "epoch": 0.81, + "learning_rate": 8.368585676626345e-07, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 9784, + "text_loss": 0.5234375 + }, + { + "epoch": 0.81, + "learning_rate": 8.361356729882358e-07, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 9785, + "text_loss": 0.490234375 + }, + { + "epoch": 0.81, + "learning_rate": 8.354130621846019e-07, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 9786, + "text_loss": 0.625 + }, + { + "epoch": 0.81, + "learning_rate": 8.346907353009959e-07, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 9787, + "text_loss": 0.74609375 + }, + { + "epoch": 0.81, + "learning_rate": 8.33968692386663e-07, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 9788, + "text_loss": 0.73828125 + }, + { + "epoch": 0.81, + "learning_rate": 8.332469334908261e-07, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 9789, + "text_loss": 0.75 + }, + { + "epoch": 0.81, + "learning_rate": 8.325254586626941e-07, + "loss": 0.511, + "regression_loss": 0.0, + "step": 9790, + "text_loss": 0.337890625 + }, + { + "epoch": 0.81, + "learning_rate": 8.318042679514515e-07, + "loss": 0.53, + "regression_loss": 0.0, + "step": 9791, + "text_loss": 0.484375 + }, + { + "epoch": 0.81, + "learning_rate": 8.310833614062652e-07, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 9792, + "text_loss": 0.6875 + }, + { + "epoch": 0.81, + "learning_rate": 8.303627390762836e-07, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 9793, + "text_loss": 0.62109375 + }, + { + "epoch": 0.81, + "learning_rate": 8.296424010106363e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 9794, + "text_loss": 0.5625 + }, + { + "epoch": 0.81, + "learning_rate": 8.289223472584285e-07, + "loss": 0.3875, + "regression_loss": 0.0, + "step": 9795, + "text_loss": 0.341796875 + }, + { + "epoch": 0.81, + "learning_rate": 8.282025778687541e-07, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 9796, + "text_loss": 0.53125 + }, + { + "epoch": 0.81, + "learning_rate": 8.274830928906802e-07, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 9797, + "text_loss": 0.26171875 + }, + { + "epoch": 0.81, + "learning_rate": 8.267638923732585e-07, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 9798, + "text_loss": 0.55078125 + }, + { + "epoch": 0.81, + "learning_rate": 8.260449763655203e-07, + "loss": 0.4047, + "regression_loss": 0.0, + "step": 9799, + "text_loss": 0.34765625 + }, + { + "epoch": 0.81, + "learning_rate": 8.253263449164789e-07, + "loss": 0.4272, + "regression_loss": 0.0, + "step": 9800, + "text_loss": 0.51953125 + }, + { + "epoch": 0.81, + "learning_rate": 8.246079980751248e-07, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 9801, + "text_loss": 0.546875 + }, + { + "epoch": 0.81, + "learning_rate": 8.238899358904317e-07, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 9802, + "text_loss": 0.404296875 + }, + { + "epoch": 0.81, + "learning_rate": 8.231721584113545e-07, + "loss": 0.4048, + "regression_loss": 0.0, + "step": 9803, + "text_loss": 0.37890625 + }, + { + "epoch": 0.81, + "learning_rate": 8.224546656868259e-07, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 9804, + "text_loss": 0.44921875 + }, + { + "epoch": 0.81, + "learning_rate": 8.217374577657622e-07, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 9805, + "text_loss": 0.232421875 + }, + { + "epoch": 0.81, + "learning_rate": 8.210205346970602e-07, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 9806, + "text_loss": 0.546875 + }, + { + "epoch": 0.82, + "learning_rate": 8.203038965295923e-07, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 9807, + "text_loss": 0.423828125 + }, + { + "epoch": 0.82, + "learning_rate": 8.195875433122175e-07, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 9808, + "text_loss": 0.57421875 + }, + { + "epoch": 0.82, + "learning_rate": 8.188714750937727e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 9809, + "text_loss": 0.341796875 + }, + { + "epoch": 0.82, + "learning_rate": 8.181556919230771e-07, + "loss": 0.4204, + "regression_loss": 0.0, + "step": 9810, + "text_loss": 0.52734375 + }, + { + "epoch": 0.82, + "learning_rate": 8.174401938489258e-07, + "loss": 0.4941, + "regression_loss": 0.0, + "step": 9811, + "text_loss": 0.58984375 + }, + { + "epoch": 0.82, + "learning_rate": 8.167249809201016e-07, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 9812, + "text_loss": 0.439453125 + }, + { + "epoch": 0.82, + "learning_rate": 8.160100531853609e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 9813, + "text_loss": 0.369140625 + }, + { + "epoch": 0.82, + "learning_rate": 8.152954106934451e-07, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 9814, + "text_loss": 0.3125 + }, + { + "epoch": 0.82, + "learning_rate": 8.145810534930753e-07, + "loss": 0.4016, + "regression_loss": 0.0, + "step": 9815, + "text_loss": 0.40625 + }, + { + "epoch": 0.82, + "learning_rate": 8.138669816329531e-07, + "loss": 0.6099, + "regression_loss": 0.0, + "step": 9816, + "text_loss": 0.62890625 + }, + { + "epoch": 0.82, + "learning_rate": 8.131531951617567e-07, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 9817, + "text_loss": 0.65234375 + }, + { + "epoch": 0.82, + "learning_rate": 8.124396941281537e-07, + "loss": 0.3884, + "regression_loss": 0.0, + "step": 9818, + "text_loss": 0.52734375 + }, + { + "epoch": 0.82, + "learning_rate": 8.117264785807827e-07, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 9819, + "text_loss": 0.4296875 + }, + { + "epoch": 0.82, + "learning_rate": 8.110135485682691e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 9820, + "text_loss": 0.5703125 + }, + { + "epoch": 0.82, + "learning_rate": 8.103009041392157e-07, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 9821, + "text_loss": 0.318359375 + }, + { + "epoch": 0.82, + "learning_rate": 8.095885453422086e-07, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 9822, + "text_loss": 0.67578125 + }, + { + "epoch": 0.82, + "learning_rate": 8.088764722258097e-07, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 9823, + "text_loss": 0.515625 + }, + { + "epoch": 0.82, + "learning_rate": 8.081646848385671e-07, + "loss": 0.4825, + "regression_loss": 0.0, + "step": 9824, + "text_loss": 0.57421875 + }, + { + "epoch": 0.82, + "learning_rate": 8.074531832290077e-07, + "loss": 0.4042, + "regression_loss": 0.0, + "step": 9825, + "text_loss": 0.416015625 + }, + { + "epoch": 0.82, + "learning_rate": 8.067419674456351e-07, + "loss": 0.4454, + "regression_loss": 0.0, + "step": 9826, + "text_loss": 0.486328125 + }, + { + "epoch": 0.82, + "learning_rate": 8.060310375369374e-07, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 9827, + "text_loss": 0.384765625 + }, + { + "epoch": 0.82, + "learning_rate": 8.05320393551382e-07, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 9828, + "text_loss": 0.609375 + }, + { + "epoch": 0.82, + "learning_rate": 8.046100355374176e-07, + "loss": 0.3735, + "regression_loss": 0.0, + "step": 9829, + "text_loss": 0.703125 + }, + { + "epoch": 0.82, + "learning_rate": 8.038999635434724e-07, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 9830, + "text_loss": 0.6171875 + }, + { + "epoch": 0.82, + "learning_rate": 8.031901776179568e-07, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 9831, + "text_loss": 0.47265625 + }, + { + "epoch": 0.82, + "learning_rate": 8.024806778092575e-07, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 9832, + "text_loss": 0.462890625 + }, + { + "epoch": 0.82, + "learning_rate": 8.01771464165746e-07, + "loss": 0.4131, + "regression_loss": 0.0, + "step": 9833, + "text_loss": 0.37890625 + }, + { + "epoch": 0.82, + "learning_rate": 8.010625367357727e-07, + "loss": 0.4514, + "regression_loss": 0.0, + "step": 9834, + "text_loss": 0.328125 + }, + { + "epoch": 0.82, + "learning_rate": 8.003538955676704e-07, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 9835, + "text_loss": 0.333984375 + }, + { + "epoch": 0.82, + "learning_rate": 7.996455407097459e-07, + "loss": 0.4357, + "regression_loss": 0.0, + "step": 9836, + "text_loss": 0.5703125 + }, + { + "epoch": 0.82, + "learning_rate": 7.989374722102972e-07, + "loss": 0.4701, + "regression_loss": 0.0, + "step": 9837, + "text_loss": 0.419921875 + }, + { + "epoch": 0.82, + "learning_rate": 7.982296901175924e-07, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 9838, + "text_loss": 0.44921875 + }, + { + "epoch": 0.82, + "learning_rate": 7.975221944798856e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 9839, + "text_loss": 0.498046875 + }, + { + "epoch": 0.82, + "learning_rate": 7.968149853454105e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 9840, + "text_loss": 0.380859375 + }, + { + "epoch": 0.82, + "learning_rate": 7.961080627623819e-07, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 9841, + "text_loss": 0.515625 + }, + { + "epoch": 0.82, + "learning_rate": 7.954014267789906e-07, + "loss": 0.468, + "regression_loss": 0.0, + "step": 9842, + "text_loss": 0.328125 + }, + { + "epoch": 0.82, + "learning_rate": 7.946950774434165e-07, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 9843, + "text_loss": 0.36328125 + }, + { + "epoch": 0.82, + "learning_rate": 7.93989014803811e-07, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 9844, + "text_loss": 0.412109375 + }, + { + "epoch": 0.82, + "learning_rate": 7.932832389083112e-07, + "loss": 0.3699, + "regression_loss": 0.0, + "step": 9845, + "text_loss": 0.31640625 + }, + { + "epoch": 0.82, + "learning_rate": 7.92577749805033e-07, + "loss": 0.5581, + "regression_loss": 0.0, + "step": 9846, + "text_loss": 0.431640625 + }, + { + "epoch": 0.82, + "learning_rate": 7.918725475420741e-07, + "loss": 0.4338, + "regression_loss": 0.0, + "step": 9847, + "text_loss": 0.267578125 + }, + { + "epoch": 0.82, + "learning_rate": 7.911676321675082e-07, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 9848, + "text_loss": 0.302734375 + }, + { + "epoch": 0.82, + "learning_rate": 7.904630037293976e-07, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 9849, + "text_loss": 0.5234375 + }, + { + "epoch": 0.82, + "learning_rate": 7.897586622757764e-07, + "loss": 0.4108, + "regression_loss": 0.0, + "step": 9850, + "text_loss": 0.546875 + }, + { + "epoch": 0.82, + "learning_rate": 7.890546078546646e-07, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 9851, + "text_loss": 0.462890625 + }, + { + "epoch": 0.82, + "learning_rate": 7.883508405140605e-07, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 9852, + "text_loss": 0.373046875 + }, + { + "epoch": 0.82, + "learning_rate": 7.876473603019446e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 9853, + "text_loss": 0.5703125 + }, + { + "epoch": 0.82, + "learning_rate": 7.869441672662731e-07, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 9854, + "text_loss": 0.470703125 + }, + { + "epoch": 0.82, + "learning_rate": 7.862412614549913e-07, + "loss": 0.4104, + "regression_loss": 0.0, + "step": 9855, + "text_loss": 0.34375 + }, + { + "epoch": 0.82, + "learning_rate": 7.85538642916015e-07, + "loss": 0.4581, + "regression_loss": 0.0, + "step": 9856, + "text_loss": 0.2275390625 + }, + { + "epoch": 0.82, + "learning_rate": 7.848363116972473e-07, + "loss": 0.4587, + "regression_loss": 0.0, + "step": 9857, + "text_loss": 0.6171875 + }, + { + "epoch": 0.82, + "learning_rate": 7.841342678465691e-07, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 9858, + "text_loss": 0.416015625 + }, + { + "epoch": 0.82, + "learning_rate": 7.834325114118435e-07, + "loss": 0.54, + "regression_loss": 0.0, + "step": 9859, + "text_loss": 0.408203125 + }, + { + "epoch": 0.82, + "learning_rate": 7.827310424409095e-07, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 9860, + "text_loss": 0.6015625 + }, + { + "epoch": 0.82, + "learning_rate": 7.820298609815924e-07, + "loss": 0.5032, + "regression_loss": 0.0, + "step": 9861, + "text_loss": 0.5546875 + }, + { + "epoch": 0.82, + "learning_rate": 7.813289670816937e-07, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 9862, + "text_loss": 0.296875 + }, + { + "epoch": 0.82, + "learning_rate": 7.80628360788997e-07, + "loss": 0.4371, + "regression_loss": 0.0, + "step": 9863, + "text_loss": 0.5546875 + }, + { + "epoch": 0.82, + "learning_rate": 7.799280421512661e-07, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 9864, + "text_loss": 0.455078125 + }, + { + "epoch": 0.82, + "learning_rate": 7.79228011216247e-07, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 9865, + "text_loss": 0.6953125 + }, + { + "epoch": 0.82, + "learning_rate": 7.785282680316608e-07, + "loss": 0.3519, + "regression_loss": 0.0, + "step": 9866, + "text_loss": 0.373046875 + }, + { + "epoch": 0.82, + "learning_rate": 7.778288126452138e-07, + "loss": 0.4454, + "regression_loss": 0.0, + "step": 9867, + "text_loss": 0.5625 + }, + { + "epoch": 0.82, + "learning_rate": 7.771296451045917e-07, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 9868, + "text_loss": 0.375 + }, + { + "epoch": 0.82, + "learning_rate": 7.764307654574615e-07, + "loss": 0.5889, + "regression_loss": 0.0, + "step": 9869, + "text_loss": 0.76171875 + }, + { + "epoch": 0.82, + "learning_rate": 7.757321737514645e-07, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 9870, + "text_loss": 0.55859375 + }, + { + "epoch": 0.82, + "learning_rate": 7.750338700342319e-07, + "loss": 0.4119, + "regression_loss": 0.0, + "step": 9871, + "text_loss": 0.271484375 + }, + { + "epoch": 0.82, + "learning_rate": 7.743358543533696e-07, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 9872, + "text_loss": 0.408203125 + }, + { + "epoch": 0.82, + "learning_rate": 7.736381267564613e-07, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 9873, + "text_loss": 0.609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.729406872910794e-07, + "loss": 0.4058, + "regression_loss": 0.0, + "step": 9874, + "text_loss": 0.369140625 + }, + { + "epoch": 0.82, + "learning_rate": 7.722435360047681e-07, + "loss": 0.4525, + "regression_loss": 0.0, + "step": 9875, + "text_loss": 0.6171875 + }, + { + "epoch": 0.82, + "learning_rate": 7.715466729450572e-07, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 9876, + "text_loss": 0.298828125 + }, + { + "epoch": 0.82, + "learning_rate": 7.708500981594541e-07, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 9877, + "text_loss": 0.72265625 + }, + { + "epoch": 0.82, + "learning_rate": 7.701538116954498e-07, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 9878, + "text_loss": 0.43359375 + }, + { + "epoch": 0.82, + "learning_rate": 7.69457813600511e-07, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 9879, + "text_loss": 0.474609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.687621039220882e-07, + "loss": 0.48, + "regression_loss": 0.0, + "step": 9880, + "text_loss": 0.32421875 + }, + { + "epoch": 0.82, + "learning_rate": 7.680666827076116e-07, + "loss": 0.448, + "regression_loss": 0.0, + "step": 9881, + "text_loss": 0.44921875 + }, + { + "epoch": 0.82, + "learning_rate": 7.67371550004491e-07, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 9882, + "text_loss": 0.494140625 + }, + { + "epoch": 0.82, + "learning_rate": 7.666767058601177e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 9883, + "text_loss": 0.376953125 + }, + { + "epoch": 0.82, + "learning_rate": 7.659821503218629e-07, + "loss": 0.5272, + "regression_loss": 0.0, + "step": 9884, + "text_loss": 0.400390625 + }, + { + "epoch": 0.82, + "learning_rate": 7.652878834370752e-07, + "loss": 0.3632, + "regression_loss": 0.0, + "step": 9885, + "text_loss": 0.341796875 + }, + { + "epoch": 0.82, + "learning_rate": 7.645939052530888e-07, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 9886, + "text_loss": 0.67578125 + }, + { + "epoch": 0.82, + "learning_rate": 7.639002158172143e-07, + "loss": 0.4009, + "regression_loss": 0.0, + "step": 9887, + "text_loss": 0.64453125 + }, + { + "epoch": 0.82, + "learning_rate": 7.632068151767447e-07, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 9888, + "text_loss": 0.328125 + }, + { + "epoch": 0.82, + "learning_rate": 7.625137033789515e-07, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 9889, + "text_loss": 0.474609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.618208804710891e-07, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 9890, + "text_loss": 0.61328125 + }, + { + "epoch": 0.82, + "learning_rate": 7.611283465003888e-07, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 9891, + "text_loss": 0.59375 + }, + { + "epoch": 0.82, + "learning_rate": 7.604361015140643e-07, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 9892, + "text_loss": 0.5390625 + }, + { + "epoch": 0.82, + "learning_rate": 7.597441455593097e-07, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 9893, + "text_loss": 0.4140625 + }, + { + "epoch": 0.82, + "learning_rate": 7.590524786833004e-07, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 9894, + "text_loss": 0.6171875 + }, + { + "epoch": 0.82, + "learning_rate": 7.583611009331865e-07, + "loss": 0.6001, + "regression_loss": 0.0, + "step": 9895, + "text_loss": 0.6328125 + }, + { + "epoch": 0.82, + "learning_rate": 7.576700123561076e-07, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 9896, + "text_loss": 0.306640625 + }, + { + "epoch": 0.82, + "learning_rate": 7.569792129991754e-07, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 9897, + "text_loss": 0.4921875 + }, + { + "epoch": 0.82, + "learning_rate": 7.562887029094856e-07, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 9898, + "text_loss": 0.40625 + }, + { + "epoch": 0.82, + "learning_rate": 7.555984821341139e-07, + "loss": 0.449, + "regression_loss": 0.0, + "step": 9899, + "text_loss": 0.474609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.549085507201171e-07, + "loss": 0.4258, + "regression_loss": 0.0, + "step": 9900, + "text_loss": 0.490234375 + }, + { + "epoch": 0.82, + "learning_rate": 7.54218908714528e-07, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 9901, + "text_loss": 0.4609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.535295561643669e-07, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 9902, + "text_loss": 0.330078125 + }, + { + "epoch": 0.82, + "learning_rate": 7.528404931166272e-07, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 9903, + "text_loss": 0.232421875 + }, + { + "epoch": 0.82, + "learning_rate": 7.521517196182865e-07, + "loss": 0.5271, + "regression_loss": 0.0, + "step": 9904, + "text_loss": 0.66796875 + }, + { + "epoch": 0.82, + "learning_rate": 7.514632357163021e-07, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 9905, + "text_loss": 0.55859375 + }, + { + "epoch": 0.82, + "learning_rate": 7.507750414576126e-07, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 9906, + "text_loss": 0.384765625 + }, + { + "epoch": 0.82, + "learning_rate": 7.500871368891316e-07, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 9907, + "text_loss": 0.80859375 + }, + { + "epoch": 0.82, + "learning_rate": 7.493995220577615e-07, + "loss": 0.4728, + "regression_loss": 0.0, + "step": 9908, + "text_loss": 0.291015625 + }, + { + "epoch": 0.82, + "learning_rate": 7.487121970103778e-07, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 9909, + "text_loss": 0.52734375 + }, + { + "epoch": 0.82, + "learning_rate": 7.480251617938383e-07, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 9910, + "text_loss": 0.78515625 + }, + { + "epoch": 0.82, + "learning_rate": 7.473384164549829e-07, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 9911, + "text_loss": 0.50390625 + }, + { + "epoch": 0.82, + "learning_rate": 7.466519610406314e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 9912, + "text_loss": 0.458984375 + }, + { + "epoch": 0.82, + "learning_rate": 7.459657955975797e-07, + "loss": 0.4287, + "regression_loss": 0.0, + "step": 9913, + "text_loss": 0.37109375 + }, + { + "epoch": 0.82, + "learning_rate": 7.452799201726086e-07, + "loss": 0.45, + "regression_loss": 0.0, + "step": 9914, + "text_loss": 0.232421875 + }, + { + "epoch": 0.82, + "learning_rate": 7.445943348124773e-07, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 9915, + "text_loss": 0.7109375 + }, + { + "epoch": 0.82, + "learning_rate": 7.439090395639259e-07, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 9916, + "text_loss": 0.47265625 + }, + { + "epoch": 0.82, + "learning_rate": 7.43224034473674e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 9917, + "text_loss": 0.51953125 + }, + { + "epoch": 0.82, + "learning_rate": 7.425393195884222e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9918, + "text_loss": 0.5703125 + }, + { + "epoch": 0.82, + "learning_rate": 7.418548949548516e-07, + "loss": 0.6577, + "regression_loss": 0.0, + "step": 9919, + "text_loss": 0.73046875 + }, + { + "epoch": 0.82, + "learning_rate": 7.411707606196189e-07, + "loss": 0.4077, + "regression_loss": 0.0, + "step": 9920, + "text_loss": 0.546875 + }, + { + "epoch": 0.82, + "learning_rate": 7.404869166293693e-07, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 9921, + "text_loss": 0.43359375 + }, + { + "epoch": 0.82, + "learning_rate": 7.398033630307217e-07, + "loss": 0.5688, + "regression_loss": 0.0, + "step": 9922, + "text_loss": 1.0625 + }, + { + "epoch": 0.82, + "learning_rate": 7.391200998702769e-07, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 9923, + "text_loss": 0.224609375 + }, + { + "epoch": 0.82, + "learning_rate": 7.384371271946172e-07, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 9924, + "text_loss": 0.5390625 + }, + { + "epoch": 0.82, + "learning_rate": 7.377544450503049e-07, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 9925, + "text_loss": 0.5234375 + }, + { + "epoch": 0.82, + "learning_rate": 7.370720534838782e-07, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 9926, + "text_loss": 0.5 + }, + { + "epoch": 0.83, + "learning_rate": 7.363899525418638e-07, + "loss": 0.5092, + "regression_loss": 0.0, + "step": 9927, + "text_loss": 0.62109375 + }, + { + "epoch": 0.83, + "learning_rate": 7.357081422707608e-07, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 9928, + "text_loss": 0.64453125 + }, + { + "epoch": 0.83, + "learning_rate": 7.350266227170516e-07, + "loss": 0.4872, + "regression_loss": 0.0, + "step": 9929, + "text_loss": 0.5078125 + }, + { + "epoch": 0.83, + "learning_rate": 7.343453939271993e-07, + "loss": 0.55, + "regression_loss": 0.0, + "step": 9930, + "text_loss": 0.71484375 + }, + { + "epoch": 0.83, + "learning_rate": 7.336644559476481e-07, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 9931, + "text_loss": 0.283203125 + }, + { + "epoch": 0.83, + "learning_rate": 7.329838088248165e-07, + "loss": 0.509, + "regression_loss": 0.0, + "step": 9932, + "text_loss": 0.6328125 + }, + { + "epoch": 0.83, + "learning_rate": 7.323034526051131e-07, + "loss": 0.427, + "regression_loss": 0.0, + "step": 9933, + "text_loss": 0.2275390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.316233873349171e-07, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 9934, + "text_loss": 0.54296875 + }, + { + "epoch": 0.83, + "learning_rate": 7.309436130605924e-07, + "loss": 0.4357, + "regression_loss": 0.0, + "step": 9935, + "text_loss": 0.408203125 + }, + { + "epoch": 0.83, + "learning_rate": 7.302641298284835e-07, + "loss": 0.556, + "regression_loss": 0.0, + "step": 9936, + "text_loss": 0.318359375 + }, + { + "epoch": 0.83, + "learning_rate": 7.295849376849151e-07, + "loss": 0.4399, + "regression_loss": 0.0, + "step": 9937, + "text_loss": 0.447265625 + }, + { + "epoch": 0.83, + "learning_rate": 7.28906036676188e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 9938, + "text_loss": 0.5625 + }, + { + "epoch": 0.83, + "learning_rate": 7.282274268485878e-07, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 9939, + "text_loss": 0.5078125 + }, + { + "epoch": 0.83, + "learning_rate": 7.275491082483782e-07, + "loss": 0.489, + "regression_loss": 0.0, + "step": 9940, + "text_loss": 0.41796875 + }, + { + "epoch": 0.83, + "learning_rate": 7.268710809218044e-07, + "loss": 0.4509, + "regression_loss": 0.0, + "step": 9941, + "text_loss": 0.33203125 + }, + { + "epoch": 0.83, + "learning_rate": 7.261933449150898e-07, + "loss": 0.5072, + "regression_loss": 0.0, + "step": 9942, + "text_loss": 0.50390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.255159002744411e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 9943, + "text_loss": 0.408203125 + }, + { + "epoch": 0.83, + "learning_rate": 7.248387470460394e-07, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 9944, + "text_loss": 0.51953125 + }, + { + "epoch": 0.83, + "learning_rate": 7.241618852760512e-07, + "loss": 0.501, + "regression_loss": 0.0, + "step": 9945, + "text_loss": 0.466796875 + }, + { + "epoch": 0.83, + "learning_rate": 7.234853150106225e-07, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 9946, + "text_loss": 0.640625 + }, + { + "epoch": 0.83, + "learning_rate": 7.228090362958779e-07, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 9947, + "text_loss": 0.392578125 + }, + { + "epoch": 0.83, + "learning_rate": 7.2213304917792e-07, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 9948, + "text_loss": 0.46484375 + }, + { + "epoch": 0.83, + "learning_rate": 7.21457353702838e-07, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 9949, + "text_loss": 0.318359375 + }, + { + "epoch": 0.83, + "learning_rate": 7.207819499166952e-07, + "loss": 0.3796, + "regression_loss": 0.0, + "step": 9950, + "text_loss": 0.44921875 + }, + { + "epoch": 0.83, + "learning_rate": 7.201068378655374e-07, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 9951, + "text_loss": 0.4453125 + }, + { + "epoch": 0.83, + "learning_rate": 7.194320175953901e-07, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 9952, + "text_loss": 0.51171875 + }, + { + "epoch": 0.83, + "learning_rate": 7.187574891522603e-07, + "loss": 0.5798, + "regression_loss": 0.0, + "step": 9953, + "text_loss": 0.65234375 + }, + { + "epoch": 0.83, + "learning_rate": 7.180832525821308e-07, + "loss": 0.459, + "regression_loss": 0.0, + "step": 9954, + "text_loss": 0.4296875 + }, + { + "epoch": 0.83, + "learning_rate": 7.174093079309719e-07, + "loss": 0.4443, + "regression_loss": 0.0, + "step": 9955, + "text_loss": 0.35546875 + }, + { + "epoch": 0.83, + "learning_rate": 7.167356552447264e-07, + "loss": 0.4363, + "regression_loss": 0.0, + "step": 9956, + "text_loss": 0.255859375 + }, + { + "epoch": 0.83, + "learning_rate": 7.160622945693213e-07, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 9957, + "text_loss": 0.390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.153892259506635e-07, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 9958, + "text_loss": 0.3046875 + }, + { + "epoch": 0.83, + "learning_rate": 7.147164494346393e-07, + "loss": 0.4229, + "regression_loss": 0.0, + "step": 9959, + "text_loss": 0.2578125 + }, + { + "epoch": 0.83, + "learning_rate": 7.140439650671127e-07, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 9960, + "text_loss": 0.390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.133717728939349e-07, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 9961, + "text_loss": 0.50390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.12699872960928e-07, + "loss": 0.4606, + "regression_loss": 0.0, + "step": 9962, + "text_loss": 0.53125 + }, + { + "epoch": 0.83, + "learning_rate": 7.12028265313901e-07, + "loss": 0.551, + "regression_loss": 0.0, + "step": 9963, + "text_loss": 0.78125 + }, + { + "epoch": 0.83, + "learning_rate": 7.113569499986401e-07, + "loss": 0.6289, + "regression_loss": 0.0, + "step": 9964, + "text_loss": 0.51953125 + }, + { + "epoch": 0.83, + "learning_rate": 7.106859270609117e-07, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 9965, + "text_loss": 0.357421875 + }, + { + "epoch": 0.83, + "learning_rate": 7.10015196546463e-07, + "loss": 0.4587, + "regression_loss": 0.0, + "step": 9966, + "text_loss": 0.66015625 + }, + { + "epoch": 0.83, + "learning_rate": 7.093447585010215e-07, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 9967, + "text_loss": 0.4765625 + }, + { + "epoch": 0.83, + "learning_rate": 7.086746129702948e-07, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 9968, + "text_loss": 0.62109375 + }, + { + "epoch": 0.83, + "learning_rate": 7.080047599999679e-07, + "loss": 0.462, + "regression_loss": 0.0, + "step": 9969, + "text_loss": 0.71484375 + }, + { + "epoch": 0.83, + "learning_rate": 7.073351996357086e-07, + "loss": 0.5342, + "regression_loss": 0.0, + "step": 9970, + "text_loss": 0.6328125 + }, + { + "epoch": 0.83, + "learning_rate": 7.066659319231645e-07, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 9971, + "text_loss": 0.26171875 + }, + { + "epoch": 0.83, + "learning_rate": 7.059969569079639e-07, + "loss": 0.6699, + "regression_loss": 0.0, + "step": 9972, + "text_loss": 0.4296875 + }, + { + "epoch": 0.83, + "learning_rate": 7.053282746357104e-07, + "loss": 0.4943, + "regression_loss": 0.0, + "step": 9973, + "text_loss": 0.75390625 + }, + { + "epoch": 0.83, + "learning_rate": 7.046598851519964e-07, + "loss": 0.4413, + "regression_loss": 0.0, + "step": 9974, + "text_loss": 0.62109375 + }, + { + "epoch": 0.83, + "learning_rate": 7.03991788502385e-07, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 9975, + "text_loss": 0.52734375 + }, + { + "epoch": 0.83, + "learning_rate": 7.033239847324258e-07, + "loss": 0.4315, + "regression_loss": 0.0, + "step": 9976, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.83, + "learning_rate": 7.026564738876451e-07, + "loss": 0.6782, + "regression_loss": 0.0, + "step": 9977, + "text_loss": 0.6328125 + }, + { + "epoch": 0.83, + "learning_rate": 7.019892560135522e-07, + "loss": 0.4285, + "regression_loss": 0.0, + "step": 9978, + "text_loss": 0.30859375 + }, + { + "epoch": 0.83, + "learning_rate": 7.01322331155631e-07, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 9979, + "text_loss": 0.3359375 + }, + { + "epoch": 0.83, + "learning_rate": 7.006556993593532e-07, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 9980, + "text_loss": 0.6328125 + }, + { + "epoch": 0.83, + "learning_rate": 6.999893606701636e-07, + "loss": 0.429, + "regression_loss": 0.0, + "step": 9981, + "text_loss": 0.19921875 + }, + { + "epoch": 0.83, + "learning_rate": 6.9932331513349e-07, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 9982, + "text_loss": 0.365234375 + }, + { + "epoch": 0.83, + "learning_rate": 6.986575627947406e-07, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 9983, + "text_loss": 0.78125 + }, + { + "epoch": 0.83, + "learning_rate": 6.979921036993042e-07, + "loss": 0.502, + "regression_loss": 0.0, + "step": 9984, + "text_loss": 0.3359375 + }, + { + "epoch": 0.83, + "learning_rate": 6.97326937892544e-07, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 9985, + "text_loss": 0.474609375 + }, + { + "epoch": 0.83, + "learning_rate": 6.966620654198131e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9986, + "text_loss": 0.283203125 + }, + { + "epoch": 0.83, + "learning_rate": 6.959974863264357e-07, + "loss": 0.4916, + "regression_loss": 0.0, + "step": 9987, + "text_loss": 0.4453125 + }, + { + "epoch": 0.83, + "learning_rate": 6.953332006577196e-07, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 9988, + "text_loss": 0.380859375 + }, + { + "epoch": 0.83, + "learning_rate": 6.946692084589529e-07, + "loss": 0.5131, + "regression_loss": 0.0, + "step": 9989, + "text_loss": 0.58984375 + }, + { + "epoch": 0.83, + "learning_rate": 6.940055097754045e-07, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 9990, + "text_loss": 0.60546875 + }, + { + "epoch": 0.83, + "learning_rate": 6.933421046523181e-07, + "loss": 0.4277, + "regression_loss": 0.0, + "step": 9991, + "text_loss": 0.41796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.926789931349265e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 9992, + "text_loss": 0.53125 + }, + { + "epoch": 0.83, + "learning_rate": 6.920161752684329e-07, + "loss": 0.5048, + "regression_loss": 0.0, + "step": 9993, + "text_loss": 0.58203125 + }, + { + "epoch": 0.83, + "learning_rate": 6.913536510980262e-07, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 9994, + "text_loss": 0.45703125 + }, + { + "epoch": 0.83, + "learning_rate": 6.906914206688742e-07, + "loss": 0.4015, + "regression_loss": 0.0, + "step": 9995, + "text_loss": 0.265625 + }, + { + "epoch": 0.83, + "learning_rate": 6.900294840261251e-07, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 9996, + "text_loss": 0.33203125 + }, + { + "epoch": 0.83, + "learning_rate": 6.893678412149046e-07, + "loss": 0.5491, + "regression_loss": 0.0, + "step": 9997, + "text_loss": 0.42578125 + }, + { + "epoch": 0.83, + "learning_rate": 6.88706492280321e-07, + "loss": 0.4263, + "regression_loss": 0.0, + "step": 9998, + "text_loss": 0.404296875 + }, + { + "epoch": 0.83, + "learning_rate": 6.880454372674611e-07, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 9999, + "text_loss": 0.2734375 + }, + { + "epoch": 0.83, + "learning_rate": 6.873846762213931e-07, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 10000, + "text_loss": 0.359375 + }, + { + "epoch": 0.83, + "learning_rate": 6.867242091871634e-07, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 10001, + "text_loss": 0.275390625 + }, + { + "epoch": 0.83, + "learning_rate": 6.860640362098015e-07, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 10002, + "text_loss": 0.43359375 + }, + { + "epoch": 0.83, + "learning_rate": 6.854041573343106e-07, + "loss": 0.5101, + "regression_loss": 0.0, + "step": 10003, + "text_loss": 0.6640625 + }, + { + "epoch": 0.83, + "learning_rate": 6.847445726056806e-07, + "loss": 0.491, + "regression_loss": 0.0, + "step": 10004, + "text_loss": 0.24609375 + }, + { + "epoch": 0.83, + "learning_rate": 6.840852820688781e-07, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 10005, + "text_loss": 0.62890625 + }, + { + "epoch": 0.83, + "learning_rate": 6.834262857688512e-07, + "loss": 0.5679, + "regression_loss": 0.0, + "step": 10006, + "text_loss": 0.6875 + }, + { + "epoch": 0.83, + "learning_rate": 6.827675837505226e-07, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 10007, + "text_loss": 0.64453125 + }, + { + "epoch": 0.83, + "learning_rate": 6.82109176058805e-07, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 10008, + "text_loss": 0.423828125 + }, + { + "epoch": 0.83, + "learning_rate": 6.814510627385817e-07, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 10009, + "text_loss": 0.453125 + }, + { + "epoch": 0.83, + "learning_rate": 6.807932438347198e-07, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 10010, + "text_loss": 0.6015625 + }, + { + "epoch": 0.83, + "learning_rate": 6.801357193920666e-07, + "loss": 0.4167, + "regression_loss": 0.0, + "step": 10011, + "text_loss": 0.50390625 + }, + { + "epoch": 0.83, + "learning_rate": 6.794784894554485e-07, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 10012, + "text_loss": 0.546875 + }, + { + "epoch": 0.83, + "learning_rate": 6.788215540696724e-07, + "loss": 0.551, + "regression_loss": 0.0, + "step": 10013, + "text_loss": 0.65625 + }, + { + "epoch": 0.83, + "learning_rate": 6.781649132795243e-07, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 10014, + "text_loss": 0.380859375 + }, + { + "epoch": 0.83, + "learning_rate": 6.775085671297715e-07, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 10015, + "text_loss": 0.66796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.768525156651589e-07, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 10016, + "text_loss": 0.373046875 + }, + { + "epoch": 0.83, + "learning_rate": 6.76196758930413e-07, + "loss": 0.509, + "regression_loss": 0.0, + "step": 10017, + "text_loss": 0.64453125 + }, + { + "epoch": 0.83, + "learning_rate": 6.755412969702402e-07, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 10018, + "text_loss": 0.515625 + }, + { + "epoch": 0.83, + "learning_rate": 6.748861298293269e-07, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 10019, + "text_loss": 0.45703125 + }, + { + "epoch": 0.83, + "learning_rate": 6.742312575523386e-07, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 10020, + "text_loss": 0.54296875 + }, + { + "epoch": 0.83, + "learning_rate": 6.735766801839222e-07, + "loss": 0.4005, + "regression_loss": 0.0, + "step": 10021, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.729223977687016e-07, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 10022, + "text_loss": 0.2412109375 + }, + { + "epoch": 0.83, + "learning_rate": 6.722684103512827e-07, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 10023, + "text_loss": 0.29296875 + }, + { + "epoch": 0.83, + "learning_rate": 6.716147179762516e-07, + "loss": 0.488, + "regression_loss": 0.0, + "step": 10024, + "text_loss": 0.59375 + }, + { + "epoch": 0.83, + "learning_rate": 6.709613206881749e-07, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 10025, + "text_loss": 0.76171875 + }, + { + "epoch": 0.83, + "learning_rate": 6.70308218531594e-07, + "loss": 0.467, + "regression_loss": 0.0, + "step": 10026, + "text_loss": 0.41796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.696554115510389e-07, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 10027, + "text_loss": 0.3359375 + }, + { + "epoch": 0.83, + "learning_rate": 6.690028997910108e-07, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 10028, + "text_loss": 0.421875 + }, + { + "epoch": 0.83, + "learning_rate": 6.683506832959963e-07, + "loss": 0.5793, + "regression_loss": 0.0, + "step": 10029, + "text_loss": 0.66796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.676987621104602e-07, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 10030, + "text_loss": 0.291015625 + }, + { + "epoch": 0.83, + "learning_rate": 6.670471362788472e-07, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 10031, + "text_loss": 0.32421875 + }, + { + "epoch": 0.83, + "learning_rate": 6.6639580584558e-07, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 10032, + "text_loss": 0.71875 + }, + { + "epoch": 0.83, + "learning_rate": 6.657447708550657e-07, + "loss": 0.4264, + "regression_loss": 0.0, + "step": 10033, + "text_loss": 0.341796875 + }, + { + "epoch": 0.83, + "learning_rate": 6.650940313516862e-07, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 10034, + "text_loss": 0.388671875 + }, + { + "epoch": 0.83, + "learning_rate": 6.644435873798066e-07, + "loss": 0.4408, + "regression_loss": 0.0, + "step": 10035, + "text_loss": 0.546875 + }, + { + "epoch": 0.83, + "learning_rate": 6.637934389837708e-07, + "loss": 0.4202, + "regression_loss": 0.0, + "step": 10036, + "text_loss": 0.4765625 + }, + { + "epoch": 0.83, + "learning_rate": 6.631435862079033e-07, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 10037, + "text_loss": 0.58984375 + }, + { + "epoch": 0.83, + "learning_rate": 6.624940290965048e-07, + "loss": 0.4475, + "regression_loss": 0.0, + "step": 10038, + "text_loss": 0.3046875 + }, + { + "epoch": 0.83, + "learning_rate": 6.618447676938627e-07, + "loss": 0.4635, + "regression_loss": 0.0, + "step": 10039, + "text_loss": 0.478515625 + }, + { + "epoch": 0.83, + "learning_rate": 6.611958020442377e-07, + "loss": 0.3994, + "regression_loss": 0.0, + "step": 10040, + "text_loss": 0.396484375 + }, + { + "epoch": 0.83, + "learning_rate": 6.605471321918739e-07, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 10041, + "text_loss": 0.63671875 + }, + { + "epoch": 0.83, + "learning_rate": 6.598987581809934e-07, + "loss": 0.5835, + "regression_loss": 0.0, + "step": 10042, + "text_loss": 0.9453125 + }, + { + "epoch": 0.83, + "learning_rate": 6.592506800558007e-07, + "loss": 0.4148, + "regression_loss": 0.0, + "step": 10043, + "text_loss": 0.470703125 + }, + { + "epoch": 0.83, + "learning_rate": 6.586028978604753e-07, + "loss": 0.5505, + "regression_loss": 0.0, + "step": 10044, + "text_loss": 0.55859375 + }, + { + "epoch": 0.83, + "learning_rate": 6.579554116391834e-07, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 10045, + "text_loss": 0.625 + }, + { + "epoch": 0.83, + "learning_rate": 6.573082214360648e-07, + "loss": 0.4263, + "regression_loss": 0.0, + "step": 10046, + "text_loss": 0.6484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.566613272952421e-07, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 10047, + "text_loss": 0.47265625 + }, + { + "epoch": 0.84, + "learning_rate": 6.560147292608177e-07, + "loss": 0.6208, + "regression_loss": 0.0, + "step": 10048, + "text_loss": 0.6953125 + }, + { + "epoch": 0.84, + "learning_rate": 6.55368427376874e-07, + "loss": 0.5823, + "regression_loss": 0.0, + "step": 10049, + "text_loss": 0.73046875 + }, + { + "epoch": 0.84, + "learning_rate": 6.547224216874698e-07, + "loss": 0.4025, + "regression_loss": 0.0, + "step": 10050, + "text_loss": 0.41015625 + }, + { + "epoch": 0.84, + "learning_rate": 6.540767122366488e-07, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 10051, + "text_loss": 0.271484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.534312990684316e-07, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 10052, + "text_loss": 0.44921875 + }, + { + "epoch": 0.84, + "learning_rate": 6.527861822268184e-07, + "loss": 0.4297, + "regression_loss": 0.0, + "step": 10053, + "text_loss": 0.263671875 + }, + { + "epoch": 0.84, + "learning_rate": 6.521413617557909e-07, + "loss": 0.5684, + "regression_loss": 0.0, + "step": 10054, + "text_loss": 0.416015625 + }, + { + "epoch": 0.84, + "learning_rate": 6.514968376993108e-07, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 10055, + "text_loss": 0.46484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.508526101013152e-07, + "loss": 0.5251, + "regression_loss": 0.0, + "step": 10056, + "text_loss": 0.390625 + }, + { + "epoch": 0.84, + "learning_rate": 6.50208679005726e-07, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 10057, + "text_loss": 0.5390625 + }, + { + "epoch": 0.84, + "learning_rate": 6.495650444564433e-07, + "loss": 0.5516, + "regression_loss": 0.0, + "step": 10058, + "text_loss": 0.55078125 + }, + { + "epoch": 0.84, + "learning_rate": 6.489217064973463e-07, + "loss": 0.4906, + "regression_loss": 0.0, + "step": 10059, + "text_loss": 0.5546875 + }, + { + "epoch": 0.84, + "learning_rate": 6.48278665172295e-07, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 10060, + "text_loss": 0.7421875 + }, + { + "epoch": 0.84, + "learning_rate": 6.476359205251276e-07, + "loss": 0.431, + "regression_loss": 0.0, + "step": 10061, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.84, + "learning_rate": 6.469934725996652e-07, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 10062, + "text_loss": 0.365234375 + }, + { + "epoch": 0.84, + "learning_rate": 6.463513214397033e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 10063, + "text_loss": 0.275390625 + }, + { + "epoch": 0.84, + "learning_rate": 6.457094670890241e-07, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 10064, + "text_loss": 0.46484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.450679095913831e-07, + "loss": 0.4049, + "regression_loss": 0.0, + "step": 10065, + "text_loss": 0.5390625 + }, + { + "epoch": 0.84, + "learning_rate": 6.444266489905199e-07, + "loss": 0.3854, + "regression_loss": 0.0, + "step": 10066, + "text_loss": 0.279296875 + }, + { + "epoch": 0.84, + "learning_rate": 6.437856853301517e-07, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 10067, + "text_loss": 0.56640625 + }, + { + "epoch": 0.84, + "learning_rate": 6.431450186539767e-07, + "loss": 0.5154, + "regression_loss": 0.0, + "step": 10068, + "text_loss": 0.7890625 + }, + { + "epoch": 0.84, + "learning_rate": 6.425046490056703e-07, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 10069, + "text_loss": 0.5625 + }, + { + "epoch": 0.84, + "learning_rate": 6.418645764288928e-07, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 10070, + "text_loss": 0.66015625 + }, + { + "epoch": 0.84, + "learning_rate": 6.412248009672789e-07, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 10071, + "text_loss": 0.431640625 + }, + { + "epoch": 0.84, + "learning_rate": 6.405853226644448e-07, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 10072, + "text_loss": 0.58203125 + }, + { + "epoch": 0.84, + "learning_rate": 6.399461415639885e-07, + "loss": 0.5078, + "regression_loss": 0.0, + "step": 10073, + "text_loss": 0.62109375 + }, + { + "epoch": 0.84, + "learning_rate": 6.393072577094855e-07, + "loss": 0.426, + "regression_loss": 0.0, + "step": 10074, + "text_loss": 0.4609375 + }, + { + "epoch": 0.84, + "learning_rate": 6.386686711444906e-07, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 10075, + "text_loss": 0.51953125 + }, + { + "epoch": 0.84, + "learning_rate": 6.3803038191254e-07, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 10076, + "text_loss": 0.255859375 + }, + { + "epoch": 0.84, + "learning_rate": 6.373923900571494e-07, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 10077, + "text_loss": 0.54296875 + }, + { + "epoch": 0.84, + "learning_rate": 6.367546956218129e-07, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 10078, + "text_loss": 0.51953125 + }, + { + "epoch": 0.84, + "learning_rate": 6.361172986500058e-07, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 10079, + "text_loss": 0.734375 + }, + { + "epoch": 0.84, + "learning_rate": 6.354801991851839e-07, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 10080, + "text_loss": 0.5703125 + }, + { + "epoch": 0.84, + "learning_rate": 6.348433972707785e-07, + "loss": 0.4333, + "regression_loss": 0.0, + "step": 10081, + "text_loss": 0.39453125 + }, + { + "epoch": 0.84, + "learning_rate": 6.342068929502049e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 10082, + "text_loss": 0.4609375 + }, + { + "epoch": 0.84, + "learning_rate": 6.335706862668562e-07, + "loss": 0.386, + "regression_loss": 0.0, + "step": 10083, + "text_loss": 0.55078125 + }, + { + "epoch": 0.84, + "learning_rate": 6.329347772641076e-07, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 10084, + "text_loss": 0.67578125 + }, + { + "epoch": 0.84, + "learning_rate": 6.32299165985309e-07, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 10085, + "text_loss": 0.54296875 + }, + { + "epoch": 0.84, + "learning_rate": 6.316638524737961e-07, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 10086, + "text_loss": 0.3515625 + }, + { + "epoch": 0.84, + "learning_rate": 6.31028836772879e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 10087, + "text_loss": 0.408203125 + }, + { + "epoch": 0.84, + "learning_rate": 6.303941189258506e-07, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 10088, + "text_loss": 0.26953125 + }, + { + "epoch": 0.84, + "learning_rate": 6.29759698975983e-07, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 10089, + "text_loss": 0.59765625 + }, + { + "epoch": 0.84, + "learning_rate": 6.291255769665288e-07, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 10090, + "text_loss": 0.29296875 + }, + { + "epoch": 0.84, + "learning_rate": 6.284917529407153e-07, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 10091, + "text_loss": 0.427734375 + }, + { + "epoch": 0.84, + "learning_rate": 6.278582269417583e-07, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 10092, + "text_loss": 0.451171875 + }, + { + "epoch": 0.84, + "learning_rate": 6.272249990128443e-07, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 10093, + "text_loss": 0.3359375 + }, + { + "epoch": 0.84, + "learning_rate": 6.265920691971455e-07, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 10094, + "text_loss": 0.58984375 + }, + { + "epoch": 0.84, + "learning_rate": 6.259594375378114e-07, + "loss": 0.4246, + "regression_loss": 0.0, + "step": 10095, + "text_loss": 0.322265625 + }, + { + "epoch": 0.84, + "learning_rate": 6.253271040779724e-07, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 10096, + "text_loss": 0.54296875 + }, + { + "epoch": 0.84, + "learning_rate": 6.24695068860735e-07, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 10097, + "text_loss": 0.423828125 + }, + { + "epoch": 0.84, + "learning_rate": 6.240633319291922e-07, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 10098, + "text_loss": 0.484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.234318933264088e-07, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 10099, + "text_loss": 0.3203125 + }, + { + "epoch": 0.84, + "learning_rate": 6.22800753095435e-07, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 10100, + "text_loss": 0.34765625 + }, + { + "epoch": 0.84, + "learning_rate": 6.221699112792979e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 10101, + "text_loss": 0.453125 + }, + { + "epoch": 0.84, + "learning_rate": 6.215393679210068e-07, + "loss": 0.4518, + "regression_loss": 0.0, + "step": 10102, + "text_loss": 0.4921875 + }, + { + "epoch": 0.84, + "learning_rate": 6.20909123063545e-07, + "loss": 0.4321, + "regression_loss": 0.0, + "step": 10103, + "text_loss": 0.2109375 + }, + { + "epoch": 0.84, + "learning_rate": 6.202791767498845e-07, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 10104, + "text_loss": 0.515625 + }, + { + "epoch": 0.84, + "learning_rate": 6.196495290229676e-07, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 10105, + "text_loss": 0.4609375 + }, + { + "epoch": 0.84, + "learning_rate": 6.190201799257217e-07, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 10106, + "text_loss": 0.5859375 + }, + { + "epoch": 0.84, + "learning_rate": 6.183911295010531e-07, + "loss": 0.3876, + "regression_loss": 0.0, + "step": 10107, + "text_loss": 0.357421875 + }, + { + "epoch": 0.84, + "learning_rate": 6.177623777918468e-07, + "loss": 0.3804, + "regression_loss": 0.0, + "step": 10108, + "text_loss": 0.40625 + }, + { + "epoch": 0.84, + "learning_rate": 6.171339248409692e-07, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 10109, + "text_loss": 0.4921875 + }, + { + "epoch": 0.84, + "learning_rate": 6.165057706912614e-07, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 10110, + "text_loss": 0.5703125 + }, + { + "epoch": 0.84, + "learning_rate": 6.158779153855526e-07, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 10111, + "text_loss": 0.5078125 + }, + { + "epoch": 0.84, + "learning_rate": 6.152503589666426e-07, + "loss": 0.4653, + "regression_loss": 0.0, + "step": 10112, + "text_loss": 0.47265625 + }, + { + "epoch": 0.84, + "learning_rate": 6.146231014773169e-07, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 10113, + "text_loss": 0.5859375 + }, + { + "epoch": 0.84, + "learning_rate": 6.139961429603381e-07, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 10114, + "text_loss": 0.3046875 + }, + { + "epoch": 0.84, + "learning_rate": 6.133694834584503e-07, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 10115, + "text_loss": 0.56640625 + }, + { + "epoch": 0.84, + "learning_rate": 6.127431230143727e-07, + "loss": 0.5259, + "regression_loss": 0.0, + "step": 10116, + "text_loss": 0.52734375 + }, + { + "epoch": 0.84, + "learning_rate": 6.121170616708111e-07, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 10117, + "text_loss": 0.2578125 + }, + { + "epoch": 0.84, + "learning_rate": 6.114912994704447e-07, + "loss": 0.4519, + "regression_loss": 0.0, + "step": 10118, + "text_loss": 0.2734375 + }, + { + "epoch": 0.84, + "learning_rate": 6.108658364559356e-07, + "loss": 0.4242, + "regression_loss": 0.0, + "step": 10119, + "text_loss": 0.37109375 + }, + { + "epoch": 0.84, + "learning_rate": 6.102406726699244e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 10120, + "text_loss": 0.85546875 + }, + { + "epoch": 0.84, + "learning_rate": 6.09615808155033e-07, + "loss": 0.4904, + "regression_loss": 0.0, + "step": 10121, + "text_loss": 0.396484375 + }, + { + "epoch": 0.84, + "learning_rate": 6.089912429538575e-07, + "loss": 0.4426, + "regression_loss": 0.0, + "step": 10122, + "text_loss": 0.38671875 + }, + { + "epoch": 0.84, + "learning_rate": 6.083669771089829e-07, + "loss": 0.511, + "regression_loss": 0.0, + "step": 10123, + "text_loss": 0.3671875 + }, + { + "epoch": 0.84, + "learning_rate": 6.07743010662964e-07, + "loss": 0.5991, + "regression_loss": 0.0, + "step": 10124, + "text_loss": 0.64453125 + }, + { + "epoch": 0.84, + "learning_rate": 6.071193436583412e-07, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 10125, + "text_loss": 0.4765625 + }, + { + "epoch": 0.84, + "learning_rate": 6.064959761376333e-07, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 10126, + "text_loss": 0.72265625 + }, + { + "epoch": 0.84, + "learning_rate": 6.05872908143339e-07, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 10127, + "text_loss": 0.75 + }, + { + "epoch": 0.84, + "learning_rate": 6.052501397179333e-07, + "loss": 0.491, + "regression_loss": 0.0, + "step": 10128, + "text_loss": 0.373046875 + }, + { + "epoch": 0.84, + "learning_rate": 6.046276709038751e-07, + "loss": 0.4733, + "regression_loss": 0.0, + "step": 10129, + "text_loss": 0.384765625 + }, + { + "epoch": 0.84, + "learning_rate": 6.040055017436008e-07, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 10130, + "text_loss": 0.2734375 + }, + { + "epoch": 0.84, + "learning_rate": 6.033836322795266e-07, + "loss": 0.4009, + "regression_loss": 0.0, + "step": 10131, + "text_loss": 0.255859375 + }, + { + "epoch": 0.84, + "learning_rate": 6.027620625540487e-07, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 10132, + "text_loss": 0.40234375 + }, + { + "epoch": 0.84, + "learning_rate": 6.021407926095435e-07, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 10133, + "text_loss": 0.408203125 + }, + { + "epoch": 0.84, + "learning_rate": 6.015198224883634e-07, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 10134, + "text_loss": 0.4609375 + }, + { + "epoch": 0.84, + "learning_rate": 6.008991522328444e-07, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 10135, + "text_loss": 0.82421875 + }, + { + "epoch": 0.84, + "learning_rate": 6.002787818853001e-07, + "loss": 0.4539, + "regression_loss": 0.0, + "step": 10136, + "text_loss": 0.466796875 + }, + { + "epoch": 0.84, + "learning_rate": 5.996587114880248e-07, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 10137, + "text_loss": 0.52734375 + }, + { + "epoch": 0.84, + "learning_rate": 5.990389410832914e-07, + "loss": 0.4136, + "regression_loss": 0.0, + "step": 10138, + "text_loss": 0.416015625 + }, + { + "epoch": 0.84, + "learning_rate": 5.98419470713354e-07, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 10139, + "text_loss": 0.515625 + }, + { + "epoch": 0.84, + "learning_rate": 5.978003004204419e-07, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 10140, + "text_loss": 0.640625 + }, + { + "epoch": 0.84, + "learning_rate": 5.971814302467688e-07, + "loss": 0.457, + "regression_loss": 0.0, + "step": 10141, + "text_loss": 0.578125 + }, + { + "epoch": 0.84, + "learning_rate": 5.965628602345264e-07, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 10142, + "text_loss": 0.310546875 + }, + { + "epoch": 0.84, + "learning_rate": 5.959445904258854e-07, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 10143, + "text_loss": 0.53125 + }, + { + "epoch": 0.84, + "learning_rate": 5.953266208629943e-07, + "loss": 0.4236, + "regression_loss": 0.0, + "step": 10144, + "text_loss": 0.65234375 + }, + { + "epoch": 0.84, + "learning_rate": 5.947089515879867e-07, + "loss": 0.4072, + "regression_loss": 0.0, + "step": 10145, + "text_loss": 0.4296875 + }, + { + "epoch": 0.84, + "learning_rate": 5.940915826429689e-07, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 10146, + "text_loss": 0.455078125 + }, + { + "epoch": 0.84, + "learning_rate": 5.93474514070031e-07, + "loss": 0.4297, + "regression_loss": 0.0, + "step": 10147, + "text_loss": 0.259765625 + }, + { + "epoch": 0.84, + "learning_rate": 5.928577459112416e-07, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 10148, + "text_loss": 0.42578125 + }, + { + "epoch": 0.84, + "learning_rate": 5.922412782086501e-07, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 10149, + "text_loss": 0.470703125 + }, + { + "epoch": 0.84, + "learning_rate": 5.916251110042809e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 10150, + "text_loss": 0.6875 + }, + { + "epoch": 0.84, + "learning_rate": 5.910092443401444e-07, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 10151, + "text_loss": 0.326171875 + }, + { + "epoch": 0.84, + "learning_rate": 5.903936782582253e-07, + "loss": 0.481, + "regression_loss": 0.0, + "step": 10152, + "text_loss": 0.244140625 + }, + { + "epoch": 0.84, + "learning_rate": 5.897784128004902e-07, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 10153, + "text_loss": 0.416015625 + }, + { + "epoch": 0.84, + "learning_rate": 5.891634480088848e-07, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 10154, + "text_loss": 0.427734375 + }, + { + "epoch": 0.84, + "learning_rate": 5.88548783925334e-07, + "loss": 0.4515, + "regression_loss": 0.0, + "step": 10155, + "text_loss": 0.63671875 + }, + { + "epoch": 0.84, + "learning_rate": 5.879344205917426e-07, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 10156, + "text_loss": 0.4609375 + }, + { + "epoch": 0.84, + "learning_rate": 5.873203580499948e-07, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 10157, + "text_loss": 0.5390625 + }, + { + "epoch": 0.84, + "learning_rate": 5.867065963419555e-07, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 10158, + "text_loss": 0.55078125 + }, + { + "epoch": 0.84, + "learning_rate": 5.860931355094651e-07, + "loss": 0.4363, + "regression_loss": 0.0, + "step": 10159, + "text_loss": 0.44921875 + }, + { + "epoch": 0.84, + "learning_rate": 5.854799755943474e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10160, + "text_loss": 0.48828125 + }, + { + "epoch": 0.84, + "learning_rate": 5.848671166384045e-07, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 10161, + "text_loss": 0.486328125 + }, + { + "epoch": 0.84, + "learning_rate": 5.842545586834192e-07, + "loss": 0.4275, + "regression_loss": 0.0, + "step": 10162, + "text_loss": 0.328125 + }, + { + "epoch": 0.84, + "learning_rate": 5.836423017711495e-07, + "loss": 0.5864, + "regression_loss": 0.0, + "step": 10163, + "text_loss": 0.53125 + }, + { + "epoch": 0.84, + "learning_rate": 5.830303459433395e-07, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 10164, + "text_loss": 0.5390625 + }, + { + "epoch": 0.84, + "learning_rate": 5.824186912417068e-07, + "loss": 0.6384, + "regression_loss": 0.0, + "step": 10165, + "text_loss": 0.56640625 + }, + { + "epoch": 0.84, + "learning_rate": 5.81807337707952e-07, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 10166, + "text_loss": 0.291015625 + }, + { + "epoch": 0.84, + "learning_rate": 5.811962853837527e-07, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 10167, + "text_loss": 0.482421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.805855343107697e-07, + "loss": 0.444, + "regression_loss": 0.0, + "step": 10168, + "text_loss": 0.423828125 + }, + { + "epoch": 0.85, + "learning_rate": 5.799750845306373e-07, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 10169, + "text_loss": 0.3359375 + }, + { + "epoch": 0.85, + "learning_rate": 5.793649360849768e-07, + "loss": 0.4258, + "regression_loss": 0.0, + "step": 10170, + "text_loss": 0.59375 + }, + { + "epoch": 0.85, + "learning_rate": 5.787550890153815e-07, + "loss": 0.469, + "regression_loss": 0.0, + "step": 10171, + "text_loss": 0.45703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.781455433634298e-07, + "loss": 0.509, + "regression_loss": 0.0, + "step": 10172, + "text_loss": 0.53515625 + }, + { + "epoch": 0.85, + "learning_rate": 5.775362991706762e-07, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 10173, + "text_loss": 0.734375 + }, + { + "epoch": 0.85, + "learning_rate": 5.769273564786576e-07, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 10174, + "text_loss": 0.50390625 + }, + { + "epoch": 0.85, + "learning_rate": 5.763187153288858e-07, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 10175, + "text_loss": 0.46484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.757103757628573e-07, + "loss": 0.563, + "regression_loss": 0.0, + "step": 10176, + "text_loss": 0.341796875 + }, + { + "epoch": 0.85, + "learning_rate": 5.751023378220444e-07, + "loss": 0.3961, + "regression_loss": 0.0, + "step": 10177, + "text_loss": 0.2119140625 + }, + { + "epoch": 0.85, + "learning_rate": 5.744946015478997e-07, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 10178, + "text_loss": 0.39453125 + }, + { + "epoch": 0.85, + "learning_rate": 5.738871669818563e-07, + "loss": 0.4092, + "regression_loss": 0.0, + "step": 10179, + "text_loss": 0.5625 + }, + { + "epoch": 0.85, + "learning_rate": 5.732800341653261e-07, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 10180, + "text_loss": 0.73828125 + }, + { + "epoch": 0.85, + "learning_rate": 5.726732031396981e-07, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 10181, + "text_loss": 0.431640625 + }, + { + "epoch": 0.85, + "learning_rate": 5.720666739463465e-07, + "loss": 0.4045, + "regression_loss": 0.0, + "step": 10182, + "text_loss": 0.484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.714604466266188e-07, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 10183, + "text_loss": 0.26953125 + }, + { + "epoch": 0.85, + "learning_rate": 5.708545212218447e-07, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 10184, + "text_loss": 0.30859375 + }, + { + "epoch": 0.85, + "learning_rate": 5.702488977733333e-07, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 10185, + "text_loss": 0.408203125 + }, + { + "epoch": 0.85, + "learning_rate": 5.696435763223745e-07, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 10186, + "text_loss": 0.435546875 + }, + { + "epoch": 0.85, + "learning_rate": 5.69038556910233e-07, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 10187, + "text_loss": 0.380859375 + }, + { + "epoch": 0.85, + "learning_rate": 5.68433839578158e-07, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 10188, + "text_loss": 0.609375 + }, + { + "epoch": 0.85, + "learning_rate": 5.678294243673749e-07, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 10189, + "text_loss": 0.357421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.672253113190901e-07, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 10190, + "text_loss": 0.66015625 + }, + { + "epoch": 0.85, + "learning_rate": 5.666215004744896e-07, + "loss": 0.5282, + "regression_loss": 0.0, + "step": 10191, + "text_loss": 0.5546875 + }, + { + "epoch": 0.85, + "learning_rate": 5.660179918747377e-07, + "loss": 0.4722, + "regression_loss": 0.0, + "step": 10192, + "text_loss": 0.326171875 + }, + { + "epoch": 0.85, + "learning_rate": 5.654147855609776e-07, + "loss": 0.4277, + "regression_loss": 0.0, + "step": 10193, + "text_loss": 0.279296875 + }, + { + "epoch": 0.85, + "learning_rate": 5.648118815743331e-07, + "loss": 0.4351, + "regression_loss": 0.0, + "step": 10194, + "text_loss": 0.41015625 + }, + { + "epoch": 0.85, + "learning_rate": 5.642092799559079e-07, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 10195, + "text_loss": 0.69921875 + }, + { + "epoch": 0.85, + "learning_rate": 5.636069807467842e-07, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 10196, + "text_loss": 0.578125 + }, + { + "epoch": 0.85, + "learning_rate": 5.630049839880214e-07, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 10197, + "text_loss": 0.55078125 + }, + { + "epoch": 0.85, + "learning_rate": 5.624032897206649e-07, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 10198, + "text_loss": 0.4375 + }, + { + "epoch": 0.85, + "learning_rate": 5.61801897985731e-07, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 10199, + "text_loss": 0.482421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.612008088242199e-07, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 10200, + "text_loss": 0.396484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.606000222771141e-07, + "loss": 0.489, + "regression_loss": 0.0, + "step": 10201, + "text_loss": 0.48828125 + }, + { + "epoch": 0.85, + "learning_rate": 5.59999538385369e-07, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 10202, + "text_loss": 0.67578125 + }, + { + "epoch": 0.85, + "learning_rate": 5.593993571899236e-07, + "loss": 0.4342, + "regression_loss": 0.0, + "step": 10203, + "text_loss": 0.62109375 + }, + { + "epoch": 0.85, + "learning_rate": 5.587994787316948e-07, + "loss": 0.384, + "regression_loss": 0.0, + "step": 10204, + "text_loss": 0.44921875 + }, + { + "epoch": 0.85, + "learning_rate": 5.5819990305158e-07, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 10205, + "text_loss": 0.419921875 + }, + { + "epoch": 0.85, + "learning_rate": 5.57600630190453e-07, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 10206, + "text_loss": 0.546875 + }, + { + "epoch": 0.85, + "learning_rate": 5.570016601891725e-07, + "loss": 0.4829, + "regression_loss": 0.0, + "step": 10207, + "text_loss": 0.302734375 + }, + { + "epoch": 0.85, + "learning_rate": 5.5640299308857e-07, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 10208, + "text_loss": 0.58984375 + }, + { + "epoch": 0.85, + "learning_rate": 5.558046289294616e-07, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 10209, + "text_loss": 0.484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.552065677526397e-07, + "loss": 0.4357, + "regression_loss": 0.0, + "step": 10210, + "text_loss": 0.359375 + }, + { + "epoch": 0.85, + "learning_rate": 5.546088095988783e-07, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 10211, + "text_loss": 0.37890625 + }, + { + "epoch": 0.85, + "learning_rate": 5.54011354508927e-07, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 10212, + "text_loss": 0.55859375 + }, + { + "epoch": 0.85, + "learning_rate": 5.53414202523519e-07, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 10213, + "text_loss": 0.57421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.528173536833648e-07, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 10214, + "text_loss": 0.46484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.522208080291541e-07, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 10215, + "text_loss": 0.62109375 + }, + { + "epoch": 0.85, + "learning_rate": 5.516245656015568e-07, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 10216, + "text_loss": 0.470703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.510286264412224e-07, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 10217, + "text_loss": 0.2890625 + }, + { + "epoch": 0.85, + "learning_rate": 5.504329905887767e-07, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 10218, + "text_loss": 0.326171875 + }, + { + "epoch": 0.85, + "learning_rate": 5.498376580848286e-07, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 10219, + "text_loss": 0.703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.492426289699642e-07, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 10220, + "text_loss": 0.58984375 + }, + { + "epoch": 0.85, + "learning_rate": 5.486479032847513e-07, + "loss": 0.5803, + "regression_loss": 0.0, + "step": 10221, + "text_loss": 0.625 + }, + { + "epoch": 0.85, + "learning_rate": 5.480534810697314e-07, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 10222, + "text_loss": 0.294921875 + }, + { + "epoch": 0.85, + "learning_rate": 5.47459362365434e-07, + "loss": 0.381, + "regression_loss": 0.0, + "step": 10223, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.85, + "learning_rate": 5.468655472123591e-07, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 10224, + "text_loss": 0.51171875 + }, + { + "epoch": 0.85, + "learning_rate": 5.462720356509916e-07, + "loss": 0.4928, + "regression_loss": 0.0, + "step": 10225, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.85, + "learning_rate": 5.456788277217934e-07, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 10226, + "text_loss": 0.6015625 + }, + { + "epoch": 0.85, + "learning_rate": 5.450859234652078e-07, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 10227, + "text_loss": 0.3515625 + }, + { + "epoch": 0.85, + "learning_rate": 5.444933229216536e-07, + "loss": 0.4108, + "regression_loss": 0.0, + "step": 10228, + "text_loss": 0.23046875 + }, + { + "epoch": 0.85, + "learning_rate": 5.439010261315342e-07, + "loss": 0.4757, + "regression_loss": 0.0, + "step": 10229, + "text_loss": 0.390625 + }, + { + "epoch": 0.85, + "learning_rate": 5.433090331352265e-07, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 10230, + "text_loss": 0.330078125 + }, + { + "epoch": 0.85, + "learning_rate": 5.42717343973091e-07, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 10231, + "text_loss": 0.58203125 + }, + { + "epoch": 0.85, + "learning_rate": 5.421259586854661e-07, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 10232, + "text_loss": 0.53125 + }, + { + "epoch": 0.85, + "learning_rate": 5.415348773126694e-07, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 10233, + "text_loss": 0.353515625 + }, + { + "epoch": 0.85, + "learning_rate": 5.409440998949955e-07, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 10234, + "text_loss": 0.5 + }, + { + "epoch": 0.85, + "learning_rate": 5.403536264727249e-07, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 10235, + "text_loss": 0.640625 + }, + { + "epoch": 0.85, + "learning_rate": 5.397634570861094e-07, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 10236, + "text_loss": 0.494140625 + }, + { + "epoch": 0.85, + "learning_rate": 5.391735917753848e-07, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 10237, + "text_loss": 0.6875 + }, + { + "epoch": 0.85, + "learning_rate": 5.385840305807655e-07, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 10238, + "text_loss": 0.40234375 + }, + { + "epoch": 0.85, + "learning_rate": 5.379947735424452e-07, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 10239, + "text_loss": 0.6328125 + }, + { + "epoch": 0.85, + "learning_rate": 5.374058207005945e-07, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 10240, + "text_loss": 0.333984375 + }, + { + "epoch": 0.85, + "learning_rate": 5.368171720953663e-07, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 10241, + "text_loss": 0.53125 + }, + { + "epoch": 0.85, + "learning_rate": 5.36228827766892e-07, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 10242, + "text_loss": 0.54296875 + }, + { + "epoch": 0.85, + "learning_rate": 5.35640787755281e-07, + "loss": 0.4087, + "regression_loss": 0.0, + "step": 10243, + "text_loss": 0.380859375 + }, + { + "epoch": 0.85, + "learning_rate": 5.350530521006236e-07, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 10244, + "text_loss": 0.392578125 + }, + { + "epoch": 0.85, + "learning_rate": 5.344656208429894e-07, + "loss": 0.5522, + "regression_loss": 0.0, + "step": 10245, + "text_loss": 0.66015625 + }, + { + "epoch": 0.85, + "learning_rate": 5.338784940224239e-07, + "loss": 0.4659, + "regression_loss": 0.0, + "step": 10246, + "text_loss": 0.482421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.332916716789555e-07, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 10247, + "text_loss": 0.443359375 + }, + { + "epoch": 0.85, + "learning_rate": 5.327051538525924e-07, + "loss": 0.4679, + "regression_loss": 0.0, + "step": 10248, + "text_loss": 0.46484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.321189405833183e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10249, + "text_loss": 0.4921875 + }, + { + "epoch": 0.85, + "learning_rate": 5.315330319110984e-07, + "loss": 0.6558, + "regression_loss": 0.0, + "step": 10250, + "text_loss": 0.55078125 + }, + { + "epoch": 0.85, + "learning_rate": 5.309474278758781e-07, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 10251, + "text_loss": 0.55859375 + }, + { + "epoch": 0.85, + "learning_rate": 5.303621285175803e-07, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 10252, + "text_loss": 0.3203125 + }, + { + "epoch": 0.85, + "learning_rate": 5.297771338761059e-07, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 10253, + "text_loss": 0.345703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.29192443991341e-07, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 10254, + "text_loss": 0.58203125 + }, + { + "epoch": 0.85, + "learning_rate": 5.286080589031422e-07, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 10255, + "text_loss": 0.2890625 + }, + { + "epoch": 0.85, + "learning_rate": 5.280239786513525e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 10256, + "text_loss": 0.48046875 + }, + { + "epoch": 0.85, + "learning_rate": 5.274402032757908e-07, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 10257, + "text_loss": 0.412109375 + }, + { + "epoch": 0.85, + "learning_rate": 5.268567328162566e-07, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 10258, + "text_loss": 0.703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.262735673125258e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 10259, + "text_loss": 0.546875 + }, + { + "epoch": 0.85, + "learning_rate": 5.256907068043582e-07, + "loss": 0.5388, + "regression_loss": 0.0, + "step": 10260, + "text_loss": 0.486328125 + }, + { + "epoch": 0.85, + "learning_rate": 5.251081513314888e-07, + "loss": 0.5065, + "regression_loss": 0.0, + "step": 10261, + "text_loss": 0.62890625 + }, + { + "epoch": 0.85, + "learning_rate": 5.245259009336329e-07, + "loss": 0.528, + "regression_loss": 0.0, + "step": 10262, + "text_loss": 0.6328125 + }, + { + "epoch": 0.85, + "learning_rate": 5.23943955650486e-07, + "loss": 0.4083, + "regression_loss": 0.0, + "step": 10263, + "text_loss": 0.34765625 + }, + { + "epoch": 0.85, + "learning_rate": 5.233623155217232e-07, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 10264, + "text_loss": 0.375 + }, + { + "epoch": 0.85, + "learning_rate": 5.227809805869955e-07, + "loss": 0.4163, + "regression_loss": 0.0, + "step": 10265, + "text_loss": 0.3203125 + }, + { + "epoch": 0.85, + "learning_rate": 5.221999508859365e-07, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 10266, + "text_loss": 0.326171875 + }, + { + "epoch": 0.85, + "learning_rate": 5.216192264581577e-07, + "loss": 0.574, + "regression_loss": 0.0, + "step": 10267, + "text_loss": 0.78515625 + }, + { + "epoch": 0.85, + "learning_rate": 5.210388073432493e-07, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 10268, + "text_loss": 0.50390625 + }, + { + "epoch": 0.85, + "learning_rate": 5.20458693580782e-07, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 10269, + "text_loss": 0.65234375 + }, + { + "epoch": 0.85, + "learning_rate": 5.198788852103059e-07, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 10270, + "text_loss": 0.314453125 + }, + { + "epoch": 0.85, + "learning_rate": 5.192993822713477e-07, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 10271, + "text_loss": 0.482421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.187201848034146e-07, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 10272, + "text_loss": 0.310546875 + }, + { + "epoch": 0.85, + "learning_rate": 5.18141292845995e-07, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 10273, + "text_loss": 0.357421875 + }, + { + "epoch": 0.85, + "learning_rate": 5.175627064385541e-07, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 10274, + "text_loss": 0.625 + }, + { + "epoch": 0.85, + "learning_rate": 5.169844256205353e-07, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 10275, + "text_loss": 0.3828125 + }, + { + "epoch": 0.85, + "learning_rate": 5.16406450431366e-07, + "loss": 0.4148, + "regression_loss": 0.0, + "step": 10276, + "text_loss": 0.44140625 + }, + { + "epoch": 0.85, + "learning_rate": 5.158287809104467e-07, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 10277, + "text_loss": 0.53125 + }, + { + "epoch": 0.85, + "learning_rate": 5.152514170971606e-07, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 10278, + "text_loss": 0.388671875 + }, + { + "epoch": 0.85, + "learning_rate": 5.146743590308706e-07, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 10279, + "text_loss": 0.59765625 + }, + { + "epoch": 0.85, + "learning_rate": 5.140976067509173e-07, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 10280, + "text_loss": 0.5703125 + }, + { + "epoch": 0.85, + "learning_rate": 5.135211602966184e-07, + "loss": 0.4672, + "regression_loss": 0.0, + "step": 10281, + "text_loss": 0.474609375 + }, + { + "epoch": 0.85, + "learning_rate": 5.129450197072766e-07, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 10282, + "text_loss": 0.46484375 + }, + { + "epoch": 0.85, + "learning_rate": 5.123691850221674e-07, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 10283, + "text_loss": 0.25390625 + }, + { + "epoch": 0.85, + "learning_rate": 5.117936562805493e-07, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 10284, + "text_loss": 0.61328125 + }, + { + "epoch": 0.85, + "learning_rate": 5.112184335216586e-07, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 10285, + "text_loss": 0.5078125 + }, + { + "epoch": 0.85, + "learning_rate": 5.106435167847118e-07, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 10286, + "text_loss": 0.447265625 + }, + { + "epoch": 0.85, + "learning_rate": 5.100689061089015e-07, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 10287, + "text_loss": 0.4609375 + }, + { + "epoch": 0.86, + "learning_rate": 5.094946015334057e-07, + "loss": 0.4539, + "regression_loss": 0.0, + "step": 10288, + "text_loss": 0.34375 + }, + { + "epoch": 0.86, + "learning_rate": 5.089206030973736e-07, + "loss": 0.5361, + "regression_loss": 0.0, + "step": 10289, + "text_loss": 0.58203125 + }, + { + "epoch": 0.86, + "learning_rate": 5.083469108399391e-07, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 10290, + "text_loss": 0.703125 + }, + { + "epoch": 0.86, + "learning_rate": 5.077735248002135e-07, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 10291, + "text_loss": 0.734375 + }, + { + "epoch": 0.86, + "learning_rate": 5.072004450172885e-07, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 10292, + "text_loss": 0.5625 + }, + { + "epoch": 0.86, + "learning_rate": 5.066276715302304e-07, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 10293, + "text_loss": 0.6328125 + }, + { + "epoch": 0.86, + "learning_rate": 5.060552043780914e-07, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 10294, + "text_loss": 0.306640625 + }, + { + "epoch": 0.86, + "learning_rate": 5.05483043599898e-07, + "loss": 0.3906, + "regression_loss": 0.0, + "step": 10295, + "text_loss": 0.4140625 + }, + { + "epoch": 0.86, + "learning_rate": 5.04911189234657e-07, + "loss": 0.4596, + "regression_loss": 0.0, + "step": 10296, + "text_loss": 0.498046875 + }, + { + "epoch": 0.86, + "learning_rate": 5.043396413213547e-07, + "loss": 0.4746, + "regression_loss": 0.0, + "step": 10297, + "text_loss": 0.201171875 + }, + { + "epoch": 0.86, + "learning_rate": 5.037683998989556e-07, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 10298, + "text_loss": 0.486328125 + }, + { + "epoch": 0.86, + "learning_rate": 5.031974650064059e-07, + "loss": 0.5225, + "regression_loss": 0.0, + "step": 10299, + "text_loss": 0.265625 + }, + { + "epoch": 0.86, + "learning_rate": 5.026268366826259e-07, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 10300, + "text_loss": 0.28515625 + }, + { + "epoch": 0.86, + "learning_rate": 5.020565149665213e-07, + "loss": 0.4172, + "regression_loss": 0.0, + "step": 10301, + "text_loss": 0.5546875 + }, + { + "epoch": 0.86, + "learning_rate": 5.014864998969715e-07, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 10302, + "text_loss": 0.69140625 + }, + { + "epoch": 0.86, + "learning_rate": 5.009167915128382e-07, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 10303, + "text_loss": 0.2265625 + }, + { + "epoch": 0.86, + "learning_rate": 5.00347389852961e-07, + "loss": 0.4537, + "regression_loss": 0.0, + "step": 10304, + "text_loss": 0.59375 + }, + { + "epoch": 0.86, + "learning_rate": 4.997782949561597e-07, + "loss": 0.4369, + "regression_loss": 0.0, + "step": 10305, + "text_loss": 0.498046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.99209506861229e-07, + "loss": 0.3658, + "regression_loss": 0.0, + "step": 10306, + "text_loss": 0.1962890625 + }, + { + "epoch": 0.86, + "learning_rate": 4.986410256069502e-07, + "loss": 0.4413, + "regression_loss": 0.0, + "step": 10307, + "text_loss": 0.515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.980728512320765e-07, + "loss": 0.4539, + "regression_loss": 0.0, + "step": 10308, + "text_loss": 0.61328125 + }, + { + "epoch": 0.86, + "learning_rate": 4.975049837753438e-07, + "loss": 0.423, + "regression_loss": 0.0, + "step": 10309, + "text_loss": 0.478515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.969374232754664e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 10310, + "text_loss": 0.3046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.963701697711392e-07, + "loss": 0.564, + "regression_loss": 0.0, + "step": 10311, + "text_loss": 0.47265625 + }, + { + "epoch": 0.86, + "learning_rate": 4.958032233010313e-07, + "loss": 0.5903, + "regression_loss": 0.0, + "step": 10312, + "text_loss": 0.50390625 + }, + { + "epoch": 0.86, + "learning_rate": 4.952365839037981e-07, + "loss": 0.3945, + "regression_loss": 0.0, + "step": 10313, + "text_loss": 0.345703125 + }, + { + "epoch": 0.86, + "learning_rate": 4.946702516180668e-07, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 10314, + "text_loss": 0.6953125 + }, + { + "epoch": 0.86, + "learning_rate": 4.941042264824486e-07, + "loss": 0.5425, + "regression_loss": 0.0, + "step": 10315, + "text_loss": 0.625 + }, + { + "epoch": 0.86, + "learning_rate": 4.935385085355321e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 10316, + "text_loss": 0.5625 + }, + { + "epoch": 0.86, + "learning_rate": 4.929730978158853e-07, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 10317, + "text_loss": 0.61328125 + }, + { + "epoch": 0.86, + "learning_rate": 4.92407994362053e-07, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 10318, + "text_loss": 0.39453125 + }, + { + "epoch": 0.86, + "learning_rate": 4.918431982125643e-07, + "loss": 0.46, + "regression_loss": 0.0, + "step": 10319, + "text_loss": 0.373046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.912787094059218e-07, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 10320, + "text_loss": 0.69140625 + }, + { + "epoch": 0.86, + "learning_rate": 4.907145279806097e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 10321, + "text_loss": 0.5078125 + }, + { + "epoch": 0.86, + "learning_rate": 4.901506539750911e-07, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 10322, + "text_loss": 0.640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.895870874278091e-07, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 10323, + "text_loss": 0.6015625 + }, + { + "epoch": 0.86, + "learning_rate": 4.890238283771831e-07, + "loss": 0.469, + "regression_loss": 0.0, + "step": 10324, + "text_loss": 0.4140625 + }, + { + "epoch": 0.86, + "learning_rate": 4.884608768616139e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 10325, + "text_loss": 0.369140625 + }, + { + "epoch": 0.86, + "learning_rate": 4.878982329194809e-07, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 10326, + "text_loss": 0.58203125 + }, + { + "epoch": 0.86, + "learning_rate": 4.873358965891418e-07, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 10327, + "text_loss": 0.353515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.86773867908934e-07, + "loss": 0.4325, + "regression_loss": 0.0, + "step": 10328, + "text_loss": 0.37890625 + }, + { + "epoch": 0.86, + "learning_rate": 4.862121469171749e-07, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 10329, + "text_loss": 0.8515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.856507336521571e-07, + "loss": 0.4015, + "regression_loss": 0.0, + "step": 10330, + "text_loss": 0.455078125 + }, + { + "epoch": 0.86, + "learning_rate": 4.850896281521566e-07, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 10331, + "text_loss": 0.3515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.845288304554269e-07, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 10332, + "text_loss": 0.50390625 + }, + { + "epoch": 0.86, + "learning_rate": 4.839683406001999e-07, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 10333, + "text_loss": 0.498046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.834081586246853e-07, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 10334, + "text_loss": 0.384765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.828482845670768e-07, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 10335, + "text_loss": 0.484375 + }, + { + "epoch": 0.86, + "learning_rate": 4.822887184655406e-07, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 10336, + "text_loss": 0.27734375 + }, + { + "epoch": 0.86, + "learning_rate": 4.817294603582262e-07, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 10337, + "text_loss": 0.6875 + }, + { + "epoch": 0.86, + "learning_rate": 4.811705102832615e-07, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 10338, + "text_loss": 0.423828125 + }, + { + "epoch": 0.86, + "learning_rate": 4.806118682787531e-07, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 10339, + "text_loss": 0.69921875 + }, + { + "epoch": 0.86, + "learning_rate": 4.800535343827834e-07, + "loss": 0.4192, + "regression_loss": 0.0, + "step": 10340, + "text_loss": 0.56640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.794955086334196e-07, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 10341, + "text_loss": 0.5859375 + }, + { + "epoch": 0.86, + "learning_rate": 4.789377910687054e-07, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 10342, + "text_loss": 0.73046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.783803817266613e-07, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 10343, + "text_loss": 0.6640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.778232806452892e-07, + "loss": 0.4331, + "regression_loss": 0.0, + "step": 10344, + "text_loss": 0.4453125 + }, + { + "epoch": 0.86, + "learning_rate": 4.772664878625694e-07, + "loss": 0.468, + "regression_loss": 0.0, + "step": 10345, + "text_loss": 0.3046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.767100034164607e-07, + "loss": 0.4496, + "regression_loss": 0.0, + "step": 10346, + "text_loss": 0.4453125 + }, + { + "epoch": 0.86, + "learning_rate": 4.7615382734490234e-07, + "loss": 0.4224, + "regression_loss": 0.0, + "step": 10347, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.86, + "learning_rate": 4.7559795968581177e-07, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 10348, + "text_loss": 0.4453125 + }, + { + "epoch": 0.86, + "learning_rate": 4.750424004770832e-07, + "loss": 0.516, + "regression_loss": 0.0, + "step": 10349, + "text_loss": 0.5859375 + }, + { + "epoch": 0.86, + "learning_rate": 4.744871497565934e-07, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 10350, + "text_loss": 0.353515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.7393220756219606e-07, + "loss": 0.426, + "regression_loss": 0.0, + "step": 10351, + "text_loss": 0.6875 + }, + { + "epoch": 0.86, + "learning_rate": 4.7337757393172423e-07, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 10352, + "text_loss": 0.59765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.728232489029905e-07, + "loss": 0.3748, + "regression_loss": 0.0, + "step": 10353, + "text_loss": 0.427734375 + }, + { + "epoch": 0.86, + "learning_rate": 4.722692325137856e-07, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 10354, + "text_loss": 0.43359375 + }, + { + "epoch": 0.86, + "learning_rate": 4.71715524801879e-07, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 10355, + "text_loss": 0.416015625 + }, + { + "epoch": 0.86, + "learning_rate": 4.7116212580501977e-07, + "loss": 0.5, + "regression_loss": 0.0, + "step": 10356, + "text_loss": 0.66796875 + }, + { + "epoch": 0.86, + "learning_rate": 4.706090355609361e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 10357, + "text_loss": 0.77734375 + }, + { + "epoch": 0.86, + "learning_rate": 4.7005625410733504e-07, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 10358, + "text_loss": 0.73046875 + }, + { + "epoch": 0.86, + "learning_rate": 4.695037814819009e-07, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 10359, + "text_loss": 0.59765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.689516177223014e-07, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 10360, + "text_loss": 0.703125 + }, + { + "epoch": 0.86, + "learning_rate": 4.683997628661774e-07, + "loss": 0.437, + "regression_loss": 0.0, + "step": 10361, + "text_loss": 0.5234375 + }, + { + "epoch": 0.86, + "learning_rate": 4.6784821695115223e-07, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 10362, + "text_loss": 0.5703125 + }, + { + "epoch": 0.86, + "learning_rate": 4.6729698001482807e-07, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 10363, + "text_loss": 0.62890625 + }, + { + "epoch": 0.86, + "learning_rate": 4.667460520947853e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 10364, + "text_loss": 0.62109375 + }, + { + "epoch": 0.86, + "learning_rate": 4.661954332285817e-07, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 10365, + "text_loss": 0.40625 + }, + { + "epoch": 0.86, + "learning_rate": 4.65645123453759e-07, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 10366, + "text_loss": 0.6484375 + }, + { + "epoch": 0.86, + "learning_rate": 4.650951228078315e-07, + "loss": 0.4048, + "regression_loss": 0.0, + "step": 10367, + "text_loss": 0.31640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.6454543132829653e-07, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 10368, + "text_loss": 0.52734375 + }, + { + "epoch": 0.86, + "learning_rate": 4.6399604905262906e-07, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 10369, + "text_loss": 0.470703125 + }, + { + "epoch": 0.86, + "learning_rate": 4.634469760182836e-07, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 10370, + "text_loss": 0.466796875 + }, + { + "epoch": 0.86, + "learning_rate": 4.6289821226269125e-07, + "loss": 0.5796, + "regression_loss": 0.0, + "step": 10371, + "text_loss": 0.419921875 + }, + { + "epoch": 0.86, + "learning_rate": 4.623497578232672e-07, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 10372, + "text_loss": 0.3203125 + }, + { + "epoch": 0.86, + "learning_rate": 4.618016127373992e-07, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 10373, + "text_loss": 0.515625 + }, + { + "epoch": 0.86, + "learning_rate": 4.6125377704245797e-07, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 10374, + "text_loss": 0.455078125 + }, + { + "epoch": 0.86, + "learning_rate": 4.6070625077579256e-07, + "loss": 0.4207, + "regression_loss": 0.0, + "step": 10375, + "text_loss": 0.59765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.6015903397473084e-07, + "loss": 0.448, + "regression_loss": 0.0, + "step": 10376, + "text_loss": 0.431640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.5961212667657795e-07, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 10377, + "text_loss": 0.5390625 + }, + { + "epoch": 0.86, + "learning_rate": 4.5906552891861964e-07, + "loss": 0.4697, + "regression_loss": 0.0, + "step": 10378, + "text_loss": 0.6640625 + }, + { + "epoch": 0.86, + "learning_rate": 4.5851924073812e-07, + "loss": 0.5389, + "regression_loss": 0.0, + "step": 10379, + "text_loss": 0.59375 + }, + { + "epoch": 0.86, + "learning_rate": 4.5797326217232265e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 10380, + "text_loss": 0.291015625 + }, + { + "epoch": 0.86, + "learning_rate": 4.5742759325844933e-07, + "loss": 0.4777, + "regression_loss": 0.0, + "step": 10381, + "text_loss": 0.58984375 + }, + { + "epoch": 0.86, + "learning_rate": 4.56882234033702e-07, + "loss": 0.5306, + "regression_loss": 0.0, + "step": 10382, + "text_loss": 0.65625 + }, + { + "epoch": 0.86, + "learning_rate": 4.563371845352588e-07, + "loss": 0.4552, + "regression_loss": 0.0, + "step": 10383, + "text_loss": 0.435546875 + }, + { + "epoch": 0.86, + "learning_rate": 4.5579244480027875e-07, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 10384, + "text_loss": 0.5703125 + }, + { + "epoch": 0.86, + "learning_rate": 4.5524801486589944e-07, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 10385, + "text_loss": 0.326171875 + }, + { + "epoch": 0.86, + "learning_rate": 4.5470389476923726e-07, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 10386, + "text_loss": 0.51171875 + }, + { + "epoch": 0.86, + "learning_rate": 4.5416008454738813e-07, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 10387, + "text_loss": 0.376953125 + }, + { + "epoch": 0.86, + "learning_rate": 4.536165842374257e-07, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 10388, + "text_loss": 0.59765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.5307339387640423e-07, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 10389, + "text_loss": 0.376953125 + }, + { + "epoch": 0.86, + "learning_rate": 4.5253051350135245e-07, + "loss": 0.4159, + "regression_loss": 0.0, + "step": 10390, + "text_loss": 0.5 + }, + { + "epoch": 0.86, + "learning_rate": 4.519879431492852e-07, + "loss": 0.45, + "regression_loss": 0.0, + "step": 10391, + "text_loss": 0.310546875 + }, + { + "epoch": 0.86, + "learning_rate": 4.514456828571889e-07, + "loss": 0.4672, + "regression_loss": 0.0, + "step": 10392, + "text_loss": 0.6796875 + }, + { + "epoch": 0.86, + "learning_rate": 4.5090373266203357e-07, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 10393, + "text_loss": 0.259765625 + }, + { + "epoch": 0.86, + "learning_rate": 4.503620926007668e-07, + "loss": 0.5396, + "regression_loss": 0.0, + "step": 10394, + "text_loss": 0.6484375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4982076271031463e-07, + "loss": 0.5547, + "regression_loss": 0.0, + "step": 10395, + "text_loss": 0.375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4927974302758027e-07, + "loss": 0.4843, + "regression_loss": 0.0, + "step": 10396, + "text_loss": 0.412109375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4873903358945094e-07, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 10397, + "text_loss": 0.6328125 + }, + { + "epoch": 0.86, + "learning_rate": 4.4819863443278653e-07, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 10398, + "text_loss": 0.6953125 + }, + { + "epoch": 0.86, + "learning_rate": 4.4765854559443045e-07, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 10399, + "text_loss": 0.240234375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4711876711120206e-07, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 10400, + "text_loss": 0.2734375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4657929901990196e-07, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 10401, + "text_loss": 0.6015625 + }, + { + "epoch": 0.86, + "learning_rate": 4.460401413573068e-07, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 10402, + "text_loss": 0.546875 + }, + { + "epoch": 0.86, + "learning_rate": 4.455012941601744e-07, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 10403, + "text_loss": 0.443359375 + }, + { + "epoch": 0.86, + "learning_rate": 4.4496275746524045e-07, + "loss": 0.5671, + "regression_loss": 0.0, + "step": 10404, + "text_loss": 0.423828125 + }, + { + "epoch": 0.86, + "learning_rate": 4.4442453130921936e-07, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 10405, + "text_loss": 0.78125 + }, + { + "epoch": 0.86, + "learning_rate": 4.4388661572880466e-07, + "loss": 0.49, + "regression_loss": 0.0, + "step": 10406, + "text_loss": 0.578125 + }, + { + "epoch": 0.86, + "learning_rate": 4.433490107606697e-07, + "loss": 0.498, + "regression_loss": 0.0, + "step": 10407, + "text_loss": 0.388671875 + }, + { + "epoch": 0.87, + "learning_rate": 4.4281171644146416e-07, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 10408, + "text_loss": 0.65625 + }, + { + "epoch": 0.87, + "learning_rate": 4.422747328078186e-07, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 10409, + "text_loss": 0.5625 + }, + { + "epoch": 0.87, + "learning_rate": 4.417380598963417e-07, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 10410, + "text_loss": 0.3828125 + }, + { + "epoch": 0.87, + "learning_rate": 4.4120169774362173e-07, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 10411, + "text_loss": 0.349609375 + }, + { + "epoch": 0.87, + "learning_rate": 4.40665646386223e-07, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 10412, + "text_loss": 0.47265625 + }, + { + "epoch": 0.87, + "learning_rate": 4.4012990586069393e-07, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 10413, + "text_loss": 0.52734375 + }, + { + "epoch": 0.87, + "learning_rate": 4.395944762035559e-07, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 10414, + "text_loss": 0.357421875 + }, + { + "epoch": 0.87, + "learning_rate": 4.3905935745131245e-07, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 10415, + "text_loss": 0.78515625 + }, + { + "epoch": 0.87, + "learning_rate": 4.3852454964044555e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 10416, + "text_loss": 0.48828125 + }, + { + "epoch": 0.87, + "learning_rate": 4.3799005280741605e-07, + "loss": 0.562, + "regression_loss": 0.0, + "step": 10417, + "text_loss": 0.6484375 + }, + { + "epoch": 0.87, + "learning_rate": 4.374558669886614e-07, + "loss": 0.4185, + "regression_loss": 0.0, + "step": 10418, + "text_loss": 0.431640625 + }, + { + "epoch": 0.87, + "learning_rate": 4.369219922206025e-07, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 10419, + "text_loss": 0.48828125 + }, + { + "epoch": 0.87, + "learning_rate": 4.36388428539633e-07, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 10420, + "text_loss": 0.42578125 + }, + { + "epoch": 0.87, + "learning_rate": 4.358551759821306e-07, + "loss": 0.4404, + "regression_loss": 0.0, + "step": 10421, + "text_loss": 0.57421875 + }, + { + "epoch": 0.87, + "learning_rate": 4.353222345844493e-07, + "loss": 0.4133, + "regression_loss": 0.0, + "step": 10422, + "text_loss": 0.4375 + }, + { + "epoch": 0.87, + "learning_rate": 4.3478960438292304e-07, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 10423, + "text_loss": 0.3671875 + }, + { + "epoch": 0.87, + "learning_rate": 4.342572854138605e-07, + "loss": 0.4587, + "regression_loss": 0.0, + "step": 10424, + "text_loss": 0.50390625 + }, + { + "epoch": 0.87, + "learning_rate": 4.33725277713557e-07, + "loss": 0.494, + "regression_loss": 0.0, + "step": 10425, + "text_loss": 0.48046875 + }, + { + "epoch": 0.87, + "learning_rate": 4.3319358131827914e-07, + "loss": 0.6133, + "regression_loss": 0.0, + "step": 10426, + "text_loss": 0.55078125 + }, + { + "epoch": 0.87, + "learning_rate": 4.3266219626427574e-07, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 10427, + "text_loss": 0.73828125 + }, + { + "epoch": 0.87, + "learning_rate": 4.3213112258777447e-07, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 10428, + "text_loss": 0.265625 + }, + { + "epoch": 0.87, + "learning_rate": 4.316003603249819e-07, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 10429, + "text_loss": 0.6171875 + }, + { + "epoch": 0.87, + "learning_rate": 4.310699095120796e-07, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 10430, + "text_loss": 0.40234375 + }, + { + "epoch": 0.87, + "learning_rate": 4.305397701852343e-07, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 10431, + "text_loss": 0.474609375 + }, + { + "epoch": 0.87, + "learning_rate": 4.300099423805865e-07, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 10432, + "text_loss": 0.65625 + }, + { + "epoch": 0.87, + "learning_rate": 4.294804261342572e-07, + "loss": 0.5231, + "regression_loss": 0.0, + "step": 10433, + "text_loss": 0.5546875 + }, + { + "epoch": 0.87, + "learning_rate": 4.289512214823466e-07, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 10434, + "text_loss": 0.58203125 + }, + { + "epoch": 0.87, + "learning_rate": 4.284223284609329e-07, + "loss": 0.4414, + "regression_loss": 0.0, + "step": 10435, + "text_loss": 0.259765625 + }, + { + "epoch": 0.87, + "learning_rate": 4.2789374710607456e-07, + "loss": 0.4812, + "regression_loss": 0.0, + "step": 10436, + "text_loss": 0.474609375 + }, + { + "epoch": 0.87, + "learning_rate": 4.273654774538039e-07, + "loss": 0.4684, + "regression_loss": 0.0, + "step": 10437, + "text_loss": 0.5078125 + }, + { + "epoch": 0.87, + "learning_rate": 4.2683751954013976e-07, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 10438, + "text_loss": 0.6015625 + }, + { + "epoch": 0.87, + "learning_rate": 4.2630987340107346e-07, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 10439, + "text_loss": 0.4453125 + }, + { + "epoch": 0.87, + "learning_rate": 4.2578253907257674e-07, + "loss": 0.4894, + "regression_loss": 0.0, + "step": 10440, + "text_loss": 0.515625 + }, + { + "epoch": 0.87, + "learning_rate": 4.2525551659060195e-07, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 10441, + "text_loss": 0.478515625 + }, + { + "epoch": 0.87, + "learning_rate": 4.247288059910787e-07, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 10442, + "text_loss": 0.38671875 + }, + { + "epoch": 0.87, + "learning_rate": 4.2420240730991326e-07, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 10443, + "text_loss": 0.203125 + }, + { + "epoch": 0.87, + "learning_rate": 4.2367632058299635e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 10444, + "text_loss": 0.455078125 + }, + { + "epoch": 0.87, + "learning_rate": 4.231505458461904e-07, + "loss": 0.5891, + "regression_loss": 0.0, + "step": 10445, + "text_loss": 0.404296875 + }, + { + "epoch": 0.87, + "learning_rate": 4.226250831353418e-07, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 10446, + "text_loss": 0.396484375 + }, + { + "epoch": 0.87, + "learning_rate": 4.220999324862735e-07, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 10447, + "text_loss": 0.5234375 + }, + { + "epoch": 0.87, + "learning_rate": 4.2157509393478845e-07, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 10448, + "text_loss": 0.5546875 + }, + { + "epoch": 0.87, + "learning_rate": 4.2105056751666487e-07, + "loss": 0.4626, + "regression_loss": 0.0, + "step": 10449, + "text_loss": 0.46875 + }, + { + "epoch": 0.87, + "learning_rate": 4.2052635326766567e-07, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 10450, + "text_loss": 0.71875 + }, + { + "epoch": 0.87, + "learning_rate": 4.200024512235262e-07, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 10451, + "text_loss": 0.3984375 + }, + { + "epoch": 0.87, + "learning_rate": 4.194788614199652e-07, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 10452, + "text_loss": 0.427734375 + }, + { + "epoch": 0.87, + "learning_rate": 4.1895558389267733e-07, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 10453, + "text_loss": 0.34375 + }, + { + "epoch": 0.87, + "learning_rate": 4.184326186773385e-07, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 10454, + "text_loss": 0.451171875 + }, + { + "epoch": 0.87, + "learning_rate": 4.179099658095992e-07, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 10455, + "text_loss": 0.64453125 + }, + { + "epoch": 0.87, + "learning_rate": 4.17387625325093e-07, + "loss": 0.512, + "regression_loss": 0.0, + "step": 10456, + "text_loss": 0.53125 + }, + { + "epoch": 0.87, + "learning_rate": 4.168655972594299e-07, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 10457, + "text_loss": 0.40234375 + }, + { + "epoch": 0.87, + "learning_rate": 4.1634388164819903e-07, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 10458, + "text_loss": 0.55859375 + }, + { + "epoch": 0.87, + "learning_rate": 4.158224785269682e-07, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 10459, + "text_loss": 0.5859375 + }, + { + "epoch": 0.87, + "learning_rate": 4.153013879312856e-07, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 10460, + "text_loss": 0.5859375 + }, + { + "epoch": 0.87, + "learning_rate": 4.147806098966739e-07, + "loss": 0.4404, + "regression_loss": 0.0, + "step": 10461, + "text_loss": 0.494140625 + }, + { + "epoch": 0.87, + "learning_rate": 4.1426014445863806e-07, + "loss": 0.4905, + "regression_loss": 0.0, + "step": 10462, + "text_loss": 0.486328125 + }, + { + "epoch": 0.87, + "learning_rate": 4.1373999165266077e-07, + "loss": 0.5203, + "regression_loss": 0.0, + "step": 10463, + "text_loss": 0.453125 + }, + { + "epoch": 0.87, + "learning_rate": 4.132201515142037e-07, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 10464, + "text_loss": 0.5234375 + }, + { + "epoch": 0.87, + "learning_rate": 4.127006240787068e-07, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 10465, + "text_loss": 0.357421875 + }, + { + "epoch": 0.87, + "learning_rate": 4.1218140938158904e-07, + "loss": 0.4167, + "regression_loss": 0.0, + "step": 10466, + "text_loss": 0.51171875 + }, + { + "epoch": 0.87, + "learning_rate": 4.11662507458247e-07, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 10467, + "text_loss": 0.291015625 + }, + { + "epoch": 0.87, + "learning_rate": 4.111439183440569e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 10468, + "text_loss": 0.46875 + }, + { + "epoch": 0.87, + "learning_rate": 4.106256420743732e-07, + "loss": 0.52, + "regression_loss": 0.0, + "step": 10469, + "text_loss": 0.78515625 + }, + { + "epoch": 0.87, + "learning_rate": 4.101076786845315e-07, + "loss": 0.4336, + "regression_loss": 0.0, + "step": 10470, + "text_loss": 0.53515625 + }, + { + "epoch": 0.87, + "learning_rate": 4.095900282098397e-07, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 10471, + "text_loss": 0.361328125 + }, + { + "epoch": 0.87, + "learning_rate": 4.09072690685593e-07, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 10472, + "text_loss": 0.58203125 + }, + { + "epoch": 0.87, + "learning_rate": 4.085556661470574e-07, + "loss": 0.472, + "regression_loss": 0.0, + "step": 10473, + "text_loss": 0.56640625 + }, + { + "epoch": 0.87, + "learning_rate": 4.080389546294833e-07, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 10474, + "text_loss": 0.51953125 + }, + { + "epoch": 0.87, + "learning_rate": 4.0752255616809564e-07, + "loss": 0.4653, + "regression_loss": 0.0, + "step": 10475, + "text_loss": 0.4765625 + }, + { + "epoch": 0.87, + "learning_rate": 4.0700647079810197e-07, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 10476, + "text_loss": 0.609375 + }, + { + "epoch": 0.87, + "learning_rate": 4.0649069855468294e-07, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 10477, + "text_loss": 0.6640625 + }, + { + "epoch": 0.87, + "learning_rate": 4.059752394730049e-07, + "loss": 0.436, + "regression_loss": 0.0, + "step": 10478, + "text_loss": 0.357421875 + }, + { + "epoch": 0.87, + "learning_rate": 4.0546009358820694e-07, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 10479, + "text_loss": 0.5390625 + }, + { + "epoch": 0.87, + "learning_rate": 4.049452609354093e-07, + "loss": 0.4886, + "regression_loss": 0.0, + "step": 10480, + "text_loss": 0.263671875 + }, + { + "epoch": 0.87, + "learning_rate": 4.0443074154971116e-07, + "loss": 0.5435, + "regression_loss": 0.0, + "step": 10481, + "text_loss": 0.58203125 + }, + { + "epoch": 0.87, + "learning_rate": 4.0391653546619e-07, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 10482, + "text_loss": 0.73046875 + }, + { + "epoch": 0.87, + "learning_rate": 4.0340264271990116e-07, + "loss": 0.4216, + "regression_loss": 0.0, + "step": 10483, + "text_loss": 0.421875 + }, + { + "epoch": 0.87, + "learning_rate": 4.028890633458793e-07, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 10484, + "text_loss": 0.494140625 + }, + { + "epoch": 0.87, + "learning_rate": 4.023757973791381e-07, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 10485, + "text_loss": 0.5546875 + }, + { + "epoch": 0.87, + "learning_rate": 4.0186284485466855e-07, + "loss": 0.417, + "regression_loss": 0.0, + "step": 10486, + "text_loss": 0.39453125 + }, + { + "epoch": 0.87, + "learning_rate": 4.0135020580744144e-07, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 10487, + "text_loss": 0.6171875 + }, + { + "epoch": 0.87, + "learning_rate": 4.0083788027240613e-07, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 10488, + "text_loss": 0.255859375 + }, + { + "epoch": 0.87, + "learning_rate": 4.003258682844907e-07, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 10489, + "text_loss": 0.75 + }, + { + "epoch": 0.87, + "learning_rate": 3.99814169878599e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 10490, + "text_loss": 0.56640625 + }, + { + "epoch": 0.87, + "learning_rate": 3.993027850896203e-07, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 10491, + "text_loss": 0.5859375 + }, + { + "epoch": 0.87, + "learning_rate": 3.9879171395241444e-07, + "loss": 0.4518, + "regression_loss": 0.0, + "step": 10492, + "text_loss": 0.404296875 + }, + { + "epoch": 0.87, + "learning_rate": 3.9828095650182474e-07, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 10493, + "text_loss": 0.38671875 + }, + { + "epoch": 0.87, + "learning_rate": 3.9777051277267277e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 10494, + "text_loss": 0.4375 + }, + { + "epoch": 0.87, + "learning_rate": 3.972603827997579e-07, + "loss": 0.4017, + "regression_loss": 0.0, + "step": 10495, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.87, + "learning_rate": 3.9675056661785563e-07, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 10496, + "text_loss": 0.63671875 + }, + { + "epoch": 0.87, + "learning_rate": 3.9624106426172704e-07, + "loss": 0.5828, + "regression_loss": 0.0, + "step": 10497, + "text_loss": 0.4765625 + }, + { + "epoch": 0.87, + "learning_rate": 3.9573187576610327e-07, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 10498, + "text_loss": 0.306640625 + }, + { + "epoch": 0.87, + "learning_rate": 3.9522300116570044e-07, + "loss": 0.457, + "regression_loss": 0.0, + "step": 10499, + "text_loss": 0.44140625 + }, + { + "epoch": 0.87, + "learning_rate": 3.947144404952102e-07, + "loss": 0.4785, + "regression_loss": 0.0, + "step": 10500, + "text_loss": 0.400390625 + }, + { + "epoch": 0.87, + "learning_rate": 3.9420619378930494e-07, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 10501, + "text_loss": 0.447265625 + }, + { + "epoch": 0.87, + "learning_rate": 3.9369826108263063e-07, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 10502, + "text_loss": 0.404296875 + }, + { + "epoch": 0.87, + "learning_rate": 3.931906424098203e-07, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 10503, + "text_loss": 0.6015625 + }, + { + "epoch": 0.87, + "learning_rate": 3.926833378054773e-07, + "loss": 0.5497, + "regression_loss": 0.0, + "step": 10504, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.87, + "learning_rate": 3.921763473041884e-07, + "loss": 0.3755, + "regression_loss": 0.0, + "step": 10505, + "text_loss": 0.1875 + }, + { + "epoch": 0.87, + "learning_rate": 3.916696709405177e-07, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 10506, + "text_loss": 0.447265625 + }, + { + "epoch": 0.87, + "learning_rate": 3.911633087490074e-07, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 10507, + "text_loss": 0.412109375 + }, + { + "epoch": 0.87, + "learning_rate": 3.9065726076417787e-07, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 10508, + "text_loss": 0.26171875 + }, + { + "epoch": 0.87, + "learning_rate": 3.901515270205308e-07, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 10509, + "text_loss": 0.65234375 + }, + { + "epoch": 0.87, + "learning_rate": 3.8964610755254316e-07, + "loss": 0.4136, + "regression_loss": 0.0, + "step": 10510, + "text_loss": 0.51171875 + }, + { + "epoch": 0.87, + "learning_rate": 3.891410023946718e-07, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 10511, + "text_loss": 0.7109375 + }, + { + "epoch": 0.87, + "learning_rate": 3.8863621158135247e-07, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 10512, + "text_loss": 0.478515625 + }, + { + "epoch": 0.87, + "learning_rate": 3.881317351469999e-07, + "loss": 0.49, + "regression_loss": 0.0, + "step": 10513, + "text_loss": 0.65234375 + }, + { + "epoch": 0.87, + "learning_rate": 3.876275731260054e-07, + "loss": 0.3921, + "regression_loss": 0.0, + "step": 10514, + "text_loss": 0.453125 + }, + { + "epoch": 0.87, + "learning_rate": 3.8712372555274046e-07, + "loss": 0.498, + "regression_loss": 0.0, + "step": 10515, + "text_loss": 0.435546875 + }, + { + "epoch": 0.87, + "learning_rate": 3.8662019246155536e-07, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 10516, + "text_loss": 0.42578125 + }, + { + "epoch": 0.87, + "learning_rate": 3.8611697388677817e-07, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 10517, + "text_loss": 0.41015625 + }, + { + "epoch": 0.87, + "learning_rate": 3.856140698627153e-07, + "loss": 0.4594, + "regression_loss": 0.0, + "step": 10518, + "text_loss": 0.38671875 + }, + { + "epoch": 0.87, + "learning_rate": 3.8511148042365333e-07, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 10519, + "text_loss": 0.466796875 + }, + { + "epoch": 0.87, + "learning_rate": 3.8460920560385474e-07, + "loss": 0.501, + "regression_loss": 0.0, + "step": 10520, + "text_loss": 0.3828125 + }, + { + "epoch": 0.87, + "learning_rate": 3.8410724543756274e-07, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 10521, + "text_loss": 0.765625 + }, + { + "epoch": 0.87, + "learning_rate": 3.8360559995899826e-07, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 10522, + "text_loss": 0.6875 + }, + { + "epoch": 0.87, + "learning_rate": 3.831042692023618e-07, + "loss": 0.5916, + "regression_loss": 0.0, + "step": 10523, + "text_loss": 0.466796875 + }, + { + "epoch": 0.87, + "learning_rate": 3.826032532018287e-07, + "loss": 0.47, + "regression_loss": 0.0, + "step": 10524, + "text_loss": 0.578125 + }, + { + "epoch": 0.87, + "learning_rate": 3.8210255199155943e-07, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 10525, + "text_loss": 0.5859375 + }, + { + "epoch": 0.87, + "learning_rate": 3.8160216560568673e-07, + "loss": 0.5336, + "regression_loss": 0.0, + "step": 10526, + "text_loss": 0.3671875 + }, + { + "epoch": 0.87, + "learning_rate": 3.81102094078325e-07, + "loss": 0.5586, + "regression_loss": 0.0, + "step": 10527, + "text_loss": 0.4921875 + }, + { + "epoch": 0.88, + "learning_rate": 3.8060233744356634e-07, + "loss": 0.4885, + "regression_loss": 0.0, + "step": 10528, + "text_loss": 0.5234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.8010289573548187e-07, + "loss": 0.479, + "regression_loss": 0.0, + "step": 10529, + "text_loss": 0.39453125 + }, + { + "epoch": 0.88, + "learning_rate": 3.79603768988121e-07, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 10530, + "text_loss": 0.388671875 + }, + { + "epoch": 0.88, + "learning_rate": 3.7910495723551156e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 10531, + "text_loss": 0.337890625 + }, + { + "epoch": 0.88, + "learning_rate": 3.7860646051166015e-07, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 10532, + "text_loss": 0.5078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.7810827885055024e-07, + "loss": 0.4742, + "regression_loss": 0.0, + "step": 10533, + "text_loss": 0.59765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.776104122861479e-07, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 10534, + "text_loss": 0.28125 + }, + { + "epoch": 0.88, + "learning_rate": 3.771128608523933e-07, + "loss": 0.6477, + "regression_loss": 0.0, + "step": 10535, + "text_loss": 0.93359375 + }, + { + "epoch": 0.88, + "learning_rate": 3.76615624583207e-07, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 10536, + "text_loss": 0.419921875 + }, + { + "epoch": 0.88, + "learning_rate": 3.7611870351248804e-07, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 10537, + "text_loss": 0.443359375 + }, + { + "epoch": 0.88, + "learning_rate": 3.756220976741154e-07, + "loss": 0.5659, + "regression_loss": 0.0, + "step": 10538, + "text_loss": 0.484375 + }, + { + "epoch": 0.88, + "learning_rate": 3.7512580710194257e-07, + "loss": 0.4224, + "regression_loss": 0.0, + "step": 10539, + "text_loss": 0.435546875 + }, + { + "epoch": 0.88, + "learning_rate": 3.746298318298058e-07, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 10540, + "text_loss": 0.515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.7413417189151755e-07, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 10541, + "text_loss": 0.5078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.7363882732086967e-07, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 10542, + "text_loss": 0.240234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.7314379815163227e-07, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 10543, + "text_loss": 0.38671875 + }, + { + "epoch": 0.88, + "learning_rate": 3.72649084417554e-07, + "loss": 0.3748, + "regression_loss": 0.0, + "step": 10544, + "text_loss": 0.40234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.721546861523612e-07, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 10545, + "text_loss": 0.41796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.716606033897591e-07, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 10546, + "text_loss": 0.609375 + }, + { + "epoch": 0.88, + "learning_rate": 3.7116683616343296e-07, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 10547, + "text_loss": 0.357421875 + }, + { + "epoch": 0.88, + "learning_rate": 3.706733845070454e-07, + "loss": 0.5085, + "regression_loss": 0.0, + "step": 10548, + "text_loss": 0.34765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.7018024845423497e-07, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 10549, + "text_loss": 0.412109375 + }, + { + "epoch": 0.88, + "learning_rate": 3.6968742803862424e-07, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 10550, + "text_loss": 0.40625 + }, + { + "epoch": 0.88, + "learning_rate": 3.691949232938091e-07, + "loss": 0.3877, + "regression_loss": 0.0, + "step": 10551, + "text_loss": 0.66015625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6870273425336664e-07, + "loss": 0.4615, + "regression_loss": 0.0, + "step": 10552, + "text_loss": 0.40625 + }, + { + "epoch": 0.88, + "learning_rate": 3.682108609508517e-07, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 10553, + "text_loss": 0.8046875 + }, + { + "epoch": 0.88, + "learning_rate": 3.6771930341979845e-07, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 10554, + "text_loss": 0.37890625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6722806169371685e-07, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 10555, + "text_loss": 0.296875 + }, + { + "epoch": 0.88, + "learning_rate": 3.667371358060995e-07, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 10556, + "text_loss": 0.427734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.6624652579041354e-07, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 10557, + "text_loss": 0.37890625 + }, + { + "epoch": 0.88, + "learning_rate": 3.657562316801072e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10558, + "text_loss": 0.390625 + }, + { + "epoch": 0.88, + "learning_rate": 3.652662535086049e-07, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 10559, + "text_loss": 0.421875 + }, + { + "epoch": 0.88, + "learning_rate": 3.647765913093132e-07, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 10560, + "text_loss": 0.5859375 + }, + { + "epoch": 0.88, + "learning_rate": 3.6428724511561153e-07, + "loss": 0.4237, + "regression_loss": 0.0, + "step": 10561, + "text_loss": 0.322265625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6379821496086374e-07, + "loss": 0.4791, + "regression_loss": 0.0, + "step": 10562, + "text_loss": 0.44140625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6330950087840824e-07, + "loss": 0.4319, + "regression_loss": 0.0, + "step": 10563, + "text_loss": 0.55078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.628211029015627e-07, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 10564, + "text_loss": 0.52734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.6233302106362445e-07, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 10565, + "text_loss": 0.28515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6184525539786797e-07, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 10566, + "text_loss": 0.3515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.6135780593754554e-07, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 10567, + "text_loss": 0.73828125 + }, + { + "epoch": 0.88, + "learning_rate": 3.6087067271589174e-07, + "loss": 0.536, + "regression_loss": 0.0, + "step": 10568, + "text_loss": 0.609375 + }, + { + "epoch": 0.88, + "learning_rate": 3.603838557661138e-07, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 10569, + "text_loss": 0.466796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.5989735512140187e-07, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 10570, + "text_loss": 0.28515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.5941117081492285e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10571, + "text_loss": 0.46484375 + }, + { + "epoch": 0.88, + "learning_rate": 3.5892530287982287e-07, + "loss": 0.467, + "regression_loss": 0.0, + "step": 10572, + "text_loss": 0.58203125 + }, + { + "epoch": 0.88, + "learning_rate": 3.584397513492244e-07, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 10573, + "text_loss": 0.59765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.5795451625623035e-07, + "loss": 0.4116, + "regression_loss": 0.0, + "step": 10574, + "text_loss": 0.2177734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.5746959763392264e-07, + "loss": 0.5662, + "regression_loss": 0.0, + "step": 10575, + "text_loss": 0.62890625 + }, + { + "epoch": 0.88, + "learning_rate": 3.569849955153593e-07, + "loss": 0.4589, + "regression_loss": 0.0, + "step": 10576, + "text_loss": 0.66796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.565007099335782e-07, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 10577, + "text_loss": 0.5234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.560167409215964e-07, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 10578, + "text_loss": 0.51171875 + }, + { + "epoch": 0.88, + "learning_rate": 3.5553308851240807e-07, + "loss": 0.3851, + "regression_loss": 0.0, + "step": 10579, + "text_loss": 0.248046875 + }, + { + "epoch": 0.88, + "learning_rate": 3.55049752738984e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 10580, + "text_loss": 0.33203125 + }, + { + "epoch": 0.88, + "learning_rate": 3.5456673363427954e-07, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 10581, + "text_loss": 0.640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.540840312312205e-07, + "loss": 0.5057, + "regression_loss": 0.0, + "step": 10582, + "text_loss": 0.4609375 + }, + { + "epoch": 0.88, + "learning_rate": 3.5360164556271726e-07, + "loss": 0.4193, + "regression_loss": 0.0, + "step": 10583, + "text_loss": 0.53515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.5311957666165574e-07, + "loss": 0.6052, + "regression_loss": 0.0, + "step": 10584, + "text_loss": 0.578125 + }, + { + "epoch": 0.88, + "learning_rate": 3.526378245609019e-07, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 10585, + "text_loss": 0.38671875 + }, + { + "epoch": 0.88, + "learning_rate": 3.521563892932967e-07, + "loss": 0.5209, + "regression_loss": 0.0, + "step": 10586, + "text_loss": 0.435546875 + }, + { + "epoch": 0.88, + "learning_rate": 3.516752708916649e-07, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 10587, + "text_loss": 0.6015625 + }, + { + "epoch": 0.88, + "learning_rate": 3.511944693888042e-07, + "loss": 0.4478, + "regression_loss": 0.0, + "step": 10588, + "text_loss": 0.53125 + }, + { + "epoch": 0.88, + "learning_rate": 3.507139848174945e-07, + "loss": 0.499, + "regression_loss": 0.0, + "step": 10589, + "text_loss": 0.5625 + }, + { + "epoch": 0.88, + "learning_rate": 3.502338172104924e-07, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 10590, + "text_loss": 0.53125 + }, + { + "epoch": 0.88, + "learning_rate": 3.49753966600534e-07, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 10591, + "text_loss": 0.7421875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4927443302033127e-07, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 10592, + "text_loss": 0.431640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.4879521650257764e-07, + "loss": 0.4684, + "regression_loss": 0.0, + "step": 10593, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4831631707994304e-07, + "loss": 0.5676, + "regression_loss": 0.0, + "step": 10594, + "text_loss": 0.44140625 + }, + { + "epoch": 0.88, + "learning_rate": 3.47837734785077e-07, + "loss": 0.493, + "regression_loss": 0.0, + "step": 10595, + "text_loss": 0.375 + }, + { + "epoch": 0.88, + "learning_rate": 3.4735946965060605e-07, + "loss": 0.4584, + "regression_loss": 0.0, + "step": 10596, + "text_loss": 0.59765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.4688152170913705e-07, + "loss": 0.5557, + "regression_loss": 0.0, + "step": 10597, + "text_loss": 0.66796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4640389099325265e-07, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 10598, + "text_loss": 0.6796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4592657753551583e-07, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 10599, + "text_loss": 0.59375 + }, + { + "epoch": 0.88, + "learning_rate": 3.454495813684672e-07, + "loss": 0.6018, + "regression_loss": 0.0, + "step": 10600, + "text_loss": 0.66015625 + }, + { + "epoch": 0.88, + "learning_rate": 3.449729025246262e-07, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 10601, + "text_loss": 0.56640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.4449654103649033e-07, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 10602, + "text_loss": 0.25 + }, + { + "epoch": 0.88, + "learning_rate": 3.440204969365357e-07, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 10603, + "text_loss": 0.6796875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4354477025721533e-07, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 10604, + "text_loss": 0.51171875 + }, + { + "epoch": 0.88, + "learning_rate": 3.430693610309627e-07, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 10605, + "text_loss": 0.59765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.42594269290189e-07, + "loss": 0.543, + "regression_loss": 0.0, + "step": 10606, + "text_loss": 0.77734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.421194950672835e-07, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 10607, + "text_loss": 0.52734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.4164503839461184e-07, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 10608, + "text_loss": 0.490234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.4117089930452376e-07, + "loss": 0.3999, + "regression_loss": 0.0, + "step": 10609, + "text_loss": 0.2890625 + }, + { + "epoch": 0.88, + "learning_rate": 3.4069707782934056e-07, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 10610, + "text_loss": 0.421875 + }, + { + "epoch": 0.88, + "learning_rate": 3.4022357400136595e-07, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 10611, + "text_loss": 0.408203125 + }, + { + "epoch": 0.88, + "learning_rate": 3.3975038785288127e-07, + "loss": 0.446, + "regression_loss": 0.0, + "step": 10612, + "text_loss": 0.3046875 + }, + { + "epoch": 0.88, + "learning_rate": 3.392775194161463e-07, + "loss": 0.3984, + "regression_loss": 0.0, + "step": 10613, + "text_loss": 0.365234375 + }, + { + "epoch": 0.88, + "learning_rate": 3.388049687233963e-07, + "loss": 0.5366, + "regression_loss": 0.0, + "step": 10614, + "text_loss": 0.56640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.3833273580685056e-07, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 10615, + "text_loss": 0.61328125 + }, + { + "epoch": 0.88, + "learning_rate": 3.3786082069870164e-07, + "loss": 0.5303, + "regression_loss": 0.0, + "step": 10616, + "text_loss": 0.59375 + }, + { + "epoch": 0.88, + "learning_rate": 3.373892234311227e-07, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 10617, + "text_loss": 0.3359375 + }, + { + "epoch": 0.88, + "learning_rate": 3.369179440362641e-07, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 10618, + "text_loss": 0.318359375 + }, + { + "epoch": 0.88, + "learning_rate": 3.364469825462574e-07, + "loss": 0.3818, + "regression_loss": 0.0, + "step": 10619, + "text_loss": 0.4921875 + }, + { + "epoch": 0.88, + "learning_rate": 3.3597633899320746e-07, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 10620, + "text_loss": 0.53515625 + }, + { + "epoch": 0.88, + "learning_rate": 3.355060134092025e-07, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 10621, + "text_loss": 0.5390625 + }, + { + "epoch": 0.88, + "learning_rate": 3.350360058263058e-07, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 10622, + "text_loss": 0.375 + }, + { + "epoch": 0.88, + "learning_rate": 3.3456631627656053e-07, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 10623, + "text_loss": 0.3828125 + }, + { + "epoch": 0.88, + "learning_rate": 3.340969447919873e-07, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 10624, + "text_loss": 0.26171875 + }, + { + "epoch": 0.88, + "learning_rate": 3.336278914045854e-07, + "loss": 0.4655, + "regression_loss": 0.0, + "step": 10625, + "text_loss": 0.486328125 + }, + { + "epoch": 0.88, + "learning_rate": 3.3315915614633374e-07, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 10626, + "text_loss": 0.283203125 + }, + { + "epoch": 0.88, + "learning_rate": 3.326907390491857e-07, + "loss": 0.4412, + "regression_loss": 0.0, + "step": 10627, + "text_loss": 0.5703125 + }, + { + "epoch": 0.88, + "learning_rate": 3.32222640145079e-07, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 10628, + "text_loss": 0.64453125 + }, + { + "epoch": 0.88, + "learning_rate": 3.317548594659231e-07, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 10629, + "text_loss": 0.78125 + }, + { + "epoch": 0.88, + "learning_rate": 3.312873970436098e-07, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 10630, + "text_loss": 0.65625 + }, + { + "epoch": 0.88, + "learning_rate": 3.3082025291000906e-07, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 10631, + "text_loss": 0.30078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.3035342709696826e-07, + "loss": 0.4844, + "regression_loss": 0.0, + "step": 10632, + "text_loss": 0.4765625 + }, + { + "epoch": 0.88, + "learning_rate": 3.2988691963631136e-07, + "loss": 0.4481, + "regression_loss": 0.0, + "step": 10633, + "text_loss": 0.46875 + }, + { + "epoch": 0.88, + "learning_rate": 3.2942073055984515e-07, + "loss": 0.5133, + "regression_loss": 0.0, + "step": 10634, + "text_loss": 0.58203125 + }, + { + "epoch": 0.88, + "learning_rate": 3.289548598993503e-07, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 10635, + "text_loss": 0.275390625 + }, + { + "epoch": 0.88, + "learning_rate": 3.2848930768658746e-07, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 10636, + "text_loss": 0.32421875 + }, + { + "epoch": 0.88, + "learning_rate": 3.280240739532964e-07, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 10637, + "text_loss": 0.275390625 + }, + { + "epoch": 0.88, + "learning_rate": 3.275591587311938e-07, + "loss": 0.5718, + "regression_loss": 0.0, + "step": 10638, + "text_loss": 0.734375 + }, + { + "epoch": 0.88, + "learning_rate": 3.2709456205197445e-07, + "loss": 0.4277, + "regression_loss": 0.0, + "step": 10639, + "text_loss": 0.55078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.2663028394731453e-07, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 10640, + "text_loss": 0.66015625 + }, + { + "epoch": 0.88, + "learning_rate": 3.261663244488633e-07, + "loss": 0.4738, + "regression_loss": 0.0, + "step": 10641, + "text_loss": 0.5625 + }, + { + "epoch": 0.88, + "learning_rate": 3.2570268358825263e-07, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 10642, + "text_loss": 0.4921875 + }, + { + "epoch": 0.88, + "learning_rate": 3.252393613970911e-07, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 10643, + "text_loss": 0.388671875 + }, + { + "epoch": 0.88, + "learning_rate": 3.247763579069663e-07, + "loss": 0.4579, + "regression_loss": 0.0, + "step": 10644, + "text_loss": 0.181640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.243136731494406e-07, + "loss": 0.5104, + "regression_loss": 0.0, + "step": 10645, + "text_loss": 0.56640625 + }, + { + "epoch": 0.88, + "learning_rate": 3.2385130715606117e-07, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 10646, + "text_loss": 0.455078125 + }, + { + "epoch": 0.88, + "learning_rate": 3.2338925995834716e-07, + "loss": 0.4331, + "regression_loss": 0.0, + "step": 10647, + "text_loss": 0.359375 + }, + { + "epoch": 0.88, + "learning_rate": 3.2292753158779945e-07, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 10648, + "text_loss": 0.37890625 + }, + { + "epoch": 0.89, + "learning_rate": 3.2246612207589623e-07, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 10649, + "text_loss": 0.515625 + }, + { + "epoch": 0.89, + "learning_rate": 3.2200503145409456e-07, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 10650, + "text_loss": 0.51953125 + }, + { + "epoch": 0.89, + "learning_rate": 3.215442597538282e-07, + "loss": 0.3894, + "regression_loss": 0.0, + "step": 10651, + "text_loss": 0.32421875 + }, + { + "epoch": 0.89, + "learning_rate": 3.210838070065103e-07, + "loss": 0.4531, + "regression_loss": 0.0, + "step": 10652, + "text_loss": 0.4765625 + }, + { + "epoch": 0.89, + "learning_rate": 3.206236732435325e-07, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 10653, + "text_loss": 0.3671875 + }, + { + "epoch": 0.89, + "learning_rate": 3.2016385849626463e-07, + "loss": 0.469, + "regression_loss": 0.0, + "step": 10654, + "text_loss": 0.546875 + }, + { + "epoch": 0.89, + "learning_rate": 3.1970436279605446e-07, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 10655, + "text_loss": 0.494140625 + }, + { + "epoch": 0.89, + "learning_rate": 3.1924518617422796e-07, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 10656, + "text_loss": 0.3359375 + }, + { + "epoch": 0.89, + "learning_rate": 3.1878632866208905e-07, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 10657, + "text_loss": 0.451171875 + }, + { + "epoch": 0.89, + "learning_rate": 3.1832779029091987e-07, + "loss": 0.4659, + "regression_loss": 0.0, + "step": 10658, + "text_loss": 0.5234375 + }, + { + "epoch": 0.89, + "learning_rate": 3.178695710919821e-07, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 10659, + "text_loss": 0.76171875 + }, + { + "epoch": 0.89, + "learning_rate": 3.174116710965153e-07, + "loss": 0.4246, + "regression_loss": 0.0, + "step": 10660, + "text_loss": 0.296875 + }, + { + "epoch": 0.89, + "learning_rate": 3.169540903357343e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 10661, + "text_loss": 0.37109375 + }, + { + "epoch": 0.89, + "learning_rate": 3.164968288408371e-07, + "loss": 0.5852, + "regression_loss": 0.0, + "step": 10662, + "text_loss": 0.369140625 + }, + { + "epoch": 0.89, + "learning_rate": 3.1603988664299645e-07, + "loss": 0.429, + "regression_loss": 0.0, + "step": 10663, + "text_loss": 0.75390625 + }, + { + "epoch": 0.89, + "learning_rate": 3.155832637733641e-07, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 10664, + "text_loss": 0.5 + }, + { + "epoch": 0.89, + "learning_rate": 3.151269602630708e-07, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 10665, + "text_loss": 0.40234375 + }, + { + "epoch": 0.89, + "learning_rate": 3.1467097614322485e-07, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 10666, + "text_loss": 0.65234375 + }, + { + "epoch": 0.89, + "learning_rate": 3.1421531144491147e-07, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 10667, + "text_loss": 0.62109375 + }, + { + "epoch": 0.89, + "learning_rate": 3.13759966199198e-07, + "loss": 0.5383, + "regression_loss": 0.0, + "step": 10668, + "text_loss": 0.41796875 + }, + { + "epoch": 0.89, + "learning_rate": 3.1330494043712576e-07, + "loss": 0.5667, + "regression_loss": 0.0, + "step": 10669, + "text_loss": 0.6171875 + }, + { + "epoch": 0.89, + "learning_rate": 3.128502341897166e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 10670, + "text_loss": 0.494140625 + }, + { + "epoch": 0.89, + "learning_rate": 3.1239584748797016e-07, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 10671, + "text_loss": 0.462890625 + }, + { + "epoch": 0.89, + "learning_rate": 3.119417803628638e-07, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 10672, + "text_loss": 0.373046875 + }, + { + "epoch": 0.89, + "learning_rate": 3.1148803284535344e-07, + "loss": 0.494, + "regression_loss": 0.0, + "step": 10673, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.89, + "learning_rate": 3.110346049663737e-07, + "loss": 0.4082, + "regression_loss": 0.0, + "step": 10674, + "text_loss": 0.373046875 + }, + { + "epoch": 0.89, + "learning_rate": 3.105814967568377e-07, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 10675, + "text_loss": 0.5859375 + }, + { + "epoch": 0.89, + "learning_rate": 3.1012870824763454e-07, + "loss": 0.5996, + "regression_loss": 0.0, + "step": 10676, + "text_loss": 0.53125 + }, + { + "epoch": 0.89, + "learning_rate": 3.096762394696329e-07, + "loss": 0.4779, + "regression_loss": 0.0, + "step": 10677, + "text_loss": 0.6796875 + }, + { + "epoch": 0.89, + "learning_rate": 3.092240904536808e-07, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 10678, + "text_loss": 0.2158203125 + }, + { + "epoch": 0.89, + "learning_rate": 3.087722612306032e-07, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 10679, + "text_loss": 0.5 + }, + { + "epoch": 0.89, + "learning_rate": 3.0832075183120314e-07, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 10680, + "text_loss": 0.390625 + }, + { + "epoch": 0.89, + "learning_rate": 3.0786956228626264e-07, + "loss": 0.509, + "regression_loss": 0.0, + "step": 10681, + "text_loss": 0.359375 + }, + { + "epoch": 0.89, + "learning_rate": 3.07418692626541e-07, + "loss": 0.4893, + "regression_loss": 0.0, + "step": 10682, + "text_loss": 0.33984375 + }, + { + "epoch": 0.89, + "learning_rate": 3.0696814288277645e-07, + "loss": 0.406, + "regression_loss": 0.0, + "step": 10683, + "text_loss": 0.62890625 + }, + { + "epoch": 0.89, + "learning_rate": 3.065179130856849e-07, + "loss": 0.4376, + "regression_loss": 0.0, + "step": 10684, + "text_loss": 0.447265625 + }, + { + "epoch": 0.89, + "learning_rate": 3.060680032659613e-07, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 10685, + "text_loss": 0.4921875 + }, + { + "epoch": 0.89, + "learning_rate": 3.0561841345427665e-07, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 10686, + "text_loss": 0.5078125 + }, + { + "epoch": 0.89, + "learning_rate": 3.0516914368128426e-07, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 10687, + "text_loss": 0.4375 + }, + { + "epoch": 0.89, + "learning_rate": 3.0472019397761065e-07, + "loss": 0.4258, + "regression_loss": 0.0, + "step": 10688, + "text_loss": 0.6953125 + }, + { + "epoch": 0.89, + "learning_rate": 3.0427156437386363e-07, + "loss": 0.5623, + "regression_loss": 0.0, + "step": 10689, + "text_loss": 0.42578125 + }, + { + "epoch": 0.89, + "learning_rate": 3.038232549006287e-07, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 10690, + "text_loss": 0.431640625 + }, + { + "epoch": 0.89, + "learning_rate": 3.0337526558846977e-07, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 10691, + "text_loss": 0.318359375 + }, + { + "epoch": 0.89, + "learning_rate": 3.029275964679268e-07, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 10692, + "text_loss": 0.408203125 + }, + { + "epoch": 0.89, + "learning_rate": 3.0248024756952154e-07, + "loss": 0.5935, + "regression_loss": 0.0, + "step": 10693, + "text_loss": 0.69921875 + }, + { + "epoch": 0.89, + "learning_rate": 3.020332189237507e-07, + "loss": 0.4708, + "regression_loss": 0.0, + "step": 10694, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.89, + "learning_rate": 3.0158651056109044e-07, + "loss": 0.4751, + "regression_loss": 0.0, + "step": 10695, + "text_loss": 0.443359375 + }, + { + "epoch": 0.89, + "learning_rate": 3.0114012251199534e-07, + "loss": 0.4523, + "regression_loss": 0.0, + "step": 10696, + "text_loss": 0.396484375 + }, + { + "epoch": 0.89, + "learning_rate": 3.0069405480689874e-07, + "loss": 0.4471, + "regression_loss": 0.0, + "step": 10697, + "text_loss": 0.64453125 + }, + { + "epoch": 0.89, + "learning_rate": 3.00248307476208e-07, + "loss": 0.4534, + "regression_loss": 0.0, + "step": 10698, + "text_loss": 0.412109375 + }, + { + "epoch": 0.89, + "learning_rate": 2.9980288055031613e-07, + "loss": 0.5339, + "regression_loss": 0.0, + "step": 10699, + "text_loss": 0.5625 + }, + { + "epoch": 0.89, + "learning_rate": 2.993577740595871e-07, + "loss": 0.498, + "regression_loss": 0.0, + "step": 10700, + "text_loss": 0.3671875 + }, + { + "epoch": 0.89, + "learning_rate": 2.989129880343672e-07, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 10701, + "text_loss": 0.44140625 + }, + { + "epoch": 0.89, + "learning_rate": 2.984685225049794e-07, + "loss": 0.4551, + "regression_loss": 0.0, + "step": 10702, + "text_loss": 0.296875 + }, + { + "epoch": 0.89, + "learning_rate": 2.9802437750172494e-07, + "loss": 0.3766, + "regression_loss": 0.0, + "step": 10703, + "text_loss": 0.1982421875 + }, + { + "epoch": 0.89, + "learning_rate": 2.9758055305488365e-07, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 10704, + "text_loss": 0.52734375 + }, + { + "epoch": 0.89, + "learning_rate": 2.971370491947123e-07, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 10705, + "text_loss": 0.353515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.966938659514473e-07, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 10706, + "text_loss": 0.5546875 + }, + { + "epoch": 0.89, + "learning_rate": 2.9625100335530285e-07, + "loss": 0.4431, + "regression_loss": 0.0, + "step": 10707, + "text_loss": 0.4921875 + }, + { + "epoch": 0.89, + "learning_rate": 2.958084614364709e-07, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 10708, + "text_loss": 0.412109375 + }, + { + "epoch": 0.89, + "learning_rate": 2.953662402251223e-07, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 10709, + "text_loss": 0.82421875 + }, + { + "epoch": 0.89, + "learning_rate": 2.949243397514034e-07, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 10710, + "text_loss": 0.84375 + }, + { + "epoch": 0.89, + "learning_rate": 2.944827600454425e-07, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 10711, + "text_loss": 0.310546875 + }, + { + "epoch": 0.89, + "learning_rate": 2.9404150113734374e-07, + "loss": 0.437, + "regression_loss": 0.0, + "step": 10712, + "text_loss": 0.392578125 + }, + { + "epoch": 0.89, + "learning_rate": 2.9360056305718977e-07, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 10713, + "text_loss": 0.57421875 + }, + { + "epoch": 0.89, + "learning_rate": 2.931599458350415e-07, + "loss": 0.4445, + "regression_loss": 0.0, + "step": 10714, + "text_loss": 0.34375 + }, + { + "epoch": 0.89, + "learning_rate": 2.9271964950093824e-07, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 10715, + "text_loss": 0.60546875 + }, + { + "epoch": 0.89, + "learning_rate": 2.9227967408489653e-07, + "loss": 0.4427, + "regression_loss": 0.0, + "step": 10716, + "text_loss": 0.380859375 + }, + { + "epoch": 0.89, + "learning_rate": 2.918400196169113e-07, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 10717, + "text_loss": 0.50390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.9140068612695804e-07, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 10718, + "text_loss": 0.50390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.909616736449855e-07, + "loss": 0.4805, + "regression_loss": 0.0, + "step": 10719, + "text_loss": 0.65625 + }, + { + "epoch": 0.89, + "learning_rate": 2.905229822009253e-07, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 10720, + "text_loss": 0.25390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.900846118246836e-07, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 10721, + "text_loss": 0.54296875 + }, + { + "epoch": 0.89, + "learning_rate": 2.8964656254614866e-07, + "loss": 0.512, + "regression_loss": 0.0, + "step": 10722, + "text_loss": 0.91796875 + }, + { + "epoch": 0.89, + "learning_rate": 2.8920883439518045e-07, + "loss": 0.5408, + "regression_loss": 0.0, + "step": 10723, + "text_loss": 0.49609375 + }, + { + "epoch": 0.89, + "learning_rate": 2.887714274016251e-07, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 10724, + "text_loss": 0.44921875 + }, + { + "epoch": 0.89, + "learning_rate": 2.883343415953005e-07, + "loss": 0.437, + "regression_loss": 0.0, + "step": 10725, + "text_loss": 0.275390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.878975770060055e-07, + "loss": 0.4932, + "regression_loss": 0.0, + "step": 10726, + "text_loss": 0.376953125 + }, + { + "epoch": 0.89, + "learning_rate": 2.874611336635158e-07, + "loss": 0.4607, + "regression_loss": 0.0, + "step": 10727, + "text_loss": 0.5 + }, + { + "epoch": 0.89, + "learning_rate": 2.8702501159758757e-07, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 10728, + "text_loss": 0.5390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.8658921083795145e-07, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 10729, + "text_loss": 0.427734375 + }, + { + "epoch": 0.89, + "learning_rate": 2.861537314143187e-07, + "loss": 0.5525, + "regression_loss": 0.0, + "step": 10730, + "text_loss": 0.5859375 + }, + { + "epoch": 0.89, + "learning_rate": 2.857185733563783e-07, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 10731, + "text_loss": 0.53515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.8528373669379714e-07, + "loss": 0.478, + "regression_loss": 0.0, + "step": 10732, + "text_loss": 0.70703125 + }, + { + "epoch": 0.89, + "learning_rate": 2.848492214562198e-07, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 10733, + "text_loss": 0.54296875 + }, + { + "epoch": 0.89, + "learning_rate": 2.8441502767327045e-07, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 10734, + "text_loss": 0.345703125 + }, + { + "epoch": 0.89, + "learning_rate": 2.8398115537454806e-07, + "loss": 0.5382, + "regression_loss": 0.0, + "step": 10735, + "text_loss": 0.5625 + }, + { + "epoch": 0.89, + "learning_rate": 2.8354760458963293e-07, + "loss": 0.4755, + "regression_loss": 0.0, + "step": 10736, + "text_loss": 0.7109375 + }, + { + "epoch": 0.89, + "learning_rate": 2.831143753480825e-07, + "loss": 0.382, + "regression_loss": 0.0, + "step": 10737, + "text_loss": 0.53515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.826814676794326e-07, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 10738, + "text_loss": 0.734375 + }, + { + "epoch": 0.89, + "learning_rate": 2.822488816131946e-07, + "loss": 0.3713, + "regression_loss": 0.0, + "step": 10739, + "text_loss": 0.43359375 + }, + { + "epoch": 0.89, + "learning_rate": 2.818166171788628e-07, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 10740, + "text_loss": 0.326171875 + }, + { + "epoch": 0.89, + "learning_rate": 2.8138467440590455e-07, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 10741, + "text_loss": 0.53125 + }, + { + "epoch": 0.89, + "learning_rate": 2.809530533237681e-07, + "loss": 0.5579, + "regression_loss": 0.0, + "step": 10742, + "text_loss": 0.291015625 + }, + { + "epoch": 0.89, + "learning_rate": 2.805217539618793e-07, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 10743, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.89, + "learning_rate": 2.80090776349643e-07, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 10744, + "text_loss": 0.435546875 + }, + { + "epoch": 0.89, + "learning_rate": 2.7966012051643786e-07, + "loss": 0.4835, + "regression_loss": 0.0, + "step": 10745, + "text_loss": 0.58984375 + }, + { + "epoch": 0.89, + "learning_rate": 2.792297864916277e-07, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 10746, + "text_loss": 0.59765625 + }, + { + "epoch": 0.89, + "learning_rate": 2.7879977430454785e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 10747, + "text_loss": 0.24609375 + }, + { + "epoch": 0.89, + "learning_rate": 2.783700839845149e-07, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 10748, + "text_loss": 0.296875 + }, + { + "epoch": 0.89, + "learning_rate": 2.779407155608227e-07, + "loss": 0.3899, + "regression_loss": 0.0, + "step": 10749, + "text_loss": 0.388671875 + }, + { + "epoch": 0.89, + "learning_rate": 2.7751166906274494e-07, + "loss": 0.4374, + "regression_loss": 0.0, + "step": 10750, + "text_loss": 0.5234375 + }, + { + "epoch": 0.89, + "learning_rate": 2.7708294451952943e-07, + "loss": 0.5, + "regression_loss": 0.0, + "step": 10751, + "text_loss": 0.66015625 + }, + { + "epoch": 0.89, + "learning_rate": 2.7665454196040665e-07, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 10752, + "text_loss": 0.5703125 + }, + { + "epoch": 0.89, + "learning_rate": 2.7622646141458097e-07, + "loss": 0.3826, + "regression_loss": 0.0, + "step": 10753, + "text_loss": 0.38671875 + }, + { + "epoch": 0.89, + "learning_rate": 2.7579870291123743e-07, + "loss": 0.443, + "regression_loss": 0.0, + "step": 10754, + "text_loss": 0.515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.753712664795388e-07, + "loss": 0.5513, + "regression_loss": 0.0, + "step": 10755, + "text_loss": 0.412109375 + }, + { + "epoch": 0.89, + "learning_rate": 2.7494415214862567e-07, + "loss": 0.4939, + "regression_loss": 0.0, + "step": 10756, + "text_loss": 0.443359375 + }, + { + "epoch": 0.89, + "learning_rate": 2.745173599476147e-07, + "loss": 0.512, + "regression_loss": 0.0, + "step": 10757, + "text_loss": 0.41015625 + }, + { + "epoch": 0.89, + "learning_rate": 2.740908899056055e-07, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 10758, + "text_loss": 0.47265625 + }, + { + "epoch": 0.89, + "learning_rate": 2.736647420516697e-07, + "loss": 0.4458, + "regression_loss": 0.0, + "step": 10759, + "text_loss": 0.5703125 + }, + { + "epoch": 0.89, + "learning_rate": 2.7323891641486077e-07, + "loss": 0.4613, + "regression_loss": 0.0, + "step": 10760, + "text_loss": 0.498046875 + }, + { + "epoch": 0.89, + "learning_rate": 2.7281341302420996e-07, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 10761, + "text_loss": 0.5390625 + }, + { + "epoch": 0.89, + "learning_rate": 2.7238823190872565e-07, + "loss": 0.3584, + "regression_loss": 0.0, + "step": 10762, + "text_loss": 0.33203125 + }, + { + "epoch": 0.89, + "learning_rate": 2.719633730973942e-07, + "loss": 0.49, + "regression_loss": 0.0, + "step": 10763, + "text_loss": 0.470703125 + }, + { + "epoch": 0.89, + "learning_rate": 2.71538836619179e-07, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 10764, + "text_loss": 0.515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.711146225030253e-07, + "loss": 0.5093, + "regression_loss": 0.0, + "step": 10765, + "text_loss": 0.5546875 + }, + { + "epoch": 0.89, + "learning_rate": 2.706907307778528e-07, + "loss": 0.4958, + "regression_loss": 0.0, + "step": 10766, + "text_loss": 0.353515625 + }, + { + "epoch": 0.89, + "learning_rate": 2.702671614725594e-07, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 10767, + "text_loss": 0.58203125 + }, + { + "epoch": 0.89, + "learning_rate": 2.6984391461602255e-07, + "loss": 0.519, + "regression_loss": 0.0, + "step": 10768, + "text_loss": 0.66015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.694209902370981e-07, + "loss": 0.416, + "regression_loss": 0.0, + "step": 10769, + "text_loss": 0.4609375 + }, + { + "epoch": 0.9, + "learning_rate": 2.689983883646158e-07, + "loss": 0.491, + "regression_loss": 0.0, + "step": 10770, + "text_loss": 0.5859375 + }, + { + "epoch": 0.9, + "learning_rate": 2.685761090273903e-07, + "loss": 0.5288, + "regression_loss": 0.0, + "step": 10771, + "text_loss": 0.56640625 + }, + { + "epoch": 0.9, + "learning_rate": 2.681541522542075e-07, + "loss": 0.4216, + "regression_loss": 0.0, + "step": 10772, + "text_loss": 0.248046875 + }, + { + "epoch": 0.9, + "learning_rate": 2.67732518073836e-07, + "loss": 0.402, + "regression_loss": 0.0, + "step": 10773, + "text_loss": 0.5390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.6731120651501907e-07, + "loss": 0.52, + "regression_loss": 0.0, + "step": 10774, + "text_loss": 0.48046875 + }, + { + "epoch": 0.9, + "learning_rate": 2.668902176064819e-07, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 10775, + "text_loss": 0.5859375 + }, + { + "epoch": 0.9, + "learning_rate": 2.6646955137692166e-07, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 10776, + "text_loss": 0.82421875 + }, + { + "epoch": 0.9, + "learning_rate": 2.6604920785502143e-07, + "loss": 0.5171, + "regression_loss": 0.0, + "step": 10777, + "text_loss": 0.337890625 + }, + { + "epoch": 0.9, + "learning_rate": 2.65629187069435e-07, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 10778, + "text_loss": 0.21875 + }, + { + "epoch": 0.9, + "learning_rate": 2.652094890487983e-07, + "loss": 0.4807, + "regression_loss": 0.0, + "step": 10779, + "text_loss": 0.234375 + }, + { + "epoch": 0.9, + "learning_rate": 2.6479011382172404e-07, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 10780, + "text_loss": 0.50390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.6437106141680433e-07, + "loss": 0.3855, + "regression_loss": 0.0, + "step": 10781, + "text_loss": 0.498046875 + }, + { + "epoch": 0.9, + "learning_rate": 2.639523318626047e-07, + "loss": 0.425, + "regression_loss": 0.0, + "step": 10782, + "text_loss": 0.263671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.6353392518767617e-07, + "loss": 0.4565, + "regression_loss": 0.0, + "step": 10783, + "text_loss": 0.7734375 + }, + { + "epoch": 0.9, + "learning_rate": 2.6311584142054036e-07, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 10784, + "text_loss": 0.23828125 + }, + { + "epoch": 0.9, + "learning_rate": 2.626980805897011e-07, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 10785, + "text_loss": 0.388671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.622806427236391e-07, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 10786, + "text_loss": 0.458984375 + }, + { + "epoch": 0.9, + "learning_rate": 2.6186352785081413e-07, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 10787, + "text_loss": 0.5078125 + }, + { + "epoch": 0.9, + "learning_rate": 2.614467359996614e-07, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 10788, + "text_loss": 0.390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.610302671985965e-07, + "loss": 0.491, + "regression_loss": 0.0, + "step": 10789, + "text_loss": 0.5625 + }, + { + "epoch": 0.9, + "learning_rate": 2.6061412147601163e-07, + "loss": 0.5745, + "regression_loss": 0.0, + "step": 10790, + "text_loss": 0.6484375 + }, + { + "epoch": 0.9, + "learning_rate": 2.6019829886027745e-07, + "loss": 0.446, + "regression_loss": 0.0, + "step": 10791, + "text_loss": 0.322265625 + }, + { + "epoch": 0.9, + "learning_rate": 2.5978279937974303e-07, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 10792, + "text_loss": 0.68359375 + }, + { + "epoch": 0.9, + "learning_rate": 2.593676230627357e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 10793, + "text_loss": 0.49609375 + }, + { + "epoch": 0.9, + "learning_rate": 2.5895276993755823e-07, + "loss": 0.3727, + "regression_loss": 0.0, + "step": 10794, + "text_loss": 0.28515625 + }, + { + "epoch": 0.9, + "learning_rate": 2.585382400324943e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 10795, + "text_loss": 0.4375 + }, + { + "epoch": 0.9, + "learning_rate": 2.5812403337580396e-07, + "loss": 0.3977, + "regression_loss": 0.0, + "step": 10796, + "text_loss": 0.3359375 + }, + { + "epoch": 0.9, + "learning_rate": 2.5771014999572684e-07, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 10797, + "text_loss": 0.298828125 + }, + { + "epoch": 0.9, + "learning_rate": 2.5729658992047655e-07, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 10798, + "text_loss": 0.56640625 + }, + { + "epoch": 0.9, + "learning_rate": 2.568833531782511e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10799, + "text_loss": 0.6171875 + }, + { + "epoch": 0.9, + "learning_rate": 2.5647043979722066e-07, + "loss": 0.489, + "regression_loss": 0.0, + "step": 10800, + "text_loss": 0.453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.560578498055355e-07, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 10801, + "text_loss": 0.33203125 + }, + { + "epoch": 0.9, + "learning_rate": 2.556455832313248e-07, + "loss": 0.3971, + "regression_loss": 0.0, + "step": 10802, + "text_loss": 0.44140625 + }, + { + "epoch": 0.9, + "learning_rate": 2.5523364010269503e-07, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 10803, + "text_loss": 0.439453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.548220204477281e-07, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 10804, + "text_loss": 0.498046875 + }, + { + "epoch": 0.9, + "learning_rate": 2.544107242944888e-07, + "loss": 0.48, + "regression_loss": 0.0, + "step": 10805, + "text_loss": 0.4453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.539997516710152e-07, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 10806, + "text_loss": 0.55078125 + }, + { + "epoch": 0.9, + "learning_rate": 2.5358910260532665e-07, + "loss": 0.4814, + "regression_loss": 0.0, + "step": 10807, + "text_loss": 0.3125 + }, + { + "epoch": 0.9, + "learning_rate": 2.531787771254185e-07, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 10808, + "text_loss": 0.75 + }, + { + "epoch": 0.9, + "learning_rate": 2.5276877525926504e-07, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 10809, + "text_loss": 0.91015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.523590970348166e-07, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 10810, + "text_loss": 0.53125 + }, + { + "epoch": 0.9, + "learning_rate": 2.5194974248000435e-07, + "loss": 0.5063, + "regression_loss": 0.0, + "step": 10811, + "text_loss": 0.55859375 + }, + { + "epoch": 0.9, + "learning_rate": 2.5154071162273695e-07, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 10812, + "text_loss": 0.419921875 + }, + { + "epoch": 0.9, + "learning_rate": 2.511320044908977e-07, + "loss": 0.433, + "regression_loss": 0.0, + "step": 10813, + "text_loss": 0.54296875 + }, + { + "epoch": 0.9, + "learning_rate": 2.5072362111235094e-07, + "loss": 0.413, + "regression_loss": 0.0, + "step": 10814, + "text_loss": 0.5859375 + }, + { + "epoch": 0.9, + "learning_rate": 2.5031556151493883e-07, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 10815, + "text_loss": 0.470703125 + }, + { + "epoch": 0.9, + "learning_rate": 2.4990782572647977e-07, + "loss": 0.5239, + "regression_loss": 0.0, + "step": 10816, + "text_loss": 0.458984375 + }, + { + "epoch": 0.9, + "learning_rate": 2.4950041377477195e-07, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 10817, + "text_loss": 0.5 + }, + { + "epoch": 0.9, + "learning_rate": 2.49093325687591e-07, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 10818, + "text_loss": 0.55078125 + }, + { + "epoch": 0.9, + "learning_rate": 2.486865614926887e-07, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 10819, + "text_loss": 0.310546875 + }, + { + "epoch": 0.9, + "learning_rate": 2.4828012121779654e-07, + "loss": 0.449, + "regression_loss": 0.0, + "step": 10820, + "text_loss": 0.197265625 + }, + { + "epoch": 0.9, + "learning_rate": 2.478740048906236e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 10821, + "text_loss": 0.37890625 + }, + { + "epoch": 0.9, + "learning_rate": 2.474682125388578e-07, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 10822, + "text_loss": 0.54296875 + }, + { + "epoch": 0.9, + "learning_rate": 2.4706274419016185e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 10823, + "text_loss": 0.38671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.466575998721815e-07, + "loss": 0.4594, + "regression_loss": 0.0, + "step": 10824, + "text_loss": 0.484375 + }, + { + "epoch": 0.9, + "learning_rate": 2.462527796125347e-07, + "loss": 0.5265, + "regression_loss": 0.0, + "step": 10825, + "text_loss": 0.451171875 + }, + { + "epoch": 0.9, + "learning_rate": 2.458482834388204e-07, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 10826, + "text_loss": 0.380859375 + }, + { + "epoch": 0.9, + "learning_rate": 2.45444111378616e-07, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 10827, + "text_loss": 0.5390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.4504026345947676e-07, + "loss": 0.478, + "regression_loss": 0.0, + "step": 10828, + "text_loss": 0.63671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.4463673970893166e-07, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 10829, + "text_loss": 0.703125 + }, + { + "epoch": 0.9, + "learning_rate": 2.442335401544943e-07, + "loss": 0.4301, + "regression_loss": 0.0, + "step": 10830, + "text_loss": 0.46484375 + }, + { + "epoch": 0.9, + "learning_rate": 2.4383066482365045e-07, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 10831, + "text_loss": 0.43359375 + }, + { + "epoch": 0.9, + "learning_rate": 2.4342811374386766e-07, + "loss": 0.522, + "regression_loss": 0.0, + "step": 10832, + "text_loss": 0.6796875 + }, + { + "epoch": 0.9, + "learning_rate": 2.4302588694258835e-07, + "loss": 0.4333, + "regression_loss": 0.0, + "step": 10833, + "text_loss": 0.28125 + }, + { + "epoch": 0.9, + "learning_rate": 2.4262398444723613e-07, + "loss": 0.5244, + "regression_loss": 0.0, + "step": 10834, + "text_loss": 0.30078125 + }, + { + "epoch": 0.9, + "learning_rate": 2.4222240628520743e-07, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 10835, + "text_loss": 0.40625 + }, + { + "epoch": 0.9, + "learning_rate": 2.418211524838837e-07, + "loss": 0.5715, + "regression_loss": 0.0, + "step": 10836, + "text_loss": 0.416015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.4142022307061805e-07, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 10837, + "text_loss": 0.54296875 + }, + { + "epoch": 0.9, + "learning_rate": 2.410196180727437e-07, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 10838, + "text_loss": 0.5390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.406193375175725e-07, + "loss": 0.5327, + "regression_loss": 0.0, + "step": 10839, + "text_loss": 0.5546875 + }, + { + "epoch": 0.9, + "learning_rate": 2.4021938143239443e-07, + "loss": 0.4351, + "regression_loss": 0.0, + "step": 10840, + "text_loss": 0.58203125 + }, + { + "epoch": 0.9, + "learning_rate": 2.398197498444743e-07, + "loss": 0.4401, + "regression_loss": 0.0, + "step": 10841, + "text_loss": 0.404296875 + }, + { + "epoch": 0.9, + "learning_rate": 2.3942044278105816e-07, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 10842, + "text_loss": 0.58203125 + }, + { + "epoch": 0.9, + "learning_rate": 2.39021460269368e-07, + "loss": 0.4369, + "regression_loss": 0.0, + "step": 10843, + "text_loss": 0.5625 + }, + { + "epoch": 0.9, + "learning_rate": 2.386228023366055e-07, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 10844, + "text_loss": 0.6953125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3822446900994833e-07, + "loss": 0.6016, + "regression_loss": 0.0, + "step": 10845, + "text_loss": 0.439453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.378264603165531e-07, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 10846, + "text_loss": 1.1015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.3742877628355366e-07, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 10847, + "text_loss": 0.435546875 + }, + { + "epoch": 0.9, + "learning_rate": 2.3703141693806276e-07, + "loss": 0.3966, + "regression_loss": 0.0, + "step": 10848, + "text_loss": 0.51953125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3663438230716928e-07, + "loss": 0.499, + "regression_loss": 0.0, + "step": 10849, + "text_loss": 0.7578125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3623767241794215e-07, + "loss": 0.4001, + "regression_loss": 0.0, + "step": 10850, + "text_loss": 0.5234375 + }, + { + "epoch": 0.9, + "learning_rate": 2.3584128729742472e-07, + "loss": 0.4041, + "regression_loss": 0.0, + "step": 10851, + "text_loss": 0.5390625 + }, + { + "epoch": 0.9, + "learning_rate": 2.3544522697264428e-07, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 10852, + "text_loss": 0.6015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.3504949147059862e-07, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 10853, + "text_loss": 0.447265625 + }, + { + "epoch": 0.9, + "learning_rate": 2.3465408081826845e-07, + "loss": 0.4277, + "regression_loss": 0.0, + "step": 10854, + "text_loss": 0.373046875 + }, + { + "epoch": 0.9, + "learning_rate": 2.3425899504261107e-07, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 10855, + "text_loss": 0.5703125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3386423417056158e-07, + "loss": 0.5354, + "regression_loss": 0.0, + "step": 10856, + "text_loss": 0.609375 + }, + { + "epoch": 0.9, + "learning_rate": 2.3346979822903071e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 10857, + "text_loss": 0.37109375 + }, + { + "epoch": 0.9, + "learning_rate": 2.3307568724491136e-07, + "loss": 0.4578, + "regression_loss": 0.0, + "step": 10858, + "text_loss": 0.376953125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3268190124507207e-07, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 10859, + "text_loss": 0.388671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.3228844025635632e-07, + "loss": 0.411, + "regression_loss": 0.0, + "step": 10860, + "text_loss": 0.298828125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3189530430559158e-07, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 10861, + "text_loss": 0.298828125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3150249341957808e-07, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 10862, + "text_loss": 0.4453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.3111000762509606e-07, + "loss": 0.3619, + "regression_loss": 0.0, + "step": 10863, + "text_loss": 0.5703125 + }, + { + "epoch": 0.9, + "learning_rate": 2.30717846948903e-07, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 10864, + "text_loss": 0.484375 + }, + { + "epoch": 0.9, + "learning_rate": 2.303260114177347e-07, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 10865, + "text_loss": 0.6015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.2993450105830429e-07, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 10866, + "text_loss": 0.7578125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2954331589730206e-07, + "loss": 0.4861, + "regression_loss": 0.0, + "step": 10867, + "text_loss": 0.62109375 + }, + { + "epoch": 0.9, + "learning_rate": 2.2915245596139835e-07, + "loss": 0.519, + "regression_loss": 0.0, + "step": 10868, + "text_loss": 0.3671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2876192127723963e-07, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 10869, + "text_loss": 0.40625 + }, + { + "epoch": 0.9, + "learning_rate": 2.2837171187145014e-07, + "loss": 0.5193, + "regression_loss": 0.0, + "step": 10870, + "text_loss": 0.416015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.2798182777063305e-07, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 10871, + "text_loss": 0.439453125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2759226900136765e-07, + "loss": 0.4506, + "regression_loss": 0.0, + "step": 10872, + "text_loss": 0.51953125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2720303559021274e-07, + "loss": 0.5247, + "regression_loss": 0.0, + "step": 10873, + "text_loss": 0.41015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.268141275637037e-07, + "loss": 0.6592, + "regression_loss": 0.0, + "step": 10874, + "text_loss": 0.80078125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2642554494835546e-07, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 10875, + "text_loss": 0.58203125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2603728777065737e-07, + "loss": 0.4639, + "regression_loss": 0.0, + "step": 10876, + "text_loss": 0.458984375 + }, + { + "epoch": 0.9, + "learning_rate": 2.256493560570816e-07, + "loss": 0.4333, + "regression_loss": 0.0, + "step": 10877, + "text_loss": 0.34375 + }, + { + "epoch": 0.9, + "learning_rate": 2.2526174983407312e-07, + "loss": 0.3751, + "regression_loss": 0.0, + "step": 10878, + "text_loss": 0.388671875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2487446912805745e-07, + "loss": 0.5115, + "regression_loss": 0.0, + "step": 10879, + "text_loss": 0.46875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2448751396543788e-07, + "loss": 0.5125, + "regression_loss": 0.0, + "step": 10880, + "text_loss": 0.359375 + }, + { + "epoch": 0.9, + "learning_rate": 2.2410088437259558e-07, + "loss": 0.3811, + "regression_loss": 0.0, + "step": 10881, + "text_loss": 0.3984375 + }, + { + "epoch": 0.9, + "learning_rate": 2.2371458037588667e-07, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 10882, + "text_loss": 0.6171875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2332860200165009e-07, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 10883, + "text_loss": 0.60546875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2294294927619808e-07, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 10884, + "text_loss": 0.66015625 + }, + { + "epoch": 0.9, + "learning_rate": 2.22557622225823e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 10885, + "text_loss": 0.6953125 + }, + { + "epoch": 0.9, + "learning_rate": 2.2217262087679436e-07, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 10886, + "text_loss": 0.65234375 + }, + { + "epoch": 0.9, + "learning_rate": 2.2178794525536006e-07, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 10887, + "text_loss": 0.482421875 + }, + { + "epoch": 0.9, + "learning_rate": 2.2140359538774357e-07, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 10888, + "text_loss": 0.484375 + }, + { + "epoch": 0.91, + "learning_rate": 2.2101957130015116e-07, + "loss": 0.4856, + "regression_loss": 0.0, + "step": 10889, + "text_loss": 0.41015625 + }, + { + "epoch": 0.91, + "learning_rate": 2.2063587301876023e-07, + "loss": 0.563, + "regression_loss": 0.0, + "step": 10890, + "text_loss": 0.5546875 + }, + { + "epoch": 0.91, + "learning_rate": 2.202525005697309e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 10891, + "text_loss": 0.41796875 + }, + { + "epoch": 0.91, + "learning_rate": 2.1986945397919957e-07, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 10892, + "text_loss": 0.2890625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1948673327328086e-07, + "loss": 0.4491, + "regression_loss": 0.0, + "step": 10893, + "text_loss": 0.55078125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1910433847806445e-07, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 10894, + "text_loss": 0.4453125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1872226961962338e-07, + "loss": 0.5515, + "regression_loss": 0.0, + "step": 10895, + "text_loss": 0.49609375 + }, + { + "epoch": 0.91, + "learning_rate": 2.1834052672400185e-07, + "loss": 0.5, + "regression_loss": 0.0, + "step": 10896, + "text_loss": 0.515625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1795910981722734e-07, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 10897, + "text_loss": 0.56640625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1757801892530184e-07, + "loss": 0.4497, + "regression_loss": 0.0, + "step": 10898, + "text_loss": 0.51171875 + }, + { + "epoch": 0.91, + "learning_rate": 2.1719725407420734e-07, + "loss": 0.4132, + "regression_loss": 0.0, + "step": 10899, + "text_loss": 0.26171875 + }, + { + "epoch": 0.91, + "learning_rate": 2.1681681528990083e-07, + "loss": 0.5002, + "regression_loss": 0.0, + "step": 10900, + "text_loss": 0.3515625 + }, + { + "epoch": 0.91, + "learning_rate": 2.164367025983194e-07, + "loss": 0.3795, + "regression_loss": 0.0, + "step": 10901, + "text_loss": 0.439453125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1605691602537727e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 10902, + "text_loss": 0.53125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1567745559696597e-07, + "loss": 0.399, + "regression_loss": 0.0, + "step": 10903, + "text_loss": 0.388671875 + }, + { + "epoch": 0.91, + "learning_rate": 2.152983213389559e-07, + "loss": 0.3708, + "regression_loss": 0.0, + "step": 10904, + "text_loss": 0.390625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1491951327719362e-07, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 10905, + "text_loss": 0.58203125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1454103143750516e-07, + "loss": 0.519, + "regression_loss": 0.0, + "step": 10906, + "text_loss": 0.73828125 + }, + { + "epoch": 0.91, + "learning_rate": 2.1416287584569207e-07, + "loss": 0.5166, + "regression_loss": 0.0, + "step": 10907, + "text_loss": 0.53125 + }, + { + "epoch": 0.91, + "learning_rate": 2.137850465275365e-07, + "loss": 0.4067, + "regression_loss": 0.0, + "step": 10908, + "text_loss": 0.49609375 + }, + { + "epoch": 0.91, + "learning_rate": 2.134075435087962e-07, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 10909, + "text_loss": 0.53515625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1303036681520783e-07, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 10910, + "text_loss": 0.419921875 + }, + { + "epoch": 0.91, + "learning_rate": 2.1265351647248468e-07, + "loss": 0.5056, + "regression_loss": 0.0, + "step": 10911, + "text_loss": 0.6015625 + }, + { + "epoch": 0.91, + "learning_rate": 2.122769925063195e-07, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 10912, + "text_loss": 0.39453125 + }, + { + "epoch": 0.91, + "learning_rate": 2.119007949423796e-07, + "loss": 0.4281, + "regression_loss": 0.0, + "step": 10913, + "text_loss": 0.49609375 + }, + { + "epoch": 0.91, + "learning_rate": 2.115249238063155e-07, + "loss": 0.4211, + "regression_loss": 0.0, + "step": 10914, + "text_loss": 0.44921875 + }, + { + "epoch": 0.91, + "learning_rate": 2.11149379123749e-07, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 10915, + "text_loss": 0.494140625 + }, + { + "epoch": 0.91, + "learning_rate": 2.1077416092028457e-07, + "loss": 0.4182, + "regression_loss": 0.0, + "step": 10916, + "text_loss": 0.46875 + }, + { + "epoch": 0.91, + "learning_rate": 2.103992692215018e-07, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 10917, + "text_loss": 0.55859375 + }, + { + "epoch": 0.91, + "learning_rate": 2.1002470405296017e-07, + "loss": 0.4644, + "regression_loss": 0.0, + "step": 10918, + "text_loss": 0.365234375 + }, + { + "epoch": 0.91, + "learning_rate": 2.096504654401943e-07, + "loss": 0.531, + "regression_loss": 0.0, + "step": 10919, + "text_loss": 0.6171875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0927655340871766e-07, + "loss": 0.4504, + "regression_loss": 0.0, + "step": 10920, + "text_loss": 0.46484375 + }, + { + "epoch": 0.91, + "learning_rate": 2.0890296798402265e-07, + "loss": 0.4557, + "regression_loss": 0.0, + "step": 10921, + "text_loss": 0.7265625 + }, + { + "epoch": 0.91, + "learning_rate": 2.085297091915778e-07, + "loss": 0.512, + "regression_loss": 0.0, + "step": 10922, + "text_loss": 0.419921875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0815677705682992e-07, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 10923, + "text_loss": 0.5859375 + }, + { + "epoch": 0.91, + "learning_rate": 2.0778417160520426e-07, + "loss": 0.4104, + "regression_loss": 0.0, + "step": 10924, + "text_loss": 0.310546875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0741189286210273e-07, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 10925, + "text_loss": 0.56640625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0703994085290447e-07, + "loss": 0.457, + "regression_loss": 0.0, + "step": 10926, + "text_loss": 0.6015625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0666831560296862e-07, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 10927, + "text_loss": 0.53125 + }, + { + "epoch": 0.91, + "learning_rate": 2.062970171376305e-07, + "loss": 0.4149, + "regression_loss": 0.0, + "step": 10928, + "text_loss": 0.419921875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0592604548220208e-07, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 10929, + "text_loss": 0.67578125 + }, + { + "epoch": 0.91, + "learning_rate": 2.0555540066197643e-07, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 10930, + "text_loss": 0.51171875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0518508270222004e-07, + "loss": 0.423, + "regression_loss": 0.0, + "step": 10931, + "text_loss": 0.48828125 + }, + { + "epoch": 0.91, + "learning_rate": 2.0481509162818048e-07, + "loss": 0.5479, + "regression_loss": 0.0, + "step": 10932, + "text_loss": 0.5390625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0444542746508145e-07, + "loss": 0.4458, + "regression_loss": 0.0, + "step": 10933, + "text_loss": 0.1640625 + }, + { + "epoch": 0.91, + "learning_rate": 2.04076090238125e-07, + "loss": 0.5984, + "regression_loss": 0.0, + "step": 10934, + "text_loss": 0.57421875 + }, + { + "epoch": 0.91, + "learning_rate": 2.037070799724894e-07, + "loss": 0.5469, + "regression_loss": 0.0, + "step": 10935, + "text_loss": 0.54296875 + }, + { + "epoch": 0.91, + "learning_rate": 2.0333839669333445e-07, + "loss": 0.488, + "regression_loss": 0.0, + "step": 10936, + "text_loss": 0.43359375 + }, + { + "epoch": 0.91, + "learning_rate": 2.0297004042579283e-07, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 10937, + "text_loss": 0.333984375 + }, + { + "epoch": 0.91, + "learning_rate": 2.026020111949778e-07, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 10938, + "text_loss": 0.40625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0223430902597984e-07, + "loss": 0.532, + "regression_loss": 0.0, + "step": 10939, + "text_loss": 0.5 + }, + { + "epoch": 0.91, + "learning_rate": 2.0186693394386725e-07, + "loss": 0.561, + "regression_loss": 0.0, + "step": 10940, + "text_loss": 0.703125 + }, + { + "epoch": 0.91, + "learning_rate": 2.0149988597368441e-07, + "loss": 0.4922, + "regression_loss": 0.0, + "step": 10941, + "text_loss": 0.61328125 + }, + { + "epoch": 0.91, + "learning_rate": 2.0113316514045743e-07, + "loss": 0.4302, + "regression_loss": 0.0, + "step": 10942, + "text_loss": 0.349609375 + }, + { + "epoch": 0.91, + "learning_rate": 2.0076677146918466e-07, + "loss": 0.4655, + "regression_loss": 0.0, + "step": 10943, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.91, + "learning_rate": 2.004007049848461e-07, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 10944, + "text_loss": 0.4765625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0003496571239845e-07, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 10945, + "text_loss": 0.53515625 + }, + { + "epoch": 0.91, + "learning_rate": 1.996695536767762e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 10946, + "text_loss": 0.55078125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9930446890288945e-07, + "loss": 0.4968, + "regression_loss": 0.0, + "step": 10947, + "text_loss": 0.490234375 + }, + { + "epoch": 0.91, + "learning_rate": 1.989397114156305e-07, + "loss": 0.4503, + "regression_loss": 0.0, + "step": 10948, + "text_loss": 0.52734375 + }, + { + "epoch": 0.91, + "learning_rate": 1.9857528123986446e-07, + "loss": 0.4106, + "regression_loss": 0.0, + "step": 10949, + "text_loss": 0.234375 + }, + { + "epoch": 0.91, + "learning_rate": 1.9821117840043703e-07, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 10950, + "text_loss": 0.447265625 + }, + { + "epoch": 0.91, + "learning_rate": 1.9784740292217108e-07, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 10951, + "text_loss": 0.2734375 + }, + { + "epoch": 0.91, + "learning_rate": 1.974839548298668e-07, + "loss": 0.4537, + "regression_loss": 0.0, + "step": 10952, + "text_loss": 0.439453125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9712083414830275e-07, + "loss": 0.4539, + "regression_loss": 0.0, + "step": 10953, + "text_loss": 0.5390625 + }, + { + "epoch": 0.91, + "learning_rate": 1.9675804090223237e-07, + "loss": 0.5647, + "regression_loss": 0.0, + "step": 10954, + "text_loss": 0.65234375 + }, + { + "epoch": 0.91, + "learning_rate": 1.9639557511639262e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 10955, + "text_loss": 0.37109375 + }, + { + "epoch": 0.91, + "learning_rate": 1.9603343681549146e-07, + "loss": 0.3977, + "regression_loss": 0.0, + "step": 10956, + "text_loss": 0.35546875 + }, + { + "epoch": 0.91, + "learning_rate": 1.9567162602421919e-07, + "loss": 0.5164, + "regression_loss": 0.0, + "step": 10957, + "text_loss": 0.52734375 + }, + { + "epoch": 0.91, + "learning_rate": 1.9531014276724158e-07, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 10958, + "text_loss": 0.2734375 + }, + { + "epoch": 0.91, + "learning_rate": 1.949489870692034e-07, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 10959, + "text_loss": 0.47265625 + }, + { + "epoch": 0.91, + "learning_rate": 1.9458815895472494e-07, + "loss": 0.439, + "regression_loss": 0.0, + "step": 10960, + "text_loss": 0.4453125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9422765844840764e-07, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 10961, + "text_loss": 0.8515625 + }, + { + "epoch": 0.91, + "learning_rate": 1.9386748557482682e-07, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 10962, + "text_loss": 0.53125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9350764035853786e-07, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 10963, + "text_loss": 0.431640625 + }, + { + "epoch": 0.91, + "learning_rate": 1.931481228240728e-07, + "loss": 0.4332, + "regression_loss": 0.0, + "step": 10964, + "text_loss": 0.455078125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9278893299594315e-07, + "loss": 0.4752, + "regression_loss": 0.0, + "step": 10965, + "text_loss": 0.53515625 + }, + { + "epoch": 0.91, + "learning_rate": 1.924300708986343e-07, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 10966, + "text_loss": 0.5703125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9207153655661337e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 10967, + "text_loss": 0.85546875 + }, + { + "epoch": 0.91, + "learning_rate": 1.9171332999432302e-07, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 10968, + "text_loss": 0.416015625 + }, + { + "epoch": 0.91, + "learning_rate": 1.9135545123618315e-07, + "loss": 0.541, + "regression_loss": 0.0, + "step": 10969, + "text_loss": 0.7578125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9099790030659317e-07, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 10970, + "text_loss": 0.25390625 + }, + { + "epoch": 0.91, + "learning_rate": 1.906406772299285e-07, + "loss": 0.3904, + "regression_loss": 0.0, + "step": 10971, + "text_loss": 0.3828125 + }, + { + "epoch": 0.91, + "learning_rate": 1.9028378203054198e-07, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 10972, + "text_loss": 0.359375 + }, + { + "epoch": 0.91, + "learning_rate": 1.8992721473276687e-07, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 10973, + "text_loss": 0.609375 + }, + { + "epoch": 0.91, + "learning_rate": 1.895709753609104e-07, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 10974, + "text_loss": 0.53125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8921506393925925e-07, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 10975, + "text_loss": 0.6015625 + }, + { + "epoch": 0.91, + "learning_rate": 1.8885948049207847e-07, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 10976, + "text_loss": 0.455078125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8850422504360976e-07, + "loss": 0.459, + "regression_loss": 0.0, + "step": 10977, + "text_loss": 0.263671875 + }, + { + "epoch": 0.91, + "learning_rate": 1.881492976180721e-07, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 10978, + "text_loss": 0.453125 + }, + { + "epoch": 0.91, + "learning_rate": 1.877946982396628e-07, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 10979, + "text_loss": 0.5390625 + }, + { + "epoch": 0.91, + "learning_rate": 1.8744042693255638e-07, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 10980, + "text_loss": 0.6484375 + }, + { + "epoch": 0.91, + "learning_rate": 1.870864837209052e-07, + "loss": 0.5374, + "regression_loss": 0.0, + "step": 10981, + "text_loss": 0.28515625 + }, + { + "epoch": 0.91, + "learning_rate": 1.8673286862884e-07, + "loss": 0.5381, + "regression_loss": 0.0, + "step": 10982, + "text_loss": 0.640625 + }, + { + "epoch": 0.91, + "learning_rate": 1.863795816804681e-07, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 10983, + "text_loss": 0.359375 + }, + { + "epoch": 0.91, + "learning_rate": 1.860266228998747e-07, + "loss": 0.4138, + "regression_loss": 0.0, + "step": 10984, + "text_loss": 0.36328125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8567399231112227e-07, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 10985, + "text_loss": 0.443359375 + }, + { + "epoch": 0.91, + "learning_rate": 1.853216899382515e-07, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 10986, + "text_loss": 0.52734375 + }, + { + "epoch": 0.91, + "learning_rate": 1.8496971580528155e-07, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 10987, + "text_loss": 0.482421875 + }, + { + "epoch": 0.91, + "learning_rate": 1.8461806993620657e-07, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 10988, + "text_loss": 0.369140625 + }, + { + "epoch": 0.91, + "learning_rate": 1.842667523550018e-07, + "loss": 0.4502, + "regression_loss": 0.0, + "step": 10989, + "text_loss": 0.5703125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8391576308561698e-07, + "loss": 0.5923, + "regression_loss": 0.0, + "step": 10990, + "text_loss": 0.48828125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8356510215198076e-07, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 10991, + "text_loss": 0.380859375 + }, + { + "epoch": 0.91, + "learning_rate": 1.8321476957799956e-07, + "loss": 0.3497, + "regression_loss": 0.0, + "step": 10992, + "text_loss": 0.2578125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8286476538755816e-07, + "loss": 0.55, + "regression_loss": 0.0, + "step": 10993, + "text_loss": 0.640625 + }, + { + "epoch": 0.91, + "learning_rate": 1.8251508960451582e-07, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 10994, + "text_loss": 0.390625 + }, + { + "epoch": 0.91, + "learning_rate": 1.8216574225271454e-07, + "loss": 0.5566, + "regression_loss": 0.0, + "step": 10995, + "text_loss": 0.57421875 + }, + { + "epoch": 0.91, + "learning_rate": 1.8181672335596866e-07, + "loss": 0.4375, + "regression_loss": 0.0, + "step": 10996, + "text_loss": 0.359375 + }, + { + "epoch": 0.91, + "learning_rate": 1.814680329380736e-07, + "loss": 0.5232, + "regression_loss": 0.0, + "step": 10997, + "text_loss": 0.3984375 + }, + { + "epoch": 0.91, + "learning_rate": 1.8111967102280082e-07, + "loss": 0.4248, + "regression_loss": 0.0, + "step": 10998, + "text_loss": 0.423828125 + }, + { + "epoch": 0.91, + "learning_rate": 1.8077163763390027e-07, + "loss": 0.5627, + "regression_loss": 0.0, + "step": 10999, + "text_loss": 0.6171875 + }, + { + "epoch": 0.91, + "learning_rate": 1.8042393279509852e-07, + "loss": 0.4149, + "regression_loss": 0.0, + "step": 11000, + "text_loss": 0.54296875 + }, + { + "epoch": 0.91, + "learning_rate": 1.8007655653010102e-07, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 11001, + "text_loss": 0.54296875 + }, + { + "epoch": 0.91, + "learning_rate": 1.7972950886259e-07, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 11002, + "text_loss": 0.359375 + }, + { + "epoch": 0.91, + "learning_rate": 1.7938278981622426e-07, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 11003, + "text_loss": 0.255859375 + }, + { + "epoch": 0.91, + "learning_rate": 1.7903639941464267e-07, + "loss": 0.5703, + "regression_loss": 0.0, + "step": 11004, + "text_loss": 0.40625 + }, + { + "epoch": 0.91, + "learning_rate": 1.7869033768145915e-07, + "loss": 0.4998, + "regression_loss": 0.0, + "step": 11005, + "text_loss": 0.71484375 + }, + { + "epoch": 0.91, + "learning_rate": 1.783446046402676e-07, + "loss": 0.4916, + "regression_loss": 0.0, + "step": 11006, + "text_loss": 0.228515625 + }, + { + "epoch": 0.91, + "learning_rate": 1.7799920031463747e-07, + "loss": 0.3958, + "regression_loss": 0.0, + "step": 11007, + "text_loss": 0.365234375 + }, + { + "epoch": 0.91, + "learning_rate": 1.776541247281177e-07, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 11008, + "text_loss": 0.341796875 + }, + { + "epoch": 0.91, + "learning_rate": 1.7730937790423174e-07, + "loss": 0.542, + "regression_loss": 0.0, + "step": 11009, + "text_loss": 0.345703125 + }, + { + "epoch": 0.92, + "learning_rate": 1.7696495986648464e-07, + "loss": 0.49, + "regression_loss": 0.0, + "step": 11010, + "text_loss": 0.447265625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7662087063835599e-07, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 11011, + "text_loss": 0.333984375 + }, + { + "epoch": 0.92, + "learning_rate": 1.7627711024330475e-07, + "loss": 0.51, + "regression_loss": 0.0, + "step": 11012, + "text_loss": 0.392578125 + }, + { + "epoch": 0.92, + "learning_rate": 1.7593367870476552e-07, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 11013, + "text_loss": 0.4609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.7559057604615347e-07, + "loss": 0.457, + "regression_loss": 0.0, + "step": 11014, + "text_loss": 0.4609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.752478022908577e-07, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 11015, + "text_loss": 0.32421875 + }, + { + "epoch": 0.92, + "learning_rate": 1.7490535746224778e-07, + "loss": 0.4657, + "regression_loss": 0.0, + "step": 11016, + "text_loss": 0.55078125 + }, + { + "epoch": 0.92, + "learning_rate": 1.745632415836701e-07, + "loss": 0.4038, + "regression_loss": 0.0, + "step": 11017, + "text_loss": 0.2431640625 + }, + { + "epoch": 0.92, + "learning_rate": 1.742214546784482e-07, + "loss": 0.4799, + "regression_loss": 0.0, + "step": 11018, + "text_loss": 0.63671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.7387999676988178e-07, + "loss": 0.4503, + "regression_loss": 0.0, + "step": 11019, + "text_loss": 0.5703125 + }, + { + "epoch": 0.92, + "learning_rate": 1.7353886788125218e-07, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 11020, + "text_loss": 0.404296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.7319806803581363e-07, + "loss": 0.4572, + "regression_loss": 0.0, + "step": 11021, + "text_loss": 0.51171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.728575972568014e-07, + "loss": 0.521, + "regression_loss": 0.0, + "step": 11022, + "text_loss": 0.70703125 + }, + { + "epoch": 0.92, + "learning_rate": 1.7251745556742694e-07, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 11023, + "text_loss": 0.6875 + }, + { + "epoch": 0.92, + "learning_rate": 1.7217764299087946e-07, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 11024, + "text_loss": 0.24609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.7183815955032435e-07, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 11025, + "text_loss": 0.3203125 + }, + { + "epoch": 0.92, + "learning_rate": 1.714990052689075e-07, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 11026, + "text_loss": 0.51953125 + }, + { + "epoch": 0.92, + "learning_rate": 1.7116018016974934e-07, + "loss": 0.4508, + "regression_loss": 0.0, + "step": 11027, + "text_loss": 0.6171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082168427595024e-07, + "loss": 0.5326, + "regression_loss": 0.0, + "step": 11028, + "text_loss": 0.73046875 + }, + { + "epoch": 0.92, + "learning_rate": 1.704835176105868e-07, + "loss": 0.3948, + "regression_loss": 0.0, + "step": 11029, + "text_loss": 0.515625 + }, + { + "epoch": 0.92, + "learning_rate": 1.701456801967133e-07, + "loss": 0.3644, + "regression_loss": 0.0, + "step": 11030, + "text_loss": 0.314453125 + }, + { + "epoch": 0.92, + "learning_rate": 1.6980817205736133e-07, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 11031, + "text_loss": 0.37890625 + }, + { + "epoch": 0.92, + "learning_rate": 1.694709932155414e-07, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 11032, + "text_loss": 0.443359375 + }, + { + "epoch": 0.92, + "learning_rate": 1.691341436942401e-07, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 11033, + "text_loss": 0.56640625 + }, + { + "epoch": 0.92, + "learning_rate": 1.687976235164218e-07, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 11034, + "text_loss": 0.3515625 + }, + { + "epoch": 0.92, + "learning_rate": 1.6846143270502935e-07, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 11035, + "text_loss": 0.62109375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6812557128298268e-07, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 11036, + "text_loss": 0.490234375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6779003927317795e-07, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 11037, + "text_loss": 0.61328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.674548366984907e-07, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 11038, + "text_loss": 0.625 + }, + { + "epoch": 0.92, + "learning_rate": 1.6711996358177383e-07, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 11039, + "text_loss": 0.330078125 + }, + { + "epoch": 0.92, + "learning_rate": 1.6678541994585629e-07, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 11040, + "text_loss": 0.53515625 + }, + { + "epoch": 0.92, + "learning_rate": 1.664512058135459e-07, + "loss": 0.3782, + "regression_loss": 0.0, + "step": 11041, + "text_loss": 0.310546875 + }, + { + "epoch": 0.92, + "learning_rate": 1.661173212076278e-07, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 11042, + "text_loss": 0.3984375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6578376615086435e-07, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 11043, + "text_loss": 0.353515625 + }, + { + "epoch": 0.92, + "learning_rate": 1.6545054066599563e-07, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 11044, + "text_loss": 0.283203125 + }, + { + "epoch": 0.92, + "learning_rate": 1.6511764477573966e-07, + "loss": 0.5906, + "regression_loss": 0.0, + "step": 11045, + "text_loss": 0.427734375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6478507850279102e-07, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 11046, + "text_loss": 0.34375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6445284186982213e-07, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 11047, + "text_loss": 0.52734375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6412093489948378e-07, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 11048, + "text_loss": 0.5546875 + }, + { + "epoch": 0.92, + "learning_rate": 1.6378935761440396e-07, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 11049, + "text_loss": 0.63671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.6345811003718682e-07, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 11050, + "text_loss": 0.34375 + }, + { + "epoch": 0.92, + "learning_rate": 1.631271921904165e-07, + "loss": 0.501, + "regression_loss": 0.0, + "step": 11051, + "text_loss": 0.47265625 + }, + { + "epoch": 0.92, + "learning_rate": 1.6279660409665167e-07, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 11052, + "text_loss": 0.474609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.624663457784309e-07, + "loss": 0.5886, + "regression_loss": 0.0, + "step": 11053, + "text_loss": 0.76953125 + }, + { + "epoch": 0.92, + "learning_rate": 1.6213641725826955e-07, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 11054, + "text_loss": 0.470703125 + }, + { + "epoch": 0.92, + "learning_rate": 1.618068185586613e-07, + "loss": 0.3755, + "regression_loss": 0.0, + "step": 11055, + "text_loss": 0.52734375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6147754970207484e-07, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 11056, + "text_loss": 0.54296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.6114861071095832e-07, + "loss": 0.439, + "regression_loss": 0.0, + "step": 11057, + "text_loss": 0.330078125 + }, + { + "epoch": 0.92, + "learning_rate": 1.6082000160773826e-07, + "loss": 0.4675, + "regression_loss": 0.0, + "step": 11058, + "text_loss": 0.59375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6049172241481615e-07, + "loss": 0.5302, + "regression_loss": 0.0, + "step": 11059, + "text_loss": 0.54296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.6016377315457353e-07, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 11060, + "text_loss": 0.404296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.598361538493681e-07, + "loss": 0.5825, + "regression_loss": 0.0, + "step": 11061, + "text_loss": 0.416015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5950886452153424e-07, + "loss": 0.5054, + "regression_loss": 0.0, + "step": 11062, + "text_loss": 0.431640625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5918190519338628e-07, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 11063, + "text_loss": 0.63671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5885527588721306e-07, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 11064, + "text_loss": 0.64453125 + }, + { + "epoch": 0.92, + "learning_rate": 1.5852897662528453e-07, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 11065, + "text_loss": 0.478515625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5820300742984352e-07, + "loss": 0.4983, + "regression_loss": 0.0, + "step": 11066, + "text_loss": 0.294921875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5787736832311495e-07, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 11067, + "text_loss": 0.48046875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5755205932729833e-07, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 11068, + "text_loss": 0.298828125 + }, + { + "epoch": 0.92, + "learning_rate": 1.5722708046457204e-07, + "loss": 0.3905, + "regression_loss": 0.0, + "step": 11069, + "text_loss": 0.421875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5690243175709053e-07, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 11070, + "text_loss": 0.333984375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5657811322698834e-07, + "loss": 0.5464, + "regression_loss": 0.0, + "step": 11071, + "text_loss": 0.4140625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5625412489637337e-07, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 11072, + "text_loss": 0.43359375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5593046678733625e-07, + "loss": 0.4691, + "regression_loss": 0.0, + "step": 11073, + "text_loss": 0.267578125 + }, + { + "epoch": 0.92, + "learning_rate": 1.5560713892194046e-07, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 11074, + "text_loss": 0.69140625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5528414132222948e-07, + "loss": 0.5488, + "regression_loss": 0.0, + "step": 11075, + "text_loss": 0.54296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5496147401022343e-07, + "loss": 0.4418, + "regression_loss": 0.0, + "step": 11076, + "text_loss": 0.3671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5463913700792032e-07, + "loss": 0.4686, + "regression_loss": 0.0, + "step": 11077, + "text_loss": 0.66015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5431713033729478e-07, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 11078, + "text_loss": 0.62890625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5399545402030036e-07, + "loss": 0.5645, + "regression_loss": 0.0, + "step": 11079, + "text_loss": 0.498046875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5367410807886673e-07, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 11080, + "text_loss": 0.734375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5335309253490194e-07, + "loss": 0.4662, + "regression_loss": 0.0, + "step": 11081, + "text_loss": 0.58984375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5303240741029123e-07, + "loss": 0.407, + "regression_loss": 0.0, + "step": 11082, + "text_loss": 0.68359375 + }, + { + "epoch": 0.92, + "learning_rate": 1.527120527268977e-07, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 11083, + "text_loss": 0.42578125 + }, + { + "epoch": 0.92, + "learning_rate": 1.5239202850655943e-07, + "loss": 0.3657, + "regression_loss": 0.0, + "step": 11084, + "text_loss": 0.54296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5207233477109619e-07, + "loss": 0.396, + "regression_loss": 0.0, + "step": 11085, + "text_loss": 0.236328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.5175297154230273e-07, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 11086, + "text_loss": 0.4609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5143393884195056e-07, + "loss": 0.543, + "regression_loss": 0.0, + "step": 11087, + "text_loss": 0.65234375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5111523669179007e-07, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 11088, + "text_loss": 0.474609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.5079686511354996e-07, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 11089, + "text_loss": 0.291015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.5047882412893345e-07, + "loss": 0.5476, + "regression_loss": 0.0, + "step": 11090, + "text_loss": 0.66796875 + }, + { + "epoch": 0.92, + "learning_rate": 1.5016111375962373e-07, + "loss": 0.5493, + "regression_loss": 0.0, + "step": 11091, + "text_loss": 0.63671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4984373402728014e-07, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 11092, + "text_loss": 0.55078125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4952668495354094e-07, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 11093, + "text_loss": 0.7578125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4920996656001997e-07, + "loss": 0.4731, + "regression_loss": 0.0, + "step": 11094, + "text_loss": 0.51953125 + }, + { + "epoch": 0.92, + "learning_rate": 1.488935788683099e-07, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 11095, + "text_loss": 0.48828125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4857752189998133e-07, + "loss": 0.447, + "regression_loss": 0.0, + "step": 11096, + "text_loss": 0.36328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.482617956765786e-07, + "loss": 0.4661, + "regression_loss": 0.0, + "step": 11097, + "text_loss": 0.291015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4794640021962957e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 11098, + "text_loss": 0.490234375 + }, + { + "epoch": 0.92, + "learning_rate": 1.4763133555063426e-07, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 11099, + "text_loss": 0.419921875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4731660169107264e-07, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 11100, + "text_loss": 0.91015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4700219866240206e-07, + "loss": 0.4617, + "regression_loss": 0.0, + "step": 11101, + "text_loss": 0.5703125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4668812648605647e-07, + "loss": 0.41, + "regression_loss": 0.0, + "step": 11102, + "text_loss": 0.6328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4637438518344704e-07, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 11103, + "text_loss": 0.451171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4606097477596504e-07, + "loss": 0.5181, + "regression_loss": 0.0, + "step": 11104, + "text_loss": 0.400390625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4574789528497503e-07, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 11105, + "text_loss": 0.61328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4543514673182212e-07, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 11106, + "text_loss": 0.451171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4512272913782766e-07, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 11107, + "text_loss": 0.5859375 + }, + { + "epoch": 0.92, + "learning_rate": 1.4481064252429122e-07, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 11108, + "text_loss": 0.361328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4449888691248804e-07, + "loss": 0.4937, + "regression_loss": 0.0, + "step": 11109, + "text_loss": 0.76171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4418746232367387e-07, + "loss": 0.5419, + "regression_loss": 0.0, + "step": 11110, + "text_loss": 0.36328125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4387636877907785e-07, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 11111, + "text_loss": 0.51171875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4356560629991023e-07, + "loss": 0.4961, + "regression_loss": 0.0, + "step": 11112, + "text_loss": 0.408203125 + }, + { + "epoch": 0.92, + "learning_rate": 1.4325517490735685e-07, + "loss": 0.5325, + "regression_loss": 0.0, + "step": 11113, + "text_loss": 0.279296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4294507462258189e-07, + "loss": 0.4873, + "regression_loss": 0.0, + "step": 11114, + "text_loss": 0.54296875 + }, + { + "epoch": 0.92, + "learning_rate": 1.426353054667251e-07, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 11115, + "text_loss": 0.62109375 + }, + { + "epoch": 0.92, + "learning_rate": 1.4232586746090516e-07, + "loss": 0.5693, + "regression_loss": 0.0, + "step": 11116, + "text_loss": 0.82421875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4201676062621905e-07, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 11117, + "text_loss": 0.6015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4170798498373883e-07, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 11118, + "text_loss": 0.59375 + }, + { + "epoch": 0.92, + "learning_rate": 1.41399540554516e-07, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 11119, + "text_loss": 0.671875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4109142735957927e-07, + "loss": 0.4358, + "regression_loss": 0.0, + "step": 11120, + "text_loss": 0.66015625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4078364541993295e-07, + "loss": 0.4717, + "regression_loss": 0.0, + "step": 11121, + "text_loss": 0.4921875 + }, + { + "epoch": 0.92, + "learning_rate": 1.4047619475656028e-07, + "loss": 0.3995, + "regression_loss": 0.0, + "step": 11122, + "text_loss": 0.400390625 + }, + { + "epoch": 0.92, + "learning_rate": 1.4016907539042168e-07, + "loss": 0.3947, + "regression_loss": 0.0, + "step": 11123, + "text_loss": 0.294921875 + }, + { + "epoch": 0.92, + "learning_rate": 1.39862287342456e-07, + "loss": 0.5015, + "regression_loss": 0.0, + "step": 11124, + "text_loss": 0.341796875 + }, + { + "epoch": 0.92, + "learning_rate": 1.3955583063357702e-07, + "loss": 0.386, + "regression_loss": 0.0, + "step": 11125, + "text_loss": 0.255859375 + }, + { + "epoch": 0.92, + "learning_rate": 1.3924970528467864e-07, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 11126, + "text_loss": 0.640625 + }, + { + "epoch": 0.92, + "learning_rate": 1.3894391131662965e-07, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 11127, + "text_loss": 0.4765625 + }, + { + "epoch": 0.92, + "learning_rate": 1.3863844875027842e-07, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 11128, + "text_loss": 0.349609375 + }, + { + "epoch": 0.92, + "learning_rate": 1.3833331760644887e-07, + "loss": 0.4603, + "regression_loss": 0.0, + "step": 11129, + "text_loss": 0.384765625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3802851790594485e-07, + "loss": 0.5192, + "regression_loss": 0.0, + "step": 11130, + "text_loss": 0.6171875 + }, + { + "epoch": 0.93, + "learning_rate": 1.3772404966954422e-07, + "loss": 0.4724, + "regression_loss": 0.0, + "step": 11131, + "text_loss": 0.431640625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3741991291800594e-07, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 11132, + "text_loss": 0.68359375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3711610767206285e-07, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 11133, + "text_loss": 0.4765625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3681263395242729e-07, + "loss": 0.4236, + "regression_loss": 0.0, + "step": 11134, + "text_loss": 0.55078125 + }, + { + "epoch": 0.93, + "learning_rate": 1.3650949177978878e-07, + "loss": 0.531, + "regression_loss": 0.0, + "step": 11135, + "text_loss": 0.3671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.3620668117481471e-07, + "loss": 0.507, + "regression_loss": 0.0, + "step": 11136, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.93, + "learning_rate": 1.3590420215814692e-07, + "loss": 0.3727, + "regression_loss": 0.0, + "step": 11137, + "text_loss": 0.46484375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3560205475040945e-07, + "loss": 0.436, + "regression_loss": 0.0, + "step": 11138, + "text_loss": 0.462890625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3530023897219968e-07, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 11139, + "text_loss": 0.64453125 + }, + { + "epoch": 0.93, + "learning_rate": 1.3499875484409397e-07, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 11140, + "text_loss": 0.54296875 + }, + { + "epoch": 0.93, + "learning_rate": 1.3469760238664586e-07, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 11141, + "text_loss": 0.259765625 + }, + { + "epoch": 0.93, + "learning_rate": 1.343967816203867e-07, + "loss": 0.449, + "regression_loss": 0.0, + "step": 11142, + "text_loss": 0.30859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3409629256582511e-07, + "loss": 0.5347, + "regression_loss": 0.0, + "step": 11143, + "text_loss": 0.55859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3379613524344638e-07, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 11144, + "text_loss": 0.5390625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3349630967371407e-07, + "loss": 0.3988, + "regression_loss": 0.0, + "step": 11145, + "text_loss": 0.5625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3319681587706857e-07, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 11146, + "text_loss": 0.41015625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3289765387392795e-07, + "loss": 0.499, + "regression_loss": 0.0, + "step": 11147, + "text_loss": 0.462890625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3259882368468701e-07, + "loss": 0.5059, + "regression_loss": 0.0, + "step": 11148, + "text_loss": 0.546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.323003253297195e-07, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 11149, + "text_loss": 0.37109375 + }, + { + "epoch": 0.93, + "learning_rate": 1.320021588293735e-07, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 11150, + "text_loss": 0.609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.317043242039795e-07, + "loss": 0.501, + "regression_loss": 0.0, + "step": 11151, + "text_loss": 0.609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3140682147383954e-07, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 11152, + "text_loss": 0.49609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.3110965065923686e-07, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 11153, + "text_loss": 0.5390625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3081281178043138e-07, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 11154, + "text_loss": 0.58984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.305163048576602e-07, + "loss": 0.5764, + "regression_loss": 0.0, + "step": 11155, + "text_loss": 0.625 + }, + { + "epoch": 0.93, + "learning_rate": 1.3022012991113609e-07, + "loss": 0.5466, + "regression_loss": 0.0, + "step": 11156, + "text_loss": 0.53515625 + }, + { + "epoch": 0.93, + "learning_rate": 1.2992428696105287e-07, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 11157, + "text_loss": 0.474609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.296287760275783e-07, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 11158, + "text_loss": 0.376953125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2933359713085847e-07, + "loss": 0.5349, + "regression_loss": 0.0, + "step": 11159, + "text_loss": 0.609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2903875029101788e-07, + "loss": 0.448, + "regression_loss": 0.0, + "step": 11160, + "text_loss": 0.3671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2874423552815817e-07, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 11161, + "text_loss": 0.451171875 + }, + { + "epoch": 0.93, + "learning_rate": 1.284500528623561e-07, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 11162, + "text_loss": 0.60546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2815620231366955e-07, + "loss": 0.3928, + "regression_loss": 0.0, + "step": 11163, + "text_loss": 0.51953125 + }, + { + "epoch": 0.93, + "learning_rate": 1.278626839021302e-07, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 11164, + "text_loss": 0.58984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2756949764774873e-07, + "loss": 0.3934, + "regression_loss": 0.0, + "step": 11165, + "text_loss": 0.38671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2727664357051418e-07, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 11166, + "text_loss": 0.46875 + }, + { + "epoch": 0.93, + "learning_rate": 1.269841216903911e-07, + "loss": 0.4249, + "regression_loss": 0.0, + "step": 11167, + "text_loss": 0.435546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.266919320273219e-07, + "loss": 0.427, + "regression_loss": 0.0, + "step": 11168, + "text_loss": 0.326171875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2640007460122727e-07, + "loss": 0.4199, + "regression_loss": 0.0, + "step": 11169, + "text_loss": 0.5625 + }, + { + "epoch": 0.93, + "learning_rate": 1.2610854943200357e-07, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 11170, + "text_loss": 0.58984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.25817356539526e-07, + "loss": 0.5791, + "regression_loss": 0.0, + "step": 11171, + "text_loss": 0.609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2552649594364696e-07, + "loss": 0.4773, + "regression_loss": 0.0, + "step": 11172, + "text_loss": 0.232421875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2523596766419565e-07, + "loss": 0.4882, + "regression_loss": 0.0, + "step": 11173, + "text_loss": 0.58984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2494577172097843e-07, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 11174, + "text_loss": 0.353515625 + }, + { + "epoch": 0.93, + "learning_rate": 1.2465590813377947e-07, + "loss": 0.467, + "regression_loss": 0.0, + "step": 11175, + "text_loss": 0.59375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2436637692235964e-07, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 11176, + "text_loss": 0.76953125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2407717810645925e-07, + "loss": 0.4758, + "regression_loss": 0.0, + "step": 11177, + "text_loss": 0.388671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2378831170579252e-07, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 11178, + "text_loss": 0.33203125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2349977774005483e-07, + "loss": 0.51, + "regression_loss": 0.0, + "step": 11179, + "text_loss": 0.38671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2321157622891545e-07, + "loss": 0.5112, + "regression_loss": 0.0, + "step": 11180, + "text_loss": 0.486328125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2292370719202252e-07, + "loss": 0.4219, + "regression_loss": 0.0, + "step": 11181, + "text_loss": 0.546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.22636170649002e-07, + "loss": 0.4363, + "regression_loss": 0.0, + "step": 11182, + "text_loss": 0.3203125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2234896661945662e-07, + "loss": 0.5444, + "regression_loss": 0.0, + "step": 11183, + "text_loss": 0.578125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2206209512296562e-07, + "loss": 0.3658, + "regression_loss": 0.0, + "step": 11184, + "text_loss": 0.255859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2177555617908844e-07, + "loss": 0.5284, + "regression_loss": 0.0, + "step": 11185, + "text_loss": 0.275390625 + }, + { + "epoch": 0.93, + "learning_rate": 1.2148934980735772e-07, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 11186, + "text_loss": 0.34375 + }, + { + "epoch": 0.93, + "learning_rate": 1.2120347602728567e-07, + "loss": 0.51, + "regression_loss": 0.0, + "step": 11187, + "text_loss": 0.71875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2091793485836333e-07, + "loss": 0.424, + "regression_loss": 0.0, + "step": 11188, + "text_loss": 0.310546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.206327263200563e-07, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 11189, + "text_loss": 0.48828125 + }, + { + "epoch": 0.93, + "learning_rate": 1.2034785043180842e-07, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 11190, + "text_loss": 0.8046875 + }, + { + "epoch": 0.93, + "learning_rate": 1.2006330721304137e-07, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 11191, + "text_loss": 0.416015625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1977909668315468e-07, + "loss": 0.488, + "regression_loss": 0.0, + "step": 11192, + "text_loss": 0.5703125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1949521886152282e-07, + "loss": 0.5281, + "regression_loss": 0.0, + "step": 11193, + "text_loss": 0.333984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1921167376749976e-07, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11194, + "text_loss": 0.40234375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1892846142041614e-07, + "loss": 0.5403, + "regression_loss": 0.0, + "step": 11195, + "text_loss": 0.453125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1864558183958041e-07, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 11196, + "text_loss": 0.5078125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1836303504427716e-07, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 11197, + "text_loss": 0.71484375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1808082105376983e-07, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 11198, + "text_loss": 0.58203125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1779893988729696e-07, + "loss": 0.5574, + "regression_loss": 0.0, + "step": 11199, + "text_loss": 0.455078125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1751739156407649e-07, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 11200, + "text_loss": 0.56640625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1723617610330306e-07, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 11201, + "text_loss": 0.453125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1695529352414914e-07, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 11202, + "text_loss": 0.5859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.166747438457616e-07, + "loss": 0.4652, + "regression_loss": 0.0, + "step": 11203, + "text_loss": 0.5 + }, + { + "epoch": 0.93, + "learning_rate": 1.1639452708726962e-07, + "loss": 0.4181, + "regression_loss": 0.0, + "step": 11204, + "text_loss": 0.4296875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1611464326777511e-07, + "loss": 0.4486, + "regression_loss": 0.0, + "step": 11205, + "text_loss": 0.380859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1583509240635948e-07, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 11206, + "text_loss": 0.322265625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1555587452208195e-07, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 11207, + "text_loss": 0.5546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1527698963397726e-07, + "loss": 0.4346, + "regression_loss": 0.0, + "step": 11208, + "text_loss": 0.322265625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1499843776105745e-07, + "loss": 0.5095, + "regression_loss": 0.0, + "step": 11209, + "text_loss": 0.390625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1472021892231511e-07, + "loss": 0.5549, + "regression_loss": 0.0, + "step": 11210, + "text_loss": 0.458984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1444233313671615e-07, + "loss": 0.4834, + "regression_loss": 0.0, + "step": 11211, + "text_loss": 0.451171875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1416478042320544e-07, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 11212, + "text_loss": 0.50390625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1388756080070506e-07, + "loss": 0.5371, + "regression_loss": 0.0, + "step": 11213, + "text_loss": 0.53515625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1361067428811546e-07, + "loss": 0.5, + "regression_loss": 0.0, + "step": 11214, + "text_loss": 0.734375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1333412090431207e-07, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 11215, + "text_loss": 0.26171875 + }, + { + "epoch": 0.93, + "learning_rate": 1.130579006681498e-07, + "loss": 0.425, + "regression_loss": 0.0, + "step": 11216, + "text_loss": 0.4609375 + }, + { + "epoch": 0.93, + "learning_rate": 1.127820135984592e-07, + "loss": 0.4276, + "regression_loss": 0.0, + "step": 11217, + "text_loss": 0.546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1250645971404905e-07, + "loss": 0.4119, + "regression_loss": 0.0, + "step": 11218, + "text_loss": 0.3828125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1223123903370548e-07, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 11219, + "text_loss": 0.67578125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1195635157619178e-07, + "loss": 0.5004, + "regression_loss": 0.0, + "step": 11220, + "text_loss": 0.5625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1168179736024687e-07, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 11221, + "text_loss": 0.6640625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1140757640459021e-07, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 11222, + "text_loss": 0.51953125 + }, + { + "epoch": 0.93, + "learning_rate": 1.111336887279163e-07, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 11223, + "text_loss": 0.294921875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1086013434889687e-07, + "loss": 0.439, + "regression_loss": 0.0, + "step": 11224, + "text_loss": 0.39453125 + }, + { + "epoch": 0.93, + "learning_rate": 1.1058691328618199e-07, + "loss": 0.467, + "regression_loss": 0.0, + "step": 11225, + "text_loss": 0.6015625 + }, + { + "epoch": 0.93, + "learning_rate": 1.1031402555839843e-07, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 11226, + "text_loss": 0.54296875 + }, + { + "epoch": 0.93, + "learning_rate": 1.1004147118414965e-07, + "loss": 0.4036, + "regression_loss": 0.0, + "step": 11227, + "text_loss": 0.365234375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0976925018201746e-07, + "loss": 0.5317, + "regression_loss": 0.0, + "step": 11228, + "text_loss": 0.6328125 + }, + { + "epoch": 0.93, + "learning_rate": 1.0949736257056087e-07, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 11229, + "text_loss": 0.546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0922580836831509e-07, + "loss": 0.4221, + "regression_loss": 0.0, + "step": 11230, + "text_loss": 0.283203125 + }, + { + "epoch": 0.93, + "learning_rate": 1.0895458759379307e-07, + "loss": 0.4678, + "regression_loss": 0.0, + "step": 11231, + "text_loss": 0.41015625 + }, + { + "epoch": 0.93, + "learning_rate": 1.086837002654867e-07, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 11232, + "text_loss": 0.384765625 + }, + { + "epoch": 0.93, + "learning_rate": 1.084131464018623e-07, + "loss": 0.4053, + "regression_loss": 0.0, + "step": 11233, + "text_loss": 0.466796875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0814292602136455e-07, + "loss": 0.5588, + "regression_loss": 0.0, + "step": 11234, + "text_loss": 0.55859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0787303914241709e-07, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 11235, + "text_loss": 0.3984375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0760348578341851e-07, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 11236, + "text_loss": 0.5859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0733426596274577e-07, + "loss": 0.506, + "regression_loss": 0.0, + "step": 11237, + "text_loss": 0.2470703125 + }, + { + "epoch": 0.93, + "learning_rate": 1.0706537969875252e-07, + "loss": 0.4985, + "regression_loss": 0.0, + "step": 11238, + "text_loss": 0.380859375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0679682700977134e-07, + "loss": 0.437, + "regression_loss": 0.0, + "step": 11239, + "text_loss": 0.66015625 + }, + { + "epoch": 0.93, + "learning_rate": 1.0652860791410868e-07, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 11240, + "text_loss": 0.63671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0626072243005214e-07, + "loss": 0.4436, + "regression_loss": 0.0, + "step": 11241, + "text_loss": 0.6875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0599317057586378e-07, + "loss": 0.5261, + "regression_loss": 0.0, + "step": 11242, + "text_loss": 0.365234375 + }, + { + "epoch": 0.93, + "learning_rate": 1.05725952369784e-07, + "loss": 0.4562, + "regression_loss": 0.0, + "step": 11243, + "text_loss": 0.54296875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0545906783003102e-07, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 11244, + "text_loss": 0.302734375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0519251697479916e-07, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 11245, + "text_loss": 0.65234375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0492629982226e-07, + "loss": 0.5474, + "regression_loss": 0.0, + "step": 11246, + "text_loss": 0.60546875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0466041639056402e-07, + "loss": 0.3896, + "regression_loss": 0.0, + "step": 11247, + "text_loss": 0.63671875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0439486669783672e-07, + "loss": 0.4181, + "regression_loss": 0.0, + "step": 11248, + "text_loss": 0.46484375 + }, + { + "epoch": 0.93, + "learning_rate": 1.0412965076218251e-07, + "loss": 0.5454, + "regression_loss": 0.0, + "step": 11249, + "text_loss": 0.60546875 + }, + { + "epoch": 0.94, + "learning_rate": 1.0386476860168193e-07, + "loss": 0.4125, + "regression_loss": 0.0, + "step": 11250, + "text_loss": 0.36328125 + }, + { + "epoch": 0.94, + "learning_rate": 1.0360022023439442e-07, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 11251, + "text_loss": 0.275390625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0333600567835389e-07, + "loss": 0.364, + "regression_loss": 0.0, + "step": 11252, + "text_loss": 0.248046875 + }, + { + "epoch": 0.94, + "learning_rate": 1.0307212495157425e-07, + "loss": 0.4233, + "regression_loss": 0.0, + "step": 11253, + "text_loss": 0.6015625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0280857807204503e-07, + "loss": 0.438, + "regression_loss": 0.0, + "step": 11254, + "text_loss": 0.515625 + }, + { + "epoch": 0.94, + "learning_rate": 1.025453650577335e-07, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 11255, + "text_loss": 0.50390625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0228248592658418e-07, + "loss": 0.5601, + "regression_loss": 0.0, + "step": 11256, + "text_loss": 0.6171875 + }, + { + "epoch": 0.94, + "learning_rate": 1.0201994069651999e-07, + "loss": 0.4062, + "regression_loss": 0.0, + "step": 11257, + "text_loss": 0.408203125 + }, + { + "epoch": 0.94, + "learning_rate": 1.0175772938543827e-07, + "loss": 0.4761, + "regression_loss": 0.0, + "step": 11258, + "text_loss": 0.5390625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0149585201121526e-07, + "loss": 0.4978, + "regression_loss": 0.0, + "step": 11259, + "text_loss": 0.59375 + }, + { + "epoch": 0.94, + "learning_rate": 1.0123430859170558e-07, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 11260, + "text_loss": 0.482421875 + }, + { + "epoch": 0.94, + "learning_rate": 1.009730991447394e-07, + "loss": 0.5006, + "regression_loss": 0.0, + "step": 11261, + "text_loss": 0.5078125 + }, + { + "epoch": 0.94, + "learning_rate": 1.0071222368812416e-07, + "loss": 0.5571, + "regression_loss": 0.0, + "step": 11262, + "text_loss": 0.66015625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0045168223964619e-07, + "loss": 0.4077, + "regression_loss": 0.0, + "step": 11263, + "text_loss": 0.431640625 + }, + { + "epoch": 0.94, + "learning_rate": 1.0019147481706626e-07, + "loss": 0.4482, + "regression_loss": 0.0, + "step": 11264, + "text_loss": 0.48828125 + }, + { + "epoch": 0.94, + "learning_rate": 9.993160143812464e-08, + "loss": 0.4388, + "regression_loss": 0.0, + "step": 11265, + "text_loss": 0.453125 + }, + { + "epoch": 0.94, + "learning_rate": 9.967206212053882e-08, + "loss": 0.447, + "regression_loss": 0.0, + "step": 11266, + "text_loss": 0.228515625 + }, + { + "epoch": 0.94, + "learning_rate": 9.941285688200242e-08, + "loss": 0.4479, + "regression_loss": 0.0, + "step": 11267, + "text_loss": 0.251953125 + }, + { + "epoch": 0.94, + "learning_rate": 9.915398574018575e-08, + "loss": 0.4559, + "regression_loss": 0.0, + "step": 11268, + "text_loss": 0.5625 + }, + { + "epoch": 0.94, + "learning_rate": 9.889544871273915e-08, + "loss": 0.4014, + "regression_loss": 0.0, + "step": 11269, + "text_loss": 0.384765625 + }, + { + "epoch": 0.94, + "learning_rate": 9.863724581728684e-08, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 11270, + "text_loss": 0.62890625 + }, + { + "epoch": 0.94, + "learning_rate": 9.837937707143197e-08, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 11271, + "text_loss": 0.3046875 + }, + { + "epoch": 0.94, + "learning_rate": 9.812184249275547e-08, + "loss": 0.5344, + "regression_loss": 0.0, + "step": 11272, + "text_loss": 0.703125 + }, + { + "epoch": 0.94, + "learning_rate": 9.786464209881441e-08, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 11273, + "text_loss": 0.55078125 + }, + { + "epoch": 0.94, + "learning_rate": 9.760777590714255e-08, + "loss": 0.4595, + "regression_loss": 0.0, + "step": 11274, + "text_loss": 0.56640625 + }, + { + "epoch": 0.94, + "learning_rate": 9.735124393525309e-08, + "loss": 0.5833, + "regression_loss": 0.0, + "step": 11275, + "text_loss": 0.267578125 + }, + { + "epoch": 0.94, + "learning_rate": 9.709504620063426e-08, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 11276, + "text_loss": 0.404296875 + }, + { + "epoch": 0.94, + "learning_rate": 9.68391827207521e-08, + "loss": 0.4008, + "regression_loss": 0.0, + "step": 11277, + "text_loss": 0.2294921875 + }, + { + "epoch": 0.94, + "learning_rate": 9.658365351304988e-08, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 11278, + "text_loss": 0.60546875 + }, + { + "epoch": 0.94, + "learning_rate": 9.632845859494977e-08, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 11279, + "text_loss": 0.63671875 + }, + { + "epoch": 0.94, + "learning_rate": 9.607359798384785e-08, + "loss": 0.4792, + "regression_loss": 0.0, + "step": 11280, + "text_loss": 0.55078125 + }, + { + "epoch": 0.94, + "learning_rate": 9.581907169711968e-08, + "loss": 0.4677, + "regression_loss": 0.0, + "step": 11281, + "text_loss": 0.5703125 + }, + { + "epoch": 0.94, + "learning_rate": 9.556487975211858e-08, + "loss": 0.4614, + "regression_loss": 0.0, + "step": 11282, + "text_loss": 0.52734375 + }, + { + "epoch": 0.94, + "learning_rate": 9.531102216617293e-08, + "loss": 0.4246, + "regression_loss": 0.0, + "step": 11283, + "text_loss": 0.59375 + }, + { + "epoch": 0.94, + "learning_rate": 9.505749895659e-08, + "loss": 0.562, + "regression_loss": 0.0, + "step": 11284, + "text_loss": 0.55078125 + }, + { + "epoch": 0.94, + "learning_rate": 9.480431014065316e-08, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 11285, + "text_loss": 0.80859375 + }, + { + "epoch": 0.94, + "learning_rate": 9.455145573562419e-08, + "loss": 0.4744, + "regression_loss": 0.0, + "step": 11286, + "text_loss": 0.33984375 + }, + { + "epoch": 0.94, + "learning_rate": 9.429893575874038e-08, + "loss": 0.4858, + "regression_loss": 0.0, + "step": 11287, + "text_loss": 0.41015625 + }, + { + "epoch": 0.94, + "learning_rate": 9.404675022721855e-08, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 11288, + "text_loss": 0.42578125 + }, + { + "epoch": 0.94, + "learning_rate": 9.379489915825047e-08, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 11289, + "text_loss": 0.416015625 + }, + { + "epoch": 0.94, + "learning_rate": 9.354338256900631e-08, + "loss": 0.5149, + "regression_loss": 0.0, + "step": 11290, + "text_loss": 0.4296875 + }, + { + "epoch": 0.94, + "learning_rate": 9.32922004766329e-08, + "loss": 0.5295, + "regression_loss": 0.0, + "step": 11291, + "text_loss": 0.55078125 + }, + { + "epoch": 0.94, + "learning_rate": 9.304135289825544e-08, + "loss": 0.4803, + "regression_loss": 0.0, + "step": 11292, + "text_loss": 0.66015625 + }, + { + "epoch": 0.94, + "learning_rate": 9.279083985097415e-08, + "loss": 0.46, + "regression_loss": 0.0, + "step": 11293, + "text_loss": 0.5078125 + }, + { + "epoch": 0.94, + "learning_rate": 9.254066135186923e-08, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 11294, + "text_loss": 0.62109375 + }, + { + "epoch": 0.94, + "learning_rate": 9.229081741799484e-08, + "loss": 0.5022, + "regression_loss": 0.0, + "step": 11295, + "text_loss": 0.6484375 + }, + { + "epoch": 0.94, + "learning_rate": 9.204130806638511e-08, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 11296, + "text_loss": 0.69140625 + }, + { + "epoch": 0.94, + "learning_rate": 9.179213331405034e-08, + "loss": 0.4954, + "regression_loss": 0.0, + "step": 11297, + "text_loss": 0.64453125 + }, + { + "epoch": 0.94, + "learning_rate": 9.154329317797805e-08, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 11298, + "text_loss": 0.56640625 + }, + { + "epoch": 0.94, + "learning_rate": 9.129478767513134e-08, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 11299, + "text_loss": 0.515625 + }, + { + "epoch": 0.94, + "learning_rate": 9.104661682245441e-08, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 11300, + "text_loss": 0.53125 + }, + { + "epoch": 0.94, + "learning_rate": 9.079878063686487e-08, + "loss": 0.5591, + "regression_loss": 0.0, + "step": 11301, + "text_loss": 0.57421875 + }, + { + "epoch": 0.94, + "learning_rate": 9.055127913525864e-08, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 11302, + "text_loss": 0.6953125 + }, + { + "epoch": 0.94, + "learning_rate": 9.030411233450997e-08, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 11303, + "text_loss": 0.53125 + }, + { + "epoch": 0.94, + "learning_rate": 9.005728025146932e-08, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 11304, + "text_loss": 0.5859375 + }, + { + "epoch": 0.94, + "learning_rate": 8.981078290296375e-08, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 11305, + "text_loss": 0.4140625 + }, + { + "epoch": 0.94, + "learning_rate": 8.956462030579871e-08, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 11306, + "text_loss": 0.7109375 + }, + { + "epoch": 0.94, + "learning_rate": 8.931879247675578e-08, + "loss": 0.6162, + "regression_loss": 0.0, + "step": 11307, + "text_loss": 0.71875 + }, + { + "epoch": 0.94, + "learning_rate": 8.907329943259491e-08, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 11308, + "text_loss": 0.49609375 + }, + { + "epoch": 0.94, + "learning_rate": 8.882814119005212e-08, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 11309, + "text_loss": 0.28125 + }, + { + "epoch": 0.94, + "learning_rate": 8.858331776584184e-08, + "loss": 0.4343, + "regression_loss": 0.0, + "step": 11310, + "text_loss": 0.466796875 + }, + { + "epoch": 0.94, + "learning_rate": 8.833882917665348e-08, + "loss": 0.5598, + "regression_loss": 0.0, + "step": 11311, + "text_loss": 0.384765625 + }, + { + "epoch": 0.94, + "learning_rate": 8.809467543915595e-08, + "loss": 0.5256, + "regression_loss": 0.0, + "step": 11312, + "text_loss": 0.462890625 + }, + { + "epoch": 0.94, + "learning_rate": 8.78508565699937e-08, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 11313, + "text_loss": 0.58203125 + }, + { + "epoch": 0.94, + "learning_rate": 8.760737258579066e-08, + "loss": 0.4888, + "regression_loss": 0.0, + "step": 11314, + "text_loss": 0.263671875 + }, + { + "epoch": 0.94, + "learning_rate": 8.736422350314356e-08, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 11315, + "text_loss": 0.490234375 + }, + { + "epoch": 0.94, + "learning_rate": 8.712140933863189e-08, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 11316, + "text_loss": 0.443359375 + }, + { + "epoch": 0.94, + "learning_rate": 8.687893010880799e-08, + "loss": 0.5221, + "regression_loss": 0.0, + "step": 11317, + "text_loss": 0.55859375 + }, + { + "epoch": 0.94, + "learning_rate": 8.663678583020252e-08, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 11318, + "text_loss": 0.6328125 + }, + { + "epoch": 0.94, + "learning_rate": 8.63949765193245e-08, + "loss": 0.5273, + "regression_loss": 0.0, + "step": 11319, + "text_loss": 0.8046875 + }, + { + "epoch": 0.94, + "learning_rate": 8.615350219265961e-08, + "loss": 0.3896, + "regression_loss": 0.0, + "step": 11320, + "text_loss": 0.349609375 + }, + { + "epoch": 0.94, + "learning_rate": 8.59123628666686e-08, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 11321, + "text_loss": 0.578125 + }, + { + "epoch": 0.94, + "learning_rate": 8.567155855779274e-08, + "loss": 0.5178, + "regression_loss": 0.0, + "step": 11322, + "text_loss": 0.3984375 + }, + { + "epoch": 0.94, + "learning_rate": 8.54310892824478e-08, + "loss": 0.5105, + "regression_loss": 0.0, + "step": 11323, + "text_loss": 0.23828125 + }, + { + "epoch": 0.94, + "learning_rate": 8.519095505702846e-08, + "loss": 0.5161, + "regression_loss": 0.0, + "step": 11324, + "text_loss": 0.6953125 + }, + { + "epoch": 0.94, + "learning_rate": 8.495115589790492e-08, + "loss": 0.4043, + "regression_loss": 0.0, + "step": 11325, + "text_loss": 0.55078125 + }, + { + "epoch": 0.94, + "learning_rate": 8.47116918214269e-08, + "loss": 0.4938, + "regression_loss": 0.0, + "step": 11326, + "text_loss": 0.341796875 + }, + { + "epoch": 0.94, + "learning_rate": 8.447256284391858e-08, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 11327, + "text_loss": 0.4609375 + }, + { + "epoch": 0.94, + "learning_rate": 8.423376898168246e-08, + "loss": 0.4546, + "regression_loss": 0.0, + "step": 11328, + "text_loss": 0.287109375 + }, + { + "epoch": 0.94, + "learning_rate": 8.399531025099994e-08, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 11329, + "text_loss": 0.345703125 + }, + { + "epoch": 0.94, + "learning_rate": 8.375718666812582e-08, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 11330, + "text_loss": 0.65234375 + }, + { + "epoch": 0.94, + "learning_rate": 8.35193982492949e-08, + "loss": 0.6062, + "regression_loss": 0.0, + "step": 11331, + "text_loss": 0.578125 + }, + { + "epoch": 0.94, + "learning_rate": 8.328194501071862e-08, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 11332, + "text_loss": 0.62890625 + }, + { + "epoch": 0.94, + "learning_rate": 8.304482696858518e-08, + "loss": 0.473, + "regression_loss": 0.0, + "step": 11333, + "text_loss": 0.66796875 + }, + { + "epoch": 0.94, + "learning_rate": 8.280804413906052e-08, + "loss": 0.499, + "regression_loss": 0.0, + "step": 11334, + "text_loss": 0.66015625 + }, + { + "epoch": 0.94, + "learning_rate": 8.257159653828672e-08, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 11335, + "text_loss": 0.56640625 + }, + { + "epoch": 0.94, + "learning_rate": 8.233548418238313e-08, + "loss": 0.3909, + "regression_loss": 0.0, + "step": 11336, + "text_loss": 0.275390625 + }, + { + "epoch": 0.94, + "learning_rate": 8.209970708744797e-08, + "loss": 0.4209, + "regression_loss": 0.0, + "step": 11337, + "text_loss": 0.51171875 + }, + { + "epoch": 0.94, + "learning_rate": 8.186426526955393e-08, + "loss": 0.511, + "regression_loss": 0.0, + "step": 11338, + "text_loss": 0.5078125 + }, + { + "epoch": 0.94, + "learning_rate": 8.162915874475319e-08, + "loss": 0.5437, + "regression_loss": 0.0, + "step": 11339, + "text_loss": 0.435546875 + }, + { + "epoch": 0.94, + "learning_rate": 8.139438752907292e-08, + "loss": 0.4457, + "regression_loss": 0.0, + "step": 11340, + "text_loss": 0.71484375 + }, + { + "epoch": 0.94, + "learning_rate": 8.11599516385203e-08, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 11341, + "text_loss": 0.30859375 + }, + { + "epoch": 0.94, + "learning_rate": 8.092585108907702e-08, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 11342, + "text_loss": 0.54296875 + }, + { + "epoch": 0.94, + "learning_rate": 8.069208589670252e-08, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 11343, + "text_loss": 0.388671875 + }, + { + "epoch": 0.94, + "learning_rate": 8.045865607733406e-08, + "loss": 0.512, + "regression_loss": 0.0, + "step": 11344, + "text_loss": 0.65625 + }, + { + "epoch": 0.94, + "learning_rate": 8.02255616468861e-08, + "loss": 0.5237, + "regression_loss": 0.0, + "step": 11345, + "text_loss": 0.64453125 + }, + { + "epoch": 0.94, + "learning_rate": 7.999280262124876e-08, + "loss": 0.4535, + "regression_loss": 0.0, + "step": 11346, + "text_loss": 0.60546875 + }, + { + "epoch": 0.94, + "learning_rate": 7.976037901629152e-08, + "loss": 0.5127, + "regression_loss": 0.0, + "step": 11347, + "text_loss": 0.416015625 + }, + { + "epoch": 0.94, + "learning_rate": 7.952829084785951e-08, + "loss": 0.4419, + "regression_loss": 0.0, + "step": 11348, + "text_loss": 0.46875 + }, + { + "epoch": 0.94, + "learning_rate": 7.929653813177452e-08, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 11349, + "text_loss": 0.5390625 + }, + { + "epoch": 0.94, + "learning_rate": 7.906512088383722e-08, + "loss": 0.507, + "regression_loss": 0.0, + "step": 11350, + "text_loss": 0.482421875 + }, + { + "epoch": 0.94, + "learning_rate": 7.88340391198239e-08, + "loss": 0.4532, + "regression_loss": 0.0, + "step": 11351, + "text_loss": 0.2099609375 + }, + { + "epoch": 0.94, + "learning_rate": 7.860329285548806e-08, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 11352, + "text_loss": 0.326171875 + }, + { + "epoch": 0.94, + "learning_rate": 7.837288210656269e-08, + "loss": 0.519, + "regression_loss": 0.0, + "step": 11353, + "text_loss": 0.671875 + }, + { + "epoch": 0.94, + "learning_rate": 7.814280688875408e-08, + "loss": 0.509, + "regression_loss": 0.0, + "step": 11354, + "text_loss": 0.62890625 + }, + { + "epoch": 0.94, + "learning_rate": 7.791306721774805e-08, + "loss": 0.4292, + "regression_loss": 0.0, + "step": 11355, + "text_loss": 0.5234375 + }, + { + "epoch": 0.94, + "learning_rate": 7.768366310920705e-08, + "loss": 0.4331, + "regression_loss": 0.0, + "step": 11356, + "text_loss": 0.41796875 + }, + { + "epoch": 0.94, + "learning_rate": 7.745459457877192e-08, + "loss": 0.6548, + "regression_loss": 0.0, + "step": 11357, + "text_loss": 0.431640625 + }, + { + "epoch": 0.94, + "learning_rate": 7.722586164205682e-08, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 11358, + "text_loss": 0.640625 + }, + { + "epoch": 0.94, + "learning_rate": 7.699746431465816e-08, + "loss": 0.51, + "regression_loss": 0.0, + "step": 11359, + "text_loss": 0.470703125 + }, + { + "epoch": 0.94, + "learning_rate": 7.676940261214516e-08, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 11360, + "text_loss": 0.470703125 + }, + { + "epoch": 0.94, + "learning_rate": 7.65416765500665e-08, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 11361, + "text_loss": 0.19921875 + }, + { + "epoch": 0.94, + "learning_rate": 7.631428614394754e-08, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 11362, + "text_loss": 0.419921875 + }, + { + "epoch": 0.94, + "learning_rate": 7.608723140929086e-08, + "loss": 0.5723, + "regression_loss": 0.0, + "step": 11363, + "text_loss": 0.6796875 + }, + { + "epoch": 0.94, + "learning_rate": 7.586051236157521e-08, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 11364, + "text_loss": 0.71484375 + }, + { + "epoch": 0.94, + "learning_rate": 7.563412901625711e-08, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 11365, + "text_loss": 0.375 + }, + { + "epoch": 0.94, + "learning_rate": 7.540808138877032e-08, + "loss": 0.5698, + "regression_loss": 0.0, + "step": 11366, + "text_loss": 0.73046875 + }, + { + "epoch": 0.94, + "learning_rate": 7.518236949452585e-08, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 11367, + "text_loss": 0.470703125 + }, + { + "epoch": 0.94, + "learning_rate": 7.495699334891138e-08, + "loss": 0.3889, + "regression_loss": 0.0, + "step": 11368, + "text_loss": 0.21484375 + }, + { + "epoch": 0.94, + "learning_rate": 7.47319529672924e-08, + "loss": 0.521, + "regression_loss": 0.0, + "step": 11369, + "text_loss": 0.625 + }, + { + "epoch": 0.94, + "learning_rate": 7.450724836501055e-08, + "loss": 0.4208, + "regression_loss": 0.0, + "step": 11370, + "text_loss": 0.462890625 + }, + { + "epoch": 0.95, + "learning_rate": 7.428287955738467e-08, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 11371, + "text_loss": 0.353515625 + }, + { + "epoch": 0.95, + "learning_rate": 7.40588465597114e-08, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 11372, + "text_loss": 0.259765625 + }, + { + "epoch": 0.95, + "learning_rate": 7.383514938726466e-08, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 11373, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 7.36117880552939e-08, + "loss": 0.5737, + "regression_loss": 0.0, + "step": 11374, + "text_loss": 0.62890625 + }, + { + "epoch": 0.95, + "learning_rate": 7.33887625790275e-08, + "loss": 0.5017, + "regression_loss": 0.0, + "step": 11375, + "text_loss": 0.376953125 + }, + { + "epoch": 0.95, + "learning_rate": 7.31660729736705e-08, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 11376, + "text_loss": 0.298828125 + }, + { + "epoch": 0.95, + "learning_rate": 7.294371925440302e-08, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 11377, + "text_loss": 0.4765625 + }, + { + "epoch": 0.95, + "learning_rate": 7.272170143638679e-08, + "loss": 0.509, + "regression_loss": 0.0, + "step": 11378, + "text_loss": 0.384765625 + }, + { + "epoch": 0.95, + "learning_rate": 7.250001953475527e-08, + "loss": 0.4695, + "regression_loss": 0.0, + "step": 11379, + "text_loss": 0.83203125 + }, + { + "epoch": 0.95, + "learning_rate": 7.227867356462303e-08, + "loss": 0.6023, + "regression_loss": 0.0, + "step": 11380, + "text_loss": 0.4296875 + }, + { + "epoch": 0.95, + "learning_rate": 7.205766354107967e-08, + "loss": 0.446, + "regression_loss": 0.0, + "step": 11381, + "text_loss": 0.6875 + }, + { + "epoch": 0.95, + "learning_rate": 7.183698947919315e-08, + "loss": 0.4344, + "regression_loss": 0.0, + "step": 11382, + "text_loss": 0.46484375 + }, + { + "epoch": 0.95, + "learning_rate": 7.161665139400642e-08, + "loss": 0.4111, + "regression_loss": 0.0, + "step": 11383, + "text_loss": 0.55078125 + }, + { + "epoch": 0.95, + "learning_rate": 7.139664930054302e-08, + "loss": 0.4971, + "regression_loss": 0.0, + "step": 11384, + "text_loss": 0.41796875 + }, + { + "epoch": 0.95, + "learning_rate": 7.117698321379984e-08, + "loss": 0.405, + "regression_loss": 0.0, + "step": 11385, + "text_loss": 0.3515625 + }, + { + "epoch": 0.95, + "learning_rate": 7.095765314875324e-08, + "loss": 0.439, + "regression_loss": 0.0, + "step": 11386, + "text_loss": 0.494140625 + }, + { + "epoch": 0.95, + "learning_rate": 7.073865912035626e-08, + "loss": 0.5283, + "regression_loss": 0.0, + "step": 11387, + "text_loss": 0.224609375 + }, + { + "epoch": 0.95, + "learning_rate": 7.052000114353919e-08, + "loss": 0.5298, + "regression_loss": 0.0, + "step": 11388, + "text_loss": 0.62109375 + }, + { + "epoch": 0.95, + "learning_rate": 7.030167923320785e-08, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 11389, + "text_loss": 0.5703125 + }, + { + "epoch": 0.95, + "learning_rate": 7.008369340424704e-08, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 11390, + "text_loss": 0.412109375 + }, + { + "epoch": 0.95, + "learning_rate": 6.986604367151706e-08, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 11391, + "text_loss": 0.50390625 + }, + { + "epoch": 0.95, + "learning_rate": 6.964873004985717e-08, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 11392, + "text_loss": 0.703125 + }, + { + "epoch": 0.95, + "learning_rate": 6.943175255408219e-08, + "loss": 0.4801, + "regression_loss": 0.0, + "step": 11393, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 6.921511119898528e-08, + "loss": 0.4172, + "regression_loss": 0.0, + "step": 11394, + "text_loss": 0.62890625 + }, + { + "epoch": 0.95, + "learning_rate": 6.899880599933462e-08, + "loss": 0.3467, + "regression_loss": 0.0, + "step": 11395, + "text_loss": 0.390625 + }, + { + "epoch": 0.95, + "learning_rate": 6.878283696987786e-08, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 11396, + "text_loss": 0.4296875 + }, + { + "epoch": 0.95, + "learning_rate": 6.856720412533768e-08, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 11397, + "text_loss": 0.38671875 + }, + { + "epoch": 0.95, + "learning_rate": 6.83519074804162e-08, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 11398, + "text_loss": 0.341796875 + }, + { + "epoch": 0.95, + "learning_rate": 6.813694704978946e-08, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 11399, + "text_loss": 0.69921875 + }, + { + "epoch": 0.95, + "learning_rate": 6.792232284811406e-08, + "loss": 0.4087, + "regression_loss": 0.0, + "step": 11400, + "text_loss": 0.310546875 + }, + { + "epoch": 0.95, + "learning_rate": 6.770803489002109e-08, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 11401, + "text_loss": 0.65625 + }, + { + "epoch": 0.95, + "learning_rate": 6.749408319011941e-08, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 11402, + "text_loss": 0.3515625 + }, + { + "epoch": 0.95, + "learning_rate": 6.728046776299568e-08, + "loss": 0.4681, + "regression_loss": 0.0, + "step": 11403, + "text_loss": 0.478515625 + }, + { + "epoch": 0.95, + "learning_rate": 6.706718862321326e-08, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 11404, + "text_loss": 0.4453125 + }, + { + "epoch": 0.95, + "learning_rate": 6.685424578531107e-08, + "loss": 0.4521, + "regression_loss": 0.0, + "step": 11405, + "text_loss": 0.37109375 + }, + { + "epoch": 0.95, + "learning_rate": 6.664163926380806e-08, + "loss": 0.5103, + "regression_loss": 0.0, + "step": 11406, + "text_loss": 0.6015625 + }, + { + "epoch": 0.95, + "learning_rate": 6.642936907319763e-08, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 11407, + "text_loss": 0.74609375 + }, + { + "epoch": 0.95, + "learning_rate": 6.62174352279521e-08, + "loss": 0.5312, + "regression_loss": 0.0, + "step": 11408, + "text_loss": 0.6015625 + }, + { + "epoch": 0.95, + "learning_rate": 6.600583774251879e-08, + "loss": 0.553, + "regression_loss": 0.0, + "step": 11409, + "text_loss": 0.71875 + }, + { + "epoch": 0.95, + "learning_rate": 6.579457663132505e-08, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 11410, + "text_loss": 0.671875 + }, + { + "epoch": 0.95, + "learning_rate": 6.558365190877158e-08, + "loss": 0.3829, + "regression_loss": 0.0, + "step": 11411, + "text_loss": 0.51171875 + }, + { + "epoch": 0.95, + "learning_rate": 6.537306358924023e-08, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 11412, + "text_loss": 0.53125 + }, + { + "epoch": 0.95, + "learning_rate": 6.516281168708616e-08, + "loss": 0.5652, + "regression_loss": 0.0, + "step": 11413, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 6.495289621664402e-08, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 11414, + "text_loss": 0.416015625 + }, + { + "epoch": 0.95, + "learning_rate": 6.47433171922246e-08, + "loss": 0.5072, + "regression_loss": 0.0, + "step": 11415, + "text_loss": 0.2392578125 + }, + { + "epoch": 0.95, + "learning_rate": 6.45340746281159e-08, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 11416, + "text_loss": 0.404296875 + }, + { + "epoch": 0.95, + "learning_rate": 6.432516853858318e-08, + "loss": 0.4778, + "regression_loss": 0.0, + "step": 11417, + "text_loss": 0.3671875 + }, + { + "epoch": 0.95, + "learning_rate": 6.411659893786837e-08, + "loss": 0.6094, + "regression_loss": 0.0, + "step": 11418, + "text_loss": 0.5859375 + }, + { + "epoch": 0.95, + "learning_rate": 6.390836584019012e-08, + "loss": 0.5427, + "regression_loss": 0.0, + "step": 11419, + "text_loss": 0.5078125 + }, + { + "epoch": 0.95, + "learning_rate": 6.370046925974594e-08, + "loss": 0.4797, + "regression_loss": 0.0, + "step": 11420, + "text_loss": 0.357421875 + }, + { + "epoch": 0.95, + "learning_rate": 6.349290921070784e-08, + "loss": 0.428, + "regression_loss": 0.0, + "step": 11421, + "text_loss": 0.365234375 + }, + { + "epoch": 0.95, + "learning_rate": 6.328568570722726e-08, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 11422, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 6.307879876343181e-08, + "loss": 0.519, + "regression_loss": 0.0, + "step": 11423, + "text_loss": 0.326171875 + }, + { + "epoch": 0.95, + "learning_rate": 6.28722483934241e-08, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 11424, + "text_loss": 0.56640625 + }, + { + "epoch": 0.95, + "learning_rate": 6.266603461128784e-08, + "loss": 0.6104, + "regression_loss": 0.0, + "step": 11425, + "text_loss": 0.4140625 + }, + { + "epoch": 0.95, + "learning_rate": 6.24601574310807e-08, + "loss": 0.4194, + "regression_loss": 0.0, + "step": 11426, + "text_loss": 0.56640625 + }, + { + "epoch": 0.95, + "learning_rate": 6.22546168668381e-08, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 11427, + "text_loss": 0.61328125 + }, + { + "epoch": 0.95, + "learning_rate": 6.204941293257271e-08, + "loss": 0.4199, + "regression_loss": 0.0, + "step": 11428, + "text_loss": 0.4921875 + }, + { + "epoch": 0.95, + "learning_rate": 6.184454564227504e-08, + "loss": 0.3982, + "regression_loss": 0.0, + "step": 11429, + "text_loss": 0.384765625 + }, + { + "epoch": 0.95, + "learning_rate": 6.164001500991112e-08, + "loss": 0.448, + "regression_loss": 0.0, + "step": 11430, + "text_loss": 0.412109375 + }, + { + "epoch": 0.95, + "learning_rate": 6.143582104942536e-08, + "loss": 0.6445, + "regression_loss": 0.0, + "step": 11431, + "text_loss": 0.90234375 + }, + { + "epoch": 0.95, + "learning_rate": 6.123196377473772e-08, + "loss": 0.5029, + "regression_loss": 0.0, + "step": 11432, + "text_loss": 0.64453125 + }, + { + "epoch": 0.95, + "learning_rate": 6.10284431997471e-08, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 11433, + "text_loss": 0.337890625 + }, + { + "epoch": 0.95, + "learning_rate": 6.082525933832851e-08, + "loss": 0.583, + "regression_loss": 0.0, + "step": 11434, + "text_loss": 0.326171875 + }, + { + "epoch": 0.95, + "learning_rate": 6.062241220433362e-08, + "loss": 0.4165, + "regression_loss": 0.0, + "step": 11435, + "text_loss": 0.546875 + }, + { + "epoch": 0.95, + "learning_rate": 6.04199018115903e-08, + "loss": 0.5066, + "regression_loss": 0.0, + "step": 11436, + "text_loss": 0.53125 + }, + { + "epoch": 0.95, + "learning_rate": 6.021772817390692e-08, + "loss": 0.4136, + "regression_loss": 0.0, + "step": 11437, + "text_loss": 0.3515625 + }, + { + "epoch": 0.95, + "learning_rate": 6.00158913050658e-08, + "loss": 0.4083, + "regression_loss": 0.0, + "step": 11438, + "text_loss": 0.60546875 + }, + { + "epoch": 0.95, + "learning_rate": 5.981439121882593e-08, + "loss": 0.4683, + "regression_loss": 0.0, + "step": 11439, + "text_loss": 0.34765625 + }, + { + "epoch": 0.95, + "learning_rate": 5.961322792892632e-08, + "loss": 0.498, + "regression_loss": 0.0, + "step": 11440, + "text_loss": 0.58203125 + }, + { + "epoch": 0.95, + "learning_rate": 5.9412401449080446e-08, + "loss": 0.396, + "regression_loss": 0.0, + "step": 11441, + "text_loss": 0.263671875 + }, + { + "epoch": 0.95, + "learning_rate": 5.9211911792979026e-08, + "loss": 0.502, + "regression_loss": 0.0, + "step": 11442, + "text_loss": 0.49609375 + }, + { + "epoch": 0.95, + "learning_rate": 5.901175897429112e-08, + "loss": 0.46, + "regression_loss": 0.0, + "step": 11443, + "text_loss": 0.58203125 + }, + { + "epoch": 0.95, + "learning_rate": 5.881194300666193e-08, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 11444, + "text_loss": 0.640625 + }, + { + "epoch": 0.95, + "learning_rate": 5.861246390371445e-08, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 11445, + "text_loss": 0.41015625 + }, + { + "epoch": 0.95, + "learning_rate": 5.841332167904723e-08, + "loss": 0.4875, + "regression_loss": 0.0, + "step": 11446, + "text_loss": 0.72265625 + }, + { + "epoch": 0.95, + "learning_rate": 5.8214516346237204e-08, + "loss": 0.447, + "regression_loss": 0.0, + "step": 11447, + "text_loss": 0.21484375 + }, + { + "epoch": 0.95, + "learning_rate": 5.801604791883797e-08, + "loss": 0.4912, + "regression_loss": 0.0, + "step": 11448, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 5.7817916410379815e-08, + "loss": 0.5415, + "regression_loss": 0.0, + "step": 11449, + "text_loss": 0.66796875 + }, + { + "epoch": 0.95, + "learning_rate": 5.7620121834370266e-08, + "loss": 0.5554, + "regression_loss": 0.0, + "step": 11450, + "text_loss": 0.54296875 + }, + { + "epoch": 0.95, + "learning_rate": 5.7422664204294655e-08, + "loss": 0.489, + "regression_loss": 0.0, + "step": 11451, + "text_loss": 0.2734375 + }, + { + "epoch": 0.95, + "learning_rate": 5.7225543533613315e-08, + "loss": 0.4492, + "regression_loss": 0.0, + "step": 11452, + "text_loss": 0.310546875 + }, + { + "epoch": 0.95, + "learning_rate": 5.7028759835766615e-08, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 11453, + "text_loss": 0.4765625 + }, + { + "epoch": 0.95, + "learning_rate": 5.6832313124168816e-08, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 11454, + "text_loss": 0.484375 + }, + { + "epoch": 0.95, + "learning_rate": 5.66362034122131e-08, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 11455, + "text_loss": 0.443359375 + }, + { + "epoch": 0.95, + "learning_rate": 5.6440430713269325e-08, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 11456, + "text_loss": 0.50390625 + }, + { + "epoch": 0.95, + "learning_rate": 5.6244995040684594e-08, + "loss": 0.5107, + "regression_loss": 0.0, + "step": 11457, + "text_loss": 0.671875 + }, + { + "epoch": 0.95, + "learning_rate": 5.604989640778158e-08, + "loss": 0.415, + "regression_loss": 0.0, + "step": 11458, + "text_loss": 0.44140625 + }, + { + "epoch": 0.95, + "learning_rate": 5.5855134827862424e-08, + "loss": 0.5266, + "regression_loss": 0.0, + "step": 11459, + "text_loss": 0.412109375 + }, + { + "epoch": 0.95, + "learning_rate": 5.566071031420428e-08, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 11460, + "text_loss": 0.63671875 + }, + { + "epoch": 0.95, + "learning_rate": 5.5466622880062105e-08, + "loss": 0.4073, + "regression_loss": 0.0, + "step": 11461, + "text_loss": 0.30859375 + }, + { + "epoch": 0.95, + "learning_rate": 5.527287253866753e-08, + "loss": 0.4766, + "regression_loss": 0.0, + "step": 11462, + "text_loss": 0.6171875 + }, + { + "epoch": 0.95, + "learning_rate": 5.507945930323e-08, + "loss": 0.4586, + "regression_loss": 0.0, + "step": 11463, + "text_loss": 0.474609375 + }, + { + "epoch": 0.95, + "learning_rate": 5.488638318693451e-08, + "loss": 0.4866, + "regression_loss": 0.0, + "step": 11464, + "text_loss": 0.546875 + }, + { + "epoch": 0.95, + "learning_rate": 5.469364420294554e-08, + "loss": 0.5212, + "regression_loss": 0.0, + "step": 11465, + "text_loss": 0.470703125 + }, + { + "epoch": 0.95, + "learning_rate": 5.4501242364401466e-08, + "loss": 0.4756, + "regression_loss": 0.0, + "step": 11466, + "text_loss": 0.341796875 + }, + { + "epoch": 0.95, + "learning_rate": 5.4309177684420125e-08, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 11467, + "text_loss": 0.51171875 + }, + { + "epoch": 0.95, + "learning_rate": 5.411745017609493e-08, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 11468, + "text_loss": 0.515625 + }, + { + "epoch": 0.95, + "learning_rate": 5.392605985249766e-08, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 11469, + "text_loss": 0.384765625 + }, + { + "epoch": 0.95, + "learning_rate": 5.373500672667564e-08, + "loss": 0.4028, + "regression_loss": 0.0, + "step": 11470, + "text_loss": 0.4453125 + }, + { + "epoch": 0.95, + "learning_rate": 5.354429081165402e-08, + "loss": 0.4468, + "regression_loss": 0.0, + "step": 11471, + "text_loss": 0.53125 + }, + { + "epoch": 0.95, + "learning_rate": 5.335391212043517e-08, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 11472, + "text_loss": 0.474609375 + }, + { + "epoch": 0.95, + "learning_rate": 5.3163870665998154e-08, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 11473, + "text_loss": 0.671875 + }, + { + "epoch": 0.95, + "learning_rate": 5.297416646129816e-08, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 11474, + "text_loss": 0.57421875 + }, + { + "epoch": 0.95, + "learning_rate": 5.2784799519268735e-08, + "loss": 0.4656, + "regression_loss": 0.0, + "step": 11475, + "text_loss": 0.490234375 + }, + { + "epoch": 0.95, + "learning_rate": 5.259576985282066e-08, + "loss": 0.605, + "regression_loss": 0.0, + "step": 11476, + "text_loss": 0.41796875 + }, + { + "epoch": 0.95, + "learning_rate": 5.240707747483975e-08, + "loss": 0.4148, + "regression_loss": 0.0, + "step": 11477, + "text_loss": 0.279296875 + }, + { + "epoch": 0.95, + "learning_rate": 5.221872239819126e-08, + "loss": 0.4441, + "regression_loss": 0.0, + "step": 11478, + "text_loss": 0.447265625 + }, + { + "epoch": 0.95, + "learning_rate": 5.203070463571491e-08, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 11479, + "text_loss": 0.58984375 + }, + { + "epoch": 0.95, + "learning_rate": 5.184302420022991e-08, + "loss": 0.5151, + "regression_loss": 0.0, + "step": 11480, + "text_loss": 0.34375 + }, + { + "epoch": 0.95, + "learning_rate": 5.1655681104531565e-08, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 11481, + "text_loss": 0.6484375 + }, + { + "epoch": 0.95, + "learning_rate": 5.146867536139078e-08, + "loss": 0.4868, + "regression_loss": 0.0, + "step": 11482, + "text_loss": 0.369140625 + }, + { + "epoch": 0.95, + "learning_rate": 5.1282006983557345e-08, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 11483, + "text_loss": 0.47265625 + }, + { + "epoch": 0.95, + "learning_rate": 5.1095675983757196e-08, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 11484, + "text_loss": 0.6328125 + }, + { + "epoch": 0.95, + "learning_rate": 5.0909682374693516e-08, + "loss": 0.438, + "regression_loss": 0.0, + "step": 11485, + "text_loss": 0.66015625 + }, + { + "epoch": 0.95, + "learning_rate": 5.0724026169046706e-08, + "loss": 0.5725, + "regression_loss": 0.0, + "step": 11486, + "text_loss": 0.7578125 + }, + { + "epoch": 0.95, + "learning_rate": 5.053870737947275e-08, + "loss": 0.3904, + "regression_loss": 0.0, + "step": 11487, + "text_loss": 0.26171875 + }, + { + "epoch": 0.95, + "learning_rate": 5.035372601860766e-08, + "loss": 0.4324, + "regression_loss": 0.0, + "step": 11488, + "text_loss": 0.341796875 + }, + { + "epoch": 0.95, + "learning_rate": 5.016908209906024e-08, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 11489, + "text_loss": 0.357421875 + }, + { + "epoch": 0.95, + "learning_rate": 4.998477563341986e-08, + "loss": 0.6079, + "regression_loss": 0.0, + "step": 11490, + "text_loss": 0.4609375 + }, + { + "epoch": 0.96, + "learning_rate": 4.980080663425146e-08, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 11491, + "text_loss": 0.345703125 + }, + { + "epoch": 0.96, + "learning_rate": 4.961717511409725e-08, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 11492, + "text_loss": 0.57421875 + }, + { + "epoch": 0.96, + "learning_rate": 4.9433881085475534e-08, + "loss": 0.4419, + "regression_loss": 0.0, + "step": 11493, + "text_loss": 0.31640625 + }, + { + "epoch": 0.96, + "learning_rate": 4.925092456088299e-08, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 11494, + "text_loss": 0.48828125 + }, + { + "epoch": 0.96, + "learning_rate": 4.90683055527924e-08, + "loss": 0.4388, + "regression_loss": 0.0, + "step": 11495, + "text_loss": 0.267578125 + }, + { + "epoch": 0.96, + "learning_rate": 4.888602407365439e-08, + "loss": 0.4641, + "regression_loss": 0.0, + "step": 11496, + "text_loss": 0.4453125 + }, + { + "epoch": 0.96, + "learning_rate": 4.870408013589512e-08, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 11497, + "text_loss": 0.7265625 + }, + { + "epoch": 0.96, + "learning_rate": 4.85224737519191e-08, + "loss": 0.4563, + "regression_loss": 0.0, + "step": 11498, + "text_loss": 0.4140625 + }, + { + "epoch": 0.96, + "learning_rate": 4.8341204934106433e-08, + "loss": 0.5872, + "regression_loss": 0.0, + "step": 11499, + "text_loss": 0.318359375 + }, + { + "epoch": 0.96, + "learning_rate": 4.8160273694816686e-08, + "loss": 0.499, + "regression_loss": 0.0, + "step": 11500, + "text_loss": 0.52734375 + }, + { + "epoch": 0.96, + "learning_rate": 4.797968004638387e-08, + "loss": 0.5391, + "regression_loss": 0.0, + "step": 11501, + "text_loss": 0.54296875 + }, + { + "epoch": 0.96, + "learning_rate": 4.779942400111981e-08, + "loss": 0.4131, + "regression_loss": 0.0, + "step": 11502, + "text_loss": 0.421875 + }, + { + "epoch": 0.96, + "learning_rate": 4.7619505571313006e-08, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11503, + "text_loss": 0.46875 + }, + { + "epoch": 0.96, + "learning_rate": 4.743992476923087e-08, + "loss": 0.4944, + "regression_loss": 0.0, + "step": 11504, + "text_loss": 0.2734375 + }, + { + "epoch": 0.96, + "learning_rate": 4.726068160711472e-08, + "loss": 0.5459, + "regression_loss": 0.0, + "step": 11505, + "text_loss": 0.46875 + }, + { + "epoch": 0.96, + "learning_rate": 4.708177609718589e-08, + "loss": 0.5012, + "regression_loss": 0.0, + "step": 11506, + "text_loss": 0.703125 + }, + { + "epoch": 0.96, + "learning_rate": 4.690320825163963e-08, + "loss": 0.4597, + "regression_loss": 0.0, + "step": 11507, + "text_loss": 0.51171875 + }, + { + "epoch": 0.96, + "learning_rate": 4.6724978082651195e-08, + "loss": 0.4421, + "regression_loss": 0.0, + "step": 11508, + "text_loss": 0.458984375 + }, + { + "epoch": 0.96, + "learning_rate": 4.654708560237031e-08, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 11509, + "text_loss": 0.55078125 + }, + { + "epoch": 0.96, + "learning_rate": 4.6369530822925615e-08, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 11510, + "text_loss": 0.44140625 + }, + { + "epoch": 0.96, + "learning_rate": 4.6192313756421305e-08, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 11511, + "text_loss": 0.546875 + }, + { + "epoch": 0.96, + "learning_rate": 4.601543441493938e-08, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 11512, + "text_loss": 0.404296875 + }, + { + "epoch": 0.96, + "learning_rate": 4.583889281053855e-08, + "loss": 0.3716, + "regression_loss": 0.0, + "step": 11513, + "text_loss": 0.314453125 + }, + { + "epoch": 0.96, + "learning_rate": 4.566268895525472e-08, + "loss": 0.4512, + "regression_loss": 0.0, + "step": 11514, + "text_loss": 0.5 + }, + { + "epoch": 0.96, + "learning_rate": 4.548682286109995e-08, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 11515, + "text_loss": 0.50390625 + }, + { + "epoch": 0.96, + "learning_rate": 4.531129454006411e-08, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 11516, + "text_loss": 0.5859375 + }, + { + "epoch": 0.96, + "learning_rate": 4.513610400411428e-08, + "loss": 0.4312, + "regression_loss": 0.0, + "step": 11517, + "text_loss": 0.484375 + }, + { + "epoch": 0.96, + "learning_rate": 4.4961251265193686e-08, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 11518, + "text_loss": 0.58984375 + }, + { + "epoch": 0.96, + "learning_rate": 4.4786736335223345e-08, + "loss": 0.447, + "regression_loss": 0.0, + "step": 11519, + "text_loss": 0.27734375 + }, + { + "epoch": 0.96, + "learning_rate": 4.461255922609986e-08, + "loss": 0.4353, + "regression_loss": 0.0, + "step": 11520, + "text_loss": 0.291015625 + }, + { + "epoch": 0.96, + "learning_rate": 4.443871994969817e-08, + "loss": 0.4973, + "regression_loss": 0.0, + "step": 11521, + "text_loss": 0.3046875 + }, + { + "epoch": 0.96, + "learning_rate": 4.4265218517869356e-08, + "loss": 0.5696, + "regression_loss": 0.0, + "step": 11522, + "text_loss": 0.7109375 + }, + { + "epoch": 0.96, + "learning_rate": 4.4092054942442844e-08, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11523, + "text_loss": 0.43359375 + }, + { + "epoch": 0.96, + "learning_rate": 4.3919229235223625e-08, + "loss": 0.4612, + "regression_loss": 0.0, + "step": 11524, + "text_loss": 0.5625 + }, + { + "epoch": 0.96, + "learning_rate": 4.3746741407993954e-08, + "loss": 0.4817, + "regression_loss": 0.0, + "step": 11525, + "text_loss": 0.3671875 + }, + { + "epoch": 0.96, + "learning_rate": 4.3574591472512754e-08, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 11526, + "text_loss": 0.625 + }, + { + "epoch": 0.96, + "learning_rate": 4.340277944051674e-08, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 11527, + "text_loss": 0.287109375 + }, + { + "epoch": 0.96, + "learning_rate": 4.3231305323718774e-08, + "loss": 0.4514, + "regression_loss": 0.0, + "step": 11528, + "text_loss": 0.5234375 + }, + { + "epoch": 0.96, + "learning_rate": 4.306016913381006e-08, + "loss": 0.4824, + "regression_loss": 0.0, + "step": 11529, + "text_loss": 0.34375 + }, + { + "epoch": 0.96, + "learning_rate": 4.288937088245682e-08, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 11530, + "text_loss": 0.439453125 + }, + { + "epoch": 0.96, + "learning_rate": 4.271891058130362e-08, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 11531, + "text_loss": 0.478515625 + }, + { + "epoch": 0.96, + "learning_rate": 4.254878824197117e-08, + "loss": 0.5417, + "regression_loss": 0.0, + "step": 11532, + "text_loss": 0.65625 + }, + { + "epoch": 0.96, + "learning_rate": 4.2379003876057977e-08, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 11533, + "text_loss": 0.5234375 + }, + { + "epoch": 0.96, + "learning_rate": 4.220955749513922e-08, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 11534, + "text_loss": 0.33203125 + }, + { + "epoch": 0.96, + "learning_rate": 4.204044911076621e-08, + "loss": 0.481, + "regression_loss": 0.0, + "step": 11535, + "text_loss": 0.51171875 + }, + { + "epoch": 0.96, + "learning_rate": 4.187167873446807e-08, + "loss": 0.5608, + "regression_loss": 0.0, + "step": 11536, + "text_loss": 0.462890625 + }, + { + "epoch": 0.96, + "learning_rate": 4.1703246377751137e-08, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 11537, + "text_loss": 0.388671875 + }, + { + "epoch": 0.96, + "learning_rate": 4.15351520520979e-08, + "loss": 0.4451, + "regression_loss": 0.0, + "step": 11538, + "text_loss": 0.283203125 + }, + { + "epoch": 0.96, + "learning_rate": 4.136739576896864e-08, + "loss": 0.4397, + "regression_loss": 0.0, + "step": 11539, + "text_loss": 0.494140625 + }, + { + "epoch": 0.96, + "learning_rate": 4.119997753979921e-08, + "loss": 0.4159, + "regression_loss": 0.0, + "step": 11540, + "text_loss": 0.42578125 + }, + { + "epoch": 0.96, + "learning_rate": 4.10328973760038e-08, + "loss": 0.4832, + "regression_loss": 0.0, + "step": 11541, + "text_loss": 0.41796875 + }, + { + "epoch": 0.96, + "learning_rate": 4.086615528897275e-08, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11542, + "text_loss": 0.361328125 + }, + { + "epoch": 0.96, + "learning_rate": 4.0699751290074755e-08, + "loss": 0.4602, + "regression_loss": 0.0, + "step": 11543, + "text_loss": 0.6484375 + }, + { + "epoch": 0.96, + "learning_rate": 4.05336853906535e-08, + "loss": 0.4553, + "regression_loss": 0.0, + "step": 11544, + "text_loss": 0.5 + }, + { + "epoch": 0.96, + "learning_rate": 4.036795760203049e-08, + "loss": 0.5364, + "regression_loss": 0.0, + "step": 11545, + "text_loss": 0.443359375 + }, + { + "epoch": 0.96, + "learning_rate": 4.020256793550448e-08, + "loss": 0.4498, + "regression_loss": 0.0, + "step": 11546, + "text_loss": 0.70703125 + }, + { + "epoch": 0.96, + "learning_rate": 4.0037516402350876e-08, + "loss": 0.481, + "regression_loss": 0.0, + "step": 11547, + "text_loss": 0.5078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.987280301382124e-08, + "loss": 0.4609, + "regression_loss": 0.0, + "step": 11548, + "text_loss": 0.66015625 + }, + { + "epoch": 0.96, + "learning_rate": 3.970842778114603e-08, + "loss": 0.5062, + "regression_loss": 0.0, + "step": 11549, + "text_loss": 0.6796875 + }, + { + "epoch": 0.96, + "learning_rate": 3.9544390715530737e-08, + "loss": 0.4187, + "regression_loss": 0.0, + "step": 11550, + "text_loss": 0.30078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.938069182815918e-08, + "loss": 0.4464, + "regression_loss": 0.0, + "step": 11551, + "text_loss": 0.1943359375 + }, + { + "epoch": 0.96, + "learning_rate": 3.921733113019077e-08, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 11552, + "text_loss": 0.66015625 + }, + { + "epoch": 0.96, + "learning_rate": 3.9054308632763826e-08, + "loss": 0.4392, + "regression_loss": 0.0, + "step": 11553, + "text_loss": 0.333984375 + }, + { + "epoch": 0.96, + "learning_rate": 3.889162434699056e-08, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 11554, + "text_loss": 0.49609375 + }, + { + "epoch": 0.96, + "learning_rate": 3.872927828396322e-08, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 11555, + "text_loss": 0.6796875 + }, + { + "epoch": 0.96, + "learning_rate": 3.856727045474962e-08, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 11556, + "text_loss": 0.359375 + }, + { + "epoch": 0.96, + "learning_rate": 3.8405600870394246e-08, + "loss": 0.4536, + "regression_loss": 0.0, + "step": 11557, + "text_loss": 0.50390625 + }, + { + "epoch": 0.96, + "learning_rate": 3.8244269541919396e-08, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 11558, + "text_loss": 0.455078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.8083276480323505e-08, + "loss": 0.4801, + "regression_loss": 0.0, + "step": 11559, + "text_loss": 0.423828125 + }, + { + "epoch": 0.96, + "learning_rate": 3.7922621696581674e-08, + "loss": 0.5044, + "regression_loss": 0.0, + "step": 11560, + "text_loss": 0.7265625 + }, + { + "epoch": 0.96, + "learning_rate": 3.776230520164736e-08, + "loss": 0.392, + "regression_loss": 0.0, + "step": 11561, + "text_loss": 0.2138671875 + }, + { + "epoch": 0.96, + "learning_rate": 3.7602327006450166e-08, + "loss": 0.5334, + "regression_loss": 0.0, + "step": 11562, + "text_loss": 0.58984375 + }, + { + "epoch": 0.96, + "learning_rate": 3.7442687121895806e-08, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 11563, + "text_loss": 0.279296875 + }, + { + "epoch": 0.96, + "learning_rate": 3.7283385558868346e-08, + "loss": 0.5532, + "regression_loss": 0.0, + "step": 11564, + "text_loss": 0.443359375 + }, + { + "epoch": 0.96, + "learning_rate": 3.712442232822799e-08, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 11565, + "text_loss": 0.5234375 + }, + { + "epoch": 0.96, + "learning_rate": 3.6965797440811635e-08, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 11566, + "text_loss": 0.439453125 + }, + { + "epoch": 0.96, + "learning_rate": 3.680751090743395e-08, + "loss": 0.4348, + "regression_loss": 0.0, + "step": 11567, + "text_loss": 0.33984375 + }, + { + "epoch": 0.96, + "learning_rate": 3.6649562738886315e-08, + "loss": 0.4409, + "regression_loss": 0.0, + "step": 11568, + "text_loss": 0.6484375 + }, + { + "epoch": 0.96, + "learning_rate": 3.649195294593622e-08, + "loss": 0.5117, + "regression_loss": 0.0, + "step": 11569, + "text_loss": 0.56640625 + }, + { + "epoch": 0.96, + "learning_rate": 3.6334681539328954e-08, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 11570, + "text_loss": 0.859375 + }, + { + "epoch": 0.96, + "learning_rate": 3.6177748529786505e-08, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 11571, + "text_loss": 0.427734375 + }, + { + "epoch": 0.96, + "learning_rate": 3.602115392800809e-08, + "loss": 0.5562, + "regression_loss": 0.0, + "step": 11572, + "text_loss": 0.50390625 + }, + { + "epoch": 0.96, + "learning_rate": 3.58648977446685e-08, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 11573, + "text_loss": 0.50390625 + }, + { + "epoch": 0.96, + "learning_rate": 3.5708979990422e-08, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 11574, + "text_loss": 0.62109375 + }, + { + "epoch": 0.96, + "learning_rate": 3.5553400675896746e-08, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 11575, + "text_loss": 0.369140625 + }, + { + "epoch": 0.96, + "learning_rate": 3.539815981170036e-08, + "loss": 0.4789, + "regression_loss": 0.0, + "step": 11576, + "text_loss": 0.2490234375 + }, + { + "epoch": 0.96, + "learning_rate": 3.5243257408415497e-08, + "loss": 0.4673, + "regression_loss": 0.0, + "step": 11577, + "text_loss": 0.66015625 + }, + { + "epoch": 0.96, + "learning_rate": 3.508869347660371e-08, + "loss": 0.3724, + "regression_loss": 0.0, + "step": 11578, + "text_loss": 0.330078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.4934468026801007e-08, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 11579, + "text_loss": 0.64453125 + }, + { + "epoch": 0.96, + "learning_rate": 3.478058106952287e-08, + "loss": 0.462, + "regression_loss": 0.0, + "step": 11580, + "text_loss": 0.6640625 + }, + { + "epoch": 0.96, + "learning_rate": 3.462703261525979e-08, + "loss": 0.3571, + "regression_loss": 0.0, + "step": 11581, + "text_loss": 0.298828125 + }, + { + "epoch": 0.96, + "learning_rate": 3.447382267448063e-08, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 11582, + "text_loss": 0.390625 + }, + { + "epoch": 0.96, + "learning_rate": 3.43209512576298e-08, + "loss": 0.51, + "regression_loss": 0.0, + "step": 11583, + "text_loss": 0.62890625 + }, + { + "epoch": 0.96, + "learning_rate": 3.416841837512952e-08, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 11584, + "text_loss": 0.5703125 + }, + { + "epoch": 0.96, + "learning_rate": 3.40162240373787e-08, + "loss": 0.4907, + "regression_loss": 0.0, + "step": 11585, + "text_loss": 0.73828125 + }, + { + "epoch": 0.96, + "learning_rate": 3.3864368254752924e-08, + "loss": 0.4863, + "regression_loss": 0.0, + "step": 11586, + "text_loss": 0.6015625 + }, + { + "epoch": 0.96, + "learning_rate": 3.371285103760502e-08, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 11587, + "text_loss": 0.396484375 + }, + { + "epoch": 0.96, + "learning_rate": 3.356167239626507e-08, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 11588, + "text_loss": 0.32421875 + }, + { + "epoch": 0.96, + "learning_rate": 3.3410832341039254e-08, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 11589, + "text_loss": 0.51953125 + }, + { + "epoch": 0.96, + "learning_rate": 3.326033088221159e-08, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 11590, + "text_loss": 0.265625 + }, + { + "epoch": 0.96, + "learning_rate": 3.311016803004108e-08, + "loss": 0.5071, + "regression_loss": 0.0, + "step": 11591, + "text_loss": 0.51171875 + }, + { + "epoch": 0.96, + "learning_rate": 3.296034379476676e-08, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 11592, + "text_loss": 0.369140625 + }, + { + "epoch": 0.96, + "learning_rate": 3.281085818660212e-08, + "loss": 0.5432, + "regression_loss": 0.0, + "step": 11593, + "text_loss": 0.6640625 + }, + { + "epoch": 0.96, + "learning_rate": 3.2661711215738467e-08, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 11594, + "text_loss": 0.5234375 + }, + { + "epoch": 0.96, + "learning_rate": 3.251290289234321e-08, + "loss": 0.4886, + "regression_loss": 0.0, + "step": 11595, + "text_loss": 0.57421875 + }, + { + "epoch": 0.96, + "learning_rate": 3.236443322656213e-08, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 11596, + "text_loss": 0.5078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.221630222851713e-08, + "loss": 0.4163, + "regression_loss": 0.0, + "step": 11597, + "text_loss": 0.314453125 + }, + { + "epoch": 0.96, + "learning_rate": 3.206850990830623e-08, + "loss": 0.4408, + "regression_loss": 0.0, + "step": 11598, + "text_loss": 0.8125 + }, + { + "epoch": 0.96, + "learning_rate": 3.192105627600639e-08, + "loss": 0.4395, + "regression_loss": 0.0, + "step": 11599, + "text_loss": 0.55078125 + }, + { + "epoch": 0.96, + "learning_rate": 3.1773941341669e-08, + "loss": 0.4666, + "regression_loss": 0.0, + "step": 11600, + "text_loss": 0.435546875 + }, + { + "epoch": 0.96, + "learning_rate": 3.162716511532382e-08, + "loss": 0.5527, + "regression_loss": 0.0, + "step": 11601, + "text_loss": 0.486328125 + }, + { + "epoch": 0.96, + "learning_rate": 3.14807276069784e-08, + "loss": 0.4187, + "regression_loss": 0.0, + "step": 11602, + "text_loss": 0.458984375 + }, + { + "epoch": 0.96, + "learning_rate": 3.133462882661476e-08, + "loss": 0.4351, + "regression_loss": 0.0, + "step": 11603, + "text_loss": 0.427734375 + }, + { + "epoch": 0.96, + "learning_rate": 3.11888687841938e-08, + "loss": 0.5146, + "regression_loss": 0.0, + "step": 11604, + "text_loss": 0.6171875 + }, + { + "epoch": 0.96, + "learning_rate": 3.1043447489652603e-08, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 11605, + "text_loss": 0.44921875 + }, + { + "epoch": 0.96, + "learning_rate": 3.0898364952905435e-08, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 11606, + "text_loss": 0.46875 + }, + { + "epoch": 0.96, + "learning_rate": 3.0753621183842175e-08, + "loss": 0.5642, + "regression_loss": 0.0, + "step": 11607, + "text_loss": 0.453125 + }, + { + "epoch": 0.96, + "learning_rate": 3.06092161923327e-08, + "loss": 0.512, + "regression_loss": 0.0, + "step": 11608, + "text_loss": 0.466796875 + }, + { + "epoch": 0.96, + "learning_rate": 3.04651499882197e-08, + "loss": 0.45, + "regression_loss": 0.0, + "step": 11609, + "text_loss": 0.462890625 + }, + { + "epoch": 0.96, + "learning_rate": 3.032142258132642e-08, + "loss": 0.4465, + "regression_loss": 0.0, + "step": 11610, + "text_loss": 0.52734375 + }, + { + "epoch": 0.97, + "learning_rate": 3.0178033981450585e-08, + "loss": 0.5568, + "regression_loss": 0.0, + "step": 11611, + "text_loss": 0.671875 + }, + { + "epoch": 0.97, + "learning_rate": 3.003498419836825e-08, + "loss": 0.3912, + "regression_loss": 0.0, + "step": 11612, + "text_loss": 0.59765625 + }, + { + "epoch": 0.97, + "learning_rate": 2.989227324183164e-08, + "loss": 0.4524, + "regression_loss": 0.0, + "step": 11613, + "text_loss": 0.38671875 + }, + { + "epoch": 0.97, + "learning_rate": 2.9749901121569057e-08, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 11614, + "text_loss": 0.3046875 + }, + { + "epoch": 0.97, + "learning_rate": 2.9607867847288864e-08, + "loss": 0.4159, + "regression_loss": 0.0, + "step": 11615, + "text_loss": 0.61328125 + }, + { + "epoch": 0.97, + "learning_rate": 2.9466173428672197e-08, + "loss": 0.5439, + "regression_loss": 0.0, + "step": 11616, + "text_loss": 0.52734375 + }, + { + "epoch": 0.97, + "learning_rate": 2.9324817875379663e-08, + "loss": 0.481, + "regression_loss": 0.0, + "step": 11617, + "text_loss": 0.314453125 + }, + { + "epoch": 0.97, + "learning_rate": 2.9183801197048002e-08, + "loss": 0.5027, + "regression_loss": 0.0, + "step": 11618, + "text_loss": 0.6171875 + }, + { + "epoch": 0.97, + "learning_rate": 2.904312340329174e-08, + "loss": 0.4369, + "regression_loss": 0.0, + "step": 11619, + "text_loss": 0.314453125 + }, + { + "epoch": 0.97, + "learning_rate": 2.8902784503700988e-08, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 11620, + "text_loss": 0.44140625 + }, + { + "epoch": 0.97, + "learning_rate": 2.8762784507843088e-08, + "loss": 0.511, + "regression_loss": 0.0, + "step": 11621, + "text_loss": 0.5 + }, + { + "epoch": 0.97, + "learning_rate": 2.862312342526319e-08, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 11622, + "text_loss": 0.56640625 + }, + { + "epoch": 0.97, + "learning_rate": 2.8483801265482005e-08, + "loss": 0.4548, + "regression_loss": 0.0, + "step": 11623, + "text_loss": 0.3671875 + }, + { + "epoch": 0.97, + "learning_rate": 2.8344818037998045e-08, + "loss": 0.5037, + "regression_loss": 0.0, + "step": 11624, + "text_loss": 0.470703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.8206173752287068e-08, + "loss": 0.4417, + "regression_loss": 0.0, + "step": 11625, + "text_loss": 0.388671875 + }, + { + "epoch": 0.97, + "learning_rate": 2.806786841779985e-08, + "loss": 0.4155, + "regression_loss": 0.0, + "step": 11626, + "text_loss": 0.408203125 + }, + { + "epoch": 0.97, + "learning_rate": 2.792990204396606e-08, + "loss": 0.5046, + "regression_loss": 0.0, + "step": 11627, + "text_loss": 0.7890625 + }, + { + "epoch": 0.97, + "learning_rate": 2.7792274640192074e-08, + "loss": 0.522, + "regression_loss": 0.0, + "step": 11628, + "text_loss": 0.470703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.765498621585927e-08, + "loss": 0.4316, + "regression_loss": 0.0, + "step": 11629, + "text_loss": 0.453125 + }, + { + "epoch": 0.97, + "learning_rate": 2.7518036780328494e-08, + "loss": 0.4, + "regression_loss": 0.0, + "step": 11630, + "text_loss": 0.2373046875 + }, + { + "epoch": 0.97, + "learning_rate": 2.7381426342936167e-08, + "loss": 0.5049, + "regression_loss": 0.0, + "step": 11631, + "text_loss": 0.478515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.7245154912994844e-08, + "loss": 0.4586, + "regression_loss": 0.0, + "step": 11632, + "text_loss": 0.47265625 + }, + { + "epoch": 0.97, + "learning_rate": 2.7109222499795416e-08, + "loss": 0.491, + "regression_loss": 0.0, + "step": 11633, + "text_loss": 0.55859375 + }, + { + "epoch": 0.97, + "learning_rate": 2.6973629112604925e-08, + "loss": 0.4651, + "regression_loss": 0.0, + "step": 11634, + "text_loss": 0.53515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.683837476066764e-08, + "loss": 0.551, + "regression_loss": 0.0, + "step": 11635, + "text_loss": 0.54296875 + }, + { + "epoch": 0.97, + "learning_rate": 2.670345945320396e-08, + "loss": 0.3525, + "regression_loss": 0.0, + "step": 11636, + "text_loss": 0.640625 + }, + { + "epoch": 0.97, + "learning_rate": 2.6568883199412643e-08, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 11637, + "text_loss": 0.306640625 + }, + { + "epoch": 0.97, + "learning_rate": 2.643464600846746e-08, + "loss": 0.5925, + "regression_loss": 0.0, + "step": 11638, + "text_loss": 0.3984375 + }, + { + "epoch": 0.97, + "learning_rate": 2.630074788951997e-08, + "loss": 0.4658, + "regression_loss": 0.0, + "step": 11639, + "text_loss": 0.6875 + }, + { + "epoch": 0.97, + "learning_rate": 2.6167188851699555e-08, + "loss": 0.4586, + "regression_loss": 0.0, + "step": 11640, + "text_loss": 0.279296875 + }, + { + "epoch": 0.97, + "learning_rate": 2.6033968904110583e-08, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 11641, + "text_loss": 0.4765625 + }, + { + "epoch": 0.97, + "learning_rate": 2.59010880558358e-08, + "loss": 0.5034, + "regression_loss": 0.0, + "step": 11642, + "text_loss": 0.53515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.576854631593517e-08, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 11643, + "text_loss": 0.330078125 + }, + { + "epoch": 0.97, + "learning_rate": 2.5636343693443145e-08, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 11644, + "text_loss": 0.74609375 + }, + { + "epoch": 0.97, + "learning_rate": 2.5504480197373614e-08, + "loss": 0.4741, + "regression_loss": 0.0, + "step": 11645, + "text_loss": 0.458984375 + }, + { + "epoch": 0.97, + "learning_rate": 2.5372955836715507e-08, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 11646, + "text_loss": 0.7109375 + }, + { + "epoch": 0.97, + "learning_rate": 2.5241770620436647e-08, + "loss": 0.5356, + "regression_loss": 0.0, + "step": 11647, + "text_loss": 0.62109375 + }, + { + "epoch": 0.97, + "learning_rate": 2.511092455747932e-08, + "loss": 0.5535, + "regression_loss": 0.0, + "step": 11648, + "text_loss": 0.63671875 + }, + { + "epoch": 0.97, + "learning_rate": 2.4980417656765286e-08, + "loss": 0.4189, + "regression_loss": 0.0, + "step": 11649, + "text_loss": 0.53515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.4850249927190763e-08, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 11650, + "text_loss": 0.490234375 + }, + { + "epoch": 0.97, + "learning_rate": 2.472042137763031e-08, + "loss": 0.458, + "regression_loss": 0.0, + "step": 11651, + "text_loss": 0.4296875 + }, + { + "epoch": 0.97, + "learning_rate": 2.4590932016935186e-08, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 11652, + "text_loss": 0.283203125 + }, + { + "epoch": 0.97, + "learning_rate": 2.4461781853932776e-08, + "loss": 0.4822, + "regression_loss": 0.0, + "step": 11653, + "text_loss": 0.5703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.43329708974277e-08, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 11654, + "text_loss": 0.546875 + }, + { + "epoch": 0.97, + "learning_rate": 2.420449915620293e-08, + "loss": 0.5781, + "regression_loss": 0.0, + "step": 11655, + "text_loss": 0.75390625 + }, + { + "epoch": 0.97, + "learning_rate": 2.4076366639015914e-08, + "loss": 0.5332, + "regression_loss": 0.0, + "step": 11656, + "text_loss": 0.58984375 + }, + { + "epoch": 0.97, + "learning_rate": 2.3948573354601877e-08, + "loss": 0.5073, + "regression_loss": 0.0, + "step": 11657, + "text_loss": 0.5234375 + }, + { + "epoch": 0.97, + "learning_rate": 2.3821119311673856e-08, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 11658, + "text_loss": 0.34375 + }, + { + "epoch": 0.97, + "learning_rate": 2.3694004518920456e-08, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 11659, + "text_loss": 0.56640625 + }, + { + "epoch": 0.97, + "learning_rate": 2.356722898500752e-08, + "loss": 0.6028, + "regression_loss": 0.0, + "step": 11660, + "text_loss": 0.609375 + }, + { + "epoch": 0.97, + "learning_rate": 2.344079271857924e-08, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 11661, + "text_loss": 0.5703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.3314695728254287e-08, + "loss": 0.4202, + "regression_loss": 0.0, + "step": 11662, + "text_loss": 0.443359375 + }, + { + "epoch": 0.97, + "learning_rate": 2.3188938022629114e-08, + "loss": 0.4, + "regression_loss": 0.0, + "step": 11663, + "text_loss": 0.349609375 + }, + { + "epoch": 0.97, + "learning_rate": 2.3063519610277974e-08, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 11664, + "text_loss": 0.34375 + }, + { + "epoch": 0.97, + "learning_rate": 2.293844049975069e-08, + "loss": 0.3958, + "regression_loss": 0.0, + "step": 11665, + "text_loss": 0.65234375 + }, + { + "epoch": 0.97, + "learning_rate": 2.2813700699574893e-08, + "loss": 0.4871, + "regression_loss": 0.0, + "step": 11666, + "text_loss": 0.40625 + }, + { + "epoch": 0.97, + "learning_rate": 2.2689300218253774e-08, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 11667, + "text_loss": 0.515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.2565239064269995e-08, + "loss": 0.4735, + "regression_loss": 0.0, + "step": 11668, + "text_loss": 0.5390625 + }, + { + "epoch": 0.97, + "learning_rate": 2.2441517246080126e-08, + "loss": 0.4448, + "regression_loss": 0.0, + "step": 11669, + "text_loss": 0.47265625 + }, + { + "epoch": 0.97, + "learning_rate": 2.2318134772119638e-08, + "loss": 0.4138, + "regression_loss": 0.0, + "step": 11670, + "text_loss": 0.279296875 + }, + { + "epoch": 0.97, + "learning_rate": 2.2195091650799028e-08, + "loss": 0.4949, + "regression_loss": 0.0, + "step": 11671, + "text_loss": 0.609375 + }, + { + "epoch": 0.97, + "learning_rate": 2.207238789050825e-08, + "loss": 0.4763, + "regression_loss": 0.0, + "step": 11672, + "text_loss": 0.45703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.1950023499610617e-08, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 11673, + "text_loss": 0.62890625 + }, + { + "epoch": 0.97, + "learning_rate": 2.1827998486450563e-08, + "loss": 0.4702, + "regression_loss": 0.0, + "step": 11674, + "text_loss": 0.53515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.1706312859345325e-08, + "loss": 0.449, + "regression_loss": 0.0, + "step": 11675, + "text_loss": 0.3359375 + }, + { + "epoch": 0.97, + "learning_rate": 2.1584966626592153e-08, + "loss": 0.4951, + "regression_loss": 0.0, + "step": 11676, + "text_loss": 0.546875 + }, + { + "epoch": 0.97, + "learning_rate": 2.1463959796462764e-08, + "loss": 0.4177, + "regression_loss": 0.0, + "step": 11677, + "text_loss": 0.59765625 + }, + { + "epoch": 0.97, + "learning_rate": 2.1343292377207225e-08, + "loss": 0.4138, + "regression_loss": 0.0, + "step": 11678, + "text_loss": 0.296875 + }, + { + "epoch": 0.97, + "learning_rate": 2.1222964377051736e-08, + "loss": 0.3656, + "regression_loss": 0.0, + "step": 11679, + "text_loss": 0.28515625 + }, + { + "epoch": 0.97, + "learning_rate": 2.1102975804200287e-08, + "loss": 0.5322, + "regression_loss": 0.0, + "step": 11680, + "text_loss": 0.431640625 + }, + { + "epoch": 0.97, + "learning_rate": 2.0983326666831894e-08, + "loss": 0.469, + "regression_loss": 0.0, + "step": 11681, + "text_loss": 0.412109375 + }, + { + "epoch": 0.97, + "learning_rate": 2.086401697310503e-08, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 11682, + "text_loss": 0.7109375 + }, + { + "epoch": 0.97, + "learning_rate": 2.074504673115263e-08, + "loss": 0.5005, + "regression_loss": 0.0, + "step": 11683, + "text_loss": 0.361328125 + }, + { + "epoch": 0.97, + "learning_rate": 2.0626415949086543e-08, + "loss": 0.5088, + "regression_loss": 0.0, + "step": 11684, + "text_loss": 0.71484375 + }, + { + "epoch": 0.97, + "learning_rate": 2.0508124634993076e-08, + "loss": 0.4009, + "regression_loss": 0.0, + "step": 11685, + "text_loss": 0.39453125 + }, + { + "epoch": 0.97, + "learning_rate": 2.0390172796937448e-08, + "loss": 0.5657, + "regression_loss": 0.0, + "step": 11686, + "text_loss": 0.62890625 + }, + { + "epoch": 0.97, + "learning_rate": 2.0272560442960443e-08, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 11687, + "text_loss": 0.703125 + }, + { + "epoch": 0.97, + "learning_rate": 2.0155287581081208e-08, + "loss": 0.5198, + "regression_loss": 0.0, + "step": 11688, + "text_loss": 0.375 + }, + { + "epoch": 0.97, + "learning_rate": 2.00383542192939e-08, + "loss": 0.4541, + "regression_loss": 0.0, + "step": 11689, + "text_loss": 0.37109375 + }, + { + "epoch": 0.97, + "learning_rate": 1.992176036557103e-08, + "loss": 0.5122, + "regression_loss": 0.0, + "step": 11690, + "text_loss": 0.42578125 + }, + { + "epoch": 0.97, + "learning_rate": 1.9805506027861243e-08, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 11691, + "text_loss": 0.5 + }, + { + "epoch": 0.97, + "learning_rate": 1.9689591214089865e-08, + "loss": 0.4575, + "regression_loss": 0.0, + "step": 11692, + "text_loss": 0.64453125 + }, + { + "epoch": 0.97, + "learning_rate": 1.957401593216002e-08, + "loss": 0.5254, + "regression_loss": 0.0, + "step": 11693, + "text_loss": 0.58203125 + }, + { + "epoch": 0.97, + "learning_rate": 1.9458780189949843e-08, + "loss": 0.5967, + "regression_loss": 0.0, + "step": 11694, + "text_loss": 0.60546875 + }, + { + "epoch": 0.97, + "learning_rate": 1.934388399531639e-08, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 11695, + "text_loss": 0.55859375 + }, + { + "epoch": 0.97, + "learning_rate": 1.9229327356093397e-08, + "loss": 0.4495, + "regression_loss": 0.0, + "step": 11696, + "text_loss": 0.55078125 + }, + { + "epoch": 0.97, + "learning_rate": 1.9115110280089054e-08, + "loss": 0.4172, + "regression_loss": 0.0, + "step": 11697, + "text_loss": 0.369140625 + }, + { + "epoch": 0.97, + "learning_rate": 1.9001232775091028e-08, + "loss": 0.5208, + "regression_loss": 0.0, + "step": 11698, + "text_loss": 0.63671875 + }, + { + "epoch": 0.97, + "learning_rate": 1.8887694848863103e-08, + "loss": 0.5159, + "regression_loss": 0.0, + "step": 11699, + "text_loss": 0.546875 + }, + { + "epoch": 0.97, + "learning_rate": 1.87744965091452e-08, + "loss": 0.4739, + "regression_loss": 0.0, + "step": 11700, + "text_loss": 0.5078125 + }, + { + "epoch": 0.97, + "learning_rate": 1.8661637763654484e-08, + "loss": 0.4976, + "regression_loss": 0.0, + "step": 11701, + "text_loss": 0.3828125 + }, + { + "epoch": 0.97, + "learning_rate": 1.854911862008646e-08, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 11702, + "text_loss": 0.474609375 + }, + { + "epoch": 0.97, + "learning_rate": 1.8436939086109993e-08, + "loss": 0.5061, + "regression_loss": 0.0, + "step": 11703, + "text_loss": 0.330078125 + }, + { + "epoch": 0.97, + "learning_rate": 1.832509916937397e-08, + "loss": 0.5498, + "regression_loss": 0.0, + "step": 11704, + "text_loss": 0.462890625 + }, + { + "epoch": 0.97, + "learning_rate": 1.8213598877503956e-08, + "loss": 0.5249, + "regression_loss": 0.0, + "step": 11705, + "text_loss": 0.65625 + }, + { + "epoch": 0.97, + "learning_rate": 1.8102438218099982e-08, + "loss": 0.449, + "regression_loss": 0.0, + "step": 11706, + "text_loss": 0.431640625 + }, + { + "epoch": 0.97, + "learning_rate": 1.7991617198740985e-08, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 11707, + "text_loss": 0.42578125 + }, + { + "epoch": 0.97, + "learning_rate": 1.7881135826982033e-08, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 11708, + "text_loss": 0.609375 + }, + { + "epoch": 0.97, + "learning_rate": 1.777099411035543e-08, + "loss": 0.6497, + "regression_loss": 0.0, + "step": 11709, + "text_loss": 0.4921875 + }, + { + "epoch": 0.97, + "learning_rate": 1.7661192056369626e-08, + "loss": 0.5691, + "regression_loss": 0.0, + "step": 11710, + "text_loss": 0.7890625 + }, + { + "epoch": 0.97, + "learning_rate": 1.7551729672511397e-08, + "loss": 0.4132, + "regression_loss": 0.0, + "step": 11711, + "text_loss": 0.328125 + }, + { + "epoch": 0.97, + "learning_rate": 1.7442606966242005e-08, + "loss": 0.4574, + "regression_loss": 0.0, + "step": 11712, + "text_loss": 0.546875 + }, + { + "epoch": 0.97, + "learning_rate": 1.733382394500216e-08, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 11713, + "text_loss": 0.52734375 + }, + { + "epoch": 0.97, + "learning_rate": 1.722538061620649e-08, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 11714, + "text_loss": 0.365234375 + }, + { + "epoch": 0.97, + "learning_rate": 1.7117276987250188e-08, + "loss": 0.4729, + "regression_loss": 0.0, + "step": 11715, + "text_loss": 0.48046875 + }, + { + "epoch": 0.97, + "learning_rate": 1.7009513065501248e-08, + "loss": 0.3792, + "regression_loss": 0.0, + "step": 11716, + "text_loss": 0.2236328125 + }, + { + "epoch": 0.97, + "learning_rate": 1.690208885830713e-08, + "loss": 0.4219, + "regression_loss": 0.0, + "step": 11717, + "text_loss": 0.53515625 + }, + { + "epoch": 0.97, + "learning_rate": 1.6795004372991975e-08, + "loss": 0.5884, + "regression_loss": 0.0, + "step": 11718, + "text_loss": 0.66796875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6688259616856052e-08, + "loss": 0.4281, + "regression_loss": 0.0, + "step": 11719, + "text_loss": 0.408203125 + }, + { + "epoch": 0.97, + "learning_rate": 1.6581854597176316e-08, + "loss": 0.4902, + "regression_loss": 0.0, + "step": 11720, + "text_loss": 0.546875 + }, + { + "epoch": 0.97, + "learning_rate": 1.647578932120697e-08, + "loss": 0.53, + "regression_loss": 0.0, + "step": 11721, + "text_loss": 0.65625 + }, + { + "epoch": 0.97, + "learning_rate": 1.637006379617945e-08, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 11722, + "text_loss": 0.59765625 + }, + { + "epoch": 0.97, + "learning_rate": 1.6264678029300763e-08, + "loss": 0.3951, + "regression_loss": 0.0, + "step": 11723, + "text_loss": 0.46484375 + }, + { + "epoch": 0.97, + "learning_rate": 1.615963202775628e-08, + "loss": 0.5293, + "regression_loss": 0.0, + "step": 11724, + "text_loss": 0.55859375 + }, + { + "epoch": 0.97, + "learning_rate": 1.6054925798707487e-08, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 11725, + "text_loss": 0.6015625 + }, + { + "epoch": 0.97, + "learning_rate": 1.5950559349292015e-08, + "loss": 0.4709, + "regression_loss": 0.0, + "step": 11726, + "text_loss": 0.4765625 + }, + { + "epoch": 0.97, + "learning_rate": 1.5846532686625837e-08, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 11727, + "text_loss": 0.53125 + }, + { + "epoch": 0.97, + "learning_rate": 1.57428458178005e-08, + "loss": 0.5808, + "regression_loss": 0.0, + "step": 11728, + "text_loss": 0.94921875 + }, + { + "epoch": 0.97, + "learning_rate": 1.5639498749884795e-08, + "loss": 0.488, + "regression_loss": 0.0, + "step": 11729, + "text_loss": 0.5 + }, + { + "epoch": 0.97, + "learning_rate": 1.5536491489924755e-08, + "loss": 0.4993, + "regression_loss": 0.0, + "step": 11730, + "text_loss": 0.609375 + }, + { + "epoch": 0.97, + "learning_rate": 1.543382404494309e-08, + "loss": 0.5007, + "regression_loss": 0.0, + "step": 11731, + "text_loss": 0.28125 + }, + { + "epoch": 0.98, + "learning_rate": 1.5331496421938097e-08, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 11732, + "text_loss": 0.361328125 + }, + { + "epoch": 0.98, + "learning_rate": 1.522950862788697e-08, + "loss": 0.4622, + "regression_loss": 0.0, + "step": 11733, + "text_loss": 0.63671875 + }, + { + "epoch": 0.98, + "learning_rate": 1.5127860669741924e-08, + "loss": 0.3768, + "regression_loss": 0.0, + "step": 11734, + "text_loss": 0.27734375 + }, + { + "epoch": 0.98, + "learning_rate": 1.5026552554433528e-08, + "loss": 0.4131, + "regression_loss": 0.0, + "step": 11735, + "text_loss": 0.2890625 + }, + { + "epoch": 0.98, + "learning_rate": 1.492558428886792e-08, + "loss": 0.4556, + "regression_loss": 0.0, + "step": 11736, + "text_loss": 0.43359375 + }, + { + "epoch": 0.98, + "learning_rate": 1.4824955879929048e-08, + "loss": 0.5842, + "regression_loss": 0.0, + "step": 11737, + "text_loss": 0.3125 + }, + { + "epoch": 0.98, + "learning_rate": 1.4724667334476417e-08, + "loss": 0.4485, + "regression_loss": 0.0, + "step": 11738, + "text_loss": 0.51953125 + }, + { + "epoch": 0.98, + "learning_rate": 1.4624718659348447e-08, + "loss": 0.4355, + "regression_loss": 0.0, + "step": 11739, + "text_loss": 0.62109375 + }, + { + "epoch": 0.98, + "learning_rate": 1.4525109861358022e-08, + "loss": 0.4089, + "regression_loss": 0.0, + "step": 11740, + "text_loss": 0.435546875 + }, + { + "epoch": 0.98, + "learning_rate": 1.4425840947296376e-08, + "loss": 0.5461, + "regression_loss": 0.0, + "step": 11741, + "text_loss": 0.4296875 + }, + { + "epoch": 0.98, + "learning_rate": 1.4326911923930874e-08, + "loss": 0.4624, + "regression_loss": 0.0, + "step": 11742, + "text_loss": 0.671875 + }, + { + "epoch": 0.98, + "learning_rate": 1.4228322798006677e-08, + "loss": 0.5518, + "regression_loss": 0.0, + "step": 11743, + "text_loss": 0.5625 + }, + { + "epoch": 0.98, + "learning_rate": 1.4130073576244518e-08, + "loss": 0.5076, + "regression_loss": 0.0, + "step": 11744, + "text_loss": 0.53515625 + }, + { + "epoch": 0.98, + "learning_rate": 1.4032164265342374e-08, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 11745, + "text_loss": 0.48046875 + }, + { + "epoch": 0.98, + "learning_rate": 1.393459487197546e-08, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 11746, + "text_loss": 0.361328125 + }, + { + "epoch": 0.98, + "learning_rate": 1.3837365402795678e-08, + "loss": 0.5337, + "regression_loss": 0.0, + "step": 11747, + "text_loss": 0.482421875 + }, + { + "epoch": 0.98, + "learning_rate": 1.3740475864431613e-08, + "loss": 0.4634, + "regression_loss": 0.0, + "step": 11748, + "text_loss": 0.37890625 + }, + { + "epoch": 0.98, + "learning_rate": 1.3643926263489094e-08, + "loss": 0.5625, + "regression_loss": 0.0, + "step": 11749, + "text_loss": 0.62109375 + }, + { + "epoch": 0.98, + "learning_rate": 1.3547716606548967e-08, + "loss": 0.499, + "regression_loss": 0.0, + "step": 11750, + "text_loss": 0.498046875 + }, + { + "epoch": 0.98, + "learning_rate": 1.3451846900171539e-08, + "loss": 0.5321, + "regression_loss": 0.0, + "step": 11751, + "text_loss": 0.2177734375 + }, + { + "epoch": 0.98, + "learning_rate": 1.3356317150892139e-08, + "loss": 0.5157, + "regression_loss": 0.0, + "step": 11752, + "text_loss": 0.5234375 + }, + { + "epoch": 0.98, + "learning_rate": 1.3261127365224447e-08, + "loss": 0.442, + "regression_loss": 0.0, + "step": 11753, + "text_loss": 0.5 + }, + { + "epoch": 0.98, + "learning_rate": 1.3166277549656603e-08, + "loss": 0.5045, + "regression_loss": 0.0, + "step": 11754, + "text_loss": 0.80859375 + }, + { + "epoch": 0.98, + "learning_rate": 1.3071767710655658e-08, + "loss": 0.4827, + "regression_loss": 0.0, + "step": 11755, + "text_loss": 0.337890625 + }, + { + "epoch": 0.98, + "learning_rate": 1.2977597854665347e-08, + "loss": 0.4924, + "regression_loss": 0.0, + "step": 11756, + "text_loss": 0.375 + }, + { + "epoch": 0.98, + "learning_rate": 1.2883767988104978e-08, + "loss": 0.428, + "regression_loss": 0.0, + "step": 11757, + "text_loss": 0.5078125 + }, + { + "epoch": 0.98, + "learning_rate": 1.2790278117371102e-08, + "loss": 0.4263, + "regression_loss": 0.0, + "step": 11758, + "text_loss": 0.37109375 + }, + { + "epoch": 0.98, + "learning_rate": 1.269712824883862e-08, + "loss": 0.4333, + "regression_loss": 0.0, + "step": 11759, + "text_loss": 0.62890625 + }, + { + "epoch": 0.98, + "learning_rate": 1.260431838885634e-08, + "loss": 0.4568, + "regression_loss": 0.0, + "step": 11760, + "text_loss": 0.416015625 + }, + { + "epoch": 0.98, + "learning_rate": 1.2511848543753091e-08, + "loss": 0.4934, + "regression_loss": 0.0, + "step": 11761, + "text_loss": 0.361328125 + }, + { + "epoch": 0.98, + "learning_rate": 1.2419718719832164e-08, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 11762, + "text_loss": 0.41796875 + }, + { + "epoch": 0.98, + "learning_rate": 1.2327928923374644e-08, + "loss": 0.5369, + "regression_loss": 0.0, + "step": 11763, + "text_loss": 0.67578125 + }, + { + "epoch": 0.98, + "learning_rate": 1.2236479160638305e-08, + "loss": 0.4146, + "regression_loss": 0.0, + "step": 11764, + "text_loss": 0.466796875 + }, + { + "epoch": 0.98, + "learning_rate": 1.2145369437858157e-08, + "loss": 0.448, + "regression_loss": 0.0, + "step": 11765, + "text_loss": 0.55078125 + }, + { + "epoch": 0.98, + "learning_rate": 1.205459976124479e-08, + "loss": 0.4028, + "regression_loss": 0.0, + "step": 11766, + "text_loss": 0.375 + }, + { + "epoch": 0.98, + "learning_rate": 1.1964170136986585e-08, + "loss": 0.3997, + "regression_loss": 0.0, + "step": 11767, + "text_loss": 0.349609375 + }, + { + "epoch": 0.98, + "learning_rate": 1.1874080571249169e-08, + "loss": 0.465, + "regression_loss": 0.0, + "step": 11768, + "text_loss": 0.51171875 + }, + { + "epoch": 0.98, + "learning_rate": 1.1784331070173738e-08, + "loss": 0.4629, + "regression_loss": 0.0, + "step": 11769, + "text_loss": 0.79296875 + }, + { + "epoch": 0.98, + "learning_rate": 1.1694921639879287e-08, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 11770, + "text_loss": 0.392578125 + }, + { + "epoch": 0.98, + "learning_rate": 1.1605852286460938e-08, + "loss": 0.4995, + "regression_loss": 0.0, + "step": 11771, + "text_loss": 0.7109375 + }, + { + "epoch": 0.98, + "learning_rate": 1.1517123015991616e-08, + "loss": 0.5447, + "regression_loss": 0.0, + "step": 11772, + "text_loss": 0.5 + }, + { + "epoch": 0.98, + "learning_rate": 1.1428733834519812e-08, + "loss": 0.4385, + "regression_loss": 0.0, + "step": 11773, + "text_loss": 0.435546875 + }, + { + "epoch": 0.98, + "learning_rate": 1.1340684748071817e-08, + "loss": 0.5685, + "regression_loss": 0.0, + "step": 11774, + "text_loss": 0.60546875 + }, + { + "epoch": 0.98, + "learning_rate": 1.1252975762650053e-08, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 11775, + "text_loss": 0.578125 + }, + { + "epoch": 0.98, + "learning_rate": 1.1165606884234182e-08, + "loss": 0.4435, + "regression_loss": 0.0, + "step": 11776, + "text_loss": 0.314453125 + }, + { + "epoch": 0.98, + "learning_rate": 1.1078578118780548e-08, + "loss": 0.5278, + "regression_loss": 0.0, + "step": 11777, + "text_loss": 0.36328125 + }, + { + "epoch": 0.98, + "learning_rate": 1.0991889472222739e-08, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 11778, + "text_loss": 0.640625 + }, + { + "epoch": 0.98, + "learning_rate": 1.0905540950470472e-08, + "loss": 0.4257, + "regression_loss": 0.0, + "step": 11779, + "text_loss": 0.36328125 + }, + { + "epoch": 0.98, + "learning_rate": 1.0819532559410152e-08, + "loss": 0.4338, + "regression_loss": 0.0, + "step": 11780, + "text_loss": 0.39453125 + }, + { + "epoch": 0.98, + "learning_rate": 1.0733864304905972e-08, + "loss": 0.4266, + "regression_loss": 0.0, + "step": 11781, + "text_loss": 0.42578125 + }, + { + "epoch": 0.98, + "learning_rate": 1.064853619279771e-08, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 11782, + "text_loss": 0.4765625 + }, + { + "epoch": 0.98, + "learning_rate": 1.0563548228903487e-08, + "loss": 0.4897, + "regression_loss": 0.0, + "step": 11783, + "text_loss": 0.62890625 + }, + { + "epoch": 0.98, + "learning_rate": 1.0478900419016446e-08, + "loss": 0.3607, + "regression_loss": 0.0, + "step": 11784, + "text_loss": 0.392578125 + }, + { + "epoch": 0.98, + "learning_rate": 1.039459276890753e-08, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 11785, + "text_loss": 0.431640625 + }, + { + "epoch": 0.98, + "learning_rate": 1.031062528432547e-08, + "loss": 0.4463, + "regression_loss": 0.0, + "step": 11786, + "text_loss": 0.33984375 + }, + { + "epoch": 0.98, + "learning_rate": 1.0226997970993468e-08, + "loss": 0.4342, + "regression_loss": 0.0, + "step": 11787, + "text_loss": 0.1845703125 + }, + { + "epoch": 0.98, + "learning_rate": 1.0143710834613073e-08, + "loss": 0.5452, + "regression_loss": 0.0, + "step": 11788, + "text_loss": 0.53125 + }, + { + "epoch": 0.98, + "learning_rate": 1.0060763880862524e-08, + "loss": 0.4573, + "regression_loss": 0.0, + "step": 11789, + "text_loss": 0.2421875 + }, + { + "epoch": 0.98, + "learning_rate": 9.978157115397291e-09, + "loss": 0.5186, + "regression_loss": 0.0, + "step": 11790, + "text_loss": 0.68359375 + }, + { + "epoch": 0.98, + "learning_rate": 9.89589054384843e-09, + "loss": 0.5, + "regression_loss": 0.0, + "step": 11791, + "text_loss": 0.455078125 + }, + { + "epoch": 0.98, + "learning_rate": 9.813964171824231e-09, + "loss": 0.571, + "regression_loss": 0.0, + "step": 11792, + "text_loss": 0.7109375 + }, + { + "epoch": 0.98, + "learning_rate": 9.732378004910781e-09, + "loss": 0.5309, + "regression_loss": 0.0, + "step": 11793, + "text_loss": 0.6640625 + }, + { + "epoch": 0.98, + "learning_rate": 9.651132048669743e-09, + "loss": 0.4771, + "regression_loss": 0.0, + "step": 11794, + "text_loss": 0.5078125 + }, + { + "epoch": 0.98, + "learning_rate": 9.570226308639463e-09, + "loss": 0.532, + "regression_loss": 0.0, + "step": 11795, + "text_loss": 0.51171875 + }, + { + "epoch": 0.98, + "learning_rate": 9.489660790337196e-09, + "loss": 0.4558, + "regression_loss": 0.0, + "step": 11796, + "text_loss": 0.470703125 + }, + { + "epoch": 0.98, + "learning_rate": 9.409435499254105e-09, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 11797, + "text_loss": 0.50390625 + }, + { + "epoch": 0.98, + "learning_rate": 9.329550440859702e-09, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 11798, + "text_loss": 0.34765625 + }, + { + "epoch": 0.98, + "learning_rate": 9.250005620601299e-09, + "loss": 0.4636, + "regression_loss": 0.0, + "step": 11799, + "text_loss": 0.5390625 + }, + { + "epoch": 0.98, + "learning_rate": 9.170801043900112e-09, + "loss": 0.4705, + "regression_loss": 0.0, + "step": 11800, + "text_loss": 0.59375 + }, + { + "epoch": 0.98, + "learning_rate": 9.09193671615738e-09, + "loss": 0.5068, + "regression_loss": 0.0, + "step": 11801, + "text_loss": 0.369140625 + }, + { + "epoch": 0.98, + "learning_rate": 9.013412642748798e-09, + "loss": 0.4077, + "regression_loss": 0.0, + "step": 11802, + "text_loss": 0.39453125 + }, + { + "epoch": 0.98, + "learning_rate": 8.93522882902842e-09, + "loss": 0.4919, + "regression_loss": 0.0, + "step": 11803, + "text_loss": 0.240234375 + }, + { + "epoch": 0.98, + "learning_rate": 8.857385280325314e-09, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 11804, + "text_loss": 0.79296875 + }, + { + "epoch": 0.98, + "learning_rate": 8.77988200194746e-09, + "loss": 0.3951, + "regression_loss": 0.0, + "step": 11805, + "text_loss": 0.318359375 + }, + { + "epoch": 0.98, + "learning_rate": 8.702718999178406e-09, + "loss": 0.45, + "regression_loss": 0.0, + "step": 11806, + "text_loss": 0.427734375 + }, + { + "epoch": 0.98, + "learning_rate": 8.625896277278944e-09, + "loss": 0.4456, + "regression_loss": 0.0, + "step": 11807, + "text_loss": 0.58203125 + }, + { + "epoch": 0.98, + "learning_rate": 8.549413841485443e-09, + "loss": 0.3871, + "regression_loss": 0.0, + "step": 11808, + "text_loss": 0.353515625 + }, + { + "epoch": 0.98, + "learning_rate": 8.473271697013175e-09, + "loss": 0.437, + "regression_loss": 0.0, + "step": 11809, + "text_loss": 0.66796875 + }, + { + "epoch": 0.98, + "learning_rate": 8.397469849052986e-09, + "loss": 0.4955, + "regression_loss": 0.0, + "step": 11810, + "text_loss": 0.37109375 + }, + { + "epoch": 0.98, + "learning_rate": 8.322008302771855e-09, + "loss": 0.5269, + "regression_loss": 0.0, + "step": 11811, + "text_loss": 0.6953125 + }, + { + "epoch": 0.98, + "learning_rate": 8.246887063315112e-09, + "loss": 0.5637, + "regression_loss": 0.0, + "step": 11812, + "text_loss": 0.408203125 + }, + { + "epoch": 0.98, + "learning_rate": 8.172106135804214e-09, + "loss": 0.5144, + "regression_loss": 0.0, + "step": 11813, + "text_loss": 0.7265625 + }, + { + "epoch": 0.98, + "learning_rate": 8.09766552533675e-09, + "loss": 0.4701, + "regression_loss": 0.0, + "step": 11814, + "text_loss": 0.69921875 + }, + { + "epoch": 0.98, + "learning_rate": 8.023565236988107e-09, + "loss": 0.4414, + "regression_loss": 0.0, + "step": 11815, + "text_loss": 0.28515625 + }, + { + "epoch": 0.98, + "learning_rate": 7.949805275809241e-09, + "loss": 0.5042, + "regression_loss": 0.0, + "step": 11816, + "text_loss": 0.625 + }, + { + "epoch": 0.98, + "learning_rate": 7.876385646830575e-09, + "loss": 0.532, + "regression_loss": 0.0, + "step": 11817, + "text_loss": 0.74609375 + }, + { + "epoch": 0.98, + "learning_rate": 7.803306355055329e-09, + "loss": 0.4727, + "regression_loss": 0.0, + "step": 11818, + "text_loss": 0.3984375 + }, + { + "epoch": 0.98, + "learning_rate": 7.730567405467293e-09, + "loss": 0.5502, + "regression_loss": 0.0, + "step": 11819, + "text_loss": 0.52734375 + }, + { + "epoch": 0.98, + "learning_rate": 7.658168803024724e-09, + "loss": 0.4373, + "regression_loss": 0.0, + "step": 11820, + "text_loss": 0.50390625 + }, + { + "epoch": 0.98, + "learning_rate": 7.586110552663673e-09, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 11821, + "text_loss": 0.52734375 + }, + { + "epoch": 0.98, + "learning_rate": 7.514392659295765e-09, + "loss": 0.4841, + "regression_loss": 0.0, + "step": 11822, + "text_loss": 0.40234375 + }, + { + "epoch": 0.98, + "learning_rate": 7.443015127811537e-09, + "loss": 0.5195, + "regression_loss": 0.0, + "step": 11823, + "text_loss": 0.34765625 + }, + { + "epoch": 0.98, + "learning_rate": 7.371977963077093e-09, + "loss": 0.4446, + "regression_loss": 0.0, + "step": 11824, + "text_loss": 0.478515625 + }, + { + "epoch": 0.98, + "learning_rate": 7.301281169934115e-09, + "loss": 0.4243, + "regression_loss": 0.0, + "step": 11825, + "text_loss": 0.74609375 + }, + { + "epoch": 0.98, + "learning_rate": 7.2309247532037495e-09, + "loss": 0.6035, + "regression_loss": 0.0, + "step": 11826, + "text_loss": 0.6015625 + }, + { + "epoch": 0.98, + "learning_rate": 7.160908717682158e-09, + "loss": 0.5762, + "regression_loss": 0.0, + "step": 11827, + "text_loss": 0.63671875 + }, + { + "epoch": 0.98, + "learning_rate": 7.0912330681421895e-09, + "loss": 0.4917, + "regression_loss": 0.0, + "step": 11828, + "text_loss": 0.490234375 + }, + { + "epoch": 0.98, + "learning_rate": 7.021897809334488e-09, + "loss": 0.4647, + "regression_loss": 0.0, + "step": 11829, + "text_loss": 0.2333984375 + }, + { + "epoch": 0.98, + "learning_rate": 6.952902945985829e-09, + "loss": 0.5219, + "regression_loss": 0.0, + "step": 11830, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.98, + "learning_rate": 6.884248482799671e-09, + "loss": 0.4648, + "regression_loss": 0.0, + "step": 11831, + "text_loss": 0.46875 + }, + { + "epoch": 0.98, + "learning_rate": 6.815934424456716e-09, + "loss": 0.385, + "regression_loss": 0.0, + "step": 11832, + "text_loss": 0.341796875 + }, + { + "epoch": 0.98, + "learning_rate": 6.7479607756143485e-09, + "loss": 0.4424, + "regression_loss": 0.0, + "step": 11833, + "text_loss": 0.64453125 + }, + { + "epoch": 0.98, + "learning_rate": 6.680327540906639e-09, + "loss": 0.5139, + "regression_loss": 0.0, + "step": 11834, + "text_loss": 0.49609375 + }, + { + "epoch": 0.98, + "learning_rate": 6.613034724944345e-09, + "loss": 0.4187, + "regression_loss": 0.0, + "step": 11835, + "text_loss": 0.361328125 + }, + { + "epoch": 0.98, + "learning_rate": 6.546082332314907e-09, + "loss": 0.418, + "regression_loss": 0.0, + "step": 11836, + "text_loss": 0.59375 + }, + { + "epoch": 0.98, + "learning_rate": 6.479470367583563e-09, + "loss": 0.5188, + "regression_loss": 0.0, + "step": 11837, + "text_loss": 0.5 + }, + { + "epoch": 0.98, + "learning_rate": 6.413198835290568e-09, + "loss": 0.4878, + "regression_loss": 0.0, + "step": 11838, + "text_loss": 0.53515625 + }, + { + "epoch": 0.98, + "learning_rate": 6.347267739955087e-09, + "loss": 0.5308, + "regression_loss": 0.0, + "step": 11839, + "text_loss": 0.57421875 + }, + { + "epoch": 0.98, + "learning_rate": 6.281677086071303e-09, + "loss": 0.47, + "regression_loss": 0.0, + "step": 11840, + "text_loss": 0.498046875 + }, + { + "epoch": 0.98, + "learning_rate": 6.2164268781106376e-09, + "loss": 0.5867, + "regression_loss": 0.0, + "step": 11841, + "text_loss": 0.6171875 + }, + { + "epoch": 0.98, + "learning_rate": 6.15151712052231e-09, + "loss": 0.5142, + "regression_loss": 0.0, + "step": 11842, + "text_loss": 0.55078125 + }, + { + "epoch": 0.98, + "learning_rate": 6.0869478177305594e-09, + "loss": 0.3943, + "regression_loss": 0.0, + "step": 11843, + "text_loss": 0.50390625 + }, + { + "epoch": 0.98, + "learning_rate": 6.022718974137976e-09, + "loss": 0.522, + "regression_loss": 0.0, + "step": 11844, + "text_loss": 0.48046875 + }, + { + "epoch": 0.98, + "learning_rate": 5.9588305941232775e-09, + "loss": 0.4304, + "regression_loss": 0.0, + "step": 11845, + "text_loss": 0.474609375 + }, + { + "epoch": 0.98, + "learning_rate": 5.89528268204187e-09, + "loss": 0.49, + "regression_loss": 0.0, + "step": 11846, + "text_loss": 0.376953125 + }, + { + "epoch": 0.98, + "learning_rate": 5.832075242226398e-09, + "loss": 0.4238, + "regression_loss": 0.0, + "step": 11847, + "text_loss": 0.412109375 + }, + { + "epoch": 0.98, + "learning_rate": 5.769208278986193e-09, + "loss": 0.4041, + "regression_loss": 0.0, + "step": 11848, + "text_loss": 0.484375 + }, + { + "epoch": 0.98, + "learning_rate": 5.706681796606161e-09, + "loss": 0.4307, + "regression_loss": 0.0, + "step": 11849, + "text_loss": 0.53125 + }, + { + "epoch": 0.98, + "learning_rate": 5.644495799350114e-09, + "loss": 0.4583, + "regression_loss": 0.0, + "step": 11850, + "text_loss": 0.3125 + }, + { + "epoch": 0.98, + "learning_rate": 5.582650291457437e-09, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 11851, + "text_loss": 0.70703125 + }, + { + "epoch": 0.99, + "learning_rate": 5.521145277144202e-09, + "loss": 0.5242, + "regression_loss": 0.0, + "step": 11852, + "text_loss": 0.52734375 + }, + { + "epoch": 0.99, + "learning_rate": 5.459980760603168e-09, + "loss": 0.417, + "regression_loss": 0.0, + "step": 11853, + "text_loss": 0.484375 + }, + { + "epoch": 0.99, + "learning_rate": 5.399156746004885e-09, + "loss": 0.4851, + "regression_loss": 0.0, + "step": 11854, + "text_loss": 0.55078125 + }, + { + "epoch": 0.99, + "learning_rate": 5.338673237495484e-09, + "loss": 0.4915, + "regression_loss": 0.0, + "step": 11855, + "text_loss": 0.6171875 + }, + { + "epoch": 0.99, + "learning_rate": 5.278530239198332e-09, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11856, + "text_loss": 0.53515625 + }, + { + "epoch": 0.99, + "learning_rate": 5.2187277552145924e-09, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 11857, + "text_loss": 0.34765625 + }, + { + "epoch": 0.99, + "learning_rate": 5.159265789620449e-09, + "loss": 0.5508, + "regression_loss": 0.0, + "step": 11858, + "text_loss": 0.66796875 + }, + { + "epoch": 0.99, + "learning_rate": 5.100144346469882e-09, + "loss": 0.5083, + "regression_loss": 0.0, + "step": 11859, + "text_loss": 0.5234375 + }, + { + "epoch": 0.99, + "learning_rate": 5.041363429793555e-09, + "loss": 0.4114, + "regression_loss": 0.0, + "step": 11860, + "text_loss": 0.4453125 + }, + { + "epoch": 0.99, + "learning_rate": 4.98292304359882e-09, + "loss": 0.4543, + "regression_loss": 0.0, + "step": 11861, + "text_loss": 0.248046875 + }, + { + "epoch": 0.99, + "learning_rate": 4.92482319186971e-09, + "loss": 0.4592, + "regression_loss": 0.0, + "step": 11862, + "text_loss": 0.671875 + }, + { + "epoch": 0.99, + "learning_rate": 4.867063878567502e-09, + "loss": 0.5754, + "regression_loss": 0.0, + "step": 11863, + "text_loss": 0.8046875 + }, + { + "epoch": 0.99, + "learning_rate": 4.809645107629601e-09, + "loss": 0.4365, + "regression_loss": 0.0, + "step": 11864, + "text_loss": 0.58984375 + }, + { + "epoch": 0.99, + "learning_rate": 4.752566882970655e-09, + "loss": 0.4259, + "regression_loss": 0.0, + "step": 11865, + "text_loss": 0.396484375 + }, + { + "epoch": 0.99, + "learning_rate": 4.695829208481995e-09, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 11866, + "text_loss": 0.7109375 + }, + { + "epoch": 0.99, + "learning_rate": 4.639432088031637e-09, + "loss": 0.4712, + "regression_loss": 0.0, + "step": 11867, + "text_loss": 0.298828125 + }, + { + "epoch": 0.99, + "learning_rate": 4.583375525464284e-09, + "loss": 0.5081, + "regression_loss": 0.0, + "step": 11868, + "text_loss": 0.408203125 + }, + { + "epoch": 0.99, + "learning_rate": 4.527659524601879e-09, + "loss": 0.5183, + "regression_loss": 0.0, + "step": 11869, + "text_loss": 0.55859375 + }, + { + "epoch": 0.99, + "learning_rate": 4.472284089243051e-09, + "loss": 0.4198, + "regression_loss": 0.0, + "step": 11870, + "text_loss": 0.486328125 + }, + { + "epoch": 0.99, + "learning_rate": 4.417249223162556e-09, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 11871, + "text_loss": 0.4296875 + }, + { + "epoch": 0.99, + "learning_rate": 4.362554930112395e-09, + "loss": 0.553, + "regression_loss": 0.0, + "step": 11872, + "text_loss": 0.9296875 + }, + { + "epoch": 0.99, + "learning_rate": 4.308201213821805e-09, + "loss": 0.3673, + "regression_loss": 0.0, + "step": 11873, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.99, + "learning_rate": 4.254188077995603e-09, + "loss": 0.5315, + "regression_loss": 0.0, + "step": 11874, + "text_loss": 0.59765625 + }, + { + "epoch": 0.99, + "learning_rate": 4.200515526316396e-09, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 11875, + "text_loss": 0.361328125 + }, + { + "epoch": 0.99, + "learning_rate": 4.1471835624440345e-09, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 11876, + "text_loss": 0.498046875 + }, + { + "epoch": 0.99, + "learning_rate": 4.094192190013946e-09, + "loss": 0.3865, + "regression_loss": 0.0, + "step": 11877, + "text_loss": 0.322265625 + }, + { + "epoch": 0.99, + "learning_rate": 4.041541412638239e-09, + "loss": 0.4329, + "regression_loss": 0.0, + "step": 11878, + "text_loss": 0.310546875 + }, + { + "epoch": 0.99, + "learning_rate": 3.989231233906821e-09, + "loss": 0.5051, + "regression_loss": 0.0, + "step": 11879, + "text_loss": 0.51953125 + }, + { + "epoch": 0.99, + "learning_rate": 3.937261657386282e-09, + "loss": 0.4434, + "regression_loss": 0.0, + "step": 11880, + "text_loss": 0.5546875 + }, + { + "epoch": 0.99, + "learning_rate": 3.88563268661879e-09, + "loss": 0.5194, + "regression_loss": 0.0, + "step": 11881, + "text_loss": 0.2060546875 + }, + { + "epoch": 0.99, + "learning_rate": 3.834344325124861e-09, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 11882, + "text_loss": 0.50390625 + }, + { + "epoch": 0.99, + "learning_rate": 3.783396576401144e-09, + "loss": 0.4884, + "regression_loss": 0.0, + "step": 11883, + "text_loss": 0.68359375 + }, + { + "epoch": 0.99, + "learning_rate": 3.7327894439204145e-09, + "loss": 0.5098, + "regression_loss": 0.0, + "step": 11884, + "text_loss": 0.43359375 + }, + { + "epoch": 0.99, + "learning_rate": 3.6825229311332476e-09, + "loss": 0.5681, + "regression_loss": 0.0, + "step": 11885, + "text_loss": 0.9296875 + }, + { + "epoch": 0.99, + "learning_rate": 3.6325970414663457e-09, + "loss": 0.5173, + "regression_loss": 0.0, + "step": 11886, + "text_loss": 0.3359375 + }, + { + "epoch": 0.99, + "learning_rate": 3.5830117783230978e-09, + "loss": 0.5847, + "regression_loss": 0.0, + "step": 11887, + "text_loss": 0.69140625 + }, + { + "epoch": 0.99, + "learning_rate": 3.533767145084688e-09, + "loss": 0.4163, + "regression_loss": 0.0, + "step": 11888, + "text_loss": 0.5078125 + }, + { + "epoch": 0.99, + "learning_rate": 3.4848631451078753e-09, + "loss": 0.491, + "regression_loss": 0.0, + "step": 11889, + "text_loss": 0.6640625 + }, + { + "epoch": 0.99, + "learning_rate": 3.436299781727215e-09, + "loss": 0.5669, + "regression_loss": 0.0, + "step": 11890, + "text_loss": 0.4453125 + }, + { + "epoch": 0.99, + "learning_rate": 3.3880770582522814e-09, + "loss": 0.4619, + "regression_loss": 0.0, + "step": 11891, + "text_loss": 0.48828125 + }, + { + "epoch": 0.99, + "learning_rate": 3.340194977972111e-09, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 11892, + "text_loss": 0.9609375 + }, + { + "epoch": 0.99, + "learning_rate": 3.292653544150204e-09, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 11893, + "text_loss": 0.59375 + }, + { + "epoch": 0.99, + "learning_rate": 3.2454527600278562e-09, + "loss": 0.4031, + "regression_loss": 0.0, + "step": 11894, + "text_loss": 0.2216796875 + }, + { + "epoch": 0.99, + "learning_rate": 3.1985926288236046e-09, + "loss": 0.4435, + "regression_loss": 0.0, + "step": 11895, + "text_loss": 0.431640625 + }, + { + "epoch": 0.99, + "learning_rate": 3.1520731537310056e-09, + "loss": 0.5222, + "regression_loss": 0.0, + "step": 11896, + "text_loss": 0.421875 + }, + { + "epoch": 0.99, + "learning_rate": 3.105894337921966e-09, + "loss": 0.49, + "regression_loss": 0.0, + "step": 11897, + "text_loss": 0.59765625 + }, + { + "epoch": 0.99, + "learning_rate": 3.0600561845450793e-09, + "loss": 0.4819, + "regression_loss": 0.0, + "step": 11898, + "text_loss": 0.328125 + }, + { + "epoch": 0.99, + "learning_rate": 3.014558696724512e-09, + "loss": 0.4692, + "regression_loss": 0.0, + "step": 11899, + "text_loss": 0.375 + }, + { + "epoch": 0.99, + "learning_rate": 2.9694018775633383e-09, + "loss": 0.49, + "regression_loss": 0.0, + "step": 11900, + "text_loss": 0.671875 + }, + { + "epoch": 0.99, + "learning_rate": 2.9245857301390957e-09, + "loss": 0.3885, + "regression_loss": 0.0, + "step": 11901, + "text_loss": 0.369140625 + }, + { + "epoch": 0.99, + "learning_rate": 2.880110257507673e-09, + "loss": 0.46, + "regression_loss": 0.0, + "step": 11902, + "text_loss": 0.578125 + }, + { + "epoch": 0.99, + "learning_rate": 2.8359754627005354e-09, + "loss": 0.4058, + "regression_loss": 0.0, + "step": 11903, + "text_loss": 0.443359375 + }, + { + "epoch": 0.99, + "learning_rate": 2.792181348726941e-09, + "loss": 0.4802, + "regression_loss": 0.0, + "step": 11904, + "text_loss": 0.578125 + }, + { + "epoch": 0.99, + "learning_rate": 2.7487279185728356e-09, + "loss": 0.4071, + "regression_loss": 0.0, + "step": 11905, + "text_loss": 0.341796875 + }, + { + "epoch": 0.99, + "learning_rate": 2.7056151752002936e-09, + "loss": 0.5486, + "regression_loss": 0.0, + "step": 11906, + "text_loss": 0.419921875 + }, + { + "epoch": 0.99, + "learning_rate": 2.6628431215486307e-09, + "loss": 0.4301, + "regression_loss": 0.0, + "step": 11907, + "text_loss": 0.4296875 + }, + { + "epoch": 0.99, + "learning_rate": 2.6204117605332925e-09, + "loss": 0.3984, + "regression_loss": 0.0, + "step": 11908, + "text_loss": 0.267578125 + }, + { + "epoch": 0.99, + "learning_rate": 2.578321095048075e-09, + "loss": 0.5292, + "regression_loss": 0.0, + "step": 11909, + "text_loss": 0.51953125 + }, + { + "epoch": 0.99, + "learning_rate": 2.5365711279617955e-09, + "loss": 0.5024, + "regression_loss": 0.0, + "step": 11910, + "text_loss": 0.55859375 + }, + { + "epoch": 0.99, + "learning_rate": 2.495161862121065e-09, + "loss": 0.5176, + "regression_loss": 0.0, + "step": 11911, + "text_loss": 0.47265625 + }, + { + "epoch": 0.99, + "learning_rate": 2.4540933003486256e-09, + "loss": 0.4517, + "regression_loss": 0.0, + "step": 11912, + "text_loss": 0.2265625 + }, + { + "epoch": 0.99, + "learning_rate": 2.41336544544446e-09, + "loss": 0.5134, + "regression_loss": 0.0, + "step": 11913, + "text_loss": 0.57421875 + }, + { + "epoch": 0.99, + "learning_rate": 2.372978300185791e-09, + "loss": 0.4056, + "regression_loss": 0.0, + "step": 11914, + "text_loss": 0.306640625 + }, + { + "epoch": 0.99, + "learning_rate": 2.3329318673248615e-09, + "loss": 0.533, + "regression_loss": 0.0, + "step": 11915, + "text_loss": 0.3203125 + }, + { + "epoch": 0.99, + "learning_rate": 2.293226149592265e-09, + "loss": 0.4883, + "regression_loss": 0.0, + "step": 11916, + "text_loss": 0.33203125 + }, + { + "epoch": 0.99, + "learning_rate": 2.25386114969528e-09, + "loss": 0.4407, + "regression_loss": 0.0, + "step": 11917, + "text_loss": 0.5078125 + }, + { + "epoch": 0.99, + "learning_rate": 2.214836870317871e-09, + "loss": 0.5291, + "regression_loss": 0.0, + "step": 11918, + "text_loss": 0.494140625 + }, + { + "epoch": 0.99, + "learning_rate": 2.1761533141195777e-09, + "loss": 0.5593, + "regression_loss": 0.0, + "step": 11919, + "text_loss": 0.66015625 + }, + { + "epoch": 0.99, + "learning_rate": 2.1378104837377345e-09, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 11920, + "text_loss": 0.5078125 + }, + { + "epoch": 0.99, + "learning_rate": 2.0998083817874714e-09, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 11921, + "text_loss": 0.38671875 + }, + { + "epoch": 0.99, + "learning_rate": 2.062147010858384e-09, + "loss": 0.5422, + "regression_loss": 0.0, + "step": 11922, + "text_loss": 0.52734375 + }, + { + "epoch": 0.99, + "learning_rate": 2.024826373518418e-09, + "loss": 0.4795, + "regression_loss": 0.0, + "step": 11923, + "text_loss": 0.263671875 + }, + { + "epoch": 0.99, + "learning_rate": 1.987846472312205e-09, + "loss": 0.4836, + "regression_loss": 0.0, + "step": 11924, + "text_loss": 0.66796875 + }, + { + "epoch": 0.99, + "learning_rate": 1.9512073097605057e-09, + "loss": 0.5942, + "regression_loss": 0.0, + "step": 11925, + "text_loss": 0.640625 + }, + { + "epoch": 0.99, + "learning_rate": 1.914908888361322e-09, + "loss": 0.6056, + "regression_loss": 0.0, + "step": 11926, + "text_loss": 0.71875 + }, + { + "epoch": 0.99, + "learning_rate": 1.8789512105893416e-09, + "loss": 0.3523, + "regression_loss": 0.0, + "step": 11927, + "text_loss": 0.36328125 + }, + { + "epoch": 0.99, + "learning_rate": 1.8433342788953813e-09, + "loss": 0.498, + "regression_loss": 0.0, + "step": 11928, + "text_loss": 0.322265625 + }, + { + "epoch": 0.99, + "learning_rate": 1.8080580957086092e-09, + "loss": 0.5701, + "regression_loss": 0.0, + "step": 11929, + "text_loss": 0.5546875 + }, + { + "epoch": 0.99, + "learning_rate": 1.7731226634337684e-09, + "loss": 0.5613, + "regression_loss": 0.0, + "step": 11930, + "text_loss": 0.494140625 + }, + { + "epoch": 0.99, + "learning_rate": 1.7385279844517323e-09, + "loss": 0.4009, + "regression_loss": 0.0, + "step": 11931, + "text_loss": 0.2314453125 + }, + { + "epoch": 0.99, + "learning_rate": 1.7042740611217246e-09, + "loss": 0.4526, + "regression_loss": 0.0, + "step": 11932, + "text_loss": 0.361328125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6703608957790995e-09, + "loss": 0.6068, + "regression_loss": 0.0, + "step": 11933, + "text_loss": 0.671875 + }, + { + "epoch": 0.99, + "learning_rate": 1.6367884907353414e-09, + "loss": 0.4929, + "regression_loss": 0.0, + "step": 11934, + "text_loss": 0.353515625 + }, + { + "epoch": 0.99, + "learning_rate": 1.6035568482791752e-09, + "loss": 0.429, + "regression_loss": 0.0, + "step": 11935, + "text_loss": 0.52734375 + }, + { + "epoch": 0.99, + "learning_rate": 1.5706659706771211e-09, + "loss": 0.5398, + "regression_loss": 0.0, + "step": 11936, + "text_loss": 0.43359375 + }, + { + "epoch": 0.99, + "learning_rate": 1.5381158601707191e-09, + "loss": 0.4382, + "regression_loss": 0.0, + "step": 11937, + "text_loss": 0.23046875 + }, + { + "epoch": 0.99, + "learning_rate": 1.5059065189787502e-09, + "loss": 0.4415, + "regression_loss": 0.0, + "step": 11938, + "text_loss": 0.384765625 + }, + { + "epoch": 0.99, + "learning_rate": 1.4740379492983459e-09, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 11939, + "text_loss": 0.466796875 + }, + { + "epoch": 0.99, + "learning_rate": 1.442510153300547e-09, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 11940, + "text_loss": 0.48828125 + }, + { + "epoch": 0.99, + "learning_rate": 1.4113231331364107e-09, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 11941, + "text_loss": 0.455078125 + }, + { + "epoch": 0.99, + "learning_rate": 1.380476890930904e-09, + "loss": 0.5286, + "regression_loss": 0.0, + "step": 11942, + "text_loss": 0.52734375 + }, + { + "epoch": 0.99, + "learning_rate": 1.3499714287867893e-09, + "loss": 0.5552, + "regression_loss": 0.0, + "step": 11943, + "text_loss": 0.65625 + }, + { + "epoch": 0.99, + "learning_rate": 1.3198067487851796e-09, + "loss": 0.457, + "regression_loss": 0.0, + "step": 11944, + "text_loss": 0.5234375 + }, + { + "epoch": 0.99, + "learning_rate": 1.2899828529810975e-09, + "loss": 0.5305, + "regression_loss": 0.0, + "step": 11945, + "text_loss": 0.6953125 + }, + { + "epoch": 0.99, + "learning_rate": 1.260499743408472e-09, + "loss": 0.4529, + "regression_loss": 0.0, + "step": 11946, + "text_loss": 0.46875 + }, + { + "epoch": 0.99, + "learning_rate": 1.2313574220773616e-09, + "loss": 0.4177, + "regression_loss": 0.0, + "step": 11947, + "text_loss": 0.578125 + }, + { + "epoch": 0.99, + "learning_rate": 1.2025558909745105e-09, + "loss": 0.4604, + "regression_loss": 0.0, + "step": 11948, + "text_loss": 0.462890625 + }, + { + "epoch": 0.99, + "learning_rate": 1.174095152062793e-09, + "loss": 0.5284, + "regression_loss": 0.0, + "step": 11949, + "text_loss": 0.52734375 + }, + { + "epoch": 0.99, + "learning_rate": 1.145975207283434e-09, + "loss": 0.4387, + "regression_loss": 0.0, + "step": 11950, + "text_loss": 0.640625 + }, + { + "epoch": 0.99, + "learning_rate": 1.1181960585532336e-09, + "loss": 0.4481, + "regression_loss": 0.0, + "step": 11951, + "text_loss": 0.328125 + }, + { + "epoch": 0.99, + "learning_rate": 1.0907577077656772e-09, + "loss": 0.5402, + "regression_loss": 0.0, + "step": 11952, + "text_loss": 0.7734375 + }, + { + "epoch": 0.99, + "learning_rate": 1.0636601567920456e-09, + "loss": 0.4956, + "regression_loss": 0.0, + "step": 11953, + "text_loss": 0.40625 + }, + { + "epoch": 0.99, + "learning_rate": 1.0369034074786399e-09, + "loss": 0.46, + "regression_loss": 0.0, + "step": 11954, + "text_loss": 0.171875 + }, + { + "epoch": 0.99, + "learning_rate": 1.0104874616506666e-09, + "loss": 0.4967, + "regression_loss": 0.0, + "step": 11955, + "text_loss": 0.3359375 + }, + { + "epoch": 0.99, + "learning_rate": 9.844123211083522e-10, + "loss": 0.5132, + "regression_loss": 0.0, + "step": 11956, + "text_loss": 0.8203125 + }, + { + "epoch": 0.99, + "learning_rate": 9.586779876297191e-10, + "loss": 0.5393, + "regression_loss": 0.0, + "step": 11957, + "text_loss": 0.443359375 + }, + { + "epoch": 0.99, + "learning_rate": 9.332844629689197e-10, + "loss": 0.5603, + "regression_loss": 0.0, + "step": 11958, + "text_loss": 0.443359375 + }, + { + "epoch": 0.99, + "learning_rate": 9.082317488573467e-10, + "loss": 0.4895, + "regression_loss": 0.0, + "step": 11959, + "text_loss": 0.6640625 + }, + { + "epoch": 0.99, + "learning_rate": 8.835198470030781e-10, + "loss": 0.5542, + "regression_loss": 0.0, + "step": 11960, + "text_loss": 0.357421875 + }, + { + "epoch": 0.99, + "learning_rate": 8.591487590908776e-10, + "loss": 0.5215, + "regression_loss": 0.0, + "step": 11961, + "text_loss": 0.73046875 + }, + { + "epoch": 0.99, + "learning_rate": 8.351184867816387e-10, + "loss": 0.5227, + "regression_loss": 0.0, + "step": 11962, + "text_loss": 0.38671875 + }, + { + "epoch": 0.99, + "learning_rate": 8.114290317146056e-10, + "loss": 0.4707, + "regression_loss": 0.0, + "step": 11963, + "text_loss": 0.5546875 + }, + { + "epoch": 0.99, + "learning_rate": 7.880803955034877e-10, + "loss": 0.4849, + "regression_loss": 0.0, + "step": 11964, + "text_loss": 0.37890625 + }, + { + "epoch": 0.99, + "learning_rate": 7.650725797414549e-10, + "loss": 0.5156, + "regression_loss": 0.0, + "step": 11965, + "text_loss": 0.498046875 + }, + { + "epoch": 0.99, + "learning_rate": 7.424055859961421e-10, + "loss": 0.4788, + "regression_loss": 0.0, + "step": 11966, + "text_loss": 0.5625 + }, + { + "epoch": 0.99, + "learning_rate": 7.200794158135349e-10, + "loss": 0.4438, + "regression_loss": 0.0, + "step": 11967, + "text_loss": 0.322265625 + }, + { + "epoch": 0.99, + "learning_rate": 6.980940707146388e-10, + "loss": 0.4268, + "regression_loss": 0.0, + "step": 11968, + "text_loss": 0.267578125 + }, + { + "epoch": 0.99, + "learning_rate": 6.764495521999204e-10, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 11969, + "text_loss": 0.8359375 + }, + { + "epoch": 0.99, + "learning_rate": 6.551458617432004e-10, + "loss": 0.4768, + "regression_loss": 0.0, + "step": 11970, + "text_loss": 0.234375 + }, + { + "epoch": 0.99, + "learning_rate": 6.341830007983162e-10, + "loss": 0.4966, + "regression_loss": 0.0, + "step": 11971, + "text_loss": 0.5546875 + }, + { + "epoch": 1.0, + "learning_rate": 6.135609707935697e-10, + "loss": 0.5635, + "regression_loss": 0.0, + "step": 11972, + "text_loss": 0.341796875 + }, + { + "epoch": 1.0, + "learning_rate": 5.932797731350581e-10, + "loss": 0.5413, + "regression_loss": 0.0, + "step": 11973, + "text_loss": 0.58984375 + }, + { + "epoch": 1.0, + "learning_rate": 5.733394092061196e-10, + "loss": 0.4124, + "regression_loss": 0.0, + "step": 11974, + "text_loss": 0.291015625 + }, + { + "epoch": 1.0, + "learning_rate": 5.537398803651117e-10, + "loss": 0.4775, + "regression_loss": 0.0, + "step": 11975, + "text_loss": 0.26171875 + }, + { + "epoch": 1.0, + "learning_rate": 5.344811879487433e-10, + "loss": 0.5544, + "regression_loss": 0.0, + "step": 11976, + "text_loss": 0.76953125 + }, + { + "epoch": 1.0, + "learning_rate": 5.155633332698529e-10, + "loss": 0.5972, + "regression_loss": 0.0, + "step": 11977, + "text_loss": 0.59765625 + }, + { + "epoch": 1.0, + "learning_rate": 4.969863176179646e-10, + "loss": 0.595, + "regression_loss": 0.0, + "step": 11978, + "text_loss": 0.4296875 + }, + { + "epoch": 1.0, + "learning_rate": 4.787501422603979e-10, + "loss": 0.4719, + "regression_loss": 0.0, + "step": 11979, + "text_loss": 0.640625 + }, + { + "epoch": 1.0, + "learning_rate": 4.608548084394926e-10, + "loss": 0.4474, + "regression_loss": 0.0, + "step": 11980, + "text_loss": 0.5234375 + }, + { + "epoch": 1.0, + "learning_rate": 4.433003173753836e-10, + "loss": 0.5537, + "regression_loss": 0.0, + "step": 11981, + "text_loss": 0.47265625 + }, + { + "epoch": 1.0, + "learning_rate": 4.260866702654465e-10, + "loss": 0.6008, + "regression_loss": 0.0, + "step": 11982, + "text_loss": 0.66796875 + }, + { + "epoch": 1.0, + "learning_rate": 4.0921386828263186e-10, + "loss": 0.4839, + "regression_loss": 0.0, + "step": 11983, + "text_loss": 0.39453125 + }, + { + "epoch": 1.0, + "learning_rate": 3.9268191257768593e-10, + "loss": 0.3745, + "regression_loss": 0.0, + "step": 11984, + "text_loss": 0.21484375 + }, + { + "epoch": 1.0, + "learning_rate": 3.7649080427748506e-10, + "loss": 0.4988, + "regression_loss": 0.0, + "step": 11985, + "text_loss": 0.5546875 + }, + { + "epoch": 1.0, + "learning_rate": 3.6064054448559096e-10, + "loss": 0.4646, + "regression_loss": 0.0, + "step": 11986, + "text_loss": 0.64453125 + }, + { + "epoch": 1.0, + "learning_rate": 3.4513113428280563e-10, + "loss": 0.5674, + "regression_loss": 0.0, + "step": 11987, + "text_loss": 0.52734375 + }, + { + "epoch": 1.0, + "learning_rate": 3.299625747260615e-10, + "loss": 0.4297, + "regression_loss": 0.0, + "step": 11988, + "text_loss": 0.63671875 + }, + { + "epoch": 1.0, + "learning_rate": 3.1513486685064155e-10, + "loss": 0.4429, + "regression_loss": 0.0, + "step": 11989, + "text_loss": 0.56640625 + }, + { + "epoch": 1.0, + "learning_rate": 3.0064801166629353e-10, + "loss": 0.4265, + "regression_loss": 0.0, + "step": 11990, + "text_loss": 0.4921875 + }, + { + "epoch": 1.0, + "learning_rate": 2.86502010161116e-10, + "loss": 0.5137, + "regression_loss": 0.0, + "step": 11991, + "text_loss": 0.453125 + }, + { + "epoch": 1.0, + "learning_rate": 2.7269686329933764e-10, + "loss": 0.4326, + "regression_loss": 0.0, + "step": 11992, + "text_loss": 0.50390625 + }, + { + "epoch": 1.0, + "learning_rate": 2.5923257202242756e-10, + "loss": 0.4023, + "regression_loss": 0.0, + "step": 11993, + "text_loss": 0.5625 + }, + { + "epoch": 1.0, + "learning_rate": 2.4610913724798513e-10, + "loss": 0.512, + "regression_loss": 0.0, + "step": 11994, + "text_loss": 0.5546875 + }, + { + "epoch": 1.0, + "learning_rate": 2.3332655987085006e-10, + "loss": 0.3824, + "regression_loss": 0.0, + "step": 11995, + "text_loss": 0.2353515625 + }, + { + "epoch": 1.0, + "learning_rate": 2.2088484076199234e-10, + "loss": 0.4282, + "regression_loss": 0.0, + "step": 11996, + "text_loss": 0.365234375 + }, + { + "epoch": 1.0, + "learning_rate": 2.087839807707326e-10, + "loss": 0.5483, + "regression_loss": 0.0, + "step": 11997, + "text_loss": 0.78125 + }, + { + "epoch": 1.0, + "learning_rate": 1.9702398072085627e-10, + "loss": 0.4668, + "regression_loss": 0.0, + "step": 11998, + "text_loss": 0.36328125 + }, + { + "epoch": 1.0, + "learning_rate": 1.8560484141505463e-10, + "loss": 0.4584, + "regression_loss": 0.0, + "step": 11999, + "text_loss": 0.6484375 + }, + { + "epoch": 1.0, + "learning_rate": 1.7452656363103893e-10, + "loss": 0.4753, + "regression_loss": 0.0, + "step": 12000, + "text_loss": 0.2890625 + }, + { + "epoch": 1.0, + "learning_rate": 1.637891481243159e-10, + "loss": 0.4736, + "regression_loss": 0.0, + "step": 12001, + "text_loss": 0.49609375 + }, + { + "epoch": 1.0, + "learning_rate": 1.5339259562707764e-10, + "loss": 0.5845, + "regression_loss": 0.0, + "step": 12002, + "text_loss": 0.6328125 + }, + { + "epoch": 1.0, + "learning_rate": 1.4333690684820155e-10, + "loss": 0.5559, + "regression_loss": 0.0, + "step": 12003, + "text_loss": 0.59375 + }, + { + "epoch": 1.0, + "learning_rate": 1.3362208247325037e-10, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 12004, + "text_loss": 0.66015625 + }, + { + "epoch": 1.0, + "learning_rate": 1.24248123163917e-10, + "loss": 0.4585, + "regression_loss": 0.0, + "step": 12005, + "text_loss": 0.484375 + }, + { + "epoch": 1.0, + "learning_rate": 1.1521502956024499e-10, + "loss": 0.4545, + "regression_loss": 0.0, + "step": 12006, + "text_loss": 0.451171875 + }, + { + "epoch": 1.0, + "learning_rate": 1.0652280227729794e-10, + "loss": 0.4854, + "regression_loss": 0.0, + "step": 12007, + "text_loss": 0.5078125 + }, + { + "epoch": 1.0, + "learning_rate": 9.817144190737981e-11, + "loss": 0.4846, + "regression_loss": 0.0, + "step": 12008, + "text_loss": 0.35546875 + }, + { + "epoch": 1.0, + "learning_rate": 9.016094902114524e-11, + "loss": 0.4164, + "regression_loss": 0.0, + "step": 12009, + "text_loss": 0.42578125 + }, + { + "epoch": 1.0, + "learning_rate": 8.249132416371374e-11, + "loss": 0.5481, + "regression_loss": 0.0, + "step": 12010, + "text_loss": 0.482421875 + }, + { + "epoch": 1.0, + "learning_rate": 7.516256785800036e-11, + "loss": 0.4663, + "regression_loss": 0.0, + "step": 12011, + "text_loss": 0.3125 + }, + { + "epoch": 1.0, + "learning_rate": 6.817468060360544e-11, + "loss": 0.5471, + "regression_loss": 0.0, + "step": 12012, + "text_loss": 0.453125 + }, + { + "epoch": 1.0, + "learning_rate": 6.152766287736977e-11, + "loss": 0.4611, + "regression_loss": 0.0, + "step": 12013, + "text_loss": 0.64453125 + }, + { + "epoch": 1.0, + "learning_rate": 5.522151513226437e-11, + "loss": 0.5229, + "regression_loss": 0.0, + "step": 12014, + "text_loss": 0.59765625 + }, + { + "epoch": 1.0, + "learning_rate": 4.925623779794553e-11, + "loss": 0.481, + "regression_loss": 0.0, + "step": 12015, + "text_loss": 0.400390625 + }, + { + "epoch": 1.0, + "learning_rate": 4.3631831281309986e-11, + "loss": 0.5338, + "regression_loss": 0.0, + "step": 12016, + "text_loss": 0.60546875 + }, + { + "epoch": 1.0, + "learning_rate": 3.8348295965939805e-11, + "loss": 0.4581, + "regression_loss": 0.0, + "step": 12017, + "text_loss": 0.2021484375 + }, + { + "epoch": 1.0, + "learning_rate": 3.340563221210236e-11, + "loss": 0.5129, + "regression_loss": 0.0, + "step": 12018, + "text_loss": 0.478515625 + }, + { + "epoch": 1.0, + "learning_rate": 2.8803840356195213e-11, + "loss": 0.4688, + "regression_loss": 0.0, + "step": 12019, + "text_loss": 0.49609375 + }, + { + "epoch": 1.0, + "learning_rate": 2.454292071241149e-11, + "loss": 0.5264, + "regression_loss": 0.0, + "step": 12020, + "text_loss": 0.5234375 + }, + { + "epoch": 1.0, + "learning_rate": 2.062287357107451e-11, + "loss": 0.4402, + "regression_loss": 0.0, + "step": 12021, + "text_loss": 0.3984375 + }, + { + "epoch": 1.0, + "learning_rate": 1.704369919974802e-11, + "loss": 0.5732, + "regression_loss": 0.0, + "step": 12022, + "text_loss": 0.48828125 + }, + { + "epoch": 1.0, + "learning_rate": 1.3805397842125979e-11, + "loss": 0.3735, + "regression_loss": 0.0, + "step": 12023, + "text_loss": 0.408203125 + }, + { + "epoch": 1.0, + "learning_rate": 1.0907969718587652e-11, + "loss": 0.4927, + "regression_loss": 0.0, + "step": 12024, + "text_loss": 0.5546875 + }, + { + "epoch": 1.0, + "learning_rate": 8.351415027862963e-12, + "loss": 0.4507, + "regression_loss": 0.0, + "step": 12025, + "text_loss": 0.50390625 + }, + { + "epoch": 1.0, + "learning_rate": 6.135733942591593e-12, + "loss": 0.4734, + "regression_loss": 0.0, + "step": 12026, + "text_loss": 0.419921875 + }, + { + "epoch": 1.0, + "learning_rate": 4.260926615429206e-12, + "loss": 0.4484, + "regression_loss": 0.0, + "step": 12027, + "text_loss": 0.6953125 + }, + { + "epoch": 1.0, + "learning_rate": 2.7269931734963396e-12, + "loss": 0.4714, + "regression_loss": 0.0, + "step": 12028, + "text_loss": 0.5859375 + }, + { + "epoch": 1.0, + "learning_rate": 1.5339337211539573e-12, + "loss": 0.5168, + "regression_loss": 0.0, + "step": 12029, + "text_loss": 0.474609375 + }, + { + "epoch": 1.0, + "learning_rate": 6.817483400034519e-13, + "loss": 0.4963, + "regression_loss": 0.0, + "step": 12030, + "text_loss": 0.3203125 + }, + { + "epoch": 1.0, + "learning_rate": 1.7043708777642054e-13, + "loss": 0.4086, + "regression_loss": 0.0, + "step": 12031, + "text_loss": 0.28125 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.4631, + "regression_loss": 0.0, + "step": 12032, + "text_loss": 0.328125 + }, + { + "epoch": 1.0, + "regression_loss": 0.0, + "step": 12032, + "text_loss": 0.328125, + "total_flos": 4.055886109275377e+19, + "train_loss": 0.5251333459894708, + "train_runtime": 123432.1068, + "train_samples_per_second": 12.477, + "train_steps_per_second": 0.097 + } + ], + "logging_steps": 1, + "max_steps": 12032, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 3000, + "total_flos": 4.055886109275377e+19, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}