{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 594,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.111111111111111e-06,
      "loss": 2.0656,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.222222222222222e-06,
      "loss": 2.0598,
      "step": 2
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 2.0373,
      "step": 3
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.444444444444444e-06,
      "loss": 2.011,
      "step": 4
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.555555555555557e-06,
      "loss": 2.0331,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.9345,
      "step": 6
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.77777777777778e-06,
      "loss": 1.8867,
      "step": 7
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.7814,
      "step": 8
    },
    {
      "epoch": 0.09,
      "learning_rate": 1e-05,
      "loss": 1.8128,
      "step": 9
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.1111111111111113e-05,
      "loss": 1.7915,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2222222222222224e-05,
      "loss": 1.7218,
      "step": 11
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.7265,
      "step": 12
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.4444444444444446e-05,
      "loss": 1.6642,
      "step": 13
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.555555555555556e-05,
      "loss": 1.7161,
      "step": 14
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.6933,
      "step": 15
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.7777777777777777e-05,
      "loss": 1.6913,
      "step": 16
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.888888888888889e-05,
      "loss": 1.6784,
      "step": 17
    },
    {
      "epoch": 0.18,
      "learning_rate": 2e-05,
      "loss": 1.7374,
      "step": 18
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.999985126139422e-05,
      "loss": 1.651,
      "step": 19
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.99994050500015e-05,
      "loss": 1.611,
      "step": 20
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9998661379095622e-05,
      "loss": 1.6064,
      "step": 21
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.999762027079909e-05,
      "loss": 1.6288,
      "step": 22
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.999628175608252e-05,
      "loss": 1.6289,
      "step": 23
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9994645874763657e-05,
      "loss": 1.632,
      "step": 24
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9992712675506253e-05,
      "loss": 1.6529,
      "step": 25
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.999048221581858e-05,
      "loss": 1.6194,
      "step": 26
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9987954562051724e-05,
      "loss": 1.579,
      "step": 27
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.9985129789397633e-05,
      "loss": 1.6402,
      "step": 28
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.998200798188685e-05,
      "loss": 1.596,
      "step": 29
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9978589232386036e-05,
      "loss": 1.5806,
      "step": 30
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.99748736425952e-05,
      "loss": 1.5996,
      "step": 31
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9970861323044667e-05,
      "loss": 1.5413,
      "step": 32
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9966552393091804e-05,
      "loss": 1.5971,
      "step": 33
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9961946980917457e-05,
      "loss": 1.6125,
      "step": 34
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.995704522352214e-05,
      "loss": 1.6194,
      "step": 35
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.995184726672197e-05,
      "loss": 1.5184,
      "step": 36
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9946353265144315e-05,
      "loss": 1.5696,
      "step": 37
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.9940563382223196e-05,
      "loss": 1.6241,
      "step": 38
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.9934477790194445e-05,
      "loss": 1.6131,
      "step": 39
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.9928096670090552e-05,
      "loss": 1.6116,
      "step": 40
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.99214202117353e-05,
      "loss": 1.5936,
      "step": 41
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9914448613738107e-05,
      "loss": 1.6011,
      "step": 42
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.9907182083488127e-05,
      "loss": 1.583,
      "step": 43
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.989962083714808e-05,
      "loss": 1.5147,
      "step": 44
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.989176509964781e-05,
      "loss": 1.5635,
      "step": 45
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.988361510467761e-05,
      "loss": 1.5906,
      "step": 46
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.9875171094681248e-05,
      "loss": 1.6465,
      "step": 47
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9866433320848793e-05,
      "loss": 1.5781,
      "step": 48
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.985740204310909e-05,
      "loss": 1.5766,
      "step": 49
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.9848077530122083e-05,
      "loss": 1.5489,
      "step": 50
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.9838460059270775e-05,
      "loss": 1.6406,
      "step": 51
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.9828549916653013e-05,
      "loss": 1.5429,
      "step": 52
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.9818347397072954e-05,
      "loss": 1.5426,
      "step": 53
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.9807852804032306e-05,
      "loss": 1.5771,
      "step": 54
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.9797066449721295e-05,
      "loss": 1.5727,
      "step": 55
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.9785988655009386e-05,
      "loss": 1.5436,
      "step": 56
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.977461974943572e-05,
      "loss": 1.5526,
      "step": 57
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.9762960071199334e-05,
      "loss": 1.4772,
      "step": 58
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.975100996714909e-05,
      "loss": 1.5065,
      "step": 59
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9738769792773338e-05,
      "loss": 1.5692,
      "step": 60
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9726239912189382e-05,
      "loss": 1.5579,
      "step": 61
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.9713420698132614e-05,
      "loss": 1.5486,
      "step": 62
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.9700312531945444e-05,
      "loss": 1.5538,
      "step": 63
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.9686915803565934e-05,
      "loss": 1.5331,
      "step": 64
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.967323091151623e-05,
      "loss": 1.5404,
      "step": 65
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.9659258262890683e-05,
      "loss": 1.5666,
      "step": 66
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.9644998273343753e-05,
      "loss": 1.5203,
      "step": 67
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.963045136707763e-05,
      "loss": 1.5571,
      "step": 68
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.9615617976829622e-05,
      "loss": 1.5172,
      "step": 69
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.960049854385929e-05,
      "loss": 1.5794,
      "step": 70
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.9585093517935308e-05,
      "loss": 1.5698,
      "step": 71
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.956940335732209e-05,
      "loss": 1.5834,
      "step": 72
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.9553428528766163e-05,
      "loss": 1.539,
      "step": 73
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.953716950748227e-05,
      "loss": 1.5476,
      "step": 74
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.9520626777139243e-05,
      "loss": 1.5608,
      "step": 75
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.9503800829845613e-05,
      "loss": 1.5345,
      "step": 76
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.9486692166134964e-05,
      "loss": 1.5821,
      "step": 77
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.946930129495106e-05,
      "loss": 1.5458,
      "step": 78
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.945162873363268e-05,
      "loss": 1.5698,
      "step": 79
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.9433675007898255e-05,
      "loss": 1.5513,
      "step": 80
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.941544065183021e-05,
      "loss": 1.4755,
      "step": 81
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.9396926207859085e-05,
      "loss": 1.5616,
      "step": 82
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.93781322267474e-05,
      "loss": 1.5531,
      "step": 83
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.935905926757326e-05,
      "loss": 1.5437,
      "step": 84
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.9339707897713737e-05,
      "loss": 1.5212,
      "step": 85
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.932007869282799e-05,
      "loss": 1.5164,
      "step": 86
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.930017223684012e-05,
      "loss": 1.5727,
      "step": 87
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.9279989121921846e-05,
      "loss": 1.5399,
      "step": 88
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.9259529948474833e-05,
      "loss": 1.5235,
      "step": 89
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 1.5604,
      "step": 90
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.921778586864375e-05,
      "loss": 1.5699,
      "step": 91
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.9196502204050925e-05,
      "loss": 1.5291,
      "step": 92
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.9174944964474914e-05,
      "loss": 1.507,
      "step": 93
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.9153114791194475e-05,
      "loss": 1.5863,
      "step": 94
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.9131012333607507e-05,
      "loss": 1.5001,
      "step": 95
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.910863824921176e-05,
      "loss": 1.4875,
      "step": 96
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.9085993203585257e-05,
      "loss": 1.5459,
      "step": 97
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.9063077870366504e-05,
      "loss": 1.5156,
      "step": 98
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.9039892931234434e-05,
      "loss": 1.3387,
      "step": 99
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.901643907588816e-05,
      "loss": 1.1024,
      "step": 100
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.8992717002026433e-05,
      "loss": 1.0518,
      "step": 101
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.8968727415326885e-05,
      "loss": 1.0785,
      "step": 102
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.8944471029425052e-05,
      "loss": 1.0259,
      "step": 103
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.8919948565893144e-05,
      "loss": 0.9982,
      "step": 104
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.8895160754218562e-05,
      "loss": 1.0523,
      "step": 105
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.887010833178222e-05,
      "loss": 1.0001,
      "step": 106
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.8844792043836592e-05,
      "loss": 1.0063,
      "step": 107
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.881921264348355e-05,
      "loss": 1.0043,
      "step": 108
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.8793370891651973e-05,
      "loss": 1.0166,
      "step": 109
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.876726755707508e-05,
      "loss": 1.0636,
      "step": 110
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.874090341626759e-05,
      "loss": 1.0159,
      "step": 111
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.8714279253502616e-05,
      "loss": 1.0415,
      "step": 112
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.8687395860788325e-05,
      "loss": 0.9992,
      "step": 113
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.866025403784439e-05,
      "loss": 1.0228,
      "step": 114
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.8632854592078185e-05,
      "loss": 0.9896,
      "step": 115
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.860519833856079e-05,
      "loss": 1.0282,
      "step": 116
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.8577286100002723e-05,
      "loss": 1.033,
      "step": 117
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.854911870672947e-05,
      "loss": 1.0227,
      "step": 118
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.8520696996656787e-05,
      "loss": 0.9903,
      "step": 119
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.849202181526579e-05,
      "loss": 1.0171,
      "step": 120
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.8463094015577772e-05,
      "loss": 0.9954,
      "step": 121
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.843391445812886e-05,
      "loss": 1.0108,
      "step": 122
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.840448401094438e-05,
      "loss": 1.0312,
      "step": 123
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.837480354951308e-05,
      "loss": 1.0058,
      "step": 124
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.8344873956761045e-05,
      "loss": 1.0308,
      "step": 125
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.8314696123025456e-05,
      "loss": 1.0252,
      "step": 126
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.8284270946028092e-05,
      "loss": 0.995,
      "step": 127
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.8253599330848638e-05,
      "loss": 0.9775,
      "step": 128
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.822268218989775e-05,
      "loss": 1.0477,
      "step": 129
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.819152044288992e-05,
      "loss": 0.9951,
      "step": 130
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.81601150168161e-05,
      "loss": 1.0224,
      "step": 131
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.8128466845916156e-05,
      "loss": 1.0196,
      "step": 132
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.809657687165104e-05,
      "loss": 1.0027,
      "step": 133
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.806444604267483e-05,
      "loss": 1.0451,
      "step": 134
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.803207531480645e-05,
      "loss": 1.0222,
      "step": 135
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.7999465651001297e-05,
      "loss": 1.001,
      "step": 136
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.7966618021322558e-05,
      "loss": 1.0296,
      "step": 137
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.7933533402912354e-05,
      "loss": 0.9797,
      "step": 138
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.790021277996269e-05,
      "loss": 1.0326,
      "step": 139
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.786665714368617e-05,
      "loss": 1.0295,
      "step": 140
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.7832867492286506e-05,
      "loss": 0.9919,
      "step": 141
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.7798844830928818e-05,
      "loss": 1.0372,
      "step": 142
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.776459017170976e-05,
      "loss": 1.0156,
      "step": 143
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.773010453362737e-05,
      "loss": 1.0052,
      "step": 144
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.7695388942550807e-05,
      "loss": 1.0215,
      "step": 145
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.766044443118978e-05,
      "loss": 1.0225,
      "step": 146
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.7625272039063884e-05,
      "loss": 1.0192,
      "step": 147
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.758987281247162e-05,
      "loss": 1.0132,
      "step": 148
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.7554247804459317e-05,
      "loss": 1.0388,
      "step": 149
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.7518398074789776e-05,
      "loss": 1.0305,
      "step": 150
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.748232468991076e-05,
      "loss": 1.0158,
      "step": 151
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.7446028722923266e-05,
      "loss": 1.0081,
      "step": 152
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.7409511253549592e-05,
      "loss": 1.0356,
      "step": 153
    },
    {
      "epoch": 1.56,
      "learning_rate": 1.737277336810124e-05,
      "loss": 0.9761,
      "step": 154
    },
    {
      "epoch": 1.57,
      "learning_rate": 1.7335816159446585e-05,
      "loss": 1.0206,
      "step": 155
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.7298640726978357e-05,
      "loss": 1.0653,
      "step": 156
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.726124817658096e-05,
      "loss": 1.0368,
      "step": 157
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.7223639620597556e-05,
      "loss": 0.986,
      "step": 158
    },
    {
      "epoch": 1.61,
      "learning_rate": 1.718581617779698e-05,
      "loss": 1.0022,
      "step": 159
    },
    {
      "epoch": 1.62,
      "learning_rate": 1.7147778973340466e-05,
      "loss": 1.0442,
      "step": 160
    },
    {
      "epoch": 1.63,
      "learning_rate": 1.7109529138748156e-05,
      "loss": 1.0237,
      "step": 161
    },
    {
      "epoch": 1.64,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 1.0118,
      "step": 162
    },
    {
      "epoch": 1.65,
      "learning_rate": 1.7032396136829247e-05,
      "loss": 1.0765,
      "step": 163
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.699351526403367e-05,
      "loss": 0.9896,
      "step": 164
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.6954426350096118e-05,
      "loss": 0.9537,
      "step": 165
    },
    {
      "epoch": 1.68,
      "learning_rate": 1.6915130557822698e-05,
      "loss": 1.0103,
      "step": 166
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.6875629056173674e-05,
      "loss": 1.0174,
      "step": 167
    },
    {
      "epoch": 1.7,
      "learning_rate": 1.6835923020228714e-05,
      "loss": 1.0319,
      "step": 168
    },
    {
      "epoch": 1.71,
      "learning_rate": 1.6796013631151898e-05,
      "loss": 1.0299,
      "step": 169
    },
    {
      "epoch": 1.72,
      "learning_rate": 1.6755902076156606e-05,
      "loss": 0.9957,
      "step": 170
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.6715589548470187e-05,
      "loss": 1.0193,
      "step": 171
    },
    {
      "epoch": 1.74,
      "learning_rate": 1.6675077247298475e-05,
      "loss": 1.0055,
      "step": 172
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.6634366377790113e-05,
      "loss": 1.0481,
      "step": 173
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.659345815100069e-05,
      "loss": 1.037,
      "step": 174
    },
    {
      "epoch": 1.77,
      "learning_rate": 1.6552353783856733e-05,
      "loss": 1.0183,
      "step": 175
    },
    {
      "epoch": 1.78,
      "learning_rate": 1.6511054499119493e-05,
      "loss": 1.0188,
      "step": 176
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.6469561525348576e-05,
      "loss": 0.9937,
      "step": 177
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.6427876096865394e-05,
      "loss": 1.0199,
      "step": 178
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.6385999453716453e-05,
      "loss": 1.0348,
      "step": 179
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.6343932841636455e-05,
      "loss": 1.0251,
      "step": 180
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.6301677512011248e-05,
      "loss": 1.0115,
      "step": 181
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.6259234721840595e-05,
      "loss": 1.0165,
      "step": 182
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.6216605733700776e-05,
      "loss": 1.0315,
      "step": 183
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.6173791815707053e-05,
      "loss": 1.0241,
      "step": 184
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.6130794241475912e-05,
      "loss": 1.0063,
      "step": 185
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.608761429008721e-05,
      "loss": 1.0121,
      "step": 186
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.604425324604609e-05,
      "loss": 1.0086,
      "step": 187
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.6000712399244813e-05,
      "loss": 0.9998,
      "step": 188
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.5956993044924334e-05,
      "loss": 1.0212,
      "step": 189
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.5913096483635827e-05,
      "loss": 1.0013,
      "step": 190
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.586902402120195e-05,
      "loss": 1.0096,
      "step": 191
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.5824776968678024e-05,
      "loss": 1.0396,
      "step": 192
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.5780356642313034e-05,
      "loss": 1.0267,
      "step": 193
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.573576436351046e-05,
      "loss": 1.0212,
      "step": 194
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.5691001458788984e-05,
      "loss": 1.0177,
      "step": 195
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.5646069259743007e-05,
      "loss": 1.0202,
      "step": 196
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.5600969103003056e-05,
      "loss": 1.0059,
      "step": 197
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.5555702330196024e-05,
      "loss": 0.7256,
      "step": 198
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.5510270287905243e-05,
      "loss": 0.5414,
      "step": 199
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.5464674327630437e-05,
      "loss": 0.5543,
      "step": 200
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.5418915805747518e-05,
      "loss": 0.5303,
      "step": 201
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.5372996083468242e-05,
      "loss": 0.4704,
      "step": 202
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.532691652679969e-05,
      "loss": 0.4854,
      "step": 203
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.528067850650368e-05,
      "loss": 0.4688,
      "step": 204
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.523428339805594e-05,
      "loss": 0.4973,
      "step": 205
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.5187732581605217e-05,
      "loss": 0.46,
      "step": 206
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.5141027441932217e-05,
      "loss": 0.4851,
      "step": 207
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.509416936840842e-05,
      "loss": 0.4596,
      "step": 208
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.5047159754954721e-05,
      "loss": 0.472,
      "step": 209
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.4691,
      "step": 210
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.4952691506439497e-05,
      "loss": 0.4607,
      "step": 211
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.4905235681593079e-05,
      "loss": 0.4816,
      "step": 212
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.4857633937163402e-05,
      "loss": 0.4583,
      "step": 213
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.4809887689193878e-05,
      "loss": 0.467,
      "step": 214
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.4761998358026581e-05,
      "loss": 0.4515,
      "step": 215
    },
    {
      "epoch": 2.18,
      "learning_rate": 1.4713967368259981e-05,
      "loss": 0.4563,
      "step": 216
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.4665796148706561e-05,
      "loss": 0.4662,
      "step": 217
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.4617486132350343e-05,
      "loss": 0.4559,
      "step": 218
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.4569038756304209e-05,
      "loss": 0.4685,
      "step": 219
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.45204554617672e-05,
      "loss": 0.4384,
      "step": 220
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.447173769398161e-05,
      "loss": 0.4842,
      "step": 221
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.4422886902190014e-05,
      "loss": 0.4467,
      "step": 222
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.4373904539592145e-05,
      "loss": 0.4587,
      "step": 223
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.4324792063301662e-05,
      "loss": 0.4582,
      "step": 224
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.4275550934302822e-05,
      "loss": 0.4456,
      "step": 225
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.4226182617406996e-05,
      "loss": 0.4544,
      "step": 226
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.4176688581209109e-05,
      "loss": 0.4455,
      "step": 227
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.4127070298043949e-05,
      "loss": 0.4548,
      "step": 228
    },
    {
      "epoch": 2.31,
      "learning_rate": 1.4077329243942368e-05,
      "loss": 0.4477,
      "step": 229
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.4027466898587375e-05,
      "loss": 0.4529,
      "step": 230
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.3977484745270112e-05,
      "loss": 0.4133,
      "step": 231
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.3927384270845744e-05,
      "loss": 0.4699,
      "step": 232
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.3877166965689206e-05,
      "loss": 0.4647,
      "step": 233
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 0.4583,
      "step": 234
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.3776387842012217e-05,
      "loss": 0.437,
      "step": 235
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.372582902144103e-05,
      "loss": 0.466,
      "step": 236
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.3675159365947038e-05,
      "loss": 0.4734,
      "step": 237
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.3624380382837017e-05,
      "loss": 0.4734,
      "step": 238
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.3573493582670003e-05,
      "loss": 0.4715,
      "step": 239
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.3522500479212337e-05,
      "loss": 0.4509,
      "step": 240
    },
    {
      "epoch": 2.43,
      "learning_rate": 1.347140258939264e-05,
      "loss": 0.4268,
      "step": 241
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.342020143325669e-05,
      "loss": 0.4791,
      "step": 242
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.3368898533922202e-05,
      "loss": 0.4794,
      "step": 243
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.3317495417533523e-05,
      "loss": 0.4776,
      "step": 244
    },
    {
      "epoch": 2.47,
      "learning_rate": 1.3265993613216223e-05,
      "loss": 0.4395,
      "step": 245
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.3214394653031616e-05,
      "loss": 0.4477,
      "step": 246
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.3162700071931185e-05,
      "loss": 0.4426,
      "step": 247
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.3110911407710909e-05,
      "loss": 0.4512,
      "step": 248
    },
    {
      "epoch": 2.52,
      "learning_rate": 1.3059030200965536e-05,
      "loss": 0.4932,
      "step": 249
    },
    {
      "epoch": 2.53,
      "learning_rate": 1.300705799504273e-05,
      "loss": 0.4552,
      "step": 250
    },
    {
      "epoch": 2.54,
      "learning_rate": 1.295499633599719e-05,
      "loss": 0.4856,
      "step": 251
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.2902846772544625e-05,
      "loss": 0.4616,
      "step": 252
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.285061085601571e-05,
      "loss": 0.4826,
      "step": 253
    },
    {
      "epoch": 2.57,
      "learning_rate": 1.2798290140309924e-05,
      "loss": 0.4546,
      "step": 254
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.2745886181849325e-05,
      "loss": 0.4744,
      "step": 255
    },
    {
      "epoch": 2.59,
      "learning_rate": 1.2693400539532263e-05,
      "loss": 0.4564,
      "step": 256
    },
    {
      "epoch": 2.6,
      "learning_rate": 1.2640834774686985e-05,
      "loss": 0.4737,
      "step": 257
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.2588190451025209e-05,
      "loss": 0.4698,
      "step": 258
    },
    {
      "epoch": 2.62,
      "learning_rate": 1.2535469134595598e-05,
      "loss": 0.4656,
      "step": 259
    },
    {
      "epoch": 2.63,
      "learning_rate": 1.2482672393737164e-05,
      "loss": 0.4543,
      "step": 260
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.242980179903264e-05,
      "loss": 0.4604,
      "step": 261
    },
    {
      "epoch": 2.65,
      "learning_rate": 1.2376858923261732e-05,
      "loss": 0.4853,
      "step": 262
    },
    {
      "epoch": 2.66,
      "learning_rate": 1.2323845341354347e-05,
      "loss": 0.4727,
      "step": 263
    },
    {
      "epoch": 2.67,
      "learning_rate": 1.2270762630343734e-05,
      "loss": 0.4655,
      "step": 264
    },
    {
      "epoch": 2.68,
      "learning_rate": 1.221761236931958e-05,
      "loss": 0.4848,
      "step": 265
    },
    {
      "epoch": 2.69,
      "learning_rate": 1.2164396139381029e-05,
      "loss": 0.4718,
      "step": 266
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.2111115523589651e-05,
      "loss": 0.4646,
      "step": 267
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.205777210692235e-05,
      "loss": 0.4864,
      "step": 268
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.2004367476224206e-05,
      "loss": 0.4613,
      "step": 269
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.1950903220161286e-05,
      "loss": 0.4384,
      "step": 270
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.1897380929173365e-05,
      "loss": 0.4537,
      "step": 271
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.1843802195426634e-05,
      "loss": 0.4551,
      "step": 272
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.1790168612766331e-05,
      "loss": 0.4861,
      "step": 273
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.4632,
      "step": 274
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.1682743284196595e-05,
      "loss": 0.4581,
      "step": 275
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.162895473394589e-05,
      "loss": 0.4441,
      "step": 276
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.1575117726003979e-05,
      "loss": 0.4707,
      "step": 277
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.1521233861899168e-05,
      "loss": 0.4659,
      "step": 278
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.1467304744553618e-05,
      "loss": 0.4787,
      "step": 279
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.1413331978235677e-05,
      "loss": 0.4743,
      "step": 280
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.1359317168512143e-05,
      "loss": 0.4753,
      "step": 281
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.130526192220052e-05,
      "loss": 0.483,
      "step": 282
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.1251167847321194e-05,
      "loss": 0.4675,
      "step": 283
    },
    {
      "epoch": 2.87,
      "learning_rate": 1.1197036553049626e-05,
      "loss": 0.4936,
      "step": 284
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.1142869649668467e-05,
      "loss": 0.4801,
      "step": 285
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.1088668748519646e-05,
      "loss": 0.4533,
      "step": 286
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.1034435461956465e-05,
      "loss": 0.4461,
      "step": 287
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.098017140329561e-05,
      "loss": 0.5007,
      "step": 288
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.0925878186769159e-05,
      "loss": 0.4827,
      "step": 289
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.0871557427476585e-05,
      "loss": 0.4558,
      "step": 290
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.0817210741336684e-05,
      "loss": 0.4405,
      "step": 291
    },
    {
      "epoch": 2.95,
      "learning_rate": 1.0762839745039526e-05,
      "loss": 0.4622,
      "step": 292
    },
    {
      "epoch": 2.96,
      "learning_rate": 1.0708446055998342e-05,
      "loss": 0.4697,
      "step": 293
    },
    {
      "epoch": 2.97,
      "learning_rate": 1.0654031292301432e-05,
      "loss": 0.4578,
      "step": 294
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.0599597072664012e-05,
      "loss": 0.4447,
      "step": 295
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.0545145016380065e-05,
      "loss": 0.5112,
      "step": 296
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.0490676743274181e-05,
      "loss": 0.2749,
      "step": 297
    },
    {
      "epoch": 3.01,
      "learning_rate": 1.0436193873653362e-05,
      "loss": 0.1844,
      "step": 298
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.0381698028258817e-05,
      "loss": 0.1774,
      "step": 299
    },
    {
      "epoch": 3.03,
      "learning_rate": 1.0327190828217763e-05,
      "loss": 0.1709,
      "step": 300
    },
    {
      "epoch": 3.04,
      "learning_rate": 1.0272673894995187e-05,
      "loss": 0.1601,
      "step": 301
    },
    {
      "epoch": 3.05,
      "learning_rate": 1.0218148850345613e-05,
      "loss": 0.1526,
      "step": 302
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.0163617316264869e-05,
      "loss": 0.1425,
      "step": 303
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.0109080914941825e-05,
      "loss": 0.1597,
      "step": 304
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.0054541268710139e-05,
      "loss": 0.1534,
      "step": 305
    },
    {
      "epoch": 3.09,
      "learning_rate": 1e-05,
      "loss": 0.1495,
      "step": 306
    },
    {
      "epoch": 3.1,
      "learning_rate": 9.945458731289863e-06,
      "loss": 0.1472,
      "step": 307
    },
    {
      "epoch": 3.11,
      "learning_rate": 9.890919085058179e-06,
      "loss": 0.1375,
      "step": 308
    },
    {
      "epoch": 3.12,
      "learning_rate": 9.836382683735133e-06,
      "loss": 0.1383,
      "step": 309
    },
    {
      "epoch": 3.13,
      "learning_rate": 9.78185114965439e-06,
      "loss": 0.1597,
      "step": 310
    },
    {
      "epoch": 3.14,
      "learning_rate": 9.727326105004818e-06,
      "loss": 0.1443,
      "step": 311
    },
    {
      "epoch": 3.15,
      "learning_rate": 9.67280917178224e-06,
      "loss": 0.1381,
      "step": 312
    },
    {
      "epoch": 3.16,
      "learning_rate": 9.618301971741185e-06,
      "loss": 0.1394,
      "step": 313
    },
    {
      "epoch": 3.17,
      "learning_rate": 9.563806126346643e-06,
      "loss": 0.153,
      "step": 314
    },
    {
      "epoch": 3.18,
      "learning_rate": 9.50932325672582e-06,
      "loss": 0.1357,
      "step": 315
    },
    {
      "epoch": 3.19,
      "learning_rate": 9.454854983619936e-06,
      "loss": 0.1409,
      "step": 316
    },
    {
      "epoch": 3.2,
      "learning_rate": 9.400402927335992e-06,
      "loss": 0.1415,
      "step": 317
    },
    {
      "epoch": 3.21,
      "learning_rate": 9.34596870769857e-06,
      "loss": 0.1619,
      "step": 318
    },
    {
      "epoch": 3.22,
      "learning_rate": 9.29155394400166e-06,
      "loss": 0.1427,
      "step": 319
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.237160254960477e-06,
      "loss": 0.1369,
      "step": 320
    },
    {
      "epoch": 3.24,
      "learning_rate": 9.182789258663321e-06,
      "loss": 0.1487,
      "step": 321
    },
    {
      "epoch": 3.25,
      "learning_rate": 9.128442572523418e-06,
      "loss": 0.1432,
      "step": 322
    },
    {
      "epoch": 3.26,
      "learning_rate": 9.074121813230846e-06,
      "loss": 0.141,
      "step": 323
    },
    {
      "epoch": 3.27,
      "learning_rate": 9.019828596704394e-06,
      "loss": 0.1341,
      "step": 324
    },
    {
      "epoch": 3.28,
      "learning_rate": 8.965564538043535e-06,
      "loss": 0.1351,
      "step": 325
    },
    {
      "epoch": 3.29,
      "learning_rate": 8.911331251480357e-06,
      "loss": 0.1387,
      "step": 326
    },
    {
      "epoch": 3.3,
      "learning_rate": 8.857130350331535e-06,
      "loss": 0.1361,
      "step": 327
    },
    {
      "epoch": 3.31,
      "learning_rate": 8.802963446950378e-06,
      "loss": 0.1389,
      "step": 328
    },
    {
      "epoch": 3.32,
      "learning_rate": 8.74883215267881e-06,
      "loss": 0.1342,
      "step": 329
    },
    {
      "epoch": 3.33,
      "learning_rate": 8.694738077799487e-06,
      "loss": 0.1447,
      "step": 330
    },
    {
      "epoch": 3.34,
      "learning_rate": 8.64068283148786e-06,
      "loss": 0.1406,
      "step": 331
    },
    {
      "epoch": 3.35,
      "learning_rate": 8.586668021764328e-06,
      "loss": 0.1533,
      "step": 332
    },
    {
      "epoch": 3.36,
      "learning_rate": 8.532695255446384e-06,
      "loss": 0.1278,
      "step": 333
    },
    {
      "epoch": 3.37,
      "learning_rate": 8.478766138100834e-06,
      "loss": 0.15,
      "step": 334
    },
    {
      "epoch": 3.38,
      "learning_rate": 8.424882273996023e-06,
      "loss": 0.1381,
      "step": 335
    },
    {
      "epoch": 3.39,
      "learning_rate": 8.371045266054114e-06,
      "loss": 0.1488,
      "step": 336
    },
    {
      "epoch": 3.4,
      "learning_rate": 8.317256715803407e-06,
      "loss": 0.1428,
      "step": 337
    },
    {
      "epoch": 3.41,
      "learning_rate": 8.263518223330698e-06,
      "loss": 0.1461,
      "step": 338
    },
    {
      "epoch": 3.42,
      "learning_rate": 8.209831387233675e-06,
      "loss": 0.1474,
      "step": 339
    },
    {
      "epoch": 3.43,
      "learning_rate": 8.156197804573368e-06,
      "loss": 0.1438,
      "step": 340
    },
    {
      "epoch": 3.44,
      "learning_rate": 8.10261907082664e-06,
      "loss": 0.1377,
      "step": 341
    },
    {
      "epoch": 3.45,
      "learning_rate": 8.04909677983872e-06,
      "loss": 0.132,
      "step": 342
    },
    {
      "epoch": 3.46,
      "learning_rate": 7.995632523775795e-06,
      "loss": 0.1411,
      "step": 343
    },
    {
      "epoch": 3.47,
      "learning_rate": 7.942227893077652e-06,
      "loss": 0.132,
      "step": 344
    },
    {
      "epoch": 3.48,
      "learning_rate": 7.888884476410348e-06,
      "loss": 0.1357,
      "step": 345
    },
    {
      "epoch": 3.49,
      "learning_rate": 7.835603860618973e-06,
      "loss": 0.1342,
      "step": 346
    },
    {
      "epoch": 3.51,
      "learning_rate": 7.782387630680422e-06,
      "loss": 0.1559,
      "step": 347
    },
    {
      "epoch": 3.52,
      "learning_rate": 7.72923736965627e-06,
      "loss": 0.1467,
      "step": 348
    },
    {
      "epoch": 3.53,
      "learning_rate": 7.676154658645656e-06,
      "loss": 0.1375,
      "step": 349
    },
    {
      "epoch": 3.54,
      "learning_rate": 7.623141076738271e-06,
      "loss": 0.1342,
      "step": 350
    },
    {
      "epoch": 3.55,
      "learning_rate": 7.570198200967363e-06,
      "loss": 0.1441,
      "step": 351
    },
    {
      "epoch": 3.56,
      "learning_rate": 7.5173276062628364e-06,
      "loss": 0.1452,
      "step": 352
    },
    {
      "epoch": 3.57,
      "learning_rate": 7.4645308654044065e-06,
      "loss": 0.1423,
      "step": 353
    },
    {
      "epoch": 3.58,
      "learning_rate": 7.411809548974792e-06,
      "loss": 0.1344,
      "step": 354
    },
    {
      "epoch": 3.59,
      "learning_rate": 7.359165225313019e-06,
      "loss": 0.1401,
      "step": 355
    },
    {
      "epoch": 3.6,
      "learning_rate": 7.306599460467741e-06,
      "loss": 0.1322,
      "step": 356
    },
    {
      "epoch": 3.61,
      "learning_rate": 7.25411381815068e-06,
      "loss": 0.1501,
      "step": 357
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.201709859690081e-06,
      "loss": 0.1389,
      "step": 358
    },
    {
      "epoch": 3.63,
      "learning_rate": 7.149389143984295e-06,
      "loss": 0.1301,
      "step": 359
    },
    {
      "epoch": 3.64,
      "learning_rate": 7.097153227455379e-06,
      "loss": 0.1395,
      "step": 360
    },
    {
      "epoch": 3.65,
      "learning_rate": 7.04500366400281e-06,
      "loss": 0.1469,
      "step": 361
    },
    {
      "epoch": 3.66,
      "learning_rate": 6.992942004957271e-06,
      "loss": 0.1416,
      "step": 362
    },
    {
      "epoch": 3.67,
      "learning_rate": 6.940969799034465e-06,
      "loss": 0.1482,
      "step": 363
    },
    {
      "epoch": 3.68,
      "learning_rate": 6.889088592289092e-06,
      "loss": 0.1374,
      "step": 364
    },
    {
      "epoch": 3.69,
      "learning_rate": 6.8372999280688175e-06,
      "loss": 0.1352,
      "step": 365
    },
    {
      "epoch": 3.7,
      "learning_rate": 6.785605346968387e-06,
      "loss": 0.1347,
      "step": 366
    },
    {
      "epoch": 3.71,
      "learning_rate": 6.73400638678378e-06,
      "loss": 0.1372,
      "step": 367
    },
    {
      "epoch": 3.72,
      "learning_rate": 6.682504582466482e-06,
      "loss": 0.1379,
      "step": 368
    },
    {
      "epoch": 3.73,
      "learning_rate": 6.631101466077801e-06,
      "loss": 0.138,
      "step": 369
    },
    {
      "epoch": 3.74,
      "learning_rate": 6.579798566743314e-06,
      "loss": 0.1427,
      "step": 370
    },
    {
      "epoch": 3.75,
      "learning_rate": 6.528597410607364e-06,
      "loss": 0.1483,
      "step": 371
    },
    {
      "epoch": 3.76,
      "learning_rate": 6.4774995207876654e-06,
      "loss": 0.127,
      "step": 372
    },
    {
      "epoch": 3.77,
      "learning_rate": 6.42650641733e-06,
      "loss": 0.1327,
      "step": 373
    },
    {
      "epoch": 3.78,
      "learning_rate": 6.375619617162985e-06,
      "loss": 0.1319,
      "step": 374
    },
    {
      "epoch": 3.79,
      "learning_rate": 6.3248406340529665e-06,
      "loss": 0.1455,
      "step": 375
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.274170978558971e-06,
      "loss": 0.1484,
      "step": 376
    },
    {
      "epoch": 3.81,
      "learning_rate": 6.223612157987786e-06,
      "loss": 0.1447,
      "step": 377
    },
    {
      "epoch": 3.82,
      "learning_rate": 6.173165676349103e-06,
      "loss": 0.1379,
      "step": 378
    },
    {
      "epoch": 3.83,
      "learning_rate": 6.122833034310794e-06,
      "loss": 0.1507,
      "step": 379
    },
    {
      "epoch": 3.84,
      "learning_rate": 6.072615729154261e-06,
      "loss": 0.1506,
      "step": 380
    },
    {
      "epoch": 3.85,
      "learning_rate": 6.02251525472989e-06,
      "loss": 0.1437,
      "step": 381
    },
    {
      "epoch": 3.86,
      "learning_rate": 5.97253310141263e-06,
      "loss": 0.156,
      "step": 382
    },
    {
      "epoch": 3.87,
      "learning_rate": 5.922670756057633e-06,
      "loss": 0.1322,
      "step": 383
    },
    {
      "epoch": 3.88,
      "learning_rate": 5.872929701956054e-06,
      "loss": 0.1383,
      "step": 384
    },
    {
      "epoch": 3.89,
      "learning_rate": 5.823311418790894e-06,
      "loss": 0.1378,
      "step": 385
    },
    {
      "epoch": 3.9,
      "learning_rate": 5.773817382593008e-06,
      "loss": 0.1506,
      "step": 386
    },
    {
      "epoch": 3.91,
      "learning_rate": 5.724449065697182e-06,
      "loss": 0.1453,
      "step": 387
    },
    {
      "epoch": 3.92,
      "learning_rate": 5.675207936698337e-06,
      "loss": 0.147,
      "step": 388
    },
    {
      "epoch": 3.93,
      "learning_rate": 5.6260954604078585e-06,
      "loss": 0.1403,
      "step": 389
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.5771130978099896e-06,
      "loss": 0.1287,
      "step": 390
    },
    {
      "epoch": 3.95,
      "learning_rate": 5.5282623060183945e-06,
      "loss": 0.1299,
      "step": 391
    },
    {
      "epoch": 3.96,
      "learning_rate": 5.479544538232804e-06,
      "loss": 0.133,
      "step": 392
    },
    {
      "epoch": 3.97,
      "learning_rate": 5.430961243695794e-06,
      "loss": 0.139,
      "step": 393
    },
    {
      "epoch": 3.98,
      "learning_rate": 5.382513867649663e-06,
      "loss": 0.1268,
      "step": 394
    },
    {
      "epoch": 3.99,
      "learning_rate": 5.334203851293442e-06,
      "loss": 0.1441,
      "step": 395
    },
    {
      "epoch": 4.0,
      "learning_rate": 5.286032631740023e-06,
      "loss": 0.0683,
      "step": 396
    },
    {
      "epoch": 4.01,
      "learning_rate": 5.238001641973422e-06,
      "loss": 0.0522,
      "step": 397
    },
    {
      "epoch": 4.02,
      "learning_rate": 5.190112310806126e-06,
      "loss": 0.0511,
      "step": 398
    },
    {
      "epoch": 4.03,
      "learning_rate": 5.142366062836599e-06,
      "loss": 0.0536,
      "step": 399
    },
    {
      "epoch": 4.04,
      "learning_rate": 5.094764318406921e-06,
      "loss": 0.0448,
      "step": 400
    },
    {
      "epoch": 4.05,
      "learning_rate": 5.047308493560506e-06,
      "loss": 0.0459,
      "step": 401
    },
    {
      "epoch": 4.06,
      "learning_rate": 5.000000000000003e-06,
      "loss": 0.0483,
      "step": 402
    },
    {
      "epoch": 4.07,
      "learning_rate": 4.952840245045279e-06,
      "loss": 0.0452,
      "step": 403
    },
    {
      "epoch": 4.08,
      "learning_rate": 4.9058306315915826e-06,
      "loss": 0.0437,
      "step": 404
    },
    {
      "epoch": 4.09,
      "learning_rate": 4.858972558067784e-06,
      "loss": 0.036,
      "step": 405
    },
    {
      "epoch": 4.1,
      "learning_rate": 4.812267418394784e-06,
      "loss": 0.0435,
      "step": 406
    },
    {
      "epoch": 4.11,
      "learning_rate": 4.765716601944062e-06,
      "loss": 0.0519,
      "step": 407
    },
    {
      "epoch": 4.12,
      "learning_rate": 4.7193214934963204e-06,
      "loss": 0.045,
      "step": 408
    },
    {
      "epoch": 4.13,
      "learning_rate": 4.6730834732003104e-06,
      "loss": 0.0414,
      "step": 409
    },
    {
      "epoch": 4.14,
      "learning_rate": 4.627003916531761e-06,
      "loss": 0.0402,
      "step": 410
    },
    {
      "epoch": 4.15,
      "learning_rate": 4.581084194252486e-06,
      "loss": 0.0436,
      "step": 411
    },
    {
      "epoch": 4.16,
      "learning_rate": 4.535325672369567e-06,
      "loss": 0.0382,
      "step": 412
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.489729712094762e-06,
      "loss": 0.0443,
      "step": 413
    },
    {
      "epoch": 4.18,
      "learning_rate": 4.444297669803981e-06,
      "loss": 0.0419,
      "step": 414
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.399030896996945e-06,
      "loss": 0.0403,
      "step": 415
    },
    {
      "epoch": 4.2,
      "learning_rate": 4.353930740256997e-06,
      "loss": 0.0422,
      "step": 416
    },
    {
      "epoch": 4.21,
      "learning_rate": 4.308998541211016e-06,
      "loss": 0.0449,
      "step": 417
    },
    {
      "epoch": 4.22,
      "learning_rate": 4.264235636489542e-06,
      "loss": 0.0419,
      "step": 418
    },
    {
      "epoch": 4.23,
      "learning_rate": 4.219643357686968e-06,
      "loss": 0.0406,
      "step": 419
    },
    {
      "epoch": 4.24,
      "learning_rate": 4.17522303132198e-06,
      "loss": 0.0371,
      "step": 420
    },
    {
      "epoch": 4.25,
      "learning_rate": 4.1309759787980565e-06,
      "loss": 0.0415,
      "step": 421
    },
    {
      "epoch": 4.26,
      "learning_rate": 4.086903516364179e-06,
      "loss": 0.0383,
      "step": 422
    },
    {
      "epoch": 4.27,
      "learning_rate": 4.043006955075667e-06,
      "loss": 0.0383,
      "step": 423
    },
    {
      "epoch": 4.28,
      "learning_rate": 3.999287600755192e-06,
      "loss": 0.0369,
      "step": 424
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.955746753953912e-06,
      "loss": 0.0391,
      "step": 425
    },
    {
      "epoch": 4.3,
      "learning_rate": 3.912385709912794e-06,
      "loss": 0.042,
      "step": 426
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.869205758524091e-06,
      "loss": 0.0409,
      "step": 427
    },
    {
      "epoch": 4.32,
      "learning_rate": 3.826208184292952e-06,
      "loss": 0.0381,
      "step": 428
    },
    {
      "epoch": 4.33,
      "learning_rate": 3.7833942662992286e-06,
      "loss": 0.0434,
      "step": 429
    },
    {
      "epoch": 4.34,
      "learning_rate": 3.7407652781594094e-06,
      "loss": 0.0405,
      "step": 430
    },
    {
      "epoch": 4.35,
      "learning_rate": 3.698322487988755e-06,
      "loss": 0.0413,
      "step": 431
    },
    {
      "epoch": 4.36,
      "learning_rate": 3.6560671583635467e-06,
      "loss": 0.0416,
      "step": 432
    },
    {
      "epoch": 4.37,
      "learning_rate": 3.614000546283547e-06,
      "loss": 0.0382,
      "step": 433
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.5721239031346067e-06,
      "loss": 0.0461,
      "step": 434
    },
    {
      "epoch": 4.39,
      "learning_rate": 3.5304384746514273e-06,
      "loss": 0.0404,
      "step": 435
    },
    {
      "epoch": 4.4,
      "learning_rate": 3.4889455008805107e-06,
      "loss": 0.042,
      "step": 436
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.4476462161432678e-06,
      "loss": 0.0426,
      "step": 437
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.4065418489993118e-06,
      "loss": 0.0378,
      "step": 438
    },
    {
      "epoch": 4.43,
      "learning_rate": 3.3656336222098907e-06,
      "loss": 0.0411,
      "step": 439
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.324922752701528e-06,
      "loss": 0.0404,
      "step": 440
    },
    {
      "epoch": 4.45,
      "learning_rate": 3.284410451529816e-06,
      "loss": 0.0423,
      "step": 441
    },
    {
      "epoch": 4.46,
      "learning_rate": 3.2440979238433977e-06,
      "loss": 0.0393,
      "step": 442
    },
    {
      "epoch": 4.47,
      "learning_rate": 3.2039863688481055e-06,
      "loss": 0.0401,
      "step": 443
    },
    {
      "epoch": 4.48,
      "learning_rate": 3.1640769797712865e-06,
      "loss": 0.0383,
      "step": 444
    },
    {
      "epoch": 4.49,
      "learning_rate": 3.124370943826326e-06,
      "loss": 0.0396,
      "step": 445
    },
    {
      "epoch": 4.51,
      "learning_rate": 3.0848694421773075e-06,
      "loss": 0.0391,
      "step": 446
    },
    {
      "epoch": 4.52,
      "learning_rate": 3.0455736499038847e-06,
      "loss": 0.0407,
      "step": 447
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.0064847359663284e-06,
      "loss": 0.0404,
      "step": 448
    },
    {
      "epoch": 4.54,
      "learning_rate": 2.967603863170759e-06,
      "loss": 0.0406,
      "step": 449
    },
    {
      "epoch": 4.55,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 0.0451,
      "step": 450
    },
    {
      "epoch": 4.56,
      "learning_rate": 2.8904708612518404e-06,
      "loss": 0.0368,
      "step": 451
    },
    {
      "epoch": 4.57,
      "learning_rate": 2.8522210266595386e-06,
      "loss": 0.0428,
      "step": 452
    },
    {
      "epoch": 4.58,
      "learning_rate": 2.8141838222030195e-06,
      "loss": 0.0406,
      "step": 453
    },
    {
      "epoch": 4.59,
      "learning_rate": 2.776360379402445e-06,
      "loss": 0.0387,
      "step": 454
    },
    {
      "epoch": 4.6,
      "learning_rate": 2.7387518234190414e-06,
      "loss": 0.0396,
      "step": 455
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.7013592730216464e-06,
      "loss": 0.0394,
      "step": 456
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.664183840553417e-06,
      "loss": 0.0423,
      "step": 457
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.6272266318987606e-06,
      "loss": 0.0381,
      "step": 458
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.5904887464504115e-06,
      "loss": 0.0362,
      "step": 459
    },
    {
      "epoch": 4.65,
      "learning_rate": 2.5539712770767377e-06,
      "loss": 0.0397,
      "step": 460
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.5176753100892426e-06,
      "loss": 0.0382,
      "step": 461
    },
    {
      "epoch": 4.67,
      "learning_rate": 2.4816019252102274e-06,
      "loss": 0.039,
      "step": 462
    },
    {
      "epoch": 4.68,
      "learning_rate": 2.4457521955406872e-06,
      "loss": 0.0367,
      "step": 463
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.4101271875283818e-06,
      "loss": 0.0365,
      "step": 464
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.3747279609361197e-06,
      "loss": 0.0415,
      "step": 465
    },
    {
      "epoch": 4.71,
      "learning_rate": 2.339555568810221e-06,
      "loss": 0.0377,
      "step": 466
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.3046110574491986e-06,
      "loss": 0.038,
      "step": 467
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.26989546637263e-06,
      "loss": 0.0421,
      "step": 468
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.2354098282902446e-06,
      "loss": 0.0411,
      "step": 469
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.201155169071184e-06,
      "loss": 0.0386,
      "step": 470
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.1671325077134963e-06,
      "loss": 0.0451,
      "step": 471
    },
    {
      "epoch": 4.77,
      "learning_rate": 2.1333428563138304e-06,
      "loss": 0.0393,
      "step": 472
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.0997872200373114e-06,
      "loss": 0.0374,
      "step": 473
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.0664665970876496e-06,
      "loss": 0.041,
      "step": 474
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.0333819786774446e-06,
      "loss": 0.0384,
      "step": 475
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.0005343489987038e-06,
      "loss": 0.0399,
      "step": 476
    },
    {
      "epoch": 4.82,
      "learning_rate": 1.967924685193552e-06,
      "loss": 0.0413,
      "step": 477
    },
    {
      "epoch": 4.83,
      "learning_rate": 1.9355539573251737e-06,
      "loss": 0.0386,
      "step": 478
    },
    {
      "epoch": 4.84,
      "learning_rate": 1.903423128348959e-06,
      "loss": 0.0378,
      "step": 479
    },
    {
      "epoch": 4.85,
      "learning_rate": 1.8715331540838488e-06,
      "loss": 0.0403,
      "step": 480
    },
    {
      "epoch": 4.86,
      "learning_rate": 1.8398849831839017e-06,
      "loss": 0.0394,
      "step": 481
    },
    {
      "epoch": 4.87,
      "learning_rate": 1.808479557110081e-06,
      "loss": 0.0446,
      "step": 482
    },
    {
      "epoch": 4.88,
      "learning_rate": 1.7773178101022514e-06,
      "loss": 0.043,
      "step": 483
    },
    {
      "epoch": 4.89,
      "learning_rate": 1.7464006691513624e-06,
      "loss": 0.0384,
      "step": 484
    },
    {
      "epoch": 4.9,
      "learning_rate": 1.7157290539719108e-06,
      "loss": 0.0369,
      "step": 485
    },
    {
      "epoch": 4.91,
      "learning_rate": 1.6853038769745466e-06,
      "loss": 0.0399,
      "step": 486
    },
    {
      "epoch": 4.92,
      "learning_rate": 1.655126043238957e-06,
      "loss": 0.0393,
      "step": 487
    },
    {
      "epoch": 4.93,
      "learning_rate": 1.6251964504869221e-06,
      "loss": 0.0421,
      "step": 488
    },
    {
      "epoch": 4.94,
      "learning_rate": 1.5955159890556182e-06,
      "loss": 0.0374,
      "step": 489
    },
    {
      "epoch": 4.95,
      "learning_rate": 1.566085541871145e-06,
      "loss": 0.0358,
      "step": 490
    },
    {
      "epoch": 4.96,
      "learning_rate": 1.5369059844222279e-06,
      "loss": 0.0377,
      "step": 491
    },
    {
      "epoch": 4.97,
      "learning_rate": 1.5079781847342122e-06,
      "loss": 0.0383,
      "step": 492
    },
    {
      "epoch": 4.98,
      "learning_rate": 1.4793030033432143e-06,
      "loss": 0.0421,
      "step": 493
    },
    {
      "epoch": 4.99,
      "learning_rate": 1.4508812932705364e-06,
      "loss": 0.0346,
      "step": 494
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.4227138999972801e-06,
      "loss": 0.0219,
      "step": 495
    },
    {
      "epoch": 5.01,
      "learning_rate": 1.3948016614392113e-06,
      "loss": 0.0232,
      "step": 496
    },
    {
      "epoch": 5.02,
      "learning_rate": 1.3671454079218171e-06,
      "loss": 0.0237,
      "step": 497
    },
    {
      "epoch": 5.03,
      "learning_rate": 1.339745962155613e-06,
      "loss": 0.0228,
      "step": 498
    },
    {
      "epoch": 5.04,
      "learning_rate": 1.3126041392116774e-06,
      "loss": 0.0219,
      "step": 499
    },
    {
      "epoch": 5.05,
      "learning_rate": 1.2857207464973876e-06,
      "loss": 0.022,
      "step": 500
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.2590965837324132e-06,
      "loss": 0.023,
      "step": 501
    },
    {
      "epoch": 5.07,
      "learning_rate": 1.2327324429249232e-06,
      "loss": 0.0199,
      "step": 502
    },
    {
      "epoch": 5.08,
      "learning_rate": 1.2066291083480297e-06,
      "loss": 0.0203,
      "step": 503
    },
    {
      "epoch": 5.09,
      "learning_rate": 1.1807873565164507e-06,
      "loss": 0.0189,
      "step": 504
    },
    {
      "epoch": 5.1,
      "learning_rate": 1.1552079561634111e-06,
      "loss": 0.0185,
      "step": 505
    },
    {
      "epoch": 5.11,
      "learning_rate": 1.129891668217783e-06,
      "loss": 0.0202,
      "step": 506
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.1048392457814406e-06,
      "loss": 0.0247,
      "step": 507
    },
    {
      "epoch": 5.13,
      "learning_rate": 1.0800514341068592e-06,
      "loss": 0.0215,
      "step": 508
    },
    {
      "epoch": 5.14,
      "learning_rate": 1.0555289705749483e-06,
      "loss": 0.0195,
      "step": 509
    },
    {
      "epoch": 5.15,
      "learning_rate": 1.0312725846731174e-06,
      "loss": 0.0176,
      "step": 510
    },
    {
      "epoch": 5.16,
      "learning_rate": 1.0072829979735698e-06,
      "loss": 0.0206,
      "step": 511
    },
    {
      "epoch": 5.17,
      "learning_rate": 9.835609241118404e-07,
      "loss": 0.0178,
      "step": 512
    },
    {
      "epoch": 5.18,
      "learning_rate": 9.601070687655667e-07,
      "loss": 0.0173,
      "step": 513
    },
    {
      "epoch": 5.19,
      "learning_rate": 9.369221296335007e-07,
      "loss": 0.0203,
      "step": 514
    },
    {
      "epoch": 5.2,
      "learning_rate": 9.140067964147447e-07,
      "loss": 0.0222,
      "step": 515
    },
    {
      "epoch": 5.21,
      "learning_rate": 8.91361750788241e-07,
      "loss": 0.0205,
      "step": 516
    },
    {
      "epoch": 5.22,
      "learning_rate": 8.689876663924957e-07,
      "loss": 0.0193,
      "step": 517
    },
    {
      "epoch": 5.23,
      "learning_rate": 8.468852088055291e-07,
      "loss": 0.0185,
      "step": 518
    },
    {
      "epoch": 5.24,
      "learning_rate": 8.250550355250875e-07,
      "loss": 0.0224,
      "step": 519
    },
    {
      "epoch": 5.25,
      "learning_rate": 8.034977959490775e-07,
      "loss": 0.0203,
      "step": 520
    },
    {
      "epoch": 5.26,
      "learning_rate": 7.822141313562548e-07,
      "loss": 0.0219,
      "step": 521
    },
    {
      "epoch": 5.27,
      "learning_rate": 7.612046748871327e-07,
      "loss": 0.0192,
      "step": 522
    },
    {
      "epoch": 5.28,
      "learning_rate": 7.404700515251672e-07,
      "loss": 0.0168,
      "step": 523
    },
    {
      "epoch": 5.29,
      "learning_rate": 7.200108780781556e-07,
      "loss": 0.0189,
      "step": 524
    },
    {
      "epoch": 5.3,
      "learning_rate": 6.998277631598793e-07,
      "loss": 0.0187,
      "step": 525
    },
    {
      "epoch": 5.31,
      "learning_rate": 6.799213071720156e-07,
      "loss": 0.0193,
      "step": 526
    },
    {
      "epoch": 5.32,
      "learning_rate": 6.602921022862663e-07,
      "loss": 0.0214,
      "step": 527
    },
    {
      "epoch": 5.33,
      "learning_rate": 6.409407324267448e-07,
      "loss": 0.0199,
      "step": 528
    },
    {
      "epoch": 5.34,
      "learning_rate": 6.218677732526035e-07,
      "loss": 0.0186,
      "step": 529
    },
    {
      "epoch": 5.35,
      "learning_rate": 6.030737921409169e-07,
      "loss": 0.0201,
      "step": 530
    },
    {
      "epoch": 5.36,
      "learning_rate": 5.845593481697931e-07,
      "loss": 0.0176,
      "step": 531
    },
    {
      "epoch": 5.37,
      "learning_rate": 5.663249921017477e-07,
      "loss": 0.019,
      "step": 532
    },
    {
      "epoch": 5.38,
      "learning_rate": 5.483712663673224e-07,
      "loss": 0.0208,
      "step": 533
    },
    {
      "epoch": 5.39,
      "learning_rate": 5.306987050489442e-07,
      "loss": 0.0192,
      "step": 534
    },
    {
      "epoch": 5.4,
      "learning_rate": 5.133078338650376e-07,
      "loss": 0.0199,
      "step": 535
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.961991701543889e-07,
      "loss": 0.0205,
      "step": 536
    },
    {
      "epoch": 5.42,
      "learning_rate": 4.793732228607573e-07,
      "loss": 0.0186,
      "step": 537
    },
    {
      "epoch": 5.43,
      "learning_rate": 4.628304925177318e-07,
| "loss": 0.0172, |
| "step": 538 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 4.465714712338398e-07, |
| "loss": 0.0224, |
| "step": 539 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 4.305966426779118e-07, |
| "loss": 0.0192, |
| "step": 540 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 4.149064820646953e-07, |
| "loss": 0.0198, |
| "step": 541 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 3.99501456140714e-07, |
| "loss": 0.0192, |
| "step": 542 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 3.8438202317037987e-07, |
| "loss": 0.0191, |
| "step": 543 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 3.6954863292237297e-07, |
| "loss": 0.0182, |
| "step": 544 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 3.550017266562489e-07, |
| "loss": 0.0212, |
| "step": 545 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 3.4074173710931804e-07, |
| "loss": 0.0217, |
| "step": 546 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 3.2676908848377263e-07, |
| "loss": 0.0187, |
| "step": 547 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 3.1308419643406915e-07, |
| "loss": 0.0196, |
| "step": 548 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 2.996874680545603e-07, |
| "loss": 0.0211, |
| "step": 549 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 2.865793018673857e-07, |
| "loss": 0.0203, |
| "step": 550 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 2.7376008781061835e-07, |
| "loss": 0.02, |
| "step": 551 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 2.612302072266637e-07, |
| "loss": 0.0208, |
| "step": 552 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 2.489900328509154e-07, |
| "loss": 0.0233, |
| "step": 553 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 2.370399288006664e-07, |
| "loss": 0.018, |
| "step": 554 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 2.2538025056428216e-07, |
| "loss": 0.0163, |
| "step": 555 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 2.140113449906167e-07, |
| "loss": 0.0204, |
| "step": 556 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 2.0293355027870554e-07, |
| "loss": 0.023, |
| "step": 557 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 1.921471959676957e-07, |
| "loss": 0.0188, |
| "step": 558 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 1.8165260292704712e-07, |
| "loss": 0.0195, |
| "step": 559 |
| }, |
| { |
| "epoch": 5.66, |
| "learning_rate": 1.7145008334698898e-07, |
| "loss": 0.0202, |
| "step": 560 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 1.615399407292251e-07, |
| "loss": 0.0196, |
| "step": 561 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 1.519224698779198e-07, |
| "loss": 0.0191, |
| "step": 562 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 1.4259795689090972e-07, |
| "loss": 0.0205, |
| "step": 563 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 1.3356667915121025e-07, |
| "loss": 0.0199, |
| "step": 564 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 1.2482890531875124e-07, |
| "loss": 0.0178, |
| "step": 565 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 1.1638489532239339e-07, |
| "loss": 0.0222, |
| "step": 566 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 1.0823490035218986e-07, |
| "loss": 0.0216, |
| "step": 567 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 1.0037916285192129e-07, |
| "loss": 0.0197, |
| "step": 568 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 9.281791651187366e-08, |
| "loss": 0.0211, |
| "step": 569 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 8.555138626189619e-08, |
| "loss": 0.019, |
| "step": 570 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 7.857978826470325e-08, |
| "loss": 0.0186, |
| "step": 571 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 7.19033299094496e-08, |
| "loss": 0.0202, |
| "step": 572 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 6.552220980555635e-08, |
| "loss": 0.0187, |
| "step": 573 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 5.943661777680354e-08, |
| "loss": 0.0167, |
| "step": 574 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 5.364673485568794e-08, |
| "loss": 0.0196, |
| "step": 575 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 4.815273327803183e-08, |
| "loss": 0.0191, |
| "step": 576 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 4.295477647786039e-08, |
| "loss": 0.0208, |
| "step": 577 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 3.805301908254455e-08, |
| "loss": 0.0187, |
| "step": 578 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 3.3447606908196815e-08, |
| "loss": 0.0176, |
| "step": 579 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 2.9138676955333676e-08, |
| "loss": 0.0206, |
| "step": 580 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 2.512635740480218e-08, |
| "loss": 0.0177, |
| "step": 581 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 2.1410767613965212e-08, |
| "loss": 0.0181, |
| "step": 582 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 1.79920181131521e-08, |
| "loss": 0.0199, |
| "step": 583 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 1.487021060236904e-08, |
| "loss": 0.0202, |
| "step": 584 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 1.2045437948275952e-08, |
| "loss": 0.0212, |
| "step": 585 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 9.517784181422018e-09, |
| "loss": 0.0189, |
| "step": 586 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 7.2873244937476935e-09, |
| "loss": 0.0204, |
| "step": 587 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 5.354125236343155e-09, |
| "loss": 0.0187, |
| "step": 588 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 3.7182439174832106e-09, |
| "loss": 0.0187, |
| "step": 589 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 2.379729200908676e-09, |
| "loss": 0.0173, |
| "step": 590 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 1.3386209043819708e-09, |
| "loss": 0.0218, |
| "step": 591 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 5.949499985025142e-10, |
| "loss": 0.0191, |
| "step": 592 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 1.487386057841267e-10, |
| "loss": 0.0206, |
| "step": 593 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 0.0, |
| "loss": 0.0174, |
| "step": 594 |
| }, |
| { |
| "epoch": 6.0, |
| "step": 594, |
| "total_flos": 76624596172800.0, |
| "train_loss": 0.5495017380155749, |
| "train_runtime": 4329.0457, |
| "train_samples_per_second": 13.059, |
| "train_steps_per_second": 0.137 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 594, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 50000, |
| "total_flos": 76624596172800.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |