| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2503401360544218, |
| "eval_steps": 500, |
| "global_step": 1058, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 2.2147, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1e-05, |
| "loss": 2.4802, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.5e-05, |
| "loss": 2.0475, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-05, |
| "loss": 1.6983, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.5e-05, |
| "loss": 1.6212, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 3e-05, |
| "loss": 2.0843, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 3.5e-05, |
| "loss": 1.8442, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4e-05, |
| "loss": 1.3359, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.5e-05, |
| "loss": 2.3761, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 5e-05, |
| "loss": 1.6338, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999999826891235e-05, |
| "loss": 1.7487, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999999307564964e-05, |
| "loss": 2.0834, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9999984420212596e-05, |
| "loss": 1.5794, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999997230260242e-05, |
| "loss": 1.7222, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9999956722820765e-05, |
| "loss": 1.4002, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999993768086981e-05, |
| "loss": 1.7314, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999991517675219e-05, |
| "loss": 1.7053, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999988921047102e-05, |
| "loss": 1.9546, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.99998597820299e-05, |
| "loss": 1.6122, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9999826891432896e-05, |
| "loss": 1.7275, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999979053868456e-05, |
| "loss": 1.8279, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9999750723789936e-05, |
| "loss": 1.6764, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9999707446754546e-05, |
| "loss": 1.7501, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999966070758437e-05, |
| "loss": 1.6349, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999961050628588e-05, |
| "loss": 1.6796, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9999556842866034e-05, |
| "loss": 1.6249, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999949971733225e-05, |
| "loss": 2.0866, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999943912969247e-05, |
| "loss": 1.6658, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999937507995506e-05, |
| "loss": 1.7794, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999930756812889e-05, |
| "loss": 1.743, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999923659422332e-05, |
| "loss": 2.1359, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999916215824817e-05, |
| "loss": 1.7393, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999908426021375e-05, |
| "loss": 1.6387, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999900290013085e-05, |
| "loss": 1.974, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999891807801075e-05, |
| "loss": 1.7787, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9998829793865176e-05, |
| "loss": 2.1738, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999873804770636e-05, |
| "loss": 1.73, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999864283954701e-05, |
| "loss": 1.9363, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999854416940032e-05, |
| "loss": 1.248, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999844203727993e-05, |
| "loss": 2.0461, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999833644320002e-05, |
| "loss": 2.1294, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999822738717518e-05, |
| "loss": 1.3963, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9998114869220525e-05, |
| "loss": 1.7942, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999799888935164e-05, |
| "loss": 1.4804, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.99978794475846e-05, |
| "loss": 1.4482, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999775654393591e-05, |
| "loss": 1.0604, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9997630178422624e-05, |
| "loss": 1.6902, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999750035106222e-05, |
| "loss": 1.4982, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9997367061872694e-05, |
| "loss": 2.0546, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.99972303108725e-05, |
| "loss": 1.581, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999709009808057e-05, |
| "loss": 1.4683, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9996946423516326e-05, |
| "loss": 1.9496, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9996799287199666e-05, |
| "loss": 1.3316, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9996648689150966e-05, |
| "loss": 2.0274, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9996494629391076e-05, |
| "loss": 2.1621, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999633710794135e-05, |
| "loss": 1.6314, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999617612482358e-05, |
| "loss": 1.8074, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9996011680060064e-05, |
| "loss": 1.8467, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999584377367359e-05, |
| "loss": 1.2585, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.99956724056874e-05, |
| "loss": 1.8106, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9995497576125225e-05, |
| "loss": 1.7955, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999531928501128e-05, |
| "loss": 1.9608, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.999513753237025e-05, |
| "loss": 2.1124, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999495231822732e-05, |
| "loss": 1.8793, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999476364260812e-05, |
| "loss": 1.6794, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.99945715055388e-05, |
| "loss": 1.7991, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999437590704595e-05, |
| "loss": 2.0194, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999417684715668e-05, |
| "loss": 1.4938, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999397432589853e-05, |
| "loss": 1.9159, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999376834329956e-05, |
| "loss": 1.3969, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9993558899388305e-05, |
| "loss": 1.727, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999334599419375e-05, |
| "loss": 1.8063, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.99931296277454e-05, |
| "loss": 1.9437, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999290980007321e-05, |
| "loss": 1.5978, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9992686511207614e-05, |
| "loss": 1.9653, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9992459761179545e-05, |
| "loss": 2.2229, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999222955002041e-05, |
| "loss": 1.7991, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9991995877762074e-05, |
| "loss": 2.0293, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999175874443692e-05, |
| "loss": 1.5131, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9991518150077765e-05, |
| "loss": 1.41, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999127409471794e-05, |
| "loss": 2.0329, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9991026578391245e-05, |
| "loss": 1.7566, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999077560113196e-05, |
| "loss": 2.1238, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9990521162974824e-05, |
| "loss": 1.4365, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999026326395509e-05, |
| "loss": 1.7559, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.999000190410848e-05, |
| "loss": 2.3886, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998973708347116e-05, |
| "loss": 1.5783, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998946880207983e-05, |
| "loss": 2.0597, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998919705997164e-05, |
| "loss": 1.684, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998892185718422e-05, |
| "loss": 1.5005, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998864319375568e-05, |
| "loss": 1.8982, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998836106972461e-05, |
| "loss": 2.4997, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998807548513008e-05, |
| "loss": 1.9038, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998778644001165e-05, |
| "loss": 1.4326, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998749393440933e-05, |
| "loss": 1.9809, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998719796836366e-05, |
| "loss": 1.727, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9986898541915595e-05, |
| "loss": 1.5198, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998659565510662e-05, |
| "loss": 1.7004, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998628930797866e-05, |
| "loss": 1.7584, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9985979500574166e-05, |
| "loss": 1.7247, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998566623293603e-05, |
| "loss": 1.7967, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998534950510764e-05, |
| "loss": 1.282, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9985029317132845e-05, |
| "loss": 1.7519, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.998470566905601e-05, |
| "loss": 1.8924, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9984378560921937e-05, |
| "loss": 1.411, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9984047992775926e-05, |
| "loss": 1.7251, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9983713964663776e-05, |
| "loss": 1.6784, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998337647663173e-05, |
| "loss": 1.9045, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998303552872652e-05, |
| "loss": 1.1327, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998269112099538e-05, |
| "loss": 1.6382, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998234325348599e-05, |
| "loss": 1.8228, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998199192624654e-05, |
| "loss": 1.9773, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998163713932567e-05, |
| "loss": 1.6035, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998127889277252e-05, |
| "loss": 2.3095, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998091718663671e-05, |
| "loss": 1.4727, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998055202096832e-05, |
| "loss": 1.8166, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.998018339581792e-05, |
| "loss": 1.2842, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997981131123657e-05, |
| "loss": 1.8485, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997943576727579e-05, |
| "loss": 1.1914, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997905676398759e-05, |
| "loss": 1.9409, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997867430142446e-05, |
| "loss": 2.3757, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997828837963937e-05, |
| "loss": 1.6337, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997789899868575e-05, |
| "loss": 1.5789, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9977506158617535e-05, |
| "loss": 1.61, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9977109859489133e-05, |
| "loss": 1.3976, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997671010135542e-05, |
| "loss": 1.3944, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997630688427176e-05, |
| "loss": 1.5922, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9975900208293984e-05, |
| "loss": 2.0075, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997549007347842e-05, |
| "loss": 1.4363, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9975076479881864e-05, |
| "loss": 1.9972, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.99746594275616e-05, |
| "loss": 1.8052, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997423891657538e-05, |
| "loss": 1.9088, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997381494698143e-05, |
| "loss": 1.7322, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9973387518838474e-05, |
| "loss": 1.6391, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9972956632205704e-05, |
| "loss": 1.8346, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997252228714279e-05, |
| "loss": 1.9651, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997208448370988e-05, |
| "loss": 1.5544, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997164322196762e-05, |
| "loss": 1.4443, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.99711985019771e-05, |
| "loss": 2.1679, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997075032379992e-05, |
| "loss": 1.6557, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.997029868749813e-05, |
| "loss": 1.5759, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.99698435931343e-05, |
| "loss": 1.8057, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9969385040771445e-05, |
| "loss": 1.1004, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.996892303047306e-05, |
| "loss": 1.6915, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.996845756230314e-05, |
| "loss": 1.7905, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9967988636326136e-05, |
| "loss": 2.0294, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.996751625260699e-05, |
| "loss": 1.9933, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996704041121113e-05, |
| "loss": 1.5048, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996656111220443e-05, |
| "loss": 2.0881, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996607835565331e-05, |
| "loss": 1.3425, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9965592141624586e-05, |
| "loss": 1.3072, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9965102470185596e-05, |
| "loss": 2.5566, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9964609341404175e-05, |
| "loss": 1.3695, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.99641127553486e-05, |
| "loss": 2.0205, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9963612712087646e-05, |
| "loss": 2.1791, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996310921169056e-05, |
| "loss": 1.6322, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996260225422707e-05, |
| "loss": 1.8695, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9962091839767386e-05, |
| "loss": 2.1406, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.99615779683822e-05, |
| "loss": 1.7668, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996106064014265e-05, |
| "loss": 1.6439, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9960539855120415e-05, |
| "loss": 1.5069, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.996001561338759e-05, |
| "loss": 1.9099, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995948791501679e-05, |
| "loss": 1.5446, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9958956760081085e-05, |
| "loss": 1.8427, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9958422148654045e-05, |
| "loss": 1.4109, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995788408080969e-05, |
| "loss": 1.7544, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9957342556622557e-05, |
| "loss": 1.2727, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995679757616762e-05, |
| "loss": 1.4266, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995624913952036e-05, |
| "loss": 1.3209, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995569724675673e-05, |
| "loss": 1.5993, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995514189795316e-05, |
| "loss": 1.8296, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9954583093186544e-05, |
| "loss": 1.5827, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9954020832534296e-05, |
| "loss": 1.6316, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9953455116074263e-05, |
| "loss": 1.7216, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9952885943884795e-05, |
| "loss": 1.7878, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995231331604472e-05, |
| "loss": 1.7959, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995173723263332e-05, |
| "loss": 1.4636, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.99511576937304e-05, |
| "loss": 1.365, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.995057469941621e-05, |
| "loss": 1.9666, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994998824977147e-05, |
| "loss": 1.4686, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994939834487742e-05, |
| "loss": 1.4651, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994880498481575e-05, |
| "loss": 1.8047, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9948208169668606e-05, |
| "loss": 1.779, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9947607899518675e-05, |
| "loss": 1.172, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994700417444908e-05, |
| "loss": 1.6413, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9946396994543405e-05, |
| "loss": 2.0329, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994578635988576e-05, |
| "loss": 1.8365, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9945172270560695e-05, |
| "loss": 1.8724, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.994455472665327e-05, |
| "loss": 1.7607, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9943933728248996e-05, |
| "loss": 1.6619, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.994330927543387e-05, |
| "loss": 1.9951, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.994268136829438e-05, |
| "loss": 1.5133, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.994205000691747e-05, |
| "loss": 2.238, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.994141519139058e-05, |
| "loss": 1.6153, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.994077692180164e-05, |
| "loss": 1.2494, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9940135198239024e-05, |
| "loss": 1.7389, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9939490020791614e-05, |
| "loss": 1.2335, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993884138954875e-05, |
| "loss": 1.4512, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993818930460026e-05, |
| "loss": 1.8298, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993753376603645e-05, |
| "loss": 1.333, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9936874773948094e-05, |
| "loss": 1.8395, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993621232842648e-05, |
| "loss": 1.7438, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993554642956332e-05, |
| "loss": 1.2777, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993487707745086e-05, |
| "loss": 1.5628, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9934204272181764e-05, |
| "loss": 1.6458, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9933528013849235e-05, |
| "loss": 1.3878, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993284830254691e-05, |
| "loss": 1.6489, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.993216513836893e-05, |
| "loss": 1.4492, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9931478521409895e-05, |
| "loss": 1.598, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.99307884517649e-05, |
| "loss": 1.9357, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9930094929529506e-05, |
| "loss": 1.4987, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992939795479976e-05, |
| "loss": 1.6394, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992869752767218e-05, |
| "loss": 2.1398, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992799364824377e-05, |
| "loss": 1.4962, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992728631661201e-05, |
| "loss": 1.1668, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9926575532874847e-05, |
| "loss": 1.3898, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9925861297130724e-05, |
| "loss": 1.828, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992514360947855e-05, |
| "loss": 1.4453, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9924422470017715e-05, |
| "loss": 1.774, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992369787884809e-05, |
| "loss": 2.0532, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992296983607002e-05, |
| "loss": 1.881, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992223834178433e-05, |
| "loss": 1.7959, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992150339609232e-05, |
| "loss": 2.1218, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992076499909578e-05, |
| "loss": 1.9065, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.992002315089695e-05, |
| "loss": 1.783, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9919277851598575e-05, |
| "loss": 1.5766, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.991852910130388e-05, |
| "loss": 1.2045, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9917776900116556e-05, |
| "loss": 1.7663, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.991702124814075e-05, |
| "loss": 1.6417, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.991626214548113e-05, |
| "loss": 1.6406, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.991549959224282e-05, |
| "loss": 1.9329, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.991473358853142e-05, |
| "loss": 2.0136, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9913964134453014e-05, |
| "loss": 1.4504, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9913191230114156e-05, |
| "loss": 1.9222, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.991241487562189e-05, |
| "loss": 1.9192, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.991163507108373e-05, |
| "loss": 1.5397, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.991085181660766e-05, |
| "loss": 2.1056, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9910065112302175e-05, |
| "loss": 1.8552, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9909274958276185e-05, |
| "loss": 1.5675, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990848135463915e-05, |
| "loss": 1.7246, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990768430150096e-05, |
| "loss": 1.6873, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9906883798971995e-05, |
| "loss": 2.0968, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9906079847163115e-05, |
| "loss": 1.662, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990527244618566e-05, |
| "loss": 2.1308, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990446159615144e-05, |
| "loss": 1.6716, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9903647297172764e-05, |
| "loss": 1.833, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990282954936237e-05, |
| "loss": 2.0275, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990200835283353e-05, |
| "loss": 1.7828, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990118370769997e-05, |
| "loss": 1.8333, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.990035561407588e-05, |
| "loss": 1.5876, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989952407207594e-05, |
| "loss": 1.6541, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989868908181532e-05, |
| "loss": 1.9533, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989785064340965e-05, |
| "loss": 2.1998, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989700875697503e-05, |
| "loss": 1.2615, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9896163422628076e-05, |
| "loss": 2.2554, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9895314640485835e-05, |
| "loss": 1.6735, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9894462410665856e-05, |
| "loss": 1.6169, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989360673328617e-05, |
| "loss": 0.9541, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989274760846527e-05, |
| "loss": 1.4256, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989188503632213e-05, |
| "loss": 1.576, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.989101901697621e-05, |
| "loss": 1.315, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9890149550547454e-05, |
| "loss": 1.6165, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.988927663715626e-05, |
| "loss": 1.7303, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9888400276923505e-05, |
| "loss": 1.389, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9887520469970574e-05, |
| "loss": 1.6816, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9886637216419295e-05, |
| "loss": 1.7488, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.988575051639199e-05, |
| "loss": 1.3843, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9884860370011453e-05, |
| "loss": 2.1209, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.988396677740097e-05, |
| "loss": 1.7382, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9883069738684286e-05, |
| "loss": 1.3885, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.988216925398562e-05, |
| "loss": 1.8881, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.988126532342968e-05, |
| "loss": 2.1026, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9880357947141664e-05, |
| "loss": 1.7111, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9879447125247215e-05, |
| "loss": 1.3994, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9878532857872476e-05, |
| "loss": 1.9436, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9877615145144055e-05, |
| "loss": 1.2814, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9876693987189054e-05, |
| "loss": 1.5255, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.987576938413504e-05, |
| "loss": 1.7781, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9874841336110044e-05, |
| "loss": 1.834, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.987390984324261e-05, |
| "loss": 1.0663, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9872974905661726e-05, |
| "loss": 1.8486, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9872036523496866e-05, |
| "loss": 1.4602, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9871094696877995e-05, |
| "loss": 2.1106, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.987014942593553e-05, |
| "loss": 1.1824, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986920071080039e-05, |
| "loss": 1.5234, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9868248551603945e-05, |
| "loss": 1.2072, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986729294847807e-05, |
| "loss": 1.961, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986633390155511e-05, |
| "loss": 0.9717, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986537141096786e-05, |
| "loss": 1.7839, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986440547684963e-05, |
| "loss": 2.0075, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986343609933418e-05, |
| "loss": 1.4374, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986246327855576e-05, |
| "loss": 1.3924, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986148701464909e-05, |
| "loss": 1.5884, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.986050730774937e-05, |
| "loss": 1.6415, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985952415799228e-05, |
| "loss": 1.8095, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9858537565513976e-05, |
| "loss": 1.9536, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985754753045107e-05, |
| "loss": 1.1484, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9856554052940705e-05, |
| "loss": 1.6013, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9855557133120436e-05, |
| "loss": 1.5254, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985455677112832e-05, |
| "loss": 1.9474, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985355296710291e-05, |
| "loss": 1.816, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985254572118321e-05, |
| "loss": 1.451, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.985153503350872e-05, |
| "loss": 1.353, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.98505209042194e-05, |
| "loss": 1.6463, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.98495033334557e-05, |
| "loss": 1.8436, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984848232135853e-05, |
| "loss": 1.5973, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984745786806929e-05, |
| "loss": 1.6524, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984642997372987e-05, |
| "loss": 1.499, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984539863848259e-05, |
| "loss": 1.8387, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.98443638624703e-05, |
| "loss": 2.1306, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.98433256458363e-05, |
| "loss": 1.9113, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984228398872436e-05, |
| "loss": 1.7941, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984123889127874e-05, |
| "loss": 1.957, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.984019035364418e-05, |
| "loss": 1.5927, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.983913837596588e-05, |
| "loss": 1.6275, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.983808295838953e-05, |
| "loss": 1.5985, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.983702410106128e-05, |
| "loss": 1.5638, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.983596180412778e-05, |
| "loss": 1.431, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.983489606773615e-05, |
| "loss": 1.6035, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9833826892033966e-05, |
| "loss": 2.0482, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.98327542771693e-05, |
| "loss": 1.7167, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9831678223290705e-05, |
| "loss": 1.2944, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9830598730547185e-05, |
| "loss": 1.6265, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.982951579908824e-05, |
| "loss": 1.6205, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.982842942906386e-05, |
| "loss": 2.1644, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9827339620624466e-05, |
| "loss": 1.4672, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9826246373920994e-05, |
| "loss": 1.6029, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9825149689104846e-05, |
| "loss": 1.5438, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.98240495663279e-05, |
| "loss": 1.9061, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9822946005742497e-05, |
| "loss": 1.8342, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9821839007501484e-05, |
| "loss": 1.8016, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9820728571758155e-05, |
| "loss": 1.6384, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9819614698666295e-05, |
| "loss": 1.4692, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9818497388380154e-05, |
| "loss": 1.4886, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9817376641054466e-05, |
| "loss": 1.4903, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.981625245684445e-05, |
| "loss": 2.093, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.981512483590578e-05, |
| "loss": 1.9255, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.981399377839463e-05, |
| "loss": 1.6346, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.981285928446762e-05, |
| "loss": 1.6704, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.981172135428188e-05, |
| "loss": 0.981, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9810579987994974e-05, |
| "loss": 1.7046, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9809435185765e-05, |
| "loss": 1.6879, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980828694775046e-05, |
| "loss": 2.1196, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980713527411041e-05, |
| "loss": 1.6099, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9805980165004304e-05, |
| "loss": 1.6977, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980482162059213e-05, |
| "loss": 1.8916, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980365964103434e-05, |
| "loss": 2.0914, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980249422649183e-05, |
| "loss": 1.268, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.980132537712602e-05, |
| "loss": 1.7568, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9800153093098756e-05, |
| "loss": 1.3604, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9798977374572395e-05, |
| "loss": 1.5185, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.979779822170977e-05, |
| "loss": 1.8898, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9796615634674155e-05, |
| "loss": 2.0664, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.979542961362934e-05, |
| "loss": 1.4639, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9794240158739566e-05, |
| "loss": 1.2679, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9793047270169566e-05, |
| "loss": 1.6697, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.979185094808453e-05, |
| "loss": 1.3827, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.979065119265013e-05, |
| "loss": 1.471, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.9789448004032533e-05, |
| "loss": 2.0245, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.978824138239835e-05, |
| "loss": 1.6898, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.978703132791469e-05, |
| "loss": 1.8281, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.978581784074913e-05, |
| "loss": 1.8089, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.978460092106971e-05, |
| "loss": 1.6343, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9783380569044974e-05, |
| "loss": 2.2785, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9782156784843916e-05, |
| "loss": 1.9567, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9780929568636015e-05, |
| "loss": 1.3461, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.977969892059123e-05, |
| "loss": 1.4954, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.977846484087998e-05, |
| "loss": 1.305, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9777227329673174e-05, |
| "loss": 1.8285, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.977598638714219e-05, |
| "loss": 1.4243, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.97747420134589e-05, |
| "loss": 1.7667, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9773494208795604e-05, |
| "loss": 1.9451, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9772242973325125e-05, |
| "loss": 1.5445, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9770988307220736e-05, |
| "loss": 1.6328, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976973021065619e-05, |
| "loss": 1.6148, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976846868380572e-05, |
| "loss": 1.3625, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976720372684404e-05, |
| "loss": 1.4694, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9765935339946326e-05, |
| "loss": 1.5375, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976466352328822e-05, |
| "loss": 1.805, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976338827704586e-05, |
| "loss": 1.8427, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976210960139586e-05, |
| "loss": 1.9649, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.976082749651529e-05, |
| "loss": 1.4552, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9759541962581715e-05, |
| "loss": 1.7111, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.975825299977315e-05, |
| "loss": 2.0492, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9756960608268104e-05, |
| "loss": 2.4812, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.975566478824556e-05, |
| "loss": 1.3533, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.975436553988498e-05, |
| "loss": 1.7609, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9753062863366276e-05, |
| "loss": 1.3683, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.975175675886986e-05, |
| "loss": 1.5215, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9750447226576617e-05, |
| "loss": 1.2101, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9749134266667894e-05, |
| "loss": 1.5135, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9747817879325514e-05, |
| "loss": 1.9498, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9746498064731786e-05, |
| "loss": 1.9672, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9745174823069486e-05, |
| "loss": 2.1517, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9743848154521863e-05, |
| "loss": 1.411, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.974251805927266e-05, |
| "loss": 1.3972, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.974118453750605e-05, |
| "loss": 1.6883, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.973984758940672e-05, |
| "loss": 1.4422, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.973850721515983e-05, |
| "loss": 1.5046, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.973716341495099e-05, |
| "loss": 1.9012, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.973581618896631e-05, |
| "loss": 1.6414, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.973446553739236e-05, |
| "loss": 1.4029, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.973311146041619e-05, |
| "loss": 1.3923, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9731753958225316e-05, |
| "loss": 0.9104, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.973039303100773e-05, |
| "loss": 1.4314, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972902867895191e-05, |
| "loss": 1.5725, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972766090224681e-05, |
| "loss": 1.8915, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972628970108183e-05, |
| "loss": 2.0525, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972491507564688e-05, |
| "loss": 1.4409, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9723537026132315e-05, |
| "loss": 1.4878, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972215555272899e-05, |
| "loss": 1.6305, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.972077065562821e-05, |
| "loss": 1.6004, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.971938233502178e-05, |
| "loss": 1.6837, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.971799059110195e-05, |
| "loss": 1.6939, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.971659542406145e-05, |
| "loss": 1.6326, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.971519683409352e-05, |
| "loss": 2.0437, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9713794821391825e-05, |
| "loss": 1.8919, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9712389386150535e-05, |
| "loss": 1.4444, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.971098052856428e-05, |
| "loss": 1.7376, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.970956824882816e-05, |
| "loss": 1.2145, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.970815254713779e-05, |
| "loss": 1.7657, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.970673342368919e-05, |
| "loss": 1.4632, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.97053108786789e-05, |
| "loss": 2.0926, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.970388491230393e-05, |
| "loss": 1.6105, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9702455524761764e-05, |
| "loss": 0.8295, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9701022716250346e-05, |
| "loss": 1.5312, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9699586486968094e-05, |
| "loss": 1.2171, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.969814683711391e-05, |
| "loss": 1.6475, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9696703766887174e-05, |
| "loss": 1.3856, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9695257276487736e-05, |
| "loss": 1.413, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9693807366115905e-05, |
| "loss": 1.2724, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.969235403597248e-05, |
| "loss": 2.0263, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.969089728625873e-05, |
| "loss": 1.6963, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968943711717638e-05, |
| "loss": 1.6354, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968797352892768e-05, |
| "loss": 1.7197, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968650652171528e-05, |
| "loss": 1.7091, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9685036095742365e-05, |
| "loss": 2.0653, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968356225121256e-05, |
| "loss": 1.8299, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968208498832997e-05, |
| "loss": 2.0181, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.968060430729918e-05, |
| "loss": 1.7343, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.967912020832526e-05, |
| "loss": 1.9446, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.967763269161372e-05, |
| "loss": 1.4359, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.967614175737056e-05, |
| "loss": 2.211, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.967464740580227e-05, |
| "loss": 1.5243, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.967314963711579e-05, |
| "loss": 1.0646, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.967164845151855e-05, |
| "loss": 1.9905, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.967014384921842e-05, |
| "loss": 1.9194, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.96686358304238e-05, |
| "loss": 1.5094, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.966712439534351e-05, |
| "loss": 1.8683, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9665609544186867e-05, |
| "loss": 1.422, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.966409127716367e-05, |
| "loss": 1.8794, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.966256959448416e-05, |
| "loss": 1.8767, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.966104449635909e-05, |
| "loss": 1.4035, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.965951598299965e-05, |
| "loss": 1.9975, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9657984054617526e-05, |
| "loss": 2.1903, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9656448711424876e-05, |
| "loss": 1.2803, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9654909953634316e-05, |
| "loss": 1.4093, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.965336778145895e-05, |
| "loss": 1.5228, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.965182219511234e-05, |
| "loss": 1.8209, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9650273194808546e-05, |
| "loss": 1.7129, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.964872078076207e-05, |
| "loss": 1.9, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.96471649531879e-05, |
| "loss": 1.5979, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.964560571230151e-05, |
| "loss": 1.6498, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.964404305831883e-05, |
| "loss": 2.0043, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.964247699145626e-05, |
| "loss": 1.7263, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.964090751193069e-05, |
| "loss": 2.1116, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9639334619959464e-05, |
| "loss": 2.2474, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.963775831576041e-05, |
| "loss": 1.6965, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.963617859955183e-05, |
| "loss": 1.7008, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.963459547155249e-05, |
| "loss": 1.7393, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.963300893198164e-05, |
| "loss": 1.9297, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9631418981058974e-05, |
| "loss": 1.6991, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9629825619004704e-05, |
| "loss": 1.2928, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.962822884603948e-05, |
| "loss": 1.7254, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9626628662384434e-05, |
| "loss": 1.9312, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.962502506826117e-05, |
| "loss": 1.5214, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.962341806389176e-05, |
| "loss": 1.3667, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9621807649498764e-05, |
| "loss": 1.7049, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.962019382530521e-05, |
| "loss": 1.7479, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961857659153456e-05, |
| "loss": 1.5101, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961695594841082e-05, |
| "loss": 1.3799, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961533189615839e-05, |
| "loss": 2.4575, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961370443500221e-05, |
| "loss": 1.5559, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961207356516765e-05, |
| "loss": 1.5502, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.961043928688056e-05, |
| "loss": 1.3477, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.960880160036728e-05, |
| "loss": 1.7157, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.960716050585459e-05, |
| "loss": 1.4801, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.960551600356977e-05, |
| "loss": 1.3484, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.960386809374057e-05, |
| "loss": 1.8751, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.960221677659519e-05, |
| "loss": 1.6072, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9600562052362333e-05, |
| "loss": 2.4259, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9598903921271135e-05, |
| "loss": 1.9676, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.959724238355123e-05, |
| "loss": 2.193, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.959557743943274e-05, |
| "loss": 1.8709, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9593909089146224e-05, |
| "loss": 1.5639, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.959223733292272e-05, |
| "loss": 1.5942, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9590562170993755e-05, |
| "loss": 2.474, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958888360359131e-05, |
| "loss": 1.8667, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958720163094786e-05, |
| "loss": 1.9014, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958551625329631e-05, |
| "loss": 1.3199, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958382747087008e-05, |
| "loss": 1.4193, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958213528390305e-05, |
| "loss": 1.6693, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.958043969262955e-05, |
| "loss": 1.3332, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.957874069728441e-05, |
| "loss": 1.3463, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.957703829810292e-05, |
| "loss": 1.3705, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9575332495320826e-05, |
| "loss": 2.1182, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.957362328917437e-05, |
| "loss": 1.7346, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9571910679900255e-05, |
| "loss": 1.3646, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.957019466773565e-05, |
| "loss": 1.9329, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.95684752529182e-05, |
| "loss": 1.8554, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9566752435686036e-05, |
| "loss": 1.9678, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.956502621627773e-05, |
| "loss": 1.9655, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.956329659493234e-05, |
| "loss": 1.5136, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.95615635718894e-05, |
| "loss": 1.5812, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.955982714738892e-05, |
| "loss": 2.4143, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9558087321671374e-05, |
| "loss": 2.0219, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.955634409497768e-05, |
| "loss": 1.3464, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.955459746754928e-05, |
| "loss": 2.011, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.955284743962804e-05, |
| "loss": 1.1818, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9551094011456335e-05, |
| "loss": 1.9715, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.954933718327697e-05, |
| "loss": 2.2617, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.954757695533326e-05, |
| "loss": 2.0038, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.954581332786896e-05, |
| "loss": 1.2001, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.954404630112833e-05, |
| "loss": 1.1356, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.954227587535606e-05, |
| "loss": 1.6941, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9540502050797335e-05, |
| "loss": 1.3234, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9538724827697814e-05, |
| "loss": 2.1369, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.953694420630361e-05, |
| "loss": 2.0145, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.953516018686133e-05, |
| "loss": 1.5505, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.953337276961803e-05, |
| "loss": 1.5479, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.953158195482124e-05, |
| "loss": 2.4011, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.952978774271897e-05, |
| "loss": 1.5701, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9527990133559684e-05, |
| "loss": 1.576, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.952618912759235e-05, |
| "loss": 1.7477, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9524384725066355e-05, |
| "loss": 1.9253, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.952257692623161e-05, |
| "loss": 1.5152, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.952076573133846e-05, |
| "loss": 1.5489, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.951895114063773e-05, |
| "loss": 1.792, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9517133154380724e-05, |
| "loss": 1.4957, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9515311772819214e-05, |
| "loss": 1.9255, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.951348699620542e-05, |
| "loss": 1.5982, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9511658824792065e-05, |
| "loss": 1.4355, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.950982725883232e-05, |
| "loss": 1.4889, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9507992298579845e-05, |
| "loss": 1.4814, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.950615394428874e-05, |
| "loss": 1.7735, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9504312196213596e-05, |
| "loss": 1.5049, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.950246705460949e-05, |
| "loss": 1.516, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.950061851973193e-05, |
| "loss": 1.805, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.949876659183692e-05, |
| "loss": 1.3621, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.949691127118093e-05, |
| "loss": 1.8475, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9495052558020896e-05, |
| "loss": 2.3199, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.949319045261423e-05, |
| "loss": 1.3384, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.949132495521881e-05, |
| "loss": 1.4563, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.948945606609297e-05, |
| "loss": 1.6799, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.948758378549554e-05, |
| "loss": 1.7253, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.94857081136858e-05, |
| "loss": 1.0717, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9483829050923514e-05, |
| "loss": 1.8967, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9481946597468896e-05, |
| "loss": 1.4919, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.948006075358266e-05, |
| "loss": 1.4128, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.947817151952595e-05, |
| "loss": 0.9676, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9476278895560406e-05, |
| "loss": 2.0732, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9474382881948146e-05, |
| "loss": 1.3808, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.947248347895172e-05, |
| "loss": 1.4687, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.947058068683419e-05, |
| "loss": 1.4168, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.946867450585906e-05, |
| "loss": 1.352, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.946676493629031e-05, |
| "loss": 1.8284, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9464851978392396e-05, |
| "loss": 1.2706, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.946293563243023e-05, |
| "loss": 1.4926, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.946101589866921e-05, |
| "loss": 1.3684, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.945909277737519e-05, |
| "loss": 1.4088, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.945716626881449e-05, |
| "loss": 1.5532, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9455236373253924e-05, |
| "loss": 1.7944, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9453303090960745e-05, |
| "loss": 1.4292, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9451366422202684e-05, |
| "loss": 2.0021, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.944942636724795e-05, |
| "loss": 1.7356, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.944748292636522e-05, |
| "loss": 1.6986, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.944553609982363e-05, |
| "loss": 2.0781, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.944358588789279e-05, |
| "loss": 2.2571, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.944163229084278e-05, |
| "loss": 1.5924, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.943967530894415e-05, |
| "loss": 2.0977, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.943771494246791e-05, |
| "loss": 1.8041, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.943575119168555e-05, |
| "loss": 1.6522, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9433784056869023e-05, |
| "loss": 1.4012, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.943181353829076e-05, |
| "loss": 1.3196, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9429839636223644e-05, |
| "loss": 1.7751, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.942786235094103e-05, |
| "loss": 1.3212, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9425881682716747e-05, |
| "loss": 1.7491, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.94238976318251e-05, |
| "loss": 1.7227, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.942191019854085e-05, |
| "loss": 1.4114, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.941991938313924e-05, |
| "loss": 2.0666, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.941792518589596e-05, |
| "loss": 1.7138, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.941592760708718e-05, |
| "loss": 1.7506, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9413926646989545e-05, |
| "loss": 1.8036, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.941192230588015e-05, |
| "loss": 1.357, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.94099145840366e-05, |
| "loss": 1.5612, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9407903481736904e-05, |
| "loss": 1.1086, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.940588899925959e-05, |
| "loss": 1.217, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.940387113688363e-05, |
| "loss": 1.6993, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9401849894888485e-05, |
| "loss": 1.684, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.939982527355407e-05, |
| "loss": 1.8285, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.939779727316075e-05, |
| "loss": 1.28, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.939576589398939e-05, |
| "loss": 1.6247, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9393731136321306e-05, |
| "loss": 1.7047, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9391693000438286e-05, |
| "loss": 1.67, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.93896514866226e-05, |
| "loss": 2.0157, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9387606595156944e-05, |
| "loss": 1.5195, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9385558326324534e-05, |
| "loss": 1.9139, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9383506680409006e-05, |
| "loss": 1.5393, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.938145165769451e-05, |
| "loss": 2.1366, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.9379393258465623e-05, |
| "loss": 1.835, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.93773314830074e-05, |
| "loss": 1.4716, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.93752663316054e-05, |
| "loss": 1.8467, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.937319780454559e-05, |
| "loss": 1.5434, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.937112590211445e-05, |
| "loss": 1.5179, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.936905062459891e-05, |
| "loss": 1.5824, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9366971972286356e-05, |
| "loss": 1.8289, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.936488994546467e-05, |
| "loss": 1.7236, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.936280454442218e-05, |
| "loss": 2.3392, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.936071576944769e-05, |
| "loss": 1.6376, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9358623620830456e-05, |
| "loss": 1.7252, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9356528098860235e-05, |
| "loss": 1.4184, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.93544292038272e-05, |
| "loss": 1.4501, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9352326936022055e-05, |
| "loss": 1.4233, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.935022129573591e-05, |
| "loss": 1.6631, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9348112283260376e-05, |
| "loss": 1.155, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.934599989888753e-05, |
| "loss": 1.2512, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.934388414290991e-05, |
| "loss": 1.3304, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9341765015620514e-05, |
| "loss": 1.5858, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.933964251731281e-05, |
| "loss": 1.3061, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9337516648280756e-05, |
| "loss": 1.782, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.933538740881874e-05, |
| "loss": 1.8033, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.933325479922163e-05, |
| "loss": 1.8176, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9331118819784773e-05, |
| "loss": 1.8735, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.932897947080398e-05, |
| "loss": 1.59, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9326836752575514e-05, |
| "loss": 2.2281, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.932469066539612e-05, |
| "loss": 2.0652, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9322541209563e-05, |
| "loss": 1.9974, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9320388385373825e-05, |
| "loss": 1.5013, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9318232193126737e-05, |
| "loss": 1.5165, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.931607263312032e-05, |
| "loss": 1.4775, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.931390970565368e-05, |
| "loss": 1.8116, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.931174341102634e-05, |
| "loss": 1.731, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9309573749538295e-05, |
| "loss": 1.5851, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9307400721490015e-05, |
| "loss": 1.6169, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.930522432718245e-05, |
| "loss": 1.6315, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.930304456691699e-05, |
| "loss": 1.6771, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.930086144099551e-05, |
| "loss": 1.083, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9298674949720335e-05, |
| "loss": 1.9102, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9296485093394283e-05, |
| "loss": 1.7891, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.92942918723206e-05, |
| "loss": 1.2337, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.929209528680304e-05, |
| "loss": 1.8288, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.928989533714579e-05, |
| "loss": 1.8799, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.92876920236535e-05, |
| "loss": 2.0931, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9285485346631334e-05, |
| "loss": 1.0637, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9283275306384856e-05, |
| "loss": 2.1745, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.928106190322015e-05, |
| "loss": 2.3641, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.927884513744373e-05, |
| "loss": 1.5699, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.92766250093626e-05, |
| "loss": 0.9195, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.927440151928421e-05, |
| "loss": 1.6488, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.927217466751648e-05, |
| "loss": 1.3872, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9269944454367813e-05, |
| "loss": 2.0154, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.926771088014707e-05, |
| "loss": 1.8308, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.926547394516354e-05, |
| "loss": 1.6353, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.926323364972705e-05, |
| "loss": 1.57, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.926098999414782e-05, |
| "loss": 1.9442, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.925874297873659e-05, |
| "loss": 1.8304, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9256492603804526e-05, |
| "loss": 1.399, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.925423886966328e-05, |
| "loss": 1.5874, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.925198177662497e-05, |
| "loss": 1.8739, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.924972132500217e-05, |
| "loss": 1.6697, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.924745751510792e-05, |
| "loss": 1.3031, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9245190347255746e-05, |
| "loss": 1.5218, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.924291982175959e-05, |
| "loss": 2.1374, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.924064593893392e-05, |
| "loss": 1.1152, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.923836869909362e-05, |
| "loss": 1.8031, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9236088102554075e-05, |
| "loss": 2.0702, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.92338041496311e-05, |
| "loss": 1.6122, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9231516840641e-05, |
| "loss": 1.8458, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9229226175900544e-05, |
| "loss": 1.5888, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.922693215572695e-05, |
| "loss": 1.5632, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.922463478043792e-05, |
| "loss": 1.7268, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.922233405035159e-05, |
| "loss": 1.6798, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.922002996578661e-05, |
| "loss": 1.2235, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.921772252706205e-05, |
| "loss": 1.5721, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9215411734497464e-05, |
| "loss": 1.8032, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.921309758841285e-05, |
| "loss": 1.4632, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9210780089128715e-05, |
| "loss": 1.6855, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.920845923696599e-05, |
| "loss": 1.6799, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.920613503224608e-05, |
| "loss": 2.0956, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.920380747529085e-05, |
| "loss": 1.4309, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9201476566422656e-05, |
| "loss": 1.1498, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.919914230596429e-05, |
| "loss": 1.988, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.9196804694239006e-05, |
| "loss": 1.8407, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.919446373157055e-05, |
| "loss": 1.7266, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.91921194182831e-05, |
| "loss": 1.7592, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.918977175470133e-05, |
| "loss": 1.1552, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.918742074115035e-05, |
| "loss": 1.5499, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9185066377955735e-05, |
| "loss": 1.2127, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.918270866544356e-05, |
| "loss": 1.4216, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.918034760394031e-05, |
| "loss": 1.2214, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.917798319377297e-05, |
| "loss": 1.1203, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.917561543526899e-05, |
| "loss": 1.6326, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.917324432875627e-05, |
| "loss": 1.8997, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.917086987456317e-05, |
| "loss": 2.0038, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.916849207301852e-05, |
| "loss": 1.6171, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9166110924451634e-05, |
| "loss": 1.6013, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.916372642919225e-05, |
| "loss": 2.0288, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.916133858757059e-05, |
| "loss": 1.8568, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.915894739991735e-05, |
| "loss": 2.1949, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.915655286656368e-05, |
| "loss": 1.4406, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.915415498784118e-05, |
| "loss": 1.338, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.915175376408193e-05, |
| "loss": 1.5971, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.914934919561848e-05, |
| "loss": 1.8418, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.914694128278381e-05, |
| "loss": 1.2456, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.914453002591139e-05, |
| "loss": 1.3663, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9142115425335154e-05, |
| "loss": 1.226, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.91396974813895e-05, |
| "loss": 1.6677, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.913727619440926e-05, |
| "loss": 2.4861, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.913485156472978e-05, |
| "loss": 1.4411, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9132423592686816e-05, |
| "loss": 1.5114, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.912999227861662e-05, |
| "loss": 1.9969, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.912755762285589e-05, |
| "loss": 1.7201, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.912511962574181e-05, |
| "loss": 1.9378, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.912267828761199e-05, |
| "loss": 1.5512, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9120233608804546e-05, |
| "loss": 1.6215, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9117785589658014e-05, |
| "loss": 1.6503, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.911533423051143e-05, |
| "loss": 1.5132, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9112879531704256e-05, |
| "loss": 2.3205, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.911042149357646e-05, |
| "loss": 1.644, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.910796011646843e-05, |
| "loss": 1.7747, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9105495400721034e-05, |
| "loss": 1.9071, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.910302734667562e-05, |
| "loss": 1.2772, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.910055595467396e-05, |
| "loss": 1.2807, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9098081225058326e-05, |
| "loss": 1.6285, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9095603158171436e-05, |
| "loss": 1.7838, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9093121754356454e-05, |
| "loss": 1.7611, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.909063701395704e-05, |
| "loss": 1.8441, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.908814893731728e-05, |
| "loss": 1.4508, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.908565752478176e-05, |
| "loss": 1.8418, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.90831627766955e-05, |
| "loss": 1.1092, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.908066469340399e-05, |
| "loss": 2.0333, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.907816327525318e-05, |
| "loss": 1.6242, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9075658522589486e-05, |
| "loss": 1.5041, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9073150435759786e-05, |
| "loss": 1.5954, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.907063901511141e-05, |
| "loss": 1.4664, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.906812426099217e-05, |
| "loss": 1.4326, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.90656061737503e-05, |
| "loss": 1.6302, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9063084753734556e-05, |
| "loss": 2.325, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.906056000129411e-05, |
| "loss": 1.2344, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.90580319167786e-05, |
| "loss": 1.5865, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.905550050053813e-05, |
| "loss": 1.4518, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.905296575292329e-05, |
| "loss": 1.8988, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9050427674285085e-05, |
| "loss": 1.625, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9047886264975016e-05, |
| "loss": 1.6413, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9045341525345035e-05, |
| "loss": 1.9323, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.904279345574756e-05, |
| "loss": 1.6475, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9040242056535465e-05, |
| "loss": 1.6182, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.903768732806208e-05, |
| "loss": 1.8794, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9035129270681196e-05, |
| "loss": 2.3609, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.903256788474708e-05, |
| "loss": 1.6079, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9030003170614456e-05, |
| "loss": 1.6279, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9027435128638494e-05, |
| "loss": 1.4993, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.902486375917483e-05, |
| "loss": 1.8698, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.902228906257958e-05, |
| "loss": 1.6502, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.90197110392093e-05, |
| "loss": 1.2136, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9017129689421e-05, |
| "loss": 1.8118, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.901454501357219e-05, |
| "loss": 1.9347, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.9011957012020793e-05, |
| "loss": 1.9713, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.900936568512522e-05, |
| "loss": 2.036, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.900677103324433e-05, |
| "loss": 1.5848, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.900417305673746e-05, |
| "loss": 1.1603, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.900157175596438e-05, |
| "loss": 0.9986, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.8998967131285356e-05, |
| "loss": 1.431, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.899635918306108e-05, |
| "loss": 1.6065, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.899374791165273e-05, |
| "loss": 1.7656, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.899113331742192e-05, |
| "loss": 1.4621, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.898851540073075e-05, |
| "loss": 2.0439, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.898589416194176e-05, |
| "loss": 1.8089, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.898326960141796e-05, |
| "loss": 1.5008, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.898064171952281e-05, |
| "loss": 1.4781, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.897801051662025e-05, |
| "loss": 1.5729, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8975375993074665e-05, |
| "loss": 2.0593, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.89727381492509e-05, |
| "loss": 1.5069, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.897009698551426e-05, |
| "loss": 1.4593, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.896745250223052e-05, |
| "loss": 1.535, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.896480469976589e-05, |
| "loss": 1.4929, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8962153578487055e-05, |
| "loss": 1.2304, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.895949913876119e-05, |
| "loss": 1.9493, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.895684138095588e-05, |
| "loss": 1.4758, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.895418030543919e-05, |
| "loss": 1.8714, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8951515912579645e-05, |
| "loss": 1.3302, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.894884820274623e-05, |
| "loss": 2.0195, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8946177176308396e-05, |
| "loss": 1.5886, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8943502833636026e-05, |
| "loss": 1.7283, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8940825175099504e-05, |
| "loss": 1.3368, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8938144201069635e-05, |
| "loss": 1.2978, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8935459911917704e-05, |
| "loss": 1.4301, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.893277230801546e-05, |
| "loss": 1.726, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.893008138973508e-05, |
| "loss": 1.7124, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.892738715744924e-05, |
| "loss": 1.1582, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.892468961153105e-05, |
| "loss": 1.6847, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.892198875235409e-05, |
| "loss": 1.9052, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.891928458029238e-05, |
| "loss": 1.7829, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8916577095720424e-05, |
| "loss": 1.5794, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.891386629901317e-05, |
| "loss": 2.0447, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.891115219054603e-05, |
| "loss": 2.0359, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.890843477069487e-05, |
| "loss": 2.1139, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.890571403983603e-05, |
| "loss": 1.3969, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.890298999834627e-05, |
| "loss": 1.7877, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8900262646602854e-05, |
| "loss": 1.5858, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.889753198498348e-05, |
| "loss": 1.6977, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8894798013866315e-05, |
| "loss": 1.1872, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8892060733629963e-05, |
| "loss": 2.2305, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.888932014465352e-05, |
| "loss": 1.778, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.888657624731651e-05, |
| "loss": 1.5463, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.888382904199893e-05, |
| "loss": 1.4502, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8881078529081236e-05, |
| "loss": 1.4547, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.887832470894433e-05, |
| "loss": 1.4357, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.887556758196959e-05, |
| "loss": 1.8349, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8872807148538845e-05, |
| "loss": 1.543, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.887004340903436e-05, |
| "loss": 2.1176, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8867276363838896e-05, |
| "loss": 1.6262, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.886450601333564e-05, |
| "loss": 1.5648, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.8861732357908264e-05, |
| "loss": 1.3948, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.885895539794087e-05, |
| "loss": 1.3463, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.885617513381803e-05, |
| "loss": 1.8717, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.885339156592479e-05, |
| "loss": 1.6983, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.885060469464662e-05, |
| "loss": 1.734, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8847814520369475e-05, |
| "loss": 1.7681, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.884502104347976e-05, |
| "loss": 1.2692, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.884222426436433e-05, |
| "loss": 1.4418, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.883942418341051e-05, |
| "loss": 1.1686, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.883662080100606e-05, |
| "loss": 1.7017, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.883381411753922e-05, |
| "loss": 1.4877, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.883100413339869e-05, |
| "loss": 1.5458, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8828190848973595e-05, |
| "loss": 1.5484, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8825374264653565e-05, |
| "loss": 1.834, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.882255438082863e-05, |
| "loss": 2.2466, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8819731197889334e-05, |
| "loss": 1.459, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.881690471622663e-05, |
| "loss": 1.9088, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.881407493623197e-05, |
| "loss": 1.6538, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.881124185829723e-05, |
| "loss": 2.1155, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.880840548281475e-05, |
| "loss": 2.1894, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8805565810177334e-05, |
| "loss": 2.033, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8802722840778246e-05, |
| "loss": 1.9662, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.87998765750112e-05, |
| "loss": 1.6538, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.879702701327035e-05, |
| "loss": 1.259, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.879417415595034e-05, |
| "loss": 1.6262, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.879131800344626e-05, |
| "loss": 1.6802, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8788458556153635e-05, |
| "loss": 1.336, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.878559581446846e-05, |
| "loss": 1.8981, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8782729778787214e-05, |
| "loss": 1.6329, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.877986044950677e-05, |
| "loss": 1.1941, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.877698782702451e-05, |
| "loss": 1.3871, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8774111911738256e-05, |
| "loss": 1.829, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8771232704046283e-05, |
| "loss": 1.4151, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8768350204347324e-05, |
| "loss": 1.7855, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.876546441304056e-05, |
| "loss": 1.3968, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.876257533052565e-05, |
| "loss": 1.8227, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8759682957202685e-05, |
| "loss": 1.4529, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.875678729347223e-05, |
| "loss": 1.1851, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8753888339735274e-05, |
| "loss": 1.2907, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8750986096393315e-05, |
| "loss": 1.7967, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.874808056384825e-05, |
| "loss": 1.4848, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.8745171742502484e-05, |
| "loss": 1.6839, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.874225963275882e-05, |
| "loss": 1.8314, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.873934423502058e-05, |
| "loss": 2.0282, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8736425549691475e-05, |
| "loss": 1.341, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.873350357717573e-05, |
| "loss": 1.4107, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8730578317878e-05, |
| "loss": 1.4731, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.872764977220338e-05, |
| "loss": 1.9332, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.872471794055744e-05, |
| "loss": 1.8369, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.872178282334621e-05, |
| "loss": 1.3453, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.871884442097616e-05, |
| "loss": 1.7878, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.871590273385421e-05, |
| "loss": 1.9997, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.871295776238777e-05, |
| "loss": 1.9064, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8710009506984656e-05, |
| "loss": 1.8554, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8707057968053175e-05, |
| "loss": 2.1312, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8704103146002075e-05, |
| "loss": 1.972, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.870114504124057e-05, |
| "loss": 2.2831, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.86981836541783e-05, |
| "loss": 1.4848, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.869521898522539e-05, |
| "loss": 1.9652, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.86922510347924e-05, |
| "loss": 1.4183, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.868927980329037e-05, |
| "loss": 2.0386, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.868630529113075e-05, |
| "loss": 1.7273, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8683327498725494e-05, |
| "loss": 1.3664, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.868034642648699e-05, |
| "loss": 1.2684, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.867736207482806e-05, |
| "loss": 1.5481, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8674374444162005e-05, |
| "loss": 1.2832, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.867138353490258e-05, |
| "loss": 2.0168, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8668389347463973e-05, |
| "loss": 1.2822, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8665391882260856e-05, |
| "loss": 1.6619, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.866239113970833e-05, |
| "loss": 2.1701, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8659387120221956e-05, |
| "loss": 2.0322, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.865637982421776e-05, |
| "loss": 1.8849, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8653369252112214e-05, |
| "loss": 1.4536, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8650355404322234e-05, |
| "loss": 1.791, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.86473382812652e-05, |
| "loss": 1.9112, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8644317883358956e-05, |
| "loss": 1.5631, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8641294211021774e-05, |
| "loss": 1.4402, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.86382672646724e-05, |
| "loss": 1.6691, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.863523704473002e-05, |
| "loss": 1.4176, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.863220355161429e-05, |
| "loss": 1.6321, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.862916678574531e-05, |
| "loss": 1.6517, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.862612674754362e-05, |
| "loss": 1.6625, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.862308343743024e-05, |
| "loss": 1.2245, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8620036855826624e-05, |
| "loss": 1.1567, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.861698700315468e-05, |
| "loss": 1.4538, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.861393387983677e-05, |
| "loss": 1.5213, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8610877486295726e-05, |
| "loss": 1.8746, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.86078178229548e-05, |
| "loss": 1.8911, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.860475489023773e-05, |
| "loss": 1.3414, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8601688688568695e-05, |
| "loss": 1.3054, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8598619218372315e-05, |
| "loss": 1.9671, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8595546480073675e-05, |
| "loss": 1.9155, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8592470474098304e-05, |
| "loss": 1.7364, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.858939120087219e-05, |
| "loss": 1.8287, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.858630866082179e-05, |
| "loss": 1.2718, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8583222854373964e-05, |
| "loss": 1.9186, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8580133781956086e-05, |
| "loss": 1.441, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.857704144399594e-05, |
| "loss": 1.5738, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.857394584092177e-05, |
| "loss": 1.4612, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8570846973162284e-05, |
| "loss": 1.5269, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.856774484114663e-05, |
| "loss": 1.5464, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.856463944530442e-05, |
| "loss": 1.8616, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.85615307860657e-05, |
| "loss": 1.652, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8558418863860986e-05, |
| "loss": 1.8906, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.855530367912124e-05, |
| "loss": 1.7692, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.855218523227787e-05, |
| "loss": 1.5755, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.854906352376275e-05, |
| "loss": 1.8406, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8545938554008184e-05, |
| "loss": 1.4997, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.854281032344695e-05, |
| "loss": 1.7373, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.853967883251226e-05, |
| "loss": 1.8977, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8536544081637787e-05, |
| "loss": 1.7549, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8533406071257644e-05, |
| "loss": 1.2084, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.853026480180643e-05, |
| "loss": 1.6226, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8527120273719144e-05, |
| "loss": 1.7877, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.852397248743128e-05, |
| "loss": 1.3214, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.852082144337875e-05, |
| "loss": 1.4208, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8517667141997946e-05, |
| "loss": 1.0795, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8514509583725684e-05, |
| "loss": 1.7681, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.851134876899927e-05, |
| "loss": 1.5075, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.85081846982564e-05, |
| "loss": 1.4082, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.850501737193529e-05, |
| "loss": 1.6105, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.850184679047455e-05, |
| "loss": 1.1266, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8498672954313275e-05, |
| "loss": 1.4528, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.849549586389101e-05, |
| "loss": 1.7253, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.849231551964771e-05, |
| "loss": 1.5444, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.8489131922023845e-05, |
| "loss": 2.2302, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.848594507146028e-05, |
| "loss": 1.4719, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8482754968398366e-05, |
| "loss": 1.6143, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8479561613279876e-05, |
| "loss": 2.1498, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.847636500654707e-05, |
| "loss": 1.9538, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.847316514864262e-05, |
| "loss": 2.097, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.846996204000967e-05, |
| "loss": 1.7786, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.846675568109179e-05, |
| "loss": 1.7149, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8463546072333056e-05, |
| "loss": 1.6515, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.846033321417793e-05, |
| "loss": 1.1366, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.845711710707136e-05, |
| "loss": 1.5233, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.845389775145873e-05, |
| "loss": 1.7639, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.845067514778589e-05, |
| "loss": 1.4838, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.844744929649911e-05, |
| "loss": 1.806, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8444220198045154e-05, |
| "loss": 2.4917, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.844098785287119e-05, |
| "loss": 1.6103, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.843775226142486e-05, |
| "loss": 0.8902, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.843451342415426e-05, |
| "loss": 1.7303, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.843127134150791e-05, |
| "loss": 1.5742, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.842802601393482e-05, |
| "loss": 2.2771, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8424777441884405e-05, |
| "loss": 2.0366, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8421525625806555e-05, |
| "loss": 1.2802, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8418270566151606e-05, |
| "loss": 1.2683, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.841501226337035e-05, |
| "loss": 1.4906, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.841175071791401e-05, |
| "loss": 1.508, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.840848593023426e-05, |
| "loss": 1.7541, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.840521790078325e-05, |
| "loss": 1.6975, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.840194663001354e-05, |
| "loss": 1.7784, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.839867211837817e-05, |
| "loss": 1.4971, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.839539436633063e-05, |
| "loss": 2.3331, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.839211337432482e-05, |
| "loss": 2.0467, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.838882914281513e-05, |
| "loss": 1.4363, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.838554167225637e-05, |
| "loss": 1.3551, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.838225096310384e-05, |
| "loss": 1.8533, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8378957015813225e-05, |
| "loss": 1.4613, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.837565983084072e-05, |
| "loss": 1.5865, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.837235940864293e-05, |
| "loss": 1.4011, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8369055749676926e-05, |
| "loss": 1.3844, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8365748854400224e-05, |
| "loss": 1.8292, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8362438723270775e-05, |
| "loss": 1.777, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8359125356747005e-05, |
| "loss": 1.8838, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.835580875528776e-05, |
| "loss": 1.2637, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.8352488919352345e-05, |
| "loss": 1.7713, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.834916584940052e-05, |
| "loss": 1.477, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.834583954589249e-05, |
| "loss": 2.2866, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8342510009288896e-05, |
| "loss": 1.8303, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8339177240050835e-05, |
| "loss": 1.6103, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.833584123863987e-05, |
| "loss": 1.3783, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8332502005517976e-05, |
| "loss": 1.7061, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.83291595411476e-05, |
| "loss": 1.7425, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.832581384599163e-05, |
| "loss": 1.4885, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.83224649205134e-05, |
| "loss": 1.697, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.831911276517669e-05, |
| "loss": 2.0936, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8315757380445734e-05, |
| "loss": 1.7644, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8312398766785205e-05, |
| "loss": 1.2963, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.830903692466024e-05, |
| "loss": 1.9974, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.830567185453638e-05, |
| "loss": 2.0313, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.830230355687968e-05, |
| "loss": 1.603, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.829893203215659e-05, |
| "loss": 1.5684, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.829555728083402e-05, |
| "loss": 1.6818, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.829217930337933e-05, |
| "loss": 1.8331, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.828879810026033e-05, |
| "loss": 1.4052, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8285413671945266e-05, |
| "loss": 2.1854, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.828202601890285e-05, |
| "loss": 1.8066, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8278635141602216e-05, |
| "loss": 1.9945, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8275241040512956e-05, |
| "loss": 1.8455, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.827184371610511e-05, |
| "loss": 1.7102, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.826844316884917e-05, |
| "loss": 1.2846, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.826503939921606e-05, |
| "loss": 0.9758, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8261632407677174e-05, |
| "loss": 1.5587, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8258222194704304e-05, |
| "loss": 1.1849, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8254808760769756e-05, |
| "loss": 1.6938, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.825139210634623e-05, |
| "loss": 1.2542, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.824797223190688e-05, |
| "loss": 0.9688, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.824454913792532e-05, |
| "loss": 1.7337, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.824112282487562e-05, |
| "loss": 2.0502, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.823769329323226e-05, |
| "loss": 1.8759, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.823426054347019e-05, |
| "loss": 1.444, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.82308245760648e-05, |
| "loss": 1.5999, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.822738539149194e-05, |
| "loss": 1.3271, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.822394299022788e-05, |
| "loss": 1.4029, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.822049737274934e-05, |
| "loss": 1.7134, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.821704853953351e-05, |
| "loss": 1.9922, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.821359649105801e-05, |
| "loss": 1.5071, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8210141227800885e-05, |
| "loss": 1.8292, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.820668275024066e-05, |
| "loss": 1.3623, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8203221058856285e-05, |
| "loss": 1.5124, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8199756154127164e-05, |
| "loss": 1.8369, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.819628803653313e-05, |
| "loss": 1.8629, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.819281670655448e-05, |
| "loss": 1.5195, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8189342164671944e-05, |
| "loss": 2.5293, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.818586441136671e-05, |
| "loss": 1.8663, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.818238344712038e-05, |
| "loss": 1.9068, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.817889927241506e-05, |
| "loss": 2.135, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.817541188773322e-05, |
| "loss": 1.6423, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.817192129355785e-05, |
| "loss": 1.3553, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8168427490372335e-05, |
| "loss": 1.2737, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.816493047866053e-05, |
| "loss": 2.1974, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.816143025890672e-05, |
| "loss": 1.5832, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8157926831595636e-05, |
| "loss": 2.2052, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.815442019721247e-05, |
| "loss": 1.6406, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8150910356242834e-05, |
| "loss": 1.8293, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8147397309172803e-05, |
| "loss": 1.3065, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8143881056488884e-05, |
| "loss": 1.7817, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.814036159867803e-05, |
| "loss": 1.3977, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8136838936227645e-05, |
| "loss": 1.4343, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.813331306962558e-05, |
| "loss": 1.4871, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.81297839993601e-05, |
| "loss": 1.6122, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.8126251725919954e-05, |
| "loss": 1.2465, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.812271624979431e-05, |
| "loss": 1.4962, |
| "step": 1058 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 8452, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 529, |
| "total_flos": 6.01330610601984e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|