{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1251700680272109,
  "eval_steps": 500,
  "global_step": 529,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5e-06,
      "loss": 2.2147,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 1e-05,
      "loss": 2.4802,
      "step": 2
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.5e-05,
      "loss": 2.0475,
      "step": 3
    },
    {
      "epoch": 0.0,
      "learning_rate": 2e-05,
      "loss": 1.6983,
      "step": 4
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.5e-05,
      "loss": 1.6212,
      "step": 5
    },
    {
      "epoch": 0.0,
      "learning_rate": 3e-05,
      "loss": 2.0843,
      "step": 6
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5e-05,
      "loss": 1.8442,
      "step": 7
    },
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 1.3359,
      "step": 8
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.5e-05,
      "loss": 2.3761,
      "step": 9
    },
    {
      "epoch": 0.0,
      "learning_rate": 5e-05,
      "loss": 1.6338,
      "step": 10
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999999826891235e-05,
      "loss": 1.7487,
      "step": 11
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999999307564964e-05,
      "loss": 2.0834,
      "step": 12
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9999984420212596e-05,
      "loss": 1.5794,
      "step": 13
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999997230260242e-05,
      "loss": 1.7222,
      "step": 14
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9999956722820765e-05,
      "loss": 1.4002,
      "step": 15
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999993768086981e-05,
      "loss": 1.7314,
      "step": 16
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999991517675219e-05,
      "loss": 1.7053,
      "step": 17
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999988921047102e-05,
      "loss": 1.9546,
      "step": 18
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99998597820299e-05,
      "loss": 1.6122,
      "step": 19
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9999826891432896e-05,
      "loss": 1.7275,
      "step": 20
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999979053868456e-05,
      "loss": 1.8279,
      "step": 21
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9999750723789936e-05,
      "loss": 1.6764,
      "step": 22
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9999707446754546e-05,
      "loss": 1.7501,
      "step": 23
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999966070758437e-05,
      "loss": 1.6349,
      "step": 24
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999961050628588e-05,
      "loss": 1.6796,
      "step": 25
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9999556842866034e-05,
      "loss": 1.6249,
      "step": 26
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999949971733225e-05,
      "loss": 2.0866,
      "step": 27
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999943912969247e-05,
      "loss": 1.6658,
      "step": 28
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999937507995506e-05,
      "loss": 1.7794,
      "step": 29
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999930756812889e-05,
      "loss": 1.743,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999923659422332e-05,
      "loss": 2.1359,
      "step": 31
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999916215824817e-05,
      "loss": 1.7393,
      "step": 32
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999908426021375e-05,
      "loss": 1.6387,
      "step": 33
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999900290013085e-05,
      "loss": 1.974,
      "step": 34
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999891807801075e-05,
      "loss": 1.7787,
      "step": 35
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9998829793865176e-05,
      "loss": 2.1738,
      "step": 36
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999873804770636e-05,
      "loss": 1.73,
      "step": 37
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999864283954701e-05,
      "loss": 1.9363,
      "step": 38
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999854416940032e-05,
      "loss": 1.248,
      "step": 39
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999844203727993e-05,
      "loss": 2.0461,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999833644320002e-05,
      "loss": 2.1294,
      "step": 41
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999822738717518e-05,
      "loss": 1.3963,
      "step": 42
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9998114869220525e-05,
      "loss": 1.7942,
      "step": 43
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999799888935164e-05,
      "loss": 1.4804,
      "step": 44
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.99978794475846e-05,
      "loss": 1.4482,
      "step": 45
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999775654393591e-05,
      "loss": 1.0604,
      "step": 46
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9997630178422624e-05,
      "loss": 1.6902,
      "step": 47
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999750035106222e-05,
      "loss": 1.4982,
      "step": 48
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9997367061872694e-05,
      "loss": 2.0546,
      "step": 49
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.99972303108725e-05,
      "loss": 1.581,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999709009808057e-05,
      "loss": 1.4683,
      "step": 51
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9996946423516326e-05,
      "loss": 1.9496,
      "step": 52
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9996799287199666e-05,
      "loss": 1.3316,
      "step": 53
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9996648689150966e-05,
      "loss": 2.0274,
      "step": 54
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9996494629391076e-05,
      "loss": 2.1621,
      "step": 55
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999633710794135e-05,
      "loss": 1.6314,
      "step": 56
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999617612482358e-05,
      "loss": 1.8074,
      "step": 57
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9996011680060064e-05,
      "loss": 1.8467,
      "step": 58
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999584377367359e-05,
      "loss": 1.2585,
      "step": 59
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.99956724056874e-05,
      "loss": 1.8106,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9995497576125225e-05,
      "loss": 1.7955,
      "step": 61
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999531928501128e-05,
      "loss": 1.9608,
      "step": 62
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999513753237025e-05,
      "loss": 2.1124,
      "step": 63
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999495231822732e-05,
      "loss": 1.8793,
      "step": 64
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999476364260812e-05,
      "loss": 1.6794,
      "step": 65
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.99945715055388e-05,
      "loss": 1.7991,
      "step": 66
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999437590704595e-05,
      "loss": 2.0194,
      "step": 67
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999417684715668e-05,
      "loss": 1.4938,
      "step": 68
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999397432589853e-05,
      "loss": 1.9159,
      "step": 69
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999376834329956e-05,
      "loss": 1.3969,
      "step": 70
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9993558899388305e-05,
      "loss": 1.727,
      "step": 71
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999334599419375e-05,
      "loss": 1.8063,
      "step": 72
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.99931296277454e-05,
      "loss": 1.9437,
      "step": 73
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999290980007321e-05,
      "loss": 1.5978,
      "step": 74
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9992686511207614e-05,
      "loss": 1.9653,
      "step": 75
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9992459761179545e-05,
      "loss": 2.2229,
      "step": 76
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999222955002041e-05,
      "loss": 1.7991,
      "step": 77
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9991995877762074e-05,
      "loss": 2.0293,
      "step": 78
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999175874443692e-05,
      "loss": 1.5131,
      "step": 79
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9991518150077765e-05,
      "loss": 1.41,
      "step": 80
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999127409471794e-05,
      "loss": 2.0329,
      "step": 81
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9991026578391245e-05,
      "loss": 1.7566,
      "step": 82
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999077560113196e-05,
      "loss": 2.1238,
      "step": 83
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9990521162974824e-05,
      "loss": 1.4365,
      "step": 84
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999026326395509e-05,
      "loss": 1.7559,
      "step": 85
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.999000190410848e-05,
      "loss": 2.3886,
      "step": 86
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998973708347116e-05,
      "loss": 1.5783,
      "step": 87
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998946880207983e-05,
      "loss": 2.0597,
      "step": 88
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998919705997164e-05,
      "loss": 1.684,
      "step": 89
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998892185718422e-05,
      "loss": 1.5005,
      "step": 90
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998864319375568e-05,
      "loss": 1.8982,
      "step": 91
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998836106972461e-05,
      "loss": 2.4997,
      "step": 92
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998807548513008e-05,
      "loss": 1.9038,
      "step": 93
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998778644001165e-05,
      "loss": 1.4326,
      "step": 94
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998749393440933e-05,
      "loss": 1.9809,
      "step": 95
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998719796836366e-05,
      "loss": 1.727,
      "step": 96
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9986898541915595e-05,
      "loss": 1.5198,
      "step": 97
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998659565510662e-05,
      "loss": 1.7004,
      "step": 98
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998628930797866e-05,
      "loss": 1.7584,
      "step": 99
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9985979500574166e-05,
      "loss": 1.7247,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998566623293603e-05,
      "loss": 1.7967,
      "step": 101
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998534950510764e-05,
      "loss": 1.282,
      "step": 102
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9985029317132845e-05,
      "loss": 1.7519,
      "step": 103
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998470566905601e-05,
      "loss": 1.8924,
      "step": 104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9984378560921937e-05,
      "loss": 1.411,
      "step": 105
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9984047992775926e-05,
      "loss": 1.7251,
      "step": 106
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9983713964663776e-05,
      "loss": 1.6784,
      "step": 107
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998337647663173e-05,
      "loss": 1.9045,
      "step": 108
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998303552872652e-05,
      "loss": 1.1327,
      "step": 109
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998269112099538e-05,
      "loss": 1.6382,
      "step": 110
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998234325348599e-05,
      "loss": 1.8228,
      "step": 111
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998199192624654e-05,
      "loss": 1.9773,
      "step": 112
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998163713932567e-05,
      "loss": 1.6035,
      "step": 113
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998127889277252e-05,
      "loss": 2.3095,
      "step": 114
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998091718663671e-05,
      "loss": 1.4727,
      "step": 115
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998055202096832e-05,
      "loss": 1.8166,
      "step": 116
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.998018339581792e-05,
      "loss": 1.2842,
      "step": 117
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997981131123657e-05,
      "loss": 1.8485,
      "step": 118
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997943576727579e-05,
      "loss": 1.1914,
      "step": 119
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997905676398759e-05,
      "loss": 1.9409,
      "step": 120
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997867430142446e-05,
      "loss": 2.3757,
      "step": 121
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997828837963937e-05,
      "loss": 1.6337,
      "step": 122
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997789899868575e-05,
      "loss": 1.5789,
      "step": 123
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9977506158617535e-05,
      "loss": 1.61,
      "step": 124
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9977109859489133e-05,
      "loss": 1.3976,
      "step": 125
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997671010135542e-05,
      "loss": 1.3944,
      "step": 126
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997630688427176e-05,
      "loss": 1.5922,
      "step": 127
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9975900208293984e-05,
      "loss": 2.0075,
      "step": 128
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997549007347842e-05,
      "loss": 1.4363,
      "step": 129
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9975076479881864e-05,
      "loss": 1.9972,
      "step": 130
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.99746594275616e-05,
      "loss": 1.8052,
      "step": 131
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997423891657538e-05,
      "loss": 1.9088,
      "step": 132
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997381494698143e-05,
      "loss": 1.7322,
      "step": 133
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9973387518838474e-05,
      "loss": 1.6391,
      "step": 134
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9972956632205704e-05,
      "loss": 1.8346,
      "step": 135
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997252228714279e-05,
      "loss": 1.9651,
      "step": 136
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997208448370988e-05,
      "loss": 1.5544,
      "step": 137
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997164322196762e-05,
      "loss": 1.4443,
      "step": 138
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.99711985019771e-05,
      "loss": 2.1679,
      "step": 139
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997075032379992e-05,
      "loss": 1.6557,
      "step": 140
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997029868749813e-05,
      "loss": 1.5759,
      "step": 141
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.99698435931343e-05,
      "loss": 1.8057,
      "step": 142
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9969385040771445e-05,
      "loss": 1.1004,
      "step": 143
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.996892303047306e-05,
      "loss": 1.6915,
      "step": 144
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.996845756230314e-05,
      "loss": 1.7905,
      "step": 145
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9967988636326136e-05,
      "loss": 2.0294,
      "step": 146
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.996751625260699e-05,
      "loss": 1.9933,
      "step": 147
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996704041121113e-05,
      "loss": 1.5048,
      "step": 148
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996656111220443e-05,
      "loss": 2.0881,
      "step": 149
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996607835565331e-05,
      "loss": 1.3425,
      "step": 150
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9965592141624586e-05,
      "loss": 1.3072,
      "step": 151
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9965102470185596e-05,
      "loss": 2.5566,
      "step": 152
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9964609341404175e-05,
      "loss": 1.3695,
      "step": 153
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.99641127553486e-05,
      "loss": 2.0205,
      "step": 154
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9963612712087646e-05,
      "loss": 2.1791,
      "step": 155
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996310921169056e-05,
      "loss": 1.6322,
      "step": 156
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996260225422707e-05,
      "loss": 1.8695,
      "step": 157
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9962091839767386e-05,
      "loss": 2.1406,
      "step": 158
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.99615779683822e-05,
      "loss": 1.7668,
      "step": 159
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996106064014265e-05,
      "loss": 1.6439,
      "step": 160
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9960539855120415e-05,
      "loss": 1.5069,
      "step": 161
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.996001561338759e-05,
      "loss": 1.9099,
      "step": 162
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995948791501679e-05,
      "loss": 1.5446,
      "step": 163
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9958956760081085e-05,
      "loss": 1.8427,
      "step": 164
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9958422148654045e-05,
      "loss": 1.4109,
      "step": 165
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995788408080969e-05,
      "loss": 1.7544,
      "step": 166
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9957342556622557e-05,
      "loss": 1.2727,
      "step": 167
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995679757616762e-05,
      "loss": 1.4266,
      "step": 168
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995624913952036e-05,
      "loss": 1.3209,
      "step": 169
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995569724675673e-05,
      "loss": 1.5993,
      "step": 170
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995514189795316e-05,
      "loss": 1.8296,
      "step": 171
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9954583093186544e-05,
      "loss": 1.5827,
      "step": 172
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9954020832534296e-05,
      "loss": 1.6316,
      "step": 173
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9953455116074263e-05,
      "loss": 1.7216,
      "step": 174
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9952885943884795e-05,
      "loss": 1.7878,
      "step": 175
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995231331604472e-05,
      "loss": 1.7959,
      "step": 176
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995173723263332e-05,
      "loss": 1.4636,
      "step": 177
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.99511576937304e-05,
      "loss": 1.365,
      "step": 178
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995057469941621e-05,
      "loss": 1.9666,
      "step": 179
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994998824977147e-05,
      "loss": 1.4686,
      "step": 180
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994939834487742e-05,
      "loss": 1.4651,
      "step": 181
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994880498481575e-05,
      "loss": 1.8047,
      "step": 182
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9948208169668606e-05,
      "loss": 1.779,
      "step": 183
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9947607899518675e-05,
      "loss": 1.172,
      "step": 184
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994700417444908e-05,
      "loss": 1.6413,
      "step": 185
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9946396994543405e-05,
      "loss": 2.0329,
      "step": 186
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994578635988576e-05,
      "loss": 1.8365,
      "step": 187
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9945172270560695e-05,
      "loss": 1.8724,
      "step": 188
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994455472665327e-05,
      "loss": 1.7607,
      "step": 189
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9943933728248996e-05,
      "loss": 1.6619,
      "step": 190
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994330927543387e-05,
      "loss": 1.9951,
      "step": 191
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994268136829438e-05,
      "loss": 1.5133,
      "step": 192
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994205000691747e-05,
      "loss": 2.238,
      "step": 193
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994141519139058e-05,
      "loss": 1.6153,
      "step": 194
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.994077692180164e-05,
      "loss": 1.2494,
      "step": 195
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9940135198239024e-05,
      "loss": 1.7389,
      "step": 196
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9939490020791614e-05,
      "loss": 1.2335,
      "step": 197
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993884138954875e-05,
      "loss": 1.4512,
      "step": 198
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993818930460026e-05,
      "loss": 1.8298,
      "step": 199
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993753376603645e-05,
      "loss": 1.333,
      "step": 200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9936874773948094e-05,
      "loss": 1.8395,
      "step": 201
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993621232842648e-05,
      "loss": 1.7438,
      "step": 202
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993554642956332e-05,
      "loss": 1.2777,
      "step": 203
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993487707745086e-05,
      "loss": 1.5628,
      "step": 204
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9934204272181764e-05,
      "loss": 1.6458,
      "step": 205
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9933528013849235e-05,
      "loss": 1.3878,
      "step": 206
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993284830254691e-05,
      "loss": 1.6489,
      "step": 207
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.993216513836893e-05,
      "loss": 1.4492,
      "step": 208
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9931478521409895e-05,
      "loss": 1.598,
      "step": 209
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.99307884517649e-05,
      "loss": 1.9357,
      "step": 210
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9930094929529506e-05,
      "loss": 1.4987,
      "step": 211
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992939795479976e-05,
      "loss": 1.6394,
      "step": 212
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992869752767218e-05,
      "loss": 2.1398,
      "step": 213
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992799364824377e-05,
      "loss": 1.4962,
      "step": 214
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992728631661201e-05,
      "loss": 1.1668,
      "step": 215
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9926575532874847e-05,
      "loss": 1.3898,
      "step": 216
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9925861297130724e-05,
      "loss": 1.828,
      "step": 217
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992514360947855e-05,
      "loss": 1.4453,
      "step": 218
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9924422470017715e-05,
      "loss": 1.774,
      "step": 219
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992369787884809e-05,
      "loss": 2.0532,
      "step": 220
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992296983607002e-05,
      "loss": 1.881,
      "step": 221
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992223834178433e-05,
      "loss": 1.7959,
      "step": 222
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992150339609232e-05,
      "loss": 2.1218,
      "step": 223
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992076499909578e-05,
      "loss": 1.9065,
      "step": 224
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992002315089695e-05,
      "loss": 1.783,
      "step": 225
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9919277851598575e-05,
      "loss": 1.5766,
      "step": 226
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991852910130388e-05,
      "loss": 1.2045,
      "step": 227
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9917776900116556e-05,
      "loss": 1.7663,
      "step": 228
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991702124814075e-05,
      "loss": 1.6417,
      "step": 229
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991626214548113e-05,
      "loss": 1.6406,
      "step": 230
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991549959224282e-05,
      "loss": 1.9329,
      "step": 231
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991473358853142e-05,
      "loss": 2.0136,
      "step": 232
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9913964134453014e-05,
      "loss": 1.4504,
      "step": 233
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9913191230114156e-05,
      "loss": 1.9222,
      "step": 234
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.991241487562189e-05,
      "loss": 1.9192,
      "step": 235
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.991163507108373e-05,
      "loss": 1.5397,
      "step": 236
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.991085181660766e-05,
      "loss": 2.1056,
      "step": 237
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9910065112302175e-05,
      "loss": 1.8552,
      "step": 238
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9909274958276185e-05,
      "loss": 1.5675,
      "step": 239
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990848135463915e-05,
      "loss": 1.7246,
      "step": 240
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990768430150096e-05,
      "loss": 1.6873,
      "step": 241
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9906883798971995e-05,
      "loss": 2.0968,
      "step": 242
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9906079847163115e-05,
      "loss": 1.662,
      "step": 243
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990527244618566e-05,
      "loss": 2.1308,
      "step": 244
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990446159615144e-05,
      "loss": 1.6716,
      "step": 245
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9903647297172764e-05,
      "loss": 1.833,
      "step": 246
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990282954936237e-05,
      "loss": 2.0275,
      "step": 247
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990200835283353e-05,
      "loss": 1.7828,
      "step": 248
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990118370769997e-05,
      "loss": 1.8333,
      "step": 249
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.990035561407588e-05,
      "loss": 1.5876,
      "step": 250
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989952407207594e-05,
      "loss": 1.6541,
      "step": 251
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989868908181532e-05,
      "loss": 1.9533,
      "step": 252
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989785064340965e-05,
      "loss": 2.1998,
      "step": 253
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989700875697503e-05,
      "loss": 1.2615,
      "step": 254
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9896163422628076e-05,
      "loss": 2.2554,
      "step": 255
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9895314640485835e-05,
      "loss": 1.6735,
      "step": 256
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9894462410665856e-05,
      "loss": 1.6169,
      "step": 257
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989360673328617e-05,
      "loss": 0.9541,
      "step": 258
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989274760846527e-05,
      "loss": 1.4256,
      "step": 259
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989188503632213e-05,
      "loss": 1.576,
      "step": 260
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.989101901697621e-05,
      "loss": 1.315,
      "step": 261
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9890149550547454e-05,
      "loss": 1.6165,
      "step": 262
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988927663715626e-05,
      "loss": 1.7303,
      "step": 263
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9888400276923505e-05,
      "loss": 1.389,
      "step": 264
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9887520469970574e-05,
      "loss": 1.6816,
      "step": 265
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9886637216419295e-05,
      "loss": 1.7488,
      "step": 266
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988575051639199e-05,
      "loss": 1.3843,
      "step": 267
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9884860370011453e-05,
      "loss": 2.1209,
      "step": 268
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988396677740097e-05,
      "loss": 1.7382,
      "step": 269
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9883069738684286e-05,
      "loss": 1.3885,
      "step": 270
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988216925398562e-05,
      "loss": 1.8881,
      "step": 271
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988126532342968e-05,
      "loss": 2.1026,
      "step": 272
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9880357947141664e-05,
      "loss": 1.7111,
      "step": 273
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9879447125247215e-05,
      "loss": 1.3994,
      "step": 274
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9878532857872476e-05,
      "loss": 1.9436,
      "step": 275
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9877615145144055e-05,
      "loss": 1.2814,
      "step": 276
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9876693987189054e-05,
      "loss": 1.5255,
      "step": 277
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.987576938413504e-05,
      "loss": 1.7781,
      "step": 278
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9874841336110044e-05,
      "loss": 1.834,
      "step": 279
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.987390984324261e-05,
      "loss": 1.0663,
      "step": 280
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9872974905661726e-05,
      "loss": 1.8486,
      "step": 281
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9872036523496866e-05,
      "loss": 1.4602,
      "step": 282
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9871094696877995e-05,
      "loss": 2.1106,
      "step": 283
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.987014942593553e-05,
      "loss": 1.1824,
      "step": 284
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986920071080039e-05,
      "loss": 1.5234,
      "step": 285
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9868248551603945e-05,
      "loss": 1.2072,
      "step": 286
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986729294847807e-05,
      "loss": 1.961,
      "step": 287
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986633390155511e-05,
      "loss": 0.9717,
      "step": 288
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986537141096786e-05,
      "loss": 1.7839,
      "step": 289
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986440547684963e-05,
      "loss": 2.0075,
      "step": 290
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986343609933418e-05,
      "loss": 1.4374,
      "step": 291
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986246327855576e-05,
      "loss": 1.3924,
      "step": 292
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986148701464909e-05,
      "loss": 1.5884,
      "step": 293
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986050730774937e-05,
      "loss": 1.6415,
      "step": 294
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985952415799228e-05,
      "loss": 1.8095,
      "step": 295
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9858537565513976e-05,
      "loss": 1.9536,
      "step": 296
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985754753045107e-05,
      "loss": 1.1484,
      "step": 297
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9856554052940705e-05,
      "loss": 1.6013,
      "step": 298
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9855557133120436e-05,
      "loss": 1.5254,
      "step": 299
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985455677112832e-05,
      "loss": 1.9474,
      "step": 300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985355296710291e-05,
      "loss": 1.816,
      "step": 301
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985254572118321e-05,
      "loss": 1.451,
      "step": 302
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985153503350872e-05,
      "loss": 1.353,
      "step": 303
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.98505209042194e-05,
      "loss": 1.6463,
      "step": 304
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.98495033334557e-05,
      "loss": 1.8436,
      "step": 305
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984848232135853e-05,
      "loss": 1.5973,
      "step": 306
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984745786806929e-05,
      "loss": 1.6524,
      "step": 307
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984642997372987e-05,
      "loss": 1.499,
      "step": 308
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984539863848259e-05,
      "loss": 1.8387,
      "step": 309
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.98443638624703e-05,
      "loss": 2.1306,
      "step": 310
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.98433256458363e-05,
      "loss": 1.9113,
      "step": 311
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984228398872436e-05,
      "loss": 1.7941,
      "step": 312
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984123889127874e-05,
      "loss": 1.957,
      "step": 313
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984019035364418e-05,
      "loss": 1.5927,
      "step": 314
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.983913837596588e-05,
      "loss": 1.6275,
      "step": 315
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.983808295838953e-05,
      "loss": 1.5985,
      "step": 316
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.983702410106128e-05,
      "loss": 1.5638,
      "step": 317
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.983596180412778e-05,
      "loss": 1.431,
      "step": 318
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.983489606773615e-05,
      "loss": 1.6035,
      "step": 319
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9833826892033966e-05,
      "loss": 2.0482,
      "step": 320
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.98327542771693e-05,
      "loss": 1.7167,
      "step": 321
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9831678223290705e-05,
      "loss": 1.2944,
      "step": 322
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9830598730547185e-05,
      "loss": 1.6265,
      "step": 323
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.982951579908824e-05,
      "loss": 1.6205,
      "step": 324
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.982842942906386e-05,
      "loss": 2.1644,
      "step": 325
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9827339620624466e-05,
      "loss": 1.4672,
      "step": 326
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9826246373920994e-05,
      "loss": 1.6029,
      "step": 327
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9825149689104846e-05,
      "loss": 1.5438,
      "step": 328
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.98240495663279e-05,
      "loss": 1.9061,
      "step": 329
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9822946005742497e-05,
      "loss": 1.8342,
      "step": 330
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9821839007501484e-05,
      "loss": 1.8016,
      "step": 331
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9820728571758155e-05,
      "loss": 1.6384,
      "step": 332
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9819614698666295e-05,
      "loss": 1.4692,
      "step": 333
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9818497388380154e-05,
      "loss": 1.4886,
      "step": 334
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9817376641054466e-05,
      "loss": 1.4903,
      "step": 335
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981625245684445e-05,
      "loss": 2.093,
      "step": 336
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981512483590578e-05,
      "loss": 1.9255,
      "step": 337
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981399377839463e-05,
      "loss": 1.6346,
      "step": 338
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981285928446762e-05,
      "loss": 1.6704,
      "step": 339
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981172135428188e-05,
      "loss": 0.981,
      "step": 340
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9810579987994974e-05,
      "loss": 1.7046,
      "step": 341
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9809435185765e-05,
      "loss": 1.6879,
      "step": 342
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980828694775046e-05,
      "loss": 2.1196,
      "step": 343
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980713527411041e-05,
      "loss": 1.6099,
      "step": 344
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9805980165004304e-05,
      "loss": 1.6977,
      "step": 345
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980482162059213e-05,
      "loss": 1.8916,
      "step": 346
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980365964103434e-05,
      "loss": 2.0914,
      "step": 347
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980249422649183e-05,
      "loss": 1.268,
      "step": 348
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.980132537712602e-05,
      "loss": 1.7568,
      "step": 349
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9800153093098756e-05,
      "loss": 1.3604,
      "step": 350
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9798977374572395e-05,
      "loss": 1.5185,
      "step": 351
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.979779822170977e-05,
      "loss": 1.8898,
      "step": 352
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9796615634674155e-05,
      "loss": 2.0664,
      "step": 353
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.979542961362934e-05,
      "loss": 1.4639,
      "step": 354
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9794240158739566e-05,
      "loss": 1.2679,
      "step": 355
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9793047270169566e-05,
      "loss": 1.6697,
      "step": 356
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.979185094808453e-05,
      "loss": 1.3827,
      "step": 357
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.979065119265013e-05,
      "loss": 1.471,
      "step": 358
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9789448004032533e-05,
      "loss": 2.0245,
      "step": 359
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.978824138239835e-05,
      "loss": 1.6898,
      "step": 360
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.978703132791469e-05,
      "loss": 1.8281,
      "step": 361
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.978581784074913e-05,
      "loss": 1.8089,
      "step": 362
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.978460092106971e-05,
      "loss": 1.6343,
      "step": 363
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9783380569044974e-05,
      "loss": 2.2785,
      "step": 364
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9782156784843916e-05,
      "loss": 1.9567,
      "step": 365
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9780929568636015e-05,
      "loss": 1.3461,
      "step": 366
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.977969892059123e-05,
      "loss": 1.4954,
      "step": 367
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.977846484087998e-05,
      "loss": 1.305,
      "step": 368
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9777227329673174e-05,
      "loss": 1.8285,
      "step": 369
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.977598638714219e-05,
      "loss": 1.4243,
      "step": 370
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.97747420134589e-05,
      "loss": 1.7667,
      "step": 371
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9773494208795604e-05,
      "loss": 1.9451,
      "step": 372
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9772242973325125e-05,
      "loss": 1.5445,
      "step": 373
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9770988307220736e-05,
      "loss": 1.6328,
      "step": 374
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976973021065619e-05,
      "loss": 1.6148,
      "step": 375
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976846868380572e-05,
      "loss": 1.3625,
      "step": 376
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976720372684404e-05,
      "loss": 1.4694,
      "step": 377
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9765935339946326e-05,
      "loss": 1.5375,
      "step": 378
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976466352328822e-05,
      "loss": 1.805,
      "step": 379
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976338827704586e-05,
      "loss": 1.8427,
      "step": 380
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976210960139586e-05,
      "loss": 1.9649,
      "step": 381
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.976082749651529e-05,
      "loss": 1.4552,
      "step": 382
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9759541962581715e-05,
      "loss": 1.7111,
      "step": 383
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.975825299977315e-05,
      "loss": 2.0492,
      "step": 384
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9756960608268104e-05,
      "loss": 2.4812,
      "step": 385
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.975566478824556e-05,
      "loss": 1.3533,
      "step": 386
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.975436553988498e-05,
      "loss": 1.7609,
      "step": 387
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9753062863366276e-05,
      "loss": 1.3683,
      "step": 388
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.975175675886986e-05,
      "loss": 1.5215,
      "step": 389
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9750447226576617e-05,
      "loss": 1.2101,
      "step": 390
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9749134266667894e-05,
      "loss": 1.5135,
      "step": 391
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9747817879325514e-05,
      "loss": 1.9498,
      "step": 392
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9746498064731786e-05,
      "loss": 1.9672,
      "step": 393
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9745174823069486e-05,
      "loss": 2.1517,
      "step": 394
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9743848154521863e-05,
      "loss": 1.411,
      "step": 395
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.974251805927266e-05,
      "loss": 1.3972,
      "step": 396
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.974118453750605e-05,
      "loss": 1.6883,
      "step": 397
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.973984758940672e-05,
      "loss": 1.4422,
      "step": 398
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.973850721515983e-05,
      "loss": 1.5046,
      "step": 399
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.973716341495099e-05,
      "loss": 1.9012,
      "step": 400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.973581618896631e-05,
      "loss": 1.6414,
      "step": 401
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.973446553739236e-05,
      "loss": 1.4029,
      "step": 402
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.973311146041619e-05,
      "loss": 1.3923,
      "step": 403
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9731753958225316e-05,
      "loss": 0.9104,
      "step": 404
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.973039303100773e-05,
      "loss": 1.4314,
      "step": 405
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972902867895191e-05,
      "loss": 1.5725,
      "step": 406
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972766090224681e-05,
      "loss": 1.8915,
      "step": 407
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972628970108183e-05,
      "loss": 2.0525,
      "step": 408
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972491507564688e-05,
      "loss": 1.4409,
      "step": 409
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9723537026132315e-05,
      "loss": 1.4878,
      "step": 410
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972215555272899e-05,
      "loss": 1.6305,
      "step": 411
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.972077065562821e-05,
      "loss": 1.6004,
      "step": 412
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.971938233502178e-05,
      "loss": 1.6837,
      "step": 413
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.971799059110195e-05,
      "loss": 1.6939,
      "step": 414
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.971659542406145e-05,
      "loss": 1.6326,
      "step": 415
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.971519683409352e-05,
      "loss": 2.0437,
      "step": 416
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9713794821391825e-05,
      "loss": 1.8919,
      "step": 417
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9712389386150535e-05,
      "loss": 1.4444,
      "step": 418
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.971098052856428e-05,
      "loss": 1.7376,
      "step": 419
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.970956824882816e-05,
      "loss": 1.2145,
      "step": 420
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.970815254713779e-05,
      "loss": 1.7657,
      "step": 421
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.970673342368919e-05,
      "loss": 1.4632,
      "step": 422
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.97053108786789e-05,
      "loss": 2.0926,
      "step": 423
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.970388491230393e-05,
      "loss": 1.6105,
      "step": 424
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9702455524761764e-05,
      "loss": 0.8295,
      "step": 425
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9701022716250346e-05,
      "loss": 1.5312,
      "step": 426
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9699586486968094e-05,
      "loss": 1.2171,
      "step": 427
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.969814683711391e-05,
      "loss": 1.6475,
      "step": 428
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9696703766887174e-05,
      "loss": 1.3856,
      "step": 429
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9695257276487736e-05,
      "loss": 1.413,
      "step": 430
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9693807366115905e-05,
      "loss": 1.2724,
      "step": 431
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.969235403597248e-05,
      "loss": 2.0263,
      "step": 432
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.969089728625873e-05,
      "loss": 1.6963,
      "step": 433
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968943711717638e-05,
      "loss": 1.6354,
      "step": 434
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968797352892768e-05,
      "loss": 1.7197,
      "step": 435
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968650652171528e-05,
      "loss": 1.7091,
      "step": 436
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9685036095742365e-05,
      "loss": 2.0653,
      "step": 437
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968356225121256e-05,
      "loss": 1.8299,
      "step": 438
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968208498832997e-05,
      "loss": 2.0181,
      "step": 439
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968060430729918e-05,
      "loss": 1.7343,
      "step": 440
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.967912020832526e-05,
      "loss": 1.9446,
      "step": 441
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.967763269161372e-05,
      "loss": 1.4359,
      "step": 442
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.967614175737056e-05,
      "loss": 2.211,
      "step": 443
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.967464740580227e-05,
      "loss": 1.5243,
      "step": 444
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.967314963711579e-05,
      "loss": 1.0646,
      "step": 445
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.967164845151855e-05,
      "loss": 1.9905,
      "step": 446
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.967014384921842e-05,
      "loss": 1.9194,
      "step": 447
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.96686358304238e-05,
      "loss": 1.5094,
      "step": 448
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.966712439534351e-05,
      "loss": 1.8683,
      "step": 449
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9665609544186867e-05,
      "loss": 1.422,
      "step": 450
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.966409127716367e-05,
      "loss": 1.8794,
      "step": 451
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.966256959448416e-05,
      "loss": 1.8767,
      "step": 452
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.966104449635909e-05,
      "loss": 1.4035,
      "step": 453
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.965951598299965e-05,
      "loss": 1.9975,
      "step": 454
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9657984054617526e-05,
      "loss": 2.1903,
      "step": 455
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9656448711424876e-05,
      "loss": 1.2803,
      "step": 456
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9654909953634316e-05,
      "loss": 1.4093,
      "step": 457
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.965336778145895e-05,
      "loss": 1.5228,
      "step": 458
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.965182219511234e-05,
      "loss": 1.8209,
      "step": 459
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9650273194808546e-05,
      "loss": 1.7129,
      "step": 460
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964872078076207e-05,
      "loss": 1.9,
      "step": 461
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.96471649531879e-05,
      "loss": 1.5979,
      "step": 462
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964560571230151e-05,
      "loss": 1.6498,
      "step": 463
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964404305831883e-05,
      "loss": 2.0043,
      "step": 464
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964247699145626e-05,
      "loss": 1.7263,
      "step": 465
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964090751193069e-05,
      "loss": 2.1116,
      "step": 466
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9639334619959464e-05,
      "loss": 2.2474,
      "step": 467
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.963775831576041e-05,
      "loss": 1.6965,
      "step": 468
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.963617859955183e-05,
      "loss": 1.7008,
      "step": 469
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.963459547155249e-05,
      "loss": 1.7393,
      "step": 470
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.963300893198164e-05,
      "loss": 1.9297,
      "step": 471
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9631418981058974e-05,
      "loss": 1.6991,
      "step": 472
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9629825619004704e-05,
      "loss": 1.2928,
      "step": 473
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.962822884603948e-05,
      "loss": 1.7254,
      "step": 474
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9626628662384434e-05,
      "loss": 1.9312,
      "step": 475
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.962502506826117e-05,
      "loss": 1.5214,
      "step": 476
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.962341806389176e-05,
      "loss": 1.3667,
      "step": 477
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.9621807649498764e-05,
      "loss": 1.7049,
      "step": 478
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.962019382530521e-05,
      "loss": 1.7479,
      "step": 479
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961857659153456e-05,
      "loss": 1.5101,
      "step": 480
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961695594841082e-05,
      "loss": 1.3799,
      "step": 481
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961533189615839e-05,
      "loss": 2.4575,
      "step": 482
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961370443500221e-05,
      "loss": 1.5559,
      "step": 483
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961207356516765e-05,
      "loss": 1.5502,
      "step": 484
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.961043928688056e-05,
      "loss": 1.3477,
      "step": 485
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.960880160036728e-05,
      "loss": 1.7157,
      "step": 486
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.960716050585459e-05,
      "loss": 1.4801,
      "step": 487
    },
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960551600356977e-05, | |
| "loss": 1.3484, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960386809374057e-05, | |
| "loss": 1.8751, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960221677659519e-05, | |
| "loss": 1.6072, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9600562052362333e-05, | |
| "loss": 2.4259, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9598903921271135e-05, | |
| "loss": 1.9676, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959724238355123e-05, | |
| "loss": 2.193, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959557743943274e-05, | |
| "loss": 1.8709, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9593909089146224e-05, | |
| "loss": 1.5639, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959223733292272e-05, | |
| "loss": 1.5942, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9590562170993755e-05, | |
| "loss": 2.474, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958888360359131e-05, | |
| "loss": 1.8667, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958720163094786e-05, | |
| "loss": 1.9014, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958551625329631e-05, | |
| "loss": 1.3199, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958382747087008e-05, | |
| "loss": 1.4193, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958213528390305e-05, | |
| "loss": 1.6693, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958043969262955e-05, | |
| "loss": 1.3332, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957874069728441e-05, | |
| "loss": 1.3463, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957703829810292e-05, | |
| "loss": 1.3705, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9575332495320826e-05, | |
| "loss": 2.1182, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957362328917437e-05, | |
| "loss": 1.7346, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9571910679900255e-05, | |
| "loss": 1.3646, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957019466773565e-05, | |
| "loss": 1.9329, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.95684752529182e-05, | |
| "loss": 1.8554, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9566752435686036e-05, | |
| "loss": 1.9678, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.956502621627773e-05, | |
| "loss": 1.9655, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.956329659493234e-05, | |
| "loss": 1.5136, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.95615635718894e-05, | |
| "loss": 1.5812, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955982714738892e-05, | |
| "loss": 2.4143, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9558087321671374e-05, | |
| "loss": 2.0219, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955634409497768e-05, | |
| "loss": 1.3464, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955459746754928e-05, | |
| "loss": 2.011, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955284743962804e-05, | |
| "loss": 1.1818, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9551094011456335e-05, | |
| "loss": 1.9715, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954933718327697e-05, | |
| "loss": 2.2617, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954757695533326e-05, | |
| "loss": 2.0038, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954581332786896e-05, | |
| "loss": 1.2001, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954404630112833e-05, | |
| "loss": 1.1356, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954227587535606e-05, | |
| "loss": 1.6941, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9540502050797335e-05, | |
| "loss": 1.3234, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9538724827697814e-05, | |
| "loss": 2.1369, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.953694420630361e-05, | |
| "loss": 2.0145, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.953516018686133e-05, | |
| "loss": 1.5505, | |
| "step": 529 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 8452, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 529, | |
| "total_flos": 3.00665305300992e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
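
The state above is the kind of file the Hugging Face `transformers` Trainer writes into each checkpoint directory; this one was saved at step 529 of 8452 (matching `save_steps: 529`), and since `logging_steps` is 1, every optimizer step contributes one `log_history` entry. That makes the loss and learning-rate series directly readable from the file. Below is a minimal sketch of how one might inspect them; the filename `trainer_state.json` and the use of `matplotlib` are assumptions for illustration, not part of the original run.

```python
# Minimal sketch: load a Trainer state file and plot its training curves.
# Assumes the JSON above is saved as "trainer_state.json" (an assumed path).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training entries; evaluation entries, when present, carry
# "eval_loss" rather than "loss" and are skipped by this guard.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

print(f"step {state['global_step']} / {state['max_steps']}, "
      f"epoch {state['epoch']:.4f} / {state['num_train_epochs']}")

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

The learning-rate panel is a quick sanity check on the schedule: here it decays very slowly from 5e-05 over the first few hundred steps, which is consistent with a long decay horizon (8452 total steps), though the schedule type itself is not recorded in this file.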