diff --git "a/exgra-med-gpt-medrag-only/trainer_state.json" "b/exgra-med-gpt-medrag-only/trainer_state.json" new file mode 100644--- /dev/null +++ "b/exgra-med-gpt-medrag-only/trainer_state.json" @@ -0,0 +1,8113 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "global_step": 1348, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.878048780487805e-07, + "loss": 5.855, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 9.75609756097561e-07, + "loss": 5.8735, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.4634146341463414e-06, + "loss": 4.3488, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.951219512195122e-06, + "loss": 3.2056, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2.4390243902439027e-06, + "loss": 1.2717, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 2.926829268292683e-06, + "loss": 0.8925, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 3.414634146341464e-06, + "loss": 0.5534, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 3.902439024390244e-06, + "loss": 0.6385, + "step": 8 + }, + { + "epoch": 0.03, + "learning_rate": 4.390243902439025e-06, + "loss": 0.4929, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 4.8780487804878055e-06, + "loss": 0.3698, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 5.365853658536586e-06, + "loss": 0.1405, + "step": 11 + }, + { + "epoch": 0.04, + "learning_rate": 5.853658536585366e-06, + "loss": 0.268, + "step": 12 + }, + { + "epoch": 0.04, + "learning_rate": 6.341463414634147e-06, + "loss": 0.1497, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 6.829268292682928e-06, + "loss": 0.4364, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 7.317073170731707e-06, + "loss": 0.3558, + "step": 15 + }, + { + "epoch": 0.05, + "learning_rate": 7.804878048780489e-06, + "loss": 0.2374, + "step": 16 + }, + { + "epoch": 0.05, + "learning_rate": 8.292682926829268e-06, + "loss": 0.1175, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 8.78048780487805e-06, + "loss": 0.1637, + "step": 18 + }, + { + "epoch": 0.06, + "learning_rate": 9.268292682926831e-06, + "loss": 0.0784, + "step": 19 + }, + { + "epoch": 0.06, + "learning_rate": 9.756097560975611e-06, + "loss": 0.2103, + "step": 20 + }, + { + "epoch": 0.06, + "learning_rate": 1.024390243902439e-05, + "loss": 0.1701, + "step": 21 + }, + { + "epoch": 0.07, + "learning_rate": 1.0731707317073172e-05, + "loss": 0.0682, + "step": 22 + }, + { + "epoch": 0.07, + "learning_rate": 1.1219512195121953e-05, + "loss": 0.2193, + "step": 23 + }, + { + "epoch": 0.07, + "learning_rate": 1.1707317073170731e-05, + "loss": 0.175, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 1.2195121951219513e-05, + "loss": 0.0853, + "step": 25 + }, + { + "epoch": 0.08, + "learning_rate": 1.2682926829268294e-05, + "loss": 0.1163, + "step": 26 + }, + { + "epoch": 0.08, + "learning_rate": 1.3170731707317076e-05, + "loss": 0.1191, + "step": 27 + }, + { + "epoch": 0.08, + "learning_rate": 1.3658536585365855e-05, + "loss": 0.1116, + "step": 28 + }, + { + "epoch": 0.09, + "learning_rate": 1.4146341463414635e-05, + "loss": 0.0768, + "step": 29 + }, + { + "epoch": 0.09, + "learning_rate": 1.4634146341463415e-05, + "loss": 0.0739, + "step": 30 + }, + { + "epoch": 0.09, + "learning_rate": 1.5121951219512196e-05, + "loss": 0.152, + "step": 31 + }, + { + "epoch": 0.09, + "learning_rate": 1.5609756097560978e-05, + "loss": 0.1381, + "step": 32 + }, + { + "epoch": 0.1, + "learning_rate": 1.6097560975609757e-05, + "loss": 0.0836, + "step": 33 + }, + { + "epoch": 0.1, + "learning_rate": 1.6585365853658537e-05, + "loss": 0.0781, + "step": 34 + }, + { + "epoch": 0.1, + "learning_rate": 1.7073170731707317e-05, + "loss": 0.0648, + "step": 35 + }, + { + "epoch": 0.11, + "learning_rate": 1.75609756097561e-05, + "loss": 0.0673, + "step": 36 + }, + { + "epoch": 0.11, + "learning_rate": 1.804878048780488e-05, + "loss": 0.0796, + "step": 37 + }, + { + "epoch": 0.11, + "learning_rate": 1.8536585365853663e-05, + "loss": 0.0609, + "step": 38 + }, + { + "epoch": 0.12, + "learning_rate": 1.902439024390244e-05, + "loss": 0.0598, + "step": 39 + }, + { + "epoch": 0.12, + "learning_rate": 1.9512195121951222e-05, + "loss": 0.0692, + "step": 40 + }, + { + "epoch": 0.12, + "learning_rate": 2e-05, + "loss": 0.1062, + "step": 41 + }, + { + "epoch": 0.12, + "learning_rate": 1.99999711119408e-05, + "loss": 0.0811, + "step": 42 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999884447930092e-05, + "loss": 0.0576, + "step": 43 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999740008468595e-05, + "loss": 0.0691, + "step": 44 + }, + { + "epoch": 0.13, + "learning_rate": 1.999953779439082e-05, + "loss": 0.0659, + "step": 45 + }, + { + "epoch": 0.14, + "learning_rate": 1.9999277806865083e-05, + "loss": 0.0609, + "step": 46 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998960047393496e-05, + "loss": 0.0636, + "step": 47 + }, + { + "epoch": 0.14, + "learning_rate": 1.999858451781194e-05, + "loss": 0.0506, + "step": 48 + }, + { + "epoch": 0.15, + "learning_rate": 1.999815122029008e-05, + "loss": 0.067, + "step": 49 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997660157331356e-05, + "loss": 0.0513, + "step": 50 + }, + { + "epoch": 0.15, + "learning_rate": 1.999711133177292e-05, + "loss": 0.0887, + "step": 51 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996504746785685e-05, + "loss": 0.0576, + "step": 52 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995840405874257e-05, + "loss": 0.0475, + "step": 53 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995118312876946e-05, + "loss": 0.0496, + "step": 54 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994338471965718e-05, + "loss": 0.0475, + "step": 55 + }, + { + "epoch": 0.17, + "learning_rate": 1.9993500887646196e-05, + "loss": 0.0487, + "step": 56 + }, + { + "epoch": 0.17, + "learning_rate": 1.9992605564757618e-05, + "loss": 0.0625, + "step": 57 + }, + { + "epoch": 0.17, + "learning_rate": 1.9991652508472807e-05, + "loss": 0.0596, + "step": 58 + }, + { + "epoch": 0.18, + "learning_rate": 1.9990641724298158e-05, + "loss": 0.0509, + "step": 59 + }, + { + "epoch": 0.18, + "learning_rate": 1.9989573218073584e-05, + "loss": 0.0997, + "step": 60 + }, + { + "epoch": 0.18, + "learning_rate": 1.9988446995972502e-05, + "loss": 0.109, + "step": 61 + }, + { + "epoch": 0.18, + "learning_rate": 1.998726306450179e-05, + "loss": 0.0644, + "step": 62 + }, + { + "epoch": 0.19, + "learning_rate": 1.998602143050174e-05, + "loss": 0.1192, + "step": 63 + }, + { + "epoch": 0.19, + "learning_rate": 1.998472210114603e-05, + "loss": 0.0763, + "step": 64 + }, + { + "epoch": 0.19, + "learning_rate": 1.9983365083941683e-05, + "loss": 0.0446, + "step": 65 + }, + { + "epoch": 0.2, + "learning_rate": 1.998195038672902e-05, + "loss": 0.0681, + "step": 66 + }, + { + "epoch": 0.2, + "learning_rate": 1.9980478017681605e-05, + "loss": 0.0583, + "step": 67 + }, + { + "epoch": 0.2, + "learning_rate": 1.9978947985306222e-05, + "loss": 0.0551, + "step": 68 + }, + { + "epoch": 0.2, + "learning_rate": 1.9977360298442803e-05, + "loss": 0.0396, + "step": 69 + }, + { + "epoch": 0.21, + "learning_rate": 1.9975714966264386e-05, + "loss": 0.0572, + "step": 70 + }, + { + "epoch": 0.21, + "learning_rate": 1.9974011998277055e-05, + "loss": 0.0649, + "step": 71 + }, + { + "epoch": 0.21, + "learning_rate": 1.997225140431991e-05, + "loss": 0.0646, + "step": 72 + }, + { + "epoch": 0.22, + "learning_rate": 1.997043319456497e-05, + "loss": 0.0531, + "step": 73 + }, + { + "epoch": 0.22, + "learning_rate": 1.9968557379517154e-05, + "loss": 0.0582, + "step": 74 + }, + { + "epoch": 0.22, + "learning_rate": 1.9966623970014182e-05, + "loss": 0.0535, + "step": 75 + }, + { + "epoch": 0.23, + "learning_rate": 1.996463297722655e-05, + "loss": 0.0549, + "step": 76 + }, + { + "epoch": 0.23, + "learning_rate": 1.9962584412657444e-05, + "loss": 0.0582, + "step": 77 + }, + { + "epoch": 0.23, + "learning_rate": 1.996047828814267e-05, + "loss": 0.0624, + "step": 78 + }, + { + "epoch": 0.23, + "learning_rate": 1.99583146158506e-05, + "loss": 0.0657, + "step": 79 + }, + { + "epoch": 0.24, + "learning_rate": 1.9956093408282093e-05, + "loss": 0.0544, + "step": 80 + }, + { + "epoch": 0.24, + "learning_rate": 1.995381467827042e-05, + "loss": 0.0541, + "step": 81 + }, + { + "epoch": 0.24, + "learning_rate": 1.9951478438981208e-05, + "loss": 0.062, + "step": 82 + }, + { + "epoch": 0.25, + "learning_rate": 1.9949084703912333e-05, + "loss": 0.0601, + "step": 83 + }, + { + "epoch": 0.25, + "learning_rate": 1.9946633486893867e-05, + "loss": 0.061, + "step": 84 + }, + { + "epoch": 0.25, + "learning_rate": 1.9944124802087993e-05, + "loss": 0.0398, + "step": 85 + }, + { + "epoch": 0.26, + "learning_rate": 1.9941558663988918e-05, + "loss": 0.0647, + "step": 86 + }, + { + "epoch": 0.26, + "learning_rate": 1.993893508742279e-05, + "loss": 0.064, + "step": 87 + }, + { + "epoch": 0.26, + "learning_rate": 1.993625408754762e-05, + "loss": 0.0482, + "step": 88 + }, + { + "epoch": 0.26, + "learning_rate": 1.9933515679853183e-05, + "loss": 0.0553, + "step": 89 + }, + { + "epoch": 0.27, + "learning_rate": 1.9930719880160932e-05, + "loss": 0.1394, + "step": 90 + }, + { + "epoch": 0.27, + "learning_rate": 1.9927866704623913e-05, + "loss": 0.0508, + "step": 91 + }, + { + "epoch": 0.27, + "learning_rate": 1.9924956169726672e-05, + "loss": 0.0454, + "step": 92 + }, + { + "epoch": 0.28, + "learning_rate": 1.9921988292285145e-05, + "loss": 0.0459, + "step": 93 + }, + { + "epoch": 0.28, + "learning_rate": 1.991896308944658e-05, + "loss": 0.0429, + "step": 94 + }, + { + "epoch": 0.28, + "learning_rate": 1.991588057868942e-05, + "loss": 0.0587, + "step": 95 + }, + { + "epoch": 0.28, + "learning_rate": 1.991274077782322e-05, + "loss": 0.05, + "step": 96 + }, + { + "epoch": 0.29, + "learning_rate": 1.9909543704988528e-05, + "loss": 0.0512, + "step": 97 + }, + { + "epoch": 0.29, + "learning_rate": 1.990628937865679e-05, + "loss": 0.0452, + "step": 98 + }, + { + "epoch": 0.29, + "learning_rate": 1.990297781763024e-05, + "loss": 0.0478, + "step": 99 + }, + { + "epoch": 0.3, + "learning_rate": 1.9899609041041795e-05, + "loss": 0.0546, + "step": 100 + }, + { + "epoch": 0.3, + "learning_rate": 1.9896183068354937e-05, + "loss": 0.0583, + "step": 101 + }, + { + "epoch": 0.3, + "learning_rate": 1.9892699919363605e-05, + "loss": 0.0472, + "step": 102 + }, + { + "epoch": 0.31, + "learning_rate": 1.988915961419208e-05, + "loss": 0.0483, + "step": 103 + }, + { + "epoch": 0.31, + "learning_rate": 1.9885562173294878e-05, + "loss": 0.0528, + "step": 104 + }, + { + "epoch": 0.31, + "learning_rate": 1.988190761745661e-05, + "loss": 0.0407, + "step": 105 + }, + { + "epoch": 0.31, + "learning_rate": 1.9878195967791885e-05, + "loss": 0.0497, + "step": 106 + }, + { + "epoch": 0.32, + "learning_rate": 1.987442724574517e-05, + "loss": 0.0495, + "step": 107 + }, + { + "epoch": 0.32, + "learning_rate": 1.987060147309068e-05, + "loss": 0.0434, + "step": 108 + }, + { + "epoch": 0.32, + "learning_rate": 1.9866718671932247e-05, + "loss": 0.0362, + "step": 109 + }, + { + "epoch": 0.33, + "learning_rate": 1.9862778864703185e-05, + "loss": 0.0541, + "step": 110 + }, + { + "epoch": 0.33, + "learning_rate": 1.9858782074166175e-05, + "loss": 0.0612, + "step": 111 + }, + { + "epoch": 0.33, + "learning_rate": 1.9854728323413112e-05, + "loss": 0.0403, + "step": 112 + }, + { + "epoch": 0.34, + "learning_rate": 1.9850617635865007e-05, + "loss": 0.044, + "step": 113 + }, + { + "epoch": 0.34, + "learning_rate": 1.984645003527181e-05, + "loss": 0.0268, + "step": 114 + }, + { + "epoch": 0.34, + "learning_rate": 1.9842225545712302e-05, + "loss": 0.0465, + "step": 115 + }, + { + "epoch": 0.34, + "learning_rate": 1.9837944191593942e-05, + "loss": 0.031, + "step": 116 + }, + { + "epoch": 0.35, + "learning_rate": 1.9833605997652732e-05, + "loss": 0.051, + "step": 117 + }, + { + "epoch": 0.35, + "learning_rate": 1.9829210988953074e-05, + "loss": 0.0445, + "step": 118 + }, + { + "epoch": 0.35, + "learning_rate": 1.982475919088762e-05, + "loss": 0.048, + "step": 119 + }, + { + "epoch": 0.36, + "learning_rate": 1.9820250629177134e-05, + "loss": 0.0499, + "step": 120 + }, + { + "epoch": 0.36, + "learning_rate": 1.9815685329870337e-05, + "loss": 0.0386, + "step": 121 + }, + { + "epoch": 0.36, + "learning_rate": 1.9811063319343753e-05, + "loss": 0.0552, + "step": 122 + }, + { + "epoch": 0.36, + "learning_rate": 1.9806384624301565e-05, + "loss": 0.0427, + "step": 123 + }, + { + "epoch": 0.37, + "learning_rate": 1.980164927177546e-05, + "loss": 0.0423, + "step": 124 + }, + { + "epoch": 0.37, + "learning_rate": 1.979685728912446e-05, + "loss": 0.0465, + "step": 125 + }, + { + "epoch": 0.37, + "learning_rate": 1.979200870403479e-05, + "loss": 0.0454, + "step": 126 + }, + { + "epoch": 0.38, + "learning_rate": 1.9787103544519684e-05, + "loss": 0.0576, + "step": 127 + }, + { + "epoch": 0.38, + "learning_rate": 1.9782141838919253e-05, + "loss": 0.0564, + "step": 128 + }, + { + "epoch": 0.38, + "learning_rate": 1.977712361590031e-05, + "loss": 0.0376, + "step": 129 + }, + { + "epoch": 0.39, + "learning_rate": 1.9772048904456192e-05, + "loss": 0.0444, + "step": 130 + }, + { + "epoch": 0.39, + "learning_rate": 1.976691773390662e-05, + "loss": 0.0457, + "step": 131 + }, + { + "epoch": 0.39, + "learning_rate": 1.9761730133897497e-05, + "loss": 0.0432, + "step": 132 + }, + { + "epoch": 0.39, + "learning_rate": 1.9756486134400772e-05, + "loss": 0.0419, + "step": 133 + }, + { + "epoch": 0.4, + "learning_rate": 1.9751185765714234e-05, + "loss": 0.0394, + "step": 134 + }, + { + "epoch": 0.4, + "learning_rate": 1.9745829058461353e-05, + "loss": 0.0489, + "step": 135 + }, + { + "epoch": 0.4, + "learning_rate": 1.9740416043591106e-05, + "loss": 0.0409, + "step": 136 + }, + { + "epoch": 0.41, + "learning_rate": 1.9734946752377794e-05, + "loss": 0.055, + "step": 137 + }, + { + "epoch": 0.41, + "learning_rate": 1.9729421216420857e-05, + "loss": 0.0408, + "step": 138 + }, + { + "epoch": 0.41, + "learning_rate": 1.9723839467644697e-05, + "loss": 0.0378, + "step": 139 + }, + { + "epoch": 0.42, + "learning_rate": 1.971820153829849e-05, + "loss": 0.0457, + "step": 140 + }, + { + "epoch": 0.42, + "learning_rate": 1.9712507460956008e-05, + "loss": 0.0379, + "step": 141 + }, + { + "epoch": 0.42, + "learning_rate": 1.970675726851542e-05, + "loss": 0.0348, + "step": 142 + }, + { + "epoch": 0.42, + "learning_rate": 1.97009509941991e-05, + "loss": 0.0481, + "step": 143 + }, + { + "epoch": 0.43, + "learning_rate": 1.969508867155345e-05, + "loss": 0.019, + "step": 144 + }, + { + "epoch": 0.43, + "learning_rate": 1.9689170334448695e-05, + "loss": 0.0487, + "step": 145 + }, + { + "epoch": 0.43, + "learning_rate": 1.9683196017078692e-05, + "loss": 0.0391, + "step": 146 + }, + { + "epoch": 0.44, + "learning_rate": 1.967716575396072e-05, + "loss": 0.0534, + "step": 147 + }, + { + "epoch": 0.44, + "learning_rate": 1.967107957993531e-05, + "loss": 0.0438, + "step": 148 + }, + { + "epoch": 0.44, + "learning_rate": 1.9664937530166002e-05, + "loss": 0.044, + "step": 149 + }, + { + "epoch": 0.45, + "learning_rate": 1.9658739640139185e-05, + "loss": 0.0447, + "step": 150 + }, + { + "epoch": 0.45, + "learning_rate": 1.9652485945663853e-05, + "loss": 0.0419, + "step": 151 + }, + { + "epoch": 0.45, + "learning_rate": 1.964617648287143e-05, + "loss": 0.0391, + "step": 152 + }, + { + "epoch": 0.45, + "learning_rate": 1.9639811288215545e-05, + "loss": 0.052, + "step": 153 + }, + { + "epoch": 0.46, + "learning_rate": 1.963339039847182e-05, + "loss": 0.0394, + "step": 154 + }, + { + "epoch": 0.46, + "learning_rate": 1.9626913850737658e-05, + "loss": 0.0458, + "step": 155 + }, + { + "epoch": 0.46, + "learning_rate": 1.9620381682432047e-05, + "loss": 0.0465, + "step": 156 + }, + { + "epoch": 0.47, + "learning_rate": 1.9613793931295317e-05, + "loss": 0.043, + "step": 157 + }, + { + "epoch": 0.47, + "learning_rate": 1.9607150635388935e-05, + "loss": 0.0402, + "step": 158 + }, + { + "epoch": 0.47, + "learning_rate": 1.9600451833095287e-05, + "loss": 0.0504, + "step": 159 + }, + { + "epoch": 0.47, + "learning_rate": 1.959369756311745e-05, + "loss": 0.0455, + "step": 160 + }, + { + "epoch": 0.48, + "learning_rate": 1.9586887864478978e-05, + "loss": 0.0379, + "step": 161 + }, + { + "epoch": 0.48, + "learning_rate": 1.9580022776523667e-05, + "loss": 0.0419, + "step": 162 + }, + { + "epoch": 0.48, + "learning_rate": 1.9573102338915327e-05, + "loss": 0.0346, + "step": 163 + }, + { + "epoch": 0.49, + "learning_rate": 1.956612659163756e-05, + "loss": 0.0357, + "step": 164 + }, + { + "epoch": 0.49, + "learning_rate": 1.955909557499353e-05, + "loss": 0.0429, + "step": 165 + }, + { + "epoch": 0.49, + "learning_rate": 1.955200932960572e-05, + "loss": 0.0126, + "step": 166 + }, + { + "epoch": 0.5, + "learning_rate": 1.9544867896415706e-05, + "loss": 0.0444, + "step": 167 + }, + { + "epoch": 0.5, + "learning_rate": 1.9537671316683916e-05, + "loss": 0.0464, + "step": 168 + }, + { + "epoch": 0.5, + "learning_rate": 1.9530419631989392e-05, + "loss": 0.0441, + "step": 169 + }, + { + "epoch": 0.5, + "learning_rate": 1.9523112884229558e-05, + "loss": 0.0396, + "step": 170 + }, + { + "epoch": 0.51, + "learning_rate": 1.9515751115619963e-05, + "loss": 0.0465, + "step": 171 + }, + { + "epoch": 0.51, + "learning_rate": 1.9508334368694052e-05, + "loss": 0.0324, + "step": 172 + }, + { + "epoch": 0.51, + "learning_rate": 1.9500862686302904e-05, + "loss": 0.0372, + "step": 173 + }, + { + "epoch": 0.52, + "learning_rate": 1.9493336111615006e-05, + "loss": 0.0466, + "step": 174 + }, + { + "epoch": 0.52, + "learning_rate": 1.948575468811598e-05, + "loss": 0.0313, + "step": 175 + }, + { + "epoch": 0.52, + "learning_rate": 1.947811845960835e-05, + "loss": 0.0492, + "step": 176 + }, + { + "epoch": 0.53, + "learning_rate": 1.9470427470211282e-05, + "loss": 0.0323, + "step": 177 + }, + { + "epoch": 0.53, + "learning_rate": 1.9462681764360326e-05, + "loss": 0.0164, + "step": 178 + }, + { + "epoch": 0.53, + "learning_rate": 1.9454881386807164e-05, + "loss": 0.0406, + "step": 179 + }, + { + "epoch": 0.53, + "learning_rate": 1.944702638261935e-05, + "loss": 0.0572, + "step": 180 + }, + { + "epoch": 0.54, + "learning_rate": 1.9439116797180046e-05, + "loss": 0.048, + "step": 181 + }, + { + "epoch": 0.54, + "learning_rate": 1.9431152676187774e-05, + "loss": 0.0358, + "step": 182 + }, + { + "epoch": 0.54, + "learning_rate": 1.9423134065656123e-05, + "loss": 0.0422, + "step": 183 + }, + { + "epoch": 0.55, + "learning_rate": 1.941506101191352e-05, + "loss": 0.0304, + "step": 184 + }, + { + "epoch": 0.55, + "learning_rate": 1.9406933561602938e-05, + "loss": 0.0462, + "step": 185 + }, + { + "epoch": 0.55, + "learning_rate": 1.9398751761681626e-05, + "loss": 0.0432, + "step": 186 + }, + { + "epoch": 0.55, + "learning_rate": 1.9390515659420846e-05, + "loss": 0.0544, + "step": 187 + }, + { + "epoch": 0.56, + "learning_rate": 1.9382225302405603e-05, + "loss": 0.0447, + "step": 188 + }, + { + "epoch": 0.56, + "learning_rate": 1.937388073853436e-05, + "loss": 0.053, + "step": 189 + }, + { + "epoch": 0.56, + "learning_rate": 1.936548201601877e-05, + "loss": 0.0393, + "step": 190 + }, + { + "epoch": 0.57, + "learning_rate": 1.935702918338339e-05, + "loss": 0.0483, + "step": 191 + }, + { + "epoch": 0.57, + "learning_rate": 1.9348522289465405e-05, + "loss": 0.0493, + "step": 192 + }, + { + "epoch": 0.57, + "learning_rate": 1.933996138341435e-05, + "loss": 0.0395, + "step": 193 + }, + { + "epoch": 0.58, + "learning_rate": 1.9331346514691813e-05, + "loss": 0.0442, + "step": 194 + }, + { + "epoch": 0.58, + "learning_rate": 1.9322677733071167e-05, + "loss": 0.0603, + "step": 195 + }, + { + "epoch": 0.58, + "learning_rate": 1.9313955088637263e-05, + "loss": 0.0325, + "step": 196 + }, + { + "epoch": 0.58, + "learning_rate": 1.9305178631786154e-05, + "loss": 0.0372, + "step": 197 + }, + { + "epoch": 0.59, + "learning_rate": 1.92963484132248e-05, + "loss": 0.0452, + "step": 198 + }, + { + "epoch": 0.59, + "learning_rate": 1.928746448397078e-05, + "loss": 0.0375, + "step": 199 + }, + { + "epoch": 0.59, + "learning_rate": 1.9278526895351992e-05, + "loss": 0.0376, + "step": 200 + }, + { + "epoch": 0.6, + "learning_rate": 1.9269535699006345e-05, + "loss": 0.031, + "step": 201 + }, + { + "epoch": 0.6, + "learning_rate": 1.9260490946881488e-05, + "loss": 0.0428, + "step": 202 + }, + { + "epoch": 0.6, + "learning_rate": 1.9251392691234486e-05, + "loss": 0.0298, + "step": 203 + }, + { + "epoch": 0.61, + "learning_rate": 1.924224098463153e-05, + "loss": 0.0243, + "step": 204 + }, + { + "epoch": 0.61, + "learning_rate": 1.9233035879947627e-05, + "loss": 0.0428, + "step": 205 + }, + { + "epoch": 0.61, + "learning_rate": 1.9223777430366298e-05, + "loss": 0.0245, + "step": 206 + }, + { + "epoch": 0.61, + "learning_rate": 1.921446568937927e-05, + "loss": 0.0393, + "step": 207 + }, + { + "epoch": 0.62, + "learning_rate": 1.920510071078617e-05, + "loss": 0.0265, + "step": 208 + }, + { + "epoch": 0.62, + "learning_rate": 1.919568254869421e-05, + "loss": 0.0417, + "step": 209 + }, + { + "epoch": 0.62, + "learning_rate": 1.9186211257517873e-05, + "loss": 0.032, + "step": 210 + }, + { + "epoch": 0.63, + "learning_rate": 1.9176686891978603e-05, + "loss": 0.0451, + "step": 211 + }, + { + "epoch": 0.63, + "learning_rate": 1.916710950710449e-05, + "loss": 0.035, + "step": 212 + }, + { + "epoch": 0.63, + "learning_rate": 1.9157479158229942e-05, + "loss": 0.038, + "step": 213 + }, + { + "epoch": 0.64, + "learning_rate": 1.914779590099538e-05, + "loss": 0.0381, + "step": 214 + }, + { + "epoch": 0.64, + "learning_rate": 1.9138059791346904e-05, + "loss": 0.0283, + "step": 215 + }, + { + "epoch": 0.64, + "learning_rate": 1.912827088553598e-05, + "loss": 0.0318, + "step": 216 + }, + { + "epoch": 0.64, + "learning_rate": 1.9118429240119098e-05, + "loss": 0.0388, + "step": 217 + }, + { + "epoch": 0.65, + "learning_rate": 1.9108534911957472e-05, + "loss": 0.0468, + "step": 218 + }, + { + "epoch": 0.65, + "learning_rate": 1.909858795821669e-05, + "loss": 0.0393, + "step": 219 + }, + { + "epoch": 0.65, + "learning_rate": 1.9088588436366383e-05, + "loss": 0.0352, + "step": 220 + }, + { + "epoch": 0.66, + "learning_rate": 1.9078536404179916e-05, + "loss": 0.0303, + "step": 221 + }, + { + "epoch": 0.66, + "learning_rate": 1.906843191973402e-05, + "loss": 0.0368, + "step": 222 + }, + { + "epoch": 0.66, + "learning_rate": 1.905827504140849e-05, + "loss": 0.0407, + "step": 223 + }, + { + "epoch": 0.66, + "learning_rate": 1.9048065827885828e-05, + "loss": 0.0363, + "step": 224 + }, + { + "epoch": 0.67, + "learning_rate": 1.90378043381509e-05, + "loss": 0.0206, + "step": 225 + }, + { + "epoch": 0.67, + "learning_rate": 1.9027490631490618e-05, + "loss": 0.0214, + "step": 226 + }, + { + "epoch": 0.67, + "learning_rate": 1.9017124767493565e-05, + "loss": 0.0281, + "step": 227 + }, + { + "epoch": 0.68, + "learning_rate": 1.9006706806049694e-05, + "loss": 0.0256, + "step": 228 + }, + { + "epoch": 0.68, + "learning_rate": 1.899623680734993e-05, + "loss": 0.0271, + "step": 229 + }, + { + "epoch": 0.68, + "learning_rate": 1.898571483188587e-05, + "loss": 0.0223, + "step": 230 + }, + { + "epoch": 0.69, + "learning_rate": 1.8975140940449396e-05, + "loss": 0.0312, + "step": 231 + }, + { + "epoch": 0.69, + "learning_rate": 1.8964515194132358e-05, + "loss": 0.0372, + "step": 232 + }, + { + "epoch": 0.69, + "learning_rate": 1.8953837654326185e-05, + "loss": 0.0242, + "step": 233 + }, + { + "epoch": 0.69, + "learning_rate": 1.894310838272156e-05, + "loss": 0.0355, + "step": 234 + }, + { + "epoch": 0.7, + "learning_rate": 1.8932327441308057e-05, + "loss": 0.0173, + "step": 235 + }, + { + "epoch": 0.7, + "learning_rate": 1.892149489237376e-05, + "loss": 0.0435, + "step": 236 + }, + { + "epoch": 0.7, + "learning_rate": 1.8910610798504934e-05, + "loss": 0.0323, + "step": 237 + }, + { + "epoch": 0.71, + "learning_rate": 1.889967522258566e-05, + "loss": 0.0406, + "step": 238 + }, + { + "epoch": 0.71, + "learning_rate": 1.8888688227797434e-05, + "loss": 0.0397, + "step": 239 + }, + { + "epoch": 0.71, + "learning_rate": 1.8877649877618854e-05, + "loss": 0.0396, + "step": 240 + }, + { + "epoch": 0.72, + "learning_rate": 1.8866560235825232e-05, + "loss": 0.0284, + "step": 241 + }, + { + "epoch": 0.72, + "learning_rate": 1.88554193664882e-05, + "loss": 0.0367, + "step": 242 + }, + { + "epoch": 0.72, + "learning_rate": 1.8844227333975383e-05, + "loss": 0.041, + "step": 243 + }, + { + "epoch": 0.72, + "learning_rate": 1.883298420295e-05, + "loss": 0.0425, + "step": 244 + }, + { + "epoch": 0.73, + "learning_rate": 1.8821690038370497e-05, + "loss": 0.0238, + "step": 245 + }, + { + "epoch": 0.73, + "learning_rate": 1.8810344905490173e-05, + "loss": 0.027, + "step": 246 + }, + { + "epoch": 0.73, + "learning_rate": 1.8798948869856804e-05, + "loss": 0.0169, + "step": 247 + }, + { + "epoch": 0.74, + "learning_rate": 1.8787501997312257e-05, + "loss": 0.0305, + "step": 248 + }, + { + "epoch": 0.74, + "learning_rate": 1.8776004353992127e-05, + "loss": 0.0289, + "step": 249 + }, + { + "epoch": 0.74, + "learning_rate": 1.876445600632532e-05, + "loss": 0.025, + "step": 250 + }, + { + "epoch": 0.74, + "learning_rate": 1.8752857021033716e-05, + "loss": 0.0192, + "step": 251 + }, + { + "epoch": 0.75, + "learning_rate": 1.874120746513175e-05, + "loss": 0.0136, + "step": 252 + }, + { + "epoch": 0.75, + "learning_rate": 1.8729507405926032e-05, + "loss": 0.044, + "step": 253 + }, + { + "epoch": 0.75, + "learning_rate": 1.871775691101496e-05, + "loss": 0.0282, + "step": 254 + }, + { + "epoch": 0.76, + "learning_rate": 1.8705956048288335e-05, + "loss": 0.0178, + "step": 255 + }, + { + "epoch": 0.76, + "learning_rate": 1.869410488592696e-05, + "loss": 0.0296, + "step": 256 + }, + { + "epoch": 0.76, + "learning_rate": 1.868220349240226e-05, + "loss": 0.0153, + "step": 257 + }, + { + "epoch": 0.77, + "learning_rate": 1.867025193647585e-05, + "loss": 0.0245, + "step": 258 + }, + { + "epoch": 0.77, + "learning_rate": 1.8658250287199196e-05, + "loss": 0.0233, + "step": 259 + }, + { + "epoch": 0.77, + "learning_rate": 1.864619861391316e-05, + "loss": 0.0249, + "step": 260 + }, + { + "epoch": 0.77, + "learning_rate": 1.8634096986247638e-05, + "loss": 0.0253, + "step": 261 + }, + { + "epoch": 0.78, + "learning_rate": 1.8621945474121134e-05, + "loss": 0.0263, + "step": 262 + }, + { + "epoch": 0.78, + "learning_rate": 1.8609744147740367e-05, + "loss": 0.0275, + "step": 263 + }, + { + "epoch": 0.78, + "learning_rate": 1.859749307759987e-05, + "loss": 0.0352, + "step": 264 + }, + { + "epoch": 0.79, + "learning_rate": 1.8585192334481562e-05, + "loss": 0.0348, + "step": 265 + }, + { + "epoch": 0.79, + "learning_rate": 1.8572841989454376e-05, + "loss": 0.0147, + "step": 266 + }, + { + "epoch": 0.79, + "learning_rate": 1.85604421138738e-05, + "loss": 0.0418, + "step": 267 + }, + { + "epoch": 0.8, + "learning_rate": 1.8547992779381507e-05, + "loss": 0.0426, + "step": 268 + }, + { + "epoch": 0.8, + "learning_rate": 1.8535494057904916e-05, + "loss": 0.0408, + "step": 269 + }, + { + "epoch": 0.8, + "learning_rate": 1.852294602165679e-05, + "loss": 0.0317, + "step": 270 + }, + { + "epoch": 0.8, + "learning_rate": 1.851034874313481e-05, + "loss": 0.0456, + "step": 271 + }, + { + "epoch": 0.81, + "learning_rate": 1.849770229512117e-05, + "loss": 0.0306, + "step": 272 + }, + { + "epoch": 0.81, + "learning_rate": 1.8485006750682126e-05, + "loss": 0.0305, + "step": 273 + }, + { + "epoch": 0.81, + "learning_rate": 1.8472262183167614e-05, + "loss": 0.0372, + "step": 274 + }, + { + "epoch": 0.82, + "learning_rate": 1.8459468666210797e-05, + "loss": 0.0322, + "step": 275 + }, + { + "epoch": 0.82, + "learning_rate": 1.8446626273727643e-05, + "loss": 0.0369, + "step": 276 + }, + { + "epoch": 0.82, + "learning_rate": 1.843373507991652e-05, + "loss": 0.0494, + "step": 277 + }, + { + "epoch": 0.82, + "learning_rate": 1.8420795159257737e-05, + "loss": 0.0326, + "step": 278 + }, + { + "epoch": 0.83, + "learning_rate": 1.8407806586513134e-05, + "loss": 0.0353, + "step": 279 + }, + { + "epoch": 0.83, + "learning_rate": 1.8394769436725645e-05, + "loss": 0.0362, + "step": 280 + }, + { + "epoch": 0.83, + "learning_rate": 1.8381683785218855e-05, + "loss": 0.0353, + "step": 281 + }, + { + "epoch": 0.84, + "learning_rate": 1.836854970759659e-05, + "loss": 0.0325, + "step": 282 + }, + { + "epoch": 0.84, + "learning_rate": 1.8355367279742436e-05, + "loss": 0.0387, + "step": 283 + }, + { + "epoch": 0.84, + "learning_rate": 1.8342136577819358e-05, + "loss": 0.0479, + "step": 284 + }, + { + "epoch": 0.85, + "learning_rate": 1.8328857678269214e-05, + "loss": 0.0407, + "step": 285 + }, + { + "epoch": 0.85, + "learning_rate": 1.8315530657812326e-05, + "loss": 0.0294, + "step": 286 + }, + { + "epoch": 0.85, + "learning_rate": 1.8302155593447047e-05, + "loss": 0.0277, + "step": 287 + }, + { + "epoch": 0.85, + "learning_rate": 1.828873256244931e-05, + "loss": 0.0309, + "step": 288 + }, + { + "epoch": 0.86, + "learning_rate": 1.8275261642372177e-05, + "loss": 0.0371, + "step": 289 + }, + { + "epoch": 0.86, + "learning_rate": 1.8261742911045392e-05, + "loss": 0.022, + "step": 290 + }, + { + "epoch": 0.86, + "learning_rate": 1.824817644657494e-05, + "loss": 0.0225, + "step": 291 + }, + { + "epoch": 0.87, + "learning_rate": 1.823456232734259e-05, + "loss": 0.0501, + "step": 292 + }, + { + "epoch": 0.87, + "learning_rate": 1.8220900632005428e-05, + "loss": 0.0225, + "step": 293 + }, + { + "epoch": 0.87, + "learning_rate": 1.820719143949544e-05, + "loss": 0.026, + "step": 294 + }, + { + "epoch": 0.88, + "learning_rate": 1.819343482901901e-05, + "loss": 0.0237, + "step": 295 + }, + { + "epoch": 0.88, + "learning_rate": 1.81796308800565e-05, + "loss": 0.0308, + "step": 296 + }, + { + "epoch": 0.88, + "learning_rate": 1.8165779672361757e-05, + "loss": 0.0241, + "step": 297 + }, + { + "epoch": 0.88, + "learning_rate": 1.81518812859617e-05, + "loss": 0.0269, + "step": 298 + }, + { + "epoch": 0.89, + "learning_rate": 1.8137935801155793e-05, + "loss": 0.0314, + "step": 299 + }, + { + "epoch": 0.89, + "learning_rate": 1.8123943298515646e-05, + "loss": 0.0184, + "step": 300 + }, + { + "epoch": 0.89, + "learning_rate": 1.810990385888451e-05, + "loss": 0.0281, + "step": 301 + }, + { + "epoch": 0.9, + "learning_rate": 1.8095817563376806e-05, + "loss": 0.0246, + "step": 302 + }, + { + "epoch": 0.9, + "learning_rate": 1.808168449337769e-05, + "loss": 0.0316, + "step": 303 + }, + { + "epoch": 0.9, + "learning_rate": 1.806750473054255e-05, + "loss": 0.0252, + "step": 304 + }, + { + "epoch": 0.91, + "learning_rate": 1.805327835679656e-05, + "loss": 0.0411, + "step": 305 + }, + { + "epoch": 0.91, + "learning_rate": 1.8039005454334177e-05, + "loss": 0.0234, + "step": 306 + }, + { + "epoch": 0.91, + "learning_rate": 1.8024686105618695e-05, + "loss": 0.038, + "step": 307 + }, + { + "epoch": 0.91, + "learning_rate": 1.801032039338175e-05, + "loss": 0.0202, + "step": 308 + }, + { + "epoch": 0.92, + "learning_rate": 1.799590840062285e-05, + "loss": 0.0332, + "step": 309 + }, + { + "epoch": 0.92, + "learning_rate": 1.7981450210608898e-05, + "loss": 0.0274, + "step": 310 + }, + { + "epoch": 0.92, + "learning_rate": 1.7966945906873706e-05, + "loss": 0.047, + "step": 311 + }, + { + "epoch": 0.93, + "learning_rate": 1.795239557321751e-05, + "loss": 0.0236, + "step": 312 + }, + { + "epoch": 0.93, + "learning_rate": 1.793779929370649e-05, + "loss": 0.0215, + "step": 313 + }, + { + "epoch": 0.93, + "learning_rate": 1.792315715267228e-05, + "loss": 0.0301, + "step": 314 + }, + { + "epoch": 0.93, + "learning_rate": 1.7908469234711486e-05, + "loss": 0.0239, + "step": 315 + }, + { + "epoch": 0.94, + "learning_rate": 1.7893735624685203e-05, + "loss": 0.0242, + "step": 316 + }, + { + "epoch": 0.94, + "learning_rate": 1.7878956407718505e-05, + "loss": 0.0274, + "step": 317 + }, + { + "epoch": 0.94, + "learning_rate": 1.7864131669199975e-05, + "loss": 0.0209, + "step": 318 + }, + { + "epoch": 0.95, + "learning_rate": 1.78492614947812e-05, + "loss": 0.0218, + "step": 319 + }, + { + "epoch": 0.95, + "learning_rate": 1.7834345970376272e-05, + "loss": 0.0181, + "step": 320 + }, + { + "epoch": 0.95, + "learning_rate": 1.7819385182161296e-05, + "loss": 0.0133, + "step": 321 + }, + { + "epoch": 0.96, + "learning_rate": 1.780437921657391e-05, + "loss": 0.0218, + "step": 322 + }, + { + "epoch": 0.96, + "learning_rate": 1.778932816031275e-05, + "loss": 0.0108, + "step": 323 + }, + { + "epoch": 0.96, + "learning_rate": 1.7774232100336983e-05, + "loss": 0.0127, + "step": 324 + }, + { + "epoch": 0.96, + "learning_rate": 1.7759091123865778e-05, + "loss": 0.0214, + "step": 325 + }, + { + "epoch": 0.97, + "learning_rate": 1.7743905318377826e-05, + "loss": 0.019, + "step": 326 + }, + { + "epoch": 0.97, + "learning_rate": 1.7728674771610814e-05, + "loss": 0.0146, + "step": 327 + }, + { + "epoch": 0.97, + "learning_rate": 1.771339957156093e-05, + "loss": 0.0179, + "step": 328 + }, + { + "epoch": 0.98, + "learning_rate": 1.7698079806482344e-05, + "loss": 0.0201, + "step": 329 + }, + { + "epoch": 0.98, + "learning_rate": 1.768271556488672e-05, + "loss": 0.0174, + "step": 330 + }, + { + "epoch": 0.98, + "learning_rate": 1.7667306935542682e-05, + "loss": 0.0254, + "step": 331 + }, + { + "epoch": 0.99, + "learning_rate": 1.7651854007475307e-05, + "loss": 0.016, + "step": 332 + }, + { + "epoch": 0.99, + "learning_rate": 1.7636356869965615e-05, + "loss": 0.0086, + "step": 333 + }, + { + "epoch": 0.99, + "learning_rate": 1.762081561255005e-05, + "loss": 0.0297, + "step": 334 + }, + { + "epoch": 0.99, + "learning_rate": 1.760523032501997e-05, + "loss": 0.0202, + "step": 335 + }, + { + "epoch": 1.0, + "learning_rate": 1.758960109742111e-05, + "loss": 0.0224, + "step": 336 + }, + { + "epoch": 1.0, + "learning_rate": 1.7573928020053083e-05, + "loss": 0.0299, + "step": 337 + }, + { + "epoch": 1.0, + "learning_rate": 1.7558211183468848e-05, + "loss": 0.0375, + "step": 338 + }, + { + "epoch": 1.01, + "learning_rate": 1.7542450678474187e-05, + "loss": 0.0262, + "step": 339 + }, + { + "epoch": 1.01, + "learning_rate": 1.7526646596127177e-05, + "loss": 0.0196, + "step": 340 + }, + { + "epoch": 1.01, + "learning_rate": 1.7510799027737672e-05, + "loss": 0.0262, + "step": 341 + }, + { + "epoch": 1.01, + "learning_rate": 1.7494908064866775e-05, + "loss": 0.012, + "step": 342 + }, + { + "epoch": 1.02, + "learning_rate": 1.7478973799326296e-05, + "loss": 0.0174, + "step": 343 + }, + { + "epoch": 1.02, + "learning_rate": 1.7462996323178236e-05, + "loss": 0.0209, + "step": 344 + }, + { + "epoch": 1.02, + "learning_rate": 1.744697572873425e-05, + "loss": 0.0192, + "step": 345 + }, + { + "epoch": 1.03, + "learning_rate": 1.7430912108555125e-05, + "loss": 0.0135, + "step": 346 + }, + { + "epoch": 1.03, + "learning_rate": 1.741480555545021e-05, + "loss": 0.0294, + "step": 347 + }, + { + "epoch": 1.03, + "learning_rate": 1.739865616247692e-05, + "loss": 0.0149, + "step": 348 + }, + { + "epoch": 1.04, + "learning_rate": 1.7382464022940183e-05, + "loss": 0.0169, + "step": 349 + }, + { + "epoch": 1.04, + "learning_rate": 1.7366229230391894e-05, + "loss": 0.0159, + "step": 350 + }, + { + "epoch": 1.04, + "learning_rate": 1.734995187863038e-05, + "loss": 0.022, + "step": 351 + }, + { + "epoch": 1.04, + "learning_rate": 1.7333632061699866e-05, + "loss": 0.0123, + "step": 352 + }, + { + "epoch": 1.05, + "learning_rate": 1.7317269873889915e-05, + "loss": 0.0229, + "step": 353 + }, + { + "epoch": 1.05, + "learning_rate": 1.7300865409734902e-05, + "loss": 0.0134, + "step": 354 + }, + { + "epoch": 1.05, + "learning_rate": 1.728441876401345e-05, + "loss": 0.0251, + "step": 355 + }, + { + "epoch": 1.06, + "learning_rate": 1.7267930031747896e-05, + "loss": 0.014, + "step": 356 + }, + { + "epoch": 1.06, + "learning_rate": 1.725139930820373e-05, + "loss": 0.0395, + "step": 357 + }, + { + "epoch": 1.06, + "learning_rate": 1.7234826688889064e-05, + "loss": 0.0201, + "step": 358 + }, + { + "epoch": 1.07, + "learning_rate": 1.721821226955405e-05, + "loss": 0.0118, + "step": 359 + }, + { + "epoch": 1.07, + "learning_rate": 1.7201556146190363e-05, + "loss": 0.0436, + "step": 360 + }, + { + "epoch": 1.07, + "learning_rate": 1.7184858415030613e-05, + "loss": 0.0398, + "step": 361 + }, + { + "epoch": 1.07, + "learning_rate": 1.716811917254781e-05, + "loss": 0.0361, + "step": 362 + }, + { + "epoch": 1.08, + "learning_rate": 1.7151338515454802e-05, + "loss": 0.0169, + "step": 363 + }, + { + "epoch": 1.08, + "learning_rate": 1.7134516540703713e-05, + "loss": 0.0324, + "step": 364 + }, + { + "epoch": 1.08, + "learning_rate": 1.7117653345485378e-05, + "loss": 0.0401, + "step": 365 + }, + { + "epoch": 1.09, + "learning_rate": 1.7100749027228797e-05, + "loss": 0.024, + "step": 366 + }, + { + "epoch": 1.09, + "learning_rate": 1.7083803683600555e-05, + "loss": 0.0159, + "step": 367 + }, + { + "epoch": 1.09, + "learning_rate": 1.706681741250428e-05, + "loss": 0.0307, + "step": 368 + }, + { + "epoch": 1.09, + "learning_rate": 1.704979031208004e-05, + "loss": 0.0772, + "step": 369 + }, + { + "epoch": 1.1, + "learning_rate": 1.7032722480703826e-05, + "loss": 0.0316, + "step": 370 + }, + { + "epoch": 1.1, + "learning_rate": 1.701561401698693e-05, + "loss": 0.0166, + "step": 371 + }, + { + "epoch": 1.1, + "learning_rate": 1.699846501977542e-05, + "loss": 0.0264, + "step": 372 + }, + { + "epoch": 1.11, + "learning_rate": 1.698127558814955e-05, + "loss": 0.0327, + "step": 373 + }, + { + "epoch": 1.11, + "learning_rate": 1.696404582142318e-05, + "loss": 0.0218, + "step": 374 + }, + { + "epoch": 1.11, + "learning_rate": 1.6946775819143205e-05, + "loss": 0.0191, + "step": 375 + }, + { + "epoch": 1.12, + "learning_rate": 1.692946568108901e-05, + "loss": 0.0291, + "step": 376 + }, + { + "epoch": 1.12, + "learning_rate": 1.6912115507271845e-05, + "loss": 0.0295, + "step": 377 + }, + { + "epoch": 1.12, + "learning_rate": 1.689472539793428e-05, + "loss": 0.0522, + "step": 378 + }, + { + "epoch": 1.12, + "learning_rate": 1.6877295453549617e-05, + "loss": 0.0283, + "step": 379 + }, + { + "epoch": 1.13, + "learning_rate": 1.685982577482131e-05, + "loss": 0.0378, + "step": 380 + }, + { + "epoch": 1.13, + "learning_rate": 1.6842316462682383e-05, + "loss": 0.0249, + "step": 381 + }, + { + "epoch": 1.13, + "learning_rate": 1.6824767618294842e-05, + "loss": 0.0225, + "step": 382 + }, + { + "epoch": 1.14, + "learning_rate": 1.68071793430491e-05, + "loss": 0.0314, + "step": 383 + }, + { + "epoch": 1.14, + "learning_rate": 1.6789551738563384e-05, + "loss": 0.0305, + "step": 384 + }, + { + "epoch": 1.14, + "learning_rate": 1.6771884906683156e-05, + "loss": 0.0179, + "step": 385 + }, + { + "epoch": 1.15, + "learning_rate": 1.67541789494805e-05, + "loss": 0.0177, + "step": 386 + }, + { + "epoch": 1.15, + "learning_rate": 1.6736433969253576e-05, + "loss": 0.0156, + "step": 387 + }, + { + "epoch": 1.15, + "learning_rate": 1.6718650068525985e-05, + "loss": 0.0159, + "step": 388 + }, + { + "epoch": 1.15, + "learning_rate": 1.6700827350046206e-05, + "loss": 0.0258, + "step": 389 + }, + { + "epoch": 1.16, + "learning_rate": 1.6682965916786988e-05, + "loss": 0.0464, + "step": 390 + }, + { + "epoch": 1.16, + "learning_rate": 1.666506587194476e-05, + "loss": 0.0121, + "step": 391 + }, + { + "epoch": 1.16, + "learning_rate": 1.6647127318939027e-05, + "loss": 0.0266, + "step": 392 + }, + { + "epoch": 1.17, + "learning_rate": 1.662915036141179e-05, + "loss": 0.0182, + "step": 393 + }, + { + "epoch": 1.17, + "learning_rate": 1.6611135103226936e-05, + "loss": 0.0356, + "step": 394 + }, + { + "epoch": 1.17, + "learning_rate": 1.6593081648469627e-05, + "loss": 0.0268, + "step": 395 + }, + { + "epoch": 1.18, + "learning_rate": 1.6574990101445716e-05, + "loss": 0.0156, + "step": 396 + }, + { + "epoch": 1.18, + "learning_rate": 1.6556860566681147e-05, + "loss": 0.011, + "step": 397 + }, + { + "epoch": 1.18, + "learning_rate": 1.653869314892133e-05, + "loss": 0.0598, + "step": 398 + }, + { + "epoch": 1.18, + "learning_rate": 1.6520487953130553e-05, + "loss": 0.0309, + "step": 399 + }, + { + "epoch": 1.19, + "learning_rate": 1.650224508449137e-05, + "loss": 0.035, + "step": 400 + }, + { + "epoch": 1.19, + "learning_rate": 1.6483964648403997e-05, + "loss": 0.0201, + "step": 401 + }, + { + "epoch": 1.19, + "learning_rate": 1.6465646750485695e-05, + "loss": 0.0193, + "step": 402 + }, + { + "epoch": 1.2, + "learning_rate": 1.644729149657017e-05, + "loss": 0.0247, + "step": 403 + }, + { + "epoch": 1.2, + "learning_rate": 1.6428898992706957e-05, + "loss": 0.0233, + "step": 404 + }, + { + "epoch": 1.2, + "learning_rate": 1.64104693451608e-05, + "loss": 0.0301, + "step": 405 + }, + { + "epoch": 1.2, + "learning_rate": 1.639200266041105e-05, + "loss": 0.0247, + "step": 406 + }, + { + "epoch": 1.21, + "learning_rate": 1.6373499045151046e-05, + "loss": 0.0171, + "step": 407 + }, + { + "epoch": 1.21, + "learning_rate": 1.6354958606287488e-05, + "loss": 0.0213, + "step": 408 + }, + { + "epoch": 1.21, + "learning_rate": 1.6336381450939843e-05, + "loss": 0.0246, + "step": 409 + }, + { + "epoch": 1.22, + "learning_rate": 1.63177676864397e-05, + "loss": 0.0166, + "step": 410 + }, + { + "epoch": 1.22, + "learning_rate": 1.6299117420330164e-05, + "loss": 0.0279, + "step": 411 + }, + { + "epoch": 1.22, + "learning_rate": 1.6280430760365234e-05, + "loss": 0.0397, + "step": 412 + }, + { + "epoch": 1.23, + "learning_rate": 1.626170781450918e-05, + "loss": 0.0135, + "step": 413 + }, + { + "epoch": 1.23, + "learning_rate": 1.624294869093591e-05, + "loss": 0.0129, + "step": 414 + }, + { + "epoch": 1.23, + "learning_rate": 1.6224153498028367e-05, + "loss": 0.0202, + "step": 415 + }, + { + "epoch": 1.23, + "learning_rate": 1.6205322344377873e-05, + "loss": 0.026, + "step": 416 + }, + { + "epoch": 1.24, + "learning_rate": 1.618645533878353e-05, + "loss": 0.016, + "step": 417 + }, + { + "epoch": 1.24, + "learning_rate": 1.6167552590251568e-05, + "loss": 0.0187, + "step": 418 + }, + { + "epoch": 1.24, + "learning_rate": 1.6148614207994735e-05, + "loss": 0.0196, + "step": 419 + }, + { + "epoch": 1.25, + "learning_rate": 1.6129640301431648e-05, + "loss": 0.0155, + "step": 420 + }, + { + "epoch": 1.25, + "learning_rate": 1.6110630980186174e-05, + "loss": 0.0224, + "step": 421 + }, + { + "epoch": 1.25, + "learning_rate": 1.6091586354086798e-05, + "loss": 0.0141, + "step": 422 + }, + { + "epoch": 1.26, + "learning_rate": 1.607250653316598e-05, + "loss": 0.0233, + "step": 423 + }, + { + "epoch": 1.26, + "learning_rate": 1.6053391627659505e-05, + "loss": 0.0107, + "step": 424 + }, + { + "epoch": 1.26, + "learning_rate": 1.6034241748005887e-05, + "loss": 0.0357, + "step": 425 + }, + { + "epoch": 1.26, + "learning_rate": 1.6015057004845697e-05, + "loss": 0.0192, + "step": 426 + }, + { + "epoch": 1.27, + "learning_rate": 1.5995837509020934e-05, + "loss": 0.0316, + "step": 427 + }, + { + "epoch": 1.27, + "learning_rate": 1.597658337157439e-05, + "loss": 0.0286, + "step": 428 + }, + { + "epoch": 1.27, + "learning_rate": 1.5957294703748983e-05, + "loss": 0.009, + "step": 429 + }, + { + "epoch": 1.28, + "learning_rate": 1.593797161698716e-05, + "loss": 0.0153, + "step": 430 + }, + { + "epoch": 1.28, + "learning_rate": 1.5918614222930214e-05, + "loss": 0.0292, + "step": 431 + }, + { + "epoch": 1.28, + "learning_rate": 1.5899222633417654e-05, + "loss": 0.0189, + "step": 432 + }, + { + "epoch": 1.28, + "learning_rate": 1.5879796960486555e-05, + "loss": 0.0124, + "step": 433 + }, + { + "epoch": 1.29, + "learning_rate": 1.5860337316370918e-05, + "loss": 0.0082, + "step": 434 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840843813501014e-05, + "loss": 0.0246, + "step": 435 + }, + { + "epoch": 1.29, + "learning_rate": 1.582131656450273e-05, + "loss": 0.0211, + "step": 436 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801755682196933e-05, + "loss": 0.0133, + "step": 437 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782161279598813e-05, + "loss": 0.0086, + "step": 438 + }, + { + "epoch": 1.3, + "learning_rate": 1.5762533469917217e-05, + "loss": 0.0058, + "step": 439 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742872366554018e-05, + "loss": 0.0259, + "step": 440 + }, + { + "epoch": 1.31, + "learning_rate": 1.5723178083103428e-05, + "loss": 0.009, + "step": 441 + }, + { + "epoch": 1.31, + "learning_rate": 1.570345073335138e-05, + "loss": 0.026, + "step": 442 + }, + { + "epoch": 1.31, + "learning_rate": 1.5683690431274844e-05, + "loss": 0.0196, + "step": 443 + }, + { + "epoch": 1.32, + "learning_rate": 1.5663897291041177e-05, + "loss": 0.0117, + "step": 444 + }, + { + "epoch": 1.32, + "learning_rate": 1.5644071427007454e-05, + "loss": 0.0107, + "step": 445 + }, + { + "epoch": 1.32, + "learning_rate": 1.5624212953719825e-05, + "loss": 0.0099, + "step": 446 + }, + { + "epoch": 1.33, + "learning_rate": 1.5604321985912842e-05, + "loss": 0.0095, + "step": 447 + }, + { + "epoch": 1.33, + "learning_rate": 1.5584398638508788e-05, + "loss": 0.0189, + "step": 448 + }, + { + "epoch": 1.33, + "learning_rate": 1.5564443026617042e-05, + "loss": 0.0162, + "step": 449 + }, + { + "epoch": 1.34, + "learning_rate": 1.5544455265533377e-05, + "loss": 0.0156, + "step": 450 + }, + { + "epoch": 1.34, + "learning_rate": 1.5524435470739322e-05, + "loss": 0.019, + "step": 451 + }, + { + "epoch": 1.34, + "learning_rate": 1.550438375790148e-05, + "loss": 0.016, + "step": 452 + }, + { + "epoch": 1.34, + "learning_rate": 1.548430024287086e-05, + "loss": 0.0045, + "step": 453 + }, + { + "epoch": 1.35, + "learning_rate": 1.546418504168222e-05, + "loss": 0.0089, + "step": 454 + }, + { + "epoch": 1.35, + "learning_rate": 1.544403827055338e-05, + "loss": 0.0134, + "step": 455 + }, + { + "epoch": 1.35, + "learning_rate": 1.5423860045884575e-05, + "loss": 0.007, + "step": 456 + }, + { + "epoch": 1.36, + "learning_rate": 1.540365048425774e-05, + "loss": 0.0226, + "step": 457 + }, + { + "epoch": 1.36, + "learning_rate": 1.5383409702435885e-05, + "loss": 0.0288, + "step": 458 + }, + { + "epoch": 1.36, + "learning_rate": 1.5363137817362393e-05, + "loss": 0.0224, + "step": 459 + }, + { + "epoch": 1.36, + "learning_rate": 1.534283494616034e-05, + "loss": 0.0184, + "step": 460 + }, + { + "epoch": 1.37, + "learning_rate": 1.5322501206131845e-05, + "loss": 0.0204, + "step": 461 + }, + { + "epoch": 1.37, + "learning_rate": 1.5302136714757354e-05, + "loss": 0.033, + "step": 462 + }, + { + "epoch": 1.37, + "learning_rate": 1.5281741589695002e-05, + "loss": 0.0159, + "step": 463 + }, + { + "epoch": 1.38, + "learning_rate": 1.5261315948779904e-05, + "loss": 0.0193, + "step": 464 + }, + { + "epoch": 1.38, + "learning_rate": 1.5240859910023479e-05, + "loss": 0.0082, + "step": 465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5220373591612785e-05, + "loss": 0.018, + "step": 466 + }, + { + "epoch": 1.39, + "learning_rate": 1.5199857111909812e-05, + "loss": 0.0134, + "step": 467 + }, + { + "epoch": 1.39, + "learning_rate": 1.5179310589450824e-05, + "loss": 0.0217, + "step": 468 + }, + { + "epoch": 1.39, + "learning_rate": 1.5158734142945645e-05, + "loss": 0.021, + "step": 469 + }, + { + "epoch": 1.39, + "learning_rate": 1.5138127891277e-05, + "loss": 0.0096, + "step": 470 + }, + { + "epoch": 1.4, + "learning_rate": 1.5117491953499813e-05, + "loss": 0.0178, + "step": 471 + }, + { + "epoch": 1.4, + "learning_rate": 1.5096826448840518e-05, + "loss": 0.0081, + "step": 472 + }, + { + "epoch": 1.4, + "learning_rate": 1.5076131496696388e-05, + "loss": 0.0076, + "step": 473 + }, + { + "epoch": 1.41, + "learning_rate": 1.505540721663481e-05, + "loss": 0.0299, + "step": 474 + }, + { + "epoch": 1.41, + "learning_rate": 1.503465372839264e-05, + "loss": 0.0175, + "step": 475 + }, + { + "epoch": 1.41, + "learning_rate": 1.5013871151875478e-05, + "loss": 0.0115, + "step": 476 + }, + { + "epoch": 1.42, + "learning_rate": 1.499305960715698e-05, + "loss": 0.0138, + "step": 477 + }, + { + "epoch": 1.42, + "learning_rate": 1.4972219214478177e-05, + "loss": 0.0122, + "step": 478 + }, + { + "epoch": 1.42, + "learning_rate": 1.4951350094246761e-05, + "loss": 0.0128, + "step": 479 + }, + { + "epoch": 1.42, + "learning_rate": 1.493045236703642e-05, + "loss": 0.0059, + "step": 480 + }, + { + "epoch": 1.43, + "learning_rate": 1.49095261535861e-05, + "loss": 0.0129, + "step": 481 + }, + { + "epoch": 1.43, + "learning_rate": 1.488857157479934e-05, + "loss": 0.0166, + "step": 482 + }, + { + "epoch": 1.43, + "learning_rate": 1.4867588751743569e-05, + "loss": 0.0168, + "step": 483 + }, + { + "epoch": 1.44, + "learning_rate": 1.4846577805649389e-05, + "loss": 0.009, + "step": 484 + }, + { + "epoch": 1.44, + "learning_rate": 1.4825538857909893e-05, + "loss": 0.0035, + "step": 485 + }, + { + "epoch": 1.44, + "learning_rate": 1.4804472030079953e-05, + "loss": 0.0249, + "step": 486 + }, + { + "epoch": 1.45, + "learning_rate": 1.4783377443875526e-05, + "loss": 0.0079, + "step": 487 + }, + { + "epoch": 1.45, + "learning_rate": 1.4762255221172941e-05, + "loss": 0.0096, + "step": 488 + }, + { + "epoch": 1.45, + "learning_rate": 1.47411054840082e-05, + "loss": 0.0169, + "step": 489 + }, + { + "epoch": 1.45, + "learning_rate": 1.4719928354576278e-05, + "loss": 0.0266, + "step": 490 + }, + { + "epoch": 1.46, + "learning_rate": 1.4698723955230407e-05, + "loss": 0.0099, + "step": 491 + }, + { + "epoch": 1.46, + "learning_rate": 1.4677492408481376e-05, + "loss": 0.0212, + "step": 492 + }, + { + "epoch": 1.46, + "learning_rate": 1.4656233836996822e-05, + "loss": 0.0138, + "step": 493 + }, + { + "epoch": 1.47, + "learning_rate": 1.4634948363600518e-05, + "loss": 0.0091, + "step": 494 + }, + { + "epoch": 1.47, + "learning_rate": 1.4613636111271668e-05, + "loss": 0.0155, + "step": 495 + }, + { + "epoch": 1.47, + "learning_rate": 1.459229720314419e-05, + "loss": 0.0233, + "step": 496 + }, + { + "epoch": 1.47, + "learning_rate": 1.457093176250602e-05, + "loss": 0.0199, + "step": 497 + }, + { + "epoch": 1.48, + "learning_rate": 1.4549539912798376e-05, + "loss": 0.0187, + "step": 498 + }, + { + "epoch": 1.48, + "learning_rate": 1.4528121777615058e-05, + "loss": 0.0071, + "step": 499 + }, + { + "epoch": 1.48, + "learning_rate": 1.4506677480701743e-05, + "loss": 0.0138, + "step": 500 + }, + { + "epoch": 1.49, + "learning_rate": 1.4485207145955253e-05, + "loss": 0.0171, + "step": 501 + }, + { + "epoch": 1.49, + "learning_rate": 1.4463710897422853e-05, + "loss": 0.0131, + "step": 502 + }, + { + "epoch": 1.49, + "learning_rate": 1.4442188859301516e-05, + "loss": 0.0078, + "step": 503 + }, + { + "epoch": 1.5, + "learning_rate": 1.4420641155937225e-05, + "loss": 0.0133, + "step": 504 + }, + { + "epoch": 1.5, + "learning_rate": 1.4399067911824251e-05, + "loss": 0.0078, + "step": 505 + }, + { + "epoch": 1.5, + "learning_rate": 1.4377469251604421e-05, + "loss": 0.0094, + "step": 506 + }, + { + "epoch": 1.5, + "learning_rate": 1.4355845300066413e-05, + "loss": 0.0144, + "step": 507 + }, + { + "epoch": 1.51, + "learning_rate": 1.433419618214502e-05, + "loss": 0.0105, + "step": 508 + }, + { + "epoch": 1.51, + "learning_rate": 1.4312522022920445e-05, + "loss": 0.0052, + "step": 509 + }, + { + "epoch": 1.51, + "learning_rate": 1.4290822947617572e-05, + "loss": 0.0205, + "step": 510 + }, + { + "epoch": 1.52, + "learning_rate": 1.4269099081605227e-05, + "loss": 0.0152, + "step": 511 + }, + { + "epoch": 1.52, + "learning_rate": 1.4247350550395479e-05, + "loss": 0.0218, + "step": 512 + }, + { + "epoch": 1.52, + "learning_rate": 1.4225577479642898e-05, + "loss": 0.0131, + "step": 513 + }, + { + "epoch": 1.53, + "learning_rate": 1.4203779995143841e-05, + "loss": 0.0275, + "step": 514 + }, + { + "epoch": 1.53, + "learning_rate": 1.4181958222835706e-05, + "loss": 0.0219, + "step": 515 + }, + { + "epoch": 1.53, + "learning_rate": 1.4160112288796227e-05, + "loss": 0.0311, + "step": 516 + }, + { + "epoch": 1.53, + "learning_rate": 1.4138242319242728e-05, + "loss": 0.0232, + "step": 517 + }, + { + "epoch": 1.54, + "learning_rate": 1.4116348440531406e-05, + "loss": 0.0103, + "step": 518 + }, + { + "epoch": 1.54, + "learning_rate": 1.4094430779156596e-05, + "loss": 0.0164, + "step": 519 + }, + { + "epoch": 1.54, + "learning_rate": 1.4072489461750034e-05, + "loss": 0.0514, + "step": 520 + }, + { + "epoch": 1.55, + "learning_rate": 1.4050524615080137e-05, + "loss": 0.0137, + "step": 521 + }, + { + "epoch": 1.55, + "learning_rate": 1.4028536366051265e-05, + "loss": 0.0131, + "step": 522 + }, + { + "epoch": 1.55, + "learning_rate": 1.400652484170298e-05, + "loss": 0.0204, + "step": 523 + }, + { + "epoch": 1.55, + "learning_rate": 1.3984490169209333e-05, + "loss": 0.0144, + "step": 524 + }, + { + "epoch": 1.56, + "learning_rate": 1.3962432475878103e-05, + "loss": 0.0151, + "step": 525 + }, + { + "epoch": 1.56, + "learning_rate": 1.3940351889150084e-05, + "loss": 0.0126, + "step": 526 + }, + { + "epoch": 1.56, + "learning_rate": 1.3918248536598333e-05, + "loss": 0.0108, + "step": 527 + }, + { + "epoch": 1.57, + "learning_rate": 1.3896122545927442e-05, + "loss": 0.0112, + "step": 528 + }, + { + "epoch": 1.57, + "learning_rate": 1.38739740449728e-05, + "loss": 0.0068, + "step": 529 + }, + { + "epoch": 1.57, + "learning_rate": 1.385180316169984e-05, + "loss": 0.0069, + "step": 530 + }, + { + "epoch": 1.58, + "learning_rate": 1.382961002420333e-05, + "loss": 0.0105, + "step": 531 + }, + { + "epoch": 1.58, + "learning_rate": 1.3807394760706596e-05, + "loss": 0.0041, + "step": 532 + }, + { + "epoch": 1.58, + "learning_rate": 1.3785157499560814e-05, + "loss": 0.0036, + "step": 533 + }, + { + "epoch": 1.58, + "learning_rate": 1.376289836924424e-05, + "loss": 0.0103, + "step": 534 + }, + { + "epoch": 1.59, + "learning_rate": 1.3740617498361494e-05, + "loss": 0.015, + "step": 535 + }, + { + "epoch": 1.59, + "learning_rate": 1.3718315015642801e-05, + "loss": 0.0176, + "step": 536 + }, + { + "epoch": 1.59, + "learning_rate": 1.369599104994324e-05, + "loss": 0.0105, + "step": 537 + }, + { + "epoch": 1.6, + "learning_rate": 1.367364573024203e-05, + "loss": 0.0141, + "step": 538 + }, + { + "epoch": 1.6, + "learning_rate": 1.3651279185641753e-05, + "loss": 0.017, + "step": 539 + }, + { + "epoch": 1.6, + "learning_rate": 1.3628891545367616e-05, + "loss": 0.0109, + "step": 540 + }, + { + "epoch": 1.61, + "learning_rate": 1.3606482938766719e-05, + "loss": 0.0161, + "step": 541 + }, + { + "epoch": 1.61, + "learning_rate": 1.3584053495307287e-05, + "loss": 0.0293, + "step": 542 + }, + { + "epoch": 1.61, + "learning_rate": 1.3561603344577949e-05, + "loss": 0.0411, + "step": 543 + }, + { + "epoch": 1.61, + "learning_rate": 1.3539132616286956e-05, + "loss": 0.0181, + "step": 544 + }, + { + "epoch": 1.62, + "learning_rate": 1.351664144026145e-05, + "loss": 0.0076, + "step": 545 + }, + { + "epoch": 1.62, + "learning_rate": 1.3494129946446722e-05, + "loss": 0.0056, + "step": 546 + }, + { + "epoch": 1.62, + "learning_rate": 1.347159826490544e-05, + "loss": 0.0163, + "step": 547 + }, + { + "epoch": 1.63, + "learning_rate": 1.344904652581692e-05, + "loss": 0.0136, + "step": 548 + }, + { + "epoch": 1.63, + "learning_rate": 1.342647485947635e-05, + "loss": 0.0146, + "step": 549 + }, + { + "epoch": 1.63, + "learning_rate": 1.3403883396294062e-05, + "loss": 0.0116, + "step": 550 + }, + { + "epoch": 1.64, + "learning_rate": 1.338127226679476e-05, + "loss": 0.0082, + "step": 551 + }, + { + "epoch": 1.64, + "learning_rate": 1.3358641601616774e-05, + "loss": 0.0187, + "step": 552 + }, + { + "epoch": 1.64, + "learning_rate": 1.33359915315113e-05, + "loss": 0.01, + "step": 553 + }, + { + "epoch": 1.64, + "learning_rate": 1.3313322187341653e-05, + "loss": 0.0091, + "step": 554 + }, + { + "epoch": 1.65, + "learning_rate": 1.3290633700082502e-05, + "loss": 0.0069, + "step": 555 + }, + { + "epoch": 1.65, + "learning_rate": 1.3267926200819128e-05, + "loss": 0.0094, + "step": 556 + }, + { + "epoch": 1.65, + "learning_rate": 1.3245199820746637e-05, + "loss": 0.0257, + "step": 557 + }, + { + "epoch": 1.66, + "learning_rate": 1.3222454691169239e-05, + "loss": 0.0048, + "step": 558 + }, + { + "epoch": 1.66, + "learning_rate": 1.3199690943499457e-05, + "loss": 0.0112, + "step": 559 + }, + { + "epoch": 1.66, + "learning_rate": 1.3176908709257398e-05, + "loss": 0.0161, + "step": 560 + }, + { + "epoch": 1.66, + "learning_rate": 1.3154108120069963e-05, + "loss": 0.0067, + "step": 561 + }, + { + "epoch": 1.67, + "learning_rate": 1.3131289307670107e-05, + "loss": 0.0123, + "step": 562 + }, + { + "epoch": 1.67, + "learning_rate": 1.310845240389607e-05, + "loss": 0.0155, + "step": 563 + }, + { + "epoch": 1.67, + "learning_rate": 1.3085597540690618e-05, + "loss": 0.0076, + "step": 564 + }, + { + "epoch": 1.68, + "learning_rate": 1.3062724850100279e-05, + "loss": 0.0149, + "step": 565 + }, + { + "epoch": 1.68, + "learning_rate": 1.303983446427458e-05, + "loss": 0.0129, + "step": 566 + }, + { + "epoch": 1.68, + "learning_rate": 1.3016926515465288e-05, + "loss": 0.0168, + "step": 567 + }, + { + "epoch": 1.69, + "learning_rate": 1.299400113602564e-05, + "loss": 0.0123, + "step": 568 + }, + { + "epoch": 1.69, + "learning_rate": 1.2971058458409576e-05, + "loss": 0.0084, + "step": 569 + }, + { + "epoch": 1.69, + "learning_rate": 1.2948098615170983e-05, + "loss": 0.0036, + "step": 570 + }, + { + "epoch": 1.69, + "learning_rate": 1.2925121738962922e-05, + "loss": 0.0137, + "step": 571 + }, + { + "epoch": 1.7, + "learning_rate": 1.2902127962536867e-05, + "loss": 0.0194, + "step": 572 + }, + { + "epoch": 1.7, + "learning_rate": 1.2879117418741935e-05, + "loss": 0.0084, + "step": 573 + }, + { + "epoch": 1.7, + "learning_rate": 1.2856090240524112e-05, + "loss": 0.0055, + "step": 574 + }, + { + "epoch": 1.71, + "learning_rate": 1.2833046560925499e-05, + "loss": 0.0077, + "step": 575 + }, + { + "epoch": 1.71, + "learning_rate": 1.2809986513083527e-05, + "loss": 0.0144, + "step": 576 + }, + { + "epoch": 1.71, + "learning_rate": 1.278691023023021e-05, + "loss": 0.0084, + "step": 577 + }, + { + "epoch": 1.72, + "learning_rate": 1.2763817845691345e-05, + "loss": 0.0142, + "step": 578 + }, + { + "epoch": 1.72, + "learning_rate": 1.274070949288577e-05, + "loss": 0.0051, + "step": 579 + }, + { + "epoch": 1.72, + "learning_rate": 1.2717585305324575e-05, + "loss": 0.0249, + "step": 580 + }, + { + "epoch": 1.72, + "learning_rate": 1.2694445416610344e-05, + "loss": 0.0107, + "step": 581 + }, + { + "epoch": 1.73, + "learning_rate": 1.2671289960436371e-05, + "loss": 0.0097, + "step": 582 + }, + { + "epoch": 1.73, + "learning_rate": 1.2648119070585888e-05, + "loss": 0.0172, + "step": 583 + }, + { + "epoch": 1.73, + "learning_rate": 1.2624932880931312e-05, + "loss": 0.0139, + "step": 584 + }, + { + "epoch": 1.74, + "learning_rate": 1.2601731525433443e-05, + "loss": 0.0141, + "step": 585 + }, + { + "epoch": 1.74, + "learning_rate": 1.2578515138140702e-05, + "loss": 0.0042, + "step": 586 + }, + { + "epoch": 1.74, + "learning_rate": 1.2555283853188371e-05, + "loss": 0.0134, + "step": 587 + }, + { + "epoch": 1.74, + "learning_rate": 1.2532037804797791e-05, + "loss": 0.0218, + "step": 588 + }, + { + "epoch": 1.75, + "learning_rate": 1.2508777127275611e-05, + "loss": 0.019, + "step": 589 + }, + { + "epoch": 1.75, + "learning_rate": 1.2485501955012996e-05, + "loss": 0.0209, + "step": 590 + }, + { + "epoch": 1.75, + "learning_rate": 1.2462212422484852e-05, + "loss": 0.0063, + "step": 591 + }, + { + "epoch": 1.76, + "learning_rate": 1.2438908664249064e-05, + "loss": 0.0135, + "step": 592 + }, + { + "epoch": 1.76, + "learning_rate": 1.2415590814945697e-05, + "loss": 0.0069, + "step": 593 + }, + { + "epoch": 1.76, + "learning_rate": 1.2392259009296239e-05, + "loss": 0.0064, + "step": 594 + }, + { + "epoch": 1.77, + "learning_rate": 1.23689133821028e-05, + "loss": 0.0034, + "step": 595 + }, + { + "epoch": 1.77, + "learning_rate": 1.2345554068247359e-05, + "loss": 0.0166, + "step": 596 + }, + { + "epoch": 1.77, + "learning_rate": 1.2322181202690954e-05, + "loss": 0.0054, + "step": 597 + }, + { + "epoch": 1.77, + "learning_rate": 1.229879492047294e-05, + "loss": 0.0112, + "step": 598 + }, + { + "epoch": 1.78, + "learning_rate": 1.2275395356710176e-05, + "loss": 0.0033, + "step": 599 + }, + { + "epoch": 1.78, + "learning_rate": 1.2251982646596254e-05, + "loss": 0.0063, + "step": 600 + }, + { + "epoch": 1.78, + "learning_rate": 1.222855692540073e-05, + "loss": 0.0066, + "step": 601 + }, + { + "epoch": 1.79, + "learning_rate": 1.2205118328468327e-05, + "loss": 0.0048, + "step": 602 + }, + { + "epoch": 1.79, + "learning_rate": 1.2181666991218156e-05, + "loss": 0.0048, + "step": 603 + }, + { + "epoch": 1.79, + "learning_rate": 1.2158203049142947e-05, + "loss": 0.0094, + "step": 604 + }, + { + "epoch": 1.8, + "learning_rate": 1.213472663780824e-05, + "loss": 0.0077, + "step": 605 + }, + { + "epoch": 1.8, + "learning_rate": 1.211123789285164e-05, + "loss": 0.0046, + "step": 606 + }, + { + "epoch": 1.8, + "learning_rate": 1.2087736949981994e-05, + "loss": 0.0104, + "step": 607 + }, + { + "epoch": 1.8, + "learning_rate": 1.2064223944978623e-05, + "loss": 0.0192, + "step": 608 + }, + { + "epoch": 1.81, + "learning_rate": 1.2040699013690545e-05, + "loss": 0.0246, + "step": 609 + }, + { + "epoch": 1.81, + "learning_rate": 1.2017162292035683e-05, + "loss": 0.0175, + "step": 610 + }, + { + "epoch": 1.81, + "learning_rate": 1.1993613916000078e-05, + "loss": 0.0091, + "step": 611 + }, + { + "epoch": 1.82, + "learning_rate": 1.1970054021637106e-05, + "loss": 0.0078, + "step": 612 + }, + { + "epoch": 1.82, + "learning_rate": 1.194648274506669e-05, + "loss": 0.0084, + "step": 613 + }, + { + "epoch": 1.82, + "learning_rate": 1.1922900222474522e-05, + "loss": 0.0088, + "step": 614 + }, + { + "epoch": 1.82, + "learning_rate": 1.1899306590111257e-05, + "loss": 0.0124, + "step": 615 + }, + { + "epoch": 1.83, + "learning_rate": 1.1875701984291751e-05, + "loss": 0.0096, + "step": 616 + }, + { + "epoch": 1.83, + "learning_rate": 1.1852086541394247e-05, + "loss": 0.0053, + "step": 617 + }, + { + "epoch": 1.83, + "learning_rate": 1.1828460397859616e-05, + "loss": 0.0206, + "step": 618 + }, + { + "epoch": 1.84, + "learning_rate": 1.1804823690190539e-05, + "loss": 0.0036, + "step": 619 + }, + { + "epoch": 1.84, + "learning_rate": 1.1781176554950738e-05, + "loss": 0.0156, + "step": 620 + }, + { + "epoch": 1.84, + "learning_rate": 1.1757519128764183e-05, + "loss": 0.0191, + "step": 621 + }, + { + "epoch": 1.85, + "learning_rate": 1.1733851548314297e-05, + "loss": 0.0054, + "step": 622 + }, + { + "epoch": 1.85, + "learning_rate": 1.171017395034318e-05, + "loss": 0.0099, + "step": 623 + }, + { + "epoch": 1.85, + "learning_rate": 1.1686486471650797e-05, + "loss": 0.0048, + "step": 624 + }, + { + "epoch": 1.85, + "learning_rate": 1.1662789249094203e-05, + "loss": 0.0063, + "step": 625 + }, + { + "epoch": 1.86, + "learning_rate": 1.1639082419586758e-05, + "loss": 0.0069, + "step": 626 + }, + { + "epoch": 1.86, + "learning_rate": 1.1615366120097318e-05, + "loss": 0.022, + "step": 627 + }, + { + "epoch": 1.86, + "learning_rate": 1.1591640487649457e-05, + "loss": 0.0102, + "step": 628 + }, + { + "epoch": 1.87, + "learning_rate": 1.1567905659320665e-05, + "loss": 0.0147, + "step": 629 + }, + { + "epoch": 1.87, + "learning_rate": 1.1544161772241568e-05, + "loss": 0.0049, + "step": 630 + }, + { + "epoch": 1.87, + "learning_rate": 1.1520408963595137e-05, + "loss": 0.0067, + "step": 631 + }, + { + "epoch": 1.88, + "learning_rate": 1.1496647370615874e-05, + "loss": 0.0175, + "step": 632 + }, + { + "epoch": 1.88, + "learning_rate": 1.1472877130589042e-05, + "loss": 0.0042, + "step": 633 + }, + { + "epoch": 1.88, + "learning_rate": 1.1449098380849858e-05, + "loss": 0.019, + "step": 634 + }, + { + "epoch": 1.88, + "learning_rate": 1.1425311258782711e-05, + "loss": 0.003, + "step": 635 + }, + { + "epoch": 1.89, + "learning_rate": 1.1401515901820363e-05, + "loss": 0.0101, + "step": 636 + }, + { + "epoch": 1.89, + "learning_rate": 1.137771244744314e-05, + "loss": 0.0076, + "step": 637 + }, + { + "epoch": 1.89, + "learning_rate": 1.1353901033178171e-05, + "loss": 0.0148, + "step": 638 + }, + { + "epoch": 1.9, + "learning_rate": 1.133008179659856e-05, + "loss": 0.0029, + "step": 639 + }, + { + "epoch": 1.9, + "learning_rate": 1.1306254875322612e-05, + "loss": 0.0072, + "step": 640 + }, + { + "epoch": 1.9, + "learning_rate": 1.1282420407013033e-05, + "loss": 0.0108, + "step": 641 + }, + { + "epoch": 1.91, + "learning_rate": 1.1258578529376123e-05, + "loss": 0.0094, + "step": 642 + }, + { + "epoch": 1.91, + "learning_rate": 1.1234729380160998e-05, + "loss": 0.0024, + "step": 643 + }, + { + "epoch": 1.91, + "learning_rate": 1.1210873097158785e-05, + "loss": 0.018, + "step": 644 + }, + { + "epoch": 1.91, + "learning_rate": 1.1187009818201832e-05, + "loss": 0.0162, + "step": 645 + }, + { + "epoch": 1.92, + "learning_rate": 1.1163139681162898e-05, + "loss": 0.0025, + "step": 646 + }, + { + "epoch": 1.92, + "learning_rate": 1.1139262823954367e-05, + "loss": 0.0049, + "step": 647 + }, + { + "epoch": 1.92, + "learning_rate": 1.1115379384527459e-05, + "loss": 0.0028, + "step": 648 + }, + { + "epoch": 1.93, + "learning_rate": 1.1091489500871409e-05, + "loss": 0.0059, + "step": 649 + }, + { + "epoch": 1.93, + "learning_rate": 1.1067593311012697e-05, + "loss": 0.011, + "step": 650 + }, + { + "epoch": 1.93, + "learning_rate": 1.1043690953014226e-05, + "loss": 0.0035, + "step": 651 + }, + { + "epoch": 1.93, + "learning_rate": 1.101978256497455e-05, + "loss": 0.004, + "step": 652 + }, + { + "epoch": 1.94, + "learning_rate": 1.0995868285027051e-05, + "loss": 0.0033, + "step": 653 + }, + { + "epoch": 1.94, + "learning_rate": 1.0971948251339157e-05, + "loss": 0.0078, + "step": 654 + }, + { + "epoch": 1.94, + "learning_rate": 1.0948022602111537e-05, + "loss": 0.0074, + "step": 655 + }, + { + "epoch": 1.95, + "learning_rate": 1.0924091475577305e-05, + "loss": 0.0016, + "step": 656 + }, + { + "epoch": 1.95, + "learning_rate": 1.0900155010001226e-05, + "loss": 0.0019, + "step": 657 + }, + { + "epoch": 1.95, + "learning_rate": 1.0876213343678899e-05, + "loss": 0.0084, + "step": 658 + }, + { + "epoch": 1.96, + "learning_rate": 1.0852266614935982e-05, + "loss": 0.0236, + "step": 659 + }, + { + "epoch": 1.96, + "learning_rate": 1.0828314962127382e-05, + "loss": 0.0034, + "step": 660 + }, + { + "epoch": 1.96, + "learning_rate": 1.0804358523636447e-05, + "loss": 0.0143, + "step": 661 + }, + { + "epoch": 1.96, + "learning_rate": 1.0780397437874183e-05, + "loss": 0.0182, + "step": 662 + }, + { + "epoch": 1.97, + "learning_rate": 1.075643184327844e-05, + "loss": 0.014, + "step": 663 + }, + { + "epoch": 1.97, + "learning_rate": 1.0732461878313125e-05, + "loss": 0.0096, + "step": 664 + }, + { + "epoch": 1.97, + "learning_rate": 1.0708487681467388e-05, + "loss": 0.0065, + "step": 665 + }, + { + "epoch": 1.98, + "learning_rate": 1.0684509391254834e-05, + "loss": 0.0299, + "step": 666 + }, + { + "epoch": 1.98, + "learning_rate": 1.0660527146212716e-05, + "loss": 0.0154, + "step": 667 + }, + { + "epoch": 1.98, + "learning_rate": 1.0636541084901132e-05, + "loss": 0.0171, + "step": 668 + }, + { + "epoch": 1.99, + "learning_rate": 1.0612551345902245e-05, + "loss": 0.0161, + "step": 669 + }, + { + "epoch": 1.99, + "learning_rate": 1.0588558067819447e-05, + "loss": 0.0058, + "step": 670 + }, + { + "epoch": 1.99, + "learning_rate": 1.0564561389276588e-05, + "loss": 0.0099, + "step": 671 + }, + { + "epoch": 1.99, + "learning_rate": 1.0540561448917159e-05, + "loss": 0.0134, + "step": 672 + }, + { + "epoch": 2.0, + "learning_rate": 1.0516558385403503e-05, + "loss": 0.0119, + "step": 673 + }, + { + "epoch": 2.0, + "learning_rate": 1.0492552337416007e-05, + "loss": 0.0087, + "step": 674 + }, + { + "epoch": 2.0, + "learning_rate": 1.046854344365229e-05, + "loss": 0.0054, + "step": 675 + }, + { + "epoch": 2.01, + "learning_rate": 1.0444531842826427e-05, + "loss": 0.0037, + "step": 676 + }, + { + "epoch": 2.01, + "learning_rate": 1.0420517673668125e-05, + "loss": 0.0033, + "step": 677 + }, + { + "epoch": 2.01, + "learning_rate": 1.0396501074921932e-05, + "loss": 0.0113, + "step": 678 + }, + { + "epoch": 2.01, + "learning_rate": 1.0372482185346435e-05, + "loss": 0.003, + "step": 679 + }, + { + "epoch": 2.02, + "learning_rate": 1.0348461143713454e-05, + "loss": 0.0076, + "step": 680 + }, + { + "epoch": 2.02, + "learning_rate": 1.0324438088807238e-05, + "loss": 0.0111, + "step": 681 + }, + { + "epoch": 2.02, + "learning_rate": 1.0300413159423684e-05, + "loss": 0.0088, + "step": 682 + }, + { + "epoch": 2.03, + "learning_rate": 1.0276386494369501e-05, + "loss": 0.0053, + "step": 683 + }, + { + "epoch": 2.03, + "learning_rate": 1.0252358232461437e-05, + "loss": 0.0074, + "step": 684 + }, + { + "epoch": 2.03, + "learning_rate": 1.0228328512525457e-05, + "loss": 0.0072, + "step": 685 + }, + { + "epoch": 2.04, + "learning_rate": 1.0204297473395964e-05, + "loss": 0.0041, + "step": 686 + }, + { + "epoch": 2.04, + "learning_rate": 1.0180265253914967e-05, + "loss": 0.0037, + "step": 687 + }, + { + "epoch": 2.04, + "learning_rate": 1.0156231992931305e-05, + "loss": 0.0099, + "step": 688 + }, + { + "epoch": 2.04, + "learning_rate": 1.013219782929983e-05, + "loss": 0.0025, + "step": 689 + }, + { + "epoch": 2.05, + "learning_rate": 1.0108162901880611e-05, + "loss": 0.0021, + "step": 690 + }, + { + "epoch": 2.05, + "learning_rate": 1.0084127349538133e-05, + "loss": 0.0017, + "step": 691 + }, + { + "epoch": 2.05, + "learning_rate": 1.0060091311140481e-05, + "loss": 0.0054, + "step": 692 + }, + { + "epoch": 2.06, + "learning_rate": 1.0036054925558557e-05, + "loss": 0.0008, + "step": 693 + }, + { + "epoch": 2.06, + "learning_rate": 1.0012018331665272e-05, + "loss": 0.0041, + "step": 694 + }, + { + "epoch": 2.06, + "learning_rate": 9.98798166833473e-06, + "loss": 0.0033, + "step": 695 + }, + { + "epoch": 2.07, + "learning_rate": 9.963945074441444e-06, + "loss": 0.0047, + "step": 696 + }, + { + "epoch": 2.07, + "learning_rate": 9.939908688859522e-06, + "loss": 0.0053, + "step": 697 + }, + { + "epoch": 2.07, + "learning_rate": 9.915872650461872e-06, + "loss": 0.0082, + "step": 698 + }, + { + "epoch": 2.07, + "learning_rate": 9.891837098119389e-06, + "loss": 0.001, + "step": 699 + }, + { + "epoch": 2.08, + "learning_rate": 9.86780217070017e-06, + "loss": 0.0031, + "step": 700 + }, + { + "epoch": 2.08, + "learning_rate": 9.843768007068696e-06, + "loss": 0.0014, + "step": 701 + }, + { + "epoch": 2.08, + "learning_rate": 9.819734746085034e-06, + "loss": 0.0027, + "step": 702 + }, + { + "epoch": 2.09, + "learning_rate": 9.795702526604041e-06, + "loss": 0.0022, + "step": 703 + }, + { + "epoch": 2.09, + "learning_rate": 9.771671487474546e-06, + "loss": 0.0026, + "step": 704 + }, + { + "epoch": 2.09, + "learning_rate": 9.747641767538568e-06, + "loss": 0.0075, + "step": 705 + }, + { + "epoch": 2.09, + "learning_rate": 9.723613505630504e-06, + "loss": 0.002, + "step": 706 + }, + { + "epoch": 2.1, + "learning_rate": 9.699586840576321e-06, + "loss": 0.0087, + "step": 707 + }, + { + "epoch": 2.1, + "learning_rate": 9.675561911192762e-06, + "loss": 0.0025, + "step": 708 + }, + { + "epoch": 2.1, + "learning_rate": 9.651538856286551e-06, + "loss": 0.0103, + "step": 709 + }, + { + "epoch": 2.11, + "learning_rate": 9.627517814653568e-06, + "loss": 0.0087, + "step": 710 + }, + { + "epoch": 2.11, + "learning_rate": 9.60349892507807e-06, + "loss": 0.0025, + "step": 711 + }, + { + "epoch": 2.11, + "learning_rate": 9.579482326331877e-06, + "loss": 0.0091, + "step": 712 + }, + { + "epoch": 2.12, + "learning_rate": 9.555468157173576e-06, + "loss": 0.001, + "step": 713 + }, + { + "epoch": 2.12, + "learning_rate": 9.531456556347713e-06, + "loss": 0.0025, + "step": 714 + }, + { + "epoch": 2.12, + "learning_rate": 9.507447662583998e-06, + "loss": 0.0041, + "step": 715 + }, + { + "epoch": 2.12, + "learning_rate": 9.483441614596495e-06, + "loss": 0.0099, + "step": 716 + }, + { + "epoch": 2.13, + "learning_rate": 9.459438551082841e-06, + "loss": 0.0037, + "step": 717 + }, + { + "epoch": 2.13, + "learning_rate": 9.435438610723415e-06, + "loss": 0.0023, + "step": 718 + }, + { + "epoch": 2.13, + "learning_rate": 9.411441932180555e-06, + "loss": 0.007, + "step": 719 + }, + { + "epoch": 2.14, + "learning_rate": 9.387448654097757e-06, + "loss": 0.0134, + "step": 720 + }, + { + "epoch": 2.14, + "learning_rate": 9.36345891509887e-06, + "loss": 0.0087, + "step": 721 + }, + { + "epoch": 2.14, + "learning_rate": 9.339472853787289e-06, + "loss": 0.0009, + "step": 722 + }, + { + "epoch": 2.15, + "learning_rate": 9.315490608745173e-06, + "loss": 0.0028, + "step": 723 + }, + { + "epoch": 2.15, + "learning_rate": 9.291512318532615e-06, + "loss": 0.0173, + "step": 724 + }, + { + "epoch": 2.15, + "learning_rate": 9.267538121686877e-06, + "loss": 0.0031, + "step": 725 + }, + { + "epoch": 2.15, + "learning_rate": 9.243568156721561e-06, + "loss": 0.0155, + "step": 726 + }, + { + "epoch": 2.16, + "learning_rate": 9.219602562125819e-06, + "loss": 0.004, + "step": 727 + }, + { + "epoch": 2.16, + "learning_rate": 9.195641476363556e-06, + "loss": 0.0014, + "step": 728 + }, + { + "epoch": 2.16, + "learning_rate": 9.171685037872621e-06, + "loss": 0.0014, + "step": 729 + }, + { + "epoch": 2.17, + "learning_rate": 9.147733385064021e-06, + "loss": 0.005, + "step": 730 + }, + { + "epoch": 2.17, + "learning_rate": 9.123786656321105e-06, + "loss": 0.0038, + "step": 731 + }, + { + "epoch": 2.17, + "learning_rate": 9.099844989998779e-06, + "loss": 0.0119, + "step": 732 + }, + { + "epoch": 2.18, + "learning_rate": 9.075908524422695e-06, + "loss": 0.0072, + "step": 733 + }, + { + "epoch": 2.18, + "learning_rate": 9.051977397888465e-06, + "loss": 0.0049, + "step": 734 + }, + { + "epoch": 2.18, + "learning_rate": 9.028051748660845e-06, + "loss": 0.0058, + "step": 735 + }, + { + "epoch": 2.18, + "learning_rate": 9.00413171497295e-06, + "loss": 0.0064, + "step": 736 + }, + { + "epoch": 2.19, + "learning_rate": 8.980217435025453e-06, + "loss": 0.004, + "step": 737 + }, + { + "epoch": 2.19, + "learning_rate": 8.956309046985775e-06, + "loss": 0.0125, + "step": 738 + }, + { + "epoch": 2.19, + "learning_rate": 8.93240668898731e-06, + "loss": 0.0074, + "step": 739 + }, + { + "epoch": 2.2, + "learning_rate": 8.908510499128598e-06, + "loss": 0.0028, + "step": 740 + }, + { + "epoch": 2.2, + "learning_rate": 8.884620615472545e-06, + "loss": 0.0024, + "step": 741 + }, + { + "epoch": 2.2, + "learning_rate": 8.860737176045633e-06, + "loss": 0.0084, + "step": 742 + }, + { + "epoch": 2.2, + "learning_rate": 8.836860318837107e-06, + "loss": 0.0038, + "step": 743 + }, + { + "epoch": 2.21, + "learning_rate": 8.81299018179817e-06, + "loss": 0.0053, + "step": 744 + }, + { + "epoch": 2.21, + "learning_rate": 8.789126902841217e-06, + "loss": 0.0012, + "step": 745 + }, + { + "epoch": 2.21, + "learning_rate": 8.765270619839004e-06, + "loss": 0.0083, + "step": 746 + }, + { + "epoch": 2.22, + "learning_rate": 8.741421470623884e-06, + "loss": 0.006, + "step": 747 + }, + { + "epoch": 2.22, + "learning_rate": 8.71757959298697e-06, + "loss": 0.0157, + "step": 748 + }, + { + "epoch": 2.22, + "learning_rate": 8.693745124677386e-06, + "loss": 0.005, + "step": 749 + }, + { + "epoch": 2.23, + "learning_rate": 8.669918203401441e-06, + "loss": 0.0039, + "step": 750 + }, + { + "epoch": 2.23, + "learning_rate": 8.64609896682183e-06, + "loss": 0.0026, + "step": 751 + }, + { + "epoch": 2.23, + "learning_rate": 8.622287552556863e-06, + "loss": 0.0097, + "step": 752 + }, + { + "epoch": 2.23, + "learning_rate": 8.59848409817964e-06, + "loss": 0.0209, + "step": 753 + }, + { + "epoch": 2.24, + "learning_rate": 8.57468874121729e-06, + "loss": 0.0093, + "step": 754 + }, + { + "epoch": 2.24, + "learning_rate": 8.550901619150143e-06, + "loss": 0.0174, + "step": 755 + }, + { + "epoch": 2.24, + "learning_rate": 8.527122869410962e-06, + "loss": 0.0081, + "step": 756 + }, + { + "epoch": 2.25, + "learning_rate": 8.503352629384131e-06, + "loss": 0.0084, + "step": 757 + }, + { + "epoch": 2.25, + "learning_rate": 8.479591036404862e-06, + "loss": 0.0034, + "step": 758 + }, + { + "epoch": 2.25, + "learning_rate": 8.455838227758432e-06, + "loss": 0.0034, + "step": 759 + }, + { + "epoch": 2.26, + "learning_rate": 8.43209434067934e-06, + "loss": 0.0102, + "step": 760 + }, + { + "epoch": 2.26, + "learning_rate": 8.408359512350548e-06, + "loss": 0.0016, + "step": 761 + }, + { + "epoch": 2.26, + "learning_rate": 8.384633879902685e-06, + "loss": 0.003, + "step": 762 + }, + { + "epoch": 2.26, + "learning_rate": 8.360917580413245e-06, + "loss": 0.0063, + "step": 763 + }, + { + "epoch": 2.27, + "learning_rate": 8.3372107509058e-06, + "loss": 0.0009, + "step": 764 + }, + { + "epoch": 2.27, + "learning_rate": 8.313513528349208e-06, + "loss": 0.0062, + "step": 765 + }, + { + "epoch": 2.27, + "learning_rate": 8.289826049656821e-06, + "loss": 0.0014, + "step": 766 + }, + { + "epoch": 2.28, + "learning_rate": 8.266148451685703e-06, + "loss": 0.0013, + "step": 767 + }, + { + "epoch": 2.28, + "learning_rate": 8.242480871235819e-06, + "loss": 0.0011, + "step": 768 + }, + { + "epoch": 2.28, + "learning_rate": 8.218823445049265e-06, + "loss": 0.0048, + "step": 769 + }, + { + "epoch": 2.28, + "learning_rate": 8.195176309809465e-06, + "loss": 0.0016, + "step": 770 + }, + { + "epoch": 2.29, + "learning_rate": 8.171539602140389e-06, + "loss": 0.002, + "step": 771 + }, + { + "epoch": 2.29, + "learning_rate": 8.147913458605755e-06, + "loss": 0.0025, + "step": 772 + }, + { + "epoch": 2.29, + "learning_rate": 8.124298015708254e-06, + "loss": 0.0036, + "step": 773 + }, + { + "epoch": 2.3, + "learning_rate": 8.100693409888748e-06, + "loss": 0.0088, + "step": 774 + }, + { + "epoch": 2.3, + "learning_rate": 8.07709977752548e-06, + "loss": 0.0009, + "step": 775 + }, + { + "epoch": 2.3, + "learning_rate": 8.05351725493331e-06, + "loss": 0.0033, + "step": 776 + }, + { + "epoch": 2.31, + "learning_rate": 8.029945978362899e-06, + "loss": 0.0006, + "step": 777 + }, + { + "epoch": 2.31, + "learning_rate": 8.006386083999925e-06, + "loss": 0.0013, + "step": 778 + }, + { + "epoch": 2.31, + "learning_rate": 7.982837707964322e-06, + "loss": 0.0047, + "step": 779 + }, + { + "epoch": 2.31, + "learning_rate": 7.959300986309459e-06, + "loss": 0.001, + "step": 780 + }, + { + "epoch": 2.32, + "learning_rate": 7.935776055021382e-06, + "loss": 0.001, + "step": 781 + }, + { + "epoch": 2.32, + "learning_rate": 7.91226305001801e-06, + "loss": 0.0014, + "step": 782 + }, + { + "epoch": 2.32, + "learning_rate": 7.888762107148357e-06, + "loss": 0.0021, + "step": 783 + }, + { + "epoch": 2.33, + "learning_rate": 7.86527336219176e-06, + "loss": 0.0033, + "step": 784 + }, + { + "epoch": 2.33, + "learning_rate": 7.841796950857056e-06, + "loss": 0.0009, + "step": 785 + }, + { + "epoch": 2.33, + "learning_rate": 7.818333008781847e-06, + "loss": 0.0141, + "step": 786 + }, + { + "epoch": 2.34, + "learning_rate": 7.794881671531678e-06, + "loss": 0.001, + "step": 787 + }, + { + "epoch": 2.34, + "learning_rate": 7.771443074599275e-06, + "loss": 0.0012, + "step": 788 + }, + { + "epoch": 2.34, + "learning_rate": 7.74801735340375e-06, + "loss": 0.0064, + "step": 789 + }, + { + "epoch": 2.34, + "learning_rate": 7.724604643289829e-06, + "loss": 0.0012, + "step": 790 + }, + { + "epoch": 2.35, + "learning_rate": 7.70120507952706e-06, + "loss": 0.0032, + "step": 791 + }, + { + "epoch": 2.35, + "learning_rate": 7.677818797309044e-06, + "loss": 0.0139, + "step": 792 + }, + { + "epoch": 2.35, + "learning_rate": 7.654445931752646e-06, + "loss": 0.0037, + "step": 793 + }, + { + "epoch": 2.36, + "learning_rate": 7.631086617897203e-06, + "loss": 0.0035, + "step": 794 + }, + { + "epoch": 2.36, + "learning_rate": 7.607740990703764e-06, + "loss": 0.0027, + "step": 795 + }, + { + "epoch": 2.36, + "learning_rate": 7.584409185054305e-06, + "loss": 0.0155, + "step": 796 + }, + { + "epoch": 2.36, + "learning_rate": 7.5610913357509395e-06, + "loss": 0.0165, + "step": 797 + }, + { + "epoch": 2.37, + "learning_rate": 7.537787577515152e-06, + "loss": 0.0016, + "step": 798 + }, + { + "epoch": 2.37, + "learning_rate": 7.51449804498701e-06, + "loss": 0.0064, + "step": 799 + }, + { + "epoch": 2.37, + "learning_rate": 7.4912228727243905e-06, + "loss": 0.0049, + "step": 800 + }, + { + "epoch": 2.38, + "learning_rate": 7.467962195202211e-06, + "loss": 0.0059, + "step": 801 + }, + { + "epoch": 2.38, + "learning_rate": 7.444716146811633e-06, + "loss": 0.0158, + "step": 802 + }, + { + "epoch": 2.38, + "learning_rate": 7.4214848618593006e-06, + "loss": 0.0017, + "step": 803 + }, + { + "epoch": 2.39, + "learning_rate": 7.3982684745665614e-06, + "loss": 0.0017, + "step": 804 + }, + { + "epoch": 2.39, + "learning_rate": 7.375067119068692e-06, + "loss": 0.0108, + "step": 805 + }, + { + "epoch": 2.39, + "learning_rate": 7.351880929414113e-06, + "loss": 0.0085, + "step": 806 + }, + { + "epoch": 2.39, + "learning_rate": 7.3287100395636355e-06, + "loss": 0.0017, + "step": 807 + }, + { + "epoch": 2.4, + "learning_rate": 7.305554583389658e-06, + "loss": 0.0075, + "step": 808 + }, + { + "epoch": 2.4, + "learning_rate": 7.282414694675426e-06, + "loss": 0.0066, + "step": 809 + }, + { + "epoch": 2.4, + "learning_rate": 7.259290507114234e-06, + "loss": 0.0007, + "step": 810 + }, + { + "epoch": 2.41, + "learning_rate": 7.236182154308658e-06, + "loss": 0.008, + "step": 811 + }, + { + "epoch": 2.41, + "learning_rate": 7.213089769769795e-06, + "loss": 0.0106, + "step": 812 + }, + { + "epoch": 2.41, + "learning_rate": 7.190013486916476e-06, + "loss": 0.003, + "step": 813 + }, + { + "epoch": 2.42, + "learning_rate": 7.1669534390745045e-06, + "loss": 0.0033, + "step": 814 + }, + { + "epoch": 2.42, + "learning_rate": 7.143909759475892e-06, + "loss": 0.0017, + "step": 815 + }, + { + "epoch": 2.42, + "learning_rate": 7.1208825812580665e-06, + "loss": 0.0059, + "step": 816 + }, + { + "epoch": 2.42, + "learning_rate": 7.0978720374631326e-06, + "loss": 0.0012, + "step": 817 + }, + { + "epoch": 2.43, + "learning_rate": 7.07487826103708e-06, + "loss": 0.0164, + "step": 818 + }, + { + "epoch": 2.43, + "learning_rate": 7.051901384829021e-06, + "loss": 0.01, + "step": 819 + }, + { + "epoch": 2.43, + "learning_rate": 7.028941541590428e-06, + "loss": 0.0066, + "step": 820 + }, + { + "epoch": 2.44, + "learning_rate": 7.005998863974363e-06, + "loss": 0.0043, + "step": 821 + }, + { + "epoch": 2.44, + "learning_rate": 6.983073484534715e-06, + "loss": 0.0013, + "step": 822 + }, + { + "epoch": 2.44, + "learning_rate": 6.960165535725423e-06, + "loss": 0.003, + "step": 823 + }, + { + "epoch": 2.45, + "learning_rate": 6.937275149899726e-06, + "loss": 0.0013, + "step": 824 + }, + { + "epoch": 2.45, + "learning_rate": 6.914402459309384e-06, + "loss": 0.0095, + "step": 825 + }, + { + "epoch": 2.45, + "learning_rate": 6.891547596103931e-06, + "loss": 0.0008, + "step": 826 + }, + { + "epoch": 2.45, + "learning_rate": 6.868710692329895e-06, + "loss": 0.0017, + "step": 827 + }, + { + "epoch": 2.46, + "learning_rate": 6.845891879930038e-06, + "loss": 0.0015, + "step": 828 + }, + { + "epoch": 2.46, + "learning_rate": 6.823091290742603e-06, + "loss": 0.0041, + "step": 829 + }, + { + "epoch": 2.46, + "learning_rate": 6.8003090565005445e-06, + "loss": 0.0081, + "step": 830 + }, + { + "epoch": 2.47, + "learning_rate": 6.777545308830765e-06, + "loss": 0.0009, + "step": 831 + }, + { + "epoch": 2.47, + "learning_rate": 6.754800179253367e-06, + "loss": 0.001, + "step": 832 + }, + { + "epoch": 2.47, + "learning_rate": 6.732073799180876e-06, + "loss": 0.0013, + "step": 833 + }, + { + "epoch": 2.47, + "learning_rate": 6.709366299917497e-06, + "loss": 0.003, + "step": 834 + }, + { + "epoch": 2.48, + "learning_rate": 6.686677812658351e-06, + "loss": 0.0016, + "step": 835 + }, + { + "epoch": 2.48, + "learning_rate": 6.664008468488703e-06, + "loss": 0.0059, + "step": 836 + }, + { + "epoch": 2.48, + "learning_rate": 6.6413583983832304e-06, + "loss": 0.0002, + "step": 837 + }, + { + "epoch": 2.49, + "learning_rate": 6.6187277332052415e-06, + "loss": 0.002, + "step": 838 + }, + { + "epoch": 2.49, + "learning_rate": 6.59611660370594e-06, + "loss": 0.0021, + "step": 839 + }, + { + "epoch": 2.49, + "learning_rate": 6.573525140523651e-06, + "loss": 0.0107, + "step": 840 + }, + { + "epoch": 2.5, + "learning_rate": 6.5509534741830845e-06, + "loss": 0.0079, + "step": 841 + }, + { + "epoch": 2.5, + "learning_rate": 6.52840173509456e-06, + "loss": 0.0004, + "step": 842 + }, + { + "epoch": 2.5, + "learning_rate": 6.505870053553279e-06, + "loss": 0.0038, + "step": 843 + }, + { + "epoch": 2.5, + "learning_rate": 6.483358559738551e-06, + "loss": 0.0011, + "step": 844 + }, + { + "epoch": 2.51, + "learning_rate": 6.460867383713046e-06, + "loss": 0.0067, + "step": 845 + }, + { + "epoch": 2.51, + "learning_rate": 6.438396655422052e-06, + "loss": 0.0011, + "step": 846 + }, + { + "epoch": 2.51, + "learning_rate": 6.415946504692714e-06, + "loss": 0.0011, + "step": 847 + }, + { + "epoch": 2.52, + "learning_rate": 6.393517061233286e-06, + "loss": 0.0028, + "step": 848 + }, + { + "epoch": 2.52, + "learning_rate": 6.371108454632391e-06, + "loss": 0.0016, + "step": 849 + }, + { + "epoch": 2.52, + "learning_rate": 6.348720814358251e-06, + "loss": 0.0033, + "step": 850 + }, + { + "epoch": 2.53, + "learning_rate": 6.32635426975797e-06, + "loss": 0.0011, + "step": 851 + }, + { + "epoch": 2.53, + "learning_rate": 6.304008950056761e-06, + "loss": 0.0053, + "step": 852 + }, + { + "epoch": 2.53, + "learning_rate": 6.281684984357204e-06, + "loss": 0.0016, + "step": 853 + }, + { + "epoch": 2.53, + "learning_rate": 6.259382501638509e-06, + "loss": 0.0008, + "step": 854 + }, + { + "epoch": 2.54, + "learning_rate": 6.237101630755762e-06, + "loss": 0.0057, + "step": 855 + }, + { + "epoch": 2.54, + "learning_rate": 6.214842500439191e-06, + "loss": 0.0013, + "step": 856 + }, + { + "epoch": 2.54, + "learning_rate": 6.192605239293407e-06, + "loss": 0.0008, + "step": 857 + }, + { + "epoch": 2.55, + "learning_rate": 6.170389975796671e-06, + "loss": 0.0034, + "step": 858 + }, + { + "epoch": 2.55, + "learning_rate": 6.14819683830016e-06, + "loss": 0.0053, + "step": 859 + }, + { + "epoch": 2.55, + "learning_rate": 6.126025955027204e-06, + "loss": 0.0016, + "step": 860 + }, + { + "epoch": 2.55, + "learning_rate": 6.10387745407256e-06, + "loss": 0.0004, + "step": 861 + }, + { + "epoch": 2.56, + "learning_rate": 6.0817514634016695e-06, + "loss": 0.0008, + "step": 862 + }, + { + "epoch": 2.56, + "learning_rate": 6.05964811084992e-06, + "loss": 0.0091, + "step": 863 + }, + { + "epoch": 2.56, + "learning_rate": 6.0375675241219e-06, + "loss": 0.0051, + "step": 864 + }, + { + "epoch": 2.57, + "learning_rate": 6.015509830790672e-06, + "loss": 0.0004, + "step": 865 + }, + { + "epoch": 2.57, + "learning_rate": 5.993475158297027e-06, + "loss": 0.0012, + "step": 866 + }, + { + "epoch": 2.57, + "learning_rate": 5.97146363394874e-06, + "loss": 0.0102, + "step": 867 + }, + { + "epoch": 2.58, + "learning_rate": 5.949475384919865e-06, + "loss": 0.0004, + "step": 868 + }, + { + "epoch": 2.58, + "learning_rate": 5.92751053824997e-06, + "loss": 0.0017, + "step": 869 + }, + { + "epoch": 2.58, + "learning_rate": 5.905569220843406e-06, + "loss": 0.0144, + "step": 870 + }, + { + "epoch": 2.58, + "learning_rate": 5.8836515594685975e-06, + "loss": 0.0007, + "step": 871 + }, + { + "epoch": 2.59, + "learning_rate": 5.861757680757275e-06, + "loss": 0.0033, + "step": 872 + }, + { + "epoch": 2.59, + "learning_rate": 5.839887711203778e-06, + "loss": 0.0009, + "step": 873 + }, + { + "epoch": 2.59, + "learning_rate": 5.8180417771643e-06, + "loss": 0.0012, + "step": 874 + }, + { + "epoch": 2.6, + "learning_rate": 5.796220004856161e-06, + "loss": 0.0003, + "step": 875 + }, + { + "epoch": 2.6, + "learning_rate": 5.774422520357101e-06, + "loss": 0.0096, + "step": 876 + }, + { + "epoch": 2.6, + "learning_rate": 5.752649449604526e-06, + "loss": 0.0007, + "step": 877 + }, + { + "epoch": 2.61, + "learning_rate": 5.730900918394777e-06, + "loss": 0.0013, + "step": 878 + }, + { + "epoch": 2.61, + "learning_rate": 5.709177052382432e-06, + "loss": 0.0021, + "step": 879 + }, + { + "epoch": 2.61, + "learning_rate": 5.687477977079555e-06, + "loss": 0.0016, + "step": 880 + }, + { + "epoch": 2.61, + "learning_rate": 5.665803817854985e-06, + "loss": 0.0019, + "step": 881 + }, + { + "epoch": 2.62, + "learning_rate": 5.644154699933593e-06, + "loss": 0.0019, + "step": 882 + }, + { + "epoch": 2.62, + "learning_rate": 5.622530748395581e-06, + "loss": 0.0022, + "step": 883 + }, + { + "epoch": 2.62, + "learning_rate": 5.60093208817575e-06, + "loss": 0.005, + "step": 884 + }, + { + "epoch": 2.63, + "learning_rate": 5.579358844062774e-06, + "loss": 0.0013, + "step": 885 + }, + { + "epoch": 2.63, + "learning_rate": 5.557811140698486e-06, + "loss": 0.0012, + "step": 886 + }, + { + "epoch": 2.63, + "learning_rate": 5.536289102577152e-06, + "loss": 0.0005, + "step": 887 + }, + { + "epoch": 2.64, + "learning_rate": 5.5147928540447486e-06, + "loss": 0.0099, + "step": 888 + }, + { + "epoch": 2.64, + "learning_rate": 5.4933225192982586e-06, + "loss": 0.0017, + "step": 889 + }, + { + "epoch": 2.64, + "learning_rate": 5.471878222384944e-06, + "loss": 0.0004, + "step": 890 + }, + { + "epoch": 2.64, + "learning_rate": 5.450460087201632e-06, + "loss": 0.0002, + "step": 891 + }, + { + "epoch": 2.65, + "learning_rate": 5.429068237493982e-06, + "loss": 0.0044, + "step": 892 + }, + { + "epoch": 2.65, + "learning_rate": 5.407702796855809e-06, + "loss": 0.0015, + "step": 893 + }, + { + "epoch": 2.65, + "learning_rate": 5.386363888728337e-06, + "loss": 0.0119, + "step": 894 + }, + { + "epoch": 2.66, + "learning_rate": 5.365051636399485e-06, + "loss": 0.0002, + "step": 895 + }, + { + "epoch": 2.66, + "learning_rate": 5.343766163003181e-06, + "loss": 0.0006, + "step": 896 + }, + { + "epoch": 2.66, + "learning_rate": 5.322507591518627e-06, + "loss": 0.0014, + "step": 897 + }, + { + "epoch": 2.66, + "learning_rate": 5.301276044769599e-06, + "loss": 0.0009, + "step": 898 + }, + { + "epoch": 2.67, + "learning_rate": 5.2800716454237266e-06, + "loss": 0.0007, + "step": 899 + }, + { + "epoch": 2.67, + "learning_rate": 5.2588945159918e-06, + "loss": 0.004, + "step": 900 + }, + { + "epoch": 2.67, + "learning_rate": 5.237744778827063e-06, + "loss": 0.0083, + "step": 901 + }, + { + "epoch": 2.68, + "learning_rate": 5.216622556124475e-06, + "loss": 0.0066, + "step": 902 + }, + { + "epoch": 2.68, + "learning_rate": 5.195527969920047e-06, + "loss": 0.0017, + "step": 903 + }, + { + "epoch": 2.68, + "learning_rate": 5.174461142090112e-06, + "loss": 0.001, + "step": 904 + }, + { + "epoch": 2.69, + "learning_rate": 5.153422194350614e-06, + "loss": 0.0012, + "step": 905 + }, + { + "epoch": 2.69, + "learning_rate": 5.1324112482564345e-06, + "loss": 0.0031, + "step": 906 + }, + { + "epoch": 2.69, + "learning_rate": 5.111428425200662e-06, + "loss": 0.0006, + "step": 907 + }, + { + "epoch": 2.69, + "learning_rate": 5.090473846413908e-06, + "loss": 0.0004, + "step": 908 + }, + { + "epoch": 2.7, + "learning_rate": 5.0695476329635825e-06, + "loss": 0.0022, + "step": 909 + }, + { + "epoch": 2.7, + "learning_rate": 5.048649905753238e-06, + "loss": 0.0002, + "step": 910 + }, + { + "epoch": 2.7, + "learning_rate": 5.027780785521828e-06, + "loss": 0.0002, + "step": 911 + }, + { + "epoch": 2.71, + "learning_rate": 5.006940392843022e-06, + "loss": 0.0008, + "step": 912 + }, + { + "epoch": 2.71, + "learning_rate": 4.986128848124523e-06, + "loss": 0.0206, + "step": 913 + }, + { + "epoch": 2.71, + "learning_rate": 4.965346271607359e-06, + "loss": 0.0045, + "step": 914 + }, + { + "epoch": 2.72, + "learning_rate": 4.944592783365194e-06, + "loss": 0.0027, + "step": 915 + }, + { + "epoch": 2.72, + "learning_rate": 4.923868503303619e-06, + "loss": 0.0007, + "step": 916 + }, + { + "epoch": 2.72, + "learning_rate": 4.903173551159481e-06, + "loss": 0.003, + "step": 917 + }, + { + "epoch": 2.72, + "learning_rate": 4.882508046500191e-06, + "loss": 0.0019, + "step": 918 + }, + { + "epoch": 2.73, + "learning_rate": 4.861872108723001e-06, + "loss": 0.0072, + "step": 919 + }, + { + "epoch": 2.73, + "learning_rate": 4.841265857054356e-06, + "loss": 0.0023, + "step": 920 + }, + { + "epoch": 2.73, + "learning_rate": 4.820689410549181e-06, + "loss": 0.0047, + "step": 921 + }, + { + "epoch": 2.74, + "learning_rate": 4.80014288809019e-06, + "loss": 0.0046, + "step": 922 + }, + { + "epoch": 2.74, + "learning_rate": 4.7796264083872194e-06, + "loss": 0.0024, + "step": 923 + }, + { + "epoch": 2.74, + "learning_rate": 4.759140089976524e-06, + "loss": 0.0018, + "step": 924 + }, + { + "epoch": 2.74, + "learning_rate": 4.7386840512201e-06, + "loss": 0.0067, + "step": 925 + }, + { + "epoch": 2.75, + "learning_rate": 4.718258410304999e-06, + "loss": 0.012, + "step": 926 + }, + { + "epoch": 2.75, + "learning_rate": 4.697863285242645e-06, + "loss": 0.003, + "step": 927 + }, + { + "epoch": 2.75, + "learning_rate": 4.67749879386816e-06, + "loss": 0.0001, + "step": 928 + }, + { + "epoch": 2.76, + "learning_rate": 4.6571650538396615e-06, + "loss": 0.0044, + "step": 929 + }, + { + "epoch": 2.76, + "learning_rate": 4.6368621826376115e-06, + "loss": 0.0038, + "step": 930 + }, + { + "epoch": 2.76, + "learning_rate": 4.616590297564116e-06, + "loss": 0.0051, + "step": 931 + }, + { + "epoch": 2.77, + "learning_rate": 4.596349515742266e-06, + "loss": 0.0013, + "step": 932 + }, + { + "epoch": 2.77, + "learning_rate": 4.576139954115432e-06, + "loss": 0.0007, + "step": 933 + }, + { + "epoch": 2.77, + "learning_rate": 4.555961729446617e-06, + "loss": 0.0026, + "step": 934 + }, + { + "epoch": 2.77, + "learning_rate": 4.535814958317783e-06, + "loss": 0.0019, + "step": 935 + }, + { + "epoch": 2.78, + "learning_rate": 4.5156997571291425e-06, + "loss": 0.0008, + "step": 936 + }, + { + "epoch": 2.78, + "learning_rate": 4.495616242098523e-06, + "loss": 0.0004, + "step": 937 + }, + { + "epoch": 2.78, + "learning_rate": 4.475564529260681e-06, + "loss": 0.0001, + "step": 938 + }, + { + "epoch": 2.79, + "learning_rate": 4.455544734466624e-06, + "loss": 0.0006, + "step": 939 + }, + { + "epoch": 2.79, + "learning_rate": 4.43555697338296e-06, + "loss": 0.0004, + "step": 940 + }, + { + "epoch": 2.79, + "learning_rate": 4.415601361491213e-06, + "loss": 0.0101, + "step": 941 + }, + { + "epoch": 2.8, + "learning_rate": 4.395678014087162e-06, + "loss": 0.0009, + "step": 942 + }, + { + "epoch": 2.8, + "learning_rate": 4.375787046280177e-06, + "loss": 0.0018, + "step": 943 + }, + { + "epoch": 2.8, + "learning_rate": 4.355928572992547e-06, + "loss": 0.0018, + "step": 944 + }, + { + "epoch": 2.8, + "learning_rate": 4.336102708958827e-06, + "loss": 0.0029, + "step": 945 + }, + { + "epoch": 2.81, + "learning_rate": 4.316309568725156e-06, + "loss": 0.0014, + "step": 946 + }, + { + "epoch": 2.81, + "learning_rate": 4.296549266648621e-06, + "loss": 0.003, + "step": 947 + }, + { + "epoch": 2.81, + "learning_rate": 4.276821916896574e-06, + "loss": 0.0038, + "step": 948 + }, + { + "epoch": 2.82, + "learning_rate": 4.25712763344599e-06, + "loss": 0.0051, + "step": 949 + }, + { + "epoch": 2.82, + "learning_rate": 4.2374665300827865e-06, + "loss": 0.0064, + "step": 950 + }, + { + "epoch": 2.82, + "learning_rate": 4.217838720401188e-06, + "loss": 0.0024, + "step": 951 + }, + { + "epoch": 2.82, + "learning_rate": 4.198244317803071e-06, + "loss": 0.0007, + "step": 952 + }, + { + "epoch": 2.83, + "learning_rate": 4.178683435497275e-06, + "loss": 0.0045, + "step": 953 + }, + { + "epoch": 2.83, + "learning_rate": 4.1591561864989905e-06, + "loss": 0.0076, + "step": 954 + }, + { + "epoch": 2.83, + "learning_rate": 4.139662683629087e-06, + "loss": 0.0025, + "step": 955 + }, + { + "epoch": 2.84, + "learning_rate": 4.120203039513448e-06, + "loss": 0.0006, + "step": 956 + }, + { + "epoch": 2.84, + "learning_rate": 4.100777366582349e-06, + "loss": 0.0001, + "step": 957 + }, + { + "epoch": 2.84, + "learning_rate": 4.081385777069789e-06, + "loss": 0.0008, + "step": 958 + }, + { + "epoch": 2.85, + "learning_rate": 4.062028383012841e-06, + "loss": 0.0004, + "step": 959 + }, + { + "epoch": 2.85, + "learning_rate": 4.042705296251018e-06, + "loss": 0.0001, + "step": 960 + }, + { + "epoch": 2.85, + "learning_rate": 4.023416628425614e-06, + "loss": 0.0063, + "step": 961 + }, + { + "epoch": 2.85, + "learning_rate": 4.004162490979067e-06, + "loss": 0.0008, + "step": 962 + }, + { + "epoch": 2.86, + "learning_rate": 3.984942995154305e-06, + "loss": 0.0056, + "step": 963 + }, + { + "epoch": 2.86, + "learning_rate": 3.965758251994115e-06, + "loss": 0.0115, + "step": 964 + }, + { + "epoch": 2.86, + "learning_rate": 3.946608372340498e-06, + "loss": 0.0006, + "step": 965 + }, + { + "epoch": 2.87, + "learning_rate": 3.927493466834028e-06, + "loss": 0.0015, + "step": 966 + }, + { + "epoch": 2.87, + "learning_rate": 3.908413645913201e-06, + "loss": 0.0149, + "step": 967 + }, + { + "epoch": 2.87, + "learning_rate": 3.889369019813823e-06, + "loss": 0.0024, + "step": 968 + }, + { + "epoch": 2.88, + "learning_rate": 3.870359698568355e-06, + "loss": 0.0037, + "step": 969 + }, + { + "epoch": 2.88, + "learning_rate": 3.851385792005269e-06, + "loss": 0.0009, + "step": 970 + }, + { + "epoch": 2.88, + "learning_rate": 3.8324474097484335e-06, + "loss": 0.0009, + "step": 971 + }, + { + "epoch": 2.88, + "learning_rate": 3.813544661216475e-06, + "loss": 0.0003, + "step": 972 + }, + { + "epoch": 2.89, + "learning_rate": 3.79467765562213e-06, + "loss": 0.0052, + "step": 973 + }, + { + "epoch": 2.89, + "learning_rate": 3.775846501971636e-06, + "loss": 0.0029, + "step": 974 + }, + { + "epoch": 2.89, + "learning_rate": 3.7570513090640915e-06, + "loss": 0.0025, + "step": 975 + }, + { + "epoch": 2.9, + "learning_rate": 3.738292185490825e-06, + "loss": 0.0018, + "step": 976 + }, + { + "epoch": 2.9, + "learning_rate": 3.7195692396347693e-06, + "loss": 0.0001, + "step": 977 + }, + { + "epoch": 2.9, + "learning_rate": 3.7008825796698388e-06, + "loss": 0.0004, + "step": 978 + }, + { + "epoch": 2.91, + "learning_rate": 3.6822323135603054e-06, + "loss": 0.0002, + "step": 979 + }, + { + "epoch": 2.91, + "learning_rate": 3.6636185490601607e-06, + "loss": 0.0001, + "step": 980 + }, + { + "epoch": 2.91, + "learning_rate": 3.645041393712514e-06, + "loss": 0.0007, + "step": 981 + }, + { + "epoch": 2.91, + "learning_rate": 3.626500954848957e-06, + "loss": 0.0004, + "step": 982 + }, + { + "epoch": 2.92, + "learning_rate": 3.6079973395889545e-06, + "loss": 0.0024, + "step": 983 + }, + { + "epoch": 2.92, + "learning_rate": 3.5895306548392006e-06, + "loss": 0.0177, + "step": 984 + }, + { + "epoch": 2.92, + "learning_rate": 3.571101007293043e-06, + "loss": 0.0017, + "step": 985 + }, + { + "epoch": 2.93, + "learning_rate": 3.5527085034298314e-06, + "loss": 0.0011, + "step": 986 + }, + { + "epoch": 2.93, + "learning_rate": 3.5343532495143075e-06, + "loss": 0.0002, + "step": 987 + }, + { + "epoch": 2.93, + "learning_rate": 3.5160353515960053e-06, + "loss": 0.005, + "step": 988 + }, + { + "epoch": 2.93, + "learning_rate": 3.497754915508632e-06, + "loss": 0.002, + "step": 989 + }, + { + "epoch": 2.94, + "learning_rate": 3.479512046869452e-06, + "loss": 0.0014, + "step": 990 + }, + { + "epoch": 2.94, + "learning_rate": 3.461306851078674e-06, + "loss": 0.0122, + "step": 991 + }, + { + "epoch": 2.94, + "learning_rate": 3.443139433318855e-06, + "loss": 0.0039, + "step": 992 + }, + { + "epoch": 2.95, + "learning_rate": 3.4250098985542847e-06, + "loss": 0.0077, + "step": 993 + }, + { + "epoch": 2.95, + "learning_rate": 3.406918351530376e-06, + "loss": 0.0011, + "step": 994 + }, + { + "epoch": 2.95, + "learning_rate": 3.3888648967730654e-06, + "loss": 0.0015, + "step": 995 + }, + { + "epoch": 2.96, + "learning_rate": 3.3708496385882117e-06, + "loss": 0.0003, + "step": 996 + }, + { + "epoch": 2.96, + "learning_rate": 3.352872681060976e-06, + "loss": 0.0008, + "step": 997 + }, + { + "epoch": 2.96, + "learning_rate": 3.334934128055245e-06, + "loss": 0.0006, + "step": 998 + }, + { + "epoch": 2.96, + "learning_rate": 3.3170340832130134e-06, + "loss": 0.0013, + "step": 999 + }, + { + "epoch": 2.97, + "learning_rate": 3.2991726499537967e-06, + "loss": 0.0017, + "step": 1000 + }, + { + "epoch": 2.97, + "learning_rate": 3.281349931474015e-06, + "loss": 0.0128, + "step": 1001 + }, + { + "epoch": 2.97, + "learning_rate": 3.2635660307464247e-06, + "loss": 0.0013, + "step": 1002 + }, + { + "epoch": 2.98, + "learning_rate": 3.245821050519501e-06, + "loss": 0.0003, + "step": 1003 + }, + { + "epoch": 2.98, + "learning_rate": 3.228115093316848e-06, + "loss": 0.0062, + "step": 1004 + }, + { + "epoch": 2.98, + "learning_rate": 3.210448261436615e-06, + "loss": 0.0008, + "step": 1005 + }, + { + "epoch": 2.99, + "learning_rate": 3.1928206569508992e-06, + "loss": 0.002, + "step": 1006 + }, + { + "epoch": 2.99, + "learning_rate": 3.175232381705161e-06, + "loss": 0.0017, + "step": 1007 + }, + { + "epoch": 2.99, + "learning_rate": 3.1576835373176206e-06, + "loss": 0.0032, + "step": 1008 + }, + { + "epoch": 2.99, + "learning_rate": 3.1401742251786926e-06, + "loss": 0.0043, + "step": 1009 + }, + { + "epoch": 3.0, + "learning_rate": 3.1227045464503856e-06, + "loss": 0.0013, + "step": 1010 + }, + { + "epoch": 3.0, + "learning_rate": 3.105274602065723e-06, + "loss": 0.0119, + "step": 1011 + }, + { + "epoch": 3.0, + "learning_rate": 3.087884492728158e-06, + "loss": 0.0046, + "step": 1012 + }, + { + "epoch": 3.01, + "learning_rate": 3.0705343189109948e-06, + "loss": 0.0002, + "step": 1013 + }, + { + "epoch": 3.01, + "learning_rate": 3.0532241808567966e-06, + "loss": 0.0004, + "step": 1014 + }, + { + "epoch": 3.01, + "learning_rate": 3.0359541785768255e-06, + "loss": 0.0009, + "step": 1015 + }, + { + "epoch": 3.01, + "learning_rate": 3.018724411850451e-06, + "loss": 0.0022, + "step": 1016 + }, + { + "epoch": 3.02, + "learning_rate": 3.0015349802245817e-06, + "loss": 0.0002, + "step": 1017 + }, + { + "epoch": 3.02, + "learning_rate": 2.9843859830130696e-06, + "loss": 0.0001, + "step": 1018 + }, + { + "epoch": 3.02, + "learning_rate": 2.9672775192961755e-06, + "loss": 0.0022, + "step": 1019 + }, + { + "epoch": 3.03, + "learning_rate": 2.9502096879199606e-06, + "loss": 0.0003, + "step": 1020 + }, + { + "epoch": 3.03, + "learning_rate": 2.933182587495724e-06, + "loss": 0.0003, + "step": 1021 + }, + { + "epoch": 3.03, + "learning_rate": 2.9161963163994454e-06, + "loss": 0.0004, + "step": 1022 + }, + { + "epoch": 3.04, + "learning_rate": 2.899250972771207e-06, + "loss": 0.0002, + "step": 1023 + }, + { + "epoch": 3.04, + "learning_rate": 2.882346654514627e-06, + "loss": 0.0002, + "step": 1024 + }, + { + "epoch": 3.04, + "learning_rate": 2.8654834592962923e-06, + "loss": 0.0005, + "step": 1025 + }, + { + "epoch": 3.04, + "learning_rate": 2.8486614845451965e-06, + "loss": 0.0002, + "step": 1026 + }, + { + "epoch": 3.05, + "learning_rate": 2.8318808274521915e-06, + "loss": 0.0001, + "step": 1027 + }, + { + "epoch": 3.05, + "learning_rate": 2.8151415849693886e-06, + "loss": 0.0004, + "step": 1028 + }, + { + "epoch": 3.05, + "learning_rate": 2.7984438538096393e-06, + "loss": 0.0006, + "step": 1029 + }, + { + "epoch": 3.06, + "learning_rate": 2.781787730445953e-06, + "loss": 0.0029, + "step": 1030 + }, + { + "epoch": 3.06, + "learning_rate": 2.7651733111109415e-06, + "loss": 0.0011, + "step": 1031 + }, + { + "epoch": 3.06, + "learning_rate": 2.7486006917962727e-06, + "loss": 0.0003, + "step": 1032 + }, + { + "epoch": 3.07, + "learning_rate": 2.732069968252108e-06, + "loss": 0.0002, + "step": 1033 + }, + { + "epoch": 3.07, + "learning_rate": 2.715581235986552e-06, + "loss": 0.0019, + "step": 1034 + }, + { + "epoch": 3.07, + "learning_rate": 2.699134590265099e-06, + "loss": 0.0011, + "step": 1035 + }, + { + "epoch": 3.07, + "learning_rate": 2.6827301261100846e-06, + "loss": 0.0018, + "step": 1036 + }, + { + "epoch": 3.08, + "learning_rate": 2.666367938300137e-06, + "loss": 0.0029, + "step": 1037 + }, + { + "epoch": 3.08, + "learning_rate": 2.6500481213696227e-06, + "loss": 0.0002, + "step": 1038 + }, + { + "epoch": 3.08, + "learning_rate": 2.6337707696081093e-06, + "loss": 0.0001, + "step": 1039 + }, + { + "epoch": 3.09, + "learning_rate": 2.6175359770598195e-06, + "loss": 0.0006, + "step": 1040 + }, + { + "epoch": 3.09, + "learning_rate": 2.6013438375230836e-06, + "loss": 0.0009, + "step": 1041 + }, + { + "epoch": 3.09, + "learning_rate": 2.5851944445497955e-06, + "loss": 0.0004, + "step": 1042 + }, + { + "epoch": 3.09, + "learning_rate": 2.569087891444877e-06, + "loss": 0.0002, + "step": 1043 + }, + { + "epoch": 3.1, + "learning_rate": 2.5530242712657494e-06, + "loss": 0.0005, + "step": 1044 + }, + { + "epoch": 3.1, + "learning_rate": 2.5370036768217666e-06, + "loss": 0.0006, + "step": 1045 + }, + { + "epoch": 3.1, + "learning_rate": 2.5210262006737074e-06, + "loss": 0.0009, + "step": 1046 + }, + { + "epoch": 3.11, + "learning_rate": 2.5050919351332303e-06, + "loss": 0.0002, + "step": 1047 + }, + { + "epoch": 3.11, + "learning_rate": 2.48920097226233e-06, + "loss": 0.0008, + "step": 1048 + }, + { + "epoch": 3.11, + "learning_rate": 2.473353403872826e-06, + "loss": 0.0011, + "step": 1049 + }, + { + "epoch": 3.12, + "learning_rate": 2.457549321525815e-06, + "loss": 0.0003, + "step": 1050 + }, + { + "epoch": 3.12, + "learning_rate": 2.4417888165311533e-06, + "loss": 0.0008, + "step": 1051 + }, + { + "epoch": 3.12, + "learning_rate": 2.426071979946918e-06, + "loss": 0.0001, + "step": 1052 + }, + { + "epoch": 3.12, + "learning_rate": 2.4103989025788923e-06, + "loss": 0.0011, + "step": 1053 + }, + { + "epoch": 3.13, + "learning_rate": 2.394769674980035e-06, + "loss": 0.0006, + "step": 1054 + }, + { + "epoch": 3.13, + "learning_rate": 2.379184387449952e-06, + "loss": 0.0001, + "step": 1055 + }, + { + "epoch": 3.13, + "learning_rate": 2.3636431300343875e-06, + "loss": 0.0004, + "step": 1056 + }, + { + "epoch": 3.14, + "learning_rate": 2.348145992524694e-06, + "loss": 0.0008, + "step": 1057 + }, + { + "epoch": 3.14, + "learning_rate": 2.332693064457321e-06, + "loss": 0.0002, + "step": 1058 + }, + { + "epoch": 3.14, + "learning_rate": 2.3172844351132786e-06, + "loss": 0.0007, + "step": 1059 + }, + { + "epoch": 3.15, + "learning_rate": 2.301920193517655e-06, + "loss": 0.0007, + "step": 1060 + }, + { + "epoch": 3.15, + "learning_rate": 2.286600428439074e-06, + "loss": 0.0004, + "step": 1061 + }, + { + "epoch": 3.15, + "learning_rate": 2.2713252283891873e-06, + "loss": 0.0002, + "step": 1062 + }, + { + "epoch": 3.15, + "learning_rate": 2.256094681622174e-06, + "loss": 0.0005, + "step": 1063 + }, + { + "epoch": 3.16, + "learning_rate": 2.2409088761342234e-06, + "loss": 0.0002, + "step": 1064 + }, + { + "epoch": 3.16, + "learning_rate": 2.2257678996630193e-06, + "loss": 0.0063, + "step": 1065 + }, + { + "epoch": 3.16, + "learning_rate": 2.210671839687252e-06, + "loss": 0.0002, + "step": 1066 + }, + { + "epoch": 3.17, + "learning_rate": 2.195620783426092e-06, + "loss": 0.0002, + "step": 1067 + }, + { + "epoch": 3.17, + "learning_rate": 2.1806148178387043e-06, + "loss": 0.0003, + "step": 1068 + }, + { + "epoch": 3.17, + "learning_rate": 2.1656540296237316e-06, + "loss": 0.0001, + "step": 1069 + }, + { + "epoch": 3.18, + "learning_rate": 2.150738505218801e-06, + "loss": 0.001, + "step": 1070 + }, + { + "epoch": 3.18, + "learning_rate": 2.1358683308000263e-06, + "loss": 0.0009, + "step": 1071 + }, + { + "epoch": 3.18, + "learning_rate": 2.1210435922814964e-06, + "loss": 0.0003, + "step": 1072 + }, + { + "epoch": 3.18, + "learning_rate": 2.1062643753148003e-06, + "loss": 0.0002, + "step": 1073 + }, + { + "epoch": 3.19, + "learning_rate": 2.0915307652885163e-06, + "loss": 0.0009, + "step": 1074 + }, + { + "epoch": 3.19, + "learning_rate": 2.076842847327727e-06, + "loss": 0.0007, + "step": 1075 + }, + { + "epoch": 3.19, + "learning_rate": 2.0622007062935123e-06, + "loss": 0.0005, + "step": 1076 + }, + { + "epoch": 3.2, + "learning_rate": 2.047604426782489e-06, + "loss": 0.0001, + "step": 1077 + }, + { + "epoch": 3.2, + "learning_rate": 2.033054093126294e-06, + "loss": 0.0007, + "step": 1078 + }, + { + "epoch": 3.2, + "learning_rate": 2.018549789391102e-06, + "loss": 0.0004, + "step": 1079 + }, + { + "epoch": 3.2, + "learning_rate": 2.0040915993771513e-06, + "loss": 0.0002, + "step": 1080 + }, + { + "epoch": 3.21, + "learning_rate": 1.9896796066182567e-06, + "loss": 0.0003, + "step": 1081 + }, + { + "epoch": 3.21, + "learning_rate": 1.975313894381311e-06, + "loss": 0.0013, + "step": 1082 + }, + { + "epoch": 3.21, + "learning_rate": 1.9609945456658276e-06, + "loss": 0.0003, + "step": 1083 + }, + { + "epoch": 3.22, + "learning_rate": 1.946721643203443e-06, + "loss": 0.0004, + "step": 1084 + }, + { + "epoch": 3.22, + "learning_rate": 1.9324952694574493e-06, + "loss": 0.0003, + "step": 1085 + }, + { + "epoch": 3.22, + "learning_rate": 1.9183155066223113e-06, + "loss": 0.0001, + "step": 1086 + }, + { + "epoch": 3.23, + "learning_rate": 1.9041824366231953e-06, + "loss": 0.0003, + "step": 1087 + }, + { + "epoch": 3.23, + "learning_rate": 1.890096141115495e-06, + "loss": 0.0008, + "step": 1088 + }, + { + "epoch": 3.23, + "learning_rate": 1.8760567014843545e-06, + "loss": 0.0001, + "step": 1089 + }, + { + "epoch": 3.23, + "learning_rate": 1.862064198844208e-06, + "loss": 0.0001, + "step": 1090 + }, + { + "epoch": 3.24, + "learning_rate": 1.848118714038305e-06, + "loss": 0.001, + "step": 1091 + }, + { + "epoch": 3.24, + "learning_rate": 1.8342203276382454e-06, + "loss": 0.0008, + "step": 1092 + }, + { + "epoch": 3.24, + "learning_rate": 1.8203691199435037e-06, + "loss": 0.0003, + "step": 1093 + }, + { + "epoch": 3.25, + "learning_rate": 1.8065651709809906e-06, + "loss": 0.0003, + "step": 1094 + }, + { + "epoch": 3.25, + "learning_rate": 1.792808560504563e-06, + "loss": 0.0001, + "step": 1095 + }, + { + "epoch": 3.25, + "learning_rate": 1.7790993679945723e-06, + "loss": 0.0009, + "step": 1096 + }, + { + "epoch": 3.26, + "learning_rate": 1.765437672657414e-06, + "loss": 0.0001, + "step": 1097 + }, + { + "epoch": 3.26, + "learning_rate": 1.7518235534250639e-06, + "loss": 0.0004, + "step": 1098 + }, + { + "epoch": 3.26, + "learning_rate": 1.7382570889546124e-06, + "loss": 0.0004, + "step": 1099 + }, + { + "epoch": 3.26, + "learning_rate": 1.7247383576278277e-06, + "loss": 0.0004, + "step": 1100 + }, + { + "epoch": 3.27, + "learning_rate": 1.7112674375506931e-06, + "loss": 0.0011, + "step": 1101 + }, + { + "epoch": 3.27, + "learning_rate": 1.6978444065529554e-06, + "loss": 0.0001, + "step": 1102 + }, + { + "epoch": 3.27, + "learning_rate": 1.6844693421876778e-06, + "loss": 0.0003, + "step": 1103 + }, + { + "epoch": 3.28, + "learning_rate": 1.6711423217307888e-06, + "loss": 0.0002, + "step": 1104 + }, + { + "epoch": 3.28, + "learning_rate": 1.6578634221806445e-06, + "loss": 0.0001, + "step": 1105 + }, + { + "epoch": 3.28, + "learning_rate": 1.6446327202575662e-06, + "loss": 0.0001, + "step": 1106 + }, + { + "epoch": 3.28, + "learning_rate": 1.6314502924034158e-06, + "loss": 0.0001, + "step": 1107 + }, + { + "epoch": 3.29, + "learning_rate": 1.6183162147811448e-06, + "loss": 0.0001, + "step": 1108 + }, + { + "epoch": 3.29, + "learning_rate": 1.6052305632743592e-06, + "loss": 0.0002, + "step": 1109 + }, + { + "epoch": 3.29, + "learning_rate": 1.5921934134868655e-06, + "loss": 0.0001, + "step": 1110 + }, + { + "epoch": 3.3, + "learning_rate": 1.5792048407422633e-06, + "loss": 0.0034, + "step": 1111 + }, + { + "epoch": 3.3, + "learning_rate": 1.5662649200834822e-06, + "loss": 0.0, + "step": 1112 + }, + { + "epoch": 3.3, + "learning_rate": 1.553373726272358e-06, + "loss": 0.0002, + "step": 1113 + }, + { + "epoch": 3.31, + "learning_rate": 1.540531333789207e-06, + "loss": 0.0005, + "step": 1114 + }, + { + "epoch": 3.31, + "learning_rate": 1.5277378168323886e-06, + "loss": 0.0, + "step": 1115 + }, + { + "epoch": 3.31, + "learning_rate": 1.5149932493178754e-06, + "loss": 0.0001, + "step": 1116 + }, + { + "epoch": 3.31, + "learning_rate": 1.5022977048788335e-06, + "loss": 0.0002, + "step": 1117 + }, + { + "epoch": 3.32, + "learning_rate": 1.4896512568651888e-06, + "loss": 0.0001, + "step": 1118 + }, + { + "epoch": 3.32, + "learning_rate": 1.4770539783432113e-06, + "loss": 0.0004, + "step": 1119 + }, + { + "epoch": 3.32, + "learning_rate": 1.464505942095087e-06, + "loss": 0.001, + "step": 1120 + }, + { + "epoch": 3.33, + "learning_rate": 1.4520072206184954e-06, + "loss": 0.0002, + "step": 1121 + }, + { + "epoch": 3.33, + "learning_rate": 1.4395578861262037e-06, + "loss": 0.0002, + "step": 1122 + }, + { + "epoch": 3.33, + "learning_rate": 1.4271580105456273e-06, + "loss": 0.0002, + "step": 1123 + }, + { + "epoch": 3.34, + "learning_rate": 1.4148076655184373e-06, + "loss": 0.0001, + "step": 1124 + }, + { + "epoch": 3.34, + "learning_rate": 1.4025069224001342e-06, + "loss": 0.0003, + "step": 1125 + }, + { + "epoch": 3.34, + "learning_rate": 1.3902558522596355e-06, + "loss": 0.0002, + "step": 1126 + }, + { + "epoch": 3.34, + "learning_rate": 1.3780545258788703e-06, + "loss": 0.0003, + "step": 1127 + }, + { + "epoch": 3.35, + "learning_rate": 1.3659030137523655e-06, + "loss": 0.0004, + "step": 1128 + }, + { + "epoch": 3.35, + "learning_rate": 1.3538013860868438e-06, + "loss": 0.0002, + "step": 1129 + }, + { + "epoch": 3.35, + "learning_rate": 1.3417497128008084e-06, + "loss": 0.0001, + "step": 1130 + }, + { + "epoch": 3.36, + "learning_rate": 1.3297480635241522e-06, + "loss": 0.0001, + "step": 1131 + }, + { + "epoch": 3.36, + "learning_rate": 1.317796507597745e-06, + "loss": 0.0001, + "step": 1132 + }, + { + "epoch": 3.36, + "learning_rate": 1.3058951140730415e-06, + "loss": 0.0006, + "step": 1133 + }, + { + "epoch": 3.36, + "learning_rate": 1.2940439517116677e-06, + "loss": 0.0002, + "step": 1134 + }, + { + "epoch": 3.37, + "learning_rate": 1.2822430889850434e-06, + "loss": 0.0002, + "step": 1135 + }, + { + "epoch": 3.37, + "learning_rate": 1.270492594073972e-06, + "loss": 0.0003, + "step": 1136 + }, + { + "epoch": 3.37, + "learning_rate": 1.2587925348682517e-06, + "loss": 0.0001, + "step": 1137 + }, + { + "epoch": 3.38, + "learning_rate": 1.2471429789662847e-06, + "loss": 0.0001, + "step": 1138 + }, + { + "epoch": 3.38, + "learning_rate": 1.2355439936746826e-06, + "loss": 0.0, + "step": 1139 + }, + { + "epoch": 3.38, + "learning_rate": 1.2239956460078795e-06, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 3.39, + "learning_rate": 1.2124980026877442e-06, + "loss": 0.0001, + "step": 1141 + }, + { + "epoch": 3.39, + "learning_rate": 1.2010511301431981e-06, + "loss": 0.0004, + "step": 1142 + }, + { + "epoch": 3.39, + "learning_rate": 1.1896550945098295e-06, + "loss": 0.0014, + "step": 1143 + }, + { + "epoch": 3.39, + "learning_rate": 1.1783099616295058e-06, + "loss": 0.0002, + "step": 1144 + }, + { + "epoch": 3.4, + "learning_rate": 1.167015797050003e-06, + "loss": 0.0001, + "step": 1145 + }, + { + "epoch": 3.4, + "learning_rate": 1.1557726660246205e-06, + "loss": 0.0013, + "step": 1146 + }, + { + "epoch": 3.4, + "learning_rate": 1.1445806335118037e-06, + "loss": 0.0001, + "step": 1147 + }, + { + "epoch": 3.41, + "learning_rate": 1.1334397641747718e-06, + "loss": 0.0001, + "step": 1148 + }, + { + "epoch": 3.41, + "learning_rate": 1.122350122381145e-06, + "loss": 0.0002, + "step": 1149 + }, + { + "epoch": 3.41, + "learning_rate": 1.111311772202569e-06, + "loss": 0.0002, + "step": 1150 + }, + { + "epoch": 3.42, + "learning_rate": 1.1003247774143455e-06, + "loss": 0.0013, + "step": 1151 + }, + { + "epoch": 3.42, + "learning_rate": 1.0893892014950637e-06, + "loss": 0.0001, + "step": 1152 + }, + { + "epoch": 3.42, + "learning_rate": 1.078505107626242e-06, + "loss": 0.0001, + "step": 1153 + }, + { + "epoch": 3.42, + "learning_rate": 1.0676725586919456e-06, + "loss": 0.0025, + "step": 1154 + }, + { + "epoch": 3.43, + "learning_rate": 1.0568916172784383e-06, + "loss": 0.0003, + "step": 1155 + }, + { + "epoch": 3.43, + "learning_rate": 1.0461623456738167e-06, + "loss": 0.0003, + "step": 1156 + }, + { + "epoch": 3.43, + "learning_rate": 1.0354848058676448e-06, + "loss": 0.0001, + "step": 1157 + }, + { + "epoch": 3.44, + "learning_rate": 1.0248590595506046e-06, + "loss": 0.0018, + "step": 1158 + }, + { + "epoch": 3.44, + "learning_rate": 1.014285168114133e-06, + "loss": 0.0002, + "step": 1159 + }, + { + "epoch": 3.44, + "learning_rate": 1.0037631926500712e-06, + "loss": 0.0001, + "step": 1160 + }, + { + "epoch": 3.45, + "learning_rate": 9.932931939503088e-07, + "loss": 0.0003, + "step": 1161 + }, + { + "epoch": 3.45, + "learning_rate": 9.828752325064339e-07, + "loss": 0.0, + "step": 1162 + }, + { + "epoch": 3.45, + "learning_rate": 9.725093685093867e-07, + "loss": 0.0001, + "step": 1163 + }, + { + "epoch": 3.45, + "learning_rate": 9.621956618491024e-07, + "loss": 0.0001, + "step": 1164 + }, + { + "epoch": 3.46, + "learning_rate": 9.519341721141751e-07, + "loss": 0.0004, + "step": 1165 + }, + { + "epoch": 3.46, + "learning_rate": 9.41724958591509e-07, + "loss": 0.0001, + "step": 1166 + }, + { + "epoch": 3.46, + "learning_rate": 9.31568080265981e-07, + "loss": 0.0003, + "step": 1167 + }, + { + "epoch": 3.47, + "learning_rate": 9.214635958200846e-07, + "loss": 0.0013, + "step": 1168 + }, + { + "epoch": 3.47, + "learning_rate": 9.114115636336151e-07, + "loss": 0.0001, + "step": 1169 + }, + { + "epoch": 3.47, + "learning_rate": 9.014120417833116e-07, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 3.47, + "learning_rate": 8.91465088042528e-07, + "loss": 0.0002, + "step": 1171 + }, + { + "epoch": 3.48, + "learning_rate": 8.815707598809021e-07, + "loss": 0.0002, + "step": 1172 + }, + { + "epoch": 3.48, + "learning_rate": 8.717291144640238e-07, + "loss": 0.0001, + "step": 1173 + }, + { + "epoch": 3.48, + "learning_rate": 8.61940208653097e-07, + "loss": 0.0001, + "step": 1174 + }, + { + "epoch": 3.49, + "learning_rate": 8.52204099004621e-07, + "loss": 0.0001, + "step": 1175 + }, + { + "epoch": 3.49, + "learning_rate": 8.425208417700581e-07, + "loss": 0.0002, + "step": 1176 + }, + { + "epoch": 3.49, + "learning_rate": 8.328904928955117e-07, + "loss": 0.0001, + "step": 1177 + }, + { + "epoch": 3.5, + "learning_rate": 8.233131080213975e-07, + "loss": 0.0005, + "step": 1178 + }, + { + "epoch": 3.5, + "learning_rate": 8.137887424821278e-07, + "loss": 0.0003, + "step": 1179 + }, + { + "epoch": 3.5, + "learning_rate": 8.043174513057927e-07, + "loss": 0.0003, + "step": 1180 + }, + { + "epoch": 3.5, + "learning_rate": 7.948992892138318e-07, + "loss": 0.0004, + "step": 1181 + }, + { + "epoch": 3.51, + "learning_rate": 7.855343106207325e-07, + "loss": 0.001, + "step": 1182 + }, + { + "epoch": 3.51, + "learning_rate": 7.762225696337056e-07, + "loss": 0.0013, + "step": 1183 + }, + { + "epoch": 3.51, + "learning_rate": 7.66964120052377e-07, + "loss": 0.0055, + "step": 1184 + }, + { + "epoch": 3.52, + "learning_rate": 7.57759015368471e-07, + "loss": 0.0001, + "step": 1185 + }, + { + "epoch": 3.52, + "learning_rate": 7.486073087655133e-07, + "loss": 0.0001, + "step": 1186 + }, + { + "epoch": 3.52, + "learning_rate": 7.395090531185123e-07, + "loss": 0.0002, + "step": 1187 + }, + { + "epoch": 3.53, + "learning_rate": 7.304643009936552e-07, + "loss": 0.0001, + "step": 1188 + }, + { + "epoch": 3.53, + "learning_rate": 7.214731046480095e-07, + "loss": 0.0004, + "step": 1189 + }, + { + "epoch": 3.53, + "learning_rate": 7.125355160292202e-07, + "loss": 0.0001, + "step": 1190 + }, + { + "epoch": 3.53, + "learning_rate": 7.036515867752014e-07, + "loss": 0.0001, + "step": 1191 + }, + { + "epoch": 3.54, + "learning_rate": 6.948213682138505e-07, + "loss": 0.0008, + "step": 1192 + }, + { + "epoch": 3.54, + "learning_rate": 6.86044911362741e-07, + "loss": 0.0, + "step": 1193 + }, + { + "epoch": 3.54, + "learning_rate": 6.773222669288359e-07, + "loss": 0.0, + "step": 1194 + }, + { + "epoch": 3.55, + "learning_rate": 6.686534853081872e-07, + "loss": 0.0001, + "step": 1195 + }, + { + "epoch": 3.55, + "learning_rate": 6.600386165856521e-07, + "loss": 0.0001, + "step": 1196 + }, + { + "epoch": 3.55, + "learning_rate": 6.514777105345981e-07, + "loss": 0.0006, + "step": 1197 + }, + { + "epoch": 3.55, + "learning_rate": 6.429708166166148e-07, + "loss": 0.0005, + "step": 1198 + }, + { + "epoch": 3.56, + "learning_rate": 6.345179839812343e-07, + "loss": 0.0, + "step": 1199 + }, + { + "epoch": 3.56, + "learning_rate": 6.261192614656442e-07, + "loss": 0.0002, + "step": 1200 + }, + { + "epoch": 3.56, + "learning_rate": 6.177746975944021e-07, + "loss": 0.0013, + "step": 1201 + }, + { + "epoch": 3.57, + "learning_rate": 6.094843405791573e-07, + "loss": 0.0002, + "step": 1202 + }, + { + "epoch": 3.57, + "learning_rate": 6.012482383183771e-07, + "loss": 0.0, + "step": 1203 + }, + { + "epoch": 3.57, + "learning_rate": 5.930664383970642e-07, + "loss": 0.0002, + "step": 1204 + }, + { + "epoch": 3.58, + "learning_rate": 5.849389880864787e-07, + "loss": 0.0001, + "step": 1205 + }, + { + "epoch": 3.58, + "learning_rate": 5.768659343438764e-07, + "loss": 0.0, + "step": 1206 + }, + { + "epoch": 3.58, + "learning_rate": 5.688473238122305e-07, + "loss": 0.0001, + "step": 1207 + }, + { + "epoch": 3.58, + "learning_rate": 5.608832028199552e-07, + "loss": 0.0, + "step": 1208 + }, + { + "epoch": 3.59, + "learning_rate": 5.529736173806533e-07, + "loss": 0.0008, + "step": 1209 + }, + { + "epoch": 3.59, + "learning_rate": 5.451186131928387e-07, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 3.59, + "learning_rate": 5.373182356396756e-07, + "loss": 0.0, + "step": 1211 + }, + { + "epoch": 3.6, + "learning_rate": 5.2957252978872e-07, + "loss": 0.0001, + "step": 1212 + }, + { + "epoch": 3.6, + "learning_rate": 5.218815403916511e-07, + "loss": 0.0021, + "step": 1213 + }, + { + "epoch": 3.6, + "learning_rate": 5.142453118840241e-07, + "loss": 0.0002, + "step": 1214 + }, + { + "epoch": 3.61, + "learning_rate": 5.066638883849983e-07, + "loss": 0.0007, + "step": 1215 + }, + { + "epoch": 3.61, + "learning_rate": 4.99137313697099e-07, + "loss": 0.0014, + "step": 1216 + }, + { + "epoch": 3.61, + "learning_rate": 4.91665631305952e-07, + "loss": 0.0, + "step": 1217 + }, + { + "epoch": 3.61, + "learning_rate": 4.842488843800409e-07, + "loss": 0.0001, + "step": 1218 + }, + { + "epoch": 3.62, + "learning_rate": 4.768871157704436e-07, + "loss": 0.0001, + "step": 1219 + }, + { + "epoch": 3.62, + "learning_rate": 4.6958036801060926e-07, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 3.62, + "learning_rate": 4.6232868331608784e-07, + "loss": 0.0, + "step": 1221 + }, + { + "epoch": 3.63, + "learning_rate": 4.5513210358429707e-07, + "loss": 0.0, + "step": 1222 + }, + { + "epoch": 3.63, + "learning_rate": 4.479906703942816e-07, + "loss": 0.0008, + "step": 1223 + }, + { + "epoch": 3.63, + "learning_rate": 4.40904425006472e-07, + "loss": 0.0, + "step": 1224 + }, + { + "epoch": 3.64, + "learning_rate": 4.3387340836244183e-07, + "loss": 0.0001, + "step": 1225 + }, + { + "epoch": 3.64, + "learning_rate": 4.2689766108467643e-07, + "loss": 0.0005, + "step": 1226 + }, + { + "epoch": 3.64, + "learning_rate": 4.199772234763366e-07, + "loss": 0.0003, + "step": 1227 + }, + { + "epoch": 3.64, + "learning_rate": 4.1311213552102324e-07, + "loss": 0.0001, + "step": 1228 + }, + { + "epoch": 3.65, + "learning_rate": 4.0630243688255187e-07, + "loss": 0.0, + "step": 1229 + }, + { + "epoch": 3.65, + "learning_rate": 3.9954816690471633e-07, + "loss": 0.0007, + "step": 1230 + }, + { + "epoch": 3.65, + "learning_rate": 3.9284936461106873e-07, + "loss": 0.0004, + "step": 1231 + }, + { + "epoch": 3.66, + "learning_rate": 3.8620606870468535e-07, + "loss": 0.0005, + "step": 1232 + }, + { + "epoch": 3.66, + "learning_rate": 3.7961831756795353e-07, + "loss": 0.0002, + "step": 1233 + }, + { + "epoch": 3.66, + "learning_rate": 3.7308614926234164e-07, + "loss": 0.0001, + "step": 1234 + }, + { + "epoch": 3.66, + "learning_rate": 3.666096015281828e-07, + "loss": 0.0001, + "step": 1235 + }, + { + "epoch": 3.67, + "learning_rate": 3.6018871178445604e-07, + "loss": 0.0001, + "step": 1236 + }, + { + "epoch": 3.67, + "learning_rate": 3.538235171285698e-07, + "loss": 0.0066, + "step": 1237 + }, + { + "epoch": 3.67, + "learning_rate": 3.4751405433614884e-07, + "loss": 0.0, + "step": 1238 + }, + { + "epoch": 3.68, + "learning_rate": 3.4126035986081884e-07, + "loss": 0.0001, + "step": 1239 + }, + { + "epoch": 3.68, + "learning_rate": 3.350624698339988e-07, + "loss": 0.0006, + "step": 1240 + }, + { + "epoch": 3.68, + "learning_rate": 3.2892042006469447e-07, + "loss": 0.0001, + "step": 1241 + }, + { + "epoch": 3.69, + "learning_rate": 3.228342460392808e-07, + "loss": 0.0004, + "step": 1242 + }, + { + "epoch": 3.69, + "learning_rate": 3.168039829213121e-07, + "loss": 0.0001, + "step": 1243 + }, + { + "epoch": 3.69, + "learning_rate": 3.108296655513066e-07, + "loss": 0.0, + "step": 1244 + }, + { + "epoch": 3.69, + "learning_rate": 3.049113284465521e-07, + "loss": 0.0001, + "step": 1245 + }, + { + "epoch": 3.7, + "learning_rate": 2.990490058009021e-07, + "loss": 0.0, + "step": 1246 + }, + { + "epoch": 3.7, + "learning_rate": 2.9324273148458205e-07, + "loss": 0.0003, + "step": 1247 + }, + { + "epoch": 3.7, + "learning_rate": 2.8749253904399197e-07, + "loss": 0.0, + "step": 1248 + }, + { + "epoch": 3.71, + "learning_rate": 2.8179846170150906e-07, + "loss": 0.0, + "step": 1249 + }, + { + "epoch": 3.71, + "learning_rate": 2.7616053235530515e-07, + "loss": 0.0001, + "step": 1250 + }, + { + "epoch": 3.71, + "learning_rate": 2.7057878357914513e-07, + "loss": 0.0017, + "step": 1251 + }, + { + "epoch": 3.72, + "learning_rate": 2.6505324762220783e-07, + "loss": 0.001, + "step": 1252 + }, + { + "epoch": 3.72, + "learning_rate": 2.595839564088953e-07, + "loss": 0.0001, + "step": 1253 + }, + { + "epoch": 3.72, + "learning_rate": 2.541709415386495e-07, + "loss": 0.0004, + "step": 1254 + }, + { + "epoch": 3.72, + "learning_rate": 2.4881423428577025e-07, + "loss": 0.0, + "step": 1255 + }, + { + "epoch": 3.73, + "learning_rate": 2.4351386559922973e-07, + "loss": 0.0015, + "step": 1256 + }, + { + "epoch": 3.73, + "learning_rate": 2.3826986610250292e-07, + "loss": 0.0001, + "step": 1257 + }, + { + "epoch": 3.73, + "learning_rate": 2.3308226609338401e-07, + "loss": 0.0009, + "step": 1258 + }, + { + "epoch": 3.74, + "learning_rate": 2.2795109554381024e-07, + "loss": 0.0002, + "step": 1259 + }, + { + "epoch": 3.74, + "learning_rate": 2.2287638409969393e-07, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 3.74, + "learning_rate": 2.178581610807484e-07, + "loss": 0.0, + "step": 1261 + }, + { + "epoch": 3.74, + "learning_rate": 2.1289645548031923e-07, + "loss": 0.0002, + "step": 1262 + }, + { + "epoch": 3.75, + "learning_rate": 2.0799129596521417e-07, + "loss": 0.0002, + "step": 1263 + }, + { + "epoch": 3.75, + "learning_rate": 2.0314271087554126e-07, + "loss": 0.0, + "step": 1264 + }, + { + "epoch": 3.75, + "learning_rate": 1.9835072822454448e-07, + "loss": 0.0001, + "step": 1265 + }, + { + "epoch": 3.76, + "learning_rate": 1.9361537569843712e-07, + "loss": 0.0001, + "step": 1266 + }, + { + "epoch": 3.76, + "learning_rate": 1.889366806562487e-07, + "loss": 0.0002, + "step": 1267 + }, + { + "epoch": 3.76, + "learning_rate": 1.8431467012966387e-07, + "loss": 0.0001, + "step": 1268 + }, + { + "epoch": 3.77, + "learning_rate": 1.7974937082286593e-07, + "loss": 0.0002, + "step": 1269 + }, + { + "epoch": 3.77, + "learning_rate": 1.7524080911238028e-07, + "loss": 0.0001, + "step": 1270 + }, + { + "epoch": 3.77, + "learning_rate": 1.7078901104692792e-07, + "loss": 0.0, + "step": 1271 + }, + { + "epoch": 3.77, + "learning_rate": 1.66394002347271e-07, + "loss": 0.0003, + "step": 1272 + }, + { + "epoch": 3.78, + "learning_rate": 1.6205580840606084e-07, + "loss": 0.0001, + "step": 1273 + }, + { + "epoch": 3.78, + "learning_rate": 1.5777445428770022e-07, + "loss": 0.0008, + "step": 1274 + }, + { + "epoch": 3.78, + "learning_rate": 1.5354996472819128e-07, + "loss": 0.0001, + "step": 1275 + }, + { + "epoch": 3.79, + "learning_rate": 1.4938236413499453e-07, + "loss": 0.0003, + "step": 1276 + }, + { + "epoch": 3.79, + "learning_rate": 1.452716765868878e-07, + "loss": 0.0001, + "step": 1277 + }, + { + "epoch": 3.79, + "learning_rate": 1.4121792583382975e-07, + "loss": 0.0008, + "step": 1278 + }, + { + "epoch": 3.8, + "learning_rate": 1.372211352968167e-07, + "loss": 0.0, + "step": 1279 + }, + { + "epoch": 3.8, + "learning_rate": 1.3328132806775473e-07, + "loss": 0.0003, + "step": 1280 + }, + { + "epoch": 3.8, + "learning_rate": 1.293985269093212e-07, + "loss": 0.0, + "step": 1281 + }, + { + "epoch": 3.8, + "learning_rate": 1.255727542548324e-07, + "loss": 0.0, + "step": 1282 + }, + { + "epoch": 3.81, + "learning_rate": 1.218040322081182e-07, + "loss": 0.0002, + "step": 1283 + }, + { + "epoch": 3.81, + "learning_rate": 1.1809238254339106e-07, + "loss": 0.0, + "step": 1284 + }, + { + "epoch": 3.81, + "learning_rate": 1.1443782670512383e-07, + "loss": 0.0001, + "step": 1285 + }, + { + "epoch": 3.82, + "learning_rate": 1.1084038580791989e-07, + "loss": 0.0002, + "step": 1286 + }, + { + "epoch": 3.82, + "learning_rate": 1.0730008063639774e-07, + "loss": 0.0001, + "step": 1287 + }, + { + "epoch": 3.82, + "learning_rate": 1.0381693164506546e-07, + "loss": 0.0, + "step": 1288 + }, + { + "epoch": 3.82, + "learning_rate": 1.0039095895820639e-07, + "loss": 0.0006, + "step": 1289 + }, + { + "epoch": 3.83, + "learning_rate": 9.702218236976147e-08, + "loss": 0.0005, + "step": 1290 + }, + { + "epoch": 3.83, + "learning_rate": 9.371062134321263e-08, + "loss": 0.0, + "step": 1291 + }, + { + "epoch": 3.83, + "learning_rate": 9.045629501147401e-08, + "loss": 0.0, + "step": 1292 + }, + { + "epoch": 3.84, + "learning_rate": 8.725922217678206e-08, + "loss": 0.0001, + "step": 1293 + }, + { + "epoch": 3.84, + "learning_rate": 8.411942131058115e-08, + "loss": 0.0005, + "step": 1294 + }, + { + "epoch": 3.84, + "learning_rate": 8.103691055342145e-08, + "loss": 0.0, + "step": 1295 + }, + { + "epoch": 3.85, + "learning_rate": 7.801170771485567e-08, + "loss": 0.0004, + "step": 1296 + }, + { + "epoch": 3.85, + "learning_rate": 7.504383027333029e-08, + "loss": 0.0003, + "step": 1297 + }, + { + "epoch": 3.85, + "learning_rate": 7.213329537608893e-08, + "loss": 0.0002, + "step": 1298 + }, + { + "epoch": 3.85, + "learning_rate": 6.928011983907246e-08, + "loss": 0.0001, + "step": 1299 + }, + { + "epoch": 3.86, + "learning_rate": 6.648432014682127e-08, + "loss": 0.0002, + "step": 1300 + }, + { + "epoch": 3.86, + "learning_rate": 6.374591245238204e-08, + "loss": 0.0, + "step": 1301 + }, + { + "epoch": 3.86, + "learning_rate": 6.106491257721114e-08, + "loss": 0.0001, + "step": 1302 + }, + { + "epoch": 3.87, + "learning_rate": 5.844133601108359e-08, + "loss": 0.0003, + "step": 1303 + }, + { + "epoch": 3.87, + "learning_rate": 5.587519791200868e-08, + "loss": 0.0001, + "step": 1304 + }, + { + "epoch": 3.87, + "learning_rate": 5.3366513106134496e-08, + "loss": 0.0002, + "step": 1305 + }, + { + "epoch": 3.88, + "learning_rate": 5.0915296087670204e-08, + "loss": 0.0, + "step": 1306 + }, + { + "epoch": 3.88, + "learning_rate": 4.8521561018793906e-08, + "loss": 0.0001, + "step": 1307 + }, + { + "epoch": 3.88, + "learning_rate": 4.618532172957935e-08, + "loss": 0.0001, + "step": 1308 + }, + { + "epoch": 3.88, + "learning_rate": 4.390659171790934e-08, + "loss": 0.0001, + "step": 1309 + }, + { + "epoch": 3.89, + "learning_rate": 4.1685384149402486e-08, + "loss": 0.0003, + "step": 1310 + }, + { + "epoch": 3.89, + "learning_rate": 3.952171185733211e-08, + "loss": 0.0, + "step": 1311 + }, + { + "epoch": 3.89, + "learning_rate": 3.7415587342557455e-08, + "loss": 0.0001, + "step": 1312 + }, + { + "epoch": 3.9, + "learning_rate": 3.536702277345039e-08, + "loss": 0.0005, + "step": 1313 + }, + { + "epoch": 3.9, + "learning_rate": 3.3376029985819904e-08, + "loss": 0.0, + "step": 1314 + }, + { + "epoch": 3.9, + "learning_rate": 3.144262048284885e-08, + "loss": 0.0002, + "step": 1315 + }, + { + "epoch": 3.91, + "learning_rate": 2.9566805435029544e-08, + "loss": 0.0001, + "step": 1316 + }, + { + "epoch": 3.91, + "learning_rate": 2.7748595680091583e-08, + "loss": 0.0001, + "step": 1317 + }, + { + "epoch": 3.91, + "learning_rate": 2.5988001722944134e-08, + "loss": 0.0005, + "step": 1318 + }, + { + "epoch": 3.91, + "learning_rate": 2.428503373561708e-08, + "loss": 0.0001, + "step": 1319 + }, + { + "epoch": 3.92, + "learning_rate": 2.2639701557198856e-08, + "loss": 0.0, + "step": 1320 + }, + { + "epoch": 3.92, + "learning_rate": 2.105201469377871e-08, + "loss": 0.0001, + "step": 1321 + }, + { + "epoch": 3.92, + "learning_rate": 1.9521982318395637e-08, + "loss": 0.0001, + "step": 1322 + }, + { + "epoch": 3.93, + "learning_rate": 1.804961327098398e-08, + "loss": 0.0, + "step": 1323 + }, + { + "epoch": 3.93, + "learning_rate": 1.663491605831902e-08, + "loss": 0.0, + "step": 1324 + }, + { + "epoch": 3.93, + "learning_rate": 1.5277898853972573e-08, + "loss": 0.0, + "step": 1325 + }, + { + "epoch": 3.93, + "learning_rate": 1.3978569498263039e-08, + "loss": 0.0001, + "step": 1326 + }, + { + "epoch": 3.94, + "learning_rate": 1.2736935498212088e-08, + "loss": 0.0, + "step": 1327 + }, + { + "epoch": 3.94, + "learning_rate": 1.1553004027498038e-08, + "loss": 0.0003, + "step": 1328 + }, + { + "epoch": 3.94, + "learning_rate": 1.0426781926416995e-08, + "loss": 0.0001, + "step": 1329 + }, + { + "epoch": 3.95, + "learning_rate": 9.35827570184511e-09, + "loss": 0.0001, + "step": 1330 + }, + { + "epoch": 3.95, + "learning_rate": 8.347491527195273e-09, + "loss": 0.0, + "step": 1331 + }, + { + "epoch": 3.95, + "learning_rate": 7.394435242384923e-09, + "loss": 0.0001, + "step": 1332 + }, + { + "epoch": 3.96, + "learning_rate": 6.499112353804959e-09, + "loss": 0.0009, + "step": 1333 + }, + { + "epoch": 3.96, + "learning_rate": 5.661528034284214e-09, + "loss": 0.0001, + "step": 1334 + }, + { + "epoch": 3.96, + "learning_rate": 4.881687123057255e-09, + "loss": 0.0009, + "step": 1335 + }, + { + "epoch": 3.96, + "learning_rate": 4.159594125744404e-09, + "loss": 0.0, + "step": 1336 + }, + { + "epoch": 3.97, + "learning_rate": 3.4952532143173178e-09, + "loss": 0.0002, + "step": 1337 + }, + { + "epoch": 3.97, + "learning_rate": 2.888668227081226e-09, + "loss": 0.0007, + "step": 1338 + }, + { + "epoch": 3.97, + "learning_rate": 2.3398426686471744e-09, + "loss": 0.0001, + "step": 1339 + }, + { + "epoch": 3.98, + "learning_rate": 1.848779709917592e-09, + "loss": 0.0001, + "step": 1340 + }, + { + "epoch": 3.98, + "learning_rate": 1.4154821880618674e-09, + "loss": 0.0013, + "step": 1341 + }, + { + "epoch": 3.98, + "learning_rate": 1.0399526065074662e-09, + "loss": 0.0002, + "step": 1342 + }, + { + "epoch": 3.99, + "learning_rate": 7.221931349166156e-10, + "loss": 0.0002, + "step": 1343 + }, + { + "epoch": 3.99, + "learning_rate": 4.622056091807547e-10, + "loss": 0.0, + "step": 1344 + }, + { + "epoch": 3.99, + "learning_rate": 2.599915314061008e-10, + "loss": 0.0, + "step": 1345 + }, + { + "epoch": 3.99, + "learning_rate": 1.155520699092083e-10, + "loss": 0.0, + "step": 1346 + }, + { + "epoch": 4.0, + "learning_rate": 2.8888059203646678e-11, + "loss": 0.0002, + "step": 1347 + }, + { + "epoch": 4.0, + "learning_rate": 0.0, + "loss": 0.007, + "step": 1348 + }, + { + "epoch": 4.0, + "step": 1348, + "total_flos": 1.148849645628162e+18, + "train_loss": 0.034755720196650006, + "train_runtime": 9562.0276, + "train_samples_per_second": 4.511, + "train_steps_per_second": 0.141 + } + ], + "max_steps": 1348, + "num_train_epochs": 4, + "total_flos": 1.148849645628162e+18, + "trial_name": null, + "trial_params": null +}