English
S-Chain / exgra-med-gpt-cot /trainer_state.json
leduckhai's picture
Upload folder using huggingface_hub
3777826 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 1011,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 6.451612903225807e-07,
"loss": 2.4965,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.2903225806451614e-06,
"loss": 2.5525,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.935483870967742e-06,
"loss": 2.1001,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 2.580645161290323e-06,
"loss": 1.7128,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 3.225806451612903e-06,
"loss": 1.3476,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 3.870967741935484e-06,
"loss": 1.1423,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 4.516129032258065e-06,
"loss": 0.8957,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 5.161290322580646e-06,
"loss": 0.7838,
"step": 8
},
{
"epoch": 0.03,
"learning_rate": 5.806451612903226e-06,
"loss": 0.6607,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 6.451612903225806e-06,
"loss": 0.5847,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 7.096774193548388e-06,
"loss": 0.5715,
"step": 11
},
{
"epoch": 0.04,
"learning_rate": 7.741935483870968e-06,
"loss": 0.5061,
"step": 12
},
{
"epoch": 0.04,
"learning_rate": 8.387096774193549e-06,
"loss": 0.4799,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 9.03225806451613e-06,
"loss": 0.4054,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 9.67741935483871e-06,
"loss": 0.3912,
"step": 15
},
{
"epoch": 0.05,
"learning_rate": 1.0322580645161291e-05,
"loss": 0.3842,
"step": 16
},
{
"epoch": 0.05,
"learning_rate": 1.096774193548387e-05,
"loss": 0.3853,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 1.1612903225806453e-05,
"loss": 0.3847,
"step": 18
},
{
"epoch": 0.06,
"learning_rate": 1.2258064516129034e-05,
"loss": 0.3706,
"step": 19
},
{
"epoch": 0.06,
"learning_rate": 1.2903225806451613e-05,
"loss": 0.3492,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 1.3548387096774194e-05,
"loss": 0.333,
"step": 21
},
{
"epoch": 0.07,
"learning_rate": 1.4193548387096776e-05,
"loss": 0.3636,
"step": 22
},
{
"epoch": 0.07,
"learning_rate": 1.4838709677419357e-05,
"loss": 0.3244,
"step": 23
},
{
"epoch": 0.07,
"learning_rate": 1.5483870967741936e-05,
"loss": 0.3663,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 1.6129032258064517e-05,
"loss": 0.3344,
"step": 25
},
{
"epoch": 0.08,
"learning_rate": 1.6774193548387098e-05,
"loss": 0.3155,
"step": 26
},
{
"epoch": 0.08,
"learning_rate": 1.741935483870968e-05,
"loss": 0.2982,
"step": 27
},
{
"epoch": 0.08,
"learning_rate": 1.806451612903226e-05,
"loss": 0.334,
"step": 28
},
{
"epoch": 0.09,
"learning_rate": 1.870967741935484e-05,
"loss": 0.3371,
"step": 29
},
{
"epoch": 0.09,
"learning_rate": 1.935483870967742e-05,
"loss": 0.3354,
"step": 30
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.3074,
"step": 31
},
{
"epoch": 0.09,
"learning_rate": 1.999994861726391e-05,
"loss": 0.2829,
"step": 32
},
{
"epoch": 0.1,
"learning_rate": 1.9999794469583663e-05,
"loss": 0.3008,
"step": 33
},
{
"epoch": 0.1,
"learning_rate": 1.9999537558543373e-05,
"loss": 0.3202,
"step": 34
},
{
"epoch": 0.1,
"learning_rate": 1.9999177886783194e-05,
"loss": 0.2939,
"step": 35
},
{
"epoch": 0.11,
"learning_rate": 1.9998715457999313e-05,
"loss": 0.3127,
"step": 36
},
{
"epoch": 0.11,
"learning_rate": 1.99981502769439e-05,
"loss": 0.3105,
"step": 37
},
{
"epoch": 0.11,
"learning_rate": 1.999748234942507e-05,
"loss": 0.2865,
"step": 38
},
{
"epoch": 0.12,
"learning_rate": 1.99967116823068e-05,
"loss": 0.3032,
"step": 39
},
{
"epoch": 0.12,
"learning_rate": 1.9995838283508897e-05,
"loss": 0.3019,
"step": 40
},
{
"epoch": 0.12,
"learning_rate": 1.999486216200688e-05,
"loss": 0.275,
"step": 41
},
{
"epoch": 0.12,
"learning_rate": 1.999378332783191e-05,
"loss": 0.2923,
"step": 42
},
{
"epoch": 0.13,
"learning_rate": 1.999260179207068e-05,
"loss": 0.3153,
"step": 43
},
{
"epoch": 0.13,
"learning_rate": 1.999131756686529e-05,
"loss": 0.3107,
"step": 44
},
{
"epoch": 0.13,
"learning_rate": 1.9989930665413148e-05,
"loss": 0.2806,
"step": 45
},
{
"epoch": 0.14,
"learning_rate": 1.9988441101966807e-05,
"loss": 0.2811,
"step": 46
},
{
"epoch": 0.14,
"learning_rate": 1.9986848891833846e-05,
"loss": 0.2938,
"step": 47
},
{
"epoch": 0.14,
"learning_rate": 1.9985154051376676e-05,
"loss": 0.2853,
"step": 48
},
{
"epoch": 0.15,
"learning_rate": 1.9983356598012412e-05,
"loss": 0.2876,
"step": 49
},
{
"epoch": 0.15,
"learning_rate": 1.9981456550212665e-05,
"loss": 0.2776,
"step": 50
},
{
"epoch": 0.15,
"learning_rate": 1.9979453927503366e-05,
"loss": 0.319,
"step": 51
},
{
"epoch": 0.15,
"learning_rate": 1.997734875046456e-05,
"loss": 0.2709,
"step": 52
},
{
"epoch": 0.16,
"learning_rate": 1.997514104073021e-05,
"loss": 0.2841,
"step": 53
},
{
"epoch": 0.16,
"learning_rate": 1.9972830820987933e-05,
"loss": 0.2706,
"step": 54
},
{
"epoch": 0.16,
"learning_rate": 1.997041811497882e-05,
"loss": 0.2659,
"step": 55
},
{
"epoch": 0.17,
"learning_rate": 1.9967902947497158e-05,
"loss": 0.3049,
"step": 56
},
{
"epoch": 0.17,
"learning_rate": 1.9965285344390185e-05,
"loss": 0.2515,
"step": 57
},
{
"epoch": 0.17,
"learning_rate": 1.996256533255782e-05,
"loss": 0.2723,
"step": 58
},
{
"epoch": 0.18,
"learning_rate": 1.9959742939952393e-05,
"loss": 0.2731,
"step": 59
},
{
"epoch": 0.18,
"learning_rate": 1.9956818195578357e-05,
"loss": 0.3109,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 1.9953791129491985e-05,
"loss": 0.2765,
"step": 61
},
{
"epoch": 0.18,
"learning_rate": 1.9950661772801062e-05,
"loss": 0.268,
"step": 62
},
{
"epoch": 0.19,
"learning_rate": 1.9947430157664575e-05,
"loss": 0.2551,
"step": 63
},
{
"epoch": 0.19,
"learning_rate": 1.994409631729237e-05,
"loss": 0.2912,
"step": 64
},
{
"epoch": 0.19,
"learning_rate": 1.9940660285944805e-05,
"loss": 0.2742,
"step": 65
},
{
"epoch": 0.2,
"learning_rate": 1.9937122098932428e-05,
"loss": 0.2524,
"step": 66
},
{
"epoch": 0.2,
"learning_rate": 1.9933481792615583e-05,
"loss": 0.2445,
"step": 67
},
{
"epoch": 0.2,
"learning_rate": 1.9929739404404046e-05,
"loss": 0.2322,
"step": 68
},
{
"epoch": 0.2,
"learning_rate": 1.992589497275665e-05,
"loss": 0.2457,
"step": 69
},
{
"epoch": 0.21,
"learning_rate": 1.992194853718088e-05,
"loss": 0.2726,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 1.991790013823246e-05,
"loss": 0.2617,
"step": 71
},
{
"epoch": 0.21,
"learning_rate": 1.9913749817514963e-05,
"loss": 0.2821,
"step": 72
},
{
"epoch": 0.22,
"learning_rate": 1.990949761767935e-05,
"loss": 0.2492,
"step": 73
},
{
"epoch": 0.22,
"learning_rate": 1.9905143582423552e-05,
"loss": 0.2284,
"step": 74
},
{
"epoch": 0.22,
"learning_rate": 1.9900687756492022e-05,
"loss": 0.2455,
"step": 75
},
{
"epoch": 0.23,
"learning_rate": 1.9896130185675263e-05,
"loss": 0.2388,
"step": 76
},
{
"epoch": 0.23,
"learning_rate": 1.9891470916809362e-05,
"loss": 0.2557,
"step": 77
},
{
"epoch": 0.23,
"learning_rate": 1.9886709997775527e-05,
"loss": 0.2606,
"step": 78
},
{
"epoch": 0.23,
"learning_rate": 1.988184747749956e-05,
"loss": 0.2682,
"step": 79
},
{
"epoch": 0.24,
"learning_rate": 1.9876883405951378e-05,
"loss": 0.2425,
"step": 80
},
{
"epoch": 0.24,
"learning_rate": 1.9871817834144506e-05,
"loss": 0.2667,
"step": 81
},
{
"epoch": 0.24,
"learning_rate": 1.986665081413552e-05,
"loss": 0.2914,
"step": 82
},
{
"epoch": 0.25,
"learning_rate": 1.986138239902355e-05,
"loss": 0.2532,
"step": 83
},
{
"epoch": 0.25,
"learning_rate": 1.9856012642949717e-05,
"loss": 0.2426,
"step": 84
},
{
"epoch": 0.25,
"learning_rate": 1.985054160109657e-05,
"loss": 0.2597,
"step": 85
},
{
"epoch": 0.26,
"learning_rate": 1.9844969329687526e-05,
"loss": 0.2548,
"step": 86
},
{
"epoch": 0.26,
"learning_rate": 1.98392958859863e-05,
"loss": 0.2874,
"step": 87
},
{
"epoch": 0.26,
"learning_rate": 1.9833521328296296e-05,
"loss": 0.2295,
"step": 88
},
{
"epoch": 0.26,
"learning_rate": 1.982764571596004e-05,
"loss": 0.2552,
"step": 89
},
{
"epoch": 0.27,
"learning_rate": 1.9821669109358532e-05,
"loss": 0.2749,
"step": 90
},
{
"epoch": 0.27,
"learning_rate": 1.9815591569910654e-05,
"loss": 0.2478,
"step": 91
},
{
"epoch": 0.27,
"learning_rate": 1.980941316007253e-05,
"loss": 0.3027,
"step": 92
},
{
"epoch": 0.28,
"learning_rate": 1.9803133943336874e-05,
"loss": 0.2478,
"step": 93
},
{
"epoch": 0.28,
"learning_rate": 1.9796753984232357e-05,
"loss": 0.2743,
"step": 94
},
{
"epoch": 0.28,
"learning_rate": 1.979027334832293e-05,
"loss": 0.2443,
"step": 95
},
{
"epoch": 0.28,
"learning_rate": 1.9783692102207156e-05,
"loss": 0.2374,
"step": 96
},
{
"epoch": 0.29,
"learning_rate": 1.9777010313517517e-05,
"loss": 0.2383,
"step": 97
},
{
"epoch": 0.29,
"learning_rate": 1.977022805091973e-05,
"loss": 0.2399,
"step": 98
},
{
"epoch": 0.29,
"learning_rate": 1.9763345384112044e-05,
"loss": 0.2852,
"step": 99
},
{
"epoch": 0.3,
"learning_rate": 1.9756362383824502e-05,
"loss": 0.2309,
"step": 100
},
{
"epoch": 0.3,
"learning_rate": 1.9749279121818235e-05,
"loss": 0.2399,
"step": 101
},
{
"epoch": 0.3,
"learning_rate": 1.974209567088473e-05,
"loss": 0.2482,
"step": 102
},
{
"epoch": 0.31,
"learning_rate": 1.973481210484505e-05,
"loss": 0.2402,
"step": 103
},
{
"epoch": 0.31,
"learning_rate": 1.9727428498549105e-05,
"loss": 0.2514,
"step": 104
},
{
"epoch": 0.31,
"learning_rate": 1.971994492787488e-05,
"loss": 0.2348,
"step": 105
},
{
"epoch": 0.31,
"learning_rate": 1.971236146972764e-05,
"loss": 0.2646,
"step": 106
},
{
"epoch": 0.32,
"learning_rate": 1.9704678202039148e-05,
"loss": 0.2356,
"step": 107
},
{
"epoch": 0.32,
"learning_rate": 1.969689520376687e-05,
"loss": 0.2451,
"step": 108
},
{
"epoch": 0.32,
"learning_rate": 1.9689012554893154e-05,
"loss": 0.2584,
"step": 109
},
{
"epoch": 0.33,
"learning_rate": 1.9681030336424416e-05,
"loss": 0.2525,
"step": 110
},
{
"epoch": 0.33,
"learning_rate": 1.9672948630390296e-05,
"loss": 0.259,
"step": 111
},
{
"epoch": 0.33,
"learning_rate": 1.966476751984283e-05,
"loss": 0.2623,
"step": 112
},
{
"epoch": 0.34,
"learning_rate": 1.965648708885559e-05,
"loss": 0.242,
"step": 113
},
{
"epoch": 0.34,
"learning_rate": 1.964810742252281e-05,
"loss": 0.2632,
"step": 114
},
{
"epoch": 0.34,
"learning_rate": 1.9639628606958535e-05,
"loss": 0.2652,
"step": 115
},
{
"epoch": 0.34,
"learning_rate": 1.9631050729295705e-05,
"loss": 0.2305,
"step": 116
},
{
"epoch": 0.35,
"learning_rate": 1.962237387768529e-05,
"loss": 0.2429,
"step": 117
},
{
"epoch": 0.35,
"learning_rate": 1.961359814129536e-05,
"loss": 0.2898,
"step": 118
},
{
"epoch": 0.35,
"learning_rate": 1.9604723610310195e-05,
"loss": 0.2613,
"step": 119
},
{
"epoch": 0.36,
"learning_rate": 1.9595750375929322e-05,
"loss": 0.2293,
"step": 120
},
{
"epoch": 0.36,
"learning_rate": 1.9586678530366607e-05,
"loss": 0.2288,
"step": 121
},
{
"epoch": 0.36,
"learning_rate": 1.9577508166849308e-05,
"loss": 0.2414,
"step": 122
},
{
"epoch": 0.36,
"learning_rate": 1.956823937961709e-05,
"loss": 0.2296,
"step": 123
},
{
"epoch": 0.37,
"learning_rate": 1.9558872263921087e-05,
"loss": 0.2369,
"step": 124
},
{
"epoch": 0.37,
"learning_rate": 1.9549406916022906e-05,
"loss": 0.2395,
"step": 125
},
{
"epoch": 0.37,
"learning_rate": 1.953984343319364e-05,
"loss": 0.2355,
"step": 126
},
{
"epoch": 0.38,
"learning_rate": 1.9530181913712875e-05,
"loss": 0.232,
"step": 127
},
{
"epoch": 0.38,
"learning_rate": 1.9520422456867668e-05,
"loss": 0.2138,
"step": 128
},
{
"epoch": 0.38,
"learning_rate": 1.9510565162951538e-05,
"loss": 0.2366,
"step": 129
},
{
"epoch": 0.39,
"learning_rate": 1.9500610133263434e-05,
"loss": 0.2473,
"step": 130
},
{
"epoch": 0.39,
"learning_rate": 1.949055747010669e-05,
"loss": 0.2315,
"step": 131
},
{
"epoch": 0.39,
"learning_rate": 1.9480407276787968e-05,
"loss": 0.2433,
"step": 132
},
{
"epoch": 0.39,
"learning_rate": 1.9470159657616214e-05,
"loss": 0.2484,
"step": 133
},
{
"epoch": 0.4,
"learning_rate": 1.945981471790157e-05,
"loss": 0.2259,
"step": 134
},
{
"epoch": 0.4,
"learning_rate": 1.9449372563954293e-05,
"loss": 0.286,
"step": 135
},
{
"epoch": 0.4,
"learning_rate": 1.9438833303083677e-05,
"loss": 0.2275,
"step": 136
},
{
"epoch": 0.41,
"learning_rate": 1.942819704359693e-05,
"loss": 0.224,
"step": 137
},
{
"epoch": 0.41,
"learning_rate": 1.9417463894798078e-05,
"loss": 0.2222,
"step": 138
},
{
"epoch": 0.41,
"learning_rate": 1.9406633966986828e-05,
"loss": 0.2599,
"step": 139
},
{
"epoch": 0.42,
"learning_rate": 1.9395707371457443e-05,
"loss": 0.2371,
"step": 140
},
{
"epoch": 0.42,
"learning_rate": 1.9384684220497605e-05,
"loss": 0.2313,
"step": 141
},
{
"epoch": 0.42,
"learning_rate": 1.9373564627387243e-05,
"loss": 0.2351,
"step": 142
},
{
"epoch": 0.42,
"learning_rate": 1.9362348706397374e-05,
"loss": 0.2348,
"step": 143
},
{
"epoch": 0.43,
"learning_rate": 1.9351036572788948e-05,
"loss": 0.2301,
"step": 144
},
{
"epoch": 0.43,
"learning_rate": 1.9339628342811635e-05,
"loss": 0.2453,
"step": 145
},
{
"epoch": 0.43,
"learning_rate": 1.932812413370265e-05,
"loss": 0.2554,
"step": 146
},
{
"epoch": 0.44,
"learning_rate": 1.9316524063685544e-05,
"loss": 0.233,
"step": 147
},
{
"epoch": 0.44,
"learning_rate": 1.930482825196898e-05,
"loss": 0.2414,
"step": 148
},
{
"epoch": 0.44,
"learning_rate": 1.929303681874552e-05,
"loss": 0.2656,
"step": 149
},
{
"epoch": 0.45,
"learning_rate": 1.928114988519039e-05,
"loss": 0.2418,
"step": 150
},
{
"epoch": 0.45,
"learning_rate": 1.926916757346022e-05,
"loss": 0.2567,
"step": 151
},
{
"epoch": 0.45,
"learning_rate": 1.92570900066918e-05,
"loss": 0.2124,
"step": 152
},
{
"epoch": 0.45,
"learning_rate": 1.9244917309000817e-05,
"loss": 0.227,
"step": 153
},
{
"epoch": 0.46,
"learning_rate": 1.9232649605480574e-05,
"loss": 0.2257,
"step": 154
},
{
"epoch": 0.46,
"learning_rate": 1.9220287022200707e-05,
"loss": 0.2139,
"step": 155
},
{
"epoch": 0.46,
"learning_rate": 1.9207829686205882e-05,
"loss": 0.2292,
"step": 156
},
{
"epoch": 0.47,
"learning_rate": 1.919527772551451e-05,
"loss": 0.2665,
"step": 157
},
{
"epoch": 0.47,
"learning_rate": 1.9182631269117397e-05,
"loss": 0.2325,
"step": 158
},
{
"epoch": 0.47,
"learning_rate": 1.9169890446976454e-05,
"loss": 0.2331,
"step": 159
},
{
"epoch": 0.47,
"learning_rate": 1.915705539002334e-05,
"loss": 0.2448,
"step": 160
},
{
"epoch": 0.48,
"learning_rate": 1.9144126230158127e-05,
"loss": 0.2489,
"step": 161
},
{
"epoch": 0.48,
"learning_rate": 1.9131103100247934e-05,
"loss": 0.22,
"step": 162
},
{
"epoch": 0.48,
"learning_rate": 1.911798613412557e-05,
"loss": 0.2499,
"step": 163
},
{
"epoch": 0.49,
"learning_rate": 1.9104775466588162e-05,
"loss": 0.2359,
"step": 164
},
{
"epoch": 0.49,
"learning_rate": 1.909147123339575e-05,
"loss": 0.2454,
"step": 165
},
{
"epoch": 0.49,
"learning_rate": 1.9078073571269922e-05,
"loss": 0.2349,
"step": 166
},
{
"epoch": 0.5,
"learning_rate": 1.9064582617892383e-05,
"loss": 0.217,
"step": 167
},
{
"epoch": 0.5,
"learning_rate": 1.905099851190355e-05,
"loss": 0.2653,
"step": 168
},
{
"epoch": 0.5,
"learning_rate": 1.9037321392901134e-05,
"loss": 0.2439,
"step": 169
},
{
"epoch": 0.5,
"learning_rate": 1.9023551401438693e-05,
"loss": 0.2422,
"step": 170
},
{
"epoch": 0.51,
"learning_rate": 1.900968867902419e-05,
"loss": 0.2172,
"step": 171
},
{
"epoch": 0.51,
"learning_rate": 1.8995733368118556e-05,
"loss": 0.2619,
"step": 172
},
{
"epoch": 0.51,
"learning_rate": 1.898168561213419e-05,
"loss": 0.2424,
"step": 173
},
{
"epoch": 0.52,
"learning_rate": 1.8967545555433535e-05,
"loss": 0.2477,
"step": 174
},
{
"epoch": 0.52,
"learning_rate": 1.895331334332753e-05,
"loss": 0.3012,
"step": 175
},
{
"epoch": 0.52,
"learning_rate": 1.8938989122074195e-05,
"loss": 0.2277,
"step": 176
},
{
"epoch": 0.53,
"learning_rate": 1.8924573038877062e-05,
"loss": 0.2739,
"step": 177
},
{
"epoch": 0.53,
"learning_rate": 1.891006524188368e-05,
"loss": 0.2405,
"step": 178
},
{
"epoch": 0.53,
"learning_rate": 1.889546588018412e-05,
"loss": 0.2445,
"step": 179
},
{
"epoch": 0.53,
"learning_rate": 1.888077510380941e-05,
"loss": 0.2223,
"step": 180
},
{
"epoch": 0.54,
"learning_rate": 1.8865993063730003e-05,
"loss": 0.223,
"step": 181
},
{
"epoch": 0.54,
"learning_rate": 1.8851119911854236e-05,
"loss": 0.2569,
"step": 182
},
{
"epoch": 0.54,
"learning_rate": 1.8836155801026754e-05,
"loss": 0.1956,
"step": 183
},
{
"epoch": 0.55,
"learning_rate": 1.882110088502695e-05,
"loss": 0.2253,
"step": 184
},
{
"epoch": 0.55,
"learning_rate": 1.880595531856738e-05,
"loss": 0.2289,
"step": 185
},
{
"epoch": 0.55,
"learning_rate": 1.8790719257292175e-05,
"loss": 0.2309,
"step": 186
},
{
"epoch": 0.55,
"learning_rate": 1.877539285777543e-05,
"loss": 0.2177,
"step": 187
},
{
"epoch": 0.56,
"learning_rate": 1.875997627751962e-05,
"loss": 0.2486,
"step": 188
},
{
"epoch": 0.56,
"learning_rate": 1.8744469674953957e-05,
"loss": 0.235,
"step": 189
},
{
"epoch": 0.56,
"learning_rate": 1.8728873209432778e-05,
"loss": 0.2218,
"step": 190
},
{
"epoch": 0.57,
"learning_rate": 1.8713187041233896e-05,
"loss": 0.2204,
"step": 191
},
{
"epoch": 0.57,
"learning_rate": 1.8697411331556958e-05,
"loss": 0.2261,
"step": 192
},
{
"epoch": 0.57,
"learning_rate": 1.8681546242521785e-05,
"loss": 0.2365,
"step": 193
},
{
"epoch": 0.58,
"learning_rate": 1.866559193716672e-05,
"loss": 0.2041,
"step": 194
},
{
"epoch": 0.58,
"learning_rate": 1.8649548579446938e-05,
"loss": 0.2162,
"step": 195
},
{
"epoch": 0.58,
"learning_rate": 1.8633416334232754e-05,
"loss": 0.2906,
"step": 196
},
{
"epoch": 0.58,
"learning_rate": 1.861719536730795e-05,
"loss": 0.2097,
"step": 197
},
{
"epoch": 0.59,
"learning_rate": 1.8600885845368066e-05,
"loss": 0.2627,
"step": 198
},
{
"epoch": 0.59,
"learning_rate": 1.8584487936018663e-05,
"loss": 0.2259,
"step": 199
},
{
"epoch": 0.59,
"learning_rate": 1.8568001807773636e-05,
"loss": 0.2406,
"step": 200
},
{
"epoch": 0.6,
"learning_rate": 1.8551427630053464e-05,
"loss": 0.2333,
"step": 201
},
{
"epoch": 0.6,
"learning_rate": 1.853476557318346e-05,
"loss": 0.2382,
"step": 202
},
{
"epoch": 0.6,
"learning_rate": 1.8518015808392045e-05,
"loss": 0.2057,
"step": 203
},
{
"epoch": 0.61,
"learning_rate": 1.8501178507808962e-05,
"loss": 0.2401,
"step": 204
},
{
"epoch": 0.61,
"learning_rate": 1.8484253844463527e-05,
"loss": 0.2819,
"step": 205
},
{
"epoch": 0.61,
"learning_rate": 1.8467241992282842e-05,
"loss": 0.2465,
"step": 206
},
{
"epoch": 0.61,
"learning_rate": 1.8450143126090015e-05,
"loss": 0.2202,
"step": 207
},
{
"epoch": 0.62,
"learning_rate": 1.8432957421602342e-05,
"loss": 0.2308,
"step": 208
},
{
"epoch": 0.62,
"learning_rate": 1.8415685055429534e-05,
"loss": 0.2387,
"step": 209
},
{
"epoch": 0.62,
"learning_rate": 1.8398326205071875e-05,
"loss": 0.2055,
"step": 210
},
{
"epoch": 0.63,
"learning_rate": 1.8380881048918406e-05,
"loss": 0.2126,
"step": 211
},
{
"epoch": 0.63,
"learning_rate": 1.8363349766245108e-05,
"loss": 0.2494,
"step": 212
},
{
"epoch": 0.63,
"learning_rate": 1.834573253721303e-05,
"loss": 0.213,
"step": 213
},
{
"epoch": 0.64,
"learning_rate": 1.8328029542866456e-05,
"loss": 0.2331,
"step": 214
},
{
"epoch": 0.64,
"learning_rate": 1.831024096513104e-05,
"loss": 0.2249,
"step": 215
},
{
"epoch": 0.64,
"learning_rate": 1.8292366986811952e-05,
"loss": 0.2174,
"step": 216
},
{
"epoch": 0.64,
"learning_rate": 1.8274407791591966e-05,
"loss": 0.2401,
"step": 217
},
{
"epoch": 0.65,
"learning_rate": 1.8256363564029603e-05,
"loss": 0.211,
"step": 218
},
{
"epoch": 0.65,
"learning_rate": 1.8238234489557217e-05,
"loss": 0.2181,
"step": 219
},
{
"epoch": 0.65,
"learning_rate": 1.8220020754479104e-05,
"loss": 0.2475,
"step": 220
},
{
"epoch": 0.66,
"learning_rate": 1.820172254596956e-05,
"loss": 0.2369,
"step": 221
},
{
"epoch": 0.66,
"learning_rate": 1.8183340052071e-05,
"loss": 0.2619,
"step": 222
},
{
"epoch": 0.66,
"learning_rate": 1.8164873461691987e-05,
"loss": 0.2523,
"step": 223
},
{
"epoch": 0.66,
"learning_rate": 1.814632296460531e-05,
"loss": 0.2162,
"step": 224
},
{
"epoch": 0.67,
"learning_rate": 1.8127688751446028e-05,
"loss": 0.2395,
"step": 225
},
{
"epoch": 0.67,
"learning_rate": 1.8108971013709512e-05,
"loss": 0.2605,
"step": 226
},
{
"epoch": 0.67,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.2455,
"step": 227
},
{
"epoch": 0.68,
"learning_rate": 1.8071285734776006e-05,
"loss": 0.1979,
"step": 228
},
{
"epoch": 0.68,
"learning_rate": 1.8052318580853564e-05,
"loss": 0.2281,
"step": 229
},
{
"epoch": 0.68,
"learning_rate": 1.803326867689901e-05,
"loss": 0.2125,
"step": 230
},
{
"epoch": 0.69,
"learning_rate": 1.8014136218679566e-05,
"loss": 0.2357,
"step": 231
},
{
"epoch": 0.69,
"learning_rate": 1.7994921402810862e-05,
"loss": 0.2636,
"step": 232
},
{
"epoch": 0.69,
"learning_rate": 1.7975624426754847e-05,
"loss": 0.236,
"step": 233
},
{
"epoch": 0.69,
"learning_rate": 1.795624548881781e-05,
"loss": 0.2148,
"step": 234
},
{
"epoch": 0.7,
"learning_rate": 1.793678478814833e-05,
"loss": 0.2252,
"step": 235
},
{
"epoch": 0.7,
"learning_rate": 1.79172425247352e-05,
"loss": 0.1993,
"step": 236
},
{
"epoch": 0.7,
"learning_rate": 1.7897618899405423e-05,
"loss": 0.2338,
"step": 237
},
{
"epoch": 0.71,
"learning_rate": 1.7877914113822114e-05,
"loss": 0.2327,
"step": 238
},
{
"epoch": 0.71,
"learning_rate": 1.7858128370482427e-05,
"loss": 0.2101,
"step": 239
},
{
"epoch": 0.71,
"learning_rate": 1.7838261872715486e-05,
"loss": 0.2053,
"step": 240
},
{
"epoch": 0.72,
"learning_rate": 1.78183148246803e-05,
"loss": 0.215,
"step": 241
},
{
"epoch": 0.72,
"learning_rate": 1.7798287431363644e-05,
"loss": 0.23,
"step": 242
},
{
"epoch": 0.72,
"learning_rate": 1.7778179898577973e-05,
"loss": 0.2125,
"step": 243
},
{
"epoch": 0.72,
"learning_rate": 1.7757992432959302e-05,
"loss": 0.2345,
"step": 244
},
{
"epoch": 0.73,
"learning_rate": 1.7737725241965068e-05,
"loss": 0.2242,
"step": 245
},
{
"epoch": 0.73,
"learning_rate": 1.771737853387202e-05,
"loss": 0.2115,
"step": 246
},
{
"epoch": 0.73,
"learning_rate": 1.769695251777406e-05,
"loss": 0.2035,
"step": 247
},
{
"epoch": 0.74,
"learning_rate": 1.7676447403580114e-05,
"loss": 0.2445,
"step": 248
},
{
"epoch": 0.74,
"learning_rate": 1.7655863402011947e-05,
"loss": 0.228,
"step": 249
},
{
"epoch": 0.74,
"learning_rate": 1.7635200724602036e-05,
"loss": 0.2122,
"step": 250
},
{
"epoch": 0.74,
"learning_rate": 1.7614459583691346e-05,
"loss": 0.2416,
"step": 251
},
{
"epoch": 0.75,
"learning_rate": 1.75936401924272e-05,
"loss": 0.2078,
"step": 252
},
{
"epoch": 0.75,
"learning_rate": 1.7572742764761054e-05,
"loss": 0.2297,
"step": 253
},
{
"epoch": 0.75,
"learning_rate": 1.755176751544631e-05,
"loss": 0.2206,
"step": 254
},
{
"epoch": 0.76,
"learning_rate": 1.7530714660036112e-05,
"loss": 0.2213,
"step": 255
},
{
"epoch": 0.76,
"learning_rate": 1.7509584414881114e-05,
"loss": 0.2514,
"step": 256
},
{
"epoch": 0.76,
"learning_rate": 1.748837699712728e-05,
"loss": 0.2235,
"step": 257
},
{
"epoch": 0.77,
"learning_rate": 1.7467092624713652e-05,
"loss": 0.2258,
"step": 258
},
{
"epoch": 0.77,
"learning_rate": 1.744573151637007e-05,
"loss": 0.228,
"step": 259
},
{
"epoch": 0.77,
"learning_rate": 1.7424293891614988e-05,
"loss": 0.2215,
"step": 260
},
{
"epoch": 0.77,
"learning_rate": 1.7402779970753156e-05,
"loss": 0.2077,
"step": 261
},
{
"epoch": 0.78,
"learning_rate": 1.738118997487341e-05,
"loss": 0.2129,
"step": 262
},
{
"epoch": 0.78,
"learning_rate": 1.7359524125846353e-05,
"loss": 0.2223,
"step": 263
},
{
"epoch": 0.78,
"learning_rate": 1.7337782646322106e-05,
"loss": 0.2628,
"step": 264
},
{
"epoch": 0.79,
"learning_rate": 1.7315965759728014e-05,
"loss": 0.2176,
"step": 265
},
{
"epoch": 0.79,
"learning_rate": 1.7294073690266343e-05,
"loss": 0.2521,
"step": 266
},
{
"epoch": 0.79,
"learning_rate": 1.7272106662911972e-05,
"loss": 0.2021,
"step": 267
},
{
"epoch": 0.8,
"learning_rate": 1.7250064903410106e-05,
"loss": 0.2487,
"step": 268
},
{
"epoch": 0.8,
"learning_rate": 1.7227948638273918e-05,
"loss": 0.2398,
"step": 269
},
{
"epoch": 0.8,
"learning_rate": 1.7205758094782254e-05,
"loss": 0.2032,
"step": 270
},
{
"epoch": 0.8,
"learning_rate": 1.7183493500977277e-05,
"loss": 0.219,
"step": 271
},
{
"epoch": 0.81,
"learning_rate": 1.7161155085662144e-05,
"loss": 0.2011,
"step": 272
},
{
"epoch": 0.81,
"learning_rate": 1.7138743078398632e-05,
"loss": 0.216,
"step": 273
},
{
"epoch": 0.81,
"learning_rate": 1.7116257709504795e-05,
"loss": 0.2035,
"step": 274
},
{
"epoch": 0.82,
"learning_rate": 1.709369921005258e-05,
"loss": 0.2134,
"step": 275
},
{
"epoch": 0.82,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.2216,
"step": 276
},
{
"epoch": 0.82,
"learning_rate": 1.704836374751612e-05,
"loss": 0.2135,
"step": 277
},
{
"epoch": 0.82,
"learning_rate": 1.7025587250323894e-05,
"loss": 0.1954,
"step": 278
},
{
"epoch": 0.83,
"learning_rate": 1.700273855435255e-05,
"loss": 0.2289,
"step": 279
},
{
"epoch": 0.83,
"learning_rate": 1.6979817894407794e-05,
"loss": 0.2037,
"step": 280
},
{
"epoch": 0.83,
"learning_rate": 1.6956825506034866e-05,
"loss": 0.2189,
"step": 281
},
{
"epoch": 0.84,
"learning_rate": 1.693376162551613e-05,
"loss": 0.2252,
"step": 282
},
{
"epoch": 0.84,
"learning_rate": 1.691062648986865e-05,
"loss": 0.222,
"step": 283
},
{
"epoch": 0.84,
"learning_rate": 1.6887420336841732e-05,
"loss": 0.202,
"step": 284
},
{
"epoch": 0.85,
"learning_rate": 1.6864143404914506e-05,
"loss": 0.2092,
"step": 285
},
{
"epoch": 0.85,
"learning_rate": 1.6840795933293464e-05,
"loss": 0.2261,
"step": 286
},
{
"epoch": 0.85,
"learning_rate": 1.6817378161909995e-05,
"loss": 0.2056,
"step": 287
},
{
"epoch": 0.85,
"learning_rate": 1.6793890331417942e-05,
"loss": 0.2364,
"step": 288
},
{
"epoch": 0.86,
"learning_rate": 1.67703326831911e-05,
"loss": 0.1831,
"step": 289
},
{
"epoch": 0.86,
"learning_rate": 1.6746705459320746e-05,
"loss": 0.2128,
"step": 290
},
{
"epoch": 0.86,
"learning_rate": 1.672300890261317e-05,
"loss": 0.2097,
"step": 291
},
{
"epoch": 0.87,
"learning_rate": 1.6699243256587156e-05,
"loss": 0.213,
"step": 292
},
{
"epoch": 0.87,
"learning_rate": 1.6675408765471482e-05,
"loss": 0.2289,
"step": 293
},
{
"epoch": 0.87,
"learning_rate": 1.6651505674202425e-05,
"loss": 0.2507,
"step": 294
},
{
"epoch": 0.88,
"learning_rate": 1.662753422842123e-05,
"loss": 0.2063,
"step": 295
},
{
"epoch": 0.88,
"learning_rate": 1.6603494674471595e-05,
"loss": 0.2669,
"step": 296
},
{
"epoch": 0.88,
"learning_rate": 1.657938725939713e-05,
"loss": 0.2287,
"step": 297
},
{
"epoch": 0.88,
"learning_rate": 1.655521223093882e-05,
"loss": 0.2881,
"step": 298
},
{
"epoch": 0.89,
"learning_rate": 1.6530969837532487e-05,
"loss": 0.218,
"step": 299
},
{
"epoch": 0.89,
"learning_rate": 1.6506660328306236e-05,
"loss": 0.2186,
"step": 300
},
{
"epoch": 0.89,
"learning_rate": 1.6482283953077887e-05,
"loss": 0.2173,
"step": 301
},
{
"epoch": 0.9,
"learning_rate": 1.6457840962352403e-05,
"loss": 0.2316,
"step": 302
},
{
"epoch": 0.9,
"learning_rate": 1.6433331607319342e-05,
"loss": 0.2361,
"step": 303
},
{
"epoch": 0.9,
"learning_rate": 1.6408756139850243e-05,
"loss": 0.2374,
"step": 304
},
{
"epoch": 0.91,
"learning_rate": 1.6384114812496056e-05,
"loss": 0.2294,
"step": 305
},
{
"epoch": 0.91,
"learning_rate": 1.635940787848455e-05,
"loss": 0.2091,
"step": 306
},
{
"epoch": 0.91,
"learning_rate": 1.63346355917177e-05,
"loss": 0.2107,
"step": 307
},
{
"epoch": 0.91,
"learning_rate": 1.6309798206769073e-05,
"loss": 0.2242,
"step": 308
},
{
"epoch": 0.92,
"learning_rate": 1.6284895978881235e-05,
"loss": 0.2374,
"step": 309
},
{
"epoch": 0.92,
"learning_rate": 1.6259929163963108e-05,
"loss": 0.2177,
"step": 310
},
{
"epoch": 0.92,
"learning_rate": 1.6234898018587336e-05,
"loss": 0.208,
"step": 311
},
{
"epoch": 0.93,
"learning_rate": 1.6209802799987674e-05,
"loss": 0.2236,
"step": 312
},
{
"epoch": 0.93,
"learning_rate": 1.6184643766056315e-05,
"loss": 0.2173,
"step": 313
},
{
"epoch": 0.93,
"learning_rate": 1.6159421175341265e-05,
"loss": 0.2275,
"step": 314
},
{
"epoch": 0.93,
"learning_rate": 1.6134135287043668e-05,
"loss": 0.2074,
"step": 315
},
{
"epoch": 0.94,
"learning_rate": 1.6108786361015145e-05,
"loss": 0.2398,
"step": 316
},
{
"epoch": 0.94,
"learning_rate": 1.6083374657755132e-05,
"loss": 0.187,
"step": 317
},
{
"epoch": 0.94,
"learning_rate": 1.60579004384082e-05,
"loss": 0.1895,
"step": 318
},
{
"epoch": 0.95,
"learning_rate": 1.6032363964761363e-05,
"loss": 0.2363,
"step": 319
},
{
"epoch": 0.95,
"learning_rate": 1.6006765499241407e-05,
"loss": 0.2346,
"step": 320
},
{
"epoch": 0.95,
"learning_rate": 1.598110530491216e-05,
"loss": 0.2223,
"step": 321
},
{
"epoch": 0.96,
"learning_rate": 1.5955383645471828e-05,
"loss": 0.1944,
"step": 322
},
{
"epoch": 0.96,
"learning_rate": 1.592960078525026e-05,
"loss": 0.2239,
"step": 323
},
{
"epoch": 0.96,
"learning_rate": 1.5903756989206233e-05,
"loss": 0.1962,
"step": 324
},
{
"epoch": 0.96,
"learning_rate": 1.5877852522924733e-05,
"loss": 0.2071,
"step": 325
},
{
"epoch": 0.97,
"learning_rate": 1.5851887652614238e-05,
"loss": 0.2117,
"step": 326
},
{
"epoch": 0.97,
"learning_rate": 1.5825862645103962e-05,
"loss": 0.2055,
"step": 327
},
{
"epoch": 0.97,
"learning_rate": 1.579977776784112e-05,
"loss": 0.2455,
"step": 328
},
{
"epoch": 0.98,
"learning_rate": 1.5773633288888198e-05,
"loss": 0.2056,
"step": 329
},
{
"epoch": 0.98,
"learning_rate": 1.574742947692015e-05,
"loss": 0.2357,
"step": 330
},
{
"epoch": 0.98,
"learning_rate": 1.5721166601221697e-05,
"loss": 0.2344,
"step": 331
},
{
"epoch": 0.99,
"learning_rate": 1.569484493168452e-05,
"loss": 0.2336,
"step": 332
},
{
"epoch": 0.99,
"learning_rate": 1.56684647388045e-05,
"loss": 0.2018,
"step": 333
},
{
"epoch": 0.99,
"learning_rate": 1.5642026293678935e-05,
"loss": 0.1912,
"step": 334
},
{
"epoch": 0.99,
"learning_rate": 1.561552986800375e-05,
"loss": 0.2155,
"step": 335
},
{
"epoch": 1.0,
"learning_rate": 1.5588975734070717e-05,
"loss": 0.2015,
"step": 336
},
{
"epoch": 1.0,
"learning_rate": 1.556236416476465e-05,
"loss": 0.1918,
"step": 337
},
{
"epoch": 1.0,
"learning_rate": 1.5535695433560594e-05,
"loss": 0.2007,
"step": 338
},
{
"epoch": 1.01,
"learning_rate": 1.5508969814521026e-05,
"loss": 0.1973,
"step": 339
},
{
"epoch": 1.01,
"learning_rate": 1.5482187582293033e-05,
"loss": 0.186,
"step": 340
},
{
"epoch": 1.01,
"learning_rate": 1.5455349012105488e-05,
"loss": 0.1876,
"step": 341
},
{
"epoch": 1.01,
"learning_rate": 1.5428454379766223e-05,
"loss": 0.1989,
"step": 342
},
{
"epoch": 1.02,
"learning_rate": 1.5401503961659202e-05,
"loss": 0.2025,
"step": 343
},
{
"epoch": 1.02,
"learning_rate": 1.5374498034741664e-05,
"loss": 0.2163,
"step": 344
},
{
"epoch": 1.02,
"learning_rate": 1.5347436876541298e-05,
"loss": 0.1935,
"step": 345
},
{
"epoch": 1.03,
"learning_rate": 1.5320320765153367e-05,
"loss": 0.2044,
"step": 346
},
{
"epoch": 1.03,
"learning_rate": 1.5293149979237875e-05,
"loss": 0.2094,
"step": 347
},
{
"epoch": 1.03,
"learning_rate": 1.5265924798016683e-05,
"loss": 0.1902,
"step": 348
},
{
"epoch": 1.04,
"learning_rate": 1.5238645501270654e-05,
"loss": 0.1955,
"step": 349
},
{
"epoch": 1.04,
"learning_rate": 1.521131236933677e-05,
"loss": 0.2061,
"step": 350
},
{
"epoch": 1.04,
"learning_rate": 1.5183925683105254e-05,
"loss": 0.2195,
"step": 351
},
{
"epoch": 1.04,
"learning_rate": 1.5156485724016672e-05,
"loss": 0.202,
"step": 352
},
{
"epoch": 1.05,
"learning_rate": 1.5128992774059063e-05,
"loss": 0.2279,
"step": 353
},
{
"epoch": 1.05,
"learning_rate": 1.5101447115765027e-05,
"loss": 0.2037,
"step": 354
},
{
"epoch": 1.05,
"learning_rate": 1.5073849032208823e-05,
"loss": 0.2323,
"step": 355
},
{
"epoch": 1.06,
"learning_rate": 1.504619880700346e-05,
"loss": 0.2095,
"step": 356
},
{
"epoch": 1.06,
"learning_rate": 1.5018496724297778e-05,
"loss": 0.2149,
"step": 357
},
{
"epoch": 1.06,
"learning_rate": 1.499074306877354e-05,
"loss": 0.1879,
"step": 358
},
{
"epoch": 1.07,
"learning_rate": 1.4962938125642504e-05,
"loss": 0.2019,
"step": 359
},
{
"epoch": 1.07,
"learning_rate": 1.493508218064347e-05,
"loss": 0.1932,
"step": 360
},
{
"epoch": 1.07,
"learning_rate": 1.4907175520039381e-05,
"loss": 0.1941,
"step": 361
},
{
"epoch": 1.07,
"learning_rate": 1.4879218430614346e-05,
"loss": 0.2021,
"step": 362
},
{
"epoch": 1.08,
"learning_rate": 1.485121119967072e-05,
"loss": 0.1967,
"step": 363
},
{
"epoch": 1.08,
"learning_rate": 1.4823154115026132e-05,
"loss": 0.1899,
"step": 364
},
{
"epoch": 1.08,
"learning_rate": 1.479504746501054e-05,
"loss": 0.1945,
"step": 365
},
{
"epoch": 1.09,
"learning_rate": 1.4766891538463255e-05,
"loss": 0.2061,
"step": 366
},
{
"epoch": 1.09,
"learning_rate": 1.4738686624729987e-05,
"loss": 0.1947,
"step": 367
},
{
"epoch": 1.09,
"learning_rate": 1.4710433013659866e-05,
"loss": 0.213,
"step": 368
},
{
"epoch": 1.09,
"learning_rate": 1.468213099560246e-05,
"loss": 0.1873,
"step": 369
},
{
"epoch": 1.1,
"learning_rate": 1.4653780861404791e-05,
"loss": 0.1849,
"step": 370
},
{
"epoch": 1.1,
"learning_rate": 1.4625382902408356e-05,
"loss": 0.1943,
"step": 371
},
{
"epoch": 1.1,
"learning_rate": 1.4596937410446117e-05,
"loss": 0.2039,
"step": 372
},
{
"epoch": 1.11,
"learning_rate": 1.4568444677839517e-05,
"loss": 0.193,
"step": 373
},
{
"epoch": 1.11,
"learning_rate": 1.4539904997395468e-05,
"loss": 0.2072,
"step": 374
},
{
"epoch": 1.11,
"learning_rate": 1.4511318662403347e-05,
"loss": 0.1674,
"step": 375
},
{
"epoch": 1.12,
"learning_rate": 1.448268596663197e-05,
"loss": 0.2044,
"step": 376
},
{
"epoch": 1.12,
"learning_rate": 1.4454007204326592e-05,
"loss": 0.2063,
"step": 377
},
{
"epoch": 1.12,
"learning_rate": 1.4425282670205864e-05,
"loss": 0.197,
"step": 378
},
{
"epoch": 1.12,
"learning_rate": 1.4396512659458824e-05,
"loss": 0.1597,
"step": 379
},
{
"epoch": 1.13,
"learning_rate": 1.4367697467741834e-05,
"loss": 0.1955,
"step": 380
},
{
"epoch": 1.13,
"learning_rate": 1.4338837391175582e-05,
"loss": 0.191,
"step": 381
},
{
"epoch": 1.13,
"learning_rate": 1.4309932726342007e-05,
"loss": 0.1781,
"step": 382
},
{
"epoch": 1.14,
"learning_rate": 1.4280983770281258e-05,
"loss": 0.1903,
"step": 383
},
{
"epoch": 1.14,
"learning_rate": 1.4251990820488648e-05,
"loss": 0.2112,
"step": 384
},
{
"epoch": 1.14,
"learning_rate": 1.42229541749116e-05,
"loss": 0.1956,
"step": 385
},
{
"epoch": 1.15,
"learning_rate": 1.419387413194657e-05,
"loss": 0.1889,
"step": 386
},
{
"epoch": 1.15,
"learning_rate": 1.4164750990435991e-05,
"loss": 0.1825,
"step": 387
},
{
"epoch": 1.15,
"learning_rate": 1.4135585049665207e-05,
"loss": 0.2006,
"step": 388
},
{
"epoch": 1.15,
"learning_rate": 1.4106376609359382e-05,
"loss": 0.1683,
"step": 389
},
{
"epoch": 1.16,
"learning_rate": 1.4077125969680433e-05,
"loss": 0.2107,
"step": 390
},
{
"epoch": 1.16,
"learning_rate": 1.4047833431223938e-05,
"loss": 0.1856,
"step": 391
},
{
"epoch": 1.16,
"learning_rate": 1.4018499295016057e-05,
"loss": 0.1967,
"step": 392
},
{
"epoch": 1.17,
"learning_rate": 1.3989123862510419e-05,
"loss": 0.2089,
"step": 393
},
{
"epoch": 1.17,
"learning_rate": 1.3959707435585045e-05,
"loss": 0.2095,
"step": 394
},
{
"epoch": 1.17,
"learning_rate": 1.3930250316539237e-05,
"loss": 0.1879,
"step": 395
},
{
"epoch": 1.18,
"learning_rate": 1.390075280809047e-05,
"loss": 0.2128,
"step": 396
},
{
"epoch": 1.18,
"learning_rate": 1.3871215213371284e-05,
"loss": 0.1864,
"step": 397
},
{
"epoch": 1.18,
"learning_rate": 1.3841637835926161e-05,
"loss": 0.2121,
"step": 398
},
{
"epoch": 1.18,
"learning_rate": 1.3812020979708418e-05,
"loss": 0.1948,
"step": 399
},
{
"epoch": 1.19,
"learning_rate": 1.3782364949077078e-05,
"loss": 0.2055,
"step": 400
},
{
"epoch": 1.19,
"learning_rate": 1.3752670048793744e-05,
"loss": 0.2035,
"step": 401
},
{
"epoch": 1.19,
"learning_rate": 1.3722936584019453e-05,
"loss": 0.1856,
"step": 402
},
{
"epoch": 1.2,
"learning_rate": 1.3693164860311565e-05,
"loss": 0.1825,
"step": 403
},
{
"epoch": 1.2,
"learning_rate": 1.3663355183620604e-05,
"loss": 0.1991,
"step": 404
},
{
"epoch": 1.2,
"learning_rate": 1.3633507860287116e-05,
"loss": 0.218,
"step": 405
},
{
"epoch": 1.2,
"learning_rate": 1.3603623197038536e-05,
"loss": 0.1819,
"step": 406
},
{
"epoch": 1.21,
"learning_rate": 1.3573701500986012e-05,
"loss": 0.1707,
"step": 407
},
{
"epoch": 1.21,
"learning_rate": 1.3543743079621266e-05,
"loss": 0.215,
"step": 408
},
{
"epoch": 1.21,
"learning_rate": 1.3513748240813429e-05,
"loss": 0.1969,
"step": 409
},
{
"epoch": 1.22,
"learning_rate": 1.3483717292805882e-05,
"loss": 0.1956,
"step": 410
},
{
"epoch": 1.22,
"learning_rate": 1.3453650544213078e-05,
"loss": 0.2245,
"step": 411
},
{
"epoch": 1.22,
"learning_rate": 1.342354830401738e-05,
"loss": 0.2196,
"step": 412
},
{
"epoch": 1.23,
"learning_rate": 1.3393410881565878e-05,
"loss": 0.227,
"step": 413
},
{
"epoch": 1.23,
"learning_rate": 1.336323858656722e-05,
"loss": 0.1788,
"step": 414
},
{
"epoch": 1.23,
"learning_rate": 1.333303172908842e-05,
"loss": 0.1956,
"step": 415
},
{
"epoch": 1.23,
"learning_rate": 1.3302790619551673e-05,
"loss": 0.2028,
"step": 416
},
{
"epoch": 1.24,
"learning_rate": 1.327251556873117e-05,
"loss": 0.248,
"step": 417
},
{
"epoch": 1.24,
"learning_rate": 1.32422068877499e-05,
"loss": 0.2031,
"step": 418
},
{
"epoch": 1.24,
"learning_rate": 1.3211864888076458e-05,
"loss": 0.1804,
"step": 419
},
{
"epoch": 1.25,
"learning_rate": 1.3181489881521829e-05,
"loss": 0.2046,
"step": 420
},
{
"epoch": 1.25,
"learning_rate": 1.315108218023621e-05,
"loss": 0.1965,
"step": 421
},
{
"epoch": 1.25,
"learning_rate": 1.3120642096705773e-05,
"loss": 0.1865,
"step": 422
},
{
"epoch": 1.26,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.2038,
"step": 423
},
{
"epoch": 1.26,
"learning_rate": 1.3059666034515839e-05,
"loss": 0.2047,
"step": 424
},
{
"epoch": 1.26,
"learning_rate": 1.3029130682479722e-05,
"loss": 0.1924,
"step": 425
},
{
"epoch": 1.26,
"learning_rate": 1.2998564201439117e-05,
"loss": 0.2054,
"step": 426
},
{
"epoch": 1.27,
"learning_rate": 1.2967966905511906e-05,
"loss": 0.2109,
"step": 427
},
{
"epoch": 1.27,
"learning_rate": 1.2937339109132649e-05,
"loss": 0.1987,
"step": 428
},
{
"epoch": 1.27,
"learning_rate": 1.2906681127049339e-05,
"loss": 0.2053,
"step": 429
},
{
"epoch": 1.28,
"learning_rate": 1.2875993274320173e-05,
"loss": 0.2248,
"step": 430
},
{
"epoch": 1.28,
"learning_rate": 1.2845275866310325e-05,
"loss": 0.2563,
"step": 431
},
{
"epoch": 1.28,
"learning_rate": 1.2814529218688688e-05,
"loss": 0.1917,
"step": 432
},
{
"epoch": 1.28,
"learning_rate": 1.2783753647424635e-05,
"loss": 0.1754,
"step": 433
},
{
"epoch": 1.29,
"learning_rate": 1.2752949468784776e-05,
"loss": 0.2259,
"step": 434
},
{
"epoch": 1.29,
"learning_rate": 1.2722116999329712e-05,
"loss": 0.1892,
"step": 435
},
{
"epoch": 1.29,
"learning_rate": 1.2691256555910769e-05,
"loss": 0.1902,
"step": 436
},
{
"epoch": 1.3,
"learning_rate": 1.2660368455666752e-05,
"loss": 0.1936,
"step": 437
},
{
"epoch": 1.3,
"learning_rate": 1.2629453016020681e-05,
"loss": 0.2145,
"step": 438
},
{
"epoch": 1.3,
"learning_rate": 1.259851055467653e-05,
"loss": 0.1962,
"step": 439
},
{
"epoch": 1.31,
"learning_rate": 1.2567541389615965e-05,
"loss": 0.1939,
"step": 440
},
{
"epoch": 1.31,
"learning_rate": 1.2536545839095074e-05,
"loss": 0.1781,
"step": 441
},
{
"epoch": 1.31,
"learning_rate": 1.2505524221641097e-05,
"loss": 0.2123,
"step": 442
},
{
"epoch": 1.31,
"learning_rate": 1.2474476856049145e-05,
"loss": 0.2056,
"step": 443
},
{
"epoch": 1.32,
"learning_rate": 1.2443404061378941e-05,
"loss": 0.2158,
"step": 444
},
{
"epoch": 1.32,
"learning_rate": 1.2412306156951525e-05,
"loss": 0.2192,
"step": 445
},
{
"epoch": 1.32,
"learning_rate": 1.2381183462345983e-05,
"loss": 0.1944,
"step": 446
},
{
"epoch": 1.33,
"learning_rate": 1.2350036297396153e-05,
"loss": 0.2265,
"step": 447
},
{
"epoch": 1.33,
"learning_rate": 1.2318864982187347e-05,
"loss": 0.2102,
"step": 448
},
{
"epoch": 1.33,
"learning_rate": 1.2287669837053055e-05,
"loss": 0.1852,
"step": 449
},
{
"epoch": 1.34,
"learning_rate": 1.2256451182571663e-05,
"loss": 0.1777,
"step": 450
},
{
"epoch": 1.34,
"learning_rate": 1.2225209339563144e-05,
"loss": 0.1881,
"step": 451
},
{
"epoch": 1.34,
"learning_rate": 1.2193944629085778e-05,
"loss": 0.1841,
"step": 452
},
{
"epoch": 1.34,
"learning_rate": 1.2162657372432835e-05,
"loss": 0.2031,
"step": 453
},
{
"epoch": 1.35,
"learning_rate": 1.2131347891129286e-05,
"loss": 0.24,
"step": 454
},
{
"epoch": 1.35,
"learning_rate": 1.2100016506928494e-05,
"loss": 0.1889,
"step": 455
},
{
"epoch": 1.35,
"learning_rate": 1.206866354180891e-05,
"loss": 0.2081,
"step": 456
},
{
"epoch": 1.36,
"learning_rate": 1.2037289317970757e-05,
"loss": 0.203,
"step": 457
},
{
"epoch": 1.36,
"learning_rate": 1.200589415783273e-05,
"loss": 0.2091,
"step": 458
},
{
"epoch": 1.36,
"learning_rate": 1.1974478384028672e-05,
"loss": 0.1858,
"step": 459
},
{
"epoch": 1.36,
"learning_rate": 1.1943042319404272e-05,
"loss": 0.1861,
"step": 460
},
{
"epoch": 1.37,
"learning_rate": 1.1911586287013726e-05,
"loss": 0.2011,
"step": 461
},
{
"epoch": 1.37,
"learning_rate": 1.1880110610116438e-05,
"loss": 0.2251,
"step": 462
},
{
"epoch": 1.37,
"learning_rate": 1.1848615612173689e-05,
"loss": 0.1843,
"step": 463
},
{
"epoch": 1.38,
"learning_rate": 1.1817101616845312e-05,
"loss": 0.2017,
"step": 464
},
{
"epoch": 1.38,
"learning_rate": 1.1785568947986368e-05,
"loss": 0.2193,
"step": 465
},
{
"epoch": 1.38,
"learning_rate": 1.1754017929643818e-05,
"loss": 0.1872,
"step": 466
},
{
"epoch": 1.39,
"learning_rate": 1.172244888605319e-05,
"loss": 0.1698,
"step": 467
},
{
"epoch": 1.39,
"learning_rate": 1.1690862141635253e-05,
"loss": 0.1732,
"step": 468
},
{
"epoch": 1.39,
"learning_rate": 1.165925802099268e-05,
"loss": 0.2203,
"step": 469
},
{
"epoch": 1.39,
"learning_rate": 1.1627636848906703e-05,
"loss": 0.1943,
"step": 470
},
{
"epoch": 1.4,
"learning_rate": 1.1595998950333794e-05,
"loss": 0.1959,
"step": 471
},
{
"epoch": 1.4,
"learning_rate": 1.156434465040231e-05,
"loss": 0.1993,
"step": 472
},
{
"epoch": 1.4,
"learning_rate": 1.1532674274409159e-05,
"loss": 0.19,
"step": 473
},
{
"epoch": 1.41,
"learning_rate": 1.1500988147816461e-05,
"loss": 0.1762,
"step": 474
},
{
"epoch": 1.41,
"learning_rate": 1.1469286596248181e-05,
"loss": 0.1955,
"step": 475
},
{
"epoch": 1.41,
"learning_rate": 1.143756994548682e-05,
"loss": 0.1867,
"step": 476
},
{
"epoch": 1.42,
"learning_rate": 1.140583852147003e-05,
"loss": 0.2102,
"step": 477
},
{
"epoch": 1.42,
"learning_rate": 1.1374092650287297e-05,
"loss": 0.2042,
"step": 478
},
{
"epoch": 1.42,
"learning_rate": 1.1342332658176556e-05,
"loss": 0.1689,
"step": 479
},
{
"epoch": 1.42,
"learning_rate": 1.1310558871520871e-05,
"loss": 0.1948,
"step": 480
},
{
"epoch": 1.43,
"learning_rate": 1.1278771616845061e-05,
"loss": 0.198,
"step": 481
},
{
"epoch": 1.43,
"learning_rate": 1.1246971220812348e-05,
"loss": 0.1882,
"step": 482
},
{
"epoch": 1.43,
"learning_rate": 1.1215158010221005e-05,
"loss": 0.1861,
"step": 483
},
{
"epoch": 1.44,
"learning_rate": 1.1183332312000989e-05,
"loss": 0.2,
"step": 484
},
{
"epoch": 1.44,
"learning_rate": 1.1151494453210596e-05,
"loss": 0.1843,
"step": 485
},
{
"epoch": 1.44,
"learning_rate": 1.1119644761033079e-05,
"loss": 0.1969,
"step": 486
},
{
"epoch": 1.45,
"learning_rate": 1.108778356277331e-05,
"loss": 0.1718,
"step": 487
},
{
"epoch": 1.45,
"learning_rate": 1.1055911185854396e-05,
"loss": 0.1781,
"step": 488
},
{
"epoch": 1.45,
"learning_rate": 1.1024027957814313e-05,
"loss": 0.2047,
"step": 489
},
{
"epoch": 1.45,
"learning_rate": 1.0992134206302576e-05,
"loss": 0.1784,
"step": 490
},
{
"epoch": 1.46,
"learning_rate": 1.0960230259076819e-05,
"loss": 0.192,
"step": 491
},
{
"epoch": 1.46,
"learning_rate": 1.0928316443999462e-05,
"loss": 0.182,
"step": 492
},
{
"epoch": 1.46,
"learning_rate": 1.0896393089034336e-05,
"loss": 0.172,
"step": 493
},
{
"epoch": 1.47,
"learning_rate": 1.0864460522243303e-05,
"loss": 0.1901,
"step": 494
},
{
"epoch": 1.47,
"learning_rate": 1.0832519071782895e-05,
"loss": 0.1964,
"step": 495
},
{
"epoch": 1.47,
"learning_rate": 1.0800569065900935e-05,
"loss": 0.196,
"step": 496
},
{
"epoch": 1.47,
"learning_rate": 1.0768610832933169e-05,
"loss": 0.1814,
"step": 497
},
{
"epoch": 1.48,
"learning_rate": 1.0736644701299884e-05,
"loss": 0.1976,
"step": 498
},
{
"epoch": 1.48,
"learning_rate": 1.070467099950254e-05,
"loss": 0.2129,
"step": 499
},
{
"epoch": 1.48,
"learning_rate": 1.0672690056120398e-05,
"loss": 0.2031,
"step": 500
},
{
"epoch": 1.49,
"learning_rate": 1.064070219980713e-05,
"loss": 0.1937,
"step": 501
},
{
"epoch": 1.49,
"learning_rate": 1.0608707759287454e-05,
"loss": 0.1766,
"step": 502
},
{
"epoch": 1.49,
"learning_rate": 1.0576707063353745e-05,
"loss": 0.1932,
"step": 503
},
{
"epoch": 1.5,
"learning_rate": 1.0544700440862669e-05,
"loss": 0.2211,
"step": 504
},
{
"epoch": 1.5,
"learning_rate": 1.0512688220731793e-05,
"loss": 0.2346,
"step": 505
},
{
"epoch": 1.5,
"learning_rate": 1.0480670731936209e-05,
"loss": 0.201,
"step": 506
},
{
"epoch": 1.5,
"learning_rate": 1.044864830350515e-05,
"loss": 0.1923,
"step": 507
},
{
"epoch": 1.51,
"learning_rate": 1.0416621264518617e-05,
"loss": 0.1918,
"step": 508
},
{
"epoch": 1.51,
"learning_rate": 1.0384589944103984e-05,
"loss": 0.1918,
"step": 509
},
{
"epoch": 1.51,
"learning_rate": 1.0352554671432635e-05,
"loss": 0.1892,
"step": 510
},
{
"epoch": 1.52,
"learning_rate": 1.0320515775716556e-05,
"loss": 0.184,
"step": 511
},
{
"epoch": 1.52,
"learning_rate": 1.028847358620497e-05,
"loss": 0.1872,
"step": 512
},
{
"epoch": 1.52,
"learning_rate": 1.0256428432180955e-05,
"loss": 0.1958,
"step": 513
},
{
"epoch": 1.53,
"learning_rate": 1.0224380642958052e-05,
"loss": 0.2097,
"step": 514
},
{
"epoch": 1.53,
"learning_rate": 1.0192330547876871e-05,
"loss": 0.1952,
"step": 515
},
{
"epoch": 1.53,
"learning_rate": 1.0160278476301739e-05,
"loss": 0.2157,
"step": 516
},
{
"epoch": 1.53,
"learning_rate": 1.0128224757617272e-05,
"loss": 0.2013,
"step": 517
},
{
"epoch": 1.54,
"learning_rate": 1.0096169721225033e-05,
"loss": 0.2132,
"step": 518
},
{
"epoch": 1.54,
"learning_rate": 1.0064113696540112e-05,
"loss": 0.1928,
"step": 519
},
{
"epoch": 1.54,
"learning_rate": 1.0032057012987762e-05,
"loss": 0.1676,
"step": 520
},
{
"epoch": 1.55,
"learning_rate": 1e-05,
"loss": 0.1832,
"step": 521
},
{
"epoch": 1.55,
"learning_rate": 9.967942987012243e-06,
"loss": 0.1776,
"step": 522
},
{
"epoch": 1.55,
"learning_rate": 9.93588630345989e-06,
"loss": 0.19,
"step": 523
},
{
"epoch": 1.55,
"learning_rate": 9.903830278774967e-06,
"loss": 0.1784,
"step": 524
},
{
"epoch": 1.56,
"learning_rate": 9.871775242382727e-06,
"loss": 0.1754,
"step": 525
},
{
"epoch": 1.56,
"learning_rate": 9.839721523698265e-06,
"loss": 0.1861,
"step": 526
},
{
"epoch": 1.56,
"learning_rate": 9.80766945212313e-06,
"loss": 0.1722,
"step": 527
},
{
"epoch": 1.57,
"learning_rate": 9.775619357041952e-06,
"loss": 0.1846,
"step": 528
},
{
"epoch": 1.57,
"learning_rate": 9.743571567819046e-06,
"loss": 0.1849,
"step": 529
},
{
"epoch": 1.57,
"learning_rate": 9.711526413795032e-06,
"loss": 0.1817,
"step": 530
},
{
"epoch": 1.58,
"learning_rate": 9.67948422428345e-06,
"loss": 0.1887,
"step": 531
},
{
"epoch": 1.58,
"learning_rate": 9.647445328567368e-06,
"loss": 0.2008,
"step": 532
},
{
"epoch": 1.58,
"learning_rate": 9.615410055896016e-06,
"loss": 0.1844,
"step": 533
},
{
"epoch": 1.58,
"learning_rate": 9.583378735481385e-06,
"loss": 0.1905,
"step": 534
},
{
"epoch": 1.59,
"learning_rate": 9.551351696494854e-06,
"loss": 0.179,
"step": 535
},
{
"epoch": 1.59,
"learning_rate": 9.519329268063795e-06,
"loss": 0.179,
"step": 536
},
{
"epoch": 1.59,
"learning_rate": 9.48731177926821e-06,
"loss": 0.1989,
"step": 537
},
{
"epoch": 1.6,
"learning_rate": 9.455299559137333e-06,
"loss": 0.2119,
"step": 538
},
{
"epoch": 1.6,
"learning_rate": 9.423292936646258e-06,
"loss": 0.193,
"step": 539
},
{
"epoch": 1.6,
"learning_rate": 9.39129224071255e-06,
"loss": 0.1858,
"step": 540
},
{
"epoch": 1.61,
"learning_rate": 9.359297800192873e-06,
"loss": 0.2169,
"step": 541
},
{
"epoch": 1.61,
"learning_rate": 9.327309943879604e-06,
"loss": 0.2047,
"step": 542
},
{
"epoch": 1.61,
"learning_rate": 9.29532900049746e-06,
"loss": 0.1811,
"step": 543
},
{
"epoch": 1.61,
"learning_rate": 9.263355298700123e-06,
"loss": 0.2101,
"step": 544
},
{
"epoch": 1.62,
"learning_rate": 9.231389167066836e-06,
"loss": 0.1871,
"step": 545
},
{
"epoch": 1.62,
"learning_rate": 9.199430934099068e-06,
"loss": 0.1922,
"step": 546
},
{
"epoch": 1.62,
"learning_rate": 9.167480928217108e-06,
"loss": 0.1954,
"step": 547
},
{
"epoch": 1.63,
"learning_rate": 9.1355394777567e-06,
"loss": 0.2035,
"step": 548
},
{
"epoch": 1.63,
"learning_rate": 9.103606910965666e-06,
"loss": 0.2352,
"step": 549
},
{
"epoch": 1.63,
"learning_rate": 9.07168355600054e-06,
"loss": 0.2066,
"step": 550
},
{
"epoch": 1.64,
"learning_rate": 9.039769740923183e-06,
"loss": 0.1802,
"step": 551
},
{
"epoch": 1.64,
"learning_rate": 9.007865793697425e-06,
"loss": 0.19,
"step": 552
},
{
"epoch": 1.64,
"learning_rate": 8.975972042185688e-06,
"loss": 0.2019,
"step": 553
},
{
"epoch": 1.64,
"learning_rate": 8.94408881414561e-06,
"loss": 0.2428,
"step": 554
},
{
"epoch": 1.65,
"learning_rate": 8.912216437226692e-06,
"loss": 0.1807,
"step": 555
},
{
"epoch": 1.65,
"learning_rate": 8.880355238966923e-06,
"loss": 0.1645,
"step": 556
},
{
"epoch": 1.65,
"learning_rate": 8.848505546789407e-06,
"loss": 0.1831,
"step": 557
},
{
"epoch": 1.66,
"learning_rate": 8.816667687999013e-06,
"loss": 0.1971,
"step": 558
},
{
"epoch": 1.66,
"learning_rate": 8.784841989778997e-06,
"loss": 0.1857,
"step": 559
},
{
"epoch": 1.66,
"learning_rate": 8.753028779187654e-06,
"loss": 0.1736,
"step": 560
},
{
"epoch": 1.66,
"learning_rate": 8.721228383154939e-06,
"loss": 0.172,
"step": 561
},
{
"epoch": 1.67,
"learning_rate": 8.689441128479134e-06,
"loss": 0.2053,
"step": 562
},
{
"epoch": 1.67,
"learning_rate": 8.657667341823449e-06,
"loss": 0.1865,
"step": 563
},
{
"epoch": 1.67,
"learning_rate": 8.62590734971271e-06,
"loss": 0.1972,
"step": 564
},
{
"epoch": 1.68,
"learning_rate": 8.594161478529974e-06,
"loss": 0.1973,
"step": 565
},
{
"epoch": 1.68,
"learning_rate": 8.562430054513184e-06,
"loss": 0.1822,
"step": 566
},
{
"epoch": 1.68,
"learning_rate": 8.530713403751822e-06,
"loss": 0.1861,
"step": 567
},
{
"epoch": 1.69,
"learning_rate": 8.499011852183542e-06,
"loss": 0.1758,
"step": 568
},
{
"epoch": 1.69,
"learning_rate": 8.46732572559084e-06,
"loss": 0.1724,
"step": 569
},
{
"epoch": 1.69,
"learning_rate": 8.43565534959769e-06,
"loss": 0.1817,
"step": 570
},
{
"epoch": 1.69,
"learning_rate": 8.404001049666211e-06,
"loss": 0.1975,
"step": 571
},
{
"epoch": 1.7,
"learning_rate": 8.372363151093302e-06,
"loss": 0.1843,
"step": 572
},
{
"epoch": 1.7,
"learning_rate": 8.340741979007325e-06,
"loss": 0.2175,
"step": 573
},
{
"epoch": 1.7,
"learning_rate": 8.309137858364748e-06,
"loss": 0.1724,
"step": 574
},
{
"epoch": 1.71,
"learning_rate": 8.277551113946812e-06,
"loss": 0.2118,
"step": 575
},
{
"epoch": 1.71,
"learning_rate": 8.245982070356186e-06,
"loss": 0.1878,
"step": 576
},
{
"epoch": 1.71,
"learning_rate": 8.214431052013636e-06,
"loss": 0.189,
"step": 577
},
{
"epoch": 1.72,
"learning_rate": 8.182898383154692e-06,
"loss": 0.2257,
"step": 578
},
{
"epoch": 1.72,
"learning_rate": 8.151384387826313e-06,
"loss": 0.1898,
"step": 579
},
{
"epoch": 1.72,
"learning_rate": 8.119889389883567e-06,
"loss": 0.1711,
"step": 580
},
{
"epoch": 1.72,
"learning_rate": 8.08841371298628e-06,
"loss": 0.2002,
"step": 581
},
{
"epoch": 1.73,
"learning_rate": 8.056957680595733e-06,
"loss": 0.1789,
"step": 582
},
{
"epoch": 1.73,
"learning_rate": 8.02552161597133e-06,
"loss": 0.2765,
"step": 583
},
{
"epoch": 1.73,
"learning_rate": 7.994105842167274e-06,
"loss": 0.2065,
"step": 584
},
{
"epoch": 1.74,
"learning_rate": 7.962710682029245e-06,
"loss": 0.1659,
"step": 585
},
{
"epoch": 1.74,
"learning_rate": 7.931336458191092e-06,
"loss": 0.1884,
"step": 586
},
{
"epoch": 1.74,
"learning_rate": 7.899983493071506e-06,
"loss": 0.1882,
"step": 587
},
{
"epoch": 1.74,
"learning_rate": 7.868652108870715e-06,
"loss": 0.1975,
"step": 588
},
{
"epoch": 1.75,
"learning_rate": 7.837342627567167e-06,
"loss": 0.1806,
"step": 589
},
{
"epoch": 1.75,
"learning_rate": 7.806055370914223e-06,
"loss": 0.1659,
"step": 590
},
{
"epoch": 1.75,
"learning_rate": 7.774790660436857e-06,
"loss": 0.1884,
"step": 591
},
{
"epoch": 1.76,
"learning_rate": 7.74354881742834e-06,
"loss": 0.1901,
"step": 592
},
{
"epoch": 1.76,
"learning_rate": 7.712330162946948e-06,
"loss": 0.1852,
"step": 593
},
{
"epoch": 1.76,
"learning_rate": 7.681135017812656e-06,
"loss": 0.1781,
"step": 594
},
{
"epoch": 1.77,
"learning_rate": 7.649963702603848e-06,
"loss": 0.1721,
"step": 595
},
{
"epoch": 1.77,
"learning_rate": 7.618816537654018e-06,
"loss": 0.215,
"step": 596
},
{
"epoch": 1.77,
"learning_rate": 7.587693843048475e-06,
"loss": 0.1721,
"step": 597
},
{
"epoch": 1.77,
"learning_rate": 7.556595938621058e-06,
"loss": 0.2148,
"step": 598
},
{
"epoch": 1.78,
"learning_rate": 7.525523143950859e-06,
"loss": 0.2092,
"step": 599
},
{
"epoch": 1.78,
"learning_rate": 7.494475778358907e-06,
"loss": 0.195,
"step": 600
},
{
"epoch": 1.78,
"learning_rate": 7.463454160904928e-06,
"loss": 0.1706,
"step": 601
},
{
"epoch": 1.79,
"learning_rate": 7.432458610384037e-06,
"loss": 0.1863,
"step": 602
},
{
"epoch": 1.79,
"learning_rate": 7.401489445323473e-06,
"loss": 0.1755,
"step": 603
},
{
"epoch": 1.79,
"learning_rate": 7.370546983979322e-06,
"loss": 0.1802,
"step": 604
},
{
"epoch": 1.8,
"learning_rate": 7.33963154433325e-06,
"loss": 0.1956,
"step": 605
},
{
"epoch": 1.8,
"learning_rate": 7.308743444089232e-06,
"loss": 0.1734,
"step": 606
},
{
"epoch": 1.8,
"learning_rate": 7.27788300067029e-06,
"loss": 0.203,
"step": 607
},
{
"epoch": 1.8,
"learning_rate": 7.2470505312152275e-06,
"loss": 0.1673,
"step": 608
},
{
"epoch": 1.81,
"learning_rate": 7.21624635257537e-06,
"loss": 0.1818,
"step": 609
},
{
"epoch": 1.81,
"learning_rate": 7.185470781311317e-06,
"loss": 0.1825,
"step": 610
},
{
"epoch": 1.81,
"learning_rate": 7.154724133689677e-06,
"loss": 0.1771,
"step": 611
},
{
"epoch": 1.82,
"learning_rate": 7.124006725679828e-06,
"loss": 0.1771,
"step": 612
},
{
"epoch": 1.82,
"learning_rate": 7.093318872950665e-06,
"loss": 0.1799,
"step": 613
},
{
"epoch": 1.82,
"learning_rate": 7.062660890867354e-06,
"loss": 0.1809,
"step": 614
},
{
"epoch": 1.82,
"learning_rate": 7.032033094488094e-06,
"loss": 0.2217,
"step": 615
},
{
"epoch": 1.83,
"learning_rate": 7.001435798560884e-06,
"loss": 0.2301,
"step": 616
},
{
"epoch": 1.83,
"learning_rate": 6.97086931752028e-06,
"loss": 0.1917,
"step": 617
},
{
"epoch": 1.83,
"learning_rate": 6.9403339654841654e-06,
"loss": 0.1945,
"step": 618
},
{
"epoch": 1.84,
"learning_rate": 6.909830056250527e-06,
"loss": 0.165,
"step": 619
},
{
"epoch": 1.84,
"learning_rate": 6.8793579032942305e-06,
"loss": 0.1981,
"step": 620
},
{
"epoch": 1.84,
"learning_rate": 6.848917819763794e-06,
"loss": 0.2108,
"step": 621
},
{
"epoch": 1.85,
"learning_rate": 6.818510118478172e-06,
"loss": 0.1768,
"step": 622
},
{
"epoch": 1.85,
"learning_rate": 6.788135111923545e-06,
"loss": 0.1805,
"step": 623
},
{
"epoch": 1.85,
"learning_rate": 6.7577931122501e-06,
"loss": 0.1729,
"step": 624
},
{
"epoch": 1.85,
"learning_rate": 6.727484431268832e-06,
"loss": 0.1929,
"step": 625
},
{
"epoch": 1.86,
"learning_rate": 6.697209380448333e-06,
"loss": 0.2105,
"step": 626
},
{
"epoch": 1.86,
"learning_rate": 6.666968270911585e-06,
"loss": 0.1783,
"step": 627
},
{
"epoch": 1.86,
"learning_rate": 6.636761413432784e-06,
"loss": 0.2108,
"step": 628
},
{
"epoch": 1.87,
"learning_rate": 6.606589118434126e-06,
"loss": 0.2341,
"step": 629
},
{
"epoch": 1.87,
"learning_rate": 6.576451695982624e-06,
"loss": 0.1821,
"step": 630
},
{
"epoch": 1.87,
"learning_rate": 6.546349455786926e-06,
"loss": 0.1985,
"step": 631
},
{
"epoch": 1.88,
"learning_rate": 6.5162827071941194e-06,
"loss": 0.1876,
"step": 632
},
{
"epoch": 1.88,
"learning_rate": 6.486251759186573e-06,
"loss": 0.1913,
"step": 633
},
{
"epoch": 1.88,
"learning_rate": 6.456256920378736e-06,
"loss": 0.2022,
"step": 634
},
{
"epoch": 1.88,
"learning_rate": 6.426298499013994e-06,
"loss": 0.1908,
"step": 635
},
{
"epoch": 1.89,
"learning_rate": 6.396376802961468e-06,
"loss": 0.1636,
"step": 636
},
{
"epoch": 1.89,
"learning_rate": 6.366492139712886e-06,
"loss": 0.2088,
"step": 637
},
{
"epoch": 1.89,
"learning_rate": 6.3366448163794e-06,
"loss": 0.1953,
"step": 638
},
{
"epoch": 1.9,
"learning_rate": 6.306835139688439e-06,
"loss": 0.1963,
"step": 639
},
{
"epoch": 1.9,
"learning_rate": 6.277063415980549e-06,
"loss": 0.2017,
"step": 640
},
{
"epoch": 1.9,
"learning_rate": 6.24732995120626e-06,
"loss": 0.1823,
"step": 641
},
{
"epoch": 1.91,
"learning_rate": 6.217635050922923e-06,
"loss": 0.1817,
"step": 642
},
{
"epoch": 1.91,
"learning_rate": 6.187979020291584e-06,
"loss": 0.1778,
"step": 643
},
{
"epoch": 1.91,
"learning_rate": 6.158362164073844e-06,
"loss": 0.1754,
"step": 644
},
{
"epoch": 1.91,
"learning_rate": 6.1287847866287205e-06,
"loss": 0.1686,
"step": 645
},
{
"epoch": 1.92,
"learning_rate": 6.099247191909532e-06,
"loss": 0.1738,
"step": 646
},
{
"epoch": 1.92,
"learning_rate": 6.069749683460765e-06,
"loss": 0.1639,
"step": 647
},
{
"epoch": 1.92,
"learning_rate": 6.040292564414958e-06,
"loss": 0.1872,
"step": 648
},
{
"epoch": 1.93,
"learning_rate": 6.010876137489584e-06,
"loss": 0.1957,
"step": 649
},
{
"epoch": 1.93,
"learning_rate": 5.981500704983947e-06,
"loss": 0.1731,
"step": 650
},
{
"epoch": 1.93,
"learning_rate": 5.952166568776062e-06,
"loss": 0.1766,
"step": 651
},
{
"epoch": 1.93,
"learning_rate": 5.922874030319567e-06,
"loss": 0.2051,
"step": 652
},
{
"epoch": 1.94,
"learning_rate": 5.893623390640621e-06,
"loss": 0.1655,
"step": 653
},
{
"epoch": 1.94,
"learning_rate": 5.864414950334796e-06,
"loss": 0.1798,
"step": 654
},
{
"epoch": 1.94,
"learning_rate": 5.835249009564013e-06,
"loss": 0.1825,
"step": 655
},
{
"epoch": 1.95,
"learning_rate": 5.806125868053433e-06,
"loss": 0.207,
"step": 656
},
{
"epoch": 1.95,
"learning_rate": 5.7770458250884044e-06,
"loss": 0.1848,
"step": 657
},
{
"epoch": 1.95,
"learning_rate": 5.748009179511353e-06,
"loss": 0.183,
"step": 658
},
{
"epoch": 1.96,
"learning_rate": 5.719016229718748e-06,
"loss": 0.1813,
"step": 659
},
{
"epoch": 1.96,
"learning_rate": 5.690067273657996e-06,
"loss": 0.1971,
"step": 660
},
{
"epoch": 1.96,
"learning_rate": 5.66116260882442e-06,
"loss": 0.1838,
"step": 661
},
{
"epoch": 1.96,
"learning_rate": 5.6323025322581694e-06,
"loss": 0.1871,
"step": 662
},
{
"epoch": 1.97,
"learning_rate": 5.60348734054118e-06,
"loss": 0.1785,
"step": 663
},
{
"epoch": 1.97,
"learning_rate": 5.574717329794137e-06,
"loss": 0.1728,
"step": 664
},
{
"epoch": 1.97,
"learning_rate": 5.545992795673408e-06,
"loss": 0.172,
"step": 665
},
{
"epoch": 1.98,
"learning_rate": 5.517314033368031e-06,
"loss": 0.1982,
"step": 666
},
{
"epoch": 1.98,
"learning_rate": 5.488681337596653e-06,
"loss": 0.1817,
"step": 667
},
{
"epoch": 1.98,
"learning_rate": 5.460095002604533e-06,
"loss": 0.1741,
"step": 668
},
{
"epoch": 1.99,
"learning_rate": 5.431555322160483e-06,
"loss": 0.1814,
"step": 669
},
{
"epoch": 1.99,
"learning_rate": 5.403062589553887e-06,
"loss": 0.2196,
"step": 670
},
{
"epoch": 1.99,
"learning_rate": 5.37461709759165e-06,
"loss": 0.1938,
"step": 671
},
{
"epoch": 1.99,
"learning_rate": 5.346219138595215e-06,
"loss": 0.1946,
"step": 672
},
{
"epoch": 2.0,
"learning_rate": 5.317869004397545e-06,
"loss": 0.1869,
"step": 673
},
{
"epoch": 2.0,
"learning_rate": 5.2895669863401375e-06,
"loss": 0.1741,
"step": 674
},
{
"epoch": 2.0,
"learning_rate": 5.2613133752700145e-06,
"loss": 0.1707,
"step": 675
},
{
"epoch": 2.01,
"learning_rate": 5.233108461536749e-06,
"loss": 0.178,
"step": 676
},
{
"epoch": 2.01,
"learning_rate": 5.2049525349894625e-06,
"loss": 0.1694,
"step": 677
},
{
"epoch": 2.01,
"learning_rate": 5.176845884973864e-06,
"loss": 0.1639,
"step": 678
},
{
"epoch": 2.01,
"learning_rate": 5.148788800329279e-06,
"loss": 0.1708,
"step": 679
},
{
"epoch": 2.02,
"learning_rate": 5.1207815693856555e-06,
"loss": 0.162,
"step": 680
},
{
"epoch": 2.02,
"learning_rate": 5.092824479960625e-06,
"loss": 0.1806,
"step": 681
},
{
"epoch": 2.02,
"learning_rate": 5.064917819356532e-06,
"loss": 0.1712,
"step": 682
},
{
"epoch": 2.03,
"learning_rate": 5.037061874357503e-06,
"loss": 0.162,
"step": 683
},
{
"epoch": 2.03,
"learning_rate": 5.00925693122646e-06,
"loss": 0.1849,
"step": 684
},
{
"epoch": 2.03,
"learning_rate": 4.981503275702227e-06,
"loss": 0.1677,
"step": 685
},
{
"epoch": 2.04,
"learning_rate": 4.9538011929965436e-06,
"loss": 0.1529,
"step": 686
},
{
"epoch": 2.04,
"learning_rate": 4.92615096779118e-06,
"loss": 0.1878,
"step": 687
},
{
"epoch": 2.04,
"learning_rate": 4.898552884234973e-06,
"loss": 0.167,
"step": 688
},
{
"epoch": 2.04,
"learning_rate": 4.87100722594094e-06,
"loss": 0.1626,
"step": 689
},
{
"epoch": 2.05,
"learning_rate": 4.843514275983335e-06,
"loss": 0.1579,
"step": 690
},
{
"epoch": 2.05,
"learning_rate": 4.81607431689475e-06,
"loss": 0.1781,
"step": 691
},
{
"epoch": 2.05,
"learning_rate": 4.788687630663232e-06,
"loss": 0.1875,
"step": 692
},
{
"epoch": 2.06,
"learning_rate": 4.761354498729345e-06,
"loss": 0.1669,
"step": 693
},
{
"epoch": 2.06,
"learning_rate": 4.734075201983319e-06,
"loss": 0.1816,
"step": 694
},
{
"epoch": 2.06,
"learning_rate": 4.706850020762126e-06,
"loss": 0.1829,
"step": 695
},
{
"epoch": 2.07,
"learning_rate": 4.679679234846636e-06,
"loss": 0.1798,
"step": 696
},
{
"epoch": 2.07,
"learning_rate": 4.652563123458703e-06,
"loss": 0.1723,
"step": 697
},
{
"epoch": 2.07,
"learning_rate": 4.62550196525834e-06,
"loss": 0.1561,
"step": 698
},
{
"epoch": 2.07,
"learning_rate": 4.598496038340801e-06,
"loss": 0.1808,
"step": 699
},
{
"epoch": 2.08,
"learning_rate": 4.57154562023378e-06,
"loss": 0.1633,
"step": 700
},
{
"epoch": 2.08,
"learning_rate": 4.544650987894514e-06,
"loss": 0.1438,
"step": 701
},
{
"epoch": 2.08,
"learning_rate": 4.517812417706967e-06,
"loss": 0.1627,
"step": 702
},
{
"epoch": 2.09,
"learning_rate": 4.491030185478976e-06,
"loss": 0.1657,
"step": 703
},
{
"epoch": 2.09,
"learning_rate": 4.464304566439407e-06,
"loss": 0.1751,
"step": 704
},
{
"epoch": 2.09,
"learning_rate": 4.437635835235353e-06,
"loss": 0.1505,
"step": 705
},
{
"epoch": 2.09,
"learning_rate": 4.411024265929283e-06,
"loss": 0.1646,
"step": 706
},
{
"epoch": 2.1,
"learning_rate": 4.3844701319962525e-06,
"loss": 0.1621,
"step": 707
},
{
"epoch": 2.1,
"learning_rate": 4.35797370632107e-06,
"loss": 0.1678,
"step": 708
},
{
"epoch": 2.1,
"learning_rate": 4.331535261195504e-06,
"loss": 0.17,
"step": 709
},
{
"epoch": 2.11,
"learning_rate": 4.305155068315481e-06,
"loss": 0.1618,
"step": 710
},
{
"epoch": 2.11,
"learning_rate": 4.278833398778306e-06,
"loss": 0.1786,
"step": 711
},
{
"epoch": 2.11,
"learning_rate": 4.252570523079852e-06,
"loss": 0.1487,
"step": 712
},
{
"epoch": 2.12,
"learning_rate": 4.2263667111118074e-06,
"loss": 0.1828,
"step": 713
},
{
"epoch": 2.12,
"learning_rate": 4.200222232158877e-06,
"loss": 0.1621,
"step": 714
},
{
"epoch": 2.12,
"learning_rate": 4.17413735489604e-06,
"loss": 0.1788,
"step": 715
},
{
"epoch": 2.12,
"learning_rate": 4.148112347385762e-06,
"loss": 0.1653,
"step": 716
},
{
"epoch": 2.13,
"learning_rate": 4.12214747707527e-06,
"loss": 0.1624,
"step": 717
},
{
"epoch": 2.13,
"learning_rate": 4.096243010793775e-06,
"loss": 0.1912,
"step": 718
},
{
"epoch": 2.13,
"learning_rate": 4.070399214749743e-06,
"loss": 0.151,
"step": 719
},
{
"epoch": 2.14,
"learning_rate": 4.044616354528173e-06,
"loss": 0.1731,
"step": 720
},
{
"epoch": 2.14,
"learning_rate": 4.01889469508784e-06,
"loss": 0.1716,
"step": 721
},
{
"epoch": 2.14,
"learning_rate": 3.993234500758597e-06,
"loss": 0.1793,
"step": 722
},
{
"epoch": 2.15,
"learning_rate": 3.967636035238636e-06,
"loss": 0.169,
"step": 723
},
{
"epoch": 2.15,
"learning_rate": 3.942099561591802e-06,
"loss": 0.1728,
"step": 724
},
{
"epoch": 2.15,
"learning_rate": 3.9166253422448685e-06,
"loss": 0.2001,
"step": 725
},
{
"epoch": 2.15,
"learning_rate": 3.891213638984858e-06,
"loss": 0.1604,
"step": 726
},
{
"epoch": 2.16,
"learning_rate": 3.865864712956336e-06,
"loss": 0.1734,
"step": 727
},
{
"epoch": 2.16,
"learning_rate": 3.840578824658735e-06,
"loss": 0.1616,
"step": 728
},
{
"epoch": 2.16,
"learning_rate": 3.815356233943685e-06,
"loss": 0.1579,
"step": 729
},
{
"epoch": 2.17,
"learning_rate": 3.790197200012328e-06,
"loss": 0.1743,
"step": 730
},
{
"epoch": 2.17,
"learning_rate": 3.7651019814126656e-06,
"loss": 0.1546,
"step": 731
},
{
"epoch": 2.17,
"learning_rate": 3.740070836036893e-06,
"loss": 0.1533,
"step": 732
},
{
"epoch": 2.18,
"learning_rate": 3.715104021118764e-06,
"loss": 0.1506,
"step": 733
},
{
"epoch": 2.18,
"learning_rate": 3.6902017932309244e-06,
"loss": 0.1708,
"step": 734
},
{
"epoch": 2.18,
"learning_rate": 3.665364408282305e-06,
"loss": 0.1604,
"step": 735
},
{
"epoch": 2.18,
"learning_rate": 3.6405921215154492e-06,
"loss": 0.1868,
"step": 736
},
{
"epoch": 2.19,
"learning_rate": 3.6158851875039458e-06,
"loss": 0.1637,
"step": 737
},
{
"epoch": 2.19,
"learning_rate": 3.591243860149759e-06,
"loss": 0.1872,
"step": 738
},
{
"epoch": 2.19,
"learning_rate": 3.5666683926806623e-06,
"loss": 0.1828,
"step": 739
},
{
"epoch": 2.2,
"learning_rate": 3.542159037647598e-06,
"loss": 0.1682,
"step": 740
},
{
"epoch": 2.2,
"learning_rate": 3.5177160469221184e-06,
"loss": 0.1583,
"step": 741
},
{
"epoch": 2.2,
"learning_rate": 3.493339671693765e-06,
"loss": 0.1684,
"step": 742
},
{
"epoch": 2.2,
"learning_rate": 3.4690301624675127e-06,
"loss": 0.1625,
"step": 743
},
{
"epoch": 2.21,
"learning_rate": 3.4447877690611863e-06,
"loss": 0.1503,
"step": 744
},
{
"epoch": 2.21,
"learning_rate": 3.4206127406028744e-06,
"loss": 0.1516,
"step": 745
},
{
"epoch": 2.21,
"learning_rate": 3.3965053255284085e-06,
"loss": 0.1559,
"step": 746
},
{
"epoch": 2.22,
"learning_rate": 3.372465771578771e-06,
"loss": 0.1577,
"step": 747
},
{
"epoch": 2.22,
"learning_rate": 3.3484943257975788e-06,
"loss": 0.17,
"step": 748
},
{
"epoch": 2.22,
"learning_rate": 3.3245912345285203e-06,
"loss": 0.1793,
"step": 749
},
{
"epoch": 2.23,
"learning_rate": 3.3007567434128495e-06,
"loss": 0.1591,
"step": 750
},
{
"epoch": 2.23,
"learning_rate": 3.2769910973868314e-06,
"loss": 0.1648,
"step": 751
},
{
"epoch": 2.23,
"learning_rate": 3.2532945406792573e-06,
"loss": 0.1953,
"step": 752
},
{
"epoch": 2.23,
"learning_rate": 3.2296673168089078e-06,
"loss": 0.1651,
"step": 753
},
{
"epoch": 2.24,
"learning_rate": 3.2061096685820603e-06,
"loss": 0.1688,
"step": 754
},
{
"epoch": 2.24,
"learning_rate": 3.1826218380900066e-06,
"loss": 0.1652,
"step": 755
},
{
"epoch": 2.24,
"learning_rate": 3.1592040667065393e-06,
"loss": 0.1684,
"step": 756
},
{
"epoch": 2.25,
"learning_rate": 3.135856595085498e-06,
"loss": 0.1515,
"step": 757
},
{
"epoch": 2.25,
"learning_rate": 3.1125796631582707e-06,
"loss": 0.1602,
"step": 758
},
{
"epoch": 2.25,
"learning_rate": 3.089373510131354e-06,
"loss": 0.1626,
"step": 759
},
{
"epoch": 2.26,
"learning_rate": 3.0662383744838697e-06,
"loss": 0.1554,
"step": 760
},
{
"epoch": 2.26,
"learning_rate": 3.0431744939651365e-06,
"loss": 0.1726,
"step": 761
},
{
"epoch": 2.26,
"learning_rate": 3.02018210559221e-06,
"loss": 0.1699,
"step": 762
},
{
"epoch": 2.26,
"learning_rate": 2.9972614456474537e-06,
"loss": 0.1631,
"step": 763
},
{
"epoch": 2.27,
"learning_rate": 2.9744127496761087e-06,
"loss": 0.1666,
"step": 764
},
{
"epoch": 2.27,
"learning_rate": 2.951636252483885e-06,
"loss": 0.1491,
"step": 765
},
{
"epoch": 2.27,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.1649,
"step": 766
},
{
"epoch": 2.28,
"learning_rate": 2.9063007899474214e-06,
"loss": 0.1795,
"step": 767
},
{
"epoch": 2.28,
"learning_rate": 2.8837422904952097e-06,
"loss": 0.1674,
"step": 768
},
{
"epoch": 2.28,
"learning_rate": 2.8612569216013675e-06,
"loss": 0.1603,
"step": 769
},
{
"epoch": 2.28,
"learning_rate": 2.838844914337857e-06,
"loss": 0.1588,
"step": 770
},
{
"epoch": 2.29,
"learning_rate": 2.8165064990227255e-06,
"loss": 0.2216,
"step": 771
},
{
"epoch": 2.29,
"learning_rate": 2.794241905217753e-06,
"loss": 0.1518,
"step": 772
},
{
"epoch": 2.29,
"learning_rate": 2.7720513617260857e-06,
"loss": 0.1456,
"step": 773
},
{
"epoch": 2.3,
"learning_rate": 2.7499350965898974e-06,
"loss": 0.1764,
"step": 774
},
{
"epoch": 2.3,
"learning_rate": 2.7278933370880267e-06,
"loss": 0.1678,
"step": 775
},
{
"epoch": 2.3,
"learning_rate": 2.7059263097336595e-06,
"loss": 0.1886,
"step": 776
},
{
"epoch": 2.31,
"learning_rate": 2.6840342402719867e-06,
"loss": 0.1893,
"step": 777
},
{
"epoch": 2.31,
"learning_rate": 2.662217353677896e-06,
"loss": 0.1528,
"step": 778
},
{
"epoch": 2.31,
"learning_rate": 2.640475874153651e-06,
"loss": 0.162,
"step": 779
},
{
"epoch": 2.31,
"learning_rate": 2.6188100251265947e-06,
"loss": 0.1645,
"step": 780
},
{
"epoch": 2.32,
"learning_rate": 2.597220029246846e-06,
"loss": 0.1745,
"step": 781
},
{
"epoch": 2.32,
"learning_rate": 2.5757061083850153e-06,
"loss": 0.1532,
"step": 782
},
{
"epoch": 2.32,
"learning_rate": 2.5542684836299316e-06,
"loss": 0.1588,
"step": 783
},
{
"epoch": 2.33,
"learning_rate": 2.532907375286351e-06,
"loss": 0.1625,
"step": 784
},
{
"epoch": 2.33,
"learning_rate": 2.5116230028727184e-06,
"loss": 0.1802,
"step": 785
},
{
"epoch": 2.33,
"learning_rate": 2.490415585118887e-06,
"loss": 0.1523,
"step": 786
},
{
"epoch": 2.34,
"learning_rate": 2.469285339963892e-06,
"loss": 0.1776,
"step": 787
},
{
"epoch": 2.34,
"learning_rate": 2.4482324845536887e-06,
"loss": 0.1612,
"step": 788
},
{
"epoch": 2.34,
"learning_rate": 2.427257235238949e-06,
"loss": 0.1741,
"step": 789
},
{
"epoch": 2.34,
"learning_rate": 2.406359807572802e-06,
"loss": 0.1605,
"step": 790
},
{
"epoch": 2.35,
"learning_rate": 2.3855404163086558e-06,
"loss": 0.1642,
"step": 791
},
{
"epoch": 2.35,
"learning_rate": 2.3647992753979698e-06,
"loss": 0.1563,
"step": 792
},
{
"epoch": 2.35,
"learning_rate": 2.3441365979880527e-06,
"loss": 0.1648,
"step": 793
},
{
"epoch": 2.36,
"learning_rate": 2.323552596419889e-06,
"loss": 0.1595,
"step": 794
},
{
"epoch": 2.36,
"learning_rate": 2.3030474822259396e-06,
"loss": 0.1647,
"step": 795
},
{
"epoch": 2.36,
"learning_rate": 2.282621466127982e-06,
"loss": 0.1673,
"step": 796
},
{
"epoch": 2.36,
"learning_rate": 2.2622747580349313e-06,
"loss": 0.157,
"step": 797
},
{
"epoch": 2.37,
"learning_rate": 2.2420075670407015e-06,
"loss": 0.1659,
"step": 798
},
{
"epoch": 2.37,
"learning_rate": 2.2218201014220266e-06,
"loss": 0.1847,
"step": 799
},
{
"epoch": 2.37,
"learning_rate": 2.2017125686363592e-06,
"loss": 0.1408,
"step": 800
},
{
"epoch": 2.38,
"learning_rate": 2.1816851753197023e-06,
"loss": 0.155,
"step": 801
},
{
"epoch": 2.38,
"learning_rate": 2.1617381272845174e-06,
"loss": 0.1689,
"step": 802
},
{
"epoch": 2.38,
"learning_rate": 2.1418716295175766e-06,
"loss": 0.1603,
"step": 803
},
{
"epoch": 2.39,
"learning_rate": 2.1220858861778903e-06,
"loss": 0.1565,
"step": 804
},
{
"epoch": 2.39,
"learning_rate": 2.102381100594577e-06,
"loss": 0.1502,
"step": 805
},
{
"epoch": 2.39,
"learning_rate": 2.082757475264804e-06,
"loss": 0.1535,
"step": 806
},
{
"epoch": 2.39,
"learning_rate": 2.063215211851678e-06,
"loss": 0.1587,
"step": 807
},
{
"epoch": 2.4,
"learning_rate": 2.043754511182191e-06,
"loss": 0.1648,
"step": 808
},
{
"epoch": 2.4,
"learning_rate": 2.0243755732451566e-06,
"loss": 0.1573,
"step": 809
},
{
"epoch": 2.4,
"learning_rate": 2.0050785971891406e-06,
"loss": 0.169,
"step": 810
},
{
"epoch": 2.41,
"learning_rate": 1.9858637813204352e-06,
"loss": 0.1777,
"step": 811
},
{
"epoch": 2.41,
"learning_rate": 1.9667313231009955e-06,
"loss": 0.1741,
"step": 812
},
{
"epoch": 2.41,
"learning_rate": 1.947681419146439e-06,
"loss": 0.168,
"step": 813
},
{
"epoch": 2.42,
"learning_rate": 1.9287142652239964e-06,
"loss": 0.1625,
"step": 814
},
{
"epoch": 2.42,
"learning_rate": 1.9098300562505266e-06,
"loss": 0.1613,
"step": 815
},
{
"epoch": 2.42,
"learning_rate": 1.8910289862904917e-06,
"loss": 0.1577,
"step": 816
},
{
"epoch": 2.42,
"learning_rate": 1.8723112485539741e-06,
"loss": 0.1852,
"step": 817
},
{
"epoch": 2.43,
"learning_rate": 1.853677035394692e-06,
"loss": 0.1604,
"step": 818
},
{
"epoch": 2.43,
"learning_rate": 1.835126538308013e-06,
"loss": 0.156,
"step": 819
},
{
"epoch": 2.43,
"learning_rate": 1.8166599479290014e-06,
"loss": 0.1622,
"step": 820
},
{
"epoch": 2.44,
"learning_rate": 1.7982774540304404e-06,
"loss": 0.1773,
"step": 821
},
{
"epoch": 2.44,
"learning_rate": 1.7799792455209019e-06,
"loss": 0.16,
"step": 822
},
{
"epoch": 2.44,
"learning_rate": 1.7617655104427833e-06,
"loss": 0.1909,
"step": 823
},
{
"epoch": 2.45,
"learning_rate": 1.7436364359703995e-06,
"loss": 0.171,
"step": 824
},
{
"epoch": 2.45,
"learning_rate": 1.7255922084080367e-06,
"loss": 0.1579,
"step": 825
},
{
"epoch": 2.45,
"learning_rate": 1.7076330131880525e-06,
"loss": 0.1572,
"step": 826
},
{
"epoch": 2.45,
"learning_rate": 1.6897590348689607e-06,
"loss": 0.1498,
"step": 827
},
{
"epoch": 2.46,
"learning_rate": 1.6719704571335503e-06,
"loss": 0.1957,
"step": 828
},
{
"epoch": 2.46,
"learning_rate": 1.6542674627869738e-06,
"loss": 0.1603,
"step": 829
},
{
"epoch": 2.46,
"learning_rate": 1.6366502337548952e-06,
"loss": 0.1513,
"step": 830
},
{
"epoch": 2.47,
"learning_rate": 1.6191189510815942e-06,
"loss": 0.1601,
"step": 831
},
{
"epoch": 2.47,
"learning_rate": 1.601673794928127e-06,
"loss": 0.1631,
"step": 832
},
{
"epoch": 2.47,
"learning_rate": 1.5843149445704687e-06,
"loss": 0.1667,
"step": 833
},
{
"epoch": 2.47,
"learning_rate": 1.5670425783976583e-06,
"loss": 0.1615,
"step": 834
},
{
"epoch": 2.48,
"learning_rate": 1.5498568739099907e-06,
"loss": 0.1712,
"step": 835
},
{
"epoch": 2.48,
"learning_rate": 1.5327580077171589e-06,
"loss": 0.1516,
"step": 836
},
{
"epoch": 2.48,
"learning_rate": 1.5157461555364772e-06,
"loss": 0.1606,
"step": 837
},
{
"epoch": 2.49,
"learning_rate": 1.4988214921910415e-06,
"loss": 0.1585,
"step": 838
},
{
"epoch": 2.49,
"learning_rate": 1.481984191607959e-06,
"loss": 0.1694,
"step": 839
},
{
"epoch": 2.49,
"learning_rate": 1.4652344268165407e-06,
"loss": 0.1648,
"step": 840
},
{
"epoch": 2.5,
"learning_rate": 1.4485723699465392e-06,
"loss": 0.1829,
"step": 841
},
{
"epoch": 2.5,
"learning_rate": 1.4319981922263636e-06,
"loss": 0.146,
"step": 842
},
{
"epoch": 2.5,
"learning_rate": 1.4155120639813392e-06,
"loss": 0.1492,
"step": 843
},
{
"epoch": 2.5,
"learning_rate": 1.3991141546319386e-06,
"loss": 0.1684,
"step": 844
},
{
"epoch": 2.51,
"learning_rate": 1.3828046326920496e-06,
"loss": 0.1687,
"step": 845
},
{
"epoch": 2.51,
"learning_rate": 1.3665836657672493e-06,
"loss": 0.162,
"step": 846
},
{
"epoch": 2.51,
"learning_rate": 1.350451420553065e-06,
"loss": 0.1518,
"step": 847
},
{
"epoch": 2.52,
"learning_rate": 1.33440806283328e-06,
"loss": 0.1942,
"step": 848
},
{
"epoch": 2.52,
"learning_rate": 1.3184537574782153e-06,
"loss": 0.1432,
"step": 849
},
{
"epoch": 2.52,
"learning_rate": 1.3025886684430467e-06,
"loss": 0.1676,
"step": 850
},
{
"epoch": 2.53,
"learning_rate": 1.286812958766106e-06,
"loss": 0.1539,
"step": 851
},
{
"epoch": 2.53,
"learning_rate": 1.271126790567223e-06,
"loss": 0.1735,
"step": 852
},
{
"epoch": 2.53,
"learning_rate": 1.2555303250460437e-06,
"loss": 0.1582,
"step": 853
},
{
"epoch": 2.53,
"learning_rate": 1.2400237224803836e-06,
"loss": 0.1704,
"step": 854
},
{
"epoch": 2.54,
"learning_rate": 1.224607142224572e-06,
"loss": 0.16,
"step": 855
},
{
"epoch": 2.54,
"learning_rate": 1.209280742707828e-06,
"loss": 0.1743,
"step": 856
},
{
"epoch": 2.54,
"learning_rate": 1.19404468143262e-06,
"loss": 0.1596,
"step": 857
},
{
"epoch": 2.55,
"learning_rate": 1.17889911497305e-06,
"loss": 0.1748,
"step": 858
},
{
"epoch": 2.55,
"learning_rate": 1.1638441989732474e-06,
"loss": 0.1557,
"step": 859
},
{
"epoch": 2.55,
"learning_rate": 1.1488800881457652e-06,
"loss": 0.1712,
"step": 860
},
{
"epoch": 2.55,
"learning_rate": 1.134006936269999e-06,
"loss": 0.1759,
"step": 861
},
{
"epoch": 2.56,
"learning_rate": 1.119224896190595e-06,
"loss": 0.172,
"step": 862
},
{
"epoch": 2.56,
"learning_rate": 1.1045341198158833e-06,
"loss": 0.1652,
"step": 863
},
{
"epoch": 2.56,
"learning_rate": 1.0899347581163222e-06,
"loss": 0.1401,
"step": 864
},
{
"epoch": 2.57,
"learning_rate": 1.0754269611229428e-06,
"loss": 0.1503,
"step": 865
},
{
"epoch": 2.57,
"learning_rate": 1.0610108779258043e-06,
"loss": 0.1461,
"step": 866
},
{
"epoch": 2.57,
"learning_rate": 1.0466866566724698e-06,
"loss": 0.1539,
"step": 867
},
{
"epoch": 2.58,
"learning_rate": 1.0324544445664698e-06,
"loss": 0.1625,
"step": 868
},
{
"epoch": 2.58,
"learning_rate": 1.0183143878658098e-06,
"loss": 0.1688,
"step": 869
},
{
"epoch": 2.58,
"learning_rate": 1.0042666318814465e-06,
"loss": 0.1635,
"step": 870
},
{
"epoch": 2.58,
"learning_rate": 9.903113209758098e-07,
"loss": 0.1373,
"step": 871
},
{
"epoch": 2.59,
"learning_rate": 9.764485985613092e-07,
"loss": 0.1584,
"step": 872
},
{
"epoch": 2.59,
"learning_rate": 9.626786070988658e-07,
"loss": 0.1459,
"step": 873
},
{
"epoch": 2.59,
"learning_rate": 9.490014880964504e-07,
"loss": 0.1816,
"step": 874
},
{
"epoch": 2.6,
"learning_rate": 9.354173821076184e-07,
"loss": 0.1623,
"step": 875
},
{
"epoch": 2.6,
"learning_rate": 9.2192642873008e-07,
"loss": 0.153,
"step": 876
},
{
"epoch": 2.6,
"learning_rate": 9.085287666042508e-07,
"loss": 0.1419,
"step": 877
},
{
"epoch": 2.61,
"learning_rate": 8.952245334118415e-07,
"loss": 0.2001,
"step": 878
},
{
"epoch": 2.61,
"learning_rate": 8.820138658744304e-07,
"loss": 0.1642,
"step": 879
},
{
"epoch": 2.61,
"learning_rate": 8.688968997520686e-07,
"loss": 0.1611,
"step": 880
},
{
"epoch": 2.61,
"learning_rate": 8.558737698418762e-07,
"loss": 0.1676,
"step": 881
},
{
"epoch": 2.62,
"learning_rate": 8.429446099766614e-07,
"loss": 0.153,
"step": 882
},
{
"epoch": 2.62,
"learning_rate": 8.301095530235492e-07,
"loss": 0.1858,
"step": 883
},
{
"epoch": 2.62,
"learning_rate": 8.173687308826062e-07,
"loss": 0.1763,
"step": 884
},
{
"epoch": 2.63,
"learning_rate": 8.047222744854943e-07,
"loss": 0.1667,
"step": 885
},
{
"epoch": 2.63,
"learning_rate": 7.921703137941172e-07,
"loss": 0.1807,
"step": 886
},
{
"epoch": 2.63,
"learning_rate": 7.797129777992951e-07,
"loss": 0.1954,
"step": 887
},
{
"epoch": 2.64,
"learning_rate": 7.67350394519426e-07,
"loss": 0.1448,
"step": 888
},
{
"epoch": 2.64,
"learning_rate": 7.55082690999186e-07,
"loss": 0.165,
"step": 889
},
{
"epoch": 2.64,
"learning_rate": 7.429099933082029e-07,
"loss": 0.1646,
"step": 890
},
{
"epoch": 2.64,
"learning_rate": 7.308324265397837e-07,
"loss": 0.1554,
"step": 891
},
{
"epoch": 2.65,
"learning_rate": 7.188501148096117e-07,
"loss": 0.1498,
"step": 892
},
{
"epoch": 2.65,
"learning_rate": 7.069631812544808e-07,
"loss": 0.1686,
"step": 893
},
{
"epoch": 2.65,
"learning_rate": 6.951717480310216e-07,
"loss": 0.1418,
"step": 894
},
{
"epoch": 2.66,
"learning_rate": 6.834759363144595e-07,
"loss": 0.1438,
"step": 895
},
{
"epoch": 2.66,
"learning_rate": 6.718758662973524e-07,
"loss": 0.1547,
"step": 896
},
{
"epoch": 2.66,
"learning_rate": 6.60371657188369e-07,
"loss": 0.1536,
"step": 897
},
{
"epoch": 2.66,
"learning_rate": 6.489634272110567e-07,
"loss": 0.1687,
"step": 898
},
{
"epoch": 2.67,
"learning_rate": 6.37651293602628e-07,
"loss": 0.1972,
"step": 899
},
{
"epoch": 2.67,
"learning_rate": 6.264353726127615e-07,
"loss": 0.1614,
"step": 900
},
{
"epoch": 2.67,
"learning_rate": 6.153157795023956e-07,
"loss": 0.1796,
"step": 901
},
{
"epoch": 2.68,
"learning_rate": 6.042926285425577e-07,
"loss": 0.159,
"step": 902
},
{
"epoch": 2.68,
"learning_rate": 5.933660330131752e-07,
"loss": 0.1816,
"step": 903
},
{
"epoch": 2.68,
"learning_rate": 5.825361052019252e-07,
"loss": 0.1525,
"step": 904
},
{
"epoch": 2.69,
"learning_rate": 5.718029564030703e-07,
"loss": 0.1598,
"step": 905
},
{
"epoch": 2.69,
"learning_rate": 5.611666969163243e-07,
"loss": 0.1714,
"step": 906
},
{
"epoch": 2.69,
"learning_rate": 5.506274360457087e-07,
"loss": 0.1536,
"step": 907
},
{
"epoch": 2.69,
"learning_rate": 5.401852820984321e-07,
"loss": 0.1609,
"step": 908
},
{
"epoch": 2.7,
"learning_rate": 5.298403423837883e-07,
"loss": 0.175,
"step": 909
},
{
"epoch": 2.7,
"learning_rate": 5.195927232120335e-07,
"loss": 0.1498,
"step": 910
},
{
"epoch": 2.7,
"learning_rate": 5.094425298933136e-07,
"loss": 0.165,
"step": 911
},
{
"epoch": 2.71,
"learning_rate": 4.993898667365671e-07,
"loss": 0.158,
"step": 912
},
{
"epoch": 2.71,
"learning_rate": 4.894348370484648e-07,
"loss": 0.1589,
"step": 913
},
{
"epoch": 2.71,
"learning_rate": 4.795775431323358e-07,
"loss": 0.1598,
"step": 914
},
{
"epoch": 2.72,
"learning_rate": 4.6981808628712823e-07,
"loss": 0.156,
"step": 915
},
{
"epoch": 2.72,
"learning_rate": 4.6015656680636234e-07,
"loss": 0.1534,
"step": 916
},
{
"epoch": 2.72,
"learning_rate": 4.505930839770967e-07,
"loss": 0.183,
"step": 917
},
{
"epoch": 2.72,
"learning_rate": 4.411277360789146e-07,
"loss": 0.1902,
"step": 918
},
{
"epoch": 2.73,
"learning_rate": 4.3176062038291275e-07,
"loss": 0.1482,
"step": 919
},
{
"epoch": 2.73,
"learning_rate": 4.224918331506955e-07,
"loss": 0.1482,
"step": 920
},
{
"epoch": 2.73,
"learning_rate": 4.133214696333943e-07,
"loss": 0.1746,
"step": 921
},
{
"epoch": 2.74,
"learning_rate": 4.0424962407068167e-07,
"loss": 0.1612,
"step": 922
},
{
"epoch": 2.74,
"learning_rate": 3.9527638968980707e-07,
"loss": 0.1794,
"step": 923
},
{
"epoch": 2.74,
"learning_rate": 3.864018587046392e-07,
"loss": 0.1452,
"step": 924
},
{
"epoch": 2.74,
"learning_rate": 3.7762612231471264e-07,
"loss": 0.1641,
"step": 925
},
{
"epoch": 2.75,
"learning_rate": 3.6894927070429744e-07,
"loss": 0.1555,
"step": 926
},
{
"epoch": 2.75,
"learning_rate": 3.603713930414676e-07,
"loss": 0.1513,
"step": 927
},
{
"epoch": 2.75,
"learning_rate": 3.5189257747718976e-07,
"loss": 0.1673,
"step": 928
},
{
"epoch": 2.76,
"learning_rate": 3.4351291114441134e-07,
"loss": 0.1645,
"step": 929
},
{
"epoch": 2.76,
"learning_rate": 3.3523248015717047e-07,
"loss": 0.2115,
"step": 930
},
{
"epoch": 2.76,
"learning_rate": 3.2705136960970554e-07,
"loss": 0.1612,
"step": 931
},
{
"epoch": 2.77,
"learning_rate": 3.189696635755868e-07,
"loss": 0.1732,
"step": 932
},
{
"epoch": 2.77,
"learning_rate": 3.1098744510684733e-07,
"loss": 0.1626,
"step": 933
},
{
"epoch": 2.77,
"learning_rate": 3.0310479623313125e-07,
"loss": 0.1693,
"step": 934
},
{
"epoch": 2.77,
"learning_rate": 2.9532179796085356e-07,
"loss": 0.1642,
"step": 935
},
{
"epoch": 2.78,
"learning_rate": 2.8763853027236277e-07,
"loss": 0.1638,
"step": 936
},
{
"epoch": 2.78,
"learning_rate": 2.8005507212512164e-07,
"loss": 0.1573,
"step": 937
},
{
"epoch": 2.78,
"learning_rate": 2.725715014508956e-07,
"loss": 0.1555,
"step": 938
},
{
"epoch": 2.79,
"learning_rate": 2.6518789515495356e-07,
"loss": 0.1642,
"step": 939
},
{
"epoch": 2.79,
"learning_rate": 2.5790432911527164e-07,
"loss": 0.1564,
"step": 940
},
{
"epoch": 2.79,
"learning_rate": 2.507208781817638e-07,
"loss": 0.1513,
"step": 941
},
{
"epoch": 2.8,
"learning_rate": 2.436376161755005e-07,
"loss": 0.1506,
"step": 942
},
{
"epoch": 2.8,
"learning_rate": 2.3665461588795902e-07,
"loss": 0.1638,
"step": 943
},
{
"epoch": 2.8,
"learning_rate": 2.2977194908027077e-07,
"loss": 0.1655,
"step": 944
},
{
"epoch": 2.8,
"learning_rate": 2.2298968648248652e-07,
"loss": 0.1597,
"step": 945
},
{
"epoch": 2.81,
"learning_rate": 2.1630789779284677e-07,
"loss": 0.1487,
"step": 946
},
{
"epoch": 2.81,
"learning_rate": 2.097266516770713e-07,
"loss": 0.16,
"step": 947
},
{
"epoch": 2.81,
"learning_rate": 2.0324601576764525e-07,
"loss": 0.1754,
"step": 948
},
{
"epoch": 2.82,
"learning_rate": 1.9686605666312754e-07,
"loss": 0.1728,
"step": 949
},
{
"epoch": 2.82,
"learning_rate": 1.905868399274735e-07,
"loss": 0.1572,
"step": 950
},
{
"epoch": 2.82,
"learning_rate": 1.844084300893456e-07,
"loss": 0.1338,
"step": 951
},
{
"epoch": 2.82,
"learning_rate": 1.7833089064146825e-07,
"loss": 0.1588,
"step": 952
},
{
"epoch": 2.83,
"learning_rate": 1.7235428403996167e-07,
"loss": 0.1646,
"step": 953
},
{
"epoch": 2.83,
"learning_rate": 1.6647867170370369e-07,
"loss": 0.1549,
"step": 954
},
{
"epoch": 2.83,
"learning_rate": 1.6070411401370335e-07,
"loss": 0.1641,
"step": 955
},
{
"epoch": 2.84,
"learning_rate": 1.55030670312476e-07,
"loss": 0.1773,
"step": 956
},
{
"epoch": 2.84,
"learning_rate": 1.494583989034326e-07,
"loss": 0.1881,
"step": 957
},
{
"epoch": 2.84,
"learning_rate": 1.4398735705028477e-07,
"loss": 0.1721,
"step": 958
},
{
"epoch": 2.85,
"learning_rate": 1.3861760097645062e-07,
"loss": 0.1723,
"step": 959
},
{
"epoch": 2.85,
"learning_rate": 1.333491858644831e-07,
"loss": 0.1674,
"step": 960
},
{
"epoch": 2.85,
"learning_rate": 1.2818216585549824e-07,
"loss": 0.1634,
"step": 961
},
{
"epoch": 2.85,
"learning_rate": 1.231165940486234e-07,
"loss": 0.1683,
"step": 962
},
{
"epoch": 2.86,
"learning_rate": 1.1815252250044318e-07,
"loss": 0.1602,
"step": 963
},
{
"epoch": 2.86,
"learning_rate": 1.1329000222447539e-07,
"loss": 0.1798,
"step": 964
},
{
"epoch": 2.86,
"learning_rate": 1.0852908319063827e-07,
"loss": 0.171,
"step": 965
},
{
"epoch": 2.87,
"learning_rate": 1.0386981432474075e-07,
"loss": 0.1564,
"step": 966
},
{
"epoch": 2.87,
"learning_rate": 9.931224350798185e-08,
"loss": 0.1669,
"step": 967
},
{
"epoch": 2.87,
"learning_rate": 9.485641757644992e-08,
"loss": 0.1707,
"step": 968
},
{
"epoch": 2.88,
"learning_rate": 9.0502382320653e-08,
"loss": 0.1538,
"step": 969
},
{
"epoch": 2.88,
"learning_rate": 8.625018248503814e-08,
"loss": 0.1591,
"step": 970
},
{
"epoch": 2.88,
"learning_rate": 8.209986176753947e-08,
"loss": 0.1669,
"step": 971
},
{
"epoch": 2.88,
"learning_rate": 7.805146281912202e-08,
"loss": 0.1657,
"step": 972
},
{
"epoch": 2.89,
"learning_rate": 7.410502724334967e-08,
"loss": 0.1542,
"step": 973
},
{
"epoch": 2.89,
"learning_rate": 7.026059559595566e-08,
"loss": 0.1511,
"step": 974
},
{
"epoch": 2.89,
"learning_rate": 6.65182073844195e-08,
"loss": 0.184,
"step": 975
},
{
"epoch": 2.9,
"learning_rate": 6.287790106757396e-08,
"loss": 0.1708,
"step": 976
},
{
"epoch": 2.9,
"learning_rate": 5.933971405519656e-08,
"loss": 0.1491,
"step": 977
},
{
"epoch": 2.9,
"learning_rate": 5.590368270763535e-08,
"loss": 0.1538,
"step": 978
},
{
"epoch": 2.91,
"learning_rate": 5.256984233542595e-08,
"loss": 0.1564,
"step": 979
},
{
"epoch": 2.91,
"learning_rate": 4.933822719893955e-08,
"loss": 0.1653,
"step": 980
},
{
"epoch": 2.91,
"learning_rate": 4.6208870508017703e-08,
"loss": 0.1564,
"step": 981
},
{
"epoch": 2.91,
"learning_rate": 4.318180442164588e-08,
"loss": 0.1573,
"step": 982
},
{
"epoch": 2.92,
"learning_rate": 4.025706004760932e-08,
"loss": 0.1791,
"step": 983
},
{
"epoch": 2.92,
"learning_rate": 3.743466744218327e-08,
"loss": 0.1527,
"step": 984
},
{
"epoch": 2.92,
"learning_rate": 3.471465560981768e-08,
"loss": 0.1743,
"step": 985
},
{
"epoch": 2.93,
"learning_rate": 3.2097052502843005e-08,
"loss": 0.1682,
"step": 986
},
{
"epoch": 2.93,
"learning_rate": 2.9581885021181534e-08,
"loss": 0.177,
"step": 987
},
{
"epoch": 2.93,
"learning_rate": 2.7169179012068725e-08,
"loss": 0.1762,
"step": 988
},
{
"epoch": 2.93,
"learning_rate": 2.4858959269794537e-08,
"loss": 0.1661,
"step": 989
},
{
"epoch": 2.94,
"learning_rate": 2.265124953543918e-08,
"loss": 0.1654,
"step": 990
},
{
"epoch": 2.94,
"learning_rate": 2.054607249663665e-08,
"loss": 0.1633,
"step": 991
},
{
"epoch": 2.94,
"learning_rate": 1.854344978733824e-08,
"loss": 0.1487,
"step": 992
},
{
"epoch": 2.95,
"learning_rate": 1.6643401987591624e-08,
"loss": 0.164,
"step": 993
},
{
"epoch": 2.95,
"learning_rate": 1.4845948623325446e-08,
"loss": 0.1648,
"step": 994
},
{
"epoch": 2.95,
"learning_rate": 1.3151108166156168e-08,
"loss": 0.1452,
"step": 995
},
{
"epoch": 2.96,
"learning_rate": 1.1558898033191545e-08,
"loss": 0.1621,
"step": 996
},
{
"epoch": 2.96,
"learning_rate": 1.0069334586854106e-08,
"loss": 0.1606,
"step": 997
},
{
"epoch": 2.96,
"learning_rate": 8.682433134711288e-09,
"loss": 0.1406,
"step": 998
},
{
"epoch": 2.96,
"learning_rate": 7.3982079293233314e-09,
"loss": 0.1568,
"step": 999
},
{
"epoch": 2.97,
"learning_rate": 6.216672168091187e-09,
"loss": 0.1481,
"step": 1000
},
{
"epoch": 2.97,
"learning_rate": 5.137837993121064e-09,
"loss": 0.1675,
"step": 1001
},
{
"epoch": 2.97,
"learning_rate": 4.161716491105639e-09,
"loss": 0.1742,
"step": 1002
},
{
"epoch": 2.98,
"learning_rate": 3.2883176932019255e-09,
"loss": 0.1662,
"step": 1003
},
{
"epoch": 2.98,
"learning_rate": 2.5176505749346937e-09,
"loss": 0.1682,
"step": 1004
},
{
"epoch": 2.98,
"learning_rate": 1.8497230560998724e-09,
"loss": 0.1729,
"step": 1005
},
{
"epoch": 2.99,
"learning_rate": 1.2845420006879494e-09,
"loss": 0.1763,
"step": 1006
},
{
"epoch": 2.99,
"learning_rate": 8.221132168073631e-10,
"loss": 0.1545,
"step": 1007
},
{
"epoch": 2.99,
"learning_rate": 4.6244145663010273e-10,
"loss": 0.1652,
"step": 1008
},
{
"epoch": 2.99,
"learning_rate": 2.0553041633952775e-10,
"loss": 0.1665,
"step": 1009
},
{
"epoch": 3.0,
"learning_rate": 5.1382736093730237e-11,
"loss": 0.1664,
"step": 1010
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.1621,
"step": 1011
},
{
"epoch": 3.0,
"step": 1011,
"total_flos": 4.565881206834463e+17,
"train_loss": 0.21420475727077998,
"train_runtime": 4238.9846,
"train_samples_per_second": 7.631,
"train_steps_per_second": 0.239
}
],
"max_steps": 1011,
"num_train_epochs": 3,
"total_flos": 4.565881206834463e+17,
"trial_name": null,
"trial_params": null
}