functionary-7b-v0.2 / trainer_state.json
musabgultekin's picture
Upload folder using huggingface_hub
c91349b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.998003992015968,
"global_step": 2253,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9411764705882356e-07,
"loss": 3.3444,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 5.882352941176471e-07,
"loss": 4.3524,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 8.823529411764707e-07,
"loss": 3.5916,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.1764705882352942e-06,
"loss": 3.8084,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.4705882352941177e-06,
"loss": 4.0642,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.7647058823529414e-06,
"loss": 3.9822,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.058823529411765e-06,
"loss": 3.543,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 2.3529411764705885e-06,
"loss": 3.1988,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 2.647058823529412e-06,
"loss": 3.7397,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 2.9411764705882355e-06,
"loss": 3.9014,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.2352941176470594e-06,
"loss": 3.7911,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 3.529411764705883e-06,
"loss": 3.1401,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 3.8235294117647055e-06,
"loss": 3.261,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 4.11764705882353e-06,
"loss": 3.0643,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 4.411764705882353e-06,
"loss": 2.9693,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 4.705882352941177e-06,
"loss": 3.2043,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 5e-06,
"loss": 3.7151,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 5.294117647058824e-06,
"loss": 4.3418,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 5.588235294117647e-06,
"loss": 3.0498,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 5.882352941176471e-06,
"loss": 3.7658,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 6.176470588235295e-06,
"loss": 3.5779,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 6.470588235294119e-06,
"loss": 3.455,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 6.764705882352942e-06,
"loss": 3.1044,
"step": 23
},
{
"epoch": 0.03,
"learning_rate": 7.058823529411766e-06,
"loss": 3.0818,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 7.352941176470589e-06,
"loss": 3.3943,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 7.647058823529411e-06,
"loss": 2.9755,
"step": 26
},
{
"epoch": 0.04,
"learning_rate": 7.941176470588236e-06,
"loss": 2.8652,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 8.23529411764706e-06,
"loss": 2.9031,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 8.529411764705883e-06,
"loss": 3.0531,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 8.823529411764707e-06,
"loss": 2.9879,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 9.11764705882353e-06,
"loss": 2.7831,
"step": 31
},
{
"epoch": 0.04,
"learning_rate": 9.411764705882354e-06,
"loss": 2.7968,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 9.705882352941177e-06,
"loss": 2.9585,
"step": 33
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 2.9291,
"step": 34
},
{
"epoch": 0.05,
"learning_rate": 1.0294117647058823e-05,
"loss": 2.7425,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 1.0588235294117648e-05,
"loss": 2.8701,
"step": 36
},
{
"epoch": 0.05,
"learning_rate": 1.0882352941176471e-05,
"loss": 2.5117,
"step": 37
},
{
"epoch": 0.05,
"learning_rate": 1.1176470588235295e-05,
"loss": 2.4959,
"step": 38
},
{
"epoch": 0.05,
"learning_rate": 1.1470588235294118e-05,
"loss": 2.3904,
"step": 39
},
{
"epoch": 0.05,
"learning_rate": 1.1764705882352942e-05,
"loss": 1.8824,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.2058823529411765e-05,
"loss": 2.3635,
"step": 41
},
{
"epoch": 0.06,
"learning_rate": 1.235294117647059e-05,
"loss": 1.9408,
"step": 42
},
{
"epoch": 0.06,
"learning_rate": 1.2647058823529412e-05,
"loss": 1.8,
"step": 43
},
{
"epoch": 0.06,
"learning_rate": 1.2941176470588238e-05,
"loss": 1.7004,
"step": 44
},
{
"epoch": 0.06,
"learning_rate": 1.323529411764706e-05,
"loss": 1.6838,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 1.3529411764705885e-05,
"loss": 1.6822,
"step": 46
},
{
"epoch": 0.06,
"learning_rate": 1.3823529411764706e-05,
"loss": 1.6287,
"step": 47
},
{
"epoch": 0.06,
"learning_rate": 1.4117647058823532e-05,
"loss": 1.6299,
"step": 48
},
{
"epoch": 0.07,
"learning_rate": 1.4411764705882353e-05,
"loss": 1.4549,
"step": 49
},
{
"epoch": 0.07,
"learning_rate": 1.4705882352941179e-05,
"loss": 1.6206,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 1.5000000000000002e-05,
"loss": 1.4354,
"step": 51
},
{
"epoch": 0.07,
"learning_rate": 1.5294117647058822e-05,
"loss": 1.2827,
"step": 52
},
{
"epoch": 0.07,
"learning_rate": 1.558823529411765e-05,
"loss": 1.316,
"step": 53
},
{
"epoch": 0.07,
"learning_rate": 1.5882352941176473e-05,
"loss": 1.2486,
"step": 54
},
{
"epoch": 0.07,
"learning_rate": 1.6176470588235296e-05,
"loss": 1.1419,
"step": 55
},
{
"epoch": 0.07,
"learning_rate": 1.647058823529412e-05,
"loss": 1.0224,
"step": 56
},
{
"epoch": 0.08,
"learning_rate": 1.6764705882352943e-05,
"loss": 0.9171,
"step": 57
},
{
"epoch": 0.08,
"learning_rate": 1.7058823529411767e-05,
"loss": 0.8769,
"step": 58
},
{
"epoch": 0.08,
"learning_rate": 1.735294117647059e-05,
"loss": 0.8913,
"step": 59
},
{
"epoch": 0.08,
"learning_rate": 1.7647058823529414e-05,
"loss": 0.8164,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 1.7941176470588237e-05,
"loss": 0.7544,
"step": 61
},
{
"epoch": 0.08,
"learning_rate": 1.823529411764706e-05,
"loss": 0.7687,
"step": 62
},
{
"epoch": 0.08,
"learning_rate": 1.8529411764705884e-05,
"loss": 0.7051,
"step": 63
},
{
"epoch": 0.09,
"learning_rate": 1.8823529411764708e-05,
"loss": 0.741,
"step": 64
},
{
"epoch": 0.09,
"learning_rate": 1.911764705882353e-05,
"loss": 0.6817,
"step": 65
},
{
"epoch": 0.09,
"learning_rate": 1.9411764705882355e-05,
"loss": 0.6719,
"step": 66
},
{
"epoch": 0.09,
"learning_rate": 1.9705882352941178e-05,
"loss": 0.6153,
"step": 67
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.6012,
"step": 68
},
{
"epoch": 0.09,
"learning_rate": 1.9999989663659863e-05,
"loss": 0.6539,
"step": 69
},
{
"epoch": 0.09,
"learning_rate": 1.9999958654660808e-05,
"loss": 0.6979,
"step": 70
},
{
"epoch": 0.09,
"learning_rate": 1.999990697306695e-05,
"loss": 0.6427,
"step": 71
},
{
"epoch": 0.1,
"learning_rate": 1.9999834618985123e-05,
"loss": 0.66,
"step": 72
},
{
"epoch": 0.1,
"learning_rate": 1.9999741592564903e-05,
"loss": 0.6596,
"step": 73
},
{
"epoch": 0.1,
"learning_rate": 1.99996278939986e-05,
"loss": 0.6675,
"step": 74
},
{
"epoch": 0.1,
"learning_rate": 1.9999493523521264e-05,
"loss": 0.6353,
"step": 75
},
{
"epoch": 0.1,
"learning_rate": 1.9999338481410665e-05,
"loss": 0.6944,
"step": 76
},
{
"epoch": 0.1,
"learning_rate": 1.9999162767987327e-05,
"loss": 0.6479,
"step": 77
},
{
"epoch": 0.1,
"learning_rate": 1.999896638361449e-05,
"loss": 0.6814,
"step": 78
},
{
"epoch": 0.11,
"learning_rate": 1.9998749328698135e-05,
"loss": 0.7432,
"step": 79
},
{
"epoch": 0.11,
"learning_rate": 1.9998511603686973e-05,
"loss": 0.5978,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 1.9998253209072447e-05,
"loss": 0.6435,
"step": 81
},
{
"epoch": 0.11,
"learning_rate": 1.9997974145388726e-05,
"loss": 0.6004,
"step": 82
},
{
"epoch": 0.11,
"learning_rate": 1.999767441321271e-05,
"loss": 0.6766,
"step": 83
},
{
"epoch": 0.11,
"learning_rate": 1.9997354013164024e-05,
"loss": 0.5962,
"step": 84
},
{
"epoch": 0.11,
"learning_rate": 1.9997012945905024e-05,
"loss": 0.6063,
"step": 85
},
{
"epoch": 0.11,
"learning_rate": 1.9996651212140784e-05,
"loss": 0.6562,
"step": 86
},
{
"epoch": 0.12,
"learning_rate": 1.999626881261911e-05,
"loss": 0.6744,
"step": 87
},
{
"epoch": 0.12,
"learning_rate": 1.9995865748130518e-05,
"loss": 0.643,
"step": 88
},
{
"epoch": 0.12,
"learning_rate": 1.999544201950825e-05,
"loss": 0.5848,
"step": 89
},
{
"epoch": 0.12,
"learning_rate": 1.9994997627628276e-05,
"loss": 0.6735,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 1.999453257340926e-05,
"loss": 0.6068,
"step": 91
},
{
"epoch": 0.12,
"learning_rate": 1.9994046857812605e-05,
"loss": 0.6557,
"step": 92
},
{
"epoch": 0.12,
"learning_rate": 1.999354048184241e-05,
"loss": 0.6136,
"step": 93
},
{
"epoch": 0.13,
"learning_rate": 1.999301344654549e-05,
"loss": 0.678,
"step": 94
},
{
"epoch": 0.13,
"learning_rate": 1.9992465753011368e-05,
"loss": 0.6404,
"step": 95
},
{
"epoch": 0.13,
"learning_rate": 1.999189740237228e-05,
"loss": 0.6321,
"step": 96
},
{
"epoch": 0.13,
"learning_rate": 1.999130839580315e-05,
"loss": 0.6427,
"step": 97
},
{
"epoch": 0.13,
"learning_rate": 1.9990698734521614e-05,
"loss": 0.5795,
"step": 98
},
{
"epoch": 0.13,
"learning_rate": 1.9990068419788007e-05,
"loss": 0.6273,
"step": 99
},
{
"epoch": 0.13,
"learning_rate": 1.998941745290536e-05,
"loss": 0.6187,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 1.9988745835219393e-05,
"loss": 0.6396,
"step": 101
},
{
"epoch": 0.14,
"learning_rate": 1.998805356811852e-05,
"loss": 0.5794,
"step": 102
},
{
"epoch": 0.14,
"learning_rate": 1.9987340653033847e-05,
"loss": 0.6254,
"step": 103
},
{
"epoch": 0.14,
"learning_rate": 1.998660709143916e-05,
"loss": 0.6609,
"step": 104
},
{
"epoch": 0.14,
"learning_rate": 1.9985852884850922e-05,
"loss": 0.7137,
"step": 105
},
{
"epoch": 0.14,
"learning_rate": 1.998507803482828e-05,
"loss": 0.6828,
"step": 106
},
{
"epoch": 0.14,
"learning_rate": 1.9984282542973064e-05,
"loss": 0.6107,
"step": 107
},
{
"epoch": 0.14,
"learning_rate": 1.9983466410929764e-05,
"loss": 0.6305,
"step": 108
},
{
"epoch": 0.15,
"learning_rate": 1.9982629640385545e-05,
"loss": 0.6215,
"step": 109
},
{
"epoch": 0.15,
"learning_rate": 1.9981772233070232e-05,
"loss": 0.6814,
"step": 110
},
{
"epoch": 0.15,
"learning_rate": 1.9980894190756323e-05,
"loss": 0.6134,
"step": 111
},
{
"epoch": 0.15,
"learning_rate": 1.997999551525896e-05,
"loss": 0.6571,
"step": 112
},
{
"epoch": 0.15,
"learning_rate": 1.997907620843595e-05,
"loss": 0.5888,
"step": 113
},
{
"epoch": 0.15,
"learning_rate": 1.9978136272187745e-05,
"loss": 0.6551,
"step": 114
},
{
"epoch": 0.15,
"learning_rate": 1.9977175708457446e-05,
"loss": 0.6709,
"step": 115
},
{
"epoch": 0.15,
"learning_rate": 1.9976194519230797e-05,
"loss": 0.6328,
"step": 116
},
{
"epoch": 0.16,
"learning_rate": 1.9975192706536178e-05,
"loss": 0.5973,
"step": 117
},
{
"epoch": 0.16,
"learning_rate": 1.9974170272444604e-05,
"loss": 0.652,
"step": 118
},
{
"epoch": 0.16,
"learning_rate": 1.9973127219069718e-05,
"loss": 0.598,
"step": 119
},
{
"epoch": 0.16,
"learning_rate": 1.9972063548567793e-05,
"loss": 0.5632,
"step": 120
},
{
"epoch": 0.16,
"learning_rate": 1.9970979263137726e-05,
"loss": 0.6435,
"step": 121
},
{
"epoch": 0.16,
"learning_rate": 1.9969874365021018e-05,
"loss": 0.5922,
"step": 122
},
{
"epoch": 0.16,
"learning_rate": 1.996874885650179e-05,
"loss": 0.5974,
"step": 123
},
{
"epoch": 0.17,
"learning_rate": 1.9967602739906773e-05,
"loss": 0.5934,
"step": 124
},
{
"epoch": 0.17,
"learning_rate": 1.9966436017605296e-05,
"loss": 0.6182,
"step": 125
},
{
"epoch": 0.17,
"learning_rate": 1.996524869200929e-05,
"loss": 0.6375,
"step": 126
},
{
"epoch": 0.17,
"learning_rate": 1.996404076557327e-05,
"loss": 0.6046,
"step": 127
},
{
"epoch": 0.17,
"learning_rate": 1.9962812240794344e-05,
"loss": 0.6272,
"step": 128
},
{
"epoch": 0.17,
"learning_rate": 1.9961563120212208e-05,
"loss": 0.6565,
"step": 129
},
{
"epoch": 0.17,
"learning_rate": 1.9960293406409122e-05,
"loss": 0.5962,
"step": 130
},
{
"epoch": 0.17,
"learning_rate": 1.9959003102009928e-05,
"loss": 0.5806,
"step": 131
},
{
"epoch": 0.18,
"learning_rate": 1.9957692209682032e-05,
"loss": 0.6353,
"step": 132
},
{
"epoch": 0.18,
"learning_rate": 1.99563607321354e-05,
"loss": 0.6147,
"step": 133
},
{
"epoch": 0.18,
"learning_rate": 1.9955008672122548e-05,
"loss": 0.6465,
"step": 134
},
{
"epoch": 0.18,
"learning_rate": 1.9953636032438553e-05,
"loss": 0.5685,
"step": 135
},
{
"epoch": 0.18,
"learning_rate": 1.9952242815921024e-05,
"loss": 0.6165,
"step": 136
},
{
"epoch": 0.18,
"learning_rate": 1.9950829025450116e-05,
"loss": 0.5951,
"step": 137
},
{
"epoch": 0.18,
"learning_rate": 1.994939466394851e-05,
"loss": 0.6623,
"step": 138
},
{
"epoch": 0.18,
"learning_rate": 1.994793973438142e-05,
"loss": 0.6015,
"step": 139
},
{
"epoch": 0.19,
"learning_rate": 1.9946464239756573e-05,
"loss": 0.6593,
"step": 140
},
{
"epoch": 0.19,
"learning_rate": 1.9944968183124212e-05,
"loss": 0.5974,
"step": 141
},
{
"epoch": 0.19,
"learning_rate": 1.9943451567577085e-05,
"loss": 0.658,
"step": 142
},
{
"epoch": 0.19,
"learning_rate": 1.9941914396250447e-05,
"loss": 0.6255,
"step": 143
},
{
"epoch": 0.19,
"learning_rate": 1.9940356672322037e-05,
"loss": 0.7209,
"step": 144
},
{
"epoch": 0.19,
"learning_rate": 1.9938778399012094e-05,
"loss": 0.6326,
"step": 145
},
{
"epoch": 0.19,
"learning_rate": 1.993717957958333e-05,
"loss": 0.612,
"step": 146
},
{
"epoch": 0.2,
"learning_rate": 1.9935560217340933e-05,
"loss": 0.64,
"step": 147
},
{
"epoch": 0.2,
"learning_rate": 1.9933920315632557e-05,
"loss": 0.6461,
"step": 148
},
{
"epoch": 0.2,
"learning_rate": 1.9932259877848324e-05,
"loss": 0.6731,
"step": 149
},
{
"epoch": 0.2,
"learning_rate": 1.9930578907420798e-05,
"loss": 0.6109,
"step": 150
},
{
"epoch": 0.2,
"learning_rate": 1.9928877407824997e-05,
"loss": 0.6187,
"step": 151
},
{
"epoch": 0.2,
"learning_rate": 1.9927155382578377e-05,
"loss": 0.5909,
"step": 152
},
{
"epoch": 0.2,
"learning_rate": 1.9925412835240826e-05,
"loss": 0.5811,
"step": 153
},
{
"epoch": 0.2,
"learning_rate": 1.9923649769414657e-05,
"loss": 0.6857,
"step": 154
},
{
"epoch": 0.21,
"learning_rate": 1.9921866188744596e-05,
"loss": 0.6276,
"step": 155
},
{
"epoch": 0.21,
"learning_rate": 1.992006209691779e-05,
"loss": 0.628,
"step": 156
},
{
"epoch": 0.21,
"learning_rate": 1.9918237497663772e-05,
"loss": 0.5771,
"step": 157
},
{
"epoch": 0.21,
"learning_rate": 1.9916392394754483e-05,
"loss": 0.5674,
"step": 158
},
{
"epoch": 0.21,
"learning_rate": 1.9914526792004244e-05,
"loss": 0.6131,
"step": 159
},
{
"epoch": 0.21,
"learning_rate": 1.9912640693269754e-05,
"loss": 0.5663,
"step": 160
},
{
"epoch": 0.21,
"learning_rate": 1.9910734102450087e-05,
"loss": 0.6315,
"step": 161
},
{
"epoch": 0.22,
"learning_rate": 1.9908807023486676e-05,
"loss": 0.6092,
"step": 162
},
{
"epoch": 0.22,
"learning_rate": 1.9906859460363307e-05,
"loss": 0.5825,
"step": 163
},
{
"epoch": 0.22,
"learning_rate": 1.9904891417106123e-05,
"loss": 0.6339,
"step": 164
},
{
"epoch": 0.22,
"learning_rate": 1.990290289778359e-05,
"loss": 0.6112,
"step": 165
},
{
"epoch": 0.22,
"learning_rate": 1.9900893906506514e-05,
"loss": 0.681,
"step": 166
},
{
"epoch": 0.22,
"learning_rate": 1.9898864447428012e-05,
"loss": 0.5334,
"step": 167
},
{
"epoch": 0.22,
"learning_rate": 1.989681452474353e-05,
"loss": 0.6323,
"step": 168
},
{
"epoch": 0.22,
"learning_rate": 1.98947441426908e-05,
"loss": 0.597,
"step": 169
},
{
"epoch": 0.23,
"learning_rate": 1.9892653305549858e-05,
"loss": 0.6683,
"step": 170
},
{
"epoch": 0.23,
"learning_rate": 1.9890542017643026e-05,
"loss": 0.6226,
"step": 171
},
{
"epoch": 0.23,
"learning_rate": 1.98884102833349e-05,
"loss": 0.5996,
"step": 172
},
{
"epoch": 0.23,
"learning_rate": 1.9886258107032352e-05,
"loss": 0.6269,
"step": 173
},
{
"epoch": 0.23,
"learning_rate": 1.98840854931845e-05,
"loss": 0.5049,
"step": 174
},
{
"epoch": 0.23,
"learning_rate": 1.9881892446282722e-05,
"loss": 0.6765,
"step": 175
},
{
"epoch": 0.23,
"learning_rate": 1.9879678970860635e-05,
"loss": 0.5981,
"step": 176
},
{
"epoch": 0.24,
"learning_rate": 1.9877445071494087e-05,
"loss": 0.65,
"step": 177
},
{
"epoch": 0.24,
"learning_rate": 1.987519075280114e-05,
"loss": 0.6235,
"step": 178
},
{
"epoch": 0.24,
"learning_rate": 1.9872916019442084e-05,
"loss": 0.6012,
"step": 179
},
{
"epoch": 0.24,
"learning_rate": 1.98706208761194e-05,
"loss": 0.5538,
"step": 180
},
{
"epoch": 0.24,
"learning_rate": 1.9868305327577756e-05,
"loss": 0.5699,
"step": 181
},
{
"epoch": 0.24,
"learning_rate": 1.9865969378604023e-05,
"loss": 0.5941,
"step": 182
},
{
"epoch": 0.24,
"learning_rate": 1.9863613034027224e-05,
"loss": 0.654,
"step": 183
},
{
"epoch": 0.24,
"learning_rate": 1.9861236298718563e-05,
"loss": 0.6387,
"step": 184
},
{
"epoch": 0.25,
"learning_rate": 1.9858839177591384e-05,
"loss": 0.6182,
"step": 185
},
{
"epoch": 0.25,
"learning_rate": 1.9856421675601183e-05,
"loss": 0.6694,
"step": 186
},
{
"epoch": 0.25,
"learning_rate": 1.985398379774558e-05,
"loss": 0.645,
"step": 187
},
{
"epoch": 0.25,
"learning_rate": 1.9851525549064324e-05,
"loss": 0.6398,
"step": 188
},
{
"epoch": 0.25,
"learning_rate": 1.9849046934639273e-05,
"loss": 0.6549,
"step": 189
},
{
"epoch": 0.25,
"learning_rate": 1.984654795959439e-05,
"loss": 0.5635,
"step": 190
},
{
"epoch": 0.25,
"learning_rate": 1.9844028629095722e-05,
"loss": 0.6546,
"step": 191
},
{
"epoch": 0.26,
"learning_rate": 1.9841488948351403e-05,
"loss": 0.5884,
"step": 192
},
{
"epoch": 0.26,
"learning_rate": 1.9838928922611634e-05,
"loss": 0.6025,
"step": 193
},
{
"epoch": 0.26,
"learning_rate": 1.983634855716867e-05,
"loss": 0.6468,
"step": 194
},
{
"epoch": 0.26,
"learning_rate": 1.983374785735683e-05,
"loss": 0.5707,
"step": 195
},
{
"epoch": 0.26,
"learning_rate": 1.9831126828552442e-05,
"loss": 0.6129,
"step": 196
},
{
"epoch": 0.26,
"learning_rate": 1.9828485476173887e-05,
"loss": 0.6287,
"step": 197
},
{
"epoch": 0.26,
"learning_rate": 1.9825823805681543e-05,
"loss": 0.6218,
"step": 198
},
{
"epoch": 0.26,
"learning_rate": 1.9823141822577794e-05,
"loss": 0.6359,
"step": 199
},
{
"epoch": 0.27,
"learning_rate": 1.9820439532407025e-05,
"loss": 0.5706,
"step": 200
},
{
"epoch": 0.27,
"learning_rate": 1.9817716940755586e-05,
"loss": 0.603,
"step": 201
},
{
"epoch": 0.27,
"learning_rate": 1.981497405325181e-05,
"loss": 0.5742,
"step": 202
},
{
"epoch": 0.27,
"learning_rate": 1.981221087556598e-05,
"loss": 0.6006,
"step": 203
},
{
"epoch": 0.27,
"learning_rate": 1.980942741341032e-05,
"loss": 0.6203,
"step": 204
},
{
"epoch": 0.27,
"learning_rate": 1.9806623672538997e-05,
"loss": 0.6248,
"step": 205
},
{
"epoch": 0.27,
"learning_rate": 1.9803799658748096e-05,
"loss": 0.5531,
"step": 206
},
{
"epoch": 0.28,
"learning_rate": 1.9800955377875603e-05,
"loss": 0.605,
"step": 207
},
{
"epoch": 0.28,
"learning_rate": 1.9798090835801418e-05,
"loss": 0.6357,
"step": 208
},
{
"epoch": 0.28,
"learning_rate": 1.9795206038447313e-05,
"loss": 0.622,
"step": 209
},
{
"epoch": 0.28,
"learning_rate": 1.9792300991776933e-05,
"loss": 0.5886,
"step": 210
},
{
"epoch": 0.28,
"learning_rate": 1.9789375701795795e-05,
"loss": 0.6413,
"step": 211
},
{
"epoch": 0.28,
"learning_rate": 1.9786430174551254e-05,
"loss": 0.5457,
"step": 212
},
{
"epoch": 0.28,
"learning_rate": 1.9783464416132507e-05,
"loss": 0.638,
"step": 213
},
{
"epoch": 0.28,
"learning_rate": 1.978047843267057e-05,
"loss": 0.6095,
"step": 214
},
{
"epoch": 0.29,
"learning_rate": 1.9777472230338272e-05,
"loss": 0.5679,
"step": 215
},
{
"epoch": 0.29,
"learning_rate": 1.977444581535023e-05,
"loss": 0.6246,
"step": 216
},
{
"epoch": 0.29,
"learning_rate": 1.977139919396287e-05,
"loss": 0.5572,
"step": 217
},
{
"epoch": 0.29,
"learning_rate": 1.976833237247437e-05,
"loss": 0.6627,
"step": 218
},
{
"epoch": 0.29,
"learning_rate": 1.9765245357224665e-05,
"loss": 0.5689,
"step": 219
},
{
"epoch": 0.29,
"learning_rate": 1.9762138154595448e-05,
"loss": 0.572,
"step": 220
},
{
"epoch": 0.29,
"learning_rate": 1.975901077101014e-05,
"loss": 0.6176,
"step": 221
},
{
"epoch": 0.3,
"learning_rate": 1.975586321293388e-05,
"loss": 0.569,
"step": 222
},
{
"epoch": 0.3,
"learning_rate": 1.9752695486873516e-05,
"loss": 0.6731,
"step": 223
},
{
"epoch": 0.3,
"learning_rate": 1.9749507599377583e-05,
"loss": 0.5726,
"step": 224
},
{
"epoch": 0.3,
"learning_rate": 1.9746299557036305e-05,
"loss": 0.5783,
"step": 225
},
{
"epoch": 0.3,
"learning_rate": 1.9743071366481562e-05,
"loss": 0.6354,
"step": 226
},
{
"epoch": 0.3,
"learning_rate": 1.9739823034386885e-05,
"loss": 0.6493,
"step": 227
},
{
"epoch": 0.3,
"learning_rate": 1.973655456746745e-05,
"loss": 0.6178,
"step": 228
},
{
"epoch": 0.3,
"learning_rate": 1.973326597248006e-05,
"loss": 0.574,
"step": 229
},
{
"epoch": 0.31,
"learning_rate": 1.9729957256223114e-05,
"loss": 0.5995,
"step": 230
},
{
"epoch": 0.31,
"learning_rate": 1.9726628425536622e-05,
"loss": 0.5978,
"step": 231
},
{
"epoch": 0.31,
"learning_rate": 1.972327948730216e-05,
"loss": 0.5984,
"step": 232
},
{
"epoch": 0.31,
"learning_rate": 1.9719910448442893e-05,
"loss": 0.5305,
"step": 233
},
{
"epoch": 0.31,
"learning_rate": 1.971652131592352e-05,
"loss": 0.5717,
"step": 234
},
{
"epoch": 0.31,
"learning_rate": 1.9713112096750287e-05,
"loss": 0.5385,
"step": 235
},
{
"epoch": 0.31,
"learning_rate": 1.9709682797970965e-05,
"loss": 0.6165,
"step": 236
},
{
"epoch": 0.32,
"learning_rate": 1.9706233426674827e-05,
"loss": 0.4972,
"step": 237
},
{
"epoch": 0.32,
"learning_rate": 1.970276398999266e-05,
"loss": 0.5531,
"step": 238
},
{
"epoch": 0.32,
"learning_rate": 1.9699274495096712e-05,
"loss": 0.6023,
"step": 239
},
{
"epoch": 0.32,
"learning_rate": 1.969576494920071e-05,
"loss": 0.5336,
"step": 240
},
{
"epoch": 0.32,
"learning_rate": 1.9692235359559814e-05,
"loss": 0.5925,
"step": 241
},
{
"epoch": 0.32,
"learning_rate": 1.9688685733470646e-05,
"loss": 0.6258,
"step": 242
},
{
"epoch": 0.32,
"learning_rate": 1.9685116078271224e-05,
"loss": 0.5845,
"step": 243
},
{
"epoch": 0.32,
"learning_rate": 1.968152640134099e-05,
"loss": 0.5396,
"step": 244
},
{
"epoch": 0.33,
"learning_rate": 1.967791671010076e-05,
"loss": 0.5297,
"step": 245
},
{
"epoch": 0.33,
"learning_rate": 1.9674287012012743e-05,
"loss": 0.6084,
"step": 246
},
{
"epoch": 0.33,
"learning_rate": 1.967063731458049e-05,
"loss": 0.5481,
"step": 247
},
{
"epoch": 0.33,
"learning_rate": 1.9666967625348907e-05,
"loss": 0.5879,
"step": 248
},
{
"epoch": 0.33,
"learning_rate": 1.9663277951904224e-05,
"loss": 0.6084,
"step": 249
},
{
"epoch": 0.33,
"learning_rate": 1.9659568301873987e-05,
"loss": 0.6001,
"step": 250
},
{
"epoch": 0.33,
"learning_rate": 1.9655838682927032e-05,
"loss": 0.5729,
"step": 251
},
{
"epoch": 0.34,
"learning_rate": 1.9652089102773487e-05,
"loss": 0.5848,
"step": 252
},
{
"epoch": 0.34,
"learning_rate": 1.964831956916474e-05,
"loss": 0.6093,
"step": 253
},
{
"epoch": 0.34,
"learning_rate": 1.9644530089893418e-05,
"loss": 0.5567,
"step": 254
},
{
"epoch": 0.34,
"learning_rate": 1.96407206727934e-05,
"loss": 0.5381,
"step": 255
},
{
"epoch": 0.34,
"learning_rate": 1.9636891325739768e-05,
"loss": 0.5677,
"step": 256
},
{
"epoch": 0.34,
"learning_rate": 1.963304205664881e-05,
"loss": 0.5554,
"step": 257
},
{
"epoch": 0.34,
"learning_rate": 1.9629172873477995e-05,
"loss": 0.6306,
"step": 258
},
{
"epoch": 0.34,
"learning_rate": 1.9625283784225963e-05,
"loss": 0.5457,
"step": 259
},
{
"epoch": 0.35,
"learning_rate": 1.9621374796932504e-05,
"loss": 0.5729,
"step": 260
},
{
"epoch": 0.35,
"learning_rate": 1.961744591967854e-05,
"loss": 0.5567,
"step": 261
},
{
"epoch": 0.35,
"learning_rate": 1.961349716058612e-05,
"loss": 0.6111,
"step": 262
},
{
"epoch": 0.35,
"learning_rate": 1.960952852781838e-05,
"loss": 0.635,
"step": 263
},
{
"epoch": 0.35,
"learning_rate": 1.960554002957955e-05,
"loss": 0.612,
"step": 264
},
{
"epoch": 0.35,
"learning_rate": 1.960153167411493e-05,
"loss": 0.5564,
"step": 265
},
{
"epoch": 0.35,
"learning_rate": 1.9597503469710854e-05,
"loss": 0.5434,
"step": 266
},
{
"epoch": 0.36,
"learning_rate": 1.959345542469471e-05,
"loss": 0.5786,
"step": 267
},
{
"epoch": 0.36,
"learning_rate": 1.958938754743489e-05,
"loss": 0.5391,
"step": 268
},
{
"epoch": 0.36,
"learning_rate": 1.9585299846340783e-05,
"loss": 0.5782,
"step": 269
},
{
"epoch": 0.36,
"learning_rate": 1.9581192329862764e-05,
"loss": 0.6337,
"step": 270
},
{
"epoch": 0.36,
"learning_rate": 1.957706500649218e-05,
"loss": 0.5858,
"step": 271
},
{
"epoch": 0.36,
"learning_rate": 1.95729178847613e-05,
"loss": 0.6079,
"step": 272
},
{
"epoch": 0.36,
"learning_rate": 1.956875097324334e-05,
"loss": 0.6067,
"step": 273
},
{
"epoch": 0.36,
"learning_rate": 1.9564564280552427e-05,
"loss": 0.5872,
"step": 274
},
{
"epoch": 0.37,
"learning_rate": 1.9560357815343577e-05,
"loss": 0.5878,
"step": 275
},
{
"epoch": 0.37,
"learning_rate": 1.9556131586312678e-05,
"loss": 0.5073,
"step": 276
},
{
"epoch": 0.37,
"learning_rate": 1.9551885602196482e-05,
"loss": 0.5536,
"step": 277
},
{
"epoch": 0.37,
"learning_rate": 1.9547619871772575e-05,
"loss": 0.5973,
"step": 278
},
{
"epoch": 0.37,
"learning_rate": 1.954333440385936e-05,
"loss": 0.5857,
"step": 279
},
{
"epoch": 0.37,
"learning_rate": 1.953902920731605e-05,
"loss": 0.532,
"step": 280
},
{
"epoch": 0.37,
"learning_rate": 1.9534704291042644e-05,
"loss": 0.6352,
"step": 281
},
{
"epoch": 0.38,
"learning_rate": 1.9530359663979905e-05,
"loss": 0.539,
"step": 282
},
{
"epoch": 0.38,
"learning_rate": 1.9525995335109333e-05,
"loss": 0.6429,
"step": 283
},
{
"epoch": 0.38,
"learning_rate": 1.952161131345317e-05,
"loss": 0.5587,
"step": 284
},
{
"epoch": 0.38,
"learning_rate": 1.9517207608074368e-05,
"loss": 0.606,
"step": 285
},
{
"epoch": 0.38,
"learning_rate": 1.951278422807656e-05,
"loss": 0.5475,
"step": 286
},
{
"epoch": 0.38,
"learning_rate": 1.950834118260406e-05,
"loss": 0.5579,
"step": 287
},
{
"epoch": 0.38,
"learning_rate": 1.9503878480841832e-05,
"loss": 0.6164,
"step": 288
},
{
"epoch": 0.38,
"learning_rate": 1.9499396132015483e-05,
"loss": 0.4853,
"step": 289
},
{
"epoch": 0.39,
"learning_rate": 1.9494894145391222e-05,
"loss": 0.5572,
"step": 290
},
{
"epoch": 0.39,
"learning_rate": 1.949037253027587e-05,
"loss": 0.5075,
"step": 291
},
{
"epoch": 0.39,
"learning_rate": 1.9485831296016806e-05,
"loss": 0.5372,
"step": 292
},
{
"epoch": 0.39,
"learning_rate": 1.948127045200199e-05,
"loss": 0.5441,
"step": 293
},
{
"epoch": 0.39,
"learning_rate": 1.94766900076599e-05,
"loss": 0.5909,
"step": 294
},
{
"epoch": 0.39,
"learning_rate": 1.947208997245955e-05,
"loss": 0.5576,
"step": 295
},
{
"epoch": 0.39,
"learning_rate": 1.9467470355910438e-05,
"loss": 0.6149,
"step": 296
},
{
"epoch": 0.4,
"learning_rate": 1.9462831167562556e-05,
"loss": 0.5856,
"step": 297
},
{
"epoch": 0.4,
"learning_rate": 1.9458172417006347e-05,
"loss": 0.608,
"step": 298
},
{
"epoch": 0.4,
"learning_rate": 1.94534941138727e-05,
"loss": 0.567,
"step": 299
},
{
"epoch": 0.4,
"learning_rate": 1.9448796267832914e-05,
"loss": 0.5854,
"step": 300
},
{
"epoch": 0.4,
"learning_rate": 1.9444078888598703e-05,
"loss": 0.5723,
"step": 301
},
{
"epoch": 0.4,
"learning_rate": 1.9439341985922153e-05,
"loss": 0.5833,
"step": 302
},
{
"epoch": 0.4,
"learning_rate": 1.943458556959571e-05,
"loss": 0.6001,
"step": 303
},
{
"epoch": 0.4,
"learning_rate": 1.942980964945216e-05,
"loss": 0.5977,
"step": 304
},
{
"epoch": 0.41,
"learning_rate": 1.942501423536461e-05,
"loss": 0.6121,
"step": 305
},
{
"epoch": 0.41,
"learning_rate": 1.9420199337246474e-05,
"loss": 0.662,
"step": 306
},
{
"epoch": 0.41,
"learning_rate": 1.941536496505143e-05,
"loss": 0.5761,
"step": 307
},
{
"epoch": 0.41,
"learning_rate": 1.941051112877342e-05,
"loss": 0.5378,
"step": 308
},
{
"epoch": 0.41,
"learning_rate": 1.9405637838446626e-05,
"loss": 0.5601,
"step": 309
},
{
"epoch": 0.41,
"learning_rate": 1.9400745104145447e-05,
"loss": 0.5509,
"step": 310
},
{
"epoch": 0.41,
"learning_rate": 1.9395832935984474e-05,
"loss": 0.6043,
"step": 311
},
{
"epoch": 0.42,
"learning_rate": 1.939090134411848e-05,
"loss": 0.5576,
"step": 312
},
{
"epoch": 0.42,
"learning_rate": 1.9385950338742377e-05,
"loss": 0.5805,
"step": 313
},
{
"epoch": 0.42,
"learning_rate": 1.938097993009123e-05,
"loss": 0.5753,
"step": 314
},
{
"epoch": 0.42,
"learning_rate": 1.9375990128440205e-05,
"loss": 0.6385,
"step": 315
},
{
"epoch": 0.42,
"learning_rate": 1.9370980944104553e-05,
"loss": 0.5665,
"step": 316
},
{
"epoch": 0.42,
"learning_rate": 1.936595238743961e-05,
"loss": 0.5358,
"step": 317
},
{
"epoch": 0.42,
"learning_rate": 1.936090446884074e-05,
"loss": 0.5766,
"step": 318
},
{
"epoch": 0.42,
"learning_rate": 1.935583719874335e-05,
"loss": 0.5505,
"step": 319
},
{
"epoch": 0.43,
"learning_rate": 1.9350750587622844e-05,
"loss": 0.5405,
"step": 320
},
{
"epoch": 0.43,
"learning_rate": 1.934564464599461e-05,
"loss": 0.5544,
"step": 321
},
{
"epoch": 0.43,
"learning_rate": 1.9340519384414e-05,
"loss": 0.5739,
"step": 322
},
{
"epoch": 0.43,
"learning_rate": 1.93353748134763e-05,
"loss": 0.5872,
"step": 323
},
{
"epoch": 0.43,
"learning_rate": 1.933021094381672e-05,
"loss": 0.5486,
"step": 324
},
{
"epoch": 0.43,
"learning_rate": 1.9325027786110363e-05,
"loss": 0.5757,
"step": 325
},
{
"epoch": 0.43,
"learning_rate": 1.9319825351072205e-05,
"loss": 0.6554,
"step": 326
},
{
"epoch": 0.44,
"learning_rate": 1.931460364945707e-05,
"loss": 0.6208,
"step": 327
},
{
"epoch": 0.44,
"learning_rate": 1.9309362692059617e-05,
"loss": 0.578,
"step": 328
},
{
"epoch": 0.44,
"learning_rate": 1.930410248971431e-05,
"loss": 0.558,
"step": 329
},
{
"epoch": 0.44,
"learning_rate": 1.9298823053295396e-05,
"loss": 0.4966,
"step": 330
},
{
"epoch": 0.44,
"learning_rate": 1.929352439371689e-05,
"loss": 0.5472,
"step": 331
},
{
"epoch": 0.44,
"learning_rate": 1.9288206521932533e-05,
"loss": 0.5367,
"step": 332
},
{
"epoch": 0.44,
"learning_rate": 1.92828694489358e-05,
"loss": 0.5803,
"step": 333
},
{
"epoch": 0.44,
"learning_rate": 1.9277513185759847e-05,
"loss": 0.5869,
"step": 334
},
{
"epoch": 0.45,
"learning_rate": 1.9272137743477507e-05,
"loss": 0.5123,
"step": 335
},
{
"epoch": 0.45,
"learning_rate": 1.9266743133201254e-05,
"loss": 0.5236,
"step": 336
},
{
"epoch": 0.45,
"learning_rate": 1.9261329366083202e-05,
"loss": 0.636,
"step": 337
},
{
"epoch": 0.45,
"learning_rate": 1.9255896453315054e-05,
"loss": 0.5565,
"step": 338
},
{
"epoch": 0.45,
"learning_rate": 1.92504444061281e-05,
"loss": 0.5845,
"step": 339
},
{
"epoch": 0.45,
"learning_rate": 1.924497323579318e-05,
"loss": 0.5149,
"step": 340
},
{
"epoch": 0.45,
"learning_rate": 1.9239482953620668e-05,
"loss": 0.5277,
"step": 341
},
{
"epoch": 0.46,
"learning_rate": 1.923397357096045e-05,
"loss": 0.6106,
"step": 342
},
{
"epoch": 0.46,
"learning_rate": 1.92284450992019e-05,
"loss": 0.5565,
"step": 343
},
{
"epoch": 0.46,
"learning_rate": 1.922289754977385e-05,
"loss": 0.5898,
"step": 344
},
{
"epoch": 0.46,
"learning_rate": 1.9217330934144565e-05,
"loss": 0.5872,
"step": 345
},
{
"epoch": 0.46,
"learning_rate": 1.921174526382174e-05,
"loss": 0.5871,
"step": 346
},
{
"epoch": 0.46,
"learning_rate": 1.920614055035245e-05,
"loss": 0.5483,
"step": 347
},
{
"epoch": 0.46,
"learning_rate": 1.920051680532314e-05,
"loss": 0.5675,
"step": 348
},
{
"epoch": 0.46,
"learning_rate": 1.9194874040359596e-05,
"loss": 0.5477,
"step": 349
},
{
"epoch": 0.47,
"learning_rate": 1.918921226712693e-05,
"loss": 0.6154,
"step": 350
},
{
"epoch": 0.47,
"learning_rate": 1.918353149732954e-05,
"loss": 0.5895,
"step": 351
},
{
"epoch": 0.47,
"learning_rate": 1.9177831742711098e-05,
"loss": 0.5929,
"step": 352
},
{
"epoch": 0.47,
"learning_rate": 1.917211301505453e-05,
"loss": 0.5098,
"step": 353
},
{
"epoch": 0.47,
"learning_rate": 1.916637532618198e-05,
"loss": 0.5323,
"step": 354
},
{
"epoch": 0.47,
"learning_rate": 1.9160618687954783e-05,
"loss": 0.5033,
"step": 355
},
{
"epoch": 0.47,
"learning_rate": 1.915484311227346e-05,
"loss": 0.5039,
"step": 356
},
{
"epoch": 0.48,
"learning_rate": 1.9149048611077666e-05,
"loss": 0.5925,
"step": 357
},
{
"epoch": 0.48,
"learning_rate": 1.9143235196346194e-05,
"loss": 0.5453,
"step": 358
},
{
"epoch": 0.48,
"learning_rate": 1.9137402880096925e-05,
"loss": 0.5215,
"step": 359
},
{
"epoch": 0.48,
"learning_rate": 1.9131551674386827e-05,
"loss": 0.5655,
"step": 360
},
{
"epoch": 0.48,
"learning_rate": 1.9125681591311905e-05,
"loss": 0.521,
"step": 361
},
{
"epoch": 0.48,
"learning_rate": 1.9119792643007197e-05,
"loss": 0.6077,
"step": 362
},
{
"epoch": 0.48,
"learning_rate": 1.9113884841646736e-05,
"loss": 0.558,
"step": 363
},
{
"epoch": 0.48,
"learning_rate": 1.9107958199443532e-05,
"loss": 0.581,
"step": 364
},
{
"epoch": 0.49,
"learning_rate": 1.9102012728649543e-05,
"loss": 0.5827,
"step": 365
},
{
"epoch": 0.49,
"learning_rate": 1.9096048441555646e-05,
"loss": 0.551,
"step": 366
},
{
"epoch": 0.49,
"learning_rate": 1.909006535049163e-05,
"loss": 0.6177,
"step": 367
},
{
"epoch": 0.49,
"learning_rate": 1.9084063467826137e-05,
"loss": 0.6471,
"step": 368
},
{
"epoch": 0.49,
"learning_rate": 1.9078042805966673e-05,
"loss": 0.5624,
"step": 369
},
{
"epoch": 0.49,
"learning_rate": 1.9072003377359566e-05,
"loss": 0.5739,
"step": 370
},
{
"epoch": 0.49,
"learning_rate": 1.9065945194489925e-05,
"loss": 0.5464,
"step": 371
},
{
"epoch": 0.5,
"learning_rate": 1.9059868269881637e-05,
"loss": 0.6308,
"step": 372
},
{
"epoch": 0.5,
"learning_rate": 1.905377261609734e-05,
"loss": 0.6398,
"step": 373
},
{
"epoch": 0.5,
"learning_rate": 1.9047658245738382e-05,
"loss": 0.5618,
"step": 374
},
{
"epoch": 0.5,
"learning_rate": 1.90415251714448e-05,
"loss": 0.57,
"step": 375
},
{
"epoch": 0.5,
"learning_rate": 1.903537340589531e-05,
"loss": 0.6831,
"step": 376
},
{
"epoch": 0.5,
"learning_rate": 1.902920296180726e-05,
"loss": 0.5774,
"step": 377
},
{
"epoch": 0.5,
"learning_rate": 1.9023013851936603e-05,
"loss": 0.6064,
"step": 378
},
{
"epoch": 0.5,
"learning_rate": 1.90168060890779e-05,
"loss": 0.5465,
"step": 379
},
{
"epoch": 0.51,
"learning_rate": 1.901057968606425e-05,
"loss": 0.5249,
"step": 380
},
{
"epoch": 0.51,
"learning_rate": 1.9004334655767304e-05,
"loss": 0.5461,
"step": 381
},
{
"epoch": 0.51,
"learning_rate": 1.8998071011097207e-05,
"loss": 0.6345,
"step": 382
},
{
"epoch": 0.51,
"learning_rate": 1.89917887650026e-05,
"loss": 0.5514,
"step": 383
},
{
"epoch": 0.51,
"learning_rate": 1.8985487930470566e-05,
"loss": 0.5724,
"step": 384
},
{
"epoch": 0.51,
"learning_rate": 1.8979168520526614e-05,
"loss": 0.5864,
"step": 385
},
{
"epoch": 0.51,
"learning_rate": 1.8972830548234663e-05,
"loss": 0.5636,
"step": 386
},
{
"epoch": 0.51,
"learning_rate": 1.8966474026696995e-05,
"loss": 0.5535,
"step": 387
},
{
"epoch": 0.52,
"learning_rate": 1.8960098969054253e-05,
"loss": 0.5523,
"step": 388
},
{
"epoch": 0.52,
"learning_rate": 1.8953705388485385e-05,
"loss": 0.5524,
"step": 389
},
{
"epoch": 0.52,
"learning_rate": 1.8947293298207637e-05,
"loss": 0.5385,
"step": 390
},
{
"epoch": 0.52,
"learning_rate": 1.8940862711476515e-05,
"loss": 0.579,
"step": 391
},
{
"epoch": 0.52,
"learning_rate": 1.8934413641585765e-05,
"loss": 0.5301,
"step": 392
},
{
"epoch": 0.52,
"learning_rate": 1.8927946101867348e-05,
"loss": 0.5583,
"step": 393
},
{
"epoch": 0.52,
"learning_rate": 1.8921460105691394e-05,
"loss": 0.5858,
"step": 394
},
{
"epoch": 0.53,
"learning_rate": 1.891495566646621e-05,
"loss": 0.6168,
"step": 395
},
{
"epoch": 0.53,
"learning_rate": 1.8908432797638198e-05,
"loss": 0.5846,
"step": 396
},
{
"epoch": 0.53,
"learning_rate": 1.8901891512691887e-05,
"loss": 0.5091,
"step": 397
},
{
"epoch": 0.53,
"learning_rate": 1.889533182514986e-05,
"loss": 0.6059,
"step": 398
},
{
"epoch": 0.53,
"learning_rate": 1.8888753748572756e-05,
"loss": 0.5595,
"step": 399
},
{
"epoch": 0.53,
"learning_rate": 1.8882157296559217e-05,
"loss": 0.5621,
"step": 400
},
{
"epoch": 0.53,
"learning_rate": 1.8875542482745883e-05,
"loss": 0.571,
"step": 401
},
{
"epoch": 0.53,
"learning_rate": 1.886890932080734e-05,
"loss": 0.6054,
"step": 402
},
{
"epoch": 0.54,
"learning_rate": 1.886225782445612e-05,
"loss": 0.5613,
"step": 403
},
{
"epoch": 0.54,
"learning_rate": 1.885558800744264e-05,
"loss": 0.6156,
"step": 404
},
{
"epoch": 0.54,
"learning_rate": 1.8848899883555204e-05,
"loss": 0.6255,
"step": 405
},
{
"epoch": 0.54,
"learning_rate": 1.8842193466619956e-05,
"loss": 0.6063,
"step": 406
},
{
"epoch": 0.54,
"learning_rate": 1.8835468770500856e-05,
"loss": 0.553,
"step": 407
},
{
"epoch": 0.54,
"learning_rate": 1.8828725809099657e-05,
"loss": 0.5725,
"step": 408
},
{
"epoch": 0.54,
"learning_rate": 1.8821964596355866e-05,
"loss": 0.5681,
"step": 409
},
{
"epoch": 0.55,
"learning_rate": 1.8815185146246718e-05,
"loss": 0.5984,
"step": 410
},
{
"epoch": 0.55,
"learning_rate": 1.8808387472787156e-05,
"loss": 0.5214,
"step": 411
},
{
"epoch": 0.55,
"learning_rate": 1.8801571590029795e-05,
"loss": 0.5708,
"step": 412
},
{
"epoch": 0.55,
"learning_rate": 1.879473751206489e-05,
"loss": 0.6156,
"step": 413
},
{
"epoch": 0.55,
"learning_rate": 1.8787885253020314e-05,
"loss": 0.5647,
"step": 414
},
{
"epoch": 0.55,
"learning_rate": 1.878101482706152e-05,
"loss": 0.5148,
"step": 415
},
{
"epoch": 0.55,
"learning_rate": 1.877412624839152e-05,
"loss": 0.6082,
"step": 416
},
{
"epoch": 0.55,
"learning_rate": 1.8767219531250856e-05,
"loss": 0.5763,
"step": 417
},
{
"epoch": 0.56,
"learning_rate": 1.8760294689917556e-05,
"loss": 0.5751,
"step": 418
},
{
"epoch": 0.56,
"learning_rate": 1.8753351738707132e-05,
"loss": 0.5228,
"step": 419
},
{
"epoch": 0.56,
"learning_rate": 1.8746390691972517e-05,
"loss": 0.5081,
"step": 420
},
{
"epoch": 0.56,
"learning_rate": 1.8739411564104067e-05,
"loss": 0.5583,
"step": 421
},
{
"epoch": 0.56,
"learning_rate": 1.8732414369529506e-05,
"loss": 0.5519,
"step": 422
},
{
"epoch": 0.56,
"learning_rate": 1.8725399122713914e-05,
"loss": 0.5567,
"step": 423
},
{
"epoch": 0.56,
"learning_rate": 1.8718365838159682e-05,
"loss": 0.6165,
"step": 424
},
{
"epoch": 0.57,
"learning_rate": 1.87113145304065e-05,
"loss": 0.567,
"step": 425
},
{
"epoch": 0.57,
"learning_rate": 1.8704245214031303e-05,
"loss": 0.5719,
"step": 426
},
{
"epoch": 0.57,
"learning_rate": 1.8697157903648274e-05,
"loss": 0.567,
"step": 427
},
{
"epoch": 0.57,
"learning_rate": 1.869005261390877e-05,
"loss": 0.537,
"step": 428
},
{
"epoch": 0.57,
"learning_rate": 1.8682929359501338e-05,
"loss": 0.5481,
"step": 429
},
{
"epoch": 0.57,
"learning_rate": 1.8675788155151654e-05,
"loss": 0.5208,
"step": 430
},
{
"epoch": 0.57,
"learning_rate": 1.8668629015622498e-05,
"loss": 0.577,
"step": 431
},
{
"epoch": 0.57,
"learning_rate": 1.866145195571373e-05,
"loss": 0.5348,
"step": 432
},
{
"epoch": 0.58,
"learning_rate": 1.865425699026226e-05,
"loss": 0.5151,
"step": 433
},
{
"epoch": 0.58,
"learning_rate": 1.864704413414201e-05,
"loss": 0.5509,
"step": 434
},
{
"epoch": 0.58,
"learning_rate": 1.863981340226388e-05,
"loss": 0.5463,
"step": 435
},
{
"epoch": 0.58,
"learning_rate": 1.863256480957574e-05,
"loss": 0.6358,
"step": 436
},
{
"epoch": 0.58,
"learning_rate": 1.8625298371062367e-05,
"loss": 0.5462,
"step": 437
},
{
"epoch": 0.58,
"learning_rate": 1.8618014101745444e-05,
"loss": 0.546,
"step": 438
},
{
"epoch": 0.58,
"learning_rate": 1.86107120166835e-05,
"loss": 0.5507,
"step": 439
},
{
"epoch": 0.59,
"learning_rate": 1.860339213097191e-05,
"loss": 0.5176,
"step": 440
},
{
"epoch": 0.59,
"learning_rate": 1.8596054459742833e-05,
"loss": 0.5711,
"step": 441
},
{
"epoch": 0.59,
"learning_rate": 1.8588699018165202e-05,
"loss": 0.5908,
"step": 442
},
{
"epoch": 0.59,
"learning_rate": 1.858132582144469e-05,
"loss": 0.6022,
"step": 443
},
{
"epoch": 0.59,
"learning_rate": 1.8573934884823673e-05,
"loss": 0.6439,
"step": 444
},
{
"epoch": 0.59,
"learning_rate": 1.8566526223581194e-05,
"loss": 0.4982,
"step": 445
},
{
"epoch": 0.59,
"learning_rate": 1.855909985303294e-05,
"loss": 0.6417,
"step": 446
},
{
"epoch": 0.59,
"learning_rate": 1.855165578853121e-05,
"loss": 0.5724,
"step": 447
},
{
"epoch": 0.6,
"learning_rate": 1.8544194045464888e-05,
"loss": 0.4889,
"step": 448
},
{
"epoch": 0.6,
"learning_rate": 1.8536714639259387e-05,
"loss": 0.5331,
"step": 449
},
{
"epoch": 0.6,
"learning_rate": 1.8529217585376652e-05,
"loss": 0.5935,
"step": 450
},
{
"epoch": 0.6,
"learning_rate": 1.852170289931509e-05,
"loss": 0.5383,
"step": 451
},
{
"epoch": 0.6,
"learning_rate": 1.8514170596609592e-05,
"loss": 0.5171,
"step": 452
},
{
"epoch": 0.6,
"learning_rate": 1.8506620692831427e-05,
"loss": 0.5753,
"step": 453
},
{
"epoch": 0.6,
"learning_rate": 1.849905320358828e-05,
"loss": 0.582,
"step": 454
},
{
"epoch": 0.61,
"learning_rate": 1.849146814452418e-05,
"loss": 0.5363,
"step": 455
},
{
"epoch": 0.61,
"learning_rate": 1.848386553131947e-05,
"loss": 0.5283,
"step": 456
},
{
"epoch": 0.61,
"learning_rate": 1.8476245379690797e-05,
"loss": 0.6175,
"step": 457
},
{
"epoch": 0.61,
"learning_rate": 1.846860770539105e-05,
"loss": 0.5749,
"step": 458
},
{
"epoch": 0.61,
"learning_rate": 1.8460952524209355e-05,
"loss": 0.5476,
"step": 459
},
{
"epoch": 0.61,
"learning_rate": 1.845327985197102e-05,
"loss": 0.5045,
"step": 460
},
{
"epoch": 0.61,
"learning_rate": 1.8445589704537518e-05,
"loss": 0.5888,
"step": 461
},
{
"epoch": 0.61,
"learning_rate": 1.8437882097806436e-05,
"loss": 0.511,
"step": 462
},
{
"epoch": 0.62,
"learning_rate": 1.8430157047711473e-05,
"loss": 0.5411,
"step": 463
},
{
"epoch": 0.62,
"learning_rate": 1.8422414570222377e-05,
"loss": 0.5565,
"step": 464
},
{
"epoch": 0.62,
"learning_rate": 1.841465468134492e-05,
"loss": 0.5269,
"step": 465
},
{
"epoch": 0.62,
"learning_rate": 1.8406877397120872e-05,
"loss": 0.5228,
"step": 466
},
{
"epoch": 0.62,
"learning_rate": 1.8399082733627967e-05,
"loss": 0.5123,
"step": 467
},
{
"epoch": 0.62,
"learning_rate": 1.8391270706979864e-05,
"loss": 0.5226,
"step": 468
},
{
"epoch": 0.62,
"learning_rate": 1.838344133332611e-05,
"loss": 0.5915,
"step": 469
},
{
"epoch": 0.63,
"learning_rate": 1.8375594628852128e-05,
"loss": 0.587,
"step": 470
},
{
"epoch": 0.63,
"learning_rate": 1.8367730609779153e-05,
"loss": 0.5936,
"step": 471
},
{
"epoch": 0.63,
"learning_rate": 1.835984929236422e-05,
"loss": 0.5464,
"step": 472
},
{
"epoch": 0.63,
"learning_rate": 1.8351950692900127e-05,
"loss": 0.4809,
"step": 473
},
{
"epoch": 0.63,
"learning_rate": 1.8344034827715398e-05,
"loss": 0.5147,
"step": 474
},
{
"epoch": 0.63,
"learning_rate": 1.8336101713174242e-05,
"loss": 0.5597,
"step": 475
},
{
"epoch": 0.63,
"learning_rate": 1.832815136567654e-05,
"loss": 0.5808,
"step": 476
},
{
"epoch": 0.63,
"learning_rate": 1.8320183801657783e-05,
"loss": 0.5556,
"step": 477
},
{
"epoch": 0.64,
"learning_rate": 1.831219903758907e-05,
"loss": 0.5801,
"step": 478
},
{
"epoch": 0.64,
"learning_rate": 1.8304197089977042e-05,
"loss": 0.5319,
"step": 479
},
{
"epoch": 0.64,
"learning_rate": 1.8296177975363877e-05,
"loss": 0.5619,
"step": 480
},
{
"epoch": 0.64,
"learning_rate": 1.8288141710327224e-05,
"loss": 0.5532,
"step": 481
},
{
"epoch": 0.64,
"learning_rate": 1.8280088311480203e-05,
"loss": 0.5464,
"step": 482
},
{
"epoch": 0.64,
"learning_rate": 1.8272017795471345e-05,
"loss": 0.4504,
"step": 483
},
{
"epoch": 0.64,
"learning_rate": 1.8263930178984576e-05,
"loss": 0.516,
"step": 484
},
{
"epoch": 0.65,
"learning_rate": 1.825582547873916e-05,
"loss": 0.592,
"step": 485
},
{
"epoch": 0.65,
"learning_rate": 1.8247703711489684e-05,
"loss": 0.6012,
"step": 486
},
{
"epoch": 0.65,
"learning_rate": 1.8239564894026026e-05,
"loss": 0.4877,
"step": 487
},
{
"epoch": 0.65,
"learning_rate": 1.82314090431733e-05,
"loss": 0.6035,
"step": 488
},
{
"epoch": 0.65,
"learning_rate": 1.822323617579183e-05,
"loss": 0.5124,
"step": 489
},
{
"epoch": 0.65,
"learning_rate": 1.821504630877713e-05,
"loss": 0.5404,
"step": 490
},
{
"epoch": 0.65,
"learning_rate": 1.8206839459059843e-05,
"loss": 0.5361,
"step": 491
},
{
"epoch": 0.65,
"learning_rate": 1.8198615643605733e-05,
"loss": 0.5507,
"step": 492
},
{
"epoch": 0.66,
"learning_rate": 1.8190374879415634e-05,
"loss": 0.5185,
"step": 493
},
{
"epoch": 0.66,
"learning_rate": 1.8182117183525405e-05,
"loss": 0.536,
"step": 494
},
{
"epoch": 0.66,
"learning_rate": 1.8173842573005924e-05,
"loss": 0.5354,
"step": 495
},
{
"epoch": 0.66,
"learning_rate": 1.8165551064963026e-05,
"loss": 0.6247,
"step": 496
},
{
"epoch": 0.66,
"learning_rate": 1.8157242676537478e-05,
"loss": 0.5719,
"step": 497
},
{
"epoch": 0.66,
"learning_rate": 1.8148917424904952e-05,
"loss": 0.5484,
"step": 498
},
{
"epoch": 0.66,
"learning_rate": 1.814057532727597e-05,
"loss": 0.599,
"step": 499
},
{
"epoch": 0.67,
"learning_rate": 1.8132216400895882e-05,
"loss": 0.4973,
"step": 500
},
{
"epoch": 0.67,
"learning_rate": 1.8123840663044835e-05,
"loss": 0.6065,
"step": 501
},
{
"epoch": 0.67,
"learning_rate": 1.811544813103772e-05,
"loss": 0.5469,
"step": 502
},
{
"epoch": 0.67,
"learning_rate": 1.810703882222415e-05,
"loss": 0.5349,
"step": 503
},
{
"epoch": 0.67,
"learning_rate": 1.8098612753988424e-05,
"loss": 0.5713,
"step": 504
},
{
"epoch": 0.67,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.5541,
"step": 505
},
{
"epoch": 0.67,
"learning_rate": 1.8081710408960864e-05,
"loss": 0.4614,
"step": 506
},
{
"epoch": 0.67,
"learning_rate": 1.8073234167110715e-05,
"loss": 0.5755,
"step": 507
},
{
"epoch": 0.68,
"learning_rate": 1.806474123572169e-05,
"loss": 0.5647,
"step": 508
},
{
"epoch": 0.68,
"learning_rate": 1.8056231632350952e-05,
"loss": 0.5211,
"step": 509
},
{
"epoch": 0.68,
"learning_rate": 1.8047705374590133e-05,
"loss": 0.4948,
"step": 510
},
{
"epoch": 0.68,
"learning_rate": 1.80391624800653e-05,
"loss": 0.5539,
"step": 511
},
{
"epoch": 0.68,
"learning_rate": 1.8030602966436897e-05,
"loss": 0.553,
"step": 512
},
{
"epoch": 0.68,
"learning_rate": 1.8022026851399737e-05,
"loss": 0.5482,
"step": 513
},
{
"epoch": 0.68,
"learning_rate": 1.8013434152682946e-05,
"loss": 0.6039,
"step": 514
},
{
"epoch": 0.69,
"learning_rate": 1.8004824888049938e-05,
"loss": 0.5511,
"step": 515
},
{
"epoch": 0.69,
"learning_rate": 1.799619907529837e-05,
"loss": 0.5233,
"step": 516
},
{
"epoch": 0.69,
"learning_rate": 1.7987556732260108e-05,
"loss": 0.5196,
"step": 517
},
{
"epoch": 0.69,
"learning_rate": 1.7978897876801192e-05,
"loss": 0.5032,
"step": 518
},
{
"epoch": 0.69,
"learning_rate": 1.7970222526821797e-05,
"loss": 0.5984,
"step": 519
},
{
"epoch": 0.69,
"learning_rate": 1.7961530700256194e-05,
"loss": 0.5026,
"step": 520
},
{
"epoch": 0.69,
"learning_rate": 1.7952822415072723e-05,
"loss": 0.5421,
"step": 521
},
{
"epoch": 0.69,
"learning_rate": 1.7944097689273744e-05,
"loss": 0.5719,
"step": 522
},
{
"epoch": 0.7,
"learning_rate": 1.79353565408956e-05,
"loss": 0.5133,
"step": 523
},
{
"epoch": 0.7,
"learning_rate": 1.7926598988008584e-05,
"loss": 0.5987,
"step": 524
},
{
"epoch": 0.7,
"learning_rate": 1.7917825048716912e-05,
"loss": 0.4915,
"step": 525
},
{
"epoch": 0.7,
"learning_rate": 1.7909034741158668e-05,
"loss": 0.5648,
"step": 526
},
{
"epoch": 0.7,
"learning_rate": 1.7900228083505768e-05,
"loss": 0.525,
"step": 527
},
{
"epoch": 0.7,
"learning_rate": 1.789140509396394e-05,
"loss": 0.6012,
"step": 528
},
{
"epoch": 0.7,
"learning_rate": 1.788256579077266e-05,
"loss": 0.5617,
"step": 529
},
{
"epoch": 0.71,
"learning_rate": 1.787371019220515e-05,
"loss": 0.5533,
"step": 530
},
{
"epoch": 0.71,
"learning_rate": 1.7864838316568294e-05,
"loss": 0.503,
"step": 531
},
{
"epoch": 0.71,
"learning_rate": 1.7855950182202638e-05,
"loss": 0.5371,
"step": 532
},
{
"epoch": 0.71,
"learning_rate": 1.7847045807482347e-05,
"loss": 0.479,
"step": 533
},
{
"epoch": 0.71,
"learning_rate": 1.7838125210815143e-05,
"loss": 0.5253,
"step": 534
},
{
"epoch": 0.71,
"learning_rate": 1.782918841064229e-05,
"loss": 0.4655,
"step": 535
},
{
"epoch": 0.71,
"learning_rate": 1.782023542543855e-05,
"loss": 0.5105,
"step": 536
},
{
"epoch": 0.71,
"learning_rate": 1.7811266273712145e-05,
"loss": 0.579,
"step": 537
},
{
"epoch": 0.72,
"learning_rate": 1.7802280974004717e-05,
"loss": 0.5406,
"step": 538
},
{
"epoch": 0.72,
"learning_rate": 1.779327954489128e-05,
"loss": 0.5135,
"step": 539
},
{
"epoch": 0.72,
"learning_rate": 1.778426200498021e-05,
"loss": 0.5054,
"step": 540
},
{
"epoch": 0.72,
"learning_rate": 1.7775228372913176e-05,
"loss": 0.5448,
"step": 541
},
{
"epoch": 0.72,
"learning_rate": 1.776617866736512e-05,
"loss": 0.5911,
"step": 542
},
{
"epoch": 0.72,
"learning_rate": 1.77571129070442e-05,
"loss": 0.55,
"step": 543
},
{
"epoch": 0.72,
"learning_rate": 1.7748031110691778e-05,
"loss": 0.5268,
"step": 544
},
{
"epoch": 0.73,
"learning_rate": 1.7738933297082367e-05,
"loss": 0.5403,
"step": 545
},
{
"epoch": 0.73,
"learning_rate": 1.7729819485023574e-05,
"loss": 0.5282,
"step": 546
},
{
"epoch": 0.73,
"learning_rate": 1.77206896933561e-05,
"loss": 0.5774,
"step": 547
},
{
"epoch": 0.73,
"learning_rate": 1.7711543940953667e-05,
"loss": 0.5489,
"step": 548
},
{
"epoch": 0.73,
"learning_rate": 1.7702382246723004e-05,
"loss": 0.484,
"step": 549
},
{
"epoch": 0.73,
"learning_rate": 1.769320462960378e-05,
"loss": 0.5222,
"step": 550
},
{
"epoch": 0.73,
"learning_rate": 1.7684011108568593e-05,
"loss": 0.5264,
"step": 551
},
{
"epoch": 0.73,
"learning_rate": 1.7674801702622915e-05,
"loss": 0.5038,
"step": 552
},
{
"epoch": 0.74,
"learning_rate": 1.7665576430805053e-05,
"loss": 0.5301,
"step": 553
},
{
"epoch": 0.74,
"learning_rate": 1.765633531218612e-05,
"loss": 0.5571,
"step": 554
},
{
"epoch": 0.74,
"learning_rate": 1.764707836586999e-05,
"loss": 0.5062,
"step": 555
},
{
"epoch": 0.74,
"learning_rate": 1.7637805610993243e-05,
"loss": 0.4844,
"step": 556
},
{
"epoch": 0.74,
"learning_rate": 1.762851706672515e-05,
"loss": 0.4986,
"step": 557
},
{
"epoch": 0.74,
"learning_rate": 1.7619212752267628e-05,
"loss": 0.5016,
"step": 558
},
{
"epoch": 0.74,
"learning_rate": 1.760989268685519e-05,
"loss": 0.5088,
"step": 559
},
{
"epoch": 0.75,
"learning_rate": 1.7600556889754896e-05,
"loss": 0.5221,
"step": 560
},
{
"epoch": 0.75,
"learning_rate": 1.759120538026635e-05,
"loss": 0.5354,
"step": 561
},
{
"epoch": 0.75,
"learning_rate": 1.758183817772163e-05,
"loss": 0.5598,
"step": 562
},
{
"epoch": 0.75,
"learning_rate": 1.757245530148525e-05,
"loss": 0.5004,
"step": 563
},
{
"epoch": 0.75,
"learning_rate": 1.7563056770954133e-05,
"loss": 0.525,
"step": 564
},
{
"epoch": 0.75,
"learning_rate": 1.7553642605557558e-05,
"loss": 0.522,
"step": 565
},
{
"epoch": 0.75,
"learning_rate": 1.7544212824757133e-05,
"loss": 0.5609,
"step": 566
},
{
"epoch": 0.75,
"learning_rate": 1.7534767448046737e-05,
"loss": 0.5663,
"step": 567
},
{
"epoch": 0.76,
"learning_rate": 1.7525306494952498e-05,
"loss": 0.5628,
"step": 568
},
{
"epoch": 0.76,
"learning_rate": 1.7515829985032743e-05,
"loss": 0.5095,
"step": 569
},
{
"epoch": 0.76,
"learning_rate": 1.7506337937877956e-05,
"loss": 0.5501,
"step": 570
},
{
"epoch": 0.76,
"learning_rate": 1.749683037311074e-05,
"loss": 0.5341,
"step": 571
},
{
"epoch": 0.76,
"learning_rate": 1.7487307310385784e-05,
"loss": 0.5511,
"step": 572
},
{
"epoch": 0.76,
"learning_rate": 1.747776876938981e-05,
"loss": 0.5254,
"step": 573
},
{
"epoch": 0.76,
"learning_rate": 1.7468214769841542e-05,
"loss": 0.5699,
"step": 574
},
{
"epoch": 0.77,
"learning_rate": 1.7458645331491652e-05,
"loss": 0.4717,
"step": 575
},
{
"epoch": 0.77,
"learning_rate": 1.744906047412273e-05,
"loss": 0.5426,
"step": 576
},
{
"epoch": 0.77,
"learning_rate": 1.7439460217549257e-05,
"loss": 0.5514,
"step": 577
},
{
"epoch": 0.77,
"learning_rate": 1.7429844581617532e-05,
"loss": 0.5577,
"step": 578
},
{
"epoch": 0.77,
"learning_rate": 1.7420213586205647e-05,
"loss": 0.4914,
"step": 579
},
{
"epoch": 0.77,
"learning_rate": 1.741056725122346e-05,
"loss": 0.5572,
"step": 580
},
{
"epoch": 0.77,
"learning_rate": 1.740090559661252e-05,
"loss": 0.5265,
"step": 581
},
{
"epoch": 0.77,
"learning_rate": 1.7391228642346064e-05,
"loss": 0.5183,
"step": 582
},
{
"epoch": 0.78,
"learning_rate": 1.7381536408428948e-05,
"loss": 0.5206,
"step": 583
},
{
"epoch": 0.78,
"learning_rate": 1.737182891489762e-05,
"loss": 0.5716,
"step": 584
},
{
"epoch": 0.78,
"learning_rate": 1.7362106181820065e-05,
"loss": 0.4864,
"step": 585
},
{
"epoch": 0.78,
"learning_rate": 1.7352368229295786e-05,
"loss": 0.4861,
"step": 586
},
{
"epoch": 0.78,
"learning_rate": 1.7342615077455732e-05,
"loss": 0.5279,
"step": 587
},
{
"epoch": 0.78,
"learning_rate": 1.733284674646229e-05,
"loss": 0.5287,
"step": 588
},
{
"epoch": 0.78,
"learning_rate": 1.7323063256509212e-05,
"loss": 0.4958,
"step": 589
},
{
"epoch": 0.79,
"learning_rate": 1.73132646278216e-05,
"loss": 0.5163,
"step": 590
},
{
"epoch": 0.79,
"learning_rate": 1.730345088065584e-05,
"loss": 0.4921,
"step": 591
},
{
"epoch": 0.79,
"learning_rate": 1.7293622035299583e-05,
"loss": 0.5565,
"step": 592
},
{
"epoch": 0.79,
"learning_rate": 1.7283778112071683e-05,
"loss": 0.5082,
"step": 593
},
{
"epoch": 0.79,
"learning_rate": 1.727391913132217e-05,
"loss": 0.5474,
"step": 594
},
{
"epoch": 0.79,
"learning_rate": 1.72640451134322e-05,
"loss": 0.5652,
"step": 595
},
{
"epoch": 0.79,
"learning_rate": 1.725415607881401e-05,
"loss": 0.5298,
"step": 596
},
{
"epoch": 0.79,
"learning_rate": 1.7244252047910893e-05,
"loss": 0.5521,
"step": 597
},
{
"epoch": 0.8,
"learning_rate": 1.7234333041197127e-05,
"loss": 0.5971,
"step": 598
},
{
"epoch": 0.8,
"learning_rate": 1.722439907917796e-05,
"loss": 0.5095,
"step": 599
},
{
"epoch": 0.8,
"learning_rate": 1.721445018238956e-05,
"loss": 0.4946,
"step": 600
},
{
"epoch": 0.8,
"learning_rate": 1.7204486371398953e-05,
"loss": 0.4728,
"step": 601
},
{
"epoch": 0.8,
"learning_rate": 1.7194507666804016e-05,
"loss": 0.5162,
"step": 602
},
{
"epoch": 0.8,
"learning_rate": 1.7184514089233403e-05,
"loss": 0.5081,
"step": 603
},
{
"epoch": 0.8,
"learning_rate": 1.7174505659346516e-05,
"loss": 0.6207,
"step": 604
},
{
"epoch": 0.81,
"learning_rate": 1.7164482397833464e-05,
"loss": 0.4926,
"step": 605
},
{
"epoch": 0.81,
"learning_rate": 1.715444432541501e-05,
"loss": 0.6121,
"step": 606
},
{
"epoch": 0.81,
"learning_rate": 1.714439146284255e-05,
"loss": 0.5255,
"step": 607
},
{
"epoch": 0.81,
"learning_rate": 1.7134323830898036e-05,
"loss": 0.4981,
"step": 608
},
{
"epoch": 0.81,
"learning_rate": 1.7124241450393967e-05,
"loss": 0.5991,
"step": 609
},
{
"epoch": 0.81,
"learning_rate": 1.711414434217332e-05,
"loss": 0.5851,
"step": 610
},
{
"epoch": 0.81,
"learning_rate": 1.710403252710953e-05,
"loss": 0.5345,
"step": 611
},
{
"epoch": 0.81,
"learning_rate": 1.709390602610643e-05,
"loss": 0.4922,
"step": 612
},
{
"epoch": 0.82,
"learning_rate": 1.7083764860098206e-05,
"loss": 0.5122,
"step": 613
},
{
"epoch": 0.82,
"learning_rate": 1.7073609050049368e-05,
"loss": 0.526,
"step": 614
},
{
"epoch": 0.82,
"learning_rate": 1.7063438616954703e-05,
"loss": 0.5571,
"step": 615
},
{
"epoch": 0.82,
"learning_rate": 1.7053253581839216e-05,
"loss": 0.5699,
"step": 616
},
{
"epoch": 0.82,
"learning_rate": 1.7043053965758104e-05,
"loss": 0.5517,
"step": 617
},
{
"epoch": 0.82,
"learning_rate": 1.7032839789796712e-05,
"loss": 0.5249,
"step": 618
},
{
"epoch": 0.82,
"learning_rate": 1.7022611075070476e-05,
"loss": 0.5561,
"step": 619
},
{
"epoch": 0.83,
"learning_rate": 1.7012367842724887e-05,
"loss": 0.5201,
"step": 620
},
{
"epoch": 0.83,
"learning_rate": 1.700211011393546e-05,
"loss": 0.576,
"step": 621
},
{
"epoch": 0.83,
"learning_rate": 1.6991837909907668e-05,
"loss": 0.5241,
"step": 622
},
{
"epoch": 0.83,
"learning_rate": 1.6981551251876905e-05,
"loss": 0.5167,
"step": 623
},
{
"epoch": 0.83,
"learning_rate": 1.6971250161108456e-05,
"loss": 0.5414,
"step": 624
},
{
"epoch": 0.83,
"learning_rate": 1.696093465889743e-05,
"loss": 0.4829,
"step": 625
},
{
"epoch": 0.83,
"learning_rate": 1.6950604766568745e-05,
"loss": 0.5224,
"step": 626
},
{
"epoch": 0.83,
"learning_rate": 1.694026050547705e-05,
"loss": 0.5047,
"step": 627
},
{
"epoch": 0.84,
"learning_rate": 1.69299018970067e-05,
"loss": 0.531,
"step": 628
},
{
"epoch": 0.84,
"learning_rate": 1.6919528962571727e-05,
"loss": 0.6254,
"step": 629
},
{
"epoch": 0.84,
"learning_rate": 1.6909141723615757e-05,
"loss": 0.5142,
"step": 630
},
{
"epoch": 0.84,
"learning_rate": 1.6898740201612006e-05,
"loss": 0.5683,
"step": 631
},
{
"epoch": 0.84,
"learning_rate": 1.68883244180632e-05,
"loss": 0.5562,
"step": 632
},
{
"epoch": 0.84,
"learning_rate": 1.687789439450156e-05,
"loss": 0.5212,
"step": 633
},
{
"epoch": 0.84,
"learning_rate": 1.6867450152488737e-05,
"loss": 0.5286,
"step": 634
},
{
"epoch": 0.84,
"learning_rate": 1.6856991713615778e-05,
"loss": 0.5047,
"step": 635
},
{
"epoch": 0.85,
"learning_rate": 1.6846519099503082e-05,
"loss": 0.4734,
"step": 636
},
{
"epoch": 0.85,
"learning_rate": 1.6836032331800353e-05,
"loss": 0.5313,
"step": 637
},
{
"epoch": 0.85,
"learning_rate": 1.6825531432186545e-05,
"loss": 0.5197,
"step": 638
},
{
"epoch": 0.85,
"learning_rate": 1.681501642236983e-05,
"loss": 0.5809,
"step": 639
},
{
"epoch": 0.85,
"learning_rate": 1.6804487324087558e-05,
"loss": 0.5486,
"step": 640
},
{
"epoch": 0.85,
"learning_rate": 1.6793944159106193e-05,
"loss": 0.5485,
"step": 641
},
{
"epoch": 0.85,
"learning_rate": 1.678338694922128e-05,
"loss": 0.5458,
"step": 642
},
{
"epoch": 0.86,
"learning_rate": 1.6772815716257414e-05,
"loss": 0.5027,
"step": 643
},
{
"epoch": 0.86,
"learning_rate": 1.6762230482068154e-05,
"loss": 0.596,
"step": 644
},
{
"epoch": 0.86,
"learning_rate": 1.675163126853602e-05,
"loss": 0.4872,
"step": 645
},
{
"epoch": 0.86,
"learning_rate": 1.674101809757243e-05,
"loss": 0.5911,
"step": 646
},
{
"epoch": 0.86,
"learning_rate": 1.673039099111765e-05,
"loss": 0.5567,
"step": 647
},
{
"epoch": 0.86,
"learning_rate": 1.6719749971140756e-05,
"loss": 0.5006,
"step": 648
},
{
"epoch": 0.86,
"learning_rate": 1.6709095059639592e-05,
"loss": 0.4825,
"step": 649
},
{
"epoch": 0.86,
"learning_rate": 1.6698426278640714e-05,
"loss": 0.5711,
"step": 650
},
{
"epoch": 0.87,
"learning_rate": 1.6687743650199357e-05,
"loss": 0.4992,
"step": 651
},
{
"epoch": 0.87,
"learning_rate": 1.6677047196399366e-05,
"loss": 0.5349,
"step": 652
},
{
"epoch": 0.87,
"learning_rate": 1.666633693935319e-05,
"loss": 0.546,
"step": 653
},
{
"epoch": 0.87,
"learning_rate": 1.6655612901201795e-05,
"loss": 0.5331,
"step": 654
},
{
"epoch": 0.87,
"learning_rate": 1.6644875104114643e-05,
"loss": 0.5573,
"step": 655
},
{
"epoch": 0.87,
"learning_rate": 1.6634123570289635e-05,
"loss": 0.5205,
"step": 656
},
{
"epoch": 0.87,
"learning_rate": 1.662335832195308e-05,
"loss": 0.5296,
"step": 657
},
{
"epoch": 0.88,
"learning_rate": 1.6612579381359624e-05,
"loss": 0.498,
"step": 658
},
{
"epoch": 0.88,
"learning_rate": 1.6601786770792233e-05,
"loss": 0.6091,
"step": 659
},
{
"epoch": 0.88,
"learning_rate": 1.6590980512562126e-05,
"loss": 0.4497,
"step": 660
},
{
"epoch": 0.88,
"learning_rate": 1.6580160629008728e-05,
"loss": 0.5771,
"step": 661
},
{
"epoch": 0.88,
"learning_rate": 1.6569327142499644e-05,
"loss": 0.5265,
"step": 662
},
{
"epoch": 0.88,
"learning_rate": 1.6558480075430594e-05,
"loss": 0.4623,
"step": 663
},
{
"epoch": 0.88,
"learning_rate": 1.6547619450225373e-05,
"loss": 0.4838,
"step": 664
},
{
"epoch": 0.88,
"learning_rate": 1.6536745289335803e-05,
"loss": 0.5203,
"step": 665
},
{
"epoch": 0.89,
"learning_rate": 1.6525857615241686e-05,
"loss": 0.5305,
"step": 666
},
{
"epoch": 0.89,
"learning_rate": 1.651495645045077e-05,
"loss": 0.4639,
"step": 667
},
{
"epoch": 0.89,
"learning_rate": 1.6504041817498676e-05,
"loss": 0.5365,
"step": 668
},
{
"epoch": 0.89,
"learning_rate": 1.6493113738948885e-05,
"loss": 0.5131,
"step": 669
},
{
"epoch": 0.89,
"learning_rate": 1.648217223739266e-05,
"loss": 0.538,
"step": 670
},
{
"epoch": 0.89,
"learning_rate": 1.6471217335449014e-05,
"loss": 0.5716,
"step": 671
},
{
"epoch": 0.89,
"learning_rate": 1.6460249055764664e-05,
"loss": 0.5852,
"step": 672
},
{
"epoch": 0.9,
"learning_rate": 1.6449267421013994e-05,
"loss": 0.4646,
"step": 673
},
{
"epoch": 0.9,
"learning_rate": 1.643827245389898e-05,
"loss": 0.5744,
"step": 674
},
{
"epoch": 0.9,
"learning_rate": 1.6427264177149168e-05,
"loss": 0.5326,
"step": 675
},
{
"epoch": 0.9,
"learning_rate": 1.6416242613521612e-05,
"loss": 0.5126,
"step": 676
},
{
"epoch": 0.9,
"learning_rate": 1.6405207785800843e-05,
"loss": 0.5071,
"step": 677
},
{
"epoch": 0.9,
"learning_rate": 1.6394159716798807e-05,
"loss": 0.5211,
"step": 678
},
{
"epoch": 0.9,
"learning_rate": 1.638309842935482e-05,
"loss": 0.5045,
"step": 679
},
{
"epoch": 0.9,
"learning_rate": 1.6372023946335534e-05,
"loss": 0.5384,
"step": 680
},
{
"epoch": 0.91,
"learning_rate": 1.636093629063487e-05,
"loss": 0.5004,
"step": 681
},
{
"epoch": 0.91,
"learning_rate": 1.634983548517398e-05,
"loss": 0.4925,
"step": 682
},
{
"epoch": 0.91,
"learning_rate": 1.633872155290121e-05,
"loss": 0.5373,
"step": 683
},
{
"epoch": 0.91,
"learning_rate": 1.632759451679204e-05,
"loss": 0.4419,
"step": 684
},
{
"epoch": 0.91,
"learning_rate": 1.6316454399849028e-05,
"loss": 0.5154,
"step": 685
},
{
"epoch": 0.91,
"learning_rate": 1.6305301225101785e-05,
"loss": 0.556,
"step": 686
},
{
"epoch": 0.91,
"learning_rate": 1.6294135015606914e-05,
"loss": 0.5158,
"step": 687
},
{
"epoch": 0.92,
"learning_rate": 1.628295579444796e-05,
"loss": 0.5772,
"step": 688
},
{
"epoch": 0.92,
"learning_rate": 1.6271763584735373e-05,
"loss": 0.5327,
"step": 689
},
{
"epoch": 0.92,
"learning_rate": 1.6260558409606444e-05,
"loss": 0.5582,
"step": 690
},
{
"epoch": 0.92,
"learning_rate": 1.6249340292225282e-05,
"loss": 0.5634,
"step": 691
},
{
"epoch": 0.92,
"learning_rate": 1.6238109255782738e-05,
"loss": 0.5778,
"step": 692
},
{
"epoch": 0.92,
"learning_rate": 1.6226865323496373e-05,
"loss": 0.5424,
"step": 693
},
{
"epoch": 0.92,
"learning_rate": 1.621560851861041e-05,
"loss": 0.5612,
"step": 694
},
{
"epoch": 0.92,
"learning_rate": 1.6204338864395683e-05,
"loss": 0.5013,
"step": 695
},
{
"epoch": 0.93,
"learning_rate": 1.6193056384149586e-05,
"loss": 0.4523,
"step": 696
},
{
"epoch": 0.93,
"learning_rate": 1.618176110119603e-05,
"loss": 0.5348,
"step": 697
},
{
"epoch": 0.93,
"learning_rate": 1.6170453038885394e-05,
"loss": 0.5137,
"step": 698
},
{
"epoch": 0.93,
"learning_rate": 1.6159132220594474e-05,
"loss": 0.5526,
"step": 699
},
{
"epoch": 0.93,
"learning_rate": 1.614779866972643e-05,
"loss": 0.5209,
"step": 700
},
{
"epoch": 0.93,
"learning_rate": 1.6136452409710757e-05,
"loss": 0.5774,
"step": 701
},
{
"epoch": 0.93,
"learning_rate": 1.6125093464003215e-05,
"loss": 0.5387,
"step": 702
},
{
"epoch": 0.94,
"learning_rate": 1.6113721856085783e-05,
"loss": 0.5866,
"step": 703
},
{
"epoch": 0.94,
"learning_rate": 1.6102337609466627e-05,
"loss": 0.5322,
"step": 704
},
{
"epoch": 0.94,
"learning_rate": 1.6090940747680033e-05,
"loss": 0.5236,
"step": 705
},
{
"epoch": 0.94,
"learning_rate": 1.6079531294286374e-05,
"loss": 0.5094,
"step": 706
},
{
"epoch": 0.94,
"learning_rate": 1.6068109272872044e-05,
"loss": 0.486,
"step": 707
},
{
"epoch": 0.94,
"learning_rate": 1.6056674707049423e-05,
"loss": 0.5285,
"step": 708
},
{
"epoch": 0.94,
"learning_rate": 1.6045227620456825e-05,
"loss": 0.5118,
"step": 709
},
{
"epoch": 0.94,
"learning_rate": 1.6033768036758446e-05,
"loss": 0.5344,
"step": 710
},
{
"epoch": 0.95,
"learning_rate": 1.6022295979644314e-05,
"loss": 0.4874,
"step": 711
},
{
"epoch": 0.95,
"learning_rate": 1.6010811472830253e-05,
"loss": 0.4917,
"step": 712
},
{
"epoch": 0.95,
"learning_rate": 1.599931454005781e-05,
"loss": 0.5593,
"step": 713
},
{
"epoch": 0.95,
"learning_rate": 1.5987805205094225e-05,
"loss": 0.5395,
"step": 714
},
{
"epoch": 0.95,
"learning_rate": 1.597628349173239e-05,
"loss": 0.5768,
"step": 715
},
{
"epoch": 0.95,
"learning_rate": 1.596474942379076e-05,
"loss": 0.5764,
"step": 716
},
{
"epoch": 0.95,
"learning_rate": 1.595320302511335e-05,
"loss": 0.566,
"step": 717
},
{
"epoch": 0.96,
"learning_rate": 1.5941644319569665e-05,
"loss": 0.4719,
"step": 718
},
{
"epoch": 0.96,
"learning_rate": 1.5930073331054646e-05,
"loss": 0.4876,
"step": 719
},
{
"epoch": 0.96,
"learning_rate": 1.5918490083488625e-05,
"loss": 0.5392,
"step": 720
},
{
"epoch": 0.96,
"learning_rate": 1.590689460081728e-05,
"loss": 0.5153,
"step": 721
},
{
"epoch": 0.96,
"learning_rate": 1.5895286907011582e-05,
"loss": 0.5176,
"step": 722
},
{
"epoch": 0.96,
"learning_rate": 1.5883667026067745e-05,
"loss": 0.5485,
"step": 723
},
{
"epoch": 0.96,
"learning_rate": 1.587203498200718e-05,
"loss": 0.5325,
"step": 724
},
{
"epoch": 0.96,
"learning_rate": 1.5860390798876435e-05,
"loss": 0.5387,
"step": 725
},
{
"epoch": 0.97,
"learning_rate": 1.584873450074716e-05,
"loss": 0.5527,
"step": 726
},
{
"epoch": 0.97,
"learning_rate": 1.583706611171605e-05,
"loss": 0.5442,
"step": 727
},
{
"epoch": 0.97,
"learning_rate": 1.582538565590479e-05,
"loss": 0.518,
"step": 728
},
{
"epoch": 0.97,
"learning_rate": 1.581369315746001e-05,
"loss": 0.5603,
"step": 729
},
{
"epoch": 0.97,
"learning_rate": 1.5801988640553246e-05,
"loss": 0.5076,
"step": 730
},
{
"epoch": 0.97,
"learning_rate": 1.5790272129380864e-05,
"loss": 0.4687,
"step": 731
},
{
"epoch": 0.97,
"learning_rate": 1.5778543648164034e-05,
"loss": 0.4937,
"step": 732
},
{
"epoch": 0.98,
"learning_rate": 1.5766803221148676e-05,
"loss": 0.517,
"step": 733
},
{
"epoch": 0.98,
"learning_rate": 1.575505087260539e-05,
"loss": 0.5697,
"step": 734
},
{
"epoch": 0.98,
"learning_rate": 1.5743286626829437e-05,
"loss": 0.5444,
"step": 735
},
{
"epoch": 0.98,
"learning_rate": 1.5731510508140666e-05,
"loss": 0.5673,
"step": 736
},
{
"epoch": 0.98,
"learning_rate": 1.571972254088347e-05,
"loss": 0.5083,
"step": 737
},
{
"epoch": 0.98,
"learning_rate": 1.5707922749426735e-05,
"loss": 0.396,
"step": 738
},
{
"epoch": 0.98,
"learning_rate": 1.56961111581638e-05,
"loss": 0.5268,
"step": 739
},
{
"epoch": 0.98,
"learning_rate": 1.568428779151238e-05,
"loss": 0.4363,
"step": 740
},
{
"epoch": 0.99,
"learning_rate": 1.5672452673914542e-05,
"loss": 0.4911,
"step": 741
},
{
"epoch": 0.99,
"learning_rate": 1.5660605829836658e-05,
"loss": 0.4721,
"step": 742
},
{
"epoch": 0.99,
"learning_rate": 1.564874728376932e-05,
"loss": 0.5447,
"step": 743
},
{
"epoch": 0.99,
"learning_rate": 1.5636877060227322e-05,
"loss": 0.5587,
"step": 744
},
{
"epoch": 0.99,
"learning_rate": 1.56249951837496e-05,
"loss": 0.5235,
"step": 745
},
{
"epoch": 0.99,
"learning_rate": 1.561310167889918e-05,
"loss": 0.4723,
"step": 746
},
{
"epoch": 0.99,
"learning_rate": 1.5601196570263124e-05,
"loss": 0.4636,
"step": 747
},
{
"epoch": 1.0,
"learning_rate": 1.5589279882452476e-05,
"loss": 0.522,
"step": 748
},
{
"epoch": 1.0,
"learning_rate": 1.5577351640102226e-05,
"loss": 0.5531,
"step": 749
},
{
"epoch": 1.0,
"learning_rate": 1.5565411867871257e-05,
"loss": 0.4981,
"step": 750
},
{
"epoch": 1.0,
"learning_rate": 1.5553460590442263e-05,
"loss": 0.5169,
"step": 751
},
{
"epoch": 1.0,
"learning_rate": 1.554149783252175e-05,
"loss": 0.4997,
"step": 752
},
{
"epoch": 1.0,
"learning_rate": 1.5529523618839937e-05,
"loss": 0.3923,
"step": 753
},
{
"epoch": 1.0,
"learning_rate": 1.5517537974150742e-05,
"loss": 0.4028,
"step": 754
},
{
"epoch": 1.0,
"learning_rate": 1.5505540923231698e-05,
"loss": 0.3686,
"step": 755
},
{
"epoch": 1.01,
"learning_rate": 1.5493532490883925e-05,
"loss": 0.4007,
"step": 756
},
{
"epoch": 1.01,
"learning_rate": 1.5481512701932074e-05,
"loss": 0.3535,
"step": 757
},
{
"epoch": 1.01,
"learning_rate": 1.5469481581224274e-05,
"loss": 0.4131,
"step": 758
},
{
"epoch": 1.01,
"learning_rate": 1.5457439153632063e-05,
"loss": 0.3907,
"step": 759
},
{
"epoch": 1.01,
"learning_rate": 1.5445385444050377e-05,
"loss": 0.3804,
"step": 760
},
{
"epoch": 1.01,
"learning_rate": 1.5433320477397467e-05,
"loss": 0.4405,
"step": 761
},
{
"epoch": 1.01,
"learning_rate": 1.5421244278614847e-05,
"loss": 0.44,
"step": 762
},
{
"epoch": 1.02,
"learning_rate": 1.540915687266726e-05,
"loss": 0.4553,
"step": 763
},
{
"epoch": 1.02,
"learning_rate": 1.5397058284542606e-05,
"loss": 0.3886,
"step": 764
},
{
"epoch": 1.02,
"learning_rate": 1.5384948539251922e-05,
"loss": 0.4194,
"step": 765
},
{
"epoch": 1.02,
"learning_rate": 1.5372827661829285e-05,
"loss": 0.4104,
"step": 766
},
{
"epoch": 1.02,
"learning_rate": 1.5360695677331804e-05,
"loss": 0.4086,
"step": 767
},
{
"epoch": 1.02,
"learning_rate": 1.534855261083954e-05,
"loss": 0.4544,
"step": 768
},
{
"epoch": 1.02,
"learning_rate": 1.5336398487455473e-05,
"loss": 0.4349,
"step": 769
},
{
"epoch": 1.02,
"learning_rate": 1.5324233332305426e-05,
"loss": 0.4625,
"step": 770
},
{
"epoch": 1.03,
"learning_rate": 1.5312057170538033e-05,
"loss": 0.3674,
"step": 771
},
{
"epoch": 1.03,
"learning_rate": 1.5299870027324694e-05,
"loss": 0.3616,
"step": 772
},
{
"epoch": 1.03,
"learning_rate": 1.5287671927859494e-05,
"loss": 0.3637,
"step": 773
},
{
"epoch": 1.03,
"learning_rate": 1.5275462897359175e-05,
"loss": 0.3691,
"step": 774
},
{
"epoch": 1.03,
"learning_rate": 1.5263242961063075e-05,
"loss": 0.4458,
"step": 775
},
{
"epoch": 1.03,
"learning_rate": 1.525101214423308e-05,
"loss": 0.4362,
"step": 776
},
{
"epoch": 1.03,
"learning_rate": 1.5238770472153563e-05,
"loss": 0.3427,
"step": 777
},
{
"epoch": 1.04,
"learning_rate": 1.5226517970131345e-05,
"loss": 0.3899,
"step": 778
},
{
"epoch": 1.04,
"learning_rate": 1.521425466349563e-05,
"loss": 0.4188,
"step": 779
},
{
"epoch": 1.04,
"learning_rate": 1.5201980577597955e-05,
"loss": 0.4245,
"step": 780
},
{
"epoch": 1.04,
"learning_rate": 1.5189695737812153e-05,
"loss": 0.4518,
"step": 781
},
{
"epoch": 1.04,
"learning_rate": 1.5177400169534276e-05,
"loss": 0.4284,
"step": 782
},
{
"epoch": 1.04,
"learning_rate": 1.516509389818256e-05,
"loss": 0.4311,
"step": 783
},
{
"epoch": 1.04,
"learning_rate": 1.5152776949197372e-05,
"loss": 0.4098,
"step": 784
},
{
"epoch": 1.04,
"learning_rate": 1.5140449348041136e-05,
"loss": 0.3843,
"step": 785
},
{
"epoch": 1.05,
"learning_rate": 1.5128111120198314e-05,
"loss": 0.4021,
"step": 786
},
{
"epoch": 1.05,
"learning_rate": 1.5115762291175334e-05,
"loss": 0.4332,
"step": 787
},
{
"epoch": 1.05,
"learning_rate": 1.5103402886500526e-05,
"loss": 0.3936,
"step": 788
},
{
"epoch": 1.05,
"learning_rate": 1.5091032931724096e-05,
"loss": 0.4141,
"step": 789
},
{
"epoch": 1.05,
"learning_rate": 1.5078652452418063e-05,
"loss": 0.3487,
"step": 790
},
{
"epoch": 1.05,
"learning_rate": 1.5066261474176184e-05,
"loss": 0.4175,
"step": 791
},
{
"epoch": 1.05,
"learning_rate": 1.505386002261394e-05,
"loss": 0.4036,
"step": 792
},
{
"epoch": 1.06,
"learning_rate": 1.5041448123368454e-05,
"loss": 0.4055,
"step": 793
},
{
"epoch": 1.06,
"learning_rate": 1.5029025802098449e-05,
"loss": 0.414,
"step": 794
},
{
"epoch": 1.06,
"learning_rate": 1.501659308448419e-05,
"loss": 0.3485,
"step": 795
},
{
"epoch": 1.06,
"learning_rate": 1.5004149996227437e-05,
"loss": 0.4009,
"step": 796
},
{
"epoch": 1.06,
"learning_rate": 1.499169656305139e-05,
"loss": 0.4709,
"step": 797
},
{
"epoch": 1.06,
"learning_rate": 1.4979232810700638e-05,
"loss": 0.3868,
"step": 798
},
{
"epoch": 1.06,
"learning_rate": 1.4966758764941084e-05,
"loss": 0.3733,
"step": 799
},
{
"epoch": 1.06,
"learning_rate": 1.4954274451559938e-05,
"loss": 0.3761,
"step": 800
},
{
"epoch": 1.07,
"learning_rate": 1.4941779896365617e-05,
"loss": 0.3985,
"step": 801
},
{
"epoch": 1.07,
"learning_rate": 1.4929275125187712e-05,
"loss": 0.4211,
"step": 802
},
{
"epoch": 1.07,
"learning_rate": 1.491676016387694e-05,
"loss": 0.3833,
"step": 803
},
{
"epoch": 1.07,
"learning_rate": 1.4904235038305084e-05,
"loss": 0.443,
"step": 804
},
{
"epoch": 1.07,
"learning_rate": 1.4891699774364928e-05,
"loss": 0.4012,
"step": 805
},
{
"epoch": 1.07,
"learning_rate": 1.4879154397970228e-05,
"loss": 0.4254,
"step": 806
},
{
"epoch": 1.07,
"learning_rate": 1.4866598935055636e-05,
"loss": 0.4091,
"step": 807
},
{
"epoch": 1.08,
"learning_rate": 1.4854033411576659e-05,
"loss": 0.4144,
"step": 808
},
{
"epoch": 1.08,
"learning_rate": 1.4841457853509606e-05,
"loss": 0.3817,
"step": 809
},
{
"epoch": 1.08,
"learning_rate": 1.4828872286851526e-05,
"loss": 0.4129,
"step": 810
},
{
"epoch": 1.08,
"learning_rate": 1.481627673762015e-05,
"loss": 0.3956,
"step": 811
},
{
"epoch": 1.08,
"learning_rate": 1.4803671231853867e-05,
"loss": 0.4042,
"step": 812
},
{
"epoch": 1.08,
"learning_rate": 1.4791055795611623e-05,
"loss": 0.4099,
"step": 813
},
{
"epoch": 1.08,
"learning_rate": 1.4778430454972919e-05,
"loss": 0.4024,
"step": 814
},
{
"epoch": 1.08,
"learning_rate": 1.4765795236037708e-05,
"loss": 0.4296,
"step": 815
},
{
"epoch": 1.09,
"learning_rate": 1.4753150164926376e-05,
"loss": 0.3707,
"step": 816
},
{
"epoch": 1.09,
"learning_rate": 1.474049526777968e-05,
"loss": 0.3914,
"step": 817
},
{
"epoch": 1.09,
"learning_rate": 1.472783057075868e-05,
"loss": 0.3528,
"step": 818
},
{
"epoch": 1.09,
"learning_rate": 1.4715156100044693e-05,
"loss": 0.4084,
"step": 819
},
{
"epoch": 1.09,
"learning_rate": 1.4702471881839262e-05,
"loss": 0.3999,
"step": 820
},
{
"epoch": 1.09,
"learning_rate": 1.4689777942364054e-05,
"loss": 0.3552,
"step": 821
},
{
"epoch": 1.09,
"learning_rate": 1.4677074307860846e-05,
"loss": 0.4458,
"step": 822
},
{
"epoch": 1.1,
"learning_rate": 1.4664361004591459e-05,
"loss": 0.4254,
"step": 823
},
{
"epoch": 1.1,
"learning_rate": 1.4651638058837695e-05,
"loss": 0.3912,
"step": 824
},
{
"epoch": 1.1,
"learning_rate": 1.4638905496901292e-05,
"loss": 0.3838,
"step": 825
},
{
"epoch": 1.1,
"learning_rate": 1.4626163345103874e-05,
"loss": 0.3971,
"step": 826
},
{
"epoch": 1.1,
"learning_rate": 1.461341162978688e-05,
"loss": 0.3865,
"step": 827
},
{
"epoch": 1.1,
"learning_rate": 1.4600650377311523e-05,
"loss": 0.4375,
"step": 828
},
{
"epoch": 1.1,
"learning_rate": 1.4587879614058733e-05,
"loss": 0.4622,
"step": 829
},
{
"epoch": 1.1,
"learning_rate": 1.4575099366429102e-05,
"loss": 0.4123,
"step": 830
},
{
"epoch": 1.11,
"learning_rate": 1.4562309660842826e-05,
"loss": 0.4301,
"step": 831
},
{
"epoch": 1.11,
"learning_rate": 1.4549510523739652e-05,
"loss": 0.4209,
"step": 832
},
{
"epoch": 1.11,
"learning_rate": 1.453670198157883e-05,
"loss": 0.375,
"step": 833
},
{
"epoch": 1.11,
"learning_rate": 1.452388406083905e-05,
"loss": 0.4005,
"step": 834
},
{
"epoch": 1.11,
"learning_rate": 1.451105678801839e-05,
"loss": 0.3901,
"step": 835
},
{
"epoch": 1.11,
"learning_rate": 1.4498220189634257e-05,
"loss": 0.4246,
"step": 836
},
{
"epoch": 1.11,
"learning_rate": 1.4485374292223347e-05,
"loss": 0.3675,
"step": 837
},
{
"epoch": 1.12,
"learning_rate": 1.4472519122341566e-05,
"loss": 0.4222,
"step": 838
},
{
"epoch": 1.12,
"learning_rate": 1.4459654706563999e-05,
"loss": 0.3685,
"step": 839
},
{
"epoch": 1.12,
"learning_rate": 1.4446781071484844e-05,
"loss": 0.4442,
"step": 840
},
{
"epoch": 1.12,
"learning_rate": 1.4433898243717351e-05,
"loss": 0.3833,
"step": 841
},
{
"epoch": 1.12,
"learning_rate": 1.4421006249893779e-05,
"loss": 0.4315,
"step": 842
},
{
"epoch": 1.12,
"learning_rate": 1.4408105116665336e-05,
"loss": 0.4632,
"step": 843
},
{
"epoch": 1.12,
"learning_rate": 1.439519487070212e-05,
"loss": 0.3585,
"step": 844
},
{
"epoch": 1.12,
"learning_rate": 1.438227553869307e-05,
"loss": 0.4242,
"step": 845
},
{
"epoch": 1.13,
"learning_rate": 1.4369347147345916e-05,
"loss": 0.4203,
"step": 846
},
{
"epoch": 1.13,
"learning_rate": 1.4356409723387092e-05,
"loss": 0.3878,
"step": 847
},
{
"epoch": 1.13,
"learning_rate": 1.4343463293561734e-05,
"loss": 0.4562,
"step": 848
},
{
"epoch": 1.13,
"learning_rate": 1.4330507884633582e-05,
"loss": 0.3938,
"step": 849
},
{
"epoch": 1.13,
"learning_rate": 1.4317543523384928e-05,
"loss": 0.4618,
"step": 850
},
{
"epoch": 1.13,
"learning_rate": 1.4304570236616595e-05,
"loss": 0.4335,
"step": 851
},
{
"epoch": 1.13,
"learning_rate": 1.4291588051147838e-05,
"loss": 0.4421,
"step": 852
},
{
"epoch": 1.14,
"learning_rate": 1.427859699381631e-05,
"loss": 0.3868,
"step": 853
},
{
"epoch": 1.14,
"learning_rate": 1.4265597091478015e-05,
"loss": 0.3885,
"step": 854
},
{
"epoch": 1.14,
"learning_rate": 1.4252588371007228e-05,
"loss": 0.396,
"step": 855
},
{
"epoch": 1.14,
"learning_rate": 1.4239570859296468e-05,
"loss": 0.414,
"step": 856
},
{
"epoch": 1.14,
"learning_rate": 1.422654458325642e-05,
"loss": 0.3773,
"step": 857
},
{
"epoch": 1.14,
"learning_rate": 1.4213509569815884e-05,
"loss": 0.4021,
"step": 858
},
{
"epoch": 1.14,
"learning_rate": 1.420046584592173e-05,
"loss": 0.3979,
"step": 859
},
{
"epoch": 1.14,
"learning_rate": 1.4187413438538831e-05,
"loss": 0.4026,
"step": 860
},
{
"epoch": 1.15,
"learning_rate": 1.417435237465001e-05,
"loss": 0.3902,
"step": 861
},
{
"epoch": 1.15,
"learning_rate": 1.4161282681255987e-05,
"loss": 0.3861,
"step": 862
},
{
"epoch": 1.15,
"learning_rate": 1.414820438537532e-05,
"loss": 0.3986,
"step": 863
},
{
"epoch": 1.15,
"learning_rate": 1.4135117514044354e-05,
"loss": 0.4363,
"step": 864
},
{
"epoch": 1.15,
"learning_rate": 1.4122022094317163e-05,
"loss": 0.4074,
"step": 865
},
{
"epoch": 1.15,
"learning_rate": 1.4108918153265485e-05,
"loss": 0.3809,
"step": 866
},
{
"epoch": 1.15,
"learning_rate": 1.4095805717978674e-05,
"loss": 0.4117,
"step": 867
},
{
"epoch": 1.16,
"learning_rate": 1.408268481556366e-05,
"loss": 0.4628,
"step": 868
},
{
"epoch": 1.16,
"learning_rate": 1.4069555473144856e-05,
"loss": 0.4029,
"step": 869
},
{
"epoch": 1.16,
"learning_rate": 1.4056417717864132e-05,
"loss": 0.379,
"step": 870
},
{
"epoch": 1.16,
"learning_rate": 1.4043271576880754e-05,
"loss": 0.3925,
"step": 871
},
{
"epoch": 1.16,
"learning_rate": 1.4030117077371316e-05,
"loss": 0.3917,
"step": 872
},
{
"epoch": 1.16,
"learning_rate": 1.4016954246529697e-05,
"loss": 0.3495,
"step": 873
},
{
"epoch": 1.16,
"learning_rate": 1.4003783111566994e-05,
"loss": 0.4229,
"step": 874
},
{
"epoch": 1.16,
"learning_rate": 1.3990603699711469e-05,
"loss": 0.4326,
"step": 875
},
{
"epoch": 1.17,
"learning_rate": 1.397741603820851e-05,
"loss": 0.4123,
"step": 876
},
{
"epoch": 1.17,
"learning_rate": 1.3964220154320535e-05,
"loss": 0.393,
"step": 877
},
{
"epoch": 1.17,
"learning_rate": 1.395101607532698e-05,
"loss": 0.4087,
"step": 878
},
{
"epoch": 1.17,
"learning_rate": 1.3937803828524219e-05,
"loss": 0.3626,
"step": 879
},
{
"epoch": 1.17,
"learning_rate": 1.39245834412255e-05,
"loss": 0.4136,
"step": 880
},
{
"epoch": 1.17,
"learning_rate": 1.391135494076091e-05,
"loss": 0.45,
"step": 881
},
{
"epoch": 1.17,
"learning_rate": 1.3898118354477306e-05,
"loss": 0.4047,
"step": 882
},
{
"epoch": 1.17,
"learning_rate": 1.3884873709738259e-05,
"loss": 0.3767,
"step": 883
},
{
"epoch": 1.18,
"learning_rate": 1.3871621033923997e-05,
"loss": 0.374,
"step": 884
},
{
"epoch": 1.18,
"learning_rate": 1.3858360354431355e-05,
"loss": 0.3774,
"step": 885
},
{
"epoch": 1.18,
"learning_rate": 1.3845091698673713e-05,
"loss": 0.3599,
"step": 886
},
{
"epoch": 1.18,
"learning_rate": 1.3831815094080938e-05,
"loss": 0.4637,
"step": 887
},
{
"epoch": 1.18,
"learning_rate": 1.3818530568099328e-05,
"loss": 0.4131,
"step": 888
},
{
"epoch": 1.18,
"learning_rate": 1.3805238148191564e-05,
"loss": 0.4008,
"step": 889
},
{
"epoch": 1.18,
"learning_rate": 1.3791937861836636e-05,
"loss": 0.4088,
"step": 890
},
{
"epoch": 1.19,
"learning_rate": 1.3778629736529801e-05,
"loss": 0.4052,
"step": 891
},
{
"epoch": 1.19,
"learning_rate": 1.3765313799782523e-05,
"loss": 0.4569,
"step": 892
},
{
"epoch": 1.19,
"learning_rate": 1.3751990079122412e-05,
"loss": 0.4333,
"step": 893
},
{
"epoch": 1.19,
"learning_rate": 1.3738658602093168e-05,
"loss": 0.3979,
"step": 894
},
{
"epoch": 1.19,
"learning_rate": 1.3725319396254531e-05,
"loss": 0.3753,
"step": 895
},
{
"epoch": 1.19,
"learning_rate": 1.3711972489182208e-05,
"loss": 0.4217,
"step": 896
},
{
"epoch": 1.19,
"learning_rate": 1.369861790846784e-05,
"loss": 0.3559,
"step": 897
},
{
"epoch": 1.19,
"learning_rate": 1.3685255681718922e-05,
"loss": 0.4057,
"step": 898
},
{
"epoch": 1.2,
"learning_rate": 1.3671885836558757e-05,
"loss": 0.4011,
"step": 899
},
{
"epoch": 1.2,
"learning_rate": 1.3658508400626403e-05,
"loss": 0.4364,
"step": 900
},
{
"epoch": 1.2,
"learning_rate": 1.36451234015766e-05,
"loss": 0.3803,
"step": 901
},
{
"epoch": 1.2,
"learning_rate": 1.3631730867079731e-05,
"loss": 0.4065,
"step": 902
},
{
"epoch": 1.2,
"learning_rate": 1.3618330824821753e-05,
"loss": 0.4144,
"step": 903
},
{
"epoch": 1.2,
"learning_rate": 1.3604923302504146e-05,
"loss": 0.3598,
"step": 904
},
{
"epoch": 1.2,
"learning_rate": 1.3591508327843859e-05,
"loss": 0.3769,
"step": 905
},
{
"epoch": 1.21,
"learning_rate": 1.3578085928573226e-05,
"loss": 0.3617,
"step": 906
},
{
"epoch": 1.21,
"learning_rate": 1.3564656132439953e-05,
"loss": 0.3795,
"step": 907
},
{
"epoch": 1.21,
"learning_rate": 1.3551218967207032e-05,
"loss": 0.3873,
"step": 908
},
{
"epoch": 1.21,
"learning_rate": 1.3537774460652673e-05,
"loss": 0.3598,
"step": 909
},
{
"epoch": 1.21,
"learning_rate": 1.3524322640570283e-05,
"loss": 0.4041,
"step": 910
},
{
"epoch": 1.21,
"learning_rate": 1.3510863534768381e-05,
"loss": 0.3939,
"step": 911
},
{
"epoch": 1.21,
"learning_rate": 1.3497397171070539e-05,
"loss": 0.3919,
"step": 912
},
{
"epoch": 1.21,
"learning_rate": 1.3483923577315347e-05,
"loss": 0.3968,
"step": 913
},
{
"epoch": 1.22,
"learning_rate": 1.3470442781356332e-05,
"loss": 0.3836,
"step": 914
},
{
"epoch": 1.22,
"learning_rate": 1.3456954811061909e-05,
"loss": 0.3582,
"step": 915
},
{
"epoch": 1.22,
"learning_rate": 1.3443459694315335e-05,
"loss": 0.3858,
"step": 916
},
{
"epoch": 1.22,
"learning_rate": 1.3429957459014626e-05,
"loss": 0.3942,
"step": 917
},
{
"epoch": 1.22,
"learning_rate": 1.3416448133072525e-05,
"loss": 0.4013,
"step": 918
},
{
"epoch": 1.22,
"learning_rate": 1.3402931744416432e-05,
"loss": 0.4524,
"step": 919
},
{
"epoch": 1.22,
"learning_rate": 1.3389408320988339e-05,
"loss": 0.4013,
"step": 920
},
{
"epoch": 1.23,
"learning_rate": 1.3375877890744795e-05,
"loss": 0.3735,
"step": 921
},
{
"epoch": 1.23,
"learning_rate": 1.3362340481656817e-05,
"loss": 0.3816,
"step": 922
},
{
"epoch": 1.23,
"learning_rate": 1.3348796121709862e-05,
"loss": 0.3905,
"step": 923
},
{
"epoch": 1.23,
"learning_rate": 1.3335244838903756e-05,
"loss": 0.405,
"step": 924
},
{
"epoch": 1.23,
"learning_rate": 1.3321686661252626e-05,
"loss": 0.3871,
"step": 925
},
{
"epoch": 1.23,
"learning_rate": 1.3308121616784862e-05,
"loss": 0.3905,
"step": 926
},
{
"epoch": 1.23,
"learning_rate": 1.329454973354305e-05,
"loss": 0.4165,
"step": 927
},
{
"epoch": 1.23,
"learning_rate": 1.3280971039583906e-05,
"loss": 0.3895,
"step": 928
},
{
"epoch": 1.24,
"learning_rate": 1.3267385562978228e-05,
"loss": 0.3369,
"step": 929
},
{
"epoch": 1.24,
"learning_rate": 1.3253793331810845e-05,
"loss": 0.3956,
"step": 930
},
{
"epoch": 1.24,
"learning_rate": 1.3240194374180536e-05,
"loss": 0.3502,
"step": 931
},
{
"epoch": 1.24,
"learning_rate": 1.322658871819999e-05,
"loss": 0.4111,
"step": 932
},
{
"epoch": 1.24,
"learning_rate": 1.3212976391995752e-05,
"loss": 0.4257,
"step": 933
},
{
"epoch": 1.24,
"learning_rate": 1.3199357423708142e-05,
"loss": 0.4108,
"step": 934
},
{
"epoch": 1.24,
"learning_rate": 1.3185731841491219e-05,
"loss": 0.4203,
"step": 935
},
{
"epoch": 1.25,
"learning_rate": 1.3172099673512715e-05,
"loss": 0.3531,
"step": 936
},
{
"epoch": 1.25,
"learning_rate": 1.3158460947953975e-05,
"loss": 0.3896,
"step": 937
},
{
"epoch": 1.25,
"learning_rate": 1.31448156930099e-05,
"loss": 0.3907,
"step": 938
},
{
"epoch": 1.25,
"learning_rate": 1.3131163936888888e-05,
"loss": 0.3528,
"step": 939
},
{
"epoch": 1.25,
"learning_rate": 1.3117505707812779e-05,
"loss": 0.347,
"step": 940
},
{
"epoch": 1.25,
"learning_rate": 1.3103841034016793e-05,
"loss": 0.4016,
"step": 941
},
{
"epoch": 1.25,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.4088,
"step": 942
},
{
"epoch": 1.25,
"learning_rate": 1.3076492465272632e-05,
"loss": 0.3764,
"step": 943
},
{
"epoch": 1.26,
"learning_rate": 1.3062808626861276e-05,
"loss": 0.3463,
"step": 944
},
{
"epoch": 1.26,
"learning_rate": 1.3049118456803568e-05,
"loss": 0.3623,
"step": 945
},
{
"epoch": 1.26,
"learning_rate": 1.3035421983400762e-05,
"loss": 0.3405,
"step": 946
},
{
"epoch": 1.26,
"learning_rate": 1.3021719234967139e-05,
"loss": 0.348,
"step": 947
},
{
"epoch": 1.26,
"learning_rate": 1.3008010239829951e-05,
"loss": 0.4005,
"step": 948
},
{
"epoch": 1.26,
"learning_rate": 1.2994295026329369e-05,
"loss": 0.3788,
"step": 949
},
{
"epoch": 1.26,
"learning_rate": 1.298057362281841e-05,
"loss": 0.4222,
"step": 950
},
{
"epoch": 1.27,
"learning_rate": 1.2966846057662896e-05,
"loss": 0.389,
"step": 951
},
{
"epoch": 1.27,
"learning_rate": 1.2953112359241385e-05,
"loss": 0.3683,
"step": 952
},
{
"epoch": 1.27,
"learning_rate": 1.2939372555945112e-05,
"loss": 0.3502,
"step": 953
},
{
"epoch": 1.27,
"learning_rate": 1.2925626676177926e-05,
"loss": 0.4359,
"step": 954
},
{
"epoch": 1.27,
"learning_rate": 1.2911874748356255e-05,
"loss": 0.3482,
"step": 955
},
{
"epoch": 1.27,
"learning_rate": 1.2898116800909015e-05,
"loss": 0.402,
"step": 956
},
{
"epoch": 1.27,
"learning_rate": 1.2884352862277569e-05,
"loss": 0.4015,
"step": 957
},
{
"epoch": 1.27,
"learning_rate": 1.2870582960915669e-05,
"loss": 0.398,
"step": 958
},
{
"epoch": 1.28,
"learning_rate": 1.2856807125289396e-05,
"loss": 0.4351,
"step": 959
},
{
"epoch": 1.28,
"learning_rate": 1.2843025383877086e-05,
"loss": 0.4223,
"step": 960
},
{
"epoch": 1.28,
"learning_rate": 1.28292377651693e-05,
"loss": 0.3735,
"step": 961
},
{
"epoch": 1.28,
"learning_rate": 1.281544429766874e-05,
"loss": 0.3737,
"step": 962
},
{
"epoch": 1.28,
"learning_rate": 1.2801645009890195e-05,
"loss": 0.3587,
"step": 963
},
{
"epoch": 1.28,
"learning_rate": 1.2787839930360501e-05,
"loss": 0.367,
"step": 964
},
{
"epoch": 1.28,
"learning_rate": 1.2774029087618448e-05,
"loss": 0.4311,
"step": 965
},
{
"epoch": 1.29,
"learning_rate": 1.2760212510214753e-05,
"loss": 0.419,
"step": 966
},
{
"epoch": 1.29,
"learning_rate": 1.2746390226711988e-05,
"loss": 0.4003,
"step": 967
},
{
"epoch": 1.29,
"learning_rate": 1.2732562265684512e-05,
"loss": 0.4417,
"step": 968
},
{
"epoch": 1.29,
"learning_rate": 1.2718728655718429e-05,
"loss": 0.3897,
"step": 969
},
{
"epoch": 1.29,
"learning_rate": 1.2704889425411524e-05,
"loss": 0.4317,
"step": 970
},
{
"epoch": 1.29,
"learning_rate": 1.2691044603373186e-05,
"loss": 0.4082,
"step": 971
},
{
"epoch": 1.29,
"learning_rate": 1.2677194218224383e-05,
"loss": 0.3805,
"step": 972
},
{
"epoch": 1.29,
"learning_rate": 1.2663338298597562e-05,
"loss": 0.4267,
"step": 973
},
{
"epoch": 1.3,
"learning_rate": 1.264947687313663e-05,
"loss": 0.3848,
"step": 974
},
{
"epoch": 1.3,
"learning_rate": 1.2635609970496872e-05,
"loss": 0.3909,
"step": 975
},
{
"epoch": 1.3,
"learning_rate": 1.2621737619344883e-05,
"loss": 0.4333,
"step": 976
},
{
"epoch": 1.3,
"learning_rate": 1.2607859848358535e-05,
"loss": 0.4267,
"step": 977
},
{
"epoch": 1.3,
"learning_rate": 1.2593976686226906e-05,
"loss": 0.3827,
"step": 978
},
{
"epoch": 1.3,
"learning_rate": 1.2580088161650206e-05,
"loss": 0.3738,
"step": 979
},
{
"epoch": 1.3,
"learning_rate": 1.2566194303339738e-05,
"loss": 0.3439,
"step": 980
},
{
"epoch": 1.31,
"learning_rate": 1.2552295140017834e-05,
"loss": 0.3631,
"step": 981
},
{
"epoch": 1.31,
"learning_rate": 1.2538390700417788e-05,
"loss": 0.3873,
"step": 982
},
{
"epoch": 1.31,
"learning_rate": 1.2524481013283809e-05,
"loss": 0.3874,
"step": 983
},
{
"epoch": 1.31,
"learning_rate": 1.251056610737094e-05,
"loss": 0.381,
"step": 984
},
{
"epoch": 1.31,
"learning_rate": 1.2496646011445025e-05,
"loss": 0.3948,
"step": 985
},
{
"epoch": 1.31,
"learning_rate": 1.2482720754282638e-05,
"loss": 0.4366,
"step": 986
},
{
"epoch": 1.31,
"learning_rate": 1.246879036467101e-05,
"loss": 0.3534,
"step": 987
},
{
"epoch": 1.31,
"learning_rate": 1.2454854871407993e-05,
"loss": 0.3832,
"step": 988
},
{
"epoch": 1.32,
"learning_rate": 1.2440914303301988e-05,
"loss": 0.4036,
"step": 989
},
{
"epoch": 1.32,
"learning_rate": 1.2426968689171883e-05,
"loss": 0.465,
"step": 990
},
{
"epoch": 1.32,
"learning_rate": 1.2413018057847004e-05,
"loss": 0.4003,
"step": 991
},
{
"epoch": 1.32,
"learning_rate": 1.2399062438167042e-05,
"loss": 0.3981,
"step": 992
},
{
"epoch": 1.32,
"learning_rate": 1.2385101858982004e-05,
"loss": 0.4212,
"step": 993
},
{
"epoch": 1.32,
"learning_rate": 1.237113634915215e-05,
"loss": 0.3927,
"step": 994
},
{
"epoch": 1.32,
"learning_rate": 1.2357165937547935e-05,
"loss": 0.4102,
"step": 995
},
{
"epoch": 1.33,
"learning_rate": 1.2343190653049937e-05,
"loss": 0.3698,
"step": 996
},
{
"epoch": 1.33,
"learning_rate": 1.2329210524548818e-05,
"loss": 0.4015,
"step": 997
},
{
"epoch": 1.33,
"learning_rate": 1.2315225580945252e-05,
"loss": 0.456,
"step": 998
},
{
"epoch": 1.33,
"learning_rate": 1.2301235851149867e-05,
"loss": 0.3995,
"step": 999
},
{
"epoch": 1.33,
"learning_rate": 1.2287241364083177e-05,
"loss": 0.386,
"step": 1000
},
{
"epoch": 1.33,
"learning_rate": 1.2273242148675547e-05,
"loss": 0.394,
"step": 1001
},
{
"epoch": 1.33,
"learning_rate": 1.2259238233867103e-05,
"loss": 0.3588,
"step": 1002
},
{
"epoch": 1.33,
"learning_rate": 1.224522964860769e-05,
"loss": 0.4739,
"step": 1003
},
{
"epoch": 1.34,
"learning_rate": 1.2231216421856809e-05,
"loss": 0.3866,
"step": 1004
},
{
"epoch": 1.34,
"learning_rate": 1.2217198582583556e-05,
"loss": 0.3561,
"step": 1005
},
{
"epoch": 1.34,
"learning_rate": 1.2203176159766561e-05,
"loss": 0.4068,
"step": 1006
},
{
"epoch": 1.34,
"learning_rate": 1.218914918239393e-05,
"loss": 0.4147,
"step": 1007
},
{
"epoch": 1.34,
"learning_rate": 1.2175117679463187e-05,
"loss": 0.3591,
"step": 1008
},
{
"epoch": 1.34,
"learning_rate": 1.2161081679981209e-05,
"loss": 0.4341,
"step": 1009
},
{
"epoch": 1.34,
"learning_rate": 1.2147041212964166e-05,
"loss": 0.3503,
"step": 1010
},
{
"epoch": 1.35,
"learning_rate": 1.213299630743747e-05,
"loss": 0.3848,
"step": 1011
},
{
"epoch": 1.35,
"learning_rate": 1.2118946992435705e-05,
"loss": 0.35,
"step": 1012
},
{
"epoch": 1.35,
"learning_rate": 1.2104893297002566e-05,
"loss": 0.3734,
"step": 1013
},
{
"epoch": 1.35,
"learning_rate": 1.2090835250190814e-05,
"loss": 0.4052,
"step": 1014
},
{
"epoch": 1.35,
"learning_rate": 1.20767728810622e-05,
"loss": 0.3985,
"step": 1015
},
{
"epoch": 1.35,
"learning_rate": 1.2062706218687404e-05,
"loss": 0.3564,
"step": 1016
},
{
"epoch": 1.35,
"learning_rate": 1.2048635292145991e-05,
"loss": 0.4138,
"step": 1017
},
{
"epoch": 1.35,
"learning_rate": 1.2034560130526341e-05,
"loss": 0.3527,
"step": 1018
},
{
"epoch": 1.36,
"learning_rate": 1.2020480762925577e-05,
"loss": 0.4207,
"step": 1019
},
{
"epoch": 1.36,
"learning_rate": 1.2006397218449537e-05,
"loss": 0.41,
"step": 1020
},
{
"epoch": 1.36,
"learning_rate": 1.1992309526212671e-05,
"loss": 0.3881,
"step": 1021
},
{
"epoch": 1.36,
"learning_rate": 1.1978217715338021e-05,
"loss": 0.3877,
"step": 1022
},
{
"epoch": 1.36,
"learning_rate": 1.1964121814957136e-05,
"loss": 0.4229,
"step": 1023
},
{
"epoch": 1.36,
"learning_rate": 1.1950021854210018e-05,
"loss": 0.369,
"step": 1024
},
{
"epoch": 1.36,
"learning_rate": 1.193591786224507e-05,
"loss": 0.4036,
"step": 1025
},
{
"epoch": 1.37,
"learning_rate": 1.1921809868219021e-05,
"loss": 0.3862,
"step": 1026
},
{
"epoch": 1.37,
"learning_rate": 1.1907697901296871e-05,
"loss": 0.3794,
"step": 1027
},
{
"epoch": 1.37,
"learning_rate": 1.1893581990651848e-05,
"loss": 0.3867,
"step": 1028
},
{
"epoch": 1.37,
"learning_rate": 1.1879462165465314e-05,
"loss": 0.4121,
"step": 1029
},
{
"epoch": 1.37,
"learning_rate": 1.1865338454926736e-05,
"loss": 0.3355,
"step": 1030
},
{
"epoch": 1.37,
"learning_rate": 1.185121088823361e-05,
"loss": 0.4035,
"step": 1031
},
{
"epoch": 1.37,
"learning_rate": 1.1837079494591401e-05,
"loss": 0.3914,
"step": 1032
},
{
"epoch": 1.37,
"learning_rate": 1.1822944303213486e-05,
"loss": 0.4246,
"step": 1033
},
{
"epoch": 1.38,
"learning_rate": 1.1808805343321102e-05,
"loss": 0.3524,
"step": 1034
},
{
"epoch": 1.38,
"learning_rate": 1.1794662644143257e-05,
"loss": 0.4416,
"step": 1035
},
{
"epoch": 1.38,
"learning_rate": 1.178051623491671e-05,
"loss": 0.4373,
"step": 1036
},
{
"epoch": 1.38,
"learning_rate": 1.1766366144885877e-05,
"loss": 0.3892,
"step": 1037
},
{
"epoch": 1.38,
"learning_rate": 1.1752212403302785e-05,
"loss": 0.4062,
"step": 1038
},
{
"epoch": 1.38,
"learning_rate": 1.1738055039427014e-05,
"loss": 0.3698,
"step": 1039
},
{
"epoch": 1.38,
"learning_rate": 1.172389408252563e-05,
"loss": 0.3956,
"step": 1040
},
{
"epoch": 1.39,
"learning_rate": 1.1709729561873127e-05,
"loss": 0.4628,
"step": 1041
},
{
"epoch": 1.39,
"learning_rate": 1.169556150675136e-05,
"loss": 0.3547,
"step": 1042
},
{
"epoch": 1.39,
"learning_rate": 1.1681389946449504e-05,
"loss": 0.3984,
"step": 1043
},
{
"epoch": 1.39,
"learning_rate": 1.1667214910263966e-05,
"loss": 0.3975,
"step": 1044
},
{
"epoch": 1.39,
"learning_rate": 1.1653036427498354e-05,
"loss": 0.3935,
"step": 1045
},
{
"epoch": 1.39,
"learning_rate": 1.1638854527463383e-05,
"loss": 0.4245,
"step": 1046
},
{
"epoch": 1.39,
"learning_rate": 1.1624669239476844e-05,
"loss": 0.3888,
"step": 1047
},
{
"epoch": 1.39,
"learning_rate": 1.161048059286353e-05,
"loss": 0.4163,
"step": 1048
},
{
"epoch": 1.4,
"learning_rate": 1.159628861695518e-05,
"loss": 0.4077,
"step": 1049
},
{
"epoch": 1.4,
"learning_rate": 1.1582093341090406e-05,
"loss": 0.4206,
"step": 1050
},
{
"epoch": 1.4,
"learning_rate": 1.1567894794614652e-05,
"loss": 0.4298,
"step": 1051
},
{
"epoch": 1.4,
"learning_rate": 1.1553693006880115e-05,
"loss": 0.4158,
"step": 1052
},
{
"epoch": 1.4,
"learning_rate": 1.1539488007245704e-05,
"loss": 0.4388,
"step": 1053
},
{
"epoch": 1.4,
"learning_rate": 1.1525279825076954e-05,
"loss": 0.3861,
"step": 1054
},
{
"epoch": 1.4,
"learning_rate": 1.1511068489745986e-05,
"loss": 0.3817,
"step": 1055
},
{
"epoch": 1.41,
"learning_rate": 1.1496854030631443e-05,
"loss": 0.3967,
"step": 1056
},
{
"epoch": 1.41,
"learning_rate": 1.148263647711842e-05,
"loss": 0.3434,
"step": 1057
},
{
"epoch": 1.41,
"learning_rate": 1.1468415858598413e-05,
"loss": 0.4263,
"step": 1058
},
{
"epoch": 1.41,
"learning_rate": 1.1454192204469245e-05,
"loss": 0.3852,
"step": 1059
},
{
"epoch": 1.41,
"learning_rate": 1.143996554413503e-05,
"loss": 0.4517,
"step": 1060
},
{
"epoch": 1.41,
"learning_rate": 1.1425735907006082e-05,
"loss": 0.4012,
"step": 1061
},
{
"epoch": 1.41,
"learning_rate": 1.1411503322498878e-05,
"loss": 0.3449,
"step": 1062
},
{
"epoch": 1.41,
"learning_rate": 1.1397267820035986e-05,
"loss": 0.3733,
"step": 1063
},
{
"epoch": 1.42,
"learning_rate": 1.1383029429046e-05,
"loss": 0.4,
"step": 1064
},
{
"epoch": 1.42,
"learning_rate": 1.1368788178963492e-05,
"loss": 0.3731,
"step": 1065
},
{
"epoch": 1.42,
"learning_rate": 1.1354544099228947e-05,
"loss": 0.391,
"step": 1066
},
{
"epoch": 1.42,
"learning_rate": 1.1340297219288696e-05,
"loss": 0.4494,
"step": 1067
},
{
"epoch": 1.42,
"learning_rate": 1.1326047568594852e-05,
"loss": 0.4024,
"step": 1068
},
{
"epoch": 1.42,
"learning_rate": 1.1311795176605267e-05,
"loss": 0.3791,
"step": 1069
},
{
"epoch": 1.42,
"learning_rate": 1.1297540072783455e-05,
"loss": 0.3897,
"step": 1070
},
{
"epoch": 1.43,
"learning_rate": 1.1283282286598536e-05,
"loss": 0.4061,
"step": 1071
},
{
"epoch": 1.43,
"learning_rate": 1.1269021847525172e-05,
"loss": 0.384,
"step": 1072
},
{
"epoch": 1.43,
"learning_rate": 1.1254758785043516e-05,
"loss": 0.4029,
"step": 1073
},
{
"epoch": 1.43,
"learning_rate": 1.1240493128639145e-05,
"loss": 0.4111,
"step": 1074
},
{
"epoch": 1.43,
"learning_rate": 1.1226224907802986e-05,
"loss": 0.3945,
"step": 1075
},
{
"epoch": 1.43,
"learning_rate": 1.1211954152031282e-05,
"loss": 0.402,
"step": 1076
},
{
"epoch": 1.43,
"learning_rate": 1.119768089082551e-05,
"loss": 0.3924,
"step": 1077
},
{
"epoch": 1.43,
"learning_rate": 1.118340515369232e-05,
"loss": 0.3622,
"step": 1078
},
{
"epoch": 1.44,
"learning_rate": 1.1169126970143496e-05,
"loss": 0.3994,
"step": 1079
},
{
"epoch": 1.44,
"learning_rate": 1.1154846369695864e-05,
"loss": 0.3777,
"step": 1080
},
{
"epoch": 1.44,
"learning_rate": 1.1140563381871252e-05,
"loss": 0.352,
"step": 1081
},
{
"epoch": 1.44,
"learning_rate": 1.1126278036196432e-05,
"loss": 0.4038,
"step": 1082
},
{
"epoch": 1.44,
"learning_rate": 1.1111990362203034e-05,
"loss": 0.4041,
"step": 1083
},
{
"epoch": 1.44,
"learning_rate": 1.1097700389427511e-05,
"loss": 0.4331,
"step": 1084
},
{
"epoch": 1.44,
"learning_rate": 1.1083408147411075e-05,
"loss": 0.385,
"step": 1085
},
{
"epoch": 1.45,
"learning_rate": 1.1069113665699606e-05,
"loss": 0.3627,
"step": 1086
},
{
"epoch": 1.45,
"learning_rate": 1.1054816973843646e-05,
"loss": 0.4123,
"step": 1087
},
{
"epoch": 1.45,
"learning_rate": 1.1040518101398277e-05,
"loss": 0.3485,
"step": 1088
},
{
"epoch": 1.45,
"learning_rate": 1.1026217077923102e-05,
"loss": 0.4436,
"step": 1089
},
{
"epoch": 1.45,
"learning_rate": 1.1011913932982176e-05,
"loss": 0.3344,
"step": 1090
},
{
"epoch": 1.45,
"learning_rate": 1.099760869614393e-05,
"loss": 0.4286,
"step": 1091
},
{
"epoch": 1.45,
"learning_rate": 1.0983301396981116e-05,
"loss": 0.3961,
"step": 1092
},
{
"epoch": 1.45,
"learning_rate": 1.096899206507077e-05,
"loss": 0.4213,
"step": 1093
},
{
"epoch": 1.46,
"learning_rate": 1.0954680729994103e-05,
"loss": 0.4102,
"step": 1094
},
{
"epoch": 1.46,
"learning_rate": 1.0940367421336488e-05,
"loss": 0.346,
"step": 1095
},
{
"epoch": 1.46,
"learning_rate": 1.0926052168687366e-05,
"loss": 0.3738,
"step": 1096
},
{
"epoch": 1.46,
"learning_rate": 1.0911735001640206e-05,
"loss": 0.4301,
"step": 1097
},
{
"epoch": 1.46,
"learning_rate": 1.0897415949792427e-05,
"loss": 0.3766,
"step": 1098
},
{
"epoch": 1.46,
"learning_rate": 1.0883095042745347e-05,
"loss": 0.396,
"step": 1099
},
{
"epoch": 1.46,
"learning_rate": 1.0868772310104119e-05,
"loss": 0.4056,
"step": 1100
},
{
"epoch": 1.47,
"learning_rate": 1.085444778147767e-05,
"loss": 0.3815,
"step": 1101
},
{
"epoch": 1.47,
"learning_rate": 1.084012148647864e-05,
"loss": 0.3954,
"step": 1102
},
{
"epoch": 1.47,
"learning_rate": 1.0825793454723325e-05,
"loss": 0.3729,
"step": 1103
},
{
"epoch": 1.47,
"learning_rate": 1.0811463715831601e-05,
"loss": 0.3521,
"step": 1104
},
{
"epoch": 1.47,
"learning_rate": 1.0797132299426882e-05,
"loss": 0.4233,
"step": 1105
},
{
"epoch": 1.47,
"learning_rate": 1.0782799235136045e-05,
"loss": 0.402,
"step": 1106
},
{
"epoch": 1.47,
"learning_rate": 1.0768464552589379e-05,
"loss": 0.3972,
"step": 1107
},
{
"epoch": 1.47,
"learning_rate": 1.0754128281420511e-05,
"loss": 0.4245,
"step": 1108
},
{
"epoch": 1.48,
"learning_rate": 1.0739790451266358e-05,
"loss": 0.399,
"step": 1109
},
{
"epoch": 1.48,
"learning_rate": 1.0725451091767058e-05,
"loss": 0.4053,
"step": 1110
},
{
"epoch": 1.48,
"learning_rate": 1.0711110232565912e-05,
"loss": 0.3778,
"step": 1111
},
{
"epoch": 1.48,
"learning_rate": 1.0696767903309315e-05,
"loss": 0.4328,
"step": 1112
},
{
"epoch": 1.48,
"learning_rate": 1.0682424133646712e-05,
"loss": 0.3426,
"step": 1113
},
{
"epoch": 1.48,
"learning_rate": 1.0668078953230511e-05,
"loss": 0.3958,
"step": 1114
},
{
"epoch": 1.48,
"learning_rate": 1.0653732391716053e-05,
"loss": 0.3929,
"step": 1115
},
{
"epoch": 1.49,
"learning_rate": 1.0639384478761522e-05,
"loss": 0.4048,
"step": 1116
},
{
"epoch": 1.49,
"learning_rate": 1.0625035244027903e-05,
"loss": 0.4021,
"step": 1117
},
{
"epoch": 1.49,
"learning_rate": 1.0610684717178905e-05,
"loss": 0.3922,
"step": 1118
},
{
"epoch": 1.49,
"learning_rate": 1.0596332927880919e-05,
"loss": 0.3639,
"step": 1119
},
{
"epoch": 1.49,
"learning_rate": 1.0581979905802933e-05,
"loss": 0.3547,
"step": 1120
},
{
"epoch": 1.49,
"learning_rate": 1.0567625680616496e-05,
"loss": 0.4014,
"step": 1121
},
{
"epoch": 1.49,
"learning_rate": 1.055327028199564e-05,
"loss": 0.3819,
"step": 1122
},
{
"epoch": 1.49,
"learning_rate": 1.0538913739616817e-05,
"loss": 0.3648,
"step": 1123
},
{
"epoch": 1.5,
"learning_rate": 1.052455608315885e-05,
"loss": 0.3878,
"step": 1124
},
{
"epoch": 1.5,
"learning_rate": 1.0510197342302866e-05,
"loss": 0.3519,
"step": 1125
},
{
"epoch": 1.5,
"learning_rate": 1.0495837546732224e-05,
"loss": 0.428,
"step": 1126
},
{
"epoch": 1.5,
"learning_rate": 1.0481476726132476e-05,
"loss": 0.3829,
"step": 1127
},
{
"epoch": 1.5,
"learning_rate": 1.046711491019129e-05,
"loss": 0.4461,
"step": 1128
},
{
"epoch": 1.5,
"learning_rate": 1.0452752128598378e-05,
"loss": 0.3991,
"step": 1129
},
{
"epoch": 1.5,
"learning_rate": 1.0438388411045472e-05,
"loss": 0.4386,
"step": 1130
},
{
"epoch": 1.5,
"learning_rate": 1.0424023787226216e-05,
"loss": 0.4289,
"step": 1131
},
{
"epoch": 1.51,
"learning_rate": 1.0409658286836144e-05,
"loss": 0.3475,
"step": 1132
},
{
"epoch": 1.51,
"learning_rate": 1.0395291939572593e-05,
"loss": 0.3851,
"step": 1133
},
{
"epoch": 1.51,
"learning_rate": 1.0380924775134651e-05,
"loss": 0.3497,
"step": 1134
},
{
"epoch": 1.51,
"learning_rate": 1.0366556823223101e-05,
"loss": 0.4265,
"step": 1135
},
{
"epoch": 1.51,
"learning_rate": 1.0352188113540356e-05,
"loss": 0.4034,
"step": 1136
},
{
"epoch": 1.51,
"learning_rate": 1.0337818675790377e-05,
"loss": 0.377,
"step": 1137
},
{
"epoch": 1.51,
"learning_rate": 1.0323448539678653e-05,
"loss": 0.3927,
"step": 1138
},
{
"epoch": 1.52,
"learning_rate": 1.0309077734912104e-05,
"loss": 0.3918,
"step": 1139
},
{
"epoch": 1.52,
"learning_rate": 1.0294706291199032e-05,
"loss": 0.4098,
"step": 1140
},
{
"epoch": 1.52,
"learning_rate": 1.0280334238249072e-05,
"loss": 0.359,
"step": 1141
},
{
"epoch": 1.52,
"learning_rate": 1.0265961605773103e-05,
"loss": 0.4039,
"step": 1142
},
{
"epoch": 1.52,
"learning_rate": 1.0251588423483205e-05,
"loss": 0.4106,
"step": 1143
},
{
"epoch": 1.52,
"learning_rate": 1.023721472109261e-05,
"loss": 0.3272,
"step": 1144
},
{
"epoch": 1.52,
"learning_rate": 1.0222840528315602e-05,
"loss": 0.3948,
"step": 1145
},
{
"epoch": 1.52,
"learning_rate": 1.0208465874867497e-05,
"loss": 0.3821,
"step": 1146
},
{
"epoch": 1.53,
"learning_rate": 1.019409079046455e-05,
"loss": 0.3869,
"step": 1147
},
{
"epoch": 1.53,
"learning_rate": 1.0179715304823921e-05,
"loss": 0.3778,
"step": 1148
},
{
"epoch": 1.53,
"learning_rate": 1.0165339447663586e-05,
"loss": 0.3705,
"step": 1149
},
{
"epoch": 1.53,
"learning_rate": 1.0150963248702299e-05,
"loss": 0.3864,
"step": 1150
},
{
"epoch": 1.53,
"learning_rate": 1.013658673765951e-05,
"loss": 0.3836,
"step": 1151
},
{
"epoch": 1.53,
"learning_rate": 1.0122209944255332e-05,
"loss": 0.4196,
"step": 1152
},
{
"epoch": 1.53,
"learning_rate": 1.0107832898210438e-05,
"loss": 0.3793,
"step": 1153
},
{
"epoch": 1.54,
"learning_rate": 1.0093455629246044e-05,
"loss": 0.3428,
"step": 1154
},
{
"epoch": 1.54,
"learning_rate": 1.0079078167083816e-05,
"loss": 0.4084,
"step": 1155
},
{
"epoch": 1.54,
"learning_rate": 1.006470054144582e-05,
"loss": 0.395,
"step": 1156
},
{
"epoch": 1.54,
"learning_rate": 1.0050322782054464e-05,
"loss": 0.3754,
"step": 1157
},
{
"epoch": 1.54,
"learning_rate": 1.0035944918632429e-05,
"loss": 0.396,
"step": 1158
},
{
"epoch": 1.54,
"learning_rate": 1.0021566980902613e-05,
"loss": 0.4234,
"step": 1159
},
{
"epoch": 1.54,
"learning_rate": 1.0007188998588068e-05,
"loss": 0.3381,
"step": 1160
},
{
"epoch": 1.54,
"learning_rate": 9.992811001411935e-06,
"loss": 0.3193,
"step": 1161
},
{
"epoch": 1.55,
"learning_rate": 9.978433019097389e-06,
"loss": 0.4227,
"step": 1162
},
{
"epoch": 1.55,
"learning_rate": 9.96405508136757e-06,
"loss": 0.3533,
"step": 1163
},
{
"epoch": 1.55,
"learning_rate": 9.94967721794554e-06,
"loss": 0.3649,
"step": 1164
},
{
"epoch": 1.55,
"learning_rate": 9.935299458554183e-06,
"loss": 0.3968,
"step": 1165
},
{
"epoch": 1.55,
"learning_rate": 9.920921832916187e-06,
"loss": 0.3738,
"step": 1166
},
{
"epoch": 1.55,
"learning_rate": 9.906544370753957e-06,
"loss": 0.3771,
"step": 1167
},
{
"epoch": 1.55,
"learning_rate": 9.892167101789563e-06,
"loss": 0.3747,
"step": 1168
},
{
"epoch": 1.56,
"learning_rate": 9.877790055744673e-06,
"loss": 0.3853,
"step": 1169
},
{
"epoch": 1.56,
"learning_rate": 9.863413262340491e-06,
"loss": 0.4165,
"step": 1170
},
{
"epoch": 1.56,
"learning_rate": 9.849036751297704e-06,
"loss": 0.4511,
"step": 1171
},
{
"epoch": 1.56,
"learning_rate": 9.834660552336415e-06,
"loss": 0.424,
"step": 1172
},
{
"epoch": 1.56,
"learning_rate": 9.820284695176082e-06,
"loss": 0.3835,
"step": 1173
},
{
"epoch": 1.56,
"learning_rate": 9.805909209535453e-06,
"loss": 0.4683,
"step": 1174
},
{
"epoch": 1.56,
"learning_rate": 9.791534125132508e-06,
"loss": 0.4335,
"step": 1175
},
{
"epoch": 1.56,
"learning_rate": 9.7771594716844e-06,
"loss": 0.4148,
"step": 1176
},
{
"epoch": 1.57,
"learning_rate": 9.762785278907393e-06,
"loss": 0.3826,
"step": 1177
},
{
"epoch": 1.57,
"learning_rate": 9.748411576516794e-06,
"loss": 0.4034,
"step": 1178
},
{
"epoch": 1.57,
"learning_rate": 9.7340383942269e-06,
"loss": 0.3316,
"step": 1179
},
{
"epoch": 1.57,
"learning_rate": 9.719665761750933e-06,
"loss": 0.4083,
"step": 1180
},
{
"epoch": 1.57,
"learning_rate": 9.705293708800971e-06,
"loss": 0.3881,
"step": 1181
},
{
"epoch": 1.57,
"learning_rate": 9.690922265087898e-06,
"loss": 0.4507,
"step": 1182
},
{
"epoch": 1.57,
"learning_rate": 9.67655146032135e-06,
"loss": 0.3749,
"step": 1183
},
{
"epoch": 1.58,
"learning_rate": 9.662181324209625e-06,
"loss": 0.409,
"step": 1184
},
{
"epoch": 1.58,
"learning_rate": 9.64781188645965e-06,
"loss": 0.3762,
"step": 1185
},
{
"epoch": 1.58,
"learning_rate": 9.6334431767769e-06,
"loss": 0.3905,
"step": 1186
},
{
"epoch": 1.58,
"learning_rate": 9.619075224865352e-06,
"loss": 0.3479,
"step": 1187
},
{
"epoch": 1.58,
"learning_rate": 9.604708060427408e-06,
"loss": 0.3671,
"step": 1188
},
{
"epoch": 1.58,
"learning_rate": 9.590341713163858e-06,
"loss": 0.3931,
"step": 1189
},
{
"epoch": 1.58,
"learning_rate": 9.575976212773786e-06,
"loss": 0.4523,
"step": 1190
},
{
"epoch": 1.58,
"learning_rate": 9.561611588954533e-06,
"loss": 0.4036,
"step": 1191
},
{
"epoch": 1.59,
"learning_rate": 9.547247871401624e-06,
"loss": 0.4068,
"step": 1192
},
{
"epoch": 1.59,
"learning_rate": 9.532885089808713e-06,
"loss": 0.3594,
"step": 1193
},
{
"epoch": 1.59,
"learning_rate": 9.518523273867524e-06,
"loss": 0.3894,
"step": 1194
},
{
"epoch": 1.59,
"learning_rate": 9.504162453267776e-06,
"loss": 0.3979,
"step": 1195
},
{
"epoch": 1.59,
"learning_rate": 9.489802657697139e-06,
"loss": 0.4243,
"step": 1196
},
{
"epoch": 1.59,
"learning_rate": 9.475443916841154e-06,
"loss": 0.4059,
"step": 1197
},
{
"epoch": 1.59,
"learning_rate": 9.461086260383188e-06,
"loss": 0.4176,
"step": 1198
},
{
"epoch": 1.6,
"learning_rate": 9.446729718004362e-06,
"loss": 0.408,
"step": 1199
},
{
"epoch": 1.6,
"learning_rate": 9.432374319383505e-06,
"loss": 0.3716,
"step": 1200
},
{
"epoch": 1.6,
"learning_rate": 9.41802009419707e-06,
"loss": 0.3971,
"step": 1201
},
{
"epoch": 1.6,
"learning_rate": 9.403667072119086e-06,
"loss": 0.3936,
"step": 1202
},
{
"epoch": 1.6,
"learning_rate": 9.389315282821097e-06,
"loss": 0.3566,
"step": 1203
},
{
"epoch": 1.6,
"learning_rate": 9.3749647559721e-06,
"loss": 0.3569,
"step": 1204
},
{
"epoch": 1.6,
"learning_rate": 9.360615521238476e-06,
"loss": 0.3751,
"step": 1205
},
{
"epoch": 1.6,
"learning_rate": 9.346267608283947e-06,
"loss": 0.4599,
"step": 1206
},
{
"epoch": 1.61,
"learning_rate": 9.33192104676949e-06,
"loss": 0.343,
"step": 1207
},
{
"epoch": 1.61,
"learning_rate": 9.317575866353293e-06,
"loss": 0.328,
"step": 1208
},
{
"epoch": 1.61,
"learning_rate": 9.303232096690686e-06,
"loss": 0.4034,
"step": 1209
},
{
"epoch": 1.61,
"learning_rate": 9.28888976743409e-06,
"loss": 0.4065,
"step": 1210
},
{
"epoch": 1.61,
"learning_rate": 9.274548908232942e-06,
"loss": 0.4213,
"step": 1211
},
{
"epoch": 1.61,
"learning_rate": 9.260209548733647e-06,
"loss": 0.3857,
"step": 1212
},
{
"epoch": 1.61,
"learning_rate": 9.24587171857949e-06,
"loss": 0.3587,
"step": 1213
},
{
"epoch": 1.62,
"learning_rate": 9.231535447410625e-06,
"loss": 0.3866,
"step": 1214
},
{
"epoch": 1.62,
"learning_rate": 9.217200764863957e-06,
"loss": 0.3534,
"step": 1215
},
{
"epoch": 1.62,
"learning_rate": 9.20286770057312e-06,
"loss": 0.3947,
"step": 1216
},
{
"epoch": 1.62,
"learning_rate": 9.188536284168404e-06,
"loss": 0.3698,
"step": 1217
},
{
"epoch": 1.62,
"learning_rate": 9.174206545276678e-06,
"loss": 0.3353,
"step": 1218
},
{
"epoch": 1.62,
"learning_rate": 9.159878513521361e-06,
"loss": 0.3948,
"step": 1219
},
{
"epoch": 1.62,
"learning_rate": 9.145552218522332e-06,
"loss": 0.4014,
"step": 1220
},
{
"epoch": 1.62,
"learning_rate": 9.131227689895885e-06,
"loss": 0.3932,
"step": 1221
},
{
"epoch": 1.63,
"learning_rate": 9.116904957254655e-06,
"loss": 0.3666,
"step": 1222
},
{
"epoch": 1.63,
"learning_rate": 9.102584050207578e-06,
"loss": 0.4175,
"step": 1223
},
{
"epoch": 1.63,
"learning_rate": 9.088264998359795e-06,
"loss": 0.3857,
"step": 1224
},
{
"epoch": 1.63,
"learning_rate": 9.073947831312636e-06,
"loss": 0.3551,
"step": 1225
},
{
"epoch": 1.63,
"learning_rate": 9.059632578663515e-06,
"loss": 0.3447,
"step": 1226
},
{
"epoch": 1.63,
"learning_rate": 9.0453192700059e-06,
"loss": 0.4132,
"step": 1227
},
{
"epoch": 1.63,
"learning_rate": 9.031007934929237e-06,
"loss": 0.3876,
"step": 1228
},
{
"epoch": 1.64,
"learning_rate": 9.016698603018887e-06,
"loss": 0.4158,
"step": 1229
},
{
"epoch": 1.64,
"learning_rate": 9.002391303856074e-06,
"loss": 0.4234,
"step": 1230
},
{
"epoch": 1.64,
"learning_rate": 8.988086067017826e-06,
"loss": 0.4086,
"step": 1231
},
{
"epoch": 1.64,
"learning_rate": 8.9737829220769e-06,
"loss": 0.378,
"step": 1232
},
{
"epoch": 1.64,
"learning_rate": 8.959481898601729e-06,
"loss": 0.3476,
"step": 1233
},
{
"epoch": 1.64,
"learning_rate": 8.94518302615636e-06,
"loss": 0.3561,
"step": 1234
},
{
"epoch": 1.64,
"learning_rate": 8.930886334300395e-06,
"loss": 0.3771,
"step": 1235
},
{
"epoch": 1.64,
"learning_rate": 8.91659185258893e-06,
"loss": 0.4063,
"step": 1236
},
{
"epoch": 1.65,
"learning_rate": 8.902299610572489e-06,
"loss": 0.3686,
"step": 1237
},
{
"epoch": 1.65,
"learning_rate": 8.888009637796968e-06,
"loss": 0.3768,
"step": 1238
},
{
"epoch": 1.65,
"learning_rate": 8.873721963803573e-06,
"loss": 0.3278,
"step": 1239
},
{
"epoch": 1.65,
"learning_rate": 8.85943661812875e-06,
"loss": 0.3615,
"step": 1240
},
{
"epoch": 1.65,
"learning_rate": 8.84515363030414e-06,
"loss": 0.3454,
"step": 1241
},
{
"epoch": 1.65,
"learning_rate": 8.830873029856507e-06,
"loss": 0.3781,
"step": 1242
},
{
"epoch": 1.65,
"learning_rate": 8.816594846307682e-06,
"loss": 0.3592,
"step": 1243
},
{
"epoch": 1.66,
"learning_rate": 8.802319109174496e-06,
"loss": 0.3192,
"step": 1244
},
{
"epoch": 1.66,
"learning_rate": 8.788045847968721e-06,
"loss": 0.3609,
"step": 1245
},
{
"epoch": 1.66,
"learning_rate": 8.773775092197018e-06,
"loss": 0.3928,
"step": 1246
},
{
"epoch": 1.66,
"learning_rate": 8.759506871360858e-06,
"loss": 0.3647,
"step": 1247
},
{
"epoch": 1.66,
"learning_rate": 8.745241214956484e-06,
"loss": 0.3296,
"step": 1248
},
{
"epoch": 1.66,
"learning_rate": 8.730978152474831e-06,
"loss": 0.4117,
"step": 1249
},
{
"epoch": 1.66,
"learning_rate": 8.71671771340147e-06,
"loss": 0.4159,
"step": 1250
},
{
"epoch": 1.66,
"learning_rate": 8.702459927216548e-06,
"loss": 0.3371,
"step": 1251
},
{
"epoch": 1.67,
"learning_rate": 8.688204823394736e-06,
"loss": 0.3708,
"step": 1252
},
{
"epoch": 1.67,
"learning_rate": 8.673952431405148e-06,
"loss": 0.4057,
"step": 1253
},
{
"epoch": 1.67,
"learning_rate": 8.659702780711306e-06,
"loss": 0.3903,
"step": 1254
},
{
"epoch": 1.67,
"learning_rate": 8.645455900771054e-06,
"loss": 0.4371,
"step": 1255
},
{
"epoch": 1.67,
"learning_rate": 8.63121182103651e-06,
"loss": 0.3869,
"step": 1256
},
{
"epoch": 1.67,
"learning_rate": 8.616970570954003e-06,
"loss": 0.3889,
"step": 1257
},
{
"epoch": 1.67,
"learning_rate": 8.602732179964017e-06,
"loss": 0.3636,
"step": 1258
},
{
"epoch": 1.68,
"learning_rate": 8.588496677501122e-06,
"loss": 0.3386,
"step": 1259
},
{
"epoch": 1.68,
"learning_rate": 8.57426409299392e-06,
"loss": 0.3794,
"step": 1260
},
{
"epoch": 1.68,
"learning_rate": 8.560034455864974e-06,
"loss": 0.3961,
"step": 1261
},
{
"epoch": 1.68,
"learning_rate": 8.545807795530757e-06,
"loss": 0.3269,
"step": 1262
},
{
"epoch": 1.68,
"learning_rate": 8.531584141401592e-06,
"loss": 0.4132,
"step": 1263
},
{
"epoch": 1.68,
"learning_rate": 8.51736352288158e-06,
"loss": 0.3408,
"step": 1264
},
{
"epoch": 1.68,
"learning_rate": 8.503145969368562e-06,
"loss": 0.4361,
"step": 1265
},
{
"epoch": 1.68,
"learning_rate": 8.488931510254015e-06,
"loss": 0.397,
"step": 1266
},
{
"epoch": 1.69,
"learning_rate": 8.47472017492305e-06,
"loss": 0.398,
"step": 1267
},
{
"epoch": 1.69,
"learning_rate": 8.4605119927543e-06,
"loss": 0.38,
"step": 1268
},
{
"epoch": 1.69,
"learning_rate": 8.446306993119887e-06,
"loss": 0.3775,
"step": 1269
},
{
"epoch": 1.69,
"learning_rate": 8.43210520538535e-06,
"loss": 0.3678,
"step": 1270
},
{
"epoch": 1.69,
"learning_rate": 8.417906658909599e-06,
"loss": 0.3723,
"step": 1271
},
{
"epoch": 1.69,
"learning_rate": 8.403711383044823e-06,
"loss": 0.3722,
"step": 1272
},
{
"epoch": 1.69,
"learning_rate": 8.389519407136471e-06,
"loss": 0.3865,
"step": 1273
},
{
"epoch": 1.7,
"learning_rate": 8.375330760523158e-06,
"loss": 0.3655,
"step": 1274
},
{
"epoch": 1.7,
"learning_rate": 8.361145472536618e-06,
"loss": 0.3242,
"step": 1275
},
{
"epoch": 1.7,
"learning_rate": 8.346963572501651e-06,
"loss": 0.3672,
"step": 1276
},
{
"epoch": 1.7,
"learning_rate": 8.332785089736036e-06,
"loss": 0.412,
"step": 1277
},
{
"epoch": 1.7,
"learning_rate": 8.318610053550498e-06,
"loss": 0.3711,
"step": 1278
},
{
"epoch": 1.7,
"learning_rate": 8.30443849324864e-06,
"loss": 0.378,
"step": 1279
},
{
"epoch": 1.7,
"learning_rate": 8.290270438126878e-06,
"loss": 0.3856,
"step": 1280
},
{
"epoch": 1.7,
"learning_rate": 8.276105917474374e-06,
"loss": 0.337,
"step": 1281
},
{
"epoch": 1.71,
"learning_rate": 8.26194496057299e-06,
"loss": 0.3912,
"step": 1282
},
{
"epoch": 1.71,
"learning_rate": 8.247787596697217e-06,
"loss": 0.3874,
"step": 1283
},
{
"epoch": 1.71,
"learning_rate": 8.233633855114127e-06,
"loss": 0.3742,
"step": 1284
},
{
"epoch": 1.71,
"learning_rate": 8.219483765083294e-06,
"loss": 0.3513,
"step": 1285
},
{
"epoch": 1.71,
"learning_rate": 8.205337355856744e-06,
"loss": 0.3581,
"step": 1286
},
{
"epoch": 1.71,
"learning_rate": 8.191194656678905e-06,
"loss": 0.3398,
"step": 1287
},
{
"epoch": 1.71,
"learning_rate": 8.177055696786516e-06,
"loss": 0.4382,
"step": 1288
},
{
"epoch": 1.72,
"learning_rate": 8.1629205054086e-06,
"loss": 0.4248,
"step": 1289
},
{
"epoch": 1.72,
"learning_rate": 8.148789111766393e-06,
"loss": 0.4283,
"step": 1290
},
{
"epoch": 1.72,
"learning_rate": 8.134661545073266e-06,
"loss": 0.395,
"step": 1291
},
{
"epoch": 1.72,
"learning_rate": 8.120537834534689e-06,
"loss": 0.4059,
"step": 1292
},
{
"epoch": 1.72,
"learning_rate": 8.106418009348157e-06,
"loss": 0.4119,
"step": 1293
},
{
"epoch": 1.72,
"learning_rate": 8.092302098703132e-06,
"loss": 0.3592,
"step": 1294
},
{
"epoch": 1.72,
"learning_rate": 8.078190131780984e-06,
"loss": 0.3495,
"step": 1295
},
{
"epoch": 1.72,
"learning_rate": 8.064082137754932e-06,
"loss": 0.3594,
"step": 1296
},
{
"epoch": 1.73,
"learning_rate": 8.049978145789983e-06,
"loss": 0.3652,
"step": 1297
},
{
"epoch": 1.73,
"learning_rate": 8.035878185042869e-06,
"loss": 0.3264,
"step": 1298
},
{
"epoch": 1.73,
"learning_rate": 8.021782284661984e-06,
"loss": 0.3928,
"step": 1299
},
{
"epoch": 1.73,
"learning_rate": 8.00769047378733e-06,
"loss": 0.363,
"step": 1300
},
{
"epoch": 1.73,
"learning_rate": 7.993602781550466e-06,
"loss": 0.3823,
"step": 1301
},
{
"epoch": 1.73,
"learning_rate": 7.979519237074423e-06,
"loss": 0.3753,
"step": 1302
},
{
"epoch": 1.73,
"learning_rate": 7.965439869473664e-06,
"loss": 0.3907,
"step": 1303
},
{
"epoch": 1.74,
"learning_rate": 7.951364707854012e-06,
"loss": 0.3681,
"step": 1304
},
{
"epoch": 1.74,
"learning_rate": 7.937293781312601e-06,
"loss": 0.3893,
"step": 1305
},
{
"epoch": 1.74,
"learning_rate": 7.923227118937803e-06,
"loss": 0.3393,
"step": 1306
},
{
"epoch": 1.74,
"learning_rate": 7.909164749809186e-06,
"loss": 0.36,
"step": 1307
},
{
"epoch": 1.74,
"learning_rate": 7.895106702997437e-06,
"loss": 0.366,
"step": 1308
},
{
"epoch": 1.74,
"learning_rate": 7.8810530075643e-06,
"loss": 0.3623,
"step": 1309
},
{
"epoch": 1.74,
"learning_rate": 7.867003692562533e-06,
"loss": 0.3657,
"step": 1310
},
{
"epoch": 1.74,
"learning_rate": 7.852958787035838e-06,
"loss": 0.3976,
"step": 1311
},
{
"epoch": 1.75,
"learning_rate": 7.838918320018793e-06,
"loss": 0.4042,
"step": 1312
},
{
"epoch": 1.75,
"learning_rate": 7.824882320536814e-06,
"loss": 0.3648,
"step": 1313
},
{
"epoch": 1.75,
"learning_rate": 7.810850817606072e-06,
"loss": 0.3935,
"step": 1314
},
{
"epoch": 1.75,
"learning_rate": 7.796823840233444e-06,
"loss": 0.3635,
"step": 1315
},
{
"epoch": 1.75,
"learning_rate": 7.782801417416448e-06,
"loss": 0.397,
"step": 1316
},
{
"epoch": 1.75,
"learning_rate": 7.768783578143195e-06,
"loss": 0.4025,
"step": 1317
},
{
"epoch": 1.75,
"learning_rate": 7.754770351392311e-06,
"loss": 0.3549,
"step": 1318
},
{
"epoch": 1.76,
"learning_rate": 7.740761766132902e-06,
"loss": 0.3796,
"step": 1319
},
{
"epoch": 1.76,
"learning_rate": 7.726757851324456e-06,
"loss": 0.3648,
"step": 1320
},
{
"epoch": 1.76,
"learning_rate": 7.712758635916824e-06,
"loss": 0.4098,
"step": 1321
},
{
"epoch": 1.76,
"learning_rate": 7.698764148850138e-06,
"loss": 0.3365,
"step": 1322
},
{
"epoch": 1.76,
"learning_rate": 7.684774419054748e-06,
"loss": 0.4357,
"step": 1323
},
{
"epoch": 1.76,
"learning_rate": 7.670789475451187e-06,
"loss": 0.377,
"step": 1324
},
{
"epoch": 1.76,
"learning_rate": 7.656809346950068e-06,
"loss": 0.4478,
"step": 1325
},
{
"epoch": 1.76,
"learning_rate": 7.64283406245207e-06,
"loss": 0.4096,
"step": 1326
},
{
"epoch": 1.77,
"learning_rate": 7.628863650847851e-06,
"loss": 0.3253,
"step": 1327
},
{
"epoch": 1.77,
"learning_rate": 7.6148981410179966e-06,
"loss": 0.3832,
"step": 1328
},
{
"epoch": 1.77,
"learning_rate": 7.6009375618329575e-06,
"loss": 0.3922,
"step": 1329
},
{
"epoch": 1.77,
"learning_rate": 7.586981942153e-06,
"loss": 0.3995,
"step": 1330
},
{
"epoch": 1.77,
"learning_rate": 7.573031310828118e-06,
"loss": 0.3775,
"step": 1331
},
{
"epoch": 1.77,
"learning_rate": 7.559085696698015e-06,
"loss": 0.3257,
"step": 1332
},
{
"epoch": 1.77,
"learning_rate": 7.545145128592009e-06,
"loss": 0.3899,
"step": 1333
},
{
"epoch": 1.78,
"learning_rate": 7.531209635328993e-06,
"loss": 0.3511,
"step": 1334
},
{
"epoch": 1.78,
"learning_rate": 7.5172792457173674e-06,
"loss": 0.3967,
"step": 1335
},
{
"epoch": 1.78,
"learning_rate": 7.503353988554976e-06,
"loss": 0.3558,
"step": 1336
},
{
"epoch": 1.78,
"learning_rate": 7.489433892629061e-06,
"loss": 0.3956,
"step": 1337
},
{
"epoch": 1.78,
"learning_rate": 7.475518986716193e-06,
"loss": 0.383,
"step": 1338
},
{
"epoch": 1.78,
"learning_rate": 7.461609299582212e-06,
"loss": 0.3532,
"step": 1339
},
{
"epoch": 1.78,
"learning_rate": 7.44770485998217e-06,
"loss": 0.4247,
"step": 1340
},
{
"epoch": 1.78,
"learning_rate": 7.433805696660267e-06,
"loss": 0.336,
"step": 1341
},
{
"epoch": 1.79,
"learning_rate": 7.419911838349798e-06,
"loss": 0.401,
"step": 1342
},
{
"epoch": 1.79,
"learning_rate": 7.406023313773097e-06,
"loss": 0.3901,
"step": 1343
},
{
"epoch": 1.79,
"learning_rate": 7.392140151641465e-06,
"loss": 0.4122,
"step": 1344
},
{
"epoch": 1.79,
"learning_rate": 7.378262380655119e-06,
"loss": 0.3591,
"step": 1345
},
{
"epoch": 1.79,
"learning_rate": 7.3643900295031345e-06,
"loss": 0.3807,
"step": 1346
},
{
"epoch": 1.79,
"learning_rate": 7.350523126863373e-06,
"loss": 0.3915,
"step": 1347
},
{
"epoch": 1.79,
"learning_rate": 7.336661701402439e-06,
"loss": 0.3913,
"step": 1348
},
{
"epoch": 1.8,
"learning_rate": 7.322805781775621e-06,
"loss": 0.4335,
"step": 1349
},
{
"epoch": 1.8,
"learning_rate": 7.308955396626815e-06,
"loss": 0.3294,
"step": 1350
},
{
"epoch": 1.8,
"learning_rate": 7.29511057458848e-06,
"loss": 0.367,
"step": 1351
},
{
"epoch": 1.8,
"learning_rate": 7.281271344281573e-06,
"loss": 0.3957,
"step": 1352
},
{
"epoch": 1.8,
"learning_rate": 7.267437734315493e-06,
"loss": 0.3683,
"step": 1353
},
{
"epoch": 1.8,
"learning_rate": 7.253609773288015e-06,
"loss": 0.3796,
"step": 1354
},
{
"epoch": 1.8,
"learning_rate": 7.239787489785248e-06,
"loss": 0.3557,
"step": 1355
},
{
"epoch": 1.8,
"learning_rate": 7.225970912381557e-06,
"loss": 0.3499,
"step": 1356
},
{
"epoch": 1.81,
"learning_rate": 7.212160069639505e-06,
"loss": 0.4639,
"step": 1357
},
{
"epoch": 1.81,
"learning_rate": 7.198354990109806e-06,
"loss": 0.3478,
"step": 1358
},
{
"epoch": 1.81,
"learning_rate": 7.184555702331264e-06,
"loss": 0.3994,
"step": 1359
},
{
"epoch": 1.81,
"learning_rate": 7.1707622348307e-06,
"loss": 0.3661,
"step": 1360
},
{
"epoch": 1.81,
"learning_rate": 7.1569746161229135e-06,
"loss": 0.3361,
"step": 1361
},
{
"epoch": 1.81,
"learning_rate": 7.143192874710608e-06,
"loss": 0.3789,
"step": 1362
},
{
"epoch": 1.81,
"learning_rate": 7.1294170390843335e-06,
"loss": 0.4008,
"step": 1363
},
{
"epoch": 1.82,
"learning_rate": 7.115647137722435e-06,
"loss": 0.3338,
"step": 1364
},
{
"epoch": 1.82,
"learning_rate": 7.1018831990909886e-06,
"loss": 0.3696,
"step": 1365
},
{
"epoch": 1.82,
"learning_rate": 7.088125251643747e-06,
"loss": 0.3819,
"step": 1366
},
{
"epoch": 1.82,
"learning_rate": 7.074373323822077e-06,
"loss": 0.3673,
"step": 1367
},
{
"epoch": 1.82,
"learning_rate": 7.0606274440548935e-06,
"loss": 0.3443,
"step": 1368
},
{
"epoch": 1.82,
"learning_rate": 7.046887640758617e-06,
"loss": 0.3936,
"step": 1369
},
{
"epoch": 1.82,
"learning_rate": 7.033153942337105e-06,
"loss": 0.3278,
"step": 1370
},
{
"epoch": 1.82,
"learning_rate": 7.019426377181591e-06,
"loss": 0.3112,
"step": 1371
},
{
"epoch": 1.83,
"learning_rate": 7.005704973670631e-06,
"loss": 0.4195,
"step": 1372
},
{
"epoch": 1.83,
"learning_rate": 6.991989760170051e-06,
"loss": 0.3903,
"step": 1373
},
{
"epoch": 1.83,
"learning_rate": 6.978280765032863e-06,
"loss": 0.4118,
"step": 1374
},
{
"epoch": 1.83,
"learning_rate": 6.9645780165992395e-06,
"loss": 0.4142,
"step": 1375
},
{
"epoch": 1.83,
"learning_rate": 6.950881543196435e-06,
"loss": 0.3847,
"step": 1376
},
{
"epoch": 1.83,
"learning_rate": 6.9371913731387256e-06,
"loss": 0.3684,
"step": 1377
},
{
"epoch": 1.83,
"learning_rate": 6.923507534727374e-06,
"loss": 0.4026,
"step": 1378
},
{
"epoch": 1.83,
"learning_rate": 6.909830056250527e-06,
"loss": 0.3864,
"step": 1379
},
{
"epoch": 1.84,
"learning_rate": 6.896158965983208e-06,
"loss": 0.3766,
"step": 1380
},
{
"epoch": 1.84,
"learning_rate": 6.882494292187223e-06,
"loss": 0.3352,
"step": 1381
},
{
"epoch": 1.84,
"learning_rate": 6.868836063111113e-06,
"loss": 0.3506,
"step": 1382
},
{
"epoch": 1.84,
"learning_rate": 6.855184306990106e-06,
"loss": 0.3745,
"step": 1383
},
{
"epoch": 1.84,
"learning_rate": 6.841539052046029e-06,
"loss": 0.3186,
"step": 1384
},
{
"epoch": 1.84,
"learning_rate": 6.827900326487287e-06,
"loss": 0.3694,
"step": 1385
},
{
"epoch": 1.84,
"learning_rate": 6.814268158508783e-06,
"loss": 0.3087,
"step": 1386
},
{
"epoch": 1.85,
"learning_rate": 6.800642576291863e-06,
"loss": 0.3472,
"step": 1387
},
{
"epoch": 1.85,
"learning_rate": 6.787023608004251e-06,
"loss": 0.3875,
"step": 1388
},
{
"epoch": 1.85,
"learning_rate": 6.773411281800014e-06,
"loss": 0.3322,
"step": 1389
},
{
"epoch": 1.85,
"learning_rate": 6.759805625819469e-06,
"loss": 0.3911,
"step": 1390
},
{
"epoch": 1.85,
"learning_rate": 6.746206668189159e-06,
"loss": 0.3677,
"step": 1391
},
{
"epoch": 1.85,
"learning_rate": 6.732614437021774e-06,
"loss": 0.3868,
"step": 1392
},
{
"epoch": 1.85,
"learning_rate": 6.7190289604160986e-06,
"loss": 0.4189,
"step": 1393
},
{
"epoch": 1.85,
"learning_rate": 6.705450266456957e-06,
"loss": 0.426,
"step": 1394
},
{
"epoch": 1.86,
"learning_rate": 6.6918783832151424e-06,
"loss": 0.3593,
"step": 1395
},
{
"epoch": 1.86,
"learning_rate": 6.678313338747376e-06,
"loss": 0.3958,
"step": 1396
},
{
"epoch": 1.86,
"learning_rate": 6.664755161096248e-06,
"loss": 0.3603,
"step": 1397
},
{
"epoch": 1.86,
"learning_rate": 6.651203878290139e-06,
"loss": 0.3907,
"step": 1398
},
{
"epoch": 1.86,
"learning_rate": 6.637659518343187e-06,
"loss": 0.4062,
"step": 1399
},
{
"epoch": 1.86,
"learning_rate": 6.624122109255211e-06,
"loss": 0.4029,
"step": 1400
},
{
"epoch": 1.86,
"learning_rate": 6.610591679011664e-06,
"loss": 0.3885,
"step": 1401
},
{
"epoch": 1.87,
"learning_rate": 6.59706825558357e-06,
"loss": 0.419,
"step": 1402
},
{
"epoch": 1.87,
"learning_rate": 6.583551866927475e-06,
"loss": 0.32,
"step": 1403
},
{
"epoch": 1.87,
"learning_rate": 6.570042540985375e-06,
"loss": 0.3637,
"step": 1404
},
{
"epoch": 1.87,
"learning_rate": 6.55654030568467e-06,
"loss": 0.3825,
"step": 1405
},
{
"epoch": 1.87,
"learning_rate": 6.543045188938093e-06,
"loss": 0.3514,
"step": 1406
},
{
"epoch": 1.87,
"learning_rate": 6.52955721864367e-06,
"loss": 0.3847,
"step": 1407
},
{
"epoch": 1.87,
"learning_rate": 6.516076422684654e-06,
"loss": 0.371,
"step": 1408
},
{
"epoch": 1.87,
"learning_rate": 6.502602828929461e-06,
"loss": 0.3767,
"step": 1409
},
{
"epoch": 1.88,
"learning_rate": 6.4891364652316236e-06,
"loss": 0.3636,
"step": 1410
},
{
"epoch": 1.88,
"learning_rate": 6.4756773594297195e-06,
"loss": 0.3754,
"step": 1411
},
{
"epoch": 1.88,
"learning_rate": 6.462225539347331e-06,
"loss": 0.373,
"step": 1412
},
{
"epoch": 1.88,
"learning_rate": 6.4487810327929726e-06,
"loss": 0.3653,
"step": 1413
},
{
"epoch": 1.88,
"learning_rate": 6.435343867560046e-06,
"loss": 0.3777,
"step": 1414
},
{
"epoch": 1.88,
"learning_rate": 6.421914071426778e-06,
"loss": 0.3633,
"step": 1415
},
{
"epoch": 1.88,
"learning_rate": 6.408491672156147e-06,
"loss": 0.4067,
"step": 1416
},
{
"epoch": 1.89,
"learning_rate": 6.395076697495854e-06,
"loss": 0.3693,
"step": 1417
},
{
"epoch": 1.89,
"learning_rate": 6.381669175178249e-06,
"loss": 0.3732,
"step": 1418
},
{
"epoch": 1.89,
"learning_rate": 6.368269132920271e-06,
"loss": 0.37,
"step": 1419
},
{
"epoch": 1.89,
"learning_rate": 6.354876598423402e-06,
"loss": 0.3789,
"step": 1420
},
{
"epoch": 1.89,
"learning_rate": 6.3414915993736016e-06,
"loss": 0.3993,
"step": 1421
},
{
"epoch": 1.89,
"learning_rate": 6.328114163441244e-06,
"loss": 0.3901,
"step": 1422
},
{
"epoch": 1.89,
"learning_rate": 6.314744318281081e-06,
"loss": 0.4063,
"step": 1423
},
{
"epoch": 1.89,
"learning_rate": 6.301382091532162e-06,
"loss": 0.3555,
"step": 1424
},
{
"epoch": 1.9,
"learning_rate": 6.2880275108177915e-06,
"loss": 0.4178,
"step": 1425
},
{
"epoch": 1.9,
"learning_rate": 6.274680603745476e-06,
"loss": 0.335,
"step": 1426
},
{
"epoch": 1.9,
"learning_rate": 6.261341397906835e-06,
"loss": 0.4094,
"step": 1427
},
{
"epoch": 1.9,
"learning_rate": 6.248009920877591e-06,
"loss": 0.3896,
"step": 1428
},
{
"epoch": 1.9,
"learning_rate": 6.234686200217479e-06,
"loss": 0.3874,
"step": 1429
},
{
"epoch": 1.9,
"learning_rate": 6.221370263470199e-06,
"loss": 0.3543,
"step": 1430
},
{
"epoch": 1.9,
"learning_rate": 6.2080621381633685e-06,
"loss": 0.3259,
"step": 1431
},
{
"epoch": 1.91,
"learning_rate": 6.19476185180844e-06,
"loss": 0.4089,
"step": 1432
},
{
"epoch": 1.91,
"learning_rate": 6.181469431900673e-06,
"loss": 0.3837,
"step": 1433
},
{
"epoch": 1.91,
"learning_rate": 6.168184905919065e-06,
"loss": 0.3719,
"step": 1434
},
{
"epoch": 1.91,
"learning_rate": 6.15490830132629e-06,
"loss": 0.3605,
"step": 1435
},
{
"epoch": 1.91,
"learning_rate": 6.141639645568646e-06,
"loss": 0.3713,
"step": 1436
},
{
"epoch": 1.91,
"learning_rate": 6.128378966076008e-06,
"loss": 0.3414,
"step": 1437
},
{
"epoch": 1.91,
"learning_rate": 6.115126290261746e-06,
"loss": 0.372,
"step": 1438
},
{
"epoch": 1.91,
"learning_rate": 6.101881645522697e-06,
"loss": 0.3859,
"step": 1439
},
{
"epoch": 1.92,
"learning_rate": 6.088645059239091e-06,
"loss": 0.3484,
"step": 1440
},
{
"epoch": 1.92,
"learning_rate": 6.075416558774503e-06,
"loss": 0.3365,
"step": 1441
},
{
"epoch": 1.92,
"learning_rate": 6.062196171475785e-06,
"loss": 0.4046,
"step": 1442
},
{
"epoch": 1.92,
"learning_rate": 6.048983924673022e-06,
"loss": 0.3334,
"step": 1443
},
{
"epoch": 1.92,
"learning_rate": 6.035779845679467e-06,
"loss": 0.3392,
"step": 1444
},
{
"epoch": 1.92,
"learning_rate": 6.022583961791495e-06,
"loss": 0.4184,
"step": 1445
},
{
"epoch": 1.92,
"learning_rate": 6.009396300288533e-06,
"loss": 0.4124,
"step": 1446
},
{
"epoch": 1.93,
"learning_rate": 5.996216888433009e-06,
"loss": 0.3271,
"step": 1447
},
{
"epoch": 1.93,
"learning_rate": 5.983045753470308e-06,
"loss": 0.4033,
"step": 1448
},
{
"epoch": 1.93,
"learning_rate": 5.969882922628685e-06,
"loss": 0.417,
"step": 1449
},
{
"epoch": 1.93,
"learning_rate": 5.956728423119248e-06,
"loss": 0.3805,
"step": 1450
},
{
"epoch": 1.93,
"learning_rate": 5.943582282135869e-06,
"loss": 0.3646,
"step": 1451
},
{
"epoch": 1.93,
"learning_rate": 5.930444526855148e-06,
"loss": 0.4043,
"step": 1452
},
{
"epoch": 1.93,
"learning_rate": 5.917315184436345e-06,
"loss": 0.368,
"step": 1453
},
{
"epoch": 1.93,
"learning_rate": 5.904194282021329e-06,
"loss": 0.3831,
"step": 1454
},
{
"epoch": 1.94,
"learning_rate": 5.891081846734519e-06,
"loss": 0.382,
"step": 1455
},
{
"epoch": 1.94,
"learning_rate": 5.877977905682839e-06,
"loss": 0.3967,
"step": 1456
},
{
"epoch": 1.94,
"learning_rate": 5.864882485955645e-06,
"loss": 0.3409,
"step": 1457
},
{
"epoch": 1.94,
"learning_rate": 5.8517956146246826e-06,
"loss": 0.3854,
"step": 1458
},
{
"epoch": 1.94,
"learning_rate": 5.8387173187440185e-06,
"loss": 0.4403,
"step": 1459
},
{
"epoch": 1.94,
"learning_rate": 5.825647625349995e-06,
"loss": 0.3796,
"step": 1460
},
{
"epoch": 1.94,
"learning_rate": 5.812586561461173e-06,
"loss": 0.3223,
"step": 1461
},
{
"epoch": 1.95,
"learning_rate": 5.799534154078273e-06,
"loss": 0.384,
"step": 1462
},
{
"epoch": 1.95,
"learning_rate": 5.786490430184115e-06,
"loss": 0.3572,
"step": 1463
},
{
"epoch": 1.95,
"learning_rate": 5.773455416743583e-06,
"loss": 0.3786,
"step": 1464
},
{
"epoch": 1.95,
"learning_rate": 5.760429140703534e-06,
"loss": 0.4211,
"step": 1465
},
{
"epoch": 1.95,
"learning_rate": 5.747411628992774e-06,
"loss": 0.3475,
"step": 1466
},
{
"epoch": 1.95,
"learning_rate": 5.73440290852199e-06,
"loss": 0.3549,
"step": 1467
},
{
"epoch": 1.95,
"learning_rate": 5.72140300618369e-06,
"loss": 0.3719,
"step": 1468
},
{
"epoch": 1.95,
"learning_rate": 5.708411948852167e-06,
"loss": 0.4056,
"step": 1469
},
{
"epoch": 1.96,
"learning_rate": 5.695429763383408e-06,
"loss": 0.3784,
"step": 1470
},
{
"epoch": 1.96,
"learning_rate": 5.6824564766150724e-06,
"loss": 0.3892,
"step": 1471
},
{
"epoch": 1.96,
"learning_rate": 5.669492115366421e-06,
"loss": 0.3995,
"step": 1472
},
{
"epoch": 1.96,
"learning_rate": 5.656536706438267e-06,
"loss": 0.3431,
"step": 1473
},
{
"epoch": 1.96,
"learning_rate": 5.643590276612909e-06,
"loss": 0.3634,
"step": 1474
},
{
"epoch": 1.96,
"learning_rate": 5.6306528526540905e-06,
"loss": 0.403,
"step": 1475
},
{
"epoch": 1.96,
"learning_rate": 5.61772446130693e-06,
"loss": 0.4416,
"step": 1476
},
{
"epoch": 1.97,
"learning_rate": 5.604805129297883e-06,
"loss": 0.3262,
"step": 1477
},
{
"epoch": 1.97,
"learning_rate": 5.591894883334668e-06,
"loss": 0.3851,
"step": 1478
},
{
"epoch": 1.97,
"learning_rate": 5.578993750106224e-06,
"loss": 0.2999,
"step": 1479
},
{
"epoch": 1.97,
"learning_rate": 5.566101756282652e-06,
"loss": 0.3492,
"step": 1480
},
{
"epoch": 1.97,
"learning_rate": 5.553218928515159e-06,
"loss": 0.3234,
"step": 1481
},
{
"epoch": 1.97,
"learning_rate": 5.540345293436003e-06,
"loss": 0.4179,
"step": 1482
},
{
"epoch": 1.97,
"learning_rate": 5.527480877658437e-06,
"loss": 0.3734,
"step": 1483
},
{
"epoch": 1.97,
"learning_rate": 5.514625707776656e-06,
"loss": 0.3784,
"step": 1484
},
{
"epoch": 1.98,
"learning_rate": 5.5017798103657436e-06,
"loss": 0.3777,
"step": 1485
},
{
"epoch": 1.98,
"learning_rate": 5.488943211981612e-06,
"loss": 0.357,
"step": 1486
},
{
"epoch": 1.98,
"learning_rate": 5.476115939160951e-06,
"loss": 0.349,
"step": 1487
},
{
"epoch": 1.98,
"learning_rate": 5.463298018421171e-06,
"loss": 0.3378,
"step": 1488
},
{
"epoch": 1.98,
"learning_rate": 5.45048947626035e-06,
"loss": 0.3722,
"step": 1489
},
{
"epoch": 1.98,
"learning_rate": 5.437690339157178e-06,
"loss": 0.4409,
"step": 1490
},
{
"epoch": 1.98,
"learning_rate": 5.424900633570901e-06,
"loss": 0.3555,
"step": 1491
},
{
"epoch": 1.99,
"learning_rate": 5.412120385941269e-06,
"loss": 0.3247,
"step": 1492
},
{
"epoch": 1.99,
"learning_rate": 5.399349622688479e-06,
"loss": 0.4104,
"step": 1493
},
{
"epoch": 1.99,
"learning_rate": 5.386588370213124e-06,
"loss": 0.3717,
"step": 1494
},
{
"epoch": 1.99,
"learning_rate": 5.373836654896128e-06,
"loss": 0.4084,
"step": 1495
},
{
"epoch": 1.99,
"learning_rate": 5.361094503098709e-06,
"loss": 0.3636,
"step": 1496
},
{
"epoch": 1.99,
"learning_rate": 5.348361941162309e-06,
"loss": 0.3702,
"step": 1497
},
{
"epoch": 1.99,
"learning_rate": 5.3356389954085455e-06,
"loss": 0.3951,
"step": 1498
},
{
"epoch": 1.99,
"learning_rate": 5.322925692139157e-06,
"loss": 0.3752,
"step": 1499
},
{
"epoch": 2.0,
"learning_rate": 5.31022205763595e-06,
"loss": 0.3483,
"step": 1500
},
{
"epoch": 2.0,
"learning_rate": 5.297528118160742e-06,
"loss": 0.3862,
"step": 1501
},
{
"epoch": 2.0,
"learning_rate": 5.2848438999553065e-06,
"loss": 0.3454,
"step": 1502
},
{
"epoch": 2.0,
"learning_rate": 5.272169429241325e-06,
"loss": 0.3723,
"step": 1503
},
{
"epoch": 2.0,
"learning_rate": 5.259504732220322e-06,
"loss": 0.2757,
"step": 1504
},
{
"epoch": 2.0,
"learning_rate": 5.246849835073624e-06,
"loss": 0.3221,
"step": 1505
},
{
"epoch": 2.0,
"learning_rate": 5.234204763962298e-06,
"loss": 0.2749,
"step": 1506
},
{
"epoch": 2.01,
"learning_rate": 5.221569545027084e-06,
"loss": 0.3068,
"step": 1507
},
{
"epoch": 2.01,
"learning_rate": 5.208944204388377e-06,
"loss": 0.3199,
"step": 1508
},
{
"epoch": 2.01,
"learning_rate": 5.196328768146137e-06,
"loss": 0.2917,
"step": 1509
},
{
"epoch": 2.01,
"learning_rate": 5.183723262379851e-06,
"loss": 0.2954,
"step": 1510
},
{
"epoch": 2.01,
"learning_rate": 5.1711277131484785e-06,
"loss": 0.2894,
"step": 1511
},
{
"epoch": 2.01,
"learning_rate": 5.1585421464904e-06,
"loss": 0.2786,
"step": 1512
},
{
"epoch": 2.01,
"learning_rate": 5.145966588423341e-06,
"loss": 0.2828,
"step": 1513
},
{
"epoch": 2.01,
"learning_rate": 5.133401064944367e-06,
"loss": 0.2701,
"step": 1514
},
{
"epoch": 2.02,
"learning_rate": 5.120845602029776e-06,
"loss": 0.2794,
"step": 1515
},
{
"epoch": 2.02,
"learning_rate": 5.108300225635074e-06,
"loss": 0.3123,
"step": 1516
},
{
"epoch": 2.02,
"learning_rate": 5.095764961694923e-06,
"loss": 0.2538,
"step": 1517
},
{
"epoch": 2.02,
"learning_rate": 5.0832398361230595e-06,
"loss": 0.3285,
"step": 1518
},
{
"epoch": 2.02,
"learning_rate": 5.070724874812289e-06,
"loss": 0.3041,
"step": 1519
},
{
"epoch": 2.02,
"learning_rate": 5.058220103634386e-06,
"loss": 0.3184,
"step": 1520
},
{
"epoch": 2.02,
"learning_rate": 5.045725548440064e-06,
"loss": 0.3181,
"step": 1521
},
{
"epoch": 2.03,
"learning_rate": 5.033241235058919e-06,
"loss": 0.2967,
"step": 1522
},
{
"epoch": 2.03,
"learning_rate": 5.020767189299369e-06,
"loss": 0.2698,
"step": 1523
},
{
"epoch": 2.03,
"learning_rate": 5.008303436948609e-06,
"loss": 0.2555,
"step": 1524
},
{
"epoch": 2.03,
"learning_rate": 4.995850003772563e-06,
"loss": 0.2487,
"step": 1525
},
{
"epoch": 2.03,
"learning_rate": 4.983406915515812e-06,
"loss": 0.27,
"step": 1526
},
{
"epoch": 2.03,
"learning_rate": 4.970974197901552e-06,
"loss": 0.3033,
"step": 1527
},
{
"epoch": 2.03,
"learning_rate": 4.95855187663155e-06,
"loss": 0.291,
"step": 1528
},
{
"epoch": 2.03,
"learning_rate": 4.9461399773860645e-06,
"loss": 0.2856,
"step": 1529
},
{
"epoch": 2.04,
"learning_rate": 4.933738525823817e-06,
"loss": 0.2517,
"step": 1530
},
{
"epoch": 2.04,
"learning_rate": 4.921347547581939e-06,
"loss": 0.2981,
"step": 1531
},
{
"epoch": 2.04,
"learning_rate": 4.908967068275903e-06,
"loss": 0.2659,
"step": 1532
},
{
"epoch": 2.04,
"learning_rate": 4.896597113499479e-06,
"loss": 0.3157,
"step": 1533
},
{
"epoch": 2.04,
"learning_rate": 4.884237708824673e-06,
"loss": 0.3258,
"step": 1534
},
{
"epoch": 2.04,
"learning_rate": 4.871888879801685e-06,
"loss": 0.2822,
"step": 1535
},
{
"epoch": 2.04,
"learning_rate": 4.859550651958864e-06,
"loss": 0.2877,
"step": 1536
},
{
"epoch": 2.05,
"learning_rate": 4.847223050802631e-06,
"loss": 0.2556,
"step": 1537
},
{
"epoch": 2.05,
"learning_rate": 4.8349061018174385e-06,
"loss": 0.2871,
"step": 1538
},
{
"epoch": 2.05,
"learning_rate": 4.822599830465727e-06,
"loss": 0.2662,
"step": 1539
},
{
"epoch": 2.05,
"learning_rate": 4.8103042621878515e-06,
"loss": 0.2883,
"step": 1540
},
{
"epoch": 2.05,
"learning_rate": 4.798019422402046e-06,
"loss": 0.2879,
"step": 1541
},
{
"epoch": 2.05,
"learning_rate": 4.7857453365043724e-06,
"loss": 0.303,
"step": 1542
},
{
"epoch": 2.05,
"learning_rate": 4.773482029868657e-06,
"loss": 0.304,
"step": 1543
},
{
"epoch": 2.05,
"learning_rate": 4.76122952784644e-06,
"loss": 0.2793,
"step": 1544
},
{
"epoch": 2.06,
"learning_rate": 4.748987855766924e-06,
"loss": 0.2673,
"step": 1545
},
{
"epoch": 2.06,
"learning_rate": 4.736757038936928e-06,
"loss": 0.2712,
"step": 1546
},
{
"epoch": 2.06,
"learning_rate": 4.724537102640825e-06,
"loss": 0.261,
"step": 1547
},
{
"epoch": 2.06,
"learning_rate": 4.712328072140505e-06,
"loss": 0.3192,
"step": 1548
},
{
"epoch": 2.06,
"learning_rate": 4.700129972675309e-06,
"loss": 0.286,
"step": 1549
},
{
"epoch": 2.06,
"learning_rate": 4.687942829461969e-06,
"loss": 0.2395,
"step": 1550
},
{
"epoch": 2.06,
"learning_rate": 4.67576666769458e-06,
"loss": 0.2547,
"step": 1551
},
{
"epoch": 2.07,
"learning_rate": 4.663601512544532e-06,
"loss": 0.2602,
"step": 1552
},
{
"epoch": 2.07,
"learning_rate": 4.6514473891604584e-06,
"loss": 0.3271,
"step": 1553
},
{
"epoch": 2.07,
"learning_rate": 4.639304322668198e-06,
"loss": 0.3037,
"step": 1554
},
{
"epoch": 2.07,
"learning_rate": 4.627172338170721e-06,
"loss": 0.2511,
"step": 1555
},
{
"epoch": 2.07,
"learning_rate": 4.615051460748086e-06,
"loss": 0.2825,
"step": 1556
},
{
"epoch": 2.07,
"learning_rate": 4.602941715457397e-06,
"loss": 0.2969,
"step": 1557
},
{
"epoch": 2.07,
"learning_rate": 4.590843127332744e-06,
"loss": 0.2673,
"step": 1558
},
{
"epoch": 2.07,
"learning_rate": 4.578755721385153e-06,
"loss": 0.3132,
"step": 1559
},
{
"epoch": 2.08,
"learning_rate": 4.566679522602536e-06,
"loss": 0.2702,
"step": 1560
},
{
"epoch": 2.08,
"learning_rate": 4.554614555949625e-06,
"loss": 0.309,
"step": 1561
},
{
"epoch": 2.08,
"learning_rate": 4.54256084636794e-06,
"loss": 0.2695,
"step": 1562
},
{
"epoch": 2.08,
"learning_rate": 4.530518418775734e-06,
"loss": 0.2886,
"step": 1563
},
{
"epoch": 2.08,
"learning_rate": 4.518487298067925e-06,
"loss": 0.2857,
"step": 1564
},
{
"epoch": 2.08,
"learning_rate": 4.506467509116078e-06,
"loss": 0.265,
"step": 1565
},
{
"epoch": 2.08,
"learning_rate": 4.4944590767683064e-06,
"loss": 0.2748,
"step": 1566
},
{
"epoch": 2.09,
"learning_rate": 4.482462025849262e-06,
"loss": 0.3021,
"step": 1567
},
{
"epoch": 2.09,
"learning_rate": 4.470476381160065e-06,
"loss": 0.2591,
"step": 1568
},
{
"epoch": 2.09,
"learning_rate": 4.458502167478254e-06,
"loss": 0.2534,
"step": 1569
},
{
"epoch": 2.09,
"learning_rate": 4.446539409557736e-06,
"loss": 0.3279,
"step": 1570
},
{
"epoch": 2.09,
"learning_rate": 4.434588132128749e-06,
"loss": 0.3502,
"step": 1571
},
{
"epoch": 2.09,
"learning_rate": 4.422648359897776e-06,
"loss": 0.3065,
"step": 1572
},
{
"epoch": 2.09,
"learning_rate": 4.410720117547528e-06,
"loss": 0.3202,
"step": 1573
},
{
"epoch": 2.09,
"learning_rate": 4.3988034297368795e-06,
"loss": 0.2672,
"step": 1574
},
{
"epoch": 2.1,
"learning_rate": 4.386898321100818e-06,
"loss": 0.2682,
"step": 1575
},
{
"epoch": 2.1,
"learning_rate": 4.3750048162504e-06,
"loss": 0.3013,
"step": 1576
},
{
"epoch": 2.1,
"learning_rate": 4.3631229397726805e-06,
"loss": 0.3204,
"step": 1577
},
{
"epoch": 2.1,
"learning_rate": 4.351252716230685e-06,
"loss": 0.2938,
"step": 1578
},
{
"epoch": 2.1,
"learning_rate": 4.339394170163347e-06,
"loss": 0.277,
"step": 1579
},
{
"epoch": 2.1,
"learning_rate": 4.327547326085459e-06,
"loss": 0.2645,
"step": 1580
},
{
"epoch": 2.1,
"learning_rate": 4.315712208487626e-06,
"loss": 0.2595,
"step": 1581
},
{
"epoch": 2.11,
"learning_rate": 4.3038888418362045e-06,
"loss": 0.2986,
"step": 1582
},
{
"epoch": 2.11,
"learning_rate": 4.292077250573265e-06,
"loss": 0.2658,
"step": 1583
},
{
"epoch": 2.11,
"learning_rate": 4.280277459116532e-06,
"loss": 0.2544,
"step": 1584
},
{
"epoch": 2.11,
"learning_rate": 4.268489491859335e-06,
"loss": 0.2755,
"step": 1585
},
{
"epoch": 2.11,
"learning_rate": 4.256713373170565e-06,
"loss": 0.2834,
"step": 1586
},
{
"epoch": 2.11,
"learning_rate": 4.244949127394613e-06,
"loss": 0.2922,
"step": 1587
},
{
"epoch": 2.11,
"learning_rate": 4.2331967788513295e-06,
"loss": 0.3073,
"step": 1588
},
{
"epoch": 2.11,
"learning_rate": 4.22145635183597e-06,
"loss": 0.2888,
"step": 1589
},
{
"epoch": 2.12,
"learning_rate": 4.209727870619141e-06,
"loss": 0.3523,
"step": 1590
},
{
"epoch": 2.12,
"learning_rate": 4.198011359446759e-06,
"loss": 0.2993,
"step": 1591
},
{
"epoch": 2.12,
"learning_rate": 4.186306842539991e-06,
"loss": 0.3119,
"step": 1592
},
{
"epoch": 2.12,
"learning_rate": 4.1746143440952135e-06,
"loss": 0.297,
"step": 1593
},
{
"epoch": 2.12,
"learning_rate": 4.162933888283952e-06,
"loss": 0.2553,
"step": 1594
},
{
"epoch": 2.12,
"learning_rate": 4.151265499252841e-06,
"loss": 0.2403,
"step": 1595
},
{
"epoch": 2.12,
"learning_rate": 4.139609201123568e-06,
"loss": 0.2411,
"step": 1596
},
{
"epoch": 2.13,
"learning_rate": 4.127965017992823e-06,
"loss": 0.2568,
"step": 1597
},
{
"epoch": 2.13,
"learning_rate": 4.116332973932256e-06,
"loss": 0.2933,
"step": 1598
},
{
"epoch": 2.13,
"learning_rate": 4.104713092988421e-06,
"loss": 0.2436,
"step": 1599
},
{
"epoch": 2.13,
"learning_rate": 4.093105399182723e-06,
"loss": 0.2686,
"step": 1600
},
{
"epoch": 2.13,
"learning_rate": 4.081509916511378e-06,
"loss": 0.3192,
"step": 1601
},
{
"epoch": 2.13,
"learning_rate": 4.069926668945357e-06,
"loss": 0.2468,
"step": 1602
},
{
"epoch": 2.13,
"learning_rate": 4.058355680430337e-06,
"loss": 0.311,
"step": 1603
},
{
"epoch": 2.13,
"learning_rate": 4.0467969748866506e-06,
"loss": 0.2698,
"step": 1604
},
{
"epoch": 2.14,
"learning_rate": 4.035250576209244e-06,
"loss": 0.2707,
"step": 1605
},
{
"epoch": 2.14,
"learning_rate": 4.023716508267615e-06,
"loss": 0.2912,
"step": 1606
},
{
"epoch": 2.14,
"learning_rate": 4.0121947949057745e-06,
"loss": 0.2814,
"step": 1607
},
{
"epoch": 2.14,
"learning_rate": 4.000685459942193e-06,
"loss": 0.2864,
"step": 1608
},
{
"epoch": 2.14,
"learning_rate": 3.989188527169749e-06,
"loss": 0.2819,
"step": 1609
},
{
"epoch": 2.14,
"learning_rate": 3.977704020355686e-06,
"loss": 0.2714,
"step": 1610
},
{
"epoch": 2.14,
"learning_rate": 3.966231963241555e-06,
"loss": 0.2988,
"step": 1611
},
{
"epoch": 2.15,
"learning_rate": 3.954772379543177e-06,
"loss": 0.308,
"step": 1612
},
{
"epoch": 2.15,
"learning_rate": 3.943325292950579e-06,
"loss": 0.2696,
"step": 1613
},
{
"epoch": 2.15,
"learning_rate": 3.931890727127959e-06,
"loss": 0.304,
"step": 1614
},
{
"epoch": 2.15,
"learning_rate": 3.920468705713629e-06,
"loss": 0.2708,
"step": 1615
},
{
"epoch": 2.15,
"learning_rate": 3.909059252319969e-06,
"loss": 0.2919,
"step": 1616
},
{
"epoch": 2.15,
"learning_rate": 3.897662390533377e-06,
"loss": 0.3162,
"step": 1617
},
{
"epoch": 2.15,
"learning_rate": 3.886278143914219e-06,
"loss": 0.3069,
"step": 1618
},
{
"epoch": 2.15,
"learning_rate": 3.874906535996789e-06,
"loss": 0.2871,
"step": 1619
},
{
"epoch": 2.16,
"learning_rate": 3.863547590289243e-06,
"loss": 0.2698,
"step": 1620
},
{
"epoch": 2.16,
"learning_rate": 3.852201330273569e-06,
"loss": 0.275,
"step": 1621
},
{
"epoch": 2.16,
"learning_rate": 3.840867779405528e-06,
"loss": 0.2522,
"step": 1622
},
{
"epoch": 2.16,
"learning_rate": 3.829546961114608e-06,
"loss": 0.3177,
"step": 1623
},
{
"epoch": 2.16,
"learning_rate": 3.818238898803975e-06,
"loss": 0.2847,
"step": 1624
},
{
"epoch": 2.16,
"learning_rate": 3.806943615850417e-06,
"loss": 0.2798,
"step": 1625
},
{
"epoch": 2.16,
"learning_rate": 3.7956611356043196e-06,
"loss": 0.2904,
"step": 1626
},
{
"epoch": 2.17,
"learning_rate": 3.784391481389592e-06,
"loss": 0.3192,
"step": 1627
},
{
"epoch": 2.17,
"learning_rate": 3.773134676503629e-06,
"loss": 0.313,
"step": 1628
},
{
"epoch": 2.17,
"learning_rate": 3.7618907442172646e-06,
"loss": 0.2804,
"step": 1629
},
{
"epoch": 2.17,
"learning_rate": 3.750659707774723e-06,
"loss": 0.2757,
"step": 1630
},
{
"epoch": 2.17,
"learning_rate": 3.7394415903935557e-06,
"loss": 0.2502,
"step": 1631
},
{
"epoch": 2.17,
"learning_rate": 3.72823641526463e-06,
"loss": 0.2803,
"step": 1632
},
{
"epoch": 2.17,
"learning_rate": 3.7170442055520418e-06,
"loss": 0.2719,
"step": 1633
},
{
"epoch": 2.17,
"learning_rate": 3.705864984393088e-06,
"loss": 0.2822,
"step": 1634
},
{
"epoch": 2.18,
"learning_rate": 3.69469877489822e-06,
"loss": 0.2217,
"step": 1635
},
{
"epoch": 2.18,
"learning_rate": 3.6835456001509775e-06,
"loss": 0.2997,
"step": 1636
},
{
"epoch": 2.18,
"learning_rate": 3.6724054832079625e-06,
"loss": 0.3332,
"step": 1637
},
{
"epoch": 2.18,
"learning_rate": 3.6612784470987893e-06,
"loss": 0.2533,
"step": 1638
},
{
"epoch": 2.18,
"learning_rate": 3.650164514826021e-06,
"loss": 0.2877,
"step": 1639
},
{
"epoch": 2.18,
"learning_rate": 3.6390637093651373e-06,
"loss": 0.2474,
"step": 1640
},
{
"epoch": 2.18,
"learning_rate": 3.6279760536644716e-06,
"loss": 0.2916,
"step": 1641
},
{
"epoch": 2.18,
"learning_rate": 3.6169015706451804e-06,
"loss": 0.3209,
"step": 1642
},
{
"epoch": 2.19,
"learning_rate": 3.6058402832011953e-06,
"loss": 0.323,
"step": 1643
},
{
"epoch": 2.19,
"learning_rate": 3.5947922141991576e-06,
"loss": 0.2923,
"step": 1644
},
{
"epoch": 2.19,
"learning_rate": 3.583757386478389e-06,
"loss": 0.2694,
"step": 1645
},
{
"epoch": 2.19,
"learning_rate": 3.572735822850838e-06,
"loss": 0.2599,
"step": 1646
},
{
"epoch": 2.19,
"learning_rate": 3.561727546101024e-06,
"loss": 0.2933,
"step": 1647
},
{
"epoch": 2.19,
"learning_rate": 3.550732578986006e-06,
"loss": 0.2948,
"step": 1648
},
{
"epoch": 2.19,
"learning_rate": 3.539750944235335e-06,
"loss": 0.2704,
"step": 1649
},
{
"epoch": 2.2,
"learning_rate": 3.5287826645509892e-06,
"loss": 0.2442,
"step": 1650
},
{
"epoch": 2.2,
"learning_rate": 3.5178277626073465e-06,
"loss": 0.281,
"step": 1651
},
{
"epoch": 2.2,
"learning_rate": 3.506886261051119e-06,
"loss": 0.3236,
"step": 1652
},
{
"epoch": 2.2,
"learning_rate": 3.4959581825013256e-06,
"loss": 0.277,
"step": 1653
},
{
"epoch": 2.2,
"learning_rate": 3.4850435495492305e-06,
"loss": 0.2643,
"step": 1654
},
{
"epoch": 2.2,
"learning_rate": 3.4741423847583134e-06,
"loss": 0.2905,
"step": 1655
},
{
"epoch": 2.2,
"learning_rate": 3.4632547106642023e-06,
"loss": 0.2982,
"step": 1656
},
{
"epoch": 2.2,
"learning_rate": 3.452380549774631e-06,
"loss": 0.3042,
"step": 1657
},
{
"epoch": 2.21,
"learning_rate": 3.4415199245694084e-06,
"loss": 0.2886,
"step": 1658
},
{
"epoch": 2.21,
"learning_rate": 3.4306728575003556e-06,
"loss": 0.2892,
"step": 1659
},
{
"epoch": 2.21,
"learning_rate": 3.4198393709912714e-06,
"loss": 0.3049,
"step": 1660
},
{
"epoch": 2.21,
"learning_rate": 3.4090194874378758e-06,
"loss": 0.3113,
"step": 1661
},
{
"epoch": 2.21,
"learning_rate": 3.3982132292077695e-06,
"loss": 0.2593,
"step": 1662
},
{
"epoch": 2.21,
"learning_rate": 3.387420618640379e-06,
"loss": 0.2622,
"step": 1663
},
{
"epoch": 2.21,
"learning_rate": 3.376641678046926e-06,
"loss": 0.2365,
"step": 1664
},
{
"epoch": 2.22,
"learning_rate": 3.3658764297103665e-06,
"loss": 0.2725,
"step": 1665
},
{
"epoch": 2.22,
"learning_rate": 3.35512489588536e-06,
"loss": 0.2857,
"step": 1666
},
{
"epoch": 2.22,
"learning_rate": 3.3443870987982097e-06,
"loss": 0.2917,
"step": 1667
},
{
"epoch": 2.22,
"learning_rate": 3.3336630606468136e-06,
"loss": 0.2748,
"step": 1668
},
{
"epoch": 2.22,
"learning_rate": 3.3229528036006355e-06,
"loss": 0.3054,
"step": 1669
},
{
"epoch": 2.22,
"learning_rate": 3.3122563498006477e-06,
"loss": 0.2857,
"step": 1670
},
{
"epoch": 2.22,
"learning_rate": 3.301573721359285e-06,
"loss": 0.2768,
"step": 1671
},
{
"epoch": 2.22,
"learning_rate": 3.290904940360411e-06,
"loss": 0.2679,
"step": 1672
},
{
"epoch": 2.23,
"learning_rate": 3.2802500288592477e-06,
"loss": 0.2753,
"step": 1673
},
{
"epoch": 2.23,
"learning_rate": 3.2696090088823564e-06,
"loss": 0.3112,
"step": 1674
},
{
"epoch": 2.23,
"learning_rate": 3.258981902427575e-06,
"loss": 0.3057,
"step": 1675
},
{
"epoch": 2.23,
"learning_rate": 3.2483687314639846e-06,
"loss": 0.2615,
"step": 1676
},
{
"epoch": 2.23,
"learning_rate": 3.237769517931848e-06,
"loss": 0.2851,
"step": 1677
},
{
"epoch": 2.23,
"learning_rate": 3.2271842837425917e-06,
"loss": 0.2582,
"step": 1678
},
{
"epoch": 2.23,
"learning_rate": 3.216613050778721e-06,
"loss": 0.2726,
"step": 1679
},
{
"epoch": 2.24,
"learning_rate": 3.206055840893811e-06,
"loss": 0.303,
"step": 1680
},
{
"epoch": 2.24,
"learning_rate": 3.195512675912447e-06,
"loss": 0.3676,
"step": 1681
},
{
"epoch": 2.24,
"learning_rate": 3.184983577630171e-06,
"loss": 0.3246,
"step": 1682
},
{
"epoch": 2.24,
"learning_rate": 3.174468567813461e-06,
"loss": 0.2546,
"step": 1683
},
{
"epoch": 2.24,
"learning_rate": 3.1639676681996513e-06,
"loss": 0.2647,
"step": 1684
},
{
"epoch": 2.24,
"learning_rate": 3.153480900496919e-06,
"loss": 0.2786,
"step": 1685
},
{
"epoch": 2.24,
"learning_rate": 3.1430082863842247e-06,
"loss": 0.2762,
"step": 1686
},
{
"epoch": 2.24,
"learning_rate": 3.132549847511268e-06,
"loss": 0.2656,
"step": 1687
},
{
"epoch": 2.25,
"learning_rate": 3.122105605498442e-06,
"loss": 0.268,
"step": 1688
},
{
"epoch": 2.25,
"learning_rate": 3.1116755819368037e-06,
"loss": 0.2496,
"step": 1689
},
{
"epoch": 2.25,
"learning_rate": 3.101259798387999e-06,
"loss": 0.312,
"step": 1690
},
{
"epoch": 2.25,
"learning_rate": 3.0908582763842444e-06,
"loss": 0.3181,
"step": 1691
},
{
"epoch": 2.25,
"learning_rate": 3.0804710374282775e-06,
"loss": 0.2936,
"step": 1692
},
{
"epoch": 2.25,
"learning_rate": 3.0700981029933017e-06,
"loss": 0.277,
"step": 1693
},
{
"epoch": 2.25,
"learning_rate": 3.0597394945229565e-06,
"loss": 0.3224,
"step": 1694
},
{
"epoch": 2.26,
"learning_rate": 3.049395233431259e-06,
"loss": 0.2781,
"step": 1695
},
{
"epoch": 2.26,
"learning_rate": 3.0390653411025704e-06,
"loss": 0.3009,
"step": 1696
},
{
"epoch": 2.26,
"learning_rate": 3.028749838891547e-06,
"loss": 0.2604,
"step": 1697
},
{
"epoch": 2.26,
"learning_rate": 3.018448748123097e-06,
"loss": 0.2925,
"step": 1698
},
{
"epoch": 2.26,
"learning_rate": 3.008162090092335e-06,
"loss": 0.2235,
"step": 1699
},
{
"epoch": 2.26,
"learning_rate": 2.997889886064541e-06,
"loss": 0.314,
"step": 1700
},
{
"epoch": 2.26,
"learning_rate": 2.9876321572751143e-06,
"loss": 0.3214,
"step": 1701
},
{
"epoch": 2.26,
"learning_rate": 2.9773889249295294e-06,
"loss": 0.2915,
"step": 1702
},
{
"epoch": 2.27,
"learning_rate": 2.9671602102032926e-06,
"loss": 0.2613,
"step": 1703
},
{
"epoch": 2.27,
"learning_rate": 2.9569460342418986e-06,
"loss": 0.2976,
"step": 1704
},
{
"epoch": 2.27,
"learning_rate": 2.9467464181607873e-06,
"loss": 0.2736,
"step": 1705
},
{
"epoch": 2.27,
"learning_rate": 2.9365613830453e-06,
"loss": 0.3119,
"step": 1706
},
{
"epoch": 2.27,
"learning_rate": 2.926390949950633e-06,
"loss": 0.3096,
"step": 1707
},
{
"epoch": 2.27,
"learning_rate": 2.9162351399017964e-06,
"loss": 0.2989,
"step": 1708
},
{
"epoch": 2.27,
"learning_rate": 2.9060939738935724e-06,
"loss": 0.3273,
"step": 1709
},
{
"epoch": 2.28,
"learning_rate": 2.895967472890471e-06,
"loss": 0.2744,
"step": 1710
},
{
"epoch": 2.28,
"learning_rate": 2.8858556578266814e-06,
"loss": 0.3116,
"step": 1711
},
{
"epoch": 2.28,
"learning_rate": 2.8757585496060368e-06,
"loss": 0.293,
"step": 1712
},
{
"epoch": 2.28,
"learning_rate": 2.8656761691019673e-06,
"loss": 0.2583,
"step": 1713
},
{
"epoch": 2.28,
"learning_rate": 2.855608537157454e-06,
"loss": 0.3229,
"step": 1714
},
{
"epoch": 2.28,
"learning_rate": 2.845555674584991e-06,
"loss": 0.2773,
"step": 1715
},
{
"epoch": 2.28,
"learning_rate": 2.8355176021665397e-06,
"loss": 0.2946,
"step": 1716
},
{
"epoch": 2.28,
"learning_rate": 2.8254943406534864e-06,
"loss": 0.2622,
"step": 1717
},
{
"epoch": 2.29,
"learning_rate": 2.815485910766599e-06,
"loss": 0.273,
"step": 1718
},
{
"epoch": 2.29,
"learning_rate": 2.805492333195985e-06,
"loss": 0.2846,
"step": 1719
},
{
"epoch": 2.29,
"learning_rate": 2.7955136286010474e-06,
"loss": 0.2458,
"step": 1720
},
{
"epoch": 2.29,
"learning_rate": 2.7855498176104435e-06,
"loss": 0.2927,
"step": 1721
},
{
"epoch": 2.29,
"learning_rate": 2.7756009208220413e-06,
"loss": 0.2873,
"step": 1722
},
{
"epoch": 2.29,
"learning_rate": 2.765666958802876e-06,
"loss": 0.3134,
"step": 1723
},
{
"epoch": 2.29,
"learning_rate": 2.7557479520891104e-06,
"loss": 0.29,
"step": 1724
},
{
"epoch": 2.3,
"learning_rate": 2.745843921185991e-06,
"loss": 0.3168,
"step": 1725
},
{
"epoch": 2.3,
"learning_rate": 2.7359548865678032e-06,
"loss": 0.302,
"step": 1726
},
{
"epoch": 2.3,
"learning_rate": 2.726080868677832e-06,
"loss": 0.2911,
"step": 1727
},
{
"epoch": 2.3,
"learning_rate": 2.7162218879283174e-06,
"loss": 0.2471,
"step": 1728
},
{
"epoch": 2.3,
"learning_rate": 2.7063779647004185e-06,
"loss": 0.3204,
"step": 1729
},
{
"epoch": 2.3,
"learning_rate": 2.696549119344161e-06,
"loss": 0.2544,
"step": 1730
},
{
"epoch": 2.3,
"learning_rate": 2.686735372178405e-06,
"loss": 0.2976,
"step": 1731
},
{
"epoch": 2.3,
"learning_rate": 2.676936743490789e-06,
"loss": 0.2915,
"step": 1732
},
{
"epoch": 2.31,
"learning_rate": 2.6671532535377132e-06,
"loss": 0.3095,
"step": 1733
},
{
"epoch": 2.31,
"learning_rate": 2.6573849225442694e-06,
"loss": 0.288,
"step": 1734
},
{
"epoch": 2.31,
"learning_rate": 2.647631770704218e-06,
"loss": 0.3127,
"step": 1735
},
{
"epoch": 2.31,
"learning_rate": 2.6378938181799365e-06,
"loss": 0.3009,
"step": 1736
},
{
"epoch": 2.31,
"learning_rate": 2.6281710851023857e-06,
"loss": 0.2595,
"step": 1737
},
{
"epoch": 2.31,
"learning_rate": 2.618463591571052e-06,
"loss": 0.2579,
"step": 1738
},
{
"epoch": 2.31,
"learning_rate": 2.608771357653936e-06,
"loss": 0.2496,
"step": 1739
},
{
"epoch": 2.32,
"learning_rate": 2.599094403387481e-06,
"loss": 0.2874,
"step": 1740
},
{
"epoch": 2.32,
"learning_rate": 2.5894327487765424e-06,
"loss": 0.2551,
"step": 1741
},
{
"epoch": 2.32,
"learning_rate": 2.579786413794355e-06,
"loss": 0.3256,
"step": 1742
},
{
"epoch": 2.32,
"learning_rate": 2.570155418382473e-06,
"loss": 0.3191,
"step": 1743
},
{
"epoch": 2.32,
"learning_rate": 2.5605397824507426e-06,
"loss": 0.2512,
"step": 1744
},
{
"epoch": 2.32,
"learning_rate": 2.5509395258772696e-06,
"loss": 0.2963,
"step": 1745
},
{
"epoch": 2.32,
"learning_rate": 2.541354668508352e-06,
"loss": 0.2779,
"step": 1746
},
{
"epoch": 2.32,
"learning_rate": 2.5317852301584642e-06,
"loss": 0.2721,
"step": 1747
},
{
"epoch": 2.33,
"learning_rate": 2.5222312306101925e-06,
"loss": 0.2931,
"step": 1748
},
{
"epoch": 2.33,
"learning_rate": 2.5126926896142155e-06,
"loss": 0.2314,
"step": 1749
},
{
"epoch": 2.33,
"learning_rate": 2.5031696268892602e-06,
"loss": 0.3332,
"step": 1750
},
{
"epoch": 2.33,
"learning_rate": 2.4936620621220453e-06,
"loss": 0.292,
"step": 1751
},
{
"epoch": 2.33,
"learning_rate": 2.4841700149672576e-06,
"loss": 0.2978,
"step": 1752
},
{
"epoch": 2.33,
"learning_rate": 2.474693505047504e-06,
"loss": 0.2726,
"step": 1753
},
{
"epoch": 2.33,
"learning_rate": 2.465232551953265e-06,
"loss": 0.2921,
"step": 1754
},
{
"epoch": 2.34,
"learning_rate": 2.4557871752428677e-06,
"loss": 0.27,
"step": 1755
},
{
"epoch": 2.34,
"learning_rate": 2.446357394442441e-06,
"loss": 0.2627,
"step": 1756
},
{
"epoch": 2.34,
"learning_rate": 2.436943229045867e-06,
"loss": 0.2775,
"step": 1757
},
{
"epoch": 2.34,
"learning_rate": 2.427544698514753e-06,
"loss": 0.2968,
"step": 1758
},
{
"epoch": 2.34,
"learning_rate": 2.418161822278374e-06,
"loss": 0.2781,
"step": 1759
},
{
"epoch": 2.34,
"learning_rate": 2.408794619733653e-06,
"loss": 0.2936,
"step": 1760
},
{
"epoch": 2.34,
"learning_rate": 2.3994431102451065e-06,
"loss": 0.284,
"step": 1761
},
{
"epoch": 2.34,
"learning_rate": 2.390107313144815e-06,
"loss": 0.2362,
"step": 1762
},
{
"epoch": 2.35,
"learning_rate": 2.3807872477323736e-06,
"loss": 0.2559,
"step": 1763
},
{
"epoch": 2.35,
"learning_rate": 2.371482933274851e-06,
"loss": 0.2871,
"step": 1764
},
{
"epoch": 2.35,
"learning_rate": 2.3621943890067613e-06,
"loss": 0.2985,
"step": 1765
},
{
"epoch": 2.35,
"learning_rate": 2.3529216341300122e-06,
"loss": 0.2773,
"step": 1766
},
{
"epoch": 2.35,
"learning_rate": 2.343664687813878e-06,
"loss": 0.267,
"step": 1767
},
{
"epoch": 2.35,
"learning_rate": 2.334423569194948e-06,
"loss": 0.2669,
"step": 1768
},
{
"epoch": 2.35,
"learning_rate": 2.3251982973770904e-06,
"loss": 0.2842,
"step": 1769
},
{
"epoch": 2.36,
"learning_rate": 2.315988891431412e-06,
"loss": 0.2547,
"step": 1770
},
{
"epoch": 2.36,
"learning_rate": 2.3067953703962242e-06,
"loss": 0.3175,
"step": 1771
},
{
"epoch": 2.36,
"learning_rate": 2.2976177532769973e-06,
"loss": 0.3056,
"step": 1772
},
{
"epoch": 2.36,
"learning_rate": 2.288456059046331e-06,
"loss": 0.2917,
"step": 1773
},
{
"epoch": 2.36,
"learning_rate": 2.2793103066439024e-06,
"loss": 0.2827,
"step": 1774
},
{
"epoch": 2.36,
"learning_rate": 2.270180514976429e-06,
"loss": 0.2943,
"step": 1775
},
{
"epoch": 2.36,
"learning_rate": 2.261066702917639e-06,
"loss": 0.3367,
"step": 1776
},
{
"epoch": 2.36,
"learning_rate": 2.2519688893082238e-06,
"loss": 0.2613,
"step": 1777
},
{
"epoch": 2.37,
"learning_rate": 2.2428870929558012e-06,
"loss": 0.3604,
"step": 1778
},
{
"epoch": 2.37,
"learning_rate": 2.2338213326348834e-06,
"loss": 0.2802,
"step": 1779
},
{
"epoch": 2.37,
"learning_rate": 2.224771627086826e-06,
"loss": 0.2902,
"step": 1780
},
{
"epoch": 2.37,
"learning_rate": 2.2157379950197923e-06,
"loss": 0.2624,
"step": 1781
},
{
"epoch": 2.37,
"learning_rate": 2.2067204551087228e-06,
"loss": 0.2829,
"step": 1782
},
{
"epoch": 2.37,
"learning_rate": 2.1977190259952887e-06,
"loss": 0.2563,
"step": 1783
},
{
"epoch": 2.37,
"learning_rate": 2.188733726287855e-06,
"loss": 0.2785,
"step": 1784
},
{
"epoch": 2.38,
"learning_rate": 2.1797645745614527e-06,
"loss": 0.2991,
"step": 1785
},
{
"epoch": 2.38,
"learning_rate": 2.1708115893577143e-06,
"loss": 0.336,
"step": 1786
},
{
"epoch": 2.38,
"learning_rate": 2.1618747891848615e-06,
"loss": 0.3159,
"step": 1787
},
{
"epoch": 2.38,
"learning_rate": 2.1529541925176555e-06,
"loss": 0.3084,
"step": 1788
},
{
"epoch": 2.38,
"learning_rate": 2.1440498177973603e-06,
"loss": 0.2434,
"step": 1789
},
{
"epoch": 2.38,
"learning_rate": 2.13516168343171e-06,
"loss": 0.3207,
"step": 1790
},
{
"epoch": 2.38,
"learning_rate": 2.1262898077948547e-06,
"loss": 0.3113,
"step": 1791
},
{
"epoch": 2.38,
"learning_rate": 2.1174342092273413e-06,
"loss": 0.306,
"step": 1792
},
{
"epoch": 2.39,
"learning_rate": 2.1085949060360654e-06,
"loss": 0.2778,
"step": 1793
},
{
"epoch": 2.39,
"learning_rate": 2.0997719164942352e-06,
"loss": 0.279,
"step": 1794
},
{
"epoch": 2.39,
"learning_rate": 2.0909652588413345e-06,
"loss": 0.2918,
"step": 1795
},
{
"epoch": 2.39,
"learning_rate": 2.082174951283091e-06,
"loss": 0.258,
"step": 1796
},
{
"epoch": 2.39,
"learning_rate": 2.0734010119914193e-06,
"loss": 0.2881,
"step": 1797
},
{
"epoch": 2.39,
"learning_rate": 2.064643459104405e-06,
"loss": 0.2881,
"step": 1798
},
{
"epoch": 2.39,
"learning_rate": 2.055902310726259e-06,
"loss": 0.2942,
"step": 1799
},
{
"epoch": 2.4,
"learning_rate": 2.047177584927278e-06,
"loss": 0.3092,
"step": 1800
},
{
"epoch": 2.4,
"learning_rate": 2.038469299743806e-06,
"loss": 0.2957,
"step": 1801
},
{
"epoch": 2.4,
"learning_rate": 2.029777473178205e-06,
"loss": 0.2596,
"step": 1802
},
{
"epoch": 2.4,
"learning_rate": 2.0211021231988103e-06,
"loss": 0.285,
"step": 1803
},
{
"epoch": 2.4,
"learning_rate": 2.0124432677398942e-06,
"loss": 0.2614,
"step": 1804
},
{
"epoch": 2.4,
"learning_rate": 2.0038009247016323e-06,
"loss": 0.2971,
"step": 1805
},
{
"epoch": 2.4,
"learning_rate": 1.995175111950064e-06,
"loss": 0.2641,
"step": 1806
},
{
"epoch": 2.4,
"learning_rate": 1.9865658473170555e-06,
"loss": 0.2995,
"step": 1807
},
{
"epoch": 2.41,
"learning_rate": 1.9779731486002664e-06,
"loss": 0.2968,
"step": 1808
},
{
"epoch": 2.41,
"learning_rate": 1.969397033563106e-06,
"loss": 0.3318,
"step": 1809
},
{
"epoch": 2.41,
"learning_rate": 1.9608375199347027e-06,
"loss": 0.2813,
"step": 1810
},
{
"epoch": 2.41,
"learning_rate": 1.952294625409867e-06,
"loss": 0.2624,
"step": 1811
},
{
"epoch": 2.41,
"learning_rate": 1.9437683676490513e-06,
"loss": 0.3098,
"step": 1812
},
{
"epoch": 2.41,
"learning_rate": 1.9352587642783137e-06,
"loss": 0.2956,
"step": 1813
},
{
"epoch": 2.41,
"learning_rate": 1.926765832889288e-06,
"loss": 0.2895,
"step": 1814
},
{
"epoch": 2.42,
"learning_rate": 1.9182895910391375e-06,
"loss": 0.2829,
"step": 1815
},
{
"epoch": 2.42,
"learning_rate": 1.9098300562505266e-06,
"loss": 0.2663,
"step": 1816
},
{
"epoch": 2.42,
"learning_rate": 1.9013872460115813e-06,
"loss": 0.3045,
"step": 1817
},
{
"epoch": 2.42,
"learning_rate": 1.8929611777758528e-06,
"loss": 0.3335,
"step": 1818
},
{
"epoch": 2.42,
"learning_rate": 1.8845518689622843e-06,
"loss": 0.2597,
"step": 1819
},
{
"epoch": 2.42,
"learning_rate": 1.876159336955169e-06,
"loss": 0.3063,
"step": 1820
},
{
"epoch": 2.42,
"learning_rate": 1.8677835991041205e-06,
"loss": 0.264,
"step": 1821
},
{
"epoch": 2.42,
"learning_rate": 1.8594246727240339e-06,
"loss": 0.3139,
"step": 1822
},
{
"epoch": 2.43,
"learning_rate": 1.8510825750950512e-06,
"loss": 0.3005,
"step": 1823
},
{
"epoch": 2.43,
"learning_rate": 1.8427573234625228e-06,
"loss": 0.2733,
"step": 1824
},
{
"epoch": 2.43,
"learning_rate": 1.8344489350369776e-06,
"loss": 0.2918,
"step": 1825
},
{
"epoch": 2.43,
"learning_rate": 1.8261574269940784e-06,
"loss": 0.2815,
"step": 1826
},
{
"epoch": 2.43,
"learning_rate": 1.8178828164745965e-06,
"loss": 0.2633,
"step": 1827
},
{
"epoch": 2.43,
"learning_rate": 1.8096251205843685e-06,
"loss": 0.2829,
"step": 1828
},
{
"epoch": 2.43,
"learning_rate": 1.8013843563942668e-06,
"loss": 0.2585,
"step": 1829
},
{
"epoch": 2.44,
"learning_rate": 1.7931605409401575e-06,
"loss": 0.2827,
"step": 1830
},
{
"epoch": 2.44,
"learning_rate": 1.7849536912228737e-06,
"loss": 0.2402,
"step": 1831
},
{
"epoch": 2.44,
"learning_rate": 1.776763824208172e-06,
"loss": 0.2447,
"step": 1832
},
{
"epoch": 2.44,
"learning_rate": 1.7685909568267034e-06,
"loss": 0.2825,
"step": 1833
},
{
"epoch": 2.44,
"learning_rate": 1.7604351059739745e-06,
"loss": 0.2777,
"step": 1834
},
{
"epoch": 2.44,
"learning_rate": 1.7522962885103145e-06,
"loss": 0.2952,
"step": 1835
},
{
"epoch": 2.44,
"learning_rate": 1.7441745212608419e-06,
"loss": 0.2926,
"step": 1836
},
{
"epoch": 2.44,
"learning_rate": 1.736069821015427e-06,
"loss": 0.2769,
"step": 1837
},
{
"epoch": 2.45,
"learning_rate": 1.7279822045286577e-06,
"loss": 0.3156,
"step": 1838
},
{
"epoch": 2.45,
"learning_rate": 1.7199116885197996e-06,
"loss": 0.2911,
"step": 1839
},
{
"epoch": 2.45,
"learning_rate": 1.7118582896727786e-06,
"loss": 0.2989,
"step": 1840
},
{
"epoch": 2.45,
"learning_rate": 1.703822024636127e-06,
"loss": 0.3122,
"step": 1841
},
{
"epoch": 2.45,
"learning_rate": 1.6958029100229579e-06,
"loss": 0.3007,
"step": 1842
},
{
"epoch": 2.45,
"learning_rate": 1.6878009624109315e-06,
"loss": 0.2943,
"step": 1843
},
{
"epoch": 2.45,
"learning_rate": 1.679816198342219e-06,
"loss": 0.2678,
"step": 1844
},
{
"epoch": 2.46,
"learning_rate": 1.671848634323463e-06,
"loss": 0.2849,
"step": 1845
},
{
"epoch": 2.46,
"learning_rate": 1.663898286825759e-06,
"loss": 0.266,
"step": 1846
},
{
"epoch": 2.46,
"learning_rate": 1.6559651722846037e-06,
"loss": 0.3055,
"step": 1847
},
{
"epoch": 2.46,
"learning_rate": 1.6480493070998738e-06,
"loss": 0.2899,
"step": 1848
},
{
"epoch": 2.46,
"learning_rate": 1.6401507076357836e-06,
"loss": 0.3152,
"step": 1849
},
{
"epoch": 2.46,
"learning_rate": 1.632269390220852e-06,
"loss": 0.2777,
"step": 1850
},
{
"epoch": 2.46,
"learning_rate": 1.6244053711478748e-06,
"loss": 0.3057,
"step": 1851
},
{
"epoch": 2.46,
"learning_rate": 1.6165586666738908e-06,
"loss": 0.2924,
"step": 1852
},
{
"epoch": 2.47,
"learning_rate": 1.6087292930201393e-06,
"loss": 0.289,
"step": 1853
},
{
"epoch": 2.47,
"learning_rate": 1.6009172663720352e-06,
"loss": 0.2587,
"step": 1854
},
{
"epoch": 2.47,
"learning_rate": 1.5931226028791324e-06,
"loss": 0.2916,
"step": 1855
},
{
"epoch": 2.47,
"learning_rate": 1.5853453186550827e-06,
"loss": 0.3499,
"step": 1856
},
{
"epoch": 2.47,
"learning_rate": 1.5775854297776249e-06,
"loss": 0.2826,
"step": 1857
},
{
"epoch": 2.47,
"learning_rate": 1.5698429522885273e-06,
"loss": 0.3013,
"step": 1858
},
{
"epoch": 2.47,
"learning_rate": 1.562117902193564e-06,
"loss": 0.2691,
"step": 1859
},
{
"epoch": 2.48,
"learning_rate": 1.5544102954624873e-06,
"loss": 0.2211,
"step": 1860
},
{
"epoch": 2.48,
"learning_rate": 1.546720148028983e-06,
"loss": 0.331,
"step": 1861
},
{
"epoch": 2.48,
"learning_rate": 1.5390474757906449e-06,
"loss": 0.2653,
"step": 1862
},
{
"epoch": 2.48,
"learning_rate": 1.5313922946089488e-06,
"loss": 0.2559,
"step": 1863
},
{
"epoch": 2.48,
"learning_rate": 1.5237546203092046e-06,
"loss": 0.2915,
"step": 1864
},
{
"epoch": 2.48,
"learning_rate": 1.5161344686805324e-06,
"loss": 0.2929,
"step": 1865
},
{
"epoch": 2.48,
"learning_rate": 1.5085318554758244e-06,
"loss": 0.3095,
"step": 1866
},
{
"epoch": 2.48,
"learning_rate": 1.500946796411722e-06,
"loss": 0.2752,
"step": 1867
},
{
"epoch": 2.49,
"learning_rate": 1.493379307168573e-06,
"loss": 0.2825,
"step": 1868
},
{
"epoch": 2.49,
"learning_rate": 1.485829403390411e-06,
"loss": 0.2708,
"step": 1869
},
{
"epoch": 2.49,
"learning_rate": 1.4782971006849078e-06,
"loss": 0.2964,
"step": 1870
},
{
"epoch": 2.49,
"learning_rate": 1.4707824146233541e-06,
"loss": 0.2993,
"step": 1871
},
{
"epoch": 2.49,
"learning_rate": 1.463285360740616e-06,
"loss": 0.3013,
"step": 1872
},
{
"epoch": 2.49,
"learning_rate": 1.4558059545351144e-06,
"loss": 0.3196,
"step": 1873
},
{
"epoch": 2.49,
"learning_rate": 1.4483442114687884e-06,
"loss": 0.2641,
"step": 1874
},
{
"epoch": 2.5,
"learning_rate": 1.4409001469670615e-06,
"loss": 0.274,
"step": 1875
},
{
"epoch": 2.5,
"learning_rate": 1.433473776418811e-06,
"loss": 0.2977,
"step": 1876
},
{
"epoch": 2.5,
"learning_rate": 1.4260651151763304e-06,
"loss": 0.314,
"step": 1877
},
{
"epoch": 2.5,
"learning_rate": 1.4186741785553116e-06,
"loss": 0.2708,
"step": 1878
},
{
"epoch": 2.5,
"learning_rate": 1.4113009818347988e-06,
"loss": 0.2854,
"step": 1879
},
{
"epoch": 2.5,
"learning_rate": 1.40394554025717e-06,
"loss": 0.2619,
"step": 1880
},
{
"epoch": 2.5,
"learning_rate": 1.3966078690280948e-06,
"loss": 0.2925,
"step": 1881
},
{
"epoch": 2.5,
"learning_rate": 1.3892879833165029e-06,
"loss": 0.2681,
"step": 1882
},
{
"epoch": 2.51,
"learning_rate": 1.3819858982545598e-06,
"loss": 0.2858,
"step": 1883
},
{
"epoch": 2.51,
"learning_rate": 1.3747016289376347e-06,
"loss": 0.2814,
"step": 1884
},
{
"epoch": 2.51,
"learning_rate": 1.367435190424261e-06,
"loss": 0.292,
"step": 1885
},
{
"epoch": 2.51,
"learning_rate": 1.3601865977361196e-06,
"loss": 0.267,
"step": 1886
},
{
"epoch": 2.51,
"learning_rate": 1.3529558658579945e-06,
"loss": 0.3031,
"step": 1887
},
{
"epoch": 2.51,
"learning_rate": 1.3457430097377421e-06,
"loss": 0.2524,
"step": 1888
},
{
"epoch": 2.51,
"learning_rate": 1.338548044286272e-06,
"loss": 0.2333,
"step": 1889
},
{
"epoch": 2.51,
"learning_rate": 1.3313709843775057e-06,
"loss": 0.259,
"step": 1890
},
{
"epoch": 2.52,
"learning_rate": 1.3242118448483477e-06,
"loss": 0.2548,
"step": 1891
},
{
"epoch": 2.52,
"learning_rate": 1.3170706404986645e-06,
"loss": 0.288,
"step": 1892
},
{
"epoch": 2.52,
"learning_rate": 1.3099473860912325e-06,
"loss": 0.2413,
"step": 1893
},
{
"epoch": 2.52,
"learning_rate": 1.3028420963517307e-06,
"loss": 0.2193,
"step": 1894
},
{
"epoch": 2.52,
"learning_rate": 1.2957547859686982e-06,
"loss": 0.2913,
"step": 1895
},
{
"epoch": 2.52,
"learning_rate": 1.2886854695935013e-06,
"loss": 0.3318,
"step": 1896
},
{
"epoch": 2.52,
"learning_rate": 1.2816341618403194e-06,
"loss": 0.2692,
"step": 1897
},
{
"epoch": 2.53,
"learning_rate": 1.2746008772860885e-06,
"loss": 0.2969,
"step": 1898
},
{
"epoch": 2.53,
"learning_rate": 1.2675856304704958e-06,
"loss": 0.2983,
"step": 1899
},
{
"epoch": 2.53,
"learning_rate": 1.2605884358959353e-06,
"loss": 0.2613,
"step": 1900
},
{
"epoch": 2.53,
"learning_rate": 1.2536093080274848e-06,
"loss": 0.2566,
"step": 1901
},
{
"epoch": 2.53,
"learning_rate": 1.2466482612928698e-06,
"loss": 0.2356,
"step": 1902
},
{
"epoch": 2.53,
"learning_rate": 1.2397053100824463e-06,
"loss": 0.2723,
"step": 1903
},
{
"epoch": 2.53,
"learning_rate": 1.2327804687491496e-06,
"loss": 0.3036,
"step": 1904
},
{
"epoch": 2.53,
"learning_rate": 1.2258737516084828e-06,
"loss": 0.296,
"step": 1905
},
{
"epoch": 2.54,
"learning_rate": 1.2189851729384827e-06,
"loss": 0.2793,
"step": 1906
},
{
"epoch": 2.54,
"learning_rate": 1.212114746979688e-06,
"loss": 0.2952,
"step": 1907
},
{
"epoch": 2.54,
"learning_rate": 1.2052624879351105e-06,
"loss": 0.2778,
"step": 1908
},
{
"epoch": 2.54,
"learning_rate": 1.198428409970206e-06,
"loss": 0.2561,
"step": 1909
},
{
"epoch": 2.54,
"learning_rate": 1.1916125272128454e-06,
"loss": 0.2666,
"step": 1910
},
{
"epoch": 2.54,
"learning_rate": 1.1848148537532845e-06,
"loss": 0.24,
"step": 1911
},
{
"epoch": 2.54,
"learning_rate": 1.1780354036441376e-06,
"loss": 0.2735,
"step": 1912
},
{
"epoch": 2.55,
"learning_rate": 1.1712741909003444e-06,
"loss": 0.2576,
"step": 1913
},
{
"epoch": 2.55,
"learning_rate": 1.164531229499145e-06,
"loss": 0.2803,
"step": 1914
},
{
"epoch": 2.55,
"learning_rate": 1.157806533380046e-06,
"loss": 0.3011,
"step": 1915
},
{
"epoch": 2.55,
"learning_rate": 1.1511001164447989e-06,
"loss": 0.2892,
"step": 1916
},
{
"epoch": 2.55,
"learning_rate": 1.1444119925573626e-06,
"loss": 0.3086,
"step": 1917
},
{
"epoch": 2.55,
"learning_rate": 1.1377421755438834e-06,
"loss": 0.2762,
"step": 1918
},
{
"epoch": 2.55,
"learning_rate": 1.1310906791926602e-06,
"loss": 0.273,
"step": 1919
},
{
"epoch": 2.55,
"learning_rate": 1.1244575172541183e-06,
"loss": 0.3002,
"step": 1920
},
{
"epoch": 2.56,
"learning_rate": 1.1178427034407823e-06,
"loss": 0.2443,
"step": 1921
},
{
"epoch": 2.56,
"learning_rate": 1.111246251427245e-06,
"loss": 0.2303,
"step": 1922
},
{
"epoch": 2.56,
"learning_rate": 1.1046681748501409e-06,
"loss": 0.2553,
"step": 1923
},
{
"epoch": 2.56,
"learning_rate": 1.098108487308116e-06,
"loss": 0.2891,
"step": 1924
},
{
"epoch": 2.56,
"learning_rate": 1.0915672023618051e-06,
"loss": 0.284,
"step": 1925
},
{
"epoch": 2.56,
"learning_rate": 1.0850443335337957e-06,
"loss": 0.234,
"step": 1926
},
{
"epoch": 2.56,
"learning_rate": 1.0785398943086057e-06,
"loss": 0.3016,
"step": 1927
},
{
"epoch": 2.57,
"learning_rate": 1.0720538981326557e-06,
"loss": 0.2537,
"step": 1928
},
{
"epoch": 2.57,
"learning_rate": 1.0655863584142367e-06,
"loss": 0.2847,
"step": 1929
},
{
"epoch": 2.57,
"learning_rate": 1.0591372885234885e-06,
"loss": 0.2307,
"step": 1930
},
{
"epoch": 2.57,
"learning_rate": 1.0527067017923654e-06,
"loss": 0.306,
"step": 1931
},
{
"epoch": 2.57,
"learning_rate": 1.0462946115146156e-06,
"loss": 0.2957,
"step": 1932
},
{
"epoch": 2.57,
"learning_rate": 1.0399010309457459e-06,
"loss": 0.2832,
"step": 1933
},
{
"epoch": 2.57,
"learning_rate": 1.0335259733030034e-06,
"loss": 0.2763,
"step": 1934
},
{
"epoch": 2.57,
"learning_rate": 1.0271694517653397e-06,
"loss": 0.2688,
"step": 1935
},
{
"epoch": 2.58,
"learning_rate": 1.0208314794733887e-06,
"loss": 0.2473,
"step": 1936
},
{
"epoch": 2.58,
"learning_rate": 1.0145120695294376e-06,
"loss": 0.2995,
"step": 1937
},
{
"epoch": 2.58,
"learning_rate": 1.0082112349974017e-06,
"loss": 0.2279,
"step": 1938
},
{
"epoch": 2.58,
"learning_rate": 1.0019289889027927e-06,
"loss": 0.2909,
"step": 1939
},
{
"epoch": 2.58,
"learning_rate": 9.956653442326991e-07,
"loss": 0.2969,
"step": 1940
},
{
"epoch": 2.58,
"learning_rate": 9.894203139357528e-07,
"loss": 0.3017,
"step": 1941
},
{
"epoch": 2.58,
"learning_rate": 9.831939109221045e-07,
"loss": 0.33,
"step": 1942
},
{
"epoch": 2.59,
"learning_rate": 9.76986148063398e-07,
"loss": 0.2548,
"step": 1943
},
{
"epoch": 2.59,
"learning_rate": 9.707970381927446e-07,
"loss": 0.2519,
"step": 1944
},
{
"epoch": 2.59,
"learning_rate": 9.646265941046917e-07,
"loss": 0.2546,
"step": 1945
},
{
"epoch": 2.59,
"learning_rate": 9.584748285552015e-07,
"loss": 0.3273,
"step": 1946
},
{
"epoch": 2.59,
"learning_rate": 9.52341754261622e-07,
"loss": 0.3168,
"step": 1947
},
{
"epoch": 2.59,
"learning_rate": 9.462273839026625e-07,
"loss": 0.2748,
"step": 1948
},
{
"epoch": 2.59,
"learning_rate": 9.401317301183655e-07,
"loss": 0.2494,
"step": 1949
},
{
"epoch": 2.59,
"learning_rate": 9.340548055100784e-07,
"loss": 0.2737,
"step": 1950
},
{
"epoch": 2.6,
"learning_rate": 9.279966226404391e-07,
"loss": 0.2397,
"step": 1951
},
{
"epoch": 2.6,
"learning_rate": 9.21957194033326e-07,
"loss": 0.2839,
"step": 1952
},
{
"epoch": 2.6,
"learning_rate": 9.159365321738655e-07,
"loss": 0.2749,
"step": 1953
},
{
"epoch": 2.6,
"learning_rate": 9.09934649508375e-07,
"loss": 0.2812,
"step": 1954
},
{
"epoch": 2.6,
"learning_rate": 9.039515584443559e-07,
"loss": 0.3246,
"step": 1955
},
{
"epoch": 2.6,
"learning_rate": 8.979872713504634e-07,
"loss": 0.2954,
"step": 1956
},
{
"epoch": 2.6,
"learning_rate": 8.920418005564724e-07,
"loss": 0.25,
"step": 1957
},
{
"epoch": 2.61,
"learning_rate": 8.861151583532657e-07,
"loss": 0.2666,
"step": 1958
},
{
"epoch": 2.61,
"learning_rate": 8.802073569928049e-07,
"loss": 0.2609,
"step": 1959
},
{
"epoch": 2.61,
"learning_rate": 8.743184086880962e-07,
"loss": 0.2546,
"step": 1960
},
{
"epoch": 2.61,
"learning_rate": 8.684483256131749e-07,
"loss": 0.2558,
"step": 1961
},
{
"epoch": 2.61,
"learning_rate": 8.625971199030769e-07,
"loss": 0.2567,
"step": 1962
},
{
"epoch": 2.61,
"learning_rate": 8.56764803653809e-07,
"loss": 0.2301,
"step": 1963
},
{
"epoch": 2.61,
"learning_rate": 8.509513889223364e-07,
"loss": 0.2661,
"step": 1964
},
{
"epoch": 2.61,
"learning_rate": 8.451568877265426e-07,
"loss": 0.2654,
"step": 1965
},
{
"epoch": 2.62,
"learning_rate": 8.393813120452166e-07,
"loss": 0.285,
"step": 1966
},
{
"epoch": 2.62,
"learning_rate": 8.336246738180231e-07,
"loss": 0.2843,
"step": 1967
},
{
"epoch": 2.62,
"learning_rate": 8.278869849454718e-07,
"loss": 0.2737,
"step": 1968
},
{
"epoch": 2.62,
"learning_rate": 8.221682572889033e-07,
"loss": 0.2637,
"step": 1969
},
{
"epoch": 2.62,
"learning_rate": 8.164685026704644e-07,
"loss": 0.27,
"step": 1970
},
{
"epoch": 2.62,
"learning_rate": 8.107877328730729e-07,
"loss": 0.31,
"step": 1971
},
{
"epoch": 2.62,
"learning_rate": 8.05125959640406e-07,
"loss": 0.3094,
"step": 1972
},
{
"epoch": 2.63,
"learning_rate": 7.994831946768622e-07,
"loss": 0.2761,
"step": 1973
},
{
"epoch": 2.63,
"learning_rate": 7.938594496475516e-07,
"loss": 0.2886,
"step": 1974
},
{
"epoch": 2.63,
"learning_rate": 7.882547361782589e-07,
"loss": 0.2651,
"step": 1975
},
{
"epoch": 2.63,
"learning_rate": 7.826690658554337e-07,
"loss": 0.3271,
"step": 1976
},
{
"epoch": 2.63,
"learning_rate": 7.771024502261526e-07,
"loss": 0.2416,
"step": 1977
},
{
"epoch": 2.63,
"learning_rate": 7.715549007981026e-07,
"loss": 0.3205,
"step": 1978
},
{
"epoch": 2.63,
"learning_rate": 7.660264290395525e-07,
"loss": 0.2991,
"step": 1979
},
{
"epoch": 2.63,
"learning_rate": 7.605170463793343e-07,
"loss": 0.2657,
"step": 1980
},
{
"epoch": 2.64,
"learning_rate": 7.550267642068243e-07,
"loss": 0.3098,
"step": 1981
},
{
"epoch": 2.64,
"learning_rate": 7.495555938719023e-07,
"loss": 0.2963,
"step": 1982
},
{
"epoch": 2.64,
"learning_rate": 7.441035466849489e-07,
"loss": 0.2523,
"step": 1983
},
{
"epoch": 2.64,
"learning_rate": 7.386706339168015e-07,
"loss": 0.2508,
"step": 1984
},
{
"epoch": 2.64,
"learning_rate": 7.332568667987483e-07,
"loss": 0.2862,
"step": 1985
},
{
"epoch": 2.64,
"learning_rate": 7.278622565224969e-07,
"loss": 0.2777,
"step": 1986
},
{
"epoch": 2.64,
"learning_rate": 7.224868142401542e-07,
"loss": 0.3255,
"step": 1987
},
{
"epoch": 2.65,
"learning_rate": 7.171305510642024e-07,
"loss": 0.2786,
"step": 1988
},
{
"epoch": 2.65,
"learning_rate": 7.117934780674673e-07,
"loss": 0.2914,
"step": 1989
},
{
"epoch": 2.65,
"learning_rate": 7.064756062831135e-07,
"loss": 0.3303,
"step": 1990
},
{
"epoch": 2.65,
"learning_rate": 7.011769467046047e-07,
"loss": 0.2876,
"step": 1991
},
{
"epoch": 2.65,
"learning_rate": 6.958975102856913e-07,
"loss": 0.3003,
"step": 1992
},
{
"epoch": 2.65,
"learning_rate": 6.90637307940385e-07,
"loss": 0.3271,
"step": 1993
},
{
"epoch": 2.65,
"learning_rate": 6.85396350542934e-07,
"loss": 0.3105,
"step": 1994
},
{
"epoch": 2.65,
"learning_rate": 6.801746489277993e-07,
"loss": 0.2629,
"step": 1995
},
{
"epoch": 2.66,
"learning_rate": 6.749722138896398e-07,
"loss": 0.2727,
"step": 1996
},
{
"epoch": 2.66,
"learning_rate": 6.697890561832809e-07,
"loss": 0.2658,
"step": 1997
},
{
"epoch": 2.66,
"learning_rate": 6.646251865236997e-07,
"loss": 0.2901,
"step": 1998
},
{
"epoch": 2.66,
"learning_rate": 6.594806155860034e-07,
"loss": 0.2825,
"step": 1999
},
{
"epoch": 2.66,
"learning_rate": 6.543553540053926e-07,
"loss": 0.3046,
"step": 2000
},
{
"epoch": 2.66,
"learning_rate": 6.492494123771586e-07,
"loss": 0.264,
"step": 2001
},
{
"epoch": 2.66,
"learning_rate": 6.441628012566537e-07,
"loss": 0.2686,
"step": 2002
},
{
"epoch": 2.67,
"learning_rate": 6.390955311592617e-07,
"loss": 0.2963,
"step": 2003
},
{
"epoch": 2.67,
"learning_rate": 6.340476125603945e-07,
"loss": 0.2806,
"step": 2004
},
{
"epoch": 2.67,
"learning_rate": 6.290190558954479e-07,
"loss": 0.238,
"step": 2005
},
{
"epoch": 2.67,
"learning_rate": 6.240098715597975e-07,
"loss": 0.2875,
"step": 2006
},
{
"epoch": 2.67,
"learning_rate": 6.190200699087701e-07,
"loss": 0.2686,
"step": 2007
},
{
"epoch": 2.67,
"learning_rate": 6.140496612576241e-07,
"loss": 0.2641,
"step": 2008
},
{
"epoch": 2.67,
"learning_rate": 6.090986558815227e-07,
"loss": 0.3024,
"step": 2009
},
{
"epoch": 2.67,
"learning_rate": 6.041670640155273e-07,
"loss": 0.2734,
"step": 2010
},
{
"epoch": 2.68,
"learning_rate": 5.992548958545552e-07,
"loss": 0.2733,
"step": 2011
},
{
"epoch": 2.68,
"learning_rate": 5.943621615533768e-07,
"loss": 0.3153,
"step": 2012
},
{
"epoch": 2.68,
"learning_rate": 5.894888712265834e-07,
"loss": 0.2579,
"step": 2013
},
{
"epoch": 2.68,
"learning_rate": 5.846350349485729e-07,
"loss": 0.2798,
"step": 2014
},
{
"epoch": 2.68,
"learning_rate": 5.798006627535279e-07,
"loss": 0.2899,
"step": 2015
},
{
"epoch": 2.68,
"learning_rate": 5.749857646353885e-07,
"loss": 0.2813,
"step": 2016
},
{
"epoch": 2.68,
"learning_rate": 5.701903505478424e-07,
"loss": 0.2528,
"step": 2017
},
{
"epoch": 2.69,
"learning_rate": 5.65414430404293e-07,
"loss": 0.2661,
"step": 2018
},
{
"epoch": 2.69,
"learning_rate": 5.606580140778495e-07,
"loss": 0.2684,
"step": 2019
},
{
"epoch": 2.69,
"learning_rate": 5.559211114012963e-07,
"loss": 0.2601,
"step": 2020
},
{
"epoch": 2.69,
"learning_rate": 5.512037321670871e-07,
"loss": 0.2666,
"step": 2021
},
{
"epoch": 2.69,
"learning_rate": 5.465058861273032e-07,
"loss": 0.2497,
"step": 2022
},
{
"epoch": 2.69,
"learning_rate": 5.418275829936537e-07,
"loss": 0.2781,
"step": 2023
},
{
"epoch": 2.69,
"learning_rate": 5.371688324374447e-07,
"loss": 0.3093,
"step": 2024
},
{
"epoch": 2.69,
"learning_rate": 5.325296440895622e-07,
"loss": 0.288,
"step": 2025
},
{
"epoch": 2.7,
"learning_rate": 5.279100275404536e-07,
"loss": 0.2456,
"step": 2026
},
{
"epoch": 2.7,
"learning_rate": 5.233099923401019e-07,
"loss": 0.3145,
"step": 2027
},
{
"epoch": 2.7,
"learning_rate": 5.187295479980136e-07,
"loss": 0.3247,
"step": 2028
},
{
"epoch": 2.7,
"learning_rate": 5.141687039831966e-07,
"loss": 0.3147,
"step": 2029
},
{
"epoch": 2.7,
"learning_rate": 5.096274697241354e-07,
"loss": 0.31,
"step": 2030
},
{
"epoch": 2.7,
"learning_rate": 5.051058546087795e-07,
"loss": 0.236,
"step": 2031
},
{
"epoch": 2.7,
"learning_rate": 5.006038679845204e-07,
"loss": 0.2457,
"step": 2032
},
{
"epoch": 2.71,
"learning_rate": 4.961215191581692e-07,
"loss": 0.2959,
"step": 2033
},
{
"epoch": 2.71,
"learning_rate": 4.916588173959435e-07,
"loss": 0.2689,
"step": 2034
},
{
"epoch": 2.71,
"learning_rate": 4.872157719234438e-07,
"loss": 0.2731,
"step": 2035
},
{
"epoch": 2.71,
"learning_rate": 4.827923919256349e-07,
"loss": 0.2566,
"step": 2036
},
{
"epoch": 2.71,
"learning_rate": 4.783886865468302e-07,
"loss": 0.2586,
"step": 2037
},
{
"epoch": 2.71,
"learning_rate": 4.7400466489066823e-07,
"loss": 0.2689,
"step": 2038
},
{
"epoch": 2.71,
"learning_rate": 4.696403360200985e-07,
"loss": 0.3069,
"step": 2039
},
{
"epoch": 2.71,
"learning_rate": 4.652957089573551e-07,
"loss": 0.2749,
"step": 2040
},
{
"epoch": 2.72,
"learning_rate": 4.609707926839502e-07,
"loss": 0.2995,
"step": 2041
},
{
"epoch": 2.72,
"learning_rate": 4.56665596140643e-07,
"loss": 0.2423,
"step": 2042
},
{
"epoch": 2.72,
"learning_rate": 4.5238012822742874e-07,
"loss": 0.2366,
"step": 2043
},
{
"epoch": 2.72,
"learning_rate": 4.481143978035196e-07,
"loss": 0.2952,
"step": 2044
},
{
"epoch": 2.72,
"learning_rate": 4.438684136873217e-07,
"loss": 0.2385,
"step": 2045
},
{
"epoch": 2.72,
"learning_rate": 4.396421846564236e-07,
"loss": 0.2483,
"step": 2046
},
{
"epoch": 2.72,
"learning_rate": 4.3543571944757334e-07,
"loss": 0.318,
"step": 2047
},
{
"epoch": 2.73,
"learning_rate": 4.312490267566616e-07,
"loss": 0.3044,
"step": 2048
},
{
"epoch": 2.73,
"learning_rate": 4.270821152387039e-07,
"loss": 0.3026,
"step": 2049
},
{
"epoch": 2.73,
"learning_rate": 4.229349935078242e-07,
"loss": 0.2694,
"step": 2050
},
{
"epoch": 2.73,
"learning_rate": 4.188076701372346e-07,
"loss": 0.294,
"step": 2051
},
{
"epoch": 2.73,
"learning_rate": 4.147001536592177e-07,
"loss": 0.2854,
"step": 2052
},
{
"epoch": 2.73,
"learning_rate": 4.1061245256511227e-07,
"loss": 0.2678,
"step": 2053
},
{
"epoch": 2.73,
"learning_rate": 4.06544575305291e-07,
"loss": 0.2868,
"step": 2054
},
{
"epoch": 2.73,
"learning_rate": 4.0249653028914705e-07,
"loss": 0.2571,
"step": 2055
},
{
"epoch": 2.74,
"learning_rate": 3.984683258850741e-07,
"loss": 0.2826,
"step": 2056
},
{
"epoch": 2.74,
"learning_rate": 3.9445997042044995e-07,
"loss": 0.303,
"step": 2057
},
{
"epoch": 2.74,
"learning_rate": 3.9047147218162273e-07,
"loss": 0.2853,
"step": 2058
},
{
"epoch": 2.74,
"learning_rate": 3.8650283941388253e-07,
"loss": 0.2913,
"step": 2059
},
{
"epoch": 2.74,
"learning_rate": 3.8255408032146004e-07,
"loss": 0.2739,
"step": 2060
},
{
"epoch": 2.74,
"learning_rate": 3.7862520306749753e-07,
"loss": 0.3182,
"step": 2061
},
{
"epoch": 2.74,
"learning_rate": 3.747162157740392e-07,
"loss": 0.3729,
"step": 2062
},
{
"epoch": 2.75,
"learning_rate": 3.708271265220087e-07,
"loss": 0.2688,
"step": 2063
},
{
"epoch": 2.75,
"learning_rate": 3.669579433511927e-07,
"loss": 0.2763,
"step": 2064
},
{
"epoch": 2.75,
"learning_rate": 3.63108674260233e-07,
"loss": 0.2761,
"step": 2065
},
{
"epoch": 2.75,
"learning_rate": 3.592793272066009e-07,
"loss": 0.2747,
"step": 2066
},
{
"epoch": 2.75,
"learning_rate": 3.5546991010658283e-07,
"loss": 0.2449,
"step": 2067
},
{
"epoch": 2.75,
"learning_rate": 3.516804308352628e-07,
"loss": 0.2666,
"step": 2068
},
{
"epoch": 2.75,
"learning_rate": 3.4791089722651437e-07,
"loss": 0.2583,
"step": 2069
},
{
"epoch": 2.75,
"learning_rate": 3.441613170729685e-07,
"loss": 0.2711,
"step": 2070
},
{
"epoch": 2.76,
"learning_rate": 3.4043169812601585e-07,
"loss": 0.2959,
"step": 2071
},
{
"epoch": 2.76,
"learning_rate": 3.367220480957778e-07,
"loss": 0.3037,
"step": 2072
},
{
"epoch": 2.76,
"learning_rate": 3.330323746510955e-07,
"loss": 0.2904,
"step": 2073
},
{
"epoch": 2.76,
"learning_rate": 3.293626854195131e-07,
"loss": 0.2753,
"step": 2074
},
{
"epoch": 2.76,
"learning_rate": 3.2571298798726005e-07,
"loss": 0.2712,
"step": 2075
},
{
"epoch": 2.76,
"learning_rate": 3.220832898992399e-07,
"loss": 0.2851,
"step": 2076
},
{
"epoch": 2.76,
"learning_rate": 3.1847359865901153e-07,
"loss": 0.3019,
"step": 2077
},
{
"epoch": 2.77,
"learning_rate": 3.1488392172877577e-07,
"loss": 0.2921,
"step": 2078
},
{
"epoch": 2.77,
"learning_rate": 3.113142665293567e-07,
"loss": 0.2601,
"step": 2079
},
{
"epoch": 2.77,
"learning_rate": 3.077646404401868e-07,
"loss": 0.3103,
"step": 2080
},
{
"epoch": 2.77,
"learning_rate": 3.0423505079929414e-07,
"loss": 0.2662,
"step": 2081
},
{
"epoch": 2.77,
"learning_rate": 3.0072550490328754e-07,
"loss": 0.2822,
"step": 2082
},
{
"epoch": 2.77,
"learning_rate": 2.9723601000734013e-07,
"loss": 0.2657,
"step": 2083
},
{
"epoch": 2.77,
"learning_rate": 2.937665733251716e-07,
"loss": 0.2812,
"step": 2084
},
{
"epoch": 2.77,
"learning_rate": 2.9031720202904014e-07,
"loss": 0.2961,
"step": 2085
},
{
"epoch": 2.78,
"learning_rate": 2.868879032497163e-07,
"loss": 0.2678,
"step": 2086
},
{
"epoch": 2.78,
"learning_rate": 2.834786840764814e-07,
"loss": 0.2701,
"step": 2087
},
{
"epoch": 2.78,
"learning_rate": 2.8008955155710784e-07,
"loss": 0.2586,
"step": 2088
},
{
"epoch": 2.78,
"learning_rate": 2.7672051269783896e-07,
"loss": 0.2716,
"step": 2089
},
{
"epoch": 2.78,
"learning_rate": 2.7337157446338135e-07,
"loss": 0.2162,
"step": 2090
},
{
"epoch": 2.78,
"learning_rate": 2.7004274377688713e-07,
"loss": 0.2575,
"step": 2091
},
{
"epoch": 2.78,
"learning_rate": 2.667340275199426e-07,
"loss": 0.276,
"step": 2092
},
{
"epoch": 2.79,
"learning_rate": 2.634454325325497e-07,
"loss": 0.2885,
"step": 2093
},
{
"epoch": 2.79,
"learning_rate": 2.60176965613117e-07,
"loss": 0.2806,
"step": 2094
},
{
"epoch": 2.79,
"learning_rate": 2.569286335184418e-07,
"loss": 0.2343,
"step": 2095
},
{
"epoch": 2.79,
"learning_rate": 2.53700442963698e-07,
"loss": 0.2932,
"step": 2096
},
{
"epoch": 2.79,
"learning_rate": 2.504924006224174e-07,
"loss": 0.2788,
"step": 2097
},
{
"epoch": 2.79,
"learning_rate": 2.4730451312648617e-07,
"loss": 0.2448,
"step": 2098
},
{
"epoch": 2.79,
"learning_rate": 2.441367870661215e-07,
"loss": 0.3089,
"step": 2099
},
{
"epoch": 2.79,
"learning_rate": 2.4098922898986186e-07,
"loss": 0.3209,
"step": 2100
},
{
"epoch": 2.8,
"learning_rate": 2.3786184540455449e-07,
"loss": 0.2541,
"step": 2101
},
{
"epoch": 2.8,
"learning_rate": 2.3475464277533887e-07,
"loss": 0.322,
"step": 2102
},
{
"epoch": 2.8,
"learning_rate": 2.3166762752563466e-07,
"loss": 0.2739,
"step": 2103
},
{
"epoch": 2.8,
"learning_rate": 2.286008060371303e-07,
"loss": 0.2702,
"step": 2104
},
{
"epoch": 2.8,
"learning_rate": 2.2555418464976886e-07,
"loss": 0.2774,
"step": 2105
},
{
"epoch": 2.8,
"learning_rate": 2.225277696617334e-07,
"loss": 0.2942,
"step": 2106
},
{
"epoch": 2.8,
"learning_rate": 2.1952156732943265e-07,
"loss": 0.2878,
"step": 2107
},
{
"epoch": 2.81,
"learning_rate": 2.165355838674943e-07,
"loss": 0.2635,
"step": 2108
},
{
"epoch": 2.81,
"learning_rate": 2.1356982544874617e-07,
"loss": 0.3053,
"step": 2109
},
{
"epoch": 2.81,
"learning_rate": 2.1062429820420616e-07,
"loss": 0.279,
"step": 2110
},
{
"epoch": 2.81,
"learning_rate": 2.076990082230679e-07,
"loss": 0.295,
"step": 2111
},
{
"epoch": 2.81,
"learning_rate": 2.047939615526906e-07,
"loss": 0.2856,
"step": 2112
},
{
"epoch": 2.81,
"learning_rate": 2.0190916419858486e-07,
"loss": 0.2738,
"step": 2113
},
{
"epoch": 2.81,
"learning_rate": 1.99044622124398e-07,
"loss": 0.315,
"step": 2114
},
{
"epoch": 2.81,
"learning_rate": 1.9620034125190645e-07,
"loss": 0.2712,
"step": 2115
},
{
"epoch": 2.82,
"learning_rate": 1.9337632746100233e-07,
"loss": 0.2764,
"step": 2116
},
{
"epoch": 2.82,
"learning_rate": 1.9057258658968015e-07,
"loss": 0.2373,
"step": 2117
},
{
"epoch": 2.82,
"learning_rate": 1.8778912443402242e-07,
"loss": 0.2531,
"step": 2118
},
{
"epoch": 2.82,
"learning_rate": 1.8502594674819073e-07,
"loss": 0.2503,
"step": 2119
},
{
"epoch": 2.82,
"learning_rate": 1.8228305924441469e-07,
"loss": 0.2814,
"step": 2120
},
{
"epoch": 2.82,
"learning_rate": 1.795604675929774e-07,
"loss": 0.3003,
"step": 2121
},
{
"epoch": 2.82,
"learning_rate": 1.7685817742220668e-07,
"loss": 0.2622,
"step": 2122
},
{
"epoch": 2.83,
"learning_rate": 1.7417619431845945e-07,
"loss": 0.2912,
"step": 2123
},
{
"epoch": 2.83,
"learning_rate": 1.715145238261151e-07,
"loss": 0.2969,
"step": 2124
},
{
"epoch": 2.83,
"learning_rate": 1.6887317144755777e-07,
"loss": 0.301,
"step": 2125
},
{
"epoch": 2.83,
"learning_rate": 1.662521426431729e-07,
"loss": 0.2535,
"step": 2126
},
{
"epoch": 2.83,
"learning_rate": 1.636514428313274e-07,
"loss": 0.2898,
"step": 2127
},
{
"epoch": 2.83,
"learning_rate": 1.6107107738836835e-07,
"loss": 0.2925,
"step": 2128
},
{
"epoch": 2.83,
"learning_rate": 1.585110516485988e-07,
"loss": 0.2688,
"step": 2129
},
{
"epoch": 2.83,
"learning_rate": 1.5597137090428095e-07,
"loss": 0.265,
"step": 2130
},
{
"epoch": 2.84,
"learning_rate": 1.534520404056139e-07,
"loss": 0.2796,
"step": 2131
},
{
"epoch": 2.84,
"learning_rate": 1.509530653607294e-07,
"loss": 0.264,
"step": 2132
},
{
"epoch": 2.84,
"learning_rate": 1.4847445093567836e-07,
"loss": 0.3058,
"step": 2133
},
{
"epoch": 2.84,
"learning_rate": 1.4601620225442204e-07,
"loss": 0.2793,
"step": 2134
},
{
"epoch": 2.84,
"learning_rate": 1.435783243988187e-07,
"loss": 0.3069,
"step": 2135
},
{
"epoch": 2.84,
"learning_rate": 1.4116082240861583e-07,
"loss": 0.2757,
"step": 2136
},
{
"epoch": 2.84,
"learning_rate": 1.38763701281438e-07,
"loss": 0.2565,
"step": 2137
},
{
"epoch": 2.84,
"learning_rate": 1.3638696597277678e-07,
"loss": 0.2618,
"step": 2138
},
{
"epoch": 2.85,
"learning_rate": 1.3403062139598078e-07,
"loss": 0.2745,
"step": 2139
},
{
"epoch": 2.85,
"learning_rate": 1.3169467242224565e-07,
"loss": 0.3041,
"step": 2140
},
{
"epoch": 2.85,
"learning_rate": 1.2937912388060526e-07,
"loss": 0.3041,
"step": 2141
},
{
"epoch": 2.85,
"learning_rate": 1.2708398055791716e-07,
"loss": 0.2617,
"step": 2142
},
{
"epoch": 2.85,
"learning_rate": 1.2480924719885934e-07,
"loss": 0.2527,
"step": 2143
},
{
"epoch": 2.85,
"learning_rate": 1.2255492850591576e-07,
"loss": 0.2608,
"step": 2144
},
{
"epoch": 2.85,
"learning_rate": 1.2032102913936528e-07,
"loss": 0.2741,
"step": 2145
},
{
"epoch": 2.86,
"learning_rate": 1.1810755371727823e-07,
"loss": 0.2895,
"step": 2146
},
{
"epoch": 2.86,
"learning_rate": 1.1591450681550209e-07,
"loss": 0.2755,
"step": 2147
},
{
"epoch": 2.86,
"learning_rate": 1.1374189296765037e-07,
"loss": 0.2829,
"step": 2148
},
{
"epoch": 2.86,
"learning_rate": 1.1158971666510033e-07,
"loss": 0.2905,
"step": 2149
},
{
"epoch": 2.86,
"learning_rate": 1.0945798235697636e-07,
"loss": 0.2926,
"step": 2150
},
{
"epoch": 2.86,
"learning_rate": 1.0734669445014445e-07,
"loss": 0.2992,
"step": 2151
},
{
"epoch": 2.86,
"learning_rate": 1.0525585730920329e-07,
"loss": 0.2767,
"step": 2152
},
{
"epoch": 2.86,
"learning_rate": 1.0318547525647316e-07,
"loss": 0.2765,
"step": 2153
},
{
"epoch": 2.87,
"learning_rate": 1.0113555257198926e-07,
"loss": 0.2611,
"step": 2154
},
{
"epoch": 2.87,
"learning_rate": 9.910609349348954e-08,
"loss": 0.289,
"step": 2155
},
{
"epoch": 2.87,
"learning_rate": 9.709710221641133e-08,
"loss": 0.2742,
"step": 2156
},
{
"epoch": 2.87,
"learning_rate": 9.510858289387914e-08,
"loss": 0.2623,
"step": 2157
},
{
"epoch": 2.87,
"learning_rate": 9.314053963669245e-08,
"loss": 0.255,
"step": 2158
},
{
"epoch": 2.87,
"learning_rate": 9.119297651332681e-08,
"loss": 0.3114,
"step": 2159
},
{
"epoch": 2.87,
"learning_rate": 8.926589754991499e-08,
"loss": 0.3152,
"step": 2160
},
{
"epoch": 2.88,
"learning_rate": 8.735930673024806e-08,
"loss": 0.3322,
"step": 2161
},
{
"epoch": 2.88,
"learning_rate": 8.547320799575876e-08,
"loss": 0.2252,
"step": 2162
},
{
"epoch": 2.88,
"learning_rate": 8.360760524551814e-08,
"loss": 0.2919,
"step": 2163
},
{
"epoch": 2.88,
"learning_rate": 8.176250233622896e-08,
"loss": 0.3245,
"step": 2164
},
{
"epoch": 2.88,
"learning_rate": 7.993790308221228e-08,
"loss": 0.2709,
"step": 2165
},
{
"epoch": 2.88,
"learning_rate": 7.81338112554042e-08,
"loss": 0.2627,
"step": 2166
},
{
"epoch": 2.88,
"learning_rate": 7.635023058534474e-08,
"loss": 0.292,
"step": 2167
},
{
"epoch": 2.88,
"learning_rate": 7.45871647591756e-08,
"loss": 0.2692,
"step": 2168
},
{
"epoch": 2.89,
"learning_rate": 7.284461742162463e-08,
"loss": 0.2928,
"step": 2169
},
{
"epoch": 2.89,
"learning_rate": 7.112259217500583e-08,
"loss": 0.2703,
"step": 2170
},
{
"epoch": 2.89,
"learning_rate": 6.942109257920382e-08,
"loss": 0.2504,
"step": 2171
},
{
"epoch": 2.89,
"learning_rate": 6.774012215167825e-08,
"loss": 0.2479,
"step": 2172
},
{
"epoch": 2.89,
"learning_rate": 6.607968436744272e-08,
"loss": 0.2958,
"step": 2173
},
{
"epoch": 2.89,
"learning_rate": 6.443978265906813e-08,
"loss": 0.2901,
"step": 2174
},
{
"epoch": 2.89,
"learning_rate": 6.282042041667047e-08,
"loss": 0.2417,
"step": 2175
},
{
"epoch": 2.9,
"learning_rate": 6.122160098790741e-08,
"loss": 0.3119,
"step": 2176
},
{
"epoch": 2.9,
"learning_rate": 5.964332767796399e-08,
"loss": 0.2602,
"step": 2177
},
{
"epoch": 2.9,
"learning_rate": 5.808560374955585e-08,
"loss": 0.3184,
"step": 2178
},
{
"epoch": 2.9,
"learning_rate": 5.6548432422917075e-08,
"loss": 0.2902,
"step": 2179
},
{
"epoch": 2.9,
"learning_rate": 5.503181687579018e-08,
"loss": 0.3286,
"step": 2180
},
{
"epoch": 2.9,
"learning_rate": 5.3535760243429434e-08,
"loss": 0.2656,
"step": 2181
},
{
"epoch": 2.9,
"learning_rate": 5.206026561858091e-08,
"loss": 0.2607,
"step": 2182
},
{
"epoch": 2.9,
"learning_rate": 5.06053360514902e-08,
"loss": 0.3011,
"step": 2183
},
{
"epoch": 2.91,
"learning_rate": 4.9170974549885844e-08,
"loss": 0.2876,
"step": 2184
},
{
"epoch": 2.91,
"learning_rate": 4.7757184078978114e-08,
"loss": 0.3083,
"step": 2185
},
{
"epoch": 2.91,
"learning_rate": 4.636396756144912e-08,
"loss": 0.2766,
"step": 2186
},
{
"epoch": 2.91,
"learning_rate": 4.499132787745386e-08,
"loss": 0.2395,
"step": 2187
},
{
"epoch": 2.91,
"learning_rate": 4.3639267864603594e-08,
"loss": 0.2715,
"step": 2188
},
{
"epoch": 2.91,
"learning_rate": 4.2307790317969164e-08,
"loss": 0.2618,
"step": 2189
},
{
"epoch": 2.91,
"learning_rate": 4.099689799007211e-08,
"loss": 0.2751,
"step": 2190
},
{
"epoch": 2.92,
"learning_rate": 3.970659359087914e-08,
"loss": 0.2979,
"step": 2191
},
{
"epoch": 2.92,
"learning_rate": 3.8436879787794315e-08,
"loss": 0.278,
"step": 2192
},
{
"epoch": 2.92,
"learning_rate": 3.718775920565687e-08,
"loss": 0.3025,
"step": 2193
},
{
"epoch": 2.92,
"learning_rate": 3.5959234426732327e-08,
"loss": 0.247,
"step": 2194
},
{
"epoch": 2.92,
"learning_rate": 3.4751307990712466e-08,
"loss": 0.3313,
"step": 2195
},
{
"epoch": 2.92,
"learning_rate": 3.356398239470427e-08,
"loss": 0.2895,
"step": 2196
},
{
"epoch": 2.92,
"learning_rate": 3.2397260093228747e-08,
"loss": 0.2989,
"step": 2197
},
{
"epoch": 2.92,
"learning_rate": 3.125114349821212e-08,
"loss": 0.2793,
"step": 2198
},
{
"epoch": 2.93,
"learning_rate": 3.0125634978985795e-08,
"loss": 0.2881,
"step": 2199
},
{
"epoch": 2.93,
"learning_rate": 2.9020736862276355e-08,
"loss": 0.2572,
"step": 2200
},
{
"epoch": 2.93,
"learning_rate": 2.7936451432206692e-08,
"loss": 0.2704,
"step": 2201
},
{
"epoch": 2.93,
"learning_rate": 2.6872780930282672e-08,
"loss": 0.2761,
"step": 2202
},
{
"epoch": 2.93,
"learning_rate": 2.5829727555397587e-08,
"loss": 0.2963,
"step": 2203
},
{
"epoch": 2.93,
"learning_rate": 2.480729346382216e-08,
"loss": 0.2771,
"step": 2204
},
{
"epoch": 2.93,
"learning_rate": 2.3805480769202304e-08,
"loss": 0.2775,
"step": 2205
},
{
"epoch": 2.94,
"learning_rate": 2.2824291542552502e-08,
"loss": 0.2817,
"step": 2206
},
{
"epoch": 2.94,
"learning_rate": 2.1863727812254653e-08,
"loss": 0.299,
"step": 2207
},
{
"epoch": 2.94,
"learning_rate": 2.0923791564050333e-08,
"loss": 0.3056,
"step": 2208
},
{
"epoch": 2.94,
"learning_rate": 2.0004484741040774e-08,
"loss": 0.3118,
"step": 2209
},
{
"epoch": 2.94,
"learning_rate": 1.9105809243679108e-08,
"loss": 0.3226,
"step": 2210
},
{
"epoch": 2.94,
"learning_rate": 1.8227766929768132e-08,
"loss": 0.2844,
"step": 2211
},
{
"epoch": 2.94,
"learning_rate": 1.73703596144581e-08,
"loss": 0.2995,
"step": 2212
},
{
"epoch": 2.94,
"learning_rate": 1.653358907023783e-08,
"loss": 0.2929,
"step": 2213
},
{
"epoch": 2.95,
"learning_rate": 1.5717457026938054e-08,
"loss": 0.292,
"step": 2214
},
{
"epoch": 2.95,
"learning_rate": 1.4921965171720288e-08,
"loss": 0.3032,
"step": 2215
},
{
"epoch": 2.95,
"learning_rate": 1.4147115149081292e-08,
"loss": 0.3301,
"step": 2216
},
{
"epoch": 2.95,
"learning_rate": 1.3392908560843077e-08,
"loss": 0.2944,
"step": 2217
},
{
"epoch": 2.95,
"learning_rate": 1.2659346966152897e-08,
"loss": 0.245,
"step": 2218
},
{
"epoch": 2.95,
"learning_rate": 1.1946431881479926e-08,
"loss": 0.3132,
"step": 2219
},
{
"epoch": 2.95,
"learning_rate": 1.125416478060859e-08,
"loss": 0.2303,
"step": 2220
},
{
"epoch": 2.96,
"learning_rate": 1.0582547094643014e-08,
"loss": 0.2813,
"step": 2221
},
{
"epoch": 2.96,
"learning_rate": 9.931580211994806e-09,
"loss": 0.32,
"step": 2222
},
{
"epoch": 2.96,
"learning_rate": 9.30126547838861e-09,
"loss": 0.3028,
"step": 2223
},
{
"epoch": 2.96,
"learning_rate": 8.69160419685433e-09,
"loss": 0.3215,
"step": 2224
},
{
"epoch": 2.96,
"learning_rate": 8.102597627722697e-09,
"loss": 0.3025,
"step": 2225
},
{
"epoch": 2.96,
"learning_rate": 7.534246988630812e-09,
"loss": 0.2906,
"step": 2226
},
{
"epoch": 2.96,
"learning_rate": 6.986553454509937e-09,
"loss": 0.2969,
"step": 2227
},
{
"epoch": 2.96,
"learning_rate": 6.4595181575910496e-09,
"loss": 0.3083,
"step": 2228
},
{
"epoch": 2.97,
"learning_rate": 5.953142187395955e-09,
"loss": 0.2914,
"step": 2229
},
{
"epoch": 2.97,
"learning_rate": 5.467426590739511e-09,
"loss": 0.2973,
"step": 2230
},
{
"epoch": 2.97,
"learning_rate": 5.0023723717274046e-09,
"loss": 0.2863,
"step": 2231
},
{
"epoch": 2.97,
"learning_rate": 4.557980491750602e-09,
"loss": 0.305,
"step": 2232
},
{
"epoch": 2.97,
"learning_rate": 4.13425186948535e-09,
"loss": 0.2825,
"step": 2233
},
{
"epoch": 2.97,
"learning_rate": 3.731187380893176e-09,
"loss": 0.3157,
"step": 2234
},
{
"epoch": 2.97,
"learning_rate": 3.3487878592175504e-09,
"loss": 0.281,
"step": 2235
},
{
"epoch": 2.98,
"learning_rate": 2.987054094978348e-09,
"loss": 0.3058,
"step": 2236
},
{
"epoch": 2.98,
"learning_rate": 2.645986835977388e-09,
"loss": 0.2702,
"step": 2237
},
{
"epoch": 2.98,
"learning_rate": 2.3255867872928885e-09,
"loss": 0.2822,
"step": 2238
},
{
"epoch": 2.98,
"learning_rate": 2.0258546112761346e-09,
"loss": 0.3309,
"step": 2239
},
{
"epoch": 2.98,
"learning_rate": 1.746790927554809e-09,
"loss": 0.2629,
"step": 2240
},
{
"epoch": 2.98,
"learning_rate": 1.4883963130285506e-09,
"loss": 0.3183,
"step": 2241
},
{
"epoch": 2.98,
"learning_rate": 1.2506713018667349e-09,
"loss": 0.2686,
"step": 2242
},
{
"epoch": 2.98,
"learning_rate": 1.0336163855129143e-09,
"loss": 0.2866,
"step": 2243
},
{
"epoch": 2.99,
"learning_rate": 8.37232012675937e-10,
"loss": 0.2834,
"step": 2244
},
{
"epoch": 2.99,
"learning_rate": 6.615185893366072e-10,
"loss": 0.288,
"step": 2245
},
{
"epoch": 2.99,
"learning_rate": 5.064764787399145e-10,
"loss": 0.3076,
"step": 2246
},
{
"epoch": 2.99,
"learning_rate": 3.7210600140058504e-10,
"loss": 0.2942,
"step": 2247
},
{
"epoch": 2.99,
"learning_rate": 2.584074350986399e-10,
"loss": 0.3193,
"step": 2248
},
{
"epoch": 2.99,
"learning_rate": 1.6538101487828528e-10,
"loss": 0.2969,
"step": 2249
},
{
"epoch": 2.99,
"learning_rate": 9.302693305124345e-11,
"loss": 0.2726,
"step": 2250
},
{
"epoch": 3.0,
"learning_rate": 4.1345339191201095e-11,
"loss": 0.268,
"step": 2251
},
{
"epoch": 3.0,
"learning_rate": 1.03363401393608e-11,
"loss": 0.2617,
"step": 2252
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.2828,
"step": 2253
},
{
"epoch": 3.0,
"step": 2253,
"total_flos": 2.868554207550505e+18,
"train_loss": 0.4686343488603817,
"train_runtime": 27294.7496,
"train_samples_per_second": 10.571,
"train_steps_per_second": 0.083
}
],
"max_steps": 2253,
"num_train_epochs": 3,
"total_flos": 2.868554207550505e+18,
"trial_name": null,
"trial_params": null
}