internvl_buildingfacades / trainer_state.json
seshing's picture
Upload folder using huggingface_hub
345097f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.996437356858088,
"eval_steps": 500,
"global_step": 1413,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.8604651162790698e-07,
"loss": 2.0392,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 3.7209302325581396e-07,
"loss": 1.6846,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 5.581395348837209e-07,
"loss": 1.6097,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 7.441860465116279e-07,
"loss": 2.3086,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 9.302325581395349e-07,
"loss": 1.6364,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.1162790697674417e-06,
"loss": 1.7009,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 1.302325581395349e-06,
"loss": 2.0812,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 1.4883720930232558e-06,
"loss": 1.9181,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 1.6744186046511629e-06,
"loss": 1.3402,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 1.8604651162790697e-06,
"loss": 1.2286,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 2.0465116279069768e-06,
"loss": 1.1485,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 2.2325581395348834e-06,
"loss": 0.9198,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 2.4186046511627905e-06,
"loss": 0.8345,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 2.604651162790698e-06,
"loss": 1.0269,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 2.7906976744186046e-06,
"loss": 0.7902,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 2.9767441860465116e-06,
"loss": 0.7482,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 3.1627906976744183e-06,
"loss": 0.7846,
"step": 17
},
{
"epoch": 0.04,
"learning_rate": 3.3488372093023258e-06,
"loss": 0.735,
"step": 18
},
{
"epoch": 0.04,
"learning_rate": 3.5348837209302324e-06,
"loss": 0.6928,
"step": 19
},
{
"epoch": 0.04,
"learning_rate": 3.7209302325581394e-06,
"loss": 0.6898,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 3.906976744186046e-06,
"loss": 0.6526,
"step": 21
},
{
"epoch": 0.05,
"learning_rate": 4.0930232558139536e-06,
"loss": 0.6669,
"step": 22
},
{
"epoch": 0.05,
"learning_rate": 4.27906976744186e-06,
"loss": 0.6992,
"step": 23
},
{
"epoch": 0.05,
"learning_rate": 4.465116279069767e-06,
"loss": 0.7552,
"step": 24
},
{
"epoch": 0.05,
"learning_rate": 4.651162790697674e-06,
"loss": 0.7492,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 4.837209302325581e-06,
"loss": 0.6134,
"step": 26
},
{
"epoch": 0.06,
"learning_rate": 5.023255813953488e-06,
"loss": 0.6963,
"step": 27
},
{
"epoch": 0.06,
"learning_rate": 5.209302325581396e-06,
"loss": 0.5131,
"step": 28
},
{
"epoch": 0.06,
"learning_rate": 5.395348837209302e-06,
"loss": 0.6743,
"step": 29
},
{
"epoch": 0.06,
"learning_rate": 5.581395348837209e-06,
"loss": 0.6523,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 5.767441860465116e-06,
"loss": 0.5206,
"step": 31
},
{
"epoch": 0.07,
"learning_rate": 5.953488372093023e-06,
"loss": 0.6574,
"step": 32
},
{
"epoch": 0.07,
"learning_rate": 6.13953488372093e-06,
"loss": 0.6243,
"step": 33
},
{
"epoch": 0.07,
"learning_rate": 6.3255813953488365e-06,
"loss": 0.5946,
"step": 34
},
{
"epoch": 0.07,
"learning_rate": 6.511627906976744e-06,
"loss": 0.6132,
"step": 35
},
{
"epoch": 0.08,
"learning_rate": 6.6976744186046515e-06,
"loss": 0.5163,
"step": 36
},
{
"epoch": 0.08,
"learning_rate": 6.883720930232557e-06,
"loss": 0.6358,
"step": 37
},
{
"epoch": 0.08,
"learning_rate": 7.069767441860465e-06,
"loss": 0.6571,
"step": 38
},
{
"epoch": 0.08,
"learning_rate": 7.255813953488371e-06,
"loss": 0.5816,
"step": 39
},
{
"epoch": 0.08,
"learning_rate": 7.441860465116279e-06,
"loss": 0.5938,
"step": 40
},
{
"epoch": 0.09,
"learning_rate": 7.6279069767441855e-06,
"loss": 0.6899,
"step": 41
},
{
"epoch": 0.09,
"learning_rate": 7.813953488372092e-06,
"loss": 0.6033,
"step": 42
},
{
"epoch": 0.09,
"learning_rate": 8e-06,
"loss": 0.5794,
"step": 43
},
{
"epoch": 0.09,
"learning_rate": 7.999989483083726e-06,
"loss": 0.5525,
"step": 44
},
{
"epoch": 0.1,
"learning_rate": 7.999957932390211e-06,
"loss": 0.5775,
"step": 45
},
{
"epoch": 0.1,
"learning_rate": 7.99990534808536e-06,
"loss": 0.547,
"step": 46
},
{
"epoch": 0.1,
"learning_rate": 7.999831730445688e-06,
"loss": 0.5855,
"step": 47
},
{
"epoch": 0.1,
"learning_rate": 7.999737079858309e-06,
"loss": 0.6557,
"step": 48
},
{
"epoch": 0.1,
"learning_rate": 7.999621396820937e-06,
"loss": 0.6552,
"step": 49
},
{
"epoch": 0.11,
"learning_rate": 7.99948468194189e-06,
"loss": 0.6311,
"step": 50
},
{
"epoch": 0.11,
"learning_rate": 7.999326935940076e-06,
"loss": 0.5301,
"step": 51
},
{
"epoch": 0.11,
"learning_rate": 7.999148159644996e-06,
"loss": 0.557,
"step": 52
},
{
"epoch": 0.11,
"learning_rate": 7.998948353996736e-06,
"loss": 0.5448,
"step": 53
},
{
"epoch": 0.11,
"learning_rate": 7.998727520045967e-06,
"loss": 0.6503,
"step": 54
},
{
"epoch": 0.12,
"learning_rate": 7.998485658953935e-06,
"loss": 0.6246,
"step": 55
},
{
"epoch": 0.12,
"learning_rate": 7.998222771992458e-06,
"loss": 0.5755,
"step": 56
},
{
"epoch": 0.12,
"learning_rate": 7.997938860543913e-06,
"loss": 0.6239,
"step": 57
},
{
"epoch": 0.12,
"learning_rate": 7.997633926101238e-06,
"loss": 0.5797,
"step": 58
},
{
"epoch": 0.13,
"learning_rate": 7.997307970267918e-06,
"loss": 0.5273,
"step": 59
},
{
"epoch": 0.13,
"learning_rate": 7.996960994757979e-06,
"loss": 0.4711,
"step": 60
},
{
"epoch": 0.13,
"learning_rate": 7.996593001395974e-06,
"loss": 0.5911,
"step": 61
},
{
"epoch": 0.13,
"learning_rate": 7.996203992116987e-06,
"loss": 0.6178,
"step": 62
},
{
"epoch": 0.13,
"learning_rate": 7.9957939689666e-06,
"loss": 0.5103,
"step": 63
},
{
"epoch": 0.14,
"learning_rate": 7.995362934100906e-06,
"loss": 0.6371,
"step": 64
},
{
"epoch": 0.14,
"learning_rate": 7.994910889786486e-06,
"loss": 0.5945,
"step": 65
},
{
"epoch": 0.14,
"learning_rate": 7.99443783840039e-06,
"loss": 0.5246,
"step": 66
},
{
"epoch": 0.14,
"learning_rate": 7.993943782430143e-06,
"loss": 0.5202,
"step": 67
},
{
"epoch": 0.14,
"learning_rate": 7.993428724473718e-06,
"loss": 0.5669,
"step": 68
},
{
"epoch": 0.15,
"learning_rate": 7.992892667239526e-06,
"loss": 0.5739,
"step": 69
},
{
"epoch": 0.15,
"learning_rate": 7.992335613546396e-06,
"loss": 0.636,
"step": 70
},
{
"epoch": 0.15,
"learning_rate": 7.99175756632358e-06,
"loss": 0.5692,
"step": 71
},
{
"epoch": 0.15,
"learning_rate": 7.991158528610708e-06,
"loss": 0.6142,
"step": 72
},
{
"epoch": 0.15,
"learning_rate": 7.9905385035578e-06,
"loss": 0.5496,
"step": 73
},
{
"epoch": 0.16,
"learning_rate": 7.989897494425228e-06,
"loss": 0.5533,
"step": 74
},
{
"epoch": 0.16,
"learning_rate": 7.989235504583714e-06,
"loss": 0.5174,
"step": 75
},
{
"epoch": 0.16,
"learning_rate": 7.988552537514302e-06,
"loss": 0.5859,
"step": 76
},
{
"epoch": 0.16,
"learning_rate": 7.987848596808347e-06,
"loss": 0.56,
"step": 77
},
{
"epoch": 0.17,
"learning_rate": 7.98712368616749e-06,
"loss": 0.6045,
"step": 78
},
{
"epoch": 0.17,
"learning_rate": 7.986377809403648e-06,
"loss": 0.5238,
"step": 79
},
{
"epoch": 0.17,
"learning_rate": 7.985610970438977e-06,
"loss": 0.5774,
"step": 80
},
{
"epoch": 0.17,
"learning_rate": 7.984823173305872e-06,
"loss": 0.5689,
"step": 81
},
{
"epoch": 0.17,
"learning_rate": 7.984014422146926e-06,
"loss": 0.6355,
"step": 82
},
{
"epoch": 0.18,
"learning_rate": 7.98318472121493e-06,
"loss": 0.509,
"step": 83
},
{
"epoch": 0.18,
"learning_rate": 7.982334074872826e-06,
"loss": 0.5356,
"step": 84
},
{
"epoch": 0.18,
"learning_rate": 7.981462487593704e-06,
"loss": 0.5086,
"step": 85
},
{
"epoch": 0.18,
"learning_rate": 7.98056996396077e-06,
"loss": 0.5245,
"step": 86
},
{
"epoch": 0.18,
"learning_rate": 7.97965650866732e-06,
"loss": 0.4611,
"step": 87
},
{
"epoch": 0.19,
"learning_rate": 7.978722126516724e-06,
"loss": 0.5607,
"step": 88
},
{
"epoch": 0.19,
"learning_rate": 7.97776682242239e-06,
"loss": 0.5196,
"step": 89
},
{
"epoch": 0.19,
"learning_rate": 7.976790601407744e-06,
"loss": 0.5956,
"step": 90
},
{
"epoch": 0.19,
"learning_rate": 7.975793468606202e-06,
"loss": 0.5261,
"step": 91
},
{
"epoch": 0.2,
"learning_rate": 7.974775429261146e-06,
"loss": 0.4927,
"step": 92
},
{
"epoch": 0.2,
"learning_rate": 7.973736488725894e-06,
"loss": 0.6336,
"step": 93
},
{
"epoch": 0.2,
"learning_rate": 7.972676652463672e-06,
"loss": 0.4601,
"step": 94
},
{
"epoch": 0.2,
"learning_rate": 7.971595926047584e-06,
"loss": 0.518,
"step": 95
},
{
"epoch": 0.2,
"learning_rate": 7.970494315160581e-06,
"loss": 0.6252,
"step": 96
},
{
"epoch": 0.21,
"learning_rate": 7.969371825595446e-06,
"loss": 0.6422,
"step": 97
},
{
"epoch": 0.21,
"learning_rate": 7.968228463254734e-06,
"loss": 0.5604,
"step": 98
},
{
"epoch": 0.21,
"learning_rate": 7.967064234150774e-06,
"loss": 0.5668,
"step": 99
},
{
"epoch": 0.21,
"learning_rate": 7.965879144405614e-06,
"loss": 0.5038,
"step": 100
},
{
"epoch": 0.21,
"learning_rate": 7.964673200251e-06,
"loss": 0.5135,
"step": 101
},
{
"epoch": 0.22,
"learning_rate": 7.963446408028337e-06,
"loss": 0.5891,
"step": 102
},
{
"epoch": 0.22,
"learning_rate": 7.962198774188664e-06,
"loss": 0.5601,
"step": 103
},
{
"epoch": 0.22,
"learning_rate": 7.960930305292606e-06,
"loss": 0.5299,
"step": 104
},
{
"epoch": 0.22,
"learning_rate": 7.959641008010358e-06,
"loss": 0.5386,
"step": 105
},
{
"epoch": 0.22,
"learning_rate": 7.958330889121633e-06,
"loss": 0.4753,
"step": 106
},
{
"epoch": 0.23,
"learning_rate": 7.956999955515639e-06,
"loss": 0.5309,
"step": 107
},
{
"epoch": 0.23,
"learning_rate": 7.955648214191032e-06,
"loss": 0.4613,
"step": 108
},
{
"epoch": 0.23,
"learning_rate": 7.954275672255889e-06,
"loss": 0.5896,
"step": 109
},
{
"epoch": 0.23,
"learning_rate": 7.952882336927664e-06,
"loss": 0.5575,
"step": 110
},
{
"epoch": 0.24,
"learning_rate": 7.951468215533152e-06,
"loss": 0.5446,
"step": 111
},
{
"epoch": 0.24,
"learning_rate": 7.950033315508452e-06,
"loss": 0.5002,
"step": 112
},
{
"epoch": 0.24,
"learning_rate": 7.948577644398923e-06,
"loss": 0.5286,
"step": 113
},
{
"epoch": 0.24,
"learning_rate": 7.947101209859155e-06,
"loss": 0.5512,
"step": 114
},
{
"epoch": 0.24,
"learning_rate": 7.945604019652914e-06,
"loss": 0.4671,
"step": 115
},
{
"epoch": 0.25,
"learning_rate": 7.944086081653113e-06,
"loss": 0.4763,
"step": 116
},
{
"epoch": 0.25,
"learning_rate": 7.942547403841764e-06,
"loss": 0.4876,
"step": 117
},
{
"epoch": 0.25,
"learning_rate": 7.940987994309944e-06,
"loss": 0.5406,
"step": 118
},
{
"epoch": 0.25,
"learning_rate": 7.939407861257737e-06,
"loss": 0.5201,
"step": 119
},
{
"epoch": 0.25,
"learning_rate": 7.937807012994212e-06,
"loss": 0.5437,
"step": 120
},
{
"epoch": 0.26,
"learning_rate": 7.936185457937358e-06,
"loss": 0.5829,
"step": 121
},
{
"epoch": 0.26,
"learning_rate": 7.934543204614057e-06,
"loss": 0.5079,
"step": 122
},
{
"epoch": 0.26,
"learning_rate": 7.93288026166003e-06,
"loss": 0.5478,
"step": 123
},
{
"epoch": 0.26,
"learning_rate": 7.931196637819789e-06,
"loss": 0.4746,
"step": 124
},
{
"epoch": 0.27,
"learning_rate": 7.929492341946603e-06,
"loss": 0.5347,
"step": 125
},
{
"epoch": 0.27,
"learning_rate": 7.92776738300244e-06,
"loss": 0.4957,
"step": 126
},
{
"epoch": 0.27,
"learning_rate": 7.926021770057926e-06,
"loss": 0.5816,
"step": 127
},
{
"epoch": 0.27,
"learning_rate": 7.92425551229229e-06,
"loss": 0.4931,
"step": 128
},
{
"epoch": 0.27,
"learning_rate": 7.922468618993326e-06,
"loss": 0.491,
"step": 129
},
{
"epoch": 0.28,
"learning_rate": 7.920661099557337e-06,
"loss": 0.5166,
"step": 130
},
{
"epoch": 0.28,
"learning_rate": 7.91883296348909e-06,
"loss": 0.5181,
"step": 131
},
{
"epoch": 0.28,
"learning_rate": 7.916984220401761e-06,
"loss": 0.5518,
"step": 132
},
{
"epoch": 0.28,
"learning_rate": 7.915114880016888e-06,
"loss": 0.5376,
"step": 133
},
{
"epoch": 0.28,
"learning_rate": 7.91322495216432e-06,
"loss": 0.4974,
"step": 134
},
{
"epoch": 0.29,
"learning_rate": 7.911314446782161e-06,
"loss": 0.5021,
"step": 135
},
{
"epoch": 0.29,
"learning_rate": 7.909383373916727e-06,
"loss": 0.5164,
"step": 136
},
{
"epoch": 0.29,
"learning_rate": 7.907431743722481e-06,
"loss": 0.5514,
"step": 137
},
{
"epoch": 0.29,
"learning_rate": 7.905459566461991e-06,
"loss": 0.6097,
"step": 138
},
{
"epoch": 0.29,
"learning_rate": 7.903466852505868e-06,
"loss": 0.6404,
"step": 139
},
{
"epoch": 0.3,
"learning_rate": 7.901453612332712e-06,
"loss": 0.5212,
"step": 140
},
{
"epoch": 0.3,
"learning_rate": 7.899419856529065e-06,
"loss": 0.4706,
"step": 141
},
{
"epoch": 0.3,
"learning_rate": 7.897365595789349e-06,
"loss": 0.5085,
"step": 142
},
{
"epoch": 0.3,
"learning_rate": 7.895290840915802e-06,
"loss": 0.4753,
"step": 143
},
{
"epoch": 0.31,
"learning_rate": 7.893195602818441e-06,
"loss": 0.452,
"step": 144
},
{
"epoch": 0.31,
"learning_rate": 7.891079892514985e-06,
"loss": 0.5328,
"step": 145
},
{
"epoch": 0.31,
"learning_rate": 7.88894372113081e-06,
"loss": 0.4966,
"step": 146
},
{
"epoch": 0.31,
"learning_rate": 7.886787099898881e-06,
"loss": 0.499,
"step": 147
},
{
"epoch": 0.31,
"learning_rate": 7.884610040159704e-06,
"loss": 0.5076,
"step": 148
},
{
"epoch": 0.32,
"learning_rate": 7.882412553361251e-06,
"loss": 0.4399,
"step": 149
},
{
"epoch": 0.32,
"learning_rate": 7.880194651058922e-06,
"loss": 0.5146,
"step": 150
},
{
"epoch": 0.32,
"learning_rate": 7.877956344915458e-06,
"loss": 0.5713,
"step": 151
},
{
"epoch": 0.32,
"learning_rate": 7.875697646700899e-06,
"loss": 0.5859,
"step": 152
},
{
"epoch": 0.32,
"learning_rate": 7.873418568292516e-06,
"loss": 0.47,
"step": 153
},
{
"epoch": 0.33,
"learning_rate": 7.871119121674747e-06,
"loss": 0.5046,
"step": 154
},
{
"epoch": 0.33,
"learning_rate": 7.868799318939135e-06,
"loss": 0.4822,
"step": 155
},
{
"epoch": 0.33,
"learning_rate": 7.866459172284266e-06,
"loss": 0.5664,
"step": 156
},
{
"epoch": 0.33,
"learning_rate": 7.864098694015706e-06,
"loss": 0.4816,
"step": 157
},
{
"epoch": 0.34,
"learning_rate": 7.861717896545927e-06,
"loss": 0.5024,
"step": 158
},
{
"epoch": 0.34,
"learning_rate": 7.859316792394255e-06,
"loss": 0.5695,
"step": 159
},
{
"epoch": 0.34,
"learning_rate": 7.856895394186795e-06,
"loss": 0.5718,
"step": 160
},
{
"epoch": 0.34,
"learning_rate": 7.854453714656367e-06,
"loss": 0.4732,
"step": 161
},
{
"epoch": 0.34,
"learning_rate": 7.851991766642444e-06,
"loss": 0.5216,
"step": 162
},
{
"epoch": 0.35,
"learning_rate": 7.849509563091072e-06,
"loss": 0.476,
"step": 163
},
{
"epoch": 0.35,
"learning_rate": 7.847007117054819e-06,
"loss": 0.5058,
"step": 164
},
{
"epoch": 0.35,
"learning_rate": 7.844484441692688e-06,
"loss": 0.5806,
"step": 165
},
{
"epoch": 0.35,
"learning_rate": 7.841941550270068e-06,
"loss": 0.4735,
"step": 166
},
{
"epoch": 0.35,
"learning_rate": 7.839378456158641e-06,
"loss": 0.5271,
"step": 167
},
{
"epoch": 0.36,
"learning_rate": 7.836795172836333e-06,
"loss": 0.4618,
"step": 168
},
{
"epoch": 0.36,
"learning_rate": 7.83419171388723e-06,
"loss": 0.5463,
"step": 169
},
{
"epoch": 0.36,
"learning_rate": 7.831568093001514e-06,
"loss": 0.5609,
"step": 170
},
{
"epoch": 0.36,
"learning_rate": 7.828924323975384e-06,
"loss": 0.4121,
"step": 171
},
{
"epoch": 0.36,
"learning_rate": 7.82626042071099e-06,
"loss": 0.5599,
"step": 172
},
{
"epoch": 0.37,
"learning_rate": 7.823576397216353e-06,
"loss": 0.5507,
"step": 173
},
{
"epoch": 0.37,
"learning_rate": 7.8208722676053e-06,
"loss": 0.5429,
"step": 174
},
{
"epoch": 0.37,
"learning_rate": 7.818148046097385e-06,
"loss": 0.4892,
"step": 175
},
{
"epoch": 0.37,
"learning_rate": 7.815403747017809e-06,
"loss": 0.5723,
"step": 176
},
{
"epoch": 0.38,
"learning_rate": 7.812639384797356e-06,
"loss": 0.497,
"step": 177
},
{
"epoch": 0.38,
"learning_rate": 7.80985497397231e-06,
"loss": 0.4353,
"step": 178
},
{
"epoch": 0.38,
"learning_rate": 7.807050529184378e-06,
"loss": 0.5138,
"step": 179
},
{
"epoch": 0.38,
"learning_rate": 7.804226065180613e-06,
"loss": 0.6011,
"step": 180
},
{
"epoch": 0.38,
"learning_rate": 7.801381596813346e-06,
"loss": 0.4651,
"step": 181
},
{
"epoch": 0.39,
"learning_rate": 7.79851713904009e-06,
"loss": 0.3993,
"step": 182
},
{
"epoch": 0.39,
"learning_rate": 7.795632706923479e-06,
"loss": 0.4822,
"step": 183
},
{
"epoch": 0.39,
"learning_rate": 7.792728315631177e-06,
"loss": 0.5465,
"step": 184
},
{
"epoch": 0.39,
"learning_rate": 7.789803980435803e-06,
"loss": 0.4937,
"step": 185
},
{
"epoch": 0.39,
"learning_rate": 7.786859716714854e-06,
"loss": 0.5817,
"step": 186
},
{
"epoch": 0.4,
"learning_rate": 7.783895539950617e-06,
"loss": 0.5089,
"step": 187
},
{
"epoch": 0.4,
"learning_rate": 7.78091146573009e-06,
"loss": 0.5045,
"step": 188
},
{
"epoch": 0.4,
"learning_rate": 7.777907509744903e-06,
"loss": 0.6175,
"step": 189
},
{
"epoch": 0.4,
"learning_rate": 7.774883687791232e-06,
"loss": 0.5375,
"step": 190
},
{
"epoch": 0.41,
"learning_rate": 7.77184001576972e-06,
"loss": 0.4996,
"step": 191
},
{
"epoch": 0.41,
"learning_rate": 7.76877650968539e-06,
"loss": 0.4786,
"step": 192
},
{
"epoch": 0.41,
"learning_rate": 7.765693185647557e-06,
"loss": 0.5974,
"step": 193
},
{
"epoch": 0.41,
"learning_rate": 7.762590059869751e-06,
"loss": 0.4305,
"step": 194
},
{
"epoch": 0.41,
"learning_rate": 7.759467148669631e-06,
"loss": 0.4898,
"step": 195
},
{
"epoch": 0.42,
"learning_rate": 7.756324468468896e-06,
"loss": 0.4873,
"step": 196
},
{
"epoch": 0.42,
"learning_rate": 7.753162035793197e-06,
"loss": 0.4564,
"step": 197
},
{
"epoch": 0.42,
"learning_rate": 7.749979867272054e-06,
"loss": 0.519,
"step": 198
},
{
"epoch": 0.42,
"learning_rate": 7.746777979638766e-06,
"loss": 0.4762,
"step": 199
},
{
"epoch": 0.42,
"learning_rate": 7.743556389730326e-06,
"loss": 0.4299,
"step": 200
},
{
"epoch": 0.43,
"learning_rate": 7.740315114487328e-06,
"loss": 0.4618,
"step": 201
},
{
"epoch": 0.43,
"learning_rate": 7.737054170953887e-06,
"loss": 0.5635,
"step": 202
},
{
"epoch": 0.43,
"learning_rate": 7.73377357627753e-06,
"loss": 0.5106,
"step": 203
},
{
"epoch": 0.43,
"learning_rate": 7.730473347709133e-06,
"loss": 0.488,
"step": 204
},
{
"epoch": 0.43,
"learning_rate": 7.727153502602811e-06,
"loss": 0.5428,
"step": 205
},
{
"epoch": 0.44,
"learning_rate": 7.723814058415826e-06,
"loss": 0.5067,
"step": 206
},
{
"epoch": 0.44,
"learning_rate": 7.720455032708504e-06,
"loss": 0.4982,
"step": 207
},
{
"epoch": 0.44,
"learning_rate": 7.717076443144148e-06,
"loss": 0.491,
"step": 208
},
{
"epoch": 0.44,
"learning_rate": 7.713678307488927e-06,
"loss": 0.3632,
"step": 209
},
{
"epoch": 0.45,
"learning_rate": 7.710260643611792e-06,
"loss": 0.4876,
"step": 210
},
{
"epoch": 0.45,
"learning_rate": 7.706823469484386e-06,
"loss": 0.494,
"step": 211
},
{
"epoch": 0.45,
"learning_rate": 7.703366803180947e-06,
"loss": 0.4321,
"step": 212
},
{
"epoch": 0.45,
"learning_rate": 7.699890662878213e-06,
"loss": 0.4424,
"step": 213
},
{
"epoch": 0.45,
"learning_rate": 7.696395066855317e-06,
"loss": 0.506,
"step": 214
},
{
"epoch": 0.46,
"learning_rate": 7.692880033493706e-06,
"loss": 0.5065,
"step": 215
},
{
"epoch": 0.46,
"learning_rate": 7.689345581277037e-06,
"loss": 0.4266,
"step": 216
},
{
"epoch": 0.46,
"learning_rate": 7.685791728791079e-06,
"loss": 0.5454,
"step": 217
},
{
"epoch": 0.46,
"learning_rate": 7.682218494723613e-06,
"loss": 0.5058,
"step": 218
},
{
"epoch": 0.46,
"learning_rate": 7.678625897864346e-06,
"loss": 0.4914,
"step": 219
},
{
"epoch": 0.47,
"learning_rate": 7.675013957104795e-06,
"loss": 0.4479,
"step": 220
},
{
"epoch": 0.47,
"learning_rate": 7.671382691438199e-06,
"loss": 0.4883,
"step": 221
},
{
"epoch": 0.47,
"learning_rate": 7.667732119959419e-06,
"loss": 0.4656,
"step": 222
},
{
"epoch": 0.47,
"learning_rate": 7.66406226186483e-06,
"loss": 0.5097,
"step": 223
},
{
"epoch": 0.48,
"learning_rate": 7.660373136452227e-06,
"loss": 0.5149,
"step": 224
},
{
"epoch": 0.48,
"learning_rate": 7.656664763120723e-06,
"loss": 0.4258,
"step": 225
},
{
"epoch": 0.48,
"learning_rate": 7.652937161370643e-06,
"loss": 0.476,
"step": 226
},
{
"epoch": 0.48,
"learning_rate": 7.649190350803425e-06,
"loss": 0.4781,
"step": 227
},
{
"epoch": 0.48,
"learning_rate": 7.645424351121518e-06,
"loss": 0.4783,
"step": 228
},
{
"epoch": 0.49,
"learning_rate": 7.641639182128269e-06,
"loss": 0.4735,
"step": 229
},
{
"epoch": 0.49,
"learning_rate": 7.637834863727832e-06,
"loss": 0.4702,
"step": 230
},
{
"epoch": 0.49,
"learning_rate": 7.634011415925058e-06,
"loss": 0.4501,
"step": 231
},
{
"epoch": 0.49,
"learning_rate": 7.630168858825388e-06,
"loss": 0.4906,
"step": 232
},
{
"epoch": 0.49,
"learning_rate": 7.626307212634744e-06,
"loss": 0.5146,
"step": 233
},
{
"epoch": 0.5,
"learning_rate": 7.622426497659433e-06,
"loss": 0.3979,
"step": 234
},
{
"epoch": 0.5,
"learning_rate": 7.618526734306033e-06,
"loss": 0.439,
"step": 235
},
{
"epoch": 0.5,
"learning_rate": 7.6146079430812834e-06,
"loss": 0.424,
"step": 236
},
{
"epoch": 0.5,
"learning_rate": 7.610670144591987e-06,
"loss": 0.5036,
"step": 237
},
{
"epoch": 0.5,
"learning_rate": 7.606713359544891e-06,
"loss": 0.4838,
"step": 238
},
{
"epoch": 0.51,
"learning_rate": 7.602737608746583e-06,
"loss": 0.5796,
"step": 239
},
{
"epoch": 0.51,
"learning_rate": 7.5987429131033845e-06,
"loss": 0.5559,
"step": 240
},
{
"epoch": 0.51,
"learning_rate": 7.594729293621232e-06,
"loss": 0.5669,
"step": 241
},
{
"epoch": 0.51,
"learning_rate": 7.590696771405578e-06,
"loss": 0.4606,
"step": 242
},
{
"epoch": 0.52,
"learning_rate": 7.586645367661271e-06,
"loss": 0.575,
"step": 243
},
{
"epoch": 0.52,
"learning_rate": 7.5825751036924495e-06,
"loss": 0.4341,
"step": 244
},
{
"epoch": 0.52,
"learning_rate": 7.578486000902423e-06,
"loss": 0.576,
"step": 245
},
{
"epoch": 0.52,
"learning_rate": 7.57437808079357e-06,
"loss": 0.5135,
"step": 246
},
{
"epoch": 0.52,
"learning_rate": 7.570251364967216e-06,
"loss": 0.4206,
"step": 247
},
{
"epoch": 0.53,
"learning_rate": 7.566105875123522e-06,
"loss": 0.5472,
"step": 248
},
{
"epoch": 0.53,
"learning_rate": 7.561941633061374e-06,
"loss": 0.4756,
"step": 249
},
{
"epoch": 0.53,
"learning_rate": 7.5577586606782644e-06,
"loss": 0.4792,
"step": 250
},
{
"epoch": 0.53,
"learning_rate": 7.553556979970178e-06,
"loss": 0.5208,
"step": 251
},
{
"epoch": 0.53,
"learning_rate": 7.549336613031478e-06,
"loss": 0.4277,
"step": 252
},
{
"epoch": 0.54,
"learning_rate": 7.545097582054785e-06,
"loss": 0.4843,
"step": 253
},
{
"epoch": 0.54,
"learning_rate": 7.540839909330868e-06,
"loss": 0.4407,
"step": 254
},
{
"epoch": 0.54,
"learning_rate": 7.536563617248522e-06,
"loss": 0.5072,
"step": 255
},
{
"epoch": 0.54,
"learning_rate": 7.532268728294445e-06,
"loss": 0.4303,
"step": 256
},
{
"epoch": 0.54,
"learning_rate": 7.527955265053135e-06,
"loss": 0.4282,
"step": 257
},
{
"epoch": 0.55,
"learning_rate": 7.523623250206757e-06,
"loss": 0.3934,
"step": 258
},
{
"epoch": 0.55,
"learning_rate": 7.51927270653503e-06,
"loss": 0.3748,
"step": 259
},
{
"epoch": 0.55,
"learning_rate": 7.5149036569151034e-06,
"loss": 0.4103,
"step": 260
},
{
"epoch": 0.55,
"learning_rate": 7.510516124321446e-06,
"loss": 0.5058,
"step": 261
},
{
"epoch": 0.56,
"learning_rate": 7.506110131825709e-06,
"loss": 0.4894,
"step": 262
},
{
"epoch": 0.56,
"learning_rate": 7.501685702596624e-06,
"loss": 0.5382,
"step": 263
},
{
"epoch": 0.56,
"learning_rate": 7.497242859899865e-06,
"loss": 0.4708,
"step": 264
},
{
"epoch": 0.56,
"learning_rate": 7.492781627097933e-06,
"loss": 0.5604,
"step": 265
},
{
"epoch": 0.56,
"learning_rate": 7.4883020276500374e-06,
"loss": 0.4747,
"step": 266
},
{
"epoch": 0.57,
"learning_rate": 7.4838040851119605e-06,
"loss": 0.4831,
"step": 267
},
{
"epoch": 0.57,
"learning_rate": 7.479287823135949e-06,
"loss": 0.4334,
"step": 268
},
{
"epoch": 0.57,
"learning_rate": 7.474753265470573e-06,
"loss": 0.4775,
"step": 269
},
{
"epoch": 0.57,
"learning_rate": 7.470200435960617e-06,
"loss": 0.4522,
"step": 270
},
{
"epoch": 0.57,
"learning_rate": 7.465629358546944e-06,
"loss": 0.5341,
"step": 271
},
{
"epoch": 0.58,
"learning_rate": 7.461040057266372e-06,
"loss": 0.4744,
"step": 272
},
{
"epoch": 0.58,
"learning_rate": 7.45643255625155e-06,
"loss": 0.5266,
"step": 273
},
{
"epoch": 0.58,
"learning_rate": 7.4518068797308315e-06,
"loss": 0.4811,
"step": 274
},
{
"epoch": 0.58,
"learning_rate": 7.44716305202814e-06,
"loss": 0.3954,
"step": 275
},
{
"epoch": 0.59,
"learning_rate": 7.4425010975628495e-06,
"loss": 0.4264,
"step": 276
},
{
"epoch": 0.59,
"learning_rate": 7.437821040849656e-06,
"loss": 0.4892,
"step": 277
},
{
"epoch": 0.59,
"learning_rate": 7.433122906498436e-06,
"loss": 0.4538,
"step": 278
},
{
"epoch": 0.59,
"learning_rate": 7.428406719214136e-06,
"loss": 0.4018,
"step": 279
},
{
"epoch": 0.59,
"learning_rate": 7.423672503796628e-06,
"loss": 0.4905,
"step": 280
},
{
"epoch": 0.6,
"learning_rate": 7.418920285140586e-06,
"loss": 0.4158,
"step": 281
},
{
"epoch": 0.6,
"learning_rate": 7.414150088235354e-06,
"loss": 0.4585,
"step": 282
},
{
"epoch": 0.6,
"learning_rate": 7.40936193816481e-06,
"loss": 0.3843,
"step": 283
},
{
"epoch": 0.6,
"learning_rate": 7.404555860107244e-06,
"loss": 0.4019,
"step": 284
},
{
"epoch": 0.6,
"learning_rate": 7.399731879335213e-06,
"loss": 0.4606,
"step": 285
},
{
"epoch": 0.61,
"learning_rate": 7.394890021215422e-06,
"loss": 0.4871,
"step": 286
},
{
"epoch": 0.61,
"learning_rate": 7.390030311208574e-06,
"loss": 0.4384,
"step": 287
},
{
"epoch": 0.61,
"learning_rate": 7.385152774869256e-06,
"loss": 0.3412,
"step": 288
},
{
"epoch": 0.61,
"learning_rate": 7.380257437845784e-06,
"loss": 0.4972,
"step": 289
},
{
"epoch": 0.61,
"learning_rate": 7.375344325880087e-06,
"loss": 0.5358,
"step": 290
},
{
"epoch": 0.62,
"learning_rate": 7.370413464807555e-06,
"loss": 0.4793,
"step": 291
},
{
"epoch": 0.62,
"learning_rate": 7.365464880556916e-06,
"loss": 0.4448,
"step": 292
},
{
"epoch": 0.62,
"learning_rate": 7.360498599150094e-06,
"loss": 0.5746,
"step": 293
},
{
"epoch": 0.62,
"learning_rate": 7.35551464670207e-06,
"loss": 0.469,
"step": 294
},
{
"epoch": 0.63,
"learning_rate": 7.350513049420751e-06,
"loss": 0.4803,
"step": 295
},
{
"epoch": 0.63,
"learning_rate": 7.345493833606826e-06,
"loss": 0.3804,
"step": 296
},
{
"epoch": 0.63,
"learning_rate": 7.3404570256536305e-06,
"loss": 0.4617,
"step": 297
},
{
"epoch": 0.63,
"learning_rate": 7.33540265204701e-06,
"loss": 0.4373,
"step": 298
},
{
"epoch": 0.63,
"learning_rate": 7.330330739365177e-06,
"loss": 0.4687,
"step": 299
},
{
"epoch": 0.64,
"learning_rate": 7.325241314278569e-06,
"loss": 0.4681,
"step": 300
},
{
"epoch": 0.64,
"learning_rate": 7.320134403549718e-06,
"loss": 0.4614,
"step": 301
},
{
"epoch": 0.64,
"learning_rate": 7.315010034033097e-06,
"loss": 0.4657,
"step": 302
},
{
"epoch": 0.64,
"learning_rate": 7.309868232674991e-06,
"loss": 0.4189,
"step": 303
},
{
"epoch": 0.64,
"learning_rate": 7.304709026513347e-06,
"loss": 0.4187,
"step": 304
},
{
"epoch": 0.65,
"learning_rate": 7.299532442677634e-06,
"loss": 0.4242,
"step": 305
},
{
"epoch": 0.65,
"learning_rate": 7.294338508388701e-06,
"loss": 0.524,
"step": 306
},
{
"epoch": 0.65,
"learning_rate": 7.289127250958635e-06,
"loss": 0.4778,
"step": 307
},
{
"epoch": 0.65,
"learning_rate": 7.283898697790614e-06,
"loss": 0.5113,
"step": 308
},
{
"epoch": 0.66,
"learning_rate": 7.278652876378768e-06,
"loss": 0.4564,
"step": 309
},
{
"epoch": 0.66,
"learning_rate": 7.273389814308027e-06,
"loss": 0.4455,
"step": 310
},
{
"epoch": 0.66,
"learning_rate": 7.268109539253983e-06,
"loss": 0.493,
"step": 311
},
{
"epoch": 0.66,
"learning_rate": 7.262812078982742e-06,
"loss": 0.4508,
"step": 312
},
{
"epoch": 0.66,
"learning_rate": 7.257497461350777e-06,
"loss": 0.4226,
"step": 313
},
{
"epoch": 0.67,
"learning_rate": 7.252165714304782e-06,
"loss": 0.4664,
"step": 314
},
{
"epoch": 0.67,
"learning_rate": 7.246816865881525e-06,
"loss": 0.4826,
"step": 315
},
{
"epoch": 0.67,
"learning_rate": 7.241450944207703e-06,
"loss": 0.4462,
"step": 316
},
{
"epoch": 0.67,
"learning_rate": 7.2360679774997895e-06,
"loss": 0.3952,
"step": 317
},
{
"epoch": 0.67,
"learning_rate": 7.23066799406389e-06,
"loss": 0.4112,
"step": 318
},
{
"epoch": 0.68,
"learning_rate": 7.225251022295591e-06,
"loss": 0.5491,
"step": 319
},
{
"epoch": 0.68,
"learning_rate": 7.219817090679812e-06,
"loss": 0.5139,
"step": 320
},
{
"epoch": 0.68,
"learning_rate": 7.214366227790655e-06,
"loss": 0.4228,
"step": 321
},
{
"epoch": 0.68,
"learning_rate": 7.208898462291253e-06,
"loss": 0.4306,
"step": 322
},
{
"epoch": 0.68,
"learning_rate": 7.203413822933625e-06,
"loss": 0.5346,
"step": 323
},
{
"epoch": 0.69,
"learning_rate": 7.197912338558515e-06,
"loss": 0.5154,
"step": 324
},
{
"epoch": 0.69,
"learning_rate": 7.1923940380952486e-06,
"loss": 0.4478,
"step": 325
},
{
"epoch": 0.69,
"learning_rate": 7.186858950561579e-06,
"loss": 0.5137,
"step": 326
},
{
"epoch": 0.69,
"learning_rate": 7.181307105063529e-06,
"loss": 0.4471,
"step": 327
},
{
"epoch": 0.7,
"learning_rate": 7.17573853079525e-06,
"loss": 0.5168,
"step": 328
},
{
"epoch": 0.7,
"learning_rate": 7.170153257038854e-06,
"loss": 0.4464,
"step": 329
},
{
"epoch": 0.7,
"learning_rate": 7.16455131316427e-06,
"loss": 0.4612,
"step": 330
},
{
"epoch": 0.7,
"learning_rate": 7.158932728629086e-06,
"loss": 0.4236,
"step": 331
},
{
"epoch": 0.7,
"learning_rate": 7.153297532978392e-06,
"loss": 0.3832,
"step": 332
},
{
"epoch": 0.71,
"learning_rate": 7.147645755844629e-06,
"loss": 0.4487,
"step": 333
},
{
"epoch": 0.71,
"learning_rate": 7.1419774269474305e-06,
"loss": 0.4025,
"step": 334
},
{
"epoch": 0.71,
"learning_rate": 7.136292576093468e-06,
"loss": 0.5025,
"step": 335
},
{
"epoch": 0.71,
"learning_rate": 7.13059123317629e-06,
"loss": 0.5122,
"step": 336
},
{
"epoch": 0.71,
"learning_rate": 7.124873428176171e-06,
"loss": 0.4852,
"step": 337
},
{
"epoch": 0.72,
"learning_rate": 7.119139191159948e-06,
"loss": 0.4314,
"step": 338
},
{
"epoch": 0.72,
"learning_rate": 7.113388552280866e-06,
"loss": 0.4837,
"step": 339
},
{
"epoch": 0.72,
"learning_rate": 7.107621541778421e-06,
"loss": 0.4836,
"step": 340
},
{
"epoch": 0.72,
"learning_rate": 7.101838189978193e-06,
"loss": 0.4527,
"step": 341
},
{
"epoch": 0.73,
"learning_rate": 7.096038527291699e-06,
"loss": 0.3796,
"step": 342
},
{
"epoch": 0.73,
"learning_rate": 7.09022258421622e-06,
"loss": 0.4869,
"step": 343
},
{
"epoch": 0.73,
"learning_rate": 7.084390391334649e-06,
"loss": 0.4134,
"step": 344
},
{
"epoch": 0.73,
"learning_rate": 7.07854197931533e-06,
"loss": 0.3975,
"step": 345
},
{
"epoch": 0.73,
"learning_rate": 7.07267737891189e-06,
"loss": 0.4144,
"step": 346
},
{
"epoch": 0.74,
"learning_rate": 7.066796620963089e-06,
"loss": 0.4882,
"step": 347
},
{
"epoch": 0.74,
"learning_rate": 7.060899736392642e-06,
"loss": 0.3924,
"step": 348
},
{
"epoch": 0.74,
"learning_rate": 7.0549867562090715e-06,
"loss": 0.4532,
"step": 349
},
{
"epoch": 0.74,
"learning_rate": 7.049057711505536e-06,
"loss": 0.4795,
"step": 350
},
{
"epoch": 0.74,
"learning_rate": 7.043112633459671e-06,
"loss": 0.4602,
"step": 351
},
{
"epoch": 0.75,
"learning_rate": 7.037151553333417e-06,
"loss": 0.3884,
"step": 352
},
{
"epoch": 0.75,
"learning_rate": 7.031174502472867e-06,
"loss": 0.384,
"step": 353
},
{
"epoch": 0.75,
"learning_rate": 7.02518151230809e-06,
"loss": 0.5014,
"step": 354
},
{
"epoch": 0.75,
"learning_rate": 7.019172614352977e-06,
"loss": 0.3907,
"step": 355
},
{
"epoch": 0.75,
"learning_rate": 7.013147840205065e-06,
"loss": 0.4334,
"step": 356
},
{
"epoch": 0.76,
"learning_rate": 7.007107221545375e-06,
"loss": 0.4261,
"step": 357
},
{
"epoch": 0.76,
"learning_rate": 7.00105079013825e-06,
"loss": 0.4448,
"step": 358
},
{
"epoch": 0.76,
"learning_rate": 6.99497857783118e-06,
"loss": 0.4571,
"step": 359
},
{
"epoch": 0.76,
"learning_rate": 6.98889061655464e-06,
"loss": 0.397,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 6.982786938321917e-06,
"loss": 0.5282,
"step": 361
},
{
"epoch": 0.77,
"learning_rate": 6.976667575228949e-06,
"loss": 0.4177,
"step": 362
},
{
"epoch": 0.77,
"learning_rate": 6.970532559454152e-06,
"loss": 0.4443,
"step": 363
},
{
"epoch": 0.77,
"learning_rate": 6.964381923258248e-06,
"loss": 0.3849,
"step": 364
},
{
"epoch": 0.77,
"learning_rate": 6.9582156989840995e-06,
"loss": 0.4263,
"step": 365
},
{
"epoch": 0.78,
"learning_rate": 6.95203391905654e-06,
"loss": 0.4899,
"step": 366
},
{
"epoch": 0.78,
"learning_rate": 6.9458366159822e-06,
"loss": 0.4577,
"step": 367
},
{
"epoch": 0.78,
"learning_rate": 6.939623822349337e-06,
"loss": 0.4412,
"step": 368
},
{
"epoch": 0.78,
"learning_rate": 6.93339557082767e-06,
"loss": 0.4228,
"step": 369
},
{
"epoch": 0.78,
"learning_rate": 6.927151894168194e-06,
"loss": 0.403,
"step": 370
},
{
"epoch": 0.79,
"learning_rate": 6.920892825203026e-06,
"loss": 0.4028,
"step": 371
},
{
"epoch": 0.79,
"learning_rate": 6.9146183968452144e-06,
"loss": 0.4651,
"step": 372
},
{
"epoch": 0.79,
"learning_rate": 6.908328642088578e-06,
"loss": 0.4636,
"step": 373
},
{
"epoch": 0.79,
"learning_rate": 6.902023594007533e-06,
"loss": 0.4928,
"step": 374
},
{
"epoch": 0.8,
"learning_rate": 6.895703285756907e-06,
"loss": 0.3784,
"step": 375
},
{
"epoch": 0.8,
"learning_rate": 6.889367750571777e-06,
"loss": 0.453,
"step": 376
},
{
"epoch": 0.8,
"learning_rate": 6.8830170217672905e-06,
"loss": 0.4229,
"step": 377
},
{
"epoch": 0.8,
"learning_rate": 6.876651132738489e-06,
"loss": 0.4435,
"step": 378
},
{
"epoch": 0.8,
"learning_rate": 6.870270116960132e-06,
"loss": 0.4433,
"step": 379
},
{
"epoch": 0.81,
"learning_rate": 6.863874007986526e-06,
"loss": 0.4006,
"step": 380
},
{
"epoch": 0.81,
"learning_rate": 6.85746283945134e-06,
"loss": 0.4228,
"step": 381
},
{
"epoch": 0.81,
"learning_rate": 6.851036645067437e-06,
"loss": 0.4261,
"step": 382
},
{
"epoch": 0.81,
"learning_rate": 6.84459545862669e-06,
"loss": 0.4391,
"step": 383
},
{
"epoch": 0.81,
"learning_rate": 6.83813931399981e-06,
"loss": 0.4812,
"step": 384
},
{
"epoch": 0.82,
"learning_rate": 6.8316682451361615e-06,
"loss": 0.3479,
"step": 385
},
{
"epoch": 0.82,
"learning_rate": 6.82518228606359e-06,
"loss": 0.5266,
"step": 386
},
{
"epoch": 0.82,
"learning_rate": 6.818681470888239e-06,
"loss": 0.4013,
"step": 387
},
{
"epoch": 0.82,
"learning_rate": 6.8121658337943736e-06,
"loss": 0.4608,
"step": 388
},
{
"epoch": 0.82,
"learning_rate": 6.8056354090442e-06,
"loss": 0.439,
"step": 389
},
{
"epoch": 0.83,
"learning_rate": 6.79909023097768e-06,
"loss": 0.4746,
"step": 390
},
{
"epoch": 0.83,
"learning_rate": 6.7925303340123615e-06,
"loss": 0.4865,
"step": 391
},
{
"epoch": 0.83,
"learning_rate": 6.785955752643187e-06,
"loss": 0.4544,
"step": 392
},
{
"epoch": 0.83,
"learning_rate": 6.779366521442317e-06,
"loss": 0.492,
"step": 393
},
{
"epoch": 0.84,
"learning_rate": 6.772762675058949e-06,
"loss": 0.4232,
"step": 394
},
{
"epoch": 0.84,
"learning_rate": 6.7661442482191316e-06,
"loss": 0.4923,
"step": 395
},
{
"epoch": 0.84,
"learning_rate": 6.759511275725586e-06,
"loss": 0.4106,
"step": 396
},
{
"epoch": 0.84,
"learning_rate": 6.75286379245752e-06,
"loss": 0.3977,
"step": 397
},
{
"epoch": 0.84,
"learning_rate": 6.7462018333704465e-06,
"loss": 0.4891,
"step": 398
},
{
"epoch": 0.85,
"learning_rate": 6.739525433495998e-06,
"loss": 0.4572,
"step": 399
},
{
"epoch": 0.85,
"learning_rate": 6.732834627941744e-06,
"loss": 0.4054,
"step": 400
},
{
"epoch": 0.85,
"learning_rate": 6.726129451891004e-06,
"loss": 0.4421,
"step": 401
},
{
"epoch": 0.85,
"learning_rate": 6.719409940602668e-06,
"loss": 0.3797,
"step": 402
},
{
"epoch": 0.85,
"learning_rate": 6.7126761294110035e-06,
"loss": 0.4286,
"step": 403
},
{
"epoch": 0.86,
"learning_rate": 6.705928053725476e-06,
"loss": 0.4313,
"step": 404
},
{
"epoch": 0.86,
"learning_rate": 6.699165749030556e-06,
"loss": 0.4527,
"step": 405
},
{
"epoch": 0.86,
"learning_rate": 6.692389250885542e-06,
"loss": 0.4512,
"step": 406
},
{
"epoch": 0.86,
"learning_rate": 6.685598594924366e-06,
"loss": 0.4658,
"step": 407
},
{
"epoch": 0.87,
"learning_rate": 6.6787938168554075e-06,
"loss": 0.4802,
"step": 408
},
{
"epoch": 0.87,
"learning_rate": 6.671974952461307e-06,
"loss": 0.5127,
"step": 409
},
{
"epoch": 0.87,
"learning_rate": 6.665142037598778e-06,
"loss": 0.546,
"step": 410
},
{
"epoch": 0.87,
"learning_rate": 6.658295108198416e-06,
"loss": 0.4744,
"step": 411
},
{
"epoch": 0.87,
"learning_rate": 6.651434200264513e-06,
"loss": 0.4198,
"step": 412
},
{
"epoch": 0.88,
"learning_rate": 6.644559349874868e-06,
"loss": 0.3673,
"step": 413
},
{
"epoch": 0.88,
"learning_rate": 6.63767059318059e-06,
"loss": 0.465,
"step": 414
},
{
"epoch": 0.88,
"learning_rate": 6.630767966405923e-06,
"loss": 0.4162,
"step": 415
},
{
"epoch": 0.88,
"learning_rate": 6.623851505848036e-06,
"loss": 0.4321,
"step": 416
},
{
"epoch": 0.88,
"learning_rate": 6.616921247876851e-06,
"loss": 0.4142,
"step": 417
},
{
"epoch": 0.89,
"learning_rate": 6.609977228934837e-06,
"loss": 0.4443,
"step": 418
},
{
"epoch": 0.89,
"learning_rate": 6.603019485536827e-06,
"loss": 0.466,
"step": 419
},
{
"epoch": 0.89,
"learning_rate": 6.596048054269825e-06,
"loss": 0.4174,
"step": 420
},
{
"epoch": 0.89,
"learning_rate": 6.58906297179281e-06,
"loss": 0.4396,
"step": 421
},
{
"epoch": 0.89,
"learning_rate": 6.582064274836545e-06,
"loss": 0.4313,
"step": 422
},
{
"epoch": 0.9,
"learning_rate": 6.575052000203385e-06,
"loss": 0.4246,
"step": 423
},
{
"epoch": 0.9,
"learning_rate": 6.568026184767083e-06,
"loss": 0.4202,
"step": 424
},
{
"epoch": 0.9,
"learning_rate": 6.560986865472597e-06,
"loss": 0.458,
"step": 425
},
{
"epoch": 0.9,
"learning_rate": 6.553934079335889e-06,
"loss": 0.5062,
"step": 426
},
{
"epoch": 0.91,
"learning_rate": 6.546867863443741e-06,
"loss": 0.4487,
"step": 427
},
{
"epoch": 0.91,
"learning_rate": 6.539788254953555e-06,
"loss": 0.5074,
"step": 428
},
{
"epoch": 0.91,
"learning_rate": 6.532695291093156e-06,
"loss": 0.3911,
"step": 429
},
{
"epoch": 0.91,
"learning_rate": 6.525589009160597e-06,
"loss": 0.4618,
"step": 430
},
{
"epoch": 0.91,
"learning_rate": 6.518469446523962e-06,
"loss": 0.3625,
"step": 431
},
{
"epoch": 0.92,
"learning_rate": 6.511336640621175e-06,
"loss": 0.4606,
"step": 432
},
{
"epoch": 0.92,
"learning_rate": 6.504190628959797e-06,
"loss": 0.3762,
"step": 433
},
{
"epoch": 0.92,
"learning_rate": 6.497031449116832e-06,
"loss": 0.4018,
"step": 434
},
{
"epoch": 0.92,
"learning_rate": 6.489859138738526e-06,
"loss": 0.4092,
"step": 435
},
{
"epoch": 0.92,
"learning_rate": 6.482673735540172e-06,
"loss": 0.3975,
"step": 436
},
{
"epoch": 0.93,
"learning_rate": 6.475475277305915e-06,
"loss": 0.4393,
"step": 437
},
{
"epoch": 0.93,
"learning_rate": 6.468263801888545e-06,
"loss": 0.4282,
"step": 438
},
{
"epoch": 0.93,
"learning_rate": 6.461039347209302e-06,
"loss": 0.4255,
"step": 439
},
{
"epoch": 0.93,
"learning_rate": 6.453801951257681e-06,
"loss": 0.513,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 6.446551652091223e-06,
"loss": 0.4321,
"step": 441
},
{
"epoch": 0.94,
"learning_rate": 6.439288487835325e-06,
"loss": 0.4872,
"step": 442
},
{
"epoch": 0.94,
"learning_rate": 6.4320124966830304e-06,
"loss": 0.485,
"step": 443
},
{
"epoch": 0.94,
"learning_rate": 6.424723716894835e-06,
"loss": 0.502,
"step": 444
},
{
"epoch": 0.94,
"learning_rate": 6.417422186798482e-06,
"loss": 0.4049,
"step": 445
},
{
"epoch": 0.95,
"learning_rate": 6.410107944788763e-06,
"loss": 0.4333,
"step": 446
},
{
"epoch": 0.95,
"learning_rate": 6.40278102932731e-06,
"loss": 0.3984,
"step": 447
},
{
"epoch": 0.95,
"learning_rate": 6.3954414789424035e-06,
"loss": 0.3831,
"step": 448
},
{
"epoch": 0.95,
"learning_rate": 6.388089332228764e-06,
"loss": 0.3706,
"step": 449
},
{
"epoch": 0.95,
"learning_rate": 6.380724627847343e-06,
"loss": 0.4587,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 6.373347404525134e-06,
"loss": 0.4058,
"step": 451
},
{
"epoch": 0.96,
"learning_rate": 6.365957701054954e-06,
"loss": 0.5062,
"step": 452
},
{
"epoch": 0.96,
"learning_rate": 6.358555556295251e-06,
"loss": 0.3817,
"step": 453
},
{
"epoch": 0.96,
"learning_rate": 6.351141009169892e-06,
"loss": 0.4271,
"step": 454
},
{
"epoch": 0.96,
"learning_rate": 6.343714098667964e-06,
"loss": 0.4173,
"step": 455
},
{
"epoch": 0.97,
"learning_rate": 6.336274863843566e-06,
"loss": 0.5117,
"step": 456
},
{
"epoch": 0.97,
"learning_rate": 6.3288233438155995e-06,
"loss": 0.4135,
"step": 457
},
{
"epoch": 0.97,
"learning_rate": 6.321359577767573e-06,
"loss": 0.4454,
"step": 458
},
{
"epoch": 0.97,
"learning_rate": 6.313883604947388e-06,
"loss": 0.4789,
"step": 459
},
{
"epoch": 0.98,
"learning_rate": 6.306395464667133e-06,
"loss": 0.4406,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 6.298895196302881e-06,
"loss": 0.4257,
"step": 461
},
{
"epoch": 0.98,
"learning_rate": 6.291382839294479e-06,
"loss": 0.3963,
"step": 462
},
{
"epoch": 0.98,
"learning_rate": 6.283858433145344e-06,
"loss": 0.3931,
"step": 463
},
{
"epoch": 0.98,
"learning_rate": 6.276322017422248e-06,
"loss": 0.4213,
"step": 464
},
{
"epoch": 0.99,
"learning_rate": 6.268773631755118e-06,
"loss": 0.4193,
"step": 465
},
{
"epoch": 0.99,
"learning_rate": 6.261213315836825e-06,
"loss": 0.4106,
"step": 466
},
{
"epoch": 0.99,
"learning_rate": 6.2536411094229715e-06,
"loss": 0.4813,
"step": 467
},
{
"epoch": 0.99,
"learning_rate": 6.246057052331691e-06,
"loss": 0.5012,
"step": 468
},
{
"epoch": 0.99,
"learning_rate": 6.23846118444343e-06,
"loss": 0.4133,
"step": 469
},
{
"epoch": 1.0,
"learning_rate": 6.230853545700739e-06,
"loss": 0.5327,
"step": 470
},
{
"epoch": 1.0,
"learning_rate": 6.22323417610807e-06,
"loss": 0.4831,
"step": 471
},
{
"epoch": 1.0,
"learning_rate": 6.21560311573156e-06,
"loss": 0.4026,
"step": 472
},
{
"epoch": 1.0,
"learning_rate": 6.207960404698817e-06,
"loss": 0.3656,
"step": 473
},
{
"epoch": 1.01,
"learning_rate": 6.20030608319872e-06,
"loss": 0.4121,
"step": 474
},
{
"epoch": 1.01,
"learning_rate": 6.192640191481197e-06,
"loss": 0.3757,
"step": 475
},
{
"epoch": 1.01,
"learning_rate": 6.1849627698570195e-06,
"loss": 0.4363,
"step": 476
},
{
"epoch": 1.01,
"learning_rate": 6.177273858697587e-06,
"loss": 0.4333,
"step": 477
},
{
"epoch": 1.01,
"learning_rate": 6.169573498434717e-06,
"loss": 0.4304,
"step": 478
},
{
"epoch": 1.02,
"learning_rate": 6.1618617295604315e-06,
"loss": 0.4233,
"step": 479
},
{
"epoch": 1.02,
"learning_rate": 6.1541385926267446e-06,
"loss": 0.3771,
"step": 480
},
{
"epoch": 1.02,
"learning_rate": 6.146404128245448e-06,
"loss": 0.4304,
"step": 481
},
{
"epoch": 1.02,
"learning_rate": 6.1386583770878994e-06,
"loss": 0.4226,
"step": 482
},
{
"epoch": 1.02,
"learning_rate": 6.130901379884807e-06,
"loss": 0.4151,
"step": 483
},
{
"epoch": 1.03,
"learning_rate": 6.123133177426016e-06,
"loss": 0.4144,
"step": 484
},
{
"epoch": 1.03,
"learning_rate": 6.115353810560293e-06,
"loss": 0.3793,
"step": 485
},
{
"epoch": 1.03,
"learning_rate": 6.107563320195114e-06,
"loss": 0.3304,
"step": 486
},
{
"epoch": 1.03,
"learning_rate": 6.099761747296446e-06,
"loss": 0.3824,
"step": 487
},
{
"epoch": 1.03,
"learning_rate": 6.091949132888533e-06,
"loss": 0.4095,
"step": 488
},
{
"epoch": 1.04,
"learning_rate": 6.084125518053681e-06,
"loss": 0.3464,
"step": 489
},
{
"epoch": 1.04,
"learning_rate": 6.0762909439320425e-06,
"loss": 0.3443,
"step": 490
},
{
"epoch": 1.04,
"learning_rate": 6.068445451721396e-06,
"loss": 0.4684,
"step": 491
},
{
"epoch": 1.04,
"learning_rate": 6.060589082676933e-06,
"loss": 0.3989,
"step": 492
},
{
"epoch": 1.05,
"learning_rate": 6.052721878111044e-06,
"loss": 0.4601,
"step": 493
},
{
"epoch": 1.05,
"learning_rate": 6.0448438793930925e-06,
"loss": 0.3757,
"step": 494
},
{
"epoch": 1.05,
"learning_rate": 6.036955127949206e-06,
"loss": 0.3606,
"step": 495
},
{
"epoch": 1.05,
"learning_rate": 6.029055665262053e-06,
"loss": 0.298,
"step": 496
},
{
"epoch": 1.05,
"learning_rate": 6.021145532870628e-06,
"loss": 0.4146,
"step": 497
},
{
"epoch": 1.06,
"learning_rate": 6.013224772370031e-06,
"loss": 0.4418,
"step": 498
},
{
"epoch": 1.06,
"learning_rate": 6.005293425411251e-06,
"loss": 0.3674,
"step": 499
},
{
"epoch": 1.06,
"learning_rate": 5.997351533700939e-06,
"loss": 0.3945,
"step": 500
},
{
"epoch": 1.06,
"learning_rate": 5.989399139001206e-06,
"loss": 0.4051,
"step": 501
},
{
"epoch": 1.06,
"learning_rate": 5.981436283129383e-06,
"loss": 0.4457,
"step": 502
},
{
"epoch": 1.07,
"learning_rate": 5.973463007957816e-06,
"loss": 0.3127,
"step": 503
},
{
"epoch": 1.07,
"learning_rate": 5.965479355413637e-06,
"loss": 0.3909,
"step": 504
},
{
"epoch": 1.07,
"learning_rate": 5.957485367478551e-06,
"loss": 0.3843,
"step": 505
},
{
"epoch": 1.07,
"learning_rate": 5.949481086188607e-06,
"loss": 0.4235,
"step": 506
},
{
"epoch": 1.08,
"learning_rate": 5.941466553633983e-06,
"loss": 0.3598,
"step": 507
},
{
"epoch": 1.08,
"learning_rate": 5.933441811958763e-06,
"loss": 0.3937,
"step": 508
},
{
"epoch": 1.08,
"learning_rate": 5.925406903360716e-06,
"loss": 0.4074,
"step": 509
},
{
"epoch": 1.08,
"learning_rate": 5.917361870091073e-06,
"loss": 0.351,
"step": 510
},
{
"epoch": 1.08,
"learning_rate": 5.909306754454304e-06,
"loss": 0.463,
"step": 511
},
{
"epoch": 1.09,
"learning_rate": 5.9012415988078965e-06,
"loss": 0.3958,
"step": 512
},
{
"epoch": 1.09,
"learning_rate": 5.893166445562133e-06,
"loss": 0.4059,
"step": 513
},
{
"epoch": 1.09,
"learning_rate": 5.885081337179873e-06,
"loss": 0.4071,
"step": 514
},
{
"epoch": 1.09,
"learning_rate": 5.876986316176316e-06,
"loss": 0.3505,
"step": 515
},
{
"epoch": 1.09,
"learning_rate": 5.868881425118795e-06,
"loss": 0.3885,
"step": 516
},
{
"epoch": 1.1,
"learning_rate": 5.860766706626536e-06,
"loss": 0.2975,
"step": 517
},
{
"epoch": 1.1,
"learning_rate": 5.8526422033704486e-06,
"loss": 0.3606,
"step": 518
},
{
"epoch": 1.1,
"learning_rate": 5.844507958072895e-06,
"loss": 0.4259,
"step": 519
},
{
"epoch": 1.1,
"learning_rate": 5.8363640135074595e-06,
"loss": 0.3555,
"step": 520
},
{
"epoch": 1.1,
"learning_rate": 5.828210412498737e-06,
"loss": 0.3902,
"step": 521
},
{
"epoch": 1.11,
"learning_rate": 5.8200471979220955e-06,
"loss": 0.3933,
"step": 522
},
{
"epoch": 1.11,
"learning_rate": 5.811874412703457e-06,
"loss": 0.3749,
"step": 523
},
{
"epoch": 1.11,
"learning_rate": 5.803692099819072e-06,
"loss": 0.3806,
"step": 524
},
{
"epoch": 1.11,
"learning_rate": 5.795500302295289e-06,
"loss": 0.4314,
"step": 525
},
{
"epoch": 1.12,
"learning_rate": 5.7872990632083304e-06,
"loss": 0.3964,
"step": 526
},
{
"epoch": 1.12,
"learning_rate": 5.779088425684073e-06,
"loss": 0.4215,
"step": 527
},
{
"epoch": 1.12,
"learning_rate": 5.770868432897808e-06,
"loss": 0.3749,
"step": 528
},
{
"epoch": 1.12,
"learning_rate": 5.762639128074024e-06,
"loss": 0.3758,
"step": 529
},
{
"epoch": 1.12,
"learning_rate": 5.754400554486174e-06,
"loss": 0.3959,
"step": 530
},
{
"epoch": 1.13,
"learning_rate": 5.746152755456455e-06,
"loss": 0.3748,
"step": 531
},
{
"epoch": 1.13,
"learning_rate": 5.737895774355571e-06,
"loss": 0.3739,
"step": 532
},
{
"epoch": 1.13,
"learning_rate": 5.729629654602513e-06,
"loss": 0.3878,
"step": 533
},
{
"epoch": 1.13,
"learning_rate": 5.721354439664324e-06,
"loss": 0.3574,
"step": 534
},
{
"epoch": 1.13,
"learning_rate": 5.713070173055875e-06,
"loss": 0.4608,
"step": 535
},
{
"epoch": 1.14,
"learning_rate": 5.704776898339638e-06,
"loss": 0.4358,
"step": 536
},
{
"epoch": 1.14,
"learning_rate": 5.696474659125449e-06,
"loss": 0.4242,
"step": 537
},
{
"epoch": 1.14,
"learning_rate": 5.688163499070285e-06,
"loss": 0.3961,
"step": 538
},
{
"epoch": 1.14,
"learning_rate": 5.679843461878035e-06,
"loss": 0.3815,
"step": 539
},
{
"epoch": 1.15,
"learning_rate": 5.6715145912992634e-06,
"loss": 0.3388,
"step": 540
},
{
"epoch": 1.15,
"learning_rate": 5.66317693113099e-06,
"loss": 0.4109,
"step": 541
},
{
"epoch": 1.15,
"learning_rate": 5.654830525216452e-06,
"loss": 0.3314,
"step": 542
},
{
"epoch": 1.15,
"learning_rate": 5.646475417444872e-06,
"loss": 0.3877,
"step": 543
},
{
"epoch": 1.15,
"learning_rate": 5.638111651751237e-06,
"loss": 0.4004,
"step": 544
},
{
"epoch": 1.16,
"learning_rate": 5.62973927211606e-06,
"loss": 0.401,
"step": 545
},
{
"epoch": 1.16,
"learning_rate": 5.621358322565147e-06,
"loss": 0.3832,
"step": 546
},
{
"epoch": 1.16,
"learning_rate": 5.612968847169371e-06,
"loss": 0.3816,
"step": 547
},
{
"epoch": 1.16,
"learning_rate": 5.604570890044436e-06,
"loss": 0.384,
"step": 548
},
{
"epoch": 1.16,
"learning_rate": 5.596164495350648e-06,
"loss": 0.4038,
"step": 549
},
{
"epoch": 1.17,
"learning_rate": 5.5877497072926845e-06,
"loss": 0.4314,
"step": 550
},
{
"epoch": 1.17,
"learning_rate": 5.5793265701193534e-06,
"loss": 0.3462,
"step": 551
},
{
"epoch": 1.17,
"learning_rate": 5.570895128123369e-06,
"loss": 0.3897,
"step": 552
},
{
"epoch": 1.17,
"learning_rate": 5.562455425641117e-06,
"loss": 0.3536,
"step": 553
},
{
"epoch": 1.17,
"learning_rate": 5.554007507052419e-06,
"loss": 0.3527,
"step": 554
},
{
"epoch": 1.18,
"learning_rate": 5.545551416780303e-06,
"loss": 0.3505,
"step": 555
},
{
"epoch": 1.18,
"learning_rate": 5.537087199290763e-06,
"loss": 0.3339,
"step": 556
},
{
"epoch": 1.18,
"learning_rate": 5.528614899092534e-06,
"loss": 0.4514,
"step": 557
},
{
"epoch": 1.18,
"learning_rate": 5.520134560736851e-06,
"loss": 0.3611,
"step": 558
},
{
"epoch": 1.19,
"learning_rate": 5.511646228817219e-06,
"loss": 0.4345,
"step": 559
},
{
"epoch": 1.19,
"learning_rate": 5.503149947969175e-06,
"loss": 0.3351,
"step": 560
},
{
"epoch": 1.19,
"learning_rate": 5.4946457628700575e-06,
"loss": 0.3718,
"step": 561
},
{
"epoch": 1.19,
"learning_rate": 5.486133718238768e-06,
"loss": 0.3347,
"step": 562
},
{
"epoch": 1.19,
"learning_rate": 5.477613858835535e-06,
"loss": 0.3693,
"step": 563
},
{
"epoch": 1.2,
"learning_rate": 5.4690862294616836e-06,
"loss": 0.3756,
"step": 564
},
{
"epoch": 1.2,
"learning_rate": 5.460550874959397e-06,
"loss": 0.3475,
"step": 565
},
{
"epoch": 1.2,
"learning_rate": 5.452007840211478e-06,
"loss": 0.3827,
"step": 566
},
{
"epoch": 1.2,
"learning_rate": 5.4434571701411164e-06,
"loss": 0.3568,
"step": 567
},
{
"epoch": 1.2,
"learning_rate": 5.4348989097116555e-06,
"loss": 0.3652,
"step": 568
},
{
"epoch": 1.21,
"learning_rate": 5.426333103926346e-06,
"loss": 0.4602,
"step": 569
},
{
"epoch": 1.21,
"learning_rate": 5.417759797828122e-06,
"loss": 0.2827,
"step": 570
},
{
"epoch": 1.21,
"learning_rate": 5.409179036499354e-06,
"loss": 0.3609,
"step": 571
},
{
"epoch": 1.21,
"learning_rate": 5.400590865061616e-06,
"loss": 0.3534,
"step": 572
},
{
"epoch": 1.22,
"learning_rate": 5.391995328675447e-06,
"loss": 0.3434,
"step": 573
},
{
"epoch": 1.22,
"learning_rate": 5.383392472540118e-06,
"loss": 0.4009,
"step": 574
},
{
"epoch": 1.22,
"learning_rate": 5.374782341893383e-06,
"loss": 0.359,
"step": 575
},
{
"epoch": 1.22,
"learning_rate": 5.366164982011259e-06,
"loss": 0.3613,
"step": 576
},
{
"epoch": 1.22,
"learning_rate": 5.357540438207769e-06,
"loss": 0.4756,
"step": 577
},
{
"epoch": 1.23,
"learning_rate": 5.348908755834716e-06,
"loss": 0.3476,
"step": 578
},
{
"epoch": 1.23,
"learning_rate": 5.34026998028144e-06,
"loss": 0.3277,
"step": 579
},
{
"epoch": 1.23,
"learning_rate": 5.3316241569745824e-06,
"loss": 0.3651,
"step": 580
},
{
"epoch": 1.23,
"learning_rate": 5.322971331377841e-06,
"loss": 0.3432,
"step": 581
},
{
"epoch": 1.23,
"learning_rate": 5.314311548991738e-06,
"loss": 0.4095,
"step": 582
},
{
"epoch": 1.24,
"learning_rate": 5.305644855353377e-06,
"loss": 0.397,
"step": 583
},
{
"epoch": 1.24,
"learning_rate": 5.296971296036203e-06,
"loss": 0.3919,
"step": 584
},
{
"epoch": 1.24,
"learning_rate": 5.288290916649765e-06,
"loss": 0.3526,
"step": 585
},
{
"epoch": 1.24,
"learning_rate": 5.279603762839474e-06,
"loss": 0.3367,
"step": 586
},
{
"epoch": 1.24,
"learning_rate": 5.270909880286365e-06,
"loss": 0.4198,
"step": 587
},
{
"epoch": 1.25,
"learning_rate": 5.262209314706856e-06,
"loss": 0.3373,
"step": 588
},
{
"epoch": 1.25,
"learning_rate": 5.253502111852505e-06,
"loss": 0.3915,
"step": 589
},
{
"epoch": 1.25,
"learning_rate": 5.244788317509775e-06,
"loss": 0.4185,
"step": 590
},
{
"epoch": 1.25,
"learning_rate": 5.236067977499789e-06,
"loss": 0.3949,
"step": 591
},
{
"epoch": 1.26,
"learning_rate": 5.227341137678091e-06,
"loss": 0.4002,
"step": 592
},
{
"epoch": 1.26,
"learning_rate": 5.2186078439344e-06,
"loss": 0.3976,
"step": 593
},
{
"epoch": 1.26,
"learning_rate": 5.209868142192378e-06,
"loss": 0.3462,
"step": 594
},
{
"epoch": 1.26,
"learning_rate": 5.201122078409378e-06,
"loss": 0.3877,
"step": 595
},
{
"epoch": 1.26,
"learning_rate": 5.192369698576213e-06,
"loss": 0.3644,
"step": 596
},
{
"epoch": 1.27,
"learning_rate": 5.183611048716906e-06,
"loss": 0.4436,
"step": 597
},
{
"epoch": 1.27,
"learning_rate": 5.174846174888447e-06,
"loss": 0.4494,
"step": 598
},
{
"epoch": 1.27,
"learning_rate": 5.166075123180562e-06,
"loss": 0.3041,
"step": 599
},
{
"epoch": 1.27,
"learning_rate": 5.15729793971546e-06,
"loss": 0.3415,
"step": 600
},
{
"epoch": 1.27,
"learning_rate": 5.148514670647587e-06,
"loss": 0.3146,
"step": 601
},
{
"epoch": 1.28,
"learning_rate": 5.1397253621634015e-06,
"loss": 0.3698,
"step": 602
},
{
"epoch": 1.28,
"learning_rate": 5.130930060481113e-06,
"loss": 0.4309,
"step": 603
},
{
"epoch": 1.28,
"learning_rate": 5.122128811850443e-06,
"loss": 0.4139,
"step": 604
},
{
"epoch": 1.28,
"learning_rate": 5.113321662552395e-06,
"loss": 0.3479,
"step": 605
},
{
"epoch": 1.29,
"learning_rate": 5.104508658898992e-06,
"loss": 0.3506,
"step": 606
},
{
"epoch": 1.29,
"learning_rate": 5.0956898472330445e-06,
"loss": 0.3617,
"step": 607
},
{
"epoch": 1.29,
"learning_rate": 5.0868652739279035e-06,
"loss": 0.3378,
"step": 608
},
{
"epoch": 1.29,
"learning_rate": 5.078034985387221e-06,
"loss": 0.3692,
"step": 609
},
{
"epoch": 1.29,
"learning_rate": 5.069199028044699e-06,
"loss": 0.4453,
"step": 610
},
{
"epoch": 1.3,
"learning_rate": 5.060357448363848e-06,
"loss": 0.3917,
"step": 611
},
{
"epoch": 1.3,
"learning_rate": 5.051510292837744e-06,
"loss": 0.3321,
"step": 612
},
{
"epoch": 1.3,
"learning_rate": 5.0426576079887865e-06,
"loss": 0.4098,
"step": 613
},
{
"epoch": 1.3,
"learning_rate": 5.033799440368446e-06,
"loss": 0.3092,
"step": 614
},
{
"epoch": 1.3,
"learning_rate": 5.024935836557028e-06,
"loss": 0.3629,
"step": 615
},
{
"epoch": 1.31,
"learning_rate": 5.016066843163419e-06,
"loss": 0.354,
"step": 616
},
{
"epoch": 1.31,
"learning_rate": 5.007192506824853e-06,
"loss": 0.4121,
"step": 617
},
{
"epoch": 1.31,
"learning_rate": 4.998312874206655e-06,
"loss": 0.3791,
"step": 618
},
{
"epoch": 1.31,
"learning_rate": 4.989427992002e-06,
"loss": 0.4082,
"step": 619
},
{
"epoch": 1.31,
"learning_rate": 4.980537906931671e-06,
"loss": 0.418,
"step": 620
},
{
"epoch": 1.32,
"learning_rate": 4.971642665743806e-06,
"loss": 0.4362,
"step": 621
},
{
"epoch": 1.32,
"learning_rate": 4.962742315213659e-06,
"loss": 0.3922,
"step": 622
},
{
"epoch": 1.32,
"learning_rate": 4.953836902143354e-06,
"loss": 0.2981,
"step": 623
},
{
"epoch": 1.32,
"learning_rate": 4.944926473361628e-06,
"loss": 0.3489,
"step": 624
},
{
"epoch": 1.33,
"learning_rate": 4.936011075723599e-06,
"loss": 0.3961,
"step": 625
},
{
"epoch": 1.33,
"learning_rate": 4.927090756110516e-06,
"loss": 0.3934,
"step": 626
},
{
"epoch": 1.33,
"learning_rate": 4.918165561429502e-06,
"loss": 0.4478,
"step": 627
},
{
"epoch": 1.33,
"learning_rate": 4.909235538613321e-06,
"loss": 0.4854,
"step": 628
},
{
"epoch": 1.33,
"learning_rate": 4.9003007346201245e-06,
"loss": 0.3177,
"step": 629
},
{
"epoch": 1.34,
"learning_rate": 4.891361196433204e-06,
"loss": 0.2589,
"step": 630
},
{
"epoch": 1.34,
"learning_rate": 4.882416971060748e-06,
"loss": 0.3758,
"step": 631
},
{
"epoch": 1.34,
"learning_rate": 4.8734681055355915e-06,
"loss": 0.3793,
"step": 632
},
{
"epoch": 1.34,
"learning_rate": 4.864514646914967e-06,
"loss": 0.3236,
"step": 633
},
{
"epoch": 1.34,
"learning_rate": 4.855556642280264e-06,
"loss": 0.3848,
"step": 634
},
{
"epoch": 1.35,
"learning_rate": 4.8465941387367755e-06,
"loss": 0.4049,
"step": 635
},
{
"epoch": 1.35,
"learning_rate": 4.837627183413449e-06,
"loss": 0.3364,
"step": 636
},
{
"epoch": 1.35,
"learning_rate": 4.828655823462645e-06,
"loss": 0.3589,
"step": 637
},
{
"epoch": 1.35,
"learning_rate": 4.819680106059883e-06,
"loss": 0.3804,
"step": 638
},
{
"epoch": 1.36,
"learning_rate": 4.8107000784035985e-06,
"loss": 0.3914,
"step": 639
},
{
"epoch": 1.36,
"learning_rate": 4.8017157877148915e-06,
"loss": 0.2931,
"step": 640
},
{
"epoch": 1.36,
"learning_rate": 4.792727281237277e-06,
"loss": 0.3607,
"step": 641
},
{
"epoch": 1.36,
"learning_rate": 4.783734606236439e-06,
"loss": 0.2696,
"step": 642
},
{
"epoch": 1.36,
"learning_rate": 4.774737809999986e-06,
"loss": 0.3975,
"step": 643
},
{
"epoch": 1.37,
"learning_rate": 4.765736939837192e-06,
"loss": 0.3786,
"step": 644
},
{
"epoch": 1.37,
"learning_rate": 4.756732043078756e-06,
"loss": 0.3759,
"step": 645
},
{
"epoch": 1.37,
"learning_rate": 4.7477231670765525e-06,
"loss": 0.3361,
"step": 646
},
{
"epoch": 1.37,
"learning_rate": 4.738710359203374e-06,
"loss": 0.3457,
"step": 647
},
{
"epoch": 1.37,
"learning_rate": 4.729693666852698e-06,
"loss": 0.371,
"step": 648
},
{
"epoch": 1.38,
"learning_rate": 4.720673137438425e-06,
"loss": 0.3394,
"step": 649
},
{
"epoch": 1.38,
"learning_rate": 4.711648818394626e-06,
"loss": 0.3223,
"step": 650
},
{
"epoch": 1.38,
"learning_rate": 4.702620757175308e-06,
"loss": 0.3103,
"step": 651
},
{
"epoch": 1.38,
"learning_rate": 4.693589001254154e-06,
"loss": 0.3741,
"step": 652
},
{
"epoch": 1.38,
"learning_rate": 4.684553598124272e-06,
"loss": 0.3386,
"step": 653
},
{
"epoch": 1.39,
"learning_rate": 4.675514595297952e-06,
"loss": 0.3359,
"step": 654
},
{
"epoch": 1.39,
"learning_rate": 4.666472040306414e-06,
"loss": 0.4346,
"step": 655
},
{
"epoch": 1.39,
"learning_rate": 4.65742598069955e-06,
"loss": 0.4029,
"step": 656
},
{
"epoch": 1.39,
"learning_rate": 4.648376464045691e-06,
"loss": 0.4228,
"step": 657
},
{
"epoch": 1.4,
"learning_rate": 4.639323537931338e-06,
"loss": 0.4007,
"step": 658
},
{
"epoch": 1.4,
"learning_rate": 4.630267249960926e-06,
"loss": 0.4058,
"step": 659
},
{
"epoch": 1.4,
"learning_rate": 4.621207647756565e-06,
"loss": 0.3582,
"step": 660
},
{
"epoch": 1.4,
"learning_rate": 4.612144778957794e-06,
"loss": 0.372,
"step": 661
},
{
"epoch": 1.4,
"learning_rate": 4.60307869122133e-06,
"loss": 0.4415,
"step": 662
},
{
"epoch": 1.41,
"learning_rate": 4.5940094322208154e-06,
"loss": 0.2884,
"step": 663
},
{
"epoch": 1.41,
"learning_rate": 4.584937049646567e-06,
"loss": 0.3424,
"step": 664
},
{
"epoch": 1.41,
"learning_rate": 4.575861591205333e-06,
"loss": 0.3819,
"step": 665
},
{
"epoch": 1.41,
"learning_rate": 4.566783104620028e-06,
"loss": 0.3278,
"step": 666
},
{
"epoch": 1.41,
"learning_rate": 4.557701637629496e-06,
"loss": 0.2786,
"step": 667
},
{
"epoch": 1.42,
"learning_rate": 4.548617237988248e-06,
"loss": 0.3874,
"step": 668
},
{
"epoch": 1.42,
"learning_rate": 4.539529953466223e-06,
"loss": 0.3782,
"step": 669
},
{
"epoch": 1.42,
"learning_rate": 4.530439831848524e-06,
"loss": 0.4174,
"step": 670
},
{
"epoch": 1.42,
"learning_rate": 4.5213469209351745e-06,
"loss": 0.3274,
"step": 671
},
{
"epoch": 1.43,
"learning_rate": 4.512251268540868e-06,
"loss": 0.3762,
"step": 672
},
{
"epoch": 1.43,
"learning_rate": 4.50315292249471e-06,
"loss": 0.4249,
"step": 673
},
{
"epoch": 1.43,
"learning_rate": 4.494051930639972e-06,
"loss": 0.4217,
"step": 674
},
{
"epoch": 1.43,
"learning_rate": 4.484948340833841e-06,
"loss": 0.2993,
"step": 675
},
{
"epoch": 1.43,
"learning_rate": 4.47584220094716e-06,
"loss": 0.3597,
"step": 676
},
{
"epoch": 1.44,
"learning_rate": 4.466733558864185e-06,
"loss": 0.2738,
"step": 677
},
{
"epoch": 1.44,
"learning_rate": 4.457622462482333e-06,
"loss": 0.3443,
"step": 678
},
{
"epoch": 1.44,
"learning_rate": 4.4485089597119175e-06,
"loss": 0.3809,
"step": 679
},
{
"epoch": 1.44,
"learning_rate": 4.439393098475914e-06,
"loss": 0.3003,
"step": 680
},
{
"epoch": 1.44,
"learning_rate": 4.430274926709698e-06,
"loss": 0.3801,
"step": 681
},
{
"epoch": 1.45,
"learning_rate": 4.421154492360791e-06,
"loss": 0.3748,
"step": 682
},
{
"epoch": 1.45,
"learning_rate": 4.412031843388617e-06,
"loss": 0.2902,
"step": 683
},
{
"epoch": 1.45,
"learning_rate": 4.402907027764245e-06,
"loss": 0.3584,
"step": 684
},
{
"epoch": 1.45,
"learning_rate": 4.393780093470134e-06,
"loss": 0.418,
"step": 685
},
{
"epoch": 1.45,
"learning_rate": 4.384651088499886e-06,
"loss": 0.3436,
"step": 686
},
{
"epoch": 1.46,
"learning_rate": 4.375520060857993e-06,
"loss": 0.3171,
"step": 687
},
{
"epoch": 1.46,
"learning_rate": 4.366387058559579e-06,
"loss": 0.4112,
"step": 688
},
{
"epoch": 1.46,
"learning_rate": 4.357252129630158e-06,
"loss": 0.3329,
"step": 689
},
{
"epoch": 1.46,
"learning_rate": 4.348115322105368e-06,
"loss": 0.3829,
"step": 690
},
{
"epoch": 1.47,
"learning_rate": 4.33897668403073e-06,
"loss": 0.3473,
"step": 691
},
{
"epoch": 1.47,
"learning_rate": 4.32983626346139e-06,
"loss": 0.3749,
"step": 692
},
{
"epoch": 1.47,
"learning_rate": 4.320694108461867e-06,
"loss": 0.3507,
"step": 693
},
{
"epoch": 1.47,
"learning_rate": 4.3115502671058e-06,
"loss": 0.3414,
"step": 694
},
{
"epoch": 1.47,
"learning_rate": 4.3024047874756964e-06,
"loss": 0.3379,
"step": 695
},
{
"epoch": 1.48,
"learning_rate": 4.293257717662677e-06,
"loss": 0.2809,
"step": 696
},
{
"epoch": 1.48,
"learning_rate": 4.284109105766226e-06,
"loss": 0.3585,
"step": 697
},
{
"epoch": 1.48,
"learning_rate": 4.274958999893936e-06,
"loss": 0.4589,
"step": 698
},
{
"epoch": 1.48,
"learning_rate": 4.265807448161257e-06,
"loss": 0.4205,
"step": 699
},
{
"epoch": 1.48,
"learning_rate": 4.2566544986912375e-06,
"loss": 0.3545,
"step": 700
},
{
"epoch": 1.49,
"learning_rate": 4.2475001996142835e-06,
"loss": 0.3821,
"step": 701
},
{
"epoch": 1.49,
"learning_rate": 4.238344599067889e-06,
"loss": 0.3258,
"step": 702
},
{
"epoch": 1.49,
"learning_rate": 4.229187745196399e-06,
"loss": 0.3117,
"step": 703
},
{
"epoch": 1.49,
"learning_rate": 4.220029686150747e-06,
"loss": 0.4029,
"step": 704
},
{
"epoch": 1.5,
"learning_rate": 4.2108704700881994e-06,
"loss": 0.3934,
"step": 705
},
{
"epoch": 1.5,
"learning_rate": 4.2017101451721135e-06,
"loss": 0.3817,
"step": 706
},
{
"epoch": 1.5,
"learning_rate": 4.192548759571674e-06,
"loss": 0.3913,
"step": 707
},
{
"epoch": 1.5,
"learning_rate": 4.183386361461642e-06,
"loss": 0.3775,
"step": 708
},
{
"epoch": 1.5,
"learning_rate": 4.174222999022106e-06,
"loss": 0.331,
"step": 709
},
{
"epoch": 1.51,
"learning_rate": 4.165058720438224e-06,
"loss": 0.3255,
"step": 710
},
{
"epoch": 1.51,
"learning_rate": 4.155893573899969e-06,
"loss": 0.3321,
"step": 711
},
{
"epoch": 1.51,
"learning_rate": 4.146727607601883e-06,
"loss": 0.3433,
"step": 712
},
{
"epoch": 1.51,
"learning_rate": 4.137560869742815e-06,
"loss": 0.3257,
"step": 713
},
{
"epoch": 1.51,
"learning_rate": 4.128393408525673e-06,
"loss": 0.3376,
"step": 714
},
{
"epoch": 1.52,
"learning_rate": 4.119225272157166e-06,
"loss": 0.3852,
"step": 715
},
{
"epoch": 1.52,
"learning_rate": 4.110056508847557e-06,
"loss": 0.3837,
"step": 716
},
{
"epoch": 1.52,
"learning_rate": 4.100887166810404e-06,
"loss": 0.3367,
"step": 717
},
{
"epoch": 1.52,
"learning_rate": 4.091717294262307e-06,
"loss": 0.3386,
"step": 718
},
{
"epoch": 1.52,
"learning_rate": 4.082546939422658e-06,
"loss": 0.3543,
"step": 719
},
{
"epoch": 1.53,
"learning_rate": 4.073376150513383e-06,
"loss": 0.3475,
"step": 720
},
{
"epoch": 1.53,
"learning_rate": 4.064204975758693e-06,
"loss": 0.376,
"step": 721
},
{
"epoch": 1.53,
"learning_rate": 4.0550334633848275e-06,
"loss": 0.3645,
"step": 722
},
{
"epoch": 1.53,
"learning_rate": 4.0458616616197954e-06,
"loss": 0.3692,
"step": 723
},
{
"epoch": 1.54,
"learning_rate": 4.036689618693138e-06,
"loss": 0.3123,
"step": 724
},
{
"epoch": 1.54,
"learning_rate": 4.027517382835656e-06,
"loss": 0.3389,
"step": 725
},
{
"epoch": 1.54,
"learning_rate": 4.018345002279168e-06,
"loss": 0.3324,
"step": 726
},
{
"epoch": 1.54,
"learning_rate": 4.009172525256254e-06,
"loss": 0.3923,
"step": 727
},
{
"epoch": 1.54,
"learning_rate": 4e-06,
"loss": 0.3363,
"step": 728
},
{
"epoch": 1.55,
"learning_rate": 3.990827474743746e-06,
"loss": 0.3559,
"step": 729
},
{
"epoch": 1.55,
"learning_rate": 3.9816549977208314e-06,
"loss": 0.378,
"step": 730
},
{
"epoch": 1.55,
"learning_rate": 3.9724826171643435e-06,
"loss": 0.3215,
"step": 731
},
{
"epoch": 1.55,
"learning_rate": 3.963310381306862e-06,
"loss": 0.3831,
"step": 732
},
{
"epoch": 1.55,
"learning_rate": 3.954138338380203e-06,
"loss": 0.4419,
"step": 733
},
{
"epoch": 1.56,
"learning_rate": 3.944966536615173e-06,
"loss": 0.3942,
"step": 734
},
{
"epoch": 1.56,
"learning_rate": 3.935795024241306e-06,
"loss": 0.3829,
"step": 735
},
{
"epoch": 1.56,
"learning_rate": 3.926623849486618e-06,
"loss": 0.4076,
"step": 736
},
{
"epoch": 1.56,
"learning_rate": 3.9174530605773425e-06,
"loss": 0.3441,
"step": 737
},
{
"epoch": 1.57,
"learning_rate": 3.9082827057376935e-06,
"loss": 0.4521,
"step": 738
},
{
"epoch": 1.57,
"learning_rate": 3.899112833189598e-06,
"loss": 0.3162,
"step": 739
},
{
"epoch": 1.57,
"learning_rate": 3.889943491152442e-06,
"loss": 0.3672,
"step": 740
},
{
"epoch": 1.57,
"learning_rate": 3.880774727842834e-06,
"loss": 0.3228,
"step": 741
},
{
"epoch": 1.57,
"learning_rate": 3.8716065914743285e-06,
"loss": 0.3278,
"step": 742
},
{
"epoch": 1.58,
"learning_rate": 3.862439130257184e-06,
"loss": 0.3315,
"step": 743
},
{
"epoch": 1.58,
"learning_rate": 3.853272392398117e-06,
"loss": 0.3457,
"step": 744
},
{
"epoch": 1.58,
"learning_rate": 3.844106426100031e-06,
"loss": 0.3062,
"step": 745
},
{
"epoch": 1.58,
"learning_rate": 3.834941279561775e-06,
"loss": 0.4032,
"step": 746
},
{
"epoch": 1.58,
"learning_rate": 3.825777000977894e-06,
"loss": 0.334,
"step": 747
},
{
"epoch": 1.59,
"learning_rate": 3.816613638538359e-06,
"loss": 0.4068,
"step": 748
},
{
"epoch": 1.59,
"learning_rate": 3.807451240428327e-06,
"loss": 0.3049,
"step": 749
},
{
"epoch": 1.59,
"learning_rate": 3.7982898548278866e-06,
"loss": 0.3583,
"step": 750
},
{
"epoch": 1.59,
"learning_rate": 3.789129529911801e-06,
"loss": 0.3458,
"step": 751
},
{
"epoch": 1.59,
"learning_rate": 3.7799703138492543e-06,
"loss": 0.3246,
"step": 752
},
{
"epoch": 1.6,
"learning_rate": 3.7708122548036006e-06,
"loss": 0.3503,
"step": 753
},
{
"epoch": 1.6,
"learning_rate": 3.7616554009321105e-06,
"loss": 0.3589,
"step": 754
},
{
"epoch": 1.6,
"learning_rate": 3.7524998003857174e-06,
"loss": 0.3894,
"step": 755
},
{
"epoch": 1.6,
"learning_rate": 3.7433455013087617e-06,
"loss": 0.3699,
"step": 756
},
{
"epoch": 1.61,
"learning_rate": 3.7341925518387434e-06,
"loss": 0.3611,
"step": 757
},
{
"epoch": 1.61,
"learning_rate": 3.7250410001060638e-06,
"loss": 0.3684,
"step": 758
},
{
"epoch": 1.61,
"learning_rate": 3.715890894233774e-06,
"loss": 0.3784,
"step": 759
},
{
"epoch": 1.61,
"learning_rate": 3.7067422823373235e-06,
"loss": 0.3689,
"step": 760
},
{
"epoch": 1.61,
"learning_rate": 3.6975952125243045e-06,
"loss": 0.3894,
"step": 761
},
{
"epoch": 1.62,
"learning_rate": 3.688449732894201e-06,
"loss": 0.4219,
"step": 762
},
{
"epoch": 1.62,
"learning_rate": 3.6793058915381323e-06,
"loss": 0.4097,
"step": 763
},
{
"epoch": 1.62,
"learning_rate": 3.6701637365386094e-06,
"loss": 0.3849,
"step": 764
},
{
"epoch": 1.62,
"learning_rate": 3.6610233159692706e-06,
"loss": 0.3241,
"step": 765
},
{
"epoch": 1.62,
"learning_rate": 3.6518846778946315e-06,
"loss": 0.3849,
"step": 766
},
{
"epoch": 1.63,
"learning_rate": 3.6427478703698418e-06,
"loss": 0.424,
"step": 767
},
{
"epoch": 1.63,
"learning_rate": 3.6336129414404208e-06,
"loss": 0.3193,
"step": 768
},
{
"epoch": 1.63,
"learning_rate": 3.6244799391420067e-06,
"loss": 0.4062,
"step": 769
},
{
"epoch": 1.63,
"learning_rate": 3.6153489115001145e-06,
"loss": 0.337,
"step": 770
},
{
"epoch": 1.63,
"learning_rate": 3.606219906529867e-06,
"loss": 0.3086,
"step": 771
},
{
"epoch": 1.64,
"learning_rate": 3.5970929722357544e-06,
"loss": 0.3668,
"step": 772
},
{
"epoch": 1.64,
"learning_rate": 3.5879681566113828e-06,
"loss": 0.3461,
"step": 773
},
{
"epoch": 1.64,
"learning_rate": 3.5788455076392096e-06,
"loss": 0.3184,
"step": 774
},
{
"epoch": 1.64,
"learning_rate": 3.5697250732903033e-06,
"loss": 0.3858,
"step": 775
},
{
"epoch": 1.65,
"learning_rate": 3.5606069015240857e-06,
"loss": 0.4107,
"step": 776
},
{
"epoch": 1.65,
"learning_rate": 3.5514910402880826e-06,
"loss": 0.3741,
"step": 777
},
{
"epoch": 1.65,
"learning_rate": 3.542377537517668e-06,
"loss": 0.3861,
"step": 778
},
{
"epoch": 1.65,
"learning_rate": 3.533266441135814e-06,
"loss": 0.2995,
"step": 779
},
{
"epoch": 1.65,
"learning_rate": 3.5241577990528404e-06,
"loss": 0.409,
"step": 780
},
{
"epoch": 1.66,
"learning_rate": 3.5150516591661604e-06,
"loss": 0.4122,
"step": 781
},
{
"epoch": 1.66,
"learning_rate": 3.505948069360028e-06,
"loss": 0.3995,
"step": 782
},
{
"epoch": 1.66,
"learning_rate": 3.496847077505291e-06,
"loss": 0.3397,
"step": 783
},
{
"epoch": 1.66,
"learning_rate": 3.487748731459133e-06,
"loss": 0.3801,
"step": 784
},
{
"epoch": 1.66,
"learning_rate": 3.478653079064825e-06,
"loss": 0.3558,
"step": 785
},
{
"epoch": 1.67,
"learning_rate": 3.4695601681514763e-06,
"loss": 0.3934,
"step": 786
},
{
"epoch": 1.67,
"learning_rate": 3.460470046533777e-06,
"loss": 0.3421,
"step": 787
},
{
"epoch": 1.67,
"learning_rate": 3.4513827620117526e-06,
"loss": 0.3771,
"step": 788
},
{
"epoch": 1.67,
"learning_rate": 3.4422983623705043e-06,
"loss": 0.3369,
"step": 789
},
{
"epoch": 1.68,
"learning_rate": 3.4332168953799718e-06,
"loss": 0.3816,
"step": 790
},
{
"epoch": 1.68,
"learning_rate": 3.4241384087946685e-06,
"loss": 0.3522,
"step": 791
},
{
"epoch": 1.68,
"learning_rate": 3.4150629503534315e-06,
"loss": 0.331,
"step": 792
},
{
"epoch": 1.68,
"learning_rate": 3.405990567779185e-06,
"loss": 0.368,
"step": 793
},
{
"epoch": 1.68,
"learning_rate": 3.396921308778671e-06,
"loss": 0.3653,
"step": 794
},
{
"epoch": 1.69,
"learning_rate": 3.387855221042205e-06,
"loss": 0.3658,
"step": 795
},
{
"epoch": 1.69,
"learning_rate": 3.3787923522434355e-06,
"loss": 0.3568,
"step": 796
},
{
"epoch": 1.69,
"learning_rate": 3.3697327500390746e-06,
"loss": 0.3296,
"step": 797
},
{
"epoch": 1.69,
"learning_rate": 3.3606764620686607e-06,
"loss": 0.3389,
"step": 798
},
{
"epoch": 1.69,
"learning_rate": 3.3516235359543094e-06,
"loss": 0.3488,
"step": 799
},
{
"epoch": 1.7,
"learning_rate": 3.34257401930045e-06,
"loss": 0.4002,
"step": 800
},
{
"epoch": 1.7,
"learning_rate": 3.3335279596935877e-06,
"loss": 0.3457,
"step": 801
},
{
"epoch": 1.7,
"learning_rate": 3.3244854047020473e-06,
"loss": 0.3322,
"step": 802
},
{
"epoch": 1.7,
"learning_rate": 3.3154464018757276e-06,
"loss": 0.3996,
"step": 803
},
{
"epoch": 1.7,
"learning_rate": 3.3064109987458465e-06,
"loss": 0.3192,
"step": 804
},
{
"epoch": 1.71,
"learning_rate": 3.297379242824691e-06,
"loss": 0.3602,
"step": 805
},
{
"epoch": 1.71,
"learning_rate": 3.2883511816053736e-06,
"loss": 0.4305,
"step": 806
},
{
"epoch": 1.71,
"learning_rate": 3.279326862561576e-06,
"loss": 0.323,
"step": 807
},
{
"epoch": 1.71,
"learning_rate": 3.2703063331473006e-06,
"loss": 0.3759,
"step": 808
},
{
"epoch": 1.72,
"learning_rate": 3.2612896407966253e-06,
"loss": 0.3099,
"step": 809
},
{
"epoch": 1.72,
"learning_rate": 3.2522768329234493e-06,
"loss": 0.3237,
"step": 810
},
{
"epoch": 1.72,
"learning_rate": 3.2432679569212438e-06,
"loss": 0.3985,
"step": 811
},
{
"epoch": 1.72,
"learning_rate": 3.234263060162808e-06,
"loss": 0.3395,
"step": 812
},
{
"epoch": 1.72,
"learning_rate": 3.225262190000014e-06,
"loss": 0.314,
"step": 813
},
{
"epoch": 1.73,
"learning_rate": 3.2162653937635614e-06,
"loss": 0.3553,
"step": 814
},
{
"epoch": 1.73,
"learning_rate": 3.2072727187627236e-06,
"loss": 0.3595,
"step": 815
},
{
"epoch": 1.73,
"learning_rate": 3.1982842122851086e-06,
"loss": 0.3612,
"step": 816
},
{
"epoch": 1.73,
"learning_rate": 3.189299921596402e-06,
"loss": 0.3879,
"step": 817
},
{
"epoch": 1.73,
"learning_rate": 3.1803198939401163e-06,
"loss": 0.3802,
"step": 818
},
{
"epoch": 1.74,
"learning_rate": 3.171344176537355e-06,
"loss": 0.3881,
"step": 819
},
{
"epoch": 1.74,
"learning_rate": 3.1623728165865513e-06,
"loss": 0.3807,
"step": 820
},
{
"epoch": 1.74,
"learning_rate": 3.153405861263224e-06,
"loss": 0.3748,
"step": 821
},
{
"epoch": 1.74,
"learning_rate": 3.1444433577197353e-06,
"loss": 0.3725,
"step": 822
},
{
"epoch": 1.75,
"learning_rate": 3.1354853530850332e-06,
"loss": 0.3475,
"step": 823
},
{
"epoch": 1.75,
"learning_rate": 3.126531894464408e-06,
"loss": 0.3698,
"step": 824
},
{
"epoch": 1.75,
"learning_rate": 3.1175830289392523e-06,
"loss": 0.2918,
"step": 825
},
{
"epoch": 1.75,
"learning_rate": 3.1086388035667964e-06,
"loss": 0.3248,
"step": 826
},
{
"epoch": 1.75,
"learning_rate": 3.099699265379877e-06,
"loss": 0.3625,
"step": 827
},
{
"epoch": 1.76,
"learning_rate": 3.090764461386679e-06,
"loss": 0.3704,
"step": 828
},
{
"epoch": 1.76,
"learning_rate": 3.0818344385704984e-06,
"loss": 0.337,
"step": 829
},
{
"epoch": 1.76,
"learning_rate": 3.0729092438894846e-06,
"loss": 0.3518,
"step": 830
},
{
"epoch": 1.76,
"learning_rate": 3.0639889242763994e-06,
"loss": 0.3978,
"step": 831
},
{
"epoch": 1.76,
"learning_rate": 3.0550735266383724e-06,
"loss": 0.339,
"step": 832
},
{
"epoch": 1.77,
"learning_rate": 3.0461630978566472e-06,
"loss": 0.3159,
"step": 833
},
{
"epoch": 1.77,
"learning_rate": 3.0372576847863404e-06,
"loss": 0.3974,
"step": 834
},
{
"epoch": 1.77,
"learning_rate": 3.028357334256195e-06,
"loss": 0.3294,
"step": 835
},
{
"epoch": 1.77,
"learning_rate": 3.0194620930683307e-06,
"loss": 0.387,
"step": 836
},
{
"epoch": 1.77,
"learning_rate": 3.0105720079980003e-06,
"loss": 0.3634,
"step": 837
},
{
"epoch": 1.78,
"learning_rate": 3.0016871257933452e-06,
"loss": 0.3874,
"step": 838
},
{
"epoch": 1.78,
"learning_rate": 2.992807493175147e-06,
"loss": 0.3841,
"step": 839
},
{
"epoch": 1.78,
"learning_rate": 2.9839331568365818e-06,
"loss": 0.3103,
"step": 840
},
{
"epoch": 1.78,
"learning_rate": 2.9750641634429726e-06,
"loss": 0.2763,
"step": 841
},
{
"epoch": 1.79,
"learning_rate": 2.9662005596315542e-06,
"loss": 0.3447,
"step": 842
},
{
"epoch": 1.79,
"learning_rate": 2.9573423920112152e-06,
"loss": 0.3586,
"step": 843
},
{
"epoch": 1.79,
"learning_rate": 2.9484897071622554e-06,
"loss": 0.3354,
"step": 844
},
{
"epoch": 1.79,
"learning_rate": 2.9396425516361523e-06,
"loss": 0.3302,
"step": 845
},
{
"epoch": 1.79,
"learning_rate": 2.9308009719553026e-06,
"loss": 0.3501,
"step": 846
},
{
"epoch": 1.8,
"learning_rate": 2.9219650146127776e-06,
"loss": 0.3618,
"step": 847
},
{
"epoch": 1.8,
"learning_rate": 2.913134726072096e-06,
"loss": 0.3726,
"step": 848
},
{
"epoch": 1.8,
"learning_rate": 2.9043101527669564e-06,
"loss": 0.3413,
"step": 849
},
{
"epoch": 1.8,
"learning_rate": 2.8954913411010077e-06,
"loss": 0.3583,
"step": 850
},
{
"epoch": 1.8,
"learning_rate": 2.8866783374476047e-06,
"loss": 0.348,
"step": 851
},
{
"epoch": 1.81,
"learning_rate": 2.877871188149556e-06,
"loss": 0.3331,
"step": 852
},
{
"epoch": 1.81,
"learning_rate": 2.869069939518887e-06,
"loss": 0.3623,
"step": 853
},
{
"epoch": 1.81,
"learning_rate": 2.860274637836598e-06,
"loss": 0.34,
"step": 854
},
{
"epoch": 1.81,
"learning_rate": 2.8514853293524125e-06,
"loss": 0.3579,
"step": 855
},
{
"epoch": 1.82,
"learning_rate": 2.842702060284542e-06,
"loss": 0.3761,
"step": 856
},
{
"epoch": 1.82,
"learning_rate": 2.833924876819437e-06,
"loss": 0.3117,
"step": 857
},
{
"epoch": 1.82,
"learning_rate": 2.8251538251115527e-06,
"loss": 0.2625,
"step": 858
},
{
"epoch": 1.82,
"learning_rate": 2.816388951283095e-06,
"loss": 0.3593,
"step": 859
},
{
"epoch": 1.82,
"learning_rate": 2.807630301423787e-06,
"loss": 0.3543,
"step": 860
},
{
"epoch": 1.83,
"learning_rate": 2.798877921590622e-06,
"loss": 0.3238,
"step": 861
},
{
"epoch": 1.83,
"learning_rate": 2.790131857807623e-06,
"loss": 0.3292,
"step": 862
},
{
"epoch": 1.83,
"learning_rate": 2.7813921560656e-06,
"loss": 0.3375,
"step": 863
},
{
"epoch": 1.83,
"learning_rate": 2.7726588623219096e-06,
"loss": 0.3944,
"step": 864
},
{
"epoch": 1.83,
"learning_rate": 2.7639320225002105e-06,
"loss": 0.3362,
"step": 865
},
{
"epoch": 1.84,
"learning_rate": 2.7552116824902243e-06,
"loss": 0.3563,
"step": 866
},
{
"epoch": 1.84,
"learning_rate": 2.7464978881474945e-06,
"loss": 0.3778,
"step": 867
},
{
"epoch": 1.84,
"learning_rate": 2.737790685293145e-06,
"loss": 0.4274,
"step": 868
},
{
"epoch": 1.84,
"learning_rate": 2.729090119713636e-06,
"loss": 0.2822,
"step": 869
},
{
"epoch": 1.84,
"learning_rate": 2.720396237160526e-06,
"loss": 0.3054,
"step": 870
},
{
"epoch": 1.85,
"learning_rate": 2.711709083350235e-06,
"loss": 0.3469,
"step": 871
},
{
"epoch": 1.85,
"learning_rate": 2.7030287039637975e-06,
"loss": 0.3622,
"step": 872
},
{
"epoch": 1.85,
"learning_rate": 2.694355144646622e-06,
"loss": 0.3853,
"step": 873
},
{
"epoch": 1.85,
"learning_rate": 2.685688451008262e-06,
"loss": 0.3527,
"step": 874
},
{
"epoch": 1.86,
"learning_rate": 2.6770286686221594e-06,
"loss": 0.3539,
"step": 875
},
{
"epoch": 1.86,
"learning_rate": 2.6683758430254172e-06,
"loss": 0.3335,
"step": 876
},
{
"epoch": 1.86,
"learning_rate": 2.65973001971856e-06,
"loss": 0.2477,
"step": 877
},
{
"epoch": 1.86,
"learning_rate": 2.6510912441652853e-06,
"loss": 0.3791,
"step": 878
},
{
"epoch": 1.86,
"learning_rate": 2.6424595617922306e-06,
"loss": 0.3376,
"step": 879
},
{
"epoch": 1.87,
"learning_rate": 2.6338350179887415e-06,
"loss": 0.3444,
"step": 880
},
{
"epoch": 1.87,
"learning_rate": 2.6252176581066166e-06,
"loss": 0.2822,
"step": 881
},
{
"epoch": 1.87,
"learning_rate": 2.6166075274598838e-06,
"loss": 0.377,
"step": 882
},
{
"epoch": 1.87,
"learning_rate": 2.6080046713245525e-06,
"loss": 0.2903,
"step": 883
},
{
"epoch": 1.87,
"learning_rate": 2.5994091349383845e-06,
"loss": 0.3673,
"step": 884
},
{
"epoch": 1.88,
"learning_rate": 2.590820963500646e-06,
"loss": 0.4123,
"step": 885
},
{
"epoch": 1.88,
"learning_rate": 2.582240202171877e-06,
"loss": 0.2797,
"step": 886
},
{
"epoch": 1.88,
"learning_rate": 2.5736668960736535e-06,
"loss": 0.3671,
"step": 887
},
{
"epoch": 1.88,
"learning_rate": 2.5651010902883454e-06,
"loss": 0.341,
"step": 888
},
{
"epoch": 1.89,
"learning_rate": 2.556542829858883e-06,
"loss": 0.3077,
"step": 889
},
{
"epoch": 1.89,
"learning_rate": 2.5479921597885225e-06,
"loss": 0.3148,
"step": 890
},
{
"epoch": 1.89,
"learning_rate": 2.539449125040603e-06,
"loss": 0.3172,
"step": 891
},
{
"epoch": 1.89,
"learning_rate": 2.5309137705383152e-06,
"loss": 0.2691,
"step": 892
},
{
"epoch": 1.89,
"learning_rate": 2.5223861411644644e-06,
"loss": 0.3207,
"step": 893
},
{
"epoch": 1.9,
"learning_rate": 2.513866281761232e-06,
"loss": 0.2953,
"step": 894
},
{
"epoch": 1.9,
"learning_rate": 2.5053542371299426e-06,
"loss": 0.3313,
"step": 895
},
{
"epoch": 1.9,
"learning_rate": 2.4968500520308242e-06,
"loss": 0.3013,
"step": 896
},
{
"epoch": 1.9,
"learning_rate": 2.4883537711827807e-06,
"loss": 0.3486,
"step": 897
},
{
"epoch": 1.9,
"learning_rate": 2.47986543926315e-06,
"loss": 0.3292,
"step": 898
},
{
"epoch": 1.91,
"learning_rate": 2.471385100907466e-06,
"loss": 0.3666,
"step": 899
},
{
"epoch": 1.91,
"learning_rate": 2.4629128007092376e-06,
"loss": 0.3048,
"step": 900
},
{
"epoch": 1.91,
"learning_rate": 2.4544485832196982e-06,
"loss": 0.343,
"step": 901
},
{
"epoch": 1.91,
"learning_rate": 2.44599249294758e-06,
"loss": 0.2941,
"step": 902
},
{
"epoch": 1.91,
"learning_rate": 2.437544574358883e-06,
"loss": 0.3552,
"step": 903
},
{
"epoch": 1.92,
"learning_rate": 2.429104871876632e-06,
"loss": 0.3379,
"step": 904
},
{
"epoch": 1.92,
"learning_rate": 2.4206734298806462e-06,
"loss": 0.3651,
"step": 905
},
{
"epoch": 1.92,
"learning_rate": 2.4122502927073164e-06,
"loss": 0.3023,
"step": 906
},
{
"epoch": 1.92,
"learning_rate": 2.4038355046493513e-06,
"loss": 0.3447,
"step": 907
},
{
"epoch": 1.93,
"learning_rate": 2.395429109955565e-06,
"loss": 0.354,
"step": 908
},
{
"epoch": 1.93,
"learning_rate": 2.38703115283063e-06,
"loss": 0.3267,
"step": 909
},
{
"epoch": 1.93,
"learning_rate": 2.3786416774348534e-06,
"loss": 0.3248,
"step": 910
},
{
"epoch": 1.93,
"learning_rate": 2.3702607278839406e-06,
"loss": 0.3363,
"step": 911
},
{
"epoch": 1.93,
"learning_rate": 2.361888348248762e-06,
"loss": 0.275,
"step": 912
},
{
"epoch": 1.94,
"learning_rate": 2.353524582555128e-06,
"loss": 0.3155,
"step": 913
},
{
"epoch": 1.94,
"learning_rate": 2.3451694747835495e-06,
"loss": 0.3706,
"step": 914
},
{
"epoch": 1.94,
"learning_rate": 2.3368230688690097e-06,
"loss": 0.3516,
"step": 915
},
{
"epoch": 1.94,
"learning_rate": 2.328485408700737e-06,
"loss": 0.2699,
"step": 916
},
{
"epoch": 1.94,
"learning_rate": 2.3201565381219655e-06,
"loss": 0.3673,
"step": 917
},
{
"epoch": 1.95,
"learning_rate": 2.3118365009297143e-06,
"loss": 0.2994,
"step": 918
},
{
"epoch": 1.95,
"learning_rate": 2.303525340874552e-06,
"loss": 0.3673,
"step": 919
},
{
"epoch": 1.95,
"learning_rate": 2.2952231016603616e-06,
"loss": 0.323,
"step": 920
},
{
"epoch": 1.95,
"learning_rate": 2.2869298269441254e-06,
"loss": 0.3356,
"step": 921
},
{
"epoch": 1.96,
"learning_rate": 2.2786455603356772e-06,
"loss": 0.3326,
"step": 922
},
{
"epoch": 1.96,
"learning_rate": 2.270370345397488e-06,
"loss": 0.2808,
"step": 923
},
{
"epoch": 1.96,
"learning_rate": 2.26210422564443e-06,
"loss": 0.3464,
"step": 924
},
{
"epoch": 1.96,
"learning_rate": 2.253847244543546e-06,
"loss": 0.324,
"step": 925
},
{
"epoch": 1.96,
"learning_rate": 2.2455994455138256e-06,
"loss": 0.3269,
"step": 926
},
{
"epoch": 1.97,
"learning_rate": 2.2373608719259773e-06,
"loss": 0.3471,
"step": 927
},
{
"epoch": 1.97,
"learning_rate": 2.229131567102192e-06,
"loss": 0.3528,
"step": 928
},
{
"epoch": 1.97,
"learning_rate": 2.2209115743159264e-06,
"loss": 0.3703,
"step": 929
},
{
"epoch": 1.97,
"learning_rate": 2.2127009367916688e-06,
"loss": 0.3721,
"step": 930
},
{
"epoch": 1.97,
"learning_rate": 2.204499697704712e-06,
"loss": 0.2902,
"step": 931
},
{
"epoch": 1.98,
"learning_rate": 2.196307900180928e-06,
"loss": 0.3199,
"step": 932
},
{
"epoch": 1.98,
"learning_rate": 2.1881255872965433e-06,
"loss": 0.3533,
"step": 933
},
{
"epoch": 1.98,
"learning_rate": 2.1799528020779063e-06,
"loss": 0.3212,
"step": 934
},
{
"epoch": 1.98,
"learning_rate": 2.171789587501263e-06,
"loss": 0.3083,
"step": 935
},
{
"epoch": 1.98,
"learning_rate": 2.163635986492541e-06,
"loss": 0.324,
"step": 936
},
{
"epoch": 1.99,
"learning_rate": 2.1554920419271074e-06,
"loss": 0.3425,
"step": 937
},
{
"epoch": 1.99,
"learning_rate": 2.1473577966295502e-06,
"loss": 0.3464,
"step": 938
},
{
"epoch": 1.99,
"learning_rate": 2.1392332933734646e-06,
"loss": 0.3359,
"step": 939
},
{
"epoch": 1.99,
"learning_rate": 2.1311185748812074e-06,
"loss": 0.2975,
"step": 940
},
{
"epoch": 2.0,
"learning_rate": 2.1230136838236824e-06,
"loss": 0.2978,
"step": 941
},
{
"epoch": 2.0,
"learning_rate": 2.114918662820127e-06,
"loss": 0.3034,
"step": 942
},
{
"epoch": 2.0,
"learning_rate": 2.106833554437867e-06,
"loss": 0.3515,
"step": 943
},
{
"epoch": 2.0,
"learning_rate": 2.0987584011921027e-06,
"loss": 0.2545,
"step": 944
},
{
"epoch": 2.0,
"learning_rate": 2.0906932455456964e-06,
"loss": 0.2313,
"step": 945
},
{
"epoch": 2.01,
"learning_rate": 2.0826381299089277e-06,
"loss": 0.3029,
"step": 946
},
{
"epoch": 2.01,
"learning_rate": 2.0745930966392837e-06,
"loss": 0.245,
"step": 947
},
{
"epoch": 2.01,
"learning_rate": 2.0665581880412363e-06,
"loss": 0.2897,
"step": 948
},
{
"epoch": 2.01,
"learning_rate": 2.0585334463660185e-06,
"loss": 0.3204,
"step": 949
},
{
"epoch": 2.01,
"learning_rate": 2.050518913811394e-06,
"loss": 0.249,
"step": 950
},
{
"epoch": 2.02,
"learning_rate": 2.0425146325214492e-06,
"loss": 0.3227,
"step": 951
},
{
"epoch": 2.02,
"learning_rate": 2.034520644586363e-06,
"loss": 0.3016,
"step": 952
},
{
"epoch": 2.02,
"learning_rate": 2.0265369920421834e-06,
"loss": 0.261,
"step": 953
},
{
"epoch": 2.02,
"learning_rate": 2.0185637168706157e-06,
"loss": 0.2601,
"step": 954
},
{
"epoch": 2.03,
"learning_rate": 2.010600860998794e-06,
"loss": 0.2129,
"step": 955
},
{
"epoch": 2.03,
"learning_rate": 2.0026484662990593e-06,
"loss": 0.262,
"step": 956
},
{
"epoch": 2.03,
"learning_rate": 1.994706574588749e-06,
"loss": 0.2696,
"step": 957
},
{
"epoch": 2.03,
"learning_rate": 1.9867752276299683e-06,
"loss": 0.2329,
"step": 958
},
{
"epoch": 2.03,
"learning_rate": 1.9788544671293706e-06,
"loss": 0.2549,
"step": 959
},
{
"epoch": 2.04,
"learning_rate": 1.9709443347379467e-06,
"loss": 0.2301,
"step": 960
},
{
"epoch": 2.04,
"learning_rate": 1.9630448720507943e-06,
"loss": 0.2762,
"step": 961
},
{
"epoch": 2.04,
"learning_rate": 1.9551561206069067e-06,
"loss": 0.2556,
"step": 962
},
{
"epoch": 2.04,
"learning_rate": 1.9472781218889565e-06,
"loss": 0.2915,
"step": 963
},
{
"epoch": 2.04,
"learning_rate": 1.9394109173230665e-06,
"loss": 0.3008,
"step": 964
},
{
"epoch": 2.05,
"learning_rate": 1.931554548278604e-06,
"loss": 0.2627,
"step": 965
},
{
"epoch": 2.05,
"learning_rate": 1.923709056067958e-06,
"loss": 0.258,
"step": 966
},
{
"epoch": 2.05,
"learning_rate": 1.9158744819463186e-06,
"loss": 0.2374,
"step": 967
},
{
"epoch": 2.05,
"learning_rate": 1.9080508671114676e-06,
"loss": 0.2401,
"step": 968
},
{
"epoch": 2.05,
"learning_rate": 1.9002382527035543e-06,
"loss": 0.271,
"step": 969
},
{
"epoch": 2.06,
"learning_rate": 1.8924366798048857e-06,
"loss": 0.2209,
"step": 970
},
{
"epoch": 2.06,
"learning_rate": 1.8846461894397074e-06,
"loss": 0.2643,
"step": 971
},
{
"epoch": 2.06,
"learning_rate": 1.8768668225739842e-06,
"loss": 0.25,
"step": 972
},
{
"epoch": 2.06,
"learning_rate": 1.8690986201151938e-06,
"loss": 0.2718,
"step": 973
},
{
"epoch": 2.07,
"learning_rate": 1.8613416229121008e-06,
"loss": 0.2762,
"step": 974
},
{
"epoch": 2.07,
"learning_rate": 1.8535958717545519e-06,
"loss": 0.3023,
"step": 975
},
{
"epoch": 2.07,
"learning_rate": 1.8458614073732566e-06,
"loss": 0.2677,
"step": 976
},
{
"epoch": 2.07,
"learning_rate": 1.8381382704395692e-06,
"loss": 0.2729,
"step": 977
},
{
"epoch": 2.07,
"learning_rate": 1.830426501565283e-06,
"loss": 0.3024,
"step": 978
},
{
"epoch": 2.08,
"learning_rate": 1.822726141302414e-06,
"loss": 0.2886,
"step": 979
},
{
"epoch": 2.08,
"learning_rate": 1.8150372301429804e-06,
"loss": 0.2886,
"step": 980
},
{
"epoch": 2.08,
"learning_rate": 1.807359808518802e-06,
"loss": 0.2741,
"step": 981
},
{
"epoch": 2.08,
"learning_rate": 1.79969391680128e-06,
"loss": 0.3323,
"step": 982
},
{
"epoch": 2.08,
"learning_rate": 1.7920395953011828e-06,
"loss": 0.2436,
"step": 983
},
{
"epoch": 2.09,
"learning_rate": 1.7843968842684397e-06,
"loss": 0.2593,
"step": 984
},
{
"epoch": 2.09,
"learning_rate": 1.7767658238919298e-06,
"loss": 0.2693,
"step": 985
},
{
"epoch": 2.09,
"learning_rate": 1.7691464542992617e-06,
"loss": 0.233,
"step": 986
},
{
"epoch": 2.09,
"learning_rate": 1.7615388155565695e-06,
"loss": 0.2326,
"step": 987
},
{
"epoch": 2.1,
"learning_rate": 1.7539429476683085e-06,
"loss": 0.2966,
"step": 988
},
{
"epoch": 2.1,
"learning_rate": 1.7463588905770288e-06,
"loss": 0.2695,
"step": 989
},
{
"epoch": 2.1,
"learning_rate": 1.7387866841631747e-06,
"loss": 0.2722,
"step": 990
},
{
"epoch": 2.1,
"learning_rate": 1.7312263682448825e-06,
"loss": 0.2418,
"step": 991
},
{
"epoch": 2.1,
"learning_rate": 1.7236779825777536e-06,
"loss": 0.263,
"step": 992
},
{
"epoch": 2.11,
"learning_rate": 1.7161415668546556e-06,
"loss": 0.3395,
"step": 993
},
{
"epoch": 2.11,
"learning_rate": 1.7086171607055202e-06,
"loss": 0.2792,
"step": 994
},
{
"epoch": 2.11,
"learning_rate": 1.7011048036971198e-06,
"loss": 0.2759,
"step": 995
},
{
"epoch": 2.11,
"learning_rate": 1.6936045353328662e-06,
"loss": 0.2552,
"step": 996
},
{
"epoch": 2.11,
"learning_rate": 1.6861163950526125e-06,
"loss": 0.2908,
"step": 997
},
{
"epoch": 2.12,
"learning_rate": 1.6786404222324277e-06,
"loss": 0.2961,
"step": 998
},
{
"epoch": 2.12,
"learning_rate": 1.671176656184401e-06,
"loss": 0.2575,
"step": 999
},
{
"epoch": 2.12,
"learning_rate": 1.6637251361564345e-06,
"loss": 0.2916,
"step": 1000
},
{
"epoch": 2.12,
"learning_rate": 1.656285901332036e-06,
"loss": 0.2425,
"step": 1001
},
{
"epoch": 2.12,
"learning_rate": 1.6488589908301079e-06,
"loss": 0.3184,
"step": 1002
},
{
"epoch": 2.13,
"learning_rate": 1.6414444437047488e-06,
"loss": 0.2707,
"step": 1003
},
{
"epoch": 2.13,
"learning_rate": 1.6340422989450464e-06,
"loss": 0.2639,
"step": 1004
},
{
"epoch": 2.13,
"learning_rate": 1.6266525954748663e-06,
"loss": 0.2402,
"step": 1005
},
{
"epoch": 2.13,
"learning_rate": 1.6192753721526558e-06,
"loss": 0.2914,
"step": 1006
},
{
"epoch": 2.14,
"learning_rate": 1.6119106677712364e-06,
"loss": 0.2728,
"step": 1007
},
{
"epoch": 2.14,
"learning_rate": 1.6045585210575951e-06,
"loss": 0.2182,
"step": 1008
},
{
"epoch": 2.14,
"learning_rate": 1.5972189706726891e-06,
"loss": 0.3468,
"step": 1009
},
{
"epoch": 2.14,
"learning_rate": 1.589892055211238e-06,
"loss": 0.2267,
"step": 1010
},
{
"epoch": 2.14,
"learning_rate": 1.5825778132015174e-06,
"loss": 0.2971,
"step": 1011
},
{
"epoch": 2.15,
"learning_rate": 1.5752762831051655e-06,
"loss": 0.2602,
"step": 1012
},
{
"epoch": 2.15,
"learning_rate": 1.5679875033169698e-06,
"loss": 0.254,
"step": 1013
},
{
"epoch": 2.15,
"learning_rate": 1.560711512164675e-06,
"loss": 0.2387,
"step": 1014
},
{
"epoch": 2.15,
"learning_rate": 1.5534483479087772e-06,
"loss": 0.26,
"step": 1015
},
{
"epoch": 2.15,
"learning_rate": 1.5461980487423194e-06,
"loss": 0.2452,
"step": 1016
},
{
"epoch": 2.16,
"learning_rate": 1.5389606527906974e-06,
"loss": 0.351,
"step": 1017
},
{
"epoch": 2.16,
"learning_rate": 1.5317361981114557e-06,
"loss": 0.2857,
"step": 1018
},
{
"epoch": 2.16,
"learning_rate": 1.5245247226940845e-06,
"loss": 0.2604,
"step": 1019
},
{
"epoch": 2.16,
"learning_rate": 1.517326264459828e-06,
"loss": 0.2795,
"step": 1020
},
{
"epoch": 2.17,
"learning_rate": 1.5101408612614747e-06,
"loss": 0.2843,
"step": 1021
},
{
"epoch": 2.17,
"learning_rate": 1.5029685508831683e-06,
"loss": 0.2592,
"step": 1022
},
{
"epoch": 2.17,
"learning_rate": 1.4958093710402033e-06,
"loss": 0.2407,
"step": 1023
},
{
"epoch": 2.17,
"learning_rate": 1.4886633593788251e-06,
"loss": 0.2696,
"step": 1024
},
{
"epoch": 2.17,
"learning_rate": 1.4815305534760394e-06,
"loss": 0.2416,
"step": 1025
},
{
"epoch": 2.18,
"learning_rate": 1.4744109908394044e-06,
"loss": 0.3104,
"step": 1026
},
{
"epoch": 2.18,
"learning_rate": 1.4673047089068437e-06,
"loss": 0.2491,
"step": 1027
},
{
"epoch": 2.18,
"learning_rate": 1.460211745046445e-06,
"loss": 0.2733,
"step": 1028
},
{
"epoch": 2.18,
"learning_rate": 1.4531321365562588e-06,
"loss": 0.2954,
"step": 1029
},
{
"epoch": 2.18,
"learning_rate": 1.4460659206641116e-06,
"loss": 0.2703,
"step": 1030
},
{
"epoch": 2.19,
"learning_rate": 1.4390131345274048e-06,
"loss": 0.2942,
"step": 1031
},
{
"epoch": 2.19,
"learning_rate": 1.431973815232917e-06,
"loss": 0.3826,
"step": 1032
},
{
"epoch": 2.19,
"learning_rate": 1.4249479997966142e-06,
"loss": 0.2039,
"step": 1033
},
{
"epoch": 2.19,
"learning_rate": 1.4179357251634554e-06,
"loss": 0.2487,
"step": 1034
},
{
"epoch": 2.19,
"learning_rate": 1.41093702820719e-06,
"loss": 0.2395,
"step": 1035
},
{
"epoch": 2.2,
"learning_rate": 1.4039519457301738e-06,
"loss": 0.28,
"step": 1036
},
{
"epoch": 2.2,
"learning_rate": 1.3969805144631726e-06,
"loss": 0.3089,
"step": 1037
},
{
"epoch": 2.2,
"learning_rate": 1.3900227710651647e-06,
"loss": 0.2551,
"step": 1038
},
{
"epoch": 2.2,
"learning_rate": 1.3830787521231488e-06,
"loss": 0.2607,
"step": 1039
},
{
"epoch": 2.21,
"learning_rate": 1.3761484941519638e-06,
"loss": 0.2322,
"step": 1040
},
{
"epoch": 2.21,
"learning_rate": 1.3692320335940784e-06,
"loss": 0.2388,
"step": 1041
},
{
"epoch": 2.21,
"learning_rate": 1.3623294068194083e-06,
"loss": 0.27,
"step": 1042
},
{
"epoch": 2.21,
"learning_rate": 1.355440650125133e-06,
"loss": 0.2368,
"step": 1043
},
{
"epoch": 2.21,
"learning_rate": 1.3485657997354878e-06,
"loss": 0.2532,
"step": 1044
},
{
"epoch": 2.22,
"learning_rate": 1.3417048918015836e-06,
"loss": 0.2646,
"step": 1045
},
{
"epoch": 2.22,
"learning_rate": 1.3348579624012227e-06,
"loss": 0.2736,
"step": 1046
},
{
"epoch": 2.22,
"learning_rate": 1.3280250475386936e-06,
"loss": 0.2414,
"step": 1047
},
{
"epoch": 2.22,
"learning_rate": 1.321206183144591e-06,
"loss": 0.2816,
"step": 1048
},
{
"epoch": 2.22,
"learning_rate": 1.314401405075633e-06,
"loss": 0.2208,
"step": 1049
},
{
"epoch": 2.23,
"learning_rate": 1.307610749114458e-06,
"loss": 0.3263,
"step": 1050
},
{
"epoch": 2.23,
"learning_rate": 1.3008342509694439e-06,
"loss": 0.2282,
"step": 1051
},
{
"epoch": 2.23,
"learning_rate": 1.2940719462745246e-06,
"loss": 0.255,
"step": 1052
},
{
"epoch": 2.23,
"learning_rate": 1.2873238705889966e-06,
"loss": 0.2878,
"step": 1053
},
{
"epoch": 2.24,
"learning_rate": 1.280590059397332e-06,
"loss": 0.2875,
"step": 1054
},
{
"epoch": 2.24,
"learning_rate": 1.273870548108995e-06,
"loss": 0.3157,
"step": 1055
},
{
"epoch": 2.24,
"learning_rate": 1.2671653720582573e-06,
"loss": 0.2602,
"step": 1056
},
{
"epoch": 2.24,
"learning_rate": 1.2604745665040023e-06,
"loss": 0.2217,
"step": 1057
},
{
"epoch": 2.24,
"learning_rate": 1.2537981666295533e-06,
"loss": 0.274,
"step": 1058
},
{
"epoch": 2.25,
"learning_rate": 1.2471362075424802e-06,
"loss": 0.2466,
"step": 1059
},
{
"epoch": 2.25,
"learning_rate": 1.2404887242744137e-06,
"loss": 0.2716,
"step": 1060
},
{
"epoch": 2.25,
"learning_rate": 1.2338557517808674e-06,
"loss": 0.2755,
"step": 1061
},
{
"epoch": 2.25,
"learning_rate": 1.227237324941051e-06,
"loss": 0.2414,
"step": 1062
},
{
"epoch": 2.25,
"learning_rate": 1.2206334785576822e-06,
"loss": 0.2685,
"step": 1063
},
{
"epoch": 2.26,
"learning_rate": 1.2140442473568135e-06,
"loss": 0.2686,
"step": 1064
},
{
"epoch": 2.26,
"learning_rate": 1.2074696659876384e-06,
"loss": 0.2292,
"step": 1065
},
{
"epoch": 2.26,
"learning_rate": 1.2009097690223193e-06,
"loss": 0.2626,
"step": 1066
},
{
"epoch": 2.26,
"learning_rate": 1.194364590955801e-06,
"loss": 0.2903,
"step": 1067
},
{
"epoch": 2.26,
"learning_rate": 1.1878341662056257e-06,
"loss": 0.3413,
"step": 1068
},
{
"epoch": 2.27,
"learning_rate": 1.1813185291117603e-06,
"loss": 0.2639,
"step": 1069
},
{
"epoch": 2.27,
"learning_rate": 1.1748177139364103e-06,
"loss": 0.3068,
"step": 1070
},
{
"epoch": 2.27,
"learning_rate": 1.1683317548638382e-06,
"loss": 0.2879,
"step": 1071
},
{
"epoch": 2.27,
"learning_rate": 1.1618606860001907e-06,
"loss": 0.2407,
"step": 1072
},
{
"epoch": 2.28,
"learning_rate": 1.1554045413733095e-06,
"loss": 0.278,
"step": 1073
},
{
"epoch": 2.28,
"learning_rate": 1.148963354932563e-06,
"loss": 0.2644,
"step": 1074
},
{
"epoch": 2.28,
"learning_rate": 1.1425371605486605e-06,
"loss": 0.2428,
"step": 1075
},
{
"epoch": 2.28,
"learning_rate": 1.1361259920134747e-06,
"loss": 0.243,
"step": 1076
},
{
"epoch": 2.28,
"learning_rate": 1.1297298830398676e-06,
"loss": 0.2757,
"step": 1077
},
{
"epoch": 2.29,
"learning_rate": 1.123348867261512e-06,
"loss": 0.3367,
"step": 1078
},
{
"epoch": 2.29,
"learning_rate": 1.1169829782327091e-06,
"loss": 0.2418,
"step": 1079
},
{
"epoch": 2.29,
"learning_rate": 1.1106322494282236e-06,
"loss": 0.2531,
"step": 1080
},
{
"epoch": 2.29,
"learning_rate": 1.1042967142430933e-06,
"loss": 0.2963,
"step": 1081
},
{
"epoch": 2.29,
"learning_rate": 1.0979764059924668e-06,
"loss": 0.3322,
"step": 1082
},
{
"epoch": 2.3,
"learning_rate": 1.0916713579114217e-06,
"loss": 0.2655,
"step": 1083
},
{
"epoch": 2.3,
"learning_rate": 1.0853816031547865e-06,
"loss": 0.2628,
"step": 1084
},
{
"epoch": 2.3,
"learning_rate": 1.0791071747969743e-06,
"loss": 0.2698,
"step": 1085
},
{
"epoch": 2.3,
"learning_rate": 1.0728481058318054e-06,
"loss": 0.2356,
"step": 1086
},
{
"epoch": 2.31,
"learning_rate": 1.0666044291723305e-06,
"loss": 0.2745,
"step": 1087
},
{
"epoch": 2.31,
"learning_rate": 1.0603761776506614e-06,
"loss": 0.242,
"step": 1088
},
{
"epoch": 2.31,
"learning_rate": 1.0541633840178006e-06,
"loss": 0.2456,
"step": 1089
},
{
"epoch": 2.31,
"learning_rate": 1.04796608094346e-06,
"loss": 0.2414,
"step": 1090
},
{
"epoch": 2.31,
"learning_rate": 1.0417843010159e-06,
"loss": 0.2361,
"step": 1091
},
{
"epoch": 2.32,
"learning_rate": 1.035618076741753e-06,
"loss": 0.2595,
"step": 1092
},
{
"epoch": 2.32,
"learning_rate": 1.029467440545849e-06,
"loss": 0.2387,
"step": 1093
},
{
"epoch": 2.32,
"learning_rate": 1.0233324247710497e-06,
"loss": 0.2423,
"step": 1094
},
{
"epoch": 2.32,
"learning_rate": 1.0172130616780834e-06,
"loss": 0.2851,
"step": 1095
},
{
"epoch": 2.32,
"learning_rate": 1.0111093834453614e-06,
"loss": 0.2492,
"step": 1096
},
{
"epoch": 2.33,
"learning_rate": 1.005021422168819e-06,
"loss": 0.2438,
"step": 1097
},
{
"epoch": 2.33,
"learning_rate": 9.989492098617493e-07,
"loss": 0.3107,
"step": 1098
},
{
"epoch": 2.33,
"learning_rate": 9.928927784546251e-07,
"loss": 0.2113,
"step": 1099
},
{
"epoch": 2.33,
"learning_rate": 9.86852159794935e-07,
"loss": 0.2067,
"step": 1100
},
{
"epoch": 2.33,
"learning_rate": 9.808273856470228e-07,
"loss": 0.2174,
"step": 1101
},
{
"epoch": 2.34,
"learning_rate": 9.748184876919103e-07,
"loss": 0.2451,
"step": 1102
},
{
"epoch": 2.34,
"learning_rate": 9.688254975271325e-07,
"loss": 0.2202,
"step": 1103
},
{
"epoch": 2.34,
"learning_rate": 9.628484466665828e-07,
"loss": 0.2694,
"step": 1104
},
{
"epoch": 2.34,
"learning_rate": 9.568873665403302e-07,
"loss": 0.2604,
"step": 1105
},
{
"epoch": 2.35,
"learning_rate": 9.509422884944634e-07,
"loss": 0.2958,
"step": 1106
},
{
"epoch": 2.35,
"learning_rate": 9.450132437909282e-07,
"loss": 0.3149,
"step": 1107
},
{
"epoch": 2.35,
"learning_rate": 9.391002636073589e-07,
"loss": 0.2795,
"step": 1108
},
{
"epoch": 2.35,
"learning_rate": 9.332033790369118e-07,
"loss": 0.2955,
"step": 1109
},
{
"epoch": 2.35,
"learning_rate": 9.273226210881087e-07,
"loss": 0.2134,
"step": 1110
},
{
"epoch": 2.36,
"learning_rate": 9.214580206846707e-07,
"loss": 0.2857,
"step": 1111
},
{
"epoch": 2.36,
"learning_rate": 9.156096086653504e-07,
"loss": 0.2571,
"step": 1112
},
{
"epoch": 2.36,
"learning_rate": 9.097774157837799e-07,
"loss": 0.2651,
"step": 1113
},
{
"epoch": 2.36,
"learning_rate": 9.039614727083012e-07,
"loss": 0.206,
"step": 1114
},
{
"epoch": 2.36,
"learning_rate": 8.98161810021806e-07,
"loss": 0.2589,
"step": 1115
},
{
"epoch": 2.37,
"learning_rate": 8.923784582215788e-07,
"loss": 0.302,
"step": 1116
},
{
"epoch": 2.37,
"learning_rate": 8.866114477191335e-07,
"loss": 0.2575,
"step": 1117
},
{
"epoch": 2.37,
"learning_rate": 8.808608088400519e-07,
"loss": 0.2391,
"step": 1118
},
{
"epoch": 2.37,
"learning_rate": 8.751265718238295e-07,
"loss": 0.2552,
"step": 1119
},
{
"epoch": 2.38,
"learning_rate": 8.694087668237094e-07,
"loss": 0.2551,
"step": 1120
},
{
"epoch": 2.38,
"learning_rate": 8.637074239065314e-07,
"loss": 0.2506,
"step": 1121
},
{
"epoch": 2.38,
"learning_rate": 8.58022573052569e-07,
"loss": 0.3408,
"step": 1122
},
{
"epoch": 2.38,
"learning_rate": 8.523542441553711e-07,
"loss": 0.2347,
"step": 1123
},
{
"epoch": 2.38,
"learning_rate": 8.46702467021609e-07,
"loss": 0.2537,
"step": 1124
},
{
"epoch": 2.39,
"learning_rate": 8.410672713709148e-07,
"loss": 0.2526,
"step": 1125
},
{
"epoch": 2.39,
"learning_rate": 8.3544868683573e-07,
"loss": 0.2116,
"step": 1126
},
{
"epoch": 2.39,
"learning_rate": 8.298467429611466e-07,
"loss": 0.2855,
"step": 1127
},
{
"epoch": 2.39,
"learning_rate": 8.242614692047505e-07,
"loss": 0.2903,
"step": 1128
},
{
"epoch": 2.39,
"learning_rate": 8.186928949364703e-07,
"loss": 0.2923,
"step": 1129
},
{
"epoch": 2.4,
"learning_rate": 8.131410494384226e-07,
"loss": 0.2848,
"step": 1130
},
{
"epoch": 2.4,
"learning_rate": 8.076059619047515e-07,
"loss": 0.2321,
"step": 1131
},
{
"epoch": 2.4,
"learning_rate": 8.020876614414858e-07,
"loss": 0.2647,
"step": 1132
},
{
"epoch": 2.4,
"learning_rate": 7.965861770663748e-07,
"loss": 0.33,
"step": 1133
},
{
"epoch": 2.4,
"learning_rate": 7.911015377087458e-07,
"loss": 0.2359,
"step": 1134
},
{
"epoch": 2.41,
"learning_rate": 7.856337722093456e-07,
"loss": 0.2609,
"step": 1135
},
{
"epoch": 2.41,
"learning_rate": 7.801829093201879e-07,
"loss": 0.273,
"step": 1136
},
{
"epoch": 2.41,
"learning_rate": 7.747489777044088e-07,
"loss": 0.2832,
"step": 1137
},
{
"epoch": 2.41,
"learning_rate": 7.693320059361106e-07,
"loss": 0.2981,
"step": 1138
},
{
"epoch": 2.42,
"learning_rate": 7.639320225002106e-07,
"loss": 0.2558,
"step": 1139
},
{
"epoch": 2.42,
"learning_rate": 7.585490557922969e-07,
"loss": 0.2668,
"step": 1140
},
{
"epoch": 2.42,
"learning_rate": 7.531831341184753e-07,
"loss": 0.2426,
"step": 1141
},
{
"epoch": 2.42,
"learning_rate": 7.478342856952186e-07,
"loss": 0.247,
"step": 1142
},
{
"epoch": 2.42,
"learning_rate": 7.42502538649223e-07,
"loss": 0.2606,
"step": 1143
},
{
"epoch": 2.43,
"learning_rate": 7.371879210172584e-07,
"loss": 0.2594,
"step": 1144
},
{
"epoch": 2.43,
"learning_rate": 7.318904607460181e-07,
"loss": 0.2985,
"step": 1145
},
{
"epoch": 2.43,
"learning_rate": 7.266101856919728e-07,
"loss": 0.2841,
"step": 1146
},
{
"epoch": 2.43,
"learning_rate": 7.213471236212325e-07,
"loss": 0.2486,
"step": 1147
},
{
"epoch": 2.43,
"learning_rate": 7.16101302209386e-07,
"loss": 0.2418,
"step": 1148
},
{
"epoch": 2.44,
"learning_rate": 7.108727490413642e-07,
"loss": 0.257,
"step": 1149
},
{
"epoch": 2.44,
"learning_rate": 7.056614916112988e-07,
"loss": 0.3122,
"step": 1150
},
{
"epoch": 2.44,
"learning_rate": 7.00467557322367e-07,
"loss": 0.2438,
"step": 1151
},
{
"epoch": 2.44,
"learning_rate": 6.952909734866525e-07,
"loss": 0.298,
"step": 1152
},
{
"epoch": 2.45,
"learning_rate": 6.901317673250089e-07,
"loss": 0.2087,
"step": 1153
},
{
"epoch": 2.45,
"learning_rate": 6.849899659669036e-07,
"loss": 0.2045,
"step": 1154
},
{
"epoch": 2.45,
"learning_rate": 6.798655964502819e-07,
"loss": 0.2591,
"step": 1155
},
{
"epoch": 2.45,
"learning_rate": 6.747586857214305e-07,
"loss": 0.2495,
"step": 1156
},
{
"epoch": 2.45,
"learning_rate": 6.696692606348238e-07,
"loss": 0.2941,
"step": 1157
},
{
"epoch": 2.46,
"learning_rate": 6.645973479529891e-07,
"loss": 0.2195,
"step": 1158
},
{
"epoch": 2.46,
"learning_rate": 6.595429743463685e-07,
"loss": 0.2938,
"step": 1159
},
{
"epoch": 2.46,
"learning_rate": 6.545061663931743e-07,
"loss": 0.2302,
"step": 1160
},
{
"epoch": 2.46,
"learning_rate": 6.494869505792491e-07,
"loss": 0.3048,
"step": 1161
},
{
"epoch": 2.46,
"learning_rate": 6.444853532979295e-07,
"loss": 0.2848,
"step": 1162
},
{
"epoch": 2.47,
"learning_rate": 6.395014008499067e-07,
"loss": 0.2343,
"step": 1163
},
{
"epoch": 2.47,
"learning_rate": 6.345351194430835e-07,
"loss": 0.2778,
"step": 1164
},
{
"epoch": 2.47,
"learning_rate": 6.295865351924448e-07,
"loss": 0.3015,
"step": 1165
},
{
"epoch": 2.47,
"learning_rate": 6.246556741199134e-07,
"loss": 0.3403,
"step": 1166
},
{
"epoch": 2.47,
"learning_rate": 6.197425621542152e-07,
"loss": 0.2828,
"step": 1167
},
{
"epoch": 2.48,
"learning_rate": 6.148472251307435e-07,
"loss": 0.3331,
"step": 1168
},
{
"epoch": 2.48,
"learning_rate": 6.099696887914252e-07,
"loss": 0.2602,
"step": 1169
},
{
"epoch": 2.48,
"learning_rate": 6.05109978784578e-07,
"loss": 0.2534,
"step": 1170
},
{
"epoch": 2.48,
"learning_rate": 6.002681206647864e-07,
"loss": 0.2424,
"step": 1171
},
{
"epoch": 2.49,
"learning_rate": 5.954441398927561e-07,
"loss": 0.2194,
"step": 1172
},
{
"epoch": 2.49,
"learning_rate": 5.906380618351892e-07,
"loss": 0.2377,
"step": 1173
},
{
"epoch": 2.49,
"learning_rate": 5.858499117646465e-07,
"loss": 0.2973,
"step": 1174
},
{
"epoch": 2.49,
"learning_rate": 5.810797148594137e-07,
"loss": 0.2768,
"step": 1175
},
{
"epoch": 2.49,
"learning_rate": 5.763274962033726e-07,
"loss": 0.2742,
"step": 1176
},
{
"epoch": 2.5,
"learning_rate": 5.715932807858648e-07,
"loss": 0.2622,
"step": 1177
},
{
"epoch": 2.5,
"learning_rate": 5.668770935015641e-07,
"loss": 0.2534,
"step": 1178
},
{
"epoch": 2.5,
"learning_rate": 5.621789591503452e-07,
"loss": 0.2347,
"step": 1179
},
{
"epoch": 2.5,
"learning_rate": 5.574989024371493e-07,
"loss": 0.275,
"step": 1180
},
{
"epoch": 2.5,
"learning_rate": 5.528369479718594e-07,
"loss": 0.2419,
"step": 1181
},
{
"epoch": 2.51,
"learning_rate": 5.481931202691688e-07,
"loss": 0.2678,
"step": 1182
},
{
"epoch": 2.51,
"learning_rate": 5.435674437484489e-07,
"loss": 0.2593,
"step": 1183
},
{
"epoch": 2.51,
"learning_rate": 5.389599427336287e-07,
"loss": 0.231,
"step": 1184
},
{
"epoch": 2.51,
"learning_rate": 5.343706414530565e-07,
"loss": 0.286,
"step": 1185
},
{
"epoch": 2.52,
"learning_rate": 5.297995640393829e-07,
"loss": 0.2812,
"step": 1186
},
{
"epoch": 2.52,
"learning_rate": 5.252467345294271e-07,
"loss": 0.2351,
"step": 1187
},
{
"epoch": 2.52,
"learning_rate": 5.207121768640519e-07,
"loss": 0.2122,
"step": 1188
},
{
"epoch": 2.52,
"learning_rate": 5.161959148880384e-07,
"loss": 0.2453,
"step": 1189
},
{
"epoch": 2.52,
"learning_rate": 5.116979723499631e-07,
"loss": 0.2435,
"step": 1190
},
{
"epoch": 2.53,
"learning_rate": 5.072183729020661e-07,
"loss": 0.2444,
"step": 1191
},
{
"epoch": 2.53,
"learning_rate": 5.027571401001349e-07,
"loss": 0.2233,
"step": 1192
},
{
"epoch": 2.53,
"learning_rate": 4.983142974033763e-07,
"loss": 0.2771,
"step": 1193
},
{
"epoch": 2.53,
"learning_rate": 4.938898681742905e-07,
"loss": 0.2549,
"step": 1194
},
{
"epoch": 2.53,
"learning_rate": 4.894838756785544e-07,
"loss": 0.2315,
"step": 1195
},
{
"epoch": 2.54,
"learning_rate": 4.85096343084896e-07,
"loss": 0.2603,
"step": 1196
},
{
"epoch": 2.54,
"learning_rate": 4.807272934649709e-07,
"loss": 0.2286,
"step": 1197
},
{
"epoch": 2.54,
"learning_rate": 4.7637674979324185e-07,
"loss": 0.2874,
"step": 1198
},
{
"epoch": 2.54,
"learning_rate": 4.7204473494686457e-07,
"loss": 0.2681,
"step": 1199
},
{
"epoch": 2.54,
"learning_rate": 4.67731271705555e-07,
"loss": 0.2081,
"step": 1200
},
{
"epoch": 2.55,
"learning_rate": 4.6343638275147824e-07,
"loss": 0.2378,
"step": 1201
},
{
"epoch": 2.55,
"learning_rate": 4.591600906691315e-07,
"loss": 0.2466,
"step": 1202
},
{
"epoch": 2.55,
"learning_rate": 4.54902417945215e-07,
"loss": 0.2377,
"step": 1203
},
{
"epoch": 2.55,
"learning_rate": 4.506633869685217e-07,
"loss": 0.2226,
"step": 1204
},
{
"epoch": 2.56,
"learning_rate": 4.464430200298217e-07,
"loss": 0.3036,
"step": 1205
},
{
"epoch": 2.56,
"learning_rate": 4.422413393217361e-07,
"loss": 0.2747,
"step": 1206
},
{
"epoch": 2.56,
"learning_rate": 4.380583669386251e-07,
"loss": 0.2607,
"step": 1207
},
{
"epoch": 2.56,
"learning_rate": 4.3389412487647804e-07,
"loss": 0.2405,
"step": 1208
},
{
"epoch": 2.56,
"learning_rate": 4.297486350327846e-07,
"loss": 0.2643,
"step": 1209
},
{
"epoch": 2.57,
"learning_rate": 4.2562191920642965e-07,
"loss": 0.2251,
"step": 1210
},
{
"epoch": 2.57,
"learning_rate": 4.2151399909757626e-07,
"loss": 0.241,
"step": 1211
},
{
"epoch": 2.57,
"learning_rate": 4.1742489630755085e-07,
"loss": 0.2522,
"step": 1212
},
{
"epoch": 2.57,
"learning_rate": 4.1335463233872805e-07,
"loss": 0.2456,
"step": 1213
},
{
"epoch": 2.57,
"learning_rate": 4.0930322859442115e-07,
"loss": 0.2997,
"step": 1214
},
{
"epoch": 2.58,
"learning_rate": 4.0527070637876814e-07,
"loss": 0.2668,
"step": 1215
},
{
"epoch": 2.58,
"learning_rate": 4.0125708689661587e-07,
"loss": 0.3431,
"step": 1216
},
{
"epoch": 2.58,
"learning_rate": 3.972623912534163e-07,
"loss": 0.2467,
"step": 1217
},
{
"epoch": 2.58,
"learning_rate": 3.932866404551092e-07,
"loss": 0.2797,
"step": 1218
},
{
"epoch": 2.59,
"learning_rate": 3.8932985540801245e-07,
"loss": 0.2452,
"step": 1219
},
{
"epoch": 2.59,
"learning_rate": 3.8539205691871547e-07,
"loss": 0.3477,
"step": 1220
},
{
"epoch": 2.59,
"learning_rate": 3.8147326569396745e-07,
"loss": 0.2702,
"step": 1221
},
{
"epoch": 2.59,
"learning_rate": 3.775735023405664e-07,
"loss": 0.2576,
"step": 1222
},
{
"epoch": 2.59,
"learning_rate": 3.736927873652558e-07,
"loss": 0.2545,
"step": 1223
},
{
"epoch": 2.6,
"learning_rate": 3.69831141174612e-07,
"loss": 0.2702,
"step": 1224
},
{
"epoch": 2.6,
"learning_rate": 3.659885840749406e-07,
"loss": 0.3055,
"step": 1225
},
{
"epoch": 2.6,
"learning_rate": 3.6216513627216736e-07,
"loss": 0.2787,
"step": 1226
},
{
"epoch": 2.6,
"learning_rate": 3.5836081787173146e-07,
"loss": 0.2892,
"step": 1227
},
{
"epoch": 2.6,
"learning_rate": 3.5457564887848257e-07,
"loss": 0.2413,
"step": 1228
},
{
"epoch": 2.61,
"learning_rate": 3.5080964919657417e-07,
"loss": 0.2573,
"step": 1229
},
{
"epoch": 2.61,
"learning_rate": 3.470628386293564e-07,
"loss": 0.2438,
"step": 1230
},
{
"epoch": 2.61,
"learning_rate": 3.4333523687927725e-07,
"loss": 0.3176,
"step": 1231
},
{
"epoch": 2.61,
"learning_rate": 3.396268635477733e-07,
"loss": 0.2656,
"step": 1232
},
{
"epoch": 2.61,
"learning_rate": 3.3593773813517066e-07,
"loss": 0.1924,
"step": 1233
},
{
"epoch": 2.62,
"learning_rate": 3.32267880040582e-07,
"loss": 0.2279,
"step": 1234
},
{
"epoch": 2.62,
"learning_rate": 3.286173085618009e-07,
"loss": 0.2721,
"step": 1235
},
{
"epoch": 2.62,
"learning_rate": 3.2498604289520603e-07,
"loss": 0.2232,
"step": 1236
},
{
"epoch": 2.62,
"learning_rate": 3.2137410213565463e-07,
"loss": 0.2646,
"step": 1237
},
{
"epoch": 2.63,
"learning_rate": 3.1778150527638657e-07,
"loss": 0.2748,
"step": 1238
},
{
"epoch": 2.63,
"learning_rate": 3.1420827120892225e-07,
"loss": 0.2535,
"step": 1239
},
{
"epoch": 2.63,
"learning_rate": 3.1065441872296293e-07,
"loss": 0.2551,
"step": 1240
},
{
"epoch": 2.63,
"learning_rate": 3.0711996650629335e-07,
"loss": 0.2831,
"step": 1241
},
{
"epoch": 2.63,
"learning_rate": 3.036049331446833e-07,
"loss": 0.2774,
"step": 1242
},
{
"epoch": 2.64,
"learning_rate": 3.0010933712178733e-07,
"loss": 0.2993,
"step": 1243
},
{
"epoch": 2.64,
"learning_rate": 2.966331968190512e-07,
"loss": 0.2857,
"step": 1244
},
{
"epoch": 2.64,
"learning_rate": 2.93176530515614e-07,
"loss": 0.3196,
"step": 1245
},
{
"epoch": 2.64,
"learning_rate": 2.897393563882091e-07,
"loss": 0.2551,
"step": 1246
},
{
"epoch": 2.64,
"learning_rate": 2.8632169251107383e-07,
"loss": 0.1975,
"step": 1247
},
{
"epoch": 2.65,
"learning_rate": 2.8292355685585144e-07,
"loss": 0.2521,
"step": 1248
},
{
"epoch": 2.65,
"learning_rate": 2.7954496729149447e-07,
"loss": 0.1795,
"step": 1249
},
{
"epoch": 2.65,
"learning_rate": 2.761859415841745e-07,
"loss": 0.2473,
"step": 1250
},
{
"epoch": 2.65,
"learning_rate": 2.728464973971896e-07,
"loss": 0.321,
"step": 1251
},
{
"epoch": 2.66,
"learning_rate": 2.695266522908657e-07,
"loss": 0.2644,
"step": 1252
},
{
"epoch": 2.66,
"learning_rate": 2.662264237224687e-07,
"loss": 0.2359,
"step": 1253
},
{
"epoch": 2.66,
"learning_rate": 2.629458290461146e-07,
"loss": 0.2755,
"step": 1254
},
{
"epoch": 2.66,
"learning_rate": 2.5968488551267166e-07,
"loss": 0.2305,
"step": 1255
},
{
"epoch": 2.66,
"learning_rate": 2.5644361026967385e-07,
"loss": 0.2777,
"step": 1256
},
{
"epoch": 2.67,
"learning_rate": 2.532220203612341e-07,
"loss": 0.2899,
"step": 1257
},
{
"epoch": 2.67,
"learning_rate": 2.5002013272794653e-07,
"loss": 0.2478,
"step": 1258
},
{
"epoch": 2.67,
"learning_rate": 2.468379642068017e-07,
"loss": 0.2211,
"step": 1259
},
{
"epoch": 2.67,
"learning_rate": 2.4367553153110274e-07,
"loss": 0.2067,
"step": 1260
},
{
"epoch": 2.67,
"learning_rate": 2.4053285133036796e-07,
"loss": 0.2107,
"step": 1261
},
{
"epoch": 2.68,
"learning_rate": 2.3740994013024873e-07,
"loss": 0.2695,
"step": 1262
},
{
"epoch": 2.68,
"learning_rate": 2.3430681435244381e-07,
"loss": 0.2686,
"step": 1263
},
{
"epoch": 2.68,
"learning_rate": 2.3122349031461062e-07,
"loss": 0.2197,
"step": 1264
},
{
"epoch": 2.68,
"learning_rate": 2.2815998423027837e-07,
"loss": 0.2406,
"step": 1265
},
{
"epoch": 2.68,
"learning_rate": 2.251163122087667e-07,
"loss": 0.2692,
"step": 1266
},
{
"epoch": 2.69,
"learning_rate": 2.2209249025509692e-07,
"loss": 0.2611,
"step": 1267
},
{
"epoch": 2.69,
"learning_rate": 2.1908853426990982e-07,
"loss": 0.2462,
"step": 1268
},
{
"epoch": 2.69,
"learning_rate": 2.161044600493831e-07,
"loss": 0.3073,
"step": 1269
},
{
"epoch": 2.69,
"learning_rate": 2.1314028328514566e-07,
"loss": 0.2099,
"step": 1270
},
{
"epoch": 2.7,
"learning_rate": 2.1019601956419674e-07,
"loss": 0.2273,
"step": 1271
},
{
"epoch": 2.7,
"learning_rate": 2.0727168436882382e-07,
"loss": 0.293,
"step": 1272
},
{
"epoch": 2.7,
"learning_rate": 2.0436729307652212e-07,
"loss": 0.235,
"step": 1273
},
{
"epoch": 2.7,
"learning_rate": 2.0148286095990996e-07,
"loss": 0.2978,
"step": 1274
},
{
"epoch": 2.7,
"learning_rate": 1.9861840318665445e-07,
"loss": 0.2407,
"step": 1275
},
{
"epoch": 2.71,
"learning_rate": 1.9577393481938586e-07,
"loss": 0.2987,
"step": 1276
},
{
"epoch": 2.71,
"learning_rate": 1.9294947081562208e-07,
"loss": 0.2359,
"step": 1277
},
{
"epoch": 2.71,
"learning_rate": 1.9014502602769e-07,
"loss": 0.3236,
"step": 1278
},
{
"epoch": 2.71,
"learning_rate": 1.8736061520264346e-07,
"loss": 0.2285,
"step": 1279
},
{
"epoch": 2.71,
"learning_rate": 1.8459625298219117e-07,
"loss": 0.2466,
"step": 1280
},
{
"epoch": 2.72,
"learning_rate": 1.8185195390261643e-07,
"loss": 0.2181,
"step": 1281
},
{
"epoch": 2.72,
"learning_rate": 1.791277323946998e-07,
"loss": 0.2567,
"step": 1282
},
{
"epoch": 2.72,
"learning_rate": 1.7642360278364765e-07,
"loss": 0.2746,
"step": 1283
},
{
"epoch": 2.72,
"learning_rate": 1.737395792890113e-07,
"loss": 0.315,
"step": 1284
},
{
"epoch": 2.72,
"learning_rate": 1.7107567602461547e-07,
"loss": 0.2386,
"step": 1285
},
{
"epoch": 2.73,
"learning_rate": 1.6843190699848565e-07,
"loss": 0.2142,
"step": 1286
},
{
"epoch": 2.73,
"learning_rate": 1.658082861127692e-07,
"loss": 0.2747,
"step": 1287
},
{
"epoch": 2.73,
"learning_rate": 1.6320482716366725e-07,
"loss": 0.3016,
"step": 1288
},
{
"epoch": 2.73,
"learning_rate": 1.6062154384135894e-07,
"loss": 0.2739,
"step": 1289
},
{
"epoch": 2.74,
"learning_rate": 1.5805844972993243e-07,
"loss": 0.2921,
"step": 1290
},
{
"epoch": 2.74,
"learning_rate": 1.555155583073109e-07,
"loss": 0.3238,
"step": 1291
},
{
"epoch": 2.74,
"learning_rate": 1.529928829451812e-07,
"loss": 0.2896,
"step": 1292
},
{
"epoch": 2.74,
"learning_rate": 1.5049043690892727e-07,
"loss": 0.2519,
"step": 1293
},
{
"epoch": 2.74,
"learning_rate": 1.4800823335755676e-07,
"loss": 0.2463,
"step": 1294
},
{
"epoch": 2.75,
"learning_rate": 1.4554628534363266e-07,
"loss": 0.2411,
"step": 1295
},
{
"epoch": 2.75,
"learning_rate": 1.4310460581320505e-07,
"loss": 0.2541,
"step": 1296
},
{
"epoch": 2.75,
"learning_rate": 1.4068320760574516e-07,
"loss": 0.256,
"step": 1297
},
{
"epoch": 2.75,
"learning_rate": 1.3828210345407265e-07,
"loss": 0.2276,
"step": 1298
},
{
"epoch": 2.75,
"learning_rate": 1.359013059842935e-07,
"loss": 0.3029,
"step": 1299
},
{
"epoch": 2.76,
"learning_rate": 1.3354082771573238e-07,
"loss": 0.2583,
"step": 1300
},
{
"epoch": 2.76,
"learning_rate": 1.3120068106086392e-07,
"loss": 0.3016,
"step": 1301
},
{
"epoch": 2.76,
"learning_rate": 1.2888087832525264e-07,
"loss": 0.2739,
"step": 1302
},
{
"epoch": 2.76,
"learning_rate": 1.265814317074838e-07,
"loss": 0.2576,
"step": 1303
},
{
"epoch": 2.77,
"learning_rate": 1.2430235329910076e-07,
"loss": 0.2464,
"step": 1304
},
{
"epoch": 2.77,
"learning_rate": 1.2204365508454185e-07,
"loss": 0.238,
"step": 1305
},
{
"epoch": 2.77,
"learning_rate": 1.1980534894107774e-07,
"loss": 0.2292,
"step": 1306
},
{
"epoch": 2.77,
"learning_rate": 1.1758744663874765e-07,
"loss": 0.3191,
"step": 1307
},
{
"epoch": 2.77,
"learning_rate": 1.1538995984029653e-07,
"loss": 0.2299,
"step": 1308
},
{
"epoch": 2.78,
"learning_rate": 1.1321290010111884e-07,
"loss": 0.2536,
"step": 1309
},
{
"epoch": 2.78,
"learning_rate": 1.110562788691909e-07,
"loss": 0.2465,
"step": 1310
},
{
"epoch": 2.78,
"learning_rate": 1.0892010748501413e-07,
"loss": 0.3041,
"step": 1311
},
{
"epoch": 2.78,
"learning_rate": 1.0680439718155865e-07,
"loss": 0.213,
"step": 1312
},
{
"epoch": 2.78,
"learning_rate": 1.0470915908419708e-07,
"loss": 0.2786,
"step": 1313
},
{
"epoch": 2.79,
"learning_rate": 1.0263440421065128e-07,
"loss": 0.1773,
"step": 1314
},
{
"epoch": 2.79,
"learning_rate": 1.0058014347093324e-07,
"loss": 0.2504,
"step": 1315
},
{
"epoch": 2.79,
"learning_rate": 9.85463876672874e-08,
"loss": 0.2554,
"step": 1316
},
{
"epoch": 2.79,
"learning_rate": 9.653314749413244e-08,
"loss": 0.2249,
"step": 1317
},
{
"epoch": 2.79,
"learning_rate": 9.4540433538008e-08,
"loss": 0.2648,
"step": 1318
},
{
"epoch": 2.8,
"learning_rate": 9.256825627751785e-08,
"loss": 0.268,
"step": 1319
},
{
"epoch": 2.8,
"learning_rate": 9.061662608327214e-08,
"loss": 0.2306,
"step": 1320
},
{
"epoch": 2.8,
"learning_rate": 8.868555321783765e-08,
"loss": 0.2583,
"step": 1321
},
{
"epoch": 2.8,
"learning_rate": 8.677504783568013e-08,
"loss": 0.2736,
"step": 1322
},
{
"epoch": 2.81,
"learning_rate": 8.488511998311132e-08,
"loss": 0.225,
"step": 1323
},
{
"epoch": 2.81,
"learning_rate": 8.301577959823802e-08,
"loss": 0.292,
"step": 1324
},
{
"epoch": 2.81,
"learning_rate": 8.116703651090916e-08,
"loss": 0.3001,
"step": 1325
},
{
"epoch": 2.81,
"learning_rate": 7.933890044266167e-08,
"loss": 0.1987,
"step": 1326
},
{
"epoch": 2.81,
"learning_rate": 7.753138100667334e-08,
"loss": 0.2309,
"step": 1327
},
{
"epoch": 2.82,
"learning_rate": 7.574448770771003e-08,
"loss": 0.2266,
"step": 1328
},
{
"epoch": 2.82,
"learning_rate": 7.39782299420737e-08,
"loss": 0.208,
"step": 1329
},
{
"epoch": 2.82,
"learning_rate": 7.2232616997558e-08,
"loss": 0.2336,
"step": 1330
},
{
"epoch": 2.82,
"learning_rate": 7.050765805339542e-08,
"loss": 0.2604,
"step": 1331
},
{
"epoch": 2.82,
"learning_rate": 6.880336218020977e-08,
"loss": 0.2501,
"step": 1332
},
{
"epoch": 2.83,
"learning_rate": 6.711973833997087e-08,
"loss": 0.255,
"step": 1333
},
{
"epoch": 2.83,
"learning_rate": 6.545679538594262e-08,
"loss": 0.2789,
"step": 1334
},
{
"epoch": 2.83,
"learning_rate": 6.38145420626417e-08,
"loss": 0.2394,
"step": 1335
},
{
"epoch": 2.83,
"learning_rate": 6.219298700578824e-08,
"loss": 0.2728,
"step": 1336
},
{
"epoch": 2.84,
"learning_rate": 6.059213874226188e-08,
"loss": 0.2935,
"step": 1337
},
{
"epoch": 2.84,
"learning_rate": 5.9012005690056484e-08,
"loss": 0.2304,
"step": 1338
},
{
"epoch": 2.84,
"learning_rate": 5.7452596158234836e-08,
"loss": 0.2665,
"step": 1339
},
{
"epoch": 2.84,
"learning_rate": 5.591391834688686e-08,
"loss": 0.3066,
"step": 1340
},
{
"epoch": 2.84,
"learning_rate": 5.439598034708614e-08,
"loss": 0.294,
"step": 1341
},
{
"epoch": 2.85,
"learning_rate": 5.289879014084464e-08,
"loss": 0.2261,
"step": 1342
},
{
"epoch": 2.85,
"learning_rate": 5.142235560107622e-08,
"loss": 0.3047,
"step": 1343
},
{
"epoch": 2.85,
"learning_rate": 4.9966684491548286e-08,
"loss": 0.2357,
"step": 1344
},
{
"epoch": 2.85,
"learning_rate": 4.853178446684758e-08,
"loss": 0.2664,
"step": 1345
},
{
"epoch": 2.85,
"learning_rate": 4.711766307233578e-08,
"loss": 0.221,
"step": 1346
},
{
"epoch": 2.86,
"learning_rate": 4.572432774411039e-08,
"loss": 0.235,
"step": 1347
},
{
"epoch": 2.86,
"learning_rate": 4.435178580896748e-08,
"loss": 0.2497,
"step": 1348
},
{
"epoch": 2.86,
"learning_rate": 4.300004448436123e-08,
"loss": 0.2267,
"step": 1349
},
{
"epoch": 2.86,
"learning_rate": 4.1669110878366664e-08,
"loss": 0.2585,
"step": 1350
},
{
"epoch": 2.86,
"learning_rate": 4.0358991989642324e-08,
"loss": 0.2499,
"step": 1351
},
{
"epoch": 2.87,
"learning_rate": 3.9069694707393855e-08,
"loss": 0.2798,
"step": 1352
},
{
"epoch": 2.87,
"learning_rate": 3.7801225811336713e-08,
"loss": 0.2736,
"step": 1353
},
{
"epoch": 2.87,
"learning_rate": 3.655359197166197e-08,
"loss": 0.2375,
"step": 1354
},
{
"epoch": 2.87,
"learning_rate": 3.532679974899943e-08,
"loss": 0.2507,
"step": 1355
},
{
"epoch": 2.88,
"learning_rate": 3.412085559438527e-08,
"loss": 0.2787,
"step": 1356
},
{
"epoch": 2.88,
"learning_rate": 3.293576584922508e-08,
"loss": 0.2832,
"step": 1357
},
{
"epoch": 2.88,
"learning_rate": 3.177153674526556e-08,
"loss": 0.2679,
"step": 1358
},
{
"epoch": 2.88,
"learning_rate": 3.062817440455534e-08,
"loss": 0.272,
"step": 1359
},
{
"epoch": 2.88,
"learning_rate": 2.9505684839417067e-08,
"loss": 0.2807,
"step": 1360
},
{
"epoch": 2.89,
"learning_rate": 2.8404073952416284e-08,
"loss": 0.3096,
"step": 1361
},
{
"epoch": 2.89,
"learning_rate": 2.73233475363277e-08,
"loss": 0.2709,
"step": 1362
},
{
"epoch": 2.89,
"learning_rate": 2.6263511274104976e-08,
"loss": 0.2428,
"step": 1363
},
{
"epoch": 2.89,
"learning_rate": 2.5224570738853202e-08,
"loss": 0.2544,
"step": 1364
},
{
"epoch": 2.89,
"learning_rate": 2.4206531393798246e-08,
"loss": 0.3054,
"step": 1365
},
{
"epoch": 2.9,
"learning_rate": 2.3209398592256125e-08,
"loss": 0.2663,
"step": 1366
},
{
"epoch": 2.9,
"learning_rate": 2.2233177577609007e-08,
"loss": 0.2688,
"step": 1367
},
{
"epoch": 2.9,
"learning_rate": 2.1277873483274145e-08,
"loss": 0.2266,
"step": 1368
},
{
"epoch": 2.9,
"learning_rate": 2.0343491332677653e-08,
"loss": 0.2298,
"step": 1369
},
{
"epoch": 2.91,
"learning_rate": 1.9430036039229213e-08,
"loss": 0.2277,
"step": 1370
},
{
"epoch": 2.91,
"learning_rate": 1.853751240629542e-08,
"loss": 0.2375,
"step": 1371
},
{
"epoch": 2.91,
"learning_rate": 1.7665925127173576e-08,
"loss": 0.2523,
"step": 1372
},
{
"epoch": 2.91,
"learning_rate": 1.681527878506994e-08,
"loss": 0.2578,
"step": 1373
},
{
"epoch": 2.91,
"learning_rate": 1.598557785307264e-08,
"loss": 0.2307,
"step": 1374
},
{
"epoch": 2.92,
"learning_rate": 1.5176826694128563e-08,
"loss": 0.2681,
"step": 1375
},
{
"epoch": 2.92,
"learning_rate": 1.4389029561022059e-08,
"loss": 0.2968,
"step": 1376
},
{
"epoch": 2.92,
"learning_rate": 1.3622190596351835e-08,
"loss": 0.218,
"step": 1377
},
{
"epoch": 2.92,
"learning_rate": 1.2876313832508312e-08,
"loss": 0.2747,
"step": 1378
},
{
"epoch": 2.92,
"learning_rate": 1.2151403191652754e-08,
"loss": 0.2337,
"step": 1379
},
{
"epoch": 2.93,
"learning_rate": 1.1447462485697723e-08,
"loss": 0.2258,
"step": 1380
},
{
"epoch": 2.93,
"learning_rate": 1.0764495416286212e-08,
"loss": 0.238,
"step": 1381
},
{
"epoch": 2.93,
"learning_rate": 1.0102505574771659e-08,
"loss": 0.2424,
"step": 1382
},
{
"epoch": 2.93,
"learning_rate": 9.461496442199735e-09,
"loss": 0.2527,
"step": 1383
},
{
"epoch": 2.93,
"learning_rate": 8.84147138929059e-09,
"loss": 0.2335,
"step": 1384
},
{
"epoch": 2.94,
"learning_rate": 8.24243367642019e-09,
"loss": 0.2668,
"step": 1385
},
{
"epoch": 2.94,
"learning_rate": 7.664386453602567e-09,
"loss": 0.2323,
"step": 1386
},
{
"epoch": 2.94,
"learning_rate": 7.107332760475149e-09,
"loss": 0.3155,
"step": 1387
},
{
"epoch": 2.94,
"learning_rate": 6.57127552628145e-09,
"loss": 0.2288,
"step": 1388
},
{
"epoch": 2.95,
"learning_rate": 6.056217569855971e-09,
"loss": 0.2449,
"step": 1389
},
{
"epoch": 2.95,
"learning_rate": 5.5621615996099825e-09,
"loss": 0.2525,
"step": 1390
},
{
"epoch": 2.95,
"learning_rate": 5.089110213515102e-09,
"loss": 0.2184,
"step": 1391
},
{
"epoch": 2.95,
"learning_rate": 4.637065899092629e-09,
"loss": 0.2384,
"step": 1392
},
{
"epoch": 2.95,
"learning_rate": 4.206031033399338e-09,
"loss": 0.1903,
"step": 1393
},
{
"epoch": 2.96,
"learning_rate": 3.7960078830132635e-09,
"loss": 0.2389,
"step": 1394
},
{
"epoch": 2.96,
"learning_rate": 3.406998604023936e-09,
"loss": 0.2197,
"step": 1395
},
{
"epoch": 2.96,
"learning_rate": 3.0390052420208313e-09,
"loss": 0.2488,
"step": 1396
},
{
"epoch": 2.96,
"learning_rate": 2.69202973208138e-09,
"loss": 0.3215,
"step": 1397
},
{
"epoch": 2.96,
"learning_rate": 2.3660738987616447e-09,
"loss": 0.2713,
"step": 1398
},
{
"epoch": 2.97,
"learning_rate": 2.061139456087435e-09,
"loss": 0.2686,
"step": 1399
},
{
"epoch": 2.97,
"learning_rate": 1.7772280075427637e-09,
"loss": 0.2983,
"step": 1400
},
{
"epoch": 2.97,
"learning_rate": 1.5143410460645157e-09,
"loss": 0.2746,
"step": 1401
},
{
"epoch": 2.97,
"learning_rate": 1.2724799540331233e-09,
"loss": 0.2684,
"step": 1402
},
{
"epoch": 2.98,
"learning_rate": 1.0516460032641283e-09,
"loss": 0.2348,
"step": 1403
},
{
"epoch": 2.98,
"learning_rate": 8.518403550046294e-10,
"loss": 0.2285,
"step": 1404
},
{
"epoch": 2.98,
"learning_rate": 6.730640599239557e-10,
"loss": 0.2542,
"step": 1405
},
{
"epoch": 2.98,
"learning_rate": 5.153180581092265e-10,
"loss": 0.2855,
"step": 1406
},
{
"epoch": 2.98,
"learning_rate": 3.7860317906224236e-10,
"loss": 0.2713,
"step": 1407
},
{
"epoch": 2.99,
"learning_rate": 2.629201416910476e-10,
"loss": 0.2403,
"step": 1408
},
{
"epoch": 2.99,
"learning_rate": 1.6826955431126221e-10,
"loss": 0.3237,
"step": 1409
},
{
"epoch": 2.99,
"learning_rate": 9.465191463853273e-11,
"loss": 0.2607,
"step": 1410
},
{
"epoch": 2.99,
"learning_rate": 4.206760978853197e-11,
"loss": 0.1945,
"step": 1411
},
{
"epoch": 2.99,
"learning_rate": 1.0516916272962362e-11,
"loss": 0.2344,
"step": 1412
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.2814,
"step": 1413
},
{
"epoch": 3.0,
"step": 1413,
"total_flos": 5.437824739505494e+21,
"train_loss": 0.38383744233100026,
"train_runtime": 18726.0235,
"train_samples_per_second": 9.443,
"train_steps_per_second": 0.075
}
],
"logging_steps": 1.0,
"max_steps": 1413,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 5.437824739505494e+21,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}