llava-med-40 / trainer_state.json
taindp98's picture
Add files using upload-large-folder tool
daba8cf verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.991549295774648,
"global_step": 531,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.25e-06,
"loss": 1.1094,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 2.5e-06,
"loss": 1.1226,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.995,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 5e-06,
"loss": 0.9446,
"step": 4
},
{
"epoch": 0.03,
"learning_rate": 6.25e-06,
"loss": 0.9452,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 7.500000000000001e-06,
"loss": 0.9096,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 8.750000000000001e-06,
"loss": 0.931,
"step": 7
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 0.8846,
"step": 8
},
{
"epoch": 0.05,
"learning_rate": 1.125e-05,
"loss": 0.8401,
"step": 9
},
{
"epoch": 0.06,
"learning_rate": 1.25e-05,
"loss": 0.8941,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 1.375e-05,
"loss": 0.8645,
"step": 11
},
{
"epoch": 0.07,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.8188,
"step": 12
},
{
"epoch": 0.07,
"learning_rate": 1.6250000000000002e-05,
"loss": 0.8554,
"step": 13
},
{
"epoch": 0.08,
"learning_rate": 1.7500000000000002e-05,
"loss": 0.8108,
"step": 14
},
{
"epoch": 0.08,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.8178,
"step": 15
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.82,
"step": 16
},
{
"epoch": 0.1,
"learning_rate": 1.9999813939602312e-05,
"loss": 0.7755,
"step": 17
},
{
"epoch": 0.1,
"learning_rate": 1.9999255765332947e-05,
"loss": 0.813,
"step": 18
},
{
"epoch": 0.11,
"learning_rate": 1.9998325497962724e-05,
"loss": 0.8127,
"step": 19
},
{
"epoch": 0.11,
"learning_rate": 1.999702317210883e-05,
"loss": 0.8011,
"step": 20
},
{
"epoch": 0.12,
"learning_rate": 1.9995348836233517e-05,
"loss": 0.8315,
"step": 21
},
{
"epoch": 0.12,
"learning_rate": 1.9993302552642306e-05,
"loss": 0.8075,
"step": 22
},
{
"epoch": 0.13,
"learning_rate": 1.9990884397481664e-05,
"loss": 0.7879,
"step": 23
},
{
"epoch": 0.14,
"learning_rate": 1.9988094460736175e-05,
"loss": 0.8185,
"step": 24
},
{
"epoch": 0.14,
"learning_rate": 1.998493284622518e-05,
"loss": 0.818,
"step": 25
},
{
"epoch": 0.15,
"learning_rate": 1.998139967159894e-05,
"loss": 0.7796,
"step": 26
},
{
"epoch": 0.15,
"learning_rate": 1.9977495068334223e-05,
"loss": 0.7653,
"step": 27
},
{
"epoch": 0.16,
"learning_rate": 1.997321918172944e-05,
"loss": 0.7758,
"step": 28
},
{
"epoch": 0.16,
"learning_rate": 1.996857217089922e-05,
"loss": 0.7504,
"step": 29
},
{
"epoch": 0.17,
"learning_rate": 1.9963554208768502e-05,
"loss": 0.7599,
"step": 30
},
{
"epoch": 0.17,
"learning_rate": 1.9958165482066094e-05,
"loss": 0.7667,
"step": 31
},
{
"epoch": 0.18,
"learning_rate": 1.9952406191317718e-05,
"loss": 0.7626,
"step": 32
},
{
"epoch": 0.19,
"learning_rate": 1.994627655083856e-05,
"loss": 0.7985,
"step": 33
},
{
"epoch": 0.19,
"learning_rate": 1.9939776788725296e-05,
"loss": 0.7512,
"step": 34
},
{
"epoch": 0.2,
"learning_rate": 1.993290714684758e-05,
"loss": 0.7604,
"step": 35
},
{
"epoch": 0.2,
"learning_rate": 1.992566788083908e-05,
"loss": 0.7225,
"step": 36
},
{
"epoch": 0.21,
"learning_rate": 1.9918059260087937e-05,
"loss": 0.7572,
"step": 37
},
{
"epoch": 0.21,
"learning_rate": 1.9910081567726746e-05,
"loss": 0.7767,
"step": 38
},
{
"epoch": 0.22,
"learning_rate": 1.9901735100622038e-05,
"loss": 0.7256,
"step": 39
},
{
"epoch": 0.23,
"learning_rate": 1.9893020169363203e-05,
"loss": 0.7386,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 1.9883937098250962e-05,
"loss": 0.7766,
"step": 41
},
{
"epoch": 0.24,
"learning_rate": 1.9874486225285278e-05,
"loss": 0.7665,
"step": 42
},
{
"epoch": 0.24,
"learning_rate": 1.986466790215279e-05,
"loss": 0.7294,
"step": 43
},
{
"epoch": 0.25,
"learning_rate": 1.985448249421371e-05,
"loss": 0.7798,
"step": 44
},
{
"epoch": 0.25,
"learning_rate": 1.9843930380488257e-05,
"loss": 0.7859,
"step": 45
},
{
"epoch": 0.26,
"learning_rate": 1.9833011953642525e-05,
"loss": 0.7533,
"step": 46
},
{
"epoch": 0.26,
"learning_rate": 1.9821727619973884e-05,
"loss": 0.7672,
"step": 47
},
{
"epoch": 0.27,
"learning_rate": 1.9810077799395847e-05,
"loss": 0.7952,
"step": 48
},
{
"epoch": 0.28,
"learning_rate": 1.9798062925422474e-05,
"loss": 0.7117,
"step": 49
},
{
"epoch": 0.28,
"learning_rate": 1.9785683445152205e-05,
"loss": 0.7319,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 1.9772939819251247e-05,
"loss": 0.7491,
"step": 51
},
{
"epoch": 0.29,
"learning_rate": 1.9759832521936424e-05,
"loss": 0.7559,
"step": 52
},
{
"epoch": 0.3,
"learning_rate": 1.974636204095752e-05,
"loss": 0.714,
"step": 53
},
{
"epoch": 0.3,
"learning_rate": 1.9732528877579145e-05,
"loss": 0.7416,
"step": 54
},
{
"epoch": 0.31,
"learning_rate": 1.971833354656208e-05,
"loss": 0.7311,
"step": 55
},
{
"epoch": 0.32,
"learning_rate": 1.9703776576144106e-05,
"loss": 0.7392,
"step": 56
},
{
"epoch": 0.32,
"learning_rate": 1.968885850802037e-05,
"loss": 0.7321,
"step": 57
},
{
"epoch": 0.33,
"learning_rate": 1.9673579897323204e-05,
"loss": 0.7592,
"step": 58
},
{
"epoch": 0.33,
"learning_rate": 1.9657941312601486e-05,
"loss": 0.7302,
"step": 59
},
{
"epoch": 0.34,
"learning_rate": 1.964194333579948e-05,
"loss": 0.7368,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 1.962558656223516e-05,
"loss": 0.7504,
"step": 61
},
{
"epoch": 0.35,
"learning_rate": 1.9608871600578095e-05,
"loss": 0.7226,
"step": 62
},
{
"epoch": 0.35,
"learning_rate": 1.9591799072826766e-05,
"loss": 0.6994,
"step": 63
},
{
"epoch": 0.36,
"learning_rate": 1.9574369614285426e-05,
"loss": 0.7191,
"step": 64
},
{
"epoch": 0.37,
"learning_rate": 1.9556583873540483e-05,
"loss": 0.7758,
"step": 65
},
{
"epoch": 0.37,
"learning_rate": 1.953844251243633e-05,
"loss": 0.7308,
"step": 66
},
{
"epoch": 0.38,
"learning_rate": 1.9519946206050737e-05,
"loss": 0.7293,
"step": 67
},
{
"epoch": 0.38,
"learning_rate": 1.9501095642669737e-05,
"loss": 0.7096,
"step": 68
},
{
"epoch": 0.39,
"learning_rate": 1.9481891523761985e-05,
"loss": 0.7234,
"step": 69
},
{
"epoch": 0.39,
"learning_rate": 1.946233456395269e-05,
"loss": 0.7762,
"step": 70
},
{
"epoch": 0.4,
"learning_rate": 1.9442425490996987e-05,
"loss": 0.7034,
"step": 71
},
{
"epoch": 0.41,
"learning_rate": 1.942216504575289e-05,
"loss": 0.7576,
"step": 72
},
{
"epoch": 0.41,
"learning_rate": 1.940155398215369e-05,
"loss": 0.7312,
"step": 73
},
{
"epoch": 0.42,
"learning_rate": 1.9380593067179934e-05,
"loss": 0.6968,
"step": 74
},
{
"epoch": 0.42,
"learning_rate": 1.9359283080830856e-05,
"loss": 0.678,
"step": 75
},
{
"epoch": 0.43,
"learning_rate": 1.933762481609536e-05,
"loss": 0.7154,
"step": 76
},
{
"epoch": 0.43,
"learning_rate": 1.9315619078922512e-05,
"loss": 0.7206,
"step": 77
},
{
"epoch": 0.44,
"learning_rate": 1.929326668819156e-05,
"loss": 0.7176,
"step": 78
},
{
"epoch": 0.45,
"learning_rate": 1.9270568475681442e-05,
"loss": 0.7361,
"step": 79
},
{
"epoch": 0.45,
"learning_rate": 1.9247525286039855e-05,
"loss": 0.7342,
"step": 80
},
{
"epoch": 0.46,
"learning_rate": 1.9224137976751797e-05,
"loss": 0.7231,
"step": 81
},
{
"epoch": 0.46,
"learning_rate": 1.920040741810768e-05,
"loss": 0.7328,
"step": 82
},
{
"epoch": 0.47,
"learning_rate": 1.917633449317095e-05,
"loss": 0.6792,
"step": 83
},
{
"epoch": 0.47,
"learning_rate": 1.915192009774519e-05,
"loss": 0.7461,
"step": 84
},
{
"epoch": 0.48,
"learning_rate": 1.9127165140340837e-05,
"loss": 0.7143,
"step": 85
},
{
"epoch": 0.48,
"learning_rate": 1.910207054214133e-05,
"loss": 0.7507,
"step": 86
},
{
"epoch": 0.49,
"learning_rate": 1.9076637236968852e-05,
"loss": 0.6742,
"step": 87
},
{
"epoch": 0.5,
"learning_rate": 1.905086617124958e-05,
"loss": 0.736,
"step": 88
},
{
"epoch": 0.5,
"learning_rate": 1.9024758303978457e-05,
"loss": 0.757,
"step": 89
},
{
"epoch": 0.51,
"learning_rate": 1.8998314606683522e-05,
"loss": 0.7392,
"step": 90
},
{
"epoch": 0.51,
"learning_rate": 1.8971536063389745e-05,
"loss": 0.7028,
"step": 91
},
{
"epoch": 0.52,
"learning_rate": 1.89444236705824e-05,
"loss": 0.7357,
"step": 92
},
{
"epoch": 0.52,
"learning_rate": 1.8916978437170006e-05,
"loss": 0.7076,
"step": 93
},
{
"epoch": 0.53,
"learning_rate": 1.888920138444678e-05,
"loss": 0.7689,
"step": 94
},
{
"epoch": 0.54,
"learning_rate": 1.8861093546054605e-05,
"loss": 0.7018,
"step": 95
},
{
"epoch": 0.54,
"learning_rate": 1.8832655967944607e-05,
"loss": 0.6787,
"step": 96
},
{
"epoch": 0.55,
"learning_rate": 1.8803889708338205e-05,
"loss": 0.7327,
"step": 97
},
{
"epoch": 0.55,
"learning_rate": 1.877479583768774e-05,
"loss": 0.7318,
"step": 98
},
{
"epoch": 0.56,
"learning_rate": 1.8745375438636632e-05,
"loss": 0.7109,
"step": 99
},
{
"epoch": 0.56,
"learning_rate": 1.871562960597912e-05,
"loss": 0.7287,
"step": 100
},
{
"epoch": 0.57,
"learning_rate": 1.868555944661949e-05,
"loss": 0.7207,
"step": 101
},
{
"epoch": 0.57,
"learning_rate": 1.8655166079530906e-05,
"loss": 0.6855,
"step": 102
},
{
"epoch": 0.58,
"learning_rate": 1.862445063571376e-05,
"loss": 0.6921,
"step": 103
},
{
"epoch": 0.59,
"learning_rate": 1.8593414258153588e-05,
"loss": 0.7363,
"step": 104
},
{
"epoch": 0.59,
"learning_rate": 1.856205810177855e-05,
"loss": 0.7143,
"step": 105
},
{
"epoch": 0.6,
"learning_rate": 1.853038333341642e-05,
"loss": 0.7234,
"step": 106
},
{
"epoch": 0.6,
"learning_rate": 1.84983911317512e-05,
"loss": 0.6807,
"step": 107
},
{
"epoch": 0.61,
"learning_rate": 1.8466082687279247e-05,
"loss": 0.7104,
"step": 108
},
{
"epoch": 0.61,
"learning_rate": 1.8433459202264963e-05,
"loss": 0.7297,
"step": 109
},
{
"epoch": 0.62,
"learning_rate": 1.8400521890696068e-05,
"loss": 0.7574,
"step": 110
},
{
"epoch": 0.63,
"learning_rate": 1.8367271978238422e-05,
"loss": 0.666,
"step": 111
},
{
"epoch": 0.63,
"learning_rate": 1.833371070219041e-05,
"loss": 0.7313,
"step": 112
},
{
"epoch": 0.64,
"learning_rate": 1.8299839311436905e-05,
"loss": 0.7253,
"step": 113
},
{
"epoch": 0.64,
"learning_rate": 1.8265659066402794e-05,
"loss": 0.7251,
"step": 114
},
{
"epoch": 0.65,
"learning_rate": 1.8231171239006077e-05,
"loss": 0.714,
"step": 115
},
{
"epoch": 0.65,
"learning_rate": 1.8196377112610524e-05,
"loss": 0.7023,
"step": 116
},
{
"epoch": 0.66,
"learning_rate": 1.8161277981977942e-05,
"loss": 0.6867,
"step": 117
},
{
"epoch": 0.66,
"learning_rate": 1.8125875153219966e-05,
"loss": 0.7594,
"step": 118
},
{
"epoch": 0.67,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.6866,
"step": 119
},
{
"epoch": 0.68,
"learning_rate": 1.8054163682231567e-05,
"loss": 0.7443,
"step": 120
},
{
"epoch": 0.68,
"learning_rate": 1.8017857708534107e-05,
"loss": 0.7269,
"step": 121
},
{
"epoch": 0.69,
"learning_rate": 1.798125337367788e-05,
"loss": 0.7142,
"step": 122
},
{
"epoch": 0.69,
"learning_rate": 1.79443520397863e-05,
"loss": 0.6999,
"step": 123
},
{
"epoch": 0.7,
"learning_rate": 1.790715508003474e-05,
"loss": 0.7005,
"step": 124
},
{
"epoch": 0.7,
"learning_rate": 1.786966387859943e-05,
"loss": 0.6898,
"step": 125
},
{
"epoch": 0.71,
"learning_rate": 1.783187983060594e-05,
"loss": 0.7002,
"step": 126
},
{
"epoch": 0.72,
"learning_rate": 1.7793804342077258e-05,
"loss": 0.6982,
"step": 127
},
{
"epoch": 0.72,
"learning_rate": 1.7755438829881503e-05,
"loss": 0.6991,
"step": 128
},
{
"epoch": 0.73,
"learning_rate": 1.771678472167916e-05,
"loss": 0.7005,
"step": 129
},
{
"epoch": 0.73,
"learning_rate": 1.7677843455869984e-05,
"loss": 0.6961,
"step": 130
},
{
"epoch": 0.74,
"learning_rate": 1.763861648153945e-05,
"loss": 0.7546,
"step": 131
},
{
"epoch": 0.74,
"learning_rate": 1.759910525840485e-05,
"loss": 0.7155,
"step": 132
},
{
"epoch": 0.75,
"learning_rate": 1.7559311256760958e-05,
"loss": 0.7057,
"step": 133
},
{
"epoch": 0.75,
"learning_rate": 1.7519235957425334e-05,
"loss": 0.6821,
"step": 134
},
{
"epoch": 0.76,
"learning_rate": 1.74788808516832e-05,
"loss": 0.6797,
"step": 135
},
{
"epoch": 0.77,
"learning_rate": 1.743824744123196e-05,
"loss": 0.745,
"step": 136
},
{
"epoch": 0.77,
"learning_rate": 1.739733723812532e-05,
"loss": 0.703,
"step": 137
},
{
"epoch": 0.78,
"learning_rate": 1.7356151764717012e-05,
"loss": 0.6906,
"step": 138
},
{
"epoch": 0.78,
"learning_rate": 1.7314692553604143e-05,
"loss": 0.6708,
"step": 139
},
{
"epoch": 0.79,
"learning_rate": 1.7272961147570177e-05,
"loss": 0.6731,
"step": 140
},
{
"epoch": 0.79,
"learning_rate": 1.7230959099527512e-05,
"loss": 0.7075,
"step": 141
},
{
"epoch": 0.8,
"learning_rate": 1.7188687972459707e-05,
"loss": 0.7039,
"step": 142
},
{
"epoch": 0.81,
"learning_rate": 1.71461493393633e-05,
"loss": 0.7294,
"step": 143
},
{
"epoch": 0.81,
"learning_rate": 1.7103344783189292e-05,
"loss": 0.73,
"step": 144
},
{
"epoch": 0.82,
"learning_rate": 1.7060275896784225e-05,
"loss": 0.7184,
"step": 145
},
{
"epoch": 0.82,
"learning_rate": 1.7016944282830935e-05,
"loss": 0.6974,
"step": 146
},
{
"epoch": 0.83,
"learning_rate": 1.697335155378888e-05,
"loss": 0.6929,
"step": 147
},
{
"epoch": 0.83,
"learning_rate": 1.692949933183416e-05,
"loss": 0.7045,
"step": 148
},
{
"epoch": 0.84,
"learning_rate": 1.6885389248799153e-05,
"loss": 0.6938,
"step": 149
},
{
"epoch": 0.85,
"learning_rate": 1.6841022946111774e-05,
"loss": 0.7276,
"step": 150
},
{
"epoch": 0.85,
"learning_rate": 1.6796402074734404e-05,
"loss": 0.6981,
"step": 151
},
{
"epoch": 0.86,
"learning_rate": 1.675152829510246e-05,
"loss": 0.7262,
"step": 152
},
{
"epoch": 0.86,
"learning_rate": 1.67064032770626e-05,
"loss": 0.6724,
"step": 153
},
{
"epoch": 0.87,
"learning_rate": 1.666102869981059e-05,
"loss": 0.7055,
"step": 154
},
{
"epoch": 0.87,
"learning_rate": 1.6615406251828794e-05,
"loss": 0.6914,
"step": 155
},
{
"epoch": 0.88,
"learning_rate": 1.6569537630823385e-05,
"loss": 0.667,
"step": 156
},
{
"epoch": 0.88,
"learning_rate": 1.652342454366113e-05,
"loss": 0.736,
"step": 157
},
{
"epoch": 0.89,
"learning_rate": 1.64770687063059e-05,
"loss": 0.7076,
"step": 158
},
{
"epoch": 0.9,
"learning_rate": 1.6430471843754806e-05,
"loss": 0.6791,
"step": 159
},
{
"epoch": 0.9,
"learning_rate": 1.6383635689973997e-05,
"loss": 0.6903,
"step": 160
},
{
"epoch": 0.91,
"learning_rate": 1.6336561987834155e-05,
"loss": 0.6791,
"step": 161
},
{
"epoch": 0.91,
"learning_rate": 1.6289252489045625e-05,
"loss": 0.6617,
"step": 162
},
{
"epoch": 0.92,
"learning_rate": 1.6241708954093242e-05,
"loss": 0.6653,
"step": 163
},
{
"epoch": 0.92,
"learning_rate": 1.6193933152170812e-05,
"loss": 0.6855,
"step": 164
},
{
"epoch": 0.93,
"learning_rate": 1.614592686111527e-05,
"loss": 0.7106,
"step": 165
},
{
"epoch": 0.94,
"learning_rate": 1.6097691867340547e-05,
"loss": 0.6551,
"step": 166
},
{
"epoch": 0.94,
"learning_rate": 1.6049229965771054e-05,
"loss": 0.7079,
"step": 167
},
{
"epoch": 0.95,
"learning_rate": 1.600054295977494e-05,
"loss": 0.7083,
"step": 168
},
{
"epoch": 0.95,
"learning_rate": 1.5951632661096932e-05,
"loss": 0.6969,
"step": 169
},
{
"epoch": 0.96,
"learning_rate": 1.590250088979097e-05,
"loss": 0.6657,
"step": 170
},
{
"epoch": 0.96,
"learning_rate": 1.585314947415242e-05,
"loss": 0.6789,
"step": 171
},
{
"epoch": 0.97,
"learning_rate": 1.5803580250650098e-05,
"loss": 0.6983,
"step": 172
},
{
"epoch": 0.97,
"learning_rate": 1.5753795063857886e-05,
"loss": 0.651,
"step": 173
},
{
"epoch": 0.98,
"learning_rate": 1.5703795766386114e-05,
"loss": 0.6485,
"step": 174
},
{
"epoch": 0.99,
"learning_rate": 1.565358421881262e-05,
"loss": 0.742,
"step": 175
},
{
"epoch": 0.99,
"learning_rate": 1.5603162289613503e-05,
"loss": 0.6903,
"step": 176
},
{
"epoch": 1.0,
"learning_rate": 1.55525318550936e-05,
"loss": 0.6678,
"step": 177
},
{
"epoch": 1.0,
"learning_rate": 1.5501694799316672e-05,
"loss": 0.5696,
"step": 178
},
{
"epoch": 1.01,
"learning_rate": 1.5450653014035288e-05,
"loss": 0.5072,
"step": 179
},
{
"epoch": 1.01,
"learning_rate": 1.539940839862041e-05,
"loss": 0.5298,
"step": 180
},
{
"epoch": 1.02,
"learning_rate": 1.5347962859990744e-05,
"loss": 0.4914,
"step": 181
},
{
"epoch": 1.03,
"learning_rate": 1.5296318312541768e-05,
"loss": 0.4611,
"step": 182
},
{
"epoch": 1.03,
"learning_rate": 1.5244476678074496e-05,
"loss": 0.4769,
"step": 183
},
{
"epoch": 1.04,
"learning_rate": 1.5192439885723942e-05,
"loss": 0.4666,
"step": 184
},
{
"epoch": 1.04,
"learning_rate": 1.514020987188737e-05,
"loss": 0.4796,
"step": 185
},
{
"epoch": 1.05,
"learning_rate": 1.5087788580152207e-05,
"loss": 0.4725,
"step": 186
},
{
"epoch": 1.05,
"learning_rate": 1.5035177961223727e-05,
"loss": 0.4876,
"step": 187
},
{
"epoch": 1.06,
"learning_rate": 1.4982379972852471e-05,
"loss": 0.5065,
"step": 188
},
{
"epoch": 1.06,
"learning_rate": 1.4929396579761378e-05,
"loss": 0.4662,
"step": 189
},
{
"epoch": 1.07,
"learning_rate": 1.4876229753572688e-05,
"loss": 0.4547,
"step": 190
},
{
"epoch": 1.08,
"learning_rate": 1.4822881472734563e-05,
"loss": 0.4817,
"step": 191
},
{
"epoch": 1.08,
"learning_rate": 1.4769353722447477e-05,
"loss": 0.4428,
"step": 192
},
{
"epoch": 1.09,
"learning_rate": 1.4715648494590327e-05,
"loss": 0.4835,
"step": 193
},
{
"epoch": 1.09,
"learning_rate": 1.4661767787646328e-05,
"loss": 0.4737,
"step": 194
},
{
"epoch": 1.1,
"learning_rate": 1.4607713606628627e-05,
"loss": 0.4999,
"step": 195
},
{
"epoch": 1.1,
"learning_rate": 1.4553487963005712e-05,
"loss": 0.5113,
"step": 196
},
{
"epoch": 1.11,
"learning_rate": 1.4499092874626546e-05,
"loss": 0.4856,
"step": 197
},
{
"epoch": 1.12,
"learning_rate": 1.444453036564548e-05,
"loss": 0.4806,
"step": 198
},
{
"epoch": 1.12,
"learning_rate": 1.4389802466446942e-05,
"loss": 0.4808,
"step": 199
},
{
"epoch": 1.13,
"learning_rate": 1.4334911213569872e-05,
"loss": 0.4667,
"step": 200
},
{
"epoch": 1.13,
"learning_rate": 1.427985864963193e-05,
"loss": 0.4798,
"step": 201
},
{
"epoch": 1.14,
"learning_rate": 1.4224646823253512e-05,
"loss": 0.4477,
"step": 202
},
{
"epoch": 1.14,
"learning_rate": 1.4169277788981489e-05,
"loss": 0.4907,
"step": 203
},
{
"epoch": 1.15,
"learning_rate": 1.4113753607212768e-05,
"loss": 0.4584,
"step": 204
},
{
"epoch": 1.15,
"learning_rate": 1.4058076344117617e-05,
"loss": 0.4593,
"step": 205
},
{
"epoch": 1.16,
"learning_rate": 1.400224807156278e-05,
"loss": 0.4871,
"step": 206
},
{
"epoch": 1.17,
"learning_rate": 1.3946270867034377e-05,
"loss": 0.4938,
"step": 207
},
{
"epoch": 1.17,
"learning_rate": 1.3890146813560592e-05,
"loss": 0.4844,
"step": 208
},
{
"epoch": 1.18,
"learning_rate": 1.383387799963417e-05,
"loss": 0.4567,
"step": 209
},
{
"epoch": 1.18,
"learning_rate": 1.3777466519134686e-05,
"loss": 0.4662,
"step": 210
},
{
"epoch": 1.19,
"learning_rate": 1.3720914471250644e-05,
"loss": 0.4884,
"step": 211
},
{
"epoch": 1.19,
"learning_rate": 1.3664223960401345e-05,
"loss": 0.4705,
"step": 212
},
{
"epoch": 1.2,
"learning_rate": 1.3607397096158588e-05,
"loss": 0.4653,
"step": 213
},
{
"epoch": 1.21,
"learning_rate": 1.3550435993168164e-05,
"loss": 0.4546,
"step": 214
},
{
"epoch": 1.21,
"learning_rate": 1.3493342771071171e-05,
"loss": 0.5141,
"step": 215
},
{
"epoch": 1.22,
"learning_rate": 1.3436119554425133e-05,
"loss": 0.4794,
"step": 216
},
{
"epoch": 1.22,
"learning_rate": 1.337876847262493e-05,
"loss": 0.4834,
"step": 217
},
{
"epoch": 1.23,
"learning_rate": 1.3321291659823588e-05,
"loss": 0.4697,
"step": 218
},
{
"epoch": 1.23,
"learning_rate": 1.3263691254852836e-05,
"loss": 0.5022,
"step": 219
},
{
"epoch": 1.24,
"learning_rate": 1.3205969401143517e-05,
"loss": 0.4845,
"step": 220
},
{
"epoch": 1.25,
"learning_rate": 1.314812824664585e-05,
"loss": 0.4907,
"step": 221
},
{
"epoch": 1.25,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.4705,
"step": 222
},
{
"epoch": 1.26,
"learning_rate": 1.303209664920337e-05,
"loss": 0.4651,
"step": 223
},
{
"epoch": 1.26,
"learning_rate": 1.2973910524035588e-05,
"loss": 0.4918,
"step": 224
},
{
"epoch": 1.27,
"learning_rate": 1.2915613733472849e-05,
"loss": 0.4668,
"step": 225
},
{
"epoch": 1.27,
"learning_rate": 1.285720844685996e-05,
"loss": 0.4932,
"step": 226
},
{
"epoch": 1.28,
"learning_rate": 1.279869683757909e-05,
"loss": 0.4721,
"step": 227
},
{
"epoch": 1.28,
"learning_rate": 1.27400810829689e-05,
"loss": 0.4725,
"step": 228
},
{
"epoch": 1.29,
"learning_rate": 1.2681363364243511e-05,
"loss": 0.4366,
"step": 229
},
{
"epoch": 1.3,
"learning_rate": 1.2622545866411345e-05,
"loss": 0.4701,
"step": 230
},
{
"epoch": 1.3,
"learning_rate": 1.2563630778193805e-05,
"loss": 0.4725,
"step": 231
},
{
"epoch": 1.31,
"learning_rate": 1.2504620291943841e-05,
"loss": 0.4966,
"step": 232
},
{
"epoch": 1.31,
"learning_rate": 1.2445516603564365e-05,
"loss": 0.4431,
"step": 233
},
{
"epoch": 1.32,
"learning_rate": 1.2386321912426524e-05,
"loss": 0.4566,
"step": 234
},
{
"epoch": 1.32,
"learning_rate": 1.2327038421287879e-05,
"loss": 0.437,
"step": 235
},
{
"epoch": 1.33,
"learning_rate": 1.2267668336210411e-05,
"loss": 0.448,
"step": 236
},
{
"epoch": 1.34,
"learning_rate": 1.2208213866478454e-05,
"loss": 0.4411,
"step": 237
},
{
"epoch": 1.34,
"learning_rate": 1.214867722451646e-05,
"loss": 0.4848,
"step": 238
},
{
"epoch": 1.35,
"learning_rate": 1.2089060625806686e-05,
"loss": 0.479,
"step": 239
},
{
"epoch": 1.35,
"learning_rate": 1.202936628880675e-05,
"loss": 0.4511,
"step": 240
},
{
"epoch": 1.36,
"learning_rate": 1.1969596434867063e-05,
"loss": 0.4733,
"step": 241
},
{
"epoch": 1.36,
"learning_rate": 1.1909753288148183e-05,
"loss": 0.464,
"step": 242
},
{
"epoch": 1.37,
"learning_rate": 1.184983907553805e-05,
"loss": 0.4579,
"step": 243
},
{
"epoch": 1.37,
"learning_rate": 1.1789856026569103e-05,
"loss": 0.4772,
"step": 244
},
{
"epoch": 1.38,
"learning_rate": 1.1729806373335337e-05,
"loss": 0.4372,
"step": 245
},
{
"epoch": 1.39,
"learning_rate": 1.1669692350409223e-05,
"loss": 0.4962,
"step": 246
},
{
"epoch": 1.39,
"learning_rate": 1.1609516194758562e-05,
"loss": 0.4626,
"step": 247
},
{
"epoch": 1.4,
"learning_rate": 1.1549280145663245e-05,
"loss": 0.4809,
"step": 248
},
{
"epoch": 1.4,
"learning_rate": 1.148898644463192e-05,
"loss": 0.4721,
"step": 249
},
{
"epoch": 1.41,
"learning_rate": 1.142863733531859e-05,
"loss": 0.4763,
"step": 250
},
{
"epoch": 1.41,
"learning_rate": 1.1368235063439103e-05,
"loss": 0.4414,
"step": 251
},
{
"epoch": 1.42,
"learning_rate": 1.1307781876687611e-05,
"loss": 0.4621,
"step": 252
},
{
"epoch": 1.43,
"learning_rate": 1.124728002465291e-05,
"loss": 0.4525,
"step": 253
},
{
"epoch": 1.43,
"learning_rate": 1.1186731758734722e-05,
"loss": 0.4496,
"step": 254
},
{
"epoch": 1.44,
"learning_rate": 1.112613933205994e-05,
"loss": 0.4551,
"step": 255
},
{
"epoch": 1.44,
"learning_rate": 1.1065504999398762e-05,
"loss": 0.4506,
"step": 256
},
{
"epoch": 1.45,
"learning_rate": 1.1004831017080802e-05,
"loss": 0.4775,
"step": 257
},
{
"epoch": 1.45,
"learning_rate": 1.0944119642911108e-05,
"loss": 0.4732,
"step": 258
},
{
"epoch": 1.46,
"learning_rate": 1.0883373136086173e-05,
"loss": 0.4736,
"step": 259
},
{
"epoch": 1.46,
"learning_rate": 1.0822593757109835e-05,
"loss": 0.4692,
"step": 260
},
{
"epoch": 1.47,
"learning_rate": 1.0761783767709182e-05,
"loss": 0.4532,
"step": 261
},
{
"epoch": 1.48,
"learning_rate": 1.0700945430750373e-05,
"loss": 0.4855,
"step": 262
},
{
"epoch": 1.48,
"learning_rate": 1.0640081010154444e-05,
"loss": 0.4737,
"step": 263
},
{
"epoch": 1.49,
"learning_rate": 1.0579192770813053e-05,
"loss": 0.4691,
"step": 264
},
{
"epoch": 1.49,
"learning_rate": 1.0518282978504209e-05,
"loss": 0.4843,
"step": 265
},
{
"epoch": 1.5,
"learning_rate": 1.0457353899807947e-05,
"loss": 0.4376,
"step": 266
},
{
"epoch": 1.5,
"learning_rate": 1.0396407802021986e-05,
"loss": 0.4597,
"step": 267
},
{
"epoch": 1.51,
"learning_rate": 1.0335446953077366e-05,
"loss": 0.4639,
"step": 268
},
{
"epoch": 1.52,
"learning_rate": 1.027447362145405e-05,
"loss": 0.4622,
"step": 269
},
{
"epoch": 1.52,
"learning_rate": 1.0213490076096502e-05,
"loss": 0.483,
"step": 270
},
{
"epoch": 1.53,
"learning_rate": 1.015249858632926e-05,
"loss": 0.47,
"step": 271
},
{
"epoch": 1.53,
"learning_rate": 1.0091501421772496e-05,
"loss": 0.4806,
"step": 272
},
{
"epoch": 1.54,
"learning_rate": 1.0030500852257545e-05,
"loss": 0.489,
"step": 273
},
{
"epoch": 1.54,
"learning_rate": 9.969499147742455e-06,
"loss": 0.4736,
"step": 274
},
{
"epoch": 1.55,
"learning_rate": 9.908498578227505e-06,
"loss": 0.4608,
"step": 275
},
{
"epoch": 1.55,
"learning_rate": 9.847501413670742e-06,
"loss": 0.4895,
"step": 276
},
{
"epoch": 1.56,
"learning_rate": 9.786509923903503e-06,
"loss": 0.4773,
"step": 277
},
{
"epoch": 1.57,
"learning_rate": 9.725526378545953e-06,
"loss": 0.4446,
"step": 278
},
{
"epoch": 1.57,
"learning_rate": 9.664553046922634e-06,
"loss": 0.4815,
"step": 279
},
{
"epoch": 1.58,
"learning_rate": 9.603592197978017e-06,
"loss": 0.4996,
"step": 280
},
{
"epoch": 1.58,
"learning_rate": 9.542646100192056e-06,
"loss": 0.4772,
"step": 281
},
{
"epoch": 1.59,
"learning_rate": 9.481717021495795e-06,
"loss": 0.4468,
"step": 282
},
{
"epoch": 1.59,
"learning_rate": 9.42080722918695e-06,
"loss": 0.4736,
"step": 283
},
{
"epoch": 1.6,
"learning_rate": 9.359918989845558e-06,
"loss": 0.482,
"step": 284
},
{
"epoch": 1.61,
"learning_rate": 9.299054569249629e-06,
"loss": 0.518,
"step": 285
},
{
"epoch": 1.61,
"learning_rate": 9.238216232290821e-06,
"loss": 0.4673,
"step": 286
},
{
"epoch": 1.62,
"learning_rate": 9.177406242890168e-06,
"loss": 0.4968,
"step": 287
},
{
"epoch": 1.62,
"learning_rate": 9.116626863913827e-06,
"loss": 0.477,
"step": 288
},
{
"epoch": 1.63,
"learning_rate": 9.055880357088892e-06,
"loss": 0.4549,
"step": 289
},
{
"epoch": 1.63,
"learning_rate": 8.995168982919203e-06,
"loss": 0.4776,
"step": 290
},
{
"epoch": 1.64,
"learning_rate": 8.934495000601241e-06,
"loss": 0.4776,
"step": 291
},
{
"epoch": 1.65,
"learning_rate": 8.873860667940066e-06,
"loss": 0.4818,
"step": 292
},
{
"epoch": 1.65,
"learning_rate": 8.81326824126528e-06,
"loss": 0.4708,
"step": 293
},
{
"epoch": 1.66,
"learning_rate": 8.752719975347094e-06,
"loss": 0.4835,
"step": 294
},
{
"epoch": 1.66,
"learning_rate": 8.692218123312392e-06,
"loss": 0.4828,
"step": 295
},
{
"epoch": 1.67,
"learning_rate": 8.6317649365609e-06,
"loss": 0.4881,
"step": 296
},
{
"epoch": 1.67,
"learning_rate": 8.571362664681416e-06,
"loss": 0.4538,
"step": 297
},
{
"epoch": 1.68,
"learning_rate": 8.511013555368081e-06,
"loss": 0.4855,
"step": 298
},
{
"epoch": 1.68,
"learning_rate": 8.450719854336759e-06,
"loss": 0.4503,
"step": 299
},
{
"epoch": 1.69,
"learning_rate": 8.390483805241442e-06,
"loss": 0.447,
"step": 300
},
{
"epoch": 1.7,
"learning_rate": 8.330307649590782e-06,
"loss": 0.4663,
"step": 301
},
{
"epoch": 1.7,
"learning_rate": 8.270193626664666e-06,
"loss": 0.4758,
"step": 302
},
{
"epoch": 1.71,
"learning_rate": 8.210143973430897e-06,
"loss": 0.4567,
"step": 303
},
{
"epoch": 1.71,
"learning_rate": 8.150160924461954e-06,
"loss": 0.445,
"step": 304
},
{
"epoch": 1.72,
"learning_rate": 8.09024671185182e-06,
"loss": 0.4801,
"step": 305
},
{
"epoch": 1.72,
"learning_rate": 8.030403565132942e-06,
"loss": 0.4593,
"step": 306
},
{
"epoch": 1.73,
"learning_rate": 7.970633711193253e-06,
"loss": 0.4467,
"step": 307
},
{
"epoch": 1.74,
"learning_rate": 7.910939374193314e-06,
"loss": 0.4602,
"step": 308
},
{
"epoch": 1.74,
"learning_rate": 7.851322775483543e-06,
"loss": 0.4715,
"step": 309
},
{
"epoch": 1.75,
"learning_rate": 7.791786133521548e-06,
"loss": 0.44,
"step": 310
},
{
"epoch": 1.75,
"learning_rate": 7.732331663789592e-06,
"loss": 0.4562,
"step": 311
},
{
"epoch": 1.76,
"learning_rate": 7.672961578712126e-06,
"loss": 0.4518,
"step": 312
},
{
"epoch": 1.76,
"learning_rate": 7.613678087573476e-06,
"loss": 0.4855,
"step": 313
},
{
"epoch": 1.77,
"learning_rate": 7.554483396435638e-06,
"loss": 0.4739,
"step": 314
},
{
"epoch": 1.77,
"learning_rate": 7.495379708056162e-06,
"loss": 0.476,
"step": 315
},
{
"epoch": 1.78,
"learning_rate": 7.436369221806201e-06,
"loss": 0.4569,
"step": 316
},
{
"epoch": 1.79,
"learning_rate": 7.377454133588657e-06,
"loss": 0.4674,
"step": 317
},
{
"epoch": 1.79,
"learning_rate": 7.318636635756491e-06,
"loss": 0.4715,
"step": 318
},
{
"epoch": 1.8,
"learning_rate": 7.259918917031103e-06,
"loss": 0.4461,
"step": 319
},
{
"epoch": 1.8,
"learning_rate": 7.201303162420914e-06,
"loss": 0.4682,
"step": 320
},
{
"epoch": 1.81,
"learning_rate": 7.142791553140045e-06,
"loss": 0.4432,
"step": 321
},
{
"epoch": 1.81,
"learning_rate": 7.084386266527152e-06,
"loss": 0.4648,
"step": 322
},
{
"epoch": 1.82,
"learning_rate": 7.026089475964415e-06,
"loss": 0.473,
"step": 323
},
{
"epoch": 1.83,
"learning_rate": 6.967903350796632e-06,
"loss": 0.4472,
"step": 324
},
{
"epoch": 1.83,
"learning_rate": 6.909830056250527e-06,
"loss": 0.4828,
"step": 325
},
{
"epoch": 1.84,
"learning_rate": 6.851871753354154e-06,
"loss": 0.473,
"step": 326
},
{
"epoch": 1.84,
"learning_rate": 6.794030598856484e-06,
"loss": 0.4938,
"step": 327
},
{
"epoch": 1.85,
"learning_rate": 6.736308745147169e-06,
"loss": 0.4475,
"step": 328
},
{
"epoch": 1.85,
"learning_rate": 6.678708340176414e-06,
"loss": 0.4679,
"step": 329
},
{
"epoch": 1.86,
"learning_rate": 6.621231527375071e-06,
"loss": 0.4997,
"step": 330
},
{
"epoch": 1.86,
"learning_rate": 6.563880445574873e-06,
"loss": 0.4632,
"step": 331
},
{
"epoch": 1.87,
"learning_rate": 6.506657228928828e-06,
"loss": 0.4436,
"step": 332
},
{
"epoch": 1.88,
"learning_rate": 6.4495640068318365e-06,
"loss": 0.449,
"step": 333
},
{
"epoch": 1.88,
"learning_rate": 6.392602903841416e-06,
"loss": 0.4882,
"step": 334
},
{
"epoch": 1.89,
"learning_rate": 6.33577603959866e-06,
"loss": 0.4583,
"step": 335
},
{
"epoch": 1.89,
"learning_rate": 6.2790855287493605e-06,
"loss": 0.4686,
"step": 336
},
{
"epoch": 1.9,
"learning_rate": 6.222533480865316e-06,
"loss": 0.4691,
"step": 337
},
{
"epoch": 1.9,
"learning_rate": 6.166122000365835e-06,
"loss": 0.4757,
"step": 338
},
{
"epoch": 1.91,
"learning_rate": 6.109853186439411e-06,
"loss": 0.4659,
"step": 339
},
{
"epoch": 1.92,
"learning_rate": 6.053729132965626e-06,
"loss": 0.4869,
"step": 340
},
{
"epoch": 1.92,
"learning_rate": 5.99775192843722e-06,
"loss": 0.4737,
"step": 341
},
{
"epoch": 1.93,
"learning_rate": 5.9419236558823845e-06,
"loss": 0.4778,
"step": 342
},
{
"epoch": 1.93,
"learning_rate": 5.886246392787235e-06,
"loss": 0.4526,
"step": 343
},
{
"epoch": 1.94,
"learning_rate": 5.830722211018517e-06,
"loss": 0.4575,
"step": 344
},
{
"epoch": 1.94,
"learning_rate": 5.7753531767464895e-06,
"loss": 0.4463,
"step": 345
},
{
"epoch": 1.95,
"learning_rate": 5.720141350368072e-06,
"loss": 0.4316,
"step": 346
},
{
"epoch": 1.95,
"learning_rate": 5.665088786430129e-06,
"loss": 0.4701,
"step": 347
},
{
"epoch": 1.96,
"learning_rate": 5.610197533553058e-06,
"loss": 0.4637,
"step": 348
},
{
"epoch": 1.97,
"learning_rate": 5.555469634354521e-06,
"loss": 0.4754,
"step": 349
},
{
"epoch": 1.97,
"learning_rate": 5.500907125373458e-06,
"loss": 0.4665,
"step": 350
},
{
"epoch": 1.98,
"learning_rate": 5.446512036994287e-06,
"loss": 0.4801,
"step": 351
},
{
"epoch": 1.98,
"learning_rate": 5.392286393371373e-06,
"loss": 0.4374,
"step": 352
},
{
"epoch": 1.99,
"learning_rate": 5.338232212353676e-06,
"loss": 0.4634,
"step": 353
},
{
"epoch": 1.99,
"learning_rate": 5.284351505409675e-06,
"loss": 0.4495,
"step": 354
},
{
"epoch": 2.0,
"learning_rate": 5.230646277552528e-06,
"loss": 0.4081,
"step": 355
},
{
"epoch": 2.01,
"learning_rate": 5.177118527265438e-06,
"loss": 0.2993,
"step": 356
},
{
"epoch": 2.01,
"learning_rate": 5.123770246427315e-06,
"loss": 0.3176,
"step": 357
},
{
"epoch": 2.02,
"learning_rate": 5.070603420238625e-06,
"loss": 0.2931,
"step": 358
},
{
"epoch": 2.02,
"learning_rate": 5.017620027147534e-06,
"loss": 0.2917,
"step": 359
},
{
"epoch": 2.03,
"learning_rate": 4.964822038776277e-06,
"loss": 0.3262,
"step": 360
},
{
"epoch": 2.03,
"learning_rate": 4.912211419847795e-06,
"loss": 0.2681,
"step": 361
},
{
"epoch": 2.04,
"learning_rate": 4.859790128112631e-06,
"loss": 0.2766,
"step": 362
},
{
"epoch": 2.05,
"learning_rate": 4.807560114276059e-06,
"loss": 0.2923,
"step": 363
},
{
"epoch": 2.05,
"learning_rate": 4.755523321925508e-06,
"loss": 0.2703,
"step": 364
},
{
"epoch": 2.06,
"learning_rate": 4.703681687458231e-06,
"loss": 0.264,
"step": 365
},
{
"epoch": 2.06,
"learning_rate": 4.652037140009259e-06,
"loss": 0.2852,
"step": 366
},
{
"epoch": 2.07,
"learning_rate": 4.600591601379597e-06,
"loss": 0.2717,
"step": 367
},
{
"epoch": 2.07,
"learning_rate": 4.549346985964719e-06,
"loss": 0.2791,
"step": 368
},
{
"epoch": 2.08,
"learning_rate": 4.49830520068333e-06,
"loss": 0.2714,
"step": 369
},
{
"epoch": 2.08,
"learning_rate": 4.447468144906401e-06,
"loss": 0.2834,
"step": 370
},
{
"epoch": 2.09,
"learning_rate": 4.396837710386503e-06,
"loss": 0.2984,
"step": 371
},
{
"epoch": 2.1,
"learning_rate": 4.346415781187386e-06,
"loss": 0.2778,
"step": 372
},
{
"epoch": 2.1,
"learning_rate": 4.296204233613888e-06,
"loss": 0.2775,
"step": 373
},
{
"epoch": 2.11,
"learning_rate": 4.246204936142116e-06,
"loss": 0.2705,
"step": 374
},
{
"epoch": 2.11,
"learning_rate": 4.196419749349905e-06,
"loss": 0.2828,
"step": 375
},
{
"epoch": 2.12,
"learning_rate": 4.1468505258475785e-06,
"loss": 0.2695,
"step": 376
},
{
"epoch": 2.12,
"learning_rate": 4.097499110209032e-06,
"loss": 0.2637,
"step": 377
},
{
"epoch": 2.13,
"learning_rate": 4.0483673389030675e-06,
"loss": 0.2809,
"step": 378
},
{
"epoch": 2.14,
"learning_rate": 3.9994570402250656e-06,
"loss": 0.2638,
"step": 379
},
{
"epoch": 2.14,
"learning_rate": 3.950770034228946e-06,
"loss": 0.2842,
"step": 380
},
{
"epoch": 2.15,
"learning_rate": 3.902308132659457e-06,
"loss": 0.2625,
"step": 381
},
{
"epoch": 2.15,
"learning_rate": 3.854073138884731e-06,
"loss": 0.2624,
"step": 382
},
{
"epoch": 2.16,
"learning_rate": 3.806066847829192e-06,
"loss": 0.2998,
"step": 383
},
{
"epoch": 2.16,
"learning_rate": 3.758291045906761e-06,
"loss": 0.2672,
"step": 384
},
{
"epoch": 2.17,
"learning_rate": 3.7107475109543767e-06,
"loss": 0.2729,
"step": 385
},
{
"epoch": 2.17,
"learning_rate": 3.6634380121658484e-06,
"loss": 0.2758,
"step": 386
},
{
"epoch": 2.18,
"learning_rate": 3.616364310026006e-06,
"loss": 0.2573,
"step": 387
},
{
"epoch": 2.19,
"learning_rate": 3.5695281562451965e-06,
"loss": 0.2593,
"step": 388
},
{
"epoch": 2.19,
"learning_rate": 3.5229312936941017e-06,
"loss": 0.2614,
"step": 389
},
{
"epoch": 2.2,
"learning_rate": 3.476575456338871e-06,
"loss": 0.2877,
"step": 390
},
{
"epoch": 2.2,
"learning_rate": 3.4304623691766193e-06,
"loss": 0.28,
"step": 391
},
{
"epoch": 2.21,
"learning_rate": 3.38459374817121e-06,
"loss": 0.2853,
"step": 392
},
{
"epoch": 2.21,
"learning_rate": 3.3389713001894163e-06,
"loss": 0.28,
"step": 393
},
{
"epoch": 2.22,
"learning_rate": 3.293596722937399e-06,
"loss": 0.2792,
"step": 394
},
{
"epoch": 2.23,
"learning_rate": 3.24847170489754e-06,
"loss": 0.2887,
"step": 395
},
{
"epoch": 2.23,
"learning_rate": 3.203597925265598e-06,
"loss": 0.2718,
"step": 396
},
{
"epoch": 2.24,
"learning_rate": 3.1589770538882303e-06,
"loss": 0.2661,
"step": 397
},
{
"epoch": 2.24,
"learning_rate": 3.1146107512008505e-06,
"loss": 0.3056,
"step": 398
},
{
"epoch": 2.25,
"learning_rate": 3.07050066816584e-06,
"loss": 0.2823,
"step": 399
},
{
"epoch": 2.25,
"learning_rate": 3.0266484462111244e-06,
"loss": 0.2702,
"step": 400
},
{
"epoch": 2.26,
"learning_rate": 2.98305571716907e-06,
"loss": 0.2929,
"step": 401
},
{
"epoch": 2.26,
"learning_rate": 2.9397241032157764e-06,
"loss": 0.2758,
"step": 402
},
{
"epoch": 2.27,
"learning_rate": 2.8966552168107133e-06,
"loss": 0.2792,
"step": 403
},
{
"epoch": 2.28,
"learning_rate": 2.8538506606367033e-06,
"loss": 0.2856,
"step": 404
},
{
"epoch": 2.28,
"learning_rate": 2.8113120275402937e-06,
"loss": 0.2858,
"step": 405
},
{
"epoch": 2.29,
"learning_rate": 2.7690409004724883e-06,
"loss": 0.2864,
"step": 406
},
{
"epoch": 2.29,
"learning_rate": 2.7270388524298262e-06,
"loss": 0.2815,
"step": 407
},
{
"epoch": 2.3,
"learning_rate": 2.6853074463958618e-06,
"loss": 0.2969,
"step": 408
},
{
"epoch": 2.3,
"learning_rate": 2.6438482352829896e-06,
"loss": 0.2545,
"step": 409
},
{
"epoch": 2.31,
"learning_rate": 2.6026627618746793e-06,
"loss": 0.2652,
"step": 410
},
{
"epoch": 2.32,
"learning_rate": 2.56175255876804e-06,
"loss": 0.2715,
"step": 411
},
{
"epoch": 2.32,
"learning_rate": 2.521119148316803e-06,
"loss": 0.2738,
"step": 412
},
{
"epoch": 2.33,
"learning_rate": 2.4807640425746693e-06,
"loss": 0.2736,
"step": 413
},
{
"epoch": 2.33,
"learning_rate": 2.4406887432390426e-06,
"loss": 0.2651,
"step": 414
},
{
"epoch": 2.34,
"learning_rate": 2.400894741595152e-06,
"loss": 0.2757,
"step": 415
},
{
"epoch": 2.34,
"learning_rate": 2.3613835184605527e-06,
"loss": 0.2973,
"step": 416
},
{
"epoch": 2.35,
"learning_rate": 2.32215654413002e-06,
"loss": 0.2528,
"step": 417
},
{
"epoch": 2.35,
"learning_rate": 2.2832152783208393e-06,
"loss": 0.2672,
"step": 418
},
{
"epoch": 2.36,
"learning_rate": 2.2445611701185e-06,
"loss": 0.2783,
"step": 419
},
{
"epoch": 2.37,
"learning_rate": 2.206195657922745e-06,
"loss": 0.2881,
"step": 420
},
{
"epoch": 2.37,
"learning_rate": 2.1681201693940667e-06,
"loss": 0.2982,
"step": 421
},
{
"epoch": 2.38,
"learning_rate": 2.1303361214005723e-06,
"loss": 0.2579,
"step": 422
},
{
"epoch": 2.38,
"learning_rate": 2.0928449199652602e-06,
"loss": 0.2859,
"step": 423
},
{
"epoch": 2.39,
"learning_rate": 2.0556479602137036e-06,
"loss": 0.2796,
"step": 424
},
{
"epoch": 2.39,
"learning_rate": 2.0187466263221243e-06,
"loss": 0.2626,
"step": 425
},
{
"epoch": 2.4,
"learning_rate": 1.982142291465896e-06,
"loss": 0.2751,
"step": 426
},
{
"epoch": 2.41,
"learning_rate": 1.9458363177684368e-06,
"loss": 0.2801,
"step": 427
},
{
"epoch": 2.41,
"learning_rate": 1.9098300562505266e-06,
"loss": 0.2709,
"step": 428
},
{
"epoch": 2.42,
"learning_rate": 1.8741248467800366e-06,
"loss": 0.2621,
"step": 429
},
{
"epoch": 2.42,
"learning_rate": 1.838722018022061e-06,
"loss": 0.2532,
"step": 430
},
{
"epoch": 2.43,
"learning_rate": 1.8036228873894745e-06,
"loss": 0.2769,
"step": 431
},
{
"epoch": 2.43,
"learning_rate": 1.7688287609939248e-06,
"loss": 0.25,
"step": 432
},
{
"epoch": 2.44,
"learning_rate": 1.7343409335972071e-06,
"loss": 0.2861,
"step": 433
},
{
"epoch": 2.45,
"learning_rate": 1.700160688563095e-06,
"loss": 0.268,
"step": 434
},
{
"epoch": 2.45,
"learning_rate": 1.6662892978095912e-06,
"loss": 0.2649,
"step": 435
},
{
"epoch": 2.46,
"learning_rate": 1.6327280217615793e-06,
"loss": 0.294,
"step": 436
},
{
"epoch": 2.46,
"learning_rate": 1.5994781093039336e-06,
"loss": 0.2808,
"step": 437
},
{
"epoch": 2.47,
"learning_rate": 1.566540797735039e-06,
"loss": 0.2561,
"step": 438
},
{
"epoch": 2.47,
"learning_rate": 1.5339173127207564e-06,
"loss": 0.2523,
"step": 439
},
{
"epoch": 2.48,
"learning_rate": 1.5016088682488027e-06,
"loss": 0.2657,
"step": 440
},
{
"epoch": 2.48,
"learning_rate": 1.4696166665835853e-06,
"loss": 0.2931,
"step": 441
},
{
"epoch": 2.49,
"learning_rate": 1.4379418982214544e-06,
"loss": 0.2747,
"step": 442
},
{
"epoch": 2.5,
"learning_rate": 1.4065857418464123e-06,
"loss": 0.2759,
"step": 443
},
{
"epoch": 2.5,
"learning_rate": 1.375549364286244e-06,
"loss": 0.2907,
"step": 444
},
{
"epoch": 2.51,
"learning_rate": 1.3448339204690975e-06,
"loss": 0.2907,
"step": 445
},
{
"epoch": 2.51,
"learning_rate": 1.3144405533805138e-06,
"loss": 0.2615,
"step": 446
},
{
"epoch": 2.52,
"learning_rate": 1.2843703940208818e-06,
"loss": 0.2708,
"step": 447
},
{
"epoch": 2.52,
"learning_rate": 1.254624561363369e-06,
"loss": 0.2554,
"step": 448
},
{
"epoch": 2.53,
"learning_rate": 1.2252041623122646e-06,
"loss": 0.2733,
"step": 449
},
{
"epoch": 2.54,
"learning_rate": 1.1961102916617962e-06,
"loss": 0.2721,
"step": 450
},
{
"epoch": 2.54,
"learning_rate": 1.1673440320553941e-06,
"loss": 0.2719,
"step": 451
},
{
"epoch": 2.55,
"learning_rate": 1.1389064539453953e-06,
"loss": 0.2629,
"step": 452
},
{
"epoch": 2.55,
"learning_rate": 1.1107986155532247e-06,
"loss": 0.2506,
"step": 453
},
{
"epoch": 2.56,
"learning_rate": 1.0830215628299956e-06,
"loss": 0.2671,
"step": 454
},
{
"epoch": 2.56,
"learning_rate": 1.0555763294176047e-06,
"loss": 0.2729,
"step": 455
},
{
"epoch": 2.57,
"learning_rate": 1.02846393661026e-06,
"loss": 0.2729,
"step": 456
},
{
"epoch": 2.57,
"learning_rate": 1.0016853933164773e-06,
"loss": 0.2661,
"step": 457
},
{
"epoch": 2.58,
"learning_rate": 9.75241696021544e-07,
"loss": 0.2659,
"step": 458
},
{
"epoch": 2.59,
"learning_rate": 9.491338287504249e-07,
"loss": 0.2772,
"step": 459
},
{
"epoch": 2.59,
"learning_rate": 9.233627630311503e-07,
"loss": 0.2874,
"step": 460
},
{
"epoch": 2.6,
"learning_rate": 8.979294578586739e-07,
"loss": 0.2815,
"step": 461
},
{
"epoch": 2.6,
"learning_rate": 8.728348596591641e-07,
"loss": 0.2656,
"step": 462
},
{
"epoch": 2.61,
"learning_rate": 8.480799022548114e-07,
"loss": 0.2672,
"step": 463
},
{
"epoch": 2.61,
"learning_rate": 8.236655068290556e-07,
"loss": 0.2705,
"step": 464
},
{
"epoch": 2.62,
"learning_rate": 7.995925818923222e-07,
"loss": 0.2587,
"step": 465
},
{
"epoch": 2.63,
"learning_rate": 7.758620232482083e-07,
"loss": 0.2844,
"step": 466
},
{
"epoch": 2.63,
"learning_rate": 7.524747139601474e-07,
"loss": 0.2846,
"step": 467
},
{
"epoch": 2.64,
"learning_rate": 7.294315243185579e-07,
"loss": 0.2802,
"step": 468
},
{
"epoch": 2.64,
"learning_rate": 7.067333118084429e-07,
"loss": 0.2652,
"step": 469
},
{
"epoch": 2.65,
"learning_rate": 6.843809210774921e-07,
"loss": 0.2939,
"step": 470
},
{
"epoch": 2.65,
"learning_rate": 6.623751839046455e-07,
"loss": 0.2772,
"step": 471
},
{
"epoch": 2.66,
"learning_rate": 6.407169191691465e-07,
"loss": 0.2659,
"step": 472
},
{
"epoch": 2.66,
"learning_rate": 6.194069328200669e-07,
"loss": 0.2629,
"step": 473
},
{
"epoch": 2.67,
"learning_rate": 5.984460178463103e-07,
"loss": 0.2837,
"step": 474
},
{
"epoch": 2.68,
"learning_rate": 5.77834954247114e-07,
"loss": 0.2851,
"step": 475
},
{
"epoch": 2.68,
"learning_rate": 5.575745090030138e-07,
"loss": 0.2751,
"step": 476
},
{
"epoch": 2.69,
"learning_rate": 5.376654360473121e-07,
"loss": 0.2587,
"step": 477
},
{
"epoch": 2.69,
"learning_rate": 5.181084762380151e-07,
"loss": 0.2821,
"step": 478
},
{
"epoch": 2.7,
"learning_rate": 4.989043573302655e-07,
"loss": 0.2923,
"step": 479
},
{
"epoch": 2.7,
"learning_rate": 4.800537939492645e-07,
"loss": 0.2614,
"step": 480
},
{
"epoch": 2.71,
"learning_rate": 4.61557487563673e-07,
"loss": 0.2755,
"step": 481
},
{
"epoch": 2.72,
"learning_rate": 4.4341612645952047e-07,
"loss": 0.2675,
"step": 482
},
{
"epoch": 2.72,
"learning_rate": 4.2563038571457605e-07,
"loss": 0.2566,
"step": 483
},
{
"epoch": 2.73,
"learning_rate": 4.08200927173239e-07,
"loss": 0.2642,
"step": 484
},
{
"epoch": 2.73,
"learning_rate": 3.9112839942190727e-07,
"loss": 0.2584,
"step": 485
},
{
"epoch": 2.74,
"learning_rate": 3.7441343776484116e-07,
"loss": 0.2634,
"step": 486
},
{
"epoch": 2.74,
"learning_rate": 3.5805666420052456e-07,
"loss": 0.2768,
"step": 487
},
{
"epoch": 2.75,
"learning_rate": 3.420586873985132e-07,
"loss": 0.2945,
"step": 488
},
{
"epoch": 2.75,
"learning_rate": 3.264201026767977e-07,
"loss": 0.2661,
"step": 489
},
{
"epoch": 2.76,
"learning_rate": 3.1114149197963185e-07,
"loss": 0.2597,
"step": 490
},
{
"epoch": 2.77,
"learning_rate": 2.9622342385589256e-07,
"loss": 0.2616,
"step": 491
},
{
"epoch": 2.77,
"learning_rate": 2.8166645343792096e-07,
"loss": 0.2608,
"step": 492
},
{
"epoch": 2.78,
"learning_rate": 2.674711224208548e-07,
"loss": 0.2586,
"step": 493
},
{
"epoch": 2.78,
"learning_rate": 2.536379590424809e-07,
"loss": 0.266,
"step": 494
},
{
"epoch": 2.79,
"learning_rate": 2.4016747806357657e-07,
"loss": 0.2741,
"step": 495
},
{
"epoch": 2.79,
"learning_rate": 2.2706018074875046e-07,
"loss": 0.2456,
"step": 496
},
{
"epoch": 2.8,
"learning_rate": 2.1431655484779435e-07,
"loss": 0.2601,
"step": 497
},
{
"epoch": 2.81,
"learning_rate": 2.019370745775273e-07,
"loss": 0.2556,
"step": 498
},
{
"epoch": 2.81,
"learning_rate": 1.8992220060415346e-07,
"loss": 0.2482,
"step": 499
},
{
"epoch": 2.82,
"learning_rate": 1.782723800261199e-07,
"loss": 0.2855,
"step": 500
},
{
"epoch": 2.82,
"learning_rate": 1.669880463574758e-07,
"loss": 0.2928,
"step": 501
},
{
"epoch": 2.83,
"learning_rate": 1.5606961951174394e-07,
"loss": 0.2831,
"step": 502
},
{
"epoch": 2.83,
"learning_rate": 1.4551750578629232e-07,
"loss": 0.2842,
"step": 503
},
{
"epoch": 2.84,
"learning_rate": 1.3533209784721502e-07,
"loss": 0.2862,
"step": 504
},
{
"epoch": 2.85,
"learning_rate": 1.2551377471472282e-07,
"loss": 0.2873,
"step": 505
},
{
"epoch": 2.85,
"learning_rate": 1.160629017490389e-07,
"loss": 0.2757,
"step": 506
},
{
"epoch": 2.86,
"learning_rate": 1.069798306367975e-07,
"loss": 0.2611,
"step": 507
},
{
"epoch": 2.86,
"learning_rate": 9.826489937796557e-08,
"loss": 0.2871,
"step": 508
},
{
"epoch": 2.87,
"learning_rate": 8.991843227325492e-08,
"loss": 0.2887,
"step": 509
},
{
"epoch": 2.87,
"learning_rate": 8.194073991206641e-08,
"loss": 0.2654,
"step": 510
},
{
"epoch": 2.88,
"learning_rate": 7.433211916092143e-08,
"loss": 0.2701,
"step": 511
},
{
"epoch": 2.88,
"learning_rate": 6.709285315242064e-08,
"loss": 0.2744,
"step": 512
},
{
"epoch": 2.89,
"learning_rate": 6.022321127470698e-08,
"loss": 0.2575,
"step": 513
},
{
"epoch": 2.9,
"learning_rate": 5.3723449161439124e-08,
"loss": 0.2832,
"step": 514
},
{
"epoch": 2.9,
"learning_rate": 4.759380868228247e-08,
"loss": 0.2522,
"step": 515
},
{
"epoch": 2.91,
"learning_rate": 4.183451793390747e-08,
"loss": 0.2872,
"step": 516
},
{
"epoch": 2.91,
"learning_rate": 3.6445791231497496e-08,
"loss": 0.2871,
"step": 517
},
{
"epoch": 2.92,
"learning_rate": 3.1427829100779686e-08,
"loss": 0.2628,
"step": 518
},
{
"epoch": 2.92,
"learning_rate": 2.6780818270562002e-08,
"loss": 0.2716,
"step": 519
},
{
"epoch": 2.93,
"learning_rate": 2.250493166577772e-08,
"loss": 0.2707,
"step": 520
},
{
"epoch": 2.94,
"learning_rate": 1.860032840106163e-08,
"loss": 0.273,
"step": 521
},
{
"epoch": 2.94,
"learning_rate": 1.5067153774820375e-08,
"loss": 0.2606,
"step": 522
},
{
"epoch": 2.95,
"learning_rate": 1.190553926382898e-08,
"loss": 0.2783,
"step": 523
},
{
"epoch": 2.95,
"learning_rate": 9.115602518338096e-09,
"loss": 0.2792,
"step": 524
},
{
"epoch": 2.96,
"learning_rate": 6.697447357695286e-09,
"loss": 0.2715,
"step": 525
},
{
"epoch": 2.96,
"learning_rate": 4.651163766484779e-09,
"loss": 0.2901,
"step": 526
},
{
"epoch": 2.97,
"learning_rate": 2.9768278911723737e-09,
"loss": 0.2608,
"step": 527
},
{
"epoch": 2.97,
"learning_rate": 1.6745020372777033e-09,
"loss": 0.2788,
"step": 528
},
{
"epoch": 2.98,
"learning_rate": 7.442346670549771e-10,
"loss": 0.2611,
"step": 529
},
{
"epoch": 2.99,
"learning_rate": 1.8606039768775952e-10,
"loss": 0.2693,
"step": 530
},
{
"epoch": 2.99,
"learning_rate": 0.0,
"loss": 0.2671,
"step": 531
},
{
"epoch": 2.99,
"step": 531,
"total_flos": 5.02302897078272e+17,
"train_loss": 0.49581964204540363,
"train_runtime": 4921.9709,
"train_samples_per_second": 13.814,
"train_steps_per_second": 0.108
}
],
"max_steps": 531,
"num_train_epochs": 3,
"total_flos": 5.02302897078272e+17,
"trial_name": null,
"trial_params": null
}