embodied_reasoner_qwen2vl / trainer_state.json
sunkichen's picture
Upload folder using huggingface_hub
169dc79 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9987217724755006,
"eval_steps": 500,
"global_step": 586,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0034086067319982955,
"grad_norm": 7.117070422384071,
"learning_rate": 3.3898305084745766e-07,
"loss": 1.5934,
"step": 2
},
{
"epoch": 0.006817213463996591,
"grad_norm": 7.58934528658197,
"learning_rate": 6.779661016949153e-07,
"loss": 1.6231,
"step": 4
},
{
"epoch": 0.010225820195994887,
"grad_norm": 6.51402630995005,
"learning_rate": 1.016949152542373e-06,
"loss": 1.5692,
"step": 6
},
{
"epoch": 0.013634426927993182,
"grad_norm": 5.957629945500037,
"learning_rate": 1.3559322033898307e-06,
"loss": 1.5249,
"step": 8
},
{
"epoch": 0.017043033659991477,
"grad_norm": 4.594385046827305,
"learning_rate": 1.6949152542372882e-06,
"loss": 1.5132,
"step": 10
},
{
"epoch": 0.020451640391989774,
"grad_norm": 3.4805968864154875,
"learning_rate": 2.033898305084746e-06,
"loss": 1.4339,
"step": 12
},
{
"epoch": 0.02386024712398807,
"grad_norm": 3.672492727269049,
"learning_rate": 2.372881355932204e-06,
"loss": 1.3733,
"step": 14
},
{
"epoch": 0.027268853855986364,
"grad_norm": 3.5696770668674653,
"learning_rate": 2.7118644067796613e-06,
"loss": 1.2981,
"step": 16
},
{
"epoch": 0.03067746058798466,
"grad_norm": 3.721066217222145,
"learning_rate": 3.0508474576271192e-06,
"loss": 1.2152,
"step": 18
},
{
"epoch": 0.034086067319982954,
"grad_norm": 3.3165399160593765,
"learning_rate": 3.3898305084745763e-06,
"loss": 1.1972,
"step": 20
},
{
"epoch": 0.03749467405198125,
"grad_norm": 3.0704960315045087,
"learning_rate": 3.7288135593220342e-06,
"loss": 1.1098,
"step": 22
},
{
"epoch": 0.04090328078397955,
"grad_norm": 3.0249632466321708,
"learning_rate": 4.067796610169492e-06,
"loss": 1.1101,
"step": 24
},
{
"epoch": 0.044311887515977845,
"grad_norm": 2.51223844701648,
"learning_rate": 4.40677966101695e-06,
"loss": 1.0707,
"step": 26
},
{
"epoch": 0.04772049424797614,
"grad_norm": 2.337756418129475,
"learning_rate": 4.745762711864408e-06,
"loss": 1.0054,
"step": 28
},
{
"epoch": 0.05112910097997444,
"grad_norm": 2.507569726418487,
"learning_rate": 5.084745762711865e-06,
"loss": 0.9751,
"step": 30
},
{
"epoch": 0.05453770771197273,
"grad_norm": 2.8133808280998664,
"learning_rate": 5.423728813559323e-06,
"loss": 1.0073,
"step": 32
},
{
"epoch": 0.057946314443971025,
"grad_norm": 2.259104843714805,
"learning_rate": 5.7627118644067805e-06,
"loss": 0.9816,
"step": 34
},
{
"epoch": 0.06135492117596932,
"grad_norm": 2.489954458392594,
"learning_rate": 6.1016949152542385e-06,
"loss": 0.9969,
"step": 36
},
{
"epoch": 0.06476352790796762,
"grad_norm": 2.489196411046845,
"learning_rate": 6.440677966101695e-06,
"loss": 0.9751,
"step": 38
},
{
"epoch": 0.06817213463996591,
"grad_norm": 2.328531532406521,
"learning_rate": 6.779661016949153e-06,
"loss": 0.9063,
"step": 40
},
{
"epoch": 0.07158074137196421,
"grad_norm": 1.8777822207540218,
"learning_rate": 7.1186440677966106e-06,
"loss": 0.9076,
"step": 42
},
{
"epoch": 0.0749893481039625,
"grad_norm": 2.3564822959993403,
"learning_rate": 7.4576271186440685e-06,
"loss": 0.9544,
"step": 44
},
{
"epoch": 0.0783979548359608,
"grad_norm": 2.0258103744170604,
"learning_rate": 7.796610169491526e-06,
"loss": 0.9056,
"step": 46
},
{
"epoch": 0.0818065615679591,
"grad_norm": 2.156862728985429,
"learning_rate": 8.135593220338983e-06,
"loss": 0.853,
"step": 48
},
{
"epoch": 0.08521516829995739,
"grad_norm": 2.34893762869447,
"learning_rate": 8.47457627118644e-06,
"loss": 0.8894,
"step": 50
},
{
"epoch": 0.08862377503195569,
"grad_norm": 2.315393254315105,
"learning_rate": 8.8135593220339e-06,
"loss": 0.9133,
"step": 52
},
{
"epoch": 0.09203238176395398,
"grad_norm": 2.301164621363547,
"learning_rate": 9.152542372881356e-06,
"loss": 0.8895,
"step": 54
},
{
"epoch": 0.09544098849595228,
"grad_norm": 2.2398380284062425,
"learning_rate": 9.491525423728815e-06,
"loss": 0.8563,
"step": 56
},
{
"epoch": 0.09884959522795057,
"grad_norm": 2.029764049433947,
"learning_rate": 9.830508474576272e-06,
"loss": 0.8813,
"step": 58
},
{
"epoch": 0.10225820195994888,
"grad_norm": 2.0880574238328387,
"learning_rate": 9.999911158222826e-06,
"loss": 0.9337,
"step": 60
},
{
"epoch": 0.10566680869194717,
"grad_norm": 2.3366310826461993,
"learning_rate": 9.999200442948178e-06,
"loss": 0.924,
"step": 62
},
{
"epoch": 0.10907541542394546,
"grad_norm": 2.344634528525901,
"learning_rate": 9.997779113423916e-06,
"loss": 0.9312,
"step": 64
},
{
"epoch": 0.11248402215594376,
"grad_norm": 2.551521533584163,
"learning_rate": 9.99564737168575e-06,
"loss": 0.9062,
"step": 66
},
{
"epoch": 0.11589262888794205,
"grad_norm": 2.2111244932074,
"learning_rate": 9.992805520751352e-06,
"loss": 0.9032,
"step": 68
},
{
"epoch": 0.11930123561994035,
"grad_norm": 2.2310561621390463,
"learning_rate": 9.989253964577267e-06,
"loss": 0.8851,
"step": 70
},
{
"epoch": 0.12270984235193864,
"grad_norm": 2.1278482556209153,
"learning_rate": 9.984993208001513e-06,
"loss": 0.8663,
"step": 72
},
{
"epoch": 0.12611844908393693,
"grad_norm": 2.173728187499986,
"learning_rate": 9.980023856671805e-06,
"loss": 0.889,
"step": 74
},
{
"epoch": 0.12952705581593524,
"grad_norm": 2.0928351193635026,
"learning_rate": 9.974346616959476e-06,
"loss": 0.8293,
"step": 76
},
{
"epoch": 0.13293566254793354,
"grad_norm": 2.8128383896971902,
"learning_rate": 9.96796229585906e-06,
"loss": 0.8746,
"step": 78
},
{
"epoch": 0.13634426927993182,
"grad_norm": 2.0208631286469254,
"learning_rate": 9.960871800873586e-06,
"loss": 0.9004,
"step": 80
},
{
"epoch": 0.13975287601193012,
"grad_norm": 2.1974668712171157,
"learning_rate": 9.953076139885581e-06,
"loss": 0.9384,
"step": 82
},
{
"epoch": 0.14316148274392843,
"grad_norm": 2.3881999042131516,
"learning_rate": 9.944576421013804e-06,
"loss": 0.8679,
"step": 84
},
{
"epoch": 0.1465700894759267,
"grad_norm": 1.9809033021368312,
"learning_rate": 9.935373852455724e-06,
"loss": 0.855,
"step": 86
},
{
"epoch": 0.149978696207925,
"grad_norm": 2.063145920288984,
"learning_rate": 9.925469742315799e-06,
"loss": 0.8509,
"step": 88
},
{
"epoch": 0.1533873029399233,
"grad_norm": 2.1854349522591248,
"learning_rate": 9.91486549841951e-06,
"loss": 0.8774,
"step": 90
},
{
"epoch": 0.1567959096719216,
"grad_norm": 1.9442802721360337,
"learning_rate": 9.903562628113266e-06,
"loss": 0.9293,
"step": 92
},
{
"epoch": 0.1602045164039199,
"grad_norm": 1.9595403019093638,
"learning_rate": 9.891562738050127e-06,
"loss": 0.835,
"step": 94
},
{
"epoch": 0.1636131231359182,
"grad_norm": 2.001941381433757,
"learning_rate": 9.878867533961434e-06,
"loss": 0.8594,
"step": 96
},
{
"epoch": 0.1670217298679165,
"grad_norm": 2.24822151912714,
"learning_rate": 9.865478820414344e-06,
"loss": 0.85,
"step": 98
},
{
"epoch": 0.17043033659991477,
"grad_norm": 1.8987704947701014,
"learning_rate": 9.851398500555321e-06,
"loss": 0.8647,
"step": 100
},
{
"epoch": 0.17383894333191308,
"grad_norm": 1.9294894488605314,
"learning_rate": 9.8366285758396e-06,
"loss": 0.8374,
"step": 102
},
{
"epoch": 0.17724755006391138,
"grad_norm": 2.0659769053985455,
"learning_rate": 9.82117114574671e-06,
"loss": 0.8625,
"step": 104
},
{
"epoch": 0.18065615679590968,
"grad_norm": 2.1086334049039044,
"learning_rate": 9.805028407482027e-06,
"loss": 0.8418,
"step": 106
},
{
"epoch": 0.18406476352790796,
"grad_norm": 1.9093180149494513,
"learning_rate": 9.78820265566445e-06,
"loss": 0.8378,
"step": 108
},
{
"epoch": 0.18747337025990626,
"grad_norm": 2.108586166296398,
"learning_rate": 9.770696282000245e-06,
"loss": 0.8717,
"step": 110
},
{
"epoch": 0.19088197699190457,
"grad_norm": 1.9600773842942703,
"learning_rate": 9.75251177494306e-06,
"loss": 0.8626,
"step": 112
},
{
"epoch": 0.19429058372390284,
"grad_norm": 2.2720190152376425,
"learning_rate": 9.733651719340207e-06,
"loss": 0.8714,
"step": 114
},
{
"epoch": 0.19769919045590115,
"grad_norm": 1.9850702888722942,
"learning_rate": 9.714118796065243e-06,
"loss": 0.8529,
"step": 116
},
{
"epoch": 0.20110779718789945,
"grad_norm": 2.0995202528398234,
"learning_rate": 9.693915781636887e-06,
"loss": 0.8508,
"step": 118
},
{
"epoch": 0.20451640391989775,
"grad_norm": 2.191390132974462,
"learning_rate": 9.673045547824356e-06,
"loss": 0.8699,
"step": 120
},
{
"epoch": 0.20792501065189603,
"grad_norm": 1.92525268724109,
"learning_rate": 9.651511061239151e-06,
"loss": 0.8746,
"step": 122
},
{
"epoch": 0.21133361738389433,
"grad_norm": 2.1743856429859765,
"learning_rate": 9.62931538291337e-06,
"loss": 0.8643,
"step": 124
},
{
"epoch": 0.21474222411589264,
"grad_norm": 2.2545723733622642,
"learning_rate": 9.606461667864603e-06,
"loss": 0.8518,
"step": 126
},
{
"epoch": 0.21815083084789091,
"grad_norm": 2.088328769098791,
"learning_rate": 9.582953164647438e-06,
"loss": 0.8036,
"step": 128
},
{
"epoch": 0.22155943757988922,
"grad_norm": 1.7154521796104316,
"learning_rate": 9.558793214891716e-06,
"loss": 0.8749,
"step": 130
},
{
"epoch": 0.22496804431188752,
"grad_norm": 1.9971816903951989,
"learning_rate": 9.533985252827525e-06,
"loss": 0.8442,
"step": 132
},
{
"epoch": 0.22837665104388583,
"grad_norm": 2.0452202203810246,
"learning_rate": 9.508532804797035e-06,
"loss": 0.8331,
"step": 134
},
{
"epoch": 0.2317852577758841,
"grad_norm": 1.8873007094774574,
"learning_rate": 9.482439488753248e-06,
"loss": 0.8404,
"step": 136
},
{
"epoch": 0.2351938645078824,
"grad_norm": 1.8694505860373616,
"learning_rate": 9.455709013745726e-06,
"loss": 0.8518,
"step": 138
},
{
"epoch": 0.2386024712398807,
"grad_norm": 1.8826067133760322,
"learning_rate": 9.42834517939336e-06,
"loss": 0.8224,
"step": 140
},
{
"epoch": 0.24201107797187899,
"grad_norm": 1.783995366074571,
"learning_rate": 9.400351875344268e-06,
"loss": 0.8368,
"step": 142
},
{
"epoch": 0.2454196847038773,
"grad_norm": 1.9541798533923145,
"learning_rate": 9.371733080722911e-06,
"loss": 0.8753,
"step": 144
},
{
"epoch": 0.2488282914358756,
"grad_norm": 1.8725559407975727,
"learning_rate": 9.342492863564468e-06,
"loss": 0.8382,
"step": 146
},
{
"epoch": 0.25223689816787387,
"grad_norm": 2.166287610640464,
"learning_rate": 9.312635380236582e-06,
"loss": 0.8311,
"step": 148
},
{
"epoch": 0.2556455048998722,
"grad_norm": 2.100500640351814,
"learning_rate": 9.282164874848554e-06,
"loss": 0.8731,
"step": 150
},
{
"epoch": 0.2590541116318705,
"grad_norm": 2.269255130053819,
"learning_rate": 9.251085678648072e-06,
"loss": 0.832,
"step": 152
},
{
"epoch": 0.26246271836386875,
"grad_norm": 2.019471552415879,
"learning_rate": 9.219402209405519e-06,
"loss": 0.8715,
"step": 154
},
{
"epoch": 0.2658713250958671,
"grad_norm": 2.057431118897924,
"learning_rate": 9.18711897078603e-06,
"loss": 0.8809,
"step": 156
},
{
"epoch": 0.26927993182786536,
"grad_norm": 1.7964562596950788,
"learning_rate": 9.154240551709298e-06,
"loss": 0.8159,
"step": 158
},
{
"epoch": 0.27268853855986364,
"grad_norm": 1.77492002243483,
"learning_rate": 9.1207716256973e-06,
"loss": 0.8443,
"step": 160
},
{
"epoch": 0.27609714529186197,
"grad_norm": 2.042551909185088,
"learning_rate": 9.08671695020995e-06,
"loss": 0.8718,
"step": 162
},
{
"epoch": 0.27950575202386024,
"grad_norm": 1.9983795965696414,
"learning_rate": 9.052081365968873e-06,
"loss": 0.827,
"step": 164
},
{
"epoch": 0.2829143587558585,
"grad_norm": 2.052158753663356,
"learning_rate": 9.016869796269299e-06,
"loss": 0.8478,
"step": 166
},
{
"epoch": 0.28632296548785685,
"grad_norm": 1.8439668797771172,
"learning_rate": 8.981087246280246e-06,
"loss": 0.8063,
"step": 168
},
{
"epoch": 0.2897315722198551,
"grad_norm": 1.8391702615486196,
"learning_rate": 8.944738802333062e-06,
"loss": 0.8345,
"step": 170
},
{
"epoch": 0.2931401789518534,
"grad_norm": 1.832576311204302,
"learning_rate": 8.907829631198412e-06,
"loss": 0.7921,
"step": 172
},
{
"epoch": 0.29654878568385173,
"grad_norm": 1.9349714568402026,
"learning_rate": 8.870364979351862e-06,
"loss": 0.8122,
"step": 174
},
{
"epoch": 0.29995739241585,
"grad_norm": 2.4339551873431104,
"learning_rate": 8.832350172228099e-06,
"loss": 0.82,
"step": 176
},
{
"epoch": 0.30336599914784834,
"grad_norm": 1.5892853144502257,
"learning_rate": 8.793790613463956e-06,
"loss": 0.8362,
"step": 178
},
{
"epoch": 0.3067746058798466,
"grad_norm": 1.7805951409311684,
"learning_rate": 8.754691784130298e-06,
"loss": 0.8283,
"step": 180
},
{
"epoch": 0.3101832126118449,
"grad_norm": 1.6741127801982758,
"learning_rate": 8.715059241952925e-06,
"loss": 0.8055,
"step": 182
},
{
"epoch": 0.3135918193438432,
"grad_norm": 1.7776583452428674,
"learning_rate": 8.674898620522558e-06,
"loss": 0.8238,
"step": 184
},
{
"epoch": 0.3170004260758415,
"grad_norm": 1.7939673520888073,
"learning_rate": 8.634215628494044e-06,
"loss": 0.7946,
"step": 186
},
{
"epoch": 0.3204090328078398,
"grad_norm": 1.9491996130141838,
"learning_rate": 8.593016048774911e-06,
"loss": 0.8269,
"step": 188
},
{
"epoch": 0.3238176395398381,
"grad_norm": 2.0260518716817355,
"learning_rate": 8.551305737703338e-06,
"loss": 0.8205,
"step": 190
},
{
"epoch": 0.3272262462718364,
"grad_norm": 2.0407011387236915,
"learning_rate": 8.509090624215708e-06,
"loss": 0.7965,
"step": 192
},
{
"epoch": 0.33063485300383466,
"grad_norm": 1.885729706338136,
"learning_rate": 8.466376709003841e-06,
"loss": 0.8444,
"step": 194
},
{
"epoch": 0.334043459735833,
"grad_norm": 1.8594029276174733,
"learning_rate": 8.423170063662014e-06,
"loss": 0.8301,
"step": 196
},
{
"epoch": 0.33745206646783127,
"grad_norm": 2.0661661167934775,
"learning_rate": 8.379476829823919e-06,
"loss": 0.8194,
"step": 198
},
{
"epoch": 0.34086067319982954,
"grad_norm": 1.843312820214377,
"learning_rate": 8.335303218289645e-06,
"loss": 0.8223,
"step": 200
},
{
"epoch": 0.3442692799318279,
"grad_norm": 1.9237357576874556,
"learning_rate": 8.290655508142855e-06,
"loss": 0.8066,
"step": 202
},
{
"epoch": 0.34767788666382615,
"grad_norm": 1.8680286378652424,
"learning_rate": 8.245540045858229e-06,
"loss": 0.8035,
"step": 204
},
{
"epoch": 0.3510864933958245,
"grad_norm": 1.7375474557232102,
"learning_rate": 8.199963244399351e-06,
"loss": 0.8115,
"step": 206
},
{
"epoch": 0.35449510012782276,
"grad_norm": 1.8317920240540273,
"learning_rate": 8.153931582307124e-06,
"loss": 0.8333,
"step": 208
},
{
"epoch": 0.35790370685982104,
"grad_norm": 1.8807672342031,
"learning_rate": 8.107451602778887e-06,
"loss": 0.8114,
"step": 210
},
{
"epoch": 0.36131231359181937,
"grad_norm": 1.8233277253885543,
"learning_rate": 8.060529912738316e-06,
"loss": 0.8084,
"step": 212
},
{
"epoch": 0.36472092032381764,
"grad_norm": 1.8018328218174473,
"learning_rate": 8.013173181896283e-06,
"loss": 0.8036,
"step": 214
},
{
"epoch": 0.3681295270558159,
"grad_norm": 1.7500277066768286,
"learning_rate": 7.965388141802792e-06,
"loss": 0.8493,
"step": 216
},
{
"epoch": 0.37153813378781425,
"grad_norm": 1.8202749968468963,
"learning_rate": 7.917181584890113e-06,
"loss": 0.8666,
"step": 218
},
{
"epoch": 0.3749467405198125,
"grad_norm": 1.7463930119282958,
"learning_rate": 7.868560363507268e-06,
"loss": 0.8104,
"step": 220
},
{
"epoch": 0.3783553472518108,
"grad_norm": 1.687535685348095,
"learning_rate": 7.819531388946e-06,
"loss": 0.7423,
"step": 222
},
{
"epoch": 0.38176395398380913,
"grad_norm": 1.8780292088928865,
"learning_rate": 7.770101630458364e-06,
"loss": 0.7817,
"step": 224
},
{
"epoch": 0.3851725607158074,
"grad_norm": 1.7870199236678057,
"learning_rate": 7.720278114266084e-06,
"loss": 0.8055,
"step": 226
},
{
"epoch": 0.3885811674478057,
"grad_norm": 2.1478936673535522,
"learning_rate": 7.670067922561795e-06,
"loss": 0.8181,
"step": 228
},
{
"epoch": 0.391989774179804,
"grad_norm": 1.996195916952366,
"learning_rate": 7.619478192502352e-06,
"loss": 0.786,
"step": 230
},
{
"epoch": 0.3953983809118023,
"grad_norm": 1.7607026492930253,
"learning_rate": 7.5685161151943066e-06,
"loss": 0.7861,
"step": 232
},
{
"epoch": 0.39880698764380057,
"grad_norm": 1.982916095534787,
"learning_rate": 7.5171889346717264e-06,
"loss": 0.8314,
"step": 234
},
{
"epoch": 0.4022155943757989,
"grad_norm": 1.8475361020735406,
"learning_rate": 7.465503946866491e-06,
"loss": 0.8045,
"step": 236
},
{
"epoch": 0.4056242011077972,
"grad_norm": 1.6483972591254976,
"learning_rate": 7.413468498571199e-06,
"loss": 0.8149,
"step": 238
},
{
"epoch": 0.4090328078397955,
"grad_norm": 1.9862409096042428,
"learning_rate": 7.361089986394863e-06,
"loss": 0.8038,
"step": 240
},
{
"epoch": 0.4124414145717938,
"grad_norm": 1.9381151415852729,
"learning_rate": 7.308375855711508e-06,
"loss": 0.8203,
"step": 242
},
{
"epoch": 0.41585002130379206,
"grad_norm": 1.8252397931133695,
"learning_rate": 7.2553335996018485e-06,
"loss": 0.8096,
"step": 244
},
{
"epoch": 0.4192586280357904,
"grad_norm": 1.8096704344426044,
"learning_rate": 7.201970757788172e-06,
"loss": 0.7692,
"step": 246
},
{
"epoch": 0.42266723476778867,
"grad_norm": 1.7220473220530976,
"learning_rate": 7.148294915562617e-06,
"loss": 0.7941,
"step": 248
},
{
"epoch": 0.42607584149978694,
"grad_norm": 1.6560074887380722,
"learning_rate": 7.09431370270894e-06,
"loss": 0.795,
"step": 250
},
{
"epoch": 0.4294844482317853,
"grad_norm": 1.6420030994162023,
"learning_rate": 7.040034792417986e-06,
"loss": 0.7854,
"step": 252
},
{
"epoch": 0.43289305496378355,
"grad_norm": 1.8777722598215576,
"learning_rate": 6.985465900196971e-06,
"loss": 0.8445,
"step": 254
},
{
"epoch": 0.43630166169578183,
"grad_norm": 1.8421388677475183,
"learning_rate": 6.930614782772766e-06,
"loss": 0.8372,
"step": 256
},
{
"epoch": 0.43971026842778016,
"grad_norm": 1.9176127454855982,
"learning_rate": 6.875489236989306e-06,
"loss": 0.82,
"step": 258
},
{
"epoch": 0.44311887515977844,
"grad_norm": 1.8186020984518791,
"learning_rate": 6.8200970986993e-06,
"loss": 0.8208,
"step": 260
},
{
"epoch": 0.4465274818917767,
"grad_norm": 1.9882080130853592,
"learning_rate": 6.764446241650411e-06,
"loss": 0.8073,
"step": 262
},
{
"epoch": 0.44993608862377504,
"grad_norm": 1.5626224821883135,
"learning_rate": 6.708544576366024e-06,
"loss": 0.8231,
"step": 264
},
{
"epoch": 0.4533446953557733,
"grad_norm": 1.8993741801940567,
"learning_rate": 6.65240004902081e-06,
"loss": 0.8065,
"step": 266
},
{
"epoch": 0.45675330208777165,
"grad_norm": 1.7550634910546117,
"learning_rate": 6.5960206403112045e-06,
"loss": 0.7485,
"step": 268
},
{
"epoch": 0.4601619088197699,
"grad_norm": 1.9326203800829964,
"learning_rate": 6.539414364320997e-06,
"loss": 0.779,
"step": 270
},
{
"epoch": 0.4635705155517682,
"grad_norm": 1.6516836712807916,
"learning_rate": 6.4825892673821556e-06,
"loss": 0.794,
"step": 272
},
{
"epoch": 0.46697912228376653,
"grad_norm": 2.003145254209738,
"learning_rate": 6.425553426931075e-06,
"loss": 0.7618,
"step": 274
},
{
"epoch": 0.4703877290157648,
"grad_norm": 1.6246913128967992,
"learning_rate": 6.368314950360416e-06,
"loss": 0.7785,
"step": 276
},
{
"epoch": 0.4737963357477631,
"grad_norm": 1.8754262243075055,
"learning_rate": 6.31088197386666e-06,
"loss": 0.765,
"step": 278
},
{
"epoch": 0.4772049424797614,
"grad_norm": 1.6448889983604882,
"learning_rate": 6.2532626612936035e-06,
"loss": 0.7977,
"step": 280
},
{
"epoch": 0.4806135492117597,
"grad_norm": 1.8537195734950456,
"learning_rate": 6.195465202971883e-06,
"loss": 0.7637,
"step": 282
},
{
"epoch": 0.48402215594375797,
"grad_norm": 1.7350572092463452,
"learning_rate": 6.137497814554773e-06,
"loss": 0.7974,
"step": 284
},
{
"epoch": 0.4874307626757563,
"grad_norm": 1.659202469943535,
"learning_rate": 6.079368735850355e-06,
"loss": 0.78,
"step": 286
},
{
"epoch": 0.4908393694077546,
"grad_norm": 2.0704522103071326,
"learning_rate": 6.021086229650273e-06,
"loss": 0.78,
"step": 288
},
{
"epoch": 0.49424797613975285,
"grad_norm": 1.7654110381757835,
"learning_rate": 5.96265858055521e-06,
"loss": 0.8101,
"step": 290
},
{
"epoch": 0.4976565828717512,
"grad_norm": 1.6744010674594727,
"learning_rate": 5.904094093797274e-06,
"loss": 0.7854,
"step": 292
},
{
"epoch": 0.5010651896037495,
"grad_norm": 1.672935281461675,
"learning_rate": 5.845401094059439e-06,
"loss": 0.8233,
"step": 294
},
{
"epoch": 0.5044737963357477,
"grad_norm": 1.7571827575826455,
"learning_rate": 5.786587924292228e-06,
"loss": 0.8072,
"step": 296
},
{
"epoch": 0.507882403067746,
"grad_norm": 1.7879627913914475,
"learning_rate": 5.727662944527808e-06,
"loss": 0.8236,
"step": 298
},
{
"epoch": 0.5112910097997444,
"grad_norm": 1.6693554243950373,
"learning_rate": 5.668634530691639e-06,
"loss": 0.7829,
"step": 300
},
{
"epoch": 0.5146996165317427,
"grad_norm": 1.7885967539315661,
"learning_rate": 5.609511073411881e-06,
"loss": 0.8069,
"step": 302
},
{
"epoch": 0.518108223263741,
"grad_norm": 1.8927285112086825,
"learning_rate": 5.550300976826697e-06,
"loss": 0.7978,
"step": 304
},
{
"epoch": 0.5215168299957392,
"grad_norm": 1.9422631435450946,
"learning_rate": 5.491012657389644e-06,
"loss": 0.8143,
"step": 306
},
{
"epoch": 0.5249254367277375,
"grad_norm": 1.8664920158914793,
"learning_rate": 5.431654542673315e-06,
"loss": 0.8184,
"step": 308
},
{
"epoch": 0.5283340434597358,
"grad_norm": 1.7545208791954934,
"learning_rate": 5.372235070171387e-06,
"loss": 0.7976,
"step": 310
},
{
"epoch": 0.5317426501917342,
"grad_norm": 1.8345783742469912,
"learning_rate": 5.312762686099282e-06,
"loss": 0.7726,
"step": 312
},
{
"epoch": 0.5351512569237324,
"grad_norm": 1.777874135882686,
"learning_rate": 5.253245844193564e-06,
"loss": 0.791,
"step": 314
},
{
"epoch": 0.5385598636557307,
"grad_norm": 1.7559490288145514,
"learning_rate": 5.193693004510284e-06,
"loss": 0.7654,
"step": 316
},
{
"epoch": 0.541968470387729,
"grad_norm": 1.7582347301570869,
"learning_rate": 5.1341126322224145e-06,
"loss": 0.7509,
"step": 318
},
{
"epoch": 0.5453770771197273,
"grad_norm": 1.6309306234524417,
"learning_rate": 5.0745131964165686e-06,
"loss": 0.7737,
"step": 320
},
{
"epoch": 0.5487856838517257,
"grad_norm": 1.6033761601218945,
"learning_rate": 5.014903168889155e-06,
"loss": 0.7814,
"step": 322
},
{
"epoch": 0.5521942905837239,
"grad_norm": 1.9401976205946008,
"learning_rate": 4.955291022942146e-06,
"loss": 0.7782,
"step": 324
},
{
"epoch": 0.5556028973157222,
"grad_norm": 1.8683852058875654,
"learning_rate": 4.8956852321786394e-06,
"loss": 0.802,
"step": 326
},
{
"epoch": 0.5590115040477205,
"grad_norm": 1.859200797460236,
"learning_rate": 4.836094269298373e-06,
"loss": 0.7798,
"step": 328
},
{
"epoch": 0.5624201107797188,
"grad_norm": 1.803688727471679,
"learning_rate": 4.7765266048933626e-06,
"loss": 0.8098,
"step": 330
},
{
"epoch": 0.565828717511717,
"grad_norm": 1.7851903640220046,
"learning_rate": 4.7169907062438556e-06,
"loss": 0.7728,
"step": 332
},
{
"epoch": 0.5692373242437154,
"grad_norm": 1.6343651082668207,
"learning_rate": 4.657495036114731e-06,
"loss": 0.7721,
"step": 334
},
{
"epoch": 0.5726459309757137,
"grad_norm": 1.6586804458742679,
"learning_rate": 4.598048051552563e-06,
"loss": 0.8186,
"step": 336
},
{
"epoch": 0.576054537707712,
"grad_norm": 1.750348106992188,
"learning_rate": 4.53865820268349e-06,
"loss": 0.7503,
"step": 338
},
{
"epoch": 0.5794631444397103,
"grad_norm": 1.7165597335606,
"learning_rate": 4.479333931512064e-06,
"loss": 0.7915,
"step": 340
},
{
"epoch": 0.5828717511717085,
"grad_norm": 1.7582639909981337,
"learning_rate": 4.4200836707212525e-06,
"loss": 0.7754,
"step": 342
},
{
"epoch": 0.5862803579037068,
"grad_norm": 1.6838798515151536,
"learning_rate": 4.360915842473778e-06,
"loss": 0.7543,
"step": 344
},
{
"epoch": 0.5896889646357052,
"grad_norm": 1.4519358598788679,
"learning_rate": 4.301838857214944e-06,
"loss": 0.7629,
"step": 346
},
{
"epoch": 0.5930975713677035,
"grad_norm": 1.6951153457921704,
"learning_rate": 4.2428611124771184e-06,
"loss": 0.7919,
"step": 348
},
{
"epoch": 0.5965061780997017,
"grad_norm": 1.716470856323328,
"learning_rate": 4.183990991686071e-06,
"loss": 0.7853,
"step": 350
},
{
"epoch": 0.5999147848317,
"grad_norm": 1.8973005312026041,
"learning_rate": 4.125236862969304e-06,
"loss": 0.7695,
"step": 352
},
{
"epoch": 0.6033233915636983,
"grad_norm": 1.4862142786601558,
"learning_rate": 4.066607077966559e-06,
"loss": 0.7395,
"step": 354
},
{
"epoch": 0.6067319982956967,
"grad_norm": 1.8815334843344234,
"learning_rate": 4.008109970642665e-06,
"loss": 0.787,
"step": 356
},
{
"epoch": 0.610140605027695,
"grad_norm": 1.6378926667179599,
"learning_rate": 3.949753856102909e-06,
"loss": 0.7393,
"step": 358
},
{
"epoch": 0.6135492117596932,
"grad_norm": 1.7084737118259692,
"learning_rate": 3.891547029411074e-06,
"loss": 0.8053,
"step": 360
},
{
"epoch": 0.6169578184916915,
"grad_norm": 1.8544822094338145,
"learning_rate": 3.833497764410336e-06,
"loss": 0.7674,
"step": 362
},
{
"epoch": 0.6203664252236898,
"grad_norm": 1.7409266320356052,
"learning_rate": 3.7756143125471745e-06,
"loss": 0.7489,
"step": 364
},
{
"epoch": 0.6237750319556881,
"grad_norm": 1.8302377955954783,
"learning_rate": 3.7179049016984616e-06,
"loss": 0.7757,
"step": 366
},
{
"epoch": 0.6271836386876865,
"grad_norm": 1.6552784195481047,
"learning_rate": 3.660377735001911e-06,
"loss": 0.7656,
"step": 368
},
{
"epoch": 0.6305922454196847,
"grad_norm": 1.8034934811229115,
"learning_rate": 3.6030409896900333e-06,
"loss": 0.7731,
"step": 370
},
{
"epoch": 0.634000852151683,
"grad_norm": 1.656497367167841,
"learning_rate": 3.545902815927783e-06,
"loss": 0.7564,
"step": 372
},
{
"epoch": 0.6374094588836813,
"grad_norm": 1.6538858927486708,
"learning_rate": 3.4889713356540435e-06,
"loss": 0.7556,
"step": 374
},
{
"epoch": 0.6408180656156796,
"grad_norm": 1.7721211559316303,
"learning_rate": 3.432254641427136e-06,
"loss": 0.7754,
"step": 376
},
{
"epoch": 0.6442266723476778,
"grad_norm": 2.0169569259903946,
"learning_rate": 3.375760795274493e-06,
"loss": 0.7745,
"step": 378
},
{
"epoch": 0.6476352790796762,
"grad_norm": 1.5174685690539855,
"learning_rate": 3.3194978275466755e-06,
"loss": 0.7649,
"step": 380
},
{
"epoch": 0.6510438858116745,
"grad_norm": 1.5043926003321177,
"learning_rate": 3.2634737357758994e-06,
"loss": 0.74,
"step": 382
},
{
"epoch": 0.6544524925436728,
"grad_norm": 1.6692309306036763,
"learning_rate": 3.207696483539219e-06,
"loss": 0.7611,
"step": 384
},
{
"epoch": 0.657861099275671,
"grad_norm": 1.784881350588902,
"learning_rate": 3.152173999326537e-06,
"loss": 0.7675,
"step": 386
},
{
"epoch": 0.6612697060076693,
"grad_norm": 1.7444177474948184,
"learning_rate": 3.096914175413611e-06,
"loss": 0.7685,
"step": 388
},
{
"epoch": 0.6646783127396677,
"grad_norm": 1.999124406609706,
"learning_rate": 3.041924866740197e-06,
"loss": 0.7505,
"step": 390
},
{
"epoch": 0.668086919471666,
"grad_norm": 1.5350179140801363,
"learning_rate": 2.987213889793502e-06,
"loss": 0.7792,
"step": 392
},
{
"epoch": 0.6714955262036643,
"grad_norm": 1.7053442818016058,
"learning_rate": 2.9327890214971135e-06,
"loss": 0.7661,
"step": 394
},
{
"epoch": 0.6749041329356625,
"grad_norm": 1.6957720054975949,
"learning_rate": 2.878657998105533e-06,
"loss": 0.7587,
"step": 396
},
{
"epoch": 0.6783127396676608,
"grad_norm": 1.7615190253028947,
"learning_rate": 2.824828514104512e-06,
"loss": 0.8067,
"step": 398
},
{
"epoch": 0.6817213463996591,
"grad_norm": 1.5571276091989836,
"learning_rate": 2.771308221117309e-06,
"loss": 0.7478,
"step": 400
},
{
"epoch": 0.6851299531316575,
"grad_norm": 1.5280419708791328,
"learning_rate": 2.7181047268170436e-06,
"loss": 0.8071,
"step": 402
},
{
"epoch": 0.6885385598636558,
"grad_norm": 1.6771310372158614,
"learning_rate": 2.665225593845307e-06,
"loss": 0.7799,
"step": 404
},
{
"epoch": 0.691947166595654,
"grad_norm": 1.7562403180703012,
"learning_rate": 2.6126783387371545e-06,
"loss": 0.7471,
"step": 406
},
{
"epoch": 0.6953557733276523,
"grad_norm": 1.796003576907647,
"learning_rate": 2.5604704308526784e-06,
"loss": 0.7716,
"step": 408
},
{
"epoch": 0.6987643800596506,
"grad_norm": 1.6859622044955247,
"learning_rate": 2.5086092913152533e-06,
"loss": 0.8003,
"step": 410
},
{
"epoch": 0.702172986791649,
"grad_norm": 1.6580555467710547,
"learning_rate": 2.4571022919566747e-06,
"loss": 0.7463,
"step": 412
},
{
"epoch": 0.7055815935236472,
"grad_norm": 1.5775642264745176,
"learning_rate": 2.4059567542692688e-06,
"loss": 0.7329,
"step": 414
},
{
"epoch": 0.7089902002556455,
"grad_norm": 1.6145511797148977,
"learning_rate": 2.3551799483651894e-06,
"loss": 0.7557,
"step": 416
},
{
"epoch": 0.7123988069876438,
"grad_norm": 1.815641939119739,
"learning_rate": 2.3047790919429895e-06,
"loss": 0.7618,
"step": 418
},
{
"epoch": 0.7158074137196421,
"grad_norm": 1.751085671764699,
"learning_rate": 2.254761349261676e-06,
"loss": 0.7307,
"step": 420
},
{
"epoch": 0.7192160204516403,
"grad_norm": 1.722113083895161,
"learning_rate": 2.2051338301223187e-06,
"loss": 0.7536,
"step": 422
},
{
"epoch": 0.7226246271836387,
"grad_norm": 1.5887620089800378,
"learning_rate": 2.1559035888574433e-06,
"loss": 0.7139,
"step": 424
},
{
"epoch": 0.726033233915637,
"grad_norm": 1.792034913644711,
"learning_rate": 2.107077623328275e-06,
"loss": 0.7335,
"step": 426
},
{
"epoch": 0.7294418406476353,
"grad_norm": 1.5385202560838132,
"learning_rate": 2.0586628739300386e-06,
"loss": 0.7752,
"step": 428
},
{
"epoch": 0.7328504473796336,
"grad_norm": 1.554007093824859,
"learning_rate": 2.0106662226053924e-06,
"loss": 0.7814,
"step": 430
},
{
"epoch": 0.7362590541116318,
"grad_norm": 1.60530576083176,
"learning_rate": 1.9630944918662122e-06,
"loss": 0.7552,
"step": 432
},
{
"epoch": 0.7396676608436301,
"grad_norm": 1.5045851644291919,
"learning_rate": 1.9159544438237797e-06,
"loss": 0.7602,
"step": 434
},
{
"epoch": 0.7430762675756285,
"grad_norm": 1.6227515256198517,
"learning_rate": 1.8692527792276e-06,
"loss": 0.7695,
"step": 436
},
{
"epoch": 0.7464848743076268,
"grad_norm": 1.6302337283434762,
"learning_rate": 1.8229961365128985e-06,
"loss": 0.732,
"step": 438
},
{
"epoch": 0.749893481039625,
"grad_norm": 1.5834938000524268,
"learning_rate": 1.7771910908570156e-06,
"loss": 0.7566,
"step": 440
},
{
"epoch": 0.7533020877716233,
"grad_norm": 1.4483744882069087,
"learning_rate": 1.7318441532447555e-06,
"loss": 0.8073,
"step": 442
},
{
"epoch": 0.7567106945036216,
"grad_norm": 1.6825397102814452,
"learning_rate": 1.6869617695429024e-06,
"loss": 0.7579,
"step": 444
},
{
"epoch": 0.76011930123562,
"grad_norm": 1.5595787362408242,
"learning_rate": 1.6425503195839416e-06,
"loss": 0.8201,
"step": 446
},
{
"epoch": 0.7635279079676183,
"grad_norm": 1.6860253511961054,
"learning_rate": 1.5986161162592173e-06,
"loss": 0.7515,
"step": 448
},
{
"epoch": 0.7669365146996165,
"grad_norm": 1.6217349191290344,
"learning_rate": 1.555165404621567e-06,
"loss": 0.7684,
"step": 450
},
{
"epoch": 0.7703451214316148,
"grad_norm": 1.7202733697764292,
"learning_rate": 1.5122043609976282e-06,
"loss": 0.7335,
"step": 452
},
{
"epoch": 0.7737537281636131,
"grad_norm": 1.7746564761962131,
"learning_rate": 1.4697390921098886e-06,
"loss": 0.7848,
"step": 454
},
{
"epoch": 0.7771623348956114,
"grad_norm": 1.5349803625176617,
"learning_rate": 1.4277756342086524e-06,
"loss": 0.7757,
"step": 456
},
{
"epoch": 0.7805709416276098,
"grad_norm": 1.6124010704416651,
"learning_rate": 1.3863199522140053e-06,
"loss": 0.8031,
"step": 458
},
{
"epoch": 0.783979548359608,
"grad_norm": 1.7539891598801893,
"learning_rate": 1.3453779388679378e-06,
"loss": 0.7826,
"step": 460
},
{
"epoch": 0.7873881550916063,
"grad_norm": 1.6197274527555163,
"learning_rate": 1.3049554138967052e-06,
"loss": 0.7706,
"step": 462
},
{
"epoch": 0.7907967618236046,
"grad_norm": 1.5679920515099621,
"learning_rate": 1.2650581231835923e-06,
"loss": 0.7943,
"step": 464
},
{
"epoch": 0.7942053685556029,
"grad_norm": 1.6044331803289653,
"learning_rate": 1.225691737952152e-06,
"loss": 0.7558,
"step": 466
},
{
"epoch": 0.7976139752876011,
"grad_norm": 1.5903299233431156,
"learning_rate": 1.1868618539600718e-06,
"loss": 0.767,
"step": 468
},
{
"epoch": 0.8010225820195995,
"grad_norm": 1.5362855174389376,
"learning_rate": 1.1485739907037546e-06,
"loss": 0.7838,
"step": 470
},
{
"epoch": 0.8044311887515978,
"grad_norm": 1.6430906694597753,
"learning_rate": 1.1108335906337559e-06,
"loss": 0.7519,
"step": 472
},
{
"epoch": 0.8078397954835961,
"grad_norm": 1.7499785715417322,
"learning_rate": 1.0736460183811547e-06,
"loss": 0.7681,
"step": 474
},
{
"epoch": 0.8112484022155944,
"grad_norm": 1.7219048713168092,
"learning_rate": 1.0370165599950026e-06,
"loss": 0.7552,
"step": 476
},
{
"epoch": 0.8146570089475926,
"grad_norm": 1.797003265591149,
"learning_rate": 1.0009504221909284e-06,
"loss": 0.7336,
"step": 478
},
{
"epoch": 0.818065615679591,
"grad_norm": 1.322267929113636,
"learning_rate": 9.654527316110323e-07,
"loss": 0.7478,
"step": 480
},
{
"epoch": 0.8214742224115893,
"grad_norm": 1.6060348997953555,
"learning_rate": 9.305285340951509e-07,
"loss": 0.7693,
"step": 482
},
{
"epoch": 0.8248828291435876,
"grad_norm": 1.589739100394995,
"learning_rate": 8.961827939636198e-07,
"loss": 0.762,
"step": 484
},
{
"epoch": 0.8282914358755858,
"grad_norm": 1.5977203911776114,
"learning_rate": 8.624203933116088e-07,
"loss": 0.7517,
"step": 486
},
{
"epoch": 0.8317000426075841,
"grad_norm": 1.4433282291047127,
"learning_rate": 8.292461313151662e-07,
"loss": 0.7814,
"step": 488
},
{
"epoch": 0.8351086493395824,
"grad_norm": 1.5861673926723674,
"learning_rate": 7.966647235490222e-07,
"loss": 0.7988,
"step": 490
},
{
"epoch": 0.8385172560715808,
"grad_norm": 1.7785118279929921,
"learning_rate": 7.646808013163026e-07,
"loss": 0.7871,
"step": 492
},
{
"epoch": 0.8419258628035791,
"grad_norm": 1.4097703537190163,
"learning_rate": 7.332989109902028e-07,
"loss": 0.7471,
"step": 494
},
{
"epoch": 0.8453344695355773,
"grad_norm": 1.6394050318483955,
"learning_rate": 7.025235133677455e-07,
"loss": 0.7793,
"step": 496
},
{
"epoch": 0.8487430762675756,
"grad_norm": 1.4882317995688061,
"learning_rate": 6.723589830356925e-07,
"loss": 0.7587,
"step": 498
},
{
"epoch": 0.8521516829995739,
"grad_norm": 1.5993598547748789,
"learning_rate": 6.428096077487244e-07,
"loss": 0.7256,
"step": 500
},
{
"epoch": 0.8555602897315722,
"grad_norm": 1.8369103578526638,
"learning_rate": 6.138795878199467e-07,
"loss": 0.7346,
"step": 502
},
{
"epoch": 0.8589688964635706,
"grad_norm": 1.3809387771101223,
"learning_rate": 5.855730355238415e-07,
"loss": 0.7389,
"step": 504
},
{
"epoch": 0.8623775031955688,
"grad_norm": 1.606520808055538,
"learning_rate": 5.578939745117173e-07,
"loss": 0.7626,
"step": 506
},
{
"epoch": 0.8657861099275671,
"grad_norm": 1.4252640716496985,
"learning_rate": 5.308463392397756e-07,
"loss": 0.7471,
"step": 508
},
{
"epoch": 0.8691947166595654,
"grad_norm": 1.5620326120060357,
"learning_rate": 5.044339744098348e-07,
"loss": 0.7919,
"step": 510
},
{
"epoch": 0.8726033233915637,
"grad_norm": 1.582860405051893,
"learning_rate": 4.786606344228317e-07,
"loss": 0.8001,
"step": 512
},
{
"epoch": 0.876011930123562,
"grad_norm": 1.6390377107564265,
"learning_rate": 4.5352998284514004e-07,
"loss": 0.7693,
"step": 514
},
{
"epoch": 0.8794205368555603,
"grad_norm": 1.5740989498771922,
"learning_rate": 4.2904559188782334e-07,
"loss": 0.743,
"step": 516
},
{
"epoch": 0.8828291435875586,
"grad_norm": 1.6277724611514495,
"learning_rate": 4.05210941898847e-07,
"loss": 0.7204,
"step": 518
},
{
"epoch": 0.8862377503195569,
"grad_norm": 1.4964610352537944,
"learning_rate": 3.82029420868375e-07,
"loss": 0.7572,
"step": 520
},
{
"epoch": 0.8896463570515551,
"grad_norm": 1.5631623729163737,
"learning_rate": 3.5950432394717296e-07,
"loss": 0.7451,
"step": 522
},
{
"epoch": 0.8930549637835534,
"grad_norm": 1.6364549935760584,
"learning_rate": 3.3763885297822153e-07,
"loss": 0.7748,
"step": 524
},
{
"epoch": 0.8964635705155518,
"grad_norm": 1.901698775456035,
"learning_rate": 3.1643611604158687e-07,
"loss": 0.7467,
"step": 526
},
{
"epoch": 0.8998721772475501,
"grad_norm": 1.6649529271504364,
"learning_rate": 2.958991270126221e-07,
"loss": 0.7316,
"step": 528
},
{
"epoch": 0.9032807839795484,
"grad_norm": 1.7118286972837617,
"learning_rate": 2.760308051335564e-07,
"loss": 0.7546,
"step": 530
},
{
"epoch": 0.9066893907115466,
"grad_norm": 1.5718606351870987,
"learning_rate": 2.5683397459854045e-07,
"loss": 0.7064,
"step": 532
},
{
"epoch": 0.9100979974435449,
"grad_norm": 1.3894669570974365,
"learning_rate": 2.383113641521956e-07,
"loss": 0.747,
"step": 534
},
{
"epoch": 0.9135066041755433,
"grad_norm": 1.4787818567245852,
"learning_rate": 2.2046560670173912e-07,
"loss": 0.7447,
"step": 536
},
{
"epoch": 0.9169152109075416,
"grad_norm": 1.6746803954544742,
"learning_rate": 2.0329923894272463e-07,
"loss": 0.7329,
"step": 538
},
{
"epoch": 0.9203238176395399,
"grad_norm": 1.6476225947328762,
"learning_rate": 1.8681470099846554e-07,
"loss": 0.7494,
"step": 540
},
{
"epoch": 0.9237324243715381,
"grad_norm": 1.4591813987858449,
"learning_rate": 1.710143360731781e-07,
"loss": 0.7251,
"step": 542
},
{
"epoch": 0.9271410311035364,
"grad_norm": 1.6247674197735347,
"learning_rate": 1.559003901189099e-07,
"loss": 0.7689,
"step": 544
},
{
"epoch": 0.9305496378355347,
"grad_norm": 1.7245251671767248,
"learning_rate": 1.4147501151628273e-07,
"loss": 0.7592,
"step": 546
},
{
"epoch": 0.9339582445675331,
"grad_norm": 1.4107246676973044,
"learning_rate": 1.2774025076911445e-07,
"loss": 0.7369,
"step": 548
},
{
"epoch": 0.9373668512995313,
"grad_norm": 1.6265296555944986,
"learning_rate": 1.146980602129466e-07,
"loss": 0.735,
"step": 550
},
{
"epoch": 0.9407754580315296,
"grad_norm": 1.7696409777572968,
"learning_rate": 1.0235029373752758e-07,
"loss": 0.7895,
"step": 552
},
{
"epoch": 0.9441840647635279,
"grad_norm": 1.6245561044198507,
"learning_rate": 9.069870652329283e-08,
"loss": 0.8128,
"step": 554
},
{
"epoch": 0.9475926714955262,
"grad_norm": 1.5989317111370667,
"learning_rate": 7.974495479187271e-08,
"loss": 0.7438,
"step": 556
},
{
"epoch": 0.9510012782275244,
"grad_norm": 1.4760047355170607,
"learning_rate": 6.949059557066596e-08,
"loss": 0.7748,
"step": 558
},
{
"epoch": 0.9544098849595228,
"grad_norm": 1.7501965543108127,
"learning_rate": 5.993708647151874e-08,
"loss": 0.775,
"step": 560
},
{
"epoch": 0.9578184916915211,
"grad_norm": 1.4579885728200068,
"learning_rate": 5.108578548352838e-08,
"loss": 0.7588,
"step": 562
},
{
"epoch": 0.9612270984235194,
"grad_norm": 1.5049680408512587,
"learning_rate": 4.2937950780013174e-08,
"loss": 0.7671,
"step": 564
},
{
"epoch": 0.9646357051555177,
"grad_norm": 1.6438976356754593,
"learning_rate": 3.549474053966828e-08,
"loss": 0.7908,
"step": 566
},
{
"epoch": 0.9680443118875159,
"grad_norm": 1.4470095982360724,
"learning_rate": 2.8757212781935218e-08,
"loss": 0.7466,
"step": 568
},
{
"epoch": 0.9714529186195143,
"grad_norm": 1.8960810552290914,
"learning_rate": 2.2726325216608224e-08,
"loss": 0.7697,
"step": 570
},
{
"epoch": 0.9748615253515126,
"grad_norm": 1.6811694769201933,
"learning_rate": 1.7402935107702634e-08,
"loss": 0.7753,
"step": 572
},
{
"epoch": 0.9782701320835109,
"grad_norm": 1.487143334303855,
"learning_rate": 1.2787799151596225e-08,
"loss": 0.7234,
"step": 574
},
{
"epoch": 0.9816787388155092,
"grad_norm": 1.3834640793692283,
"learning_rate": 8.881573369469153e-09,
"loss": 0.7403,
"step": 576
},
{
"epoch": 0.9850873455475074,
"grad_norm": 1.6803671958080162,
"learning_rate": 5.684813014052437e-09,
"loss": 0.7659,
"step": 578
},
{
"epoch": 0.9884959522795057,
"grad_norm": 1.556690011938104,
"learning_rate": 3.197972490704415e-09,
"loss": 0.7572,
"step": 580
},
{
"epoch": 0.9919045590115041,
"grad_norm": 1.7044244176997279,
"learning_rate": 1.421405292813538e-09,
"loss": 0.7572,
"step": 582
},
{
"epoch": 0.9953131657435024,
"grad_norm": 1.5088989927512526,
"learning_rate": 3.553639515574414e-10,
"loss": 0.752,
"step": 584
},
{
"epoch": 0.9987217724755006,
"grad_norm": 1.4553290863162762,
"learning_rate": 0.0,
"loss": 0.7504,
"step": 586
},
{
"epoch": 0.9987217724755006,
"step": 586,
"total_flos": 6.719053758464e+16,
"train_loss": 0.8281301904863871,
"train_runtime": 9052.2037,
"train_samples_per_second": 1.037,
"train_steps_per_second": 0.065
}
],
"logging_steps": 2,
"max_steps": 586,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.719053758464e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}