30epoch29feb / trainer_state.json
RadAlienware's picture
checkpoint-800-uploaded-manually
6149692 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.62962962962963,
"eval_steps": 500,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 1.999970808109463e-05,
"loss": 5.1275,
"step": 2
},
{
"epoch": 0.15,
"learning_rate": 1.9998814492475354e-05,
"loss": 4.575,
"step": 4
},
{
"epoch": 0.22,
"learning_rate": 1.9997319220216543e-05,
"loss": 3.7996,
"step": 6
},
{
"epoch": 0.3,
"learning_rate": 1.999522235429695e-05,
"loss": 3.5835,
"step": 8
},
{
"epoch": 0.37,
"learning_rate": 1.9992524020896517e-05,
"loss": 3.185,
"step": 10
},
{
"epoch": 0.44,
"learning_rate": 1.9989224382388813e-05,
"loss": 2.7431,
"step": 12
},
{
"epoch": 0.52,
"learning_rate": 1.9985323637331214e-05,
"loss": 2.6319,
"step": 14
},
{
"epoch": 0.59,
"learning_rate": 1.9980822020453007e-05,
"loss": 2.4273,
"step": 16
},
{
"epoch": 0.67,
"learning_rate": 1.9975719802641226e-05,
"loss": 2.3277,
"step": 18
},
{
"epoch": 0.74,
"learning_rate": 1.9970017290924365e-05,
"loss": 2.2614,
"step": 20
},
{
"epoch": 0.81,
"learning_rate": 1.99637148284539e-05,
"loss": 2.2018,
"step": 22
},
{
"epoch": 0.89,
"learning_rate": 1.9956812794483658e-05,
"loss": 2.0017,
"step": 24
},
{
"epoch": 0.96,
"learning_rate": 1.9949311604346965e-05,
"loss": 2.0208,
"step": 26
},
{
"epoch": 1.04,
"learning_rate": 1.994121170943167e-05,
"loss": 1.8773,
"step": 28
},
{
"epoch": 1.11,
"learning_rate": 1.993251359715298e-05,
"loss": 1.7932,
"step": 30
},
{
"epoch": 1.19,
"learning_rate": 1.992321779092414e-05,
"loss": 1.806,
"step": 32
},
{
"epoch": 1.26,
"learning_rate": 1.991332485012491e-05,
"loss": 1.731,
"step": 34
},
{
"epoch": 1.33,
"learning_rate": 1.9902835370067927e-05,
"loss": 1.7323,
"step": 36
},
{
"epoch": 1.41,
"learning_rate": 1.989174998196288e-05,
"loss": 1.7465,
"step": 38
},
{
"epoch": 1.48,
"learning_rate": 1.9880069352878508e-05,
"loss": 1.7926,
"step": 40
},
{
"epoch": 1.56,
"learning_rate": 1.986779418570249e-05,
"loss": 1.7291,
"step": 42
},
{
"epoch": 1.63,
"learning_rate": 1.9854925219099116e-05,
"loss": 1.716,
"step": 44
},
{
"epoch": 1.7,
"learning_rate": 1.9841463227464862e-05,
"loss": 1.5832,
"step": 46
},
{
"epoch": 1.78,
"learning_rate": 1.9827409020881775e-05,
"loss": 1.6131,
"step": 48
},
{
"epoch": 1.85,
"learning_rate": 1.9812763445068725e-05,
"loss": 1.6786,
"step": 50
},
{
"epoch": 1.93,
"learning_rate": 1.979752738133054e-05,
"loss": 1.5621,
"step": 52
},
{
"epoch": 2.0,
"learning_rate": 1.9781701746504926e-05,
"loss": 1.6118,
"step": 54
},
{
"epoch": 2.07,
"learning_rate": 1.9765287492907347e-05,
"loss": 1.4956,
"step": 56
},
{
"epoch": 2.15,
"learning_rate": 1.974828560827368e-05,
"loss": 1.5483,
"step": 58
},
{
"epoch": 2.22,
"learning_rate": 1.97306971157008e-05,
"loss": 1.4634,
"step": 60
},
{
"epoch": 2.3,
"learning_rate": 1.9712523073585e-05,
"loss": 1.5822,
"step": 62
},
{
"epoch": 2.37,
"learning_rate": 1.969376457555831e-05,
"loss": 1.4255,
"step": 64
},
{
"epoch": 2.44,
"learning_rate": 1.967442275042269e-05,
"loss": 1.5571,
"step": 66
},
{
"epoch": 2.52,
"learning_rate": 1.9654498762082096e-05,
"loss": 1.4493,
"step": 68
},
{
"epoch": 2.59,
"learning_rate": 1.963399380947244e-05,
"loss": 1.4962,
"step": 70
},
{
"epoch": 2.67,
"learning_rate": 1.9612909126489457e-05,
"loss": 1.445,
"step": 72
},
{
"epoch": 2.74,
"learning_rate": 1.9591245981914446e-05,
"loss": 1.5671,
"step": 74
},
{
"epoch": 2.81,
"learning_rate": 1.9569005679337905e-05,
"loss": 1.4461,
"step": 76
},
{
"epoch": 2.89,
"learning_rate": 1.9546189557081124e-05,
"loss": 1.4698,
"step": 78
},
{
"epoch": 2.96,
"learning_rate": 1.952279898811561e-05,
"loss": 1.5083,
"step": 80
},
{
"epoch": 3.04,
"learning_rate": 1.949883537998049e-05,
"loss": 1.382,
"step": 82
},
{
"epoch": 3.11,
"learning_rate": 1.947430017469783e-05,
"loss": 1.3836,
"step": 84
},
{
"epoch": 3.19,
"learning_rate": 1.9449194848685807e-05,
"loss": 1.386,
"step": 86
},
{
"epoch": 3.26,
"learning_rate": 1.9423520912669913e-05,
"loss": 1.4059,
"step": 88
},
{
"epoch": 3.33,
"learning_rate": 1.9397279911592034e-05,
"loss": 1.3551,
"step": 90
},
{
"epoch": 3.41,
"learning_rate": 1.9370473424517463e-05,
"loss": 1.2524,
"step": 92
},
{
"epoch": 3.48,
"learning_rate": 1.9343103064539915e-05,
"loss": 1.3327,
"step": 94
},
{
"epoch": 3.56,
"learning_rate": 1.9315170478684423e-05,
"loss": 1.3513,
"step": 96
},
{
"epoch": 3.63,
"learning_rate": 1.928667734780825e-05,
"loss": 1.3488,
"step": 98
},
{
"epoch": 3.7,
"learning_rate": 1.9257625386499716e-05,
"loss": 1.372,
"step": 100
},
{
"epoch": 3.78,
"learning_rate": 1.922801634297507e-05,
"loss": 1.298,
"step": 102
},
{
"epoch": 3.85,
"learning_rate": 1.9197851998973237e-05,
"loss": 1.3144,
"step": 104
},
{
"epoch": 3.93,
"learning_rate": 1.916713416964864e-05,
"loss": 1.3531,
"step": 106
},
{
"epoch": 4.0,
"learning_rate": 1.9135864703461943e-05,
"loss": 1.3806,
"step": 108
},
{
"epoch": 4.07,
"learning_rate": 1.910404548206884e-05,
"loss": 1.2281,
"step": 110
},
{
"epoch": 4.15,
"learning_rate": 1.907167842020683e-05,
"loss": 1.2588,
"step": 112
},
{
"epoch": 4.22,
"learning_rate": 1.903876546557996e-05,
"loss": 1.2666,
"step": 114
},
{
"epoch": 4.3,
"learning_rate": 1.9005308598741662e-05,
"loss": 1.213,
"step": 116
},
{
"epoch": 4.37,
"learning_rate": 1.897130983297555e-05,
"loss": 1.187,
"step": 118
},
{
"epoch": 4.44,
"learning_rate": 1.8936771214174292e-05,
"loss": 1.1696,
"step": 120
},
{
"epoch": 4.52,
"learning_rate": 1.8901694820716456e-05,
"loss": 1.1778,
"step": 122
},
{
"epoch": 4.59,
"learning_rate": 1.886608276334148e-05,
"loss": 1.2148,
"step": 124
},
{
"epoch": 4.67,
"learning_rate": 1.882993718502265e-05,
"loss": 1.1771,
"step": 126
},
{
"epoch": 4.74,
"learning_rate": 1.8793260260838127e-05,
"loss": 1.2162,
"step": 128
},
{
"epoch": 4.81,
"learning_rate": 1.875605419784009e-05,
"loss": 1.213,
"step": 130
},
{
"epoch": 4.89,
"learning_rate": 1.8718321234921895e-05,
"loss": 1.2071,
"step": 132
},
{
"epoch": 4.96,
"learning_rate": 1.868006364268337e-05,
"loss": 1.1868,
"step": 134
},
{
"epoch": 5.04,
"learning_rate": 1.8641283723294177e-05,
"loss": 1.1492,
"step": 136
},
{
"epoch": 5.11,
"learning_rate": 1.8601983810355265e-05,
"loss": 1.1094,
"step": 138
},
{
"epoch": 5.19,
"learning_rate": 1.8562166268758462e-05,
"loss": 1.1069,
"step": 140
},
{
"epoch": 5.26,
"learning_rate": 1.8521833494544153e-05,
"loss": 1.1004,
"step": 142
},
{
"epoch": 5.33,
"learning_rate": 1.848098791475711e-05,
"loss": 1.0538,
"step": 144
},
{
"epoch": 5.41,
"learning_rate": 1.8439631987300418e-05,
"loss": 1.0556,
"step": 146
},
{
"epoch": 5.48,
"learning_rate": 1.8397768200787613e-05,
"loss": 1.0134,
"step": 148
},
{
"epoch": 5.56,
"learning_rate": 1.8355399074392875e-05,
"loss": 1.0615,
"step": 150
},
{
"epoch": 5.63,
"learning_rate": 1.8312527157699488e-05,
"loss": 1.0209,
"step": 152
},
{
"epoch": 5.7,
"learning_rate": 1.826915503054637e-05,
"loss": 1.0104,
"step": 154
},
{
"epoch": 5.78,
"learning_rate": 1.8225285302872872e-05,
"loss": 1.0017,
"step": 156
},
{
"epoch": 5.85,
"learning_rate": 1.818092061456169e-05,
"loss": 1.0131,
"step": 158
},
{
"epoch": 5.93,
"learning_rate": 1.8136063635280036e-05,
"loss": 1.0151,
"step": 160
},
{
"epoch": 6.0,
"learning_rate": 1.8090717064318958e-05,
"loss": 0.9734,
"step": 162
},
{
"epoch": 6.07,
"learning_rate": 1.804488363043094e-05,
"loss": 0.8721,
"step": 164
},
{
"epoch": 6.15,
"learning_rate": 1.799856609166568e-05,
"loss": 0.885,
"step": 166
},
{
"epoch": 6.22,
"learning_rate": 1.7951767235204142e-05,
"loss": 0.8312,
"step": 168
},
{
"epoch": 6.3,
"learning_rate": 1.79044898771908e-05,
"loss": 0.8543,
"step": 170
},
{
"epoch": 6.37,
"learning_rate": 1.7856736862564208e-05,
"loss": 0.8782,
"step": 172
},
{
"epoch": 6.44,
"learning_rate": 1.7808511064885798e-05,
"loss": 0.8549,
"step": 174
},
{
"epoch": 6.52,
"learning_rate": 1.7759815386166945e-05,
"loss": 0.795,
"step": 176
},
{
"epoch": 6.59,
"learning_rate": 1.771065275669436e-05,
"loss": 0.883,
"step": 178
},
{
"epoch": 6.67,
"learning_rate": 1.766102613485375e-05,
"loss": 0.8116,
"step": 180
},
{
"epoch": 6.74,
"learning_rate": 1.761093850695178e-05,
"loss": 0.8215,
"step": 182
},
{
"epoch": 6.81,
"learning_rate": 1.75603928870364e-05,
"loss": 0.8416,
"step": 184
},
{
"epoch": 6.89,
"learning_rate": 1.7509392316715445e-05,
"loss": 0.7994,
"step": 186
},
{
"epoch": 6.96,
"learning_rate": 1.7457939864973625e-05,
"loss": 0.8312,
"step": 188
},
{
"epoch": 7.04,
"learning_rate": 1.7406038627987835e-05,
"loss": 0.7368,
"step": 190
},
{
"epoch": 7.11,
"learning_rate": 1.7353691728940852e-05,
"loss": 0.6201,
"step": 192
},
{
"epoch": 7.19,
"learning_rate": 1.7300902317833377e-05,
"loss": 0.6334,
"step": 194
},
{
"epoch": 7.26,
"learning_rate": 1.724767357129451e-05,
"loss": 0.6199,
"step": 196
},
{
"epoch": 7.33,
"learning_rate": 1.7194008692390566e-05,
"loss": 0.6208,
"step": 198
},
{
"epoch": 7.41,
"learning_rate": 1.7139910910432353e-05,
"loss": 0.6039,
"step": 200
},
{
"epoch": 7.48,
"learning_rate": 1.708538348078082e-05,
"loss": 0.5737,
"step": 202
},
{
"epoch": 7.56,
"learning_rate": 1.7030429684651203e-05,
"loss": 0.6267,
"step": 204
},
{
"epoch": 7.63,
"learning_rate": 1.6975052828915534e-05,
"loss": 0.6654,
"step": 206
},
{
"epoch": 7.7,
"learning_rate": 1.6919256245903675e-05,
"loss": 0.6041,
"step": 208
},
{
"epoch": 7.78,
"learning_rate": 1.686304329320278e-05,
"loss": 0.6349,
"step": 210
},
{
"epoch": 7.85,
"learning_rate": 1.6806417353455267e-05,
"loss": 0.6032,
"step": 212
},
{
"epoch": 7.93,
"learning_rate": 1.674938183415525e-05,
"loss": 0.6501,
"step": 214
},
{
"epoch": 8.0,
"learning_rate": 1.6691940167443487e-05,
"loss": 0.5961,
"step": 216
},
{
"epoch": 8.07,
"learning_rate": 1.6634095809900873e-05,
"loss": 0.4455,
"step": 218
},
{
"epoch": 8.15,
"learning_rate": 1.657585224234042e-05,
"loss": 0.4305,
"step": 220
},
{
"epoch": 8.22,
"learning_rate": 1.6517212969597793e-05,
"loss": 0.4517,
"step": 222
},
{
"epoch": 8.3,
"learning_rate": 1.6458181520320426e-05,
"loss": 0.4096,
"step": 224
},
{
"epoch": 8.37,
"learning_rate": 1.639876144675516e-05,
"loss": 0.4079,
"step": 226
},
{
"epoch": 8.44,
"learning_rate": 1.6338956324534514e-05,
"loss": 0.4153,
"step": 228
},
{
"epoch": 8.52,
"learning_rate": 1.6278769752461477e-05,
"loss": 0.3869,
"step": 230
},
{
"epoch": 8.59,
"learning_rate": 1.6218205352292984e-05,
"loss": 0.3988,
"step": 232
},
{
"epoch": 8.67,
"learning_rate": 1.6157266768521967e-05,
"loss": 0.4212,
"step": 234
},
{
"epoch": 8.74,
"learning_rate": 1.6095957668158043e-05,
"loss": 0.4137,
"step": 236
},
{
"epoch": 8.81,
"learning_rate": 1.6034281740506846e-05,
"loss": 0.4265,
"step": 238
},
{
"epoch": 8.89,
"learning_rate": 1.5972242696948024e-05,
"loss": 0.3991,
"step": 240
},
{
"epoch": 8.96,
"learning_rate": 1.59098442707119e-05,
"loss": 0.384,
"step": 242
},
{
"epoch": 9.04,
"learning_rate": 1.584709021665485e-05,
"loss": 0.3623,
"step": 244
},
{
"epoch": 9.11,
"learning_rate": 1.5783984311033296e-05,
"loss": 0.2732,
"step": 246
},
{
"epoch": 9.19,
"learning_rate": 1.5720530351276542e-05,
"loss": 0.2461,
"step": 248
},
{
"epoch": 9.26,
"learning_rate": 1.565673215575819e-05,
"loss": 0.266,
"step": 250
},
{
"epoch": 9.33,
"learning_rate": 1.5592593563566424e-05,
"loss": 0.2524,
"step": 252
},
{
"epoch": 9.41,
"learning_rate": 1.552811843427295e-05,
"loss": 0.2587,
"step": 254
},
{
"epoch": 9.48,
"learning_rate": 1.5463310647700777e-05,
"loss": 0.2538,
"step": 256
},
{
"epoch": 9.56,
"learning_rate": 1.5398174103690717e-05,
"loss": 0.2541,
"step": 258
},
{
"epoch": 9.63,
"learning_rate": 1.5332712721866735e-05,
"loss": 0.2695,
"step": 260
},
{
"epoch": 9.7,
"learning_rate": 1.5266930441400066e-05,
"loss": 0.2887,
"step": 262
},
{
"epoch": 9.78,
"learning_rate": 1.5200831220772187e-05,
"loss": 0.2753,
"step": 264
},
{
"epoch": 9.85,
"learning_rate": 1.5134419037536603e-05,
"loss": 0.2613,
"step": 266
},
{
"epoch": 9.93,
"learning_rate": 1.5067697888079496e-05,
"loss": 0.2655,
"step": 268
},
{
"epoch": 10.0,
"learning_rate": 1.5000671787379257e-05,
"loss": 0.2828,
"step": 270
},
{
"epoch": 10.07,
"learning_rate": 1.4933344768764857e-05,
"loss": 0.2013,
"step": 272
},
{
"epoch": 10.15,
"learning_rate": 1.4865720883673153e-05,
"loss": 0.194,
"step": 274
},
{
"epoch": 10.22,
"learning_rate": 1.4797804201405099e-05,
"loss": 0.1917,
"step": 276
},
{
"epoch": 10.3,
"learning_rate": 1.472959880888086e-05,
"loss": 0.1783,
"step": 278
},
{
"epoch": 10.37,
"learning_rate": 1.4661108810393883e-05,
"loss": 0.1992,
"step": 280
},
{
"epoch": 10.44,
"learning_rate": 1.4592338327363917e-05,
"loss": 0.1804,
"step": 282
},
{
"epoch": 10.52,
"learning_rate": 1.4523291498089021e-05,
"loss": 0.1834,
"step": 284
},
{
"epoch": 10.59,
"learning_rate": 1.4453972477496509e-05,
"loss": 0.1938,
"step": 286
},
{
"epoch": 10.67,
"learning_rate": 1.4384385436892963e-05,
"loss": 0.1756,
"step": 288
},
{
"epoch": 10.74,
"learning_rate": 1.4314534563713186e-05,
"loss": 0.1928,
"step": 290
},
{
"epoch": 10.81,
"learning_rate": 1.4244424061268239e-05,
"loss": 0.2066,
"step": 292
},
{
"epoch": 10.89,
"learning_rate": 1.4174058148492515e-05,
"loss": 0.2115,
"step": 294
},
{
"epoch": 10.96,
"learning_rate": 1.4103441059689843e-05,
"loss": 0.2111,
"step": 296
},
{
"epoch": 11.04,
"learning_rate": 1.4032577044278695e-05,
"loss": 0.1766,
"step": 298
},
{
"epoch": 11.11,
"learning_rate": 1.3961470366536472e-05,
"loss": 0.1426,
"step": 300
},
{
"epoch": 11.19,
"learning_rate": 1.3890125305342905e-05,
"loss": 0.1705,
"step": 302
},
{
"epoch": 11.26,
"learning_rate": 1.3818546153922567e-05,
"loss": 0.1591,
"step": 304
},
{
"epoch": 11.33,
"learning_rate": 1.374673721958653e-05,
"loss": 0.1579,
"step": 306
},
{
"epoch": 11.41,
"learning_rate": 1.3674702823473165e-05,
"loss": 0.1579,
"step": 308
},
{
"epoch": 11.48,
"learning_rate": 1.3602447300288114e-05,
"loss": 0.1607,
"step": 310
},
{
"epoch": 11.56,
"learning_rate": 1.3529974998043453e-05,
"loss": 0.1569,
"step": 312
},
{
"epoch": 11.63,
"learning_rate": 1.345729027779605e-05,
"loss": 0.1809,
"step": 314
},
{
"epoch": 11.7,
"learning_rate": 1.3384397513385126e-05,
"loss": 0.1573,
"step": 316
},
{
"epoch": 11.78,
"learning_rate": 1.3311301091169067e-05,
"loss": 0.1686,
"step": 318
},
{
"epoch": 11.85,
"learning_rate": 1.3238005409761468e-05,
"loss": 0.1827,
"step": 320
},
{
"epoch": 11.93,
"learning_rate": 1.3164514879766443e-05,
"loss": 0.1791,
"step": 322
},
{
"epoch": 12.0,
"learning_rate": 1.309083392351322e-05,
"loss": 0.1692,
"step": 324
},
{
"epoch": 12.07,
"learning_rate": 1.3016966974790018e-05,
"loss": 0.1352,
"step": 326
},
{
"epoch": 12.15,
"learning_rate": 1.294291847857725e-05,
"loss": 0.1432,
"step": 328
},
{
"epoch": 12.22,
"learning_rate": 1.2868692890780036e-05,
"loss": 0.139,
"step": 330
},
{
"epoch": 12.3,
"learning_rate": 1.2794294677960073e-05,
"loss": 0.1614,
"step": 332
},
{
"epoch": 12.37,
"learning_rate": 1.2719728317066853e-05,
"loss": 0.1562,
"step": 334
},
{
"epoch": 12.44,
"learning_rate": 1.2644998295168257e-05,
"loss": 0.1419,
"step": 336
},
{
"epoch": 12.52,
"learning_rate": 1.2570109109180561e-05,
"loss": 0.1649,
"step": 338
},
{
"epoch": 12.59,
"learning_rate": 1.249506526559781e-05,
"loss": 0.1539,
"step": 340
},
{
"epoch": 12.67,
"learning_rate": 1.2419871280220646e-05,
"loss": 0.1572,
"step": 342
},
{
"epoch": 12.74,
"learning_rate": 1.2344531677884572e-05,
"loss": 0.1558,
"step": 344
},
{
"epoch": 12.81,
"learning_rate": 1.2269050992187666e-05,
"loss": 0.1521,
"step": 346
},
{
"epoch": 12.89,
"learning_rate": 1.219343376521776e-05,
"loss": 0.153,
"step": 348
},
{
"epoch": 12.96,
"learning_rate": 1.211768454727913e-05,
"loss": 0.1582,
"step": 350
},
{
"epoch": 13.04,
"learning_rate": 1.2041807896618676e-05,
"loss": 0.1504,
"step": 352
},
{
"epoch": 13.11,
"learning_rate": 1.1965808379151622e-05,
"loss": 0.1386,
"step": 354
},
{
"epoch": 13.19,
"learning_rate": 1.1889690568186767e-05,
"loss": 0.1438,
"step": 356
},
{
"epoch": 13.26,
"learning_rate": 1.1813459044151276e-05,
"loss": 0.1299,
"step": 358
},
{
"epoch": 13.33,
"learning_rate": 1.1737118394315052e-05,
"loss": 0.1398,
"step": 360
},
{
"epoch": 13.41,
"learning_rate": 1.1660673212514708e-05,
"loss": 0.1417,
"step": 362
},
{
"epoch": 13.48,
"learning_rate": 1.1584128098877112e-05,
"loss": 0.1463,
"step": 364
},
{
"epoch": 13.56,
"learning_rate": 1.1507487659542578e-05,
"loss": 0.1483,
"step": 366
},
{
"epoch": 13.63,
"learning_rate": 1.1430756506387699e-05,
"loss": 0.1514,
"step": 368
},
{
"epoch": 13.7,
"learning_rate": 1.135393925674781e-05,
"loss": 0.1428,
"step": 370
},
{
"epoch": 13.78,
"learning_rate": 1.1277040533139149e-05,
"loss": 0.1588,
"step": 372
},
{
"epoch": 13.85,
"learning_rate": 1.1200064962980692e-05,
"loss": 0.156,
"step": 374
},
{
"epoch": 13.93,
"learning_rate": 1.1123017178315692e-05,
"loss": 0.1455,
"step": 376
},
{
"epoch": 14.0,
"learning_rate": 1.104590181553294e-05,
"loss": 0.1627,
"step": 378
},
{
"epoch": 14.07,
"learning_rate": 1.0968723515087786e-05,
"loss": 0.1357,
"step": 380
},
{
"epoch": 14.15,
"learning_rate": 1.0891486921222877e-05,
"loss": 0.1405,
"step": 382
},
{
"epoch": 14.22,
"learning_rate": 1.0814196681688695e-05,
"loss": 0.1307,
"step": 384
},
{
"epoch": 14.3,
"learning_rate": 1.0736857447463887e-05,
"loss": 0.121,
"step": 386
},
{
"epoch": 14.37,
"learning_rate": 1.065947387247536e-05,
"loss": 0.1381,
"step": 388
},
{
"epoch": 14.44,
"learning_rate": 1.0582050613318276e-05,
"loss": 0.1364,
"step": 390
},
{
"epoch": 14.52,
"learning_rate": 1.0504592328975783e-05,
"loss": 0.139,
"step": 392
},
{
"epoch": 14.59,
"learning_rate": 1.0427103680538706e-05,
"loss": 0.137,
"step": 394
},
{
"epoch": 14.67,
"learning_rate": 1.0349589330925039e-05,
"loss": 0.1381,
"step": 396
},
{
"epoch": 14.74,
"learning_rate": 1.0272053944599358e-05,
"loss": 0.1438,
"step": 398
},
{
"epoch": 14.81,
"learning_rate": 1.0194502187292128e-05,
"loss": 0.135,
"step": 400
},
{
"epoch": 14.89,
"learning_rate": 1.0116938725718956e-05,
"loss": 0.1435,
"step": 402
},
{
"epoch": 14.96,
"learning_rate": 1.0039368227299753e-05,
"loss": 0.1518,
"step": 404
},
{
"epoch": 15.04,
"learning_rate": 9.961795359877873e-06,
"loss": 0.1295,
"step": 406
},
{
"epoch": 15.11,
"learning_rate": 9.884224791439229e-06,
"loss": 0.1267,
"step": 408
},
{
"epoch": 15.19,
"learning_rate": 9.806661189831397e-06,
"loss": 0.1278,
"step": 410
},
{
"epoch": 15.26,
"learning_rate": 9.729109222482708e-06,
"loss": 0.1344,
"step": 412
},
{
"epoch": 15.33,
"learning_rate": 9.651573556121408e-06,
"loss": 0.1359,
"step": 414
},
{
"epoch": 15.41,
"learning_rate": 9.574058856494819e-06,
"loss": 0.1313,
"step": 416
},
{
"epoch": 15.48,
"learning_rate": 9.496569788088572e-06,
"loss": 0.1369,
"step": 418
},
{
"epoch": 15.56,
"learning_rate": 9.419111013845939e-06,
"loss": 0.1224,
"step": 420
},
{
"epoch": 15.63,
"learning_rate": 9.341687194887216e-06,
"loss": 0.1433,
"step": 422
},
{
"epoch": 15.7,
"learning_rate": 9.264302990229253e-06,
"loss": 0.1263,
"step": 424
},
{
"epoch": 15.78,
"learning_rate": 9.186963056505082e-06,
"loss": 0.1449,
"step": 426
},
{
"epoch": 15.85,
"learning_rate": 9.109672047683723e-06,
"loss": 0.1351,
"step": 428
},
{
"epoch": 15.93,
"learning_rate": 9.032434614790101e-06,
"loss": 0.1391,
"step": 430
},
{
"epoch": 16.0,
"learning_rate": 8.955255405625195e-06,
"loss": 0.1392,
"step": 432
},
{
"epoch": 16.07,
"learning_rate": 8.878139064486332e-06,
"loss": 0.1294,
"step": 434
},
{
"epoch": 16.15,
"learning_rate": 8.801090231887733e-06,
"loss": 0.1151,
"step": 436
},
{
"epoch": 16.22,
"learning_rate": 8.724113544281248e-06,
"loss": 0.113,
"step": 438
},
{
"epoch": 16.3,
"learning_rate": 8.647213633777381e-06,
"loss": 0.1272,
"step": 440
},
{
"epoch": 16.37,
"learning_rate": 8.570395127866521e-06,
"loss": 0.1246,
"step": 442
},
{
"epoch": 16.44,
"learning_rate": 8.49366264914051e-06,
"loss": 0.1322,
"step": 444
},
{
"epoch": 16.52,
"learning_rate": 8.417020815014442e-06,
"loss": 0.1351,
"step": 446
},
{
"epoch": 16.59,
"learning_rate": 8.340474237448851e-06,
"loss": 0.1286,
"step": 448
},
{
"epoch": 16.67,
"learning_rate": 8.264027522672134e-06,
"loss": 0.1317,
"step": 450
},
{
"epoch": 16.74,
"learning_rate": 8.187685270903419e-06,
"loss": 0.1277,
"step": 452
},
{
"epoch": 16.81,
"learning_rate": 8.1114520760757e-06,
"loss": 0.1361,
"step": 454
},
{
"epoch": 16.89,
"learning_rate": 8.035332525559436e-06,
"loss": 0.1389,
"step": 456
},
{
"epoch": 16.96,
"learning_rate": 7.959331199886464e-06,
"loss": 0.1382,
"step": 458
},
{
"epoch": 17.04,
"learning_rate": 7.88345267247439e-06,
"loss": 0.1342,
"step": 460
},
{
"epoch": 17.11,
"learning_rate": 7.807701509351382e-06,
"loss": 0.1158,
"step": 462
},
{
"epoch": 17.19,
"learning_rate": 7.732082268881377e-06,
"loss": 0.1207,
"step": 464
},
{
"epoch": 17.26,
"learning_rate": 7.656599501489827e-06,
"loss": 0.116,
"step": 466
},
{
"epoch": 17.33,
"learning_rate": 7.581257749389826e-06,
"loss": 0.1275,
"step": 468
},
{
"epoch": 17.41,
"learning_rate": 7.506061546308818e-06,
"loss": 0.1182,
"step": 470
},
{
"epoch": 17.48,
"learning_rate": 7.431015417215751e-06,
"loss": 0.1328,
"step": 472
},
{
"epoch": 17.56,
"learning_rate": 7.356123878048806e-06,
"loss": 0.1206,
"step": 474
},
{
"epoch": 17.63,
"learning_rate": 7.2813914354436325e-06,
"loss": 0.125,
"step": 476
},
{
"epoch": 17.7,
"learning_rate": 7.206822586462168e-06,
"loss": 0.1229,
"step": 478
},
{
"epoch": 17.78,
"learning_rate": 7.132421818322015e-06,
"loss": 0.1301,
"step": 480
},
{
"epoch": 17.85,
"learning_rate": 7.058193608126439e-06,
"loss": 0.1367,
"step": 482
},
{
"epoch": 17.93,
"learning_rate": 6.984142422594935e-06,
"loss": 0.1368,
"step": 484
},
{
"epoch": 18.0,
"learning_rate": 6.910272717794462e-06,
"loss": 0.1279,
"step": 486
},
{
"epoch": 18.07,
"learning_rate": 6.836588938871271e-06,
"loss": 0.1116,
"step": 488
},
{
"epoch": 18.15,
"learning_rate": 6.763095519783447e-06,
"loss": 0.1124,
"step": 490
},
{
"epoch": 18.22,
"learning_rate": 6.689796883034059e-06,
"loss": 0.1192,
"step": 492
},
{
"epoch": 18.3,
"learning_rate": 6.616697439405073e-06,
"loss": 0.1209,
"step": 494
},
{
"epoch": 18.37,
"learning_rate": 6.54380158769189e-06,
"loss": 0.1154,
"step": 496
},
{
"epoch": 18.44,
"learning_rate": 6.471113714438679e-06,
"loss": 0.1261,
"step": 498
},
{
"epoch": 18.52,
"learning_rate": 6.398638193674396e-06,
"loss": 0.1325,
"step": 500
},
{
"epoch": 18.59,
"learning_rate": 6.326379386649579e-06,
"loss": 0.1204,
"step": 502
},
{
"epoch": 18.67,
"learning_rate": 6.2543416415739175e-06,
"loss": 0.1184,
"step": 504
},
{
"epoch": 18.74,
"learning_rate": 6.182529293354578e-06,
"loss": 0.1292,
"step": 506
},
{
"epoch": 18.81,
"learning_rate": 6.110946663335366e-06,
"loss": 0.129,
"step": 508
},
{
"epoch": 18.89,
"learning_rate": 6.0395980590366685e-06,
"loss": 0.1234,
"step": 510
},
{
"epoch": 18.96,
"learning_rate": 5.968487773896281e-06,
"loss": 0.1337,
"step": 512
},
{
"epoch": 19.04,
"learning_rate": 5.897620087010998e-06,
"loss": 0.1263,
"step": 514
},
{
"epoch": 19.11,
"learning_rate": 5.826999262879164e-06,
"loss": 0.1153,
"step": 516
},
{
"epoch": 19.19,
"learning_rate": 5.7566295511440195e-06,
"loss": 0.1218,
"step": 518
},
{
"epoch": 19.26,
"learning_rate": 5.68651518633801e-06,
"loss": 0.1215,
"step": 520
},
{
"epoch": 19.33,
"learning_rate": 5.616660387627925e-06,
"loss": 0.1173,
"step": 522
},
{
"epoch": 19.41,
"learning_rate": 5.547069358561064e-06,
"loss": 0.1194,
"step": 524
},
{
"epoch": 19.48,
"learning_rate": 5.47774628681224e-06,
"loss": 0.1169,
"step": 526
},
{
"epoch": 19.56,
"learning_rate": 5.4086953439318024e-06,
"loss": 0.1179,
"step": 528
},
{
"epoch": 19.63,
"learning_rate": 5.339920685094613e-06,
"loss": 0.1214,
"step": 530
},
{
"epoch": 19.7,
"learning_rate": 5.27142644885001e-06,
"loss": 0.1222,
"step": 532
},
{
"epoch": 19.78,
"learning_rate": 5.203216756872746e-06,
"loss": 0.1205,
"step": 534
},
{
"epoch": 19.85,
"learning_rate": 5.135295713714998e-06,
"loss": 0.1186,
"step": 536
},
{
"epoch": 19.93,
"learning_rate": 5.067667406559349e-06,
"loss": 0.1257,
"step": 538
},
{
"epoch": 20.0,
"learning_rate": 5.000335904972846e-06,
"loss": 0.1201,
"step": 540
},
{
"epoch": 20.07,
"learning_rate": 4.933305260662111e-06,
"loss": 0.117,
"step": 542
},
{
"epoch": 20.15,
"learning_rate": 4.8665795072295455e-06,
"loss": 0.1169,
"step": 544
},
{
"epoch": 20.22,
"learning_rate": 4.800162659930563e-06,
"loss": 0.1121,
"step": 546
},
{
"epoch": 20.3,
"learning_rate": 4.734058715432019e-06,
"loss": 0.1111,
"step": 548
},
{
"epoch": 20.37,
"learning_rate": 4.668271651571691e-06,
"loss": 0.1177,
"step": 550
},
{
"epoch": 20.44,
"learning_rate": 4.602805427118878e-06,
"loss": 0.1091,
"step": 552
},
{
"epoch": 20.52,
"learning_rate": 4.537663981536239e-06,
"loss": 0.1134,
"step": 554
},
{
"epoch": 20.59,
"learning_rate": 4.4728512347426835e-06,
"loss": 0.1226,
"step": 556
},
{
"epoch": 20.67,
"learning_rate": 4.408371086877516e-06,
"loss": 0.1233,
"step": 558
},
{
"epoch": 20.74,
"learning_rate": 4.344227418065728e-06,
"loss": 0.121,
"step": 560
},
{
"epoch": 20.81,
"learning_rate": 4.280424088184534e-06,
"loss": 0.114,
"step": 562
},
{
"epoch": 20.89,
"learning_rate": 4.216964936631054e-06,
"loss": 0.1245,
"step": 564
},
{
"epoch": 20.96,
"learning_rate": 4.153853782091335e-06,
"loss": 0.1194,
"step": 566
},
{
"epoch": 21.04,
"learning_rate": 4.0910944223105155e-06,
"loss": 0.1161,
"step": 568
},
{
"epoch": 21.11,
"learning_rate": 4.028690633864312e-06,
"loss": 0.1107,
"step": 570
},
{
"epoch": 21.19,
"learning_rate": 3.966646171931754e-06,
"loss": 0.1113,
"step": 572
},
{
"epoch": 21.26,
"learning_rate": 3.904964770069233e-06,
"loss": 0.114,
"step": 574
},
{
"epoch": 21.33,
"learning_rate": 3.843650139985807e-06,
"loss": 0.1113,
"step": 576
},
{
"epoch": 21.41,
"learning_rate": 3.7827059713198646e-06,
"loss": 0.1151,
"step": 578
},
{
"epoch": 21.48,
"learning_rate": 3.7221359314170933e-06,
"loss": 0.1147,
"step": 580
},
{
"epoch": 21.56,
"learning_rate": 3.661943665109796e-06,
"loss": 0.1212,
"step": 582
},
{
"epoch": 21.63,
"learning_rate": 3.602132794497558e-06,
"loss": 0.1211,
"step": 584
},
{
"epoch": 21.7,
"learning_rate": 3.5427069187293007e-06,
"loss": 0.1139,
"step": 586
},
{
"epoch": 21.78,
"learning_rate": 3.4836696137866755e-06,
"loss": 0.1138,
"step": 588
},
{
"epoch": 21.85,
"learning_rate": 3.425024432268894e-06,
"loss": 0.1219,
"step": 590
},
{
"epoch": 21.93,
"learning_rate": 3.3667749031789554e-06,
"loss": 0.1186,
"step": 592
},
{
"epoch": 22.0,
"learning_rate": 3.308924531711265e-06,
"loss": 0.1188,
"step": 594
},
{
"epoch": 22.07,
"learning_rate": 3.251476799040725e-06,
"loss": 0.1145,
"step": 596
},
{
"epoch": 22.15,
"learning_rate": 3.194435162113245e-06,
"loss": 0.1152,
"step": 598
},
{
"epoch": 22.22,
"learning_rate": 3.137803053437725e-06,
"loss": 0.1144,
"step": 600
},
{
"epoch": 22.3,
"learning_rate": 3.0815838808794907e-06,
"loss": 0.1073,
"step": 602
},
{
"epoch": 22.37,
"learning_rate": 3.0257810274552444e-06,
"loss": 0.1045,
"step": 604
},
{
"epoch": 22.44,
"learning_rate": 2.970397851129467e-06,
"loss": 0.1181,
"step": 606
},
{
"epoch": 22.52,
"learning_rate": 2.9154376846123633e-06,
"loss": 0.1146,
"step": 608
},
{
"epoch": 22.59,
"learning_rate": 2.86090383515931e-06,
"loss": 0.1165,
"step": 610
},
{
"epoch": 22.67,
"learning_rate": 2.8067995843718544e-06,
"loss": 0.1164,
"step": 612
},
{
"epoch": 22.74,
"learning_rate": 2.753128188000208e-06,
"loss": 0.1131,
"step": 614
},
{
"epoch": 22.81,
"learning_rate": 2.6998928757473764e-06,
"loss": 0.1138,
"step": 616
},
{
"epoch": 22.89,
"learning_rate": 2.64709685107477e-06,
"loss": 0.1168,
"step": 618
},
{
"epoch": 22.96,
"learning_rate": 2.594743291009456e-06,
"loss": 0.1178,
"step": 620
},
{
"epoch": 23.04,
"learning_rate": 2.54283534595297e-06,
"loss": 0.1173,
"step": 622
},
{
"epoch": 23.11,
"learning_rate": 2.491376139491756e-06,
"loss": 0.1119,
"step": 624
},
{
"epoch": 23.19,
"learning_rate": 2.440368768209167e-06,
"loss": 0.1075,
"step": 626
},
{
"epoch": 23.26,
"learning_rate": 2.3898163014991694e-06,
"loss": 0.1128,
"step": 628
},
{
"epoch": 23.33,
"learning_rate": 2.3397217813816107e-06,
"loss": 0.1125,
"step": 630
},
{
"epoch": 23.41,
"learning_rate": 2.2900882223191766e-06,
"loss": 0.118,
"step": 632
},
{
"epoch": 23.48,
"learning_rate": 2.240918611035988e-06,
"loss": 0.107,
"step": 634
},
{
"epoch": 23.56,
"learning_rate": 2.1922159063378876e-06,
"loss": 0.1201,
"step": 636
},
{
"epoch": 23.63,
"learning_rate": 2.1439830389343695e-06,
"loss": 0.1083,
"step": 638
},
{
"epoch": 23.7,
"learning_rate": 2.0962229112622403e-06,
"loss": 0.1152,
"step": 640
},
{
"epoch": 23.78,
"learning_rate": 2.0489383973109643e-06,
"loss": 0.113,
"step": 642
},
{
"epoch": 23.85,
"learning_rate": 2.0021323424496962e-06,
"loss": 0.1149,
"step": 644
},
{
"epoch": 23.93,
"learning_rate": 1.955807563256097e-06,
"loss": 0.111,
"step": 646
},
{
"epoch": 24.0,
"learning_rate": 1.9099668473468113e-06,
"loss": 0.1164,
"step": 648
},
{
"epoch": 24.07,
"learning_rate": 1.8646129532097391e-06,
"loss": 0.1129,
"step": 650
},
{
"epoch": 24.15,
"learning_rate": 1.8197486100380357e-06,
"loss": 0.1089,
"step": 652
},
{
"epoch": 24.22,
"learning_rate": 1.775376517565891e-06,
"loss": 0.116,
"step": 654
},
{
"epoch": 24.3,
"learning_rate": 1.7314993459060492e-06,
"loss": 0.1121,
"step": 656
},
{
"epoch": 24.37,
"learning_rate": 1.6881197353891643e-06,
"loss": 0.1118,
"step": 658
},
{
"epoch": 24.44,
"learning_rate": 1.645240296404892e-06,
"loss": 0.11,
"step": 660
},
{
"epoch": 24.52,
"learning_rate": 1.6028636092448168e-06,
"loss": 0.1134,
"step": 662
},
{
"epoch": 24.59,
"learning_rate": 1.5609922239471842e-06,
"loss": 0.1079,
"step": 664
},
{
"epoch": 24.67,
"learning_rate": 1.5196286601434508e-06,
"loss": 0.1128,
"step": 666
},
{
"epoch": 24.74,
"learning_rate": 1.4787754069066574e-06,
"loss": 0.1102,
"step": 668
},
{
"epoch": 24.81,
"learning_rate": 1.4384349226016559e-06,
"loss": 0.1159,
"step": 670
},
{
"epoch": 24.89,
"learning_rate": 1.3986096347371692e-06,
"loss": 0.1145,
"step": 672
},
{
"epoch": 24.96,
"learning_rate": 1.3593019398197216e-06,
"loss": 0.1113,
"step": 674
},
{
"epoch": 25.04,
"learning_rate": 1.3205142032094188e-06,
"loss": 0.1113,
"step": 676
},
{
"epoch": 25.11,
"learning_rate": 1.2822487589776234e-06,
"loss": 0.1088,
"step": 678
},
{
"epoch": 25.19,
"learning_rate": 1.2445079097664914e-06,
"loss": 0.1127,
"step": 680
},
{
"epoch": 25.26,
"learning_rate": 1.2072939266504068e-06,
"loss": 0.1066,
"step": 682
},
{
"epoch": 25.33,
"learning_rate": 1.1706090489993348e-06,
"loss": 0.1193,
"step": 684
},
{
"epoch": 25.41,
"learning_rate": 1.134455484344047e-06,
"loss": 0.108,
"step": 686
},
{
"epoch": 25.48,
"learning_rate": 1.098835408243295e-06,
"loss": 0.1083,
"step": 688
},
{
"epoch": 25.56,
"learning_rate": 1.0637509641528876e-06,
"loss": 0.112,
"step": 690
},
{
"epoch": 25.63,
"learning_rate": 1.0292042632967158e-06,
"loss": 0.1157,
"step": 692
},
{
"epoch": 25.7,
"learning_rate": 9.95197384539698e-07,
"loss": 0.1128,
"step": 694
},
{
"epoch": 25.78,
"learning_rate": 9.61732374262696e-07,
"loss": 0.1123,
"step": 696
},
{
"epoch": 25.85,
"learning_rate": 9.288112462393605e-07,
"loss": 0.1111,
"step": 698
},
{
"epoch": 25.93,
"learning_rate": 8.964359815149593e-07,
"loss": 0.1194,
"step": 700
},
{
"epoch": 26.0,
"learning_rate": 8.646085282871597e-07,
"loss": 0.1103,
"step": 702
},
{
"epoch": 26.07,
"learning_rate": 8.33330801788812e-07,
"loss": 0.113,
"step": 704
},
{
"epoch": 26.15,
"learning_rate": 8.026046841726687e-07,
"loss": 0.1165,
"step": 706
},
{
"epoch": 26.22,
"learning_rate": 7.724320243981587e-07,
"loss": 0.1231,
"step": 708
},
{
"epoch": 26.3,
"learning_rate": 7.42814638120104e-07,
"loss": 0.1117,
"step": 710
},
{
"epoch": 26.37,
"learning_rate": 7.137543075794651e-07,
"loss": 0.1102,
"step": 712
},
{
"epoch": 26.44,
"learning_rate": 6.852527814960986e-07,
"loss": 0.1106,
"step": 714
},
{
"epoch": 26.52,
"learning_rate": 6.573117749635294e-07,
"loss": 0.1136,
"step": 716
},
{
"epoch": 26.59,
"learning_rate": 6.299329693457279e-07,
"loss": 0.1118,
"step": 718
},
{
"epoch": 26.67,
"learning_rate": 6.031180121759572e-07,
"loss": 0.1072,
"step": 720
},
{
"epoch": 26.74,
"learning_rate": 5.768685170576105e-07,
"loss": 0.1102,
"step": 722
},
{
"epoch": 26.81,
"learning_rate": 5.511860635671229e-07,
"loss": 0.1089,
"step": 724
},
{
"epoch": 26.89,
"learning_rate": 5.260721971589211e-07,
"loss": 0.1124,
"step": 726
},
{
"epoch": 26.96,
"learning_rate": 5.015284290724165e-07,
"loss": 0.1043,
"step": 728
},
{
"epoch": 27.04,
"learning_rate": 4.775562362410724e-07,
"loss": 0.1137,
"step": 730
},
{
"epoch": 27.11,
"learning_rate": 4.5415706120352287e-07,
"loss": 0.1112,
"step": 732
},
{
"epoch": 27.19,
"learning_rate": 4.3133231201677894e-07,
"loss": 0.1094,
"step": 734
},
{
"epoch": 27.26,
"learning_rate": 4.0908336217147957e-07,
"loss": 0.1145,
"step": 736
},
{
"epoch": 27.33,
"learning_rate": 3.8741155050926237e-07,
"loss": 0.1128,
"step": 738
},
{
"epoch": 27.41,
"learning_rate": 3.6631818114218007e-07,
"loss": 0.1109,
"step": 740
},
{
"epoch": 27.48,
"learning_rate": 3.4580452337423465e-07,
"loss": 0.1109,
"step": 742
},
{
"epoch": 27.56,
"learning_rate": 3.258718116249937e-07,
"loss": 0.1139,
"step": 744
},
{
"epoch": 27.63,
"learning_rate": 3.0652124535531125e-07,
"loss": 0.1066,
"step": 746
},
{
"epoch": 27.7,
"learning_rate": 2.8775398899514304e-07,
"loss": 0.1104,
"step": 748
},
{
"epoch": 27.78,
"learning_rate": 2.695711718734839e-07,
"loss": 0.1088,
"step": 750
},
{
"epoch": 27.85,
"learning_rate": 2.5197388815040634e-07,
"loss": 0.1112,
"step": 752
},
{
"epoch": 27.93,
"learning_rate": 2.3496319675121892e-07,
"loss": 0.1123,
"step": 754
},
{
"epoch": 28.0,
"learning_rate": 2.185401213027405e-07,
"loss": 0.1143,
"step": 756
},
{
"epoch": 28.07,
"learning_rate": 2.0270565007171726e-07,
"loss": 0.1103,
"step": 758
},
{
"epoch": 28.15,
"learning_rate": 1.8746073590533487e-07,
"loss": 0.1096,
"step": 760
},
{
"epoch": 28.22,
"learning_rate": 1.7280629617389187e-07,
"loss": 0.1085,
"step": 762
},
{
"epoch": 28.3,
"learning_rate": 1.5874321271559634e-07,
"loss": 0.1052,
"step": 764
},
{
"epoch": 28.37,
"learning_rate": 1.4527233178349255e-07,
"loss": 0.1165,
"step": 766
},
{
"epoch": 28.44,
"learning_rate": 1.323944639945507e-07,
"loss": 0.1179,
"step": 768
},
{
"epoch": 28.52,
"learning_rate": 1.2011038428087263e-07,
"loss": 0.1173,
"step": 770
},
{
"epoch": 28.59,
"learning_rate": 1.0842083184307239e-07,
"loss": 0.1095,
"step": 772
},
{
"epoch": 28.67,
"learning_rate": 9.732651010578408e-08,
"loss": 0.1135,
"step": 774
},
{
"epoch": 28.74,
"learning_rate": 8.682808667534348e-08,
"loss": 0.1166,
"step": 776
},
{
"epoch": 28.81,
"learning_rate": 7.692619329960571e-08,
"loss": 0.1044,
"step": 778
},
{
"epoch": 28.89,
"learning_rate": 6.762142582993458e-08,
"loss": 0.1179,
"step": 780
},
{
"epoch": 28.96,
"learning_rate": 5.8914344185344584e-08,
"loss": 0.1084,
"step": 782
},
{
"epoch": 29.04,
"learning_rate": 5.0805472318806726e-08,
"loss": 0.1074,
"step": 784
},
{
"epoch": 29.11,
"learning_rate": 4.329529818572265e-08,
"loss": 0.1149,
"step": 786
},
{
"epoch": 29.19,
"learning_rate": 3.6384273714560324e-08,
"loss": 0.1215,
"step": 788
},
{
"epoch": 29.26,
"learning_rate": 3.0072814779656956e-08,
"loss": 0.1111,
"step": 790
},
{
"epoch": 29.33,
"learning_rate": 2.4361301176195617e-08,
"loss": 0.1036,
"step": 792
},
{
"epoch": 29.41,
"learning_rate": 1.9250076597351343e-08,
"loss": 0.1069,
"step": 794
},
{
"epoch": 29.48,
"learning_rate": 1.4739448613607655e-08,
"loss": 0.1097,
"step": 796
},
{
"epoch": 29.56,
"learning_rate": 1.082968865424805e-08,
"loss": 0.1123,
"step": 798
},
{
"epoch": 29.63,
"learning_rate": 7.521031991024608e-09,
"loss": 0.1128,
"step": 800
}
],
"logging_steps": 2,
"max_steps": 810,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"total_flos": 1.2489159543555686e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}