wav2vec2-5Class-Validation-Mobil / trainer_state.json
anderloh's picture
End of training
8e6d934 verified
{
"best_metric": 0.5836298932384342,
"best_model_checkpoint": "wav2vec2-5Class-Validation-Mobil/checkpoint-773",
"epoch": 276.9230769230769,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.92,
"eval_accuracy": 0.3202846975088968,
"eval_loss": 1.602386713027954,
"eval_runtime": 4.3468,
"eval_samples_per_second": 64.645,
"eval_steps_per_second": 0.69,
"step": 3
},
{
"epoch": 1.85,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.6022303104400635,
"eval_runtime": 3.573,
"eval_samples_per_second": 78.645,
"eval_steps_per_second": 0.84,
"step": 6
},
{
"epoch": 2.77,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.601974368095398,
"eval_runtime": 4.6151,
"eval_samples_per_second": 60.887,
"eval_steps_per_second": 0.65,
"step": 9
},
{
"epoch": 4.0,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.6014597415924072,
"eval_runtime": 5.3659,
"eval_samples_per_second": 52.368,
"eval_steps_per_second": 0.559,
"step": 13
},
{
"epoch": 4.92,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.6009386777877808,
"eval_runtime": 3.4504,
"eval_samples_per_second": 81.439,
"eval_steps_per_second": 0.869,
"step": 16
},
{
"epoch": 5.85,
"eval_accuracy": 0.31316725978647686,
"eval_loss": 1.6003268957138062,
"eval_runtime": 4.2937,
"eval_samples_per_second": 65.445,
"eval_steps_per_second": 0.699,
"step": 19
},
{
"epoch": 6.77,
"eval_accuracy": 0.30604982206405695,
"eval_loss": 1.5995941162109375,
"eval_runtime": 3.7057,
"eval_samples_per_second": 75.828,
"eval_steps_per_second": 0.81,
"step": 22
},
{
"epoch": 8.0,
"eval_accuracy": 0.298932384341637,
"eval_loss": 1.5984183549880981,
"eval_runtime": 4.6458,
"eval_samples_per_second": 60.484,
"eval_steps_per_second": 0.646,
"step": 26
},
{
"epoch": 8.92,
"eval_accuracy": 0.2918149466192171,
"eval_loss": 1.5974235534667969,
"eval_runtime": 5.0303,
"eval_samples_per_second": 55.861,
"eval_steps_per_second": 0.596,
"step": 29
},
{
"epoch": 9.85,
"eval_accuracy": 0.27402135231316727,
"eval_loss": 1.596360445022583,
"eval_runtime": 3.3268,
"eval_samples_per_second": 84.465,
"eval_steps_per_second": 0.902,
"step": 32
},
{
"epoch": 10.77,
"eval_accuracy": 0.2597864768683274,
"eval_loss": 1.5951836109161377,
"eval_runtime": 3.1882,
"eval_samples_per_second": 88.138,
"eval_steps_per_second": 0.941,
"step": 35
},
{
"epoch": 12.0,
"eval_accuracy": 0.26334519572953735,
"eval_loss": 1.593432903289795,
"eval_runtime": 4.2078,
"eval_samples_per_second": 66.78,
"eval_steps_per_second": 0.713,
"step": 39
},
{
"epoch": 12.92,
"eval_accuracy": 0.27402135231316727,
"eval_loss": 1.5920255184173584,
"eval_runtime": 4.9074,
"eval_samples_per_second": 57.261,
"eval_steps_per_second": 0.611,
"step": 42
},
{
"epoch": 13.85,
"eval_accuracy": 0.298932384341637,
"eval_loss": 1.5904992818832397,
"eval_runtime": 5.4737,
"eval_samples_per_second": 51.336,
"eval_steps_per_second": 0.548,
"step": 45
},
{
"epoch": 14.77,
"eval_accuracy": 0.298932384341637,
"eval_loss": 1.5889027118682861,
"eval_runtime": 5.4844,
"eval_samples_per_second": 51.236,
"eval_steps_per_second": 0.547,
"step": 48
},
{
"epoch": 16.0,
"eval_accuracy": 0.2846975088967972,
"eval_loss": 1.5867795944213867,
"eval_runtime": 4.8027,
"eval_samples_per_second": 58.508,
"eval_steps_per_second": 0.625,
"step": 52
},
{
"epoch": 16.92,
"eval_accuracy": 0.2846975088967972,
"eval_loss": 1.5850844383239746,
"eval_runtime": 4.5938,
"eval_samples_per_second": 61.169,
"eval_steps_per_second": 0.653,
"step": 55
},
{
"epoch": 17.85,
"eval_accuracy": 0.2846975088967972,
"eval_loss": 1.5833449363708496,
"eval_runtime": 3.4722,
"eval_samples_per_second": 80.929,
"eval_steps_per_second": 0.864,
"step": 58
},
{
"epoch": 18.77,
"eval_accuracy": 0.26334519572953735,
"eval_loss": 1.58156418800354,
"eval_runtime": 3.9515,
"eval_samples_per_second": 71.112,
"eval_steps_per_second": 0.759,
"step": 61
},
{
"epoch": 20.0,
"eval_accuracy": 0.24555160142348753,
"eval_loss": 1.579047441482544,
"eval_runtime": 4.2125,
"eval_samples_per_second": 66.707,
"eval_steps_per_second": 0.712,
"step": 65
},
{
"epoch": 20.92,
"eval_accuracy": 0.24199288256227758,
"eval_loss": 1.576985478401184,
"eval_runtime": 4.6275,
"eval_samples_per_second": 60.724,
"eval_steps_per_second": 0.648,
"step": 68
},
{
"epoch": 21.85,
"eval_accuracy": 0.23487544483985764,
"eval_loss": 1.574812650680542,
"eval_runtime": 4.9061,
"eval_samples_per_second": 57.275,
"eval_steps_per_second": 0.611,
"step": 71
},
{
"epoch": 22.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5727591514587402,
"eval_runtime": 5.6003,
"eval_samples_per_second": 50.176,
"eval_steps_per_second": 0.536,
"step": 74
},
{
"epoch": 24.0,
"eval_accuracy": 0.2277580071174377,
"eval_loss": 1.5699430704116821,
"eval_runtime": 4.5057,
"eval_samples_per_second": 62.365,
"eval_steps_per_second": 0.666,
"step": 78
},
{
"epoch": 24.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.567823052406311,
"eval_runtime": 4.5731,
"eval_samples_per_second": 61.446,
"eval_steps_per_second": 0.656,
"step": 81
},
{
"epoch": 25.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5657496452331543,
"eval_runtime": 4.3556,
"eval_samples_per_second": 64.515,
"eval_steps_per_second": 0.689,
"step": 84
},
{
"epoch": 26.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5637929439544678,
"eval_runtime": 5.9441,
"eval_samples_per_second": 47.274,
"eval_steps_per_second": 0.505,
"step": 87
},
{
"epoch": 28.0,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5613017082214355,
"eval_runtime": 4.5762,
"eval_samples_per_second": 61.404,
"eval_steps_per_second": 0.656,
"step": 91
},
{
"epoch": 28.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5597190856933594,
"eval_runtime": 4.1813,
"eval_samples_per_second": 67.204,
"eval_steps_per_second": 0.717,
"step": 94
},
{
"epoch": 29.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5587605237960815,
"eval_runtime": 4.6749,
"eval_samples_per_second": 60.108,
"eval_steps_per_second": 0.642,
"step": 97
},
{
"epoch": 30.77,
"grad_norm": 66708.1953125,
"learning_rate": 2.962962962962963e-05,
"loss": 1.561,
"step": 100
},
{
"epoch": 30.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5586402416229248,
"eval_runtime": 5.2059,
"eval_samples_per_second": 53.977,
"eval_steps_per_second": 0.576,
"step": 100
},
{
"epoch": 32.0,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5596789121627808,
"eval_runtime": 4.428,
"eval_samples_per_second": 63.46,
"eval_steps_per_second": 0.678,
"step": 104
},
{
"epoch": 32.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5619100332260132,
"eval_runtime": 3.3009,
"eval_samples_per_second": 85.128,
"eval_steps_per_second": 0.909,
"step": 107
},
{
"epoch": 33.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5660569667816162,
"eval_runtime": 3.371,
"eval_samples_per_second": 83.357,
"eval_steps_per_second": 0.89,
"step": 110
},
{
"epoch": 34.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5720349550247192,
"eval_runtime": 3.9013,
"eval_samples_per_second": 72.028,
"eval_steps_per_second": 0.769,
"step": 113
},
{
"epoch": 36.0,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5833308696746826,
"eval_runtime": 4.7161,
"eval_samples_per_second": 59.583,
"eval_steps_per_second": 0.636,
"step": 117
},
{
"epoch": 36.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.5957212448120117,
"eval_runtime": 4.1977,
"eval_samples_per_second": 66.942,
"eval_steps_per_second": 0.715,
"step": 120
},
{
"epoch": 37.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.6119521856307983,
"eval_runtime": 3.034,
"eval_samples_per_second": 92.618,
"eval_steps_per_second": 0.989,
"step": 123
},
{
"epoch": 38.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.631814956665039,
"eval_runtime": 3.0252,
"eval_samples_per_second": 92.887,
"eval_steps_per_second": 0.992,
"step": 126
},
{
"epoch": 40.0,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.663757085800171,
"eval_runtime": 3.243,
"eval_samples_per_second": 86.648,
"eval_steps_per_second": 0.925,
"step": 130
},
{
"epoch": 40.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.6904593706130981,
"eval_runtime": 3.1943,
"eval_samples_per_second": 87.97,
"eval_steps_per_second": 0.939,
"step": 133
},
{
"epoch": 41.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.7196571826934814,
"eval_runtime": 3.4764,
"eval_samples_per_second": 80.832,
"eval_steps_per_second": 0.863,
"step": 136
},
{
"epoch": 42.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.750288724899292,
"eval_runtime": 3.415,
"eval_samples_per_second": 82.283,
"eval_steps_per_second": 0.878,
"step": 139
},
{
"epoch": 44.0,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.7802847623825073,
"eval_runtime": 3.0779,
"eval_samples_per_second": 91.295,
"eval_steps_per_second": 0.975,
"step": 143
},
{
"epoch": 44.92,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.7917312383651733,
"eval_runtime": 3.6229,
"eval_samples_per_second": 77.562,
"eval_steps_per_second": 0.828,
"step": 146
},
{
"epoch": 45.85,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.7919948101043701,
"eval_runtime": 3.2733,
"eval_samples_per_second": 85.845,
"eval_steps_per_second": 0.916,
"step": 149
},
{
"epoch": 46.77,
"eval_accuracy": 0.2313167259786477,
"eval_loss": 1.7869282960891724,
"eval_runtime": 3.1081,
"eval_samples_per_second": 90.408,
"eval_steps_per_second": 0.965,
"step": 152
},
{
"epoch": 48.0,
"eval_accuracy": 0.2597864768683274,
"eval_loss": 1.7699986696243286,
"eval_runtime": 3.2526,
"eval_samples_per_second": 86.392,
"eval_steps_per_second": 0.922,
"step": 156
},
{
"epoch": 48.92,
"eval_accuracy": 0.27402135231316727,
"eval_loss": 1.7525370121002197,
"eval_runtime": 2.789,
"eval_samples_per_second": 100.754,
"eval_steps_per_second": 1.076,
"step": 159
},
{
"epoch": 49.85,
"eval_accuracy": 0.2775800711743772,
"eval_loss": 1.7406829595565796,
"eval_runtime": 3.5203,
"eval_samples_per_second": 79.822,
"eval_steps_per_second": 0.852,
"step": 162
},
{
"epoch": 50.77,
"eval_accuracy": 0.2918149466192171,
"eval_loss": 1.7306878566741943,
"eval_runtime": 3.4092,
"eval_samples_per_second": 82.424,
"eval_steps_per_second": 0.88,
"step": 165
},
{
"epoch": 52.0,
"eval_accuracy": 0.3096085409252669,
"eval_loss": 1.7241473197937012,
"eval_runtime": 3.4771,
"eval_samples_per_second": 80.815,
"eval_steps_per_second": 0.863,
"step": 169
},
{
"epoch": 52.92,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.7242671251296997,
"eval_runtime": 3.338,
"eval_samples_per_second": 84.182,
"eval_steps_per_second": 0.899,
"step": 172
},
{
"epoch": 53.85,
"eval_accuracy": 0.3167259786476868,
"eval_loss": 1.7253814935684204,
"eval_runtime": 3.037,
"eval_samples_per_second": 92.524,
"eval_steps_per_second": 0.988,
"step": 175
},
{
"epoch": 54.77,
"eval_accuracy": 0.3238434163701068,
"eval_loss": 1.7232733964920044,
"eval_runtime": 3.3453,
"eval_samples_per_second": 84.0,
"eval_steps_per_second": 0.897,
"step": 178
},
{
"epoch": 56.0,
"eval_accuracy": 0.3238434163701068,
"eval_loss": 1.7224737405776978,
"eval_runtime": 4.1856,
"eval_samples_per_second": 67.135,
"eval_steps_per_second": 0.717,
"step": 182
},
{
"epoch": 56.92,
"eval_accuracy": 0.3274021352313167,
"eval_loss": 1.7187089920043945,
"eval_runtime": 4.0825,
"eval_samples_per_second": 68.831,
"eval_steps_per_second": 0.735,
"step": 185
},
{
"epoch": 57.85,
"eval_accuracy": 0.3274021352313167,
"eval_loss": 1.7172435522079468,
"eval_runtime": 4.3988,
"eval_samples_per_second": 63.881,
"eval_steps_per_second": 0.682,
"step": 188
},
{
"epoch": 58.77,
"eval_accuracy": 0.33451957295373663,
"eval_loss": 1.7145518064498901,
"eval_runtime": 3.5886,
"eval_samples_per_second": 78.303,
"eval_steps_per_second": 0.836,
"step": 191
},
{
"epoch": 60.0,
"eval_accuracy": 0.3487544483985765,
"eval_loss": 1.711957573890686,
"eval_runtime": 3.0988,
"eval_samples_per_second": 90.681,
"eval_steps_per_second": 0.968,
"step": 195
},
{
"epoch": 60.92,
"eval_accuracy": 0.35587188612099646,
"eval_loss": 1.7048858404159546,
"eval_runtime": 3.3244,
"eval_samples_per_second": 84.526,
"eval_steps_per_second": 0.902,
"step": 198
},
{
"epoch": 61.54,
"grad_norm": 26972.24609375,
"learning_rate": 2.5925925925925925e-05,
"loss": 1.3094,
"step": 200
},
{
"epoch": 61.85,
"eval_accuracy": 0.3594306049822064,
"eval_loss": 1.702221155166626,
"eval_runtime": 2.9103,
"eval_samples_per_second": 96.553,
"eval_steps_per_second": 1.031,
"step": 201
},
{
"epoch": 62.77,
"eval_accuracy": 0.3736654804270463,
"eval_loss": 1.6912201642990112,
"eval_runtime": 3.4935,
"eval_samples_per_second": 80.435,
"eval_steps_per_second": 0.859,
"step": 204
},
{
"epoch": 64.0,
"eval_accuracy": 0.37722419928825623,
"eval_loss": 1.6797984838485718,
"eval_runtime": 3.0757,
"eval_samples_per_second": 91.361,
"eval_steps_per_second": 0.975,
"step": 208
},
{
"epoch": 64.92,
"eval_accuracy": 0.3807829181494662,
"eval_loss": 1.6687328815460205,
"eval_runtime": 3.281,
"eval_samples_per_second": 85.645,
"eval_steps_per_second": 0.914,
"step": 211
},
{
"epoch": 65.85,
"eval_accuracy": 0.38434163701067614,
"eval_loss": 1.6568727493286133,
"eval_runtime": 3.0158,
"eval_samples_per_second": 93.174,
"eval_steps_per_second": 0.995,
"step": 214
},
{
"epoch": 66.77,
"eval_accuracy": 0.3914590747330961,
"eval_loss": 1.642698049545288,
"eval_runtime": 2.9377,
"eval_samples_per_second": 95.654,
"eval_steps_per_second": 1.021,
"step": 217
},
{
"epoch": 68.0,
"eval_accuracy": 0.3914590747330961,
"eval_loss": 1.6301021575927734,
"eval_runtime": 2.9188,
"eval_samples_per_second": 96.272,
"eval_steps_per_second": 1.028,
"step": 221
},
{
"epoch": 68.92,
"eval_accuracy": 0.39501779359430605,
"eval_loss": 1.6217372417449951,
"eval_runtime": 3.1297,
"eval_samples_per_second": 89.784,
"eval_steps_per_second": 0.959,
"step": 224
},
{
"epoch": 69.85,
"eval_accuracy": 0.39501779359430605,
"eval_loss": 1.6203086376190186,
"eval_runtime": 3.3261,
"eval_samples_per_second": 84.482,
"eval_steps_per_second": 0.902,
"step": 227
},
{
"epoch": 70.77,
"eval_accuracy": 0.39501779359430605,
"eval_loss": 1.6257439851760864,
"eval_runtime": 3.1941,
"eval_samples_per_second": 87.974,
"eval_steps_per_second": 0.939,
"step": 230
},
{
"epoch": 72.0,
"eval_accuracy": 0.40213523131672596,
"eval_loss": 1.6192444562911987,
"eval_runtime": 2.8716,
"eval_samples_per_second": 97.855,
"eval_steps_per_second": 1.045,
"step": 234
},
{
"epoch": 72.92,
"eval_accuracy": 0.4092526690391459,
"eval_loss": 1.6044347286224365,
"eval_runtime": 3.3231,
"eval_samples_per_second": 84.559,
"eval_steps_per_second": 0.903,
"step": 237
},
{
"epoch": 73.85,
"eval_accuracy": 0.4306049822064057,
"eval_loss": 1.5868154764175415,
"eval_runtime": 3.0078,
"eval_samples_per_second": 93.422,
"eval_steps_per_second": 0.997,
"step": 240
},
{
"epoch": 74.77,
"eval_accuracy": 0.4377224199288256,
"eval_loss": 1.5786783695220947,
"eval_runtime": 3.1108,
"eval_samples_per_second": 90.332,
"eval_steps_per_second": 0.964,
"step": 243
},
{
"epoch": 76.0,
"eval_accuracy": 0.43416370106761565,
"eval_loss": 1.5762073993682861,
"eval_runtime": 4.8033,
"eval_samples_per_second": 58.501,
"eval_steps_per_second": 0.625,
"step": 247
},
{
"epoch": 76.92,
"eval_accuracy": 0.4377224199288256,
"eval_loss": 1.5717052221298218,
"eval_runtime": 4.9388,
"eval_samples_per_second": 56.896,
"eval_steps_per_second": 0.607,
"step": 250
},
{
"epoch": 77.85,
"eval_accuracy": 0.43416370106761565,
"eval_loss": 1.5673516988754272,
"eval_runtime": 3.5439,
"eval_samples_per_second": 79.29,
"eval_steps_per_second": 0.847,
"step": 253
},
{
"epoch": 78.77,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.5683715343475342,
"eval_runtime": 2.9479,
"eval_samples_per_second": 95.323,
"eval_steps_per_second": 1.018,
"step": 256
},
{
"epoch": 80.0,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.5619009733200073,
"eval_runtime": 3.2494,
"eval_samples_per_second": 86.478,
"eval_steps_per_second": 0.923,
"step": 260
},
{
"epoch": 80.92,
"eval_accuracy": 0.4306049822064057,
"eval_loss": 1.5554527044296265,
"eval_runtime": 3.0649,
"eval_samples_per_second": 91.683,
"eval_steps_per_second": 0.979,
"step": 263
},
{
"epoch": 81.85,
"eval_accuracy": 0.43416370106761565,
"eval_loss": 1.550489068031311,
"eval_runtime": 3.1587,
"eval_samples_per_second": 88.96,
"eval_steps_per_second": 0.95,
"step": 266
},
{
"epoch": 82.77,
"eval_accuracy": 0.4412811387900356,
"eval_loss": 1.5385645627975464,
"eval_runtime": 3.1715,
"eval_samples_per_second": 88.601,
"eval_steps_per_second": 0.946,
"step": 269
},
{
"epoch": 84.0,
"eval_accuracy": 0.4377224199288256,
"eval_loss": 1.536201000213623,
"eval_runtime": 3.2602,
"eval_samples_per_second": 86.191,
"eval_steps_per_second": 0.92,
"step": 273
},
{
"epoch": 84.92,
"eval_accuracy": 0.43416370106761565,
"eval_loss": 1.5410619974136353,
"eval_runtime": 2.9845,
"eval_samples_per_second": 94.153,
"eval_steps_per_second": 1.005,
"step": 276
},
{
"epoch": 85.85,
"eval_accuracy": 0.43416370106761565,
"eval_loss": 1.5452691316604614,
"eval_runtime": 3.4013,
"eval_samples_per_second": 82.616,
"eval_steps_per_second": 0.882,
"step": 279
},
{
"epoch": 86.77,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.5611252784729004,
"eval_runtime": 2.9135,
"eval_samples_per_second": 96.447,
"eval_steps_per_second": 1.03,
"step": 282
},
{
"epoch": 88.0,
"eval_accuracy": 0.4199288256227758,
"eval_loss": 1.5766078233718872,
"eval_runtime": 2.8634,
"eval_samples_per_second": 98.135,
"eval_steps_per_second": 1.048,
"step": 286
},
{
"epoch": 88.92,
"eval_accuracy": 0.4199288256227758,
"eval_loss": 1.5781065225601196,
"eval_runtime": 3.1014,
"eval_samples_per_second": 90.606,
"eval_steps_per_second": 0.967,
"step": 289
},
{
"epoch": 89.85,
"eval_accuracy": 0.4234875444839858,
"eval_loss": 1.5674538612365723,
"eval_runtime": 3.5418,
"eval_samples_per_second": 79.339,
"eval_steps_per_second": 0.847,
"step": 292
},
{
"epoch": 90.77,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.558840036392212,
"eval_runtime": 4.5717,
"eval_samples_per_second": 61.464,
"eval_steps_per_second": 0.656,
"step": 295
},
{
"epoch": 92.0,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.5495978593826294,
"eval_runtime": 2.971,
"eval_samples_per_second": 94.581,
"eval_steps_per_second": 1.01,
"step": 299
},
{
"epoch": 92.31,
"grad_norm": 27984.919921875,
"learning_rate": 2.222222222222222e-05,
"loss": 1.0538,
"step": 300
},
{
"epoch": 92.92,
"eval_accuracy": 0.42704626334519574,
"eval_loss": 1.5492929220199585,
"eval_runtime": 3.229,
"eval_samples_per_second": 87.023,
"eval_steps_per_second": 0.929,
"step": 302
},
{
"epoch": 93.85,
"eval_accuracy": 0.4234875444839858,
"eval_loss": 1.5539740324020386,
"eval_runtime": 2.993,
"eval_samples_per_second": 93.886,
"eval_steps_per_second": 1.002,
"step": 305
},
{
"epoch": 94.77,
"eval_accuracy": 0.41637010676156583,
"eval_loss": 1.5620365142822266,
"eval_runtime": 3.5102,
"eval_samples_per_second": 80.052,
"eval_steps_per_second": 0.855,
"step": 308
},
{
"epoch": 96.0,
"eval_accuracy": 0.41637010676156583,
"eval_loss": 1.564751148223877,
"eval_runtime": 3.7132,
"eval_samples_per_second": 75.677,
"eval_steps_per_second": 0.808,
"step": 312
},
{
"epoch": 96.92,
"eval_accuracy": 0.41637010676156583,
"eval_loss": 1.561686396598816,
"eval_runtime": 4.9316,
"eval_samples_per_second": 56.98,
"eval_steps_per_second": 0.608,
"step": 315
},
{
"epoch": 97.85,
"eval_accuracy": 0.4234875444839858,
"eval_loss": 1.5461145639419556,
"eval_runtime": 3.1512,
"eval_samples_per_second": 89.173,
"eval_steps_per_second": 0.952,
"step": 318
},
{
"epoch": 98.77,
"eval_accuracy": 0.4306049822064057,
"eval_loss": 1.5348182916641235,
"eval_runtime": 4.3294,
"eval_samples_per_second": 64.906,
"eval_steps_per_second": 0.693,
"step": 321
},
{
"epoch": 100.0,
"eval_accuracy": 0.4306049822064057,
"eval_loss": 1.5345805883407593,
"eval_runtime": 3.3762,
"eval_samples_per_second": 83.23,
"eval_steps_per_second": 0.889,
"step": 325
},
{
"epoch": 100.92,
"eval_accuracy": 0.41637010676156583,
"eval_loss": 1.5465843677520752,
"eval_runtime": 3.8288,
"eval_samples_per_second": 73.391,
"eval_steps_per_second": 0.784,
"step": 328
},
{
"epoch": 101.85,
"eval_accuracy": 0.4128113879003559,
"eval_loss": 1.5547189712524414,
"eval_runtime": 4.3332,
"eval_samples_per_second": 64.848,
"eval_steps_per_second": 0.692,
"step": 331
},
{
"epoch": 102.77,
"eval_accuracy": 0.4128113879003559,
"eval_loss": 1.5559605360031128,
"eval_runtime": 3.2588,
"eval_samples_per_second": 86.229,
"eval_steps_per_second": 0.921,
"step": 334
},
{
"epoch": 104.0,
"eval_accuracy": 0.4306049822064057,
"eval_loss": 1.5315039157867432,
"eval_runtime": 4.5744,
"eval_samples_per_second": 61.429,
"eval_steps_per_second": 0.656,
"step": 338
},
{
"epoch": 104.92,
"eval_accuracy": 0.44483985765124556,
"eval_loss": 1.5124022960662842,
"eval_runtime": 3.3067,
"eval_samples_per_second": 84.979,
"eval_steps_per_second": 0.907,
"step": 341
},
{
"epoch": 105.85,
"eval_accuracy": 0.44483985765124556,
"eval_loss": 1.5044087171554565,
"eval_runtime": 3.9949,
"eval_samples_per_second": 70.341,
"eval_steps_per_second": 0.751,
"step": 344
},
{
"epoch": 106.77,
"eval_accuracy": 0.4483985765124555,
"eval_loss": 1.5010027885437012,
"eval_runtime": 3.5698,
"eval_samples_per_second": 78.716,
"eval_steps_per_second": 0.84,
"step": 347
},
{
"epoch": 108.0,
"eval_accuracy": 0.44483985765124556,
"eval_loss": 1.5004721879959106,
"eval_runtime": 2.9807,
"eval_samples_per_second": 94.273,
"eval_steps_per_second": 1.006,
"step": 351
},
{
"epoch": 108.92,
"eval_accuracy": 0.44483985765124556,
"eval_loss": 1.499153971672058,
"eval_runtime": 2.8868,
"eval_samples_per_second": 97.339,
"eval_steps_per_second": 1.039,
"step": 354
},
{
"epoch": 109.85,
"eval_accuracy": 0.4483985765124555,
"eval_loss": 1.4993938207626343,
"eval_runtime": 3.2052,
"eval_samples_per_second": 87.67,
"eval_steps_per_second": 0.936,
"step": 357
},
{
"epoch": 110.77,
"eval_accuracy": 0.45195729537366547,
"eval_loss": 1.4987653493881226,
"eval_runtime": 3.3473,
"eval_samples_per_second": 83.949,
"eval_steps_per_second": 0.896,
"step": 360
},
{
"epoch": 112.0,
"eval_accuracy": 0.46619217081850534,
"eval_loss": 1.5004514455795288,
"eval_runtime": 2.8714,
"eval_samples_per_second": 97.862,
"eval_steps_per_second": 1.045,
"step": 364
},
{
"epoch": 112.92,
"eval_accuracy": 0.47330960854092524,
"eval_loss": 1.5010361671447754,
"eval_runtime": 3.6886,
"eval_samples_per_second": 76.182,
"eval_steps_per_second": 0.813,
"step": 367
},
{
"epoch": 113.85,
"eval_accuracy": 0.4697508896797153,
"eval_loss": 1.4968541860580444,
"eval_runtime": 3.5621,
"eval_samples_per_second": 78.886,
"eval_steps_per_second": 0.842,
"step": 370
},
{
"epoch": 114.77,
"eval_accuracy": 0.47330960854092524,
"eval_loss": 1.4775702953338623,
"eval_runtime": 4.3842,
"eval_samples_per_second": 64.093,
"eval_steps_per_second": 0.684,
"step": 373
},
{
"epoch": 116.0,
"eval_accuracy": 0.47686832740213525,
"eval_loss": 1.4527899026870728,
"eval_runtime": 4.7808,
"eval_samples_per_second": 58.777,
"eval_steps_per_second": 0.628,
"step": 377
},
{
"epoch": 116.92,
"eval_accuracy": 0.49466192170818507,
"eval_loss": 1.4394866228103638,
"eval_runtime": 5.0753,
"eval_samples_per_second": 55.366,
"eval_steps_per_second": 0.591,
"step": 380
},
{
"epoch": 117.85,
"eval_accuracy": 0.498220640569395,
"eval_loss": 1.4310173988342285,
"eval_runtime": 4.758,
"eval_samples_per_second": 59.058,
"eval_steps_per_second": 0.631,
"step": 383
},
{
"epoch": 118.77,
"eval_accuracy": 0.49466192170818507,
"eval_loss": 1.4314603805541992,
"eval_runtime": 3.9673,
"eval_samples_per_second": 70.829,
"eval_steps_per_second": 0.756,
"step": 386
},
{
"epoch": 120.0,
"eval_accuracy": 0.49466192170818507,
"eval_loss": 1.4388599395751953,
"eval_runtime": 4.1069,
"eval_samples_per_second": 68.422,
"eval_steps_per_second": 0.73,
"step": 390
},
{
"epoch": 120.92,
"eval_accuracy": 0.498220640569395,
"eval_loss": 1.4374699592590332,
"eval_runtime": 5.1154,
"eval_samples_per_second": 54.933,
"eval_steps_per_second": 0.586,
"step": 393
},
{
"epoch": 121.85,
"eval_accuracy": 0.498220640569395,
"eval_loss": 1.4381343126296997,
"eval_runtime": 4.1133,
"eval_samples_per_second": 68.315,
"eval_steps_per_second": 0.729,
"step": 396
},
{
"epoch": 122.77,
"eval_accuracy": 0.498220640569395,
"eval_loss": 1.4246776103973389,
"eval_runtime": 3.9833,
"eval_samples_per_second": 70.544,
"eval_steps_per_second": 0.753,
"step": 399
},
{
"epoch": 123.08,
"grad_norm": 31388.482421875,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.8509,
"step": 400
},
{
"epoch": 124.0,
"eval_accuracy": 0.498220640569395,
"eval_loss": 1.4195659160614014,
"eval_runtime": 4.1654,
"eval_samples_per_second": 67.461,
"eval_steps_per_second": 0.72,
"step": 403
},
{
"epoch": 124.92,
"eval_accuracy": 0.505338078291815,
"eval_loss": 1.4178649187088013,
"eval_runtime": 5.0869,
"eval_samples_per_second": 55.239,
"eval_steps_per_second": 0.59,
"step": 406
},
{
"epoch": 125.85,
"eval_accuracy": 0.505338078291815,
"eval_loss": 1.40910804271698,
"eval_runtime": 4.5242,
"eval_samples_per_second": 62.11,
"eval_steps_per_second": 0.663,
"step": 409
},
{
"epoch": 126.77,
"eval_accuracy": 0.505338078291815,
"eval_loss": 1.3957635164260864,
"eval_runtime": 4.5377,
"eval_samples_per_second": 61.926,
"eval_steps_per_second": 0.661,
"step": 412
},
{
"epoch": 128.0,
"eval_accuracy": 0.5088967971530249,
"eval_loss": 1.3736003637313843,
"eval_runtime": 3.6994,
"eval_samples_per_second": 75.958,
"eval_steps_per_second": 0.811,
"step": 416
},
{
"epoch": 128.92,
"eval_accuracy": 0.5088967971530249,
"eval_loss": 1.3661431074142456,
"eval_runtime": 4.0248,
"eval_samples_per_second": 69.817,
"eval_steps_per_second": 0.745,
"step": 419
},
{
"epoch": 129.85,
"eval_accuracy": 0.5124555160142349,
"eval_loss": 1.369443416595459,
"eval_runtime": 4.9876,
"eval_samples_per_second": 56.34,
"eval_steps_per_second": 0.601,
"step": 422
},
{
"epoch": 130.77,
"eval_accuracy": 0.5124555160142349,
"eval_loss": 1.3807623386383057,
"eval_runtime": 3.5494,
"eval_samples_per_second": 79.169,
"eval_steps_per_second": 0.845,
"step": 425
},
{
"epoch": 132.0,
"eval_accuracy": 0.5124555160142349,
"eval_loss": 1.3818711042404175,
"eval_runtime": 3.9503,
"eval_samples_per_second": 71.134,
"eval_steps_per_second": 0.759,
"step": 429
},
{
"epoch": 132.92,
"eval_accuracy": 0.5124555160142349,
"eval_loss": 1.3859163522720337,
"eval_runtime": 4.2041,
"eval_samples_per_second": 66.84,
"eval_steps_per_second": 0.714,
"step": 432
},
{
"epoch": 133.85,
"eval_accuracy": 0.5231316725978647,
"eval_loss": 1.378004789352417,
"eval_runtime": 3.8384,
"eval_samples_per_second": 73.208,
"eval_steps_per_second": 0.782,
"step": 435
},
{
"epoch": 134.77,
"eval_accuracy": 0.5231316725978647,
"eval_loss": 1.3696413040161133,
"eval_runtime": 4.6334,
"eval_samples_per_second": 60.646,
"eval_steps_per_second": 0.647,
"step": 438
},
{
"epoch": 136.0,
"eval_accuracy": 0.5302491103202847,
"eval_loss": 1.3564013242721558,
"eval_runtime": 4.002,
"eval_samples_per_second": 70.215,
"eval_steps_per_second": 0.75,
"step": 442
},
{
"epoch": 136.92,
"eval_accuracy": 0.5338078291814946,
"eval_loss": 1.3421210050582886,
"eval_runtime": 4.0161,
"eval_samples_per_second": 69.968,
"eval_steps_per_second": 0.747,
"step": 445
},
{
"epoch": 137.85,
"eval_accuracy": 0.5373665480427047,
"eval_loss": 1.325627326965332,
"eval_runtime": 4.156,
"eval_samples_per_second": 67.613,
"eval_steps_per_second": 0.722,
"step": 448
},
{
"epoch": 138.77,
"eval_accuracy": 0.5373665480427047,
"eval_loss": 1.3274290561676025,
"eval_runtime": 3.9911,
"eval_samples_per_second": 70.407,
"eval_steps_per_second": 0.752,
"step": 451
},
{
"epoch": 140.0,
"eval_accuracy": 0.5409252669039146,
"eval_loss": 1.3401566743850708,
"eval_runtime": 4.4088,
"eval_samples_per_second": 63.736,
"eval_steps_per_second": 0.68,
"step": 455
},
{
"epoch": 140.92,
"eval_accuracy": 0.5409252669039146,
"eval_loss": 1.351689338684082,
"eval_runtime": 4.4409,
"eval_samples_per_second": 63.276,
"eval_steps_per_second": 0.676,
"step": 458
},
{
"epoch": 141.85,
"eval_accuracy": 0.5409252669039146,
"eval_loss": 1.3585495948791504,
"eval_runtime": 3.7955,
"eval_samples_per_second": 74.035,
"eval_steps_per_second": 0.79,
"step": 461
},
{
"epoch": 142.77,
"eval_accuracy": 0.5373665480427047,
"eval_loss": 1.3592112064361572,
"eval_runtime": 3.3552,
"eval_samples_per_second": 83.75,
"eval_steps_per_second": 0.894,
"step": 464
},
{
"epoch": 144.0,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.3329293727874756,
"eval_runtime": 5.3044,
"eval_samples_per_second": 52.975,
"eval_steps_per_second": 0.566,
"step": 468
},
{
"epoch": 144.92,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.312560796737671,
"eval_runtime": 4.319,
"eval_samples_per_second": 65.061,
"eval_steps_per_second": 0.695,
"step": 471
},
{
"epoch": 145.85,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3075566291809082,
"eval_runtime": 3.9528,
"eval_samples_per_second": 71.09,
"eval_steps_per_second": 0.759,
"step": 474
},
{
"epoch": 146.77,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.3146412372589111,
"eval_runtime": 4.3249,
"eval_samples_per_second": 64.973,
"eval_steps_per_second": 0.694,
"step": 477
},
{
"epoch": 148.0,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3345069885253906,
"eval_runtime": 3.9127,
"eval_samples_per_second": 71.817,
"eval_steps_per_second": 0.767,
"step": 481
},
{
"epoch": 148.92,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3408929109573364,
"eval_runtime": 4.1463,
"eval_samples_per_second": 67.771,
"eval_steps_per_second": 0.724,
"step": 484
},
{
"epoch": 149.85,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3374032974243164,
"eval_runtime": 4.2345,
"eval_samples_per_second": 66.359,
"eval_steps_per_second": 0.708,
"step": 487
},
{
"epoch": 150.77,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.3227189779281616,
"eval_runtime": 4.3006,
"eval_samples_per_second": 65.339,
"eval_steps_per_second": 0.698,
"step": 490
},
{
"epoch": 152.0,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3200651407241821,
"eval_runtime": 4.4216,
"eval_samples_per_second": 63.551,
"eval_steps_per_second": 0.678,
"step": 494
},
{
"epoch": 152.92,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3174102306365967,
"eval_runtime": 4.4898,
"eval_samples_per_second": 62.586,
"eval_steps_per_second": 0.668,
"step": 497
},
{
"epoch": 153.85,
"grad_norm": 24984.7734375,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.7118,
"step": 500
},
{
"epoch": 153.85,
"eval_accuracy": 0.5444839857651246,
"eval_loss": 1.3073471784591675,
"eval_runtime": 4.2385,
"eval_samples_per_second": 66.297,
"eval_steps_per_second": 0.708,
"step": 500
},
{
"epoch": 154.77,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.2983657121658325,
"eval_runtime": 3.4569,
"eval_samples_per_second": 81.286,
"eval_steps_per_second": 0.868,
"step": 503
},
{
"epoch": 156.0,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.2974605560302734,
"eval_runtime": 4.3467,
"eval_samples_per_second": 64.647,
"eval_steps_per_second": 0.69,
"step": 507
},
{
"epoch": 156.92,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.3027478456497192,
"eval_runtime": 4.5106,
"eval_samples_per_second": 62.297,
"eval_steps_per_second": 0.665,
"step": 510
},
{
"epoch": 157.85,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.3088507652282715,
"eval_runtime": 4.2508,
"eval_samples_per_second": 66.105,
"eval_steps_per_second": 0.706,
"step": 513
},
{
"epoch": 158.77,
"eval_accuracy": 0.5480427046263345,
"eval_loss": 1.3138750791549683,
"eval_runtime": 4.4205,
"eval_samples_per_second": 63.567,
"eval_steps_per_second": 0.679,
"step": 516
},
{
"epoch": 160.0,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.3067928552627563,
"eval_runtime": 4.2488,
"eval_samples_per_second": 66.136,
"eval_steps_per_second": 0.706,
"step": 520
},
{
"epoch": 160.92,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.3011025190353394,
"eval_runtime": 4.3025,
"eval_samples_per_second": 65.31,
"eval_steps_per_second": 0.697,
"step": 523
},
{
"epoch": 161.85,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.2957364320755005,
"eval_runtime": 4.3812,
"eval_samples_per_second": 64.137,
"eval_steps_per_second": 0.685,
"step": 526
},
{
"epoch": 162.77,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.296021819114685,
"eval_runtime": 4.6921,
"eval_samples_per_second": 59.887,
"eval_steps_per_second": 0.639,
"step": 529
},
{
"epoch": 164.0,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.3158953189849854,
"eval_runtime": 4.7452,
"eval_samples_per_second": 59.218,
"eval_steps_per_second": 0.632,
"step": 533
},
{
"epoch": 164.92,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.3257168531417847,
"eval_runtime": 4.4128,
"eval_samples_per_second": 63.678,
"eval_steps_per_second": 0.68,
"step": 536
},
{
"epoch": 165.85,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.3312301635742188,
"eval_runtime": 3.6447,
"eval_samples_per_second": 77.099,
"eval_steps_per_second": 0.823,
"step": 539
},
{
"epoch": 166.77,
"eval_accuracy": 0.5516014234875445,
"eval_loss": 1.322218418121338,
"eval_runtime": 4.2773,
"eval_samples_per_second": 65.695,
"eval_steps_per_second": 0.701,
"step": 542
},
{
"epoch": 168.0,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.298622488975525,
"eval_runtime": 4.5789,
"eval_samples_per_second": 61.369,
"eval_steps_per_second": 0.655,
"step": 546
},
{
"epoch": 168.92,
"eval_accuracy": 0.5587188612099644,
"eval_loss": 1.289797306060791,
"eval_runtime": 4.5328,
"eval_samples_per_second": 61.993,
"eval_steps_per_second": 0.662,
"step": 549
},
{
"epoch": 169.85,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.2937852144241333,
"eval_runtime": 3.7509,
"eval_samples_per_second": 74.915,
"eval_steps_per_second": 0.8,
"step": 552
},
{
"epoch": 170.77,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.290231704711914,
"eval_runtime": 4.1153,
"eval_samples_per_second": 68.282,
"eval_steps_per_second": 0.729,
"step": 555
},
{
"epoch": 172.0,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.287913203239441,
"eval_runtime": 4.7912,
"eval_samples_per_second": 58.649,
"eval_steps_per_second": 0.626,
"step": 559
},
{
"epoch": 172.92,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.283803939819336,
"eval_runtime": 4.5456,
"eval_samples_per_second": 61.818,
"eval_steps_per_second": 0.66,
"step": 562
},
{
"epoch": 173.85,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2811965942382812,
"eval_runtime": 4.4869,
"eval_samples_per_second": 62.627,
"eval_steps_per_second": 0.669,
"step": 565
},
{
"epoch": 174.77,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2863661050796509,
"eval_runtime": 4.2715,
"eval_samples_per_second": 65.785,
"eval_steps_per_second": 0.702,
"step": 568
},
{
"epoch": 176.0,
"eval_accuracy": 0.5551601423487544,
"eval_loss": 1.2934131622314453,
"eval_runtime": 4.643,
"eval_samples_per_second": 60.522,
"eval_steps_per_second": 0.646,
"step": 572
},
{
"epoch": 176.92,
"eval_accuracy": 0.5587188612099644,
"eval_loss": 1.2940202951431274,
"eval_runtime": 4.2681,
"eval_samples_per_second": 65.837,
"eval_steps_per_second": 0.703,
"step": 575
},
{
"epoch": 177.85,
"eval_accuracy": 0.5587188612099644,
"eval_loss": 1.298832654953003,
"eval_runtime": 4.2991,
"eval_samples_per_second": 65.363,
"eval_steps_per_second": 0.698,
"step": 578
},
{
"epoch": 178.77,
"eval_accuracy": 0.5622775800711743,
"eval_loss": 1.295286774635315,
"eval_runtime": 4.1989,
"eval_samples_per_second": 66.922,
"eval_steps_per_second": 0.714,
"step": 581
},
{
"epoch": 180.0,
"eval_accuracy": 0.5587188612099644,
"eval_loss": 1.2971975803375244,
"eval_runtime": 4.7188,
"eval_samples_per_second": 59.549,
"eval_steps_per_second": 0.636,
"step": 585
},
{
"epoch": 180.92,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2936004400253296,
"eval_runtime": 4.813,
"eval_samples_per_second": 58.383,
"eval_steps_per_second": 0.623,
"step": 588
},
{
"epoch": 181.85,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2928047180175781,
"eval_runtime": 4.1735,
"eval_samples_per_second": 67.33,
"eval_steps_per_second": 0.719,
"step": 591
},
{
"epoch": 182.77,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.291295051574707,
"eval_runtime": 4.4694,
"eval_samples_per_second": 62.872,
"eval_steps_per_second": 0.671,
"step": 594
},
{
"epoch": 184.0,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2824889421463013,
"eval_runtime": 4.0765,
"eval_samples_per_second": 68.932,
"eval_steps_per_second": 0.736,
"step": 598
},
{
"epoch": 184.62,
"grad_norm": 29892.71484375,
"learning_rate": 1.111111111111111e-05,
"loss": 0.6473,
"step": 600
},
{
"epoch": 184.92,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2735832929611206,
"eval_runtime": 4.6704,
"eval_samples_per_second": 60.166,
"eval_steps_per_second": 0.642,
"step": 601
},
{
"epoch": 185.85,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2714898586273193,
"eval_runtime": 4.6432,
"eval_samples_per_second": 60.519,
"eval_steps_per_second": 0.646,
"step": 604
},
{
"epoch": 186.77,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2703534364700317,
"eval_runtime": 4.1853,
"eval_samples_per_second": 67.139,
"eval_steps_per_second": 0.717,
"step": 607
},
{
"epoch": 188.0,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2716755867004395,
"eval_runtime": 4.1775,
"eval_samples_per_second": 67.265,
"eval_steps_per_second": 0.718,
"step": 611
},
{
"epoch": 188.92,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2724348306655884,
"eval_runtime": 4.9312,
"eval_samples_per_second": 56.984,
"eval_steps_per_second": 0.608,
"step": 614
},
{
"epoch": 189.85,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2763242721557617,
"eval_runtime": 3.5712,
"eval_samples_per_second": 78.685,
"eval_steps_per_second": 0.84,
"step": 617
},
{
"epoch": 190.77,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2811599969863892,
"eval_runtime": 4.2324,
"eval_samples_per_second": 66.393,
"eval_steps_per_second": 0.709,
"step": 620
},
{
"epoch": 192.0,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2791301012039185,
"eval_runtime": 4.4625,
"eval_samples_per_second": 62.97,
"eval_steps_per_second": 0.672,
"step": 624
},
{
"epoch": 192.92,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2697654962539673,
"eval_runtime": 4.2766,
"eval_samples_per_second": 65.707,
"eval_steps_per_second": 0.701,
"step": 627
},
{
"epoch": 193.85,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.269476294517517,
"eval_runtime": 4.2862,
"eval_samples_per_second": 65.56,
"eval_steps_per_second": 0.7,
"step": 630
},
{
"epoch": 194.77,
"eval_accuracy": 0.5693950177935944,
"eval_loss": 1.2703962326049805,
"eval_runtime": 4.2135,
"eval_samples_per_second": 66.69,
"eval_steps_per_second": 0.712,
"step": 633
},
{
"epoch": 196.0,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2736749649047852,
"eval_runtime": 4.1666,
"eval_samples_per_second": 67.441,
"eval_steps_per_second": 0.72,
"step": 637
},
{
"epoch": 196.92,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2782082557678223,
"eval_runtime": 4.3682,
"eval_samples_per_second": 64.329,
"eval_steps_per_second": 0.687,
"step": 640
},
{
"epoch": 197.85,
"eval_accuracy": 0.5622775800711743,
"eval_loss": 1.2813825607299805,
"eval_runtime": 5.6488,
"eval_samples_per_second": 49.745,
"eval_steps_per_second": 0.531,
"step": 643
},
{
"epoch": 198.77,
"eval_accuracy": 0.5622775800711743,
"eval_loss": 1.2819089889526367,
"eval_runtime": 5.1916,
"eval_samples_per_second": 54.126,
"eval_steps_per_second": 0.578,
"step": 646
},
{
"epoch": 200.0,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.274595022201538,
"eval_runtime": 4.4378,
"eval_samples_per_second": 63.32,
"eval_steps_per_second": 0.676,
"step": 650
},
{
"epoch": 200.92,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2694467306137085,
"eval_runtime": 4.797,
"eval_samples_per_second": 58.579,
"eval_steps_per_second": 0.625,
"step": 653
},
{
"epoch": 201.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.262547254562378,
"eval_runtime": 4.6991,
"eval_samples_per_second": 59.798,
"eval_steps_per_second": 0.638,
"step": 656
},
{
"epoch": 202.77,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2575123310089111,
"eval_runtime": 4.9663,
"eval_samples_per_second": 56.582,
"eval_steps_per_second": 0.604,
"step": 659
},
{
"epoch": 204.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2548755407333374,
"eval_runtime": 5.2012,
"eval_samples_per_second": 54.026,
"eval_steps_per_second": 0.577,
"step": 663
},
{
"epoch": 204.92,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2623133659362793,
"eval_runtime": 4.5347,
"eval_samples_per_second": 61.967,
"eval_steps_per_second": 0.662,
"step": 666
},
{
"epoch": 205.85,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2665455341339111,
"eval_runtime": 3.1603,
"eval_samples_per_second": 88.917,
"eval_steps_per_second": 0.949,
"step": 669
},
{
"epoch": 206.77,
"eval_accuracy": 0.5658362989323843,
"eval_loss": 1.2684026956558228,
"eval_runtime": 4.2009,
"eval_samples_per_second": 66.89,
"eval_steps_per_second": 0.714,
"step": 672
},
{
"epoch": 208.0,
"eval_accuracy": 0.5622775800711743,
"eval_loss": 1.277047038078308,
"eval_runtime": 4.3489,
"eval_samples_per_second": 64.613,
"eval_steps_per_second": 0.69,
"step": 676
},
{
"epoch": 208.92,
"eval_accuracy": 0.5622775800711743,
"eval_loss": 1.2807551622390747,
"eval_runtime": 3.8563,
"eval_samples_per_second": 72.867,
"eval_steps_per_second": 0.778,
"step": 679
},
{
"epoch": 209.85,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2761532068252563,
"eval_runtime": 4.7161,
"eval_samples_per_second": 59.583,
"eval_steps_per_second": 0.636,
"step": 682
},
{
"epoch": 210.77,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2759194374084473,
"eval_runtime": 5.0376,
"eval_samples_per_second": 55.781,
"eval_steps_per_second": 0.596,
"step": 685
},
{
"epoch": 212.0,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2752187252044678,
"eval_runtime": 4.5842,
"eval_samples_per_second": 61.297,
"eval_steps_per_second": 0.654,
"step": 689
},
{
"epoch": 212.92,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.275394082069397,
"eval_runtime": 4.2209,
"eval_samples_per_second": 66.573,
"eval_steps_per_second": 0.711,
"step": 692
},
{
"epoch": 213.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.272161602973938,
"eval_runtime": 4.7348,
"eval_samples_per_second": 59.347,
"eval_steps_per_second": 0.634,
"step": 695
},
{
"epoch": 214.77,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.273858904838562,
"eval_runtime": 4.0254,
"eval_samples_per_second": 69.808,
"eval_steps_per_second": 0.745,
"step": 698
},
{
"epoch": 215.38,
"grad_norm": 28098.056640625,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.613,
"step": 700
},
{
"epoch": 216.0,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2782981395721436,
"eval_runtime": 4.6723,
"eval_samples_per_second": 60.142,
"eval_steps_per_second": 0.642,
"step": 702
},
{
"epoch": 216.92,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2774933576583862,
"eval_runtime": 4.576,
"eval_samples_per_second": 61.407,
"eval_steps_per_second": 0.656,
"step": 705
},
{
"epoch": 217.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2740654945373535,
"eval_runtime": 4.8253,
"eval_samples_per_second": 58.234,
"eval_steps_per_second": 0.622,
"step": 708
},
{
"epoch": 218.77,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2705509662628174,
"eval_runtime": 4.386,
"eval_samples_per_second": 64.067,
"eval_steps_per_second": 0.684,
"step": 711
},
{
"epoch": 220.0,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2627956867218018,
"eval_runtime": 4.2817,
"eval_samples_per_second": 65.628,
"eval_steps_per_second": 0.701,
"step": 715
},
{
"epoch": 220.92,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2580970525741577,
"eval_runtime": 3.9386,
"eval_samples_per_second": 71.344,
"eval_steps_per_second": 0.762,
"step": 718
},
{
"epoch": 221.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2567566633224487,
"eval_runtime": 4.3353,
"eval_samples_per_second": 64.817,
"eval_steps_per_second": 0.692,
"step": 721
},
{
"epoch": 222.77,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2558982372283936,
"eval_runtime": 3.7135,
"eval_samples_per_second": 75.67,
"eval_steps_per_second": 0.808,
"step": 724
},
{
"epoch": 224.0,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2502700090408325,
"eval_runtime": 5.0636,
"eval_samples_per_second": 55.494,
"eval_steps_per_second": 0.592,
"step": 728
},
{
"epoch": 224.92,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2497973442077637,
"eval_runtime": 4.7669,
"eval_samples_per_second": 58.948,
"eval_steps_per_second": 0.629,
"step": 731
},
{
"epoch": 225.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2500195503234863,
"eval_runtime": 3.9522,
"eval_samples_per_second": 71.099,
"eval_steps_per_second": 0.759,
"step": 734
},
{
"epoch": 226.77,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2490234375,
"eval_runtime": 4.1869,
"eval_samples_per_second": 67.114,
"eval_steps_per_second": 0.717,
"step": 737
},
{
"epoch": 228.0,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2531741857528687,
"eval_runtime": 3.9865,
"eval_samples_per_second": 70.489,
"eval_steps_per_second": 0.753,
"step": 741
},
{
"epoch": 228.92,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2572293281555176,
"eval_runtime": 5.2241,
"eval_samples_per_second": 53.789,
"eval_steps_per_second": 0.574,
"step": 744
},
{
"epoch": 229.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2598803043365479,
"eval_runtime": 4.1402,
"eval_samples_per_second": 67.87,
"eval_steps_per_second": 0.725,
"step": 747
},
{
"epoch": 230.77,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2600898742675781,
"eval_runtime": 3.9785,
"eval_samples_per_second": 70.63,
"eval_steps_per_second": 0.754,
"step": 750
},
{
"epoch": 232.0,
"eval_accuracy": 0.5729537366548043,
"eval_loss": 1.2625129222869873,
"eval_runtime": 4.1458,
"eval_samples_per_second": 67.779,
"eval_steps_per_second": 0.724,
"step": 754
},
{
"epoch": 232.92,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2635974884033203,
"eval_runtime": 4.5032,
"eval_samples_per_second": 62.401,
"eval_steps_per_second": 0.666,
"step": 757
},
{
"epoch": 233.85,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2629433870315552,
"eval_runtime": 4.1399,
"eval_samples_per_second": 67.876,
"eval_steps_per_second": 0.725,
"step": 760
},
{
"epoch": 234.77,
"eval_accuracy": 0.5765124555160143,
"eval_loss": 1.2600425481796265,
"eval_runtime": 4.2407,
"eval_samples_per_second": 66.263,
"eval_steps_per_second": 0.707,
"step": 763
},
{
"epoch": 236.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2558783292770386,
"eval_runtime": 4.1208,
"eval_samples_per_second": 68.19,
"eval_steps_per_second": 0.728,
"step": 767
},
{
"epoch": 236.92,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2534478902816772,
"eval_runtime": 3.8139,
"eval_samples_per_second": 73.678,
"eval_steps_per_second": 0.787,
"step": 770
},
{
"epoch": 237.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2513927221298218,
"eval_runtime": 4.6813,
"eval_samples_per_second": 60.026,
"eval_steps_per_second": 0.641,
"step": 773
},
{
"epoch": 238.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2508091926574707,
"eval_runtime": 4.2671,
"eval_samples_per_second": 65.852,
"eval_steps_per_second": 0.703,
"step": 776
},
{
"epoch": 240.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2487518787384033,
"eval_runtime": 3.7642,
"eval_samples_per_second": 74.651,
"eval_steps_per_second": 0.797,
"step": 780
},
{
"epoch": 240.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2483351230621338,
"eval_runtime": 4.8941,
"eval_samples_per_second": 57.416,
"eval_steps_per_second": 0.613,
"step": 783
},
{
"epoch": 241.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2500139474868774,
"eval_runtime": 4.274,
"eval_samples_per_second": 65.746,
"eval_steps_per_second": 0.702,
"step": 786
},
{
"epoch": 242.77,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2503968477249146,
"eval_runtime": 4.4982,
"eval_samples_per_second": 62.469,
"eval_steps_per_second": 0.667,
"step": 789
},
{
"epoch": 244.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2521419525146484,
"eval_runtime": 4.0413,
"eval_samples_per_second": 69.532,
"eval_steps_per_second": 0.742,
"step": 793
},
{
"epoch": 244.92,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2532862424850464,
"eval_runtime": 4.1262,
"eval_samples_per_second": 68.101,
"eval_steps_per_second": 0.727,
"step": 796
},
{
"epoch": 245.85,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.251287817955017,
"eval_runtime": 3.9321,
"eval_samples_per_second": 71.463,
"eval_steps_per_second": 0.763,
"step": 799
},
{
"epoch": 246.15,
"grad_norm": 63046.29296875,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.5946,
"step": 800
},
{
"epoch": 246.77,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2513457536697388,
"eval_runtime": 4.1155,
"eval_samples_per_second": 68.279,
"eval_steps_per_second": 0.729,
"step": 802
},
{
"epoch": 248.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2507133483886719,
"eval_runtime": 4.3807,
"eval_samples_per_second": 64.145,
"eval_steps_per_second": 0.685,
"step": 806
},
{
"epoch": 248.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2491704225540161,
"eval_runtime": 4.0611,
"eval_samples_per_second": 69.193,
"eval_steps_per_second": 0.739,
"step": 809
},
{
"epoch": 249.85,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2499818801879883,
"eval_runtime": 3.9673,
"eval_samples_per_second": 70.828,
"eval_steps_per_second": 0.756,
"step": 812
},
{
"epoch": 250.77,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2505466938018799,
"eval_runtime": 4.5211,
"eval_samples_per_second": 62.153,
"eval_steps_per_second": 0.664,
"step": 815
},
{
"epoch": 252.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2519145011901855,
"eval_runtime": 5.2859,
"eval_samples_per_second": 53.16,
"eval_steps_per_second": 0.568,
"step": 819
},
{
"epoch": 252.92,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.253113865852356,
"eval_runtime": 4.0658,
"eval_samples_per_second": 69.113,
"eval_steps_per_second": 0.738,
"step": 822
},
{
"epoch": 253.85,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2538248300552368,
"eval_runtime": 4.1084,
"eval_samples_per_second": 68.396,
"eval_steps_per_second": 0.73,
"step": 825
},
{
"epoch": 254.77,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2532281875610352,
"eval_runtime": 4.0615,
"eval_samples_per_second": 69.186,
"eval_steps_per_second": 0.739,
"step": 828
},
{
"epoch": 256.0,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.2527676820755005,
"eval_runtime": 4.6892,
"eval_samples_per_second": 59.925,
"eval_steps_per_second": 0.64,
"step": 832
},
{
"epoch": 256.92,
"eval_accuracy": 0.5800711743772242,
"eval_loss": 1.252835988998413,
"eval_runtime": 3.7759,
"eval_samples_per_second": 74.42,
"eval_steps_per_second": 0.795,
"step": 835
},
{
"epoch": 257.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2521347999572754,
"eval_runtime": 3.5788,
"eval_samples_per_second": 78.519,
"eval_steps_per_second": 0.838,
"step": 838
},
{
"epoch": 258.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.252551555633545,
"eval_runtime": 3.9885,
"eval_samples_per_second": 70.452,
"eval_steps_per_second": 0.752,
"step": 841
},
{
"epoch": 260.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2527978420257568,
"eval_runtime": 3.7855,
"eval_samples_per_second": 74.231,
"eval_steps_per_second": 0.792,
"step": 845
},
{
"epoch": 260.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2529038190841675,
"eval_runtime": 3.8445,
"eval_samples_per_second": 73.091,
"eval_steps_per_second": 0.78,
"step": 848
},
{
"epoch": 261.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2528387308120728,
"eval_runtime": 3.7891,
"eval_samples_per_second": 74.16,
"eval_steps_per_second": 0.792,
"step": 851
},
{
"epoch": 262.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2516640424728394,
"eval_runtime": 4.2868,
"eval_samples_per_second": 65.55,
"eval_steps_per_second": 0.7,
"step": 854
},
{
"epoch": 264.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.251232385635376,
"eval_runtime": 3.7886,
"eval_samples_per_second": 74.169,
"eval_steps_per_second": 0.792,
"step": 858
},
{
"epoch": 264.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.251160979270935,
"eval_runtime": 4.4131,
"eval_samples_per_second": 63.674,
"eval_steps_per_second": 0.68,
"step": 861
},
{
"epoch": 265.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2503511905670166,
"eval_runtime": 3.8431,
"eval_samples_per_second": 73.118,
"eval_steps_per_second": 0.781,
"step": 864
},
{
"epoch": 266.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2499034404754639,
"eval_runtime": 3.7109,
"eval_samples_per_second": 75.723,
"eval_steps_per_second": 0.808,
"step": 867
},
{
"epoch": 268.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2496285438537598,
"eval_runtime": 4.0777,
"eval_samples_per_second": 68.912,
"eval_steps_per_second": 0.736,
"step": 871
},
{
"epoch": 268.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2497419118881226,
"eval_runtime": 4.4854,
"eval_samples_per_second": 62.648,
"eval_steps_per_second": 0.669,
"step": 874
},
{
"epoch": 269.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2500321865081787,
"eval_runtime": 5.2487,
"eval_samples_per_second": 53.537,
"eval_steps_per_second": 0.572,
"step": 877
},
{
"epoch": 270.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.250011682510376,
"eval_runtime": 4.3951,
"eval_samples_per_second": 63.935,
"eval_steps_per_second": 0.683,
"step": 880
},
{
"epoch": 272.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2498865127563477,
"eval_runtime": 4.1755,
"eval_samples_per_second": 67.297,
"eval_steps_per_second": 0.718,
"step": 884
},
{
"epoch": 272.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2500803470611572,
"eval_runtime": 4.6562,
"eval_samples_per_second": 60.349,
"eval_steps_per_second": 0.644,
"step": 887
},
{
"epoch": 273.85,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2503583431243896,
"eval_runtime": 4.5464,
"eval_samples_per_second": 61.807,
"eval_steps_per_second": 0.66,
"step": 890
},
{
"epoch": 274.77,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2506159543991089,
"eval_runtime": 3.9624,
"eval_samples_per_second": 70.917,
"eval_steps_per_second": 0.757,
"step": 893
},
{
"epoch": 276.0,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2505924701690674,
"eval_runtime": 4.0033,
"eval_samples_per_second": 70.192,
"eval_steps_per_second": 0.749,
"step": 897
},
{
"epoch": 276.92,
"grad_norm": 30700.47265625,
"learning_rate": 0.0,
"loss": 0.588,
"step": 900
},
{
"epoch": 276.92,
"eval_accuracy": 0.5836298932384342,
"eval_loss": 1.2505559921264648,
"eval_runtime": 4.7936,
"eval_samples_per_second": 58.62,
"eval_steps_per_second": 0.626,
"step": 900
},
{
"epoch": 276.92,
"step": 900,
"total_flos": 3.755576946691584e+18,
"train_loss": 0.8810926691691081,
"train_runtime": 3759.0782,
"train_samples_per_second": 123.541,
"train_steps_per_second": 0.239
}
],
"logging_steps": 100,
"max_steps": 900,
"num_input_tokens_seen": 0,
"num_train_epochs": 300,
"save_steps": 500,
"total_flos": 3.755576946691584e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}