SW2-TO-DA / trainer_state.json
Augusto777's picture
End of training
c5c8d69 verified
{
"best_metric": 0.9193548387096774,
"best_model_checkpoint": "SW2-TO-DA\\checkpoint-130",
"epoch": 38.62068965517241,
"eval_steps": 500,
"global_step": 560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.69,
"learning_rate": 2.6785714285714284e-05,
"loss": 1.4955,
"step": 10
},
{
"epoch": 0.97,
"eval_accuracy": 0.08064516129032258,
"eval_loss": 1.558031439781189,
"eval_runtime": 2.4051,
"eval_samples_per_second": 25.778,
"eval_steps_per_second": 1.663,
"step": 14
},
{
"epoch": 1.38,
"learning_rate": 5.357142857142857e-05,
"loss": 1.3943,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.6451612903225806,
"eval_loss": 1.1315828561782837,
"eval_runtime": 1.9485,
"eval_samples_per_second": 31.82,
"eval_steps_per_second": 2.053,
"step": 29
},
{
"epoch": 2.07,
"learning_rate": 8.035714285714285e-05,
"loss": 1.2678,
"step": 30
},
{
"epoch": 2.76,
"learning_rate": 0.00010714285714285714,
"loss": 1.0056,
"step": 40
},
{
"epoch": 2.97,
"eval_accuracy": 0.7419354838709677,
"eval_loss": 0.6406522393226624,
"eval_runtime": 2.1315,
"eval_samples_per_second": 29.087,
"eval_steps_per_second": 1.877,
"step": 43
},
{
"epoch": 3.45,
"learning_rate": 0.00013392857142857144,
"loss": 0.7744,
"step": 50
},
{
"epoch": 4.0,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.4265057146549225,
"eval_runtime": 1.9565,
"eval_samples_per_second": 31.69,
"eval_steps_per_second": 2.045,
"step": 58
},
{
"epoch": 4.14,
"learning_rate": 0.0001488095238095238,
"loss": 0.7109,
"step": 60
},
{
"epoch": 4.83,
"learning_rate": 0.00014583333333333332,
"loss": 0.6022,
"step": 70
},
{
"epoch": 4.97,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.4360726773738861,
"eval_runtime": 1.949,
"eval_samples_per_second": 31.812,
"eval_steps_per_second": 2.052,
"step": 72
},
{
"epoch": 5.52,
"learning_rate": 0.00014285714285714284,
"loss": 0.5854,
"step": 80
},
{
"epoch": 6.0,
"eval_accuracy": 0.8064516129032258,
"eval_loss": 0.5508103370666504,
"eval_runtime": 1.948,
"eval_samples_per_second": 31.828,
"eval_steps_per_second": 2.053,
"step": 87
},
{
"epoch": 6.21,
"learning_rate": 0.00013988095238095236,
"loss": 0.5151,
"step": 90
},
{
"epoch": 6.9,
"learning_rate": 0.00013690476190476189,
"loss": 0.4581,
"step": 100
},
{
"epoch": 6.97,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.3123740255832672,
"eval_runtime": 1.9775,
"eval_samples_per_second": 31.353,
"eval_steps_per_second": 2.023,
"step": 101
},
{
"epoch": 7.59,
"learning_rate": 0.00013392857142857144,
"loss": 0.386,
"step": 110
},
{
"epoch": 8.0,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.31687411665916443,
"eval_runtime": 2.0035,
"eval_samples_per_second": 30.946,
"eval_steps_per_second": 1.997,
"step": 116
},
{
"epoch": 8.28,
"learning_rate": 0.00013095238095238093,
"loss": 0.4182,
"step": 120
},
{
"epoch": 8.97,
"learning_rate": 0.00012797619047619045,
"loss": 0.347,
"step": 130
},
{
"epoch": 8.97,
"eval_accuracy": 0.9193548387096774,
"eval_loss": 0.22072885930538177,
"eval_runtime": 2.034,
"eval_samples_per_second": 30.482,
"eval_steps_per_second": 1.967,
"step": 130
},
{
"epoch": 9.66,
"learning_rate": 0.000125,
"loss": 0.3873,
"step": 140
},
{
"epoch": 10.0,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.5968738198280334,
"eval_runtime": 1.986,
"eval_samples_per_second": 31.219,
"eval_steps_per_second": 2.014,
"step": 145
},
{
"epoch": 10.34,
"learning_rate": 0.00012202380952380951,
"loss": 0.3508,
"step": 150
},
{
"epoch": 10.97,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.342462956905365,
"eval_runtime": 1.981,
"eval_samples_per_second": 31.298,
"eval_steps_per_second": 2.019,
"step": 159
},
{
"epoch": 11.03,
"learning_rate": 0.00011904761904761903,
"loss": 0.3437,
"step": 160
},
{
"epoch": 11.72,
"learning_rate": 0.00011607142857142857,
"loss": 0.274,
"step": 170
},
{
"epoch": 12.0,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.3376210927963257,
"eval_runtime": 2.0573,
"eval_samples_per_second": 30.136,
"eval_steps_per_second": 1.944,
"step": 174
},
{
"epoch": 12.41,
"learning_rate": 0.00011309523809523808,
"loss": 0.2615,
"step": 180
},
{
"epoch": 12.97,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.4912601709365845,
"eval_runtime": 2.019,
"eval_samples_per_second": 30.709,
"eval_steps_per_second": 1.981,
"step": 188
},
{
"epoch": 13.1,
"learning_rate": 0.0001101190476190476,
"loss": 0.2471,
"step": 190
},
{
"epoch": 13.79,
"learning_rate": 0.00010714285714285714,
"loss": 0.3118,
"step": 200
},
{
"epoch": 14.0,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.40341296792030334,
"eval_runtime": 1.9425,
"eval_samples_per_second": 31.918,
"eval_steps_per_second": 2.059,
"step": 203
},
{
"epoch": 14.48,
"learning_rate": 0.00010416666666666666,
"loss": 0.2205,
"step": 210
},
{
"epoch": 14.97,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.3167090117931366,
"eval_runtime": 1.966,
"eval_samples_per_second": 31.537,
"eval_steps_per_second": 2.035,
"step": 217
},
{
"epoch": 15.17,
"learning_rate": 0.00010119047619047618,
"loss": 0.2102,
"step": 220
},
{
"epoch": 15.86,
"learning_rate": 9.82142857142857e-05,
"loss": 0.2325,
"step": 230
},
{
"epoch": 16.0,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.3042737543582916,
"eval_runtime": 1.9585,
"eval_samples_per_second": 31.658,
"eval_steps_per_second": 2.042,
"step": 232
},
{
"epoch": 16.55,
"learning_rate": 9.523809523809523e-05,
"loss": 0.1914,
"step": 240
},
{
"epoch": 16.97,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.4256087839603424,
"eval_runtime": 1.9625,
"eval_samples_per_second": 31.593,
"eval_steps_per_second": 2.038,
"step": 246
},
{
"epoch": 17.24,
"learning_rate": 9.226190476190476e-05,
"loss": 0.2044,
"step": 250
},
{
"epoch": 17.93,
"learning_rate": 8.928571428571427e-05,
"loss": 0.1997,
"step": 260
},
{
"epoch": 18.0,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.37694820761680603,
"eval_runtime": 2.009,
"eval_samples_per_second": 30.862,
"eval_steps_per_second": 1.991,
"step": 261
},
{
"epoch": 18.62,
"learning_rate": 8.63095238095238e-05,
"loss": 0.1752,
"step": 270
},
{
"epoch": 18.97,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.587546169757843,
"eval_runtime": 2.1045,
"eval_samples_per_second": 29.461,
"eval_steps_per_second": 1.901,
"step": 275
},
{
"epoch": 19.31,
"learning_rate": 8.333333333333333e-05,
"loss": 0.237,
"step": 280
},
{
"epoch": 20.0,
"learning_rate": 8.035714285714285e-05,
"loss": 0.1685,
"step": 290
},
{
"epoch": 20.0,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.41043660044670105,
"eval_runtime": 1.8789,
"eval_samples_per_second": 32.997,
"eval_steps_per_second": 2.129,
"step": 290
},
{
"epoch": 20.69,
"learning_rate": 7.738095238095239e-05,
"loss": 0.1736,
"step": 300
},
{
"epoch": 20.97,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.5480897426605225,
"eval_runtime": 1.8865,
"eval_samples_per_second": 32.866,
"eval_steps_per_second": 2.12,
"step": 304
},
{
"epoch": 21.38,
"learning_rate": 7.44047619047619e-05,
"loss": 0.1901,
"step": 310
},
{
"epoch": 22.0,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.3800370693206787,
"eval_runtime": 1.916,
"eval_samples_per_second": 32.36,
"eval_steps_per_second": 2.088,
"step": 319
},
{
"epoch": 22.07,
"learning_rate": 7.142857142857142e-05,
"loss": 0.1718,
"step": 320
},
{
"epoch": 22.76,
"learning_rate": 6.845238095238094e-05,
"loss": 0.1426,
"step": 330
},
{
"epoch": 22.97,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.44246000051498413,
"eval_runtime": 1.939,
"eval_samples_per_second": 31.976,
"eval_steps_per_second": 2.063,
"step": 333
},
{
"epoch": 23.45,
"learning_rate": 6.547619047619047e-05,
"loss": 0.1251,
"step": 340
},
{
"epoch": 24.0,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.3373814523220062,
"eval_runtime": 1.9735,
"eval_samples_per_second": 31.416,
"eval_steps_per_second": 2.027,
"step": 348
},
{
"epoch": 24.14,
"learning_rate": 6.25e-05,
"loss": 0.1329,
"step": 350
},
{
"epoch": 24.83,
"learning_rate": 5.952380952380952e-05,
"loss": 0.1326,
"step": 360
},
{
"epoch": 24.97,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.3627336323261261,
"eval_runtime": 1.951,
"eval_samples_per_second": 31.779,
"eval_steps_per_second": 2.05,
"step": 362
},
{
"epoch": 25.52,
"learning_rate": 5.654761904761904e-05,
"loss": 0.1271,
"step": 370
},
{
"epoch": 26.0,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.4767535328865051,
"eval_runtime": 1.977,
"eval_samples_per_second": 31.361,
"eval_steps_per_second": 2.023,
"step": 377
},
{
"epoch": 26.21,
"learning_rate": 5.357142857142857e-05,
"loss": 0.1415,
"step": 380
},
{
"epoch": 26.9,
"learning_rate": 5.059523809523809e-05,
"loss": 0.1835,
"step": 390
},
{
"epoch": 26.97,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.5603958368301392,
"eval_runtime": 2.171,
"eval_samples_per_second": 28.558,
"eval_steps_per_second": 1.842,
"step": 391
},
{
"epoch": 27.59,
"learning_rate": 4.7619047619047614e-05,
"loss": 0.1378,
"step": 400
},
{
"epoch": 28.0,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.4130818247795105,
"eval_runtime": 2.0055,
"eval_samples_per_second": 30.916,
"eval_steps_per_second": 1.995,
"step": 406
},
{
"epoch": 28.28,
"learning_rate": 4.4642857142857136e-05,
"loss": 0.1253,
"step": 410
},
{
"epoch": 28.97,
"learning_rate": 4.1666666666666665e-05,
"loss": 0.1349,
"step": 420
},
{
"epoch": 28.97,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.5103474259376526,
"eval_runtime": 1.9155,
"eval_samples_per_second": 32.368,
"eval_steps_per_second": 2.088,
"step": 420
},
{
"epoch": 29.66,
"learning_rate": 3.8690476190476195e-05,
"loss": 0.0999,
"step": 430
},
{
"epoch": 30.0,
"eval_accuracy": 0.9193548387096774,
"eval_loss": 0.37231481075286865,
"eval_runtime": 1.94,
"eval_samples_per_second": 31.959,
"eval_steps_per_second": 2.062,
"step": 435
},
{
"epoch": 30.34,
"learning_rate": 3.571428571428571e-05,
"loss": 0.1198,
"step": 440
},
{
"epoch": 30.97,
"eval_accuracy": 0.8709677419354839,
"eval_loss": 0.5360597968101501,
"eval_runtime": 1.9235,
"eval_samples_per_second": 32.233,
"eval_steps_per_second": 2.08,
"step": 449
},
{
"epoch": 31.03,
"learning_rate": 3.273809523809523e-05,
"loss": 0.1301,
"step": 450
},
{
"epoch": 31.72,
"learning_rate": 2.976190476190476e-05,
"loss": 0.1195,
"step": 460
},
{
"epoch": 32.0,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.41935569047927856,
"eval_runtime": 1.9165,
"eval_samples_per_second": 32.351,
"eval_steps_per_second": 2.087,
"step": 464
},
{
"epoch": 32.41,
"learning_rate": 2.6785714285714284e-05,
"loss": 0.0766,
"step": 470
},
{
"epoch": 32.97,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.4133478105068207,
"eval_runtime": 1.924,
"eval_samples_per_second": 32.225,
"eval_steps_per_second": 2.079,
"step": 478
},
{
"epoch": 33.1,
"learning_rate": 2.3809523809523807e-05,
"loss": 0.1043,
"step": 480
},
{
"epoch": 33.79,
"learning_rate": 2.0833333333333333e-05,
"loss": 0.0862,
"step": 490
},
{
"epoch": 34.0,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.42390120029449463,
"eval_runtime": 1.9,
"eval_samples_per_second": 32.632,
"eval_steps_per_second": 2.105,
"step": 493
},
{
"epoch": 34.48,
"learning_rate": 1.7857142857142855e-05,
"loss": 0.1048,
"step": 500
},
{
"epoch": 34.97,
"eval_accuracy": 0.9193548387096774,
"eval_loss": 0.4120253920555115,
"eval_runtime": 1.954,
"eval_samples_per_second": 31.73,
"eval_steps_per_second": 2.047,
"step": 507
},
{
"epoch": 35.17,
"learning_rate": 1.488095238095238e-05,
"loss": 0.0884,
"step": 510
},
{
"epoch": 35.86,
"learning_rate": 1.1904761904761903e-05,
"loss": 0.0902,
"step": 520
},
{
"epoch": 36.0,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.44083285331726074,
"eval_runtime": 1.9034,
"eval_samples_per_second": 32.573,
"eval_steps_per_second": 2.101,
"step": 522
},
{
"epoch": 36.55,
"learning_rate": 8.928571428571428e-06,
"loss": 0.088,
"step": 530
},
{
"epoch": 36.97,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.4435848295688629,
"eval_runtime": 1.8659,
"eval_samples_per_second": 33.227,
"eval_steps_per_second": 2.144,
"step": 536
},
{
"epoch": 37.24,
"learning_rate": 5.952380952380952e-06,
"loss": 0.0864,
"step": 540
},
{
"epoch": 37.93,
"learning_rate": 2.976190476190476e-06,
"loss": 0.089,
"step": 550
},
{
"epoch": 38.0,
"eval_accuracy": 0.9032258064516129,
"eval_loss": 0.46484920382499695,
"eval_runtime": 1.9695,
"eval_samples_per_second": 31.481,
"eval_steps_per_second": 2.031,
"step": 551
},
{
"epoch": 38.62,
"learning_rate": 0.0,
"loss": 0.1089,
"step": 560
},
{
"epoch": 38.62,
"eval_accuracy": 0.8870967741935484,
"eval_loss": 0.46501168608665466,
"eval_runtime": 2.034,
"eval_samples_per_second": 30.482,
"eval_steps_per_second": 1.967,
"step": 560
},
{
"epoch": 38.62,
"step": 560,
"total_flos": 1.1660953582043136e+18,
"train_loss": 0.30870234380875317,
"train_runtime": 1509.6082,
"train_samples_per_second": 24.589,
"train_steps_per_second": 0.371
}
],
"logging_steps": 10,
"max_steps": 560,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 1.1660953582043136e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}