gs-aristoBERTo / trainer_state.json
CNR-ILC's picture
ILC-CNR/gs-aristoBERTo
1e3af91 verified
{
"best_global_step": null,
"best_metric": 1.8485572338104248,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 24760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 8.519086837768555,
"learning_rate": 3.60080775444265e-05,
"loss": 2.7844,
"step": 2476
},
{
"epoch": 1.0,
"eval_loss": 2.363213539123535,
"eval_runtime": 30.9857,
"eval_samples_per_second": 142.582,
"eval_steps_per_second": 17.847,
"step": 2476
},
{
"epoch": 2.0,
"grad_norm": 8.55427360534668,
"learning_rate": 3.2008077544426496e-05,
"loss": 2.3842,
"step": 4952
},
{
"epoch": 2.0,
"eval_loss": 2.1710569858551025,
"eval_runtime": 31.1916,
"eval_samples_per_second": 141.641,
"eval_steps_per_second": 17.729,
"step": 4952
},
{
"epoch": 3.0,
"grad_norm": 7.62738037109375,
"learning_rate": 2.8012924071082395e-05,
"loss": 2.2331,
"step": 7428
},
{
"epoch": 3.0,
"eval_loss": 2.086531162261963,
"eval_runtime": 31.228,
"eval_samples_per_second": 141.475,
"eval_steps_per_second": 17.708,
"step": 7428
},
{
"epoch": 4.0,
"grad_norm": 8.015053749084473,
"learning_rate": 2.4012924071082392e-05,
"loss": 2.1379,
"step": 9904
},
{
"epoch": 4.0,
"eval_loss": 2.0061919689178467,
"eval_runtime": 31.2124,
"eval_samples_per_second": 141.546,
"eval_steps_per_second": 17.717,
"step": 9904
},
{
"epoch": 5.0,
"grad_norm": 7.610133647918701,
"learning_rate": 2.001292407108239e-05,
"loss": 2.0648,
"step": 12380
},
{
"epoch": 5.0,
"eval_loss": 1.9433549642562866,
"eval_runtime": 29.5538,
"eval_samples_per_second": 149.49,
"eval_steps_per_second": 18.712,
"step": 12380
},
{
"epoch": 6.0,
"grad_norm": 9.3466796875,
"learning_rate": 1.6016155088852988e-05,
"loss": 2.0067,
"step": 14856
},
{
"epoch": 6.0,
"eval_loss": 1.9069130420684814,
"eval_runtime": 31.3798,
"eval_samples_per_second": 140.791,
"eval_steps_per_second": 17.623,
"step": 14856
},
{
"epoch": 7.0,
"grad_norm": 8.526801109313965,
"learning_rate": 1.201615508885299e-05,
"loss": 1.9619,
"step": 17332
},
{
"epoch": 7.0,
"eval_loss": 1.8961998224258423,
"eval_runtime": 31.5705,
"eval_samples_per_second": 139.941,
"eval_steps_per_second": 17.516,
"step": 17332
},
{
"epoch": 8.0,
"grad_norm": 7.524344444274902,
"learning_rate": 8.017770597738289e-06,
"loss": 1.9311,
"step": 19808
},
{
"epoch": 8.0,
"eval_loss": 1.8643718957901,
"eval_runtime": 31.7102,
"eval_samples_per_second": 139.324,
"eval_steps_per_second": 17.439,
"step": 19808
},
{
"epoch": 9.0,
"grad_norm": 8.725274085998535,
"learning_rate": 4.019386106623587e-06,
"loss": 1.9096,
"step": 22284
},
{
"epoch": 9.0,
"eval_loss": 1.8564258813858032,
"eval_runtime": 21.3743,
"eval_samples_per_second": 206.697,
"eval_steps_per_second": 25.872,
"step": 22284
},
{
"epoch": 10.0,
"grad_norm": 7.118903636932373,
"learning_rate": 2.2617124394184172e-08,
"loss": 1.8867,
"step": 24760
},
{
"epoch": 10.0,
"eval_loss": 1.8485572338104248,
"eval_runtime": 21.6819,
"eval_samples_per_second": 203.765,
"eval_steps_per_second": 25.505,
"step": 24760
},
{
"epoch": 10.0,
"step": 24760,
"total_flos": 2.606916709515264e+16,
"train_loss": 2.1300495104566335,
"train_runtime": 6282.2508,
"train_samples_per_second": 63.081,
"train_steps_per_second": 3.941
},
{
"epoch": 10.0,
"eval_loss": 1.8408753871917725,
"eval_runtime": 21.4119,
"eval_samples_per_second": 206.334,
"eval_steps_per_second": 25.827,
"step": 24760
}
],
"logging_steps": 500,
"max_steps": 24760,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.606916709515264e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}