roberta-base-downstream-ildc / trainer_state.json
MHGanainy's picture
End of training
5c7f4eb verified
{
"best_metric": 0.6684856753069577,
"best_model_checkpoint": "logs/ildc/roberta-base/seed_1/checkpoint-1010",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 4040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.49504950495049505,
"grad_norm": 0.7472001314163208,
"learning_rate": 2.925891089108911e-05,
"loss": 0.6885,
"step": 500
},
{
"epoch": 0.9900990099009901,
"grad_norm": 1.0043890476226807,
"learning_rate": 2.8516336633663364e-05,
"loss": 0.6863,
"step": 1000
},
{
"epoch": 1.0,
"eval_accuracy": 0.5110663983903421,
"eval_best_threshold": 0.43776339292526245,
"eval_f1": 0.6684856753069577,
"eval_loss": 0.7003927230834961,
"eval_precision": 0.5056759545923633,
"eval_recall": 0.9859154929577465,
"eval_runtime": 8.3061,
"eval_samples_per_second": 119.671,
"eval_steps_per_second": 3.853,
"step": 1010
},
{
"epoch": 1.4851485148514851,
"grad_norm": 0.5840580463409424,
"learning_rate": 2.7773762376237626e-05,
"loss": 0.6843,
"step": 1500
},
{
"epoch": 1.9801980198019802,
"grad_norm": 1.2000515460968018,
"learning_rate": 2.7031188118811882e-05,
"loss": 0.6812,
"step": 2000
},
{
"epoch": 2.0,
"eval_accuracy": 0.5030181086519114,
"eval_best_threshold": 0.43332192301750183,
"eval_f1": 0.6671159029649596,
"eval_loss": 0.699403703212738,
"eval_precision": 0.5015197568389058,
"eval_recall": 0.9959758551307847,
"eval_runtime": 8.2485,
"eval_samples_per_second": 120.507,
"eval_steps_per_second": 3.88,
"step": 2020
},
{
"epoch": 2.4752475247524752,
"grad_norm": 1.7046343088150024,
"learning_rate": 2.628861386138614e-05,
"loss": 0.6823,
"step": 2500
},
{
"epoch": 2.9702970297029703,
"grad_norm": 0.7660498023033142,
"learning_rate": 2.5546039603960396e-05,
"loss": 0.6816,
"step": 3000
},
{
"epoch": 3.0,
"eval_accuracy": 0.5030181086519114,
"eval_best_threshold": 0.332852303981781,
"eval_f1": 0.6644021739130435,
"eval_loss": 0.751548707485199,
"eval_precision": 0.5015384615384615,
"eval_recall": 0.9839034205231388,
"eval_runtime": 8.2689,
"eval_samples_per_second": 120.21,
"eval_steps_per_second": 3.87,
"step": 3030
},
{
"epoch": 3.4653465346534653,
"grad_norm": 0.4976850748062134,
"learning_rate": 2.480346534653465e-05,
"loss": 0.6795,
"step": 3500
},
{
"epoch": 3.9603960396039604,
"grad_norm": 1.8062459230422974,
"learning_rate": 2.4060891089108914e-05,
"loss": 0.6796,
"step": 4000
},
{
"epoch": 4.0,
"eval_accuracy": 0.5030181086519114,
"eval_best_threshold": 0.4006907343864441,
"eval_f1": 0.6671159029649596,
"eval_loss": 0.7038925290107727,
"eval_precision": 0.5015197568389058,
"eval_recall": 0.9959758551307847,
"eval_runtime": 8.2224,
"eval_samples_per_second": 120.89,
"eval_steps_per_second": 3.892,
"step": 4040
},
{
"epoch": 4.0,
"step": 4040,
"total_flos": 6.343227643160166e+17,
"train_loss": 0.6829259282291525,
"train_runtime": 2722.2879,
"train_samples_per_second": 237.337,
"train_steps_per_second": 7.42
}
],
"logging_steps": 500,
"max_steps": 20200,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.343227643160166e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}