bmuscato's picture
Upload folder using huggingface_hub
d155f8a verified
{
"best_global_step": 252,
"best_metric": 0.7365710735321045,
"best_model_checkpoint": "./multitask_model2/checkpoint-252",
"epoch": 12.0,
"eval_steps": 500,
"global_step": 252,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.7021834254264832,
"learning_rate": 1.904761904761905e-05,
"loss": 0.7665,
"step": 21
},
{
"epoch": 1.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.755448579788208,
"eval_runtime": 0.4506,
"eval_samples_per_second": 312.894,
"eval_steps_per_second": 11.096,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.680282711982727,
"step": 21
},
{
"epoch": 2.0,
"grad_norm": 0.5450627207756042,
"learning_rate": 1.804761904761905e-05,
"loss": 0.7636,
"step": 42
},
{
"epoch": 2.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.7531750202178955,
"eval_runtime": 0.3837,
"eval_samples_per_second": 367.514,
"eval_steps_per_second": 13.032,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.678588330745697,
"step": 42
},
{
"epoch": 3.0,
"grad_norm": 0.9124093651771545,
"learning_rate": 1.704761904761905e-05,
"loss": 0.7657,
"step": 63
},
{
"epoch": 3.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.7538319826126099,
"eval_runtime": 0.4379,
"eval_samples_per_second": 321.999,
"eval_steps_per_second": 11.418,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.6760473847389221,
"step": 63
},
{
"epoch": 4.0,
"grad_norm": 0.8770959377288818,
"learning_rate": 1.604761904761905e-05,
"loss": 0.7601,
"step": 84
},
{
"epoch": 4.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.7534122467041016,
"eval_runtime": 0.4598,
"eval_samples_per_second": 306.631,
"eval_steps_per_second": 10.873,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.6773362159729004,
"step": 84
},
{
"epoch": 5.0,
"grad_norm": 1.3157461881637573,
"learning_rate": 1.5047619047619049e-05,
"loss": 0.7586,
"step": 105
},
{
"epoch": 5.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.7500014305114746,
"eval_runtime": 0.4435,
"eval_samples_per_second": 317.923,
"eval_steps_per_second": 11.274,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.6725690960884094,
"step": 105
},
{
"epoch": 6.0,
"grad_norm": 1.533734679222107,
"learning_rate": 1.4047619047619048e-05,
"loss": 0.7467,
"step": 126
},
{
"epoch": 6.0,
"eval_dis_accuracy": 0.6666666666666666,
"eval_dis_f1": 0.14545454545454545,
"eval_dis_precision": 0.6666666666666666,
"eval_dis_recall": 0.08163265306122448,
"eval_loss": 0.745786190032959,
"eval_runtime": 0.466,
"eval_samples_per_second": 302.556,
"eval_steps_per_second": 10.729,
"eval_target_accuracy": 0.5886524822695035,
"eval_target_f1": 0.38603603603603603,
"eval_target_precision": 0.46376811594202894,
"eval_target_recall": 0.49686716791979946,
"eval_target_soft_ce": 0.6714953184127808,
"step": 126
},
{
"epoch": 7.0,
"grad_norm": 2.0812835693359375,
"learning_rate": 1.304761904761905e-05,
"loss": 0.7387,
"step": 147
},
{
"epoch": 7.0,
"eval_dis_accuracy": 0.6524822695035462,
"eval_dis_f1": 0.0,
"eval_dis_precision": 0.0,
"eval_dis_recall": 0.0,
"eval_loss": 0.7480353116989136,
"eval_runtime": 0.4471,
"eval_samples_per_second": 315.367,
"eval_steps_per_second": 11.183,
"eval_target_accuracy": 0.5957446808510638,
"eval_target_f1": 0.37333333333333335,
"eval_target_precision": 0.2978723404255319,
"eval_target_recall": 0.5,
"eval_target_soft_ce": 0.6714373826980591,
"step": 147
},
{
"epoch": 8.0,
"grad_norm": 4.809630870819092,
"learning_rate": 1.2047619047619049e-05,
"loss": 0.74,
"step": 168
},
{
"epoch": 8.0,
"eval_dis_accuracy": 0.5177304964539007,
"eval_dis_f1": 0.40350877192982454,
"eval_dis_precision": 0.35384615384615387,
"eval_dis_recall": 0.46938775510204084,
"eval_loss": 0.7411231994628906,
"eval_runtime": 0.4289,
"eval_samples_per_second": 328.731,
"eval_steps_per_second": 11.657,
"eval_target_accuracy": 0.574468085106383,
"eval_target_f1": 0.4683257918552036,
"eval_target_precision": 0.5142857142857142,
"eval_target_recall": 0.5075187969924811,
"eval_target_soft_ce": 0.6614766120910645,
"step": 168
},
{
"epoch": 9.0,
"grad_norm": 7.420032024383545,
"learning_rate": 1.104761904761905e-05,
"loss": 0.7288,
"step": 189
},
{
"epoch": 9.0,
"eval_dis_accuracy": 0.6382978723404256,
"eval_dis_f1": 0.2153846153846154,
"eval_dis_precision": 0.4375,
"eval_dis_recall": 0.14285714285714285,
"eval_loss": 0.7409353852272034,
"eval_runtime": 0.4589,
"eval_samples_per_second": 307.224,
"eval_steps_per_second": 10.894,
"eval_target_accuracy": 0.5886524822695035,
"eval_target_f1": 0.44892183288409704,
"eval_target_precision": 0.5315504807692308,
"eval_target_recall": 0.5109649122807017,
"eval_target_soft_ce": 0.6628619432449341,
"step": 189
},
{
"epoch": 10.0,
"grad_norm": 3.0795819759368896,
"learning_rate": 1.0047619047619048e-05,
"loss": 0.7314,
"step": 210
},
{
"epoch": 10.0,
"eval_dis_accuracy": 0.5886524822695035,
"eval_dis_f1": 0.21621621621621623,
"eval_dis_precision": 0.32,
"eval_dis_recall": 0.16326530612244897,
"eval_loss": 0.743432879447937,
"eval_runtime": 0.429,
"eval_samples_per_second": 328.682,
"eval_steps_per_second": 11.655,
"eval_target_accuracy": 0.5886524822695035,
"eval_target_f1": 0.48604826546003016,
"eval_target_precision": 0.5422619047619047,
"eval_target_recall": 0.5222431077694236,
"eval_target_soft_ce": 0.6649101972579956,
"step": 210
},
{
"epoch": 11.0,
"grad_norm": 5.447195053100586,
"learning_rate": 9.047619047619049e-06,
"loss": 0.7192,
"step": 231
},
{
"epoch": 11.0,
"eval_dis_accuracy": 0.6382978723404256,
"eval_dis_f1": 0.23880597014925373,
"eval_dis_precision": 0.4444444444444444,
"eval_dis_recall": 0.16326530612244897,
"eval_loss": 0.7414493560791016,
"eval_runtime": 0.4442,
"eval_samples_per_second": 317.394,
"eval_steps_per_second": 11.255,
"eval_target_accuracy": 0.6099290780141844,
"eval_target_f1": 0.4723412941416616,
"eval_target_precision": 0.5978682170542635,
"eval_target_recall": 0.531641604010025,
"eval_target_soft_ce": 0.6644006371498108,
"step": 231
},
{
"epoch": 12.0,
"grad_norm": 2.391383647918701,
"learning_rate": 8.047619047619048e-06,
"loss": 0.7218,
"step": 252
},
{
"epoch": 12.0,
"eval_dis_accuracy": 0.5957446808510638,
"eval_dis_f1": 0.27848101265822783,
"eval_dis_precision": 0.36666666666666664,
"eval_dis_recall": 0.22448979591836735,
"eval_loss": 0.7365710735321045,
"eval_runtime": 0.4606,
"eval_samples_per_second": 306.148,
"eval_steps_per_second": 10.856,
"eval_target_accuracy": 0.6028368794326241,
"eval_target_f1": 0.487006237006237,
"eval_target_precision": 0.571157495256167,
"eval_target_recall": 0.531328320802005,
"eval_target_soft_ce": 0.6578279137611389,
"step": 252
}
],
"logging_steps": 500,
"max_steps": 420,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}