sentiment-seq_bn-rf64-1 / trainer_state.json
apwic's picture
End of training
4b4b0ed verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.611067295074463,
"learning_rate": 4.75e-05,
"loss": 0.5624,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6280256288561936,
"eval_loss": 0.521743655204773,
"eval_precision": 0.6565268987341772,
"eval_recall": 0.6199308965266412,
"eval_runtime": 1.7104,
"eval_samples_per_second": 233.272,
"eval_steps_per_second": 29.232,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 8.575949668884277,
"learning_rate": 4.5e-05,
"loss": 0.5051,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7192982456140351,
"eval_f1": 0.6887502089485709,
"eval_loss": 0.520839512348175,
"eval_precision": 0.6835816181502343,
"eval_recall": 0.7114020731042008,
"eval_runtime": 1.709,
"eval_samples_per_second": 233.477,
"eval_steps_per_second": 29.258,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.63930606842041,
"learning_rate": 4.25e-05,
"loss": 0.4776,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7719298245614035,
"eval_f1": 0.7269117082966674,
"eval_loss": 0.46694815158843994,
"eval_precision": 0.7253136200716845,
"eval_recall": 0.7286324786324787,
"eval_runtime": 1.707,
"eval_samples_per_second": 233.745,
"eval_steps_per_second": 29.291,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 3.649268388748169,
"learning_rate": 4e-05,
"loss": 0.4447,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7794486215538847,
"eval_f1": 0.7190500576110612,
"eval_loss": 0.43937617540359497,
"eval_precision": 0.7353382945313034,
"eval_recall": 0.7089470812875068,
"eval_runtime": 1.7082,
"eval_samples_per_second": 233.577,
"eval_steps_per_second": 29.27,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 15.028106689453125,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4309,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.7994987468671679,
"eval_f1": 0.768009768009768,
"eval_loss": 0.4312213063240051,
"eval_precision": 0.7598063973063973,
"eval_recall": 0.7806419348972541,
"eval_runtime": 1.7067,
"eval_samples_per_second": 233.781,
"eval_steps_per_second": 29.296,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.052947759628296,
"learning_rate": 3.5e-05,
"loss": 0.395,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8020050125313283,
"eval_f1": 0.7732905629436768,
"eval_loss": 0.41726621985435486,
"eval_precision": 0.7638030888030888,
"eval_recall": 0.7899163484269867,
"eval_runtime": 1.7037,
"eval_samples_per_second": 234.197,
"eval_steps_per_second": 29.348,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.5561754703521729,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3841,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8245614035087719,
"eval_f1": 0.7883706128386979,
"eval_loss": 0.4011886715888977,
"eval_precision": 0.7883706128386979,
"eval_recall": 0.7883706128386979,
"eval_runtime": 1.7038,
"eval_samples_per_second": 234.18,
"eval_steps_per_second": 29.346,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 3.902942419052124,
"learning_rate": 3e-05,
"loss": 0.3621,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.7929065743944637,
"eval_loss": 0.38815978169441223,
"eval_precision": 0.8062188401994228,
"eval_recall": 0.7829605382796871,
"eval_runtime": 1.7036,
"eval_samples_per_second": 234.212,
"eval_steps_per_second": 29.35,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 5.38352632522583,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3562,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8320802005012531,
"eval_f1": 0.7969311405674351,
"eval_loss": 0.39120376110076904,
"eval_precision": 0.7976879493115634,
"eval_recall": 0.7961902164029824,
"eval_runtime": 1.7036,
"eval_samples_per_second": 234.206,
"eval_steps_per_second": 29.349,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.1229166984558105,
"learning_rate": 2.5e-05,
"loss": 0.3428,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8157894736842105,
"eval_loss": 0.37669265270233154,
"eval_precision": 0.8210867117117118,
"eval_recall": 0.8111020185488271,
"eval_runtime": 1.7026,
"eval_samples_per_second": 234.354,
"eval_steps_per_second": 29.368,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 7.114827632904053,
"learning_rate": 2.25e-05,
"loss": 0.3282,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8242843661528783,
"eval_loss": 0.37359458208084106,
"eval_precision": 0.8389366308055628,
"eval_recall": 0.8131933078741589,
"eval_runtime": 1.7031,
"eval_samples_per_second": 234.281,
"eval_steps_per_second": 29.359,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 8.90954875946045,
"learning_rate": 2e-05,
"loss": 0.3308,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8254579780661698,
"eval_loss": 0.36908435821533203,
"eval_precision": 0.8299216027874565,
"eval_recall": 0.8214220767412257,
"eval_runtime": 1.7026,
"eval_samples_per_second": 234.352,
"eval_steps_per_second": 29.367,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 4.10761022567749,
"learning_rate": 1.75e-05,
"loss": 0.3143,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8222604047346316,
"eval_loss": 0.36313948035240173,
"eval_precision": 0.8423737373737374,
"eval_recall": 0.8081923986179305,
"eval_runtime": 1.7031,
"eval_samples_per_second": 234.283,
"eval_steps_per_second": 29.359,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 6.354517936706543,
"learning_rate": 1.5e-05,
"loss": 0.3173,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8228567054500919,
"eval_loss": 0.35919609665870667,
"eval_precision": 0.8263351692555232,
"eval_recall": 0.8196490270958356,
"eval_runtime": 1.7057,
"eval_samples_per_second": 233.927,
"eval_steps_per_second": 29.314,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 3.759216070175171,
"learning_rate": 1.25e-05,
"loss": 0.305,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8167483159828537,
"eval_loss": 0.3542298674583435,
"eval_precision": 0.8201621387462095,
"eval_recall": 0.8136024731769412,
"eval_runtime": 1.7036,
"eval_samples_per_second": 234.208,
"eval_steps_per_second": 29.349,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 3.776470899581909,
"learning_rate": 1e-05,
"loss": 0.2968,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8237632508833923,
"eval_loss": 0.354081928730011,
"eval_precision": 0.8254439681567667,
"eval_recall": 0.8221494817239499,
"eval_runtime": 1.7034,
"eval_samples_per_second": 234.232,
"eval_steps_per_second": 29.352,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 4.986289024353027,
"learning_rate": 7.5e-06,
"loss": 0.3049,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8209821152299028,
"eval_loss": 0.34866294264793396,
"eval_precision": 0.8284245491932933,
"eval_recall": 0.8146481178396072,
"eval_runtime": 1.7032,
"eval_samples_per_second": 234.27,
"eval_steps_per_second": 29.357,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 5.735553741455078,
"learning_rate": 5e-06,
"loss": 0.3001,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8255172205802521,
"eval_loss": 0.3513628840446472,
"eval_precision": 0.8239495798319327,
"eval_recall": 0.8271503909801782,
"eval_runtime": 1.7032,
"eval_samples_per_second": 234.26,
"eval_steps_per_second": 29.356,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 3.22857666015625,
"learning_rate": 2.5e-06,
"loss": 0.2986,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8297847585805701,
"eval_loss": 0.34794819355010986,
"eval_precision": 0.8385357006491028,
"eval_recall": 0.8224677214038916,
"eval_runtime": 1.703,
"eval_samples_per_second": 234.296,
"eval_steps_per_second": 29.36,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 3.945305109024048,
"learning_rate": 0.0,
"loss": 0.2894,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8167483159828537,
"eval_loss": 0.34796616435050964,
"eval_precision": 0.8201621387462095,
"eval_recall": 0.8136024731769412,
"eval_runtime": 1.7033,
"eval_samples_per_second": 234.246,
"eval_steps_per_second": 29.354,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7604291693904000.0,
"train_loss": 0.3673181408741435,
"train_runtime": 612.7283,
"train_samples_per_second": 118.748,
"train_steps_per_second": 3.982
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7604291693904000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}