MuhammadHelmy's picture
Upload folder using huggingface_hub
6c71039 verified
{
"best_global_step": 7500,
"best_metric": 0.7842453714436081,
"best_model_checkpoint": "./distilbert-base-AuthId/checkpoint-7500",
"epoch": 3.642544924720738,
"eval_steps": 500,
"global_step": 7500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24283632831471588,
"grad_norm": 10.536812782287598,
"learning_rate": 4.8446601941747576e-05,
"loss": 2.4424,
"step": 500
},
{
"epoch": 0.24283632831471588,
"eval_accuracy": 0.4645176810199663,
"eval_f1_macro": 0.38214194553778474,
"eval_loss": 1.7513902187347412,
"eval_precision_macro": 0.42410548716505947,
"eval_recall_macro": 0.4570777134531091,
"eval_runtime": 44.1019,
"eval_samples_per_second": 94.259,
"eval_steps_per_second": 5.895,
"step": 500
},
{
"epoch": 0.48567265662943176,
"grad_norm": 8.661422729492188,
"learning_rate": 7.989001438410175e-05,
"loss": 1.3721,
"step": 1000
},
{
"epoch": 0.48567265662943176,
"eval_accuracy": 0.5208082751984604,
"eval_f1_macro": 0.4454367070109959,
"eval_loss": 1.5558406114578247,
"eval_precision_macro": 0.49026128200564273,
"eval_recall_macro": 0.5024706104342445,
"eval_runtime": 44.445,
"eval_samples_per_second": 93.531,
"eval_steps_per_second": 5.85,
"step": 1000
},
{
"epoch": 0.7285089849441476,
"grad_norm": 11.807583808898926,
"learning_rate": 7.837406915300635e-05,
"loss": 0.9647,
"step": 1500
},
{
"epoch": 0.7285089849441476,
"eval_accuracy": 0.6812605244166466,
"eval_f1_macro": 0.5864448295094548,
"eval_loss": 1.1119331121444702,
"eval_precision_macro": 0.5984183492980578,
"eval_recall_macro": 0.6268369019392432,
"eval_runtime": 44.5983,
"eval_samples_per_second": 93.21,
"eval_steps_per_second": 5.83,
"step": 1500
},
{
"epoch": 0.9713453132588635,
"grad_norm": 27.49161720275879,
"learning_rate": 7.514108265155405e-05,
"loss": 0.7867,
"step": 2000
},
{
"epoch": 0.9713453132588635,
"eval_accuracy": 0.713014192927592,
"eval_f1_macro": 0.6520857054306285,
"eval_loss": 1.003157615661621,
"eval_precision_macro": 0.6618591636767543,
"eval_recall_macro": 0.6977243147676733,
"eval_runtime": 45.2348,
"eval_samples_per_second": 91.898,
"eval_steps_per_second": 5.748,
"step": 2000
},
{
"epoch": 1.2141816415735793,
"grad_norm": 17.49410629272461,
"learning_rate": 7.033571431701654e-05,
"loss": 0.5712,
"step": 2500
},
{
"epoch": 1.2141816415735793,
"eval_accuracy": 0.7101274957902334,
"eval_f1_macro": 0.6578185186923463,
"eval_loss": 1.0687695741653442,
"eval_precision_macro": 0.6947126776112352,
"eval_recall_macro": 0.6807690177369954,
"eval_runtime": 45.4304,
"eval_samples_per_second": 91.503,
"eval_steps_per_second": 5.723,
"step": 2500
},
{
"epoch": 1.4570179698882952,
"grad_norm": 14.797587394714355,
"learning_rate": 6.417297954733794e-05,
"loss": 0.5118,
"step": 3000
},
{
"epoch": 1.4570179698882952,
"eval_accuracy": 0.7466923261967765,
"eval_f1_macro": 0.6802785705299167,
"eval_loss": 0.9464945197105408,
"eval_precision_macro": 0.6962547831411214,
"eval_recall_macro": 0.7067511752266095,
"eval_runtime": 45.0782,
"eval_samples_per_second": 92.217,
"eval_steps_per_second": 5.768,
"step": 3000
},
{
"epoch": 1.6998542982030111,
"grad_norm": 11.592080116271973,
"learning_rate": 5.692862887331573e-05,
"loss": 0.4238,
"step": 3500
},
{
"epoch": 1.6998542982030111,
"eval_accuracy": 0.7529468366610537,
"eval_f1_macro": 0.6887735293131003,
"eval_loss": 1.0284276008605957,
"eval_precision_macro": 0.7004815690389604,
"eval_recall_macro": 0.7209895448469402,
"eval_runtime": 45.0958,
"eval_samples_per_second": 92.182,
"eval_steps_per_second": 5.766,
"step": 3500
},
{
"epoch": 1.942690626517727,
"grad_norm": 1.7279024124145508,
"learning_rate": 4.892680954747708e-05,
"loss": 0.383,
"step": 4000
},
{
"epoch": 1.942690626517727,
"eval_accuracy": 0.7784459947077219,
"eval_f1_macro": 0.7004470067611618,
"eval_loss": 0.9373700618743896,
"eval_precision_macro": 0.7012745858922871,
"eval_recall_macro": 0.7363165995805543,
"eval_runtime": 45.0166,
"eval_samples_per_second": 92.344,
"eval_steps_per_second": 5.776,
"step": 4000
},
{
"epoch": 2.1855269548324427,
"grad_norm": 11.464333534240723,
"learning_rate": 4.052556162978074e-05,
"loss": 0.2498,
"step": 4500
},
{
"epoch": 2.1855269548324427,
"eval_accuracy": 0.7897522251623768,
"eval_f1_macro": 0.736934271206392,
"eval_loss": 0.8516831398010254,
"eval_precision_macro": 0.7471052855094972,
"eval_recall_macro": 0.7513822718605427,
"eval_runtime": 45.2063,
"eval_samples_per_second": 91.956,
"eval_steps_per_second": 5.751,
"step": 4500
},
{
"epoch": 2.4283632831471587,
"grad_norm": 13.227448463439941,
"learning_rate": 3.2100797545350794e-05,
"loss": 0.2012,
"step": 5000
},
{
"epoch": 2.4283632831471587,
"eval_accuracy": 0.7952850613423141,
"eval_f1_macro": 0.7384644794255656,
"eval_loss": 0.8422956466674805,
"eval_precision_macro": 0.7407858112231859,
"eval_recall_macro": 0.7593629429944239,
"eval_runtime": 45.0668,
"eval_samples_per_second": 92.241,
"eval_steps_per_second": 5.769,
"step": 5000
},
{
"epoch": 2.6711996114618746,
"grad_norm": 14.610297203063965,
"learning_rate": 2.402948194623674e-05,
"loss": 0.1888,
"step": 5500
},
{
"epoch": 2.6711996114618746,
"eval_accuracy": 0.8383449603079144,
"eval_f1_macro": 0.7668856441209034,
"eval_loss": 0.7612192034721375,
"eval_precision_macro": 0.759848794474667,
"eval_recall_macro": 0.7875978932242379,
"eval_runtime": 44.9628,
"eval_samples_per_second": 92.454,
"eval_steps_per_second": 5.783,
"step": 5500
},
{
"epoch": 2.9140359397765905,
"grad_norm": 2.9459526538848877,
"learning_rate": 1.667276449145691e-05,
"loss": 0.1478,
"step": 6000
},
{
"epoch": 2.9140359397765905,
"eval_accuracy": 0.8284820784219389,
"eval_f1_macro": 0.7648949869466338,
"eval_loss": 0.7956407070159912,
"eval_precision_macro": 0.7582819516660327,
"eval_recall_macro": 0.7872534623468304,
"eval_runtime": 45.0026,
"eval_samples_per_second": 92.372,
"eval_steps_per_second": 5.777,
"step": 6000
},
{
"epoch": 3.1568722680913064,
"grad_norm": 0.04966867342591286,
"learning_rate": 1.035982026624521e-05,
"loss": 0.0805,
"step": 6500
},
{
"epoch": 3.1568722680913064,
"eval_accuracy": 0.8397883088765937,
"eval_f1_macro": 0.779163123362359,
"eval_loss": 0.7845637798309326,
"eval_precision_macro": 0.7736785485737475,
"eval_recall_macro": 0.7972932500440235,
"eval_runtime": 44.623,
"eval_samples_per_second": 93.158,
"eval_steps_per_second": 5.827,
"step": 6500
},
{
"epoch": 3.3997085964060223,
"grad_norm": 0.054828643798828125,
"learning_rate": 5.373120898224163e-06,
"loss": 0.054,
"step": 7000
},
{
"epoch": 3.3997085964060223,
"eval_accuracy": 0.8390666345922541,
"eval_f1_macro": 0.7789886096417518,
"eval_loss": 0.8231712579727173,
"eval_precision_macro": 0.7722144919792829,
"eval_recall_macro": 0.7985903156380745,
"eval_runtime": 44.6487,
"eval_samples_per_second": 93.105,
"eval_steps_per_second": 5.823,
"step": 7000
},
{
"epoch": 3.642544924720738,
"grad_norm": 8.141491889953613,
"learning_rate": 1.935795411934156e-06,
"loss": 0.0683,
"step": 7500
},
{
"epoch": 3.642544924720738,
"eval_accuracy": 0.8448400288669714,
"eval_f1_macro": 0.7842453714436081,
"eval_loss": 0.7753947377204895,
"eval_precision_macro": 0.7754804853445821,
"eval_recall_macro": 0.8051232260351716,
"eval_runtime": 45.7414,
"eval_samples_per_second": 90.881,
"eval_steps_per_second": 5.684,
"step": 7500
}
],
"logging_steps": 500,
"max_steps": 8236,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.5900281427207168e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}