vit5-hsd-span / checkpoint-5040 /trainer_state.json
AnnyNguyen's picture
Upload checkpoint-5040/trainer_state.json with huggingface_hub
b011018 verified
{
"best_global_step": 5040,
"best_metric": 0.8438760541734693,
"best_model_checkpoint": "outputs/runs/vit5/checkpoint-5040",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 5040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968253968253968,
"grad_norm": 48.960147857666016,
"learning_rate": 4.999432333543028e-06,
"loss": 4.2164,
"step": 314
},
{
"epoch": 1.0,
"eval_accuracy": 0.8120646345156866,
"eval_f1": 0.7880041194112739,
"eval_loss": 1.4588117599487305,
"eval_precision": 0.8369479599718508,
"eval_recall": 0.7755522799379979,
"eval_runtime": 1.628,
"eval_samples_per_second": 799.123,
"eval_steps_per_second": 25.184,
"step": 315
},
{
"epoch": 1.9936507936507937,
"grad_norm": 15.666463851928711,
"learning_rate": 4.996525669931999e-06,
"loss": 1.5636,
"step": 628
},
{
"epoch": 2.0,
"eval_accuracy": 0.8272790436657106,
"eval_f1": 0.8200906148872792,
"eval_loss": 0.8009101748466492,
"eval_precision": 0.819129062168523,
"eval_recall": 0.8211574663523349,
"eval_runtime": 1.6386,
"eval_samples_per_second": 793.961,
"eval_steps_per_second": 25.021,
"step": 630
},
{
"epoch": 2.9904761904761905,
"grad_norm": 19.285402297973633,
"learning_rate": 4.991155236893945e-06,
"loss": 1.1621,
"step": 942
},
{
"epoch": 3.0,
"eval_accuracy": 0.8486576045322549,
"eval_f1": 0.8387492829457615,
"eval_loss": 0.6665186285972595,
"eval_precision": 0.8476389137544842,
"eval_recall": 0.833091047050099,
"eval_runtime": 1.6403,
"eval_samples_per_second": 793.151,
"eval_steps_per_second": 24.996,
"step": 945
},
{
"epoch": 3.9873015873015873,
"grad_norm": 28.716716766357422,
"learning_rate": 4.983326334397891e-06,
"loss": 0.9673,
"step": 1256
},
{
"epoch": 4.0,
"eval_accuracy": 0.8452548502609966,
"eval_f1": 0.8306632859859202,
"eval_loss": 0.7017992734909058,
"eval_precision": 0.8573660674461923,
"eval_recall": 0.8196344002399345,
"eval_runtime": 1.6405,
"eval_samples_per_second": 793.027,
"eval_steps_per_second": 24.992,
"step": 1260
},
{
"epoch": 4.984126984126984,
"grad_norm": 62.80952072143555,
"learning_rate": 4.97304668862541e-06,
"loss": 0.8578,
"step": 1570
},
{
"epoch": 5.0,
"eval_accuracy": 0.8483012951844792,
"eval_f1": 0.8374094515073132,
"eval_loss": 0.5731419920921326,
"eval_precision": 0.8497690158969053,
"eval_recall": 0.8303468421765257,
"eval_runtime": 1.6424,
"eval_samples_per_second": 792.155,
"eval_steps_per_second": 24.964,
"step": 1575
},
{
"epoch": 5.980952380952381,
"grad_norm": 28.359893798828125,
"learning_rate": 4.9603264443458e-06,
"loss": 0.7528,
"step": 1884
},
{
"epoch": 6.0,
"eval_accuracy": 0.8424043754787907,
"eval_f1": 0.8368542522225871,
"eval_loss": 0.5212520360946655,
"eval_precision": 0.8343696637314615,
"eval_recall": 0.8403125137303477,
"eval_runtime": 1.6431,
"eval_samples_per_second": 791.777,
"eval_steps_per_second": 24.952,
"step": 1890
},
{
"epoch": 6.977777777777778,
"grad_norm": 13.184873580932617,
"learning_rate": 4.945178154904432e-06,
"loss": 0.6746,
"step": 2198
},
{
"epoch": 7.0,
"eval_accuracy": 0.8452726657283853,
"eval_f1": 0.8388474476297365,
"eval_loss": 0.5077288746833801,
"eval_precision": 0.8378019445498359,
"eval_recall": 0.8400070136197805,
"eval_runtime": 1.6424,
"eval_samples_per_second": 792.11,
"eval_steps_per_second": 24.963,
"step": 2205
},
{
"epoch": 7.974603174603175,
"grad_norm": 9.050921440124512,
"learning_rate": 4.92761676983411e-06,
"loss": 0.6276,
"step": 2512
},
{
"epoch": 8.0,
"eval_accuracy": 0.8498868717820812,
"eval_f1": 0.8402758437579932,
"eval_loss": 0.48333004117012024,
"eval_precision": 0.8484423552268903,
"eval_recall": 0.8349357523808886,
"eval_runtime": 1.6444,
"eval_samples_per_second": 791.192,
"eval_steps_per_second": 24.934,
"step": 2520
},
{
"epoch": 8.971428571428572,
"grad_norm": 14.836031913757324,
"learning_rate": 4.9076596201016856e-06,
"loss": 0.591,
"step": 2826
},
{
"epoch": 9.0,
"eval_accuracy": 0.8510092462275748,
"eval_f1": 0.8380468647887628,
"eval_loss": 0.5170930624008179,
"eval_precision": 0.8600643338742522,
"eval_recall": 0.8279538920826988,
"eval_runtime": 1.6449,
"eval_samples_per_second": 790.913,
"eval_steps_per_second": 24.925,
"step": 2835
},
{
"epoch": 9.968253968253968,
"grad_norm": 17.490633010864258,
"learning_rate": 4.88532640100449e-06,
"loss": 0.5411,
"step": 3140
},
{
"epoch": 10.0,
"eval_accuracy": 0.840872245283355,
"eval_f1": 0.8357643191911723,
"eval_loss": 0.4666968882083893,
"eval_precision": 0.8328248424035201,
"eval_recall": 0.8404652389103109,
"eval_runtime": 1.6447,
"eval_samples_per_second": 791.007,
"eval_steps_per_second": 24.928,
"step": 3150
},
{
"epoch": 10.965079365079365,
"grad_norm": 27.608245849609375,
"learning_rate": 4.860639152733449e-06,
"loss": 0.5193,
"step": 3454
},
{
"epoch": 11.0,
"eval_accuracy": 0.8485863426626997,
"eval_f1": 0.8406162312164841,
"eval_loss": 0.4396151602268219,
"eval_precision": 0.8434539842909232,
"eval_recall": 0.8382779460247154,
"eval_runtime": 1.6435,
"eval_samples_per_second": 791.592,
"eval_steps_per_second": 24.946,
"step": 3465
},
{
"epoch": 11.961904761904762,
"grad_norm": 7.845660209655762,
"learning_rate": 4.833622238622079e-06,
"loss": 0.4943,
"step": 3768
},
{
"epoch": 12.0,
"eval_accuracy": 0.8496374552386382,
"eval_f1": 0.842659244060233,
"eval_loss": 0.43954724073410034,
"eval_precision": 0.843136531563339,
"eval_recall": 0.8422001823877059,
"eval_runtime": 1.6428,
"eval_samples_per_second": 791.964,
"eval_steps_per_second": 24.958,
"step": 3780
},
{
"epoch": 12.958730158730159,
"grad_norm": 9.281864166259766,
"learning_rate": 4.804302321102816e-06,
"loss": 0.4715,
"step": 4082
},
{
"epoch": 13.0,
"eval_accuracy": 0.8486576045322549,
"eval_f1": 0.8384831543048918,
"eval_loss": 0.478777676820755,
"eval_precision": 0.8483098559263628,
"eval_recall": 0.8324188900247866,
"eval_runtime": 1.6428,
"eval_samples_per_second": 791.931,
"eval_steps_per_second": 24.957,
"step": 4095
},
{
"epoch": 13.955555555555556,
"grad_norm": 21.69331169128418,
"learning_rate": 4.772708335394416e-06,
"loss": 0.4482,
"step": 4396
},
{
"epoch": 14.0,
"eval_accuracy": 0.8356344978710516,
"eval_f1": 0.8313378359536021,
"eval_loss": 0.49439841508865356,
"eval_precision": 0.8281393298498705,
"eval_recall": 0.8386230277449718,
"eval_runtime": 1.6423,
"eval_samples_per_second": 792.192,
"eval_steps_per_second": 24.965,
"step": 4410
},
{
"epoch": 14.952380952380953,
"grad_norm": 11.402544021606445,
"learning_rate": 4.738871460946384e-06,
"loss": 0.4305,
"step": 4710
},
{
"epoch": 15.0,
"eval_accuracy": 0.8504569667385223,
"eval_f1": 0.8397878915405506,
"eval_loss": 0.4550122618675232,
"eval_precision": 0.8519608079867367,
"eval_recall": 0.8327656095361328,
"eval_runtime": 1.6443,
"eval_samples_per_second": 791.219,
"eval_steps_per_second": 24.935,
"step": 4725
},
{
"epoch": 15.94920634920635,
"grad_norm": 9.356096267700195,
"learning_rate": 4.702825090668624e-06,
"loss": 0.4115,
"step": 5024
},
{
"epoch": 16.0,
"eval_accuracy": 0.8531114713794516,
"eval_f1": 0.8438760541734693,
"eval_loss": 0.4762667417526245,
"eval_precision": 0.8515227136302977,
"eval_recall": 0.8387574009086176,
"eval_runtime": 1.6403,
"eval_samples_per_second": 793.152,
"eval_steps_per_second": 24.996,
"step": 5040
}
],
"logging_steps": 314,
"max_steps": 31500,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5258284886974464.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}