vit-house-classifier / trainer_state.json
DejanX13's picture
Upload trainer_state.json with huggingface_hub
c2c5740 verified
{
"best_global_step": 450,
"best_metric": 0.5457363724708557,
"best_model_checkpoint": "./vit-results/checkpoint-400",
"epoch": 10.0,
"eval_steps": 50,
"global_step": 490,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.20408163265306123,
"grad_norm": 1.3306224346160889,
"learning_rate": 1.963265306122449e-05,
"loss": 1.3374,
"step": 10
},
{
"epoch": 0.40816326530612246,
"grad_norm": 1.4784319400787354,
"learning_rate": 1.922448979591837e-05,
"loss": 1.265,
"step": 20
},
{
"epoch": 0.6122448979591837,
"grad_norm": 2.0332705974578857,
"learning_rate": 1.8816326530612246e-05,
"loss": 1.2081,
"step": 30
},
{
"epoch": 0.8163265306122449,
"grad_norm": 1.4771761894226074,
"learning_rate": 1.8408163265306125e-05,
"loss": 1.2104,
"step": 40
},
{
"epoch": 1.0204081632653061,
"grad_norm": 1.5638868808746338,
"learning_rate": 1.8e-05,
"loss": 1.1453,
"step": 50
},
{
"epoch": 1.0204081632653061,
"eval_accuracy": 0.475,
"eval_loss": 1.1232645511627197,
"eval_runtime": 1.3068,
"eval_samples_per_second": 61.216,
"eval_steps_per_second": 7.652,
"step": 50
},
{
"epoch": 1.2244897959183674,
"grad_norm": 2.1241655349731445,
"learning_rate": 1.759183673469388e-05,
"loss": 1.0284,
"step": 60
},
{
"epoch": 1.4285714285714286,
"grad_norm": 2.0577926635742188,
"learning_rate": 1.7183673469387755e-05,
"loss": 1.0428,
"step": 70
},
{
"epoch": 1.6326530612244898,
"grad_norm": 1.7612333297729492,
"learning_rate": 1.6775510204081634e-05,
"loss": 1.008,
"step": 80
},
{
"epoch": 1.836734693877551,
"grad_norm": 2.2085816860198975,
"learning_rate": 1.6367346938775513e-05,
"loss": 1.0095,
"step": 90
},
{
"epoch": 2.0408163265306123,
"grad_norm": 1.5611677169799805,
"learning_rate": 1.595918367346939e-05,
"loss": 0.9155,
"step": 100
},
{
"epoch": 2.0408163265306123,
"eval_accuracy": 0.7,
"eval_loss": 0.964668869972229,
"eval_runtime": 1.2647,
"eval_samples_per_second": 63.255,
"eval_steps_per_second": 7.907,
"step": 100
},
{
"epoch": 2.2448979591836733,
"grad_norm": 2.1504409313201904,
"learning_rate": 1.5551020408163265e-05,
"loss": 0.8396,
"step": 110
},
{
"epoch": 2.4489795918367347,
"grad_norm": 1.9358028173446655,
"learning_rate": 1.5142857142857144e-05,
"loss": 0.8618,
"step": 120
},
{
"epoch": 2.6530612244897958,
"grad_norm": 1.5132843255996704,
"learning_rate": 1.4734693877551021e-05,
"loss": 0.8077,
"step": 130
},
{
"epoch": 2.857142857142857,
"grad_norm": 2.5402004718780518,
"learning_rate": 1.43265306122449e-05,
"loss": 0.7742,
"step": 140
},
{
"epoch": 3.061224489795918,
"grad_norm": 2.259558916091919,
"learning_rate": 1.3918367346938776e-05,
"loss": 0.7638,
"step": 150
},
{
"epoch": 3.061224489795918,
"eval_accuracy": 0.75,
"eval_loss": 0.8326537013053894,
"eval_runtime": 1.2728,
"eval_samples_per_second": 62.853,
"eval_steps_per_second": 7.857,
"step": 150
},
{
"epoch": 3.2653061224489797,
"grad_norm": 2.010672092437744,
"learning_rate": 1.3510204081632654e-05,
"loss": 0.672,
"step": 160
},
{
"epoch": 3.4693877551020407,
"grad_norm": 1.8449556827545166,
"learning_rate": 1.3102040816326531e-05,
"loss": 0.6367,
"step": 170
},
{
"epoch": 3.673469387755102,
"grad_norm": 1.8838822841644287,
"learning_rate": 1.2693877551020409e-05,
"loss": 0.6125,
"step": 180
},
{
"epoch": 3.877551020408163,
"grad_norm": 2.325084686279297,
"learning_rate": 1.2285714285714288e-05,
"loss": 0.5768,
"step": 190
},
{
"epoch": 4.081632653061225,
"grad_norm": 2.084455966949463,
"learning_rate": 1.1877551020408165e-05,
"loss": 0.537,
"step": 200
},
{
"epoch": 4.081632653061225,
"eval_accuracy": 0.7875,
"eval_loss": 0.7291887402534485,
"eval_runtime": 1.2754,
"eval_samples_per_second": 62.726,
"eval_steps_per_second": 7.841,
"step": 200
},
{
"epoch": 4.285714285714286,
"grad_norm": 2.3336734771728516,
"learning_rate": 1.146938775510204e-05,
"loss": 0.5,
"step": 210
},
{
"epoch": 4.489795918367347,
"grad_norm": 2.6175546646118164,
"learning_rate": 1.1061224489795918e-05,
"loss": 0.4665,
"step": 220
},
{
"epoch": 4.6938775510204085,
"grad_norm": 1.6590876579284668,
"learning_rate": 1.0653061224489796e-05,
"loss": 0.448,
"step": 230
},
{
"epoch": 4.8979591836734695,
"grad_norm": 1.8714507818222046,
"learning_rate": 1.0244897959183675e-05,
"loss": 0.4491,
"step": 240
},
{
"epoch": 5.1020408163265305,
"grad_norm": 2.239349365234375,
"learning_rate": 9.836734693877552e-06,
"loss": 0.3957,
"step": 250
},
{
"epoch": 5.1020408163265305,
"eval_accuracy": 0.825,
"eval_loss": 0.6328426599502563,
"eval_runtime": 1.2652,
"eval_samples_per_second": 63.23,
"eval_steps_per_second": 7.904,
"step": 250
},
{
"epoch": 5.3061224489795915,
"grad_norm": 2.4021127223968506,
"learning_rate": 9.42857142857143e-06,
"loss": 0.3601,
"step": 260
},
{
"epoch": 5.510204081632653,
"grad_norm": 1.2185922861099243,
"learning_rate": 9.020408163265307e-06,
"loss": 0.3431,
"step": 270
},
{
"epoch": 5.714285714285714,
"grad_norm": 1.0699914693832397,
"learning_rate": 8.612244897959184e-06,
"loss": 0.3646,
"step": 280
},
{
"epoch": 5.918367346938775,
"grad_norm": 1.5863635540008545,
"learning_rate": 8.204081632653062e-06,
"loss": 0.3129,
"step": 290
},
{
"epoch": 6.122448979591836,
"grad_norm": 1.0668057203292847,
"learning_rate": 7.79591836734694e-06,
"loss": 0.2842,
"step": 300
},
{
"epoch": 6.122448979591836,
"eval_accuracy": 0.7875,
"eval_loss": 0.5894995927810669,
"eval_runtime": 1.2447,
"eval_samples_per_second": 64.272,
"eval_steps_per_second": 8.034,
"step": 300
},
{
"epoch": 6.326530612244898,
"grad_norm": 1.0372223854064941,
"learning_rate": 7.387755102040817e-06,
"loss": 0.2725,
"step": 310
},
{
"epoch": 6.530612244897959,
"grad_norm": 1.855797529220581,
"learning_rate": 6.979591836734695e-06,
"loss": 0.2596,
"step": 320
},
{
"epoch": 6.73469387755102,
"grad_norm": 1.3400880098342896,
"learning_rate": 6.571428571428572e-06,
"loss": 0.2579,
"step": 330
},
{
"epoch": 6.938775510204081,
"grad_norm": 1.0782897472381592,
"learning_rate": 6.163265306122449e-06,
"loss": 0.2394,
"step": 340
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.1473782062530518,
"learning_rate": 5.755102040816327e-06,
"loss": 0.2266,
"step": 350
},
{
"epoch": 7.142857142857143,
"eval_accuracy": 0.8125,
"eval_loss": 0.5921751260757446,
"eval_runtime": 1.2621,
"eval_samples_per_second": 63.388,
"eval_steps_per_second": 7.924,
"step": 350
},
{
"epoch": 7.346938775510204,
"grad_norm": 0.8374194502830505,
"learning_rate": 5.3469387755102045e-06,
"loss": 0.2043,
"step": 360
},
{
"epoch": 7.551020408163265,
"grad_norm": 0.9697467684745789,
"learning_rate": 4.938775510204082e-06,
"loss": 0.2076,
"step": 370
},
{
"epoch": 7.755102040816326,
"grad_norm": 1.7021687030792236,
"learning_rate": 4.530612244897959e-06,
"loss": 0.2111,
"step": 380
},
{
"epoch": 7.959183673469388,
"grad_norm": 0.8718199729919434,
"learning_rate": 4.122448979591837e-06,
"loss": 0.188,
"step": 390
},
{
"epoch": 8.16326530612245,
"grad_norm": 0.7752737998962402,
"learning_rate": 3.7142857142857146e-06,
"loss": 0.1798,
"step": 400
},
{
"epoch": 8.16326530612245,
"eval_accuracy": 0.8125,
"eval_loss": 0.5628954172134399,
"eval_runtime": 1.291,
"eval_samples_per_second": 61.967,
"eval_steps_per_second": 7.746,
"step": 400
},
{
"epoch": 8.36734693877551,
"grad_norm": 1.137776494026184,
"learning_rate": 3.3061224489795924e-06,
"loss": 0.1778,
"step": 410
},
{
"epoch": 8.571428571428571,
"grad_norm": 0.7163811326026917,
"learning_rate": 2.8979591836734694e-06,
"loss": 0.1697,
"step": 420
},
{
"epoch": 8.775510204081632,
"grad_norm": 0.860792875289917,
"learning_rate": 2.489795918367347e-06,
"loss": 0.1803,
"step": 430
},
{
"epoch": 8.979591836734693,
"grad_norm": 0.8092204928398132,
"learning_rate": 2.0816326530612247e-06,
"loss": 0.1792,
"step": 440
},
{
"epoch": 9.183673469387756,
"grad_norm": 3.376288890838623,
"learning_rate": 1.6734693877551023e-06,
"loss": 0.1746,
"step": 450
},
{
"epoch": 9.183673469387756,
"eval_accuracy": 0.825,
"eval_loss": 0.5457363724708557,
"eval_runtime": 1.2999,
"eval_samples_per_second": 61.544,
"eval_steps_per_second": 7.693,
"step": 450
},
{
"epoch": 9.387755102040817,
"grad_norm": 0.6227843165397644,
"learning_rate": 1.2653061224489795e-06,
"loss": 0.1631,
"step": 460
},
{
"epoch": 9.591836734693878,
"grad_norm": 0.7905530333518982,
"learning_rate": 8.571428571428572e-07,
"loss": 0.1603,
"step": 470
},
{
"epoch": 9.795918367346939,
"grad_norm": 0.6496562361717224,
"learning_rate": 4.489795918367347e-07,
"loss": 0.1581,
"step": 480
},
{
"epoch": 10.0,
"grad_norm": 0.9721047282218933,
"learning_rate": 4.0816326530612253e-08,
"loss": 0.1594,
"step": 490
},
{
"epoch": 10.0,
"step": 490,
"total_flos": 6.013486186109338e+17,
"train_loss": 0.5295110111333886,
"train_runtime": 345.9048,
"train_samples_per_second": 22.434,
"train_steps_per_second": 1.417
}
],
"logging_steps": 10,
"max_steps": 490,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.013486186109338e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}