compliance_verification_ViT / trainer_state.json
ikram98ai's picture
ikram98ai/compliance_verification_ViT
3960bc9 verified
{
"best_global_step": 120,
"best_metric": 0.5769068002700806,
"best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-120",
"epoch": 1.0,
"eval_steps": 30,
"global_step": 122,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.040983606557377046,
"grad_norm": 63320.57421875,
"learning_rate": 0.00019344262295081967,
"loss": 0.5881,
"step": 5
},
{
"epoch": 0.08196721311475409,
"grad_norm": 20211.603515625,
"learning_rate": 0.00018524590163934427,
"loss": 0.6081,
"step": 10
},
{
"epoch": 0.12295081967213115,
"grad_norm": 38881.02734375,
"learning_rate": 0.00017704918032786885,
"loss": 0.5742,
"step": 15
},
{
"epoch": 0.16393442622950818,
"grad_norm": 19475.11328125,
"learning_rate": 0.00016885245901639346,
"loss": 0.6071,
"step": 20
},
{
"epoch": 0.20491803278688525,
"grad_norm": 9836.5791015625,
"learning_rate": 0.00016065573770491804,
"loss": 0.5601,
"step": 25
},
{
"epoch": 0.2459016393442623,
"grad_norm": 30543.1875,
"learning_rate": 0.00015245901639344262,
"loss": 0.5984,
"step": 30
},
{
"epoch": 0.2459016393442623,
"eval_accuracy": 0.6399794713882474,
"eval_loss": 0.5906035304069519,
"eval_runtime": 267.3699,
"eval_samples_per_second": 14.575,
"eval_steps_per_second": 0.06,
"step": 30
},
{
"epoch": 0.28688524590163933,
"grad_norm": 7142.35205078125,
"learning_rate": 0.00014426229508196722,
"loss": 0.5782,
"step": 35
},
{
"epoch": 0.32786885245901637,
"grad_norm": 20473.416015625,
"learning_rate": 0.0001360655737704918,
"loss": 0.5865,
"step": 40
},
{
"epoch": 0.36885245901639346,
"grad_norm": 7634.8935546875,
"learning_rate": 0.0001278688524590164,
"loss": 0.5969,
"step": 45
},
{
"epoch": 0.4098360655737705,
"grad_norm": 13310.08203125,
"learning_rate": 0.00011967213114754099,
"loss": 0.5731,
"step": 50
},
{
"epoch": 0.45081967213114754,
"grad_norm": 10772.8818359375,
"learning_rate": 0.00011147540983606557,
"loss": 0.5825,
"step": 55
},
{
"epoch": 0.4918032786885246,
"grad_norm": 9481.56640625,
"learning_rate": 0.00010327868852459018,
"loss": 0.6018,
"step": 60
},
{
"epoch": 0.4918032786885246,
"eval_accuracy": 0.6579420066717988,
"eval_loss": 0.5840609073638916,
"eval_runtime": 276.2712,
"eval_samples_per_second": 14.106,
"eval_steps_per_second": 0.058,
"step": 60
},
{
"epoch": 0.5327868852459017,
"grad_norm": 19119.552734375,
"learning_rate": 9.508196721311476e-05,
"loss": 0.5955,
"step": 65
},
{
"epoch": 0.5737704918032787,
"grad_norm": 18152.6171875,
"learning_rate": 8.688524590163935e-05,
"loss": 0.5934,
"step": 70
},
{
"epoch": 0.6147540983606558,
"grad_norm": 9084.3095703125,
"learning_rate": 7.868852459016394e-05,
"loss": 0.602,
"step": 75
},
{
"epoch": 0.6557377049180327,
"grad_norm": 18591.34375,
"learning_rate": 7.049180327868853e-05,
"loss": 0.6009,
"step": 80
},
{
"epoch": 0.6967213114754098,
"grad_norm": 17511.595703125,
"learning_rate": 6.229508196721313e-05,
"loss": 0.5888,
"step": 85
},
{
"epoch": 0.7377049180327869,
"grad_norm": 9820.2109375,
"learning_rate": 5.409836065573771e-05,
"loss": 0.5816,
"step": 90
},
{
"epoch": 0.7377049180327869,
"eval_accuracy": 0.6433153707980498,
"eval_loss": 0.5819421410560608,
"eval_runtime": 262.5138,
"eval_samples_per_second": 14.845,
"eval_steps_per_second": 0.061,
"step": 90
},
{
"epoch": 0.7786885245901639,
"grad_norm": 10211.087890625,
"learning_rate": 4.59016393442623e-05,
"loss": 0.5958,
"step": 95
},
{
"epoch": 0.819672131147541,
"grad_norm": 18134.767578125,
"learning_rate": 3.7704918032786885e-05,
"loss": 0.5626,
"step": 100
},
{
"epoch": 0.860655737704918,
"grad_norm": 13692.79296875,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.5686,
"step": 105
},
{
"epoch": 0.9016393442622951,
"grad_norm": 17321.041015625,
"learning_rate": 2.1311475409836064e-05,
"loss": 0.5841,
"step": 110
},
{
"epoch": 0.9426229508196722,
"grad_norm": 10492.5380859375,
"learning_rate": 1.3114754098360657e-05,
"loss": 0.5818,
"step": 115
},
{
"epoch": 0.9836065573770492,
"grad_norm": 17490.5,
"learning_rate": 4.918032786885246e-06,
"loss": 0.594,
"step": 120
},
{
"epoch": 0.9836065573770492,
"eval_accuracy": 0.6584552219656146,
"eval_loss": 0.5769068002700806,
"eval_runtime": 263.9515,
"eval_samples_per_second": 14.764,
"eval_steps_per_second": 0.061,
"step": 120
},
{
"epoch": 1.0,
"step": 122,
"total_flos": 1.2078676421226455e+18,
"train_loss": 0.587343445566834,
"train_runtime": 5701.3536,
"train_samples_per_second": 2.734,
"train_steps_per_second": 0.021
}
],
"logging_steps": 5,
"max_steps": 122,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 30,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2078676421226455e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}