{ "best_metric": 0.8677333333333334, "best_model_checkpoint": "./vit_finetuned_models_dataset/CIFAR100/50_from_100/facebook_vit-mae-base/model_idx_0084/checkpoints/checkpoint-2997", "epoch": 9.0, "eval_steps": 500, "global_step": 2997, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": Infinity, "learning_rate": 0.00026676676676676677, "loss": 2.4075, "step": 333 }, { "epoch": 1.0, "eval_accuracy": 0.6682666666666667, "eval_loss": 1.152270793914795, "eval_runtime": 7.6866, "eval_samples_per_second": 487.862, "eval_steps_per_second": 7.676, "step": 333 }, { "epoch": 2.0, "grad_norm": 3.5095269680023193, "learning_rate": 0.00023343343343343342, "loss": 0.8266, "step": 666 }, { "epoch": 2.0, "eval_accuracy": 0.7816, "eval_loss": 0.703055202960968, "eval_runtime": 7.0333, "eval_samples_per_second": 533.179, "eval_steps_per_second": 8.389, "step": 666 }, { "epoch": 3.0, "grad_norm": 0.25592198967933655, "learning_rate": 0.00020010010010010007, "loss": 0.3848, "step": 999 }, { "epoch": 3.0, "eval_accuracy": 0.8034666666666667, "eval_loss": 0.6375019550323486, "eval_runtime": 6.3423, "eval_samples_per_second": 591.272, "eval_steps_per_second": 9.303, "step": 999 }, { "epoch": 4.0, "grad_norm": 0.08299722522497177, "learning_rate": 0.00016676676676676675, "loss": 0.1805, "step": 1332 }, { "epoch": 4.0, "eval_accuracy": 0.8314666666666667, "eval_loss": 0.6055590510368347, "eval_runtime": 6.4741, "eval_samples_per_second": 579.232, "eval_steps_per_second": 9.113, "step": 1332 }, { "epoch": 5.0, "grad_norm": 0.5929628610610962, "learning_rate": 0.00013343343343343343, "loss": 0.0793, "step": 1665 }, { "epoch": 5.0, "eval_accuracy": 0.8378666666666666, "eval_loss": 0.5855091214179993, "eval_runtime": 6.4105, "eval_samples_per_second": 584.978, "eval_steps_per_second": 9.204, "step": 1665 }, { "epoch": 6.0, "grad_norm": 0.9704152345657349, "learning_rate": 0.00010010010010010008, "loss": 0.0331, "step": 1998 }, { "epoch": 6.0, "eval_accuracy": 0.8426666666666667, "eval_loss": 0.6151939034461975, "eval_runtime": 6.5449, "eval_samples_per_second": 572.968, "eval_steps_per_second": 9.015, "step": 1998 }, { "epoch": 7.0, "grad_norm": 0.026865333318710327, "learning_rate": 6.676676676676677e-05, "loss": 0.0089, "step": 2331 }, { "epoch": 7.0, "eval_accuracy": 0.852, "eval_loss": 0.6132980585098267, "eval_runtime": 6.3088, "eval_samples_per_second": 594.409, "eval_steps_per_second": 9.352, "step": 2331 }, { "epoch": 8.0, "grad_norm": 0.01385582610964775, "learning_rate": 3.343343343343343e-05, "loss": 0.0017, "step": 2664 }, { "epoch": 8.0, "eval_accuracy": 0.8656, "eval_loss": 0.5507284998893738, "eval_runtime": 6.4722, "eval_samples_per_second": 579.399, "eval_steps_per_second": 9.116, "step": 2664 }, { "epoch": 9.0, "grad_norm": 0.005359232425689697, "learning_rate": 1.001001001001001e-07, "loss": 0.0008, "step": 2997 }, { "epoch": 9.0, "eval_accuracy": 0.8677333333333334, "eval_loss": 0.5497774481773376, "eval_runtime": 6.2958, "eval_samples_per_second": 595.636, "eval_steps_per_second": 9.371, "step": 2997 } ], "logging_steps": 500, "max_steps": 2997, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.482671885603328e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }