{ "best_metric": 0.8997333333333334, "best_model_checkpoint": "./resnet_finetuned_models_dataset/CIFAR100/50_from_100/microsoft_resnet-101/model_idx_0595/checkpoints/checkpoint-2997", "epoch": 9.0, "eval_steps": 500, "global_step": 2997, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 31.006071090698242, "learning_rate": 0.0004444444444444444, "loss": 1.2234, "step": 333 }, { "epoch": 1.0, "eval_accuracy": 0.8344, "eval_loss": 0.5291814804077148, "eval_runtime": 7.9123, "eval_samples_per_second": 473.943, "eval_steps_per_second": 7.457, "step": 333 }, { "epoch": 2.0, "grad_norm": 45.96661376953125, "learning_rate": 0.0003888888888888889, "loss": 0.3672, "step": 666 }, { "epoch": 2.0, "eval_accuracy": 0.8650666666666667, "eval_loss": 0.4304977357387543, "eval_runtime": 7.6252, "eval_samples_per_second": 491.789, "eval_steps_per_second": 7.737, "step": 666 }, { "epoch": 3.0, "grad_norm": 26.87983512878418, "learning_rate": 0.0003333333333333333, "loss": 0.1957, "step": 999 }, { "epoch": 3.0, "eval_accuracy": 0.8682666666666666, "eval_loss": 0.4820173680782318, "eval_runtime": 7.5754, "eval_samples_per_second": 495.022, "eval_steps_per_second": 7.788, "step": 999 }, { "epoch": 4.0, "grad_norm": 24.122703552246094, "learning_rate": 0.0002777777777777778, "loss": 0.1146, "step": 1332 }, { "epoch": 4.0, "eval_accuracy": 0.8808, "eval_loss": 0.44177883863449097, "eval_runtime": 7.6164, "eval_samples_per_second": 492.358, "eval_steps_per_second": 7.746, "step": 1332 }, { "epoch": 5.0, "grad_norm": 22.64092445373535, "learning_rate": 0.0002222222222222222, "loss": 0.0535, "step": 1665 }, { "epoch": 5.0, "eval_accuracy": 0.8861333333333333, "eval_loss": 0.45425719022750854, "eval_runtime": 7.7481, "eval_samples_per_second": 483.991, "eval_steps_per_second": 7.615, "step": 1665 }, { "epoch": 6.0, "grad_norm": 43.797035217285156, "learning_rate": 0.00016666666666666666, "loss": 0.0353, "step": 1998 }, { "epoch": 6.0, "eval_accuracy": 0.8962666666666667, "eval_loss": 0.44793862104415894, "eval_runtime": 7.6442, "eval_samples_per_second": 490.569, "eval_steps_per_second": 7.718, "step": 1998 }, { "epoch": 7.0, "grad_norm": 33.60854721069336, "learning_rate": 0.0001111111111111111, "loss": 0.0257, "step": 2331 }, { "epoch": 7.0, "eval_accuracy": 0.8973333333333333, "eval_loss": 0.4569612443447113, "eval_runtime": 7.6977, "eval_samples_per_second": 487.159, "eval_steps_per_second": 7.665, "step": 2331 }, { "epoch": 8.0, "grad_norm": 36.8309326171875, "learning_rate": 5.555555555555555e-05, "loss": 0.0193, "step": 2664 }, { "epoch": 8.0, "eval_accuracy": 0.8976, "eval_loss": 0.49047908186912537, "eval_runtime": 7.6427, "eval_samples_per_second": 490.663, "eval_steps_per_second": 7.72, "step": 2664 }, { "epoch": 9.0, "grad_norm": 29.092975616455078, "learning_rate": 0.0, "loss": 0.0089, "step": 2997 }, { "epoch": 9.0, "eval_accuracy": 0.8997333333333334, "eval_loss": 0.46292173862457275, "eval_runtime": 7.6217, "eval_samples_per_second": 492.014, "eval_steps_per_second": 7.741, "step": 2997 } ], "logging_steps": 500, "max_steps": 2997, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.3587863155968e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }