{ "best_metric": 0.7098743915557861, "best_model_checkpoint": "./vit-base-rocks/checkpoint-120", "epoch": 25.0, "eval_steps": 10, "global_step": 175, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.4285714285714286, "grad_norm": 1.127541422843933, "learning_rate": 0.00018857142857142857, "loss": 2.0408, "step": 10 }, { "epoch": 1.4285714285714286, "eval_accuracy": 0.6111111111111112, "eval_loss": 1.7371398210525513, "eval_runtime": 0.8763, "eval_samples_per_second": 61.624, "eval_steps_per_second": 7.988, "step": 10 }, { "epoch": 2.857142857142857, "grad_norm": 1.358176589012146, "learning_rate": 0.00017714285714285713, "loss": 1.4489, "step": 20 }, { "epoch": 2.857142857142857, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.3254319429397583, "eval_runtime": 0.7796, "eval_samples_per_second": 69.27, "eval_steps_per_second": 8.979, "step": 20 }, { "epoch": 4.285714285714286, "grad_norm": 1.1722267866134644, "learning_rate": 0.00016571428571428575, "loss": 0.9469, "step": 30 }, { "epoch": 4.285714285714286, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.0767871141433716, "eval_runtime": 0.7949, "eval_samples_per_second": 67.932, "eval_steps_per_second": 8.806, "step": 30 }, { "epoch": 5.714285714285714, "grad_norm": 0.8477162718772888, "learning_rate": 0.0001542857142857143, "loss": 0.586, "step": 40 }, { "epoch": 5.714285714285714, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.9117566347122192, "eval_runtime": 0.7894, "eval_samples_per_second": 68.404, "eval_steps_per_second": 8.867, "step": 40 }, { "epoch": 7.142857142857143, "grad_norm": 1.0646145343780518, "learning_rate": 0.00014285714285714287, "loss": 0.3757, "step": 50 }, { "epoch": 7.142857142857143, "eval_accuracy": 0.6851851851851852, "eval_loss": 0.9901659488677979, "eval_runtime": 0.7947, "eval_samples_per_second": 67.952, "eval_steps_per_second": 8.809, "step": 50 }, { "epoch": 8.571428571428571, "grad_norm": 0.5591130256652832, "learning_rate": 0.00013142857142857143, "loss": 0.2798, "step": 60 }, { "epoch": 8.571428571428571, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.849772036075592, "eval_runtime": 0.784, "eval_samples_per_second": 68.874, "eval_steps_per_second": 8.928, "step": 60 }, { "epoch": 10.0, "grad_norm": 0.6118601560592651, "learning_rate": 0.00012, "loss": 0.2087, "step": 70 }, { "epoch": 10.0, "eval_accuracy": 0.7407407407407407, "eval_loss": 0.7938927412033081, "eval_runtime": 0.7888, "eval_samples_per_second": 68.455, "eval_steps_per_second": 8.874, "step": 70 }, { "epoch": 11.428571428571429, "grad_norm": 0.31403398513793945, "learning_rate": 0.00010857142857142856, "loss": 0.176, "step": 80 }, { "epoch": 11.428571428571429, "eval_accuracy": 0.7222222222222222, "eval_loss": 0.8219618797302246, "eval_runtime": 0.7893, "eval_samples_per_second": 68.415, "eval_steps_per_second": 8.869, "step": 80 }, { "epoch": 12.857142857142858, "grad_norm": 0.3314395248889923, "learning_rate": 9.714285714285715e-05, "loss": 0.1613, "step": 90 }, { "epoch": 12.857142857142858, "eval_accuracy": 0.8148148148148148, "eval_loss": 0.7287979125976562, "eval_runtime": 0.7896, "eval_samples_per_second": 68.386, "eval_steps_per_second": 8.865, "step": 90 }, { "epoch": 14.285714285714286, "grad_norm": 0.26313790678977966, "learning_rate": 8.571428571428571e-05, "loss": 0.1337, "step": 100 }, { "epoch": 14.285714285714286, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.7177786827087402, "eval_runtime": 0.7907, "eval_samples_per_second": 68.296, "eval_steps_per_second": 8.853, "step": 100 }, { "epoch": 15.714285714285714, "grad_norm": 0.2856982350349426, "learning_rate": 7.428571428571429e-05, "loss": 0.1326, "step": 110 }, { "epoch": 15.714285714285714, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.7402880787849426, "eval_runtime": 0.7898, "eval_samples_per_second": 68.373, "eval_steps_per_second": 8.863, "step": 110 }, { "epoch": 17.142857142857142, "grad_norm": 0.26772505044937134, "learning_rate": 6.285714285714286e-05, "loss": 0.119, "step": 120 }, { "epoch": 17.142857142857142, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.7098743915557861, "eval_runtime": 0.7788, "eval_samples_per_second": 69.34, "eval_steps_per_second": 8.989, "step": 120 }, { "epoch": 18.571428571428573, "grad_norm": 0.37468403577804565, "learning_rate": 5.142857142857143e-05, "loss": 0.1193, "step": 130 }, { "epoch": 18.571428571428573, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.7625551819801331, "eval_runtime": 0.7785, "eval_samples_per_second": 69.366, "eval_steps_per_second": 8.992, "step": 130 }, { "epoch": 20.0, "grad_norm": 0.27853044867515564, "learning_rate": 4e-05, "loss": 0.1227, "step": 140 }, { "epoch": 20.0, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.7125461101531982, "eval_runtime": 0.7792, "eval_samples_per_second": 69.302, "eval_steps_per_second": 8.984, "step": 140 }, { "epoch": 21.428571428571427, "grad_norm": 0.23199671506881714, "learning_rate": 2.857142857142857e-05, "loss": 0.1102, "step": 150 }, { "epoch": 21.428571428571427, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.7493270635604858, "eval_runtime": 0.7731, "eval_samples_per_second": 69.851, "eval_steps_per_second": 9.055, "step": 150 }, { "epoch": 22.857142857142858, "grad_norm": 0.2707015573978424, "learning_rate": 1.7142857142857145e-05, "loss": 0.1134, "step": 160 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.7395932078361511, "eval_runtime": 0.7812, "eval_samples_per_second": 69.127, "eval_steps_per_second": 8.961, "step": 160 }, { "epoch": 24.285714285714285, "grad_norm": 0.191811203956604, "learning_rate": 5.7142857142857145e-06, "loss": 0.1173, "step": 170 }, { "epoch": 24.285714285714285, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.718734622001648, "eval_runtime": 0.7909, "eval_samples_per_second": 68.28, "eval_steps_per_second": 8.851, "step": 170 }, { "epoch": 25.0, "step": 175, "total_flos": 7.575385112128512e+17, "train_loss": 0.41437330859048027, "train_runtime": 335.3194, "train_samples_per_second": 29.151, "train_steps_per_second": 0.522 } ], "logging_steps": 10, "max_steps": 175, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.575385112128512e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }