| { |
| "best_metric": 0.7098743915557861, |
| "best_model_checkpoint": "./vit-base-rocks/checkpoint-120", |
| "epoch": 25.0, |
| "eval_steps": 10, |
| "global_step": 175, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 1.127541422843933, |
| "learning_rate": 0.00018857142857142857, |
| "loss": 2.0408, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "eval_accuracy": 0.6111111111111112, |
| "eval_loss": 1.7371398210525513, |
| "eval_runtime": 0.8763, |
| "eval_samples_per_second": 61.624, |
| "eval_steps_per_second": 7.988, |
| "step": 10 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 1.358176589012146, |
| "learning_rate": 0.00017714285714285713, |
| "loss": 1.4489, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "eval_accuracy": 0.7407407407407407, |
| "eval_loss": 1.3254319429397583, |
| "eval_runtime": 0.7796, |
| "eval_samples_per_second": 69.27, |
| "eval_steps_per_second": 8.979, |
| "step": 20 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 1.1722267866134644, |
| "learning_rate": 0.00016571428571428575, |
| "loss": 0.9469, |
| "step": 30 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "eval_accuracy": 0.7407407407407407, |
| "eval_loss": 1.0767871141433716, |
| "eval_runtime": 0.7949, |
| "eval_samples_per_second": 67.932, |
| "eval_steps_per_second": 8.806, |
| "step": 30 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.8477162718772888, |
| "learning_rate": 0.0001542857142857143, |
| "loss": 0.586, |
| "step": 40 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_loss": 0.9117566347122192, |
| "eval_runtime": 0.7894, |
| "eval_samples_per_second": 68.404, |
| "eval_steps_per_second": 8.867, |
| "step": 40 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.0646145343780518, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 0.3757, |
| "step": 50 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "eval_accuracy": 0.6851851851851852, |
| "eval_loss": 0.9901659488677979, |
| "eval_runtime": 0.7947, |
| "eval_samples_per_second": 67.952, |
| "eval_steps_per_second": 8.809, |
| "step": 50 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.5591130256652832, |
| "learning_rate": 0.00013142857142857143, |
| "loss": 0.2798, |
| "step": 60 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_loss": 0.849772036075592, |
| "eval_runtime": 0.784, |
| "eval_samples_per_second": 68.874, |
| "eval_steps_per_second": 8.928, |
| "step": 60 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.6118601560592651, |
| "learning_rate": 0.00012, |
| "loss": 0.2087, |
| "step": 70 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7407407407407407, |
| "eval_loss": 0.7938927412033081, |
| "eval_runtime": 0.7888, |
| "eval_samples_per_second": 68.455, |
| "eval_steps_per_second": 8.874, |
| "step": 70 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 0.31403398513793945, |
| "learning_rate": 0.00010857142857142856, |
| "loss": 0.176, |
| "step": 80 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "eval_accuracy": 0.7222222222222222, |
| "eval_loss": 0.8219618797302246, |
| "eval_runtime": 0.7893, |
| "eval_samples_per_second": 68.415, |
| "eval_steps_per_second": 8.869, |
| "step": 80 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "grad_norm": 0.3314395248889923, |
| "learning_rate": 9.714285714285715e-05, |
| "loss": 0.1613, |
| "step": 90 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "eval_accuracy": 0.8148148148148148, |
| "eval_loss": 0.7287979125976562, |
| "eval_runtime": 0.7896, |
| "eval_samples_per_second": 68.386, |
| "eval_steps_per_second": 8.865, |
| "step": 90 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.26313790678977966, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.1337, |
| "step": 100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 0.7177786827087402, |
| "eval_runtime": 0.7907, |
| "eval_samples_per_second": 68.296, |
| "eval_steps_per_second": 8.853, |
| "step": 100 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "grad_norm": 0.2856982350349426, |
| "learning_rate": 7.428571428571429e-05, |
| "loss": 0.1326, |
| "step": 110 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_loss": 0.7402880787849426, |
| "eval_runtime": 0.7898, |
| "eval_samples_per_second": 68.373, |
| "eval_steps_per_second": 8.863, |
| "step": 110 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "grad_norm": 0.26772505044937134, |
| "learning_rate": 6.285714285714286e-05, |
| "loss": 0.119, |
| "step": 120 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_loss": 0.7098743915557861, |
| "eval_runtime": 0.7788, |
| "eval_samples_per_second": 69.34, |
| "eval_steps_per_second": 8.989, |
| "step": 120 |
| }, |
| { |
| "epoch": 18.571428571428573, |
| "grad_norm": 0.37468403577804565, |
| "learning_rate": 5.142857142857143e-05, |
| "loss": 0.1193, |
| "step": 130 |
| }, |
| { |
| "epoch": 18.571428571428573, |
| "eval_accuracy": 0.7777777777777778, |
| "eval_loss": 0.7625551819801331, |
| "eval_runtime": 0.7785, |
| "eval_samples_per_second": 69.366, |
| "eval_steps_per_second": 8.992, |
| "step": 130 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.27853044867515564, |
| "learning_rate": 4e-05, |
| "loss": 0.1227, |
| "step": 140 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 0.7125461101531982, |
| "eval_runtime": 0.7792, |
| "eval_samples_per_second": 69.302, |
| "eval_steps_per_second": 8.984, |
| "step": 140 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 0.23199671506881714, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.1102, |
| "step": 150 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 0.7493270635604858, |
| "eval_runtime": 0.7731, |
| "eval_samples_per_second": 69.851, |
| "eval_steps_per_second": 9.055, |
| "step": 150 |
| }, |
| { |
| "epoch": 22.857142857142858, |
| "grad_norm": 0.2707015573978424, |
| "learning_rate": 1.7142857142857145e-05, |
| "loss": 0.1134, |
| "step": 160 |
| }, |
| { |
| "epoch": 22.857142857142858, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 0.7395932078361511, |
| "eval_runtime": 0.7812, |
| "eval_samples_per_second": 69.127, |
| "eval_steps_per_second": 8.961, |
| "step": 160 |
| }, |
| { |
| "epoch": 24.285714285714285, |
| "grad_norm": 0.191811203956604, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.1173, |
| "step": 170 |
| }, |
| { |
| "epoch": 24.285714285714285, |
| "eval_accuracy": 0.7962962962962963, |
| "eval_loss": 0.718734622001648, |
| "eval_runtime": 0.7909, |
| "eval_samples_per_second": 68.28, |
| "eval_steps_per_second": 8.851, |
| "step": 170 |
| }, |
| { |
| "epoch": 25.0, |
| "step": 175, |
| "total_flos": 7.575385112128512e+17, |
| "train_loss": 0.41437330859048027, |
| "train_runtime": 335.3194, |
| "train_samples_per_second": 29.151, |
| "train_steps_per_second": 0.522 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 175, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 25, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.575385112128512e+17, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|