| { | |
| "best_metric": 0.06278952211141586, | |
| "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-217", | |
| "epoch": 40.0, | |
| "global_step": 280, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.4529457688331604, | |
| "eval_runtime": 8.6926, | |
| "eval_samples_per_second": 9.088, | |
| "eval_steps_per_second": 0.115, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 0.5024, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.39888718724250793, | |
| "eval_runtime": 9.0793, | |
| "eval_samples_per_second": 8.701, | |
| "eval_steps_per_second": 0.11, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 1.8571428571428575e-05, | |
| "loss": 0.3533, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.37408992648124695, | |
| "eval_runtime": 9.0097, | |
| "eval_samples_per_second": 8.768, | |
| "eval_steps_per_second": 0.111, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.3160648047924042, | |
| "eval_runtime": 9.1428, | |
| "eval_samples_per_second": 8.641, | |
| "eval_steps_per_second": 0.109, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 0.285, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.282362163066864, | |
| "eval_runtime": 9.1103, | |
| "eval_samples_per_second": 8.671, | |
| "eval_steps_per_second": 0.11, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.2491, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.8607594936708861, | |
| "eval_loss": 0.2700817883014679, | |
| "eval_runtime": 8.8127, | |
| "eval_samples_per_second": 8.964, | |
| "eval_steps_per_second": 0.113, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.9113924050632911, | |
| "eval_loss": 0.2062235176563263, | |
| "eval_runtime": 8.5646, | |
| "eval_samples_per_second": 9.224, | |
| "eval_steps_per_second": 0.117, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 1.642857142857143e-05, | |
| "loss": 0.2032, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.20497918128967285, | |
| "eval_runtime": 8.6022, | |
| "eval_samples_per_second": 9.184, | |
| "eval_steps_per_second": 0.116, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.157, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.20131482183933258, | |
| "eval_runtime": 8.5286, | |
| "eval_samples_per_second": 9.263, | |
| "eval_steps_per_second": 0.117, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.1127, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.9367088607594937, | |
| "eval_loss": 0.19601519405841827, | |
| "eval_runtime": 8.52, | |
| "eval_samples_per_second": 9.272, | |
| "eval_steps_per_second": 0.117, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.14171478152275085, | |
| "eval_runtime": 8.4584, | |
| "eval_samples_per_second": 9.34, | |
| "eval_steps_per_second": 0.118, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.0903, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.1306915283203125, | |
| "eval_runtime": 8.4328, | |
| "eval_samples_per_second": 9.368, | |
| "eval_steps_per_second": 0.119, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "learning_rate": 1.3571428571428574e-05, | |
| "loss": 0.0922, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.9873417721518988, | |
| "eval_loss": 0.08702569454908371, | |
| "eval_runtime": 8.8303, | |
| "eval_samples_per_second": 8.947, | |
| "eval_steps_per_second": 0.113, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.9240506329113924, | |
| "eval_loss": 0.20480988919734955, | |
| "eval_runtime": 8.6551, | |
| "eval_samples_per_second": 9.128, | |
| "eval_steps_per_second": 0.116, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "learning_rate": 1.2857142857142859e-05, | |
| "loss": 0.0595, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.9620253164556962, | |
| "eval_loss": 0.12036000937223434, | |
| "eval_runtime": 8.6389, | |
| "eval_samples_per_second": 9.145, | |
| "eval_steps_per_second": 0.116, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 15.71, | |
| "learning_rate": 1.2142857142857142e-05, | |
| "loss": 0.0527, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.9367088607594937, | |
| "eval_loss": 0.2552852928638458, | |
| "eval_runtime": 8.4875, | |
| "eval_samples_per_second": 9.308, | |
| "eval_steps_per_second": 0.118, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.9367088607594937, | |
| "eval_loss": 0.16753825545310974, | |
| "eval_runtime": 8.4252, | |
| "eval_samples_per_second": 9.377, | |
| "eval_steps_per_second": 0.119, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.0477, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.9240506329113924, | |
| "eval_loss": 0.22650040686130524, | |
| "eval_runtime": 8.392, | |
| "eval_samples_per_second": 9.414, | |
| "eval_steps_per_second": 0.119, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 18.57, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 0.0411, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.9367088607594937, | |
| "eval_loss": 0.1900627613067627, | |
| "eval_runtime": 8.4937, | |
| "eval_samples_per_second": 9.301, | |
| "eval_steps_per_second": 0.118, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0299, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.9240506329113924, | |
| "eval_loss": 0.2422873079776764, | |
| "eval_runtime": 8.7448, | |
| "eval_samples_per_second": 9.034, | |
| "eval_steps_per_second": 0.114, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.9873417721518988, | |
| "eval_loss": 0.06394638121128082, | |
| "eval_runtime": 8.7204, | |
| "eval_samples_per_second": 9.059, | |
| "eval_steps_per_second": 0.115, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 21.43, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 0.0487, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.12548162043094635, | |
| "eval_runtime": 8.9171, | |
| "eval_samples_per_second": 8.859, | |
| "eval_steps_per_second": 0.112, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0359, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.12127607315778732, | |
| "eval_runtime": 8.6147, | |
| "eval_samples_per_second": 9.17, | |
| "eval_steps_per_second": 0.116, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.9746835443037974, | |
| "eval_loss": 0.07274330407381058, | |
| "eval_runtime": 8.5807, | |
| "eval_samples_per_second": 9.207, | |
| "eval_steps_per_second": 0.117, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 24.29, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 0.0302, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.11162865161895752, | |
| "eval_runtime": 8.6735, | |
| "eval_samples_per_second": 9.108, | |
| "eval_steps_per_second": 0.115, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.0304, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.10622164607048035, | |
| "eval_runtime": 8.8211, | |
| "eval_samples_per_second": 8.956, | |
| "eval_steps_per_second": 0.113, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_f1": 0.9240506329113924, | |
| "eval_loss": 0.20966486632823944, | |
| "eval_runtime": 8.7082, | |
| "eval_samples_per_second": 9.072, | |
| "eval_steps_per_second": 0.115, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 27.14, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 0.0274, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.1276017129421234, | |
| "eval_runtime": 8.676, | |
| "eval_samples_per_second": 9.106, | |
| "eval_steps_per_second": 0.115, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 28.57, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.0291, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.09670199453830719, | |
| "eval_runtime": 8.5086, | |
| "eval_samples_per_second": 9.285, | |
| "eval_steps_per_second": 0.118, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0202, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_f1": 0.9746835443037974, | |
| "eval_loss": 0.07649976760149002, | |
| "eval_runtime": 8.9676, | |
| "eval_samples_per_second": 8.81, | |
| "eval_steps_per_second": 0.112, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_f1": 0.9873417721518988, | |
| "eval_loss": 0.06278952211141586, | |
| "eval_runtime": 8.7232, | |
| "eval_samples_per_second": 9.056, | |
| "eval_steps_per_second": 0.115, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 31.43, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.0232, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.13882263004779816, | |
| "eval_runtime": 8.6516, | |
| "eval_samples_per_second": 9.131, | |
| "eval_steps_per_second": 0.116, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 32.86, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 0.0264, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.10616844147443771, | |
| "eval_runtime": 8.6952, | |
| "eval_samples_per_second": 9.085, | |
| "eval_steps_per_second": 0.115, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.1320488601922989, | |
| "eval_runtime": 8.7896, | |
| "eval_samples_per_second": 8.988, | |
| "eval_steps_per_second": 0.114, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.0219, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.1528194099664688, | |
| "eval_runtime": 8.8303, | |
| "eval_samples_per_second": 8.946, | |
| "eval_steps_per_second": 0.113, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 35.71, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.0194, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.1746273934841156, | |
| "eval_runtime": 8.7909, | |
| "eval_samples_per_second": 8.987, | |
| "eval_steps_per_second": 0.114, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.16089513897895813, | |
| "eval_runtime": 8.8367, | |
| "eval_samples_per_second": 8.94, | |
| "eval_steps_per_second": 0.113, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 37.14, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.0204, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.14817634224891663, | |
| "eval_runtime": 8.8819, | |
| "eval_samples_per_second": 8.895, | |
| "eval_steps_per_second": 0.113, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 38.57, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.0217, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.152223601937294, | |
| "eval_runtime": 8.8958, | |
| "eval_samples_per_second": 8.881, | |
| "eval_steps_per_second": 0.112, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0216, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_f1": 0.9493670886075949, | |
| "eval_loss": 0.14991530776023865, | |
| "eval_runtime": 8.6557, | |
| "eval_samples_per_second": 9.127, | |
| "eval_steps_per_second": 0.116, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "step": 280, | |
| "total_flos": 1.4091487038849024e+18, | |
| "train_loss": 0.09473916946777276, | |
| "train_runtime": 2533.8499, | |
| "train_samples_per_second": 7.041, | |
| "train_steps_per_second": 0.111 | |
| } | |
| ], | |
| "max_steps": 280, | |
| "num_train_epochs": 40, | |
| "total_flos": 1.4091487038849024e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |