| { |
| "best_metric": 0.41899441340782123, |
| "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-128", |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 480, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.0416666666666668e-05, |
| "loss": 1.6, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.29608938547486036, |
| "eval_loss": 1.5315604209899902, |
| "eval_runtime": 6.8342, |
| "eval_samples_per_second": 52.384, |
| "eval_steps_per_second": 1.756, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.0833333333333336e-05, |
| "loss": 1.535, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 3.125e-05, |
| "loss": 1.5084, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.2849162011173184, |
| "eval_loss": 1.5060781240463257, |
| "eval_runtime": 7.4696, |
| "eval_samples_per_second": 47.928, |
| "eval_steps_per_second": 1.607, |
| "step": 32 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 1.5134, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.3240223463687151, |
| "eval_loss": 1.4968407154083252, |
| "eval_runtime": 7.1172, |
| "eval_samples_per_second": 50.301, |
| "eval_steps_per_second": 1.686, |
| "step": 48 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 4.976851851851852e-05, |
| "loss": 1.4694, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 4.8611111111111115e-05, |
| "loss": 1.4663, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.33519553072625696, |
| "eval_loss": 1.4607384204864502, |
| "eval_runtime": 7.4013, |
| "eval_samples_per_second": 48.37, |
| "eval_steps_per_second": 1.621, |
| "step": 64 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 4.745370370370371e-05, |
| "loss": 1.4375, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 4.62962962962963e-05, |
| "loss": 1.4046, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.3268156424581006, |
| "eval_loss": 1.4509011507034302, |
| "eval_runtime": 6.83, |
| "eval_samples_per_second": 52.416, |
| "eval_steps_per_second": 1.757, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 4.5138888888888894e-05, |
| "loss": 1.4085, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.388268156424581, |
| "eval_loss": 1.4423185586929321, |
| "eval_runtime": 7.3683, |
| "eval_samples_per_second": 48.587, |
| "eval_steps_per_second": 1.629, |
| "step": 96 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 4.3981481481481486e-05, |
| "loss": 1.3765, |
| "step": 100 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 4.282407407407408e-05, |
| "loss": 1.3443, |
| "step": 110 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.4022346368715084, |
| "eval_loss": 1.400512456893921, |
| "eval_runtime": 6.9156, |
| "eval_samples_per_second": 51.767, |
| "eval_steps_per_second": 1.735, |
| "step": 112 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 1.3025, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.41899441340782123, |
| "eval_loss": 1.359870195388794, |
| "eval_runtime": 7.041, |
| "eval_samples_per_second": 50.845, |
| "eval_steps_per_second": 1.704, |
| "step": 128 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 4.0509259259259265e-05, |
| "loss": 1.2668, |
| "step": 130 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 3.935185185185186e-05, |
| "loss": 1.2627, |
| "step": 140 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.39106145251396646, |
| "eval_loss": 1.3638169765472412, |
| "eval_runtime": 7.2532, |
| "eval_samples_per_second": 49.357, |
| "eval_steps_per_second": 1.654, |
| "step": 144 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 3.8194444444444444e-05, |
| "loss": 1.2427, |
| "step": 150 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 3.7037037037037037e-05, |
| "loss": 1.2099, |
| "step": 160 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.34916201117318435, |
| "eval_loss": 1.4057648181915283, |
| "eval_runtime": 7.508, |
| "eval_samples_per_second": 47.682, |
| "eval_steps_per_second": 1.598, |
| "step": 160 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 3.587962962962963e-05, |
| "loss": 1.2086, |
| "step": 170 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.3407821229050279, |
| "eval_loss": 1.443146824836731, |
| "eval_runtime": 7.4283, |
| "eval_samples_per_second": 48.194, |
| "eval_steps_per_second": 1.615, |
| "step": 176 |
| }, |
| { |
| "epoch": 11.25, |
| "learning_rate": 3.472222222222222e-05, |
| "loss": 1.1684, |
| "step": 180 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 3.3564814814814815e-05, |
| "loss": 1.1393, |
| "step": 190 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.34916201117318435, |
| "eval_loss": 1.4143450260162354, |
| "eval_runtime": 7.1396, |
| "eval_samples_per_second": 50.143, |
| "eval_steps_per_second": 1.681, |
| "step": 192 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 3.240740740740741e-05, |
| "loss": 1.1039, |
| "step": 200 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.388268156424581, |
| "eval_loss": 1.4305065870285034, |
| "eval_runtime": 6.8442, |
| "eval_samples_per_second": 52.307, |
| "eval_steps_per_second": 1.753, |
| "step": 208 |
| }, |
| { |
| "epoch": 13.12, |
| "learning_rate": 3.125e-05, |
| "loss": 1.0641, |
| "step": 210 |
| }, |
| { |
| "epoch": 13.75, |
| "learning_rate": 3.0092592592592593e-05, |
| "loss": 1.0551, |
| "step": 220 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.35195530726256985, |
| "eval_loss": 1.5202596187591553, |
| "eval_runtime": 7.2925, |
| "eval_samples_per_second": 49.091, |
| "eval_steps_per_second": 1.646, |
| "step": 224 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 2.8935185185185186e-05, |
| "loss": 1.0686, |
| "step": 230 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 1.0368, |
| "step": 240 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.3324022346368715, |
| "eval_loss": 1.5117393732070923, |
| "eval_runtime": 6.8377, |
| "eval_samples_per_second": 52.356, |
| "eval_steps_per_second": 1.755, |
| "step": 240 |
| }, |
| { |
| "epoch": 15.62, |
| "learning_rate": 2.6620370370370372e-05, |
| "loss": 0.9753, |
| "step": 250 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.3770949720670391, |
| "eval_loss": 1.4545259475708008, |
| "eval_runtime": 7.0796, |
| "eval_samples_per_second": 50.568, |
| "eval_steps_per_second": 1.695, |
| "step": 256 |
| }, |
| { |
| "epoch": 16.25, |
| "learning_rate": 2.5462962962962965e-05, |
| "loss": 0.9677, |
| "step": 260 |
| }, |
| { |
| "epoch": 16.88, |
| "learning_rate": 2.4305555555555558e-05, |
| "loss": 0.938, |
| "step": 270 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.33519553072625696, |
| "eval_loss": 1.5396308898925781, |
| "eval_runtime": 7.1168, |
| "eval_samples_per_second": 50.304, |
| "eval_steps_per_second": 1.686, |
| "step": 272 |
| }, |
| { |
| "epoch": 17.5, |
| "learning_rate": 2.314814814814815e-05, |
| "loss": 0.899, |
| "step": 280 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.3407821229050279, |
| "eval_loss": 1.5770219564437866, |
| "eval_runtime": 6.8822, |
| "eval_samples_per_second": 52.018, |
| "eval_steps_per_second": 1.744, |
| "step": 288 |
| }, |
| { |
| "epoch": 18.12, |
| "learning_rate": 2.1990740740740743e-05, |
| "loss": 0.9047, |
| "step": 290 |
| }, |
| { |
| "epoch": 18.75, |
| "learning_rate": 2.0833333333333336e-05, |
| "loss": 0.8629, |
| "step": 300 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.3128491620111732, |
| "eval_loss": 1.7105906009674072, |
| "eval_runtime": 7.3502, |
| "eval_samples_per_second": 48.706, |
| "eval_steps_per_second": 1.633, |
| "step": 304 |
| }, |
| { |
| "epoch": 19.38, |
| "learning_rate": 1.967592592592593e-05, |
| "loss": 0.8624, |
| "step": 310 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 1.8518518518518518e-05, |
| "loss": 0.8674, |
| "step": 320 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.33519553072625696, |
| "eval_loss": 1.5864217281341553, |
| "eval_runtime": 7.1963, |
| "eval_samples_per_second": 49.748, |
| "eval_steps_per_second": 1.668, |
| "step": 320 |
| }, |
| { |
| "epoch": 20.62, |
| "learning_rate": 1.736111111111111e-05, |
| "loss": 0.7789, |
| "step": 330 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.3407821229050279, |
| "eval_loss": 1.6129050254821777, |
| "eval_runtime": 7.1496, |
| "eval_samples_per_second": 50.073, |
| "eval_steps_per_second": 1.678, |
| "step": 336 |
| }, |
| { |
| "epoch": 21.25, |
| "learning_rate": 1.6203703703703704e-05, |
| "loss": 0.8161, |
| "step": 340 |
| }, |
| { |
| "epoch": 21.88, |
| "learning_rate": 1.5046296296296297e-05, |
| "loss": 0.7426, |
| "step": 350 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.36033519553072624, |
| "eval_loss": 1.6353477239608765, |
| "eval_runtime": 7.4456, |
| "eval_samples_per_second": 48.082, |
| "eval_steps_per_second": 1.612, |
| "step": 352 |
| }, |
| { |
| "epoch": 22.5, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 0.7677, |
| "step": 360 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.3463687150837989, |
| "eval_loss": 1.6793445348739624, |
| "eval_runtime": 6.994, |
| "eval_samples_per_second": 51.187, |
| "eval_steps_per_second": 1.716, |
| "step": 368 |
| }, |
| { |
| "epoch": 23.12, |
| "learning_rate": 1.2731481481481482e-05, |
| "loss": 0.7327, |
| "step": 370 |
| }, |
| { |
| "epoch": 23.75, |
| "learning_rate": 1.1574074074074075e-05, |
| "loss": 0.7172, |
| "step": 380 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.3575418994413408, |
| "eval_loss": 1.6759321689605713, |
| "eval_runtime": 7.4394, |
| "eval_samples_per_second": 48.122, |
| "eval_steps_per_second": 1.613, |
| "step": 384 |
| }, |
| { |
| "epoch": 24.38, |
| "learning_rate": 1.0416666666666668e-05, |
| "loss": 0.6759, |
| "step": 390 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 9.259259259259259e-06, |
| "loss": 0.6809, |
| "step": 400 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.3659217877094972, |
| "eval_loss": 1.701292634010315, |
| "eval_runtime": 7.4138, |
| "eval_samples_per_second": 48.288, |
| "eval_steps_per_second": 1.619, |
| "step": 400 |
| }, |
| { |
| "epoch": 25.62, |
| "learning_rate": 8.101851851851852e-06, |
| "loss": 0.6619, |
| "step": 410 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.36312849162011174, |
| "eval_loss": 1.7108293771743774, |
| "eval_runtime": 7.238, |
| "eval_samples_per_second": 49.461, |
| "eval_steps_per_second": 1.658, |
| "step": 416 |
| }, |
| { |
| "epoch": 26.25, |
| "learning_rate": 6.944444444444445e-06, |
| "loss": 0.6773, |
| "step": 420 |
| }, |
| { |
| "epoch": 26.88, |
| "learning_rate": 5.787037037037038e-06, |
| "loss": 0.6656, |
| "step": 430 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.3715083798882682, |
| "eval_loss": 1.7327028512954712, |
| "eval_runtime": 6.8416, |
| "eval_samples_per_second": 52.327, |
| "eval_steps_per_second": 1.754, |
| "step": 432 |
| }, |
| { |
| "epoch": 27.5, |
| "learning_rate": 4.6296296296296296e-06, |
| "loss": 0.6258, |
| "step": 440 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.3547486033519553, |
| "eval_loss": 1.7377949953079224, |
| "eval_runtime": 7.2785, |
| "eval_samples_per_second": 49.186, |
| "eval_steps_per_second": 1.649, |
| "step": 448 |
| }, |
| { |
| "epoch": 28.12, |
| "learning_rate": 3.4722222222222224e-06, |
| "loss": 0.6646, |
| "step": 450 |
| }, |
| { |
| "epoch": 28.75, |
| "learning_rate": 2.3148148148148148e-06, |
| "loss": 0.6173, |
| "step": 460 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.36033519553072624, |
| "eval_loss": 1.7461235523223877, |
| "eval_runtime": 6.8622, |
| "eval_samples_per_second": 52.17, |
| "eval_steps_per_second": 1.749, |
| "step": 464 |
| }, |
| { |
| "epoch": 29.38, |
| "learning_rate": 1.1574074074074074e-06, |
| "loss": 0.6482, |
| "step": 470 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 0.0, |
| "loss": 0.6214, |
| "step": 480 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.35195530726256985, |
| "eval_loss": 1.7475444078445435, |
| "eval_runtime": 7.4355, |
| "eval_samples_per_second": 48.148, |
| "eval_steps_per_second": 1.614, |
| "step": 480 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 480, |
| "total_flos": 4.840276186658304e+18, |
| "train_loss": 1.0325976332028708, |
| "train_runtime": 3493.542, |
| "train_samples_per_second": 17.492, |
| "train_steps_per_second": 0.137 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 480, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "total_flos": 4.840276186658304e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|