{ "best_metric": 0.85, "best_model_checkpoint": "vit-base-patch16-224-RU5-40\\checkpoint-177", "epoch": 37.96610169491525, "eval_steps": 500, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.68, "learning_rate": 1.785714285714286e-05, "loss": 1.3806, "step": 10 }, { "epoch": 0.95, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.3384602069854736, "eval_runtime": 1.0175, "eval_samples_per_second": 58.969, "eval_steps_per_second": 1.966, "step": 14 }, { "epoch": 1.36, "learning_rate": 3.571428571428572e-05, "loss": 1.3323, "step": 20 }, { "epoch": 1.97, "eval_accuracy": 0.6, "eval_loss": 1.180294156074524, "eval_runtime": 1.0059, "eval_samples_per_second": 59.648, "eval_steps_per_second": 1.988, "step": 29 }, { "epoch": 2.03, "learning_rate": 4.981203007518797e-05, "loss": 1.2486, "step": 30 }, { "epoch": 2.71, "learning_rate": 4.887218045112782e-05, "loss": 1.1086, "step": 40 }, { "epoch": 2.98, "eval_accuracy": 0.6333333333333333, "eval_loss": 0.9834739565849304, "eval_runtime": 1.0161, "eval_samples_per_second": 59.048, "eval_steps_per_second": 1.968, "step": 44 }, { "epoch": 3.39, "learning_rate": 4.793233082706767e-05, "loss": 0.927, "step": 50 }, { "epoch": 4.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.8339998722076416, "eval_runtime": 1.0317, "eval_samples_per_second": 58.155, "eval_steps_per_second": 1.939, "step": 59 }, { "epoch": 4.07, "learning_rate": 4.699248120300752e-05, "loss": 0.7855, "step": 60 }, { "epoch": 4.75, "learning_rate": 4.605263157894737e-05, "loss": 0.6591, "step": 70 }, { "epoch": 4.95, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7842686176300049, "eval_runtime": 0.9874, "eval_samples_per_second": 60.766, "eval_steps_per_second": 2.026, "step": 73 }, { "epoch": 5.42, "learning_rate": 4.511278195488722e-05, "loss": 0.5201, "step": 80 }, { "epoch": 5.97, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7682653665542603, "eval_runtime": 1.07, "eval_samples_per_second": 56.076, "eval_steps_per_second": 1.869, "step": 88 }, { "epoch": 6.1, "learning_rate": 4.4172932330827074e-05, "loss": 0.435, "step": 90 }, { "epoch": 6.78, "learning_rate": 4.323308270676692e-05, "loss": 0.3763, "step": 100 }, { "epoch": 6.98, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.7880204319953918, "eval_runtime": 1.0266, "eval_samples_per_second": 58.446, "eval_steps_per_second": 1.948, "step": 103 }, { "epoch": 7.46, "learning_rate": 4.229323308270677e-05, "loss": 0.26, "step": 110 }, { "epoch": 8.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.6876354217529297, "eval_runtime": 1.0561, "eval_samples_per_second": 56.811, "eval_steps_per_second": 1.894, "step": 118 }, { "epoch": 8.14, "learning_rate": 4.135338345864662e-05, "loss": 0.2651, "step": 120 }, { "epoch": 8.81, "learning_rate": 4.041353383458647e-05, "loss": 0.2219, "step": 130 }, { "epoch": 8.95, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.7187811136245728, "eval_runtime": 1.0257, "eval_samples_per_second": 58.496, "eval_steps_per_second": 1.95, "step": 132 }, { "epoch": 9.49, "learning_rate": 3.9473684210526316e-05, "loss": 0.2243, "step": 140 }, { "epoch": 9.97, "eval_accuracy": 0.7, "eval_loss": 0.8730494379997253, "eval_runtime": 1.0464, "eval_samples_per_second": 57.337, "eval_steps_per_second": 1.911, "step": 147 }, { "epoch": 10.17, "learning_rate": 3.8533834586466165e-05, "loss": 0.1972, "step": 150 }, { "epoch": 10.85, "learning_rate": 3.759398496240601e-05, "loss": 0.178, "step": 160 }, { "epoch": 10.98, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.6872482895851135, "eval_runtime": 1.0432, "eval_samples_per_second": 57.515, "eval_steps_per_second": 1.917, "step": 162 }, { "epoch": 11.53, "learning_rate": 3.665413533834587e-05, "loss": 0.1944, "step": 170 }, { "epoch": 12.0, "eval_accuracy": 0.85, "eval_loss": 0.6149908304214478, "eval_runtime": 1.0719, "eval_samples_per_second": 55.973, "eval_steps_per_second": 1.866, "step": 177 }, { "epoch": 12.2, "learning_rate": 3.571428571428572e-05, "loss": 0.1628, "step": 180 }, { "epoch": 12.88, "learning_rate": 3.4774436090225565e-05, "loss": 0.1422, "step": 190 }, { "epoch": 12.95, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.6831592917442322, "eval_runtime": 1.034, "eval_samples_per_second": 58.029, "eval_steps_per_second": 1.934, "step": 191 }, { "epoch": 13.56, "learning_rate": 3.3834586466165414e-05, "loss": 0.1117, "step": 200 }, { "epoch": 13.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.7590230107307434, "eval_runtime": 1.1059, "eval_samples_per_second": 54.254, "eval_steps_per_second": 1.808, "step": 206 }, { "epoch": 14.24, "learning_rate": 3.289473684210527e-05, "loss": 0.1081, "step": 210 }, { "epoch": 14.92, "learning_rate": 3.195488721804512e-05, "loss": 0.117, "step": 220 }, { "epoch": 14.98, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.8428652882575989, "eval_runtime": 1.0698, "eval_samples_per_second": 56.085, "eval_steps_per_second": 1.87, "step": 221 }, { "epoch": 15.59, "learning_rate": 3.1015037593984966e-05, "loss": 0.1176, "step": 230 }, { "epoch": 16.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9740858674049377, "eval_runtime": 1.0637, "eval_samples_per_second": 56.408, "eval_steps_per_second": 1.88, "step": 236 }, { "epoch": 16.27, "learning_rate": 3.007518796992481e-05, "loss": 0.0974, "step": 240 }, { "epoch": 16.95, "learning_rate": 2.9135338345864667e-05, "loss": 0.1081, "step": 250 }, { "epoch": 16.95, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9106397032737732, "eval_runtime": 1.0253, "eval_samples_per_second": 58.521, "eval_steps_per_second": 1.951, "step": 250 }, { "epoch": 17.63, "learning_rate": 2.8195488721804515e-05, "loss": 0.0928, "step": 260 }, { "epoch": 17.97, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.9178593158721924, "eval_runtime": 1.0767, "eval_samples_per_second": 55.725, "eval_steps_per_second": 1.857, "step": 265 }, { "epoch": 18.31, "learning_rate": 2.7255639097744363e-05, "loss": 0.0763, "step": 270 }, { "epoch": 18.98, "learning_rate": 2.6315789473684212e-05, "loss": 0.0848, "step": 280 }, { "epoch": 18.98, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9694532155990601, "eval_runtime": 1.0431, "eval_samples_per_second": 57.519, "eval_steps_per_second": 1.917, "step": 280 }, { "epoch": 19.66, "learning_rate": 2.5375939849624064e-05, "loss": 0.1045, "step": 290 }, { "epoch": 20.0, "eval_accuracy": 0.8, "eval_loss": 0.8805408477783203, "eval_runtime": 1.0552, "eval_samples_per_second": 56.862, "eval_steps_per_second": 1.895, "step": 295 }, { "epoch": 20.34, "learning_rate": 2.443609022556391e-05, "loss": 0.1159, "step": 300 }, { "epoch": 20.95, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9457910060882568, "eval_runtime": 1.0314, "eval_samples_per_second": 58.171, "eval_steps_per_second": 1.939, "step": 309 }, { "epoch": 21.02, "learning_rate": 2.349624060150376e-05, "loss": 0.084, "step": 310 }, { "epoch": 21.69, "learning_rate": 2.255639097744361e-05, "loss": 0.0748, "step": 320 }, { "epoch": 21.97, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.8462807536125183, "eval_runtime": 1.0447, "eval_samples_per_second": 57.434, "eval_steps_per_second": 1.914, "step": 324 }, { "epoch": 22.37, "learning_rate": 2.161654135338346e-05, "loss": 0.0641, "step": 330 }, { "epoch": 22.98, "eval_accuracy": 0.8, "eval_loss": 0.8815339207649231, "eval_runtime": 1.0477, "eval_samples_per_second": 57.27, "eval_steps_per_second": 1.909, "step": 339 }, { "epoch": 23.05, "learning_rate": 2.067669172932331e-05, "loss": 0.0892, "step": 340 }, { "epoch": 23.73, "learning_rate": 1.9736842105263158e-05, "loss": 0.0799, "step": 350 }, { "epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 0.9426201581954956, "eval_runtime": 1.0401, "eval_samples_per_second": 57.685, "eval_steps_per_second": 1.923, "step": 354 }, { "epoch": 24.41, "learning_rate": 1.8796992481203007e-05, "loss": 0.0921, "step": 360 }, { "epoch": 24.95, "eval_accuracy": 0.75, "eval_loss": 0.9211530089378357, "eval_runtime": 1.0219, "eval_samples_per_second": 58.712, "eval_steps_per_second": 1.957, "step": 368 }, { "epoch": 25.08, "learning_rate": 1.785714285714286e-05, "loss": 0.0699, "step": 370 }, { "epoch": 25.76, "learning_rate": 1.6917293233082707e-05, "loss": 0.0602, "step": 380 }, { "epoch": 25.97, "eval_accuracy": 0.75, "eval_loss": 0.9827994704246521, "eval_runtime": 1.0441, "eval_samples_per_second": 57.465, "eval_steps_per_second": 1.916, "step": 383 }, { "epoch": 26.44, "learning_rate": 1.597744360902256e-05, "loss": 0.059, "step": 390 }, { "epoch": 26.98, "eval_accuracy": 0.8, "eval_loss": 0.8860684633255005, "eval_runtime": 1.0631, "eval_samples_per_second": 56.44, "eval_steps_per_second": 1.881, "step": 398 }, { "epoch": 27.12, "learning_rate": 1.5037593984962406e-05, "loss": 0.0544, "step": 400 }, { "epoch": 27.8, "learning_rate": 1.4097744360902257e-05, "loss": 0.0669, "step": 410 }, { "epoch": 28.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.9302221536636353, "eval_runtime": 1.0241, "eval_samples_per_second": 58.591, "eval_steps_per_second": 1.953, "step": 413 }, { "epoch": 28.47, "learning_rate": 1.3157894736842106e-05, "loss": 0.0508, "step": 420 }, { "epoch": 28.95, "eval_accuracy": 0.7166666666666667, "eval_loss": 1.0306384563446045, "eval_runtime": 1.0394, "eval_samples_per_second": 57.723, "eval_steps_per_second": 1.924, "step": 427 }, { "epoch": 29.15, "learning_rate": 1.2218045112781954e-05, "loss": 0.0658, "step": 430 }, { "epoch": 29.83, "learning_rate": 1.1278195488721805e-05, "loss": 0.0585, "step": 440 }, { "epoch": 29.97, "eval_accuracy": 0.75, "eval_loss": 0.9148640036582947, "eval_runtime": 1.0382, "eval_samples_per_second": 57.79, "eval_steps_per_second": 1.926, "step": 442 }, { "epoch": 30.51, "learning_rate": 1.0338345864661655e-05, "loss": 0.0619, "step": 450 }, { "epoch": 30.98, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8941699862480164, "eval_runtime": 1.0414, "eval_samples_per_second": 57.616, "eval_steps_per_second": 1.921, "step": 457 }, { "epoch": 31.19, "learning_rate": 9.398496240601503e-06, "loss": 0.048, "step": 460 }, { "epoch": 31.86, "learning_rate": 8.458646616541353e-06, "loss": 0.0626, "step": 470 }, { "epoch": 32.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9069377779960632, "eval_runtime": 1.0588, "eval_samples_per_second": 56.665, "eval_steps_per_second": 1.889, "step": 472 }, { "epoch": 32.54, "learning_rate": 7.518796992481203e-06, "loss": 0.0575, "step": 480 }, { "epoch": 32.95, "eval_accuracy": 0.8, "eval_loss": 0.8656221628189087, "eval_runtime": 1.0337, "eval_samples_per_second": 58.042, "eval_steps_per_second": 1.935, "step": 486 }, { "epoch": 33.22, "learning_rate": 6.578947368421053e-06, "loss": 0.052, "step": 490 }, { "epoch": 33.9, "learning_rate": 5.639097744360902e-06, "loss": 0.0483, "step": 500 }, { "epoch": 33.97, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8778695464134216, "eval_runtime": 1.0357, "eval_samples_per_second": 57.935, "eval_steps_per_second": 1.931, "step": 501 }, { "epoch": 34.58, "learning_rate": 4.699248120300752e-06, "loss": 0.0576, "step": 510 }, { "epoch": 34.98, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9077943563461304, "eval_runtime": 1.0205, "eval_samples_per_second": 58.793, "eval_steps_per_second": 1.96, "step": 516 }, { "epoch": 35.25, "learning_rate": 3.7593984962406014e-06, "loss": 0.0429, "step": 520 }, { "epoch": 35.93, "learning_rate": 2.819548872180451e-06, "loss": 0.0633, "step": 530 }, { "epoch": 36.0, "eval_accuracy": 0.8, "eval_loss": 0.8879902958869934, "eval_runtime": 1.0817, "eval_samples_per_second": 55.469, "eval_steps_per_second": 1.849, "step": 531 }, { "epoch": 36.61, "learning_rate": 1.8796992481203007e-06, "loss": 0.0511, "step": 540 }, { "epoch": 36.95, "eval_accuracy": 0.8, "eval_loss": 0.8573408722877502, "eval_runtime": 1.0249, "eval_samples_per_second": 58.542, "eval_steps_per_second": 1.951, "step": 545 }, { "epoch": 37.29, "learning_rate": 9.398496240601504e-07, "loss": 0.0402, "step": 550 }, { "epoch": 37.97, "learning_rate": 0.0, "loss": 0.049, "step": 560 }, { "epoch": 37.97, "eval_accuracy": 0.8, "eval_loss": 0.8563874363899231, "eval_runtime": 1.0455, "eval_samples_per_second": 57.387, "eval_steps_per_second": 1.913, "step": 560 }, { "epoch": 37.97, "step": 560, "total_flos": 5.526052834168259e+18, "train_loss": 0.23852362962705748, "train_runtime": 1111.7542, "train_samples_per_second": 67.569, "train_steps_per_second": 0.504 } ], "logging_steps": 10, "max_steps": 560, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 5.526052834168259e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }