{
  "best_metric": 0.85,
  "best_model_checkpoint": "vit-base-patch16-224-RU5-40\\checkpoint-177",
  "epoch": 37.96610169491525,
  "eval_steps": 500,
  "global_step": 560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.68,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.3806,
      "step": 10
    },
    {
      "epoch": 0.95,
      "eval_accuracy": 0.48333333333333334,
      "eval_loss": 1.3384602069854736,
      "eval_runtime": 1.0175,
      "eval_samples_per_second": 58.969,
      "eval_steps_per_second": 1.966,
      "step": 14
    },
    {
      "epoch": 1.36,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.3323,
      "step": 20
    },
    {
      "epoch": 1.97,
      "eval_accuracy": 0.6,
      "eval_loss": 1.180294156074524,
      "eval_runtime": 1.0059,
      "eval_samples_per_second": 59.648,
      "eval_steps_per_second": 1.988,
      "step": 29
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.981203007518797e-05,
      "loss": 1.2486,
      "step": 30
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.887218045112782e-05,
      "loss": 1.1086,
      "step": 40
    },
    {
      "epoch": 2.98,
      "eval_accuracy": 0.6333333333333333,
      "eval_loss": 0.9834739565849304,
      "eval_runtime": 1.0161,
      "eval_samples_per_second": 59.048,
      "eval_steps_per_second": 1.968,
      "step": 44
    },
    {
      "epoch": 3.39,
      "learning_rate": 4.793233082706767e-05,
      "loss": 0.927,
      "step": 50
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7166666666666667,
      "eval_loss": 0.8339998722076416,
      "eval_runtime": 1.0317,
      "eval_samples_per_second": 58.155,
      "eval_steps_per_second": 1.939,
      "step": 59
    },
    {
      "epoch": 4.07,
      "learning_rate": 4.699248120300752e-05,
      "loss": 0.7855,
      "step": 60
    },
    {
      "epoch": 4.75,
      "learning_rate": 4.605263157894737e-05,
      "loss": 0.6591,
      "step": 70
    },
    {
      "epoch": 4.95,
      "eval_accuracy": 0.7166666666666667,
      "eval_loss": 0.7842686176300049,
      "eval_runtime": 0.9874,
      "eval_samples_per_second": 60.766,
      "eval_steps_per_second": 2.026,
      "step": 73
    },
    {
      "epoch": 5.42,
      "learning_rate": 4.511278195488722e-05,
      "loss": 0.5201,
      "step": 80
    },
    {
      "epoch": 5.97,
      "eval_accuracy": 0.7166666666666667,
      "eval_loss": 0.7682653665542603,
      "eval_runtime": 1.07,
      "eval_samples_per_second": 56.076,
      "eval_steps_per_second": 1.869,
      "step": 88
    },
    {
      "epoch": 6.1,
      "learning_rate": 4.4172932330827074e-05,
      "loss": 0.435,
      "step": 90
    },
    {
      "epoch": 6.78,
      "learning_rate": 4.323308270676692e-05,
      "loss": 0.3763,
      "step": 100
    },
    {
      "epoch": 6.98,
      "eval_accuracy": 0.6833333333333333,
      "eval_loss": 0.7880204319953918,
      "eval_runtime": 1.0266,
      "eval_samples_per_second": 58.446,
      "eval_steps_per_second": 1.948,
      "step": 103
    },
    {
      "epoch": 7.46,
      "learning_rate": 4.229323308270677e-05,
      "loss": 0.26,
      "step": 110
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.6876354217529297,
      "eval_runtime": 1.0561,
      "eval_samples_per_second": 56.811,
      "eval_steps_per_second": 1.894,
      "step": 118
    },
    {
      "epoch": 8.14,
      "learning_rate": 4.135338345864662e-05,
      "loss": 0.2651,
      "step": 120
    },
    {
      "epoch": 8.81,
      "learning_rate": 4.041353383458647e-05,
      "loss": 0.2219,
      "step": 130
    },
    {
      "epoch": 8.95,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.7187811136245728,
      "eval_runtime": 1.0257,
      "eval_samples_per_second": 58.496,
      "eval_steps_per_second": 1.95,
      "step": 132
    },
    {
      "epoch": 9.49,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.2243,
      "step": 140
    },
    {
      "epoch": 9.97,
      "eval_accuracy": 0.7,
      "eval_loss": 0.8730494379997253,
      "eval_runtime": 1.0464,
      "eval_samples_per_second": 57.337,
      "eval_steps_per_second": 1.911,
      "step": 147
    },
    {
      "epoch": 10.17,
      "learning_rate": 3.8533834586466165e-05,
      "loss": 0.1972,
      "step": 150
    },
    {
      "epoch": 10.85,
      "learning_rate": 3.759398496240601e-05,
      "loss": 0.178,
      "step": 160
    },
    {
      "epoch": 10.98,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.6872482895851135,
      "eval_runtime": 1.0432,
      "eval_samples_per_second": 57.515,
      "eval_steps_per_second": 1.917,
      "step": 162
    },
    {
      "epoch": 11.53,
      "learning_rate": 3.665413533834587e-05,
      "loss": 0.1944,
      "step": 170
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.85,
      "eval_loss": 0.6149908304214478,
      "eval_runtime": 1.0719,
      "eval_samples_per_second": 55.973,
      "eval_steps_per_second": 1.866,
      "step": 177
    },
    {
      "epoch": 12.2,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.1628,
      "step": 180
    },
    {
      "epoch": 12.88,
      "learning_rate": 3.4774436090225565e-05,
      "loss": 0.1422,
      "step": 190
    },
    {
      "epoch": 12.95,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.6831592917442322,
      "eval_runtime": 1.034,
      "eval_samples_per_second": 58.029,
      "eval_steps_per_second": 1.934,
      "step": 191
    },
    {
      "epoch": 13.56,
      "learning_rate": 3.3834586466165414e-05,
      "loss": 0.1117,
      "step": 200
    },
    {
      "epoch": 13.97,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.7590230107307434,
      "eval_runtime": 1.1059,
      "eval_samples_per_second": 54.254,
      "eval_steps_per_second": 1.808,
      "step": 206
    },
    {
      "epoch": 14.24,
      "learning_rate": 3.289473684210527e-05,
      "loss": 0.1081,
      "step": 210
    },
    {
      "epoch": 14.92,
      "learning_rate": 3.195488721804512e-05,
      "loss": 0.117,
      "step": 220
    },
    {
      "epoch": 14.98,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.8428652882575989,
      "eval_runtime": 1.0698,
      "eval_samples_per_second": 56.085,
      "eval_steps_per_second": 1.87,
      "step": 221
    },
    {
      "epoch": 15.59,
      "learning_rate": 3.1015037593984966e-05,
      "loss": 0.1176,
      "step": 230
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.9740858674049377,
      "eval_runtime": 1.0637,
      "eval_samples_per_second": 56.408,
      "eval_steps_per_second": 1.88,
      "step": 236
    },
    {
      "epoch": 16.27,
      "learning_rate": 3.007518796992481e-05,
      "loss": 0.0974,
      "step": 240
    },
    {
      "epoch": 16.95,
      "learning_rate": 2.9135338345864667e-05,
      "loss": 0.1081,
      "step": 250
    },
    {
      "epoch": 16.95,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.9106397032737732,
      "eval_runtime": 1.0253,
      "eval_samples_per_second": 58.521,
      "eval_steps_per_second": 1.951,
      "step": 250
    },
    {
      "epoch": 17.63,
      "learning_rate": 2.8195488721804515e-05,
      "loss": 0.0928,
      "step": 260
    },
    {
      "epoch": 17.97,
      "eval_accuracy": 0.7333333333333333,
      "eval_loss": 0.9178593158721924,
      "eval_runtime": 1.0767,
      "eval_samples_per_second": 55.725,
      "eval_steps_per_second": 1.857,
      "step": 265
    },
    {
      "epoch": 18.31,
      "learning_rate": 2.7255639097744363e-05,
      "loss": 0.0763,
      "step": 270
    },
    {
      "epoch": 18.98,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.0848,
      "step": 280
    },
    {
      "epoch": 18.98,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.9694532155990601,
      "eval_runtime": 1.0431,
      "eval_samples_per_second": 57.519,
      "eval_steps_per_second": 1.917,
      "step": 280
    },
    {
      "epoch": 19.66,
      "learning_rate": 2.5375939849624064e-05,
      "loss": 0.1045,
      "step": 290
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8805408477783203,
      "eval_runtime": 1.0552,
      "eval_samples_per_second": 56.862,
      "eval_steps_per_second": 1.895,
      "step": 295
    },
    {
      "epoch": 20.34,
      "learning_rate": 2.443609022556391e-05,
      "loss": 0.1159,
      "step": 300
    },
    {
      "epoch": 20.95,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.9457910060882568,
      "eval_runtime": 1.0314,
      "eval_samples_per_second": 58.171,
      "eval_steps_per_second": 1.939,
      "step": 309
    },
    {
      "epoch": 21.02,
      "learning_rate": 2.349624060150376e-05,
      "loss": 0.084,
      "step": 310
    },
    {
      "epoch": 21.69,
      "learning_rate": 2.255639097744361e-05,
      "loss": 0.0748,
      "step": 320
    },
    {
      "epoch": 21.97,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.8462807536125183,
      "eval_runtime": 1.0447,
      "eval_samples_per_second": 57.434,
      "eval_steps_per_second": 1.914,
      "step": 324
    },
    {
      "epoch": 22.37,
      "learning_rate": 2.161654135338346e-05,
      "loss": 0.0641,
      "step": 330
    },
    {
      "epoch": 22.98,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8815339207649231,
      "eval_runtime": 1.0477,
      "eval_samples_per_second": 57.27,
      "eval_steps_per_second": 1.909,
      "step": 339
    },
    {
      "epoch": 23.05,
      "learning_rate": 2.067669172932331e-05,
      "loss": 0.0892,
      "step": 340
    },
    {
      "epoch": 23.73,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.0799,
      "step": 350
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.75,
      "eval_loss": 0.9426201581954956,
      "eval_runtime": 1.0401,
      "eval_samples_per_second": 57.685,
      "eval_steps_per_second": 1.923,
      "step": 354
    },
    {
      "epoch": 24.41,
      "learning_rate": 1.8796992481203007e-05,
      "loss": 0.0921,
      "step": 360
    },
    {
      "epoch": 24.95,
      "eval_accuracy": 0.75,
      "eval_loss": 0.9211530089378357,
      "eval_runtime": 1.0219,
      "eval_samples_per_second": 58.712,
      "eval_steps_per_second": 1.957,
      "step": 368
    },
    {
      "epoch": 25.08,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.0699,
      "step": 370
    },
    {
      "epoch": 25.76,
      "learning_rate": 1.6917293233082707e-05,
      "loss": 0.0602,
      "step": 380
    },
    {
      "epoch": 25.97,
      "eval_accuracy": 0.75,
      "eval_loss": 0.9827994704246521,
      "eval_runtime": 1.0441,
      "eval_samples_per_second": 57.465,
      "eval_steps_per_second": 1.916,
      "step": 383
    },
    {
      "epoch": 26.44,
      "learning_rate": 1.597744360902256e-05,
      "loss": 0.059,
      "step": 390
    },
    {
      "epoch": 26.98,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8860684633255005,
      "eval_runtime": 1.0631,
      "eval_samples_per_second": 56.44,
      "eval_steps_per_second": 1.881,
      "step": 398
    },
    {
      "epoch": 27.12,
      "learning_rate": 1.5037593984962406e-05,
      "loss": 0.0544,
      "step": 400
    },
    {
      "epoch": 27.8,
      "learning_rate": 1.4097744360902257e-05,
      "loss": 0.0669,
      "step": 410
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7333333333333333,
      "eval_loss": 0.9302221536636353,
      "eval_runtime": 1.0241,
      "eval_samples_per_second": 58.591,
      "eval_steps_per_second": 1.953,
      "step": 413
    },
    {
      "epoch": 28.47,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.0508,
      "step": 420
    },
    {
      "epoch": 28.95,
      "eval_accuracy": 0.7166666666666667,
      "eval_loss": 1.0306384563446045,
      "eval_runtime": 1.0394,
      "eval_samples_per_second": 57.723,
      "eval_steps_per_second": 1.924,
      "step": 427
    },
    {
      "epoch": 29.15,
      "learning_rate": 1.2218045112781954e-05,
      "loss": 0.0658,
      "step": 430
    },
    {
      "epoch": 29.83,
      "learning_rate": 1.1278195488721805e-05,
      "loss": 0.0585,
      "step": 440
    },
    {
      "epoch": 29.97,
      "eval_accuracy": 0.75,
      "eval_loss": 0.9148640036582947,
      "eval_runtime": 1.0382,
      "eval_samples_per_second": 57.79,
      "eval_steps_per_second": 1.926,
      "step": 442
    },
    {
      "epoch": 30.51,
      "learning_rate": 1.0338345864661655e-05,
      "loss": 0.0619,
      "step": 450
    },
    {
      "epoch": 30.98,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.8941699862480164,
      "eval_runtime": 1.0414,
      "eval_samples_per_second": 57.616,
      "eval_steps_per_second": 1.921,
      "step": 457
    },
    {
      "epoch": 31.19,
      "learning_rate": 9.398496240601503e-06,
      "loss": 0.048,
      "step": 460
    },
    {
      "epoch": 31.86,
      "learning_rate": 8.458646616541353e-06,
      "loss": 0.0626,
      "step": 470
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7666666666666667,
      "eval_loss": 0.9069377779960632,
      "eval_runtime": 1.0588,
      "eval_samples_per_second": 56.665,
      "eval_steps_per_second": 1.889,
      "step": 472
    },
    {
      "epoch": 32.54,
      "learning_rate": 7.518796992481203e-06,
      "loss": 0.0575,
      "step": 480
    },
    {
      "epoch": 32.95,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8656221628189087,
      "eval_runtime": 1.0337,
      "eval_samples_per_second": 58.042,
      "eval_steps_per_second": 1.935,
      "step": 486
    },
    {
      "epoch": 33.22,
      "learning_rate": 6.578947368421053e-06,
      "loss": 0.052,
      "step": 490
    },
    {
      "epoch": 33.9,
      "learning_rate": 5.639097744360902e-06,
      "loss": 0.0483,
      "step": 500
    },
    {
      "epoch": 33.97,
      "eval_accuracy": 0.8166666666666667,
      "eval_loss": 0.8778695464134216,
      "eval_runtime": 1.0357,
      "eval_samples_per_second": 57.935,
      "eval_steps_per_second": 1.931,
      "step": 501
    },
    {
      "epoch": 34.58,
      "learning_rate": 4.699248120300752e-06,
      "loss": 0.0576,
      "step": 510
    },
    {
      "epoch": 34.98,
      "eval_accuracy": 0.7833333333333333,
      "eval_loss": 0.9077943563461304,
      "eval_runtime": 1.0205,
      "eval_samples_per_second": 58.793,
      "eval_steps_per_second": 1.96,
      "step": 516
    },
    {
      "epoch": 35.25,
      "learning_rate": 3.7593984962406014e-06,
      "loss": 0.0429,
      "step": 520
    },
    {
      "epoch": 35.93,
      "learning_rate": 2.819548872180451e-06,
      "loss": 0.0633,
      "step": 530
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8879902958869934,
      "eval_runtime": 1.0817,
      "eval_samples_per_second": 55.469,
      "eval_steps_per_second": 1.849,
      "step": 531
    },
    {
      "epoch": 36.61,
      "learning_rate": 1.8796992481203007e-06,
      "loss": 0.0511,
      "step": 540
    },
    {
      "epoch": 36.95,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8573408722877502,
      "eval_runtime": 1.0249,
      "eval_samples_per_second": 58.542,
      "eval_steps_per_second": 1.951,
      "step": 545
    },
    {
      "epoch": 37.29,
      "learning_rate": 9.398496240601504e-07,
      "loss": 0.0402,
      "step": 550
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.0,
      "loss": 0.049,
      "step": 560
    },
    {
      "epoch": 37.97,
      "eval_accuracy": 0.8,
      "eval_loss": 0.8563874363899231,
      "eval_runtime": 1.0455,
      "eval_samples_per_second": 57.387,
      "eval_steps_per_second": 1.913,
      "step": 560
    },
    {
      "epoch": 37.97,
      "step": 560,
      "total_flos": 5.526052834168259e+18,
      "train_loss": 0.23852362962705748,
      "train_runtime": 1111.7542,
      "train_samples_per_second": 67.569,
      "train_steps_per_second": 0.504
    }
  ],
  "logging_steps": 10,
  "max_steps": 560,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 5.526052834168259e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}