| { | |
| "best_metric": 0.22444555163383484, | |
| "best_model_checkpoint": "vit-base-tarsh/checkpoint-500", | |
| "epoch": 3.937007874015748, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019606299212598428, | |
| "loss": 1.623, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001921259842519685, | |
| "loss": 1.2641, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018818897637795277, | |
| "loss": 0.9268, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.000184251968503937, | |
| "loss": 0.7366, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018031496062992125, | |
| "loss": 0.7007, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00017637795275590552, | |
| "loss": 0.557, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00017244094488188977, | |
| "loss": 0.6111, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.000168503937007874, | |
| "loss": 0.6436, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00016456692913385828, | |
| "loss": 0.4778, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00016062992125984252, | |
| "loss": 0.4692, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.9018567639257294, | |
| "eval_loss": 0.4172811806201935, | |
| "eval_runtime": 347.1671, | |
| "eval_samples_per_second": 1.086, | |
| "eval_steps_per_second": 0.138, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001566929133858268, | |
| "loss": 0.2606, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015275590551181104, | |
| "loss": 0.2834, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014881889763779528, | |
| "loss": 0.2804, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00014488188976377955, | |
| "loss": 0.2504, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014094488188976377, | |
| "loss": 0.184, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013700787401574804, | |
| "loss": 0.213, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0001330708661417323, | |
| "loss": 0.2614, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012913385826771653, | |
| "loss": 0.2567, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0001251968503937008, | |
| "loss": 0.1094, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00012125984251968505, | |
| "loss": 0.2064, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_accuracy": 0.9257294429708223, | |
| "eval_loss": 0.29030969738960266, | |
| "eval_runtime": 349.8071, | |
| "eval_samples_per_second": 1.078, | |
| "eval_steps_per_second": 0.137, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011732283464566928, | |
| "loss": 0.1707, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011338582677165355, | |
| "loss": 0.2581, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010944881889763781, | |
| "loss": 0.0885, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00010551181102362204, | |
| "loss": 0.1996, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.0001015748031496063, | |
| "loss": 0.1614, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 9.763779527559055e-05, | |
| "loss": 0.1308, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 9.370078740157481e-05, | |
| "loss": 0.0973, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 8.976377952755905e-05, | |
| "loss": 0.0764, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 8.582677165354331e-05, | |
| "loss": 0.039, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 8.188976377952757e-05, | |
| "loss": 0.0748, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.9363395225464191, | |
| "eval_loss": 0.2725047767162323, | |
| "eval_runtime": 349.7967, | |
| "eval_samples_per_second": 1.078, | |
| "eval_steps_per_second": 0.137, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 7.795275590551181e-05, | |
| "loss": 0.0508, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.401574803149607e-05, | |
| "loss": 0.0781, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.007874015748031e-05, | |
| "loss": 0.1238, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 6.614173228346457e-05, | |
| "loss": 0.0506, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 6.220472440944882e-05, | |
| "loss": 0.05, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 5.826771653543307e-05, | |
| "loss": 0.0748, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 5.433070866141733e-05, | |
| "loss": 0.0257, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 5.0393700787401575e-05, | |
| "loss": 0.0861, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 4.645669291338583e-05, | |
| "loss": 0.029, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 4.251968503937008e-05, | |
| "loss": 0.0343, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_accuracy": 0.9283819628647215, | |
| "eval_loss": 0.2931118607521057, | |
| "eval_runtime": 343.908, | |
| "eval_samples_per_second": 1.096, | |
| "eval_steps_per_second": 0.14, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.858267716535433e-05, | |
| "loss": 0.0234, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.464566929133858e-05, | |
| "loss": 0.0208, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 3.070866141732284e-05, | |
| "loss": 0.0186, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.677165354330709e-05, | |
| "loss": 0.0398, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.283464566929134e-05, | |
| "loss": 0.0262, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.889763779527559e-05, | |
| "loss": 0.0482, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 1.4960629921259845e-05, | |
| "loss": 0.0191, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 1.1023622047244095e-05, | |
| "loss": 0.02, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 7.086614173228347e-06, | |
| "loss": 0.0501, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.1496062992125985e-06, | |
| "loss": 0.0176, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "eval_accuracy": 0.9442970822281167, | |
| "eval_loss": 0.22444555163383484, | |
| "eval_runtime": 342.5831, | |
| "eval_samples_per_second": 1.1, | |
| "eval_steps_per_second": 0.14, | |
| "step": 500 | |
| } | |
| ], | |
| "max_steps": 508, | |
| "num_train_epochs": 4, | |
| "total_flos": 6.169413753348895e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |