| { | |
| "best_metric": 0.022612320259213448, | |
| "best_model_checkpoint": "./vit-base-beans/checkpoint-350", | |
| "epoch": 4.0, | |
| "global_step": 520, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001980769230769231, | |
| "loss": 1.0645, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00019615384615384615, | |
| "loss": 0.9394, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019423076923076924, | |
| "loss": 0.5477, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019230769230769233, | |
| "loss": 0.3987, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019038461538461538, | |
| "loss": 0.4813, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018846153846153847, | |
| "loss": 0.3066, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018653846153846154, | |
| "loss": 0.3715, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00018461538461538463, | |
| "loss": 0.3459, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001826923076923077, | |
| "loss": 0.2347, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018076923076923077, | |
| "loss": 0.3365, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.9323308270676691, | |
| "eval_loss": 0.24552951753139496, | |
| "eval_runtime": 1.1355, | |
| "eval_samples_per_second": 117.125, | |
| "eval_steps_per_second": 14.971, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017884615384615386, | |
| "loss": 0.3685, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00017692307692307693, | |
| "loss": 0.457, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.000175, | |
| "loss": 0.3547, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001730769230769231, | |
| "loss": 0.232, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00017115384615384616, | |
| "loss": 0.1802, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00016923076923076923, | |
| "loss": 0.2408, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00016730769230769232, | |
| "loss": 0.3017, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001653846153846154, | |
| "loss": 0.1542, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00016346153846153846, | |
| "loss": 0.2903, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00016153846153846155, | |
| "loss": 0.1728, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.9548872180451128, | |
| "eval_loss": 0.15442077815532684, | |
| "eval_runtime": 1.0999, | |
| "eval_samples_per_second": 120.916, | |
| "eval_steps_per_second": 15.455, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00015961538461538462, | |
| "loss": 0.3258, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0001576923076923077, | |
| "loss": 0.2786, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015576923076923078, | |
| "loss": 0.0742, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015384615384615385, | |
| "loss": 0.1314, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015192307692307692, | |
| "loss": 0.4592, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.0886, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014807692307692308, | |
| "loss": 0.6189, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014615384615384615, | |
| "loss": 0.238, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014423076923076924, | |
| "loss": 0.3974, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0001423076923076923, | |
| "loss": 0.1519, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_accuracy": 0.9624060150375939, | |
| "eval_loss": 0.10723142325878143, | |
| "eval_runtime": 1.0943, | |
| "eval_samples_per_second": 121.537, | |
| "eval_steps_per_second": 15.535, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014038461538461538, | |
| "loss": 0.1581, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00013846153846153847, | |
| "loss": 0.0366, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00013653846153846154, | |
| "loss": 0.0276, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00013461538461538464, | |
| "loss": 0.131, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0001326923076923077, | |
| "loss": 0.0184, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013076923076923077, | |
| "loss": 0.0916, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012884615384615387, | |
| "loss": 0.2499, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00012692307692307693, | |
| "loss": 0.1041, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.000125, | |
| "loss": 0.0679, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0001230769230769231, | |
| "loss": 0.0209, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.9624060150375939, | |
| "eval_loss": 0.15938644111156464, | |
| "eval_runtime": 1.0964, | |
| "eval_samples_per_second": 121.302, | |
| "eval_steps_per_second": 15.505, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012115384615384615, | |
| "loss": 0.2929, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011961538461538462, | |
| "loss": 0.1205, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0001176923076923077, | |
| "loss": 0.0196, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011576923076923079, | |
| "loss": 0.0352, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011384615384615384, | |
| "loss": 0.095, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011192307692307694, | |
| "loss": 0.0972, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00011000000000000002, | |
| "loss": 0.1027, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00010807692307692307, | |
| "loss": 0.217, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00010615384615384615, | |
| "loss": 0.1365, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00010423076923076925, | |
| "loss": 0.0206, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.09133641421794891, | |
| "eval_runtime": 1.1116, | |
| "eval_samples_per_second": 119.647, | |
| "eval_steps_per_second": 15.293, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0001023076923076923, | |
| "loss": 0.1112, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00010038461538461538, | |
| "loss": 0.1386, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 9.846153846153848e-05, | |
| "loss": 0.102, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 9.653846153846155e-05, | |
| "loss": 0.1268, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 9.461538461538461e-05, | |
| "loss": 0.0212, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 9.26923076923077e-05, | |
| "loss": 0.0114, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 9.076923076923078e-05, | |
| "loss": 0.0115, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 8.884615384615384e-05, | |
| "loss": 0.0106, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 8.692307692307692e-05, | |
| "loss": 0.0121, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 8.5e-05, | |
| "loss": 0.0135, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 0.9624060150375939, | |
| "eval_loss": 0.14881500601768494, | |
| "eval_runtime": 4.4199, | |
| "eval_samples_per_second": 30.091, | |
| "eval_steps_per_second": 3.846, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 8.307692307692309e-05, | |
| "loss": 0.0089, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 8.115384615384616e-05, | |
| "loss": 0.0093, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 7.923076923076924e-05, | |
| "loss": 0.1047, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 7.730769230769232e-05, | |
| "loss": 0.1241, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 7.538461538461539e-05, | |
| "loss": 0.0661, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.346153846153847e-05, | |
| "loss": 0.0251, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.153846153846155e-05, | |
| "loss": 0.0217, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.961538461538462e-05, | |
| "loss": 0.0088, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.76923076923077e-05, | |
| "loss": 0.088, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 6.576923076923078e-05, | |
| "loss": 0.0079, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_accuracy": 0.9924812030075187, | |
| "eval_loss": 0.022612320259213448, | |
| "eval_runtime": 1.0945, | |
| "eval_samples_per_second": 121.517, | |
| "eval_steps_per_second": 15.532, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 6.384615384615385e-05, | |
| "loss": 0.0098, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 6.192307692307693e-05, | |
| "loss": 0.1305, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 6e-05, | |
| "loss": 0.1144, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 5.807692307692308e-05, | |
| "loss": 0.0102, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 5.615384615384616e-05, | |
| "loss": 0.0096, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 5.423076923076923e-05, | |
| "loss": 0.0083, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 5.230769230769231e-05, | |
| "loss": 0.0098, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 5.038461538461539e-05, | |
| "loss": 0.013, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 4.846153846153846e-05, | |
| "loss": 0.0087, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 4.653846153846154e-05, | |
| "loss": 0.0074, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "eval_accuracy": 0.9924812030075187, | |
| "eval_loss": 0.05820807069540024, | |
| "eval_runtime": 1.1545, | |
| "eval_samples_per_second": 115.2, | |
| "eval_steps_per_second": 14.725, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 4.461538461538462e-05, | |
| "loss": 0.007, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 4.269230769230769e-05, | |
| "loss": 0.0071, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 4.0769230769230773e-05, | |
| "loss": 0.0068, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.884615384615385e-05, | |
| "loss": 0.0073, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.692307692307693e-05, | |
| "loss": 0.0073, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0066, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 3.307692307692308e-05, | |
| "loss": 0.0066, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.115384615384615e-05, | |
| "loss": 0.0108, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.9230769230769234e-05, | |
| "loss": 0.0064, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.7307692307692305e-05, | |
| "loss": 0.0064, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "eval_accuracy": 0.9774436090225563, | |
| "eval_loss": 0.09841261804103851, | |
| "eval_runtime": 1.0924, | |
| "eval_samples_per_second": 121.749, | |
| "eval_steps_per_second": 15.562, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.5384615384615383e-05, | |
| "loss": 0.0063, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.3461538461538464e-05, | |
| "loss": 0.0065, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.1538461538461542e-05, | |
| "loss": 0.0061, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.9615384615384617e-05, | |
| "loss": 0.0063, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 1.7692307692307694e-05, | |
| "loss": 0.0067, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.576923076923077e-05, | |
| "loss": 0.0066, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 1.3846153846153847e-05, | |
| "loss": 0.0067, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 1.1923076923076925e-05, | |
| "loss": 0.0062, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0067, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 8.076923076923077e-06, | |
| "loss": 0.0061, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "eval_accuracy": 0.9699248120300752, | |
| "eval_loss": 0.11507026106119156, | |
| "eval_runtime": 4.2976, | |
| "eval_samples_per_second": 30.948, | |
| "eval_steps_per_second": 3.956, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 6.153846153846155e-06, | |
| "loss": 0.0064, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.230769230769231e-06, | |
| "loss": 0.0067, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 0.0072, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 0.006, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 520, | |
| "total_flos": 3.205097416476426e+17, | |
| "train_loss": 0.1355182125018193, | |
| "train_runtime": 149.664, | |
| "train_samples_per_second": 27.635, | |
| "train_steps_per_second": 3.474 | |
| } | |
| ], | |
| "max_steps": 520, | |
| "num_train_epochs": 4, | |
| "total_flos": 3.205097416476426e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |