{ "best_metric": 0.8431372549019608, "best_model_checkpoint": "vit-base-patch16-224-RXL1-24\\checkpoint-96", "epoch": 22.69090909090909, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.73, "learning_rate": 3.4375e-05, "loss": 1.3745, "step": 10 }, { "epoch": 0.95, "eval_accuracy": 0.47058823529411764, "eval_loss": 1.3056340217590332, "eval_runtime": 0.7424, "eval_samples_per_second": 68.693, "eval_steps_per_second": 2.694, "step": 13 }, { "epoch": 1.45, "learning_rate": 5.4256756756756764e-05, "loss": 1.2896, "step": 20 }, { "epoch": 1.96, "eval_accuracy": 0.6470588235294118, "eval_loss": 1.103859543800354, "eval_runtime": 0.7848, "eval_samples_per_second": 64.988, "eval_steps_per_second": 2.549, "step": 27 }, { "epoch": 2.18, "learning_rate": 5.239864864864865e-05, "loss": 1.1563, "step": 30 }, { "epoch": 2.91, "learning_rate": 5.0540540540540544e-05, "loss": 0.9896, "step": 40 }, { "epoch": 2.98, "eval_accuracy": 0.6470588235294118, "eval_loss": 0.9412985444068909, "eval_runtime": 0.7684, "eval_samples_per_second": 66.368, "eval_steps_per_second": 2.603, "step": 41 }, { "epoch": 3.64, "learning_rate": 4.868243243243243e-05, "loss": 0.8472, "step": 50 }, { "epoch": 4.0, "eval_accuracy": 0.6274509803921569, "eval_loss": 0.905930757522583, "eval_runtime": 0.7968, "eval_samples_per_second": 64.003, "eval_steps_per_second": 2.51, "step": 55 }, { "epoch": 4.36, "learning_rate": 4.6824324324324325e-05, "loss": 0.7375, "step": 60 }, { "epoch": 4.95, "eval_accuracy": 0.803921568627451, "eval_loss": 0.6519917249679565, "eval_runtime": 0.822, "eval_samples_per_second": 62.047, "eval_steps_per_second": 2.433, "step": 68 }, { "epoch": 5.09, "learning_rate": 4.496621621621622e-05, "loss": 0.5786, "step": 70 }, { "epoch": 5.82, "learning_rate": 4.3108108108108106e-05, "loss": 0.458, "step": 80 }, { "epoch": 5.96, "eval_accuracy": 0.803921568627451, "eval_loss": 0.6753640174865723, "eval_runtime": 0.8618, "eval_samples_per_second": 59.178, "eval_steps_per_second": 2.321, "step": 82 }, { "epoch": 6.55, "learning_rate": 4.125e-05, "loss": 0.3807, "step": 90 }, { "epoch": 6.98, "eval_accuracy": 0.8431372549019608, "eval_loss": 0.6158193945884705, "eval_runtime": 0.7813, "eval_samples_per_second": 65.275, "eval_steps_per_second": 2.56, "step": 96 }, { "epoch": 7.27, "learning_rate": 3.9391891891891894e-05, "loss": 0.3282, "step": 100 }, { "epoch": 8.0, "learning_rate": 3.753378378378379e-05, "loss": 0.3003, "step": 110 }, { "epoch": 8.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.5665538311004639, "eval_runtime": 0.7924, "eval_samples_per_second": 64.358, "eval_steps_per_second": 2.524, "step": 110 }, { "epoch": 8.73, "learning_rate": 3.567567567567568e-05, "loss": 0.2337, "step": 120 }, { "epoch": 8.95, "eval_accuracy": 0.803921568627451, "eval_loss": 0.5409361124038696, "eval_runtime": 0.8091, "eval_samples_per_second": 63.033, "eval_steps_per_second": 2.472, "step": 123 }, { "epoch": 9.45, "learning_rate": 3.381756756756757e-05, "loss": 0.2252, "step": 130 }, { "epoch": 9.96, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.7381905913352966, "eval_runtime": 0.8081, "eval_samples_per_second": 63.11, "eval_steps_per_second": 2.475, "step": 137 }, { "epoch": 10.18, "learning_rate": 3.1959459459459455e-05, "loss": 0.1952, "step": 140 }, { "epoch": 10.91, "learning_rate": 3.010135135135135e-05, "loss": 0.1644, "step": 150 }, { "epoch": 10.98, "eval_accuracy": 0.803921568627451, "eval_loss": 0.6363002061843872, "eval_runtime": 0.7874, "eval_samples_per_second": 64.774, "eval_steps_per_second": 2.54, "step": 151 }, { "epoch": 11.64, "learning_rate": 2.8243243243243243e-05, "loss": 0.1608, "step": 160 }, { "epoch": 12.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.6941252946853638, "eval_runtime": 0.809, "eval_samples_per_second": 63.04, "eval_steps_per_second": 2.472, "step": 165 }, { "epoch": 12.36, "learning_rate": 2.6385135135135137e-05, "loss": 0.1354, "step": 170 }, { "epoch": 12.95, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.6985132694244385, "eval_runtime": 0.7967, "eval_samples_per_second": 64.012, "eval_steps_per_second": 2.51, "step": 178 }, { "epoch": 13.09, "learning_rate": 2.4527027027027027e-05, "loss": 0.1287, "step": 180 }, { "epoch": 13.82, "learning_rate": 2.266891891891892e-05, "loss": 0.1298, "step": 190 }, { "epoch": 13.96, "eval_accuracy": 0.803921568627451, "eval_loss": 0.6610224843025208, "eval_runtime": 0.8038, "eval_samples_per_second": 63.446, "eval_steps_per_second": 2.488, "step": 192 }, { "epoch": 14.55, "learning_rate": 2.081081081081081e-05, "loss": 0.1333, "step": 200 }, { "epoch": 14.98, "eval_accuracy": 0.803921568627451, "eval_loss": 0.675108015537262, "eval_runtime": 0.7776, "eval_samples_per_second": 65.588, "eval_steps_per_second": 2.572, "step": 206 }, { "epoch": 15.27, "learning_rate": 1.89527027027027e-05, "loss": 0.1241, "step": 210 }, { "epoch": 16.0, "learning_rate": 1.7094594594594595e-05, "loss": 0.1209, "step": 220 }, { "epoch": 16.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.7722602486610413, "eval_runtime": 0.7727, "eval_samples_per_second": 66.007, "eval_steps_per_second": 2.588, "step": 220 }, { "epoch": 16.73, "learning_rate": 1.5236486486486487e-05, "loss": 0.1057, "step": 230 }, { "epoch": 16.95, "eval_accuracy": 0.7254901960784313, "eval_loss": 0.8037699460983276, "eval_runtime": 0.7639, "eval_samples_per_second": 66.761, "eval_steps_per_second": 2.618, "step": 233 }, { "epoch": 17.45, "learning_rate": 1.337837837837838e-05, "loss": 0.0972, "step": 240 }, { "epoch": 17.96, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.8375279307365417, "eval_runtime": 0.7813, "eval_samples_per_second": 65.279, "eval_steps_per_second": 2.56, "step": 247 }, { "epoch": 18.18, "learning_rate": 1.152027027027027e-05, "loss": 0.0974, "step": 250 }, { "epoch": 18.91, "learning_rate": 9.662162162162164e-06, "loss": 0.0789, "step": 260 }, { "epoch": 18.98, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.697125256061554, "eval_runtime": 0.8068, "eval_samples_per_second": 63.21, "eval_steps_per_second": 2.479, "step": 261 }, { "epoch": 19.64, "learning_rate": 7.804054054054054e-06, "loss": 0.0833, "step": 270 }, { "epoch": 20.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.7507321238517761, "eval_runtime": 0.8032, "eval_samples_per_second": 63.499, "eval_steps_per_second": 2.49, "step": 275 }, { "epoch": 20.36, "learning_rate": 5.945945945945946e-06, "loss": 0.0813, "step": 280 }, { "epoch": 20.95, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.7085339426994324, "eval_runtime": 0.7675, "eval_samples_per_second": 66.445, "eval_steps_per_second": 2.606, "step": 288 }, { "epoch": 21.09, "learning_rate": 4.087837837837838e-06, "loss": 0.0874, "step": 290 }, { "epoch": 21.82, "learning_rate": 2.22972972972973e-06, "loss": 0.0803, "step": 300 }, { "epoch": 21.96, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.7565922737121582, "eval_runtime": 0.8044, "eval_samples_per_second": 63.404, "eval_steps_per_second": 2.486, "step": 302 }, { "epoch": 22.55, "learning_rate": 3.7162162162162164e-07, "loss": 0.0693, "step": 310 }, { "epoch": 22.69, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.7772119045257568, "eval_runtime": 0.7864, "eval_samples_per_second": 64.855, "eval_steps_per_second": 2.543, "step": 312 }, { "epoch": 22.69, "step": 312, "total_flos": 3.0760221736008253e+18, "train_loss": 0.3522763775709348, "train_runtime": 628.612, "train_samples_per_second": 66.776, "train_steps_per_second": 0.496 } ], "logging_steps": 10, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 24, "save_steps": 500, "total_flos": 3.0760221736008253e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }