| { | |
| "best_metric": 0.8431372549019608, | |
| "best_model_checkpoint": "vit-base-patch16-224-RXL1-24\\checkpoint-96", | |
| "epoch": 22.69090909090909, | |
| "eval_steps": 500, | |
| "global_step": 312, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.4375e-05, | |
| "loss": 1.3745, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.47058823529411764, | |
| "eval_loss": 1.3056340217590332, | |
| "eval_runtime": 0.7424, | |
| "eval_samples_per_second": 68.693, | |
| "eval_steps_per_second": 2.694, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5.4256756756756764e-05, | |
| "loss": 1.2896, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 0.6470588235294118, | |
| "eval_loss": 1.103859543800354, | |
| "eval_runtime": 0.7848, | |
| "eval_samples_per_second": 64.988, | |
| "eval_steps_per_second": 2.549, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 5.239864864864865e-05, | |
| "loss": 1.1563, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 5.0540540540540544e-05, | |
| "loss": 0.9896, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_accuracy": 0.6470588235294118, | |
| "eval_loss": 0.9412985444068909, | |
| "eval_runtime": 0.7684, | |
| "eval_samples_per_second": 66.368, | |
| "eval_steps_per_second": 2.603, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 4.868243243243243e-05, | |
| "loss": 0.8472, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6274509803921569, | |
| "eval_loss": 0.905930757522583, | |
| "eval_runtime": 0.7968, | |
| "eval_samples_per_second": 64.003, | |
| "eval_steps_per_second": 2.51, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 4.6824324324324325e-05, | |
| "loss": 0.7375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.6519917249679565, | |
| "eval_runtime": 0.822, | |
| "eval_samples_per_second": 62.047, | |
| "eval_steps_per_second": 2.433, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 4.496621621621622e-05, | |
| "loss": 0.5786, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 4.3108108108108106e-05, | |
| "loss": 0.458, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.6753640174865723, | |
| "eval_runtime": 0.8618, | |
| "eval_samples_per_second": 59.178, | |
| "eval_steps_per_second": 2.321, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 4.125e-05, | |
| "loss": 0.3807, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_accuracy": 0.8431372549019608, | |
| "eval_loss": 0.6158193945884705, | |
| "eval_runtime": 0.7813, | |
| "eval_samples_per_second": 65.275, | |
| "eval_steps_per_second": 2.56, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 3.9391891891891894e-05, | |
| "loss": 0.3282, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 3.753378378378379e-05, | |
| "loss": 0.3003, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.5665538311004639, | |
| "eval_runtime": 0.7924, | |
| "eval_samples_per_second": 64.358, | |
| "eval_steps_per_second": 2.524, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 3.567567567567568e-05, | |
| "loss": 0.2337, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.5409361124038696, | |
| "eval_runtime": 0.8091, | |
| "eval_samples_per_second": 63.033, | |
| "eval_steps_per_second": 2.472, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 3.381756756756757e-05, | |
| "loss": 0.2252, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.7381905913352966, | |
| "eval_runtime": 0.8081, | |
| "eval_samples_per_second": 63.11, | |
| "eval_steps_per_second": 2.475, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "learning_rate": 3.1959459459459455e-05, | |
| "loss": 0.1952, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "learning_rate": 3.010135135135135e-05, | |
| "loss": 0.1644, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.6363002061843872, | |
| "eval_runtime": 0.7874, | |
| "eval_samples_per_second": 64.774, | |
| "eval_steps_per_second": 2.54, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "learning_rate": 2.8243243243243243e-05, | |
| "loss": 0.1608, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.6941252946853638, | |
| "eval_runtime": 0.809, | |
| "eval_samples_per_second": 63.04, | |
| "eval_steps_per_second": 2.472, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 12.36, | |
| "learning_rate": 2.6385135135135137e-05, | |
| "loss": 0.1354, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.6985132694244385, | |
| "eval_runtime": 0.7967, | |
| "eval_samples_per_second": 64.012, | |
| "eval_steps_per_second": 2.51, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "learning_rate": 2.4527027027027027e-05, | |
| "loss": 0.1287, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "learning_rate": 2.266891891891892e-05, | |
| "loss": 0.1298, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 13.96, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.6610224843025208, | |
| "eval_runtime": 0.8038, | |
| "eval_samples_per_second": 63.446, | |
| "eval_steps_per_second": 2.488, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 14.55, | |
| "learning_rate": 2.081081081081081e-05, | |
| "loss": 0.1333, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 14.98, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.675108015537262, | |
| "eval_runtime": 0.7776, | |
| "eval_samples_per_second": 65.588, | |
| "eval_steps_per_second": 2.572, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 15.27, | |
| "learning_rate": 1.89527027027027e-05, | |
| "loss": 0.1241, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 1.7094594594594595e-05, | |
| "loss": 0.1209, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.7722602486610413, | |
| "eval_runtime": 0.7727, | |
| "eval_samples_per_second": 66.007, | |
| "eval_steps_per_second": 2.588, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 16.73, | |
| "learning_rate": 1.5236486486486487e-05, | |
| "loss": 0.1057, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 16.95, | |
| "eval_accuracy": 0.7254901960784313, | |
| "eval_loss": 0.8037699460983276, | |
| "eval_runtime": 0.7639, | |
| "eval_samples_per_second": 66.761, | |
| "eval_steps_per_second": 2.618, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 17.45, | |
| "learning_rate": 1.337837837837838e-05, | |
| "loss": 0.0972, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.8375279307365417, | |
| "eval_runtime": 0.7813, | |
| "eval_samples_per_second": 65.279, | |
| "eval_steps_per_second": 2.56, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 18.18, | |
| "learning_rate": 1.152027027027027e-05, | |
| "loss": 0.0974, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 18.91, | |
| "learning_rate": 9.662162162162164e-06, | |
| "loss": 0.0789, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 18.98, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.697125256061554, | |
| "eval_runtime": 0.8068, | |
| "eval_samples_per_second": 63.21, | |
| "eval_steps_per_second": 2.479, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 19.64, | |
| "learning_rate": 7.804054054054054e-06, | |
| "loss": 0.0833, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.7507321238517761, | |
| "eval_runtime": 0.8032, | |
| "eval_samples_per_second": 63.499, | |
| "eval_steps_per_second": 2.49, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 20.36, | |
| "learning_rate": 5.945945945945946e-06, | |
| "loss": 0.0813, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 20.95, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.7085339426994324, | |
| "eval_runtime": 0.7675, | |
| "eval_samples_per_second": 66.445, | |
| "eval_steps_per_second": 2.606, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 21.09, | |
| "learning_rate": 4.087837837837838e-06, | |
| "loss": 0.0874, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 21.82, | |
| "learning_rate": 2.22972972972973e-06, | |
| "loss": 0.0803, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.7565922737121582, | |
| "eval_runtime": 0.8044, | |
| "eval_samples_per_second": 63.404, | |
| "eval_steps_per_second": 2.486, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 22.55, | |
| "learning_rate": 3.7162162162162164e-07, | |
| "loss": 0.0693, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 22.69, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.7772119045257568, | |
| "eval_runtime": 0.7864, | |
| "eval_samples_per_second": 64.855, | |
| "eval_steps_per_second": 2.543, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 22.69, | |
| "step": 312, | |
| "total_flos": 3.0760221736008253e+18, | |
| "train_loss": 0.3522763775709348, | |
| "train_runtime": 628.612, | |
| "train_samples_per_second": 66.776, | |
| "train_steps_per_second": 0.496 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 312, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 24, | |
| "save_steps": 500, | |
| "total_flos": 3.0760221736008253e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |