{
  "best_metric": 0.6914285714285714,
  "best_model_checkpoint": "dinov2-base-finetuned-eurosat/checkpoint-308",
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.91,
      "grad_norm": 71.39833068847656,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 6.646,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.004285714285714286,
      "eval_loss": 6.344563961029053,
      "eval_runtime": 12.2748,
      "eval_samples_per_second": 57.027,
      "eval_steps_per_second": 0.896,
      "step": 11
    },
    {
      "epoch": 1.82,
      "grad_norm": 36.275108337402344,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 6.0586,
      "step": 20
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.037142857142857144,
      "eval_loss": 5.812839031219482,
      "eval_runtime": 11.6948,
      "eval_samples_per_second": 59.856,
      "eval_steps_per_second": 0.941,
      "step": 22
    },
    {
      "epoch": 2.73,
      "grad_norm": 78.4278564453125,
      "learning_rate": 4.545454545454546e-05,
      "loss": 4.9553,
      "step": 30
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.24285714285714285,
      "eval_loss": 4.52340030670166,
      "eval_runtime": 11.5613,
      "eval_samples_per_second": 60.547,
      "eval_steps_per_second": 0.951,
      "step": 33
    },
    {
      "epoch": 3.64,
      "grad_norm": 94.20513153076172,
      "learning_rate": 4.882154882154882e-05,
      "loss": 3.2097,
      "step": 40
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.48428571428571426,
      "eval_loss": 3.1874964237213135,
      "eval_runtime": 11.6294,
      "eval_samples_per_second": 60.192,
      "eval_steps_per_second": 0.946,
      "step": 44
    },
    {
      "epoch": 4.55,
      "grad_norm": 55.16205596923828,
      "learning_rate": 4.713804713804714e-05,
      "loss": 1.6208,
      "step": 50
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5957142857142858,
      "eval_loss": 2.3652451038360596,
      "eval_runtime": 11.6572,
      "eval_samples_per_second": 60.048,
      "eval_steps_per_second": 0.944,
      "step": 55
    },
    {
      "epoch": 5.45,
      "grad_norm": 28.252750396728516,
      "learning_rate": 4.545454545454546e-05,
      "loss": 0.7822,
      "step": 60
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6485714285714286,
      "eval_loss": 2.007438898086548,
      "eval_runtime": 11.7326,
      "eval_samples_per_second": 59.663,
      "eval_steps_per_second": 0.938,
      "step": 66
    },
    {
      "epoch": 6.36,
      "grad_norm": 17.972673416137695,
      "learning_rate": 4.3771043771043774e-05,
      "loss": 0.3699,
      "step": 70
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.66,
      "eval_loss": 1.9254851341247559,
      "eval_runtime": 11.7512,
      "eval_samples_per_second": 59.569,
      "eval_steps_per_second": 0.936,
      "step": 77
    },
    {
      "epoch": 7.27,
      "grad_norm": 21.875259399414062,
      "learning_rate": 4.208754208754209e-05,
      "loss": 0.1745,
      "step": 80
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6557142857142857,
      "eval_loss": 1.865968942642212,
      "eval_runtime": 11.609,
      "eval_samples_per_second": 60.298,
      "eval_steps_per_second": 0.948,
      "step": 88
    },
    {
      "epoch": 8.18,
      "grad_norm": 13.34464168548584,
      "learning_rate": 4.0404040404040405e-05,
      "loss": 0.1285,
      "step": 90
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6571428571428571,
      "eval_loss": 1.8786249160766602,
      "eval_runtime": 11.6967,
      "eval_samples_per_second": 59.846,
      "eval_steps_per_second": 0.94,
      "step": 99
    },
    {
      "epoch": 9.09,
      "grad_norm": 11.297475814819336,
      "learning_rate": 3.872053872053872e-05,
      "loss": 0.1178,
      "step": 100
    },
    {
      "epoch": 10.0,
      "grad_norm": 16.153575897216797,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.0883,
      "step": 110
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6585714285714286,
      "eval_loss": 1.8617857694625854,
      "eval_runtime": 11.6432,
      "eval_samples_per_second": 60.121,
      "eval_steps_per_second": 0.945,
      "step": 110
    },
    {
      "epoch": 10.91,
      "grad_norm": 13.22706127166748,
      "learning_rate": 3.535353535353535e-05,
      "loss": 0.0721,
      "step": 120
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6514285714285715,
      "eval_loss": 1.9431724548339844,
      "eval_runtime": 12.6039,
      "eval_samples_per_second": 55.538,
      "eval_steps_per_second": 0.873,
      "step": 121
    },
    {
      "epoch": 11.82,
      "grad_norm": 8.195013046264648,
      "learning_rate": 3.3670033670033675e-05,
      "loss": 0.0693,
      "step": 130
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6642857142857143,
      "eval_loss": 1.873042345046997,
      "eval_runtime": 11.7524,
      "eval_samples_per_second": 59.562,
      "eval_steps_per_second": 0.936,
      "step": 132
    },
    {
      "epoch": 12.73,
      "grad_norm": 9.13159465789795,
      "learning_rate": 3.198653198653199e-05,
      "loss": 0.0901,
      "step": 140
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6557142857142857,
      "eval_loss": 1.8676621913909912,
      "eval_runtime": 11.7011,
      "eval_samples_per_second": 59.823,
      "eval_steps_per_second": 0.94,
      "step": 143
    },
    {
      "epoch": 13.64,
      "grad_norm": 5.170494556427002,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 0.0608,
      "step": 150
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.6757142857142857,
      "eval_loss": 1.846497654914856,
      "eval_runtime": 12.5066,
      "eval_samples_per_second": 55.971,
      "eval_steps_per_second": 0.88,
      "step": 154
    },
    {
      "epoch": 14.55,
      "grad_norm": 4.774472713470459,
      "learning_rate": 2.8619528619528618e-05,
      "loss": 0.0443,
      "step": 160
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.6642857142857143,
      "eval_loss": 1.8421980142593384,
      "eval_runtime": 11.808,
      "eval_samples_per_second": 59.282,
      "eval_steps_per_second": 0.932,
      "step": 165
    },
    {
      "epoch": 15.45,
      "grad_norm": 2.623682737350464,
      "learning_rate": 2.6936026936026937e-05,
      "loss": 0.0552,
      "step": 170
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6585714285714286,
      "eval_loss": 1.9717400074005127,
      "eval_runtime": 11.7743,
      "eval_samples_per_second": 59.451,
      "eval_steps_per_second": 0.934,
      "step": 176
    },
    {
      "epoch": 16.36,
      "grad_norm": 3.4440066814422607,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 0.0416,
      "step": 180
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.6657142857142857,
      "eval_loss": 1.8076777458190918,
      "eval_runtime": 11.7226,
      "eval_samples_per_second": 59.714,
      "eval_steps_per_second": 0.938,
      "step": 187
    },
    {
      "epoch": 17.27,
      "grad_norm": 8.230661392211914,
      "learning_rate": 2.356902356902357e-05,
      "loss": 0.0366,
      "step": 190
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.6742857142857143,
      "eval_loss": 1.8198397159576416,
      "eval_runtime": 11.6594,
      "eval_samples_per_second": 60.037,
      "eval_steps_per_second": 0.943,
      "step": 198
    },
    {
      "epoch": 18.18,
      "grad_norm": 3.6574606895446777,
      "learning_rate": 2.1885521885521887e-05,
      "loss": 0.0313,
      "step": 200
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.6757142857142857,
      "eval_loss": 1.8081269264221191,
      "eval_runtime": 11.957,
      "eval_samples_per_second": 58.543,
      "eval_steps_per_second": 0.92,
      "step": 209
    },
    {
      "epoch": 19.09,
      "grad_norm": 4.515919208526611,
      "learning_rate": 2.0202020202020203e-05,
      "loss": 0.0272,
      "step": 210
    },
    {
      "epoch": 20.0,
      "grad_norm": 4.542725086212158,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.0296,
      "step": 220
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.6785714285714286,
      "eval_loss": 1.776505947113037,
      "eval_runtime": 11.5903,
      "eval_samples_per_second": 60.395,
      "eval_steps_per_second": 0.949,
      "step": 220
    },
    {
      "epoch": 20.91,
      "grad_norm": 2.6347365379333496,
      "learning_rate": 1.6835016835016837e-05,
      "loss": 0.0215,
      "step": 230
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.6828571428571428,
      "eval_loss": 1.6916331052780151,
      "eval_runtime": 13.3341,
      "eval_samples_per_second": 52.497,
      "eval_steps_per_second": 0.825,
      "step": 231
    },
    {
      "epoch": 21.82,
      "grad_norm": 0.4444705545902252,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 0.0144,
      "step": 240
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.6857142857142857,
      "eval_loss": 1.7237095832824707,
      "eval_runtime": 11.4907,
      "eval_samples_per_second": 60.919,
      "eval_steps_per_second": 0.957,
      "step": 242
    },
    {
      "epoch": 22.73,
      "grad_norm": 4.013304710388184,
      "learning_rate": 1.3468013468013468e-05,
      "loss": 0.0108,
      "step": 250
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.67,
      "eval_loss": 1.792176365852356,
      "eval_runtime": 11.5859,
      "eval_samples_per_second": 60.418,
      "eval_steps_per_second": 0.949,
      "step": 253
    },
    {
      "epoch": 23.64,
      "grad_norm": 0.9613437056541443,
      "learning_rate": 1.1784511784511786e-05,
      "loss": 0.0232,
      "step": 260
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.6828571428571428,
      "eval_loss": 1.7594307661056519,
      "eval_runtime": 12.8499,
      "eval_samples_per_second": 54.475,
      "eval_steps_per_second": 0.856,
      "step": 264
    },
    {
      "epoch": 24.55,
      "grad_norm": 2.5503318309783936,
      "learning_rate": 1.0101010101010101e-05,
      "loss": 0.0129,
      "step": 270
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.6828571428571428,
      "eval_loss": 1.7361136674880981,
      "eval_runtime": 11.7158,
      "eval_samples_per_second": 59.749,
      "eval_steps_per_second": 0.939,
      "step": 275
    },
    {
      "epoch": 25.45,
      "grad_norm": 5.675755977630615,
      "learning_rate": 8.417508417508419e-06,
      "loss": 0.0093,
      "step": 280
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.6828571428571428,
      "eval_loss": 1.7426681518554688,
      "eval_runtime": 12.593,
      "eval_samples_per_second": 55.586,
      "eval_steps_per_second": 0.873,
      "step": 286
    },
    {
      "epoch": 26.36,
      "grad_norm": 2.090123176574707,
      "learning_rate": 6.734006734006734e-06,
      "loss": 0.0067,
      "step": 290
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.69,
      "eval_loss": 1.730440378189087,
      "eval_runtime": 11.8655,
      "eval_samples_per_second": 58.995,
      "eval_steps_per_second": 0.927,
      "step": 297
    },
    {
      "epoch": 27.27,
      "grad_norm": 0.6074270009994507,
      "learning_rate": 5.050505050505051e-06,
      "loss": 0.0013,
      "step": 300
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.6914285714285714,
      "eval_loss": 1.726584792137146,
      "eval_runtime": 11.8751,
      "eval_samples_per_second": 58.947,
      "eval_steps_per_second": 0.926,
      "step": 308
    },
    {
      "epoch": 28.18,
      "grad_norm": 0.04077678918838501,
      "learning_rate": 3.367003367003367e-06,
      "loss": 0.0031,
      "step": 310
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.69,
      "eval_loss": 1.7368921041488647,
      "eval_runtime": 11.7621,
      "eval_samples_per_second": 59.513,
      "eval_steps_per_second": 0.935,
      "step": 319
    },
    {
      "epoch": 29.09,
      "grad_norm": 0.32179221510887146,
      "learning_rate": 1.6835016835016836e-06,
      "loss": 0.002,
      "step": 320
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.05003494769334793,
      "learning_rate": 0.0,
      "loss": 0.0019,
      "step": 330
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.69,
      "eval_loss": 1.7391921281814575,
      "eval_runtime": 12.8063,
      "eval_samples_per_second": 54.661,
      "eval_steps_per_second": 0.859,
      "step": 330
    }
  ],
  "logging_steps": 10,
  "max_steps": 330,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 5.2828663104e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}