{ "best_global_step": 969, "best_metric": 0.12907913327217102, "best_model_checkpoint": "./vit_focus_full/checkpoint-969", "epoch": 29.985507246376812, "eval_steps": 500, "global_step": 1530, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7729468599033816, "grad_norm": 10.6144437789917, "learning_rate": 4.872549019607843e-05, "loss": 0.3146, "step": 40 }, { "epoch": 0.9855072463768116, "eval_loss": 0.05953465402126312, "eval_mae": 0.3265259861946106, "eval_mse": 0.1403445601463318, "eval_runtime": 57.1346, "eval_samples_per_second": 7.246, "eval_steps_per_second": 0.91, "step": 51 }, { "epoch": 1.5603864734299517, "grad_norm": 5.5437469482421875, "learning_rate": 4.741830065359477e-05, "loss": 0.2488, "step": 80 }, { "epoch": 1.9855072463768115, "eval_loss": 0.05661279708147049, "eval_mae": 0.3253043293952942, "eval_mse": 0.13950611650943756, "eval_runtime": 56.4199, "eval_samples_per_second": 7.338, "eval_steps_per_second": 0.922, "step": 102 }, { "epoch": 2.3478260869565215, "grad_norm": 4.464972972869873, "learning_rate": 4.6111111111111115e-05, "loss": 0.2278, "step": 120 }, { "epoch": 2.9855072463768115, "eval_loss": 0.06113344058394432, "eval_mae": 0.3287981450557709, "eval_mse": 0.14261041581630707, "eval_runtime": 56.9728, "eval_samples_per_second": 7.267, "eval_steps_per_second": 0.913, "step": 153 }, { "epoch": 3.135265700483092, "grad_norm": 8.567197799682617, "learning_rate": 4.480392156862745e-05, "loss": 0.2148, "step": 160 }, { "epoch": 3.9082125603864735, "grad_norm": 5.523195266723633, "learning_rate": 4.3496732026143795e-05, "loss": 0.206, "step": 200 }, { "epoch": 3.9855072463768115, "eval_loss": 0.05355362221598625, "eval_mae": 0.31797775626182556, "eval_mse": 0.13227654993534088, "eval_runtime": 56.8583, "eval_samples_per_second": 7.281, "eval_steps_per_second": 0.915, "step": 204 }, { "epoch": 4.695652173913043, "grad_norm": 6.00140380859375, "learning_rate": 4.218954248366013e-05, "loss": 0.1902, "step": 240 }, { "epoch": 4.9855072463768115, "eval_loss": 0.06186218187212944, "eval_mae": 0.3270839750766754, "eval_mse": 0.1410592794418335, "eval_runtime": 56.1466, "eval_samples_per_second": 7.374, "eval_steps_per_second": 0.926, "step": 255 }, { "epoch": 5.483091787439614, "grad_norm": 9.328702926635742, "learning_rate": 4.0882352941176474e-05, "loss": 0.187, "step": 280 }, { "epoch": 5.9855072463768115, "eval_loss": 0.05080530419945717, "eval_mae": 0.3168753385543823, "eval_mse": 0.1319676637649536, "eval_runtime": 57.4612, "eval_samples_per_second": 7.205, "eval_steps_per_second": 0.905, "step": 306 }, { "epoch": 6.270531400966184, "grad_norm": 7.799366474151611, "learning_rate": 3.957516339869281e-05, "loss": 0.1757, "step": 320 }, { "epoch": 6.9855072463768115, "eval_loss": 0.05371123179793358, "eval_mae": 0.31825557351112366, "eval_mse": 0.13387194275856018, "eval_runtime": 57.3782, "eval_samples_per_second": 7.215, "eval_steps_per_second": 0.906, "step": 357 }, { "epoch": 7.057971014492754, "grad_norm": 4.06664514541626, "learning_rate": 3.8267973856209146e-05, "loss": 0.1677, "step": 360 }, { "epoch": 7.830917874396135, "grad_norm": 5.403101921081543, "learning_rate": 3.6960784313725496e-05, "loss": 0.1523, "step": 400 }, { "epoch": 7.9855072463768115, "eval_loss": 0.055755238980054855, "eval_mae": 0.31683334708213806, "eval_mse": 0.13297995924949646, "eval_runtime": 65.7904, "eval_samples_per_second": 6.293, "eval_steps_per_second": 0.79, "step": 408 }, { "epoch": 8.618357487922705, "grad_norm": 6.7577948570251465, "learning_rate": 3.565359477124183e-05, "loss": 0.1528, "step": 440 }, { "epoch": 8.985507246376812, "eval_loss": 0.05914789438247681, "eval_mae": 0.3224806785583496, "eval_mse": 0.1381232738494873, "eval_runtime": 56.9831, "eval_samples_per_second": 7.265, "eval_steps_per_second": 0.913, "step": 459 }, { "epoch": 9.405797101449275, "grad_norm": 4.654517650604248, "learning_rate": 3.434640522875817e-05, "loss": 0.1416, "step": 480 }, { "epoch": 9.985507246376812, "eval_loss": 0.05355934053659439, "eval_mae": 0.3197546601295471, "eval_mse": 0.1352616846561432, "eval_runtime": 57.4136, "eval_samples_per_second": 7.211, "eval_steps_per_second": 0.906, "step": 510 }, { "epoch": 10.193236714975846, "grad_norm": 4.063232421875, "learning_rate": 3.303921568627451e-05, "loss": 0.1391, "step": 520 }, { "epoch": 10.966183574879228, "grad_norm": 4.905858993530273, "learning_rate": 3.173202614379085e-05, "loss": 0.1298, "step": 560 }, { "epoch": 10.985507246376812, "eval_loss": 0.05300646275281906, "eval_mae": 0.3164079189300537, "eval_mse": 0.132521390914917, "eval_runtime": 58.995, "eval_samples_per_second": 7.018, "eval_steps_per_second": 0.881, "step": 561 }, { "epoch": 11.753623188405797, "grad_norm": 4.643632411956787, "learning_rate": 3.0424836601307187e-05, "loss": 0.1161, "step": 600 }, { "epoch": 11.985507246376812, "eval_loss": 0.0511205680668354, "eval_mae": 0.315570205450058, "eval_mse": 0.13146661221981049, "eval_runtime": 57.0447, "eval_samples_per_second": 7.257, "eval_steps_per_second": 0.912, "step": 612 }, { "epoch": 12.541062801932368, "grad_norm": 3.0849831104278564, "learning_rate": 2.9117647058823534e-05, "loss": 0.1085, "step": 640 }, { "epoch": 12.985507246376812, "eval_loss": 0.05314180254936218, "eval_mae": 0.32430657744407654, "eval_mse": 0.13849547505378723, "eval_runtime": 631.8234, "eval_samples_per_second": 0.655, "eval_steps_per_second": 0.082, "step": 663 }, { "epoch": 13.328502415458937, "grad_norm": 5.586836338043213, "learning_rate": 2.7810457516339873e-05, "loss": 0.1028, "step": 680 }, { "epoch": 13.985507246376812, "eval_loss": 0.05296429246664047, "eval_mae": 0.31508708000183105, "eval_mse": 0.1316087543964386, "eval_runtime": 57.8458, "eval_samples_per_second": 7.157, "eval_steps_per_second": 0.899, "step": 714 }, { "epoch": 14.115942028985508, "grad_norm": 3.5024545192718506, "learning_rate": 2.650326797385621e-05, "loss": 0.0974, "step": 720 }, { "epoch": 14.88888888888889, "grad_norm": 3.7782580852508545, "learning_rate": 2.519607843137255e-05, "loss": 0.0891, "step": 760 }, { "epoch": 14.985507246376812, "eval_loss": 0.0540492981672287, "eval_mae": 0.31779569387435913, "eval_mse": 0.1337898075580597, "eval_runtime": 57.6717, "eval_samples_per_second": 7.179, "eval_steps_per_second": 0.902, "step": 765 }, { "epoch": 15.676328502415458, "grad_norm": 3.615967035293579, "learning_rate": 2.3888888888888892e-05, "loss": 0.0878, "step": 800 }, { "epoch": 15.985507246376812, "eval_loss": 0.05357988178730011, "eval_mae": 0.3177140951156616, "eval_mse": 0.13350851833820343, "eval_runtime": 57.5097, "eval_samples_per_second": 7.199, "eval_steps_per_second": 0.904, "step": 816 }, { "epoch": 16.463768115942027, "grad_norm": 9.533724784851074, "learning_rate": 2.258169934640523e-05, "loss": 0.077, "step": 840 }, { "epoch": 16.985507246376812, "eval_loss": 0.05338989570736885, "eval_mae": 0.31321439146995544, "eval_mse": 0.12988565862178802, "eval_runtime": 58.1505, "eval_samples_per_second": 7.119, "eval_steps_per_second": 0.894, "step": 867 }, { "epoch": 17.2512077294686, "grad_norm": 3.7093381881713867, "learning_rate": 2.1274509803921568e-05, "loss": 0.0769, "step": 880 }, { "epoch": 17.985507246376812, "eval_loss": 0.0548611618578434, "eval_mae": 0.3149418532848358, "eval_mse": 0.1313086301088333, "eval_runtime": 56.4832, "eval_samples_per_second": 7.33, "eval_steps_per_second": 0.921, "step": 918 }, { "epoch": 18.03864734299517, "grad_norm": 2.9852871894836426, "learning_rate": 1.996732026143791e-05, "loss": 0.0717, "step": 920 }, { "epoch": 18.81159420289855, "grad_norm": 3.3752264976501465, "learning_rate": 1.866013071895425e-05, "loss": 0.0663, "step": 960 }, { "epoch": 18.985507246376812, "eval_loss": 0.05310577526688576, "eval_mae": 0.3118866980075836, "eval_mse": 0.12907913327217102, "eval_runtime": 58.2255, "eval_samples_per_second": 7.11, "eval_steps_per_second": 0.893, "step": 969 }, { "epoch": 19.59903381642512, "grad_norm": 2.9139506816864014, "learning_rate": 1.735294117647059e-05, "loss": 0.064, "step": 1000 }, { "epoch": 19.985507246376812, "eval_loss": 0.05400167778134346, "eval_mae": 0.31967055797576904, "eval_mse": 0.13520964980125427, "eval_runtime": 58.0572, "eval_samples_per_second": 7.131, "eval_steps_per_second": 0.896, "step": 1020 }, { "epoch": 20.386473429951693, "grad_norm": 3.1011509895324707, "learning_rate": 1.604575163398693e-05, "loss": 0.0608, "step": 1040 }, { "epoch": 20.985507246376812, "eval_loss": 0.05348004400730133, "eval_mae": 0.3179128170013428, "eval_mse": 0.13336069881916046, "eval_runtime": 56.8284, "eval_samples_per_second": 7.285, "eval_steps_per_second": 0.915, "step": 1071 }, { "epoch": 21.17391304347826, "grad_norm": 2.4269816875457764, "learning_rate": 1.473856209150327e-05, "loss": 0.0558, "step": 1080 }, { "epoch": 21.946859903381643, "grad_norm": 2.612093925476074, "learning_rate": 1.3431372549019607e-05, "loss": 0.0548, "step": 1120 }, { "epoch": 21.985507246376812, "eval_loss": 0.052902594208717346, "eval_mae": 0.3134055733680725, "eval_mse": 0.129911869764328, "eval_runtime": 57.5407, "eval_samples_per_second": 7.195, "eval_steps_per_second": 0.904, "step": 1122 }, { "epoch": 22.734299516908212, "grad_norm": 1.7072349786758423, "learning_rate": 1.2124183006535949e-05, "loss": 0.0517, "step": 1160 }, { "epoch": 22.985507246376812, "eval_loss": 0.05338846519589424, "eval_mae": 0.31519371271133423, "eval_mse": 0.13099054992198944, "eval_runtime": 2988.6114, "eval_samples_per_second": 0.139, "eval_steps_per_second": 0.017, "step": 1173 }, { "epoch": 23.52173913043478, "grad_norm": 2.942000389099121, "learning_rate": 1.0816993464052288e-05, "loss": 0.0498, "step": 1200 }, { "epoch": 23.985507246376812, "eval_loss": 0.05435283109545708, "eval_mae": 0.31506991386413574, "eval_mse": 0.13137240707874298, "eval_runtime": 158.629, "eval_samples_per_second": 2.61, "eval_steps_per_second": 0.328, "step": 1224 }, { "epoch": 24.309178743961354, "grad_norm": 1.7872236967086792, "learning_rate": 9.509803921568628e-06, "loss": 0.047, "step": 1240 }, { "epoch": 24.985507246376812, "eval_loss": 0.05310087278485298, "eval_mae": 0.3145076036453247, "eval_mse": 0.13092052936553955, "eval_runtime": 59.2601, "eval_samples_per_second": 6.986, "eval_steps_per_second": 0.877, "step": 1275 }, { "epoch": 25.096618357487923, "grad_norm": 1.8146392107009888, "learning_rate": 8.202614379084967e-06, "loss": 0.0467, "step": 1280 }, { "epoch": 25.869565217391305, "grad_norm": 1.8770432472229004, "learning_rate": 6.895424836601308e-06, "loss": 0.0443, "step": 1320 }, { "epoch": 25.985507246376812, "eval_loss": 0.053730811923742294, "eval_mae": 0.31641700863838196, "eval_mse": 0.1325235366821289, "eval_runtime": 8331.3737, "eval_samples_per_second": 0.05, "eval_steps_per_second": 0.006, "step": 1326 }, { "epoch": 26.657004830917874, "grad_norm": 2.1211466789245605, "learning_rate": 5.588235294117647e-06, "loss": 0.042, "step": 1360 }, { "epoch": 26.985507246376812, "eval_loss": 0.05325399339199066, "eval_mae": 0.31560125946998596, "eval_mse": 0.13193772733211517, "eval_runtime": 3946.2723, "eval_samples_per_second": 0.105, "eval_steps_per_second": 0.013, "step": 1377 }, { "epoch": 27.444444444444443, "grad_norm": 1.9497586488723755, "learning_rate": 4.281045751633987e-06, "loss": 0.0397, "step": 1400 }, { "epoch": 27.985507246376812, "eval_loss": 0.052952226251363754, "eval_mae": 0.3155405521392822, "eval_mse": 0.13170257210731506, "eval_runtime": 58.8468, "eval_samples_per_second": 7.035, "eval_steps_per_second": 0.884, "step": 1428 }, { "epoch": 28.231884057971016, "grad_norm": 5.6321330070495605, "learning_rate": 2.9738562091503266e-06, "loss": 0.0411, "step": 1440 }, { "epoch": 28.985507246376812, "eval_loss": 0.05421222001314163, "eval_mae": 0.31665799021720886, "eval_mse": 0.13281531631946564, "eval_runtime": 60.076, "eval_samples_per_second": 6.891, "eval_steps_per_second": 0.866, "step": 1479 }, { "epoch": 29.019323671497585, "grad_norm": 1.5062155723571777, "learning_rate": 1.6666666666666667e-06, "loss": 0.0385, "step": 1480 }, { "epoch": 29.792270531400966, "grad_norm": 3.7781600952148438, "learning_rate": 3.5947712418300653e-07, "loss": 0.0382, "step": 1520 }, { "epoch": 29.985507246376812, "eval_loss": 0.05334796383976936, "eval_mae": 0.31658393144607544, "eval_mse": 0.13268809020519257, "eval_runtime": 61.4065, "eval_samples_per_second": 6.742, "eval_steps_per_second": 0.847, "step": 1530 }, { "epoch": 29.985507246376812, "step": 1530, "total_flos": 0.0, "train_loss": 0.11046674571006126, "train_runtime": 98364.9673, "train_samples_per_second": 0.504, "train_steps_per_second": 0.016 } ], "logging_steps": 40, "max_steps": 1530, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }