{
  "best_global_step": 969,
  "best_metric": 0.12907913327217102,
  "best_model_checkpoint": "./vit_focus_full/checkpoint-969",
  "epoch": 29.985507246376812,
  "eval_steps": 500,
  "global_step": 1530,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7729468599033816,
      "grad_norm": 10.6144437789917,
      "learning_rate": 4.872549019607843e-05,
      "loss": 0.3146,
      "step": 40
    },
    {
      "epoch": 0.9855072463768116,
      "eval_loss": 0.05953465402126312,
      "eval_mae": 0.3265259861946106,
      "eval_mse": 0.1403445601463318,
      "eval_runtime": 57.1346,
      "eval_samples_per_second": 7.246,
      "eval_steps_per_second": 0.91,
      "step": 51
    },
    {
      "epoch": 1.5603864734299517,
      "grad_norm": 5.5437469482421875,
      "learning_rate": 4.741830065359477e-05,
      "loss": 0.2488,
      "step": 80
    },
    {
      "epoch": 1.9855072463768115,
      "eval_loss": 0.05661279708147049,
      "eval_mae": 0.3253043293952942,
      "eval_mse": 0.13950611650943756,
      "eval_runtime": 56.4199,
      "eval_samples_per_second": 7.338,
      "eval_steps_per_second": 0.922,
      "step": 102
    },
    {
      "epoch": 2.3478260869565215,
      "grad_norm": 4.464972972869873,
      "learning_rate": 4.6111111111111115e-05,
      "loss": 0.2278,
      "step": 120
    },
    {
      "epoch": 2.9855072463768115,
      "eval_loss": 0.06113344058394432,
      "eval_mae": 0.3287981450557709,
      "eval_mse": 0.14261041581630707,
      "eval_runtime": 56.9728,
      "eval_samples_per_second": 7.267,
      "eval_steps_per_second": 0.913,
      "step": 153
    },
    {
      "epoch": 3.135265700483092,
      "grad_norm": 8.567197799682617,
      "learning_rate": 4.480392156862745e-05,
      "loss": 0.2148,
      "step": 160
    },
    {
      "epoch": 3.9082125603864735,
      "grad_norm": 5.523195266723633,
      "learning_rate": 4.3496732026143795e-05,
      "loss": 0.206,
      "step": 200
    },
    {
      "epoch": 3.9855072463768115,
      "eval_loss": 0.05355362221598625,
      "eval_mae": 0.31797775626182556,
      "eval_mse": 0.13227654993534088,
      "eval_runtime": 56.8583,
      "eval_samples_per_second": 7.281,
      "eval_steps_per_second": 0.915,
      "step": 204
    },
    {
      "epoch": 4.695652173913043,
      "grad_norm": 6.00140380859375,
      "learning_rate": 4.218954248366013e-05,
      "loss": 0.1902,
      "step": 240
    },
    {
      "epoch": 4.9855072463768115,
      "eval_loss": 0.06186218187212944,
      "eval_mae": 0.3270839750766754,
      "eval_mse": 0.1410592794418335,
      "eval_runtime": 56.1466,
      "eval_samples_per_second": 7.374,
      "eval_steps_per_second": 0.926,
      "step": 255
    },
    {
      "epoch": 5.483091787439614,
      "grad_norm": 9.328702926635742,
      "learning_rate": 4.0882352941176474e-05,
      "loss": 0.187,
      "step": 280
    },
    {
      "epoch": 5.9855072463768115,
      "eval_loss": 0.05080530419945717,
      "eval_mae": 0.3168753385543823,
      "eval_mse": 0.1319676637649536,
      "eval_runtime": 57.4612,
      "eval_samples_per_second": 7.205,
      "eval_steps_per_second": 0.905,
      "step": 306
    },
    {
      "epoch": 6.270531400966184,
      "grad_norm": 7.799366474151611,
      "learning_rate": 3.957516339869281e-05,
      "loss": 0.1757,
      "step": 320
    },
    {
      "epoch": 6.9855072463768115,
      "eval_loss": 0.05371123179793358,
      "eval_mae": 0.31825557351112366,
      "eval_mse": 0.13387194275856018,
      "eval_runtime": 57.3782,
      "eval_samples_per_second": 7.215,
      "eval_steps_per_second": 0.906,
      "step": 357
    },
    {
      "epoch": 7.057971014492754,
      "grad_norm": 4.06664514541626,
      "learning_rate": 3.8267973856209146e-05,
      "loss": 0.1677,
      "step": 360
    },
    {
      "epoch": 7.830917874396135,
      "grad_norm": 5.403101921081543,
      "learning_rate": 3.6960784313725496e-05,
      "loss": 0.1523,
      "step": 400
    },
    {
      "epoch": 7.9855072463768115,
      "eval_loss": 0.055755238980054855,
      "eval_mae": 0.31683334708213806,
      "eval_mse": 0.13297995924949646,
      "eval_runtime": 65.7904,
      "eval_samples_per_second": 6.293,
      "eval_steps_per_second": 0.79,
      "step": 408
    },
    {
      "epoch": 8.618357487922705,
      "grad_norm": 6.7577948570251465,
      "learning_rate": 3.565359477124183e-05,
      "loss": 0.1528,
      "step": 440
    },
    {
      "epoch": 8.985507246376812,
      "eval_loss": 0.05914789438247681,
      "eval_mae": 0.3224806785583496,
      "eval_mse": 0.1381232738494873,
      "eval_runtime": 56.9831,
      "eval_samples_per_second": 7.265,
      "eval_steps_per_second": 0.913,
      "step": 459
    },
    {
      "epoch": 9.405797101449275,
      "grad_norm": 4.654517650604248,
      "learning_rate": 3.434640522875817e-05,
      "loss": 0.1416,
      "step": 480
    },
    {
      "epoch": 9.985507246376812,
      "eval_loss": 0.05355934053659439,
      "eval_mae": 0.3197546601295471,
      "eval_mse": 0.1352616846561432,
      "eval_runtime": 57.4136,
      "eval_samples_per_second": 7.211,
      "eval_steps_per_second": 0.906,
      "step": 510
    },
    {
      "epoch": 10.193236714975846,
      "grad_norm": 4.063232421875,
      "learning_rate": 3.303921568627451e-05,
      "loss": 0.1391,
      "step": 520
    },
    {
      "epoch": 10.966183574879228,
      "grad_norm": 4.905858993530273,
      "learning_rate": 3.173202614379085e-05,
      "loss": 0.1298,
      "step": 560
    },
    {
      "epoch": 10.985507246376812,
      "eval_loss": 0.05300646275281906,
      "eval_mae": 0.3164079189300537,
      "eval_mse": 0.132521390914917,
      "eval_runtime": 58.995,
      "eval_samples_per_second": 7.018,
      "eval_steps_per_second": 0.881,
      "step": 561
    },
    {
      "epoch": 11.753623188405797,
      "grad_norm": 4.643632411956787,
      "learning_rate": 3.0424836601307187e-05,
      "loss": 0.1161,
      "step": 600
    },
    {
      "epoch": 11.985507246376812,
      "eval_loss": 0.0511205680668354,
      "eval_mae": 0.315570205450058,
      "eval_mse": 0.13146661221981049,
      "eval_runtime": 57.0447,
      "eval_samples_per_second": 7.257,
      "eval_steps_per_second": 0.912,
      "step": 612
    },
    {
      "epoch": 12.541062801932368,
      "grad_norm": 3.0849831104278564,
      "learning_rate": 2.9117647058823534e-05,
      "loss": 0.1085,
      "step": 640
    },
    {
      "epoch": 12.985507246376812,
      "eval_loss": 0.05314180254936218,
      "eval_mae": 0.32430657744407654,
      "eval_mse": 0.13849547505378723,
      "eval_runtime": 631.8234,
      "eval_samples_per_second": 0.655,
      "eval_steps_per_second": 0.082,
      "step": 663
    },
    {
      "epoch": 13.328502415458937,
      "grad_norm": 5.586836338043213,
      "learning_rate": 2.7810457516339873e-05,
      "loss": 0.1028,
      "step": 680
    },
    {
      "epoch": 13.985507246376812,
      "eval_loss": 0.05296429246664047,
      "eval_mae": 0.31508708000183105,
      "eval_mse": 0.1316087543964386,
      "eval_runtime": 57.8458,
      "eval_samples_per_second": 7.157,
      "eval_steps_per_second": 0.899,
      "step": 714
    },
    {
      "epoch": 14.115942028985508,
      "grad_norm": 3.5024545192718506,
      "learning_rate": 2.650326797385621e-05,
      "loss": 0.0974,
      "step": 720
    },
    {
      "epoch": 14.88888888888889,
      "grad_norm": 3.7782580852508545,
      "learning_rate": 2.519607843137255e-05,
      "loss": 0.0891,
      "step": 760
    },
    {
      "epoch": 14.985507246376812,
      "eval_loss": 0.0540492981672287,
      "eval_mae": 0.31779569387435913,
      "eval_mse": 0.1337898075580597,
      "eval_runtime": 57.6717,
      "eval_samples_per_second": 7.179,
      "eval_steps_per_second": 0.902,
      "step": 765
    },
    {
      "epoch": 15.676328502415458,
      "grad_norm": 3.615967035293579,
      "learning_rate": 2.3888888888888892e-05,
      "loss": 0.0878,
      "step": 800
    },
    {
      "epoch": 15.985507246376812,
      "eval_loss": 0.05357988178730011,
      "eval_mae": 0.3177140951156616,
      "eval_mse": 0.13350851833820343,
      "eval_runtime": 57.5097,
      "eval_samples_per_second": 7.199,
      "eval_steps_per_second": 0.904,
      "step": 816
    },
    {
      "epoch": 16.463768115942027,
      "grad_norm": 9.533724784851074,
      "learning_rate": 2.258169934640523e-05,
      "loss": 0.077,
      "step": 840
    },
    {
      "epoch": 16.985507246376812,
      "eval_loss": 0.05338989570736885,
      "eval_mae": 0.31321439146995544,
      "eval_mse": 0.12988565862178802,
      "eval_runtime": 58.1505,
      "eval_samples_per_second": 7.119,
      "eval_steps_per_second": 0.894,
      "step": 867
    },
    {
      "epoch": 17.2512077294686,
      "grad_norm": 3.7093381881713867,
      "learning_rate": 2.1274509803921568e-05,
      "loss": 0.0769,
      "step": 880
    },
    {
      "epoch": 17.985507246376812,
      "eval_loss": 0.0548611618578434,
      "eval_mae": 0.3149418532848358,
      "eval_mse": 0.1313086301088333,
      "eval_runtime": 56.4832,
      "eval_samples_per_second": 7.33,
      "eval_steps_per_second": 0.921,
      "step": 918
    },
    {
      "epoch": 18.03864734299517,
      "grad_norm": 2.9852871894836426,
      "learning_rate": 1.996732026143791e-05,
      "loss": 0.0717,
      "step": 920
    },
    {
      "epoch": 18.81159420289855,
      "grad_norm": 3.3752264976501465,
      "learning_rate": 1.866013071895425e-05,
      "loss": 0.0663,
      "step": 960
    },
    {
      "epoch": 18.985507246376812,
      "eval_loss": 0.05310577526688576,
      "eval_mae": 0.3118866980075836,
      "eval_mse": 0.12907913327217102,
      "eval_runtime": 58.2255,
      "eval_samples_per_second": 7.11,
      "eval_steps_per_second": 0.893,
      "step": 969
    },
    {
      "epoch": 19.59903381642512,
      "grad_norm": 2.9139506816864014,
      "learning_rate": 1.735294117647059e-05,
      "loss": 0.064,
      "step": 1000
    },
    {
      "epoch": 19.985507246376812,
      "eval_loss": 0.05400167778134346,
      "eval_mae": 0.31967055797576904,
      "eval_mse": 0.13520964980125427,
      "eval_runtime": 58.0572,
      "eval_samples_per_second": 7.131,
      "eval_steps_per_second": 0.896,
      "step": 1020
    },
    {
      "epoch": 20.386473429951693,
      "grad_norm": 3.1011509895324707,
      "learning_rate": 1.604575163398693e-05,
      "loss": 0.0608,
      "step": 1040
    },
    {
      "epoch": 20.985507246376812,
      "eval_loss": 0.05348004400730133,
      "eval_mae": 0.3179128170013428,
      "eval_mse": 0.13336069881916046,
      "eval_runtime": 56.8284,
      "eval_samples_per_second": 7.285,
      "eval_steps_per_second": 0.915,
      "step": 1071
    },
    {
      "epoch": 21.17391304347826,
      "grad_norm": 2.4269816875457764,
      "learning_rate": 1.473856209150327e-05,
      "loss": 0.0558,
      "step": 1080
    },
    {
      "epoch": 21.946859903381643,
      "grad_norm": 2.612093925476074,
      "learning_rate": 1.3431372549019607e-05,
      "loss": 0.0548,
      "step": 1120
    },
    {
      "epoch": 21.985507246376812,
      "eval_loss": 0.052902594208717346,
      "eval_mae": 0.3134055733680725,
      "eval_mse": 0.129911869764328,
      "eval_runtime": 57.5407,
      "eval_samples_per_second": 7.195,
      "eval_steps_per_second": 0.904,
      "step": 1122
    },
    {
      "epoch": 22.734299516908212,
      "grad_norm": 1.7072349786758423,
      "learning_rate": 1.2124183006535949e-05,
      "loss": 0.0517,
      "step": 1160
    },
    {
      "epoch": 22.985507246376812,
      "eval_loss": 0.05338846519589424,
      "eval_mae": 0.31519371271133423,
      "eval_mse": 0.13099054992198944,
      "eval_runtime": 2988.6114,
      "eval_samples_per_second": 0.139,
      "eval_steps_per_second": 0.017,
      "step": 1173
    },
    {
      "epoch": 23.52173913043478,
      "grad_norm": 2.942000389099121,
      "learning_rate": 1.0816993464052288e-05,
      "loss": 0.0498,
      "step": 1200
    },
    {
      "epoch": 23.985507246376812,
      "eval_loss": 0.05435283109545708,
      "eval_mae": 0.31506991386413574,
      "eval_mse": 0.13137240707874298,
      "eval_runtime": 158.629,
      "eval_samples_per_second": 2.61,
      "eval_steps_per_second": 0.328,
      "step": 1224
    },
    {
      "epoch": 24.309178743961354,
      "grad_norm": 1.7872236967086792,
      "learning_rate": 9.509803921568628e-06,
      "loss": 0.047,
      "step": 1240
    },
    {
      "epoch": 24.985507246376812,
      "eval_loss": 0.05310087278485298,
      "eval_mae": 0.3145076036453247,
      "eval_mse": 0.13092052936553955,
      "eval_runtime": 59.2601,
      "eval_samples_per_second": 6.986,
      "eval_steps_per_second": 0.877,
      "step": 1275
    },
    {
      "epoch": 25.096618357487923,
      "grad_norm": 1.8146392107009888,
      "learning_rate": 8.202614379084967e-06,
      "loss": 0.0467,
      "step": 1280
    },
    {
      "epoch": 25.869565217391305,
      "grad_norm": 1.8770432472229004,
      "learning_rate": 6.895424836601308e-06,
      "loss": 0.0443,
      "step": 1320
    },
    {
      "epoch": 25.985507246376812,
      "eval_loss": 0.053730811923742294,
      "eval_mae": 0.31641700863838196,
      "eval_mse": 0.1325235366821289,
      "eval_runtime": 8331.3737,
      "eval_samples_per_second": 0.05,
      "eval_steps_per_second": 0.006,
      "step": 1326
    },
    {
      "epoch": 26.657004830917874,
      "grad_norm": 2.1211466789245605,
      "learning_rate": 5.588235294117647e-06,
      "loss": 0.042,
      "step": 1360
    },
    {
      "epoch": 26.985507246376812,
      "eval_loss": 0.05325399339199066,
      "eval_mae": 0.31560125946998596,
      "eval_mse": 0.13193772733211517,
      "eval_runtime": 3946.2723,
      "eval_samples_per_second": 0.105,
      "eval_steps_per_second": 0.013,
      "step": 1377
    },
    {
      "epoch": 27.444444444444443,
      "grad_norm": 1.9497586488723755,
      "learning_rate": 4.281045751633987e-06,
      "loss": 0.0397,
      "step": 1400
    },
    {
      "epoch": 27.985507246376812,
      "eval_loss": 0.052952226251363754,
      "eval_mae": 0.3155405521392822,
      "eval_mse": 0.13170257210731506,
      "eval_runtime": 58.8468,
      "eval_samples_per_second": 7.035,
      "eval_steps_per_second": 0.884,
      "step": 1428
    },
    {
      "epoch": 28.231884057971016,
      "grad_norm": 5.6321330070495605,
      "learning_rate": 2.9738562091503266e-06,
      "loss": 0.0411,
      "step": 1440
    },
    {
      "epoch": 28.985507246376812,
      "eval_loss": 0.05421222001314163,
      "eval_mae": 0.31665799021720886,
      "eval_mse": 0.13281531631946564,
      "eval_runtime": 60.076,
      "eval_samples_per_second": 6.891,
      "eval_steps_per_second": 0.866,
      "step": 1479
    },
    {
      "epoch": 29.019323671497585,
      "grad_norm": 1.5062155723571777,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.0385,
      "step": 1480
    },
    {
      "epoch": 29.792270531400966,
      "grad_norm": 3.7781600952148438,
      "learning_rate": 3.5947712418300653e-07,
      "loss": 0.0382,
      "step": 1520
    },
    {
      "epoch": 29.985507246376812,
      "eval_loss": 0.05334796383976936,
      "eval_mae": 0.31658393144607544,
      "eval_mse": 0.13268809020519257,
      "eval_runtime": 61.4065,
      "eval_samples_per_second": 6.742,
      "eval_steps_per_second": 0.847,
      "step": 1530
    },
    {
      "epoch": 29.985507246376812,
      "step": 1530,
      "total_flos": 0.0,
      "train_loss": 0.11046674571006126,
      "train_runtime": 98364.9673,
      "train_samples_per_second": 0.504,
      "train_steps_per_second": 0.016
    }
  ],
  "logging_steps": 40,
  "max_steps": 1530,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}