{
  "best_metric": 0.8837955535182214,
  "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-MM_Classification_base_web_images/checkpoint-342",
  "epoch": 6.948905109489051,
  "eval_steps": 500,
  "global_step": 476,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.145985401459854,
      "grad_norm": 3.675107717514038,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 1.0612,
      "step": 10
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 3.70519757270813,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.8853,
      "step": 20
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 3.8755295276641846,
      "learning_rate": 3.125e-05,
      "loss": 0.7099,
      "step": 30
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 2.727118492126465,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.6121,
      "step": 40
    },
    {
      "epoch": 0.7299270072992701,
      "grad_norm": 3.260768175125122,
      "learning_rate": 4.976635514018692e-05,
      "loss": 0.5736,
      "step": 50
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 5.847088813781738,
      "learning_rate": 4.85981308411215e-05,
      "loss": 0.517,
      "step": 60
    },
    {
      "epoch": 0.9927007299270073,
      "eval_accuracy": 0.8157231262892505,
      "eval_loss": 0.4430324137210846,
      "eval_runtime": 35.8383,
      "eval_samples_per_second": 121.741,
      "eval_steps_per_second": 1.925,
      "step": 68
    },
    {
      "epoch": 1.0218978102189782,
      "grad_norm": 5.806203365325928,
      "learning_rate": 4.742990654205608e-05,
      "loss": 0.4972,
      "step": 70
    },
    {
      "epoch": 1.167883211678832,
      "grad_norm": 2.751803159713745,
      "learning_rate": 4.6261682242990654e-05,
      "loss": 0.4767,
      "step": 80
    },
    {
      "epoch": 1.313868613138686,
      "grad_norm": 2.5074498653411865,
      "learning_rate": 4.5093457943925236e-05,
      "loss": 0.4509,
      "step": 90
    },
    {
      "epoch": 1.4598540145985401,
      "grad_norm": 3.3779966831207275,
      "learning_rate": 4.392523364485982e-05,
      "loss": 0.4496,
      "step": 100
    },
    {
      "epoch": 1.6058394160583942,
      "grad_norm": 4.8917741775512695,
      "learning_rate": 4.27570093457944e-05,
      "loss": 0.4528,
      "step": 110
    },
    {
      "epoch": 1.7518248175182483,
      "grad_norm": 4.1523966789245605,
      "learning_rate": 4.1588785046728974e-05,
      "loss": 0.4535,
      "step": 120
    },
    {
      "epoch": 1.897810218978102,
      "grad_norm": 6.303163051605225,
      "learning_rate": 4.0420560747663556e-05,
      "loss": 0.4211,
      "step": 130
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8457483382993353,
      "eval_loss": 0.38001278042793274,
      "eval_runtime": 36.8858,
      "eval_samples_per_second": 118.284,
      "eval_steps_per_second": 1.871,
      "step": 137
    },
    {
      "epoch": 2.0437956204379564,
      "grad_norm": 6.562156677246094,
      "learning_rate": 3.925233644859813e-05,
      "loss": 0.4435,
      "step": 140
    },
    {
      "epoch": 2.18978102189781,
      "grad_norm": 3.601300001144409,
      "learning_rate": 3.808411214953271e-05,
      "loss": 0.3971,
      "step": 150
    },
    {
      "epoch": 2.335766423357664,
      "grad_norm": 4.384274005889893,
      "learning_rate": 3.691588785046729e-05,
      "loss": 0.4075,
      "step": 160
    },
    {
      "epoch": 2.4817518248175183,
      "grad_norm": 3.9660568237304688,
      "learning_rate": 3.574766355140187e-05,
      "loss": 0.3788,
      "step": 170
    },
    {
      "epoch": 2.627737226277372,
      "grad_norm": 4.195096492767334,
      "learning_rate": 3.457943925233645e-05,
      "loss": 0.376,
      "step": 180
    },
    {
      "epoch": 2.7737226277372264,
      "grad_norm": 3.6591451168060303,
      "learning_rate": 3.341121495327103e-05,
      "loss": 0.3514,
      "step": 190
    },
    {
      "epoch": 2.9197080291970803,
      "grad_norm": 3.5800323486328125,
      "learning_rate": 3.224299065420561e-05,
      "loss": 0.3532,
      "step": 200
    },
    {
      "epoch": 2.9927007299270074,
      "eval_accuracy": 0.8615631446252578,
      "eval_loss": 0.3563055098056793,
      "eval_runtime": 36.2136,
      "eval_samples_per_second": 120.48,
      "eval_steps_per_second": 1.905,
      "step": 205
    },
    {
      "epoch": 3.065693430656934,
      "grad_norm": 6.1199421882629395,
      "learning_rate": 3.107476635514019e-05,
      "loss": 0.3963,
      "step": 210
    },
    {
      "epoch": 3.2116788321167884,
      "grad_norm": 4.893133163452148,
      "learning_rate": 2.9906542056074764e-05,
      "loss": 0.3358,
      "step": 220
    },
    {
      "epoch": 3.3576642335766422,
      "grad_norm": 4.033535957336426,
      "learning_rate": 2.873831775700935e-05,
      "loss": 0.3398,
      "step": 230
    },
    {
      "epoch": 3.5036496350364965,
      "grad_norm": 4.510129928588867,
      "learning_rate": 2.7570093457943924e-05,
      "loss": 0.3628,
      "step": 240
    },
    {
      "epoch": 3.6496350364963503,
      "grad_norm": 3.0796680450439453,
      "learning_rate": 2.6401869158878506e-05,
      "loss": 0.3171,
      "step": 250
    },
    {
      "epoch": 3.795620437956204,
      "grad_norm": 4.170633316040039,
      "learning_rate": 2.5233644859813084e-05,
      "loss": 0.3299,
      "step": 260
    },
    {
      "epoch": 3.9416058394160585,
      "grad_norm": 9.109073638916016,
      "learning_rate": 2.4065420560747666e-05,
      "loss": 0.3365,
      "step": 270
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8700435480174192,
      "eval_loss": 0.3332568407058716,
      "eval_runtime": 36.1676,
      "eval_samples_per_second": 120.633,
      "eval_steps_per_second": 1.908,
      "step": 274
    },
    {
      "epoch": 4.087591240875913,
      "grad_norm": 4.610809803009033,
      "learning_rate": 2.2897196261682244e-05,
      "loss": 0.3744,
      "step": 280
    },
    {
      "epoch": 4.233576642335766,
      "grad_norm": 4.210699558258057,
      "learning_rate": 2.1728971962616822e-05,
      "loss": 0.3022,
      "step": 290
    },
    {
      "epoch": 4.37956204379562,
      "grad_norm": 4.85805082321167,
      "learning_rate": 2.05607476635514e-05,
      "loss": 0.2934,
      "step": 300
    },
    {
      "epoch": 4.525547445255475,
      "grad_norm": 3.4125349521636963,
      "learning_rate": 1.9392523364485982e-05,
      "loss": 0.312,
      "step": 310
    },
    {
      "epoch": 4.671532846715328,
      "grad_norm": 4.196589946746826,
      "learning_rate": 1.822429906542056e-05,
      "loss": 0.2963,
      "step": 320
    },
    {
      "epoch": 4.817518248175182,
      "grad_norm": 3.81192684173584,
      "learning_rate": 1.705607476635514e-05,
      "loss": 0.3019,
      "step": 330
    },
    {
      "epoch": 4.963503649635037,
      "grad_norm": 3.5110998153686523,
      "learning_rate": 1.588785046728972e-05,
      "loss": 0.2976,
      "step": 340
    },
    {
      "epoch": 4.992700729927007,
      "eval_accuracy": 0.8837955535182214,
      "eval_loss": 0.30171430110931396,
      "eval_runtime": 35.5004,
      "eval_samples_per_second": 122.9,
      "eval_steps_per_second": 1.944,
      "step": 342
    },
    {
      "epoch": 5.109489051094891,
      "grad_norm": 6.99758768081665,
      "learning_rate": 1.4719626168224299e-05,
      "loss": 0.259,
      "step": 350
    },
    {
      "epoch": 5.255474452554744,
      "grad_norm": 4.643028736114502,
      "learning_rate": 1.3551401869158877e-05,
      "loss": 0.2668,
      "step": 360
    },
    {
      "epoch": 5.401459854014599,
      "grad_norm": 3.8736965656280518,
      "learning_rate": 1.2383177570093459e-05,
      "loss": 0.278,
      "step": 370
    },
    {
      "epoch": 5.547445255474453,
      "grad_norm": 5.259199142456055,
      "learning_rate": 1.1214953271028037e-05,
      "loss": 0.2677,
      "step": 380
    },
    {
      "epoch": 5.693430656934306,
      "grad_norm": 3.9390275478363037,
      "learning_rate": 1.0046728971962617e-05,
      "loss": 0.2664,
      "step": 390
    },
    {
      "epoch": 5.839416058394161,
      "grad_norm": 4.5700507164001465,
      "learning_rate": 8.878504672897196e-06,
      "loss": 0.2745,
      "step": 400
    },
    {
      "epoch": 5.985401459854015,
      "grad_norm": 3.543813705444336,
      "learning_rate": 7.710280373831776e-06,
      "loss": 0.2611,
      "step": 410
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8810451524180609,
      "eval_loss": 0.3118920624256134,
      "eval_runtime": 35.1336,
      "eval_samples_per_second": 124.183,
      "eval_steps_per_second": 1.964,
      "step": 411
    },
    {
      "epoch": 6.131386861313868,
      "grad_norm": 4.951327800750732,
      "learning_rate": 6.542056074766355e-06,
      "loss": 0.2515,
      "step": 420
    },
    {
      "epoch": 6.2773722627737225,
      "grad_norm": 9.216889381408691,
      "learning_rate": 5.373831775700935e-06,
      "loss": 0.2588,
      "step": 430
    },
    {
      "epoch": 6.423357664233577,
      "grad_norm": 4.5815606117248535,
      "learning_rate": 4.205607476635514e-06,
      "loss": 0.2531,
      "step": 440
    },
    {
      "epoch": 6.569343065693431,
      "grad_norm": 2.8593881130218506,
      "learning_rate": 3.0373831775700936e-06,
      "loss": 0.2453,
      "step": 450
    },
    {
      "epoch": 6.7153284671532845,
      "grad_norm": 5.8815598487854,
      "learning_rate": 1.8691588785046728e-06,
      "loss": 0.2746,
      "step": 460
    },
    {
      "epoch": 6.861313868613139,
      "grad_norm": 7.535807132720947,
      "learning_rate": 7.009345794392523e-07,
      "loss": 0.255,
      "step": 470
    },
    {
      "epoch": 6.948905109489051,
      "eval_accuracy": 0.8819619527847811,
      "eval_loss": 0.3084806203842163,
      "eval_runtime": 35.4691,
      "eval_samples_per_second": 123.008,
      "eval_steps_per_second": 1.945,
      "step": 476
    },
    {
      "epoch": 6.948905109489051,
      "step": 476,
      "total_flos": 9.517373864500433e+18,
      "train_loss": 0.3870567884765753,
      "train_runtime": 2171.3346,
      "train_samples_per_second": 56.33,
      "train_steps_per_second": 0.219
    }
  ],
  "logging_steps": 10,
  "max_steps": 476,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.517373864500433e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}