{
  "best_metric": 0.8425624321389794,
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-Mid-NonMidMarket-Classification/checkpoint-453",
  "epoch": 9.884169884169884,
  "eval_steps": 500,
  "global_step": 640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 6.145582675933838,
      "learning_rate": 7.8125e-06,
      "loss": 1.1143,
      "step": 10
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 4.804866313934326,
      "learning_rate": 1.5625e-05,
      "loss": 0.9049,
      "step": 20
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 4.023855686187744,
      "learning_rate": 2.34375e-05,
      "loss": 0.8321,
      "step": 30
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 5.751364707946777,
      "learning_rate": 3.125e-05,
      "loss": 0.7323,
      "step": 40
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 4.711376667022705,
      "learning_rate": 3.90625e-05,
      "loss": 0.6296,
      "step": 50
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 5.55043363571167,
      "learning_rate": 4.6875e-05,
      "loss": 0.5809,
      "step": 60
    },
    {
      "epoch": 0.9884169884169884,
      "eval_accuracy": 0.7937024972855592,
      "eval_loss": 0.5023894309997559,
      "eval_runtime": 107.4922,
      "eval_samples_per_second": 8.568,
      "eval_steps_per_second": 0.27,
      "step": 64
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 4.052460670471191,
      "learning_rate": 4.947916666666667e-05,
      "loss": 0.5564,
      "step": 70
    },
    {
      "epoch": 1.2355212355212355,
      "grad_norm": 4.5774359703063965,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 0.5529,
      "step": 80
    },
    {
      "epoch": 1.3899613899613898,
      "grad_norm": 7.030020713806152,
      "learning_rate": 4.774305555555556e-05,
      "loss": 0.5762,
      "step": 90
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 4.231429100036621,
      "learning_rate": 4.6875e-05,
      "loss": 0.5017,
      "step": 100
    },
    {
      "epoch": 1.698841698841699,
      "grad_norm": 3.0762908458709717,
      "learning_rate": 4.6006944444444444e-05,
      "loss": 0.5173,
      "step": 110
    },
    {
      "epoch": 1.8532818532818531,
      "grad_norm": 3.419095039367676,
      "learning_rate": 4.5138888888888894e-05,
      "loss": 0.5326,
      "step": 120
    },
    {
      "epoch": 1.9922779922779923,
      "eval_accuracy": 0.8132464712269273,
      "eval_loss": 0.4402076005935669,
      "eval_runtime": 106.2614,
      "eval_samples_per_second": 8.667,
      "eval_steps_per_second": 0.273,
      "step": 129
    },
    {
      "epoch": 2.0077220077220077,
      "grad_norm": 4.168376445770264,
      "learning_rate": 4.4270833333333337e-05,
      "loss": 0.5082,
      "step": 130
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 4.488511562347412,
      "learning_rate": 4.340277777777778e-05,
      "loss": 0.4642,
      "step": 140
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 3.359255313873291,
      "learning_rate": 4.253472222222222e-05,
      "loss": 0.4952,
      "step": 150
    },
    {
      "epoch": 2.471042471042471,
      "grad_norm": 4.701355457305908,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.4925,
      "step": 160
    },
    {
      "epoch": 2.6254826254826256,
      "grad_norm": 4.631283283233643,
      "learning_rate": 4.0798611111111115e-05,
      "loss": 0.4795,
      "step": 170
    },
    {
      "epoch": 2.7799227799227797,
      "grad_norm": 4.823609352111816,
      "learning_rate": 3.993055555555556e-05,
      "loss": 0.4878,
      "step": 180
    },
    {
      "epoch": 2.9343629343629343,
      "grad_norm": 3.890035390853882,
      "learning_rate": 3.90625e-05,
      "loss": 0.4626,
      "step": 190
    },
    {
      "epoch": 2.9961389961389964,
      "eval_accuracy": 0.8284473398479913,
      "eval_loss": 0.4243987202644348,
      "eval_runtime": 107.2966,
      "eval_samples_per_second": 8.584,
      "eval_steps_per_second": 0.27,
      "step": 194
    },
    {
      "epoch": 3.088803088803089,
      "grad_norm": 3.165153741836548,
      "learning_rate": 3.8194444444444444e-05,
      "loss": 0.4434,
      "step": 200
    },
    {
      "epoch": 3.2432432432432434,
      "grad_norm": 3.860111713409424,
      "learning_rate": 3.7326388888888893e-05,
      "loss": 0.4383,
      "step": 210
    },
    {
      "epoch": 3.3976833976833976,
      "grad_norm": 7.453273296356201,
      "learning_rate": 3.6458333333333336e-05,
      "loss": 0.4432,
      "step": 220
    },
    {
      "epoch": 3.552123552123552,
      "grad_norm": 5.200356483459473,
      "learning_rate": 3.559027777777778e-05,
      "loss": 0.4766,
      "step": 230
    },
    {
      "epoch": 3.7065637065637067,
      "grad_norm": 4.4838433265686035,
      "learning_rate": 3.472222222222222e-05,
      "loss": 0.4428,
      "step": 240
    },
    {
      "epoch": 3.861003861003861,
      "grad_norm": 3.7621071338653564,
      "learning_rate": 3.385416666666667e-05,
      "loss": 0.4778,
      "step": 250
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8273615635179153,
      "eval_loss": 0.42337119579315186,
      "eval_runtime": 106.8233,
      "eval_samples_per_second": 8.622,
      "eval_steps_per_second": 0.271,
      "step": 259
    },
    {
      "epoch": 4.015444015444015,
      "grad_norm": 4.093021869659424,
      "learning_rate": 3.2986111111111115e-05,
      "loss": 0.4306,
      "step": 260
    },
    {
      "epoch": 4.1698841698841695,
      "grad_norm": 3.669459581375122,
      "learning_rate": 3.211805555555556e-05,
      "loss": 0.4606,
      "step": 270
    },
    {
      "epoch": 4.324324324324325,
      "grad_norm": 4.8231892585754395,
      "learning_rate": 3.125e-05,
      "loss": 0.3992,
      "step": 280
    },
    {
      "epoch": 4.478764478764479,
      "grad_norm": 4.587674617767334,
      "learning_rate": 3.0381944444444444e-05,
      "loss": 0.4024,
      "step": 290
    },
    {
      "epoch": 4.633204633204633,
      "grad_norm": 9.029142379760742,
      "learning_rate": 2.951388888888889e-05,
      "loss": 0.4408,
      "step": 300
    },
    {
      "epoch": 4.787644787644788,
      "grad_norm": 3.7836337089538574,
      "learning_rate": 2.8645833333333333e-05,
      "loss": 0.4332,
      "step": 310
    },
    {
      "epoch": 4.942084942084942,
      "grad_norm": 5.781239032745361,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.4109,
      "step": 320
    },
    {
      "epoch": 4.988416988416988,
      "eval_accuracy": 0.8306188925081434,
      "eval_loss": 0.4197309613227844,
      "eval_runtime": 106.1724,
      "eval_samples_per_second": 8.675,
      "eval_steps_per_second": 0.273,
      "step": 323
    },
    {
      "epoch": 5.096525096525096,
      "grad_norm": 4.933718204498291,
      "learning_rate": 2.6909722222222222e-05,
      "loss": 0.4144,
      "step": 330
    },
    {
      "epoch": 5.250965250965251,
      "grad_norm": 4.699091911315918,
      "learning_rate": 2.604166666666667e-05,
      "loss": 0.4191,
      "step": 340
    },
    {
      "epoch": 5.405405405405405,
      "grad_norm": 5.412081718444824,
      "learning_rate": 2.517361111111111e-05,
      "loss": 0.4314,
      "step": 350
    },
    {
      "epoch": 5.559845559845559,
      "grad_norm": 3.4998362064361572,
      "learning_rate": 2.4305555555555558e-05,
      "loss": 0.3793,
      "step": 360
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 3.954893112182617,
      "learning_rate": 2.34375e-05,
      "loss": 0.3815,
      "step": 370
    },
    {
      "epoch": 5.8687258687258685,
      "grad_norm": 4.797443866729736,
      "learning_rate": 2.2569444444444447e-05,
      "loss": 0.3764,
      "step": 380
    },
    {
      "epoch": 5.992277992277993,
      "eval_accuracy": 0.8295331161780674,
      "eval_loss": 0.4095376133918762,
      "eval_runtime": 106.8074,
      "eval_samples_per_second": 8.623,
      "eval_steps_per_second": 0.272,
      "step": 388
    },
    {
      "epoch": 6.023166023166024,
      "grad_norm": 6.05122709274292,
      "learning_rate": 2.170138888888889e-05,
      "loss": 0.3947,
      "step": 390
    },
    {
      "epoch": 6.177606177606178,
      "grad_norm": 4.6088666915893555,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.3712,
      "step": 400
    },
    {
      "epoch": 6.332046332046332,
      "grad_norm": 3.9029107093811035,
      "learning_rate": 1.996527777777778e-05,
      "loss": 0.359,
      "step": 410
    },
    {
      "epoch": 6.486486486486487,
      "grad_norm": 4.032750129699707,
      "learning_rate": 1.9097222222222222e-05,
      "loss": 0.4075,
      "step": 420
    },
    {
      "epoch": 6.640926640926641,
      "grad_norm": 4.65377140045166,
      "learning_rate": 1.8229166666666668e-05,
      "loss": 0.3921,
      "step": 430
    },
    {
      "epoch": 6.795366795366795,
      "grad_norm": 5.404110908508301,
      "learning_rate": 1.736111111111111e-05,
      "loss": 0.3905,
      "step": 440
    },
    {
      "epoch": 6.94980694980695,
      "grad_norm": 4.541908264160156,
      "learning_rate": 1.6493055555555557e-05,
      "loss": 0.3725,
      "step": 450
    },
    {
      "epoch": 6.996138996138996,
      "eval_accuracy": 0.8425624321389794,
      "eval_loss": 0.4046495258808136,
      "eval_runtime": 105.8848,
      "eval_samples_per_second": 8.698,
      "eval_steps_per_second": 0.274,
      "step": 453
    },
    {
      "epoch": 7.104247104247104,
      "grad_norm": 4.969923496246338,
      "learning_rate": 1.5625e-05,
      "loss": 0.375,
      "step": 460
    },
    {
      "epoch": 7.258687258687258,
      "grad_norm": 4.746829986572266,
      "learning_rate": 1.4756944444444445e-05,
      "loss": 0.3536,
      "step": 470
    },
    {
      "epoch": 7.413127413127413,
      "grad_norm": 6.098570823669434,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.3418,
      "step": 480
    },
    {
      "epoch": 7.5675675675675675,
      "grad_norm": 9.79119873046875,
      "learning_rate": 1.3020833333333334e-05,
      "loss": 0.3769,
      "step": 490
    },
    {
      "epoch": 7.722007722007722,
      "grad_norm": 7.424502849578857,
      "learning_rate": 1.2152777777777779e-05,
      "loss": 0.3598,
      "step": 500
    },
    {
      "epoch": 7.876447876447877,
      "grad_norm": 3.9304542541503906,
      "learning_rate": 1.1284722222222223e-05,
      "loss": 0.3583,
      "step": 510
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8371335504885994,
      "eval_loss": 0.4108859896659851,
      "eval_runtime": 107.4011,
      "eval_samples_per_second": 8.575,
      "eval_steps_per_second": 0.27,
      "step": 518
    },
    {
      "epoch": 8.03088803088803,
      "grad_norm": 4.0118632316589355,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.36,
      "step": 520
    },
    {
      "epoch": 8.185328185328185,
      "grad_norm": 6.711050510406494,
      "learning_rate": 9.548611111111111e-06,
      "loss": 0.3427,
      "step": 530
    },
    {
      "epoch": 8.339768339768339,
      "grad_norm": 4.516994476318359,
      "learning_rate": 8.680555555555556e-06,
      "loss": 0.3335,
      "step": 540
    },
    {
      "epoch": 8.494208494208495,
      "grad_norm": 7.737695217132568,
      "learning_rate": 7.8125e-06,
      "loss": 0.3658,
      "step": 550
    },
    {
      "epoch": 8.64864864864865,
      "grad_norm": 5.0886759757995605,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.3635,
      "step": 560
    },
    {
      "epoch": 8.803088803088803,
      "grad_norm": 3.6143264770507812,
      "learning_rate": 6.076388888888889e-06,
      "loss": 0.3493,
      "step": 570
    },
    {
      "epoch": 8.957528957528957,
      "grad_norm": 4.734116554260254,
      "learning_rate": 5.208333333333334e-06,
      "loss": 0.3451,
      "step": 580
    },
    {
      "epoch": 8.988416988416988,
      "eval_accuracy": 0.8349619978284474,
      "eval_loss": 0.4170722961425781,
      "eval_runtime": 108.6012,
      "eval_samples_per_second": 8.481,
      "eval_steps_per_second": 0.267,
      "step": 582
    },
    {
      "epoch": 9.111969111969112,
      "grad_norm": 4.281556129455566,
      "learning_rate": 4.340277777777778e-06,
      "loss": 0.3268,
      "step": 590
    },
    {
      "epoch": 9.266409266409266,
      "grad_norm": 3.9609580039978027,
      "learning_rate": 3.4722222222222224e-06,
      "loss": 0.371,
      "step": 600
    },
    {
      "epoch": 9.420849420849422,
      "grad_norm": 5.559605598449707,
      "learning_rate": 2.604166666666667e-06,
      "loss": 0.3446,
      "step": 610
    },
    {
      "epoch": 9.575289575289576,
      "grad_norm": 4.212625980377197,
      "learning_rate": 1.7361111111111112e-06,
      "loss": 0.3206,
      "step": 620
    },
    {
      "epoch": 9.72972972972973,
      "grad_norm": 3.9171366691589355,
      "learning_rate": 8.680555555555556e-07,
      "loss": 0.3654,
      "step": 630
    },
    {
      "epoch": 9.884169884169884,
      "grad_norm": 5.055610179901123,
      "learning_rate": 0.0,
      "loss": 0.3351,
      "step": 640
    },
    {
      "epoch": 9.884169884169884,
      "eval_accuracy": 0.8403908794788274,
      "eval_loss": 0.41527804732322693,
      "eval_runtime": 106.36,
      "eval_samples_per_second": 8.659,
      "eval_steps_per_second": 0.273,
      "step": 640
    },
    {
      "epoch": 9.884169884169884,
      "step": 640,
      "total_flos": 2.0360358039744737e+18,
      "train_loss": 0.45154881179332734,
      "train_runtime": 9581.2043,
      "train_samples_per_second": 8.649,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 10,
  "max_steps": 640,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0360358039744737e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}