| { |
| "best_metric": 0.958904109589041, |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV3/checkpoint-525", |
| "epoch": 28.585365853658537, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 12.429728507995605, |
| "learning_rate": 9.999999999999999e-06, |
| "loss": 4.7086, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 15.98269271850586, |
| "learning_rate": 1.9999999999999998e-05, |
| "loss": 3.9845, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.3424657534246575, |
| "eval_loss": 1.6832486391067505, |
| "eval_runtime": 0.6873, |
| "eval_samples_per_second": 106.209, |
| "eval_steps_per_second": 7.275, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.4390243902439024, |
| "grad_norm": 19.85782814025879, |
| "learning_rate": 3e-05, |
| "loss": 2.9173, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.9268292682926829, |
| "grad_norm": 23.9186954498291, |
| "learning_rate": 2.9473684210526314e-05, |
| "loss": 2.4369, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.4383561643835616, |
| "eval_loss": 1.1981052160263062, |
| "eval_runtime": 0.7433, |
| "eval_samples_per_second": 98.212, |
| "eval_steps_per_second": 6.727, |
| "step": 42 |
| }, |
| { |
| "epoch": 2.3902439024390243, |
| "grad_norm": 39.31980895996094, |
| "learning_rate": 2.8947368421052634e-05, |
| "loss": 1.9976, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.8780487804878048, |
| "grad_norm": 34.83562469482422, |
| "learning_rate": 2.8421052631578946e-05, |
| "loss": 1.7752, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6301369863013698, |
| "eval_loss": 0.8411616086959839, |
| "eval_runtime": 0.7016, |
| "eval_samples_per_second": 104.055, |
| "eval_steps_per_second": 7.127, |
| "step": 63 |
| }, |
| { |
| "epoch": 3.341463414634146, |
| "grad_norm": 24.540483474731445, |
| "learning_rate": 2.7894736842105263e-05, |
| "loss": 1.5326, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.8292682926829267, |
| "grad_norm": 33.162715911865234, |
| "learning_rate": 2.736842105263158e-05, |
| "loss": 1.3772, |
| "step": 80 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7123287671232876, |
| "eval_loss": 0.7895165681838989, |
| "eval_runtime": 0.6266, |
| "eval_samples_per_second": 116.511, |
| "eval_steps_per_second": 7.98, |
| "step": 84 |
| }, |
| { |
| "epoch": 4.2926829268292686, |
| "grad_norm": 21.98711585998535, |
| "learning_rate": 2.6842105263157896e-05, |
| "loss": 1.414, |
| "step": 90 |
| }, |
| { |
| "epoch": 4.780487804878049, |
| "grad_norm": 27.87204360961914, |
| "learning_rate": 2.631578947368421e-05, |
| "loss": 1.1556, |
| "step": 100 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7808219178082192, |
| "eval_loss": 0.7384896874427795, |
| "eval_runtime": 0.6327, |
| "eval_samples_per_second": 115.385, |
| "eval_steps_per_second": 7.903, |
| "step": 105 |
| }, |
| { |
| "epoch": 5.2439024390243905, |
| "grad_norm": 24.45848274230957, |
| "learning_rate": 2.578947368421053e-05, |
| "loss": 1.0818, |
| "step": 110 |
| }, |
| { |
| "epoch": 5.7317073170731705, |
| "grad_norm": 16.963436126708984, |
| "learning_rate": 2.526315789473684e-05, |
| "loss": 1.0059, |
| "step": 120 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8082191780821918, |
| "eval_loss": 0.6626368165016174, |
| "eval_runtime": 0.6356, |
| "eval_samples_per_second": 114.844, |
| "eval_steps_per_second": 7.866, |
| "step": 126 |
| }, |
| { |
| "epoch": 6.195121951219512, |
| "grad_norm": 45.89384460449219, |
| "learning_rate": 2.4736842105263158e-05, |
| "loss": 0.9054, |
| "step": 130 |
| }, |
| { |
| "epoch": 6.682926829268292, |
| "grad_norm": 27.633718490600586, |
| "learning_rate": 2.4210526315789474e-05, |
| "loss": 0.8598, |
| "step": 140 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7808219178082192, |
| "eval_loss": 0.5402742624282837, |
| "eval_runtime": 0.6334, |
| "eval_samples_per_second": 115.247, |
| "eval_steps_per_second": 7.894, |
| "step": 147 |
| }, |
| { |
| "epoch": 7.146341463414634, |
| "grad_norm": 25.943758010864258, |
| "learning_rate": 2.368421052631579e-05, |
| "loss": 0.8268, |
| "step": 150 |
| }, |
| { |
| "epoch": 7.634146341463414, |
| "grad_norm": 37.037078857421875, |
| "learning_rate": 2.3157894736842103e-05, |
| "loss": 0.8724, |
| "step": 160 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.821917808219178, |
| "eval_loss": 0.5519894361495972, |
| "eval_runtime": 0.6379, |
| "eval_samples_per_second": 114.432, |
| "eval_steps_per_second": 7.838, |
| "step": 168 |
| }, |
| { |
| "epoch": 8.097560975609756, |
| "grad_norm": 22.22051429748535, |
| "learning_rate": 2.2631578947368423e-05, |
| "loss": 0.7427, |
| "step": 170 |
| }, |
| { |
| "epoch": 8.585365853658537, |
| "grad_norm": 33.7209587097168, |
| "learning_rate": 2.2105263157894736e-05, |
| "loss": 0.7096, |
| "step": 180 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8356164383561644, |
| "eval_loss": 0.5182141661643982, |
| "eval_runtime": 0.7436, |
| "eval_samples_per_second": 98.169, |
| "eval_steps_per_second": 6.724, |
| "step": 189 |
| }, |
| { |
| "epoch": 9.048780487804878, |
| "grad_norm": 17.677778244018555, |
| "learning_rate": 2.1578947368421053e-05, |
| "loss": 0.5608, |
| "step": 190 |
| }, |
| { |
| "epoch": 9.536585365853659, |
| "grad_norm": 34.34571075439453, |
| "learning_rate": 2.105263157894737e-05, |
| "loss": 0.5748, |
| "step": 200 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 9.038191795349121, |
| "learning_rate": 2.0526315789473685e-05, |
| "loss": 0.5038, |
| "step": 210 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8493150684931506, |
| "eval_loss": 0.4132954180240631, |
| "eval_runtime": 0.6251, |
| "eval_samples_per_second": 116.788, |
| "eval_steps_per_second": 7.999, |
| "step": 210 |
| }, |
| { |
| "epoch": 10.487804878048781, |
| "grad_norm": 31.543596267700195, |
| "learning_rate": 1.9999999999999998e-05, |
| "loss": 0.5488, |
| "step": 220 |
| }, |
| { |
| "epoch": 10.975609756097562, |
| "grad_norm": 23.798019409179688, |
| "learning_rate": 1.9473684210526318e-05, |
| "loss": 0.4951, |
| "step": 230 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8767123287671232, |
| "eval_loss": 0.3548040986061096, |
| "eval_runtime": 0.6389, |
| "eval_samples_per_second": 114.257, |
| "eval_steps_per_second": 7.826, |
| "step": 231 |
| }, |
| { |
| "epoch": 11.439024390243903, |
| "grad_norm": 16.509632110595703, |
| "learning_rate": 1.894736842105263e-05, |
| "loss": 0.5018, |
| "step": 240 |
| }, |
| { |
| "epoch": 11.926829268292684, |
| "grad_norm": 17.9029598236084, |
| "learning_rate": 1.8421052631578947e-05, |
| "loss": 0.4692, |
| "step": 250 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8493150684931506, |
| "eval_loss": 0.38450247049331665, |
| "eval_runtime": 0.6355, |
| "eval_samples_per_second": 114.878, |
| "eval_steps_per_second": 7.868, |
| "step": 252 |
| }, |
| { |
| "epoch": 12.390243902439025, |
| "grad_norm": 29.795612335205078, |
| "learning_rate": 1.7894736842105264e-05, |
| "loss": 0.5316, |
| "step": 260 |
| }, |
| { |
| "epoch": 12.878048780487806, |
| "grad_norm": 14.258842468261719, |
| "learning_rate": 1.736842105263158e-05, |
| "loss": 0.5339, |
| "step": 270 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8904109589041096, |
| "eval_loss": 0.3178386390209198, |
| "eval_runtime": 0.6456, |
| "eval_samples_per_second": 113.071, |
| "eval_steps_per_second": 7.745, |
| "step": 273 |
| }, |
| { |
| "epoch": 13.341463414634147, |
| "grad_norm": 28.944801330566406, |
| "learning_rate": 1.6842105263157893e-05, |
| "loss": 0.5021, |
| "step": 280 |
| }, |
| { |
| "epoch": 13.829268292682928, |
| "grad_norm": 19.169776916503906, |
| "learning_rate": 1.6315789473684213e-05, |
| "loss": 0.4536, |
| "step": 290 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8904109589041096, |
| "eval_loss": 0.3252336084842682, |
| "eval_runtime": 0.6543, |
| "eval_samples_per_second": 111.574, |
| "eval_steps_per_second": 7.642, |
| "step": 294 |
| }, |
| { |
| "epoch": 14.292682926829269, |
| "grad_norm": 19.354726791381836, |
| "learning_rate": 1.5789473684210526e-05, |
| "loss": 0.3055, |
| "step": 300 |
| }, |
| { |
| "epoch": 14.78048780487805, |
| "grad_norm": 18.675071716308594, |
| "learning_rate": 1.5263157894736842e-05, |
| "loss": 0.4369, |
| "step": 310 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8904109589041096, |
| "eval_loss": 0.27849265933036804, |
| "eval_runtime": 0.6484, |
| "eval_samples_per_second": 112.585, |
| "eval_steps_per_second": 7.711, |
| "step": 315 |
| }, |
| { |
| "epoch": 15.24390243902439, |
| "grad_norm": 29.19399642944336, |
| "learning_rate": 1.4736842105263157e-05, |
| "loss": 0.447, |
| "step": 320 |
| }, |
| { |
| "epoch": 15.731707317073171, |
| "grad_norm": 5.669158935546875, |
| "learning_rate": 1.4210526315789473e-05, |
| "loss": 0.3941, |
| "step": 330 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9041095890410958, |
| "eval_loss": 0.28995245695114136, |
| "eval_runtime": 0.6348, |
| "eval_samples_per_second": 114.996, |
| "eval_steps_per_second": 7.876, |
| "step": 336 |
| }, |
| { |
| "epoch": 16.195121951219512, |
| "grad_norm": 21.81118392944336, |
| "learning_rate": 1.368421052631579e-05, |
| "loss": 0.3228, |
| "step": 340 |
| }, |
| { |
| "epoch": 16.682926829268293, |
| "grad_norm": 34.80079650878906, |
| "learning_rate": 1.3157894736842104e-05, |
| "loss": 0.4363, |
| "step": 350 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.863013698630137, |
| "eval_loss": 0.3426441252231598, |
| "eval_runtime": 0.6573, |
| "eval_samples_per_second": 111.053, |
| "eval_steps_per_second": 7.606, |
| "step": 357 |
| }, |
| { |
| "epoch": 17.146341463414632, |
| "grad_norm": 25.49156379699707, |
| "learning_rate": 1.263157894736842e-05, |
| "loss": 0.3729, |
| "step": 360 |
| }, |
| { |
| "epoch": 17.634146341463413, |
| "grad_norm": 25.156068801879883, |
| "learning_rate": 1.2105263157894737e-05, |
| "loss": 0.2819, |
| "step": 370 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9041095890410958, |
| "eval_loss": 0.283920019865036, |
| "eval_runtime": 0.7572, |
| "eval_samples_per_second": 96.41, |
| "eval_steps_per_second": 6.603, |
| "step": 378 |
| }, |
| { |
| "epoch": 18.097560975609756, |
| "grad_norm": 36.39301300048828, |
| "learning_rate": 1.1578947368421052e-05, |
| "loss": 0.3253, |
| "step": 380 |
| }, |
| { |
| "epoch": 18.585365853658537, |
| "grad_norm": 24.839868545532227, |
| "learning_rate": 1.1052631578947368e-05, |
| "loss": 0.361, |
| "step": 390 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9041095890410958, |
| "eval_loss": 0.22234712541103363, |
| "eval_runtime": 0.7512, |
| "eval_samples_per_second": 97.175, |
| "eval_steps_per_second": 6.656, |
| "step": 399 |
| }, |
| { |
| "epoch": 19.048780487804876, |
| "grad_norm": 22.352935791015625, |
| "learning_rate": 1.0526315789473684e-05, |
| "loss": 0.3204, |
| "step": 400 |
| }, |
| { |
| "epoch": 19.536585365853657, |
| "grad_norm": 12.527485847473145, |
| "learning_rate": 9.999999999999999e-06, |
| "loss": 0.2965, |
| "step": 410 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 16.140165328979492, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 0.1857, |
| "step": 420 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9178082191780822, |
| "eval_loss": 0.25217577815055847, |
| "eval_runtime": 0.6292, |
| "eval_samples_per_second": 116.026, |
| "eval_steps_per_second": 7.947, |
| "step": 420 |
| }, |
| { |
| "epoch": 20.48780487804878, |
| "grad_norm": 12.794975280761719, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.1904, |
| "step": 430 |
| }, |
| { |
| "epoch": 20.975609756097562, |
| "grad_norm": 32.452125549316406, |
| "learning_rate": 8.421052631578947e-06, |
| "loss": 0.3161, |
| "step": 440 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.9178082191780822, |
| "eval_loss": 0.21637919545173645, |
| "eval_runtime": 0.6487, |
| "eval_samples_per_second": 112.528, |
| "eval_steps_per_second": 7.707, |
| "step": 441 |
| }, |
| { |
| "epoch": 21.4390243902439, |
| "grad_norm": 17.289514541625977, |
| "learning_rate": 7.894736842105263e-06, |
| "loss": 0.2523, |
| "step": 450 |
| }, |
| { |
| "epoch": 21.926829268292682, |
| "grad_norm": 42.501861572265625, |
| "learning_rate": 7.3684210526315784e-06, |
| "loss": 0.3273, |
| "step": 460 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.22238127887248993, |
| "eval_runtime": 0.6384, |
| "eval_samples_per_second": 114.34, |
| "eval_steps_per_second": 7.831, |
| "step": 462 |
| }, |
| { |
| "epoch": 22.390243902439025, |
| "grad_norm": 32.69398498535156, |
| "learning_rate": 6.842105263157895e-06, |
| "loss": 0.1996, |
| "step": 470 |
| }, |
| { |
| "epoch": 22.878048780487806, |
| "grad_norm": 21.498504638671875, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.3458, |
| "step": 480 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.9452054794520548, |
| "eval_loss": 0.21988777816295624, |
| "eval_runtime": 0.7497, |
| "eval_samples_per_second": 97.368, |
| "eval_steps_per_second": 6.669, |
| "step": 483 |
| }, |
| { |
| "epoch": 23.341463414634145, |
| "grad_norm": 8.661595344543457, |
| "learning_rate": 5.789473684210526e-06, |
| "loss": 0.2248, |
| "step": 490 |
| }, |
| { |
| "epoch": 23.829268292682926, |
| "grad_norm": 23.505203247070312, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.337, |
| "step": 500 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.23766528069972992, |
| "eval_runtime": 0.768, |
| "eval_samples_per_second": 95.058, |
| "eval_steps_per_second": 6.511, |
| "step": 504 |
| }, |
| { |
| "epoch": 24.29268292682927, |
| "grad_norm": 13.873770713806152, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.201, |
| "step": 510 |
| }, |
| { |
| "epoch": 24.78048780487805, |
| "grad_norm": 16.176956176757812, |
| "learning_rate": 4.210526315789473e-06, |
| "loss": 0.1801, |
| "step": 520 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.958904109589041, |
| "eval_loss": 0.20672719180583954, |
| "eval_runtime": 0.6363, |
| "eval_samples_per_second": 114.732, |
| "eval_steps_per_second": 7.858, |
| "step": 525 |
| }, |
| { |
| "epoch": 25.24390243902439, |
| "grad_norm": 39.25809097290039, |
| "learning_rate": 3.6842105263157892e-06, |
| "loss": 0.2077, |
| "step": 530 |
| }, |
| { |
| "epoch": 25.73170731707317, |
| "grad_norm": 46.6181755065918, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.3283, |
| "step": 540 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.24006159603595734, |
| "eval_runtime": 0.6339, |
| "eval_samples_per_second": 115.158, |
| "eval_steps_per_second": 7.888, |
| "step": 546 |
| }, |
| { |
| "epoch": 26.195121951219512, |
| "grad_norm": 10.685463905334473, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 0.3054, |
| "step": 550 |
| }, |
| { |
| "epoch": 26.682926829268293, |
| "grad_norm": 28.955364227294922, |
| "learning_rate": 2.1052631578947366e-06, |
| "loss": 0.2211, |
| "step": 560 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.21667610108852386, |
| "eval_runtime": 0.6327, |
| "eval_samples_per_second": 115.37, |
| "eval_steps_per_second": 7.902, |
| "step": 567 |
| }, |
| { |
| "epoch": 27.146341463414632, |
| "grad_norm": 20.726945877075195, |
| "learning_rate": 1.5789473684210526e-06, |
| "loss": 0.2556, |
| "step": 570 |
| }, |
| { |
| "epoch": 27.634146341463413, |
| "grad_norm": 6.582526206970215, |
| "learning_rate": 1.0526315789473683e-06, |
| "loss": 0.1783, |
| "step": 580 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.21801576018333435, |
| "eval_runtime": 0.6499, |
| "eval_samples_per_second": 112.334, |
| "eval_steps_per_second": 7.694, |
| "step": 588 |
| }, |
| { |
| "epoch": 28.097560975609756, |
| "grad_norm": 16.22028923034668, |
| "learning_rate": 5.263157894736842e-07, |
| "loss": 0.1982, |
| "step": 590 |
| }, |
| { |
| "epoch": 28.585365853658537, |
| "grad_norm": 15.770591735839844, |
| "learning_rate": 0.0, |
| "loss": 0.2783, |
| "step": 600 |
| }, |
| { |
| "epoch": 28.585365853658537, |
| "eval_accuracy": 0.9315068493150684, |
| "eval_loss": 0.22231744229793549, |
| "eval_runtime": 0.8149, |
| "eval_samples_per_second": 89.581, |
| "eval_steps_per_second": 6.136, |
| "step": 600 |
| }, |
| { |
| "epoch": 28.585365853658537, |
| "step": 600, |
| "total_flos": 6.102198151010058e+17, |
| "train_loss": 0.7502146526177724, |
| "train_runtime": 483.2384, |
| "train_samples_per_second": 40.725, |
| "train_steps_per_second": 1.242 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.102198151010058e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|