{
  "best_metric": 0.8693982074263764,
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-MM_Classification/checkpoint-361",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 3.3412868976593018,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 1.0476,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6530089628681178,
      "eval_loss": 0.7707358002662659,
      "eval_runtime": 104.799,
      "eval_samples_per_second": 7.452,
      "eval_steps_per_second": 0.067,
      "step": 19
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 3.460216999053955,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.7869,
      "step": 20
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 3.0562336444854736,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.6226,
      "step": 30
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8104993597951344,
      "eval_loss": 0.47430700063705444,
      "eval_runtime": 75.387,
      "eval_samples_per_second": 10.36,
      "eval_steps_per_second": 0.093,
      "step": 38
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 3.6400160789489746,
      "learning_rate": 4.970760233918128e-05,
      "loss": 0.5102,
      "step": 40
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 2.88069486618042,
      "learning_rate": 4.824561403508772e-05,
      "loss": 0.4477,
      "step": 50
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8322663252240717,
      "eval_loss": 0.41332316398620605,
      "eval_runtime": 75.6871,
      "eval_samples_per_second": 10.319,
      "eval_steps_per_second": 0.092,
      "step": 57
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 2.5295557975769043,
      "learning_rate": 4.678362573099415e-05,
      "loss": 0.4205,
      "step": 60
    },
    {
      "epoch": 3.6842105263157894,
      "grad_norm": 2.7619762420654297,
      "learning_rate": 4.5321637426900585e-05,
      "loss": 0.3963,
      "step": 70
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8476312419974392,
      "eval_loss": 0.38133054971694946,
      "eval_runtime": 75.5986,
      "eval_samples_per_second": 10.331,
      "eval_steps_per_second": 0.093,
      "step": 76
    },
    {
      "epoch": 4.2105263157894735,
      "grad_norm": 2.971311330795288,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 0.3788,
      "step": 80
    },
    {
      "epoch": 4.7368421052631575,
      "grad_norm": 3.20125412940979,
      "learning_rate": 4.239766081871345e-05,
      "loss": 0.3694,
      "step": 90
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8540332906530089,
      "eval_loss": 0.37533658742904663,
      "eval_runtime": 74.9213,
      "eval_samples_per_second": 10.424,
      "eval_steps_per_second": 0.093,
      "step": 95
    },
    {
      "epoch": 5.2631578947368425,
      "grad_norm": 3.091187000274658,
      "learning_rate": 4.093567251461988e-05,
      "loss": 0.3499,
      "step": 100
    },
    {
      "epoch": 5.7894736842105265,
      "grad_norm": 3.088123321533203,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.3451,
      "step": 110
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8489116517285531,
      "eval_loss": 0.3586506247520447,
      "eval_runtime": 75.7408,
      "eval_samples_per_second": 10.311,
      "eval_steps_per_second": 0.092,
      "step": 114
    },
    {
      "epoch": 6.315789473684211,
      "grad_norm": 2.5398190021514893,
      "learning_rate": 3.8011695906432746e-05,
      "loss": 0.3421,
      "step": 120
    },
    {
      "epoch": 6.842105263157895,
      "grad_norm": 3.2862489223480225,
      "learning_rate": 3.654970760233918e-05,
      "loss": 0.3382,
      "step": 130
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.3531467616558075,
      "eval_runtime": 75.2863,
      "eval_samples_per_second": 10.374,
      "eval_steps_per_second": 0.093,
      "step": 133
    },
    {
      "epoch": 7.368421052631579,
      "grad_norm": 2.175835609436035,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.3397,
      "step": 140
    },
    {
      "epoch": 7.894736842105263,
      "grad_norm": 2.2023236751556396,
      "learning_rate": 3.362573099415205e-05,
      "loss": 0.3253,
      "step": 150
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8578745198463509,
      "eval_loss": 0.34979528188705444,
      "eval_runtime": 75.6534,
      "eval_samples_per_second": 10.323,
      "eval_steps_per_second": 0.093,
      "step": 152
    },
    {
      "epoch": 8.421052631578947,
      "grad_norm": 3.327239513397217,
      "learning_rate": 3.216374269005848e-05,
      "loss": 0.3156,
      "step": 160
    },
    {
      "epoch": 8.947368421052632,
      "grad_norm": 3.0708658695220947,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 0.3121,
      "step": 170
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8578745198463509,
      "eval_loss": 0.34373539686203003,
      "eval_runtime": 74.9284,
      "eval_samples_per_second": 10.423,
      "eval_steps_per_second": 0.093,
      "step": 171
    },
    {
      "epoch": 9.473684210526315,
      "grad_norm": 2.4968678951263428,
      "learning_rate": 2.9239766081871346e-05,
      "loss": 0.3042,
      "step": 180
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.865316152572632,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.2855,
      "step": 190
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8655569782330346,
      "eval_loss": 0.3446912467479706,
      "eval_runtime": 75.4844,
      "eval_samples_per_second": 10.347,
      "eval_steps_per_second": 0.093,
      "step": 190
    },
    {
      "epoch": 10.526315789473685,
      "grad_norm": 3.000030279159546,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.2961,
      "step": 200
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.8617157490396927,
      "eval_loss": 0.3350251019001007,
      "eval_runtime": 74.9625,
      "eval_samples_per_second": 10.419,
      "eval_steps_per_second": 0.093,
      "step": 209
    },
    {
      "epoch": 11.052631578947368,
      "grad_norm": 3.1556169986724854,
      "learning_rate": 2.485380116959064e-05,
      "loss": 0.291,
      "step": 210
    },
    {
      "epoch": 11.578947368421053,
      "grad_norm": 2.82590389251709,
      "learning_rate": 2.3391812865497074e-05,
      "loss": 0.273,
      "step": 220
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8565941101152369,
      "eval_loss": 0.34841132164001465,
      "eval_runtime": 75.6499,
      "eval_samples_per_second": 10.324,
      "eval_steps_per_second": 0.093,
      "step": 228
    },
    {
      "epoch": 12.105263157894736,
      "grad_norm": 2.110739231109619,
      "learning_rate": 2.1929824561403507e-05,
      "loss": 0.2767,
      "step": 230
    },
    {
      "epoch": 12.631578947368421,
      "grad_norm": 2.7739641666412354,
      "learning_rate": 2.046783625730994e-05,
      "loss": 0.2745,
      "step": 240
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8604353393085787,
      "eval_loss": 0.34332236647605896,
      "eval_runtime": 75.3714,
      "eval_samples_per_second": 10.362,
      "eval_steps_per_second": 0.093,
      "step": 247
    },
    {
      "epoch": 13.157894736842104,
      "grad_norm": 2.834440231323242,
      "learning_rate": 1.9005847953216373e-05,
      "loss": 0.2678,
      "step": 250
    },
    {
      "epoch": 13.68421052631579,
      "grad_norm": 4.303690433502197,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 0.2613,
      "step": 260
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8642765685019206,
      "eval_loss": 0.34982678294181824,
      "eval_runtime": 76.0549,
      "eval_samples_per_second": 10.269,
      "eval_steps_per_second": 0.092,
      "step": 266
    },
    {
      "epoch": 14.210526315789474,
      "grad_norm": 3.9326910972595215,
      "learning_rate": 1.608187134502924e-05,
      "loss": 0.2713,
      "step": 270
    },
    {
      "epoch": 14.736842105263158,
      "grad_norm": 3.0511579513549805,
      "learning_rate": 1.4619883040935673e-05,
      "loss": 0.2527,
      "step": 280
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8578745198463509,
      "eval_loss": 0.33652085065841675,
      "eval_runtime": 78.4572,
      "eval_samples_per_second": 9.954,
      "eval_steps_per_second": 0.089,
      "step": 285
    },
    {
      "epoch": 15.263157894736842,
      "grad_norm": 2.9660778045654297,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.246,
      "step": 290
    },
    {
      "epoch": 15.789473684210526,
      "grad_norm": 2.621548652648926,
      "learning_rate": 1.1695906432748537e-05,
      "loss": 0.2619,
      "step": 300
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8617157490396927,
      "eval_loss": 0.3450033366680145,
      "eval_runtime": 78.025,
      "eval_samples_per_second": 10.01,
      "eval_steps_per_second": 0.09,
      "step": 304
    },
    {
      "epoch": 16.31578947368421,
      "grad_norm": 2.7999181747436523,
      "learning_rate": 1.023391812865497e-05,
      "loss": 0.2469,
      "step": 310
    },
    {
      "epoch": 16.842105263157894,
      "grad_norm": 2.3670365810394287,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.2436,
      "step": 320
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8681177976952625,
      "eval_loss": 0.34535887837409973,
      "eval_runtime": 78.3498,
      "eval_samples_per_second": 9.968,
      "eval_steps_per_second": 0.089,
      "step": 323
    },
    {
      "epoch": 17.36842105263158,
      "grad_norm": 2.8293299674987793,
      "learning_rate": 7.3099415204678366e-06,
      "loss": 0.2457,
      "step": 330
    },
    {
      "epoch": 17.894736842105264,
      "grad_norm": 2.825676441192627,
      "learning_rate": 5.8479532163742686e-06,
      "loss": 0.2518,
      "step": 340
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8681177976952625,
      "eval_loss": 0.3437488377094269,
      "eval_runtime": 77.0059,
      "eval_samples_per_second": 10.142,
      "eval_steps_per_second": 0.091,
      "step": 342
    },
    {
      "epoch": 18.42105263157895,
      "grad_norm": 2.9020776748657227,
      "learning_rate": 4.3859649122807014e-06,
      "loss": 0.2362,
      "step": 350
    },
    {
      "epoch": 18.94736842105263,
      "grad_norm": 2.6419830322265625,
      "learning_rate": 2.9239766081871343e-06,
      "loss": 0.243,
      "step": 360
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8693982074263764,
      "eval_loss": 0.34680071473121643,
      "eval_runtime": 76.7708,
      "eval_samples_per_second": 10.173,
      "eval_steps_per_second": 0.091,
      "step": 361
    },
    {
      "epoch": 19.473684210526315,
      "grad_norm": 2.185123920440674,
      "learning_rate": 1.4619883040935671e-06,
      "loss": 0.2452,
      "step": 370
    },
    {
      "epoch": 20.0,
      "grad_norm": 4.370210647583008,
      "learning_rate": 0.0,
      "loss": 0.2415,
      "step": 380
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8693982074263764,
      "eval_loss": 0.3454751670360565,
      "eval_runtime": 76.6964,
      "eval_samples_per_second": 10.183,
      "eval_steps_per_second": 0.091,
      "step": 380
    },
    {
      "epoch": 20.0,
      "step": 380,
      "total_flos": 4.783917310653358e+18,
      "train_loss": 0.3464228366550646,
      "train_runtime": 17694.0061,
      "train_samples_per_second": 10.877,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 10,
  "max_steps": 380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.783917310653358e+18,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}