| { |
| "best_global_step": 750, |
| "best_metric": 0.9028571428571428, |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV71/checkpoint-750", |
| "epoch": 56.0, |
| "eval_steps": 500, |
| "global_step": 840, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 3.3897862434387207, |
| "learning_rate": 6.428571428571429e-06, |
| "loss": 1.1246, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.41714285714285715, |
| "eval_loss": 1.05587899684906, |
| "eval_runtime": 2.0285, |
| "eval_samples_per_second": 86.269, |
| "eval_steps_per_second": 5.423, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.3389830508474576, |
| "grad_norm": 4.0562825202941895, |
| "learning_rate": 1.3571428571428572e-05, |
| "loss": 1.0075, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.147009372711182, |
| "learning_rate": 2.0714285714285715e-05, |
| "loss": 0.8728, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6971428571428572, |
| "eval_loss": 0.746044933795929, |
| "eval_runtime": 2.3392, |
| "eval_samples_per_second": 74.811, |
| "eval_steps_per_second": 4.702, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.6779661016949152, |
| "grad_norm": 8.447574615478516, |
| "learning_rate": 2.7857142857142858e-05, |
| "loss": 0.6663, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8057142857142857, |
| "eval_loss": 0.4562816619873047, |
| "eval_runtime": 1.9074, |
| "eval_samples_per_second": 91.749, |
| "eval_steps_per_second": 5.767, |
| "step": 45 |
| }, |
| { |
| "epoch": 3.3389830508474576, |
| "grad_norm": 11.307677268981934, |
| "learning_rate": 3.5000000000000004e-05, |
| "loss": 0.5432, |
| "step": 50 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 8.582294464111328, |
| "learning_rate": 4.214285714285714e-05, |
| "loss": 0.4632, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8285714285714286, |
| "eval_loss": 0.40758973360061646, |
| "eval_runtime": 1.8904, |
| "eval_samples_per_second": 92.573, |
| "eval_steps_per_second": 5.819, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.677966101694915, |
| "grad_norm": 9.387711524963379, |
| "learning_rate": 4.928571428571428e-05, |
| "loss": 0.4278, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.84, |
| "eval_loss": 0.3669876456260681, |
| "eval_runtime": 2.6198, |
| "eval_samples_per_second": 66.799, |
| "eval_steps_per_second": 4.199, |
| "step": 75 |
| }, |
| { |
| "epoch": 5.338983050847458, |
| "grad_norm": 8.6371431350708, |
| "learning_rate": 5.642857142857143e-05, |
| "loss": 0.3608, |
| "step": 80 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 11.995199203491211, |
| "learning_rate": 5.96031746031746e-05, |
| "loss": 0.361, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8457142857142858, |
| "eval_loss": 0.36241406202316284, |
| "eval_runtime": 1.9339, |
| "eval_samples_per_second": 90.49, |
| "eval_steps_per_second": 5.688, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.677966101694915, |
| "grad_norm": 10.142115592956543, |
| "learning_rate": 5.880952380952381e-05, |
| "loss": 0.3742, |
| "step": 100 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.3504450023174286, |
| "eval_runtime": 1.8843, |
| "eval_samples_per_second": 92.872, |
| "eval_steps_per_second": 5.838, |
| "step": 105 |
| }, |
| { |
| "epoch": 7.338983050847458, |
| "grad_norm": 11.923420906066895, |
| "learning_rate": 5.801587301587302e-05, |
| "loss": 0.2351, |
| "step": 110 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 4.905794620513916, |
| "learning_rate": 5.722222222222223e-05, |
| "loss": 0.3313, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.2962282598018646, |
| "eval_runtime": 2.4213, |
| "eval_samples_per_second": 72.274, |
| "eval_steps_per_second": 4.543, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.677966101694915, |
| "grad_norm": 8.893570899963379, |
| "learning_rate": 5.642857142857143e-05, |
| "loss": 0.2977, |
| "step": 130 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.33206191658973694, |
| "eval_runtime": 1.9246, |
| "eval_samples_per_second": 90.929, |
| "eval_steps_per_second": 5.716, |
| "step": 135 |
| }, |
| { |
| "epoch": 9.338983050847457, |
| "grad_norm": 12.26343822479248, |
| "learning_rate": 5.563492063492064e-05, |
| "loss": 0.3156, |
| "step": 140 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 6.5488362312316895, |
| "learning_rate": 5.4841269841269845e-05, |
| "loss": 0.2589, |
| "step": 150 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.34251242876052856, |
| "eval_runtime": 1.9199, |
| "eval_samples_per_second": 91.152, |
| "eval_steps_per_second": 5.73, |
| "step": 150 |
| }, |
| { |
| "epoch": 10.677966101694915, |
| "grad_norm": 5.538010120391846, |
| "learning_rate": 5.404761904761905e-05, |
| "loss": 0.2477, |
| "step": 160 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8457142857142858, |
| "eval_loss": 0.39819851517677307, |
| "eval_runtime": 1.9172, |
| "eval_samples_per_second": 91.281, |
| "eval_steps_per_second": 5.738, |
| "step": 165 |
| }, |
| { |
| "epoch": 11.338983050847457, |
| "grad_norm": 6.982760906219482, |
| "learning_rate": 5.333333333333333e-05, |
| "loss": 0.2527, |
| "step": 170 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 8.375303268432617, |
| "learning_rate": 5.253968253968254e-05, |
| "loss": 0.2187, |
| "step": 180 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8514285714285714, |
| "eval_loss": 0.5953956246376038, |
| "eval_runtime": 2.0286, |
| "eval_samples_per_second": 86.268, |
| "eval_steps_per_second": 5.423, |
| "step": 180 |
| }, |
| { |
| "epoch": 12.677966101694915, |
| "grad_norm": 7.4971723556518555, |
| "learning_rate": 5.174603174603175e-05, |
| "loss": 0.2342, |
| "step": 190 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8514285714285714, |
| "eval_loss": 0.3745245933532715, |
| "eval_runtime": 1.8701, |
| "eval_samples_per_second": 93.58, |
| "eval_steps_per_second": 5.882, |
| "step": 195 |
| }, |
| { |
| "epoch": 13.338983050847457, |
| "grad_norm": 6.758434772491455, |
| "learning_rate": 5.095238095238095e-05, |
| "loss": 0.2354, |
| "step": 200 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 4.655900001525879, |
| "learning_rate": 5.015873015873016e-05, |
| "loss": 0.2444, |
| "step": 210 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.5219993591308594, |
| "eval_runtime": 2.5324, |
| "eval_samples_per_second": 69.106, |
| "eval_steps_per_second": 4.344, |
| "step": 210 |
| }, |
| { |
| "epoch": 14.677966101694915, |
| "grad_norm": 8.788654327392578, |
| "learning_rate": 4.936507936507937e-05, |
| "loss": 0.2067, |
| "step": 220 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8457142857142858, |
| "eval_loss": 0.44333723187446594, |
| "eval_runtime": 1.9312, |
| "eval_samples_per_second": 90.617, |
| "eval_steps_per_second": 5.696, |
| "step": 225 |
| }, |
| { |
| "epoch": 15.338983050847457, |
| "grad_norm": 8.221491813659668, |
| "learning_rate": 4.8571428571428576e-05, |
| "loss": 0.2097, |
| "step": 230 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 5.007316589355469, |
| "learning_rate": 4.777777777777778e-05, |
| "loss": 0.1882, |
| "step": 240 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.3937312960624695, |
| "eval_runtime": 1.922, |
| "eval_samples_per_second": 91.052, |
| "eval_steps_per_second": 5.723, |
| "step": 240 |
| }, |
| { |
| "epoch": 16.677966101694913, |
| "grad_norm": 9.630002975463867, |
| "learning_rate": 4.6984126984126986e-05, |
| "loss": 0.199, |
| "step": 250 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.5102602243423462, |
| "eval_runtime": 2.5902, |
| "eval_samples_per_second": 67.563, |
| "eval_steps_per_second": 4.247, |
| "step": 255 |
| }, |
| { |
| "epoch": 17.338983050847457, |
| "grad_norm": 5.763312339782715, |
| "learning_rate": 4.6190476190476194e-05, |
| "loss": 0.174, |
| "step": 260 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 2.7853012084960938, |
| "learning_rate": 4.53968253968254e-05, |
| "loss": 0.1565, |
| "step": 270 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.36082425713539124, |
| "eval_runtime": 1.8955, |
| "eval_samples_per_second": 92.326, |
| "eval_steps_per_second": 5.803, |
| "step": 270 |
| }, |
| { |
| "epoch": 18.677966101694913, |
| "grad_norm": 7.840061187744141, |
| "learning_rate": 4.4603174603174604e-05, |
| "loss": 0.2068, |
| "step": 280 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.3678865134716034, |
| "eval_runtime": 1.914, |
| "eval_samples_per_second": 91.43, |
| "eval_steps_per_second": 5.747, |
| "step": 285 |
| }, |
| { |
| "epoch": 19.338983050847457, |
| "grad_norm": 10.269192695617676, |
| "learning_rate": 4.3809523809523805e-05, |
| "loss": 0.1742, |
| "step": 290 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 11.602302551269531, |
| "learning_rate": 4.301587301587302e-05, |
| "loss": 0.194, |
| "step": 300 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.8457142857142858, |
| "eval_loss": 0.5581231117248535, |
| "eval_runtime": 2.5904, |
| "eval_samples_per_second": 67.556, |
| "eval_steps_per_second": 4.246, |
| "step": 300 |
| }, |
| { |
| "epoch": 20.677966101694913, |
| "grad_norm": 4.199820518493652, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.1654, |
| "step": 310 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.5074398517608643, |
| "eval_runtime": 1.9301, |
| "eval_samples_per_second": 90.667, |
| "eval_steps_per_second": 5.699, |
| "step": 315 |
| }, |
| { |
| "epoch": 21.338983050847457, |
| "grad_norm": 8.24092960357666, |
| "learning_rate": 4.142857142857143e-05, |
| "loss": 0.1393, |
| "step": 320 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 6.02392053604126, |
| "learning_rate": 4.063492063492063e-05, |
| "loss": 0.1986, |
| "step": 330 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.4395482540130615, |
| "eval_runtime": 1.9058, |
| "eval_samples_per_second": 91.826, |
| "eval_steps_per_second": 5.772, |
| "step": 330 |
| }, |
| { |
| "epoch": 22.677966101694913, |
| "grad_norm": 5.646173000335693, |
| "learning_rate": 3.9841269841269846e-05, |
| "loss": 0.1257, |
| "step": 340 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.42931947112083435, |
| "eval_runtime": 2.3278, |
| "eval_samples_per_second": 75.179, |
| "eval_steps_per_second": 4.726, |
| "step": 345 |
| }, |
| { |
| "epoch": 23.338983050847457, |
| "grad_norm": 7.199140548706055, |
| "learning_rate": 3.904761904761905e-05, |
| "loss": 0.1364, |
| "step": 350 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 2.1807098388671875, |
| "learning_rate": 3.8253968253968256e-05, |
| "loss": 0.1976, |
| "step": 360 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_loss": 0.4932045638561249, |
| "eval_runtime": 1.9503, |
| "eval_samples_per_second": 89.731, |
| "eval_steps_per_second": 5.64, |
| "step": 360 |
| }, |
| { |
| "epoch": 24.677966101694913, |
| "grad_norm": 3.774115562438965, |
| "learning_rate": 3.746031746031746e-05, |
| "loss": 0.1563, |
| "step": 370 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.42544516921043396, |
| "eval_runtime": 1.9489, |
| "eval_samples_per_second": 89.793, |
| "eval_steps_per_second": 5.644, |
| "step": 375 |
| }, |
| { |
| "epoch": 25.338983050847457, |
| "grad_norm": 5.813924789428711, |
| "learning_rate": 3.666666666666667e-05, |
| "loss": 0.1537, |
| "step": 380 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 5.670418739318848, |
| "learning_rate": 3.5873015873015874e-05, |
| "loss": 0.0985, |
| "step": 390 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.5096610188484192, |
| "eval_runtime": 2.2632, |
| "eval_samples_per_second": 77.323, |
| "eval_steps_per_second": 4.86, |
| "step": 390 |
| }, |
| { |
| "epoch": 26.677966101694913, |
| "grad_norm": 7.973656177520752, |
| "learning_rate": 3.507936507936508e-05, |
| "loss": 0.1238, |
| "step": 400 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.8514285714285714, |
| "eval_loss": 0.7264113426208496, |
| "eval_runtime": 2.2954, |
| "eval_samples_per_second": 76.239, |
| "eval_steps_per_second": 4.792, |
| "step": 405 |
| }, |
| { |
| "epoch": 27.338983050847457, |
| "grad_norm": 5.4732866287231445, |
| "learning_rate": 3.4285714285714284e-05, |
| "loss": 0.1385, |
| "step": 410 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 4.48883056640625, |
| "learning_rate": 3.34920634920635e-05, |
| "loss": 0.1577, |
| "step": 420 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_loss": 0.4826878011226654, |
| "eval_runtime": 1.9183, |
| "eval_samples_per_second": 91.227, |
| "eval_steps_per_second": 5.734, |
| "step": 420 |
| }, |
| { |
| "epoch": 28.677966101694913, |
| "grad_norm": 4.5706787109375, |
| "learning_rate": 3.26984126984127e-05, |
| "loss": 0.1271, |
| "step": 430 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.530450165271759, |
| "eval_runtime": 2.147, |
| "eval_samples_per_second": 81.509, |
| "eval_steps_per_second": 5.123, |
| "step": 435 |
| }, |
| { |
| "epoch": 29.338983050847457, |
| "grad_norm": 8.417387962341309, |
| "learning_rate": 3.190476190476191e-05, |
| "loss": 0.1171, |
| "step": 440 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 1.3924190998077393, |
| "learning_rate": 3.111111111111111e-05, |
| "loss": 0.1002, |
| "step": 450 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.5888301134109497, |
| "eval_runtime": 1.8837, |
| "eval_samples_per_second": 92.902, |
| "eval_steps_per_second": 5.84, |
| "step": 450 |
| }, |
| { |
| "epoch": 30.677966101694913, |
| "grad_norm": 4.39676570892334, |
| "learning_rate": 3.031746031746032e-05, |
| "loss": 0.1268, |
| "step": 460 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_loss": 0.6432701945304871, |
| "eval_runtime": 1.9204, |
| "eval_samples_per_second": 91.126, |
| "eval_steps_per_second": 5.728, |
| "step": 465 |
| }, |
| { |
| "epoch": 31.338983050847457, |
| "grad_norm": 6.017373561859131, |
| "learning_rate": 2.9523809523809523e-05, |
| "loss": 0.1077, |
| "step": 470 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 5.33542013168335, |
| "learning_rate": 2.873015873015873e-05, |
| "loss": 0.1153, |
| "step": 480 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.8342857142857143, |
| "eval_loss": 0.8394165635108948, |
| "eval_runtime": 2.2924, |
| "eval_samples_per_second": 76.34, |
| "eval_steps_per_second": 4.798, |
| "step": 480 |
| }, |
| { |
| "epoch": 32.67796610169491, |
| "grad_norm": 13.854134559631348, |
| "learning_rate": 2.7936507936507936e-05, |
| "loss": 0.1191, |
| "step": 490 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.84, |
| "eval_loss": 0.747542142868042, |
| "eval_runtime": 1.9178, |
| "eval_samples_per_second": 91.249, |
| "eval_steps_per_second": 5.736, |
| "step": 495 |
| }, |
| { |
| "epoch": 33.33898305084746, |
| "grad_norm": 8.244441986083984, |
| "learning_rate": 2.7142857142857144e-05, |
| "loss": 0.1271, |
| "step": 500 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 2.1750755310058594, |
| "learning_rate": 2.634920634920635e-05, |
| "loss": 0.1184, |
| "step": 510 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.4883846938610077, |
| "eval_runtime": 1.8782, |
| "eval_samples_per_second": 93.173, |
| "eval_steps_per_second": 5.857, |
| "step": 510 |
| }, |
| { |
| "epoch": 34.67796610169491, |
| "grad_norm": 9.822646141052246, |
| "learning_rate": 2.5555555555555557e-05, |
| "loss": 0.1332, |
| "step": 520 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.5834174752235413, |
| "eval_runtime": 2.1992, |
| "eval_samples_per_second": 79.573, |
| "eval_steps_per_second": 5.002, |
| "step": 525 |
| }, |
| { |
| "epoch": 35.33898305084746, |
| "grad_norm": 5.659489154815674, |
| "learning_rate": 2.4761904761904762e-05, |
| "loss": 0.0804, |
| "step": 530 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 5.710267066955566, |
| "learning_rate": 2.396825396825397e-05, |
| "loss": 0.1071, |
| "step": 540 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.8571428571428571, |
| "eval_loss": 0.627878725528717, |
| "eval_runtime": 1.9363, |
| "eval_samples_per_second": 90.377, |
| "eval_steps_per_second": 5.681, |
| "step": 540 |
| }, |
| { |
| "epoch": 36.67796610169491, |
| "grad_norm": 6.440234661102295, |
| "learning_rate": 2.3174603174603175e-05, |
| "loss": 0.0886, |
| "step": 550 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.8628571428571429, |
| "eval_loss": 0.6998600363731384, |
| "eval_runtime": 1.9136, |
| "eval_samples_per_second": 91.451, |
| "eval_steps_per_second": 5.748, |
| "step": 555 |
| }, |
| { |
| "epoch": 37.33898305084746, |
| "grad_norm": 4.174771785736084, |
| "learning_rate": 2.238095238095238e-05, |
| "loss": 0.0845, |
| "step": 560 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 0.6355146765708923, |
| "learning_rate": 2.1587301587301585e-05, |
| "loss": 0.0744, |
| "step": 570 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.729452908039093, |
| "eval_runtime": 2.1953, |
| "eval_samples_per_second": 79.716, |
| "eval_steps_per_second": 5.011, |
| "step": 570 |
| }, |
| { |
| "epoch": 38.67796610169491, |
| "grad_norm": 12.643016815185547, |
| "learning_rate": 2.0793650793650793e-05, |
| "loss": 0.1274, |
| "step": 580 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.8914285714285715, |
| "eval_loss": 0.6137147545814514, |
| "eval_runtime": 1.9072, |
| "eval_samples_per_second": 91.758, |
| "eval_steps_per_second": 5.768, |
| "step": 585 |
| }, |
| { |
| "epoch": 39.33898305084746, |
| "grad_norm": 8.217287063598633, |
| "learning_rate": 1.9999999999999998e-05, |
| "loss": 0.0905, |
| "step": 590 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 2.896934986114502, |
| "learning_rate": 1.9206349206349206e-05, |
| "loss": 0.0795, |
| "step": 600 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.5706229209899902, |
| "eval_runtime": 2.3574, |
| "eval_samples_per_second": 74.235, |
| "eval_steps_per_second": 4.666, |
| "step": 600 |
| }, |
| { |
| "epoch": 40.67796610169491, |
| "grad_norm": 8.571166038513184, |
| "learning_rate": 1.841269841269841e-05, |
| "loss": 0.0962, |
| "step": 610 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.6100188493728638, |
| "eval_runtime": 2.0136, |
| "eval_samples_per_second": 86.908, |
| "eval_steps_per_second": 5.463, |
| "step": 615 |
| }, |
| { |
| "epoch": 41.33898305084746, |
| "grad_norm": 4.700484752655029, |
| "learning_rate": 1.761904761904762e-05, |
| "loss": 0.0749, |
| "step": 620 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 2.4115490913391113, |
| "learning_rate": 1.6825396825396824e-05, |
| "loss": 0.094, |
| "step": 630 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.6148616075515747, |
| "eval_runtime": 1.9168, |
| "eval_samples_per_second": 91.298, |
| "eval_steps_per_second": 5.739, |
| "step": 630 |
| }, |
| { |
| "epoch": 42.67796610169491, |
| "grad_norm": 4.732550144195557, |
| "learning_rate": 1.6031746031746033e-05, |
| "loss": 0.0945, |
| "step": 640 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.5688998699188232, |
| "eval_runtime": 2.0595, |
| "eval_samples_per_second": 84.974, |
| "eval_steps_per_second": 5.341, |
| "step": 645 |
| }, |
| { |
| "epoch": 43.33898305084746, |
| "grad_norm": 4.677188396453857, |
| "learning_rate": 1.5238095238095238e-05, |
| "loss": 0.0851, |
| "step": 650 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 6.458128452301025, |
| "learning_rate": 1.4444444444444444e-05, |
| "loss": 0.0584, |
| "step": 660 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.7018650770187378, |
| "eval_runtime": 2.3309, |
| "eval_samples_per_second": 75.079, |
| "eval_steps_per_second": 4.719, |
| "step": 660 |
| }, |
| { |
| "epoch": 44.67796610169491, |
| "grad_norm": 4.308237552642822, |
| "learning_rate": 1.365079365079365e-05, |
| "loss": 0.0676, |
| "step": 670 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.6934124231338501, |
| "eval_runtime": 1.9251, |
| "eval_samples_per_second": 90.902, |
| "eval_steps_per_second": 5.714, |
| "step": 675 |
| }, |
| { |
| "epoch": 45.33898305084746, |
| "grad_norm": 2.8312790393829346, |
| "learning_rate": 1.2857142857142857e-05, |
| "loss": 0.0893, |
| "step": 680 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 7.0031328201293945, |
| "learning_rate": 1.2063492063492064e-05, |
| "loss": 0.0763, |
| "step": 690 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_accuracy": 0.8914285714285715, |
| "eval_loss": 0.6047118902206421, |
| "eval_runtime": 2.0296, |
| "eval_samples_per_second": 86.224, |
| "eval_steps_per_second": 5.42, |
| "step": 690 |
| }, |
| { |
| "epoch": 46.67796610169491, |
| "grad_norm": 8.401297569274902, |
| "learning_rate": 1.126984126984127e-05, |
| "loss": 0.0762, |
| "step": 700 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.6063617467880249, |
| "eval_runtime": 1.8566, |
| "eval_samples_per_second": 94.259, |
| "eval_steps_per_second": 5.925, |
| "step": 705 |
| }, |
| { |
| "epoch": 47.33898305084746, |
| "grad_norm": 1.462274432182312, |
| "learning_rate": 1.0476190476190475e-05, |
| "loss": 0.0563, |
| "step": 710 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 1.8739376068115234, |
| "learning_rate": 9.682539682539682e-06, |
| "loss": 0.0696, |
| "step": 720 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.7335702776908875, |
| "eval_runtime": 1.911, |
| "eval_samples_per_second": 91.576, |
| "eval_steps_per_second": 5.756, |
| "step": 720 |
| }, |
| { |
| "epoch": 48.67796610169491, |
| "grad_norm": 2.9189000129699707, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.0555, |
| "step": 730 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.6598544120788574, |
| "eval_runtime": 1.9333, |
| "eval_samples_per_second": 90.519, |
| "eval_steps_per_second": 5.69, |
| "step": 735 |
| }, |
| { |
| "epoch": 49.33898305084746, |
| "grad_norm": 3.1225035190582275, |
| "learning_rate": 8.095238095238095e-06, |
| "loss": 0.1129, |
| "step": 740 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 2.0588467121124268, |
| "learning_rate": 7.301587301587301e-06, |
| "loss": 0.0572, |
| "step": 750 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_accuracy": 0.9028571428571428, |
| "eval_loss": 0.597748875617981, |
| "eval_runtime": 2.541, |
| "eval_samples_per_second": 68.87, |
| "eval_steps_per_second": 4.329, |
| "step": 750 |
| }, |
| { |
| "epoch": 50.67796610169491, |
| "grad_norm": 2.578906536102295, |
| "learning_rate": 6.507936507936508e-06, |
| "loss": 0.0648, |
| "step": 760 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.6257001757621765, |
| "eval_runtime": 1.8911, |
| "eval_samples_per_second": 92.541, |
| "eval_steps_per_second": 5.817, |
| "step": 765 |
| }, |
| { |
| "epoch": 51.33898305084746, |
| "grad_norm": 3.871882677078247, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.0521, |
| "step": 770 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 3.6807923316955566, |
| "learning_rate": 4.92063492063492e-06, |
| "loss": 0.0705, |
| "step": 780 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_accuracy": 0.8857142857142857, |
| "eval_loss": 0.6653619408607483, |
| "eval_runtime": 1.8778, |
| "eval_samples_per_second": 93.193, |
| "eval_steps_per_second": 5.858, |
| "step": 780 |
| }, |
| { |
| "epoch": 52.67796610169491, |
| "grad_norm": 6.3525519371032715, |
| "learning_rate": 4.126984126984127e-06, |
| "loss": 0.0646, |
| "step": 790 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_accuracy": 0.8685714285714285, |
| "eval_loss": 0.6813338994979858, |
| "eval_runtime": 1.8783, |
| "eval_samples_per_second": 93.171, |
| "eval_steps_per_second": 5.856, |
| "step": 795 |
| }, |
| { |
| "epoch": 53.33898305084746, |
| "grad_norm": 5.389460563659668, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0463, |
| "step": 800 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 0.5001619458198547, |
| "learning_rate": 2.5396825396825395e-06, |
| "loss": 0.0795, |
| "step": 810 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.6209337711334229, |
| "eval_runtime": 2.6137, |
| "eval_samples_per_second": 66.955, |
| "eval_steps_per_second": 4.209, |
| "step": 810 |
| }, |
| { |
| "epoch": 54.67796610169491, |
| "grad_norm": 9.643752098083496, |
| "learning_rate": 1.746031746031746e-06, |
| "loss": 0.0828, |
| "step": 820 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_accuracy": 0.8742857142857143, |
| "eval_loss": 0.6456648707389832, |
| "eval_runtime": 1.8674, |
| "eval_samples_per_second": 93.711, |
| "eval_steps_per_second": 5.89, |
| "step": 825 |
| }, |
| { |
| "epoch": 55.33898305084746, |
| "grad_norm": 7.717844486236572, |
| "learning_rate": 9.523809523809523e-07, |
| "loss": 0.0916, |
| "step": 830 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 3.563279390335083, |
| "learning_rate": 1.5873015873015872e-07, |
| "loss": 0.0674, |
| "step": 840 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_accuracy": 0.88, |
| "eval_loss": 0.6521316766738892, |
| "eval_runtime": 1.8761, |
| "eval_samples_per_second": 93.278, |
| "eval_steps_per_second": 5.863, |
| "step": 840 |
| }, |
| { |
| "epoch": 56.0, |
| "step": 840, |
| "total_flos": 1.7108328318259692e+18, |
| "train_loss": 0.19382851386354083, |
| "train_runtime": 1219.347, |
| "train_samples_per_second": 46.205, |
| "train_steps_per_second": 0.689 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 840, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 60, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7108328318259692e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|